diff --git a/Demos3/GpuDemos/broadphase/PairBench.cpp b/Demos3/GpuDemos/broadphase/PairBench.cpp index 6d2e63d28..3a908a7eb 100644 --- a/Demos3/GpuDemos/broadphase/PairBench.cpp +++ b/Demos3/GpuDemos/broadphase/PairBench.cpp @@ -21,6 +21,9 @@ #include "pairsKernel.h" +//we use an offset, just for testing to make sure there is no assumption in the broadphase that 'index' starts at 0 +#define TEST_INDEX_OFFSET 1024 + #ifdef B3_USE_MIDI #include "../../../btgui/MidiTest/RtMidi.h" bool chooseMidiPort( RtMidiIn *rtmidi ) @@ -79,10 +82,16 @@ int largeCount = 0; float timeStepPos = 0.000166666; float mAmplitude = 251.f; -int dimensions[3]={10,10,10}; +int dimensions[3]={10,10,10};//initialized with x_dim/y_dim/z_dim const char* axisNames[3] = {"# x-axis","# y-axis","# z-axis"}; extern bool gReset; +static int curUseLargeAabbOption=0; +const char* useLargeAabbOptions[] = +{ + "NoLargeAabb", + "UseLargeAabb", +}; struct BroadphaseEntry { @@ -91,7 +100,6 @@ struct BroadphaseEntry }; - static PairBench* sPairDemo = 0; #define BP_COMBO_INDEX 123 @@ -99,9 +107,13 @@ static PairBench* sPairDemo = 0; static int curSelectedBroadphase = 0; static BroadphaseEntry allBroadphases[]= { - {"Gpu 1-Sap",b3GpuSapBroadphase::CreateFunc}, {"Gpu Grid",b3GpuGridBroadphase::CreateFunc}, - + {"CPU Brute Force",b3GpuSapBroadphase::CreateFuncBruteForceCpu}, + {"GPU Brute Force",b3GpuSapBroadphase::CreateFuncBruteForceGpu}, + {"GPU 1-SAP Original",b3GpuSapBroadphase::CreateFuncOriginal}, + {"GPU 1-SAP Barrier",b3GpuSapBroadphase::CreateFuncBarrier}, + {"GPU 1-SAP LDS",b3GpuSapBroadphase::CreateFuncLocalMemory}, + {"GPU 1-SAP LDS Batch",b3GpuSapBroadphase::CreateFuncLocalMemoryBatchWrite}, }; @@ -222,6 +234,33 @@ struct PairComboBoxHander :public Gwen::Event::Handler } + void onSelectUseLargeAabb( Gwen::Controls::Base* pControl ) + { + if (m_active) + { + Gwen::Controls::ComboBox* but = (Gwen::Controls::ComboBox*) pControl; + + Gwen::String str = Gwen::Utility::UnicodeToString( but->GetSelectedItem()->GetText()); + + int numItems = sizeof(useLargeAabbOptions)/sizeof(const char*); + + //find selected item + for (int i=0;ideleteBroadphase(); + sPairDemo->createBroadphase(dimensions[0],dimensions[1],dimensions[2]); + break; + } + } + + + + } + } + }; @@ -246,8 +285,8 @@ struct MySliderEventHandler : public Gwen::Event::Handler { Gwen::Controls::Slider* pSlider = (Gwen::Controls::Slider*)pControl; //printf("value = %f\n", pSlider->GetValue());//UnitPrint( Utility::Format( L"Slider Value: %.2f", pSlider->GetValue() ) ); - - T v = T(pSlider->GetValue()); + float bla = pSlider->GetValue(); + T v = T(bla); SetValue(v); } @@ -296,6 +335,10 @@ void PairMidiCallback( double deltatime, std::vector< unsigned char > *message, void PairBench::initPhysics(const ConstructionInfo& ci) { + dimensions[0] = ci.arraySizeX; + dimensions[1] = ci.arraySizeY; + dimensions[2] = ci.arraySizeZ; + #ifdef B3_USE_MIDI m_data->m_midiIn = new RtMidiIn(); if (!chooseMidiPort(m_data->m_midiIn)) @@ -327,7 +370,7 @@ void PairBench::initPhysics(const ConstructionInfo& ci) { - int startItem = 0; + int startItem = curSelectedBroadphase; int numBroadphases = sizeof(allBroadphases)/sizeof(BroadphaseEntry); Gwen::Controls::ComboBox* combobox = new Gwen::Controls::ComboBox(data->m_demoPage->GetPage()); @@ -353,6 +396,33 @@ void PairBench::initPhysics(const ConstructionInfo& ci) data->m_curYposition+=22; } + { + int startItem = curUseLargeAabbOption; + int numUseLargeAabb = sizeof(useLargeAabbOptions)/sizeof(const char*); + + Gwen::Controls::ComboBox* combobox = new Gwen::Controls::ComboBox(data->m_demoPage->GetPage()); + PairComboBoxHander* handler = new PairComboBoxHander(555); + m_data->m_myControls.push_back(combobox); + + + combobox->onSelection.Add(handler,&PairComboBoxHander::onSelectUseLargeAabb); + int ypos = data->m_curYposition; + combobox->SetPos(10, ypos ); + combobox->SetWidth( 100 ); + + for (int i=0;iAddItem(Gwen::Utility::StringToUnicode(useLargeAabbOptions[i])); + if (i==startItem) + combobox->OnItemSelected(item); + } + + handler->m_active = true; + + data->m_curYposition+=22; + } + data->m_curYposition+=22; + if (1) for (int i=0;i<3;i++) { @@ -434,7 +504,7 @@ void PairBench::initPhysics(const ConstructionInfo& ci) int errNum=0; m_data->m_moveObjectsKernel = b3OpenCLUtils::compileCLKernelFromString(m_clData->m_clContext,m_clData->m_clDevice,pairsKernelsCL,"moveObjectsKernel",&errNum,pairBenchProg); m_data->m_sineWaveKernel = b3OpenCLUtils::compileCLKernelFromString(m_clData->m_clContext,m_clData->m_clDevice,pairsKernelsCL,"sineWaveKernel",&errNum,pairBenchProg); - m_data->m_colorPairsKernel = b3OpenCLUtils::compileCLKernelFromString(m_clData->m_clContext,m_clData->m_clDevice,pairsKernelsCL,"colorPairsKernel",&errNum,pairBenchProg); + m_data->m_colorPairsKernel = b3OpenCLUtils::compileCLKernelFromString(m_clData->m_clContext,m_clData->m_clDevice,pairsKernelsCL,"colorPairsKernel2",&errNum,pairBenchProg); m_data->m_updateAabbSimple = b3OpenCLUtils::compileCLKernelFromString(m_clData->m_clContext,m_clData->m_clDevice,pairsKernelsCL,"updateAabbSimple",&errNum,pairBenchProg); } @@ -469,7 +539,7 @@ void PairBench::createBroadphase(int arraySizeX, int arraySizeY, int arraySizeZ) int shapeId = m_instancingRenderer->registerShape(&cube_vertices[0],numVertices,cube_indices,numIndices); int group=1; int mask=1; - int index=10; + int index=TEST_INDEX_OFFSET; if (gPairBenchFileName) @@ -598,14 +668,44 @@ void PairBench::createBroadphase(int arraySizeX, int arraySizeY, int arraySizeZ) b3Vector4 color=b3MakeVector4(0,1,0,1); b3Vector4 scaling=b3MakeVector4(1,1,1,1); + bool large = false; + + if (curUseLargeAabbOption) + { + if (i==0 && j==0 && k==0) + { + large = true; + scaling[0] = 1000; + scaling[1] = 1000; + scaling[2] = 1000; + } + } + /*if (j==0) + { + large=true; + scaling[1] = 10000; + } + if (k==0) + { + large=true; + scaling[2] = 10000; + }*/ + + int id = m_instancingRenderer->registerGraphicsInstance(shapeId,position,orn,color,scaling); - b3Vector3 aabbHalfExtents=b3MakeVector3(1,1,1); - - b3Vector3 aabbMin = position-aabbHalfExtents; - b3Vector3 aabbMax = position+aabbHalfExtents; + + + b3Vector3 aabbMin = position-scaling; + b3Vector3 aabbMax = position+scaling; - - m_data->m_broadphaseGPU->createProxy(aabbMin,aabbMax,index,group,mask); + if (large) + { + m_data->m_broadphaseGPU->createLargeProxy(aabbMin,aabbMax,index,group,mask); + + } else + { + m_data->m_broadphaseGPU->createProxy(aabbMin,aabbMax,index,group,mask); + } index++; } } @@ -674,10 +774,15 @@ void PairBench::clientMoveAndDisplay() //color all objects blue bool animate=true; - int numObjects= m_instancingRenderer->getInternalData()->m_totalNumInstances; + int numObjects= 0; + { + B3_PROFILE("Num Objects"); + numObjects = m_instancingRenderer->getInternalData()->m_totalNumInstances; + } b3Vector4* positions = 0; if (numObjects) { + B3_PROFILE("Sync"); GLuint vbo = m_instancingRenderer->getInternalData()->m_vbo; @@ -686,7 +791,11 @@ void PairBench::clientMoveAndDisplay() glBindBuffer(GL_ARRAY_BUFFER, vbo); cl_bool blocking= CL_TRUE; - char* hostPtr= (char*)glMapBufferRange( GL_ARRAY_BUFFER,m_instancingRenderer->getMaxShapeCapacity(),arraySizeInBytes, GL_MAP_WRITE_BIT|GL_MAP_READ_BIT );//GL_READ_WRITE);//GL_WRITE_ONLY + char* hostPtr= 0; + { + B3_PROFILE("glMapBufferRange"); + hostPtr = (char*)glMapBufferRange( GL_ARRAY_BUFFER,m_instancingRenderer->getMaxShapeCapacity(),arraySizeInBytes, GL_MAP_WRITE_BIT|GL_MAP_READ_BIT );//GL_READ_WRITE);//GL_WRITE_ONLY + } GLint err = glGetError(); assert(err==GL_NO_ERROR); positions = (b3Vector4*)hostPtr; @@ -716,71 +825,82 @@ void PairBench::clientMoveAndDisplay() { if (1) { + if (1) + { - b3LauncherCL launcher(m_clData->m_clQueue, m_data->m_sineWaveKernel,"m_sineWaveKernel"); - launcher.setBuffer(m_data->m_instancePosOrnColor->getBufferCL() ); - launcher.setBuffer(m_data->m_bodyTimes->getBufferCL() ); + b3LauncherCL launcher(m_clData->m_clQueue, m_data->m_sineWaveKernel,"m_sineWaveKernel"); + launcher.setBuffer(m_data->m_instancePosOrnColor->getBufferCL() ); + launcher.setBuffer(m_data->m_bodyTimes->getBufferCL() ); - launcher.setConst(timeStepPos); - launcher.setConst(mAmplitude); - launcher.setConst( numObjects); - launcher.launch1D( numObjects); - clFinish(m_clData->m_clQueue); - } - else - { + launcher.setConst(timeStepPos); + launcher.setConst(mAmplitude); + launcher.setConst( numObjects); + launcher.launch1D( numObjects); + clFinish(m_clData->m_clQueue); + } + else + { - b3LauncherCL launcher(m_clData->m_clQueue, m_data->m_moveObjectsKernel,"m_moveObjectsKernel"); - launcher.setBuffer(m_data->m_instancePosOrnColor->getBufferCL() ); - launcher.setConst( numObjects); - launcher.launch1D( numObjects); - clFinish(m_clData->m_clQueue); + b3LauncherCL launcher(m_clData->m_clQueue, m_data->m_moveObjectsKernel,"m_moveObjectsKernel"); + launcher.setBuffer(m_data->m_instancePosOrnColor->getBufferCL() ); + launcher.setConst( numObjects); + launcher.launch1D( numObjects); + clFinish(m_clData->m_clQueue); + } } } } bool updateOnGpu=true; - if (updateOnGpu) + if (1) { - B3_PROFILE("updateOnGpu"); - b3LauncherCL launcher(m_clData->m_clQueue, m_data->m_updateAabbSimple,"m_updateAabbSimple"); - launcher.setBuffer(m_data->m_instancePosOrnColor->getBufferCL() ); - launcher.setConst( numObjects); - launcher.setBuffer(m_data->m_broadphaseGPU->getAabbBufferWS()); - launcher.launch1D( numObjects); - clFinish(m_clData->m_clQueue); - - } else - { - B3_PROFILE("updateOnCpu"); - if (!gPairBenchFileName) + if (updateOnGpu) { - int allAabbs = m_data->m_broadphaseGPU->getAllAabbsCPU().size(); + B3_PROFILE("updateOnGpu"); + b3LauncherCL launcher(m_clData->m_clQueue, m_data->m_updateAabbSimple,"m_updateAabbSimple"); + launcher.setBuffer(m_data->m_instancePosOrnColor->getBufferCL() ); + launcher.setConst( numObjects); + launcher.setBuffer(m_data->m_broadphaseGPU->getAabbBufferWS()); + launcher.launch1D( numObjects); + clFinish(m_clData->m_clQueue); + + } else + { + B3_PROFILE("updateOnCpu"); + if (!gPairBenchFileName) + { + int allAabbs = m_data->m_broadphaseGPU->getAllAabbsCPU().size(); - b3AlignedObjectArray posOrnColorsCpu; - if (m_data->m_instancePosOrnColor) - m_data->m_instancePosOrnColor->copyToHost(posOrnColorsCpu); + b3AlignedObjectArray posOrnColorsCpu; + if (m_data->m_instancePosOrnColor) + m_data->m_instancePosOrnColor->copyToHost(posOrnColorsCpu); - for (int nodeId=0;nodeIdm_broadphaseGPU->getAllAabbsCPU()[nodeId].m_minVec = position-halfExtents; - m_data->m_broadphaseGPU->getAllAabbsCPU()[nodeId].m_minIndices[3] = nodeId; - m_data->m_broadphaseGPU->getAllAabbsCPU()[nodeId].m_maxVec = position+halfExtents; - m_data->m_broadphaseGPU->getAllAabbsCPU()[nodeId].m_signedMaxIndices[3]= nodeId; + { + b3Vector3 position = posOrnColorsCpu[nodeId]; + + b3SapAabb orgAabb = m_data->m_broadphaseGPU->getAllAabbsCPU()[nodeId]; + b3Vector3 halfExtents = 0.5f*(orgAabb.m_maxVec-orgAabb.m_minVec); + int orgNodeIndex = orgAabb.m_minIndices[3]; + int orgBroadphaseIndex = orgAabb.m_signedMaxIndices[3]; + + m_data->m_broadphaseGPU->getAllAabbsCPU()[nodeId].m_minVec = position-halfExtents; + m_data->m_broadphaseGPU->getAllAabbsCPU()[nodeId].m_minIndices[3] = orgNodeIndex; + m_data->m_broadphaseGPU->getAllAabbsCPU()[nodeId].m_maxVec = position+halfExtents; + m_data->m_broadphaseGPU->getAllAabbsCPU()[nodeId].m_signedMaxIndices[3]= orgBroadphaseIndex; + } + } + m_data->m_broadphaseGPU->writeAabbsToGpu(); } - } - m_data->m_broadphaseGPU->writeAabbsToGpu(); - } + } } unsigned long dt = 0; @@ -791,15 +911,20 @@ void PairBench::clientMoveAndDisplay() B3_PROFILE("calculateOverlappingPairs"); int sz = sizeof(b3Int4)*64*numObjects; - m_data->m_broadphaseGPU->calculateOverlappingPairs(16*numObjects); + int prealloc = 3*1024*1024; + + int maxOverlap = b3Min(prealloc,16*numObjects); + + m_data->m_broadphaseGPU->calculateOverlappingPairs(maxOverlap); int numPairs = m_data->m_broadphaseGPU->getNumOverlap(); - printf("numPairs = %d\n", numPairs); + //printf("numPairs = %d\n", numPairs); dt = cl.getTimeMicroseconds()-dt; } if (m_data->m_gui) { + B3_PROFILE("update Gui"); int allAabbs = m_data->m_broadphaseGPU->getAllAabbsCPU().size(); int numOverlap = m_data->m_broadphaseGPU->getNumOverlap(); @@ -836,6 +961,8 @@ void PairBench::clientMoveAndDisplay() launcher.setBuffer(m_data->m_instancePosOrnColor->getBufferCL() ); launcher.setConst( numObjects); launcher.setBuffer( pairBuf); + int indexOffset = TEST_INDEX_OFFSET; + launcher.setConst(indexOffset); launcher.setConst( numPairs); launcher.launch1D( numPairs); clFinish(m_clData->m_clQueue); diff --git a/Demos3/GpuDemos/broadphase/pairsKernel.cl b/Demos3/GpuDemos/broadphase/pairsKernel.cl index 3c3b0bc3f..b99a22e6b 100644 --- a/Demos3/GpuDemos/broadphase/pairsKernel.cl +++ b/Demos3/GpuDemos/broadphase/pairsKernel.cl @@ -12,15 +12,15 @@ __kernel void moveObjectsKernel(__global float4* posOrnColors, int numObjects) colors[iGID] = (float4)(0,0,1,1); } -__kernel void colorPairsKernel(__global float4* posOrnColors, int numObjects, __global const int4* pairs, int numPairs) +__kernel void colorPairsKernel2(__global float4* posOrnColors, int numObjects, __global const int4* pairs, int indexOffset, int numPairs) { int iPairId = get_global_id(0); if (iPairId>=numPairs) return; __global float4* colors = &posOrnColors[numObjects*2]; - int iObjectA = pairs[iPairId].x; - int iObjectB = pairs[iPairId].y; + int iObjectA = pairs[iPairId].x-indexOffset; + int iObjectB = pairs[iPairId].y-indexOffset; colors[iObjectA] = (float4)(1,0,0,1); colors[iObjectB] = (float4)(1,0,0,1); } @@ -56,15 +56,24 @@ __kernel void updateAabbSimple( __global float4* posOrnColors, const int numNode int nodeId = get_global_id(0); if( nodeId < numNodes ) { + + b3AABBCL orgAabbMin = pAABB[nodeId*2]; + b3AABBCL orgAabbMax = pAABB[nodeId*2+1]; + int orgNodeId = orgAabbMin.uw; + int orgBroadphaseIndex = orgAabbMax.uw; + float4 position = posOrnColors[nodeId]; - float4 halfExtents = (float4)(1.01f,1.01f,1.01f,0.f); + float4 argAabbMinVec = (float4)(orgAabbMin.fx,orgAabbMin.fy,orgAabbMin.fz,0.f); + float4 argAabbMaxVec = (float4)(orgAabbMax.fx,orgAabbMax.fy,orgAabbMax.fz,0.f); + float4 halfExtents = 0.5f*(argAabbMaxVec-argAabbMinVec); + pAABB[nodeId*2].fx = position.x-halfExtents.x; pAABB[nodeId*2].fy = position.y-halfExtents.y; pAABB[nodeId*2].fz = position.z-halfExtents.z; - pAABB[nodeId*2].uw = nodeId; + pAABB[nodeId*2].uw = orgNodeId; pAABB[nodeId*2+1].fx = position.x+halfExtents.x; pAABB[nodeId*2+1].fy = position.y+halfExtents.y; pAABB[nodeId*2+1].fz = position.z+halfExtents.z; - pAABB[nodeId*2+1].uw = nodeId; + pAABB[nodeId*2+1].uw = orgBroadphaseIndex; } } diff --git a/Demos3/GpuDemos/broadphase/pairsKernel.h b/Demos3/GpuDemos/broadphase/pairsKernel.h index d707edf3a..2a98f2221 100644 --- a/Demos3/GpuDemos/broadphase/pairsKernel.h +++ b/Demos3/GpuDemos/broadphase/pairsKernel.h @@ -13,14 +13,14 @@ static const char* pairsKernelsCL= \ " __global float4* colors = &posOrnColors[numObjects*2];\n" " colors[iGID] = (float4)(0,0,1,1);\n" "}\n" -"__kernel void colorPairsKernel(__global float4* posOrnColors, int numObjects, __global const int4* pairs, int numPairs)\n" +"__kernel void colorPairsKernel2(__global float4* posOrnColors, int numObjects, __global const int4* pairs, int indexOffset, int numPairs)\n" "{\n" " int iPairId = get_global_id(0);\n" " if (iPairId>=numPairs)\n" " return;\n" " __global float4* colors = &posOrnColors[numObjects*2];\n" -" int iObjectA = pairs[iPairId].x;\n" -" int iObjectB = pairs[iPairId].y;\n" +" int iObjectA = pairs[iPairId].x-indexOffset;\n" +" int iObjectB = pairs[iPairId].y-indexOffset;\n" " colors[iObjectA] = (float4)(1,0,0,1);\n" " colors[iObjectB] = (float4)(1,0,0,1);\n" "}\n" @@ -53,16 +53,25 @@ static const char* pairsKernelsCL= \ " int nodeId = get_global_id(0);\n" " if( nodeId < numNodes )\n" " {\n" +" \n" +" b3AABBCL orgAabbMin = pAABB[nodeId*2];\n" +" b3AABBCL orgAabbMax = pAABB[nodeId*2+1];\n" +" int orgNodeId = orgAabbMin.uw;\n" +" int orgBroadphaseIndex = orgAabbMax.uw;\n" +" \n" " float4 position = posOrnColors[nodeId];\n" -" float4 halfExtents = (float4)(1.01f,1.01f,1.01f,0.f);\n" +" float4 argAabbMinVec = (float4)(orgAabbMin.fx,orgAabbMin.fy,orgAabbMin.fz,0.f);\n" +" float4 argAabbMaxVec = (float4)(orgAabbMax.fx,orgAabbMax.fy,orgAabbMax.fz,0.f);\n" +" float4 halfExtents = 0.5f*(argAabbMaxVec-argAabbMinVec);\n" +" \n" " pAABB[nodeId*2].fx = position.x-halfExtents.x;\n" " pAABB[nodeId*2].fy = position.y-halfExtents.y;\n" " pAABB[nodeId*2].fz = position.z-halfExtents.z;\n" -" pAABB[nodeId*2].uw = nodeId;\n" +" pAABB[nodeId*2].uw = orgNodeId;\n" " pAABB[nodeId*2+1].fx = position.x+halfExtents.x;\n" " pAABB[nodeId*2+1].fy = position.y+halfExtents.y;\n" " pAABB[nodeId*2+1].fz = position.z+halfExtents.z;\n" -" pAABB[nodeId*2+1].uw = nodeId; \n" +" pAABB[nodeId*2+1].uw = orgBroadphaseIndex; \n" " }\n" "}\n" ; diff --git a/src/Bullet3OpenCL/BroadphaseCollision/b3GpuGridBroadphase.cpp b/src/Bullet3OpenCL/BroadphaseCollision/b3GpuGridBroadphase.cpp index a54886d9f..276d4630f 100644 --- a/src/Bullet3OpenCL/BroadphaseCollision/b3GpuGridBroadphase.cpp +++ b/src/Bullet3OpenCL/BroadphaseCollision/b3GpuGridBroadphase.cpp @@ -116,7 +116,7 @@ void b3GpuGridBroadphase::createProxy(const b3Vector3& aabbMin, const b3Vector3 aabb.m_minVec = aabbMin; aabb.m_maxVec = aabbMax; aabb.m_minIndices[3] = userPtr; - aabb.m_signedMaxIndices[3] = userPtr; + aabb.m_signedMaxIndices[3] = m_allAabbsCPU1.size();//NOT userPtr; m_allAabbsCPU1.push_back(aabb); m_smallAabbsCPU.push_back(aabb); } @@ -126,7 +126,7 @@ void b3GpuGridBroadphase::createLargeProxy(const b3Vector3& aabbMin, const b3Ve aabb.m_minVec = aabbMin; aabb.m_maxVec = aabbMax; aabb.m_minIndices[3] = userPtr; - aabb.m_signedMaxIndices[3] = userPtr; + aabb.m_signedMaxIndices[3] = m_allAabbsCPU1.size();//NOT userPtr; m_allAabbsCPU1.push_back(aabb); m_largeAabbsCPU.push_back(aabb); } @@ -139,7 +139,7 @@ void b3GpuGridBroadphase::calculateOverlappingPairs(int maxPairs) if (0) { calculateOverlappingPairsHost(maxPairs); - + /* b3AlignedObjectArray cpuPairs; m_gpuPairs.copyToHost(cpuPairs); printf("host m_gpuPairs.size()=%d\n",m_gpuPairs.size()); @@ -147,25 +147,53 @@ void b3GpuGridBroadphase::calculateOverlappingPairs(int maxPairs) { printf("host pair %d = %d,%d\n",i,cpuPairs[i].x,cpuPairs[i].y); } + */ + return; } //sync small AABBs { - int numSmallAabbs = m_smallAabbsGPU.size(); - if (numSmallAabbs) - { - B3_PROFILE("copyAabbsKernelSmall"); - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL( m_allAabbsGPU1.getBufferCL(), true ), - b3BufferInfoCL( m_smallAabbsGPU.getBufferCL()), - }; - b3LauncherCL launcher(m_queue, m_copyAabbsKernel,"m_copyAabbsKernel" ); - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst( numSmallAabbs ); - int num = numSmallAabbs; - launcher.launch1D( num); + + bool syncOnHost = false; + if (syncOnHost) + { + m_allAabbsGPU1.copyToHost(this->m_allAabbsCPU1); + b3AlignedObjectArray hostSmallAabbs; + m_smallAabbsGPU.copyToHost(hostSmallAabbs); + int numSmallAabbs = hostSmallAabbs.size(); + for (int i=0;i=numObjects) + // return; + int src = hostSmallAabbs[i].m_signedMaxIndices[3]; + hostSmallAabbs[i] = m_allAabbsCPU1[src]; + hostSmallAabbs[i].m_signedMaxIndices[3] = src; + } + } + m_smallAabbsGPU.copyFromHost(hostSmallAabbs); + } else + { + int numSmallAabbs = m_smallAabbsGPU.size(); + if (numSmallAabbs) + { + B3_PROFILE("copyAabbsKernelSmall"); + b3BufferInfoCL bInfo[] = { + b3BufferInfoCL( m_allAabbsGPU1.getBufferCL(), true ), + b3BufferInfoCL( m_smallAabbsGPU.getBufferCL()), + }; + + b3LauncherCL launcher(m_queue, m_copyAabbsKernel,"m_copyAabbsKernel" ); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); + launcher.setConst( numSmallAabbs ); + int num = numSmallAabbs; + launcher.launch1D( num); + } } + } //sync large AABBs @@ -328,19 +356,19 @@ void b3GpuGridBroadphase::calculateOverlappingPairs(int maxPairs) } void b3GpuGridBroadphase::calculateOverlappingPairsHost(int maxPairs) { -#if 0 + m_hostPairs.resize(0); m_allAabbsGPU1.copyToHost(m_allAabbsCPU1); - for (int i=0;i=0) // return calculateOverlappingPairsHostIncremental3Sap(); + //calculateOverlappingPairsHost(maxPairs); B3_PROFILE("GPU 1-axis SAP calculateOverlappingPairs"); diff --git a/src/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.h b/src/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.h index a01b9e977..2cc32b014 100644 --- a/src/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.h +++ b/src/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.h @@ -11,6 +11,7 @@ class b3Vector3; #include "b3GpuBroadphaseInterface.h" + class b3GpuSapBroadphase : public b3GpuBroadphaseInterface { @@ -87,12 +88,44 @@ public: class b3PrefixScanFloat4CL* m_prefixScanFloat4; - b3GpuSapBroadphase(cl_context ctx,cl_device_id device, cl_command_queue q ); + enum b3GpuSapKernelType + { + B3_GPU_SAP_KERNEL_BRUTE_FORCE_CPU=1, + B3_GPU_SAP_KERNEL_BRUTE_FORCE_GPU, + B3_GPU_SAP_KERNEL_ORIGINAL, + B3_GPU_SAP_KERNEL_BARRIER, + B3_GPU_SAP_KERNEL_LOCAL_SHARED_MEMORY, + B3_GPU_SAP_KERNEL_LOCAL_SHARED_MEMORY_BATCH_WRITE + }; + + b3GpuSapBroadphase(cl_context ctx,cl_device_id device, cl_command_queue q , b3GpuSapKernelType kernelType=B3_GPU_SAP_KERNEL_LOCAL_SHARED_MEMORY); virtual ~b3GpuSapBroadphase(); - static b3GpuBroadphaseInterface* CreateFunc(cl_context ctx,cl_device_id device, cl_command_queue q) + static b3GpuBroadphaseInterface* CreateFuncBruteForceCpu(cl_context ctx,cl_device_id device, cl_command_queue q) { - return new b3GpuSapBroadphase(ctx,device,q); + return new b3GpuSapBroadphase(ctx,device,q,B3_GPU_SAP_KERNEL_BRUTE_FORCE_CPU); + } + + static b3GpuBroadphaseInterface* CreateFuncBruteForceGpu(cl_context ctx,cl_device_id device, cl_command_queue q) + { + return new b3GpuSapBroadphase(ctx,device,q,B3_GPU_SAP_KERNEL_BRUTE_FORCE_GPU); + } + + static b3GpuBroadphaseInterface* CreateFuncOriginal(cl_context ctx,cl_device_id device, cl_command_queue q) + { + return new b3GpuSapBroadphase(ctx,device,q,B3_GPU_SAP_KERNEL_ORIGINAL); + } + static b3GpuBroadphaseInterface* CreateFuncBarrier(cl_context ctx,cl_device_id device, cl_command_queue q) + { + return new b3GpuSapBroadphase(ctx,device,q,B3_GPU_SAP_KERNEL_BARRIER); + } + static b3GpuBroadphaseInterface* CreateFuncLocalMemory(cl_context ctx,cl_device_id device, cl_command_queue q) + { + return new b3GpuSapBroadphase(ctx,device,q,B3_GPU_SAP_KERNEL_LOCAL_SHARED_MEMORY); + } + static b3GpuBroadphaseInterface* CreateFuncLocalMemoryBatchWrite(cl_context ctx,cl_device_id device, cl_command_queue q) + { + return new b3GpuSapBroadphase(ctx,device,q,B3_GPU_SAP_KERNEL_LOCAL_SHARED_MEMORY_BATCH_WRITE); } virtual void calculateOverlappingPairs(int maxPairs); diff --git a/src/Bullet3OpenCL/BroadphaseCollision/kernels/sap.cl b/src/Bullet3OpenCL/BroadphaseCollision/kernels/sap.cl index 74baa30bd..533dac6e5 100644 --- a/src/Bullet3OpenCL/BroadphaseCollision/kernels/sap.cl +++ b/src/Bullet3OpenCL/BroadphaseCollision/kernels/sap.cl @@ -100,6 +100,32 @@ __kernel void computePairsKernelTwoArrays( __global const btAabbCL* unsortedAa } } + + +__kernel void computePairsKernelBruteForce( __global const btAabbCL* aabbs, volatile __global int4* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs) +{ + int i = get_global_id(0); + if (i>=numObjects) + return; + for (int j=i+1;j=numObjects)\n" +" return;\n" +" for (int j=i+1;j