diff --git a/build/premake4.lua b/build/premake4.lua index 02819d144..78db145d5 100644 --- a/build/premake4.lua +++ b/build/premake4.lua @@ -97,10 +97,10 @@ -- include "../opencl/vector_add" include "../opencl/basic_initialize" include "../demo/gpu_initialize" --- include "../opencl/parallel_primitives/host" --- include "../opencl/parallel_primitives/test" --- include "../opencl/parallel_primitives/benchmark" --- include "../opencl/lds_bank_conflict" + include "../opencl/parallel_primitives/host" + include "../opencl/parallel_primitives/test" + include "../opencl/parallel_primitives/benchmark" + include "../opencl/lds_bank_conflict" -- include "../opencl/reduce" -- include "../opencl/gpu_broadphase/test" -- include "../opencl/gpu_narrowphase/test" diff --git a/demo/gpudemo/main_opengl3core.cpp b/demo/gpudemo/main_opengl3core.cpp index 315f1abeb..ec97fccc1 100644 --- a/demo/gpudemo/main_opengl3core.cpp +++ b/demo/gpudemo/main_opengl3core.cpp @@ -67,9 +67,6 @@ GpuDemo::CreateFunc* allDemos[]= { // ConcaveCompound2Scene::MyCreateFunc, - ConcaveCompoundScene::MyCreateFunc, - - GpuCompoundPlaneScene::MyCreateFunc, GpuBoxPlaneScene::MyCreateFunc, @@ -84,10 +81,14 @@ GpuDemo::CreateFunc* allDemos[]= ConcaveScene::MyCreateFunc, - GpuSphereScene::MyCreateFunc, - + + ConcaveCompoundScene::MyCreateFunc, + + GpuCompoundPlaneScene::MyCreateFunc, + + GpuSphereScene::MyCreateFunc, PairBench::MyCreateFunc, diff --git a/demo/gpudemo/rigidbody/ConcaveScene.cpp b/demo/gpudemo/rigidbody/ConcaveScene.cpp index cf32e4de9..a077306db 100644 --- a/demo/gpudemo/rigidbody/ConcaveScene.cpp +++ b/demo/gpudemo/rigidbody/ConcaveScene.cpp @@ -549,7 +549,7 @@ void ConcaveCompound2Scene::createDynamicObjects(const ConstructionInfo& ci) float mass = 1;//j==0? 0.f : 1.f; //b3Vector3 position(i*10*ci.gapX,j*ci.gapY,k*10*ci.gapZ); - b3Vector3 position(i*10*ci.gapX,50+j*ci.gapY,k*10*ci.gapZ); + b3Vector3 position(i*10*ci.gapX,10+j*ci.gapY,k*10*ci.gapZ); // b3Quaternion orn(0,0,0,1); b3Quaternion orn(b3Vector3(0,0,1),1.8); @@ -665,7 +665,7 @@ b3Vector3 childPositions[3] = { { float mass = 1;//j==0? 0.f : 1.f; - b3Vector3 position(i*ci.gapX,150+j*ci.gapY,k*ci.gapZ); + b3Vector3 position(i*ci.gapX,50+j*ci.gapY,k*ci.gapZ); //b3Quaternion orn(0,0,0,1); b3Quaternion orn(b3Vector3(1,0,0),0.7); diff --git a/demo/gpudemo/rigidbody/GpuRigidBodyDemo.cpp b/demo/gpudemo/rigidbody/GpuRigidBodyDemo.cpp index f110190d2..e22bfba74 100644 --- a/demo/gpudemo/rigidbody/GpuRigidBodyDemo.cpp +++ b/demo/gpudemo/rigidbody/GpuRigidBodyDemo.cpp @@ -158,18 +158,22 @@ void GpuRigidBodyDemo::clientMoveAndDisplay() if (animate && numObjects) { BT_PROFILE("gl2cl"); - GLuint vbo = m_instancingRenderer->getInternalData()->m_vbo; - int arraySizeInBytes = numObjects * (3)*sizeof(btVector4); - glBindBuffer(GL_ARRAY_BUFFER, vbo); - cl_bool blocking= CL_TRUE; - positions= (btVector4*)glMapBufferRange( GL_ARRAY_BUFFER,m_instancingRenderer->getMaxShapeCapacity(),arraySizeInBytes, GL_MAP_WRITE_BIT|GL_MAP_READ_BIT );//GL_READ_WRITE);//GL_WRITE_ONLY - GLint err = glGetError(); - assert(err==GL_NO_ERROR); + if (!m_data->m_instancePosOrnColor) { + GLuint vbo = m_instancingRenderer->getInternalData()->m_vbo; + int arraySizeInBytes = numObjects * (3)*sizeof(btVector4); + glBindBuffer(GL_ARRAY_BUFFER, vbo); + cl_bool blocking= CL_TRUE; + positions= (btVector4*)glMapBufferRange( GL_ARRAY_BUFFER,m_instancingRenderer->getMaxShapeCapacity(),arraySizeInBytes, GL_MAP_READ_BIT );//GL_READ_WRITE);//GL_WRITE_ONLY + GLint err = glGetError(); + assert(err==GL_NO_ERROR); m_data->m_instancePosOrnColor = new btOpenCLArray(m_clData->m_clContext,m_clData->m_clQueue); m_data->m_instancePosOrnColor->resize(3*numObjects); m_data->m_instancePosOrnColor->copyFromHostPointer(positions,3*numObjects,0); + glUnmapBuffer( GL_ARRAY_BUFFER); + err = glGetError(); + assert(err==GL_NO_ERROR); } } @@ -196,6 +200,13 @@ void GpuRigidBodyDemo::clientMoveAndDisplay() BT_PROFILE("cl2gl_upload"); GLint err = glGetError(); assert(err==GL_NO_ERROR); + GLuint vbo = m_instancingRenderer->getInternalData()->m_vbo; + int arraySizeInBytes = numObjects * (3)*sizeof(btVector4); + glBindBuffer(GL_ARRAY_BUFFER, vbo); + cl_bool blocking= CL_TRUE; + positions= (btVector4*)glMapBufferRange( GL_ARRAY_BUFFER,m_instancingRenderer->getMaxShapeCapacity(),arraySizeInBytes, GL_MAP_WRITE_BIT );//GL_READ_WRITE);//GL_WRITE_ONLY + err = glGetError(); + assert(err==GL_NO_ERROR); m_data->m_instancePosOrnColor->copyToHostPointer(positions,3*numObjects,0); glUnmapBuffer( GL_ARRAY_BUFFER); err = glGetError(); diff --git a/opencl/gpu_rigidbody/host/b3GpuBatchingPgsSolver.cpp b/opencl/gpu_rigidbody/host/b3GpuBatchingPgsSolver.cpp index a4163a704..d87c38d30 100644 --- a/opencl/gpu_rigidbody/host/b3GpuBatchingPgsSolver.cpp +++ b/opencl/gpu_rigidbody/host/b3GpuBatchingPgsSolver.cpp @@ -591,7 +591,7 @@ void b3GpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem if (gpuBatchContacts) { BT_PROFILE("gpu batchContacts"); - maxNumBatches = 25;//250; + maxNumBatches = 50;//250; m_data->m_solverGPU->batchContacts( m_data->m_pBufContactOutGPU, nContacts, m_data->m_solverGPU->m_numConstraints, m_data->m_solverGPU->m_offsets, csCfg.m_staticIdx ); } else { @@ -629,10 +629,17 @@ void b3GpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem int simdWidth =64;//-1;//32; + //int numBatches = sortConstraintByBatch( &cpuContacts[0]+offset, n, simdWidth,csCfg.m_staticIdx ,numBodies); // on GPU int numBatches = sortConstraintByBatch3( &cpuContacts[0]+offset, n, simdWidth,csCfg.m_staticIdx ,numBodies); // on GPU maxNumBatches = btMax(numBatches,maxNumBatches); + static int globalMaxBatch = 0; + if (maxNumBatches>globalMaxBatch ) + { + globalMaxBatch = maxNumBatches; + printf("maxNumBatches = %d\n",maxNumBatches); + } clFinish(m_data->m_queue); @@ -724,7 +731,7 @@ static bool sortfnc(const btSortData& a,const btSortData& b) -b3AlignedObjectArray bodyUsed; + @@ -736,11 +743,7 @@ b3AlignedObjectArray old; inline int b3GpuBatchingPgsSolver::sortConstraintByBatch( b3Contact4* cs, int n, int simdWidth , int staticIdx, int numBodies) { - b3AlignedObjectArray bodyUsed; - bodyUsed.resize(numBodies); - for (int q=0;q bodyUsed2; + inline int b3GpuBatchingPgsSolver::sortConstraintByBatch2( b3Contact4* cs, int numConstraints, int simdWidth , int staticIdx, int numBodies) { - BT_PROFILE("sortConstraintByBatch"); + BT_PROFILE("sortConstraintByBatch2"); - - bodyUsed.resize(2*simdWidth); + + bodyUsed2.resize(2*simdWidth); for (int q=0;q<2*simdWidth;q++) - bodyUsed[q]=0; + bodyUsed2[q]=0; int curBodyUsed = 0; @@ -905,7 +911,7 @@ inline int b3GpuBatchingPgsSolver::sortConstraintByBatch2( b3Contact4* cs, int n int nCurrentBatch = 0; // clear flag for(int i=0; i bodyUsed; +b3AlignedObjectArray curUsed; + + inline int b3GpuBatchingPgsSolver::sortConstraintByBatch3( b3Contact4* cs, int numConstraints, int simdWidth , int staticIdx, int numBodies) { - BT_PROFILE("sortConstraintByBatch"); + BT_PROFILE("sortConstraintByBatch3"); static int maxSwaps = 0; int numSwaps = 0; + curUsed.resize(2*simdWidth); + static int maxNumConstraints = 0; if (maxNumConstraintsm_pBufContactOutGPU->getBufferCL(); } +const b3Contact4* b3GpuNarrowPhase::getContactsCPU() const +{ + m_data->m_pBufContactOutGPU->copyToHost(*m_data->m_pBufContactOutCPU); + return &m_data->m_pBufContactOutCPU->at(0); +} void b3GpuNarrowPhase::computeContacts(cl_mem broadphasePairs, int numBroadphasePairs, cl_mem aabbsWS, int numObjects) { diff --git a/opencl/gpu_rigidbody/host/b3GpuNarrowPhase.h b/opencl/gpu_rigidbody/host/b3GpuNarrowPhase.h index c5a3f78b8..467de5278 100644 --- a/opencl/gpu_rigidbody/host/b3GpuNarrowPhase.h +++ b/opencl/gpu_rigidbody/host/b3GpuNarrowPhase.h @@ -64,6 +64,9 @@ public: cl_mem getCollidablesGpu(); int getNumCollidablesGpu() const; + + const struct b3Contact4* getContactsCPU() const; + cl_mem getContactsGpu(); int getNumContactsGpu() const; diff --git a/opencl/gpu_rigidbody/host/b3GpuRigidBodyPipeline.cpp b/opencl/gpu_rigidbody/host/b3GpuRigidBodyPipeline.cpp index 34016b272..a848189f5 100644 --- a/opencl/gpu_rigidbody/host/b3GpuRigidBodyPipeline.cpp +++ b/opencl/gpu_rigidbody/host/b3GpuRigidBodyPipeline.cpp @@ -23,6 +23,8 @@ #include "Bullet3Common/b3Quickprof.h" #include "b3Config.h" +bool dumpContactStats = false; + b3GpuRigidBodyPipeline::b3GpuRigidBodyPipeline(cl_context ctx,cl_device_id device, cl_command_queue q,class b3GpuNarrowPhase* narrowphase, class b3GpuSapBroadphase* broadphaseSap ) { m_data = new b3GpuRigidBodyPipelineInternalData; @@ -95,6 +97,7 @@ void b3GpuRigidBodyPipeline::stepSimulation(float deltaTime) int numPairs = m_data->m_broadphaseSap->getNumOverlap(); int numContacts = 0; + int numBodies = m_data->m_narrowphase->getNumBodiesGpu(); if (numPairs) @@ -105,8 +108,23 @@ void b3GpuRigidBodyPipeline::stepSimulation(float deltaTime) m_data->m_narrowphase->computeContacts(pairs,numPairs,aabbsWS,numBodies); numContacts = m_data->m_narrowphase->getNumContactsGpu(); - //if (numContacts) - // printf("numContacts = %d\n", numContacts); + + if (dumpContactStats && numContacts) + { + m_data->m_narrowphase->getContactsGpu(); + + printf("numContacts = %d\n", numContacts); + + int totalPoints = 0; + const b3Contact4* contacts = m_data->m_narrowphase->getContactsCPU(); + + for (int i=0;igetNPoints(); + } + printf("totalPoints=%d\n",totalPoints); + + } } diff --git a/opencl/parallel_primitives/benchmark/test_large_problem_sorting.cpp b/opencl/parallel_primitives/benchmark/test_large_problem_sorting.cpp index 40b9a3f46..6f9e089c0 100644 --- a/opencl/parallel_primitives/benchmark/test_large_problem_sorting.cpp +++ b/opencl/parallel_primitives/benchmark/test_large_problem_sorting.cpp @@ -641,7 +641,7 @@ extern bool gDebugSkipLoadingBinary; int main( int argc, char** argv) { - gDebugSkipLoadingBinary = true; + //gDebugSkipLoadingBinary = true; cl_int ciErrNum; b3CommandLineArgs args(argc,argv);