From 1185de51d53df25cefe08e65f28ed9664e769272 Mon Sep 17 00:00:00 2001 From: erwin coumans Date: Fri, 3 May 2013 01:14:34 -0700 Subject: [PATCH] experiment with first-level batching using object index instead of spatial hash in uniform grid (to avoid tuning average object size for uniform grid) --- Demos3/BasicGpuDemo/BasicGpuDemo.cpp | 10 +-- .../GpuDemos/rigidbody/GpuRigidBodyDemo.cpp | 1 + .../RigidBody/b3GpuBatchingPgsSolver.cpp | 12 ++- src/Bullet3OpenCL/RigidBody/b3Solver.cpp | 75 +++++++++++++++++++ src/Bullet3OpenCL/RigidBody/b3Solver.h | 2 +- .../RigidBody/kernels/solverSetup2.cl | 63 +++++++++++++++- .../RigidBody/kernels/solverSetup2.h | 63 +++++++++++++++- 7 files changed, 211 insertions(+), 15 deletions(-) diff --git a/Demos3/BasicGpuDemo/BasicGpuDemo.cpp b/Demos3/BasicGpuDemo/BasicGpuDemo.cpp index bca6dc85c..73dc48cc0 100644 --- a/Demos3/BasicGpuDemo/BasicGpuDemo.cpp +++ b/Demos3/BasicGpuDemo/BasicGpuDemo.cpp @@ -15,9 +15,9 @@ subject to the following restrictions: ///create 125 (5x5x5) dynamic object -#define ARRAY_SIZE_X 5 -#define ARRAY_SIZE_Y 5 -#define ARRAY_SIZE_Z 5 +#define ARRAY_SIZE_X 30 +#define ARRAY_SIZE_Y 20 +#define ARRAY_SIZE_Z 30 //maximum number of objects (and allow user to shoot additional boxes) #define MAX_PROXIES (ARRAY_SIZE_X*ARRAY_SIZE_Y*ARRAY_SIZE_Z + 1024) @@ -164,7 +164,7 @@ void BasicGpuDemo::exitCL() BasicGpuDemo::BasicGpuDemo() { m_clData = new btInternalData; - setCameraDistance(btScalar(SCALING*20.)); + setCameraDistance(btScalar(SCALING*120.)); this->setAzi(45); this->setEle(45); @@ -222,7 +222,7 @@ void BasicGpuDemo::initPhysics() m_dynamicsWorld->setGravity(btVector3(0,-10,0)); ///create a few basic rigid bodies - btBoxShape* groundShape = new btBoxShape(btVector3(btScalar(50.),btScalar(50.),btScalar(50.))); + btBoxShape* groundShape = new btBoxShape(btVector3(btScalar(150.),btScalar(50.),btScalar(150.))); //groundShape->initializePolyhedralFeatures(); // btCollisionShape* groundShape = new btStaticPlaneShape(btVector3(0,1,0),50); diff --git a/Demos3/GpuDemos/rigidbody/GpuRigidBodyDemo.cpp b/Demos3/GpuDemos/rigidbody/GpuRigidBodyDemo.cpp index e052cfa05..08fe726cb 100644 --- a/Demos3/GpuDemos/rigidbody/GpuRigidBodyDemo.cpp +++ b/Demos3/GpuDemos/rigidbody/GpuRigidBodyDemo.cpp @@ -119,6 +119,7 @@ void GpuRigidBodyDemo::initPhysics(const ConstructionInfo& ci) setupScene(ci); + m_data->m_rigidBodyPipeline->writeAllInstancesToGpu(); np->writeAllBodiesToGpu(); bp->writeAabbsToGpu(); diff --git a/src/Bullet3OpenCL/RigidBody/b3GpuBatchingPgsSolver.cpp b/src/Bullet3OpenCL/RigidBody/b3GpuBatchingPgsSolver.cpp index dbffe25b9..7ce6b8f85 100644 --- a/src/Bullet3OpenCL/RigidBody/b3GpuBatchingPgsSolver.cpp +++ b/src/Bullet3OpenCL/RigidBody/b3GpuBatchingPgsSolver.cpp @@ -424,7 +424,7 @@ void b3GpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem float dt=1./60.; b3ConstraintCfg csCfg( dt ); csCfg.m_enableParallelSolve = true; - csCfg.m_averageExtent = .2f;//@TODO m_averageObjExtent; + csCfg.m_averageExtent = 0.3;//0.1;//2;//.2f;//@TODO m_averageObjExtent; csCfg.m_staticIdx = static0Index; @@ -516,8 +516,8 @@ void b3GpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem if (gpuRadixSort) { // 3. sort by cell idx B3_PROFILE("gpuRadixSort"); - int n = B3_SOLVER_N_SPLIT*B3_SOLVER_N_SPLIT; - int sortBit = 32; + //int n = B3_SOLVER_N_SPLIT*B3_SOLVER_N_SPLIT; + //int sortBit = 32; //if( n <= 0xffff ) sortBit = 16; //if( n <= 0xff ) sortBit = 8; //adl::RadixSort::execute( data->m_sort, *data->m_sortDataBuffer, sortSize ); @@ -581,6 +581,12 @@ void b3GpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem clFinish(m_data->m_queue); +// { +// b3AlignedObjectArray histogram; +// m_data->m_solverGPU->m_numConstraints->copyToHost(histogram); +// printf(",,,\n"); +// } + if (nContacts) { diff --git a/src/Bullet3OpenCL/RigidBody/b3Solver.cpp b/src/Bullet3OpenCL/RigidBody/b3Solver.cpp index 70e67659f..2f4390219 100644 --- a/src/Bullet3OpenCL/RigidBody/b3Solver.cpp +++ b/src/Bullet3OpenCL/RigidBody/b3Solver.cpp @@ -551,6 +551,75 @@ void b3Solver::solveContactConstraintHost( b3OpenCLArray* bodyBu } +void checkConstraintBatch(const b3OpenCLArray* bodyBuf, + const b3OpenCLArray* shapeBuf, + b3OpenCLArray* constraint, + b3OpenCLArray* m_numConstraints, + b3OpenCLArray* m_offsets, + int batchId + ) +{ +// b3BufferInfoCL( m_numConstraints->getBufferCL() ), +// b3BufferInfoCL( m_offsets->getBufferCL() ) + + const int nn = b3SolverBase::N_SPLIT*b3SolverBase::N_SPLIT; + int numWorkItems = 64*nn/b3SolverBase::N_BATCHES; + + b3AlignedObjectArray gN; + m_numConstraints->copyToHost(gN); + b3AlignedObjectArray gOffsets; + m_offsets->copyToHost(gOffsets); + int nSplit = b3SolverBase::N_SPLIT; + int bIdx = batchId; + + b3AlignedObjectArray cpuConstraints; + constraint->copyToHost(cpuConstraints); + + printf("batch = %d\n", batchId); + + int numWorkgroups = nn/b3SolverBase::N_BATCHES; + b3AlignedObjectArray usedBodies; + + + for (int wgIdx=0;wgIdx>1); + int cellIdx = xIdx+yIdx*nSplit; + printf("cellIdx=%d\n",cellIdx); + if( gN[cellIdx] == 0 ) + continue; + + const int start = gOffsets[cellIdx]; + const int end = start + gN[cellIdx]; + + for (int c=start;c* bodyBuf, const b3OpenCLArray* shapeBuf, b3OpenCLArray* constraint, void* additionalData, int n ,int maxNumBatches) { @@ -580,6 +649,12 @@ void b3Solver::solveContactConstraint( const b3OpenCLArray* body { for(int ib=0; ib