diff --git a/src/Bullet3OpenCL/RigidBody/b3GpuPgsConstraintSolver.cpp b/src/Bullet3OpenCL/RigidBody/b3GpuPgsConstraintSolver.cpp index e0bcdf52c..334dc792e 100644 --- a/src/Bullet3OpenCL/RigidBody/b3GpuPgsConstraintSolver.cpp +++ b/src/Bullet3OpenCL/RigidBody/b3GpuPgsConstraintSolver.cpp @@ -78,7 +78,7 @@ struct b3GpuPgsJacobiSolverInternalData b3AlignedObjectArray m_cpuConstraints; - + b3AlignedObjectArray m_batchSizes; }; @@ -194,11 +194,11 @@ struct b3BatchConstraint }; static b3AlignedObjectArray batchConstraints; -static b3AlignedObjectArray batches; + void b3GpuPgsConstraintSolver::recomputeBatches() { - batches.clear(); + m_gpuData->m_batchSizes.clear(); } @@ -288,7 +288,7 @@ b3Scalar b3GpuPgsConstraintSolver::solveGroupCacheFriendlySetup(b3OpenCLArraym_queue); } - if (batches.size()==0) + if (m_gpuData->m_batchSizes.size()==0) { B3_PROFILE("initBatchConstraintsKernel"); @@ -366,7 +366,7 @@ b3Scalar b3GpuPgsConstraintSolver::solveGroupCacheFriendlySetup(b3OpenCLArraym_queue); - if (batches.size()==0) + if (m_gpuData->m_batchSizes.size()==0) m_gpuData->m_gpuBatchConstraints->copyToHost(batchConstraints); //m_gpuData->m_gpuConstraintRows->copyToHost(verify); //m_gpuData->m_gpuConstraintRows->copyToHost(m_tmpSolverNonContactConstraintPool); @@ -574,7 +574,7 @@ b3Scalar b3GpuPgsConstraintSolver::solveGroupCacheFriendlySetup(b3OpenCLArraym_gpuConstraintRows->copyFromHost(m_tmpSolverNonContactConstraintPool); m_gpuData->m_gpuConstraintInfo1->copyFromHost(m_tmpConstraintSizesPool); - if (batches.size()==0) + if (m_gpuData->m_batchSizes.size()==0) m_gpuData->m_gpuBatchConstraints->copyFromHost(batchConstraints); else m_gpuData->m_gpuBatchConstraints->copyToHost(batchConstraints); @@ -685,13 +685,13 @@ b3Scalar b3GpuPgsConstraintSolver::solveGroupCacheFriendlyIterations(b3OpenCLArr //@todo: incrementally update batches when constraints are added/activated and/or removed/deactivated B3_PROFILE("GpuSolveGroupCacheFriendlyIterations"); - bool createBatches = batches.size()==0; + bool createBatches = m_gpuData->m_batchSizes.size()==0; { if (createBatches) { - batches.resize(0); + m_gpuData->m_batchSizes.resize(0); { m_gpuData->m_gpuBatchConstraints->copyToHost(batchConstraints); @@ -738,10 +738,10 @@ b3Scalar b3GpuPgsConstraintSolver::solveGroupCacheFriendlyIterations(b3OpenCLArr int batchOffset = 0; int constraintOffset=0; - int numBatches = batches.size(); + int numBatches = m_gpuData->m_batchSizes.size(); for (int bb=0;bbm_batchSizes[bb]; if (useGpuSolveJointConstraintRows) @@ -967,7 +967,7 @@ inline int b3GpuPgsConstraintSolver::sortConstraintByBatch3( b3BatchConstraint* } } } - batches.push_back(nCurrentBatch); + m_gpuData->m_batchSizes.push_back(nCurrentBatch); batchIdx ++; } } diff --git a/src/Bullet3OpenCL/RigidBody/b3GpuPgsContactSolver.cpp b/src/Bullet3OpenCL/RigidBody/b3GpuPgsContactSolver.cpp index 6a32273be..19c4bad82 100644 --- a/src/Bullet3OpenCL/RigidBody/b3GpuPgsContactSolver.cpp +++ b/src/Bullet3OpenCL/RigidBody/b3GpuPgsContactSolver.cpp @@ -1,5 +1,5 @@ - +bool oneBigBatch = true; bool gCpuBatchContacts = false; bool gCpuSolveConstraint = false; bool gCpuRadixSort=false; @@ -59,6 +59,8 @@ struct b3GpuBatchingPgsSolverInternalData cl_kernel m_batchingKernel; cl_kernel m_batchingKernelNew; cl_kernel m_solveContactKernel; + cl_kernel m_solveSingleContactKernel; + cl_kernel m_solveSingleFrictionKernel; cl_kernel m_solveFrictionKernel; cl_kernel m_contactToConstraintKernel; cl_kernel m_setSortDataKernel; @@ -91,6 +93,10 @@ struct b3GpuBatchingPgsSolverInternalData b3AlignedObjectArray m_idxBuffer; b3AlignedObjectArray m_sortData; b3AlignedObjectArray m_old; + + b3AlignedObjectArray m_batchSizes; + b3OpenCLArray* m_batchSizesGpu; + }; @@ -104,7 +110,7 @@ b3GpuPgsContactSolver::b3GpuPgsContactSolver(cl_context ctx,cl_device_id device, m_data->m_queue = q; m_data->m_pairCapacity = pairCapacity; m_data->m_nIterations = 4; - + m_data->m_batchSizesGpu = new b3OpenCLArray(ctx,q); m_data->m_bodyBufferGPU = new b3OpenCLArray(ctx,q); m_data->m_inertiaBufferGPU = new b3OpenCLArray(ctx,q); m_data->m_pBufContactOutGPU = new b3OpenCLArray(ctx,q); @@ -166,8 +172,14 @@ b3GpuPgsContactSolver::b3GpuPgsContactSolver(cl_context ctx,cl_device_id device, m_data->m_solveFrictionKernel= b3OpenCLUtils::compileCLKernelFromString( ctx, device, solveFrictionSource, "BatchSolveKernelFriction", &pErrNum, solveFrictionProg,additionalMacros ); b3Assert(m_data->m_solveFrictionKernel); - m_data->m_solveContactKernel= b3OpenCLUtils::compileCLKernelFromString( ctx, device, 0, "BatchSolveKernelContact", &pErrNum, solveContactProg,additionalMacros ); + m_data->m_solveContactKernel= b3OpenCLUtils::compileCLKernelFromString( ctx, device, solveContactSource, "BatchSolveKernelContact", &pErrNum, solveContactProg,additionalMacros ); b3Assert(m_data->m_solveContactKernel); + + m_data->m_solveSingleContactKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, solveContactSource, "solveSingleContactKernel", &pErrNum, solveContactProg,additionalMacros ); + b3Assert(m_data->m_solveSingleContactKernel); + + m_data->m_solveSingleFrictionKernel =b3OpenCLUtils::compileCLKernelFromString( ctx, device, solveFrictionSource, "solveSingleFrictionKernel", &pErrNum, solveFrictionProg,additionalMacros ); + b3Assert(m_data->m_solveSingleFrictionKernel); m_data->m_contactToConstraintKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, solverSetupSource, "ContactToConstraintKernel", &pErrNum, solverSetupProg,additionalMacros ); b3Assert(m_data->m_contactToConstraintKernel); @@ -223,6 +235,7 @@ b3GpuPgsContactSolver::b3GpuPgsContactSolver(cl_context ctx,cl_device_id device, b3GpuPgsContactSolver::~b3GpuPgsContactSolver() { + delete m_data->m_batchSizesGpu; delete m_data->m_bodyBufferGPU; delete m_data->m_inertiaBufferGPU; delete m_data->m_pBufContactOutGPU; @@ -244,7 +257,8 @@ b3GpuPgsContactSolver::~b3GpuPgsContactSolver() clReleaseKernel(m_data->m_batchingKernel); clReleaseKernel(m_data->m_batchingKernelNew); - + clReleaseKernel(m_data->m_solveSingleContactKernel); + clReleaseKernel(m_data->m_solveSingleFrictionKernel); clReleaseKernel( m_data->m_solveContactKernel); clReleaseKernel( m_data->m_solveFrictionKernel); @@ -279,9 +293,68 @@ struct b3ConstraintCfg +void b3GpuPgsContactSolver::solveContactConstraintBatchSizes( const b3OpenCLArray* bodyBuf, const b3OpenCLArray* shapeBuf, + b3OpenCLArray* constraint, void* additionalData, int n ,int maxNumBatches,int numIterations, const b3AlignedObjectArray* batchSizes)//const b3OpenCLArray* gpuBatchSizes) +{ + B3_PROFILE("solveContactConstraintBatchSizes"); + int numBatches = batchSizes->size()/B3_MAX_NUM_BATCHES; + for(int iter=0; iterat(cellId*B3_MAX_NUM_BATCHES+ii); + if (!numInBatch) + break; + + { + b3LauncherCL launcher( m_data->m_queue, m_data->m_solveSingleContactKernel,"m_solveSingleContactKernel" ); + launcher.setBuffer(bodyBuf->getBufferCL() ); + launcher.setBuffer(shapeBuf->getBufferCL() ); + launcher.setBuffer( constraint->getBufferCL() ); + launcher.setConst(cellId); + launcher.setConst(offset); + launcher.setConst(numInBatch); + launcher.launch1D(numInBatch); + offset+=numInBatch; + } + } + } + } + + + for(int iter=0; iterat(cellId*B3_MAX_NUM_BATCHES+ii); + if (!numInBatch) + break; + + { + b3LauncherCL launcher( m_data->m_queue, m_data->m_solveSingleFrictionKernel,"m_solveSingleFrictionKernel" ); + launcher.setBuffer(bodyBuf->getBufferCL() ); + launcher.setBuffer(shapeBuf->getBufferCL() ); + launcher.setBuffer( constraint->getBufferCL() ); + launcher.setConst(cellId); + launcher.setConst(offset); + launcher.setConst(numInBatch); + launcher.launch1D(numInBatch); + offset+=numInBatch; + } + } + } + } +} void b3GpuPgsContactSolver::solveContactConstraint( const b3OpenCLArray* bodyBuf, const b3OpenCLArray* shapeBuf, - b3OpenCLArray* constraint, void* additionalData, int n ,int maxNumBatches,int numIterations) + b3OpenCLArray* constraint, void* additionalData, int n ,int maxNumBatches,int numIterations, const b3AlignedObjectArray* batchSizes)//,const b3OpenCLArray* gpuBatchSizes) { //sort the contacts @@ -677,7 +750,8 @@ void b3GpuPgsContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem int nContactOut = m_data->m_pBufContactOutGPU->size(); bool useSolver = true; - + + if (useSolver) { float dt=1./60.; @@ -697,6 +771,7 @@ void b3GpuPgsContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem int maxNumBatches = 0; + if (!oneBigBatch) { if( m_data->m_solverGPU->m_contactBuffer2) @@ -989,7 +1064,23 @@ void b3GpuPgsContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem int numNonzeroGrid=0; + if (oneBigBatch) { + m_data->m_batchSizes.resize(B3_MAX_NUM_BATCHES); + int totalNumConstraints = cpuContacts.size(); + int simdWidth =numBodies+1;//-1;//64;//-1;//32; + int numBatches = sortConstraintByBatch3( &cpuContacts[0], totalNumConstraints, totalNumConstraints+1,csCfg.m_staticIdx ,numBodies,&m_data->m_batchSizes[0]); // on GPU + maxNumBatches = b3Max(numBatches,maxNumBatches); + static int globalMaxBatch = 0; + if (maxNumBatches>globalMaxBatch ) + { + globalMaxBatch = maxNumBatches; + b3Printf("maxNumBatches = %d\n",maxNumBatches); + } + + } else + { + m_data->m_batchSizes.resize(B3_SOLVER_N_CELLS*B3_MAX_NUM_BATCHES); B3_PROFILE("cpu batch grid"); for(int i=0; im_batchSizes[i*B3_MAX_NUM_BATCHES]); // on GPU maxNumBatches = b3Max(numBatches,maxNumBatches); static int globalMaxBatch = 0; if (maxNumBatches>globalMaxBatch ) @@ -1023,19 +1114,7 @@ void b3GpuPgsContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem } - //printf("maxNumBatches = %d\n", maxNumBatches); - - if (nContacts) - { - B3_PROFILE("gpu convertToConstraints"); - m_data->m_solverGPU->convertToConstraints( bodyBuf, - shapeBuf, m_data->m_solverGPU->m_contactBuffer2, - contactConstraintOut, - additionalData, nContacts, - (b3SolverBase::ConstraintCfg&) csCfg ); - clFinish(m_data->m_queue); - } - + } @@ -1044,6 +1123,61 @@ void b3GpuPgsContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem } + //printf("maxNumBatches = %d\n", maxNumBatches); + + if (oneBigBatch) + { + if (nContacts) + { + B3_PROFILE("cpu batchContacts"); + static b3AlignedObjectArray cpuContacts; +// b3OpenCLArray* contactsIn = m_data->m_solverGPU->m_contactBuffer2; + { + B3_PROFILE("copyToHost"); + m_data->m_pBufContactOutGPU->copyToHost(cpuContacts); + } + b3OpenCLArray* countsNative = m_data->m_solverGPU->m_numConstraints; + b3OpenCLArray* offsetsNative = m_data->m_solverGPU->m_offsets; + + + + int numNonzeroGrid=0; + + { + m_data->m_batchSizes.resize(B3_MAX_NUM_BATCHES); + int totalNumConstraints = cpuContacts.size(); + int simdWidth =numBodies+1;//-1;//64;//-1;//32; + int numBatches = sortConstraintByBatch3( &cpuContacts[0], totalNumConstraints, totalNumConstraints+1,csCfg.m_staticIdx ,numBodies,&m_data->m_batchSizes[0]); // on GPU + maxNumBatches = b3Max(numBatches,maxNumBatches); + static int globalMaxBatch = 0; + if (maxNumBatches>globalMaxBatch ) + { + globalMaxBatch = maxNumBatches; + b3Printf("maxNumBatches = %d\n",maxNumBatches); + } + + } + { + B3_PROFILE("m_contactBuffer->copyFromHost"); + m_data->m_solverGPU->m_contactBuffer2->copyFromHost((b3AlignedObjectArray&)cpuContacts); + } + + } + + } + + if (nContacts) + { + B3_PROFILE("gpu convertToConstraints"); + m_data->m_solverGPU->convertToConstraints( bodyBuf, + shapeBuf, m_data->m_solverGPU->m_contactBuffer2, + contactConstraintOut, + additionalData, nContacts, + (b3SolverBase::ConstraintCfg&) csCfg ); + clFinish(m_data->m_queue); + } + + if (1) { int numIter = 4; @@ -1061,19 +1195,30 @@ void b3GpuPgsContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem maxNumBatches); */ - solveContactConstraint( - m_data->m_bodyBufferGPU, - m_data->m_inertiaBufferGPU, - m_data->m_contactCGPU,0, - nContactOut , - maxNumBatches,numIter); + //m_data->m_batchSizesGpu->copyFromHost(m_data->m_batchSizes); + if (oneBigBatch) + { + solveContactConstraintBatchSizes(m_data->m_bodyBufferGPU, + m_data->m_inertiaBufferGPU, + m_data->m_contactCGPU,0, + nContactOut , + maxNumBatches,numIter,&m_data->m_batchSizes); + } else + { + solveContactConstraint( + m_data->m_bodyBufferGPU, + m_data->m_inertiaBufferGPU, + m_data->m_contactCGPU,0, + nContactOut , + maxNumBatches,numIter,&m_data->m_batchSizes);//m_data->m_batchSizesGpu); + } } else { B3_PROFILE("Host solveContactConstraint"); - m_data->m_solverGPU->solveContactConstraintHost(m_data->m_bodyBufferGPU, m_data->m_inertiaBufferGPU, m_data->m_contactCGPU,0, nContactOut ,maxNumBatches); + m_data->m_solverGPU->solveContactConstraintHost(m_data->m_bodyBufferGPU, m_data->m_inertiaBufferGPU, m_data->m_contactCGPU,0, nContactOut ,maxNumBatches,&m_data->m_batchSizes); } @@ -1402,7 +1547,7 @@ b3AlignedObjectArray bodyUsed; b3AlignedObjectArray curUsed; -inline int b3GpuPgsContactSolver::sortConstraintByBatch3( b3Contact4* cs, int numConstraints, int simdWidth , int staticIdx, int numBodies) +inline int b3GpuPgsContactSolver::sortConstraintByBatch3( b3Contact4* cs, int numConstraints, int simdWidth , int staticIdx, int numBodies, int* batchSizes) { B3_PROFILE("sortConstraintByBatch3"); @@ -1453,6 +1598,8 @@ inline int b3GpuPgsContactSolver::sortConstraintByBatch3( b3Contact4* cs, int nu { numIter++; int nCurrentBatch = 0; + batchSizes[batchIdx] = 0; + // clear flag for(int i=0; i=B3_MAX_NUM_BATCHES) + { + b3Error("batchIdx>=B3_MAX_NUM_BATCHES"); + b3Assert(0); + break; + } + + batchSizes[batchIdx] += nCurrentBatch; + batchIdx ++; + } } @@ -1528,6 +1687,8 @@ inline int b3GpuPgsContactSolver::sortConstraintByBatch3( b3Contact4* cs, int nu } #endif + batchSizes[batchIdx] =0; + if (maxSwaps* bodyBuf, const b3OpenCLArray* shapeBuf, + b3OpenCLArray* constraint, void* additionalData, int n ,int maxNumBatches, int numIterations, const b3AlignedObjectArray* batchSizes);//const b3OpenCLArray* gpuBatchSizes); - void solveContactConstraint( const b3OpenCLArray* bodyBuf, const b3OpenCLArray* shapeBuf, - b3OpenCLArray* constraint, void* additionalData, int n ,int maxNumBatches, int numIterations); + void solveContactConstraint( const b3OpenCLArray* bodyBuf, const b3OpenCLArray* shapeBuf, + b3OpenCLArray* constraint, void* additionalData, int n ,int maxNumBatches, int numIterations, const b3AlignedObjectArray* batchSizes);//const b3OpenCLArray* gpuBatchSizes); public: diff --git a/src/Bullet3OpenCL/RigidBody/b3Solver.cpp b/src/Bullet3OpenCL/RigidBody/b3Solver.cpp index 1d313aded..b2bb087b4 100644 --- a/src/Bullet3OpenCL/RigidBody/b3Solver.cpp +++ b/src/Bullet3OpenCL/RigidBody/b3Solver.cpp @@ -360,25 +360,75 @@ void solveContact(b3GpuConstraint4& cs, struct SolveTask// : public ThreadPool::Task { SolveTask(b3AlignedObjectArray& bodies, b3AlignedObjectArray& shapes, b3AlignedObjectArray& constraints, - int start, int nConstraints,int maxNumBatches,b3AlignedObjectArray* wgUsedBodies, int curWgidx) + int start, int nConstraints,int maxNumBatches,b3AlignedObjectArray* wgUsedBodies, int curWgidx, b3AlignedObjectArray* batchSizes, int cellIndex) : m_bodies( bodies ), m_shapes( shapes ), m_constraints( constraints ), m_start( start ), m_nConstraints( nConstraints ), m_solveFriction( true ),m_maxNumBatches(maxNumBatches), - m_wgUsedBodies(wgUsedBodies),m_curWgidx(curWgidx) + m_curWgidx(curWgidx), + m_batchSizes(batchSizes), + m_cellIndex(cellIndex) {} unsigned short int getType(){ return 0; } void run(int tIdx) { - b3AlignedObjectArray usedBodies; - //printf("run..............\n"); - - - for (int bb=0;bb=0; ic--) - //for(int ic=0; icat(m_cellIndex*B3_MAX_NUM_BATCHES+ii); + if (!numInBatch) + break; + + for (int jj=0;jj( m_constraints[i], (b3Vector3&)bodyA.m_pos, (b3Vector3&)bodyA.m_linVel, (b3Vector3&)bodyA.m_angVel, bodyA.m_invMass, (const b3Matrix3x3 &)m_shapes[aIdx].m_invInertiaWorld, + (b3Vector3&)bodyB.m_pos, (b3Vector3&)bodyB.m_linVel, (b3Vector3&)bodyB.m_angVel, bodyB.m_invMass, (const b3Matrix3x3 &)m_shapes[bIdx].m_invInertiaWorld, + maxRambdaDt, minRambdaDt ); + } + else + { + float maxRambdaDt[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX}; + float minRambdaDt[4] = {0.f,0.f,0.f,0.f}; + float sum = 0; + for(int j=0; j<4; j++) + { + sum +=m_constraints[i].m_appliedRambdaDt[j]; + } + frictionCoeff = 0.7f; + for(int j=0; j<4; j++) + { + maxRambdaDt[j] = frictionCoeff*sum; + minRambdaDt[j] = -maxRambdaDt[j]; + } + solveFriction( m_constraints[i], (b3Vector3&)bodyA.m_pos, (b3Vector3&)bodyA.m_linVel, (b3Vector3&)bodyA.m_angVel, bodyA.m_invMass,(const b3Matrix3x3 &) m_shapes[aIdx].m_invInertiaWorld, + (b3Vector3&)bodyB.m_pos, (b3Vector3&)bodyB.m_linVel, (b3Vector3&)bodyB.m_angVel, bodyB.m_invMass,(const b3Matrix3x3 &) m_shapes[bIdx].m_invInertiaWorld, + maxRambdaDt, minRambdaDt ); + + } + } + offset+=numInBatch; + + + } +/* for (int bb=0;bb=0; ic--) + for(int ic=0; icaIdx) - { - b3Assert(m_wgUsedBodies[w][aIdx]==0); - } - } - if (bodyB.m_invMass) - { - if (m_wgUsedBodies[w].size()>bIdx) - { - b3Assert(m_wgUsedBodies[w][bIdx]==0); - } - } - } - } - } - - - - if (bodyB.m_invMass) - { - b3Assert(usedBodies[bIdx]==0); - usedBodies[bIdx]++; - } - - if( !m_solveFriction ) { float maxRambdaDt[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX}; @@ -461,13 +450,11 @@ struct SolveTask// : public ThreadPool::Task solveContact( m_constraints[i], (b3Vector3&)bodyA.m_pos, (b3Vector3&)bodyA.m_linVel, (b3Vector3&)bodyA.m_angVel, bodyA.m_invMass, (const b3Matrix3x3 &)m_shapes[aIdx].m_invInertiaWorld, (b3Vector3&)bodyB.m_pos, (b3Vector3&)bodyB.m_linVel, (b3Vector3&)bodyB.m_angVel, bodyB.m_invMass, (const b3Matrix3x3 &)m_shapes[bIdx].m_invInertiaWorld, maxRambdaDt, minRambdaDt ); - } else { float maxRambdaDt[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX}; float minRambdaDt[4] = {0.f,0.f,0.f,0.f}; - float sum = 0; for(int j=0; j<4; j++) { @@ -479,31 +466,14 @@ struct SolveTask// : public ThreadPool::Task maxRambdaDt[j] = frictionCoeff*sum; minRambdaDt[j] = -maxRambdaDt[j]; } - - solveFriction( m_constraints[i], (b3Vector3&)bodyA.m_pos, (b3Vector3&)bodyA.m_linVel, (b3Vector3&)bodyA.m_angVel, bodyA.m_invMass,(const b3Matrix3x3 &) m_shapes[aIdx].m_invInertiaWorld, + solveFriction( m_constraints[i], (b3Vector3&)bodyA.m_pos, (b3Vector3&)bodyA.m_linVel, (b3Vector3&)bodyA.m_angVel, bodyA.m_invMass,(const b3Matrix3x3 &) m_shapes[aIdx].m_invInertiaWorld, (b3Vector3&)bodyB.m_pos, (b3Vector3&)bodyB.m_linVel, (b3Vector3&)bodyB.m_angVel, bodyB.m_invMass,(const b3Matrix3x3 &) m_shapes[bIdx].m_invInertiaWorld, maxRambdaDt, minRambdaDt ); } } - - if (m_wgUsedBodies) - { - if (m_wgUsedBodies[m_curWgidx].size()& m_bodies; b3AlignedObjectArray& m_shapes; b3AlignedObjectArray& m_constraints; - b3AlignedObjectArray* m_wgUsedBodies; + b3AlignedObjectArray* m_batchSizes; + int m_cellIndex; int m_curWgidx; int m_start; int m_nConstraints; @@ -522,7 +493,7 @@ struct SolveTask// : public ThreadPool::Task void b3Solver::solveContactConstraintHost( b3OpenCLArray* bodyBuf, b3OpenCLArray* shapeBuf, - b3OpenCLArray* constraint, void* additionalData, int n ,int maxNumBatches) + b3OpenCLArray* constraint, void* additionalData, int n ,int maxNumBatches,b3AlignedObjectArray* batchSizes) { #if 0 @@ -634,7 +605,7 @@ void b3Solver::solveContactConstraintHost( b3OpenCLArray* bodyBu int numConstraintsInCell = numConstraintsHost[cellIdx]; const int end = start + numConstraintsInCell; - SolveTask task( bodyNative, shapeNative, constraintNative, start, numConstraintsInCell ,maxNumBatches,usedBodies,wgIdx); + SolveTask task( bodyNative, shapeNative, constraintNative, start, numConstraintsInCell ,maxNumBatches,usedBodies,wgIdx,batchSizes,cellIdx); task.m_solveFriction = false; task.run(0); @@ -670,7 +641,7 @@ void b3Solver::solveContactConstraintHost( b3OpenCLArray* bodyBu int numConstraintsInCell = numConstraintsHost[cellIdx]; const int end = start + numConstraintsInCell; - SolveTask task( bodyNative, shapeNative, constraintNative, start, numConstraintsInCell,maxNumBatches, 0,0); + SolveTask task( bodyNative, shapeNative, constraintNative, start, numConstraintsInCell,maxNumBatches, 0,0,batchSizes,cellIdx); task.m_solveFriction = true; task.run(0); @@ -683,14 +654,14 @@ void b3Solver::solveContactConstraintHost( b3OpenCLArray* bodyBu { for(int iter=0; iter* constraint, void* additionalData, int n ,int maxNumBatches); void solveContactConstraintHost( b3OpenCLArray* bodyBuf, b3OpenCLArray* shapeBuf, - b3OpenCLArray* constraint, void* additionalData, int n ,int maxNumBatches); + b3OpenCLArray* constraint, void* additionalData, int n ,int maxNumBatches, b3AlignedObjectArray* batchSizes); void convertToConstraints( const b3OpenCLArray* bodyBuf, diff --git a/src/Bullet3OpenCL/RigidBody/kernels/solveContact.cl b/src/Bullet3OpenCL/RigidBody/kernels/solveContact.cl index a8d4e1c08..19c7f4d37 100644 --- a/src/Bullet3OpenCL/RigidBody/kernels/solveContact.cl +++ b/src/Bullet3OpenCL/RigidBody/kernels/solveContact.cl @@ -476,3 +476,22 @@ void BatchSolveKernelContact(__global Body* gBodies, } + + + +__kernel void solveSingleContactKernel(__global Body* gBodies, + __global Shape* gShapes, + __global Constraint4* gConstraints, + int cellIdx, + int batchOffset, + int numConstraintsInBatch + ) +{ + + int index = get_global_id(0); + if (index < numConstraintsInBatch) + { + int idx=batchOffset+index; + solveContactConstraint( gBodies, gShapes, &gConstraints[idx] ); + } +} diff --git a/src/Bullet3OpenCL/RigidBody/kernels/solveContact.h b/src/Bullet3OpenCL/RigidBody/kernels/solveContact.h index cb836040d..61634326d 100644 --- a/src/Bullet3OpenCL/RigidBody/kernels/solveContact.h +++ b/src/Bullet3OpenCL/RigidBody/kernels/solveContact.h @@ -371,4 +371,19 @@ static const char* solveContactCL= \ " \n" " \n" "}\n" +"__kernel void solveSingleContactKernel(__global Body* gBodies,\n" +" __global Shape* gShapes,\n" +" __global Constraint4* gConstraints,\n" +" int cellIdx,\n" +" int batchOffset,\n" +" int numConstraintsInBatch\n" +" )\n" +"{\n" +" int index = get_global_id(0);\n" +" if (index < numConstraintsInBatch)\n" +" {\n" +" int idx=batchOffset+index;\n" +" solveContactConstraint( gBodies, gShapes, &gConstraints[idx] );\n" +" } \n" +"}\n" ; diff --git a/src/Bullet3OpenCL/RigidBody/kernels/solveFriction.cl b/src/Bullet3OpenCL/RigidBody/kernels/solveFriction.cl index b4181b52c..f64e5252b 100644 --- a/src/Bullet3OpenCL/RigidBody/kernels/solveFriction.cl +++ b/src/Bullet3OpenCL/RigidBody/kernels/solveFriction.cl @@ -498,3 +498,27 @@ void BatchSolveKernelFriction(__global Body* gBodies, } + + + + + + +__kernel void solveSingleFrictionKernel(__global Body* gBodies, + __global Shape* gShapes, + __global Constraint4* gConstraints, + int cellIdx, + int batchOffset, + int numConstraintsInBatch + ) +{ + + int index = get_global_id(0); + if (index < numConstraintsInBatch) + { + + int idx=batchOffset+index; + + solveFrictionConstraint( gBodies, gShapes, &gConstraints[idx] ); + } +} \ No newline at end of file diff --git a/src/Bullet3OpenCL/RigidBody/kernels/solveFriction.h b/src/Bullet3OpenCL/RigidBody/kernels/solveFriction.h index 02d31f0d0..a4804f64f 100644 --- a/src/Bullet3OpenCL/RigidBody/kernels/solveFriction.h +++ b/src/Bullet3OpenCL/RigidBody/kernels/solveFriction.h @@ -399,4 +399,21 @@ static const char* solveFrictionCL= \ " \n" " \n" "}\n" +"__kernel void solveSingleFrictionKernel(__global Body* gBodies,\n" +" __global Shape* gShapes,\n" +" __global Constraint4* gConstraints,\n" +" int cellIdx,\n" +" int batchOffset,\n" +" int numConstraintsInBatch\n" +" )\n" +"{\n" +" int index = get_global_id(0);\n" +" if (index < numConstraintsInBatch)\n" +" {\n" +" \n" +" int idx=batchOffset+index;\n" +" \n" +" solveFrictionConstraint( gBodies, gShapes, &gConstraints[idx] );\n" +" } \n" +"}\n" ;