diff --git a/src/Bullet3OpenCL/RigidBody/b3GpuBatchingPgsSolver.cpp b/src/Bullet3OpenCL/RigidBody/b3GpuBatchingPgsSolver.cpp index 120f596ff..8be2c3929 100644 --- a/src/Bullet3OpenCL/RigidBody/b3GpuBatchingPgsSolver.cpp +++ b/src/Bullet3OpenCL/RigidBody/b3GpuBatchingPgsSolver.cpp @@ -373,7 +373,7 @@ void b3GpuBatchingPgsSolver::solveContactConstraint( const b3OpenCLArraym_queue); + clFinish(m_data->m_queue); } @@ -416,7 +416,7 @@ void b3GpuBatchingPgsSolver::solveContactConstraint( const b3OpenCLArraym_queue); + clFinish(m_data->m_queue); } #ifdef DEBUG_ME @@ -838,7 +838,8 @@ void b3GpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setConst( cdata ); launcher.launch1D( nContacts, 64 ); - //clFinish(m_data->m_queue); + //we use the clFinish for proper benchmark/profile + clFinish(m_data->m_queue); } @@ -900,7 +901,9 @@ void b3GpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem b3Printf("maxNumBatches = %d\n",maxNumBatches); } - //clFinish(m_data->m_queue); + //we use the clFinish for proper benchmark/profile + clFinish(m_data->m_queue); + } } @@ -925,7 +928,7 @@ void b3GpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem contactConstraintOut, additionalData, nContacts, (b3SolverBase::ConstraintCfg&) csCfg ); - //clFinish(m_data->m_queue); + clFinish(m_data->m_queue); } @@ -968,7 +971,7 @@ void b3GpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem m_data->m_solverGPU->solveContactConstraintHost(m_data->m_bodyBufferGPU, m_data->m_inertiaBufferGPU, m_data->m_contactCGPU,0, nContactOut ,maxNumBatches); } - //clFinish(m_data->m_queue); + } diff --git a/src/Bullet3OpenCL/RigidBody/b3Solver.cpp b/src/Bullet3OpenCL/RigidBody/b3Solver.cpp index 139795b1a..903e26f90 100644 --- a/src/Bullet3OpenCL/RigidBody/b3Solver.cpp +++ b/src/Bullet3OpenCL/RigidBody/b3Solver.cpp @@ -159,6 +159,7 @@ b3Solver::b3Solver(cl_context ctx, cl_device_id device, cl_command_queue queue, { cl_program batchingProg = b3OpenCLUtils::compileCLProgramFromString( ctx, device, batchKernelSource, &pErrNum,additionalMacros, B3_BATCHING_PATH); + //cl_program batchingProg = b3OpenCLUtils::compileCLProgramFromString( ctx, device, 0, &pErrNum,additionalMacros, B3_BATCHING_PATH,true); b3Assert(batchingProg); m_batchingKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, batchKernelSource, "CreateBatches", &pErrNum, batchingProg,additionalMacros ); diff --git a/src/Bullet3OpenCL/RigidBody/kernels/batchingKernels.cl b/src/Bullet3OpenCL/RigidBody/kernels/batchingKernels.cl index 5087f23b0..f0efbbe43 100644 --- a/src/Bullet3OpenCL/RigidBody/kernels/batchingKernels.cl +++ b/src/Bullet3OpenCL/RigidBody/kernels/batchingKernels.cl @@ -216,19 +216,21 @@ __kernel void CreateBatches( __global const Contact4* gConstraints, __global Con if( aUsed==0 && bUsed==0 ) { - int aAvailable; - int bAvailable; + int aAvailable=1; + int bAvailable=1; int ea = abs(e.m_a); int eb = abs(e.m_b); - aAvailable = tryWrite( ldsCheckBuffer, ea ); - bAvailable = tryWrite( ldsCheckBuffer, eb ); - bool aStatic = (e.m_a<0) ||(ea==m_staticIdx); bool bStatic = (e.m_b<0) ||(eb==m_staticIdx); - aAvailable = aStatic? 1: aAvailable; - bAvailable = bStatic? 1: bAvailable; + if (!aStatic) + aAvailable = tryWrite( ldsCheckBuffer, ea ); + if (!bStatic) + bAvailable = tryWrite( ldsCheckBuffer, eb ); + + //aAvailable = aStatic? 1: aAvailable; + //bAvailable = bStatic? 1: bAvailable; bool success = (aAvailable && bAvailable); if(success) diff --git a/src/Bullet3OpenCL/RigidBody/kernels/batchingKernels.h b/src/Bullet3OpenCL/RigidBody/kernels/batchingKernels.h index c21a5fdaf..5118951b0 100644 --- a/src/Bullet3OpenCL/RigidBody/kernels/batchingKernels.h +++ b/src/Bullet3OpenCL/RigidBody/kernels/batchingKernels.h @@ -218,19 +218,21 @@ static const char* batchingKernelsCL= \ "\n" " if( aUsed==0 && bUsed==0 )\n" " {\n" -" int aAvailable;\n" -" int bAvailable;\n" +" int aAvailable=1;\n" +" int bAvailable=1;\n" " int ea = abs(e.m_a);\n" " int eb = abs(e.m_b);\n" "\n" -" aAvailable = tryWrite( ldsCheckBuffer, ea );\n" -" bAvailable = tryWrite( ldsCheckBuffer, eb );\n" -"\n" " bool aStatic = (e.m_a<0) ||(ea==m_staticIdx);\n" " bool bStatic = (e.m_b<0) ||(eb==m_staticIdx);\n" " \n" -" aAvailable = aStatic? 1: aAvailable;\n" -" bAvailable = bStatic? 1: bAvailable;\n" +" if (!aStatic)\n" +" aAvailable = tryWrite( ldsCheckBuffer, ea );\n" +" if (!bStatic)\n" +" bAvailable = tryWrite( ldsCheckBuffer, eb );\n" +" \n" +" //aAvailable = aStatic? 1: aAvailable;\n" +" //bAvailable = bStatic? 1: bAvailable;\n" "\n" " bool success = (aAvailable && bAvailable);\n" " if(success)\n"