Add some command-line options: --use_jacobi, --allow_opencl_cpu.
The b3LauncherCL constructor takes a string (a name for the kernel), to make it easier to determine the failing OpenCL kernel. b3SetCustomErrorMessageFunc: printf error and exit(0).
This commit is contained in:
@@ -790,7 +790,7 @@ void b3GpuJacobiContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_m
|
||||
|
||||
{
|
||||
B3_PROFILE("m_countBodiesKernel");
|
||||
b3LauncherCL launcher(this->m_queue,m_data->m_countBodiesKernel);
|
||||
b3LauncherCL launcher(this->m_queue,m_data->m_countBodiesKernel,"m_countBodiesKernel");
|
||||
launcher.setBuffer(contactBuf);//manifoldPtr->getBufferCL());
|
||||
launcher.setBuffer(m_data->m_bodyCount->getBufferCL());
|
||||
launcher.setBuffer(m_data->m_contactConstraintOffsets->getBufferCL());
|
||||
@@ -815,7 +815,7 @@ void b3GpuJacobiContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_m
|
||||
|
||||
{
|
||||
B3_PROFILE("contactToConstraintSplitKernel");
|
||||
b3LauncherCL launcher( m_queue, m_data->m_contactToConstraintSplitKernel);
|
||||
b3LauncherCL launcher( m_queue, m_data->m_contactToConstraintSplitKernel,"m_contactToConstraintSplitKernel");
|
||||
launcher.setBuffer(contactBuf);
|
||||
launcher.setBuffer(bodyBuf);
|
||||
launcher.setBuffer(inertiaBuf);
|
||||
@@ -840,11 +840,12 @@ void b3GpuJacobiContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_m
|
||||
|
||||
{
|
||||
B3_PROFILE("m_clearVelocitiesKernel");
|
||||
b3LauncherCL launch(m_queue,m_data->m_clearVelocitiesKernel);
|
||||
b3LauncherCL launch(m_queue,m_data->m_clearVelocitiesKernel,"m_clearVelocitiesKernel");
|
||||
launch.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL());
|
||||
launch.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL());
|
||||
launch.setConst(totalNumSplitBodies);
|
||||
launch.launch1D(totalNumSplitBodies);
|
||||
clFinish(m_queue);
|
||||
}
|
||||
|
||||
|
||||
@@ -854,7 +855,7 @@ void b3GpuJacobiContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_m
|
||||
{
|
||||
{
|
||||
B3_PROFILE("m_solveContactKernel");
|
||||
b3LauncherCL launcher( m_queue, m_data->m_solveContactKernel );
|
||||
b3LauncherCL launcher( m_queue, m_data->m_solveContactKernel,"m_solveContactKernel" );
|
||||
launcher.setBuffer(m_data->m_contactConstraints->getBufferCL());
|
||||
launcher.setBuffer(bodyBuf);
|
||||
launcher.setBuffer(inertiaBuf);
|
||||
@@ -869,14 +870,14 @@ void b3GpuJacobiContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_m
|
||||
launcher.setConst(numManifolds);
|
||||
|
||||
launcher.launch1D(numManifolds);
|
||||
|
||||
clFinish(m_queue);
|
||||
}
|
||||
|
||||
|
||||
|
||||
{
|
||||
B3_PROFILE("average velocities");
|
||||
b3LauncherCL launcher( m_queue, m_data->m_averageVelocitiesKernel);
|
||||
b3LauncherCL launcher( m_queue, m_data->m_averageVelocitiesKernel,"m_averageVelocitiesKernel");
|
||||
launcher.setBuffer(bodyBuf);
|
||||
launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL());
|
||||
launcher.setBuffer(m_data->m_bodyCount->getBufferCL());
|
||||
@@ -884,13 +885,13 @@ void b3GpuJacobiContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_m
|
||||
launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL());
|
||||
launcher.setConst(numBodies);
|
||||
launcher.launch1D(numBodies);
|
||||
|
||||
clFinish(m_queue);
|
||||
}
|
||||
|
||||
|
||||
{
|
||||
B3_PROFILE("m_solveFrictionKernel");
|
||||
b3LauncherCL launcher( m_queue, m_data->m_solveFrictionKernel);
|
||||
b3LauncherCL launcher( m_queue, m_data->m_solveFrictionKernel,"m_solveFrictionKernel");
|
||||
launcher.setBuffer(m_data->m_contactConstraints->getBufferCL());
|
||||
launcher.setBuffer(bodyBuf);
|
||||
launcher.setBuffer(inertiaBuf);
|
||||
@@ -905,13 +906,13 @@ void b3GpuJacobiContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_m
|
||||
launcher.setConst(numManifolds);
|
||||
|
||||
launcher.launch1D(numManifolds);
|
||||
|
||||
clFinish(m_queue);
|
||||
}
|
||||
|
||||
|
||||
{
|
||||
B3_PROFILE("average velocities");
|
||||
b3LauncherCL launcher( m_queue, m_data->m_averageVelocitiesKernel);
|
||||
b3LauncherCL launcher( m_queue, m_data->m_averageVelocitiesKernel,"m_averageVelocitiesKernel");
|
||||
launcher.setBuffer(bodyBuf);
|
||||
launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL());
|
||||
launcher.setBuffer(m_data->m_bodyCount->getBufferCL());
|
||||
@@ -919,7 +920,7 @@ void b3GpuJacobiContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_m
|
||||
launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL());
|
||||
launcher.setConst(numBodies);
|
||||
launcher.launch1D(numBodies);
|
||||
|
||||
clFinish(m_queue);
|
||||
}
|
||||
|
||||
|
||||
@@ -929,7 +930,7 @@ void b3GpuJacobiContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_m
|
||||
|
||||
{
|
||||
B3_PROFILE("update body velocities");
|
||||
b3LauncherCL launcher( m_queue, m_data->m_updateBodyVelocitiesKernel);
|
||||
b3LauncherCL launcher( m_queue, m_data->m_updateBodyVelocitiesKernel,"m_updateBodyVelocitiesKernel");
|
||||
launcher.setBuffer(bodyBuf);
|
||||
launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL());
|
||||
launcher.setBuffer(m_data->m_bodyCount->getBufferCL());
|
||||
|
||||
@@ -231,7 +231,7 @@ b3Scalar b3GpuPgsConstraintSolver::solveGroupCacheFriendlySetup(b3OpenCLArray<b3
|
||||
{
|
||||
B3_PROFILE("m_initSolverBodiesKernel");
|
||||
|
||||
b3LauncherCL launcher(m_gpuData->m_queue,m_gpuData->m_initSolverBodiesKernel);
|
||||
b3LauncherCL launcher(m_gpuData->m_queue,m_gpuData->m_initSolverBodiesKernel,"m_initSolverBodiesKernel");
|
||||
launcher.setBuffer(m_gpuData->m_gpuSolverBodies->getBufferCL());
|
||||
launcher.setBuffer(gpuBodies->getBufferCL());
|
||||
launcher.setConst(numBodies);
|
||||
@@ -280,7 +280,7 @@ b3Scalar b3GpuPgsConstraintSolver::solveGroupCacheFriendlySetup(b3OpenCLArray<b3
|
||||
{
|
||||
B3_PROFILE("getInfo1Kernel");
|
||||
|
||||
b3LauncherCL launcher(m_gpuData->m_queue,m_gpuData->m_getInfo1Kernel);
|
||||
b3LauncherCL launcher(m_gpuData->m_queue,m_gpuData->m_getInfo1Kernel,"m_getInfo1Kernel");
|
||||
launcher.setBuffer(m_gpuData->m_gpuConstraintInfo1->getBufferCL());
|
||||
launcher.setBuffer(gpuConstraints->getBufferCL());
|
||||
launcher.setConst(numConstraints);
|
||||
@@ -300,7 +300,7 @@ b3Scalar b3GpuPgsConstraintSolver::solveGroupCacheFriendlySetup(b3OpenCLArray<b3
|
||||
|
||||
{
|
||||
B3_PROFILE("init batch constraints");
|
||||
b3LauncherCL launcher(m_gpuData->m_queue,m_gpuData->m_initBatchConstraintsKernel);
|
||||
b3LauncherCL launcher(m_gpuData->m_queue,m_gpuData->m_initBatchConstraintsKernel,"m_initBatchConstraintsKernel");
|
||||
launcher.setBuffer(m_gpuData->m_gpuConstraintInfo1->getBufferCL());
|
||||
launcher.setBuffer(m_gpuData->m_gpuConstraintRowOffsets->getBufferCL());
|
||||
launcher.setBuffer(m_gpuData->m_gpuBatchConstraints->getBufferCL());
|
||||
@@ -348,7 +348,7 @@ b3Scalar b3GpuPgsConstraintSolver::solveGroupCacheFriendlySetup(b3OpenCLArray<b3
|
||||
{
|
||||
{
|
||||
B3_PROFILE("getInfo2Kernel");
|
||||
b3LauncherCL launcher(m_gpuData->m_queue,m_gpuData->m_getInfo2Kernel);
|
||||
b3LauncherCL launcher(m_gpuData->m_queue,m_gpuData->m_getInfo2Kernel,"m_getInfo2Kernel");
|
||||
launcher.setBuffer(m_gpuData->m_gpuConstraintRows->getBufferCL());
|
||||
launcher.setBuffer(m_gpuData->m_gpuConstraintInfo1->getBufferCL());
|
||||
launcher.setBuffer(m_gpuData->m_gpuConstraintRowOffsets->getBufferCL());
|
||||
@@ -759,7 +759,7 @@ b3Scalar b3GpuPgsConstraintSolver::solveGroupCacheFriendlyIterations(b3OpenCLArr
|
||||
int numConstraintsInBatch*/
|
||||
|
||||
|
||||
b3LauncherCL launcher(m_gpuData->m_queue,m_gpuData->m_solveJointConstraintRowsKernels);
|
||||
b3LauncherCL launcher(m_gpuData->m_queue,m_gpuData->m_solveJointConstraintRowsKernels,"m_solveJointConstraintRowsKernels");
|
||||
launcher.setBuffer(m_gpuData->m_gpuSolverBodies->getBufferCL());
|
||||
launcher.setBuffer(m_gpuData->m_gpuBatchConstraints->getBufferCL());
|
||||
launcher.setBuffer(m_gpuData->m_gpuConstraintRows->getBufferCL());
|
||||
@@ -1040,7 +1040,7 @@ b3Scalar b3GpuPgsConstraintSolver::solveGroupCacheFriendlyFinish(b3OpenCLArray<b
|
||||
if (gpuBreakConstraints)
|
||||
{
|
||||
B3_PROFILE("breakViolatedConstraintsKernel");
|
||||
b3LauncherCL launcher(m_gpuData->m_queue,m_gpuData->m_breakViolatedConstraintsKernel);
|
||||
b3LauncherCL launcher(m_gpuData->m_queue,m_gpuData->m_breakViolatedConstraintsKernel,"m_breakViolatedConstraintsKernel");
|
||||
launcher.setBuffer(gpuConstraints->getBufferCL());
|
||||
launcher.setBuffer(m_gpuData->m_gpuConstraintInfo1->getBufferCL());
|
||||
launcher.setBuffer(m_gpuData->m_gpuConstraintRowOffsets->getBufferCL());
|
||||
@@ -1090,7 +1090,7 @@ b3Scalar b3GpuPgsConstraintSolver::solveGroupCacheFriendlyFinish(b3OpenCLArray<b
|
||||
{
|
||||
B3_PROFILE("GPU write back velocities and transforms");
|
||||
|
||||
b3LauncherCL launcher(m_gpuData->m_queue,m_gpuData->m_writeBackVelocitiesKernel);
|
||||
b3LauncherCL launcher(m_gpuData->m_queue,m_gpuData->m_writeBackVelocitiesKernel,"m_writeBackVelocitiesKernel");
|
||||
launcher.setBuffer(gpuBodies->getBufferCL());
|
||||
launcher.setBuffer(m_gpuData->m_gpuSolverBodies->getBufferCL());
|
||||
launcher.setConst(numBodies);
|
||||
|
||||
@@ -1,32 +1,16 @@
|
||||
|
||||
//#define USE_CPU
|
||||
#ifdef USE_CPU
|
||||
bool b3GpuBatchContacts = false;
|
||||
bool b3GpuSolveConstraint = false;
|
||||
bool gpuRadixSort=false;
|
||||
bool gpuSetSortData = false;
|
||||
|
||||
bool optionalSortContactsDeterminism = true;
|
||||
bool gpuSortContactsDeterminism = false;
|
||||
bool useCpuCopyConstraints = true;
|
||||
bool gCpuBatchContacts = false;
|
||||
bool gCpuSolveConstraint = false;
|
||||
bool gCpuRadixSort=false;
|
||||
bool gCpuSetSortData = false;
|
||||
bool gCpuSortContactsDeterminism = false;
|
||||
bool gUseCpuCopyConstraints = false;
|
||||
bool gUseScanHost = false;
|
||||
bool gReorderContactsOnCpu = false;
|
||||
|
||||
bool useScanHost = true;
|
||||
bool reorderContactsOnCpu = true;
|
||||
bool optionalSortContactsDeterminism = true;
|
||||
|
||||
#else
|
||||
bool b3GpuBatchContacts = true;
|
||||
bool b3GpuSolveConstraint = true;
|
||||
bool gpuRadixSort=true;
|
||||
bool gpuSetSortData = true;
|
||||
|
||||
bool optionalSortContactsDeterminism = true;
|
||||
bool gpuSortContactsDeterminism = true;
|
||||
bool useCpuCopyConstraints = false;
|
||||
|
||||
bool useScanHost = false;
|
||||
bool reorderContactsOnCpu = false;
|
||||
|
||||
#endif
|
||||
|
||||
#include "b3GpuPgsContactSolver.h"
|
||||
#include "Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h"
|
||||
@@ -336,7 +320,7 @@ void b3GpuPgsContactSolver::solveContactConstraint( const b3OpenCLArray<b3Rigid
|
||||
cdata.z = ib;
|
||||
|
||||
|
||||
b3LauncherCL launcher( m_data->m_queue, m_data->m_solveContactKernel );
|
||||
b3LauncherCL launcher( m_data->m_queue, m_data->m_solveContactKernel,"m_solveContactKernel" );
|
||||
#if 1
|
||||
|
||||
b3BufferInfoCL bInfo[] = {
|
||||
@@ -441,7 +425,7 @@ void b3GpuPgsContactSolver::solveContactConstraint( const b3OpenCLArray<b3Rigid
|
||||
,b3BufferInfoCL(&gpuDebugInfo)
|
||||
#endif //DEBUG_ME
|
||||
};
|
||||
b3LauncherCL launcher( m_data->m_queue, m_data->m_solveFrictionKernel );
|
||||
b3LauncherCL launcher( m_data->m_queue, m_data->m_solveFrictionKernel,"m_solveFrictionKernel" );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
|
||||
//launcher.setConst( cdata.x );
|
||||
launcher.setConst( cdata.y );
|
||||
@@ -598,7 +582,7 @@ void b3GpuPgsContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
||||
|
||||
if (optionalSortContactsDeterminism)
|
||||
{
|
||||
if (gpuSortContactsDeterminism)
|
||||
if (!gCpuSortContactsDeterminism)
|
||||
{
|
||||
B3_PROFILE("GPU Sort contact constraints (determinism)");
|
||||
|
||||
@@ -608,7 +592,7 @@ void b3GpuPgsContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
||||
m_data->m_pBufContactOutGPU->copyToCL(m_data->m_pBufContactOutGPUCopy->getBufferCL(),numContacts,0,0);
|
||||
|
||||
{
|
||||
b3LauncherCL launcher(m_data->m_queue, m_data->m_setDeterminismSortDataChildShapeBKernel);
|
||||
b3LauncherCL launcher(m_data->m_queue, m_data->m_setDeterminismSortDataChildShapeBKernel,"m_setDeterminismSortDataChildShapeBKernel");
|
||||
launcher.setBuffer(m_data->m_pBufContactOutGPUCopy->getBufferCL());
|
||||
launcher.setBuffer(m_data->m_contactKeyValues->getBufferCL());
|
||||
launcher.setConst(numContacts);
|
||||
@@ -616,7 +600,7 @@ void b3GpuPgsContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
||||
}
|
||||
m_data->m_solverGPU->m_sort32->execute(*m_data->m_contactKeyValues);
|
||||
{
|
||||
b3LauncherCL launcher(m_data->m_queue, m_data->m_setDeterminismSortDataChildShapeAKernel);
|
||||
b3LauncherCL launcher(m_data->m_queue, m_data->m_setDeterminismSortDataChildShapeAKernel,"m_setDeterminismSortDataChildShapeAKernel");
|
||||
launcher.setBuffer(m_data->m_pBufContactOutGPUCopy->getBufferCL());
|
||||
launcher.setBuffer(m_data->m_contactKeyValues->getBufferCL());
|
||||
launcher.setConst(numContacts);
|
||||
@@ -624,7 +608,7 @@ void b3GpuPgsContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
||||
}
|
||||
m_data->m_solverGPU->m_sort32->execute(*m_data->m_contactKeyValues);
|
||||
{
|
||||
b3LauncherCL launcher(m_data->m_queue, m_data->m_setDeterminismSortDataBodyBKernel);
|
||||
b3LauncherCL launcher(m_data->m_queue, m_data->m_setDeterminismSortDataBodyBKernel,"m_setDeterminismSortDataBodyBKernel");
|
||||
launcher.setBuffer(m_data->m_pBufContactOutGPUCopy->getBufferCL());
|
||||
launcher.setBuffer(m_data->m_contactKeyValues->getBufferCL());
|
||||
launcher.setConst(numContacts);
|
||||
@@ -634,7 +618,7 @@ void b3GpuPgsContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
||||
m_data->m_solverGPU->m_sort32->execute(*m_data->m_contactKeyValues);
|
||||
|
||||
{
|
||||
b3LauncherCL launcher(m_data->m_queue, m_data->m_setDeterminismSortDataBodyAKernel);
|
||||
b3LauncherCL launcher(m_data->m_queue, m_data->m_setDeterminismSortDataBodyAKernel,"m_setDeterminismSortDataBodyAKernel");
|
||||
launcher.setBuffer(m_data->m_pBufContactOutGPUCopy->getBufferCL());
|
||||
launcher.setBuffer(m_data->m_contactKeyValues->getBufferCL());
|
||||
launcher.setConst(numContacts);
|
||||
@@ -651,7 +635,7 @@ void b3GpuPgsContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
||||
|
||||
//b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_data->m_pBufContactOutGPU->getBufferCL() ), b3BufferInfoCL( m_data->m_solverGPU->m_contactBuffer2->getBufferCL())
|
||||
// , b3BufferInfoCL( m_data->m_solverGPU->m_sortDataBuffer->getBufferCL()) };
|
||||
b3LauncherCL launcher(m_data->m_queue,m_data->m_solverGPU->m_reorderContactKernel);
|
||||
b3LauncherCL launcher(m_data->m_queue,m_data->m_solverGPU->m_reorderContactKernel,"m_reorderContactKernel");
|
||||
launcher.setBuffer(m_data->m_pBufContactOutGPUCopy->getBufferCL());
|
||||
launcher.setBuffer(m_data->m_pBufContactOutGPU->getBufferCL());
|
||||
launcher.setBuffer(m_data->m_contactKeyValues->getBufferCL());
|
||||
@@ -755,7 +739,7 @@ void b3GpuPgsContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
||||
b3OpenCLArray<unsigned int>* offsetsNative = m_data->m_solverGPU->m_offsets;
|
||||
|
||||
|
||||
if (gpuSetSortData)
|
||||
if (!gCpuSetSortData)
|
||||
{ // 2. set cell idx
|
||||
B3_PROFILE("GPU set cell idx");
|
||||
struct CB
|
||||
@@ -779,7 +763,7 @@ void b3GpuPgsContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
||||
|
||||
|
||||
b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_data->m_pBufContactOutGPU->getBufferCL() ), b3BufferInfoCL( bodyBuf->getBufferCL()), b3BufferInfoCL( m_data->m_solverGPU->m_sortDataBuffer->getBufferCL()) };
|
||||
b3LauncherCL launcher(m_data->m_queue, m_data->m_solverGPU->m_setSortDataKernel );
|
||||
b3LauncherCL launcher(m_data->m_queue, m_data->m_solverGPU->m_setSortDataKernel,"m_setSortDataKernel" );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
|
||||
launcher.setConst( cdata.m_nContacts );
|
||||
launcher.setConst( cdata.m_scale );
|
||||
@@ -812,7 +796,7 @@ void b3GpuPgsContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
||||
|
||||
|
||||
|
||||
if (gpuRadixSort)
|
||||
if (!gCpuRadixSort)
|
||||
{ // 3. sort by cell idx
|
||||
B3_PROFILE("gpuRadixSort");
|
||||
//int n = B3_SOLVER_N_SPLIT*B3_SOLVER_N_SPLIT;
|
||||
@@ -836,7 +820,7 @@ void b3GpuPgsContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
||||
}
|
||||
|
||||
|
||||
if (useScanHost)
|
||||
if (gUseScanHost)
|
||||
{
|
||||
// 4. find entries
|
||||
B3_PROFILE("cpuBoundSearch");
|
||||
@@ -879,7 +863,7 @@ void b3GpuPgsContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
||||
|
||||
if (nContacts)
|
||||
{ // 5. sort constraints by cellIdx
|
||||
if (reorderContactsOnCpu)
|
||||
if (gReorderContactsOnCpu)
|
||||
{
|
||||
B3_PROFILE("cpu m_reorderContactKernel");
|
||||
b3AlignedObjectArray<b3SortData> sortDataHost;
|
||||
@@ -918,7 +902,7 @@ void b3GpuPgsContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
||||
b3BufferInfoCL( m_data->m_solverGPU->m_contactBuffer2->getBufferCL())
|
||||
, b3BufferInfoCL( m_data->m_solverGPU->m_sortDataBuffer->getBufferCL()) };
|
||||
|
||||
b3LauncherCL launcher(m_data->m_queue,m_data->m_solverGPU->m_reorderContactKernel);
|
||||
b3LauncherCL launcher(m_data->m_queue,m_data->m_solverGPU->m_reorderContactKernel,"m_reorderContactKernel");
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
|
||||
launcher.setConst( cdata );
|
||||
launcher.launch1D( nContacts, 64 );
|
||||
@@ -944,7 +928,7 @@ void b3GpuPgsContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
||||
if (nContacts)
|
||||
{
|
||||
|
||||
if (useCpuCopyConstraints)
|
||||
if (gUseCpuCopyConstraints)
|
||||
{
|
||||
for (int i=0;i<nContacts;i++)
|
||||
{
|
||||
@@ -962,7 +946,7 @@ void b3GpuPgsContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
||||
b3BufferInfoCL( m_data->m_pBufContactOutGPU->getBufferCL() )
|
||||
};
|
||||
|
||||
b3LauncherCL launcher(m_data->m_queue, m_data->m_solverGPU->m_copyConstraintKernel );
|
||||
b3LauncherCL launcher(m_data->m_queue, m_data->m_solverGPU->m_copyConstraintKernel,"m_copyConstraintKernel" );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
|
||||
launcher.setConst( cdata );
|
||||
launcher.launch1D( nContacts, 64 );
|
||||
@@ -975,7 +959,7 @@ void b3GpuPgsContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
||||
bool compareGPU = false;
|
||||
if (nContacts)
|
||||
{
|
||||
if (b3GpuBatchContacts)
|
||||
if (!gCpuBatchContacts)
|
||||
{
|
||||
B3_PROFILE("gpu batchContacts");
|
||||
maxNumBatches = 150;//250;
|
||||
@@ -984,10 +968,12 @@ void b3GpuPgsContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
||||
} else
|
||||
{
|
||||
B3_PROFILE("cpu batchContacts");
|
||||
b3AlignedObjectArray<b3Contact4> cpuContacts;
|
||||
static b3AlignedObjectArray<b3Contact4> cpuContacts;
|
||||
b3OpenCLArray<b3Contact4>* contactsIn = m_data->m_solverGPU->m_contactBuffer2;
|
||||
contactsIn->copyToHost(cpuContacts);
|
||||
|
||||
{
|
||||
B3_PROFILE("copyToHost");
|
||||
contactsIn->copyToHost(cpuContacts);
|
||||
}
|
||||
b3OpenCLArray<unsigned int>* countsNative = m_data->m_solverGPU->m_numConstraints;
|
||||
b3OpenCLArray<unsigned int>* offsetsNative = m_data->m_solverGPU->m_offsets;
|
||||
|
||||
@@ -1025,7 +1011,7 @@ void b3GpuPgsContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
||||
|
||||
}
|
||||
}
|
||||
clFinish(m_data->m_queue);
|
||||
//clFinish(m_data->m_queue);
|
||||
}
|
||||
{
|
||||
B3_PROFILE("m_contactBuffer->copyFromHost");
|
||||
@@ -1063,7 +1049,7 @@ void b3GpuPgsContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
||||
int numIter = 4;
|
||||
|
||||
m_data->m_solverGPU->m_nIterations = numIter;//10
|
||||
if (b3GpuSolveConstraint)
|
||||
if (!gCpuSolveConstraint)
|
||||
{
|
||||
B3_PROFILE("GPU solveContactConstraint");
|
||||
|
||||
|
||||
@@ -33,27 +33,15 @@ subject to the following restrictions:
|
||||
#define B3_RIGIDBODY_INTEGRATE_PATH "src/Bullet3OpenCL/RigidBody/kernels/integrateKernel.cl"
|
||||
#define B3_RIGIDBODY_UPDATEAABB_PATH "src/Bullet3OpenCL/RigidBody/kernels/updateAabbsKernel.cl"
|
||||
|
||||
bool useBullet2CpuSolver = true;
|
||||
|
||||
//choice of contact solver
|
||||
bool useJacobi = false;
|
||||
|
||||
//#define USE_CPU
|
||||
#ifdef USE_CPU
|
||||
bool useDbvt = true;
|
||||
bool useBullet2CpuSolver = true;
|
||||
bool dumpContactStats = false;
|
||||
bool calcWorldSpaceAabbOnCpu = true;
|
||||
bool useCalculateOverlappingPairsHost = true;
|
||||
bool integrateOnCpu = true;
|
||||
|
||||
#else
|
||||
bool useDbvt = false;
|
||||
bool useBullet2CpuSolver = true;
|
||||
bool dumpContactStats = false;
|
||||
bool calcWorldSpaceAabbOnCpu = false;//true;
|
||||
bool useCalculateOverlappingPairsHost = false;
|
||||
bool integrateOnCpu = false;
|
||||
|
||||
#endif
|
||||
bool gUseJacobi = false;
|
||||
bool gUseDbvt = false;
|
||||
bool gDumpContactStats = false;
|
||||
bool gCalcWorldSpaceAabbOnCpu = false;
|
||||
bool gUseCalculateOverlappingPairsHost = false;
|
||||
bool gIntegrateOnCpu = false;
|
||||
|
||||
#define TEST_OTHER_GPU_SOLVER 1
|
||||
#ifdef TEST_OTHER_GPU_SOLVER
|
||||
@@ -241,7 +229,7 @@ void b3GpuRigidBodyPipeline::stepSimulation(float deltaTime)
|
||||
//compute overlapping pairs
|
||||
{
|
||||
|
||||
if (useDbvt)
|
||||
if (gUseDbvt)
|
||||
{
|
||||
{
|
||||
B3_PROFILE("setAabb");
|
||||
@@ -261,7 +249,7 @@ void b3GpuRigidBodyPipeline::stepSimulation(float deltaTime)
|
||||
numPairs = m_data->m_broadphaseDbvt->getOverlappingPairCache()->getNumOverlappingPairs();
|
||||
} else
|
||||
{
|
||||
if (useCalculateOverlappingPairsHost)
|
||||
if (gUseCalculateOverlappingPairsHost)
|
||||
{
|
||||
m_data->m_broadphaseSap->calculateOverlappingPairsHost(m_data->m_config.m_maxBroadphasePairs);
|
||||
} else
|
||||
@@ -284,7 +272,7 @@ void b3GpuRigidBodyPipeline::stepSimulation(float deltaTime)
|
||||
{
|
||||
cl_mem pairs =0;
|
||||
cl_mem aabbsWS =0;
|
||||
if (useDbvt)
|
||||
if (gUseDbvt)
|
||||
{
|
||||
B3_PROFILE("m_overlappingPairsGPU->copyFromHost");
|
||||
m_data->m_overlappingPairsGPU->copyFromHost(m_data->m_broadphaseDbvt->getOverlappingPairCache()->getOverlappingPairArray());
|
||||
@@ -300,13 +288,13 @@ void b3GpuRigidBodyPipeline::stepSimulation(float deltaTime)
|
||||
m_data->m_narrowphase->computeContacts(pairs,numPairs,aabbsWS,numBodies);
|
||||
numContacts = m_data->m_narrowphase->getNumContactsGpu();
|
||||
|
||||
if (useDbvt)
|
||||
if (gUseDbvt)
|
||||
{
|
||||
///store the cached information (contact locations in the 'z' component)
|
||||
B3_PROFILE("m_overlappingPairsGPU->copyToHost");
|
||||
m_data->m_overlappingPairsGPU->copyToHost(m_data->m_broadphaseDbvt->getOverlappingPairCache()->getOverlappingPairArray());
|
||||
}
|
||||
if (dumpContactStats && numContacts)
|
||||
if (gDumpContactStats && numContacts)
|
||||
{
|
||||
m_data->m_narrowphase->getContactsGpu();
|
||||
|
||||
@@ -369,7 +357,7 @@ void b3GpuRigidBodyPipeline::stepSimulation(float deltaTime)
|
||||
|
||||
#ifdef TEST_OTHER_GPU_SOLVER
|
||||
|
||||
if (useJacobi)
|
||||
if (gUseJacobi)
|
||||
{
|
||||
bool useGpu = true;
|
||||
if (useGpu)
|
||||
@@ -453,7 +441,7 @@ void b3GpuRigidBodyPipeline::integrate(float timeStep)
|
||||
int numBodies = m_data->m_narrowphase->getNumRigidBodies();
|
||||
float angularDamp = 0.99f;
|
||||
|
||||
if (integrateOnCpu)
|
||||
if (gIntegrateOnCpu)
|
||||
{
|
||||
if(numBodies)
|
||||
{
|
||||
@@ -470,7 +458,7 @@ void b3GpuRigidBodyPipeline::integrate(float timeStep)
|
||||
}
|
||||
} else
|
||||
{
|
||||
b3LauncherCL launcher(m_data->m_queue,m_data->m_integrateTransformsKernel);
|
||||
b3LauncherCL launcher(m_data->m_queue,m_data->m_integrateTransformsKernel,"m_integrateTransformsKernel");
|
||||
launcher.setBuffer(m_data->m_narrowphase->getBodiesGpu());
|
||||
|
||||
launcher.setConst(numBodies);
|
||||
@@ -492,12 +480,12 @@ void b3GpuRigidBodyPipeline::setupGpuAabbsFull()
|
||||
if (!numBodies)
|
||||
return;
|
||||
|
||||
if (calcWorldSpaceAabbOnCpu)
|
||||
if (gCalcWorldSpaceAabbOnCpu)
|
||||
{
|
||||
|
||||
if (numBodies)
|
||||
{
|
||||
if (useDbvt)
|
||||
if (gUseDbvt)
|
||||
{
|
||||
m_data->m_allAabbsCPU.resize(numBodies);
|
||||
m_data->m_narrowphase->readbackAllBodiesToCpu();
|
||||
@@ -521,7 +509,7 @@ void b3GpuRigidBodyPipeline::setupGpuAabbsFull()
|
||||
} else
|
||||
{
|
||||
//__kernel void initializeGpuAabbsFull( const int numNodes, __global Body* gBodies,__global Collidable* collidables, __global b3AABBCL* plocalShapeAABB, __global b3AABBCL* pAABB)
|
||||
b3LauncherCL launcher(m_data->m_queue,m_data->m_updateAabbsKernel);
|
||||
b3LauncherCL launcher(m_data->m_queue,m_data->m_updateAabbsKernel,"m_updateAabbsKernel");
|
||||
launcher.setConst(numBodies);
|
||||
cl_mem bodies = m_data->m_narrowphase->getBodiesGpu();
|
||||
launcher.setBuffer(bodies);
|
||||
@@ -531,7 +519,7 @@ void b3GpuRigidBodyPipeline::setupGpuAabbsFull()
|
||||
launcher.setBuffer(localAabbs);
|
||||
|
||||
cl_mem worldAabbs =0;
|
||||
if (useDbvt)
|
||||
if (gUseDbvt)
|
||||
{
|
||||
worldAabbs = m_data->m_allAabbsGPU->getBufferCL();
|
||||
} else
|
||||
@@ -624,7 +612,7 @@ int b3GpuRigidBodyPipeline::registerPhysicsInstance(float mass, const float* po
|
||||
|
||||
if (bodyIndex>=0)
|
||||
{
|
||||
if (useDbvt)
|
||||
if (gUseDbvt)
|
||||
{
|
||||
m_data->m_broadphaseDbvt->createProxy(aabbMin,aabbMax,bodyIndex,0,1,1);
|
||||
b3SapAabb aabb;
|
||||
|
||||
@@ -18,7 +18,7 @@ subject to the following restrictions:
|
||||
|
||||
///useNewBatchingKernel is a rewritten kernel using just a single thread of the warp, for experiments
|
||||
bool useNewBatchingKernel = true;
|
||||
bool convertConstraintOnCpu = false;
|
||||
bool gConvertConstraintOnCpu = false;
|
||||
|
||||
#define B3_SOLVER_SETUP_KERNEL_PATH "src/Bullet3OpenCL/RigidBody/kernels/solverSetup.cl"
|
||||
#define B3_SOLVER_SETUP2_KERNEL_PATH "src/Bullet3OpenCL/RigidBody/kernels/solverSetup2.cl"
|
||||
@@ -824,7 +824,7 @@ void b3Solver::solveContactConstraint( const b3OpenCLArray<b3RigidBodyCL>* body
|
||||
cdata.z = ib;
|
||||
|
||||
|
||||
b3LauncherCL launcher( m_queue, m_solveContactKernel );
|
||||
b3LauncherCL launcher( m_queue, m_solveContactKernel ,"m_solveContactKernel");
|
||||
#if 1
|
||||
|
||||
b3BufferInfoCL bInfo[] = {
|
||||
@@ -929,7 +929,7 @@ void b3Solver::solveContactConstraint( const b3OpenCLArray<b3RigidBodyCL>* body
|
||||
,b3BufferInfoCL(&gpuDebugInfo)
|
||||
#endif //DEBUG_ME
|
||||
};
|
||||
b3LauncherCL launcher( m_queue, m_solveFrictionKernel );
|
||||
b3LauncherCL launcher( m_queue, m_solveFrictionKernel,"m_solveFrictionKernel" );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
|
||||
//launcher.setConst( cdata.x );
|
||||
launcher.setConst( cdata.y );
|
||||
@@ -979,7 +979,7 @@ void b3Solver::convertToConstraints( const b3OpenCLArray<b3RigidBodyCL>* bodyBuf
|
||||
cdata.m_positionConstraintCoeff = cfg.m_positionConstraintCoeff;
|
||||
|
||||
|
||||
if (convertConstraintOnCpu)
|
||||
if (gConvertConstraintOnCpu)
|
||||
{
|
||||
b3AlignedObjectArray<b3RigidBodyCL> gBodies;
|
||||
bodyBuf->copyToHost(gBodies);
|
||||
@@ -1031,7 +1031,7 @@ void b3Solver::convertToConstraints( const b3OpenCLArray<b3RigidBodyCL>* bodyBuf
|
||||
|
||||
b3BufferInfoCL bInfo[] = { b3BufferInfoCL( contactsIn->getBufferCL() ), b3BufferInfoCL( bodyBuf->getBufferCL() ), b3BufferInfoCL( shapeBuf->getBufferCL()),
|
||||
b3BufferInfoCL( contactCOut->getBufferCL() )};
|
||||
b3LauncherCL launcher( m_queue, m_contactToConstraintKernel );
|
||||
b3LauncherCL launcher( m_queue, m_contactToConstraintKernel,"m_contactToConstraintKernel" );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
|
||||
//launcher.setConst( cdata );
|
||||
|
||||
@@ -1169,7 +1169,7 @@ void b3Solver::batchContacts( b3OpenCLArray<b3Contact4>* contacts, int nContact
|
||||
//b3LauncherCL launcher( m_queue, m_batchingKernel);
|
||||
cl_kernel k = useNewBatchingKernel ? m_batchingKernelNew : m_batchingKernel;
|
||||
|
||||
b3LauncherCL launcher( m_queue, k);
|
||||
b3LauncherCL launcher( m_queue, k,"*batchingKernel");
|
||||
if (!useNewBatchingKernel )
|
||||
{
|
||||
launcher.setBuffer( contacts->getBufferCL() );
|
||||
|
||||
Reference in New Issue
Block a user