added a variation of the constraint solver, that works on CPU OpenCL (oneBigBatch), one stage (batching) happens on CPU for this mode
This commit is contained in:
committed by
erwincoumans
parent
1d5c651753
commit
8a7ad65177
@@ -78,7 +78,7 @@ struct b3GpuPgsJacobiSolverInternalData
|
|||||||
|
|
||||||
b3AlignedObjectArray<b3GpuGenericConstraint> m_cpuConstraints;
|
b3AlignedObjectArray<b3GpuGenericConstraint> m_cpuConstraints;
|
||||||
|
|
||||||
|
b3AlignedObjectArray<int> m_batchSizes;
|
||||||
|
|
||||||
|
|
||||||
};
|
};
|
||||||
@@ -194,11 +194,11 @@ struct b3BatchConstraint
|
|||||||
};
|
};
|
||||||
|
|
||||||
static b3AlignedObjectArray<b3BatchConstraint> batchConstraints;
|
static b3AlignedObjectArray<b3BatchConstraint> batchConstraints;
|
||||||
static b3AlignedObjectArray<int> batches;
|
|
||||||
|
|
||||||
void b3GpuPgsConstraintSolver::recomputeBatches()
|
void b3GpuPgsConstraintSolver::recomputeBatches()
|
||||||
{
|
{
|
||||||
batches.clear();
|
m_gpuData->m_batchSizes.clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -288,7 +288,7 @@ b3Scalar b3GpuPgsConstraintSolver::solveGroupCacheFriendlySetup(b3OpenCLArray<b3
|
|||||||
clFinish(m_gpuData->m_queue);
|
clFinish(m_gpuData->m_queue);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (batches.size()==0)
|
if (m_gpuData->m_batchSizes.size()==0)
|
||||||
{
|
{
|
||||||
B3_PROFILE("initBatchConstraintsKernel");
|
B3_PROFILE("initBatchConstraintsKernel");
|
||||||
|
|
||||||
@@ -366,7 +366,7 @@ b3Scalar b3GpuPgsConstraintSolver::solveGroupCacheFriendlySetup(b3OpenCLArray<b3
|
|||||||
launcher.launch1D(numConstraints);
|
launcher.launch1D(numConstraints);
|
||||||
clFinish(m_gpuData->m_queue);
|
clFinish(m_gpuData->m_queue);
|
||||||
|
|
||||||
if (batches.size()==0)
|
if (m_gpuData->m_batchSizes.size()==0)
|
||||||
m_gpuData->m_gpuBatchConstraints->copyToHost(batchConstraints);
|
m_gpuData->m_gpuBatchConstraints->copyToHost(batchConstraints);
|
||||||
//m_gpuData->m_gpuConstraintRows->copyToHost(verify);
|
//m_gpuData->m_gpuConstraintRows->copyToHost(verify);
|
||||||
//m_gpuData->m_gpuConstraintRows->copyToHost(m_tmpSolverNonContactConstraintPool);
|
//m_gpuData->m_gpuConstraintRows->copyToHost(m_tmpSolverNonContactConstraintPool);
|
||||||
@@ -574,7 +574,7 @@ b3Scalar b3GpuPgsConstraintSolver::solveGroupCacheFriendlySetup(b3OpenCLArray<b3
|
|||||||
m_gpuData->m_gpuConstraintRows->copyFromHost(m_tmpSolverNonContactConstraintPool);
|
m_gpuData->m_gpuConstraintRows->copyFromHost(m_tmpSolverNonContactConstraintPool);
|
||||||
m_gpuData->m_gpuConstraintInfo1->copyFromHost(m_tmpConstraintSizesPool);
|
m_gpuData->m_gpuConstraintInfo1->copyFromHost(m_tmpConstraintSizesPool);
|
||||||
|
|
||||||
if (batches.size()==0)
|
if (m_gpuData->m_batchSizes.size()==0)
|
||||||
m_gpuData->m_gpuBatchConstraints->copyFromHost(batchConstraints);
|
m_gpuData->m_gpuBatchConstraints->copyFromHost(batchConstraints);
|
||||||
else
|
else
|
||||||
m_gpuData->m_gpuBatchConstraints->copyToHost(batchConstraints);
|
m_gpuData->m_gpuBatchConstraints->copyToHost(batchConstraints);
|
||||||
@@ -685,13 +685,13 @@ b3Scalar b3GpuPgsConstraintSolver::solveGroupCacheFriendlyIterations(b3OpenCLArr
|
|||||||
//@todo: incrementally update batches when constraints are added/activated and/or removed/deactivated
|
//@todo: incrementally update batches when constraints are added/activated and/or removed/deactivated
|
||||||
B3_PROFILE("GpuSolveGroupCacheFriendlyIterations");
|
B3_PROFILE("GpuSolveGroupCacheFriendlyIterations");
|
||||||
|
|
||||||
bool createBatches = batches.size()==0;
|
bool createBatches = m_gpuData->m_batchSizes.size()==0;
|
||||||
{
|
{
|
||||||
|
|
||||||
if (createBatches)
|
if (createBatches)
|
||||||
{
|
{
|
||||||
|
|
||||||
batches.resize(0);
|
m_gpuData->m_batchSizes.resize(0);
|
||||||
|
|
||||||
{
|
{
|
||||||
m_gpuData->m_gpuBatchConstraints->copyToHost(batchConstraints);
|
m_gpuData->m_gpuBatchConstraints->copyToHost(batchConstraints);
|
||||||
@@ -738,10 +738,10 @@ b3Scalar b3GpuPgsConstraintSolver::solveGroupCacheFriendlyIterations(b3OpenCLArr
|
|||||||
|
|
||||||
int batchOffset = 0;
|
int batchOffset = 0;
|
||||||
int constraintOffset=0;
|
int constraintOffset=0;
|
||||||
int numBatches = batches.size();
|
int numBatches = m_gpuData->m_batchSizes.size();
|
||||||
for (int bb=0;bb<numBatches;bb++)
|
for (int bb=0;bb<numBatches;bb++)
|
||||||
{
|
{
|
||||||
int numConstraintsInBatch = batches[bb];
|
int numConstraintsInBatch = m_gpuData->m_batchSizes[bb];
|
||||||
|
|
||||||
|
|
||||||
if (useGpuSolveJointConstraintRows)
|
if (useGpuSolveJointConstraintRows)
|
||||||
@@ -967,7 +967,7 @@ inline int b3GpuPgsConstraintSolver::sortConstraintByBatch3( b3BatchConstraint*
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
batches.push_back(nCurrentBatch);
|
m_gpuData->m_batchSizes.push_back(nCurrentBatch);
|
||||||
batchIdx ++;
|
batchIdx ++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
|
|
||||||
|
bool oneBigBatch = true;
|
||||||
bool gCpuBatchContacts = false;
|
bool gCpuBatchContacts = false;
|
||||||
bool gCpuSolveConstraint = false;
|
bool gCpuSolveConstraint = false;
|
||||||
bool gCpuRadixSort=false;
|
bool gCpuRadixSort=false;
|
||||||
@@ -59,6 +59,8 @@ struct b3GpuBatchingPgsSolverInternalData
|
|||||||
cl_kernel m_batchingKernel;
|
cl_kernel m_batchingKernel;
|
||||||
cl_kernel m_batchingKernelNew;
|
cl_kernel m_batchingKernelNew;
|
||||||
cl_kernel m_solveContactKernel;
|
cl_kernel m_solveContactKernel;
|
||||||
|
cl_kernel m_solveSingleContactKernel;
|
||||||
|
cl_kernel m_solveSingleFrictionKernel;
|
||||||
cl_kernel m_solveFrictionKernel;
|
cl_kernel m_solveFrictionKernel;
|
||||||
cl_kernel m_contactToConstraintKernel;
|
cl_kernel m_contactToConstraintKernel;
|
||||||
cl_kernel m_setSortDataKernel;
|
cl_kernel m_setSortDataKernel;
|
||||||
@@ -91,6 +93,10 @@ struct b3GpuBatchingPgsSolverInternalData
|
|||||||
b3AlignedObjectArray<unsigned int> m_idxBuffer;
|
b3AlignedObjectArray<unsigned int> m_idxBuffer;
|
||||||
b3AlignedObjectArray<b3SortData> m_sortData;
|
b3AlignedObjectArray<b3SortData> m_sortData;
|
||||||
b3AlignedObjectArray<b3Contact4> m_old;
|
b3AlignedObjectArray<b3Contact4> m_old;
|
||||||
|
|
||||||
|
b3AlignedObjectArray<int> m_batchSizes;
|
||||||
|
b3OpenCLArray<int>* m_batchSizesGpu;
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
@@ -104,7 +110,7 @@ b3GpuPgsContactSolver::b3GpuPgsContactSolver(cl_context ctx,cl_device_id device,
|
|||||||
m_data->m_queue = q;
|
m_data->m_queue = q;
|
||||||
m_data->m_pairCapacity = pairCapacity;
|
m_data->m_pairCapacity = pairCapacity;
|
||||||
m_data->m_nIterations = 4;
|
m_data->m_nIterations = 4;
|
||||||
|
m_data->m_batchSizesGpu = new b3OpenCLArray<int>(ctx,q);
|
||||||
m_data->m_bodyBufferGPU = new b3OpenCLArray<b3RigidBodyCL>(ctx,q);
|
m_data->m_bodyBufferGPU = new b3OpenCLArray<b3RigidBodyCL>(ctx,q);
|
||||||
m_data->m_inertiaBufferGPU = new b3OpenCLArray<b3InertiaCL>(ctx,q);
|
m_data->m_inertiaBufferGPU = new b3OpenCLArray<b3InertiaCL>(ctx,q);
|
||||||
m_data->m_pBufContactOutGPU = new b3OpenCLArray<b3Contact4>(ctx,q);
|
m_data->m_pBufContactOutGPU = new b3OpenCLArray<b3Contact4>(ctx,q);
|
||||||
@@ -166,8 +172,14 @@ b3GpuPgsContactSolver::b3GpuPgsContactSolver(cl_context ctx,cl_device_id device,
|
|||||||
m_data->m_solveFrictionKernel= b3OpenCLUtils::compileCLKernelFromString( ctx, device, solveFrictionSource, "BatchSolveKernelFriction", &pErrNum, solveFrictionProg,additionalMacros );
|
m_data->m_solveFrictionKernel= b3OpenCLUtils::compileCLKernelFromString( ctx, device, solveFrictionSource, "BatchSolveKernelFriction", &pErrNum, solveFrictionProg,additionalMacros );
|
||||||
b3Assert(m_data->m_solveFrictionKernel);
|
b3Assert(m_data->m_solveFrictionKernel);
|
||||||
|
|
||||||
m_data->m_solveContactKernel= b3OpenCLUtils::compileCLKernelFromString( ctx, device, 0, "BatchSolveKernelContact", &pErrNum, solveContactProg,additionalMacros );
|
m_data->m_solveContactKernel= b3OpenCLUtils::compileCLKernelFromString( ctx, device, solveContactSource, "BatchSolveKernelContact", &pErrNum, solveContactProg,additionalMacros );
|
||||||
b3Assert(m_data->m_solveContactKernel);
|
b3Assert(m_data->m_solveContactKernel);
|
||||||
|
|
||||||
|
m_data->m_solveSingleContactKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, solveContactSource, "solveSingleContactKernel", &pErrNum, solveContactProg,additionalMacros );
|
||||||
|
b3Assert(m_data->m_solveSingleContactKernel);
|
||||||
|
|
||||||
|
m_data->m_solveSingleFrictionKernel =b3OpenCLUtils::compileCLKernelFromString( ctx, device, solveFrictionSource, "solveSingleFrictionKernel", &pErrNum, solveFrictionProg,additionalMacros );
|
||||||
|
b3Assert(m_data->m_solveSingleFrictionKernel);
|
||||||
|
|
||||||
m_data->m_contactToConstraintKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, solverSetupSource, "ContactToConstraintKernel", &pErrNum, solverSetupProg,additionalMacros );
|
m_data->m_contactToConstraintKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, solverSetupSource, "ContactToConstraintKernel", &pErrNum, solverSetupProg,additionalMacros );
|
||||||
b3Assert(m_data->m_contactToConstraintKernel);
|
b3Assert(m_data->m_contactToConstraintKernel);
|
||||||
@@ -223,6 +235,7 @@ b3GpuPgsContactSolver::b3GpuPgsContactSolver(cl_context ctx,cl_device_id device,
|
|||||||
|
|
||||||
b3GpuPgsContactSolver::~b3GpuPgsContactSolver()
|
b3GpuPgsContactSolver::~b3GpuPgsContactSolver()
|
||||||
{
|
{
|
||||||
|
delete m_data->m_batchSizesGpu;
|
||||||
delete m_data->m_bodyBufferGPU;
|
delete m_data->m_bodyBufferGPU;
|
||||||
delete m_data->m_inertiaBufferGPU;
|
delete m_data->m_inertiaBufferGPU;
|
||||||
delete m_data->m_pBufContactOutGPU;
|
delete m_data->m_pBufContactOutGPU;
|
||||||
@@ -244,7 +257,8 @@ b3GpuPgsContactSolver::~b3GpuPgsContactSolver()
|
|||||||
|
|
||||||
clReleaseKernel(m_data->m_batchingKernel);
|
clReleaseKernel(m_data->m_batchingKernel);
|
||||||
clReleaseKernel(m_data->m_batchingKernelNew);
|
clReleaseKernel(m_data->m_batchingKernelNew);
|
||||||
|
clReleaseKernel(m_data->m_solveSingleContactKernel);
|
||||||
|
clReleaseKernel(m_data->m_solveSingleFrictionKernel);
|
||||||
clReleaseKernel( m_data->m_solveContactKernel);
|
clReleaseKernel( m_data->m_solveContactKernel);
|
||||||
clReleaseKernel( m_data->m_solveFrictionKernel);
|
clReleaseKernel( m_data->m_solveFrictionKernel);
|
||||||
|
|
||||||
@@ -279,9 +293,68 @@ struct b3ConstraintCfg
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
void b3GpuPgsContactSolver::solveContactConstraintBatchSizes( const b3OpenCLArray<b3RigidBodyCL>* bodyBuf, const b3OpenCLArray<b3InertiaCL>* shapeBuf,
|
||||||
|
b3OpenCLArray<b3GpuConstraint4>* constraint, void* additionalData, int n ,int maxNumBatches,int numIterations, const b3AlignedObjectArray<int>* batchSizes)//const b3OpenCLArray<int>* gpuBatchSizes)
|
||||||
|
{
|
||||||
|
B3_PROFILE("solveContactConstraintBatchSizes");
|
||||||
|
int numBatches = batchSizes->size()/B3_MAX_NUM_BATCHES;
|
||||||
|
for(int iter=0; iter<numIterations; iter++)
|
||||||
|
{
|
||||||
|
|
||||||
|
for (int cellId=0;cellId<numBatches;cellId++)
|
||||||
|
{
|
||||||
|
int offset = 0;
|
||||||
|
for (int ii=0;ii<B3_MAX_NUM_BATCHES;ii++)
|
||||||
|
{
|
||||||
|
int numInBatch = batchSizes->at(cellId*B3_MAX_NUM_BATCHES+ii);
|
||||||
|
if (!numInBatch)
|
||||||
|
break;
|
||||||
|
|
||||||
|
{
|
||||||
|
b3LauncherCL launcher( m_data->m_queue, m_data->m_solveSingleContactKernel,"m_solveSingleContactKernel" );
|
||||||
|
launcher.setBuffer(bodyBuf->getBufferCL() );
|
||||||
|
launcher.setBuffer(shapeBuf->getBufferCL() );
|
||||||
|
launcher.setBuffer( constraint->getBufferCL() );
|
||||||
|
launcher.setConst(cellId);
|
||||||
|
launcher.setConst(offset);
|
||||||
|
launcher.setConst(numInBatch);
|
||||||
|
launcher.launch1D(numInBatch);
|
||||||
|
offset+=numInBatch;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
for(int iter=0; iter<numIterations; iter++)
|
||||||
|
{
|
||||||
|
for (int cellId=0;cellId<numBatches;cellId++)
|
||||||
|
{
|
||||||
|
int offset = 0;
|
||||||
|
for (int ii=0;ii<B3_MAX_NUM_BATCHES;ii++)
|
||||||
|
{
|
||||||
|
int numInBatch = batchSizes->at(cellId*B3_MAX_NUM_BATCHES+ii);
|
||||||
|
if (!numInBatch)
|
||||||
|
break;
|
||||||
|
|
||||||
|
{
|
||||||
|
b3LauncherCL launcher( m_data->m_queue, m_data->m_solveSingleFrictionKernel,"m_solveSingleFrictionKernel" );
|
||||||
|
launcher.setBuffer(bodyBuf->getBufferCL() );
|
||||||
|
launcher.setBuffer(shapeBuf->getBufferCL() );
|
||||||
|
launcher.setBuffer( constraint->getBufferCL() );
|
||||||
|
launcher.setConst(cellId);
|
||||||
|
launcher.setConst(offset);
|
||||||
|
launcher.setConst(numInBatch);
|
||||||
|
launcher.launch1D(numInBatch);
|
||||||
|
offset+=numInBatch;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void b3GpuPgsContactSolver::solveContactConstraint( const b3OpenCLArray<b3RigidBodyCL>* bodyBuf, const b3OpenCLArray<b3InertiaCL>* shapeBuf,
|
void b3GpuPgsContactSolver::solveContactConstraint( const b3OpenCLArray<b3RigidBodyCL>* bodyBuf, const b3OpenCLArray<b3InertiaCL>* shapeBuf,
|
||||||
b3OpenCLArray<b3GpuConstraint4>* constraint, void* additionalData, int n ,int maxNumBatches,int numIterations)
|
b3OpenCLArray<b3GpuConstraint4>* constraint, void* additionalData, int n ,int maxNumBatches,int numIterations, const b3AlignedObjectArray<int>* batchSizes)//,const b3OpenCLArray<int>* gpuBatchSizes)
|
||||||
{
|
{
|
||||||
|
|
||||||
//sort the contacts
|
//sort the contacts
|
||||||
@@ -677,7 +750,8 @@ void b3GpuPgsContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
|||||||
int nContactOut = m_data->m_pBufContactOutGPU->size();
|
int nContactOut = m_data->m_pBufContactOutGPU->size();
|
||||||
|
|
||||||
bool useSolver = true;
|
bool useSolver = true;
|
||||||
|
|
||||||
|
|
||||||
if (useSolver)
|
if (useSolver)
|
||||||
{
|
{
|
||||||
float dt=1./60.;
|
float dt=1./60.;
|
||||||
@@ -697,6 +771,7 @@ void b3GpuPgsContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
|||||||
|
|
||||||
int maxNumBatches = 0;
|
int maxNumBatches = 0;
|
||||||
|
|
||||||
|
if (!oneBigBatch)
|
||||||
{
|
{
|
||||||
|
|
||||||
if( m_data->m_solverGPU->m_contactBuffer2)
|
if( m_data->m_solverGPU->m_contactBuffer2)
|
||||||
@@ -989,7 +1064,23 @@ void b3GpuPgsContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
|||||||
|
|
||||||
int numNonzeroGrid=0;
|
int numNonzeroGrid=0;
|
||||||
|
|
||||||
|
if (oneBigBatch)
|
||||||
{
|
{
|
||||||
|
m_data->m_batchSizes.resize(B3_MAX_NUM_BATCHES);
|
||||||
|
int totalNumConstraints = cpuContacts.size();
|
||||||
|
int simdWidth =numBodies+1;//-1;//64;//-1;//32;
|
||||||
|
int numBatches = sortConstraintByBatch3( &cpuContacts[0], totalNumConstraints, totalNumConstraints+1,csCfg.m_staticIdx ,numBodies,&m_data->m_batchSizes[0]); // on GPU
|
||||||
|
maxNumBatches = b3Max(numBatches,maxNumBatches);
|
||||||
|
static int globalMaxBatch = 0;
|
||||||
|
if (maxNumBatches>globalMaxBatch )
|
||||||
|
{
|
||||||
|
globalMaxBatch = maxNumBatches;
|
||||||
|
b3Printf("maxNumBatches = %d\n",maxNumBatches);
|
||||||
|
}
|
||||||
|
|
||||||
|
} else
|
||||||
|
{
|
||||||
|
m_data->m_batchSizes.resize(B3_SOLVER_N_CELLS*B3_MAX_NUM_BATCHES);
|
||||||
B3_PROFILE("cpu batch grid");
|
B3_PROFILE("cpu batch grid");
|
||||||
for(int i=0; i<B3_SOLVER_N_CELLS; i++)
|
for(int i=0; i<B3_SOLVER_N_CELLS; i++)
|
||||||
{
|
{
|
||||||
@@ -999,7 +1090,7 @@ void b3GpuPgsContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
|||||||
{
|
{
|
||||||
numNonzeroGrid++;
|
numNonzeroGrid++;
|
||||||
int simdWidth =numBodies+1;//-1;//64;//-1;//32;
|
int simdWidth =numBodies+1;//-1;//64;//-1;//32;
|
||||||
int numBatches = sortConstraintByBatch3( &cpuContacts[0]+offset, n, simdWidth,csCfg.m_staticIdx ,numBodies); // on GPU
|
int numBatches = sortConstraintByBatch3( &cpuContacts[0]+offset, n, simdWidth,csCfg.m_staticIdx ,numBodies,&m_data->m_batchSizes[i*B3_MAX_NUM_BATCHES]); // on GPU
|
||||||
maxNumBatches = b3Max(numBatches,maxNumBatches);
|
maxNumBatches = b3Max(numBatches,maxNumBatches);
|
||||||
static int globalMaxBatch = 0;
|
static int globalMaxBatch = 0;
|
||||||
if (maxNumBatches>globalMaxBatch )
|
if (maxNumBatches>globalMaxBatch )
|
||||||
@@ -1023,19 +1114,7 @@ void b3GpuPgsContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
//printf("maxNumBatches = %d\n", maxNumBatches);
|
|
||||||
|
|
||||||
if (nContacts)
|
|
||||||
{
|
|
||||||
B3_PROFILE("gpu convertToConstraints");
|
|
||||||
m_data->m_solverGPU->convertToConstraints( bodyBuf,
|
|
||||||
shapeBuf, m_data->m_solverGPU->m_contactBuffer2,
|
|
||||||
contactConstraintOut,
|
|
||||||
additionalData, nContacts,
|
|
||||||
(b3SolverBase::ConstraintCfg&) csCfg );
|
|
||||||
clFinish(m_data->m_queue);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
@@ -1044,6 +1123,61 @@ void b3GpuPgsContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
//printf("maxNumBatches = %d\n", maxNumBatches);
|
||||||
|
|
||||||
|
if (oneBigBatch)
|
||||||
|
{
|
||||||
|
if (nContacts)
|
||||||
|
{
|
||||||
|
B3_PROFILE("cpu batchContacts");
|
||||||
|
static b3AlignedObjectArray<b3Contact4> cpuContacts;
|
||||||
|
// b3OpenCLArray<b3Contact4>* contactsIn = m_data->m_solverGPU->m_contactBuffer2;
|
||||||
|
{
|
||||||
|
B3_PROFILE("copyToHost");
|
||||||
|
m_data->m_pBufContactOutGPU->copyToHost(cpuContacts);
|
||||||
|
}
|
||||||
|
b3OpenCLArray<unsigned int>* countsNative = m_data->m_solverGPU->m_numConstraints;
|
||||||
|
b3OpenCLArray<unsigned int>* offsetsNative = m_data->m_solverGPU->m_offsets;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
int numNonzeroGrid=0;
|
||||||
|
|
||||||
|
{
|
||||||
|
m_data->m_batchSizes.resize(B3_MAX_NUM_BATCHES);
|
||||||
|
int totalNumConstraints = cpuContacts.size();
|
||||||
|
int simdWidth =numBodies+1;//-1;//64;//-1;//32;
|
||||||
|
int numBatches = sortConstraintByBatch3( &cpuContacts[0], totalNumConstraints, totalNumConstraints+1,csCfg.m_staticIdx ,numBodies,&m_data->m_batchSizes[0]); // on GPU
|
||||||
|
maxNumBatches = b3Max(numBatches,maxNumBatches);
|
||||||
|
static int globalMaxBatch = 0;
|
||||||
|
if (maxNumBatches>globalMaxBatch )
|
||||||
|
{
|
||||||
|
globalMaxBatch = maxNumBatches;
|
||||||
|
b3Printf("maxNumBatches = %d\n",maxNumBatches);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
{
|
||||||
|
B3_PROFILE("m_contactBuffer->copyFromHost");
|
||||||
|
m_data->m_solverGPU->m_contactBuffer2->copyFromHost((b3AlignedObjectArray<b3Contact4>&)cpuContacts);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
if (nContacts)
|
||||||
|
{
|
||||||
|
B3_PROFILE("gpu convertToConstraints");
|
||||||
|
m_data->m_solverGPU->convertToConstraints( bodyBuf,
|
||||||
|
shapeBuf, m_data->m_solverGPU->m_contactBuffer2,
|
||||||
|
contactConstraintOut,
|
||||||
|
additionalData, nContacts,
|
||||||
|
(b3SolverBase::ConstraintCfg&) csCfg );
|
||||||
|
clFinish(m_data->m_queue);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
if (1)
|
if (1)
|
||||||
{
|
{
|
||||||
int numIter = 4;
|
int numIter = 4;
|
||||||
@@ -1061,19 +1195,30 @@ void b3GpuPgsContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
|||||||
maxNumBatches);
|
maxNumBatches);
|
||||||
*/
|
*/
|
||||||
|
|
||||||
solveContactConstraint(
|
//m_data->m_batchSizesGpu->copyFromHost(m_data->m_batchSizes);
|
||||||
m_data->m_bodyBufferGPU,
|
|
||||||
m_data->m_inertiaBufferGPU,
|
|
||||||
m_data->m_contactCGPU,0,
|
|
||||||
nContactOut ,
|
|
||||||
maxNumBatches,numIter);
|
|
||||||
|
|
||||||
|
if (oneBigBatch)
|
||||||
|
{
|
||||||
|
solveContactConstraintBatchSizes(m_data->m_bodyBufferGPU,
|
||||||
|
m_data->m_inertiaBufferGPU,
|
||||||
|
m_data->m_contactCGPU,0,
|
||||||
|
nContactOut ,
|
||||||
|
maxNumBatches,numIter,&m_data->m_batchSizes);
|
||||||
|
} else
|
||||||
|
{
|
||||||
|
solveContactConstraint(
|
||||||
|
m_data->m_bodyBufferGPU,
|
||||||
|
m_data->m_inertiaBufferGPU,
|
||||||
|
m_data->m_contactCGPU,0,
|
||||||
|
nContactOut ,
|
||||||
|
maxNumBatches,numIter,&m_data->m_batchSizes);//m_data->m_batchSizesGpu);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
B3_PROFILE("Host solveContactConstraint");
|
B3_PROFILE("Host solveContactConstraint");
|
||||||
|
|
||||||
m_data->m_solverGPU->solveContactConstraintHost(m_data->m_bodyBufferGPU, m_data->m_inertiaBufferGPU, m_data->m_contactCGPU,0, nContactOut ,maxNumBatches);
|
m_data->m_solverGPU->solveContactConstraintHost(m_data->m_bodyBufferGPU, m_data->m_inertiaBufferGPU, m_data->m_contactCGPU,0, nContactOut ,maxNumBatches,&m_data->m_batchSizes);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -1402,7 +1547,7 @@ b3AlignedObjectArray<int> bodyUsed;
|
|||||||
b3AlignedObjectArray<int> curUsed;
|
b3AlignedObjectArray<int> curUsed;
|
||||||
|
|
||||||
|
|
||||||
inline int b3GpuPgsContactSolver::sortConstraintByBatch3( b3Contact4* cs, int numConstraints, int simdWidth , int staticIdx, int numBodies)
|
inline int b3GpuPgsContactSolver::sortConstraintByBatch3( b3Contact4* cs, int numConstraints, int simdWidth , int staticIdx, int numBodies, int* batchSizes)
|
||||||
{
|
{
|
||||||
|
|
||||||
B3_PROFILE("sortConstraintByBatch3");
|
B3_PROFILE("sortConstraintByBatch3");
|
||||||
@@ -1453,6 +1598,8 @@ inline int b3GpuPgsContactSolver::sortConstraintByBatch3( b3Contact4* cs, int nu
|
|||||||
{
|
{
|
||||||
numIter++;
|
numIter++;
|
||||||
int nCurrentBatch = 0;
|
int nCurrentBatch = 0;
|
||||||
|
batchSizes[batchIdx] = 0;
|
||||||
|
|
||||||
// clear flag
|
// clear flag
|
||||||
for(int i=0; i<curBodyUsed; i++)
|
for(int i=0; i<curBodyUsed; i++)
|
||||||
bodyUsed[curUsed[i]/32] = 0;
|
bodyUsed[curUsed[i]/32] = 0;
|
||||||
@@ -1508,6 +1655,7 @@ inline int b3GpuPgsContactSolver::sortConstraintByBatch3( b3Contact4* cs, int nu
|
|||||||
nCurrentBatch++;
|
nCurrentBatch++;
|
||||||
if( nCurrentBatch == simdWidth )
|
if( nCurrentBatch == simdWidth )
|
||||||
{
|
{
|
||||||
|
batchSizes[batchIdx] += simdWidth;
|
||||||
nCurrentBatch = 0;
|
nCurrentBatch = 0;
|
||||||
for(int i=0; i<curBodyUsed; i++)
|
for(int i=0; i<curBodyUsed; i++)
|
||||||
bodyUsed[curUsed[i]/32] = 0;
|
bodyUsed[curUsed[i]/32] = 0;
|
||||||
@@ -1516,7 +1664,18 @@ inline int b3GpuPgsContactSolver::sortConstraintByBatch3( b3Contact4* cs, int nu
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (batchIdx>=B3_MAX_NUM_BATCHES)
|
||||||
|
{
|
||||||
|
b3Error("batchIdx>=B3_MAX_NUM_BATCHES");
|
||||||
|
b3Assert(0);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
batchSizes[batchIdx] += nCurrentBatch;
|
||||||
|
|
||||||
batchIdx ++;
|
batchIdx ++;
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1528,6 +1687,8 @@ inline int b3GpuPgsContactSolver::sortConstraintByBatch3( b3Contact4* cs, int nu
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
batchSizes[batchIdx] =0;
|
||||||
|
|
||||||
if (maxSwaps<numSwaps)
|
if (maxSwaps<numSwaps)
|
||||||
{
|
{
|
||||||
maxSwaps = numSwaps;
|
maxSwaps = numSwaps;
|
||||||
|
|||||||
@@ -20,12 +20,15 @@ protected:
|
|||||||
|
|
||||||
inline int sortConstraintByBatch( b3Contact4* cs, int n, int simdWidth , int staticIdx, int numBodies);
|
inline int sortConstraintByBatch( b3Contact4* cs, int n, int simdWidth , int staticIdx, int numBodies);
|
||||||
inline int sortConstraintByBatch2( b3Contact4* cs, int n, int simdWidth , int staticIdx, int numBodies);
|
inline int sortConstraintByBatch2( b3Contact4* cs, int n, int simdWidth , int staticIdx, int numBodies);
|
||||||
inline int sortConstraintByBatch3( b3Contact4* cs, int n, int simdWidth , int staticIdx, int numBodies);
|
inline int sortConstraintByBatch3( b3Contact4* cs, int n, int simdWidth , int staticIdx, int numBodies, int* batchSizes);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
void solveContactConstraintBatchSizes( const b3OpenCLArray<b3RigidBodyCL>* bodyBuf, const b3OpenCLArray<b3InertiaCL>* shapeBuf,
|
||||||
|
b3OpenCLArray<b3GpuConstraint4>* constraint, void* additionalData, int n ,int maxNumBatches, int numIterations, const b3AlignedObjectArray<int>* batchSizes);//const b3OpenCLArray<int>* gpuBatchSizes);
|
||||||
|
|
||||||
void solveContactConstraint( const b3OpenCLArray<b3RigidBodyCL>* bodyBuf, const b3OpenCLArray<b3InertiaCL>* shapeBuf,
|
void solveContactConstraint( const b3OpenCLArray<b3RigidBodyCL>* bodyBuf, const b3OpenCLArray<b3InertiaCL>* shapeBuf,
|
||||||
b3OpenCLArray<b3GpuConstraint4>* constraint, void* additionalData, int n ,int maxNumBatches, int numIterations);
|
b3OpenCLArray<b3GpuConstraint4>* constraint, void* additionalData, int n ,int maxNumBatches, int numIterations, const b3AlignedObjectArray<int>* batchSizes);//const b3OpenCLArray<int>* gpuBatchSizes);
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
|
||||||
|
|||||||
@@ -360,25 +360,75 @@ void solveContact(b3GpuConstraint4& cs,
|
|||||||
struct SolveTask// : public ThreadPool::Task
|
struct SolveTask// : public ThreadPool::Task
|
||||||
{
|
{
|
||||||
SolveTask(b3AlignedObjectArray<b3RigidBodyCL>& bodies, b3AlignedObjectArray<b3InertiaCL>& shapes, b3AlignedObjectArray<b3GpuConstraint4>& constraints,
|
SolveTask(b3AlignedObjectArray<b3RigidBodyCL>& bodies, b3AlignedObjectArray<b3InertiaCL>& shapes, b3AlignedObjectArray<b3GpuConstraint4>& constraints,
|
||||||
int start, int nConstraints,int maxNumBatches,b3AlignedObjectArray<int>* wgUsedBodies, int curWgidx)
|
int start, int nConstraints,int maxNumBatches,b3AlignedObjectArray<int>* wgUsedBodies, int curWgidx, b3AlignedObjectArray<int>* batchSizes, int cellIndex)
|
||||||
: m_bodies( bodies ), m_shapes( shapes ), m_constraints( constraints ), m_start( start ), m_nConstraints( nConstraints ),
|
: m_bodies( bodies ), m_shapes( shapes ), m_constraints( constraints ), m_start( start ), m_nConstraints( nConstraints ),
|
||||||
m_solveFriction( true ),m_maxNumBatches(maxNumBatches),
|
m_solveFriction( true ),m_maxNumBatches(maxNumBatches),
|
||||||
m_wgUsedBodies(wgUsedBodies),m_curWgidx(curWgidx)
|
m_curWgidx(curWgidx),
|
||||||
|
m_batchSizes(batchSizes),
|
||||||
|
m_cellIndex(cellIndex)
|
||||||
{}
|
{}
|
||||||
|
|
||||||
unsigned short int getType(){ return 0; }
|
unsigned short int getType(){ return 0; }
|
||||||
|
|
||||||
void run(int tIdx)
|
void run(int tIdx)
|
||||||
{
|
{
|
||||||
b3AlignedObjectArray<int> usedBodies;
|
int offset = 0;
|
||||||
//printf("run..............\n");
|
for (int ii=0;ii<B3_MAX_NUM_BATCHES;ii++)
|
||||||
|
|
||||||
|
|
||||||
for (int bb=0;bb<m_maxNumBatches;bb++)
|
|
||||||
{
|
{
|
||||||
usedBodies.resize(0);
|
int numInBatch = m_batchSizes->at(m_cellIndex*B3_MAX_NUM_BATCHES+ii);
|
||||||
for(int ic=m_nConstraints-1; ic>=0; ic--)
|
if (!numInBatch)
|
||||||
//for(int ic=0; ic<m_nConstraints; ic++)
|
break;
|
||||||
|
|
||||||
|
for (int jj=0;jj<numInBatch;jj++)
|
||||||
|
{
|
||||||
|
int i = m_start + offset+jj;
|
||||||
|
int batchId = m_constraints[i].m_batchIdx;
|
||||||
|
b3Assert(batchId==ii);
|
||||||
|
float frictionCoeff = m_constraints[i].getFrictionCoeff();
|
||||||
|
int aIdx = (int)m_constraints[i].m_bodyA;
|
||||||
|
int bIdx = (int)m_constraints[i].m_bodyB;
|
||||||
|
int localBatch = m_constraints[i].m_batchIdx;
|
||||||
|
b3RigidBodyCL& bodyA = m_bodies[aIdx];
|
||||||
|
b3RigidBodyCL& bodyB = m_bodies[bIdx];
|
||||||
|
|
||||||
|
if( !m_solveFriction )
|
||||||
|
{
|
||||||
|
float maxRambdaDt[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX};
|
||||||
|
float minRambdaDt[4] = {0.f,0.f,0.f,0.f};
|
||||||
|
|
||||||
|
solveContact<false>( m_constraints[i], (b3Vector3&)bodyA.m_pos, (b3Vector3&)bodyA.m_linVel, (b3Vector3&)bodyA.m_angVel, bodyA.m_invMass, (const b3Matrix3x3 &)m_shapes[aIdx].m_invInertiaWorld,
|
||||||
|
(b3Vector3&)bodyB.m_pos, (b3Vector3&)bodyB.m_linVel, (b3Vector3&)bodyB.m_angVel, bodyB.m_invMass, (const b3Matrix3x3 &)m_shapes[bIdx].m_invInertiaWorld,
|
||||||
|
maxRambdaDt, minRambdaDt );
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
float maxRambdaDt[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX};
|
||||||
|
float minRambdaDt[4] = {0.f,0.f,0.f,0.f};
|
||||||
|
float sum = 0;
|
||||||
|
for(int j=0; j<4; j++)
|
||||||
|
{
|
||||||
|
sum +=m_constraints[i].m_appliedRambdaDt[j];
|
||||||
|
}
|
||||||
|
frictionCoeff = 0.7f;
|
||||||
|
for(int j=0; j<4; j++)
|
||||||
|
{
|
||||||
|
maxRambdaDt[j] = frictionCoeff*sum;
|
||||||
|
minRambdaDt[j] = -maxRambdaDt[j];
|
||||||
|
}
|
||||||
|
solveFriction( m_constraints[i], (b3Vector3&)bodyA.m_pos, (b3Vector3&)bodyA.m_linVel, (b3Vector3&)bodyA.m_angVel, bodyA.m_invMass,(const b3Matrix3x3 &) m_shapes[aIdx].m_invInertiaWorld,
|
||||||
|
(b3Vector3&)bodyB.m_pos, (b3Vector3&)bodyB.m_linVel, (b3Vector3&)bodyB.m_angVel, bodyB.m_invMass,(const b3Matrix3x3 &) m_shapes[bIdx].m_invInertiaWorld,
|
||||||
|
maxRambdaDt, minRambdaDt );
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
offset+=numInBatch;
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
/* for (int bb=0;bb<m_maxNumBatches;bb++)
|
||||||
|
{
|
||||||
|
//for(int ic=m_nConstraints-1; ic>=0; ic--)
|
||||||
|
for(int ic=0; ic<m_nConstraints; ic++)
|
||||||
{
|
{
|
||||||
|
|
||||||
int i = m_start + ic;
|
int i = m_start + ic;
|
||||||
@@ -392,67 +442,6 @@ struct SolveTask// : public ThreadPool::Task
|
|||||||
b3RigidBodyCL& bodyA = m_bodies[aIdx];
|
b3RigidBodyCL& bodyA = m_bodies[aIdx];
|
||||||
b3RigidBodyCL& bodyB = m_bodies[bIdx];
|
b3RigidBodyCL& bodyB = m_bodies[bIdx];
|
||||||
|
|
||||||
if ((bodyA.m_invMass) && (bodyB.m_invMass))
|
|
||||||
{
|
|
||||||
// printf("aIdx=%d, bIdx=%d\n", aIdx,bIdx);
|
|
||||||
}
|
|
||||||
if (bIdx==10)
|
|
||||||
{
|
|
||||||
//printf("ic(b)=%d, localBatch=%d\n",ic,localBatch);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (aIdx==10)
|
|
||||||
{
|
|
||||||
//printf("ic(a)=%d, localBatch=%d\n",ic,localBatch);
|
|
||||||
}
|
|
||||||
if (usedBodies.size()<(aIdx+1))
|
|
||||||
{
|
|
||||||
usedBodies.resize(aIdx+1,0);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (usedBodies.size()<(bIdx+1))
|
|
||||||
{
|
|
||||||
usedBodies.resize(bIdx+1,0);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (bodyA.m_invMass)
|
|
||||||
{
|
|
||||||
b3Assert(usedBodies[aIdx]==0);
|
|
||||||
usedBodies[aIdx]++;
|
|
||||||
}
|
|
||||||
if (m_wgUsedBodies)
|
|
||||||
{
|
|
||||||
for (int w=0;w<B3_SOLVER_N_CELLS;w++)
|
|
||||||
{
|
|
||||||
if (w!=m_curWgidx)
|
|
||||||
{
|
|
||||||
if (bodyA.m_invMass)
|
|
||||||
{
|
|
||||||
if (m_wgUsedBodies[w].size()>aIdx)
|
|
||||||
{
|
|
||||||
b3Assert(m_wgUsedBodies[w][aIdx]==0);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (bodyB.m_invMass)
|
|
||||||
{
|
|
||||||
if (m_wgUsedBodies[w].size()>bIdx)
|
|
||||||
{
|
|
||||||
b3Assert(m_wgUsedBodies[w][bIdx]==0);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if (bodyB.m_invMass)
|
|
||||||
{
|
|
||||||
b3Assert(usedBodies[bIdx]==0);
|
|
||||||
usedBodies[bIdx]++;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
if( !m_solveFriction )
|
if( !m_solveFriction )
|
||||||
{
|
{
|
||||||
float maxRambdaDt[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX};
|
float maxRambdaDt[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX};
|
||||||
@@ -461,13 +450,11 @@ struct SolveTask// : public ThreadPool::Task
|
|||||||
solveContact<false>( m_constraints[i], (b3Vector3&)bodyA.m_pos, (b3Vector3&)bodyA.m_linVel, (b3Vector3&)bodyA.m_angVel, bodyA.m_invMass, (const b3Matrix3x3 &)m_shapes[aIdx].m_invInertiaWorld,
|
solveContact<false>( m_constraints[i], (b3Vector3&)bodyA.m_pos, (b3Vector3&)bodyA.m_linVel, (b3Vector3&)bodyA.m_angVel, bodyA.m_invMass, (const b3Matrix3x3 &)m_shapes[aIdx].m_invInertiaWorld,
|
||||||
(b3Vector3&)bodyB.m_pos, (b3Vector3&)bodyB.m_linVel, (b3Vector3&)bodyB.m_angVel, bodyB.m_invMass, (const b3Matrix3x3 &)m_shapes[bIdx].m_invInertiaWorld,
|
(b3Vector3&)bodyB.m_pos, (b3Vector3&)bodyB.m_linVel, (b3Vector3&)bodyB.m_angVel, bodyB.m_invMass, (const b3Matrix3x3 &)m_shapes[bIdx].m_invInertiaWorld,
|
||||||
maxRambdaDt, minRambdaDt );
|
maxRambdaDt, minRambdaDt );
|
||||||
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
float maxRambdaDt[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX};
|
float maxRambdaDt[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX};
|
||||||
float minRambdaDt[4] = {0.f,0.f,0.f,0.f};
|
float minRambdaDt[4] = {0.f,0.f,0.f,0.f};
|
||||||
|
|
||||||
float sum = 0;
|
float sum = 0;
|
||||||
for(int j=0; j<4; j++)
|
for(int j=0; j<4; j++)
|
||||||
{
|
{
|
||||||
@@ -479,31 +466,14 @@ struct SolveTask// : public ThreadPool::Task
|
|||||||
maxRambdaDt[j] = frictionCoeff*sum;
|
maxRambdaDt[j] = frictionCoeff*sum;
|
||||||
minRambdaDt[j] = -maxRambdaDt[j];
|
minRambdaDt[j] = -maxRambdaDt[j];
|
||||||
}
|
}
|
||||||
|
solveFriction( m_constraints[i], (b3Vector3&)bodyA.m_pos, (b3Vector3&)bodyA.m_linVel, (b3Vector3&)bodyA.m_angVel, bodyA.m_invMass,(const b3Matrix3x3 &) m_shapes[aIdx].m_invInertiaWorld,
|
||||||
solveFriction( m_constraints[i], (b3Vector3&)bodyA.m_pos, (b3Vector3&)bodyA.m_linVel, (b3Vector3&)bodyA.m_angVel, bodyA.m_invMass,(const b3Matrix3x3 &) m_shapes[aIdx].m_invInertiaWorld,
|
|
||||||
(b3Vector3&)bodyB.m_pos, (b3Vector3&)bodyB.m_linVel, (b3Vector3&)bodyB.m_angVel, bodyB.m_invMass,(const b3Matrix3x3 &) m_shapes[bIdx].m_invInertiaWorld,
|
(b3Vector3&)bodyB.m_pos, (b3Vector3&)bodyB.m_linVel, (b3Vector3&)bodyB.m_angVel, bodyB.m_invMass,(const b3Matrix3x3 &) m_shapes[bIdx].m_invInertiaWorld,
|
||||||
maxRambdaDt, minRambdaDt );
|
maxRambdaDt, minRambdaDt );
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (m_wgUsedBodies)
|
|
||||||
{
|
|
||||||
if (m_wgUsedBodies[m_curWgidx].size()<usedBodies.size())
|
|
||||||
{
|
|
||||||
m_wgUsedBodies[m_curWgidx].resize(usedBodies.size());
|
|
||||||
}
|
|
||||||
for (int i=0;i<usedBodies.size();i++)
|
|
||||||
{
|
|
||||||
if (usedBodies[i])
|
|
||||||
{
|
|
||||||
//printf("cell %d uses body %d\n", m_curWgidx,i);
|
|
||||||
m_wgUsedBodies[m_curWgidx][i]=1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -512,7 +482,8 @@ struct SolveTask// : public ThreadPool::Task
|
|||||||
b3AlignedObjectArray<b3RigidBodyCL>& m_bodies;
|
b3AlignedObjectArray<b3RigidBodyCL>& m_bodies;
|
||||||
b3AlignedObjectArray<b3InertiaCL>& m_shapes;
|
b3AlignedObjectArray<b3InertiaCL>& m_shapes;
|
||||||
b3AlignedObjectArray<b3GpuConstraint4>& m_constraints;
|
b3AlignedObjectArray<b3GpuConstraint4>& m_constraints;
|
||||||
b3AlignedObjectArray<int>* m_wgUsedBodies;
|
b3AlignedObjectArray<int>* m_batchSizes;
|
||||||
|
int m_cellIndex;
|
||||||
int m_curWgidx;
|
int m_curWgidx;
|
||||||
int m_start;
|
int m_start;
|
||||||
int m_nConstraints;
|
int m_nConstraints;
|
||||||
@@ -522,7 +493,7 @@ struct SolveTask// : public ThreadPool::Task
|
|||||||
|
|
||||||
|
|
||||||
void b3Solver::solveContactConstraintHost( b3OpenCLArray<b3RigidBodyCL>* bodyBuf, b3OpenCLArray<b3InertiaCL>* shapeBuf,
|
void b3Solver::solveContactConstraintHost( b3OpenCLArray<b3RigidBodyCL>* bodyBuf, b3OpenCLArray<b3InertiaCL>* shapeBuf,
|
||||||
b3OpenCLArray<b3GpuConstraint4>* constraint, void* additionalData, int n ,int maxNumBatches)
|
b3OpenCLArray<b3GpuConstraint4>* constraint, void* additionalData, int n ,int maxNumBatches,b3AlignedObjectArray<int>* batchSizes)
|
||||||
{
|
{
|
||||||
|
|
||||||
#if 0
|
#if 0
|
||||||
@@ -634,7 +605,7 @@ void b3Solver::solveContactConstraintHost( b3OpenCLArray<b3RigidBodyCL>* bodyBu
|
|||||||
int numConstraintsInCell = numConstraintsHost[cellIdx];
|
int numConstraintsInCell = numConstraintsHost[cellIdx];
|
||||||
const int end = start + numConstraintsInCell;
|
const int end = start + numConstraintsInCell;
|
||||||
|
|
||||||
SolveTask task( bodyNative, shapeNative, constraintNative, start, numConstraintsInCell ,maxNumBatches,usedBodies,wgIdx);
|
SolveTask task( bodyNative, shapeNative, constraintNative, start, numConstraintsInCell ,maxNumBatches,usedBodies,wgIdx,batchSizes,cellIdx);
|
||||||
task.m_solveFriction = false;
|
task.m_solveFriction = false;
|
||||||
task.run(0);
|
task.run(0);
|
||||||
|
|
||||||
@@ -670,7 +641,7 @@ void b3Solver::solveContactConstraintHost( b3OpenCLArray<b3RigidBodyCL>* bodyBu
|
|||||||
int numConstraintsInCell = numConstraintsHost[cellIdx];
|
int numConstraintsInCell = numConstraintsHost[cellIdx];
|
||||||
const int end = start + numConstraintsInCell;
|
const int end = start + numConstraintsInCell;
|
||||||
|
|
||||||
SolveTask task( bodyNative, shapeNative, constraintNative, start, numConstraintsInCell,maxNumBatches, 0,0);
|
SolveTask task( bodyNative, shapeNative, constraintNative, start, numConstraintsInCell,maxNumBatches, 0,0,batchSizes,cellIdx);
|
||||||
task.m_solveFriction = true;
|
task.m_solveFriction = true;
|
||||||
task.run(0);
|
task.run(0);
|
||||||
|
|
||||||
@@ -683,14 +654,14 @@ void b3Solver::solveContactConstraintHost( b3OpenCLArray<b3RigidBodyCL>* bodyBu
|
|||||||
{
|
{
|
||||||
for(int iter=0; iter<m_nIterations; iter++)
|
for(int iter=0; iter<m_nIterations; iter++)
|
||||||
{
|
{
|
||||||
SolveTask task( bodyNative, shapeNative, constraintNative, 0, n ,maxNumBatches,0,0);
|
SolveTask task( bodyNative, shapeNative, constraintNative, 0, n ,maxNumBatches,0,0,0,0);
|
||||||
task.m_solveFriction = false;
|
task.m_solveFriction = false;
|
||||||
task.run(0);
|
task.run(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
for(int iter=0; iter<m_nIterations; iter++)
|
for(int iter=0; iter<m_nIterations; iter++)
|
||||||
{
|
{
|
||||||
SolveTask task( bodyNative, shapeNative, constraintNative, 0, n ,maxNumBatches,0,0);
|
SolveTask task( bodyNative, shapeNative, constraintNative, 0, n ,maxNumBatches,0,0,0,0);
|
||||||
task.m_solveFriction = true;
|
task.m_solveFriction = true;
|
||||||
task.run(0);
|
task.run(0);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -39,6 +39,7 @@ enum
|
|||||||
B3_SOLVER_N_SPLIT_Z = 8,//,
|
B3_SOLVER_N_SPLIT_Z = 8,//,
|
||||||
B3_SOLVER_N_CELLS = B3_SOLVER_N_SPLIT_X*B3_SOLVER_N_SPLIT_Y*B3_SOLVER_N_SPLIT_Z,
|
B3_SOLVER_N_CELLS = B3_SOLVER_N_SPLIT_X*B3_SOLVER_N_SPLIT_Y*B3_SOLVER_N_SPLIT_Z,
|
||||||
B3_SOLVER_N_BATCHES = 8,//4,//8,//4,
|
B3_SOLVER_N_BATCHES = 8,//4,//8,//4,
|
||||||
|
B3_MAX_NUM_BATCHES = 128,
|
||||||
};
|
};
|
||||||
|
|
||||||
class b3SolverBase
|
class b3SolverBase
|
||||||
@@ -106,7 +107,7 @@ class b3Solver : public b3SolverBase
|
|||||||
b3OpenCLArray<b3GpuConstraint4>* constraint, void* additionalData, int n ,int maxNumBatches);
|
b3OpenCLArray<b3GpuConstraint4>* constraint, void* additionalData, int n ,int maxNumBatches);
|
||||||
|
|
||||||
void solveContactConstraintHost( b3OpenCLArray<b3RigidBodyCL>* bodyBuf, b3OpenCLArray<b3InertiaCL>* shapeBuf,
|
void solveContactConstraintHost( b3OpenCLArray<b3RigidBodyCL>* bodyBuf, b3OpenCLArray<b3InertiaCL>* shapeBuf,
|
||||||
b3OpenCLArray<b3GpuConstraint4>* constraint, void* additionalData, int n ,int maxNumBatches);
|
b3OpenCLArray<b3GpuConstraint4>* constraint, void* additionalData, int n ,int maxNumBatches, b3AlignedObjectArray<int>* batchSizes);
|
||||||
|
|
||||||
|
|
||||||
void convertToConstraints( const b3OpenCLArray<b3RigidBodyCL>* bodyBuf,
|
void convertToConstraints( const b3OpenCLArray<b3RigidBodyCL>* bodyBuf,
|
||||||
|
|||||||
@@ -476,3 +476,22 @@ void BatchSolveKernelContact(__global Body* gBodies,
|
|||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
__kernel void solveSingleContactKernel(__global Body* gBodies,
|
||||||
|
__global Shape* gShapes,
|
||||||
|
__global Constraint4* gConstraints,
|
||||||
|
int cellIdx,
|
||||||
|
int batchOffset,
|
||||||
|
int numConstraintsInBatch
|
||||||
|
)
|
||||||
|
{
|
||||||
|
|
||||||
|
int index = get_global_id(0);
|
||||||
|
if (index < numConstraintsInBatch)
|
||||||
|
{
|
||||||
|
int idx=batchOffset+index;
|
||||||
|
solveContactConstraint( gBodies, gShapes, &gConstraints[idx] );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -371,4 +371,19 @@ static const char* solveContactCL= \
|
|||||||
" \n"
|
" \n"
|
||||||
" \n"
|
" \n"
|
||||||
"}\n"
|
"}\n"
|
||||||
|
"__kernel void solveSingleContactKernel(__global Body* gBodies,\n"
|
||||||
|
" __global Shape* gShapes,\n"
|
||||||
|
" __global Constraint4* gConstraints,\n"
|
||||||
|
" int cellIdx,\n"
|
||||||
|
" int batchOffset,\n"
|
||||||
|
" int numConstraintsInBatch\n"
|
||||||
|
" )\n"
|
||||||
|
"{\n"
|
||||||
|
" int index = get_global_id(0);\n"
|
||||||
|
" if (index < numConstraintsInBatch)\n"
|
||||||
|
" {\n"
|
||||||
|
" int idx=batchOffset+index;\n"
|
||||||
|
" solveContactConstraint( gBodies, gShapes, &gConstraints[idx] );\n"
|
||||||
|
" } \n"
|
||||||
|
"}\n"
|
||||||
;
|
;
|
||||||
|
|||||||
@@ -498,3 +498,27 @@ void BatchSolveKernelFriction(__global Body* gBodies,
|
|||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
__kernel void solveSingleFrictionKernel(__global Body* gBodies,
|
||||||
|
__global Shape* gShapes,
|
||||||
|
__global Constraint4* gConstraints,
|
||||||
|
int cellIdx,
|
||||||
|
int batchOffset,
|
||||||
|
int numConstraintsInBatch
|
||||||
|
)
|
||||||
|
{
|
||||||
|
|
||||||
|
int index = get_global_id(0);
|
||||||
|
if (index < numConstraintsInBatch)
|
||||||
|
{
|
||||||
|
|
||||||
|
int idx=batchOffset+index;
|
||||||
|
|
||||||
|
solveFrictionConstraint( gBodies, gShapes, &gConstraints[idx] );
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -399,4 +399,21 @@ static const char* solveFrictionCL= \
|
|||||||
" \n"
|
" \n"
|
||||||
" \n"
|
" \n"
|
||||||
"}\n"
|
"}\n"
|
||||||
|
"__kernel void solveSingleFrictionKernel(__global Body* gBodies,\n"
|
||||||
|
" __global Shape* gShapes,\n"
|
||||||
|
" __global Constraint4* gConstraints,\n"
|
||||||
|
" int cellIdx,\n"
|
||||||
|
" int batchOffset,\n"
|
||||||
|
" int numConstraintsInBatch\n"
|
||||||
|
" )\n"
|
||||||
|
"{\n"
|
||||||
|
" int index = get_global_id(0);\n"
|
||||||
|
" if (index < numConstraintsInBatch)\n"
|
||||||
|
" {\n"
|
||||||
|
" \n"
|
||||||
|
" int idx=batchOffset+index;\n"
|
||||||
|
" \n"
|
||||||
|
" solveFrictionConstraint( gBodies, gShapes, &gConstraints[idx] );\n"
|
||||||
|
" } \n"
|
||||||
|
"}\n"
|
||||||
;
|
;
|
||||||
|
|||||||
Reference in New Issue
Block a user