Remove clFinish and copyToHost calls from the GPU joint solver; performance is looking better now.
This commit is contained in:
@@ -187,7 +187,7 @@ int GpuConstraintsDemo::createDynamicsObjects2(const ConstructionInfo& ci, const
|
|||||||
{
|
{
|
||||||
case 0:
|
case 0:
|
||||||
{
|
{
|
||||||
|
///enable next line to force CPU constraint solving
|
||||||
//c = new b3Point2PointConstraint(pid,prevBody,b3Vector3(-1.1,0,0),b3Vector3(1.1,0,0));
|
//c = new b3Point2PointConstraint(pid,prevBody,b3Vector3(-1.1,0,0),b3Vector3(1.1,0,0));
|
||||||
// c->setBreakingImpulseThreshold(14);
|
// c->setBreakingImpulseThreshold(14);
|
||||||
b3Vector3 pivotInA(-1.1,0,0);
|
b3Vector3 pivotInA(-1.1,0,0);
|
||||||
|
|||||||
@@ -351,7 +351,7 @@ void b3GpuBatchingPgsSolver::solveContactConstraint( const b3OpenCLArray<b3Rigi
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
clFinish(m_data->m_queue);
|
//clFinish(m_data->m_queue);
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
@@ -388,7 +388,7 @@ void b3GpuBatchingPgsSolver::solveContactConstraint( const b3OpenCLArray<b3Rigi
|
|||||||
launcher.launch1D( 64*nn/B3_SOLVER_N_BATCHES, 64 );
|
launcher.launch1D( 64*nn/B3_SOLVER_N_BATCHES, 64 );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
clFinish(m_data->m_queue);
|
//clFinish(m_data->m_queue);
|
||||||
|
|
||||||
}
|
}
|
||||||
#ifdef DEBUG_ME
|
#ifdef DEBUG_ME
|
||||||
@@ -458,7 +458,7 @@ void b3GpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
|||||||
m_data->m_solverGPU->m_contactBuffer2->resize(nContacts);
|
m_data->m_solverGPU->m_contactBuffer2->resize(nContacts);
|
||||||
}
|
}
|
||||||
|
|
||||||
clFinish(m_data->m_queue);
|
//clFinish(m_data->m_queue);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -586,7 +586,7 @@ void b3GpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
clFinish(m_data->m_queue);
|
//clFinish(m_data->m_queue);
|
||||||
|
|
||||||
// {
|
// {
|
||||||
// b3AlignedObjectArray<unsigned int> histogram;
|
// b3AlignedObjectArray<unsigned int> histogram;
|
||||||
@@ -604,7 +604,7 @@ void b3GpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
|||||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
|
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
|
||||||
launcher.setConst( cdata );
|
launcher.setConst( cdata );
|
||||||
launcher.launch1D( nContacts, 64 );
|
launcher.launch1D( nContacts, 64 );
|
||||||
clFinish(m_data->m_queue);
|
//clFinish(m_data->m_queue);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -666,7 +666,7 @@ void b3GpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
|||||||
b3Printf("maxNumBatches = %d\n",maxNumBatches);
|
b3Printf("maxNumBatches = %d\n",maxNumBatches);
|
||||||
}
|
}
|
||||||
|
|
||||||
clFinish(m_data->m_queue);
|
//clFinish(m_data->m_queue);
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -691,7 +691,7 @@ void b3GpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
|||||||
contactConstraintOut,
|
contactConstraintOut,
|
||||||
additionalData, nContacts,
|
additionalData, nContacts,
|
||||||
(b3SolverBase::ConstraintCfg&) csCfg );
|
(b3SolverBase::ConstraintCfg&) csCfg );
|
||||||
clFinish(m_data->m_queue);
|
//clFinish(m_data->m_queue);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -723,7 +723,7 @@ void b3GpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
|||||||
m_data->m_solverGPU->solveContactConstraintHost(m_data->m_bodyBufferGPU, m_data->m_inertiaBufferGPU, m_data->m_contactCGPU,0, nContactOut ,maxNumBatches);
|
m_data->m_solverGPU->solveContactConstraintHost(m_data->m_bodyBufferGPU, m_data->m_inertiaBufferGPU, m_data->m_contactCGPU,0, nContactOut ,maxNumBatches);
|
||||||
}
|
}
|
||||||
|
|
||||||
clFinish(m_data->m_queue);
|
//clFinish(m_data->m_queue);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -207,7 +207,7 @@ b3Scalar b3GpuPgsJacobiSolver::solveGroupCacheFriendlySetup(b3OpenCLArray<b3Rigi
|
|||||||
launcher.setBuffer(gpuBodies->getBufferCL());
|
launcher.setBuffer(gpuBodies->getBufferCL());
|
||||||
launcher.setConst(numBodies);
|
launcher.setConst(numBodies);
|
||||||
launcher.launch1D(numBodies);
|
launcher.launch1D(numBodies);
|
||||||
clFinish(m_gpuData->m_queue);
|
//clFinish(m_gpuData->m_queue);
|
||||||
|
|
||||||
// m_gpuData->m_gpuSolverBodies->copyToHost(m_tmpSolverBodyPool);
|
// m_gpuData->m_gpuSolverBodies->copyToHost(m_tmpSolverBodyPool);
|
||||||
} else
|
} else
|
||||||
@@ -264,20 +264,20 @@ b3Scalar b3GpuPgsJacobiSolver::solveGroupCacheFriendlySetup(b3OpenCLArray<b3Rigi
|
|||||||
launcher.setConst(numConstraints);
|
launcher.setConst(numConstraints);
|
||||||
launcher.launch1D(numConstraints);
|
launcher.launch1D(numConstraints);
|
||||||
}
|
}
|
||||||
clFinish(m_gpuData->m_queue);
|
//clFinish(m_gpuData->m_queue);
|
||||||
if (batches.size()==0)
|
if (batches.size()==0)
|
||||||
m_gpuData->m_gpuBatchConstraints->copyToHost(batchConstraints);
|
m_gpuData->m_gpuBatchConstraints->copyToHost(batchConstraints);
|
||||||
|
|
||||||
if (1)
|
if (1)
|
||||||
{
|
{
|
||||||
m_gpuData->m_gpuConstraintInfo1->copyToHost(m_tmpConstraintSizesPool);
|
//m_gpuData->m_gpuConstraintInfo1->copyToHost(m_tmpConstraintSizesPool);
|
||||||
b3OpenCLArray<unsigned int> dst(m_gpuData->m_context,m_gpuData->m_queue);
|
b3OpenCLArray<unsigned int> dst(m_gpuData->m_context,m_gpuData->m_queue);
|
||||||
dst.resize(numConstraints);
|
dst.resize(numConstraints);
|
||||||
unsigned int total=0;
|
unsigned int total=0;
|
||||||
m_gpuData->m_prefixScan->execute(*m_gpuData->m_gpuConstraintInfo1,dst,numConstraints,&total);
|
m_gpuData->m_prefixScan->execute(*m_gpuData->m_gpuConstraintInfo1,dst,numConstraints,&total);
|
||||||
unsigned int lastElem = m_gpuData->m_gpuConstraintInfo1->at(numConstraints-1);
|
unsigned int lastElem = m_gpuData->m_gpuConstraintInfo1->at(numConstraints-1);
|
||||||
b3AlignedObjectArray<unsigned int> dstHost;
|
//b3AlignedObjectArray<unsigned int> dstHost;
|
||||||
dst.copyToHost(dstHost);
|
//dst.copyToHost(dstHost);
|
||||||
totalNumRows = total+lastElem;
|
totalNumRows = total+lastElem;
|
||||||
|
|
||||||
{
|
{
|
||||||
@@ -287,7 +287,7 @@ b3Scalar b3GpuPgsJacobiSolver::solveGroupCacheFriendlySetup(b3OpenCLArray<b3Rigi
|
|||||||
launcher.setBuffer(m_gpuData->m_gpuBatchConstraints->getBufferCL());
|
launcher.setBuffer(m_gpuData->m_gpuBatchConstraints->getBufferCL());
|
||||||
launcher.setConst(numConstraints);
|
launcher.setConst(numConstraints);
|
||||||
launcher.launch1D(numConstraints);
|
launcher.launch1D(numConstraints);
|
||||||
clFinish(m_gpuData->m_queue);
|
//clFinish(m_gpuData->m_queue);
|
||||||
}
|
}
|
||||||
if (batches.size()==0)
|
if (batches.size()==0)
|
||||||
m_gpuData->m_gpuBatchConstraints->copyToHost(batchConstraints);
|
m_gpuData->m_gpuBatchConstraints->copyToHost(batchConstraints);
|
||||||
@@ -346,12 +346,12 @@ b3Scalar b3GpuPgsJacobiSolver::solveGroupCacheFriendlySetup(b3OpenCLArray<b3Rigi
|
|||||||
launcher.setConst(infoGlobal.m_numIterations);
|
launcher.setConst(infoGlobal.m_numIterations);
|
||||||
launcher.setConst(numConstraints);
|
launcher.setConst(numConstraints);
|
||||||
launcher.launch1D(numConstraints);
|
launcher.launch1D(numConstraints);
|
||||||
clFinish(m_gpuData->m_queue);
|
//clFinish(m_gpuData->m_queue);
|
||||||
|
|
||||||
if (batches.size()==0)
|
if (batches.size()==0)
|
||||||
m_gpuData->m_gpuBatchConstraints->copyToHost(batchConstraints);
|
m_gpuData->m_gpuBatchConstraints->copyToHost(batchConstraints);
|
||||||
//m_gpuData->m_gpuConstraintRows->copyToHost(verify);
|
//m_gpuData->m_gpuConstraintRows->copyToHost(verify);
|
||||||
m_gpuData->m_gpuConstraintRows->copyToHost(m_tmpSolverNonContactConstraintPool);
|
//m_gpuData->m_gpuConstraintRows->copyToHost(m_tmpSolverNonContactConstraintPool);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -645,7 +645,7 @@ b3Scalar b3GpuPgsJacobiSolver::solveGroupCacheFriendlyIterations(b3OpenCLArray<b
|
|||||||
{
|
{
|
||||||
//only create the batches once.
|
//only create the batches once.
|
||||||
//@todo: incrementally update batches when constraints are added/activated and/or removed/deactivated
|
//@todo: incrementally update batches when constraints are added/activated and/or removed/deactivated
|
||||||
bool createBatches = true;//batches.size()==0;
|
bool createBatches = batches.size()==0;
|
||||||
{
|
{
|
||||||
B3_PROFILE("GpuSolveGroupCacheFriendlyIterations");
|
B3_PROFILE("GpuSolveGroupCacheFriendlyIterations");
|
||||||
if (createBatches)
|
if (createBatches)
|
||||||
@@ -701,7 +701,7 @@ b3Scalar b3GpuPgsJacobiSolver::solveGroupCacheFriendlyIterations(b3OpenCLArray<b
|
|||||||
launcher.setConst(numConstraintsInBatch);
|
launcher.setConst(numConstraintsInBatch);
|
||||||
|
|
||||||
launcher.launch1D(numConstraintsInBatch);
|
launcher.launch1D(numConstraintsInBatch);
|
||||||
clFinish(m_gpuData->m_queue);
|
//clFinish(m_gpuData->m_queue);
|
||||||
|
|
||||||
} else
|
} else
|
||||||
{
|
{
|
||||||
@@ -740,8 +740,8 @@ b3Scalar b3GpuPgsJacobiSolver::solveGroupCacheFriendlyIterations(b3OpenCLArray<b
|
|||||||
|
|
||||||
if (useGpu)
|
if (useGpu)
|
||||||
{
|
{
|
||||||
B3_PROFILE("copy to host");
|
//B3_PROFILE("copy to host");
|
||||||
m_gpuData->m_gpuSolverBodies->copyToHost(m_tmpSolverBodyPool);
|
//m_gpuData->m_gpuSolverBodies->copyToHost(m_tmpSolverBodyPool);
|
||||||
}
|
}
|
||||||
//int sz = sizeof(b3GpuSolverBody);
|
//int sz = sizeof(b3GpuSolverBody);
|
||||||
//printf("cpu sizeof(b3GpuSolverBody)=%d\n",sz);
|
//printf("cpu sizeof(b3GpuSolverBody)=%d\n",sz);
|
||||||
@@ -971,7 +971,7 @@ b3Scalar b3GpuPgsJacobiSolver::solveGroupCacheFriendlyFinish(b3OpenCLArray<b3Rig
|
|||||||
launcher.setBuffer(m_gpuData->m_gpuSolverBodies->getBufferCL());
|
launcher.setBuffer(m_gpuData->m_gpuSolverBodies->getBufferCL());
|
||||||
launcher.setConst(numBodies);
|
launcher.setConst(numBodies);
|
||||||
launcher.launch1D(numBodies);
|
launcher.launch1D(numBodies);
|
||||||
clFinish(m_gpuData->m_queue);
|
//clFinish(m_gpuData->m_queue);
|
||||||
// m_gpuData->m_gpuSolverBodies->copyToHost(m_tmpSolverBodyPool);
|
// m_gpuData->m_gpuSolverBodies->copyToHost(m_tmpSolverBodyPool);
|
||||||
// m_gpuData->m_gpuBodies->copyToHostPointer(bodies,numBodies);
|
// m_gpuData->m_gpuBodies->copyToHostPointer(bodies,numBodies);
|
||||||
//m_gpuData->m_gpuBodies->copyToHost(testBodies);
|
//m_gpuData->m_gpuBodies->copyToHost(testBodies);
|
||||||
|
|||||||
Reference in New Issue
Block a user