Accidentally left some very slow copyToHost calls in the batching code; removing them makes it faster :-)
This commit is contained in:
@@ -895,7 +895,7 @@ void b3GpuPgsContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
|||||||
}
|
}
|
||||||
m_data->m_solverGPU->m_contactBuffer2->copyFromHost(outContacts);
|
m_data->m_solverGPU->m_contactBuffer2->copyFromHost(outContacts);
|
||||||
|
|
||||||
/* "void ReorderContactKernel(__global struct b3Contact4Data* in, __global struct b3Contact4Data* out, __global int2* sortData, int4 cb )\n"
|
/* "void ReorderContactKernel(__global struct b3Contact4Data* in, __global struct b3Contact4Data* out, __global int2* sortData, int4 cb )\n"
|
||||||
"{\n"
|
"{\n"
|
||||||
" int nContacts = cb.x;\n"
|
" int nContacts = cb.x;\n"
|
||||||
" int gIdx = GET_GLOBAL_IDX;\n"
|
" int gIdx = GET_GLOBAL_IDX;\n"
|
||||||
@@ -934,11 +934,11 @@ void b3GpuPgsContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
|||||||
|
|
||||||
//clFinish(m_data->m_queue);
|
//clFinish(m_data->m_queue);
|
||||||
|
|
||||||
// {
|
// {
|
||||||
// b3AlignedObjectArray<unsigned int> histogram;
|
// b3AlignedObjectArray<unsigned int> histogram;
|
||||||
// m_data->m_solverGPU->m_numConstraints->copyToHost(histogram);
|
// m_data->m_solverGPU->m_numConstraints->copyToHost(histogram);
|
||||||
// printf(",,,\n");
|
// printf(",,,\n");
|
||||||
// }
|
// }
|
||||||
|
|
||||||
|
|
||||||
if (nContacts)
|
if (nContacts)
|
||||||
@@ -949,7 +949,7 @@ void b3GpuPgsContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
|||||||
for (int i=0;i<nContacts;i++)
|
for (int i=0;i<nContacts;i++)
|
||||||
{
|
{
|
||||||
m_data->m_pBufContactOutGPU->copyFromOpenCLArray(*m_data->m_solverGPU->m_contactBuffer2);
|
m_data->m_pBufContactOutGPU->copyFromOpenCLArray(*m_data->m_solverGPU->m_contactBuffer2);
|
||||||
// m_data->m_solverGPU->m_contactBuffer2->getBufferCL();
|
// m_data->m_solverGPU->m_contactBuffer2->getBufferCL();
|
||||||
// m_data->m_pBufContactOutGPU->getBufferCL()
|
// m_data->m_pBufContactOutGPU->getBufferCL()
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -980,6 +980,7 @@ void b3GpuPgsContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
|||||||
B3_PROFILE("gpu batchContacts");
|
B3_PROFILE("gpu batchContacts");
|
||||||
maxNumBatches = 150;//250;
|
maxNumBatches = 150;//250;
|
||||||
m_data->m_solverGPU->batchContacts( m_data->m_pBufContactOutGPU, nContacts, m_data->m_solverGPU->m_numConstraints, m_data->m_solverGPU->m_offsets, csCfg.m_staticIdx );
|
m_data->m_solverGPU->batchContacts( m_data->m_pBufContactOutGPU, nContacts, m_data->m_solverGPU->m_numConstraints, m_data->m_solverGPU->m_offsets, csCfg.m_staticIdx );
|
||||||
|
clFinish(m_data->m_queue);
|
||||||
} else
|
} else
|
||||||
{
|
{
|
||||||
B3_PROFILE("cpu batchContacts");
|
B3_PROFILE("cpu batchContacts");
|
||||||
@@ -1003,25 +1004,16 @@ void b3GpuPgsContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
|||||||
int numNonzeroGrid=0;
|
int numNonzeroGrid=0;
|
||||||
|
|
||||||
{
|
{
|
||||||
B3_PROFILE("batch grid");
|
B3_PROFILE("cpu batch grid");
|
||||||
for(int i=0; i<B3_SOLVER_N_CELLS; i++)
|
for(int i=0; i<B3_SOLVER_N_CELLS; i++)
|
||||||
{
|
{
|
||||||
int n = (nNativeHost)[i];
|
int n = (nNativeHost)[i];
|
||||||
int offset = (offsetsNativeHost)[i];
|
int offset = (offsetsNativeHost)[i];
|
||||||
|
|
||||||
if( n )
|
if( n )
|
||||||
{
|
{
|
||||||
numNonzeroGrid++;
|
numNonzeroGrid++;
|
||||||
//printf("cpu batch cell %d\n",i);
|
|
||||||
|
|
||||||
|
|
||||||
int simdWidth =numBodies+1;//-1;//64;//-1;//32;
|
int simdWidth =numBodies+1;//-1;//64;//-1;//32;
|
||||||
//int numBatches = sortConstraintByBatch( &cpuContacts[0]+offset, n, simdWidth,csCfg.m_staticIdx ,numBodies); // on GPU
|
|
||||||
//int numBatches = sortConstraintByBatch2( &cpuContacts[0]+offset, n, simdWidth,csCfg.m_staticIdx ,numBodies); // on GPU
|
|
||||||
int numBatches = sortConstraintByBatch3( &cpuContacts[0]+offset, n, simdWidth,csCfg.m_staticIdx ,numBodies); // on GPU
|
int numBatches = sortConstraintByBatch3( &cpuContacts[0]+offset, n, simdWidth,csCfg.m_staticIdx ,numBodies); // on GPU
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
maxNumBatches = b3Max(numBatches,maxNumBatches);
|
maxNumBatches = b3Max(numBatches,maxNumBatches);
|
||||||
static int globalMaxBatch = 0;
|
static int globalMaxBatch = 0;
|
||||||
if (maxNumBatches>globalMaxBatch )
|
if (maxNumBatches>globalMaxBatch )
|
||||||
@@ -1029,13 +1021,11 @@ void b3GpuPgsContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
|||||||
globalMaxBatch = maxNumBatches;
|
globalMaxBatch = maxNumBatches;
|
||||||
b3Printf("maxNumBatches = %d\n",maxNumBatches);
|
b3Printf("maxNumBatches = %d\n",maxNumBatches);
|
||||||
}
|
}
|
||||||
|
|
||||||
//we use the clFinish for proper benchmark/profile
|
//we use the clFinish for proper benchmark/profile
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
clFinish(m_data->m_queue);
|
clFinish(m_data->m_queue);
|
||||||
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
B3_PROFILE("m_contactBuffer->copyFromHost");
|
B3_PROFILE("m_contactBuffer->copyFromHost");
|
||||||
@@ -1051,7 +1041,7 @@ void b3GpuPgsContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
|||||||
|
|
||||||
if (nContacts)
|
if (nContacts)
|
||||||
{
|
{
|
||||||
//B3_PROFILE("gpu convertToConstraints");
|
B3_PROFILE("gpu convertToConstraints");
|
||||||
m_data->m_solverGPU->convertToConstraints( bodyBuf,
|
m_data->m_solverGPU->convertToConstraints( bodyBuf,
|
||||||
shapeBuf, m_data->m_solverGPU->m_contactBuffer2,
|
shapeBuf, m_data->m_solverGPU->m_contactBuffer2,
|
||||||
contactConstraintOut,
|
contactConstraintOut,
|
||||||
|
|||||||
@@ -978,6 +978,9 @@ void b3Solver::convertToConstraints( const b3OpenCLArray<b3RigidBodyCL>* bodyBuf
|
|||||||
cdata.m_positionDrift = cfg.m_positionDrift;
|
cdata.m_positionDrift = cfg.m_positionDrift;
|
||||||
cdata.m_positionConstraintCoeff = cfg.m_positionConstraintCoeff;
|
cdata.m_positionConstraintCoeff = cfg.m_positionConstraintCoeff;
|
||||||
|
|
||||||
|
|
||||||
|
if (convertConstraintOnCpu)
|
||||||
|
{
|
||||||
b3AlignedObjectArray<b3RigidBodyCL> gBodies;
|
b3AlignedObjectArray<b3RigidBodyCL> gBodies;
|
||||||
bodyBuf->copyToHost(gBodies);
|
bodyBuf->copyToHost(gBodies);
|
||||||
|
|
||||||
@@ -990,8 +993,6 @@ void b3Solver::convertToConstraints( const b3OpenCLArray<b3RigidBodyCL>* bodyBuf
|
|||||||
b3AlignedObjectArray<b3GpuConstraint4> gConstraintOut;
|
b3AlignedObjectArray<b3GpuConstraint4> gConstraintOut;
|
||||||
gConstraintOut.resize(nContacts);
|
gConstraintOut.resize(nContacts);
|
||||||
|
|
||||||
if (convertConstraintOnCpu)
|
|
||||||
{
|
|
||||||
B3_PROFILE("cpu contactToConstraintKernel");
|
B3_PROFILE("cpu contactToConstraintKernel");
|
||||||
for (int gIdx=0;gIdx<nContacts;gIdx++)
|
for (int gIdx=0;gIdx<nContacts;gIdx++)
|
||||||
{
|
{
|
||||||
|
|||||||
BIN
win32_binary/App_Bullet3_OpenCL_Demos_clew_vs2010.exe
Normal file
BIN
win32_binary/App_Bullet3_OpenCL_Demos_clew_vs2010.exe
Normal file
Binary file not shown.
BIN
win32_binary/App_ImplicitCloth_vs2010.exe
Normal file
BIN
win32_binary/App_ImplicitCloth_vs2010.exe
Normal file
Binary file not shown.
BIN
win32_binary/App_SimpleOpenGL3_vs2010.exe
Normal file
BIN
win32_binary/App_SimpleOpenGL3_vs2010.exe
Normal file
Binary file not shown.
BIN
win32_binary/App_ThreadingTest_vs2010.exe
Normal file
BIN
win32_binary/App_ThreadingTest_vs2010.exe
Normal file
Binary file not shown.
BIN
win32_binary/OpenCL_GUI_Intialize_clew_vs2010.exe
Normal file
BIN
win32_binary/OpenCL_GUI_Intialize_clew_vs2010.exe
Normal file
Binary file not shown.
BIN
win32_binary/Test_OpenCL_Primitives_clew_vs2010.exe
Normal file
BIN
win32_binary/Test_OpenCL_Primitives_clew_vs2010.exe
Normal file
Binary file not shown.
BIN
win32_binary/Test_OpenCL_RadixSortBenchmark_clew_vs2010.exe
Normal file
BIN
win32_binary/Test_OpenCL_RadixSortBenchmark_clew_vs2010.exe
Normal file
Binary file not shown.
BIN
win32_binary/Test_OpenCL_kernel_launch_clew_vs2010.exe
Normal file
BIN
win32_binary/Test_OpenCL_kernel_launch_clew_vs2010.exe
Normal file
Binary file not shown.
Reference in New Issue
Block a user