experiment with first-level batching using object index instead of spatial hash in uniform grid (to avoid tuning average object size for uniform grid)

This commit is contained in:
erwin coumans
2013-05-03 01:14:34 -07:00
parent 6ee9eb9bb5
commit 1185de51d5
7 changed files with 211 additions and 15 deletions

View File

@@ -15,9 +15,9 @@ subject to the following restrictions:
///create 125 (5x5x5) dynamic object
#define ARRAY_SIZE_X 5
#define ARRAY_SIZE_Y 5
#define ARRAY_SIZE_Z 5
#define ARRAY_SIZE_X 30
#define ARRAY_SIZE_Y 20
#define ARRAY_SIZE_Z 30
//maximum number of objects (and allow user to shoot additional boxes)
#define MAX_PROXIES (ARRAY_SIZE_X*ARRAY_SIZE_Y*ARRAY_SIZE_Z + 1024)
@@ -164,7 +164,7 @@ void BasicGpuDemo::exitCL()
BasicGpuDemo::BasicGpuDemo()
{
m_clData = new btInternalData;
setCameraDistance(btScalar(SCALING*20.));
setCameraDistance(btScalar(SCALING*120.));
this->setAzi(45);
this->setEle(45);
@@ -222,7 +222,7 @@ void BasicGpuDemo::initPhysics()
m_dynamicsWorld->setGravity(btVector3(0,-10,0));
///create a few basic rigid bodies
btBoxShape* groundShape = new btBoxShape(btVector3(btScalar(50.),btScalar(50.),btScalar(50.)));
btBoxShape* groundShape = new btBoxShape(btVector3(btScalar(150.),btScalar(50.),btScalar(150.)));
//groundShape->initializePolyhedralFeatures();
// btCollisionShape* groundShape = new btStaticPlaneShape(btVector3(0,1,0),50);

View File

@@ -119,6 +119,7 @@ void GpuRigidBodyDemo::initPhysics(const ConstructionInfo& ci)
setupScene(ci);
m_data->m_rigidBodyPipeline->writeAllInstancesToGpu();
np->writeAllBodiesToGpu();
bp->writeAabbsToGpu();

View File

@@ -424,7 +424,7 @@ void b3GpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
float dt=1./60.;
b3ConstraintCfg csCfg( dt );
csCfg.m_enableParallelSolve = true;
csCfg.m_averageExtent = .2f;//@TODO m_averageObjExtent;
csCfg.m_averageExtent = 0.3;//0.1;//2;//.2f;//@TODO m_averageObjExtent;
csCfg.m_staticIdx = static0Index;
@@ -516,8 +516,8 @@ void b3GpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
if (gpuRadixSort)
{ // 3. sort by cell idx
B3_PROFILE("gpuRadixSort");
int n = B3_SOLVER_N_SPLIT*B3_SOLVER_N_SPLIT;
int sortBit = 32;
//int n = B3_SOLVER_N_SPLIT*B3_SOLVER_N_SPLIT;
//int sortBit = 32;
//if( n <= 0xffff ) sortBit = 16;
//if( n <= 0xff ) sortBit = 8;
//adl::RadixSort<adl::TYPE_CL>::execute( data->m_sort, *data->m_sortDataBuffer, sortSize );
@@ -581,6 +581,12 @@ void b3GpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
clFinish(m_data->m_queue);
// {
// b3AlignedObjectArray<unsigned int> histogram;
// m_data->m_solverGPU->m_numConstraints->copyToHost(histogram);
// printf(",,,\n");
// }
if (nContacts)
{

View File

@@ -551,6 +551,75 @@ void b3Solver::solveContactConstraintHost( b3OpenCLArray<b3RigidBodyCL>* bodyBu
}
void checkConstraintBatch(const b3OpenCLArray<b3RigidBodyCL>* bodyBuf,
const b3OpenCLArray<b3InertiaCL>* shapeBuf,
b3OpenCLArray<b3GpuConstraint4>* constraint,
b3OpenCLArray<unsigned int>* m_numConstraints,
b3OpenCLArray<unsigned int>* m_offsets,
int batchId
)
{
// b3BufferInfoCL( m_numConstraints->getBufferCL() ),
// b3BufferInfoCL( m_offsets->getBufferCL() )
const int nn = b3SolverBase::N_SPLIT*b3SolverBase::N_SPLIT;
int numWorkItems = 64*nn/b3SolverBase::N_BATCHES;
b3AlignedObjectArray<unsigned int> gN;
m_numConstraints->copyToHost(gN);
b3AlignedObjectArray<unsigned int> gOffsets;
m_offsets->copyToHost(gOffsets);
int nSplit = b3SolverBase::N_SPLIT;
int bIdx = batchId;
b3AlignedObjectArray<b3GpuConstraint4> cpuConstraints;
constraint->copyToHost(cpuConstraints);
printf("batch = %d\n", batchId);
int numWorkgroups = nn/b3SolverBase::N_BATCHES;
b3AlignedObjectArray<int> usedBodies;
for (int wgIdx=0;wgIdx<numWorkgroups;wgIdx++)
{
printf("wgIdx = %d ", wgIdx);
int xIdx = (wgIdx/(nSplit/2))*2 + (bIdx&1);
int yIdx = (wgIdx%(nSplit/2))*2 + (bIdx>>1);
int cellIdx = xIdx+yIdx*nSplit;
printf("cellIdx=%d\n",cellIdx);
if( gN[cellIdx] == 0 )
continue;
const int start = gOffsets[cellIdx];
const int end = start + gN[cellIdx];
for (int c=start;c<end;c++)
{
b3GpuConstraint4& constraint = cpuConstraints[c];
//printf("constraint (%d,%d)\n", constraint.m_bodyA,constraint.m_bodyB);
if (usedBodies.findLinearSearch(constraint.m_bodyA)< usedBodies.size())
{
printf("error?\n");
}
if (usedBodies.findLinearSearch(constraint.m_bodyB)< usedBodies.size())
{
printf("error?\n");
}
}
for (int c=start;c<end;c++)
{
b3GpuConstraint4& constraint = cpuConstraints[c];
usedBodies.push_back(constraint.m_bodyA);
usedBodies.push_back(constraint.m_bodyB);
}
}
}
static bool verify=false;
void b3Solver::solveContactConstraint( const b3OpenCLArray<b3RigidBodyCL>* bodyBuf, const b3OpenCLArray<b3InertiaCL>* shapeBuf,
b3OpenCLArray<b3GpuConstraint4>* constraint, void* additionalData, int n ,int maxNumBatches)
{
@@ -580,6 +649,12 @@ void b3Solver::solveContactConstraint( const b3OpenCLArray<b3RigidBodyCL>* body
{
for(int ib=0; ib<N_BATCHES; ib++)
{
if (verify)
{
checkConstraintBatch(bodyBuf,shapeBuf,constraint,m_numConstraints,m_offsets,ib);
}
#ifdef DEBUG_ME
memset(debugInfo,0,sizeof(SolverDebugInfo)*numWorkItems);
gpuDebugInfo.write(debugInfo,numWorkItems);

View File

@@ -54,7 +54,7 @@ class b3SolverBase
enum
{
N_SPLIT = 16,
N_BATCHES = 4,
N_BATCHES = 4,//8,//4,
N_OBJ_PER_SPLIT = 10,
N_TASKS_PER_BATCH = N_SPLIT*N_SPLIT,
};

View File

@@ -449,6 +449,34 @@ typedef struct
int m_nSplit;
} ConstBufferSSD;
// Lookup table for the USE_4x4_GRID path of SetSortDataKernel: it is indexed with
// aa + bb*4, where aa and bb are the low two bits of the two body indices, and the
// value read is stored as the sort key (gSortDataOut[gIdx].x).
// Only referenced when USE_SPATIAL_BATCHING is 0 and USE_4x4_GRID is non-zero.
// NOTE(review): program-scope data in OpenCL C is normally declared __constant;
// confirm that "static const" is accepted by every target OpenCL compiler.
static const int gridTable4x4[] =
{
0,1,17,16,
1,2,18,19,
17,18,32,3,
16,19,3,34
};
// Lookup table for the 8x8 path of SetSortDataKernel (USE_4x4_GRID set to 0): it is
// indexed with aa + bb*8, where aa and bb are the low three bits of the two body
// indices, and the value read is stored as the sort key (gSortDataOut[gIdx].x).
// Only referenced when USE_SPATIAL_BATCHING is 0.
// NOTE(review): the first seven rows each hold eight distinct values of the form
// {b..b+3, b+16..b+19}; the last row contains 27 where that pattern would give 215.
// Confirm whether 27 is intentional or a typo.
// NOTE(review): program-scope data in OpenCL C is normally declared __constant;
// confirm that "static const" is accepted by every target OpenCL compiler.
static const int gridTable8x8[] =
{
0, 2, 3, 16, 17, 18, 19, 1,
66, 64, 80, 67, 82, 81, 65, 83,
131,144,128,130,147,129,145,146,
208,195,194,192,193,211,210,209,
21, 22, 23, 5, 4, 6, 7, 20,
86, 85, 69, 87, 70, 68, 84, 71,
151,133,149,150,135,148,132,134,
197,27,214,213,212,199,198,196
};
#define USE_SPATIAL_BATCHING 1
#define USE_4x4_GRID 1
__kernel
__attribute__((reqd_work_group_size(WG_SIZE,1,1)))
void SetSortDataKernel(__global Contact4* gContact, __global Body* gBodies, __global int2* gSortDataOut,
@@ -460,18 +488,47 @@ int nContacts,float scale,int N_SPLIT, int staticIdx)
if( gIdx < nContacts )
{
int aPtrAndSignBit = gContact[gIdx].m_bodyAPtrAndSignBit;
int bPtrAndSignBit = gContact[gIdx].m_bodyBPtrAndSignBit;
int aIdx = abs(aPtrAndSignBit );
int bIdx = abs(gContact[gIdx].m_bodyBPtrAndSignBit);
int bIdx = abs(bPtrAndSignBit);
bool aStatic = (aPtrAndSignBit<0) ||(aPtrAndSignBit==staticIdx);
bool bStatic = (bPtrAndSignBit<0) ||(bPtrAndSignBit==staticIdx);
#if USE_SPATIAL_BATCHING
int idx = (aStatic)? bIdx: aIdx;
float4 p = gBodies[idx].m_pos;
int xIdx = (int)((p.x-((p.x<0.f)?1.f:0.f))*scale) & (N_SPLIT-1);
int zIdx = (int)((p.z-((p.z<0.f)?1.f:0.f))*scale) & (N_SPLIT-1);
int newIndex = (xIdx+zIdx*N_SPLIT);
gSortDataOut[gIdx].x = (xIdx+zIdx*N_SPLIT);
#else//USE_SPATIAL_BATCHING
#if USE_4x4_GRID
int aa = aIdx&3;
int bb = bIdx&3;
if (aStatic)
aa = bb;
if (bStatic)
bb = aa;
int gridIndex = aa + bb*4;
int newIndex = gridTable4x4[gridIndex];
#else//USE_4x4_GRID
int aa = aIdx&7;
int bb = bIdx&7;
if (aStatic)
aa = bb;
if (bStatic)
bb = aa;
int gridIndex = aa + bb*8;
int newIndex = gridTable8x8[gridIndex];
#endif//USE_4x4_GRID
#endif//USE_SPATIAL_BATCHING
gSortDataOut[gIdx].x = newIndex;
gSortDataOut[gIdx].y = gIdx;
}
else

View File

@@ -451,6 +451,34 @@ static const char* solverSetup2CL= \
" int m_nSplit;\n"
"} ConstBufferSSD;\n"
"\n"
"\n"
"static const int gridTable4x4[] = \n"
"{\n"
" 0,1,17,16,\n"
" 1,2,18,19,\n"
" 17,18,32,3,\n"
" 16,19,3,34\n"
"};\n"
"\n"
"static const int gridTable8x8[] = \n"
"{\n"
" 0, 2, 3, 16, 17, 18, 19, 1,\n"
" 66, 64, 80, 67, 82, 81, 65, 83,\n"
" 131,144,128,130,147,129,145,146,\n"
" 208,195,194,192,193,211,210,209,\n"
" 21, 22, 23, 5, 4, 6, 7, 20,\n"
" 86, 85, 69, 87, 70, 68, 84, 71,\n"
" 151,133,149,150,135,148,132,134,\n"
" 197,27,214,213,212,199,198,196\n"
" \n"
"};\n"
"\n"
"\n"
"\n"
"\n"
"#define USE_SPATIAL_BATCHING 1\n"
"#define USE_4x4_GRID 1\n"
"\n"
"__kernel\n"
"__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n"
"void SetSortDataKernel(__global Contact4* gContact, __global Body* gBodies, __global int2* gSortDataOut, \n"
@@ -462,18 +490,47 @@ static const char* solverSetup2CL= \
" if( gIdx < nContacts )\n"
" {\n"
" int aPtrAndSignBit = gContact[gIdx].m_bodyAPtrAndSignBit;\n"
" int bPtrAndSignBit = gContact[gIdx].m_bodyBPtrAndSignBit;\n"
"\n"
" int aIdx = abs(aPtrAndSignBit );\n"
" int bIdx = abs(gContact[gIdx].m_bodyBPtrAndSignBit);\n"
" int bIdx = abs(bPtrAndSignBit);\n"
"\n"
" bool aStatic = (aPtrAndSignBit<0) ||(aPtrAndSignBit==staticIdx);\n"
" bool bStatic = (bPtrAndSignBit<0) ||(bPtrAndSignBit==staticIdx);\n"
"\n"
"#if USE_SPATIAL_BATCHING \n"
" int idx = (aStatic)? bIdx: aIdx;\n"
" float4 p = gBodies[idx].m_pos;\n"
" int xIdx = (int)((p.x-((p.x<0.f)?1.f:0.f))*scale) & (N_SPLIT-1);\n"
" int zIdx = (int)((p.z-((p.z<0.f)?1.f:0.f))*scale) & (N_SPLIT-1);\n"
" int newIndex = (xIdx+zIdx*N_SPLIT);\n"
" \n"
" gSortDataOut[gIdx].x = (xIdx+zIdx*N_SPLIT);\n"
"#else//USE_SPATIAL_BATCHING\n"
" #if USE_4x4_GRID\n"
" int aa = aIdx&3;\n"
" int bb = bIdx&3;\n"
" if (aStatic)\n"
" aa = bb;\n"
" if (bStatic)\n"
" bb = aa;\n"
"\n"
" int gridIndex = aa + bb*4;\n"
" int newIndex = gridTable4x4[gridIndex];\n"
" #else//USE_4x4_GRID\n"
" int aa = aIdx&7;\n"
" int bb = bIdx&7;\n"
" if (aStatic)\n"
" aa = bb;\n"
" if (bStatic)\n"
" bb = aa;\n"
"\n"
" int gridIndex = aa + bb*8;\n"
" int newIndex = gridTable8x8[gridIndex];\n"
" #endif//USE_4x4_GRID\n"
"#endif//USE_SPATIAL_BATCHING\n"
"\n"
"\n"
" gSortDataOut[gIdx].x = newIndex;\n"
" gSortDataOut[gIdx].y = gIdx;\n"
" }\n"
" else\n"