Experiment with first-level batching using the object index instead of a spatial hash in a uniform grid (to avoid having to tune the average object size for the uniform grid)
This commit is contained in:
@@ -15,9 +15,9 @@ subject to the following restrictions:
|
||||
|
||||
|
||||
///create ARRAY_SIZE_X*ARRAY_SIZE_Y*ARRAY_SIZE_Z dynamic objects (see the defines below)
|
||||
#define ARRAY_SIZE_X 5
|
||||
#define ARRAY_SIZE_Y 5
|
||||
#define ARRAY_SIZE_Z 5
|
||||
#define ARRAY_SIZE_X 30
|
||||
#define ARRAY_SIZE_Y 20
|
||||
#define ARRAY_SIZE_Z 30
|
||||
|
||||
//maximum number of objects (and allow user to shoot additional boxes)
|
||||
#define MAX_PROXIES (ARRAY_SIZE_X*ARRAY_SIZE_Y*ARRAY_SIZE_Z + 1024)
|
||||
@@ -164,7 +164,7 @@ void BasicGpuDemo::exitCL()
|
||||
BasicGpuDemo::BasicGpuDemo()
|
||||
{
|
||||
m_clData = new btInternalData;
|
||||
setCameraDistance(btScalar(SCALING*20.));
|
||||
setCameraDistance(btScalar(SCALING*120.));
|
||||
this->setAzi(45);
|
||||
this->setEle(45);
|
||||
|
||||
@@ -222,7 +222,7 @@ void BasicGpuDemo::initPhysics()
|
||||
m_dynamicsWorld->setGravity(btVector3(0,-10,0));
|
||||
|
||||
///create a few basic rigid bodies
|
||||
btBoxShape* groundShape = new btBoxShape(btVector3(btScalar(50.),btScalar(50.),btScalar(50.)));
|
||||
btBoxShape* groundShape = new btBoxShape(btVector3(btScalar(150.),btScalar(50.),btScalar(150.)));
|
||||
//groundShape->initializePolyhedralFeatures();
|
||||
// btCollisionShape* groundShape = new btStaticPlaneShape(btVector3(0,1,0),50);
|
||||
|
||||
|
||||
@@ -119,6 +119,7 @@ void GpuRigidBodyDemo::initPhysics(const ConstructionInfo& ci)
|
||||
|
||||
setupScene(ci);
|
||||
|
||||
m_data->m_rigidBodyPipeline->writeAllInstancesToGpu();
|
||||
np->writeAllBodiesToGpu();
|
||||
bp->writeAabbsToGpu();
|
||||
|
||||
|
||||
@@ -424,7 +424,7 @@ void b3GpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
||||
float dt=1./60.;
|
||||
b3ConstraintCfg csCfg( dt );
|
||||
csCfg.m_enableParallelSolve = true;
|
||||
csCfg.m_averageExtent = .2f;//@TODO m_averageObjExtent;
|
||||
csCfg.m_averageExtent = 0.3;//0.1;//2;//.2f;//@TODO m_averageObjExtent;
|
||||
csCfg.m_staticIdx = static0Index;
|
||||
|
||||
|
||||
@@ -516,8 +516,8 @@ void b3GpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
||||
if (gpuRadixSort)
|
||||
{ // 3. sort by cell idx
|
||||
B3_PROFILE("gpuRadixSort");
|
||||
int n = B3_SOLVER_N_SPLIT*B3_SOLVER_N_SPLIT;
|
||||
int sortBit = 32;
|
||||
//int n = B3_SOLVER_N_SPLIT*B3_SOLVER_N_SPLIT;
|
||||
//int sortBit = 32;
|
||||
//if( n <= 0xffff ) sortBit = 16;
|
||||
//if( n <= 0xff ) sortBit = 8;
|
||||
//adl::RadixSort<adl::TYPE_CL>::execute( data->m_sort, *data->m_sortDataBuffer, sortSize );
|
||||
@@ -581,6 +581,12 @@ void b3GpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
||||
|
||||
clFinish(m_data->m_queue);
|
||||
|
||||
// {
|
||||
// b3AlignedObjectArray<unsigned int> histogram;
|
||||
// m_data->m_solverGPU->m_numConstraints->copyToHost(histogram);
|
||||
// printf(",,,\n");
|
||||
// }
|
||||
|
||||
|
||||
if (nContacts)
|
||||
{
|
||||
|
||||
@@ -551,6 +551,75 @@ void b3Solver::solveContactConstraintHost( b3OpenCLArray<b3RigidBodyCL>* bodyBu
|
||||
|
||||
}
|
||||
|
||||
void checkConstraintBatch(const b3OpenCLArray<b3RigidBodyCL>* bodyBuf,
|
||||
const b3OpenCLArray<b3InertiaCL>* shapeBuf,
|
||||
b3OpenCLArray<b3GpuConstraint4>* constraint,
|
||||
b3OpenCLArray<unsigned int>* m_numConstraints,
|
||||
b3OpenCLArray<unsigned int>* m_offsets,
|
||||
int batchId
|
||||
)
|
||||
{
|
||||
// b3BufferInfoCL( m_numConstraints->getBufferCL() ),
|
||||
// b3BufferInfoCL( m_offsets->getBufferCL() )
|
||||
|
||||
const int nn = b3SolverBase::N_SPLIT*b3SolverBase::N_SPLIT;
|
||||
int numWorkItems = 64*nn/b3SolverBase::N_BATCHES;
|
||||
|
||||
b3AlignedObjectArray<unsigned int> gN;
|
||||
m_numConstraints->copyToHost(gN);
|
||||
b3AlignedObjectArray<unsigned int> gOffsets;
|
||||
m_offsets->copyToHost(gOffsets);
|
||||
int nSplit = b3SolverBase::N_SPLIT;
|
||||
int bIdx = batchId;
|
||||
|
||||
b3AlignedObjectArray<b3GpuConstraint4> cpuConstraints;
|
||||
constraint->copyToHost(cpuConstraints);
|
||||
|
||||
printf("batch = %d\n", batchId);
|
||||
|
||||
int numWorkgroups = nn/b3SolverBase::N_BATCHES;
|
||||
b3AlignedObjectArray<int> usedBodies;
|
||||
|
||||
|
||||
for (int wgIdx=0;wgIdx<numWorkgroups;wgIdx++)
|
||||
{
|
||||
printf("wgIdx = %d ", wgIdx);
|
||||
int xIdx = (wgIdx/(nSplit/2))*2 + (bIdx&1);
|
||||
int yIdx = (wgIdx%(nSplit/2))*2 + (bIdx>>1);
|
||||
int cellIdx = xIdx+yIdx*nSplit;
|
||||
printf("cellIdx=%d\n",cellIdx);
|
||||
if( gN[cellIdx] == 0 )
|
||||
continue;
|
||||
|
||||
const int start = gOffsets[cellIdx];
|
||||
const int end = start + gN[cellIdx];
|
||||
|
||||
for (int c=start;c<end;c++)
|
||||
{
|
||||
b3GpuConstraint4& constraint = cpuConstraints[c];
|
||||
//printf("constraint (%d,%d)\n", constraint.m_bodyA,constraint.m_bodyB);
|
||||
if (usedBodies.findLinearSearch(constraint.m_bodyA)< usedBodies.size())
|
||||
{
|
||||
printf("error?\n");
|
||||
}
|
||||
if (usedBodies.findLinearSearch(constraint.m_bodyB)< usedBodies.size())
|
||||
{
|
||||
printf("error?\n");
|
||||
}
|
||||
}
|
||||
|
||||
for (int c=start;c<end;c++)
|
||||
{
|
||||
b3GpuConstraint4& constraint = cpuConstraints[c];
|
||||
usedBodies.push_back(constraint.m_bodyA);
|
||||
usedBodies.push_back(constraint.m_bodyB);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
// Debug switch: when true, b3Solver::solveContactConstraint calls
// checkConstraintBatch for every batch before solving it.
static bool verify=false;
|
||||
|
||||
void b3Solver::solveContactConstraint( const b3OpenCLArray<b3RigidBodyCL>* bodyBuf, const b3OpenCLArray<b3InertiaCL>* shapeBuf,
|
||||
b3OpenCLArray<b3GpuConstraint4>* constraint, void* additionalData, int n ,int maxNumBatches)
|
||||
{
|
||||
@@ -580,6 +649,12 @@ void b3Solver::solveContactConstraint( const b3OpenCLArray<b3RigidBodyCL>* body
|
||||
{
|
||||
for(int ib=0; ib<N_BATCHES; ib++)
|
||||
{
|
||||
|
||||
if (verify)
|
||||
{
|
||||
checkConstraintBatch(bodyBuf,shapeBuf,constraint,m_numConstraints,m_offsets,ib);
|
||||
}
|
||||
|
||||
#ifdef DEBUG_ME
|
||||
memset(debugInfo,0,sizeof(SolverDebugInfo)*numWorkItems);
|
||||
gpuDebugInfo.write(debugInfo,numWorkItems);
|
||||
|
||||
@@ -54,7 +54,7 @@ class b3SolverBase
|
||||
enum
|
||||
{
|
||||
N_SPLIT = 16,
|
||||
N_BATCHES = 4,
|
||||
N_BATCHES = 4,//8,//4,
|
||||
N_OBJ_PER_SPLIT = 10,
|
||||
N_TASKS_PER_BATCH = N_SPLIT*N_SPLIT,
|
||||
};
|
||||
|
||||
@@ -449,6 +449,34 @@ typedef struct
|
||||
int m_nSplit;
|
||||
} ConstBufferSSD;
|
||||
|
||||
|
||||
// Lookup table used by SetSortDataKernel when USE_SPATIAL_BATCHING is 0 and
// USE_4x4_GRID is 1: indexed as gridTable4x4[aa + bb*4], where aa and bb are
// the low two bits of the two body indices of a contact. The looked-up value
// is written to gSortDataOut[gIdx].x as the sort key of the contact.
//
// OpenCL C 1.x requires program-scope variables to live in the __constant
// address space, hence the explicit qualifier.
static __constant const int gridTable4x4[] =
{
	0,1,17,16,
	1,2,18,19,
	17,18,32,3,
	16,19,3,34
};
|
||||
|
||||
// Lookup table used by SetSortDataKernel when both USE_SPATIAL_BATCHING and
// USE_4x4_GRID are 0: indexed as gridTable8x8[aa + bb*8], where aa and bb are
// the low three bits of the two body indices of a contact. The looked-up
// value is written to gSortDataOut[gIdx].x as the sort key of the contact.
//
// OpenCL C 1.x requires program-scope variables to live in the __constant
// address space, hence the explicit qualifier.
//
// NOTE(review): every row except the last consists of two runs of four
// consecutive values (e.g. 192..195 and 208..211); the last row holds
// 196..199 and 212..214 plus 27, where 215 would complete the pattern.
// The 27 may be a typo -- confirm before relying on this table.
static __constant const int gridTable8x8[] =
{
	  0,  2,  3, 16, 17, 18, 19,  1,
	 66, 64, 80, 67, 82, 81, 65, 83,
	131,144,128,130,147,129,145,146,
	208,195,194,192,193,211,210,209,
	 21, 22, 23,  5,  4,  6,  7, 20,
	 86, 85, 69, 87, 70, 68, 84, 71,
	151,133,149,150,135,148,132,134,
	197, 27,214,213,212,199,198,196
};
|
||||
|
||||
|
||||
|
||||
|
||||
#define USE_SPATIAL_BATCHING 1
|
||||
#define USE_4x4_GRID 1
|
||||
|
||||
__kernel
|
||||
__attribute__((reqd_work_group_size(WG_SIZE,1,1)))
|
||||
void SetSortDataKernel(__global Contact4* gContact, __global Body* gBodies, __global int2* gSortDataOut,
|
||||
@@ -460,18 +488,47 @@ int nContacts,float scale,int N_SPLIT, int staticIdx)
|
||||
if( gIdx < nContacts )
|
||||
{
|
||||
int aPtrAndSignBit = gContact[gIdx].m_bodyAPtrAndSignBit;
|
||||
int bPtrAndSignBit = gContact[gIdx].m_bodyBPtrAndSignBit;
|
||||
|
||||
int aIdx = abs(aPtrAndSignBit );
|
||||
int bIdx = abs(gContact[gIdx].m_bodyBPtrAndSignBit);
|
||||
int bIdx = abs(bPtrAndSignBit);
|
||||
|
||||
bool aStatic = (aPtrAndSignBit<0) ||(aPtrAndSignBit==staticIdx);
|
||||
bool bStatic = (bPtrAndSignBit<0) ||(bPtrAndSignBit==staticIdx);
|
||||
|
||||
#if USE_SPATIAL_BATCHING
|
||||
int idx = (aStatic)? bIdx: aIdx;
|
||||
float4 p = gBodies[idx].m_pos;
|
||||
int xIdx = (int)((p.x-((p.x<0.f)?1.f:0.f))*scale) & (N_SPLIT-1);
|
||||
int zIdx = (int)((p.z-((p.z<0.f)?1.f:0.f))*scale) & (N_SPLIT-1);
|
||||
int newIndex = (xIdx+zIdx*N_SPLIT);
|
||||
|
||||
gSortDataOut[gIdx].x = (xIdx+zIdx*N_SPLIT);
|
||||
#else//USE_SPATIAL_BATCHING
|
||||
#if USE_4x4_GRID
|
||||
int aa = aIdx&3;
|
||||
int bb = bIdx&3;
|
||||
if (aStatic)
|
||||
aa = bb;
|
||||
if (bStatic)
|
||||
bb = aa;
|
||||
|
||||
int gridIndex = aa + bb*4;
|
||||
int newIndex = gridTable4x4[gridIndex];
|
||||
#else//USE_4x4_GRID
|
||||
int aa = aIdx&7;
|
||||
int bb = bIdx&7;
|
||||
if (aStatic)
|
||||
aa = bb;
|
||||
if (bStatic)
|
||||
bb = aa;
|
||||
|
||||
int gridIndex = aa + bb*8;
|
||||
int newIndex = gridTable8x8[gridIndex];
|
||||
#endif//USE_4x4_GRID
|
||||
#endif//USE_SPATIAL_BATCHING
|
||||
|
||||
|
||||
gSortDataOut[gIdx].x = newIndex;
|
||||
gSortDataOut[gIdx].y = gIdx;
|
||||
}
|
||||
else
|
||||
|
||||
@@ -451,6 +451,34 @@ static const char* solverSetup2CL= \
|
||||
" int m_nSplit;\n"
|
||||
"} ConstBufferSSD;\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"static const int gridTable4x4[] = \n"
|
||||
"{\n"
|
||||
" 0,1,17,16,\n"
|
||||
" 1,2,18,19,\n"
|
||||
" 17,18,32,3,\n"
|
||||
" 16,19,3,34\n"
|
||||
"};\n"
|
||||
"\n"
|
||||
"static const int gridTable8x8[] = \n"
|
||||
"{\n"
|
||||
" 0, 2, 3, 16, 17, 18, 19, 1,\n"
|
||||
" 66, 64, 80, 67, 82, 81, 65, 83,\n"
|
||||
" 131,144,128,130,147,129,145,146,\n"
|
||||
" 208,195,194,192,193,211,210,209,\n"
|
||||
" 21, 22, 23, 5, 4, 6, 7, 20,\n"
|
||||
" 86, 85, 69, 87, 70, 68, 84, 71,\n"
|
||||
" 151,133,149,150,135,148,132,134,\n"
|
||||
" 197,27,214,213,212,199,198,196\n"
|
||||
" \n"
|
||||
"};\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"#define USE_SPATIAL_BATCHING 1\n"
|
||||
"#define USE_4x4_GRID 1\n"
|
||||
"\n"
|
||||
"__kernel\n"
|
||||
"__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n"
|
||||
"void SetSortDataKernel(__global Contact4* gContact, __global Body* gBodies, __global int2* gSortDataOut, \n"
|
||||
@@ -462,18 +490,47 @@ static const char* solverSetup2CL= \
|
||||
" if( gIdx < nContacts )\n"
|
||||
" {\n"
|
||||
" int aPtrAndSignBit = gContact[gIdx].m_bodyAPtrAndSignBit;\n"
|
||||
" int bPtrAndSignBit = gContact[gIdx].m_bodyBPtrAndSignBit;\n"
|
||||
"\n"
|
||||
" int aIdx = abs(aPtrAndSignBit );\n"
|
||||
" int bIdx = abs(gContact[gIdx].m_bodyBPtrAndSignBit);\n"
|
||||
" int bIdx = abs(bPtrAndSignBit);\n"
|
||||
"\n"
|
||||
" bool aStatic = (aPtrAndSignBit<0) ||(aPtrAndSignBit==staticIdx);\n"
|
||||
" bool bStatic = (bPtrAndSignBit<0) ||(bPtrAndSignBit==staticIdx);\n"
|
||||
"\n"
|
||||
"#if USE_SPATIAL_BATCHING \n"
|
||||
" int idx = (aStatic)? bIdx: aIdx;\n"
|
||||
" float4 p = gBodies[idx].m_pos;\n"
|
||||
" int xIdx = (int)((p.x-((p.x<0.f)?1.f:0.f))*scale) & (N_SPLIT-1);\n"
|
||||
" int zIdx = (int)((p.z-((p.z<0.f)?1.f:0.f))*scale) & (N_SPLIT-1);\n"
|
||||
" int newIndex = (xIdx+zIdx*N_SPLIT);\n"
|
||||
" \n"
|
||||
" gSortDataOut[gIdx].x = (xIdx+zIdx*N_SPLIT);\n"
|
||||
"#else//USE_SPATIAL_BATCHING\n"
|
||||
" #if USE_4x4_GRID\n"
|
||||
" int aa = aIdx&3;\n"
|
||||
" int bb = bIdx&3;\n"
|
||||
" if (aStatic)\n"
|
||||
" aa = bb;\n"
|
||||
" if (bStatic)\n"
|
||||
" bb = aa;\n"
|
||||
"\n"
|
||||
" int gridIndex = aa + bb*4;\n"
|
||||
" int newIndex = gridTable4x4[gridIndex];\n"
|
||||
" #else//USE_4x4_GRID\n"
|
||||
" int aa = aIdx&7;\n"
|
||||
" int bb = bIdx&7;\n"
|
||||
" if (aStatic)\n"
|
||||
" aa = bb;\n"
|
||||
" if (bStatic)\n"
|
||||
" bb = aa;\n"
|
||||
"\n"
|
||||
" int gridIndex = aa + bb*8;\n"
|
||||
" int newIndex = gridTable8x8[gridIndex];\n"
|
||||
" #endif//USE_4x4_GRID\n"
|
||||
"#endif//USE_SPATIAL_BATCHING\n"
|
||||
"\n"
|
||||
"\n"
|
||||
" gSortDataOut[gIdx].x = newIndex;\n"
|
||||
" gSortDataOut[gIdx].y = gIdx;\n"
|
||||
" }\n"
|
||||
" else\n"
|
||||
|
||||
Reference in New Issue
Block a user