Experiment with first-level batching that uses the object index instead of the spatial hash in a uniform grid (this avoids having to tune the average object size for the uniform grid).
This commit is contained in:
@@ -15,9 +15,9 @@ subject to the following restrictions:
|
|||||||
|
|
||||||
|
|
||||||
///create 125 (5x5x5) dynamic object
|
///create 125 (5x5x5) dynamic object
|
||||||
#define ARRAY_SIZE_X 5
|
#define ARRAY_SIZE_X 30
|
||||||
#define ARRAY_SIZE_Y 5
|
#define ARRAY_SIZE_Y 20
|
||||||
#define ARRAY_SIZE_Z 5
|
#define ARRAY_SIZE_Z 30
|
||||||
|
|
||||||
//maximum number of objects (and allow user to shoot additional boxes)
|
//maximum number of objects (and allow user to shoot additional boxes)
|
||||||
#define MAX_PROXIES (ARRAY_SIZE_X*ARRAY_SIZE_Y*ARRAY_SIZE_Z + 1024)
|
#define MAX_PROXIES (ARRAY_SIZE_X*ARRAY_SIZE_Y*ARRAY_SIZE_Z + 1024)
|
||||||
@@ -164,7 +164,7 @@ void BasicGpuDemo::exitCL()
|
|||||||
BasicGpuDemo::BasicGpuDemo()
|
BasicGpuDemo::BasicGpuDemo()
|
||||||
{
|
{
|
||||||
m_clData = new btInternalData;
|
m_clData = new btInternalData;
|
||||||
setCameraDistance(btScalar(SCALING*20.));
|
setCameraDistance(btScalar(SCALING*120.));
|
||||||
this->setAzi(45);
|
this->setAzi(45);
|
||||||
this->setEle(45);
|
this->setEle(45);
|
||||||
|
|
||||||
@@ -222,7 +222,7 @@ void BasicGpuDemo::initPhysics()
|
|||||||
m_dynamicsWorld->setGravity(btVector3(0,-10,0));
|
m_dynamicsWorld->setGravity(btVector3(0,-10,0));
|
||||||
|
|
||||||
///create a few basic rigid bodies
|
///create a few basic rigid bodies
|
||||||
btBoxShape* groundShape = new btBoxShape(btVector3(btScalar(50.),btScalar(50.),btScalar(50.)));
|
btBoxShape* groundShape = new btBoxShape(btVector3(btScalar(150.),btScalar(50.),btScalar(150.)));
|
||||||
//groundShape->initializePolyhedralFeatures();
|
//groundShape->initializePolyhedralFeatures();
|
||||||
// btCollisionShape* groundShape = new btStaticPlaneShape(btVector3(0,1,0),50);
|
// btCollisionShape* groundShape = new btStaticPlaneShape(btVector3(0,1,0),50);
|
||||||
|
|
||||||
|
|||||||
@@ -119,6 +119,7 @@ void GpuRigidBodyDemo::initPhysics(const ConstructionInfo& ci)
|
|||||||
|
|
||||||
setupScene(ci);
|
setupScene(ci);
|
||||||
|
|
||||||
|
m_data->m_rigidBodyPipeline->writeAllInstancesToGpu();
|
||||||
np->writeAllBodiesToGpu();
|
np->writeAllBodiesToGpu();
|
||||||
bp->writeAabbsToGpu();
|
bp->writeAabbsToGpu();
|
||||||
|
|
||||||
|
|||||||
@@ -424,7 +424,7 @@ void b3GpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
|||||||
float dt=1./60.;
|
float dt=1./60.;
|
||||||
b3ConstraintCfg csCfg( dt );
|
b3ConstraintCfg csCfg( dt );
|
||||||
csCfg.m_enableParallelSolve = true;
|
csCfg.m_enableParallelSolve = true;
|
||||||
csCfg.m_averageExtent = .2f;//@TODO m_averageObjExtent;
|
csCfg.m_averageExtent = 0.3;//0.1;//2;//.2f;//@TODO m_averageObjExtent;
|
||||||
csCfg.m_staticIdx = static0Index;
|
csCfg.m_staticIdx = static0Index;
|
||||||
|
|
||||||
|
|
||||||
@@ -516,8 +516,8 @@ void b3GpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
|||||||
if (gpuRadixSort)
|
if (gpuRadixSort)
|
||||||
{ // 3. sort by cell idx
|
{ // 3. sort by cell idx
|
||||||
B3_PROFILE("gpuRadixSort");
|
B3_PROFILE("gpuRadixSort");
|
||||||
int n = B3_SOLVER_N_SPLIT*B3_SOLVER_N_SPLIT;
|
//int n = B3_SOLVER_N_SPLIT*B3_SOLVER_N_SPLIT;
|
||||||
int sortBit = 32;
|
//int sortBit = 32;
|
||||||
//if( n <= 0xffff ) sortBit = 16;
|
//if( n <= 0xffff ) sortBit = 16;
|
||||||
//if( n <= 0xff ) sortBit = 8;
|
//if( n <= 0xff ) sortBit = 8;
|
||||||
//adl::RadixSort<adl::TYPE_CL>::execute( data->m_sort, *data->m_sortDataBuffer, sortSize );
|
//adl::RadixSort<adl::TYPE_CL>::execute( data->m_sort, *data->m_sortDataBuffer, sortSize );
|
||||||
@@ -581,6 +581,12 @@ void b3GpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
|||||||
|
|
||||||
clFinish(m_data->m_queue);
|
clFinish(m_data->m_queue);
|
||||||
|
|
||||||
|
// {
|
||||||
|
// b3AlignedObjectArray<unsigned int> histogram;
|
||||||
|
// m_data->m_solverGPU->m_numConstraints->copyToHost(histogram);
|
||||||
|
// printf(",,,\n");
|
||||||
|
// }
|
||||||
|
|
||||||
|
|
||||||
if (nContacts)
|
if (nContacts)
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -551,6 +551,75 @@ void b3Solver::solveContactConstraintHost( b3OpenCLArray<b3RigidBodyCL>* bodyBu
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void checkConstraintBatch(const b3OpenCLArray<b3RigidBodyCL>* bodyBuf,
|
||||||
|
const b3OpenCLArray<b3InertiaCL>* shapeBuf,
|
||||||
|
b3OpenCLArray<b3GpuConstraint4>* constraint,
|
||||||
|
b3OpenCLArray<unsigned int>* m_numConstraints,
|
||||||
|
b3OpenCLArray<unsigned int>* m_offsets,
|
||||||
|
int batchId
|
||||||
|
)
|
||||||
|
{
|
||||||
|
// b3BufferInfoCL( m_numConstraints->getBufferCL() ),
|
||||||
|
// b3BufferInfoCL( m_offsets->getBufferCL() )
|
||||||
|
|
||||||
|
const int nn = b3SolverBase::N_SPLIT*b3SolverBase::N_SPLIT;
|
||||||
|
int numWorkItems = 64*nn/b3SolverBase::N_BATCHES;
|
||||||
|
|
||||||
|
b3AlignedObjectArray<unsigned int> gN;
|
||||||
|
m_numConstraints->copyToHost(gN);
|
||||||
|
b3AlignedObjectArray<unsigned int> gOffsets;
|
||||||
|
m_offsets->copyToHost(gOffsets);
|
||||||
|
int nSplit = b3SolverBase::N_SPLIT;
|
||||||
|
int bIdx = batchId;
|
||||||
|
|
||||||
|
b3AlignedObjectArray<b3GpuConstraint4> cpuConstraints;
|
||||||
|
constraint->copyToHost(cpuConstraints);
|
||||||
|
|
||||||
|
printf("batch = %d\n", batchId);
|
||||||
|
|
||||||
|
int numWorkgroups = nn/b3SolverBase::N_BATCHES;
|
||||||
|
b3AlignedObjectArray<int> usedBodies;
|
||||||
|
|
||||||
|
|
||||||
|
for (int wgIdx=0;wgIdx<numWorkgroups;wgIdx++)
|
||||||
|
{
|
||||||
|
printf("wgIdx = %d ", wgIdx);
|
||||||
|
int xIdx = (wgIdx/(nSplit/2))*2 + (bIdx&1);
|
||||||
|
int yIdx = (wgIdx%(nSplit/2))*2 + (bIdx>>1);
|
||||||
|
int cellIdx = xIdx+yIdx*nSplit;
|
||||||
|
printf("cellIdx=%d\n",cellIdx);
|
||||||
|
if( gN[cellIdx] == 0 )
|
||||||
|
continue;
|
||||||
|
|
||||||
|
const int start = gOffsets[cellIdx];
|
||||||
|
const int end = start + gN[cellIdx];
|
||||||
|
|
||||||
|
for (int c=start;c<end;c++)
|
||||||
|
{
|
||||||
|
b3GpuConstraint4& constraint = cpuConstraints[c];
|
||||||
|
//printf("constraint (%d,%d)\n", constraint.m_bodyA,constraint.m_bodyB);
|
||||||
|
if (usedBodies.findLinearSearch(constraint.m_bodyA)< usedBodies.size())
|
||||||
|
{
|
||||||
|
printf("error?\n");
|
||||||
|
}
|
||||||
|
if (usedBodies.findLinearSearch(constraint.m_bodyB)< usedBodies.size())
|
||||||
|
{
|
||||||
|
printf("error?\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int c=start;c<end;c++)
|
||||||
|
{
|
||||||
|
b3GpuConstraint4& constraint = cpuConstraints[c];
|
||||||
|
usedBodies.push_back(constraint.m_bodyA);
|
||||||
|
usedBodies.push_back(constraint.m_bodyB);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool verify=false;
|
||||||
|
|
||||||
void b3Solver::solveContactConstraint( const b3OpenCLArray<b3RigidBodyCL>* bodyBuf, const b3OpenCLArray<b3InertiaCL>* shapeBuf,
|
void b3Solver::solveContactConstraint( const b3OpenCLArray<b3RigidBodyCL>* bodyBuf, const b3OpenCLArray<b3InertiaCL>* shapeBuf,
|
||||||
b3OpenCLArray<b3GpuConstraint4>* constraint, void* additionalData, int n ,int maxNumBatches)
|
b3OpenCLArray<b3GpuConstraint4>* constraint, void* additionalData, int n ,int maxNumBatches)
|
||||||
{
|
{
|
||||||
@@ -580,6 +649,12 @@ void b3Solver::solveContactConstraint( const b3OpenCLArray<b3RigidBodyCL>* body
|
|||||||
{
|
{
|
||||||
for(int ib=0; ib<N_BATCHES; ib++)
|
for(int ib=0; ib<N_BATCHES; ib++)
|
||||||
{
|
{
|
||||||
|
|
||||||
|
if (verify)
|
||||||
|
{
|
||||||
|
checkConstraintBatch(bodyBuf,shapeBuf,constraint,m_numConstraints,m_offsets,ib);
|
||||||
|
}
|
||||||
|
|
||||||
#ifdef DEBUG_ME
|
#ifdef DEBUG_ME
|
||||||
memset(debugInfo,0,sizeof(SolverDebugInfo)*numWorkItems);
|
memset(debugInfo,0,sizeof(SolverDebugInfo)*numWorkItems);
|
||||||
gpuDebugInfo.write(debugInfo,numWorkItems);
|
gpuDebugInfo.write(debugInfo,numWorkItems);
|
||||||
|
|||||||
@@ -54,7 +54,7 @@ class b3SolverBase
|
|||||||
enum
|
enum
|
||||||
{
|
{
|
||||||
N_SPLIT = 16,
|
N_SPLIT = 16,
|
||||||
N_BATCHES = 4,
|
N_BATCHES = 4,//8,//4,
|
||||||
N_OBJ_PER_SPLIT = 10,
|
N_OBJ_PER_SPLIT = 10,
|
||||||
N_TASKS_PER_BATCH = N_SPLIT*N_SPLIT,
|
N_TASKS_PER_BATCH = N_SPLIT*N_SPLIT,
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -449,6 +449,34 @@ typedef struct
|
|||||||
int m_nSplit;
|
int m_nSplit;
|
||||||
} ConstBufferSSD;
|
} ConstBufferSSD;
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Lookup tables for the object-index based first-level batching.
 *
 * When USE_SPATIAL_BATCHING is 0, SetSortDataKernel does not derive the sort
 * key from the body position. Instead it takes the low bits of the two body
 * indices (aa, bb) and reads the sort key from one of these tables:
 *
 *   gridTable4x4[aa + bb*4]   with aa, bb = index & 3   (USE_4x4_GRID != 0)
 *   gridTable8x8[aa + bb*8]   with aa, bb = index & 7   (USE_4x4_GRID == 0)
 *
 * The value read is stored in gSortDataOut[gIdx].x, the same slot the spatial
 * path fills with xIdx + zIdx*N_SPLIT.
 *
 * Every row of gridTable8x8 is a permutation of {b..b+3, b+16..b+19} for some
 * base b. The last row used to contain 27 where 215 is needed to complete
 * {196..199, 212..215}; that entry is corrected below.
 *
 * NOTE: the host code embeds a stringified copy of this kernel source; it has
 * to be regenerated to pick up changes made here.
 * NOTE(review): OpenCL C 1.x expects program-scope variables in the __constant
 * address space; `static const int` relies on a lenient compiler -- confirm on
 * all target platforms.
 */
static const int gridTable4x4[] =
{
	 0,  1, 17, 16,
	 1,  2, 18, 19,
	17, 18, 32,  3,
	16, 19,  3, 34
};

static const int gridTable8x8[] =
{
	  0,  2,  3, 16, 17, 18, 19,  1,
	 66, 64, 80, 67, 82, 81, 65, 83,
	131,144,128,130,147,129,145,146,
	208,195,194,192,193,211,210,209,
	 21, 22, 23,  5,  4,  6,  7, 20,
	 86, 85, 69, 87, 70, 68, 84, 71,
	151,133,149,150,135,148,132,134,
	197,215,214,213,212,199,198,196
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#define USE_SPATIAL_BATCHING 1
|
||||||
|
#define USE_4x4_GRID 1
|
||||||
|
|
||||||
__kernel
|
__kernel
|
||||||
__attribute__((reqd_work_group_size(WG_SIZE,1,1)))
|
__attribute__((reqd_work_group_size(WG_SIZE,1,1)))
|
||||||
void SetSortDataKernel(__global Contact4* gContact, __global Body* gBodies, __global int2* gSortDataOut,
|
void SetSortDataKernel(__global Contact4* gContact, __global Body* gBodies, __global int2* gSortDataOut,
|
||||||
@@ -460,18 +488,47 @@ int nContacts,float scale,int N_SPLIT, int staticIdx)
|
|||||||
if( gIdx < nContacts )
|
if( gIdx < nContacts )
|
||||||
{
|
{
|
||||||
int aPtrAndSignBit = gContact[gIdx].m_bodyAPtrAndSignBit;
|
int aPtrAndSignBit = gContact[gIdx].m_bodyAPtrAndSignBit;
|
||||||
|
int bPtrAndSignBit = gContact[gIdx].m_bodyBPtrAndSignBit;
|
||||||
|
|
||||||
int aIdx = abs(aPtrAndSignBit );
|
int aIdx = abs(aPtrAndSignBit );
|
||||||
int bIdx = abs(gContact[gIdx].m_bodyBPtrAndSignBit);
|
int bIdx = abs(bPtrAndSignBit);
|
||||||
|
|
||||||
bool aStatic = (aPtrAndSignBit<0) ||(aPtrAndSignBit==staticIdx);
|
bool aStatic = (aPtrAndSignBit<0) ||(aPtrAndSignBit==staticIdx);
|
||||||
|
bool bStatic = (bPtrAndSignBit<0) ||(bPtrAndSignBit==staticIdx);
|
||||||
|
|
||||||
|
#if USE_SPATIAL_BATCHING
|
||||||
int idx = (aStatic)? bIdx: aIdx;
|
int idx = (aStatic)? bIdx: aIdx;
|
||||||
float4 p = gBodies[idx].m_pos;
|
float4 p = gBodies[idx].m_pos;
|
||||||
int xIdx = (int)((p.x-((p.x<0.f)?1.f:0.f))*scale) & (N_SPLIT-1);
|
int xIdx = (int)((p.x-((p.x<0.f)?1.f:0.f))*scale) & (N_SPLIT-1);
|
||||||
int zIdx = (int)((p.z-((p.z<0.f)?1.f:0.f))*scale) & (N_SPLIT-1);
|
int zIdx = (int)((p.z-((p.z<0.f)?1.f:0.f))*scale) & (N_SPLIT-1);
|
||||||
|
int newIndex = (xIdx+zIdx*N_SPLIT);
|
||||||
|
|
||||||
|
#else//USE_SPATIAL_BATCHING
|
||||||
|
#if USE_4x4_GRID
|
||||||
|
int aa = aIdx&3;
|
||||||
|
int bb = bIdx&3;
|
||||||
|
if (aStatic)
|
||||||
|
aa = bb;
|
||||||
|
if (bStatic)
|
||||||
|
bb = aa;
|
||||||
|
|
||||||
gSortDataOut[gIdx].x = (xIdx+zIdx*N_SPLIT);
|
int gridIndex = aa + bb*4;
|
||||||
|
int newIndex = gridTable4x4[gridIndex];
|
||||||
|
#else//USE_4x4_GRID
|
||||||
|
int aa = aIdx&7;
|
||||||
|
int bb = bIdx&7;
|
||||||
|
if (aStatic)
|
||||||
|
aa = bb;
|
||||||
|
if (bStatic)
|
||||||
|
bb = aa;
|
||||||
|
|
||||||
|
int gridIndex = aa + bb*8;
|
||||||
|
int newIndex = gridTable8x8[gridIndex];
|
||||||
|
#endif//USE_4x4_GRID
|
||||||
|
#endif//USE_SPATIAL_BATCHING
|
||||||
|
|
||||||
|
|
||||||
|
gSortDataOut[gIdx].x = newIndex;
|
||||||
gSortDataOut[gIdx].y = gIdx;
|
gSortDataOut[gIdx].y = gIdx;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
|||||||
@@ -451,6 +451,34 @@ static const char* solverSetup2CL= \
|
|||||||
" int m_nSplit;\n"
|
" int m_nSplit;\n"
|
||||||
"} ConstBufferSSD;\n"
|
"} ConstBufferSSD;\n"
|
||||||
"\n"
|
"\n"
|
||||||
|
"\n"
|
||||||
|
"static const int gridTable4x4[] = \n"
|
||||||
|
"{\n"
|
||||||
|
" 0,1,17,16,\n"
|
||||||
|
" 1,2,18,19,\n"
|
||||||
|
" 17,18,32,3,\n"
|
||||||
|
" 16,19,3,34\n"
|
||||||
|
"};\n"
|
||||||
|
"\n"
|
||||||
|
"static const int gridTable8x8[] = \n"
|
||||||
|
"{\n"
|
||||||
|
" 0, 2, 3, 16, 17, 18, 19, 1,\n"
|
||||||
|
" 66, 64, 80, 67, 82, 81, 65, 83,\n"
|
||||||
|
" 131,144,128,130,147,129,145,146,\n"
|
||||||
|
" 208,195,194,192,193,211,210,209,\n"
|
||||||
|
" 21, 22, 23, 5, 4, 6, 7, 20,\n"
|
||||||
|
" 86, 85, 69, 87, 70, 68, 84, 71,\n"
|
||||||
|
" 151,133,149,150,135,148,132,134,\n"
|
||||||
|
" 197,27,214,213,212,199,198,196\n"
|
||||||
|
" \n"
|
||||||
|
"};\n"
|
||||||
|
"\n"
|
||||||
|
"\n"
|
||||||
|
"\n"
|
||||||
|
"\n"
|
||||||
|
"#define USE_SPATIAL_BATCHING 1\n"
|
||||||
|
"#define USE_4x4_GRID 1\n"
|
||||||
|
"\n"
|
||||||
"__kernel\n"
|
"__kernel\n"
|
||||||
"__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n"
|
"__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n"
|
||||||
"void SetSortDataKernel(__global Contact4* gContact, __global Body* gBodies, __global int2* gSortDataOut, \n"
|
"void SetSortDataKernel(__global Contact4* gContact, __global Body* gBodies, __global int2* gSortDataOut, \n"
|
||||||
@@ -462,18 +490,47 @@ static const char* solverSetup2CL= \
|
|||||||
" if( gIdx < nContacts )\n"
|
" if( gIdx < nContacts )\n"
|
||||||
" {\n"
|
" {\n"
|
||||||
" int aPtrAndSignBit = gContact[gIdx].m_bodyAPtrAndSignBit;\n"
|
" int aPtrAndSignBit = gContact[gIdx].m_bodyAPtrAndSignBit;\n"
|
||||||
|
" int bPtrAndSignBit = gContact[gIdx].m_bodyBPtrAndSignBit;\n"
|
||||||
"\n"
|
"\n"
|
||||||
" int aIdx = abs(aPtrAndSignBit );\n"
|
" int aIdx = abs(aPtrAndSignBit );\n"
|
||||||
" int bIdx = abs(gContact[gIdx].m_bodyBPtrAndSignBit);\n"
|
" int bIdx = abs(bPtrAndSignBit);\n"
|
||||||
"\n"
|
"\n"
|
||||||
" bool aStatic = (aPtrAndSignBit<0) ||(aPtrAndSignBit==staticIdx);\n"
|
" bool aStatic = (aPtrAndSignBit<0) ||(aPtrAndSignBit==staticIdx);\n"
|
||||||
" \n"
|
" bool bStatic = (bPtrAndSignBit<0) ||(bPtrAndSignBit==staticIdx);\n"
|
||||||
|
"\n"
|
||||||
|
"#if USE_SPATIAL_BATCHING \n"
|
||||||
" int idx = (aStatic)? bIdx: aIdx;\n"
|
" int idx = (aStatic)? bIdx: aIdx;\n"
|
||||||
" float4 p = gBodies[idx].m_pos;\n"
|
" float4 p = gBodies[idx].m_pos;\n"
|
||||||
" int xIdx = (int)((p.x-((p.x<0.f)?1.f:0.f))*scale) & (N_SPLIT-1);\n"
|
" int xIdx = (int)((p.x-((p.x<0.f)?1.f:0.f))*scale) & (N_SPLIT-1);\n"
|
||||||
" int zIdx = (int)((p.z-((p.z<0.f)?1.f:0.f))*scale) & (N_SPLIT-1);\n"
|
" int zIdx = (int)((p.z-((p.z<0.f)?1.f:0.f))*scale) & (N_SPLIT-1);\n"
|
||||||
|
" int newIndex = (xIdx+zIdx*N_SPLIT);\n"
|
||||||
|
" \n"
|
||||||
|
"#else//USE_SPATIAL_BATCHING\n"
|
||||||
|
" #if USE_4x4_GRID\n"
|
||||||
|
" int aa = aIdx&3;\n"
|
||||||
|
" int bb = bIdx&3;\n"
|
||||||
|
" if (aStatic)\n"
|
||||||
|
" aa = bb;\n"
|
||||||
|
" if (bStatic)\n"
|
||||||
|
" bb = aa;\n"
|
||||||
"\n"
|
"\n"
|
||||||
" gSortDataOut[gIdx].x = (xIdx+zIdx*N_SPLIT);\n"
|
" int gridIndex = aa + bb*4;\n"
|
||||||
|
" int newIndex = gridTable4x4[gridIndex];\n"
|
||||||
|
" #else//USE_4x4_GRID\n"
|
||||||
|
" int aa = aIdx&7;\n"
|
||||||
|
" int bb = bIdx&7;\n"
|
||||||
|
" if (aStatic)\n"
|
||||||
|
" aa = bb;\n"
|
||||||
|
" if (bStatic)\n"
|
||||||
|
" bb = aa;\n"
|
||||||
|
"\n"
|
||||||
|
" int gridIndex = aa + bb*8;\n"
|
||||||
|
" int newIndex = gridTable8x8[gridIndex];\n"
|
||||||
|
" #endif//USE_4x4_GRID\n"
|
||||||
|
"#endif//USE_SPATIAL_BATCHING\n"
|
||||||
|
"\n"
|
||||||
|
"\n"
|
||||||
|
" gSortDataOut[gIdx].x = newIndex;\n"
|
||||||
" gSortDataOut[gIdx].y = gIdx;\n"
|
" gSortDataOut[gIdx].y = gIdx;\n"
|
||||||
" }\n"
|
" }\n"
|
||||||
" else\n"
|
" else\n"
|
||||||
|
|||||||
Reference in New Issue
Block a user