add support for BVH acceleration for concave trianglemesh collision against convex hulls

bugfix/improvement in batching
This commit is contained in:
erwin coumans
2013-03-20 23:37:34 -07:00
parent 9a693fb850
commit b4f9416cdf
20 changed files with 760 additions and 690 deletions

View File

@@ -818,9 +818,10 @@ void Solver::sortContacts( const btOpenCLArray<btRigidBodyCL>* bodyBuf,
*/
void Solver::batchContacts( btOpenCLArray<btContact4>* contacts, int nContacts, btOpenCLArray<unsigned int>* nNative, btOpenCLArray<unsigned int>* offsetsNative, int staticIdx )
void Solver::batchContacts( btOpenCLArray<btContact4>* contacts, int nContacts, btOpenCLArray<unsigned int>* nNative, btOpenCLArray<unsigned int>* offsetsNative, int staticIdx )
{
int numWorkItems = 64*N_SPLIT*N_SPLIT;
{
BT_PROFILE("batch generation");
@@ -829,7 +830,7 @@ void Solver::batchContacts( btOpenCLArray<btContact4>* contacts, int nContacts,
cdata.y = 0;
cdata.z = staticIdx;
int numWorkItems = 64*N_SPLIT*N_SPLIT;
#ifdef BATCH_DEBUG
SolverDebugInfo* debugInfo = new SolverDebugInfo[numWorkItems];
adl::btOpenCLArray<SolverDebugInfo> gpuDebugInfo(data->m_device,numWorkItems);
@@ -837,12 +838,14 @@ void Solver::batchContacts( btOpenCLArray<btContact4>* contacts, int nContacts,
gpuDebugInfo.write(debugInfo,numWorkItems);
#endif
btBufferInfoCL bInfo[] = {
btBufferInfoCL( contacts->getBufferCL() ),
btBufferInfoCL( m_contactBuffer->getBufferCL() ),
btBufferInfoCL( nNative->getBufferCL() ),
btBufferInfoCL( offsetsNative->getBufferCL() )
btBufferInfoCL( offsetsNative->getBufferCL() ),
#ifdef BATCH_DEBUG
, btBufferInfoCL(&gpuDebugInfo)
#endif
@@ -899,7 +902,8 @@ void Solver::batchContacts( btOpenCLArray<btContact4>* contacts, int nContacts,
btAssert(m_contactBuffer->size()==nContacts);
//contacts->copyFromOpenCLArray( *m_contactBuffer);
//clFinish(m_queue);//needed?
}

View File

@@ -132,7 +132,7 @@ class Solver : public SolverBase
btOpenCLArray<btContact4>* contactsIn, btOpenCLArray<btGpuConstraint4>* contactCOut, void* additionalData,
int nContacts, const ConstraintCfg& cfg );
void batchContacts( btOpenCLArray<btContact4>* contacts, int nContacts, btOpenCLArray<unsigned int>* n, btOpenCLArray<unsigned int>* offsets, int staticIdx );
void batchContacts( btOpenCLArray<btContact4>* contacts, int nContacts, btOpenCLArray<unsigned int>* n, btOpenCLArray<unsigned int>* offsets, int staticIdx );
};

View File

@@ -14,6 +14,8 @@ struct btConfig
int m_maxConvexUniqueEdges;
int m_maxCompoundChildShapes;
int m_maxTriConvexPairCapacity;
btConfig()
:m_maxConvexBodies(128*1024),
@@ -23,7 +25,8 @@ struct btConfig
m_maxConvexVertices(8192),
m_maxConvexIndices(8192),
m_maxConvexUniqueEdges(8192),
m_maxCompoundChildShapes(8192)//??
m_maxCompoundChildShapes(8192),
m_maxTriConvexPairCapacity(256*1024)
{
m_maxBroadphasePairs = 16*m_maxConvexBodies;
}

View File

@@ -202,7 +202,7 @@ btGpuBatchingPgsSolver::~btGpuBatchingPgsSolver()
struct btConstraintCfg
{
btConstraintCfg( float dt = 0.f ): m_positionDrift( 0.005f ), m_positionConstraintCoeff( 0.2f ), m_dt(dt), m_staticIdx(-1) {}
btConstraintCfg( float dt = 0.f ): m_positionDrift( 0.005f ), m_positionConstraintCoeff( 0.2f ), m_dt(dt), m_staticIdx(0) {}
float m_positionDrift;
float m_positionConstraintCoeff;
@@ -403,8 +403,8 @@ void btGpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
float dt=1./60.;
btConstraintCfg csCfg( dt );
csCfg.m_enableParallelSolve = true;
csCfg.m_averageExtent = 0.2f;//@TODO m_averageObjExtent;
csCfg.m_staticIdx = -1;//m_static0Index;//m_planeBodyIndex;
csCfg.m_averageExtent = .2f;//@TODO m_averageObjExtent;
csCfg.m_staticIdx = 0;//m_static0Index;//m_planeBodyIndex;
btOpenCLArray<btContact4>* contactsIn = m_data->m_pBufContactOutGPU;
btOpenCLArray<btRigidBodyCL>* bodyBuf = m_data->m_bodyBufferGPU;
@@ -572,64 +572,65 @@ void btGpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
bool compareGPU = false;
if (nContacts)
{
if (gpuBatchContacts)
{
maxNumBatches=250;//for now
BT_PROFILE("gpu batchContacts");
m_data->m_solverGPU->batchContacts( (btOpenCLArray<btContact4>*)contactNative, nContacts, m_data->m_solverGPU->m_numConstraints, m_data->m_solverGPU->m_offsets, csCfg.m_staticIdx );
} else
{
BT_PROFILE("cpu batchContacts");
btAlignedObjectArray<btContact4> cpuContacts;
btOpenCLArray<btContact4>* contactsIn = m_data->m_pBufContactOutGPU;
contactsIn->copyToHost(cpuContacts);
if (gpuBatchContacts)
{
BT_PROFILE("gpu batchContacts");
maxNumBatches = 50;
m_data->m_solverGPU->batchContacts( (btOpenCLArray<btContact4>*)contactNative, nContacts, m_data->m_solverGPU->m_numConstraints, m_data->m_solverGPU->m_offsets, csCfg.m_staticIdx );
} else
{
BT_PROFILE("cpu batchContacts");
btAlignedObjectArray<btContact4> cpuContacts;
btOpenCLArray<btContact4>* contactsIn = m_data->m_pBufContactOutGPU;
contactsIn->copyToHost(cpuContacts);
btOpenCLArray<unsigned int>* countsNative = m_data->m_solverGPU->m_numConstraints;
btOpenCLArray<unsigned int>* offsetsNative = m_data->m_solverGPU->m_offsets;
btOpenCLArray<unsigned int>* countsNative = m_data->m_solverGPU->m_numConstraints;
btOpenCLArray<unsigned int>* offsetsNative = m_data->m_solverGPU->m_offsets;
btAlignedObjectArray<unsigned int> nNativeHost;
btAlignedObjectArray<unsigned int> offsetsNativeHost;
btAlignedObjectArray<unsigned int> nNativeHost;
btAlignedObjectArray<unsigned int> offsetsNativeHost;
{
BT_PROFILE("countsNative/offsetsNative copyToHost");
countsNative->copyToHost(nNativeHost);
offsetsNative->copyToHost(offsetsNativeHost);
}
{
BT_PROFILE("countsNative/offsetsNative copyToHost");
countsNative->copyToHost(nNativeHost);
offsetsNative->copyToHost(offsetsNativeHost);
}
int numNonzeroGrid=0;
int numNonzeroGrid=0;
{
BT_PROFILE("batch grid");
for(int i=0; i<BT_SOLVER_N_SPLIT*BT_SOLVER_N_SPLIT; i++)
{
int n = (nNativeHost)[i];
int offset = (offsetsNativeHost)[i];
{
BT_PROFILE("batch grid");
for(int i=0; i<BT_SOLVER_N_SPLIT*BT_SOLVER_N_SPLIT; i++)
{
int n = (nNativeHost)[i];
int offset = (offsetsNativeHost)[i];
if( n )
{
numNonzeroGrid++;
//printf("cpu batch\n");
if( n )
{
numNonzeroGrid++;
//printf("cpu batch\n");
int simdWidth = -1;
int numBatches = sortConstraintByBatch( &cpuContacts[0]+offset, n, simdWidth,csCfg.m_staticIdx ); // on GPU
maxNumBatches = btMax(numBatches,maxNumBatches);
int simdWidth = -1;
int numBatches = sortConstraintByBatch( &cpuContacts[0]+offset, n, simdWidth,csCfg.m_staticIdx ,numBodies); // on GPU
maxNumBatches = btMax(numBatches,maxNumBatches);
clFinish(m_data->m_queue);
clFinish(m_data->m_queue);
}
}
}
{
BT_PROFILE("m_contactBuffer->copyFromHost");
m_data->m_solverGPU->m_contactBuffer->copyFromHost((btAlignedObjectArray<btContact4>&)cpuContacts);
}
// printf("maxNumBatches = %d\n", maxNumBatches);
}
}
}
}
{
BT_PROFILE("m_contactBuffer->copyFromHost");
m_data->m_solverGPU->m_contactBuffer->copyFromHost((btAlignedObjectArray<btContact4>&)cpuContacts);
}
}
}
//printf("maxNumBatches = %d\n", maxNumBatches);
if (nContacts)
{
@@ -704,8 +705,14 @@ btAlignedObjectArray<btSortData> sortData;
btAlignedObjectArray<btContact4> old;
inline int btGpuBatchingPgsSolver::sortConstraintByBatch( btContact4* cs, int n, int simdWidth , int staticIdx)
inline int btGpuBatchingPgsSolver::sortConstraintByBatch( btContact4* cs, int n, int simdWidth , int staticIdx, int numBodies)
{
btAlignedObjectArray<int> bodyUsed;
bodyUsed.resize(numBodies);
for (int q=0;q<numBodies;q++)
bodyUsed[q]=0;
BT_PROFILE("sortConstraintByBatch");
int numIter = 0;
sortData.resize(n);
@@ -747,12 +754,7 @@ inline int btGpuBatchingPgsSolver::sortConstraintByBatch( btContact4* cs, int n,
int bodyAS = cs[idx].m_bodyAPtrAndSignBit;
int bodyBS = cs[idx].m_bodyBPtrAndSignBit;
/*if (bodyAS<0)
printf("A static\n");
if (bodyBS<0)
printf("B static\n");
*/
int bodyA = abs(bodyAS);
int bodyB = abs(bodyBS);
@@ -763,14 +765,20 @@ inline int btGpuBatchingPgsSolver::sortConstraintByBatch( btContact4* cs, int n,
unsigned int aUnavailable = flg[ aIdx/32 ] & (1<<(aIdx&31));
unsigned int bUnavailable = flg[ bIdx/32 ] & (1<<(bIdx&31));
bool aIsStatic = (bodyAS<0) || bodyAS==staticIdx;
bool bIsStatic = (bodyBS<0) || bodyBS==staticIdx;
//use inv_mass!
aUnavailable = (bodyAS>=0)&&bodyAS!=staticIdx? aUnavailable:0;//
bUnavailable = (bodyBS>=0)&&bodyBS!=staticIdx? bUnavailable:0;
aUnavailable = !aIsStatic? aUnavailable:0;//
bUnavailable = !bIsStatic? bUnavailable:0;
if( aUnavailable==0 && bUnavailable==0 ) // ok
{
flg[ aIdx/32 ] |= (1<<(aIdx&31));
flg[ bIdx/32 ] |= (1<<(bIdx&31));
if (!!aIsStatic)
flg[ aIdx/32 ] |= (1<<(aIdx&31));
if (!bIsStatic)
flg[ bIdx/32 ] |= (1<<(bIdx&31));
cs[idx].getBatchIdx() = batchIdx;
sortData[idx].m_key = batchIdx;
sortData[idx].m_value = idx;

View File

@@ -15,7 +15,7 @@ protected:
struct btGpuBatchingPgsSolverInternalData* m_data;
void batchContacts( btOpenCLArray<btContact4>* contacts, int nContacts, btOpenCLArray<unsigned int>* n, btOpenCLArray<unsigned int>* offsets, int staticIdx );
inline int sortConstraintByBatch( btContact4* cs, int n, int simdWidth , int staticIdx);
inline int sortConstraintByBatch( btContact4* cs, int n, int simdWidth , int staticIdx, int numBodies);
void solveContactConstraint( const btOpenCLArray<btRigidBodyCL>* bodyBuf, const btOpenCLArray<btInertiaCL>* shapeBuf,
btOpenCLArray<btGpuConstraint4>* constraint, void* additionalData, int n ,int maxNumBatches, int numIterations);

View File

@@ -63,6 +63,10 @@ struct btGpuNarrowPhaseInternalData
btAlignedObjectArray<btSapAabb>* m_localShapeAABBCPU;
btAlignedObjectArray<class btOptimizedBvh*> m_bvhData;
btOpenCLArray<btQuantizedBvhNode>* m_treeNodesGPU;
btOpenCLArray<btBvhSubtreeInfo>* m_subTreesGPU;
btConfig m_config;
};
@@ -137,6 +141,10 @@ m_queue(queue)
m_data->m_numAcceleratedShapes = 0;
m_data->m_numAcceleratedRigidBodies = 0;
m_data->m_treeNodesGPU = 0;
m_data->m_subTreesGPU = 0;
//m_data->m_contactCGPU = new btOpenCLArray<Constraint4>(ctx,queue,config.m_maxBroadphasePairs,false);
//m_data->m_frictionCGPU = new btOpenCLArray<adl::Solver<adl::TYPE_CL>::allocateFrictionConstraint( m_data->m_deviceCL, config.m_maxBroadphasePairs);
@@ -170,6 +178,8 @@ btGpuNarrowPhase::~btGpuNarrowPhase()
delete m_data->m_worldVertsA1GPU;
delete m_data->m_worldVertsB2GPU;
delete m_data->m_treeNodesGPU;
delete m_data->m_subTreesGPU;
delete m_data->m_convexData;
delete m_data;
@@ -328,6 +338,15 @@ int btGpuNarrowPhase::registerConvexHullShape(btConvexUtility* utilPtr)
int btGpuNarrowPhase::registerConcaveMesh(btAlignedObjectArray<btVector3>* vertices, btAlignedObjectArray<int>* indices,const float* scaling1)
{
//right now we only support one single mesh, it is on the todo to merge all mesh data etc
btAssert(m_data->m_treeNodesGPU ==0);
btAssert(m_data->m_subTreesGPU ==0);
if (m_data->m_treeNodesGPU)
{
printf("error, only 1 single concave mesh supported at the moment\n");
exit (0);
}
btVector3 scaling(scaling1[0],scaling1[1],scaling1[2]);
int collidableIndex = allocateCollidable();
@@ -377,6 +396,17 @@ int btGpuNarrowPhase::registerConcaveMesh(btAlignedObjectArray<btVector3>* vert
meshInterface->addIndexedMesh(mesh);
bvh->build(meshInterface, useQuantizedAabbCompression, (btVector3&)aabb.m_min, (btVector3&)aabb.m_max);
m_data->m_bvhData.push_back(bvh);
int numNodes = bvh->getQuantizedNodeArray().size();
btOpenCLArray<btQuantizedBvhNode>* treeNodesGPU = new btOpenCLArray<btQuantizedBvhNode>(this->m_context,this->m_queue,numNodes);
treeNodesGPU->copyFromHost(bvh->getQuantizedNodeArray());
int numSubTrees = bvh->getSubtreeInfoArray().size();
btOpenCLArray<btBvhSubtreeInfo>* subTreesGPU = new btOpenCLArray<btBvhSubtreeInfo>(this->m_context,this->m_queue,numSubTrees);
subTreesGPU->copyFromHost(bvh->getSubtreeInfoArray());
m_data->m_treeNodesGPU = treeNodesGPU;
m_data->m_subTreesGPU = subTreesGPU;
return collidableIndex;
}
@@ -412,7 +442,7 @@ int btGpuNarrowPhase::registerConcaveMeshShape(btAlignedObjectArray<btVector3>*
{
if (i%256==0)
{
printf("i=%d out of %d", i,convex.m_numFaces);
//printf("i=%d out of %d", i,convex.m_numFaces);
}
btVector3 vert0(vertices->at(indices->at(i*3))*scaling);
btVector3 vert1(vertices->at(indices->at(i*3+1))*scaling);
@@ -524,7 +554,7 @@ void btGpuNarrowPhase::computeContacts(cl_mem broadphasePairs, int numBroadphase
{
int nContactOut = 0;
int maxTriConvexPairCapacity = 8192;
int maxTriConvexPairCapacity = m_data->m_config.m_maxTriConvexPairCapacity;
btOpenCLArray<btInt4> triangleConvexPairs(m_context,m_queue, maxTriConvexPairCapacity);
int numTriConvexPairsOut=0;
@@ -552,6 +582,8 @@ void btGpuNarrowPhase::computeContacts(cl_mem broadphasePairs, int numBroadphase
*m_data->m_worldVertsA1GPU,
*m_data->m_worldVertsB2GPU,
m_data->m_bvhData,
m_data->m_treeNodesGPU,
m_data->m_subTreesGPU,
numObjects,
maxTriConvexPairCapacity,
triangleConvexPairs,

View File

@@ -224,17 +224,20 @@ __kernel void CreateBatches( __global const Contact4* gConstraints, __global Con
aAvailable = tryWrite( ldsCheckBuffer, ea );
bAvailable = tryWrite( ldsCheckBuffer, eb );
aAvailable = (e.m_a<0)? 1: aAvailable;
bAvailable = (e.m_b<0)? 1: bAvailable;
bool aStatic = (e.m_a<0) ||(ea==m_staticIdx);
bool bStatic = (e.m_b<0) ||(eb==m_staticIdx);
aAvailable = (e.m_a==m_staticIdx)? 1: aAvailable;
bAvailable = (e.m_b==m_staticIdx)? 1: bAvailable;
aAvailable = aStatic? 1: aAvailable;
bAvailable = bStatic? 1: bAvailable;
bool success = (aAvailable && bAvailable);
if(success)
{
writeBuf( ldsFixedBuffer, ea );
writeBuf( ldsFixedBuffer, eb );
if (!aStatic)
writeBuf( ldsFixedBuffer, ea );
if (!bStatic)
writeBuf( ldsFixedBuffer, eb );
}
done = success;
}

View File

@@ -226,17 +226,20 @@ static const char* batchingKernelsCL= \
" aAvailable = tryWrite( ldsCheckBuffer, ea );\n"
" bAvailable = tryWrite( ldsCheckBuffer, eb );\n"
"\n"
" aAvailable = (e.m_a<0)? 1: aAvailable;\n"
" bAvailable = (e.m_b<0)? 1: bAvailable;\n"
" bool aStatic = (e.m_a<0) ||(ea==m_staticIdx);\n"
" bool bStatic = (e.m_b<0) ||(eb==m_staticIdx);\n"
" \n"
" aAvailable = (e.m_a==m_staticIdx)? 1: aAvailable;\n"
" bAvailable = (e.m_b==m_staticIdx)? 1: bAvailable;\n"
" aAvailable = aStatic? 1: aAvailable;\n"
" bAvailable = bStatic? 1: bAvailable;\n"
"\n"
" bool success = (aAvailable && bAvailable);\n"
" if(success)\n"
" {\n"
" writeBuf( ldsFixedBuffer, ea );\n"
" writeBuf( ldsFixedBuffer, eb );\n"
" \n"
" if (!aStatic)\n"
" writeBuf( ldsFixedBuffer, ea );\n"
" if (!bStatic)\n"
" writeBuf( ldsFixedBuffer, eb );\n"
" }\n"
" done = success;\n"
" }\n"