add support for BVH acceleration for concave trianglemesh collision against convex hulls
bugfix/improvement in batching
This commit is contained in:
@@ -818,9 +818,10 @@ void Solver::sortContacts( const btOpenCLArray<btRigidBodyCL>* bodyBuf,
|
||||
|
||||
*/
|
||||
|
||||
void Solver::batchContacts( btOpenCLArray<btContact4>* contacts, int nContacts, btOpenCLArray<unsigned int>* nNative, btOpenCLArray<unsigned int>* offsetsNative, int staticIdx )
|
||||
void Solver::batchContacts( btOpenCLArray<btContact4>* contacts, int nContacts, btOpenCLArray<unsigned int>* nNative, btOpenCLArray<unsigned int>* offsetsNative, int staticIdx )
|
||||
{
|
||||
|
||||
|
||||
int numWorkItems = 64*N_SPLIT*N_SPLIT;
|
||||
{
|
||||
BT_PROFILE("batch generation");
|
||||
|
||||
@@ -829,7 +830,7 @@ void Solver::batchContacts( btOpenCLArray<btContact4>* contacts, int nContacts,
|
||||
cdata.y = 0;
|
||||
cdata.z = staticIdx;
|
||||
|
||||
int numWorkItems = 64*N_SPLIT*N_SPLIT;
|
||||
|
||||
#ifdef BATCH_DEBUG
|
||||
SolverDebugInfo* debugInfo = new SolverDebugInfo[numWorkItems];
|
||||
adl::btOpenCLArray<SolverDebugInfo> gpuDebugInfo(data->m_device,numWorkItems);
|
||||
@@ -837,12 +838,14 @@ void Solver::batchContacts( btOpenCLArray<btContact4>* contacts, int nContacts,
|
||||
gpuDebugInfo.write(debugInfo,numWorkItems);
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
||||
btBufferInfoCL bInfo[] = {
|
||||
btBufferInfoCL( contacts->getBufferCL() ),
|
||||
btBufferInfoCL( m_contactBuffer->getBufferCL() ),
|
||||
btBufferInfoCL( nNative->getBufferCL() ),
|
||||
btBufferInfoCL( offsetsNative->getBufferCL() )
|
||||
btBufferInfoCL( offsetsNative->getBufferCL() ),
|
||||
#ifdef BATCH_DEBUG
|
||||
, btBufferInfoCL(&gpuDebugInfo)
|
||||
#endif
|
||||
@@ -899,7 +902,8 @@ void Solver::batchContacts( btOpenCLArray<btContact4>* contacts, int nContacts,
|
||||
btAssert(m_contactBuffer->size()==nContacts);
|
||||
//contacts->copyFromOpenCLArray( *m_contactBuffer);
|
||||
//clFinish(m_queue);//needed?
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -132,7 +132,7 @@ class Solver : public SolverBase
|
||||
btOpenCLArray<btContact4>* contactsIn, btOpenCLArray<btGpuConstraint4>* contactCOut, void* additionalData,
|
||||
int nContacts, const ConstraintCfg& cfg );
|
||||
|
||||
void batchContacts( btOpenCLArray<btContact4>* contacts, int nContacts, btOpenCLArray<unsigned int>* n, btOpenCLArray<unsigned int>* offsets, int staticIdx );
|
||||
void batchContacts( btOpenCLArray<btContact4>* contacts, int nContacts, btOpenCLArray<unsigned int>* n, btOpenCLArray<unsigned int>* offsets, int staticIdx );
|
||||
|
||||
};
|
||||
|
||||
|
||||
@@ -14,6 +14,8 @@ struct btConfig
|
||||
int m_maxConvexUniqueEdges;
|
||||
|
||||
int m_maxCompoundChildShapes;
|
||||
|
||||
int m_maxTriConvexPairCapacity;
|
||||
|
||||
btConfig()
|
||||
:m_maxConvexBodies(128*1024),
|
||||
@@ -23,7 +25,8 @@ struct btConfig
|
||||
m_maxConvexVertices(8192),
|
||||
m_maxConvexIndices(8192),
|
||||
m_maxConvexUniqueEdges(8192),
|
||||
m_maxCompoundChildShapes(8192)//??
|
||||
m_maxCompoundChildShapes(8192),
|
||||
m_maxTriConvexPairCapacity(256*1024)
|
||||
{
|
||||
m_maxBroadphasePairs = 16*m_maxConvexBodies;
|
||||
}
|
||||
|
||||
@@ -202,7 +202,7 @@ btGpuBatchingPgsSolver::~btGpuBatchingPgsSolver()
|
||||
|
||||
struct btConstraintCfg
|
||||
{
|
||||
btConstraintCfg( float dt = 0.f ): m_positionDrift( 0.005f ), m_positionConstraintCoeff( 0.2f ), m_dt(dt), m_staticIdx(-1) {}
|
||||
btConstraintCfg( float dt = 0.f ): m_positionDrift( 0.005f ), m_positionConstraintCoeff( 0.2f ), m_dt(dt), m_staticIdx(0) {}
|
||||
|
||||
float m_positionDrift;
|
||||
float m_positionConstraintCoeff;
|
||||
@@ -403,8 +403,8 @@ void btGpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
||||
float dt=1./60.;
|
||||
btConstraintCfg csCfg( dt );
|
||||
csCfg.m_enableParallelSolve = true;
|
||||
csCfg.m_averageExtent = 0.2f;//@TODO m_averageObjExtent;
|
||||
csCfg.m_staticIdx = -1;//m_static0Index;//m_planeBodyIndex;
|
||||
csCfg.m_averageExtent = .2f;//@TODO m_averageObjExtent;
|
||||
csCfg.m_staticIdx = 0;//m_static0Index;//m_planeBodyIndex;
|
||||
|
||||
btOpenCLArray<btContact4>* contactsIn = m_data->m_pBufContactOutGPU;
|
||||
btOpenCLArray<btRigidBodyCL>* bodyBuf = m_data->m_bodyBufferGPU;
|
||||
@@ -572,64 +572,65 @@ void btGpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
||||
bool compareGPU = false;
|
||||
if (nContacts)
|
||||
{
|
||||
if (gpuBatchContacts)
|
||||
{
|
||||
maxNumBatches=250;//for now
|
||||
BT_PROFILE("gpu batchContacts");
|
||||
m_data->m_solverGPU->batchContacts( (btOpenCLArray<btContact4>*)contactNative, nContacts, m_data->m_solverGPU->m_numConstraints, m_data->m_solverGPU->m_offsets, csCfg.m_staticIdx );
|
||||
} else
|
||||
{
|
||||
BT_PROFILE("cpu batchContacts");
|
||||
btAlignedObjectArray<btContact4> cpuContacts;
|
||||
btOpenCLArray<btContact4>* contactsIn = m_data->m_pBufContactOutGPU;
|
||||
contactsIn->copyToHost(cpuContacts);
|
||||
if (gpuBatchContacts)
|
||||
{
|
||||
BT_PROFILE("gpu batchContacts");
|
||||
maxNumBatches = 50;
|
||||
m_data->m_solverGPU->batchContacts( (btOpenCLArray<btContact4>*)contactNative, nContacts, m_data->m_solverGPU->m_numConstraints, m_data->m_solverGPU->m_offsets, csCfg.m_staticIdx );
|
||||
} else
|
||||
{
|
||||
BT_PROFILE("cpu batchContacts");
|
||||
btAlignedObjectArray<btContact4> cpuContacts;
|
||||
btOpenCLArray<btContact4>* contactsIn = m_data->m_pBufContactOutGPU;
|
||||
contactsIn->copyToHost(cpuContacts);
|
||||
|
||||
btOpenCLArray<unsigned int>* countsNative = m_data->m_solverGPU->m_numConstraints;
|
||||
btOpenCLArray<unsigned int>* offsetsNative = m_data->m_solverGPU->m_offsets;
|
||||
btOpenCLArray<unsigned int>* countsNative = m_data->m_solverGPU->m_numConstraints;
|
||||
btOpenCLArray<unsigned int>* offsetsNative = m_data->m_solverGPU->m_offsets;
|
||||
|
||||
btAlignedObjectArray<unsigned int> nNativeHost;
|
||||
btAlignedObjectArray<unsigned int> offsetsNativeHost;
|
||||
btAlignedObjectArray<unsigned int> nNativeHost;
|
||||
btAlignedObjectArray<unsigned int> offsetsNativeHost;
|
||||
|
||||
{
|
||||
BT_PROFILE("countsNative/offsetsNative copyToHost");
|
||||
countsNative->copyToHost(nNativeHost);
|
||||
offsetsNative->copyToHost(offsetsNativeHost);
|
||||
}
|
||||
{
|
||||
BT_PROFILE("countsNative/offsetsNative copyToHost");
|
||||
countsNative->copyToHost(nNativeHost);
|
||||
offsetsNative->copyToHost(offsetsNativeHost);
|
||||
}
|
||||
|
||||
|
||||
int numNonzeroGrid=0;
|
||||
int numNonzeroGrid=0;
|
||||
|
||||
{
|
||||
BT_PROFILE("batch grid");
|
||||
for(int i=0; i<BT_SOLVER_N_SPLIT*BT_SOLVER_N_SPLIT; i++)
|
||||
{
|
||||
int n = (nNativeHost)[i];
|
||||
int offset = (offsetsNativeHost)[i];
|
||||
{
|
||||
BT_PROFILE("batch grid");
|
||||
for(int i=0; i<BT_SOLVER_N_SPLIT*BT_SOLVER_N_SPLIT; i++)
|
||||
{
|
||||
int n = (nNativeHost)[i];
|
||||
int offset = (offsetsNativeHost)[i];
|
||||
|
||||
if( n )
|
||||
{
|
||||
numNonzeroGrid++;
|
||||
//printf("cpu batch\n");
|
||||
if( n )
|
||||
{
|
||||
numNonzeroGrid++;
|
||||
//printf("cpu batch\n");
|
||||
|
||||
int simdWidth = -1;
|
||||
int numBatches = sortConstraintByBatch( &cpuContacts[0]+offset, n, simdWidth,csCfg.m_staticIdx ); // on GPU
|
||||
maxNumBatches = btMax(numBatches,maxNumBatches);
|
||||
int simdWidth = -1;
|
||||
int numBatches = sortConstraintByBatch( &cpuContacts[0]+offset, n, simdWidth,csCfg.m_staticIdx ,numBodies); // on GPU
|
||||
maxNumBatches = btMax(numBatches,maxNumBatches);
|
||||
|
||||
clFinish(m_data->m_queue);
|
||||
clFinish(m_data->m_queue);
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
{
|
||||
BT_PROFILE("m_contactBuffer->copyFromHost");
|
||||
m_data->m_solverGPU->m_contactBuffer->copyFromHost((btAlignedObjectArray<btContact4>&)cpuContacts);
|
||||
}
|
||||
// printf("maxNumBatches = %d\n", maxNumBatches);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
{
|
||||
BT_PROFILE("m_contactBuffer->copyFromHost");
|
||||
m_data->m_solverGPU->m_contactBuffer->copyFromHost((btAlignedObjectArray<btContact4>&)cpuContacts);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
//printf("maxNumBatches = %d\n", maxNumBatches);
|
||||
|
||||
if (nContacts)
|
||||
{
|
||||
@@ -704,8 +705,14 @@ btAlignedObjectArray<btSortData> sortData;
|
||||
btAlignedObjectArray<btContact4> old;
|
||||
|
||||
|
||||
inline int btGpuBatchingPgsSolver::sortConstraintByBatch( btContact4* cs, int n, int simdWidth , int staticIdx)
|
||||
inline int btGpuBatchingPgsSolver::sortConstraintByBatch( btContact4* cs, int n, int simdWidth , int staticIdx, int numBodies)
|
||||
{
|
||||
btAlignedObjectArray<int> bodyUsed;
|
||||
bodyUsed.resize(numBodies);
|
||||
for (int q=0;q<numBodies;q++)
|
||||
bodyUsed[q]=0;
|
||||
|
||||
BT_PROFILE("sortConstraintByBatch");
|
||||
int numIter = 0;
|
||||
|
||||
sortData.resize(n);
|
||||
@@ -747,12 +754,7 @@ inline int btGpuBatchingPgsSolver::sortConstraintByBatch( btContact4* cs, int n,
|
||||
int bodyAS = cs[idx].m_bodyAPtrAndSignBit;
|
||||
int bodyBS = cs[idx].m_bodyBPtrAndSignBit;
|
||||
|
||||
/*if (bodyAS<0)
|
||||
printf("A static\n");
|
||||
|
||||
if (bodyBS<0)
|
||||
printf("B static\n");
|
||||
*/
|
||||
|
||||
|
||||
int bodyA = abs(bodyAS);
|
||||
int bodyB = abs(bodyBS);
|
||||
@@ -763,14 +765,20 @@ inline int btGpuBatchingPgsSolver::sortConstraintByBatch( btContact4* cs, int n,
|
||||
unsigned int aUnavailable = flg[ aIdx/32 ] & (1<<(aIdx&31));
|
||||
unsigned int bUnavailable = flg[ bIdx/32 ] & (1<<(bIdx&31));
|
||||
|
||||
bool aIsStatic = (bodyAS<0) || bodyAS==staticIdx;
|
||||
bool bIsStatic = (bodyBS<0) || bodyBS==staticIdx;
|
||||
|
||||
//use inv_mass!
|
||||
aUnavailable = (bodyAS>=0)&&bodyAS!=staticIdx? aUnavailable:0;//
|
||||
bUnavailable = (bodyBS>=0)&&bodyBS!=staticIdx? bUnavailable:0;
|
||||
aUnavailable = !aIsStatic? aUnavailable:0;//
|
||||
bUnavailable = !bIsStatic? bUnavailable:0;
|
||||
|
||||
if( aUnavailable==0 && bUnavailable==0 ) // ok
|
||||
{
|
||||
flg[ aIdx/32 ] |= (1<<(aIdx&31));
|
||||
flg[ bIdx/32 ] |= (1<<(bIdx&31));
|
||||
if (!!aIsStatic)
|
||||
flg[ aIdx/32 ] |= (1<<(aIdx&31));
|
||||
if (!bIsStatic)
|
||||
flg[ bIdx/32 ] |= (1<<(bIdx&31));
|
||||
|
||||
cs[idx].getBatchIdx() = batchIdx;
|
||||
sortData[idx].m_key = batchIdx;
|
||||
sortData[idx].m_value = idx;
|
||||
|
||||
@@ -15,7 +15,7 @@ protected:
|
||||
struct btGpuBatchingPgsSolverInternalData* m_data;
|
||||
|
||||
void batchContacts( btOpenCLArray<btContact4>* contacts, int nContacts, btOpenCLArray<unsigned int>* n, btOpenCLArray<unsigned int>* offsets, int staticIdx );
|
||||
inline int sortConstraintByBatch( btContact4* cs, int n, int simdWidth , int staticIdx);
|
||||
inline int sortConstraintByBatch( btContact4* cs, int n, int simdWidth , int staticIdx, int numBodies);
|
||||
void solveContactConstraint( const btOpenCLArray<btRigidBodyCL>* bodyBuf, const btOpenCLArray<btInertiaCL>* shapeBuf,
|
||||
btOpenCLArray<btGpuConstraint4>* constraint, void* additionalData, int n ,int maxNumBatches, int numIterations);
|
||||
|
||||
|
||||
@@ -63,6 +63,10 @@ struct btGpuNarrowPhaseInternalData
|
||||
btAlignedObjectArray<btSapAabb>* m_localShapeAABBCPU;
|
||||
|
||||
btAlignedObjectArray<class btOptimizedBvh*> m_bvhData;
|
||||
btOpenCLArray<btQuantizedBvhNode>* m_treeNodesGPU;
|
||||
btOpenCLArray<btBvhSubtreeInfo>* m_subTreesGPU;
|
||||
|
||||
|
||||
btConfig m_config;
|
||||
|
||||
};
|
||||
@@ -137,6 +141,10 @@ m_queue(queue)
|
||||
m_data->m_numAcceleratedShapes = 0;
|
||||
m_data->m_numAcceleratedRigidBodies = 0;
|
||||
|
||||
m_data->m_treeNodesGPU = 0;
|
||||
m_data->m_subTreesGPU = 0;
|
||||
|
||||
|
||||
//m_data->m_contactCGPU = new btOpenCLArray<Constraint4>(ctx,queue,config.m_maxBroadphasePairs,false);
|
||||
//m_data->m_frictionCGPU = new btOpenCLArray<adl::Solver<adl::TYPE_CL>::allocateFrictionConstraint( m_data->m_deviceCL, config.m_maxBroadphasePairs);
|
||||
|
||||
@@ -170,6 +178,8 @@ btGpuNarrowPhase::~btGpuNarrowPhase()
|
||||
delete m_data->m_worldVertsA1GPU;
|
||||
delete m_data->m_worldVertsB2GPU;
|
||||
|
||||
delete m_data->m_treeNodesGPU;
|
||||
delete m_data->m_subTreesGPU;
|
||||
|
||||
delete m_data->m_convexData;
|
||||
delete m_data;
|
||||
@@ -328,6 +338,15 @@ int btGpuNarrowPhase::registerConvexHullShape(btConvexUtility* utilPtr)
|
||||
|
||||
int btGpuNarrowPhase::registerConcaveMesh(btAlignedObjectArray<btVector3>* vertices, btAlignedObjectArray<int>* indices,const float* scaling1)
|
||||
{
|
||||
//right now we only support one single mesh, it is on the todo to merge all mesh data etc
|
||||
btAssert(m_data->m_treeNodesGPU ==0);
|
||||
btAssert(m_data->m_subTreesGPU ==0);
|
||||
if (m_data->m_treeNodesGPU)
|
||||
{
|
||||
printf("error, only 1 single concave mesh supported at the moment\n");
|
||||
exit (0);
|
||||
}
|
||||
|
||||
btVector3 scaling(scaling1[0],scaling1[1],scaling1[2]);
|
||||
|
||||
int collidableIndex = allocateCollidable();
|
||||
@@ -377,6 +396,17 @@ int btGpuNarrowPhase::registerConcaveMesh(btAlignedObjectArray<btVector3>* vert
|
||||
meshInterface->addIndexedMesh(mesh);
|
||||
bvh->build(meshInterface, useQuantizedAabbCompression, (btVector3&)aabb.m_min, (btVector3&)aabb.m_max);
|
||||
m_data->m_bvhData.push_back(bvh);
|
||||
int numNodes = bvh->getQuantizedNodeArray().size();
|
||||
btOpenCLArray<btQuantizedBvhNode>* treeNodesGPU = new btOpenCLArray<btQuantizedBvhNode>(this->m_context,this->m_queue,numNodes);
|
||||
treeNodesGPU->copyFromHost(bvh->getQuantizedNodeArray());
|
||||
|
||||
int numSubTrees = bvh->getSubtreeInfoArray().size();
|
||||
btOpenCLArray<btBvhSubtreeInfo>* subTreesGPU = new btOpenCLArray<btBvhSubtreeInfo>(this->m_context,this->m_queue,numSubTrees);
|
||||
subTreesGPU->copyFromHost(bvh->getSubtreeInfoArray());
|
||||
|
||||
m_data->m_treeNodesGPU = treeNodesGPU;
|
||||
m_data->m_subTreesGPU = subTreesGPU;
|
||||
|
||||
|
||||
return collidableIndex;
|
||||
}
|
||||
@@ -412,7 +442,7 @@ int btGpuNarrowPhase::registerConcaveMeshShape(btAlignedObjectArray<btVector3>*
|
||||
{
|
||||
if (i%256==0)
|
||||
{
|
||||
printf("i=%d out of %d", i,convex.m_numFaces);
|
||||
//printf("i=%d out of %d", i,convex.m_numFaces);
|
||||
}
|
||||
btVector3 vert0(vertices->at(indices->at(i*3))*scaling);
|
||||
btVector3 vert1(vertices->at(indices->at(i*3+1))*scaling);
|
||||
@@ -524,7 +554,7 @@ void btGpuNarrowPhase::computeContacts(cl_mem broadphasePairs, int numBroadphase
|
||||
{
|
||||
int nContactOut = 0;
|
||||
|
||||
int maxTriConvexPairCapacity = 8192;
|
||||
int maxTriConvexPairCapacity = m_data->m_config.m_maxTriConvexPairCapacity;
|
||||
btOpenCLArray<btInt4> triangleConvexPairs(m_context,m_queue, maxTriConvexPairCapacity);
|
||||
int numTriConvexPairsOut=0;
|
||||
|
||||
@@ -552,6 +582,8 @@ void btGpuNarrowPhase::computeContacts(cl_mem broadphasePairs, int numBroadphase
|
||||
*m_data->m_worldVertsA1GPU,
|
||||
*m_data->m_worldVertsB2GPU,
|
||||
m_data->m_bvhData,
|
||||
m_data->m_treeNodesGPU,
|
||||
m_data->m_subTreesGPU,
|
||||
numObjects,
|
||||
maxTriConvexPairCapacity,
|
||||
triangleConvexPairs,
|
||||
|
||||
@@ -224,17 +224,20 @@ __kernel void CreateBatches( __global const Contact4* gConstraints, __global Con
|
||||
aAvailable = tryWrite( ldsCheckBuffer, ea );
|
||||
bAvailable = tryWrite( ldsCheckBuffer, eb );
|
||||
|
||||
aAvailable = (e.m_a<0)? 1: aAvailable;
|
||||
bAvailable = (e.m_b<0)? 1: bAvailable;
|
||||
bool aStatic = (e.m_a<0) ||(ea==m_staticIdx);
|
||||
bool bStatic = (e.m_b<0) ||(eb==m_staticIdx);
|
||||
|
||||
aAvailable = (e.m_a==m_staticIdx)? 1: aAvailable;
|
||||
bAvailable = (e.m_b==m_staticIdx)? 1: bAvailable;
|
||||
aAvailable = aStatic? 1: aAvailable;
|
||||
bAvailable = bStatic? 1: bAvailable;
|
||||
|
||||
bool success = (aAvailable && bAvailable);
|
||||
if(success)
|
||||
{
|
||||
writeBuf( ldsFixedBuffer, ea );
|
||||
writeBuf( ldsFixedBuffer, eb );
|
||||
|
||||
if (!aStatic)
|
||||
writeBuf( ldsFixedBuffer, ea );
|
||||
if (!bStatic)
|
||||
writeBuf( ldsFixedBuffer, eb );
|
||||
}
|
||||
done = success;
|
||||
}
|
||||
|
||||
@@ -226,17 +226,20 @@ static const char* batchingKernelsCL= \
|
||||
" aAvailable = tryWrite( ldsCheckBuffer, ea );\n"
|
||||
" bAvailable = tryWrite( ldsCheckBuffer, eb );\n"
|
||||
"\n"
|
||||
" aAvailable = (e.m_a<0)? 1: aAvailable;\n"
|
||||
" bAvailable = (e.m_b<0)? 1: bAvailable;\n"
|
||||
" bool aStatic = (e.m_a<0) ||(ea==m_staticIdx);\n"
|
||||
" bool bStatic = (e.m_b<0) ||(eb==m_staticIdx);\n"
|
||||
" \n"
|
||||
" aAvailable = (e.m_a==m_staticIdx)? 1: aAvailable;\n"
|
||||
" bAvailable = (e.m_b==m_staticIdx)? 1: bAvailable;\n"
|
||||
" aAvailable = aStatic? 1: aAvailable;\n"
|
||||
" bAvailable = bStatic? 1: bAvailable;\n"
|
||||
"\n"
|
||||
" bool success = (aAvailable && bAvailable);\n"
|
||||
" if(success)\n"
|
||||
" {\n"
|
||||
" writeBuf( ldsFixedBuffer, ea );\n"
|
||||
" writeBuf( ldsFixedBuffer, eb );\n"
|
||||
" \n"
|
||||
" if (!aStatic)\n"
|
||||
" writeBuf( ldsFixedBuffer, ea );\n"
|
||||
" if (!bStatic)\n"
|
||||
" writeBuf( ldsFixedBuffer, eb );\n"
|
||||
" }\n"
|
||||
" done = success;\n"
|
||||
" }\n"
|
||||
|
||||
Reference in New Issue
Block a user