faster scene construction when not using instancing (avoid copy individual vertices/shapes to GPU, but do it afterwards in a batch), copy GPU data in 'writeAllBodiesToGpu' method

add option --no_instanced_collision_shapes, this only applies to the 2 benchmark screnes at the moment
always use the user specified b3Config file, so the settings can be set at runtime, not just at compile time
adjust default constants in b3Config (needs more tweaking, ideally at run-time)
This commit is contained in:
erwin coumans
2013-06-08 11:08:44 -07:00
parent 3f10082aa3
commit 100449d76f
11 changed files with 140 additions and 61 deletions

View File

@@ -20,17 +20,17 @@ struct b3Config
b3Config()
#ifdef __APPLE__
:m_maxConvexBodies(64*1024),
:m_maxConvexBodies(32*1024),
#else
:m_maxConvexBodies(128*1024),
:m_maxConvexBodies(32*1024),
#endif
m_maxConvexShapes(8192),
m_maxConvexShapes(81920),
m_maxVerticesPerFace(64),
m_maxFacesPerShape(64),
m_maxConvexVertices(8192),
m_maxConvexIndices(8192),
m_maxConvexUniqueEdges(8192),
m_maxCompoundChildShapes(8192),
m_maxConvexVertices(8192000),
m_maxConvexIndices(8192000),
m_maxConvexUniqueEdges(819200),
m_maxCompoundChildShapes(81920),
m_maxTriConvexPairCapacity(512*1024)
//m_maxTriConvexPairCapacity(256*1024)
{

View File

@@ -83,7 +83,6 @@ struct b3GpuNarrowPhaseInternalData
b3GpuNarrowPhase::b3GpuNarrowPhase(cl_context ctx, cl_device_id device, cl_command_queue queue, const b3Config& config)
:m_data(0) ,m_planeBodyIndex(-1),m_static0Index(-1),
m_context(ctx),
@@ -94,6 +93,7 @@ m_queue(queue)
m_data = new b3GpuNarrowPhaseInternalData();
memset(m_data,0,sizeof(b3GpuNarrowPhaseInternalData));
m_data->m_config = config;
m_data->m_gpuSatCollision = new GpuSatCollision(ctx,device,queue);
@@ -115,6 +115,7 @@ m_queue(queue)
m_data->m_inertiaBufferGPU = new b3OpenCLArray<b3InertiaCL>(ctx,queue,config.m_maxConvexBodies,false);
m_data->m_collidablesGPU = new b3OpenCLArray<b3Collidable>(ctx,queue,config.m_maxConvexShapes);
m_data->m_collidablesCPU.reserve(config.m_maxConvexShapes);
m_data->m_localShapeAABBCPU = new b3AlignedObjectArray<b3SapAabb>;
m_data->m_localShapeAABBGPU = new b3OpenCLArray<b3SapAabb>(ctx,queue,config.m_maxConvexShapes);
@@ -124,13 +125,23 @@ m_queue(queue)
m_data->m_bodyBufferGPU = new b3OpenCLArray<b3RigidBodyCL>(ctx,queue, config.m_maxConvexBodies,false);
m_data->m_convexFacesGPU = new b3OpenCLArray<b3GpuFace>(ctx,queue,config.m_maxConvexShapes*config.m_maxFacesPerShape,false);
m_data->m_convexFaces.reserve(config.m_maxConvexShapes*config.m_maxFacesPerShape);
m_data->m_gpuChildShapes = new b3OpenCLArray<b3GpuChildShape>(ctx,queue,config.m_maxCompoundChildShapes,false);
m_data->m_convexPolyhedraGPU = new b3OpenCLArray<b3ConvexPolyhedronCL>(ctx,queue,config.m_maxConvexShapes,false);
m_data->m_convexPolyhedra.reserve(config.m_maxConvexShapes);
m_data->m_uniqueEdgesGPU = new b3OpenCLArray<b3Vector3>(ctx,queue,config.m_maxConvexUniqueEdges,true);
m_data->m_uniqueEdges.reserve(config.m_maxConvexUniqueEdges);
m_data->m_convexVerticesGPU = new b3OpenCLArray<b3Vector3>(ctx,queue,config.m_maxConvexVertices,true);
m_data->m_convexVertices.reserve(config.m_maxConvexVertices);
m_data->m_convexIndicesGPU = new b3OpenCLArray<int>(ctx,queue,config.m_maxConvexIndices,true);
m_data->m_convexIndices.reserve(config.m_maxConvexIndices);
m_data->m_worldVertsB1GPU = new b3OpenCLArray<b3Vector3>(ctx,queue,config.m_maxConvexBodies*config.m_maxVerticesPerFace);
m_data->m_clippingFacesOutGPU = new b3OpenCLArray<b3Int4>(ctx,queue,config.m_maxConvexBodies);
@@ -141,7 +152,6 @@ m_queue(queue)
m_data->m_convexData = new b3AlignedObjectArray<b3ConvexUtility* >();
m_data->m_convexData->resize(config.m_maxConvexShapes);
m_data->m_convexPolyhedra.resize(config.m_maxConvexShapes);
@@ -237,7 +247,7 @@ int b3GpuNarrowPhase::registerSphereShape(float radius)
aabb.m_signedMaxIndices[3] = 0;
m_data->m_localShapeAABBCPU->push_back(aabb);
m_data->m_localShapeAABBGPU->push_back(aabb);
// m_data->m_localShapeAABBGPU->push_back(aabb);
clFinish(m_queue);
}
@@ -253,7 +263,6 @@ int b3GpuNarrowPhase::registerFace(const b3Vector3& faceNormal, float faceConsta
face.m_plane[1] = faceNormal.getY();
face.m_plane[2] = faceNormal.getZ();
face.m_plane[3] = faceConstant;
m_data->m_convexFacesGPU->copyFromHost(m_data->m_convexFaces);
return faceOffset;
}
@@ -280,7 +289,7 @@ int b3GpuNarrowPhase::registerPlaneShape(const b3Vector3& planeNormal, float pl
aabb.m_signedMaxIndices[3] = 0;
m_data->m_localShapeAABBCPU->push_back(aabb);
m_data->m_localShapeAABBGPU->push_back(aabb);
// m_data->m_localShapeAABBGPU->push_back(aabb);
clFinish(m_queue);
}
@@ -290,6 +299,7 @@ int b3GpuNarrowPhase::registerPlaneShape(const b3Vector3& planeNormal, float pl
int b3GpuNarrowPhase::registerConvexHullShape(b3ConvexUtility* convexPtr,b3Collidable& col)
{
m_data->m_convexData->resize(m_data->m_numAcceleratedShapes+1);
m_data->m_convexPolyhedra.resize(m_data->m_numAcceleratedShapes+1);
@@ -317,7 +327,10 @@ int b3GpuNarrowPhase::registerConvexHullShape(b3ConvexUtility* convexPtr,b3Colli
int faceOffset = m_data->m_convexFaces.size();
convex.m_faceOffset = faceOffset;
convex.m_numFaces = convexPtr->m_faces.size();
m_data->m_convexFaces.resize(faceOffset+convex.m_numFaces);
for (i=0;i<convexPtr->m_faces.size();i++)
{
m_data->m_convexFaces[convex.m_faceOffset+i].m_plane[0] = convexPtr->m_faces[i].m_plane[0];
@@ -338,6 +351,7 @@ int b3GpuNarrowPhase::registerConvexHullShape(b3ConvexUtility* convexPtr,b3Colli
convex.m_numVertices = convexPtr->m_vertices.size();
int vertexOffset = m_data->m_convexVertices.size();
convex.m_vertexOffset =vertexOffset;
m_data->m_convexVertices.resize(vertexOffset+convex.m_numVertices);
for (int i=0;i<convexPtr->m_vertices.size();i++)
{
@@ -346,12 +360,6 @@ int b3GpuNarrowPhase::registerConvexHullShape(b3ConvexUtility* convexPtr,b3Colli
(*m_data->m_convexData)[m_data->m_numAcceleratedShapes] = convexPtr;
m_data->m_convexFacesGPU->copyFromHost(m_data->m_convexFaces);
m_data->m_convexPolyhedraGPU->copyFromHost(m_data->m_convexPolyhedra);
m_data->m_uniqueEdgesGPU->copyFromHost(m_data->m_uniqueEdges);
m_data->m_convexVerticesGPU->copyFromHost(m_data->m_convexVertices);
m_data->m_convexIndicesGPU->copyFromHost(m_data->m_convexIndices);
return m_data->m_numAcceleratedShapes++;
@@ -361,7 +369,7 @@ int b3GpuNarrowPhase::registerConvexHullShape(b3ConvexUtility* convexPtr,b3Colli
int b3GpuNarrowPhase::registerConvexHullShape(const float* vertices, int strideInBytes, int numVertices, const float* scaling)
{
b3AlignedObjectArray<b3Vector3> verts;
unsigned char* vts = (unsigned char*) vertices;
for (int i=0;i<numVertices;i++)
{
@@ -421,7 +429,7 @@ int b3GpuNarrowPhase::registerConvexHullShape(b3ConvexUtility* utilPtr)
aabb.m_signedMaxIndices[3] = 0;
m_data->m_localShapeAABBCPU->push_back(aabb);
m_data->m_localShapeAABBGPU->push_back(aabb);
// m_data->m_localShapeAABBGPU->push_back(aabb);
}
return collidableIndex;
@@ -442,8 +450,6 @@ int b3GpuNarrowPhase::registerCompoundShape(b3AlignedObjectArray<b3GpuChildShap
{
m_data->m_cpuChildShapes.push_back(childShapes->at(i));
}
//if writing the data directly is too slow, we can delay it and do it all at once in
m_data->m_gpuChildShapes->copyFromHost(m_data->m_cpuChildShapes);
}
@@ -492,7 +498,7 @@ int b3GpuNarrowPhase::registerCompoundShape(b3AlignedObjectArray<b3GpuChildShap
aabbWS.m_signedMaxIndices[3] = 0;
m_data->m_localShapeAABBCPU->push_back(aabbWS);
m_data->m_localShapeAABBGPU->push_back(aabbWS);
// m_data->m_localShapeAABBGPU->push_back(aabbWS);
clFinish(m_queue);
return collidableIndex;
@@ -534,7 +540,7 @@ int b3GpuNarrowPhase::registerConcaveMesh(b3AlignedObjectArray<b3Vector3>* vert
aabb.m_signedMaxIndices[3]= 0;
m_data->m_localShapeAABBCPU->push_back(aabb);
m_data->m_localShapeAABBGPU->push_back(aabb);
// m_data->m_localShapeAABBGPU->push_back(aabb);
b3OptimizedBvh* bvh = new b3OptimizedBvh();
//void b3OptimizedBvh::build(b3StridingMeshInterface* triangles, bool useQuantizedAabbCompression, const b3Vector3& bvhAabbMin, const b3Vector3& bvhAabbMax)
@@ -554,7 +560,6 @@ int b3GpuNarrowPhase::registerConcaveMesh(b3AlignedObjectArray<b3Vector3>* vert
m_data->m_bvhData.push_back(bvh);
int numNodes = bvh->getQuantizedNodeArray().size();
//b3OpenCLArray<b3QuantizedBvhNode>* treeNodesGPU = new b3OpenCLArray<b3QuantizedBvhNode>(this->m_context,this->m_queue,numNodes);
//treeNodesGPU->copyFromHost(bvh->getQuantizedNodeArray());
int numSubTrees = bvh->getSubtreeInfoArray().size();
b3BvhInfo bvhInfo;
@@ -568,7 +573,6 @@ int b3GpuNarrowPhase::registerConcaveMesh(b3AlignedObjectArray<b3Vector3>* vert
bvhInfo.m_subTreeOffset = m_data->m_subTreesCPU.size();
m_data->m_bvhInfoCPU.push_back(bvhInfo);
m_data->m_bvhInfoGPU->copyFromHost(m_data->m_bvhInfoCPU);
int numNewSubtrees = bvh->getSubtreeInfoArray().size();
@@ -584,11 +588,7 @@ int b3GpuNarrowPhase::registerConcaveMesh(b3AlignedObjectArray<b3Vector3>* vert
m_data->m_treeNodesCPU.push_back(bvh->getQuantizedNodeArray()[i]);
}
//b3OpenCLArray<b3BvhSubtreeInfo>* subTreesGPU = new b3OpenCLArray<b3BvhSubtreeInfo>(this->m_context,this->m_queue,numSubTrees);
//subTreesGPU->copyFromHost(bvh->getSubtreeInfoArray());
m_data->m_treeNodesGPU->copyFromHost(m_data->m_treeNodesCPU);
m_data->m_subTreesGPU->copyFromHost(m_data->m_subTreesCPU);
return collidableIndex;
@@ -661,12 +661,6 @@ int b3GpuNarrowPhase::registerConcaveMeshShape(b3AlignedObjectArray<b3Vector3>*
(*m_data->m_convexData)[m_data->m_numAcceleratedShapes] = 0;
m_data->m_convexFacesGPU->copyFromHost(m_data->m_convexFaces);
m_data->m_convexPolyhedraGPU->copyFromHost(m_data->m_convexPolyhedra);
m_data->m_uniqueEdgesGPU->copyFromHost(m_data->m_uniqueEdges);
m_data->m_convexVerticesGPU->copyFromHost(m_data->m_convexVertices);
m_data->m_convexIndicesGPU->copyFromHost(m_data->m_convexIndices);
return m_data->m_numAcceleratedShapes++;
}
@@ -907,6 +901,24 @@ int b3GpuNarrowPhase::getNumRigidBodies() const
void b3GpuNarrowPhase::writeAllBodiesToGpu()
{
if (m_data->m_localShapeAABBCPU->size())
{
m_data->m_localShapeAABBGPU->copyFromHost(*m_data->m_localShapeAABBCPU);
}
m_data->m_gpuChildShapes->copyFromHost(m_data->m_cpuChildShapes);
m_data->m_convexFacesGPU->copyFromHost(m_data->m_convexFaces);
m_data->m_convexPolyhedraGPU->copyFromHost(m_data->m_convexPolyhedra);
m_data->m_uniqueEdgesGPU->copyFromHost(m_data->m_uniqueEdges);
m_data->m_convexVerticesGPU->copyFromHost(m_data->m_convexVertices);
m_data->m_convexIndicesGPU->copyFromHost(m_data->m_convexIndices);
m_data->m_bvhInfoGPU->copyFromHost(m_data->m_bvhInfoCPU);
m_data->m_treeNodesGPU->copyFromHost(m_data->m_treeNodesCPU);
m_data->m_subTreesGPU->copyFromHost(m_data->m_subTreesCPU);
m_data->m_bodyBufferGPU->resize(m_data->m_numAcceleratedRigidBodies);
m_data->m_inertiaBufferGPU->resize(m_data->m_numAcceleratedRigidBodies);

View File

@@ -37,7 +37,7 @@ bool dumpContactStats = false;
b3GpuRigidBodyPipeline::b3GpuRigidBodyPipeline(cl_context ctx,cl_device_id device, cl_command_queue q,class b3GpuNarrowPhase* narrowphase, class b3GpuSapBroadphase* broadphaseSap , struct b3DynamicBvhBroadphase* broadphaseDbvt)
b3GpuRigidBodyPipeline::b3GpuRigidBodyPipeline(cl_context ctx,cl_device_id device, cl_command_queue q,class b3GpuNarrowPhase* narrowphase, class b3GpuSapBroadphase* broadphaseSap , struct b3DynamicBvhBroadphase* broadphaseDbvt, const b3Config& config)
{
m_data = new b3GpuRigidBodyPipelineInternalData;
m_data->m_context = ctx;
@@ -45,7 +45,7 @@ b3GpuRigidBodyPipeline::b3GpuRigidBodyPipeline(cl_context ctx,cl_device_id devic
m_data->m_queue = q;
m_data->m_solver = new b3PgsJacobiSolver(true);
b3Config config;
m_data->m_allAabbsGPU = new b3OpenCLArray<b3SapAabb>(ctx,q,config.m_maxConvexBodies);
m_data->m_overlappingPairsGPU = new b3OpenCLArray<b3BroadphasePair>(ctx,q,config.m_maxBroadphasePairs);
@@ -282,10 +282,9 @@ void b3GpuRigidBodyPipeline::stepSimulation(float deltaTime)
} else
#endif //TEST_OTHER_GPU_SOLVER
{
b3Config config;
int static0Index = m_data->m_narrowphase->getStatic0Index();
m_data->m_solver2->solveContacts(numBodies, gpuBodies.getBufferCL(),gpuInertias.getBufferCL(),numContacts, gpuContacts.getBufferCL(),config, static0Index);
m_data->m_solver2->solveContacts(numBodies, gpuBodies.getBufferCL(),gpuInertias.getBufferCL(),numContacts, gpuContacts.getBufferCL(),m_data->m_config, static0Index);
//m_data->m_solver4->solveContacts(m_data->m_narrowphase->getNumBodiesGpu(), gpuBodies.getBufferCL(), gpuInertias.getBufferCL(), numContacts, gpuContacts.getBufferCL());

View File

@@ -2,6 +2,7 @@
#define B3_GPU_RIGIDBODY_PIPELINE_H
#include "Bullet3OpenCL/Initialize/b3OpenCLInclude.h"
#include "b3Config.h"
class b3GpuRigidBodyPipeline
{
@@ -13,7 +14,7 @@ protected:
public:
b3GpuRigidBodyPipeline(cl_context ctx,cl_device_id device, cl_command_queue q , class b3GpuNarrowPhase* narrowphase, class b3GpuSapBroadphase* broadphaseSap, struct b3DynamicBvhBroadphase* broadphaseDbvt);
b3GpuRigidBodyPipeline(cl_context ctx,cl_device_id device, cl_command_queue q , class b3GpuNarrowPhase* narrowphase, class b3GpuSapBroadphase* broadphaseSap, struct b3DynamicBvhBroadphase* broadphaseDbvt, const b3Config& config);
virtual ~b3GpuRigidBodyPipeline();
void stepSimulation(float deltaTime);

View File

@@ -9,6 +9,7 @@
#include "Bullet3OpenCL/BroadphaseCollision/b3SapAabb.h"
#include "Bullet3Dynamics/ConstraintSolver/b3TypedConstraint.h"
#include "b3Config.h"
@@ -39,6 +40,7 @@ struct b3GpuRigidBodyPipelineInternalData
class b3GpuNarrowPhase* m_narrowphase;
b3Vector3 m_gravity;
b3Config m_config;
};
#endif //B3_GPU_RIGIDBODY_PIPELINE_INTERNAL_DATA_H