diff --git a/build/stringify.bat b/build/stringify.bat index 5c45bd8a5..ba3526ee3 100644 --- a/build/stringify.bat +++ b/build/stringify.bat @@ -13,6 +13,8 @@ premake4 --file=stringifyKernel.lua --kernelfile="../opencl/gpu_broadphase/kerne premake4 --file=stringifyKernel.lua --kernelfile="../opencl/gpu_sat/kernels/sat.cl" --headerfile="../opencl/gpu_sat/kernels/satKernels.h" --stringname="satKernelsCL" stringify premake4 --file=stringifyKernel.lua --kernelfile="../opencl/gpu_sat/kernels/satClipHullContacts.cl" --headerfile="../opencl/gpu_sat/kernels/satClipHullContacts.h" --stringname="satClipKernelsCL" stringify +premake4 --file=stringifyKernel.lua --kernelfile="../opencl/gpu_sat/kernels/bvhTraversal.cl" --headerfile="../opencl/gpu_sat/kernels/bvhTraversal.h" --stringname="bvhTraversalKernelCL" stringify + premake4 --file=stringifyKernel.lua --kernelfile="../opencl/gpu_rigidbody/kernels/integrateKernel.cl" --headerfile="../opencl/gpu_rigidbody/kernels/integrateKernel.h" --stringname="integrateKernelCL" stringify premake4 --file=stringifyKernel.lua --kernelfile="../opencl/gpu_rigidbody/kernels/updateAabbsKernel.cl" --headerfile="../opencl/gpu_rigidbody/kernels/updateAabbsKernel.h" --stringname="updateAabbsKernelCL" stringify diff --git a/demo/gpudemo/GpuDemo.h b/demo/gpudemo/GpuDemo.h index 081632724..4c07e3d07 100644 --- a/demo/gpudemo/GpuDemo.h +++ b/demo/gpudemo/GpuDemo.h @@ -16,36 +16,36 @@ public: typedef class GpuDemo* (CreateFunc)(); struct ConstructionInfo - { - bool useOpenCL; - int preferredOpenCLPlatformIndex; - int preferredOpenCLDeviceIndex; - int arraySizeX; - int arraySizeY; - int arraySizeZ; - bool m_useConcaveMesh; - float gapX; - float gapY; - float gapZ; - GLInstancingRenderer* m_instancingRenderer; - class btgWindowInterface* m_window; + { + bool useOpenCL; + int preferredOpenCLPlatformIndex; + int preferredOpenCLDeviceIndex; + int arraySizeX; + int arraySizeY; + int arraySizeZ; + bool m_useConcaveMesh; + float gapX; + float gapY; + float gapZ; + GLInstancingRenderer* m_instancingRenderer; + class btgWindowInterface* m_window; - ConstructionInfo() - :useOpenCL(true), - preferredOpenCLPlatformIndex(-1), - preferredOpenCLDeviceIndex(-1), - arraySizeX(23), - arraySizeY(23 ), - arraySizeZ(23), - m_useConcaveMesh(false), - gapX(4.3), - gapY(2.0), - gapZ(4.3), - m_instancingRenderer(0), - m_window(0) - { - } - }; + ConstructionInfo() + :useOpenCL(true), + preferredOpenCLPlatformIndex(-1), + preferredOpenCLDeviceIndex(-1), + arraySizeX(5), + arraySizeY(5 ), + arraySizeZ(5), + m_useConcaveMesh(false), + gapX(4.3), + gapY(2.0), + gapZ(4.3), + m_instancingRenderer(0), + m_window(0) + { + } + }; GpuDemo(); virtual ~GpuDemo(); diff --git a/demo/gpudemo/ParticleDemo.cpp b/demo/gpudemo/ParticleDemo.cpp index 0f2cad18d..a603dcfd9 100644 --- a/demo/gpudemo/ParticleDemo.cpp +++ b/demo/gpudemo/ParticleDemo.cpp @@ -380,7 +380,7 @@ void ParticleDemo::clientMoveAndDisplay() { btBufferInfoCL bInfo[] = { btBufferInfoCL( m_data->m_clPositionBuffer), - btBufferInfoCL( m_data->m_broadphaseGPU->getAabbBuffer()), + btBufferInfoCL( m_data->m_broadphaseGPU->getAabbBufferWS()), }; btLauncherCL launcher(m_clData->m_clQueue, m_data->m_updateAabbsKernel ); diff --git a/demo/gpudemo/broadphase/PairBench.cpp b/demo/gpudemo/broadphase/PairBench.cpp index 50b4a9fb3..abaf4fc7c 100644 --- a/demo/gpudemo/broadphase/PairBench.cpp +++ b/demo/gpudemo/broadphase/PairBench.cpp @@ -301,7 +301,7 @@ void PairBench::clientMoveAndDisplay() btLauncherCL launcher(m_clData->m_clQueue, m_data->m_updateAabbSimple); launcher.setBuffer(m_data->m_instancePosOrnColor->getBufferCL() ); launcher.setConst( numObjects); - launcher.setBuffer(m_data->m_broadphaseGPU->getAabbBuffer()); + launcher.setBuffer(m_data->m_broadphaseGPU->getAabbBufferWS()); launcher.launch1D( numObjects); clFinish(m_clData->m_clQueue); diff --git a/demo/gpudemo/rigidbody/ConcaveScene.cpp b/demo/gpudemo/rigidbody/ConcaveScene.cpp index 29fd244d1..3c99d6b3d 100644 --- a/demo/gpudemo/rigidbody/ConcaveScene.cpp +++ b/demo/gpudemo/rigidbody/ConcaveScene.cpp @@ -27,9 +27,9 @@ struct GraphicsVertex }; struct GraphicsShape { - const float* m_vertices; + btAlignedObjectArray* m_vertices; int m_numvertices; - const int* m_indices; + btAlignedObjectArray* m_indices; int m_numIndices; float m_scaling[4]; }; @@ -153,9 +153,9 @@ GraphicsShape* createGraphicsShapeFromWavefrontObj(objLoader* obj) GraphicsShape* gfxShape = new GraphicsShape; - gfxShape->m_vertices = &vertices->at(0).xyzw[0]; + gfxShape->m_vertices = vertices; gfxShape->m_numvertices = vertices->size(); - gfxShape->m_indices = &indicesPtr->at(0); + gfxShape->m_indices = indicesPtr; gfxShape->m_numIndices = indicesPtr->size(); for (int i=0;i<4;i++) gfxShape->m_scaling[i] = 1;//bake the scaling into the vertices @@ -167,8 +167,8 @@ void ConcaveScene::setupScene(const ConstructionInfo& ci) { objLoader* objData = new objLoader(); //char* fileName = "data/plane.obj"; - //char* fileName = "data/teddy.obj";//"plane.obj"; - char* fileName = "data/sponza_closed.obj";//"plane.obj"; + char* fileName = "data/teddy.obj";//"plane.obj"; + //char* fileName = "data/sponza_closed.obj";//"plane.obj"; FILE* f = 0; @@ -196,29 +196,46 @@ void ConcaveScene::setupScene(const ConstructionInfo& ci) return; objData->load(relativeFileName); - - GraphicsShape* shape = createGraphicsShapeFromWavefrontObj(objData); + int index=10; + { - int strideInBytes = 9*sizeof(float); - int numVertices = sizeof(cube_vertices)/strideInBytes; - int numIndices = sizeof(cube_vertices)/sizeof(int); - //int shapeId = ci.m_instancingRenderer->registerShape(&cube_vertices[0],numVertices,cube_indices,numIndices); - //int shapeId = ci.m_instancingRenderer->registerShape(&cube_vertices[0],numVertices,cube_indices,numIndices); - - - int shapeId = ci.m_instancingRenderer->registerShape(shape->m_vertices, shape->m_numvertices, shape->m_indices, shape->m_numIndices); - btQuaternion orn(0,0,0,1); - - btVector4 color(0,1,0,1.f);//0.5); + GraphicsShape* shape = createGraphicsShapeFromWavefrontObj(objData); btVector4 scaling(1,1,1,1); - + + btAlignedObjectArray verts; + for (int i=0;im_numvertices;i++) { - btVector3 position(0,0,0); - int id = ci.m_instancingRenderer->registerGraphicsInstance(shapeId,position,orn,color,scaling); + btVector3 vtx = (btVector3&)shape->m_vertices->at(i).xyzw; + verts.push_back(vtx); } + + int colIndex = m_data->m_np->registerConcaveMesh(&verts,shape->m_indices,scaling); + + { + int strideInBytes = 9*sizeof(float); + int numVertices = sizeof(cube_vertices)/strideInBytes; + int numIndices = sizeof(cube_vertices)/sizeof(int); + //int shapeId = ci.m_instancingRenderer->registerShape(&cube_vertices[0],numVertices,cube_indices,numIndices); + //int shapeId = ci.m_instancingRenderer->registerShape(&cube_vertices[0],numVertices,cube_indices,numIndices); + + + int shapeId = ci.m_instancingRenderer->registerShape(&shape->m_vertices->at(0).xyzw[0], shape->m_numvertices, &shape->m_indices->at(0), shape->m_numIndices); + btQuaternion orn(0,0,0,1); + + btVector4 color(0,1,0,1.f);//0.5); + + + { + float mass = 0.f; + btVector3 position(0,0,0); + int id = ci.m_instancingRenderer->registerGraphicsInstance(shapeId,position,orn,color,scaling); + int pid = m_data->m_rigidBodyPipeline->registerPhysicsInstance(mass,position,orn,colIndex,index); + index++; + } + } } int strideInBytes = 9*sizeof(float); @@ -228,37 +245,39 @@ void ConcaveScene::setupScene(const ConstructionInfo& ci) int shapeId = ci.m_instancingRenderer->registerShape(&cube_vertices[0],numVertices,cube_indices,numIndices); int group=1; int mask=1; - int index=10; - float scaling[4] = {1,1,1,1}; + + - int colIndex = m_data->m_np->registerConvexHullShape(&cube_vertices[0],strideInBytes,numVertices, scaling); + - if (0) + if (1) { - for (int i=0;i<1;i++) - { - for (int j=0;jm_np->registerConvexHullShape(&cube_vertices[0],strideInBytes,numVertices, scaling); + for (int i=0;iregisterGraphicsInstance(shapeId,position,orn,color,scaling); - int pid = m_data->m_rigidBodyPipeline->registerPhysicsInstance(mass,position,orn,colIndex,index); + btVector4 color(0,1,0,1); - index++; + int id = ci.m_instancingRenderer->registerGraphicsInstance(shapeId,position,orn,color,scaling); + int pid = m_data->m_rigidBodyPipeline->registerPhysicsInstance(mass,position,orn,colIndex,index); + + index++; + } } } } - } float camPos[4]={0,0,0,0};//65.5,4.5,65.5,0}; //float camPos[4]={1,12.5,1.5,0}; m_instancingRenderer->setCameraTargetPosition(camPos); - m_instancingRenderer->setCameraDistance(10); + m_instancingRenderer->setCameraDistance(50); } \ No newline at end of file diff --git a/opencl/gpu_broadphase/host/btGpuSapBroadphase.cpp b/opencl/gpu_broadphase/host/btGpuSapBroadphase.cpp index f3e926798..900445597 100644 --- a/opencl/gpu_broadphase/host/btGpuSapBroadphase.cpp +++ b/opencl/gpu_broadphase/host/btGpuSapBroadphase.cpp @@ -472,7 +472,7 @@ void btGpuSapBroadphase::createProxy(const btVector3& aabbMin, const btVector3& m_allAabbsCPU.push_back(aabb); } -cl_mem btGpuSapBroadphase::getAabbBuffer() +cl_mem btGpuSapBroadphase::getAabbBufferWS() { return m_allAabbsGPU.getBufferCL(); } diff --git a/opencl/gpu_broadphase/host/btGpuSapBroadphase.h b/opencl/gpu_broadphase/host/btGpuSapBroadphase.h index c119b8ce7..c1ccdcd6e 100644 --- a/opencl/gpu_broadphase/host/btGpuSapBroadphase.h +++ b/opencl/gpu_broadphase/host/btGpuSapBroadphase.h @@ -53,7 +53,7 @@ class btGpuSapBroadphase //call writeAabbsToGpu after done making all changes (createProxy etc) void writeAabbsToGpu(); - cl_mem getAabbBuffer(); + cl_mem getAabbBufferWS(); int getNumOverlap(); cl_mem getOverlappingPairBuffer(); }; diff --git a/opencl/gpu_rigidbody/host/btGpuNarrowPhase.cpp b/opencl/gpu_rigidbody/host/btGpuNarrowPhase.cpp index e9595bc0c..beec30c51 100644 --- a/opencl/gpu_rigidbody/host/btGpuNarrowPhase.cpp +++ b/opencl/gpu_rigidbody/host/btGpuNarrowPhase.cpp @@ -7,6 +7,8 @@ #include "../../gpu_broadphase/host/btSapAabb.h" #include #include "btConfig.h" +#include "../../gpu_sat/host/btOptimizedBvh.h" +#include "../../gpu_sat/host/btTriangleIndexVertexArray.h" struct btGpuNarrowPhaseInternalData { @@ -60,6 +62,7 @@ struct btGpuNarrowPhaseInternalData btOpenCLArray* m_localShapeAABBGPU; btAlignedObjectArray* m_localShapeAABBCPU; + btAlignedObjectArray m_bvhData; btConfig m_config; }; @@ -323,8 +326,137 @@ int btGpuNarrowPhase::registerConvexHullShape(btConvexUtility* utilPtr) } +int btGpuNarrowPhase::registerConcaveMesh(btAlignedObjectArray* vertices, btAlignedObjectArray* indices,const float* scaling1) +{ + btVector3 scaling(scaling1[0],scaling1[1],scaling1[2]); + + int collidableIndex = allocateCollidable(); + btCollidable& col = getCollidableCpu(collidableIndex); + + col.m_shapeType = SHAPE_CONCAVE_TRIMESH; + col.m_shapeIndex = registerConcaveMeshShape(vertices,indices,col,scaling); + + + + btSapAabb aabb; + btVector3 myAabbMin(1e30f,1e30f,1e30f); + btVector3 myAabbMax(-1e30f,-1e30f,-1e30f); + + for (int i=0;isize();i++) + { + btVector3 vtx(vertices->at(i)*scaling); + myAabbMin.setMin(vtx); + myAabbMax.setMax(vtx); + } + aabb.m_min[0] = myAabbMin[0]; + aabb.m_min[1] = myAabbMin[1]; + aabb.m_min[2] = myAabbMin[2]; + aabb.m_minIndices[3] = 0; + + aabb.m_max[0] = myAabbMax[0]; + aabb.m_max[1]= myAabbMax[1]; + aabb.m_max[2]= myAabbMax[2]; + aabb.m_signedMaxIndices[3]= 0; + + m_data->m_localShapeAABBCPU->push_back(aabb); + m_data->m_localShapeAABBGPU->push_back(aabb); + + btOptimizedBvh* bvh = new btOptimizedBvh(); + //void btOptimizedBvh::build(btStridingMeshInterface* triangles, bool useQuantizedAabbCompression, const btVector3& bvhAabbMin, const btVector3& bvhAabbMax) + + bool useQuantizedAabbCompression = true; + btTriangleIndexVertexArray* meshInterface=new btTriangleIndexVertexArray(); + btIndexedMesh mesh; + mesh.m_numTriangles = indices->size()/3; + mesh.m_numVertices = vertices->size(); + mesh.m_vertexBase = (const unsigned char *)&vertices->at(0).getX(); + mesh.m_vertexStride = sizeof(btVector3); + mesh.m_triangleIndexStride = 3 * sizeof(int);// or sizeof(int) + mesh.m_triangleIndexBase = (const unsigned char *)&indices->at(0); + + meshInterface->addIndexedMesh(mesh); + bvh->build(meshInterface, useQuantizedAabbCompression, (btVector3&)aabb.m_min, (btVector3&)aabb.m_max); + m_data->m_bvhData.push_back(bvh); + + return collidableIndex; +} + +int btGpuNarrowPhase::registerConcaveMeshShape(btAlignedObjectArray* vertices, btAlignedObjectArray* indices,btCollidable& col, const float* scaling1) +{ + btVector3 scaling(scaling1[0],scaling1[1],scaling1[2]); + + m_data->m_convexData->resize(m_data->m_numAcceleratedShapes+1); + m_data->m_convexPolyhedra.resize(m_data->m_numAcceleratedShapes+1); + + + btConvexPolyhedronCL& convex = m_data->m_convexPolyhedra.at(m_data->m_convexPolyhedra.size()-1); + convex.mC = btVector3(0,0,0); + convex.mE = btVector3(0,0,0); + convex.m_extents= btVector3(0,0,0); + convex.m_localCenter = btVector3(0,0,0); + convex.m_radius = 0.f; + + convex.m_numUniqueEdges = 0; + int edgeOffset = m_data->m_uniqueEdges.size(); + convex.m_uniqueEdgesOffset = edgeOffset; + + int faceOffset = m_data->m_convexFaces.size(); + convex.m_faceOffset = faceOffset; + + convex.m_numFaces = indices->size()/3; + m_data->m_convexFaces.resize(faceOffset+convex.m_numFaces); + m_data->m_convexIndices.reserve(convex.m_numFaces*3); + for (int i=0;iat(indices->at(i*3))*scaling); + btVector3 vert1(vertices->at(indices->at(i*3+1))*scaling); + btVector3 vert2(vertices->at(indices->at(i*3+2))*scaling); + + btVector3 normal = ((vert1-vert0).cross(vert2-vert0)).normalize(); + btScalar c = -(normal.dot(vert0)); + + m_data->m_convexFaces[convex.m_faceOffset+i].m_plane[0] = normal.x(); + m_data->m_convexFaces[convex.m_faceOffset+i].m_plane[1] = normal.y(); + m_data->m_convexFaces[convex.m_faceOffset+i].m_plane[2] = normal.z(); + m_data->m_convexFaces[convex.m_faceOffset+i].m_plane[3] = c; + int indexOffset = m_data->m_convexIndices.size(); + int numIndices = 3; + m_data->m_convexFaces[convex.m_faceOffset+i].m_numIndices = numIndices; + m_data->m_convexFaces[convex.m_faceOffset+i].m_indexOffset = indexOffset; + m_data->m_convexIndices.resize(indexOffset+numIndices); + for (int p=0;pat(i*3+p); + m_data->m_convexIndices[indexOffset+p] = vi;//convexPtr->m_faces[i].m_indices[p]; + } + } + + convex.m_numVertices = vertices->size(); + int vertexOffset = m_data->m_convexVertices.size(); + convex.m_vertexOffset =vertexOffset; + m_data->m_convexVertices.resize(vertexOffset+convex.m_numVertices); + for (int i=0;isize();i++) + { + m_data->m_convexVertices[vertexOffset+i] = vertices->at(i)*scaling; + } + + (*m_data->m_convexData)[m_data->m_numAcceleratedShapes] = 0; + + m_data->m_convexFacesGPU->copyFromHost(m_data->m_convexFaces); + + m_data->m_convexPolyhedraGPU->copyFromHost(m_data->m_convexPolyhedra); + m_data->m_uniqueEdgesGPU->copyFromHost(m_data->m_uniqueEdges); + m_data->m_convexVerticesGPU->copyFromHost(m_data->m_convexVertices); + m_data->m_convexIndicesGPU->copyFromHost(m_data->m_convexIndices); + + return m_data->m_numAcceleratedShapes++; +} @@ -388,7 +520,7 @@ cl_mem btGpuNarrowPhase::getContactsGpu() } -void btGpuNarrowPhase::computeContacts(cl_mem broadphasePairs, int numBroadphasePairs, cl_mem aabbs, int numObjects) +void btGpuNarrowPhase::computeContacts(cl_mem broadphasePairs, int numBroadphasePairs, cl_mem aabbsWS, int numObjects) { int nContactOut = 0; @@ -399,7 +531,7 @@ void btGpuNarrowPhase::computeContacts(cl_mem broadphasePairs, int numBroadphase btOpenCLArray broadphasePairsGPU(m_context,m_queue); broadphasePairsGPU.setFromOpenCLBuffer(broadphasePairs,numBroadphasePairs); btOpenCLArray clAabbArray(this->m_context,this->m_queue); - clAabbArray.setFromOpenCLBuffer(aabbs,numObjects); + clAabbArray.setFromOpenCLBuffer(aabbsWS,numObjects); m_data->m_gpuSatCollision->computeConvexConvexContactsGPUSAT( &broadphasePairsGPU, numBroadphasePairs, @@ -419,7 +551,7 @@ void btGpuNarrowPhase::computeContacts(cl_mem broadphasePairs, int numBroadphase *m_data->m_worldNormalsAGPU, *m_data->m_worldVertsA1GPU, *m_data->m_worldVertsB2GPU, - + m_data->m_bvhData, numObjects, maxTriConvexPairCapacity, triangleConvexPairs, diff --git a/opencl/gpu_rigidbody/host/btGpuNarrowPhase.h b/opencl/gpu_rigidbody/host/btGpuNarrowPhase.h index ca4381a6e..50d030c57 100644 --- a/opencl/gpu_rigidbody/host/btGpuNarrowPhase.h +++ b/opencl/gpu_rigidbody/host/btGpuNarrowPhase.h @@ -19,6 +19,8 @@ protected: cl_device_id m_device; cl_command_queue m_queue; + int registerConvexHullShape(class btConvexUtility* convexPtr, btCollidable& col); + int registerConcaveMeshShape(btAlignedObjectArray* vertices, btAlignedObjectArray* indices, btCollidable& col, const float* scaling); public: @@ -32,15 +34,14 @@ public: int registerCompoundShape(btAlignedObjectArray* childShapes); int registerFace(const btVector3& faceNormal, float faceConstant); - int registerConcaveMeshShape(btAlignedObjectArray* vertices, btAlignedObjectArray* indices, btCollidable& col, const float* scaling); - int registerConcaveMeshShape(class objLoader* obj, btCollidable& col, const float* scaling); + + int registerConcaveMesh(btAlignedObjectArray* vertices, btAlignedObjectArray* indices,const float* scaling); //do they need to be merged? - int registerConvexHullShape(class btConvexUtility* convexPtr, btCollidable& col); + int registerConvexHullShape(btConvexUtility* utilPtr); int registerConvexHullShape(const float* vertices, int strideInBytes, int numVertices, const float* scaling); - int registerConvexHeightfield(class ConvexHeightField* convexShape,btCollidable& col); int registerRigidBody(int collidableIndex, float mass, const float* position, const float* orientation, const float* aabbMin, const float* aabbMax,bool writeToGpu); void setObjectTransform(const float* position, const float* orientation , int bodyIndex); diff --git a/opencl/gpu_rigidbody/host/btGpuRigidBodyPipeline.cpp b/opencl/gpu_rigidbody/host/btGpuRigidBodyPipeline.cpp index 704942d23..f73079a9b 100644 --- a/opencl/gpu_rigidbody/host/btGpuRigidBodyPipeline.cpp +++ b/opencl/gpu_rigidbody/host/btGpuRigidBodyPipeline.cpp @@ -78,10 +78,10 @@ void btGpuRigidBodyPipeline::stepSimulation(float deltaTime) if (numPairs) { cl_mem pairs = m_data->m_broadphaseSap->getOverlappingPairBuffer(); - cl_mem aabbs = m_data->m_broadphaseSap->getAabbBuffer(); + cl_mem aabbsWS = m_data->m_broadphaseSap->getAabbBufferWS(); - m_data->m_narrowphase->computeContacts(pairs,numPairs,aabbs,numBodies); + m_data->m_narrowphase->computeContacts(pairs,numPairs,aabbsWS,numBodies); numContacts = m_data->m_narrowphase->getNumContactsGpu(); //if (numContacts) // printf("numContacts = %d\n", numContacts); @@ -169,7 +169,7 @@ void btGpuRigidBodyPipeline::setupGpuAabbsFull() launcher.setBuffer(collidables); cl_mem localAabbs = m_data->m_narrowphase->getAabbBufferGpu(); launcher.setBuffer(localAabbs); - cl_mem worldAabbs = m_data->m_broadphaseSap->getAabbBuffer(); + cl_mem worldAabbs = m_data->m_broadphaseSap->getAabbBufferWS(); launcher.setBuffer(worldAabbs); launcher.launch1D(numBodies); oclCHECKERROR(ciErrNum, CL_SUCCESS); diff --git a/opencl/gpu_sat/host/ConvexHullContact.cpp b/opencl/gpu_sat/host/ConvexHullContact.cpp index 0987e3486..d527a9c36 100644 --- a/opencl/gpu_sat/host/ConvexHullContact.cpp +++ b/opencl/gpu_sat/host/ConvexHullContact.cpp @@ -23,6 +23,7 @@ subject to the following restrictions: #include "ConvexHullContact.h" #include //memcpy #include "btConvexPolyhedronCL.h" +#include "btOptimizedBvh.h" typedef btAlignedObjectArray btVertexArray; #include "BulletCommon/btQuickprof.h" @@ -34,8 +35,11 @@ typedef btAlignedObjectArray btVertexArray; #include "../kernels/satKernels.h" #include "../kernels/satClipHullContacts.h" +#include "../kernels/bvhTraversal.h" + #include "BulletGeometry/btAabbUtil2.h" + #define dot3F4 btDot GpuSatCollision::GpuSatCollision(cl_context ctx,cl_device_id device, cl_command_queue q ) @@ -92,11 +96,8 @@ m_totalContactsOut(m_context, m_queue) m_newContactReductionKernel = btOpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, "newContactReductionKernel",&errNum,satClipContactsProg); btAssert(errNum==CL_SUCCESS); - - - - - } else + } + else { m_clipHullHullKernel=0; m_clipCompoundsHullHullKernel = 0; @@ -106,12 +107,25 @@ m_totalContactsOut(m_context, m_queue) m_clipHullHullConcaveConvexKernel = 0; m_extractManifoldAndAddContactKernel = 0; } + + if (1) + { + const char* srcBvh = bvhTraversalKernelCL; + cl_program bvhTraversalProg = btOpenCLUtils::compileCLProgramFromString(m_context,m_device,srcBvh,&errNum,"","opencl/gpu_sat/kernels/bvhTraversal.cl"); + btAssert(errNum==CL_SUCCESS); + + m_bvhTraversalKernel = btOpenCLUtils::compileCLKernelFromString(m_context, m_device,srcBvh, "bvhTraversalKernel",&errNum,bvhTraversalProg); + btAssert(errNum==CL_SUCCESS); + + } + } GpuSatCollision::~GpuSatCollision() { + if (m_findSeparatingAxisKernel) clReleaseKernel(m_findSeparatingAxisKernel); @@ -138,9 +152,23 @@ GpuSatCollision::~GpuSatCollision() clReleaseKernel(m_clipHullHullConcaveConvexKernel); if (m_extractManifoldAndAddContactKernel) clReleaseKernel(m_extractManifoldAndAddContactKernel); + + if (m_bvhTraversalKernel) + clReleaseKernel(m_bvhTraversalKernel); + } +struct MyTriangleCallback : public btNodeOverlapCallback +{ + int m_bodyIndexA; + int m_bodyIndexB; + virtual void processNode(int subPart, int triangleIndex) + { + printf("bodyIndexA %d, bodyIndexB %d\n",m_bodyIndexA,m_bodyIndexB); + printf("triangleIndex %d\n", triangleIndex); + } +}; void GpuSatCollision::computeConvexConvexContactsGPUSAT( const btOpenCLArray* pairs, int nPairs, const btOpenCLArray* bodyBuf, @@ -154,12 +182,13 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( const btOpenCLArray& gpuCollidables, const btOpenCLArray& gpuChildShapes, - const btOpenCLArray& clAabbs, + const btOpenCLArray& clAabbsWS, btOpenCLArray& worldVertsB1GPU, btOpenCLArray& clippingFacesOutGPU, btOpenCLArray& worldNormalsAGPU, btOpenCLArray& worldVertsA1GPU, - btOpenCLArray& worldVertsB2GPU, + btOpenCLArray& worldVertsB2GPU, + btAlignedObjectArray& bvhData, int numObjects, int maxTriConvexPairCapacity, btOpenCLArray& triangleConvexPairsOut, @@ -219,7 +248,7 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( const btOpenCLArray maxTriConvexPairCapacity) - numConcave = maxTriConvexPairCapacity; - triangleConvexPairsOut.resize(numConcave); + if (numConcave) + { + if (numConcave > maxTriConvexPairCapacity) + numConcave = maxTriConvexPairCapacity; + + triangleConvexPairsOut.resize(numConcave); + btAlignedObjectArray triangleConvexPairsOutCPU; + triangleConvexPairsOut.copyToHost(triangleConvexPairsOutCPU); + printf("-----------------------\n", numConcave); + printf("got %d concave pairs\n", numConcave); + btAssert(numConcave = triangleConvexPairsOutCPU.size()); + + for (int i=0;i collidablesCPU; + gpuCollidables.copyToHost(collidablesCPU); + btAlignedObjectArray bodiesCPU; + bodyBuf->copyToHost(bodiesCPU); + btAlignedObjectArray pairsCPU; + + btAlignedObjectArray aabbsWSCPU; + clAabbsWS.copyToHost(aabbsWSCPU); + + pairs->copyToHost(pairsCPU); + MyTriangleCallback triCallback; + + + for (int i=0;ireportAabbOverlappingNodex(&triCallback, aabbMin,aabbMax); + } + } + + //now perform the tree query on GPU + + int numNodes = bvhData[0]->getLeafNodeArray().size(); + btOpenCLArray treeNodesGPU(this->m_context,this->m_queue,numNodes); + treeNodesGPU.copyFromHost(bvhData[0]->getQuantizedNodeArray()); + int numSubTrees = bvhData[0]->getSubtreeInfoArray().size(); + btOpenCLArray subTreesGPU(this->m_context,this->m_queue,numSubTrees); + subTreesGPU.copyFromHost(bvhData[0]->getSubtreeInfoArray()); + + + /* + __kernel void bvhTraversalKernel( __global const int2* pairs, + __global const BodyData* rigidBodies, + __global const btCollidableGpu* collidables, + __global btAabbCL* aabbs, + __global int4* concavePairsOut, + __global volatile int* numConcavePairsOut, + int numPairs, + int maxNumConcavePairsCapacity + ) + + btBufferInfoCL( pairs->getBufferCL(), true ), + btBufferInfoCL( bodyBuf->getBufferCL(),true), + btBufferInfoCL( gpuCollidables.getBufferCL(),true), + btBufferInfoCL( convexData.getBufferCL(),true), + btBufferInfoCL( gpuVertices.getBufferCL(),true), + btBufferInfoCL( gpuUniqueEdges.getBufferCL(),true), + btBufferInfoCL( gpuFaces.getBufferCL(),true), + btBufferInfoCL( gpuIndices.getBufferCL(),true), + btBufferInfoCL( clAabbsWS.getBufferCL(),true), + btBufferInfoCL( sepNormals.getBufferCL()), + btBufferInfoCL( hasSeparatingNormals.getBufferCL()), + btBufferInfoCL( triangleConvexPairsOut.getBufferCL()), + btBufferInfoCL( concaveSepNormals.getBufferCL()), + btBufferInfoCL( numConcavePairsOut.getBufferCL()) + + */ + + { + int np = numConcavePairsOut.at(0); + printf("np=%d\n", np); + btLauncherCL launcher(m_queue, m_bvhTraversalKernel); + launcher.setBuffer( pairs->getBufferCL()); + launcher.setBuffer( bodyBuf->getBufferCL()); + launcher.setBuffer( gpuCollidables.getBufferCL()); + launcher.setBuffer( clAabbsWS.getBufferCL()); + launcher.setBuffer( triangleConvexPairsOut.getBufferCL()); + launcher.setBuffer( numConcavePairsOut.getBufferCL()); + launcher.setConst( nPairs ); + launcher.setConst( maxTriConvexPairCapacity); + int num = nPairs; + launcher.launch1D( num); + clFinish(m_queue); + np = numConcavePairsOut.at(0); + printf("np=%d\n", np); + + } + printf("-----------------------\n", numConcave); + } @@ -256,7 +395,7 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( const btOpenCLArray m_totalContactsOut; @@ -73,6 +75,7 @@ struct GpuSatCollision btOpenCLArray& worldNormalsAGPU, btOpenCLArray& worldVertsA1GPU, btOpenCLArray& worldVertsB2GPU, + btAlignedObjectArray& bvhData, int numObjects, int maxTriConvexPairCapacity, btOpenCLArray& triangleConvexPairs, diff --git a/opencl/gpu_sat/host/btOptimizedBvh.cpp b/opencl/gpu_sat/host/btOptimizedBvh.cpp new file mode 100644 index 000000000..a8dff1691 --- /dev/null +++ b/opencl/gpu_sat/host/btOptimizedBvh.cpp @@ -0,0 +1,391 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2003-2009 Erwin Coumans http://bulletphysics.org + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + + +#include "btOptimizedBvh.h" +#include "btStridingMeshInterface.h" +#include "BulletGeometry/btAabbUtil2.h" +#include "BulletCommon/btIDebugDraw.h" + + +btOptimizedBvh::btOptimizedBvh() +{ +} + +btOptimizedBvh::~btOptimizedBvh() +{ +} + + +void btOptimizedBvh::build(btStridingMeshInterface* triangles, bool useQuantizedAabbCompression, const btVector3& bvhAabbMin, const btVector3& bvhAabbMax) +{ + m_useQuantization = useQuantizedAabbCompression; + + + // NodeArray triangleNodes; + + struct NodeTriangleCallback : public btInternalTriangleIndexCallback + { + + NodeArray& m_triangleNodes; + + NodeTriangleCallback& operator=(NodeTriangleCallback& other) + { + m_triangleNodes.copyFromArray(other.m_triangleNodes); + return *this; + } + + NodeTriangleCallback(NodeArray& triangleNodes) + :m_triangleNodes(triangleNodes) + { + } + + virtual void internalProcessTriangleIndex(btVector3* triangle,int partId,int triangleIndex) + { + btOptimizedBvhNode node; + btVector3 aabbMin,aabbMax; + aabbMin.setValue(btScalar(BT_LARGE_FLOAT),btScalar(BT_LARGE_FLOAT),btScalar(BT_LARGE_FLOAT)); + aabbMax.setValue(btScalar(-BT_LARGE_FLOAT),btScalar(-BT_LARGE_FLOAT),btScalar(-BT_LARGE_FLOAT)); + aabbMin.setMin(triangle[0]); + aabbMax.setMax(triangle[0]); + aabbMin.setMin(triangle[1]); + aabbMax.setMax(triangle[1]); + aabbMin.setMin(triangle[2]); + aabbMax.setMax(triangle[2]); + + //with quantization? + node.m_aabbMinOrg = aabbMin; + node.m_aabbMaxOrg = aabbMax; + + node.m_escapeIndex = -1; + + //for child nodes + node.m_subPart = partId; + node.m_triangleIndex = triangleIndex; + m_triangleNodes.push_back(node); + } + }; + struct QuantizedNodeTriangleCallback : public btInternalTriangleIndexCallback + { + QuantizedNodeArray& m_triangleNodes; + const btQuantizedBvh* m_optimizedTree; // for quantization + + QuantizedNodeTriangleCallback& operator=(QuantizedNodeTriangleCallback& other) + { + m_triangleNodes.copyFromArray(other.m_triangleNodes); + m_optimizedTree = other.m_optimizedTree; + return *this; + } + + QuantizedNodeTriangleCallback(QuantizedNodeArray& triangleNodes,const btQuantizedBvh* tree) + :m_triangleNodes(triangleNodes),m_optimizedTree(tree) + { + } + + virtual void internalProcessTriangleIndex(btVector3* triangle,int partId,int triangleIndex) + { + // The partId and triangle index must fit in the same (positive) integer + btAssert(partId < (1<=0); + + btQuantizedBvhNode node; + btVector3 aabbMin,aabbMax; + aabbMin.setValue(btScalar(BT_LARGE_FLOAT),btScalar(BT_LARGE_FLOAT),btScalar(BT_LARGE_FLOAT)); + aabbMax.setValue(btScalar(-BT_LARGE_FLOAT),btScalar(-BT_LARGE_FLOAT),btScalar(-BT_LARGE_FLOAT)); + aabbMin.setMin(triangle[0]); + aabbMax.setMax(triangle[0]); + aabbMin.setMin(triangle[1]); + aabbMax.setMax(triangle[1]); + aabbMin.setMin(triangle[2]); + aabbMax.setMax(triangle[2]); + + //PCK: add these checks for zero dimensions of aabb + const btScalar MIN_AABB_DIMENSION = btScalar(0.002); + const btScalar MIN_AABB_HALF_DIMENSION = btScalar(0.001); + if (aabbMax.x() - aabbMin.x() < MIN_AABB_DIMENSION) + { + aabbMax.setX(aabbMax.x() + MIN_AABB_HALF_DIMENSION); + aabbMin.setX(aabbMin.x() - MIN_AABB_HALF_DIMENSION); + } + if (aabbMax.y() - aabbMin.y() < MIN_AABB_DIMENSION) + { + aabbMax.setY(aabbMax.y() + MIN_AABB_HALF_DIMENSION); + aabbMin.setY(aabbMin.y() - MIN_AABB_HALF_DIMENSION); + } + if (aabbMax.z() - aabbMin.z() < MIN_AABB_DIMENSION) + { + aabbMax.setZ(aabbMax.z() + MIN_AABB_HALF_DIMENSION); + aabbMin.setZ(aabbMin.z() - MIN_AABB_HALF_DIMENSION); + } + + m_optimizedTree->quantize(&node.m_quantizedAabbMin[0],aabbMin,0); + m_optimizedTree->quantize(&node.m_quantizedAabbMax[0],aabbMax,1); + + node.m_escapeIndexOrTriangleIndex = (partId<<(31-MAX_NUM_PARTS_IN_BITS)) | triangleIndex; + + m_triangleNodes.push_back(node); + } + }; + + + + int numLeafNodes = 0; + + + if (m_useQuantization) + { + + //initialize quantization values + setQuantizationValues(bvhAabbMin,bvhAabbMax); + + QuantizedNodeTriangleCallback callback(m_quantizedLeafNodes,this); + + + triangles->InternalProcessAllTriangles(&callback,m_bvhAabbMin,m_bvhAabbMax); + + //now we have an array of leafnodes in m_leafNodes + numLeafNodes = m_quantizedLeafNodes.size(); + + + m_quantizedContiguousNodes.resize(2*numLeafNodes); + + + } else + { + NodeTriangleCallback callback(m_leafNodes); + + btVector3 aabbMin(btScalar(-BT_LARGE_FLOAT),btScalar(-BT_LARGE_FLOAT),btScalar(-BT_LARGE_FLOAT)); + btVector3 aabbMax(btScalar(BT_LARGE_FLOAT),btScalar(BT_LARGE_FLOAT),btScalar(BT_LARGE_FLOAT)); + + triangles->InternalProcessAllTriangles(&callback,aabbMin,aabbMax); + + //now we have an array of leafnodes in m_leafNodes + numLeafNodes = m_leafNodes.size(); + + m_contiguousNodes.resize(2*numLeafNodes); + } + + m_curNodeIndex = 0; + + buildTree(0,numLeafNodes); + + ///if the entire tree is small then subtree size, we need to create a header info for the tree + if(m_useQuantization && !m_SubtreeHeaders.size()) + { + btBvhSubtreeInfo& subtree = m_SubtreeHeaders.expand(); + subtree.setAabbFromQuantizeNode(m_quantizedContiguousNodes[0]); + subtree.m_rootNodeIndex = 0; + subtree.m_subtreeSize = m_quantizedContiguousNodes[0].isLeafNode() ? 1 : m_quantizedContiguousNodes[0].getEscapeIndex(); + } + + //PCK: update the copy of the size + m_subtreeHeaderCount = m_SubtreeHeaders.size(); + + //PCK: clear m_quantizedLeafNodes and m_leafNodes, they are temporary + m_quantizedLeafNodes.clear(); + m_leafNodes.clear(); +} + + + + +void btOptimizedBvh::refit(btStridingMeshInterface* meshInterface,const btVector3& aabbMin,const btVector3& aabbMax) +{ + if (m_useQuantization) + { + + setQuantizationValues(aabbMin,aabbMax); + + updateBvhNodes(meshInterface,0,m_curNodeIndex,0); + + ///now update all subtree headers + + int i; + for (i=0;i m_bvhAabbMin.getX()); + btAssert(aabbMin.getY() > m_bvhAabbMin.getY()); + btAssert(aabbMin.getZ() > m_bvhAabbMin.getZ()); + + btAssert(aabbMax.getX() < m_bvhAabbMax.getX()); + btAssert(aabbMax.getY() < m_bvhAabbMax.getY()); + btAssert(aabbMax.getZ() < m_bvhAabbMax.getZ()); + + ///we should update all quantization values, using updateBvhNodes(meshInterface); + ///but we only update chunks that overlap the given aabb + + unsigned short quantizedQueryAabbMin[3]; + unsigned short quantizedQueryAabbMax[3]; + + quantize(&quantizedQueryAabbMin[0],aabbMin,0); + quantize(&quantizedQueryAabbMax[0],aabbMax,1); + + int i; + for (i=0;im_SubtreeHeaders.size();i++) + { + btBvhSubtreeInfo& subtree = m_SubtreeHeaders[i]; + + //PCK: unsigned instead of bool + unsigned overlap = testQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin,quantizedQueryAabbMax,subtree.m_quantizedAabbMin,subtree.m_quantizedAabbMax); + if (overlap != 0) + { + updateBvhNodes(meshInterface,subtree.m_rootNodeIndex,subtree.m_rootNodeIndex+subtree.m_subtreeSize,i); + + subtree.setAabbFromQuantizeNode(m_quantizedContiguousNodes[subtree.m_rootNodeIndex]); + } + } + +} + +void btOptimizedBvh::updateBvhNodes(btStridingMeshInterface* meshInterface,int firstNode,int endNode,int index) +{ + (void)index; + + btAssert(m_useQuantization); + + int curNodeSubPart=-1; + + //get access info to trianglemesh data + const unsigned char *vertexbase = 0; + int numverts = 0; + PHY_ScalarType type = PHY_INTEGER; + int stride = 0; + const unsigned char *indexbase = 0; + int indexstride = 0; + int numfaces = 0; + PHY_ScalarType indicestype = PHY_INTEGER; + + btVector3 triangleVerts[3]; + btVector3 aabbMin,aabbMax; + const btVector3& meshScaling = meshInterface->getScaling(); + + int i; + for (i=endNode-1;i>=firstNode;i--) + { + + + btQuantizedBvhNode& curNode = m_quantizedContiguousNodes[i]; + if (curNode.isLeafNode()) + { + //recalc aabb from triangle data + int nodeSubPart = curNode.getPartId(); + int nodeTriangleIndex = curNode.getTriangleIndex(); + if (nodeSubPart != curNodeSubPart) + { + if (curNodeSubPart >= 0) + meshInterface->unLockReadOnlyVertexBase(curNodeSubPart); + meshInterface->getLockedReadOnlyVertexIndexBase(&vertexbase,numverts, type,stride,&indexbase,indexstride,numfaces,indicestype,nodeSubPart); + + curNodeSubPart = nodeSubPart; + btAssert(indicestype==PHY_INTEGER||indicestype==PHY_SHORT); + } + //triangles->getLockedReadOnlyVertexIndexBase(vertexBase,numVerts, + + unsigned int* gfxbase = (unsigned int*)(indexbase+nodeTriangleIndex*indexstride); + + + for (int j=2;j>=0;j--) + { + + int graphicsindex = indicestype==PHY_SHORT?((unsigned short*)gfxbase)[j]:gfxbase[j]; + if (type == PHY_FLOAT) + { + float* graphicsbase = (float*)(vertexbase+graphicsindex*stride); + triangleVerts[j] = btVector3( + graphicsbase[0]*meshScaling.getX(), + graphicsbase[1]*meshScaling.getY(), + graphicsbase[2]*meshScaling.getZ()); + } + else + { + double* graphicsbase = (double*)(vertexbase+graphicsindex*stride); + triangleVerts[j] = btVector3( btScalar(graphicsbase[0]*meshScaling.getX()), btScalar(graphicsbase[1]*meshScaling.getY()), btScalar(graphicsbase[2]*meshScaling.getZ())); + } + } + + + + aabbMin.setValue(btScalar(BT_LARGE_FLOAT),btScalar(BT_LARGE_FLOAT),btScalar(BT_LARGE_FLOAT)); + aabbMax.setValue(btScalar(-BT_LARGE_FLOAT),btScalar(-BT_LARGE_FLOAT),btScalar(-BT_LARGE_FLOAT)); + aabbMin.setMin(triangleVerts[0]); + aabbMax.setMax(triangleVerts[0]); + aabbMin.setMin(triangleVerts[1]); + aabbMax.setMax(triangleVerts[1]); + aabbMin.setMin(triangleVerts[2]); + aabbMax.setMax(triangleVerts[2]); + + quantize(&curNode.m_quantizedAabbMin[0],aabbMin,0); + quantize(&curNode.m_quantizedAabbMax[0],aabbMax,1); + + } else + { + //combine aabb from both children + + btQuantizedBvhNode* leftChildNode = &m_quantizedContiguousNodes[i+1]; + + btQuantizedBvhNode* rightChildNode = leftChildNode->isLeafNode() ? &m_quantizedContiguousNodes[i+2] : + &m_quantizedContiguousNodes[i+1+leftChildNode->getEscapeIndex()]; + + + { + for (int i=0;i<3;i++) + { + curNode.m_quantizedAabbMin[i] = leftChildNode->m_quantizedAabbMin[i]; + if (curNode.m_quantizedAabbMin[i]>rightChildNode->m_quantizedAabbMin[i]) + curNode.m_quantizedAabbMin[i]=rightChildNode->m_quantizedAabbMin[i]; + + curNode.m_quantizedAabbMax[i] = leftChildNode->m_quantizedAabbMax[i]; + if (curNode.m_quantizedAabbMax[i] < rightChildNode->m_quantizedAabbMax[i]) + curNode.m_quantizedAabbMax[i] = rightChildNode->m_quantizedAabbMax[i]; + } + } + } + + } + + if (curNodeSubPart >= 0) + meshInterface->unLockReadOnlyVertexBase(curNodeSubPart); + + +} + +///deSerializeInPlace loads and initializes a BVH from a buffer in memory 'in place' +btOptimizedBvh* btOptimizedBvh::deSerializeInPlace(void *i_alignedDataBuffer, unsigned int i_dataBufferSize, bool i_swapEndian) +{ + btQuantizedBvh* bvh = btQuantizedBvh::deSerializeInPlace(i_alignedDataBuffer,i_dataBufferSize,i_swapEndian); + + //we don't add additional data so just do a static upcast + return static_cast(bvh); +} diff --git a/opencl/gpu_sat/host/btOptimizedBvh.h b/opencl/gpu_sat/host/btOptimizedBvh.h new file mode 100644 index 000000000..e6692b456 --- /dev/null +++ b/opencl/gpu_sat/host/btOptimizedBvh.h @@ -0,0 +1,65 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2003-2009 Erwin Coumans http://bulletphysics.org + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + +///Contains contributions from Disney Studio's + +#ifndef BT_OPTIMIZED_BVH_H +#define BT_OPTIMIZED_BVH_H + +#include "btQuantizedBvh.h" + +class btStridingMeshInterface; + + +///The btOptimizedBvh extends the btQuantizedBvh to create AABB tree for triangle meshes, through the btStridingMeshInterface. +ATTRIBUTE_ALIGNED16(class) btOptimizedBvh : public btQuantizedBvh +{ + +public: + BT_DECLARE_ALIGNED_ALLOCATOR(); + +protected: + +public: + + btOptimizedBvh(); + + virtual ~btOptimizedBvh(); + + void build(btStridingMeshInterface* triangles,bool useQuantizedAabbCompression, const btVector3& bvhAabbMin, const btVector3& bvhAabbMax); + + void refit(btStridingMeshInterface* triangles,const btVector3& aabbMin,const btVector3& aabbMax); + + void refitPartial(btStridingMeshInterface* triangles,const btVector3& aabbMin, const btVector3& aabbMax); + + void updateBvhNodes(btStridingMeshInterface* meshInterface,int firstNode,int endNode,int index); + + /// Data buffer MUST be 16 byte aligned + virtual bool serializeInPlace(void *o_alignedDataBuffer, unsigned i_dataBufferSize, bool i_swapEndian) const + { + return btQuantizedBvh::serialize(o_alignedDataBuffer,i_dataBufferSize,i_swapEndian); + + } + + ///deSerializeInPlace loads and initializes a BVH from a buffer in memory 'in place' + static btOptimizedBvh *deSerializeInPlace(void *i_alignedDataBuffer, unsigned int i_dataBufferSize, bool i_swapEndian); + + +}; + + +#endif //BT_OPTIMIZED_BVH_H + + diff --git a/opencl/gpu_sat/host/btQuantizedBvh.cpp b/opencl/gpu_sat/host/btQuantizedBvh.cpp new file mode 100644 index 000000000..3c10d5bcf --- /dev/null +++ b/opencl/gpu_sat/host/btQuantizedBvh.cpp @@ -0,0 +1,1302 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/ + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + +#include "btQuantizedBvh.h" + +#include "BulletGeometry/btAabbUtil2.h" +#include "BulletCommon/btIDebugDraw.h" + + +#define RAYAABB2 + +btQuantizedBvh::btQuantizedBvh() : + m_bulletVersion(BT_BULLET_VERSION), + m_useQuantization(false), + m_traversalMode(TRAVERSAL_STACKLESS_CACHE_FRIENDLY) + //m_traversalMode(TRAVERSAL_STACKLESS) + //m_traversalMode(TRAVERSAL_RECURSIVE) + ,m_subtreeHeaderCount(0) //PCK: add this line +{ + m_bvhAabbMin.setValue(-SIMD_INFINITY,-SIMD_INFINITY,-SIMD_INFINITY); + m_bvhAabbMax.setValue(SIMD_INFINITY,SIMD_INFINITY,SIMD_INFINITY); +} + + + + + +void btQuantizedBvh::buildInternal() +{ + ///assumes that caller filled in the m_quantizedLeafNodes + m_useQuantization = true; + int numLeafNodes = 0; + + if (m_useQuantization) + { + //now we have an array of leafnodes in m_leafNodes + numLeafNodes = m_quantizedLeafNodes.size(); + + m_quantizedContiguousNodes.resize(2*numLeafNodes); + + } + + m_curNodeIndex = 0; + + buildTree(0,numLeafNodes); + + ///if the entire tree is small then subtree size, we need to create a header info for the tree + if(m_useQuantization && !m_SubtreeHeaders.size()) + { + btBvhSubtreeInfo& subtree = m_SubtreeHeaders.expand(); + subtree.setAabbFromQuantizeNode(m_quantizedContiguousNodes[0]); + subtree.m_rootNodeIndex = 0; + subtree.m_subtreeSize = m_quantizedContiguousNodes[0].isLeafNode() ? 1 : m_quantizedContiguousNodes[0].getEscapeIndex(); + } + + //PCK: update the copy of the size + m_subtreeHeaderCount = m_SubtreeHeaders.size(); + + //PCK: clear m_quantizedLeafNodes and m_leafNodes, they are temporary + m_quantizedLeafNodes.clear(); + m_leafNodes.clear(); +} + + + +///just for debugging, to visualize the individual patches/subtrees +#ifdef DEBUG_PATCH_COLORS +btVector3 color[4]= +{ + btVector3(1,0,0), + btVector3(0,1,0), + btVector3(0,0,1), + btVector3(0,1,1) +}; +#endif //DEBUG_PATCH_COLORS + + + +void btQuantizedBvh::setQuantizationValues(const btVector3& bvhAabbMin,const btVector3& bvhAabbMax,btScalar quantizationMargin) +{ + //enlarge the AABB to avoid division by zero when initializing the quantization values + btVector3 clampValue(quantizationMargin,quantizationMargin,quantizationMargin); + m_bvhAabbMin = bvhAabbMin - clampValue; + m_bvhAabbMax = bvhAabbMax + clampValue; + btVector3 aabbSize = m_bvhAabbMax - m_bvhAabbMin; + m_bvhQuantization = btVector3(btScalar(65533.0),btScalar(65533.0),btScalar(65533.0)) / aabbSize; + m_useQuantization = true; +} + + + + +btQuantizedBvh::~btQuantizedBvh() +{ +} + +#ifdef DEBUG_TREE_BUILDING +int gStackDepth = 0; +int gMaxStackDepth = 0; +#endif //DEBUG_TREE_BUILDING + +void btQuantizedBvh::buildTree (int startIndex,int endIndex) +{ +#ifdef DEBUG_TREE_BUILDING + gStackDepth++; + if (gStackDepth > gMaxStackDepth) + gMaxStackDepth = gStackDepth; +#endif //DEBUG_TREE_BUILDING + + + int splitAxis, splitIndex, i; + int numIndices =endIndex-startIndex; + int curIndex = m_curNodeIndex; + + btAssert(numIndices>0); + + if (numIndices==1) + { +#ifdef DEBUG_TREE_BUILDING + gStackDepth--; +#endif //DEBUG_TREE_BUILDING + + assignInternalNodeFromLeafNode(m_curNodeIndex,startIndex); + + m_curNodeIndex++; + return; + } + //calculate Best Splitting Axis and where to split it. Sort the incoming 'leafNodes' array within range 'startIndex/endIndex'. + + splitAxis = calcSplittingAxis(startIndex,endIndex); + + splitIndex = sortAndCalcSplittingIndex(startIndex,endIndex,splitAxis); + + int internalNodeIndex = m_curNodeIndex; + + //set the min aabb to 'inf' or a max value, and set the max aabb to a -inf/minimum value. + //the aabb will be expanded during buildTree/mergeInternalNodeAabb with actual node values + setInternalNodeAabbMin(m_curNodeIndex,m_bvhAabbMax);//can't use btVector3(SIMD_INFINITY,SIMD_INFINITY,SIMD_INFINITY)) because of quantization + setInternalNodeAabbMax(m_curNodeIndex,m_bvhAabbMin);//can't use btVector3(-SIMD_INFINITY,-SIMD_INFINITY,-SIMD_INFINITY)) because of quantization + + + for (i=startIndex;im_escapeIndex; + + int leftChildNodexIndex = m_curNodeIndex; + + //build left child tree + buildTree(startIndex,splitIndex); + + int rightChildNodexIndex = m_curNodeIndex; + //build right child tree + buildTree(splitIndex,endIndex); + +#ifdef DEBUG_TREE_BUILDING + gStackDepth--; +#endif //DEBUG_TREE_BUILDING + + int escapeIndex = m_curNodeIndex - curIndex; + + if (m_useQuantization) + { + //escapeIndex is the number of nodes of this subtree + const int sizeQuantizedNode =sizeof(btQuantizedBvhNode); + const int treeSizeInBytes = escapeIndex * sizeQuantizedNode; + if (treeSizeInBytes > MAX_SUBTREE_SIZE_IN_BYTES) + { + updateSubtreeHeaders(leftChildNodexIndex,rightChildNodexIndex); + } + } else + { + + } + + setInternalNodeEscapeIndex(internalNodeIndex,escapeIndex); + +} + +void btQuantizedBvh::updateSubtreeHeaders(int leftChildNodexIndex,int rightChildNodexIndex) +{ + btAssert(m_useQuantization); + + btQuantizedBvhNode& leftChildNode = m_quantizedContiguousNodes[leftChildNodexIndex]; + int leftSubTreeSize = leftChildNode.isLeafNode() ? 1 : leftChildNode.getEscapeIndex(); + int leftSubTreeSizeInBytes = leftSubTreeSize * static_cast(sizeof(btQuantizedBvhNode)); + + btQuantizedBvhNode& rightChildNode = m_quantizedContiguousNodes[rightChildNodexIndex]; + int rightSubTreeSize = rightChildNode.isLeafNode() ? 1 : rightChildNode.getEscapeIndex(); + int rightSubTreeSizeInBytes = rightSubTreeSize * static_cast(sizeof(btQuantizedBvhNode)); + + if(leftSubTreeSizeInBytes <= MAX_SUBTREE_SIZE_IN_BYTES) + { + btBvhSubtreeInfo& subtree = m_SubtreeHeaders.expand(); + subtree.setAabbFromQuantizeNode(leftChildNode); + subtree.m_rootNodeIndex = leftChildNodexIndex; + subtree.m_subtreeSize = leftSubTreeSize; + } + + if(rightSubTreeSizeInBytes <= MAX_SUBTREE_SIZE_IN_BYTES) + { + btBvhSubtreeInfo& subtree = m_SubtreeHeaders.expand(); + subtree.setAabbFromQuantizeNode(rightChildNode); + subtree.m_rootNodeIndex = rightChildNodexIndex; + subtree.m_subtreeSize = rightSubTreeSize; + } + + //PCK: update the copy of the size + m_subtreeHeaderCount = m_SubtreeHeaders.size(); +} + + +int btQuantizedBvh::sortAndCalcSplittingIndex(int startIndex,int endIndex,int splitAxis) +{ + int i; + int splitIndex =startIndex; + int numIndices = endIndex - startIndex; + btScalar splitValue; + + btVector3 means(btScalar(0.),btScalar(0.),btScalar(0.)); + for (i=startIndex;i splitValue) + { + //swap + swapLeafNodes(i,splitIndex); + splitIndex++; + } + } + + //if the splitIndex causes unbalanced trees, fix this by using the center in between startIndex and endIndex + //otherwise the tree-building might fail due to stack-overflows in certain cases. + //unbalanced1 is unsafe: it can cause stack overflows + //bool unbalanced1 = ((splitIndex==startIndex) || (splitIndex == (endIndex-1))); + + //unbalanced2 should work too: always use center (perfect balanced trees) + //bool unbalanced2 = true; + + //this should be safe too: + int rangeBalancedIndices = numIndices/3; + bool unbalanced = ((splitIndex<=(startIndex+rangeBalancedIndices)) || (splitIndex >=(endIndex-1-rangeBalancedIndices))); + + if (unbalanced) + { + splitIndex = startIndex+ (numIndices>>1); + } + + bool unbal = (splitIndex==startIndex) || (splitIndex == (endIndex)); + (void)unbal; + btAssert(!unbal); + + return splitIndex; +} + + +int btQuantizedBvh::calcSplittingAxis(int startIndex,int endIndex) +{ + int i; + + btVector3 means(btScalar(0.),btScalar(0.),btScalar(0.)); + btVector3 variance(btScalar(0.),btScalar(0.),btScalar(0.)); + int numIndices = endIndex-startIndex; + + for (i=startIndex;im_aabbMinOrg,rootNode->m_aabbMaxOrg); + isLeafNode = rootNode->m_escapeIndex == -1; + + //PCK: unsigned instead of bool + if (isLeafNode && (aabbOverlap != 0)) + { + nodeCallback->processNode(rootNode->m_subPart,rootNode->m_triangleIndex); + } + + //PCK: unsigned instead of bool + if ((aabbOverlap != 0) || isLeafNode) + { + rootNode++; + curIndex++; + } else + { + escapeIndex = rootNode->m_escapeIndex; + rootNode += escapeIndex; + curIndex += escapeIndex; + } + } + if (maxIterations < walkIterations) + maxIterations = walkIterations; + +} + +/* +///this was the original recursive traversal, before we optimized towards stackless traversal +void btQuantizedBvh::walkTree(btOptimizedBvhNode* rootNode,btNodeOverlapCallback* nodeCallback,const btVector3& aabbMin,const btVector3& aabbMax) const +{ + bool isLeafNode, aabbOverlap = TestAabbAgainstAabb2(aabbMin,aabbMax,rootNode->m_aabbMin,rootNode->m_aabbMax); + if (aabbOverlap) + { + isLeafNode = (!rootNode->m_leftChild && !rootNode->m_rightChild); + if (isLeafNode) + { + nodeCallback->processNode(rootNode); + } else + { + walkTree(rootNode->m_leftChild,nodeCallback,aabbMin,aabbMax); + walkTree(rootNode->m_rightChild,nodeCallback,aabbMin,aabbMax); + } + } + +} +*/ + +void btQuantizedBvh::walkRecursiveQuantizedTreeAgainstQueryAabb(const btQuantizedBvhNode* currentNode,btNodeOverlapCallback* nodeCallback,unsigned short int* quantizedQueryAabbMin,unsigned short int* quantizedQueryAabbMax) const +{ + btAssert(m_useQuantization); + + bool isLeafNode; + //PCK: unsigned instead of bool + unsigned aabbOverlap; + + //PCK: unsigned instead of bool + aabbOverlap = testQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin,quantizedQueryAabbMax,currentNode->m_quantizedAabbMin,currentNode->m_quantizedAabbMax); + isLeafNode = currentNode->isLeafNode(); + + //PCK: unsigned instead of bool + if (aabbOverlap != 0) + { + if (isLeafNode) + { + nodeCallback->processNode(currentNode->getPartId(),currentNode->getTriangleIndex()); + } else + { + //process left and right children + const btQuantizedBvhNode* leftChildNode = currentNode+1; + walkRecursiveQuantizedTreeAgainstQueryAabb(leftChildNode,nodeCallback,quantizedQueryAabbMin,quantizedQueryAabbMax); + + const btQuantizedBvhNode* rightChildNode = leftChildNode->isLeafNode() ? leftChildNode+1:leftChildNode+leftChildNode->getEscapeIndex(); + walkRecursiveQuantizedTreeAgainstQueryAabb(rightChildNode,nodeCallback,quantizedQueryAabbMin,quantizedQueryAabbMax); + } + } +} + + + +void btQuantizedBvh::walkStacklessTreeAgainstRay(btNodeOverlapCallback* nodeCallback, const btVector3& raySource, const btVector3& rayTarget, const btVector3& aabbMin, const btVector3& aabbMax, int startNodeIndex,int endNodeIndex) const +{ + btAssert(!m_useQuantization); + + const btOptimizedBvhNode* rootNode = &m_contiguousNodes[0]; + int escapeIndex, curIndex = 0; + int walkIterations = 0; + bool isLeafNode; + //PCK: unsigned instead of bool + unsigned aabbOverlap=0; + unsigned rayBoxOverlap=0; + btScalar lambda_max = 1.0; + + /* Quick pruning by quantized box */ + btVector3 rayAabbMin = raySource; + btVector3 rayAabbMax = raySource; + rayAabbMin.setMin(rayTarget); + rayAabbMax.setMax(rayTarget); + + /* Add box cast extents to bounding box */ + rayAabbMin += aabbMin; + rayAabbMax += aabbMax; + +#ifdef RAYAABB2 + btVector3 rayDir = (rayTarget-raySource); + rayDir.normalize (); + lambda_max = rayDir.dot(rayTarget-raySource); + ///what about division by zero? --> just set rayDirection[i] to 1.0 + btVector3 rayDirectionInverse; + rayDirectionInverse[0] = rayDir[0] == btScalar(0.0) ? btScalar(BT_LARGE_FLOAT) : btScalar(1.0) / rayDir[0]; + rayDirectionInverse[1] = rayDir[1] == btScalar(0.0) ? btScalar(BT_LARGE_FLOAT) : btScalar(1.0) / rayDir[1]; + rayDirectionInverse[2] = rayDir[2] == btScalar(0.0) ? btScalar(BT_LARGE_FLOAT) : btScalar(1.0) / rayDir[2]; + unsigned int sign[3] = { rayDirectionInverse[0] < 0.0, rayDirectionInverse[1] < 0.0, rayDirectionInverse[2] < 0.0}; +#endif + + btVector3 bounds[2]; + + while (curIndex < m_curNodeIndex) + { + btScalar param = 1.0; + //catch bugs in tree data + btAssert (walkIterations < m_curNodeIndex); + + walkIterations++; + + bounds[0] = rootNode->m_aabbMinOrg; + bounds[1] = rootNode->m_aabbMaxOrg; + /* Add box cast extents */ + bounds[0] -= aabbMax; + bounds[1] -= aabbMin; + + aabbOverlap = TestAabbAgainstAabb2(rayAabbMin,rayAabbMax,rootNode->m_aabbMinOrg,rootNode->m_aabbMaxOrg); + //perhaps profile if it is worth doing the aabbOverlap test first + +#ifdef RAYAABB2 + ///careful with this check: need to check division by zero (above) and fix the unQuantize method + ///thanks Joerg/hiker for the reproduction case! + ///http://www.bulletphysics.com/Bullet/phpBB3/viewtopic.php?f=9&t=1858 + rayBoxOverlap = aabbOverlap ? btRayAabb2 (raySource, rayDirectionInverse, sign, bounds, param, 0.0f, lambda_max) : false; + +#else + btVector3 normal; + rayBoxOverlap = btRayAabb(raySource, rayTarget,bounds[0],bounds[1],param, normal); +#endif + + isLeafNode = rootNode->m_escapeIndex == -1; + + //PCK: unsigned instead of bool + if (isLeafNode && (rayBoxOverlap != 0)) + { + nodeCallback->processNode(rootNode->m_subPart,rootNode->m_triangleIndex); + } + + //PCK: unsigned instead of bool + if ((rayBoxOverlap != 0) || isLeafNode) + { + rootNode++; + curIndex++; + } else + { + escapeIndex = rootNode->m_escapeIndex; + rootNode += escapeIndex; + curIndex += escapeIndex; + } + } + if (maxIterations < walkIterations) + maxIterations = walkIterations; + +} + + + +void btQuantizedBvh::walkStacklessQuantizedTreeAgainstRay(btNodeOverlapCallback* nodeCallback, const btVector3& raySource, const btVector3& rayTarget, const btVector3& aabbMin, const btVector3& aabbMax, int startNodeIndex,int endNodeIndex) const +{ + btAssert(m_useQuantization); + + int curIndex = startNodeIndex; + int walkIterations = 0; + int subTreeSize = endNodeIndex - startNodeIndex; + (void)subTreeSize; + + const btQuantizedBvhNode* rootNode = &m_quantizedContiguousNodes[startNodeIndex]; + int escapeIndex; + + bool isLeafNode; + //PCK: unsigned instead of bool + unsigned boxBoxOverlap = 0; + unsigned rayBoxOverlap = 0; + + btScalar lambda_max = 1.0; + +#ifdef RAYAABB2 + btVector3 rayDirection = (rayTarget-raySource); + rayDirection.normalize (); + lambda_max = rayDirection.dot(rayTarget-raySource); + ///what about division by zero? --> just set rayDirection[i] to 1.0 + rayDirection[0] = rayDirection[0] == btScalar(0.0) ? btScalar(BT_LARGE_FLOAT) : btScalar(1.0) / rayDirection[0]; + rayDirection[1] = rayDirection[1] == btScalar(0.0) ? btScalar(BT_LARGE_FLOAT) : btScalar(1.0) / rayDirection[1]; + rayDirection[2] = rayDirection[2] == btScalar(0.0) ? btScalar(BT_LARGE_FLOAT) : btScalar(1.0) / rayDirection[2]; + unsigned int sign[3] = { rayDirection[0] < 0.0, rayDirection[1] < 0.0, rayDirection[2] < 0.0}; +#endif + + /* Quick pruning by quantized box */ + btVector3 rayAabbMin = raySource; + btVector3 rayAabbMax = raySource; + rayAabbMin.setMin(rayTarget); + rayAabbMax.setMax(rayTarget); + + /* Add box cast extents to bounding box */ + rayAabbMin += aabbMin; + rayAabbMax += aabbMax; + + unsigned short int quantizedQueryAabbMin[3]; + unsigned short int quantizedQueryAabbMax[3]; + quantizeWithClamp(quantizedQueryAabbMin,rayAabbMin,0); + quantizeWithClamp(quantizedQueryAabbMax,rayAabbMax,1); + + while (curIndex < endNodeIndex) + { + +//#define VISUALLY_ANALYZE_BVH 1 +#ifdef VISUALLY_ANALYZE_BVH + //some code snippet to debugDraw aabb, to visually analyze bvh structure + static int drawPatch = 0; + //need some global access to a debugDrawer + extern btIDebugDraw* debugDrawerPtr; + if (curIndex==drawPatch) + { + btVector3 aabbMin,aabbMax; + aabbMin = unQuantize(rootNode->m_quantizedAabbMin); + aabbMax = unQuantize(rootNode->m_quantizedAabbMax); + btVector3 color(1,0,0); + debugDrawerPtr->drawAabb(aabbMin,aabbMax,color); + } +#endif//VISUALLY_ANALYZE_BVH + + //catch bugs in tree data + btAssert (walkIterations < subTreeSize); + + walkIterations++; + //PCK: unsigned instead of bool + // only interested if this is closer than any previous hit + btScalar param = 1.0; + rayBoxOverlap = 0; + boxBoxOverlap = testQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin,quantizedQueryAabbMax,rootNode->m_quantizedAabbMin,rootNode->m_quantizedAabbMax); + isLeafNode = rootNode->isLeafNode(); + if (boxBoxOverlap) + { + btVector3 bounds[2]; + bounds[0] = unQuantize(rootNode->m_quantizedAabbMin); + bounds[1] = unQuantize(rootNode->m_quantizedAabbMax); + /* Add box cast extents */ + bounds[0] -= aabbMax; + bounds[1] -= aabbMin; + btVector3 normal; +#if 0 + bool ra2 = btRayAabb2 (raySource, rayDirection, sign, bounds, param, 0.0, lambda_max); + bool ra = btRayAabb (raySource, rayTarget, bounds[0], bounds[1], param, normal); + if (ra2 != ra) + { + printf("functions don't match\n"); + } +#endif +#ifdef RAYAABB2 + ///careful with this check: need to check division by zero (above) and fix the unQuantize method + ///thanks Joerg/hiker for the reproduction case! + ///http://www.bulletphysics.com/Bullet/phpBB3/viewtopic.php?f=9&t=1858 + + //BT_PROFILE("btRayAabb2"); + rayBoxOverlap = btRayAabb2 (raySource, rayDirection, sign, bounds, param, 0.0f, lambda_max); + +#else + rayBoxOverlap = true;//btRayAabb(raySource, rayTarget, bounds[0], bounds[1], param, normal); +#endif + } + + if (isLeafNode && rayBoxOverlap) + { + nodeCallback->processNode(rootNode->getPartId(),rootNode->getTriangleIndex()); + } + + //PCK: unsigned instead of bool + if ((rayBoxOverlap != 0) || isLeafNode) + { + rootNode++; + curIndex++; + } else + { + escapeIndex = rootNode->getEscapeIndex(); + rootNode += escapeIndex; + curIndex += escapeIndex; + } + } + if (maxIterations < walkIterations) + maxIterations = walkIterations; + +} + +void btQuantizedBvh::walkStacklessQuantizedTree(btNodeOverlapCallback* nodeCallback,unsigned short int* quantizedQueryAabbMin,unsigned short int* quantizedQueryAabbMax,int startNodeIndex,int endNodeIndex) const +{ + btAssert(m_useQuantization); + + int curIndex = startNodeIndex; + int walkIterations = 0; + int subTreeSize = endNodeIndex - startNodeIndex; + (void)subTreeSize; + + const btQuantizedBvhNode* rootNode = &m_quantizedContiguousNodes[startNodeIndex]; + int escapeIndex; + + bool isLeafNode; + //PCK: unsigned instead of bool + unsigned aabbOverlap; + + while (curIndex < endNodeIndex) + { + +//#define VISUALLY_ANALYZE_BVH 1 +#ifdef VISUALLY_ANALYZE_BVH + //some code snippet to debugDraw aabb, to visually analyze bvh structure + static int drawPatch = 0; + //need some global access to a debugDrawer + extern btIDebugDraw* debugDrawerPtr; + if (curIndex==drawPatch) + { + btVector3 aabbMin,aabbMax; + aabbMin = unQuantize(rootNode->m_quantizedAabbMin); + aabbMax = unQuantize(rootNode->m_quantizedAabbMax); + btVector3 color(1,0,0); + debugDrawerPtr->drawAabb(aabbMin,aabbMax,color); + } +#endif//VISUALLY_ANALYZE_BVH + + //catch bugs in tree data + btAssert (walkIterations < subTreeSize); + + walkIterations++; + //PCK: unsigned instead of bool + aabbOverlap = testQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin,quantizedQueryAabbMax,rootNode->m_quantizedAabbMin,rootNode->m_quantizedAabbMax); + isLeafNode = rootNode->isLeafNode(); + + if (isLeafNode && aabbOverlap) + { + nodeCallback->processNode(rootNode->getPartId(),rootNode->getTriangleIndex()); + } + + //PCK: unsigned instead of bool + if ((aabbOverlap != 0) || isLeafNode) + { + rootNode++; + curIndex++; + } else + { + escapeIndex = rootNode->getEscapeIndex(); + rootNode += escapeIndex; + curIndex += escapeIndex; + } + } + if (maxIterations < walkIterations) + maxIterations = walkIterations; + +} + +//This traversal can be called from Playstation 3 SPU +void btQuantizedBvh::walkStacklessQuantizedTreeCacheFriendly(btNodeOverlapCallback* nodeCallback,unsigned short int* quantizedQueryAabbMin,unsigned short int* quantizedQueryAabbMax) const +{ + btAssert(m_useQuantization); + + int i; + + + for (i=0;im_SubtreeHeaders.size();i++) + { + const btBvhSubtreeInfo& subtree = m_SubtreeHeaders[i]; + + //PCK: unsigned instead of bool + unsigned overlap = testQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin,quantizedQueryAabbMax,subtree.m_quantizedAabbMin,subtree.m_quantizedAabbMax); + if (overlap != 0) + { + walkStacklessQuantizedTree(nodeCallback,quantizedQueryAabbMin,quantizedQueryAabbMax, + subtree.m_rootNodeIndex, + subtree.m_rootNodeIndex+subtree.m_subtreeSize); + } + } +} + + +void btQuantizedBvh::reportRayOverlappingNodex (btNodeOverlapCallback* nodeCallback, const btVector3& raySource, const btVector3& rayTarget) const +{ + reportBoxCastOverlappingNodex(nodeCallback,raySource,rayTarget,btVector3(0,0,0),btVector3(0,0,0)); +} + + +void btQuantizedBvh::reportBoxCastOverlappingNodex(btNodeOverlapCallback* nodeCallback, const btVector3& raySource, const btVector3& rayTarget, const btVector3& aabbMin,const btVector3& aabbMax) const +{ + //always use stackless + + if (m_useQuantization) + { + walkStacklessQuantizedTreeAgainstRay(nodeCallback, raySource, rayTarget, aabbMin, aabbMax, 0, m_curNodeIndex); + } + else + { + walkStacklessTreeAgainstRay(nodeCallback, raySource, rayTarget, aabbMin, aabbMax, 0, m_curNodeIndex); + } + /* + { + //recursive traversal + btVector3 qaabbMin = raySource; + btVector3 qaabbMax = raySource; + qaabbMin.setMin(rayTarget); + qaabbMax.setMax(rayTarget); + qaabbMin += aabbMin; + qaabbMax += aabbMax; + reportAabbOverlappingNodex(nodeCallback,qaabbMin,qaabbMax); + } + */ + +} + + +void btQuantizedBvh::swapLeafNodes(int i,int splitIndex) +{ + if (m_useQuantization) + { + btQuantizedBvhNode tmp = m_quantizedLeafNodes[i]; + m_quantizedLeafNodes[i] = m_quantizedLeafNodes[splitIndex]; + m_quantizedLeafNodes[splitIndex] = tmp; + } else + { + btOptimizedBvhNode tmp = m_leafNodes[i]; + m_leafNodes[i] = m_leafNodes[splitIndex]; + m_leafNodes[splitIndex] = tmp; + } +} + +void btQuantizedBvh::assignInternalNodeFromLeafNode(int internalNode,int leafNodeIndex) +{ + if (m_useQuantization) + { + m_quantizedContiguousNodes[internalNode] = m_quantizedLeafNodes[leafNodeIndex]; + } else + { + m_contiguousNodes[internalNode] = m_leafNodes[leafNodeIndex]; + } +} + +//PCK: include +#include + +#if 0 +//PCK: consts +static const unsigned BVH_ALIGNMENT = 16; +static const unsigned BVH_ALIGNMENT_MASK = BVH_ALIGNMENT-1; + +static const unsigned BVH_ALIGNMENT_BLOCKS = 2; +#endif + + +unsigned int btQuantizedBvh::getAlignmentSerializationPadding() +{ + // I changed this to 0 since the extra padding is not needed or used. + return 0;//BVH_ALIGNMENT_BLOCKS * BVH_ALIGNMENT; +} + +unsigned btQuantizedBvh::calculateSerializeBufferSize() const +{ + unsigned baseSize = sizeof(btQuantizedBvh) + getAlignmentSerializationPadding(); + baseSize += sizeof(btBvhSubtreeInfo) * m_subtreeHeaderCount; + if (m_useQuantization) + { + return baseSize + m_curNodeIndex * sizeof(btQuantizedBvhNode); + } + return baseSize + m_curNodeIndex * sizeof(btOptimizedBvhNode); +} + +bool btQuantizedBvh::serialize(void *o_alignedDataBuffer, unsigned /*i_dataBufferSize */, bool i_swapEndian) const +{ + btAssert(m_subtreeHeaderCount == m_SubtreeHeaders.size()); + m_subtreeHeaderCount = m_SubtreeHeaders.size(); + +/* if (i_dataBufferSize < calculateSerializeBufferSize() || o_alignedDataBuffer == NULL || (((unsigned)o_alignedDataBuffer & BVH_ALIGNMENT_MASK) != 0)) + { + ///check alignedment for buffer? + btAssert(0); + return false; + } +*/ + + btQuantizedBvh *targetBvh = (btQuantizedBvh *)o_alignedDataBuffer; + + // construct the class so the virtual function table, etc will be set up + // Also, m_leafNodes and m_quantizedLeafNodes will be initialized to default values by the constructor + new (targetBvh) btQuantizedBvh; + + if (i_swapEndian) + { + targetBvh->m_curNodeIndex = static_cast(btSwapEndian(m_curNodeIndex)); + + + btSwapVector3Endian(m_bvhAabbMin,targetBvh->m_bvhAabbMin); + btSwapVector3Endian(m_bvhAabbMax,targetBvh->m_bvhAabbMax); + btSwapVector3Endian(m_bvhQuantization,targetBvh->m_bvhQuantization); + + targetBvh->m_traversalMode = (btTraversalMode)btSwapEndian(m_traversalMode); + targetBvh->m_subtreeHeaderCount = static_cast(btSwapEndian(m_subtreeHeaderCount)); + } + else + { + targetBvh->m_curNodeIndex = m_curNodeIndex; + targetBvh->m_bvhAabbMin = m_bvhAabbMin; + targetBvh->m_bvhAabbMax = m_bvhAabbMax; + targetBvh->m_bvhQuantization = m_bvhQuantization; + targetBvh->m_traversalMode = m_traversalMode; + targetBvh->m_subtreeHeaderCount = m_subtreeHeaderCount; + } + + targetBvh->m_useQuantization = m_useQuantization; + + unsigned char *nodeData = (unsigned char *)targetBvh; + nodeData += sizeof(btQuantizedBvh); + + unsigned sizeToAdd = 0;//(BVH_ALIGNMENT-((unsigned)nodeData & BVH_ALIGNMENT_MASK))&BVH_ALIGNMENT_MASK; + nodeData += sizeToAdd; + + int nodeCount = m_curNodeIndex; + + if (m_useQuantization) + { + targetBvh->m_quantizedContiguousNodes.initializeFromBuffer(nodeData, nodeCount, nodeCount); + + if (i_swapEndian) + { + for (int nodeIndex = 0; nodeIndex < nodeCount; nodeIndex++) + { + targetBvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[0] = btSwapEndian(m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[0]); + targetBvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[1] = btSwapEndian(m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[1]); + targetBvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[2] = btSwapEndian(m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[2]); + + targetBvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[0] = btSwapEndian(m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[0]); + targetBvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[1] = btSwapEndian(m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[1]); + targetBvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[2] = btSwapEndian(m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[2]); + + targetBvh->m_quantizedContiguousNodes[nodeIndex].m_escapeIndexOrTriangleIndex = static_cast(btSwapEndian(m_quantizedContiguousNodes[nodeIndex].m_escapeIndexOrTriangleIndex)); + } + } + else + { + for (int nodeIndex = 0; nodeIndex < nodeCount; nodeIndex++) + { + + targetBvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[0] = m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[0]; + targetBvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[1] = m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[1]; + targetBvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[2] = m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[2]; + + targetBvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[0] = m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[0]; + targetBvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[1] = m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[1]; + targetBvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[2] = m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[2]; + + targetBvh->m_quantizedContiguousNodes[nodeIndex].m_escapeIndexOrTriangleIndex = m_quantizedContiguousNodes[nodeIndex].m_escapeIndexOrTriangleIndex; + + + } + } + nodeData += sizeof(btQuantizedBvhNode) * nodeCount; + + // this clears the pointer in the member variable it doesn't really do anything to the data + // it does call the destructor on the contained objects, but they are all classes with no destructor defined + // so the memory (which is not freed) is left alone + targetBvh->m_quantizedContiguousNodes.initializeFromBuffer(NULL, 0, 0); + } + else + { + targetBvh->m_contiguousNodes.initializeFromBuffer(nodeData, nodeCount, nodeCount); + + if (i_swapEndian) + { + for (int nodeIndex = 0; nodeIndex < nodeCount; nodeIndex++) + { + btSwapVector3Endian(m_contiguousNodes[nodeIndex].m_aabbMinOrg, targetBvh->m_contiguousNodes[nodeIndex].m_aabbMinOrg); + btSwapVector3Endian(m_contiguousNodes[nodeIndex].m_aabbMaxOrg, targetBvh->m_contiguousNodes[nodeIndex].m_aabbMaxOrg); + + targetBvh->m_contiguousNodes[nodeIndex].m_escapeIndex = static_cast(btSwapEndian(m_contiguousNodes[nodeIndex].m_escapeIndex)); + targetBvh->m_contiguousNodes[nodeIndex].m_subPart = static_cast(btSwapEndian(m_contiguousNodes[nodeIndex].m_subPart)); + targetBvh->m_contiguousNodes[nodeIndex].m_triangleIndex = static_cast(btSwapEndian(m_contiguousNodes[nodeIndex].m_triangleIndex)); + } + } + else + { + for (int nodeIndex = 0; nodeIndex < nodeCount; nodeIndex++) + { + targetBvh->m_contiguousNodes[nodeIndex].m_aabbMinOrg = m_contiguousNodes[nodeIndex].m_aabbMinOrg; + targetBvh->m_contiguousNodes[nodeIndex].m_aabbMaxOrg = m_contiguousNodes[nodeIndex].m_aabbMaxOrg; + + targetBvh->m_contiguousNodes[nodeIndex].m_escapeIndex = m_contiguousNodes[nodeIndex].m_escapeIndex; + targetBvh->m_contiguousNodes[nodeIndex].m_subPart = m_contiguousNodes[nodeIndex].m_subPart; + targetBvh->m_contiguousNodes[nodeIndex].m_triangleIndex = m_contiguousNodes[nodeIndex].m_triangleIndex; + } + } + nodeData += sizeof(btOptimizedBvhNode) * nodeCount; + + // this clears the pointer in the member variable it doesn't really do anything to the data + // it does call the destructor on the contained objects, but they are all classes with no destructor defined + // so the memory (which is not freed) is left alone + targetBvh->m_contiguousNodes.initializeFromBuffer(NULL, 0, 0); + } + + sizeToAdd = 0;//(BVH_ALIGNMENT-((unsigned)nodeData & BVH_ALIGNMENT_MASK))&BVH_ALIGNMENT_MASK; + nodeData += sizeToAdd; + + // Now serialize the subtree headers + targetBvh->m_SubtreeHeaders.initializeFromBuffer(nodeData, m_subtreeHeaderCount, m_subtreeHeaderCount); + if (i_swapEndian) + { + for (int i = 0; i < m_subtreeHeaderCount; i++) + { + targetBvh->m_SubtreeHeaders[i].m_quantizedAabbMin[0] = btSwapEndian(m_SubtreeHeaders[i].m_quantizedAabbMin[0]); + targetBvh->m_SubtreeHeaders[i].m_quantizedAabbMin[1] = btSwapEndian(m_SubtreeHeaders[i].m_quantizedAabbMin[1]); + targetBvh->m_SubtreeHeaders[i].m_quantizedAabbMin[2] = btSwapEndian(m_SubtreeHeaders[i].m_quantizedAabbMin[2]); + + targetBvh->m_SubtreeHeaders[i].m_quantizedAabbMax[0] = btSwapEndian(m_SubtreeHeaders[i].m_quantizedAabbMax[0]); + targetBvh->m_SubtreeHeaders[i].m_quantizedAabbMax[1] = btSwapEndian(m_SubtreeHeaders[i].m_quantizedAabbMax[1]); + targetBvh->m_SubtreeHeaders[i].m_quantizedAabbMax[2] = btSwapEndian(m_SubtreeHeaders[i].m_quantizedAabbMax[2]); + + targetBvh->m_SubtreeHeaders[i].m_rootNodeIndex = static_cast(btSwapEndian(m_SubtreeHeaders[i].m_rootNodeIndex)); + targetBvh->m_SubtreeHeaders[i].m_subtreeSize = static_cast(btSwapEndian(m_SubtreeHeaders[i].m_subtreeSize)); + } + } + else + { + for (int i = 0; i < m_subtreeHeaderCount; i++) + { + targetBvh->m_SubtreeHeaders[i].m_quantizedAabbMin[0] = (m_SubtreeHeaders[i].m_quantizedAabbMin[0]); + targetBvh->m_SubtreeHeaders[i].m_quantizedAabbMin[1] = (m_SubtreeHeaders[i].m_quantizedAabbMin[1]); + targetBvh->m_SubtreeHeaders[i].m_quantizedAabbMin[2] = (m_SubtreeHeaders[i].m_quantizedAabbMin[2]); + + targetBvh->m_SubtreeHeaders[i].m_quantizedAabbMax[0] = (m_SubtreeHeaders[i].m_quantizedAabbMax[0]); + targetBvh->m_SubtreeHeaders[i].m_quantizedAabbMax[1] = (m_SubtreeHeaders[i].m_quantizedAabbMax[1]); + targetBvh->m_SubtreeHeaders[i].m_quantizedAabbMax[2] = (m_SubtreeHeaders[i].m_quantizedAabbMax[2]); + + targetBvh->m_SubtreeHeaders[i].m_rootNodeIndex = (m_SubtreeHeaders[i].m_rootNodeIndex); + targetBvh->m_SubtreeHeaders[i].m_subtreeSize = (m_SubtreeHeaders[i].m_subtreeSize); + + // need to clear padding in destination buffer + targetBvh->m_SubtreeHeaders[i].m_padding[0] = 0; + targetBvh->m_SubtreeHeaders[i].m_padding[1] = 0; + targetBvh->m_SubtreeHeaders[i].m_padding[2] = 0; + } + } + nodeData += sizeof(btBvhSubtreeInfo) * m_subtreeHeaderCount; + + // this clears the pointer in the member variable it doesn't really do anything to the data + // it does call the destructor on the contained objects, but they are all classes with no destructor defined + // so the memory (which is not freed) is left alone + targetBvh->m_SubtreeHeaders.initializeFromBuffer(NULL, 0, 0); + + // this wipes the virtual function table pointer at the start of the buffer for the class + *((void**)o_alignedDataBuffer) = NULL; + + return true; +} + +btQuantizedBvh *btQuantizedBvh::deSerializeInPlace(void *i_alignedDataBuffer, unsigned int i_dataBufferSize, bool i_swapEndian) +{ + + if (i_alignedDataBuffer == NULL)// || (((unsigned)i_alignedDataBuffer & BVH_ALIGNMENT_MASK) != 0)) + { + return NULL; + } + btQuantizedBvh *bvh = (btQuantizedBvh *)i_alignedDataBuffer; + + if (i_swapEndian) + { + bvh->m_curNodeIndex = static_cast(btSwapEndian(bvh->m_curNodeIndex)); + + btUnSwapVector3Endian(bvh->m_bvhAabbMin); + btUnSwapVector3Endian(bvh->m_bvhAabbMax); + btUnSwapVector3Endian(bvh->m_bvhQuantization); + + bvh->m_traversalMode = (btTraversalMode)btSwapEndian(bvh->m_traversalMode); + bvh->m_subtreeHeaderCount = static_cast(btSwapEndian(bvh->m_subtreeHeaderCount)); + } + + unsigned int calculatedBufSize = bvh->calculateSerializeBufferSize(); + btAssert(calculatedBufSize <= i_dataBufferSize); + + if (calculatedBufSize > i_dataBufferSize) + { + return NULL; + } + + unsigned char *nodeData = (unsigned char *)bvh; + nodeData += sizeof(btQuantizedBvh); + + unsigned sizeToAdd = 0;//(BVH_ALIGNMENT-((unsigned)nodeData & BVH_ALIGNMENT_MASK))&BVH_ALIGNMENT_MASK; + nodeData += sizeToAdd; + + int nodeCount = bvh->m_curNodeIndex; + + // Must call placement new to fill in virtual function table, etc, but we don't want to overwrite most data, so call a special version of the constructor + // Also, m_leafNodes and m_quantizedLeafNodes will be initialized to default values by the constructor + new (bvh) btQuantizedBvh(*bvh, false); + + if (bvh->m_useQuantization) + { + bvh->m_quantizedContiguousNodes.initializeFromBuffer(nodeData, nodeCount, nodeCount); + + if (i_swapEndian) + { + for (int nodeIndex = 0; nodeIndex < nodeCount; nodeIndex++) + { + bvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[0] = btSwapEndian(bvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[0]); + bvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[1] = btSwapEndian(bvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[1]); + bvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[2] = btSwapEndian(bvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[2]); + + bvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[0] = btSwapEndian(bvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[0]); + bvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[1] = btSwapEndian(bvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[1]); + bvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[2] = btSwapEndian(bvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[2]); + + bvh->m_quantizedContiguousNodes[nodeIndex].m_escapeIndexOrTriangleIndex = static_cast(btSwapEndian(bvh->m_quantizedContiguousNodes[nodeIndex].m_escapeIndexOrTriangleIndex)); + } + } + nodeData += sizeof(btQuantizedBvhNode) * nodeCount; + } + else + { + bvh->m_contiguousNodes.initializeFromBuffer(nodeData, nodeCount, nodeCount); + + if (i_swapEndian) + { + for (int nodeIndex = 0; nodeIndex < nodeCount; nodeIndex++) + { + btUnSwapVector3Endian(bvh->m_contiguousNodes[nodeIndex].m_aabbMinOrg); + btUnSwapVector3Endian(bvh->m_contiguousNodes[nodeIndex].m_aabbMaxOrg); + + bvh->m_contiguousNodes[nodeIndex].m_escapeIndex = static_cast(btSwapEndian(bvh->m_contiguousNodes[nodeIndex].m_escapeIndex)); + bvh->m_contiguousNodes[nodeIndex].m_subPart = static_cast(btSwapEndian(bvh->m_contiguousNodes[nodeIndex].m_subPart)); + bvh->m_contiguousNodes[nodeIndex].m_triangleIndex = static_cast(btSwapEndian(bvh->m_contiguousNodes[nodeIndex].m_triangleIndex)); + } + } + nodeData += sizeof(btOptimizedBvhNode) * nodeCount; + } + + sizeToAdd = 0;//(BVH_ALIGNMENT-((unsigned)nodeData & BVH_ALIGNMENT_MASK))&BVH_ALIGNMENT_MASK; + nodeData += sizeToAdd; + + // Now serialize the subtree headers + bvh->m_SubtreeHeaders.initializeFromBuffer(nodeData, bvh->m_subtreeHeaderCount, bvh->m_subtreeHeaderCount); + if (i_swapEndian) + { + for (int i = 0; i < bvh->m_subtreeHeaderCount; i++) + { + bvh->m_SubtreeHeaders[i].m_quantizedAabbMin[0] = btSwapEndian(bvh->m_SubtreeHeaders[i].m_quantizedAabbMin[0]); + bvh->m_SubtreeHeaders[i].m_quantizedAabbMin[1] = btSwapEndian(bvh->m_SubtreeHeaders[i].m_quantizedAabbMin[1]); + bvh->m_SubtreeHeaders[i].m_quantizedAabbMin[2] = btSwapEndian(bvh->m_SubtreeHeaders[i].m_quantizedAabbMin[2]); + + bvh->m_SubtreeHeaders[i].m_quantizedAabbMax[0] = btSwapEndian(bvh->m_SubtreeHeaders[i].m_quantizedAabbMax[0]); + bvh->m_SubtreeHeaders[i].m_quantizedAabbMax[1] = btSwapEndian(bvh->m_SubtreeHeaders[i].m_quantizedAabbMax[1]); + bvh->m_SubtreeHeaders[i].m_quantizedAabbMax[2] = btSwapEndian(bvh->m_SubtreeHeaders[i].m_quantizedAabbMax[2]); + + bvh->m_SubtreeHeaders[i].m_rootNodeIndex = static_cast(btSwapEndian(bvh->m_SubtreeHeaders[i].m_rootNodeIndex)); + bvh->m_SubtreeHeaders[i].m_subtreeSize = static_cast(btSwapEndian(bvh->m_SubtreeHeaders[i].m_subtreeSize)); + } + } + + return bvh; +} + +// Constructor that prevents btVector3's default constructor from being called +btQuantizedBvh::btQuantizedBvh(btQuantizedBvh &self, bool /* ownsMemory */) : +m_bvhAabbMin(self.m_bvhAabbMin), +m_bvhAabbMax(self.m_bvhAabbMax), +m_bvhQuantization(self.m_bvhQuantization), +m_bulletVersion(BT_BULLET_VERSION) +{ + +} + +void btQuantizedBvh::deSerializeFloat(struct btQuantizedBvhFloatData& quantizedBvhFloatData) +{ + m_bvhAabbMax.deSerializeFloat(quantizedBvhFloatData.m_bvhAabbMax); + m_bvhAabbMin.deSerializeFloat(quantizedBvhFloatData.m_bvhAabbMin); + m_bvhQuantization.deSerializeFloat(quantizedBvhFloatData.m_bvhQuantization); + + m_curNodeIndex = quantizedBvhFloatData.m_curNodeIndex; + m_useQuantization = quantizedBvhFloatData.m_useQuantization!=0; + + { + int numElem = quantizedBvhFloatData.m_numContiguousLeafNodes; + m_contiguousNodes.resize(numElem); + + if (numElem) + { + btOptimizedBvhNodeFloatData* memPtr = quantizedBvhFloatData.m_contiguousNodesPtr; + + for (int i=0;im_aabbMaxOrg); + m_contiguousNodes[i].m_aabbMinOrg.deSerializeFloat(memPtr->m_aabbMinOrg); + m_contiguousNodes[i].m_escapeIndex = memPtr->m_escapeIndex; + m_contiguousNodes[i].m_subPart = memPtr->m_subPart; + m_contiguousNodes[i].m_triangleIndex = memPtr->m_triangleIndex; + } + } + } + + { + int numElem = quantizedBvhFloatData.m_numQuantizedContiguousNodes; + m_quantizedContiguousNodes.resize(numElem); + + if (numElem) + { + btQuantizedBvhNodeData* memPtr = quantizedBvhFloatData.m_quantizedContiguousNodesPtr; + for (int i=0;im_escapeIndexOrTriangleIndex; + m_quantizedContiguousNodes[i].m_quantizedAabbMax[0] = memPtr->m_quantizedAabbMax[0]; + m_quantizedContiguousNodes[i].m_quantizedAabbMax[1] = memPtr->m_quantizedAabbMax[1]; + m_quantizedContiguousNodes[i].m_quantizedAabbMax[2] = memPtr->m_quantizedAabbMax[2]; + m_quantizedContiguousNodes[i].m_quantizedAabbMin[0] = memPtr->m_quantizedAabbMin[0]; + m_quantizedContiguousNodes[i].m_quantizedAabbMin[1] = memPtr->m_quantizedAabbMin[1]; + m_quantizedContiguousNodes[i].m_quantizedAabbMin[2] = memPtr->m_quantizedAabbMin[2]; + } + } + } + + m_traversalMode = btTraversalMode(quantizedBvhFloatData.m_traversalMode); + + { + int numElem = quantizedBvhFloatData.m_numSubtreeHeaders; + m_SubtreeHeaders.resize(numElem); + if (numElem) + { + btBvhSubtreeInfoData* memPtr = quantizedBvhFloatData.m_subTreeInfoPtr; + for (int i=0;im_quantizedAabbMax[0] ; + m_SubtreeHeaders[i].m_quantizedAabbMax[1] = memPtr->m_quantizedAabbMax[1]; + m_SubtreeHeaders[i].m_quantizedAabbMax[2] = memPtr->m_quantizedAabbMax[2]; + m_SubtreeHeaders[i].m_quantizedAabbMin[0] = memPtr->m_quantizedAabbMin[0]; + m_SubtreeHeaders[i].m_quantizedAabbMin[1] = memPtr->m_quantizedAabbMin[1]; + m_SubtreeHeaders[i].m_quantizedAabbMin[2] = memPtr->m_quantizedAabbMin[2]; + m_SubtreeHeaders[i].m_rootNodeIndex = memPtr->m_rootNodeIndex; + m_SubtreeHeaders[i].m_subtreeSize = memPtr->m_subtreeSize; + } + } + } +} + +void btQuantizedBvh::deSerializeDouble(struct btQuantizedBvhDoubleData& quantizedBvhDoubleData) +{ + m_bvhAabbMax.deSerializeDouble(quantizedBvhDoubleData.m_bvhAabbMax); + m_bvhAabbMin.deSerializeDouble(quantizedBvhDoubleData.m_bvhAabbMin); + m_bvhQuantization.deSerializeDouble(quantizedBvhDoubleData.m_bvhQuantization); + + m_curNodeIndex = quantizedBvhDoubleData.m_curNodeIndex; + m_useQuantization = quantizedBvhDoubleData.m_useQuantization!=0; + + { + int numElem = quantizedBvhDoubleData.m_numContiguousLeafNodes; + m_contiguousNodes.resize(numElem); + + if (numElem) + { + btOptimizedBvhNodeDoubleData* memPtr = quantizedBvhDoubleData.m_contiguousNodesPtr; + + for (int i=0;im_aabbMaxOrg); + m_contiguousNodes[i].m_aabbMinOrg.deSerializeDouble(memPtr->m_aabbMinOrg); + m_contiguousNodes[i].m_escapeIndex = memPtr->m_escapeIndex; + m_contiguousNodes[i].m_subPart = memPtr->m_subPart; + m_contiguousNodes[i].m_triangleIndex = memPtr->m_triangleIndex; + } + } + } + + { + int numElem = quantizedBvhDoubleData.m_numQuantizedContiguousNodes; + m_quantizedContiguousNodes.resize(numElem); + + if (numElem) + { + btQuantizedBvhNodeData* memPtr = quantizedBvhDoubleData.m_quantizedContiguousNodesPtr; + for (int i=0;im_escapeIndexOrTriangleIndex; + m_quantizedContiguousNodes[i].m_quantizedAabbMax[0] = memPtr->m_quantizedAabbMax[0]; + m_quantizedContiguousNodes[i].m_quantizedAabbMax[1] = memPtr->m_quantizedAabbMax[1]; + m_quantizedContiguousNodes[i].m_quantizedAabbMax[2] = memPtr->m_quantizedAabbMax[2]; + m_quantizedContiguousNodes[i].m_quantizedAabbMin[0] = memPtr->m_quantizedAabbMin[0]; + m_quantizedContiguousNodes[i].m_quantizedAabbMin[1] = memPtr->m_quantizedAabbMin[1]; + m_quantizedContiguousNodes[i].m_quantizedAabbMin[2] = memPtr->m_quantizedAabbMin[2]; + } + } + } + + m_traversalMode = btTraversalMode(quantizedBvhDoubleData.m_traversalMode); + + { + int numElem = quantizedBvhDoubleData.m_numSubtreeHeaders; + m_SubtreeHeaders.resize(numElem); + if (numElem) + { + btBvhSubtreeInfoData* memPtr = quantizedBvhDoubleData.m_subTreeInfoPtr; + for (int i=0;im_quantizedAabbMax[0] ; + m_SubtreeHeaders[i].m_quantizedAabbMax[1] = memPtr->m_quantizedAabbMax[1]; + m_SubtreeHeaders[i].m_quantizedAabbMax[2] = memPtr->m_quantizedAabbMax[2]; + m_SubtreeHeaders[i].m_quantizedAabbMin[0] = memPtr->m_quantizedAabbMin[0]; + m_SubtreeHeaders[i].m_quantizedAabbMin[1] = memPtr->m_quantizedAabbMin[1]; + m_SubtreeHeaders[i].m_quantizedAabbMin[2] = memPtr->m_quantizedAabbMin[2]; + m_SubtreeHeaders[i].m_rootNodeIndex = memPtr->m_rootNodeIndex; + m_SubtreeHeaders[i].m_subtreeSize = memPtr->m_subtreeSize; + } + } + } + +} + + + +///fills the dataBuffer and returns the struct name (and 0 on failure) +const char* btQuantizedBvh::serialize(void* dataBuffer, btSerializer* serializer) const +{ + btAssert(0); + return 0; +} + + + + + diff --git a/opencl/gpu_sat/host/btQuantizedBvh.h b/opencl/gpu_sat/host/btQuantizedBvh.h new file mode 100644 index 000000000..45f55e5a3 --- /dev/null +++ b/opencl/gpu_sat/host/btQuantizedBvh.h @@ -0,0 +1,581 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/ + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + +#ifndef BT_QUANTIZED_BVH_H +#define BT_QUANTIZED_BVH_H + +class btSerializer; + +//#define DEBUG_CHECK_DEQUANTIZATION 1 +#ifdef DEBUG_CHECK_DEQUANTIZATION +#ifdef __SPU__ +#define printf spu_printf +#endif //__SPU__ + +#include +#include +#endif //DEBUG_CHECK_DEQUANTIZATION + +#include "BulletCommon/btVector3.h" +#include "BulletCommon/btAlignedAllocator.h" + +#ifdef BT_USE_DOUBLE_PRECISION +#define btQuantizedBvhData btQuantizedBvhDoubleData +#define btOptimizedBvhNodeData btOptimizedBvhNodeDoubleData +#define btQuantizedBvhDataName "btQuantizedBvhDoubleData" +#else +#define btQuantizedBvhData btQuantizedBvhFloatData +#define btOptimizedBvhNodeData btOptimizedBvhNodeFloatData +#define btQuantizedBvhDataName "btQuantizedBvhFloatData" +#endif + + + +//http://msdn.microsoft.com/library/default.asp?url=/library/en-us/vclang/html/vclrf__m128.asp + + +//Note: currently we have 16 bytes per quantized node +#define MAX_SUBTREE_SIZE_IN_BYTES 2048 + +// 10 gives the potential for 1024 parts, with at most 2^21 (2097152) (minus one +// actually) triangles each (since the sign bit is reserved +#define MAX_NUM_PARTS_IN_BITS 10 + +///btQuantizedBvhNode is a compressed aabb node, 16 bytes. +///Node can be used for leafnode or internal node. Leafnodes can point to 32-bit triangle index (non-negative range). +ATTRIBUTE_ALIGNED16 (struct) btQuantizedBvhNode +{ + BT_DECLARE_ALIGNED_ALLOCATOR(); + + //12 bytes + unsigned short int m_quantizedAabbMin[3]; + unsigned short int m_quantizedAabbMax[3]; + //4 bytes + int m_escapeIndexOrTriangleIndex; + + bool isLeafNode() const + { + //skipindex is negative (internal node), triangleindex >=0 (leafnode) + return (m_escapeIndexOrTriangleIndex >= 0); + } + int getEscapeIndex() const + { + btAssert(!isLeafNode()); + return -m_escapeIndexOrTriangleIndex; + } + int getTriangleIndex() const + { + btAssert(isLeafNode()); + unsigned int x=0; + unsigned int y = (~(x&0))<<(31-MAX_NUM_PARTS_IN_BITS); + // Get only the lower bits where the triangle index is stored + return (m_escapeIndexOrTriangleIndex&~(y)); + } + int getPartId() const + { + btAssert(isLeafNode()); + // Get only the highest bits where the part index is stored + return (m_escapeIndexOrTriangleIndex>>(31-MAX_NUM_PARTS_IN_BITS)); + } +} +; + +/// btOptimizedBvhNode contains both internal and leaf node information. +/// Total node size is 44 bytes / node. You can use the compressed version of 16 bytes. +ATTRIBUTE_ALIGNED16 (struct) btOptimizedBvhNode +{ + BT_DECLARE_ALIGNED_ALLOCATOR(); + + //32 bytes + btVector3 m_aabbMinOrg; + btVector3 m_aabbMaxOrg; + + //4 + int m_escapeIndex; + + //8 + //for child nodes + int m_subPart; + int m_triangleIndex; + +//pad the size to 64 bytes + char m_padding[20]; +}; + + +///btBvhSubtreeInfo provides info to gather a subtree of limited size +ATTRIBUTE_ALIGNED16(class) btBvhSubtreeInfo +{ +public: + BT_DECLARE_ALIGNED_ALLOCATOR(); + + //12 bytes + unsigned short int m_quantizedAabbMin[3]; + unsigned short int m_quantizedAabbMax[3]; + //4 bytes, points to the root of the subtree + int m_rootNodeIndex; + //4 bytes + int m_subtreeSize; + int m_padding[3]; + + btBvhSubtreeInfo() + { + //memset(&m_padding[0], 0, sizeof(m_padding)); + } + + + void setAabbFromQuantizeNode(const btQuantizedBvhNode& quantizedNode) + { + m_quantizedAabbMin[0] = quantizedNode.m_quantizedAabbMin[0]; + m_quantizedAabbMin[1] = quantizedNode.m_quantizedAabbMin[1]; + m_quantizedAabbMin[2] = quantizedNode.m_quantizedAabbMin[2]; + m_quantizedAabbMax[0] = quantizedNode.m_quantizedAabbMax[0]; + m_quantizedAabbMax[1] = quantizedNode.m_quantizedAabbMax[1]; + m_quantizedAabbMax[2] = quantizedNode.m_quantizedAabbMax[2]; + } +} +; + + +class btNodeOverlapCallback +{ +public: + virtual ~btNodeOverlapCallback() {}; + + virtual void processNode(int subPart, int triangleIndex) = 0; +}; + +#include "BulletCommon/btAlignedAllocator.h" +#include "BulletCommon/btAlignedObjectArray.h" + + + +///for code readability: +typedef btAlignedObjectArray NodeArray; +typedef btAlignedObjectArray QuantizedNodeArray; +typedef btAlignedObjectArray BvhSubtreeInfoArray; + + +///The btQuantizedBvh class stores an AABB tree that can be quickly traversed on CPU and Cell SPU. +///It is used by the btBvhTriangleMeshShape as midphase, and by the btMultiSapBroadphase. +///It is recommended to use quantization for better performance and lower memory requirements. +ATTRIBUTE_ALIGNED16(class) btQuantizedBvh +{ +public: + enum btTraversalMode + { + TRAVERSAL_STACKLESS = 0, + TRAVERSAL_STACKLESS_CACHE_FRIENDLY, + TRAVERSAL_RECURSIVE + }; + +protected: + + + btVector3 m_bvhAabbMin; + btVector3 m_bvhAabbMax; + btVector3 m_bvhQuantization; + + int m_bulletVersion; //for serialization versioning. It could also be used to detect endianess. + + int m_curNodeIndex; + //quantization data + bool m_useQuantization; + + + + NodeArray m_leafNodes; + NodeArray m_contiguousNodes; + QuantizedNodeArray m_quantizedLeafNodes; + QuantizedNodeArray m_quantizedContiguousNodes; + + btTraversalMode m_traversalMode; + BvhSubtreeInfoArray m_SubtreeHeaders; + + //This is only used for serialization so we don't have to add serialization directly to btAlignedObjectArray + mutable int m_subtreeHeaderCount; + + + + + + ///two versions, one for quantized and normal nodes. This allows code-reuse while maintaining readability (no template/macro!) + ///this might be refactored into a virtual, it is usually not calculated at run-time + void setInternalNodeAabbMin(int nodeIndex, const btVector3& aabbMin) + { + if (m_useQuantization) + { + quantize(&m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[0] ,aabbMin,0); + } else + { + m_contiguousNodes[nodeIndex].m_aabbMinOrg = aabbMin; + + } + } + void setInternalNodeAabbMax(int nodeIndex,const btVector3& aabbMax) + { + if (m_useQuantization) + { + quantize(&m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[0],aabbMax,1); + } else + { + m_contiguousNodes[nodeIndex].m_aabbMaxOrg = aabbMax; + } + } + + btVector3 getAabbMin(int nodeIndex) const + { + if (m_useQuantization) + { + return unQuantize(&m_quantizedLeafNodes[nodeIndex].m_quantizedAabbMin[0]); + } + //non-quantized + return m_leafNodes[nodeIndex].m_aabbMinOrg; + + } + btVector3 getAabbMax(int nodeIndex) const + { + if (m_useQuantization) + { + return unQuantize(&m_quantizedLeafNodes[nodeIndex].m_quantizedAabbMax[0]); + } + //non-quantized + return m_leafNodes[nodeIndex].m_aabbMaxOrg; + + } + + + void setInternalNodeEscapeIndex(int nodeIndex, int escapeIndex) + { + if (m_useQuantization) + { + m_quantizedContiguousNodes[nodeIndex].m_escapeIndexOrTriangleIndex = -escapeIndex; + } + else + { + m_contiguousNodes[nodeIndex].m_escapeIndex = escapeIndex; + } + + } + + void mergeInternalNodeAabb(int nodeIndex,const btVector3& newAabbMin,const btVector3& newAabbMax) + { + if (m_useQuantization) + { + unsigned short int quantizedAabbMin[3]; + unsigned short int quantizedAabbMax[3]; + quantize(quantizedAabbMin,newAabbMin,0); + quantize(quantizedAabbMax,newAabbMax,1); + for (int i=0;i<3;i++) + { + if (m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[i] > quantizedAabbMin[i]) + m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[i] = quantizedAabbMin[i]; + + if (m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[i] < quantizedAabbMax[i]) + m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[i] = quantizedAabbMax[i]; + + } + } else + { + //non-quantized + m_contiguousNodes[nodeIndex].m_aabbMinOrg.setMin(newAabbMin); + m_contiguousNodes[nodeIndex].m_aabbMaxOrg.setMax(newAabbMax); + } + } + + void swapLeafNodes(int firstIndex,int secondIndex); + + void assignInternalNodeFromLeafNode(int internalNode,int leafNodeIndex); + +protected: + + + + void buildTree (int startIndex,int endIndex); + + int calcSplittingAxis(int startIndex,int endIndex); + + int sortAndCalcSplittingIndex(int startIndex,int endIndex,int splitAxis); + + void walkStacklessTree(btNodeOverlapCallback* nodeCallback,const btVector3& aabbMin,const btVector3& aabbMax) const; + + void walkStacklessQuantizedTreeAgainstRay(btNodeOverlapCallback* nodeCallback, const btVector3& raySource, const btVector3& rayTarget, const btVector3& aabbMin, const btVector3& aabbMax, int startNodeIndex,int endNodeIndex) const; + void walkStacklessQuantizedTree(btNodeOverlapCallback* nodeCallback,unsigned short int* quantizedQueryAabbMin,unsigned short int* quantizedQueryAabbMax,int startNodeIndex,int endNodeIndex) const; + void walkStacklessTreeAgainstRay(btNodeOverlapCallback* nodeCallback, const btVector3& raySource, const btVector3& rayTarget, const btVector3& aabbMin, const btVector3& aabbMax, int startNodeIndex,int endNodeIndex) const; + + ///tree traversal designed for small-memory processors like PS3 SPU + void walkStacklessQuantizedTreeCacheFriendly(btNodeOverlapCallback* nodeCallback,unsigned short int* quantizedQueryAabbMin,unsigned short int* quantizedQueryAabbMax) const; + + ///use the 16-byte stackless 'skipindex' node tree to do a recursive traversal + void walkRecursiveQuantizedTreeAgainstQueryAabb(const btQuantizedBvhNode* currentNode,btNodeOverlapCallback* nodeCallback,unsigned short int* quantizedQueryAabbMin,unsigned short int* quantizedQueryAabbMax) const; + + ///use the 16-byte stackless 'skipindex' node tree to do a recursive traversal + void walkRecursiveQuantizedTreeAgainstQuantizedTree(const btQuantizedBvhNode* treeNodeA,const btQuantizedBvhNode* treeNodeB,btNodeOverlapCallback* nodeCallback) const; + + + + + void updateSubtreeHeaders(int leftChildNodexIndex,int rightChildNodexIndex); + +public: + + BT_DECLARE_ALIGNED_ALLOCATOR(); + + btQuantizedBvh(); + + virtual ~btQuantizedBvh(); + + + ///***************************************** expert/internal use only ************************* + void setQuantizationValues(const btVector3& bvhAabbMin,const btVector3& bvhAabbMax,btScalar quantizationMargin=btScalar(1.0)); + QuantizedNodeArray& getLeafNodeArray() { return m_quantizedLeafNodes; } + ///buildInternal is expert use only: assumes that setQuantizationValues and LeafNodeArray are initialized + void buildInternal(); + ///***************************************** expert/internal use only ************************* + + void reportAabbOverlappingNodex(btNodeOverlapCallback* nodeCallback,const btVector3& aabbMin,const btVector3& aabbMax) const; + void reportRayOverlappingNodex (btNodeOverlapCallback* nodeCallback, const btVector3& raySource, const btVector3& rayTarget) const; + void reportBoxCastOverlappingNodex(btNodeOverlapCallback* nodeCallback, const btVector3& raySource, const btVector3& rayTarget, const btVector3& aabbMin,const btVector3& aabbMax) const; + + SIMD_FORCE_INLINE void quantize(unsigned short* out, const btVector3& point,int isMax) const + { + + btAssert(m_useQuantization); + + btAssert(point.getX() <= m_bvhAabbMax.getX()); + btAssert(point.getY() <= m_bvhAabbMax.getY()); + btAssert(point.getZ() <= m_bvhAabbMax.getZ()); + + btAssert(point.getX() >= m_bvhAabbMin.getX()); + btAssert(point.getY() >= m_bvhAabbMin.getY()); + btAssert(point.getZ() >= m_bvhAabbMin.getZ()); + + btVector3 v = (point - m_bvhAabbMin) * m_bvhQuantization; + ///Make sure rounding is done in a way that unQuantize(quantizeWithClamp(...)) is conservative + ///end-points always set the first bit, so that they are sorted properly (so that neighbouring AABBs overlap properly) + ///@todo: double-check this + if (isMax) + { + out[0] = (unsigned short) (((unsigned short)(v.getX()+btScalar(1.)) | 1)); + out[1] = (unsigned short) (((unsigned short)(v.getY()+btScalar(1.)) | 1)); + out[2] = (unsigned short) (((unsigned short)(v.getZ()+btScalar(1.)) | 1)); + } else + { + out[0] = (unsigned short) (((unsigned short)(v.getX()) & 0xfffe)); + out[1] = (unsigned short) (((unsigned short)(v.getY()) & 0xfffe)); + out[2] = (unsigned short) (((unsigned short)(v.getZ()) & 0xfffe)); + } + + +#ifdef DEBUG_CHECK_DEQUANTIZATION + btVector3 newPoint = unQuantize(out); + if (isMax) + { + if (newPoint.getX() < point.getX()) + { + printf("unconservative X, diffX = %f, oldX=%f,newX=%f\n",newPoint.getX()-point.getX(), newPoint.getX(),point.getX()); + } + if (newPoint.getY() < point.getY()) + { + printf("unconservative Y, diffY = %f, oldY=%f,newY=%f\n",newPoint.getY()-point.getY(), newPoint.getY(),point.getY()); + } + if (newPoint.getZ() < point.getZ()) + { + + printf("unconservative Z, diffZ = %f, oldZ=%f,newZ=%f\n",newPoint.getZ()-point.getZ(), newPoint.getZ(),point.getZ()); + } + } else + { + if (newPoint.getX() > point.getX()) + { + printf("unconservative X, diffX = %f, oldX=%f,newX=%f\n",newPoint.getX()-point.getX(), newPoint.getX(),point.getX()); + } + if (newPoint.getY() > point.getY()) + { + printf("unconservative Y, diffY = %f, oldY=%f,newY=%f\n",newPoint.getY()-point.getY(), newPoint.getY(),point.getY()); + } + if (newPoint.getZ() > point.getZ()) + { + printf("unconservative Z, diffZ = %f, oldZ=%f,newZ=%f\n",newPoint.getZ()-point.getZ(), newPoint.getZ(),point.getZ()); + } + } +#endif //DEBUG_CHECK_DEQUANTIZATION + + } + + + SIMD_FORCE_INLINE void quantizeWithClamp(unsigned short* out, const btVector3& point2,int isMax) const + { + + btAssert(m_useQuantization); + + btVector3 clampedPoint(point2); + clampedPoint.setMax(m_bvhAabbMin); + clampedPoint.setMin(m_bvhAabbMax); + + quantize(out,clampedPoint,isMax); + + } + + SIMD_FORCE_INLINE btVector3 unQuantize(const unsigned short* vecIn) const + { + btVector3 vecOut; + vecOut.setValue( + (btScalar)(vecIn[0]) / (m_bvhQuantization.getX()), + (btScalar)(vecIn[1]) / (m_bvhQuantization.getY()), + (btScalar)(vecIn[2]) / (m_bvhQuantization.getZ())); + vecOut += m_bvhAabbMin; + return vecOut; + } + + ///setTraversalMode let's you choose between stackless, recursive or stackless cache friendly tree traversal. Note this is only implemented for quantized trees. + void setTraversalMode(btTraversalMode traversalMode) + { + m_traversalMode = traversalMode; + } + + + SIMD_FORCE_INLINE QuantizedNodeArray& getQuantizedNodeArray() + { + return m_quantizedContiguousNodes; + } + + + SIMD_FORCE_INLINE BvhSubtreeInfoArray& getSubtreeInfoArray() + { + return m_SubtreeHeaders; + } + +//////////////////////////////////////////////////////////////////// + + /////Calculate space needed to store BVH for serialization + unsigned calculateSerializeBufferSize() const; + + /// Data buffer MUST be 16 byte aligned + virtual bool serialize(void *o_alignedDataBuffer, unsigned i_dataBufferSize, bool i_swapEndian) const; + + ///deSerializeInPlace loads and initializes a BVH from a buffer in memory 'in place' + static btQuantizedBvh *deSerializeInPlace(void *i_alignedDataBuffer, unsigned int i_dataBufferSize, bool i_swapEndian); + + static unsigned int getAlignmentSerializationPadding(); +////////////////////////////////////////////////////////////////////// + + + virtual int calculateSerializeBufferSizeNew() const; + + ///fills the dataBuffer and returns the struct name (and 0 on failure) + virtual const char* serialize(void* dataBuffer, btSerializer* serializer) const; + + virtual void deSerializeFloat(struct btQuantizedBvhFloatData& quantizedBvhFloatData); + + virtual void deSerializeDouble(struct btQuantizedBvhDoubleData& quantizedBvhDoubleData); + + +//////////////////////////////////////////////////////////////////// + + SIMD_FORCE_INLINE bool isQuantized() + { + return m_useQuantization; + } + +private: + // Special "copy" constructor that allows for in-place deserialization + // Prevents btVector3's default constructor from being called, but doesn't inialize much else + // ownsMemory should most likely be false if deserializing, and if you are not, don't call this (it also changes the function signature, which we need) + btQuantizedBvh(btQuantizedBvh &other, bool ownsMemory); + +} +; + + +struct btBvhSubtreeInfoData +{ + int m_rootNodeIndex; + int m_subtreeSize; + unsigned short m_quantizedAabbMin[3]; + unsigned short m_quantizedAabbMax[3]; +}; + +struct btOptimizedBvhNodeFloatData +{ + btVector3FloatData m_aabbMinOrg; + btVector3FloatData m_aabbMaxOrg; + int m_escapeIndex; + int m_subPart; + int m_triangleIndex; + char m_pad[4]; +}; + +struct btOptimizedBvhNodeDoubleData +{ + btVector3DoubleData m_aabbMinOrg; + btVector3DoubleData m_aabbMaxOrg; + int m_escapeIndex; + int m_subPart; + int m_triangleIndex; + char m_pad[4]; +}; + + +struct btQuantizedBvhNodeData +{ + unsigned short m_quantizedAabbMin[3]; + unsigned short m_quantizedAabbMax[3]; + int m_escapeIndexOrTriangleIndex; +}; + +struct btQuantizedBvhFloatData +{ + btVector3FloatData m_bvhAabbMin; + btVector3FloatData m_bvhAabbMax; + btVector3FloatData m_bvhQuantization; + int m_curNodeIndex; + int m_useQuantization; + int m_numContiguousLeafNodes; + int m_numQuantizedContiguousNodes; + btOptimizedBvhNodeFloatData *m_contiguousNodesPtr; + btQuantizedBvhNodeData *m_quantizedContiguousNodesPtr; + btBvhSubtreeInfoData *m_subTreeInfoPtr; + int m_traversalMode; + int m_numSubtreeHeaders; + +}; + +struct btQuantizedBvhDoubleData +{ + btVector3DoubleData m_bvhAabbMin; + btVector3DoubleData m_bvhAabbMax; + btVector3DoubleData m_bvhQuantization; + int m_curNodeIndex; + int m_useQuantization; + int m_numContiguousLeafNodes; + int m_numQuantizedContiguousNodes; + btOptimizedBvhNodeDoubleData *m_contiguousNodesPtr; + btQuantizedBvhNodeData *m_quantizedContiguousNodesPtr; + + int m_traversalMode; + int m_numSubtreeHeaders; + btBvhSubtreeInfoData *m_subTreeInfoPtr; +}; + + +SIMD_FORCE_INLINE int btQuantizedBvh::calculateSerializeBufferSizeNew() const +{ + return sizeof(btQuantizedBvhData); +} + + + +#endif //BT_QUANTIZED_BVH_H diff --git a/opencl/gpu_sat/host/btStridingMeshInterface.cpp b/opencl/gpu_sat/host/btStridingMeshInterface.cpp new file mode 100644 index 000000000..298f6cbf7 --- /dev/null +++ b/opencl/gpu_sat/host/btStridingMeshInterface.cpp @@ -0,0 +1,214 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2003-2009 Erwin Coumans http://bulletphysics.org + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + +#include "btStridingMeshInterface.h" + + +btStridingMeshInterface::~btStridingMeshInterface() +{ + +} + + +void btStridingMeshInterface::InternalProcessAllTriangles(btInternalTriangleIndexCallback* callback,const btVector3& aabbMin,const btVector3& aabbMax) const +{ + (void)aabbMin; + (void)aabbMax; + int numtotalphysicsverts = 0; + int part,graphicssubparts = getNumSubParts(); + const unsigned char * vertexbase; + const unsigned char * indexbase; + int indexstride; + PHY_ScalarType type; + PHY_ScalarType gfxindextype; + int stride,numverts,numtriangles; + int gfxindex; + btVector3 triangle[3]; + + btVector3 meshScaling = getScaling(); + + ///if the number of parts is big, the performance might drop due to the innerloop switch on indextype + for (part=0;partinternalProcessTriangleIndex(triangle,part,gfxindex); + } + break; + } + case PHY_SHORT: + { + for (gfxindex=0;gfxindexinternalProcessTriangleIndex(triangle,part,gfxindex); + } + break; + } + case PHY_UCHAR: + { + for (gfxindex=0;gfxindexinternalProcessTriangleIndex(triangle,part,gfxindex); + } + break; + } + default: + btAssert((gfxindextype == PHY_INTEGER) || (gfxindextype == PHY_SHORT)); + } + break; + } + + case PHY_DOUBLE: + { + double* graphicsbase; + + switch (gfxindextype) + { + case PHY_INTEGER: + { + for (gfxindex=0;gfxindexinternalProcessTriangleIndex(triangle,part,gfxindex); + } + break; + } + case PHY_SHORT: + { + for (gfxindex=0;gfxindexinternalProcessTriangleIndex(triangle,part,gfxindex); + } + break; + } + case PHY_UCHAR: + { + for (gfxindex=0;gfxindexinternalProcessTriangleIndex(triangle,part,gfxindex); + } + break; + } + default: + btAssert((gfxindextype == PHY_INTEGER) || (gfxindextype == PHY_SHORT)); + } + break; + } + default: + btAssert((type == PHY_FLOAT) || (type == PHY_DOUBLE)); + } + + unLockReadOnlyVertexBase(part); + } +} + +void btStridingMeshInterface::calculateAabbBruteForce(btVector3& aabbMin,btVector3& aabbMax) +{ + + struct AabbCalculationCallback : public btInternalTriangleIndexCallback + { + btVector3 m_aabbMin; + btVector3 m_aabbMax; + + AabbCalculationCallback() + { + m_aabbMin.setValue(btScalar(BT_LARGE_FLOAT),btScalar(BT_LARGE_FLOAT),btScalar(BT_LARGE_FLOAT)); + m_aabbMax.setValue(btScalar(-BT_LARGE_FLOAT),btScalar(-BT_LARGE_FLOAT),btScalar(-BT_LARGE_FLOAT)); + } + + virtual void internalProcessTriangleIndex(btVector3* triangle,int partId,int triangleIndex) + { + (void)partId; + (void)triangleIndex; + + m_aabbMin.setMin(triangle[0]); + m_aabbMax.setMax(triangle[0]); + m_aabbMin.setMin(triangle[1]); + m_aabbMax.setMax(triangle[1]); + m_aabbMin.setMin(triangle[2]); + m_aabbMax.setMax(triangle[2]); + } + }; + + //first calculate the total aabb for all triangles + AabbCalculationCallback aabbCallback; + aabbMin.setValue(btScalar(-BT_LARGE_FLOAT),btScalar(-BT_LARGE_FLOAT),btScalar(-BT_LARGE_FLOAT)); + aabbMax.setValue(btScalar(BT_LARGE_FLOAT),btScalar(BT_LARGE_FLOAT),btScalar(BT_LARGE_FLOAT)); + InternalProcessAllTriangles(&aabbCallback,aabbMin,aabbMax); + + aabbMin = aabbCallback.m_aabbMin; + aabbMax = aabbCallback.m_aabbMax; +} + + diff --git a/opencl/gpu_sat/host/btStridingMeshInterface.h b/opencl/gpu_sat/host/btStridingMeshInterface.h new file mode 100644 index 000000000..b457df484 --- /dev/null +++ b/opencl/gpu_sat/host/btStridingMeshInterface.h @@ -0,0 +1,167 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2003-2009 Erwin Coumans http://bulletphysics.org + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + +#ifndef BT_STRIDING_MESHINTERFACE_H +#define BT_STRIDING_MESHINTERFACE_H + +#include "BulletCommon/btVector3.h" +#include "btTriangleCallback.h" +//#include "btConcaveShape.h" + + +enum PHY_ScalarType { + PHY_FLOAT, PHY_DOUBLE, PHY_INTEGER, PHY_SHORT, + PHY_FIXEDPOINT88, PHY_UCHAR +}; + + +/// The btStridingMeshInterface is the interface class for high performance generic access to triangle meshes, used in combination with btBvhTriangleMeshShape and some other collision shapes. +/// Using index striding of 3*sizeof(integer) it can use triangle arrays, using index striding of 1*sizeof(integer) it can handle triangle strips. +/// It allows for sharing graphics and collision meshes. Also it provides locking/unlocking of graphics meshes that are in gpu memory. +ATTRIBUTE_ALIGNED16(class ) btStridingMeshInterface +{ + protected: + + btVector3 m_scaling; + + public: + BT_DECLARE_ALIGNED_ALLOCATOR(); + + btStridingMeshInterface() :m_scaling(btScalar(1.),btScalar(1.),btScalar(1.)) + { + + } + + virtual ~btStridingMeshInterface(); + + + + virtual void InternalProcessAllTriangles(btInternalTriangleIndexCallback* callback,const btVector3& aabbMin,const btVector3& aabbMax) const; + + ///brute force method to calculate aabb + void calculateAabbBruteForce(btVector3& aabbMin,btVector3& aabbMax); + + /// get read and write access to a subpart of a triangle mesh + /// this subpart has a continuous array of vertices and indices + /// in this way the mesh can be handled as chunks of memory with striding + /// very similar to OpenGL vertexarray support + /// make a call to unLockVertexBase when the read and write access is finished + virtual void getLockedVertexIndexBase(unsigned char **vertexbase, int& numverts,PHY_ScalarType& type, int& stride,unsigned char **indexbase,int & indexstride,int& numfaces,PHY_ScalarType& indicestype,int subpart=0)=0; + + virtual void getLockedReadOnlyVertexIndexBase(const unsigned char **vertexbase, int& numverts,PHY_ScalarType& type, int& stride,const unsigned char **indexbase,int & indexstride,int& numfaces,PHY_ScalarType& indicestype,int subpart=0) const=0; + + /// unLockVertexBase finishes the access to a subpart of the triangle mesh + /// make a call to unLockVertexBase when the read and write access (using getLockedVertexIndexBase) is finished + virtual void unLockVertexBase(int subpart)=0; + + virtual void unLockReadOnlyVertexBase(int subpart) const=0; + + + /// getNumSubParts returns the number of seperate subparts + /// each subpart has a continuous array of vertices and indices + virtual int getNumSubParts() const=0; + + virtual void preallocateVertices(int numverts)=0; + virtual void preallocateIndices(int numindices)=0; + + virtual bool hasPremadeAabb() const { return false; } + virtual void setPremadeAabb(const btVector3& aabbMin, const btVector3& aabbMax ) const + { + (void) aabbMin; + (void) aabbMax; + } + virtual void getPremadeAabb(btVector3* aabbMin, btVector3* aabbMax ) const + { + (void) aabbMin; + (void) aabbMax; + } + + const btVector3& getScaling() const { + return m_scaling; + } + void setScaling(const btVector3& scaling) + { + m_scaling = scaling; + } + + virtual int calculateSerializeBufferSize() const; + + ///fills the dataBuffer and returns the struct name (and 0 on failure) + //virtual const char* serialize(void* dataBuffer, btSerializer* serializer) const; + + +}; + +struct btIntIndexData +{ + int m_value; +}; + +struct btShortIntIndexData +{ + short m_value; + char m_pad[2]; +}; + +struct btShortIntIndexTripletData +{ + short m_values[3]; + char m_pad[2]; +}; + +struct btCharIndexTripletData +{ + unsigned char m_values[3]; + char m_pad; +}; + + +///do not change those serialization structures, it requires an updated sBulletDNAstr/sBulletDNAstr64 +struct btMeshPartData +{ + btVector3FloatData *m_vertices3f; + btVector3DoubleData *m_vertices3d; + + btIntIndexData *m_indices32; + btShortIntIndexTripletData *m_3indices16; + btCharIndexTripletData *m_3indices8; + + btShortIntIndexData *m_indices16;//backwards compatibility + + int m_numTriangles;//length of m_indices = m_numTriangles + int m_numVertices; +}; + + +///do not change those serialization structures, it requires an updated sBulletDNAstr/sBulletDNAstr64 +struct btStridingMeshInterfaceData +{ + btMeshPartData *m_meshPartsPtr; + btVector3FloatData m_scaling; + int m_numMeshParts; + char m_padding[4]; +}; + + + + +SIMD_FORCE_INLINE int btStridingMeshInterface::calculateSerializeBufferSize() const +{ + return sizeof(btStridingMeshInterfaceData); +} + + + +#endif //BT_STRIDING_MESHINTERFACE_H diff --git a/opencl/gpu_sat/host/btTriangleCallback.cpp b/opencl/gpu_sat/host/btTriangleCallback.cpp new file mode 100644 index 000000000..f558bf6d2 --- /dev/null +++ b/opencl/gpu_sat/host/btTriangleCallback.cpp @@ -0,0 +1,28 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2003-2009 Erwin Coumans http://bulletphysics.org + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + +#include "btTriangleCallback.h" + +btTriangleCallback::~btTriangleCallback() +{ + +} + + +btInternalTriangleIndexCallback::~btInternalTriangleIndexCallback() +{ + +} + diff --git a/opencl/gpu_sat/host/btTriangleCallback.h b/opencl/gpu_sat/host/btTriangleCallback.h new file mode 100644 index 000000000..e9ce72ffb --- /dev/null +++ b/opencl/gpu_sat/host/btTriangleCallback.h @@ -0,0 +1,42 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2003-2009 Erwin Coumans http://bulletphysics.org + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + +#ifndef BT_TRIANGLE_CALLBACK_H +#define BT_TRIANGLE_CALLBACK_H + +#include "BulletCommon/btVector3.h" + + +///The btTriangleCallback provides a callback for each overlapping triangle when calling processAllTriangles. +///This callback is called by processAllTriangles for all btConcaveShape derived class, such as btBvhTriangleMeshShape, btStaticPlaneShape and btHeightfieldTerrainShape. +class btTriangleCallback +{ +public: + + virtual ~btTriangleCallback(); + virtual void processTriangle(btVector3* triangle, int partId, int triangleIndex) = 0; +}; + +class btInternalTriangleIndexCallback +{ +public: + + virtual ~btInternalTriangleIndexCallback(); + virtual void internalProcessTriangleIndex(btVector3* triangle,int partId,int triangleIndex) = 0; +}; + + + +#endif //BT_TRIANGLE_CALLBACK_H diff --git a/opencl/gpu_sat/host/btTriangleIndexVertexArray.cpp b/opencl/gpu_sat/host/btTriangleIndexVertexArray.cpp new file mode 100644 index 000000000..a665024cb --- /dev/null +++ b/opencl/gpu_sat/host/btTriangleIndexVertexArray.cpp @@ -0,0 +1,95 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2003-2009 Erwin Coumans http://bulletphysics.org + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + +#include "btTriangleIndexVertexArray.h" + +btTriangleIndexVertexArray::btTriangleIndexVertexArray(int numTriangles,int* triangleIndexBase,int triangleIndexStride,int numVertices,btScalar* vertexBase,int vertexStride) +: m_hasAabb(0) +{ + btIndexedMesh mesh; + + mesh.m_numTriangles = numTriangles; + mesh.m_triangleIndexBase = (const unsigned char *)triangleIndexBase; + mesh.m_triangleIndexStride = triangleIndexStride; + mesh.m_numVertices = numVertices; + mesh.m_vertexBase = (const unsigned char *)vertexBase; + mesh.m_vertexStride = vertexStride; + + addIndexedMesh(mesh); + +} + +btTriangleIndexVertexArray::~btTriangleIndexVertexArray() +{ + +} + +void btTriangleIndexVertexArray::getLockedVertexIndexBase(unsigned char **vertexbase, int& numverts,PHY_ScalarType& type, int& vertexStride,unsigned char **indexbase,int & indexstride,int& numfaces,PHY_ScalarType& indicestype,int subpart) +{ + btAssert(subpart< getNumSubParts() ); + + btIndexedMesh& mesh = m_indexedMeshes[subpart]; + + numverts = mesh.m_numVertices; + (*vertexbase) = (unsigned char *) mesh.m_vertexBase; + + type = mesh.m_vertexType; + + vertexStride = mesh.m_vertexStride; + + numfaces = mesh.m_numTriangles; + + (*indexbase) = (unsigned char *)mesh.m_triangleIndexBase; + indexstride = mesh.m_triangleIndexStride; + indicestype = mesh.m_indexType; +} + +void btTriangleIndexVertexArray::getLockedReadOnlyVertexIndexBase(const unsigned char **vertexbase, int& numverts,PHY_ScalarType& type, int& vertexStride,const unsigned char **indexbase,int & indexstride,int& numfaces,PHY_ScalarType& indicestype,int subpart) const +{ + const btIndexedMesh& mesh = m_indexedMeshes[subpart]; + + numverts = mesh.m_numVertices; + (*vertexbase) = (const unsigned char *)mesh.m_vertexBase; + + type = mesh.m_vertexType; + + vertexStride = mesh.m_vertexStride; + + numfaces = mesh.m_numTriangles; + (*indexbase) = (const unsigned char *)mesh.m_triangleIndexBase; + indexstride = mesh.m_triangleIndexStride; + indicestype = mesh.m_indexType; +} + +bool btTriangleIndexVertexArray::hasPremadeAabb() const +{ + return (m_hasAabb == 1); +} + + +void btTriangleIndexVertexArray::setPremadeAabb(const btVector3& aabbMin, const btVector3& aabbMax ) const +{ + m_aabbMin = aabbMin; + m_aabbMax = aabbMax; + m_hasAabb = 1; // this is intentionally an int see notes in header +} + +void btTriangleIndexVertexArray::getPremadeAabb(btVector3* aabbMin, btVector3* aabbMax ) const +{ + *aabbMin = m_aabbMin; + *aabbMax = m_aabbMax; +} + + diff --git a/opencl/gpu_sat/host/btTriangleIndexVertexArray.h b/opencl/gpu_sat/host/btTriangleIndexVertexArray.h new file mode 100644 index 000000000..bc6f05e26 --- /dev/null +++ b/opencl/gpu_sat/host/btTriangleIndexVertexArray.h @@ -0,0 +1,133 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2003-2009 Erwin Coumans http://bulletphysics.org + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + +#ifndef BT_TRIANGLE_INDEX_VERTEX_ARRAY_H +#define BT_TRIANGLE_INDEX_VERTEX_ARRAY_H + +#include "btStridingMeshInterface.h" +#include "BulletCommon/btAlignedObjectArray.h" +#include "BulletCommon/btScalar.h" + + +///The btIndexedMesh indexes a single vertex and index array. Multiple btIndexedMesh objects can be passed into a btTriangleIndexVertexArray using addIndexedMesh. +///Instead of the number of indices, we pass the number of triangles. +ATTRIBUTE_ALIGNED16( struct) btIndexedMesh +{ + BT_DECLARE_ALIGNED_ALLOCATOR(); + + int m_numTriangles; + const unsigned char * m_triangleIndexBase; + // Size in byte of the indices for one triangle (3*sizeof(index_type) if the indices are tightly packed) + int m_triangleIndexStride; + int m_numVertices; + const unsigned char * m_vertexBase; + // Size of a vertex, in bytes + int m_vertexStride; + + // The index type is set when adding an indexed mesh to the + // btTriangleIndexVertexArray, do not set it manually + PHY_ScalarType m_indexType; + + // The vertex type has a default type similar to Bullet's precision mode (float or double) + // but can be set manually if you for example run Bullet with double precision but have + // mesh data in single precision.. + PHY_ScalarType m_vertexType; + + + btIndexedMesh() + :m_indexType(PHY_INTEGER), +#ifdef BT_USE_DOUBLE_PRECISION + m_vertexType(PHY_DOUBLE) +#else // BT_USE_DOUBLE_PRECISION + m_vertexType(PHY_FLOAT) +#endif // BT_USE_DOUBLE_PRECISION + { + } +} +; + + +typedef btAlignedObjectArray IndexedMeshArray; + +///The btTriangleIndexVertexArray allows to access multiple triangle meshes, by indexing into existing triangle/index arrays. +///Additional meshes can be added using addIndexedMesh +///No duplcate is made of the vertex/index data, it only indexes into external vertex/index arrays. +///So keep those arrays around during the lifetime of this btTriangleIndexVertexArray. +ATTRIBUTE_ALIGNED16( class) btTriangleIndexVertexArray : public btStridingMeshInterface +{ +protected: + IndexedMeshArray m_indexedMeshes; + int m_pad[2]; + mutable int m_hasAabb; // using int instead of bool to maintain alignment + mutable btVector3 m_aabbMin; + mutable btVector3 m_aabbMax; + +public: + + BT_DECLARE_ALIGNED_ALLOCATOR(); + + btTriangleIndexVertexArray() : m_hasAabb(0) + { + } + + virtual ~btTriangleIndexVertexArray(); + + //just to be backwards compatible + btTriangleIndexVertexArray(int numTriangles,int* triangleIndexBase,int triangleIndexStride,int numVertices,btScalar* vertexBase,int vertexStride); + + void addIndexedMesh(const btIndexedMesh& mesh, PHY_ScalarType indexType = PHY_INTEGER) + { + m_indexedMeshes.push_back(mesh); + m_indexedMeshes[m_indexedMeshes.size()-1].m_indexType = indexType; + } + + + virtual void getLockedVertexIndexBase(unsigned char **vertexbase, int& numverts,PHY_ScalarType& type, int& vertexStride,unsigned char **indexbase,int & indexstride,int& numfaces,PHY_ScalarType& indicestype,int subpart=0); + + virtual void getLockedReadOnlyVertexIndexBase(const unsigned char **vertexbase, int& numverts,PHY_ScalarType& type, int& vertexStride,const unsigned char **indexbase,int & indexstride,int& numfaces,PHY_ScalarType& indicestype,int subpart=0) const; + + /// unLockVertexBase finishes the access to a subpart of the triangle mesh + /// make a call to unLockVertexBase when the read and write access (using getLockedVertexIndexBase) is finished + virtual void unLockVertexBase(int subpart) {(void)subpart;} + + virtual void unLockReadOnlyVertexBase(int subpart) const {(void)subpart;} + + /// getNumSubParts returns the number of seperate subparts + /// each subpart has a continuous array of vertices and indices + virtual int getNumSubParts() const { + return (int)m_indexedMeshes.size(); + } + + IndexedMeshArray& getIndexedMeshArray() + { + return m_indexedMeshes; + } + + const IndexedMeshArray& getIndexedMeshArray() const + { + return m_indexedMeshes; + } + + virtual void preallocateVertices(int numverts){(void) numverts;} + virtual void preallocateIndices(int numindices){(void) numindices;} + + virtual bool hasPremadeAabb() const; + virtual void setPremadeAabb(const btVector3& aabbMin, const btVector3& aabbMax ) const; + virtual void getPremadeAabb(btVector3* aabbMin, btVector3* aabbMax ) const; + +} +; + +#endif //BT_TRIANGLE_INDEX_VERTEX_ARRAY_H diff --git a/opencl/gpu_sat/kernels/bvhTraversal.cl b/opencl/gpu_sat/kernels/bvhTraversal.cl new file mode 100644 index 000000000..aad0341c4 --- /dev/null +++ b/opencl/gpu_sat/kernels/bvhTraversal.cl @@ -0,0 +1,109 @@ +//keep this enum in sync with the CPU version (in btCollidable.h) +//written by Erwin Coumans + +#define SHAPE_CONVEX_HULL 3 +#define SHAPE_CONCAVE_TRIMESH 5 +#define TRIANGLE_NUM_CONVEX_FACES 5 +#define SHAPE_COMPOUND_OF_CONVEX_HULLS 6 + +typedef unsigned int u32; + +///keep this in sync with btCollidable.h +typedef struct +{ + int m_numChildShapes; + int blaat2; + int m_shapeType; + int m_shapeIndex; + +} btCollidableGpu; + +typedef struct +{ + float4 m_childPosition; + float4 m_childOrientation; + int m_shapeIndex; + int m_unused0; + int m_unused1; + int m_unused2; +} btGpuChildShape; + + +typedef struct +{ + float4 m_pos; + float4 m_quat; + float4 m_linVel; + float4 m_angVel; + + u32 m_collidableIdx; + float m_invMass; + float m_restituitionCoeff; + float m_frictionCoeff; +} BodyData; + +typedef struct +{ + union + { + float4 m_min; + float m_minElems[4]; + int m_minIndices[4]; + }; + union + { + float4 m_max; + float m_maxElems[4]; + int m_maxIndices[4]; + }; +} btAabbCL; + +// work-in-progress +__kernel void bvhTraversalKernel( __global const int2* pairs, + __global const BodyData* rigidBodies, + __global const btCollidableGpu* collidables, + __global btAabbCL* aabbs, + __global int4* concavePairsOut, + __global volatile int* numConcavePairsOut, + int numPairs, + int maxNumConcavePairsCapacity + ) +{ + + int i = get_global_id(0); + + if (i