From 085d70564506bdc1378137b9b55ea6b3fb7d6926 Mon Sep 17 00:00:00 2001 From: erwin coumans Date: Tue, 19 Mar 2013 23:11:19 -0700 Subject: [PATCH] more work towards GPU bvh traversal --- opencl/gpu_sat/host/ConvexHullContact.cpp | 43 ++--- opencl/gpu_sat/host/btQuantizedBvh.h | 3 +- opencl/gpu_sat/kernels/bvhTraversal.cl | 185 ++++++++++++++++++++-- opencl/gpu_sat/kernels/bvhTraversal.h | 185 ++++++++++++++++++++-- 4 files changed, 354 insertions(+), 62 deletions(-) diff --git a/opencl/gpu_sat/host/ConvexHullContact.cpp b/opencl/gpu_sat/host/ConvexHullContact.cpp index d527a9c36..2f0c2806f 100644 --- a/opencl/gpu_sat/host/ConvexHullContact.cpp +++ b/opencl/gpu_sat/host/ConvexHullContact.cpp @@ -329,35 +329,9 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( const btOpenCLArray subTreesGPU(this->m_context,this->m_queue,numSubTrees); subTreesGPU.copyFromHost(bvhData[0]->getSubtreeInfoArray()); - - /* - __kernel void bvhTraversalKernel( __global const int2* pairs, - __global const BodyData* rigidBodies, - __global const btCollidableGpu* collidables, - __global btAabbCL* aabbs, - __global int4* concavePairsOut, - __global volatile int* numConcavePairsOut, - int numPairs, - int maxNumConcavePairsCapacity - ) - - btBufferInfoCL( pairs->getBufferCL(), true ), - btBufferInfoCL( bodyBuf->getBufferCL(),true), - btBufferInfoCL( gpuCollidables.getBufferCL(),true), - btBufferInfoCL( convexData.getBufferCL(),true), - btBufferInfoCL( gpuVertices.getBufferCL(),true), - btBufferInfoCL( gpuUniqueEdges.getBufferCL(),true), - btBufferInfoCL( gpuFaces.getBufferCL(),true), - btBufferInfoCL( gpuIndices.getBufferCL(),true), - btBufferInfoCL( clAabbsWS.getBufferCL(),true), - btBufferInfoCL( sepNormals.getBufferCL()), - btBufferInfoCL( hasSeparatingNormals.getBufferCL()), - btBufferInfoCL( triangleConvexPairsOut.getBufferCL()), - btBufferInfoCL( concaveSepNormals.getBufferCL()), - btBufferInfoCL( numConcavePairsOut.getBufferCL()) - - */ - + btVector3 bvhAabbMin = bvhData[0]->m_bvhAabbMin; + btVector3 bvhAabbMax = bvhData[0]->m_bvhAabbMax; + btVector3 bvhQuantization = bvhData[0]->m_bvhQuantization; { int np = numConcavePairsOut.at(0); printf("np=%d\n", np); @@ -368,12 +342,23 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( const btOpenCLArray pairsOutCPU; + triangleConvexPairsOut.copyToHost(pairsOutCPU); + + printf("np=%d\n", np); } diff --git a/opencl/gpu_sat/host/btQuantizedBvh.h b/opencl/gpu_sat/host/btQuantizedBvh.h index 45f55e5a3..35378b2d0 100644 --- a/opencl/gpu_sat/host/btQuantizedBvh.h +++ b/opencl/gpu_sat/host/btQuantizedBvh.h @@ -181,13 +181,14 @@ public: TRAVERSAL_RECURSIVE }; -protected: + btVector3 m_bvhAabbMin; btVector3 m_bvhAabbMax; btVector3 m_bvhQuantization; +protected: int m_bulletVersion; //for serialization versioning. It could also be used to detect endianess. int m_curNodeIndex; diff --git a/opencl/gpu_sat/kernels/bvhTraversal.cl b/opencl/gpu_sat/kernels/bvhTraversal.cl index aad0341c4..8411ac01e 100644 --- a/opencl/gpu_sat/kernels/bvhTraversal.cl +++ b/opencl/gpu_sat/kernels/bvhTraversal.cl @@ -8,6 +8,76 @@ typedef unsigned int u32; +#define MAX_NUM_PARTS_IN_BITS 10 + +///btQuantizedBvhNode is a compressed aabb node, 16 bytes. +///Node can be used for leafnode or internal node. Leafnodes can point to 32-bit triangle index (non-negative range). +typedef struct +{ + //12 bytes + unsigned short int m_quantizedAabbMin[3]; + unsigned short int m_quantizedAabbMax[3]; + //4 bytes + int m_escapeIndexOrTriangleIndex; +} btQuantizedBvhNode; +/* + bool isLeafNode() const + { + //skipindex is negative (internal node), triangleindex >=0 (leafnode) + return (m_escapeIndexOrTriangleIndex >= 0); + } + int getEscapeIndex() const + { + btAssert(!isLeafNode()); + return -m_escapeIndexOrTriangleIndex; + } + int getTriangleIndex() const + { + btAssert(isLeafNode()); + unsigned int x=0; + unsigned int y = (~(x&0))<<(31-MAX_NUM_PARTS_IN_BITS); + // Get only the lower bits where the triangle index is stored + return (m_escapeIndexOrTriangleIndex&~(y)); + } + int getPartId() const + { + btAssert(isLeafNode()); + // Get only the highest bits where the part index is stored + return (m_escapeIndexOrTriangleIndex>>(31-MAX_NUM_PARTS_IN_BITS)); + } +*/ + +int getTriangleIndex(__global const btQuantizedBvhNode* rootNode) +{ + unsigned int x=0; + unsigned int y = (~(x&0))<<(31-MAX_NUM_PARTS_IN_BITS); + // Get only the lower bits where the triangle index is stored + return (rootNode->m_escapeIndexOrTriangleIndex&~(y)); +} + +bool isLeaf(__global const btQuantizedBvhNode* rootNode) +{ + //skipindex is negative (internal node), triangleindex >=0 (leafnode) + return (rootNode->m_escapeIndexOrTriangleIndex >= 0); +} + +int getEscapeIndex(__global const btQuantizedBvhNode* rootNode) +{ + return -rootNode->m_escapeIndexOrTriangleIndex; +} + +typedef struct +{ + //12 bytes + unsigned short int m_quantizedAabbMin[3]; + unsigned short int m_quantizedAabbMax[3]; + //4 bytes, points to the root of the subtree + int m_rootNodeIndex; + //4 bytes + int m_subtreeSize; + int m_padding[3]; +} btBvhSubtreeInfo; + ///keep this in sync with btCollidable.h typedef struct { @@ -58,16 +128,53 @@ typedef struct }; } btAabbCL; + +bool testQuantizedAabbAgainstQuantizedAabb(__private const unsigned short int* aabbMin1,__private const unsigned short int* aabbMax1,__global const unsigned short int* aabbMin2,__global const unsigned short int* aabbMax2) +{ + bool overlap = true; + overlap = (aabbMin1[0] > aabbMax2[0] || aabbMax1[0] < aabbMin2[0]) ? false : overlap; + overlap = (aabbMin1[2] > aabbMax2[2] || aabbMax1[2] < aabbMin2[2]) ? false : overlap; + overlap = (aabbMin1[1] > aabbMax2[1] || aabbMax1[1] < aabbMin2[1]) ? false : overlap; + return overlap; +} + + +void quantizeWithClamp(unsigned short* out, float4 point2,int isMax, float4 bvhAabbMin, float4 bvhAabbMax, float4 bvhQuantization) +{ + float4 clampedPoint = max(point2,bvhAabbMin); + clampedPoint = min (clampedPoint, bvhAabbMax); + + float4 v = (clampedPoint - bvhAabbMin) * bvhQuantization; + if (isMax) + { + out[0] = (unsigned short) (((unsigned short)(v.x+1.f) | 1)); + out[1] = (unsigned short) (((unsigned short)(v.y+1.f) | 1)); + out[2] = (unsigned short) (((unsigned short)(v.z+1.f) | 1)); + } else + { + out[0] = (unsigned short) (((unsigned short)(v.x) & 0xfffe)); + out[1] = (unsigned short) (((unsigned short)(v.y) & 0xfffe)); + out[2] = (unsigned short) (((unsigned short)(v.z) & 0xfffe)); + } + +} + + // work-in-progress __kernel void bvhTraversalKernel( __global const int2* pairs, - __global const BodyData* rigidBodies, - __global const btCollidableGpu* collidables, - __global btAabbCL* aabbs, - __global int4* concavePairsOut, - __global volatile int* numConcavePairsOut, - int numPairs, - int maxNumConcavePairsCapacity - ) + __global const BodyData* rigidBodies, + __global const btCollidableGpu* collidables, + __global btAabbCL* aabbs, + __global int4* concavePairsOut, + __global volatile int* numConcavePairsOut, + __global const btBvhSubtreeInfo* subtreeHeaders, + __global const btQuantizedBvhNode* quantizedNodes, + float4 bvhAabbMin, + float4 bvhAabbMax, + float4 bvhQuantization, + int numSubtreeHeaders, + int numPairs, + int maxNumConcavePairsCapacity) { int i = get_global_id(0); @@ -94,16 +201,62 @@ __kernel void bvhTraversalKernel( __global const int2* pairs, if ((collidables[collidableIndexA].m_shapeType==SHAPE_CONCAVE_TRIMESH))// && (collidables[collidableIndexB].m_shapeType==SHAPE_CONVEX_HULL)) { - int pairIdx = atomic_inc(numConcavePairsOut); - if (pairIdxm_quantizedAabbMin,subtree->m_quantizedAabbMax); + if (overlap != 0) + { + int startNodeIndex = subtree->m_rootNodeIndex; + int endNodeIndex = subtree->m_rootNodeIndex+subtree->m_subtreeSize; + + int curIndex = startNodeIndex; + int subTreeSize = endNodeIndex - startNodeIndex; + __global const btQuantizedBvhNode* rootNode = &quantizedNodes[startNodeIndex]; + int escapeIndex; + bool isLeafNode; + unsigned aabbOverlap; + while (curIndex < endNodeIndex) + { + aabbOverlap = testQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin,quantizedQueryAabbMax,rootNode->m_quantizedAabbMin,rootNode->m_quantizedAabbMax); + isLeafNode = isLeaf(rootNode); + if (isLeafNode && aabbOverlap) + { + //do your thing! nodeCallback->processNode(rootNode->getPartId(),rootNode->getTriangleIndex()); + int triangleIndex = getTriangleIndex(rootNode); + int pairIdx = atomic_inc(numConcavePairsOut); + if (pairIdx=0 (leafnode)\n" +" return (m_escapeIndexOrTriangleIndex >= 0);\n" +" }\n" +" int getEscapeIndex() const\n" +" {\n" +" btAssert(!isLeafNode());\n" +" return -m_escapeIndexOrTriangleIndex;\n" +" }\n" +" int getTriangleIndex() const\n" +" {\n" +" btAssert(isLeafNode());\n" +" unsigned int x=0;\n" +" unsigned int y = (~(x&0))<<(31-MAX_NUM_PARTS_IN_BITS);\n" +" // Get only the lower bits where the triangle index is stored\n" +" return (m_escapeIndexOrTriangleIndex&~(y));\n" +" }\n" +" int getPartId() const\n" +" {\n" +" btAssert(isLeafNode());\n" +" // Get only the highest bits where the part index is stored\n" +" return (m_escapeIndexOrTriangleIndex>>(31-MAX_NUM_PARTS_IN_BITS));\n" +" }\n" +"*/\n" +"\n" +"int getTriangleIndex(__global const btQuantizedBvhNode* rootNode)\n" +"{\n" +" unsigned int x=0;\n" +" unsigned int y = (~(x&0))<<(31-MAX_NUM_PARTS_IN_BITS);\n" +" // Get only the lower bits where the triangle index is stored\n" +" return (rootNode->m_escapeIndexOrTriangleIndex&~(y));\n" +"}\n" +"\n" +"bool isLeaf(__global const btQuantizedBvhNode* rootNode)\n" +"{\n" +" //skipindex is negative (internal node), triangleindex >=0 (leafnode)\n" +" return (rootNode->m_escapeIndexOrTriangleIndex >= 0);\n" +"}\n" +" \n" +"int getEscapeIndex(__global const btQuantizedBvhNode* rootNode)\n" +"{\n" +" return -rootNode->m_escapeIndexOrTriangleIndex;\n" +"}\n" +"\n" +"typedef struct\n" +"{\n" +" //12 bytes\n" +" unsigned short int m_quantizedAabbMin[3];\n" +" unsigned short int m_quantizedAabbMax[3];\n" +" //4 bytes, points to the root of the subtree\n" +" int m_rootNodeIndex;\n" +" //4 bytes\n" +" int m_subtreeSize;\n" +" int m_padding[3];\n" +"} btBvhSubtreeInfo;\n" +"\n" "///keep this in sync with btCollidable.h\n" "typedef struct\n" "{\n" @@ -60,16 +130,53 @@ static const char* bvhTraversalKernelCL= \ " };\n" "} btAabbCL;\n" "\n" +"\n" +"bool testQuantizedAabbAgainstQuantizedAabb(__private const unsigned short int* aabbMin1,__private const unsigned short int* aabbMax1,__global const unsigned short int* aabbMin2,__global const unsigned short int* aabbMax2)\n" +"{\n" +" bool overlap = true;\n" +" overlap = (aabbMin1[0] > aabbMax2[0] || aabbMax1[0] < aabbMin2[0]) ? false : overlap;\n" +" overlap = (aabbMin1[2] > aabbMax2[2] || aabbMax1[2] < aabbMin2[2]) ? false : overlap;\n" +" overlap = (aabbMin1[1] > aabbMax2[1] || aabbMax1[1] < aabbMin2[1]) ? false : overlap;\n" +" return overlap;\n" +"}\n" +"\n" +"\n" +"void quantizeWithClamp(unsigned short* out, float4 point2,int isMax, float4 bvhAabbMin, float4 bvhAabbMax, float4 bvhQuantization)\n" +"{\n" +" float4 clampedPoint = max(point2,bvhAabbMin);\n" +" clampedPoint = min (clampedPoint, bvhAabbMax);\n" +"\n" +" float4 v = (clampedPoint - bvhAabbMin) * bvhQuantization;\n" +" if (isMax)\n" +" {\n" +" out[0] = (unsigned short) (((unsigned short)(v.x+1.f) | 1));\n" +" out[1] = (unsigned short) (((unsigned short)(v.y+1.f) | 1));\n" +" out[2] = (unsigned short) (((unsigned short)(v.z+1.f) | 1));\n" +" } else\n" +" {\n" +" out[0] = (unsigned short) (((unsigned short)(v.x) & 0xfffe));\n" +" out[1] = (unsigned short) (((unsigned short)(v.y) & 0xfffe));\n" +" out[2] = (unsigned short) (((unsigned short)(v.z) & 0xfffe));\n" +" }\n" +"\n" +"}\n" +"\n" +"\n" "// work-in-progress\n" "__kernel void bvhTraversalKernel( __global const int2* pairs, \n" -" __global const BodyData* rigidBodies, \n" -" __global const btCollidableGpu* collidables,\n" -" __global btAabbCL* aabbs,\n" -" __global int4* concavePairsOut,\n" -" __global volatile int* numConcavePairsOut,\n" -" int numPairs,\n" -" int maxNumConcavePairsCapacity\n" -" )\n" +" __global const BodyData* rigidBodies, \n" +" __global const btCollidableGpu* collidables,\n" +" __global btAabbCL* aabbs,\n" +" __global int4* concavePairsOut,\n" +" __global volatile int* numConcavePairsOut,\n" +" __global const btBvhSubtreeInfo* subtreeHeaders,\n" +" __global const btQuantizedBvhNode* quantizedNodes,\n" +" float4 bvhAabbMin,\n" +" float4 bvhAabbMax,\n" +" float4 bvhQuantization,\n" +" int numSubtreeHeaders,\n" +" int numPairs,\n" +" int maxNumConcavePairsCapacity)\n" "{\n" "\n" " int i = get_global_id(0);\n" @@ -96,17 +203,63 @@ static const char* bvhTraversalKernelCL= \ " \n" " if ((collidables[collidableIndexA].m_shapeType==SHAPE_CONCAVE_TRIMESH))// && (collidables[collidableIndexB].m_shapeType==SHAPE_CONVEX_HULL))\n" " {\n" -" int pairIdx = atomic_inc(numConcavePairsOut);\n" -" if (pairIdxm_quantizedAabbMin,subtree->m_quantizedAabbMax);\n" +" if (overlap != 0)\n" +" {\n" +" int startNodeIndex = subtree->m_rootNodeIndex;\n" +" int endNodeIndex = subtree->m_rootNodeIndex+subtree->m_subtreeSize;\n" +"\n" +" int curIndex = startNodeIndex;\n" +" int subTreeSize = endNodeIndex - startNodeIndex;\n" +" __global const btQuantizedBvhNode* rootNode = &quantizedNodes[startNodeIndex];\n" +" int escapeIndex;\n" +" bool isLeafNode;\n" +" unsigned aabbOverlap;\n" +" while (curIndex < endNodeIndex)\n" +" {\n" +" aabbOverlap = testQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin,quantizedQueryAabbMax,rootNode->m_quantizedAabbMin,rootNode->m_quantizedAabbMax);\n" +" isLeafNode = isLeaf(rootNode);\n" +" if (isLeafNode && aabbOverlap)\n" +" {\n" +" //do your thing! nodeCallback->processNode(rootNode->getPartId(),rootNode->getTriangleIndex());\n" +" int triangleIndex = getTriangleIndex(rootNode);\n" +" int pairIdx = atomic_inc(numConcavePairsOut);\n" +" if (pairIdx