More work towards GPU BVH traversal

This commit is contained in:
erwin coumans
2013-03-19 23:11:19 -07:00
parent 8bfbaf3ed1
commit 085d705645
4 changed files with 354 additions and 62 deletions

View File

@@ -10,6 +10,76 @@ static const char* bvhTraversalKernelCL= \
"\n"
"typedef unsigned int u32;\n"
"\n"
"#define MAX_NUM_PARTS_IN_BITS 10\n"
"\n"
"///btQuantizedBvhNode: compressed AABB node, 16 bytes in total.\n"
"///The same layout serves leaf nodes and internal nodes; a leaf stores a non-negative 32-bit triangle index.\n"
"typedef struct\n"
"{\n"
"	//quantized AABB corners: 2 x 3 x 16 bit = 12 bytes\n"
"	unsigned short m_quantizedAabbMin[3];\n"
"	unsigned short m_quantizedAabbMax[3];\n"
"	//4 bytes: negative => negated escape index (internal node), otherwise triangle index (leaf node)\n"
"	int m_escapeIndexOrTriangleIndex;\n"
"} btQuantizedBvhNode;\n"
"/*\n"
" bool isLeafNode() const\n"
" {\n"
" //skipindex is negative (internal node), triangleindex >=0 (leafnode)\n"
" return (m_escapeIndexOrTriangleIndex >= 0);\n"
" }\n"
" int getEscapeIndex() const\n"
" {\n"
" btAssert(!isLeafNode());\n"
" return -m_escapeIndexOrTriangleIndex;\n"
" }\n"
" int getTriangleIndex() const\n"
" {\n"
" btAssert(isLeafNode());\n"
" unsigned int x=0;\n"
" unsigned int y = (~(x&0))<<(31-MAX_NUM_PARTS_IN_BITS);\n"
" // Get only the lower bits where the triangle index is stored\n"
" return (m_escapeIndexOrTriangleIndex&~(y));\n"
" }\n"
" int getPartId() const\n"
" {\n"
" btAssert(isLeafNode());\n"
" // Get only the highest bits where the part index is stored\n"
" return (m_escapeIndexOrTriangleIndex>>(31-MAX_NUM_PARTS_IN_BITS));\n"
" }\n"
"*/\n"
"\n"
"///Returns the triangle index of a leaf node, i.e. the low (31-MAX_NUM_PARTS_IN_BITS) bits\n"
"///of m_escapeIndexOrTriangleIndex; the bits above hold the part id and are masked off.\n"
"int getTriangleIndex(__global const btQuantizedBvhNode* rootNode)\n"
"{\n"
"	//all ones, shifted so that only the part-id (upper) bits remain set\n"
"	const unsigned int partIdBits = (~0u) << (31-MAX_NUM_PARTS_IN_BITS);\n"
"	const unsigned int triangleIndexBits = ~partIdBits;\n"
"	return (rootNode->m_escapeIndexOrTriangleIndex & triangleIndexBits);\n"
"}\n"
"\n"
"///A node is a leaf when its index field is non-negative (triangle index);\n"
"///internal nodes store a negative value (negated escape index).\n"
"bool isLeaf(__global const btQuantizedBvhNode* rootNode)\n"
"{\n"
"	const int indexField = rootNode->m_escapeIndexOrTriangleIndex;\n"
"	return !(indexField < 0);\n"
"}\n"
" \n"
"///Returns the escape index of an internal node: the stored (negative) field, negated.\n"
"///The traversal adds this value to the current node index to skip a subtree.\n"
"int getEscapeIndex(__global const btQuantizedBvhNode* rootNode)\n"
"{\n"
"	const int storedIndex = rootNode->m_escapeIndexOrTriangleIndex;\n"
"	return -storedIndex;\n"
"}\n"
"\n"
"///Header describing one subtree of the quantized BVH.\n"
"typedef struct\n"
"{\n"
"	//quantized bounds of the whole subtree, 12 bytes\n"
"	unsigned short m_quantizedAabbMin[3];\n"
"	unsigned short m_quantizedAabbMax[3];\n"
"	//4 bytes: index of the subtree's root in the quantized node array\n"
"	int m_rootNodeIndex;\n"
"	//4 bytes: number of nodes that belong to the subtree\n"
"	int m_subtreeSize;\n"
"	//12 bytes of padding\n"
"	int m_padding[3];\n"
"} btBvhSubtreeInfo;\n"
"\n"
"///keep this in sync with btCollidable.h\n"
"typedef struct\n"
"{\n"
@@ -60,16 +130,53 @@ static const char* bvhTraversalKernelCL= \
" };\n"
"} btAabbCL;\n"
"\n"
"\n"
"///Overlap test between two quantized AABBs: box 1 lives in private memory, box 2 in global memory.\n"
"///Returns true unless the boxes are separated along at least one axis (touching boxes overlap).\n"
"bool testQuantizedAabbAgainstQuantizedAabb(__private const unsigned short int* aabbMin1,__private const unsigned short int* aabbMax1,__global const unsigned short int* aabbMin2,__global const unsigned short int* aabbMax2)\n"
"{\n"
"	//axes are examined in x, z, y order\n"
"	const bool separatedX = (aabbMin1[0] > aabbMax2[0]) || (aabbMax1[0] < aabbMin2[0]);\n"
"	const bool separatedZ = (aabbMin1[2] > aabbMax2[2]) || (aabbMax1[2] < aabbMin2[2]);\n"
"	const bool separatedY = (aabbMin1[1] > aabbMax2[1]) || (aabbMax1[1] < aabbMin2[1]);\n"
"	return !(separatedX || separatedZ || separatedY);\n"
"}\n"
"\n"
"\n"
"///Quantizes a point to three unsigned 16-bit values relative to the BVH bounds.\n"
"///  out             receives the quantized x, y, z components\n"
"///  point2          point to quantize; it is clamped to [bvhAabbMin, bvhAabbMax] first\n"
"///  isMax           non-zero: add one and force the lowest bit on (odd result);\n"
"///                  zero: clear the lowest bit (even result)\n"
"///  bvhQuantization per-component scale applied to (clamped point - bvhAabbMin)\n"
"void quantizeWithClamp(unsigned short* out, float4 point2,int isMax, float4 bvhAabbMin, float4 bvhAabbMax, float4 bvhQuantization)\n"
"{\n"
"	float4 insideBounds = min(max(point2,bvhAabbMin), bvhAabbMax);\n"
"	float4 scaled = (insideBounds - bvhAabbMin) * bvhQuantization;\n"
"\n"
"	if (!isMax)\n"
"	{\n"
"		//minimum corner: truncate, then clear bit 0\n"
"		out[0] = (unsigned short) (((unsigned short)(scaled.x) & 0xfffe));\n"
"		out[1] = (unsigned short) (((unsigned short)(scaled.y) & 0xfffe));\n"
"		out[2] = (unsigned short) (((unsigned short)(scaled.z) & 0xfffe));\n"
"		return;\n"
"	}\n"
"\n"
"	//maximum corner: add one before truncating, then set bit 0\n"
"	out[0] = (unsigned short) (((unsigned short)(scaled.x+1.f) | 1));\n"
"	out[1] = (unsigned short) (((unsigned short)(scaled.y+1.f) | 1));\n"
"	out[2] = (unsigned short) (((unsigned short)(scaled.z+1.f) | 1));\n"
"}\n"
"\n"
"\n"
"// work-in-progress\n"
"__kernel void bvhTraversalKernel( __global const int2* pairs, \n"
" __global const BodyData* rigidBodies, \n"
" __global const btCollidableGpu* collidables,\n"
" __global btAabbCL* aabbs,\n"
" __global int4* concavePairsOut,\n"
" __global volatile int* numConcavePairsOut,\n"
" int numPairs,\n"
" int maxNumConcavePairsCapacity\n"
" )\n"
" __global const BodyData* rigidBodies, \n"
" __global const btCollidableGpu* collidables,\n"
" __global btAabbCL* aabbs,\n"
" __global int4* concavePairsOut,\n"
" __global volatile int* numConcavePairsOut,\n"
" __global const btBvhSubtreeInfo* subtreeHeaders,\n"
" __global const btQuantizedBvhNode* quantizedNodes,\n"
" float4 bvhAabbMin,\n"
" float4 bvhAabbMax,\n"
" float4 bvhQuantization,\n"
" int numSubtreeHeaders,\n"
" int numPairs,\n"
" int maxNumConcavePairsCapacity)\n"
"{\n"
"\n"
" int i = get_global_id(0);\n"
@@ -96,17 +203,63 @@ static const char* bvhTraversalKernelCL= \
" \n"
" if ((collidables[collidableIndexA].m_shapeType==SHAPE_CONCAVE_TRIMESH))// && (collidables[collidableIndexB].m_shapeType==SHAPE_CONVEX_HULL))\n"
" {\n"
" int pairIdx = atomic_inc(numConcavePairsOut);\n"
" if (pairIdx<maxNumConcavePairsCapacity)\n"
"\n"
" \n"
" unsigned short int quantizedQueryAabbMin[3];\n"
" unsigned short int quantizedQueryAabbMax[3];\n"
" quantizeWithClamp(quantizedQueryAabbMin,aabbs[bodyIndexB].m_min,false,bvhAabbMin, bvhAabbMax,bvhQuantization);\n"
" quantizeWithClamp(quantizedQueryAabbMax,aabbs[bodyIndexB].m_max,true ,bvhAabbMin, bvhAabbMax,bvhQuantization);\n"
"\n"
"\n"
" int i;\n"
" for (i=0;i<numSubtreeHeaders;i++)\n"
" {\n"
" //int4 newPair;\n"
" concavePairsOut[pairIdx].x = bodyIndexA;\n"
" concavePairsOut[pairIdx].y = bodyIndexB;\n"
" concavePairsOut[pairIdx].z = 5;\n"
" concavePairsOut[pairIdx].w = 3;\n"
" const __global btBvhSubtreeInfo* subtree = &subtreeHeaders[i];\n"
" //PCK: unsigned instead of bool\n"
" unsigned overlap = testQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin,quantizedQueryAabbMax,subtree->m_quantizedAabbMin,subtree->m_quantizedAabbMax);\n"
" if (overlap != 0)\n"
" {\n"
" int startNodeIndex = subtree->m_rootNodeIndex;\n"
" int endNodeIndex = subtree->m_rootNodeIndex+subtree->m_subtreeSize;\n"
"\n"
" int curIndex = startNodeIndex;\n"
" int subTreeSize = endNodeIndex - startNodeIndex;\n"
" __global const btQuantizedBvhNode* rootNode = &quantizedNodes[startNodeIndex];\n"
" int escapeIndex;\n"
" bool isLeafNode;\n"
" unsigned aabbOverlap;\n"
" while (curIndex < endNodeIndex)\n"
" {\n"
" aabbOverlap = testQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin,quantizedQueryAabbMax,rootNode->m_quantizedAabbMin,rootNode->m_quantizedAabbMax);\n"
" isLeafNode = isLeaf(rootNode);\n"
" if (isLeafNode && aabbOverlap)\n"
" {\n"
" //do your thing! nodeCallback->processNode(rootNode->getPartId(),rootNode->getTriangleIndex());\n"
" int triangleIndex = getTriangleIndex(rootNode);\n"
" int pairIdx = atomic_inc(numConcavePairsOut);\n"
" if (pairIdx<maxNumConcavePairsCapacity)\n"
" {\n"
" //int4 newPair;\n"
" concavePairsOut[pairIdx].x = bodyIndexA;\n"
" concavePairsOut[pairIdx].y = bodyIndexB;\n"
" concavePairsOut[pairIdx].z = triangleIndex;\n"
" concavePairsOut[pairIdx].w = 3;\n"
" }\n"
" } \n"
" if ((aabbOverlap != 0) || isLeafNode)\n"
" {\n"
" rootNode++;\n"
" curIndex++;\n"
" } else\n"
" {\n"
" escapeIndex = getEscapeIndex(rootNode);\n"
" rootNode += escapeIndex;\n"
" curIndex += escapeIndex;\n"
" }\n"
" }\n"
" }\n"
" }\n"
" }//SHAPE_CONCAVE_TRIMESH\n"
" \n"
" }//i<numpairs\n"
"}\n"
;