diff --git a/src/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvh.cpp b/src/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvh.cpp index 0593fc136..a478f9e6f 100644 --- a/src/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvh.cpp +++ b/src/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvh.cpp @@ -69,6 +69,9 @@ b3GpuParallelLinearBvh::b3GpuParallelLinearBvh(cl_context context, cl_device_id m_buildBinaryRadixTreeAabbsRecursiveKernel = b3OpenCLUtils::compileCLKernelFromString( context, device, kernelSource, "buildBinaryRadixTreeAabbsRecursive", &error, m_parallelLinearBvhProgram, additionalMacros ); b3Assert(m_buildBinaryRadixTreeAabbsRecursiveKernel); + m_findLeafIndexRangesKernel = b3OpenCLUtils::compileCLKernelFromString( context, device, kernelSource, "findLeafIndexRanges", &error, m_parallelLinearBvhProgram, additionalMacros ); + b3Assert(m_findLeafIndexRangesKernel); + m_plbvhCalculateOverlappingPairsKernel = b3OpenCLUtils::compileCLKernelFromString( context, device, kernelSource, "plbvhCalculateOverlappingPairs", &error, m_parallelLinearBvhProgram, additionalMacros ); b3Assert(m_plbvhCalculateOverlappingPairsKernel); m_plbvhRayTraverseKernel = b3OpenCLUtils::compileCLKernelFromString( context, device, kernelSource, "plbvhRayTraverse", &error, m_parallelLinearBvhProgram, additionalMacros ); @@ -91,6 +94,8 @@ b3GpuParallelLinearBvh::~b3GpuParallelLinearBvh() clReleaseKernel(m_findDistanceFromRootKernel); clReleaseKernel(m_buildBinaryRadixTreeAabbsRecursiveKernel); + clReleaseKernel(m_findLeafIndexRangesKernel); + clReleaseKernel(m_plbvhCalculateOverlappingPairsKernel); clReleaseKernel(m_plbvhRayTraverseKernel); clReleaseKernel(m_plbvhLargeAabbAabbTestKernel); @@ -253,7 +258,33 @@ void b3GpuParallelLinearBvh::build(const b3OpenCLArray& worldSpaceAab } // - constructRadixBinaryTree(); + constructBinaryRadixTree(); + + + //Since it is a sorted binary radix tree, each internal node contains a contiguous subset of leaf node indices. + //The root node contains leaf node indices in the range [0, numLeafNodes - 1]. + //The child nodes of each node split their parent's index range into 2 contiguous halves. + // + //For example, if the root has indices [0, 31], its children might partition that range into [0, 11] and [12, 31]. + //The next level in the tree could then split those ranges into [0, 2], [3, 11], [12, 22], and [23, 31]. + // + //This property can be used for optimizing calculateOverlappingPairs(), to avoid testing each AABB pair twice + { + B3_PROFILE("m_findLeafIndexRangesKernel"); + + b3BufferInfoCL bufferInfo[] = + { + b3BufferInfoCL( m_internalNodeChildNodes.getBufferCL() ), + b3BufferInfoCL( m_internalNodeLeafIndexRanges.getBufferCL() ) + }; + + b3LauncherCL launcher(m_queue, m_findLeafIndexRangesKernel, "m_findLeafIndexRangesKernel"); + launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) ); + launcher.setConst(numInternalNodes); + + launcher.launch1D(numInternalNodes); + clFinish(m_queue); + } } void b3GpuParallelLinearBvh::calculateOverlappingPairs(b3OpenCLArray& out_numPairs, b3OpenCLArray& out_overlappingPairs) @@ -410,9 +441,9 @@ void b3GpuParallelLinearBvh::testRaysAgainstBvhAabbs(const b3OpenCLArray& out_numRayRigidPairs, b3OpenCLArray& out_rayRigidPairs); private: - void constructRadixBinaryTree(); + void constructBinaryRadixTree(); }; #endif diff --git a/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvh.cl b/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvh.cl index b28c5ab56..83f355849 100644 --- a/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvh.cl +++ b/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvh.cl @@ -211,12 +211,12 @@ __kernel void plbvhCalculateOverlappingPairs(__global b3AabbCL* rigidAabbs, int isLeaf = isLeafNode(internalOrLeafNodeIndex); //Internal node if false int bvhNodeIndex = getIndexWithInternalNodeMarkerRemoved(internalOrLeafNodeIndex); - //Optimization - if the node is not a leaf, check whether the highest leaf index of that node - //is less than the queried node's index to avoid testing each pair twice. + //Optimization - if the BVH is structured as a binary radix tree, then + //each internal node corresponds to a contiguous range of leaf nodes(internalNodeLeafIndexRanges[]). + //This can be used to avoid testing each AABB-AABB pair twice. { - // fix: produces duplicate pairs - // int highestLeafIndex = (isLeaf) ? numQueryAabbs : internalNodeLeafIndexRanges[bvhNodeIndex].y; - // if(highestLeafIndex < queryBvhNodeIndex) continue; + int highestLeafIndex = (isLeaf) ? bvhNodeIndex : internalNodeLeafIndexRanges[bvhNodeIndex].y; + if(highestLeafIndex < queryBvhNodeIndex) continue; } //bvhRigidIndex is not used if internal node @@ -225,7 +225,7 @@ __kernel void plbvhCalculateOverlappingPairs(__global b3AabbCL* rigidAabbs, b3AabbCL bvhNodeAabb = (isLeaf) ? rigidAabbs[bvhRigidIndex] : internalNodeAabbs[bvhNodeIndex]; if( queryRigidIndex != bvhRigidIndex && TestAabbAgainstAabb2(&queryAabb, &bvhNodeAabb) ) { - if(isLeaf && rigidAabbs[queryRigidIndex].m_minIndices[3] < rigidAabbs[bvhRigidIndex].m_minIndices[3]) + if(isLeaf) { int4 pair; pair.x = rigidAabbs[queryRigidIndex].m_minIndices[3]; @@ -741,3 +741,32 @@ __kernel void buildBinaryRadixTreeAabbsRecursive(__global int* distanceFromRoot, internalNodeAabbs[internalNodeIndex] = mergedAabb; } } + +__kernel void findLeafIndexRanges(__global int2* internalNodeChildNodes, __global int2* out_leafIndexRanges, int numInternalNodes) +{ + int internalNodeIndex = get_global_id(0); + if(internalNodeIndex >= numInternalNodes) return; + + int numLeafNodes = numInternalNodes + 1; + + int2 childNodes = internalNodeChildNodes[internalNodeIndex]; + + int2 leafIndexRange; //x == min leaf index, y == max leaf index + + //Find lowest leaf index covered by this internal node + { + int lowestIndex = childNodes.x; //childNodes.x == Left child + while( !isLeafNode(lowestIndex) ) lowestIndex = internalNodeChildNodes[ getIndexWithInternalNodeMarkerRemoved(lowestIndex) ].x; + leafIndexRange.x = lowestIndex; + } + + //Find highest leaf index covered by this internal node + { + int highestIndex = childNodes.y; //childNodes.y == Right child + while( !isLeafNode(highestIndex) ) highestIndex = internalNodeChildNodes[ getIndexWithInternalNodeMarkerRemoved(highestIndex) ].y; + leafIndexRange.y = highestIndex; + } + + // + out_leafIndexRanges[internalNodeIndex] = leafIndexRange; +} diff --git a/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvhKernels.h b/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvhKernels.h index 731423098..40c5e3218 100644 --- a/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvhKernels.h +++ b/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvhKernels.h @@ -196,12 +196,12 @@ static const char* parallelLinearBvhCL= \ " int isLeaf = isLeafNode(internalOrLeafNodeIndex); //Internal node if false\n" " int bvhNodeIndex = getIndexWithInternalNodeMarkerRemoved(internalOrLeafNodeIndex);\n" " \n" -" //Optimization - if the node is not a leaf, check whether the highest leaf index of that node\n" -" //is less than the queried node's index to avoid testing each pair twice.\n" +" //Optimization - if the BVH is structured as a binary radix tree, then\n" +" //each internal node corresponds to a contiguous range of leaf nodes(internalNodeLeafIndexRanges[]).\n" +" //This can be used to avoid testing each AABB-AABB pair twice.\n" " {\n" -" // fix: produces duplicate pairs\n" -" // int highestLeafIndex = (isLeaf) ? numQueryAabbs : internalNodeLeafIndexRanges[bvhNodeIndex].y;\n" -" // if(highestLeafIndex < queryBvhNodeIndex) continue;\n" +" int highestLeafIndex = (isLeaf) ? bvhNodeIndex : internalNodeLeafIndexRanges[bvhNodeIndex].y;\n" +" if(highestLeafIndex < queryBvhNodeIndex) continue;\n" " }\n" " \n" " //bvhRigidIndex is not used if internal node\n" @@ -210,7 +210,7 @@ static const char* parallelLinearBvhCL= \ " b3AabbCL bvhNodeAabb = (isLeaf) ? rigidAabbs[bvhRigidIndex] : internalNodeAabbs[bvhNodeIndex];\n" " if( queryRigidIndex != bvhRigidIndex && TestAabbAgainstAabb2(&queryAabb, &bvhNodeAabb) )\n" " {\n" -" if(isLeaf && rigidAabbs[queryRigidIndex].m_minIndices[3] < rigidAabbs[bvhRigidIndex].m_minIndices[3])\n" +" if(isLeaf)\n" " {\n" " int4 pair;\n" " pair.x = rigidAabbs[queryRigidIndex].m_minIndices[3];\n" @@ -702,4 +702,32 @@ static const char* parallelLinearBvhCL= \ " internalNodeAabbs[internalNodeIndex] = mergedAabb;\n" " }\n" "}\n" +"__kernel void findLeafIndexRanges(__global int2* internalNodeChildNodes, __global int2* out_leafIndexRanges, int numInternalNodes)\n" +"{\n" +" int internalNodeIndex = get_global_id(0);\n" +" if(internalNodeIndex >= numInternalNodes) return;\n" +" \n" +" int numLeafNodes = numInternalNodes + 1;\n" +" \n" +" int2 childNodes = internalNodeChildNodes[internalNodeIndex];\n" +" \n" +" int2 leafIndexRange; //x == min leaf index, y == max leaf index\n" +" \n" +" //Find lowest leaf index covered by this internal node\n" +" {\n" +" int lowestIndex = childNodes.x; //childNodes.x == Left child\n" +" while( !isLeafNode(lowestIndex) ) lowestIndex = internalNodeChildNodes[ getIndexWithInternalNodeMarkerRemoved(lowestIndex) ].x;\n" +" leafIndexRange.x = lowestIndex;\n" +" }\n" +" \n" +" //Find highest leaf index covered by this internal node\n" +" {\n" +" int highestIndex = childNodes.y; //childNodes.y == Right child\n" +" while( !isLeafNode(highestIndex) ) highestIndex = internalNodeChildNodes[ getIndexWithInternalNodeMarkerRemoved(highestIndex) ].y;\n" +" leafIndexRange.y = highestIndex;\n" +" }\n" +" \n" +" //\n" +" out_leafIndexRanges[internalNodeIndex] = leafIndexRange;\n" +"}\n" ;