Fix PLBVH symmetry optimization for calculateOverlappingPairs().

This commit is contained in:
Jackson Lee
2014-03-13 19:22:05 -07:00
parent bb0102d79b
commit 19b194e8fe
4 changed files with 106 additions and 16 deletions

View File

@@ -69,6 +69,9 @@ b3GpuParallelLinearBvh::b3GpuParallelLinearBvh(cl_context context, cl_device_id
m_buildBinaryRadixTreeAabbsRecursiveKernel = b3OpenCLUtils::compileCLKernelFromString( context, device, kernelSource, "buildBinaryRadixTreeAabbsRecursive", &error, m_parallelLinearBvhProgram, additionalMacros );
b3Assert(m_buildBinaryRadixTreeAabbsRecursiveKernel);
m_findLeafIndexRangesKernel = b3OpenCLUtils::compileCLKernelFromString( context, device, kernelSource, "findLeafIndexRanges", &error, m_parallelLinearBvhProgram, additionalMacros );
b3Assert(m_findLeafIndexRangesKernel);
m_plbvhCalculateOverlappingPairsKernel = b3OpenCLUtils::compileCLKernelFromString( context, device, kernelSource, "plbvhCalculateOverlappingPairs", &error, m_parallelLinearBvhProgram, additionalMacros );
b3Assert(m_plbvhCalculateOverlappingPairsKernel);
m_plbvhRayTraverseKernel = b3OpenCLUtils::compileCLKernelFromString( context, device, kernelSource, "plbvhRayTraverse", &error, m_parallelLinearBvhProgram, additionalMacros );
@@ -91,6 +94,8 @@ b3GpuParallelLinearBvh::~b3GpuParallelLinearBvh()
clReleaseKernel(m_findDistanceFromRootKernel);
clReleaseKernel(m_buildBinaryRadixTreeAabbsRecursiveKernel);
clReleaseKernel(m_findLeafIndexRangesKernel);
clReleaseKernel(m_plbvhCalculateOverlappingPairsKernel);
clReleaseKernel(m_plbvhRayTraverseKernel);
clReleaseKernel(m_plbvhLargeAabbAabbTestKernel);
@@ -253,7 +258,33 @@ void b3GpuParallelLinearBvh::build(const b3OpenCLArray<b3SapAabb>& worldSpaceAab
}
//
constructRadixBinaryTree();
constructBinaryRadixTree();
//Since it is a sorted binary radix tree, each internal node contains a contiguous subset of leaf node indices.
//The root node contains leaf node indices in the range [0, numLeafNodes - 1].
//The child nodes of each node split their parent's index range into 2 contiguous halves.
//
//For example, if the root has indices [0, 31], its children might partition that range into [0, 11] and [12, 31].
//The next level in the tree could then split those ranges into [0, 2], [3, 11], [12, 22], and [23, 31].
//
//This property can be used for optimizing calculateOverlappingPairs(), to avoid testing each AABB pair twice
{
B3_PROFILE("m_findLeafIndexRangesKernel");
b3BufferInfoCL bufferInfo[] =
{
b3BufferInfoCL( m_internalNodeChildNodes.getBufferCL() ),
b3BufferInfoCL( m_internalNodeLeafIndexRanges.getBufferCL() )
};
b3LauncherCL launcher(m_queue, m_findLeafIndexRangesKernel, "m_findLeafIndexRangesKernel");
launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) );
launcher.setConst(numInternalNodes);
launcher.launch1D(numInternalNodes);
clFinish(m_queue);
}
}
void b3GpuParallelLinearBvh::calculateOverlappingPairs(b3OpenCLArray<int>& out_numPairs, b3OpenCLArray<b3Int4>& out_overlappingPairs)
@@ -410,9 +441,9 @@ void b3GpuParallelLinearBvh::testRaysAgainstBvhAabbs(const b3OpenCLArray<b3RayIn
}
void b3GpuParallelLinearBvh::constructRadixBinaryTree()
void b3GpuParallelLinearBvh::constructBinaryRadixTree()
{
B3_PROFILE("b3GpuParallelLinearBvh::constructRadixBinaryTree()");
B3_PROFILE("b3GpuParallelLinearBvh::constructBinaryRadixTree()");
int numLeaves = m_leafNodeAabbs.size();
int numInternalNodes = numLeaves - 1;

View File

@@ -63,6 +63,8 @@ class b3GpuParallelLinearBvh
cl_kernel m_findDistanceFromRootKernel;
cl_kernel m_buildBinaryRadixTreeAabbsRecursiveKernel;
cl_kernel m_findLeafIndexRangesKernel;
//Traversal kernels
cl_kernel m_plbvhCalculateOverlappingPairsKernel;
cl_kernel m_plbvhRayTraverseKernel;
@@ -117,7 +119,7 @@ public:
b3OpenCLArray<int>& out_numRayRigidPairs, b3OpenCLArray<b3Int2>& out_rayRigidPairs);
private:
void constructRadixBinaryTree();
void constructBinaryRadixTree();
};
#endif

View File

@@ -211,12 +211,12 @@ __kernel void plbvhCalculateOverlappingPairs(__global b3AabbCL* rigidAabbs,
int isLeaf = isLeafNode(internalOrLeafNodeIndex); //Internal node if false
int bvhNodeIndex = getIndexWithInternalNodeMarkerRemoved(internalOrLeafNodeIndex);
//Optimization - if the node is not a leaf, check whether the highest leaf index of that node
//is less than the queried node's index to avoid testing each pair twice.
//Optimization - if the BVH is structured as a binary radix tree, then
//each internal node corresponds to a contiguous range of leaf nodes(internalNodeLeafIndexRanges[]).
//This can be used to avoid testing each AABB-AABB pair twice.
{
// fix: produces duplicate pairs
// int highestLeafIndex = (isLeaf) ? numQueryAabbs : internalNodeLeafIndexRanges[bvhNodeIndex].y;
// if(highestLeafIndex < queryBvhNodeIndex) continue;
int highestLeafIndex = (isLeaf) ? bvhNodeIndex : internalNodeLeafIndexRanges[bvhNodeIndex].y;
if(highestLeafIndex < queryBvhNodeIndex) continue;
}
//bvhRigidIndex is not used if internal node
@@ -225,7 +225,7 @@ __kernel void plbvhCalculateOverlappingPairs(__global b3AabbCL* rigidAabbs,
b3AabbCL bvhNodeAabb = (isLeaf) ? rigidAabbs[bvhRigidIndex] : internalNodeAabbs[bvhNodeIndex];
if( queryRigidIndex != bvhRigidIndex && TestAabbAgainstAabb2(&queryAabb, &bvhNodeAabb) )
{
if(isLeaf && rigidAabbs[queryRigidIndex].m_minIndices[3] < rigidAabbs[bvhRigidIndex].m_minIndices[3])
if(isLeaf)
{
int4 pair;
pair.x = rigidAabbs[queryRigidIndex].m_minIndices[3];
@@ -741,3 +741,32 @@ __kernel void buildBinaryRadixTreeAabbsRecursive(__global int* distanceFromRoot,
internalNodeAabbs[internalNodeIndex] = mergedAabb;
}
}
__kernel void findLeafIndexRanges(__global int2* internalNodeChildNodes, __global int2* out_leafIndexRanges, int numInternalNodes)
{
int internalNodeIndex = get_global_id(0);
if(internalNodeIndex >= numInternalNodes) return;
int numLeafNodes = numInternalNodes + 1;
int2 childNodes = internalNodeChildNodes[internalNodeIndex];
int2 leafIndexRange; //x == min leaf index, y == max leaf index
//Find lowest leaf index covered by this internal node
{
int lowestIndex = childNodes.x; //childNodes.x == Left child
while( !isLeafNode(lowestIndex) ) lowestIndex = internalNodeChildNodes[ getIndexWithInternalNodeMarkerRemoved(lowestIndex) ].x;
leafIndexRange.x = lowestIndex;
}
//Find highest leaf index covered by this internal node
{
int highestIndex = childNodes.y; //childNodes.y == Right child
while( !isLeafNode(highestIndex) ) highestIndex = internalNodeChildNodes[ getIndexWithInternalNodeMarkerRemoved(highestIndex) ].y;
leafIndexRange.y = highestIndex;
}
//
out_leafIndexRanges[internalNodeIndex] = leafIndexRange;
}

View File

@@ -196,12 +196,12 @@ static const char* parallelLinearBvhCL= \
" int isLeaf = isLeafNode(internalOrLeafNodeIndex); //Internal node if false\n"
" int bvhNodeIndex = getIndexWithInternalNodeMarkerRemoved(internalOrLeafNodeIndex);\n"
" \n"
" //Optimization - if the node is not a leaf, check whether the highest leaf index of that node\n"
" //is less than the queried node's index to avoid testing each pair twice.\n"
" //Optimization - if the BVH is structured as a binary radix tree, then\n"
" //each internal node corresponds to a contiguous range of leaf nodes(internalNodeLeafIndexRanges[]).\n"
" //This can be used to avoid testing each AABB-AABB pair twice.\n"
" {\n"
" // fix: produces duplicate pairs\n"
" // int highestLeafIndex = (isLeaf) ? numQueryAabbs : internalNodeLeafIndexRanges[bvhNodeIndex].y;\n"
" // if(highestLeafIndex < queryBvhNodeIndex) continue;\n"
" int highestLeafIndex = (isLeaf) ? bvhNodeIndex : internalNodeLeafIndexRanges[bvhNodeIndex].y;\n"
" if(highestLeafIndex < queryBvhNodeIndex) continue;\n"
" }\n"
" \n"
" //bvhRigidIndex is not used if internal node\n"
@@ -210,7 +210,7 @@ static const char* parallelLinearBvhCL= \
" b3AabbCL bvhNodeAabb = (isLeaf) ? rigidAabbs[bvhRigidIndex] : internalNodeAabbs[bvhNodeIndex];\n"
" if( queryRigidIndex != bvhRigidIndex && TestAabbAgainstAabb2(&queryAabb, &bvhNodeAabb) )\n"
" {\n"
" if(isLeaf && rigidAabbs[queryRigidIndex].m_minIndices[3] < rigidAabbs[bvhRigidIndex].m_minIndices[3])\n"
" if(isLeaf)\n"
" {\n"
" int4 pair;\n"
" pair.x = rigidAabbs[queryRigidIndex].m_minIndices[3];\n"
@@ -702,4 +702,32 @@ static const char* parallelLinearBvhCL= \
" internalNodeAabbs[internalNodeIndex] = mergedAabb;\n"
" }\n"
"}\n"
"__kernel void findLeafIndexRanges(__global int2* internalNodeChildNodes, __global int2* out_leafIndexRanges, int numInternalNodes)\n"
"{\n"
" int internalNodeIndex = get_global_id(0);\n"
" if(internalNodeIndex >= numInternalNodes) return;\n"
" \n"
" int numLeafNodes = numInternalNodes + 1;\n"
" \n"
" int2 childNodes = internalNodeChildNodes[internalNodeIndex];\n"
" \n"
" int2 leafIndexRange; //x == min leaf index, y == max leaf index\n"
" \n"
" //Find lowest leaf index covered by this internal node\n"
" {\n"
" int lowestIndex = childNodes.x; //childNodes.x == Left child\n"
" while( !isLeafNode(lowestIndex) ) lowestIndex = internalNodeChildNodes[ getIndexWithInternalNodeMarkerRemoved(lowestIndex) ].x;\n"
" leafIndexRange.x = lowestIndex;\n"
" }\n"
" \n"
" //Find highest leaf index covered by this internal node\n"
" {\n"
" int highestIndex = childNodes.y; //childNodes.y == Right child\n"
" while( !isLeafNode(highestIndex) ) highestIndex = internalNodeChildNodes[ getIndexWithInternalNodeMarkerRemoved(highestIndex) ].y;\n"
" leafIndexRange.y = highestIndex;\n"
" }\n"
" \n"
" //\n"
" out_leafIndexRanges[internalNodeIndex] = leafIndexRange;\n"
"}\n"
;