Merge various commits into a single commit.
Commits after:
2014-03-03 Draft PLBVH construction using binary radix tree.
f19f853685
Are merged into a single commit; this includes:
03-10 Remove single launch build AABB kernel.
03-10 Add kernels for setting PLBVH AABBs using distance from root.
03-10 Use faster morton code, remove convertChildNodeFormat kernel.
03-09 Add duplicate morton code handling to binary radix construct.
03-09 Remove slower PLBVH constructors.
03-08 Add binary radix tree construct using binary search.
03-06 Remove slowest PLBVH constructor, fix implicit construct AABB.
03-04 Test various optimizations for PLBVH binary radix tree construct.
This commit is contained in:
@@ -18,25 +18,24 @@ subject to the following restrictions:
|
||||
|
||||
b3GpuParallelLinearBvh::b3GpuParallelLinearBvh(cl_context context, cl_device_id device, cl_command_queue queue) :
|
||||
m_queue(queue),
|
||||
m_fill(context, device, queue),
|
||||
m_radixSorter(context, device, queue),
|
||||
|
||||
m_rootNodeIndex(context, queue),
|
||||
|
||||
m_numNodesPerLevelGpu(context, queue),
|
||||
m_firstIndexOffsetPerLevelGpu(context, queue),
|
||||
m_maxDistanceFromRoot(context, queue),
|
||||
|
||||
m_internalNodeAabbs(context, queue),
|
||||
m_internalNodeLeafIndexRanges(context, queue),
|
||||
m_internalNodeChildNodes(context, queue),
|
||||
m_internalNodeParentNodes(context, queue),
|
||||
|
||||
m_maxCommonPrefix(context, queue),
|
||||
m_commonPrefixes(context, queue),
|
||||
m_leftInternalNodePointers(context, queue),
|
||||
m_rightInternalNodePointers(context, queue),
|
||||
m_internalNodeLeftChildNodes(context, queue),
|
||||
m_internalNodeRightChildNodes(context, queue),
|
||||
m_commonPrefixLengths(context, queue),
|
||||
m_childNodeCount(context, queue),
|
||||
m_distanceFromRoot(context, queue),
|
||||
m_TEMP_leftLowerPrefix(context, queue),
|
||||
m_TEMP_rightLowerPrefix(context, queue),
|
||||
m_TEMP_leftSharedPrefixLength(context, queue),
|
||||
m_TEMP_rightSharedPrefixLength(context, queue),
|
||||
|
||||
m_leafNodeParentNodes(context, queue),
|
||||
m_mortonCodesAndAabbIndicies(context, queue),
|
||||
@@ -46,8 +45,8 @@ b3GpuParallelLinearBvh::b3GpuParallelLinearBvh(cl_context context, cl_device_id
|
||||
m_largeAabbs(context, queue)
|
||||
{
|
||||
m_rootNodeIndex.resize(1);
|
||||
m_maxCommonPrefix.resize(1);
|
||||
|
||||
m_maxDistanceFromRoot.resize(1);
|
||||
|
||||
//
|
||||
const char CL_PROGRAM_PATH[] = "src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvh.cl";
|
||||
|
||||
@@ -64,21 +63,16 @@ b3GpuParallelLinearBvh::b3GpuParallelLinearBvh(cl_context context, cl_device_id
|
||||
m_assignMortonCodesAndAabbIndiciesKernel = b3OpenCLUtils::compileCLKernelFromString( context, device, kernelSource, "assignMortonCodesAndAabbIndicies", &error, m_parallelLinearBvhProgram, additionalMacros );
|
||||
b3Assert(m_assignMortonCodesAndAabbIndiciesKernel);
|
||||
|
||||
m_constructBinaryTreeKernel = b3OpenCLUtils::compileCLKernelFromString( context, device, kernelSource, "constructBinaryTree", &error, m_parallelLinearBvhProgram, additionalMacros );
|
||||
b3Assert(m_constructBinaryTreeKernel);
|
||||
m_determineInternalNodeAabbsKernel = b3OpenCLUtils::compileCLKernelFromString( context, device, kernelSource, "determineInternalNodeAabbs", &error, m_parallelLinearBvhProgram, additionalMacros );
|
||||
b3Assert(m_determineInternalNodeAabbsKernel);
|
||||
|
||||
m_computePrefixAndInitPointersKernel = b3OpenCLUtils::compileCLKernelFromString( context, device, kernelSource, "computePrefixAndInitPointers", &error, m_parallelLinearBvhProgram, additionalMacros );
|
||||
b3Assert(m_computePrefixAndInitPointersKernel);
|
||||
m_correctDuplicatePrefixesKernel = b3OpenCLUtils::compileCLKernelFromString( context, device, kernelSource, "correctDuplicatePrefixes", &error, m_parallelLinearBvhProgram, additionalMacros );
|
||||
b3Assert(m_correctDuplicatePrefixesKernel);
|
||||
m_computeAdjacentPairCommonPrefixKernel = b3OpenCLUtils::compileCLKernelFromString( context, device, kernelSource, "computeAdjacentPairCommonPrefix", &error, m_parallelLinearBvhProgram, additionalMacros );
|
||||
b3Assert(m_computeAdjacentPairCommonPrefixKernel);
|
||||
m_buildBinaryRadixTreeLeafNodesKernel = b3OpenCLUtils::compileCLKernelFromString( context, device, kernelSource, "buildBinaryRadixTreeLeafNodes", &error, m_parallelLinearBvhProgram, additionalMacros );
|
||||
b3Assert(m_buildBinaryRadixTreeLeafNodesKernel);
|
||||
m_buildBinaryRadixTreeInternalNodesKernel = b3OpenCLUtils::compileCLKernelFromString( context, device, kernelSource, "buildBinaryRadixTreeInternalNodes", &error, m_parallelLinearBvhProgram, additionalMacros );
|
||||
b3Assert(m_buildBinaryRadixTreeInternalNodesKernel);
|
||||
m_convertChildNodeFormatKernel = b3OpenCLUtils::compileCLKernelFromString( context, device, kernelSource, "convertChildNodeFormat", &error, m_parallelLinearBvhProgram, additionalMacros );
|
||||
b3Assert(m_convertChildNodeFormatKernel);
|
||||
m_findDistanceFromRootKernel = b3OpenCLUtils::compileCLKernelFromString( context, device, kernelSource, "findDistanceFromRoot", &error, m_parallelLinearBvhProgram, additionalMacros );
|
||||
b3Assert(m_findDistanceFromRootKernel);
|
||||
m_buildBinaryRadixTreeAabbsRecursiveKernel = b3OpenCLUtils::compileCLKernelFromString( context, device, kernelSource, "buildBinaryRadixTreeAabbsRecursive", &error, m_parallelLinearBvhProgram, additionalMacros );
|
||||
b3Assert(m_buildBinaryRadixTreeAabbsRecursiveKernel);
|
||||
|
||||
m_plbvhCalculateOverlappingPairsKernel = b3OpenCLUtils::compileCLKernelFromString( context, device, kernelSource, "plbvhCalculateOverlappingPairs", &error, m_parallelLinearBvhProgram, additionalMacros );
|
||||
b3Assert(m_plbvhCalculateOverlappingPairsKernel);
|
||||
@@ -96,14 +90,11 @@ b3GpuParallelLinearBvh::~b3GpuParallelLinearBvh()
|
||||
clReleaseKernel(m_findAllNodesMergedAabbKernel);
|
||||
clReleaseKernel(m_assignMortonCodesAndAabbIndiciesKernel);
|
||||
|
||||
clReleaseKernel(m_constructBinaryTreeKernel);
|
||||
clReleaseKernel(m_determineInternalNodeAabbsKernel);
|
||||
|
||||
clReleaseKernel(m_computePrefixAndInitPointersKernel);
|
||||
clReleaseKernel(m_correctDuplicatePrefixesKernel);
|
||||
clReleaseKernel(m_computeAdjacentPairCommonPrefixKernel);
|
||||
clReleaseKernel(m_buildBinaryRadixTreeLeafNodesKernel);
|
||||
clReleaseKernel(m_buildBinaryRadixTreeInternalNodesKernel);
|
||||
clReleaseKernel(m_convertChildNodeFormatKernel);
|
||||
clReleaseKernel(m_findDistanceFromRootKernel);
|
||||
clReleaseKernel(m_buildBinaryRadixTreeAabbsRecursiveKernel);
|
||||
|
||||
clReleaseKernel(m_plbvhCalculateOverlappingPairsKernel);
|
||||
clReleaseKernel(m_plbvhRayTraverseKernel);
|
||||
@@ -186,18 +177,19 @@ void b3GpuParallelLinearBvh::build(const b3OpenCLArray<b3SapAabb>& worldSpaceAab
|
||||
m_internalNodeParentNodes.resize(numInternalNodes);
|
||||
|
||||
m_commonPrefixes.resize(numInternalNodes);
|
||||
m_leftInternalNodePointers.resize(numInternalNodes);
|
||||
m_rightInternalNodePointers.resize(numInternalNodes);
|
||||
m_internalNodeLeftChildNodes.resize(numInternalNodes);
|
||||
m_internalNodeRightChildNodes.resize(numInternalNodes);
|
||||
m_commonPrefixLengths.resize(numInternalNodes);
|
||||
m_childNodeCount.resize(numInternalNodes);
|
||||
m_distanceFromRoot.resize(numInternalNodes);
|
||||
m_TEMP_leftLowerPrefix.resize(numInternalNodes);
|
||||
m_TEMP_rightLowerPrefix.resize(numInternalNodes);
|
||||
m_TEMP_leftSharedPrefixLength.resize(numInternalNodes);
|
||||
m_TEMP_rightSharedPrefixLength.resize(numInternalNodes);
|
||||
|
||||
m_leafNodeParentNodes.resize(numLeaves);
|
||||
m_mortonCodesAndAabbIndicies.resize(numLeaves);
|
||||
m_mergedAabb.resize(numLeaves);
|
||||
}
|
||||
|
||||
|
||||
|
||||
//Find the merged AABB of all small AABBs; this is used to define the size of
|
||||
//each cell in the virtual grid(2^10 cells in each dimension).
|
||||
{
|
||||
@@ -255,18 +247,7 @@ void b3GpuParallelLinearBvh::build(const b3OpenCLArray<b3SapAabb>& worldSpaceAab
|
||||
clFinish(m_queue);
|
||||
}
|
||||
|
||||
//Optional; only element at m_internalNodeParentNodes[0], the root node, needs to be set here
|
||||
//as the parent indices of other nodes are overwritten during m_constructBinaryTreeKernel
|
||||
{
|
||||
B3_PROFILE("Reset parent node indices");
|
||||
|
||||
m_fill.execute( m_internalNodeParentNodes, B3_PLBVH_ROOT_NODE_MARKER, m_internalNodeParentNodes.size() );
|
||||
m_fill.execute( m_leafNodeParentNodes, B3_PLBVH_ROOT_NODE_MARKER, m_leafNodeParentNodes.size() );
|
||||
clFinish(m_queue);
|
||||
}
|
||||
|
||||
//
|
||||
//constructSimpleBinaryTree();
|
||||
constructRadixBinaryTree();
|
||||
}
|
||||
|
||||
@@ -424,148 +405,11 @@ void b3GpuParallelLinearBvh::testRaysAgainstBvhAabbs(const b3OpenCLArray<b3RayIn
|
||||
|
||||
}
|
||||
|
||||
void b3GpuParallelLinearBvh::constructSimpleBinaryTree()
|
||||
{
|
||||
B3_PROFILE("b3GpuParallelLinearBvh::constructSimpleBinaryTree()");
|
||||
|
||||
int numLeaves = m_leafNodeAabbs.size(); //Number of leaves in the BVH == Number of rigid bodies with small AABBs
|
||||
int numInternalNodes = numLeaves - 1;
|
||||
|
||||
//Determine number of levels in the binary tree( numLevels = ceil( log2(numLeaves) ) )
|
||||
//The number of levels is equivalent to the number of bits needed to uniquely identify each node(including both internal and leaf nodes)
|
||||
int numLevels = 0;
|
||||
{
|
||||
//Find the most significant bit(msb)
|
||||
int mostSignificantBit = 0;
|
||||
{
|
||||
int temp = numLeaves;
|
||||
while(temp >>= 1) mostSignificantBit++; //Start counting from 0 (0 and 1 have msb 0, 2 has msb 1)
|
||||
}
|
||||
numLevels = mostSignificantBit + 1;
|
||||
|
||||
//If the number of nodes is not a power of 2(as in, can be expressed as 2^N where N is an integer), then there is 1 additional level
|
||||
if( ~(1 << mostSignificantBit) & numLeaves ) numLevels++;
|
||||
}
|
||||
|
||||
//Determine number of internal nodes per level, use prefix sum to get offsets of each level, and send to GPU
|
||||
{
|
||||
B3_PROFILE("Determine number of nodes per level");
|
||||
|
||||
m_numNodesPerLevelCpu.resize(numLevels);
|
||||
|
||||
//The last level contains the leaf nodes; number of leaves is already known
|
||||
if(numLevels - 1 >= 0) m_numNodesPerLevelCpu[numLevels - 1] = numLeaves;
|
||||
|
||||
//Calculate number of nodes in each level;
|
||||
//start from the second to last level(level right next to leaf nodes) and move towards the root(level 0)
|
||||
int remainder = 0;
|
||||
for(int levelIndex = numLevels - 2; levelIndex >= 0; --levelIndex)
|
||||
{
|
||||
int numNodesPreviousLevel = m_numNodesPerLevelCpu[levelIndex + 1]; //For first iteration this == numLeaves
|
||||
int numNodesCurrentLevel = numNodesPreviousLevel / 2;
|
||||
|
||||
remainder += numNodesPreviousLevel % 2;
|
||||
if(remainder == 2)
|
||||
{
|
||||
numNodesCurrentLevel++;
|
||||
remainder = 0;
|
||||
}
|
||||
|
||||
m_numNodesPerLevelCpu[levelIndex] = numNodesCurrentLevel;
|
||||
}
|
||||
|
||||
//Prefix sum to calculate the first index offset of each level
|
||||
{
|
||||
m_firstIndexOffsetPerLevelCpu = m_numNodesPerLevelCpu;
|
||||
|
||||
//Perform inclusive scan
|
||||
for(int i = 1; i < m_firstIndexOffsetPerLevelCpu.size(); ++i)
|
||||
m_firstIndexOffsetPerLevelCpu[i] += m_firstIndexOffsetPerLevelCpu[i - 1];
|
||||
|
||||
//Convert inclusive scan to exclusive scan to get the offsets
|
||||
//This is equivalent to shifting each element in m_firstIndexOffsetPerLevelCpu[] by 1 to the right,
|
||||
//and setting the first element to 0
|
||||
for(int i = 0; i < m_firstIndexOffsetPerLevelCpu.size(); ++i)
|
||||
m_firstIndexOffsetPerLevelCpu[i] -= m_numNodesPerLevelCpu[i];
|
||||
}
|
||||
|
||||
//Copy to GPU
|
||||
m_numNodesPerLevelGpu.copyFromHost(m_numNodesPerLevelCpu, false);
|
||||
m_firstIndexOffsetPerLevelGpu.copyFromHost(m_firstIndexOffsetPerLevelCpu, false);
|
||||
clFinish(m_queue);
|
||||
}
|
||||
|
||||
//Construct binary tree; find the children of each internal node, and assign parent nodes
|
||||
{
|
||||
B3_PROFILE("Construct binary tree");
|
||||
|
||||
const int ROOT_NODE_INDEX = 0x80000000; //Default root index is 0, most significant bit is set to indicate internal node
|
||||
m_rootNodeIndex.copyFromHostPointer(&ROOT_NODE_INDEX, 1);
|
||||
|
||||
b3BufferInfoCL bufferInfo[] =
|
||||
{
|
||||
b3BufferInfoCL( m_firstIndexOffsetPerLevelGpu.getBufferCL() ),
|
||||
b3BufferInfoCL( m_numNodesPerLevelGpu.getBufferCL() ),
|
||||
b3BufferInfoCL( m_internalNodeChildNodes.getBufferCL() ),
|
||||
b3BufferInfoCL( m_internalNodeParentNodes.getBufferCL() ),
|
||||
b3BufferInfoCL( m_leafNodeParentNodes.getBufferCL() )
|
||||
};
|
||||
|
||||
b3LauncherCL launcher(m_queue, m_constructBinaryTreeKernel, "m_constructBinaryTreeKernel");
|
||||
launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) );
|
||||
launcher.setConst(numLevels);
|
||||
launcher.setConst(numInternalNodes);
|
||||
|
||||
launcher.launch1D(numInternalNodes);
|
||||
clFinish(m_queue);
|
||||
}
|
||||
|
||||
//For each internal node, check children to get its AABB; start from the
|
||||
//last level, which contains the leaves, and move towards the root
|
||||
{
|
||||
B3_PROFILE("Set AABBs");
|
||||
|
||||
//Due to the arrangement of internal nodes, each internal node corresponds
|
||||
//to a contiguous range of leaf node indices. This characteristic can be used
|
||||
//to optimize calculateOverlappingPairs(); checking if
|
||||
//(m_internalNodeLeafIndexRanges[].y < leafNodeIndex) can be used to ensure that
|
||||
//each pair is processed only once.
|
||||
{
|
||||
B3_PROFILE("Reset internal node index ranges");
|
||||
|
||||
b3Int2 invalidIndexRange;
|
||||
invalidIndexRange.x = -1; //x == min
|
||||
invalidIndexRange.y = -2; //y == max
|
||||
|
||||
m_fill.execute( m_internalNodeLeafIndexRanges, invalidIndexRange, m_internalNodeLeafIndexRanges.size() );
|
||||
clFinish(m_queue);
|
||||
}
|
||||
|
||||
int lastInternalLevelIndex = numLevels - 2; //Last level is leaf node level
|
||||
for(int level = lastInternalLevelIndex; level >= 0; --level)
|
||||
{
|
||||
b3BufferInfoCL bufferInfo[] =
|
||||
{
|
||||
b3BufferInfoCL( m_firstIndexOffsetPerLevelGpu.getBufferCL() ),
|
||||
b3BufferInfoCL( m_numNodesPerLevelGpu.getBufferCL() ),
|
||||
b3BufferInfoCL( m_internalNodeChildNodes.getBufferCL() ),
|
||||
b3BufferInfoCL( m_mortonCodesAndAabbIndicies.getBufferCL() ),
|
||||
b3BufferInfoCL( m_leafNodeAabbs.getBufferCL() ),
|
||||
b3BufferInfoCL( m_internalNodeLeafIndexRanges.getBufferCL() ),
|
||||
b3BufferInfoCL( m_internalNodeAabbs.getBufferCL() )
|
||||
};
|
||||
|
||||
b3LauncherCL launcher(m_queue, m_determineInternalNodeAabbsKernel, "m_determineInternalNodeAabbsKernel");
|
||||
launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) );
|
||||
launcher.setConst(numLevels);
|
||||
launcher.setConst(numInternalNodes);
|
||||
launcher.setConst(level);
|
||||
|
||||
launcher.launch1D(numLeaves);
|
||||
}
|
||||
clFinish(m_queue);
|
||||
}
|
||||
}
|
||||
// remove
|
||||
#include <iostream>
|
||||
int isLeafNode(int index) { return (index >> 31 == 0); }
|
||||
int getIndexWithInternalNodeMarkerRemoved(int index) { return index & (~0x80000000); }
|
||||
int getIndexWithInternalNodeMarkerSet(int isLeaf, int index) { return (isLeaf) ? index : (index | 0x80000000); }
|
||||
|
||||
void b3GpuParallelLinearBvh::constructRadixBinaryTree()
|
||||
{
|
||||
@@ -576,17 +420,16 @@ void b3GpuParallelLinearBvh::constructRadixBinaryTree()
|
||||
|
||||
//For each internal node, compute common prefix and set pointers to left and right internal nodes
|
||||
{
|
||||
B3_PROFILE("m_computePrefixAndInitPointersKernel");
|
||||
B3_PROFILE("m_computeAdjacentPairCommonPrefixKernel");
|
||||
|
||||
b3BufferInfoCL bufferInfo[] =
|
||||
{
|
||||
b3BufferInfoCL( m_mortonCodesAndAabbIndicies.getBufferCL() ),
|
||||
b3BufferInfoCL( m_commonPrefixes.getBufferCL() ),
|
||||
b3BufferInfoCL( m_leftInternalNodePointers.getBufferCL() ),
|
||||
b3BufferInfoCL( m_rightInternalNodePointers.getBufferCL() )
|
||||
b3BufferInfoCL( m_commonPrefixLengths.getBufferCL() )
|
||||
};
|
||||
|
||||
b3LauncherCL launcher(m_queue, m_computePrefixAndInitPointersKernel, "m_computePrefixAndInitPointersKernel");
|
||||
b3LauncherCL launcher(m_queue, m_computeAdjacentPairCommonPrefixKernel, "m_computeAdjacentPairCommonPrefixKernel");
|
||||
launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) );
|
||||
launcher.setConst(numInternalNodes);
|
||||
|
||||
@@ -594,96 +437,185 @@ void b3GpuParallelLinearBvh::constructRadixBinaryTree()
|
||||
clFinish(m_queue);
|
||||
}
|
||||
|
||||
//Increase the common prefixes so that there are no adjacent duplicates for each internal node
|
||||
{
|
||||
B3_PROFILE("m_correctDuplicatePrefixesKernel");
|
||||
|
||||
int reset = 0;
|
||||
m_maxCommonPrefix.copyFromHostPointer(&reset, 1);
|
||||
|
||||
b3BufferInfoCL bufferInfo[] =
|
||||
{
|
||||
b3BufferInfoCL( m_commonPrefixes.getBufferCL() ),
|
||||
b3BufferInfoCL( m_maxCommonPrefix.getBufferCL() ),
|
||||
};
|
||||
|
||||
b3LauncherCL launcher(m_queue, m_correctDuplicatePrefixesKernel, "m_correctDuplicatePrefixesKernel");
|
||||
launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) );
|
||||
launcher.setConst(numInternalNodes);
|
||||
|
||||
launcher.launch1D(numInternalNodes);
|
||||
static b3AlignedObjectArray<int> prefixLengths;
|
||||
m_commonPrefixLengths.copyToHost(prefixLengths);
|
||||
clFinish(m_queue);
|
||||
|
||||
for(int i = 1; i < prefixLengths.size(); ++i)
|
||||
if( prefixLengths[i - 1] == prefixLengths[i] )
|
||||
for(;;) printf("duplicate prefix[%d]: %d\n", i, prefixLengths[i]);
|
||||
}
|
||||
|
||||
//For each leaf node, find parent nodes and assign child node indices
|
||||
{
|
||||
B3_PROFILE("m_buildBinaryRadixTreeLeafNodesKernel");
|
||||
|
||||
b3BufferInfoCL bufferInfo[] =
|
||||
//For each leaf node, find parent nodes and assign child node indices
|
||||
{
|
||||
b3BufferInfoCL( m_commonPrefixes.getBufferCL() ),
|
||||
b3BufferInfoCL( m_internalNodeLeftChildNodes.getBufferCL() ),
|
||||
b3BufferInfoCL( m_internalNodeRightChildNodes.getBufferCL() )
|
||||
};
|
||||
B3_PROFILE("m_buildBinaryRadixTreeLeafNodesKernel");
|
||||
|
||||
b3BufferInfoCL bufferInfo[] =
|
||||
{
|
||||
b3BufferInfoCL( m_commonPrefixLengths.getBufferCL() ),
|
||||
b3BufferInfoCL( m_leafNodeParentNodes.getBufferCL() ),
|
||||
b3BufferInfoCL( m_internalNodeChildNodes.getBufferCL() )
|
||||
};
|
||||
|
||||
b3LauncherCL launcher(m_queue, m_buildBinaryRadixTreeLeafNodesKernel, "m_buildBinaryRadixTreeLeafNodesKernel");
|
||||
launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) );
|
||||
launcher.setConst(numLeaves);
|
||||
|
||||
launcher.launch1D(numLeaves);
|
||||
clFinish(m_queue);
|
||||
}
|
||||
|
||||
b3LauncherCL launcher(m_queue, m_buildBinaryRadixTreeLeafNodesKernel, "m_buildBinaryRadixTreeLeafNodesKernel");
|
||||
launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) );
|
||||
launcher.setConst(numLeaves);
|
||||
|
||||
launcher.launch1D(numLeaves);
|
||||
clFinish(m_queue);
|
||||
}
|
||||
|
||||
//For each internal node, find parent nodes and assign child node indices
|
||||
{
|
||||
B3_PROFILE("m_buildBinaryRadixTreeInternalNodesKernel");
|
||||
|
||||
int maxCommonPrefix = -1;
|
||||
m_maxCommonPrefix.copyToHostPointer(&maxCommonPrefix, 1);
|
||||
|
||||
//-1 so that the root sets its AABB
|
||||
for(int processedCommonPrefix = maxCommonPrefix; processedCommonPrefix >= -1; --processedCommonPrefix)
|
||||
//For each internal node, find parent nodes and assign child node indices
|
||||
{
|
||||
B3_PROFILE("m_buildBinaryRadixTreeInternalNodesKernel");
|
||||
|
||||
b3BufferInfoCL bufferInfo[] =
|
||||
{
|
||||
b3BufferInfoCL( m_commonPrefixes.getBufferCL() ),
|
||||
b3BufferInfoCL( m_mortonCodesAndAabbIndicies.getBufferCL() ),
|
||||
b3BufferInfoCL( m_internalNodeLeftChildNodes.getBufferCL() ),
|
||||
b3BufferInfoCL( m_internalNodeRightChildNodes.getBufferCL() ),
|
||||
b3BufferInfoCL( m_leftInternalNodePointers.getBufferCL() ),
|
||||
b3BufferInfoCL( m_rightInternalNodePointers.getBufferCL() ),
|
||||
b3BufferInfoCL( m_leafNodeAabbs.getBufferCL() ),
|
||||
b3BufferInfoCL( m_internalNodeAabbs.getBufferCL() ),
|
||||
b3BufferInfoCL( m_rootNodeIndex.getBufferCL() )
|
||||
b3BufferInfoCL( m_commonPrefixLengths.getBufferCL() ),
|
||||
b3BufferInfoCL( m_internalNodeChildNodes.getBufferCL() ),
|
||||
b3BufferInfoCL( m_internalNodeParentNodes.getBufferCL() ),
|
||||
b3BufferInfoCL( m_rootNodeIndex.getBufferCL() ),
|
||||
b3BufferInfoCL( m_TEMP_leftLowerPrefix.getBufferCL() ),
|
||||
b3BufferInfoCL( m_TEMP_rightLowerPrefix.getBufferCL() ),
|
||||
b3BufferInfoCL( m_TEMP_leftSharedPrefixLength.getBufferCL() ),
|
||||
b3BufferInfoCL( m_TEMP_rightSharedPrefixLength.getBufferCL() )
|
||||
};
|
||||
|
||||
b3LauncherCL launcher(m_queue, m_buildBinaryRadixTreeInternalNodesKernel, "m_buildBinaryRadixTreeInternalNodesKernel");
|
||||
launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) );
|
||||
launcher.setConst(processedCommonPrefix);
|
||||
launcher.setConst(numInternalNodes);
|
||||
|
||||
launcher.launch1D(numInternalNodes);
|
||||
clFinish(m_queue);
|
||||
}
|
||||
|
||||
clFinish(m_queue);
|
||||
}
|
||||
|
||||
{
|
||||
B3_PROFILE("m_convertChildNodeFormatKernel");
|
||||
|
||||
b3BufferInfoCL bufferInfo[] =
|
||||
if(0)
|
||||
{
|
||||
b3BufferInfoCL( m_internalNodeLeftChildNodes.getBufferCL() ),
|
||||
b3BufferInfoCL( m_internalNodeRightChildNodes.getBufferCL() ),
|
||||
b3BufferInfoCL( m_internalNodeChildNodes.getBufferCL() )
|
||||
};
|
||||
static b3AlignedObjectArray<b3SortData> mortonCodesAndAabbIndices;
|
||||
|
||||
static b3AlignedObjectArray<b3Int2> child;
|
||||
static b3AlignedObjectArray<b3Int64> commonPrefixes;
|
||||
static b3AlignedObjectArray<int> commonPrefixLengths;
|
||||
static b3AlignedObjectArray<int> tempLeftLowerPrefixIndex;
|
||||
static b3AlignedObjectArray<int> tempRightLowerPrefixIndex;
|
||||
static b3AlignedObjectArray<int> tempLeftLowerPrefixSPL;
|
||||
static b3AlignedObjectArray<int> tempRightLowerPrefixSPL;
|
||||
static b3AlignedObjectArray<int> internalParent;
|
||||
|
||||
m_mortonCodesAndAabbIndicies.copyToHost(mortonCodesAndAabbIndices);
|
||||
|
||||
m_internalNodeChildNodes.copyToHost(child);
|
||||
m_commonPrefixes.copyToHost(commonPrefixes);
|
||||
m_commonPrefixLengths.copyToHost(commonPrefixLengths);
|
||||
m_TEMP_leftLowerPrefix.copyToHost(tempLeftLowerPrefixIndex);
|
||||
m_TEMP_rightLowerPrefix.copyToHost(tempRightLowerPrefixIndex);
|
||||
m_TEMP_leftSharedPrefixLength.copyToHost(tempLeftLowerPrefixSPL);
|
||||
m_TEMP_rightSharedPrefixLength.copyToHost(tempRightLowerPrefixSPL);
|
||||
m_internalNodeParentNodes.copyToHost(internalParent);
|
||||
|
||||
int rootNode = -1;
|
||||
m_rootNodeIndex.copyToHostPointer(&rootNode, 1);
|
||||
clFinish(m_queue);
|
||||
|
||||
printf( "rootNode: %d\n", getIndexWithInternalNodeMarkerRemoved(rootNode) );
|
||||
|
||||
for(int i = 0; i < numInternalNodes; ++i)
|
||||
{
|
||||
b3Int2 childNodes = child[i];
|
||||
|
||||
printf("childNodes[%d]:", i);
|
||||
printf( " %d", getIndexWithInternalNodeMarkerRemoved(childNodes.x) );
|
||||
if( !isLeafNode(childNodes.x) ) printf("i");
|
||||
printf( ", %d", getIndexWithInternalNodeMarkerRemoved(childNodes.y) );
|
||||
if( !isLeafNode(childNodes.y) ) printf("i");
|
||||
printf(" (lr: %d, %d)", tempLeftLowerPrefixIndex[i], tempRightLowerPrefixIndex[i]);
|
||||
printf(" (spl: %d, %d)", tempLeftLowerPrefixSPL[i], tempRightLowerPrefixSPL[i]);
|
||||
printf(" (prefix: %d)", commonPrefixLengths[i]);
|
||||
printf(" (par: %d)", internalParent[i]);
|
||||
printf("\n");
|
||||
}
|
||||
printf("\n");
|
||||
|
||||
for(int i = 0; i < numInternalNodes; ++i)
|
||||
{
|
||||
int hi = static_cast<int>(commonPrefixes[i] >> 32);
|
||||
int lo = static_cast<int>(commonPrefixes[i]);
|
||||
|
||||
printf("commonPrefix[%d]: %x, %d, len %d \n", i, hi, lo, commonPrefixLengths[i]);
|
||||
}
|
||||
printf("\n");
|
||||
|
||||
for(int i = 0; i < numLeaves; ++i)
|
||||
{
|
||||
printf("z-curve[%d]: %x \n", i, mortonCodesAndAabbIndices[i].m_key);
|
||||
}
|
||||
printf("\n");
|
||||
|
||||
|
||||
std::cout << std::endl;
|
||||
for(;;);
|
||||
}
|
||||
|
||||
b3LauncherCL launcher(m_queue, m_convertChildNodeFormatKernel, "m_convertChildNodeFormatKernel");
|
||||
launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) );
|
||||
launcher.setConst(numInternalNodes);
|
||||
//Find the number of nodes seperating each internal node and the root node
|
||||
//so that the AABBs can be set using the next kernel
|
||||
{
|
||||
B3_PROFILE("m_findDistanceFromRootKernel");
|
||||
|
||||
launcher.launch1D(numInternalNodes);
|
||||
clFinish(m_queue);
|
||||
b3BufferInfoCL bufferInfo[] =
|
||||
{
|
||||
b3BufferInfoCL( m_rootNodeIndex.getBufferCL() ),
|
||||
b3BufferInfoCL( m_internalNodeParentNodes.getBufferCL() ),
|
||||
b3BufferInfoCL( m_maxDistanceFromRoot.getBufferCL() ),
|
||||
b3BufferInfoCL( m_distanceFromRoot.getBufferCL() )
|
||||
};
|
||||
|
||||
b3LauncherCL launcher(m_queue, m_findDistanceFromRootKernel, "m_findDistanceFromRootKernel");
|
||||
launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) );
|
||||
launcher.setConst(numInternalNodes);
|
||||
|
||||
launcher.launch1D(numInternalNodes);
|
||||
clFinish(m_queue);
|
||||
}
|
||||
|
||||
//Starting from the nodes nearest to the leaf nodes, recursively move up
|
||||
//the tree to set the AABBs of each internal node; each internal node
|
||||
//checks its children and merges their AABBs
|
||||
{
|
||||
B3_PROFILE("m_buildBinaryRadixTreeAabbsRecursiveKernel");
|
||||
|
||||
int maxDistanceFromRoot = -1;
|
||||
{
|
||||
B3_PROFILE("copy maxDistanceFromRoot to CPU");
|
||||
m_maxDistanceFromRoot.copyToHostPointer(&maxDistanceFromRoot, 1);
|
||||
clFinish(m_queue);
|
||||
}
|
||||
|
||||
for(int distanceFromRoot = maxDistanceFromRoot; distanceFromRoot >= 0; --distanceFromRoot)
|
||||
{
|
||||
b3BufferInfoCL bufferInfo[] =
|
||||
{
|
||||
b3BufferInfoCL( m_distanceFromRoot.getBufferCL() ),
|
||||
b3BufferInfoCL( m_mortonCodesAndAabbIndicies.getBufferCL() ),
|
||||
b3BufferInfoCL( m_internalNodeChildNodes.getBufferCL() ),
|
||||
b3BufferInfoCL( m_leafNodeAabbs.getBufferCL() ),
|
||||
b3BufferInfoCL( m_internalNodeAabbs.getBufferCL() )
|
||||
};
|
||||
|
||||
b3LauncherCL launcher(m_queue, m_buildBinaryRadixTreeAabbsRecursiveKernel, "m_buildBinaryRadixTreeAabbsRecursiveKernel");
|
||||
launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) );
|
||||
launcher.setConst(maxDistanceFromRoot);
|
||||
launcher.setConst(distanceFromRoot);
|
||||
launcher.setConst(numInternalNodes);
|
||||
|
||||
launcher.launch1D(numInternalNodes);
|
||||
}
|
||||
|
||||
clFinish(m_queue);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -22,10 +22,11 @@ subject to the following restrictions:
|
||||
|
||||
#include "Bullet3OpenCL/ParallelPrimitives/b3FillCL.h"
|
||||
#include "Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h"
|
||||
#include "Bullet3OpenCL/ParallelPrimitives/b3PrefixScanCL.h"
|
||||
|
||||
#include "Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvhKernels.h"
|
||||
|
||||
#define B3_PLBVH_ROOT_NODE_MARKER -1 //Syncronize with parallelLinearBvh.cl
|
||||
#define b3Int64 cl_long
|
||||
|
||||
///@brief GPU Parallel Linearized Bounding Volume Heirarchy(LBVH) that is reconstructed every frame
|
||||
///@remarks
|
||||
@@ -36,16 +37,13 @@ subject to the following restrictions:
|
||||
///The basic algorithm for building the BVH as presented in [Lauterbach et al. 2009] consists of 4 stages:
|
||||
/// - [fully parallel] Assign morton codes for each AABB using its center (after quantizing the AABB centers into a virtual grid)
|
||||
/// - [fully parallel] Sort morton codes
|
||||
/// - [somewhat parallel] Build radix binary tree (assign parent/child pointers for internal nodes of the BVH)
|
||||
/// - [somewhat parallel] Build binary radix tree (assign parent/child pointers for internal nodes of the BVH)
|
||||
/// - [somewhat parallel] Set internal node AABBs
|
||||
///@par
|
||||
///[Karras 2012] improves on the algorithm by introducing fully parallel methods for the last 2 stages.
|
||||
///The BVH implementation here is almost the same as [Karras 2012], but a different method is used for constructing the tree.
|
||||
/// - Instead of building a binary radix tree, we simply pair each node with its nearest sibling.
|
||||
/// This has the effect of further worsening the quality of the BVH, but the main spatial partitioning is done by the
|
||||
/// Z-curve anyways, and this method should be simpler and faster during construction.
|
||||
/// - Rather than traveling upwards towards the root from the leaf nodes, as in the paper,
|
||||
/// each internal node checks its child nodes to get its AABB.
|
||||
///The BVH implementation here shares many concepts with [Karras 2012], but a different method is used for constructing the tree.
|
||||
///Instead of searching for the child nodes of each internal node, we search for the parent node of each node.
|
||||
///Additionally, a non-atomic traversal that starts from the leaf nodes and moves towards the root node is used to set the AABBs.
|
||||
class b3GpuParallelLinearBvh
|
||||
{
|
||||
cl_command_queue m_queue;
|
||||
@@ -56,58 +54,49 @@ class b3GpuParallelLinearBvh
|
||||
cl_kernel m_findAllNodesMergedAabbKernel;
|
||||
cl_kernel m_assignMortonCodesAndAabbIndiciesKernel;
|
||||
|
||||
//Simple binary tree construction kernels
|
||||
cl_kernel m_constructBinaryTreeKernel;
|
||||
cl_kernel m_determineInternalNodeAabbsKernel;
|
||||
|
||||
//Radix binary tree construction kernels
|
||||
cl_kernel m_computePrefixAndInitPointersKernel;
|
||||
cl_kernel m_correctDuplicatePrefixesKernel;
|
||||
//Binary radix tree construction kernels
|
||||
cl_kernel m_computeAdjacentPairCommonPrefixKernel;
|
||||
cl_kernel m_buildBinaryRadixTreeLeafNodesKernel;
|
||||
cl_kernel m_buildBinaryRadixTreeInternalNodesKernel;
|
||||
cl_kernel m_convertChildNodeFormatKernel;
|
||||
cl_kernel m_findDistanceFromRootKernel;
|
||||
cl_kernel m_buildBinaryRadixTreeAabbsRecursiveKernel;
|
||||
|
||||
//Traversal kernels
|
||||
cl_kernel m_plbvhCalculateOverlappingPairsKernel;
|
||||
cl_kernel m_plbvhRayTraverseKernel;
|
||||
|
||||
cl_kernel m_plbvhLargeAabbAabbTestKernel;
|
||||
cl_kernel m_plbvhLargeAabbRayTestKernel;
|
||||
|
||||
b3FillCL m_fill;
|
||||
|
||||
b3RadixSort32CL m_radixSorter;
|
||||
|
||||
//
|
||||
//1 element
|
||||
b3OpenCLArray<int> m_rootNodeIndex;
|
||||
b3OpenCLArray<int> m_maxDistanceFromRoot;
|
||||
|
||||
//1 element per level in the tree
|
||||
b3AlignedObjectArray<int> m_numNodesPerLevelCpu; //Level 0(m_numNodesPerLevelCpu[0]) is the root, last level contains the leaf nodes
|
||||
b3AlignedObjectArray<int> m_firstIndexOffsetPerLevelCpu; //Contains the index/offset of the first node in that level
|
||||
b3OpenCLArray<int> m_numNodesPerLevelGpu;
|
||||
b3OpenCLArray<int> m_firstIndexOffsetPerLevelGpu;
|
||||
|
||||
//1 element per internal node (number_of_internal_nodes = number_of_leaves - 1)
|
||||
//1 element per internal node (number_of_internal_nodes == number_of_leaves - 1)
|
||||
b3OpenCLArray<b3SapAabb> m_internalNodeAabbs;
|
||||
b3OpenCLArray<b3Int2> m_internalNodeLeafIndexRanges; //x == min leaf index, y == max leaf index
|
||||
b3OpenCLArray<b3Int2> m_internalNodeChildNodes; //x == left child, y == right child
|
||||
b3OpenCLArray<int> m_internalNodeParentNodes;
|
||||
|
||||
//1 element per internal node; for radix binary tree construction
|
||||
b3OpenCLArray<int> m_maxCommonPrefix;
|
||||
b3OpenCLArray<int> m_commonPrefixes;
|
||||
b3OpenCLArray<int> m_leftInternalNodePointers; //Linked list
|
||||
b3OpenCLArray<int> m_rightInternalNodePointers; //Linked list
|
||||
b3OpenCLArray<int> m_internalNodeLeftChildNodes;
|
||||
b3OpenCLArray<int> m_internalNodeRightChildNodes;
|
||||
//1 element per internal node; for binary radix tree construction
|
||||
b3OpenCLArray<b3Int64> m_commonPrefixes;
|
||||
b3OpenCLArray<int> m_commonPrefixLengths;
|
||||
b3OpenCLArray<int> m_childNodeCount;
|
||||
b3OpenCLArray<int> m_distanceFromRoot;
|
||||
b3OpenCLArray<int> m_TEMP_leftLowerPrefix;
|
||||
b3OpenCLArray<int> m_TEMP_rightLowerPrefix;
|
||||
b3OpenCLArray<int> m_TEMP_leftSharedPrefixLength;
|
||||
b3OpenCLArray<int> m_TEMP_rightSharedPrefixLength;
|
||||
|
||||
//1 element per leaf node (leaf nodes only include small AABBs)
|
||||
b3OpenCLArray<int> m_leafNodeParentNodes;
|
||||
b3OpenCLArray<b3SortData> m_mortonCodesAndAabbIndicies; //m_key = morton code, m_value == aabb index
|
||||
b3OpenCLArray<b3SapAabb> m_mergedAabb;
|
||||
b3OpenCLArray<b3SortData> m_mortonCodesAndAabbIndicies; //m_key == morton code, m_value == aabb index
|
||||
b3OpenCLArray<b3SapAabb> m_mergedAabb; //m_mergedAabb[0] contains the merged AABB of all leaf nodes
|
||||
b3OpenCLArray<b3SapAabb> m_leafNodeAabbs; //Contains only small AABBs
|
||||
|
||||
//1 element per large AABB
|
||||
b3OpenCLArray<b3SapAabb> m_largeAabbs; //Not stored in the BVH
|
||||
//1 element per large AABB, which is not stored in the BVH
|
||||
b3OpenCLArray<b3SapAabb> m_largeAabbs;
|
||||
|
||||
public:
|
||||
b3GpuParallelLinearBvh(cl_context context, cl_device_id device, cl_command_queue queue);
|
||||
@@ -131,8 +120,6 @@ public:
|
||||
b3OpenCLArray<int>& out_numRayRigidPairs, b3OpenCLArray<b3Int2>& out_rayRigidPairs);
|
||||
|
||||
private:
|
||||
void constructSimpleBinaryTree();
|
||||
|
||||
void constructRadixBinaryTree();
|
||||
};
|
||||
|
||||
|
||||
@@ -45,34 +45,29 @@ unsigned int interleaveBits(unsigned int x)
|
||||
//........ ........ ......12 3456789A //x
|
||||
//....1..2 ..3..4.. 5..6..7. .8..9..A //x after interleaving bits
|
||||
|
||||
//........ ....1234 56789A12 3456789A //x |= (x << 10)
|
||||
//........ ....1111 1....... ...11111 //0x 00 0F 80 1F
|
||||
//........ ....1234 5....... ...6789A //x = ( x | (x << 10) ) & 0x000F801F;
|
||||
//......12 3456789A ......12 3456789A //x ^ (x << 16)
|
||||
//11111111 ........ ........ 11111111 //0x FF 00 00 FF
|
||||
//......12 ........ ........ 3456789A //x = (x ^ (x << 16)) & 0xFF0000FF;
|
||||
|
||||
//.......1 23451234 5.....67 89A6789A //x |= (x << 5)
|
||||
//.......1 1.....11 1.....11 .....111 //0x 01 83 83 07
|
||||
//.......1 2.....34 5.....67 .....89A //x = ( x | (x << 5) ) & 0x01838307;
|
||||
//......12 ........ 3456789A 3456789A //x ^ (x << 8)
|
||||
//......11 ........ 1111.... ....1111 //0x 03 00 F0 0F
|
||||
//......12 ........ 3456.... ....789A //x = (x ^ (x << 8)) & 0x0300F00F;
|
||||
|
||||
//....12.1 2..34534 5..67.67 ..89A89A //x |= (x << 3)
|
||||
//....1... 1..1...1 1..1...1 ..1...11 //0x 08 91 91 23
|
||||
//....1... 2..3...4 5..6...7 ..8...9A //x = ( x | (x << 3) ) & 0x08919123;
|
||||
//..12..12 ....3456 3456.... 789A789A //x ^ (x << 4)
|
||||
//......11 ....11.. ..11.... 11....11 //0x 03 0C 30 C3
|
||||
//......12 ....34.. ..56.... 78....9A //x = (x ^ (x << 4)) & 0x030C30C3;
|
||||
|
||||
//...11..2 2.33..4N 5.66..77 .88..9NA //x |= (x << 1) ( N indicates overlapping bits, first overlap is bit {4,5} second is {9,A} )
|
||||
//....1..1 ..1...1. 1..1..1. .1...1.1 //0x 09 22 92 45
|
||||
//....1..2 ..3...4. 5..6..7. .8...9.A //x = ( x | (x << 1) ) & 0x09229245;
|
||||
|
||||
//...11.22 .33..445 5.66.77. 88..99AA //x |= (x << 1)
|
||||
//....1..1 ..1..1.. 1..1..1. .1..1..1 //0x 09 34 92 29
|
||||
//....1..2 ..3..4.. 5..6..7. .8..9..A //x = ( x | (x << 1) ) & 0x09349229;
|
||||
//....1212 ..3434.. 5656..78 78..9A9A //x ^ (x << 2)
|
||||
//....1..1 ..1..1.. 1..1..1. .1..1..1 //0x 09 24 92 49
|
||||
//....1..2 ..3..4.. 5..6..7. .8..9..A //x = (x ^ (x << 2)) & 0x09249249;
|
||||
|
||||
//........ ........ ......11 11111111 //0x000003FF
|
||||
x &= 0x000003FF; //Clear all bits above bit 10
|
||||
|
||||
x = ( x | (x << 10) ) & 0x000F801F;
|
||||
x = ( x | (x << 5) ) & 0x01838307;
|
||||
x = ( x | (x << 3) ) & 0x08919123;
|
||||
x = ( x | (x << 1) ) & 0x09229245;
|
||||
x = ( x | (x << 1) ) & 0x09349229;
|
||||
x = (x ^ (x << 16)) & 0xFF0000FF;
|
||||
x = (x ^ (x << 8)) & 0x0300F00F;
|
||||
x = (x ^ (x << 4)) & 0x030C30C3;
|
||||
x = (x ^ (x << 2)) & 0x09249249;
|
||||
|
||||
return x;
|
||||
}
|
||||
@@ -160,147 +155,11 @@ __kernel void assignMortonCodesAndAabbIndicies(__global b3AabbCL* worldSpaceAabb
|
||||
|
||||
//The most significant bit(0x80000000) of a int32 is used to distinguish between leaf and internal nodes.
|
||||
//If it is set, then the index is for an internal node; otherwise, it is a leaf node.
|
||||
//In both cases, the bit should be cleared to access the index.
|
||||
//In both cases, the bit should be cleared to access the actual node index.
|
||||
int isLeafNode(int index) { return (index >> 31 == 0); }
|
||||
int getIndexWithInternalNodeMarkerRemoved(int index) { return index & (~0x80000000); }
|
||||
int getIndexWithInternalNodeMarkerSet(int isLeaf, int index) { return (isLeaf) ? index : (index | 0x80000000); }
|
||||
|
||||
__kernel void constructBinaryTree(__global int* firstIndexOffsetPerLevel,
|
||||
__global int* numNodesPerLevel,
|
||||
__global int2* out_internalNodeChildIndices,
|
||||
__global int* out_internalNodeParentNodes,
|
||||
__global int* out_leafNodeParentNodes,
|
||||
int numLevels, int numInternalNodes)
|
||||
{
|
||||
int internalNodeIndex = get_global_id(0);
|
||||
if(internalNodeIndex >= numInternalNodes) return;
|
||||
|
||||
//Find the level that this node is in, using linear search(could replace with binary search)
|
||||
int level = 0;
|
||||
int numInternalLevels = numLevels - 1; //All levels except the last are internal nodes
|
||||
for(; level < numInternalLevels; ++level)
|
||||
{
|
||||
if( firstIndexOffsetPerLevel[level] <= internalNodeIndex && internalNodeIndex < firstIndexOffsetPerLevel[level + 1]) break;
|
||||
}
|
||||
|
||||
//Check lower levels to find child nodes
|
||||
//Left child is always in the next level, but the same does not apply to the right child
|
||||
int indexInLevel = internalNodeIndex - firstIndexOffsetPerLevel[level];
|
||||
int firstIndexInNextLevel = firstIndexOffsetPerLevel[level + 1]; //Should never be out of bounds(see for loop above)
|
||||
|
||||
int leftChildLevel = level + 1;
|
||||
int leftChildIndex = firstIndexInNextLevel + indexInLevel * 2;
|
||||
|
||||
int rightChildLevel = level + 1;
|
||||
int rightChildIndex = leftChildIndex + 1;
|
||||
|
||||
//Under certain conditions, the right child index as calculated above is invalid; need to find the correct index
|
||||
//
|
||||
//First condition: must be at least 2 levels apart from the leaf node level;
|
||||
//if the current level is right next to the leaf node level, then the right child
|
||||
//will never be invalid due to the way the nodes are allocated (also avoid a out-of-bounds memory access)
|
||||
//
|
||||
//Second condition: not enough nodes in the next level for each parent to have 2 children, so the right child is invalid
|
||||
//
|
||||
//Third condition: must be the last node in its level
|
||||
if( level < numLevels - 2
|
||||
&& numNodesPerLevel[level] * 2 > numNodesPerLevel[level + 1]
|
||||
&& indexInLevel == numNodesPerLevel[level] - 1 )
|
||||
{
|
||||
//Check lower levels until we find a node without a parent
|
||||
for(; rightChildLevel < numLevels - 1; ++rightChildLevel)
|
||||
{
|
||||
int rightChildNextLevel = rightChildLevel + 1;
|
||||
|
||||
//If this branch is taken, it means that the last node in rightChildNextLevel has no parent
|
||||
if( numNodesPerLevel[rightChildLevel] * 2 < numNodesPerLevel[rightChildNextLevel] )
|
||||
{
|
||||
//Set the node to the last node in rightChildNextLevel
|
||||
rightChildLevel = rightChildNextLevel;
|
||||
rightChildIndex = firstIndexOffsetPerLevel[rightChildNextLevel] + numNodesPerLevel[rightChildNextLevel] - 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int isLeftChildLeaf = (leftChildLevel >= numLevels - 1);
|
||||
int isRightChildLeaf = (rightChildLevel >= numLevels - 1);
|
||||
|
||||
//If left/right child is a leaf node, the index needs to be corrected
|
||||
//the way the index is calculated assumes that the leaf and internal nodes are in a contiguous array,
|
||||
//with leaf nodes at the end of the array; in actuality, the leaf and internal nodes are in separate arrays
|
||||
{
|
||||
int leafNodeLevel = numLevels - 1;
|
||||
leftChildIndex = (isLeftChildLeaf) ? leftChildIndex - firstIndexOffsetPerLevel[leafNodeLevel] : leftChildIndex;
|
||||
rightChildIndex = (isRightChildLeaf) ? rightChildIndex - firstIndexOffsetPerLevel[leafNodeLevel] : rightChildIndex;
|
||||
}
|
||||
|
||||
//Set the negative sign bit if the node is internal
|
||||
int2 childIndices;
|
||||
childIndices.x = getIndexWithInternalNodeMarkerSet(isLeftChildLeaf, leftChildIndex);
|
||||
childIndices.y = getIndexWithInternalNodeMarkerSet(isRightChildLeaf, rightChildIndex);
|
||||
out_internalNodeChildIndices[internalNodeIndex] = childIndices;
|
||||
|
||||
//Assign parent node index to children
|
||||
__global int* out_leftChildParentNodeIndices = (isLeftChildLeaf) ? out_leafNodeParentNodes : out_internalNodeParentNodes;
|
||||
out_leftChildParentNodeIndices[leftChildIndex] = internalNodeIndex;
|
||||
|
||||
__global int* out_rightChildParentNodeIndices = (isRightChildLeaf) ? out_leafNodeParentNodes : out_internalNodeParentNodes;
|
||||
out_rightChildParentNodeIndices[rightChildIndex] = internalNodeIndex;
|
||||
}
|
||||
|
||||
__kernel void determineInternalNodeAabbs(__global int* firstIndexOffsetPerLevel,
|
||||
__global int* numNodesPerLevel,
|
||||
__global int2* internalNodeChildIndices,
|
||||
__global SortDataCL* mortonCodesAndAabbIndices,
|
||||
__global b3AabbCL* leafNodeAabbs,
|
||||
__global int2* out_internalNodeLeafIndexRanges,
|
||||
__global b3AabbCL* out_internalNodeAabbs,
|
||||
int numLevels, int numInternalNodes, int level)
|
||||
{
|
||||
int i = get_global_id(0);
|
||||
if(i >= numInternalNodes) return;
|
||||
|
||||
//For each node in a level, check its child nodes to determine its AABB
|
||||
{
|
||||
int indexInLevel = i; //Index relative to firstIndexOffsetPerLevel[level]
|
||||
|
||||
int numNodesInLevel = numNodesPerLevel[level];
|
||||
if(indexInLevel < numNodesInLevel)
|
||||
{
|
||||
int internalNodeIndexGlobal = indexInLevel + firstIndexOffsetPerLevel[level];
|
||||
int2 childIndicies = internalNodeChildIndices[internalNodeIndexGlobal];
|
||||
|
||||
int leftChildIndex = getIndexWithInternalNodeMarkerRemoved(childIndicies.x);
|
||||
int rightChildIndex = getIndexWithInternalNodeMarkerRemoved(childIndicies.y);
|
||||
|
||||
int isLeftChildLeaf = isLeafNode(childIndicies.x);
|
||||
int isRightChildLeaf = isLeafNode(childIndicies.y);
|
||||
|
||||
//left/RightChildLeafIndex == Rigid body indicies
|
||||
int leftChildLeafIndex = (isLeftChildLeaf) ? mortonCodesAndAabbIndices[leftChildIndex].m_value : -1;
|
||||
int rightChildLeafIndex = (isRightChildLeaf) ? mortonCodesAndAabbIndices[rightChildIndex].m_value : -1;
|
||||
|
||||
b3AabbCL leftChildAabb = (isLeftChildLeaf) ? leafNodeAabbs[leftChildLeafIndex] : out_internalNodeAabbs[leftChildIndex];
|
||||
b3AabbCL rightChildAabb = (isRightChildLeaf) ? leafNodeAabbs[rightChildLeafIndex] : out_internalNodeAabbs[rightChildIndex];
|
||||
|
||||
//
|
||||
b3AabbCL internalNodeAabb;
|
||||
internalNodeAabb.m_min = b3Min(leftChildAabb.m_min, rightChildAabb.m_min);
|
||||
internalNodeAabb.m_max = b3Max(leftChildAabb.m_max, rightChildAabb.m_max);
|
||||
out_internalNodeAabbs[internalNodeIndexGlobal] = internalNodeAabb;
|
||||
|
||||
//For index range, x == min and y == max; left child always has lower index
|
||||
int2 leafIndexRange;
|
||||
leafIndexRange.x = (isLeftChildLeaf) ? leftChildIndex : out_internalNodeLeafIndexRanges[leftChildIndex].x;
|
||||
leafIndexRange.y = (isRightChildLeaf) ? rightChildIndex : out_internalNodeLeafIndexRanges[rightChildIndex].y;
|
||||
|
||||
out_internalNodeLeafIndexRanges[internalNodeIndexGlobal] = leafIndexRange;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
//From sap.cl
|
||||
#define NEW_PAIR_MARKER -1
|
||||
|
||||
@@ -567,84 +426,65 @@ __kernel void plbvhLargeAabbRayTest(__global b3AabbCL* largeRigidAabbs, __global
|
||||
}
|
||||
|
||||
|
||||
|
||||
#define B3_PLBVH_LINKED_LIST_INVALID_NODE -1
|
||||
|
||||
int longestCommonPrefix(int i, int j) { return clz(i ^ j); }
|
||||
|
||||
__kernel void computePrefixAndInitPointers(__global SortDataCL* mortonCodesAndAabbIndices,
|
||||
__global int* out_commonPrefixes,
|
||||
__global int* out_leftInternalNodePointers,
|
||||
__global int* out_rightInternalNodePointers,
|
||||
int numInternalNodes)
|
||||
{
|
||||
int internalNodeIndex = get_global_id(0);
|
||||
if (internalNodeIndex >= numInternalNodes) return;
|
||||
|
||||
//Compute common prefix
|
||||
{
|
||||
//Here, (internalNodeIndex + 1) is never out of bounds since it is a leaf node index,
|
||||
//and the number of internal nodes is always numLeafNodes - 1
|
||||
int leftLeafMortonCode = mortonCodesAndAabbIndices[internalNodeIndex].m_key;
|
||||
int rightLeafMortonCode = mortonCodesAndAabbIndices[internalNodeIndex + 1].m_key;
|
||||
|
||||
out_commonPrefixes[internalNodeIndex] = longestCommonPrefix(leftLeafMortonCode, rightLeafMortonCode);
|
||||
}
|
||||
|
||||
//Assign neighbor pointers of this node
|
||||
{
|
||||
int leftInternalIndex = internalNodeIndex - 1;
|
||||
int rightInternalIndex = internalNodeIndex + 1;
|
||||
|
||||
out_leftInternalNodePointers[internalNodeIndex] = (leftInternalIndex >= 0) ? leftInternalIndex : B3_PLBVH_LINKED_LIST_INVALID_NODE;
|
||||
out_rightInternalNodePointers[internalNodeIndex] = (rightInternalIndex < numInternalNodes) ? rightInternalIndex : B3_PLBVH_LINKED_LIST_INVALID_NODE;
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void correctDuplicatePrefixes(__global int* commonPrefixes, __global int* out_maxCommonPrefix, int numInternalNodes)
|
||||
{
|
||||
int internalNodeIndex = get_global_id(0);
|
||||
if (internalNodeIndex >= numInternalNodes) return;
|
||||
|
||||
int commonPrefix = commonPrefixes[internalNodeIndex];
|
||||
|
||||
//Linear search to find the size of the subtree
|
||||
int firstSubTreeIndex = internalNodeIndex;
|
||||
int lastSubTreeIndex = internalNodeIndex;
|
||||
|
||||
while(firstSubTreeIndex - 1 >= 0 && commonPrefix == commonPrefixes[firstSubTreeIndex - 1]) --firstSubTreeIndex;
|
||||
while(lastSubTreeIndex + 1 < numInternalNodes && commonPrefix == commonPrefixes[lastSubTreeIndex + 1]) ++lastSubTreeIndex;
|
||||
|
||||
//Fix duplicate common prefixes by incrementing them so that a subtree is formed.
|
||||
//Recursively divide the tree until the position of the split is this node's index.
|
||||
//Every time this node is not the split node, increment the common prefix.
|
||||
int isCurrentSplitNode = false;
|
||||
int correctedCommonPrefix = commonPrefix;
|
||||
|
||||
while(!isCurrentSplitNode)
|
||||
{
|
||||
int numInternalNodesInSubTree = lastSubTreeIndex - firstSubTreeIndex + 1;
|
||||
int splitNodeIndex = firstSubTreeIndex + numInternalNodesInSubTree / 2;
|
||||
|
||||
if(internalNodeIndex > splitNodeIndex) firstSubTreeIndex = splitNodeIndex + 1;
|
||||
else if(internalNodeIndex < splitNodeIndex) lastSubTreeIndex = splitNodeIndex - 1;
|
||||
//else if(internalNodeIndex == splitNodeIndex) break;
|
||||
|
||||
isCurrentSplitNode = (internalNodeIndex == splitNodeIndex);
|
||||
if(!isCurrentSplitNode) correctedCommonPrefix++;
|
||||
}
|
||||
|
||||
commonPrefixes[internalNodeIndex] = correctedCommonPrefix;
|
||||
atomic_max(out_maxCommonPrefix, correctedCommonPrefix);
|
||||
}
|
||||
|
||||
//Set so that it is always greater than the actual common prefixes, and never selected as a parent node.
|
||||
//If there are no duplicates, then the highest common prefix is 32 or 64, depending on the number of bits used for the z-curve.
|
||||
//Duplicates common prefixes increase the highest common prefix by N, where 2^N is the number of duplicate nodes.
|
||||
#define B3_PLBVH_INVALID_COMMON_PREFIX 128
|
||||
|
||||
__kernel void buildBinaryRadixTreeLeafNodes(__global int* commonPrefixes, __global int* out_leftChildNodes,
|
||||
__global int* out_rightChildNodes, int numLeafNodes)
|
||||
#define B3_PLBVH_ROOT_NODE_MARKER -1
|
||||
|
||||
#define b3Int64 long
|
||||
|
||||
int computeCommonPrefixLength(b3Int64 i, b3Int64 j) { return (int)clz(i ^ j); }
|
||||
b3Int64 computeCommonPrefix(b3Int64 i, b3Int64 j)
|
||||
{
|
||||
//This function only needs to return (i & j) in order for the algorithm to work,
|
||||
//but it may help with debugging to mask out the lower bits.
|
||||
|
||||
b3Int64 commonPrefixLength = (b3Int64)computeCommonPrefixLength(i, j);
|
||||
|
||||
b3Int64 sharedBits = i & j;
|
||||
b3Int64 bitmask = ((b3Int64)(~0)) << (64 - commonPrefixLength); //Set all bits after the common prefix to 0
|
||||
|
||||
return sharedBits & bitmask;
|
||||
}
|
||||
int getSharedPrefixLength(b3Int64 prefixA, int prefixLengthA, b3Int64 prefixB, int prefixLengthB)
|
||||
{
|
||||
return b3Min( computeCommonPrefixLength(prefixA, prefixB), b3Min(prefixLengthA, prefixLengthB) );
|
||||
}
|
||||
|
||||
__kernel void computeAdjacentPairCommonPrefix(__global SortDataCL* mortonCodesAndAabbIndices,
|
||||
__global b3Int64* out_commonPrefixes,
|
||||
__global int* out_commonPrefixLengths,
|
||||
int numInternalNodes)
|
||||
{
|
||||
int internalNodeIndex = get_global_id(0);
|
||||
if (internalNodeIndex >= numInternalNodes) return;
|
||||
|
||||
//Here, (internalNodeIndex + 1) is never out of bounds since it is a leaf node index,
|
||||
//and the number of internal nodes is always numLeafNodes - 1
|
||||
int leftLeafIndex = internalNodeIndex;
|
||||
int rightLeafIndex = internalNodeIndex + 1;
|
||||
|
||||
int leftLeafMortonCode = mortonCodesAndAabbIndices[leftLeafIndex].m_key;
|
||||
int rightLeafMortonCode = mortonCodesAndAabbIndices[rightLeafIndex].m_key;
|
||||
|
||||
//Binary radix tree construction algorithm does not work if there are duplicate morton codes.
|
||||
//Append the index of each leaf node to each morton code so that there are no duplicates.
|
||||
//The algorithm also requires that the morton codes are sorted in ascending order; this requirement
|
||||
//is also satisfied with this method, as (leftLeafIndex < rightLeafIndex) is always true.
|
||||
//
|
||||
//upsample(a, b) == ( ((b3Int64)a) << 32) | b
|
||||
b3Int64 nonduplicateLeftMortonCode = upsample(leftLeafMortonCode, leftLeafIndex);
|
||||
b3Int64 nonduplicateRightMortonCode = upsample(rightLeafMortonCode, rightLeafIndex);
|
||||
|
||||
out_commonPrefixes[internalNodeIndex] = computeCommonPrefix(nonduplicateLeftMortonCode, nonduplicateRightMortonCode);
|
||||
out_commonPrefixLengths[internalNodeIndex] = computeCommonPrefixLength(nonduplicateLeftMortonCode, nonduplicateRightMortonCode);
|
||||
}
|
||||
|
||||
|
||||
__kernel void buildBinaryRadixTreeLeafNodes(__global int* commonPrefixLengths, __global int* out_leafNodeParentNodes,
|
||||
__global int2* out_childNodes, int numLeafNodes)
|
||||
{
|
||||
int leafNodeIndex = get_global_id(0);
|
||||
if (leafNodeIndex >= numLeafNodes) return;
|
||||
@@ -654,8 +494,8 @@ __kernel void buildBinaryRadixTreeLeafNodes(__global int* commonPrefixes, __glob
|
||||
int leftSplitIndex = leafNodeIndex - 1;
|
||||
int rightSplitIndex = leafNodeIndex;
|
||||
|
||||
int leftCommonPrefix = (leftSplitIndex >= 0) ? commonPrefixes[leftSplitIndex] : B3_PLBVH_INVALID_COMMON_PREFIX;
|
||||
int rightCommonPrefix = (rightSplitIndex < numInternalNodes) ? commonPrefixes[rightSplitIndex] : B3_PLBVH_INVALID_COMMON_PREFIX;
|
||||
int leftCommonPrefix = (leftSplitIndex >= 0) ? commonPrefixLengths[leftSplitIndex] : B3_PLBVH_INVALID_COMMON_PREFIX;
|
||||
int rightCommonPrefix = (rightSplitIndex < numInternalNodes) ? commonPrefixLengths[rightSplitIndex] : B3_PLBVH_INVALID_COMMON_PREFIX;
|
||||
|
||||
//Parent node is the highest adjacent common prefix that is lower than the node's common prefix
|
||||
//Leaf nodes are considered as having the highest common prefix
|
||||
@@ -667,72 +507,223 @@ __kernel void buildBinaryRadixTreeLeafNodes(__global int* commonPrefixes, __glob
|
||||
if(rightCommonPrefix == B3_PLBVH_INVALID_COMMON_PREFIX) isLeftHigherCommonPrefix = true;
|
||||
|
||||
int parentNodeIndex = (isLeftHigherCommonPrefix) ? leftSplitIndex : rightSplitIndex;
|
||||
out_leafNodeParentNodes[leafNodeIndex] = parentNodeIndex;
|
||||
|
||||
//If the left node is the parent, then this node is its right child and vice versa
|
||||
__global int* out_childNode = (isLeftHigherCommonPrefix) ? out_rightChildNodes : out_leftChildNodes;
|
||||
int isRightChild = (isLeftHigherCommonPrefix); //If the left node is the parent, then this node is its right child and vice versa
|
||||
|
||||
//out_childNodesAsInt[0] == int2.x == left child
|
||||
//out_childNodesAsInt[1] == int2.y == right child
|
||||
int isLeaf = 1;
|
||||
out_childNode[parentNodeIndex] = getIndexWithInternalNodeMarkerSet(isLeaf, leafNodeIndex);
|
||||
__global int* out_childNodesAsInt = (__global int*)(&out_childNodes[parentNodeIndex]);
|
||||
out_childNodesAsInt[isRightChild] = getIndexWithInternalNodeMarkerSet(isLeaf, leafNodeIndex);
|
||||
}
|
||||
|
||||
__kernel void buildBinaryRadixTreeInternalNodes(__global int* commonPrefixes, __global SortDataCL* mortonCodesAndAabbIndices,
|
||||
__global int* leftChildNodes, __global int* rightChildNodes,
|
||||
__global int* leftNeighborPointers, __global int* rightNeighborPointers,
|
||||
__global b3AabbCL* leafNodeAabbs, __global b3AabbCL* internalNodeAabbs,
|
||||
__global int* out_rootNodeIndex,
|
||||
int processedCommonPrefix, int numInternalNodes)
|
||||
__kernel void buildBinaryRadixTreeInternalNodes(__global b3Int64* commonPrefixes, __global int* commonPrefixLengths,
|
||||
__global int2* out_childNodes,
|
||||
__global int* out_internalNodeParentNodes, __global int* out_rootNodeIndex,
|
||||
__global int* TEMP_out_leftLowerPrefix, __global int* TEMP_out_rightLowerPrefix,
|
||||
__global int* TEMP_spl_left, __global int* TEMP_spl_right,
|
||||
int numInternalNodes)
|
||||
{
|
||||
int internalNodeIndex = get_global_id(0);
|
||||
if (internalNodeIndex >= numInternalNodes) return;
|
||||
int internalNodeIndex = get_group_id(0) * get_local_size(0) + get_local_id(0);
|
||||
if(internalNodeIndex >= numInternalNodes) return;
|
||||
|
||||
int commonPrefix = commonPrefixes[internalNodeIndex];
|
||||
if (commonPrefix == processedCommonPrefix)
|
||||
b3Int64 nodePrefix = commonPrefixes[internalNodeIndex];
|
||||
int nodePrefixLength = commonPrefixLengths[internalNodeIndex];
|
||||
|
||||
//#define USE_LINEAR_SEARCH
|
||||
#ifdef USE_LINEAR_SEARCH
|
||||
int leftIndex = -1;
|
||||
int rightIndex = -1;
|
||||
|
||||
for(int i = internalNodeIndex - 1; i >= 0; --i)
|
||||
{
|
||||
//Check neighbors and compare the common prefix to select the parent node
|
||||
int leftNodeIndex = leftNeighborPointers[internalNodeIndex];
|
||||
int rightNodeIndex = rightNeighborPointers[internalNodeIndex];
|
||||
|
||||
int leftCommonPrefix = (leftNodeIndex != B3_PLBVH_LINKED_LIST_INVALID_NODE) ? commonPrefixes[leftNodeIndex] : B3_PLBVH_INVALID_COMMON_PREFIX;
|
||||
int rightCommonPrefix = (rightNodeIndex != B3_PLBVH_LINKED_LIST_INVALID_NODE) ? commonPrefixes[rightNodeIndex] : B3_PLBVH_INVALID_COMMON_PREFIX;
|
||||
|
||||
//Parent node is the highest common prefix that is lower than the node's common prefix
|
||||
//Since the nodes with lower common prefixes are removed, that condition does not have to be tested for,
|
||||
//and we only need to pick the node with the higher prefix.
|
||||
int isLeftHigherCommonPrefix = (leftCommonPrefix > rightCommonPrefix);
|
||||
|
||||
//
|
||||
if(leftCommonPrefix == B3_PLBVH_INVALID_COMMON_PREFIX) isLeftHigherCommonPrefix = false;
|
||||
else if(rightCommonPrefix == B3_PLBVH_INVALID_COMMON_PREFIX) isLeftHigherCommonPrefix = true;
|
||||
|
||||
int isRootNode = false;
|
||||
if(leftCommonPrefix == B3_PLBVH_INVALID_COMMON_PREFIX && rightCommonPrefix == B3_PLBVH_INVALID_COMMON_PREFIX) isRootNode = true;
|
||||
|
||||
int parentNodeIndex = (isLeftHigherCommonPrefix) ? leftNodeIndex : rightNodeIndex;
|
||||
|
||||
//If the left node is the parent, then this node is its right child and vice versa
|
||||
__global int* out_childNode = (isLeftHigherCommonPrefix) ? rightChildNodes : leftChildNodes;
|
||||
|
||||
int isLeaf = 0;
|
||||
if(!isRootNode) out_childNode[parentNodeIndex] = getIndexWithInternalNodeMarkerSet(isLeaf, internalNodeIndex);
|
||||
|
||||
if(isRootNode) *out_rootNodeIndex = getIndexWithInternalNodeMarkerSet(isLeaf, internalNodeIndex);
|
||||
|
||||
//Remove this node from the linked list,
|
||||
//so that the left and right nodes point at each other instead of this node
|
||||
if(leftNodeIndex != B3_PLBVH_LINKED_LIST_INVALID_NODE) rightNeighborPointers[leftNodeIndex] = rightNodeIndex;
|
||||
if(rightNodeIndex != B3_PLBVH_LINKED_LIST_INVALID_NODE) leftNeighborPointers[rightNodeIndex] = leftNodeIndex;
|
||||
|
||||
//For debug
|
||||
leftNeighborPointers[internalNodeIndex] = -2;
|
||||
rightNeighborPointers[internalNodeIndex] = -2;
|
||||
int nodeLeftSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, commonPrefixes[i], commonPrefixLengths[i]);
|
||||
if(nodeLeftSharedPrefixLength < nodePrefixLength)
|
||||
{
|
||||
leftIndex = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
//Processing occurs from highest common prefix to lowest common prefix
|
||||
//Nodes in the previously processed level have their children set, so we merge their child AABBs here
|
||||
if (commonPrefix == processedCommonPrefix + 1)
|
||||
for(int i = internalNodeIndex + 1; i < numInternalNodes; ++i)
|
||||
{
|
||||
int leftChildIndex = leftChildNodes[internalNodeIndex];
|
||||
int rightChildIndex = rightChildNodes[internalNodeIndex];
|
||||
int nodeRightSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, commonPrefixes[i], commonPrefixLengths[i]);
|
||||
if(nodeRightSharedPrefixLength < nodePrefixLength)
|
||||
{
|
||||
rightIndex = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
#else //Use binary search
|
||||
|
||||
//Find nearest element to left with a lower common prefix
|
||||
int leftIndex = -1;
|
||||
{
|
||||
int lower = 0;
|
||||
int upper = internalNodeIndex - 1;
|
||||
|
||||
while(lower <= upper)
|
||||
{
|
||||
int mid = (lower + upper) / 2;
|
||||
b3Int64 midPrefix = commonPrefixes[mid];
|
||||
int midPrefixLength = commonPrefixLengths[mid];
|
||||
|
||||
int nodeMidSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, midPrefix, midPrefixLength);
|
||||
if(nodeMidSharedPrefixLength < nodePrefixLength)
|
||||
{
|
||||
int right = mid + 1;
|
||||
if(right < internalNodeIndex)
|
||||
{
|
||||
b3Int64 rightPrefix = commonPrefixes[right];
|
||||
int rightPrefixLength = commonPrefixLengths[right];
|
||||
|
||||
int nodeRightSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, rightPrefix, rightPrefixLength);
|
||||
if(nodeRightSharedPrefixLength < nodePrefixLength)
|
||||
{
|
||||
lower = right;
|
||||
leftIndex = right;
|
||||
}
|
||||
else
|
||||
{
|
||||
leftIndex = mid;
|
||||
break;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
leftIndex = mid;
|
||||
break;
|
||||
}
|
||||
}
|
||||
else upper = mid - 1;
|
||||
}
|
||||
}
|
||||
|
||||
//Find nearest element to right with a lower common prefix
|
||||
int rightIndex = -1;
|
||||
{
|
||||
int lower = internalNodeIndex + 1;
|
||||
int upper = numInternalNodes - 1;
|
||||
|
||||
while(lower <= upper)
|
||||
{
|
||||
int mid = (lower + upper) / 2;
|
||||
b3Int64 midPrefix = commonPrefixes[mid];
|
||||
int midPrefixLength = commonPrefixLengths[mid];
|
||||
|
||||
int nodeMidSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, midPrefix, midPrefixLength);
|
||||
if(nodeMidSharedPrefixLength < nodePrefixLength)
|
||||
{
|
||||
int left = mid - 1;
|
||||
if(left > internalNodeIndex)
|
||||
{
|
||||
b3Int64 leftPrefix = commonPrefixes[left];
|
||||
int leftPrefixLength = commonPrefixLengths[left];
|
||||
|
||||
int nodeLeftSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, leftPrefix, leftPrefixLength);
|
||||
if(nodeLeftSharedPrefixLength < nodePrefixLength)
|
||||
{
|
||||
upper = left;
|
||||
rightIndex = left;
|
||||
}
|
||||
else
|
||||
{
|
||||
rightIndex = mid;
|
||||
break;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
rightIndex = mid;
|
||||
break;
|
||||
}
|
||||
}
|
||||
else lower = mid + 1;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
TEMP_out_leftLowerPrefix[internalNodeIndex] = leftIndex;
|
||||
TEMP_out_rightLowerPrefix[internalNodeIndex] = rightIndex;
|
||||
TEMP_spl_left[internalNodeIndex] = (leftIndex != -1) ? getSharedPrefixLength(nodePrefix, nodePrefixLength, commonPrefixes[leftIndex], commonPrefixLengths[leftIndex]) : -1;
|
||||
TEMP_spl_right[internalNodeIndex] = (rightIndex != -1) ? getSharedPrefixLength(nodePrefix, nodePrefixLength, commonPrefixes[rightIndex], commonPrefixLengths[rightIndex]) : -1;
|
||||
|
||||
//Select parent
|
||||
{
|
||||
int leftPrefixLength = (leftIndex != -1) ? commonPrefixLengths[leftIndex] : B3_PLBVH_INVALID_COMMON_PREFIX;
|
||||
int rightPrefixLength = (rightIndex != -1) ? commonPrefixLengths[rightIndex] : B3_PLBVH_INVALID_COMMON_PREFIX;
|
||||
|
||||
int isLeftHigherPrefixLength = (leftPrefixLength > rightPrefixLength);
|
||||
|
||||
if(leftPrefixLength == B3_PLBVH_INVALID_COMMON_PREFIX) isLeftHigherPrefixLength = false;
|
||||
else if(rightPrefixLength == B3_PLBVH_INVALID_COMMON_PREFIX) isLeftHigherPrefixLength = true;
|
||||
|
||||
int parentNodeIndex = (isLeftHigherPrefixLength) ? leftIndex : rightIndex;
|
||||
|
||||
int isRootNode = (leftIndex == -1 && rightIndex == -1);
|
||||
out_internalNodeParentNodes[internalNodeIndex] = (!isRootNode) ? parentNodeIndex : B3_PLBVH_ROOT_NODE_MARKER;
|
||||
|
||||
int isLeaf = 0;
|
||||
if(!isRootNode)
|
||||
{
|
||||
int isRightChild = (isLeftHigherPrefixLength); //If the left node is the parent, then this node is its right child and vice versa
|
||||
|
||||
//out_childNodesAsInt[0] == int2.x == left child
|
||||
//out_childNodesAsInt[1] == int2.y == right child
|
||||
__global int* out_childNodesAsInt = (__global int*)(&out_childNodes[parentNodeIndex]);
|
||||
out_childNodesAsInt[isRightChild] = getIndexWithInternalNodeMarkerSet(isLeaf, internalNodeIndex);
|
||||
}
|
||||
else *out_rootNodeIndex = getIndexWithInternalNodeMarkerSet(isLeaf, internalNodeIndex);
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void findDistanceFromRoot(__global int* rootNodeIndex, __global int* internalNodeParentNodes,
|
||||
__global int* out_maxDistanceFromRoot, __global int* out_distanceFromRoot, int numInternalNodes)
|
||||
{
|
||||
if( get_global_id(0) == 0 ) atomic_xchg(out_maxDistanceFromRoot, 0);
|
||||
|
||||
int internalNodeIndex = get_global_id(0);
|
||||
if(internalNodeIndex >= numInternalNodes) return;
|
||||
|
||||
//
|
||||
int distanceFromRoot = 0;
|
||||
{
|
||||
int parentIndex = internalNodeParentNodes[internalNodeIndex];
|
||||
while(parentIndex != B3_PLBVH_ROOT_NODE_MARKER)
|
||||
{
|
||||
parentIndex = internalNodeParentNodes[parentIndex];
|
||||
++distanceFromRoot;
|
||||
}
|
||||
}
|
||||
out_distanceFromRoot[internalNodeIndex] = distanceFromRoot;
|
||||
|
||||
//
|
||||
__local int localMaxDistanceFromRoot;
|
||||
if( get_local_id(0) == 0 ) localMaxDistanceFromRoot = 0;
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
atomic_max(&localMaxDistanceFromRoot, distanceFromRoot);
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if( get_local_id(0) == 0 ) atomic_max(out_maxDistanceFromRoot, localMaxDistanceFromRoot);
|
||||
}
|
||||
|
||||
__kernel void buildBinaryRadixTreeAabbsRecursive(__global int* distanceFromRoot, __global SortDataCL* mortonCodesAndAabbIndices,
|
||||
__global int2* childNodes,
|
||||
__global b3AabbCL* leafNodeAabbs, __global b3AabbCL* internalNodeAabbs,
|
||||
int maxDistanceFromRoot, int processedDistance, int numInternalNodes)
|
||||
{
|
||||
int internalNodeIndex = get_global_id(0);
|
||||
if(internalNodeIndex >= numInternalNodes) return;
|
||||
|
||||
int distance = distanceFromRoot[internalNodeIndex];
|
||||
|
||||
if(distance == processedDistance)
|
||||
{
|
||||
int leftChildIndex = childNodes[internalNodeIndex].x;
|
||||
int rightChildIndex = childNodes[internalNodeIndex].y;
|
||||
|
||||
int isLeftChildLeaf = isLeafNode(leftChildIndex);
|
||||
int isRightChildLeaf = isLeafNode(rightChildIndex);
|
||||
@@ -753,18 +744,3 @@ __kernel void buildBinaryRadixTreeInternalNodes(__global int* commonPrefixes, __
|
||||
internalNodeAabbs[internalNodeIndex] = mergedAabb;
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void convertChildNodeFormat(__global int* leftChildNodes, __global int* rightChildNodes,
|
||||
__global int2* out_childNodes, int numInternalNodes)
|
||||
{
|
||||
int internalNodeIndex = get_global_id(0);
|
||||
if (internalNodeIndex >= numInternalNodes) return;
|
||||
|
||||
int2 childNodesIndices;
|
||||
childNodesIndices.x = leftChildNodes[internalNodeIndex];
|
||||
childNodesIndices.y = rightChildNodes[internalNodeIndex];
|
||||
|
||||
out_childNodes[internalNodeIndex] = childNodesIndices;
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -41,34 +41,29 @@ static const char* parallelLinearBvhCL= \
|
||||
" //........ ........ ......12 3456789A //x\n"
|
||||
" //....1..2 ..3..4.. 5..6..7. .8..9..A //x after interleaving bits\n"
|
||||
" \n"
|
||||
" //........ ....1234 56789A12 3456789A //x |= (x << 10)\n"
|
||||
" //........ ....1111 1....... ...11111 //0x 00 0F 80 1F\n"
|
||||
" //........ ....1234 5....... ...6789A //x = ( x | (x << 10) ) & 0x000F801F; \n"
|
||||
" //......12 3456789A ......12 3456789A //x ^ (x << 16)\n"
|
||||
" //11111111 ........ ........ 11111111 //0x FF 00 00 FF\n"
|
||||
" //......12 ........ ........ 3456789A //x = (x ^ (x << 16)) & 0xFF0000FF;\n"
|
||||
" \n"
|
||||
" //.......1 23451234 5.....67 89A6789A //x |= (x << 5)\n"
|
||||
" //.......1 1.....11 1.....11 .....111 //0x 01 83 83 07\n"
|
||||
" //.......1 2.....34 5.....67 .....89A //x = ( x | (x << 5) ) & 0x01838307;\n"
|
||||
" //......12 ........ 3456789A 3456789A //x ^ (x << 8)\n"
|
||||
" //......11 ........ 1111.... ....1111 //0x 03 00 F0 0F\n"
|
||||
" //......12 ........ 3456.... ....789A //x = (x ^ (x << 8)) & 0x0300F00F;\n"
|
||||
" \n"
|
||||
" //....12.1 2..34534 5..67.67 ..89A89A //x |= (x << 3)\n"
|
||||
" //....1... 1..1...1 1..1...1 ..1...11 //0x 08 91 91 23\n"
|
||||
" //....1... 2..3...4 5..6...7 ..8...9A //x = ( x | (x << 3) ) & 0x08919123;\n"
|
||||
" //..12..12 ....3456 3456.... 789A789A //x ^ (x << 4)\n"
|
||||
" //......11 ....11.. ..11.... 11....11 //0x 03 0C 30 C3\n"
|
||||
" //......12 ....34.. ..56.... 78....9A //x = (x ^ (x << 4)) & 0x030C30C3;\n"
|
||||
" \n"
|
||||
" //...11..2 2.33..4N 5.66..77 .88..9NA //x |= (x << 1) ( N indicates overlapping bits, first overlap is bit {4,5} second is {9,A} )\n"
|
||||
" //....1..1 ..1...1. 1..1..1. .1...1.1 //0x 09 22 92 45\n"
|
||||
" //....1..2 ..3...4. 5..6..7. .8...9.A //x = ( x | (x << 1) ) & 0x09229245;\n"
|
||||
" \n"
|
||||
" //...11.22 .33..445 5.66.77. 88..99AA //x |= (x << 1)\n"
|
||||
" //....1..1 ..1..1.. 1..1..1. .1..1..1 //0x 09 34 92 29\n"
|
||||
" //....1..2 ..3..4.. 5..6..7. .8..9..A //x = ( x | (x << 1) ) & 0x09349229;\n"
|
||||
" //....1212 ..3434.. 5656..78 78..9A9A //x ^ (x << 2)\n"
|
||||
" //....1..1 ..1..1.. 1..1..1. .1..1..1 //0x 09 24 92 49\n"
|
||||
" //....1..2 ..3..4.. 5..6..7. .8..9..A //x = (x ^ (x << 2)) & 0x09249249;\n"
|
||||
" \n"
|
||||
" //........ ........ ......11 11111111 //0x000003FF\n"
|
||||
" x &= 0x000003FF; //Clear all bits above bit 10\n"
|
||||
" \n"
|
||||
" x = ( x | (x << 10) ) & 0x000F801F;\n"
|
||||
" x = ( x | (x << 5) ) & 0x01838307;\n"
|
||||
" x = ( x | (x << 3) ) & 0x08919123;\n"
|
||||
" x = ( x | (x << 1) ) & 0x09229245;\n"
|
||||
" x = ( x | (x << 1) ) & 0x09349229;\n"
|
||||
" x = (x ^ (x << 16)) & 0xFF0000FF;\n"
|
||||
" x = (x ^ (x << 8)) & 0x0300F00F;\n"
|
||||
" x = (x ^ (x << 4)) & 0x030C30C3;\n"
|
||||
" x = (x ^ (x << 2)) & 0x09249249;\n"
|
||||
" \n"
|
||||
" return x;\n"
|
||||
"}\n"
|
||||
@@ -150,143 +145,10 @@ static const char* parallelLinearBvhCL= \
|
||||
"#define B3_PLVBH_TRAVERSE_MAX_STACK_SIZE 128\n"
|
||||
"//The most significant bit(0x80000000) of a int32 is used to distinguish between leaf and internal nodes.\n"
|
||||
"//If it is set, then the index is for an internal node; otherwise, it is a leaf node. \n"
|
||||
"//In both cases, the bit should be cleared to access the index.\n"
|
||||
"//In both cases, the bit should be cleared to access the actual node index.\n"
|
||||
"int isLeafNode(int index) { return (index >> 31 == 0); }\n"
|
||||
"int getIndexWithInternalNodeMarkerRemoved(int index) { return index & (~0x80000000); }\n"
|
||||
"int getIndexWithInternalNodeMarkerSet(int isLeaf, int index) { return (isLeaf) ? index : (index | 0x80000000); }\n"
|
||||
"__kernel void constructBinaryTree(__global int* firstIndexOffsetPerLevel,\n"
|
||||
" __global int* numNodesPerLevel,\n"
|
||||
" __global int2* out_internalNodeChildIndices, \n"
|
||||
" __global int* out_internalNodeParentNodes, \n"
|
||||
" __global int* out_leafNodeParentNodes, \n"
|
||||
" int numLevels, int numInternalNodes)\n"
|
||||
"{\n"
|
||||
" int internalNodeIndex = get_global_id(0);\n"
|
||||
" if(internalNodeIndex >= numInternalNodes) return;\n"
|
||||
" \n"
|
||||
" //Find the level that this node is in, using linear search(could replace with binary search)\n"
|
||||
" int level = 0;\n"
|
||||
" int numInternalLevels = numLevels - 1; //All levels except the last are internal nodes\n"
|
||||
" for(; level < numInternalLevels; ++level)\n"
|
||||
" {\n"
|
||||
" if( firstIndexOffsetPerLevel[level] <= internalNodeIndex && internalNodeIndex < firstIndexOffsetPerLevel[level + 1]) break;\n"
|
||||
" }\n"
|
||||
" \n"
|
||||
" //Check lower levels to find child nodes\n"
|
||||
" //Left child is always in the next level, but the same does not apply to the right child\n"
|
||||
" int indexInLevel = internalNodeIndex - firstIndexOffsetPerLevel[level];\n"
|
||||
" int firstIndexInNextLevel = firstIndexOffsetPerLevel[level + 1]; //Should never be out of bounds(see for loop above)\n"
|
||||
" \n"
|
||||
" int leftChildLevel = level + 1;\n"
|
||||
" int leftChildIndex = firstIndexInNextLevel + indexInLevel * 2;\n"
|
||||
" \n"
|
||||
" int rightChildLevel = level + 1;\n"
|
||||
" int rightChildIndex = leftChildIndex + 1;\n"
|
||||
" \n"
|
||||
" //Under certain conditions, the right child index as calculated above is invalid; need to find the correct index\n"
|
||||
" //\n"
|
||||
" //First condition: must be at least 2 levels apart from the leaf node level;\n"
|
||||
" //if the current level is right next to the leaf node level, then the right child\n"
|
||||
" //will never be invalid due to the way the nodes are allocated (also avoid a out-of-bounds memory access)\n"
|
||||
" //\n"
|
||||
" //Second condition: not enough nodes in the next level for each parent to have 2 children, so the right child is invalid\n"
|
||||
" //\n"
|
||||
" //Third condition: must be the last node in its level\n"
|
||||
" if( level < numLevels - 2 \n"
|
||||
" && numNodesPerLevel[level] * 2 > numNodesPerLevel[level + 1] \n"
|
||||
" && indexInLevel == numNodesPerLevel[level] - 1 )\n"
|
||||
" {\n"
|
||||
" //Check lower levels until we find a node without a parent\n"
|
||||
" for(; rightChildLevel < numLevels - 1; ++rightChildLevel)\n"
|
||||
" {\n"
|
||||
" int rightChildNextLevel = rightChildLevel + 1;\n"
|
||||
" \n"
|
||||
" //If this branch is taken, it means that the last node in rightChildNextLevel has no parent\n"
|
||||
" if( numNodesPerLevel[rightChildLevel] * 2 < numNodesPerLevel[rightChildNextLevel] )\n"
|
||||
" {\n"
|
||||
" //Set the node to the last node in rightChildNextLevel\n"
|
||||
" rightChildLevel = rightChildNextLevel;\n"
|
||||
" rightChildIndex = firstIndexOffsetPerLevel[rightChildNextLevel] + numNodesPerLevel[rightChildNextLevel] - 1;\n"
|
||||
" break;\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
" \n"
|
||||
" int isLeftChildLeaf = (leftChildLevel >= numLevels - 1);\n"
|
||||
" int isRightChildLeaf = (rightChildLevel >= numLevels - 1);\n"
|
||||
" \n"
|
||||
" //If left/right child is a leaf node, the index needs to be corrected\n"
|
||||
" //the way the index is calculated assumes that the leaf and internal nodes are in a contiguous array,\n"
|
||||
" //with leaf nodes at the end of the array; in actuality, the leaf and internal nodes are in separate arrays\n"
|
||||
" {\n"
|
||||
" int leafNodeLevel = numLevels - 1;\n"
|
||||
" leftChildIndex = (isLeftChildLeaf) ? leftChildIndex - firstIndexOffsetPerLevel[leafNodeLevel] : leftChildIndex;\n"
|
||||
" rightChildIndex = (isRightChildLeaf) ? rightChildIndex - firstIndexOffsetPerLevel[leafNodeLevel] : rightChildIndex;\n"
|
||||
" }\n"
|
||||
" \n"
|
||||
" //Set the negative sign bit if the node is internal\n"
|
||||
" int2 childIndices;\n"
|
||||
" childIndices.x = getIndexWithInternalNodeMarkerSet(isLeftChildLeaf, leftChildIndex);\n"
|
||||
" childIndices.y = getIndexWithInternalNodeMarkerSet(isRightChildLeaf, rightChildIndex);\n"
|
||||
" out_internalNodeChildIndices[internalNodeIndex] = childIndices;\n"
|
||||
" \n"
|
||||
" //Assign parent node index to children\n"
|
||||
" __global int* out_leftChildParentNodeIndices = (isLeftChildLeaf) ? out_leafNodeParentNodes : out_internalNodeParentNodes;\n"
|
||||
" out_leftChildParentNodeIndices[leftChildIndex] = internalNodeIndex;\n"
|
||||
" \n"
|
||||
" __global int* out_rightChildParentNodeIndices = (isRightChildLeaf) ? out_leafNodeParentNodes : out_internalNodeParentNodes;\n"
|
||||
" out_rightChildParentNodeIndices[rightChildIndex] = internalNodeIndex;\n"
|
||||
"}\n"
|
||||
"__kernel void determineInternalNodeAabbs(__global int* firstIndexOffsetPerLevel,\n"
|
||||
" __global int* numNodesPerLevel, \n"
|
||||
" __global int2* internalNodeChildIndices,\n"
|
||||
" __global SortDataCL* mortonCodesAndAabbIndices,\n"
|
||||
" __global b3AabbCL* leafNodeAabbs, \n"
|
||||
" __global int2* out_internalNodeLeafIndexRanges,\n"
|
||||
" __global b3AabbCL* out_internalNodeAabbs, \n"
|
||||
" int numLevels, int numInternalNodes, int level)\n"
|
||||
"{\n"
|
||||
" int i = get_global_id(0);\n"
|
||||
" if(i >= numInternalNodes) return;\n"
|
||||
" \n"
|
||||
" //For each node in a level, check its child nodes to determine its AABB\n"
|
||||
" {\n"
|
||||
" int indexInLevel = i; //Index relative to firstIndexOffsetPerLevel[level]\n"
|
||||
" \n"
|
||||
" int numNodesInLevel = numNodesPerLevel[level];\n"
|
||||
" if(indexInLevel < numNodesInLevel)\n"
|
||||
" {\n"
|
||||
" int internalNodeIndexGlobal = indexInLevel + firstIndexOffsetPerLevel[level];\n"
|
||||
" int2 childIndicies = internalNodeChildIndices[internalNodeIndexGlobal];\n"
|
||||
" \n"
|
||||
" int leftChildIndex = getIndexWithInternalNodeMarkerRemoved(childIndicies.x);\n"
|
||||
" int rightChildIndex = getIndexWithInternalNodeMarkerRemoved(childIndicies.y);\n"
|
||||
" \n"
|
||||
" int isLeftChildLeaf = isLeafNode(childIndicies.x);\n"
|
||||
" int isRightChildLeaf = isLeafNode(childIndicies.y);\n"
|
||||
" \n"
|
||||
" //left/RightChildLeafIndex == Rigid body indicies\n"
|
||||
" int leftChildLeafIndex = (isLeftChildLeaf) ? mortonCodesAndAabbIndices[leftChildIndex].m_value : -1;\n"
|
||||
" int rightChildLeafIndex = (isRightChildLeaf) ? mortonCodesAndAabbIndices[rightChildIndex].m_value : -1;\n"
|
||||
" \n"
|
||||
" b3AabbCL leftChildAabb = (isLeftChildLeaf) ? leafNodeAabbs[leftChildLeafIndex] : out_internalNodeAabbs[leftChildIndex];\n"
|
||||
" b3AabbCL rightChildAabb = (isRightChildLeaf) ? leafNodeAabbs[rightChildLeafIndex] : out_internalNodeAabbs[rightChildIndex];\n"
|
||||
" \n"
|
||||
" //\n"
|
||||
" b3AabbCL internalNodeAabb;\n"
|
||||
" internalNodeAabb.m_min = b3Min(leftChildAabb.m_min, rightChildAabb.m_min);\n"
|
||||
" internalNodeAabb.m_max = b3Max(leftChildAabb.m_max, rightChildAabb.m_max);\n"
|
||||
" out_internalNodeAabbs[internalNodeIndexGlobal] = internalNodeAabb;\n"
|
||||
" \n"
|
||||
" //For index range, x == min and y == max; left child always has lower index\n"
|
||||
" int2 leafIndexRange;\n"
|
||||
" leafIndexRange.x = (isLeftChildLeaf) ? leftChildIndex : out_internalNodeLeafIndexRanges[leftChildIndex].x;\n"
|
||||
" leafIndexRange.y = (isRightChildLeaf) ? rightChildIndex : out_internalNodeLeafIndexRanges[rightChildIndex].y;\n"
|
||||
" \n"
|
||||
" out_internalNodeLeafIndexRanges[internalNodeIndexGlobal] = leafIndexRange;\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
"//From sap.cl\n"
|
||||
"#define NEW_PAIR_MARKER -1\n"
|
||||
"bool TestAabbAgainstAabb2(const b3AabbCL* aabb1, const b3AabbCL* aabb2)\n"
|
||||
@@ -539,78 +401,57 @@ static const char* parallelLinearBvhCL= \
|
||||
" }\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
"#define B3_PLBVH_LINKED_LIST_INVALID_NODE -1\n"
|
||||
"int longestCommonPrefix(int i, int j) { return clz(i ^ j); }\n"
|
||||
"__kernel void computePrefixAndInitPointers(__global SortDataCL* mortonCodesAndAabbIndices,\n"
|
||||
" __global int* out_commonPrefixes,\n"
|
||||
" __global int* out_leftInternalNodePointers, \n"
|
||||
" __global int* out_rightInternalNodePointers,\n"
|
||||
"//Set so that it is always greater than the actual common prefixes, and never selected as a parent node.\n"
|
||||
"//If there are no duplicates, then the highest common prefix is 32 or 64, depending on the number of bits used for the z-curve.\n"
|
||||
"//Duplicates common prefixes increase the highest common prefix by N, where 2^N is the number of duplicate nodes.\n"
|
||||
"#define B3_PLBVH_INVALID_COMMON_PREFIX 128\n"
|
||||
"#define B3_PLBVH_ROOT_NODE_MARKER -1\n"
|
||||
"#define b3Int64 long\n"
|
||||
"int computeCommonPrefixLength(b3Int64 i, b3Int64 j) { return (int)clz(i ^ j); }\n"
|
||||
"b3Int64 computeCommonPrefix(b3Int64 i, b3Int64 j) \n"
|
||||
"{\n"
|
||||
" //This function only needs to return (i & j) in order for the algorithm to work,\n"
|
||||
" //but it may help with debugging to mask out the lower bits.\n"
|
||||
" b3Int64 commonPrefixLength = (b3Int64)computeCommonPrefixLength(i, j);\n"
|
||||
" b3Int64 sharedBits = i & j;\n"
|
||||
" b3Int64 bitmask = ((b3Int64)(~0)) << (64 - commonPrefixLength); //Set all bits after the common prefix to 0\n"
|
||||
" \n"
|
||||
" return sharedBits & bitmask;\n"
|
||||
"}\n"
|
||||
"int getSharedPrefixLength(b3Int64 prefixA, int prefixLengthA, b3Int64 prefixB, int prefixLengthB)\n"
|
||||
"{\n"
|
||||
" return b3Min( computeCommonPrefixLength(prefixA, prefixB), b3Min(prefixLengthA, prefixLengthB) );\n"
|
||||
"}\n"
|
||||
"__kernel void computeAdjacentPairCommonPrefix(__global SortDataCL* mortonCodesAndAabbIndices,\n"
|
||||
" __global b3Int64* out_commonPrefixes,\n"
|
||||
" __global int* out_commonPrefixLengths,\n"
|
||||
" int numInternalNodes)\n"
|
||||
"{\n"
|
||||
" int internalNodeIndex = get_global_id(0);\n"
|
||||
" if (internalNodeIndex >= numInternalNodes) return;\n"
|
||||
" \n"
|
||||
" //Compute common prefix\n"
|
||||
" {\n"
|
||||
" //Here, (internalNodeIndex + 1) is never out of bounds since it is a leaf node index,\n"
|
||||
" //and the number of internal nodes is always numLeafNodes - 1\n"
|
||||
" int leftLeafMortonCode = mortonCodesAndAabbIndices[internalNodeIndex].m_key;\n"
|
||||
" int rightLeafMortonCode = mortonCodesAndAabbIndices[internalNodeIndex + 1].m_key;\n"
|
||||
" //Here, (internalNodeIndex + 1) is never out of bounds since it is a leaf node index,\n"
|
||||
" //and the number of internal nodes is always numLeafNodes - 1\n"
|
||||
" int leftLeafIndex = internalNodeIndex;\n"
|
||||
" int rightLeafIndex = internalNodeIndex + 1;\n"
|
||||
" \n"
|
||||
" out_commonPrefixes[internalNodeIndex] = longestCommonPrefix(leftLeafMortonCode, rightLeafMortonCode);\n"
|
||||
" }\n"
|
||||
" int leftLeafMortonCode = mortonCodesAndAabbIndices[leftLeafIndex].m_key;\n"
|
||||
" int rightLeafMortonCode = mortonCodesAndAabbIndices[rightLeafIndex].m_key;\n"
|
||||
" \n"
|
||||
" //Assign neighbor pointers of this node\n"
|
||||
" {\n"
|
||||
" int leftInternalIndex = internalNodeIndex - 1;\n"
|
||||
" int rightInternalIndex = internalNodeIndex + 1;\n"
|
||||
" \n"
|
||||
" out_leftInternalNodePointers[internalNodeIndex] = (leftInternalIndex >= 0) ? leftInternalIndex : B3_PLBVH_LINKED_LIST_INVALID_NODE;\n"
|
||||
" out_rightInternalNodePointers[internalNodeIndex] = (rightInternalIndex < numInternalNodes) ? rightInternalIndex : B3_PLBVH_LINKED_LIST_INVALID_NODE;\n"
|
||||
" }\n"
|
||||
" //Binary radix tree construction algorithm does not work if there are duplicate morton codes.\n"
|
||||
" //Append the index of each leaf node to each morton code so that there are no duplicates.\n"
|
||||
" //The algorithm also requires that the morton codes are sorted in ascending order; this requirement\n"
|
||||
" //is also satisfied with this method, as (leftLeafIndex < rightLeafIndex) is always true.\n"
|
||||
" //\n"
|
||||
" //upsample(a, b) == ( ((b3Int64)a) << 32) | b\n"
|
||||
" b3Int64 nonduplicateLeftMortonCode = upsample(leftLeafMortonCode, leftLeafIndex);\n"
|
||||
" b3Int64 nonduplicateRightMortonCode = upsample(rightLeafMortonCode, rightLeafIndex);\n"
|
||||
" \n"
|
||||
" out_commonPrefixes[internalNodeIndex] = computeCommonPrefix(nonduplicateLeftMortonCode, nonduplicateRightMortonCode);\n"
|
||||
" out_commonPrefixLengths[internalNodeIndex] = computeCommonPrefixLength(nonduplicateLeftMortonCode, nonduplicateRightMortonCode);\n"
|
||||
"}\n"
|
||||
"__kernel void correctDuplicatePrefixes(__global int* commonPrefixes, __global int* out_maxCommonPrefix, int numInternalNodes)\n"
|
||||
"{\n"
|
||||
" int internalNodeIndex = get_global_id(0);\n"
|
||||
" if (internalNodeIndex >= numInternalNodes) return;\n"
|
||||
" \n"
|
||||
" int commonPrefix = commonPrefixes[internalNodeIndex];\n"
|
||||
" \n"
|
||||
" //Linear search to find the size of the subtree\n"
|
||||
" int firstSubTreeIndex = internalNodeIndex;\n"
|
||||
" int lastSubTreeIndex = internalNodeIndex;\n"
|
||||
" \n"
|
||||
" while(firstSubTreeIndex - 1 >= 0 && commonPrefix == commonPrefixes[firstSubTreeIndex - 1]) --firstSubTreeIndex;\n"
|
||||
" while(lastSubTreeIndex + 1 < numInternalNodes && commonPrefix == commonPrefixes[lastSubTreeIndex + 1]) ++lastSubTreeIndex;\n"
|
||||
" \n"
|
||||
" //Fix duplicate common prefixes by incrementing them so that a subtree is formed.\n"
|
||||
" //Recursively divide the tree until the position of the split is this node's index.\n"
|
||||
" //Every time this node is not the split node, increment the common prefix.\n"
|
||||
" int isCurrentSplitNode = false;\n"
|
||||
" int correctedCommonPrefix = commonPrefix;\n"
|
||||
" \n"
|
||||
" while(!isCurrentSplitNode)\n"
|
||||
" {\n"
|
||||
" int numInternalNodesInSubTree = lastSubTreeIndex - firstSubTreeIndex + 1;\n"
|
||||
" int splitNodeIndex = firstSubTreeIndex + numInternalNodesInSubTree / 2;\n"
|
||||
" \n"
|
||||
" if(internalNodeIndex > splitNodeIndex) firstSubTreeIndex = splitNodeIndex + 1;\n"
|
||||
" else if(internalNodeIndex < splitNodeIndex) lastSubTreeIndex = splitNodeIndex - 1;\n"
|
||||
" //else if(internalNodeIndex == splitNodeIndex) break;\n"
|
||||
" \n"
|
||||
" isCurrentSplitNode = (internalNodeIndex == splitNodeIndex);\n"
|
||||
" if(!isCurrentSplitNode) correctedCommonPrefix++;\n"
|
||||
" }\n"
|
||||
" \n"
|
||||
" commonPrefixes[internalNodeIndex] = correctedCommonPrefix;\n"
|
||||
" atomic_max(out_maxCommonPrefix, correctedCommonPrefix);\n"
|
||||
"}\n"
|
||||
"//Set so that it is always greater than the actual common prefixes, and never selected as a parent node.\n"
|
||||
"//If there are no duplicates, then the highest common prefix is 32 or 64, depending on the number of bits used for the z-curve.\n"
|
||||
"//Duplicates common prefixes increase the highest common prefix by N, where 2^N is the number of duplicate nodes.\n"
|
||||
"#define B3_PLBVH_INVALID_COMMON_PREFIX 128\n"
|
||||
"__kernel void buildBinaryRadixTreeLeafNodes(__global int* commonPrefixes, __global int* out_leftChildNodes, \n"
|
||||
" __global int* out_rightChildNodes, int numLeafNodes)\n"
|
||||
"__kernel void buildBinaryRadixTreeLeafNodes(__global int* commonPrefixLengths, __global int* out_leafNodeParentNodes,\n"
|
||||
" __global int2* out_childNodes, int numLeafNodes)\n"
|
||||
"{\n"
|
||||
" int leafNodeIndex = get_global_id(0);\n"
|
||||
" if (leafNodeIndex >= numLeafNodes) return;\n"
|
||||
@@ -620,8 +461,8 @@ static const char* parallelLinearBvhCL= \
|
||||
" int leftSplitIndex = leafNodeIndex - 1;\n"
|
||||
" int rightSplitIndex = leafNodeIndex;\n"
|
||||
" \n"
|
||||
" int leftCommonPrefix = (leftSplitIndex >= 0) ? commonPrefixes[leftSplitIndex] : B3_PLBVH_INVALID_COMMON_PREFIX;\n"
|
||||
" int rightCommonPrefix = (rightSplitIndex < numInternalNodes) ? commonPrefixes[rightSplitIndex] : B3_PLBVH_INVALID_COMMON_PREFIX;\n"
|
||||
" int leftCommonPrefix = (leftSplitIndex >= 0) ? commonPrefixLengths[leftSplitIndex] : B3_PLBVH_INVALID_COMMON_PREFIX;\n"
|
||||
" int rightCommonPrefix = (rightSplitIndex < numInternalNodes) ? commonPrefixLengths[rightSplitIndex] : B3_PLBVH_INVALID_COMMON_PREFIX;\n"
|
||||
" \n"
|
||||
" //Parent node is the highest adjacent common prefix that is lower than the node's common prefix\n"
|
||||
" //Leaf nodes are considered as having the highest common prefix\n"
|
||||
@@ -633,71 +474,218 @@ static const char* parallelLinearBvhCL= \
|
||||
" if(rightCommonPrefix == B3_PLBVH_INVALID_COMMON_PREFIX) isLeftHigherCommonPrefix = true;\n"
|
||||
" \n"
|
||||
" int parentNodeIndex = (isLeftHigherCommonPrefix) ? leftSplitIndex : rightSplitIndex;\n"
|
||||
" out_leafNodeParentNodes[leafNodeIndex] = parentNodeIndex;\n"
|
||||
" \n"
|
||||
" //If the left node is the parent, then this node is its right child and vice versa\n"
|
||||
" __global int* out_childNode = (isLeftHigherCommonPrefix) ? out_rightChildNodes : out_leftChildNodes;\n"
|
||||
" int isRightChild = (isLeftHigherCommonPrefix); //If the left node is the parent, then this node is its right child and vice versa\n"
|
||||
" \n"
|
||||
" //out_childNodesAsInt[0] == int2.x == left child\n"
|
||||
" //out_childNodesAsInt[1] == int2.y == right child\n"
|
||||
" int isLeaf = 1;\n"
|
||||
" out_childNode[parentNodeIndex] = getIndexWithInternalNodeMarkerSet(isLeaf, leafNodeIndex);\n"
|
||||
" __global int* out_childNodesAsInt = (__global int*)(&out_childNodes[parentNodeIndex]);\n"
|
||||
" out_childNodesAsInt[isRightChild] = getIndexWithInternalNodeMarkerSet(isLeaf, leafNodeIndex);\n"
|
||||
"}\n"
|
||||
"__kernel void buildBinaryRadixTreeInternalNodes(__global int* commonPrefixes, __global SortDataCL* mortonCodesAndAabbIndices,\n"
|
||||
" __global int* leftChildNodes, __global int* rightChildNodes,\n"
|
||||
" __global int* leftNeighborPointers, __global int* rightNeighborPointers,\n"
|
||||
" __global b3AabbCL* leafNodeAabbs, __global b3AabbCL* internalNodeAabbs,\n"
|
||||
" __global int* out_rootNodeIndex,\n"
|
||||
" int processedCommonPrefix, int numInternalNodes)\n"
|
||||
"__kernel void buildBinaryRadixTreeInternalNodes(__global b3Int64* commonPrefixes, __global int* commonPrefixLengths,\n"
|
||||
" __global int2* out_childNodes,\n"
|
||||
" __global int* out_internalNodeParentNodes, __global int* out_rootNodeIndex,\n"
|
||||
" __global int* TEMP_out_leftLowerPrefix, __global int* TEMP_out_rightLowerPrefix,\n"
|
||||
" __global int* TEMP_spl_left, __global int* TEMP_spl_right,\n"
|
||||
" int numInternalNodes)\n"
|
||||
"{\n"
|
||||
" int internalNodeIndex = get_global_id(0);\n"
|
||||
" if (internalNodeIndex >= numInternalNodes) return;\n"
|
||||
" int internalNodeIndex = get_group_id(0) * get_local_size(0) + get_local_id(0);\n"
|
||||
" if(internalNodeIndex >= numInternalNodes) return;\n"
|
||||
" \n"
|
||||
" int commonPrefix = commonPrefixes[internalNodeIndex];\n"
|
||||
" if (commonPrefix == processedCommonPrefix)\n"
|
||||
" b3Int64 nodePrefix = commonPrefixes[internalNodeIndex];\n"
|
||||
" int nodePrefixLength = commonPrefixLengths[internalNodeIndex];\n"
|
||||
" \n"
|
||||
"//#define USE_LINEAR_SEARCH\n"
|
||||
"#ifdef USE_LINEAR_SEARCH\n"
|
||||
" int leftIndex = -1;\n"
|
||||
" int rightIndex = -1;\n"
|
||||
" \n"
|
||||
" for(int i = internalNodeIndex - 1; i >= 0; --i)\n"
|
||||
" {\n"
|
||||
" //Check neighbors and compare the common prefix to select the parent node\n"
|
||||
" int leftNodeIndex = leftNeighborPointers[internalNodeIndex];\n"
|
||||
" int rightNodeIndex = rightNeighborPointers[internalNodeIndex];\n"
|
||||
" \n"
|
||||
" int leftCommonPrefix = (leftNodeIndex != B3_PLBVH_LINKED_LIST_INVALID_NODE) ? commonPrefixes[leftNodeIndex] : B3_PLBVH_INVALID_COMMON_PREFIX;\n"
|
||||
" int rightCommonPrefix = (rightNodeIndex != B3_PLBVH_LINKED_LIST_INVALID_NODE) ? commonPrefixes[rightNodeIndex] : B3_PLBVH_INVALID_COMMON_PREFIX;\n"
|
||||
" \n"
|
||||
" //Parent node is the highest common prefix that is lower than the node's common prefix\n"
|
||||
" //Since the nodes with lower common prefixes are removed, that condition does not have to be tested for,\n"
|
||||
" //and we only need to pick the node with the higher prefix.\n"
|
||||
" int isLeftHigherCommonPrefix = (leftCommonPrefix > rightCommonPrefix);\n"
|
||||
" \n"
|
||||
" //\n"
|
||||
" if(leftCommonPrefix == B3_PLBVH_INVALID_COMMON_PREFIX) isLeftHigherCommonPrefix = false;\n"
|
||||
" else if(rightCommonPrefix == B3_PLBVH_INVALID_COMMON_PREFIX) isLeftHigherCommonPrefix = true;\n"
|
||||
" \n"
|
||||
" int isRootNode = false;\n"
|
||||
" if(leftCommonPrefix == B3_PLBVH_INVALID_COMMON_PREFIX && rightCommonPrefix == B3_PLBVH_INVALID_COMMON_PREFIX) isRootNode = true;\n"
|
||||
" \n"
|
||||
" int parentNodeIndex = (isLeftHigherCommonPrefix) ? leftNodeIndex : rightNodeIndex;\n"
|
||||
" \n"
|
||||
" //If the left node is the parent, then this node is its right child and vice versa\n"
|
||||
" __global int* out_childNode = (isLeftHigherCommonPrefix) ? rightChildNodes : leftChildNodes;\n"
|
||||
" \n"
|
||||
" int isLeaf = 0;\n"
|
||||
" if(!isRootNode) out_childNode[parentNodeIndex] = getIndexWithInternalNodeMarkerSet(isLeaf, internalNodeIndex);\n"
|
||||
" \n"
|
||||
" if(isRootNode) *out_rootNodeIndex = getIndexWithInternalNodeMarkerSet(isLeaf, internalNodeIndex);\n"
|
||||
" \n"
|
||||
" //Remove this node from the linked list, \n"
|
||||
" //so that the left and right nodes point at each other instead of this node\n"
|
||||
" if(leftNodeIndex != B3_PLBVH_LINKED_LIST_INVALID_NODE) rightNeighborPointers[leftNodeIndex] = rightNodeIndex;\n"
|
||||
" if(rightNodeIndex != B3_PLBVH_LINKED_LIST_INVALID_NODE) leftNeighborPointers[rightNodeIndex] = leftNodeIndex;\n"
|
||||
" \n"
|
||||
" //For debug\n"
|
||||
" leftNeighborPointers[internalNodeIndex] = -2;\n"
|
||||
" rightNeighborPointers[internalNodeIndex] = -2;\n"
|
||||
" int nodeLeftSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, commonPrefixes[i], commonPrefixLengths[i]);\n"
|
||||
" if(nodeLeftSharedPrefixLength < nodePrefixLength)\n"
|
||||
" {\n"
|
||||
" leftIndex = i;\n"
|
||||
" break;\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
" \n"
|
||||
" //Processing occurs from highest common prefix to lowest common prefix\n"
|
||||
" //Nodes in the previously processed level have their children set, so we merge their child AABBs here\n"
|
||||
" if (commonPrefix == processedCommonPrefix + 1)\n"
|
||||
" for(int i = internalNodeIndex + 1; i < numInternalNodes; ++i)\n"
|
||||
" {\n"
|
||||
" int leftChildIndex = leftChildNodes[internalNodeIndex];\n"
|
||||
" int rightChildIndex = rightChildNodes[internalNodeIndex];\n"
|
||||
" int nodeRightSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, commonPrefixes[i], commonPrefixLengths[i]);\n"
|
||||
" if(nodeRightSharedPrefixLength < nodePrefixLength)\n"
|
||||
" {\n"
|
||||
" rightIndex = i;\n"
|
||||
" break;\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
" \n"
|
||||
"#else //Use binary search\n"
|
||||
" //Find nearest element to left with a lower common prefix\n"
|
||||
" int leftIndex = -1;\n"
|
||||
" {\n"
|
||||
" int lower = 0;\n"
|
||||
" int upper = internalNodeIndex - 1;\n"
|
||||
" \n"
|
||||
" while(lower <= upper)\n"
|
||||
" {\n"
|
||||
" int mid = (lower + upper) / 2;\n"
|
||||
" b3Int64 midPrefix = commonPrefixes[mid];\n"
|
||||
" int midPrefixLength = commonPrefixLengths[mid];\n"
|
||||
" \n"
|
||||
" int nodeMidSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, midPrefix, midPrefixLength);\n"
|
||||
" if(nodeMidSharedPrefixLength < nodePrefixLength) \n"
|
||||
" {\n"
|
||||
" int right = mid + 1;\n"
|
||||
" if(right < internalNodeIndex)\n"
|
||||
" {\n"
|
||||
" b3Int64 rightPrefix = commonPrefixes[right];\n"
|
||||
" int rightPrefixLength = commonPrefixLengths[right];\n"
|
||||
" \n"
|
||||
" int nodeRightSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, rightPrefix, rightPrefixLength);\n"
|
||||
" if(nodeRightSharedPrefixLength < nodePrefixLength) \n"
|
||||
" {\n"
|
||||
" lower = right;\n"
|
||||
" leftIndex = right;\n"
|
||||
" }\n"
|
||||
" else \n"
|
||||
" {\n"
|
||||
" leftIndex = mid;\n"
|
||||
" break;\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
" else \n"
|
||||
" {\n"
|
||||
" leftIndex = mid;\n"
|
||||
" break;\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
" else upper = mid - 1;\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
" \n"
|
||||
" //Find nearest element to right with a lower common prefix\n"
|
||||
" int rightIndex = -1;\n"
|
||||
" {\n"
|
||||
" int lower = internalNodeIndex + 1;\n"
|
||||
" int upper = numInternalNodes - 1;\n"
|
||||
" \n"
|
||||
" while(lower <= upper)\n"
|
||||
" {\n"
|
||||
" int mid = (lower + upper) / 2;\n"
|
||||
" b3Int64 midPrefix = commonPrefixes[mid];\n"
|
||||
" int midPrefixLength = commonPrefixLengths[mid];\n"
|
||||
" \n"
|
||||
" int nodeMidSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, midPrefix, midPrefixLength);\n"
|
||||
" if(nodeMidSharedPrefixLength < nodePrefixLength) \n"
|
||||
" {\n"
|
||||
" int left = mid - 1;\n"
|
||||
" if(left > internalNodeIndex)\n"
|
||||
" {\n"
|
||||
" b3Int64 leftPrefix = commonPrefixes[left];\n"
|
||||
" int leftPrefixLength = commonPrefixLengths[left];\n"
|
||||
" \n"
|
||||
" int nodeLeftSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, leftPrefix, leftPrefixLength);\n"
|
||||
" if(nodeLeftSharedPrefixLength < nodePrefixLength) \n"
|
||||
" {\n"
|
||||
" upper = left;\n"
|
||||
" rightIndex = left;\n"
|
||||
" }\n"
|
||||
" else \n"
|
||||
" {\n"
|
||||
" rightIndex = mid;\n"
|
||||
" break;\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
" else \n"
|
||||
" {\n"
|
||||
" rightIndex = mid;\n"
|
||||
" break;\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
" else lower = mid + 1;\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
"#endif\n"
|
||||
" \n"
|
||||
" TEMP_out_leftLowerPrefix[internalNodeIndex] = leftIndex;\n"
|
||||
" TEMP_out_rightLowerPrefix[internalNodeIndex] = rightIndex;\n"
|
||||
" TEMP_spl_left[internalNodeIndex] = (leftIndex != -1) ? getSharedPrefixLength(nodePrefix, nodePrefixLength, commonPrefixes[leftIndex], commonPrefixLengths[leftIndex]) : -1;\n"
|
||||
" TEMP_spl_right[internalNodeIndex] = (rightIndex != -1) ? getSharedPrefixLength(nodePrefix, nodePrefixLength, commonPrefixes[rightIndex], commonPrefixLengths[rightIndex]) : -1;\n"
|
||||
" \n"
|
||||
" //Select parent\n"
|
||||
" {\n"
|
||||
" int leftPrefixLength = (leftIndex != -1) ? commonPrefixLengths[leftIndex] : B3_PLBVH_INVALID_COMMON_PREFIX;\n"
|
||||
" int rightPrefixLength = (rightIndex != -1) ? commonPrefixLengths[rightIndex] : B3_PLBVH_INVALID_COMMON_PREFIX;\n"
|
||||
" \n"
|
||||
" int isLeftHigherPrefixLength = (leftPrefixLength > rightPrefixLength);\n"
|
||||
" \n"
|
||||
" if(leftPrefixLength == B3_PLBVH_INVALID_COMMON_PREFIX) isLeftHigherPrefixLength = false;\n"
|
||||
" else if(rightPrefixLength == B3_PLBVH_INVALID_COMMON_PREFIX) isLeftHigherPrefixLength = true;\n"
|
||||
" \n"
|
||||
" int parentNodeIndex = (isLeftHigherPrefixLength) ? leftIndex : rightIndex;\n"
|
||||
" \n"
|
||||
" int isRootNode = (leftIndex == -1 && rightIndex == -1);\n"
|
||||
" out_internalNodeParentNodes[internalNodeIndex] = (!isRootNode) ? parentNodeIndex : B3_PLBVH_ROOT_NODE_MARKER;\n"
|
||||
" \n"
|
||||
" int isLeaf = 0;\n"
|
||||
" if(!isRootNode)\n"
|
||||
" {\n"
|
||||
" int isRightChild = (isLeftHigherPrefixLength); //If the left node is the parent, then this node is its right child and vice versa\n"
|
||||
" \n"
|
||||
" //out_childNodesAsInt[0] == int2.x == left child\n"
|
||||
" //out_childNodesAsInt[1] == int2.y == right child\n"
|
||||
" __global int* out_childNodesAsInt = (__global int*)(&out_childNodes[parentNodeIndex]);\n"
|
||||
" out_childNodesAsInt[isRightChild] = getIndexWithInternalNodeMarkerSet(isLeaf, internalNodeIndex);\n"
|
||||
" }\n"
|
||||
" else *out_rootNodeIndex = getIndexWithInternalNodeMarkerSet(isLeaf, internalNodeIndex);\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
"__kernel void findDistanceFromRoot(__global int* rootNodeIndex, __global int* internalNodeParentNodes,\n"
|
||||
" __global int* out_maxDistanceFromRoot, __global int* out_distanceFromRoot, int numInternalNodes)\n"
|
||||
"{\n"
|
||||
" if( get_global_id(0) == 0 ) atomic_xchg(out_maxDistanceFromRoot, 0);\n"
|
||||
" int internalNodeIndex = get_global_id(0);\n"
|
||||
" if(internalNodeIndex >= numInternalNodes) return;\n"
|
||||
" \n"
|
||||
" //\n"
|
||||
" int distanceFromRoot = 0;\n"
|
||||
" {\n"
|
||||
" int parentIndex = internalNodeParentNodes[internalNodeIndex];\n"
|
||||
" while(parentIndex != B3_PLBVH_ROOT_NODE_MARKER)\n"
|
||||
" {\n"
|
||||
" parentIndex = internalNodeParentNodes[parentIndex];\n"
|
||||
" ++distanceFromRoot;\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
" out_distanceFromRoot[internalNodeIndex] = distanceFromRoot;\n"
|
||||
" \n"
|
||||
" //\n"
|
||||
" __local int localMaxDistanceFromRoot;\n"
|
||||
" if( get_local_id(0) == 0 ) localMaxDistanceFromRoot = 0;\n"
|
||||
" barrier(CLK_LOCAL_MEM_FENCE);\n"
|
||||
" \n"
|
||||
" atomic_max(&localMaxDistanceFromRoot, distanceFromRoot);\n"
|
||||
" barrier(CLK_LOCAL_MEM_FENCE);\n"
|
||||
" \n"
|
||||
" if( get_local_id(0) == 0 ) atomic_max(out_maxDistanceFromRoot, localMaxDistanceFromRoot);\n"
|
||||
"}\n"
|
||||
"__kernel void buildBinaryRadixTreeAabbsRecursive(__global int* distanceFromRoot, __global SortDataCL* mortonCodesAndAabbIndices,\n"
|
||||
" __global int2* childNodes,\n"
|
||||
" __global b3AabbCL* leafNodeAabbs, __global b3AabbCL* internalNodeAabbs,\n"
|
||||
" int maxDistanceFromRoot, int processedDistance, int numInternalNodes)\n"
|
||||
"{\n"
|
||||
" int internalNodeIndex = get_global_id(0);\n"
|
||||
" if(internalNodeIndex >= numInternalNodes) return;\n"
|
||||
" \n"
|
||||
" int distance = distanceFromRoot[internalNodeIndex];\n"
|
||||
" \n"
|
||||
" if(distance == processedDistance)\n"
|
||||
" {\n"
|
||||
" int leftChildIndex = childNodes[internalNodeIndex].x;\n"
|
||||
" int rightChildIndex = childNodes[internalNodeIndex].y;\n"
|
||||
" \n"
|
||||
" int isLeftChildLeaf = isLeafNode(leftChildIndex);\n"
|
||||
" int isRightChildLeaf = isLeafNode(rightChildIndex);\n"
|
||||
@@ -718,16 +706,4 @@ static const char* parallelLinearBvhCL= \
|
||||
" internalNodeAabbs[internalNodeIndex] = mergedAabb;\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
"__kernel void convertChildNodeFormat(__global int* leftChildNodes, __global int* rightChildNodes, \n"
|
||||
" __global int2* out_childNodes, int numInternalNodes)\n"
|
||||
"{\n"
|
||||
" int internalNodeIndex = get_global_id(0);\n"
|
||||
" if (internalNodeIndex >= numInternalNodes) return;\n"
|
||||
" \n"
|
||||
" int2 childNodesIndices;\n"
|
||||
" childNodesIndices.x = leftChildNodes[internalNodeIndex];\n"
|
||||
" childNodesIndices.y = rightChildNodes[internalNodeIndex];\n"
|
||||
" \n"
|
||||
" out_childNodes[internalNodeIndex] = childNodesIndices;\n"
|
||||
"}\n"
|
||||
;
|
||||
|
||||
Reference in New Issue
Block a user