From f19f85368517ff08a4f329710a1710f547c545d2 Mon Sep 17 00:00:00 2001 From: Jackson Lee Date: Mon, 3 Mar 2014 14:33:53 -0800 Subject: [PATCH] Draft PLBVH construction using binary radix tree. --- .../b3GpuParallelLinearBvh.cpp | 158 +++++++++++++- .../b3GpuParallelLinearBvh.h | 17 +- .../kernels/parallelLinearBvh.cl | 203 +++++++++++++++++- .../kernels/parallelLinearBvhKernels.h | 192 +++++++++++++++++ 4 files changed, 564 insertions(+), 6 deletions(-) diff --git a/src/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvh.cpp b/src/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvh.cpp index dd5557360..c5e8a89e1 100644 --- a/src/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvh.cpp +++ b/src/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvh.cpp @@ -31,6 +31,13 @@ b3GpuParallelLinearBvh::b3GpuParallelLinearBvh(cl_context context, cl_device_id m_internalNodeChildNodes(context, queue), m_internalNodeParentNodes(context, queue), + m_maxCommonPrefix(context, queue), + m_commonPrefixes(context, queue), + m_leftInternalNodePointers(context, queue), + m_rightInternalNodePointers(context, queue), + m_internalNodeLeftChildNodes(context, queue), + m_internalNodeRightChildNodes(context, queue), + m_leafNodeParentNodes(context, queue), m_mortonCodesAndAabbIndicies(context, queue), m_mergedAabb(context, queue), @@ -39,6 +46,7 @@ b3GpuParallelLinearBvh::b3GpuParallelLinearBvh(cl_context context, cl_device_id m_largeAabbs(context, queue) { m_rootNodeIndex.resize(1); + m_maxCommonPrefix.resize(1); // const char CL_PROGRAM_PATH[] = "src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvh.cl"; @@ -61,6 +69,17 @@ b3GpuParallelLinearBvh::b3GpuParallelLinearBvh(cl_context context, cl_device_id m_determineInternalNodeAabbsKernel = b3OpenCLUtils::compileCLKernelFromString( context, device, kernelSource, "determineInternalNodeAabbs", &error, m_parallelLinearBvhProgram, additionalMacros ); b3Assert(m_determineInternalNodeAabbsKernel); + m_computePrefixAndInitPointersKernel = b3OpenCLUtils::compileCLKernelFromString( context, device, kernelSource, "computePrefixAndInitPointers", &error, m_parallelLinearBvhProgram, additionalMacros ); + b3Assert(m_computePrefixAndInitPointersKernel); + m_correctDuplicatePrefixesKernel = b3OpenCLUtils::compileCLKernelFromString( context, device, kernelSource, "correctDuplicatePrefixes", &error, m_parallelLinearBvhProgram, additionalMacros ); + b3Assert(m_correctDuplicatePrefixesKernel); + m_buildBinaryRadixTreeLeafNodesKernel = b3OpenCLUtils::compileCLKernelFromString( context, device, kernelSource, "buildBinaryRadixTreeLeafNodes", &error, m_parallelLinearBvhProgram, additionalMacros ); + b3Assert(m_buildBinaryRadixTreeLeafNodesKernel); + m_buildBinaryRadixTreeInternalNodesKernel = b3OpenCLUtils::compileCLKernelFromString( context, device, kernelSource, "buildBinaryRadixTreeInternalNodes", &error, m_parallelLinearBvhProgram, additionalMacros ); + b3Assert(m_buildBinaryRadixTreeInternalNodesKernel); + m_convertChildNodeFormatKernel = b3OpenCLUtils::compileCLKernelFromString( context, device, kernelSource, "convertChildNodeFormat", &error, m_parallelLinearBvhProgram, additionalMacros ); + b3Assert(m_convertChildNodeFormatKernel); + m_plbvhCalculateOverlappingPairsKernel = b3OpenCLUtils::compileCLKernelFromString( context, device, kernelSource, "plbvhCalculateOverlappingPairs", &error, m_parallelLinearBvhProgram, additionalMacros ); b3Assert(m_plbvhCalculateOverlappingPairsKernel); m_plbvhRayTraverseKernel = b3OpenCLUtils::compileCLKernelFromString( context, device, kernelSource, "plbvhRayTraverse", &error, m_parallelLinearBvhProgram, additionalMacros ); @@ -76,9 +95,16 @@ b3GpuParallelLinearBvh::~b3GpuParallelLinearBvh() clReleaseKernel(m_separateAabbsKernel); clReleaseKernel(m_findAllNodesMergedAabbKernel); clReleaseKernel(m_assignMortonCodesAndAabbIndiciesKernel); + clReleaseKernel(m_constructBinaryTreeKernel); clReleaseKernel(m_determineInternalNodeAabbsKernel); + clReleaseKernel(m_computePrefixAndInitPointersKernel); + clReleaseKernel(m_correctDuplicatePrefixesKernel); + clReleaseKernel(m_buildBinaryRadixTreeLeafNodesKernel); + clReleaseKernel(m_buildBinaryRadixTreeInternalNodesKernel); + clReleaseKernel(m_convertChildNodeFormatKernel); + clReleaseKernel(m_plbvhCalculateOverlappingPairsKernel); clReleaseKernel(m_plbvhRayTraverseKernel); clReleaseKernel(m_plbvhLargeAabbAabbTestKernel); @@ -159,6 +185,12 @@ void b3GpuParallelLinearBvh::build(const b3OpenCLArray& worldSpaceAab m_internalNodeChildNodes.resize(numInternalNodes); m_internalNodeParentNodes.resize(numInternalNodes); + m_commonPrefixes.resize(numInternalNodes); + m_leftInternalNodePointers.resize(numInternalNodes); + m_rightInternalNodePointers.resize(numInternalNodes); + m_internalNodeLeftChildNodes.resize(numInternalNodes); + m_internalNodeRightChildNodes.resize(numInternalNodes); + m_leafNodeParentNodes.resize(numLeaves); m_mortonCodesAndAabbIndicies.resize(numLeaves); m_mergedAabb.resize(numLeaves); @@ -166,7 +198,7 @@ void b3GpuParallelLinearBvh::build(const b3OpenCLArray& worldSpaceAab - //Find the AABB of all input AABBs; this is used to define the size of + //Find the merged AABB of all small AABBs; this is used to define the size of //each cell in the virtual grid(2^10 cells in each dimension). { B3_PROFILE("Find AABB of merged nodes"); @@ -196,7 +228,7 @@ void b3GpuParallelLinearBvh::build(const b3OpenCLArray& worldSpaceAab //then convert the discrete grid coordinates into a morton code //For each element in m_mortonCodesAndAabbIndicies, set // m_key == morton code (value to sort by) - // m_value = AABB index + // m_value == small AABB index { B3_PROFILE("Assign morton codes"); @@ -234,7 +266,8 @@ void b3GpuParallelLinearBvh::build(const b3OpenCLArray& worldSpaceAab } // - constructSimpleBinaryTree(); + //constructSimpleBinaryTree(); + constructRadixBinaryTree(); } void b3GpuParallelLinearBvh::calculateOverlappingPairs(b3OpenCLArray& out_numPairs, b3OpenCLArray& out_overlappingPairs) @@ -393,6 +426,8 @@ void b3GpuParallelLinearBvh::testRaysAgainstBvhAabbs(const b3OpenCLArray= -1; --processedCommonPrefix) + { + b3BufferInfoCL bufferInfo[] = + { + b3BufferInfoCL( m_commonPrefixes.getBufferCL() ), + b3BufferInfoCL( m_mortonCodesAndAabbIndicies.getBufferCL() ), + b3BufferInfoCL( m_internalNodeLeftChildNodes.getBufferCL() ), + b3BufferInfoCL( m_internalNodeRightChildNodes.getBufferCL() ), + b3BufferInfoCL( m_leftInternalNodePointers.getBufferCL() ), + b3BufferInfoCL( m_rightInternalNodePointers.getBufferCL() ), + b3BufferInfoCL( m_leafNodeAabbs.getBufferCL() ), + b3BufferInfoCL( m_internalNodeAabbs.getBufferCL() ), + b3BufferInfoCL( m_rootNodeIndex.getBufferCL() ) + }; + + b3LauncherCL launcher(m_queue, m_buildBinaryRadixTreeInternalNodesKernel, "m_buildBinaryRadixTreeInternalNodesKernel"); + launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) ); + launcher.setConst(processedCommonPrefix); + launcher.setConst(numInternalNodes); + + launcher.launch1D(numInternalNodes); + } + + clFinish(m_queue); + } + + { + B3_PROFILE("m_convertChildNodeFormatKernel"); + + b3BufferInfoCL bufferInfo[] = + { + b3BufferInfoCL( m_internalNodeLeftChildNodes.getBufferCL() ), + b3BufferInfoCL( m_internalNodeRightChildNodes.getBufferCL() ), + b3BufferInfoCL( m_internalNodeChildNodes.getBufferCL() ) + }; + + b3LauncherCL launcher(m_queue, m_convertChildNodeFormatKernel, "m_convertChildNodeFormatKernel"); + launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) ); + launcher.setConst(numInternalNodes); + + launcher.launch1D(numInternalNodes); + clFinish(m_queue); + } } \ No newline at end of file diff --git a/src/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvh.h b/src/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvh.h index a8c7f111a..559786cbb 100644 --- a/src/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvh.h +++ b/src/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvh.h @@ -56,10 +56,17 @@ class b3GpuParallelLinearBvh cl_kernel m_findAllNodesMergedAabbKernel; cl_kernel m_assignMortonCodesAndAabbIndiciesKernel; - //Binary tree construction kernels + //Simple binary tree construction kernels cl_kernel m_constructBinaryTreeKernel; cl_kernel m_determineInternalNodeAabbsKernel; + //Radix binary tree construction kernels + cl_kernel m_computePrefixAndInitPointersKernel; + cl_kernel m_correctDuplicatePrefixesKernel; + cl_kernel m_buildBinaryRadixTreeLeafNodesKernel; + cl_kernel m_buildBinaryRadixTreeInternalNodesKernel; + cl_kernel m_convertChildNodeFormatKernel; + //Traversal kernels cl_kernel m_plbvhCalculateOverlappingPairsKernel; cl_kernel m_plbvhRayTraverseKernel; @@ -85,6 +92,14 @@ class b3GpuParallelLinearBvh b3OpenCLArray m_internalNodeChildNodes; //x == left child, y == right child b3OpenCLArray m_internalNodeParentNodes; + //1 element per internal node; for radix binary tree construction + b3OpenCLArray m_maxCommonPrefix; + b3OpenCLArray m_commonPrefixes; + b3OpenCLArray m_leftInternalNodePointers; //Linked list + b3OpenCLArray m_rightInternalNodePointers; //Linked list + b3OpenCLArray m_internalNodeLeftChildNodes; + b3OpenCLArray m_internalNodeRightChildNodes; + //1 element per leaf node (leaf nodes only include small AABBs) b3OpenCLArray m_leafNodeParentNodes; b3OpenCLArray m_mortonCodesAndAabbIndicies; //m_key = morton code, m_value == aabb index diff --git a/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvh.cl b/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvh.cl index e2380145e..3bcd4f351 100644 --- a/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvh.cl +++ b/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvh.cl @@ -468,7 +468,7 @@ __kernel void plbvhRayTraverse(__global b3AabbCL* rigidAabbs, b3Vector3 rayTo = rays[rayIndex].m_to; b3Vector3 rayNormalizedDirection = b3Vector3_normalize(rayTo - rayFrom); b3Scalar rayLength = b3Sqrt( b3Vector3_length2(rayTo - rayFrom) ); - + // int stack[B3_PLVBH_TRAVERSE_MAX_STACK_SIZE]; @@ -567,3 +567,204 @@ __kernel void plbvhLargeAabbRayTest(__global b3AabbCL* largeRigidAabbs, __global } + +#define B3_PLBVH_LINKED_LIST_INVALID_NODE -1 + +int longestCommonPrefix(int i, int j) { return clz(i ^ j); } + +__kernel void computePrefixAndInitPointers(__global SortDataCL* mortonCodesAndAabbIndices, + __global int* out_commonPrefixes, + __global int* out_leftInternalNodePointers, + __global int* out_rightInternalNodePointers, + int numInternalNodes) +{ + int internalNodeIndex = get_global_id(0); + if (internalNodeIndex >= numInternalNodes) return; + + //Compute common prefix + { + //Here, (internalNodeIndex + 1) is never out of bounds since it is a leaf node index, + //and the number of internal nodes is always numLeafNodes - 1 + int leftLeafMortonCode = mortonCodesAndAabbIndices[internalNodeIndex].m_key; + int rightLeafMortonCode = mortonCodesAndAabbIndices[internalNodeIndex + 1].m_key; + + out_commonPrefixes[internalNodeIndex] = longestCommonPrefix(leftLeafMortonCode, rightLeafMortonCode); + } + + //Assign neighbor pointers of this node + { + int leftInternalIndex = internalNodeIndex - 1; + int rightInternalIndex = internalNodeIndex + 1; + + out_leftInternalNodePointers[internalNodeIndex] = (leftInternalIndex >= 0) ? leftInternalIndex : B3_PLBVH_LINKED_LIST_INVALID_NODE; + out_rightInternalNodePointers[internalNodeIndex] = (rightInternalIndex < numInternalNodes) ? rightInternalIndex : B3_PLBVH_LINKED_LIST_INVALID_NODE; + } +} + +__kernel void correctDuplicatePrefixes(__global int* commonPrefixes, __global int* out_maxCommonPrefix, int numInternalNodes) +{ + int internalNodeIndex = get_global_id(0); + if (internalNodeIndex >= numInternalNodes) return; + + int commonPrefix = commonPrefixes[internalNodeIndex]; + + //Linear search to find the size of the subtree + int firstSubTreeIndex = internalNodeIndex; + int lastSubTreeIndex = internalNodeIndex; + + while(firstSubTreeIndex - 1 >= 0 && commonPrefix == commonPrefixes[firstSubTreeIndex - 1]) --firstSubTreeIndex; + while(lastSubTreeIndex + 1 < numInternalNodes && commonPrefix == commonPrefixes[lastSubTreeIndex + 1]) ++lastSubTreeIndex; + + //Fix duplicate common prefixes by incrementing them so that a subtree is formed. + //Recursively divide the tree until the position of the split is this node's index. + //Every time this node is not the split node, increment the common prefix. + int isCurrentSplitNode = false; + int correctedCommonPrefix = commonPrefix; + + while(!isCurrentSplitNode) + { + int numInternalNodesInSubTree = lastSubTreeIndex - firstSubTreeIndex + 1; + int splitNodeIndex = firstSubTreeIndex + numInternalNodesInSubTree / 2; + + if(internalNodeIndex > splitNodeIndex) firstSubTreeIndex = splitNodeIndex + 1; + else if(internalNodeIndex < splitNodeIndex) lastSubTreeIndex = splitNodeIndex - 1; + //else if(internalNodeIndex == splitNodeIndex) break; + + isCurrentSplitNode = (internalNodeIndex == splitNodeIndex); + if(!isCurrentSplitNode) correctedCommonPrefix++; + } + + commonPrefixes[internalNodeIndex] = correctedCommonPrefix; + atomic_max(out_maxCommonPrefix, correctedCommonPrefix); +} + +//Set so that it is always greater than the actual common prefixes, and never selected as a parent node. +//If there are no duplicates, then the highest common prefix is 32 or 64, depending on the number of bits used for the z-curve. +//Duplicates common prefixes increase the highest common prefix by N, where 2^N is the number of duplicate nodes. +#define B3_PLBVH_INVALID_COMMON_PREFIX 128 + +__kernel void buildBinaryRadixTreeLeafNodes(__global int* commonPrefixes, __global int* out_leftChildNodes, + __global int* out_rightChildNodes, int numLeafNodes) +{ + int leafNodeIndex = get_global_id(0); + if (leafNodeIndex >= numLeafNodes) return; + + int numInternalNodes = numLeafNodes - 1; + + int leftSplitIndex = leafNodeIndex - 1; + int rightSplitIndex = leafNodeIndex; + + int leftCommonPrefix = (leftSplitIndex >= 0) ? commonPrefixes[leftSplitIndex] : B3_PLBVH_INVALID_COMMON_PREFIX; + int rightCommonPrefix = (rightSplitIndex < numInternalNodes) ? commonPrefixes[rightSplitIndex] : B3_PLBVH_INVALID_COMMON_PREFIX; + + //Parent node is the highest adjacent common prefix that is lower than the node's common prefix + //Leaf nodes are considered as having the highest common prefix + int isLeftHigherCommonPrefix = (leftCommonPrefix > rightCommonPrefix); + + //Handle cases for the edge nodes; the first and last node + //For leaf nodes, leftCommonPrefix and rightCommonPrefix should never both be B3_PLBVH_INVALID_COMMON_PREFIX + if(leftCommonPrefix == B3_PLBVH_INVALID_COMMON_PREFIX) isLeftHigherCommonPrefix = false; + if(rightCommonPrefix == B3_PLBVH_INVALID_COMMON_PREFIX) isLeftHigherCommonPrefix = true; + + int parentNodeIndex = (isLeftHigherCommonPrefix) ? leftSplitIndex : rightSplitIndex; + + //If the left node is the parent, then this node is its right child and vice versa + __global int* out_childNode = (isLeftHigherCommonPrefix) ? out_rightChildNodes : out_leftChildNodes; + + int isLeaf = 1; + out_childNode[parentNodeIndex] = getIndexWithInternalNodeMarkerSet(isLeaf, leafNodeIndex); +} + +__kernel void buildBinaryRadixTreeInternalNodes(__global int* commonPrefixes, __global SortDataCL* mortonCodesAndAabbIndices, + __global int* leftChildNodes, __global int* rightChildNodes, + __global int* leftNeighborPointers, __global int* rightNeighborPointers, + __global b3AabbCL* leafNodeAabbs, __global b3AabbCL* internalNodeAabbs, + __global int* out_rootNodeIndex, + int processedCommonPrefix, int numInternalNodes) +{ + int internalNodeIndex = get_global_id(0); + if (internalNodeIndex >= numInternalNodes) return; + + int commonPrefix = commonPrefixes[internalNodeIndex]; + if (commonPrefix == processedCommonPrefix) + { + //Check neighbors and compare the common prefix to select the parent node + int leftNodeIndex = leftNeighborPointers[internalNodeIndex]; + int rightNodeIndex = rightNeighborPointers[internalNodeIndex]; + + int leftCommonPrefix = (leftNodeIndex != B3_PLBVH_LINKED_LIST_INVALID_NODE) ? commonPrefixes[leftNodeIndex] : B3_PLBVH_INVALID_COMMON_PREFIX; + int rightCommonPrefix = (rightNodeIndex != B3_PLBVH_LINKED_LIST_INVALID_NODE) ? commonPrefixes[rightNodeIndex] : B3_PLBVH_INVALID_COMMON_PREFIX; + + //Parent node is the highest common prefix that is lower than the node's common prefix + //Since the nodes with lower common prefixes are removed, that condition does not have to be tested for, + //and we only need to pick the node with the higher prefix. + int isLeftHigherCommonPrefix = (leftCommonPrefix > rightCommonPrefix); + + // + if(leftCommonPrefix == B3_PLBVH_INVALID_COMMON_PREFIX) isLeftHigherCommonPrefix = false; + else if(rightCommonPrefix == B3_PLBVH_INVALID_COMMON_PREFIX) isLeftHigherCommonPrefix = true; + + int isRootNode = false; + if(leftCommonPrefix == B3_PLBVH_INVALID_COMMON_PREFIX && rightCommonPrefix == B3_PLBVH_INVALID_COMMON_PREFIX) isRootNode = true; + + int parentNodeIndex = (isLeftHigherCommonPrefix) ? leftNodeIndex : rightNodeIndex; + + //If the left node is the parent, then this node is its right child and vice versa + __global int* out_childNode = (isLeftHigherCommonPrefix) ? rightChildNodes : leftChildNodes; + + int isLeaf = 0; + if(!isRootNode) out_childNode[parentNodeIndex] = getIndexWithInternalNodeMarkerSet(isLeaf, internalNodeIndex); + + if(isRootNode) *out_rootNodeIndex = getIndexWithInternalNodeMarkerSet(isLeaf, internalNodeIndex); + + //Remove this node from the linked list, + //so that the left and right nodes point at each other instead of this node + if(leftNodeIndex != B3_PLBVH_LINKED_LIST_INVALID_NODE) rightNeighborPointers[leftNodeIndex] = rightNodeIndex; + if(rightNodeIndex != B3_PLBVH_LINKED_LIST_INVALID_NODE) leftNeighborPointers[rightNodeIndex] = leftNodeIndex; + + //For debug + leftNeighborPointers[internalNodeIndex] = -2; + rightNeighborPointers[internalNodeIndex] = -2; + } + + //Processing occurs from highest common prefix to lowest common prefix + //Nodes in the previously processed level have their children set, so we merge their child AABBs here + if (commonPrefix == processedCommonPrefix + 1) + { + int leftChildIndex = leftChildNodes[internalNodeIndex]; + int rightChildIndex = rightChildNodes[internalNodeIndex]; + + int isLeftChildLeaf = isLeafNode(leftChildIndex); + int isRightChildLeaf = isLeafNode(rightChildIndex); + + leftChildIndex = getIndexWithInternalNodeMarkerRemoved(leftChildIndex); + rightChildIndex = getIndexWithInternalNodeMarkerRemoved(rightChildIndex); + + //leftRigidIndex/rightRigidIndex is not used if internal node + int leftRigidIndex = (isLeftChildLeaf) ? mortonCodesAndAabbIndices[leftChildIndex].m_value : -1; + int rightRigidIndex = (isRightChildLeaf) ? mortonCodesAndAabbIndices[rightChildIndex].m_value : -1; + + b3AabbCL leftChildAabb = (isLeftChildLeaf) ? leafNodeAabbs[leftRigidIndex] : internalNodeAabbs[leftChildIndex]; + b3AabbCL rightChildAabb = (isRightChildLeaf) ? leafNodeAabbs[rightRigidIndex] : internalNodeAabbs[rightChildIndex]; + + b3AabbCL mergedAabb; + mergedAabb.m_min = b3Min(leftChildAabb.m_min, rightChildAabb.m_min); + mergedAabb.m_max = b3Max(leftChildAabb.m_max, rightChildAabb.m_max); + internalNodeAabbs[internalNodeIndex] = mergedAabb; + } +} + +__kernel void convertChildNodeFormat(__global int* leftChildNodes, __global int* rightChildNodes, + __global int2* out_childNodes, int numInternalNodes) +{ + int internalNodeIndex = get_global_id(0); + if (internalNodeIndex >= numInternalNodes) return; + + int2 childNodesIndices; + childNodesIndices.x = leftChildNodes[internalNodeIndex]; + childNodesIndices.y = rightChildNodes[internalNodeIndex]; + + out_childNodes[internalNodeIndex] = childNodesIndices; +} + + diff --git a/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvhKernels.h b/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvhKernels.h index cfb477d03..1a1437d10 100644 --- a/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvhKernels.h +++ b/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvhKernels.h @@ -443,6 +443,7 @@ static const char* parallelLinearBvhCL= \ " b3Vector3 rayTo = rays[rayIndex].m_to;\n" " b3Vector3 rayNormalizedDirection = b3Vector3_normalize(rayTo - rayFrom);\n" " b3Scalar rayLength = b3Sqrt( b3Vector3_length2(rayTo - rayFrom) );\n" +" \n" " //\n" " int stack[B3_PLVBH_TRAVERSE_MAX_STACK_SIZE];\n" " \n" @@ -538,4 +539,195 @@ static const char* parallelLinearBvhCL= \ " }\n" " }\n" "}\n" +"#define B3_PLBVH_LINKED_LIST_INVALID_NODE -1\n" +"int longestCommonPrefix(int i, int j) { return clz(i ^ j); }\n" +"__kernel void computePrefixAndInitPointers(__global SortDataCL* mortonCodesAndAabbIndices,\n" +" __global int* out_commonPrefixes,\n" +" __global int* out_leftInternalNodePointers, \n" +" __global int* out_rightInternalNodePointers,\n" +" int numInternalNodes)\n" +"{\n" +" int internalNodeIndex = get_global_id(0);\n" +" if (internalNodeIndex >= numInternalNodes) return;\n" +" \n" +" //Compute common prefix\n" +" {\n" +" //Here, (internalNodeIndex + 1) is never out of bounds since it is a leaf node index,\n" +" //and the number of internal nodes is always numLeafNodes - 1\n" +" int leftLeafMortonCode = mortonCodesAndAabbIndices[internalNodeIndex].m_key;\n" +" int rightLeafMortonCode = mortonCodesAndAabbIndices[internalNodeIndex + 1].m_key;\n" +" \n" +" out_commonPrefixes[internalNodeIndex] = longestCommonPrefix(leftLeafMortonCode, rightLeafMortonCode);\n" +" }\n" +" \n" +" //Assign neighbor pointers of this node\n" +" {\n" +" int leftInternalIndex = internalNodeIndex - 1;\n" +" int rightInternalIndex = internalNodeIndex + 1;\n" +" \n" +" out_leftInternalNodePointers[internalNodeIndex] = (leftInternalIndex >= 0) ? leftInternalIndex : B3_PLBVH_LINKED_LIST_INVALID_NODE;\n" +" out_rightInternalNodePointers[internalNodeIndex] = (rightInternalIndex < numInternalNodes) ? rightInternalIndex : B3_PLBVH_LINKED_LIST_INVALID_NODE;\n" +" }\n" +"}\n" +"__kernel void correctDuplicatePrefixes(__global int* commonPrefixes, __global int* out_maxCommonPrefix, int numInternalNodes)\n" +"{\n" +" int internalNodeIndex = get_global_id(0);\n" +" if (internalNodeIndex >= numInternalNodes) return;\n" +" \n" +" int commonPrefix = commonPrefixes[internalNodeIndex];\n" +" \n" +" //Linear search to find the size of the subtree\n" +" int firstSubTreeIndex = internalNodeIndex;\n" +" int lastSubTreeIndex = internalNodeIndex;\n" +" \n" +" while(firstSubTreeIndex - 1 >= 0 && commonPrefix == commonPrefixes[firstSubTreeIndex - 1]) --firstSubTreeIndex;\n" +" while(lastSubTreeIndex + 1 < numInternalNodes && commonPrefix == commonPrefixes[lastSubTreeIndex + 1]) ++lastSubTreeIndex;\n" +" \n" +" //Fix duplicate common prefixes by incrementing them so that a subtree is formed.\n" +" //Recursively divide the tree until the position of the split is this node's index.\n" +" //Every time this node is not the split node, increment the common prefix.\n" +" int isCurrentSplitNode = false;\n" +" int correctedCommonPrefix = commonPrefix;\n" +" \n" +" while(!isCurrentSplitNode)\n" +" {\n" +" int numInternalNodesInSubTree = lastSubTreeIndex - firstSubTreeIndex + 1;\n" +" int splitNodeIndex = firstSubTreeIndex + numInternalNodesInSubTree / 2;\n" +" \n" +" if(internalNodeIndex > splitNodeIndex) firstSubTreeIndex = splitNodeIndex + 1;\n" +" else if(internalNodeIndex < splitNodeIndex) lastSubTreeIndex = splitNodeIndex - 1;\n" +" //else if(internalNodeIndex == splitNodeIndex) break;\n" +" \n" +" isCurrentSplitNode = (internalNodeIndex == splitNodeIndex);\n" +" if(!isCurrentSplitNode) correctedCommonPrefix++;\n" +" }\n" +" \n" +" commonPrefixes[internalNodeIndex] = correctedCommonPrefix;\n" +" atomic_max(out_maxCommonPrefix, correctedCommonPrefix);\n" +"}\n" +"//Set so that it is always greater than the actual common prefixes, and never selected as a parent node.\n" +"//If there are no duplicates, then the highest common prefix is 32 or 64, depending on the number of bits used for the z-curve.\n" +"//Duplicates common prefixes increase the highest common prefix by N, where 2^N is the number of duplicate nodes.\n" +"#define B3_PLBVH_INVALID_COMMON_PREFIX 128\n" +"__kernel void buildBinaryRadixTreeLeafNodes(__global int* commonPrefixes, __global int* out_leftChildNodes, \n" +" __global int* out_rightChildNodes, int numLeafNodes)\n" +"{\n" +" int leafNodeIndex = get_global_id(0);\n" +" if (leafNodeIndex >= numLeafNodes) return;\n" +" \n" +" int numInternalNodes = numLeafNodes - 1;\n" +" \n" +" int leftSplitIndex = leafNodeIndex - 1;\n" +" int rightSplitIndex = leafNodeIndex;\n" +" \n" +" int leftCommonPrefix = (leftSplitIndex >= 0) ? commonPrefixes[leftSplitIndex] : B3_PLBVH_INVALID_COMMON_PREFIX;\n" +" int rightCommonPrefix = (rightSplitIndex < numInternalNodes) ? commonPrefixes[rightSplitIndex] : B3_PLBVH_INVALID_COMMON_PREFIX;\n" +" \n" +" //Parent node is the highest adjacent common prefix that is lower than the node's common prefix\n" +" //Leaf nodes are considered as having the highest common prefix\n" +" int isLeftHigherCommonPrefix = (leftCommonPrefix > rightCommonPrefix);\n" +" \n" +" //Handle cases for the edge nodes; the first and last node\n" +" //For leaf nodes, leftCommonPrefix and rightCommonPrefix should never both be B3_PLBVH_INVALID_COMMON_PREFIX\n" +" if(leftCommonPrefix == B3_PLBVH_INVALID_COMMON_PREFIX) isLeftHigherCommonPrefix = false;\n" +" if(rightCommonPrefix == B3_PLBVH_INVALID_COMMON_PREFIX) isLeftHigherCommonPrefix = true;\n" +" \n" +" int parentNodeIndex = (isLeftHigherCommonPrefix) ? leftSplitIndex : rightSplitIndex;\n" +" \n" +" //If the left node is the parent, then this node is its right child and vice versa\n" +" __global int* out_childNode = (isLeftHigherCommonPrefix) ? out_rightChildNodes : out_leftChildNodes;\n" +" \n" +" int isLeaf = 1;\n" +" out_childNode[parentNodeIndex] = getIndexWithInternalNodeMarkerSet(isLeaf, leafNodeIndex);\n" +"}\n" +"__kernel void buildBinaryRadixTreeInternalNodes(__global int* commonPrefixes, __global SortDataCL* mortonCodesAndAabbIndices,\n" +" __global int* leftChildNodes, __global int* rightChildNodes,\n" +" __global int* leftNeighborPointers, __global int* rightNeighborPointers,\n" +" __global b3AabbCL* leafNodeAabbs, __global b3AabbCL* internalNodeAabbs,\n" +" __global int* out_rootNodeIndex,\n" +" int processedCommonPrefix, int numInternalNodes)\n" +"{\n" +" int internalNodeIndex = get_global_id(0);\n" +" if (internalNodeIndex >= numInternalNodes) return;\n" +" \n" +" int commonPrefix = commonPrefixes[internalNodeIndex];\n" +" if (commonPrefix == processedCommonPrefix)\n" +" {\n" +" //Check neighbors and compare the common prefix to select the parent node\n" +" int leftNodeIndex = leftNeighborPointers[internalNodeIndex];\n" +" int rightNodeIndex = rightNeighborPointers[internalNodeIndex];\n" +" \n" +" int leftCommonPrefix = (leftNodeIndex != B3_PLBVH_LINKED_LIST_INVALID_NODE) ? commonPrefixes[leftNodeIndex] : B3_PLBVH_INVALID_COMMON_PREFIX;\n" +" int rightCommonPrefix = (rightNodeIndex != B3_PLBVH_LINKED_LIST_INVALID_NODE) ? commonPrefixes[rightNodeIndex] : B3_PLBVH_INVALID_COMMON_PREFIX;\n" +" \n" +" //Parent node is the highest common prefix that is lower than the node's common prefix\n" +" //Since the nodes with lower common prefixes are removed, that condition does not have to be tested for,\n" +" //and we only need to pick the node with the higher prefix.\n" +" int isLeftHigherCommonPrefix = (leftCommonPrefix > rightCommonPrefix);\n" +" \n" +" //\n" +" if(leftCommonPrefix == B3_PLBVH_INVALID_COMMON_PREFIX) isLeftHigherCommonPrefix = false;\n" +" else if(rightCommonPrefix == B3_PLBVH_INVALID_COMMON_PREFIX) isLeftHigherCommonPrefix = true;\n" +" \n" +" int isRootNode = false;\n" +" if(leftCommonPrefix == B3_PLBVH_INVALID_COMMON_PREFIX && rightCommonPrefix == B3_PLBVH_INVALID_COMMON_PREFIX) isRootNode = true;\n" +" \n" +" int parentNodeIndex = (isLeftHigherCommonPrefix) ? leftNodeIndex : rightNodeIndex;\n" +" \n" +" //If the left node is the parent, then this node is its right child and vice versa\n" +" __global int* out_childNode = (isLeftHigherCommonPrefix) ? rightChildNodes : leftChildNodes;\n" +" \n" +" int isLeaf = 0;\n" +" if(!isRootNode) out_childNode[parentNodeIndex] = getIndexWithInternalNodeMarkerSet(isLeaf, internalNodeIndex);\n" +" \n" +" if(isRootNode) *out_rootNodeIndex = getIndexWithInternalNodeMarkerSet(isLeaf, internalNodeIndex);\n" +" \n" +" //Remove this node from the linked list, \n" +" //so that the left and right nodes point at each other instead of this node\n" +" if(leftNodeIndex != B3_PLBVH_LINKED_LIST_INVALID_NODE) rightNeighborPointers[leftNodeIndex] = rightNodeIndex;\n" +" if(rightNodeIndex != B3_PLBVH_LINKED_LIST_INVALID_NODE) leftNeighborPointers[rightNodeIndex] = leftNodeIndex;\n" +" \n" +" //For debug\n" +" leftNeighborPointers[internalNodeIndex] = -2;\n" +" rightNeighborPointers[internalNodeIndex] = -2;\n" +" }\n" +" \n" +" //Processing occurs from highest common prefix to lowest common prefix\n" +" //Nodes in the previously processed level have their children set, so we merge their child AABBs here\n" +" if (commonPrefix == processedCommonPrefix + 1)\n" +" {\n" +" int leftChildIndex = leftChildNodes[internalNodeIndex];\n" +" int rightChildIndex = rightChildNodes[internalNodeIndex];\n" +" \n" +" int isLeftChildLeaf = isLeafNode(leftChildIndex);\n" +" int isRightChildLeaf = isLeafNode(rightChildIndex);\n" +" \n" +" leftChildIndex = getIndexWithInternalNodeMarkerRemoved(leftChildIndex);\n" +" rightChildIndex = getIndexWithInternalNodeMarkerRemoved(rightChildIndex);\n" +" \n" +" //leftRigidIndex/rightRigidIndex is not used if internal node\n" +" int leftRigidIndex = (isLeftChildLeaf) ? mortonCodesAndAabbIndices[leftChildIndex].m_value : -1;\n" +" int rightRigidIndex = (isRightChildLeaf) ? mortonCodesAndAabbIndices[rightChildIndex].m_value : -1;\n" +" \n" +" b3AabbCL leftChildAabb = (isLeftChildLeaf) ? leafNodeAabbs[leftRigidIndex] : internalNodeAabbs[leftChildIndex];\n" +" b3AabbCL rightChildAabb = (isRightChildLeaf) ? leafNodeAabbs[rightRigidIndex] : internalNodeAabbs[rightChildIndex];\n" +" \n" +" b3AabbCL mergedAabb;\n" +" mergedAabb.m_min = b3Min(leftChildAabb.m_min, rightChildAabb.m_min);\n" +" mergedAabb.m_max = b3Max(leftChildAabb.m_max, rightChildAabb.m_max);\n" +" internalNodeAabbs[internalNodeIndex] = mergedAabb;\n" +" }\n" +"}\n" +"__kernel void convertChildNodeFormat(__global int* leftChildNodes, __global int* rightChildNodes, \n" +" __global int2* out_childNodes, int numInternalNodes)\n" +"{\n" +" int internalNodeIndex = get_global_id(0);\n" +" if (internalNodeIndex >= numInternalNodes) return;\n" +" \n" +" int2 childNodesIndices;\n" +" childNodesIndices.x = leftChildNodes[internalNodeIndex];\n" +" childNodesIndices.y = rightChildNodes[internalNodeIndex];\n" +" \n" +" out_childNodes[internalNodeIndex] = childNodesIndices;\n" +"}\n" ;