diff --git a/src/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvh.cpp b/src/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvh.cpp index fa1bc8db0..40a73658f 100644 --- a/src/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvh.cpp +++ b/src/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvh.cpp @@ -30,12 +30,7 @@ b3GpuParallelLinearBvh::b3GpuParallelLinearBvh(cl_context context, cl_device_id m_commonPrefixes(context, queue), m_commonPrefixLengths(context, queue), - m_childNodeCount(context, queue), m_distanceFromRoot(context, queue), - m_TEMP_leftLowerPrefix(context, queue), - m_TEMP_rightLowerPrefix(context, queue), - m_TEMP_leftSharedPrefixLength(context, queue), - m_TEMP_rightSharedPrefixLength(context, queue), m_leafNodeParentNodes(context, queue), m_mortonCodesAndAabbIndicies(context, queue), @@ -164,6 +159,8 @@ void b3GpuParallelLinearBvh::build(const b3OpenCLArray& worldSpaceAab if(numLeaves < 2) { + //Number of leaf nodes is checked in calculateOverlappingPairs() and testRaysAgainstBvhAabbs(), + //so it does not matter if numLeaves == 0 and rootNodeIndex == -1 int rootNodeIndex = numLeaves - 1; m_rootNodeIndex.copyFromHostPointer(&rootNodeIndex, 1); return; @@ -178,12 +175,7 @@ void b3GpuParallelLinearBvh::build(const b3OpenCLArray& worldSpaceAab m_commonPrefixes.resize(numInternalNodes); m_commonPrefixLengths.resize(numInternalNodes); - m_childNodeCount.resize(numInternalNodes); m_distanceFromRoot.resize(numInternalNodes); - m_TEMP_leftLowerPrefix.resize(numInternalNodes); - m_TEMP_rightLowerPrefix.resize(numInternalNodes); - m_TEMP_leftSharedPrefixLength.resize(numInternalNodes); - m_TEMP_rightSharedPrefixLength.resize(numInternalNodes); m_leafNodeParentNodes.resize(numLeaves); m_mortonCodesAndAabbIndicies.resize(numLeaves); @@ -191,7 +183,7 @@ void b3GpuParallelLinearBvh::build(const b3OpenCLArray& worldSpaceAab } //Find the merged AABB of all small AABBs; this is used to define the size of - //each cell in the virtual grid(2^10 cells in each dimension). + //each cell in the virtual grid for the next kernel(2^10 cells in each dimension). { B3_PROFILE("Find AABB of merged nodes"); @@ -215,7 +207,6 @@ void b3GpuParallelLinearBvh::build(const b3OpenCLArray& worldSpaceAab clFinish(m_queue); } - //Insert the center of the AABBs into a virtual grid, //then convert the discrete grid coordinates into a morton code //For each element in m_mortonCodesAndAabbIndicies, set @@ -405,12 +396,6 @@ void b3GpuParallelLinearBvh::testRaysAgainstBvhAabbs(const b3OpenCLArray -int isLeafNode(int index) { return (index >> 31 == 0); } -int getIndexWithInternalNodeMarkerRemoved(int index) { return index & (~0x80000000); } -int getIndexWithInternalNodeMarkerSet(int isLeaf, int index) { return (isLeaf) ? index : (index | 0x80000000); } - void b3GpuParallelLinearBvh::constructRadixBinaryTree() { B3_PROFILE("b3GpuParallelLinearBvh::constructRadixBinaryTree()"); @@ -418,7 +403,9 @@ void b3GpuParallelLinearBvh::constructRadixBinaryTree() int numLeaves = m_leafNodeAabbs.size(); int numInternalNodes = numLeaves - 1; - //For each internal node, compute common prefix and set pointers to left and right internal nodes + //Each internal node is placed in between 2 leaf nodes. + //By using this arrangement and computing the common prefix between + //these 2 adjacent leaf nodes, it is possible to quickly construct a binary radix tree. { B3_PROFILE("m_computeAdjacentPairCommonPrefixKernel"); @@ -437,185 +424,107 @@ void b3GpuParallelLinearBvh::constructRadixBinaryTree() clFinish(m_queue); } + //For each leaf node, select its parent node by + //comparing the 2 nearest internal nodes and assign child node indices { - static b3AlignedObjectArray prefixLengths; - m_commonPrefixLengths.copyToHost(prefixLengths); - clFinish(m_queue); + B3_PROFILE("m_buildBinaryRadixTreeLeafNodesKernel"); - for(int i = 1; i < prefixLengths.size(); ++i) - if( prefixLengths[i - 1] == prefixLengths[i] ) - for(;;) printf("duplicate prefix[%d]: %d\n", i, prefixLengths[i]); + b3BufferInfoCL bufferInfo[] = + { + b3BufferInfoCL( m_commonPrefixLengths.getBufferCL() ), + b3BufferInfoCL( m_leafNodeParentNodes.getBufferCL() ), + b3BufferInfoCL( m_internalNodeChildNodes.getBufferCL() ) + }; + + b3LauncherCL launcher(m_queue, m_buildBinaryRadixTreeLeafNodesKernel, "m_buildBinaryRadixTreeLeafNodesKernel"); + launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) ); + launcher.setConst(numLeaves); + + launcher.launch1D(numLeaves); + clFinish(m_queue); } + //For each internal node, perform 2 binary searches among the other internal nodes + //to its left and right to find its potential parent nodes and assign child node indices { - //For each leaf node, find parent nodes and assign child node indices - { - B3_PROFILE("m_buildBinaryRadixTreeLeafNodesKernel"); - - b3BufferInfoCL bufferInfo[] = - { - b3BufferInfoCL( m_commonPrefixLengths.getBufferCL() ), - b3BufferInfoCL( m_leafNodeParentNodes.getBufferCL() ), - b3BufferInfoCL( m_internalNodeChildNodes.getBufferCL() ) - }; - - b3LauncherCL launcher(m_queue, m_buildBinaryRadixTreeLeafNodesKernel, "m_buildBinaryRadixTreeLeafNodesKernel"); - launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst(numLeaves); - - launcher.launch1D(numLeaves); - clFinish(m_queue); - } + B3_PROFILE("m_buildBinaryRadixTreeInternalNodesKernel"); - //For each internal node, find parent nodes and assign child node indices + b3BufferInfoCL bufferInfo[] = { - B3_PROFILE("m_buildBinaryRadixTreeInternalNodesKernel"); - - b3BufferInfoCL bufferInfo[] = - { - b3BufferInfoCL( m_commonPrefixes.getBufferCL() ), - b3BufferInfoCL( m_commonPrefixLengths.getBufferCL() ), - b3BufferInfoCL( m_internalNodeChildNodes.getBufferCL() ), - b3BufferInfoCL( m_internalNodeParentNodes.getBufferCL() ), - b3BufferInfoCL( m_rootNodeIndex.getBufferCL() ), - b3BufferInfoCL( m_TEMP_leftLowerPrefix.getBufferCL() ), - b3BufferInfoCL( m_TEMP_rightLowerPrefix.getBufferCL() ), - b3BufferInfoCL( m_TEMP_leftSharedPrefixLength.getBufferCL() ), - b3BufferInfoCL( m_TEMP_rightSharedPrefixLength.getBufferCL() ) - }; - - b3LauncherCL launcher(m_queue, m_buildBinaryRadixTreeInternalNodesKernel, "m_buildBinaryRadixTreeInternalNodesKernel"); - launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst(numInternalNodes); - - launcher.launch1D(numInternalNodes); - clFinish(m_queue); - } + b3BufferInfoCL( m_commonPrefixes.getBufferCL() ), + b3BufferInfoCL( m_commonPrefixLengths.getBufferCL() ), + b3BufferInfoCL( m_internalNodeChildNodes.getBufferCL() ), + b3BufferInfoCL( m_internalNodeParentNodes.getBufferCL() ), + b3BufferInfoCL( m_rootNodeIndex.getBufferCL() ) + }; - if(0) - { - static b3AlignedObjectArray mortonCodesAndAabbIndices; - - static b3AlignedObjectArray child; - static b3AlignedObjectArray commonPrefixes; - static b3AlignedObjectArray commonPrefixLengths; - static b3AlignedObjectArray tempLeftLowerPrefixIndex; - static b3AlignedObjectArray tempRightLowerPrefixIndex; - static b3AlignedObjectArray tempLeftLowerPrefixSPL; - static b3AlignedObjectArray tempRightLowerPrefixSPL; - static b3AlignedObjectArray internalParent; - - m_mortonCodesAndAabbIndicies.copyToHost(mortonCodesAndAabbIndices); - - m_internalNodeChildNodes.copyToHost(child); - m_commonPrefixes.copyToHost(commonPrefixes); - m_commonPrefixLengths.copyToHost(commonPrefixLengths); - m_TEMP_leftLowerPrefix.copyToHost(tempLeftLowerPrefixIndex); - m_TEMP_rightLowerPrefix.copyToHost(tempRightLowerPrefixIndex); - m_TEMP_leftSharedPrefixLength.copyToHost(tempLeftLowerPrefixSPL); - m_TEMP_rightSharedPrefixLength.copyToHost(tempRightLowerPrefixSPL); - m_internalNodeParentNodes.copyToHost(internalParent); - - int rootNode = -1; - m_rootNodeIndex.copyToHostPointer(&rootNode, 1); - clFinish(m_queue); - - printf( "rootNode: %d\n", getIndexWithInternalNodeMarkerRemoved(rootNode) ); - - for(int i = 0; i < numInternalNodes; ++i) - { - b3Int2 childNodes = child[i]; - - printf("childNodes[%d]:", i); - printf( " %d", getIndexWithInternalNodeMarkerRemoved(childNodes.x) ); - if( !isLeafNode(childNodes.x) ) printf("i"); - printf( ", %d", getIndexWithInternalNodeMarkerRemoved(childNodes.y) ); - if( !isLeafNode(childNodes.y) ) printf("i"); - printf(" (lr: %d, %d)", tempLeftLowerPrefixIndex[i], tempRightLowerPrefixIndex[i]); - printf(" (spl: %d, %d)", tempLeftLowerPrefixSPL[i], tempRightLowerPrefixSPL[i]); - printf(" (prefix: %d)", commonPrefixLengths[i]); - printf(" (par: %d)", internalParent[i]); - printf("\n"); - } - printf("\n"); - - for(int i = 0; i < numInternalNodes; ++i) - { - int hi = static_cast(commonPrefixes[i] >> 32); - int lo = static_cast(commonPrefixes[i]); - - printf("commonPrefix[%d]: %x, %d, len %d \n", i, hi, lo, commonPrefixLengths[i]); - } - printf("\n"); - - for(int i = 0; i < numLeaves; ++i) - { - printf("z-curve[%d]: %x \n", i, mortonCodesAndAabbIndices[i].m_key); - } - printf("\n"); - - - std::cout << std::endl; - for(;;); - } + b3LauncherCL launcher(m_queue, m_buildBinaryRadixTreeInternalNodesKernel, "m_buildBinaryRadixTreeInternalNodesKernel"); + launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) ); + launcher.setConst(numInternalNodes); - //Find the number of nodes seperating each internal node and the root node - //so that the AABBs can be set using the next kernel - { - B3_PROFILE("m_findDistanceFromRootKernel"); - - b3BufferInfoCL bufferInfo[] = - { - b3BufferInfoCL( m_rootNodeIndex.getBufferCL() ), - b3BufferInfoCL( m_internalNodeParentNodes.getBufferCL() ), - b3BufferInfoCL( m_maxDistanceFromRoot.getBufferCL() ), - b3BufferInfoCL( m_distanceFromRoot.getBufferCL() ) - }; - - b3LauncherCL launcher(m_queue, m_findDistanceFromRootKernel, "m_findDistanceFromRootKernel"); - launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst(numInternalNodes); - - launcher.launch1D(numInternalNodes); - clFinish(m_queue); - } - - //Starting from the nodes nearest to the leaf nodes, recursively move up - //the tree to set the AABBs of each internal node; each internal node - //checks its children and merges their AABBs - { - B3_PROFILE("m_buildBinaryRadixTreeAabbsRecursiveKernel"); - - int maxDistanceFromRoot = -1; - { - B3_PROFILE("copy maxDistanceFromRoot to CPU"); - m_maxDistanceFromRoot.copyToHostPointer(&maxDistanceFromRoot, 1); - clFinish(m_queue); - } - - for(int distanceFromRoot = maxDistanceFromRoot; distanceFromRoot >= 0; --distanceFromRoot) - { - b3BufferInfoCL bufferInfo[] = - { - b3BufferInfoCL( m_distanceFromRoot.getBufferCL() ), - b3BufferInfoCL( m_mortonCodesAndAabbIndicies.getBufferCL() ), - b3BufferInfoCL( m_internalNodeChildNodes.getBufferCL() ), - b3BufferInfoCL( m_leafNodeAabbs.getBufferCL() ), - b3BufferInfoCL( m_internalNodeAabbs.getBufferCL() ) - }; - - b3LauncherCL launcher(m_queue, m_buildBinaryRadixTreeAabbsRecursiveKernel, "m_buildBinaryRadixTreeAabbsRecursiveKernel"); - launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst(maxDistanceFromRoot); - launcher.setConst(distanceFromRoot); - launcher.setConst(numInternalNodes); - - launcher.launch1D(numInternalNodes); - } - - clFinish(m_queue); - } + launcher.launch1D(numInternalNodes); + clFinish(m_queue); + } + //Find the number of nodes seperating each internal node and the root node + //so that the AABBs can be set using the next kernel. + //Also determine the maximum number of nodes separating an internal node and the root node. + { + B3_PROFILE("m_findDistanceFromRootKernel"); + + b3BufferInfoCL bufferInfo[] = + { + b3BufferInfoCL( m_rootNodeIndex.getBufferCL() ), + b3BufferInfoCL( m_internalNodeParentNodes.getBufferCL() ), + b3BufferInfoCL( m_maxDistanceFromRoot.getBufferCL() ), + b3BufferInfoCL( m_distanceFromRoot.getBufferCL() ) + }; + + b3LauncherCL launcher(m_queue, m_findDistanceFromRootKernel, "m_findDistanceFromRootKernel"); + launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) ); + launcher.setConst(numInternalNodes); + + launcher.launch1D(numInternalNodes); + clFinish(m_queue); + } + + //Starting from the internal nodes nearest to the leaf nodes, recursively move up + //the tree towards the root to set the AABBs of each internal node; each internal node + //checks its children and merges their AABBs + { + B3_PROFILE("m_buildBinaryRadixTreeAabbsRecursiveKernel"); + + int maxDistanceFromRoot = -1; + { + B3_PROFILE("copy maxDistanceFromRoot to CPU"); + m_maxDistanceFromRoot.copyToHostPointer(&maxDistanceFromRoot, 1); + clFinish(m_queue); + } + + for(int distanceFromRoot = maxDistanceFromRoot; distanceFromRoot >= 0; --distanceFromRoot) + { + b3BufferInfoCL bufferInfo[] = + { + b3BufferInfoCL( m_distanceFromRoot.getBufferCL() ), + b3BufferInfoCL( m_mortonCodesAndAabbIndicies.getBufferCL() ), + b3BufferInfoCL( m_internalNodeChildNodes.getBufferCL() ), + b3BufferInfoCL( m_leafNodeAabbs.getBufferCL() ), + b3BufferInfoCL( m_internalNodeAabbs.getBufferCL() ) + }; + + b3LauncherCL launcher(m_queue, m_buildBinaryRadixTreeAabbsRecursiveKernel, "m_buildBinaryRadixTreeAabbsRecursiveKernel"); + launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) ); + launcher.setConst(maxDistanceFromRoot); + launcher.setConst(distanceFromRoot); + launcher.setConst(numInternalNodes); + + //It may seem inefficent to launch a thread for each internal node when a + //much smaller number of nodes is actually processed, but this is actually + //faster than determining the exact nodes that are ready to merge their child AABBs. + launcher.launch1D(numInternalNodes); + } + + clFinish(m_queue); } } diff --git a/src/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvh.h b/src/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvh.h index 4d5467481..7aabd71f9 100644 --- a/src/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvh.h +++ b/src/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvh.h @@ -70,28 +70,23 @@ class b3GpuParallelLinearBvh b3RadixSort32CL m_radixSorter; //1 element - b3OpenCLArray m_rootNodeIndex; - b3OpenCLArray m_maxDistanceFromRoot; + b3OpenCLArray m_rootNodeIndex; //Most significant bit(0x80000000) is set to indicate internal node + b3OpenCLArray m_maxDistanceFromRoot; //Max number of internal nodes between an internal node and the root node //1 element per internal node (number_of_internal_nodes == number_of_leaves - 1) b3OpenCLArray m_internalNodeAabbs; b3OpenCLArray m_internalNodeLeafIndexRanges; //x == min leaf index, y == max leaf index - b3OpenCLArray m_internalNodeChildNodes; //x == left child, y == right child - b3OpenCLArray m_internalNodeParentNodes; + b3OpenCLArray m_internalNodeChildNodes; //x == left child, y == right child; msb(0x80000000) is set to indicate internal node + b3OpenCLArray m_internalNodeParentNodes; //For parent node index, msb(0x80000000) is not set since it is always internal //1 element per internal node; for binary radix tree construction b3OpenCLArray m_commonPrefixes; b3OpenCLArray m_commonPrefixLengths; - b3OpenCLArray m_childNodeCount; - b3OpenCLArray m_distanceFromRoot; - b3OpenCLArray m_TEMP_leftLowerPrefix; - b3OpenCLArray m_TEMP_rightLowerPrefix; - b3OpenCLArray m_TEMP_leftSharedPrefixLength; - b3OpenCLArray m_TEMP_rightSharedPrefixLength; + b3OpenCLArray m_distanceFromRoot; //Number of internal nodes between this node and the root //1 element per leaf node (leaf nodes only include small AABBs) - b3OpenCLArray m_leafNodeParentNodes; - b3OpenCLArray m_mortonCodesAndAabbIndicies; //m_key == morton code, m_value == aabb index + b3OpenCLArray m_leafNodeParentNodes; //For parent node index, msb(0x80000000) is not set since it is always internal + b3OpenCLArray m_mortonCodesAndAabbIndicies; //m_key == morton code, m_value == aabb index in m_leafNodeAabbs b3OpenCLArray m_mergedAabb; //m_mergedAabb[0] contains the merged AABB of all leaf nodes b3OpenCLArray m_leafNodeAabbs; //Contains only small AABBs diff --git a/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvh.cl b/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvh.cl index 2c17f8623..b28c5ab56 100644 --- a/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvh.cl +++ b/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvh.cl @@ -448,6 +448,8 @@ b3Int64 computeCommonPrefix(b3Int64 i, b3Int64 j) return sharedBits & bitmask; } + +//Same as computeCommonPrefixLength(), but allows for prefixes with different lengths int getSharedPrefixLength(b3Int64 prefixA, int prefixLengthA, b3Int64 prefixB, int prefixLengthB) { return b3Min( computeCommonPrefixLength(prefixA, prefixB), b3Min(prefixLengthA, prefixLengthB) ); @@ -521,8 +523,6 @@ __kernel void buildBinaryRadixTreeLeafNodes(__global int* commonPrefixLengths, _ __kernel void buildBinaryRadixTreeInternalNodes(__global b3Int64* commonPrefixes, __global int* commonPrefixLengths, __global int2* out_childNodes, __global int* out_internalNodeParentNodes, __global int* out_rootNodeIndex, - __global int* TEMP_out_leftLowerPrefix, __global int* TEMP_out_rightLowerPrefix, - __global int* TEMP_spl_left, __global int* TEMP_spl_right, int numInternalNodes) { int internalNodeIndex = get_group_id(0) * get_local_size(0) + get_local_id(0); @@ -536,6 +536,7 @@ __kernel void buildBinaryRadixTreeInternalNodes(__global b3Int64* commonPrefixes int leftIndex = -1; int rightIndex = -1; + //Find nearest element to left with a lower common prefix for(int i = internalNodeIndex - 1; i >= 0; --i) { int nodeLeftSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, commonPrefixes[i], commonPrefixLengths[i]); @@ -546,6 +547,7 @@ __kernel void buildBinaryRadixTreeInternalNodes(__global b3Int64* commonPrefixes } } + //Find nearest element to right with a lower common prefix for(int i = internalNodeIndex + 1; i < numInternalNodes; ++i) { int nodeRightSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, commonPrefixes[i], commonPrefixLengths[i]); @@ -645,11 +647,6 @@ __kernel void buildBinaryRadixTreeInternalNodes(__global b3Int64* commonPrefixes } #endif - TEMP_out_leftLowerPrefix[internalNodeIndex] = leftIndex; - TEMP_out_rightLowerPrefix[internalNodeIndex] = rightIndex; - TEMP_spl_left[internalNodeIndex] = (leftIndex != -1) ? getSharedPrefixLength(nodePrefix, nodePrefixLength, commonPrefixes[leftIndex], commonPrefixLengths[leftIndex]) : -1; - TEMP_spl_right[internalNodeIndex] = (rightIndex != -1) ? getSharedPrefixLength(nodePrefix, nodePrefixLength, commonPrefixes[rightIndex], commonPrefixLengths[rightIndex]) : -1; - //Select parent { int leftPrefixLength = (leftIndex != -1) ? commonPrefixLengths[leftIndex] : B3_PLBVH_INVALID_COMMON_PREFIX; diff --git a/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvhKernels.h b/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvhKernels.h index 46848b1aa..731423098 100644 --- a/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvhKernels.h +++ b/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvhKernels.h @@ -418,6 +418,7 @@ static const char* parallelLinearBvhCL= \ " \n" " return sharedBits & bitmask;\n" "}\n" +"//Same as computeCommonPrefixLength(), but allows for prefixes with different lengths\n" "int getSharedPrefixLength(b3Int64 prefixA, int prefixLengthA, b3Int64 prefixB, int prefixLengthB)\n" "{\n" " return b3Min( computeCommonPrefixLength(prefixA, prefixB), b3Min(prefixLengthA, prefixLengthB) );\n" @@ -487,8 +488,6 @@ static const char* parallelLinearBvhCL= \ "__kernel void buildBinaryRadixTreeInternalNodes(__global b3Int64* commonPrefixes, __global int* commonPrefixLengths,\n" " __global int2* out_childNodes,\n" " __global int* out_internalNodeParentNodes, __global int* out_rootNodeIndex,\n" -" __global int* TEMP_out_leftLowerPrefix, __global int* TEMP_out_rightLowerPrefix,\n" -" __global int* TEMP_spl_left, __global int* TEMP_spl_right,\n" " int numInternalNodes)\n" "{\n" " int internalNodeIndex = get_group_id(0) * get_local_size(0) + get_local_id(0);\n" @@ -502,6 +501,7 @@ static const char* parallelLinearBvhCL= \ " int leftIndex = -1;\n" " int rightIndex = -1;\n" " \n" +" //Find nearest element to left with a lower common prefix\n" " for(int i = internalNodeIndex - 1; i >= 0; --i)\n" " {\n" " int nodeLeftSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, commonPrefixes[i], commonPrefixLengths[i]);\n" @@ -512,6 +512,7 @@ static const char* parallelLinearBvhCL= \ " }\n" " }\n" " \n" +" //Find nearest element to right with a lower common prefix\n" " for(int i = internalNodeIndex + 1; i < numInternalNodes; ++i)\n" " {\n" " int nodeRightSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, commonPrefixes[i], commonPrefixLengths[i]);\n" @@ -610,11 +611,6 @@ static const char* parallelLinearBvhCL= \ " }\n" "#endif\n" " \n" -" TEMP_out_leftLowerPrefix[internalNodeIndex] = leftIndex;\n" -" TEMP_out_rightLowerPrefix[internalNodeIndex] = rightIndex;\n" -" TEMP_spl_left[internalNodeIndex] = (leftIndex != -1) ? getSharedPrefixLength(nodePrefix, nodePrefixLength, commonPrefixes[leftIndex], commonPrefixLengths[leftIndex]) : -1;\n" -" TEMP_spl_right[internalNodeIndex] = (rightIndex != -1) ? getSharedPrefixLength(nodePrefix, nodePrefixLength, commonPrefixes[rightIndex], commonPrefixLengths[rightIndex]) : -1;\n" -" \n" " //Select parent\n" " {\n" " int leftPrefixLength = (leftIndex != -1) ? commonPrefixLengths[leftIndex] : B3_PLBVH_INVALID_COMMON_PREFIX;\n"