Fix PLBVH reduction kernels and simplify the nodes-per-level calculation.
Also calculate the leaf index range covered by each internal node.
This commit is contained in:
@@ -75,59 +75,32 @@ static const char* parallelLinearBvhCL= \
|
||||
"{\n"
|
||||
" return interleaveBits(x) << 0 | interleaveBits(y) << 1 | interleaveBits(z) << 2;\n"
|
||||
"}\n"
|
||||
"__kernel void findAllNodesMergedAabb(__global b3AabbCL* out_mergedAabb, int numAabbs)\n"
|
||||
"//Should replace with an optimized parallel reduction\n"
|
||||
"__kernel void findAllNodesMergedAabb(__global b3AabbCL* out_mergedAabb, int numAabbsNeedingMerge)\n"
|
||||
"{\n"
|
||||
" int aabbIndex = get_global_id(0);\n"
|
||||
" if(aabbIndex >= numAabbs) return;\n"
|
||||
" \n"
|
||||
" //Find the most significant bit(msb)\n"
|
||||
" int mostSignificantBit = 0;\n"
|
||||
" {\n"
|
||||
" int temp = numAabbs;\n"
|
||||
" while(temp >>= 1) mostSignificantBit++; //Start counting from 0 (0 and 1 have msb 0, 2 has msb 1)\n"
|
||||
" }\n"
|
||||
" \n"
|
||||
" int numberOfAabbsAboveMsbSplit = numAabbs & ~( ~(0) << mostSignificantBit );\n"
|
||||
" int numRemainingAabbs = (1 << mostSignificantBit);\n"
|
||||
" \n"
|
||||
" //Merge AABBs above most significant bit so that the number of remaining AABBs is a power of 2\n"
|
||||
" //For example, if there are 159 AABBs = 128 + 31, then merge indices [0, 30] and 128 + [0, 30]\n"
|
||||
" if(aabbIndex < numberOfAabbsAboveMsbSplit)\n"
|
||||
" {\n"
|
||||
" int otherAabbIndex = numRemainingAabbs + aabbIndex;\n"
|
||||
" \n"
|
||||
" b3AabbCL aabb = out_mergedAabb[aabbIndex];\n"
|
||||
" b3AabbCL otherAabb = out_mergedAabb[otherAabbIndex];\n"
|
||||
" \n"
|
||||
" b3AabbCL mergedAabb;\n"
|
||||
" mergedAabb.m_min = b3Min(aabb.m_min, otherAabb.m_min);\n"
|
||||
" mergedAabb.m_max = b3Max(aabb.m_max, otherAabb.m_max);\n"
|
||||
" out_mergedAabb[aabbIndex] = mergedAabb;\n"
|
||||
" }\n"
|
||||
" \n"
|
||||
" barrier(CLK_GLOBAL_MEM_FENCE);\n"
|
||||
" \n"
|
||||
" //Each time this kernel is added to the command queue, \n"
|
||||
" //the number of AABBs needing to be merged is halved\n"
|
||||
" //\n"
|
||||
" int offset = numRemainingAabbs / 2;\n"
|
||||
" while(offset >= 1)\n"
|
||||
" {\n"
|
||||
" if(aabbIndex < offset)\n"
|
||||
" {\n"
|
||||
" int otherAabbIndex = aabbIndex + offset;\n"
|
||||
" //Example with 159 AABBs:\n"
|
||||
" // numRemainingAabbs == 159 / 2 + 159 % 2 == 80\n"
|
||||
" // numMergedAabbs == 159 - 80 == 79\n"
|
||||
" //So, indices [0, 78] are merged with [0 + 80, 78 + 80]\n"
|
||||
" \n"
|
||||
" int numRemainingAabbs = numAabbsNeedingMerge / 2 + numAabbsNeedingMerge % 2;\n"
|
||||
" int numMergedAabbs = numAabbsNeedingMerge - numRemainingAabbs;\n"
|
||||
" \n"
|
||||
" int aabbIndex = get_global_id(0);\n"
|
||||
" if(aabbIndex >= numMergedAabbs) return;\n"
|
||||
" \n"
|
||||
" int otherAabbIndex = aabbIndex + numRemainingAabbs;\n"
|
||||
" \n"
|
||||
" b3AabbCL aabb = out_mergedAabb[aabbIndex];\n"
|
||||
" b3AabbCL otherAabb = out_mergedAabb[otherAabbIndex];\n"
|
||||
" \n"
|
||||
" b3AabbCL aabb = out_mergedAabb[aabbIndex];\n"
|
||||
" b3AabbCL otherAabb = out_mergedAabb[otherAabbIndex];\n"
|
||||
" \n"
|
||||
" b3AabbCL mergedAabb;\n"
|
||||
" mergedAabb.m_min = b3Min(aabb.m_min, otherAabb.m_min);\n"
|
||||
" mergedAabb.m_max = b3Max(aabb.m_max, otherAabb.m_max);\n"
|
||||
" out_mergedAabb[aabbIndex] = mergedAabb;\n"
|
||||
" }\n"
|
||||
" \n"
|
||||
" offset /= 2;\n"
|
||||
" \n"
|
||||
" barrier(CLK_GLOBAL_MEM_FENCE);\n"
|
||||
" }\n"
|
||||
" b3AabbCL mergedAabb;\n"
|
||||
" mergedAabb.m_min = b3Min(aabb.m_min, otherAabb.m_min);\n"
|
||||
" mergedAabb.m_max = b3Max(aabb.m_max, otherAabb.m_max);\n"
|
||||
" out_mergedAabb[aabbIndex] = mergedAabb;\n"
|
||||
"}\n"
|
||||
"__kernel void assignMortonCodesAndAabbIndicies(__global b3AabbCL* worldSpaceAabbs, __global b3AabbCL* mergedAabbOfAllNodes, \n"
|
||||
" __global SortDataCL* out_mortonCodesAndAabbIndices, int numAabbs)\n"
|
||||
@@ -244,7 +217,7 @@ static const char* parallelLinearBvhCL= \
|
||||
" {\n"
|
||||
" int leafNodeLevel = numLevels - 1;\n"
|
||||
" leftChildIndex = (isLeftChildLeaf) ? leftChildIndex - firstIndexOffsetPerLevel[leafNodeLevel] : leftChildIndex;\n"
|
||||
" rightChildIndex = (isLeftChildLeaf) ? rightChildIndex - firstIndexOffsetPerLevel[leafNodeLevel] : rightChildIndex;\n"
|
||||
" rightChildIndex = (isRightChildLeaf) ? rightChildIndex - firstIndexOffsetPerLevel[leafNodeLevel] : rightChildIndex;\n"
|
||||
" }\n"
|
||||
" \n"
|
||||
" //Set the negative sign bit if the node is internal\n"
|
||||
@@ -265,20 +238,19 @@ static const char* parallelLinearBvhCL= \
|
||||
" __global int2* internalNodeChildIndices,\n"
|
||||
" __global SortDataCL* mortonCodesAndAabbIndices,\n"
|
||||
" __global b3AabbCL* leafNodeAabbs, \n"
|
||||
" __global b3AabbCL* out_internalNodeAabbs, int numLevels, int numInternalNodes)\n"
|
||||
" __global int2* out_internalNodeLeafIndexRanges,\n"
|
||||
" __global b3AabbCL* out_internalNodeAabbs, \n"
|
||||
" int numLevels, int numInternalNodes, int level)\n"
|
||||
"{\n"
|
||||
" int i = get_global_id(0);\n"
|
||||
" if(i >= numInternalNodes) return;\n"
|
||||
" \n"
|
||||
" int numInternalLevels = numLevels - 1;\n"
|
||||
" \n"
|
||||
" //Starting from the level next to the leaf nodes, move towards the root(level 0)\n"
|
||||
" for(int level = numInternalLevels - 1; level >= 0; --level)\n"
|
||||
" //For each node in a level, check its child nodes to determine its AABB\n"
|
||||
" {\n"
|
||||
" int indexInLevel = i; //Index relative to firstIndexOffsetPerLevel[level]\n"
|
||||
" \n"
|
||||
" int numNodesInLevel = numNodesPerLevel[level];\n"
|
||||
" if(i < numNodesInLevel)\n"
|
||||
" if(indexInLevel < numNodesInLevel)\n"
|
||||
" {\n"
|
||||
" int internalNodeIndexGlobal = indexInLevel + firstIndexOffsetPerLevel[level];\n"
|
||||
" int2 childIndicies = internalNodeChildIndices[internalNodeIndexGlobal];\n"
|
||||
@@ -289,19 +261,26 @@ static const char* parallelLinearBvhCL= \
|
||||
" int isLeftChildLeaf = isLeafNode(childIndicies.x);\n"
|
||||
" int isRightChildLeaf = isLeafNode(childIndicies.y);\n"
|
||||
" \n"
|
||||
" //left/RightChildLeafIndex == Rigid body indicies\n"
|
||||
" int leftChildLeafIndex = (isLeftChildLeaf) ? mortonCodesAndAabbIndices[leftChildIndex].m_value : -1;\n"
|
||||
" int rightChildLeafIndex = (isRightChildLeaf) ? mortonCodesAndAabbIndices[rightChildIndex].m_value : -1;\n"
|
||||
" \n"
|
||||
" b3AabbCL leftChildAabb = (isLeftChildLeaf) ? leafNodeAabbs[leftChildLeafIndex] : out_internalNodeAabbs[leftChildIndex];\n"
|
||||
" b3AabbCL rightChildAabb = (isRightChildLeaf) ? leafNodeAabbs[rightChildLeafIndex] : out_internalNodeAabbs[rightChildIndex];\n"
|
||||
" \n"
|
||||
" //\n"
|
||||
" b3AabbCL internalNodeAabb;\n"
|
||||
" internalNodeAabb.m_min = b3Min(leftChildAabb.m_min, rightChildAabb.m_min);\n"
|
||||
" internalNodeAabb.m_max = b3Max(leftChildAabb.m_max, rightChildAabb.m_max);\n"
|
||||
" out_internalNodeAabbs[internalNodeIndexGlobal] = internalNodeAabb;\n"
|
||||
" \n"
|
||||
" //For index range, x == min and y == max; left child always has lower index\n"
|
||||
" int2 leafIndexRange;\n"
|
||||
" leafIndexRange.x = (isLeftChildLeaf) ? leftChildIndex : out_internalNodeLeafIndexRanges[leftChildIndex].x;\n"
|
||||
" leafIndexRange.y = (isRightChildLeaf) ? rightChildIndex : out_internalNodeLeafIndexRanges[rightChildIndex].y;\n"
|
||||
" \n"
|
||||
" out_internalNodeLeafIndexRanges[internalNodeIndexGlobal] = leafIndexRange;\n"
|
||||
" }\n"
|
||||
" \n"
|
||||
" barrier(CLK_GLOBAL_MEM_FENCE);\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
"//From sap.cl\n"
|
||||
@@ -316,7 +295,9 @@ static const char* parallelLinearBvhCL= \
|
||||
"}\n"
|
||||
"//From sap.cl\n"
|
||||
"__kernel void plbvhCalculateOverlappingPairs(__global b3AabbCL* rigidAabbs, \n"
|
||||
" __global int2* internalNodeChildIndices, __global b3AabbCL* internalNodeAabbs,\n"
|
||||
" __global int2* internalNodeChildIndices, \n"
|
||||
" __global b3AabbCL* internalNodeAabbs,\n"
|
||||
" __global int2* internalNodeLeafIndexRanges,\n"
|
||||
" __global SortDataCL* mortonCodesAndAabbIndices,\n"
|
||||
" __global int* out_numPairs, __global int4* out_overlappingPairs, \n"
|
||||
" int maxPairs, int numQueryAabbs)\n"
|
||||
@@ -326,7 +307,8 @@ static const char* parallelLinearBvhCL= \
|
||||
" int queryRigidIndex = get_group_id(0) * get_local_size(0) + get_local_id(0);\n"
|
||||
" if(queryRigidIndex >= numQueryAabbs) return;\n"
|
||||
" \n"
|
||||
" queryRigidIndex = mortonCodesAndAabbIndices[queryRigidIndex].m_value;\n"
|
||||
" int queryBvhNodeIndex = queryRigidIndex;\n"
|
||||
" queryRigidIndex = mortonCodesAndAabbIndices[queryRigidIndex].m_value; // fix queryRigidIndex naming for this branch\n"
|
||||
"#else\n"
|
||||
" int queryRigidIndex = get_global_id(0);\n"
|
||||
" if(queryRigidIndex >= numQueryAabbs) return;\n"
|
||||
@@ -347,7 +329,15 @@ static const char* parallelLinearBvhCL= \
|
||||
" \n"
|
||||
" int isLeaf = isLeafNode(internalOrLeafNodeIndex); //Internal node if false\n"
|
||||
" int bvhNodeIndex = getIndexWithInternalNodeMarkerRemoved(internalOrLeafNodeIndex);\n"
|
||||
" \n"
|
||||
" \n"
|
||||
" //Optimization - if the node is not a leaf, check whether the highest leaf index of that node\n"
|
||||
" //is less than the queried node's index to avoid testing each pair twice.\n"
|
||||
" {\n"
|
||||
" // fix: produces duplicate pairs\n"
|
||||
" // int highestLeafIndex = (isLeaf) ? numQueryAabbs : internalNodeLeafIndexRanges[bvhNodeIndex].y;\n"
|
||||
" // if(highestLeafIndex < queryBvhNodeIndex) continue;\n"
|
||||
" }\n"
|
||||
" \n"
|
||||
" //bvhRigidIndex is not used if internal node\n"
|
||||
" int bvhRigidIndex = (isLeaf) ? mortonCodesAndAabbIndices[bvhNodeIndex].m_value : -1;\n"
|
||||
" \n"
|
||||
|
||||
Reference in New Issue
Block a user