Draft Parallel Linear BVH Broadphase.

This commit is contained in:
Jackson Lee
2014-02-18 19:23:25 -08:00
parent fabdf8b4a9
commit b7b7356af8
7 changed files with 1242 additions and 0 deletions

View File

@@ -0,0 +1,399 @@
/*
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
//Initial Author Jackson Lee, 2014
typedef float b3Scalar;
typedef float4 b3Vector3;
#define b3Max max
#define b3Min min
typedef struct
{
unsigned int m_key;
unsigned int m_value;
} SortDataCL;
typedef struct
{
union
{
float4 m_min;
float m_minElems[4];
int m_minIndices[4];
};
union
{
float4 m_max;
float m_maxElems[4];
int m_maxIndices[4];
};
} b3AabbCL;
unsigned int interleaveBits(unsigned int x)
{
//........ ........ ......12 3456789A //x
//....1..2 ..3..4.. 5..6..7. .8..9..A //x after interleaving bits
//........ ....1234 56789A12 3456789A //x |= (x << 10)
//........ ....1111 1....... ...11111 //0x 00 0F 80 1F
//........ ....1234 5....... ...6789A //x = ( x | (x << 10) ) & 0x000F801F;
//.......1 23451234 5.....67 89A6789A //x |= (x << 5)
//.......1 1.....11 1.....11 .....111 //0x 01 83 83 07
//.......1 2.....34 5.....67 .....89A //x = ( x | (x << 5) ) & 0x01838307;
//....12.1 2..34534 5..67.67 ..89A89A //x |= (x << 3)
//....1... 1..1...1 1..1...1 ..1...11 //0x 08 91 91 23
//....1... 2..3...4 5..6...7 ..8...9A //x = ( x | (x << 3) ) & 0x08919123;
//...11..2 2.33..4N 5.66..77 .88..9NA //x |= (x << 1) ( N indicates overlapping bits, first overlap is bit {4,5} second is {9,A} )
//....1..1 ..1...1. 1..1..1. .1...1.1 //0x 09 22 92 45
//....1..2 ..3...4. 5..6..7. .8...9.A //x = ( x | (x << 1) ) & 0x09229245;
//...11.22 .33..445 5.66.77. 88..99AA //x |= (x << 1)
//....1..1 ..1..1.. 1..1..1. .1..1..1 //0x 09 34 92 29
//....1..2 ..3..4.. 5..6..7. .8..9..A //x = ( x | (x << 1) ) & 0x09349229;
//........ ........ ......11 11111111 //0x000003FF
x &= 0x000003FF; //Clear all bits above bit 10
x = ( x | (x << 10) ) & 0x000F801F;
x = ( x | (x << 5) ) & 0x01838307;
x = ( x | (x << 3) ) & 0x08919123;
x = ( x | (x << 1) ) & 0x09229245;
x = ( x | (x << 1) ) & 0x09349229;
return x;
}
unsigned int getMortonCode(unsigned int x, unsigned int y, unsigned int z)
{
return interleaveBits(x) << 0 | interleaveBits(y) << 1 | interleaveBits(z) << 2;
}
/*
__kernel void findAllNodesMergedAabb(__global b3AabbCL* worldSpaceAabbs, __global b3AabbCL* out_mergedAabb, int numAabbs)
{
int aabbIndex = get_global_id(0);
if(aabbIndex >= numAabbs) return;
//Find the most significant bit(msb)
int mostSignificantBit = 0;
{
int temp = numLeaves;
while(temp >>= 1) mostSignificantBit++; //Start counting from 0 (0 and 1 have msb 0, 2 has msb 1)
}
int numberOfAabbsAboveMsbSplit = numAabbs & ~( ~(0) << mostSignificantBit ); // verify
int numRemainingAabbs = (1 << mostSignificantBit);
//Merge AABBs above most significant bit so that the number of remaining AABBs is a power of 2
//For example, if there are 159 AABBs = 128 + 31, then merge indices [0, 30] and 128 + [0, 30]
if(aabbIndex < numberOfAabbsAboveMsbSplit)
{
int otherAabbIndex = numRemainingAabbs + aabbIndex;
b3AabbCL aabb = worldSpaceAabbs[aabbIndex];
b3AabbCL otherAabb = worldSpaceAabbs[otherAabbIndex];
b3AabbCL mergedAabb;
mergedAabb.m_min = b3Min(aabb.m_min, otherAabb.m_min);
mergedAabb.m_max = b3Max(aabb.m_max, otherAabb.m_max);
out_mergedAabb[aabbIndex] = mergedAabb;
}
barrier(CLK_GLOBAL_MEM_FENCE);
//
int offset = numRemainingAabbs / 2;
while(offset >= 1)
{
if(aabbIndex < offset)
{
int otherAabbIndex = aabbIndex + offset;
b3AabbCL aabb = worldSpaceAabbs[aabbIndex];
b3AabbCL otherAabb = worldSpaceAabbs[otherAabbIndex];
b3AabbCL mergedAabb;
mergedAabb.m_min = b3Min(aabb.m_min, otherAabb.m_min);
mergedAabb.m_max = b3Max(aabb.m_max, otherAabb.m_max);
out_mergedAabb[aabbIndex] = mergedAabb;
}
offset = offset / 2;
barrier(CLK_GLOBAL_MEM_FENCE);
}
}
*/
__kernel void assignMortonCodesAndAabbIndicies(__global b3AabbCL* worldSpaceAabbs,
__global SortDataCL* out_mortonCodesAndAabbIndices,
b3Scalar cellSize, int numAabbs)
{
int leafNodeIndex = get_global_id(0); //Leaf node index == AABB index
if(leafNodeIndex >= numAabbs) return;
b3AabbCL aabb = worldSpaceAabbs[leafNodeIndex];
b3Vector3 center = (aabb.m_min + aabb.m_max) * 0.5f;
//Quantize into integer coordinates
//floor() is needed to prevent the center cell, at (0,0,0) from being twice the size
b3Vector3 gridPosition = center / cellSize;
int4 discretePosition;
discretePosition.x = (int)( (gridPosition.x >= 0.0f) ? gridPosition.x : floor(gridPosition.x) );
discretePosition.y = (int)( (gridPosition.y >= 0.0f) ? gridPosition.y : floor(gridPosition.y) );
discretePosition.z = (int)( (gridPosition.z >= 0.0f) ? gridPosition.z : floor(gridPosition.z) );
//Clamp coordinates into [-512, 511], then convert range from [-512, 511] to [0, 1023]
discretePosition = b3Max( -512, b3Min(discretePosition, 511) );
discretePosition += 512;
//Interleave bits(assign a morton code, also known as a z-curve)
unsigned int mortonCode = getMortonCode(discretePosition.x, discretePosition.y, discretePosition.z);
//
SortDataCL mortonCodeIndexPair;
mortonCodeIndexPair.m_key = mortonCode;
mortonCodeIndexPair.m_value = leafNodeIndex;
out_mortonCodesAndAabbIndices[leafNodeIndex] = mortonCodeIndexPair;
}
#define B3_PLVBH_TRAVERSE_MAX_STACK_SIZE 128
#define B3_PLBVH_ROOT_NODE_MARKER -1 //Used to indicate that the (root) node has no parent
#define B3_PLBVH_ROOT_NODE_INDEX 0
//For elements of internalNodeChildIndices(int2), the negative bit determines whether it is a leaf or internal node.
//Positive index == leaf node, while negative index == internal node (remove negative sign to get index).
//
//Since the root internal node is at index 0, no internal nodes should reference it as a child,
//and so index 0 is always used to indicate a leaf node.
int isLeafNode(int index) { return (index >= 0); }
int getIndexWithInternalNodeMarkerRemoved(int index) { return (index >= 0) ? index : -index; }
int getIndexWithInternalNodeMarkerSet(int isLeaf, int index) { return (isLeaf) ? index : -index; }
__kernel void constructBinaryTree(__global int* firstIndexOffsetPerLevel,
__global int* numNodesPerLevel,
__global int2* out_internalNodeChildIndices,
__global int* out_internalNodeParentNodes,
__global int* out_leafNodeParentNodes,
int numLevels, int numInternalNodes)
{
int internalNodeIndex = get_global_id(0);
if(internalNodeIndex >= numInternalNodes) return;
//Find the level that this node is in, using linear search(could replace with binary search)
int level = 0;
int numInternalLevels = numLevels - 1; //All levels except the last are internal nodes
for(; level < numInternalLevels; ++level)
{
if( firstIndexOffsetPerLevel[level] <= internalNodeIndex && internalNodeIndex < firstIndexOffsetPerLevel[level + 1]) break;
}
//Check lower levels to find child nodes
//Left child is always in the next level, but the same does not apply to the right child
int indexInLevel = internalNodeIndex - firstIndexOffsetPerLevel[level];
int firstIndexInNextLevel = firstIndexOffsetPerLevel[level + 1]; //Should never be out of bounds(see for loop above)
int leftChildLevel = level + 1;
int leftChildIndex = firstIndexInNextLevel + indexInLevel * 2;
int rightChildLevel = level + 1;
int rightChildIndex = leftChildIndex + 1;
//Under certain conditions, the right child index as calculated above is invalid; need to find the correct index
//
//First condition: must be at least 2 levels apart from the leaf node level;
//if the current level is right next to the leaf node level, then the right child
//will never be invalid due to the way the nodes are allocated (also avoid a out-of-bounds memory access)
//
//Second condition: not enough nodes in the next level for each parent to have 2 children, so the right child is invalid
//
//Third condition: must be the last node in its level
if( level < numLevels - 2
&& numNodesPerLevel[level] * 2 > numNodesPerLevel[level + 1]
&& indexInLevel == numNodesPerLevel[level] - 1 )
{
//Check lower levels until we find a node without a parent
for(; rightChildLevel < numLevels - 1; ++rightChildLevel)
{
int rightChildNextLevel = rightChildLevel + 1;
//If this branch is taken, it means that the last node in rightChildNextLevel has no parent
if( numNodesPerLevel[rightChildLevel] * 2 < numNodesPerLevel[rightChildNextLevel] )
{
//Set the node to the last node in rightChildNextLevel
rightChildLevel = rightChildNextLevel;
rightChildIndex = firstIndexOffsetPerLevel[rightChildNextLevel] + numNodesPerLevel[rightChildNextLevel] - 1;
break;
}
}
}
int isLeftChildLeaf = (leftChildLevel >= numLevels - 1);
int isRightChildLeaf = (rightChildLevel >= numLevels - 1);
//If left/right child is a leaf node, the index needs to be corrected
//the way the index is calculated assumes that the leaf and internal nodes are in a contiguous array,
//with leaf nodes at the end of the array; in actuality, the leaf and internal nodes are in separate arrays
{
int leafNodeLevel = numLevels - 1;
leftChildIndex = (isLeftChildLeaf) ? leftChildIndex - firstIndexOffsetPerLevel[leafNodeLevel] : leftChildIndex;
rightChildIndex = (isLeftChildLeaf) ? rightChildIndex - firstIndexOffsetPerLevel[leafNodeLevel] : rightChildIndex;
}
//Set the negative sign bit if the node is internal
int2 childIndices;
childIndices.x = getIndexWithInternalNodeMarkerSet(isLeftChildLeaf, leftChildIndex);
childIndices.y = getIndexWithInternalNodeMarkerSet(isRightChildLeaf, rightChildIndex);
out_internalNodeChildIndices[internalNodeIndex] = childIndices;
//Assign parent node index to children
__global int* out_leftChildParentNodeIndices = (isLeftChildLeaf) ? out_leafNodeParentNodes : out_internalNodeParentNodes;
out_leftChildParentNodeIndices[leftChildIndex] = internalNodeIndex;
__global int* out_rightChildParentNodeIndices = (isRightChildLeaf) ? out_leafNodeParentNodes : out_internalNodeParentNodes;
out_rightChildParentNodeIndices[rightChildIndex] = internalNodeIndex;
}
__kernel void determineInternalNodeAabbs(__global int* firstIndexOffsetPerLevel,
__global int* numNodesPerLevel,
__global int2* internalNodeChildIndices,
__global SortDataCL* mortonCodesAndAabbIndices,
__global b3AabbCL* leafNodeAabbs,
__global b3AabbCL* out_internalNodeAabbs, int numLevels, int numInternalNodes)
{
int i = get_global_id(0);
if(i >= numInternalNodes) return;
int numInternalLevels = numLevels - 1;
//Starting from the level next to the leaf nodes, move towards the root(level 0)
for(int level = numInternalLevels - 1; level >= 0; --level)
{
int indexInLevel = i; //Index relative to firstIndexOffsetPerLevel[level]
int numNodesInLevel = numNodesPerLevel[level];
if(i < numNodesInLevel)
{
int internalNodeIndexGlobal = indexInLevel + firstIndexOffsetPerLevel[level];
int2 childIndicies = internalNodeChildIndices[internalNodeIndexGlobal];
int leftChildIndex = getIndexWithInternalNodeMarkerRemoved(childIndicies.x);
int rightChildIndex = getIndexWithInternalNodeMarkerRemoved(childIndicies.y);
int isLeftChildLeaf = isLeafNode(childIndicies.x);
int isRightChildLeaf = isLeafNode(childIndicies.y);
int leftChildLeafIndex = (isLeftChildLeaf) ? mortonCodesAndAabbIndices[leftChildIndex].m_value : -1;
int rightChildLeafIndex = (isRightChildLeaf) ? mortonCodesAndAabbIndices[rightChildIndex].m_value : -1;
b3AabbCL leftChildAabb = (isLeftChildLeaf) ? leafNodeAabbs[leftChildLeafIndex] : out_internalNodeAabbs[leftChildIndex];
b3AabbCL rightChildAabb = (isRightChildLeaf) ? leafNodeAabbs[rightChildLeafIndex] : out_internalNodeAabbs[rightChildIndex];
b3AabbCL internalNodeAabb;
internalNodeAabb.m_min = b3Min(leftChildAabb.m_min, rightChildAabb.m_min);
internalNodeAabb.m_max = b3Max(leftChildAabb.m_max, rightChildAabb.m_max);
out_internalNodeAabbs[internalNodeIndexGlobal] = internalNodeAabb;
}
barrier(CLK_GLOBAL_MEM_FENCE);
}
}
//From sap.cl
#define NEW_PAIR_MARKER -1
bool TestAabbAgainstAabb2(const b3AabbCL* aabb1, const b3AabbCL* aabb2)
{
bool overlap = true;
overlap = (aabb1->m_min.x > aabb2->m_max.x || aabb1->m_max.x < aabb2->m_min.x) ? false : overlap;
overlap = (aabb1->m_min.z > aabb2->m_max.z || aabb1->m_max.z < aabb2->m_min.z) ? false : overlap;
overlap = (aabb1->m_min.y > aabb2->m_max.y || aabb1->m_max.y < aabb2->m_min.y) ? false : overlap;
return overlap;
}
//From sap.cl
__kernel void plbvhCalculateOverlappingPairs(__global b3AabbCL* rigidAabbs,
__global int2* internalNodeChildIndices, __global b3AabbCL* internalNodeAabbs,
__global SortDataCL* mortonCodesAndAabbIndices,
__global int* out_numPairs, __global int4* out_overlappingPairs,
int maxPairs, int numQueryAabbs)
{
#define USE_SPATIALLY_COHERENT_INDICIES //mortonCodesAndAabbIndices[] contains rigid body indices sorted along the z-curve
#ifdef USE_SPATIALLY_COHERENT_INDICIES
int queryRigidIndex = get_group_id(0) * get_local_size(0) + get_local_id(0);
if(queryRigidIndex >= numQueryAabbs) return;
queryRigidIndex = mortonCodesAndAabbIndices[queryRigidIndex].m_value;
#else
int queryRigidIndex = get_global_id(0);
if(queryRigidIndex >= numQueryAabbs) return;
#endif
b3AabbCL queryAabb = rigidAabbs[queryRigidIndex];
int stack[B3_PLVBH_TRAVERSE_MAX_STACK_SIZE];
//Starting by placing only the root node index, 0, in the stack causes it to be detected as a leaf node(see isLeafNode() in loop)
int stackSize = 2;
stack[0] = internalNodeChildIndices[B3_PLBVH_ROOT_NODE_INDEX].x;
stack[1] = internalNodeChildIndices[B3_PLBVH_ROOT_NODE_INDEX].y;
while(stackSize)
{
int internalOrLeafNodeIndex = stack[ stackSize - 1 ];
--stackSize;
int isLeaf = isLeafNode(internalOrLeafNodeIndex); //Internal node if false
int bvhNodeIndex = getIndexWithInternalNodeMarkerRemoved(internalOrLeafNodeIndex);
//bvhRigidIndex is not used if internal node
int bvhRigidIndex = (isLeaf) ? mortonCodesAndAabbIndices[bvhNodeIndex].m_value : -1;
b3AabbCL bvhNodeAabb = (isLeaf) ? rigidAabbs[bvhRigidIndex] : internalNodeAabbs[bvhNodeIndex];
if( queryRigidIndex != bvhRigidIndex && TestAabbAgainstAabb2(&queryAabb, &bvhNodeAabb) )
{
if(isLeaf && rigidAabbs[queryRigidIndex].m_minIndices[3] < rigidAabbs[bvhRigidIndex].m_minIndices[3])
{
int4 pair;
pair.x = rigidAabbs[queryRigidIndex].m_minIndices[3];
pair.y = rigidAabbs[bvhRigidIndex].m_minIndices[3];
pair.z = NEW_PAIR_MARKER;
pair.w = NEW_PAIR_MARKER;
int pairIndex = atomic_inc(out_numPairs);
if(pairIndex < maxPairs) out_overlappingPairs[pairIndex] = pair;
}
if(!isLeaf) //Internal node
{
if(stackSize + 2 > B3_PLVBH_TRAVERSE_MAX_STACK_SIZE)
{
//Error
}
else
{
stack[ stackSize++ ] = internalNodeChildIndices[bvhNodeIndex].x;
stack[ stackSize++ ] = internalNodeChildIndices[bvhNodeIndex].y;
}
}
}
}
}

View File

@@ -0,0 +1,325 @@
//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project
static const char* parallelLinearBvhCL= \
"/*\n"
"This software is provided 'as-is', without any express or implied warranty.\n"
"In no event will the authors be held liable for any damages arising from the use of this software.\n"
"Permission is granted to anyone to use this software for any purpose,\n"
"including commercial applications, and to alter it and redistribute it freely,\n"
"subject to the following restrictions:\n"
"1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n"
"2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n"
"3. This notice may not be removed or altered from any source distribution.\n"
"*/\n"
"//Initial Author Jackson Lee, 2014\n"
"typedef float b3Scalar;\n"
"typedef float4 b3Vector3;\n"
"#define b3Max max\n"
"#define b3Min min\n"
"typedef struct\n"
"{\n"
" unsigned int m_key;\n"
" unsigned int m_value;\n"
"} SortDataCL;\n"
"typedef struct \n"
"{\n"
" union\n"
" {\n"
" float4 m_min;\n"
" float m_minElems[4];\n"
" int m_minIndices[4];\n"
" };\n"
" union\n"
" {\n"
" float4 m_max;\n"
" float m_maxElems[4];\n"
" int m_maxIndices[4];\n"
" };\n"
"} b3AabbCL;\n"
"unsigned int interleaveBits(unsigned int x)\n"
"{\n"
" //........ ........ ......12 3456789A //x\n"
" //....1..2 ..3..4.. 5..6..7. .8..9..A //x after interleaving bits\n"
" \n"
" //........ ....1234 56789A12 3456789A //x |= (x << 10)\n"
" //........ ....1111 1....... ...11111 //0x 00 0F 80 1F\n"
" //........ ....1234 5....... ...6789A //x = ( x | (x << 10) ) & 0x000F801F; \n"
" \n"
" //.......1 23451234 5.....67 89A6789A //x |= (x << 5)\n"
" //.......1 1.....11 1.....11 .....111 //0x 01 83 83 07\n"
" //.......1 2.....34 5.....67 .....89A //x = ( x | (x << 5) ) & 0x01838307;\n"
" \n"
" //....12.1 2..34534 5..67.67 ..89A89A //x |= (x << 3)\n"
" //....1... 1..1...1 1..1...1 ..1...11 //0x 08 91 91 23\n"
" //....1... 2..3...4 5..6...7 ..8...9A //x = ( x | (x << 3) ) & 0x08919123;\n"
" \n"
" //...11..2 2.33..4N 5.66..77 .88..9NA //x |= (x << 1) ( N indicates overlapping bits, first overlap is bit {4,5} second is {9,A} )\n"
" //....1..1 ..1...1. 1..1..1. .1...1.1 //0x 09 22 92 45\n"
" //....1..2 ..3...4. 5..6..7. .8...9.A //x = ( x | (x << 1) ) & 0x09229245;\n"
" \n"
" //...11.22 .33..445 5.66.77. 88..99AA //x |= (x << 1)\n"
" //....1..1 ..1..1.. 1..1..1. .1..1..1 //0x 09 34 92 29\n"
" //....1..2 ..3..4.. 5..6..7. .8..9..A //x = ( x | (x << 1) ) & 0x09349229;\n"
" \n"
" //........ ........ ......11 11111111 //0x000003FF\n"
" x &= 0x000003FF; //Clear all bits above bit 10\n"
" \n"
" x = ( x | (x << 10) ) & 0x000F801F;\n"
" x = ( x | (x << 5) ) & 0x01838307;\n"
" x = ( x | (x << 3) ) & 0x08919123;\n"
" x = ( x | (x << 1) ) & 0x09229245;\n"
" x = ( x | (x << 1) ) & 0x09349229;\n"
" \n"
" return x;\n"
"}\n"
"unsigned int getMortonCode(unsigned int x, unsigned int y, unsigned int z)\n"
"{\n"
" return interleaveBits(x) << 0 | interleaveBits(y) << 1 | interleaveBits(z) << 2;\n"
"}\n"
"__kernel void assignMortonCodesAndAabbIndicies(__global b3AabbCL* worldSpaceAabbs, \n"
" __global SortDataCL* out_mortonCodesAndAabbIndices, \n"
" b3Scalar cellSize, int numAabbs)\n"
"{\n"
" int leafNodeIndex = get_global_id(0); //Leaf node index == AABB index\n"
" if(leafNodeIndex >= numAabbs) return;\n"
" \n"
" b3AabbCL aabb = worldSpaceAabbs[leafNodeIndex];\n"
" \n"
" b3Vector3 center = (aabb.m_min + aabb.m_max) * 0.5f;\n"
" \n"
" //Quantize into integer coordinates\n"
" //floor() is needed to prevent the center cell, at (0,0,0) from being twice the size\n"
" b3Vector3 gridPosition = center / cellSize;\n"
" \n"
" int4 discretePosition;\n"
" discretePosition.x = (int)( (gridPosition.x >= 0.0f) ? gridPosition.x : floor(gridPosition.x) );\n"
" discretePosition.y = (int)( (gridPosition.y >= 0.0f) ? gridPosition.y : floor(gridPosition.y) );\n"
" discretePosition.z = (int)( (gridPosition.z >= 0.0f) ? gridPosition.z : floor(gridPosition.z) );\n"
" \n"
" //Clamp coordinates into [-512, 511], then convert range from [-512, 511] to [0, 1023]\n"
" discretePosition = b3Max( -512, b3Min(discretePosition, 511) );\n"
" discretePosition += 512;\n"
" \n"
" //Interleave bits(assign a morton code, also known as a z-curve)\n"
" unsigned int mortonCode = getMortonCode(discretePosition.x, discretePosition.y, discretePosition.z);\n"
" \n"
" //\n"
" SortDataCL mortonCodeIndexPair;\n"
" mortonCodeIndexPair.m_key = mortonCode;\n"
" mortonCodeIndexPair.m_value = leafNodeIndex;\n"
" \n"
" out_mortonCodesAndAabbIndices[leafNodeIndex] = mortonCodeIndexPair;\n"
"}\n"
"#define B3_PLVBH_TRAVERSE_MAX_STACK_SIZE 128\n"
"#define B3_PLBVH_ROOT_NODE_MARKER -1 //Used to indicate that the node has no parent \n"
"#define B3_PLBVH_ROOT_NODE_INDEX 0\n"
"//For elements of internalNodeChildIndices(int2), the negative bit determines whether it is a leaf or internal node.\n"
"//Positive index == leaf node, while negative index == internal node (remove negative sign to get index).\n"
"//\n"
"//Since the root internal node is at index 0, no internal nodes should reference it as a child,\n"
"//and so index 0 is always used to indicate a leaf node.\n"
"int isLeafNode(int index) { return (index >= 0); }\n"
"int getIndexWithInternalNodeMarkerRemoved(int index) { return (index >= 0) ? index : -index; }\n"
"int getIndexWithInternalNodeMarkerSet(int isLeaf, int index) { return (isLeaf) ? index : -index; }\n"
"__kernel void constructBinaryTree(__global int* firstIndexOffsetPerLevel,\n"
" __global int* numNodesPerLevel,\n"
" __global int2* out_internalNodeChildIndices, \n"
" __global int* out_internalNodeParentNodes, \n"
" __global int* out_leafNodeParentNodes, \n"
" int numLevels, int numInternalNodes)\n"
"{\n"
" int internalNodeIndex = get_global_id(0);\n"
" if(internalNodeIndex >= numInternalNodes) return;\n"
" \n"
" //Find the level that this node is in, using linear search(could replace with binary search)\n"
" int level = 0;\n"
" int numInternalLevels = numLevels - 1; //All levels except the last are internal nodes\n"
" for(; level < numInternalLevels; ++level)\n"
" {\n"
" if( firstIndexOffsetPerLevel[level] <= internalNodeIndex && internalNodeIndex < firstIndexOffsetPerLevel[level + 1]) break;\n"
" }\n"
" \n"
" //Check lower levels to find child nodes\n"
" //Left child is always in the next level, but the same does not apply to the right child\n"
" int indexInLevel = internalNodeIndex - firstIndexOffsetPerLevel[level];\n"
" int firstIndexInNextLevel = firstIndexOffsetPerLevel[level + 1]; //Should never be out of bounds(see for loop above)\n"
" \n"
" int leftChildLevel = level + 1;\n"
" int leftChildIndex = firstIndexInNextLevel + indexInLevel * 2;\n"
" \n"
" int rightChildLevel = level + 1;\n"
" int rightChildIndex = leftChildIndex + 1;\n"
" \n"
" //Under certain conditions, the right child index as calculated above is invalid; need to find the correct index\n"
" //\n"
" //First condition: must be at least 2 levels apart from the leaf node level;\n"
" //if the current level is right next to the leaf node level, then the right child\n"
" //will never be invalid due to the way the nodes are allocated (also avoid a out-of-bounds memory access)\n"
" //\n"
" //Second condition: not enough nodes in the next level for each parent to have 2 children, so the right child is invalid\n"
" //\n"
" //Third condition: must be the last node in its level\n"
" if( level < numLevels - 2 \n"
" && numNodesPerLevel[level] * 2 > numNodesPerLevel[level + 1] \n"
" && indexInLevel == numNodesPerLevel[level] - 1 )\n"
" {\n"
" //Check lower levels until we find a node without a parent\n"
" for(; rightChildLevel < numLevels - 1; ++rightChildLevel)\n"
" {\n"
" int rightChildNextLevel = rightChildLevel + 1;\n"
" \n"
" //If this branch is taken, it means that the last node in rightChildNextLevel has no parent\n"
" if( numNodesPerLevel[rightChildLevel] * 2 < numNodesPerLevel[rightChildNextLevel] )\n"
" {\n"
" //Set the node to the last node in rightChildNextLevel\n"
" rightChildLevel = rightChildNextLevel;\n"
" rightChildIndex = firstIndexOffsetPerLevel[rightChildNextLevel] + numNodesPerLevel[rightChildNextLevel] - 1;\n"
" break;\n"
" }\n"
" }\n"
" }\n"
" \n"
" int isLeftChildLeaf = (leftChildLevel >= numLevels - 1);\n"
" int isRightChildLeaf = (rightChildLevel >= numLevels - 1);\n"
" \n"
" //If left/right child is a leaf node, the index needs to be corrected\n"
" //the way the index is calculated assumes that the leaf and internal nodes are in a contiguous array,\n"
" //with leaf nodes at the end of the array; in actuality, the leaf and internal nodes are in separate arrays\n"
" {\n"
" int leafNodeLevel = numLevels - 1;\n"
" leftChildIndex = (isLeftChildLeaf) ? leftChildIndex - firstIndexOffsetPerLevel[leafNodeLevel] : leftChildIndex;\n"
" rightChildIndex = (isLeftChildLeaf) ? rightChildIndex - firstIndexOffsetPerLevel[leafNodeLevel] : rightChildIndex;\n"
" }\n"
" \n"
" //Set the negative sign bit if the node is internal\n"
" int2 childIndices;\n"
" childIndices.x = getIndexWithInternalNodeMarkerSet(isLeftChildLeaf, leftChildIndex);\n"
" childIndices.y = getIndexWithInternalNodeMarkerSet(isRightChildLeaf, rightChildIndex);\n"
" out_internalNodeChildIndices[internalNodeIndex] = childIndices;\n"
" \n"
" //Assign parent node index to children\n"
" __global int* out_leftChildParentNodeIndices = (isLeftChildLeaf) ? out_leafNodeParentNodes : out_internalNodeParentNodes;\n"
" out_leftChildParentNodeIndices[leftChildIndex] = internalNodeIndex;\n"
" \n"
" __global int* out_rightChildParentNodeIndices = (isRightChildLeaf) ? out_leafNodeParentNodes : out_internalNodeParentNodes;\n"
" out_rightChildParentNodeIndices[rightChildIndex] = internalNodeIndex;\n"
"}\n"
"__kernel void determineInternalNodeAabbs(__global int* firstIndexOffsetPerLevel,\n"
" __global int* numNodesPerLevel, \n"
" __global int2* internalNodeChildIndices,\n"
" __global SortDataCL* mortonCodesAndAabbIndices,\n"
" __global b3AabbCL* leafNodeAabbs, \n"
" __global b3AabbCL* out_internalNodeAabbs, int numLevels, int numInternalNodes)\n"
"{\n"
" int i = get_global_id(0);\n"
" if(i >= numInternalNodes) return;\n"
" \n"
" int numInternalLevels = numLevels - 1;\n"
" \n"
" //Starting from the level next to the leaf nodes, move towards the root(level 0)\n"
" for(int level = numInternalLevels - 1; level >= 0; --level)\n"
" {\n"
" int indexInLevel = i; //Index relative to firstIndexOffsetPerLevel[level]\n"
" \n"
" int numNodesInLevel = numNodesPerLevel[level];\n"
" if(i < numNodesInLevel)\n"
" {\n"
" int internalNodeIndexGlobal = indexInLevel + firstIndexOffsetPerLevel[level];\n"
" int2 childIndicies = internalNodeChildIndices[internalNodeIndexGlobal];\n"
" \n"
" int leftChildIndex = getIndexWithInternalNodeMarkerRemoved(childIndicies.x);\n"
" int rightChildIndex = getIndexWithInternalNodeMarkerRemoved(childIndicies.y);\n"
" \n"
" int isLeftChildLeaf = isLeafNode(childIndicies.x);\n"
" int isRightChildLeaf = isLeafNode(childIndicies.y);\n"
" \n"
" int leftChildLeafIndex = (isLeftChildLeaf) ? mortonCodesAndAabbIndices[leftChildIndex].m_value : -1;\n"
" int rightChildLeafIndex = (isRightChildLeaf) ? mortonCodesAndAabbIndices[rightChildIndex].m_value : -1;\n"
" \n"
" b3AabbCL leftChildAabb = (isLeftChildLeaf) ? leafNodeAabbs[leftChildLeafIndex] : out_internalNodeAabbs[leftChildIndex];\n"
" b3AabbCL rightChildAabb = (isRightChildLeaf) ? leafNodeAabbs[rightChildLeafIndex] : out_internalNodeAabbs[rightChildIndex];\n"
" \n"
" b3AabbCL internalNodeAabb;\n"
" internalNodeAabb.m_min = b3Min(leftChildAabb.m_min, rightChildAabb.m_min);\n"
" internalNodeAabb.m_max = b3Max(leftChildAabb.m_max, rightChildAabb.m_max);\n"
" out_internalNodeAabbs[internalNodeIndexGlobal] = internalNodeAabb;\n"
" }\n"
" \n"
" barrier(CLK_GLOBAL_MEM_FENCE);\n"
" }\n"
"}\n"
"//From sap.cl\n"
"#define NEW_PAIR_MARKER -1\n"
"bool TestAabbAgainstAabb2(const b3AabbCL* aabb1, const b3AabbCL* aabb2)\n"
"{\n"
" bool overlap = true;\n"
" overlap = (aabb1->m_min.x > aabb2->m_max.x || aabb1->m_max.x < aabb2->m_min.x) ? false : overlap;\n"
" overlap = (aabb1->m_min.z > aabb2->m_max.z || aabb1->m_max.z < aabb2->m_min.z) ? false : overlap;\n"
" overlap = (aabb1->m_min.y > aabb2->m_max.y || aabb1->m_max.y < aabb2->m_min.y) ? false : overlap;\n"
" return overlap;\n"
"}\n"
"//From sap.cl\n"
"__kernel void plbvhCalculateOverlappingPairs(__global b3AabbCL* rigidAabbs, \n"
" __global int2* internalNodeChildIndices, __global b3AabbCL* internalNodeAabbs,\n"
" __global SortDataCL* mortonCodesAndAabbIndices,\n"
" __global int* out_numPairs, __global int4* out_overlappingPairs, \n"
" int maxPairs, int numQueryAabbs)\n"
"{\n"
" int queryRigidIndex = get_group_id(0) * get_local_size(0) + get_local_id(0);\n"
" if(queryRigidIndex >= numQueryAabbs) return;\n"
" \n"
" queryRigidIndex = mortonCodesAndAabbIndices[queryRigidIndex].m_value;\n"
" //int queryRigidIndex = get_global_id(0);\n"
" //if(queryRigidIndex >= numQueryAabbs) return;\n"
" \n"
" b3AabbCL queryAabb = rigidAabbs[queryRigidIndex];\n"
" \n"
" int stack[B3_PLVBH_TRAVERSE_MAX_STACK_SIZE];\n"
" \n"
" //Starting by placing only the root node index, 0, in the stack causes it to be detected as a leaf node(see isLeafNode() in loop)\n"
" int stackSize = 2;\n"
" stack[0] = internalNodeChildIndices[B3_PLBVH_ROOT_NODE_INDEX].x;\n"
" stack[1] = internalNodeChildIndices[B3_PLBVH_ROOT_NODE_INDEX].y;\n"
" \n"
" while(stackSize)\n"
" {\n"
" int internalOrLeafNodeIndex = stack[ stackSize - 1 ];\n"
" --stackSize;\n"
" \n"
" int isLeaf = isLeafNode(internalOrLeafNodeIndex); //Internal node if false\n"
" int bvhNodeIndex = getIndexWithInternalNodeMarkerRemoved(internalOrLeafNodeIndex);\n"
" \n"
" //bvhRigidIndex is not used if internal node\n"
" int bvhRigidIndex = (isLeaf) ? mortonCodesAndAabbIndices[bvhNodeIndex].m_value : -1;\n"
" \n"
" b3AabbCL bvhNodeAabb = (isLeaf) ? rigidAabbs[bvhRigidIndex] : internalNodeAabbs[bvhNodeIndex];\n"
" if( queryRigidIndex != bvhRigidIndex && TestAabbAgainstAabb2(&queryAabb, &bvhNodeAabb) )\n"
" {\n"
" if(isLeaf && rigidAabbs[queryRigidIndex].m_minIndices[3] < rigidAabbs[bvhRigidIndex].m_minIndices[3])\n"
" {\n"
" int4 pair;\n"
" pair.x = rigidAabbs[queryRigidIndex].m_minIndices[3];\n"
" pair.y = rigidAabbs[bvhRigidIndex].m_minIndices[3];\n"
" pair.z = NEW_PAIR_MARKER;\n"
" pair.w = NEW_PAIR_MARKER;\n"
" \n"
" int pairIndex = atomic_inc(out_numPairs);\n"
" if(pairIndex < maxPairs) out_overlappingPairs[pairIndex] = pair;\n"
" }\n"
" \n"
" if(!isLeaf) //Internal node\n"
" {\n"
" if(stackSize + 2 > B3_PLVBH_TRAVERSE_MAX_STACK_SIZE)\n"
" {\n"
" //Error\n"
" }\n"
" else\n"
" {\n"
" stack[ stackSize++ ] = internalNodeChildIndices[bvhNodeIndex].x;\n"
" stack[ stackSize++ ] = internalNodeChildIndices[bvhNodeIndex].y;\n"
" }\n"
" }\n"
" }\n"
" \n"
" }\n"
"}\n"
;