Fix PLBVH reduction kernels, simplify nodes per level calculation.

Also calculate index ranges for each internal node.
2014-02-19 21:49:30 -08:00
parent 7f0e361fa0
commit e955192971
4 changed files with 193 additions and 175 deletions
--- a/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvh.cl
+++ b/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvh.cl
@@ -80,60 +80,32 @@ unsigned int getMortonCode(unsigned int x, unsigned int y, unsigned int z)
 	return interleaveBits(x) << 0 | interleaveBits(y) << 1 | interleaveBits(z) << 2;
 }

-
-__kernel void findAllNodesMergedAabb(__global b3AabbCL* out_mergedAabb, int numAabbs)
+//Should replace with an optimized parallel reduction
+__kernel void findAllNodesMergedAabb(__global b3AabbCL* out_mergedAabb, int numAabbsNeedingMerge)
 {
-	int aabbIndex = get_global_id(0);
-	if(aabbIndex >= numAabbs) return;
-	
-	//Find the most significant bit(msb)
-	int mostSignificantBit = 0;
-	{
-		int temp = numAabbs;
-		while(temp >>= 1) mostSignificantBit++;		//Start counting from 0 (0 and 1 have msb 0, 2 has msb 1)
-	}
-	
-	int numberOfAabbsAboveMsbSplit = numAabbs & ~( ~(0) << mostSignificantBit );
-	int numRemainingAabbs = (1 << mostSignificantBit);
-	
-	//Merge AABBs above most significant bit so that the number of remaining AABBs is a power of 2
-	//For example, if there are 159 AABBs = 128 + 31, then merge indices [0, 30] and 128 + [0, 30]
-	if(aabbIndex < numberOfAabbsAboveMsbSplit)
-	{
-		int otherAabbIndex = numRemainingAabbs + aabbIndex;
-		
-		b3AabbCL aabb = out_mergedAabb[aabbIndex];
-		b3AabbCL otherAabb = out_mergedAabb[otherAabbIndex];
-		
-		b3AabbCL mergedAabb;
-		mergedAabb.m_min = b3Min(aabb.m_min, otherAabb.m_min);
-		mergedAabb.m_max = b3Max(aabb.m_max, otherAabb.m_max);
-		out_mergedAabb[aabbIndex] = mergedAabb;
-	}
-	
-	barrier(CLK_GLOBAL_MEM_FENCE);
-	
+	//Each time this kernel is added to the command queue, 
+	//the number of AABBs needing to be merged is halved
 	//
-	int offset = numRemainingAabbs / 2;
-	while(offset >= 1)
-	{
-		if(aabbIndex < offset)
-		{
-			int otherAabbIndex = aabbIndex + offset;
+	//Example with 159 AABBs:
+	//	numRemainingAabbs == 159 / 2 + 159 % 2 == 80
+	//	numMergedAabbs == 159 - 80 == 79
+	//So, indices [0, 78] are merged with [0 + 80, 78 + 80]
+	
+	int numRemainingAabbs = numAabbsNeedingMerge / 2 + numAabbsNeedingMerge % 2;
+	int numMergedAabbs = numAabbsNeedingMerge - numRemainingAabbs;
+	
+	int aabbIndex = get_global_id(0);
+	if(aabbIndex >= numMergedAabbs) return;
+	
+	int otherAabbIndex = aabbIndex + numRemainingAabbs;
+	
+	b3AabbCL aabb = out_mergedAabb[aabbIndex];
+	b3AabbCL otherAabb = out_mergedAabb[otherAabbIndex];
 		
-			b3AabbCL aabb = out_mergedAabb[aabbIndex];
-			b3AabbCL otherAabb = out_mergedAabb[otherAabbIndex];
-		
-			b3AabbCL mergedAabb;
-			mergedAabb.m_min = b3Min(aabb.m_min, otherAabb.m_min);
-			mergedAabb.m_max = b3Max(aabb.m_max, otherAabb.m_max);
-			out_mergedAabb[aabbIndex] = mergedAabb;
-		}
-		
-		offset /= 2;
-		
-		barrier(CLK_GLOBAL_MEM_FENCE);
-	}
+	b3AabbCL mergedAabb;
+	mergedAabb.m_min = b3Min(aabb.m_min, otherAabb.m_min);
+	mergedAabb.m_max = b3Max(aabb.m_max, otherAabb.m_max);
+	out_mergedAabb[aabbIndex] = mergedAabb;
 }

 __kernel void assignMortonCodesAndAabbIndicies(__global b3AabbCL* worldSpaceAabbs, __global b3AabbCL* mergedAabbOfAllNodes, 
@@ -254,7 +226,7 @@ __kernel void constructBinaryTree(__global int* firstIndexOffsetPerLevel,
 	{
 		int leafNodeLevel = numLevels - 1;
 		leftChildIndex = (isLeftChildLeaf) ? leftChildIndex - firstIndexOffsetPerLevel[leafNodeLevel] : leftChildIndex;
-		rightChildIndex = (isLeftChildLeaf) ? rightChildIndex - firstIndexOffsetPerLevel[leafNodeLevel] : rightChildIndex;
+		rightChildIndex = (isRightChildLeaf) ? rightChildIndex - firstIndexOffsetPerLevel[leafNodeLevel] : rightChildIndex;
 	}
 	
 	//Set the negative sign bit if the node is internal
@@ -276,20 +248,19 @@ __kernel void determineInternalNodeAabbs(__global int* firstIndexOffsetPerLevel,
 										__global int2* internalNodeChildIndices,
 										__global SortDataCL* mortonCodesAndAabbIndices,
 										__global b3AabbCL* leafNodeAabbs, 
-										__global b3AabbCL* out_internalNodeAabbs, int numLevels, int numInternalNodes)
+										__global int2* out_internalNodeLeafIndexRanges,
+										__global b3AabbCL* out_internalNodeAabbs, 
+										int numLevels, int numInternalNodes, int level)
 {
 	int i = get_global_id(0);
 	if(i >= numInternalNodes) return;
 	
-	int numInternalLevels = numLevels - 1;
-	
-	//Starting from the level next to the leaf nodes, move towards the root(level 0)
-	for(int level = numInternalLevels - 1; level >= 0; --level)
+	//For each node in a level, check its child nodes to determine its AABB
 	{
 		int indexInLevel = i;	//Index relative to firstIndexOffsetPerLevel[level]
 		
 		int numNodesInLevel = numNodesPerLevel[level];
-		if(i < numNodesInLevel)
+		if(indexInLevel < numNodesInLevel)
 		{
 			int internalNodeIndexGlobal = indexInLevel + firstIndexOffsetPerLevel[level];
 			int2 childIndicies = internalNodeChildIndices[internalNodeIndexGlobal];
@@ -300,19 +271,26 @@ __kernel void determineInternalNodeAabbs(__global int* firstIndexOffsetPerLevel,
 			int isLeftChildLeaf = isLeafNode(childIndicies.x);
 			int isRightChildLeaf = isLeafNode(childIndicies.y);
 			
+			//leftChildLeafIndex and rightChildLeafIndex are rigid body indices
 			int leftChildLeafIndex = (isLeftChildLeaf) ? mortonCodesAndAabbIndices[leftChildIndex].m_value : -1;
 			int rightChildLeafIndex = (isRightChildLeaf) ? mortonCodesAndAabbIndices[rightChildIndex].m_value : -1;
 			
 			b3AabbCL leftChildAabb = (isLeftChildLeaf) ? leafNodeAabbs[leftChildLeafIndex] : out_internalNodeAabbs[leftChildIndex];
 			b3AabbCL rightChildAabb = (isRightChildLeaf) ? leafNodeAabbs[rightChildLeafIndex] : out_internalNodeAabbs[rightChildIndex];
 			
+			//
 			b3AabbCL internalNodeAabb;
 			internalNodeAabb.m_min = b3Min(leftChildAabb.m_min, rightChildAabb.m_min);
 			internalNodeAabb.m_max = b3Max(leftChildAabb.m_max, rightChildAabb.m_max);
 			out_internalNodeAabbs[internalNodeIndexGlobal] = internalNodeAabb;
+			
+			//For index range, x == min and y == max; left child always has lower index
+			int2 leafIndexRange;
+			leafIndexRange.x = (isLeftChildLeaf) ? leftChildIndex : out_internalNodeLeafIndexRanges[leftChildIndex].x;
+			leafIndexRange.y = (isRightChildLeaf) ? rightChildIndex : out_internalNodeLeafIndexRanges[rightChildIndex].y;
+			
+			out_internalNodeLeafIndexRanges[internalNodeIndexGlobal] = leafIndexRange;
 		}
-	
-		barrier(CLK_GLOBAL_MEM_FENCE);
 	}
 }

@@ -331,7 +309,9 @@ bool TestAabbAgainstAabb2(const b3AabbCL* aabb1, const b3AabbCL* aabb2)
 //From sap.cl

 __kernel void plbvhCalculateOverlappingPairs(__global b3AabbCL* rigidAabbs, 
-											__global int2* internalNodeChildIndices, __global b3AabbCL* internalNodeAabbs,
+											__global int2* internalNodeChildIndices, 
+											__global b3AabbCL* internalNodeAabbs,
+											__global int2* internalNodeLeafIndexRanges,
 											__global SortDataCL* mortonCodesAndAabbIndices,
 											__global int* out_numPairs, __global int4* out_overlappingPairs, 
 											int maxPairs, int numQueryAabbs)
@@ -341,7 +321,8 @@ __kernel void plbvhCalculateOverlappingPairs(__global b3AabbCL* rigidAabbs,
 	int queryRigidIndex = get_group_id(0) * get_local_size(0) + get_local_id(0);
 	if(queryRigidIndex >= numQueryAabbs) return;
 	
-	queryRigidIndex = mortonCodesAndAabbIndices[queryRigidIndex].m_value;
+	int queryBvhNodeIndex = queryRigidIndex;
+	queryRigidIndex = mortonCodesAndAabbIndices[queryRigidIndex].m_value;		//	TODO: fix queryRigidIndex naming for this branch
 #else
 	int queryRigidIndex = get_global_id(0);
 	if(queryRigidIndex >= numQueryAabbs) return;
@@ -363,7 +344,15 @@ __kernel void plbvhCalculateOverlappingPairs(__global b3AabbCL* rigidAabbs,
 		
 		int isLeaf = isLeafNode(internalOrLeafNodeIndex);	//Internal node if false
 		int bvhNodeIndex = getIndexWithInternalNodeMarkerRemoved(internalOrLeafNodeIndex);
-	
+		
+		//Optimization - if the node is not a leaf, check whether the highest leaf index of that node
+		//is less than the queried node's index to avoid testing each pair twice.
+		{
+			//	FIXME: produces duplicate pairs
+		//	int highestLeafIndex = (isLeaf) ? numQueryAabbs : internalNodeLeafIndexRanges[bvhNodeIndex].y;
+		//	if(highestLeafIndex < queryBvhNodeIndex) continue;
+		}
+		
 		//bvhRigidIndex is not used if internal node
 		int bvhRigidIndex = (isLeaf) ? mortonCodesAndAabbIndices[bvhNodeIndex].m_value : -1;