From b709d6beebde777ab29455be1ea7c3ba25ace257 Mon Sep 17 00:00:00 2001
From: Jackson Lee <rtrius@gmail.com>
Date: Fri, 14 Mar 2014 13:55:22 -0700
Subject: [PATCH] Clean up plbvhCalculateOverlappingPairs kernel.

Also fix overlapping pair generation with triangle mesh.
(Currently, large/concave AABBs must be the first entry in a pair.)
---
 .../kernels/parallelLinearBvh.cl              | 26 +++++++------------
 .../kernels/parallelLinearBvhKernels.h        | 25 +++++++-----------
 2 files changed, 20 insertions(+), 31 deletions(-)

diff --git a/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvh.cl b/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvh.cl
index 83f355849..0c7e1db69 100644
--- a/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvh.cl
+++ b/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvh.cl
@@ -184,18 +184,12 @@ __kernel void plbvhCalculateOverlappingPairs(__global b3AabbCL* rigidAabbs,
 											__global int* out_numPairs, __global int4* out_overlappingPairs, 
 											int maxPairs, int numQueryAabbs)
 {
-#define USE_SPATIALLY_COHERENT_INDICIES		//mortonCodesAndAabbIndices[] contains rigid body indices sorted along the z-curve
-#ifdef USE_SPATIALLY_COHERENT_INDICIES
-	int queryRigidIndex = get_group_id(0) * get_local_size(0) + get_local_id(0);
-	if(queryRigidIndex >= numQueryAabbs) return;
+	//Using get_group_id()/get_local_id() is faster than get_global_id(0) since
+	//mortonCodesAndAabbIndices[] contains rigid body indices sorted along the z-curve (more spatially coherent)
+	int queryBvhNodeIndex = get_group_id(0) * get_local_size(0) + get_local_id(0);
+	if(queryBvhNodeIndex >= numQueryAabbs) return;
 	
-	int queryBvhNodeIndex = queryRigidIndex;
-	queryRigidIndex = mortonCodesAndAabbIndices[queryRigidIndex].m_value;		//	fix queryRigidIndex naming for this branch
-#else
-	int queryRigidIndex = get_global_id(0);
-	if(queryRigidIndex >= numQueryAabbs) return;
-#endif
-
+	int queryRigidIndex = mortonCodesAndAabbIndices[queryBvhNodeIndex].m_value;
 	b3AabbCL queryAabb = rigidAabbs[queryRigidIndex];
 	
 	int stack[B3_PLVBH_TRAVERSE_MAX_STACK_SIZE];
@@ -213,17 +207,17 @@ __kernel void plbvhCalculateOverlappingPairs(__global b3AabbCL* rigidAabbs,
 		
 		//Optimization - if the BVH is structured as a binary radix tree, then
 		//each internal node corresponds to a contiguous range of leaf nodes(internalNodeLeafIndexRanges[]).
-		//This can be used to avoid testing each AABB-AABB pair twice.
+		//This can be used to avoid testing each AABB-AABB pair twice, including preventing each node from colliding with itself.
 		{
 			int highestLeafIndex = (isLeaf) ? bvhNodeIndex : internalNodeLeafIndexRanges[bvhNodeIndex].y;
-			if(highestLeafIndex < queryBvhNodeIndex) continue;
+			if(highestLeafIndex <= queryBvhNodeIndex) continue;
 		}
 		
 		//bvhRigidIndex is not used if internal node
 		int bvhRigidIndex = (isLeaf) ? mortonCodesAndAabbIndices[bvhNodeIndex].m_value : -1;
 	
 		b3AabbCL bvhNodeAabb = (isLeaf) ? rigidAabbs[bvhRigidIndex] : internalNodeAabbs[bvhNodeIndex];
-		if( queryRigidIndex != bvhRigidIndex && TestAabbAgainstAabb2(&queryAabb, &bvhNodeAabb) )
+		if( TestAabbAgainstAabb2(&queryAabb, &bvhNodeAabb) )
 		{
 			if(isLeaf)
 			{
@@ -388,8 +382,8 @@ __kernel void plbvhLargeAabbAabbTest(__global b3AabbCL* smallAabbs, __global b3A
 		if( TestAabbAgainstAabb2(&smallAabb, &largeAabb) )
 		{
 			int4 pair;
-			pair.x = smallAabb.m_minIndices[3];
-			pair.y = largeAabb.m_minIndices[3];
+			pair.x = largeAabb.m_minIndices[3];
+			pair.y = smallAabb.m_minIndices[3];
 			pair.z = NEW_PAIR_MARKER;
 			pair.w = NEW_PAIR_MARKER;
 			
diff --git a/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvhKernels.h b/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvhKernels.h
index 40c5e3218..37a1e8a5b 100644
--- a/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvhKernels.h
+++ b/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvhKernels.h
@@ -170,17 +170,12 @@ static const char* parallelLinearBvhCL= \
 "											__global int* out_numPairs, __global int4* out_overlappingPairs, \n"
 "											int maxPairs, int numQueryAabbs)\n"
 "{\n"
-"#define USE_SPATIALLY_COHERENT_INDICIES		//mortonCodesAndAabbIndices[] contains rigid body indices sorted along the z-curve\n"
-"#ifdef USE_SPATIALLY_COHERENT_INDICIES\n"
-"	int queryRigidIndex = get_group_id(0) * get_local_size(0) + get_local_id(0);\n"
-"	if(queryRigidIndex >= numQueryAabbs) return;\n"
+"	//Using get_group_id()/get_local_id() is faster than get_global_id(0) since\n"
+"	//mortonCodesAndAabbIndices[] contains rigid body indices sorted along the z-curve (more spatially coherent)\n"
+"	int queryBvhNodeIndex = get_group_id(0) * get_local_size(0) + get_local_id(0);\n"
+"	if(queryBvhNodeIndex >= numQueryAabbs) return;\n"
 "	\n"
-"	int queryBvhNodeIndex = queryRigidIndex;\n"
-"	queryRigidIndex = mortonCodesAndAabbIndices[queryRigidIndex].m_value;		//	fix queryRigidIndex naming for this branch\n"
-"#else\n"
-"	int queryRigidIndex = get_global_id(0);\n"
-"	if(queryRigidIndex >= numQueryAabbs) return;\n"
-"#endif\n"
+"	int queryRigidIndex = mortonCodesAndAabbIndices[queryBvhNodeIndex].m_value;\n"
 "	b3AabbCL queryAabb = rigidAabbs[queryRigidIndex];\n"
 "	\n"
 "	int stack[B3_PLVBH_TRAVERSE_MAX_STACK_SIZE];\n"
@@ -198,17 +193,17 @@ static const char* parallelLinearBvhCL= \
 "		\n"
 "		//Optimization - if the BVH is structured as a binary radix tree, then\n"
 "		//each internal node corresponds to a contiguous range of leaf nodes(internalNodeLeafIndexRanges[]).\n"
-"		//This can be used to avoid testing each AABB-AABB pair twice.\n"
+"		//This can be used to avoid testing each AABB-AABB pair twice, including preventing each node from colliding with itself.\n"
 "		{\n"
 "			int highestLeafIndex = (isLeaf) ? bvhNodeIndex : internalNodeLeafIndexRanges[bvhNodeIndex].y;\n"
-"			if(highestLeafIndex < queryBvhNodeIndex) continue;\n"
+"			if(highestLeafIndex <= queryBvhNodeIndex) continue;\n"
 "		}\n"
 "		\n"
 "		//bvhRigidIndex is not used if internal node\n"
 "		int bvhRigidIndex = (isLeaf) ? mortonCodesAndAabbIndices[bvhNodeIndex].m_value : -1;\n"
 "	\n"
 "		b3AabbCL bvhNodeAabb = (isLeaf) ? rigidAabbs[bvhRigidIndex] : internalNodeAabbs[bvhNodeIndex];\n"
-"		if( queryRigidIndex != bvhRigidIndex && TestAabbAgainstAabb2(&queryAabb, &bvhNodeAabb) )\n"
+"		if( TestAabbAgainstAabb2(&queryAabb, &bvhNodeAabb) )\n"
 "		{\n"
 "			if(isLeaf)\n"
 "			{\n"
@@ -365,8 +360,8 @@ static const char* parallelLinearBvhCL= \
 "		if( TestAabbAgainstAabb2(&smallAabb, &largeAabb) )\n"
 "		{\n"
 "			int4 pair;\n"
-"			pair.x = smallAabb.m_minIndices[3];\n"
-"			pair.y = largeAabb.m_minIndices[3];\n"
+"			pair.x = largeAabb.m_minIndices[3];\n"
+"			pair.y = smallAabb.m_minIndices[3];\n"
 "			pair.z = NEW_PAIR_MARKER;\n"
 "			pair.w = NEW_PAIR_MARKER;\n"
 "			\n"