Add overlapping pairs validation to PairBench.

2014-03-19 22:35:23 -07:00
parent b709d6beeb
commit 26bcff31f7
4 changed files with 122 additions and 8 deletions
--- a/Demos3/GpuDemos/broadphase/PairBench.cpp
+++ b/Demos3/GpuDemos/broadphase/PairBench.cpp
@@ -121,6 +121,7 @@ static BroadphaseEntry allBroadphases[]=
 struct	PairBenchInternalData
 {
 	b3GpuBroadphaseInterface*	m_broadphaseGPU;
 	b3GpuBroadphaseInterface*	m_validationBroadphase;
 	cl_kernel	m_moveObjectsKernel;
 	cl_kernel	m_sineWaveKernel;
@@ -508,6 +509,15 @@ void	PairBench::initPhysics(const ConstructionInfo& ci)
 		m_data->m_colorPairsKernel = b3OpenCLUtils::compileCLKernelFromString(m_clData->m_clContext,m_clData->m_clDevice,pairsKernelsCL,"colorPairsKernel2",&errNum,pairBenchProg);
 		m_data->m_updateAabbSimple = b3OpenCLUtils::compileCLKernelFromString(m_clData->m_clContext,m_clData->m_clDevice,pairsKernelsCL,"updateAabbSimple",&errNum,pairBenchProg);
 		//Method for validating the overlapping pairs requires that the
 		//reference broadphase does not maintain internal state aside from AABB data.
 		//That is, overwriting the AABB state in the broadphase using
 		//	b3GpuBroadphaseInterface::getAllAabbsGPU(),
 		//	b3GpuBroadphaseInterface::getSmallAabbIndicesGPU(), and
 		//	b3GpuBroadphaseInterface::getLargeAabbIndicesGPU()
 		//and then calling b3GpuBroadphaseInterface::calculateOverlappingPairs() should 
 		//always produce the same result regardless of the current state of the broadphase.
 		m_data->m_validationBroadphase = b3GpuParallelLinearBvhBroadphase::CreateFunc(m_clData->m_clContext,m_clData->m_clDevice,m_clData->m_clQueue);
 	}
 	if (ci.m_window)
@@ -770,6 +780,17 @@ void PairBench::renderScene()
 	m_instancingRenderer->renderScene();
 }
 //Ordering predicate for b3Int4 values, used to sort overlapping pair arrays.
 //Compares the members lexicographically in the order x, y, z, w;
 //the first member that differs between a and b decides the result.
 struct OverlappingPairSortPredicate
 {
 	//Returns true if a should be placed before b.
 	inline bool operator() (const b3Int4& a, const b3Int4& b) const
 	{
 		return (a.x != b.x) ? (a.x < b.x)
 			 : (a.y != b.y) ? (a.y < b.y)
 			 : (a.z != b.z) ? (a.z < b.z)
 			 : (a.w < b.w);
 	}
 };
 void PairBench::clientMoveAndDisplay()
 {
 	//color all objects blue
@@ -904,6 +925,9 @@ void PairBench::clientMoveAndDisplay()
 		}
 	}
 	int prealloc = 3*1024*1024;
 	int maxOverlap = b3Min(prealloc,16*numObjects);
 	unsigned long dt = 0;
 	if (numObjects)
 	{
@@ -912,16 +936,104 @@ void PairBench::clientMoveAndDisplay()
 		B3_PROFILE("calculateOverlappingPairs");
 		int sz = sizeof(b3Int4)*64*numObjects;
 		int prealloc = 3*1024*1024;
 		int maxOverlap = b3Min(prealloc,16*numObjects);
 		m_data->m_broadphaseGPU->calculateOverlappingPairs(maxOverlap);
 		int numPairs = m_data->m_broadphaseGPU->getNumOverlap();
 		//printf("numPairs = %d\n", numPairs);
 		dt = cl.getTimeMicroseconds()-dt;
 	}
 	const bool VALIDATE_BROADPHASE = false;	//Check that overlapping pairs of 2 broadphases are the same
 	if(numObjects && VALIDATE_BROADPHASE)
 	{
 		B3_PROFILE("validate broadphases");
 		{
 			B3_PROFILE("calculateOverlappingPairs m_validationBroadphase");
 			//m_data->m_validationBroadphase->getAllAabbsCPU() = m_data->m_broadphaseGPU->getAllAabbsCPU();
 			m_data->m_validationBroadphase->getAllAabbsGPU().copyFromOpenCLArray( m_data->m_broadphaseGPU->getAllAabbsGPU() );
 			m_data->m_validationBroadphase->getSmallAabbIndicesGPU().copyFromOpenCLArray( m_data->m_broadphaseGPU->getSmallAabbIndicesGPU() );
 			m_data->m_validationBroadphase->getLargeAabbIndicesGPU().copyFromOpenCLArray( m_data->m_broadphaseGPU->getLargeAabbIndicesGPU() );
 			m_data->m_validationBroadphase->calculateOverlappingPairs(maxOverlap);
 		}
 		static b3AlignedObjectArray<b3Int4> overlappingPairs;
 		static b3AlignedObjectArray<b3Int4> overlappingPairsReference;
 		m_data->m_broadphaseGPU->getOverlappingPairsGPU().copyToHost(overlappingPairs);
 		m_data->m_validationBroadphase->getOverlappingPairsGPU().copyToHost(overlappingPairsReference);
 		//Reorder pairs so that (pair.x < pair.y) is always true
 		{
 			B3_PROFILE("reorder pairs");
 			for(int i = 0; i < overlappingPairs.size(); ++i)
 			{
 				b3Int4 pair = overlappingPairs[i];
 				if(pair.x > pair.y)
 				{
 					b3Swap(pair.x, pair.y);
 					b3Swap(pair.z, pair.w);
 					overlappingPairs[i] = pair;
 				}
 			}
 			for(int i = 0; i < overlappingPairsReference.size(); ++i)
 			{
 				b3Int4 pair = overlappingPairsReference[i];
 				if(pair.x > pair.y)
 				{
 					b3Swap(pair.x, pair.y);
 					b3Swap(pair.z, pair.w);
 					overlappingPairsReference[i] = pair;
 				}
 			}
 		}
 		//
 		{
 			B3_PROFILE("Sort overlapping pairs from most to least significant bit");
 			overlappingPairs.quickSort( OverlappingPairSortPredicate() );
 			overlappingPairsReference.quickSort( OverlappingPairSortPredicate() );
 		}
 		//Compare
 		{
 			B3_PROFILE("compare pairs");
 			int numPairs = overlappingPairs.size();
 			int numPairsReference = overlappingPairsReference.size();
 			bool success = true;
 			if(numPairs == numPairsReference)
 			{
 				for(int i = 0; i < numPairsReference; ++i)
 				{
 					const b3Int4& pairA = overlappingPairs[i];
 					const b3Int4& pairB = overlappingPairsReference[i];
 					if(  pairA.x != pairB.x
 					  || pairA.y != pairB.y
 					  || pairA.z != pairB.z
 					  || pairA.w != pairB.w ) 
 					{
 						b3Error("Error: one or more overlappingPairs differs from reference.\n");
 						success = false;
 						break;
 					}
 				}
 			}
 			else 
 			{
 				b3Error("Error: numPairs %d != numPairsReference %d \n", numPairs, numPairsReference);
 				success = false;
 			}
 			printf("Broadphase validation: %d \n", success);
 		}
 	}
 	if (m_data->m_gui)
 	{
--- a/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvh.cl
+++ b/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvh.cl
@@ -422,7 +422,8 @@ __kernel void plbvhLargeAabbRayTest(__global b3AabbCL* largeRigidAabbs, __global
 //Set so that it is always greater than the actual common prefixes, and never selected as a parent node.
 //If there are no duplicates, then the highest common prefix is 32 or 64, depending on the number of bits used for the z-curve.
-//Duplicates common prefixes increase the highest common prefix by N, where 2^N is the number of duplicate nodes.
+//Duplicate common prefixes increase the highest common prefix at most by the number of bits used to index the leaf node.
 //Since 32 bit ints are used to index leaf nodes, the max prefix is 64(32 + 32 bit z-curve) or 96(32 + 64 bit z-curve).
 #define B3_PLBVH_INVALID_COMMON_PREFIX 128
 #define B3_PLBVH_ROOT_NODE_MARKER -1
--- a/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvhKernels.h
+++ b/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvhKernels.h
@@ -398,7 +398,8 @@ static const char* parallelLinearBvhCL= \
 "}\n"
 "//Set so that it is always greater than the actual common prefixes, and never selected as a parent node.\n"
 "//If there are no duplicates, then the highest common prefix is 32 or 64, depending on the number of bits used for the z-curve.\n"
-"//Duplicates common prefixes increase the highest common prefix by N, where 2^N is the number of duplicate nodes.\n"
+"//Duplicate common prefixes increase the highest common prefix at most by the number of bits used to index the leaf node.\n"
 "//Since 32 bit ints are used to index leaf nodes, the max prefix is 64(32 + 32 bit z-curve) or 96(32 + 64 bit z-curve).\n"
 "#define B3_PLBVH_INVALID_COMMON_PREFIX 128\n"
 "#define B3_PLBVH_ROOT_NODE_MARKER -1\n"
 "#define b3Int64 long\n"
--- a/src/Bullet3OpenCL/Raycast/b3GpuRaycast.cpp
+++ b/src/Bullet3OpenCL/Raycast/b3GpuRaycast.cpp
@@ -39,7 +39,7 @@ struct b3GpuRaycastInternalData
 	b3OpenCLArray<int>* m_firstRayRigidPairIndexPerRay;
 	b3OpenCLArray<int>* m_numRayRigidPairsPerRay;
-	//1 element per (ray index, rigid index) pair
+	//1 element per (ray index, rigid index) pair, where the ray intersects with the rigid's AABB
 	b3OpenCLArray<int>* m_gpuNumRayRigidPairs;
 	b3OpenCLArray<b3Int2>* m_gpuRayRigidPairs;	//x == ray index, y == rigid index