diff --git a/Demos3/GpuDemos/broadphase/PairBench.cpp b/Demos3/GpuDemos/broadphase/PairBench.cpp
index 53995fd65..67a233b84 100644
--- a/Demos3/GpuDemos/broadphase/PairBench.cpp
+++ b/Demos3/GpuDemos/broadphase/PairBench.cpp
@@ -121,6 +121,7 @@ static BroadphaseEntry allBroadphases[]=
 struct	PairBenchInternalData
 {
 	b3GpuBroadphaseInterface*	m_broadphaseGPU;
+	b3GpuBroadphaseInterface*	m_validationBroadphase;
 
 	cl_kernel	m_moveObjectsKernel;
 	cl_kernel	m_sineWaveKernel;
@@ -507,7 +508,16 @@ void	PairBench::initPhysics(const ConstructionInfo& ci)
 		m_data->m_sineWaveKernel = b3OpenCLUtils::compileCLKernelFromString(m_clData->m_clContext,m_clData->m_clDevice,pairsKernelsCL,"sineWaveKernel",&errNum,pairBenchProg);
 		m_data->m_colorPairsKernel = b3OpenCLUtils::compileCLKernelFromString(m_clData->m_clContext,m_clData->m_clDevice,pairsKernelsCL,"colorPairsKernel2",&errNum,pairBenchProg);
 		m_data->m_updateAabbSimple = b3OpenCLUtils::compileCLKernelFromString(m_clData->m_clContext,m_clData->m_clDevice,pairsKernelsCL,"updateAabbSimple",&errNum,pairBenchProg);
-			
+		
+		//The method used to validate the overlapping pairs requires that the
+		//reference broadphase keeps no internal state other than its AABB data.
+		//That is, overwriting the AABB state of the broadphase through
+		//	b3GpuBroadphaseInterface::getAllAabbsGPU(),
+		//	b3GpuBroadphaseInterface::getSmallAabbIndicesGPU(), and
+		//	b3GpuBroadphaseInterface::getLargeAabbIndicesGPU()
+		//and then calling b3GpuBroadphaseInterface::calculateOverlappingPairs() should
+		//always produce the same result, regardless of the broadphase's previous state.
+		m_data->m_validationBroadphase = b3GpuParallelLinearBvhBroadphase::CreateFunc(m_clData->m_clContext,m_clData->m_clDevice,m_clData->m_clQueue);
 	}
 
 	if (ci.m_window)
@@ -770,6 +780,17 @@ void PairBench::renderScene()
 	m_instancingRenderer->renderScene();
 }
 
+//Orders two b3Int4 values lexicographically: by x, then y, then z, then w.
+struct OverlappingPairSortPredicate 
+{
+	inline bool operator() (const b3Int4& lhs, const b3Int4& rhs) const 
+	{
+		return (lhs.x != rhs.x) ? (lhs.x < rhs.x)
+			 : (lhs.y != rhs.y) ? (lhs.y < rhs.y)
+			 : (lhs.z != rhs.z) ? (lhs.z < rhs.z) : (lhs.w < rhs.w);
+	}
+};
+
 void PairBench::clientMoveAndDisplay()
 {
 	//color all objects blue
@@ -903,7 +924,10 @@ void PairBench::clientMoveAndDisplay()
 		
 		}
 	}
-
+	
+	int prealloc = 3*1024*1024;
+	int maxOverlap = b3Min(prealloc,16*numObjects);
+	
 	unsigned long dt = 0;
 	if (numObjects)
 	{
@@ -912,16 +936,104 @@ void PairBench::clientMoveAndDisplay()
 		B3_PROFILE("calculateOverlappingPairs");
 		int sz = sizeof(b3Int4)*64*numObjects;
 
-		int prealloc = 3*1024*1024;
-
-		int maxOverlap = b3Min(prealloc,16*numObjects);
 
 		m_data->m_broadphaseGPU->calculateOverlappingPairs(maxOverlap);
 		int numPairs = m_data->m_broadphaseGPU->getNumOverlap();
 		//printf("numPairs = %d\n", numPairs);
 		dt = cl.getTimeMicroseconds()-dt;
+		
 	}
 	
+	const bool VALIDATE_BROADPHASE = false;	//Check that overlapping pairs of 2 broadphases are the same
+	if(numObjects && VALIDATE_BROADPHASE)
+	{
+		B3_PROFILE("validate broadphases");
+			
+		{
+			B3_PROFILE("calculateOverlappingPairs m_validationBroadphase");
+			//m_data->m_validationBroadphase->getAllAabbsCPU() = m_data->m_broadphaseGPU->getAllAabbsCPU();
+			
+			m_data->m_validationBroadphase->getAllAabbsGPU().copyFromOpenCLArray( m_data->m_broadphaseGPU->getAllAabbsGPU() );
+			m_data->m_validationBroadphase->getSmallAabbIndicesGPU().copyFromOpenCLArray( m_data->m_broadphaseGPU->getSmallAabbIndicesGPU() );
+			m_data->m_validationBroadphase->getLargeAabbIndicesGPU().copyFromOpenCLArray( m_data->m_broadphaseGPU->getLargeAabbIndicesGPU() );
+			
+			m_data->m_validationBroadphase->calculateOverlappingPairs(maxOverlap);
+		}
+		
+		static b3AlignedObjectArray<b3Int4> overlappingPairs;
+		static b3AlignedObjectArray<b3Int4> overlappingPairsReference;
+		m_data->m_broadphaseGPU->getOverlappingPairsGPU().copyToHost(overlappingPairs);
+		m_data->m_validationBroadphase->getOverlappingPairsGPU().copyToHost(overlappingPairsReference);
+		
+		//Normalize pairs so that (pair.x <= pair.y) always holds; z and w are swapped together with x and y
+		{
+			B3_PROFILE("reorder pairs");
+			
+			for(int i = 0; i < overlappingPairs.size(); ++i)
+			{
+				b3Int4 pair = overlappingPairs[i];
+				if(pair.x > pair.y)
+				{
+					b3Swap(pair.x, pair.y);
+					b3Swap(pair.z, pair.w);
+					overlappingPairs[i] = pair;
+				}
+			}
+			for(int i = 0; i < overlappingPairsReference.size(); ++i)
+			{
+				b3Int4 pair = overlappingPairsReference[i];
+				if(pair.x > pair.y)
+				{
+					b3Swap(pair.x, pair.y);
+					b3Swap(pair.z, pair.w);
+					overlappingPairsReference[i] = pair;
+				}
+			}
+		}
+		
+		//Sort both pair lists with the same predicate so that they can be compared element by element
+		{
+			B3_PROFILE("Sort overlapping pairs from most to least significant bit");
+			
+			overlappingPairs.quickSort( OverlappingPairSortPredicate() );
+			overlappingPairsReference.quickSort( OverlappingPairSortPredicate() );
+		}
+		
+		//Compare
+		{
+			B3_PROFILE("compare pairs");
+			
+			int numPairs = overlappingPairs.size();
+			int numPairsReference = overlappingPairsReference.size();
+			
+			bool success = true;
+			
+			if(numPairs == numPairsReference)
+			{
+				for(int i = 0; i < numPairsReference; ++i)
+				{
+					const b3Int4& pairA = overlappingPairs[i];
+					const b3Int4& pairB = overlappingPairsReference[i];
+					if(  pairA.x != pairB.x
+					  || pairA.y != pairB.y
+					  || pairA.z != pairB.z
+					  || pairA.w != pairB.w ) 
+					{
+						b3Error("Error: one or more overlappingPairs differs from reference.\n");
+						success = false;
+						break;
+					}
+				}
+			}
+			else 
+			{
+				b3Error("Error: numPairs %d != numPairsReference %d \n", numPairs, numPairsReference);
+				success = false;
+			}
+			
+			printf("Broadphase validation: %d \n", success);
+		}
+	}
 			
 	if (m_data->m_gui)
 	{
diff --git a/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvh.cl b/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvh.cl
index 0c7e1db69..586bb8abb 100644
--- a/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvh.cl
+++ b/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvh.cl
@@ -422,7 +422,8 @@ __kernel void plbvhLargeAabbRayTest(__global b3AabbCL* largeRigidAabbs, __global
 
 //Set so that it is always greater than the actual common prefixes, and never selected as a parent node.
 //If there are no duplicates, then the highest common prefix is 32 or 64, depending on the number of bits used for the z-curve.
-//Duplicates common prefixes increase the highest common prefix by N, where 2^N is the number of duplicate nodes.
+//Duplicate common prefixes increase the highest common prefix at most by the number of bits used to index the leaf node.
+//Since 32 bit ints are used to index leaf nodes, the max prefix is 64(32 + 32 bit z-curve) or 96(32 + 64 bit z-curve).
 #define B3_PLBVH_INVALID_COMMON_PREFIX 128
 
 #define B3_PLBVH_ROOT_NODE_MARKER -1
diff --git a/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvhKernels.h b/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvhKernels.h
index 37a1e8a5b..1b72803d3 100644
--- a/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvhKernels.h
+++ b/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvhKernels.h
@@ -398,7 +398,8 @@ static const char* parallelLinearBvhCL= \
 "}\n"
 "//Set so that it is always greater than the actual common prefixes, and never selected as a parent node.\n"
 "//If there are no duplicates, then the highest common prefix is 32 or 64, depending on the number of bits used for the z-curve.\n"
-"//Duplicates common prefixes increase the highest common prefix by N, where 2^N is the number of duplicate nodes.\n"
+"//Duplicate common prefixes increase the highest common prefix at most by the number of bits used to index the leaf node.\n"
+"//Since 32 bit ints are used to index leaf nodes, the max prefix is 64(32 + 32 bit z-curve) or 96(32 + 64 bit z-curve).\n"
 "#define B3_PLBVH_INVALID_COMMON_PREFIX 128\n"
 "#define B3_PLBVH_ROOT_NODE_MARKER -1\n"
 "#define b3Int64 long\n"
diff --git a/src/Bullet3OpenCL/Raycast/b3GpuRaycast.cpp b/src/Bullet3OpenCL/Raycast/b3GpuRaycast.cpp
index 294a20f74..4ef38bd1d 100644
--- a/src/Bullet3OpenCL/Raycast/b3GpuRaycast.cpp
+++ b/src/Bullet3OpenCL/Raycast/b3GpuRaycast.cpp
@@ -39,7 +39,7 @@ struct b3GpuRaycastInternalData
 	b3OpenCLArray<int>* m_firstRayRigidPairIndexPerRay;
 	b3OpenCLArray<int>* m_numRayRigidPairsPerRay;
 	
-	//1 element per (ray index, rigid index) pair
+	//1 element per (ray index, rigid index) pair, where the ray intersects with the rigid's AABB
 	b3OpenCLArray<int>* m_gpuNumRayRigidPairs;
 	b3OpenCLArray<b3Int2>* m_gpuRayRigidPairs;	//x == ray index, y == rigid index