Implement compound-versus-compound BVH acceleration on the host (quantized tree-versus-tree traversal, using subtrees and quantization)

2013-08-16 08:58:52 -07:00
parent b32ae0c75c
commit ef224370ab
4 changed files with 340 additions and 18 deletions
--- a/src/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.cpp
+++ b/src/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.cpp
@@ -19,6 +19,7 @@ subject to the following restrictions:
 ///And contact clipping based on work from Simon Hobbs

 //#define B3_DEBUG_SAT_FACE
+
 //#define CHECK_ON_HOST

 #ifdef CHECK_ON_HOST
@@ -1368,6 +1369,23 @@ void computeContactPlaneConvex(int pairIndex,



+B3_FORCE_INLINE b3Vector3	MyUnQuantize(const unsigned short* vecIn, const b3Vector3& quantization, const b3Vector3& bvhAabbMin)
+	{	//de-quantize: divide each unsigned short component by its per-axis scale, then shift by bvhAabbMin
+			const b3Scalar scaledX = (b3Scalar)(vecIn[0]) / (quantization.getX());
+			const b3Scalar scaledY = (b3Scalar)(vecIn[1]) / (quantization.getY());
+			const b3Scalar scaledZ = (b3Scalar)(vecIn[2]) / (quantization.getZ());
+			b3Vector3	result;
+			result.setValue(scaledX, scaledY, scaledZ);
+			result += bvhAabbMin;
+			return result;
+	}
+
+void traverseTreeTree()	//empty stub: takes no arguments and performs no work yet
+{
+	//intentionally empty; the tree-versus-tree traversal is currently written inline in findCompoundPairsKernel
+}
+
+
 // work-in-progress
 __kernel void   findCompoundPairsKernel( 
 	int pairIndex,
@@ -1384,7 +1402,10 @@ __kernel void   findCompoundPairsKernel(
 	__global const b3GpuChildShape* gpuChildShapes,
 	__global b3Int4* gpuCompoundPairsOut,
 	__global  int* numCompoundPairsOut,
-	int maxNumCompoundPairsCapacity
+	int maxNumCompoundPairsCapacity,
+	b3AlignedObjectArray<b3QuantizedBvhNode>&	treeNodesCPU,
+	b3AlignedObjectArray<b3BvhSubtreeInfo>&	subTreesCPU,
+	b3AlignedObjectArray<b3BvhInfo>&	bvhInfoCPU
 	)
 {

@@ -1403,6 +1424,186 @@ __kernel void   findCompoundPairsKernel(
 			return;
 		}

+		if ((collidables[collidableIndexA].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) &&(collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS))
+		{
+			//Both shapes are compounds that reference a quantized BVH: collect candidate child-shape
+			//pairs by traversing the two trees against each other on the host.
+			int bvhA = collidables[collidableIndexA].m_compoundBvhIndex;
+			int bvhB = collidables[collidableIndexB].m_compoundBvhIndex;
+			int numSubTreesA = bvhInfoCPU[bvhA].m_numSubTrees;
+			int subTreesOffsetA = bvhInfoCPU[bvhA].m_subTreeOffset;
+			int numSubTreesB = bvhInfoCPU[bvhB].m_numSubTrees;
+			int subTreesOffsetB = bvhInfoCPU[bvhB].m_subTreeOffset;
+			//rigid-body transforms of A and B, built from m_pos and m_quat
+			float4 posA = rigidBodies[bodyIndexA].m_pos;
+			b3Quat ornA = rigidBodies[bodyIndexA].m_quat;
+
+			b3Transform transA;
+			transA.setIdentity();
+			transA.setOrigin(posA);
+			transA.setRotation(ornA);
+
+			b3Quat ornB = rigidBodies[bodyIndexB].m_quat;
+			float4 posB = rigidBodies[bodyIndexB].m_pos;
+
+			b3Transform transB;
+			transB.setIdentity();
+			transB.setOrigin(posB);
+			transB.setRotation(ornB);
+
+			//Coarse pass: test every subtree header AABB of A against every subtree header AABB of B.
+			//Only overlapping subtree pairs get the node-versus-node traversal below.
+			for (int p=0;p<numSubTreesA;p++)
+			{
+				b3BvhSubtreeInfo subtreeA = subTreesCPU[subTreesOffsetA+p];
+				//subtree bounds are stored quantized: de-quantize with this BVH's scale and minimum
+				b3Vector3 treeAminLocal = MyUnQuantize(subtreeA.m_quantizedAabbMin,bvhInfoCPU[bvhA].m_quantization,bvhInfoCPU[bvhA].m_aabbMin);
+				b3Vector3 treeAmaxLocal = MyUnQuantize(subtreeA.m_quantizedAabbMax,bvhInfoCPU[bvhA].m_quantization,bvhInfoCPU[bvhA].m_aabbMin);
+
+				b3Vector3 aabbAMinOut,aabbAMaxOut;
+				float margin=0.f;
+				b3TransformAabb(treeAminLocal,treeAmaxLocal, margin,transA,aabbAMinOut,aabbAMaxOut);
+
+				for (int q=0;q<numSubTreesB;q++)
+				{
+					b3BvhSubtreeInfo subtreeB = subTreesCPU[subTreesOffsetB+q];
+
+					b3Vector3 treeBminLocal = MyUnQuantize(subtreeB.m_quantizedAabbMin,bvhInfoCPU[bvhB].m_quantization,bvhInfoCPU[bvhB].m_aabbMin);
+					b3Vector3 treeBmaxLocal = MyUnQuantize(subtreeB.m_quantizedAabbMax,bvhInfoCPU[bvhB].m_quantization,bvhInfoCPU[bvhB].m_aabbMin);
+
+					b3Vector3 aabbBMinOut,aabbBMaxOut;
+					float margin=0.f;
+					b3TransformAabb(treeBminLocal,treeBmaxLocal, margin,transB,aabbBMinOut,aabbBMaxOut);
+
+					bool aabbOverlap = b3TestAabbAgainstAabb2(aabbAMinOut,aabbAMaxOut,aabbBMinOut,aabbBMaxOut);
+					if (aabbOverlap)
+					{
+						//NOTE(review): roots index treeNodesCPU directly, no per-BVH node offset is added - confirm
+						int startNodeIndexA = subtreeA.m_rootNodeIndex;
+						int endNodeIndexA = subtreeA.m_rootNodeIndex+subtreeA.m_subtreeSize;
+
+						int startNodeIndexB = subtreeB.m_rootNodeIndex;
+						int endNodeIndexB = subtreeB.m_rootNodeIndex+subtreeB.m_subtreeSize;
+						//explicit fixed-capacity stack of (nodeA,nodeB) index pairs that still have to be tested
+						b3AlignedObjectArray<b3Int2> nodeStack;
+						b3Int2 node0;
+						node0.x = startNodeIndexA;
+						node0.y = startNodeIndexB;
+
+						int maxStackDepth = 1024;
+						nodeStack.resize(maxStackDepth);
+						int depth=0;
+						nodeStack[depth++]=node0;
+
+						do
+						{
+							b3Int2 node = nodeStack[--depth];
+
+							b3Vector3 aMinLocal = MyUnQuantize(treeNodesCPU[node.x].m_quantizedAabbMin,bvhInfoCPU[bvhA].m_quantization,bvhInfoCPU[bvhA].m_aabbMin);
+							b3Vector3 aMaxLocal = MyUnQuantize(treeNodesCPU[node.x].m_quantizedAabbMax,bvhInfoCPU[bvhA].m_quantization,bvhInfoCPU[bvhA].m_aabbMin);
+
+							b3Vector3 bMinLocal = MyUnQuantize(treeNodesCPU[node.y].m_quantizedAabbMin,bvhInfoCPU[bvhB].m_quantization,bvhInfoCPU[bvhB].m_aabbMin);
+							b3Vector3 bMaxLocal = MyUnQuantize(treeNodesCPU[node.y].m_quantizedAabbMax,bvhInfoCPU[bvhB].m_quantization,bvhInfoCPU[bvhB].m_aabbMin);
+
+							float margin=0.f;
+							b3Vector3 aabbAMinOut,aabbAMaxOut;
+							b3TransformAabb(aMinLocal,aMaxLocal, margin,transA,aabbAMinOut,aabbAMaxOut);
+
+							b3Vector3 aabbBMinOut,aabbBMaxOut;
+							b3TransformAabb(bMinLocal,bMaxLocal, margin,transB,aabbBMinOut,aabbBMaxOut);
+
+							bool nodeOverlap = b3TestAabbAgainstAabb2(aabbAMinOut,aabbAMaxOut,aabbBMinOut,aabbBMaxOut);
+							if (nodeOverlap)
+							{
+								bool isLeafA = treeNodesCPU[node.x].isLeafNode();
+								bool isLeafB = treeNodesCPU[node.y].isLeafNode();
+								bool isInternalA = !isLeafA;
+								bool isInternalB = !isLeafB;
+
+								//expanding a pair pushes up to 4 entries: report and drop the pair when they might not fit (leaf-leaf pairs push nothing)
+								if (depth+4>maxStackDepth && !(isLeafA && isLeafB))
+								{
+									b3Error("Error: traversal exceeded maxStackDepth\n");
+									continue;
+								}
+								//left child directly follows its parent; right child follows the whole left subtree (leftChild + its escape index, +1 for a leaf)
+								if(isInternalA)
+								{
+									int nodeAleftChild = node.x+1;
+									bool isNodeALeftChildLeaf = treeNodesCPU[nodeAleftChild].isLeafNode();
+									int nodeArightChild = isNodeALeftChildLeaf? node.x+2 : nodeAleftChild + treeNodesCPU[nodeAleftChild].getEscapeIndex();
+
+									if(isInternalB)
+									{
+										int nodeBleftChild = node.y+1;
+										bool isNodeBLeftChildLeaf = treeNodesCPU[nodeBleftChild].isLeafNode();
+										int nodeBrightChild = isNodeBLeftChildLeaf? node.y+2 : nodeBleftChild + treeNodesCPU[nodeBleftChild].getEscapeIndex();
+										//both internal: descend into all four child combinations
+										nodeStack[depth++] = b3MakeInt2(nodeAleftChild, nodeBleftChild);
+										nodeStack[depth++] = b3MakeInt2(nodeArightChild, nodeBleftChild);
+										nodeStack[depth++] = b3MakeInt2(nodeAleftChild, nodeBrightChild);
+										nodeStack[depth++] = b3MakeInt2(nodeArightChild, nodeBrightChild);
+									}
+									else
+									{
+										nodeStack[depth++] = b3MakeInt2(nodeAleftChild,node.y);
+										nodeStack[depth++] = b3MakeInt2(nodeArightChild,node.y);
+									}
+								}
+								else
+								{
+									if(isInternalB)
+									{
+										int nodeBleftChild = node.y+1;
+										bool isNodeBLeftChildLeaf = treeNodesCPU[nodeBleftChild].isLeafNode();
+										int nodeBrightChild = isNodeBLeftChildLeaf? node.y+2 : nodeBleftChild + treeNodesCPU[nodeBleftChild].getEscapeIndex();
+										nodeStack[depth++] = b3MakeInt2(node.x,nodeBleftChild);
+										nodeStack[depth++] = b3MakeInt2(node.x,nodeBrightChild);
+									}
+									else
+									{
+										int compoundPairIdx = b3AtomicInc(numCompoundPairsOut);	//leaf-leaf hit; counter still grows when the output is full
+										if (compoundPairIdx<maxNumCompoundPairsCapacity)
+										{
+											int childShapeIndexA = treeNodesCPU[node.x].getTriangleIndex();
+											int childShapeIndexB = treeNodesCPU[node.y].getTriangleIndex();
+											gpuCompoundPairsOut[compoundPairIdx]  = b3MakeInt4(bodyIndexA,bodyIndexB,childShapeIndexA,childShapeIndexB);
+										}
+									}
+								}
+							}
+						} while (depth);
+					}
+
+					/*
+					for (i=0;i<this->m_SubtreeHeaders.size();i++)
+					{
+						const b3BvhSubtreeInfo& subtree = m_SubtreeHeaders[i];
+
+						//PCK: unsigned instead of bool
+						unsigned overlap = b3TestQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin,quantizedQueryAabbMax,subtree.m_quantizedAabbMin,subtree.m_quantizedAabbMax);
+						if (overlap != 0)
+						{
+							walkStacklessQuantizedTree(nodeCallback,quantizedQueryAabbMin,quantizedQueryAabbMax,
+								subtree.m_rootNodeIndex,
+								subtree.m_rootNodeIndex+subtree.m_subtreeSize);
+						}
+					}
+					*/
+					
+					/*bvhInfoCPU[bvhA].m_numNodes;
+					bvhInfoCPU[bvhA].m_nodeOffset
+
+					b3AlignedObjectArray<b3Int2> nodeStack;
+					b3Int2 n;n.x = 
+					nodeStack.push_back(
+					*/
+
+				}
+			}
+			return;
+		}
+
 		if ((collidables[collidableIndexA].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) ||(collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS))
 		{

@@ -1847,7 +2048,11 @@ void computeContactCompoundCompound(int pairIndex,
 																
 																b3Contact4* globalContactsOut,
 																int& nGlobalContactsOut,
-																int maxContactCapacity)
+																int maxContactCapacity,
+																b3AlignedObjectArray<b3QuantizedBvhNode>&	treeNodesCPU,
+																b3AlignedObjectArray<b3BvhSubtreeInfo>&	subTreesCPU,
+																b3AlignedObjectArray<b3BvhInfo>&	bvhInfoCPU
+																)
 {

 	int shapeTypeB = collidables[collidableIndexB].m_shapeType;
@@ -1858,7 +2063,6 @@ void computeContactCompoundCompound(int pairIndex,
 	int maxNumCompoundPairsCapacity = 1024;
 	cpuCompoundPairsOut.resize(maxNumCompoundPairsCapacity);

-
 	// work-in-progress
 	findCompoundPairsKernel( 
 							pairIndex,
@@ -1873,7 +2077,11 @@ void computeContactCompoundCompound(int pairIndex,
 							cpuChildShapes,
 							&cpuCompoundPairsOut[0],
 							&numCompoundPairsOut,
-							maxNumCompoundPairsCapacity	);
+							maxNumCompoundPairsCapacity	,
+							treeNodesCPU,
+							subTreesCPU,
+							bvhInfoCPU
+							);

 	b3AlignedObjectArray<b3Float4> cpuCompoundSepNormalsOut;
 	b3AlignedObjectArray<int> cpuHasCompoundSepNormalsOut;
@@ -2543,7 +2751,7 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>*
            b3OpenCLArray<b3Vector3>& worldNormalsAGPU,
            b3OpenCLArray<b3Vector3>& worldVertsA1GPU,
            b3OpenCLArray<b3Vector3>& worldVertsB2GPU,    
-			b3AlignedObjectArray<class b3OptimizedBvh*>& bvhData,
+			b3AlignedObjectArray<class b3OptimizedBvh*>& bvhDataUnused,
 			b3OpenCLArray<b3QuantizedBvhNode>*	treeNodesGPU,
 			b3OpenCLArray<b3BvhSubtreeInfo>*	subTreesGPU,
 			b3OpenCLArray<b3BvhInfo>*	bvhInfo,
@@ -2560,6 +2768,17 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>*


 #ifdef CHECK_ON_HOST
+
+
+	b3AlignedObjectArray<b3QuantizedBvhNode>	treeNodesCPU;
+	treeNodesGPU->copyToHost(treeNodesCPU);
+
+	b3AlignedObjectArray<b3BvhSubtreeInfo>	subTreesCPU;
+	subTreesGPU->copyToHost(subTreesCPU);
+
+	b3AlignedObjectArray<b3BvhInfo>	bvhInfoCPU;
+	bvhInfo->copyToHost(bvhInfoCPU);
+
 	b3AlignedObjectArray<b3Aabb> hostAabbsWorldSpace;
 	clAabbsWorldSpace.copyToHost(hostAabbsWorldSpace);

@@ -2655,7 +2874,8 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>*
 			hostCollidables[collidableIndexB].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS)
 		{
 			computeContactCompoundCompound(i,bodyIndexB,bodyIndexA,collidableIndexB,collidableIndexA,&hostBodyBuf[0],
-			&hostCollidables[0],&hostConvexData[0],&cpuChildShapes[0], hostAabbsWorldSpace,hostAabbsLocalSpace,hostVertices,hostUniqueEdges,hostIndices,hostFaces,&hostContacts[0],nContacts,maxContactCapacity);
+			&hostCollidables[0],&hostConvexData[0],&cpuChildShapes[0], hostAabbsWorldSpace,hostAabbsLocalSpace,hostVertices,hostUniqueEdges,hostIndices,hostFaces,&hostContacts[0],
+			nContacts,maxContactCapacity,treeNodesCPU,subTreesCPU,bvhInfoCPU);	
 //			printf("convex-plane\n");
 			
 		}