implement compound versus compound BVH acceleration (quantized tree-versus-tree, using subtrees and quantization) on host

This commit is contained in:
erwin coumans
2013-08-16 08:58:52 -07:00
parent b32ae0c75c
commit ef224370ab
4 changed files with 340 additions and 18 deletions

View File

@@ -46,5 +46,11 @@ struct b3Int2
};
};
/// Convenience constructor: returns a b3Int2 with s[0] set to x and s[1] set to y.
inline b3Int2 b3MakeInt2(int x, int y)
{
	b3Int2 result;
	result.s[0] = x;
	result.s[1] = y;
	return result;
}
#endif

View File

@@ -19,6 +19,7 @@ subject to the following restrictions:
///And contact clipping based on work from Simon Hobbs
//#define B3_DEBUG_SAT_FACE
//#define CHECK_ON_HOST
#ifdef CHECK_ON_HOST
@@ -1368,6 +1369,23 @@ void computeContactPlaneConvex(int pairIndex,
/// Converts a quantized 3-component position back to floating point.
/// Each of the three unsigned short components in vecIn is divided by the matching
/// component of `quantization`, and `bvhAabbMin` is then added to the result.
B3_FORCE_INLINE b3Vector3 MyUnQuantize(const unsigned short* vecIn, const b3Vector3& quantization, const b3Vector3& bvhAabbMin)
{
	const b3Scalar scaledX = (b3Scalar)(vecIn[0]) / (quantization.getX());
	const b3Scalar scaledY = (b3Scalar)(vecIn[1]) / (quantization.getY());
	const b3Scalar scaledZ = (b3Scalar)(vecIn[2]) / (quantization.getZ());

	b3Vector3 result;
	result.setValue(scaledX, scaledY, scaledZ);
	result += bvhAabbMin;
	return result;
}
// Placeholder with an empty body: it takes no arguments and performs no work.
// NOTE(review): presumably intended to host the tree-versus-tree traversal that is
// currently written inline in findCompoundPairsKernel -- confirm before relying on it.
void traverseTreeTree()
{
}
// work-in-progress
__kernel void findCompoundPairsKernel(
int pairIndex,
@@ -1384,7 +1402,10 @@ __kernel void findCompoundPairsKernel(
__global const b3GpuChildShape* gpuChildShapes,
__global b3Int4* gpuCompoundPairsOut,
__global int* numCompoundPairsOut,
int maxNumCompoundPairsCapacity
int maxNumCompoundPairsCapacity,
b3AlignedObjectArray<b3QuantizedBvhNode>& treeNodesCPU,
b3AlignedObjectArray<b3BvhSubtreeInfo>& subTreesCPU,
b3AlignedObjectArray<b3BvhInfo>& bvhInfoCPU
)
{
@@ -1403,6 +1424,186 @@ __kernel void findCompoundPairsKernel(
return;
}
// Compound-versus-compound: both collidables are compounds of convex hulls, each with its own
// quantized BVH over its child shapes. Every subtree of A is tested against every subtree of B
// (world-space AABBs), and overlapping subtree pairs are traversed simultaneously with an explicit
// stack of (nodeA,nodeB) index pairs. Each overlapping leaf/leaf pair is appended to
// gpuCompoundPairsOut as (bodyIndexA, bodyIndexB, childShapeIndexA, childShapeIndexB).
if ((collidables[collidableIndexA].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) &&(collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS))
{
	int bvhA = collidables[collidableIndexA].m_compoundBvhIndex;
	int bvhB = collidables[collidableIndexB].m_compoundBvhIndex;

	int numSubTreesA = bvhInfoCPU[bvhA].m_numSubTrees;
	int subTreesOffsetA = bvhInfoCPU[bvhA].m_subTreeOffset;
	int subTreesOffsetB = bvhInfoCPU[bvhB].m_subTreeOffset;
	int numSubTreesB = bvhInfoCPU[bvhB].m_numSubTrees;

	// Subtree headers are copied unmodified from each standalone BVH, while that BVH's nodes are
	// appended to the shared node array starting at m_nodeOffset. The subtree root index therefore
	// has to be shifted by m_nodeOffset before it can index treeNodesCPU.
	int nodeOffsetA = bvhInfoCPU[bvhA].m_nodeOffset;
	int nodeOffsetB = bvhInfoCPU[bvhB].m_nodeOffset;

	float4 posA = rigidBodies[bodyIndexA].m_pos;
	b3Quat ornA = rigidBodies[bodyIndexA].m_quat;

	b3Transform transA;
	transA.setIdentity();
	transA.setOrigin(posA);
	transA.setRotation(ornA);

	b3Quat ornB = rigidBodies[bodyIndexB].m_quat;
	float4 posB = rigidBodies[bodyIndexB].m_pos;

	b3Transform transB;
	transB.setIdentity();
	transB.setOrigin(posB);
	transB.setRotation(ornB);

	for (int p=0;p<numSubTreesA;p++)
	{
		b3BvhSubtreeInfo subtreeA = subTreesCPU[subTreesOffsetA+p];
		//bvhInfoCPU[bvhA].m_quantization
		b3Vector3 treeAminLocal = MyUnQuantize(subtreeA.m_quantizedAabbMin,bvhInfoCPU[bvhA].m_quantization,bvhInfoCPU[bvhA].m_aabbMin);
		b3Vector3 treeAmaxLocal = MyUnQuantize(subtreeA.m_quantizedAabbMax,bvhInfoCPU[bvhA].m_quantization,bvhInfoCPU[bvhA].m_aabbMin);

		b3Vector3 aabbAMinOut,aabbAMaxOut;
		float margin=0.f;
		b3TransformAabb(treeAminLocal,treeAmaxLocal, margin,transA,aabbAMinOut,aabbAMaxOut);

		for (int q=0;q<numSubTreesB;q++)
		{
			b3BvhSubtreeInfo subtreeB = subTreesCPU[subTreesOffsetB+q];

			b3Vector3 treeBminLocal = MyUnQuantize(subtreeB.m_quantizedAabbMin,bvhInfoCPU[bvhB].m_quantization,bvhInfoCPU[bvhB].m_aabbMin);
			b3Vector3 treeBmaxLocal = MyUnQuantize(subtreeB.m_quantizedAabbMax,bvhInfoCPU[bvhB].m_quantization,bvhInfoCPU[bvhB].m_aabbMin);

			b3Vector3 aabbBMinOut,aabbBMaxOut;
			float margin=0.f;
			b3TransformAabb(treeBminLocal,treeBmaxLocal, margin,transB,aabbBMinOut,aabbBMaxOut);

			bool aabbOverlap = b3TestAabbAgainstAabb2(aabbAMinOut,aabbAMaxOut,aabbBMinOut,aabbBMaxOut);
			if (aabbOverlap)
			{
				// Absolute indices of the two subtree roots inside the shared node array.
				int startNodeIndexA = subtreeA.m_rootNodeIndex+nodeOffsetA;
				int startNodeIndexB = subtreeB.m_rootNodeIndex+nodeOffsetB;

				b3AlignedObjectArray<b3Int2> nodeStack;
				b3Int2 node0;
				node0.x = startNodeIndexA;
				node0.y = startNodeIndexB;

				int maxStackDepth = 1024;
				nodeStack.resize(maxStackDepth);
				int depth=0;
				nodeStack[depth++]=node0;

				do
				{
					b3Int2 node = nodeStack[--depth];

					b3Vector3 aMinLocal = MyUnQuantize(treeNodesCPU[node.x].m_quantizedAabbMin,bvhInfoCPU[bvhA].m_quantization,bvhInfoCPU[bvhA].m_aabbMin);
					b3Vector3 aMaxLocal = MyUnQuantize(treeNodesCPU[node.x].m_quantizedAabbMax,bvhInfoCPU[bvhA].m_quantization,bvhInfoCPU[bvhA].m_aabbMin);

					b3Vector3 bMinLocal = MyUnQuantize(treeNodesCPU[node.y].m_quantizedAabbMin,bvhInfoCPU[bvhB].m_quantization,bvhInfoCPU[bvhB].m_aabbMin);
					b3Vector3 bMaxLocal = MyUnQuantize(treeNodesCPU[node.y].m_quantizedAabbMax,bvhInfoCPU[bvhB].m_quantization,bvhInfoCPU[bvhB].m_aabbMin);

					float margin=0.f;
					b3Vector3 aabbAMinOut,aabbAMaxOut;
					b3TransformAabb(aMinLocal,aMaxLocal, margin,transA,aabbAMinOut,aabbAMaxOut);

					b3Vector3 aabbBMinOut,aabbBMaxOut;
					b3TransformAabb(bMinLocal,bMaxLocal, margin,transB,aabbBMinOut,aabbBMaxOut);

					bool nodeOverlap = b3TestAabbAgainstAabb2(aabbAMinOut,aabbAMaxOut,aabbBMinOut,aabbBMaxOut);
					if (nodeOverlap)
					{
						bool isLeafA = treeNodesCPU[node.x].isLeafNode();
						bool isLeafB = treeNodesCPU[node.y].isLeafNode();
						bool isInternalA = !isLeafA;
						bool isInternalB = !isLeafB;

						//fail, even though it might hit two leaf nodes
						// (up to four entries are pushed below, so require room for all of them;
						//  `continue` jumps to the `while (depth)` test and keeps draining the stack)
						if (depth+4>maxStackDepth && !(isLeafA && isLeafB))
						{
							b3Error("Error: traversal exceeded maxStackDepth\n");
							continue;
						}

						if(isInternalA)
						{
							// Nodes are laid out depth-first: the left child directly follows its parent.
							// The right child follows the whole left subtree, i.e. it sits at
							// leftChild + escapeIndex(leftChild) (or leftChild+1 when the left child is a leaf).
							int nodeAleftChild = node.x+1;
							bool isNodeALeftChildLeaf = treeNodesCPU[nodeAleftChild].isLeafNode();
							int nodeArightChild = isNodeALeftChildLeaf? nodeAleftChild+1 : nodeAleftChild + treeNodesCPU[nodeAleftChild].getEscapeIndex();

							if(isInternalB)
							{
								int nodeBleftChild = node.y+1;
								bool isNodeBLeftChildLeaf = treeNodesCPU[nodeBleftChild].isLeafNode();
								int nodeBrightChild = isNodeBLeftChildLeaf? nodeBleftChild+1 : nodeBleftChild + treeNodesCPU[nodeBleftChild].getEscapeIndex();

								nodeStack[depth++] = b3MakeInt2(nodeAleftChild, nodeBleftChild);
								nodeStack[depth++] = b3MakeInt2(nodeArightChild, nodeBleftChild);
								nodeStack[depth++] = b3MakeInt2(nodeAleftChild, nodeBrightChild);
								nodeStack[depth++] = b3MakeInt2(nodeArightChild, nodeBrightChild);
							}
							else
							{
								nodeStack[depth++] = b3MakeInt2(nodeAleftChild,node.y);
								nodeStack[depth++] = b3MakeInt2(nodeArightChild,node.y);
							}
						}
						else
						{
							if(isInternalB)
							{
								int nodeBleftChild = node.y+1;
								bool isNodeBLeftChildLeaf = treeNodesCPU[nodeBleftChild].isLeafNode();
								int nodeBrightChild = isNodeBLeftChildLeaf? nodeBleftChild+1 : nodeBleftChild + treeNodesCPU[nodeBleftChild].getEscapeIndex();
								nodeStack[depth++] = b3MakeInt2(node.x,nodeBleftChild);
								nodeStack[depth++] = b3MakeInt2(node.x,nodeBrightChild);
							}
							else
							{
								// Leaf versus leaf: report the pair of child shapes.
								// The counter is incremented even when the output is full; only the write is skipped.
								int compoundPairIdx = b3AtomicInc(numCompoundPairsOut);
								if (compoundPairIdx<maxNumCompoundPairsCapacity)
								{
									int childShapeIndexA = treeNodesCPU[node.x].getTriangleIndex();
									int childShapeIndexB = treeNodesCPU[node.y].getTriangleIndex();
									gpuCompoundPairsOut[compoundPairIdx] = b3MakeInt4(bodyIndexA,bodyIndexB,childShapeIndexA,childShapeIndexB);
								}
							}
						}
					}
				} while (depth);
			}
			/*
			for (i=0;i<this->m_SubtreeHeaders.size();i++)
			{
			const b3BvhSubtreeInfo& subtree = m_SubtreeHeaders[i];
			//PCK: unsigned instead of bool
			unsigned overlap = b3TestQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin,quantizedQueryAabbMax,subtree.m_quantizedAabbMin,subtree.m_quantizedAabbMax);
			if (overlap != 0)
			{
			walkStacklessQuantizedTree(nodeCallback,quantizedQueryAabbMin,quantizedQueryAabbMax,
			subtree.m_rootNodeIndex,
			subtree.m_rootNodeIndex+subtree.m_subtreeSize);
			}
			}
			*/
			/*bvhInfoCPU[bvhA].m_numNodes;
			bvhInfoCPU[bvhA].m_nodeOffset
			b3AlignedObjectArray<b3Int2> nodeStack;
			b3Int2 n;n.x =
			nodeStack.push_back(
			*/
		}
	}
	return;
}
if ((collidables[collidableIndexA].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) ||(collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS))
{
@@ -1847,7 +2048,11 @@ void computeContactCompoundCompound(int pairIndex,
b3Contact4* globalContactsOut,
int& nGlobalContactsOut,
int maxContactCapacity)
int maxContactCapacity,
b3AlignedObjectArray<b3QuantizedBvhNode>& treeNodesCPU,
b3AlignedObjectArray<b3BvhSubtreeInfo>& subTreesCPU,
b3AlignedObjectArray<b3BvhInfo>& bvhInfoCPU
)
{
int shapeTypeB = collidables[collidableIndexB].m_shapeType;
@@ -1858,7 +2063,6 @@ void computeContactCompoundCompound(int pairIndex,
int maxNumCompoundPairsCapacity = 1024;
cpuCompoundPairsOut.resize(maxNumCompoundPairsCapacity);
// work-in-progress
findCompoundPairsKernel(
pairIndex,
@@ -1873,7 +2077,11 @@ void computeContactCompoundCompound(int pairIndex,
cpuChildShapes,
&cpuCompoundPairsOut[0],
&numCompoundPairsOut,
maxNumCompoundPairsCapacity );
maxNumCompoundPairsCapacity ,
treeNodesCPU,
subTreesCPU,
bvhInfoCPU
);
b3AlignedObjectArray<b3Float4> cpuCompoundSepNormalsOut;
b3AlignedObjectArray<int> cpuHasCompoundSepNormalsOut;
@@ -2543,7 +2751,7 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>*
b3OpenCLArray<b3Vector3>& worldNormalsAGPU,
b3OpenCLArray<b3Vector3>& worldVertsA1GPU,
b3OpenCLArray<b3Vector3>& worldVertsB2GPU,
b3AlignedObjectArray<class b3OptimizedBvh*>& bvhData,
b3AlignedObjectArray<class b3OptimizedBvh*>& bvhDataUnused,
b3OpenCLArray<b3QuantizedBvhNode>* treeNodesGPU,
b3OpenCLArray<b3BvhSubtreeInfo>* subTreesGPU,
b3OpenCLArray<b3BvhInfo>* bvhInfo,
@@ -2560,6 +2768,17 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>*
#ifdef CHECK_ON_HOST
b3AlignedObjectArray<b3QuantizedBvhNode> treeNodesCPU;
treeNodesGPU->copyToHost(treeNodesCPU);
b3AlignedObjectArray<b3BvhSubtreeInfo> subTreesCPU;
subTreesGPU->copyToHost(subTreesCPU);
b3AlignedObjectArray<b3BvhInfo> bvhInfoCPU;
bvhInfo->copyToHost(bvhInfoCPU);
b3AlignedObjectArray<b3Aabb> hostAabbsWorldSpace;
clAabbsWorldSpace.copyToHost(hostAabbsWorldSpace);
@@ -2655,7 +2874,8 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>*
hostCollidables[collidableIndexB].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS)
{
computeContactCompoundCompound(i,bodyIndexB,bodyIndexA,collidableIndexB,collidableIndexA,&hostBodyBuf[0],
&hostCollidables[0],&hostConvexData[0],&cpuChildShapes[0], hostAabbsWorldSpace,hostAabbsLocalSpace,hostVertices,hostUniqueEdges,hostIndices,hostFaces,&hostContacts[0],nContacts,maxContactCapacity);
&hostCollidables[0],&hostConvexData[0],&cpuChildShapes[0], hostAabbsWorldSpace,hostAabbsLocalSpace,hostVertices,hostUniqueEdges,hostIndices,hostFaces,&hostContacts[0],
nContacts,maxContactCapacity,treeNodesCPU,subTreesCPU,bvhInfoCPU);
// printf("convex-plane\n");
}

View File

@@ -13,6 +13,8 @@
#include "Bullet3OpenCL/NarrowphaseCollision/b3BvhInfo.h"
#include "b3GpuNarrowPhaseInternalData.h"
#include "Bullet3OpenCL/NarrowphaseCollision/b3QuantizedBvh.h"
@@ -417,8 +419,9 @@ int b3GpuNarrowPhase::registerCompoundShape(b3AlignedObjectArray<b3GpuChildShap
b3Collidable& col = getCollidableCpu(collidableIndex);
col.m_shapeType = SHAPE_COMPOUND_OF_CONVEX_HULLS;
col.m_shapeIndex = m_data->m_cpuChildShapes.size();
col.m_compoundBvhIndex = m_data->m_bvhInfoCPU.size();
{
b3Assert(col.m_shapeIndex+childShapes->size()<m_data->m_config.m_maxCompoundChildShapes);
for (int i=0;i<childShapes->size();i++)
@@ -432,10 +435,13 @@ int b3GpuNarrowPhase::registerCompoundShape(b3AlignedObjectArray<b3GpuChildShap
col.m_numChildShapes = childShapes->size();
b3SapAabb aabbWS;
b3SapAabb aabbLocalSpace;
b3Vector3 myAabbMin(1e30f,1e30f,1e30f);
b3Vector3 myAabbMax(-1e30f,-1e30f,-1e30f);
b3AlignedObjectArray<b3Aabb> childLocalAabbs;
childLocalAabbs.resize(childShapes->size());
//compute local AABB of the compound of all children
for (int i=0;i<childShapes->size();i++)
{
@@ -460,19 +466,109 @@ int b3GpuNarrowPhase::registerCompoundShape(b3AlignedObjectArray<b3GpuChildShap
b3TransformAabb(childLocalAabbMin,childLocalAabbMax,margin,childTr,aMin,aMax);
myAabbMin.setMin(aMin);
myAabbMax.setMax(aMax);
childLocalAabbs[i].m_min[0] = aMin[0];
childLocalAabbs[i].m_min[1] = aMin[1];
childLocalAabbs[i].m_min[2] = aMin[2];
childLocalAabbs[i].m_min[3] = 0;
childLocalAabbs[i].m_max[0] = aMax[0];
childLocalAabbs[i].m_max[1] = aMax[1];
childLocalAabbs[i].m_max[2] = aMax[2];
childLocalAabbs[i].m_max[3] = 0;
}
aabbWS.m_min[0] = myAabbMin[0];//s_convexHeightField->m_aabb.m_min.x;
aabbWS.m_min[1]= myAabbMin[1];//s_convexHeightField->m_aabb.m_min.y;
aabbWS.m_min[2]= myAabbMin[2];//s_convexHeightField->m_aabb.m_min.z;
aabbWS.m_minIndices[3] = 0;
aabbLocalSpace.m_min[0] = myAabbMin[0];//s_convexHeightField->m_aabb.m_min.x;
aabbLocalSpace.m_min[1]= myAabbMin[1];//s_convexHeightField->m_aabb.m_min.y;
aabbLocalSpace.m_min[2]= myAabbMin[2];//s_convexHeightField->m_aabb.m_min.z;
aabbLocalSpace.m_minIndices[3] = 0;
aabbWS.m_max[0] = myAabbMax[0];//s_convexHeightField->m_aabb.m_max.x;
aabbWS.m_max[1]= myAabbMax[1];//s_convexHeightField->m_aabb.m_max.y;
aabbWS.m_max[2]= myAabbMax[2];//s_convexHeightField->m_aabb.m_max.z;
aabbWS.m_signedMaxIndices[3] = 0;
aabbLocalSpace.m_max[0] = myAabbMax[0];//s_convexHeightField->m_aabb.m_max.x;
aabbLocalSpace.m_max[1]= myAabbMax[1];//s_convexHeightField->m_aabb.m_max.y;
aabbLocalSpace.m_max[2]= myAabbMax[2];//s_convexHeightField->m_aabb.m_max.z;
aabbLocalSpace.m_signedMaxIndices[3] = 0;
m_data->m_localShapeAABBCPU->push_back(aabbWS);
m_data->m_localShapeAABBCPU->push_back(aabbLocalSpace);
b3QuantizedBvh* bvh = new b3QuantizedBvh;
bvh->setQuantizationValues(myAabbMin,myAabbMax);
QuantizedNodeArray& nodes = bvh->getLeafNodeArray();
int numNodes = childShapes->size();
for (int i=0;i<numNodes;i++)
{
b3QuantizedBvhNode node;
b3Vector3 aabbMin,aabbMax;
aabbMin = (b3Vector3&) childLocalAabbs[i].m_min;
aabbMax = (b3Vector3&) childLocalAabbs[i].m_max;
bvh->quantize(&node.m_quantizedAabbMin[0],aabbMin,0);
bvh->quantize(&node.m_quantizedAabbMax[0],aabbMax,1);
int partId = 0;
node.m_escapeIndexOrTriangleIndex = (partId<<(31-MAX_NUM_PARTS_IN_BITS)) | i;
nodes.push_back(node);
}
bvh->buildInternal();
int numSubTrees = bvh->getSubtreeInfoArray().size();
//void setQuantizationValues(const b3Vector3& bvhAabbMin,const b3Vector3& bvhAabbMax,b3Scalar quantizationMargin=b3Scalar(1.0));
//QuantizedNodeArray& getLeafNodeArray() { return m_quantizedLeafNodes; }
///buildInternal is expert use only: assumes that setQuantizationValues and LeafNodeArray are initialized
//void buildInternal();
b3BvhInfo bvhInfo;
bvhInfo.m_aabbMin = bvh->m_bvhAabbMin;
bvhInfo.m_aabbMax = bvh->m_bvhAabbMax;
bvhInfo.m_quantization = bvh->m_bvhQuantization;
bvhInfo.m_numNodes = numNodes;
bvhInfo.m_numSubTrees = numSubTrees;
bvhInfo.m_nodeOffset = m_data->m_treeNodesCPU.size();
bvhInfo.m_subTreeOffset = m_data->m_subTreesCPU.size();
int numNewNodes = bvh->getQuantizedNodeArray().size();
// Sanity check of the quantization: for every leaf node, the un-quantized node AABB must fully
// contain the original (un-quantized) local AABB of the child shape it refers to.
// A diagnostic line is printed for each axis where the original box sticks out.
// NOTE(review): the last entry of the node array is deliberately not visited (numNewNodes-1);
// presumably the array holds one unused trailing element -- confirm against buildInternal.
for (int i=0;i<numNewNodes-1;i++)
{
	const b3QuantizedBvhNode& bvhNode = bvh->getQuantizedNodeArray()[i];
	if (bvhNode.isLeafNode())
	{
		// Leaf nodes store the index of the child shape they were created from.
		int orgIndex = bvhNode.getTriangleIndex();
		b3Vector3 nodeMinVec = bvh->unQuantize(bvhNode.m_quantizedAabbMin);
		b3Vector3 nodeMaxVec = bvh->unQuantize(bvhNode.m_quantizedAabbMax);

		for (int c=0;c<3;c++)
		{
			// Report the same box that was compared (orgIndex). Indexing childLocalAabbs with the
			// node index i would print an unrelated box and can run past the end of the array,
			// because there are more tree nodes than child shapes.
			if (childLocalAabbs[orgIndex].m_min[c] < nodeMinVec[c])
			{
				printf("min org (%f) and new (%f) ? at i:%d,c:%d\n",childLocalAabbs[orgIndex].m_min[c],nodeMinVec[c],i,c);
			}
			if (childLocalAabbs[orgIndex].m_max[c] > nodeMaxVec[c])
			{
				printf("max org (%f) and new (%f) ? at i:%d,c:%d\n",childLocalAabbs[orgIndex].m_max[c],nodeMaxVec[c],i,c);
			}
		}
	}
}
m_data->m_bvhInfoCPU.push_back(bvhInfo);
int numNewSubtrees = bvh->getSubtreeInfoArray().size();
m_data->m_subTreesCPU.reserve(m_data->m_subTreesCPU.size()+numNewSubtrees);
for (int i=0;i<numNewSubtrees;i++)
{
m_data->m_subTreesCPU.push_back(bvh->getSubtreeInfoArray()[i]);
}
int numNewTreeNodes = bvh->getQuantizedNodeArray().size();
for (int i=0;i<numNewTreeNodes;i++)
{
m_data->m_treeNodesCPU.push_back(bvh->getQuantizedNodeArray()[i]);
}
// m_data->m_localShapeAABBGPU->push_back(aabbWS);
clFinish(m_queue);
return collidableIndex;

View File

@@ -33,7 +33,7 @@ subject to the following restrictions:
#define B3_RIGIDBODY_INTEGRATE_PATH "src/Bullet3OpenCL/RigidBody/kernels/integrateKernel.cl"
#define B3_RIGIDBODY_UPDATEAABB_PATH "src/Bullet3OpenCL/RigidBody/kernels/updateAabbsKernel.cl"
bool useDbvt = false;//true;
bool useDbvt = true;//false;//true;
bool useBullet2CpuSolver = true;
bool dumpContactStats = false;