diff --git a/Extras/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.cpp b/Extras/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.cpp index d1a49dcdb..adabce684 100644 --- a/Extras/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.cpp +++ b/Extras/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.cpp @@ -12,6 +12,10 @@ #include "SpuContactResult.h" #include "BulletCollision/CollisionShapes/btOptimizedBvh.h" #include "BulletCollision/CollisionShapes/btTriangleIndexVertexArray.h" +#include "BulletCollision/CollisionShapes/btSphereShape.h" + + + #include "BulletCollision/CollisionShapes/btConvexShape.h" #include "BulletCollision/CollisionShapes/btBvhTriangleMeshShape.h" #include "BulletCollision/CollisionShapes/btConvexHullShape.h" @@ -83,8 +87,390 @@ void* createCollisionLocalStoreMemory() }; +void ProcessSpuConvexConvexCollision(SpuCollisionPairInput* wuInput, CollisionTask_LocalStoreMemory* lsMemPtr, SpuContactResult& spuContacts); +inline bool spuTestQuantizedAabbAgainstQuantizedAabb(const unsigned short int* aabbMin1,const unsigned short int* aabbMax1,const unsigned short int* aabbMin2,const unsigned short int* aabbMax2) +{ + bool overlap = true; + overlap = (aabbMin1[0] > aabbMax2[0] || aabbMax1[0] < aabbMin2[0]) ? false : overlap; + overlap = (aabbMin1[2] > aabbMax2[2] || aabbMax1[2] < aabbMin2[2]) ? false : overlap; + overlap = (aabbMin1[1] > aabbMax2[1] || aabbMax1[1] < aabbMin2[1]) ? false : overlap; + return overlap; +} + + +void spuWalkStacklessQuantizedTree(btNodeOverlapCallback* nodeCallback,unsigned short int* quantizedQueryAabbMin,unsigned short int* quantizedQueryAabbMax,const btQuantizedBvhNode* rootNode,int startNodeIndex,int endNodeIndex) +{ + + int curIndex = startNodeIndex; + int walkIterations = 0; + int subTreeSize = endNodeIndex - startNodeIndex; + + int escapeIndex; + + bool aabbOverlap, isLeafNode; + + while (curIndex < endNodeIndex) + { + //catch bugs in tree data + assert (walkIterations < subTreeSize); + + walkIterations++; + aabbOverlap = spuTestQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin,quantizedQueryAabbMax,rootNode->m_quantizedAabbMin,rootNode->m_quantizedAabbMax); + isLeafNode = rootNode->isLeafNode(); + + if (isLeafNode && aabbOverlap) + { + nodeCallback->processNode(0,rootNode->getTriangleIndex()); +// spu_printf("SPU: overlap detected with triangleIndex:%d\n",rootNode->getTriangleIndex()); + } + + if (aabbOverlap || isLeafNode) + { + rootNode++; + curIndex++; + } else + { + escapeIndex = rootNode->getEscapeIndex(); + rootNode += escapeIndex; + curIndex += escapeIndex; + } + } + +} + + +void small_cache_read(void* buffer, uint64_t ea, size_t size) +{ +#if USE_SOFTWARE_CACHE + // Check for alignment requirements. We need to make sure the entire request fits within one cache line, + // so the first and last bytes should fall on the same cache line + btAssert((ea & ~SPE_CACHELINE_MASK) == ((ea + size - 1) & ~SPE_CACHELINE_MASK)); + + void* ls = spe_cache_read(ea); + memcpy(buffer, ls, size); +#else + cellDmaLargeGet(buffer, ea, size, DMA_TAG(16), 0, 0); + cellDmaWaitTagStatusAll(DMA_MASK(16)); +#endif +} + + + +class spuNodeCallback : public btNodeOverlapCallback +{ + SpuCollisionPairInput* m_wuInput; + SpuContactResult& m_spuContacts; + CollisionTask_LocalStoreMemory* m_lsMemPtr; + + ATTRIBUTE_ALIGNED16(btVector3 spuTriangleVertices[3]); + ATTRIBUTE_ALIGNED16(btScalar spuUnscaledVertex[4]); + ATTRIBUTE_ALIGNED16(int spuIndices[16]); + + +public: + spuNodeCallback(SpuCollisionPairInput* wuInput, CollisionTask_LocalStoreMemory* lsMemPtr,SpuContactResult& spuContacts) + : m_wuInput(wuInput), + m_lsMemPtr(lsMemPtr), + m_spuContacts(spuContacts) + { + } + + virtual void processNode(int subPart, int triangleIndex) + { + ///Create a triangle on the stack, call process collision, with GJK + ///DMA the vertices, can benefit from software caching + +// spu_printf("processNode with triangleIndex %d\n",triangleIndex); + + + + int* indexBasePtr = (int*)(m_lsMemPtr->gIndexMesh.m_triangleIndexBase+triangleIndex*m_lsMemPtr->gIndexMesh.m_triangleIndexStride); + + ///DMA the indices + small_cache_read(&m_lsMemPtr->spuIndices[0],(uint64_t)&indexBasePtr[0],sizeof(int)); + small_cache_read(&m_lsMemPtr->spuIndices[1],(uint64_t)&indexBasePtr[1],sizeof(int)); + small_cache_read(&m_lsMemPtr->spuIndices[2],(uint64_t)&indexBasePtr[2],sizeof(int)); + +// spu_printf("SPU index0=%d ,",spuIndices[0]); +// spu_printf("SPU index1=%d ,",spuIndices[1]); +// spu_printf("SPU index2=%d ,",spuIndices[2]); +// spu_printf("SPU: indexBasePtr=%llx\n",indexBasePtr); + + const btVector3& meshScaling = m_lsMemPtr->gTriangleMeshInterface.getScaling(); + for (int j=2;j>=0;j--) + { + int graphicsindex = m_lsMemPtr->spuIndices[j]; + +// spu_printf("SPU index=%d ,",graphicsindex); + btScalar* graphicsbasePtr = (btScalar*)(m_lsMemPtr->gIndexMesh.m_vertexBase+graphicsindex*m_lsMemPtr->gIndexMesh.m_vertexStride); +// spu_printf("SPU graphicsbasePtr=%llx\n",graphicsbasePtr); + + + ///handle un-aligned vertices... + + //another DMA for each vertex + small_cache_read(&m_lsMemPtr->spuUnscaledVertex[0],(uint64_t)&graphicsbasePtr[0],sizeof(btScalar)); + small_cache_read(&m_lsMemPtr->spuUnscaledVertex[1],(uint64_t)&graphicsbasePtr[1],sizeof(btScalar)); + small_cache_read(&m_lsMemPtr->spuUnscaledVertex[2],(uint64_t)&graphicsbasePtr[2],sizeof(btScalar)); + + spuTriangleVertices[j] = btVector3( + spuUnscaledVertex[0]*meshScaling.getX(), + spuUnscaledVertex[1]*meshScaling.getY(), + spuUnscaledVertex[2]*meshScaling.getZ()); + +// spu_printf("SPU:triangle vertices:%f,%f,%f\n",spuTriangleVertices[j].x(),spuTriangleVertices[j].y(),spuTriangleVertices[j].z()); + } + + + + //btTriangleShape tmpTriangleShape(spuTriangleVertices[0],spuTriangleVertices[1],spuTriangleVertices[2]); + + + SpuCollisionPairInput triangleConcaveInput(*m_wuInput); + triangleConcaveInput.m_spuCollisionShapes[1] = &spuTriangleVertices[0]; + triangleConcaveInput.m_shapeType1 = TRIANGLE_SHAPE_PROXYTYPE; + + m_spuContacts.setShapeIdentifiers(-1,-1,subPart,triangleIndex); + +// m_spuContacts.flush(); + + ProcessSpuConvexConvexCollision(&triangleConcaveInput, m_lsMemPtr,m_spuContacts); + ///this flush should be automatic + // m_spuContacts.flush(); + } + +}; + + + + +//////////////////////// +/// Convex versus Concave triangle mesh collision detection (handles concave triangle mesh versus sphere, box, cylinder, triangle, cone, convex polyhedron etc) +/////////////////// +void ProcessConvexConcaveSpuCollision(SpuCollisionPairInput* wuInput, CollisionTask_LocalStoreMemory* lsMemPtr, SpuContactResult& spuContacts) +{ + //order: first collision shape is convex, second concave. m_isSwapped is true, if the original order was opposite + + + + btBvhTriangleMeshShape* trimeshShape = (btBvhTriangleMeshShape*)wuInput->m_spuCollisionShapes[1]; + //need the mesh interface, for access to triangle vertices + { + int dmaSize = sizeof(btTriangleIndexVertexArray); + uint64_t dmaPpuAddress2 = reinterpret_cast(trimeshShape->getMeshInterface()); + // spu_printf("trimeshShape->getMeshInterface() == %llx\n",dmaPpuAddress2); + cellDmaGet(&lsMemPtr->gTriangleMeshInterface, dmaPpuAddress2 , dmaSize, DMA_TAG(1), 0, 0); + cellDmaWaitTagStatusAll(DMA_MASK(1)); + } + + ///now DMA over the BVH + { + int dmaSize = sizeof(btOptimizedBvh); + uint64_t dmaPpuAddress2 = reinterpret_cast(trimeshShape->getOptimizedBvh()); + //spu_printf("trimeshShape->getOptimizedBvh() == %llx\n",dmaPpuAddress2); + cellDmaGet(&lsMemPtr->gOptimizedBvh, dmaPpuAddress2 , dmaSize, DMA_TAG(1), 0, 0); + cellDmaWaitTagStatusAll(DMA_MASK(1)); + } + + btVector3 aabbMin(-1,-400,-1); + btVector3 aabbMax(1,400,1); + + + //recalc aabbs + btTransform convexInTriangleSpace; + convexInTriangleSpace = wuInput->m_worldTransform1.inverse() * wuInput->m_worldTransform0; + btConvexShape* convexShape = (btConvexShape*)wuInput->m_spuCollisionShapes[0]; + //calculate the aabb, given the types... + switch (wuInput->m_shapeType0) + { + case CYLINDER_SHAPE_PROXYTYPE: + + case BOX_SHAPE_PROXYTYPE: + { + float margin=convexShape->getMarginNV(); + btVector3 halfExtents = convexShape->getImplicitShapeDimensions(); + btTransform& t = wuInput->m_worldTransform0; + btMatrix3x3 abs_b = t.getBasis().absolute(); + btPoint3 center = t.getOrigin(); + btVector3 extent = btVector3(abs_b[0].dot(halfExtents), + abs_b[1].dot(halfExtents), + abs_b[2].dot(halfExtents)); + extent += btVector3(margin,margin,margin); + aabbMin = center - extent; + aabbMax = center + extent; + break; + } + + case CAPSULE_SHAPE_PROXYTYPE: + { + float margin=convexShape->getMarginNV(); + btVector3 halfExtents = convexShape->getImplicitShapeDimensions(); + //add the radius to y-axis to get full height + btScalar radius = halfExtents[0]; + halfExtents[1] += radius; + btTransform& t = wuInput->m_worldTransform0; + btMatrix3x3 abs_b = t.getBasis().absolute(); + btPoint3 center = t.getOrigin(); + btVector3 extent = btVector3(abs_b[0].dot(halfExtents), + abs_b[1].dot(halfExtents), + abs_b[2].dot(halfExtents)); + extent += btVector3(margin,margin,margin); + aabbMin = center - extent; + aabbMax = center + extent; + break; + } + + + case SPHERE_SHAPE_PROXYTYPE: + { + float radius = convexShape->getImplicitShapeDimensions().getX();// * convexShape->getLocalScaling().getX(); + float margin = radius + convexShape->getMarginNV(); + btTransform& t = wuInput->m_worldTransform0; + const btVector3& center = t.getOrigin(); + btVector3 extent(margin,margin,margin); + aabbMin = center - extent; + aabbMax = center + extent; + break; + } + case CONVEX_HULL_SHAPE_PROXYTYPE: + { + int dmaSize = sizeof(btConvexHullShape); + uint64_t dmaPpuAddress2 = wuInput->m_collisionShapes[0]; + + ATTRIBUTE_ALIGNED16(char convexHullShape0[sizeof(btConvexHullShape)]); + + cellDmaGet(&convexHullShape0, dmaPpuAddress2 , dmaSize, DMA_TAG(1), 0, 0); + cellDmaWaitTagStatusAll(DMA_MASK(1)); + btConvexHullShape* localPtr = (btConvexHullShape*)&convexHullShape0; + btTransform& t = wuInput->m_worldTransform0; + + btScalar margin = convexShape->getMarginNV(); + + localPtr->getNonvirtualAabb(t,aabbMin,aabbMax,margin); + + //spu_printf("SPU convex aabbMin=%f,%f,%f=\n",aabbMin.getX(),aabbMin.getY(),aabbMin.getZ()); + //spu_printf("SPU convex aabbMax=%f,%f,%f=\n",aabbMax.getX(),aabbMax.getY(),aabbMax.getZ()); + + break; + } + + default: + spu_printf("SPU: unsupported shapetype %d in AABB calculation\n"); + }; + + //CollisionShape* triangleShape = static_cast(triBody->m_collisionShape); + //convexShape->getAabb(convexInTriangleSpace,m_aabbMin,m_aabbMax); + +// btScalar extraMargin = collisionMarginTriangle; +// btVector3 extra(extraMargin,extraMargin,extraMargin); +// aabbMax += extra; +// aabbMin -= extra; + + + + ///quantize query AABB + unsigned short int quantizedQueryAabbMin[3]; + unsigned short int quantizedQueryAabbMax[3]; + lsMemPtr->gOptimizedBvh.quantizeWithClamp(quantizedQueryAabbMin,aabbMin); + lsMemPtr->gOptimizedBvh.quantizeWithClamp(quantizedQueryAabbMax,aabbMax); + + QuantizedNodeArray& nodeArray = lsMemPtr->gOptimizedBvh.getQuantizedNodeArray(); + //spu_printf("SPU: numNodes = %d\n",nodeArray.size()); + + BvhSubtreeInfoArray& subTrees = lsMemPtr->gOptimizedBvh.getSubtreeInfoArray(); + + spuNodeCallback nodeCallback(wuInput,lsMemPtr,spuContacts); + IndexedMeshArray& indexArray = lsMemPtr->gTriangleMeshInterface.getIndexedMeshArray(); + //spu_printf("SPU:indexArray.size() = %d\n",indexArray.size()); + + +// spu_printf("SPU: numSubTrees = %d\n",subTrees.size()); + //not likely to happen + if (subTrees.size() && indexArray.size() == 1) + { + ///DMA in the index info + { + int dmaSize = sizeof(btIndexedMesh); + uint64_t dmaPpuAddress2 = reinterpret_cast(&indexArray[0]); + cellDmaGet(&lsMemPtr->gIndexMesh, dmaPpuAddress2 , dmaSize, DMA_TAG(1), 0, 0); + cellDmaWaitTagStatusAll(DMA_MASK(1)); + } + + //spu_printf("SPU gIndexMesh dma finished\n"); + + //display the headers + int numBatch = subTrees.size(); + for (int i=0;i(&subTrees[i]); +// spu_printf("&subtree[i]=%llx, dmaSize = %d\n",dmaPpuAddress2,dmaSize); + cellDmaGet(&lsMemPtr->gSubtreeHeaders[0], dmaPpuAddress2 , dmaSize, DMA_TAG(1), 0, 0); + cellDmaWaitTagStatusAll(DMA_MASK(1)); + } + +// spu_printf("nextBatch = %d\n",nextBatch); + + for (int j=0;jgSubtreeHeaders[j]; + + bool overlap = spuTestQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin,quantizedQueryAabbMax,subtree.m_quantizedAabbMin,subtree.m_quantizedAabbMax); + if (overlap) + { + btAssert(subtree.m_subtreeSize); + + //dma the actual nodes of this subtree + { + int dmaSize = subtree.m_subtreeSize* sizeof(btQuantizedBvhNode); + uint64_t dmaPpuAddress2 = reinterpret_cast(&nodeArray[subtree.m_rootNodeIndex]); + cellDmaGet(&lsMemPtr->gSubtreeNodes[0], dmaPpuAddress2 , dmaSize, DMA_TAG(1), 0, 0); + cellDmaWaitTagStatusAll(DMA_MASK(1)); + } + + + + spuWalkStacklessQuantizedTree(&nodeCallback,quantizedQueryAabbMin,quantizedQueryAabbMax, + &lsMemPtr->gSubtreeNodes[0], + 0, + subtree.m_subtreeSize); + + } + + +// spu_printf("subtreeSize = %d\n",gSubtreeHeaders[j].m_subtreeSize); + } + + // unsigned short int m_quantizedAabbMin[3]; + // unsigned short int m_quantizedAabbMax[3]; + // int m_rootNodeIndex; + // int m_subtreeSize; + i+=nextBatch; + } + + //pre-fetch first tree, then loop and double buffer + + + + } + +} + + + + + + + +//////////////////////// +/// Convex versus Convex collision detection (handles collision between sphere, box, cylinder, triangle, cone, convex polyhedron etc) +/////////////////// void ProcessSpuConvexConvexCollision(SpuCollisionPairInput* wuInput, CollisionTask_LocalStoreMemory* lsMemPtr, SpuContactResult& spuContacts) { @@ -374,13 +760,13 @@ void processCollisionTask(void* userPtr, void* lsMemPtr) { { - int dmaSize = lsMem.maxShapeSize; + int dmaSize = sizeof(btSphereShape);//lsMem.maxShapeSize; uint64_t dmaPpuAddress2 = (uint64_t)lsMem.gColObj0.getCollisionShape(); cellDmaGet(lsMem.gCollisionShape0, dmaPpuAddress2 , dmaSize, DMA_TAG(1), 0, 0); cellDmaWaitTagStatusAll(DMA_MASK(1)); } { - int dmaSize = lsMem.maxShapeSize; + int dmaSize = sizeof(btSphereShape);//lsMem.maxShapeSize; uint64_t dmaPpuAddress2 = (uint64_t)lsMem.gColObj1.getCollisionShape(); cellDmaGet(lsMem.gCollisionShape1, dmaPpuAddress2 , dmaSize, DMA_TAG(2), 0, 0); cellDmaWaitTagStatusAll(DMA_MASK(2)); @@ -429,14 +815,14 @@ void processCollisionTask(void* userPtr, void* lsMemPtr) ///dma and initialize the convex object { - int dmaSize = lsMem.maxShapeSize; + int dmaSize = sizeof(btSphereShape);//lsMem.maxShapeSize; uint64_t dmaPpuAddress2 = (uint64_t)lsMem.gColObj0.getCollisionShape(); cellDmaGet(lsMem.gCollisionShape0, dmaPpuAddress2 , dmaSize, DMA_TAG(1), 0, 0); cellDmaWaitTagStatusAll(DMA_MASK(1)); } - ///dma and initialize the convex object + ///dma and initialize the concave object { - int dmaSize = lsMem.maxShapeSize; + int dmaSize = sizeof(btBvhTriangleMeshShape);//lsMem.maxShapeSize; uint64_t dmaPpuAddress2 = (uint64_t)lsMem.gColObj1.getCollisionShape(); // spu_printf("SPU: trimesh = %llx\n",dmaPpuAddress2); cellDmaGet(lsMem.gCollisionShape1, dmaPpuAddress2 , dmaSize, DMA_TAG(2), 0, 0); @@ -452,8 +838,7 @@ void processCollisionTask(void* userPtr, void* lsMemPtr) collisionPairInput.m_spuCollisionShapes[0] = spuConvexShape0; collisionPairInput.m_spuCollisionShapes[1] = trimeshShape; - btAssert(0); - //ProcessConvexConcaveSpuCollision(&collisionPairInput,spuContacts); + ProcessConvexConcaveSpuCollision(&collisionPairInput,&lsMem,spuContacts); } }