#include "BulletCollision/BroadphaseCollision/btBroadphaseProxy.h" #include "BulletCollision/CollisionDispatch/btCollisionObject.h" #include "BulletCollision/NarrowPhaseCollision/btVoronoiSimplexSolver.h" #include "BulletCollision/NarrowPhaseCollision/btGjkPairDetector.h" #include "BulletCollision/NarrowPhaseCollision/btMinkowskiPenetrationDepthSolver.h" #include "BulletCollision/NarrowPhaseCollision/btGjkEpaPenetrationDepthSolver.h" #include "BulletCollision/CollisionShapes/btConvexShape.h" #include "BulletCollision/CollisionShapes/btOptimizedBvh.h" #include "BulletCollision/CollisionShapes/btTriangleIndexVertexArray.h" #include "BulletCollision/CollisionShapes/btSphereShape.h" #include "BulletCollision/CollisionShapes/btTriangleShape.h" #include "BulletCollision/CollisionShapes/btCapsuleShape.h" #include "BulletCollision/CollisionShapes/btConvexShape.h" #include "BulletCollision/CollisionShapes/btBvhTriangleMeshShape.h" #include "BulletCollision/CollisionShapes/btConvexHullShape.h" #include "BulletCollision/CollisionShapes/btCompoundShape.h" #include "BulletCollision/CollisionShapes/btConvexPointCloudShape.h" #include "SpuGatheringCollisionTask.h" //#define DEBUG_SPU_COLLISION_DETECTION 1 #include "../SpuDoubleBuffer.h" #include "../SpuCollisionTaskProcess.h" #include "../SpuGatheringCollisionDispatcher.h" //for SPU_BATCHSIZE_BROADPHASE_PAIRS #include "../SpuContactManifoldCollisionAlgorithm.h" #include "SpuContactResult.h" #include "SpuCollisionShapes.h" //definition of SpuConvexPolyhedronVertexData #ifdef __SPU__ ///Software caching from the IBM Cell SDK, it reduces 25% SPU time for our test cases #ifndef USE_LIBSPE2 #define USE_SOFTWARE_CACHE 1 #endif #endif //__SPU__ //////////////////////////////////////////////// /// software caching #if USE_SOFTWARE_CACHE #include #include #include #include #define SPE_CACHE_NWAY 4 //#define SPE_CACHE_NSETS 32, 16 #define SPE_CACHE_NSETS 8 //#define SPE_CACHELINE_SIZE 512 #define SPE_CACHELINE_SIZE 128 #define SPE_CACHE_SET_TAGID(set) 15 ///make sure that spe_cache.h is below those defines! #include "spe_cache.h" int g_CacheMisses=0; int g_CacheHits=0; #if 0 // Added to allow cache misses and hits to be tracked, change this to 1 to restore unmodified version #define spe_cache_read(ea) _spe_cache_lookup_xfer_wait_(ea, 0, 1) #else #define spe_cache_read(ea) \ ({ \ int set, idx, line, byte; \ _spe_cache_nway_lookup_(ea, set, idx); \ \ if (btUnlikely(idx < 0)) { \ ++g_CacheMisses; \ idx = _spe_cache_miss_(ea, set, -1); \ spu_writech(22, SPE_CACHE_SET_TAGMASK(set)); \ spu_mfcstat(MFC_TAG_UPDATE_ALL); \ } \ else \ { \ ++g_CacheHits; \ } \ line = _spe_cacheline_num_(set, idx); \ byte = _spe_cacheline_byte_offset_(ea); \ (void *) &spe_cache_mem[line + byte]; \ }) #endif #endif // USE_SOFTWARE_CACHE bool gUseEpa = false; #ifdef USE_SN_TUNER #include #endif //USE_SN_TUNER #if defined (__CELLOS_LV2__) || defined (USE_LIBSPE2) #else #define IGNORE_ALIGNMENT 1 #define spu_printf printf #include #endif //int gNumConvexPoints0=0; ///Make sure no destructors are called on this memory struct CollisionTask_LocalStoreMemory { ATTRIBUTE_ALIGNED16(char bufferProxy0[16]); ATTRIBUTE_ALIGNED16(char bufferProxy1[16]); ATTRIBUTE_ALIGNED16(btBroadphaseProxy* gProxyPtr0); ATTRIBUTE_ALIGNED16(btBroadphaseProxy* gProxyPtr1); btBroadphaseProxy* getProxyPtr0 () { return (btBroadphaseProxy*)bufferProxy0; } btBroadphaseProxy* getProxyPtr1 () { return (btBroadphaseProxy*)bufferProxy1; } ATTRIBUTE_ALIGNED16(char gColObj0 [sizeof(btCollisionObject)+16]); ATTRIBUTE_ALIGNED16(char gColObj1 [sizeof(btCollisionObject)+16]); btCollisionObject* getColObj0() { return (btCollisionObject*) gColObj0; } btCollisionObject* getColObj1() { return (btCollisionObject*) gColObj1; } DoubleBuffer g_workUnitTaskBuffers; ATTRIBUTE_ALIGNED16(btBroadphasePair gBroadphasePairs[SPU_BATCHSIZE_BROADPHASE_PAIRS]); SpuContactManifoldCollisionAlgorithm gSpuContactManifoldAlgo; SpuContactManifoldCollisionAlgorithm* getlocalCollisionAlgorithm() { return (SpuContactManifoldCollisionAlgorithm*)&gSpuContactManifoldAlgo; } btPersistentManifold gPersistentManifold; SpuInternalShape gInternalShapes[3]; // the third is temporary storage SpuInternalConvexHull gInternalConvexHull[3]; // the third is for temporary storage ///we reserve 32bit integer indices, even though they might be 16bit ATTRIBUTE_ALIGNED16(int spuIndices[16]); SpuBvhMeshShape gBvhMeshShape; SpuCompoundShape compoundShapeData[2]; }; static void print_size_stats () { spu_printf("sizeof(CollisionTask_LocalStoreMemory) = %d\n", sizeof(CollisionTask_LocalStoreMemory)); spu_printf("sizeof(SpuInternalShape) = %d\n", sizeof(SpuInternalShape)); spu_printf("sizeof(SpuInternalConvexHull) = %d\n", sizeof(SpuInternalConvexHull)); spu_printf("sizeof(SpuBvhMeshShape) = %d\n", sizeof(SpuBvhMeshShape)); } #if defined(__CELLOS_LV2__) || defined(USE_LIBSPE2) ATTRIBUTE_ALIGNED16(CollisionTask_LocalStoreMemory gLocalStoreMemory); void* createCollisionLocalStoreMemory() { //print_size_stats (); return &gLocalStoreMemory; } #else void* createCollisionLocalStoreMemory() { return new CollisionTask_LocalStoreMemory; }; #endif void ProcessSpuConvexConvexCollision(SpuCollisionPairInput* wuInput, CollisionTask_LocalStoreMemory* lsMemPtr, SpuContactResult& spuContacts); SIMD_FORCE_INLINE void small_cache_read(void* buffer, ppu_address_t ea, size_t size) { #if USE_SOFTWARE_CACHE // Check for alignment requirements. We need to make sure the entire request fits within one cache line, // so the first and last bytes should fall on the same cache line btAssert((ea & ~SPE_CACHELINE_MASK) == ((ea + size - 1) & ~SPE_CACHELINE_MASK)); void* ls = spe_cache_read(ea); memcpy(buffer, ls, size); #else stallingUnalignedDmaSmallGet(buffer,ea,size); #endif } SIMD_FORCE_INLINE void small_cache_read_triple( void* ls0, ppu_address_t ea0, void* ls1, ppu_address_t ea1, void* ls2, ppu_address_t ea2, size_t size) { btAssert(size<16); ATTRIBUTE_ALIGNED16(char tmpBuffer0[32]); ATTRIBUTE_ALIGNED16(char tmpBuffer1[32]); ATTRIBUTE_ALIGNED16(char tmpBuffer2[32]); uint32_t i; ///make sure last 4 bits are the same, for cellDmaSmallGet char* localStore0 = (char*)ls0; uint32_t last4BitsOffset = ea0 & 0x0f; char* tmpTarget0 = tmpBuffer0 + last4BitsOffset; #ifdef __SPU__ cellDmaSmallGet(tmpTarget0,ea0,size,DMA_TAG(1),0,0); #else tmpTarget0 = (char*)cellDmaSmallGetReadOnly(tmpTarget0,ea0,size,DMA_TAG(1),0,0); #endif char* localStore1 = (char*)ls1; last4BitsOffset = ea1 & 0x0f; char* tmpTarget1 = tmpBuffer1 + last4BitsOffset; #ifdef __SPU__ cellDmaSmallGet(tmpTarget1,ea1,size,DMA_TAG(1),0,0); #else tmpTarget1 = (char*)cellDmaSmallGetReadOnly(tmpTarget1,ea1,size,DMA_TAG(1),0,0); #endif char* localStore2 = (char*)ls2; last4BitsOffset = ea2 & 0x0f; char* tmpTarget2 = tmpBuffer2 + last4BitsOffset; #ifdef __SPU__ cellDmaSmallGet(tmpTarget2,ea2,size,DMA_TAG(1),0,0); #else tmpTarget2 = (char*)cellDmaSmallGetReadOnly(tmpTarget2,ea2,size,DMA_TAG(1),0,0); #endif cellDmaWaitTagStatusAll( DMA_MASK(1) ); //this is slowish, perhaps memcpy on SPU is smarter? for (i=0; btLikely( im_spuCollisionShapes[1]; //need the mesh interface, for access to triangle vertices dmaBvhShapeData (&lsMemPtr->bvhShapeData, trimeshShape); btVector3 aabbMin(-1,-400,-1); btVector3 aabbMax(1,400,1); //recalc aabbs btTransform convexInTriangleSpace; convexInTriangleSpace = wuInput->m_worldTransform1.inverse() * wuInput->m_worldTransform0; btConvexInternalShape* convexShape = (btConvexInternalShape*)wuInput->m_spuCollisionShapes[0]; computeAabb (aabbMin, aabbMax, convexShape, wuInput->m_collisionShapes[0], wuInput->m_shapeType0, convexInTriangleSpace); //CollisionShape* triangleShape = static_cast(triBody->m_collisionShape); //convexShape->getAabb(convexInTriangleSpace,m_aabbMin,m_aabbMax); // btScalar extraMargin = collisionMarginTriangle; // btVector3 extra(extraMargin,extraMargin,extraMargin); // aabbMax += extra; // aabbMin -= extra; ///quantize query AABB unsigned short int quantizedQueryAabbMin[3]; unsigned short int quantizedQueryAabbMax[3]; lsMemPtr->bvhShapeData.getOptimizedBvh()->quantizeWithClamp(quantizedQueryAabbMin,aabbMin,0); lsMemPtr->bvhShapeData.getOptimizedBvh()->quantizeWithClamp(quantizedQueryAabbMax,aabbMax,1); QuantizedNodeArray& nodeArray = lsMemPtr->bvhShapeData.getOptimizedBvh()->getQuantizedNodeArray(); //spu_printf("SPU: numNodes = %d\n",nodeArray.size()); BvhSubtreeInfoArray& subTrees = lsMemPtr->bvhShapeData.getOptimizedBvh()->getSubtreeInfoArray(); spuNodeCallback nodeCallback(wuInput,lsMemPtr,spuContacts); IndexedMeshArray& indexArray = lsMemPtr->bvhShapeData.gTriangleMeshInterfacePtr->getIndexedMeshArray(); //spu_printf("SPU:indexArray.size() = %d\n",indexArray.size()); // spu_printf("SPU: numSubTrees = %d\n",subTrees.size()); //not likely to happen if (subTrees.size() && indexArray.size() == 1) { ///DMA in the index info dmaBvhIndexedMesh (&lsMemPtr->bvhShapeData.gIndexMesh, indexArray, 0 /* index into indexArray */, 1 /* dmaTag */); cellDmaWaitTagStatusAll(DMA_MASK(1)); //display the headers int numBatch = subTrees.size(); for (int i=0;ibvhShapeData.gSubtreeHeaders[0], (ppu_address_t)(&subTrees[i]), nextBatch, 1); cellDmaWaitTagStatusAll(DMA_MASK(1)); // spu_printf("nextBatch = %d\n",nextBatch); for (int j=0;jbvhShapeData.gSubtreeHeaders[j]; unsigned int overlap = spuTestQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin,quantizedQueryAabbMax,subtree.m_quantizedAabbMin,subtree.m_quantizedAabbMax); if (overlap) { btAssert(subtree.m_subtreeSize); //dma the actual nodes of this subtree dmaBvhSubTreeNodes (&lsMemPtr->bvhShapeData.gSubtreeNodes[0], subtree, nodeArray, 2); cellDmaWaitTagStatusAll(DMA_MASK(2)); /* Walk this subtree */ spuWalkStacklessQuantizedTree(&nodeCallback,quantizedQueryAabbMin,quantizedQueryAabbMax, &lsMemPtr->bvhShapeData.gSubtreeNodes[0], 0, subtree.m_subtreeSize); } // spu_printf("subtreeSize = %d\n",gSubtreeHeaders[j].m_subtreeSize); } // unsigned short int m_quantizedAabbMin[3]; // unsigned short int m_quantizedAabbMax[3]; // int m_rootNodeIndex; // int m_subtreeSize; i+=nextBatch; } //pre-fetch first tree, then loop and double buffer } } #endif template void DoSwap(T& a, T& b) { char tmp[sizeof(T)]; memcpy(tmp, &a, sizeof(T)); memcpy(&a, &b, sizeof(T)); memcpy(&b, tmp, sizeof(T)); } SIMD_FORCE_INLINE void dmaAndSetupCollisionObjects(SpuCollisionPairInput& collisionPairInput, CollisionTask_LocalStoreMemory& lsMem) { register int dmaSize; register ppu_address_t dmaPpuAddress2; dmaSize = sizeof(btCollisionObject); dmaPpuAddress2 = /*collisionPairInput.m_isSwapped ? (ppu_address_t)lsMem.gProxyPtr1->m_clientObject :*/ (ppu_address_t)lsMem.gProxyPtr0->m_clientObject; cellDmaGet(&lsMem.gColObj0, dmaPpuAddress2 , dmaSize, DMA_TAG(1), 0, 0); dmaSize = sizeof(btCollisionObject); dmaPpuAddress2 = /*collisionPairInput.m_isSwapped ? (ppu_address_t)lsMem.gProxyPtr0->m_clientObject :*/ (ppu_address_t)lsMem.gProxyPtr1->m_clientObject; cellDmaGet(&lsMem.gColObj1, dmaPpuAddress2 , dmaSize, DMA_TAG(2), 0, 0); cellDmaWaitTagStatusAll(DMA_MASK(1) | DMA_MASK(2)); collisionPairInput.m_worldTransform0 = lsMem.getColObj0()->getWorldTransform(); collisionPairInput.m_worldTransform1 = lsMem.getColObj1()->getWorldTransform(); } void SpuConvexConvexCollisionAlgorithm (SpuCollisionPairInput& collisionPairInput, CollisionTask_LocalStoreMemory& lsMem, SpuContactResult &spuContacts, SpuInternalShape* shape0Internal, SpuInternalShape* shape1Internal, SpuInternalConvexHull* shape0Points, SpuInternalConvexHull* shape1Points) { btConvexPointCloudShape shape0ConvexHull (NULL, 0); btConvexPointCloudShape shape1ConvexHull (NULL, 0); btConvexShape* shape0; btConvexShape* shape1; btPersistentManifold* manifold = (btPersistentManifold*)collisionPairInput.m_persistentManifoldPtr; #ifdef DEBUG_SPU_COLLISION_DETECTION spu_printf("SpuConvexConvexCollisionAlgorithm ()\n"); #endif if (collisionPairInput.m_shapeType0== CONVEX_HULL_SHAPE_PROXYTYPE ) { shape0ConvexHull.setPoints (shape0Points->m_points, shape0Points->m_numPoints); shape0 = &shape0ConvexHull; } else { shape0 = shape0Internal->m_convexShape; } if (collisionPairInput.m_shapeType1 == CONVEX_HULL_SHAPE_PROXYTYPE ) { shape1ConvexHull.setPoints (shape1Points->m_points, shape1Points->m_numPoints); shape1 = &shape1ConvexHull; } else { shape1 = shape1Internal->m_convexShape; } #ifdef DEBUG_SPU_COLLISION_DETECTION spu_printf("shape0 = %p shape1 = %p\n", shape0, shape1); #endif { btVoronoiSimplexSolver vsSolver; btConvexPenetrationDepthSolver* penetrationSolver = NULL; #define SPU_ENABLE_EPA 1 #ifdef SPU_ENABLE_EPA btGjkEpaPenetrationDepthSolver epaPenetrationSolver; btMinkowskiPenetrationDepthSolver minkowskiPenetrationSolver; if (gUseEpa) { penetrationSolver = &epaPenetrationSolver; } else { penetrationSolver = &minkowskiPenetrationSolver; } #else btMinkowskiPenetrationDepthSolver minkowskiPenetrationSolver; penetrationSolver = &minkowskiPenetrationSolver; #endif btDiscreteCollisionDetectorInterface::ClosestPointInput cpInput; cpInput.m_transformA = collisionPairInput.m_worldTransform0; cpInput.m_transformB = collisionPairInput.m_worldTransform1; btPersistentManifold* spuManifold= &lsMem.gPersistentManifold; ppu_address_t manifoldAddress = (ppu_address_t)manifold; spuContacts.setContactInfo(spuManifold,manifoldAddress, lsMem.getColObj0()->getWorldTransform(), lsMem.getColObj1()->getWorldTransform(), lsMem.getColObj0()->getRestitution(), lsMem.getColObj1()->getRestitution(), lsMem.getColObj0()->getFriction(), lsMem.getColObj1()->getFriction(), collisionPairInput.m_isSwapped); #ifdef DEBUG_SPU_COLLISION_DETECTION spu_printf("start GJK\n"); #endif btGjkPairDetector gjk(shape0,shape1,&vsSolver,penetrationSolver); gjk.getClosestPoints(cpInput,spuContacts, NULL);//,debugDraw); #ifdef DEBUG_SPU_COLLISION_DETECTION spu_printf("stop GJK\n"); #endif } } class spuNodeCallback : public btNodeOverlapCallback { const SpuCollisionPairInput& m_collisionPairInput; CollisionTask_LocalStoreMemory& m_lsMem; SpuContactResult& m_spuContacts; ATTRIBUTE_ALIGNED16(btVector3 spuTriangleVertices[3]); ATTRIBUTE_ALIGNED16(btScalar spuUnscaledVertex[4]); public: spuNodeCallback(const SpuCollisionPairInput& collisionPairInput, CollisionTask_LocalStoreMemory& lsMem, SpuContactResult& spuContacts) : m_collisionPairInput(collisionPairInput), m_lsMem(lsMem), m_spuContacts(spuContacts) { } virtual void processNode(int subPart, int triangleIndex) { ///Create a triangle on the stack, call process collision, with GJK ///DMA the vertices, can benefit from software caching //spu_printf("processNode with triangleIndex %d\n",triangleIndex); ///TODO: add switch between short int, and int indices, based on indexType // ugly solution to support both 16bit and 32bit indices if (m_lsMem.gBvhMeshShape.m_indexMesh.m_indexType == PHY_SHORT) { unsigned short int* indexBasePtr = (unsigned short int*)(m_lsMem.gBvhMeshShape.m_indexMesh.m_triangleIndexBase+triangleIndex*m_lsMem.gBvhMeshShape.m_indexMesh.m_triangleIndexStride); ATTRIBUTE_ALIGNED16(unsigned short int tmpIndices[3]); small_cache_read_triple(&tmpIndices[0],(ppu_address_t)&indexBasePtr[0], &tmpIndices[1],(ppu_address_t)&indexBasePtr[1], &tmpIndices[2],(ppu_address_t)&indexBasePtr[2], sizeof(unsigned short int)); m_lsMem.spuIndices[0] = int(tmpIndices[0]); m_lsMem.spuIndices[1] = int(tmpIndices[1]); m_lsMem.spuIndices[2] = int(tmpIndices[2]); } else { unsigned int* indexBasePtr = (unsigned int*)(m_lsMem.gBvhMeshShape.m_indexMesh.m_triangleIndexBase+triangleIndex*m_lsMem.gBvhMeshShape.m_indexMesh.m_triangleIndexStride); small_cache_read_triple(&m_lsMem.spuIndices[0],(ppu_address_t)&indexBasePtr[0], &m_lsMem.spuIndices[1],(ppu_address_t)&indexBasePtr[1], &m_lsMem.spuIndices[2],(ppu_address_t)&indexBasePtr[2], sizeof(int)); } //spu_printf("SPU index0=%d ,",spuIndices[0]); //spu_printf("SPU index1=%d ,",spuIndices[1]); //spu_printf("SPU index2=%d ,",spuIndices[2]); //spu_printf("SPU: indexBasePtr=%llx\n",indexBasePtr); const btVector3& meshScaling = m_lsMem.gBvhMeshShape.m_triangleMeshInterface->getScaling(); for (int j=2;btLikely( j>=0 );j--) { int graphicsindex = m_lsMem.spuIndices[j]; //spu_printf("SPU index=%d ,",graphicsindex); btScalar* graphicsbasePtr = (btScalar*)(m_lsMem.gBvhMeshShape.m_indexMesh.m_vertexBase+graphicsindex*m_lsMem.gBvhMeshShape.m_indexMesh.m_vertexStride); //spu_printf("SPU graphicsbasePtr=%llx\n",graphicsbasePtr); ///handle un-aligned vertices... //another DMA for each vertex small_cache_read_triple(&spuUnscaledVertex[0],(ppu_address_t)&graphicsbasePtr[0], &spuUnscaledVertex[1],(ppu_address_t)&graphicsbasePtr[1], &spuUnscaledVertex[2],(ppu_address_t)&graphicsbasePtr[2], sizeof(btScalar)); spuTriangleVertices[j] = btVector3( spuUnscaledVertex[0]*meshScaling.getX(), spuUnscaledVertex[1]*meshScaling.getY(), spuUnscaledVertex[2]*meshScaling.getZ()); //spu_printf("SPU:triangle vertices:%f,%f,%f\n",spuTriangleVertices[j].x(),spuTriangleVertices[j].y(),spuTriangleVertices[j].z()); } btTriangleShape tmpTriangleShape (spuTriangleVertices[0], spuTriangleVertices[1], spuTriangleVertices[2]); SpuCollisionPairInput triangleConcaveInput(m_collisionPairInput); triangleConcaveInput.m_shapeType1 = TRIANGLE_SHAPE_PROXYTYPE; m_spuContacts.setShapeIdentifiers(-1,-1,subPart,triangleIndex); //m_spuContacts.flush(); m_lsMem.gInternalShapes[2].m_collisionShape = &tmpTriangleShape; m_lsMem.gInternalShapes[2].m_convexShape = &tmpTriangleShape; SpuConvexConvexCollisionAlgorithm (triangleConcaveInput, m_lsMem, m_spuContacts, &m_lsMem.gInternalShapes[0], &m_lsMem.gInternalShapes[2], &m_lsMem.gInternalConvexHull[0], NULL); ///this flush should be automatic //m_spuContacts.flush(); } }; void SpuConvexConcaveCollisionAlgorithm (SpuCollisionPairInput& collisionPairInput, CollisionTask_LocalStoreMemory& lsMem, SpuContactResult &spuContacts, SpuInternalShape* shape0Internal, SpuInternalShape* shape1Internal, SpuInternalConvexHull* shape0Points, SpuInternalConvexHull* shape1Points) { btConvexPointCloudShape shape0ConvexHull (NULL, 0); btConvexShape* shape0; #ifdef DEBUG_SPU_COLLISION_DETECTION spu_printf("SpuConvexConcaveCollisionAlgorithm ()\n"); #endif if (collisionPairInput.m_shapeType0== CONVEX_HULL_SHAPE_PROXYTYPE ) { shape0ConvexHull.setPoints (shape0Points->m_points, shape0Points->m_numPoints); shape0 = &shape0ConvexHull; } else { shape0 = shape0Internal->m_convexShape; } SpuBvhMeshShape* bvhShape = &lsMem.gBvhMeshShape; bvhShape->dmaMeshInterfaceAndOptimizedBvh (*shape1Internal, 1, 2); cellDmaWaitTagStatusAll (DMA_MASK(1) | DMA_MASK(2)); // determine aabb of convex shape in triangle mesh btVector3 aabbMin; btVector3 aabbMax; btTransform convexInTriangleSpace; convexInTriangleSpace = collisionPairInput.m_worldTransform1.inverse() * collisionPairInput.m_worldTransform0; shape0->getAabbNonVirtual (convexInTriangleSpace, aabbMin, aabbMax); // btScalar extraMargin = collisionMarginTriangle; // btVector3 extra(extraMargin,extraMargin,extraMargin); // aabbMax += extra; // aabbMin -= extra; //quantize query AABB unsigned short int quantizedQueryAabbMin[3]; unsigned short int quantizedQueryAabbMax[3]; bvhShape->m_optimizedBvh->quantizeWithClamp(quantizedQueryAabbMin,aabbMin,0); bvhShape->m_optimizedBvh->quantizeWithClamp(quantizedQueryAabbMax,aabbMax,1); QuantizedNodeArray& nodeArray = bvhShape->m_optimizedBvh->getQuantizedNodeArray(); //spu_printf("SPU: numNodes = %d\n",nodeArray.size()); BvhSubtreeInfoArray& subTrees = bvhShape->m_optimizedBvh->getSubtreeInfoArray(); spuNodeCallback nodeCallback(collisionPairInput,lsMem,spuContacts); IndexedMeshArray& indexArray = bvhShape->m_triangleMeshInterface->getIndexedMeshArray(); //spu_printf("SPU:indexArray.size() = %d\n",indexArray.size()); //spu_printf("SPU: numSubTrees = %d\n",subTrees.size()); if (subTrees.size() && indexArray.size() == 1) { //DMA in the index info, we only support meshes with a single index (hence the '0') bvhShape->dmaIndexedMesh (0, 1); cellDmaWaitTagStatusAll(DMA_MASK(1)); //display the headers int numBatch = subTrees.size(); for (int i=0;idmaSubTreeHeaders ((ppu_address_t)(&subTrees[i]), nextBatch, 1); cellDmaWaitTagStatusAll(DMA_MASK(1)); // spu_printf("nextBatch = %d\n",nextBatch); for (int j=0;jm_subtreeHeaders[j]; unsigned int overlap = spuTestQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin,quantizedQueryAabbMax,subtree.m_quantizedAabbMin,subtree.m_quantizedAabbMax); if (overlap) { btAssert(subtree.m_subtreeSize); //dma the actual nodes of this subtree bvhShape->dmaSubTreeNodes (subtree, nodeArray, 2); cellDmaWaitTagStatusAll(DMA_MASK(2)); /* Walk this subtree */ spuWalkStacklessQuantizedTree(&nodeCallback, quantizedQueryAabbMin, quantizedQueryAabbMax, &bvhShape->m_subtreeNodes[0], 0,subtree.m_subtreeSize); } // spu_printf("subtreeSize = %d\n",gSubtreeHeaders[j].m_subtreeSize); } i+=nextBatch; } //pre-fetch first tree, then loop and double buffer } } void handleCollisionPair(SpuCollisionPairInput& collisionPairInput, CollisionTask_LocalStoreMemory& lsMem, SpuContactResult &spuContacts, ppu_address_t collisionShape0Ptr, ppu_address_t collisionShape1Ptr, bool dmaShapes = true) { #ifdef DEBUG_SPU_COLLISION_DETECTION spu_printf("handleCollisionPair ()\n"); #endif if (btBroadphaseProxy::isConvex(collisionPairInput.m_shapeType0) && btBroadphaseProxy::isConvex(collisionPairInput.m_shapeType1)) { if (dmaShapes) { /* Don't load the same shape twice. */ if (collisionShape0Ptr != lsMem.gInternalShapes[0].m_ppuConvexShapePtr) { lsMem.gInternalShapes[0].dmaShapeData (collisionShape0Ptr, collisionPairInput.m_shapeType0, 1); } /* Don't load the same shape twice. */ if (collisionShape1Ptr != lsMem.gInternalShapes[1].m_ppuConvexShapePtr) { lsMem.gInternalShapes[1].dmaShapeData (collisionShape1Ptr, collisionPairInput.m_shapeType1, 2); } if (collisionPairInput.m_shapeType0 == CONVEX_HULL_SHAPE_PROXYTYPE) { btConvexHullShape* convexHull = (btConvexHullShape*)lsMem.gInternalShapes[0].m_convexShape; cellDmaWaitTagStatusAll (DMA_MASK(1)); /* Don't load the same verts twice */ if ((ppu_address_t)convexHull->getPoints() != lsMem.gInternalConvexHull[0].m_ppuPointsPtr) { lsMem.gInternalConvexHull[0].dmaPointsData (lsMem.gInternalShapes[0], 1); } } if (collisionPairInput.m_shapeType1 == CONVEX_HULL_SHAPE_PROXYTYPE) { btConvexHullShape* convexHull = (btConvexHullShape*)lsMem.gInternalShapes[1].m_convexShape; cellDmaWaitTagStatusAll (DMA_MASK(2)); /* Don't load the same verts twice */ if ((ppu_address_t)convexHull->getPoints() != lsMem.gInternalConvexHull[1].m_ppuPointsPtr) { lsMem.gInternalConvexHull[1].dmaPointsData (lsMem.gInternalShapes[1], 2); } } cellDmaWaitTagStatusAll(DMA_MASK(1) | DMA_MASK(2)); #if 0 if (collisionPairInput.m_shapeType0 == CONVEX_HULL_SHAPE_PROXYTYPE) { for (int i = 0; i < lsMem.gInternalConvexHull[0].m_numPoints; i++) { btVector3 vtx = lsMem.gInternalConvexHull[0].m_points[i]; spu_printf("%d %f %f %f\n", i, vtx.getX(), vtx.getY(), vtx.getZ()); } } if (collisionPairInput.m_shapeType1 == CONVEX_HULL_SHAPE_PROXYTYPE) { for (int i = 0; i < lsMem.gInternalConvexHull[1].m_numPoints; i++) { btVector3 vtx = lsMem.gInternalConvexHull[1].m_points[i]; spu_printf("%d %f %f %f\n", i, vtx.getX(), vtx.getY(), vtx.getZ()); } } #endif } SpuConvexConvexCollisionAlgorithm (collisionPairInput, lsMem, spuContacts, &lsMem.gInternalShapes[0], &lsMem.gInternalShapes[1], &lsMem.gInternalConvexHull[0], &lsMem.gInternalConvexHull[1]); } else if (btBroadphaseProxy::isCompound(collisionPairInput.m_shapeType0) && btBroadphaseProxy::isCompound(collisionPairInput.m_shapeType1)) { //snPause(); /* Don't load the same shape twice. */ if (collisionShape0Ptr != lsMem.gInternalShapes[0].m_ppuConvexShapePtr) { lsMem.gInternalShapes[0].dmaShapeData (collisionShape0Ptr, collisionPairInput.m_shapeType0, 1); } /* Don't load the same shape twice. */ if (collisionShape1Ptr != lsMem.gInternalShapes[1].m_ppuConvexShapePtr) { lsMem.gInternalShapes[1].dmaShapeData (collisionShape1Ptr, collisionPairInput.m_shapeType1, 2); } cellDmaWaitTagStatusAll(DMA_MASK(1) | DMA_MASK(2)); // Both are compounds, do N^2 CD for now // TODO: add some AABB-based pruning btCompoundShape* spuCompoundShape0 = (btCompoundShape*)lsMem.gInternalShapes[0].m_collisionShape; btCompoundShape* spuCompoundShape1 = (btCompoundShape*)lsMem.gInternalShapes[1].m_collisionShape; lsMem.compoundShapeData[0].dmaChildShapeInfo (spuCompoundShape0, 1); lsMem.compoundShapeData[1].dmaChildShapeInfo (spuCompoundShape1, 2); cellDmaWaitTagStatusAll(DMA_MASK(1) | DMA_MASK(2)); int childShapeCount0 = spuCompoundShape0->getNumChildShapes(); int childShapeCount1 = spuCompoundShape1->getNumChildShapes(); // Start the N^2 for (int i = 0; i < childShapeCount0; ++i) { btCompoundShapeChild& childShape0 = lsMem.compoundShapeData[0].m_subshapes[i]; // dma childshape0 into gInternalShapes 0 (this overwrites the original compound shape) lsMem.compoundShapeData[0].dmaChildShape (i, &lsMem.gInternalShapes[0], &lsMem.gInternalConvexHull[0], 1); for (int j = 0; j < childShapeCount1; ++j) { btCompoundShapeChild& childShape1 = lsMem.compoundShapeData[1].m_subshapes[j]; lsMem.compoundShapeData[1].dmaChildShape (j, &lsMem.gInternalShapes[1], &lsMem.gInternalConvexHull[1], 2); /* Create a new collision pair input struct using the two child shapes */ SpuCollisionPairInput cinput (collisionPairInput); cinput.m_worldTransform0 = collisionPairInput.m_worldTransform0 * childShape0.m_transform; cinput.m_shapeType0 = childShape0.m_childShapeType; cinput.m_collisionMargin0 = childShape0.m_childMargin; cinput.m_worldTransform1 = collisionPairInput.m_worldTransform1 * childShape1.m_transform; cinput.m_shapeType1 = childShape1.m_childShapeType; cinput.m_collisionMargin1 = childShape1.m_childMargin; cellDmaWaitTagStatusAll (DMA_MASK(1) | DMA_MASK(2)); /* Recursively call handleCollisionPair () with new collision pair input */ handleCollisionPair(cinput, lsMem, spuContacts, (ppu_address_t)childShape0.m_childShape, (ppu_address_t)childShape1.m_childShape,false); // bug fix: changed index to j. } } } else if (btBroadphaseProxy::isCompound(collisionPairInput.m_shapeType0) ) { /* Don't load the same shape twice. */ if (collisionShape0Ptr != lsMem.gInternalShapes[0].m_ppuConvexShapePtr) { lsMem.gInternalShapes[0].dmaShapeData (collisionShape0Ptr, collisionPairInput.m_shapeType0, 1); } /* Don't load the same shape twice. */ if (collisionShape1Ptr != lsMem.gInternalShapes[1].m_ppuConvexShapePtr) { lsMem.gInternalShapes[1].dmaShapeData (collisionShape1Ptr, collisionPairInput.m_shapeType1, 2); } if (collisionPairInput.m_shapeType1 == CONVEX_HULL_SHAPE_PROXYTYPE) { btConvexHullShape* convexHull = (btConvexHullShape*)lsMem.gInternalShapes[1].m_convexShape; cellDmaWaitTagStatusAll (DMA_MASK(2)); /* Don't load the same verts twice */ if ((ppu_address_t)convexHull->getPoints() != lsMem.gInternalConvexHull[1].m_ppuPointsPtr) { lsMem.gInternalConvexHull[1].dmaPointsData (lsMem.gInternalShapes[1], 2); } } cellDmaWaitTagStatusAll (DMA_MASK(1)); btCompoundShape* spuCompoundShape0 = (btCompoundShape*)lsMem.gInternalShapes[0].m_collisionShape; lsMem.compoundShapeData[0].dmaChildShapeInfo (spuCompoundShape0, 1); cellDmaWaitTagStatusAll(DMA_MASK(1) | DMA_MASK(2)); int childShapeCount0 = spuCompoundShape0->getNumChildShapes(); for (int i = 0; i < childShapeCount0; ++i) { btCompoundShapeChild& childShape = lsMem.compoundShapeData[0].m_subshapes[i]; lsMem.compoundShapeData[0].dmaChildShape (i, &lsMem.gInternalShapes[0], &lsMem.gInternalConvexHull[0], 1); SpuCollisionPairInput cinput (collisionPairInput); cinput.m_worldTransform0 = collisionPairInput.m_worldTransform0 * childShape.m_transform; cinput.m_shapeType0 = childShape.m_childShapeType; cinput.m_collisionMargin0 = childShape.m_childMargin; cellDmaWaitTagStatusAll(DMA_MASK(1)); handleCollisionPair(cinput, lsMem, spuContacts, (ppu_address_t)childShape.m_childShape, collisionShape1Ptr, false); } } else if (btBroadphaseProxy::isCompound(collisionPairInput.m_shapeType1) ) { //snPause(); /* Don't load the same shape twice. */ if (collisionShape0Ptr != lsMem.gInternalShapes[0].m_ppuConvexShapePtr) { lsMem.gInternalShapes[0].dmaShapeData (collisionShape0Ptr, collisionPairInput.m_shapeType0, 1); } /* Don't load the same shape twice. */ if (collisionShape1Ptr != lsMem.gInternalShapes[1].m_ppuConvexShapePtr) { lsMem.gInternalShapes[1].dmaShapeData (collisionShape1Ptr, collisionPairInput.m_shapeType1, 2); } cellDmaWaitTagStatusAll(DMA_MASK(1) | DMA_MASK(2)); // object 0 non-compound, object 1 compound btCompoundShape* spuCompoundShape = (btCompoundShape*)lsMem.gInternalShapes[1].m_collisionShape; lsMem.compoundShapeData[1].dmaChildShapeInfo (spuCompoundShape, 1); cellDmaWaitTagStatusAll(DMA_MASK(1)); int childShapeCount = spuCompoundShape->getNumChildShapes(); for (int i = 0; i < childShapeCount; ++i) { btCompoundShapeChild& childShape = lsMem.compoundShapeData[1].m_subshapes[i]; // Dma the child shape lsMem.compoundShapeData[1].dmaChildShape (i, &lsMem.gInternalShapes[1], &lsMem.gInternalConvexHull[1], 1); cellDmaWaitTagStatusAll(DMA_MASK(1)); SpuCollisionPairInput cinput (collisionPairInput); cinput.m_worldTransform1 = collisionPairInput.m_worldTransform1 * childShape.m_transform; cinput.m_shapeType1 = childShape.m_childShapeType; cinput.m_collisionMargin1 = childShape.m_childMargin; handleCollisionPair(cinput, lsMem, spuContacts, collisionShape0Ptr, (ppu_address_t)childShape.m_childShape, false); } } else { //we only support convex v. concave //a non-convex shape is involved bool handleConvexConcave = false; //concave v. convex //swap into convex v. concave if (btBroadphaseProxy::isConcave(collisionPairInput.m_shapeType0) && btBroadphaseProxy::isConvex(collisionPairInput.m_shapeType1)) { // Swap stuff DoSwap(collisionShape0Ptr, collisionShape1Ptr); DoSwap(collisionPairInput.m_shapeType0, collisionPairInput.m_shapeType1); DoSwap(collisionPairInput.m_worldTransform0, collisionPairInput.m_worldTransform1); DoSwap(collisionPairInput.m_collisionMargin0, collisionPairInput.m_collisionMargin1); collisionPairInput.m_isSwapped = true; } if (btBroadphaseProxy::isConvex(collisionPairInput.m_shapeType0)&& btBroadphaseProxy::isConcave(collisionPairInput.m_shapeType1)) { handleConvexConcave = true; } if (handleConvexConcave) { if (dmaShapes) { /* Don't load the same shape twice. */ if (collisionShape0Ptr != lsMem.gInternalShapes[0].m_ppuConvexShapePtr) { lsMem.gInternalShapes[0].dmaShapeData (collisionShape0Ptr, collisionPairInput.m_shapeType0, 1); } /* Don't load the same shape twice. */ if (collisionShape1Ptr != lsMem.gInternalShapes[1].m_ppuConvexShapePtr) { lsMem.gInternalShapes[1].dmaShapeData (collisionShape1Ptr, collisionPairInput.m_shapeType1, 2); } if (collisionPairInput.m_shapeType0 == CONVEX_HULL_SHAPE_PROXYTYPE) { btConvexHullShape* convexHull = (btConvexHullShape*)lsMem.gInternalShapes[0].m_convexShape; cellDmaWaitTagStatusAll (DMA_MASK(1)); /* Don't load the same verts twice */ if ((ppu_address_t)convexHull->getPoints() != lsMem.gInternalConvexHull[0].m_ppuPointsPtr) { lsMem.gInternalConvexHull[0].dmaPointsData (lsMem.gInternalShapes[0], 1); } } /* This can't happen because shape1 is the optimized Bvh shape */ #if 0 if (collisionPairInput.m_shapeType1 == CONVEX_HULL_SHAPE_PROXYTYPE) { btConvexHullShape* convexHull = (btConvexHullShape*)lsMem.gInternalShapes[1].m_convexShape; cellDmaWaitTagStatusAll (DMA_MASK(2)); /* Don't load the same verts twice */ if ((ppu_address_t)convexHull->getPoints() != lsMem.gInternalConvexHull[1].m_ppuPointsPtr) { lsMem.gInternalConvexHull[1].dmaPointsData (lsMem.gInternalShapes[1], 2); } } #endif cellDmaWaitTagStatusAll(DMA_MASK(1) | DMA_MASK(2)); } SpuConvexConcaveCollisionAlgorithm (collisionPairInput, lsMem, spuContacts, &lsMem.gInternalShapes[0], &lsMem.gInternalShapes[1], &lsMem.gInternalConvexHull[0], &lsMem.gInternalConvexHull[1]); } } spuContacts.flush(); } void processCollisionTask(void* userPtr, void* lsMemPtr) { SpuGatherAndProcessPairsTaskDesc* taskDescPtr = (SpuGatherAndProcessPairsTaskDesc*)userPtr; SpuGatherAndProcessPairsTaskDesc& taskDesc = *taskDescPtr; CollisionTask_LocalStoreMemory* colMemPtr = (CollisionTask_LocalStoreMemory*)lsMemPtr; CollisionTask_LocalStoreMemory& lsMem = *(colMemPtr); gUseEpa = taskDesc.m_useEpa; // spu_printf("taskDescPtr=%llx\n",taskDescPtr); SpuContactResult spuContacts; //////////////////// ppu_address_t dmaInPtr = taskDesc.inPtr; unsigned int numPages = taskDesc.numPages; unsigned int numOnLastPage = taskDesc.numOnLastPage; // prefetch first set of inputs and wait lsMem.g_workUnitTaskBuffers.init(); unsigned int nextNumOnPage = (numPages > 1)? MIDPHASE_NUM_WORKUNITS_PER_PAGE : numOnLastPage; lsMem.g_workUnitTaskBuffers.backBufferDmaGet(dmaInPtr, nextNumOnPage*sizeof(SpuGatherAndProcessWorkUnitInput), DMA_TAG(3)); dmaInPtr += MIDPHASE_WORKUNIT_PAGE_SIZE; register unsigned char *inputPtr; register unsigned int numOnPage; register unsigned int j; SpuGatherAndProcessWorkUnitInput* wuInputs; register int dmaSize; register ppu_address_t dmaPpuAddress; register ppu_address_t dmaPpuAddress2; int userInfo; int numPairs; register int p; SpuCollisionPairInput collisionPairInput; for (unsigned int i = 0; btLikely(i < numPages); i++) { // wait for back buffer dma and swap buffers inputPtr = lsMem.g_workUnitTaskBuffers.swapBuffers(); // number on current page is number prefetched last iteration numOnPage = nextNumOnPage; // prefetch next set of inputs #if MIDPHASE_NUM_WORKUNIT_PAGES > 2 if ( btLikely( i < numPages-1 ) ) #else if ( btUnlikely( i < numPages-1 ) ) #endif { nextNumOnPage = (i == numPages-2)? numOnLastPage : MIDPHASE_NUM_WORKUNITS_PER_PAGE; lsMem.g_workUnitTaskBuffers.backBufferDmaGet(dmaInPtr, nextNumOnPage*sizeof(SpuGatherAndProcessWorkUnitInput), DMA_TAG(3)); dmaInPtr += MIDPHASE_WORKUNIT_PAGE_SIZE; } wuInputs = reinterpret_cast(inputPtr); for (j = 0; btLikely( j < numOnPage ); j++) { #ifdef DEBUG_SPU_COLLISION_DETECTION // printMidphaseInput(&wuInputs[j]); #endif //DEBUG_SPU_COLLISION_DETECTION numPairs = wuInputs[j].m_endIndex - wuInputs[j].m_startIndex; if ( btLikely( numPairs ) ) { // DMA: broadphase pairs dmaSize = numPairs*sizeof(btBroadphasePair); dmaPpuAddress = wuInputs[j].m_pairArrayPtr+wuInputs[j].m_startIndex * sizeof(btBroadphasePair); cellDmaGet(&lsMem.gBroadphasePairs, dmaPpuAddress , dmaSize, DMA_TAG(1), 0, 0); cellDmaWaitTagStatusAll(DMA_MASK(1)); //for each broadphase pair, do something for (p=0;pm_userInfo = %d\n",pair.m_userInfo); spu_printf("pair->m_algorithm = %d\n",pair.m_algorithm); spu_printf("pair->m_pProxy0 = %d\n",pair.m_pProxy0); spu_printf("pair->m_pProxy1 = %d\n",pair.m_pProxy1); #endif //DEBUG_SPU_COLLISION_DETECTION userInfo = int(pair.m_userInfo); // skip pairs we don't support if (!pair.m_algorithm || !pair.m_pProxy0 || !pair.m_pProxy1 || userInfo != 2) continue; // DMA: SpuContactManifoldCollisionAlgorithm // SpuContactManifoldCollisionAlgorithm: // A dummy collision algorithm that gives us the // collision types, and contact manifold pointers dmaSize = sizeof(SpuContactManifoldCollisionAlgorithm); dmaPpuAddress2 = (ppu_address_t)pair.m_algorithm; cellDmaGet(&lsMem.gSpuContactManifoldAlgo, dmaPpuAddress2 , dmaSize, DMA_TAG(1), 0, 0); //cellDmaWaitTagStatusAll(DMA_MASK(1)); //snPause(); #ifdef DEBUG_SPU_COLLISION_DETECTION spu_printf("SPU: manifoldPtr: %llx",collisionPairInput.m_persistentManifoldPtr); #endif //DEBUG_SPU_COLLISION_DETECTION // DMA: btBroadphaseProxy for object 0 dmaSize = sizeof(btBroadphaseProxy); dmaPpuAddress2 = (ppu_address_t)pair.m_pProxy0; lsMem.gProxyPtr0 = (btBroadphaseProxy*) lsMem.bufferProxy0; stallingUnalignedDmaSmallGet(lsMem.gProxyPtr0, dmaPpuAddress2 , dmaSize); // NOTE: The DMA initiated for SpuContactManifoldCollisionAlgorithm is synced in // stallingUnalignedDmaSmallGet collisionPairInput.m_persistentManifoldPtr = (ppu_address_t) lsMem.gSpuContactManifoldAlgo.getContactManifoldPtr(); collisionPairInput.m_isSwapped = false; // DMA: btPersistentManifold dmaSize = sizeof(btPersistentManifold); dmaPpuAddress2 = collisionPairInput.m_persistentManifoldPtr; cellDmaGet(&lsMem.gPersistentManifold, dmaPpuAddress2 , dmaSize, DMA_TAG(1), 0, 0); // DMA: btBroadphaseProxy for object 0 dmaSize = sizeof(btBroadphaseProxy); dmaPpuAddress2 = (ppu_address_t)pair.m_pProxy1; lsMem.gProxyPtr1 = (btBroadphaseProxy*) lsMem.bufferProxy1; stallingUnalignedDmaSmallGet(lsMem.gProxyPtr1, dmaPpuAddress2 , dmaSize); // NOTE: The DMA initiated for btPersistentManifold is synced in // stallingUnalignedDmaSmallGet #ifdef DEBUG_SPU_COLLISION_DETECTION spu_printf("SPU collisionPairInput->m_shapeType0 = %d\n",collisionPairInput.m_shapeType0); spu_printf("SPU collisionPairInput->m_shapeType1 = %d\n",collisionPairInput.m_shapeType1); #endif //DEBUG_SPU_COLLISION_DETECTION // Construct the collision Pair collisionPairInput.m_shapeType0 = lsMem.gSpuContactManifoldAlgo.getShapeType0(); collisionPairInput.m_shapeType1 = lsMem.gSpuContactManifoldAlgo.getShapeType1(); collisionPairInput.m_collisionMargin0 = lsMem.gSpuContactManifoldAlgo.getCollisionMargin0(); collisionPairInput.m_collisionMargin1 = lsMem.gSpuContactManifoldAlgo.getCollisionMargin1(); cellDmaWaitTagStatusAll(DMA_MASK(1)); // might not be necessary dmaAndSetupCollisionObjects(collisionPairInput, lsMem); if (lsMem.getColObj0()->isActive() || lsMem.getColObj1()->isActive()) { handleCollisionPair(collisionPairInput, lsMem, spuContacts, (ppu_address_t)lsMem.getColObj0()->getCollisionShape(), (ppu_address_t)lsMem.getColObj1()->getCollisionShape()); } } } } //end for (j = 0; j < numOnPage; j++) }// for #ifdef DEBUG_SPU_COLLISION_DETECTION spu_printf("processCollisionTask %d / %d\n",numPages, numOnLastPage); #endif //DEBUG_SPU_COLLISION_DETECTION return; }