diff --git a/src/BulletMultiThreaded/SpuContactManifoldCollisionAlgorithm.cpp b/src/BulletMultiThreaded/SpuContactManifoldCollisionAlgorithm.cpp index 578a88690..b2d93b38c 100644 --- a/src/BulletMultiThreaded/SpuContactManifoldCollisionAlgorithm.cpp +++ b/src/BulletMultiThreaded/SpuContactManifoldCollisionAlgorithm.cpp @@ -19,11 +19,7 @@ subject to the following restrictions: #include "BulletCollision/CollisionShapes/btCollisionShape.h" #include "BulletCollision/CollisionShapes/btPolyhedralConvexShape.h" -SpuContactManifoldCollisionAlgorithm::SpuContactManifoldCollisionAlgorithm() -:m_manifoldPtr(0) -{ - -} + void SpuContactManifoldCollisionAlgorithm::processCollision (btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut) @@ -40,6 +36,9 @@ btScalar SpuContactManifoldCollisionAlgorithm::calculateTimeOfImpact(btCollision #ifndef __SPU__ SpuContactManifoldCollisionAlgorithm::SpuContactManifoldCollisionAlgorithm(const btCollisionAlgorithmConstructionInfo& ci,btCollisionObject* body0,btCollisionObject* body1) :btCollisionAlgorithm(ci) +#ifdef USE_SEPDISTANCE_UTIL +,m_sepDistance(body0->getCollisionShape()->getAngularMotionDisc(),body1->getCollisionShape()->getAngularMotionDisc()) +#endif //USE_SEPDISTANCE_UTIL { m_manifoldPtr = m_dispatcher->getNewManifold(body0,body1); m_shapeType0 = body0->getCollisionShape()->getShapeType(); diff --git a/src/BulletMultiThreaded/SpuContactManifoldCollisionAlgorithm.h b/src/BulletMultiThreaded/SpuContactManifoldCollisionAlgorithm.h index 125c7a444..486a49742 100644 --- a/src/BulletMultiThreaded/SpuContactManifoldCollisionAlgorithm.h +++ b/src/BulletMultiThreaded/SpuContactManifoldCollisionAlgorithm.h @@ -20,9 +20,12 @@ subject to the following restrictions: #include "BulletCollision/BroadphaseCollision/btBroadphaseProxy.h" #include "BulletCollision/CollisionDispatch/btCollisionCreateFunc.h" #include "BulletCollision/BroadphaseCollision/btDispatcher.h" +#include 
"LinearMath/btTransformUtil.h" class btPersistentManifold; +//#define USE_SEPDISTANCE_UTIL 1 + /// SpuContactManifoldCollisionAlgorithm provides contact manifold and should be processed on SPU. ATTRIBUTE_ALIGNED16(class) SpuContactManifoldCollisionAlgorithm : public btCollisionAlgorithm { @@ -37,6 +40,7 @@ ATTRIBUTE_ALIGNED16(class) SpuContactManifoldCollisionAlgorithm : public btColli btCollisionObject* m_collisionObject0; btCollisionObject* m_collisionObject1; + public: @@ -45,9 +49,11 @@ public: virtual btScalar calculateTimeOfImpact(btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut); - SpuContactManifoldCollisionAlgorithm(); - + SpuContactManifoldCollisionAlgorithm(const btCollisionAlgorithmConstructionInfo& ci,btCollisionObject* body0,btCollisionObject* body1); +#ifdef USE_SEPDISTANCE_UTIL + btConvexSeparatingDistanceUtil m_sepDistance; +#endif //USE_SEPDISTANCE_UTIL virtual ~SpuContactManifoldCollisionAlgorithm(); diff --git a/src/BulletMultiThreaded/SpuGatheringCollisionDispatcher.cpp b/src/BulletMultiThreaded/SpuGatheringCollisionDispatcher.cpp index e17db1c68..3c5c5e76c 100644 --- a/src/BulletMultiThreaded/SpuGatheringCollisionDispatcher.cpp +++ b/src/BulletMultiThreaded/SpuGatheringCollisionDispatcher.cpp @@ -126,7 +126,11 @@ public: if (m_dispatcher->supportsDispatchPairOnSpu(proxyType0,proxyType1)) { int so = sizeof(SpuContactManifoldCollisionAlgorithm); - void* mem = btAlignedAlloc(sizeof(SpuContactManifoldCollisionAlgorithm),16);//m_dispatcher->allocateCollisionAlgorithm(so); +#ifdef ALLOCATE_SEPARATELY + void* mem = btAlignedAlloc(so,16);//m_dispatcher->allocateCollisionAlgorithm(so); +#else + void* mem = m_dispatcher->allocateCollisionAlgorithm(so); +#endif collisionPair.m_algorithm = new(mem) SpuContactManifoldCollisionAlgorithm(ci,colObj0,colObj1); collisionPair.m_userInfo = (void*) 2; } else diff --git a/src/BulletMultiThreaded/SpuGatheringCollisionDispatcher.h 
b/src/BulletMultiThreaded/SpuGatheringCollisionDispatcher.h index 9f7fb9e8d..f99e00381 100644 --- a/src/BulletMultiThreaded/SpuGatheringCollisionDispatcher.h +++ b/src/BulletMultiThreaded/SpuGatheringCollisionDispatcher.h @@ -21,8 +21,13 @@ subject to the following restrictions: ///Tuning value to optimized SPU utilization ///Too small value means Task overhead is large compared to computation (too fine granularity) ///Too big value might render some SPUs are idle, while a few other SPUs are doing all work. -#define SPU_BATCHSIZE_BROADPHASE_PAIRS 16 +//#define SPU_BATCHSIZE_BROADPHASE_PAIRS 8 +//#define SPU_BATCHSIZE_BROADPHASE_PAIRS 16 +#define SPU_BATCHSIZE_BROADPHASE_PAIRS 64 +//#define SPU_BATCHSIZE_BROADPHASE_PAIRS 128 //#define SPU_BATCHSIZE_BROADPHASE_PAIRS 256 +//#define SPU_BATCHSIZE_BROADPHASE_PAIRS 1024 + class SpuCollisionTaskProcess; diff --git a/src/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuCollisionShapes.cpp b/src/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuCollisionShapes.cpp index 159418ee3..04c43c9fc 100644 --- a/src/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuCollisionShapes.cpp +++ b/src/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuCollisionShapes.cpp @@ -16,6 +16,17 @@ subject to the following restrictions: #include "SpuCollisionShapes.h" +#ifdef __SPU__ +#include <spu_intrinsics.h> +static inline vec_float4 vec_dot3( vec_float4 vec0, vec_float4 vec1 ) +{ + vec_float4 result; + result = spu_mul( vec0, vec1 ); + result = spu_madd( spu_rlqwbyte( vec0, 4 ), spu_rlqwbyte( vec1, 4 ), result ); + return spu_madd( spu_rlqwbyte( vec0, 8 ), spu_rlqwbyte( vec1, 8 ), result ); +} +#endif //__SPU__ + btVector3 localGetSupportingVertexWithoutMargin(int shapeType, void* shape, const btVector3& localDir,struct SpuConvexPolyhedronVertexData* convexVertexData)//, int *featureIndex) { switch (shapeType) @@ -170,7 +181,45 @@ btVector3 localGetSupportingVertexWithoutMargin(int shapeType, void* shape, cons { //spu_printf("SPU: todo: getSupport
CONVEX_HULL_SHAPE_PROXYTYPE\n"); - +#if defined (__SPU__) + vec_float4 v_distMax = {-FLT_MAX,0,0,0}; + vec_int4 v_idxMax = {-999,0,0,0}; + int v=0; + int numverts = convexVertexData->gNumConvexPoints; + btVector3* points = convexVertexData->gConvexPoints; + + for(;v<(int)numverts-4;v+=4) { + vec_float4 p0 = vec_dot3(points[v ].get128(),localDir.get128()); + vec_float4 p1 = vec_dot3(points[v+1].get128(),localDir.get128()); + vec_float4 p2 = vec_dot3(points[v+2].get128(),localDir.get128()); + vec_float4 p3 = vec_dot3(points[v+3].get128(),localDir.get128()); + const vec_int4 i0 = {v ,0,0,0}; + const vec_int4 i1 = {v+1,0,0,0}; + const vec_int4 i2 = {v+2,0,0,0}; + const vec_int4 i3 = {v+3,0,0,0}; + vec_uint4 retGt01 = spu_cmpgt(p0,p1); + vec_float4 pmax01 = spu_sel(p1,p0,retGt01); + vec_int4 imax01 = spu_sel(i1,i0,retGt01); + vec_uint4 retGt23 = spu_cmpgt(p2,p3); + vec_float4 pmax23 = spu_sel(p3,p2,retGt23); + vec_int4 imax23 = spu_sel(i3,i2,retGt23); + vec_uint4 retGt0123 = spu_cmpgt(pmax01,pmax23); + vec_float4 pmax0123 = spu_sel(pmax23,pmax01,retGt0123); + vec_int4 imax0123 = spu_sel(imax23,imax01,retGt0123); + vec_uint4 retGtMax = spu_cmpgt(v_distMax,pmax0123); + v_distMax = spu_sel(pmax0123,v_distMax,retGtMax); + v_idxMax = spu_sel(imax0123,v_idxMax,retGtMax); + } + for(;v<(int)numverts;v++) { + vec_float4 p = vec_dot3(points[v].get128(),localDir.get128()); + const vec_int4 i = {v,0,0,0}; + vec_uint4 retGtMax = spu_cmpgt(v_distMax,p); + v_distMax = spu_sel(p,v_distMax,retGtMax); + v_idxMax = spu_sel(i,v_idxMax,retGtMax); + } + int ptIndex = spu_extract(v_idxMax,0); + const btVector3& supVec= points[ptIndex]; +#else btVector3* points = 0; int numPoints = 0; @@ -179,7 +228,7 @@ btVector3 localGetSupportingVertexWithoutMargin(int shapeType, void* shape, cons // spu_printf("numPoints = %d\n",numPoints); - btVector3 supVec(btScalar(0.),btScalar(0.),btScalar(0.)); + int ptIndex = 0; btScalar newDot,maxDot = btScalar(-1e30); btVector3 
vec0(localDir.getX(),localDir.getY(),localDir.getZ()); @@ -197,15 +246,18 @@ btVector3 localGetSupportingVertexWithoutMargin(int shapeType, void* shape, cons for (int i=0;i maxDot) { maxDot = newDot; - supVec = vtx; + ptIndex = i; } } + const btVector3& supVec= points[ptIndex]; + +#endif return btVector3(supVec.getX(),supVec.getY(),supVec.getZ()); break; @@ -223,7 +275,7 @@ btVector3 localGetSupportingVertexWithoutMargin(int shapeType, void* shape, cons } } -void computeAabb (btVector3& aabbMin, btVector3& aabbMax, btConvexInternalShape* convexShape, ppu_address_t convexShapePtr, int shapeType, btTransform xform) +void computeAabb (btVector3& aabbMin, btVector3& aabbMax, btConvexInternalShape* convexShape, ppu_address_t convexShapePtr, int shapeType, const btTransform& xform) { //calculate the aabb, given the types... switch (shapeType) @@ -235,7 +287,7 @@ void computeAabb (btVector3& aabbMin, btVector3& aabbMax, btConvexInternalShape* float margin=convexShape->getMarginNV(); btVector3 halfExtents = convexShape->getImplicitShapeDimensions(); halfExtents += btVector3(margin,margin,margin); - btTransform& t = xform; + const btTransform& t = xform; btMatrix3x3 abs_b = t.getBasis().absolute(); btVector3 center = t.getOrigin(); btVector3 extent = btVector3(abs_b[0].dot(halfExtents),abs_b[1].dot(halfExtents),abs_b[2].dot(halfExtents)); @@ -258,7 +310,7 @@ void computeAabb (btVector3& aabbMin, btVector3& aabbMax, btConvexInternalShape* btScalar radius = convexShape->getRadius(); halfExtents[capsuleUpAxis] = radius + halfHeight; #endif - btTransform& t = xform; + const btTransform& t = xform; btMatrix3x3 abs_b = t.getBasis().absolute(); btVector3 center = t.getOrigin(); btVector3 extent = btVector3(abs_b[0].dot(halfExtents),abs_b[1].dot(halfExtents),abs_b[2].dot(halfExtents)); @@ -271,7 +323,7 @@ void computeAabb (btVector3& aabbMin, btVector3& aabbMax, btConvexInternalShape* { float radius = convexShape->getImplicitShapeDimensions().getX();// * 
convexShape->getLocalScaling().getX(); float margin = radius + convexShape->getMarginNV(); - btTransform& t = xform; + const btTransform& t = xform; const btVector3& center = t.getOrigin(); btVector3 extent(margin,margin,margin); aabbMin = center - extent; @@ -284,7 +336,7 @@ void computeAabb (btVector3& aabbMin, btVector3& aabbMax, btConvexInternalShape* cellDmaGet(&convexHullShape0, convexShapePtr , sizeof(btConvexHullShape), DMA_TAG(1), 0, 0); cellDmaWaitTagStatusAll(DMA_MASK(1)); btConvexHullShape* localPtr = (btConvexHullShape*)&convexHullShape0; - btTransform& t = xform; + const btTransform& t = xform; btScalar margin = convexShape->getMarginNV(); localPtr->getNonvirtualAabb(t,aabbMin,aabbMax,margin); //spu_printf("SPU convex aabbMin=%f,%f,%f=\n",aabbMin.getX(),aabbMin.getY(),aabbMin.getZ()); @@ -450,7 +502,9 @@ void spuWalkStacklessQuantizedTree(btNodeOverlapCallback* nodeCallback,unsigned int curIndex = startNodeIndex; int walkIterations = 0; +#ifdef BT_DEBUG int subTreeSize = endNodeIndex - startNodeIndex; +#endif int escapeIndex; @@ -459,7 +513,7 @@ void spuWalkStacklessQuantizedTree(btNodeOverlapCallback* nodeCallback,unsigned while (curIndex < endNodeIndex) { //catch bugs in tree data - assert (walkIterations < subTreeSize); + btAssert (walkIterations < subTreeSize); walkIterations++; aabbOverlap = spuTestQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin,quantizedQueryAabbMax,rootNode->m_quantizedAabbMin,rootNode->m_quantizedAabbMax); diff --git a/src/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuCollisionShapes.h b/src/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuCollisionShapes.h index c3928ee8c..ce9aedf88 100644 --- a/src/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuCollisionShapes.h +++ b/src/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuCollisionShapes.h @@ -64,7 +64,7 @@ struct bvhMeshShape_LocalStoreMemory btVector3 localGetSupportingVertexWithoutMargin(int shapeType, void* shape, const btVector3& localDir,struct 
SpuConvexPolyhedronVertexData* convexVertexData);//, int *featureIndex) -void computeAabb (btVector3& aabbMin, btVector3& aabbMax, btConvexInternalShape* convexShape, ppu_address_t convexShapePtr, int shapeType, btTransform xform); +void computeAabb (btVector3& aabbMin, btVector3& aabbMax, btConvexInternalShape* convexShape, ppu_address_t convexShapePtr, int shapeType, const btTransform& xform); void dmaBvhShapeData (bvhMeshShape_LocalStoreMemory* bvhMeshShape, btBvhTriangleMeshShape* triMeshShape); void dmaBvhIndexedMesh (btIndexedMesh* IndexMesh, IndexedMeshArray& indexArray, int index, uint32_t dmaTag); void dmaBvhSubTreeHeaders (btBvhSubtreeInfo* subTreeHeaders, ppu_address_t subTreePtr, int batchSize, uint32_t dmaTag); @@ -76,18 +76,27 @@ void dmaCollisionShape (void* collisionShapeLocation, ppu_address_t collisionSha void dmaCompoundShapeInfo (CompoundShape_LocalStoreMemory* compoundShapeLocation, btCompoundShape* spuCompoundShape, uint32_t dmaTag); void dmaCompoundSubShapes (CompoundShape_LocalStoreMemory* compoundShapeLocation, btCompoundShape* spuCompoundShape, uint32_t dmaTag); + #define USE_BRANCHFREE_TEST 1 #ifdef USE_BRANCHFREE_TEST SIMD_FORCE_INLINE unsigned int spuTestQuantizedAabbAgainstQuantizedAabb(unsigned short int* aabbMin1,unsigned short int* aabbMax1,const unsigned short int* aabbMin2,const unsigned short int* aabbMax2) { +#if defined(__CELLOS_LV2__) && defined (__SPU__) + vec_ushort8 vecMin = {aabbMin1[0],aabbMin2[0],aabbMin1[2],aabbMin2[2],aabbMin1[1],aabbMin2[1],0,0}; + vec_ushort8 vecMax = {aabbMax2[0],aabbMax1[0],aabbMax2[2],aabbMax1[2],aabbMax2[1],aabbMax1[1],0,0}; + vec_ushort8 isGt = spu_cmpgt(vecMin,vecMax); + return spu_extract(spu_gather(isGt),0)==0; + +#else return btSelect((unsigned)((aabbMin1[0] <= aabbMax2[0]) & (aabbMax1[0] >= aabbMin2[0]) & (aabbMin1[2] <= aabbMax2[2]) & (aabbMax1[2] >= aabbMin2[2]) & (aabbMin1[1] <= aabbMax2[1]) & (aabbMax1[1] >= aabbMin2[1])), 1, 0); +#endif } #else -unsigned int 
spuTestQuantizedAabbAgainstQuantizedAabb(const unsigned short int* aabbMin1,const unsigned short int* aabbMax1,const unsigned short int* aabbMin2,const unsigned short int* aabbMax2) +SIMD_FORCE_INLINE unsigned int spuTestQuantizedAabbAgainstQuantizedAabb(const unsigned short int* aabbMin1,const unsigned short int* aabbMax1,const unsigned short int* aabbMin2,const unsigned short int* aabbMax2) { unsigned int overlap = 1; overlap = (aabbMin1[0] > aabbMax2[0] || aabbMax1[0] < aabbMin2[0]) ? 0 : overlap; diff --git a/src/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.cpp b/src/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.cpp index 0a610d547..001a26663 100644 --- a/src/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.cpp +++ b/src/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.cpp @@ -118,6 +118,8 @@ struct CollisionTask_LocalStoreMemory return (btCollisionObject*) gColObj1; } + bool needsDmaPutContactManifoldAlgo; + DoubleBuffer g_workUnitTaskBuffers; ATTRIBUTE_ALIGNED16(btBroadphasePair gBroadphasePairs[SPU_BATCHSIZE_BROADPHASE_PAIRS]); @@ -125,7 +127,7 @@ struct CollisionTask_LocalStoreMemory //SpuContactManifoldCollisionAlgorithm gSpuContactManifoldAlgo; //ATTRIBUTE_ALIGNED16(char gSpuContactManifoldAlgo[sizeof(SpuContactManifoldCollisionAlgorithm)+128]); - SpuContactManifoldCollisionAlgorithm gSpuContactManifoldAlgo; + ATTRIBUTE_ALIGNED16(char gSpuContactManifoldAlgo [sizeof(SpuContactManifoldCollisionAlgorithm)+16]); SpuContactManifoldCollisionAlgorithm* getlocalCollisionAlgorithm() { @@ -250,8 +252,8 @@ class spuNodeCallback : public btNodeOverlapCallback public: spuNodeCallback(SpuCollisionPairInput* wuInput, CollisionTask_LocalStoreMemory* lsMemPtr,SpuContactResult& spuContacts) : m_wuInput(wuInput), - m_lsMemPtr(lsMemPtr), - m_spuContacts(spuContacts) + m_spuContacts(spuContacts), + m_lsMemPtr(lsMemPtr) { } @@ -346,9 +348,7 @@ public: void 
ProcessConvexConcaveSpuCollision(SpuCollisionPairInput* wuInput, CollisionTask_LocalStoreMemory* lsMemPtr, SpuContactResult& spuContacts) { //order: first collision shape is convex, second concave. m_isSwapped is true, if the original order was opposite - register int dmaSize; - register ppu_address_t dmaPpuAddress2; - + btBvhTriangleMeshShape* trimeshShape = (btBvhTriangleMeshShape*)wuInput->m_spuCollisionShapes[1]; //need the mesh interface, for access to triangle vertices dmaBvhShapeData (&lsMemPtr->bvhShapeData, trimeshShape); @@ -559,8 +559,17 @@ void ProcessSpuConvexConvexCollision(SpuCollisionPairInput* wuInput, CollisionTa lsMemPtr->getColObj0()->getFriction(),lsMemPtr->getColObj1()->getFriction(), wuInput->m_isSwapped); - SpuGjkPairDetector gjk(shape0Ptr,shape1Ptr,shapeType0,shapeType1,marginA,marginB,&vsSolver,penetrationSolver); - gjk.getClosestPoints(cpInput,spuContacts);//,debugDraw); + { + SpuGjkPairDetector gjk(shape0Ptr,shape1Ptr,shapeType0,shapeType1,marginA,marginB,&vsSolver,penetrationSolver); + gjk.getClosestPoints(cpInput,spuContacts);//,debugDraw); +#ifdef USE_SEPDISTANCE_UTIL + btScalar sepDist = gjk.getCachedSeparatingDistance()+spuManifold->getContactBreakingThreshold(); + lsMemPtr->getlocalCollisionAlgorithm()->m_sepDistance.initSeparatingDistance(gjk.getCachedSeparatingAxis(),sepDist,wuInput->m_worldTransform0,wuInput->m_worldTransform1); + lsMemPtr->needsDmaPutContactManifoldAlgo = true; +#endif //USE_SEPDISTANCE_UTIL + + } + } @@ -581,11 +590,11 @@ SIMD_FORCE_INLINE void dmaAndSetupCollisionObjects(SpuCollisionPairInput& collis register ppu_address_t dmaPpuAddress2; dmaSize = sizeof(btCollisionObject);//btTransform); - dmaPpuAddress2 = /*collisionPairInput.m_isSwapped ? (ppu_address_t)lsMem.gProxyPtr1->m_clientObject :*/ (ppu_address_t)lsMem.gSpuContactManifoldAlgo.getCollisionObject0(); + dmaPpuAddress2 = /*collisionPairInput.m_isSwapped ? 
(ppu_address_t)lsMem.gProxyPtr1->m_clientObject :*/ (ppu_address_t)lsMem.getlocalCollisionAlgorithm()->getCollisionObject0(); cellDmaGet(&lsMem.gColObj0, dmaPpuAddress2 , dmaSize, DMA_TAG(1), 0, 0); dmaSize = sizeof(btCollisionObject);//btTransform); - dmaPpuAddress2 = /*collisionPairInput.m_isSwapped ? (ppu_address_t)lsMem.gProxyPtr0->m_clientObject :*/ (ppu_address_t)lsMem.gSpuContactManifoldAlgo.getCollisionObject1(); + dmaPpuAddress2 = /*collisionPairInput.m_isSwapped ? (ppu_address_t)lsMem.gProxyPtr0->m_clientObject :*/ (ppu_address_t)lsMem.getlocalCollisionAlgorithm()->getCollisionObject1(); cellDmaGet(&lsMem.gColObj1, dmaPpuAddress2 , dmaSize, DMA_TAG(2), 0, 0); cellDmaWaitTagStatusAll(DMA_MASK(1) | DMA_MASK(2)); @@ -601,8 +610,6 @@ void handleCollisionPair(SpuCollisionPairInput& collisionPairInput, CollisionTas ppu_address_t collisionShape0Ptr, void* collisionShape0Loc, ppu_address_t collisionShape1Ptr, void* collisionShape1Loc, bool dmaShapes = true) { - register int dmaSize; - register ppu_address_t dmaPpuAddress2; if (btBroadphaseProxy::isConvex(collisionPairInput.m_shapeType0) && btBroadphaseProxy::isConvex(collisionPairInput.m_shapeType1)) @@ -904,31 +911,11 @@ void processCollisionTask(void* userPtr, void* lsMemPtr) dmaPpuAddress2 = (ppu_address_t)pair.m_algorithm; cellDmaGet(&lsMem.gSpuContactManifoldAlgo, dmaPpuAddress2 , dmaSize, DMA_TAG(1), 0, 0); - //snPause(); - -#ifdef DEBUG_SPU_COLLISION_DETECTION - //spu_printf("SPU: manifoldPtr: %llx",collisionPairInput->m_persistentManifoldPtr); -#endif //DEBUG_SPU_COLLISION_DETECTION - - - /* - dmaSize = sizeof(btBroadphaseProxy); - dmaPpuAddress2 = (ppu_address_t)pair.m_pProxy0; - //stallingUnalignedDmaSmallGet(lsMem.gProxyPtr0, dmaPpuAddress2 , dmaSize); - void* tmpPtr = cellDmaSmallGetReadOnly(&lsMem.bufferProxy0, dmaPpuAddress2 , dmaSize,DMA_TAG(1), 0, 0); - lsMem.gProxyPtr0 = (btBroadphaseProxy*) tmpPtr; - - dmaSize = sizeof(btBroadphaseProxy); - dmaPpuAddress2 = (ppu_address_t)pair.m_pProxy1; - 
tmpPtr = cellDmaSmallGetReadOnly(&lsMem.bufferProxy1, dmaPpuAddress2 , dmaSize,DMA_TAG(1), 0, 0); - - lsMem.gProxyPtr1 = (btBroadphaseProxy*)tmpPtr; - */ - - cellDmaWaitTagStatusAll(DMA_MASK(1)); - collisionPairInput.m_persistentManifoldPtr = (ppu_address_t) lsMem.gSpuContactManifoldAlgo.getContactManifoldPtr(); + lsMem.needsDmaPutContactManifoldAlgo = false; + + collisionPairInput.m_persistentManifoldPtr = (ppu_address_t) lsMem.getlocalCollisionAlgorithm()->getContactManifoldPtr(); collisionPairInput.m_isSwapped = false; if (1) @@ -948,10 +935,10 @@ void processCollisionTask(void* userPtr, void* lsMemPtr) dmaPpuAddress2 = collisionPairInput.m_persistentManifoldPtr; cellDmaGet(&lsMem.gPersistentManifold, dmaPpuAddress2 , dmaSize, DMA_TAG(1), 0, 0); - collisionPairInput.m_shapeType0 = lsMem.gSpuContactManifoldAlgo.getShapeType0(); - collisionPairInput.m_shapeType1 = lsMem.gSpuContactManifoldAlgo.getShapeType1(); - collisionPairInput.m_collisionMargin0 = lsMem.gSpuContactManifoldAlgo.getCollisionMargin0(); - collisionPairInput.m_collisionMargin1 = lsMem.gSpuContactManifoldAlgo.getCollisionMargin1(); + collisionPairInput.m_shapeType0 = lsMem.getlocalCollisionAlgorithm()->getShapeType0(); + collisionPairInput.m_shapeType1 = lsMem.getlocalCollisionAlgorithm()->getShapeType1(); + collisionPairInput.m_collisionMargin0 = lsMem.getlocalCollisionAlgorithm()->getCollisionMargin0(); + collisionPairInput.m_collisionMargin1 = lsMem.getlocalCollisionAlgorithm()->getCollisionMargin1(); @@ -965,11 +952,18 @@ void processCollisionTask(void* userPtr, void* lsMemPtr) // Get the collision objects dmaAndSetupCollisionObjects(collisionPairInput, lsMem); - //if (lsMem.getColObj0()->isActive() || lsMem.getColObj1()->isActive()) + if (lsMem.getColObj0()->isActive() || lsMem.getColObj1()->isActive()) { - bool boxbox = ((lsMem.gSpuContactManifoldAlgo.getShapeType0()==BOX_SHAPE_PROXYTYPE)&& - (lsMem.gSpuContactManifoldAlgo.getShapeType1()==BOX_SHAPE_PROXYTYPE)); - if (boxbox && !gUseEpa)//for 
now use gUseEpa for this toggle + + lsMem.needsDmaPutContactManifoldAlgo = true; +#ifdef USE_SEPDISTANCE_UTIL + lsMem.getlocalCollisionAlgorithm()->m_sepDistance.updateSeparatingDistance(collisionPairInput.m_worldTransform0,collisionPairInput.m_worldTransform1); +#endif //USE_SEPDISTANCE_UTIL + + + bool boxbox = ((lsMem.getlocalCollisionAlgorithm()->getShapeType0()==BOX_SHAPE_PROXYTYPE)&& + (lsMem.getlocalCollisionAlgorithm()->getShapeType1()==BOX_SHAPE_PROXYTYPE)); + if (boxbox)// && !gUseEpa)//for now use gUseEpa for this toggle { //getVmVector3 //getBtVector3 @@ -977,73 +971,118 @@ void processCollisionTask(void* userPtr, void* lsMemPtr) //getBtQuat //getVmMatrix3 - //getCollisionMargin0 - btScalar margin0 = lsMem.gSpuContactManifoldAlgo.getCollisionMargin0(); - btScalar margin1 = lsMem.gSpuContactManifoldAlgo.getCollisionMargin1(); - btVector3 shapeDim0 = lsMem.gSpuContactManifoldAlgo.getShapeDimensions0()+btVector3(margin0,margin0,margin0); - btVector3 shapeDim1 = lsMem.gSpuContactManifoldAlgo.getShapeDimensions1()+btVector3(margin1,margin1,margin1); + + //spu_printf("boxbox dist = %f\n",distance); + btPersistentManifold* spuManifold=&lsMem.gPersistentManifold; + btPersistentManifold* manifold = (btPersistentManifold*)collisionPairInput.m_persistentManifoldPtr; + ppu_address_t manifoldAddress = (ppu_address_t)manifold; - Box boxA(shapeDim0.getX(),shapeDim0.getY(),shapeDim0.getZ()); - Vector3 vmPos0 = getVmVector3(collisionPairInput.m_worldTransform0.getOrigin()); - Vector3 vmPos1 = getVmVector3(collisionPairInput.m_worldTransform1.getOrigin()); - Matrix3 vmMatrix0 = getVmMatrix3(collisionPairInput.m_worldTransform0.getBasis()); - Matrix3 vmMatrix1 = getVmMatrix3(collisionPairInput.m_worldTransform1.getBasis()); + spuContacts.setContactInfo(spuManifold,manifoldAddress,lsMem.getColObj0()->getWorldTransform(), + lsMem.getColObj1()->getWorldTransform(), + lsMem.getColObj0()->getRestitution(),lsMem.getColObj1()->getRestitution(), + 
lsMem.getColObj0()->getFriction(),lsMem.getColObj1()->getFriction(), + collisionPairInput.m_isSwapped); - Transform3 transformA(vmMatrix0,vmPos0); - Box boxB(shapeDim1.getX(),shapeDim1.getY(),shapeDim1.getZ()); - Transform3 transformB(vmMatrix1,vmPos1); - BoxPoint resultClosestBoxPointA; - BoxPoint resultClosestBoxPointB; - Vector3 resultNormal; - float distanceThreshold = gContactBreakingThreshold;//0.0f;//FLT_MAX;//use epsilon? - float distance = boxBoxDistance(resultNormal,resultClosestBoxPointA,resultClosestBoxPointB, - boxA, transformA, boxB,transformB,distanceThreshold); - - if(distance < distanceThreshold) + if (//!gUseEpa && +#ifdef USE_SEPDISTANCE_UTIL + lsMem.getlocalCollisionAlgorithm()->m_sepDistance.getConservativeSeparatingDistance()<=0.f +#else + 1 +#endif + ) { - //spu_printf("boxbox dist = %f\n",distance); + + //getCollisionMargin0 + btScalar margin0 = lsMem.getlocalCollisionAlgorithm()->getCollisionMargin0(); + btScalar margin1 = lsMem.getlocalCollisionAlgorithm()->getCollisionMargin1(); + btVector3 shapeDim0 = lsMem.getlocalCollisionAlgorithm()->getShapeDimensions0()+btVector3(margin0,margin0,margin0); + btVector3 shapeDim1 = lsMem.getlocalCollisionAlgorithm()->getShapeDimensions1()+btVector3(margin1,margin1,margin1); + + Box boxA(shapeDim0.getX(),shapeDim0.getY(),shapeDim0.getZ()); + Vector3 vmPos0 = getVmVector3(collisionPairInput.m_worldTransform0.getOrigin()); + Vector3 vmPos1 = getVmVector3(collisionPairInput.m_worldTransform1.getOrigin()); + Matrix3 vmMatrix0 = getVmMatrix3(collisionPairInput.m_worldTransform0.getBasis()); + Matrix3 vmMatrix1 = getVmMatrix3(collisionPairInput.m_worldTransform1.getBasis()); + + Transform3 transformA(vmMatrix0,vmPos0); + Box boxB(shapeDim1.getX(),shapeDim1.getY(),shapeDim1.getZ()); + Transform3 transformB(vmMatrix1,vmPos1); + BoxPoint resultClosestBoxPointA; + BoxPoint resultClosestBoxPointB; + Vector3 resultNormal; + float distanceThreshold = FLT_MAX;//0.0f;//FLT_MAX;//use epsilon? 
+ + + float distance = boxBoxDistance(resultNormal,resultClosestBoxPointA,resultClosestBoxPointB, boxA, transformA, boxB,transformB,distanceThreshold); + + btVector3 normalInB = -getBtVector3(resultNormal); + + if(distance < spuManifold->getContactBreakingThreshold()) + { + btVector3 pointOnB = collisionPairInput.m_worldTransform1(getBtVector3(resultClosestBoxPointB.localPoint)); + + spuContacts.addContactPoint( + normalInB, + pointOnB, + distance); + } + + lsMem.needsDmaPutContactManifoldAlgo = true; +#ifdef USE_SEPDISTANCE_UTIL + btScalar sepDist = distance+spuManifold->getContactBreakingThreshold(); + lsMem.getlocalCollisionAlgorithm()->m_sepDistance.initSeparatingDistance(normalInB,sepDist,collisionPairInput.m_worldTransform0,collisionPairInput.m_worldTransform1); +#endif //USE_SEPDISTANCE_UTIL + } + + spuContacts.flush(); + + + } else + { + if ( +#ifdef USE_SEPDISTANCE_UTIL + lsMem.getlocalCollisionAlgorithm()->m_sepDistance.getConservativeSeparatingDistance()<=0.f +#else + 1 +#endif //USE_SEPDISTANCE_UTIL + ) + { + handleCollisionPair(collisionPairInput, lsMem, spuContacts, + (ppu_address_t)lsMem.getColObj0()->getCollisionShape(), &lsMem.gCollisionShapes[0].collisionShape, + (ppu_address_t)lsMem.getColObj1()->getCollisionShape(), &lsMem.gCollisionShapes[1].collisionShape); + } else + { + //spu_printf("boxbox dist = %f\n",distance); btPersistentManifold* spuManifold=&lsMem.gPersistentManifold; btPersistentManifold* manifold = (btPersistentManifold*)collisionPairInput.m_persistentManifoldPtr; ppu_address_t manifoldAddress = (ppu_address_t)manifold; - //spuContacts.setContactInfo(spuManifold,manifoldAddress,wuInput->m_worldTransform0,wuInput->m_worldTransform1,wuInput->m_isSwapped); spuContacts.setContactInfo(spuManifold,manifoldAddress,lsMem.getColObj0()->getWorldTransform(), lsMem.getColObj1()->getWorldTransform(), lsMem.getColObj0()->getRestitution(),lsMem.getColObj1()->getRestitution(), lsMem.getColObj0()->getFriction(),lsMem.getColObj1()->getFriction(), 
collisionPairInput.m_isSwapped); - btVector3 normalInB = -getBtVector3(resultNormal); - - btVector3 pointOnB = collisionPairInput.m_worldTransform1(getBtVector3(resultClosestBoxPointB.localPoint)); - - //transform pointOnB to worldspace? - - spuContacts.addContactPoint( - normalInB, - pointOnB, - distance); - //normalInB, - //pointOnB+positionOffset, - //distance); - //SET_CONTACT_POINT(cp[0],distance,-testNormal, - // boxPointA,relTransformA,primIndexA, - // boxPointB,relTransformB,primIndexB); spuContacts.flush(); } - - } else - { - handleCollisionPair(collisionPairInput, lsMem, spuContacts, - (ppu_address_t)lsMem.getColObj0()->getCollisionShape(), &lsMem.gCollisionShapes[0].collisionShape, - (ppu_address_t)lsMem.getColObj1()->getCollisionShape(), &lsMem.gCollisionShapes[1].collisionShape); } + } } } +#ifdef USE_SEPDISTANCE_UTIL + if (lsMem.needsDmaPutContactManifoldAlgo) + { + dmaSize = sizeof(SpuContactManifoldCollisionAlgorithm); + dmaPpuAddress2 = (ppu_address_t)pair.m_algorithm; + cellDmaLargePut(&lsMem.gSpuContactManifoldAlgo, dmaPpuAddress2 , dmaSize, DMA_TAG(1), 0, 0); + cellDmaWaitTagStatusAll(DMA_MASK(1)); + } +#endif //#ifdef USE_SEPDISTANCE_UTIL + } } } diff --git a/src/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuGjkPairDetector.cpp b/src/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuGjkPairDetector.cpp index 91b5a3419..87419f56d 100644 --- a/src/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuGjkPairDetector.cpp +++ b/src/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuGjkPairDetector.cpp @@ -38,6 +38,7 @@ int gSpuNumGjkChecks = 0; SpuGjkPairDetector::SpuGjkPairDetector(void* objectA,void* objectB,int shapeTypeA, int shapeTypeB, float marginA,float marginB,SpuVoronoiSimplexSolver* simplexSolver, const SpuConvexPenetrationDepthSolver* penetrationDepthSolver) :m_cachedSeparatingAxis(float(0.),float(0.),float(1.)), +m_cachedSeparatingDistance(0.f), m_penetrationDepthSolver(penetrationDepthSolver), m_simplexSolver(simplexSolver), 
m_minkowskiA(objectA), @@ -54,6 +55,8 @@ m_catchDegeneracies(1) void SpuGjkPairDetector::getClosestPoints(const SpuClosestPointInput& input,SpuContactResult& output) { + m_cachedSeparatingDistance = 0.f; + btScalar distance=btScalar(0.); btVector3 normalInB(btScalar(0.),btScalar(0.),btScalar(0.)); btVector3 pointOnA,pointOnB; @@ -294,6 +297,8 @@ void SpuGjkPairDetector::getClosestPoints(const SpuClosestPointInput& input,SpuC //spu_printf("distance\n"); #endif //__SPU__ + m_cachedSeparatingDistance = distance; + m_cachedSeparatingAxis = normalInB; output.addContactPoint( normalInB, diff --git a/src/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuGjkPairDetector.h b/src/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuGjkPairDetector.h index 449c13946..934e2944d 100644 --- a/src/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuGjkPairDetector.h +++ b/src/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuGjkPairDetector.h @@ -34,6 +34,7 @@ class SpuGjkPairDetector btVector3 m_cachedSeparatingAxis; + btScalar m_cachedSeparatingDistance; const SpuConvexPenetrationDepthSolver* m_penetrationDepthSolver; SpuVoronoiSimplexSolver* m_simplexSolver; void* m_minkowskiA; @@ -74,6 +75,15 @@ public: m_cachedSeparatingAxis = seperatingAxis; } + const btVector3& getCachedSeparatingAxis() const + { + return m_cachedSeparatingAxis; + } + btScalar getCachedSeparatingDistance() const + { + return m_cachedSeparatingDistance; + } + void setPenetrationDepthSolver(SpuConvexPenetrationDepthSolver* penetrationDepthSolver) { m_penetrationDepthSolver = penetrationDepthSolver; diff --git a/src/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuMinkowskiPenetrationDepthSolver.cpp b/src/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuMinkowskiPenetrationDepthSolver.cpp index 93dda4661..c34752b78 100644 --- a/src/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuMinkowskiPenetrationDepthSolver.cpp +++ 
b/src/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuMinkowskiPenetrationDepthSolver.cpp @@ -338,7 +338,7 @@ bool SpuMinkowskiPenetrationDepthSolver::calcPenDepth( SpuVoronoiSimplexSolver& } else { // could not seperate shapes - btAssert (false); + //btAssert (false); } return res.m_hasResult; }