diff --git a/src/BulletDynamics/ConstraintSolver/btSolverBody.h b/src/BulletDynamics/ConstraintSolver/btSolverBody.h index 81bf9c1cc..3681e4d11 100644 --- a/src/BulletDynamics/ConstraintSolver/btSolverBody.h +++ b/src/BulletDynamics/ConstraintSolver/btSolverBody.h @@ -29,16 +29,18 @@ ATTRIBUTE_ALIGNED16 (struct) btSolverBody { BT_DECLARE_ALIGNED_ALLOCATOR(); + btMatrix3x3 m_worldInvInertiaTensor; + btVector3 m_angularVelocity; + btVector3 m_linearVelocity; + btVector3 m_centerOfMassPosition; + btVector3 m_pushVelocity; + btVector3 m_turnVelocity; + float m_angularFactor; float m_invMass; float m_friction; btRigidBody* m_originalBody; - btVector3 m_linearVelocity; - btVector3 m_centerOfMassPosition; - - btVector3 m_pushVelocity; - btVector3 m_turnVelocity; SIMD_FORCE_INLINE void getVelocityInLocalPoint(const btVector3& rel_pos, btVector3& velocity ) const diff --git a/src/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/Box.h b/src/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/Box.h index a526cd9f7..b119171af 100644 --- a/src/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/Box.h +++ b/src/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/Box.h @@ -17,9 +17,9 @@ subject to the following restrictions: #ifndef __BOX_H__ #define __BOX_H__ + #ifndef PE_REF -#define PE_REF(a) a -#define PE_REF(a) const a& +#define PE_REF(a) a& #endif #include diff --git a/src/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.cpp b/src/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.cpp index e4f5f5cbf..6d9b503e1 100644 --- a/src/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.cpp +++ b/src/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.cpp @@ -1,4 +1,3 @@ - #include "SpuGatheringCollisionTask.h" //#define DEBUG_SPU_COLLISION_DETECTION 1 @@ -29,6 +28,7 @@ #include "boxBoxDistance.h" #include "BulletMultiThreaded/vectormath2bullet.h" #include "SpuCollisionShapes.h" //definition of SpuConvexPolyhedronVertexData +#include "BulletCollision/CollisionDispatch/btBoxBoxDetector.h" #ifdef __SPU__ ///Software caching from the IBM Cell SDK, it reduces 25% SPU time for our test cases @@ -959,15 +959,8 @@ void processCollisionTask(void* userPtr, void* lsMemPtr) bool boxbox = ((lsMem.getlocalCollisionAlgorithm()->getShapeType0()==BOX_SHAPE_PROXYTYPE)&& (lsMem.getlocalCollisionAlgorithm()->getShapeType1()==BOX_SHAPE_PROXYTYPE)); - if (boxbox)// && !gUseEpa)//for now use gUseEpa for this toggle + if (boxbox) { - //getVmVector3 - //getBtVector3 - //getVmQuat - //getBtQuat - //getVmMatrix3 - - //spu_printf("boxbox dist = %f\n",distance); btPersistentManifold* spuManifold=&lsMem.gPersistentManifold; btPersistentManifold* manifold = (btPersistentManifold*)collisionPairInput.m_persistentManifoldPtr; @@ -988,41 +981,88 @@ void processCollisionTask(void* userPtr, void* lsMemPtr) #endif ) { +#ifdef USE_PE_BOX_BOX - //getCollisionMargin0 - btScalar margin0 = lsMem.getlocalCollisionAlgorithm()->getCollisionMargin0(); - btScalar margin1 = lsMem.getlocalCollisionAlgorithm()->getCollisionMargin1(); - btVector3 shapeDim0 = lsMem.getlocalCollisionAlgorithm()->getShapeDimensions0()+btVector3(margin0,margin0,margin0); - btVector3 shapeDim1 = lsMem.getlocalCollisionAlgorithm()->getShapeDimensions1()+btVector3(margin1,margin1,margin1); + //getCollisionMargin0 + btScalar margin0 = lsMem.getlocalCollisionAlgorithm()->getCollisionMargin0(); + btScalar margin1 = lsMem.getlocalCollisionAlgorithm()->getCollisionMargin1(); + btVector3 shapeDim0 = lsMem.getlocalCollisionAlgorithm()->getShapeDimensions0()+btVector3(margin0,margin0,margin0); + btVector3 shapeDim1 = lsMem.getlocalCollisionAlgorithm()->getShapeDimensions1()+btVector3(margin1,margin1,margin1); - Box boxA(shapeDim0.getX(),shapeDim0.getY(),shapeDim0.getZ()); - Vector3 vmPos0 = getVmVector3(collisionPairInput.m_worldTransform0.getOrigin()); - Vector3 vmPos1 = getVmVector3(collisionPairInput.m_worldTransform1.getOrigin()); - Matrix3 vmMatrix0 = getVmMatrix3(collisionPairInput.m_worldTransform0.getBasis()); - Matrix3 vmMatrix1 = getVmMatrix3(collisionPairInput.m_worldTransform1.getBasis()); + Box boxA(shapeDim0.getX(),shapeDim0.getY(),shapeDim0.getZ()); + Vector3 vmPos0 = getVmVector3(collisionPairInput.m_worldTransform0.getOrigin()); + Vector3 vmPos1 = getVmVector3(collisionPairInput.m_worldTransform1.getOrigin()); + Matrix3 vmMatrix0 = getVmMatrix3(collisionPairInput.m_worldTransform0.getBasis()); + Matrix3 vmMatrix1 = getVmMatrix3(collisionPairInput.m_worldTransform1.getBasis()); - Transform3 transformA(vmMatrix0,vmPos0); - Box boxB(shapeDim1.getX(),shapeDim1.getY(),shapeDim1.getZ()); - Transform3 transformB(vmMatrix1,vmPos1); - BoxPoint resultClosestBoxPointA; - BoxPoint resultClosestBoxPointB; - Vector3 resultNormal; - float distanceThreshold = FLT_MAX;//0.0f;//FLT_MAX;//use epsilon? + Transform3 transformA(vmMatrix0,vmPos0); + Box boxB(shapeDim1.getX(),shapeDim1.getY(),shapeDim1.getZ()); + Transform3 transformB(vmMatrix1,vmPos1); + BoxPoint resultClosestBoxPointA; + BoxPoint resultClosestBoxPointB; + Vector3 resultNormal; + float distanceThreshold = FLT_MAX;//0.0f;//FLT_MAX;//use epsilon? - float distance = boxBoxDistance(resultNormal,resultClosestBoxPointA,resultClosestBoxPointB, boxA, transformA, boxB,transformB,distanceThreshold); - - btVector3 normalInB = -getBtVector3(resultNormal); + float distance = boxBoxDistance(resultNormal,resultClosestBoxPointA,resultClosestBoxPointB, boxA, transformA, boxB,transformB,distanceThreshold); + + btVector3 normalInB = -getBtVector3(resultNormal); - if(distance < spuManifold->getContactBreakingThreshold()) + if(distance < spuManifold->getContactBreakingThreshold()) + { + btVector3 pointOnB = collisionPairInput.m_worldTransform1(getBtVector3(resultClosestBoxPointB.localPoint)); + + spuContacts.addContactPoint( + normalInB, + pointOnB, + distance); + } + } +#else { - btVector3 pointOnB = collisionPairInput.m_worldTransform1(getBtVector3(resultClosestBoxPointB.localPoint)); - spuContacts.addContactPoint( - normalInB, - pointOnB, - distance); + btScalar margin0 = lsMem.getlocalCollisionAlgorithm()->getCollisionMargin0(); + btScalar margin1 = lsMem.getlocalCollisionAlgorithm()->getCollisionMargin1(); + btVector3 shapeDim0 = lsMem.getlocalCollisionAlgorithm()->getShapeDimensions0()+btVector3(margin0,margin0,margin0); + btVector3 shapeDim1 = lsMem.getlocalCollisionAlgorithm()->getShapeDimensions1()+btVector3(margin1,margin1,margin1); + + + btBoxShape box0(shapeDim0); + btBoxShape box1(shapeDim1); + + struct SpuBridgeContactCollector : public btDiscreteCollisionDetectorInterface::Result + { + SpuContactResult& m_spuContacts; + + virtual void setShapeIdentifiers(int partId0,int index0, int partId1,int index1) + { + m_spuContacts.setShapeIdentifiers(partId0,index0,partId1,index1); + } + virtual void addContactPoint(const btVector3& normalOnBInWorld,const btVector3& pointInWorld,btScalar depth) + { + m_spuContacts.addContactPoint(normalOnBInWorld,pointInWorld,depth); + } + + SpuBridgeContactCollector(SpuContactResult& spuContacts) + :m_spuContacts(spuContacts) + { + + } + }; + + SpuBridgeContactCollector bridgeOutput(spuContacts); + + btDiscreteCollisionDetectorInterface::ClosestPointInput input; + input.m_maximumDistanceSquared = 1e30f; + input.m_transformA = collisionPairInput.m_worldTransform0; + input.m_transformB = collisionPairInput.m_worldTransform1; + + btBoxBoxDetector detector(&box0,&box1); + + detector.getClosestPoints(input,bridgeOutput,0); + } +#endif //USE_PE_BOX_BOX lsMem.needsDmaPutContactManifoldAlgo = true; #ifdef USE_SEPDISTANCE_UTIL diff --git a/src/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/boxBoxDistance.cpp b/src/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/boxBoxDistance.cpp index f70b85575..ad7fc2633 100644 --- a/src/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/boxBoxDistance.cpp +++ b/src/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/boxBoxDistance.cpp @@ -178,14 +178,11 @@ VertexBFaceATests( PE_REF(Vector3) faceOffsetBA, const Matrix3 & matrixAB, const Matrix3 & matrixBA, - PE_REF(Vector3) signsB1, - PE_REF(Vector3) scalesB1, + PE_REF(Vector3) signsB, + PE_REF(Vector3) scalesB, bool first ) { - - Vector3 signsB = signsB1; - Vector3 scalesB = scalesB; - + float t0, t1; float distSqr; @@ -307,12 +304,10 @@ VertexAFaceBTests( PE_REF(Vector3) faceOffsetBA, const Matrix3 & matrixAB, const Matrix3 & matrixBA, - PE_REF(Vector3) signsA1, - PE_REF(Vector3) scalesA1, + PE_REF(Vector3) signsA, + PE_REF(Vector3) scalesA, bool first ) { - Vector3 signsA = signsA1; - Vector3 scalesA = scalesA1; float t0, t1; float distSqr; @@ -545,18 +540,13 @@ EdgeEdgeTests( PE_REF(Vector3) faceOffsetBA, const Matrix3 & matrixAB, const Matrix3 & matrixBA, - PE_REF(Vector3) signsA1, - PE_REF(Vector3) signsB1, - PE_REF(Vector3) scalesA1, - PE_REF(Vector3) scalesB1, + PE_REF(Vector3) signsA, + PE_REF(Vector3) signsB, + PE_REF(Vector3) scalesA, + PE_REF(Vector3) scalesB, bool first ) { - Vector3 signsA=signsA1; - Vector3 signsB=signsB1; - Vector3 scalesA=scalesA1; - Vector3 scalesB=scalesB1; - float distSqr; float tA, tB; diff --git a/src/BulletMultiThreaded/SpuParallelSolver.cpp b/src/BulletMultiThreaded/SpuParallelSolver.cpp index 13a3e92c1..b4186b5c1 100644 --- a/src/BulletMultiThreaded/SpuParallelSolver.cpp +++ b/src/BulletMultiThreaded/SpuParallelSolver.cpp @@ -257,9 +257,9 @@ public: */ // Solver caches -btAlignedObjectArray solverBodyPool_persist; +btAlignedObjectArray solverBodyPool_persist; btAlignedObjectArray solverBodyOffsetList_persist; -btAlignedObjectArray solverInternalConstraintPool_persist; +btAlignedObjectArray solverInternalConstraintPool_persist; btAlignedObjectArray solverConstraintPool_persist; @@ -343,13 +343,13 @@ void btParallelSequentialImpulseSolver::allSolved (const btContactSolverInfo& in // Setup rigid bodies // Allocate temporary data solverBodyPool_persist.resize(numBodies + numManifolds + numConstraints); - SpuSolverBody* solverBodyPool = &solverBodyPool_persist[0]; + btSolverBody* solverBodyPool = &solverBodyPool_persist[0]; solverBodyOffsetList_persist.resize(numBodyOffsets); uint32_t* solverBodyOffsetList = &solverBodyOffsetList_persist[0]; solverInternalConstraintPool_persist.resize(m_numberOfContacts*3); - SpuSolverInternalConstraint* solverInternalConstraintPool = &solverInternalConstraintPool_persist[0]; + btSolverConstraint* solverInternalConstraintPool = &solverInternalConstraintPool_persist[0]; solverConstraintPool_persist.resize(numConstraints); SpuSolverConstraint* solverConstraintPool = &solverConstraintPool_persist[0]; @@ -446,7 +446,9 @@ void btParallelSequentialImpulseSolver::allSolved (const btContactSolverInfo& in m_taskScheduler.issueTask(); } - m_taskScheduler.flushTasks(); + m_taskScheduler.flushTasks(); + + } btAlignedFree((void*)spinVar); } @@ -481,6 +483,41 @@ void btParallelSequentialImpulseSolver::allSolved (const btContactSolverInfo& in } + + + { + BT_PROFILE("warmstart_writeback"); + + btSpinlock::SpinVariable* spinVar = (btSpinlock::SpinVariable*)btAlignedAlloc(sizeof(btSpinlock::SpinVariable), 128); + for (int iter = 0; iter < info.m_numIterations; ++iter) + { + btSpinlock lock (spinVar); + lock.Init(); + + // Clear the "processed cells" part of the hash + memcpy(m_solverHash->m_currentMask[0], emptyCellMask, sizeof(uint32_t)*SPU_HASH_NUMCELLDWORDS); + + for (int task = 0; task < m_taskScheduler.getMaxOutstandingTasks(); ++task) + { + SpuSolverTaskDesc* desc = m_taskScheduler.getTask(); + desc->m_solverCommand = CMD_SOLVER_MANIFOLD_WARMSTART_WRITEBACK; + desc->m_solverData.m_solverHash = m_solverHash; + desc->m_solverData.m_solverInternalConstraintList = solverInternalConstraintPool; + desc->m_solverData.m_solverConstraintList = solverConstraintPool; + desc->m_commandData.m_manifoldSetup.m_manifoldHolders = &m_sortedManifolds[0]; + desc->m_commandData.m_iterate.m_spinLockVar = spinVar; + + m_taskScheduler.issueTask(); + } + m_taskScheduler.flushTasks(); + } + btAlignedFree((void*)spinVar); + } + + + + + // Clean up m_sortedManifolds.resize(0); m_sortedConstraints.resize(0); diff --git a/src/BulletMultiThreaded/SpuSolverTask/SpuParallellSolverTask.cpp b/src/BulletMultiThreaded/SpuSolverTask/SpuParallellSolverTask.cpp index b29a7111c..2927812e8 100644 --- a/src/BulletMultiThreaded/SpuSolverTask/SpuParallellSolverTask.cpp +++ b/src/BulletMultiThreaded/SpuSolverTask/SpuParallellSolverTask.cpp @@ -37,22 +37,24 @@ Written by: Marten Svanfeldt //NOTE! When changing this, make sure the package sizes etc below are updated #define TEMP_STORAGE_SIZE (150*1024) -#define CONSTRAINT_MAX_SIZE (46*16) +#define CONSTRAINT_MAX_SIZE (60*16) struct SolverTask_LocalStoreMemory { ATTRIBUTE_ALIGNED16(SpuSolverHash m_localHash); // Data for temporary storage in situations where we just need very few - ATTRIBUTE_ALIGNED16(SpuSolverInternalConstraint m_tempInternalConstr[4]); + ATTRIBUTE_ALIGNED16(btSolverConstraint m_tempInternalConstr[4]); ATTRIBUTE_ALIGNED16(SpuSolverConstraint m_tempConstraint[1]); - ATTRIBUTE_ALIGNED16(SpuSolverBody m_tempSPUBodies[2]); + ATTRIBUTE_ALIGNED16(btSolverBody m_tempSPUBodies[2]); ATTRIBUTE_ALIGNED16(char m_tempRBs[2][sizeof(btRigidBody)]); ATTRIBUTE_ALIGNED16(char m_externalConstraint[CONSTRAINT_MAX_SIZE]); // The general temporary storage, "dynamically" allocated ATTRIBUTE_ALIGNED16(uint8_t m_temporaryStorage[TEMP_STORAGE_SIZE]); size_t m_temporaryStorageUsed; + + ATTRIBUTE_ALIGNED16(float m_appliedImpulse[4]); }; @@ -112,24 +114,24 @@ void freeTemporaryStorage (SolverTask_LocalStoreMemory* lsmem, void* ptr, size_t lsmem->m_temporaryStorageUsed -= size; } -SpuSolverBody* allocBodyStorage (SolverTask_LocalStoreMemory* lsmem, size_t numBodies) +btSolverBody* allocBodyStorage (SolverTask_LocalStoreMemory* lsmem, size_t numBodies) { - return static_cast (allocTemporaryStorage(lsmem, sizeof(SpuSolverBody)*numBodies)); + return static_cast (allocTemporaryStorage(lsmem, sizeof(btSolverBody)*numBodies)); } -void freeBodyStorage (SolverTask_LocalStoreMemory* lsmem, SpuSolverBody* ptr, size_t numBodies) +void freeBodyStorage (SolverTask_LocalStoreMemory* lsmem, btSolverBody* ptr, size_t numBodies) { - freeTemporaryStorage(lsmem, ptr, sizeof(SpuSolverBody)*numBodies); + freeTemporaryStorage(lsmem, ptr, sizeof(btSolverBody)*numBodies); } -SpuSolverInternalConstraint* allocInternalConstraintStorage (SolverTask_LocalStoreMemory* lsmem, size_t numConstr) +btSolverConstraint* allocInternalConstraintStorage (SolverTask_LocalStoreMemory* lsmem, size_t numConstr) { - return static_cast (allocTemporaryStorage(lsmem, sizeof(SpuSolverInternalConstraint)*numConstr)); + return static_cast (allocTemporaryStorage(lsmem, sizeof(btSolverConstraint)*numConstr)); } -void freeInternalConstraintStorage (SolverTask_LocalStoreMemory* lsmem, SpuSolverInternalConstraint* ptr, size_t numConstr) +void freeInternalConstraintStorage (SolverTask_LocalStoreMemory* lsmem, btSolverConstraint* ptr, size_t numConstr) { - freeTemporaryStorage(lsmem, ptr, sizeof(SpuSolverInternalConstraint)*numConstr); + freeTemporaryStorage(lsmem, ptr, sizeof(btSolverConstraint)*numConstr); } SpuSolverConstraint* allocConstraintStorage (SolverTask_LocalStoreMemory* lsmem, size_t numConstr) @@ -209,25 +211,39 @@ private: - +#include "BulletDynamics/ConstraintSolver/btSolverBody.h" //-- RB HANDLING -static void setupSpuBody (btRigidBody* rb, SpuSolverBody* spuBody) +static void setupSpuBody (btCollisionObject* collisionObject, btSolverBody* solverBody) { - spuBody->m_linearVelocity = rb->getLinearVelocity(); - spuBody->m_angularVelocity = rb->getAngularVelocity(); - spuBody->m_worldInvInertiaTensor = rb->getInvInertiaTensorWorld(); - spuBody->m_invertedMass = rb->getInvMass(); - spuBody->m_angularFactor = rb->getAngularFactor (); + btRigidBody* rb = btRigidBody::upcast(collisionObject); + if (rb) + { + solverBody->m_worldInvInertiaTensor = rb->getInvInertiaTensorWorld(); + solverBody->m_angularVelocity = rb->getAngularVelocity() ; + solverBody->m_centerOfMassPosition = collisionObject->getWorldTransform().getOrigin(); + solverBody->m_friction = collisionObject->getFriction(); + solverBody->m_invMass = rb->getInvMass(); + solverBody->m_linearVelocity = rb->getLinearVelocity(); + solverBody->m_originalBody = rb; + solverBody->m_angularFactor = rb->getAngularFactor(); + } else + { + solverBody->m_worldInvInertiaTensor.setIdentity(); + solverBody->m_angularVelocity.setValue(0,0,0); + solverBody->m_centerOfMassPosition = collisionObject->getWorldTransform().getOrigin(); + solverBody->m_friction = collisionObject->getFriction(); + solverBody->m_invMass = 0.f; + solverBody->m_linearVelocity.setValue(0,0,0); + solverBody->m_originalBody = 0; + solverBody->m_angularFactor = 1.f; + } + solverBody->m_pushVelocity.setValue(0.f,0.f,0.f); + solverBody->m_turnVelocity.setValue(0.f,0.f,0.f); } //-- RB HANDLING END - - - - - //-- HASH HANDLING static void writeTaskFlag(SpuSolverHash* hashRemote, uint32_t taskId, uint32_t* flags) { @@ -397,63 +413,58 @@ static int getNextFreeCell(SolverTask_LocalStoreMemory* localMemory, SpuSolverTa //-- HASH HANDLING END - - -//-- SOLVER METHODS -// Contact solve method -static void solveContact (SpuSolverInternalConstraint& constraint, SpuSolverBody& bodyA, SpuSolverBody& bodyB) +btScalar solveContact(btSolverBody& body1,btSolverBody& body2,const btSolverConstraint& contactConstraint) { - float normalImpulse(0.f); + btScalar normalImpulse; { - if (constraint.m_penetration < 0.f) - return; - + // Optimized version of projected relative velocity, use precomputed cross products with normal // body1.getVelocityInLocalPoint(contactConstraint.m_rel_posA,vel1); // body2.getVelocityInLocalPoint(contactConstraint.m_rel_posB,vel2); // btVector3 vel = vel1 - vel2; - // float rel_vel = contactConstraint.m_contactNormal.dot(vel); + // btScalar rel_vel = contactConstraint.m_contactNormal.dot(vel); - float rel_vel; - float vel1Dotn = constraint.m_normal.dot(bodyA.m_linearVelocity) - + constraint.m_relpos1CrossNormal.dot(bodyA.m_angularVelocity); - float vel2Dotn = constraint.m_normal.dot(bodyB.m_linearVelocity) - + constraint.m_relpos2CrossNormal.dot(bodyB.m_angularVelocity); + btScalar rel_vel; + btScalar vel1Dotn = contactConstraint.m_contactNormal.dot(body1.m_linearVelocity) + + contactConstraint.m_relpos1CrossNormal.dot(body1.m_angularVelocity); + btScalar vel2Dotn = contactConstraint.m_contactNormal.dot(body2.m_linearVelocity) + + contactConstraint.m_relpos2CrossNormal.dot(body2.m_angularVelocity); rel_vel = vel1Dotn-vel2Dotn; + btScalar positionalError = 0.f; + positionalError = -contactConstraint.m_penetration;// * solverInfo.m_erp/solverInfo.m_timeStep; - float positionalError = constraint.m_penetration; - float velocityError = constraint.m_restitution - rel_vel;// * damping; - - float penetrationImpulse = positionalError * constraint.m_jacDiagABInv; - float velocityImpulse = velocityError * constraint.m_jacDiagABInv; - float normalImpulse = penetrationImpulse+velocityImpulse; + btScalar velocityError = contactConstraint.m_restitution - rel_vel;// * damping; + btScalar penetrationImpulse = positionalError * contactConstraint.m_jacDiagABInv; + btScalar velocityImpulse = velocityError * contactConstraint.m_jacDiagABInv; + normalImpulse = penetrationImpulse+velocityImpulse; + + // See Erin Catto's GDC 2006 paper: Clamp the accumulated impulse - float oldNormalImpulse = constraint.m_appliedImpulse; - float sum = oldNormalImpulse + normalImpulse; - constraint.m_appliedImpulse = float(0.) > sum ? float(0.): sum; + btScalar oldNormalImpulse = contactConstraint.m_appliedImpulse; + btScalar sum = oldNormalImpulse + normalImpulse; + contactConstraint.m_appliedImpulse = btScalar(0.) > sum ? btScalar(0.): sum; - normalImpulse = constraint.m_appliedImpulse - oldNormalImpulse; - - if (bodyA.m_invertedMass > 0) - { - bodyA.m_linearVelocity += constraint.m_normal*bodyA.m_invertedMass*normalImpulse; - bodyA.m_angularVelocity += bodyA.m_angularFactor * constraint.m_angularComponentA*normalImpulse; - } - if (bodyB.m_invertedMass > 0) - { - bodyB.m_linearVelocity -= constraint.m_normal*bodyB.m_invertedMass*normalImpulse; - bodyB.m_angularVelocity -= bodyB.m_angularFactor * constraint.m_angularComponentB*normalImpulse; - } + normalImpulse = contactConstraint.m_appliedImpulse - oldNormalImpulse; + body1.internalApplyImpulse(contactConstraint.m_contactNormal*body1.m_invMass, + contactConstraint.m_angularComponentA,normalImpulse); + + body2.internalApplyImpulse(contactConstraint.m_contactNormal*body2.m_invMass, + contactConstraint.m_angularComponentB,-normalImpulse); } + + return normalImpulse; } + + // Friction solve method -static void solveFriction (SpuSolverInternalConstraint& constraint, SpuSolverBody& bodyA, SpuSolverBody& bodyB, btScalar normalImpulse) +static void solveFriction ( btSolverBody& bodyA, btSolverBody& bodyB, btSolverConstraint& constraint,btScalar normalImpulse) { + const btScalar combinedFriction = constraint.m_friction; const btScalar limit = normalImpulse * combinedFriction; @@ -465,9 +476,9 @@ static void solveFriction (SpuSolverInternalConstraint& constraint, SpuSolverBod { btScalar rel_vel; - const btScalar vel1Dotn = constraint.m_normal.dot(bodyA.m_linearVelocity) + const btScalar vel1Dotn = constraint.m_contactNormal.dot(bodyA.m_linearVelocity) + constraint.m_relpos1CrossNormal.dot(bodyA.m_angularVelocity); - const btScalar vel2Dotn = constraint.m_normal.dot(bodyB.m_linearVelocity) + const btScalar vel2Dotn = constraint.m_contactNormal.dot(bodyB.m_linearVelocity) + constraint.m_relpos2CrossNormal.dot(bodyB.m_angularVelocity); rel_vel = vel1Dotn-vel2Dotn; @@ -480,23 +491,24 @@ static void solveFriction (SpuSolverInternalConstraint& constraint, SpuSolverBod j1 = constraint.m_appliedImpulse - oldTangentImpulse; } - - if (bodyA.m_invertedMass > 0) + j1*=0.9; + if (bodyA.m_invMass > 0) { - bodyA.m_linearVelocity += constraint.m_normal*bodyA.m_invertedMass*j1; + bodyA.m_linearVelocity += constraint.m_contactNormal*bodyA.m_invMass*j1; bodyA.m_angularVelocity += bodyA.m_angularFactor * constraint.m_angularComponentA*j1; } - if (bodyB.m_invertedMass > 0) + if (bodyB.m_invMass > 0) { - bodyB.m_linearVelocity -= constraint.m_normal*bodyB.m_invertedMass*j1; + bodyB.m_linearVelocity -= constraint.m_contactNormal*bodyB.m_invMass*j1; bodyB.m_angularVelocity -= bodyB.m_angularFactor * constraint.m_angularComponentB*j1; } } } + // Constraint solving -static void solveConstraint (SpuSolverConstraint& constraint, SpuSolverBody& bodyA, SpuSolverBody& bodyB) +static void solveConstraint (SpuSolverConstraint& constraint, btSolverBody& bodyA, btSolverBody& bodyB) { // All but D6 use worldspace normals, use same code if (constraint.m_flags.m_useLinear) @@ -527,14 +539,14 @@ static void solveConstraint (SpuSolverConstraint& constraint, SpuSolverBody& bod btVector3 impNormal = normal*impulse; // Apply - if (bodyA.m_invertedMass > 0) + if (bodyA.m_invMass > 0) { - bodyA.m_linearVelocity += impNormal*bodyA.m_invertedMass; + bodyA.m_linearVelocity += impNormal*bodyA.m_invMass; bodyA.m_angularVelocity += bodyA.m_angularFactor * (bodyA.m_worldInvInertiaTensor * (btVector3(constraint.m_relPos1).cross(impNormal))); } - if (bodyB.m_invertedMass > 0) + if (bodyB.m_invMass > 0) { - bodyB.m_linearVelocity -= impNormal*bodyB.m_invertedMass; + bodyB.m_linearVelocity -= impNormal*bodyB.m_invMass; bodyB.m_angularVelocity -= bodyB.m_angularFactor * (bodyB.m_worldInvInertiaTensor * (btVector3(constraint.m_relPos2).cross(impNormal))); } @@ -572,11 +584,11 @@ static void solveConstraint (SpuSolverConstraint& constraint, SpuSolverBody& bod btVector3 impAxis = axis*impulse; // Apply - if (bodyA.m_invertedMass > 0) + if (bodyA.m_invMass > 0) { bodyA.m_angularVelocity += bodyA.m_angularFactor * (bodyA.m_worldInvInertiaTensor * impAxis); } - if (bodyB.m_invertedMass > 0) + if (bodyB.m_invMass > 0) { bodyB.m_angularVelocity -= bodyB.m_angularFactor * (bodyB.m_worldInvInertiaTensor * impAxis); } @@ -602,11 +614,11 @@ static void solveConstraint (SpuSolverConstraint& constraint, SpuSolverBody& bod btVector3 impAxis = axis*impulse* (constraint.hinge.m_limitJacFactor/btFabs (constraint.hinge.m_limitJacFactor)); // Apply - if (bodyA.m_invertedMass > 0) + if (bodyA.m_invMass > 0) { bodyA.m_angularVelocity += bodyA.m_angularFactor * (bodyA.m_worldInvInertiaTensor * impAxis); } - if (bodyB.m_invertedMass > 0) + if (bodyB.m_invMass > 0) { bodyB.m_angularVelocity -= bodyB.m_angularFactor * (bodyB.m_worldInvInertiaTensor * impAxis); } @@ -632,11 +644,11 @@ static void solveConstraint (SpuSolverConstraint& constraint, SpuSolverBody& bod btVector3 impAxis = axis*clampedImpulse; // Apply - if (bodyA.m_invertedMass > 0) + if (bodyA.m_invMass > 0) { bodyA.m_angularVelocity += bodyA.m_angularFactor * (bodyA.m_worldInvInertiaTensor * impAxis); } - if (bodyB.m_invertedMass > 0) + if (bodyB.m_invMass > 0) { bodyB.m_angularVelocity -= bodyB.m_angularFactor * (bodyB.m_worldInvInertiaTensor * impAxis); } @@ -665,11 +677,11 @@ static void solveConstraint (SpuSolverConstraint& constraint, SpuSolverBody& bod btVector3 impAxis = axis*impulse; // Apply - if (bodyA.m_invertedMass > 0) + if (bodyA.m_invMass > 0) { bodyA.m_angularVelocity += bodyA.m_angularFactor * (bodyA.m_worldInvInertiaTensor * impAxis); } - if (bodyB.m_invertedMass > 0) + if (bodyB.m_invMass > 0) { bodyB.m_angularVelocity -= bodyB.m_angularFactor * (bodyB.m_worldInvInertiaTensor * impAxis); } @@ -695,11 +707,11 @@ static void solveConstraint (SpuSolverConstraint& constraint, SpuSolverBody& bod btVector3 impAxis = axis*impulse; // Apply - if (bodyA.m_invertedMass > 0) + if (bodyA.m_invMass > 0) { bodyA.m_angularVelocity += bodyA.m_angularFactor * (bodyA.m_worldInvInertiaTensor * impAxis); } - if (bodyB.m_invertedMass > 0) + if (bodyB.m_invMass > 0) { bodyB.m_angularVelocity -= bodyB.m_angularFactor * (bodyB.m_worldInvInertiaTensor * impAxis); } @@ -830,7 +842,7 @@ void processSolverTask(void* userPtr, void* lsMemory) btRigidBody** bodyPtrList = (btRigidBody**)allocTemporaryStorage(localMemory, bodiesPerPackage*sizeof(btRigidBody*)); btRigidBody* bodyList = (btRigidBody*)allocTemporaryStorage(localMemory, bodiesPerPackage*sizeof(btRigidBody)); - SpuSolverBody* spuBodyList = allocBodyStorage(localMemory, bodiesPerPackage); + btSolverBody* spuBodyList = allocBodyStorage(localMemory, bodiesPerPackage); while (bodiesToProcess > 0) @@ -859,7 +871,7 @@ void processSolverTask(void* userPtr, void* lsMemory) for ( b = 0; b < packageSize; ++b) { btRigidBody* localBody = bodyList+b; - SpuSolverBody* spuBody = spuBodyList + b; + btSolverBody* spuBody = spuBodyList + b; //Set it up solver body setupSpuBody(localBody, spuBody); @@ -878,7 +890,7 @@ void processSolverTask(void* userPtr, void* lsMemory) // DMA the list of SPU bodies { - int dmaSize = sizeof(SpuSolverBody)*packageSize; + int dmaSize = sizeof(btSolverBody)*packageSize; uint64_t dmaPpuAddress2 = reinterpret_cast (taskDesc.m_solverData.m_solverBodyList + bodyPackageOffset); cellDmaLargePut(spuBodyList, dmaPpuAddress2, dmaSize, DMA_TAG(2), 0, 0); } @@ -978,43 +990,54 @@ void processSolverTask(void* userPtr, void* lsMemory) cellDmaWaitTagStatusAll(DMA_MASK(1) | DMA_MASK(2)); - btRigidBody* rb0 = (btRigidBody*)&localMemory->m_tempRBs[0]; - btRigidBody* rb1 = (btRigidBody*)&localMemory->m_tempRBs[1]; + btRigidBody* rb0readonly = (btRigidBody*)&localMemory->m_tempRBs[0]; + btRigidBody* rb1readonly = (btRigidBody*)&localMemory->m_tempRBs[1]; - if (rb0->getIslandTag() >= 0) + if (rb0readonly->getIslandTag() >= 0) { - solverBodyIdA = rb0->getCompanionId(); + solverBodyIdA = rb0readonly->getCompanionId(); + + ///DMA back bodyA (with applied impulse) + { + int dmaSize = sizeof(btSolverBody); + uint64_t dmaPpuAddress2 = reinterpret_cast (taskDesc.m_solverData.m_solverBodyList + solverBodyIdA); + cellDmaLargeGet(&localMemory->m_tempSPUBodies[0], dmaPpuAddress2, dmaSize, DMA_TAG(1), 0, 0); + cellDmaWaitTagStatusAll(DMA_MASK(1)); + } } else { //create a static body solverBodyIdA = taskDesc.m_commandData.m_manifoldSetup.m_numBodies + hashCell.m_manifoldListOffset; - setupSpuBody(rb0, &localMemory->m_tempSPUBodies[0]); - { - int dmaSize = sizeof(SpuSolverBody); - uint64_t dmaPpuAddress2 = reinterpret_cast (taskDesc.m_solverData.m_solverBodyList + solverBodyIdA); - cellDmaLargePut(&localMemory->m_tempSPUBodies[0], dmaPpuAddress2, dmaSize, DMA_TAG(1), 0, 0); - cellDmaWaitTagStatusAll(DMA_MASK(1)); - } + setupSpuBody(rb0readonly, &localMemory->m_tempSPUBodies[0]); } - if (rb1->getIslandTag() >= 0) + btSolverBody* solverBodyA = &localMemory->m_tempSPUBodies[0]; + + + + if (rb1readonly->getIslandTag() >= 0) { - solverBodyIdB = rb1->getCompanionId(); + solverBodyIdB = rb1readonly->getCompanionId(); + ///DMA back bodyB (with applied impulse) + { + int dmaSize = sizeof(btSolverBody); + uint64_t dmaPpuAddress2 = reinterpret_cast (taskDesc.m_solverData.m_solverBodyList + solverBodyIdB); + cellDmaLargeGet(&localMemory->m_tempSPUBodies[1], dmaPpuAddress2, dmaSize, DMA_TAG(1), 0, 0); + cellDmaWaitTagStatusAll(DMA_MASK(1)); + } + } else { //create a static body solverBodyIdB = taskDesc.m_commandData.m_manifoldSetup.m_numBodies + hashCell.m_manifoldListOffset; - setupSpuBody(rb1, &localMemory->m_tempSPUBodies[0]); - { - int dmaSize = sizeof(SpuSolverBody); - uint64_t dmaPpuAddress2 = reinterpret_cast (taskDesc.m_solverData.m_solverBodyList + solverBodyIdB); - cellDmaLargePut(&localMemory->m_tempSPUBodies[0], dmaPpuAddress2, dmaSize, DMA_TAG(1), 0, 0); - cellDmaWaitTagStatusAll(DMA_MASK(1)); - } + setupSpuBody(rb1readonly, &localMemory->m_tempSPUBodies[1]); } + btSolverBody* solverBodyB = &localMemory->m_tempSPUBodies[1]; + + // Setup the pointer table int offsA = localRBs.insert(solverBodyIdA); int offsB = localRBs.insert(solverBodyIdB); @@ -1027,30 +1050,71 @@ void processSolverTask(void* userPtr, void* lsMemory) btVector3 pos1 = cp.getPositionWorldOnA(); btVector3 pos2 = cp.getPositionWorldOnB(); - btVector3 rel_pos1 = pos1 - rb0->getCenterOfMassPosition(); - btVector3 rel_pos2 = pos2 - rb1->getCenterOfMassPosition(); + btVector3 rel_pos1 = pos1 - rb0readonly->getCenterOfMassPosition(); + btVector3 rel_pos2 = pos2 - rb1readonly->getCenterOfMassPosition(); btScalar rel_vel; btVector3 vel; // De-penetration { - SpuSolverInternalConstraint& constraint = localMemory->m_tempInternalConstr[0]; + btSolverConstraint& constraint = localMemory->m_tempInternalConstr[0]; - constraint.m_localOffsetBodyA = offsA; - constraint.m_localOffsetBodyB = offsB; + { + uint64_t dmaPpuAddress2 = reinterpret_cast (manifoldHolderList[m].m_manifold); + //btManifoldPoint + int index = offsetof(btManifoldPoint,m_appliedImpulse)+ c* sizeof(btManifoldPoint); + dmaPpuAddress2+=index; + constraint.m_originalContactPoint = (void*)dmaPpuAddress2; + } - constraint.m_normal = cp.m_normalWorldOnB; + + constraint.m_solverBodyIdA = offsA; + constraint.m_solverBodyIdB = offsB; + + constraint.m_contactNormal = cp.m_normalWorldOnB; { //can be optimized, the cross products are already calculated - constraint.m_jacDiagABInv = computeJacobianInverse (rb0, rb1, pos1, pos2, cp.m_normalWorldOnB); + //constraint.m_jacDiagABInv = computeJacobianInverse (rb0, rb1, pos1, pos2, cp.m_normalWorldOnB); } constraint.m_relpos1CrossNormal = rel_pos1.cross(cp.m_normalWorldOnB); constraint.m_relpos2CrossNormal = rel_pos2.cross(cp.m_normalWorldOnB); + btVector3 torqueAxis0 = rel_pos1.cross(cp.m_normalWorldOnB); + constraint.m_angularComponentA = rb0readonly->getInvInertiaTensorWorld()*torqueAxis0; + btVector3 torqueAxis1 = rel_pos2.cross(cp.m_normalWorldOnB); + constraint.m_angularComponentB = rb1readonly->getInvInertiaTensorWorld()*torqueAxis1; + + + { + btVector3 vec; + btScalar denom0 = 0.f; + btScalar denom1 = 0.f; + if (rb0readonly) + { + vec = ( constraint.m_angularComponentA).cross(rel_pos1); + denom0 = rb0readonly->getInvMass() + cp.m_normalWorldOnB.dot(vec); + } + if (rb1readonly) + { + vec = ( constraint.m_angularComponentB).cross(rel_pos2); + denom1 = rb1readonly->getInvMass() + cp.m_normalWorldOnB.dot(vec); + } + + btScalar denom = 1/(denom0+denom1); + constraint.m_jacDiagABInv = denom; + } + + + + + //btVector3 vel1 = rb0readonly->getVelocityInLocalPoint(rel_pos1); + //btVector3 vel2 = rb1readonly->getVelocityInLocalPoint(rel_pos2); + btVector3 vel1; + solverBodyA->getVelocityInLocalPoint(rel_pos1,vel1); + btVector3 vel2; + solverBodyB->getVelocityInLocalPoint(rel_pos2,vel2); - btVector3 vel1 = rb0->getVelocityInLocalPoint(rel_pos1); - btVector3 vel2 = rb1->getVelocityInLocalPoint(rel_pos2); vel = vel1 - vel2; rel_vel = cp.m_normalWorldOnB.dot(vel); @@ -1063,22 +1127,24 @@ void processSolverTask(void* userPtr, void* lsMemory) rest = 0.f; }; + + btScalar penVel = -constraint.m_penetration/taskDesc.m_commandData.m_manifoldSetup.m_solverInfo.m_timeStep; - constraint.m_penetration *= - -(taskDesc.m_commandData.m_manifoldSetup.m_solverInfo.m_erp/taskDesc.m_commandData.m_manifoldSetup.m_solverInfo.m_timeStep); + + constraint.m_penetration *= (taskDesc.m_commandData.m_manifoldSetup.m_solverInfo.m_erp/taskDesc.m_commandData.m_manifoldSetup.m_solverInfo.m_timeStep); - if (rest > penVel) - { - constraint.m_penetration = btScalar(0.); - } constraint.m_restitution = rest; - constraint.m_appliedImpulse = 0.f; + constraint.m_appliedImpulse = cp.m_appliedImpulse*0.85; + if (constraint.m_appliedImpulse!= 0.f) + { + if (solverBodyA) + solverBodyA->internalApplyImpulse(constraint.m_contactNormal*rb0readonly->getInvMass(),constraint.m_angularComponentA,constraint.m_appliedImpulse); + if (solverBodyB) + solverBodyB->internalApplyImpulse(constraint.m_contactNormal*rb1readonly->getInvMass(),constraint.m_angularComponentB,-constraint.m_appliedImpulse); + } - btVector3 torqueAxis0 = rel_pos1.cross(cp.m_normalWorldOnB); - constraint.m_angularComponentA = rb0->getInvInertiaTensorWorld()*torqueAxis0; - btVector3 torqueAxis1 = rel_pos2.cross(cp.m_normalWorldOnB); - constraint.m_angularComponentB = rb1->getInvInertiaTensorWorld()*torqueAxis1; + } // Friction @@ -1091,66 +1157,69 @@ void processSolverTask(void* userPtr, void* lsMemory) { frictionTangential0a /= btSqrt(lat_rel_vel); frictionTangential1b = frictionTangential0a.cross(cp.m_normalWorldOnB); + frictionTangential1b.normalize(); } else { btPlaneSpace1(cp.m_normalWorldOnB,frictionTangential0a,frictionTangential1b); } + { - SpuSolverInternalConstraint& constraint = localMemory->m_tempInternalConstr[1]; + btSolverConstraint& constraint = localMemory->m_tempInternalConstr[1]; + constraint.m_originalContactPoint = 0; - constraint.m_normal = frictionTangential0a; + constraint.m_contactNormal = frictionTangential0a; - constraint.m_localOffsetBodyA = offsA; - constraint.m_localOffsetBodyB = offsB; + constraint.m_solverBodyIdA = offsA; + constraint.m_solverBodyIdB = offsB; constraint.m_friction = cp.m_combinedFriction; - constraint.m_appliedImpulse = btScalar(0.); + constraint.m_appliedImpulse = 0;//cp.m_appliedImpulse;//btScalar(0.); - constraint.m_jacDiagABInv = computeJacobianInverse (rb0, rb1, pos1, pos2, constraint.m_normal); + constraint.m_jacDiagABInv = computeJacobianInverse (rb0readonly, rb1readonly, pos1, pos2, constraint.m_contactNormal); { - btVector3 ftorqueAxis0 = rel_pos1.cross(constraint.m_normal); + btVector3 ftorqueAxis0 = rel_pos1.cross(constraint.m_contactNormal); constraint.m_relpos1CrossNormal = ftorqueAxis0; - constraint.m_angularComponentA = rb0->getInvInertiaTensorWorld()*ftorqueAxis0; + constraint.m_angularComponentA = rb0readonly->getInvInertiaTensorWorld()*ftorqueAxis0; } { - btVector3 ftorqueAxis0 = rel_pos2.cross(constraint.m_normal); + btVector3 ftorqueAxis0 = rel_pos2.cross(constraint.m_contactNormal); constraint.m_relpos2CrossNormal = ftorqueAxis0; - constraint.m_angularComponentB = rb1->getInvInertiaTensorWorld()*ftorqueAxis0; + constraint.m_angularComponentB = rb1readonly->getInvInertiaTensorWorld()*ftorqueAxis0; } } { - SpuSolverInternalConstraint& constraint = localMemory->m_tempInternalConstr[2]; + btSolverConstraint& constraint = localMemory->m_tempInternalConstr[2]; + constraint.m_originalContactPoint = 0; + constraint.m_contactNormal = frictionTangential1b; - constraint.m_normal = frictionTangential1b; - - constraint.m_localOffsetBodyA = offsA; - constraint.m_localOffsetBodyB = offsB; + constraint.m_solverBodyIdA = offsA; + constraint.m_solverBodyIdB = offsB; constraint.m_friction = cp.m_combinedFriction; constraint.m_appliedImpulse = btScalar(0.); - constraint.m_jacDiagABInv = computeJacobianInverse (rb0, rb1, pos1, pos2, constraint.m_normal); + constraint.m_jacDiagABInv = computeJacobianInverse (rb0readonly, rb1readonly, pos1, pos2, constraint.m_contactNormal); { - btVector3 ftorqueAxis0 = rel_pos1.cross(constraint.m_normal); + btVector3 ftorqueAxis0 = rel_pos1.cross(constraint.m_contactNormal); constraint.m_relpos1CrossNormal = ftorqueAxis0; - constraint.m_angularComponentA = rb0->getInvInertiaTensorWorld()*ftorqueAxis0; + constraint.m_angularComponentA = rb0readonly->getInvInertiaTensorWorld()*ftorqueAxis0; } { - btVector3 ftorqueAxis0 = rel_pos2.cross(constraint.m_normal); + btVector3 ftorqueAxis0 = rel_pos2.cross(constraint.m_contactNormal); constraint.m_relpos2CrossNormal = ftorqueAxis0; - constraint.m_angularComponentB = rb1->getInvInertiaTensorWorld()*ftorqueAxis0; + constraint.m_angularComponentB = rb1readonly->getInvInertiaTensorWorld()*ftorqueAxis0; } } // DMA the three constraints { - int dmaSize = sizeof(SpuSolverInternalConstraint)*3; + int dmaSize = sizeof(btSolverConstraint)*3; uint64_t dmaPpuAddress2 = reinterpret_cast (taskDesc.m_solverData.m_solverInternalConstraintList + constraintIndex); cellDmaLargePut(&localMemory->m_tempInternalConstr, dmaPpuAddress2, dmaSize, DMA_TAG(1), 0, 0); cellDmaWaitTagStatusAll(DMA_MASK(1)); @@ -1159,9 +1228,28 @@ void processSolverTask(void* userPtr, void* lsMemory) constraintIndex += 3; } + if (1) + { + ///DMA back bodyA (with applied impulse) + { + int dmaSize = sizeof(btSolverBody); + uint64_t dmaPpuAddress2 = reinterpret_cast (taskDesc.m_solverData.m_solverBodyList + solverBodyIdA); + cellDmaLargePut(&localMemory->m_tempSPUBodies[0], dmaPpuAddress2, dmaSize, DMA_TAG(1), 0, 0); + cellDmaWaitTagStatusAll(DMA_MASK(1)); + } + ///DMA back bodyB (with applied impulse) + { + int dmaSize = sizeof(btSolverBody); + uint64_t dmaPpuAddress2 = reinterpret_cast (taskDesc.m_solverData.m_solverBodyList + solverBodyIdB); + cellDmaLargePut(&localMemory->m_tempSPUBodies[1], dmaPpuAddress2, dmaSize, DMA_TAG(1), 0, 0); + cellDmaWaitTagStatusAll(DMA_MASK(1)); + } + } } + + manifoldsToProcess -= packageSize; manifoldPackageOffset += packageSize; } @@ -1194,6 +1282,7 @@ void processSolverTask(void* userPtr, void* lsMemory) { //int dmaSize = CONSTRAINT_MAX_SIZE; int dmaSize = getConstraintSize((btTypedConstraintType)constraintHolderList[c].m_constraintType); + btAssert(dmaSize (constraintHolderList[c].m_constraint); cellDmaLargeGet(constraintList + CONSTRAINT_MAX_SIZE*c, dmaPpuAddress2, dmaSize, DMA_TAG(1), 0, 0); } @@ -1236,7 +1325,7 @@ void processSolverTask(void* userPtr, void* lsMemory) hashCell.m_constraintListOffset; setupSpuBody(rb0, &localMemory->m_tempSPUBodies[0]); { - int dmaSize = sizeof(SpuSolverBody); + int dmaSize = sizeof(btSolverBody); uint64_t dmaPpuAddress2 = reinterpret_cast (taskDesc.m_solverData.m_solverBodyList + solverBodyIdA); cellDmaLargePut(&localMemory->m_tempSPUBodies[0], dmaPpuAddress2, dmaSize, DMA_TAG(1), 0, 0); cellDmaWaitTagStatusAll(DMA_MASK(1)); @@ -1254,7 +1343,7 @@ void processSolverTask(void* userPtr, void* lsMemory) hashCell.m_constraintListOffset; setupSpuBody(rb1, &localMemory->m_tempSPUBodies[0]); { - int dmaSize = sizeof(SpuSolverBody); + int dmaSize = sizeof(btSolverBody); uint64_t dmaPpuAddress2 = reinterpret_cast (taskDesc.m_solverData.m_solverBodyList + solverBodyIdB); cellDmaLargePut(&localMemory->m_tempSPUBodies[0], dmaPpuAddress2, dmaSize, DMA_TAG(1), 0, 0); cellDmaWaitTagStatusAll(DMA_MASK(1)); @@ -1539,6 +1628,7 @@ void processSolverTask(void* userPtr, void* lsMemory) } } break; + case CMD_SOLVER_SOLVE_ITERATE: { // DMA the hash @@ -1569,7 +1659,7 @@ void processSolverTask(void* userPtr, void* lsMemory) // Get the body list uint32_t* indexList = (uint32_t*)allocTemporaryStorage(localMemory, sizeof(uint32_t)*hashCell.m_numLocalBodies); - SpuSolverBody* bodyList = allocBodyStorage(localMemory, hashCell.m_numLocalBodies); + btSolverBody* bodyList = allocBodyStorage(localMemory, hashCell.m_numLocalBodies); int b; { int dmaSize = sizeof(uint32_t)*hashCell.m_numLocalBodies; @@ -1581,7 +1671,7 @@ void processSolverTask(void* userPtr, void* lsMemory) // DMA the bodies for ( b = 0; b < hashCell.m_numLocalBodies; ++b) { - int dmaSize = sizeof(SpuSolverBody); + int dmaSize = sizeof(btSolverBody); uint64_t dmaPpuAddress2 = reinterpret_cast (taskDesc.m_solverData.m_solverBodyList + indexList[b]); cellDmaLargeGet(bodyList+b, dmaPpuAddress2, dmaSize, DMA_TAG(1), 0, 0); } @@ -1613,8 +1703,8 @@ void processSolverTask(void* userPtr, void* lsMemory) for (size_t j = 0; j < packetSize; ++j) { SpuSolverConstraint& constraint = constraints[j]; - SpuSolverBody& bodyA = bodyList[constraint.m_localOffsetBodyA]; - SpuSolverBody& bodyB = bodyList[constraint.m_localOffsetBodyB]; + btSolverBody& bodyA = bodyList[constraint.m_localOffsetBodyA]; + btSolverBody& bodyB = bodyList[constraint.m_localOffsetBodyB]; solveConstraint(constraint, bodyA, bodyB); } @@ -1632,16 +1722,16 @@ void processSolverTask(void* userPtr, void* lsMemory) } freeConstraintStorage (localMemory, constraints, maxConstraintsPerPacket); - } + } // Now process the contacts if (hashCell.m_numContacts) { - const size_t maxContactsPerPacket = memTemporaryStorage(localMemory) / (sizeof(SpuSolverInternalConstraint)*3); + const size_t maxContactsPerPacket = memTemporaryStorage(localMemory) / (sizeof(btSolverConstraint)*3); size_t contactsToProcess = hashCell.m_numContacts; size_t constraintListOffset = hashCell.m_internalConstraintListOffset; - SpuSolverInternalConstraint* internalConstraints = allocInternalConstraintStorage(localMemory, maxContactsPerPacket*3); + btSolverConstraint* internalConstraints = allocInternalConstraintStorage(localMemory, maxContactsPerPacket*3); while (contactsToProcess > 0) { @@ -1649,7 +1739,7 @@ void processSolverTask(void* userPtr, void* lsMemory) // DMA the constraints { - int dmaSize = sizeof(SpuSolverInternalConstraint)*packetSize*3; + int dmaSize = sizeof(btSolverConstraint)*packetSize*3; uint64_t dmaPpuAddress2 = reinterpret_cast (taskDesc.m_solverData.m_solverInternalConstraintList + constraintListOffset); cellDmaLargeGet(internalConstraints, dmaPpuAddress2, dmaSize, DMA_TAG(1), 0, 0); } @@ -1658,32 +1748,37 @@ void processSolverTask(void* userPtr, void* lsMemory) size_t j; // Solve - for ( j = 0; j < packetSize*3; j += 3) + { - SpuSolverInternalConstraint& contact = internalConstraints[j]; - SpuSolverBody& bodyA = bodyList[contact.m_localOffsetBodyA]; - SpuSolverBody& bodyB = bodyList[contact.m_localOffsetBodyB]; + for ( j = 0; j < packetSize*3; j += 3) + { + btSolverConstraint& contact = internalConstraints[j]; + btSolverBody& bodyA = bodyList[contact.m_solverBodyIdA]; + btSolverBody& bodyB = bodyList[contact.m_solverBodyIdB]; - solveContact(contact, bodyA, bodyB); + solveContact(bodyA, bodyB,contact); + } } - for ( j = 0; j < packetSize*3; j += 3) { - SpuSolverInternalConstraint& contact = internalConstraints[j]; - SpuSolverBody& bodyA = bodyList[contact.m_localOffsetBodyA]; - SpuSolverBody& bodyB = bodyList[contact.m_localOffsetBodyB]; + for ( j = 0; j < packetSize*3; j += 3) + { + btSolverConstraint& contact = internalConstraints[j]; + btSolverBody& bodyA = bodyList[contact.m_solverBodyIdA]; + btSolverBody& bodyB = bodyList[contact.m_solverBodyIdB]; - SpuSolverInternalConstraint& frictionConstraint1 = internalConstraints[j + 1]; - solveFriction(frictionConstraint1, bodyA, bodyB, contact.m_appliedImpulse); + btSolverConstraint& frictionConstraint1 = internalConstraints[j + 1]; + solveFriction(bodyA, bodyB, frictionConstraint1,contact.m_appliedImpulse); - SpuSolverInternalConstraint& frictionConstraint2 = internalConstraints[j + 2]; - solveFriction(frictionConstraint2, bodyA, bodyB, contact.m_appliedImpulse); + btSolverConstraint& frictionConstraint2 = internalConstraints[j + 2]; + solveFriction(bodyA, bodyB, frictionConstraint2,contact.m_appliedImpulse); + } } // Write back the constraints for accumulated stuff { - int dmaSize = sizeof(SpuSolverInternalConstraint)*packetSize*3; + int dmaSize = sizeof(btSolverConstraint)*packetSize*3; uint64_t dmaPpuAddress2 = reinterpret_cast (taskDesc.m_solverData.m_solverInternalConstraintList + constraintListOffset); cellDmaLargePut(internalConstraints, dmaPpuAddress2, dmaSize, DMA_TAG(1), 0, 0); } @@ -1700,7 +1795,7 @@ void processSolverTask(void* userPtr, void* lsMemory) // DMA the bodies back to main memory for ( b = 0; b < hashCell.m_numLocalBodies; ++b) { - int dmaSize = sizeof(SpuSolverBody); + int dmaSize = sizeof(btSolverBody); uint64_t dmaPpuAddress2 = reinterpret_cast (taskDesc.m_solverData.m_solverBodyList + indexList[b]); cellDmaLargePut(bodyList + b, dmaPpuAddress2, dmaSize, DMA_TAG(1), 0, 0); } @@ -1720,7 +1815,7 @@ void processSolverTask(void* userPtr, void* lsMemory) btRigidBody** bodyPtrList = (btRigidBody**)allocTemporaryStorage(localMemory, bodiesPerPackage*sizeof(btRigidBody*)); btRigidBody* bodyList = (btRigidBody*)allocTemporaryStorage(localMemory, bodiesPerPackage*sizeof(btRigidBody)); - SpuSolverBody* spuBodyList = allocBodyStorage(localMemory, bodiesPerPackage); + btSolverBody* spuBodyList = allocBodyStorage(localMemory, bodiesPerPackage); while (bodiesToProcess > 0) { @@ -1746,7 +1841,7 @@ void processSolverTask(void* userPtr, void* lsMemory) // DMA the list of SPU bodies { - int dmaSize = sizeof(SpuSolverBody)*packageSize; + int dmaSize = sizeof(btSolverBody)*packageSize; uint64_t dmaPpuAddress2 = reinterpret_cast (taskDesc.m_solverData.m_solverBodyList + bodyPackageOffset); cellDmaLargeGet(spuBodyList, dmaPpuAddress2, dmaSize, DMA_TAG(2), 0, 0); } @@ -1756,9 +1851,9 @@ void processSolverTask(void* userPtr, void* lsMemory) for ( b = 0; b < packageSize; ++b) { btRigidBody* localBody = bodyList + b; - SpuSolverBody* solverBody = spuBodyList + b; + btSolverBody* solverBody = spuBodyList + b; - if (solverBody->m_invertedMass > 0) + if (solverBody->m_invMass > 0) { localBody->setLinearVelocity(solverBody->m_linearVelocity); localBody->setAngularVelocity(solverBody->m_angularVelocity); @@ -1782,6 +1877,86 @@ void processSolverTask(void* userPtr, void* lsMemory) } break; + + case CMD_SOLVER_MANIFOLD_WARMSTART_WRITEBACK: + { + // DMA the hash + { + int dmaSize = sizeof(SpuSolverHash); + uint64_t dmaPpuAddress2 = reinterpret_cast (taskDesc.m_solverData.m_solverHash); + cellDmaLargeGet(&localMemory->m_localHash, dmaPpuAddress2, dmaSize, DMA_TAG(1), 0, 0); + cellDmaWaitTagStatusAll(DMA_MASK(1)); + } + + btSpinlock hashLock (taskDesc.m_commandData.m_iterate.m_spinLockVar); + + int cellToProcess; + while (1) + { + cellToProcess = getNextFreeCell(localMemory, taskDesc, hashLock); + + if (cellToProcess >= SPU_HASH_NUMCELLS) + break; + + // Now process that one cell + SpuSolverHashCell& hashCell = localMemory->m_localHash.m_Hash[cellToProcess]; + + if (hashCell.m_numContacts == 0 && hashCell.m_numConstraints == 0) + continue; + + // Now process the contacts + if (hashCell.m_numContacts) + { + const size_t maxContactsPerPacket = memTemporaryStorage(localMemory) / (sizeof(btSolverConstraint)*3); + size_t contactsToProcess = hashCell.m_numContacts; + size_t constraintListOffset = hashCell.m_internalConstraintListOffset; + + btSolverConstraint* internalConstraints = allocInternalConstraintStorage(localMemory, maxContactsPerPacket*3); + + while (contactsToProcess > 0) + { + size_t packetSize = contactsToProcess > maxContactsPerPacket ? maxContactsPerPacket : contactsToProcess; + + // DMA the constraints + { + int dmaSize = sizeof(btSolverConstraint)*packetSize*3; + uint64_t dmaPpuAddress2 = reinterpret_cast (taskDesc.m_solverData.m_solverInternalConstraintList + constraintListOffset); + cellDmaLargeGet(internalConstraints, dmaPpuAddress2, dmaSize, DMA_TAG(1), 0, 0); + } + cellDmaWaitTagStatusAll(DMA_MASK(1)); + + int j; + for ( j = 0; j < packetSize*3; j += 3) + { + btSolverConstraint& contact = internalConstraints[j]; + { + //DMA in + uint64_t dmaPpuAddress2 = reinterpret_cast (contact.m_originalContactPoint); + int dmasize = 4*sizeof(float); + float* tmpMem = &localMemory->m_appliedImpulse[0]; + cellDmaGet(tmpMem,dmaPpuAddress2,dmasize,DMA_TAG(1),0,0); + cellDmaWaitTagStatusAll(DMA_MASK(1)); + + + *tmpMem = btMin(btScalar(3.),contact.m_appliedImpulse); + + ///DMA out + cellDmaLargePut(tmpMem,dmaPpuAddress2,dmasize,DMA_TAG(1),0,0); + cellDmaWaitTagStatusAll(DMA_MASK(1)); + } + } + constraintListOffset += packetSize*3; + contactsToProcess -= packetSize; + } + + freeInternalConstraintStorage (localMemory, internalConstraints, maxContactsPerPacket*3); + } + + + } + } + break; + default: //.. nothing ; diff --git a/src/BulletMultiThreaded/SpuSolverTask/SpuParallellSolverTask.h b/src/BulletMultiThreaded/SpuSolverTask/SpuParallellSolverTask.h index 218d5016c..e853f1488 100644 --- a/src/BulletMultiThreaded/SpuSolverTask/SpuParallellSolverTask.h +++ b/src/BulletMultiThreaded/SpuSolverTask/SpuParallellSolverTask.h @@ -26,7 +26,8 @@ Written by: Marten Svanfeldt #include "../SpuSync.h" #include "BulletDynamics/ConstraintSolver/btTypedConstraint.h" #include "LinearMath/btAlignedAllocator.h" - +#include "BulletDynamics/ConstraintSolver/btSolverBody.h" +#include "BulletDynamics/ConstraintSolver/btSolverConstraint.h" ATTRIBUTE_ALIGNED16(struct) ManifoldCellHolder { @@ -61,10 +62,11 @@ enum enum { CMD_SOLVER_SETUP_BODIES = 1, - CMD_SOLVER_MANIFOLD_SETUP = 2, - CMD_SOLVER_CONSTRAINT_SETUP = 3, - CMD_SOLVER_SOLVE_ITERATE = 4, - CMD_SOLVER_COPYBACK_BODIES = 5 + CMD_SOLVER_MANIFOLD_SETUP, + CMD_SOLVER_CONSTRAINT_SETUP, + CMD_SOLVER_SOLVE_ITERATE, + CMD_SOLVER_COPYBACK_BODIES, + CMD_SOLVER_MANIFOLD_WARMSTART_WRITEBACK }; struct SpuSolverHashCell @@ -106,39 +108,9 @@ inline unsigned int spuGetHashCellIndex(int x, int y, int z) } -ATTRIBUTE_ALIGNED16(struct) SpuSolverBody -{ - BT_DECLARE_ALIGNED_ALLOCATOR(); - btVector3 m_linearVelocity; - btVector3 m_angularVelocity; - btMatrix3x3 m_worldInvInertiaTensor; - btScalar m_angularFactor; - btScalar m_invertedMass; -}; -ATTRIBUTE_ALIGNED16(struct) SpuSolverInternalConstraint -{ - BT_DECLARE_ALIGNED_ALLOCATOR(); - - uint32_t m_localOffsetBodyA; - uint32_t m_localOffsetBodyB; - - btScalar m_appliedImpulse; - - btScalar m_friction; - btScalar m_restitution; - btScalar m_jacDiagABInv; - btScalar m_penetration; - - btVector3 m_normal; - - btVector3 m_relpos1CrossNormal; - btVector3 m_relpos2CrossNormal; - btVector3 m_angularComponentA; - btVector3 m_angularComponentB; -}; ATTRIBUTE_ALIGNED16(struct) SpuSolverConstraint @@ -147,7 +119,6 @@ ATTRIBUTE_ALIGNED16(struct) SpuSolverConstraint uint16_t m_localOffsetBodyA; uint16_t m_localOffsetBodyB; - uint16_t m_constraintType; struct { @@ -217,8 +188,8 @@ ATTRIBUTE_ALIGNED16(struct) SpuSolverDataDesc BT_DECLARE_ALIGNED_ALLOCATOR(); SpuSolverHash* m_solverHash; - SpuSolverBody* m_solverBodyList; - SpuSolverInternalConstraint* m_solverInternalConstraintList; + btSolverBody* m_solverBodyList; + btSolverConstraint* m_solverInternalConstraintList; SpuSolverConstraint* m_solverConstraintList; uint32_t* m_solverBodyOffsetList; };