diff --git a/Extras/BulletMultiThreaded/SpuSolverTask/SpuParallellSolverTask.cpp b/Extras/BulletMultiThreaded/SpuSolverTask/SpuParallellSolverTask.cpp new file mode 100644 index 000000000..77f452b45 --- /dev/null +++ b/Extras/BulletMultiThreaded/SpuSolverTask/SpuParallellSolverTask.cpp @@ -0,0 +1,1781 @@ +/* +Bullet Continuous Collision Detection and Physics Library - Parallel solver +Copyright (c) 2007 Starbreeze Studios + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. + +Written by: Marten Svanfeldt +*/ + +#define IN_PARALLELL_SOLVER 1 + +#include "SpuParallellSolverTask.h" +#include "BulletDynamics/Dynamics/btRigidBody.h" +#include "BulletCollision/NarrowPhaseCollision/btPersistentManifold.h" +#include "../PlatformDefinitions.h" +#include "../SpuFakeDma.h" +#include "LinearMath/btMinMax.h" + +// To setup constraints +#include "BulletDynamics/ConstraintSolver/btPoint2PointConstraint.h" +#include "BulletDynamics/ConstraintSolver/btHingeConstraint.h" +#include "BulletDynamics/ConstraintSolver/btConeTwistConstraint.h" +#include "BulletDynamics/ConstraintSolver/btGeneric6DofConstraint.h" + +#ifndef offsetof +#define offsetof(s,m) (size_t)&reinterpret_cast((((s *)0)->m)) +#endif + +//NOTE! When changing this, make sure the package sizes etc below are updated +#define TEMP_STORAGE_SIZE (150*1024) +#define CONSTRAINT_MAX_SIZE (46*16) + +struct SolverTask_LocalStoreMemory +{ + ATTRIBUTE_ALIGNED16(SpuSolverHash m_localHash); + + // Data for temporary storage in situations where we just need very few + ATTRIBUTE_ALIGNED16(SpuSolverInternalConstraint m_tempInternalConstr[4]); + ATTRIBUTE_ALIGNED16(SpuSolverConstraint m_tempConstraint[1]); + ATTRIBUTE_ALIGNED16(SpuSolverBody m_tempSPUBodies[2]); + ATTRIBUTE_ALIGNED16(char m_tempRBs[2][sizeof(btRigidBody)]); + ATTRIBUTE_ALIGNED16(char m_externalConstraint[CONSTRAINT_MAX_SIZE]); + + // The general temporary storage, "dynamically" allocated + ATTRIBUTE_ALIGNED16(uint8_t m_temporaryStorage[TEMP_STORAGE_SIZE]); + size_t m_temporaryStorageUsed; +}; + + +#ifdef WIN32 +void* createSolverLocalStoreMemory() +{ + return new SolverTask_LocalStoreMemory; +}; + + +#elif defined(__CELLOS_LV2__) + +ATTRIBUTE_ALIGNED16(SolverTask_LocalStoreMemory gLocalStoreMemory); + +void* createSolverLocalStoreMemory() +{ + return &gLocalStoreMemory; +} +#endif + + + + + + +//-- MEMORY MANAGEMENT HELPERS +size_t memTemporaryStorage (SolverTask_LocalStoreMemory* lsmem) +{ + return TEMP_STORAGE_SIZE - lsmem->m_temporaryStorageUsed; +} + +void setupTemporaryStorage (SolverTask_LocalStoreMemory* lsmem) +{ + lsmem->m_temporaryStorageUsed = 0; +} + +void* allocTemporaryStorage (SolverTask_LocalStoreMemory* lsmem, size_t size) +{ + // Align size to even 16-byte interval to make it DMA-able + size = (size+0xf)&~0xf; + + //btAssert(lsmem->m_temporaryStorageUsed + size <= TEMP_STORAGE_SIZE); + + void *res = &lsmem->m_temporaryStorage[lsmem->m_temporaryStorageUsed]; + lsmem->m_temporaryStorageUsed += size; + return res; +} + +void freeTemporaryStorage (SolverTask_LocalStoreMemory* lsmem, void* ptr, size_t size) +{ + // Align size to even 16-byte interval to make it DMA-able + size = (size+0xf)&~0xf; + + // Only works if we free the last gotten allocation + //btAssert(&lsmem->m_temporaryStorage[lsmem->m_temporaryStorageUsed - size] == ptr); + + lsmem->m_temporaryStorageUsed -= size; +} + +SpuSolverBody* allocBodyStorage (SolverTask_LocalStoreMemory* lsmem, size_t numBodies) +{ + return static_cast (allocTemporaryStorage(lsmem, sizeof(SpuSolverBody)*numBodies)); +} + +void freeBodyStorage (SolverTask_LocalStoreMemory* lsmem, SpuSolverBody* ptr, size_t numBodies) +{ + freeTemporaryStorage(lsmem, ptr, sizeof(SpuSolverBody)*numBodies); +} + +SpuSolverInternalConstraint* allocInternalConstraintStorage (SolverTask_LocalStoreMemory* lsmem, size_t numConstr) +{ + return static_cast (allocTemporaryStorage(lsmem, sizeof(SpuSolverInternalConstraint)*numConstr)); +} + +void freeInternalConstraintStorage (SolverTask_LocalStoreMemory* lsmem, SpuSolverInternalConstraint* ptr, size_t numConstr) +{ + freeTemporaryStorage(lsmem, ptr, sizeof(SpuSolverInternalConstraint)*numConstr); +} + +SpuSolverConstraint* allocConstraintStorage (SolverTask_LocalStoreMemory* lsmem, size_t numConstr) +{ + return static_cast (allocTemporaryStorage(lsmem, sizeof(SpuSolverConstraint)*numConstr)); +} + +void freeConstraintStorage (SolverTask_LocalStoreMemory* lsmem, SpuSolverConstraint* ptr, size_t numConstr) +{ + freeTemporaryStorage(lsmem, ptr, sizeof(SpuSolverConstraint)*numConstr); +} +//-- MEMORY MANAGEMENT HELPERS END + + + + + + + + + + +//-- INDEX SET HELPER +class SpuIndexSet +{ +public: + + SIMD_FORCE_INLINE SpuIndexSet (uint32_t* a) + : m_backingArray (a), m_size (0) + {} + + SIMD_FORCE_INLINE int insert (uint32_t data) + { + int pos = 0; + for (pos = 0; pos < m_size; ++pos) + { + if (m_backingArray[pos] == data) + { + return pos; + } + } + //btAssert(m_size < SPU_MAX_BODIES_PER_CELL); + + m_backingArray[m_size] = data; + return m_size++; + } + + SIMD_FORCE_INLINE void clear () + { + m_size = 0; + } + + SIMD_FORCE_INLINE const uint32_t& operator[](int n) const + { + return m_backingArray[n]; + } + + SIMD_FORCE_INLINE uint32_t& operator[](int n) + { + return m_backingArray[n]; + } + + SIMD_FORCE_INLINE int size() const + { // return length of sequence + return m_size; + } + +private: + uint32_t* m_backingArray; + int m_size; +}; +//-- INDEX SET HELPER END + + + + + + + + + +//-- RB HANDLING +static void setupSpuBody (btRigidBody* rb, SpuSolverBody* spuBody) +{ + spuBody->m_linearVelocity = rb->getLinearVelocity(); + spuBody->m_angularVelocity = rb->getAngularVelocity(); + spuBody->m_worldInvInertiaTensor = rb->getInvInertiaTensorWorld(); + spuBody->m_invertedMass = rb->getInvMass(); +} +//-- RB HANDLING END + + + + + + + +//-- HASH HANDLING +static void writeTaskFlag(SpuSolverHash* hashRemote, uint32_t taskId, uint32_t* flags) +{ + int dmaSize = sizeof(uint32_t)*SPU_HASH_NUMCELLDWORDS; + uint64_t dmaPpuAddress2 = reinterpret_cast (hashRemote); + dmaPpuAddress2 += offsetof(SpuSolverHash, m_currentMask); + dmaPpuAddress2 += sizeof(uint32_t) * SPU_HASH_NUMCELLDWORDS * (taskId + 1); + cellDmaLargePut(flags, dmaPpuAddress2, dmaSize, DMA_TAG(1), 0, 0); + cellDmaWaitTagStatusAll(DMA_MASK(1)); + +} + +static void updateLocalMask(SolverTask_LocalStoreMemory* localMemory, SpuSolverTaskDesc& taskDesc) +{ + int dmaSize = sizeof(uint32_t)*(SPU_MAX_SPUS+1)*SPU_HASH_NUMCELLDWORDS; + uint64_t dmaPpuAddress2 = reinterpret_cast (taskDesc.m_solverData.m_solverHash); + dmaPpuAddress2 += offsetof(SpuSolverHash, m_currentMask); + + cellDmaLargeGet(&localMemory->m_localHash.m_currentMask, dmaPpuAddress2, dmaSize, DMA_TAG(1), 0, 0); + cellDmaWaitTagStatusAll(DMA_MASK(1)); +} + +static unsigned int getZeroIndex(unsigned int start, uint32_t* mask, uint32_t* finished, unsigned int numRegs) +{ + // Find the index of some zero within mask|finished + unsigned int index = start; + + int reg = (start >> 5); + { + unsigned int bit = 1 << (start & 31); + + uint32_t combinedMask = mask[reg] | finished[reg]; + for (int bitCnt = (start & 31); bitCnt < SPU_HASH_WORDWIDTH; ++bitCnt, ++index, bit <<= 1) + { + if ((combinedMask & bit) == 0) + { + return index; + } + } + + reg++; + } + + for (; reg < numRegs; ++reg) + { + unsigned int bit = 1; + uint32_t combinedMask = mask[reg] | finished[reg]; + + for (int bitCnt = 0; bitCnt < SPU_HASH_WORDWIDTH; ++bitCnt, ++index, bit <<= 1) + { + if ((combinedMask & bit) == 0) + { + return index; + } + } + } + + return SPU_HASH_NUMCELLS; +} + +static bool isAllOne (uint32_t* mask, unsigned int numRegs) +{ + uint32_t totalMask = ~0; + for (int reg = 0; reg < numRegs; ++reg) + { + totalMask &= mask[reg]; + } + + return totalMask == ~0; +} + +static bool checkDependency(unsigned int tryIndex, uint32_t* mask, uint32_t matrix[SPU_HASH_NUMCELLS][SPU_HASH_NUMCELLDWORDS], unsigned int numRegs) +{ + for (int reg = 0; reg < numRegs; ++reg) + { + if ((mask[reg] & matrix[tryIndex][reg]) != 0) + { + //Dependency conflict, no-go + return false; + } + } + + return true; +} + +static unsigned int getNextFreeCell(SolverTask_LocalStoreMemory* localMemory, SpuSolverTaskDesc& taskDesc, btSpinlock& lock) +{ + unsigned int cellIndex = SPU_HASH_NUMCELLS; + + uint32_t myMask[SPU_HASH_NUMCELLDWORDS] = {0}; + + writeTaskFlag(taskDesc.m_solverData.m_solverHash, taskDesc.m_taskId, myMask); + SpuSolverHash* hash = &localMemory->m_localHash; + + // locking + lock.Lock(); + + bool stopLoop = false; + while (!stopLoop) + { + + // Try to find a free cell + uint32_t tmpMask[SPU_HASH_NUMCELLDWORDS] = {0}; + + updateLocalMask(localMemory, taskDesc); + + + // Or together the masks of finished cells and all currently locked cells + for (int row = 1; row <= SPU_MAX_SPUS; ++row) + { + for (int reg = 0; reg < SPU_HASH_NUMCELLDWORDS; ++reg) + { + tmpMask[reg] |= hash->m_currentMask[row][reg]; + } + } + + // Find first zero, starting with offset + unsigned int tryIndex; + unsigned int start = 0; + bool haveTry = false; + while (!haveTry) + { + tryIndex = getZeroIndex(start, tmpMask, hash->m_currentMask[0], SPU_HASH_NUMCELLDWORDS); + + if (tryIndex >= SPU_HASH_NUMCELLS) + break; + + haveTry = checkDependency(tryIndex, tmpMask, hash->m_dependencyMatrix, SPU_HASH_NUMCELLDWORDS); + start = tryIndex+1; + } + + if (tryIndex < SPU_HASH_NUMCELLS) + { + // If we get here there is no dependency conflict, so lets use it + cellIndex = tryIndex; + writeTaskFlag(taskDesc.m_solverData.m_solverHash, taskDesc.m_taskId, hash->m_dependencyMatrix[cellIndex]); + + hash->m_currentMask[0][cellIndex >> 5] |= (1 << (cellIndex & 31)); + + { + int dmaSize = sizeof(uint32_t)*SPU_HASH_NUMCELLDWORDS; + uint64_t dmaPpuAddress2 = reinterpret_cast (taskDesc.m_solverData.m_solverHash); + dmaPpuAddress2 += offsetof(SpuSolverHash, m_currentMask); + + cellDmaLargePut(&hash->m_currentMask, dmaPpuAddress2, dmaSize, DMA_TAG(1), 0, 0); + cellDmaWaitTagStatusAll(DMA_MASK(1)); + } + + stopLoop = true; + } + + // Check if there are at all any cells left + if (isAllOne (hash->m_currentMask[0], SPU_HASH_NUMCELLDWORDS)) + { + //lock.Unlock(); + break; + } + + } + + // unlock + lock.Unlock(); + + + return cellIndex; +} +//-- HASH HANDLING END + + + + +//-- SOLVER METHODS +// Contact solve method +static void solveContact (SpuSolverInternalConstraint& constraint, SpuSolverBody& bodyA, SpuSolverBody& bodyB) +{ + float normalImpulse(0.f); + { + if (constraint.m_penetration < 0.f) + return; + + // Optimized version of projected relative velocity, use precomputed cross products with normal + // body1.getVelocityInLocalPoint(contactConstraint.m_rel_posA,vel1); + // body2.getVelocityInLocalPoint(contactConstraint.m_rel_posB,vel2); + // btVector3 vel = vel1 - vel2; + // float rel_vel = contactConstraint.m_contactNormal.dot(vel); + + float rel_vel; + float vel1Dotn = constraint.m_normal.dot(bodyA.m_linearVelocity) + + constraint.m_relpos1CrossNormal.dot(bodyA.m_angularVelocity); + float vel2Dotn = constraint.m_normal.dot(bodyB.m_linearVelocity) + + constraint.m_relpos2CrossNormal.dot(bodyB.m_angularVelocity); + + rel_vel = vel1Dotn-vel2Dotn; + + + float positionalError = constraint.m_penetration; + float velocityError = constraint.m_restitution - rel_vel;// * damping; + + float penetrationImpulse = positionalError * constraint.m_jacDiagABInv; + float velocityImpulse = velocityError * constraint.m_jacDiagABInv; + float normalImpulse = penetrationImpulse+velocityImpulse; + + // See Erin Catto's GDC 2006 paper: Clamp the accumulated impulse + float oldNormalImpulse = constraint.m_appliedImpulse; + float sum = oldNormalImpulse + normalImpulse; + constraint.m_appliedImpulse = float(0.) > sum ? float(0.): sum; + + float oldVelocityImpulse = constraint.m_appliedVelocityImpulse; + float velocitySum = oldVelocityImpulse + velocityImpulse; + constraint.m_appliedVelocityImpulse = float(0.) > velocitySum ? float(0.): velocitySum; + + normalImpulse = constraint.m_appliedImpulse - oldNormalImpulse; + + if (bodyA.m_invertedMass > 0) + { + bodyA.m_linearVelocity += constraint.m_normal*bodyA.m_invertedMass*normalImpulse; + bodyA.m_angularVelocity += constraint.m_angularComponentA*normalImpulse; + } + if (bodyB.m_invertedMass > 0) + { + bodyB.m_linearVelocity -= constraint.m_normal*bodyB.m_invertedMass*normalImpulse; + bodyB.m_angularVelocity -= constraint.m_angularComponentB*normalImpulse; + } + + } +} + +// Friction solve method +static void solveFriction (SpuSolverInternalConstraint& constraint, SpuSolverBody& bodyA, SpuSolverBody& bodyB, btScalar normalImpulse) +{ + const btScalar combinedFriction = constraint.m_friction; + + const btScalar limit = normalImpulse * combinedFriction; + + if (normalImpulse>btScalar(0.)) + { + + btScalar j1; + { + + btScalar rel_vel; + const btScalar vel1Dotn = constraint.m_normal.dot(bodyA.m_linearVelocity) + + constraint.m_relpos1CrossNormal.dot(bodyA.m_angularVelocity); + const btScalar vel2Dotn = constraint.m_normal.dot(bodyB.m_linearVelocity) + + constraint.m_relpos2CrossNormal.dot(bodyB.m_angularVelocity); + rel_vel = vel1Dotn-vel2Dotn; + + // calculate j that moves us to zero relative velocity + j1 = -rel_vel * constraint.m_jacDiagABInv; + btScalar oldTangentImpulse = constraint.m_appliedImpulse; + constraint.m_appliedImpulse = oldTangentImpulse + j1; + GEN_set_min(constraint.m_appliedImpulse, limit); + GEN_set_max(constraint.m_appliedImpulse, -limit); + j1 = constraint.m_appliedImpulse - oldTangentImpulse; + + } + + if (bodyA.m_invertedMass > 0) + { + bodyA.m_linearVelocity += constraint.m_normal*bodyA.m_invertedMass*j1; + bodyA.m_angularVelocity += constraint.m_angularComponentA*j1; + } + if (bodyB.m_invertedMass > 0) + { + bodyB.m_linearVelocity -= constraint.m_normal*bodyB.m_invertedMass*j1; + bodyB.m_angularVelocity -= constraint.m_angularComponentB*j1; + } + + } +} + +// Constraint solving +static void solveConstraint (SpuSolverConstraint& constraint, SpuSolverBody& bodyA, SpuSolverBody& bodyB) +{ + // All but D6 use worldspace normals, use same code + if (constraint.m_flags.m_useLinear) + { + if (constraint.m_constraintType == POINT2POINT_CONSTRAINT_TYPE || + constraint.m_constraintType == HINGE_CONSTRAINT_TYPE || + constraint.m_constraintType == CONETWIST_CONSTRAINT_TYPE) + { + btVector3 normal (0,0,0); + + const btVector3& bias (constraint.m_linearBias); + const btVector3& jacInv (constraint.m_jacdiagABInv); + + for (int i = 0; i < 3; ++i) + { + normal[i] = 1; + + // Compute relative velocity + btVector3 vel1 = bodyA.m_linearVelocity + bodyA.m_angularVelocity.cross(constraint.m_relPos1); + btVector3 vel2 = bodyB.m_linearVelocity + bodyB.m_angularVelocity.cross(constraint.m_relPos2); + btVector3 vel = vel1 - vel2; + + float relVelNormal = normal.dot(vel); + + // Compute impulse + float impulse = (bias[i] - relVelNormal) * jacInv[i]; + + btVector3 impNormal = normal*impulse; + + // Apply + if (bodyA.m_invertedMass > 0) + { + bodyA.m_linearVelocity += impNormal*bodyA.m_invertedMass; + bodyA.m_angularVelocity += bodyA.m_worldInvInertiaTensor * (btVector3(constraint.m_relPos1).cross(impNormal)); + } + if (bodyB.m_invertedMass > 0) + { + bodyB.m_linearVelocity -= impNormal*bodyB.m_invertedMass; + bodyB.m_angularVelocity -= bodyB.m_worldInvInertiaTensor * (btVector3(constraint.m_relPos2).cross(impNormal)); + } + + normal[i] = 0; + } + } + else + { + //D6 + } + + } + + switch (constraint.m_constraintType) + { + case POINT2POINT_CONSTRAINT_TYPE: + break; // Nothing special to do + case HINGE_CONSTRAINT_TYPE: + { + // Angular solving for the two first axes + const btVector3& bias (constraint.hinge.m_angularBias); + const btVector3& jacInv (constraint.hinge.m_angJacdiagABInv); + + for (int i = 0; i < 2; ++i) + { + const btVector3& axis (constraint.hinge.m_frameAinW[i]); + + // Compute relative velocity + btVector3 relVel = bodyA.m_angularVelocity - bodyB.m_angularVelocity; + + float relVelAxis = axis.dot(relVel); + + // Compute impulse + float impulse = (bias[i] - relVelAxis) * jacInv[i]; + btVector3 impAxis = axis*impulse; + + // Apply + if (bodyA.m_invertedMass > 0) + { + bodyA.m_angularVelocity += bodyA.m_worldInvInertiaTensor * impAxis; + } + if (bodyB.m_invertedMass > 0) + { + bodyB.m_angularVelocity -= bodyB.m_worldInvInertiaTensor * impAxis; + } + } + + // Limit + if (constraint.m_flags.m_limit1) + { + const btVector3& axis (constraint.hinge.m_frameAinW[2]); + + // Compute relative velocity + btVector3 relVel = bodyA.m_angularVelocity - bodyB.m_angularVelocity; + float relVelAxis = axis.dot(relVel); + + // Compute impulse + float impulse = (bias[2] - relVelAxis) * jacInv[2] * constraint.hinge.m_limitJacFactor; + + // Clamp it + float temp = constraint.hinge.m_limitAccumulatedImpulse; + constraint.hinge.m_limitAccumulatedImpulse = btMax (constraint.hinge.m_limitAccumulatedImpulse + impulse, 0.0f); + impulse = constraint.hinge.m_limitAccumulatedImpulse - temp; + + btVector3 impAxis = axis*impulse* (constraint.hinge.m_limitJacFactor/btFabs (constraint.hinge.m_limitJacFactor)); + + // Apply + if (bodyA.m_invertedMass > 0) + { + bodyA.m_angularVelocity += bodyA.m_worldInvInertiaTensor * impAxis; + } + if (bodyB.m_invertedMass > 0) + { + bodyB.m_angularVelocity -= bodyB.m_worldInvInertiaTensor * impAxis; + } + } + + // Motor + if (constraint.m_flags.m_motor1) + { + const btVector3& axis (constraint.hinge.m_frameAinW[2]); + + // Compute relative velocity + btVector3 relVel = bodyA.m_angularVelocity - bodyB.m_angularVelocity; + float relVelAxis = axis.dot(relVel); + + // Compute impulse + float impulse = (constraint.hinge.m_motorVelocity - relVelAxis) * jacInv[2]; + + // Clamp it + float clampedImpulse = impulse > constraint.hinge.m_motorImpulse ? constraint.hinge.m_motorImpulse : impulse; + clampedImpulse = impulse < -constraint.hinge.m_motorImpulse ? -constraint.hinge.m_motorImpulse : clampedImpulse; + + + btVector3 impAxis = axis*clampedImpulse; + + // Apply + if (bodyA.m_invertedMass > 0) + { + bodyA.m_angularVelocity += bodyA.m_worldInvInertiaTensor * impAxis; + } + if (bodyB.m_invertedMass > 0) + { + bodyB.m_angularVelocity -= bodyB.m_worldInvInertiaTensor * impAxis; + } + } + } + break; + case CONETWIST_CONSTRAINT_TYPE: + { + // Swing + if (constraint.m_flags.m_limit1) + { + const btVector3& axis (constraint.conetwist.m_swingAxis); + + // Compute relative velocity + btVector3 relVel = bodyA.m_angularVelocity - bodyB.m_angularVelocity; + float relVelAxis = axis.dot(relVel); + + // Compute impulse + float impulse = (constraint.conetwist.m_swingError - relVelAxis) * constraint.conetwist.m_swingJacInv; + + // Clamp it + float temp = constraint.conetwist.m_swingLimitImpulse; + constraint.conetwist.m_swingLimitImpulse = btMax (constraint.conetwist.m_swingLimitImpulse + impulse, 0.0f); + impulse = constraint.conetwist.m_swingLimitImpulse - temp; + + btVector3 impAxis = axis*impulse; + + // Apply + if (bodyA.m_invertedMass > 0) + { + bodyA.m_angularVelocity += bodyA.m_worldInvInertiaTensor * impAxis; + } + if (bodyB.m_invertedMass > 0) + { + bodyB.m_angularVelocity -= bodyB.m_worldInvInertiaTensor * impAxis; + } + } + + // Twist + if (constraint.m_flags.m_limit2) + { + const btVector3& axis (constraint.conetwist.m_twistAxis); + + // Compute relative velocity + btVector3 relVel = bodyA.m_angularVelocity - bodyB.m_angularVelocity; + float relVelAxis = axis.dot(relVel); + + // Compute impulse + float impulse = (constraint.conetwist.m_twistError - relVelAxis) * constraint.conetwist.m_twistJacInv; + + // Clamp it + float temp = constraint.conetwist.m_twistLimitImpulse; + constraint.conetwist.m_twistLimitImpulse = btMax (constraint.conetwist.m_twistLimitImpulse + impulse, 0.0f); + impulse = constraint.conetwist.m_twistLimitImpulse - temp; + + btVector3 impAxis = axis*impulse; + + // Apply + if (bodyA.m_invertedMass > 0) + { + bodyA.m_angularVelocity += bodyA.m_worldInvInertiaTensor * impAxis; + } + if (bodyB.m_invertedMass > 0) + { + bodyB.m_angularVelocity -= bodyB.m_worldInvInertiaTensor * impAxis; + } + } + } + break; + default: + ; + } +} +//-- SOLVER METHODS END + + + + + + + + +//-- CONSTRAINT SETUP METHODS +// Compute the jacobian inverse @@TODO: Optimize +static float computeJacobianInverse (const btRigidBody* rb0, const btRigidBody* rb1, + const btVector3& anchorAinW, const btVector3& anchorBinW, const btVector3& normal) +{ + float jacobian = rb0->computeImpulseDenominator(anchorAinW, normal); + jacobian += rb1->computeImpulseDenominator(anchorBinW, normal); + + return 1.0f/jacobian; +} + +static float computeAngularJacobianInverse (const btRigidBody* rb0, const btRigidBody* rb1, + const btVector3& normal) +{ + float jacobian = rb0->computeAngularImpulseDenominator(normal); + jacobian += rb1->computeAngularImpulseDenominator(normal); + + return 1.0f/jacobian; +} + +static void setupLinearConstraintWorld (SpuSolverConstraint& constraint, const btRigidBody* rb0, const btRigidBody* rb1, + const btVector3& anchorAinW, const btVector3& anchorBinW, const btContactSolverInfoData& solverInfo) +{ + btVector3 relPos1 = anchorAinW - rb0->getCenterOfMassPosition(); + btVector3 relPos2 = anchorBinW - rb1->getCenterOfMassPosition(); + + constraint.m_relPos1 = relPos1; + constraint.m_relPos2 = relPos2; + + btVector3 error = anchorAinW - anchorBinW; + + // Setup the three axes + btVector3 normal (0,0,0); + btVector3 jacInv, bias; + const float errorFactor = solverInfo.m_tau / (solverInfo.m_timeStep * solverInfo.m_damping); + + for (int i = 0; i < 3; ++i) + { + normal[i] = 1; + + jacInv[i] = solverInfo.m_damping * computeJacobianInverse (rb0, rb1, anchorAinW, anchorBinW, normal); + + // Compute the depth + float depth = -error[i]*errorFactor; + bias[i] = depth; + + normal[i] = 0; + } + + constraint.m_jacdiagABInv = jacInv; + constraint.m_linearBias = bias; + constraint.m_flags.m_useLinear = 1; +} +//-- CONSTRAINT SETUP METHODS END + + + + + + + +static int getConstraintSize (btTypedConstraintType type) +{ + switch (type) + { + case POINT2POINT_CONSTRAINT_TYPE: + return sizeof(btPoint2PointConstraint); + case HINGE_CONSTRAINT_TYPE: + return sizeof(btHingeConstraint); + case CONETWIST_CONSTRAINT_TYPE: + return sizeof(btConeTwistConstraint); + case D6_CONSTRAINT_TYPE: + return sizeof(btGeneric6DofConstraint); + default: + ; + //btAssert(0); + } + + return 0; +} + + + + + + + + + +//-- MAIN METHOD +void processSolverTask(void* userPtr, void* lsMemory) +{ + SolverTask_LocalStoreMemory* localMemory = (SolverTask_LocalStoreMemory*)lsMemory; + + SpuSolverTaskDesc* taskDescPtr = (SpuSolverTaskDesc*)userPtr; + SpuSolverTaskDesc& taskDesc = *taskDescPtr; + + setupTemporaryStorage(localMemory); + + switch (taskDesc.m_solverCommand) + { + case CMD_SOLVER_SETUP_BODIES: + { + int bodiesToProcess = taskDesc.m_commandData.m_bodySetup.m_numBodies; + int bodyPackageOffset = taskDesc.m_commandData.m_bodySetup.m_startBody; + const int bodiesPerPackage = 256; + + btRigidBody** bodyPtrList = (btRigidBody**)allocTemporaryStorage(localMemory, bodiesPerPackage*sizeof(btRigidBody*)); + btRigidBody* bodyList = (btRigidBody*)allocTemporaryStorage(localMemory, bodiesPerPackage*sizeof(btRigidBody)); + SpuSolverBody* spuBodyList = allocBodyStorage(localMemory, bodiesPerPackage); + + + while (bodiesToProcess > 0) + { + const int packageSize = bodiesToProcess > bodiesPerPackage ? bodiesPerPackage : bodiesToProcess; + + // DMA the body pointers + { + int dmaSize = sizeof(btRigidBody*)*packageSize; + uint64_t dmaPpuAddress2 = reinterpret_cast (taskDesc.m_commandData.m_bodySetup.m_rbList + bodyPackageOffset); + cellDmaLargeGet(bodyPtrList, dmaPpuAddress2, dmaSize, DMA_TAG(1), 0, 0); + cellDmaWaitTagStatusAll(DMA_MASK(1)); + } + + // DMA the rigid bodies + for (int b = 0; b < packageSize; ++b) + { + btRigidBody* body = bodyPtrList[b]; + int dmaSize = sizeof(btRigidBody); + uint64_t dmaPpuAddress2 = reinterpret_cast (body); + cellDmaLargeGet(&bodyList[b], dmaPpuAddress2, dmaSize, DMA_TAG(1), 0, 0); + } + cellDmaWaitTagStatusAll(DMA_MASK(1)); + + for (int b = 0; b < packageSize; ++b) + { + btRigidBody* localBody = bodyList+b; + SpuSolverBody* spuBody = spuBodyList + b; + //Set it up solver body + setupSpuBody(localBody, spuBody); + + int spuBodyIndex = bodyPackageOffset + b; + localBody->setCompanionId(spuBodyIndex); + } + + // DMA the rigid bodies back + for (int b = 0; b < packageSize; ++b) + { + btRigidBody* body = bodyPtrList[b]; + int dmaSize = sizeof(btRigidBody); + uint64_t dmaPpuAddress2 = reinterpret_cast (body); + cellDmaLargePut(&bodyList[b], dmaPpuAddress2, dmaSize, DMA_TAG(1), 0, 0); + } + + // DMA the list of SPU bodies + { + int dmaSize = sizeof(SpuSolverBody)*packageSize; + uint64_t dmaPpuAddress2 = reinterpret_cast (taskDesc.m_solverData.m_solverBodyList + bodyPackageOffset); + cellDmaLargePut(spuBodyList, dmaPpuAddress2, dmaSize, DMA_TAG(2), 0, 0); + } + + + cellDmaWaitTagStatusAll(DMA_MASK(1) | DMA_MASK(2)); + bodiesToProcess -= packageSize; + bodyPackageOffset += packageSize; + } + + } + break; + case CMD_SOLVER_MANIFOLD_SETUP: + { + // DMA the hash + { + int dmaSize = sizeof(SpuSolverHash); + uint64_t dmaPpuAddress2 = reinterpret_cast (taskDesc.m_solverData.m_solverHash); + cellDmaLargeGet(&localMemory->m_localHash, dmaPpuAddress2, dmaSize, DMA_TAG(1), 0, 0); + cellDmaWaitTagStatusAll(DMA_MASK(1)); + } + + // Iterate over our cells + const int manifoldsPerPackage = 8; + const int constraintsPerPackage = 8; + + ManifoldCellHolder* manifoldHolderList = (ManifoldCellHolder*)allocTemporaryStorage(localMemory, sizeof(ManifoldCellHolder)*manifoldsPerPackage); + btPersistentManifold* manifoldList = (btPersistentManifold*)allocTemporaryStorage(localMemory, sizeof(btPersistentManifold)*manifoldsPerPackage); + + ConstraintCellHolder* constraintHolderList = (ConstraintCellHolder*)allocTemporaryStorage(localMemory, sizeof(ConstraintCellHolder)*constraintsPerPackage); + uint8_t* constraintList = (uint8_t*)allocTemporaryStorage(localMemory, CONSTRAINT_MAX_SIZE*constraintsPerPackage); + + uint32_t* indexArray = (uint32_t*)allocTemporaryStorage(localMemory, sizeof(uint32_t)*SPU_MAX_BODIES_PER_CELL); + + for (unsigned int c = 0; c < taskDesc.m_commandData.m_manifoldSetup.m_numCells; ++c) + { + int cellIdx = taskDesc.m_commandData.m_manifoldSetup.m_startCell + c; + SpuSolverHashCell& hashCell = localMemory->m_localHash.m_Hash[cellIdx]; + + SpuIndexSet localRBs (indexArray); + + { + int constraintIndex = hashCell.m_internalConstraintListOffset; + int manifoldsToProcess = hashCell.m_numManifolds; + int manifoldPackageOffset = hashCell.m_manifoldListOffset; + + while (manifoldsToProcess > 0) + { + const int packageSize = manifoldsToProcess > manifoldsPerPackage ? manifoldsPerPackage : manifoldsToProcess; + + // DMA the holder list + { + int dmaSize = sizeof(ManifoldCellHolder)*packageSize; + uint64_t dmaPpuAddress2 = reinterpret_cast (taskDesc.m_commandData.m_manifoldSetup.m_manifoldHolders + manifoldPackageOffset); + cellDmaLargeGet(manifoldHolderList, dmaPpuAddress2, dmaSize, DMA_TAG(1), 0, 0); + cellDmaWaitTagStatusAll(DMA_MASK(1)); + } + + // DMA the manifold list + for (int m = 0; m < packageSize; ++m) + { + int dmaSize = sizeof(btPersistentManifold); + uint64_t dmaPpuAddress2 = reinterpret_cast (manifoldHolderList[m].m_manifold); + cellDmaLargeGet(manifoldList + m, dmaPpuAddress2, dmaSize, DMA_TAG(1), 0, 0); + } + cellDmaWaitTagStatusAll(DMA_MASK(1)); + + for (int m = 0; m < packageSize; ++m) + { + btPersistentManifold* currManifold = manifoldList + m; + + btRigidBody* rb0Ptr = (btRigidBody*)currManifold->getBody0(); + btRigidBody* rb1Ptr = (btRigidBody*)currManifold->getBody1(); + + int numContacts = currManifold->getNumContacts(); + + if (!numContacts) + { + // No need to DMA anything more or so, so quit + continue; + } + + unsigned int solverBodyIdA = ~0, solverBodyIdB = ~0; + + // DMA the bodies + { + int dmaSize = sizeof(btRigidBody); + uint64_t dmaPpuAddress2 = reinterpret_cast (rb0Ptr); + cellDmaLargeGet(&localMemory->m_tempRBs[0], dmaPpuAddress2, dmaSize, DMA_TAG(1), 0, 0); + } + { + int dmaSize = sizeof(btRigidBody); + uint64_t dmaPpuAddress2 = reinterpret_cast (rb1Ptr); + cellDmaLargeGet(&localMemory->m_tempRBs[1], dmaPpuAddress2, dmaSize, DMA_TAG(2), 0, 0); + } + cellDmaWaitTagStatusAll(DMA_MASK(1) | DMA_MASK(2)); + + + btRigidBody* rb0 = (btRigidBody*)&localMemory->m_tempRBs[0]; + btRigidBody* rb1 = (btRigidBody*)&localMemory->m_tempRBs[1]; + + if (rb0->getIslandTag() >= 0) + { + solverBodyIdA = rb0->getCompanionId(); + } + else + { + //create a static body + solverBodyIdA = taskDesc.m_commandData.m_manifoldSetup.m_numBodies + hashCell.m_manifoldListOffset; + setupSpuBody(rb0, &localMemory->m_tempSPUBodies[0]); + { + int dmaSize = sizeof(SpuSolverBody); + uint64_t dmaPpuAddress2 = reinterpret_cast (taskDesc.m_solverData.m_solverBodyList + solverBodyIdA); + cellDmaLargePut(&localMemory->m_tempSPUBodies[0], dmaPpuAddress2, dmaSize, DMA_TAG(1), 0, 0); + cellDmaWaitTagStatusAll(DMA_MASK(1)); + } + } + + if (rb1->getIslandTag() >= 0) + { + solverBodyIdB = rb1->getCompanionId(); + } + else + { + //create a static body + solverBodyIdB = taskDesc.m_commandData.m_manifoldSetup.m_numBodies + hashCell.m_manifoldListOffset; + setupSpuBody(rb1, &localMemory->m_tempSPUBodies[0]); + { + int dmaSize = sizeof(SpuSolverBody); + uint64_t dmaPpuAddress2 = reinterpret_cast (taskDesc.m_solverData.m_solverBodyList + solverBodyIdB); + cellDmaLargePut(&localMemory->m_tempSPUBodies[0], dmaPpuAddress2, dmaSize, DMA_TAG(1), 0, 0); + cellDmaWaitTagStatusAll(DMA_MASK(1)); + } + } + + // Setup the pointer table + int offsA = localRBs.insert(solverBodyIdA); + int offsB = localRBs.insert(solverBodyIdB); + + // Setup all the contacts + for (int c = 0; c < numContacts; ++c) + { + btManifoldPoint& cp = currManifold->getContactPoint(c); + + btVector3 pos1 = cp.getPositionWorldOnA(); + btVector3 pos2 = cp.getPositionWorldOnB(); + + btVector3 rel_pos1 = pos1 - rb0->getCenterOfMassPosition(); + btVector3 rel_pos2 = pos2 - rb1->getCenterOfMassPosition(); + + // De-penetration + { + SpuSolverInternalConstraint& constraint = localMemory->m_tempInternalConstr[0]; + + constraint.m_localOffsetBodyA = offsA; + constraint.m_localOffsetBodyB = offsB; + + constraint.m_normal = cp.m_normalWorldOnB; + + { + //can be optimized, the cross products are already calculated + constraint.m_jacDiagABInv = computeJacobianInverse (rb0, rb1, pos1, pos2, cp.m_normalWorldOnB); + } + + constraint.m_relpos1CrossNormal = rel_pos1.cross(cp.m_normalWorldOnB); + constraint.m_relpos2CrossNormal = rel_pos2.cross(cp.m_normalWorldOnB); + + btVector3 vel1 = rb0->getVelocityInLocalPoint(rel_pos1); + btVector3 vel2 = rb1->getVelocityInLocalPoint(rel_pos2); + + btVector3 vel = vel1 - vel2; + btScalar rel_vel; + rel_vel = cp.m_normalWorldOnB.dot(vel); + + + constraint.m_penetration = cp.getDistance();///btScalar(infoGlobal.m_numIterations); + constraint.m_friction = cp.m_combinedFriction; + float rest = - rel_vel * cp.m_combinedRestitution; + if (rest <= btScalar(0.)) + { + rest = 0.f; + }; + + btScalar penVel = -constraint.m_penetration/taskDesc.m_commandData.m_manifoldSetup.m_solverInfo.m_timeStep; + if (rest > penVel) + { + rest = btScalar(0.); + } + constraint.m_restitution = rest; + + constraint.m_penetration *= + -(taskDesc.m_commandData.m_manifoldSetup.m_solverInfo.m_erp/taskDesc.m_commandData.m_manifoldSetup.m_solverInfo.m_timeStep); + + constraint.m_appliedImpulse = 0.f; + constraint.m_appliedVelocityImpulse = 0.f; + + + btVector3 torqueAxis0 = rel_pos1.cross(cp.m_normalWorldOnB); + constraint.m_angularComponentA = rb0->getInvInertiaTensorWorld()*torqueAxis0; + btVector3 torqueAxis1 = rel_pos2.cross(cp.m_normalWorldOnB); + constraint.m_angularComponentB = rb1->getInvInertiaTensorWorld()*torqueAxis1; + } + + // Friction + + btVector3 frictionTangential0a, frictionTangential1b; + + btPlaneSpace1(cp.m_normalWorldOnB,frictionTangential0a,frictionTangential1b); + + + { + SpuSolverInternalConstraint& constraint = localMemory->m_tempInternalConstr[1]; + + constraint.m_normal = frictionTangential0a; + + constraint.m_localOffsetBodyA = offsA; + constraint.m_localOffsetBodyB = offsB; + + constraint.m_friction = cp.m_combinedFriction; + + constraint.m_appliedImpulse = btScalar(0.); + constraint.m_appliedVelocityImpulse = 0.f; + + constraint.m_jacDiagABInv = computeJacobianInverse (rb0, rb1, pos1, pos2, cp.m_normalWorldOnB); + + { + btVector3 ftorqueAxis0 = rel_pos1.cross(constraint.m_normal); + constraint.m_relpos1CrossNormal = ftorqueAxis0; + constraint.m_angularComponentA = rb0->getInvInertiaTensorWorld()*ftorqueAxis0; + } + { + btVector3 ftorqueAxis0 = rel_pos2.cross(constraint.m_normal); + constraint.m_relpos2CrossNormal = ftorqueAxis0; + constraint.m_angularComponentB = rb1->getInvInertiaTensorWorld()*ftorqueAxis0; + } + } + + { + SpuSolverInternalConstraint& constraint = localMemory->m_tempInternalConstr[2]; + + constraint.m_normal = frictionTangential1b; + + constraint.m_localOffsetBodyA = offsA; + constraint.m_localOffsetBodyB = offsB; + + constraint.m_friction = cp.m_combinedFriction; + + constraint.m_appliedImpulse = btScalar(0.); + constraint.m_appliedVelocityImpulse = 0.f; + + constraint.m_jacDiagABInv = computeJacobianInverse (rb0, rb1, pos1, pos2, cp.m_normalWorldOnB); + + { + btVector3 ftorqueAxis0 = rel_pos1.cross(constraint.m_normal); + constraint.m_relpos1CrossNormal = ftorqueAxis0; + constraint.m_angularComponentA = rb0->getInvInertiaTensorWorld()*ftorqueAxis0; + } + { + btVector3 ftorqueAxis0 = rel_pos2.cross(constraint.m_normal); + constraint.m_relpos2CrossNormal = ftorqueAxis0; + constraint.m_angularComponentB = rb1->getInvInertiaTensorWorld()*ftorqueAxis0; + } + } + + // DMA the three constraints + { + int dmaSize = sizeof(SpuSolverInternalConstraint)*3; + uint64_t dmaPpuAddress2 = reinterpret_cast (taskDesc.m_solverData.m_solverInternalConstraintList + constraintIndex); + cellDmaLargePut(&localMemory->m_tempInternalConstr, dmaPpuAddress2, dmaSize, DMA_TAG(1), 0, 0); + cellDmaWaitTagStatusAll(DMA_MASK(1)); + } + + constraintIndex += 3; + + } + + } + + manifoldsToProcess -= packageSize; + manifoldPackageOffset += packageSize; + } + } + int numOutConstraints = 0; + // Setup constraints + { + const btContactSolverInfoData& solverInfo = taskDesc.m_commandData.m_manifoldSetup.m_solverInfo; + + int constraintIndex = hashCell.m_constraintListOffset; + + int constraintsToProcess = hashCell.m_numConstraints; + int constraintPackageOffset = hashCell.m_constraintListOffset; + + while (constraintsToProcess) + { + const int packageSize = constraintsToProcess > constraintsPerPackage ? constraintsPerPackage : constraintsToProcess; + + // DMA the holder list + { + int dmaSize = sizeof(ConstraintCellHolder)*packageSize; + uint64_t dmaPpuAddress2 = reinterpret_cast (taskDesc.m_commandData.m_manifoldSetup.m_constraintHolders + constraintPackageOffset); + cellDmaLargeGet(constraintHolderList, dmaPpuAddress2, dmaSize, DMA_TAG(1), 0, 0); + cellDmaWaitTagStatusAll(DMA_MASK(1)); + } + + // DMA the constraint list + for (int c = 0; c < packageSize; ++c) + { + //int dmaSize = CONSTRAINT_MAX_SIZE; + int dmaSize = getConstraintSize((btTypedConstraintType)constraintHolderList[c].m_constraintType); + uint64_t dmaPpuAddress2 = reinterpret_cast (constraintHolderList[c].m_constraint); + cellDmaLargeGet(constraintList + CONSTRAINT_MAX_SIZE*c, dmaPpuAddress2, dmaSize, DMA_TAG(1), 0, 0); + } + cellDmaWaitTagStatusAll(DMA_MASK(1)); + + for (int c = 0; c < packageSize; ++c) + { + btTypedConstraint* currConstraint = (btTypedConstraint*)(constraintList + CONSTRAINT_MAX_SIZE*c); + btTypedConstraintType type = currConstraint->getConstraintType(); + + btRigidBody* rb0Ptr = (btRigidBody*)&currConstraint->getRigidBodyA(); + btRigidBody* rb1Ptr = (btRigidBody*)&currConstraint->getRigidBodyB(); + + // DMA the bodies + { + int dmaSize = sizeof(btRigidBody); + uint64_t dmaPpuAddress2 = reinterpret_cast (rb0Ptr); + cellDmaLargeGet(&localMemory->m_tempRBs[0], dmaPpuAddress2, dmaSize, DMA_TAG(1), 0, 0); + } + { + int dmaSize = sizeof(btRigidBody); + uint64_t dmaPpuAddress2 = reinterpret_cast (rb1Ptr); + cellDmaLargeGet(&localMemory->m_tempRBs[1], dmaPpuAddress2, dmaSize, DMA_TAG(2), 0, 0); + } + cellDmaWaitTagStatusAll(DMA_MASK(1) | DMA_MASK(2)); + + + btRigidBody* rb0 = (btRigidBody*)&localMemory->m_tempRBs[0]; + btRigidBody* rb1 = (btRigidBody*)&localMemory->m_tempRBs[1]; + + unsigned int solverBodyIdA = ~0, solverBodyIdB = ~0; + if (rb0->getIslandTag() >= 0) + { + solverBodyIdA = rb0->getCompanionId(); + } + else + { + //create a static body + solverBodyIdA = taskDesc.m_commandData.m_manifoldSetup.m_numBodies + taskDesc.m_commandData.m_manifoldSetup.m_numBodies + + hashCell.m_constraintListOffset; + setupSpuBody(rb0, &localMemory->m_tempSPUBodies[0]); + { + int dmaSize = sizeof(SpuSolverBody); + uint64_t dmaPpuAddress2 = reinterpret_cast (taskDesc.m_solverData.m_solverBodyList + solverBodyIdA); + cellDmaLargePut(&localMemory->m_tempSPUBodies[0], dmaPpuAddress2, dmaSize, DMA_TAG(1), 0, 0); + cellDmaWaitTagStatusAll(DMA_MASK(1)); + } + } + + if (rb1->getIslandTag() >= 0) + { + solverBodyIdB = rb1->getCompanionId(); + } + else + { + //create a static body + solverBodyIdB = taskDesc.m_commandData.m_manifoldSetup.m_numBodies + taskDesc.m_commandData.m_manifoldSetup.m_numManifolds + + hashCell.m_constraintListOffset; + setupSpuBody(rb1, &localMemory->m_tempSPUBodies[0]); + { + int dmaSize = sizeof(SpuSolverBody); + uint64_t dmaPpuAddress2 = reinterpret_cast (taskDesc.m_solverData.m_solverBodyList + solverBodyIdB); + cellDmaLargePut(&localMemory->m_tempSPUBodies[0], dmaPpuAddress2, dmaSize, DMA_TAG(1), 0, 0); + cellDmaWaitTagStatusAll(DMA_MASK(1)); + } + } + + // Setup the pointer table + int offsA = localRBs.insert(solverBodyIdA); + int offsB = localRBs.insert(solverBodyIdB); + + bool haveConstraint = false; + + // Setup the constraint + switch (type) + { + case POINT2POINT_CONSTRAINT_TYPE: + { + SpuSolverConstraint& spuConstraint = localMemory->m_tempConstraint[0]; + btPoint2PointConstraint* p2pC = (btPoint2PointConstraint*)currConstraint; + + spuConstraint.m_localOffsetBodyA = offsA; + spuConstraint.m_localOffsetBodyB = offsB; + spuConstraint.m_constraintType = type; + + // Compute the anchor positions + btVector3 pivotAinW = rb0->getCenterOfMassTransform()*p2pC->m_pivotInA; + btVector3 pivotBinW = rb1->getCenterOfMassTransform()*p2pC->m_pivotInB; + + setupLinearConstraintWorld(spuConstraint, rb0, rb1, pivotAinW, pivotBinW, solverInfo); + + haveConstraint = true; //We have one constraint + } + break; + case HINGE_CONSTRAINT_TYPE: + { + SpuSolverConstraint& spuConstraint = localMemory->m_tempConstraint[0]; + btHingeConstraint* hC = (btHingeConstraint*)currConstraint; + + spuConstraint.m_localOffsetBodyA = offsA; + spuConstraint.m_localOffsetBodyB = offsB; + spuConstraint.m_constraintType = type; + + // Compute the transforms + btTransform frameAinW = rb0->getCenterOfMassTransform()*hC->m_rbAFrame; + btTransform frameBinW = rb1->getCenterOfMassTransform()*hC->m_rbBFrame; + + // Setup the linear part + setupLinearConstraintWorld(spuConstraint, rb0, rb1, frameAinW.getOrigin(), frameBinW.getOrigin(), solverInfo); + + // Setup angular part + btVector3 jacInv; + + // Setup the jacobian inverses + for (int i = 0; i < 3; ++i) + { + const btVector3 axisA = frameAinW.getBasis().getColumn(i); + const btVector3 axisB = frameBinW.getBasis().getColumn(i); + + spuConstraint.hinge.m_frameAinW[i] = axisA; + spuConstraint.hinge.m_frameBinW[i] = axisB; + + jacInv[i] = computeAngularJacobianInverse(rb0, rb1, axisA); + } + + // Compute position error along the two secondary axes & limit + { + btVector3 angularBias (0,0,0); + + const btVector3 axisA = frameAinW.getBasis().getColumn(2); + const btVector3 axisB = frameBinW.getBasis().getColumn(2); + + btVector3 error = -axisA.cross(axisB) / solverInfo.m_timeStep; + + angularBias[0] = error.dot(frameAinW.getBasis().getColumn(0)); + angularBias[1] = error.dot(frameAinW.getBasis().getColumn(1)); + + spuConstraint.m_flags.m_limit1 = 0; + + if (hC->m_lowerLimit < hC->m_upperLimit) + { + // Compute hinge axis + const btVector3& refAxis0 = frameAinW.getBasis().getColumn(0); + const btVector3& refAxis1 = frameAinW.getBasis().getColumn(1); + const btVector3& swingAxis = frameBinW.getBasis().getColumn(1); + + float hingeAngle = btAtan2Fast(swingAxis.dot(refAxis0), swingAxis.dot(refAxis1)); + float correction, sign; + + spuConstraint.hinge.m_limitAccumulatedImpulse = 0; + + if (hingeAngle <= hC->m_lowerLimit*hC->m_limitSoftness) + { + correction = (hC->m_lowerLimit - hingeAngle); + sign = 1.0f; + spuConstraint.m_flags.m_limit1 = 1; + } + else if (hingeAngle >= hC->m_upperLimit*hC->m_limitSoftness) + { + correction = (hC->m_upperLimit - hingeAngle); + sign = -1.0f; + spuConstraint.m_flags.m_limit1 = 1; + } + + angularBias[2] = correction * hC->m_biasFactor / (solverInfo.m_timeStep * hC->m_relaxationFactor); + spuConstraint.hinge.m_limitJacFactor = hC->m_relaxationFactor * sign; + } + + spuConstraint.hinge.m_angularBias = angularBias; + } + + // Setup motor + spuConstraint.m_flags.m_motor1 = 0; + if (hC->m_enableAngularMotor) + { + spuConstraint.m_flags.m_motor1 = 1; + spuConstraint.hinge.m_motorVelocity = hC->m_motorTargetVelocity; + spuConstraint.hinge.m_motorImpulse = hC->m_maxMotorImpulse; + } + + spuConstraint.hinge.m_angJacdiagABInv = jacInv; + + haveConstraint = true; + } + break; + case CONETWIST_CONSTRAINT_TYPE: + { + SpuSolverConstraint& spuConstraint = localMemory->m_tempConstraint[0]; + btConeTwistConstraint* ctC = (btConeTwistConstraint*)currConstraint; + + spuConstraint.m_localOffsetBodyA = offsA; + spuConstraint.m_localOffsetBodyB = offsB; + spuConstraint.m_constraintType = type; + + // Compute the transforms + btTransform frameAinW = rb0->getCenterOfMassTransform()*ctC->m_rbAFrame; + btTransform frameBinW = rb1->getCenterOfMassTransform()*ctC->m_rbBFrame; + + // Setup the linear part + setupLinearConstraintWorld(spuConstraint, rb0, rb1, frameAinW.getOrigin(), frameBinW.getOrigin(), solverInfo); + + // Setup the swing limits + const btVector3& b1Axis1 = frameAinW.getBasis().getColumn(0); + const btVector3& b2Axis1 = frameBinW.getBasis().getColumn(0); + const btVector3& b1Axis2 = frameAinW.getBasis().getColumn(1); + const btVector3& b1Axis3 = frameAinW.getBasis().getColumn(2); + + float swing1 = 0.0f, swing2 = 0.0f; + + if (ctC->m_swingSpan1 >= 0.05f) + { + swing1 = btAtan2Fast(b2Axis1.dot(b1Axis2),b2Axis1.dot(b1Axis1)); + } + if (ctC->m_swingSpan2 >= 0.05f) + { + swing2 = btAtan2Fast(b2Axis1.dot(b1Axis3),b2Axis1.dot(b1Axis1)); + } + + float rMaxAngle1Sq = 1.0f / (ctC->m_swingSpan1*ctC->m_swingSpan1); + float rMaxAngle2Sq = 1.0f / (ctC->m_swingSpan2*ctC->m_swingSpan2); + float ellipseAngle = btFabs(swing1)* rMaxAngle1Sq + btFabs(swing2) * rMaxAngle2Sq; + + spuConstraint.m_flags.m_limit1 = 0; + spuConstraint.m_flags.m_limit2 = 0; + + spuConstraint.conetwist.m_swingLimitImpulse = 0; + spuConstraint.conetwist.m_twistLimitImpulse = 0; + + float relFactorSq = ctC->m_relaxationFactor*ctC->m_relaxationFactor; + + if (ellipseAngle > 1.0f) + { + spuConstraint.conetwist.m_swingError = ellipseAngle - 1.0f; + spuConstraint.conetwist.m_swingError *= ctC->m_biasFactor; + spuConstraint.conetwist.m_swingError /= solverInfo.m_timeStep * relFactorSq; + + spuConstraint.m_flags.m_limit1 = 1; + + btVector3 axis = b2Axis1.cross(b1Axis2* b2Axis1.dot(b1Axis2) + b1Axis3* b2Axis1.dot(b1Axis3)); + axis.normalize(); + + float swingAxisSign = (b2Axis1.dot(b1Axis1) >= 0.0f) ? 1.0f : -1.0f; + axis *= swingAxisSign; + + spuConstraint.conetwist.m_swingAxis = axis; + + float jacobian = computeAngularJacobianInverse(rb0, rb1, axis); + spuConstraint.conetwist.m_swingJacInv = relFactorSq * jacobian; + } + + // Setup twist limits + if (ctC->m_twistSpan >= 0.0f) + { + const btVector3& b2Axis2 = frameBinW.getBasis().getColumn(1); + + btQuaternion rotationArc = shortestArcQuat(b2Axis1,b1Axis1); + btVector3 TwistRef = quatRotate(rotationArc,b2Axis2); + float twist = btAtan2Fast(TwistRef.dot(b1Axis3), TwistRef.dot(b1Axis2)); + + float lockedFreeFactor = (ctC->m_twistSpan > btScalar(0.05f)) ? ctC->m_limitSoftness : btScalar(0.); + if (twist <= -ctC->m_twistSpan*lockedFreeFactor) + { + spuConstraint.conetwist.m_twistError = -(twist + ctC->m_twistSpan); + spuConstraint.conetwist.m_twistError *= ctC->m_biasFactor; + spuConstraint.conetwist.m_twistError /= solverInfo.m_timeStep * relFactorSq; + + spuConstraint.m_flags.m_limit2 = 1; + + btVector3 axis = -(b1Axis1 + b2Axis1); + axis.normalize(); + spuConstraint.conetwist.m_twistAxis = axis; + + float jacobian = computeAngularJacobianInverse(rb0, rb1, axis); + spuConstraint.conetwist.m_twistJacInv = relFactorSq * jacobian; + } + else if (twist >= ctC->m_twistSpan*lockedFreeFactor) + { + spuConstraint.conetwist.m_twistError = twist - ctC->m_twistSpan; + spuConstraint.conetwist.m_twistError *= ctC->m_biasFactor; + spuConstraint.conetwist.m_twistError /= solverInfo.m_timeStep * relFactorSq; + + spuConstraint.m_flags.m_limit2 = 1; + + btVector3 axis = b1Axis1 + b2Axis1; + axis.normalize(); + spuConstraint.conetwist.m_twistAxis = axis; + + float jacobian = computeAngularJacobianInverse(rb0, rb1, axis); + spuConstraint.conetwist.m_twistJacInv = relFactorSq * jacobian; + } + + } + + haveConstraint = true; //We have one constraint + } + break; + default: + ; + } + + + if (haveConstraint) + { + //DMA it + int dmaSize = sizeof(SpuSolverConstraint); + uint64_t dmaPpuAddress2 = reinterpret_cast (taskDesc.m_solverData.m_solverConstraintList + + hashCell.m_constraintListOffset + numOutConstraints); + cellDmaLargePut(&localMemory->m_tempConstraint[0], dmaPpuAddress2, dmaSize, DMA_TAG(1), 0, 0); + cellDmaWaitTagStatusAll(DMA_MASK(1)); + + numOutConstraints++; + } + + } + + constraintsToProcess -= packageSize; + constraintPackageOffset += packageSize; + } + } + + // Write back some data, if needed + if (localRBs.size() > 0) + { + { + // DMA the local body list + int dmaSize = sizeof(uint32_t)*localRBs.size(); + uint64_t dmaPpuAddress2 = reinterpret_cast (taskDesc.m_solverData.m_solverBodyOffsetList + hashCell.m_solverBodyOffsetListOffset); + cellDmaLargePut(indexArray, dmaPpuAddress2, dmaSize, DMA_TAG(1), 0, 0); + } + hashCell.m_numLocalBodies = localRBs.size(); + hashCell.m_numConstraints = numOutConstraints; + { + // DMA the hash cell + int dmaSize = sizeof(SpuSolverHashCell); + uint64_t dmaPpuAddress2 = reinterpret_cast (taskDesc.m_solverData.m_solverHash); + dmaPpuAddress2 += offsetof(SpuSolverHash,m_Hash); + dmaPpuAddress2 += sizeof(SpuSolverHashCell) * cellIdx; + + cellDmaLargePut(&hashCell, dmaPpuAddress2, dmaSize, DMA_TAG(2), 0, 0); + } + cellDmaWaitTagStatusAll(DMA_MASK(1) | DMA_MASK(2)); + } + } + } + break; + case CMD_SOLVER_SOLVE_ITERATE: + { + // DMA the hash + { + int dmaSize = sizeof(SpuSolverHash); + uint64_t dmaPpuAddress2 = reinterpret_cast (taskDesc.m_solverData.m_solverHash); + cellDmaLargeGet(&localMemory->m_localHash, dmaPpuAddress2, dmaSize, DMA_TAG(1), 0, 0); + cellDmaWaitTagStatusAll(DMA_MASK(1)); + } + + btSpinlock hashLock (taskDesc.m_commandData.m_iterate.m_spinLockVar); + + unsigned int cellToProcess; + while (1) + { + cellToProcess = getNextFreeCell(localMemory, taskDesc, hashLock); + + if (cellToProcess >= SPU_HASH_NUMCELLS) + break; + + // Now process that one cell + SpuSolverHashCell& hashCell = localMemory->m_localHash.m_Hash[cellToProcess]; + + if (hashCell.m_numContacts == 0 && hashCell.m_numConstraints == 0) + continue; + + // DMA the local bodies and constraints + + // Get the body list + uint32_t* indexList = (uint32_t*)allocTemporaryStorage(localMemory, sizeof(uint32_t)*hashCell.m_numLocalBodies); + SpuSolverBody* bodyList = allocBodyStorage(localMemory, hashCell.m_numLocalBodies); + + { + int dmaSize = sizeof(uint32_t)*hashCell.m_numLocalBodies; + uint64_t dmaPpuAddress2 = reinterpret_cast (taskDesc.m_solverData.m_solverBodyOffsetList + hashCell.m_solverBodyOffsetListOffset); + cellDmaLargeGet(indexList, dmaPpuAddress2, dmaSize, DMA_TAG(1), 0, 0); + cellDmaWaitTagStatusAll(DMA_MASK(1)); + } + + // DMA the bodies + for (int b = 0; b < hashCell.m_numLocalBodies; ++b) + { + int dmaSize = sizeof(SpuSolverBody); + uint64_t dmaPpuAddress2 = reinterpret_cast (taskDesc.m_solverData.m_solverBodyList + indexList[b]); + cellDmaLargeGet(bodyList+b, dmaPpuAddress2, dmaSize, DMA_TAG(1), 0, 0); + } + cellDmaWaitTagStatusAll(DMA_MASK(1)); + + // Process the constraints in packets + if (hashCell.m_numConstraints) + { + const size_t maxConstraintsPerPacket = memTemporaryStorage(localMemory) / sizeof(SpuSolverConstraint); + size_t constraintsToProcess = hashCell.m_numConstraints; + size_t constraintListOffset = hashCell.m_constraintListOffset; + + SpuSolverConstraint* constraints = allocConstraintStorage(localMemory, maxConstraintsPerPacket); + + while (constraintsToProcess > 0) + { + size_t packetSize = constraintsToProcess > maxConstraintsPerPacket ? maxConstraintsPerPacket : constraintsToProcess; + + // DMA the constraints + { + int dmaSize = sizeof(SpuSolverConstraint)*packetSize; + uint64_t dmaPpuAddress2 = reinterpret_cast (taskDesc.m_solverData.m_solverConstraintList + constraintListOffset); + cellDmaLargeGet(constraints, dmaPpuAddress2, dmaSize, DMA_TAG(1), 0, 0); + } + cellDmaWaitTagStatusAll(DMA_MASK(1)); + + + // Solve + for (int j = 0; j < packetSize; ++j) + { + SpuSolverConstraint& constraint = constraints[j]; + SpuSolverBody& bodyA = bodyList[constraint.m_localOffsetBodyA]; + SpuSolverBody& bodyB = bodyList[constraint.m_localOffsetBodyB]; + + solveConstraint(constraint, bodyA, bodyB); + } + + // Write back the constraints for accumulated stuff + { + int dmaSize = sizeof(SpuSolverConstraint)*packetSize; + uint64_t dmaPpuAddress2 = reinterpret_cast (taskDesc.m_solverData.m_solverConstraintList + constraintListOffset); + cellDmaLargePut(constraints, dmaPpuAddress2, dmaSize, DMA_TAG(1), 0, 0); + } + cellDmaWaitTagStatusAll(DMA_MASK(1)); + + constraintListOffset += packetSize; + constraintsToProcess -= packetSize; + } + + freeConstraintStorage (localMemory, constraints, maxConstraintsPerPacket); + } + + // Now process the contacts + if (hashCell.m_numContacts) + { + const size_t maxContactsPerPacket = memTemporaryStorage(localMemory) / (sizeof(SpuSolverInternalConstraint)*3); + size_t contactsToProcess = hashCell.m_numContacts; + size_t constraintListOffset = hashCell.m_internalConstraintListOffset; + + SpuSolverInternalConstraint* internalConstraints = allocInternalConstraintStorage(localMemory, maxContactsPerPacket*3); + + while (contactsToProcess > 0) + { + size_t packetSize = contactsToProcess > maxContactsPerPacket ? maxContactsPerPacket : contactsToProcess; + + // DMA the constraints + { + int dmaSize = sizeof(SpuSolverInternalConstraint)*packetSize*3; + uint64_t dmaPpuAddress2 = reinterpret_cast (taskDesc.m_solverData.m_solverInternalConstraintList + constraintListOffset); + cellDmaLargeGet(internalConstraints, dmaPpuAddress2, dmaSize, DMA_TAG(1), 0, 0); + } + cellDmaWaitTagStatusAll(DMA_MASK(1)); + + + // Solve + for (int j = 0; j < packetSize*3; j += 3) + { + SpuSolverInternalConstraint& contact = internalConstraints[j]; + SpuSolverBody& bodyA = bodyList[contact.m_localOffsetBodyA]; + SpuSolverBody& bodyB = bodyList[contact.m_localOffsetBodyB]; + + solveContact(contact, bodyA, bodyB); + } + + for (int j = 0; j < packetSize*3; j += 3) + { + SpuSolverInternalConstraint& contact = internalConstraints[j]; + SpuSolverBody& bodyA = bodyList[contact.m_localOffsetBodyA]; + SpuSolverBody& bodyB = bodyList[contact.m_localOffsetBodyB]; + + SpuSolverInternalConstraint& frictionConstraint1 = internalConstraints[j + 1]; + solveFriction(frictionConstraint1, bodyA, bodyB, contact.m_appliedImpulse); + + SpuSolverInternalConstraint& frictionConstraint2 = internalConstraints[j + 2]; + solveFriction(frictionConstraint2, bodyA, bodyB, contact.m_appliedImpulse); + } + + + // Write back the constraints for accumulated stuff + { + int dmaSize = sizeof(SpuSolverInternalConstraint)*packetSize*3; + uint64_t dmaPpuAddress2 = reinterpret_cast (taskDesc.m_solverData.m_solverInternalConstraintList + constraintListOffset); + cellDmaLargePut(internalConstraints, dmaPpuAddress2, dmaSize, DMA_TAG(1), 0, 0); + } + cellDmaWaitTagStatusAll(DMA_MASK(1)); + + constraintListOffset += packetSize*3; + contactsToProcess -= packetSize; + } + + freeInternalConstraintStorage (localMemory, internalConstraints, maxContactsPerPacket*3); + } + + + // DMA the bodies back to main memory + for (int b = 0; b < hashCell.m_numLocalBodies; ++b) + { + int dmaSize = sizeof(SpuSolverBody); + uint64_t dmaPpuAddress2 = reinterpret_cast (taskDesc.m_solverData.m_solverBodyList + indexList[b]); + cellDmaLargePut(bodyList + b, dmaPpuAddress2, dmaSize, DMA_TAG(1), 0, 0); + } + cellDmaWaitTagStatusAll(DMA_MASK(1)); + + freeBodyStorage(localMemory, bodyList, hashCell.m_numLocalBodies); + freeTemporaryStorage(localMemory, indexList, sizeof(uint32_t)*hashCell.m_numLocalBodies); + + }; + } + break; + case CMD_SOLVER_COPYBACK_BODIES: + { + int bodiesToProcess = taskDesc.m_commandData.m_bodyCopyback.m_numBodies; + int bodyPackageOffset = taskDesc.m_commandData.m_bodyCopyback.m_startBody; + const int bodiesPerPackage = 256; + + btRigidBody** bodyPtrList = (btRigidBody**)allocTemporaryStorage(localMemory, bodiesPerPackage*sizeof(btRigidBody*)); + btRigidBody* bodyList = (btRigidBody*)allocTemporaryStorage(localMemory, bodiesPerPackage*sizeof(btRigidBody)); + SpuSolverBody* spuBodyList = allocBodyStorage(localMemory, bodiesPerPackage); + + while (bodiesToProcess > 0) + { + const int packageSize = bodiesToProcess > bodiesPerPackage ? bodiesPerPackage : bodiesToProcess; + + // DMA the body pointers + { + int dmaSize = sizeof(btRigidBody*)*packageSize; + uint64_t dmaPpuAddress2 = reinterpret_cast (taskDesc.m_commandData.m_bodySetup.m_rbList + bodyPackageOffset); + cellDmaLargeGet(bodyPtrList, dmaPpuAddress2, dmaSize, DMA_TAG(1), 0, 0); + cellDmaWaitTagStatusAll(DMA_MASK(1)); + } + + // DMA the rigid bodies + for (int b = 0; b < packageSize; ++b) + { + btRigidBody* body = bodyPtrList[b]; + int dmaSize = sizeof(btRigidBody); + uint64_t dmaPpuAddress2 = reinterpret_cast (body); + cellDmaLargeGet(&bodyList[b], dmaPpuAddress2, dmaSize, DMA_TAG(1), 0, 0); + } + + // DMA the list of SPU bodies + { + int dmaSize = sizeof(SpuSolverBody)*packageSize; + uint64_t dmaPpuAddress2 = reinterpret_cast (taskDesc.m_solverData.m_solverBodyList + bodyPackageOffset); + cellDmaLargeGet(spuBodyList, dmaPpuAddress2, dmaSize, DMA_TAG(2), 0, 0); + } + cellDmaWaitTagStatusAll(DMA_MASK(1) | DMA_MASK(2)); + + + for (int b = 0; b < packageSize; ++b) + { + btRigidBody* localBody = bodyList + b; + SpuSolverBody* solverBody = spuBodyList + b; + + if (solverBody->m_invertedMass > 0) + { + localBody->setLinearVelocity(solverBody->m_linearVelocity); + localBody->setAngularVelocity(solverBody->m_angularVelocity); + } + localBody->setCompanionId(-1); + } + + // DMA the rigid bodies + for (int b = 0; b < packageSize; ++b) + { + btRigidBody* body = bodyPtrList[b]; + int dmaSize = sizeof(btRigidBody); + uint64_t dmaPpuAddress2 = reinterpret_cast (body); + cellDmaLargePut(&bodyList[b], dmaPpuAddress2, dmaSize, DMA_TAG(1), 0, 0); + } + + + bodiesToProcess -= packageSize; + bodyPackageOffset += packageSize; + } + + } + break; + default: + //.. nothing + ; +// btAssert(0); + } +} diff --git a/Extras/BulletMultiThreaded/SpuSolverTask/SpuParallellSolverTask.h b/Extras/BulletMultiThreaded/SpuSolverTask/SpuParallellSolverTask.h new file mode 100644 index 000000000..84a1480c7 --- /dev/null +++ b/Extras/BulletMultiThreaded/SpuSolverTask/SpuParallellSolverTask.h @@ -0,0 +1,265 @@ +/* +Bullet Continuous Collision Detection and Physics Library - Parallel solver +Copyright (c) 2007 Starbreeze Studios + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. + +Written by: Marten Svanfeldt +*/ + +#ifndef SPU_PARALLELSOLVERTASK_H +#define SPU_PARALLELSOLVERTASK_H + +#include "../PlatformDefinitions.h" +#include "LinearMath/btScalar.h" +#include "LinearMath/btVector3.h" +#include "LinearMath/btMatrix3x3.h" +#include "BulletDynamics/ConstraintSolver/btContactSolverInfo.h" +#include "../SpuSync.h" +#include "BulletDynamics/ConstraintSolver/btTypedConstraint.h" + + +ATTRIBUTE_ALIGNED16(struct) ManifoldCellHolder +{ + uint32_t m_hashCellIndex; + class btPersistentManifold* m_manifold; +}; + +ATTRIBUTE_ALIGNED16(struct) ConstraintCellHolder +{ + uint32_t m_hashCellIndex; + uint32_t m_constraintType; + class btTypedConstraint* m_constraint; +}; + +enum +{ + SPU_HASH_NUMCELLS = 128, + SPU_HASH_WORDWIDTH = sizeof(uint32_t)*8, + SPU_HASH_NUMCELLDWORDS = ((SPU_HASH_NUMCELLS + SPU_HASH_WORDWIDTH - 1) / SPU_HASH_WORDWIDTH), + SPU_HASH_NUMUNUSEDBITS = (SPU_HASH_NUMCELLDWORDS * SPU_HASH_WORDWIDTH) - SPU_HASH_NUMCELLS, + SPU_HASH_PHYSSIZE = 4, //TODO: MAKE CONFIGURABLE + + SPU_MAX_BODIES_PER_CELL = 1024, + + SPU_MAX_SPUS = 6 +}; + +enum +{ + CMD_SOLVER_SETUP_BODIES = 1, + CMD_SOLVER_MANIFOLD_SETUP = 2, + CMD_SOLVER_CONSTRAINT_SETUP = 3, + CMD_SOLVER_SOLVE_ITERATE = 4, + CMD_SOLVER_COPYBACK_BODIES = 5 +}; + +struct SpuSolverHashCell +{ + uint16_t m_numLocalBodies; + uint16_t m_solverBodyOffsetListOffset; + + uint16_t m_numManifolds; + uint16_t m_manifoldListOffset; + + uint16_t m_numContacts; + uint16_t m_internalConstraintListOffset; + + uint16_t m_numConstraints; + uint16_t m_constraintListOffset; +}; + +// Shared data structures +struct SpuSolverHash +{ + // Dependency matrix + ATTRIBUTE_ALIGNED16(uint32_t m_dependencyMatrix[SPU_HASH_NUMCELLS][SPU_HASH_NUMCELLDWORDS]); + ATTRIBUTE_ALIGNED16(uint32_t m_currentMask[SPU_MAX_SPUS+1][SPU_HASH_NUMCELLDWORDS]); + + // The hash itself + ATTRIBUTE_ALIGNED16(SpuSolverHashCell m_Hash[SPU_HASH_NUMCELLS]); + + // Hash meta-data +}; + +inline unsigned int spuHash(unsigned int k) { return k*2654435769u; }; +inline unsigned int spuGetHashCellIndex(int x, int y, int z) +{ + //int n = 0x8da6b343 * x + 0xd8163841 * y + 0xcb1ab31f * z; + + int n = x ^ spuHash(y ^ spuHash (z)); + + return ((unsigned int)n) & (SPU_HASH_NUMCELLS-1); +} + + +ATTRIBUTE_ALIGNED16(struct) SpuSolverBody +{ + btVector3 m_linearVelocity; + btVector3 m_angularVelocity; + + btMatrix3x3 m_worldInvInertiaTensor; + + float m_invertedMass; +}; + +ATTRIBUTE_ALIGNED16(struct) SpuSolverInternalConstraint +{ + uint32_t m_localOffsetBodyA; + uint32_t m_localOffsetBodyB; + + float m_appliedImpulse; + float m_appliedVelocityImpulse; + + float m_friction; + float m_restitution; + float m_jacDiagABInv; + float m_penetration; + + btVector3 m_normal; + + btVector3 m_relpos1CrossNormal; + btVector3 m_relpos2CrossNormal; + btVector3 m_angularComponentA; + btVector3 m_angularComponentB; +}; + + +ATTRIBUTE_ALIGNED16(struct) SpuSolverConstraint +{ + uint16_t m_localOffsetBodyA; + uint16_t m_localOffsetBodyB; + + uint16_t m_constraintType; + struct + { + uint16_t m_useLinear : 1; + + uint16_t m_limit1 : 1; + uint16_t m_limit2 : 1; + uint16_t m_limit3 : 1; + uint16_t m_limit4 : 1; + uint16_t m_limit5 : 1; + uint16_t m_limit6 : 1; + + uint16_t m_motor1 : 1; + uint16_t m_motor2 : 1; + uint16_t m_motor3 : 1; + uint16_t m_motor4 : 1; + uint16_t m_motor5 : 1; + uint16_t m_motor6 : 1; + } m_flags; + + // Linear parts, used by all constraints + btQuadWordStorage m_relPos1; + btQuadWordStorage m_relPos2; + btQuadWordStorage m_jacdiagABInv; //Jacobian inverse multiplied by gamma (damping) for each axis + btQuadWordStorage m_linearBias; //depth*tau/(dt*gamma) along each axis + + // Joint-specific parts + union + { + struct + { + btQuadWordStorage m_frameAinW[3]; + btQuadWordStorage m_frameBinW[3]; + + // For angular + btQuadWordStorage m_angJacdiagABInv; //1/j + btQuadWordStorage m_angularBias; //error/dt, in x/y. limit error*bias factor / (dt * relaxation factor) in z + + // For limit + float m_limitAccumulatedImpulse; + float m_limitJacFactor; //limitSign*relaxation factor + + // For motor + float m_motorVelocity; + float m_motorImpulse; + } hinge; + + struct + { + btQuadWordStorage m_swingAxis; + btQuadWordStorage m_twistAxis; + + float m_swingError; + float m_swingJacInv; + float m_swingLimitImpulse; + + float m_twistError; + float m_twistJacInv; + float m_twistLimitImpulse; + } conetwist; + }; +}; + + +ATTRIBUTE_ALIGNED16(struct) SpuSolverDataDesc +{ + SpuSolverHash* m_solverHash; + SpuSolverBody* m_solverBodyList; + SpuSolverInternalConstraint* m_solverInternalConstraintList; + SpuSolverConstraint* m_solverConstraintList; + uint32_t* m_solverBodyOffsetList; +}; + + +ATTRIBUTE_ALIGNED16(struct) SpuSolverTaskDesc +{ + uint32_t m_solverCommand; + uint32_t m_taskId; + SpuSolverDataDesc m_solverData; + + // command specific data + union + { + // Body setup + struct + { + uint32_t m_startBody; + uint32_t m_numBodies; + + class btRigidBody** m_rbList; + } m_bodySetup, m_bodyCopyback; + + struct + { + uint32_t m_startCell; + uint32_t m_numCells; + + uint32_t m_numBodies; + uint32_t m_numManifolds; + + ManifoldCellHolder* m_manifoldHolders; + ConstraintCellHolder* m_constraintHolders; + btContactSolverInfoData m_solverInfo; + } m_manifoldSetup; + + struct + { + btSpinlock::SpinVariable* m_spinLockVar; + } m_iterate; + } m_commandData; +}; + +void processSolverTask(void* userPtr, void* lsMemory); +void* createSolverLocalStoreMemory(); + +// Helper +inline bool constraintTypeSupported(btTypedConstraintType type) +{ + return type == POINT2POINT_CONSTRAINT_TYPE || + type == HINGE_CONSTRAINT_TYPE || + type == CONETWIST_CONSTRAINT_TYPE || + type == D6_CONSTRAINT_TYPE; +} + +#endif diff --git a/Extras/BulletMultiThreaded/SpuSolverTask/readme.txt b/Extras/BulletMultiThreaded/SpuSolverTask/readme.txt deleted file mode 100644 index f3e907347..000000000 --- a/Extras/BulletMultiThreaded/SpuSolverTask/readme.txt +++ /dev/null @@ -1 +0,0 @@ -Empty placeholder for future Libspe2 SPU task