From 0b6d1af1d41b571fb138fecd1f026707e5f2047a Mon Sep 17 00:00:00 2001 From: erwincoumans Date: Thu, 1 May 2014 22:23:37 -0700 Subject: [PATCH] Only enable SSE4 for Visual Studio 2012 or later (_MSC_FULL_VER >= 170050727), it breaks the build for Visual Studio 2010 Add additional constructor for btMultiBodyJointMotor --- .../btSequentialImpulseConstraintSolver.cpp | 10 ++++++++++ .../Featherstone/btMultiBodyJointMotor.cpp | 17 ++++++++++++++++- .../Featherstone/btMultiBodyJointMotor.h | 2 +- src/LinearMath/btCpuFeatureUtility.h | 18 +++++++----------- src/LinearMath/btScalar.h | 5 +++++ 5 files changed, 39 insertions(+), 13 deletions(-) diff --git a/src/BulletDynamics/ConstraintSolver/btSequentialImpulseConstraintSolver.cpp b/src/BulletDynamics/ConstraintSolver/btSequentialImpulseConstraintSolver.cpp index 3608c40df..58a35a289 100644 --- a/src/BulletDynamics/ConstraintSolver/btSequentialImpulseConstraintSolver.cpp +++ b/src/BulletDynamics/ConstraintSolver/btSequentialImpulseConstraintSolver.cpp @@ -49,6 +49,7 @@ static inline __m128 btSimdDot3( __m128 vec0, __m128 vec1 ) return _mm_add_ps( btVecSplat( result, 0 ), _mm_add_ps( btVecSplat( result, 1 ), btVecSplat( result, 2 ) ) ); } +#if defined (BT_ALLOW_SSE4) #define USE_FMA 1 #define USE_FMA3_INSTEAD_FMA4 1 #define USE_SSE4_DOT 0 @@ -80,6 +81,7 @@ static inline __m128 btSimdDot3( __m128 vec0, __m128 vec1 ) // c - a*b #define FMNADD(a, b, c) _mm_sub_ps(c, _mm_mul_ps(a, b)) #endif +#endif // Project Gauss Seidel or the equivalent Sequential Impulse static btSimdScalar gResolveSingleConstraintRowGeneric_sse2(btSolverBody& body1, btSolverBody& body2, const btSolverConstraint& c) @@ -116,6 +118,7 @@ static btSimdScalar gResolveSingleConstraintRowGeneric_sse2(btSolverBody& body1, // Enhanced version of gResolveSingleConstraintRowGeneric_sse2 with SSE4.1 and FMA3 static btSimdScalar gResolveSingleConstraintRowGeneric_sse4_1_fma3(btSolverBody& body1, btSolverBody& body2, const btSolverConstraint& c) { +#if defined 
(BT_ALLOW_SSE4) __m128 tmp = _mm_set_ps1(c.m_jacDiagABInv); __m128 deltaImpulse = _mm_set_ps1(c.m_rhs - btScalar(c.m_appliedImpulse)*c.m_cfm); const __m128 lowerLimit = _mm_set_ps1(c.m_lowerLimit); @@ -134,6 +137,9 @@ static btSimdScalar gResolveSingleConstraintRowGeneric_sse4_1_fma3(btSolverBody& body2.internalGetDeltaLinearVelocity().mVec128 = FMADD(_mm_mul_ps(c.m_contactNormal2.mVec128, body2.internalGetInvMass().mVec128), deltaImpulse, body2.internalGetDeltaLinearVelocity().mVec128); body2.internalGetDeltaAngularVelocity().mVec128 = FMADD(c.m_angularComponentB.mVec128, deltaImpulse, body2.internalGetDeltaAngularVelocity().mVec128); return deltaImpulse; +#else + return gResolveSingleConstraintRowGeneric_sse2(body1,body2,c); +#endif } @@ -168,6 +174,7 @@ static btSimdScalar gResolveSingleConstraintRowLowerLimit_sse2(btSolverBody& bod // Enhanced version of gResolveSingleConstraintRowGeneric_sse2 with SSE4.1 and FMA3 static btSimdScalar gResolveSingleConstraintRowLowerLimit_sse4_1_fma3(btSolverBody& body1, btSolverBody& body2, const btSolverConstraint& c) { +#ifdef BT_ALLOW_SSE4 __m128 tmp = _mm_set_ps1(c.m_jacDiagABInv); __m128 deltaImpulse = _mm_set_ps1(c.m_rhs - btScalar(c.m_appliedImpulse)*c.m_cfm); const __m128 lowerLimit = _mm_set_ps1(c.m_lowerLimit); @@ -184,6 +191,9 @@ static btSimdScalar gResolveSingleConstraintRowLowerLimit_sse4_1_fma3(btSolverBo body2.internalGetDeltaLinearVelocity().mVec128 = FMADD(_mm_mul_ps(c.m_contactNormal2.mVec128, body2.internalGetInvMass().mVec128), deltaImpulse, body2.internalGetDeltaLinearVelocity().mVec128); body2.internalGetDeltaAngularVelocity().mVec128 = FMADD(c.m_angularComponentB.mVec128, deltaImpulse, body2.internalGetDeltaAngularVelocity().mVec128); return deltaImpulse; +#else + return gResolveSingleConstraintRowLowerLimit_sse2(body1,body2,c); +#endif //BT_ALLOW_SSE4 } diff --git a/src/BulletDynamics/Featherstone/btMultiBodyJointMotor.cpp b/src/BulletDynamics/Featherstone/btMultiBodyJointMotor.cpp index 
e424d5c92..18f49533b 100644 --- a/src/BulletDynamics/Featherstone/btMultiBodyJointMotor.cpp +++ b/src/BulletDynamics/Featherstone/btMultiBodyJointMotor.cpp @@ -22,8 +22,23 @@ subject to the following restrictions: btMultiBodyJointMotor::btMultiBodyJointMotor(btMultiBody* body, int link, btScalar desiredVelocity, btScalar maxMotorImpulse) -:btMultiBodyJointMotor(body,link,0,desiredVelocity,maxMotorImpulse) + :btMultiBodyConstraint(body,body,link,link,1,true), + m_desiredVelocity(desiredVelocity) { + int linkDoF = 0; + + m_maxAppliedImpulse = maxMotorImpulse; + // the data.m_jacobians never change, so may as well + // initialize them here + + // note: we rely on the fact that data.m_jacobians are + // always initialized to zero by the Constraint ctor + + unsigned int offset = 6 + (body->isMultiDof() ? body->getLink(link).m_dofOffset + linkDoF : link); + + // row 0: the lower bound + // row 0: the lower bound + jacobianA(0)[offset] = 1; } diff --git a/src/BulletDynamics/Featherstone/btMultiBodyJointMotor.h b/src/BulletDynamics/Featherstone/btMultiBodyJointMotor.h index ada7ccd60..e863c7cc0 100644 --- a/src/BulletDynamics/Featherstone/btMultiBodyJointMotor.h +++ b/src/BulletDynamics/Featherstone/btMultiBodyJointMotor.h @@ -30,7 +30,7 @@ protected: public: - btMultiBodyJointMotor(btMultiBody* body, int link,btScalar desiredVelocity, btScalar maxMotorImpulse); + btMultiBodyJointMotor(btMultiBody* body, int link, btScalar desiredVelocity, btScalar maxMotorImpulse); btMultiBodyJointMotor(btMultiBody* body, int link, int linkDoF, btScalar desiredVelocity, btScalar maxMotorImpulse); virtual ~btMultiBodyJointMotor(); diff --git a/src/LinearMath/btCpuFeatureUtility.h b/src/LinearMath/btCpuFeatureUtility.h index 0c9ba99a9..b3ed59eab 100644 --- a/src/LinearMath/btCpuFeatureUtility.h +++ b/src/LinearMath/btCpuFeatureUtility.h @@ -6,10 +6,8 @@ #include //memset #ifdef BT_USE_SSE -#if (_MSC_FULL_VER >= 160040219) #include #endif -#endif #if defined BT_USE_NEON #define 
ARM_NEON_GCC_COMPATIBILITY 1 @@ -30,7 +28,7 @@ public: CPU_FEATURE_NEON_HPFP=4 }; - static int getCpuFeatures(btCpuFeature inFeature) + static int getCpuFeatures() { static int capabilities = 0; @@ -50,31 +48,29 @@ public: } #endif //BT_USE_NEON -#ifdef BT_USE_SSE -#if (_MSC_FULL_VER >= 160040219) +#ifdef BT_ALLOW_SSE4 { int cpuInfo[4]; memset(cpuInfo, 0, sizeof(cpuInfo)); unsigned long long sseExt; - __cpuid(mCpuInfo, 1); - mExt = _xgetbv(0); + __cpuid(cpuInfo, 1); + sseExt = _xgetbv(0); const int OSXSAVEFlag = (1UL << 27); const int AVXFlag = ((1UL << 28) | OSXSAVEFlag); const int FMAFlag = ((1UL << 12) | AVXFlag | OSXSAVEFlag); - if ((mCpuInfo[2] & FMAFlag) == FMAFlag && (mExt & 6) == 6) + if ((cpuInfo[2] & FMAFlag) == FMAFlag && (sseExt & 6) == 6) { capabilities |= btCpuFeatureUtility::CPU_FEATURE_FMA3; } const int SSE41Flag = (1 << 19); - if (mCpuInfo[2] & SSE41Flag) + if (cpuInfo[2] & SSE41Flag) { capabilities |= btCpuFeatureUtility::CPU_FEATURE_SSE4_1; } } -#endif//(_MSC_FULL_VER >= 160040219) -#endif//BT_USE_SSE +#endif//BT_ALLOW_SSE4 testedCapabilities = true; return capabilities; diff --git a/src/LinearMath/btScalar.h b/src/LinearMath/btScalar.h index 401e11eaa..da3e88313 100644 --- a/src/LinearMath/btScalar.h +++ b/src/LinearMath/btScalar.h @@ -74,6 +74,11 @@ inline int btGetVersion() #define BT_USE_SSE #ifdef BT_USE_SSE + +#if (_MSC_FULL_VER >= 170050727)//Visual Studio 2012 can compile SSE4/FMA3 (but SSE4/FMA3 is not enabled by default) + #define BT_ALLOW_SSE4 +#endif //(_MSC_FULL_VER >= 170050727) + //BT_USE_SSE_IN_API is disabled under Windows by default, because //it makes it harder to integrate Bullet into your application under Windows //(structured embedding Bullet structs/classes need to be 16-byte aligned)