Only enable SSE4 for Visual Studio 2012 or later (_MSC_FULL_VER >= 170050727); enabling it breaks the build on Visual Studio 2010.

Add additional constructor for btMultiBodyJointMotor
This commit is contained in:
erwincoumans
2014-05-01 22:23:37 -07:00
parent 0e1b90d708
commit 0b6d1af1d4
5 changed files with 39 additions and 13 deletions

View File

@@ -49,6 +49,7 @@ static inline __m128 btSimdDot3( __m128 vec0, __m128 vec1 )
return _mm_add_ps( btVecSplat( result, 0 ), _mm_add_ps( btVecSplat( result, 1 ), btVecSplat( result, 2 ) ) ); return _mm_add_ps( btVecSplat( result, 0 ), _mm_add_ps( btVecSplat( result, 1 ), btVecSplat( result, 2 ) ) );
} }
#if defined (BT_USE_SSE4)
#define USE_FMA 1 #define USE_FMA 1
#define USE_FMA3_INSTEAD_FMA4 1 #define USE_FMA3_INSTEAD_FMA4 1
#define USE_SSE4_DOT 0 #define USE_SSE4_DOT 0
@@ -80,6 +81,7 @@ static inline __m128 btSimdDot3( __m128 vec0, __m128 vec1 )
// c - a*b // c - a*b
#define FMNADD(a, b, c) _mm_sub_ps(c, _mm_mul_ps(a, b)) #define FMNADD(a, b, c) _mm_sub_ps(c, _mm_mul_ps(a, b))
#endif #endif
#endif
// Project Gauss Seidel or the equivalent Sequential Impulse // Project Gauss Seidel or the equivalent Sequential Impulse
static btSimdScalar gResolveSingleConstraintRowGeneric_sse2(btSolverBody& body1, btSolverBody& body2, const btSolverConstraint& c) static btSimdScalar gResolveSingleConstraintRowGeneric_sse2(btSolverBody& body1, btSolverBody& body2, const btSolverConstraint& c)
@@ -116,6 +118,7 @@ static btSimdScalar gResolveSingleConstraintRowGeneric_sse2(btSolverBody& body1,
// Enhanced version of gResolveSingleConstraintRowGeneric_sse2 with SSE4.1 and FMA3 // Enhanced version of gResolveSingleConstraintRowGeneric_sse2 with SSE4.1 and FMA3
static btSimdScalar gResolveSingleConstraintRowGeneric_sse4_1_fma3(btSolverBody& body1, btSolverBody& body2, const btSolverConstraint& c) static btSimdScalar gResolveSingleConstraintRowGeneric_sse4_1_fma3(btSolverBody& body1, btSolverBody& body2, const btSolverConstraint& c)
{ {
#if defined (BT_ALLOW_SSE4)
__m128 tmp = _mm_set_ps1(c.m_jacDiagABInv); __m128 tmp = _mm_set_ps1(c.m_jacDiagABInv);
__m128 deltaImpulse = _mm_set_ps1(c.m_rhs - btScalar(c.m_appliedImpulse)*c.m_cfm); __m128 deltaImpulse = _mm_set_ps1(c.m_rhs - btScalar(c.m_appliedImpulse)*c.m_cfm);
const __m128 lowerLimit = _mm_set_ps1(c.m_lowerLimit); const __m128 lowerLimit = _mm_set_ps1(c.m_lowerLimit);
@@ -134,6 +137,9 @@ static btSimdScalar gResolveSingleConstraintRowGeneric_sse4_1_fma3(btSolverBody&
body2.internalGetDeltaLinearVelocity().mVec128 = FMADD(_mm_mul_ps(c.m_contactNormal2.mVec128, body2.internalGetInvMass().mVec128), deltaImpulse, body2.internalGetDeltaLinearVelocity().mVec128); body2.internalGetDeltaLinearVelocity().mVec128 = FMADD(_mm_mul_ps(c.m_contactNormal2.mVec128, body2.internalGetInvMass().mVec128), deltaImpulse, body2.internalGetDeltaLinearVelocity().mVec128);
body2.internalGetDeltaAngularVelocity().mVec128 = FMADD(c.m_angularComponentB.mVec128, deltaImpulse, body2.internalGetDeltaAngularVelocity().mVec128); body2.internalGetDeltaAngularVelocity().mVec128 = FMADD(c.m_angularComponentB.mVec128, deltaImpulse, body2.internalGetDeltaAngularVelocity().mVec128);
return deltaImpulse; return deltaImpulse;
#else
return gResolveSingleConstraintRowGeneric_sse2(body1,body2,c);
#endif
} }
@@ -168,6 +174,7 @@ static btSimdScalar gResolveSingleConstraintRowLowerLimit_sse2(btSolverBody& bod
// Enhanced version of gResolveSingleConstraintRowGeneric_sse2 with SSE4.1 and FMA3 // Enhanced version of gResolveSingleConstraintRowGeneric_sse2 with SSE4.1 and FMA3
static btSimdScalar gResolveSingleConstraintRowLowerLimit_sse4_1_fma3(btSolverBody& body1, btSolverBody& body2, const btSolverConstraint& c) static btSimdScalar gResolveSingleConstraintRowLowerLimit_sse4_1_fma3(btSolverBody& body1, btSolverBody& body2, const btSolverConstraint& c)
{ {
#ifdef BT_ALLOW_SSE4
__m128 tmp = _mm_set_ps1(c.m_jacDiagABInv); __m128 tmp = _mm_set_ps1(c.m_jacDiagABInv);
__m128 deltaImpulse = _mm_set_ps1(c.m_rhs - btScalar(c.m_appliedImpulse)*c.m_cfm); __m128 deltaImpulse = _mm_set_ps1(c.m_rhs - btScalar(c.m_appliedImpulse)*c.m_cfm);
const __m128 lowerLimit = _mm_set_ps1(c.m_lowerLimit); const __m128 lowerLimit = _mm_set_ps1(c.m_lowerLimit);
@@ -184,6 +191,9 @@ static btSimdScalar gResolveSingleConstraintRowLowerLimit_sse4_1_fma3(btSolverBo
body2.internalGetDeltaLinearVelocity().mVec128 = FMADD(_mm_mul_ps(c.m_contactNormal2.mVec128, body2.internalGetInvMass().mVec128), deltaImpulse, body2.internalGetDeltaLinearVelocity().mVec128); body2.internalGetDeltaLinearVelocity().mVec128 = FMADD(_mm_mul_ps(c.m_contactNormal2.mVec128, body2.internalGetInvMass().mVec128), deltaImpulse, body2.internalGetDeltaLinearVelocity().mVec128);
body2.internalGetDeltaAngularVelocity().mVec128 = FMADD(c.m_angularComponentB.mVec128, deltaImpulse, body2.internalGetDeltaAngularVelocity().mVec128); body2.internalGetDeltaAngularVelocity().mVec128 = FMADD(c.m_angularComponentB.mVec128, deltaImpulse, body2.internalGetDeltaAngularVelocity().mVec128);
return deltaImpulse; return deltaImpulse;
#else
return gResolveSingleConstraintRowLowerLimit_sse2(body1,body2,c);
#endif //BT_ALLOW_SSE4
} }

View File

@@ -22,8 +22,23 @@ subject to the following restrictions:
btMultiBodyJointMotor::btMultiBodyJointMotor(btMultiBody* body, int link, btScalar desiredVelocity, btScalar maxMotorImpulse) btMultiBodyJointMotor::btMultiBodyJointMotor(btMultiBody* body, int link, btScalar desiredVelocity, btScalar maxMotorImpulse)
:btMultiBodyJointMotor(body,link,0,desiredVelocity,maxMotorImpulse) :btMultiBodyConstraint(body,body,link,link,1,true),
m_desiredVelocity(desiredVelocity)
{ {
// This overload has no linkDoF parameter, so it always uses DoF index 0 of the link.
int linkDoF = 0;
m_maxAppliedImpulse = maxMotorImpulse;
// The data.m_jacobians never change, so they may as well be
// initialized here.
// Note: we rely on data.m_jacobians always being
// initialized to zero by the Constraint ctor.
// Jacobian column for this motor: the link's m_dofOffset plus linkDoF for multi-DoF bodies,
// otherwise the link index. NOTE(review): the leading 6 presumably skips the base DoFs - confirm.
unsigned int offset = 6 + (body->isMultiDof() ? body->getLink(link).m_dofOffset + linkDoF : link);
// Row 0 (the only row touched here): set the entry for this joint DoF to 1.
// NOTE(review): this was previously labelled "the lower bound" (twice); that wording looks copied from a limit constraint - confirm.
jacobianA(0)[offset] = 1;
} }

View File

@@ -30,7 +30,7 @@ protected:
public: public:
btMultiBodyJointMotor(btMultiBody* body, int link,btScalar desiredVelocity, btScalar maxMotorImpulse); btMultiBodyJointMotor(btMultiBody* body, int link, btScalar desiredVelocity, btScalar maxMotorImpulse);
btMultiBodyJointMotor(btMultiBody* body, int link, int linkDoF, btScalar desiredVelocity, btScalar maxMotorImpulse); btMultiBodyJointMotor(btMultiBody* body, int link, int linkDoF, btScalar desiredVelocity, btScalar maxMotorImpulse);
virtual ~btMultiBodyJointMotor(); virtual ~btMultiBodyJointMotor();

View File

@@ -6,10 +6,8 @@
#include <string.h>//memset #include <string.h>//memset
#ifdef BT_USE_SSE #ifdef BT_USE_SSE
#if (_MSC_FULL_VER >= 160040219)
#include <intrin.h> #include <intrin.h>
#endif #endif
#endif
#if defined BT_USE_NEON #if defined BT_USE_NEON
#define ARM_NEON_GCC_COMPATIBILITY 1 #define ARM_NEON_GCC_COMPATIBILITY 1
@@ -30,7 +28,7 @@ public:
CPU_FEATURE_NEON_HPFP=4 CPU_FEATURE_NEON_HPFP=4
}; };
static int getCpuFeatures(btCpuFeature inFeature) static int getCpuFeatures()
{ {
static int capabilities = 0; static int capabilities = 0;
@@ -50,31 +48,29 @@ public:
} }
#endif //BT_USE_NEON #endif //BT_USE_NEON
#ifdef BT_USE_SSE #ifdef BT_ALLOW_SSE4
#if (_MSC_FULL_VER >= 160040219)
{ {
int cpuInfo[4]; int cpuInfo[4];
memset(cpuInfo, 0, sizeof(cpuInfo)); memset(cpuInfo, 0, sizeof(cpuInfo));
unsigned long long sseExt; unsigned long long sseExt;
__cpuid(mCpuInfo, 1); __cpuid(cpuInfo, 1);
mExt = _xgetbv(0); sseExt = _xgetbv(0);
const int OSXSAVEFlag = (1UL << 27); const int OSXSAVEFlag = (1UL << 27);
const int AVXFlag = ((1UL << 28) | OSXSAVEFlag); const int AVXFlag = ((1UL << 28) | OSXSAVEFlag);
const int FMAFlag = ((1UL << 12) | AVXFlag | OSXSAVEFlag); const int FMAFlag = ((1UL << 12) | AVXFlag | OSXSAVEFlag);
if ((mCpuInfo[2] & FMAFlag) == FMAFlag && (mExt & 6) == 6) if ((cpuInfo[2] & FMAFlag) == FMAFlag && (sseExt & 6) == 6)
{ {
capabilities |= btCpuFeatureUtility::CPU_FEATURE_FMA3; capabilities |= btCpuFeatureUtility::CPU_FEATURE_FMA3;
} }
const int SSE41Flag = (1 << 19); const int SSE41Flag = (1 << 19);
if (mCpuInfo[2] & SSE41Flag) if (cpuInfo[2] & SSE41Flag)
{ {
capabilities |= btCpuFeatureUtility::CPU_FEATURE_SSE4_1; capabilities |= btCpuFeatureUtility::CPU_FEATURE_SSE4_1;
} }
} }
#endif//(_MSC_FULL_VER >= 160040219) #endif//BT_ALLOW_SSE4
#endif//BT_USE_SSE
testedCapabilities = true; testedCapabilities = true;
return capabilities; return capabilities;

View File

@@ -74,6 +74,11 @@ inline int btGetVersion()
#define BT_USE_SSE #define BT_USE_SSE
#ifdef BT_USE_SSE #ifdef BT_USE_SSE
#if (_MSC_FULL_VER >= 170050727)//Visual Studio 2012 or later can compile SSE4/FMA3 (but SSE4/FMA3 is not enabled by default)
#define BT_ALLOW_SSE4
#endif //(_MSC_FULL_VER >= 170050727)
//BT_USE_SSE_IN_API is disabled under Windows by default, because //BT_USE_SSE_IN_API is disabled under Windows by default, because
//it makes it harder to integrate Bullet into your application under Windows //it makes it harder to integrate Bullet into your application under Windows
//(structured embedding Bullet structs/classes need to be 16-byte aligned) //(structured embedding Bullet structs/classes need to be 16-byte aligned)