Only enable SSE4 for Visual Studio 2012 or later (_MSC_FULL_VER >= 170050727); it breaks the build on Visual Studio 2010.

Add additional constructor for btMultiBodyJointMotor
This commit is contained in:
erwincoumans
2014-05-01 22:23:37 -07:00
parent 0e1b90d708
commit 0b6d1af1d4
5 changed files with 39 additions and 13 deletions

View File

@@ -49,6 +49,7 @@ static inline __m128 btSimdDot3( __m128 vec0, __m128 vec1 )
return _mm_add_ps( btVecSplat( result, 0 ), _mm_add_ps( btVecSplat( result, 1 ), btVecSplat( result, 2 ) ) );
}
#if defined (BT_USE_SSE4)
#define USE_FMA 1
#define USE_FMA3_INSTEAD_FMA4 1
#define USE_SSE4_DOT 0
@@ -80,6 +81,7 @@ static inline __m128 btSimdDot3( __m128 vec0, __m128 vec1 )
// c - a*b
#define FMNADD(a, b, c) _mm_sub_ps(c, _mm_mul_ps(a, b))
#endif
#endif
// Project Gauss Seidel or the equivalent Sequential Impulse
static btSimdScalar gResolveSingleConstraintRowGeneric_sse2(btSolverBody& body1, btSolverBody& body2, const btSolverConstraint& c)
@@ -116,6 +118,7 @@ static btSimdScalar gResolveSingleConstraintRowGeneric_sse2(btSolverBody& body1,
// Enhanced version of gResolveSingleConstraintRowGeneric_sse2 with SSE4.1 and FMA3
static btSimdScalar gResolveSingleConstraintRowGeneric_sse4_1_fma3(btSolverBody& body1, btSolverBody& body2, const btSolverConstraint& c)
{
#if defined (BT_ALLOW_SSE4)
__m128 tmp = _mm_set_ps1(c.m_jacDiagABInv);
__m128 deltaImpulse = _mm_set_ps1(c.m_rhs - btScalar(c.m_appliedImpulse)*c.m_cfm);
const __m128 lowerLimit = _mm_set_ps1(c.m_lowerLimit);
@@ -134,6 +137,9 @@ static btSimdScalar gResolveSingleConstraintRowGeneric_sse4_1_fma3(btSolverBody&
body2.internalGetDeltaLinearVelocity().mVec128 = FMADD(_mm_mul_ps(c.m_contactNormal2.mVec128, body2.internalGetInvMass().mVec128), deltaImpulse, body2.internalGetDeltaLinearVelocity().mVec128);
body2.internalGetDeltaAngularVelocity().mVec128 = FMADD(c.m_angularComponentB.mVec128, deltaImpulse, body2.internalGetDeltaAngularVelocity().mVec128);
return deltaImpulse;
#else
return gResolveSingleConstraintRowGeneric_sse2(body1,body2,c);
#endif
}
@@ -168,6 +174,7 @@ static btSimdScalar gResolveSingleConstraintRowLowerLimit_sse2(btSolverBody& bod
// Enhanced version of gResolveSingleConstraintRowLowerLimit_sse2 with SSE4.1 and FMA3
static btSimdScalar gResolveSingleConstraintRowLowerLimit_sse4_1_fma3(btSolverBody& body1, btSolverBody& body2, const btSolverConstraint& c)
{
#ifdef BT_ALLOW_SSE4
__m128 tmp = _mm_set_ps1(c.m_jacDiagABInv);
__m128 deltaImpulse = _mm_set_ps1(c.m_rhs - btScalar(c.m_appliedImpulse)*c.m_cfm);
const __m128 lowerLimit = _mm_set_ps1(c.m_lowerLimit);
@@ -184,6 +191,9 @@ static btSimdScalar gResolveSingleConstraintRowLowerLimit_sse4_1_fma3(btSolverBo
body2.internalGetDeltaLinearVelocity().mVec128 = FMADD(_mm_mul_ps(c.m_contactNormal2.mVec128, body2.internalGetInvMass().mVec128), deltaImpulse, body2.internalGetDeltaLinearVelocity().mVec128);
body2.internalGetDeltaAngularVelocity().mVec128 = FMADD(c.m_angularComponentB.mVec128, deltaImpulse, body2.internalGetDeltaAngularVelocity().mVec128);
return deltaImpulse;
#else
return gResolveSingleConstraintRowLowerLimit_sse2(body1,body2,c);
#endif //BT_ALLOW_SSE4
}

View File

@@ -22,8 +22,23 @@ subject to the following restrictions:
btMultiBodyJointMotor::btMultiBodyJointMotor(btMultiBody* body, int link, btScalar desiredVelocity, btScalar maxMotorImpulse)
:btMultiBodyJointMotor(body,link,0,desiredVelocity,maxMotorImpulse)
:btMultiBodyConstraint(body,body,link,link,1,true),
m_desiredVelocity(desiredVelocity)
{
int linkDoF = 0;
m_maxAppliedImpulse = maxMotorImpulse;
// the data.m_jacobians never change, so may as well
// initialize them here
// note: we rely on the fact that data.m_jacobians are
// always initialized to zero by the Constraint ctor
unsigned int offset = 6 + (body->isMultiDof() ? body->getLink(link).m_dofOffset + linkDoF : link);
// row 0: the lower bound
// row 0: the lower bound
jacobianA(0)[offset] = 1;
}

View File

@@ -30,7 +30,7 @@ protected:
public:
btMultiBodyJointMotor(btMultiBody* body, int link,btScalar desiredVelocity, btScalar maxMotorImpulse);
btMultiBodyJointMotor(btMultiBody* body, int link, btScalar desiredVelocity, btScalar maxMotorImpulse);
btMultiBodyJointMotor(btMultiBody* body, int link, int linkDoF, btScalar desiredVelocity, btScalar maxMotorImpulse);
virtual ~btMultiBodyJointMotor();

View File

@@ -6,10 +6,8 @@
#include <string.h>//memset
#ifdef BT_USE_SSE
#if (_MSC_FULL_VER >= 160040219)
#include <intrin.h>
#endif
#endif
#if defined BT_USE_NEON
#define ARM_NEON_GCC_COMPATIBILITY 1
@@ -30,7 +28,7 @@ public:
CPU_FEATURE_NEON_HPFP=4
};
static int getCpuFeatures(btCpuFeature inFeature)
static int getCpuFeatures()
{
static int capabilities = 0;
@@ -50,31 +48,29 @@ public:
}
#endif //BT_USE_NEON
#ifdef BT_USE_SSE
#if (_MSC_FULL_VER >= 160040219)
#ifdef BT_ALLOW_SSE4
{
int cpuInfo[4];
memset(cpuInfo, 0, sizeof(cpuInfo));
unsigned long long sseExt;
__cpuid(mCpuInfo, 1);
mExt = _xgetbv(0);
__cpuid(cpuInfo, 1);
sseExt = _xgetbv(0);
const int OSXSAVEFlag = (1UL << 27);
const int AVXFlag = ((1UL << 28) | OSXSAVEFlag);
const int FMAFlag = ((1UL << 12) | AVXFlag | OSXSAVEFlag);
if ((mCpuInfo[2] & FMAFlag) == FMAFlag && (mExt & 6) == 6)
if ((cpuInfo[2] & FMAFlag) == FMAFlag && (sseExt & 6) == 6)
{
capabilities |= btCpuFeatureUtility::CPU_FEATURE_FMA3;
}
const int SSE41Flag = (1 << 19);
if (mCpuInfo[2] & SSE41Flag)
if (cpuInfo[2] & SSE41Flag)
{
capabilities |= btCpuFeatureUtility::CPU_FEATURE_SSE4_1;
}
}
#endif//(_MSC_FULL_VER >= 160040219)
#endif//BT_USE_SSE
#endif//BT_ALLOW_SSE4
testedCapabilities = true;
return capabilities;

View File

@@ -74,6 +74,11 @@ inline int btGetVersion()
#define BT_USE_SSE
#ifdef BT_USE_SSE
#if (_MSC_FULL_VER >= 170050727)//Visual Studio 2012 can compile SSE4/FMA3 (but SSE4/FMA3 is not enabled by default)
#define BT_ALLOW_SSE4
#endif //(_MSC_FULL_VER >= 170050727)
//BT_USE_SSE_IN_API is disabled under Windows by default, because
//it makes it harder to integrate Bullet into your application under Windows
//(structured embedding Bullet structs/classes need to be 16-byte aligned)