Only enable SSE4 for Visual Studio 2012 or later (_MSC_FULL_VER >= 170050727), because it breaks the build for Visual Studio 2010
Add additional constructor for btMultiBodyJointMotor
This commit is contained in:
@@ -49,6 +49,7 @@ static inline __m128 btSimdDot3( __m128 vec0, __m128 vec1 )
|
||||
return _mm_add_ps( btVecSplat( result, 0 ), _mm_add_ps( btVecSplat( result, 1 ), btVecSplat( result, 2 ) ) );
|
||||
}
|
||||
|
||||
#if defined (BT_USE_SSE4)
|
||||
#define USE_FMA 1
|
||||
#define USE_FMA3_INSTEAD_FMA4 1
|
||||
#define USE_SSE4_DOT 0
|
||||
@@ -80,6 +81,7 @@ static inline __m128 btSimdDot3( __m128 vec0, __m128 vec1 )
|
||||
// c - a*b
|
||||
#define FMNADD(a, b, c) _mm_sub_ps(c, _mm_mul_ps(a, b))
|
||||
#endif
|
||||
#endif
|
||||
|
||||
// Projected Gauss Seidel or the equivalent Sequential Impulse
|
||||
static btSimdScalar gResolveSingleConstraintRowGeneric_sse2(btSolverBody& body1, btSolverBody& body2, const btSolverConstraint& c)
|
||||
@@ -116,6 +118,7 @@ static btSimdScalar gResolveSingleConstraintRowGeneric_sse2(btSolverBody& body1,
|
||||
// Enhanced version of gResolveSingleConstraintRowGeneric_sse2 with SSE4.1 and FMA3
|
||||
static btSimdScalar gResolveSingleConstraintRowGeneric_sse4_1_fma3(btSolverBody& body1, btSolverBody& body2, const btSolverConstraint& c)
|
||||
{
|
||||
#if defined (BT_ALLOW_SSE4)
|
||||
__m128 tmp = _mm_set_ps1(c.m_jacDiagABInv);
|
||||
__m128 deltaImpulse = _mm_set_ps1(c.m_rhs - btScalar(c.m_appliedImpulse)*c.m_cfm);
|
||||
const __m128 lowerLimit = _mm_set_ps1(c.m_lowerLimit);
|
||||
@@ -134,6 +137,9 @@ static btSimdScalar gResolveSingleConstraintRowGeneric_sse4_1_fma3(btSolverBody&
|
||||
body2.internalGetDeltaLinearVelocity().mVec128 = FMADD(_mm_mul_ps(c.m_contactNormal2.mVec128, body2.internalGetInvMass().mVec128), deltaImpulse, body2.internalGetDeltaLinearVelocity().mVec128);
|
||||
body2.internalGetDeltaAngularVelocity().mVec128 = FMADD(c.m_angularComponentB.mVec128, deltaImpulse, body2.internalGetDeltaAngularVelocity().mVec128);
|
||||
return deltaImpulse;
|
||||
#else
|
||||
return gResolveSingleConstraintRowGeneric_sse2(body1,body2,c);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
@@ -168,6 +174,7 @@ static btSimdScalar gResolveSingleConstraintRowLowerLimit_sse2(btSolverBody& bod
|
||||
// Enhanced version of gResolveSingleConstraintRowLowerLimit_sse2 with SSE4.1 and FMA3
|
||||
static btSimdScalar gResolveSingleConstraintRowLowerLimit_sse4_1_fma3(btSolverBody& body1, btSolverBody& body2, const btSolverConstraint& c)
|
||||
{
|
||||
#ifdef BT_ALLOW_SSE4
|
||||
__m128 tmp = _mm_set_ps1(c.m_jacDiagABInv);
|
||||
__m128 deltaImpulse = _mm_set_ps1(c.m_rhs - btScalar(c.m_appliedImpulse)*c.m_cfm);
|
||||
const __m128 lowerLimit = _mm_set_ps1(c.m_lowerLimit);
|
||||
@@ -184,6 +191,9 @@ static btSimdScalar gResolveSingleConstraintRowLowerLimit_sse4_1_fma3(btSolverBo
|
||||
body2.internalGetDeltaLinearVelocity().mVec128 = FMADD(_mm_mul_ps(c.m_contactNormal2.mVec128, body2.internalGetInvMass().mVec128), deltaImpulse, body2.internalGetDeltaLinearVelocity().mVec128);
|
||||
body2.internalGetDeltaAngularVelocity().mVec128 = FMADD(c.m_angularComponentB.mVec128, deltaImpulse, body2.internalGetDeltaAngularVelocity().mVec128);
|
||||
return deltaImpulse;
|
||||
#else
|
||||
return gResolveSingleConstraintRowLowerLimit_sse2(body1,body2,c);
|
||||
#endif //BT_ALLOW_SSE4
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -22,8 +22,23 @@ subject to the following restrictions:
|
||||
|
||||
|
||||
btMultiBodyJointMotor::btMultiBodyJointMotor(btMultiBody* body, int link, btScalar desiredVelocity, btScalar maxMotorImpulse)
|
||||
:btMultiBodyJointMotor(body,link,0,desiredVelocity,maxMotorImpulse)
|
||||
:btMultiBodyConstraint(body,body,link,link,1,true),
|
||||
m_desiredVelocity(desiredVelocity)
|
||||
{
|
||||
int linkDoF = 0;
|
||||
|
||||
m_maxAppliedImpulse = maxMotorImpulse;
|
||||
// the data.m_jacobians never change, so may as well
|
||||
// initialize them here
|
||||
|
||||
// note: we rely on the fact that data.m_jacobians are
|
||||
// always initialized to zero by the Constraint ctor
|
||||
|
||||
unsigned int offset = 6 + (body->isMultiDof() ? body->getLink(link).m_dofOffset + linkDoF : link);
|
||||
|
||||
// row 0: the lower bound
|
||||
// row 0: the lower bound
|
||||
jacobianA(0)[offset] = 1;
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -30,7 +30,7 @@ protected:
|
||||
|
||||
public:
|
||||
|
||||
btMultiBodyJointMotor(btMultiBody* body, int link,btScalar desiredVelocity, btScalar maxMotorImpulse);
|
||||
btMultiBodyJointMotor(btMultiBody* body, int link, btScalar desiredVelocity, btScalar maxMotorImpulse);
|
||||
btMultiBodyJointMotor(btMultiBody* body, int link, int linkDoF, btScalar desiredVelocity, btScalar maxMotorImpulse);
|
||||
virtual ~btMultiBodyJointMotor();
|
||||
|
||||
|
||||
@@ -6,10 +6,8 @@
|
||||
|
||||
#include <string.h>//memset
|
||||
#ifdef BT_USE_SSE
|
||||
#if (_MSC_FULL_VER >= 160040219)
|
||||
#include <intrin.h>
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined BT_USE_NEON
|
||||
#define ARM_NEON_GCC_COMPATIBILITY 1
|
||||
@@ -30,7 +28,7 @@ public:
|
||||
CPU_FEATURE_NEON_HPFP=4
|
||||
};
|
||||
|
||||
static int getCpuFeatures(btCpuFeature inFeature)
|
||||
static int getCpuFeatures()
|
||||
{
|
||||
|
||||
static int capabilities = 0;
|
||||
@@ -50,31 +48,29 @@ public:
|
||||
}
|
||||
#endif //BT_USE_NEON
|
||||
|
||||
#ifdef BT_USE_SSE
|
||||
#if (_MSC_FULL_VER >= 160040219)
|
||||
#ifdef BT_ALLOW_SSE4
|
||||
{
|
||||
int cpuInfo[4];
|
||||
memset(cpuInfo, 0, sizeof(cpuInfo));
|
||||
unsigned long long sseExt;
|
||||
__cpuid(mCpuInfo, 1);
|
||||
mExt = _xgetbv(0);
|
||||
__cpuid(cpuInfo, 1);
|
||||
sseExt = _xgetbv(0);
|
||||
|
||||
const int OSXSAVEFlag = (1UL << 27);
|
||||
const int AVXFlag = ((1UL << 28) | OSXSAVEFlag);
|
||||
const int FMAFlag = ((1UL << 12) | AVXFlag | OSXSAVEFlag);
|
||||
if ((mCpuInfo[2] & FMAFlag) == FMAFlag && (mExt & 6) == 6)
|
||||
if ((cpuInfo[2] & FMAFlag) == FMAFlag && (sseExt & 6) == 6)
|
||||
{
|
||||
capabilities |= btCpuFeatureUtility::CPU_FEATURE_FMA3;
|
||||
}
|
||||
|
||||
const int SSE41Flag = (1 << 19);
|
||||
if (mCpuInfo[2] & SSE41Flag)
|
||||
if (cpuInfo[2] & SSE41Flag)
|
||||
{
|
||||
capabilities |= btCpuFeatureUtility::CPU_FEATURE_SSE4_1;
|
||||
}
|
||||
}
|
||||
#endif//(_MSC_FULL_VER >= 160040219)
|
||||
#endif//BT_USE_SSE
|
||||
#endif//BT_ALLOW_SSE4
|
||||
|
||||
testedCapabilities = true;
|
||||
return capabilities;
|
||||
|
||||
@@ -74,6 +74,11 @@ inline int btGetVersion()
|
||||
|
||||
#define BT_USE_SSE
|
||||
#ifdef BT_USE_SSE
|
||||
|
||||
#if (_MSC_FULL_VER >= 170050727)//Visual Studio 2012 can compile SSE4/FMA3 (but SSE4/FMA3 is not enabled by default)
|
||||
#define BT_ALLOW_SSE4
|
||||
#endif //(_MSC_FULL_VER >= 170050727)
|
||||
|
||||
//BT_USE_SSE_IN_API is disabled under Windows by default, because
|
||||
//it makes it harder to integrate Bullet into your application under Windows
|
||||
//(structures embedding Bullet structs/classes need to be 16-byte aligned)
|
||||
|
||||
Reference in New Issue
Block a user