Only enable SSE4 for Visual Studio 2012 or later (_MSC_FULL_VER >= 170050727), because it breaks the build for Visual Studio 2010
Add additional constructor for btMultiBodyJointMotor
This commit is contained in:
@@ -49,6 +49,7 @@ static inline __m128 btSimdDot3( __m128 vec0, __m128 vec1 )
|
|||||||
return _mm_add_ps( btVecSplat( result, 0 ), _mm_add_ps( btVecSplat( result, 1 ), btVecSplat( result, 2 ) ) );
|
return _mm_add_ps( btVecSplat( result, 0 ), _mm_add_ps( btVecSplat( result, 1 ), btVecSplat( result, 2 ) ) );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if defined (BT_USE_SSE4)
|
||||||
#define USE_FMA 1
|
#define USE_FMA 1
|
||||||
#define USE_FMA3_INSTEAD_FMA4 1
|
#define USE_FMA3_INSTEAD_FMA4 1
|
||||||
#define USE_SSE4_DOT 0
|
#define USE_SSE4_DOT 0
|
||||||
@@ -80,6 +81,7 @@ static inline __m128 btSimdDot3( __m128 vec0, __m128 vec1 )
|
|||||||
// c - a*b
|
// c - a*b
|
||||||
#define FMNADD(a, b, c) _mm_sub_ps(c, _mm_mul_ps(a, b))
|
#define FMNADD(a, b, c) _mm_sub_ps(c, _mm_mul_ps(a, b))
|
||||||
#endif
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
// Project Gauss Seidel or the equivalent Sequential Impulse
|
// Project Gauss Seidel or the equivalent Sequential Impulse
|
||||||
static btSimdScalar gResolveSingleConstraintRowGeneric_sse2(btSolverBody& body1, btSolverBody& body2, const btSolverConstraint& c)
|
static btSimdScalar gResolveSingleConstraintRowGeneric_sse2(btSolverBody& body1, btSolverBody& body2, const btSolverConstraint& c)
|
||||||
@@ -116,6 +118,7 @@ static btSimdScalar gResolveSingleConstraintRowGeneric_sse2(btSolverBody& body1,
|
|||||||
// Enhanced version of gResolveSingleConstraintRowGeneric_sse2 with SSE4.1 and FMA3
|
// Enhanced version of gResolveSingleConstraintRowGeneric_sse2 with SSE4.1 and FMA3
|
||||||
static btSimdScalar gResolveSingleConstraintRowGeneric_sse4_1_fma3(btSolverBody& body1, btSolverBody& body2, const btSolverConstraint& c)
|
static btSimdScalar gResolveSingleConstraintRowGeneric_sse4_1_fma3(btSolverBody& body1, btSolverBody& body2, const btSolverConstraint& c)
|
||||||
{
|
{
|
||||||
|
#if defined (BT_ALLOW_SSE4)
|
||||||
__m128 tmp = _mm_set_ps1(c.m_jacDiagABInv);
|
__m128 tmp = _mm_set_ps1(c.m_jacDiagABInv);
|
||||||
__m128 deltaImpulse = _mm_set_ps1(c.m_rhs - btScalar(c.m_appliedImpulse)*c.m_cfm);
|
__m128 deltaImpulse = _mm_set_ps1(c.m_rhs - btScalar(c.m_appliedImpulse)*c.m_cfm);
|
||||||
const __m128 lowerLimit = _mm_set_ps1(c.m_lowerLimit);
|
const __m128 lowerLimit = _mm_set_ps1(c.m_lowerLimit);
|
||||||
@@ -134,6 +137,9 @@ static btSimdScalar gResolveSingleConstraintRowGeneric_sse4_1_fma3(btSolverBody&
|
|||||||
body2.internalGetDeltaLinearVelocity().mVec128 = FMADD(_mm_mul_ps(c.m_contactNormal2.mVec128, body2.internalGetInvMass().mVec128), deltaImpulse, body2.internalGetDeltaLinearVelocity().mVec128);
|
body2.internalGetDeltaLinearVelocity().mVec128 = FMADD(_mm_mul_ps(c.m_contactNormal2.mVec128, body2.internalGetInvMass().mVec128), deltaImpulse, body2.internalGetDeltaLinearVelocity().mVec128);
|
||||||
body2.internalGetDeltaAngularVelocity().mVec128 = FMADD(c.m_angularComponentB.mVec128, deltaImpulse, body2.internalGetDeltaAngularVelocity().mVec128);
|
body2.internalGetDeltaAngularVelocity().mVec128 = FMADD(c.m_angularComponentB.mVec128, deltaImpulse, body2.internalGetDeltaAngularVelocity().mVec128);
|
||||||
return deltaImpulse;
|
return deltaImpulse;
|
||||||
|
#else
|
||||||
|
return gResolveSingleConstraintRowGeneric_sse2(body1,body2,c);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -168,6 +174,7 @@ static btSimdScalar gResolveSingleConstraintRowLowerLimit_sse2(btSolverBody& bod
|
|||||||
// Enhanced version of gResolveSingleConstraintRowLowerLimit_sse2 with SSE4.1 and FMA3
|
// Enhanced version of gResolveSingleConstraintRowLowerLimit_sse2 with SSE4.1 and FMA3
|
||||||
static btSimdScalar gResolveSingleConstraintRowLowerLimit_sse4_1_fma3(btSolverBody& body1, btSolverBody& body2, const btSolverConstraint& c)
|
static btSimdScalar gResolveSingleConstraintRowLowerLimit_sse4_1_fma3(btSolverBody& body1, btSolverBody& body2, const btSolverConstraint& c)
|
||||||
{
|
{
|
||||||
|
#ifdef BT_ALLOW_SSE4
|
||||||
__m128 tmp = _mm_set_ps1(c.m_jacDiagABInv);
|
__m128 tmp = _mm_set_ps1(c.m_jacDiagABInv);
|
||||||
__m128 deltaImpulse = _mm_set_ps1(c.m_rhs - btScalar(c.m_appliedImpulse)*c.m_cfm);
|
__m128 deltaImpulse = _mm_set_ps1(c.m_rhs - btScalar(c.m_appliedImpulse)*c.m_cfm);
|
||||||
const __m128 lowerLimit = _mm_set_ps1(c.m_lowerLimit);
|
const __m128 lowerLimit = _mm_set_ps1(c.m_lowerLimit);
|
||||||
@@ -184,6 +191,9 @@ static btSimdScalar gResolveSingleConstraintRowLowerLimit_sse4_1_fma3(btSolverBo
|
|||||||
body2.internalGetDeltaLinearVelocity().mVec128 = FMADD(_mm_mul_ps(c.m_contactNormal2.mVec128, body2.internalGetInvMass().mVec128), deltaImpulse, body2.internalGetDeltaLinearVelocity().mVec128);
|
body2.internalGetDeltaLinearVelocity().mVec128 = FMADD(_mm_mul_ps(c.m_contactNormal2.mVec128, body2.internalGetInvMass().mVec128), deltaImpulse, body2.internalGetDeltaLinearVelocity().mVec128);
|
||||||
body2.internalGetDeltaAngularVelocity().mVec128 = FMADD(c.m_angularComponentB.mVec128, deltaImpulse, body2.internalGetDeltaAngularVelocity().mVec128);
|
body2.internalGetDeltaAngularVelocity().mVec128 = FMADD(c.m_angularComponentB.mVec128, deltaImpulse, body2.internalGetDeltaAngularVelocity().mVec128);
|
||||||
return deltaImpulse;
|
return deltaImpulse;
|
||||||
|
#else
|
||||||
|
return gResolveSingleConstraintRowLowerLimit_sse2(body1,body2,c);
|
||||||
|
#endif //BT_ALLOW_SSE4
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -22,8 +22,23 @@ subject to the following restrictions:
|
|||||||
|
|
||||||
|
|
||||||
btMultiBodyJointMotor::btMultiBodyJointMotor(btMultiBody* body, int link, btScalar desiredVelocity, btScalar maxMotorImpulse)
|
btMultiBodyJointMotor::btMultiBodyJointMotor(btMultiBody* body, int link, btScalar desiredVelocity, btScalar maxMotorImpulse)
|
||||||
:btMultiBodyJointMotor(body,link,0,desiredVelocity,maxMotorImpulse)
|
:btMultiBodyConstraint(body,body,link,link,1,true),
|
||||||
|
m_desiredVelocity(desiredVelocity)
|
||||||
{
|
{
|
||||||
|
int linkDoF = 0;
|
||||||
|
|
||||||
|
m_maxAppliedImpulse = maxMotorImpulse;
|
||||||
|
// the data.m_jacobians never change, so may as well
|
||||||
|
// initialize them here
|
||||||
|
|
||||||
|
// note: we rely on the fact that data.m_jacobians are
|
||||||
|
// always initialized to zero by the Constraint ctor
|
||||||
|
|
||||||
|
unsigned int offset = 6 + (body->isMultiDof() ? body->getLink(link).m_dofOffset + linkDoF : link);
|
||||||
|
|
||||||
|
// row 0: the lower bound
|
||||||
|
// row 0: the lower bound
|
||||||
|
jacobianA(0)[offset] = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -30,7 +30,7 @@ protected:
|
|||||||
|
|
||||||
public:
|
public:
|
||||||
|
|
||||||
btMultiBodyJointMotor(btMultiBody* body, int link,btScalar desiredVelocity, btScalar maxMotorImpulse);
|
btMultiBodyJointMotor(btMultiBody* body, int link, btScalar desiredVelocity, btScalar maxMotorImpulse);
|
||||||
btMultiBodyJointMotor(btMultiBody* body, int link, int linkDoF, btScalar desiredVelocity, btScalar maxMotorImpulse);
|
btMultiBodyJointMotor(btMultiBody* body, int link, int linkDoF, btScalar desiredVelocity, btScalar maxMotorImpulse);
|
||||||
virtual ~btMultiBodyJointMotor();
|
virtual ~btMultiBodyJointMotor();
|
||||||
|
|
||||||
|
|||||||
@@ -6,10 +6,8 @@
|
|||||||
|
|
||||||
#include <string.h>//memset
|
#include <string.h>//memset
|
||||||
#ifdef BT_USE_SSE
|
#ifdef BT_USE_SSE
|
||||||
#if (_MSC_FULL_VER >= 160040219)
|
|
||||||
#include <intrin.h>
|
#include <intrin.h>
|
||||||
#endif
|
#endif
|
||||||
#endif
|
|
||||||
|
|
||||||
#if defined BT_USE_NEON
|
#if defined BT_USE_NEON
|
||||||
#define ARM_NEON_GCC_COMPATIBILITY 1
|
#define ARM_NEON_GCC_COMPATIBILITY 1
|
||||||
@@ -30,7 +28,7 @@ public:
|
|||||||
CPU_FEATURE_NEON_HPFP=4
|
CPU_FEATURE_NEON_HPFP=4
|
||||||
};
|
};
|
||||||
|
|
||||||
static int getCpuFeatures(btCpuFeature inFeature)
|
static int getCpuFeatures()
|
||||||
{
|
{
|
||||||
|
|
||||||
static int capabilities = 0;
|
static int capabilities = 0;
|
||||||
@@ -50,31 +48,29 @@ public:
|
|||||||
}
|
}
|
||||||
#endif //BT_USE_NEON
|
#endif //BT_USE_NEON
|
||||||
|
|
||||||
#ifdef BT_USE_SSE
|
#ifdef BT_ALLOW_SSE4
|
||||||
#if (_MSC_FULL_VER >= 160040219)
|
|
||||||
{
|
{
|
||||||
int cpuInfo[4];
|
int cpuInfo[4];
|
||||||
memset(cpuInfo, 0, sizeof(cpuInfo));
|
memset(cpuInfo, 0, sizeof(cpuInfo));
|
||||||
unsigned long long sseExt;
|
unsigned long long sseExt;
|
||||||
__cpuid(mCpuInfo, 1);
|
__cpuid(cpuInfo, 1);
|
||||||
mExt = _xgetbv(0);
|
sseExt = _xgetbv(0);
|
||||||
|
|
||||||
const int OSXSAVEFlag = (1UL << 27);
|
const int OSXSAVEFlag = (1UL << 27);
|
||||||
const int AVXFlag = ((1UL << 28) | OSXSAVEFlag);
|
const int AVXFlag = ((1UL << 28) | OSXSAVEFlag);
|
||||||
const int FMAFlag = ((1UL << 12) | AVXFlag | OSXSAVEFlag);
|
const int FMAFlag = ((1UL << 12) | AVXFlag | OSXSAVEFlag);
|
||||||
if ((mCpuInfo[2] & FMAFlag) == FMAFlag && (mExt & 6) == 6)
|
if ((cpuInfo[2] & FMAFlag) == FMAFlag && (sseExt & 6) == 6)
|
||||||
{
|
{
|
||||||
capabilities |= btCpuFeatureUtility::CPU_FEATURE_FMA3;
|
capabilities |= btCpuFeatureUtility::CPU_FEATURE_FMA3;
|
||||||
}
|
}
|
||||||
|
|
||||||
const int SSE41Flag = (1 << 19);
|
const int SSE41Flag = (1 << 19);
|
||||||
if (mCpuInfo[2] & SSE41Flag)
|
if (cpuInfo[2] & SSE41Flag)
|
||||||
{
|
{
|
||||||
capabilities |= btCpuFeatureUtility::CPU_FEATURE_SSE4_1;
|
capabilities |= btCpuFeatureUtility::CPU_FEATURE_SSE4_1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif//(_MSC_FULL_VER >= 160040219)
|
#endif//BT_ALLOW_SSE4
|
||||||
#endif//BT_USE_SSE
|
|
||||||
|
|
||||||
testedCapabilities = true;
|
testedCapabilities = true;
|
||||||
return capabilities;
|
return capabilities;
|
||||||
|
|||||||
@@ -74,6 +74,11 @@ inline int btGetVersion()
|
|||||||
|
|
||||||
#define BT_USE_SSE
|
#define BT_USE_SSE
|
||||||
#ifdef BT_USE_SSE
|
#ifdef BT_USE_SSE
|
||||||
|
|
||||||
|
#if (_MSC_FULL_VER >= 170050727)//Visual Studio 2012 can compile SSE4/FMA3 (but SSE4/FMA3 is not enabled by default)
|
||||||
|
#define BT_ALLOW_SSE4
|
||||||
|
#endif //(_MSC_FULL_VER >= 170050727)
|
||||||
|
|
||||||
//BT_USE_SSE_IN_API is disabled under Windows by default, because
|
//BT_USE_SSE_IN_API is disabled under Windows by default, because
|
||||||
//it makes it harder to integrate Bullet into your application under Windows
|
//it makes it harder to integrate Bullet into your application under Windows
|
||||||
//(structured embedding Bullet structs/classes need to be 16-byte aligned)
|
//(structured embedding Bullet structs/classes need to be 16-byte aligned)
|
||||||
|
|||||||
Reference in New Issue
Block a user