Added SSE4/FMA optimized constraint row solver implementation for btSequentialImpulseConstraintSolver,
thanks to Vladimir Bondarev (https://github.com/VladimirBondarev/bullet3/tree/c25d)
This commit is contained in:
@@ -10,7 +10,7 @@ IF (USE_GLUT)
|
|||||||
# ENDIF()
|
# ENDIF()
|
||||||
SET(SharedDemoSubdirs
|
SET(SharedDemoSubdirs
|
||||||
OpenGL
|
OpenGL
|
||||||
CcdPhysicsDemo SliderConstraintDemo GenericJointDemo Raytracer
|
CcdPhysicsDemo ConstraintDemo SliderConstraintDemo GenericJointDemo Raytracer
|
||||||
RagdollDemo ForkLiftDemo BasicDemo FeatherstoneMultiBodyDemo RollingFrictionDemo RaytestDemo VoronoiFractureDemo
|
RagdollDemo ForkLiftDemo BasicDemo FeatherstoneMultiBodyDemo RollingFrictionDemo RaytestDemo VoronoiFractureDemo
|
||||||
GyroscopicDemo FractureDemo Box2dDemo BspDemo MovingConcaveDemo VehicleDemo
|
GyroscopicDemo FractureDemo Box2dDemo BspDemo MovingConcaveDemo VehicleDemo
|
||||||
UserCollisionAlgorithm CharacterDemo SoftDemo
|
UserCollisionAlgorithm CharacterDemo SoftDemo
|
||||||
|
|||||||
@@ -14,14 +14,14 @@
|
|||||||
INCLUDE_DIRECTORIES(
|
INCLUDE_DIRECTORIES(
|
||||||
${BULLET_PHYSICS_SOURCE_DIR}/src
|
${BULLET_PHYSICS_SOURCE_DIR}/src
|
||||||
../OpenGL
|
../OpenGL
|
||||||
${BULLET_PHYSICS_SOURCE_DIR}/Extras/Serialize/BulletFileLoader
|
${GLUT_INCLUDE_DIR}
|
||||||
${BULLET_PHYSICS_SOURCE_DIR}/Extras/Serialize/BulletWorldImporter
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
IF (USE_GLUT)
|
IF (USE_GLUT)
|
||||||
LINK_LIBRARIES(
|
LINK_LIBRARIES(
|
||||||
OpenGLSupport BulletWorldImporter BulletDynamics BulletCollision LinearMath BulletFileLoader ${GLUT_glut_LIBRARY} ${OPENGL_gl_LIBRARY} ${OPENGL_glu_LIBRARY}
|
OpenGLSupport BulletDynamics BulletCollision LinearMath ${GLUT_glut_LIBRARY}
|
||||||
|
${OPENGL_gl_LIBRARY} ${OPENGL_glu_LIBRARY}
|
||||||
)
|
)
|
||||||
|
|
||||||
ADD_EXECUTABLE(AppConstraintDemo
|
ADD_EXECUTABLE(AppConstraintDemo
|
||||||
@@ -31,7 +31,7 @@ IF (USE_GLUT)
|
|||||||
)
|
)
|
||||||
ELSE (USE_GLUT)
|
ELSE (USE_GLUT)
|
||||||
LINK_LIBRARIES(
|
LINK_LIBRARIES(
|
||||||
OpenGLSupport BulletWorldImporter BulletDynamics BulletCollision LinearMath BulletFileLoader ${OPENGL_gl_LIBRARY} ${OPENGL_glu_LIBRARY}
|
OpenGLSupport BulletDynamics BulletCollision LinearMath ${OPENGL_gl_LIBRARY} ${OPENGL_glu_LIBRARY}
|
||||||
)
|
)
|
||||||
|
|
||||||
ADD_EXECUTABLE(AppConstraintDemo
|
ADD_EXECUTABLE(AppConstraintDemo
|
||||||
|
|||||||
@@ -37,30 +37,53 @@ int gNumSplitImpulseRecoveries = 0;
|
|||||||
|
|
||||||
#include "BulletDynamics/Dynamics/btRigidBody.h"
|
#include "BulletDynamics/Dynamics/btRigidBody.h"
|
||||||
|
|
||||||
btSequentialImpulseConstraintSolver::btSequentialImpulseConstraintSolver()
|
|
||||||
:m_btSeed2(0)
|
|
||||||
{
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
btSequentialImpulseConstraintSolver::~btSequentialImpulseConstraintSolver()
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifdef USE_SIMD
|
#ifdef USE_SIMD
|
||||||
#include <emmintrin.h>
|
#include <intrin.h>//is it safe to include this header?
|
||||||
|
|
||||||
#define btVecSplat(x, e) _mm_shuffle_ps(x, x, _MM_SHUFFLE(e,e,e,e))
|
#define btVecSplat(x, e) _mm_shuffle_ps(x, x, _MM_SHUFFLE(e,e,e,e))
|
||||||
static inline __m128 btSimdDot3( __m128 vec0, __m128 vec1 )
|
static inline __m128 btSimdDot3( __m128 vec0, __m128 vec1 )
|
||||||
{
|
{
|
||||||
__m128 result = _mm_mul_ps( vec0, vec1);
|
__m128 result = _mm_mul_ps( vec0, vec1);
|
||||||
return _mm_add_ps( btVecSplat( result, 0 ), _mm_add_ps( btVecSplat( result, 1 ), btVecSplat( result, 2 ) ) );
|
return _mm_add_ps( btVecSplat( result, 0 ), _mm_add_ps( btVecSplat( result, 1 ), btVecSplat( result, 2 ) ) );
|
||||||
}
|
}
|
||||||
#endif//USE_SIMD
|
|
||||||
|
#define USE_FMA 1
|
||||||
|
#define USE_FMA3_INSTEAD_FMA4 1
|
||||||
|
#define USE_SSE4_DOT 0
|
||||||
|
|
||||||
|
#define SSE4_DP(a, b) _mm_dp_ps(a, b, 0x7f)
|
||||||
|
#define SSE4_DP_FP(a, b) _mm_cvtss_f32(_mm_dp_ps(a, b, 0x7f))
|
||||||
|
|
||||||
|
#if USE_SSE4_DOT
|
||||||
|
#define DOT_PRODUCT(a, b) SSE4_DP(a, b)
|
||||||
|
#else
|
||||||
|
#define DOT_PRODUCT(a, b) btSimdDot3(a, b)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if USE_FMA
|
||||||
|
#if USE_FMA3_INSTEAD_FMA4
|
||||||
|
// a*b + c
|
||||||
|
#define FMADD(a, b, c) _mm_fmadd_ps(a, b, c)
|
||||||
|
// -(a*b) + c
|
||||||
|
#define FMNADD(a, b, c) _mm_fnmadd_ps(a, b, c)
|
||||||
|
#else // USE_FMA3
|
||||||
|
// a*b + c
|
||||||
|
#define FMADD(a, b, c) _mm_macc_ps(a, b, c)
|
||||||
|
// -(a*b) + c
|
||||||
|
#define FMNADD(a, b, c) _mm_nmacc_ps(a, b, c)
|
||||||
|
#endif
|
||||||
|
#else // USE_FMA
|
||||||
|
// c + a*b
|
||||||
|
#define FMADD(a, b, c) _mm_add_ps(c, _mm_mul_ps(a, b))
|
||||||
|
// c - a*b
|
||||||
|
#define FMNADD(a, b, c) _mm_sub_ps(c, _mm_mul_ps(a, b))
|
||||||
|
#endif
|
||||||
|
|
||||||
// Project Gauss Seidel or the equivalent Sequential Impulse
|
// Project Gauss Seidel or the equivalent Sequential Impulse
|
||||||
btSimdScalar btSequentialImpulseConstraintSolver::resolveSingleConstraintRowGenericSIMD(btSolverBody& body1,btSolverBody& body2,const btSolverConstraint& c)
|
static btSimdScalar gResolveSingleConstraintRowGeneric_sse2(btSolverBody& body1, btSolverBody& body2, const btSolverConstraint& c)
|
||||||
{
|
{
|
||||||
#ifdef USE_SIMD
|
|
||||||
__m128 cpAppliedImp = _mm_set1_ps(c.m_appliedImpulse);
|
__m128 cpAppliedImp = _mm_set1_ps(c.m_appliedImpulse);
|
||||||
__m128 lowerLimit1 = _mm_set1_ps(c.m_lowerLimit);
|
__m128 lowerLimit1 = _mm_set1_ps(c.m_lowerLimit);
|
||||||
__m128 upperLimit1 = _mm_set1_ps(c.m_upperLimit);
|
__m128 upperLimit1 = _mm_set1_ps(c.m_upperLimit);
|
||||||
@@ -86,8 +109,92 @@ btSimdScalar btSequentialImpulseConstraintSolver::resolveSingleConstraintRowGene
|
|||||||
body1.internalGetDeltaAngularVelocity().mVec128 = _mm_add_ps(body1.internalGetDeltaAngularVelocity().mVec128, _mm_mul_ps(c.m_angularComponentA.mVec128, impulseMagnitude));
|
body1.internalGetDeltaAngularVelocity().mVec128 = _mm_add_ps(body1.internalGetDeltaAngularVelocity().mVec128, _mm_mul_ps(c.m_angularComponentA.mVec128, impulseMagnitude));
|
||||||
body2.internalGetDeltaLinearVelocity().mVec128 = _mm_add_ps(body2.internalGetDeltaLinearVelocity().mVec128, _mm_mul_ps(linearComponentB, impulseMagnitude));
|
body2.internalGetDeltaLinearVelocity().mVec128 = _mm_add_ps(body2.internalGetDeltaLinearVelocity().mVec128, _mm_mul_ps(linearComponentB, impulseMagnitude));
|
||||||
body2.internalGetDeltaAngularVelocity().mVec128 = _mm_add_ps(body2.internalGetDeltaAngularVelocity().mVec128, _mm_mul_ps(c.m_angularComponentB.mVec128, impulseMagnitude));
|
body2.internalGetDeltaAngularVelocity().mVec128 = _mm_add_ps(body2.internalGetDeltaAngularVelocity().mVec128, _mm_mul_ps(c.m_angularComponentB.mVec128, impulseMagnitude));
|
||||||
|
|
||||||
return deltaImpulse;
|
return deltaImpulse;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Enhanced version of gResolveSingleConstraintRowGeneric_sse2 with SSE4.1 and FMA3
|
||||||
|
static btSimdScalar gResolveSingleConstraintRowGeneric_sse4_1_fma3(btSolverBody& body1, btSolverBody& body2, const btSolverConstraint& c)
|
||||||
|
{
|
||||||
|
__m128 tmp = _mm_set_ps1(c.m_jacDiagABInv);
|
||||||
|
__m128 deltaImpulse = _mm_set_ps1(c.m_rhs - btScalar(c.m_appliedImpulse)*c.m_cfm);
|
||||||
|
const __m128 lowerLimit = _mm_set_ps1(c.m_lowerLimit);
|
||||||
|
const __m128 upperLimit = _mm_set_ps1(c.m_upperLimit);
|
||||||
|
const __m128 deltaVel1Dotn = _mm_add_ps(DOT_PRODUCT(c.m_contactNormal1.mVec128, body1.internalGetDeltaLinearVelocity().mVec128), DOT_PRODUCT(c.m_relpos1CrossNormal.mVec128, body1.internalGetDeltaAngularVelocity().mVec128));
|
||||||
|
const __m128 deltaVel2Dotn = _mm_add_ps(DOT_PRODUCT(c.m_contactNormal2.mVec128, body2.internalGetDeltaLinearVelocity().mVec128), DOT_PRODUCT(c.m_relpos2CrossNormal.mVec128, body2.internalGetDeltaAngularVelocity().mVec128));
|
||||||
|
deltaImpulse = FMNADD(deltaVel1Dotn, tmp, deltaImpulse);
|
||||||
|
deltaImpulse = FMNADD(deltaVel2Dotn, tmp, deltaImpulse);
|
||||||
|
tmp = _mm_add_ps(c.m_appliedImpulse, deltaImpulse); // sum
|
||||||
|
const __m128 maskLower = _mm_cmpgt_ps(tmp, lowerLimit);
|
||||||
|
const __m128 maskUpper = _mm_cmpgt_ps(upperLimit, tmp);
|
||||||
|
deltaImpulse = _mm_blendv_ps(_mm_sub_ps(lowerLimit, c.m_appliedImpulse), _mm_blendv_ps(_mm_sub_ps(upperLimit, c.m_appliedImpulse), deltaImpulse, maskUpper), maskLower);
|
||||||
|
c.m_appliedImpulse = _mm_blendv_ps(lowerLimit, _mm_blendv_ps(upperLimit, tmp, maskUpper), maskLower);
|
||||||
|
body1.internalGetDeltaLinearVelocity().mVec128 = FMADD(_mm_mul_ps(c.m_contactNormal1.mVec128, body1.internalGetInvMass().mVec128), deltaImpulse, body1.internalGetDeltaLinearVelocity().mVec128);
|
||||||
|
body1.internalGetDeltaAngularVelocity().mVec128 = FMADD(c.m_angularComponentA.mVec128, deltaImpulse, body1.internalGetDeltaAngularVelocity().mVec128);
|
||||||
|
body2.internalGetDeltaLinearVelocity().mVec128 = FMADD(_mm_mul_ps(c.m_contactNormal2.mVec128, body2.internalGetInvMass().mVec128), deltaImpulse, body2.internalGetDeltaLinearVelocity().mVec128);
|
||||||
|
body2.internalGetDeltaAngularVelocity().mVec128 = FMADD(c.m_angularComponentB.mVec128, deltaImpulse, body2.internalGetDeltaAngularVelocity().mVec128);
|
||||||
|
return deltaImpulse;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static btSimdScalar gResolveSingleConstraintRowLowerLimit_sse2(btSolverBody& body1, btSolverBody& body2, const btSolverConstraint& c)
|
||||||
|
{
|
||||||
|
__m128 cpAppliedImp = _mm_set1_ps(c.m_appliedImpulse);
|
||||||
|
__m128 lowerLimit1 = _mm_set1_ps(c.m_lowerLimit);
|
||||||
|
__m128 upperLimit1 = _mm_set1_ps(c.m_upperLimit);
|
||||||
|
btSimdScalar deltaImpulse = _mm_sub_ps(_mm_set1_ps(c.m_rhs), _mm_mul_ps(_mm_set1_ps(c.m_appliedImpulse), _mm_set1_ps(c.m_cfm)));
|
||||||
|
__m128 deltaVel1Dotn = _mm_add_ps(btSimdDot3(c.m_contactNormal1.mVec128, body1.internalGetDeltaLinearVelocity().mVec128), btSimdDot3(c.m_relpos1CrossNormal.mVec128, body1.internalGetDeltaAngularVelocity().mVec128));
|
||||||
|
__m128 deltaVel2Dotn = _mm_add_ps(btSimdDot3(c.m_contactNormal2.mVec128, body2.internalGetDeltaLinearVelocity().mVec128), btSimdDot3(c.m_relpos2CrossNormal.mVec128, body2.internalGetDeltaAngularVelocity().mVec128));
|
||||||
|
deltaImpulse = _mm_sub_ps(deltaImpulse, _mm_mul_ps(deltaVel1Dotn, _mm_set1_ps(c.m_jacDiagABInv)));
|
||||||
|
deltaImpulse = _mm_sub_ps(deltaImpulse, _mm_mul_ps(deltaVel2Dotn, _mm_set1_ps(c.m_jacDiagABInv)));
|
||||||
|
btSimdScalar sum = _mm_add_ps(cpAppliedImp, deltaImpulse);
|
||||||
|
btSimdScalar resultLowerLess, resultUpperLess;
|
||||||
|
resultLowerLess = _mm_cmplt_ps(sum, lowerLimit1);
|
||||||
|
resultUpperLess = _mm_cmplt_ps(sum, upperLimit1);
|
||||||
|
__m128 lowMinApplied = _mm_sub_ps(lowerLimit1, cpAppliedImp);
|
||||||
|
deltaImpulse = _mm_or_ps(_mm_and_ps(resultLowerLess, lowMinApplied), _mm_andnot_ps(resultLowerLess, deltaImpulse));
|
||||||
|
c.m_appliedImpulse = _mm_or_ps(_mm_and_ps(resultLowerLess, lowerLimit1), _mm_andnot_ps(resultLowerLess, sum));
|
||||||
|
__m128 linearComponentA = _mm_mul_ps(c.m_contactNormal1.mVec128, body1.internalGetInvMass().mVec128);
|
||||||
|
__m128 linearComponentB = _mm_mul_ps(c.m_contactNormal2.mVec128, body2.internalGetInvMass().mVec128);
|
||||||
|
__m128 impulseMagnitude = deltaImpulse;
|
||||||
|
body1.internalGetDeltaLinearVelocity().mVec128 = _mm_add_ps(body1.internalGetDeltaLinearVelocity().mVec128, _mm_mul_ps(linearComponentA, impulseMagnitude));
|
||||||
|
body1.internalGetDeltaAngularVelocity().mVec128 = _mm_add_ps(body1.internalGetDeltaAngularVelocity().mVec128, _mm_mul_ps(c.m_angularComponentA.mVec128, impulseMagnitude));
|
||||||
|
body2.internalGetDeltaLinearVelocity().mVec128 = _mm_add_ps(body2.internalGetDeltaLinearVelocity().mVec128, _mm_mul_ps(linearComponentB, impulseMagnitude));
|
||||||
|
body2.internalGetDeltaAngularVelocity().mVec128 = _mm_add_ps(body2.internalGetDeltaAngularVelocity().mVec128, _mm_mul_ps(c.m_angularComponentB.mVec128, impulseMagnitude));
|
||||||
|
return deltaImpulse;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Enhanced version of gResolveSingleConstraintRowGeneric_sse2 with SSE4.1 and FMA3
|
||||||
|
static btSimdScalar gResolveSingleConstraintRowLowerLimit_sse4_1_fma3(btSolverBody& body1, btSolverBody& body2, const btSolverConstraint& c)
|
||||||
|
{
|
||||||
|
__m128 tmp = _mm_set_ps1(c.m_jacDiagABInv);
|
||||||
|
__m128 deltaImpulse = _mm_set_ps1(c.m_rhs - btScalar(c.m_appliedImpulse)*c.m_cfm);
|
||||||
|
const __m128 lowerLimit = _mm_set_ps1(c.m_lowerLimit);
|
||||||
|
const __m128 deltaVel1Dotn = _mm_add_ps(DOT_PRODUCT(c.m_contactNormal1.mVec128, body1.internalGetDeltaLinearVelocity().mVec128), DOT_PRODUCT(c.m_relpos1CrossNormal.mVec128, body1.internalGetDeltaAngularVelocity().mVec128));
|
||||||
|
const __m128 deltaVel2Dotn = _mm_add_ps(DOT_PRODUCT(c.m_contactNormal2.mVec128, body2.internalGetDeltaLinearVelocity().mVec128), DOT_PRODUCT(c.m_relpos2CrossNormal.mVec128, body2.internalGetDeltaAngularVelocity().mVec128));
|
||||||
|
deltaImpulse = FMNADD(deltaVel1Dotn, tmp, deltaImpulse);
|
||||||
|
deltaImpulse = FMNADD(deltaVel2Dotn, tmp, deltaImpulse);
|
||||||
|
tmp = _mm_add_ps(c.m_appliedImpulse, deltaImpulse);
|
||||||
|
const __m128 mask = _mm_cmpgt_ps(tmp, lowerLimit);
|
||||||
|
deltaImpulse = _mm_blendv_ps(_mm_sub_ps(lowerLimit, c.m_appliedImpulse), deltaImpulse, mask);
|
||||||
|
c.m_appliedImpulse = _mm_blendv_ps(lowerLimit, tmp, mask);
|
||||||
|
body1.internalGetDeltaLinearVelocity().mVec128 = FMADD(_mm_mul_ps(c.m_contactNormal1.mVec128, body1.internalGetInvMass().mVec128), deltaImpulse, body1.internalGetDeltaLinearVelocity().mVec128);
|
||||||
|
body1.internalGetDeltaAngularVelocity().mVec128 = FMADD(c.m_angularComponentA.mVec128, deltaImpulse, body1.internalGetDeltaAngularVelocity().mVec128);
|
||||||
|
body2.internalGetDeltaLinearVelocity().mVec128 = FMADD(_mm_mul_ps(c.m_contactNormal2.mVec128, body2.internalGetInvMass().mVec128), deltaImpulse, body2.internalGetDeltaLinearVelocity().mVec128);
|
||||||
|
body2.internalGetDeltaAngularVelocity().mVec128 = FMADD(c.m_angularComponentB.mVec128, deltaImpulse, body2.internalGetDeltaAngularVelocity().mVec128);
|
||||||
|
return deltaImpulse;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#endif //USE_SIMD
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
btSimdScalar btSequentialImpulseConstraintSolver::resolveSingleConstraintRowGenericSIMD(btSolverBody& body1,btSolverBody& body2,const btSolverConstraint& c)
|
||||||
|
{
|
||||||
|
#ifdef USE_SIMD
|
||||||
|
return m_resolveSingleConstraintRowGeneric(body1, body2, c);
|
||||||
#else
|
#else
|
||||||
return resolveSingleConstraintRowGeneric(body1,body2,c);
|
return resolveSingleConstraintRowGeneric(body1,body2,c);
|
||||||
#endif
|
#endif
|
||||||
@@ -129,29 +236,7 @@ btSimdScalar btSequentialImpulseConstraintSolver::resolveSingleConstraintRowGene
|
|||||||
btSimdScalar btSequentialImpulseConstraintSolver::resolveSingleConstraintRowLowerLimitSIMD(btSolverBody& body1,btSolverBody& body2,const btSolverConstraint& c)
|
btSimdScalar btSequentialImpulseConstraintSolver::resolveSingleConstraintRowLowerLimitSIMD(btSolverBody& body1,btSolverBody& body2,const btSolverConstraint& c)
|
||||||
{
|
{
|
||||||
#ifdef USE_SIMD
|
#ifdef USE_SIMD
|
||||||
__m128 cpAppliedImp = _mm_set1_ps(c.m_appliedImpulse);
|
return m_resolveSingleConstraintRowLowerLimit(body1, body2, c);
|
||||||
__m128 lowerLimit1 = _mm_set1_ps(c.m_lowerLimit);
|
|
||||||
__m128 upperLimit1 = _mm_set1_ps(c.m_upperLimit);
|
|
||||||
btSimdScalar deltaImpulse = _mm_sub_ps(_mm_set1_ps(c.m_rhs), _mm_mul_ps(_mm_set1_ps(c.m_appliedImpulse),_mm_set1_ps(c.m_cfm)));
|
|
||||||
__m128 deltaVel1Dotn = _mm_add_ps(btSimdDot3(c.m_contactNormal1.mVec128,body1.internalGetDeltaLinearVelocity().mVec128), btSimdDot3(c.m_relpos1CrossNormal.mVec128,body1.internalGetDeltaAngularVelocity().mVec128));
|
|
||||||
__m128 deltaVel2Dotn = _mm_add_ps(btSimdDot3(c.m_contactNormal2.mVec128,body2.internalGetDeltaLinearVelocity().mVec128), btSimdDot3(c.m_relpos2CrossNormal.mVec128,body2.internalGetDeltaAngularVelocity().mVec128));
|
|
||||||
deltaImpulse = _mm_sub_ps(deltaImpulse,_mm_mul_ps(deltaVel1Dotn,_mm_set1_ps(c.m_jacDiagABInv)));
|
|
||||||
deltaImpulse = _mm_sub_ps(deltaImpulse,_mm_mul_ps(deltaVel2Dotn,_mm_set1_ps(c.m_jacDiagABInv)));
|
|
||||||
btSimdScalar sum = _mm_add_ps(cpAppliedImp,deltaImpulse);
|
|
||||||
btSimdScalar resultLowerLess,resultUpperLess;
|
|
||||||
resultLowerLess = _mm_cmplt_ps(sum,lowerLimit1);
|
|
||||||
resultUpperLess = _mm_cmplt_ps(sum,upperLimit1);
|
|
||||||
__m128 lowMinApplied = _mm_sub_ps(lowerLimit1,cpAppliedImp);
|
|
||||||
deltaImpulse = _mm_or_ps( _mm_and_ps(resultLowerLess, lowMinApplied), _mm_andnot_ps(resultLowerLess, deltaImpulse) );
|
|
||||||
c.m_appliedImpulse = _mm_or_ps( _mm_and_ps(resultLowerLess, lowerLimit1), _mm_andnot_ps(resultLowerLess, sum) );
|
|
||||||
__m128 linearComponentA = _mm_mul_ps(c.m_contactNormal1.mVec128,body1.internalGetInvMass().mVec128);
|
|
||||||
__m128 linearComponentB = _mm_mul_ps(c.m_contactNormal2.mVec128,body2.internalGetInvMass().mVec128);
|
|
||||||
__m128 impulseMagnitude = deltaImpulse;
|
|
||||||
body1.internalGetDeltaLinearVelocity().mVec128 = _mm_add_ps(body1.internalGetDeltaLinearVelocity().mVec128,_mm_mul_ps(linearComponentA,impulseMagnitude));
|
|
||||||
body1.internalGetDeltaAngularVelocity().mVec128 = _mm_add_ps(body1.internalGetDeltaAngularVelocity().mVec128 ,_mm_mul_ps(c.m_angularComponentA.mVec128,impulseMagnitude));
|
|
||||||
body2.internalGetDeltaLinearVelocity().mVec128 = _mm_add_ps(body2.internalGetDeltaLinearVelocity().mVec128,_mm_mul_ps(linearComponentB,impulseMagnitude));
|
|
||||||
body2.internalGetDeltaAngularVelocity().mVec128 = _mm_add_ps(body2.internalGetDeltaAngularVelocity().mVec128 ,_mm_mul_ps(c.m_angularComponentB.mVec128,impulseMagnitude));
|
|
||||||
return deltaImpulse;
|
|
||||||
#else
|
#else
|
||||||
return resolveSingleConstraintRowLowerLimit(body1,body2,c);
|
return resolveSingleConstraintRowLowerLimit(body1,body2,c);
|
||||||
#endif
|
#endif
|
||||||
@@ -248,6 +333,36 @@ void btSequentialImpulseConstraintSolver::resolveSplitPenetrationImpulseCacheFri
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
btSequentialImpulseConstraintSolver::btSequentialImpulseConstraintSolver()
|
||||||
|
:m_btSeed2(0),
|
||||||
|
m_resolveSingleConstraintRowGeneric(gResolveSingleConstraintRowGeneric_sse2),
|
||||||
|
m_resolveSingleConstraintRowLowerLimit(gResolveSingleConstraintRowLowerLimit_sse2)
|
||||||
|
{
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
btSequentialImpulseConstraintSolver::~btSequentialImpulseConstraintSolver()
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
btSingleConstraintRowSolver btSequentialImpulseConstraintSolver::getSSE2ConstraintRowSolverGeneric()
|
||||||
|
{
|
||||||
|
return gResolveSingleConstraintRowGeneric_sse2;
|
||||||
|
}
|
||||||
|
btSingleConstraintRowSolver btSequentialImpulseConstraintSolver::getSSE4_1ConstraintRowSolverGeneric()
|
||||||
|
{
|
||||||
|
return gResolveSingleConstraintRowGeneric_sse4_1_fma3;
|
||||||
|
}
|
||||||
|
btSingleConstraintRowSolver btSequentialImpulseConstraintSolver::getSSE2ConstraintRowSolverLowerLimit()
|
||||||
|
{
|
||||||
|
return gResolveSingleConstraintRowLowerLimit_sse2;
|
||||||
|
}
|
||||||
|
btSingleConstraintRowSolver btSequentialImpulseConstraintSolver::getSSE4_1ConstraintRowSolverLowerLimit()
|
||||||
|
{
|
||||||
|
return gResolveSingleConstraintRowLowerLimit_sse4_1_fma3;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
unsigned long btSequentialImpulseConstraintSolver::btRand2()
|
unsigned long btSequentialImpulseConstraintSolver::btRand2()
|
||||||
{
|
{
|
||||||
@@ -434,8 +549,8 @@ void btSequentialImpulseConstraintSolver::setupFrictionConstraint(btSolverConstr
|
|||||||
|
|
||||||
// btScalar positionalError = 0.f;
|
// btScalar positionalError = 0.f;
|
||||||
|
|
||||||
btScalar velocityError = desiredVelocity - rel_vel;
|
btSimdScalar velocityError = desiredVelocity - rel_vel;
|
||||||
btScalar velocityImpulse = velocityError * solverConstraint.m_jacDiagABInv;
|
btSimdScalar velocityImpulse = velocityError * btSimdScalar(solverConstraint.m_jacDiagABInv);
|
||||||
solverConstraint.m_rhs = velocityImpulse;
|
solverConstraint.m_rhs = velocityImpulse;
|
||||||
solverConstraint.m_rhsPenetration = 0.f;
|
solverConstraint.m_rhsPenetration = 0.f;
|
||||||
solverConstraint.m_cfm = cfmSlip;
|
solverConstraint.m_cfm = cfmSlip;
|
||||||
@@ -1741,5 +1856,3 @@ void btSequentialImpulseConstraintSolver::reset()
|
|||||||
{
|
{
|
||||||
m_btSeed2 = 0;
|
m_btSeed2 = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -27,6 +27,8 @@ class btCollisionObject;
|
|||||||
#include "BulletCollision/NarrowPhaseCollision/btManifoldPoint.h"
|
#include "BulletCollision/NarrowPhaseCollision/btManifoldPoint.h"
|
||||||
#include "BulletDynamics/ConstraintSolver/btConstraintSolver.h"
|
#include "BulletDynamics/ConstraintSolver/btConstraintSolver.h"
|
||||||
|
|
||||||
|
typedef btSimdScalar(*btSingleConstraintRowSolver)(btSolverBody&, btSolverBody&, const btSolverConstraint&);
|
||||||
|
|
||||||
///The btSequentialImpulseConstraintSolver is a fast SIMD implementation of the Projected Gauss Seidel (iterative LCP) method.
|
///The btSequentialImpulseConstraintSolver is a fast SIMD implementation of the Projected Gauss Seidel (iterative LCP) method.
|
||||||
ATTRIBUTE_ALIGNED16(class) btSequentialImpulseConstraintSolver : public btConstraintSolver
|
ATTRIBUTE_ALIGNED16(class) btSequentialImpulseConstraintSolver : public btConstraintSolver
|
||||||
{
|
{
|
||||||
@@ -43,6 +45,10 @@ protected:
|
|||||||
btAlignedObjectArray<btTypedConstraint::btConstraintInfo1> m_tmpConstraintSizesPool;
|
btAlignedObjectArray<btTypedConstraint::btConstraintInfo1> m_tmpConstraintSizesPool;
|
||||||
int m_maxOverrideNumSolverIterations;
|
int m_maxOverrideNumSolverIterations;
|
||||||
int m_fixedBodyId;
|
int m_fixedBodyId;
|
||||||
|
|
||||||
|
btSingleConstraintRowSolver m_resolveSingleConstraintRowGeneric;
|
||||||
|
btSingleConstraintRowSolver m_resolveSingleConstraintRowLowerLimit;
|
||||||
|
|
||||||
void setupFrictionConstraint( btSolverConstraint& solverConstraint, const btVector3& normalAxis,int solverBodyIdA,int solverBodyIdB,
|
void setupFrictionConstraint( btSolverConstraint& solverConstraint, const btVector3& normalAxis,int solverBodyIdA,int solverBodyIdB,
|
||||||
btManifoldPoint& cp,const btVector3& rel_pos1,const btVector3& rel_pos2,
|
btManifoldPoint& cp,const btVector3& rel_pos1,const btVector3& rel_pos2,
|
||||||
btCollisionObject* colObj0,btCollisionObject* colObj1, btScalar relaxation,
|
btCollisionObject* colObj0,btCollisionObject* colObj1, btScalar relaxation,
|
||||||
@@ -113,8 +119,6 @@ public:
|
|||||||
|
|
||||||
virtual btScalar solveGroup(btCollisionObject** bodies,int numBodies,btPersistentManifold** manifold,int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& info, btIDebugDraw* debugDrawer,btDispatcher* dispatcher);
|
virtual btScalar solveGroup(btCollisionObject** bodies,int numBodies,btPersistentManifold** manifold,int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& info, btIDebugDraw* debugDrawer,btDispatcher* dispatcher);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
///clear internal cached data and reset random seed
|
///clear internal cached data and reset random seed
|
||||||
virtual void reset();
|
virtual void reset();
|
||||||
|
|
||||||
@@ -136,6 +140,34 @@ public:
|
|||||||
{
|
{
|
||||||
return BT_SEQUENTIAL_IMPULSE_SOLVER;
|
return BT_SEQUENTIAL_IMPULSE_SOLVER;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
btSingleConstraintRowSolver getActiveConstraintRowSolverGeneric()
|
||||||
|
{
|
||||||
|
return m_resolveSingleConstraintRowGeneric;
|
||||||
|
}
|
||||||
|
void setConstraintRowSolverGeneric(btSingleConstraintRowSolver rowSolver)
|
||||||
|
{
|
||||||
|
m_resolveSingleConstraintRowGeneric = rowSolver;
|
||||||
|
}
|
||||||
|
btSingleConstraintRowSolver getActiveConstraintRowSolverLowerLimit()
|
||||||
|
{
|
||||||
|
return m_resolveSingleConstraintRowLowerLimit;
|
||||||
|
}
|
||||||
|
void setConstraintRowSolverLowerLimit(btSingleConstraintRowSolver rowSolver)
|
||||||
|
{
|
||||||
|
m_resolveSingleConstraintRowLowerLimit = rowSolver;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// btSingleConstraintRowSolver getScalarConstraintRowSolverLowerLimit();
|
||||||
|
btSingleConstraintRowSolver getSSE2ConstraintRowSolverGeneric();
|
||||||
|
btSingleConstraintRowSolver getSSE4_1ConstraintRowSolverGeneric();
|
||||||
|
btSingleConstraintRowSolver getSSE2ConstraintRowSolverLowerLimit();
|
||||||
|
btSingleConstraintRowSolver getSSE4_1ConstraintRowSolverLowerLimit();
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
87
src/LinearMath/btCpuFeatureUtility.h
Normal file
87
src/LinearMath/btCpuFeatureUtility.h
Normal file
@@ -0,0 +1,87 @@
|
|||||||
|
|
||||||
|
#ifndef BT_CPU_UTILITY_H
|
||||||
|
#define BT_CPU_UTILITY_H
|
||||||
|
|
||||||
|
#include "LinearMath/btScalar.h"
|
||||||
|
|
||||||
|
#include <string.h>//memset
|
||||||
|
#ifdef BT_USE_SSE
|
||||||
|
#if (_MSC_FULL_VER >= 160040219)
|
||||||
|
#include <intrin.h>
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined BT_USE_NEON
|
||||||
|
#define ARM_NEON_GCC_COMPATIBILITY 1
|
||||||
|
#include <arm_neon.h>
|
||||||
|
#include <sys/types.h>
|
||||||
|
#include <sys/sysctl.h> //for sysctlbyname
|
||||||
|
#endif //BT_USE_NEON
|
||||||
|
|
||||||
|
///Rudimentary btCpuFeatureUtility for CPU features: only report the features that Bullet actually uses (SSE4/FMA3, NEON_HPFP)
|
||||||
|
///We assume SSE2 in case BT_USE_SSE2 is defined in LinearMath/btScalar.h
|
||||||
|
class btCpuFeatureUtility
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
enum btCpuFeature
|
||||||
|
{
|
||||||
|
CPU_FEATURE_FMA3=1,
|
||||||
|
CPU_FEATURE_SSE4_1=2,
|
||||||
|
CPU_FEATURE_NEON_HPFP=4
|
||||||
|
};
|
||||||
|
|
||||||
|
static int getCpuFeatures(btCpuFeature inFeature)
|
||||||
|
{
|
||||||
|
|
||||||
|
static int capabilities = 0;
|
||||||
|
static bool testedCapabilities = false;
|
||||||
|
if (0 != testedCapabilities)
|
||||||
|
{
|
||||||
|
return capabilities;
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef BT_USE_NEON
|
||||||
|
{
|
||||||
|
uint32_t hasFeature = 0;
|
||||||
|
size_t featureSize = sizeof(hasFeature);
|
||||||
|
int err = sysctlbyname("hw.optional.neon_hpfp", &hasFeature, &featureSize, NULL, 0);
|
||||||
|
if (0 == err && hasFeature)
|
||||||
|
capabilities |= CPU_FEATURE_NEON_HPFP;
|
||||||
|
}
|
||||||
|
#endif //BT_USE_NEON
|
||||||
|
|
||||||
|
#ifdef BT_USE_SSE
|
||||||
|
#if (_MSC_FULL_VER >= 160040219)
|
||||||
|
{
|
||||||
|
int cpuInfo[4];
|
||||||
|
memset(cpuInfo, 0, sizeof(cpuInfo));
|
||||||
|
unsigned long long sseExt;
|
||||||
|
__cpuid(mCpuInfo, 1);
|
||||||
|
mExt = _xgetbv(0);
|
||||||
|
|
||||||
|
const int OSXSAVEFlag = (1UL << 27);
|
||||||
|
const int AVXFlag = ((1UL << 28) | OSXSAVEFlag);
|
||||||
|
const int FMAFlag = ((1UL << 12) | AVXFlag | OSXSAVEFlag);
|
||||||
|
if ((mCpuInfo[2] & FMAFlag) == FMAFlag && (mExt & 6) == 6)
|
||||||
|
{
|
||||||
|
capabilities |= btCpuFeatureUtility::CPU_FEATURE_FMA3;
|
||||||
|
}
|
||||||
|
|
||||||
|
const int SSE41Flag = (1 << 19);
|
||||||
|
if (mCpuInfo[2] & SSE41Flag)
|
||||||
|
{
|
||||||
|
capabilities |= btCpuFeatureUtility::CPU_FEATURE_SSE4_1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif//(_MSC_FULL_VER >= 160040219)
|
||||||
|
#endif//BT_USE_SSE
|
||||||
|
|
||||||
|
testedCapabilities = true;
|
||||||
|
return capabilities;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
#endif //BT_CPU_UTILITY_H
|
||||||
Reference in New Issue
Block a user