added quickstep improvements, to allow for constraints (point to point etc).
Thanks Francisco Leon/projectileman
This commit is contained in:
@@ -53,145 +53,7 @@
|
||||
////////////////////////////////////////////////////////////////////
|
||||
//math stuff
|
||||
|
||||
typedef btScalar dVector4[4];
|
||||
typedef btScalar dMatrix3[4*3];
|
||||
#define dInfinity FLT_MAX
|
||||
|
||||
|
||||
|
||||
#define dRecip(x) ((float)(1.0f/(x))) /* reciprocal */
|
||||
|
||||
|
||||
|
||||
#define dMULTIPLY0_331NEW(A,op,B,C) \
|
||||
{\
|
||||
btScalar tmp[3];\
|
||||
tmp[0] = C.getX();\
|
||||
tmp[1] = C.getY();\
|
||||
tmp[2] = C.getZ();\
|
||||
dMULTIPLYOP0_331(A,op,B,tmp);\
|
||||
}
|
||||
|
||||
#define dMULTIPLY0_331(A,B,C) dMULTIPLYOP0_331(A,=,B,C)
|
||||
#define dMULTIPLYOP0_331(A,op,B,C) \
|
||||
(A)[0] op dDOT1((B),(C)); \
|
||||
(A)[1] op dDOT1((B+4),(C)); \
|
||||
(A)[2] op dDOT1((B+8),(C));
|
||||
|
||||
#define dAASSERT btAssert
|
||||
#define dIASSERT btAssert
|
||||
|
||||
#define REAL float
|
||||
#define dDOTpq(a,b,p,q) ((a)[0]*(b)[0] + (a)[p]*(b)[q] + (a)[2*(p)]*(b)[2*(q)])
|
||||
btScalar dDOT1 (const btScalar *a, const btScalar *b) { return dDOTpq(a,b,1,1); }
|
||||
#define dDOT14(a,b) dDOTpq(a,b,1,4)
|
||||
|
||||
#define dCROSS(a,op,b,c) \
|
||||
(a)[0] op ((b)[1]*(c)[2] - (b)[2]*(c)[1]); \
|
||||
(a)[1] op ((b)[2]*(c)[0] - (b)[0]*(c)[2]); \
|
||||
(a)[2] op ((b)[0]*(c)[1] - (b)[1]*(c)[0]);
|
||||
|
||||
|
||||
#define dMULTIPLYOP2_333(A,op,B,C) \
|
||||
(A)[0] op dDOT1((B),(C)); \
|
||||
(A)[1] op dDOT1((B),(C+4)); \
|
||||
(A)[2] op dDOT1((B),(C+8)); \
|
||||
(A)[4] op dDOT1((B+4),(C)); \
|
||||
(A)[5] op dDOT1((B+4),(C+4)); \
|
||||
(A)[6] op dDOT1((B+4),(C+8)); \
|
||||
(A)[8] op dDOT1((B+8),(C)); \
|
||||
(A)[9] op dDOT1((B+8),(C+4)); \
|
||||
(A)[10] op dDOT1((B+8),(C+8));
|
||||
#define dMULTIPLYOP0_333(A,op,B,C) \
|
||||
(A)[0] op dDOT14((B),(C)); \
|
||||
(A)[1] op dDOT14((B),(C+1)); \
|
||||
(A)[2] op dDOT14((B),(C+2)); \
|
||||
(A)[4] op dDOT14((B+4),(C)); \
|
||||
(A)[5] op dDOT14((B+4),(C+1)); \
|
||||
(A)[6] op dDOT14((B+4),(C+2)); \
|
||||
(A)[8] op dDOT14((B+8),(C)); \
|
||||
(A)[9] op dDOT14((B+8),(C+1)); \
|
||||
(A)[10] op dDOT14((B+8),(C+2));
|
||||
|
||||
#define dMULTIPLY2_333(A,B,C) dMULTIPLYOP2_333(A,=,B,C)
|
||||
#define dMULTIPLY0_333(A,B,C) dMULTIPLYOP0_333(A,=,B,C)
|
||||
#define dMULTIPLYADD0_331(A,B,C) dMULTIPLYOP0_331(A,+=,B,C)
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////////////
|
||||
#define EFFICIENT_ALIGNMENT 16
|
||||
#define dEFFICIENT_SIZE(x) ((((x)-1)|(EFFICIENT_ALIGNMENT-1))+1)
|
||||
/* alloca aligned to the EFFICIENT_ALIGNMENT. note that this can waste
|
||||
* up to 15 bytes per allocation, depending on what alloca() returns.
|
||||
*/
|
||||
|
||||
#define dALLOCA16(n) \
|
||||
((char*)dEFFICIENT_SIZE(((size_t)(alloca((n)+(EFFICIENT_ALIGNMENT-1))))))
|
||||
|
||||
|
||||
|
||||
/////////////////////////////////////////////////////////////////////
|
||||
/////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifdef DEBUG
|
||||
#define ANSI_FTOL 1
|
||||
|
||||
extern "C" {
|
||||
__declspec(naked) void _ftol2() {
|
||||
__asm {
|
||||
#if ANSI_FTOL
|
||||
fnstcw WORD PTR [esp-2]
|
||||
mov ax, WORD PTR [esp-2]
|
||||
|
||||
OR AX, 0C00h
|
||||
|
||||
mov WORD PTR [esp-4], ax
|
||||
fldcw WORD PTR [esp-4]
|
||||
fistp QWORD PTR [esp-12]
|
||||
fldcw WORD PTR [esp-2]
|
||||
mov eax, DWORD PTR [esp-12]
|
||||
mov edx, DWORD PTR [esp-8]
|
||||
#else
|
||||
fistp DWORD PTR [esp-12]
|
||||
mov eax, DWORD PTR [esp-12]
|
||||
mov ecx, DWORD PTR [esp-8]
|
||||
#endif
|
||||
ret
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif //DEBUG
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#define ALLOCA dALLOCA16
|
||||
|
||||
typedef const btScalar *dRealPtr;
|
||||
typedef btScalar *dRealMutablePtr;
|
||||
#define dRealArray(name,n) btScalar name[n];
|
||||
#define dRealAllocaArray(name,n) btScalar *name = (btScalar*) ALLOCA ((n)*sizeof(btScalar));
|
||||
|
||||
void dSetZero1 (btScalar *a, int n)
|
||||
{
|
||||
dAASSERT (a && n >= 0);
|
||||
while (n > 0) {
|
||||
*(a++) = 0;
|
||||
n--;
|
||||
}
|
||||
}
|
||||
|
||||
void dSetValue1 (btScalar *a, int n, btScalar value)
|
||||
{
|
||||
dAASSERT (a && n >= 0);
|
||||
while (n > 0) {
|
||||
*(a++) = value;
|
||||
n--;
|
||||
}
|
||||
}
|
||||
|
||||
#include "OdeMacros.h"
|
||||
|
||||
//***************************************************************************
|
||||
// configuration
|
||||
@@ -224,7 +86,7 @@ static void compute_invM_JT (int m, dRealMutablePtr J, dRealMutablePtr iMJ, int
|
||||
dRealMutablePtr iMJ_ptr = iMJ;
|
||||
dRealMutablePtr J_ptr = J;
|
||||
for (i=0; i<m; i++) {
|
||||
int b1 = jb[i*2];
|
||||
int b1 = jb[i*2];
|
||||
int b2 = jb[i*2+1];
|
||||
btScalar k = body[b1]->m_invMass;
|
||||
for (j=0; j<3; j++) iMJ_ptr[j] = k*J_ptr[j];
|
||||
@@ -245,48 +107,48 @@ static void multiply_invM_JTSpecial (int m, int nb, dRealMutablePtr iMJ, int *jb
|
||||
{
|
||||
int i,j;
|
||||
|
||||
|
||||
|
||||
|
||||
dRealMutablePtr out_ptr1 = out + onlyBody1*6;
|
||||
|
||||
for (j=0; j<6; j++)
|
||||
|
||||
for (j=0; j<6; j++)
|
||||
out_ptr1[j] = 0;
|
||||
|
||||
if (onlyBody2 >= 0)
|
||||
{
|
||||
out_ptr1 = out + onlyBody2*6;
|
||||
|
||||
for (j=0; j<6; j++)
|
||||
for (j=0; j<6; j++)
|
||||
out_ptr1[j] = 0;
|
||||
}
|
||||
|
||||
dRealPtr iMJ_ptr = iMJ;
|
||||
for (i=0; i<m; i++) {
|
||||
|
||||
int b1 = jb[i*2];
|
||||
int b1 = jb[i*2];
|
||||
|
||||
dRealMutablePtr out_ptr = out + b1*6;
|
||||
if ((b1 == onlyBody1) || (b1 == onlyBody2))
|
||||
{
|
||||
for (j=0; j<6; j++)
|
||||
for (j=0; j<6; j++)
|
||||
out_ptr[j] += iMJ_ptr[j] * in[i] ;
|
||||
}
|
||||
|
||||
|
||||
iMJ_ptr += 6;
|
||||
|
||||
int b2 = jb[i*2+1];
|
||||
if ((b2 == onlyBody1) || (b2 == onlyBody2))
|
||||
{
|
||||
if (b2 >= 0)
|
||||
if (b2 >= 0)
|
||||
{
|
||||
out_ptr = out + b2*6;
|
||||
for (j=0; j<6; j++)
|
||||
for (j=0; j<6; j++)
|
||||
out_ptr[j] += iMJ_ptr[j] * in[i];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
iMJ_ptr += 6;
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
#endif
|
||||
@@ -302,10 +164,10 @@ static void multiply_invM_JT (int m, int nb, dRealMutablePtr iMJ, int *jb,
|
||||
dSetZero1 (out,6*nb);
|
||||
dRealPtr iMJ_ptr = iMJ;
|
||||
for (i=0; i<m; i++) {
|
||||
int b1 = jb[i*2];
|
||||
int b1 = jb[i*2];
|
||||
int b2 = jb[i*2+1];
|
||||
dRealMutablePtr out_ptr = out + b1*6;
|
||||
for (j=0; j<6; j++)
|
||||
for (j=0; j<6; j++)
|
||||
out_ptr[j] += iMJ_ptr[j] * in[i];
|
||||
iMJ_ptr += 6;
|
||||
if (b2 >= 0) {
|
||||
@@ -326,15 +188,15 @@ static void multiply_J (int m, dRealMutablePtr J, int *jb,
|
||||
int i,j;
|
||||
dRealPtr J_ptr = J;
|
||||
for (i=0; i<m; i++) {
|
||||
int b1 = jb[i*2];
|
||||
int b1 = jb[i*2];
|
||||
int b2 = jb[i*2+1];
|
||||
btScalar sum = 0;
|
||||
dRealMutablePtr in_ptr = in + b1*6;
|
||||
for (j=0; j<6; j++) sum += J_ptr[j] * in_ptr[j];
|
||||
for (j=0; j<6; j++) sum += J_ptr[j] * in_ptr[j];
|
||||
J_ptr += 6;
|
||||
if (b2 >= 0) {
|
||||
in_ptr = in + b2*6;
|
||||
for (j=0; j<6; j++) sum += J_ptr[j] * in_ptr[j];
|
||||
for (j=0; j<6; j++) sum += J_ptr[j] * in_ptr[j];
|
||||
}
|
||||
J_ptr += 6;
|
||||
out[i] = sum;
|
||||
@@ -440,7 +302,7 @@ static void SOR_LCP (int m, int nb, dRealMutablePtr J, int *jb, OdeSolverBody *
|
||||
}
|
||||
|
||||
// scale Ad by CFM
|
||||
for (i=0; i<m; i++)
|
||||
for (i=0; i<m; i++)
|
||||
Ad[i] *= cfm[i];
|
||||
|
||||
// order to solve constraint rows in
|
||||
@@ -449,11 +311,11 @@ static void SOR_LCP (int m, int nb, dRealMutablePtr J, int *jb, OdeSolverBody *
|
||||
#ifndef REORDER_CONSTRAINTS
|
||||
// make sure constraints with findex < 0 come first.
|
||||
j=0;
|
||||
for (i=0; i<m; i++)
|
||||
if (findex[i] < 0)
|
||||
for (i=0; i<m; i++)
|
||||
if (findex[i] < 0)
|
||||
order[j++].index = i;
|
||||
for (i=0; i<m; i++)
|
||||
if (findex[i] >= 0)
|
||||
for (i=0; i<m; i++)
|
||||
if (findex[i] >= 0)
|
||||
order[j++].index = i;
|
||||
dIASSERT (j==m);
|
||||
#endif
|
||||
@@ -512,11 +374,11 @@ static void SOR_LCP (int m, int nb, dRealMutablePtr J, int *jb, OdeSolverBody *
|
||||
// linearizing access to those arrays. hmmm, this does not seem
|
||||
// like a win, but we should think carefully about our memory
|
||||
// access pattern.
|
||||
|
||||
|
||||
int index = order[i].index;
|
||||
J_ptr = J + index*12;
|
||||
iMJ_ptr = iMJ + index*12;
|
||||
|
||||
|
||||
// set the limits for this constraint. note that 'hicopy' is used.
|
||||
// this is the place where the QuickStep method differs from the
|
||||
// direct LCP solving method, since that method only performs this
|
||||
@@ -533,7 +395,7 @@ static void SOR_LCP (int m, int nb, dRealMutablePtr J, int *jb, OdeSolverBody *
|
||||
int b2 = jb[index*2+1];
|
||||
float delta = rhs[index] - lambda[index]*Ad[index];
|
||||
dRealMutablePtr fc_ptr = invMforce + 6*b1;
|
||||
|
||||
|
||||
// @@@ potential optimization: SIMD-ize this and the b2 >= 0 case
|
||||
delta -=fc_ptr[0] * J_ptr[0] + fc_ptr[1] * J_ptr[1] +
|
||||
fc_ptr[2] * J_ptr[2] + fc_ptr[3] * J_ptr[3] +
|
||||
@@ -566,7 +428,7 @@ static void SOR_LCP (int m, int nb, dRealMutablePtr J, int *jb, OdeSolverBody *
|
||||
//@@@ a trick that may or may not help
|
||||
//float ramp = (1-((float)(iteration+1)/(float)num_iterations));
|
||||
//delta *= ramp;
|
||||
|
||||
|
||||
// update invMforce.
|
||||
// @@@ potential optimization: SIMD for this and the b2 >= 0 case
|
||||
fc_ptr = invMforce + 6*b1;
|
||||
@@ -596,8 +458,8 @@ static void SOR_LCP (int m, int nb, dRealMutablePtr J, int *jb, OdeSolverBody *
|
||||
void SolveInternal1 (float global_cfm,
|
||||
float global_erp,
|
||||
OdeSolverBody* const *body, int nb,
|
||||
BU_Joint **joint,
|
||||
int nj,
|
||||
BU_Joint **joint,
|
||||
int nj,
|
||||
const btContactSolverInfo& solverInfo)
|
||||
{
|
||||
|
||||
@@ -605,20 +467,20 @@ void SolveInternal1 (float global_cfm,
|
||||
float sOr = solverInfo.m_sor;
|
||||
|
||||
int i,j;
|
||||
|
||||
|
||||
btScalar stepsize1 = dRecip(solverInfo.m_timeStep);
|
||||
|
||||
// number all bodies in the body list - set their tag values
|
||||
for (i=0; i<nb; i++)
|
||||
for (i=0; i<nb; i++)
|
||||
body[i]->m_odeTag = i;
|
||||
|
||||
|
||||
// make a local copy of the joint array, because we might want to modify it.
|
||||
// (the "BU_Joint *const*" declaration says we're allowed to modify the joints
|
||||
// but not the joint array, because the caller might need it unchanged).
|
||||
//@@@ do we really need to do this? we'll be sorting constraint rows individually, not joints
|
||||
//BU_Joint **joint = (BU_Joint**) alloca (nj * sizeof(BU_Joint*));
|
||||
//memcpy (joint,_joint,nj * sizeof(BU_Joint*));
|
||||
|
||||
|
||||
// for all bodies, compute the inertia tensor and its inverse in the global
|
||||
// frame, and compute the rotational force and add it to the torque
|
||||
// accumulator. I and invI are a vertical stack of 3x4 matrices, one per body.
|
||||
@@ -692,7 +554,7 @@ void SolveInternal1 (float global_cfm,
|
||||
dSetValue1 (lo,m,-dInfinity);
|
||||
dSetValue1 (hi,m, dInfinity);
|
||||
for (i=0; i<m; i++) findex[i] = -1;
|
||||
|
||||
|
||||
// get jacobian data from constraints. an m*12 matrix will be created
|
||||
// to store the two jacobian blocks from each constraint. it has this
|
||||
// format:
|
||||
@@ -731,7 +593,7 @@ void SolveInternal1 (float global_cfm,
|
||||
|
||||
// adjust returned findex values for global index numbering
|
||||
for (j=0; j<info[i].m; j++) {
|
||||
if (findex[ofs[i] + j] >= 0)
|
||||
if (findex[ofs[i] + j] >= 0)
|
||||
findex[ofs[i] + j] += ofs[i];
|
||||
}
|
||||
}
|
||||
@@ -754,10 +616,10 @@ void SolveInternal1 (float global_cfm,
|
||||
// put v/h + invM*fe into tmp1
|
||||
for (i=0; i<nb; i++) {
|
||||
btScalar body_invMass = body[i]->m_invMass;
|
||||
for (j=0; j<3; j++)
|
||||
for (j=0; j<3; j++)
|
||||
tmp1[i*6+j] = body[i]->m_facc[j] * body_invMass + body[i]->m_linearVelocity[j] * stepsize1;
|
||||
dMULTIPLY0_331NEW (tmp1 + i*6 + 3,=,invI + i*12,body[i]->m_tacc);
|
||||
for (j=0; j<3; j++)
|
||||
for (j=0; j<3; j++)
|
||||
tmp1[i*6+3+j] += body[i]->m_angularVelocity[j] * stepsize1;
|
||||
}
|
||||
|
||||
@@ -769,7 +631,7 @@ void SolveInternal1 (float global_cfm,
|
||||
for (i=0; i<m; i++) rhs[i] = c[i]*stepsize1 - rhs[i];
|
||||
|
||||
// scale CFM
|
||||
for (i=0; i<m; i++)
|
||||
for (i=0; i<m; i++)
|
||||
cfm[i] *= stepsize1;
|
||||
|
||||
// load lambda from the value saved on the previous iteration
|
||||
@@ -795,17 +657,17 @@ void SolveInternal1 (float global_cfm,
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
// note that the SOR method overwrites rhs and J at this point, so
|
||||
// they should not be used again.
|
||||
|
||||
|
||||
// add stepsize * cforce to the body velocity
|
||||
for (i=0; i<nb; i++) {
|
||||
for (j=0; j<3; j++)
|
||||
for (j=0; j<3; j++)
|
||||
body[i]->m_linearVelocity[j] += solverInfo.m_timeStep* cforce[i*6+j];
|
||||
for (j=0; j<3; j++)
|
||||
for (j=0; j<3; j++)
|
||||
body[i]->m_angularVelocity[j] += solverInfo.m_timeStep* cforce[i*6+3+j];
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@@ -819,13 +681,13 @@ void SolveInternal1 (float global_cfm,
|
||||
btVector3 linvel = body[i]->m_linearVelocity;
|
||||
btVector3 angvel = body[i]->m_angularVelocity;
|
||||
|
||||
for (j=0; j<3; j++)
|
||||
for (j=0; j<3; j++)
|
||||
{
|
||||
linvel[j] += solverInfo.m_timeStep * body_invMass * body[i]->m_facc[j];
|
||||
}
|
||||
for (j=0; j<3; j++)
|
||||
for (j=0; j<3; j++)
|
||||
{
|
||||
body[i]->m_tacc[j] *= solverInfo.m_timeStep;
|
||||
body[i]->m_tacc[j] *= solverInfo.m_timeStep;
|
||||
}
|
||||
dMULTIPLY0_331NEW(angvel,+=,invI + i*12,body[i]->m_tacc);
|
||||
body[i]->m_angularVelocity = angvel;
|
||||
|
||||
Reference in New Issue
Block a user