updated Bullet sequential impulse constraint solver, so it matches 100% ODE PGS quickstep solver innerloop, mainly by renaming variables...

This commit is contained in:
erwin.coumans
2008-11-26 00:27:35 +00:00
parent 09aa2dbbe7
commit 82047e601e
28 changed files with 646 additions and 1135 deletions

View File

@@ -65,29 +65,30 @@ subject to the following restrictions:
// during the solution. depending on the situation, this can help a lot
// or hardly at all, but it doesn't seem to hurt.
#define RANDOMLY_REORDER_CONSTRAINTS 1
//#define RANDOMLY_REORDER_CONSTRAINTS 1
//***************************************************************************
// various common computations involving the matrix J
// compute iMJ = inv(M)*J'
inline void compute_invM_JT (int m, dRealMutablePtr J, dRealMutablePtr iMJ, int *jb,
// various common computations involving the matrix JconstraintAxis
// compute iMJ = inv(M)*JconstraintAxis'
inline void compute_invM_JT (int numConstraintRows, dRealMutablePtr JconstraintAxis, dRealMutablePtr iMJ, int *jb,
//OdeSolverBody* const *body,
const btAlignedObjectArray<btOdeSolverBody*> &body,
dRealPtr invI)
dRealPtr inverseInertiaWorld)
{
int i,j;
dRealMutablePtr iMJ_ptr = iMJ;
dRealMutablePtr J_ptr = J;
for (i=0; i<m; i++) {
dRealMutablePtr J_ptr = JconstraintAxis;
for (i=0; i<numConstraintRows; i++) {
int b1 = jb[i*2];
int b2 = jb[i*2+1];
btScalar k = body[b1]->m_invMass;
for (j=0; j<3; j++) iMJ_ptr[j] = k*J_ptr[j];
dMULTIPLY0_331 (iMJ_ptr + 3, invI + 12*b1, J_ptr + 3);
for (j=0; j<3; j++)
iMJ_ptr[j] = body[b1]->m_invMass*J_ptr[j];
dMULTIPLY0_331 (iMJ_ptr + 3, inverseInertiaWorld + 12*b1, J_ptr + 3);
if (b2 >= 0) {
k = body[b2]->m_invMass;
for (j=0; j<3; j++) iMJ_ptr[j+6] = k*J_ptr[j+6];
dMULTIPLY0_331 (iMJ_ptr + 9, invI + 12*b2, J_ptr + 9);
for (j=0; j<3; j++)
iMJ_ptr[j+6] = body[b2]->m_invMass*J_ptr[j+6];//inv mass * constraint (normal) axis
dMULTIPLY0_331 (iMJ_ptr + 9, inverseInertiaWorld + 12*b2, J_ptr + 9);//inverse inertia world * constraint (normal) axis
}
J_ptr += 12;
iMJ_ptr += 12;
@@ -95,7 +96,7 @@ inline void compute_invM_JT (int m, dRealMutablePtr J, dRealMutablePtr iMJ, int
}
#if 0
static void multiply_invM_JTSpecial (int m, int nb, dRealMutablePtr iMJ, int *jb,
static void multiply_invM_JTSpecial (int numConstraintRows, int nb, dRealMutablePtr iMJ, int *jb,
dRealMutablePtr in, dRealMutablePtr out,int onlyBody1,int onlyBody2)
{
int i,j;
@@ -116,7 +117,7 @@ static void multiply_invM_JTSpecial (int m, int nb, dRealMutablePtr iMJ, int *jb
}
dRealPtr iMJ_ptr = iMJ;
for (i=0; i<m; i++) {
for (i=0; i<numConstraintRows; i++) {
int b1 = jb[i*2];
@@ -147,16 +148,16 @@ static void multiply_invM_JTSpecial (int m, int nb, dRealMutablePtr iMJ, int *jb
#endif
// compute out = inv(M)*J'*in.
// compute out = inv(M)*JconstraintAxis'*in.
#if 0
static void multiply_invM_JT (int m, int nb, dRealMutablePtr iMJ, int *jb,
static void multiply_invM_JT (int numConstraintRows, int nb, dRealMutablePtr iMJ, int *jb,
dRealMutablePtr in, dRealMutablePtr out)
{
int i,j;
dSetZero1 (out,6*nb);
dRealPtr iMJ_ptr = iMJ;
for (i=0; i<m; i++) {
for (i=0; i<numConstraintRows; i++) {
int b1 = jb[i*2];
int b2 = jb[i*2+1];
dRealMutablePtr out_ptr = out + b1*6;
@@ -173,13 +174,13 @@ static void multiply_invM_JT (int m, int nb, dRealMutablePtr iMJ, int *jb,
#endif
// compute out = J*in.
inline void multiply_J (int m, dRealMutablePtr J, int *jb,
// compute out = JconstraintAxis*in.
inline void multiply_J (int numConstraintRows, dRealMutablePtr JconstraintAxis, int *jb,
dRealMutablePtr in, dRealMutablePtr out)
{
int i,j;
dRealPtr J_ptr = J;
for (i=0; i<m; i++) {
dRealPtr J_ptr = JconstraintAxis;
for (i=0; i<numConstraintRows; i++) {
int b1 = jb[i*2];
int b2 = jb[i*2+1];
btScalar sum = 0;
@@ -199,14 +200,14 @@ inline void multiply_J (int m, dRealMutablePtr J, int *jb,
// SOR-LCP method
// nb is the number of bodies in the body array.
// J is an m*12 matrix of constraint rows
// JconstraintAxis is an numConstraintRows*12 matrix of constraint rows
// jb is an array of first and second body numbers for each constraint row
// invI is the global frame inverse inertia for each body (stacked 3x3 matrices)
// inverseInertiaWorld is the global frame inverse inertia for each body (stacked 3x3 matrices)
//
// this returns lambda and fc (the constraint force).
// note: fc is returned as inv(M)*J'*lambda, the constraint force is actually J'*lambda
// this returns lambdaAccumulatedImpulse and fc (the constraint force).
// note: fc is returned as inv(M)*JconstraintAxis'*lambdaAccumulatedImpulse, the constraint force is actually JconstraintAxis'*lambdaAccumulatedImpulse
//
// b, lo and hi are modified on exit
// b, lowerLimit and higherLimit are modified on exit
//------------------------------------------------------------------------------
ATTRIBUTE_ALIGNED16(struct) IndexError {
@@ -216,10 +217,10 @@ ATTRIBUTE_ALIGNED16(struct) IndexError {
};
//------------------------------------------------------------------------------
void btSorLcpSolver::SOR_LCP(int m, int nb, dRealMutablePtr J, int *jb,
void btSorLcpSolver::SOR_LCP(int numConstraintRows, int nb, dRealMutablePtr JconstraintAxis, int *jb,
const btAlignedObjectArray<btOdeSolverBody*> &body,
dRealPtr invI, dRealMutablePtr lambda, dRealMutablePtr invMforce, dRealMutablePtr rhs,
dRealMutablePtr lo, dRealMutablePtr hi, dRealPtr cfm, int *findex,
dRealPtr inverseInertiaWorld, dRealMutablePtr lambdaAccumulatedImpulse, dRealMutablePtr invMforce, dRealMutablePtr rhs,
dRealMutablePtr lowerLimit, dRealMutablePtr higherLimit, dRealPtr cfm, int *findex,
int numiter,float overRelax,
btStackAlloc* stackAlloc
)
@@ -237,49 +238,51 @@ void btSorLcpSolver::SOR_LCP(int m, int nb, dRealMutablePtr J, int *jb,
#ifdef WARM_STARTING
// for warm starting, this seems to be necessary to prevent
// jerkiness in motor-driven joints. i have no idea why this works.
for (i=0; i<m; i++) lambda[i] *= 0.9;
for (i=0; i<numConstraintRows; i++) lambdaAccumulatedImpulse[i] *= 0.9;
#else
dSetZero1 (lambda,m);
dSetZero1 (lambdaAccumulatedImpulse,numConstraintRows);
#endif
// the lambda computed at the previous iteration.
// the lambdaAccumulatedImpulse computed at the previous iteration.
// this is used to measure error for when we are reordering the indexes.
dRealAllocaArray (last_lambda,m);
dRealAllocaArray (last_lambda,numConstraintRows);
// a copy of the 'hi' vector in case findex[] is being used
dRealAllocaArray (hicopy,m);
memcpy (hicopy,hi,m*sizeof(float));
// a copy of the 'higherLimit' vector in case findex[] is being used
dRealAllocaArray (hicopy,numConstraintRows);
memcpy (hicopy,higherLimit,numConstraintRows*sizeof(float));
// precompute iMJ = inv(M)*J'
dRealAllocaArray (iMJ,m*12);
compute_invM_JT (m,J,iMJ,jb,body,invI);
// precompute iMJ = inv(M)*JconstraintAxis'
dRealAllocaArray (iMJ,numConstraintRows*12);
compute_invM_JT (numConstraintRows,JconstraintAxis,iMJ,jb,body,inverseInertiaWorld);
// compute fc=(inv(M)*J')*lambda. we will incrementally maintain fc
// as we change lambda.
// compute fc=(inv(M)*JconstraintAxis')*lambdaAccumulatedImpulse. we will incrementally maintain fc
// as we change lambdaAccumulatedImpulse.
#ifdef WARM_STARTING
multiply_invM_JT (m,nb,iMJ,jb,lambda,fc);
multiply_invM_JT (numConstraintRows,nb,iMJ,jb,lambdaAccumulatedImpulse,fc);
#else
dSetZero1 (invMforce,nb*6);
#endif
// precompute 1 / diagonals of A
dRealAllocaArray (Ad,m);
dRealAllocaArray (Ad,numConstraintRows);
dRealPtr iMJ_ptr = iMJ;
dRealMutablePtr J_ptr = J;
for (i=0; i<m; i++) {
dRealMutablePtr J_ptr = JconstraintAxis;
for (i=0; i<numConstraintRows; i++) {
float sum = 0;
for (j=0; j<6; j++) sum += iMJ_ptr[j] * J_ptr[j];
for (j=0; j<6; j++)
sum += iMJ_ptr[j] * J_ptr[j];
if (jb[i*2+1] >= 0) {
for (j=6; j<12; j++) sum += iMJ_ptr[j] * J_ptr[j];
for (j=6; j<12; j++)
sum += iMJ_ptr[j] * J_ptr[j];
}
iMJ_ptr += 12;
J_ptr += 12;
Ad[i] = sor_w / sum;//(sum + cfm[i]);
}
// scale J and b by Ad
J_ptr = J;
for (i=0; i<m; i++) {
// scale JconstraintAxis and b by Ad
J_ptr = JconstraintAxis;
for (i=0; i<numConstraintRows; i++) {
for (j=0; j<12; j++) {
J_ptr[0] *= Ad[i];
J_ptr++;
@@ -288,24 +291,24 @@ void btSorLcpSolver::SOR_LCP(int m, int nb, dRealMutablePtr J, int *jb,
}
// scale Ad by CFM
for (i=0; i<m; i++)
for (i=0; i<numConstraintRows; i++)
Ad[i] *= cfm[i];
// order to solve constraint rows in
//IndexError *order = (IndexError*) alloca (m*sizeof(IndexError));
IndexError *order = (IndexError*) ALLOCA (m*sizeof(IndexError));
//IndexError *order = (IndexError*) alloca (numConstraintRows*sizeof(IndexError));
IndexError *order = (IndexError*) ALLOCA (numConstraintRows*sizeof(IndexError));
#ifndef REORDER_CONSTRAINTS
// make sure constraints with findex < 0 come first.
j=0;
for (i=0; i<m; i++)
for (i=0; i<numConstraintRows; i++)
if (findex[i] < 0)
order[j++].index = i;
for (i=0; i<m; i++)
for (i=0; i<numConstraintRows; i++)
if (findex[i] >= 0)
order[j++].index = i;
dIASSERT (j==m);
dIASSERT (j==numConstraintRows);
#endif
for (int iteration=0; iteration < num_iterations; iteration++) {
@@ -315,7 +318,7 @@ void btSorLcpSolver::SOR_LCP(int m, int nb, dRealMutablePtr J, int *jb,
if (iteration < 2) {
// for the first two iterations, solve the constraints in
// the given order
for (i=0; i<m; i++) {
for (i=0; i<numConstraintRows; i++) {
order[i].error = i;
order[i].findex = findex[i];
order[i].index = i;
@@ -324,13 +327,13 @@ void btSorLcpSolver::SOR_LCP(int m, int nb, dRealMutablePtr J, int *jb,
else {
// sort the constraints so that the ones converging slowest
// get solved last. use the absolute (not relative) error.
for (i=0; i<m; i++) {
float v1 = dFabs (lambda[i]);
for (i=0; i<numConstraintRows; i++) {
float v1 = dFabs (lambdaAccumulatedImpulse[i]);
float v2 = dFabs (last_lambda[i]);
float max = (v1 > v2) ? v1 : v2;
if (max > 0) {
//@@@ relative error: order[i].error = dFabs(lambda[i]-last_lambda[i])/max;
order[i].error = dFabs(lambda[i]-last_lambda[i]);
//@@@ relative error: order[i].error = dFabs(lambdaAccumulatedImpulse[i]-last_lambda[i])/max;
order[i].error = dFabs(lambdaAccumulatedImpulse[i]-last_lambda[i]);
}
else {
order[i].error = dInfinity;
@@ -339,11 +342,11 @@ void btSorLcpSolver::SOR_LCP(int m, int nb, dRealMutablePtr J, int *jb,
order[i].index = i;
}
}
qsort (order,m,sizeof(IndexError),&compare_index_error);
qsort (order,numConstraintRows,sizeof(IndexError),&compare_index_error);
#endif
#ifdef RANDOMLY_REORDER_CONSTRAINTS
if ((iteration & 7) == 0) {
for (i=1; i<m; ++i) {
for (i=1; i<numConstraintRows; ++i) {
IndexError tmp = order[i];
int swapi = dRandInt2(i+1);
order[i] = order[swapi];
@@ -352,19 +355,19 @@ void btSorLcpSolver::SOR_LCP(int m, int nb, dRealMutablePtr J, int *jb,
}
#endif
//@@@ potential optimization: swap lambda and last_lambda pointers rather
// than copying the data. we must make sure lambda is properly
//@@@ potential optimization: swap lambdaAccumulatedImpulse and last_lambda pointers rather
// than copying the data. we must make sure lambdaAccumulatedImpulse is properly
// returned to the caller
memcpy (last_lambda,lambda,m*sizeof(float));
memcpy (last_lambda,lambdaAccumulatedImpulse,numConstraintRows*sizeof(float));
for (int i=0; i<m; i++) {
// @@@ potential optimization: we could pre-sort J and iMJ, thereby
for (int i=0; i<numConstraintRows; i++) {
// @@@ potential optimization: we could pre-sort JconstraintAxis and iMJ, thereby
// linearizing access to those arrays. hmmm, this does not seem
// like a win, but we should think carefully about our memory
// access pattern.
int index = order[i].index;
J_ptr = J + index*12;
J_ptr = JconstraintAxis + index*12;
iMJ_ptr = iMJ + index*12;
// set the limits for this constraint. note that 'hicopy' is used.
@@ -372,70 +375,72 @@ void btSorLcpSolver::SOR_LCP(int m, int nb, dRealMutablePtr J, int *jb,
// direct LCP solving method, since that method only performs this
// limit adjustment once per time step, whereas this method performs
// once per iteration per constraint row.
// the constraints are ordered so that all lambda[] values needed have
// the constraints are ordered so that all lambdaAccumulatedImpulse[] values needed have
// already been computed.
if (findex[index] >= 0) {
hi[index] = btFabs (hicopy[index] * lambda[findex[index]]);
lo[index] = -hi[index];
higherLimit[index] = btFabs (hicopy[index] * lambdaAccumulatedImpulse[findex[index]]);
lowerLimit[index] = -higherLimit[index];
}
int b1 = jb[index*2];
int b2 = jb[index*2+1];
float delta = rhs[index] - lambda[index]*Ad[index];
dRealMutablePtr fc_ptr = invMforce + 6*b1;
dRealMutablePtr deltaVelocity = invMforce + 6*b1;
float deltaAppliedImpulse = rhs[index] - lambdaAccumulatedImpulse[index]*Ad[index];
// @@@ potential optimization: SIMD-ize this and the b2 >= 0 case
delta -=fc_ptr[0] * J_ptr[0] + fc_ptr[1] * J_ptr[1] +
fc_ptr[2] * J_ptr[2] + fc_ptr[3] * J_ptr[3] +
fc_ptr[4] * J_ptr[4] + fc_ptr[5] * J_ptr[5];
deltaAppliedImpulse -=deltaVelocity[0] * J_ptr[0] + deltaVelocity[1] * J_ptr[1] +
deltaVelocity[2] * J_ptr[2] + deltaVelocity[3] * J_ptr[3] +
deltaVelocity[4] * J_ptr[4] + deltaVelocity[5] * J_ptr[5];
// @@@ potential optimization: handle 1-body constraints in a separate
// loop to avoid the cost of test & jump?
if (b2 >= 0) {
fc_ptr = invMforce + 6*b2;
delta -=fc_ptr[0] * J_ptr[6] + fc_ptr[1] * J_ptr[7] +
fc_ptr[2] * J_ptr[8] + fc_ptr[3] * J_ptr[9] +
fc_ptr[4] * J_ptr[10] + fc_ptr[5] * J_ptr[11];
deltaVelocity = invMforce + 6*b2;
deltaAppliedImpulse -=deltaVelocity[0] * J_ptr[6] + deltaVelocity[1] * J_ptr[7] +
deltaVelocity[2] * J_ptr[8] + deltaVelocity[3] * J_ptr[9] +
deltaVelocity[4] * J_ptr[10] + deltaVelocity[5] * J_ptr[11];
}
// compute lambda and clamp it to [lo,hi].
// compute lambdaAccumulatedImpulse and clamp it to [lowerLimit,higherLimit].
// @@@ potential optimization: does SSE have clamping instructions
// to save test+jump penalties here?
float new_lambda = lambda[index] + delta;
if (new_lambda < lo[index]) {
delta = lo[index]-lambda[index];
lambda[index] = lo[index];
float sum = lambdaAccumulatedImpulse[index] + deltaAppliedImpulse;
if (sum < lowerLimit[index]) {
deltaAppliedImpulse = lowerLimit[index]-lambdaAccumulatedImpulse[index];
lambdaAccumulatedImpulse[index] = lowerLimit[index];
}
else if (new_lambda > hi[index]) {
delta = hi[index]-lambda[index];
lambda[index] = hi[index];
else if (sum > higherLimit[index]) {
deltaAppliedImpulse = higherLimit[index]-lambdaAccumulatedImpulse[index];
lambdaAccumulatedImpulse[index] = higherLimit[index];
}
else {
lambda[index] = new_lambda;
lambdaAccumulatedImpulse[index] = sum;
}
//@@@ a trick that may or may not help
//float ramp = (1-((float)(iteration+1)/(float)num_iterations));
//delta *= ramp;
//deltaAppliedImpulse *= ramp;
// update invMforce.
// @@@ potential optimization: SIMD for this and the b2 >= 0 case
fc_ptr = invMforce + 6*b1;
fc_ptr[0] += delta * iMJ_ptr[0];
fc_ptr[1] += delta * iMJ_ptr[1];
fc_ptr[2] += delta * iMJ_ptr[2];
fc_ptr[3] += delta * iMJ_ptr[3];
fc_ptr[4] += delta * iMJ_ptr[4];
fc_ptr[5] += delta * iMJ_ptr[5];
deltaVelocity = invMforce + 6*b1;
deltaVelocity[0] += deltaAppliedImpulse * iMJ_ptr[0];
deltaVelocity[1] += deltaAppliedImpulse * iMJ_ptr[1];
deltaVelocity[2] += deltaAppliedImpulse * iMJ_ptr[2];
deltaVelocity[3] += deltaAppliedImpulse * iMJ_ptr[3];
deltaVelocity[4] += deltaAppliedImpulse * iMJ_ptr[4];
deltaVelocity[5] += deltaAppliedImpulse * iMJ_ptr[5];
// @@@ potential optimization: handle 1-body constraints in a separate
// loop to avoid the cost of test & jump?
if (b2 >= 0) {
fc_ptr = invMforce + 6*b2;
fc_ptr[0] += delta * iMJ_ptr[6];
fc_ptr[1] += delta * iMJ_ptr[7];
fc_ptr[2] += delta * iMJ_ptr[8];
fc_ptr[3] += delta * iMJ_ptr[9];
fc_ptr[4] += delta * iMJ_ptr[10];
fc_ptr[5] += delta * iMJ_ptr[11];
deltaVelocity = invMforce + 6*b2;
deltaVelocity[0] += deltaAppliedImpulse * iMJ_ptr[6];
deltaVelocity[1] += deltaAppliedImpulse * iMJ_ptr[7];
deltaVelocity[2] += deltaAppliedImpulse * iMJ_ptr[8];
deltaVelocity[3] += deltaAppliedImpulse * iMJ_ptr[9];
deltaVelocity[4] += deltaAppliedImpulse * iMJ_ptr[10];
deltaVelocity[5] += deltaAppliedImpulse * iMJ_ptr[11];
}
}
}
@@ -443,10 +448,7 @@ void btSorLcpSolver::SOR_LCP(int m, int nb, dRealMutablePtr J, int *jb,
}
//------------------------------------------------------------------------------
void btSorLcpSolver::SolveInternal1 (
float global_cfm,
float global_erp,
const btAlignedObjectArray<btOdeSolverBody*> &body, int nb,
void btSorLcpSolver::SolveInternal1 (const btAlignedObjectArray<btOdeSolverBody*> &body, int nb,
btAlignedObjectArray<btOdeJoint*> &joint,
int nj, const btContactSolverInfo& solverInfo,
btStackAlloc* stackAlloc)
@@ -475,16 +477,16 @@ void btSorLcpSolver::SolveInternal1 (
// for all bodies, compute the inertia tensor and its inverse in the global
// frame, and compute the rotational force and add it to the torque
// accumulator. I and invI are a vertical stack of 3x4 matrices, one per body.
// accumulator. I and inverseInertiaWorld are a vertical stack of 3x4 matrices, one per body.
dRealAllocaArray (I,3*4*nb);
dRealAllocaArray (invI,3*4*nb);
dRealAllocaArray (inverseInertiaWorld,3*4*nb);
/* for (i=0; i<nb; i++) {
dMatrix3 tmp;
// compute inertia tensor in global frame
dMULTIPLY2_333 (tmp,body[i]->m_I,body[i]->m_R);
// compute inverse inertia tensor in global frame
dMULTIPLY2_333 (tmp,body[i]->m_invI,body[i]->m_R);
dMULTIPLY0_333 (invI+i*12,body[i]->m_R,tmp);
dMULTIPLY0_333 (inverseInertiaWorld+i*12,body[i]->m_R,tmp);
// compute rotational force
dCROSS (body[i]->m_tacc,-=,body[i]->getAngularVelocity(),tmp);
}
@@ -497,7 +499,7 @@ void btSorLcpSolver::SolveInternal1 (
// compute inverse inertia tensor in global frame
dMULTIPLY2_333 (tmp,body[i]->m_invI,body[i]->m_R);
dMULTIPLY0_333 (invI+i*12,body[i]->m_R,tmp);
dMULTIPLY0_333 (inverseInertiaWorld+i*12,body[i]->m_R,tmp);
// compute rotational force
// dMULTIPLY0_331 (tmp,I+i*12,body[i]->m_angularVelocity);
// dCROSS (body[i]->m_tacc,-=,body[i]->m_angularVelocity,tmp);
@@ -506,16 +508,16 @@ void btSorLcpSolver::SolveInternal1 (
// get joint information (m = total constraint dimension, nub = number of unbounded variables).
// joints with m=0 are inactive and are removed from the joints array
// get joint information (numConstraintRows = total constraint dimension, nub = number of unbounded variables).
// joints with numConstraintRows=0 are inactive and are removed from the joints array
// entirely, so that the code that follows does not consider them.
//@@@ do we really need to save all the info1's
btOdeJoint::Info1 *info = (btOdeJoint::Info1*) ALLOCA (nj*sizeof(btOdeJoint::Info1));
for (i=0, j=0; j<nj; j++) { // i=dest, j=src
joint[j]->GetInfo1 (info+i);
dIASSERT (info[i].m >= 0 && info[i].m <= 6 && info[i].nub >= 0 && info[i].nub <= info[i].m);
if (info[i].m > 0) {
dIASSERT (info[i].m_numConstraintRows >= 0 && info[i].m_numConstraintRows <= 6 && info[i].nub >= 0 && info[i].nub <= info[i].m_numConstraintRows);
if (info[i].m_numConstraintRows > 0) {
joint[i] = joint[j];
i++;
}
@@ -523,34 +525,34 @@ void btSorLcpSolver::SolveInternal1 (
nj = i;
// create the row offset array
int m = 0;
int *ofs = (int*) ALLOCA (nj*sizeof(int));
int numConstraintRows = 0;
int *constraintRowOffsets = (int*) ALLOCA (nj*sizeof(int));
for (i=0; i<nj; i++) {
ofs[i] = m;
m += info[i].m;
constraintRowOffsets[i] = numConstraintRows;
numConstraintRows += info[i].m_numConstraintRows;
}
// if there are constraints, compute the constraint force
dRealAllocaArray (J,m*12);
int *jb = (int*) ALLOCA (m*2*sizeof(int));
if (m > 0) {
dRealAllocaArray (JconstraintAxis,numConstraintRows*12);
int *jb = (int*) ALLOCA (numConstraintRows*2*sizeof(int));
if (numConstraintRows > 0) {
// create a constraint equation right hand side vector `c', a constraint
// force mixing vector `cfm', and LCP low and high bound vectors, and an
// 'findex' vector.
dRealAllocaArray (c,m);
dRealAllocaArray (cfm,m);
dRealAllocaArray (lo,m);
dRealAllocaArray (hi,m);
dRealAllocaArray (c_rhs,numConstraintRows);
dRealAllocaArray (cfm,numConstraintRows);
dRealAllocaArray (lowerLimit,numConstraintRows);
dRealAllocaArray (higherLimit,numConstraintRows);
int *findex = (int*) ALLOCA (m*sizeof(int));
int *findex = (int*) ALLOCA (numConstraintRows*sizeof(int));
dSetZero1 (c,m);
dSetValue1 (cfm,m,global_cfm);
dSetValue1 (lo,m,-dInfinity);
dSetValue1 (hi,m, dInfinity);
for (i=0; i<m; i++) findex[i] = -1;
dSetZero1 (c_rhs,numConstraintRows);
dSetValue1 (cfm,numConstraintRows,solverInfo.m_globalCfm);
dSetValue1 (lowerLimit,numConstraintRows,-dInfinity);
dSetValue1 (higherLimit,numConstraintRows, dInfinity);
for (i=0; i<numConstraintRows; i++) findex[i] = -1;
// get jacobian data from constraints. an m*12 matrix will be created
// get jacobian data from constraints. an numConstraintRows*12 matrix will be created
// to store the two jacobian blocks from each constraint. it has this
// format:
//
@@ -563,30 +565,30 @@ void btSorLcpSolver::SolveInternal1 (
// (lll) = linear jacobian data
// (aaa) = angular jacobian data
//
dSetZero1 (J,m*12);
dSetZero1 (JconstraintAxis,numConstraintRows*12);
btOdeJoint::Info2 Jinfo;
Jinfo.rowskip = 12;
Jinfo.fps = stepsize1;
Jinfo.erp = global_erp;
Jinfo.erp = solverInfo.m_erp;
for (i=0; i<nj; i++) {
Jinfo.J1l = J + ofs[i]*12;
Jinfo.J1a = Jinfo.J1l + 3;
Jinfo.J2l = Jinfo.J1l + 6;
Jinfo.J2a = Jinfo.J1l + 9;
Jinfo.c = c + ofs[i];
Jinfo.cfm = cfm + ofs[i];
Jinfo.lo = lo + ofs[i];
Jinfo.hi = hi + ofs[i];
Jinfo.findex = findex + ofs[i];
Jinfo.m_J1linearAxis = JconstraintAxis + constraintRowOffsets[i]*12;
Jinfo.m_J1angularAxis = Jinfo.m_J1linearAxis + 3;
Jinfo.m_J2linearAxis = Jinfo.m_J1linearAxis + 6;
Jinfo.m_J2angularAxis = Jinfo.m_J1linearAxis + 9;
Jinfo.m_constraintError = c_rhs + constraintRowOffsets[i];
Jinfo.cfm = cfm + constraintRowOffsets[i];
Jinfo.m_lowerLimit = lowerLimit + constraintRowOffsets[i];
Jinfo.m_higherLimit = higherLimit + constraintRowOffsets[i];
Jinfo.findex = findex + constraintRowOffsets[i];
joint[i]->GetInfo2 (&Jinfo);
if (Jinfo.c[0] > solverInfo.m_maxErrorReduction)
Jinfo.c[0] = solverInfo.m_maxErrorReduction;
if (Jinfo.m_constraintError[0] > solverInfo.m_maxErrorReduction)
Jinfo.m_constraintError[0] = solverInfo.m_maxErrorReduction;
// adjust returned findex values for global index numbering
for (j=0; j<info[i].m; j++) {
if (findex[ofs[i] + j] >= 0)
findex[ofs[i] + j] += ofs[i];
for (j=0; j<info[i].m_numConstraintRows; j++) {
if (findex[constraintRowOffsets[i] + j] >= 0)
findex[constraintRowOffsets[i] + j] += constraintRowOffsets[i];
}
}
@@ -595,13 +597,13 @@ void btSorLcpSolver::SolveInternal1 (
for (i=0; i<nj; i++) {
int b1 = (joint[i]->node[0].body) ? (joint[i]->node[0].body->m_odeTag) : -1;
int b2 = (joint[i]->node[1].body) ? (joint[i]->node[1].body->m_odeTag) : -1;
for (j=0; j<info[i].m; j++) {
for (j=0; j<info[i].m_numConstraintRows; j++) {
jb_ptr[0] = b1;
jb_ptr[1] = b2;
jb_ptr += 2;
}
}
dIASSERT (jb_ptr == jb+2*m);
dIASSERT (jb_ptr == jb+2*numConstraintRows);
// compute the right hand side `rhs'
dRealAllocaArray (tmp1,nb*6);
@@ -610,47 +612,48 @@ void btSorLcpSolver::SolveInternal1 (
btScalar body_invMass = body[i]->m_invMass;
for (j=0; j<3; j++)
tmp1[i*6+j] = body[i]->m_facc[j] * body_invMass + body[i]->m_linearVelocity[j] * stepsize1;
dMULTIPLY0_331NEW (tmp1 + i*6 + 3,=,invI + i*12,body[i]->m_tacc);
dMULTIPLY0_331NEW (tmp1 + i*6 + 3,=,inverseInertiaWorld + i*12,body[i]->m_tacc);
for (j=0; j<3; j++)
tmp1[i*6+3+j] += body[i]->m_angularVelocity[j] * stepsize1;
}
// put J*tmp1 into rhs
dRealAllocaArray (rhs,m);
multiply_J (m,J,jb,tmp1,rhs);
// put JconstraintAxis*tmp1 into rhs
dRealAllocaArray (rhs,numConstraintRows);
multiply_J (numConstraintRows,JconstraintAxis,jb,tmp1,rhs);
// complete rhs
for (i=0; i<m; i++) rhs[i] = c[i]*stepsize1 - rhs[i];
for (i=0; i<numConstraintRows; i++)
rhs[i] = c_rhs[i]*stepsize1 - rhs[i];
// scale CFM
for (i=0; i<m; i++)
for (i=0; i<numConstraintRows; i++)
cfm[i] *= stepsize1;
// load lambda from the value saved on the previous iteration
dRealAllocaArray (lambda,m);
// load lambdaAccumulatedImpulse from the value saved on the previous iteration
dRealAllocaArray (lambdaAccumulatedImpulse,numConstraintRows);
#ifdef WARM_STARTING
dSetZero1 (lambda,m); //@@@ shouldn't be necessary
dSetZero1 (lambdaAccumulatedImpulse,numConstraintRows); //@@@ shouldn't be necessary
for (i=0; i<nj; i++) {
memcpy (lambda+ofs[i],joint[i]->lambda,info[i].m * sizeof(btScalar));
memcpy (lambdaAccumulatedImpulse+constraintRowOffsets[i],joint[i]->lambdaAccumulatedImpulse,info[i].numConstraintRows * sizeof(btScalar));
}
#endif
// solve the LCP problem and get lambda and invM*constraint_force
// solve the LCP problem and get lambdaAccumulatedImpulse and invM*constraint_force
dRealAllocaArray (cforce,nb*6);
/// SOR_LCP
SOR_LCP (m,nb,J,jb,body,invI,lambda,cforce,rhs,lo,hi,cfm,findex,numIter,sOr,stackAlloc);
SOR_LCP (numConstraintRows,nb,JconstraintAxis,jb,body,inverseInertiaWorld,lambdaAccumulatedImpulse,cforce,rhs,lowerLimit,higherLimit,cfm,findex,numIter,sOr,stackAlloc);
#ifdef WARM_STARTING
// save lambda for the next iteration
// save lambdaAccumulatedImpulse for the next iteration
//@@@ note that this doesn't work for contact joints yet, as they are
// recreated every iteration
for (i=0; i<nj; i++) {
memcpy (joint[i]->lambda,lambda+ofs[i],info[i].m * sizeof(btScalar));
memcpy (joint[i]->lambdaAccumulatedImpulse,lambdaAccumulatedImpulse+constraintRowOffsets[i],info[i].numConstraintRows * sizeof(btScalar));
}
#endif
// note that the SOR method overwrites rhs and J at this point, so
// note that the SOR method overwrites rhs and JconstraintAxis at this point, so
// they should not be used again.
// add stepsize * cforce to the body velocity
for (i=0; i<nb; i++) {
@@ -677,7 +680,7 @@ void btSorLcpSolver::SolveInternal1 (
{
body[i]->m_tacc[j] *= solverInfo.m_timeStep;
}
dMULTIPLY0_331NEW(angvel,+=,invI + i*12,body[i]->m_tacc);
dMULTIPLY0_331NEW(angvel,+=,inverseInertiaWorld + i*12,body[i]->m_tacc);
body[i]->m_angularVelocity = angvel;
}
//stackAlloc->endBlock(saBlock);//Remo: 10.10.2007