updated Bullet sequential impulse constraint solver, so it matches 100% ODE PGS quickstep solver innerloop, mainly by renaming variables...

2008-11-26 00:27:35 +00:00
parent 09aa2dbbe7
commit 82047e601e
28 changed files with 646 additions and 1135 deletions
--- a/Extras/quickstep/btSorLcp.cpp
+++ b/Extras/quickstep/btSorLcp.cpp
@@ -65,29 +65,30 @@ subject to the following restrictions:
 // during the solution. depending on the situation, this can help a lot
 // or hardly at all, but it doesn't seem to hurt.

-#define RANDOMLY_REORDER_CONSTRAINTS 1
+//#define RANDOMLY_REORDER_CONSTRAINTS 1

 //***************************************************************************
-// various common computations involving the matrix J
-// compute iMJ = inv(M)*J'
-inline void compute_invM_JT (int m, dRealMutablePtr J, dRealMutablePtr iMJ, int *jb,
+// various common computations involving the matrix JconstraintAxis
+// compute iMJ = inv(M)*JconstraintAxis'
+inline void compute_invM_JT (int numConstraintRows, dRealMutablePtr JconstraintAxis, dRealMutablePtr iMJ, int *jb,
 	//OdeSolverBody* const *body,
 	 const btAlignedObjectArray<btOdeSolverBody*> &body,
-	dRealPtr invI)
+	dRealPtr inverseInertiaWorld)
 {
 	int i,j;
 	dRealMutablePtr iMJ_ptr = iMJ;
-	dRealMutablePtr J_ptr = J;
-	for (i=0; i<m; i++) {
+	dRealMutablePtr J_ptr = JconstraintAxis;
+	for (i=0; i<numConstraintRows; i++) {
 		int b1 = jb[i*2];
 		int b2 = jb[i*2+1];
-		btScalar k = body[b1]->m_invMass;
-		for (j=0; j<3; j++) iMJ_ptr[j] = k*J_ptr[j];
-		dMULTIPLY0_331 (iMJ_ptr + 3, invI + 12*b1, J_ptr + 3);
+		for (j=0; j<3; j++) 
+			iMJ_ptr[j] = body[b1]->m_invMass*J_ptr[j];
+		dMULTIPLY0_331 (iMJ_ptr + 3, inverseInertiaWorld + 12*b1, J_ptr + 3);
+
 		if (b2 >= 0) {
-			k = body[b2]->m_invMass;
-			for (j=0; j<3; j++) iMJ_ptr[j+6] = k*J_ptr[j+6];
-			dMULTIPLY0_331 (iMJ_ptr + 9, invI + 12*b2, J_ptr + 9);
+			for (j=0; j<3; j++) 
+				iMJ_ptr[j+6] = body[b2]->m_invMass*J_ptr[j+6];//inv mass * constraint (normal) axis
+			dMULTIPLY0_331 (iMJ_ptr + 9, inverseInertiaWorld + 12*b2, J_ptr + 9);//inverse inertia world * constraint (normal) axis
 		}
 		J_ptr += 12;
 		iMJ_ptr += 12;
@@ -95,7 +96,7 @@ inline void compute_invM_JT (int m, dRealMutablePtr J, dRealMutablePtr iMJ, int
 }

 #if 0
-static void multiply_invM_JTSpecial (int m, int nb, dRealMutablePtr iMJ, int *jb,
+static void multiply_invM_JTSpecial (int numConstraintRows, int nb, dRealMutablePtr iMJ, int *jb,
 	dRealMutablePtr in, dRealMutablePtr out,int onlyBody1,int onlyBody2)
 {
 	int i,j;
@@ -116,7 +117,7 @@ static void multiply_invM_JTSpecial (int m, int nb, dRealMutablePtr iMJ, int *jb
 	}

 	dRealPtr iMJ_ptr = iMJ;
-	for (i=0; i<m; i++) {
+	for (i=0; i<numConstraintRows; i++) {

 		int b1 = jb[i*2];

@@ -147,16 +148,16 @@ static void multiply_invM_JTSpecial (int m, int nb, dRealMutablePtr iMJ, int *jb
 #endif


-// compute out = inv(M)*J'*in.
+// compute out = inv(M)*JconstraintAxis'*in.

 #if 0
-static void multiply_invM_JT (int m, int nb, dRealMutablePtr iMJ, int *jb,
+static void multiply_invM_JT (int numConstraintRows, int nb, dRealMutablePtr iMJ, int *jb,
 	dRealMutablePtr in, dRealMutablePtr out)
 {
 	int i,j;
 	dSetZero1 (out,6*nb);
 	dRealPtr iMJ_ptr = iMJ;
-	for (i=0; i<m; i++) {
+	for (i=0; i<numConstraintRows; i++) {
 		int b1 = jb[i*2];
 		int b2 = jb[i*2+1];
 		dRealMutablePtr out_ptr = out + b1*6;
@@ -173,13 +174,13 @@ static void multiply_invM_JT (int m, int nb, dRealMutablePtr iMJ, int *jb,
 #endif


-// compute out = J*in.
-inline void multiply_J (int m, dRealMutablePtr J, int *jb,
+// compute out = JconstraintAxis*in.
+inline void multiply_J (int numConstraintRows, dRealMutablePtr JconstraintAxis, int *jb,
 	dRealMutablePtr in, dRealMutablePtr out)
 {
 	int i,j;
-	dRealPtr J_ptr = J;
-	for (i=0; i<m; i++) {
+	dRealPtr J_ptr = JconstraintAxis;
+	for (i=0; i<numConstraintRows; i++) {
 		int b1 = jb[i*2];
 		int b2 = jb[i*2+1];
 		btScalar sum = 0;
@@ -199,14 +200,14 @@ inline void multiply_J (int m, dRealMutablePtr J, int *jb,
 // SOR-LCP method

 // nb is the number of bodies in the body array.
-// J is an m*12 matrix of constraint rows
+// JconstraintAxis is an numConstraintRows*12 matrix of constraint rows
 // jb is an array of first and second body numbers for each constraint row
-// invI is the global frame inverse inertia for each body (stacked 3x3 matrices)
+// inverseInertiaWorld is the global frame inverse inertia for each body (stacked 3x3 matrices)
 //
-// this returns lambda and fc (the constraint force).
-// note: fc is returned as inv(M)*J'*lambda, the constraint force is actually J'*lambda
+// this returns lambdaAccumulatedImpulse and fc (the constraint force).
+// note: fc is returned as inv(M)*JconstraintAxis'*lambdaAccumulatedImpulse, the constraint force is actually JconstraintAxis'*lambdaAccumulatedImpulse
 //
-// b, lo and hi are modified on exit
+// b, lowerLimit and higherLimit are modified on exit

 //------------------------------------------------------------------------------
 ATTRIBUTE_ALIGNED16(struct) IndexError {
@@ -216,10 +217,10 @@ ATTRIBUTE_ALIGNED16(struct) IndexError {
 };

 //------------------------------------------------------------------------------
-void btSorLcpSolver::SOR_LCP(int m, int nb, dRealMutablePtr J, int *jb, 
+void btSorLcpSolver::SOR_LCP(int numConstraintRows, int nb, dRealMutablePtr JconstraintAxis, int *jb, 
 	const btAlignedObjectArray<btOdeSolverBody*> &body,
-	dRealPtr invI, dRealMutablePtr lambda, dRealMutablePtr invMforce, dRealMutablePtr rhs,
-	dRealMutablePtr lo, dRealMutablePtr hi, dRealPtr cfm, int *findex,
+	dRealPtr inverseInertiaWorld, dRealMutablePtr lambdaAccumulatedImpulse, dRealMutablePtr invMforce, dRealMutablePtr rhs,
+	dRealMutablePtr lowerLimit, dRealMutablePtr higherLimit, dRealPtr cfm, int *findex,
 	int numiter,float overRelax,
 	btStackAlloc* stackAlloc
 	)
@@ -237,49 +238,51 @@ void btSorLcpSolver::SOR_LCP(int m, int nb, dRealMutablePtr J, int *jb,
 #ifdef WARM_STARTING
 	// for warm starting, this seems to be necessary to prevent
 	// jerkiness in motor-driven joints. i have no idea why this works.
-	for (i=0; i<m; i++) lambda[i] *= 0.9;
+	for (i=0; i<numConstraintRows; i++) lambdaAccumulatedImpulse[i] *= 0.9;
 #else
-	dSetZero1 (lambda,m);
+	dSetZero1 (lambdaAccumulatedImpulse,numConstraintRows);
 #endif

-	// the lambda computed at the previous iteration.
+	// the lambdaAccumulatedImpulse computed at the previous iteration.
 	// this is used to measure error for when we are reordering the indexes.
-	dRealAllocaArray (last_lambda,m);
+	dRealAllocaArray (last_lambda,numConstraintRows);

-	// a copy of the 'hi' vector in case findex[] is being used
-	dRealAllocaArray (hicopy,m);
-	memcpy (hicopy,hi,m*sizeof(float));
+	// a copy of the 'higherLimit' vector in case findex[] is being used
+	dRealAllocaArray (hicopy,numConstraintRows);
+	memcpy (hicopy,higherLimit,numConstraintRows*sizeof(float));

-	// precompute iMJ = inv(M)*J'
-	dRealAllocaArray (iMJ,m*12);
-	compute_invM_JT (m,J,iMJ,jb,body,invI);
+	// precompute iMJ = inv(M)*JconstraintAxis'
+	dRealAllocaArray (iMJ,numConstraintRows*12);
+	compute_invM_JT (numConstraintRows,JconstraintAxis,iMJ,jb,body,inverseInertiaWorld);

-	// compute fc=(inv(M)*J')*lambda. we will incrementally maintain fc
-	// as we change lambda.
+	// compute fc=(inv(M)*JconstraintAxis')*lambdaAccumulatedImpulse. we will incrementally maintain fc
+	// as we change lambdaAccumulatedImpulse.
 #ifdef WARM_STARTING
-	multiply_invM_JT (m,nb,iMJ,jb,lambda,fc);
+	multiply_invM_JT (numConstraintRows,nb,iMJ,jb,lambdaAccumulatedImpulse,fc);
 #else
 	dSetZero1 (invMforce,nb*6);
 #endif

 	// precompute 1 / diagonals of A
-	dRealAllocaArray (Ad,m);
+	dRealAllocaArray (Ad,numConstraintRows);
 	dRealPtr iMJ_ptr = iMJ;
-	dRealMutablePtr J_ptr = J;
-	for (i=0; i<m; i++) {
+	dRealMutablePtr J_ptr = JconstraintAxis;
+	for (i=0; i<numConstraintRows; i++) {
 		float sum = 0;
-		for (j=0; j<6; j++) sum += iMJ_ptr[j] * J_ptr[j];
+		for (j=0; j<6; j++) 
+			sum += iMJ_ptr[j] * J_ptr[j];
 		if (jb[i*2+1] >= 0) {
-			for (j=6; j<12; j++) sum += iMJ_ptr[j] * J_ptr[j];
+			for (j=6; j<12; j++) 
+				sum += iMJ_ptr[j] * J_ptr[j];
 		}
 		iMJ_ptr += 12;
 		J_ptr += 12;
 		Ad[i] = sor_w / sum;//(sum + cfm[i]);
 	}

-	// scale J and b by Ad
-	J_ptr = J;
-	for (i=0; i<m; i++) {
+	// scale JconstraintAxis and b by Ad
+	J_ptr = JconstraintAxis;
+	for (i=0; i<numConstraintRows; i++) {
 		for (j=0; j<12; j++) {
 			J_ptr[0] *= Ad[i];
 			J_ptr++;
@@ -288,24 +291,24 @@ void btSorLcpSolver::SOR_LCP(int m, int nb, dRealMutablePtr J, int *jb,
 	}

 	// scale Ad by CFM
-	for (i=0; i<m; i++)
+	for (i=0; i<numConstraintRows; i++)
 		Ad[i] *= cfm[i];

 	// order to solve constraint rows in
-	//IndexError *order = (IndexError*) alloca (m*sizeof(IndexError));
-	IndexError *order = (IndexError*) ALLOCA (m*sizeof(IndexError));
+	//IndexError *order = (IndexError*) alloca (numConstraintRows*sizeof(IndexError));
+	IndexError *order = (IndexError*) ALLOCA (numConstraintRows*sizeof(IndexError));
 	

 #ifndef REORDER_CONSTRAINTS
 	// make sure constraints with findex < 0 come first.
 	j=0;
-	for (i=0; i<m; i++)
+	for (i=0; i<numConstraintRows; i++)
 		if (findex[i] < 0)
 			order[j++].index = i;
-	for (i=0; i<m; i++)
+	for (i=0; i<numConstraintRows; i++)
 		if (findex[i] >= 0)
 			order[j++].index = i;
-	dIASSERT (j==m);
+	dIASSERT (j==numConstraintRows);
 #endif

 	for (int iteration=0; iteration < num_iterations; iteration++) {
@@ -315,7 +318,7 @@ void btSorLcpSolver::SOR_LCP(int m, int nb, dRealMutablePtr J, int *jb,
 		if (iteration < 2) {
 			// for the first two iterations, solve the constraints in
 			// the given order
-			for (i=0; i<m; i++) {
+			for (i=0; i<numConstraintRows; i++) {
 				order[i].error = i;
 				order[i].findex = findex[i];
 				order[i].index = i;
@@ -324,13 +327,13 @@ void btSorLcpSolver::SOR_LCP(int m, int nb, dRealMutablePtr J, int *jb,
 		else {
 			// sort the constraints so that the ones converging slowest
 			// get solved last. use the absolute (not relative) error.
-			for (i=0; i<m; i++) {
-				float v1 = dFabs (lambda[i]);
+			for (i=0; i<numConstraintRows; i++) {
+				float v1 = dFabs (lambdaAccumulatedImpulse[i]);
 				float v2 = dFabs (last_lambda[i]);
 				float max = (v1 > v2) ? v1 : v2;
 				if (max > 0) {
-					//@@@ relative error: order[i].error = dFabs(lambda[i]-last_lambda[i])/max;
-					order[i].error = dFabs(lambda[i]-last_lambda[i]);
+					//@@@ relative error: order[i].error = dFabs(lambdaAccumulatedImpulse[i]-last_lambda[i])/max;
+					order[i].error = dFabs(lambdaAccumulatedImpulse[i]-last_lambda[i]);
 				}
 				else {
 					order[i].error = dInfinity;
@@ -339,11 +342,11 @@ void btSorLcpSolver::SOR_LCP(int m, int nb, dRealMutablePtr J, int *jb,
 				order[i].index = i;
 			}
 		}
-		qsort (order,m,sizeof(IndexError),&compare_index_error);
+		qsort (order,numConstraintRows,sizeof(IndexError),&compare_index_error);
 #endif
 #ifdef RANDOMLY_REORDER_CONSTRAINTS
                if ((iteration & 7) == 0) {
-			for (i=1; i<m; ++i) {
+			for (i=1; i<numConstraintRows; ++i) {
 				IndexError tmp = order[i];
 				int swapi = dRandInt2(i+1);
 				order[i] = order[swapi];
@@ -352,19 +355,19 @@ void btSorLcpSolver::SOR_LCP(int m, int nb, dRealMutablePtr J, int *jb,
                }
 #endif

-		//@@@ potential optimization: swap lambda and last_lambda pointers rather
-		//    than copying the data. we must make sure lambda is properly
+		//@@@ potential optimization: swap lambdaAccumulatedImpulse and last_lambda pointers rather
+		//    than copying the data. we must make sure lambdaAccumulatedImpulse is properly
 		//    returned to the caller
-		memcpy (last_lambda,lambda,m*sizeof(float));
+		memcpy (last_lambda,lambdaAccumulatedImpulse,numConstraintRows*sizeof(float));

-		for (int i=0; i<m; i++) {
-			// @@@ potential optimization: we could pre-sort J and iMJ, thereby
+		for (int i=0; i<numConstraintRows; i++) {
+			// @@@ potential optimization: we could pre-sort JconstraintAxis and iMJ, thereby
 			//     linearizing access to those arrays. hmmm, this does not seem
 			//     like a win, but we should think carefully about our memory
 			//     access pattern.

 			int index = order[i].index;
-			J_ptr = J + index*12;
+			J_ptr = JconstraintAxis + index*12;
 			iMJ_ptr = iMJ + index*12;

 			// set the limits for this constraint. note that 'hicopy' is used.
@@ -372,70 +375,72 @@ void btSorLcpSolver::SOR_LCP(int m, int nb, dRealMutablePtr J, int *jb,
 			// direct LCP solving method, since that method only performs this
 			// limit adjustment once per time step, whereas this method performs
 			// once per iteration per constraint row.
-			// the constraints are ordered so that all lambda[] values needed have
+			// the constraints are ordered so that all lambdaAccumulatedImpulse[] values needed have
 			// already been computed.
 			if (findex[index] >= 0) {
-				hi[index] = btFabs (hicopy[index] * lambda[findex[index]]);
-				lo[index] = -hi[index];
+				higherLimit[index] = btFabs (hicopy[index] * lambdaAccumulatedImpulse[findex[index]]);
+				lowerLimit[index] = -higherLimit[index];
 			}

 			int b1 = jb[index*2];
 			int b2 = jb[index*2+1];
-			float delta = rhs[index] - lambda[index]*Ad[index];
-			dRealMutablePtr fc_ptr = invMforce + 6*b1;
+			
+			dRealMutablePtr deltaVelocity = invMforce + 6*b1;

+			float deltaAppliedImpulse = rhs[index] - lambdaAccumulatedImpulse[index]*Ad[index];
+			
 			// @@@ potential optimization: SIMD-ize this and the b2 >= 0 case
-			delta -=fc_ptr[0] * J_ptr[0] + fc_ptr[1] * J_ptr[1] +
-				fc_ptr[2] * J_ptr[2] + fc_ptr[3] * J_ptr[3] +
-				fc_ptr[4] * J_ptr[4] + fc_ptr[5] * J_ptr[5];
+			deltaAppliedImpulse -=deltaVelocity[0] * J_ptr[0] + deltaVelocity[1] * J_ptr[1] +
+				deltaVelocity[2] * J_ptr[2] + deltaVelocity[3] * J_ptr[3] +
+				deltaVelocity[4] * J_ptr[4] + deltaVelocity[5] * J_ptr[5];
 			// @@@ potential optimization: handle 1-body constraints in a separate
 			//     loop to avoid the cost of test & jump?
 			if (b2 >= 0) {
-				fc_ptr = invMforce + 6*b2;
-				delta -=fc_ptr[0] * J_ptr[6] + fc_ptr[1] * J_ptr[7] +
-					fc_ptr[2] * J_ptr[8] + fc_ptr[3] * J_ptr[9] +
-					fc_ptr[4] * J_ptr[10] + fc_ptr[5] * J_ptr[11];
+				deltaVelocity = invMforce + 6*b2;
+				deltaAppliedImpulse -=deltaVelocity[0] * J_ptr[6] + deltaVelocity[1] * J_ptr[7] +
+					deltaVelocity[2] * J_ptr[8] + deltaVelocity[3] * J_ptr[9] +
+					deltaVelocity[4] * J_ptr[10] + deltaVelocity[5] * J_ptr[11];
 			}

-			// compute lambda and clamp it to [lo,hi].
+			// compute lambdaAccumulatedImpulse and clamp it to [lowerLimit,higherLimit].
 			// @@@ potential optimization: does SSE have clamping instructions
 			//     to save test+jump penalties here?
-			float new_lambda = lambda[index] + delta;
-			if (new_lambda < lo[index]) {
-				delta = lo[index]-lambda[index];
-				lambda[index] = lo[index];
+			float sum = lambdaAccumulatedImpulse[index] + deltaAppliedImpulse;
+			if (sum < lowerLimit[index]) {
+				deltaAppliedImpulse = lowerLimit[index]-lambdaAccumulatedImpulse[index];
+				lambdaAccumulatedImpulse[index] = lowerLimit[index];
 			}
-			else if (new_lambda > hi[index]) {
-				delta = hi[index]-lambda[index];
-				lambda[index] = hi[index];
+			else if (sum > higherLimit[index]) {
+				deltaAppliedImpulse = higherLimit[index]-lambdaAccumulatedImpulse[index];
+				lambdaAccumulatedImpulse[index] = higherLimit[index];
 			}
 			else {
-				lambda[index] = new_lambda;
+				lambdaAccumulatedImpulse[index] = sum;
 			}

 			//@@@ a trick that may or may not help
 			//float ramp = (1-((float)(iteration+1)/(float)num_iterations));
-			//delta *= ramp;
+			//deltaAppliedImpulse *= ramp;

 			// update invMforce.
 			// @@@ potential optimization: SIMD for this and the b2 >= 0 case
-			fc_ptr = invMforce + 6*b1;
-			fc_ptr[0] += delta * iMJ_ptr[0];
-			fc_ptr[1] += delta * iMJ_ptr[1];
-			fc_ptr[2] += delta * iMJ_ptr[2];
-			fc_ptr[3] += delta * iMJ_ptr[3];
-			fc_ptr[4] += delta * iMJ_ptr[4];
-			fc_ptr[5] += delta * iMJ_ptr[5];
+			deltaVelocity = invMforce + 6*b1;
+			deltaVelocity[0] += deltaAppliedImpulse * iMJ_ptr[0];
+			deltaVelocity[1] += deltaAppliedImpulse * iMJ_ptr[1];
+			deltaVelocity[2] += deltaAppliedImpulse * iMJ_ptr[2];
+			deltaVelocity[3] += deltaAppliedImpulse * iMJ_ptr[3];
+			deltaVelocity[4] += deltaAppliedImpulse * iMJ_ptr[4];
+			deltaVelocity[5] += deltaAppliedImpulse * iMJ_ptr[5];
 			// @@@ potential optimization: handle 1-body constraints in a separate
 			//     loop to avoid the cost of test & jump?
 			if (b2 >= 0) {
-				fc_ptr = invMforce + 6*b2;
-				fc_ptr[0] += delta * iMJ_ptr[6];
-				fc_ptr[1] += delta * iMJ_ptr[7];
-				fc_ptr[2] += delta * iMJ_ptr[8];
-				fc_ptr[3] += delta * iMJ_ptr[9];
-				fc_ptr[4] += delta * iMJ_ptr[10];
-				fc_ptr[5] += delta * iMJ_ptr[11];
+				deltaVelocity = invMforce + 6*b2;
+				deltaVelocity[0] += deltaAppliedImpulse * iMJ_ptr[6];
+				deltaVelocity[1] += deltaAppliedImpulse * iMJ_ptr[7];
+				deltaVelocity[2] += deltaAppliedImpulse * iMJ_ptr[8];
+				deltaVelocity[3] += deltaAppliedImpulse * iMJ_ptr[9];
+				deltaVelocity[4] += deltaAppliedImpulse * iMJ_ptr[10];
+				deltaVelocity[5] += deltaAppliedImpulse * iMJ_ptr[11];
 			}
 		}
 	}
@@ -443,10 +448,7 @@ void btSorLcpSolver::SOR_LCP(int m, int nb, dRealMutablePtr J, int *jb,
 }

 //------------------------------------------------------------------------------
-void btSorLcpSolver::SolveInternal1 (
-			float global_cfm,
-			float global_erp,
-			const btAlignedObjectArray<btOdeSolverBody*> &body, int nb,
+void btSorLcpSolver::SolveInternal1 (const btAlignedObjectArray<btOdeSolverBody*> &body, int nb,
 			btAlignedObjectArray<btOdeJoint*> &joint, 
 			int nj, const btContactSolverInfo& solverInfo,
 			btStackAlloc* stackAlloc)
@@ -475,16 +477,16 @@ void btSorLcpSolver::SolveInternal1 (

 	// for all bodies, compute the inertia tensor and its inverse in the global
 	// frame, and compute the rotational force and add it to the torque
-	// accumulator. I and invI are a vertical stack of 3x4 matrices, one per body.
+	// accumulator. I and inverseInertiaWorld are a vertical stack of 3x4 matrices, one per body.
 	dRealAllocaArray (I,3*4*nb);
-	dRealAllocaArray (invI,3*4*nb);
+	dRealAllocaArray (inverseInertiaWorld,3*4*nb);
 /*	for (i=0; i<nb; i++) {
 		dMatrix3 tmp;
 		// compute inertia tensor in global frame
 		dMULTIPLY2_333 (tmp,body[i]->m_I,body[i]->m_R);
 		// compute inverse inertia tensor in global frame
 		dMULTIPLY2_333 (tmp,body[i]->m_invI,body[i]->m_R);
-		dMULTIPLY0_333 (invI+i*12,body[i]->m_R,tmp);
+		dMULTIPLY0_333 (inverseInertiaWorld+i*12,body[i]->m_R,tmp);
 		// compute rotational force
 		dCROSS (body[i]->m_tacc,-=,body[i]->getAngularVelocity(),tmp);
 	}
@@ -497,7 +499,7 @@ void btSorLcpSolver::SolveInternal1 (

 		// compute inverse inertia tensor in global frame
 		dMULTIPLY2_333 (tmp,body[i]->m_invI,body[i]->m_R);
-		dMULTIPLY0_333 (invI+i*12,body[i]->m_R,tmp);
+		dMULTIPLY0_333 (inverseInertiaWorld+i*12,body[i]->m_R,tmp);
 		// compute rotational force
 //		dMULTIPLY0_331 (tmp,I+i*12,body[i]->m_angularVelocity);
 //		dCROSS (body[i]->m_tacc,-=,body[i]->m_angularVelocity,tmp);
@@ -506,16 +508,16 @@ void btSorLcpSolver::SolveInternal1 (



-	// get joint information (m = total constraint dimension, nub = number of unbounded variables).
-	// joints with m=0 are inactive and are removed from the joints array
+	// get joint information (numConstraintRows = total constraint dimension, nub = number of unbounded variables).
+	// joints with numConstraintRows=0 are inactive and are removed from the joints array
 	// entirely, so that the code that follows does not consider them.
 	//@@@ do we really need to save all the info1's
 	btOdeJoint::Info1 *info = (btOdeJoint::Info1*) ALLOCA (nj*sizeof(btOdeJoint::Info1));
 	
 	for (i=0, j=0; j<nj; j++) {	// i=dest, j=src
 		joint[j]->GetInfo1 (info+i);
-		dIASSERT (info[i].m >= 0 && info[i].m <= 6 && info[i].nub >= 0 && info[i].nub <= info[i].m);
-		if (info[i].m > 0) {
+		dIASSERT (info[i].m_numConstraintRows >= 0 && info[i].m_numConstraintRows <= 6 && info[i].nub >= 0 && info[i].nub <= info[i].m_numConstraintRows);
+		if (info[i].m_numConstraintRows > 0) {
 			joint[i] = joint[j];
 			i++;
 		}
@@ -523,34 +525,34 @@ void btSorLcpSolver::SolveInternal1 (
 	nj = i;

 	// create the row offset array
-	int m = 0;
-	int *ofs = (int*) ALLOCA (nj*sizeof(int));
+	int numConstraintRows = 0;
+	int *constraintRowOffsets = (int*) ALLOCA (nj*sizeof(int));
 	for (i=0; i<nj; i++) {
-		ofs[i] = m;
-		m += info[i].m;
+		constraintRowOffsets[i] = numConstraintRows;
+		numConstraintRows += info[i].m_numConstraintRows;
 	}

 	// if there are constraints, compute the constraint force
-	dRealAllocaArray (J,m*12);
-	int *jb = (int*) ALLOCA (m*2*sizeof(int));
-	if (m > 0) {
+	dRealAllocaArray (JconstraintAxis,numConstraintRows*12);
+	int *jb = (int*) ALLOCA (numConstraintRows*2*sizeof(int));
+	if (numConstraintRows > 0) {
 		// create a constraint equation right hand side vector `c', a constraint
 		// force mixing vector `cfm', and LCP low and high bound vectors, and an
 		// 'findex' vector.
-		dRealAllocaArray (c,m);
-		dRealAllocaArray (cfm,m);
-		dRealAllocaArray (lo,m);
-		dRealAllocaArray (hi,m);
+		dRealAllocaArray (c_rhs,numConstraintRows);
+		dRealAllocaArray (cfm,numConstraintRows);
+		dRealAllocaArray (lowerLimit,numConstraintRows);
+		dRealAllocaArray (higherLimit,numConstraintRows);

-		int *findex = (int*) ALLOCA (m*sizeof(int));
+		int *findex = (int*) ALLOCA (numConstraintRows*sizeof(int));

-		dSetZero1 (c,m);
-		dSetValue1 (cfm,m,global_cfm);
-		dSetValue1 (lo,m,-dInfinity);
-		dSetValue1 (hi,m, dInfinity);
-		for (i=0; i<m; i++) findex[i] = -1;
+		dSetZero1 (c_rhs,numConstraintRows);
+		dSetValue1 (cfm,numConstraintRows,solverInfo.m_globalCfm);
+		dSetValue1 (lowerLimit,numConstraintRows,-dInfinity);
+		dSetValue1 (higherLimit,numConstraintRows, dInfinity);
+		for (i=0; i<numConstraintRows; i++) findex[i] = -1;

-		// get jacobian data from constraints. an m*12 matrix will be created
+		// get jacobian data from constraints. an numConstraintRows*12 matrix will be created
 		// to store the two jacobian blocks from each constraint. it has this
 		// format:
 		//
@@ -563,30 +565,30 @@ void btSorLcpSolver::SolveInternal1 (
 		//   (lll) = linear jacobian data
 		//   (aaa) = angular jacobian data
 		//
-		dSetZero1 (J,m*12);
+		dSetZero1 (JconstraintAxis,numConstraintRows*12);
 		btOdeJoint::Info2 Jinfo;
 		Jinfo.rowskip = 12;
 		Jinfo.fps = stepsize1;
-		Jinfo.erp = global_erp;
+		Jinfo.erp = solverInfo.m_erp;
 		for (i=0; i<nj; i++) {
-			Jinfo.J1l = J + ofs[i]*12;
-			Jinfo.J1a = Jinfo.J1l + 3;
-			Jinfo.J2l = Jinfo.J1l + 6;
-			Jinfo.J2a = Jinfo.J1l + 9;
-			Jinfo.c = c + ofs[i];
-			Jinfo.cfm = cfm + ofs[i];
-			Jinfo.lo = lo + ofs[i];
-			Jinfo.hi = hi + ofs[i];
-			Jinfo.findex = findex + ofs[i];
+			Jinfo.m_J1linearAxis = JconstraintAxis + constraintRowOffsets[i]*12;
+			Jinfo.m_J1angularAxis = Jinfo.m_J1linearAxis + 3;
+			Jinfo.m_J2linearAxis = Jinfo.m_J1linearAxis + 6;
+			Jinfo.m_J2angularAxis = Jinfo.m_J1linearAxis + 9;
+			Jinfo.m_constraintError = c_rhs + constraintRowOffsets[i];
+			Jinfo.cfm = cfm + constraintRowOffsets[i];
+			Jinfo.m_lowerLimit = lowerLimit + constraintRowOffsets[i];
+			Jinfo.m_higherLimit = higherLimit + constraintRowOffsets[i];
+			Jinfo.findex = findex + constraintRowOffsets[i];
 			joint[i]->GetInfo2 (&Jinfo);

-			if (Jinfo.c[0] > solverInfo.m_maxErrorReduction)
-				Jinfo.c[0] = solverInfo.m_maxErrorReduction;
+			if (Jinfo.m_constraintError[0] > solverInfo.m_maxErrorReduction)
+				Jinfo.m_constraintError[0] = solverInfo.m_maxErrorReduction;

 			// adjust returned findex values for global index numbering
-			for (j=0; j<info[i].m; j++) {
-				if (findex[ofs[i] + j] >= 0)
-					findex[ofs[i] + j] += ofs[i];
+			for (j=0; j<info[i].m_numConstraintRows; j++) {
+				if (findex[constraintRowOffsets[i] + j] >= 0)
+					findex[constraintRowOffsets[i] + j] += constraintRowOffsets[i];
 			}
 		}

@@ -595,13 +597,13 @@ void btSorLcpSolver::SolveInternal1 (
 		for (i=0; i<nj; i++) {
 			int b1 = (joint[i]->node[0].body) ? (joint[i]->node[0].body->m_odeTag) : -1;
 			int b2 = (joint[i]->node[1].body) ? (joint[i]->node[1].body->m_odeTag) : -1;
-			for (j=0; j<info[i].m; j++) {
+			for (j=0; j<info[i].m_numConstraintRows; j++) {
 				jb_ptr[0] = b1;
 				jb_ptr[1] = b2;
 				jb_ptr += 2;
 			}
 		}
-		dIASSERT (jb_ptr == jb+2*m);
+		dIASSERT (jb_ptr == jb+2*numConstraintRows);

 		// compute the right hand side `rhs'
 		dRealAllocaArray (tmp1,nb*6);
@@ -610,47 +612,48 @@ void btSorLcpSolver::SolveInternal1 (
 			btScalar body_invMass = body[i]->m_invMass;
 			for (j=0; j<3; j++)
 				tmp1[i*6+j] = body[i]->m_facc[j] * body_invMass + body[i]->m_linearVelocity[j] * stepsize1;
-			dMULTIPLY0_331NEW (tmp1 + i*6 + 3,=,invI + i*12,body[i]->m_tacc);
+			dMULTIPLY0_331NEW (tmp1 + i*6 + 3,=,inverseInertiaWorld + i*12,body[i]->m_tacc);
 			for (j=0; j<3; j++)
 				tmp1[i*6+3+j] += body[i]->m_angularVelocity[j] * stepsize1;
 		}

-		// put J*tmp1 into rhs
-		dRealAllocaArray (rhs,m);
-		multiply_J (m,J,jb,tmp1,rhs);
+		// put JconstraintAxis*tmp1 into rhs
+		dRealAllocaArray (rhs,numConstraintRows);
+		multiply_J (numConstraintRows,JconstraintAxis,jb,tmp1,rhs);

 		// complete rhs
-		for (i=0; i<m; i++) rhs[i] = c[i]*stepsize1 - rhs[i];
+		for (i=0; i<numConstraintRows; i++) 
+			rhs[i] = c_rhs[i]*stepsize1 - rhs[i];

 		// scale CFM
-		for (i=0; i<m; i++)
+		for (i=0; i<numConstraintRows; i++)
 			cfm[i] *= stepsize1;

-		// load lambda from the value saved on the previous iteration
-		dRealAllocaArray (lambda,m);
+		// load lambdaAccumulatedImpulse from the value saved on the previous iteration
+		dRealAllocaArray (lambdaAccumulatedImpulse,numConstraintRows);
 #ifdef WARM_STARTING
-		dSetZero1 (lambda,m);	//@@@ shouldn't be necessary
+		dSetZero1 (lambdaAccumulatedImpulse,numConstraintRows);	//@@@ shouldn't be necessary
 		for (i=0; i<nj; i++) {
-			memcpy (lambda+ofs[i],joint[i]->lambda,info[i].m * sizeof(btScalar));
+			memcpy (lambdaAccumulatedImpulse+constraintRowOffsets[i],joint[i]->lambdaAccumulatedImpulse,info[i].numConstraintRows * sizeof(btScalar));
 		}
 #endif

-		// solve the LCP problem and get lambda and invM*constraint_force
+		// solve the LCP problem and get lambdaAccumulatedImpulse and invM*constraint_force
 		dRealAllocaArray (cforce,nb*6);

 		/// SOR_LCP
-		SOR_LCP (m,nb,J,jb,body,invI,lambda,cforce,rhs,lo,hi,cfm,findex,numIter,sOr,stackAlloc);
+		SOR_LCP (numConstraintRows,nb,JconstraintAxis,jb,body,inverseInertiaWorld,lambdaAccumulatedImpulse,cforce,rhs,lowerLimit,higherLimit,cfm,findex,numIter,sOr,stackAlloc);

 #ifdef WARM_STARTING
-		// save lambda for the next iteration
+		// save lambdaAccumulatedImpulse for the next iteration
 		//@@@ note that this doesn't work for contact joints yet, as they are
 		// recreated every iteration
 		for (i=0; i<nj; i++) {
-			memcpy (joint[i]->lambda,lambda+ofs[i],info[i].m * sizeof(btScalar));
+			memcpy (joint[i]->lambdaAccumulatedImpulse,lambdaAccumulatedImpulse+constraintRowOffsets[i],info[i].numConstraintRows * sizeof(btScalar));
 		}
 #endif

-		// note that the SOR method overwrites rhs and J at this point, so
+		// note that the SOR method overwrites rhs and JconstraintAxis at this point, so
 		// they should not be used again.
 		// add stepsize * cforce to the body velocity
 		for (i=0; i<nb; i++) {
@@ -677,7 +680,7 @@ void btSorLcpSolver::SolveInternal1 (
 		{
 			body[i]->m_tacc[j] *= solverInfo.m_timeStep;
 		}
-		dMULTIPLY0_331NEW(angvel,+=,invI + i*12,body[i]->m_tacc);
+		dMULTIPLY0_331NEW(angvel,+=,inverseInertiaWorld + i*12,body[i]->m_tacc);
 		body[i]->m_angularVelocity = angvel;
 	}
 	//stackAlloc->endBlock(saBlock);//Remo: 10.10.2007