/************************************************************************* * * * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith. * * All rights reserved. Email: russ@q12.org Web: www.q12.org * * * * This library is free software; you can redistribute it and/or * * modify it under the terms of EITHER: * * (1) The GNU Lesser bteral Public License as published by the Free * * Software Foundation; either version 2.1 of the License, or (at * * your option) any later version. The text of the GNU Lesser * * bteral Public License is included with this library in the * * file LICENSE.TXT. * * (2) The BSD-style license that is included with this library in * * the file LICENSE-BSD.TXT. * * * * This library is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files * * LICENSE.TXT and LICENSE-BSD.TXT for more details. * * * *************************************************************************/ #include "SorLcp.h" #include "OdeSolverBody.h" #ifdef USE_SOR_SOLVER // SOR LCP taken from ode quickstep, for comparisons to Bullet sequential impulse solver. #include "LinearMath/btScalar.h" #include "BulletDynamics/Dynamics/btRigidBody.h" #include #include //FLT_MAX #ifdef WIN32 #include #endif #include #include #if defined (WIN32) #include #else #if defined (__FreeBSD__) #include #else #include #endif #endif #include "OdeJoint.h" #include "BulletDynamics/ConstraintSolver/btContactSolverInfo.h" //////////////////////////////////////////////////////////////////// //math stuff #include "OdeMacros.h" //*************************************************************************** // configuration // for the SOR and CG methods: // uncomment the following line to use warm starting. this definitely // help for motor-driven joints. unfortunately it appears to hurt // with high-friction contacts using the SOR method. use with care //#define WARM_STARTING 1 // for the SOR method: // uncomment the following line to randomly reorder constraint rows // during the solution. depending on the situation, this can help a lot // or hardly at all, but it doesn't seem to hurt. #define RANDOMLY_REORDER_CONSTRAINTS 1 //*************************************************************************** // various common computations involving the matrix J // compute iMJ = inv(M)*J' inline void compute_invM_JT (int m, dRealMutablePtr J, dRealMutablePtr iMJ, int *jb, //OdeSolverBody* const *body, const btAlignedObjectArray &body, dRealPtr invI) { int i,j; dRealMutablePtr iMJ_ptr = iMJ; dRealMutablePtr J_ptr = J; for (i=0; im_invMass; for (j=0; j<3; j++) iMJ_ptr[j] = k*J_ptr[j]; dMULTIPLY0_331 (iMJ_ptr + 3, invI + 12*b1, J_ptr + 3); if (b2 >= 0) { k = body[b2]->m_invMass; for (j=0; j<3; j++) iMJ_ptr[j+6] = k*J_ptr[j+6]; dMULTIPLY0_331 (iMJ_ptr + 9, invI + 12*b2, J_ptr + 9); } J_ptr += 12; iMJ_ptr += 12; } } #if 0 static void multiply_invM_JTSpecial (int m, int nb, dRealMutablePtr iMJ, int *jb, dRealMutablePtr in, dRealMutablePtr out,int onlyBody1,int onlyBody2) { int i,j; dRealMutablePtr out_ptr1 = out + onlyBody1*6; for (j=0; j<6; j++) out_ptr1[j] = 0; if (onlyBody2 >= 0) { out_ptr1 = out + onlyBody2*6; for (j=0; j<6; j++) out_ptr1[j] = 0; } dRealPtr iMJ_ptr = iMJ; for (i=0; i= 0) { out_ptr = out + b2*6; for (j=0; j<6; j++) out_ptr[j] += iMJ_ptr[j] * in[i]; } } iMJ_ptr += 6; } } #endif // compute out = inv(M)*J'*in. #if 0 static void multiply_invM_JT (int m, int nb, dRealMutablePtr iMJ, int *jb, dRealMutablePtr in, dRealMutablePtr out) { int i,j; dSetZero1 (out,6*nb); dRealPtr iMJ_ptr = iMJ; for (i=0; i= 0) { out_ptr = out + b2*6; for (j=0; j<6; j++) out_ptr[j] += iMJ_ptr[j] * in[i]; } iMJ_ptr += 6; } } #endif // compute out = J*in. inline void multiply_J (int m, dRealMutablePtr J, int *jb, dRealMutablePtr in, dRealMutablePtr out) { int i,j; dRealPtr J_ptr = J; for (i=0; i= 0) { in_ptr = in + b2*6; for (j=0; j<6; j++) sum += J_ptr[j] * in_ptr[j]; } J_ptr += 6; out[i] = sum; } } //*************************************************************************** // SOR-LCP method // nb is the number of bodies in the body array. // J is an m*12 matrix of constraint rows // jb is an array of first and second body numbers for each constraint row // invI is the global frame inverse inertia for each body (stacked 3x3 matrices) // // this returns lambda and fc (the constraint force). // note: fc is returned as inv(M)*J'*lambda, the constraint force is actually J'*lambda // // b, lo and hi are modified on exit //------------------------------------------------------------------------------ ATTRIBUTE_ALIGNED16(struct) IndexError { btScalar error; // error to sort on int findex; int index; // row index }; //------------------------------------------------------------------------------ void SorLcpSolver::SOR_LCP(int m, int nb, dRealMutablePtr J, int *jb, const btAlignedObjectArray &body, dRealPtr invI, dRealMutablePtr lambda, dRealMutablePtr invMforce, dRealMutablePtr rhs, dRealMutablePtr lo, dRealMutablePtr hi, dRealPtr cfm, int *findex, int numiter,float overRelax, btStackAlloc* stackAlloc ) { //btBlock* saBlock = stackAlloc->beginBlock();//Remo: 10.10.2007 AutoBlockSa asaBlock(stackAlloc); const int num_iterations = numiter; const float sor_w = overRelax; // SOR over-relaxation parameter int i,j; #ifdef WARM_STARTING // for warm starting, this seems to be necessary to prevent // jerkiness in motor-driven joints. i have no idea why this works. for (i=0; i= 0) { for (j=6; j<12; j++) sum += iMJ_ptr[j] * J_ptr[j]; } iMJ_ptr += 12; J_ptr += 12; Ad[i] = sor_w / sum;//(sum + cfm[i]); } // scale J and b by Ad J_ptr = J; for (i=0; i= 0) order[j++].index = i; dIASSERT (j==m); #endif for (int iteration=0; iteration < num_iterations; iteration++) { #ifdef REORDER_CONSTRAINTS // constraints with findex < 0 always come first. if (iteration < 2) { // for the first two iterations, solve the constraints in // the given order for (i=0; i v2) ? v1 : v2; if (max > 0) { //@@@ relative error: order[i].error = dFabs(lambda[i]-last_lambda[i])/max; order[i].error = dFabs(lambda[i]-last_lambda[i]); } else { order[i].error = dInfinity; } order[i].findex = findex[i]; order[i].index = i; } } qsort (order,m,sizeof(IndexError),&compare_index_error); #endif #ifdef RANDOMLY_REORDER_CONSTRAINTS if ((iteration & 7) == 0) { for (i=1; i= 0) { hi[index] = btFabs (hicopy[index] * lambda[findex[index]]); lo[index] = -hi[index]; } int b1 = jb[index*2]; int b2 = jb[index*2+1]; float delta = rhs[index] - lambda[index]*Ad[index]; dRealMutablePtr fc_ptr = invMforce + 6*b1; // @@@ potential optimization: SIMD-ize this and the b2 >= 0 case delta -=fc_ptr[0] * J_ptr[0] + fc_ptr[1] * J_ptr[1] + fc_ptr[2] * J_ptr[2] + fc_ptr[3] * J_ptr[3] + fc_ptr[4] * J_ptr[4] + fc_ptr[5] * J_ptr[5]; // @@@ potential optimization: handle 1-body constraints in a separate // loop to avoid the cost of test & jump? if (b2 >= 0) { fc_ptr = invMforce + 6*b2; delta -=fc_ptr[0] * J_ptr[6] + fc_ptr[1] * J_ptr[7] + fc_ptr[2] * J_ptr[8] + fc_ptr[3] * J_ptr[9] + fc_ptr[4] * J_ptr[10] + fc_ptr[5] * J_ptr[11]; } // compute lambda and clamp it to [lo,hi]. // @@@ potential optimization: does SSE have clamping instructions // to save test+jump penalties here? float new_lambda = lambda[index] + delta; if (new_lambda < lo[index]) { delta = lo[index]-lambda[index]; lambda[index] = lo[index]; } else if (new_lambda > hi[index]) { delta = hi[index]-lambda[index]; lambda[index] = hi[index]; } else { lambda[index] = new_lambda; } //@@@ a trick that may or may not help //float ramp = (1-((float)(iteration+1)/(float)num_iterations)); //delta *= ramp; // update invMforce. // @@@ potential optimization: SIMD for this and the b2 >= 0 case fc_ptr = invMforce + 6*b1; fc_ptr[0] += delta * iMJ_ptr[0]; fc_ptr[1] += delta * iMJ_ptr[1]; fc_ptr[2] += delta * iMJ_ptr[2]; fc_ptr[3] += delta * iMJ_ptr[3]; fc_ptr[4] += delta * iMJ_ptr[4]; fc_ptr[5] += delta * iMJ_ptr[5]; // @@@ potential optimization: handle 1-body constraints in a separate // loop to avoid the cost of test & jump? if (b2 >= 0) { fc_ptr = invMforce + 6*b2; fc_ptr[0] += delta * iMJ_ptr[6]; fc_ptr[1] += delta * iMJ_ptr[7]; fc_ptr[2] += delta * iMJ_ptr[8]; fc_ptr[3] += delta * iMJ_ptr[9]; fc_ptr[4] += delta * iMJ_ptr[10]; fc_ptr[5] += delta * iMJ_ptr[11]; } } } //stackAlloc->endBlock(saBlock);//Remo: 10.10.2007 } //------------------------------------------------------------------------------ void SorLcpSolver::SolveInternal1 ( float global_cfm, float global_erp, const btAlignedObjectArray &body, int nb, btAlignedObjectArray &joint, int nj, const btContactSolverInfo& solverInfo, btStackAlloc* stackAlloc) { //btBlock* saBlock = stackAlloc->beginBlock();//Remo: 10.10.2007 AutoBlockSa asaBlock(stackAlloc); int numIter = solverInfo.m_numIterations; float sOr = solverInfo.m_sor; int i,j; btScalar stepsize1 = dRecip(solverInfo.m_timeStep); // number all bodies in the body list - set their tag values for (i=0; im_odeTag = i; // make a local copy of the joint array, because we might want to modify it. // (the "BU_Joint *const*" declaration says we're allowed to modify the joints // but not the joint array, because the caller might need it unchanged). //@@@ do we really need to do this? we'll be sorting constraint rows individually, not joints //BU_Joint **joint = (BU_Joint**) alloca (nj * sizeof(BU_Joint*)); //memcpy (joint,_joint,nj * sizeof(BU_Joint*)); // for all bodies, compute the inertia tensor and its inverse in the global // frame, and compute the rotational force and add it to the torque // accumulator. I and invI are a vertical stack of 3x4 matrices, one per body. dRealAllocaArray (I,3*4*nb); dRealAllocaArray (invI,3*4*nb); /* for (i=0; im_I,body[i]->m_R); // compute inverse inertia tensor in global frame dMULTIPLY2_333 (tmp,body[i]->m_invI,body[i]->m_R); dMULTIPLY0_333 (invI+i*12,body[i]->m_R,tmp); // compute rotational force dCROSS (body[i]->m_tacc,-=,body[i]->getAngularVelocity(),tmp); } */ for (i=0; im_I,body[i]->m_R); dMULTIPLY0_333 (I+i*12,body[i]->m_R,tmp); // compute inverse inertia tensor in global frame dMULTIPLY2_333 (tmp,body[i]->m_invI,body[i]->m_R); dMULTIPLY0_333 (invI+i*12,body[i]->m_R,tmp); // compute rotational force // dMULTIPLY0_331 (tmp,I+i*12,body[i]->m_angularVelocity); // dCROSS (body[i]->m_tacc,-=,body[i]->m_angularVelocity,tmp); } // get joint information (m = total constraint dimension, nub = number of unbounded variables). // joints with m=0 are inactive and are removed from the joints array // entirely, so that the code that follows does not consider them. //@@@ do we really need to save all the info1's BU_Joint::Info1 *info = (BU_Joint::Info1*) ALLOCA (nj*sizeof(BU_Joint::Info1)); for (i=0, j=0; jGetInfo1 (info+i); dIASSERT (info[i].m >= 0 && info[i].m <= 6 && info[i].nub >= 0 && info[i].nub <= info[i].m); if (info[i].m > 0) { joint[i] = joint[j]; i++; } } nj = i; // create the row offset array int m = 0; int *ofs = (int*) ALLOCA (nj*sizeof(int)); for (i=0; i 0) { // create a constraint equation right hand side vector `c', a constraint // force mixing vector `cfm', and LCP low and high bound vectors, and an // 'findex' vector. dRealAllocaArray (c,m); dRealAllocaArray (cfm,m); dRealAllocaArray (lo,m); dRealAllocaArray (hi,m); int *findex = (int*) ALLOCA (m*sizeof(int)); dSetZero1 (c,m); dSetValue1 (cfm,m,global_cfm); dSetValue1 (lo,m,-dInfinity); dSetValue1 (hi,m, dInfinity); for (i=0; iGetInfo2 (&Jinfo); if (Jinfo.c[0] > solverInfo.m_maxErrorReduction) Jinfo.c[0] = solverInfo.m_maxErrorReduction; // adjust returned findex values for global index numbering for (j=0; j= 0) findex[ofs[i] + j] += ofs[i]; } } // create an array of body numbers for each joint row int *jb_ptr = jb; for (i=0; inode[0].body) ? (joint[i]->node[0].body->m_odeTag) : -1; int b2 = (joint[i]->node[1].body) ? (joint[i]->node[1].body->m_odeTag) : -1; for (j=0; jm_invMass; for (j=0; j<3; j++) tmp1[i*6+j] = body[i]->m_facc[j] * body_invMass + body[i]->m_linearVelocity[j] * stepsize1; dMULTIPLY0_331NEW (tmp1 + i*6 + 3,=,invI + i*12,body[i]->m_tacc); for (j=0; j<3; j++) tmp1[i*6+3+j] += body[i]->m_angularVelocity[j] * stepsize1; } // put J*tmp1 into rhs dRealAllocaArray (rhs,m); multiply_J (m,J,jb,tmp1,rhs); // complete rhs for (i=0; ilambda,info[i].m * sizeof(btScalar)); } #endif // solve the LCP problem and get lambda and invM*constraint_force dRealAllocaArray (cforce,nb*6); /// SOR_LCP SOR_LCP (m,nb,J,jb,body,invI,lambda,cforce,rhs,lo,hi,cfm,findex,numIter,sOr,stackAlloc); #ifdef WARM_STARTING // save lambda for the next iteration //@@@ note that this doesn't work for contact joints yet, as they are // recreated every iteration for (i=0; ilambda,lambda+ofs[i],info[i].m * sizeof(btScalar)); } #endif // note that the SOR method overwrites rhs and J at this point, so // they should not be used again. // add stepsize * cforce to the body velocity for (i=0; im_linearVelocity[j] += solverInfo.m_timeStep* cforce[i*6+j]; for (j=0; j<3; j++) body[i]->m_angularVelocity[j] += solverInfo.m_timeStep* cforce[i*6+3+j]; } } // compute the velocity update: // add stepsize * invM * fe to the body velocity for (i=0; im_invMass; btVector3 linvel = body[i]->m_linearVelocity; btVector3 angvel = body[i]->m_angularVelocity; for (j=0; j<3; j++) { linvel[j] += solverInfo.m_timeStep * body_invMass * body[i]->m_facc[j]; } for (j=0; j<3; j++) { body[i]->m_tacc[j] *= solverInfo.m_timeStep; } dMULTIPLY0_331NEW(angvel,+=,invI + i*12,body[i]->m_tacc); body[i]->m_angularVelocity = angvel; } //stackAlloc->endBlock(saBlock);//Remo: 10.10.2007 } #endif //USE_SOR_SOLVER