work-in-progress jacobi gpu (still broken :(

This commit is contained in:
erwin coumans
2013-03-27 19:09:23 -07:00
parent b8c32a99cb
commit 2133712207
8 changed files with 660 additions and 52 deletions

View File

@@ -37,9 +37,9 @@ public:
:useOpenCL(true),
preferredOpenCLPlatformIndex(-1),
preferredOpenCLDeviceIndex(-1),
arraySizeX(41),
arraySizeY(44),
arraySizeZ(41),
arraySizeX(1),
arraySizeY(3),
arraySizeZ(1),
m_useConcaveMesh(false),
gapX(14.3),
gapY(14.0),

View File

@@ -63,7 +63,7 @@ void GpuConvexScene::setupScene(const ConstructionInfo& ci)
{
for (int k=0;k<ci.arraySizeZ;k++)
{
float mass = j==0? 0.f : 1.f;
float mass = 1.f;
btVector3 position((j&1)+i*2.2,2+j*2.,(j&1)+k*2.2);
@@ -84,6 +84,6 @@ void GpuConvexScene::setupScene(const ConstructionInfo& ci)
float camPos[4]={ci.arraySizeX,ci.arraySizeY/2,ci.arraySizeZ,0};
//float camPos[4]={1,12.5,1.5,0};
m_instancingRenderer->setCameraTargetPosition(camPos);
m_instancingRenderer->setCameraDistance(120);
m_instancingRenderer->setCameraDistance(20);
}

View File

@@ -13,15 +13,13 @@ ATTRIBUTE_ALIGNED16(struct) btGpuConstraint4
float m_jacCoeffInv[4];
float m_b[4];
float m_appliedRambdaDt[4];
float m_fJacCoeffInv[2]; // friction
float m_fAppliedRambdaDt[2]; // friction
unsigned int m_bodyA;
unsigned int m_bodyB;
unsigned int m_batchIdx;
unsigned int m_paddings[1];
int m_batchIdx;
unsigned int m_paddings;
inline void setFrictionCoeff(float value) { m_linear[3] = value; }
inline float getFrictionCoeff() const { return m_linear[3]; }

View File

@@ -38,7 +38,7 @@ struct btGpuJacobiSolverInternalData
cl_kernel m_countBodiesKernel;
cl_kernel m_contactToConstraintSplitKernel;
cl_kernel m_clearVelocitiesKernel;
cl_kernel m_averageVelocitiesKernel;
cl_kernel m_solveContactKernel;
cl_kernel m_solveFrictionKernel;
@@ -74,6 +74,12 @@ btGpuJacobiSolver::btGpuJacobiSolver(cl_context ctx, cl_device_id device, cl_com
btAssert(m_data->m_contactToConstraintSplitKernel);
m_data->m_clearVelocitiesKernel = btOpenCLUtils::compileCLKernelFromString( ctx, device, solverUtilsSource, "ClearVelocitiesKernel", &pErrNum, solverUtilsProg,additionalMacros );
btAssert(m_data->m_clearVelocitiesKernel);
m_data->m_averageVelocitiesKernel = btOpenCLUtils::compileCLKernelFromString( ctx, device, solverUtilsSource, "AverageVelocitiesKernel", &pErrNum, solverUtilsProg,additionalMacros );
btAssert(m_data->m_averageVelocitiesKernel);
m_data->m_solveContactKernel = btOpenCLUtils::compileCLKernelFromString( ctx, device, solverUtilsSource, "SolveContactJacobiKernel", &pErrNum, solverUtilsProg,additionalMacros );
btAssert(m_data->m_solveContactKernel );
@@ -90,6 +96,7 @@ btGpuJacobiSolver::~btGpuJacobiSolver()
clReleaseKernel(m_data->m_solveFrictionKernel);
clReleaseKernel(m_data->m_countBodiesKernel);
clReleaseKernel(m_data->m_contactToConstraintSplitKernel);
clReleaseKernel(m_data->m_averageVelocitiesKernel);
clReleaseKernel(m_data->m_clearVelocitiesKernel );
delete m_data->m_deltaLinearVelocities;
@@ -192,6 +199,54 @@ static __inline void solveContact(btGpuConstraint4& cs,
void solveContact3(btGpuConstraint4* cs,
btVector3* posAPtr, btVector3* linVelA, btVector3* angVelA, float invMassA, const btMatrix3x3& invInertiaA,
btVector3* posBPtr, btVector3* linVelB, btVector3* angVelB, float invMassB, const btMatrix3x3& invInertiaB,
btVector3* dLinVelA, btVector3* dAngVelA, btVector3* dLinVelB, btVector3* dAngVelB)
{
float minRambdaDt = 0;
float maxRambdaDt = FLT_MAX;
for(int ic=0; ic<4; ic++)
{
if( cs->m_jacCoeffInv[ic] == 0.f ) continue;
btVector3 angular0, angular1, linear;
btVector3 r0 = cs->m_worldPos[ic] - *posAPtr;
btVector3 r1 = cs->m_worldPos[ic] - *posBPtr;
setLinearAndAngular( -cs->m_linear, r0, r1, linear, angular0, angular1 );
float rambdaDt = calcRelVel( cs->m_linear, -cs->m_linear, angular0, angular1,
*linVelA+*dLinVelA, *angVelA+*dAngVelA, *linVelB+*dLinVelB, *angVelB+*dAngVelB ) + cs->m_b[ic];
rambdaDt *= cs->m_jacCoeffInv[ic];
{
float prevSum = cs->m_appliedRambdaDt[ic];
float updated = prevSum;
updated += rambdaDt;
updated = btMax( updated, minRambdaDt );
updated = btMin( updated, maxRambdaDt );
rambdaDt = updated - prevSum;
cs->m_appliedRambdaDt[ic] = updated;
}
btVector3 linImp0 = invMassA*linear*rambdaDt;
btVector3 linImp1 = invMassB*(-linear)*rambdaDt;
btVector3 angImp0 = (invInertiaA* angular0)*rambdaDt;
btVector3 angImp1 = (invInertiaB* angular1)*rambdaDt;
if (invMassA)
{
*dLinVelA += linImp0;
*dAngVelA += angImp0;
}
if (invMassB)
{
*dLinVelB += linImp1;
*dAngVelB += angImp1;
}
}
}
static inline void solveFriction(btGpuConstraint4& cs,
@@ -303,6 +358,8 @@ float calcJacCoeff(const btVector3& linear0, const btVector3& linear1, const btV
float jmj2 = invMass1;//dot3F4(linear1, linear1)*invMass1;
float jmj3 = btDot(mtMul3(angular1,*invInertia1), angular1);
return -1.f/((jmj0+jmj1)*countA+(jmj2+jmj3)*countB);
// return -1.f/((jmj0+jmj1)+(jmj2+jmj3));
}
@@ -426,8 +483,8 @@ float positionConstraintCoeff, int gIdx, btAlignedObjectArray<unsigned int>& bod
btMatrix3x3 invInertiaB = gShapes[bIdx].m_invInertiaWorld;//m_invInertia;
btGpuConstraint4 cs;
float countA = invMassA ? btScalar(bodyCount[aIdx]) : 1;
float countB = invMassB ? btScalar(bodyCount[bIdx]) : 1;
float countA = invMassA ? (float)(bodyCount[aIdx]) : 1;
float countB = invMassB ? (float)(bodyCount[bIdx]) : 1;
setConstraint4( posA, linVelA, angVelA, invMassA, invInertiaA, posB, linVelB, angVelB, invMassB, invInertiaB,
&gContact[gIdx], dt, positionDrift, positionConstraintCoeff,countA,countB,
&cs );
@@ -499,7 +556,7 @@ void btGpuJacobiSolver::solveGroupHost(btRigidBodyCL* bodies,btInertiaCL* inerti
solverInfo.m_positionConstraintCoeff,
i, bodyCount);
}
int maxIter = 14;
int maxIter = 1;
btAlignedObjectArray<btVector3> deltaLinearVelocities;
@@ -592,7 +649,7 @@ void btGpuJacobiSolver::solveGroupHost(btRigidBodyCL* bodies,btInertiaCL* inerti
}
#if 0
//solve friction
@@ -674,7 +731,7 @@ void btGpuJacobiSolver::solveGroupHost(btRigidBodyCL* bodies,btInertiaCL* inerti
}
}
#endif
}
@@ -697,6 +754,422 @@ void btGpuJacobiSolver::solveGroupHost(btRigidBodyCL* bodies,btInertiaCL* inerti
}
void btGpuJacobiSolver::solveGroupMixedHost(btRigidBodyCL* bodiesCPU,btInertiaCL* inertiasCPU,int numBodies,btContact4* manifoldPtrCpu, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btJacobiSolverInfo& solverInfo)
{
BT_PROFILE("btGpuJacobiSolver::solveGroup");
btAlignedObjectArray<unsigned int> bodyCount;
bool useHost = true;
btAlignedObjectArray<btInt2> contactConstraintOffsets;
btOpenCLArray<btContact4> manifoldGPU(m_context,m_queue);
manifoldGPU.resize(numManifolds);
manifoldGPU.copyFromHostPointer(manifoldPtrCpu,numManifolds);
btAlignedObjectArray<unsigned int> offsetSplitBodies;
unsigned int totalNumSplitBodies;
btAlignedObjectArray<btGpuConstraint4> contactConstraints;
contactConstraints.resize(numManifolds);
btOpenCLArray<btRigidBodyCL> bodiesGPU(m_context,m_queue);
bodiesGPU.resize(numBodies);
btOpenCLArray<btInertiaCL> inertiasGPU(m_context,m_queue);
inertiasGPU.resize(numBodies);
if (useHost)
{
bodyCount.resize(numBodies);
for (int i=0;i<numBodies;i++)
bodyCount[i] = 0;
contactConstraintOffsets.resize(numManifolds);
for (int i=0;i<numManifolds;i++)
{
int pa = manifoldPtrCpu[i].m_bodyAPtrAndSignBit;
int pb = manifoldPtrCpu[i].m_bodyBPtrAndSignBit;
bool isFixedA = (pa <0) || (pa == solverInfo.m_fixedBodyIndex);
bool isFixedB = (pb <0) || (pb == solverInfo.m_fixedBodyIndex);
int bodyIndexA = manifoldPtrCpu[i].getBodyA();
int bodyIndexB = manifoldPtrCpu[i].getBodyB();
if (!isFixedA)
{
contactConstraintOffsets[i].x = bodyCount[bodyIndexA];
bodyCount[bodyIndexA]++;
}
if (!isFixedB)
{
contactConstraintOffsets[i].y = bodyCount[bodyIndexB];
bodyCount[bodyIndexB]++;
}
}
offsetSplitBodies.resize(numBodies);
m_data->m_scan->executeHost(bodyCount,offsetSplitBodies,numBodies,&totalNumSplitBodies);
int numlastBody = bodyCount[numBodies-1];
totalNumSplitBodies += numlastBody;
for (int i=0;i<numManifolds;i++)
{
ContactToConstraintKernel(&manifoldPtrCpu[0],bodiesCPU,inertiasCPU,&contactConstraints[0],numManifolds,
solverInfo.m_deltaTime,
solverInfo.m_positionDrift,
solverInfo.m_positionConstraintCoeff,
i, bodyCount);
}
} else
{
// int numBodies = bodies->size();
// int numManifolds = manifoldPtr->size();
m_data->m_bodyCount->resize(numBodies);
unsigned int val=0;
btInt2 val2;
val2.x=0;
val2.y=0;
{
BT_PROFILE("m_filler");
m_data->m_contactConstraintOffsets->resize(numManifolds);
m_data->m_filler->execute(*m_data->m_bodyCount,val,numBodies);
m_data->m_filler->execute(*m_data->m_contactConstraintOffsets,val2,numManifolds);
}
{
BT_PROFILE("m_countBodiesKernel");
btLauncherCL launcher(this->m_queue,m_data->m_countBodiesKernel);
launcher.setBuffer(manifoldGPU.getBufferCL());
launcher.setBuffer(m_data->m_bodyCount->getBufferCL());
launcher.setBuffer(m_data->m_contactConstraintOffsets->getBufferCL());
launcher.setConst(numManifolds);
launcher.setConst(solverInfo.m_fixedBodyIndex);
launcher.launch1D(numManifolds);
}
m_data->m_contactConstraintOffsets->copyToHost(contactConstraintOffsets);
m_data->m_bodyCount->copyToHost(bodyCount);
// unsigned int totalNumSplitBodies=0;
m_data->m_offsetSplitBodies->resize(numBodies);
m_data->m_scan->execute(*m_data->m_bodyCount,*m_data->m_offsetSplitBodies,numBodies,&totalNumSplitBodies);
totalNumSplitBodies+=m_data->m_bodyCount->at(numBodies-1);
m_data->m_offsetSplitBodies->copyToHost(offsetSplitBodies);
int numContacts = manifoldGPU.size();
m_data->m_contactConstraints->resize(numContacts);
bodiesGPU.copyFromHostPointer(bodiesCPU,numBodies);
inertiasGPU.copyFromHostPointer(inertiasCPU,numBodies);
{
BT_PROFILE("contactToConstraintSplitKernel");
btLauncherCL launcher( m_queue, m_data->m_contactToConstraintSplitKernel);
launcher.setBuffer(manifoldGPU.getBufferCL());
launcher.setBuffer(bodiesGPU.getBufferCL());
launcher.setBuffer(inertiasGPU.getBufferCL());
launcher.setBuffer(m_data->m_contactConstraints->getBufferCL());
launcher.setBuffer(m_data->m_bodyCount->getBufferCL());
launcher.setConst(numContacts);
launcher.setConst(solverInfo.m_deltaTime);
launcher.setConst(solverInfo.m_positionDrift);
launcher.setConst(solverInfo.m_positionConstraintCoeff);
launcher.launch1D( numContacts, 64 );
clFinish(m_queue);
}
m_data->m_contactConstraints->copyToHost(contactConstraints);
}
int maxIter = 1;
btAlignedObjectArray<btVector3> deltaLinearVelocities;
btAlignedObjectArray<btVector3> deltaAngularVelocities;
deltaLinearVelocities.resize(totalNumSplitBodies);
deltaAngularVelocities.resize(totalNumSplitBodies);
for (int i=0;i<totalNumSplitBodies;i++)
{
deltaLinearVelocities[i].setZero();
deltaAngularVelocities[i].setZero();
}
m_data->m_deltaLinearVelocities->copyFromHost(deltaLinearVelocities);
m_data->m_deltaAngularVelocities->copyFromHost(deltaAngularVelocities);
for (int iter = 0;iter<maxIter;iter++)
{
bool solveHost=true;
if (solveHost)
{
int i=0;
for( i=0; i<numManifolds; i++)
{
float frictionCoeff = contactConstraints[i].getFrictionCoeff();
int aIdx = (int)contactConstraints[i].m_bodyA;
int bIdx = (int)contactConstraints[i].m_bodyB;
btRigidBodyCL& bodyA = bodiesCPU[aIdx];
btRigidBodyCL& bodyB = bodiesCPU[bIdx];
btVector3 zero(0,0,0);
btVector3* dlvAPtr=&zero;
btVector3* davAPtr=&zero;
btVector3* dlvBPtr=&zero;
btVector3* davBPtr=&zero;
if (bodyA.getInvMass())
{
int bodyOffsetA = offsetSplitBodies[aIdx];
int constraintOffsetA = contactConstraintOffsets[i].x;
int splitIndexA = bodyOffsetA+constraintOffsetA;
dlvAPtr = &deltaLinearVelocities[splitIndexA];
davAPtr = &deltaAngularVelocities[splitIndexA];
}
if (bodyB.getInvMass())
{
int bodyOffsetB = offsetSplitBodies[bIdx];
int constraintOffsetB = contactConstraintOffsets[i].y;
int splitIndexB= bodyOffsetB+constraintOffsetB;
dlvBPtr =&deltaLinearVelocities[splitIndexB];
davBPtr = &deltaAngularVelocities[splitIndexB];
}
{
bool test=false;
if (test)
{
float maxRambdaDt[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX};
float minRambdaDt[4] = {0.f,0.f,0.f,0.f};
solveContact( contactConstraints[i], (btVector3&)bodyA.m_pos, (btVector3&)bodyA.m_linVel, (btVector3&)bodyA.m_angVel, bodyA.m_invMass, inertiasCPU[aIdx].m_invInertiaWorld,
(btVector3&)bodyB.m_pos, (btVector3&)bodyB.m_linVel, (btVector3&)bodyB.m_angVel, bodyB.m_invMass, inertiasCPU[bIdx].m_invInertiaWorld,
maxRambdaDt, minRambdaDt , *dlvAPtr,*davAPtr,*dlvBPtr,*davBPtr );
} else
{
solveContact3(&contactConstraints[i], &bodyA.m_pos, &bodyA.m_linVel, &bodyA.m_angVel, bodyA.m_invMass, inertiasCPU[aIdx].m_invInertiaWorld,
&bodyB.m_pos, &bodyB.m_linVel, &bodyB.m_angVel, bodyB.m_invMass, inertiasCPU[bIdx].m_invInertiaWorld,
dlvAPtr,davAPtr,dlvBPtr,davBPtr );
}
printf("!");
}
}
}else
{
{
//__kernel void SolveContactJacobiKernel(__global Constraint4* gConstraints, __global Body* gBodies, __global Shape* gShapes ,
//__global int2* contactConstraintOffsets,__global int* offsetSplitBodies,__global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities,
//float deltaTime, float positionDrift, float positionConstraintCoeff, int fixedBodyIndex, int numManifolds
BT_PROFILE("m_solveContactKernel");
btLauncherCL launcher( m_queue, m_data->m_solveContactKernel );
launcher.setBuffer(m_data->m_contactConstraints->getBufferCL());
launcher.setBuffer(bodiesGPU.getBufferCL());
launcher.setBuffer(inertiasGPU.getBufferCL());
launcher.setBuffer(m_data->m_contactConstraintOffsets->getBufferCL());
launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL());
launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL());
launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL());
launcher.setConst(solverInfo.m_deltaTime);
launcher.setConst(solverInfo.m_positionDrift);
launcher.setConst(solverInfo.m_positionConstraintCoeff);
launcher.setConst(solverInfo.m_fixedBodyIndex);
launcher.setConst(numManifolds);
launcher.launch1D(numManifolds);
clFinish(m_queue);
}
m_data->m_deltaLinearVelocities->copyToHost(deltaLinearVelocities);
m_data->m_deltaAngularVelocities->copyToHost(deltaAngularVelocities);
}
bool useHostAverage=false;
if (useHostAverage)
{
//easy
for (int i=0;i<numBodies;i++)
{
if (bodiesCPU[i].getInvMass())
{
int bodyOffset = offsetSplitBodies[i];
int count = bodyCount[i];
float factor = 1.f/float(count);
btVector3 averageLinVel;
averageLinVel.setZero();
btVector3 averageAngVel;
averageAngVel.setZero();
for (int j=0;j<count;j++)
{
averageLinVel += deltaLinearVelocities[bodyOffset+j]*factor;
averageAngVel += deltaAngularVelocities[bodyOffset+j]*factor;
}
for (int j=0;j<count;j++)
{
deltaLinearVelocities[bodyOffset+j] = averageLinVel;
deltaAngularVelocities[bodyOffset+j] = averageAngVel;
}
}
}
} else
{
// bodiesGPU.copyFromHostPointer(bodiesCPU,numBodies);
m_data->m_deltaLinearVelocities->copyFromHost(deltaLinearVelocities);
m_data->m_deltaAngularVelocities->copyFromHost(deltaAngularVelocities);
BT_PROFILE("average velocities");
//__kernel void AverageVelocitiesKernel(__global Body* gBodies,__global int* offsetSplitBodies,__global const unsigned int* bodyCount,
//__global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities, int numBodies)
btLauncherCL launcher( m_queue, m_data->m_averageVelocitiesKernel);
launcher.setBuffer(bodiesGPU.getBufferCL());
launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL());
launcher.setBuffer(m_data->m_bodyCount->getBufferCL());
launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL());
launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL());
launcher.setConst(numBodies);
launcher.launch1D(numBodies);
clFinish(m_queue);
m_data->m_deltaLinearVelocities->copyToHost(deltaLinearVelocities);
m_data->m_deltaAngularVelocities->copyToHost(deltaAngularVelocities);
}
#if 0
//solve friction
for(int i=0; i<numManifolds; i++)
{
float maxRambdaDt[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX};
float minRambdaDt[4] = {0.f,0.f,0.f,0.f};
float sum = 0;
for(int j=0; j<4; j++)
{
sum +=contactConstraints[i].m_appliedRambdaDt[j];
}
float frictionCoeff = contactConstraints[i].getFrictionCoeff();
int aIdx = (int)contactConstraints[i].m_bodyA;
int bIdx = (int)contactConstraints[i].m_bodyB;
btRigidBodyCL& bodyA = bodies[aIdx];
btRigidBodyCL& bodyB = bodies[bIdx];
btVector3 zero(0,0,0);
btVector3* dlvAPtr=&zero;
btVector3* davAPtr=&zero;
btVector3* dlvBPtr=&zero;
btVector3* davBPtr=&zero;
if (bodyA.getInvMass())
{
int bodyOffsetA = offsetSplitBodies[aIdx];
int constraintOffsetA = contactConstraintOffsets[i].x;
int splitIndexA = bodyOffsetA+constraintOffsetA;
dlvAPtr = &deltaLinearVelocities[splitIndexA];
davAPtr = &deltaAngularVelocities[splitIndexA];
}
if (bodyB.getInvMass())
{
int bodyOffsetB = offsetSplitBodies[bIdx];
int constraintOffsetB = contactConstraintOffsets[i].y;
int splitIndexB= bodyOffsetB+constraintOffsetB;
dlvBPtr =&deltaLinearVelocities[splitIndexB];
davBPtr = &deltaAngularVelocities[splitIndexB];
}
for(int j=0; j<4; j++)
{
maxRambdaDt[j] = frictionCoeff*sum;
minRambdaDt[j] = -maxRambdaDt[j];
}
solveFriction( contactConstraints[i], (btVector3&)bodyA.m_pos, (btVector3&)bodyA.m_linVel, (btVector3&)bodyA.m_angVel, bodyA.m_invMass,inertias[aIdx].m_invInertiaWorld,
(btVector3&)bodyB.m_pos, (btVector3&)bodyB.m_linVel, (btVector3&)bodyB.m_angVel, bodyB.m_invMass, inertias[bIdx].m_invInertiaWorld,
maxRambdaDt, minRambdaDt , *dlvAPtr,*davAPtr,*dlvBPtr,*davBPtr);
}
//easy
for (int i=0;i<numBodies;i++)
{
if (bodies[i].getInvMass())
{
int bodyOffset = offsetSplitBodies[i];
int count = bodyCount[i];
float factor = 1.f/float(count);
btVector3 averageLinVel;
averageLinVel.setZero();
btVector3 averageAngVel;
averageAngVel.setZero();
for (int j=0;j<count;j++)
{
averageLinVel += deltaLinearVelocities[bodyOffset+j]*factor;
averageAngVel += deltaAngularVelocities[bodyOffset+j]*factor;
}
for (int j=0;j<count;j++)
{
deltaLinearVelocities[bodyOffset+j] = averageLinVel;
deltaAngularVelocities[bodyOffset+j] = averageAngVel;
}
}
}
#endif
}
//easy
for (int i=0;i<numBodies;i++)
{
if (bodiesCPU[i].getInvMass())
{
int bodyOffset = offsetSplitBodies[i];
int count = bodyCount[i];
if (count)
{
bodiesCPU[i].m_linVel += deltaLinearVelocities[bodyOffset];
bodiesCPU[i].m_angVel += deltaAngularVelocities[bodyOffset];
}
}
}
}
void btGpuJacobiSolver::solveGroup(btOpenCLArray<btRigidBodyCL>* bodies,btOpenCLArray<btInertiaCL>* inertias,btOpenCLArray<btContact4>* manifoldPtr,const btJacobiSolverInfo& solverInfo)
{
@@ -774,11 +1247,17 @@ void btGpuJacobiSolver::solveGroup(btOpenCLArray<btRigidBodyCL>* bodies,btOpenC
launch.launch1D(totalNumSplitBodies);
}
int maxIter = 4;
int maxIter = 14;
for (int iter = 0;iter<maxIter;iter++)
{
{
//__kernel void SolveContactJacobiKernel(__global Constraint4* gConstraints, __global Body* gBodies, __global Shape* gShapes ,
//__global int2* contactConstraintOffsets,__global int* offsetSplitBodies,__global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities,
//float deltaTime, float positionDrift, float positionConstraintCoeff, int fixedBodyIndex, int numManifolds
BT_PROFILE("m_solveContactKernel");
btLauncherCL launcher( m_queue, m_data->m_solveContactKernel );
launcher.setBuffer(m_data->m_contactConstraints->getBufferCL());
@@ -793,10 +1272,25 @@ void btGpuJacobiSolver::solveGroup(btOpenCLArray<btRigidBodyCL>* bodies,btOpenC
launcher.setConst(solverInfo.m_positionConstraintCoeff);
launcher.setConst(solverInfo.m_fixedBodyIndex);
launcher.setConst(numManifolds);
launcher.launch1D(numManifolds);
clFinish(m_queue);
}
{
BT_PROFILE("average velocities");
//__kernel void AverageVelocitiesKernel(__global Body* gBodies,__global int* offsetSplitBodies,__global const unsigned int* bodyCount,
//__global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities, int numBodies)
btLauncherCL launcher( m_queue, m_data->m_averageVelocitiesKernel);
launcher.setBuffer(bodies->getBufferCL());
launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL());
launcher.setBuffer(m_data->m_bodyCount->getBufferCL());
launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL());
launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL());
launcher.setConst(numBodies);
launcher.launch1D(numBodies);
clFinish(m_queue);
}
/*
for(int i=0; i<numManifolds; i++)
{
@@ -960,6 +1454,34 @@ void btGpuJacobiSolver::solveGroup(btOpenCLArray<btRigidBodyCL>* bodies,btOpenC
}
btAlignedObjectArray<btVector3> dLinVel;
btAlignedObjectArray<btVector3> dAngVel;
m_data->m_deltaLinearVelocities->copyToHost(dLinVel);
m_data->m_deltaAngularVelocities->copyToHost(dAngVel);
btAlignedObjectArray<btRigidBodyCL> bodiesCPU;
bodies->copyToHost(bodiesCPU);
btAlignedObjectArray<unsigned int> bodyCountCPU;
m_data->m_bodyCount->copyToHost(bodyCountCPU);
btAlignedObjectArray<unsigned int> offsetSplitBodiesCPU;
m_data->m_offsetSplitBodies->copyToHost(offsetSplitBodiesCPU);
for (int i=0;i<numBodies;i++)
{
if (bodiesCPU[i].getInvMass())
{
int bodyOffset = offsetSplitBodiesCPU[i];
int count = bodyCountCPU[i];
if (count)
{
bodiesCPU[i].m_linVel += dLinVel[bodyOffset];
bodiesCPU[i].m_angVel += dAngVel[bodyOffset];
}
}
}
bodies->copyFromHost(bodiesCPU);
printf(".");
/*
//easy

View File

@@ -45,6 +45,7 @@ public:
void solveGroupHost(btRigidBodyCL* bodies,btInertiaCL* inertias,int numBodies,btContact4* manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btJacobiSolverInfo& solverInfo);
void solveGroup(btOpenCLArray<btRigidBodyCL>* bodies,btOpenCLArray<btInertiaCL>* inertias,btOpenCLArray<btContact4>* manifoldPtr,const btJacobiSolverInfo& solverInfo);
void solveGroupMixedHost(btRigidBodyCL* bodies,btInertiaCL* inertias,int numBodies,btContact4* manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btJacobiSolverInfo& solverInfo);
};
#endif //BT_GPU_JACOBI_SOLVER_H

View File

@@ -117,7 +117,7 @@ void btGpuRigidBodyPipeline::stepSimulation(float deltaTime)
bool useGpu = true;
if (useGpu)
{
bool forceHost = false;
bool forceHost = true;
if (forceHost)
{
btAlignedObjectArray<btRigidBodyCL> hostBodies;
@@ -133,7 +133,10 @@ void btGpuRigidBodyPipeline::stepSimulation(float deltaTime)
{
btJacobiSolverInfo solverInfo;
m_data->m_solver3->solveGroupHost(&hostBodies[0], &hostInertias[0], hostBodies.size(),&hostContacts[0],hostContacts.size(),0,0,solverInfo);
// m_data->m_solver3->solveGroupHost(&hostBodies[0], &hostInertias[0], hostBodies.size(),&hostContacts[0],hostContacts.size(),0,0,solverInfo);
m_data->m_solver3->solveGroupMixedHost(&hostBodies[0], &hostInertias[0], hostBodies.size(),&hostContacts[0],hostContacts.size(),0,0,solverInfo);
}
{
BT_PROFILE("copyFromHost");

View File

@@ -371,9 +371,8 @@ typedef struct
u32 m_bodyA;
u32 m_bodyB;
int m_batchIdx;
u32 m_paddings[1];
u32 m_paddings;
} Constraint4;
typedef struct
@@ -424,6 +423,37 @@ __kernel void ClearVelocitiesKernel(__global float4* linearVelocities,__global f
}
__kernel void AverageVelocitiesKernel(__global Body* gBodies,__global int* offsetSplitBodies,__global const unsigned int* bodyCount,
__global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities, int numBodies)
{
int i = GET_GLOBAL_IDX;
if (i<numBodies)
{
if (gBodies[i].m_invMass)
{
int bodyOffset = offsetSplitBodies[i];
int count = bodyCount[i];
float factor = 1.f/((float)count);
float4 averageLinVel = make_float4(0.f);
float4 averageAngVel = make_float4(0.f);
for (int j=0;j<count;j++)
{
averageLinVel += deltaLinearVelocities[bodyOffset+j]*factor;
averageAngVel += deltaAngularVelocities[bodyOffset+j]*factor;
}
for (int j=0;j<count;j++)
{
deltaLinearVelocities[bodyOffset+j] = averageLinVel;
deltaAngularVelocities[bodyOffset+j] = averageAngVel;
}
}//bodies[i].m_invMass
}//i<numBodies
}
void setLinearAndAngular( float4 n, float4 r0, float4 r1, float4* linear, float4* angular0, float4* angular1)
{
@@ -440,14 +470,14 @@ float calcRelVel( float4 l0, float4 l1, float4 a0, float4 a1, float4 linVel0, fl
float calcJacCoeff(const float4 linear0, const float4 linear1, const float4 angular0, const float4 angular1,
float invMass0, const Matrix3x3* invInertia0, float invMass1, const Matrix3x3* invInertia1)
float invMass0, const Matrix3x3* invInertia0, float invMass1, const Matrix3x3* invInertia1, float countA, float countB)
{
// linear0,1 are normlized
float jmj0 = invMass0;//dot3F4(linear0, linear0)*invMass0;
float jmj1 = dot3F4(mtMul3(angular0,*invInertia0), angular0);
float jmj2 = invMass1;//dot3F4(linear1, linear1)*invMass1;
float jmj3 = dot3F4(mtMul3(angular1,*invInertia1), angular1);
return -1.f/(jmj0+jmj1+jmj2+jmj3);
return -1.f/((jmj0+jmj1)*countA+(jmj2+jmj3)*countB);
}
@@ -502,7 +532,7 @@ void solveContact(__global Constraint4* cs,
setLinearAndAngular( -cs->m_linear, r0, r1, &linear, &angular0, &angular1 );
float rambdaDt = calcRelVel( cs->m_linear, -cs->m_linear, angular0, angular1,
*linVelA, *angVelA, *linVelB, *angVelB ) + cs->m_b[ic];
*linVelA+*dLinVelA, *angVelA+*dAngVelA, *linVelB+*dLinVelB, *angVelB+*dAngVelB ) + cs->m_b[ic];
rambdaDt *= cs->m_jacCoeffInv[ic];
{
@@ -520,16 +550,25 @@ void solveContact(__global Constraint4* cs,
float4 angImp0 = mtMul1(invInertiaA, angular0)*rambdaDt;
float4 angImp1 = mtMul1(invInertiaB, angular1)*rambdaDt;
*linVelA += linImp0;
*angVelA += angImp0;
*linVelB += linImp1;
*angVelB += angImp1;
if (invMassA)
{
*dLinVelA += linImp0;
*dAngVelA += angImp0;
}
if (invMassB)
{
*dLinVelB += linImp1;
*dAngVelB += angImp1;
}
}
}
// solveContactConstraint( gBodies, gShapes, &gConstraints[i] ,contactConstraintOffsets,offsetSplitBodies, deltaLinearVelocities, deltaAngularVelocities);
void solveContactConstraint(__global Body* gBodies, __global Shape* gShapes, __global Constraint4* ldsCs,
__global int2* contactConstraintOffsets,__global int* offsetSplitBodies,
__global int2* contactConstraintOffsets,__global unsigned int* offsetSplitBodies,
__global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities)
{
@@ -585,7 +624,7 @@ __global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities)
deltaLinearVelocities[splitIndexA] = dLinVelA;
deltaAngularVelocities[splitIndexA] = dAngVelA;
}
if (gBodies[bIdx].m_invMass)
if (invMassB)
{
deltaLinearVelocities[splitIndexB] = dLinVelB;
deltaAngularVelocities[splitIndexB] = dAngVelB;
@@ -595,7 +634,7 @@ __global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities)
__kernel void SolveContactJacobiKernel(__global Constraint4* gConstraints, __global Body* gBodies, __global Shape* gShapes ,
__global int2* contactConstraintOffsets,__global int* offsetSplitBodies,__global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities,
__global int2* contactConstraintOffsets,__global unsigned int* offsetSplitBodies,__global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities,
float deltaTime, float positionDrift, float positionConstraintCoeff, int fixedBodyIndex, int numManifolds
)
{
@@ -615,7 +654,7 @@ __kernel void SolveFrictionJacobiKernel()
void setConstraint4( const float4 posA, const float4 linVelA, const float4 angVelA, float invMassA, const Matrix3x3 invInertiaA,
const float4 posB, const float4 linVelB, const float4 angVelB, float invMassB, const Matrix3x3 invInertiaB,
__global Contact4* src, float dt, float positionDrift, float positionConstraintCoeff,
__global Contact4* src, float dt, float positionDrift, float positionConstraintCoeff,float countA, float countB,
Constraint4* dstC )
{
dstC->m_bodyA = abs(src->m_bodyAPtrAndSignBit);
@@ -648,7 +687,7 @@ void setConstraint4( const float4 posA, const float4 linVelA, const float4 angVe
setLinearAndAngular(src->m_worldNormal, r0, r1, &linear, &angular0, &angular1);
dstC->m_jacCoeffInv[ic] = calcJacCoeff(linear, -linear, angular0, angular1,
invMassA, &invInertiaA, invMassB, &invInertiaB );
invMassA, &invInertiaA, invMassB, &invInertiaB , countA, countB);
relVelN = calcRelVel(linear, -linear, angular0, angular1,
linVelA, angVelA, linVelB, angVelB);
@@ -683,7 +722,7 @@ void setConstraint4( const float4 posA, const float4 linVelA, const float4 angVe
setLinearAndAngular(tangent[i], r[0], r[1], &linear, &angular0, &angular1);
dstC->m_fJacCoeffInv[i] = calcJacCoeff(linear, -linear, angular0, angular1,
invMassA, &invInertiaA, invMassB, &invInertiaB );
invMassA, &invInertiaA, invMassB, &invInertiaB ,countA, countB);
dstC->m_fAppliedRambdaDt[i] = 0.f;
}
dstC->m_center = center;
@@ -734,9 +773,12 @@ float positionConstraintCoeff
Constraint4 cs;
float countA = invMassA ? (float)bodyCount[aIdx] : 1;
float countB = invMassB ? (float)bodyCount[bIdx] : 1;
setConstraint4( posA, linVelA, angVelA, invMassA, invInertiaA, posB, linVelB, angVelB, invMassB, invInertiaB,
&gContact[gIdx], dt, positionDrift, positionConstraintCoeff,
&cs );
&gContact[gIdx], dt, positionDrift, positionConstraintCoeff,countA,countB,
&cs );
cs.m_batchIdx = gContact[gIdx].m_batchIdx;

View File

@@ -373,9 +373,8 @@ static const char* solverUtilsCL= \
"\n"
" u32 m_bodyA;\n"
" u32 m_bodyB;\n"
"\n"
" int m_batchIdx;\n"
" u32 m_paddings[1];\n"
" u32 m_paddings;\n"
"} Constraint4;\n"
"\n"
"typedef struct\n"
@@ -426,6 +425,37 @@ static const char* solverUtilsCL= \
"}\n"
"\n"
"\n"
"__kernel void AverageVelocitiesKernel(__global Body* gBodies,__global int* offsetSplitBodies,__global const unsigned int* bodyCount,\n"
"__global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities, int numBodies)\n"
"{\n"
" int i = GET_GLOBAL_IDX;\n"
" if (i<numBodies)\n"
" {\n"
" if (gBodies[i].m_invMass)\n"
" {\n"
" int bodyOffset = offsetSplitBodies[i];\n"
" int count = bodyCount[i];\n"
" float factor = 1.f/((float)count);\n"
" float4 averageLinVel = make_float4(0.f);\n"
" float4 averageAngVel = make_float4(0.f);\n"
" \n"
" for (int j=0;j<count;j++)\n"
" {\n"
" averageLinVel += deltaLinearVelocities[bodyOffset+j]*factor;\n"
" averageAngVel += deltaAngularVelocities[bodyOffset+j]*factor;\n"
" }\n"
" \n"
" for (int j=0;j<count;j++)\n"
" {\n"
" deltaLinearVelocities[bodyOffset+j] = averageLinVel;\n"
" deltaAngularVelocities[bodyOffset+j] = averageAngVel;\n"
" }\n"
" \n"
" }//bodies[i].m_invMass\n"
" }//i<numBodies\n"
"}\n"
"\n"
"\n"
"\n"
"void setLinearAndAngular( float4 n, float4 r0, float4 r1, float4* linear, float4* angular0, float4* angular1)\n"
"{\n"
@@ -442,14 +472,14 @@ static const char* solverUtilsCL= \
"\n"
"\n"
"float calcJacCoeff(const float4 linear0, const float4 linear1, const float4 angular0, const float4 angular1,\n"
" float invMass0, const Matrix3x3* invInertia0, float invMass1, const Matrix3x3* invInertia1)\n"
" float invMass0, const Matrix3x3* invInertia0, float invMass1, const Matrix3x3* invInertia1, float countA, float countB)\n"
"{\n"
" // linear0,1 are normlized\n"
" float jmj0 = invMass0;//dot3F4(linear0, linear0)*invMass0;\n"
" float jmj1 = dot3F4(mtMul3(angular0,*invInertia0), angular0);\n"
" float jmj2 = invMass1;//dot3F4(linear1, linear1)*invMass1;\n"
" float jmj3 = dot3F4(mtMul3(angular1,*invInertia1), angular1);\n"
" return -1.f/(jmj0+jmj1+jmj2+jmj3);\n"
" return -1.f/((jmj0+jmj1)*countA+(jmj2+jmj3)*countB);\n"
"}\n"
"\n"
"\n"
@@ -504,7 +534,7 @@ static const char* solverUtilsCL= \
" setLinearAndAngular( -cs->m_linear, r0, r1, &linear, &angular0, &angular1 );\n"
"\n"
" float rambdaDt = calcRelVel( cs->m_linear, -cs->m_linear, angular0, angular1, \n"
" *linVelA, *angVelA, *linVelB, *angVelB ) + cs->m_b[ic];\n"
" *linVelA+*dLinVelA, *angVelA+*dAngVelA, *linVelB+*dLinVelB, *angVelB+*dAngVelB ) + cs->m_b[ic];\n"
" rambdaDt *= cs->m_jacCoeffInv[ic];\n"
"\n"
" {\n"
@@ -522,16 +552,25 @@ static const char* solverUtilsCL= \
" float4 angImp0 = mtMul1(invInertiaA, angular0)*rambdaDt;\n"
" float4 angImp1 = mtMul1(invInertiaB, angular1)*rambdaDt;\n"
"\n"
" *linVelA += linImp0;\n"
" *angVelA += angImp0;\n"
" *linVelB += linImp1;\n"
" *angVelB += angImp1;\n"
" if (invMassA)\n"
" {\n"
" *dLinVelA += linImp0;\n"
" *dAngVelA += angImp0;\n"
" }\n"
" if (invMassB)\n"
" {\n"
" *dLinVelB += linImp1;\n"
" *dAngVelB += angImp1;\n"
" }\n"
" }\n"
"}\n"
"\n"
"\n"
"// solveContactConstraint( gBodies, gShapes, &gConstraints[i] ,contactConstraintOffsets,offsetSplitBodies, deltaLinearVelocities, deltaAngularVelocities);\n"
"\n"
"\n"
"void solveContactConstraint(__global Body* gBodies, __global Shape* gShapes, __global Constraint4* ldsCs, \n"
"__global int2* contactConstraintOffsets,__global int* offsetSplitBodies,\n"
"__global int2* contactConstraintOffsets,__global unsigned int* offsetSplitBodies,\n"
"__global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities)\n"
"{\n"
"\n"
@@ -587,7 +626,7 @@ static const char* solverUtilsCL= \
" deltaLinearVelocities[splitIndexA] = dLinVelA;\n"
" deltaAngularVelocities[splitIndexA] = dAngVelA;\n"
" } \n"
" if (gBodies[bIdx].m_invMass)\n"
" if (invMassB)\n"
" {\n"
" deltaLinearVelocities[splitIndexB] = dLinVelB;\n"
" deltaAngularVelocities[splitIndexB] = dAngVelB;\n"
@@ -597,7 +636,7 @@ static const char* solverUtilsCL= \
"\n"
"\n"
"__kernel void SolveContactJacobiKernel(__global Constraint4* gConstraints, __global Body* gBodies, __global Shape* gShapes ,\n"
"__global int2* contactConstraintOffsets,__global int* offsetSplitBodies,__global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities,\n"
"__global int2* contactConstraintOffsets,__global unsigned int* offsetSplitBodies,__global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities,\n"
"float deltaTime, float positionDrift, float positionConstraintCoeff, int fixedBodyIndex, int numManifolds\n"
")\n"
"{\n"
@@ -617,7 +656,7 @@ static const char* solverUtilsCL= \
"\n"
"void setConstraint4( const float4 posA, const float4 linVelA, const float4 angVelA, float invMassA, const Matrix3x3 invInertiaA,\n"
" const float4 posB, const float4 linVelB, const float4 angVelB, float invMassB, const Matrix3x3 invInertiaB, \n"
" __global Contact4* src, float dt, float positionDrift, float positionConstraintCoeff,\n"
" __global Contact4* src, float dt, float positionDrift, float positionConstraintCoeff,float countA, float countB,\n"
" Constraint4* dstC )\n"
"{\n"
" dstC->m_bodyA = abs(src->m_bodyAPtrAndSignBit);\n"
@@ -650,7 +689,7 @@ static const char* solverUtilsCL= \
" setLinearAndAngular(src->m_worldNormal, r0, r1, &linear, &angular0, &angular1);\n"
"\n"
" dstC->m_jacCoeffInv[ic] = calcJacCoeff(linear, -linear, angular0, angular1,\n"
" invMassA, &invInertiaA, invMassB, &invInertiaB );\n"
" invMassA, &invInertiaA, invMassB, &invInertiaB , countA, countB);\n"
"\n"
" relVelN = calcRelVel(linear, -linear, angular0, angular1,\n"
" linVelA, angVelA, linVelB, angVelB);\n"
@@ -685,7 +724,7 @@ static const char* solverUtilsCL= \
" setLinearAndAngular(tangent[i], r[0], r[1], &linear, &angular0, &angular1);\n"
"\n"
" dstC->m_fJacCoeffInv[i] = calcJacCoeff(linear, -linear, angular0, angular1,\n"
" invMassA, &invInertiaA, invMassB, &invInertiaB );\n"
" invMassA, &invInertiaA, invMassB, &invInertiaB ,countA, countB);\n"
" dstC->m_fAppliedRambdaDt[i] = 0.f;\n"
" }\n"
" dstC->m_center = center;\n"
@@ -736,9 +775,12 @@ static const char* solverUtilsCL= \
"\n"
" Constraint4 cs;\n"
"\n"
" float countA = invMassA ? (float)bodyCount[aIdx] : 1;\n"
" float countB = invMassB ? (float)bodyCount[bIdx] : 1;\n"
"\n"
" setConstraint4( posA, linVelA, angVelA, invMassA, invInertiaA, posB, linVelB, angVelB, invMassB, invInertiaB,\n"
" &gContact[gIdx], dt, positionDrift, positionConstraintCoeff,\n"
" &cs );\n"
" &gContact[gIdx], dt, positionDrift, positionConstraintCoeff,countA,countB,\n"
" &cs );\n"
" \n"
" cs.m_batchIdx = gContact[gIdx].m_batchIdx;\n"
"\n"