Will move joint setup to GPU, and then some benefit should be visible. Don't use 64 alignment, it causes data structures size mismatch between cpu and gpu
165 lines
4.5 KiB
C
165 lines
4.5 KiB
C
//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project
|
|
static const char* solveConstraintRowsCL= \
|
|
"\n"
|
|
"\n"
|
|
"typedef float4 Quaternion;\n"
|
|
"\n"
|
|
"typedef struct\n"
|
|
"{\n"
|
|
" float4 m_row[3];\n"
|
|
"}Matrix3x3;\n"
|
|
"\n"
|
|
"\n"
|
|
"\n"
|
|
"typedef struct\n"
|
|
"{\n"
|
|
" Matrix3x3 m_invInertia;\n"
|
|
" Matrix3x3 m_initInvInertia;\n"
|
|
"} Shape;\n"
|
|
"\n"
|
|
"typedef struct\n"
|
|
"{\n"
|
|
" Matrix3x3 m_basis;//orientation\n"
|
|
" float4 m_origin;//transform\n"
|
|
"}b3Transform;\n"
|
|
"\n"
|
|
"typedef struct\n"
|
|
"{\n"
|
|
" b3Transform m_worldTransform;\n"
|
|
" float4 m_deltaLinearVelocity;\n"
|
|
" float4 m_deltaAngularVelocity;\n"
|
|
" float4 m_angularFactor;\n"
|
|
" float4 m_linearFactor;\n"
|
|
" float4 m_invMass;\n"
|
|
" float4 m_pushVelocity;\n"
|
|
" float4 m_turnVelocity;\n"
|
|
" float4 m_linearVelocity;\n"
|
|
" float4 m_angularVelocity;\n"
|
|
"\n"
|
|
" union \n"
|
|
" {\n"
|
|
" void* m_originalBody;\n"
|
|
" int m_originalBodyIndex;\n"
|
|
" };\n"
|
|
" int padding[3];\n"
|
|
"\n"
|
|
"} b3SolverBody;\n"
|
|
"\n"
|
|
"\n"
|
|
"\n"
|
|
"typedef struct\n"
|
|
"{\n"
|
|
"\n"
|
|
" float4 m_relpos1CrossNormal;\n"
|
|
" float4 m_contactNormal;\n"
|
|
"\n"
|
|
" float4 m_relpos2CrossNormal;\n"
|
|
" //float4 m_contactNormal2;//usually m_contactNormal2 == -m_contactNormal\n"
|
|
"\n"
|
|
" float4 m_angularComponentA;\n"
|
|
" float4 m_angularComponentB;\n"
|
|
" \n"
|
|
" float4 m_appliedPushImpulse;\n"
|
|
" float4 m_appliedImpulse;\n"
|
|
"\n"
|
|
" float m_friction;\n"
|
|
" float m_jacDiagABInv;\n"
|
|
" float m_rhs;\n"
|
|
" float m_cfm;\n"
|
|
" \n"
|
|
" float m_lowerLimit;\n"
|
|
" float m_upperLimit;\n"
|
|
" float m_rhsPenetration;\n"
|
|
"\n"
|
|
" union\n"
|
|
" {\n"
|
|
" void* m_originalContactPoint;\n"
|
|
" float m_unusedPadding4;\n"
|
|
" };\n"
|
|
"\n"
|
|
" int m_overrideNumSolverIterations;\n"
|
|
" int m_frictionIndex;\n"
|
|
" int m_solverBodyIdA;\n"
|
|
" int m_solverBodyIdB;\n"
|
|
"\n"
|
|
"} b3SolverConstraint;\n"
|
|
"\n"
|
|
"typedef struct\n"
|
|
"{\n"
|
|
" int m_bodyAPtrAndSignBit;\n"
|
|
" int m_bodyBPtrAndSignBit;\n"
|
|
" int m_constraintRowOffset;\n"
|
|
" short int m_numConstraintRows;\n"
|
|
" short int m_batchId;\n"
|
|
"\n"
|
|
"} b3BatchConstraint;\n"
|
|
"\n"
|
|
"#define mymake_float4 (float4)\n"
|
|
"\n"
|
|
"\n"
|
|
"__inline float dot3F4(float4 a, float4 b)\n"
|
|
"{\n"
|
|
" float4 a1 = mymake_float4(a.xyz,0.f);\n"
|
|
" float4 b1 = mymake_float4(b.xyz,0.f);\n"
|
|
" return dot(a1, b1);\n"
|
|
"}\n"
|
|
"\n"
|
|
"__inline void internalApplyImpulse(__global b3SolverBody* body, float4 linearComponent, float4 angularComponent,float impulseMagnitude)\n"
|
|
"{\n"
|
|
" body->m_deltaLinearVelocity += linearComponent*impulseMagnitude*body->m_linearFactor;\n"
|
|
" body->m_deltaAngularVelocity += angularComponent*(impulseMagnitude*body->m_angularFactor);\n"
|
|
"}\n"
|
|
"\n"
|
|
"\n"
|
|
"void resolveSingleConstraintRowGeneric(__global b3SolverBody* body1, __global b3SolverBody* body2, __global b3SolverConstraint* c)\n"
|
|
"{\n"
|
|
" float deltaImpulse = c->m_rhs-c->m_appliedImpulse.x*c->m_cfm;\n"
|
|
" float deltaVel1Dotn = dot3F4(c->m_contactNormal,body1->m_deltaLinearVelocity) + dot3F4(c->m_relpos1CrossNormal,body1->m_deltaAngularVelocity);\n"
|
|
" float deltaVel2Dotn = -dot3F4(c->m_contactNormal,body2->m_deltaLinearVelocity) + dot3F4(c->m_relpos2CrossNormal,body2->m_deltaAngularVelocity);\n"
|
|
"\n"
|
|
" deltaImpulse -= deltaVel1Dotn*c->m_jacDiagABInv;\n"
|
|
" deltaImpulse -= deltaVel2Dotn*c->m_jacDiagABInv;\n"
|
|
"\n"
|
|
" float sum = c->m_appliedImpulse.x + deltaImpulse;\n"
|
|
" if (sum < c->m_lowerLimit)\n"
|
|
" {\n"
|
|
" deltaImpulse = c->m_lowerLimit-c->m_appliedImpulse.x;\n"
|
|
" c->m_appliedImpulse.x = c->m_lowerLimit;\n"
|
|
" }\n"
|
|
" else if (sum > c->m_upperLimit) \n"
|
|
" {\n"
|
|
" deltaImpulse = c->m_upperLimit-c->m_appliedImpulse.x;\n"
|
|
" c->m_appliedImpulse.x = c->m_upperLimit;\n"
|
|
" }\n"
|
|
" else\n"
|
|
" {\n"
|
|
" c->m_appliedImpulse.x = sum;\n"
|
|
" }\n"
|
|
"\n"
|
|
" internalApplyImpulse(body1,c->m_contactNormal*body1->m_invMass,c->m_angularComponentA,deltaImpulse);\n"
|
|
" internalApplyImpulse(body2,-c->m_contactNormal*body2->m_invMass,c->m_angularComponentB,deltaImpulse);\n"
|
|
"\n"
|
|
"}\n"
|
|
"\n"
|
|
"__kernel\n"
|
|
"void solveJointConstraintRows(__global b3SolverBody* solverBodies,\n"
|
|
" __global b3BatchConstraint* batchConstraints,\n"
|
|
" __global b3SolverConstraint* rows,\n"
|
|
" int batchOffset,\n"
|
|
" int constraintOffset,\n"
|
|
" int numConstraintsInBatch\n"
|
|
" )\n"
|
|
"{\n"
|
|
" int b = get_global_id(0);\n"
|
|
" if (b>=numConstraintsInBatch)\n"
|
|
" return;\n"
|
|
"\n"
|
|
" __global b3BatchConstraint* c = &batchConstraints[b+batchOffset];\n"
|
|
" for (int jj=0;jj<c->m_numConstraintRows;jj++)\n"
|
|
" {\n"
|
|
" __global b3SolverConstraint* constraint = &rows[c->m_constraintRowOffset+jj];\n"
|
|
" resolveSingleConstraintRowGeneric(&solverBodies[constraint->m_solverBodyIdA],&solverBodies[constraint->m_solverBodyIdB],constraint);\n"
|
|
" }\n"
|
|
"};\n"
|
|
;
|