From e20cb228324447fb3438355879564894339c0f28 Mon Sep 17 00:00:00 2001 From: erwin coumans Date: Thu, 7 Nov 2013 12:46:01 -0800 Subject: [PATCH] share more data structures and code between OpenCL and C/C++ on CPU: move the setConstraint4/b3ConvertConstraint4 to shared code. --- .../NarrowPhaseCollision/b3RigidBodyCL.h | 6 +- .../shared/b3RigidBodyData.h | 11 +- src/Bullet3Common/shared/b3Mat3x3.h | 108 ++- src/Bullet3Common/shared/b3Quat.h | 13 +- .../shared/b3ContactConstraint4.h | 5 +- .../shared/b3ConvertConstraint4.h | 153 ++++ .../NarrowphaseCollision/kernels/satKernels.h | 98 ++- .../RigidBody/b3GpuBatchingPgsSolver.cpp | 35 +- .../RigidBody/b3GpuConstraint4.h | 19 +- .../RigidBody/b3GpuRigidBodyPipeline.cpp | 20 +- src/Bullet3OpenCL/RigidBody/b3Solver.cpp | 183 ++--- .../RigidBody/kernels/integrateKernel.cl | 33 +- .../RigidBody/kernels/integrateKernel.h | 274 ++++++- .../RigidBody/kernels/solverSetup.cl | 385 +--------- .../RigidBody/kernels/solverSetup.h | 704 ++++++++++-------- .../RigidBody/kernels/updateAabbsKernel.h | 110 ++- 16 files changed, 1249 insertions(+), 908 deletions(-) create mode 100644 src/Bullet3Dynamics/shared/b3ConvertConstraint4.h diff --git a/src/Bullet3Collision/NarrowPhaseCollision/b3RigidBodyCL.h b/src/Bullet3Collision/NarrowPhaseCollision/b3RigidBodyCL.h index be1e5c1a1..6d0c0337e 100644 --- a/src/Bullet3Collision/NarrowPhaseCollision/b3RigidBodyCL.h +++ b/src/Bullet3Collision/NarrowPhaseCollision/b3RigidBodyCL.h @@ -34,11 +34,9 @@ B3_ATTRIBUTE_ALIGNED16(struct) b3RigidBodyCL : public b3RigidBodyData }; -struct b3InertiaCL +struct b3InertiaCL : public b3InertiaData { - b3Matrix3x3 m_invInertiaWorld; - b3Matrix3x3 m_initInvInertia; + }; - #endif//B3_RIGID_BODY_CL diff --git a/src/Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h b/src/Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h index bf5608a12..50632c871 100644 --- a/src/Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h +++ b/src/Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h @@ -3,6 +3,7 @@ #include "Bullet3Common/shared/b3Float4.h" #include "Bullet3Common/shared/b3Quat.h" +#include "Bullet3Common/shared/b3Mat3x3.h" typedef struct b3RigidBodyData b3RigidBodyData_t; @@ -19,7 +20,15 @@ struct b3RigidBodyData float m_restituitionCoeff; float m_frictionCoeff; }; - + +typedef struct b3InertiaData b3InertiaData_t; + +struct b3InertiaData +{ + b3Mat3x3 m_invInertiaWorld; + b3Mat3x3 m_initInvInertia; +}; + #endif //B3_RIGIDBODY_DATA_H \ No newline at end of file diff --git a/src/Bullet3Common/shared/b3Mat3x3.h b/src/Bullet3Common/shared/b3Mat3x3.h index 74f9fae49..7b1fef32f 100644 --- a/src/Bullet3Common/shared/b3Mat3x3.h +++ b/src/Bullet3Common/shared/b3Mat3x3.h @@ -24,11 +24,18 @@ inline b3Mat3x3 b3AbsoluteMat3x3(b3Mat3x3ConstArg mat) #define b3GetRow(m,row) m.getRow(row) +__inline +b3Float4 mtMul3(b3Float4ConstArg a, b3Mat3x3ConstArg b) +{ + return b*a; +} + + #else typedef struct { - float4 m_row[3]; + b3Float4 m_row[3]; }b3Mat3x3; #define b3Mat3x3ConstArg const b3Mat3x3 @@ -36,7 +43,7 @@ typedef struct inline b3Mat3x3 b3QuatGetRotationMatrix(b3Quat quat) { - float4 quat2 = (float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f); + b3Float4 quat2 = (b3Float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f); b3Mat3x3 out; out.m_row[0].x=1-2*quat2.y-2*quat2.z; @@ -66,10 +73,107 @@ inline b3Mat3x3 b3AbsoluteMat3x3(b3Mat3x3ConstArg matIn) return out; } + +__inline +b3Mat3x3 mtZero(); + +__inline +b3Mat3x3 mtIdentity(); + +__inline +b3Mat3x3 mtTranspose(b3Mat3x3 m); + +__inline +b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b); + +__inline +b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b); + +__inline +b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b); + +__inline +b3Mat3x3 mtZero() +{ + b3Mat3x3 m; + m.m_row[0] = (b3Float4)(0.f); + m.m_row[1] = (b3Float4)(0.f); + m.m_row[2] = (b3Float4)(0.f); + return m; +} + +__inline +b3Mat3x3 mtIdentity() +{ + b3Mat3x3 m; + m.m_row[0] = (b3Float4)(1,0,0,0); + m.m_row[1] = (b3Float4)(0,1,0,0); + m.m_row[2] = (b3Float4)(0,0,1,0); + return m; +} + +__inline +b3Mat3x3 mtTranspose(b3Mat3x3 m) +{ + b3Mat3x3 out; + out.m_row[0] = (b3Float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f); + out.m_row[1] = (b3Float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f); + out.m_row[2] = (b3Float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f); + return out; +} + +__inline +b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b) +{ + b3Mat3x3 transB; + transB = mtTranspose( b ); + b3Mat3x3 ans; + // why this doesn't run when 0ing in the for{} + a.m_row[0].w = 0.f; + a.m_row[1].w = 0.f; + a.m_row[2].w = 0.f; + for(int i=0; i<3; i++) + { +// a.m_row[i].w = 0.f; + ans.m_row[i].x = b3Dot3F4(a.m_row[i],transB.m_row[0]); + ans.m_row[i].y = b3Dot3F4(a.m_row[i],transB.m_row[1]); + ans.m_row[i].z = b3Dot3F4(a.m_row[i],transB.m_row[2]); + ans.m_row[i].w = 0.f; + } + return ans; +} + +__inline +b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b) +{ + b3Float4 ans; + ans.x = b3Dot3F4( a.m_row[0], b ); + ans.y = b3Dot3F4( a.m_row[1], b ); + ans.z = b3Dot3F4( a.m_row[2], b ); + ans.w = 0.f; + return ans; +} + +__inline +b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b) +{ + b3Float4 colx = b3MakeFloat4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0); + b3Float4 coly = b3MakeFloat4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0); + b3Float4 colz = b3MakeFloat4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0); + + b3Float4 ans; + ans.x = b3Dot3F4( a, colx ); + ans.y = b3Dot3F4( a, coly ); + ans.z = b3Dot3F4( a, colz ); + return ans; +} + + #endif + #endif //B3_MAT3x3_H diff --git a/src/Bullet3Common/shared/b3Quat.h b/src/Bullet3Common/shared/b3Quat.h index 108c719f7..49b3d5b77 100644 --- a/src/Bullet3Common/shared/b3Quat.h +++ b/src/Bullet3Common/shared/b3Quat.h @@ -46,7 +46,18 @@ inline b3Quat b3QuatMul(b3QuatConstArg a, b3QuatConstArg b) inline b3Quat b3QuatNormalize(b3QuatConstArg in) { - return b3FastNormalize4(in); + //return b3FastNormalize4(in); + float len = native_sqrt(dot(q, q)); + if(len > 0.f) + { + q *= 1.f / len; + } + else + { + q.x = q.y = q.z = 0.f; + q.w = 1.f; + } + return q; } inline float4 b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec) { diff --git a/src/Bullet3Dynamics/shared/b3ContactConstraint4.h b/src/Bullet3Dynamics/shared/b3ContactConstraint4.h index 32e11fc0c..68cf65e31 100644 --- a/src/Bullet3Dynamics/shared/b3ContactConstraint4.h +++ b/src/Bullet3Dynamics/shared/b3ContactConstraint4.h @@ -5,6 +5,7 @@ typedef struct b3ContactConstraint4 b3ContactConstraint4_t; + struct b3ContactConstraint4 { @@ -25,9 +26,9 @@ struct b3ContactConstraint4 }; //inline void setFrictionCoeff(float value) { m_linear[3] = value; } -inline float b3GetFrictionCoeff(b3ContactConstraint4* constraint) +inline float b3GetFrictionCoeff(b3ContactConstraint4_t* constraint) { - return constraint->m_linear[3]; + return constraint->m_linear.w; } #endif //B3_CONTACT_CONSTRAINT5_H diff --git a/src/Bullet3Dynamics/shared/b3ConvertConstraint4.h b/src/Bullet3Dynamics/shared/b3ConvertConstraint4.h new file mode 100644 index 000000000..805a2bd3e --- /dev/null +++ b/src/Bullet3Dynamics/shared/b3ConvertConstraint4.h @@ -0,0 +1,153 @@ + + +#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Contact4Data.h" +#include "Bullet3Dynamics/shared/b3ContactConstraint4.h" +#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h" + + +void b3PlaneSpace1 (b3Float4ConstArg n, b3Float4* p, b3Float4* q); + void b3PlaneSpace1 (b3Float4ConstArg n, b3Float4* p, b3Float4* q) +{ + if (b3Fabs(n.z) > 0.70710678f) { + // choose p in y-z plane + float a = n.y*n.y + n.z*n.z; + float k = 1.f/sqrt(a); + p[0].x = 0; + p[0].y = -n.z*k; + p[0].z = n.y*k; + // set q = n x p + q[0].x = a*k; + q[0].y = -n.x*p[0].z; + q[0].z = n.x*p[0].y; + } + else { + // choose p in x-y plane + float a = n.x*n.x + n.y*n.y; + float k = 1.f/sqrt(a); + p[0].x = -n.y*k; + p[0].y = n.x*k; + p[0].z = 0; + // set q = n x p + q[0].x = -n.z*p[0].y; + q[0].y = n.z*p[0].x; + q[0].z = a*k; + } +} + + + +void setLinearAndAngular( b3Float4ConstArg n, b3Float4ConstArg r0, b3Float4ConstArg r1, b3Float4* linear, b3Float4* angular0, b3Float4* angular1) +{ + *linear = b3MakeFloat4(n.x,n.y,n.z,0.f); + *angular0 = b3Cross3(r0, n); + *angular1 = -b3Cross3(r1, n); +} + + +float calcRelVel( b3Float4ConstArg l0, b3Float4ConstArg l1, b3Float4ConstArg a0, b3Float4ConstArg a1, b3Float4ConstArg linVel0, + b3Float4ConstArg angVel0, b3Float4ConstArg linVel1, b3Float4ConstArg angVel1 ) +{ + return b3Dot3F4(l0, linVel0) + b3Dot3F4(a0, angVel0) + b3Dot3F4(l1, linVel1) + b3Dot3F4(a1, angVel1); +} + + +float calcJacCoeff(b3Float4ConstArg linear0, b3Float4ConstArg linear1, b3Float4ConstArg angular0, b3Float4ConstArg angular1, + float invMass0, const b3Mat3x3* invInertia0, float invMass1, const b3Mat3x3* invInertia1) +{ + // linear0,1 are normlized + float jmj0 = invMass0;//b3Dot3F4(linear0, linear0)*invMass0; + float jmj1 = b3Dot3F4(mtMul3(angular0,*invInertia0), angular0); + float jmj2 = invMass1;//b3Dot3F4(linear1, linear1)*invMass1; + float jmj3 = b3Dot3F4(mtMul3(angular1,*invInertia1), angular1); + return -1.f/(jmj0+jmj1+jmj2+jmj3); +} + + +void setConstraint4( b3Float4ConstArg posA, b3Float4ConstArg linVelA, b3Float4ConstArg angVelA, float invMassA, b3Mat3x3ConstArg invInertiaA, + b3Float4ConstArg posB, b3Float4ConstArg linVelB, b3Float4ConstArg angVelB, float invMassB, b3Mat3x3ConstArg invInertiaB, + __global struct b3Contact4Data* src, float dt, float positionDrift, float positionConstraintCoeff, + b3ContactConstraint4_t* dstC ) +{ + dstC->m_bodyA = abs(src->m_bodyAPtrAndSignBit); + dstC->m_bodyB = abs(src->m_bodyBPtrAndSignBit); + + float dtInv = 1.f/dt; + for(int ic=0; ic<4; ic++) + { + dstC->m_appliedRambdaDt[ic] = 0.f; + } + dstC->m_fJacCoeffInv[0] = dstC->m_fJacCoeffInv[1] = 0.f; + + + dstC->m_linear = src->m_worldNormalOnB; + dstC->m_linear.w = 0.7f ;//src->getFrictionCoeff() ); + for(int ic=0; ic<4; ic++) + { + b3Float4 r0 = src->m_worldPosB[ic] - posA; + b3Float4 r1 = src->m_worldPosB[ic] - posB; + + if( ic >= src->m_worldNormalOnB.w )//npoints + { + dstC->m_jacCoeffInv[ic] = 0.f; + continue; + } + + float relVelN; + { + b3Float4 linear, angular0, angular1; + setLinearAndAngular(src->m_worldNormalOnB, r0, r1, &linear, &angular0, &angular1); + + dstC->m_jacCoeffInv[ic] = calcJacCoeff(linear, -linear, angular0, angular1, + invMassA, &invInertiaA, invMassB, &invInertiaB ); + + relVelN = calcRelVel(linear, -linear, angular0, angular1, + linVelA, angVelA, linVelB, angVelB); + + float e = 0.f;//src->getRestituitionCoeff(); + if( relVelN*relVelN < 0.004f ) e = 0.f; + + dstC->m_b[ic] = e*relVelN; + //float penetration = src->m_worldPosB[ic].w; + dstC->m_b[ic] += (src->m_worldPosB[ic].w + positionDrift)*positionConstraintCoeff*dtInv; + dstC->m_appliedRambdaDt[ic] = 0.f; + } + } + + if( src->m_worldNormalOnB.w > 0 )//npoints + { // prepare friction + b3Float4 center = b3MakeFloat4(0.f,0.f,0.f,0.f); + for(int i=0; im_worldNormalOnB.w; i++) + center += src->m_worldPosB[i]; + center /= (float)src->m_worldNormalOnB.w; + + b3Float4 tangent[2]; + b3PlaneSpace1(src->m_worldNormalOnB,&tangent[0],&tangent[1]); + + b3Float4 r[2]; + r[0] = center - posA; + r[1] = center - posB; + + for(int i=0; i<2; i++) + { + b3Float4 linear, angular0, angular1; + setLinearAndAngular(tangent[i], r[0], r[1], &linear, &angular0, &angular1); + + dstC->m_fJacCoeffInv[i] = calcJacCoeff(linear, -linear, angular0, angular1, + invMassA, &invInertiaA, invMassB, &invInertiaB ); + dstC->m_fAppliedRambdaDt[i] = 0.f; + } + dstC->m_center = center; + } + + for(int i=0; i<4; i++) + { + if( im_worldNormalOnB.w ) + { + dstC->m_worldPos[i] = src->m_worldPosB[i]; + } + else + { + dstC->m_worldPos[i] = b3MakeFloat4(0.f,0.f,0.f,0.f); + } + } +} diff --git a/src/Bullet3OpenCL/NarrowphaseCollision/kernels/satKernels.h b/src/Bullet3OpenCL/NarrowphaseCollision/kernels/satKernels.h index fbea20f96..de850b116 100644 --- a/src/Bullet3OpenCL/NarrowphaseCollision/kernels/satKernels.h +++ b/src/Bullet3OpenCL/NarrowphaseCollision/kernels/satKernels.h @@ -224,7 +224,18 @@ static const char* satKernelsCL= \ "}\n" "inline b3Quat b3QuatNormalize(b3QuatConstArg in)\n" "{\n" -" return b3FastNormalize4(in);\n" +" //return b3FastNormalize4(in);\n" +" float len = native_sqrt(dot(q, q));\n" +" if(len > 0.f)\n" +" {\n" +" q *= 1.f / len;\n" +" }\n" +" else\n" +" {\n" +" q.x = q.y = q.z = 0.f;\n" +" q.w = 1.f;\n" +" }\n" +" return q;\n" "}\n" "inline float4 b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" "{\n" @@ -253,13 +264,13 @@ static const char* satKernelsCL= \ "#else\n" "typedef struct\n" "{\n" -" float4 m_row[3];\n" +" b3Float4 m_row[3];\n" "}b3Mat3x3;\n" "#define b3Mat3x3ConstArg const b3Mat3x3\n" "#define b3GetRow(m,row) (m.m_row[row])\n" "inline b3Mat3x3 b3QuatGetRotationMatrix(b3Quat quat)\n" "{\n" -" float4 quat2 = (float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f);\n" +" b3Float4 quat2 = (b3Float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f);\n" " b3Mat3x3 out;\n" " out.m_row[0].x=1-2*quat2.y-2*quat2.z;\n" " out.m_row[0].y=2*quat.x*quat.y-2*quat.w*quat.z;\n" @@ -283,6 +294,87 @@ static const char* satKernelsCL= \ " out.m_row[2] = fabs(matIn.m_row[2]);\n" " return out;\n" "}\n" +"__inline\n" +"b3Mat3x3 mtZero();\n" +"__inline\n" +"b3Mat3x3 mtIdentity();\n" +"__inline\n" +"b3Mat3x3 mtTranspose(b3Mat3x3 m);\n" +"__inline\n" +"b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b);\n" +"__inline\n" +"b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b);\n" +"__inline\n" +"b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b);\n" +"__inline\n" +"b3Mat3x3 mtZero()\n" +"{\n" +" b3Mat3x3 m;\n" +" m.m_row[0] = (b3Float4)(0.f);\n" +" m.m_row[1] = (b3Float4)(0.f);\n" +" m.m_row[2] = (b3Float4)(0.f);\n" +" return m;\n" +"}\n" +"__inline\n" +"b3Mat3x3 mtIdentity()\n" +"{\n" +" b3Mat3x3 m;\n" +" m.m_row[0] = (b3Float4)(1,0,0,0);\n" +" m.m_row[1] = (b3Float4)(0,1,0,0);\n" +" m.m_row[2] = (b3Float4)(0,0,1,0);\n" +" return m;\n" +"}\n" +"__inline\n" +"b3Mat3x3 mtTranspose(b3Mat3x3 m)\n" +"{\n" +" b3Mat3x3 out;\n" +" out.m_row[0] = (b3Float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f);\n" +" out.m_row[1] = (b3Float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f);\n" +" out.m_row[2] = (b3Float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);\n" +" return out;\n" +"}\n" +"__inline\n" +"b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b)\n" +"{\n" +" b3Mat3x3 transB;\n" +" transB = mtTranspose( b );\n" +" b3Mat3x3 ans;\n" +" // why this doesn't run when 0ing in the for{}\n" +" a.m_row[0].w = 0.f;\n" +" a.m_row[1].w = 0.f;\n" +" a.m_row[2].w = 0.f;\n" +" for(int i=0; i<3; i++)\n" +" {\n" +"// a.m_row[i].w = 0.f;\n" +" ans.m_row[i].x = b3Dot3F4(a.m_row[i],transB.m_row[0]);\n" +" ans.m_row[i].y = b3Dot3F4(a.m_row[i],transB.m_row[1]);\n" +" ans.m_row[i].z = b3Dot3F4(a.m_row[i],transB.m_row[2]);\n" +" ans.m_row[i].w = 0.f;\n" +" }\n" +" return ans;\n" +"}\n" +"__inline\n" +"b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b)\n" +"{\n" +" b3Float4 ans;\n" +" ans.x = b3Dot3F4( a.m_row[0], b );\n" +" ans.y = b3Dot3F4( a.m_row[1], b );\n" +" ans.z = b3Dot3F4( a.m_row[2], b );\n" +" ans.w = 0.f;\n" +" return ans;\n" +"}\n" +"__inline\n" +"b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b)\n" +"{\n" +" b3Float4 colx = b3MakeFloat4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n" +" b3Float4 coly = b3MakeFloat4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n" +" b3Float4 colz = b3MakeFloat4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n" +" b3Float4 ans;\n" +" ans.x = b3Dot3F4( a, colx );\n" +" ans.y = b3Dot3F4( a, coly );\n" +" ans.z = b3Dot3F4( a, colz );\n" +" return ans;\n" +"}\n" "#endif\n" "#endif //B3_MAT3x3_H\n" "typedef struct b3Aabb b3Aabb_t;\n" diff --git a/src/Bullet3OpenCL/RigidBody/b3GpuBatchingPgsSolver.cpp b/src/Bullet3OpenCL/RigidBody/b3GpuBatchingPgsSolver.cpp index 3ab20da6b..439bbcca5 100644 --- a/src/Bullet3OpenCL/RigidBody/b3GpuBatchingPgsSolver.cpp +++ b/src/Bullet3OpenCL/RigidBody/b3GpuBatchingPgsSolver.cpp @@ -1,15 +1,32 @@ -bool b3GpuBatchContacts = true; -bool b3GpuSolveConstraint = true; -bool gpuRadixSort=true; -bool gpuSetSortData = true; +//#define USE_CPU +#ifdef USE_CPU + bool b3GpuBatchContacts = false; + bool b3GpuSolveConstraint = false; + bool gpuRadixSort=false; + bool gpuSetSortData = false; -bool optionalSortContactsDeterminism = true; -bool gpuSortContactsDeterminism = true; -bool useCpuCopyConstraints = false; + bool optionalSortContactsDeterminism = true; + bool gpuSortContactsDeterminism = false; + bool useCpuCopyConstraints = true; -bool useScanHost = false; -bool reorderContactsOnCpu = false; + bool useScanHost = true; + bool reorderContactsOnCpu = true; + +#else + bool b3GpuBatchContacts = true; + bool b3GpuSolveConstraint = true; + bool gpuRadixSort=true; + bool gpuSetSortData = true; + + bool optionalSortContactsDeterminism = true; + bool gpuSortContactsDeterminism = true; + bool useCpuCopyConstraints = false; + + bool useScanHost = false; + bool reorderContactsOnCpu = false; + +#endif #include "b3GpuBatchingPgsSolver.h" #include "Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h" diff --git a/src/Bullet3OpenCL/RigidBody/b3GpuConstraint4.h b/src/Bullet3OpenCL/RigidBody/b3GpuConstraint4.h index 009447dac..c7478f54a 100644 --- a/src/Bullet3OpenCL/RigidBody/b3GpuConstraint4.h +++ b/src/Bullet3OpenCL/RigidBody/b3GpuConstraint4.h @@ -3,24 +3,13 @@ #define B3_CONSTRAINT4_h #include "Bullet3Common/b3Vector3.h" -B3_ATTRIBUTE_ALIGNED16(struct) b3GpuConstraint4 +#include "Bullet3Dynamics/shared/b3ContactConstraint4.h" + + +B3_ATTRIBUTE_ALIGNED16(struct) b3GpuConstraint4 : public b3ContactConstraint4 { B3_DECLARE_ALIGNED_ALLOCATOR(); - b3Vector3 m_linear;//normal? - b3Vector3 m_worldPos[4]; - b3Vector3 m_center; // friction - float m_jacCoeffInv[4]; - float m_b[4]; - float m_appliedRambdaDt[4]; - float m_fJacCoeffInv[2]; // friction - float m_fAppliedRambdaDt[2]; // friction - - unsigned int m_bodyA; - unsigned int m_bodyB; - int m_batchIdx; - unsigned int m_paddings; - inline void setFrictionCoeff(float value) { m_linear[3] = value; } inline float getFrictionCoeff() const { return m_linear[3]; } }; diff --git a/src/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.cpp b/src/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.cpp index 24f68a7d9..07a1d8084 100644 --- a/src/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.cpp +++ b/src/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.cpp @@ -33,11 +33,21 @@ subject to the following restrictions: #define B3_RIGIDBODY_INTEGRATE_PATH "src/Bullet3OpenCL/RigidBody/kernels/integrateKernel.cl" #define B3_RIGIDBODY_UPDATEAABB_PATH "src/Bullet3OpenCL/RigidBody/kernels/updateAabbsKernel.cl" -bool useDbvt = false;//true; -bool useBullet2CpuSolver = true; -bool dumpContactStats = false; -bool calcWorldSpaceAabbOnCpu = false;//true; -bool useCalculateOverlappingPairsHost = false; +//#define USE_CPU +#ifdef USE_CPU + bool useDbvt = true; + bool useBullet2CpuSolver = true; + bool dumpContactStats = false; + bool calcWorldSpaceAabbOnCpu = true; + bool useCalculateOverlappingPairsHost = true; + +#else + bool useDbvt = false;//true; + bool useBullet2CpuSolver = true; + bool dumpContactStats = false; + bool calcWorldSpaceAabbOnCpu = false;//true; + bool useCalculateOverlappingPairsHost = false; +#endif #ifdef TEST_OTHER_GPU_SOLVER #include "b3GpuJacobiSolver.h" diff --git a/src/Bullet3OpenCL/RigidBody/b3Solver.cpp b/src/Bullet3OpenCL/RigidBody/b3Solver.cpp index be17ecf22..db11067d9 100644 --- a/src/Bullet3OpenCL/RigidBody/b3Solver.cpp +++ b/src/Bullet3OpenCL/RigidBody/b3Solver.cpp @@ -18,6 +18,7 @@ subject to the following restrictions: ///useNewBatchingKernel is a rewritten kernel using just a single thread of the warp, for experiments bool useNewBatchingKernel = false; +bool convertConstraintOnCpu = false;//true; #define B3_SOLVER_SETUP_KERNEL_PATH "src/Bullet3OpenCL/RigidBody/kernels/solverSetup.cl" #define B3_SOLVER_SETUP2_KERNEL_PATH "src/Bullet3OpenCL/RigidBody/kernels/solverSetup2.cl" @@ -26,6 +27,7 @@ bool useNewBatchingKernel = false; #define B3_BATCHING_PATH "src/Bullet3OpenCL/RigidBody/kernels/batchingKernels.cl" #define B3_BATCHING_NEW_PATH "src/Bullet3OpenCL/RigidBody/kernels/batchingKernelsNew.cl" +#include "Bullet3Dynamics/shared/b3ConvertConstraint4.h" #include "kernels/solverSetup.h" #include "kernels/solverSetup2.h" @@ -203,103 +205,6 @@ b3Solver::~b3Solver() - -/*void b3Solver::reorderConvertToConstraints( const b3OpenCLArray* bodyBuf, - const b3OpenCLArray* shapeBuf, - b3OpenCLArray* contactsIn, b3OpenCLArray* contactCOut, void* additionalData, - int nContacts, const b3Solver::ConstraintCfg& cfg ) -{ - if( m_contactBuffer ) - { - m_contactBuffer->resize(nContacts); - } - if( m_contactBuffer == 0 ) - { - B3_PROFILE("new m_contactBuffer;"); - m_contactBuffer = new b3OpenCLArray(m_context,m_queue,nContacts ); - m_contactBuffer->resize(nContacts); - } - - - - - //DeviceUtils::Config dhCfg; - //Device* deviceHost = DeviceUtils::allocate( TYPE_HOST, dhCfg ); - if( cfg.m_enableParallelSolve ) - { - - - clFinish(m_queue); - - // contactsIn -> m_contactBuffer - { - B3_PROFILE("sortContacts"); - sortContacts( bodyBuf, contactsIn, additionalData, nContacts, cfg ); - clFinish(m_queue); - } - - - { - B3_PROFILE("m_copyConstraintKernel"); - - - - b3Int4 cdata; cdata.x = nContacts; - b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_contactBuffer->getBufferCL() ), b3BufferInfoCL( contactsIn->getBufferCL() ) }; -// b3LauncherCL launcher( m_queue, data->m_device->getKernel( PATH, "CopyConstraintKernel", "-I ..\\..\\ -Wf,--c++", 0 ) ); - b3LauncherCL launcher( m_queue, m_copyConstraintKernel ); - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst( cdata ); - launcher.launch1D( nContacts, 64 ); - clFinish(m_queue); - } - - { - B3_PROFILE("batchContacts"); - b3Solver::batchContacts( contactsIn, nContacts, m_numConstraints, m_offsets, cfg.m_staticIdx ); - - } - } - { - B3_PROFILE("waitForCompletion (batchContacts)"); - clFinish(m_queue); - } - - //================ - - { - B3_PROFILE("convertToConstraints"); - b3Solver::convertToConstraints( bodyBuf, shapeBuf, contactsIn, contactCOut, additionalData, nContacts, cfg ); - } - - { - B3_PROFILE("convertToConstraints waitForCompletion"); - clFinish(m_queue); - } - -} -*/ - - static - inline - float calcRelVel(const b3Vector3& l0, const b3Vector3& l1, const b3Vector3& a0, const b3Vector3& a1, - const b3Vector3& linVel0, const b3Vector3& angVel0, const b3Vector3& linVel1, const b3Vector3& angVel1) - { - return b3Dot(l0, linVel0) + b3Dot(a0, angVel0) + b3Dot(l1, linVel1) + b3Dot(a1, angVel1); - } - - - static - inline - void setLinearAndAngular(const b3Vector3& n, const b3Vector3& r0, const b3Vector3& r1, - b3Vector3& linear, b3Vector3& angular0, b3Vector3& angular1) - { - linear = -n; - angular0 = -b3Cross(r0, n); - angular1 = b3Cross(r1, n); - } - - template static __inline @@ -323,7 +228,7 @@ void solveContact(b3GpuConstraint4& cs, b3Vector3 angular0, angular1, linear; b3Vector3 r0 = cs.m_worldPos[ic] - (b3Vector3&)posA; b3Vector3 r1 = cs.m_worldPos[ic] - (b3Vector3&)posB; - setLinearAndAngular( (const b3Vector3 &)-cs.m_linear, (const b3Vector3 &)r0, (const b3Vector3 &)r1, linear, angular0, angular1 ); + setLinearAndAngular( (const b3Vector3 &)cs.m_linear, (const b3Vector3 &)r0, (const b3Vector3 &)r1, &linear, &angular0, &angular1 ); float rambdaDt = calcRelVel((const b3Vector3 &)cs.m_linear,(const b3Vector3 &) -cs.m_linear, angular0, angular1, linVelA, angVelA, linVelB, angVelB ) + cs.m_b[ic]; @@ -407,7 +312,7 @@ void solveContact(b3GpuConstraint4& cs, b3Vector3 r1 = center - posB; for(int i=0; i<2; i++) { - setLinearAndAngular( tangent[i], r0, r1, linear, angular0, angular1 ); + setLinearAndAngular( tangent[i], r0, r1, &linear, &angular0, &angular1 ); float rambdaDt = calcRelVel(linear, -linear, angular0, angular1, linVelA, angVelA, linVelB, angVelB ); rambdaDt *= cs.m_fJacCoeffInv[i]; @@ -1066,28 +971,78 @@ void b3Solver::convertToConstraints( const b3OpenCLArray* bodyBuf }; { - B3_PROFILE("m_contactToConstraintKernel"); + CB cdata; cdata.m_nContacts = nContacts; cdata.m_dt = cfg.m_dt; cdata.m_positionDrift = cfg.m_positionDrift; cdata.m_positionConstraintCoeff = cfg.m_positionConstraintCoeff; - - b3BufferInfoCL bInfo[] = { b3BufferInfoCL( contactsIn->getBufferCL() ), b3BufferInfoCL( bodyBuf->getBufferCL() ), b3BufferInfoCL( shapeBuf->getBufferCL()), - b3BufferInfoCL( contactCOut->getBufferCL() )}; - b3LauncherCL launcher( m_queue, m_contactToConstraintKernel ); - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); - //launcher.setConst( cdata ); - - launcher.setConst(cdata.m_nContacts); - launcher.setConst(cdata.m_dt); - launcher.setConst(cdata.m_positionDrift); - launcher.setConst(cdata.m_positionConstraintCoeff); - - launcher.launch1D( nContacts, 64 ); - clFinish(m_queue); + b3AlignedObjectArray gBodies; + bodyBuf->copyToHost(gBodies); + b3AlignedObjectArray gContact; + contactsIn->copyToHost(gContact); + + b3AlignedObjectArray gShapes; + shapeBuf->copyToHost(gShapes); + + b3AlignedObjectArray gConstraintOut; + gConstraintOut.resize(nContacts); + + if (convertConstraintOnCpu) + { + B3_PROFILE("cpu contactToConstraintKernel"); + for (int gIdx=0;gIdxcopyFromHost(gConstraintOut); + + } else + { + B3_PROFILE("gpu m_contactToConstraintKernel"); + + + b3BufferInfoCL bInfo[] = { b3BufferInfoCL( contactsIn->getBufferCL() ), b3BufferInfoCL( bodyBuf->getBufferCL() ), b3BufferInfoCL( shapeBuf->getBufferCL()), + b3BufferInfoCL( contactCOut->getBufferCL() )}; + b3LauncherCL launcher( m_queue, m_contactToConstraintKernel ); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); + //launcher.setConst( cdata ); + + launcher.setConst(cdata.m_nContacts); + launcher.setConst(cdata.m_dt); + launcher.setConst(cdata.m_positionDrift); + launcher.setConst(cdata.m_positionConstraintCoeff); + + launcher.launch1D( nContacts, 64 ); + clFinish(m_queue); + + } } diff --git a/src/Bullet3OpenCL/RigidBody/kernels/integrateKernel.cl b/src/Bullet3OpenCL/RigidBody/kernels/integrateKernel.cl index b431ba511..d589efdab 100644 --- a/src/Bullet3OpenCL/RigidBody/kernels/integrateKernel.cl +++ b/src/Bullet3OpenCL/RigidBody/kernels/integrateKernel.cl @@ -13,6 +13,9 @@ subject to the following restrictions: */ //Originally written by Erwin Coumans + +#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h" + float4 quatMult(float4 q1, float4 q2) { float4 q; @@ -23,40 +26,14 @@ float4 quatMult(float4 q1, float4 q2) return q; } -float4 quatNorm(float4 q) -{ - float len = native_sqrt(dot(q, q)); - if(len > 0.f) - { - q *= 1.f / len; - } - else - { - q.x = q.y = q.z = 0.f; - q.w = 1.f; - } - return q; -} -typedef struct -{ - float4 m_pos; - float4 m_quat; - float4 m_linVel; - float4 m_angVel; - - unsigned int m_collidableIdx; - float m_invMass; - float m_restituitionCoeff; - float m_frictionCoeff; -} Body; __kernel void - integrateTransformsKernel( __global Body* bodies,const int numNodes, float timeStep, float angularDamping, float4 gravityAcceleration) + integrateTransformsKernel( __global b3RigidBodyData_t* bodies,const int numNodes, float timeStep, float angularDamping, float4 gravityAcceleration) { int nodeID = get_global_id(0); float BT_GPU_ANGULAR_MOTION_THRESHOLD = (0.25f * 3.14159254f); @@ -92,7 +69,7 @@ __kernel void float4 orn0 = bodies[nodeID].m_quat; float4 predictedOrn = quatMult(dorn, orn0); - predictedOrn = quatNorm(predictedOrn); + predictedOrn = b3QuatNormalize(predictedOrn); bodies[nodeID].m_quat=predictedOrn; } diff --git a/src/Bullet3OpenCL/RigidBody/kernels/integrateKernel.h b/src/Bullet3OpenCL/RigidBody/kernels/integrateKernel.h index 148c053c8..360478e13 100644 --- a/src/Bullet3OpenCL/RigidBody/kernels/integrateKernel.h +++ b/src/Bullet3OpenCL/RigidBody/kernels/integrateKernel.h @@ -12,17 +12,88 @@ static const char* integrateKernelCL= \ "3. This notice may not be removed or altered from any source distribution.\n" "*/\n" "//Originally written by Erwin Coumans\n" -"float4 quatMult(float4 q1, float4 q2)\n" +"#ifndef B3_RIGIDBODY_DATA_H\n" +"#define B3_RIGIDBODY_DATA_H\n" +"#ifndef B3_FLOAT4_H\n" +"#define B3_FLOAT4_H\n" +"#ifndef B3_PLATFORM_DEFINITIONS_H\n" +"#define B3_PLATFORM_DEFINITIONS_H\n" +"struct MyTest\n" "{\n" -" float4 q;\n" -" q.x = q1.w * q2.x + q1.x * q2.w + q1.y * q2.z - q1.z * q2.y;\n" -" q.y = q1.w * q2.y + q1.y * q2.w + q1.z * q2.x - q1.x * q2.z;\n" -" q.z = q1.w * q2.z + q1.z * q2.w + q1.x * q2.y - q1.y * q2.x;\n" -" q.w = q1.w * q2.w - q1.x * q2.x - q1.y * q2.y - q1.z * q2.z; \n" -" return q;\n" +" int bla;\n" +"};\n" +"#ifdef __cplusplus\n" +"#else\n" +"#define b3AtomicInc atomic_inc\n" +"#define b3Fabs fabs\n" +"#endif\n" +"#endif\n" +"#ifdef __cplusplus\n" +"#else\n" +" typedef float4 b3Float4;\n" +" #define b3Float4ConstArg const b3Float4\n" +" #define b3MakeFloat4 (float4)\n" +" float b3Dot3F4(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" +" {\n" +" float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" +" float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" +" return dot(a1, b1);\n" +" }\n" +" b3Float4 b3Cross3(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" +" {\n" +" float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" +" float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" +" return cross(a1, b1);\n" +" }\n" +"#endif \n" +" \n" +"inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" +"{\n" +" if(b3Fabs(v.x)>1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6) \n" +" return false;\n" +" return true;\n" "}\n" -"float4 quatNorm(float4 q)\n" +"#endif //B3_FLOAT4_H\n" +"#ifndef B3_QUAT_H\n" +"#define B3_QUAT_H\n" +"#ifndef B3_PLATFORM_DEFINITIONS_H\n" +"#ifdef __cplusplus\n" +"#else\n" +"#endif\n" +"#endif\n" +"#ifndef B3_FLOAT4_H\n" +"#ifdef __cplusplus\n" +"#else\n" +"#endif \n" +"#endif //B3_FLOAT4_H\n" +"#ifdef __cplusplus\n" +"#else\n" +" typedef float4 b3Quat;\n" +" #define b3QuatConstArg const b3Quat\n" +" \n" +" \n" +"inline float4 b3FastNormalize4(float4 v)\n" "{\n" +" v = (float4)(v.xyz,0.f);\n" +" return fast_normalize(v);\n" +"}\n" +" \n" +"inline b3Quat b3QuatMul(b3Quat a, b3Quat b);\n" +"inline b3Quat b3QuatNormalize(b3QuatConstArg in);\n" +"inline b3Quat b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec);\n" +"inline b3Quat b3QuatInvert(b3QuatConstArg q);\n" +"inline b3Quat b3QuatMul(b3QuatConstArg a, b3QuatConstArg b)\n" +"{\n" +" b3Quat ans;\n" +" ans = b3Cross3( a, b );\n" +" ans += a.w*b+b.w*a;\n" +"// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" +" ans.w = a.w*b.w - b3Dot3F4(a, b);\n" +" return ans;\n" +"}\n" +"inline b3Quat b3QuatNormalize(b3QuatConstArg in)\n" +"{\n" +" //return b3FastNormalize4(in);\n" " float len = native_sqrt(dot(q, q));\n" " if(len > 0.f)\n" " {\n" @@ -35,19 +106,184 @@ static const char* integrateKernelCL= \ " }\n" " return q;\n" "}\n" +"inline float4 b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" +"{\n" +" b3Quat qInv = b3QuatInvert( q );\n" +" float4 vcpy = vec;\n" +" vcpy.w = 0.f;\n" +" float4 out = b3QuatMul(b3QuatMul(q,vcpy),qInv);\n" +" return out;\n" +"}\n" +"inline b3Quat b3QuatInvert(b3QuatConstArg q)\n" +"{\n" +" return (b3Quat)(-q.xyz, q.w);\n" +"}\n" +"inline float4 b3QuatInvRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" +"{\n" +" return b3QuatRotate( b3QuatInvert( q ), vec );\n" +"}\n" +"inline b3Float4 b3TransformPoint(b3Float4ConstArg point, b3Float4ConstArg translation, b3QuatConstArg orientation)\n" +"{\n" +" return b3QuatRotate( orientation, point ) + (translation);\n" +"}\n" +" \n" +"#endif \n" +"#endif //B3_QUAT_H\n" +"#ifndef B3_MAT3x3_H\n" +"#define B3_MAT3x3_H\n" +"#ifndef B3_QUAT_H\n" +"#ifdef __cplusplus\n" +"#else\n" +"#endif \n" +"#endif //B3_QUAT_H\n" +"#ifdef __cplusplus\n" +"#else\n" "typedef struct\n" "{\n" -" float4 m_pos;\n" -" float4 m_quat;\n" -" float4 m_linVel;\n" -" float4 m_angVel;\n" -" unsigned int m_collidableIdx;\n" -" float m_invMass;\n" -" float m_restituitionCoeff;\n" -" float m_frictionCoeff;\n" -"} Body;\n" +" b3Float4 m_row[3];\n" +"}b3Mat3x3;\n" +"#define b3Mat3x3ConstArg const b3Mat3x3\n" +"#define b3GetRow(m,row) (m.m_row[row])\n" +"inline b3Mat3x3 b3QuatGetRotationMatrix(b3Quat quat)\n" +"{\n" +" b3Float4 quat2 = (b3Float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f);\n" +" b3Mat3x3 out;\n" +" out.m_row[0].x=1-2*quat2.y-2*quat2.z;\n" +" out.m_row[0].y=2*quat.x*quat.y-2*quat.w*quat.z;\n" +" out.m_row[0].z=2*quat.x*quat.z+2*quat.w*quat.y;\n" +" out.m_row[0].w = 0.f;\n" +" out.m_row[1].x=2*quat.x*quat.y+2*quat.w*quat.z;\n" +" out.m_row[1].y=1-2*quat2.x-2*quat2.z;\n" +" out.m_row[1].z=2*quat.y*quat.z-2*quat.w*quat.x;\n" +" out.m_row[1].w = 0.f;\n" +" out.m_row[2].x=2*quat.x*quat.z-2*quat.w*quat.y;\n" +" out.m_row[2].y=2*quat.y*quat.z+2*quat.w*quat.x;\n" +" out.m_row[2].z=1-2*quat2.x-2*quat2.y;\n" +" out.m_row[2].w = 0.f;\n" +" return out;\n" +"}\n" +"inline b3Mat3x3 b3AbsoluteMat3x3(b3Mat3x3ConstArg matIn)\n" +"{\n" +" b3Mat3x3 out;\n" +" out.m_row[0] = fabs(matIn.m_row[0]);\n" +" out.m_row[1] = fabs(matIn.m_row[1]);\n" +" out.m_row[2] = fabs(matIn.m_row[2]);\n" +" return out;\n" +"}\n" +"__inline\n" +"b3Mat3x3 mtZero();\n" +"__inline\n" +"b3Mat3x3 mtIdentity();\n" +"__inline\n" +"b3Mat3x3 mtTranspose(b3Mat3x3 m);\n" +"__inline\n" +"b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b);\n" +"__inline\n" +"b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b);\n" +"__inline\n" +"b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b);\n" +"__inline\n" +"b3Mat3x3 mtZero()\n" +"{\n" +" b3Mat3x3 m;\n" +" m.m_row[0] = (b3Float4)(0.f);\n" +" m.m_row[1] = (b3Float4)(0.f);\n" +" m.m_row[2] = (b3Float4)(0.f);\n" +" return m;\n" +"}\n" +"__inline\n" +"b3Mat3x3 mtIdentity()\n" +"{\n" +" b3Mat3x3 m;\n" +" m.m_row[0] = (b3Float4)(1,0,0,0);\n" +" m.m_row[1] = (b3Float4)(0,1,0,0);\n" +" m.m_row[2] = (b3Float4)(0,0,1,0);\n" +" return m;\n" +"}\n" +"__inline\n" +"b3Mat3x3 mtTranspose(b3Mat3x3 m)\n" +"{\n" +" b3Mat3x3 out;\n" +" out.m_row[0] = (b3Float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f);\n" +" out.m_row[1] = (b3Float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f);\n" +" out.m_row[2] = (b3Float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);\n" +" return out;\n" +"}\n" +"__inline\n" +"b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b)\n" +"{\n" +" b3Mat3x3 transB;\n" +" transB = mtTranspose( b );\n" +" b3Mat3x3 ans;\n" +" // why this doesn't run when 0ing in the for{}\n" +" a.m_row[0].w = 0.f;\n" +" a.m_row[1].w = 0.f;\n" +" a.m_row[2].w = 0.f;\n" +" for(int i=0; i<3; i++)\n" +" {\n" +"// a.m_row[i].w = 0.f;\n" +" ans.m_row[i].x = b3Dot3F4(a.m_row[i],transB.m_row[0]);\n" +" ans.m_row[i].y = b3Dot3F4(a.m_row[i],transB.m_row[1]);\n" +" ans.m_row[i].z = b3Dot3F4(a.m_row[i],transB.m_row[2]);\n" +" ans.m_row[i].w = 0.f;\n" +" }\n" +" return ans;\n" +"}\n" +"__inline\n" +"b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b)\n" +"{\n" +" b3Float4 ans;\n" +" ans.x = b3Dot3F4( a.m_row[0], b );\n" +" ans.y = b3Dot3F4( a.m_row[1], b );\n" +" ans.z = b3Dot3F4( a.m_row[2], b );\n" +" ans.w = 0.f;\n" +" return ans;\n" +"}\n" +"__inline\n" +"b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b)\n" +"{\n" +" b3Float4 colx = b3MakeFloat4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n" +" b3Float4 coly = b3MakeFloat4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n" +" b3Float4 colz = b3MakeFloat4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n" +" b3Float4 ans;\n" +" ans.x = b3Dot3F4( a, colx );\n" +" ans.y = b3Dot3F4( a, coly );\n" +" ans.z = b3Dot3F4( a, colz );\n" +" return ans;\n" +"}\n" +"#endif\n" +"#endif //B3_MAT3x3_H\n" +"typedef struct b3RigidBodyData b3RigidBodyData_t;\n" +"struct b3RigidBodyData\n" +"{\n" +" b3Float4 m_pos;\n" +" b3Quat m_quat;\n" +" b3Float4 m_linVel;\n" +" b3Float4 m_angVel;\n" +" int m_collidableIdx;\n" +" float m_invMass;\n" +" float m_restituitionCoeff;\n" +" float m_frictionCoeff;\n" +"};\n" +"typedef struct b3InertiaData b3InertiaData_t;\n" +"struct b3InertiaData\n" +"{\n" +" b3Mat3x3 m_invInertiaWorld;\n" +" b3Mat3x3 m_initInvInertia;\n" +"};\n" +"#endif //B3_RIGIDBODY_DATA_H\n" +" \n" +"float4 quatMult(float4 q1, float4 q2)\n" +"{\n" +" float4 q;\n" +" q.x = q1.w * q2.x + q1.x * q2.w + q1.y * q2.z - q1.z * q2.y;\n" +" q.y = q1.w * q2.y + q1.y * q2.w + q1.z * q2.x - q1.x * q2.z;\n" +" q.z = q1.w * q2.z + q1.z * q2.w + q1.x * q2.y - q1.y * q2.x;\n" +" q.w = q1.w * q2.w - q1.x * q2.x - q1.y * q2.y - q1.z * q2.z; \n" +" return q;\n" +"}\n" "__kernel void \n" -" integrateTransformsKernel( __global Body* bodies,const int numNodes, float timeStep, float angularDamping, float4 gravityAcceleration)\n" +" integrateTransformsKernel( __global b3RigidBodyData_t* bodies,const int numNodes, float timeStep, float angularDamping, float4 gravityAcceleration)\n" "{\n" " int nodeID = get_global_id(0);\n" " float BT_GPU_ANGULAR_MOTION_THRESHOLD = (0.25f * 3.14159254f);\n" @@ -82,7 +318,7 @@ static const char* integrateKernelCL= \ " dorn.w = native_cos(fAngle * timeStep * 0.5f);\n" " float4 orn0 = bodies[nodeID].m_quat;\n" " float4 predictedOrn = quatMult(dorn, orn0);\n" -" predictedOrn = quatNorm(predictedOrn);\n" +" predictedOrn = b3QuatNormalize(predictedOrn);\n" " bodies[nodeID].m_quat=predictedOrn;\n" " }\n" " //linear velocity \n" diff --git a/src/Bullet3OpenCL/RigidBody/kernels/solverSetup.cl b/src/Bullet3OpenCL/RigidBody/kernels/solverSetup.cl index ed3d70526..8e2de7b5a 100644 --- a/src/Bullet3OpenCL/RigidBody/kernels/solverSetup.cl +++ b/src/Bullet3OpenCL/RigidBody/kernels/solverSetup.cl @@ -14,7 +14,7 @@ subject to the following restrictions: */ //Originally written by Takahiro Harada -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Contact4Data.h" +#include "Bullet3Dynamics/shared/b3ConvertConstraint4.h" #pragma OPENCL EXTENSION cl_amd_printf : enable #pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable @@ -171,238 +171,13 @@ float4 createEquation(const float4 a, const float4 b, const float4 c) return eqn; } -/////////////////////////////////////// -// Matrix3x3 -/////////////////////////////////////// - -typedef struct -{ - float4 m_row[3]; -}Matrix3x3; - -__inline -Matrix3x3 mtZero(); - -__inline -Matrix3x3 mtIdentity(); - -__inline -Matrix3x3 mtTranspose(Matrix3x3 m); - -__inline -Matrix3x3 mtMul(Matrix3x3 a, Matrix3x3 b); - -__inline -float4 mtMul1(Matrix3x3 a, float4 b); - -__inline -float4 mtMul3(float4 a, Matrix3x3 b); - -__inline -Matrix3x3 mtZero() -{ - Matrix3x3 m; - m.m_row[0] = (float4)(0.f); - m.m_row[1] = (float4)(0.f); - m.m_row[2] = (float4)(0.f); - return m; -} - -__inline -Matrix3x3 mtIdentity() -{ - Matrix3x3 m; - m.m_row[0] = (float4)(1,0,0,0); - m.m_row[1] = (float4)(0,1,0,0); - m.m_row[2] = (float4)(0,0,1,0); - return m; -} - -__inline -Matrix3x3 mtTranspose(Matrix3x3 m) -{ - Matrix3x3 out; - out.m_row[0] = (float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f); - out.m_row[1] = (float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f); - out.m_row[2] = (float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f); - return out; -} - -__inline -Matrix3x3 mtMul(Matrix3x3 a, Matrix3x3 b) -{ - Matrix3x3 transB; - transB = mtTranspose( b ); - Matrix3x3 ans; - // why this doesn't run when 0ing in the for{} - a.m_row[0].w = 0.f; - a.m_row[1].w = 0.f; - a.m_row[2].w = 0.f; - for(int i=0; i<3; i++) - { -// a.m_row[i].w = 0.f; - ans.m_row[i].x = dot3F4(a.m_row[i],transB.m_row[0]); - ans.m_row[i].y = dot3F4(a.m_row[i],transB.m_row[1]); - ans.m_row[i].z = dot3F4(a.m_row[i],transB.m_row[2]); - ans.m_row[i].w = 0.f; - } - return ans; -} - -__inline -float4 mtMul1(Matrix3x3 a, float4 b) -{ - float4 ans; - ans.x = dot3F4( a.m_row[0], b ); - ans.y = dot3F4( a.m_row[1], b ); - ans.z = dot3F4( a.m_row[2], b ); - ans.w = 0.f; - return ans; -} - -__inline -float4 mtMul3(float4 a, Matrix3x3 b) -{ - float4 colx = make_float4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0); - float4 coly = make_float4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0); - float4 colz = make_float4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0); - - float4 ans; - ans.x = dot3F4( a, colx ); - ans.y = dot3F4( a, coly ); - ans.z = dot3F4( a, colz ); - return ans; -} - -/////////////////////////////////////// -// Quaternion -/////////////////////////////////////// - -typedef float4 Quaternion; - -__inline -Quaternion qtMul(Quaternion a, Quaternion b); - -__inline -Quaternion qtNormalize(Quaternion in); - -__inline -float4 qtRotate(Quaternion q, float4 vec); - -__inline -Quaternion qtInvert(Quaternion q); - -__inline -Matrix3x3 qtGetRotationMatrix(Quaternion q); - - - -__inline -Quaternion qtMul(Quaternion a, Quaternion b) -{ - Quaternion ans; - ans = cross3( a, b ); - ans += a.w*b+b.w*a; -// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z); - ans.w = a.w*b.w - dot3F4(a, b); - return ans; -} - -__inline -Quaternion qtNormalize(Quaternion in) -{ - return fastNormalize4(in); -// in /= length( in ); -// return in; -} -__inline -float4 qtRotate(Quaternion q, float4 vec) -{ - Quaternion qInv = qtInvert( q ); - float4 vcpy = vec; - vcpy.w = 0.f; - float4 out = qtMul(qtMul(q,vcpy),qInv); - return out; -} - -__inline -Quaternion qtInvert(Quaternion q) -{ - return (Quaternion)(-q.xyz, q.w); -} - -__inline -float4 qtInvRotate(const Quaternion q, float4 vec) -{ - return qtRotate( qtInvert( q ), vec ); -} - -__inline -Matrix3x3 qtGetRotationMatrix(Quaternion quat) -{ - float4 quat2 = (float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f); - Matrix3x3 out; - - out.m_row[0].x=1-2*quat2.y-2*quat2.z; - out.m_row[0].y=2*quat.x*quat.y-2*quat.w*quat.z; - out.m_row[0].z=2*quat.x*quat.z+2*quat.w*quat.y; - out.m_row[0].w = 0.f; - - out.m_row[1].x=2*quat.x*quat.y+2*quat.w*quat.z; - out.m_row[1].y=1-2*quat2.x-2*quat2.z; - out.m_row[1].z=2*quat.y*quat.z-2*quat.w*quat.x; - out.m_row[1].w = 0.f; - - out.m_row[2].x=2*quat.x*quat.z-2*quat.w*quat.y; - out.m_row[2].y=2*quat.y*quat.z+2*quat.w*quat.x; - out.m_row[2].z=1-2*quat2.x-2*quat2.y; - out.m_row[2].w = 0.f; - - return out; -} - - #define WG_SIZE 64 -typedef struct -{ - float4 m_pos; - Quaternion m_quat; - float4 m_linVel; - float4 m_angVel; - u32 m_shapeIdx; - float m_invMass; - float m_restituitionCoeff; - float m_frictionCoeff; -} Body; -typedef struct -{ - Matrix3x3 m_invInertia; - Matrix3x3 m_initInvInertia; -} Shape; -typedef struct -{ - float4 m_linear; - float4 m_worldPos[4]; - float4 m_center; - float m_jacCoeffInv[4]; - float m_b[4]; - float m_appliedRambdaDt[4]; - - float m_fJacCoeffInv[2]; - float m_fAppliedRambdaDt[2]; - - u32 m_bodyA; - u32 m_bodyB; - - int m_batchIdx; - u32 m_paddings[1]; -} Constraint4; @@ -425,31 +200,6 @@ typedef struct } ConstBufferBatchSolve; -void setLinearAndAngular( float4 n, float4 r0, float4 r1, float4* linear, float4* angular0, float4* angular1) -{ - *linear = make_float4(n.xyz,0.f); - *angular0 = cross3(r0, n); - *angular1 = -cross3(r1, n); -} - - -float calcRelVel( float4 l0, float4 l1, float4 a0, float4 a1, float4 linVel0, float4 angVel0, float4 linVel1, float4 angVel1 ) -{ - return dot3F4(l0, linVel0) + dot3F4(a0, angVel0) + dot3F4(l1, linVel1) + dot3F4(a1, angVel1); -} - - -float calcJacCoeff(const float4 linear0, const float4 linear1, const float4 angular0, const float4 angular1, - float invMass0, const Matrix3x3* invInertia0, float invMass1, const Matrix3x3* invInertia1) -{ - // linear0,1 are normlized - float jmj0 = invMass0;//dot3F4(linear0, linear0)*invMass0; - float jmj1 = dot3F4(mtMul3(angular0,*invInertia0), angular0); - float jmj2 = invMass1;//dot3F4(linear1, linear1)*invMass1; - float jmj3 = dot3F4(mtMul3(angular1,*invInertia1), angular1); - return -1.f/(jmj0+jmj1+jmj2+jmj3); -} - @@ -470,133 +220,8 @@ typedef struct -typedef struct -{ - int m_nContacts; - int m_staticIdx; - float m_scale; - int m_nSplit; -} ConstBufferSSD; -void btPlaneSpace1 (float4 n, float4* p, float4* q); - void btPlaneSpace1 (float4 n, float4* p, float4* q) -{ - if (fabs(n.z) > 0.70710678f) { - // choose p in y-z plane - float a = n.y*n.y + n.z*n.z; - float k = 1.f/sqrt(a); - p[0].x = 0; - p[0].y = -n.z*k; - p[0].z = n.y*k; - // set q = n x p - q[0].x = a*k; - q[0].y = -n.x*p[0].z; - q[0].z = n.x*p[0].y; - } - else { - // choose p in x-y plane - float a = n.x*n.x + n.y*n.y; - float k = 1.f/sqrt(a); - p[0].x = -n.y*k; - p[0].y = n.x*k; - p[0].z = 0; - // set q = n x p - q[0].x = -n.z*p[0].y; - q[0].y = n.z*p[0].x; - q[0].z = a*k; - } -} - - -void setConstraint4( const float4 posA, const float4 linVelA, const float4 angVelA, float invMassA, const Matrix3x3 invInertiaA, - const float4 posB, const float4 linVelB, const float4 angVelB, float invMassB, const Matrix3x3 invInertiaB, - __global struct b3Contact4Data* src, float dt, float positionDrift, float positionConstraintCoeff, - Constraint4* dstC ) -{ - dstC->m_bodyA = abs(src->m_bodyAPtrAndSignBit); - dstC->m_bodyB = abs(src->m_bodyBPtrAndSignBit); - - float dtInv = 1.f/dt; - for(int ic=0; ic<4; ic++) - { - dstC->m_appliedRambdaDt[ic] = 0.f; - } - dstC->m_fJacCoeffInv[0] = dstC->m_fJacCoeffInv[1] = 0.f; - - - dstC->m_linear = src->m_worldNormalOnB; - dstC->m_linear.w = 0.7f ;//src->getFrictionCoeff() ); - for(int ic=0; ic<4; ic++) - { - float4 r0 = src->m_worldPosB[ic] - posA; - float4 r1 = src->m_worldPosB[ic] - posB; - - if( ic >= src->m_worldNormalOnB.w )//npoints - { - dstC->m_jacCoeffInv[ic] = 0.f; - continue; - } - - float relVelN; - { - float4 linear, angular0, angular1; - setLinearAndAngular(src->m_worldNormalOnB, r0, r1, &linear, &angular0, &angular1); - - dstC->m_jacCoeffInv[ic] = calcJacCoeff(linear, -linear, angular0, angular1, - invMassA, &invInertiaA, invMassB, &invInertiaB ); - - relVelN = calcRelVel(linear, -linear, angular0, angular1, - linVelA, angVelA, linVelB, angVelB); - - float e = 0.f;//src->getRestituitionCoeff(); - if( relVelN*relVelN < 0.004f ) e = 0.f; - - dstC->m_b[ic] = e*relVelN; - //float penetration = src->m_worldPosB[ic].w; - dstC->m_b[ic] += (src->m_worldPosB[ic].w + positionDrift)*positionConstraintCoeff*dtInv; - dstC->m_appliedRambdaDt[ic] = 0.f; - } - } - - if( src->m_worldNormalOnB.w > 0 )//npoints - { // prepare friction - float4 center = make_float4(0.f); - for(int i=0; im_worldNormalOnB.w; i++) - center += src->m_worldPosB[i]; - center /= (float)src->m_worldNormalOnB.w; - - float4 tangent[2]; - btPlaneSpace1(src->m_worldNormalOnB,&tangent[0],&tangent[1]); - - float4 r[2]; - r[0] = center - posA; - r[1] = center - posB; - - for(int i=0; i<2; i++) - { - float4 linear, angular0, angular1; - setLinearAndAngular(tangent[i], r[0], r[1], &linear, &angular0, &angular1); - - dstC->m_fJacCoeffInv[i] = calcJacCoeff(linear, -linear, angular0, angular1, - invMassA, &invInertiaA, invMassB, &invInertiaB ); - dstC->m_fAppliedRambdaDt[i] = 0.f; - } - dstC->m_center = center; - } - - for(int i=0; i<4; i++) - { - if( im_worldNormalOnB.w ) - { - dstC->m_worldPos[i] = src->m_worldPosB[i]; - } - else - { - dstC->m_worldPos[i] = make_float4(0.f); - } - } -} typedef struct { @@ -608,7 +233,7 @@ typedef struct __kernel __attribute__((reqd_work_group_size(WG_SIZE,1,1))) -void ContactToConstraintKernel(__global struct b3Contact4Data* gContact, __global Body* gBodies, __global Shape* gShapes, __global Constraint4* gConstraintOut, +void ContactToConstraintKernel(__global struct b3Contact4Data* gContact, __global b3RigidBodyData_t* gBodies, __global b3InertiaData_t* gShapes, __global b3ContactConstraint4_t* gConstraintOut, int nContacts, float dt, float positionDrift, @@ -626,15 +251,15 @@ float positionConstraintCoeff float4 linVelA = gBodies[aIdx].m_linVel; float4 angVelA = gBodies[aIdx].m_angVel; float invMassA = gBodies[aIdx].m_invMass; - Matrix3x3 invInertiaA = gShapes[aIdx].m_invInertia; + b3Mat3x3 invInertiaA = gShapes[aIdx].m_initInvInertia; float4 posB = gBodies[bIdx].m_pos; float4 linVelB = gBodies[bIdx].m_linVel; float4 angVelB = gBodies[bIdx].m_angVel; float invMassB = gBodies[bIdx].m_invMass; - Matrix3x3 invInertiaB = gShapes[bIdx].m_invInertia; + b3Mat3x3 invInertiaB = gShapes[bIdx].m_initInvInertia; - Constraint4 cs; + b3ContactConstraint4_t cs; setConstraint4( posA, linVelA, angVelA, invMassA, invInertiaA, posB, linVelB, angVelB, invMassB, invInertiaB, &gContact[gIdx], dt, positionDrift, positionConstraintCoeff, diff --git a/src/Bullet3OpenCL/RigidBody/kernels/solverSetup.h b/src/Bullet3OpenCL/RigidBody/kernels/solverSetup.h index 0f70191b0..226607745 100644 --- a/src/Bullet3OpenCL/RigidBody/kernels/solverSetup.h +++ b/src/Bullet3OpenCL/RigidBody/kernels/solverSetup.h @@ -80,6 +80,385 @@ static const char* solverSetupCL= \ " contact->m_worldNormalOnB.w = (float)numPoints;\n" "};\n" "#endif //B3_CONTACT4DATA_H\n" +"#ifndef B3_CONTACT_CONSTRAINT5_H\n" +"#define B3_CONTACT_CONSTRAINT5_H\n" +"#ifndef B3_FLOAT4_H\n" +"#ifdef __cplusplus\n" +"#else\n" +"#endif \n" +"#endif //B3_FLOAT4_H\n" +"typedef struct b3ContactConstraint4 b3ContactConstraint4_t;\n" +"struct b3ContactConstraint4\n" +"{\n" +" b3Float4 m_linear;//normal?\n" +" b3Float4 m_worldPos[4];\n" +" b3Float4 m_center; // friction\n" +" float m_jacCoeffInv[4];\n" +" float m_b[4];\n" +" float m_appliedRambdaDt[4];\n" +" float m_fJacCoeffInv[2]; // friction\n" +" float m_fAppliedRambdaDt[2]; // friction\n" +" unsigned int m_bodyA;\n" +" unsigned int m_bodyB;\n" +" int m_batchIdx;\n" +" unsigned int m_paddings;\n" +"};\n" +"//inline void setFrictionCoeff(float value) { m_linear[3] = value; }\n" +"inline float b3GetFrictionCoeff(b3ContactConstraint4_t* constraint) \n" +"{\n" +" return constraint->m_linear.w; \n" +"}\n" +"#endif //B3_CONTACT_CONSTRAINT5_H\n" +"#ifndef B3_RIGIDBODY_DATA_H\n" +"#define B3_RIGIDBODY_DATA_H\n" +"#ifndef B3_FLOAT4_H\n" +"#ifdef __cplusplus\n" +"#else\n" +"#endif \n" +"#endif //B3_FLOAT4_H\n" +"#ifndef B3_QUAT_H\n" +"#define B3_QUAT_H\n" +"#ifndef B3_PLATFORM_DEFINITIONS_H\n" +"#ifdef __cplusplus\n" +"#else\n" +"#endif\n" +"#endif\n" +"#ifndef B3_FLOAT4_H\n" +"#ifdef __cplusplus\n" +"#else\n" +"#endif \n" +"#endif //B3_FLOAT4_H\n" +"#ifdef __cplusplus\n" +"#else\n" +" typedef float4 b3Quat;\n" +" #define b3QuatConstArg const b3Quat\n" +" \n" +" \n" +"inline float4 b3FastNormalize4(float4 v)\n" +"{\n" +" v = (float4)(v.xyz,0.f);\n" +" return fast_normalize(v);\n" +"}\n" +" \n" +"inline b3Quat b3QuatMul(b3Quat a, b3Quat b);\n" +"inline b3Quat b3QuatNormalize(b3QuatConstArg in);\n" +"inline b3Quat b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec);\n" +"inline b3Quat b3QuatInvert(b3QuatConstArg q);\n" +"inline b3Quat b3QuatMul(b3QuatConstArg a, b3QuatConstArg b)\n" +"{\n" +" b3Quat ans;\n" +" ans = b3Cross3( a, b );\n" +" ans += a.w*b+b.w*a;\n" +"// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" +" ans.w = a.w*b.w - b3Dot3F4(a, b);\n" +" return ans;\n" +"}\n" +"inline b3Quat b3QuatNormalize(b3QuatConstArg in)\n" +"{\n" +" //return b3FastNormalize4(in);\n" +" float len = native_sqrt(dot(q, q));\n" +" if(len > 0.f)\n" +" {\n" +" q *= 1.f / len;\n" +" }\n" +" else\n" +" {\n" +" q.x = q.y = q.z = 0.f;\n" +" q.w = 1.f;\n" +" }\n" +" return q;\n" +"}\n" +"inline float4 b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" +"{\n" +" b3Quat qInv = b3QuatInvert( q );\n" +" float4 vcpy = vec;\n" +" vcpy.w = 0.f;\n" +" float4 out = b3QuatMul(b3QuatMul(q,vcpy),qInv);\n" +" return out;\n" +"}\n" +"inline b3Quat b3QuatInvert(b3QuatConstArg q)\n" +"{\n" +" return (b3Quat)(-q.xyz, q.w);\n" +"}\n" +"inline float4 b3QuatInvRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" +"{\n" +" return b3QuatRotate( b3QuatInvert( q ), vec );\n" +"}\n" +"inline b3Float4 b3TransformPoint(b3Float4ConstArg point, b3Float4ConstArg translation, b3QuatConstArg orientation)\n" +"{\n" +" return b3QuatRotate( orientation, point ) + (translation);\n" +"}\n" +" \n" +"#endif \n" +"#endif //B3_QUAT_H\n" +"#ifndef B3_MAT3x3_H\n" +"#define B3_MAT3x3_H\n" +"#ifndef B3_QUAT_H\n" +"#ifdef __cplusplus\n" +"#else\n" +"#endif \n" +"#endif //B3_QUAT_H\n" +"#ifdef __cplusplus\n" +"#else\n" +"typedef struct\n" +"{\n" +" b3Float4 m_row[3];\n" +"}b3Mat3x3;\n" +"#define b3Mat3x3ConstArg const b3Mat3x3\n" +"#define b3GetRow(m,row) (m.m_row[row])\n" +"inline b3Mat3x3 b3QuatGetRotationMatrix(b3Quat quat)\n" +"{\n" +" b3Float4 quat2 = (b3Float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f);\n" +" b3Mat3x3 out;\n" +" out.m_row[0].x=1-2*quat2.y-2*quat2.z;\n" +" out.m_row[0].y=2*quat.x*quat.y-2*quat.w*quat.z;\n" +" out.m_row[0].z=2*quat.x*quat.z+2*quat.w*quat.y;\n" +" out.m_row[0].w = 0.f;\n" +" out.m_row[1].x=2*quat.x*quat.y+2*quat.w*quat.z;\n" +" out.m_row[1].y=1-2*quat2.x-2*quat2.z;\n" +" out.m_row[1].z=2*quat.y*quat.z-2*quat.w*quat.x;\n" +" out.m_row[1].w = 0.f;\n" +" out.m_row[2].x=2*quat.x*quat.z-2*quat.w*quat.y;\n" +" out.m_row[2].y=2*quat.y*quat.z+2*quat.w*quat.x;\n" +" out.m_row[2].z=1-2*quat2.x-2*quat2.y;\n" +" out.m_row[2].w = 0.f;\n" +" return out;\n" +"}\n" +"inline b3Mat3x3 b3AbsoluteMat3x3(b3Mat3x3ConstArg matIn)\n" +"{\n" +" b3Mat3x3 out;\n" +" out.m_row[0] = fabs(matIn.m_row[0]);\n" +" out.m_row[1] = fabs(matIn.m_row[1]);\n" +" out.m_row[2] = fabs(matIn.m_row[2]);\n" +" return out;\n" +"}\n" +"__inline\n" +"b3Mat3x3 mtZero();\n" +"__inline\n" +"b3Mat3x3 mtIdentity();\n" +"__inline\n" +"b3Mat3x3 mtTranspose(b3Mat3x3 m);\n" +"__inline\n" +"b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b);\n" +"__inline\n" +"b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b);\n" +"__inline\n" +"b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b);\n" +"__inline\n" +"b3Mat3x3 mtZero()\n" +"{\n" +" b3Mat3x3 m;\n" +" m.m_row[0] = (b3Float4)(0.f);\n" +" m.m_row[1] = (b3Float4)(0.f);\n" +" m.m_row[2] = (b3Float4)(0.f);\n" +" return m;\n" +"}\n" +"__inline\n" +"b3Mat3x3 mtIdentity()\n" +"{\n" +" b3Mat3x3 m;\n" +" m.m_row[0] = (b3Float4)(1,0,0,0);\n" +" m.m_row[1] = (b3Float4)(0,1,0,0);\n" +" m.m_row[2] = (b3Float4)(0,0,1,0);\n" +" return m;\n" +"}\n" +"__inline\n" +"b3Mat3x3 mtTranspose(b3Mat3x3 m)\n" +"{\n" +" b3Mat3x3 out;\n" +" out.m_row[0] = (b3Float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f);\n" +" out.m_row[1] = (b3Float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f);\n" +" out.m_row[2] = (b3Float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);\n" +" return out;\n" +"}\n" +"__inline\n" +"b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b)\n" +"{\n" +" b3Mat3x3 transB;\n" +" transB = mtTranspose( b );\n" +" b3Mat3x3 ans;\n" +" // why this doesn't run when 0ing in the for{}\n" +" a.m_row[0].w = 0.f;\n" +" a.m_row[1].w = 0.f;\n" +" a.m_row[2].w = 0.f;\n" +" for(int i=0; i<3; i++)\n" +" {\n" +"// a.m_row[i].w = 0.f;\n" +" ans.m_row[i].x = b3Dot3F4(a.m_row[i],transB.m_row[0]);\n" +" ans.m_row[i].y = b3Dot3F4(a.m_row[i],transB.m_row[1]);\n" +" ans.m_row[i].z = b3Dot3F4(a.m_row[i],transB.m_row[2]);\n" +" ans.m_row[i].w = 0.f;\n" +" }\n" +" return ans;\n" +"}\n" +"__inline\n" +"b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b)\n" +"{\n" +" b3Float4 ans;\n" +" ans.x = b3Dot3F4( a.m_row[0], b );\n" +" ans.y = b3Dot3F4( a.m_row[1], b );\n" +" ans.z = b3Dot3F4( a.m_row[2], b );\n" +" ans.w = 0.f;\n" +" return ans;\n" +"}\n" +"__inline\n" +"b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b)\n" +"{\n" +" b3Float4 colx = b3MakeFloat4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n" +" b3Float4 coly = b3MakeFloat4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n" +" b3Float4 colz = b3MakeFloat4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n" +" b3Float4 ans;\n" +" ans.x = b3Dot3F4( a, colx );\n" +" ans.y = b3Dot3F4( a, coly );\n" +" ans.z = b3Dot3F4( a, colz );\n" +" return ans;\n" +"}\n" +"#endif\n" +"#endif //B3_MAT3x3_H\n" +"typedef struct b3RigidBodyData b3RigidBodyData_t;\n" +"struct b3RigidBodyData\n" +"{\n" +" b3Float4 m_pos;\n" +" b3Quat m_quat;\n" +" b3Float4 m_linVel;\n" +" b3Float4 m_angVel;\n" +" int m_collidableIdx;\n" +" float m_invMass;\n" +" float m_restituitionCoeff;\n" +" float m_frictionCoeff;\n" +"};\n" +"typedef struct b3InertiaData b3InertiaData_t;\n" +"struct b3InertiaData\n" +"{\n" +" b3Mat3x3 m_invInertiaWorld;\n" +" b3Mat3x3 m_initInvInertia;\n" +"};\n" +"#endif //B3_RIGIDBODY_DATA_H\n" +" \n" +"void b3PlaneSpace1 (b3Float4ConstArg n, b3Float4* p, b3Float4* q);\n" +" void b3PlaneSpace1 (b3Float4ConstArg n, b3Float4* p, b3Float4* q)\n" +"{\n" +" if (b3Fabs(n.z) > 0.70710678f) {\n" +" // choose p in y-z plane\n" +" float a = n.y*n.y + n.z*n.z;\n" +" float k = 1.f/sqrt(a);\n" +" p[0].x = 0;\n" +" p[0].y = -n.z*k;\n" +" p[0].z = n.y*k;\n" +" // set q = n x p\n" +" q[0].x = a*k;\n" +" q[0].y = -n.x*p[0].z;\n" +" q[0].z = n.x*p[0].y;\n" +" }\n" +" else {\n" +" // choose p in x-y plane\n" +" float a = n.x*n.x + n.y*n.y;\n" +" float k = 1.f/sqrt(a);\n" +" p[0].x = -n.y*k;\n" +" p[0].y = n.x*k;\n" +" p[0].z = 0;\n" +" // set q = n x p\n" +" q[0].x = -n.z*p[0].y;\n" +" q[0].y = n.z*p[0].x;\n" +" q[0].z = a*k;\n" +" }\n" +"}\n" +" \n" +"void setLinearAndAngular( b3Float4ConstArg n, b3Float4ConstArg r0, b3Float4ConstArg r1, b3Float4* linear, b3Float4* angular0, b3Float4* angular1)\n" +"{\n" +" *linear = b3MakeFloat4(n.x,n.y,n.z,0.f);\n" +" *angular0 = b3Cross3(r0, n);\n" +" *angular1 = -b3Cross3(r1, n);\n" +"}\n" +"float calcRelVel( b3Float4ConstArg l0, b3Float4ConstArg l1, b3Float4ConstArg a0, b3Float4ConstArg a1, b3Float4ConstArg linVel0,\n" +" b3Float4ConstArg angVel0, b3Float4ConstArg linVel1, b3Float4ConstArg angVel1 )\n" +"{\n" +" return b3Dot3F4(l0, linVel0) + b3Dot3F4(a0, angVel0) + b3Dot3F4(l1, linVel1) + b3Dot3F4(a1, angVel1);\n" +"}\n" +"float calcJacCoeff(b3Float4ConstArg linear0, b3Float4ConstArg linear1, b3Float4ConstArg angular0, b3Float4ConstArg angular1,\n" +" float invMass0, const b3Mat3x3* invInertia0, float invMass1, const b3Mat3x3* invInertia1)\n" +"{\n" +" // linear0,1 are normlized\n" +" float jmj0 = invMass0;//b3Dot3F4(linear0, linear0)*invMass0;\n" +" float jmj1 = b3Dot3F4(mtMul3(angular0,*invInertia0), angular0);\n" +" float jmj2 = invMass1;//b3Dot3F4(linear1, linear1)*invMass1;\n" +" float jmj3 = b3Dot3F4(mtMul3(angular1,*invInertia1), angular1);\n" +" return -1.f/(jmj0+jmj1+jmj2+jmj3);\n" +"}\n" +"void setConstraint4( b3Float4ConstArg posA, b3Float4ConstArg linVelA, b3Float4ConstArg angVelA, float invMassA, b3Mat3x3ConstArg invInertiaA,\n" +" b3Float4ConstArg posB, b3Float4ConstArg linVelB, b3Float4ConstArg angVelB, float invMassB, b3Mat3x3ConstArg invInertiaB, \n" +" __global struct b3Contact4Data* src, float dt, float positionDrift, float positionConstraintCoeff,\n" +" b3ContactConstraint4_t* dstC )\n" +"{\n" +" dstC->m_bodyA = abs(src->m_bodyAPtrAndSignBit);\n" +" dstC->m_bodyB = abs(src->m_bodyBPtrAndSignBit);\n" +" float dtInv = 1.f/dt;\n" +" for(int ic=0; ic<4; ic++)\n" +" {\n" +" dstC->m_appliedRambdaDt[ic] = 0.f;\n" +" }\n" +" dstC->m_fJacCoeffInv[0] = dstC->m_fJacCoeffInv[1] = 0.f;\n" +" dstC->m_linear = src->m_worldNormalOnB;\n" +" dstC->m_linear.w = 0.7f ;//src->getFrictionCoeff() );\n" +" for(int ic=0; ic<4; ic++)\n" +" {\n" +" b3Float4 r0 = src->m_worldPosB[ic] - posA;\n" +" b3Float4 r1 = src->m_worldPosB[ic] - posB;\n" +" if( ic >= src->m_worldNormalOnB.w )//npoints\n" +" {\n" +" dstC->m_jacCoeffInv[ic] = 0.f;\n" +" continue;\n" +" }\n" +" float relVelN;\n" +" {\n" +" b3Float4 linear, angular0, angular1;\n" +" setLinearAndAngular(src->m_worldNormalOnB, r0, r1, &linear, &angular0, &angular1);\n" +" dstC->m_jacCoeffInv[ic] = calcJacCoeff(linear, -linear, angular0, angular1,\n" +" invMassA, &invInertiaA, invMassB, &invInertiaB );\n" +" relVelN = calcRelVel(linear, -linear, angular0, angular1,\n" +" linVelA, angVelA, linVelB, angVelB);\n" +" float e = 0.f;//src->getRestituitionCoeff();\n" +" if( relVelN*relVelN < 0.004f ) e = 0.f;\n" +" dstC->m_b[ic] = e*relVelN;\n" +" //float penetration = src->m_worldPosB[ic].w;\n" +" dstC->m_b[ic] += (src->m_worldPosB[ic].w + positionDrift)*positionConstraintCoeff*dtInv;\n" +" dstC->m_appliedRambdaDt[ic] = 0.f;\n" +" }\n" +" }\n" +" if( src->m_worldNormalOnB.w > 0 )//npoints\n" +" { // prepare friction\n" +" b3Float4 center = b3MakeFloat4(0.f,0.f,0.f,0.f);\n" +" for(int i=0; im_worldNormalOnB.w; i++) \n" +" center += src->m_worldPosB[i];\n" +" center /= (float)src->m_worldNormalOnB.w;\n" +" b3Float4 tangent[2];\n" +" b3PlaneSpace1(src->m_worldNormalOnB,&tangent[0],&tangent[1]);\n" +" \n" +" b3Float4 r[2];\n" +" r[0] = center - posA;\n" +" r[1] = center - posB;\n" +" for(int i=0; i<2; i++)\n" +" {\n" +" b3Float4 linear, angular0, angular1;\n" +" setLinearAndAngular(tangent[i], r[0], r[1], &linear, &angular0, &angular1);\n" +" dstC->m_fJacCoeffInv[i] = calcJacCoeff(linear, -linear, angular0, angular1,\n" +" invMassA, &invInertiaA, invMassB, &invInertiaB );\n" +" dstC->m_fAppliedRambdaDt[i] = 0.f;\n" +" }\n" +" dstC->m_center = center;\n" +" }\n" +" for(int i=0; i<4; i++)\n" +" {\n" +" if( im_worldNormalOnB.w )\n" +" {\n" +" dstC->m_worldPos[i] = src->m_worldPosB[i];\n" +" }\n" +" else\n" +" {\n" +" dstC->m_worldPos[i] = b3MakeFloat4(0.f,0.f,0.f,0.f);\n" +" }\n" +" }\n" +"}\n" "#pragma OPENCL EXTENSION cl_amd_printf : enable\n" "#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n" "#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n" @@ -208,197 +587,9 @@ static const char* solverSetupCL= \ " eqn.w = -dot3F4(eqn,a);\n" " return eqn;\n" "}\n" -"///////////////////////////////////////\n" -"// Matrix3x3\n" -"///////////////////////////////////////\n" -"typedef struct\n" -"{\n" -" float4 m_row[3];\n" -"}Matrix3x3;\n" -"__inline\n" -"Matrix3x3 mtZero();\n" -"__inline\n" -"Matrix3x3 mtIdentity();\n" -"__inline\n" -"Matrix3x3 mtTranspose(Matrix3x3 m);\n" -"__inline\n" -"Matrix3x3 mtMul(Matrix3x3 a, Matrix3x3 b);\n" -"__inline\n" -"float4 mtMul1(Matrix3x3 a, float4 b);\n" -"__inline\n" -"float4 mtMul3(float4 a, Matrix3x3 b);\n" -"__inline\n" -"Matrix3x3 mtZero()\n" -"{\n" -" Matrix3x3 m;\n" -" m.m_row[0] = (float4)(0.f);\n" -" m.m_row[1] = (float4)(0.f);\n" -" m.m_row[2] = (float4)(0.f);\n" -" return m;\n" -"}\n" -"__inline\n" -"Matrix3x3 mtIdentity()\n" -"{\n" -" Matrix3x3 m;\n" -" m.m_row[0] = (float4)(1,0,0,0);\n" -" m.m_row[1] = (float4)(0,1,0,0);\n" -" m.m_row[2] = (float4)(0,0,1,0);\n" -" return m;\n" -"}\n" -"__inline\n" -"Matrix3x3 mtTranspose(Matrix3x3 m)\n" -"{\n" -" Matrix3x3 out;\n" -" out.m_row[0] = (float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f);\n" -" out.m_row[1] = (float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f);\n" -" out.m_row[2] = (float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);\n" -" return out;\n" -"}\n" -"__inline\n" -"Matrix3x3 mtMul(Matrix3x3 a, Matrix3x3 b)\n" -"{\n" -" Matrix3x3 transB;\n" -" transB = mtTranspose( b );\n" -" Matrix3x3 ans;\n" -" // why this doesn't run when 0ing in the for{}\n" -" a.m_row[0].w = 0.f;\n" -" a.m_row[1].w = 0.f;\n" -" a.m_row[2].w = 0.f;\n" -" for(int i=0; i<3; i++)\n" -" {\n" -"// a.m_row[i].w = 0.f;\n" -" ans.m_row[i].x = dot3F4(a.m_row[i],transB.m_row[0]);\n" -" ans.m_row[i].y = dot3F4(a.m_row[i],transB.m_row[1]);\n" -" ans.m_row[i].z = dot3F4(a.m_row[i],transB.m_row[2]);\n" -" ans.m_row[i].w = 0.f;\n" -" }\n" -" return ans;\n" -"}\n" -"__inline\n" -"float4 mtMul1(Matrix3x3 a, float4 b)\n" -"{\n" -" float4 ans;\n" -" ans.x = dot3F4( a.m_row[0], b );\n" -" ans.y = dot3F4( a.m_row[1], b );\n" -" ans.z = dot3F4( a.m_row[2], b );\n" -" ans.w = 0.f;\n" -" return ans;\n" -"}\n" -"__inline\n" -"float4 mtMul3(float4 a, Matrix3x3 b)\n" -"{\n" -" float4 colx = make_float4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n" -" float4 coly = make_float4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n" -" float4 colz = make_float4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n" -" float4 ans;\n" -" ans.x = dot3F4( a, colx );\n" -" ans.y = dot3F4( a, coly );\n" -" ans.z = dot3F4( a, colz );\n" -" return ans;\n" -"}\n" -"///////////////////////////////////////\n" -"// Quaternion\n" -"///////////////////////////////////////\n" -"typedef float4 Quaternion;\n" -"__inline\n" -"Quaternion qtMul(Quaternion a, Quaternion b);\n" -"__inline\n" -"Quaternion qtNormalize(Quaternion in);\n" -"__inline\n" -"float4 qtRotate(Quaternion q, float4 vec);\n" -"__inline\n" -"Quaternion qtInvert(Quaternion q);\n" -"__inline\n" -"Matrix3x3 qtGetRotationMatrix(Quaternion q);\n" -"__inline\n" -"Quaternion qtMul(Quaternion a, Quaternion b)\n" -"{\n" -" Quaternion ans;\n" -" ans = cross3( a, b );\n" -" ans += a.w*b+b.w*a;\n" -"// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" -" ans.w = a.w*b.w - dot3F4(a, b);\n" -" return ans;\n" -"}\n" -"__inline\n" -"Quaternion qtNormalize(Quaternion in)\n" -"{\n" -" return fastNormalize4(in);\n" -"// in /= length( in );\n" -"// return in;\n" -"}\n" -"__inline\n" -"float4 qtRotate(Quaternion q, float4 vec)\n" -"{\n" -" Quaternion qInv = qtInvert( q );\n" -" float4 vcpy = vec;\n" -" vcpy.w = 0.f;\n" -" float4 out = qtMul(qtMul(q,vcpy),qInv);\n" -" return out;\n" -"}\n" -"__inline\n" -"Quaternion qtInvert(Quaternion q)\n" -"{\n" -" return (Quaternion)(-q.xyz, q.w);\n" -"}\n" -"__inline\n" -"float4 qtInvRotate(const Quaternion q, float4 vec)\n" -"{\n" -" return qtRotate( qtInvert( q ), vec );\n" -"}\n" -"__inline\n" -"Matrix3x3 qtGetRotationMatrix(Quaternion quat)\n" -"{\n" -" float4 quat2 = (float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f);\n" -" Matrix3x3 out;\n" -" out.m_row[0].x=1-2*quat2.y-2*quat2.z;\n" -" out.m_row[0].y=2*quat.x*quat.y-2*quat.w*quat.z;\n" -" out.m_row[0].z=2*quat.x*quat.z+2*quat.w*quat.y;\n" -" out.m_row[0].w = 0.f;\n" -" out.m_row[1].x=2*quat.x*quat.y+2*quat.w*quat.z;\n" -" out.m_row[1].y=1-2*quat2.x-2*quat2.z;\n" -" out.m_row[1].z=2*quat.y*quat.z-2*quat.w*quat.x;\n" -" out.m_row[1].w = 0.f;\n" -" out.m_row[2].x=2*quat.x*quat.z-2*quat.w*quat.y;\n" -" out.m_row[2].y=2*quat.y*quat.z+2*quat.w*quat.x;\n" -" out.m_row[2].z=1-2*quat2.x-2*quat2.y;\n" -" out.m_row[2].w = 0.f;\n" -" return out;\n" -"}\n" "#define WG_SIZE 64\n" "typedef struct\n" "{\n" -" float4 m_pos;\n" -" Quaternion m_quat;\n" -" float4 m_linVel;\n" -" float4 m_angVel;\n" -" u32 m_shapeIdx;\n" -" float m_invMass;\n" -" float m_restituitionCoeff;\n" -" float m_frictionCoeff;\n" -"} Body;\n" -"typedef struct\n" -"{\n" -" Matrix3x3 m_invInertia;\n" -" Matrix3x3 m_initInvInertia;\n" -"} Shape;\n" -"typedef struct\n" -"{\n" -" float4 m_linear;\n" -" float4 m_worldPos[4];\n" -" float4 m_center; \n" -" float m_jacCoeffInv[4];\n" -" float m_b[4];\n" -" float m_appliedRambdaDt[4];\n" -" float m_fJacCoeffInv[2]; \n" -" float m_fAppliedRambdaDt[2]; \n" -" u32 m_bodyA;\n" -" u32 m_bodyB;\n" -" int m_batchIdx;\n" -" u32 m_paddings[1];\n" -"} Constraint4;\n" -"typedef struct\n" -"{\n" " int m_nConstraints;\n" " int m_start;\n" " int m_batchIdx;\n" @@ -413,26 +604,6 @@ static const char* solverSetupCL= \ " int m_nSplit;\n" "// int m_paddings[1];\n" "} ConstBufferBatchSolve;\n" -"void setLinearAndAngular( float4 n, float4 r0, float4 r1, float4* linear, float4* angular0, float4* angular1)\n" -"{\n" -" *linear = make_float4(n.xyz,0.f);\n" -" *angular0 = cross3(r0, n);\n" -" *angular1 = -cross3(r1, n);\n" -"}\n" -"float calcRelVel( float4 l0, float4 l1, float4 a0, float4 a1, float4 linVel0, float4 angVel0, float4 linVel1, float4 angVel1 )\n" -"{\n" -" return dot3F4(l0, linVel0) + dot3F4(a0, angVel0) + dot3F4(l1, linVel1) + dot3F4(a1, angVel1);\n" -"}\n" -"float calcJacCoeff(const float4 linear0, const float4 linear1, const float4 angular0, const float4 angular1,\n" -" float invMass0, const Matrix3x3* invInertia0, float invMass1, const Matrix3x3* invInertia1)\n" -"{\n" -" // linear0,1 are normlized\n" -" float jmj0 = invMass0;//dot3F4(linear0, linear0)*invMass0;\n" -" float jmj1 = dot3F4(mtMul3(angular0,*invInertia0), angular0);\n" -" float jmj2 = invMass1;//dot3F4(linear1, linear1)*invMass1;\n" -" float jmj3 = dot3F4(mtMul3(angular1,*invInertia1), angular1);\n" -" return -1.f/(jmj0+jmj1+jmj2+jmj3);\n" -"}\n" " \n" "typedef struct \n" "{\n" @@ -448,122 +619,13 @@ static const char* solverSetupCL= \ "typedef struct\n" "{\n" " int m_nContacts;\n" -" int m_staticIdx;\n" -" float m_scale;\n" -" int m_nSplit;\n" -"} ConstBufferSSD;\n" -"void btPlaneSpace1 (float4 n, float4* p, float4* q);\n" -" void btPlaneSpace1 (float4 n, float4* p, float4* q)\n" -"{\n" -" if (fabs(n.z) > 0.70710678f) {\n" -" // choose p in y-z plane\n" -" float a = n.y*n.y + n.z*n.z;\n" -" float k = 1.f/sqrt(a);\n" -" p[0].x = 0;\n" -" p[0].y = -n.z*k;\n" -" p[0].z = n.y*k;\n" -" // set q = n x p\n" -" q[0].x = a*k;\n" -" q[0].y = -n.x*p[0].z;\n" -" q[0].z = n.x*p[0].y;\n" -" }\n" -" else {\n" -" // choose p in x-y plane\n" -" float a = n.x*n.x + n.y*n.y;\n" -" float k = 1.f/sqrt(a);\n" -" p[0].x = -n.y*k;\n" -" p[0].y = n.x*k;\n" -" p[0].z = 0;\n" -" // set q = n x p\n" -" q[0].x = -n.z*p[0].y;\n" -" q[0].y = n.z*p[0].x;\n" -" q[0].z = a*k;\n" -" }\n" -"}\n" -"void setConstraint4( const float4 posA, const float4 linVelA, const float4 angVelA, float invMassA, const Matrix3x3 invInertiaA,\n" -" const float4 posB, const float4 linVelB, const float4 angVelB, float invMassB, const Matrix3x3 invInertiaB, \n" -" __global struct b3Contact4Data* src, float dt, float positionDrift, float positionConstraintCoeff,\n" -" Constraint4* dstC )\n" -"{\n" -" dstC->m_bodyA = abs(src->m_bodyAPtrAndSignBit);\n" -" dstC->m_bodyB = abs(src->m_bodyBPtrAndSignBit);\n" -" float dtInv = 1.f/dt;\n" -" for(int ic=0; ic<4; ic++)\n" -" {\n" -" dstC->m_appliedRambdaDt[ic] = 0.f;\n" -" }\n" -" dstC->m_fJacCoeffInv[0] = dstC->m_fJacCoeffInv[1] = 0.f;\n" -" dstC->m_linear = src->m_worldNormalOnB;\n" -" dstC->m_linear.w = 0.7f ;//src->getFrictionCoeff() );\n" -" for(int ic=0; ic<4; ic++)\n" -" {\n" -" float4 r0 = src->m_worldPosB[ic] - posA;\n" -" float4 r1 = src->m_worldPosB[ic] - posB;\n" -" if( ic >= src->m_worldNormalOnB.w )//npoints\n" -" {\n" -" dstC->m_jacCoeffInv[ic] = 0.f;\n" -" continue;\n" -" }\n" -" float relVelN;\n" -" {\n" -" float4 linear, angular0, angular1;\n" -" setLinearAndAngular(src->m_worldNormalOnB, r0, r1, &linear, &angular0, &angular1);\n" -" dstC->m_jacCoeffInv[ic] = calcJacCoeff(linear, -linear, angular0, angular1,\n" -" invMassA, &invInertiaA, invMassB, &invInertiaB );\n" -" relVelN = calcRelVel(linear, -linear, angular0, angular1,\n" -" linVelA, angVelA, linVelB, angVelB);\n" -" float e = 0.f;//src->getRestituitionCoeff();\n" -" if( relVelN*relVelN < 0.004f ) e = 0.f;\n" -" dstC->m_b[ic] = e*relVelN;\n" -" //float penetration = src->m_worldPosB[ic].w;\n" -" dstC->m_b[ic] += (src->m_worldPosB[ic].w + positionDrift)*positionConstraintCoeff*dtInv;\n" -" dstC->m_appliedRambdaDt[ic] = 0.f;\n" -" }\n" -" }\n" -" if( src->m_worldNormalOnB.w > 0 )//npoints\n" -" { // prepare friction\n" -" float4 center = make_float4(0.f);\n" -" for(int i=0; im_worldNormalOnB.w; i++) \n" -" center += src->m_worldPosB[i];\n" -" center /= (float)src->m_worldNormalOnB.w;\n" -" float4 tangent[2];\n" -" btPlaneSpace1(src->m_worldNormalOnB,&tangent[0],&tangent[1]);\n" -" \n" -" float4 r[2];\n" -" r[0] = center - posA;\n" -" r[1] = center - posB;\n" -" for(int i=0; i<2; i++)\n" -" {\n" -" float4 linear, angular0, angular1;\n" -" setLinearAndAngular(tangent[i], r[0], r[1], &linear, &angular0, &angular1);\n" -" dstC->m_fJacCoeffInv[i] = calcJacCoeff(linear, -linear, angular0, angular1,\n" -" invMassA, &invInertiaA, invMassB, &invInertiaB );\n" -" dstC->m_fAppliedRambdaDt[i] = 0.f;\n" -" }\n" -" dstC->m_center = center;\n" -" }\n" -" for(int i=0; i<4; i++)\n" -" {\n" -" if( im_worldNormalOnB.w )\n" -" {\n" -" dstC->m_worldPos[i] = src->m_worldPosB[i];\n" -" }\n" -" else\n" -" {\n" -" dstC->m_worldPos[i] = make_float4(0.f);\n" -" }\n" -" }\n" -"}\n" -"typedef struct\n" -"{\n" -" int m_nContacts;\n" " float m_dt;\n" " float m_positionDrift;\n" " float m_positionConstraintCoeff;\n" "} ConstBufferCTC;\n" "__kernel\n" "__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" -"void ContactToConstraintKernel(__global struct b3Contact4Data* gContact, __global Body* gBodies, __global Shape* gShapes, __global Constraint4* gConstraintOut, \n" +"void ContactToConstraintKernel(__global struct b3Contact4Data* gContact, __global b3RigidBodyData_t* gBodies, __global b3InertiaData_t* gShapes, __global b3ContactConstraint4_t* gConstraintOut, \n" "int nContacts,\n" "float dt,\n" "float positionDrift,\n" @@ -580,13 +642,13 @@ static const char* solverSetupCL= \ " float4 linVelA = gBodies[aIdx].m_linVel;\n" " float4 angVelA = gBodies[aIdx].m_angVel;\n" " float invMassA = gBodies[aIdx].m_invMass;\n" -" Matrix3x3 invInertiaA = gShapes[aIdx].m_invInertia;\n" +" b3Mat3x3 invInertiaA = gShapes[aIdx].m_initInvInertia;\n" " float4 posB = gBodies[bIdx].m_pos;\n" " float4 linVelB = gBodies[bIdx].m_linVel;\n" " float4 angVelB = gBodies[bIdx].m_angVel;\n" " float invMassB = gBodies[bIdx].m_invMass;\n" -" Matrix3x3 invInertiaB = gShapes[bIdx].m_invInertia;\n" -" Constraint4 cs;\n" +" b3Mat3x3 invInertiaB = gShapes[bIdx].m_initInvInertia;\n" +" b3ContactConstraint4_t cs;\n" " setConstraint4( posA, linVelA, angVelA, invMassA, invInertiaA, posB, linVelB, angVelB, invMassB, invInertiaB,\n" " &gContact[gIdx], dt, positionDrift, positionConstraintCoeff,\n" " &cs );\n" diff --git a/src/Bullet3OpenCL/RigidBody/kernels/updateAabbsKernel.h b/src/Bullet3OpenCL/RigidBody/kernels/updateAabbsKernel.h index 6964e9e2a..d66ccdd36 100644 --- a/src/Bullet3OpenCL/RigidBody/kernels/updateAabbsKernel.h +++ b/src/Bullet3OpenCL/RigidBody/kernels/updateAabbsKernel.h @@ -85,7 +85,18 @@ static const char* updateAabbsKernelCL= \ "}\n" "inline b3Quat b3QuatNormalize(b3QuatConstArg in)\n" "{\n" -" return b3FastNormalize4(in);\n" +" //return b3FastNormalize4(in);\n" +" float len = native_sqrt(dot(q, q));\n" +" if(len > 0.f)\n" +" {\n" +" q *= 1.f / len;\n" +" }\n" +" else\n" +" {\n" +" q.x = q.y = q.z = 0.f;\n" +" q.w = 1.f;\n" +" }\n" +" return q;\n" "}\n" "inline float4 b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" "{\n" @@ -114,13 +125,13 @@ static const char* updateAabbsKernelCL= \ "#else\n" "typedef struct\n" "{\n" -" float4 m_row[3];\n" +" b3Float4 m_row[3];\n" "}b3Mat3x3;\n" "#define b3Mat3x3ConstArg const b3Mat3x3\n" "#define b3GetRow(m,row) (m.m_row[row])\n" "inline b3Mat3x3 b3QuatGetRotationMatrix(b3Quat quat)\n" "{\n" -" float4 quat2 = (float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f);\n" +" b3Float4 quat2 = (b3Float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f);\n" " b3Mat3x3 out;\n" " out.m_row[0].x=1-2*quat2.y-2*quat2.z;\n" " out.m_row[0].y=2*quat.x*quat.y-2*quat.w*quat.z;\n" @@ -144,6 +155,87 @@ static const char* updateAabbsKernelCL= \ " out.m_row[2] = fabs(matIn.m_row[2]);\n" " return out;\n" "}\n" +"__inline\n" +"b3Mat3x3 mtZero();\n" +"__inline\n" +"b3Mat3x3 mtIdentity();\n" +"__inline\n" +"b3Mat3x3 mtTranspose(b3Mat3x3 m);\n" +"__inline\n" +"b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b);\n" +"__inline\n" +"b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b);\n" +"__inline\n" +"b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b);\n" +"__inline\n" +"b3Mat3x3 mtZero()\n" +"{\n" +" b3Mat3x3 m;\n" +" m.m_row[0] = (b3Float4)(0.f);\n" +" m.m_row[1] = (b3Float4)(0.f);\n" +" m.m_row[2] = (b3Float4)(0.f);\n" +" return m;\n" +"}\n" +"__inline\n" +"b3Mat3x3 mtIdentity()\n" +"{\n" +" b3Mat3x3 m;\n" +" m.m_row[0] = (b3Float4)(1,0,0,0);\n" +" m.m_row[1] = (b3Float4)(0,1,0,0);\n" +" m.m_row[2] = (b3Float4)(0,0,1,0);\n" +" return m;\n" +"}\n" +"__inline\n" +"b3Mat3x3 mtTranspose(b3Mat3x3 m)\n" +"{\n" +" b3Mat3x3 out;\n" +" out.m_row[0] = (b3Float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f);\n" +" out.m_row[1] = (b3Float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f);\n" +" out.m_row[2] = (b3Float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);\n" +" return out;\n" +"}\n" +"__inline\n" +"b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b)\n" +"{\n" +" b3Mat3x3 transB;\n" +" transB = mtTranspose( b );\n" +" b3Mat3x3 ans;\n" +" // why this doesn't run when 0ing in the for{}\n" +" a.m_row[0].w = 0.f;\n" +" a.m_row[1].w = 0.f;\n" +" a.m_row[2].w = 0.f;\n" +" for(int i=0; i<3; i++)\n" +" {\n" +"// a.m_row[i].w = 0.f;\n" +" ans.m_row[i].x = b3Dot3F4(a.m_row[i],transB.m_row[0]);\n" +" ans.m_row[i].y = b3Dot3F4(a.m_row[i],transB.m_row[1]);\n" +" ans.m_row[i].z = b3Dot3F4(a.m_row[i],transB.m_row[2]);\n" +" ans.m_row[i].w = 0.f;\n" +" }\n" +" return ans;\n" +"}\n" +"__inline\n" +"b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b)\n" +"{\n" +" b3Float4 ans;\n" +" ans.x = b3Dot3F4( a.m_row[0], b );\n" +" ans.y = b3Dot3F4( a.m_row[1], b );\n" +" ans.z = b3Dot3F4( a.m_row[2], b );\n" +" ans.w = 0.f;\n" +" return ans;\n" +"}\n" +"__inline\n" +"b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b)\n" +"{\n" +" b3Float4 colx = b3MakeFloat4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n" +" b3Float4 coly = b3MakeFloat4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n" +" b3Float4 colz = b3MakeFloat4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n" +" b3Float4 ans;\n" +" ans.x = b3Dot3F4( a, colx );\n" +" ans.y = b3Dot3F4( a, coly );\n" +" ans.z = b3Dot3F4( a, colz );\n" +" return ans;\n" +"}\n" "#endif\n" "#endif //B3_MAT3x3_H\n" "typedef struct b3Aabb b3Aabb_t;\n" @@ -260,6 +352,11 @@ static const char* updateAabbsKernelCL= \ "#else\n" "#endif \n" "#endif //B3_QUAT_H\n" +"#ifndef B3_MAT3x3_H\n" +"#ifdef __cplusplus\n" +"#else\n" +"#endif\n" +"#endif //B3_MAT3x3_H\n" "typedef struct b3RigidBodyData b3RigidBodyData_t;\n" "struct b3RigidBodyData\n" "{\n" @@ -272,7 +369,12 @@ static const char* updateAabbsKernelCL= \ " float m_restituitionCoeff;\n" " float m_frictionCoeff;\n" "};\n" -" \n" +"typedef struct b3InertiaData b3InertiaData_t;\n" +"struct b3InertiaData\n" +"{\n" +" b3Mat3x3 m_invInertiaWorld;\n" +" b3Mat3x3 m_initInvInertia;\n" +"};\n" "#endif //B3_RIGIDBODY_DATA_H\n" " \n" "void b3ComputeWorldAabb( int bodyId, __global const b3RigidBodyData_t* bodies, __global const b3Collidable_t* collidables, __global const b3Aabb_t* localShapeAABB, __global b3Aabb_t* worldAabbs)\n"