share more data structures and code between OpenCL and C/C++ on CPU:

move the setConstraint4/b3ConvertConstraint4 to shared code.
This commit is contained in:
erwin coumans
2013-11-07 12:46:01 -08:00
parent a9a758dd54
commit e20cb22832
16 changed files with 1249 additions and 908 deletions

View File

@@ -34,11 +34,9 @@ B3_ATTRIBUTE_ALIGNED16(struct) b3RigidBodyCL : public b3RigidBodyData
};
struct b3InertiaCL
struct b3InertiaCL : public b3InertiaData
{
b3Matrix3x3 m_invInertiaWorld;
b3Matrix3x3 m_initInvInertia;
};
#endif//B3_RIGID_BODY_CL

View File

@@ -3,6 +3,7 @@
#include "Bullet3Common/shared/b3Float4.h"
#include "Bullet3Common/shared/b3Quat.h"
#include "Bullet3Common/shared/b3Mat3x3.h"
typedef struct b3RigidBodyData b3RigidBodyData_t;
@@ -19,7 +20,15 @@ struct b3RigidBodyData
float m_restituitionCoeff;
float m_frictionCoeff;
};
typedef struct b3InertiaData b3InertiaData_t;
struct b3InertiaData
{
b3Mat3x3 m_invInertiaWorld;
b3Mat3x3 m_initInvInertia;
};
#endif //B3_RIGIDBODY_DATA_H

View File

@@ -24,11 +24,18 @@ inline b3Mat3x3 b3AbsoluteMat3x3(b3Mat3x3ConstArg mat)
#define b3GetRow(m,row) m.getRow(row)
__inline
b3Float4 mtMul3(b3Float4ConstArg a, b3Mat3x3ConstArg b)
{
return b*a;
}
#else
typedef struct
{
float4 m_row[3];
b3Float4 m_row[3];
}b3Mat3x3;
#define b3Mat3x3ConstArg const b3Mat3x3
@@ -36,7 +43,7 @@ typedef struct
inline b3Mat3x3 b3QuatGetRotationMatrix(b3Quat quat)
{
float4 quat2 = (float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f);
b3Float4 quat2 = (b3Float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f);
b3Mat3x3 out;
out.m_row[0].x=1-2*quat2.y-2*quat2.z;
@@ -66,10 +73,107 @@ inline b3Mat3x3 b3AbsoluteMat3x3(b3Mat3x3ConstArg matIn)
return out;
}
__inline
b3Mat3x3 mtZero();
__inline
b3Mat3x3 mtIdentity();
__inline
b3Mat3x3 mtTranspose(b3Mat3x3 m);
__inline
b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b);
__inline
b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b);
__inline
b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b);
__inline
b3Mat3x3 mtZero()
{
b3Mat3x3 m;
m.m_row[0] = (b3Float4)(0.f);
m.m_row[1] = (b3Float4)(0.f);
m.m_row[2] = (b3Float4)(0.f);
return m;
}
__inline
b3Mat3x3 mtIdentity()
{
b3Mat3x3 m;
m.m_row[0] = (b3Float4)(1,0,0,0);
m.m_row[1] = (b3Float4)(0,1,0,0);
m.m_row[2] = (b3Float4)(0,0,1,0);
return m;
}
__inline
b3Mat3x3 mtTranspose(b3Mat3x3 m)
{
b3Mat3x3 out;
out.m_row[0] = (b3Float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f);
out.m_row[1] = (b3Float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f);
out.m_row[2] = (b3Float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);
return out;
}
__inline
b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b)
{
b3Mat3x3 transB;
transB = mtTranspose( b );
b3Mat3x3 ans;
// why this doesn't run when 0ing in the for{}
a.m_row[0].w = 0.f;
a.m_row[1].w = 0.f;
a.m_row[2].w = 0.f;
for(int i=0; i<3; i++)
{
// a.m_row[i].w = 0.f;
ans.m_row[i].x = b3Dot3F4(a.m_row[i],transB.m_row[0]);
ans.m_row[i].y = b3Dot3F4(a.m_row[i],transB.m_row[1]);
ans.m_row[i].z = b3Dot3F4(a.m_row[i],transB.m_row[2]);
ans.m_row[i].w = 0.f;
}
return ans;
}
__inline
b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b)
{
b3Float4 ans;
ans.x = b3Dot3F4( a.m_row[0], b );
ans.y = b3Dot3F4( a.m_row[1], b );
ans.z = b3Dot3F4( a.m_row[2], b );
ans.w = 0.f;
return ans;
}
__inline
b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b)
{
b3Float4 colx = b3MakeFloat4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);
b3Float4 coly = b3MakeFloat4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);
b3Float4 colz = b3MakeFloat4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);
b3Float4 ans;
ans.x = b3Dot3F4( a, colx );
ans.y = b3Dot3F4( a, coly );
ans.z = b3Dot3F4( a, colz );
return ans;
}
#endif
#endif //B3_MAT3x3_H

View File

@@ -46,7 +46,18 @@ inline b3Quat b3QuatMul(b3QuatConstArg a, b3QuatConstArg b)
inline b3Quat b3QuatNormalize(b3QuatConstArg in)
{
return b3FastNormalize4(in);
//return b3FastNormalize4(in);
float len = native_sqrt(dot(q, q));
if(len > 0.f)
{
q *= 1.f / len;
}
else
{
q.x = q.y = q.z = 0.f;
q.w = 1.f;
}
return q;
}
inline float4 b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec)
{

View File

@@ -5,6 +5,7 @@
typedef struct b3ContactConstraint4 b3ContactConstraint4_t;
struct b3ContactConstraint4
{
@@ -25,9 +26,9 @@ struct b3ContactConstraint4
};
//inline void setFrictionCoeff(float value) { m_linear[3] = value; }
inline float b3GetFrictionCoeff(b3ContactConstraint4* constraint)
inline float b3GetFrictionCoeff(b3ContactConstraint4_t* constraint)
{
return constraint->m_linear[3];
return constraint->m_linear.w;
}
#endif //B3_CONTACT_CONSTRAINT5_H

View File

@@ -0,0 +1,153 @@
#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Contact4Data.h"
#include "Bullet3Dynamics/shared/b3ContactConstraint4.h"
#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h"
void b3PlaneSpace1 (b3Float4ConstArg n, b3Float4* p, b3Float4* q);
void b3PlaneSpace1 (b3Float4ConstArg n, b3Float4* p, b3Float4* q)
{
if (b3Fabs(n.z) > 0.70710678f) {
// choose p in y-z plane
float a = n.y*n.y + n.z*n.z;
float k = 1.f/sqrt(a);
p[0].x = 0;
p[0].y = -n.z*k;
p[0].z = n.y*k;
// set q = n x p
q[0].x = a*k;
q[0].y = -n.x*p[0].z;
q[0].z = n.x*p[0].y;
}
else {
// choose p in x-y plane
float a = n.x*n.x + n.y*n.y;
float k = 1.f/sqrt(a);
p[0].x = -n.y*k;
p[0].y = n.x*k;
p[0].z = 0;
// set q = n x p
q[0].x = -n.z*p[0].y;
q[0].y = n.z*p[0].x;
q[0].z = a*k;
}
}
void setLinearAndAngular( b3Float4ConstArg n, b3Float4ConstArg r0, b3Float4ConstArg r1, b3Float4* linear, b3Float4* angular0, b3Float4* angular1)
{
*linear = b3MakeFloat4(n.x,n.y,n.z,0.f);
*angular0 = b3Cross3(r0, n);
*angular1 = -b3Cross3(r1, n);
}
float calcRelVel( b3Float4ConstArg l0, b3Float4ConstArg l1, b3Float4ConstArg a0, b3Float4ConstArg a1, b3Float4ConstArg linVel0,
b3Float4ConstArg angVel0, b3Float4ConstArg linVel1, b3Float4ConstArg angVel1 )
{
return b3Dot3F4(l0, linVel0) + b3Dot3F4(a0, angVel0) + b3Dot3F4(l1, linVel1) + b3Dot3F4(a1, angVel1);
}
float calcJacCoeff(b3Float4ConstArg linear0, b3Float4ConstArg linear1, b3Float4ConstArg angular0, b3Float4ConstArg angular1,
float invMass0, const b3Mat3x3* invInertia0, float invMass1, const b3Mat3x3* invInertia1)
{
// linear0,1 are normlized
float jmj0 = invMass0;//b3Dot3F4(linear0, linear0)*invMass0;
float jmj1 = b3Dot3F4(mtMul3(angular0,*invInertia0), angular0);
float jmj2 = invMass1;//b3Dot3F4(linear1, linear1)*invMass1;
float jmj3 = b3Dot3F4(mtMul3(angular1,*invInertia1), angular1);
return -1.f/(jmj0+jmj1+jmj2+jmj3);
}
void setConstraint4( b3Float4ConstArg posA, b3Float4ConstArg linVelA, b3Float4ConstArg angVelA, float invMassA, b3Mat3x3ConstArg invInertiaA,
b3Float4ConstArg posB, b3Float4ConstArg linVelB, b3Float4ConstArg angVelB, float invMassB, b3Mat3x3ConstArg invInertiaB,
__global struct b3Contact4Data* src, float dt, float positionDrift, float positionConstraintCoeff,
b3ContactConstraint4_t* dstC )
{
dstC->m_bodyA = abs(src->m_bodyAPtrAndSignBit);
dstC->m_bodyB = abs(src->m_bodyBPtrAndSignBit);
float dtInv = 1.f/dt;
for(int ic=0; ic<4; ic++)
{
dstC->m_appliedRambdaDt[ic] = 0.f;
}
dstC->m_fJacCoeffInv[0] = dstC->m_fJacCoeffInv[1] = 0.f;
dstC->m_linear = src->m_worldNormalOnB;
dstC->m_linear.w = 0.7f ;//src->getFrictionCoeff() );
for(int ic=0; ic<4; ic++)
{
b3Float4 r0 = src->m_worldPosB[ic] - posA;
b3Float4 r1 = src->m_worldPosB[ic] - posB;
if( ic >= src->m_worldNormalOnB.w )//npoints
{
dstC->m_jacCoeffInv[ic] = 0.f;
continue;
}
float relVelN;
{
b3Float4 linear, angular0, angular1;
setLinearAndAngular(src->m_worldNormalOnB, r0, r1, &linear, &angular0, &angular1);
dstC->m_jacCoeffInv[ic] = calcJacCoeff(linear, -linear, angular0, angular1,
invMassA, &invInertiaA, invMassB, &invInertiaB );
relVelN = calcRelVel(linear, -linear, angular0, angular1,
linVelA, angVelA, linVelB, angVelB);
float e = 0.f;//src->getRestituitionCoeff();
if( relVelN*relVelN < 0.004f ) e = 0.f;
dstC->m_b[ic] = e*relVelN;
//float penetration = src->m_worldPosB[ic].w;
dstC->m_b[ic] += (src->m_worldPosB[ic].w + positionDrift)*positionConstraintCoeff*dtInv;
dstC->m_appliedRambdaDt[ic] = 0.f;
}
}
if( src->m_worldNormalOnB.w > 0 )//npoints
{ // prepare friction
b3Float4 center = b3MakeFloat4(0.f,0.f,0.f,0.f);
for(int i=0; i<src->m_worldNormalOnB.w; i++)
center += src->m_worldPosB[i];
center /= (float)src->m_worldNormalOnB.w;
b3Float4 tangent[2];
b3PlaneSpace1(src->m_worldNormalOnB,&tangent[0],&tangent[1]);
b3Float4 r[2];
r[0] = center - posA;
r[1] = center - posB;
for(int i=0; i<2; i++)
{
b3Float4 linear, angular0, angular1;
setLinearAndAngular(tangent[i], r[0], r[1], &linear, &angular0, &angular1);
dstC->m_fJacCoeffInv[i] = calcJacCoeff(linear, -linear, angular0, angular1,
invMassA, &invInertiaA, invMassB, &invInertiaB );
dstC->m_fAppliedRambdaDt[i] = 0.f;
}
dstC->m_center = center;
}
for(int i=0; i<4; i++)
{
if( i<src->m_worldNormalOnB.w )
{
dstC->m_worldPos[i] = src->m_worldPosB[i];
}
else
{
dstC->m_worldPos[i] = b3MakeFloat4(0.f,0.f,0.f,0.f);
}
}
}

View File

@@ -224,7 +224,18 @@ static const char* satKernelsCL= \
"}\n"
"inline b3Quat b3QuatNormalize(b3QuatConstArg in)\n"
"{\n"
" return b3FastNormalize4(in);\n"
" //return b3FastNormalize4(in);\n"
" float len = native_sqrt(dot(q, q));\n"
" if(len > 0.f)\n"
" {\n"
" q *= 1.f / len;\n"
" }\n"
" else\n"
" {\n"
" q.x = q.y = q.z = 0.f;\n"
" q.w = 1.f;\n"
" }\n"
" return q;\n"
"}\n"
"inline float4 b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec)\n"
"{\n"
@@ -253,13 +264,13 @@ static const char* satKernelsCL= \
"#else\n"
"typedef struct\n"
"{\n"
" float4 m_row[3];\n"
" b3Float4 m_row[3];\n"
"}b3Mat3x3;\n"
"#define b3Mat3x3ConstArg const b3Mat3x3\n"
"#define b3GetRow(m,row) (m.m_row[row])\n"
"inline b3Mat3x3 b3QuatGetRotationMatrix(b3Quat quat)\n"
"{\n"
" float4 quat2 = (float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f);\n"
" b3Float4 quat2 = (b3Float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f);\n"
" b3Mat3x3 out;\n"
" out.m_row[0].x=1-2*quat2.y-2*quat2.z;\n"
" out.m_row[0].y=2*quat.x*quat.y-2*quat.w*quat.z;\n"
@@ -283,6 +294,87 @@ static const char* satKernelsCL= \
" out.m_row[2] = fabs(matIn.m_row[2]);\n"
" return out;\n"
"}\n"
"__inline\n"
"b3Mat3x3 mtZero();\n"
"__inline\n"
"b3Mat3x3 mtIdentity();\n"
"__inline\n"
"b3Mat3x3 mtTranspose(b3Mat3x3 m);\n"
"__inline\n"
"b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b);\n"
"__inline\n"
"b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b);\n"
"__inline\n"
"b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b);\n"
"__inline\n"
"b3Mat3x3 mtZero()\n"
"{\n"
" b3Mat3x3 m;\n"
" m.m_row[0] = (b3Float4)(0.f);\n"
" m.m_row[1] = (b3Float4)(0.f);\n"
" m.m_row[2] = (b3Float4)(0.f);\n"
" return m;\n"
"}\n"
"__inline\n"
"b3Mat3x3 mtIdentity()\n"
"{\n"
" b3Mat3x3 m;\n"
" m.m_row[0] = (b3Float4)(1,0,0,0);\n"
" m.m_row[1] = (b3Float4)(0,1,0,0);\n"
" m.m_row[2] = (b3Float4)(0,0,1,0);\n"
" return m;\n"
"}\n"
"__inline\n"
"b3Mat3x3 mtTranspose(b3Mat3x3 m)\n"
"{\n"
" b3Mat3x3 out;\n"
" out.m_row[0] = (b3Float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f);\n"
" out.m_row[1] = (b3Float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f);\n"
" out.m_row[2] = (b3Float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);\n"
" return out;\n"
"}\n"
"__inline\n"
"b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b)\n"
"{\n"
" b3Mat3x3 transB;\n"
" transB = mtTranspose( b );\n"
" b3Mat3x3 ans;\n"
" // why this doesn't run when 0ing in the for{}\n"
" a.m_row[0].w = 0.f;\n"
" a.m_row[1].w = 0.f;\n"
" a.m_row[2].w = 0.f;\n"
" for(int i=0; i<3; i++)\n"
" {\n"
"// a.m_row[i].w = 0.f;\n"
" ans.m_row[i].x = b3Dot3F4(a.m_row[i],transB.m_row[0]);\n"
" ans.m_row[i].y = b3Dot3F4(a.m_row[i],transB.m_row[1]);\n"
" ans.m_row[i].z = b3Dot3F4(a.m_row[i],transB.m_row[2]);\n"
" ans.m_row[i].w = 0.f;\n"
" }\n"
" return ans;\n"
"}\n"
"__inline\n"
"b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b)\n"
"{\n"
" b3Float4 ans;\n"
" ans.x = b3Dot3F4( a.m_row[0], b );\n"
" ans.y = b3Dot3F4( a.m_row[1], b );\n"
" ans.z = b3Dot3F4( a.m_row[2], b );\n"
" ans.w = 0.f;\n"
" return ans;\n"
"}\n"
"__inline\n"
"b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b)\n"
"{\n"
" b3Float4 colx = b3MakeFloat4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n"
" b3Float4 coly = b3MakeFloat4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n"
" b3Float4 colz = b3MakeFloat4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n"
" b3Float4 ans;\n"
" ans.x = b3Dot3F4( a, colx );\n"
" ans.y = b3Dot3F4( a, coly );\n"
" ans.z = b3Dot3F4( a, colz );\n"
" return ans;\n"
"}\n"
"#endif\n"
"#endif //B3_MAT3x3_H\n"
"typedef struct b3Aabb b3Aabb_t;\n"

View File

@@ -1,15 +1,32 @@
bool b3GpuBatchContacts = true;
bool b3GpuSolveConstraint = true;
bool gpuRadixSort=true;
bool gpuSetSortData = true;
//#define USE_CPU
#ifdef USE_CPU
bool b3GpuBatchContacts = false;
bool b3GpuSolveConstraint = false;
bool gpuRadixSort=false;
bool gpuSetSortData = false;
bool optionalSortContactsDeterminism = true;
bool gpuSortContactsDeterminism = true;
bool useCpuCopyConstraints = false;
bool optionalSortContactsDeterminism = true;
bool gpuSortContactsDeterminism = false;
bool useCpuCopyConstraints = true;
bool useScanHost = false;
bool reorderContactsOnCpu = false;
bool useScanHost = true;
bool reorderContactsOnCpu = true;
#else
bool b3GpuBatchContacts = true;
bool b3GpuSolveConstraint = true;
bool gpuRadixSort=true;
bool gpuSetSortData = true;
bool optionalSortContactsDeterminism = true;
bool gpuSortContactsDeterminism = true;
bool useCpuCopyConstraints = false;
bool useScanHost = false;
bool reorderContactsOnCpu = false;
#endif
#include "b3GpuBatchingPgsSolver.h"
#include "Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h"

View File

@@ -3,24 +3,13 @@
#define B3_CONSTRAINT4_h
#include "Bullet3Common/b3Vector3.h"
B3_ATTRIBUTE_ALIGNED16(struct) b3GpuConstraint4
#include "Bullet3Dynamics/shared/b3ContactConstraint4.h"
B3_ATTRIBUTE_ALIGNED16(struct) b3GpuConstraint4 : public b3ContactConstraint4
{
B3_DECLARE_ALIGNED_ALLOCATOR();
b3Vector3 m_linear;//normal?
b3Vector3 m_worldPos[4];
b3Vector3 m_center; // friction
float m_jacCoeffInv[4];
float m_b[4];
float m_appliedRambdaDt[4];
float m_fJacCoeffInv[2]; // friction
float m_fAppliedRambdaDt[2]; // friction
unsigned int m_bodyA;
unsigned int m_bodyB;
int m_batchIdx;
unsigned int m_paddings;
inline void setFrictionCoeff(float value) { m_linear[3] = value; }
inline float getFrictionCoeff() const { return m_linear[3]; }
};

View File

@@ -33,11 +33,21 @@ subject to the following restrictions:
#define B3_RIGIDBODY_INTEGRATE_PATH "src/Bullet3OpenCL/RigidBody/kernels/integrateKernel.cl"
#define B3_RIGIDBODY_UPDATEAABB_PATH "src/Bullet3OpenCL/RigidBody/kernels/updateAabbsKernel.cl"
bool useDbvt = false;//true;
bool useBullet2CpuSolver = true;
bool dumpContactStats = false;
bool calcWorldSpaceAabbOnCpu = false;//true;
bool useCalculateOverlappingPairsHost = false;
//#define USE_CPU
#ifdef USE_CPU
bool useDbvt = true;
bool useBullet2CpuSolver = true;
bool dumpContactStats = false;
bool calcWorldSpaceAabbOnCpu = true;
bool useCalculateOverlappingPairsHost = true;
#else
bool useDbvt = false;//true;
bool useBullet2CpuSolver = true;
bool dumpContactStats = false;
bool calcWorldSpaceAabbOnCpu = false;//true;
bool useCalculateOverlappingPairsHost = false;
#endif
#ifdef TEST_OTHER_GPU_SOLVER
#include "b3GpuJacobiSolver.h"

View File

@@ -18,6 +18,7 @@ subject to the following restrictions:
///useNewBatchingKernel is a rewritten kernel using just a single thread of the warp, for experiments
bool useNewBatchingKernel = false;
bool convertConstraintOnCpu = false;//true;
#define B3_SOLVER_SETUP_KERNEL_PATH "src/Bullet3OpenCL/RigidBody/kernels/solverSetup.cl"
#define B3_SOLVER_SETUP2_KERNEL_PATH "src/Bullet3OpenCL/RigidBody/kernels/solverSetup2.cl"
@@ -26,6 +27,7 @@ bool useNewBatchingKernel = false;
#define B3_BATCHING_PATH "src/Bullet3OpenCL/RigidBody/kernels/batchingKernels.cl"
#define B3_BATCHING_NEW_PATH "src/Bullet3OpenCL/RigidBody/kernels/batchingKernelsNew.cl"
#include "Bullet3Dynamics/shared/b3ConvertConstraint4.h"
#include "kernels/solverSetup.h"
#include "kernels/solverSetup2.h"
@@ -203,103 +205,6 @@ b3Solver::~b3Solver()
/*void b3Solver::reorderConvertToConstraints( const b3OpenCLArray<b3RigidBodyCL>* bodyBuf,
const b3OpenCLArray<b3InertiaCL>* shapeBuf,
b3OpenCLArray<b3Contact4>* contactsIn, b3OpenCLArray<b3GpuConstraint4>* contactCOut, void* additionalData,
int nContacts, const b3Solver::ConstraintCfg& cfg )
{
if( m_contactBuffer )
{
m_contactBuffer->resize(nContacts);
}
if( m_contactBuffer == 0 )
{
B3_PROFILE("new m_contactBuffer;");
m_contactBuffer = new b3OpenCLArray<b3Contact4>(m_context,m_queue,nContacts );
m_contactBuffer->resize(nContacts);
}
//DeviceUtils::Config dhCfg;
//Device* deviceHost = DeviceUtils::allocate( TYPE_HOST, dhCfg );
if( cfg.m_enableParallelSolve )
{
clFinish(m_queue);
// contactsIn -> m_contactBuffer
{
B3_PROFILE("sortContacts");
sortContacts( bodyBuf, contactsIn, additionalData, nContacts, cfg );
clFinish(m_queue);
}
{
B3_PROFILE("m_copyConstraintKernel");
b3Int4 cdata; cdata.x = nContacts;
b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_contactBuffer->getBufferCL() ), b3BufferInfoCL( contactsIn->getBufferCL() ) };
// b3LauncherCL launcher( m_queue, data->m_device->getKernel( PATH, "CopyConstraintKernel", "-I ..\\..\\ -Wf,--c++", 0 ) );
b3LauncherCL launcher( m_queue, m_copyConstraintKernel );
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
launcher.setConst( cdata );
launcher.launch1D( nContacts, 64 );
clFinish(m_queue);
}
{
B3_PROFILE("batchContacts");
b3Solver::batchContacts( contactsIn, nContacts, m_numConstraints, m_offsets, cfg.m_staticIdx );
}
}
{
B3_PROFILE("waitForCompletion (batchContacts)");
clFinish(m_queue);
}
//================
{
B3_PROFILE("convertToConstraints");
b3Solver::convertToConstraints( bodyBuf, shapeBuf, contactsIn, contactCOut, additionalData, nContacts, cfg );
}
{
B3_PROFILE("convertToConstraints waitForCompletion");
clFinish(m_queue);
}
}
*/
static
inline
float calcRelVel(const b3Vector3& l0, const b3Vector3& l1, const b3Vector3& a0, const b3Vector3& a1,
const b3Vector3& linVel0, const b3Vector3& angVel0, const b3Vector3& linVel1, const b3Vector3& angVel1)
{
return b3Dot(l0, linVel0) + b3Dot(a0, angVel0) + b3Dot(l1, linVel1) + b3Dot(a1, angVel1);
}
static
inline
void setLinearAndAngular(const b3Vector3& n, const b3Vector3& r0, const b3Vector3& r1,
b3Vector3& linear, b3Vector3& angular0, b3Vector3& angular1)
{
linear = -n;
angular0 = -b3Cross(r0, n);
angular1 = b3Cross(r1, n);
}
template<bool JACOBI>
static
__inline
@@ -323,7 +228,7 @@ void solveContact(b3GpuConstraint4& cs,
b3Vector3 angular0, angular1, linear;
b3Vector3 r0 = cs.m_worldPos[ic] - (b3Vector3&)posA;
b3Vector3 r1 = cs.m_worldPos[ic] - (b3Vector3&)posB;
setLinearAndAngular( (const b3Vector3 &)-cs.m_linear, (const b3Vector3 &)r0, (const b3Vector3 &)r1, linear, angular0, angular1 );
setLinearAndAngular( (const b3Vector3 &)cs.m_linear, (const b3Vector3 &)r0, (const b3Vector3 &)r1, &linear, &angular0, &angular1 );
float rambdaDt = calcRelVel((const b3Vector3 &)cs.m_linear,(const b3Vector3 &) -cs.m_linear, angular0, angular1,
linVelA, angVelA, linVelB, angVelB ) + cs.m_b[ic];
@@ -407,7 +312,7 @@ void solveContact(b3GpuConstraint4& cs,
b3Vector3 r1 = center - posB;
for(int i=0; i<2; i++)
{
setLinearAndAngular( tangent[i], r0, r1, linear, angular0, angular1 );
setLinearAndAngular( tangent[i], r0, r1, &linear, &angular0, &angular1 );
float rambdaDt = calcRelVel(linear, -linear, angular0, angular1,
linVelA, angVelA, linVelB, angVelB );
rambdaDt *= cs.m_fJacCoeffInv[i];
@@ -1066,28 +971,78 @@ void b3Solver::convertToConstraints( const b3OpenCLArray<b3RigidBodyCL>* bodyBuf
};
{
B3_PROFILE("m_contactToConstraintKernel");
CB cdata;
cdata.m_nContacts = nContacts;
cdata.m_dt = cfg.m_dt;
cdata.m_positionDrift = cfg.m_positionDrift;
cdata.m_positionConstraintCoeff = cfg.m_positionConstraintCoeff;
b3BufferInfoCL bInfo[] = { b3BufferInfoCL( contactsIn->getBufferCL() ), b3BufferInfoCL( bodyBuf->getBufferCL() ), b3BufferInfoCL( shapeBuf->getBufferCL()),
b3BufferInfoCL( contactCOut->getBufferCL() )};
b3LauncherCL launcher( m_queue, m_contactToConstraintKernel );
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
//launcher.setConst( cdata );
launcher.setConst(cdata.m_nContacts);
launcher.setConst(cdata.m_dt);
launcher.setConst(cdata.m_positionDrift);
launcher.setConst(cdata.m_positionConstraintCoeff);
launcher.launch1D( nContacts, 64 );
clFinish(m_queue);
b3AlignedObjectArray<b3RigidBodyCL> gBodies;
bodyBuf->copyToHost(gBodies);
b3AlignedObjectArray<b3Contact4> gContact;
contactsIn->copyToHost(gContact);
b3AlignedObjectArray<b3InertiaCL> gShapes;
shapeBuf->copyToHost(gShapes);
b3AlignedObjectArray<b3GpuConstraint4> gConstraintOut;
gConstraintOut.resize(nContacts);
if (convertConstraintOnCpu)
{
B3_PROFILE("cpu contactToConstraintKernel");
for (int gIdx=0;gIdx<nContacts;gIdx++)
{
int aIdx = abs(gContact[gIdx].m_bodyAPtrAndSignBit);
int bIdx = abs(gContact[gIdx].m_bodyBPtrAndSignBit);
b3Float4 posA = gBodies[aIdx].m_pos;
b3Float4 linVelA = gBodies[aIdx].m_linVel;
b3Float4 angVelA = gBodies[aIdx].m_angVel;
float invMassA = gBodies[aIdx].m_invMass;
b3Mat3x3 invInertiaA = gShapes[aIdx].m_initInvInertia;
b3Float4 posB = gBodies[bIdx].m_pos;
b3Float4 linVelB = gBodies[bIdx].m_linVel;
b3Float4 angVelB = gBodies[bIdx].m_angVel;
float invMassB = gBodies[bIdx].m_invMass;
b3Mat3x3 invInertiaB = gShapes[bIdx].m_initInvInertia;
b3ContactConstraint4_t cs;
setConstraint4( posA, linVelA, angVelA, invMassA, invInertiaA, posB, linVelB, angVelB, invMassB, invInertiaB,
&gContact[gIdx], cdata.m_dt, cdata.m_positionDrift, cdata.m_positionConstraintCoeff,
&cs );
cs.m_batchIdx = gContact[gIdx].m_batchIdx;
gConstraintOut[gIdx] = (b3GpuConstraint4&)cs;
}
contactCOut->copyFromHost(gConstraintOut);
} else
{
B3_PROFILE("gpu m_contactToConstraintKernel");
b3BufferInfoCL bInfo[] = { b3BufferInfoCL( contactsIn->getBufferCL() ), b3BufferInfoCL( bodyBuf->getBufferCL() ), b3BufferInfoCL( shapeBuf->getBufferCL()),
b3BufferInfoCL( contactCOut->getBufferCL() )};
b3LauncherCL launcher( m_queue, m_contactToConstraintKernel );
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
//launcher.setConst( cdata );
launcher.setConst(cdata.m_nContacts);
launcher.setConst(cdata.m_dt);
launcher.setConst(cdata.m_positionDrift);
launcher.setConst(cdata.m_positionConstraintCoeff);
launcher.launch1D( nContacts, 64 );
clFinish(m_queue);
}
}

View File

@@ -13,6 +13,9 @@ subject to the following restrictions:
*/
//Originally written by Erwin Coumans
#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h"
float4 quatMult(float4 q1, float4 q2)
{
float4 q;
@@ -23,40 +26,14 @@ float4 quatMult(float4 q1, float4 q2)
return q;
}
float4 quatNorm(float4 q)
{
float len = native_sqrt(dot(q, q));
if(len > 0.f)
{
q *= 1.f / len;
}
else
{
q.x = q.y = q.z = 0.f;
q.w = 1.f;
}
return q;
}
typedef struct
{
float4 m_pos;
float4 m_quat;
float4 m_linVel;
float4 m_angVel;
unsigned int m_collidableIdx;
float m_invMass;
float m_restituitionCoeff;
float m_frictionCoeff;
} Body;
__kernel void
integrateTransformsKernel( __global Body* bodies,const int numNodes, float timeStep, float angularDamping, float4 gravityAcceleration)
integrateTransformsKernel( __global b3RigidBodyData_t* bodies,const int numNodes, float timeStep, float angularDamping, float4 gravityAcceleration)
{
int nodeID = get_global_id(0);
float BT_GPU_ANGULAR_MOTION_THRESHOLD = (0.25f * 3.14159254f);
@@ -92,7 +69,7 @@ __kernel void
float4 orn0 = bodies[nodeID].m_quat;
float4 predictedOrn = quatMult(dorn, orn0);
predictedOrn = quatNorm(predictedOrn);
predictedOrn = b3QuatNormalize(predictedOrn);
bodies[nodeID].m_quat=predictedOrn;
}

View File

@@ -12,17 +12,88 @@ static const char* integrateKernelCL= \
"3. This notice may not be removed or altered from any source distribution.\n"
"*/\n"
"//Originally written by Erwin Coumans\n"
"float4 quatMult(float4 q1, float4 q2)\n"
"#ifndef B3_RIGIDBODY_DATA_H\n"
"#define B3_RIGIDBODY_DATA_H\n"
"#ifndef B3_FLOAT4_H\n"
"#define B3_FLOAT4_H\n"
"#ifndef B3_PLATFORM_DEFINITIONS_H\n"
"#define B3_PLATFORM_DEFINITIONS_H\n"
"struct MyTest\n"
"{\n"
" float4 q;\n"
" q.x = q1.w * q2.x + q1.x * q2.w + q1.y * q2.z - q1.z * q2.y;\n"
" q.y = q1.w * q2.y + q1.y * q2.w + q1.z * q2.x - q1.x * q2.z;\n"
" q.z = q1.w * q2.z + q1.z * q2.w + q1.x * q2.y - q1.y * q2.x;\n"
" q.w = q1.w * q2.w - q1.x * q2.x - q1.y * q2.y - q1.z * q2.z; \n"
" return q;\n"
" int bla;\n"
"};\n"
"#ifdef __cplusplus\n"
"#else\n"
"#define b3AtomicInc atomic_inc\n"
"#define b3Fabs fabs\n"
"#endif\n"
"#endif\n"
"#ifdef __cplusplus\n"
"#else\n"
" typedef float4 b3Float4;\n"
" #define b3Float4ConstArg const b3Float4\n"
" #define b3MakeFloat4 (float4)\n"
" float b3Dot3F4(b3Float4ConstArg v0,b3Float4ConstArg v1)\n"
" {\n"
" float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n"
" float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n"
" return dot(a1, b1);\n"
" }\n"
" b3Float4 b3Cross3(b3Float4ConstArg v0,b3Float4ConstArg v1)\n"
" {\n"
" float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n"
" float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n"
" return cross(a1, b1);\n"
" }\n"
"#endif \n"
" \n"
"inline bool b3IsAlmostZero(b3Float4ConstArg v)\n"
"{\n"
" if(b3Fabs(v.x)>1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6) \n"
" return false;\n"
" return true;\n"
"}\n"
"float4 quatNorm(float4 q)\n"
"#endif //B3_FLOAT4_H\n"
"#ifndef B3_QUAT_H\n"
"#define B3_QUAT_H\n"
"#ifndef B3_PLATFORM_DEFINITIONS_H\n"
"#ifdef __cplusplus\n"
"#else\n"
"#endif\n"
"#endif\n"
"#ifndef B3_FLOAT4_H\n"
"#ifdef __cplusplus\n"
"#else\n"
"#endif \n"
"#endif //B3_FLOAT4_H\n"
"#ifdef __cplusplus\n"
"#else\n"
" typedef float4 b3Quat;\n"
" #define b3QuatConstArg const b3Quat\n"
" \n"
" \n"
"inline float4 b3FastNormalize4(float4 v)\n"
"{\n"
" v = (float4)(v.xyz,0.f);\n"
" return fast_normalize(v);\n"
"}\n"
" \n"
"inline b3Quat b3QuatMul(b3Quat a, b3Quat b);\n"
"inline b3Quat b3QuatNormalize(b3QuatConstArg in);\n"
"inline b3Quat b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec);\n"
"inline b3Quat b3QuatInvert(b3QuatConstArg q);\n"
"inline b3Quat b3QuatMul(b3QuatConstArg a, b3QuatConstArg b)\n"
"{\n"
" b3Quat ans;\n"
" ans = b3Cross3( a, b );\n"
" ans += a.w*b+b.w*a;\n"
"// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n"
" ans.w = a.w*b.w - b3Dot3F4(a, b);\n"
" return ans;\n"
"}\n"
"inline b3Quat b3QuatNormalize(b3QuatConstArg in)\n"
"{\n"
" //return b3FastNormalize4(in);\n"
" float len = native_sqrt(dot(q, q));\n"
" if(len > 0.f)\n"
" {\n"
@@ -35,19 +106,184 @@ static const char* integrateKernelCL= \
" }\n"
" return q;\n"
"}\n"
"inline float4 b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec)\n"
"{\n"
" b3Quat qInv = b3QuatInvert( q );\n"
" float4 vcpy = vec;\n"
" vcpy.w = 0.f;\n"
" float4 out = b3QuatMul(b3QuatMul(q,vcpy),qInv);\n"
" return out;\n"
"}\n"
"inline b3Quat b3QuatInvert(b3QuatConstArg q)\n"
"{\n"
" return (b3Quat)(-q.xyz, q.w);\n"
"}\n"
"inline float4 b3QuatInvRotate(b3QuatConstArg q, b3QuatConstArg vec)\n"
"{\n"
" return b3QuatRotate( b3QuatInvert( q ), vec );\n"
"}\n"
"inline b3Float4 b3TransformPoint(b3Float4ConstArg point, b3Float4ConstArg translation, b3QuatConstArg orientation)\n"
"{\n"
" return b3QuatRotate( orientation, point ) + (translation);\n"
"}\n"
" \n"
"#endif \n"
"#endif //B3_QUAT_H\n"
"#ifndef B3_MAT3x3_H\n"
"#define B3_MAT3x3_H\n"
"#ifndef B3_QUAT_H\n"
"#ifdef __cplusplus\n"
"#else\n"
"#endif \n"
"#endif //B3_QUAT_H\n"
"#ifdef __cplusplus\n"
"#else\n"
"typedef struct\n"
"{\n"
" float4 m_pos;\n"
" float4 m_quat;\n"
" float4 m_linVel;\n"
" float4 m_angVel;\n"
" unsigned int m_collidableIdx;\n"
" float m_invMass;\n"
" float m_restituitionCoeff;\n"
" float m_frictionCoeff;\n"
"} Body;\n"
" b3Float4 m_row[3];\n"
"}b3Mat3x3;\n"
"#define b3Mat3x3ConstArg const b3Mat3x3\n"
"#define b3GetRow(m,row) (m.m_row[row])\n"
"inline b3Mat3x3 b3QuatGetRotationMatrix(b3Quat quat)\n"
"{\n"
" b3Float4 quat2 = (b3Float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f);\n"
" b3Mat3x3 out;\n"
" out.m_row[0].x=1-2*quat2.y-2*quat2.z;\n"
" out.m_row[0].y=2*quat.x*quat.y-2*quat.w*quat.z;\n"
" out.m_row[0].z=2*quat.x*quat.z+2*quat.w*quat.y;\n"
" out.m_row[0].w = 0.f;\n"
" out.m_row[1].x=2*quat.x*quat.y+2*quat.w*quat.z;\n"
" out.m_row[1].y=1-2*quat2.x-2*quat2.z;\n"
" out.m_row[1].z=2*quat.y*quat.z-2*quat.w*quat.x;\n"
" out.m_row[1].w = 0.f;\n"
" out.m_row[2].x=2*quat.x*quat.z-2*quat.w*quat.y;\n"
" out.m_row[2].y=2*quat.y*quat.z+2*quat.w*quat.x;\n"
" out.m_row[2].z=1-2*quat2.x-2*quat2.y;\n"
" out.m_row[2].w = 0.f;\n"
" return out;\n"
"}\n"
"inline b3Mat3x3 b3AbsoluteMat3x3(b3Mat3x3ConstArg matIn)\n"
"{\n"
" b3Mat3x3 out;\n"
" out.m_row[0] = fabs(matIn.m_row[0]);\n"
" out.m_row[1] = fabs(matIn.m_row[1]);\n"
" out.m_row[2] = fabs(matIn.m_row[2]);\n"
" return out;\n"
"}\n"
"__inline\n"
"b3Mat3x3 mtZero();\n"
"__inline\n"
"b3Mat3x3 mtIdentity();\n"
"__inline\n"
"b3Mat3x3 mtTranspose(b3Mat3x3 m);\n"
"__inline\n"
"b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b);\n"
"__inline\n"
"b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b);\n"
"__inline\n"
"b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b);\n"
"__inline\n"
"b3Mat3x3 mtZero()\n"
"{\n"
" b3Mat3x3 m;\n"
" m.m_row[0] = (b3Float4)(0.f);\n"
" m.m_row[1] = (b3Float4)(0.f);\n"
" m.m_row[2] = (b3Float4)(0.f);\n"
" return m;\n"
"}\n"
"__inline\n"
"b3Mat3x3 mtIdentity()\n"
"{\n"
" b3Mat3x3 m;\n"
" m.m_row[0] = (b3Float4)(1,0,0,0);\n"
" m.m_row[1] = (b3Float4)(0,1,0,0);\n"
" m.m_row[2] = (b3Float4)(0,0,1,0);\n"
" return m;\n"
"}\n"
"__inline\n"
"b3Mat3x3 mtTranspose(b3Mat3x3 m)\n"
"{\n"
" b3Mat3x3 out;\n"
" out.m_row[0] = (b3Float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f);\n"
" out.m_row[1] = (b3Float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f);\n"
" out.m_row[2] = (b3Float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);\n"
" return out;\n"
"}\n"
"__inline\n"
"b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b)\n"
"{\n"
" b3Mat3x3 transB;\n"
" transB = mtTranspose( b );\n"
" b3Mat3x3 ans;\n"
" // why this doesn't run when 0ing in the for{}\n"
" a.m_row[0].w = 0.f;\n"
" a.m_row[1].w = 0.f;\n"
" a.m_row[2].w = 0.f;\n"
" for(int i=0; i<3; i++)\n"
" {\n"
"// a.m_row[i].w = 0.f;\n"
" ans.m_row[i].x = b3Dot3F4(a.m_row[i],transB.m_row[0]);\n"
" ans.m_row[i].y = b3Dot3F4(a.m_row[i],transB.m_row[1]);\n"
" ans.m_row[i].z = b3Dot3F4(a.m_row[i],transB.m_row[2]);\n"
" ans.m_row[i].w = 0.f;\n"
" }\n"
" return ans;\n"
"}\n"
"__inline\n"
"b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b)\n"
"{\n"
" b3Float4 ans;\n"
" ans.x = b3Dot3F4( a.m_row[0], b );\n"
" ans.y = b3Dot3F4( a.m_row[1], b );\n"
" ans.z = b3Dot3F4( a.m_row[2], b );\n"
" ans.w = 0.f;\n"
" return ans;\n"
"}\n"
"__inline\n"
"b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b)\n"
"{\n"
" b3Float4 colx = b3MakeFloat4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n"
" b3Float4 coly = b3MakeFloat4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n"
" b3Float4 colz = b3MakeFloat4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n"
" b3Float4 ans;\n"
" ans.x = b3Dot3F4( a, colx );\n"
" ans.y = b3Dot3F4( a, coly );\n"
" ans.z = b3Dot3F4( a, colz );\n"
" return ans;\n"
"}\n"
"#endif\n"
"#endif //B3_MAT3x3_H\n"
"typedef struct b3RigidBodyData b3RigidBodyData_t;\n"
"struct b3RigidBodyData\n"
"{\n"
" b3Float4 m_pos;\n"
" b3Quat m_quat;\n"
" b3Float4 m_linVel;\n"
" b3Float4 m_angVel;\n"
" int m_collidableIdx;\n"
" float m_invMass;\n"
" float m_restituitionCoeff;\n"
" float m_frictionCoeff;\n"
"};\n"
"typedef struct b3InertiaData b3InertiaData_t;\n"
"struct b3InertiaData\n"
"{\n"
" b3Mat3x3 m_invInertiaWorld;\n"
" b3Mat3x3 m_initInvInertia;\n"
"};\n"
"#endif //B3_RIGIDBODY_DATA_H\n"
" \n"
"float4 quatMult(float4 q1, float4 q2)\n"
"{\n"
" float4 q;\n"
" q.x = q1.w * q2.x + q1.x * q2.w + q1.y * q2.z - q1.z * q2.y;\n"
" q.y = q1.w * q2.y + q1.y * q2.w + q1.z * q2.x - q1.x * q2.z;\n"
" q.z = q1.w * q2.z + q1.z * q2.w + q1.x * q2.y - q1.y * q2.x;\n"
" q.w = q1.w * q2.w - q1.x * q2.x - q1.y * q2.y - q1.z * q2.z; \n"
" return q;\n"
"}\n"
"__kernel void \n"
" integrateTransformsKernel( __global Body* bodies,const int numNodes, float timeStep, float angularDamping, float4 gravityAcceleration)\n"
" integrateTransformsKernel( __global b3RigidBodyData_t* bodies,const int numNodes, float timeStep, float angularDamping, float4 gravityAcceleration)\n"
"{\n"
" int nodeID = get_global_id(0);\n"
" float BT_GPU_ANGULAR_MOTION_THRESHOLD = (0.25f * 3.14159254f);\n"
@@ -82,7 +318,7 @@ static const char* integrateKernelCL= \
" dorn.w = native_cos(fAngle * timeStep * 0.5f);\n"
" float4 orn0 = bodies[nodeID].m_quat;\n"
" float4 predictedOrn = quatMult(dorn, orn0);\n"
" predictedOrn = quatNorm(predictedOrn);\n"
" predictedOrn = b3QuatNormalize(predictedOrn);\n"
" bodies[nodeID].m_quat=predictedOrn;\n"
" }\n"
" //linear velocity \n"

View File

@@ -14,7 +14,7 @@ subject to the following restrictions:
*/
//Originally written by Takahiro Harada
#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Contact4Data.h"
#include "Bullet3Dynamics/shared/b3ConvertConstraint4.h"
#pragma OPENCL EXTENSION cl_amd_printf : enable
#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable
@@ -171,238 +171,13 @@ float4 createEquation(const float4 a, const float4 b, const float4 c)
return eqn;
}
///////////////////////////////////////
// Matrix3x3
///////////////////////////////////////
typedef struct
{
float4 m_row[3];
}Matrix3x3;
__inline
Matrix3x3 mtZero();
__inline
Matrix3x3 mtIdentity();
__inline
Matrix3x3 mtTranspose(Matrix3x3 m);
__inline
Matrix3x3 mtMul(Matrix3x3 a, Matrix3x3 b);
__inline
float4 mtMul1(Matrix3x3 a, float4 b);
__inline
float4 mtMul3(float4 a, Matrix3x3 b);
__inline
Matrix3x3 mtZero()
{
Matrix3x3 m;
m.m_row[0] = (float4)(0.f);
m.m_row[1] = (float4)(0.f);
m.m_row[2] = (float4)(0.f);
return m;
}
__inline
Matrix3x3 mtIdentity()
{
Matrix3x3 m;
m.m_row[0] = (float4)(1,0,0,0);
m.m_row[1] = (float4)(0,1,0,0);
m.m_row[2] = (float4)(0,0,1,0);
return m;
}
__inline
Matrix3x3 mtTranspose(Matrix3x3 m)
{
Matrix3x3 out;
out.m_row[0] = (float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f);
out.m_row[1] = (float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f);
out.m_row[2] = (float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);
return out;
}
__inline
Matrix3x3 mtMul(Matrix3x3 a, Matrix3x3 b)
{
Matrix3x3 transB;
transB = mtTranspose( b );
Matrix3x3 ans;
// why this doesn't run when 0ing in the for{}
a.m_row[0].w = 0.f;
a.m_row[1].w = 0.f;
a.m_row[2].w = 0.f;
for(int i=0; i<3; i++)
{
// a.m_row[i].w = 0.f;
ans.m_row[i].x = dot3F4(a.m_row[i],transB.m_row[0]);
ans.m_row[i].y = dot3F4(a.m_row[i],transB.m_row[1]);
ans.m_row[i].z = dot3F4(a.m_row[i],transB.m_row[2]);
ans.m_row[i].w = 0.f;
}
return ans;
}
__inline
float4 mtMul1(Matrix3x3 a, float4 b)
{
float4 ans;
ans.x = dot3F4( a.m_row[0], b );
ans.y = dot3F4( a.m_row[1], b );
ans.z = dot3F4( a.m_row[2], b );
ans.w = 0.f;
return ans;
}
__inline
float4 mtMul3(float4 a, Matrix3x3 b)
{
float4 colx = make_float4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);
float4 coly = make_float4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);
float4 colz = make_float4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);
float4 ans;
ans.x = dot3F4( a, colx );
ans.y = dot3F4( a, coly );
ans.z = dot3F4( a, colz );
return ans;
}
///////////////////////////////////////
// Quaternion
///////////////////////////////////////
typedef float4 Quaternion;
__inline
Quaternion qtMul(Quaternion a, Quaternion b);
__inline
Quaternion qtNormalize(Quaternion in);
__inline
float4 qtRotate(Quaternion q, float4 vec);
__inline
Quaternion qtInvert(Quaternion q);
__inline
Matrix3x3 qtGetRotationMatrix(Quaternion q);
__inline
Quaternion qtMul(Quaternion a, Quaternion b)
{
Quaternion ans;
ans = cross3( a, b );
ans += a.w*b+b.w*a;
// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);
ans.w = a.w*b.w - dot3F4(a, b);
return ans;
}
__inline
Quaternion qtNormalize(Quaternion in)
{
return fastNormalize4(in);
// in /= length( in );
// return in;
}
__inline
float4 qtRotate(Quaternion q, float4 vec)
{
Quaternion qInv = qtInvert( q );
float4 vcpy = vec;
vcpy.w = 0.f;
float4 out = qtMul(qtMul(q,vcpy),qInv);
return out;
}
__inline
Quaternion qtInvert(Quaternion q)
{
return (Quaternion)(-q.xyz, q.w);
}
__inline
float4 qtInvRotate(const Quaternion q, float4 vec)
{
return qtRotate( qtInvert( q ), vec );
}
__inline
Matrix3x3 qtGetRotationMatrix(Quaternion quat)
{
float4 quat2 = (float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f);
Matrix3x3 out;
out.m_row[0].x=1-2*quat2.y-2*quat2.z;
out.m_row[0].y=2*quat.x*quat.y-2*quat.w*quat.z;
out.m_row[0].z=2*quat.x*quat.z+2*quat.w*quat.y;
out.m_row[0].w = 0.f;
out.m_row[1].x=2*quat.x*quat.y+2*quat.w*quat.z;
out.m_row[1].y=1-2*quat2.x-2*quat2.z;
out.m_row[1].z=2*quat.y*quat.z-2*quat.w*quat.x;
out.m_row[1].w = 0.f;
out.m_row[2].x=2*quat.x*quat.z-2*quat.w*quat.y;
out.m_row[2].y=2*quat.y*quat.z+2*quat.w*quat.x;
out.m_row[2].z=1-2*quat2.x-2*quat2.y;
out.m_row[2].w = 0.f;
return out;
}
#define WG_SIZE 64
typedef struct
{
float4 m_pos;
Quaternion m_quat;
float4 m_linVel;
float4 m_angVel;
u32 m_shapeIdx;
float m_invMass;
float m_restituitionCoeff;
float m_frictionCoeff;
} Body;
typedef struct
{
Matrix3x3 m_invInertia;
Matrix3x3 m_initInvInertia;
} Shape;
typedef struct
{
float4 m_linear;
float4 m_worldPos[4];
float4 m_center;
float m_jacCoeffInv[4];
float m_b[4];
float m_appliedRambdaDt[4];
float m_fJacCoeffInv[2];
float m_fAppliedRambdaDt[2];
u32 m_bodyA;
u32 m_bodyB;
int m_batchIdx;
u32 m_paddings[1];
} Constraint4;
@@ -425,31 +200,6 @@ typedef struct
} ConstBufferBatchSolve;
void setLinearAndAngular( float4 n, float4 r0, float4 r1, float4* linear, float4* angular0, float4* angular1)
{
*linear = make_float4(n.xyz,0.f);
*angular0 = cross3(r0, n);
*angular1 = -cross3(r1, n);
}
float calcRelVel( float4 l0, float4 l1, float4 a0, float4 a1, float4 linVel0, float4 angVel0, float4 linVel1, float4 angVel1 )
{
return dot3F4(l0, linVel0) + dot3F4(a0, angVel0) + dot3F4(l1, linVel1) + dot3F4(a1, angVel1);
}
float calcJacCoeff(const float4 linear0, const float4 linear1, const float4 angular0, const float4 angular1,
float invMass0, const Matrix3x3* invInertia0, float invMass1, const Matrix3x3* invInertia1)
{
// linear0,1 are normlized
float jmj0 = invMass0;//dot3F4(linear0, linear0)*invMass0;
float jmj1 = dot3F4(mtMul3(angular0,*invInertia0), angular0);
float jmj2 = invMass1;//dot3F4(linear1, linear1)*invMass1;
float jmj3 = dot3F4(mtMul3(angular1,*invInertia1), angular1);
return -1.f/(jmj0+jmj1+jmj2+jmj3);
}
@@ -470,133 +220,8 @@ typedef struct
typedef struct
{
int m_nContacts;
int m_staticIdx;
float m_scale;
int m_nSplit;
} ConstBufferSSD;
void btPlaneSpace1 (float4 n, float4* p, float4* q);
void btPlaneSpace1 (float4 n, float4* p, float4* q)
{
if (fabs(n.z) > 0.70710678f) {
// choose p in y-z plane
float a = n.y*n.y + n.z*n.z;
float k = 1.f/sqrt(a);
p[0].x = 0;
p[0].y = -n.z*k;
p[0].z = n.y*k;
// set q = n x p
q[0].x = a*k;
q[0].y = -n.x*p[0].z;
q[0].z = n.x*p[0].y;
}
else {
// choose p in x-y plane
float a = n.x*n.x + n.y*n.y;
float k = 1.f/sqrt(a);
p[0].x = -n.y*k;
p[0].y = n.x*k;
p[0].z = 0;
// set q = n x p
q[0].x = -n.z*p[0].y;
q[0].y = n.z*p[0].x;
q[0].z = a*k;
}
}
void setConstraint4( const float4 posA, const float4 linVelA, const float4 angVelA, float invMassA, const Matrix3x3 invInertiaA,
const float4 posB, const float4 linVelB, const float4 angVelB, float invMassB, const Matrix3x3 invInertiaB,
__global struct b3Contact4Data* src, float dt, float positionDrift, float positionConstraintCoeff,
Constraint4* dstC )
{
dstC->m_bodyA = abs(src->m_bodyAPtrAndSignBit);
dstC->m_bodyB = abs(src->m_bodyBPtrAndSignBit);
float dtInv = 1.f/dt;
for(int ic=0; ic<4; ic++)
{
dstC->m_appliedRambdaDt[ic] = 0.f;
}
dstC->m_fJacCoeffInv[0] = dstC->m_fJacCoeffInv[1] = 0.f;
dstC->m_linear = src->m_worldNormalOnB;
dstC->m_linear.w = 0.7f ;//src->getFrictionCoeff() );
for(int ic=0; ic<4; ic++)
{
float4 r0 = src->m_worldPosB[ic] - posA;
float4 r1 = src->m_worldPosB[ic] - posB;
if( ic >= src->m_worldNormalOnB.w )//npoints
{
dstC->m_jacCoeffInv[ic] = 0.f;
continue;
}
float relVelN;
{
float4 linear, angular0, angular1;
setLinearAndAngular(src->m_worldNormalOnB, r0, r1, &linear, &angular0, &angular1);
dstC->m_jacCoeffInv[ic] = calcJacCoeff(linear, -linear, angular0, angular1,
invMassA, &invInertiaA, invMassB, &invInertiaB );
relVelN = calcRelVel(linear, -linear, angular0, angular1,
linVelA, angVelA, linVelB, angVelB);
float e = 0.f;//src->getRestituitionCoeff();
if( relVelN*relVelN < 0.004f ) e = 0.f;
dstC->m_b[ic] = e*relVelN;
//float penetration = src->m_worldPosB[ic].w;
dstC->m_b[ic] += (src->m_worldPosB[ic].w + positionDrift)*positionConstraintCoeff*dtInv;
dstC->m_appliedRambdaDt[ic] = 0.f;
}
}
if( src->m_worldNormalOnB.w > 0 )//npoints
{ // prepare friction
float4 center = make_float4(0.f);
for(int i=0; i<src->m_worldNormalOnB.w; i++)
center += src->m_worldPosB[i];
center /= (float)src->m_worldNormalOnB.w;
float4 tangent[2];
btPlaneSpace1(src->m_worldNormalOnB,&tangent[0],&tangent[1]);
float4 r[2];
r[0] = center - posA;
r[1] = center - posB;
for(int i=0; i<2; i++)
{
float4 linear, angular0, angular1;
setLinearAndAngular(tangent[i], r[0], r[1], &linear, &angular0, &angular1);
dstC->m_fJacCoeffInv[i] = calcJacCoeff(linear, -linear, angular0, angular1,
invMassA, &invInertiaA, invMassB, &invInertiaB );
dstC->m_fAppliedRambdaDt[i] = 0.f;
}
dstC->m_center = center;
}
for(int i=0; i<4; i++)
{
if( i<src->m_worldNormalOnB.w )
{
dstC->m_worldPos[i] = src->m_worldPosB[i];
}
else
{
dstC->m_worldPos[i] = make_float4(0.f);
}
}
}
typedef struct
{
@@ -608,7 +233,7 @@ typedef struct
__kernel
__attribute__((reqd_work_group_size(WG_SIZE,1,1)))
void ContactToConstraintKernel(__global struct b3Contact4Data* gContact, __global Body* gBodies, __global Shape* gShapes, __global Constraint4* gConstraintOut,
void ContactToConstraintKernel(__global struct b3Contact4Data* gContact, __global b3RigidBodyData_t* gBodies, __global b3InertiaData_t* gShapes, __global b3ContactConstraint4_t* gConstraintOut,
int nContacts,
float dt,
float positionDrift,
@@ -626,15 +251,15 @@ float positionConstraintCoeff
float4 linVelA = gBodies[aIdx].m_linVel;
float4 angVelA = gBodies[aIdx].m_angVel;
float invMassA = gBodies[aIdx].m_invMass;
Matrix3x3 invInertiaA = gShapes[aIdx].m_invInertia;
b3Mat3x3 invInertiaA = gShapes[aIdx].m_initInvInertia;
float4 posB = gBodies[bIdx].m_pos;
float4 linVelB = gBodies[bIdx].m_linVel;
float4 angVelB = gBodies[bIdx].m_angVel;
float invMassB = gBodies[bIdx].m_invMass;
Matrix3x3 invInertiaB = gShapes[bIdx].m_invInertia;
b3Mat3x3 invInertiaB = gShapes[bIdx].m_initInvInertia;
Constraint4 cs;
b3ContactConstraint4_t cs;
setConstraint4( posA, linVelA, angVelA, invMassA, invInertiaA, posB, linVelB, angVelB, invMassB, invInertiaB,
&gContact[gIdx], dt, positionDrift, positionConstraintCoeff,

View File

@@ -80,6 +80,385 @@ static const char* solverSetupCL= \
" contact->m_worldNormalOnB.w = (float)numPoints;\n"
"};\n"
"#endif //B3_CONTACT4DATA_H\n"
"#ifndef B3_CONTACT_CONSTRAINT5_H\n"
"#define B3_CONTACT_CONSTRAINT5_H\n"
"#ifndef B3_FLOAT4_H\n"
"#ifdef __cplusplus\n"
"#else\n"
"#endif \n"
"#endif //B3_FLOAT4_H\n"
"typedef struct b3ContactConstraint4 b3ContactConstraint4_t;\n"
"struct b3ContactConstraint4\n"
"{\n"
" b3Float4 m_linear;//normal?\n"
" b3Float4 m_worldPos[4];\n"
" b3Float4 m_center; // friction\n"
" float m_jacCoeffInv[4];\n"
" float m_b[4];\n"
" float m_appliedRambdaDt[4];\n"
" float m_fJacCoeffInv[2]; // friction\n"
" float m_fAppliedRambdaDt[2]; // friction\n"
" unsigned int m_bodyA;\n"
" unsigned int m_bodyB;\n"
" int m_batchIdx;\n"
" unsigned int m_paddings;\n"
"};\n"
"//inline void setFrictionCoeff(float value) { m_linear[3] = value; }\n"
"inline float b3GetFrictionCoeff(b3ContactConstraint4_t* constraint) \n"
"{\n"
" return constraint->m_linear.w; \n"
"}\n"
"#endif //B3_CONTACT_CONSTRAINT5_H\n"
"#ifndef B3_RIGIDBODY_DATA_H\n"
"#define B3_RIGIDBODY_DATA_H\n"
"#ifndef B3_FLOAT4_H\n"
"#ifdef __cplusplus\n"
"#else\n"
"#endif \n"
"#endif //B3_FLOAT4_H\n"
"#ifndef B3_QUAT_H\n"
"#define B3_QUAT_H\n"
"#ifndef B3_PLATFORM_DEFINITIONS_H\n"
"#ifdef __cplusplus\n"
"#else\n"
"#endif\n"
"#endif\n"
"#ifndef B3_FLOAT4_H\n"
"#ifdef __cplusplus\n"
"#else\n"
"#endif \n"
"#endif //B3_FLOAT4_H\n"
"#ifdef __cplusplus\n"
"#else\n"
" typedef float4 b3Quat;\n"
" #define b3QuatConstArg const b3Quat\n"
" \n"
" \n"
"inline float4 b3FastNormalize4(float4 v)\n"
"{\n"
" v = (float4)(v.xyz,0.f);\n"
" return fast_normalize(v);\n"
"}\n"
" \n"
"inline b3Quat b3QuatMul(b3Quat a, b3Quat b);\n"
"inline b3Quat b3QuatNormalize(b3QuatConstArg in);\n"
"inline b3Quat b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec);\n"
"inline b3Quat b3QuatInvert(b3QuatConstArg q);\n"
"inline b3Quat b3QuatMul(b3QuatConstArg a, b3QuatConstArg b)\n"
"{\n"
" b3Quat ans;\n"
" ans = b3Cross3( a, b );\n"
" ans += a.w*b+b.w*a;\n"
"// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n"
" ans.w = a.w*b.w - b3Dot3F4(a, b);\n"
" return ans;\n"
"}\n"
"inline b3Quat b3QuatNormalize(b3QuatConstArg in)\n"
"{\n"
" //return b3FastNormalize4(in);\n"
" float len = native_sqrt(dot(q, q));\n"
" if(len > 0.f)\n"
" {\n"
" q *= 1.f / len;\n"
" }\n"
" else\n"
" {\n"
" q.x = q.y = q.z = 0.f;\n"
" q.w = 1.f;\n"
" }\n"
" return q;\n"
"}\n"
"inline float4 b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec)\n"
"{\n"
" b3Quat qInv = b3QuatInvert( q );\n"
" float4 vcpy = vec;\n"
" vcpy.w = 0.f;\n"
" float4 out = b3QuatMul(b3QuatMul(q,vcpy),qInv);\n"
" return out;\n"
"}\n"
"inline b3Quat b3QuatInvert(b3QuatConstArg q)\n"
"{\n"
" return (b3Quat)(-q.xyz, q.w);\n"
"}\n"
"inline float4 b3QuatInvRotate(b3QuatConstArg q, b3QuatConstArg vec)\n"
"{\n"
" return b3QuatRotate( b3QuatInvert( q ), vec );\n"
"}\n"
"inline b3Float4 b3TransformPoint(b3Float4ConstArg point, b3Float4ConstArg translation, b3QuatConstArg orientation)\n"
"{\n"
" return b3QuatRotate( orientation, point ) + (translation);\n"
"}\n"
" \n"
"#endif \n"
"#endif //B3_QUAT_H\n"
"#ifndef B3_MAT3x3_H\n"
"#define B3_MAT3x3_H\n"
"#ifndef B3_QUAT_H\n"
"#ifdef __cplusplus\n"
"#else\n"
"#endif \n"
"#endif //B3_QUAT_H\n"
"#ifdef __cplusplus\n"
"#else\n"
"typedef struct\n"
"{\n"
" b3Float4 m_row[3];\n"
"}b3Mat3x3;\n"
"#define b3Mat3x3ConstArg const b3Mat3x3\n"
"#define b3GetRow(m,row) (m.m_row[row])\n"
"inline b3Mat3x3 b3QuatGetRotationMatrix(b3Quat quat)\n"
"{\n"
" b3Float4 quat2 = (b3Float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f);\n"
" b3Mat3x3 out;\n"
" out.m_row[0].x=1-2*quat2.y-2*quat2.z;\n"
" out.m_row[0].y=2*quat.x*quat.y-2*quat.w*quat.z;\n"
" out.m_row[0].z=2*quat.x*quat.z+2*quat.w*quat.y;\n"
" out.m_row[0].w = 0.f;\n"
" out.m_row[1].x=2*quat.x*quat.y+2*quat.w*quat.z;\n"
" out.m_row[1].y=1-2*quat2.x-2*quat2.z;\n"
" out.m_row[1].z=2*quat.y*quat.z-2*quat.w*quat.x;\n"
" out.m_row[1].w = 0.f;\n"
" out.m_row[2].x=2*quat.x*quat.z-2*quat.w*quat.y;\n"
" out.m_row[2].y=2*quat.y*quat.z+2*quat.w*quat.x;\n"
" out.m_row[2].z=1-2*quat2.x-2*quat2.y;\n"
" out.m_row[2].w = 0.f;\n"
" return out;\n"
"}\n"
"inline b3Mat3x3 b3AbsoluteMat3x3(b3Mat3x3ConstArg matIn)\n"
"{\n"
" b3Mat3x3 out;\n"
" out.m_row[0] = fabs(matIn.m_row[0]);\n"
" out.m_row[1] = fabs(matIn.m_row[1]);\n"
" out.m_row[2] = fabs(matIn.m_row[2]);\n"
" return out;\n"
"}\n"
"__inline\n"
"b3Mat3x3 mtZero();\n"
"__inline\n"
"b3Mat3x3 mtIdentity();\n"
"__inline\n"
"b3Mat3x3 mtTranspose(b3Mat3x3 m);\n"
"__inline\n"
"b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b);\n"
"__inline\n"
"b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b);\n"
"__inline\n"
"b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b);\n"
"__inline\n"
"b3Mat3x3 mtZero()\n"
"{\n"
" b3Mat3x3 m;\n"
" m.m_row[0] = (b3Float4)(0.f);\n"
" m.m_row[1] = (b3Float4)(0.f);\n"
" m.m_row[2] = (b3Float4)(0.f);\n"
" return m;\n"
"}\n"
"__inline\n"
"b3Mat3x3 mtIdentity()\n"
"{\n"
" b3Mat3x3 m;\n"
" m.m_row[0] = (b3Float4)(1,0,0,0);\n"
" m.m_row[1] = (b3Float4)(0,1,0,0);\n"
" m.m_row[2] = (b3Float4)(0,0,1,0);\n"
" return m;\n"
"}\n"
"__inline\n"
"b3Mat3x3 mtTranspose(b3Mat3x3 m)\n"
"{\n"
" b3Mat3x3 out;\n"
" out.m_row[0] = (b3Float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f);\n"
" out.m_row[1] = (b3Float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f);\n"
" out.m_row[2] = (b3Float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);\n"
" return out;\n"
"}\n"
"__inline\n"
"b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b)\n"
"{\n"
" b3Mat3x3 transB;\n"
" transB = mtTranspose( b );\n"
" b3Mat3x3 ans;\n"
" // why this doesn't run when 0ing in the for{}\n"
" a.m_row[0].w = 0.f;\n"
" a.m_row[1].w = 0.f;\n"
" a.m_row[2].w = 0.f;\n"
" for(int i=0; i<3; i++)\n"
" {\n"
"// a.m_row[i].w = 0.f;\n"
" ans.m_row[i].x = b3Dot3F4(a.m_row[i],transB.m_row[0]);\n"
" ans.m_row[i].y = b3Dot3F4(a.m_row[i],transB.m_row[1]);\n"
" ans.m_row[i].z = b3Dot3F4(a.m_row[i],transB.m_row[2]);\n"
" ans.m_row[i].w = 0.f;\n"
" }\n"
" return ans;\n"
"}\n"
"__inline\n"
"b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b)\n"
"{\n"
" b3Float4 ans;\n"
" ans.x = b3Dot3F4( a.m_row[0], b );\n"
" ans.y = b3Dot3F4( a.m_row[1], b );\n"
" ans.z = b3Dot3F4( a.m_row[2], b );\n"
" ans.w = 0.f;\n"
" return ans;\n"
"}\n"
"__inline\n"
"b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b)\n"
"{\n"
" b3Float4 colx = b3MakeFloat4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n"
" b3Float4 coly = b3MakeFloat4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n"
" b3Float4 colz = b3MakeFloat4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n"
" b3Float4 ans;\n"
" ans.x = b3Dot3F4( a, colx );\n"
" ans.y = b3Dot3F4( a, coly );\n"
" ans.z = b3Dot3F4( a, colz );\n"
" return ans;\n"
"}\n"
"#endif\n"
"#endif //B3_MAT3x3_H\n"
"typedef struct b3RigidBodyData b3RigidBodyData_t;\n"
"struct b3RigidBodyData\n"
"{\n"
" b3Float4 m_pos;\n"
" b3Quat m_quat;\n"
" b3Float4 m_linVel;\n"
" b3Float4 m_angVel;\n"
" int m_collidableIdx;\n"
" float m_invMass;\n"
" float m_restituitionCoeff;\n"
" float m_frictionCoeff;\n"
"};\n"
"typedef struct b3InertiaData b3InertiaData_t;\n"
"struct b3InertiaData\n"
"{\n"
" b3Mat3x3 m_invInertiaWorld;\n"
" b3Mat3x3 m_initInvInertia;\n"
"};\n"
"#endif //B3_RIGIDBODY_DATA_H\n"
" \n"
"void b3PlaneSpace1 (b3Float4ConstArg n, b3Float4* p, b3Float4* q);\n"
" void b3PlaneSpace1 (b3Float4ConstArg n, b3Float4* p, b3Float4* q)\n"
"{\n"
" if (b3Fabs(n.z) > 0.70710678f) {\n"
" // choose p in y-z plane\n"
" float a = n.y*n.y + n.z*n.z;\n"
" float k = 1.f/sqrt(a);\n"
" p[0].x = 0;\n"
" p[0].y = -n.z*k;\n"
" p[0].z = n.y*k;\n"
" // set q = n x p\n"
" q[0].x = a*k;\n"
" q[0].y = -n.x*p[0].z;\n"
" q[0].z = n.x*p[0].y;\n"
" }\n"
" else {\n"
" // choose p in x-y plane\n"
" float a = n.x*n.x + n.y*n.y;\n"
" float k = 1.f/sqrt(a);\n"
" p[0].x = -n.y*k;\n"
" p[0].y = n.x*k;\n"
" p[0].z = 0;\n"
" // set q = n x p\n"
" q[0].x = -n.z*p[0].y;\n"
" q[0].y = n.z*p[0].x;\n"
" q[0].z = a*k;\n"
" }\n"
"}\n"
" \n"
"void setLinearAndAngular( b3Float4ConstArg n, b3Float4ConstArg r0, b3Float4ConstArg r1, b3Float4* linear, b3Float4* angular0, b3Float4* angular1)\n"
"{\n"
" *linear = b3MakeFloat4(n.x,n.y,n.z,0.f);\n"
" *angular0 = b3Cross3(r0, n);\n"
" *angular1 = -b3Cross3(r1, n);\n"
"}\n"
"float calcRelVel( b3Float4ConstArg l0, b3Float4ConstArg l1, b3Float4ConstArg a0, b3Float4ConstArg a1, b3Float4ConstArg linVel0,\n"
" b3Float4ConstArg angVel0, b3Float4ConstArg linVel1, b3Float4ConstArg angVel1 )\n"
"{\n"
" return b3Dot3F4(l0, linVel0) + b3Dot3F4(a0, angVel0) + b3Dot3F4(l1, linVel1) + b3Dot3F4(a1, angVel1);\n"
"}\n"
"float calcJacCoeff(b3Float4ConstArg linear0, b3Float4ConstArg linear1, b3Float4ConstArg angular0, b3Float4ConstArg angular1,\n"
" float invMass0, const b3Mat3x3* invInertia0, float invMass1, const b3Mat3x3* invInertia1)\n"
"{\n"
" // linear0,1 are normlized\n"
" float jmj0 = invMass0;//b3Dot3F4(linear0, linear0)*invMass0;\n"
" float jmj1 = b3Dot3F4(mtMul3(angular0,*invInertia0), angular0);\n"
" float jmj2 = invMass1;//b3Dot3F4(linear1, linear1)*invMass1;\n"
" float jmj3 = b3Dot3F4(mtMul3(angular1,*invInertia1), angular1);\n"
" return -1.f/(jmj0+jmj1+jmj2+jmj3);\n"
"}\n"
"void setConstraint4( b3Float4ConstArg posA, b3Float4ConstArg linVelA, b3Float4ConstArg angVelA, float invMassA, b3Mat3x3ConstArg invInertiaA,\n"
" b3Float4ConstArg posB, b3Float4ConstArg linVelB, b3Float4ConstArg angVelB, float invMassB, b3Mat3x3ConstArg invInertiaB, \n"
" __global struct b3Contact4Data* src, float dt, float positionDrift, float positionConstraintCoeff,\n"
" b3ContactConstraint4_t* dstC )\n"
"{\n"
" dstC->m_bodyA = abs(src->m_bodyAPtrAndSignBit);\n"
" dstC->m_bodyB = abs(src->m_bodyBPtrAndSignBit);\n"
" float dtInv = 1.f/dt;\n"
" for(int ic=0; ic<4; ic++)\n"
" {\n"
" dstC->m_appliedRambdaDt[ic] = 0.f;\n"
" }\n"
" dstC->m_fJacCoeffInv[0] = dstC->m_fJacCoeffInv[1] = 0.f;\n"
" dstC->m_linear = src->m_worldNormalOnB;\n"
" dstC->m_linear.w = 0.7f ;//src->getFrictionCoeff() );\n"
" for(int ic=0; ic<4; ic++)\n"
" {\n"
" b3Float4 r0 = src->m_worldPosB[ic] - posA;\n"
" b3Float4 r1 = src->m_worldPosB[ic] - posB;\n"
" if( ic >= src->m_worldNormalOnB.w )//npoints\n"
" {\n"
" dstC->m_jacCoeffInv[ic] = 0.f;\n"
" continue;\n"
" }\n"
" float relVelN;\n"
" {\n"
" b3Float4 linear, angular0, angular1;\n"
" setLinearAndAngular(src->m_worldNormalOnB, r0, r1, &linear, &angular0, &angular1);\n"
" dstC->m_jacCoeffInv[ic] = calcJacCoeff(linear, -linear, angular0, angular1,\n"
" invMassA, &invInertiaA, invMassB, &invInertiaB );\n"
" relVelN = calcRelVel(linear, -linear, angular0, angular1,\n"
" linVelA, angVelA, linVelB, angVelB);\n"
" float e = 0.f;//src->getRestituitionCoeff();\n"
" if( relVelN*relVelN < 0.004f ) e = 0.f;\n"
" dstC->m_b[ic] = e*relVelN;\n"
" //float penetration = src->m_worldPosB[ic].w;\n"
" dstC->m_b[ic] += (src->m_worldPosB[ic].w + positionDrift)*positionConstraintCoeff*dtInv;\n"
" dstC->m_appliedRambdaDt[ic] = 0.f;\n"
" }\n"
" }\n"
" if( src->m_worldNormalOnB.w > 0 )//npoints\n"
" { // prepare friction\n"
" b3Float4 center = b3MakeFloat4(0.f,0.f,0.f,0.f);\n"
" for(int i=0; i<src->m_worldNormalOnB.w; i++) \n"
" center += src->m_worldPosB[i];\n"
" center /= (float)src->m_worldNormalOnB.w;\n"
" b3Float4 tangent[2];\n"
" b3PlaneSpace1(src->m_worldNormalOnB,&tangent[0],&tangent[1]);\n"
" \n"
" b3Float4 r[2];\n"
" r[0] = center - posA;\n"
" r[1] = center - posB;\n"
" for(int i=0; i<2; i++)\n"
" {\n"
" b3Float4 linear, angular0, angular1;\n"
" setLinearAndAngular(tangent[i], r[0], r[1], &linear, &angular0, &angular1);\n"
" dstC->m_fJacCoeffInv[i] = calcJacCoeff(linear, -linear, angular0, angular1,\n"
" invMassA, &invInertiaA, invMassB, &invInertiaB );\n"
" dstC->m_fAppliedRambdaDt[i] = 0.f;\n"
" }\n"
" dstC->m_center = center;\n"
" }\n"
" for(int i=0; i<4; i++)\n"
" {\n"
" if( i<src->m_worldNormalOnB.w )\n"
" {\n"
" dstC->m_worldPos[i] = src->m_worldPosB[i];\n"
" }\n"
" else\n"
" {\n"
" dstC->m_worldPos[i] = b3MakeFloat4(0.f,0.f,0.f,0.f);\n"
" }\n"
" }\n"
"}\n"
"#pragma OPENCL EXTENSION cl_amd_printf : enable\n"
"#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n"
"#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n"
@@ -208,197 +587,9 @@ static const char* solverSetupCL= \
" eqn.w = -dot3F4(eqn,a);\n"
" return eqn;\n"
"}\n"
"///////////////////////////////////////\n"
"// Matrix3x3\n"
"///////////////////////////////////////\n"
"typedef struct\n"
"{\n"
" float4 m_row[3];\n"
"}Matrix3x3;\n"
"__inline\n"
"Matrix3x3 mtZero();\n"
"__inline\n"
"Matrix3x3 mtIdentity();\n"
"__inline\n"
"Matrix3x3 mtTranspose(Matrix3x3 m);\n"
"__inline\n"
"Matrix3x3 mtMul(Matrix3x3 a, Matrix3x3 b);\n"
"__inline\n"
"float4 mtMul1(Matrix3x3 a, float4 b);\n"
"__inline\n"
"float4 mtMul3(float4 a, Matrix3x3 b);\n"
"__inline\n"
"Matrix3x3 mtZero()\n"
"{\n"
" Matrix3x3 m;\n"
" m.m_row[0] = (float4)(0.f);\n"
" m.m_row[1] = (float4)(0.f);\n"
" m.m_row[2] = (float4)(0.f);\n"
" return m;\n"
"}\n"
"__inline\n"
"Matrix3x3 mtIdentity()\n"
"{\n"
" Matrix3x3 m;\n"
" m.m_row[0] = (float4)(1,0,0,0);\n"
" m.m_row[1] = (float4)(0,1,0,0);\n"
" m.m_row[2] = (float4)(0,0,1,0);\n"
" return m;\n"
"}\n"
"__inline\n"
"Matrix3x3 mtTranspose(Matrix3x3 m)\n"
"{\n"
" Matrix3x3 out;\n"
" out.m_row[0] = (float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f);\n"
" out.m_row[1] = (float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f);\n"
" out.m_row[2] = (float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);\n"
" return out;\n"
"}\n"
"__inline\n"
"Matrix3x3 mtMul(Matrix3x3 a, Matrix3x3 b)\n"
"{\n"
" Matrix3x3 transB;\n"
" transB = mtTranspose( b );\n"
" Matrix3x3 ans;\n"
" // why this doesn't run when 0ing in the for{}\n"
" a.m_row[0].w = 0.f;\n"
" a.m_row[1].w = 0.f;\n"
" a.m_row[2].w = 0.f;\n"
" for(int i=0; i<3; i++)\n"
" {\n"
"// a.m_row[i].w = 0.f;\n"
" ans.m_row[i].x = dot3F4(a.m_row[i],transB.m_row[0]);\n"
" ans.m_row[i].y = dot3F4(a.m_row[i],transB.m_row[1]);\n"
" ans.m_row[i].z = dot3F4(a.m_row[i],transB.m_row[2]);\n"
" ans.m_row[i].w = 0.f;\n"
" }\n"
" return ans;\n"
"}\n"
"__inline\n"
"float4 mtMul1(Matrix3x3 a, float4 b)\n"
"{\n"
" float4 ans;\n"
" ans.x = dot3F4( a.m_row[0], b );\n"
" ans.y = dot3F4( a.m_row[1], b );\n"
" ans.z = dot3F4( a.m_row[2], b );\n"
" ans.w = 0.f;\n"
" return ans;\n"
"}\n"
"__inline\n"
"float4 mtMul3(float4 a, Matrix3x3 b)\n"
"{\n"
" float4 colx = make_float4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n"
" float4 coly = make_float4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n"
" float4 colz = make_float4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n"
" float4 ans;\n"
" ans.x = dot3F4( a, colx );\n"
" ans.y = dot3F4( a, coly );\n"
" ans.z = dot3F4( a, colz );\n"
" return ans;\n"
"}\n"
"///////////////////////////////////////\n"
"// Quaternion\n"
"///////////////////////////////////////\n"
"typedef float4 Quaternion;\n"
"__inline\n"
"Quaternion qtMul(Quaternion a, Quaternion b);\n"
"__inline\n"
"Quaternion qtNormalize(Quaternion in);\n"
"__inline\n"
"float4 qtRotate(Quaternion q, float4 vec);\n"
"__inline\n"
"Quaternion qtInvert(Quaternion q);\n"
"__inline\n"
"Matrix3x3 qtGetRotationMatrix(Quaternion q);\n"
"__inline\n"
"Quaternion qtMul(Quaternion a, Quaternion b)\n"
"{\n"
" Quaternion ans;\n"
" ans = cross3( a, b );\n"
" ans += a.w*b+b.w*a;\n"
"// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n"
" ans.w = a.w*b.w - dot3F4(a, b);\n"
" return ans;\n"
"}\n"
"__inline\n"
"Quaternion qtNormalize(Quaternion in)\n"
"{\n"
" return fastNormalize4(in);\n"
"// in /= length( in );\n"
"// return in;\n"
"}\n"
"__inline\n"
"float4 qtRotate(Quaternion q, float4 vec)\n"
"{\n"
" Quaternion qInv = qtInvert( q );\n"
" float4 vcpy = vec;\n"
" vcpy.w = 0.f;\n"
" float4 out = qtMul(qtMul(q,vcpy),qInv);\n"
" return out;\n"
"}\n"
"__inline\n"
"Quaternion qtInvert(Quaternion q)\n"
"{\n"
" return (Quaternion)(-q.xyz, q.w);\n"
"}\n"
"__inline\n"
"float4 qtInvRotate(const Quaternion q, float4 vec)\n"
"{\n"
" return qtRotate( qtInvert( q ), vec );\n"
"}\n"
"__inline\n"
"Matrix3x3 qtGetRotationMatrix(Quaternion quat)\n"
"{\n"
" float4 quat2 = (float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f);\n"
" Matrix3x3 out;\n"
" out.m_row[0].x=1-2*quat2.y-2*quat2.z;\n"
" out.m_row[0].y=2*quat.x*quat.y-2*quat.w*quat.z;\n"
" out.m_row[0].z=2*quat.x*quat.z+2*quat.w*quat.y;\n"
" out.m_row[0].w = 0.f;\n"
" out.m_row[1].x=2*quat.x*quat.y+2*quat.w*quat.z;\n"
" out.m_row[1].y=1-2*quat2.x-2*quat2.z;\n"
" out.m_row[1].z=2*quat.y*quat.z-2*quat.w*quat.x;\n"
" out.m_row[1].w = 0.f;\n"
" out.m_row[2].x=2*quat.x*quat.z-2*quat.w*quat.y;\n"
" out.m_row[2].y=2*quat.y*quat.z+2*quat.w*quat.x;\n"
" out.m_row[2].z=1-2*quat2.x-2*quat2.y;\n"
" out.m_row[2].w = 0.f;\n"
" return out;\n"
"}\n"
"#define WG_SIZE 64\n"
"typedef struct\n"
"{\n"
" float4 m_pos;\n"
" Quaternion m_quat;\n"
" float4 m_linVel;\n"
" float4 m_angVel;\n"
" u32 m_shapeIdx;\n"
" float m_invMass;\n"
" float m_restituitionCoeff;\n"
" float m_frictionCoeff;\n"
"} Body;\n"
"typedef struct\n"
"{\n"
" Matrix3x3 m_invInertia;\n"
" Matrix3x3 m_initInvInertia;\n"
"} Shape;\n"
"typedef struct\n"
"{\n"
" float4 m_linear;\n"
" float4 m_worldPos[4];\n"
" float4 m_center; \n"
" float m_jacCoeffInv[4];\n"
" float m_b[4];\n"
" float m_appliedRambdaDt[4];\n"
" float m_fJacCoeffInv[2]; \n"
" float m_fAppliedRambdaDt[2]; \n"
" u32 m_bodyA;\n"
" u32 m_bodyB;\n"
" int m_batchIdx;\n"
" u32 m_paddings[1];\n"
"} Constraint4;\n"
"typedef struct\n"
"{\n"
" int m_nConstraints;\n"
" int m_start;\n"
" int m_batchIdx;\n"
@@ -413,26 +604,6 @@ static const char* solverSetupCL= \
" int m_nSplit;\n"
"// int m_paddings[1];\n"
"} ConstBufferBatchSolve;\n"
"void setLinearAndAngular( float4 n, float4 r0, float4 r1, float4* linear, float4* angular0, float4* angular1)\n"
"{\n"
" *linear = make_float4(n.xyz,0.f);\n"
" *angular0 = cross3(r0, n);\n"
" *angular1 = -cross3(r1, n);\n"
"}\n"
"float calcRelVel( float4 l0, float4 l1, float4 a0, float4 a1, float4 linVel0, float4 angVel0, float4 linVel1, float4 angVel1 )\n"
"{\n"
" return dot3F4(l0, linVel0) + dot3F4(a0, angVel0) + dot3F4(l1, linVel1) + dot3F4(a1, angVel1);\n"
"}\n"
"float calcJacCoeff(const float4 linear0, const float4 linear1, const float4 angular0, const float4 angular1,\n"
" float invMass0, const Matrix3x3* invInertia0, float invMass1, const Matrix3x3* invInertia1)\n"
"{\n"
" // linear0,1 are normlized\n"
" float jmj0 = invMass0;//dot3F4(linear0, linear0)*invMass0;\n"
" float jmj1 = dot3F4(mtMul3(angular0,*invInertia0), angular0);\n"
" float jmj2 = invMass1;//dot3F4(linear1, linear1)*invMass1;\n"
" float jmj3 = dot3F4(mtMul3(angular1,*invInertia1), angular1);\n"
" return -1.f/(jmj0+jmj1+jmj2+jmj3);\n"
"}\n"
" \n"
"typedef struct \n"
"{\n"
@@ -448,122 +619,13 @@ static const char* solverSetupCL= \
"typedef struct\n"
"{\n"
" int m_nContacts;\n"
" int m_staticIdx;\n"
" float m_scale;\n"
" int m_nSplit;\n"
"} ConstBufferSSD;\n"
"void btPlaneSpace1 (float4 n, float4* p, float4* q);\n"
" void btPlaneSpace1 (float4 n, float4* p, float4* q)\n"
"{\n"
" if (fabs(n.z) > 0.70710678f) {\n"
" // choose p in y-z plane\n"
" float a = n.y*n.y + n.z*n.z;\n"
" float k = 1.f/sqrt(a);\n"
" p[0].x = 0;\n"
" p[0].y = -n.z*k;\n"
" p[0].z = n.y*k;\n"
" // set q = n x p\n"
" q[0].x = a*k;\n"
" q[0].y = -n.x*p[0].z;\n"
" q[0].z = n.x*p[0].y;\n"
" }\n"
" else {\n"
" // choose p in x-y plane\n"
" float a = n.x*n.x + n.y*n.y;\n"
" float k = 1.f/sqrt(a);\n"
" p[0].x = -n.y*k;\n"
" p[0].y = n.x*k;\n"
" p[0].z = 0;\n"
" // set q = n x p\n"
" q[0].x = -n.z*p[0].y;\n"
" q[0].y = n.z*p[0].x;\n"
" q[0].z = a*k;\n"
" }\n"
"}\n"
"void setConstraint4( const float4 posA, const float4 linVelA, const float4 angVelA, float invMassA, const Matrix3x3 invInertiaA,\n"
" const float4 posB, const float4 linVelB, const float4 angVelB, float invMassB, const Matrix3x3 invInertiaB, \n"
" __global struct b3Contact4Data* src, float dt, float positionDrift, float positionConstraintCoeff,\n"
" Constraint4* dstC )\n"
"{\n"
" dstC->m_bodyA = abs(src->m_bodyAPtrAndSignBit);\n"
" dstC->m_bodyB = abs(src->m_bodyBPtrAndSignBit);\n"
" float dtInv = 1.f/dt;\n"
" for(int ic=0; ic<4; ic++)\n"
" {\n"
" dstC->m_appliedRambdaDt[ic] = 0.f;\n"
" }\n"
" dstC->m_fJacCoeffInv[0] = dstC->m_fJacCoeffInv[1] = 0.f;\n"
" dstC->m_linear = src->m_worldNormalOnB;\n"
" dstC->m_linear.w = 0.7f ;//src->getFrictionCoeff() );\n"
" for(int ic=0; ic<4; ic++)\n"
" {\n"
" float4 r0 = src->m_worldPosB[ic] - posA;\n"
" float4 r1 = src->m_worldPosB[ic] - posB;\n"
" if( ic >= src->m_worldNormalOnB.w )//npoints\n"
" {\n"
" dstC->m_jacCoeffInv[ic] = 0.f;\n"
" continue;\n"
" }\n"
" float relVelN;\n"
" {\n"
" float4 linear, angular0, angular1;\n"
" setLinearAndAngular(src->m_worldNormalOnB, r0, r1, &linear, &angular0, &angular1);\n"
" dstC->m_jacCoeffInv[ic] = calcJacCoeff(linear, -linear, angular0, angular1,\n"
" invMassA, &invInertiaA, invMassB, &invInertiaB );\n"
" relVelN = calcRelVel(linear, -linear, angular0, angular1,\n"
" linVelA, angVelA, linVelB, angVelB);\n"
" float e = 0.f;//src->getRestituitionCoeff();\n"
" if( relVelN*relVelN < 0.004f ) e = 0.f;\n"
" dstC->m_b[ic] = e*relVelN;\n"
" //float penetration = src->m_worldPosB[ic].w;\n"
" dstC->m_b[ic] += (src->m_worldPosB[ic].w + positionDrift)*positionConstraintCoeff*dtInv;\n"
" dstC->m_appliedRambdaDt[ic] = 0.f;\n"
" }\n"
" }\n"
" if( src->m_worldNormalOnB.w > 0 )//npoints\n"
" { // prepare friction\n"
" float4 center = make_float4(0.f);\n"
" for(int i=0; i<src->m_worldNormalOnB.w; i++) \n"
" center += src->m_worldPosB[i];\n"
" center /= (float)src->m_worldNormalOnB.w;\n"
" float4 tangent[2];\n"
" btPlaneSpace1(src->m_worldNormalOnB,&tangent[0],&tangent[1]);\n"
" \n"
" float4 r[2];\n"
" r[0] = center - posA;\n"
" r[1] = center - posB;\n"
" for(int i=0; i<2; i++)\n"
" {\n"
" float4 linear, angular0, angular1;\n"
" setLinearAndAngular(tangent[i], r[0], r[1], &linear, &angular0, &angular1);\n"
" dstC->m_fJacCoeffInv[i] = calcJacCoeff(linear, -linear, angular0, angular1,\n"
" invMassA, &invInertiaA, invMassB, &invInertiaB );\n"
" dstC->m_fAppliedRambdaDt[i] = 0.f;\n"
" }\n"
" dstC->m_center = center;\n"
" }\n"
" for(int i=0; i<4; i++)\n"
" {\n"
" if( i<src->m_worldNormalOnB.w )\n"
" {\n"
" dstC->m_worldPos[i] = src->m_worldPosB[i];\n"
" }\n"
" else\n"
" {\n"
" dstC->m_worldPos[i] = make_float4(0.f);\n"
" }\n"
" }\n"
"}\n"
"typedef struct\n"
"{\n"
" int m_nContacts;\n"
" float m_dt;\n"
" float m_positionDrift;\n"
" float m_positionConstraintCoeff;\n"
"} ConstBufferCTC;\n"
"__kernel\n"
"__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n"
"void ContactToConstraintKernel(__global struct b3Contact4Data* gContact, __global Body* gBodies, __global Shape* gShapes, __global Constraint4* gConstraintOut, \n"
"void ContactToConstraintKernel(__global struct b3Contact4Data* gContact, __global b3RigidBodyData_t* gBodies, __global b3InertiaData_t* gShapes, __global b3ContactConstraint4_t* gConstraintOut, \n"
"int nContacts,\n"
"float dt,\n"
"float positionDrift,\n"
@@ -580,13 +642,13 @@ static const char* solverSetupCL= \
" float4 linVelA = gBodies[aIdx].m_linVel;\n"
" float4 angVelA = gBodies[aIdx].m_angVel;\n"
" float invMassA = gBodies[aIdx].m_invMass;\n"
" Matrix3x3 invInertiaA = gShapes[aIdx].m_invInertia;\n"
" b3Mat3x3 invInertiaA = gShapes[aIdx].m_initInvInertia;\n"
" float4 posB = gBodies[bIdx].m_pos;\n"
" float4 linVelB = gBodies[bIdx].m_linVel;\n"
" float4 angVelB = gBodies[bIdx].m_angVel;\n"
" float invMassB = gBodies[bIdx].m_invMass;\n"
" Matrix3x3 invInertiaB = gShapes[bIdx].m_invInertia;\n"
" Constraint4 cs;\n"
" b3Mat3x3 invInertiaB = gShapes[bIdx].m_initInvInertia;\n"
" b3ContactConstraint4_t cs;\n"
" setConstraint4( posA, linVelA, angVelA, invMassA, invInertiaA, posB, linVelB, angVelB, invMassB, invInertiaB,\n"
" &gContact[gIdx], dt, positionDrift, positionConstraintCoeff,\n"
" &cs );\n"

View File

@@ -85,7 +85,18 @@ static const char* updateAabbsKernelCL= \
"}\n"
"inline b3Quat b3QuatNormalize(b3QuatConstArg in)\n"
"{\n"
" return b3FastNormalize4(in);\n"
" //return b3FastNormalize4(in);\n"
" float len = native_sqrt(dot(q, q));\n"
" if(len > 0.f)\n"
" {\n"
" q *= 1.f / len;\n"
" }\n"
" else\n"
" {\n"
" q.x = q.y = q.z = 0.f;\n"
" q.w = 1.f;\n"
" }\n"
" return q;\n"
"}\n"
"inline float4 b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec)\n"
"{\n"
@@ -114,13 +125,13 @@ static const char* updateAabbsKernelCL= \
"#else\n"
"typedef struct\n"
"{\n"
" float4 m_row[3];\n"
" b3Float4 m_row[3];\n"
"}b3Mat3x3;\n"
"#define b3Mat3x3ConstArg const b3Mat3x3\n"
"#define b3GetRow(m,row) (m.m_row[row])\n"
"inline b3Mat3x3 b3QuatGetRotationMatrix(b3Quat quat)\n"
"{\n"
" float4 quat2 = (float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f);\n"
" b3Float4 quat2 = (b3Float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f);\n"
" b3Mat3x3 out;\n"
" out.m_row[0].x=1-2*quat2.y-2*quat2.z;\n"
" out.m_row[0].y=2*quat.x*quat.y-2*quat.w*quat.z;\n"
@@ -144,6 +155,87 @@ static const char* updateAabbsKernelCL= \
" out.m_row[2] = fabs(matIn.m_row[2]);\n"
" return out;\n"
"}\n"
"__inline\n"
"b3Mat3x3 mtZero();\n"
"__inline\n"
"b3Mat3x3 mtIdentity();\n"
"__inline\n"
"b3Mat3x3 mtTranspose(b3Mat3x3 m);\n"
"__inline\n"
"b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b);\n"
"__inline\n"
"b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b);\n"
"__inline\n"
"b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b);\n"
"__inline\n"
"b3Mat3x3 mtZero()\n"
"{\n"
" b3Mat3x3 m;\n"
" m.m_row[0] = (b3Float4)(0.f);\n"
" m.m_row[1] = (b3Float4)(0.f);\n"
" m.m_row[2] = (b3Float4)(0.f);\n"
" return m;\n"
"}\n"
"__inline\n"
"b3Mat3x3 mtIdentity()\n"
"{\n"
" b3Mat3x3 m;\n"
" m.m_row[0] = (b3Float4)(1,0,0,0);\n"
" m.m_row[1] = (b3Float4)(0,1,0,0);\n"
" m.m_row[2] = (b3Float4)(0,0,1,0);\n"
" return m;\n"
"}\n"
"__inline\n"
"b3Mat3x3 mtTranspose(b3Mat3x3 m)\n"
"{\n"
" b3Mat3x3 out;\n"
" out.m_row[0] = (b3Float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f);\n"
" out.m_row[1] = (b3Float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f);\n"
" out.m_row[2] = (b3Float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);\n"
" return out;\n"
"}\n"
"__inline\n"
"b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b)\n"
"{\n"
" b3Mat3x3 transB;\n"
" transB = mtTranspose( b );\n"
" b3Mat3x3 ans;\n"
" // why this doesn't run when 0ing in the for{}\n"
" a.m_row[0].w = 0.f;\n"
" a.m_row[1].w = 0.f;\n"
" a.m_row[2].w = 0.f;\n"
" for(int i=0; i<3; i++)\n"
" {\n"
"// a.m_row[i].w = 0.f;\n"
" ans.m_row[i].x = b3Dot3F4(a.m_row[i],transB.m_row[0]);\n"
" ans.m_row[i].y = b3Dot3F4(a.m_row[i],transB.m_row[1]);\n"
" ans.m_row[i].z = b3Dot3F4(a.m_row[i],transB.m_row[2]);\n"
" ans.m_row[i].w = 0.f;\n"
" }\n"
" return ans;\n"
"}\n"
"__inline\n"
"b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b)\n"
"{\n"
" b3Float4 ans;\n"
" ans.x = b3Dot3F4( a.m_row[0], b );\n"
" ans.y = b3Dot3F4( a.m_row[1], b );\n"
" ans.z = b3Dot3F4( a.m_row[2], b );\n"
" ans.w = 0.f;\n"
" return ans;\n"
"}\n"
"__inline\n"
"b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b)\n"
"{\n"
" b3Float4 colx = b3MakeFloat4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n"
" b3Float4 coly = b3MakeFloat4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n"
" b3Float4 colz = b3MakeFloat4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n"
" b3Float4 ans;\n"
" ans.x = b3Dot3F4( a, colx );\n"
" ans.y = b3Dot3F4( a, coly );\n"
" ans.z = b3Dot3F4( a, colz );\n"
" return ans;\n"
"}\n"
"#endif\n"
"#endif //B3_MAT3x3_H\n"
"typedef struct b3Aabb b3Aabb_t;\n"
@@ -260,6 +352,11 @@ static const char* updateAabbsKernelCL= \
"#else\n"
"#endif \n"
"#endif //B3_QUAT_H\n"
"#ifndef B3_MAT3x3_H\n"
"#ifdef __cplusplus\n"
"#else\n"
"#endif\n"
"#endif //B3_MAT3x3_H\n"
"typedef struct b3RigidBodyData b3RigidBodyData_t;\n"
"struct b3RigidBodyData\n"
"{\n"
@@ -272,7 +369,12 @@ static const char* updateAabbsKernelCL= \
" float m_restituitionCoeff;\n"
" float m_frictionCoeff;\n"
"};\n"
" \n"
"typedef struct b3InertiaData b3InertiaData_t;\n"
"struct b3InertiaData\n"
"{\n"
" b3Mat3x3 m_invInertiaWorld;\n"
" b3Mat3x3 m_initInvInertia;\n"
"};\n"
"#endif //B3_RIGIDBODY_DATA_H\n"
" \n"
"void b3ComputeWorldAabb( int bodyId, __global const b3RigidBodyData_t* bodies, __global const b3Collidable_t* collidables, __global const b3Aabb_t* localShapeAABB, __global b3Aabb_t* worldAabbs)\n"