Files
bullet3/src/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.h
erwin coumans 3fe969c4ee b3Solver -> pass pointer to source instead of 0 (was left over from a debugging session), thanks to David for the report
Break up clipHullHullConcaveConvexKernel into multiple stages, so it might 'fit' in Apple's OpenCL implementation
Implemented bvhTraversalKernel and findConcaveSeparatingAxis on CPU (debugging, possible future CPU version)
2013-12-13 07:52:41 -08:00

2052 lines
71 KiB
C

//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project
static const char* satClipKernelsCL= \
"#define TRIANGLE_NUM_CONVEX_FACES 5\n"
"#pragma OPENCL EXTENSION cl_amd_printf : enable\n"
"#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n"
"#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n"
"#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n"
"#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n"
"#ifdef cl_ext_atomic_counters_32\n"
"#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable\n"
"#else\n"
"#define counter32_t volatile __global int*\n"
"#endif\n"
"#define GET_GROUP_IDX get_group_id(0)\n"
"#define GET_LOCAL_IDX get_local_id(0)\n"
"#define GET_GLOBAL_IDX get_global_id(0)\n"
"#define GET_GROUP_SIZE get_local_size(0)\n"
"#define GET_NUM_GROUPS get_num_groups(0)\n"
"#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)\n"
"#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE)\n"
"#define AtomInc(x) atom_inc(&(x))\n"
"#define AtomInc1(x, out) out = atom_inc(&(x))\n"
"#define AppendInc(x, out) out = atomic_inc(x)\n"
"#define AtomAdd(x, value) atom_add(&(x), value)\n"
"#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value )\n"
"#define AtomXhg(x, value) atom_xchg ( &(x), value )\n"
"#define max2 max\n"
"#define min2 min\n"
"typedef unsigned int u32;\n"
"#ifndef B3_CONTACT4DATA_H\n"
"#define B3_CONTACT4DATA_H\n"
"#ifndef B3_FLOAT4_H\n"
"#define B3_FLOAT4_H\n"
"#ifndef B3_PLATFORM_DEFINITIONS_H\n"
"#define B3_PLATFORM_DEFINITIONS_H\n"
"struct MyTest\n"
"{\n"
" int bla;\n"
"};\n"
"#ifdef __cplusplus\n"
"#else\n"
"#define b3AtomicInc atomic_inc\n"
"#define b3AtomicAdd atomic_add\n"
"#define b3Fabs fabs\n"
"#define b3Sqrt native_sqrt\n"
"#define b3Sin native_sin\n"
"#define b3Cos native_cos\n"
"#endif\n"
"#endif\n"
"#ifdef __cplusplus\n"
"#else\n"
" typedef float4 b3Float4;\n"
" #define b3Float4ConstArg const b3Float4\n"
" #define b3MakeFloat4 (float4)\n"
/* b3Dot3F4: dot product of the xyz parts of v0 and v1. Both operands are
   rebuilt with w = 0 before dot() is called, so the w lanes never contribute. */
" float b3Dot3F4(b3Float4ConstArg v0,b3Float4ConstArg v1)\n"
" {\n"
" float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n"
" float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n"
" return dot(a1, b1);\n"
" }\n"
/* b3Cross3: cross product of v0 and v1 via the OpenCL cross() built-in.
   The w lane of each operand is replaced by 0 before the call. */
" b3Float4 b3Cross3(b3Float4ConstArg v0,b3Float4ConstArg v1)\n"
" {\n"
" float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n"
" float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n"
" return cross(a1, b1);\n"
" }\n"
" #define b3MinFloat4 min\n"
" #define b3MaxFloat4 max\n"
" #define b3Normalized(a) normalize(a)\n"
"#endif \n"
" \n"
/* b3IsAlmostZero: returns true when |v.x|, |v.y| and |v.z| are all <= 1e-6,
   false as soon as one of them exceeds that threshold. v.w is not examined. */
"inline bool b3IsAlmostZero(b3Float4ConstArg v)\n"
"{\n"
" if(b3Fabs(v.x)>1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6) \n"
" return false;\n"
" return true;\n"
"}\n"
"#endif //B3_FLOAT4_H\n"
/* b3Contact4Data: one contact record with storage for four world-space
   positions (m_worldPosB[4]). The number of valid points is kept as a float in
   m_worldNormalOnB.w; b3Contact4Data_getNumPoints / b3Contact4Data_setNumPoints
   read and write it there. */
"typedef struct b3Contact4Data b3Contact4Data_t;\n"
"struct b3Contact4Data\n"
"{\n"
" b3Float4 m_worldPosB[4];\n"
"// b3Float4 m_localPosA[4];\n"
"// b3Float4 m_localPosB[4];\n"
" b3Float4 m_worldNormalOnB; // w: m_nPoints\n"
" unsigned short m_restituitionCoeffCmp;\n"
" unsigned short m_frictionCoeffCmp;\n"
" int m_batchIdx;\n"
" int m_bodyAPtrAndSignBit;//x:m_bodyAPtr, y:m_bodyBPtr\n"
" int m_bodyBPtrAndSignBit;\n"
" int m_childIndexA;\n"
" int m_childIndexB;\n"
" int m_unused1;\n"
" int m_unused2;\n"
"};\n"
/* b3Contact4Data_getNumPoints: returns the point count stored in
   contact->m_worldNormalOnB.w, converted from float to int. */
"inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)\n"
"{\n"
" return (int)contact->m_worldNormalOnB.w;\n"
"};\n"
/* b3Contact4Data_setNumPoints: stores numPoints, converted to float, in
   contact->m_worldNormalOnB.w. The xyz lanes of the normal are left untouched. */
"inline void b3Contact4Data_setNumPoints(struct b3Contact4Data* contact, int numPoints)\n"
"{\n"
" contact->m_worldNormalOnB.w = (float)numPoints;\n"
"};\n"
"#endif //B3_CONTACT4DATA_H\n"
"#ifndef B3_CONVEX_POLYHEDRON_DATA_H\n"
"#define B3_CONVEX_POLYHEDRON_DATA_H\n"
"#ifndef B3_FLOAT4_H\n"
"#ifdef __cplusplus\n"
"#else\n"
"#endif \n"
"#endif //B3_FLOAT4_H\n"
"#ifndef B3_QUAT_H\n"
"#define B3_QUAT_H\n"
"#ifndef B3_PLATFORM_DEFINITIONS_H\n"
"#ifdef __cplusplus\n"
"#else\n"
"#endif\n"
"#endif\n"
"#ifndef B3_FLOAT4_H\n"
"#ifdef __cplusplus\n"
"#else\n"
"#endif \n"
"#endif //B3_FLOAT4_H\n"
"#ifdef __cplusplus\n"
"#else\n"
" typedef float4 b3Quat;\n"
" #define b3QuatConstArg const b3Quat\n"
" \n"
" \n"
/* b3FastNormalize4: clears the w lane of v, then normalizes the result with
   the fast_normalize() built-in. */
"inline float4 b3FastNormalize4(float4 v)\n"
"{\n"
" v = (float4)(v.xyz,0.f);\n"
" return fast_normalize(v);\n"
"}\n"
" \n"
"inline b3Quat b3QuatMul(b3Quat a, b3Quat b);\n"
"inline b3Quat b3QuatNormalized(b3QuatConstArg in);\n"
"inline b3Quat b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec);\n"
"inline b3Quat b3QuatInvert(b3QuatConstArg q);\n"
/* b3QuatMul: quaternion product of a and b with xyz as the vector part and w
   as the scalar part:
     vector part = cross(a,b) + a.w*b + b.w*a
     scalar part = a.w*b.w - dot3(a,b)
   The w lane produced by the vector expression is overwritten by the scalar
   part on the last assignment. */
"inline b3Quat b3QuatMul(b3QuatConstArg a, b3QuatConstArg b)\n"
"{\n"
" b3Quat ans;\n"
" ans = b3Cross3( a, b );\n"
" ans += a.w*b+b.w*a;\n"
"// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n"
" ans.w = a.w*b.w - b3Dot3F4(a, b);\n"
" return ans;\n"
"}\n"
/* b3QuatNormalized: returns `in` scaled by 1/len, where len is
   native_sqrt(dot(in,in)) over all four lanes. When len is not greater than
   zero the result is (0,0,0,1) instead of dividing by zero. */
"inline b3Quat b3QuatNormalized(b3QuatConstArg in)\n"
"{\n"
" b3Quat q;\n"
" q=in;\n"
" //return b3FastNormalize4(in);\n"
" float len = native_sqrt(dot(q, q));\n"
" if(len > 0.f)\n"
" {\n"
" q *= 1.f / len;\n"
" }\n"
" else\n"
" {\n"
" q.x = q.y = q.z = 0.f;\n"
" q.w = 1.f;\n"
" }\n"
" return q;\n"
"}\n"
/* b3QuatRotate: computes q * v * b3QuatInvert(q), where v is `vec` with its w
   lane forced to 0. NOTE(review): b3QuatInvert only negates xyz, so this is a
   pure rotation only if q has unit length - confirm callers guarantee that. */
"inline float4 b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec)\n"
"{\n"
" b3Quat qInv = b3QuatInvert( q );\n"
" float4 vcpy = vec;\n"
" vcpy.w = 0.f;\n"
" float4 out = b3QuatMul(b3QuatMul(q,vcpy),qInv);\n"
" return out;\n"
"}\n"
/* b3QuatInvert: returns (-q.x, -q.y, -q.z, q.w), i.e. the conjugate of q.
   No division by the squared length is performed. */
"inline b3Quat b3QuatInvert(b3QuatConstArg q)\n"
"{\n"
" return (b3Quat)(-q.xyz, q.w);\n"
"}\n"
/* b3QuatInvRotate: applies b3QuatRotate with the conjugate of q, i.e. the
   opposite rotation to b3QuatRotate(q, vec). */
"inline float4 b3QuatInvRotate(b3QuatConstArg q, b3QuatConstArg vec)\n"
"{\n"
" return b3QuatRotate( b3QuatInvert( q ), vec );\n"
"}\n"
/* b3TransformPoint: rotates `point` by `orientation` (b3QuatRotate) and then
   adds `translation` lane by lane, including the w lane of translation. */
"inline b3Float4 b3TransformPoint(b3Float4ConstArg point, b3Float4ConstArg translation, b3QuatConstArg orientation)\n"
"{\n"
" return b3QuatRotate( orientation, point ) + (translation);\n"
"}\n"
" \n"
"#endif \n"
"#endif //B3_QUAT_H\n"
/* b3GpuFace: one polygonal face of a convex hull.
   - m_plane: the clip routines in this file read xyz as the face normal and w
     as the plane constant.
   - m_indexOffset / m_numIndices: first entry and length of this face's run
     in the shared index array (see clipFaceAgainstHull).
   The two padding ints are not referenced in this file's visible code. */
"typedef struct b3GpuFace b3GpuFace_t;\n"
"struct b3GpuFace\n"
"{\n"
" b3Float4 m_plane;\n"
" int m_indexOffset;\n"
" int m_numIndices;\n"
" int m_unusedPadding1;\n"
" int m_unusedPadding2;\n"
"};\n"
/* b3ConvexPolyhedronData: per-hull header that locates the hull's data inside
   shared arrays. As used by the clip routines in this file:
   - m_faceOffset / m_numFaces: first face and face count in the face array.
   - m_vertexOffset: added to each per-face index to address the vertex array.
   The remaining fields are not read by the code visible in this chunk. */
"typedef struct b3ConvexPolyhedronData b3ConvexPolyhedronData_t;\n"
"struct b3ConvexPolyhedronData\n"
"{\n"
" b3Float4 m_localCenter;\n"
" b3Float4 m_extents;\n"
" b3Float4 mC;\n"
" b3Float4 mE;\n"
" float m_radius;\n"
" int m_faceOffset;\n"
" int m_numFaces;\n"
" int m_numVertices;\n"
" int m_vertexOffset;\n"
" int m_uniqueEdgesOffset;\n"
" int m_numUniqueEdges;\n"
" int m_unused;\n"
"};\n"
"#endif //B3_CONVEX_POLYHEDRON_DATA_H\n"
"#ifndef B3_COLLIDABLE_H\n"
"#define B3_COLLIDABLE_H\n"
"#ifndef B3_FLOAT4_H\n"
"#ifdef __cplusplus\n"
"#else\n"
"#endif \n"
"#endif //B3_FLOAT4_H\n"
"#ifndef B3_QUAT_H\n"
"#ifdef __cplusplus\n"
"#else\n"
"#endif \n"
"#endif //B3_QUAT_H\n"
"enum b3ShapeTypes\n"
"{\n"
" SHAPE_HEIGHT_FIELD=1,\n"
" SHAPE_CONVEX_HULL=3,\n"
" SHAPE_PLANE=4,\n"
" SHAPE_CONCAVE_TRIMESH=5,\n"
" SHAPE_COMPOUND_OF_CONVEX_HULLS=6,\n"
" SHAPE_SPHERE=7,\n"
" MAX_NUM_SHAPE_TYPES,\n"
"};\n"
"typedef struct b3Collidable b3Collidable_t;\n"
"struct b3Collidable\n"
"{\n"
" union {\n"
" int m_numChildShapes;\n"
" int m_bvhIndex;\n"
" };\n"
" union\n"
" {\n"
" float m_radius;\n"
" int m_compoundBvhIndex;\n"
" };\n"
" int m_shapeType;\n"
" int m_shapeIndex;\n"
"};\n"
"typedef struct b3GpuChildShape b3GpuChildShape_t;\n"
"struct b3GpuChildShape\n"
"{\n"
" b3Float4 m_childPosition;\n"
" b3Quat m_childOrientation;\n"
" int m_shapeIndex;\n"
" int m_unused0;\n"
" int m_unused1;\n"
" int m_unused2;\n"
"};\n"
"struct b3CompoundOverlappingPair\n"
"{\n"
" int m_bodyIndexA;\n"
" int m_bodyIndexB;\n"
"// int m_pairType;\n"
" int m_childShapeIndexA;\n"
" int m_childShapeIndexB;\n"
"};\n"
"#endif //B3_COLLIDABLE_H\n"
"#ifndef B3_RIGIDBODY_DATA_H\n"
"#define B3_RIGIDBODY_DATA_H\n"
"#ifndef B3_FLOAT4_H\n"
"#ifdef __cplusplus\n"
"#else\n"
"#endif \n"
"#endif //B3_FLOAT4_H\n"
"#ifndef B3_QUAT_H\n"
"#ifdef __cplusplus\n"
"#else\n"
"#endif \n"
"#endif //B3_QUAT_H\n"
"#ifndef B3_MAT3x3_H\n"
"#define B3_MAT3x3_H\n"
"#ifndef B3_QUAT_H\n"
"#ifdef __cplusplus\n"
"#else\n"
"#endif \n"
"#endif //B3_QUAT_H\n"
"#ifdef __cplusplus\n"
"#else\n"
"typedef struct\n"
"{\n"
" b3Float4 m_row[3];\n"
"}b3Mat3x3;\n"
"#define b3Mat3x3ConstArg const b3Mat3x3\n"
"#define b3GetRow(m,row) (m.m_row[row])\n"
/* b3QuatGetRotationMatrix: expands a quaternion (xyz vector part, w scalar
   part) into a 3x3 matrix stored as three float4 rows; the w lane of every row
   is set to 0. quat2 caches the squared x, y, z components.
   NOTE(review): the formula has no normalization term, so it presumably
   expects a unit quaternion - confirm with callers. */
"inline b3Mat3x3 b3QuatGetRotationMatrix(b3Quat quat)\n"
"{\n"
" b3Float4 quat2 = (b3Float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f);\n"
" b3Mat3x3 out;\n"
" out.m_row[0].x=1-2*quat2.y-2*quat2.z;\n"
" out.m_row[0].y=2*quat.x*quat.y-2*quat.w*quat.z;\n"
" out.m_row[0].z=2*quat.x*quat.z+2*quat.w*quat.y;\n"
" out.m_row[0].w = 0.f;\n"
" out.m_row[1].x=2*quat.x*quat.y+2*quat.w*quat.z;\n"
" out.m_row[1].y=1-2*quat2.x-2*quat2.z;\n"
" out.m_row[1].z=2*quat.y*quat.z-2*quat.w*quat.x;\n"
" out.m_row[1].w = 0.f;\n"
" out.m_row[2].x=2*quat.x*quat.z-2*quat.w*quat.y;\n"
" out.m_row[2].y=2*quat.y*quat.z+2*quat.w*quat.x;\n"
" out.m_row[2].z=1-2*quat2.x-2*quat2.y;\n"
" out.m_row[2].w = 0.f;\n"
" return out;\n"
"}\n"
/* b3AbsoluteMat3x3: returns a matrix whose rows are the element-wise fabs()
   of the rows of matIn (all four lanes of each row). */
"inline b3Mat3x3 b3AbsoluteMat3x3(b3Mat3x3ConstArg matIn)\n"
"{\n"
" b3Mat3x3 out;\n"
" out.m_row[0] = fabs(matIn.m_row[0]);\n"
" out.m_row[1] = fabs(matIn.m_row[1]);\n"
" out.m_row[2] = fabs(matIn.m_row[2]);\n"
" return out;\n"
"}\n"
"__inline\n"
"b3Mat3x3 mtZero();\n"
"__inline\n"
"b3Mat3x3 mtIdentity();\n"
"__inline\n"
"b3Mat3x3 mtTranspose(b3Mat3x3 m);\n"
"__inline\n"
"b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b);\n"
"__inline\n"
"b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b);\n"
"__inline\n"
"b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b);\n"
/* mtZero: returns a b3Mat3x3 with every lane of all three rows set to 0. */
"__inline\n"
"b3Mat3x3 mtZero()\n"
"{\n"
" b3Mat3x3 m;\n"
" m.m_row[0] = (b3Float4)(0.f);\n"
" m.m_row[1] = (b3Float4)(0.f);\n"
" m.m_row[2] = (b3Float4)(0.f);\n"
" return m;\n"
"}\n"
/* mtIdentity: returns the 3x3 identity, one float4 per row with w = 0. */
"__inline\n"
"b3Mat3x3 mtIdentity()\n"
"{\n"
" b3Mat3x3 m;\n"
" m.m_row[0] = (b3Float4)(1,0,0,0);\n"
" m.m_row[1] = (b3Float4)(0,1,0,0);\n"
" m.m_row[2] = (b3Float4)(0,0,1,0);\n"
" return m;\n"
"}\n"
/* mtTranspose: returns the transpose of the 3x3 part of m; row i of the
   result collects lane i (x, y or z) of each input row, and w is set to 0. */
"__inline\n"
"b3Mat3x3 mtTranspose(b3Mat3x3 m)\n"
"{\n"
" b3Mat3x3 out;\n"
" out.m_row[0] = (b3Float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f);\n"
" out.m_row[1] = (b3Float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f);\n"
" out.m_row[2] = (b3Float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);\n"
" return out;\n"
"}\n"
/* mtMul: 3x3 matrix product a*b. b is transposed first so that every element
   of the result is a 3-component dot product of a row of a with a row of
   transB (= a column of b). The w lane of each result row is set to 0. */
"__inline\n"
"b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b)\n"
"{\n"
" b3Mat3x3 transB;\n"
" transB = mtTranspose( b );\n"
" b3Mat3x3 ans;\n"
" // why this doesn't run when 0ing in the for{}\n"
/* `a` is a by-value copy, so clearing its w lanes does not affect the caller. */
" a.m_row[0].w = 0.f;\n"
" a.m_row[1].w = 0.f;\n"
" a.m_row[2].w = 0.f;\n"
" for(int i=0; i<3; i++)\n"
" {\n"
"// a.m_row[i].w = 0.f;\n"
" ans.m_row[i].x = b3Dot3F4(a.m_row[i],transB.m_row[0]);\n"
" ans.m_row[i].y = b3Dot3F4(a.m_row[i],transB.m_row[1]);\n"
" ans.m_row[i].z = b3Dot3F4(a.m_row[i],transB.m_row[2]);\n"
" ans.m_row[i].w = 0.f;\n"
" }\n"
" return ans;\n"
"}\n"
/* mtMul1: matrix times column vector. Lane i of the result is the
   3-component dot product of row i of a with b; the w lane is set to 0. */
"__inline\n"
"b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b)\n"
"{\n"
" b3Float4 ans;\n"
" ans.x = b3Dot3F4( a.m_row[0], b );\n"
" ans.y = b3Dot3F4( a.m_row[1], b );\n"
" ans.z = b3Dot3F4( a.m_row[2], b );\n"
" ans.w = 0.f;\n"
" return ans;\n"
"}\n"
/* mtMul3: row vector times matrix. Lane i of the result is the 3-component
   dot product of a with column i of b (columns are gathered from the rows of
   b with w = 0). The w lane of the result is set to 0, matching mtMul1;
   previously it was returned uninitialized. */
"__inline\n"
"b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b)\n"
"{\n"
" b3Float4 colx = b3MakeFloat4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n"
" b3Float4 coly = b3MakeFloat4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n"
" b3Float4 colz = b3MakeFloat4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n"
" b3Float4 ans;\n"
" ans.x = b3Dot3F4( a, colx );\n"
" ans.y = b3Dot3F4( a, coly );\n"
" ans.z = b3Dot3F4( a, colz );\n"
" // do not hand an indeterminate lane back to the caller\n"
" ans.w = 0.f;\n"
" return ans;\n"
"}\n"
"#endif\n"
"#endif //B3_MAT3x3_H\n"
"typedef struct b3RigidBodyData b3RigidBodyData_t;\n"
"struct b3RigidBodyData\n"
"{\n"
" b3Float4 m_pos;\n"
" b3Quat m_quat;\n"
" b3Float4 m_linVel;\n"
" b3Float4 m_angVel;\n"
" int m_collidableIdx;\n"
" float m_invMass;\n"
" float m_restituitionCoeff;\n"
" float m_frictionCoeff;\n"
"};\n"
"typedef struct b3InertiaData b3InertiaData_t;\n"
"struct b3InertiaData\n"
"{\n"
" b3Mat3x3 m_invInertiaWorld;\n"
" b3Mat3x3 m_initInvInertia;\n"
"};\n"
"#endif //B3_RIGIDBODY_DATA_H\n"
" \n"
"#define GET_NPOINTS(x) (x).m_worldNormalOnB.w\n"
"#define SELECT_UINT4( b, a, condition ) select( b,a,condition )\n"
"#define make_float4 (float4)\n"
"#define make_float2 (float2)\n"
"#define make_uint4 (uint4)\n"
"#define make_int4 (int4)\n"
"#define make_uint2 (uint2)\n"
"#define make_int2 (int2)\n"
/* fastDiv: scalar division through the native_divide() built-in; the exact
   `/` form is kept below as a commented-out alternative. */
"__inline\n"
"float fastDiv(float numerator, float denominator)\n"
"{\n"
" return native_divide(numerator, denominator); \n"
"// return numerator/denominator; \n"
"}\n"
/* fastDiv4: lane-wise float4 division through the native_divide() built-in. */
"__inline\n"
"float4 fastDiv4(float4 numerator, float4 denominator)\n"
"{\n"
" return native_divide(numerator, denominator); \n"
"}\n"
/* cross3: thin wrapper that forwards a and b unchanged to the cross()
   built-in (unlike b3Cross3 it does not rewrite the w lanes first). */
"__inline\n"
"float4 cross3(float4 a, float4 b)\n"
"{\n"
" return cross(a,b);\n"
"}\n"
/* dot3F4: dot product of the xyz parts of a and b; both operands are rebuilt
   with w = 0 before dot(), so w never contributes. */
"//#define dot3F4 dot\n"
"__inline\n"
"float dot3F4(float4 a, float4 b)\n"
"{\n"
" float4 a1 = make_float4(a.xyz,0.f);\n"
" float4 b1 = make_float4(b.xyz,0.f);\n"
" return dot(a1, b1);\n"
"}\n"
/* fastNormalize4: applies fast_normalize() to v as given, so all four lanes
   (including w) take part in the length. */
"__inline\n"
"float4 fastNormalize4(float4 v)\n"
"{\n"
" return fast_normalize(v);\n"
"}\n"
"///////////////////////////////////////\n"
"// Quaternion\n"
"///////////////////////////////////////\n"
"typedef float4 Quaternion;\n"
"__inline\n"
"Quaternion qtMul(Quaternion a, Quaternion b);\n"
"__inline\n"
"Quaternion qtNormalize(Quaternion in);\n"
"__inline\n"
"float4 qtRotate(Quaternion q, float4 vec);\n"
"__inline\n"
"Quaternion qtInvert(Quaternion q);\n"
/* qtMul: quaternion product of a and b (xyz = vector part, w = scalar part):
     vector part = cross3(a,b) + a.w*b + b.w*a
     scalar part = a.w*b.w - dot3F4(a,b)
   The w lane from the vector expression is overwritten by the scalar part. */
"__inline\n"
"Quaternion qtMul(Quaternion a, Quaternion b)\n"
"{\n"
" Quaternion ans;\n"
" ans = cross3( a, b );\n"
" ans += a.w*b+b.w*a;\n"
"// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n"
" ans.w = a.w*b.w - dot3F4(a, b);\n"
" return ans;\n"
"}\n"
/* qtNormalize: normalizes all four lanes of `in` via fastNormalize4
   (fast_normalize); the exact length-based form is left commented out. */
"__inline\n"
"Quaternion qtNormalize(Quaternion in)\n"
"{\n"
" return fastNormalize4(in);\n"
"// in /= length( in );\n"
"// return in;\n"
"}\n"
/* qtRotate: computes q * v * qtInvert(q), where v is `vec` with w forced to 0.
   NOTE(review): qtInvert only negates xyz, so this is a pure rotation only
   for unit-length q - confirm callers guarantee that. */
"__inline\n"
"float4 qtRotate(Quaternion q, float4 vec)\n"
"{\n"
" Quaternion qInv = qtInvert( q );\n"
" float4 vcpy = vec;\n"
" vcpy.w = 0.f;\n"
" float4 out = qtMul(qtMul(q,vcpy),qInv);\n"
" return out;\n"
"}\n"
/* qtInvert: returns (-q.x, -q.y, -q.z, q.w), the conjugate of q; no division
   by the squared length is performed. */
"__inline\n"
"Quaternion qtInvert(Quaternion q)\n"
"{\n"
" return (Quaternion)(-q.xyz, q.w);\n"
"}\n"
/* qtInvRotate: applies qtRotate with the conjugate of q, i.e. the opposite
   rotation to qtRotate(q, vec). */
"__inline\n"
"float4 qtInvRotate(const Quaternion q, float4 vec)\n"
"{\n"
" return qtRotate( qtInvert( q ), vec );\n"
"}\n"
/* transform: rotates *p by *orientation (qtRotate) and adds *translation.
   All three arguments are passed by pointer and only read. */
"__inline\n"
"float4 transform(const float4* p, const float4* translation, const Quaternion* orientation)\n"
"{\n"
" return qtRotate( *orientation, *p ) + (*translation);\n"
"}\n"
/* normalize3: copies the xyz lanes of a into a vector with w = 0 and
   normalizes it with fastNormalize4, so only xyz determine the length. */
"__inline\n"
"float4 normalize3(const float4 a)\n"
"{\n"
" float4 n = make_float4(a.x, a.y, a.z, 0.f);\n"
" return fastNormalize4( n );\n"
"}\n"
/* lerp3: linear interpolation a + (b - a) * t applied to x, y and z; the w
   lane of the result is 0. t is not clamped. */
"__inline float4 lerp3(const float4 a,const float4 b, float t)\n"
"{\n"
" return make_float4( a.x + (b.x - a.x) * t,\n"
" a.y + (b.y - a.y) * t,\n"
" a.z + (b.z - a.z) * t,\n"
" 0.f);\n"
"}\n"
/* clipFaceGlobal: clips the polygon pVtxIn[0..numVertsIn) against the plane
   dot3(planeNormalWS, x) + planeEqWS = 0, keeping the side where the signed
   distance is negative. Both vertex arrays live in __global memory.
   Walks every edge (previous vertex -> current vertex): a current vertex
   behind the plane is emitted, and an edge that crosses the plane emits the
   interpolated intersection point (w = 0, see lerp3).
   Returns the number of vertices written to ppVtxOut. The caller must size
   ppVtxOut for the result; no capacity is passed in.
   An empty input polygon now returns 0 immediately instead of reading
   pVtxIn[-1]. */
"// Clips a face to the back of a plane, return the number of vertices out, stored in ppVtxOut\n"
"int clipFaceGlobal(__global const float4* pVtxIn, int numVertsIn, float4 planeNormalWS,float planeEqWS, __global float4* ppVtxOut)\n"
"{\n"
" \n"
" int ve;\n"
" float ds, de;\n"
" int numVertsOut = 0;\n"
" // an empty polygon has no 'previous' vertex: return before pVtxIn[numVertsIn-1] is read\n"
" if (numVertsIn < 1)\n"
" return 0;\n"
" \n"
" float4 firstVertex=pVtxIn[numVertsIn-1];\n"
" float4 endVertex = pVtxIn[0];\n"
" \n"
" ds = dot3F4(planeNormalWS,firstVertex)+planeEqWS;\n"
" \n"
" for (ve = 0; ve < numVertsIn; ve++)\n"
" {\n"
" endVertex=pVtxIn[ve];\n"
" de = dot3F4(planeNormalWS,endVertex)+planeEqWS;\n"
" if (ds<0)\n"
" {\n"
" if (de<0)\n"
" {\n"
" // Start < 0, end < 0, so output endVertex\n"
" ppVtxOut[numVertsOut++] = endVertex;\n"
" }\n"
" else\n"
" {\n"
" // Start < 0, end >= 0, so output intersection\n"
" ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) );\n"
" }\n"
" }\n"
" else\n"
" {\n"
" if (de<0)\n"
" {\n"
" // Start >= 0, end < 0 so output intersection and end\n"
" ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) );\n"
" ppVtxOut[numVertsOut++] = endVertex;\n"
" }\n"
" }\n"
" firstVertex = endVertex;\n"
" ds = de;\n"
" }\n"
" return numVertsOut;\n"
"}\n"
/* clipFace: private-memory counterpart of clipFaceGlobal. Clips the polygon
   pVtxIn[0..numVertsIn) against the plane
   dot3(planeNormalWS, x) + planeEqWS = 0, keeping the side where the signed
   distance is negative. A current vertex behind the plane is emitted; an edge
   that crosses the plane emits the interpolated intersection (w = 0).
   Returns the number of vertices written to ppVtxOut. The caller must size
   ppVtxOut for the result; no capacity is passed in.
   clipFaceAgainstHull calls this once per edge of the witness face, so it is
   reached with numVertsIn == 0 as soon as one plane rejects every vertex; that
   case now returns 0 immediately instead of reading pVtxIn[-1]. */
"// Clips a face to the back of a plane, return the number of vertices out, stored in ppVtxOut\n"
"int clipFace(const float4* pVtxIn, int numVertsIn, float4 planeNormalWS,float planeEqWS, float4* ppVtxOut)\n"
"{\n"
" \n"
" int ve;\n"
" float ds, de;\n"
" int numVertsOut = 0;\n"
" // an empty polygon has no 'previous' vertex: return before pVtxIn[numVertsIn-1] is read\n"
" if (numVertsIn < 1)\n"
" return 0;\n"
" float4 firstVertex=pVtxIn[numVertsIn-1];\n"
" float4 endVertex = pVtxIn[0];\n"
" \n"
" ds = dot3F4(planeNormalWS,firstVertex)+planeEqWS;\n"
" for (ve = 0; ve < numVertsIn; ve++)\n"
" {\n"
" endVertex=pVtxIn[ve];\n"
" de = dot3F4(planeNormalWS,endVertex)+planeEqWS;\n"
" if (ds<0)\n"
" {\n"
" if (de<0)\n"
" {\n"
" // Start < 0, end < 0, so output endVertex\n"
" ppVtxOut[numVertsOut++] = endVertex;\n"
" }\n"
" else\n"
" {\n"
" // Start < 0, end >= 0, so output intersection\n"
" ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) );\n"
" }\n"
" }\n"
" else\n"
" {\n"
" if (de<0)\n"
" {\n"
" // Start >= 0, end < 0 so output intersection and end\n"
" ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) );\n"
" ppVtxOut[numVertsOut++] = endVertex;\n"
" }\n"
" }\n"
" firstVertex = endVertex;\n"
" ds = de;\n"
" }\n"
" return numVertsOut;\n"
"}\n"
/* clipFaceAgainstHull: clips a world-space polygon of hull B against the
   side planes of one face of hull A and reports the surviving vertices as
   contact points.
   Steps:
     1. Choose the face of hull A whose world-space normal has the smallest
        dot product with separatingNormal (the witness face); return 0 if the
        hull has no faces.
     2. For every edge of that face build a plane through the edge (normal
        -cross(edge, faceNormal)) and clip the polygon against it with
        clipFace, ping-ponging between worldVertsB1 and worldVertsB2.
     3. For each remaining vertex compute its signed distance to the witness
        face plane, raise it to minDist if smaller, and emit (x, y, z, depth)
        when depth <= maxDist.
   Returns the number of float4 contacts written to contactsOut. Writes are
   now limited to contactCapacity entries; before, the parameter was ignored
   and a large clipped polygon could overrun the caller's array.
   capacityWorldVertsB2 is accepted but not used: clipFace takes no capacity. */
"int clipFaceAgainstHull(const float4 separatingNormal, __global const b3ConvexPolyhedronData_t* hullA, \n"
" const float4 posA, const Quaternion ornA, float4* worldVertsB1, int numWorldVertsB1,\n"
" float4* worldVertsB2, int capacityWorldVertsB2,\n"
" const float minDist, float maxDist,\n"
" __global const float4* vertices,\n"
" __global const b3GpuFace_t* faces,\n"
" __global const int* indices,\n"
" float4* contactsOut,\n"
" int contactCapacity)\n"
"{\n"
" int numContactsOut = 0;\n"
" float4* pVtxIn = worldVertsB1;\n"
" float4* pVtxOut = worldVertsB2;\n"
" \n"
" int numVertsIn = numWorldVertsB1;\n"
" int numVertsOut = 0;\n"
" int closestFaceA=-1;\n"
" {\n"
" float dmin = FLT_MAX;\n"
" for(int face=0;face<hullA->m_numFaces;face++)\n"
" {\n"
" const float4 Normal = make_float4(\n"
" faces[hullA->m_faceOffset+face].m_plane.x, \n"
" faces[hullA->m_faceOffset+face].m_plane.y, \n"
" faces[hullA->m_faceOffset+face].m_plane.z,0.f);\n"
" const float4 faceANormalWS = qtRotate(ornA,Normal);\n"
" \n"
" float d = dot3F4(faceANormalWS,separatingNormal);\n"
" if (d < dmin)\n"
" {\n"
" dmin = d;\n"
" closestFaceA = face;\n"
" }\n"
" }\n"
" }\n"
" if (closestFaceA<0)\n"
" return numContactsOut;\n"
" b3GpuFace_t polyA = faces[hullA->m_faceOffset+closestFaceA];\n"
" // clip polygon to back of planes of all faces of hull A that are adjacent to witness face\n"
" int numVerticesA = polyA.m_numIndices;\n"
" for(int e0=0;e0<numVerticesA;e0++)\n"
" {\n"
" const float4 a = vertices[hullA->m_vertexOffset+indices[polyA.m_indexOffset+e0]];\n"
" const float4 b = vertices[hullA->m_vertexOffset+indices[polyA.m_indexOffset+((e0+1)%numVerticesA)]];\n"
" const float4 edge0 = a - b;\n"
" const float4 WorldEdge0 = qtRotate(ornA,edge0);\n"
" float4 planeNormalA = make_float4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f);\n"
" float4 worldPlaneAnormal1 = qtRotate(ornA,planeNormalA);\n"
" float4 planeNormalWS1 = -cross3(WorldEdge0,worldPlaneAnormal1);\n"
" float4 worldA1 = transform(&a,&posA,&ornA);\n"
" float planeEqWS1 = -dot3F4(worldA1,planeNormalWS1);\n"
" \n"
" float4 planeNormalWS = planeNormalWS1;\n"
" float planeEqWS=planeEqWS1;\n"
" \n"
" //clip face\n"
" //clipFace(*pVtxIn, *pVtxOut,planeNormalWS,planeEqWS);\n"
" numVertsOut = clipFace(pVtxIn, numVertsIn, planeNormalWS,planeEqWS, pVtxOut);\n"
" //btSwap(pVtxIn,pVtxOut);\n"
" float4* tmp = pVtxOut;\n"
" pVtxOut = pVtxIn;\n"
" pVtxIn = tmp;\n"
" numVertsIn = numVertsOut;\n"
" numVertsOut = 0;\n"
" }\n"
" \n"
" // only keep points that are behind the witness face\n"
" {\n"
" float4 localPlaneNormal = make_float4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f);\n"
" float localPlaneEq = polyA.m_plane.w;\n"
" float4 planeNormalWS = qtRotate(ornA,localPlaneNormal);\n"
" float planeEqWS=localPlaneEq-dot3F4(planeNormalWS,posA);\n"
" for (int i=0;i<numVertsIn;i++)\n"
" {\n"
" float depth = dot3F4(planeNormalWS,pVtxIn[i])+planeEqWS;\n"
" if (depth <=minDist)\n"
" {\n"
" depth = minDist;\n"
" }\n"
" // never write past the caller-provided contact buffer\n"
" if (depth <=maxDist && numContactsOut < contactCapacity)\n"
" {\n"
" float4 pointInWorld = pVtxIn[i];\n"
" //resultOut.addContactPoint(separatingNormal,point,depth);\n"
" contactsOut[numContactsOut++] = make_float4(pointInWorld.x,pointInWorld.y,pointInWorld.z,depth);\n"
" }\n"
" }\n"
" }\n"
" return numContactsOut;\n"
"}\n"
/* clipFaceAgainstHullLocalA: same algorithm as clipFaceAgainstHull, but hull
   A (header, vertices, faces, indices) is read through unqualified pointers
   instead of __global ones.
   Steps:
     1. Choose the face of hull A whose world-space normal has the smallest
        dot product with separatingNormal; return 0 if the hull has no faces.
     2. Clip the polygon in worldVertsB1 against one plane per edge of that
        face using clipFace, ping-ponging between worldVertsB1/worldVertsB2.
     3. For each remaining vertex compute its signed distance to the witness
        face plane, raise it to minDist if smaller, and emit (x, y, z, depth)
        when depth <= maxDist.
   Returns the number of float4 contacts written to contactsOut. Writes are
   now limited to contactCapacity entries; before, the parameter was ignored.
   verticesB, facesB, indicesB and capacityWorldVertsB2 are accepted but not
   used in this function. */
"int clipFaceAgainstHullLocalA(const float4 separatingNormal, const b3ConvexPolyhedronData_t* hullA, \n"
" const float4 posA, const Quaternion ornA, float4* worldVertsB1, int numWorldVertsB1,\n"
" float4* worldVertsB2, int capacityWorldVertsB2,\n"
" const float minDist, float maxDist,\n"
" const float4* verticesA,\n"
" const b3GpuFace_t* facesA,\n"
" const int* indicesA,\n"
" __global const float4* verticesB,\n"
" __global const b3GpuFace_t* facesB,\n"
" __global const int* indicesB,\n"
" float4* contactsOut,\n"
" int contactCapacity)\n"
"{\n"
" int numContactsOut = 0;\n"
" float4* pVtxIn = worldVertsB1;\n"
" float4* pVtxOut = worldVertsB2;\n"
" \n"
" int numVertsIn = numWorldVertsB1;\n"
" int numVertsOut = 0;\n"
" int closestFaceA=-1;\n"
" {\n"
" float dmin = FLT_MAX;\n"
" for(int face=0;face<hullA->m_numFaces;face++)\n"
" {\n"
" const float4 Normal = make_float4(\n"
" facesA[hullA->m_faceOffset+face].m_plane.x, \n"
" facesA[hullA->m_faceOffset+face].m_plane.y, \n"
" facesA[hullA->m_faceOffset+face].m_plane.z,0.f);\n"
" const float4 faceANormalWS = qtRotate(ornA,Normal);\n"
" \n"
" float d = dot3F4(faceANormalWS,separatingNormal);\n"
" if (d < dmin)\n"
" {\n"
" dmin = d;\n"
" closestFaceA = face;\n"
" }\n"
" }\n"
" }\n"
" if (closestFaceA<0)\n"
" return numContactsOut;\n"
" b3GpuFace_t polyA = facesA[hullA->m_faceOffset+closestFaceA];\n"
" // clip polygon to back of planes of all faces of hull A that are adjacent to witness face\n"
" int numVerticesA = polyA.m_numIndices;\n"
" for(int e0=0;e0<numVerticesA;e0++)\n"
" {\n"
" const float4 a = verticesA[hullA->m_vertexOffset+indicesA[polyA.m_indexOffset+e0]];\n"
" const float4 b = verticesA[hullA->m_vertexOffset+indicesA[polyA.m_indexOffset+((e0+1)%numVerticesA)]];\n"
" const float4 edge0 = a - b;\n"
" const float4 WorldEdge0 = qtRotate(ornA,edge0);\n"
" float4 planeNormalA = make_float4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f);\n"
" float4 worldPlaneAnormal1 = qtRotate(ornA,planeNormalA);\n"
" float4 planeNormalWS1 = -cross3(WorldEdge0,worldPlaneAnormal1);\n"
" float4 worldA1 = transform(&a,&posA,&ornA);\n"
" float planeEqWS1 = -dot3F4(worldA1,planeNormalWS1);\n"
" \n"
" float4 planeNormalWS = planeNormalWS1;\n"
" float planeEqWS=planeEqWS1;\n"
" \n"
" //clip face\n"
" //clipFace(*pVtxIn, *pVtxOut,planeNormalWS,planeEqWS);\n"
" numVertsOut = clipFace(pVtxIn, numVertsIn, planeNormalWS,planeEqWS, pVtxOut);\n"
" //btSwap(pVtxIn,pVtxOut);\n"
" float4* tmp = pVtxOut;\n"
" pVtxOut = pVtxIn;\n"
" pVtxIn = tmp;\n"
" numVertsIn = numVertsOut;\n"
" numVertsOut = 0;\n"
" }\n"
" \n"
" // only keep points that are behind the witness face\n"
" {\n"
" float4 localPlaneNormal = make_float4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f);\n"
" float localPlaneEq = polyA.m_plane.w;\n"
" float4 planeNormalWS = qtRotate(ornA,localPlaneNormal);\n"
" float planeEqWS=localPlaneEq-dot3F4(planeNormalWS,posA);\n"
" for (int i=0;i<numVertsIn;i++)\n"
" {\n"
" float depth = dot3F4(planeNormalWS,pVtxIn[i])+planeEqWS;\n"
" if (depth <=minDist)\n"
" {\n"
" depth = minDist;\n"
" }\n"
" // never write past the caller-provided contact buffer\n"
" if (depth <=maxDist && numContactsOut < contactCapacity)\n"
" {\n"
" float4 pointInWorld = pVtxIn[i];\n"
" //resultOut.addContactPoint(separatingNormal,point,depth);\n"
" contactsOut[numContactsOut++] = make_float4(pointInWorld.x,pointInWorld.y,pointInWorld.z,depth);\n"
" }\n"
" }\n"
" }\n"
" return numContactsOut;\n"
"}\n"
/* clipHullAgainstHull: produces contact points between convex hulls A and B
   for a given separating normal.
   Steps:
     1. Choose the face of hull B whose world-space normal has the largest dot
        product with separatingNormal.
     2. Transform that face's vertices to world space into worldVertsB1.
     3. Clip the polygon against hull A with clipFaceAgainstHull.
   Returns the number of contacts written to localContactsOut.
   Changes from the previous version:
     - when no face is selected (closestFaceB stays -1, e.g. hull B has no
       faces) the function returns 0 before indexing the face array; the old
       code read faces[m_faceOffset-1] first and only tested afterwards;
     - at most capacityWorldVerts vertices are stored in worldVertsB1; a face
       with more vertices is truncated rather than overrunning the buffer. */
"int clipHullAgainstHull(const float4 separatingNormal,\n"
" __global const b3ConvexPolyhedronData_t* hullA, __global const b3ConvexPolyhedronData_t* hullB, \n"
" const float4 posA, const Quaternion ornA,const float4 posB, const Quaternion ornB, \n"
" float4* worldVertsB1, float4* worldVertsB2, int capacityWorldVerts,\n"
" const float minDist, float maxDist,\n"
" __global const float4* vertices,\n"
" __global const b3GpuFace_t* faces,\n"
" __global const int* indices,\n"
" float4* localContactsOut,\n"
" int localContactCapacity)\n"
"{\n"
" int numContactsOut = 0;\n"
" int numWorldVertsB1= 0;\n"
" int closestFaceB=-1;\n"
" float dmax = -FLT_MAX;\n"
" {\n"
" for(int face=0;face<hullB->m_numFaces;face++)\n"
" {\n"
" const float4 Normal = make_float4(faces[hullB->m_faceOffset+face].m_plane.x, \n"
" faces[hullB->m_faceOffset+face].m_plane.y, faces[hullB->m_faceOffset+face].m_plane.z,0.f);\n"
" const float4 WorldNormal = qtRotate(ornB, Normal);\n"
" float d = dot3F4(WorldNormal,separatingNormal);\n"
" if (d > dmax)\n"
" {\n"
" dmax = d;\n"
" closestFaceB = face;\n"
" }\n"
" }\n"
" }\n"
" // no face selected: nothing to clip, and faces[...-1] must not be read\n"
" if (closestFaceB<0)\n"
" return numContactsOut;\n"
" {\n"
" const b3GpuFace_t polyB = faces[hullB->m_faceOffset+closestFaceB];\n"
" const int numVertices = polyB.m_numIndices;\n"
" // stop at capacityWorldVerts so worldVertsB1 is never overrun\n"
" for(int e0=0;e0<numVertices && numWorldVertsB1<capacityWorldVerts;e0++)\n"
" {\n"
" const float4 b = vertices[hullB->m_vertexOffset+indices[polyB.m_indexOffset+e0]];\n"
" worldVertsB1[numWorldVertsB1++] = transform(&b,&posB,&ornB);\n"
" }\n"
" }\n"
" numContactsOut = clipFaceAgainstHull(separatingNormal, hullA, \n"
" posA,ornA,\n"
" worldVertsB1,numWorldVertsB1,worldVertsB2,capacityWorldVerts, minDist, maxDist,vertices,\n"
" faces,\n"
" indices,localContactsOut,localContactCapacity);\n"
" return numContactsOut;\n"
"}\n"
/* clipHullAgainstHullLocalA: variant of clipHullAgainstHull in which hull A
   (header, vertices, faces, indices) is passed through unqualified pointers
   while hull B stays in __global memory.
   Steps:
     1. Choose the face of hull B whose world-space normal has the largest dot
        product with separatingNormal.
     2. Transform that face's vertices to world space into worldVertsB1.
     3. Clip the polygon against hull A with clipFaceAgainstHullLocalA.
   Returns the number of contacts written to localContactsOut.
   Changes from the previous version:
     - when no face is selected (closestFaceB stays -1) the function returns 0
       before indexing facesB; the old code read facesB[m_faceOffset-1] first;
     - at most capacityWorldVerts vertices are stored in worldVertsB1; a face
       with more vertices is truncated rather than overrunning the buffer. */
"int clipHullAgainstHullLocalA(const float4 separatingNormal,\n"
" const b3ConvexPolyhedronData_t* hullA, __global const b3ConvexPolyhedronData_t* hullB, \n"
" const float4 posA, const Quaternion ornA,const float4 posB, const Quaternion ornB, \n"
" float4* worldVertsB1, float4* worldVertsB2, int capacityWorldVerts,\n"
" const float minDist, float maxDist,\n"
" const float4* verticesA,\n"
" const b3GpuFace_t* facesA,\n"
" const int* indicesA,\n"
" __global const float4* verticesB,\n"
" __global const b3GpuFace_t* facesB,\n"
" __global const int* indicesB,\n"
" float4* localContactsOut,\n"
" int localContactCapacity)\n"
"{\n"
" int numContactsOut = 0;\n"
" int numWorldVertsB1= 0;\n"
" int closestFaceB=-1;\n"
" float dmax = -FLT_MAX;\n"
" {\n"
" for(int face=0;face<hullB->m_numFaces;face++)\n"
" {\n"
" const float4 Normal = make_float4(facesB[hullB->m_faceOffset+face].m_plane.x, \n"
" facesB[hullB->m_faceOffset+face].m_plane.y, facesB[hullB->m_faceOffset+face].m_plane.z,0.f);\n"
" const float4 WorldNormal = qtRotate(ornB, Normal);\n"
" float d = dot3F4(WorldNormal,separatingNormal);\n"
" if (d > dmax)\n"
" {\n"
" dmax = d;\n"
" closestFaceB = face;\n"
" }\n"
" }\n"
" }\n"
" // no face selected: nothing to clip, and facesB[...-1] must not be read\n"
" if (closestFaceB<0)\n"
" return numContactsOut;\n"
" {\n"
" const b3GpuFace_t polyB = facesB[hullB->m_faceOffset+closestFaceB];\n"
" const int numVertices = polyB.m_numIndices;\n"
" // stop at capacityWorldVerts so worldVertsB1 is never overrun\n"
" for(int e0=0;e0<numVertices && numWorldVertsB1<capacityWorldVerts;e0++)\n"
" {\n"
" const float4 b = verticesB[hullB->m_vertexOffset+indicesB[polyB.m_indexOffset+e0]];\n"
" worldVertsB1[numWorldVertsB1++] = transform(&b,&posB,&ornB);\n"
" }\n"
" }\n"
" numContactsOut = clipFaceAgainstHullLocalA(separatingNormal, hullA, \n"
" posA,ornA,\n"
" worldVertsB1,numWorldVertsB1,worldVertsB2,capacityWorldVerts, minDist, maxDist,\n"
" verticesA,facesA,indicesA,\n"
" verticesB,facesB,indicesB,\n"
" localContactsOut,localContactCapacity);\n"
" return numContactsOut;\n"
"}\n"
/* Sequential stand-ins for work-group reductions (the names suggest a
   parallel origin, but each expands to a plain loop in one work-item):
   - PARALLEL_SUM(v, n): adds v[1..n-1] into v[0], in index order.
   - PARALLEL_DO(execution, n): runs `execution` n times; the loop variable
     `ie` is visible to the expression.
   - REDUCE_MAX / REDUCE_MIN(v, n): leave in v[0] the element of v[0..n-1]
     with the largest / smallest .y member (i is fixed at 0; on equal .y the
     later element replaces the earlier one).
   The arguments are not parenthesized in the expansions, so pass plain
   identifiers or simple expressions. */
"#define PARALLEL_SUM(v, n) for(int j=1; j<n; j++) v[0] += v[j];\n"
"#define PARALLEL_DO(execution, n) for(int ie=0; ie<n; ie++){execution;}\n"
"#define REDUCE_MAX(v, n) {int i=0; for(int offset=0; offset<n; offset++) v[i] = (v[i].y > v[i+offset].y)? v[i]: v[i+offset]; }\n"
"#define REDUCE_MIN(v, n) {int i=0; for(int offset=0; offset<n; offset++) v[i] = (v[i].y < v[i+offset].y)? v[i]: v[i+offset]; }\n"
/* extractManifoldSequentialGlobal: reduces a set of contact points in
   __global memory to at most four indices, written to the lanes of
   contactIdx[0].
   - p: contact points; xyz is the position, w is compared as a depth.
   - nPoints: number of points; values above 64 are clamped to 64.
   - nearNormal: used to build two directions (u, v) via cross products.
   - contactIdx: output; only contactIdx[0] is written.
   Returns 0 for no points, nPoints itself when nPoints <= 4 (contactIdx is
   NOT written on that path), otherwise 4.
   For more than four points: the centroid is computed, u = normalize(normal x
   (p[0]-centroid)), v = normalize(normal x u), and for each of u, -u, v, -v
   the index of the point with the smallest projection is recorded. The point
   with the smallest w is tracked separately and replaces lane x if it is not
   already among the four. */
"int extractManifoldSequentialGlobal(__global const float4* p, int nPoints, float4 nearNormal, int4* contactIdx)\n"
"{\n"
" if( nPoints == 0 )\n"
" return 0;\n"
" \n"
" if (nPoints <=4)\n"
" return nPoints;\n"
" \n"
" \n"
" if (nPoints >64)\n"
" nPoints = 64;\n"
" \n"
" float4 center = make_float4(0.f);\n"
" {\n"
" \n"
" for (int i=0;i<nPoints;i++)\n"
" center += p[i];\n"
" center /= (float)nPoints;\n"
" }\n"
" \n"
" \n"
" \n"
" // sample 4 directions\n"
" \n"
" float4 aVector = p[0] - center;\n"
" float4 u = cross3( nearNormal, aVector );\n"
" float4 v = cross3( nearNormal, u );\n"
" u = normalize3( u );\n"
" v = normalize3( v );\n"
" \n"
" \n"
" //keep point with deepest penetration\n"
" float minW= FLT_MAX;\n"
" \n"
" int minIndex=-1;\n"
" \n"
/* NOTE(review): despite the name, maxDots starts at FLT_MIN (the smallest
   positive normal float) and is updated with `<`, so each lane tracks a
   minimum. contactIdx lanes are assigned only inside those `<` branches;
   confirm a lane cannot be left unassigned (e.g. with NaN inputs). */
" float4 maxDots;\n"
" maxDots.x = FLT_MIN;\n"
" maxDots.y = FLT_MIN;\n"
" maxDots.z = FLT_MIN;\n"
" maxDots.w = FLT_MIN;\n"
" \n"
" // idx, distance\n"
" for(int ie = 0; ie<nPoints; ie++ )\n"
" {\n"
" if (p[ie].w<minW)\n"
" {\n"
" minW = p[ie].w;\n"
" minIndex=ie;\n"
" }\n"
" float f;\n"
" float4 r = p[ie]-center;\n"
" f = dot3F4( u, r );\n"
" if (f<maxDots.x)\n"
" {\n"
" maxDots.x = f;\n"
" contactIdx[0].x = ie;\n"
" }\n"
" \n"
" f = dot3F4( -u, r );\n"
" if (f<maxDots.y)\n"
" {\n"
" maxDots.y = f;\n"
" contactIdx[0].y = ie;\n"
" }\n"
" \n"
" \n"
" f = dot3F4( v, r );\n"
" if (f<maxDots.z)\n"
" {\n"
" maxDots.z = f;\n"
" contactIdx[0].z = ie;\n"
" }\n"
" \n"
" f = dot3F4( -v, r );\n"
" if (f<maxDots.w)\n"
" {\n"
" maxDots.w = f;\n"
" contactIdx[0].w = ie;\n"
" }\n"
" \n"
" }\n"
" \n"
" if (contactIdx[0].x != minIndex && contactIdx[0].y != minIndex && contactIdx[0].z != minIndex && contactIdx[0].w != minIndex)\n"
" {\n"
" //replace the first contact with minimum (todo: replace contact with least penetration)\n"
" contactIdx[0].x = minIndex;\n"
" }\n"
" \n"
" return 4;\n"
" \n"
"}\n"
/* extractManifoldSequentialGlobalFake: placeholder reduction that performs no
   geometric selection. Always writes 0, 1, 2, 3 to contactIdx[0..3] and
   returns min(nPoints, 4) (0 when nPoints is 0). p and nearNormal are not
   read. */
"int extractManifoldSequentialGlobalFake(__global const float4* p, int nPoints, float4 nearNormal, int* contactIdx)\n"
"{\n"
" contactIdx[0] = 0;\n"
" contactIdx[1] = 1;\n"
" contactIdx[2] = 2;\n"
" contactIdx[3] = 3;\n"
" \n"
" if( nPoints == 0 ) return 0;\n"
" \n"
" nPoints = min2( nPoints, 4 );\n"
" return nPoints;\n"
" \n"
"}\n"
/* extractManifoldSequential: reduces up to 64 contact points to at most four
   indices written to contactIdx[0..3].
   - p: contact points; xyz is the position, w is read as a depth.
   - nPoints: number of points; values above 64 are clamped to 64.
   - nearNormal: used to build two directions (u, v) via cross products.
   Returns 0 for no points; for fewer than 4 points writes 0..nPoints-1 to
   contactIdx and returns nPoints; otherwise returns 4.
   Selection for >= 4 points: each point's projections on u, -u, v, -v are
   stored as the float's bit pattern with the low 8 bits replaced by the point
   index, the four lanes are reduced with an integer max, and the index is
   recovered from the low 8 bits of each winner. */
"int extractManifoldSequential(const float4* p, int nPoints, float4 nearNormal, int* contactIdx)\n"
"{\n"
" if( nPoints == 0 ) return 0;\n"
" nPoints = min2( nPoints, 64 );\n"
" float4 center = make_float4(0.f);\n"
" {\n"
/* Centroid: the points are copied to a scratch array because PARALLEL_SUM
   accumulates into v[0] in place. */
" float4 v[64];\n"
" for (int i=0;i<nPoints;i++)\n"
" v[i] = p[i];\n"
" //memcpy( v, p, nPoints*sizeof(float4) );\n"
" PARALLEL_SUM( v, nPoints );\n"
" center = v[0]/(float)nPoints;\n"
" }\n"
" \n"
" { // sample 4 directions\n"
" if( nPoints < 4 )\n"
" {\n"
" for(int i=0; i<nPoints; i++) \n"
" contactIdx[i] = i;\n"
" return nPoints;\n"
" }\n"
" float4 aVector = p[0] - center;\n"
" float4 u = cross3( nearNormal, aVector );\n"
" float4 v = cross3( nearNormal, u );\n"
" u = normalize3( u );\n"
" v = normalize3( v );\n"
" int idx[4];\n"
" float2 max00 = make_float2(0,FLT_MAX);\n"
" {\n"
" // idx, distance\n"
" {\n"
" {\n"
" int4 a[64];\n"
" for(int ie = 0; ie<nPoints; ie++ )\n"
" {\n"
" \n"
" \n"
" float f;\n"
" float4 r = p[ie]-center;\n"
/* Pack: upper 24 bits of the projection's bit pattern, low 8 bits = index. */
" f = dot3F4( u, r );\n"
" a[ie].x = ((*(u32*)&f) & 0xffffff00) | (0xff & ie);\n"
" f = dot3F4( -u, r );\n"
" a[ie].y = ((*(u32*)&f) & 0xffffff00) | (0xff & ie);\n"
" f = dot3F4( v, r );\n"
" a[ie].z = ((*(u32*)&f) & 0xffffff00) | (0xff & ie);\n"
" f = dot3F4( -v, r );\n"
" a[ie].w = ((*(u32*)&f) & 0xffffff00) | (0xff & ie);\n"
" }\n"
" for(int ie=0; ie<nPoints; ie++)\n"
" {\n"
" a[0].x = (a[0].x > a[ie].x )? a[0].x: a[ie].x;\n"
" a[0].y = (a[0].y > a[ie].y )? a[0].y: a[ie].y;\n"
" a[0].z = (a[0].z > a[ie].z )? a[0].z: a[ie].z;\n"
" a[0].w = (a[0].w > a[ie].w )? a[0].w: a[ie].w;\n"
" }\n"
" idx[0] = (int)a[0].x & 0xff;\n"
" idx[1] = (int)a[0].y & 0xff;\n"
" idx[2] = (int)a[0].z & 0xff;\n"
" idx[3] = (int)a[0].w & 0xff;\n"
" }\n"
" }\n"
/* NOTE(review): the block below finds the point with the smallest w and
   stores it in max00, but max00 is never read afterwards, so it does not
   influence the returned indices. */
" {\n"
" float2 h[64];\n"
" PARALLEL_DO( h[ie] = make_float2((float)ie, p[ie].w), nPoints );\n"
" REDUCE_MIN( h, nPoints );\n"
" max00 = h[0];\n"
" }\n"
" }\n"
" contactIdx[0] = idx[0];\n"
" contactIdx[1] = idx[1];\n"
" contactIdx[2] = idx[2];\n"
" contactIdx[3] = idx[3];\n"
" return 4;\n"
" }\n"
"}\n"
"__kernel void extractManifoldAndAddContactKernel(__global const int4* pairs, \n"
" __global const b3RigidBodyData_t* rigidBodies, \n"
" __global const float4* closestPointsWorld,\n"
" __global const float4* separatingNormalsWorld,\n"
" __global const int* contactCounts,\n"
" __global const int* contactOffsets,\n"
" __global struct b3Contact4Data* restrict contactsOut,\n"
" counter32_t nContactsOut,\n"
" int numPairs,\n"
" int pairIndex\n"
" )\n"
"{\n"
" int idx = get_global_id(0);\n"
" \n"
" if (idx<numPairs)\n"
" {\n"
" float4 normal = separatingNormalsWorld[idx];\n"
" int nPoints = contactCounts[idx];\n"
" __global const float4* pointsIn = &closestPointsWorld[contactOffsets[idx]];\n"
" float4 localPoints[64];\n"
" for (int i=0;i<nPoints;i++)\n"
" {\n"
" localPoints[i] = pointsIn[i];\n"
" }\n"
" int contactIdx[4];// = {-1,-1,-1,-1};\n"
" contactIdx[0] = -1;\n"
" contactIdx[1] = -1;\n"
" contactIdx[2] = -1;\n"
" contactIdx[3] = -1;\n"
" int nContacts = extractManifoldSequential(localPoints, nPoints, normal, contactIdx);\n"
" int dstIdx;\n"
" AppendInc( nContactsOut, dstIdx );\n"
" //if ((dstIdx+nContacts) < capacity)\n"
" {\n"
" __global struct b3Contact4Data* c = contactsOut + dstIdx;\n"
" c->m_worldNormalOnB = -normal;\n"
" c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n"
" c->m_batchIdx = idx;\n"
" int bodyA = pairs[pairIndex].x;\n"
" int bodyB = pairs[pairIndex].y;\n"
" c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0 ? -bodyA:bodyA;\n"
" c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0 ? -bodyB:bodyB;\n"
" c->m_childIndexA = -1;\n"
" c->m_childIndexB = -1;\n"
" for (int i=0;i<nContacts;i++)\n"
" {\n"
" c->m_worldPosB[i] = localPoints[contactIdx[i]];\n"
" }\n"
" GET_NPOINTS(*c) = nContacts;\n"
" }\n"
" }\n"
"}\n"
"void trInverse(float4 translationIn, Quaternion orientationIn,\n"
" float4* translationOut, Quaternion* orientationOut)\n"
"{\n"
" *orientationOut = qtInvert(orientationIn);\n"
" *translationOut = qtRotate(*orientationOut, -translationIn);\n"
"}\n"
"void trMul(float4 translationA, Quaternion orientationA,\n"
" float4 translationB, Quaternion orientationB,\n"
" float4* translationOut, Quaternion* orientationOut)\n"
"{\n"
" *orientationOut = qtMul(orientationA,orientationB);\n"
" *translationOut = transform(&translationB,&translationA,&orientationA);\n"
"}\n"
"__kernel void clipHullHullKernel( __global int4* pairs, \n"
" __global const b3RigidBodyData_t* rigidBodies, \n"
" __global const b3Collidable_t* collidables,\n"
" __global const b3ConvexPolyhedronData_t* convexShapes, \n"
" __global const float4* vertices,\n"
" __global const float4* uniqueEdges,\n"
" __global const b3GpuFace_t* faces,\n"
" __global const int* indices,\n"
" __global const float4* separatingNormals,\n"
" __global const int* hasSeparatingAxis,\n"
" __global struct b3Contact4Data* restrict globalContactsOut,\n"
" counter32_t nGlobalContactsOut,\n"
" int numPairs,\n"
" int contactCapacity)\n"
"{\n"
" int i = get_global_id(0);\n"
" int pairIndex = i;\n"
" \n"
" float4 worldVertsB1[64];\n"
" float4 worldVertsB2[64];\n"
" int capacityWorldVerts = 64; \n"
" float4 localContactsOut[64];\n"
" int localContactCapacity=64;\n"
" \n"
" float minDist = -1e30f;\n"
" float maxDist = 0.02f;\n"
" if (i<numPairs)\n"
" {\n"
" int bodyIndexA = pairs[i].x;\n"
" int bodyIndexB = pairs[i].y;\n"
" \n"
" int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n"
" int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n"
" if (hasSeparatingAxis[i])\n"
" {\n"
" \n"
" int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n"
" int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n"
" \n"
" \n"
" int numLocalContactsOut = clipHullAgainstHull(separatingNormals[i],\n"
" &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],\n"
" rigidBodies[bodyIndexA].m_pos,rigidBodies[bodyIndexA].m_quat,\n"
" rigidBodies[bodyIndexB].m_pos,rigidBodies[bodyIndexB].m_quat,\n"
" worldVertsB1,worldVertsB2,capacityWorldVerts,\n"
" minDist, maxDist,\n"
" vertices,faces,indices,\n"
" localContactsOut,localContactCapacity);\n"
" \n"
" if (numLocalContactsOut>0)\n"
" {\n"
" float4 normal = -separatingNormals[i];\n"
" int nPoints = numLocalContactsOut;\n"
" float4* pointsIn = localContactsOut;\n"
" int contactIdx[4];// = {-1,-1,-1,-1};\n"
" contactIdx[0] = -1;\n"
" contactIdx[1] = -1;\n"
" contactIdx[2] = -1;\n"
" contactIdx[3] = -1;\n"
" \n"
" int nReducedContacts = extractManifoldSequential(pointsIn, nPoints, normal, contactIdx);\n"
" \n"
" int dstIdx;\n"
" AppendInc( nGlobalContactsOut, dstIdx );\n"
" if (dstIdx<contactCapacity)\n"
" {\n"
" pairs[pairIndex].z = dstIdx;\n"
" __global struct b3Contact4Data* c = globalContactsOut+ dstIdx;\n"
" c->m_worldNormalOnB = -normal;\n"
" c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n"
" c->m_batchIdx = pairIndex;\n"
" int bodyA = pairs[pairIndex].x;\n"
" int bodyB = pairs[pairIndex].y;\n"
" c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA;\n"
" c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB;\n"
" c->m_childIndexA = -1;\n"
" c->m_childIndexB = -1;\n"
" for (int i=0;i<nReducedContacts;i++)\n"
" {\n"
" c->m_worldPosB[i] = pointsIn[contactIdx[i]];\n"
" }\n"
" GET_NPOINTS(*c) = nReducedContacts;\n"
" }\n"
" \n"
" }// if (numContactsOut>0)\n"
" }// if (hasSeparatingAxis[i])\n"
" }// if (i<numPairs)\n"
"}\n"
"__kernel void clipCompoundsHullHullKernel( __global const int4* gpuCompoundPairs, \n"
" __global const b3RigidBodyData_t* rigidBodies, \n"
" __global const b3Collidable_t* collidables,\n"
" __global const b3ConvexPolyhedronData_t* convexShapes, \n"
" __global const float4* vertices,\n"
" __global const float4* uniqueEdges,\n"
" __global const b3GpuFace_t* faces,\n"
" __global const int* indices,\n"
" __global const b3GpuChildShape_t* gpuChildShapes,\n"
" __global const float4* gpuCompoundSepNormalsOut,\n"
" __global const int* gpuHasCompoundSepNormalsOut,\n"
" __global struct b3Contact4Data* restrict globalContactsOut,\n"
" counter32_t nGlobalContactsOut,\n"
" int numCompoundPairs, int maxContactCapacity)\n"
"{\n"
" int i = get_global_id(0);\n"
" int pairIndex = i;\n"
" \n"
" float4 worldVertsB1[64];\n"
" float4 worldVertsB2[64];\n"
" int capacityWorldVerts = 64; \n"
" float4 localContactsOut[64];\n"
" int localContactCapacity=64;\n"
" \n"
" float minDist = -1e30f;\n"
" float maxDist = 0.02f;\n"
" if (i<numCompoundPairs)\n"
" {\n"
" if (gpuHasCompoundSepNormalsOut[i])\n"
" {\n"
" int bodyIndexA = gpuCompoundPairs[i].x;\n"
" int bodyIndexB = gpuCompoundPairs[i].y;\n"
" \n"
" int childShapeIndexA = gpuCompoundPairs[i].z;\n"
" int childShapeIndexB = gpuCompoundPairs[i].w;\n"
" \n"
" int collidableIndexA = -1;\n"
" int collidableIndexB = -1;\n"
" \n"
" float4 ornA = rigidBodies[bodyIndexA].m_quat;\n"
" float4 posA = rigidBodies[bodyIndexA].m_pos;\n"
" \n"
" float4 ornB = rigidBodies[bodyIndexB].m_quat;\n"
" float4 posB = rigidBodies[bodyIndexB].m_pos;\n"
" \n"
" if (childShapeIndexA >= 0)\n"
" {\n"
" collidableIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex;\n"
" float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition;\n"
" float4 childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation;\n"
" float4 newPosA = qtRotate(ornA,childPosA)+posA;\n"
" float4 newOrnA = qtMul(ornA,childOrnA);\n"
" posA = newPosA;\n"
" ornA = newOrnA;\n"
" } else\n"
" {\n"
" collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n"
" }\n"
" \n"
" if (childShapeIndexB>=0)\n"
" {\n"
" collidableIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;\n"
" float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;\n"
" float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;\n"
" float4 newPosB = transform(&childPosB,&posB,&ornB);\n"
" float4 newOrnB = qtMul(ornB,childOrnB);\n"
" posB = newPosB;\n"
" ornB = newOrnB;\n"
" } else\n"
" {\n"
" collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; \n"
" }\n"
" \n"
" int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n"
" int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n"
" \n"
" int numLocalContactsOut = clipHullAgainstHull(gpuCompoundSepNormalsOut[i],\n"
" &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],\n"
" posA,ornA,\n"
" posB,ornB,\n"
" worldVertsB1,worldVertsB2,capacityWorldVerts,\n"
" minDist, maxDist,\n"
" vertices,faces,indices,\n"
" localContactsOut,localContactCapacity);\n"
" \n"
" if (numLocalContactsOut>0)\n"
" {\n"
" float4 normal = -gpuCompoundSepNormalsOut[i];\n"
" int nPoints = numLocalContactsOut;\n"
" float4* pointsIn = localContactsOut;\n"
" int contactIdx[4];// = {-1,-1,-1,-1};\n"
" contactIdx[0] = -1;\n"
" contactIdx[1] = -1;\n"
" contactIdx[2] = -1;\n"
" contactIdx[3] = -1;\n"
" \n"
" int nReducedContacts = extractManifoldSequential(pointsIn, nPoints, normal, contactIdx);\n"
" \n"
" int dstIdx;\n"
" AppendInc( nGlobalContactsOut, dstIdx );\n"
" if ((dstIdx+nReducedContacts) < maxContactCapacity)\n"
" {\n"
" __global struct b3Contact4Data* c = globalContactsOut+ dstIdx;\n"
" c->m_worldNormalOnB = -normal;\n"
" c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n"
" c->m_batchIdx = pairIndex;\n"
" int bodyA = gpuCompoundPairs[pairIndex].x;\n"
" int bodyB = gpuCompoundPairs[pairIndex].y;\n"
" c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA;\n"
" c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB;\n"
" c->m_childIndexA = childShapeIndexA;\n"
" c->m_childIndexB = childShapeIndexB;\n"
" for (int i=0;i<nReducedContacts;i++)\n"
" {\n"
" c->m_worldPosB[i] = pointsIn[contactIdx[i]];\n"
" }\n"
" GET_NPOINTS(*c) = nReducedContacts;\n"
" }\n"
" \n"
" }// if (numContactsOut>0)\n"
" }// if (gpuHasCompoundSepNormalsOut[i])\n"
" }// if (i<numCompoundPairs)\n"
"}\n"
"__kernel void sphereSphereCollisionKernel( __global const int4* pairs, \n"
" __global const b3RigidBodyData_t* rigidBodies, \n"
" __global const b3Collidable_t* collidables,\n"
" __global const float4* separatingNormals,\n"
" __global const int* hasSeparatingAxis,\n"
" __global struct b3Contact4Data* restrict globalContactsOut,\n"
" counter32_t nGlobalContactsOut,\n"
" int numPairs)\n"
"{\n"
" int i = get_global_id(0);\n"
" int pairIndex = i;\n"
" \n"
" if (i<numPairs)\n"
" {\n"
" int bodyIndexA = pairs[i].x;\n"
" int bodyIndexB = pairs[i].y;\n"
" \n"
" int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n"
" int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n"
" if (collidables[collidableIndexA].m_shapeType == SHAPE_SPHERE &&\n"
" collidables[collidableIndexB].m_shapeType == SHAPE_SPHERE)\n"
" {\n"
" //sphere-sphere\n"
" float radiusA = collidables[collidableIndexA].m_radius;\n"
" float radiusB = collidables[collidableIndexB].m_radius;\n"
" float4 posA = rigidBodies[bodyIndexA].m_pos;\n"
" float4 posB = rigidBodies[bodyIndexB].m_pos;\n"
" float4 diff = posA-posB;\n"
" float len = length(diff);\n"
" \n"
" ///iff distance positive, don't generate a new contact\n"
" if ( len <= (radiusA+radiusB))\n"
" {\n"
" ///distance (negative means penetration)\n"
" float dist = len - (radiusA+radiusB);\n"
" float4 normalOnSurfaceB = make_float4(1.f,0.f,0.f,0.f);\n"
" if (len > 0.00001)\n"
" {\n"
" normalOnSurfaceB = diff / len;\n"
" }\n"
" float4 contactPosB = posB + normalOnSurfaceB*radiusB;\n"
" contactPosB.w = dist;\n"
" \n"
" int dstIdx;\n"
" AppendInc( nGlobalContactsOut, dstIdx );\n"
" \n"
" if (dstIdx < numPairs)\n"
" {\n"
" __global struct b3Contact4Data* c = &globalContactsOut[dstIdx];\n"
" c->m_worldNormalOnB = -normalOnSurfaceB;\n"
" c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n"
" c->m_batchIdx = pairIndex;\n"
" int bodyA = pairs[pairIndex].x;\n"
" int bodyB = pairs[pairIndex].y;\n"
" c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA;\n"
" c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB;\n"
" c->m_worldPosB[0] = contactPosB;\n"
" c->m_childIndexA = -1;\n"
" c->m_childIndexB = -1;\n"
" GET_NPOINTS(*c) = 1;\n"
" }//if (dstIdx < numPairs)\n"
" }//if ( len <= (radiusA+radiusB))\n"
" }//SHAPE_SPHERE SHAPE_SPHERE\n"
" }//if (i<numPairs)\n"
"} \n"
"__kernel void clipHullHullConcaveConvexKernel( __global int4* concavePairsIn,\n"
" __global const b3RigidBodyData_t* rigidBodies, \n"
" __global const b3Collidable_t* collidables,\n"
" __global const b3ConvexPolyhedronData_t* convexShapes, \n"
" __global const float4* vertices,\n"
" __global const float4* uniqueEdges,\n"
" __global const b3GpuFace_t* faces,\n"
" __global const int* indices,\n"
" __global const b3GpuChildShape_t* gpuChildShapes,\n"
" __global const float4* separatingNormals,\n"
" __global struct b3Contact4Data* restrict globalContactsOut,\n"
" counter32_t nGlobalContactsOut,\n"
" int numConcavePairs)\n"
"{\n"
" int i = get_global_id(0);\n"
" int pairIndex = i;\n"
" \n"
" float4 worldVertsB1[64];\n"
" float4 worldVertsB2[64];\n"
" int capacityWorldVerts = 64; \n"
" float4 localContactsOut[64];\n"
" int localContactCapacity=64;\n"
" \n"
" float minDist = -1e30f;\n"
" float maxDist = 0.02f;\n"
" if (i<numConcavePairs)\n"
" {\n"
" //negative value means that the pair is invalid\n"
" if (concavePairsIn[i].w<0)\n"
" return;\n"
" int bodyIndexA = concavePairsIn[i].x;\n"
" int bodyIndexB = concavePairsIn[i].y;\n"
" int f = concavePairsIn[i].z;\n"
" int childShapeIndexA = f;\n"
" \n"
" int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n"
" int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n"
" \n"
" int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n"
" int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n"
" \n"
" ///////////////////////////////////////////////////////////////\n"
" \n"
" \n"
" bool overlap = false;\n"
" \n"
" b3ConvexPolyhedronData_t convexPolyhedronA;\n"
" //add 3 vertices of the triangle\n"
" convexPolyhedronA.m_numVertices = 3;\n"
" convexPolyhedronA.m_vertexOffset = 0;\n"
" float4 localCenter = make_float4(0.f,0.f,0.f,0.f);\n"
" b3GpuFace_t face = faces[convexShapes[shapeIndexA].m_faceOffset+f];\n"
" \n"
" float4 verticesA[3];\n"
" for (int i=0;i<3;i++)\n"
" {\n"
" int index = indices[face.m_indexOffset+i];\n"
" float4 vert = vertices[convexShapes[shapeIndexA].m_vertexOffset+index];\n"
" verticesA[i] = vert;\n"
" localCenter += vert;\n"
" }\n"
" float dmin = FLT_MAX;\n"
" int localCC=0;\n"
" //a triangle has 3 unique edges\n"
" convexPolyhedronA.m_numUniqueEdges = 3;\n"
" convexPolyhedronA.m_uniqueEdgesOffset = 0;\n"
" float4 uniqueEdgesA[3];\n"
" \n"
" uniqueEdgesA[0] = (verticesA[1]-verticesA[0]);\n"
" uniqueEdgesA[1] = (verticesA[2]-verticesA[1]);\n"
" uniqueEdgesA[2] = (verticesA[0]-verticesA[2]);\n"
" convexPolyhedronA.m_faceOffset = 0;\n"
" \n"
" float4 normal = make_float4(face.m_plane.x,face.m_plane.y,face.m_plane.z,0.f);\n"
" \n"
" b3GpuFace_t facesA[TRIANGLE_NUM_CONVEX_FACES];\n"
" int indicesA[3+3+2+2+2];\n"
" int curUsedIndices=0;\n"
" int fidx=0;\n"
" //front size of triangle\n"
" {\n"
" facesA[fidx].m_indexOffset=curUsedIndices;\n"
" indicesA[0] = 0;\n"
" indicesA[1] = 1;\n"
" indicesA[2] = 2;\n"
" curUsedIndices+=3;\n"
" float c = face.m_plane.w;\n"
" facesA[fidx].m_plane.x = normal.x;\n"
" facesA[fidx].m_plane.y = normal.y;\n"
" facesA[fidx].m_plane.z = normal.z;\n"
" facesA[fidx].m_plane.w = c;\n"
" facesA[fidx].m_numIndices=3;\n"
" }\n"
" fidx++;\n"
" //back size of triangle\n"
" {\n"
" facesA[fidx].m_indexOffset=curUsedIndices;\n"
" indicesA[3]=2;\n"
" indicesA[4]=1;\n"
" indicesA[5]=0;\n"
" curUsedIndices+=3;\n"
" float c = dot3F4(normal,verticesA[0]);\n"
" float c1 = -face.m_plane.w;\n"
" facesA[fidx].m_plane.x = -normal.x;\n"
" facesA[fidx].m_plane.y = -normal.y;\n"
" facesA[fidx].m_plane.z = -normal.z;\n"
" facesA[fidx].m_plane.w = c;\n"
" facesA[fidx].m_numIndices=3;\n"
" }\n"
" fidx++;\n"
" bool addEdgePlanes = true;\n"
" if (addEdgePlanes)\n"
" {\n"
" int numVertices=3;\n"
" int prevVertex = numVertices-1;\n"
" for (int i=0;i<numVertices;i++)\n"
" {\n"
" float4 v0 = verticesA[i];\n"
" float4 v1 = verticesA[prevVertex];\n"
" \n"
" float4 edgeNormal = normalize(cross(normal,v1-v0));\n"
" float c = -dot3F4(edgeNormal,v0);\n"
" facesA[fidx].m_numIndices = 2;\n"
" facesA[fidx].m_indexOffset=curUsedIndices;\n"
" indicesA[curUsedIndices++]=i;\n"
" indicesA[curUsedIndices++]=prevVertex;\n"
" \n"
" facesA[fidx].m_plane.x = edgeNormal.x;\n"
" facesA[fidx].m_plane.y = edgeNormal.y;\n"
" facesA[fidx].m_plane.z = edgeNormal.z;\n"
" facesA[fidx].m_plane.w = c;\n"
" fidx++;\n"
" prevVertex = i;\n"
" }\n"
" }\n"
" convexPolyhedronA.m_numFaces = TRIANGLE_NUM_CONVEX_FACES;\n"
" convexPolyhedronA.m_localCenter = localCenter*(1.f/3.f);\n"
" float4 posA = rigidBodies[bodyIndexA].m_pos;\n"
" posA.w = 0.f;\n"
" float4 posB = rigidBodies[bodyIndexB].m_pos;\n"
" posB.w = 0.f;\n"
" float4 ornA = rigidBodies[bodyIndexA].m_quat;\n"
" float4 ornB =rigidBodies[bodyIndexB].m_quat;\n"
" float4 sepAxis = separatingNormals[i];\n"
" \n"
" int shapeTypeB = collidables[collidableIndexB].m_shapeType;\n"
" int childShapeIndexB =-1;\n"
" if (shapeTypeB==SHAPE_COMPOUND_OF_CONVEX_HULLS)\n"
" {\n"
" ///////////////////\n"
" ///compound shape support\n"
" \n"
" childShapeIndexB = concavePairsIn[pairIndex].w;\n"
" int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;\n"
" shapeIndexB = collidables[childColIndexB].m_shapeIndex;\n"
" float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;\n"
" float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;\n"
" float4 newPosB = transform(&childPosB,&posB,&ornB);\n"
" float4 newOrnB = qtMul(ornB,childOrnB);\n"
" posB = newPosB;\n"
" ornB = newOrnB;\n"
" \n"
" }\n"
" \n"
" ////////////////////////////////////////\n"
" \n"
" \n"
" \n"
" int numLocalContactsOut = clipHullAgainstHullLocalA(sepAxis,\n"
" &convexPolyhedronA, &convexShapes[shapeIndexB],\n"
" posA,ornA,\n"
" posB,ornB,\n"
" worldVertsB1,worldVertsB2,capacityWorldVerts,\n"
" minDist, maxDist,\n"
" &verticesA,&facesA,&indicesA,\n"
" vertices,faces,indices,\n"
" localContactsOut,localContactCapacity);\n"
" \n"
" if (numLocalContactsOut>0)\n"
" {\n"
" float4 normal = -separatingNormals[i];\n"
" int nPoints = numLocalContactsOut;\n"
" float4* pointsIn = localContactsOut;\n"
" int contactIdx[4];// = {-1,-1,-1,-1};\n"
" contactIdx[0] = -1;\n"
" contactIdx[1] = -1;\n"
" contactIdx[2] = -1;\n"
" contactIdx[3] = -1;\n"
" \n"
" int nReducedContacts = extractManifoldSequential(pointsIn, nPoints, normal, contactIdx);\n"
" \n"
" int dstIdx;\n"
" AppendInc( nGlobalContactsOut, dstIdx );\n"
" //if ((dstIdx+nReducedContacts) < capacity)\n"
" {\n"
" __global struct b3Contact4Data* c = globalContactsOut+ dstIdx;\n"
" c->m_worldNormalOnB = -normal;\n"
" c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n"
" c->m_batchIdx = pairIndex;\n"
" int bodyA = concavePairsIn[pairIndex].x;\n"
" int bodyB = concavePairsIn[pairIndex].y;\n"
" c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA;\n"
" c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB;\n"
" c->m_childIndexA = childShapeIndexA;\n"
" c->m_childIndexB = childShapeIndexB;\n"
" for (int i=0;i<nReducedContacts;i++)\n"
" {\n"
" c->m_worldPosB[i] = pointsIn[contactIdx[i]];\n"
" }\n"
" GET_NPOINTS(*c) = nReducedContacts;\n"
" }\n"
" \n"
" }// if (numContactsOut>0)\n"
" }// if (i<numPairs)\n"
"}\n"
"int findClippingFaces(const float4 separatingNormal,\n"
" __global const b3ConvexPolyhedronData_t* hullA, __global const b3ConvexPolyhedronData_t* hullB,\n"
" const float4 posA, const Quaternion ornA,const float4 posB, const Quaternion ornB,\n"
" __global float4* worldVertsA1,\n"
" __global float4* worldNormalsA1,\n"
" __global float4* worldVertsB1,\n"
" int capacityWorldVerts,\n"
" const float minDist, float maxDist,\n"
" __global const float4* vertices,\n"
" __global const b3GpuFace_t* faces,\n"
" __global const int* indices,\n"
" __global int4* clippingFaces, int pairIndex)\n"
"{\n"
" int numContactsOut = 0;\n"
" int numWorldVertsB1= 0;\n"
" \n"
" \n"
" int closestFaceB=-1;\n"
" float dmax = -FLT_MAX;\n"
" \n"
" {\n"
" for(int face=0;face<hullB->m_numFaces;face++)\n"
" {\n"
" const float4 Normal = make_float4(faces[hullB->m_faceOffset+face].m_plane.x,\n"
" faces[hullB->m_faceOffset+face].m_plane.y, faces[hullB->m_faceOffset+face].m_plane.z,0.f);\n"
" const float4 WorldNormal = qtRotate(ornB, Normal);\n"
" float d = dot3F4(WorldNormal,separatingNormal);\n"
" if (d > dmax)\n"
" {\n"
" dmax = d;\n"
" closestFaceB = face;\n"
" }\n"
" }\n"
" }\n"
" \n"
" {\n"
" const b3GpuFace_t polyB = faces[hullB->m_faceOffset+closestFaceB];\n"
" const int numVertices = polyB.m_numIndices;\n"
" for(int e0=0;e0<numVertices;e0++)\n"
" {\n"
" const float4 b = vertices[hullB->m_vertexOffset+indices[polyB.m_indexOffset+e0]];\n"
" worldVertsB1[pairIndex*capacityWorldVerts+numWorldVertsB1++] = transform(&b,&posB,&ornB);\n"
" }\n"
" }\n"
" \n"
" int closestFaceA=-1;\n"
" {\n"
" float dmin = FLT_MAX;\n"
" for(int face=0;face<hullA->m_numFaces;face++)\n"
" {\n"
" const float4 Normal = make_float4(\n"
" faces[hullA->m_faceOffset+face].m_plane.x,\n"
" faces[hullA->m_faceOffset+face].m_plane.y,\n"
" faces[hullA->m_faceOffset+face].m_plane.z,\n"
" 0.f);\n"
" const float4 faceANormalWS = qtRotate(ornA,Normal);\n"
" \n"
" float d = dot3F4(faceANormalWS,separatingNormal);\n"
" if (d < dmin)\n"
" {\n"
" dmin = d;\n"
" closestFaceA = face;\n"
" worldNormalsA1[pairIndex] = faceANormalWS;\n"
" }\n"
" }\n"
" }\n"
" \n"
" int numVerticesA = faces[hullA->m_faceOffset+closestFaceA].m_numIndices;\n"
" for(int e0=0;e0<numVerticesA;e0++)\n"
" {\n"
" const float4 a = vertices[hullA->m_vertexOffset+indices[faces[hullA->m_faceOffset+closestFaceA].m_indexOffset+e0]];\n"
" worldVertsA1[pairIndex*capacityWorldVerts+e0] = transform(&a, &posA,&ornA);\n"
" }\n"
" \n"
" clippingFaces[pairIndex].x = closestFaceA;\n"
" clippingFaces[pairIndex].y = closestFaceB;\n"
" clippingFaces[pairIndex].z = numVerticesA;\n"
" clippingFaces[pairIndex].w = numWorldVertsB1;\n"
" \n"
" \n"
" return numContactsOut;\n"
"}\n"
"int clipFaces(__global float4* worldVertsA1,\n"
" __global float4* worldNormalsA1,\n"
" __global float4* worldVertsB1,\n"
" __global float4* worldVertsB2, \n"
" int capacityWorldVertsB2,\n"
" const float minDist, float maxDist,\n"
" __global int4* clippingFaces,\n"
" int pairIndex)\n"
"{\n"
" int numContactsOut = 0;\n"
" \n"
" int closestFaceA = clippingFaces[pairIndex].x;\n"
" int closestFaceB = clippingFaces[pairIndex].y;\n"
" int numVertsInA = clippingFaces[pairIndex].z;\n"
" int numVertsInB = clippingFaces[pairIndex].w;\n"
" \n"
" int numVertsOut = 0;\n"
" \n"
" if (closestFaceA<0)\n"
" return numContactsOut;\n"
" \n"
" __global float4* pVtxIn = &worldVertsB1[pairIndex*capacityWorldVertsB2];\n"
" __global float4* pVtxOut = &worldVertsB2[pairIndex*capacityWorldVertsB2];\n"
" \n"
" \n"
" \n"
" // clip polygon to back of planes of all faces of hull A that are adjacent to witness face\n"
" \n"
" for(int e0=0;e0<numVertsInA;e0++)\n"
" {\n"
" const float4 aw = worldVertsA1[pairIndex*capacityWorldVertsB2+e0];\n"
" const float4 bw = worldVertsA1[pairIndex*capacityWorldVertsB2+((e0+1)%numVertsInA)];\n"
" const float4 WorldEdge0 = aw - bw;\n"
" float4 worldPlaneAnormal1 = worldNormalsA1[pairIndex];\n"
" float4 planeNormalWS1 = -cross3(WorldEdge0,worldPlaneAnormal1);\n"
" float4 worldA1 = aw;\n"
" float planeEqWS1 = -dot3F4(worldA1,planeNormalWS1);\n"
" float4 planeNormalWS = planeNormalWS1;\n"
" float planeEqWS=planeEqWS1;\n"
" numVertsOut = clipFaceGlobal(pVtxIn, numVertsInB, planeNormalWS,planeEqWS, pVtxOut);\n"
" __global float4* tmp = pVtxOut;\n"
" pVtxOut = pVtxIn;\n"
" pVtxIn = tmp;\n"
" numVertsInB = numVertsOut;\n"
" numVertsOut = 0;\n"
" }\n"
" \n"
" //float4 planeNormalWS = worldNormalsA1[pairIndex];\n"
" //float planeEqWS=-dot3F4(planeNormalWS,worldVertsA1[pairIndex*capacityWorldVertsB2]);\n"
" \n"
" /*for (int i=0;i<numVertsInB;i++)\n"
" {\n"
" pVtxOut[i] = pVtxIn[i];\n"
" }*/\n"
" \n"
" \n"
" \n"
" \n"
" //numVertsInB=0;\n"
" \n"
" float4 planeNormalWS = worldNormalsA1[pairIndex];\n"
" float planeEqWS=-dot3F4(planeNormalWS,worldVertsA1[pairIndex*capacityWorldVertsB2]);\n"
" for (int i=0;i<numVertsInB;i++)\n"
" {\n"
" float depth = dot3F4(planeNormalWS,pVtxIn[i])+planeEqWS;\n"
" if (depth <=minDist)\n"
" {\n"
" depth = minDist;\n"
" }\n"
" \n"
" if (depth <=maxDist)\n"
" {\n"
" float4 pointInWorld = pVtxIn[i];\n"
" pVtxOut[numContactsOut++] = make_float4(pointInWorld.x,pointInWorld.y,pointInWorld.z,depth);\n"
" }\n"
" }\n"
" \n"
" clippingFaces[pairIndex].w =numContactsOut;\n"
" \n"
" \n"
" return numContactsOut;\n"
"}\n"
"__kernel void findClippingFacesKernel( __global const int4* pairs,\n"
" __global const b3RigidBodyData_t* rigidBodies,\n"
" __global const b3Collidable_t* collidables,\n"
" __global const b3ConvexPolyhedronData_t* convexShapes,\n"
" __global const float4* vertices,\n"
" __global const float4* uniqueEdges,\n"
" __global const b3GpuFace_t* faces,\n"
" __global const int* indices,\n"
" __global const float4* separatingNormals,\n"
" __global const int* hasSeparatingAxis,\n"
" __global int4* clippingFacesOut,\n"
" __global float4* worldVertsA1,\n"
" __global float4* worldNormalsA1,\n"
" __global float4* worldVertsB1,\n"
" int capacityWorldVerts,\n"
" int numPairs\n"
" )\n"
"{\n"
" \n"
" int i = get_global_id(0);\n"
" int pairIndex = i;\n"
" \n"
" \n"
" float minDist = -1e30f;\n"
" float maxDist = 0.02f;\n"
" \n"
" if (i<numPairs)\n"
" {\n"
" \n"
" if (hasSeparatingAxis[i])\n"
" {\n"
" \n"
" int bodyIndexA = pairs[i].x;\n"
" int bodyIndexB = pairs[i].y;\n"
" \n"
" int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n"
" int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n"
" \n"
" int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n"
" int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n"
" \n"
" \n"
" \n"
" int numLocalContactsOut = findClippingFaces(separatingNormals[i],\n"
" &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],\n"
" rigidBodies[bodyIndexA].m_pos,rigidBodies[bodyIndexA].m_quat,\n"
" rigidBodies[bodyIndexB].m_pos,rigidBodies[bodyIndexB].m_quat,\n"
" worldVertsA1,\n"
" worldNormalsA1,\n"
" worldVertsB1,capacityWorldVerts,\n"
" minDist, maxDist,\n"
" vertices,faces,indices,\n"
" clippingFacesOut,i);\n"
" \n"
" \n"
" }// if (hasSeparatingAxis[i])\n"
" }// if (i<numPairs)\n"
" \n"
"}\n"
"__kernel void clipFacesAndFindContactsKernel( __global const float4* separatingNormals,\n"
" __global const int* hasSeparatingAxis,\n"
" __global struct b3Contact4Data* globalContactsOut,\n"
" __global int4* clippingFacesOut,\n"
" __global float4* worldVertsA1,\n"
" __global float4* worldNormalsA1,\n"
" __global float4* worldVertsB1,\n"
" __global float4* worldVertsB2,\n"
" counter32_t nGlobalContactsOut,\n"
" int vertexFaceCapacity,\n"
" int numPairs,\n"
" int debugMode\n"
" )\n"
"{\n"
" int i = get_global_id(0);\n"
" int pairIndex = i;\n"
" \n"
" \n"
" float minDist = -1e30f;\n"
" float maxDist = 0.02f;\n"
" \n"
" if (i<numPairs)\n"
" {\n"
" \n"
" if (hasSeparatingAxis[i])\n"
" {\n"
" \n"
"// int bodyIndexA = pairs[i].x;\n"
" // int bodyIndexB = pairs[i].y;\n"
" \n"
" int numLocalContactsOut = 0;\n"
" int capacityWorldVertsB2 = vertexFaceCapacity;\n"
" \n"
" __global float4* pVtxIn = &worldVertsB1[pairIndex*capacityWorldVertsB2];\n"
" __global float4* pVtxOut = &worldVertsB2[pairIndex*capacityWorldVertsB2];\n"
" \n"
" {\n"
" __global int4* clippingFaces = clippingFacesOut;\n"
" \n"
" \n"
" int closestFaceA = clippingFaces[pairIndex].x;\n"
" int closestFaceB = clippingFaces[pairIndex].y;\n"
" int numVertsInA = clippingFaces[pairIndex].z;\n"
" int numVertsInB = clippingFaces[pairIndex].w;\n"
" \n"
" int numVertsOut = 0;\n"
" \n"
" if (closestFaceA>=0)\n"
" {\n"
" \n"
" \n"
" \n"
" // clip polygon to back of planes of all faces of hull A that are adjacent to witness face\n"
" \n"
" for(int e0=0;e0<numVertsInA;e0++)\n"
" {\n"
" const float4 aw = worldVertsA1[pairIndex*capacityWorldVertsB2+e0];\n"
" const float4 bw = worldVertsA1[pairIndex*capacityWorldVertsB2+((e0+1)%numVertsInA)];\n"
" const float4 WorldEdge0 = aw - bw;\n"
" float4 worldPlaneAnormal1 = worldNormalsA1[pairIndex];\n"
" float4 planeNormalWS1 = -cross3(WorldEdge0,worldPlaneAnormal1);\n"
" float4 worldA1 = aw;\n"
" float planeEqWS1 = -dot3F4(worldA1,planeNormalWS1);\n"
" float4 planeNormalWS = planeNormalWS1;\n"
" float planeEqWS=planeEqWS1;\n"
" numVertsOut = clipFaceGlobal(pVtxIn, numVertsInB, planeNormalWS,planeEqWS, pVtxOut);\n"
" __global float4* tmp = pVtxOut;\n"
" pVtxOut = pVtxIn;\n"
" pVtxIn = tmp;\n"
" numVertsInB = numVertsOut;\n"
" numVertsOut = 0;\n"
" }\n"
" \n"
" float4 planeNormalWS = worldNormalsA1[pairIndex];\n"
" float planeEqWS=-dot3F4(planeNormalWS,worldVertsA1[pairIndex*capacityWorldVertsB2]);\n"
" \n"
" for (int i=0;i<numVertsInB;i++)\n"
" {\n"
" float depth = dot3F4(planeNormalWS,pVtxIn[i])+planeEqWS;\n"
" if (depth <=minDist)\n"
" {\n"
" depth = minDist;\n"
" }\n"
" \n"
" if (depth <=maxDist)\n"
" {\n"
" float4 pointInWorld = pVtxIn[i];\n"
" pVtxOut[numLocalContactsOut++] = make_float4(pointInWorld.x,pointInWorld.y,pointInWorld.z,depth);\n"
" }\n"
" }\n"
" \n"
" }\n"
" clippingFaces[pairIndex].w =numLocalContactsOut;\n"
" \n"
" }\n"
" \n"
" for (int i=0;i<numLocalContactsOut;i++)\n"
" pVtxIn[i] = pVtxOut[i];\n"
" \n"
" }// if (hasSeparatingAxis[i])\n"
" }// if (i<numPairs)\n"
" \n"
"}\n"
"__kernel void newContactReductionKernel( __global int4* pairs,\n"
" __global const b3RigidBodyData_t* rigidBodies,\n"
" __global const float4* separatingNormals,\n"
" __global const int* hasSeparatingAxis,\n"
" __global struct b3Contact4Data* globalContactsOut,\n"
" __global int4* clippingFaces,\n"
" __global float4* worldVertsB2,\n"
" volatile __global int* nGlobalContactsOut,\n"
" int vertexFaceCapacity,\n"
" int numPairs\n"
" )\n"
"{\n"
" int i = get_global_id(0);\n"
" int pairIndex = i;\n"
" \n"
" int4 contactIdx;\n"
" contactIdx=make_int4(0,1,2,3);\n"
" \n"
" if (i<numPairs)\n"
" {\n"
" \n"
" if (hasSeparatingAxis[i])\n"
" {\n"
" \n"
" \n"
" \n"
" \n"
" int nPoints = clippingFaces[pairIndex].w;\n"
" \n"
" if (nPoints>0)\n"
" {\n"
" __global float4* pointsIn = &worldVertsB2[pairIndex*vertexFaceCapacity];\n"
" float4 normal = -separatingNormals[i];\n"
" \n"
" int nReducedContacts = extractManifoldSequentialGlobal(pointsIn, nPoints, normal, &contactIdx);\n"
" \n"
" int dstIdx;\n"
" AppendInc( nGlobalContactsOut, dstIdx );\n"
" \n"
"//#if 0\n"
" \n"
" if (dstIdx < numPairs)\n"
" {\n"
" __global struct b3Contact4Data* c = &globalContactsOut[dstIdx];\n"
" c->m_worldNormalOnB = -normal;\n"
" c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n"
" c->m_batchIdx = pairIndex;\n"
" int bodyA = pairs[pairIndex].x;\n"
" int bodyB = pairs[pairIndex].y;\n"
" pairs[pairIndex].w = dstIdx;\n"
" c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA;\n"
" c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB;\n"
" c->m_childIndexA =-1;\n"
" c->m_childIndexB =-1;\n"
" switch (nReducedContacts)\n"
" {\n"
" case 4:\n"
" c->m_worldPosB[3] = pointsIn[contactIdx.w];\n"
" case 3:\n"
" c->m_worldPosB[2] = pointsIn[contactIdx.z];\n"
" case 2:\n"
" c->m_worldPosB[1] = pointsIn[contactIdx.y];\n"
" case 1:\n"
" c->m_worldPosB[0] = pointsIn[contactIdx.x];\n"
" default:\n"
" {\n"
" }\n"
" };\n"
" \n"
" GET_NPOINTS(*c) = nReducedContacts;\n"
" \n"
" }\n"
" \n"
" \n"
"//#endif\n"
" \n"
" }// if (numContactsOut>0)\n"
" }// if (hasSeparatingAxis[i])\n"
" }// if (i<numPairs)\n"
" \n"
" \n"
"}\n"
;