From 3bf003ace13c0f11d9d1b1195d042bb74960d33f Mon Sep 17 00:00:00 2001 From: erwincoumans Date: Thu, 8 Aug 2013 12:24:09 -0700 Subject: [PATCH] change lcpp Lua preprocessor, to keep #defines and comments, remove empty lines remove duplicate data in b3Contact4 (now in btContact4Data shared between CPU/C++ and OpenCL) OpenCL kernels use #include "Bullet3Collision/NarrowPhaseCollision/shared/b3Contact4Data.h" Increase number of batches back to 250 (from 50), need to fix this hard coded number (see https://github.com/erwincoumans/bullet3/issues/12) Work towards GJK/EPA, in addition to SAT/clipping (early on) --- .../Shaders/createShadowMapInstancingPS.h | 3 - .../Shaders/createShadowMapInstancingVS.h | 10 - btgui/OpenGLWindow/Shaders/instancingPS.h | 5 - btgui/OpenGLWindow/Shaders/instancingVS.h | 14 - btgui/OpenGLWindow/Shaders/pointSpritePS.h | 6 - btgui/OpenGLWindow/Shaders/pointSpriteVS.h | 12 - .../Shaders/useShadowMapInstancingPS.h | 10 - .../Shaders/useShadowMapInstancingVS.h | 16 - build3/lcpp.lua | 15 +- build3/premake4.lua | 4 +- .../NarrowPhaseCollision/b3Contact4.h | 17 - .../shared/b3Contact4Data.h | 26 +- .../kernels/sapFastKernels.h | 36 --- .../BroadphaseCollision/kernels/sapKernels.h | 45 --- .../NarrowphaseCollision/b3ContactCache.cpp | 200 +++++------- .../NarrowphaseCollision/b3ContactCache.h | 173 ++-------- .../b3ConvexHullContact.cpp | 79 +++-- .../kernels/bvhTraversal.h | 29 -- .../kernels/primitiveContacts.cl | 34 +- .../kernels/primitiveContacts.h | 281 +++------------- .../kernels/satClipHullContacts.cl | 55 ++-- .../kernels/satClipHullContacts.h | 303 ++++-------------- .../NarrowphaseCollision/kernels/satKernels.h | 171 ---------- .../kernels/BoundSearchKernelsCL.h | 22 -- .../kernels/FillKernelsCL.h | 19 -- .../kernels/PrefixScanKernelsCL.h | 28 -- .../kernels/PrefixScanKernelsFloat4CL.h | 28 -- .../kernels/RadixSort32KernelsCL.h | 164 ---------- .../Raycast/kernels/rayCastKernels.h | 57 ---- .../RigidBody/b3GpuBatchingPgsSolver.cpp | 6 +- .../RigidBody/b3GpuRigidBodyPipeline.cpp | 2 +- .../RigidBody/kernels/batchingKernels.cl | 22 +- .../RigidBody/kernels/batchingKernels.h | 134 +++----- .../RigidBody/kernels/batchingKernelsNew.cl | 76 +++-- .../RigidBody/kernels/batchingKernelsNew.h | 154 +++++---- .../RigidBody/kernels/integrateKernel.h | 13 - .../RigidBody/kernels/jointSolver.h | 159 --------- .../RigidBody/kernels/solveContact.cl | 15 - .../RigidBody/kernels/solveContact.h | 122 ------- .../RigidBody/kernels/solveFriction.cl | 15 - .../RigidBody/kernels/solveFriction.h | 116 ------- .../RigidBody/kernels/solverSetup.cl | 20 +- .../RigidBody/kernels/solverSetup.h | 184 +++-------- .../RigidBody/kernels/solverSetup2.cl | 31 +- .../RigidBody/kernels/solverSetup2.h | 188 +++-------- .../RigidBody/kernels/solverUtils.cl | 25 +- .../RigidBody/kernels/solverUtils.h | 223 +++---------- .../RigidBody/kernels/updateAabbsKernel.h | 30 -- test/OpenCL/KernelLaunch/main.cpp | 221 +++++++++++++ test/OpenCL/KernelLaunch/premake4.lua | 33 ++ 50 files changed, 920 insertions(+), 2731 deletions(-) create mode 100644 test/OpenCL/KernelLaunch/main.cpp create mode 100644 test/OpenCL/KernelLaunch/premake4.lua diff --git a/btgui/OpenGLWindow/Shaders/createShadowMapInstancingPS.h b/btgui/OpenGLWindow/Shaders/createShadowMapInstancingPS.h index 6154ea364..8b2dce8f1 100644 --- a/btgui/OpenGLWindow/Shaders/createShadowMapInstancingPS.h +++ b/btgui/OpenGLWindow/Shaders/createShadowMapInstancingPS.h @@ -2,10 +2,7 @@ static const char* createShadowMapInstancingFragmentShader= \ "#version 330\n" "precision highp float;\n" -"\n" -"\n" "layout(location = 0) out float fragmentdepth;\n" -"\n" "void main(void)\n" "{\n" " fragmentdepth = gl_FragCoord.z;\n" diff --git a/btgui/OpenGLWindow/Shaders/createShadowMapInstancingVS.h b/btgui/OpenGLWindow/Shaders/createShadowMapInstancingVS.h index b8284c7a2..2a8aef96b 100644 --- a/btgui/OpenGLWindow/Shaders/createShadowMapInstancingVS.h +++ b/btgui/OpenGLWindow/Shaders/createShadowMapInstancingVS.h @@ -2,8 +2,6 @@ static const char* createShadowMapInstancingVertexShader= \ "#version 330\n" "precision highp float;\n" -"\n" -"\n" "layout (location = 0) in vec4 position;\n" "layout (location = 1) in vec4 instance_position;\n" "layout (location = 2) in vec4 instance_quaternion;\n" @@ -11,11 +9,7 @@ static const char* createShadowMapInstancingVertexShader= \ "layout (location = 4) in vec3 vertexnormal;\n" "layout (location = 5) in vec4 instance_color;\n" "layout (location = 6) in vec3 instance_scale;\n" -"\n" -"\n" "uniform mat4 depthMVP;\n" -"\n" -"\n" "vec4 quatMul ( in vec4 q1, in vec4 q2 )\n" "{\n" " vec3 im = q1.w * q2.xyz + q1.xyz * q2.w + cross ( q1.xyz, q2.xyz );\n" @@ -23,7 +17,6 @@ static const char* createShadowMapInstancingVertexShader= \ " float re = dot ( dt, vec4 ( -1.0, -1.0, -1.0, 1.0 ) );\n" " return vec4 ( im, re );\n" "}\n" -"\n" "vec4 quatFromAxisAngle(vec4 axis, in float angle)\n" "{\n" " float cah = cos(angle*0.5);\n" @@ -45,8 +38,6 @@ static const char* createShadowMapInstancingVertexShader= \ " vec4 temp = quatMul ( q, p );\n" " return quatMul ( temp, vec4 ( -q.x, -q.y, -q.z, q.w ) );\n" "}\n" -"\n" -"\n" "void main(void)\n" "{\n" " vec4 q = instance_quaternion;\n" @@ -54,5 +45,4 @@ static const char* createShadowMapInstancingVertexShader= \ " vec4 vertexPos = depthMVP * vec4( (instance_position+localcoord).xyz,1);\n" " gl_Position = vertexPos;\n" "}\n" -"\n" ; diff --git a/btgui/OpenGLWindow/Shaders/instancingPS.h b/btgui/OpenGLWindow/Shaders/instancingPS.h index e23ae1c41..cf52431be 100644 --- a/btgui/OpenGLWindow/Shaders/instancingPS.h +++ b/btgui/OpenGLWindow/Shaders/instancingPS.h @@ -2,26 +2,21 @@ static const char* instancingFragmentShader= \ "#version 330\n" "precision highp float;\n" -"\n" "in Fragment\n" "{\n" " vec4 color;\n" "} fragment;\n" -"\n" "in Vert\n" "{\n" " vec2 texcoord;\n" "} vert;\n" -"\n" "uniform sampler2D Diffuse;\n" "in vec3 lightDir,normal,ambient;\n" "out vec4 color;\n" -"\n" "void main_textured(void)\n" "{\n" " color = vec4(0.1,0.2,0.3,0.3);\n" "}\n" -"\n" "void main(void)\n" "{\n" " vec4 texel = fragment.color*texture(Diffuse,vert.texcoord);//fragment.color;\n" diff --git a/btgui/OpenGLWindow/Shaders/instancingVS.h b/btgui/OpenGLWindow/Shaders/instancingVS.h index bc6059421..94abae198 100644 --- a/btgui/OpenGLWindow/Shaders/instancingVS.h +++ b/btgui/OpenGLWindow/Shaders/instancingVS.h @@ -2,8 +2,6 @@ static const char* instancingVertexShader= \ "#version 330\n" "precision highp float;\n" -"\n" -"\n" "layout (location = 0) in vec4 position;\n" "layout (location = 1) in vec4 instance_position;\n" "layout (location = 2) in vec4 instance_quaternion;\n" @@ -11,22 +9,16 @@ static const char* instancingVertexShader= \ "layout (location = 4) in vec3 vertexnormal;\n" "layout (location = 5) in vec4 instance_color;\n" "layout (location = 6) in vec3 instance_scale;\n" -"\n" -"\n" "uniform mat4 ModelViewMatrix;\n" "uniform mat4 ProjectionMatrix;\n" -"\n" "out Fragment\n" "{\n" " vec4 color;\n" "} fragment;\n" -"\n" "out Vert\n" "{\n" " vec2 texcoord;\n" "} vert;\n" -"\n" -"\n" "vec4 quatMul ( in vec4 q1, in vec4 q2 )\n" "{\n" " vec3 im = q1.w * q2.xyz + q1.xyz * q2.w + cross ( q1.xyz, q2.xyz );\n" @@ -34,7 +26,6 @@ static const char* instancingVertexShader= \ " float re = dot ( dt, vec4 ( -1.0, -1.0, -1.0, 1.0 ) );\n" " return vec4 ( im, re );\n" "}\n" -"\n" "vec4 quatFromAxisAngle(vec4 axis, in float angle)\n" "{\n" " float cah = cos(angle*0.5);\n" @@ -56,9 +47,7 @@ static const char* instancingVertexShader= \ " vec4 temp = quatMul ( q, p );\n" " return quatMul ( temp, vec4 ( -q.x, -q.y, -q.z, q.w ) );\n" "}\n" -"\n" "out vec3 lightDir,normal,ambient;\n" -"\n" "void main(void)\n" "{\n" " vec4 q = instance_quaternion;\n" @@ -68,18 +57,15 @@ static const char* instancingVertexShader= \ " vec4 local_normal = (quatRotate3( vertexnormal,q));\n" " vec3 light_pos = vec3(-0.3,0.1,0.1);\n" " normal = local_normal.xyz;//normalize(ModelViewMatrix * local_normal).xyz;\n" -"\n" " lightDir = normalize(light_pos);//gl_LightSource[0].position.xyz));\n" "// lightDir = normalize(vec3(gl_LightSource[0].position));\n" " \n" " vec4 axis = vec4(1,1,1,0);\n" " vec4 localcoord = quatRotate3( position.xyz*instance_scale,q);\n" " vec4 vertexPos = ProjectionMatrix * ModelViewMatrix *(instance_position+localcoord);\n" -"\n" " gl_Position = vertexPos;\n" " \n" " fragment.color = instance_color;\n" " vert.texcoord = uvcoords;\n" "}\n" -"\n" ; diff --git a/btgui/OpenGLWindow/Shaders/pointSpritePS.h b/btgui/OpenGLWindow/Shaders/pointSpritePS.h index c5ccf1f42..c6d73928d 100644 --- a/btgui/OpenGLWindow/Shaders/pointSpritePS.h +++ b/btgui/OpenGLWindow/Shaders/pointSpritePS.h @@ -2,22 +2,16 @@ static const char* pointSpriteFragmentShader= \ "#version 330\n" "precision highp float;\n" -"\n" "in Fragment\n" "{\n" " vec4 color;\n" "} fragment;\n" -"\n" -"\n" "in vec3 ambient;\n" -"\n" "out vec4 color;\n" -"\n" "void main_textured(void)\n" "{\n" " color = fragment.color;//texture2D(Diffuse,vert.texcoord);//fragment.color;\n" "}\n" -"\n" "void main(void)\n" "{\n" " vec3 N;\n" diff --git a/btgui/OpenGLWindow/Shaders/pointSpriteVS.h b/btgui/OpenGLWindow/Shaders/pointSpriteVS.h index 091ba201a..1234a37a5 100644 --- a/btgui/OpenGLWindow/Shaders/pointSpriteVS.h +++ b/btgui/OpenGLWindow/Shaders/pointSpriteVS.h @@ -2,34 +2,23 @@ static const char* pointSpriteVertexShader= \ "#version 330\n" "precision highp float;\n" -"\n" -"\n" -"\n" "layout (location = 0) in vec4 position;\n" "layout (location = 1) in vec4 instance_position;\n" "layout (location = 3) in vec2 uvcoords;\n" "layout (location = 4) in vec3 vertexnormal;\n" "layout (location = 5) in vec4 instance_color;\n" "layout (location = 6) in vec3 instance_scale;\n" -"\n" -"\n" "uniform float screenWidth = 700.f;\n" "uniform mat4 ModelViewMatrix;\n" "uniform mat4 ProjectionMatrix;\n" -"\n" "out Fragment\n" "{\n" " vec4 color;\n" "} fragment;\n" -"\n" -"\n" -"\n" "//\n" "// vector rotation via quaternion\n" "//\n" -"\n" "out vec3 ambient;\n" -"\n" "void main(void)\n" "{\n" " ambient = vec3(0.3,.3,0.3);\n" @@ -41,7 +30,6 @@ static const char* pointSpriteVertexShader= \ " float dist = length(posEye);\n" " float pointRadius = 1.f;\n" " gl_PointSize = instance_scale.x * pointRadius * (screenWidth / dist);\n" -"\n" " gl_Position = vertexPos;\n" " \n" " fragment.color = instance_color;\n" diff --git a/btgui/OpenGLWindow/Shaders/useShadowMapInstancingPS.h b/btgui/OpenGLWindow/Shaders/useShadowMapInstancingPS.h index f4b2af227..92f545263 100644 --- a/btgui/OpenGLWindow/Shaders/useShadowMapInstancingPS.h +++ b/btgui/OpenGLWindow/Shaders/useShadowMapInstancingPS.h @@ -2,27 +2,19 @@ static const char* useShadowMapInstancingFragmentShader= \ "#version 330 core\n" "//precision highp float;\n" -"\n" "in Fragment\n" "{\n" " vec4 color;\n" "} fragment;\n" -"\n" "in Vert\n" "{\n" " vec2 texcoord;\n" "} vert;\n" -"\n" "uniform sampler2D Diffuse;\n" "uniform sampler2DShadow shadowMap;\n" -"\n" "in vec3 lightDir,normal,ambient;\n" "in vec4 ShadowCoord;\n" -"\n" "out vec4 color;\n" -"\n" -"\n" -"\n" "void main(void)\n" "{\n" " vec4 texel = fragment.color*texture(Diffuse,vert.texcoord);//fragment.color;\n" @@ -41,8 +33,6 @@ static const char* useShadowMapInstancingFragmentShader= \ " \n" " float bias = 0.005*tan(acos(intensity));\n" " bias = clamp(bias, 0,0.01);\n" -"\n" -"\n" " float visibility = texture(shadowMap, vec3(ShadowCoord.xy,(ShadowCoord.z-bias)/ShadowCoord.w));\n" " \n" " intensity*=2;\n" diff --git a/btgui/OpenGLWindow/Shaders/useShadowMapInstancingVS.h b/btgui/OpenGLWindow/Shaders/useShadowMapInstancingVS.h index 473534c96..f8ed25a2d 100644 --- a/btgui/OpenGLWindow/Shaders/useShadowMapInstancingVS.h +++ b/btgui/OpenGLWindow/Shaders/useShadowMapInstancingVS.h @@ -2,8 +2,6 @@ static const char* useShadowMapInstancingVertexShader= \ "#version 330 \n" "precision highp float;\n" -"\n" -"\n" "layout (location = 0) in vec4 position;\n" "layout (location = 1) in vec4 instance_position;\n" "layout (location = 2) in vec4 instance_quaternion;\n" @@ -11,26 +9,19 @@ static const char* useShadowMapInstancingVertexShader= \ "layout (location = 4) in vec3 vertexnormal;\n" "layout (location = 5) in vec4 instance_color;\n" "layout (location = 6) in vec3 instance_scale;\n" -"\n" -"\n" "uniform mat4 ModelViewMatrix;\n" "uniform mat4 ProjectionMatrix;\n" "uniform mat4 DepthBiasModelViewProjectionMatrix;\n" "uniform mat4 MVP;\n" -"\n" "out vec4 ShadowCoord;\n" -"\n" "out Fragment\n" "{\n" " vec4 color;\n" "} fragment;\n" -"\n" "out Vert\n" "{\n" " vec2 texcoord;\n" "} vert;\n" -"\n" -"\n" "vec4 quatMul ( in vec4 q1, in vec4 q2 )\n" "{\n" " vec3 im = q1.w * q2.xyz + q1.xyz * q2.w + cross ( q1.xyz, q2.xyz );\n" @@ -38,7 +29,6 @@ static const char* useShadowMapInstancingVertexShader= \ " float re = dot ( dt, vec4 ( -1.0, -1.0, -1.0, 1.0 ) );\n" " return vec4 ( im, re );\n" "}\n" -"\n" "vec4 quatFromAxisAngle(vec4 axis, in float angle)\n" "{\n" " float cah = cos(angle*0.5);\n" @@ -60,9 +50,7 @@ static const char* useShadowMapInstancingVertexShader= \ " vec4 temp = quatMul ( q, p );\n" " return quatMul ( temp, vec4 ( -q.x, -q.y, -q.z, q.w ) );\n" "}\n" -"\n" "out vec3 lightDir,normal,ambient;\n" -"\n" "void main(void)\n" "{\n" " vec4 q = instance_quaternion;\n" @@ -72,18 +60,14 @@ static const char* useShadowMapInstancingVertexShader= \ " vec4 worldNormal = (quatRotate3( vertexnormal,q));\n" " vec3 light_pos = vec3(-5.f,100,-40);\n" " normal = normalize(worldNormal).xyz;\n" -"\n" " lightDir = normalize(light_pos);//gl_LightSource[0].position.xyz));\n" " \n" " vec4 axis = vec4(1,1,1,0);\n" " vec4 localcoord = quatRotate3( position.xyz*instance_scale,q);\n" " vec4 vertexPos = MVP* vec4((instance_position+localcoord).xyz,1);\n" -"\n" " gl_Position = vertexPos;\n" " ShadowCoord = DepthBiasModelViewProjectionMatrix * vec4((instance_position+localcoord).xyz,1);\n" -"\n" " fragment.color = instance_color;\n" " vert.texcoord = uvcoords;\n" "}\n" -"\n" ; diff --git a/build3/lcpp.lua b/build3/lcpp.lua index c42f52888..1c66e3699 100644 --- a/build3/lcpp.lua +++ b/build3/lcpp.lua @@ -98,7 +98,7 @@ lcpp.LCPP_LUA = false -- whether to use lcpp to preprocess Lua code (l lcpp.LCPP_FFI = true -- whether to use lcpp as LuaJIT ffi PreProcessor (if used in luaJIT) lcpp.LCPP_TEST = false -- whether to run lcpp unit tests when loading lcpp module lcpp.ENV = {} -- static predefines (env-like) -lcpp.FAST = false -- perf. tweaks when enabled. con: breaks minor stuff like __LINE__ macros +lcpp.FAST = true -- perf. tweaks when enabled. con: breaks minor stuff like __LINE__ macros lcpp.DEBUG = false -- PREDEFINES @@ -438,7 +438,7 @@ local function processLine(state, line) --[[ APPLY MACROS ]]-- - line = state:apply(line); + --line = state:apply(line); return line end @@ -470,7 +470,7 @@ local function processLine2(state, line) if elseif_ then state:elseBlock(state:parseExpr(elseif_)) end if else_ then state:elseBlock(true) end if endif then state:closeBlock() end - return -- remove structural directives + return line end end end @@ -520,7 +520,7 @@ local function processLine2(state, line) state:define(macroname, replacement) end - return + return line end -- ignore, because we dont have any pragma directives yet @@ -537,7 +537,7 @@ local function processLine2(state, line) --[[ APPLY MACROS ]]-- - line = state:apply(line); + --line = state:apply(line); return line end @@ -551,8 +551,11 @@ local function doWork(state) local input = state:getLine() if not input then break end local output = processLine(state, input) - if not lcpp.FAST and not output then output = "" end -- output empty skipped lines + if not lcpp.FAST and not output then + output = "" end -- output empty skipped lines + if lcpp.DEBUG then output = output.." -- "..input end -- input as comment when DEBUG + if output then coroutine.yield(output) end end if (oldIndent ~= state:getIndent()) then error("indentation level must be balanced within a file. was:"..oldIndent.." is:"..state:getIndent()) end diff --git a/build3/premake4.lua b/build3/premake4.lua index 443ac0dda..75c88712d 100644 --- a/build3/premake4.lua +++ b/build3/premake4.lua @@ -116,7 +116,8 @@ if not _OPTIONS["ios"] then include "../Demos3/GpuGuiInitialize" include "../test/OpenCL/BasicInitialize" --- include "../test/OpenCL/BroadphaseCollision" + include "../test/OpenCL/KernelLaunch"-- + include "../test/OpenCL/BroadphaseCollision" -- include "../test/OpenCL/NarrowphaseCollision" include "../test/OpenCL/ParallelPrimitives" include "../test/OpenCL/RadixSortBenchmark" @@ -149,6 +150,7 @@ if not _OPTIONS["ios"] then end + if _OPTIONS["bullet2gpu"] then include "../src/LinearMath" include "../src/BulletCollision" diff --git a/src/Bullet3Collision/NarrowPhaseCollision/b3Contact4.h b/src/Bullet3Collision/NarrowPhaseCollision/b3Contact4.h index c3f2daf96..917112b1b 100644 --- a/src/Bullet3Collision/NarrowPhaseCollision/b3Contact4.h +++ b/src/Bullet3Collision/NarrowPhaseCollision/b3Contact4.h @@ -23,23 +23,6 @@ B3_ATTRIBUTE_ALIGNED16(struct) b3Contact4 : public b3Contact4Data { B3_DECLARE_ALIGNED_ALLOCATOR(); - b3Vector3 m_worldPos[4]; - b3Vector3 m_worldNormal; -// float m_restituitionCoeff; -// float m_frictionCoeff; - unsigned short m_restituitionCoeffCmp; - unsigned short m_frictionCoeffCmp; - int m_batchIdx; - - int m_bodyAPtrAndSignBit; - int m_bodyBPtrAndSignBit; - - int m_childIndexA; - int m_childIndexB; - - int m_unused1; - int m_unused2; - int getBodyA()const {return abs(m_bodyAPtrAndSignBit);} int getBodyB()const {return abs(m_bodyBPtrAndSignBit);} bool isBodyAFixed()const { return m_bodyAPtrAndSignBit<0;} diff --git a/src/Bullet3Collision/NarrowPhaseCollision/shared/b3Contact4Data.h b/src/Bullet3Collision/NarrowPhaseCollision/shared/b3Contact4Data.h index 9be5067c8..0daf823bb 100644 --- a/src/Bullet3Collision/NarrowPhaseCollision/shared/b3Contact4Data.h +++ b/src/Bullet3Collision/NarrowPhaseCollision/shared/b3Contact4Data.h @@ -3,13 +3,16 @@ #include "Bullet3Common/shared/b3Float4.h" -typedef struct +typedef struct b3Contact4Data b3Contact4Data_t; + +struct b3Contact4Data { b3Float4 m_worldPos[4]; +// b3Float4 m_localPosB[4]; b3Float4 m_worldNormal; // w: m_nPoints - unsigned int m_coeffs; - unsigned int m_batchIdx; - + unsigned short m_restituitionCoeffCmp; + unsigned short m_frictionCoeffCmp; + int m_batchIdx; int m_bodyAPtrAndSignBit;//x:m_bodyAPtr, y:m_bodyBPtr int m_bodyBPtrAndSignBit; @@ -18,6 +21,19 @@ typedef struct int m_unused1; int m_unused2; -} b3Contact4Data; + b3Float4 m_localPosA; +}; + +inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact) +{ + return (int)contact->m_worldNormal.w; +}; + +inline void b3Contact4Data_setNumPoints(struct b3Contact4Data* contact, int numPoints) +{ + contact->m_worldNormal.w = (float)numPoints; +}; + + #endif //B3_CONTACT4DATA_H \ No newline at end of file diff --git a/src/Bullet3OpenCL/BroadphaseCollision/kernels/sapFastKernels.h b/src/Bullet3OpenCL/BroadphaseCollision/kernels/sapFastKernels.h index 588039e37..65d167c82 100644 --- a/src/Bullet3OpenCL/BroadphaseCollision/kernels/sapFastKernels.h +++ b/src/Bullet3OpenCL/BroadphaseCollision/kernels/sapFastKernels.h @@ -2,22 +2,18 @@ static const char* sapFastCL= \ "/*\n" "Copyright (c) 2012 Advanced Micro Devices, Inc. \n" -"\n" "This software is provided 'as-is', without any express or implied warranty.\n" "In no event will the authors be held liable for any damages arising from the use of this software.\n" "Permission is granted to anyone to use this software for any purpose, \n" "including commercial applications, and to alter it and redistribute it freely, \n" "subject to the following restrictions:\n" -"\n" "1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" "2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" "3. This notice may not be removed or altered from any source distribution.\n" "*/\n" "//Originally written by Erwin Coumans\n" -"\n" "#define NEW_PAIR_MARKER -1\n" "#define REMOVED_PAIR_MARKER -2\n" -"\n" "typedef struct \n" "{\n" " union\n" @@ -33,7 +29,6 @@ static const char* sapFastCL= \ " int m_maxIndices[4];\n" " };\n" "} btAabbCL;\n" -"\n" "typedef struct \n" "{\n" " union\n" @@ -41,7 +36,6 @@ static const char* sapFastCL= \ " unsigned int m_key;\n" " unsigned int x;\n" " };\n" -"\n" " union\n" " {\n" " unsigned int m_value;\n" @@ -49,8 +43,6 @@ static const char* sapFastCL= \ " \n" " };\n" "}b3SortData;\n" -"\n" -"\n" "/// conservative test for overlap between two aabbs\n" "bool TestAabbAgainstAabb2(const btAabbCL* aabb1, __local const btAabbCL* aabb2);\n" "bool TestAabbAgainstAabb2(const btAabbCL* aabb1, __local const btAabbCL* aabb2)\n" @@ -65,7 +57,6 @@ static const char* sapFastCL= \ " overlap = (aabb1->m_min.y > aabb2->m_max.y || aabb1->m_max.y < aabb2->m_min.y) ? false : overlap;\n" " return overlap;\n" "}\n" -"\n" "__kernel void computePairsIncremental3dSapKernel( __global const uint2* objectMinMaxIndexGPUaxis0,\n" " __global const uint2* objectMinMaxIndexGPUaxis1,\n" " __global const uint2* objectMinMaxIndexGPUaxis2,\n" @@ -88,7 +79,6 @@ static const char* sapFastCL= \ " int i = get_global_id(0);\n" " if (i>=numObjects)\n" " return;\n" -"\n" " __global const uint2* objectMinMaxIndexGPU[3][2];\n" " objectMinMaxIndexGPU[0][0]=objectMinMaxIndexGPUaxis0;\n" " objectMinMaxIndexGPU[1][0]=objectMinMaxIndexGPUaxis1;\n" @@ -96,7 +86,6 @@ static const char* sapFastCL= \ " objectMinMaxIndexGPU[0][1]=objectMinMaxIndexGPUaxis0prev;\n" " objectMinMaxIndexGPU[1][1]=objectMinMaxIndexGPUaxis1prev;\n" " objectMinMaxIndexGPU[2][1]=objectMinMaxIndexGPUaxis2prev;\n" -"\n" " __global const b3SortData* sortedAxisGPU[3][2];\n" " sortedAxisGPU[0][0] = sortedAxisGPU0;\n" " sortedAxisGPU[1][0] = sortedAxisGPU1;\n" @@ -104,20 +93,16 @@ static const char* sapFastCL= \ " sortedAxisGPU[0][1] = sortedAxisGPU0prev;\n" " sortedAxisGPU[1][1] = sortedAxisGPU1prev;\n" " sortedAxisGPU[2][1] = sortedAxisGPU2prev;\n" -"\n" " int m_currentBuffer = 0;\n" -"\n" " for (int axis=0;axis<3;axis++)\n" " {\n" " //int i = checkObjects[a];\n" -"\n" " unsigned int curMinIndex = objectMinMaxIndexGPU[axis][m_currentBuffer][i].x;\n" " unsigned int curMaxIndex = objectMinMaxIndexGPU[axis][m_currentBuffer][i].y;\n" " unsigned int prevMinIndex = objectMinMaxIndexGPU[axis][1-m_currentBuffer][i].x;\n" " int dmin = curMinIndex - prevMinIndex;\n" " \n" " unsigned int prevMaxIndex = objectMinMaxIndexGPU[axis][1-m_currentBuffer][i].y;\n" -"\n" " int dmax = curMaxIndex - prevMaxIndex;\n" " \n" " for (int otherbuffer = 0;otherbuffer<2;otherbuffer++)\n" @@ -132,23 +117,18 @@ static const char* sapFastCL= \ " if (otherIndex!=i)\n" " {\n" " bool otherIsMax = ((otherIndex2&1)!=0);\n" -"\n" " if (otherIsMax)\n" " {\n" " \n" " bool overlap = true;\n" -"\n" " for (int ax=0;ax<3;ax++)\n" " {\n" " if ((objectMinMaxIndexGPU[ax][m_currentBuffer][i].x > objectMinMaxIndexGPU[ax][m_currentBuffer][otherIndex].y) ||\n" " (objectMinMaxIndexGPU[ax][m_currentBuffer][i].y < objectMinMaxIndexGPU[ax][m_currentBuffer][otherIndex].x))\n" " overlap=false;\n" " }\n" -"\n" " // b3Assert(overlap2==overlap);\n" -"\n" " bool prevOverlap = true;\n" -"\n" " for (int ax=0;ax<3;ax++)\n" " {\n" " if ((objectMinMaxIndexGPU[ax][1-m_currentBuffer][i].x > objectMinMaxIndexGPU[ax][1-m_currentBuffer][otherIndex].y) ||\n" @@ -156,11 +136,8 @@ static const char* sapFastCL= \ " prevOverlap=false;\n" " }\n" " \n" -"\n" " //b3Assert(overlap==overlap2);\n" " \n" -"\n" -"\n" " if (dmin<0)\n" " {\n" " if (overlap && !prevOverlap)\n" @@ -185,10 +162,8 @@ static const char* sapFastCL= \ " addedHostPairsGPU[curPair].y = newPair.y;\n" " addedHostPairsGPU[curPair].z = NEW_PAIR_MARKER;\n" " addedHostPairsGPU[curPair].w = NEW_PAIR_MARKER;\n" -"\n" " }\n" " }\n" -"\n" " }\n" " } \n" " else\n" @@ -216,7 +191,6 @@ static const char* sapFastCL= \ " removedHostPairsGPU[curPair].y = removedPair.y;\n" " removedHostPairsGPU[curPair].z = REMOVED_PAIR_MARKER;\n" " removedHostPairsGPU[curPair].w = REMOVED_PAIR_MARKER;\n" -"\n" " }\n" " }\n" " }\n" @@ -240,7 +214,6 @@ static const char* sapFastCL= \ " {\n" " \n" " bool overlap = true;\n" -"\n" " for (int ax=0;ax<3;ax++)\n" " {\n" " if ((objectMinMaxIndexGPU[ax][m_currentBuffer][i].x > objectMinMaxIndexGPU[ax][m_currentBuffer][otherIndex].y) ||\n" @@ -248,9 +221,7 @@ static const char* sapFastCL= \ " overlap=false;\n" " }\n" " //b3Assert(overlap2==overlap);\n" -"\n" " bool prevOverlap = true;\n" -"\n" " for (int ax=0;ax<3;ax++)\n" " {\n" " if ((objectMinMaxIndexGPU[ax][1-m_currentBuffer][i].x > objectMinMaxIndexGPU[ax][1-m_currentBuffer][otherIndex].y) ||\n" @@ -258,7 +229,6 @@ static const char* sapFastCL= \ " prevOverlap=false;\n" " }\n" " \n" -"\n" " if (dmax>0)\n" " {\n" " if (overlap && !prevOverlap)\n" @@ -283,7 +253,6 @@ static const char* sapFastCL= \ " addedHostPairsGPU[curPair].y = newPair.y;\n" " addedHostPairsGPU[curPair].z = NEW_PAIR_MARKER;\n" " addedHostPairsGPU[curPair].w = NEW_PAIR_MARKER;\n" -"\n" " }\n" " }\n" " \n" @@ -326,16 +295,12 @@ static const char* sapFastCL= \ " }\n" " }//for (int otherbuffer\n" " }//for (int axis=0;\n" -"\n" -"\n" "}\n" -"\n" "//computePairsKernelBatchWrite\n" "__kernel void computePairsKernel( __global const btAabbCL* aabbs, volatile __global int4* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs)\n" "{\n" " int i = get_global_id(0);\n" " int localId = get_local_id(0);\n" -"\n" " __local int numActiveWgItems[1];\n" " __local int breakRequest[1];\n" " __local btAabbCL localAabbs[128];// = aabbs[i];\n" @@ -411,7 +376,6 @@ static const char* sapFastCL= \ " tmpPair.y = myPairs[p].y;\n" " tmpPair.z = NEW_PAIR_MARKER;\n" " tmpPair.w = NEW_PAIR_MARKER;\n" -"\n" " pairsOut[curPair+p] = tmpPair; //flush to main memory\n" " }\n" " }\n" diff --git a/src/Bullet3OpenCL/BroadphaseCollision/kernels/sapKernels.h b/src/Bullet3OpenCL/BroadphaseCollision/kernels/sapKernels.h index 3c06cf3f1..e627f0681 100644 --- a/src/Bullet3OpenCL/BroadphaseCollision/kernels/sapKernels.h +++ b/src/Bullet3OpenCL/BroadphaseCollision/kernels/sapKernels.h @@ -2,21 +2,17 @@ static const char* sapCL= \ "/*\n" "Copyright (c) 2012 Advanced Micro Devices, Inc. \n" -"\n" "This software is provided 'as-is', without any express or implied warranty.\n" "In no event will the authors be held liable for any damages arising from the use of this software.\n" "Permission is granted to anyone to use this software for any purpose, \n" "including commercial applications, and to alter it and redistribute it freely, \n" "subject to the following restrictions:\n" -"\n" "1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" "2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" "3. This notice may not be removed or altered from any source distribution.\n" "*/\n" "//Originally written by Erwin Coumans\n" -"\n" "#define NEW_PAIR_MARKER -1\n" -"\n" "typedef struct \n" "{\n" " union\n" @@ -32,8 +28,6 @@ static const char* sapCL= \ " int m_maxIndices[4];\n" " };\n" "} btAabbCL;\n" -"\n" -"\n" "/// conservative test for overlap between two aabbs\n" "bool TestAabbAgainstAabb2(const btAabbCL* aabb1, __local const btAabbCL* aabb2);\n" "bool TestAabbAgainstAabb2(const btAabbCL* aabb1, __local const btAabbCL* aabb2)\n" @@ -53,7 +47,6 @@ static const char* sapCL= \ " overlap = (aabb1->m_min.y > aabb2->m_max.y || aabb1->m_max.y < aabb2->m_min.y) ? false : overlap;\n" " return overlap;\n" "}\n" -"\n" "bool TestAabbAgainstAabb2Global(const btAabbCL* aabb1, __global const btAabbCL* aabb2);\n" "bool TestAabbAgainstAabb2Global(const btAabbCL* aabb1, __global const btAabbCL* aabb2)\n" "{\n" @@ -63,18 +56,14 @@ static const char* sapCL= \ " overlap = (aabb1->m_min.y > aabb2->m_max.y || aabb1->m_max.y < aabb2->m_min.y) ? false : overlap;\n" " return overlap;\n" "}\n" -"\n" -"\n" "__kernel void computePairsKernelTwoArrays( __global const btAabbCL* unsortedAabbs, __global const btAabbCL* sortedAabbs, volatile __global int4* pairsOut,volatile __global int* pairCount, int numUnsortedAabbs, int numSortedAabbs, int axis, int maxPairs)\n" "{\n" " int i = get_global_id(0);\n" " if (i>=numUnsortedAabbs)\n" " return;\n" -"\n" " int j = get_global_id(1);\n" " if (j>=numSortedAabbs)\n" " return;\n" -"\n" " if (TestAabbAgainstAabb2GlobalGlobal(&unsortedAabbs[i],&sortedAabbs[j]))\n" " {\n" " int4 myPair;\n" @@ -92,8 +81,6 @@ static const char* sapCL= \ " myPair.y = yIndex;\n" " myPair.z = NEW_PAIR_MARKER;\n" " myPair.w = NEW_PAIR_MARKER;\n" -"\n" -"\n" " int curPair = atomic_inc (pairCount);\n" " if (curPair=numObjects)\n" " return;\n" -"\n" " sortedAabbs[i] = aabbs[sortData[i].y];\n" "}\n" -"\n" -"\n" -"\n" "__kernel void prepareSumVarianceKernel( __global const btAabbCL* aabbs, __global float4* sum, __global float4* sum2,int numAabbs)\n" "{\n" " int i = get_global_id(0);\n" diff --git a/src/Bullet3OpenCL/NarrowphaseCollision/b3ContactCache.cpp b/src/Bullet3OpenCL/NarrowphaseCollision/b3ContactCache.cpp index d33cf9fdb..c90c4c685 100644 --- a/src/Bullet3OpenCL/NarrowphaseCollision/b3ContactCache.cpp +++ b/src/Bullet3OpenCL/NarrowphaseCollision/b3ContactCache.cpp @@ -1,3 +1,4 @@ + #if 0 /* Bullet Continuous Collision Detection and Physics Library @@ -18,77 +19,22 @@ subject to the following restrictions: #include "b3ContactCache.h" #include "Bullet3Common/b3Transform.h" +#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Contact4Data.h" b3Scalar gContactBreakingThreshold = b3Scalar(0.02); -b3Scalar m_contactBreakingThreshold; -b3Scalar m_contactProcessingThreshold; ///gContactCalcArea3Points will approximate the convex hull area using 3 points ///when setting it to false, it will use 4 points to compute the area: it is more accurate but slower bool gContactCalcArea3Points = true; -b3ContactCache::b3ContactCache() -:m_index1a(0) -{ -} - - -#ifdef DEBUG_PERSISTENCY -#include -void b3ContactCache::DebugPersistency() -{ - int i; - printf("DebugPersistency : numPoints %d\n",m_cachedPoints); - for (i=0;i1) - printf("error in clearUserCache\n"); - } - } - btAssert(occurance<=0); -#endif //DEBUG_PERSISTENCY - - if (pt.m_userPersistentData && gContactDestroyedCallback) - { - (*gContactDestroyedCallback)(pt.m_userPersistentData); - pt.m_userPersistentData = 0; - } - -#ifdef DEBUG_PERSISTENCY - DebugPersistency(); -#endif - } - - -} - -static inline b3Scalar calcArea4Points(const btVector3 &p0,const btVector3 &p1,const btVector3 &p2,const btVector3 &p3) +static inline b3Scalar calcArea4Points(const b3Vector3 &p0,const b3Vector3 &p1,const b3Vector3 &p2,const b3Vector3 &p3) { // It calculates possible 3 area constructed from random 4 points and returns the biggest one. - btVector3 a[3],b[3]; + b3Vector3 a[3],b[3]; a[0] = p0 - p1; a[1] = p0 - p2; a[2] = p0 - p3; @@ -97,14 +43,16 @@ static inline b3Scalar calcArea4Points(const btVector3 &p0,const btVector3 &p1,c b[2] = p1 - p2; //todo: Following 3 cross production can be easily optimized by SIMD. - btVector3 tmp0 = a[0].cross(b[0]); - btVector3 tmp1 = a[1].cross(b[1]); - btVector3 tmp2 = a[2].cross(b[2]); + b3Vector3 tmp0 = a[0].cross(b[0]); + b3Vector3 tmp1 = a[1].cross(b[1]); + b3Vector3 tmp2 = a[2].cross(b[2]); - return btMax(btMax(tmp0.length2(),tmp1.length2()),tmp2.length2()); + return b3Max(b3Max(tmp0.length2(),tmp1.length2()),tmp2.length2()); } +#if 0 -int b3ContactCache::sortCachedPoints(const btManifoldPoint& pt) +//using localPointA for all points +int b3ContactCache::sortCachedPoints(const b3Vector3& pt) { //calculate 4 possible cases areas, and take biggest area //also need to keep 'deepest' @@ -129,32 +77,32 @@ int b3ContactCache::sortCachedPoints(const btManifoldPoint& pt) { if (maxPenetrationIndex != 0) { - btVector3 a0 = pt.m_localPointA-m_pointCache[1].m_localPointA; - btVector3 b0 = m_pointCache[3].m_localPointA-m_pointCache[2].m_localPointA; - btVector3 cross = a0.cross(b0); + b3Vector3 a0 = pt.m_localPointA-m_pointCache[1].m_localPointA; + b3Vector3 b0 = m_pointCache[3].m_localPointA-m_pointCache[2].m_localPointA; + b3Vector3 cross = a0.cross(b0); res0 = cross.length2(); } if (maxPenetrationIndex != 1) { - btVector3 a1 = pt.m_localPointA-m_pointCache[0].m_localPointA; - btVector3 b1 = m_pointCache[3].m_localPointA-m_pointCache[2].m_localPointA; - btVector3 cross = a1.cross(b1); + b3Vector3 a1 = pt.m_localPointA-m_pointCache[0].m_localPointA; + b3Vector3 b1 = m_pointCache[3].m_localPointA-m_pointCache[2].m_localPointA; + b3Vector3 cross = a1.cross(b1); res1 = cross.length2(); } if (maxPenetrationIndex != 2) { - btVector3 a2 = pt.m_localPointA-m_pointCache[0].m_localPointA; - btVector3 b2 = m_pointCache[3].m_localPointA-m_pointCache[1].m_localPointA; - btVector3 cross = a2.cross(b2); + b3Vector3 a2 = pt.m_localPointA-m_pointCache[0].m_localPointA; + b3Vector3 b2 = m_pointCache[3].m_localPointA-m_pointCache[1].m_localPointA; + b3Vector3 cross = a2.cross(b2); res2 = cross.length2(); } if (maxPenetrationIndex != 3) { - btVector3 a3 = pt.m_localPointA-m_pointCache[0].m_localPointA; - btVector3 b3 = m_pointCache[2].m_localPointA-m_pointCache[1].m_localPointA; - btVector3 cross = a3.cross(b3); + b3Vector3 a3 = pt.m_localPointA-m_pointCache[0].m_localPointA; + b3Vector3 b3 = m_pointCache[2].m_localPointA-m_pointCache[1].m_localPointA; + b3Vector3 cross = a3.cross(b3); res3 = cross.length2(); } } @@ -176,23 +124,23 @@ int b3ContactCache::sortCachedPoints(const btManifoldPoint& pt) res3 = calcArea4Points(pt.m_localPointA,m_pointCache[0].m_localPointA,m_pointCache[1].m_localPointA,m_pointCache[2].m_localPointA); } } - btVector4 maxvec(res0,res1,res2,res3); + b3Vector4 maxvec(res0,res1,res2,res3); int biggestarea = maxvec.closestAxis4(); return biggestarea; } -int b3ContactCache::getCacheEntry(const btManifoldPoint& newPoint) const +int b3ContactCache::getCacheEntry(const b3Vector3& newPoint) const { b3Scalar shortestDist = getContactBreakingThreshold() * getContactBreakingThreshold(); int size = getNumContacts(); int nearestPoint = -1; for( int i = 0; i < size; i++ ) { - const btManifoldPoint &mp = m_pointCache[i]; + const b3Vector3 &mp = m_pointCache[i]; - btVector3 diffA = mp.m_localPointA- newPoint.m_localPointA; + b3Vector3 diffA = mp.m_localPointA- newPoint.m_localPointA; const b3Scalar distToManiPoint = diffA.dot(diffA); if( distToManiPoint < shortestDist ) { @@ -203,12 +151,9 @@ int b3ContactCache::getCacheEntry(const btManifoldPoint& newPoint) const return nearestPoint; } -int b3ContactCache::addManifoldPoint(const btManifoldPoint& newPoint, bool isPredictive) +int b3ContactCache::addManifoldPoint(const b3Vector3& newPoint) { - if (!isPredictive) - { - btAssert(validContactDistance(newPoint)); - } + b3Assert(validContactDistance(newPoint)); int insertIndex = getNumContacts(); if (insertIndex == MANIFOLD_CACHE_SIZE) @@ -230,73 +175,80 @@ int b3ContactCache::addManifoldPoint(const btManifoldPoint& newPoint, bool isPre if (insertIndex<0) insertIndex=0; - btAssert(m_pointCache[insertIndex].m_userPersistentData==0); + //b3Assert(m_pointCache[insertIndex].m_userPersistentData==0); m_pointCache[insertIndex] = newPoint; return insertIndex; } -b3Scalar b3ContactCache::getContactBreakingThreshold() const +#endif + +bool b3ContactCache::validContactDistance(const b3Vector3& pt) { - return m_contactBreakingThreshold; + return pt.w <= gContactBreakingThreshold; } - - -void b3ContactCache::refreshContactPoints(const btTransform& trA,const btTransform& trB) +void b3ContactCache::removeContactPoint(struct b3Contact4Data& newContactCache,int i) { - int i; -#ifdef DEBUG_PERSISTENCY - printf("refreshContactPoints posA = (%f,%f,%f) posB = (%f,%f,%f)\n", - trA.getOrigin().getX(), - trA.getOrigin().getY(), - trA.getOrigin().getZ(), - trB.getOrigin().getX(), - trB.getOrigin().getY(), - trB.getOrigin().getZ()); -#endif //DEBUG_PERSISTENCY - /// first refresh worldspace positions and distance - for (i=getNumContacts()-1;i>=0;i--) + int numContacts = b3Contact4Data_getNumPoints(&newContactCache); + if (i!=(numContacts-1)) { - btManifoldPoint &manifoldPoint = m_pointCache[i]; - manifoldPoint.m_positionWorldOnA = trA( manifoldPoint.m_localPointA ); - manifoldPoint.m_positionWorldOnB = trB( manifoldPoint.m_localPointB ); - manifoldPoint.m_distance1 = (manifoldPoint.m_positionWorldOnA - manifoldPoint.m_positionWorldOnB).dot(manifoldPoint.m_normalWorldOnB); - manifoldPoint.m_lifeTime++; + b3Swap(newContactCache.m_localPosA[i],newContactCache.m_localPosA[numContacts-1]); + b3Swap(newContactCache.m_localPosB[i],newContactCache.m_localPosB[numContacts-1]); + b3Swap(newContactCache.m_worldPos[i],newContactCache.m_worldPos[numContacts-1]); + } + b3Contact4Data_setNumPoints(&newContactCache,numContacts-1); + +} + +void b3ContactCache::refreshContactPoints(const b3Transform& trA,const b3Transform& trB, struct b3Contact4Data& contacts) +{ + + int numContacts = b3Contact4Data_getNumPoints(&contacts); + + + int i; + /// first refresh worldspace positions and distance + for (i=numContacts-1;i>=0;i--) + { + b3Vector3 worldPosA = trA( contacts.m_localPosA[i]); + b3Vector3 worldPosB = trB( contacts.m_localPosB[i]); + contacts.m_worldPos[i] = worldPosB; + float distance = (worldPosA - worldPosB).dot(contacts.m_worldNormal); + contacts.m_worldPos[i].w = distance; } /// then b3Scalar distance2d; - btVector3 projectedDifference,projectedPoint; - for (i=getNumContacts()-1;i>=0;i--) + b3Vector3 projectedDifference,projectedPoint; + for (i=numContacts-1;i>=0;i--) { - - btManifoldPoint &manifoldPoint = m_pointCache[i]; + b3Vector3 worldPosA = trA( contacts.m_localPosA[i]); + b3Vector3 worldPosB = trB( contacts.m_localPosB[i]); + b3Vector3&pt = contacts.m_worldPos[i]; //contact becomes invalid when signed distance exceeds margin (projected on contactnormal direction) - if (!validContactDistance(manifoldPoint)) + if (!validContactDistance(pt)) { - removeContactPoint(i); + removeContactPoint(contacts,i); } else { //contact also becomes invalid when relative movement orthogonal to normal exceeds margin - projectedPoint = manifoldPoint.m_positionWorldOnA - manifoldPoint.m_normalWorldOnB * manifoldPoint.m_distance1; - projectedDifference = manifoldPoint.m_positionWorldOnB - projectedPoint; + projectedPoint = contacts.m_worldPos[i] - contacts.m_worldNormal * contacts.m_worldPos[i].w; + projectedDifference = contacts.m_worldPos[i] - projectedPoint; distance2d = projectedDifference.dot(projectedDifference); - if (distance2d > getContactBreakingThreshold()*getContactBreakingThreshold() ) + if (distance2d > gContactBreakingThreshold*gContactBreakingThreshold ) { - removeContactPoint(i); + removeContactPoint(contacts,i); } else { - //contact point processed callback - if (gContactProcessedCallback) - (*gContactProcessedCallback)(manifoldPoint,(void*)m_body0,(void*)m_body1); + ////contact point processed callback + //if (gContactProcessedCallback) + // (*gContactProcessedCallback)(manifoldPoint,(void*)m_body0,(void*)m_body1); } } } -#ifdef DEBUG_PERSISTENCY - DebugPersistency(); -#endif // -} + +} diff --git a/src/Bullet3OpenCL/NarrowphaseCollision/b3ContactCache.h b/src/Bullet3OpenCL/NarrowphaseCollision/b3ContactCache.h index 7b3a19ea9..d6c9b0a07 100644 --- a/src/Bullet3OpenCL/NarrowphaseCollision/b3ContactCache.h +++ b/src/Bullet3OpenCL/NarrowphaseCollision/b3ContactCache.h @@ -1,7 +1,7 @@ /* Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/ +Copyright (c) 2003-2013 Erwin Coumans http://bulletphysics.org This software is provided 'as-is', without any express or implied warranty. In no event will the authors be held liable for any damages arising from the use of this software. @@ -18,23 +18,15 @@ subject to the following restrictions: #define B3_CONTACT_CACHE_H -#include "LinearMath/btVector3.h" -#include "LinearMath/btTransform.h" -#include "btManifoldPoint.h" -class btCollisionObject; -#include "LinearMath/btAlignedAllocator.h" +#include "Bullet3Common/b3Vector3.h" +#include "Bullet3Common/b3Transform.h" +#include "Bullet3Common/b3AlignedAllocator.h" -struct btCollisionResult; ///maximum contact breaking and merging threshold extern b3Scalar gContactBreakingThreshold; -//the enum starts at 1024 to avoid type conflicts with btTypedConstraint -enum btContactManifoldTypes -{ - MIN_CONTACT_MANIFOLD_TYPE = 1024, - BT_PERSISTENT_MANIFOLD_TYPE -}; + #define MANIFOLD_CACHE_SIZE 4 @@ -45,8 +37,6 @@ enum btContactManifoldTypes ///reduces the cache to 4 points, when more then 4 points are added, using following rules: ///the contact point with deepest penetration is always kept, and it tries to maximuze the area covered by the points ///note that some pairs of objects might have more then one contact manifold. - - B3_ATTRIBUTE_ALIGNED16( class) b3ContactCache { @@ -54,163 +44,36 @@ B3_ATTRIBUTE_ALIGNED16( class) b3ContactCache /// sort cached points so most isolated points come first - int sortCachedPoints(const btManifoldPoint& pt); + int sortCachedPoints(const b3Vector3& pt); - int findContactPoint(const btManifoldPoint* unUsed, int numUnused,const btManifoldPoint& pt); + public: - BT_DECLARE_ALIGNED_ALLOCATOR(); - - - int m_index1a; - - b3ContactCache(); - - b3ContactCache(const btCollisionObject* body0,const btCollisionObject* body1,int , b3Scalar contactBreakingThreshold,b3Scalar contactProcessingThreshold) - : btTypedObject(BT_PERSISTENT_MANIFOLD_TYPE), - m_body0(body0),m_body1(body1),m_cachedPoints(0), - m_contactBreakingThreshold(contactBreakingThreshold), - m_contactProcessingThreshold(contactProcessingThreshold) - { - } - - B3_FORCE_INLINE const btCollisionObject* getBody0() const { return m_body0;} - B3_FORCE_INLINE const btCollisionObject* getBody1() const { return m_body1;} - - void setBodies(const btCollisionObject* body0,const btCollisionObject* body1) - { - m_body0 = body0; - m_body1 = body1; - } - - void clearUserCache(btManifoldPoint& pt); - -#ifdef DEBUG_PERSISTENCY - void DebugPersistency(); -#endif // - - B3_FORCE_INLINE int getNumContacts() const { return m_cachedPoints;} - /// the setNumContacts API is usually not used, except when you gather/fill all contacts manually - void setNumContacts(int cachedPoints) - { - m_cachedPoints = cachedPoints; - } - - - B3_FORCE_INLINE const btManifoldPoint& getContactPoint(int index) const - { - btAssert(index < m_cachedPoints); - return m_pointCache[index]; - } - - B3_FORCE_INLINE btManifoldPoint& getContactPoint(int index) - { - btAssert(index < m_cachedPoints); - return m_pointCache[index]; - } + B3_DECLARE_ALIGNED_ALLOCATOR(); - void setContactBreakingThreshold(b3Scalar contactBreakingThreshold) - { - m_contactBreakingThreshold = contactBreakingThreshold; - } - - void setContactProcessingThreshold(b3Scalar contactProcessingThreshold) - { - m_contactProcessingThreshold = contactProcessingThreshold; - } - + int addManifoldPoint( const b3Vector3& newPoint); - - int getCacheEntry(const btManifoldPoint& newPoint) const; - - int addManifoldPoint( const btManifoldPoint& newPoint, bool isPredictive=false); - - void removeContactPoint (int index) + /*void replaceContactPoint(const b3Vector3& newPoint,int insertIndex) { - clearUserCache(m_pointCache[index]); - - int lastUsedIndex = getNumContacts() - 1; -// m_pointCache[index] = m_pointCache[lastUsedIndex]; - if(index != lastUsedIndex) - { - m_pointCache[index] = m_pointCache[lastUsedIndex]; - //get rid of duplicated userPersistentData pointer - m_pointCache[lastUsedIndex].m_userPersistentData = 0; - m_pointCache[lastUsedIndex].m_appliedImpulse = 0.f; - m_pointCache[lastUsedIndex].m_lateralFrictionInitialized = false; - m_pointCache[lastUsedIndex].m_appliedImpulseLateral1 = 0.f; - m_pointCache[lastUsedIndex].m_appliedImpulseLateral2 = 0.f; - m_pointCache[lastUsedIndex].m_lifeTime = 0; - } - - btAssert(m_pointCache[lastUsedIndex].m_userPersistentData==0); - m_cachedPoints--; - } - void replaceContactPoint(const btManifoldPoint& newPoint,int insertIndex) - { - btAssert(validContactDistance(newPoint)); - -#define MAINTAIN_PERSISTENCY 1 -#ifdef MAINTAIN_PERSISTENCY - int lifeTime = m_pointCache[insertIndex].getLifeTime(); - b3Scalar appliedImpulse = m_pointCache[insertIndex].m_appliedImpulse; - b3Scalar appliedLateralImpulse1 = m_pointCache[insertIndex].m_appliedImpulseLateral1; - b3Scalar appliedLateralImpulse2 = m_pointCache[insertIndex].m_appliedImpulseLateral2; -// bool isLateralFrictionInitialized = m_pointCache[insertIndex].m_lateralFrictionInitialized; - - - - btAssert(lifeTime>=0); - void* cache = m_pointCache[insertIndex].m_userPersistentData; - + b3Assert(validContactDistance(newPoint)); m_pointCache[insertIndex] = newPoint; - - m_pointCache[insertIndex].m_userPersistentData = cache; - m_pointCache[insertIndex].m_appliedImpulse = appliedImpulse; - m_pointCache[insertIndex].m_appliedImpulseLateral1 = appliedLateralImpulse1; - m_pointCache[insertIndex].m_appliedImpulseLateral2 = appliedLateralImpulse2; - - m_pointCache[insertIndex].m_appliedImpulse = appliedImpulse; - m_pointCache[insertIndex].m_appliedImpulseLateral1 = appliedLateralImpulse1; - m_pointCache[insertIndex].m_appliedImpulseLateral2 = appliedLateralImpulse2; - - - m_pointCache[insertIndex].m_lifeTime = lifeTime; -#else - clearUserCache(m_pointCache[insertIndex]); - m_pointCache[insertIndex] = newPoint; - -#endif } + */ + - bool validContactDistance(const btManifoldPoint& pt) const - { - return pt.m_distance1 <= getContactBreakingThreshold(); - } + static bool validContactDistance(const b3Vector3& pt); + /// calculated new worldspace coordinates and depth, and reject points that exceed the collision margin - void refreshContactPoints( const btTransform& trA,const btTransform& trB); + static void refreshContactPoints( const b3Transform& trA,const b3Transform& trB, struct b3Contact4Data& newContactCache); + static void removeContactPoint(struct b3Contact4Data& newContactCache,int i); - B3_FORCE_INLINE void clearManifold() - { - int i; - for (i=0;i//memcpy #include "b3ConvexPolyhedronCL.h" - +#include "Bullet3OpenCL/NarrowphaseCollision/b3ContactCache.h" typedef b3AlignedObjectArray b3VertexArray; @@ -1603,7 +1604,7 @@ int computeContactConvexConvex( b3AlignedObjectArray& pairs, transB.setRotation(rigidBodies[bodyIndexB].m_quat); float maximumDistanceSquared = 1e30f; - b3Vector3 resultPointOnB; + b3Vector3 resultPointOnBWorld; b3Vector3 sepAxis2(0,1,0); b3Scalar distance2 = 1e30f; @@ -1618,7 +1619,7 @@ int computeContactConvexConvex( b3AlignedObjectArray& pairs, maximumDistanceSquared, sepAxis2, distance2, - resultPointOnB); + resultPointOnBWorld); if (result2) @@ -1627,31 +1628,58 @@ int computeContactConvexConvex( b3AlignedObjectArray& pairs, { contactIndex = nGlobalContactsOut; globalContactsOut.expand(); - b3Contact4& contact = globalContactsOut.at(nGlobalContactsOut); - contact.m_batchIdx = 0;//i; - contact.m_bodyAPtrAndSignBit = (rigidBodies.at(bodyIndexA).m_invMass==0)? -bodyIndexA:bodyIndexA; - contact.m_bodyBPtrAndSignBit = (rigidBodies.at(bodyIndexB).m_invMass==0)? -bodyIndexB:bodyIndexB; + b3Contact4& newContact = globalContactsOut.at(nGlobalContactsOut); + newContact.m_batchIdx = 0;//i; + newContact.m_bodyAPtrAndSignBit = (rigidBodies.at(bodyIndexA).m_invMass==0)? -bodyIndexA:bodyIndexA; + newContact.m_bodyBPtrAndSignBit = (rigidBodies.at(bodyIndexB).m_invMass==0)? -bodyIndexB:bodyIndexB; - contact.m_frictionCoeffCmp = 45874; - contact.m_restituitionCoeffCmp = 0; + newContact.m_frictionCoeffCmp = 45874; + newContact.m_restituitionCoeffCmp = 0; - int numPoints = 1; - if (pairs[pairIndex].z>=0) + int numPoints = 0; + if (0)//pairs[pairIndex].z>=0) { - printf("add existing points?\n"); + //printf("add existing points?\n"); + //refresh - } - for (int p=0;pgetCacheEntry(newPt); + if (insertIndex >= 0) + { + //const btManifoldPoint& oldPoint = m_manifoldPtr->getContactPoint(insertIndex); + m_manifoldPtr->replaceContactPoint(newPt,insertIndex); + } else + { + insertIndex = m_manifoldPtr->addManifoldPoint(newPt); + } + */ + + int p=numPoints; + if (numPoints<3) + { + numPoints++; + } + { + resultPointOnBWorld.w = distance2; + newContact.m_worldPos[p] = resultPointOnBWorld; + b3Vector3 resultPointOnAWorld = resultPointOnBWorld+distance2*sepAxis2; + //newContact.m_localPosA[p] = transA.inverse()*resultPointOnAWorld; + // newContact.m_localPosB[p] = transB.inverse()*resultPointOnBWorld; + newContact.m_worldNormal = sepAxis2; } //printf("bodyIndexA %d,bodyIndexB %d,normal=%f,%f,%f numPoints %d\n",bodyIndexA,bodyIndexB,normalOnSurfaceB.x,normalOnSurfaceB.y,normalOnSurfaceB.z,numPoints); - contact.m_worldNormal.w = (b3Scalar)numPoints; + newContact.m_worldNormal.w = (b3Scalar)numPoints; nGlobalContactsOut++; } else { @@ -1797,7 +1825,7 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray* return; -//#define CHECK_ON_HOST + #ifdef CHECK_ON_HOST b3AlignedObjectArray hostAabbs; clAabbsWS.copyToHost(hostAabbs); @@ -1909,9 +1937,12 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray* hostCollidables[collidableIndexB].m_shapeType == SHAPE_CONVEX_HULL) { //printf("hostPairs[i].z=%d\n",hostPairs[i].z); - int contactIndex = computeContactConvexConvex(hostPairs,i,bodyIndexA,bodyIndexB,collidableIndexA,collidableIndexB,hostBodyBuf, - hostCollidables,hostConvexData,hostVertices,hostUniqueEdges,hostIndices,hostFaces,hostContacts,nContacts,maxContactCapacity, - oldHostContacts); + int contactIndex = computeContactConvexConvex2(i,bodyIndexA,bodyIndexB,collidableIndexA,collidableIndexB,hostBodyBuf, + hostCollidables,hostConvexData,hostVertices,hostUniqueEdges,hostIndices,hostFaces,hostContacts,nContacts,maxContactCapacity,oldHostContacts); + //int contactIndex = computeContactConvexConvex(hostPairs,i,bodyIndexA,bodyIndexB,collidableIndexA,collidableIndexB,hostBodyBuf, + // hostCollidables,hostConvexData,hostVertices,hostUniqueEdges,hostIndices,hostFaces,hostContacts,nContacts,maxContactCapacity, + // oldHostContacts); + if (contactIndex>=0) { diff --git a/src/Bullet3OpenCL/NarrowphaseCollision/kernels/bvhTraversal.h b/src/Bullet3OpenCL/NarrowphaseCollision/kernels/bvhTraversal.h index 23f4865d8..d51084e41 100644 --- a/src/Bullet3OpenCL/NarrowphaseCollision/kernels/bvhTraversal.h +++ b/src/Bullet3OpenCL/NarrowphaseCollision/kernels/bvhTraversal.h @@ -2,17 +2,13 @@ static const char* bvhTraversalKernelCL= \ "//keep this enum in sync with the CPU version (in btCollidable.h)\n" "//written by Erwin Coumans\n" -"\n" "#define SHAPE_CONVEX_HULL 3\n" "#define SHAPE_CONCAVE_TRIMESH 5\n" "#define TRIANGLE_NUM_CONVEX_FACES 5\n" "#define SHAPE_COMPOUND_OF_CONVEX_HULLS 6\n" "#define SHAPE_SPHERE 7\n" -"\n" "typedef unsigned int u32;\n" -"\n" "#define MAX_NUM_PARTS_IN_BITS 10\n" -"\n" "///btQuantizedBvhNode is a compressed aabb node, 16 bytes.\n" "///Node can be used for leafnode or internal node. Leafnodes can point to 32-bit triangle index (non-negative range).\n" "typedef struct\n" @@ -23,7 +19,6 @@ static const char* bvhTraversalKernelCL= \ " //4 bytes\n" " int m_escapeIndexOrTriangleIndex;\n" "} btQuantizedBvhNode;\n" -"\n" "typedef struct\n" "{\n" " float4 m_aabbMin;\n" @@ -33,9 +28,7 @@ static const char* bvhTraversalKernelCL= \ " int m_numSubTrees;\n" " int m_nodeOffset;\n" " int m_subTreeOffset;\n" -"\n" "} b3BvhInfo;\n" -"\n" "/*\n" " bool isLeafNode() const\n" " {\n" @@ -62,7 +55,6 @@ static const char* bvhTraversalKernelCL= \ " return (m_escapeIndexOrTriangleIndex>>(31-MAX_NUM_PARTS_IN_BITS));\n" " }\n" "*/\n" -"\n" "int getTriangleIndex(const btQuantizedBvhNode* rootNode)\n" "{\n" " unsigned int x=0;\n" @@ -70,7 +62,6 @@ static const char* bvhTraversalKernelCL= \ " // Get only the lower bits where the triangle index is stored\n" " return (rootNode->m_escapeIndexOrTriangleIndex&~(y));\n" "}\n" -"\n" "int isLeaf(const btQuantizedBvhNode* rootNode)\n" "{\n" " //skipindex is negative (internal node), triangleindex >=0 (leafnode)\n" @@ -81,7 +72,6 @@ static const char* bvhTraversalKernelCL= \ "{\n" " return -rootNode->m_escapeIndexOrTriangleIndex;\n" "}\n" -"\n" "typedef struct\n" "{\n" " //12 bytes\n" @@ -93,7 +83,6 @@ static const char* bvhTraversalKernelCL= \ " int m_subtreeSize;\n" " int m_padding[3];\n" "} btBvhSubtreeInfo;\n" -"\n" "///keep this in sync with btCollidable.h\n" "typedef struct\n" "{\n" @@ -103,7 +92,6 @@ static const char* bvhTraversalKernelCL= \ " int m_shapeIndex;\n" " \n" "} btCollidableGpu;\n" -"\n" "typedef struct\n" "{\n" " float4 m_childPosition;\n" @@ -113,21 +101,17 @@ static const char* bvhTraversalKernelCL= \ " int m_unused1;\n" " int m_unused2;\n" "} btGpuChildShape;\n" -"\n" -"\n" "typedef struct\n" "{\n" " float4 m_pos;\n" " float4 m_quat;\n" " float4 m_linVel;\n" " float4 m_angVel;\n" -"\n" " u32 m_collidableIdx;\n" " float m_invMass;\n" " float m_restituitionCoeff;\n" " float m_frictionCoeff;\n" "} BodyData;\n" -"\n" "typedef struct \n" "{\n" " union\n" @@ -143,8 +127,6 @@ static const char* bvhTraversalKernelCL= \ " int m_maxIndices[4];\n" " };\n" "} btAabbCL;\n" -"\n" -"\n" "int testQuantizedAabbAgainstQuantizedAabb(\n" " const unsigned short int* aabbMin1,\n" " const unsigned short int* aabbMax1,\n" @@ -170,13 +152,10 @@ static const char* bvhTraversalKernelCL= \ " //overlap = ((aabbMin1[1] > aabbMax2[1]) || (aabbMax1[1] < aabbMin2[1])) ? 0 : overlap;\n" " //return overlap;\n" "}\n" -"\n" -"\n" "void quantizeWithClamp(unsigned short* out, float4 point2,int isMax, float4 bvhAabbMin, float4 bvhAabbMax, float4 bvhQuantization)\n" "{\n" " float4 clampedPoint = max(point2,bvhAabbMin);\n" " clampedPoint = min (clampedPoint, bvhAabbMax);\n" -"\n" " float4 v = (clampedPoint - bvhAabbMin) * bvhQuantization;\n" " if (isMax)\n" " {\n" @@ -189,10 +168,7 @@ static const char* bvhTraversalKernelCL= \ " out[1] = (unsigned short) (((unsigned short)(v.y) & 0xfffe));\n" " out[2] = (unsigned short) (((unsigned short)(v.z) & 0xfffe));\n" " }\n" -"\n" "}\n" -"\n" -"\n" "// work-in-progress\n" "__kernel void bvhTraversalKernel( __global const int4* pairs, \n" " __global const BodyData* rigidBodies, \n" @@ -223,7 +199,6 @@ static const char* bvhTraversalKernelCL= \ " \n" " if (collidables[collidableIndexA].m_shapeType!=SHAPE_CONCAVE_TRIMESH)\n" " return;\n" -"\n" " int shapeTypeB = collidables[collidableIndexB].m_shapeType;\n" " \n" " if (shapeTypeB!=SHAPE_CONVEX_HULL &&\n" @@ -231,9 +206,7 @@ static const char* bvhTraversalKernelCL= \ " shapeTypeB!=SHAPE_COMPOUND_OF_CONVEX_HULLS\n" " )\n" " return;\n" -"\n" " b3BvhInfo bvhInfo = bvhInfos[collidables[collidableIndexA].m_numChildShapes];\n" -"\n" " float4 bvhAabbMin = bvhInfo.m_aabbMin;\n" " float4 bvhAabbMax = bvhInfo.m_aabbMax;\n" " float4 bvhQuantization = bvhInfo.m_quantization;\n" @@ -241,7 +214,6 @@ static const char* bvhTraversalKernelCL= \ " __global const btBvhSubtreeInfo* subtreeHeaders = &subtreeHeadersRoot[bvhInfo.m_subTreeOffset];\n" " __global const btQuantizedBvhNode* quantizedNodes = &quantizedNodesRoot[bvhInfo.m_nodeOffset];\n" " \n" -"\n" " unsigned short int quantizedQueryAabbMin[3];\n" " unsigned short int quantizedQueryAabbMax[3];\n" " quantizeWithClamp(quantizedQueryAabbMin,aabbs[bodyIndexB].m_min,false,bvhAabbMin, bvhAabbMax,bvhQuantization);\n" @@ -308,6 +280,5 @@ static const char* bvhTraversalKernelCL= \ " }\n" " }\n" " }\n" -"\n" "}\n" ; diff --git a/src/Bullet3OpenCL/NarrowphaseCollision/kernels/primitiveContacts.cl b/src/Bullet3OpenCL/NarrowphaseCollision/kernels/primitiveContacts.cl index 4d79ffb33..f8297a696 100644 --- a/src/Bullet3OpenCL/NarrowphaseCollision/kernels/primitiveContacts.cl +++ b/src/Bullet3OpenCL/NarrowphaseCollision/kernels/primitiveContacts.cl @@ -345,7 +345,7 @@ void computeContactSphereConvex(int pairIndex, __global const float4* convexVertices, __global const int* convexIndices, __global const btGpuFace* faces, - __global b3Contact4Data* restrict globalContactsOut, + __global struct b3Contact4Data* restrict globalContactsOut, counter32_t nGlobalContactsOut, int maxContactCapacity, float4 spherePos2, @@ -466,9 +466,9 @@ void computeContactSphereConvex(int pairIndex, if (1)//dstIdx < maxContactCapacity) { - __global b3Contact4Data* c = &globalContactsOut[dstIdx]; + __global struct b3Contact4Data* c = &globalContactsOut[dstIdx]; c->m_worldNormal = normalOnSurfaceB1; - c->m_coeffs = (u32)(0.f*0xffff) | ((u32)(0.7f*0xffff)<<16); + c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff); c->m_batchIdx = pairIndex; c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA; c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB; @@ -590,7 +590,7 @@ int computeContactPlaneConvex(int pairIndex, __global const float4* convexVertices, __global const int* convexIndices, __global const btGpuFace* faces, - __global b3Contact4Data* restrict globalContactsOut, + __global struct b3Contact4Data* restrict globalContactsOut, counter32_t nGlobalContactsOut, int maxContactCapacity, float4 posB, @@ -692,11 +692,11 @@ int computeContactPlaneConvex(int pairIndex, if (dstIdx < maxContactCapacity) { resultIndex = dstIdx; - __global b3Contact4Data* c = &globalContactsOut[dstIdx]; + __global struct b3Contact4Data* c = &globalContactsOut[dstIdx]; c->m_worldNormal = planeNormalWorld; //c->setFrictionCoeff(0.7); //c->setRestituitionCoeff(0.f); - c->m_coeffs = (u32)(0.f*0xffff) | ((u32)(0.7f*0xffff)<<16); + c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff); c->m_batchIdx = pairIndex; c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA; c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB; @@ -732,7 +732,7 @@ void computeContactPlaneSphere(int pairIndex, __global const BodyData* rigidBodies, __global const btCollidableGpu* collidables, __global const btGpuFace* faces, - __global b3Contact4Data* restrict globalContactsOut, + __global struct b3Contact4Data* restrict globalContactsOut, counter32_t nGlobalContactsOut, int maxContactCapacity) { @@ -775,9 +775,9 @@ void computeContactPlaneSphere(int pairIndex, if (dstIdx < maxContactCapacity) { - __global b3Contact4Data* c = &globalContactsOut[dstIdx]; + __global struct b3Contact4Data* c = &globalContactsOut[dstIdx]; c->m_worldNormal = normalOnSurfaceB1; - c->m_coeffs = (u32)(0.f*0xffff) | ((u32)(0.7f*0xffff)<<16); + c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff); c->m_batchIdx = pairIndex; c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA; c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB; @@ -798,7 +798,7 @@ __kernel void primitiveContactsKernel( __global int4* pairs, __global const float4* uniqueEdges, __global const btGpuFace* faces, __global const int* indices, - __global b3Contact4Data* restrict globalContactsOut, + __global struct b3Contact4Data* restrict globalContactsOut, counter32_t nGlobalContactsOut, int numPairs, int maxContactCapacity) { @@ -953,9 +953,9 @@ __kernel void primitiveContactsKernel( __global int4* pairs, if (dstIdx < maxContactCapacity) { - __global b3Contact4Data* c = &globalContactsOut[dstIdx]; + __global struct b3Contact4Data* c = &globalContactsOut[dstIdx]; c->m_worldNormal = -normalOnSurfaceB; - c->m_coeffs = (u32)(0.f*0xffff) | ((u32)(0.7f*0xffff)<<16); + c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff); c->m_batchIdx = pairIndex; int bodyA = pairs[pairIndex].x; int bodyB = pairs[pairIndex].y; @@ -987,7 +987,7 @@ __kernel void processCompoundPairsPrimitivesKernel( __global const int4* gpuCo __global const int* indices, __global btAabbCL* aabbs, __global const btGpuChildShape* gpuChildShapes, - __global b3Contact4Data* restrict globalContactsOut, + __global struct b3Contact4Data* restrict globalContactsOut, counter32_t nGlobalContactsOut, int numCompoundPairs, int maxContactCapacity ) @@ -1166,7 +1166,7 @@ void computeContactSphereTriangle(int pairIndex, __global const BodyData* rigidBodies, __global const btCollidableGpu* collidables, const float4* triangleVertices, - __global b3Contact4Data* restrict globalContactsOut, + __global struct b3Contact4Data* restrict globalContactsOut, counter32_t nGlobalContactsOut, int maxContactCapacity, float4 spherePos2, @@ -1293,9 +1293,9 @@ void computeContactSphereTriangle(int pairIndex, if (dstIdx < maxContactCapacity) { - __global b3Contact4Data* c = &globalContactsOut[dstIdx]; + __global struct b3Contact4Data* c = &globalContactsOut[dstIdx]; c->m_worldNormal = normalOnSurfaceB1; - c->m_coeffs = (u32)(0.f*0xffff) | ((u32)(0.7f*0xffff)<<16); + c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff); c->m_batchIdx = pairIndex; c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA; c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB; @@ -1325,7 +1325,7 @@ __kernel void findConcaveSphereContactsKernel( __global int4* concavePairs, __global const btGpuFace* faces, __global const int* indices, __global btAabbCL* aabbs, - __global b3Contact4Data* restrict globalContactsOut, + __global struct b3Contact4Data* restrict globalContactsOut, counter32_t nGlobalContactsOut, int numConcavePairs, int maxContactCapacity ) diff --git a/src/Bullet3OpenCL/NarrowphaseCollision/kernels/primitiveContacts.h b/src/Bullet3OpenCL/NarrowphaseCollision/kernels/primitiveContacts.h index b4807312e..089e02832 100644 --- a/src/Bullet3OpenCL/NarrowphaseCollision/kernels/primitiveContacts.h +++ b/src/Bullet3OpenCL/NarrowphaseCollision/kernels/primitiveContacts.h @@ -1,67 +1,62 @@ //this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project static const char* primitiveContactsKernelsCL= \ -"\n" -"\n" -"\n" -"\n" -"\n" -"\n" -"\n" -"\n" -"\n" +"#ifndef B3_CONTACT4DATA_H\n" +"#define B3_CONTACT4DATA_H\n" +"#ifndef B3_FLOAT4_H\n" +"#define B3_FLOAT4_H\n" +"#ifndef B3_PLATFORM_DEFINITIONS_H\n" +"#define B3_PLATFORM_DEFINITIONS_H\n" "struct MyTest\n" "{\n" " int bla;\n" "};\n" -"\n" -"\n" -"\n" -"\n" -"\n" -"\n" +"#endif\n" +"#ifdef __cplusplus\n" +"#else//bla\n" " typedef float4 b3Float4;\n" -"\n" -"\n" -"\n" -"\n" -"typedef struct\n" +"#endif \n" +"#endif //B3_FLOAT4_H\n" +"typedef struct b3Contact4Data b3Contact4Data_t;\n" +"struct b3Contact4Data\n" "{\n" " b3Float4 m_worldPos[4];\n" +"// b3Float4 m_localPosB[4];\n" " b3Float4 m_worldNormal; // w: m_nPoints\n" -" unsigned int m_coeffs;\n" -" unsigned int m_batchIdx;\n" -"\n" +" unsigned short m_restituitionCoeffCmp;\n" +" unsigned short m_frictionCoeffCmp;\n" +" int m_batchIdx;\n" " int m_bodyAPtrAndSignBit;//x:m_bodyAPtr, y:m_bodyBPtr\n" " int m_bodyBPtrAndSignBit;\n" -"\n" " int m_childIndexA;\n" " int m_childIndexB;\n" " int m_unused1;\n" " int m_unused2;\n" -"\n" -"} b3Contact4Data;\n" -"\n" -"\n" -"\n" +" b3Float4 m_localPosA;\n" +"};\n" +"inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)\n" +"{\n" +" return (int)contact->m_worldNormal.w;\n" +"};\n" +"inline void b3Contact4Data_setNumPoints(struct b3Contact4Data* contact, int numPoints)\n" +"{\n" +" contact->m_worldNormal.w = (float)numPoints;\n" +"};\n" +"#endif //B3_CONTACT4DATA_H\n" "#define SHAPE_CONVEX_HULL 3\n" "#define SHAPE_PLANE 4\n" "#define SHAPE_CONCAVE_TRIMESH 5\n" "#define SHAPE_COMPOUND_OF_CONVEX_HULLS 6\n" "#define SHAPE_SPHERE 7\n" -"\n" -"\n" "#pragma OPENCL EXTENSION cl_amd_printf : enable\n" "#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n" "#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n" "#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n" "#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n" -"\n" "#ifdef cl_ext_atomic_counters_32\n" "#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable\n" "#else\n" "#define counter32_t volatile __global int*\n" "#endif\n" -"\n" "#define GET_GROUP_IDX get_group_id(0)\n" "#define GET_LOCAL_IDX get_local_id(0)\n" "#define GET_GLOBAL_IDX get_global_id(0)\n" @@ -75,15 +70,9 @@ static const char* primitiveContactsKernelsCL= \ "#define AtomAdd(x, value) atom_add(&(x), value)\n" "#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value )\n" "#define AtomXhg(x, value) atom_xchg ( &(x), value )\n" -"\n" "#define max2 max\n" "#define min2 min\n" -"\n" "typedef unsigned int u32;\n" -"\n" -"\n" -"\n" -"\n" "typedef struct \n" "{\n" " union\n" @@ -99,7 +88,6 @@ static const char* primitiveContactsKernelsCL= \ " int m_maxIndices[4];\n" " };\n" "} btAabbCL;\n" -"\n" "///keep this in sync with btCollidable.h\n" "typedef struct\n" "{\n" @@ -109,7 +97,6 @@ static const char* primitiveContactsKernelsCL= \ " int m_shapeIndex;\n" " \n" "} btCollidableGpu;\n" -"\n" "typedef struct\n" "{\n" " float4 m_childPosition;\n" @@ -119,23 +106,18 @@ static const char* primitiveContactsKernelsCL= \ " int m_unused1;\n" " int m_unused2;\n" "} btGpuChildShape;\n" -"\n" "#define GET_NPOINTS(x) (x).m_worldNormal.w\n" -"\n" "typedef struct\n" "{\n" " float4 m_pos;\n" " float4 m_quat;\n" " float4 m_linVel;\n" " float4 m_angVel;\n" -"\n" " u32 m_collidableIdx; \n" " float m_invMass;\n" " float m_restituitionCoeff;\n" " float m_frictionCoeff;\n" "} BodyData;\n" -"\n" -"\n" "typedef struct \n" "{\n" " float4 m_localCenter;\n" @@ -152,48 +134,37 @@ static const char* primitiveContactsKernelsCL= \ " int m_uniqueEdgesOffset;\n" " int m_numUniqueEdges;\n" " int m_unused;\n" -"\n" "} ConvexPolyhedronCL;\n" -"\n" "typedef struct\n" "{\n" " float4 m_plane;\n" " int m_indexOffset;\n" " int m_numIndices;\n" "} btGpuFace;\n" -"\n" "#define SELECT_UINT4( b, a, condition ) select( b,a,condition )\n" -"\n" "#define make_float4 (float4)\n" "#define make_float2 (float2)\n" "#define make_uint4 (uint4)\n" "#define make_int4 (int4)\n" "#define make_uint2 (uint2)\n" "#define make_int2 (int2)\n" -"\n" -"\n" "__inline\n" "float fastDiv(float numerator, float denominator)\n" "{\n" " return native_divide(numerator, denominator); \n" "// return numerator/denominator; \n" "}\n" -"\n" "__inline\n" "float4 fastDiv4(float4 numerator, float4 denominator)\n" "{\n" " return native_divide(numerator, denominator); \n" "}\n" -"\n" -"\n" "__inline\n" "float4 cross3(float4 a, float4 b)\n" "{\n" " return cross(a,b);\n" "}\n" -"\n" "//#define dot3F4 dot\n" -"\n" "__inline\n" "float dot3F4(float4 a, float4 b)\n" "{\n" @@ -201,35 +172,23 @@ static const char* primitiveContactsKernelsCL= \ " float4 b1 = make_float4(b.xyz,0.f);\n" " return dot(a1, b1);\n" "}\n" -"\n" "__inline\n" "float4 fastNormalize4(float4 v)\n" "{\n" " return fast_normalize(v);\n" "}\n" -"\n" -"\n" "///////////////////////////////////////\n" "// Quaternion\n" "///////////////////////////////////////\n" -"\n" "typedef float4 Quaternion;\n" -"\n" "__inline\n" "Quaternion qtMul(Quaternion a, Quaternion b);\n" -"\n" "__inline\n" "Quaternion qtNormalize(Quaternion in);\n" -"\n" "__inline\n" "float4 qtRotate(Quaternion q, float4 vec);\n" -"\n" "__inline\n" "Quaternion qtInvert(Quaternion q);\n" -"\n" -"\n" -"\n" -"\n" "__inline\n" "Quaternion qtMul(Quaternion a, Quaternion b)\n" "{\n" @@ -240,7 +199,6 @@ static const char* primitiveContactsKernelsCL= \ " ans.w = a.w*b.w - dot3F4(a, b);\n" " return ans;\n" "}\n" -"\n" "__inline\n" "Quaternion qtNormalize(Quaternion in)\n" "{\n" @@ -257,32 +215,27 @@ static const char* primitiveContactsKernelsCL= \ " float4 out = qtMul(qtMul(q,vcpy),qInv);\n" " return out;\n" "}\n" -"\n" "__inline\n" "Quaternion qtInvert(Quaternion q)\n" "{\n" " return (Quaternion)(-q.xyz, q.w);\n" "}\n" -"\n" "__inline\n" "float4 qtInvRotate(const Quaternion q, float4 vec)\n" "{\n" " return qtRotate( qtInvert( q ), vec );\n" "}\n" -"\n" "__inline\n" "float4 transform(const float4* p, const float4* translation, const Quaternion* orientation)\n" "{\n" " return qtRotate( *orientation, *p ) + (*translation);\n" "}\n" -"\n" "void trInverse(float4 translationIn, Quaternion orientationIn,\n" " float4* translationOut, Quaternion* orientationOut)\n" "{\n" " *orientationOut = qtInvert(orientationIn);\n" " *translationOut = qtRotate(*orientationOut, -translationIn);\n" "}\n" -"\n" "void trMul(float4 translationA, Quaternion orientationA,\n" " float4 translationB, Quaternion orientationB,\n" " float4* translationOut, Quaternion* orientationOut)\n" @@ -290,17 +243,12 @@ static const char* primitiveContactsKernelsCL= \ " *orientationOut = qtMul(orientationA,orientationB);\n" " *translationOut = transform(&translationB,&translationA,&orientationA);\n" "}\n" -"\n" -"\n" -"\n" "__inline\n" "float4 normalize3(const float4 a)\n" "{\n" " float4 n = make_float4(a.x, a.y, a.z, 0.f);\n" " return fastNormalize4( n );\n" "}\n" -"\n" -"\n" "__inline float4 lerp3(const float4 a,const float4 b, float t)\n" "{\n" " return make_float4( a.x + (b.x - a.x) * t,\n" @@ -308,8 +256,6 @@ static const char* primitiveContactsKernelsCL= \ " a.z + (b.z - a.z) * t,\n" " 0.f);\n" "}\n" -"\n" -"\n" "float signedDistanceFromPointToPlane(float4 point, float4 planeEqn, float4* closestPointOnFace)\n" "{\n" " float4 n = (float4)(planeEqn.x, planeEqn.y, planeEqn.z, 0);\n" @@ -317,9 +263,6 @@ static const char* primitiveContactsKernelsCL= \ " *closestPointOnFace = point - dist * n;\n" " return dist;\n" "}\n" -"\n" -"\n" -"\n" "inline bool IsPointInPolygon(float4 p, \n" " const btGpuFace* face,\n" " __global const float4* baseVertex,\n" @@ -331,17 +274,14 @@ static const char* primitiveContactsKernelsCL= \ " float4 ab;\n" " float4 ap;\n" " float4 v;\n" -"\n" " float4 plane = make_float4(face->m_plane.x,face->m_plane.y,face->m_plane.z,0.f);\n" " \n" " if (face->m_numIndices<2)\n" " return false;\n" -"\n" " \n" " float4 v0 = baseVertex[convexIndices[face->m_indexOffset + face->m_numIndices-1]];\n" " \n" " b = v0;\n" -"\n" " for(unsigned i=0; i != face->m_numIndices; ++i)\n" " {\n" " a = b;\n" @@ -350,7 +290,6 @@ static const char* primitiveContactsKernelsCL= \ " ab = b-a;\n" " ap = p-a;\n" " v = cross3(ab,plane);\n" -"\n" " if (dot(ap, v) > 0.f)\n" " {\n" " float ab_m2 = dot(ab, ab);\n" @@ -375,10 +314,6 @@ static const char* primitiveContactsKernelsCL= \ " }\n" " return true;\n" "}\n" -"\n" -"\n" -"\n" -"\n" "void computeContactSphereConvex(int pairIndex,\n" " int bodyIndexA, int bodyIndexB, \n" " int collidableIndexA, int collidableIndexB, \n" @@ -388,7 +323,7 @@ static const char* primitiveContactsKernelsCL= \ " __global const float4* convexVertices,\n" " __global const int* convexIndices,\n" " __global const btGpuFace* faces,\n" -" __global b3Contact4Data* restrict globalContactsOut,\n" +" __global struct b3Contact4Data* restrict globalContactsOut,\n" " counter32_t nGlobalContactsOut,\n" " int maxContactCapacity,\n" " float4 spherePos2,\n" @@ -397,25 +332,19 @@ static const char* primitiveContactsKernelsCL= \ " float4 quat\n" " )\n" "{\n" -"\n" " float4 invPos;\n" " float4 invOrn;\n" -"\n" " trInverse(pos,quat, &invPos,&invOrn);\n" -"\n" " float4 spherePos = transform(&spherePos2,&invPos,&invOrn);\n" -"\n" " int shapeIndex = collidables[collidableIndexB].m_shapeIndex;\n" " int numFaces = convexShapes[shapeIndex].m_numFaces;\n" " float4 closestPnt = (float4)(0, 0, 0, 0);\n" " float4 hitNormalWorld = (float4)(0, 0, 0, 0);\n" " float minDist = -1000000.f;\n" " bool bCollide = true;\n" -"\n" " for ( int f = 0; f < numFaces; f++ )\n" " {\n" " btGpuFace face = faces[convexShapes[shapeIndex].m_faceOffset+f];\n" -"\n" " // set up a plane equation \n" " float4 planeEqn;\n" " float4 n1 = face.m_plane;\n" @@ -427,21 +356,17 @@ static const char* primitiveContactsKernelsCL= \ " // compute a signed distance from the vertex in cloth to the face of rigidbody.\n" " float4 pntReturn;\n" " float dist = signedDistanceFromPointToPlane(spherePos, planeEqn, &pntReturn);\n" -"\n" " // If the distance is positive, the plane is a separating plane. \n" " if ( dist > radius )\n" " {\n" " bCollide = false;\n" " break;\n" " }\n" -"\n" -"\n" " if (dist>0)\n" " {\n" " //might hit an edge or vertex\n" " float4 out;\n" " float4 zeroPos = make_float4(0,0,0,0);\n" -"\n" " bool isInPoly = IsPointInPolygon(spherePos,\n" " &face,\n" " &convexVertices[convexShapes[shapeIndex].m_vertexOffset],\n" @@ -488,9 +413,7 @@ static const char* primitiveContactsKernelsCL= \ " }\n" " \n" " }\n" -"\n" " \n" -"\n" " if (bCollide && minDist > -10000)\n" " {\n" " float4 normalOnSurfaceB1 = qtRotate(quat,-hitNormalWorld);\n" @@ -500,35 +423,28 @@ static const char* primitiveContactsKernelsCL= \ " if (actualDepth<=0.f)\n" " {\n" " \n" -"\n" " pOnB1.w = actualDepth;\n" -"\n" " int dstIdx;\n" " AppendInc( nGlobalContactsOut, dstIdx );\n" " \n" " \n" " if (1)//dstIdx < maxContactCapacity)\n" " {\n" -" __global b3Contact4Data* c = &globalContactsOut[dstIdx];\n" +" __global struct b3Contact4Data* c = &globalContactsOut[dstIdx];\n" " c->m_worldNormal = normalOnSurfaceB1;\n" -" c->m_coeffs = (u32)(0.f*0xffff) | ((u32)(0.7f*0xffff)<<16);\n" +" c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" " c->m_batchIdx = pairIndex;\n" " c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA;\n" " c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB;\n" " c->m_worldPos[0] = pOnB1;\n" " c->m_childIndexA = -1;\n" " c->m_childIndexB = -1;\n" -"\n" " GET_NPOINTS(*c) = 1;\n" " } \n" -"\n" " }\n" " }//if (hasCollision)\n" -"\n" "}\n" " \n" -"\n" -"\n" "int extractManifoldSequential(const float4* p, int nPoints, float4 nearNormal, int4* contactIdx)\n" "{\n" " if( nPoints == 0 )\n" @@ -621,9 +537,7 @@ static const char* primitiveContactsKernelsCL= \ " return 4;\n" " \n" "}\n" -"\n" "#define MAX_PLANE_CONVEX_POINTS 64\n" -"\n" "int computeContactPlaneConvex(int pairIndex,\n" " int bodyIndexA, int bodyIndexB, \n" " int collidableIndexA, int collidableIndexB, \n" @@ -633,7 +547,7 @@ static const char* primitiveContactsKernelsCL= \ " __global const float4* convexVertices,\n" " __global const int* convexIndices,\n" " __global const btGpuFace* faces,\n" -" __global b3Contact4Data* restrict globalContactsOut,\n" +" __global struct b3Contact4Data* restrict globalContactsOut,\n" " counter32_t nGlobalContactsOut,\n" " int maxContactCapacity,\n" " float4 posB,\n" @@ -641,7 +555,6 @@ static const char* primitiveContactsKernelsCL= \ " )\n" "{\n" " int resultIndex=-1;\n" -"\n" " int shapeIndex = collidables[collidableIndexB].m_shapeIndex;\n" " __global const ConvexPolyhedronCL* hullB = &convexShapes[shapeIndex];\n" " \n" @@ -649,10 +562,8 @@ static const char* primitiveContactsKernelsCL= \ " posA = rigidBodies[bodyIndexA].m_pos;\n" " Quaternion ornA;\n" " ornA = rigidBodies[bodyIndexA].m_quat;\n" -"\n" " int numContactsOut = 0;\n" " int numWorldVertsB1= 0;\n" -"\n" " float4 planeEq;\n" " planeEq = faces[collidables[collidableIndexA].m_shapeIndex].m_plane;\n" " float4 planeNormal = make_float4(planeEq.x,planeEq.y,planeEq.z,0.f);\n" @@ -674,18 +585,13 @@ static const char* primitiveContactsKernelsCL= \ " trInverse(posB,ornB,&invPosB,&invOrnB);\n" " trMul(invPosB,invOrnB,posA,ornA,&planeInConvexPos1,&planeInConvexOrn1); \n" " }\n" -"\n" " \n" " float4 planeNormalInConvex = qtRotate(planeInConvexOrn1,-planeNormal);\n" " float maxDot = -1e30;\n" " int hitVertex=-1;\n" " float4 hitVtx;\n" -"\n" -"\n" -"\n" " float4 contactPoints[MAX_PLANE_CONVEX_POINTS];\n" " int numPoints = 0;\n" -"\n" " int4 contactIdx;\n" " contactIdx=make_int4(0,1,2,3);\n" " \n" @@ -694,8 +600,6 @@ static const char* primitiveContactsKernelsCL= \ " {\n" " float4 vtx = convexVertices[hullB->m_vertexOffset+i];\n" " float curDot = dot(vtx,planeNormalInConvex);\n" -"\n" -"\n" " if (curDot>maxDot)\n" " {\n" " hitVertex=i;\n" @@ -705,7 +609,6 @@ static const char* primitiveContactsKernelsCL= \ " if (numPoints==MAX_PLANE_CONVEX_POINTS)\n" " numPoints--;\n" " }\n" -"\n" " if (numPoints4)\n" " {\n" " numReducedPoints = extractManifoldSequential( contactPoints, numPoints, planeNormalInConvex, &contactIdx);\n" " }\n" -"\n" " if (numReducedPoints>0)\n" " {\n" " int dstIdx;\n" " AppendInc( nGlobalContactsOut, dstIdx );\n" -"\n" " if (dstIdx < maxContactCapacity)\n" " {\n" " resultIndex = dstIdx;\n" -" __global b3Contact4Data* c = &globalContactsOut[dstIdx];\n" +" __global struct b3Contact4Data* c = &globalContactsOut[dstIdx];\n" " c->m_worldNormal = planeNormalWorld;\n" " //c->setFrictionCoeff(0.7);\n" " //c->setRestituitionCoeff(0.f);\n" -" c->m_coeffs = (u32)(0.f*0xffff) | ((u32)(0.7f*0xffff)<<16);\n" +" c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" " c->m_batchIdx = pairIndex;\n" " c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA;\n" " c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB;\n" " c->m_childIndexA = -1;\n" " c->m_childIndexB = -1;\n" -"\n" " switch (numReducedPoints)\n" " {\n" " case 4:\n" @@ -764,18 +662,15 @@ static const char* primitiveContactsKernelsCL= \ " GET_NPOINTS(*c) = numReducedPoints;\n" " }//if (dstIdx < numPairs)\n" " } \n" -"\n" " return resultIndex;\n" "}\n" -"\n" -"\n" "void computeContactPlaneSphere(int pairIndex,\n" " int bodyIndexA, int bodyIndexB, \n" " int collidableIndexA, int collidableIndexB, \n" " __global const BodyData* rigidBodies, \n" " __global const btCollidableGpu* collidables,\n" " __global const btGpuFace* faces,\n" -" __global b3Contact4Data* restrict globalContactsOut,\n" +" __global struct b3Contact4Data* restrict globalContactsOut,\n" " counter32_t nGlobalContactsOut,\n" " int maxContactCapacity)\n" "{\n" @@ -812,15 +707,14 @@ static const char* primitiveContactsKernelsCL= \ " float4 normalOnSurfaceB1 = qtRotate(ornA1,planeNormal1);\n" " float4 pOnB1 = vtxInPlaneWorld1+normalOnSurfaceB1*distance;\n" " pOnB1.w = distance;\n" -"\n" " int dstIdx;\n" " AppendInc( nGlobalContactsOut, dstIdx );\n" " \n" " if (dstIdx < maxContactCapacity)\n" " {\n" -" __global b3Contact4Data* c = &globalContactsOut[dstIdx];\n" +" __global struct b3Contact4Data* c = &globalContactsOut[dstIdx];\n" " c->m_worldNormal = normalOnSurfaceB1;\n" -" c->m_coeffs = (u32)(0.f*0xffff) | ((u32)(0.7f*0xffff)<<16);\n" +" c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" " c->m_batchIdx = pairIndex;\n" " c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA;\n" " c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB;\n" @@ -831,8 +725,6 @@ static const char* primitiveContactsKernelsCL= \ " }//if (dstIdx < numPairs)\n" " }//if (hasCollision)\n" "}\n" -"\n" -"\n" "__kernel void primitiveContactsKernel( __global int4* pairs, \n" " __global const BodyData* rigidBodies, \n" " __global const btCollidableGpu* collidables,\n" @@ -841,27 +733,23 @@ static const char* primitiveContactsKernelsCL= \ " __global const float4* uniqueEdges,\n" " __global const btGpuFace* faces,\n" " __global const int* indices,\n" -" __global b3Contact4Data* restrict globalContactsOut,\n" +" __global struct b3Contact4Data* restrict globalContactsOut,\n" " counter32_t nGlobalContactsOut,\n" " int numPairs, int maxContactCapacity)\n" "{\n" -"\n" " int i = get_global_id(0);\n" " int pairIndex = i;\n" " \n" " float4 worldVertsB1[64];\n" " float4 worldVertsB2[64];\n" " int capacityWorldVerts = 64; \n" -"\n" " float4 localContactsOut[64];\n" " int localContactCapacity=64;\n" " \n" " float minDist = -1e30f;\n" " float maxDist = 0.02f;\n" -"\n" " if (i=0)\n" " pairs[pairIndex].z = contactIndex;\n" -"\n" " return;\n" " }\n" -"\n" -"\n" " if (collidables[collidableIndexA].m_shapeType == SHAPE_CONVEX_HULL &&\n" " collidables[collidableIndexB].m_shapeType == SHAPE_PLANE)\n" " {\n" -"\n" " float4 posA;\n" " posA = rigidBodies[bodyIndexA].m_pos;\n" " Quaternion ornA;\n" " ornA = rigidBodies[bodyIndexA].m_quat;\n" -"\n" -"\n" " int contactIndex = computeContactPlaneConvex( pairIndex, bodyIndexB,bodyIndexA, collidableIndexB,collidableIndexA, \n" " rigidBodies,collidables,convexShapes,vertices,indices,\n" " faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity,posA,ornA);\n" -"\n" " if (contactIndex>=0)\n" " pairs[pairIndex].z = contactIndex;\n" -"\n" " return;\n" " }\n" -"\n" " if (collidables[collidableIndexA].m_shapeType == SHAPE_PLANE &&\n" " collidables[collidableIndexB].m_shapeType == SHAPE_SPHERE)\n" " {\n" @@ -913,22 +791,15 @@ static const char* primitiveContactsKernelsCL= \ " rigidBodies,collidables,faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity);\n" " return;\n" " }\n" -"\n" -"\n" " if (collidables[collidableIndexA].m_shapeType == SHAPE_SPHERE &&\n" " collidables[collidableIndexB].m_shapeType == SHAPE_PLANE)\n" " {\n" -"\n" -"\n" " computeContactPlaneSphere( pairIndex, bodyIndexB,bodyIndexA, collidableIndexB,collidableIndexA, \n" " rigidBodies,collidables,\n" " faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity);\n" -"\n" " return;\n" " }\n" -"\n" " \n" -"\n" " \n" " if (collidables[collidableIndexA].m_shapeType == SHAPE_SPHERE &&\n" " collidables[collidableIndexB].m_shapeType == SHAPE_CONVEX_HULL)\n" @@ -938,14 +809,11 @@ static const char* primitiveContactsKernelsCL= \ " float sphereRadius = collidables[collidableIndexA].m_radius;\n" " float4 convexPos = rigidBodies[bodyIndexB].m_pos;\n" " float4 convexOrn = rigidBodies[bodyIndexB].m_quat;\n" -"\n" " computeContactSphereConvex(pairIndex, bodyIndexA, bodyIndexB, collidableIndexA, collidableIndexB, \n" " rigidBodies,collidables,convexShapes,vertices,indices,faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity,\n" " spherePos,sphereRadius,convexPos,convexOrn);\n" -"\n" " return;\n" " }\n" -"\n" " if (collidables[collidableIndexA].m_shapeType == SHAPE_CONVEX_HULL &&\n" " collidables[collidableIndexB].m_shapeType == SHAPE_SPHERE)\n" " {\n" @@ -954,7 +822,6 @@ static const char* primitiveContactsKernelsCL= \ " float sphereRadius = collidables[collidableIndexB].m_radius;\n" " float4 convexPos = rigidBodies[bodyIndexA].m_pos;\n" " float4 convexOrn = rigidBodies[bodyIndexA].m_quat;\n" -"\n" " computeContactSphereConvex(pairIndex, bodyIndexB, bodyIndexA, collidableIndexB, collidableIndexA, \n" " rigidBodies,collidables,convexShapes,vertices,indices,faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity,\n" " spherePos,sphereRadius,convexPos,convexOrn);\n" @@ -974,7 +841,6 @@ static const char* primitiveContactsKernelsCL= \ " float radiusB = collidables[collidableIndexB].m_radius;\n" " float4 posA = rigidBodies[bodyIndexA].m_pos;\n" " float4 posB = rigidBodies[bodyIndexB].m_pos;\n" -"\n" " float4 diff = posA-posB;\n" " float len = length(diff);\n" " \n" @@ -996,9 +862,9 @@ static const char* primitiveContactsKernelsCL= \ " \n" " if (dstIdx < maxContactCapacity)\n" " {\n" -" __global b3Contact4Data* c = &globalContactsOut[dstIdx];\n" +" __global struct b3Contact4Data* c = &globalContactsOut[dstIdx];\n" " c->m_worldNormal = -normalOnSurfaceB;\n" -" c->m_coeffs = (u32)(0.f*0xffff) | ((u32)(0.7f*0xffff)<<16);\n" +" c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" " c->m_batchIdx = pairIndex;\n" " int bodyA = pairs[pairIndex].x;\n" " int bodyB = pairs[pairIndex].y;\n" @@ -1010,15 +876,10 @@ static const char* primitiveContactsKernelsCL= \ " GET_NPOINTS(*c) = 1;\n" " }//if (dstIdx < numPairs)\n" " }//if ( len <= (radiusA+radiusB))\n" -"\n" " return;\n" " }//SHAPE_SPHERE SHAPE_SPHERE\n" -"\n" " }// if (i -10000)\n" " {\n" " \n" " float4 normalOnSurfaceB1 = qtRotate(quat,-hitNormalWorld);\n" " float4 pOnB1 = transform(&closestPnt,&pos,&quat);\n" " float actualDepth = minDist-radius;\n" -"\n" " \n" " if (actualDepth<=0.f)\n" " {\n" " pOnB1.w = actualDepth;\n" " int dstIdx;\n" -"\n" " \n" " float lenSqr = dot3F4(normalOnSurfaceB1,normalOnSurfaceB1);\n" " if (lenSqr>FLT_EPSILON)\n" @@ -1336,28 +1160,21 @@ static const char* primitiveContactsKernelsCL= \ " \n" " if (dstIdx < maxContactCapacity)\n" " {\n" -" __global b3Contact4Data* c = &globalContactsOut[dstIdx];\n" +" __global struct b3Contact4Data* c = &globalContactsOut[dstIdx];\n" " c->m_worldNormal = normalOnSurfaceB1;\n" -" c->m_coeffs = (u32)(0.f*0xffff) | ((u32)(0.7f*0xffff)<<16);\n" +" c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" " c->m_batchIdx = pairIndex;\n" " c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA;\n" " c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB;\n" " c->m_worldPos[0] = pOnB1;\n" -"\n" " c->m_childIndexA = -1;\n" " c->m_childIndexB = faceIndex;\n" -"\n" " GET_NPOINTS(*c) = 1;\n" " } \n" " }\n" -"\n" " }\n" " }//if (hasCollision)\n" -"\n" "}\n" -"\n" -"\n" -"\n" "// work-in-progress\n" "__kernel void findConcaveSphereContactsKernel( __global int4* concavePairs,\n" " __global const BodyData* rigidBodies,\n" @@ -1368,26 +1185,21 @@ static const char* primitiveContactsKernelsCL= \ " __global const btGpuFace* faces,\n" " __global const int* indices,\n" " __global btAabbCL* aabbs,\n" -" __global b3Contact4Data* restrict globalContactsOut,\n" +" __global struct b3Contact4Data* restrict globalContactsOut,\n" " counter32_t nGlobalContactsOut,\n" " int numConcavePairs, int maxContactCapacity\n" " )\n" "{\n" -"\n" " int i = get_global_id(0);\n" " if (i>=numConcavePairs)\n" " return;\n" " int pairIdx = i;\n" -"\n" " int bodyIndexA = concavePairs[i].x;\n" " int bodyIndexB = concavePairs[i].y;\n" -"\n" " int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" " int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" -"\n" " int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" " int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" -"\n" " if (collidables[collidableIndexB].m_shapeType==SHAPE_SPHERE)\n" " {\n" " int f = concavePairs[i].z;\n" @@ -1400,18 +1212,15 @@ static const char* primitiveContactsKernelsCL= \ " float4 vert = vertices[convexShapes[shapeIndexA].m_vertexOffset+index];\n" " verticesA[i] = vert;\n" " }\n" -"\n" " float4 spherePos = rigidBodies[bodyIndexB].m_pos;\n" " float sphereRadius = collidables[collidableIndexB].m_radius;\n" " float4 convexPos = rigidBodies[bodyIndexA].m_pos;\n" " float4 convexOrn = rigidBodies[bodyIndexA].m_quat;\n" -"\n" " computeContactSphereTriangle(i, bodyIndexB, bodyIndexA, collidableIndexB, collidableIndexA, \n" " rigidBodies,collidables,\n" " verticesA,\n" " globalContactsOut, nGlobalContactsOut,maxContactCapacity,\n" " spherePos,sphereRadius,convexPos,convexOrn, f);\n" -"\n" " return;\n" " }\n" "}\n" diff --git a/src/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.cl b/src/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.cl index 1d48b39af..62088cd88 100644 --- a/src/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.cl +++ b/src/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.cl @@ -41,22 +41,7 @@ typedef unsigned int u32; -typedef struct -{ - float4 m_worldPos[4]; - float4 m_worldNormal; // w: m_nPoints - - u32 m_coeffs; - u32 m_batchIdx; - int m_bodyAPtrAndSignBit;//x:m_bodyAPtr, y:m_bodyBPtr - int m_bodyBPtrAndSignBit; - - int m_childIndexA; - int m_childIndexB; - float m_unused1; - int m_unused2; - -} Contact4; +#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Contact4Data.h" ///keep this in sync with btCollidable.h @@ -891,7 +876,7 @@ __kernel void extractManifoldAndAddContactKernel(__global const int4* pairs, __global const float4* separatingNormalsWorld, __global const int* contactCounts, __global const int* contactOffsets, - __global Contact4* restrict contactsOut, + __global struct b3Contact4Data* restrict contactsOut, counter32_t nContactsOut, int numPairs, int pairIndex @@ -922,9 +907,9 @@ __kernel void extractManifoldAndAddContactKernel(__global const int4* pairs, AppendInc( nContactsOut, dstIdx ); //if ((dstIdx+nContacts) < capacity) { - __global Contact4* c = contactsOut + dstIdx; + __global struct b3Contact4Data* c = contactsOut + dstIdx; c->m_worldNormal = normal; - c->m_coeffs = (u32)(0.f*0xffff) | ((u32)(0.7f*0xffff)<<16); + c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff); c->m_batchIdx = idx; int bodyA = pairs[pairIndex].x; int bodyB = pairs[pairIndex].y; @@ -970,7 +955,7 @@ __kernel void clipHullHullKernel( __global int4* pairs, __global const int* indices, __global const float4* separatingNormals, __global const int* hasSeparatingAxis, - __global Contact4* restrict globalContactsOut, + __global struct b3Contact4Data* restrict globalContactsOut, counter32_t nGlobalContactsOut, int numPairs, int contactCapacity) @@ -1037,9 +1022,9 @@ __kernel void clipHullHullKernel( __global int4* pairs, { pairs[pairIndex].z = dstIdx; - __global Contact4* c = globalContactsOut+ dstIdx; + __global struct b3Contact4Data* c = globalContactsOut+ dstIdx; c->m_worldNormal = normal; - c->m_coeffs = (u32)(0.f*0xffff) | ((u32)(0.7f*0xffff)<<16); + c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff); c->m_batchIdx = pairIndex; int bodyA = pairs[pairIndex].x; int bodyB = pairs[pairIndex].y; @@ -1073,7 +1058,7 @@ __kernel void clipCompoundsHullHullKernel( __global const int4* gpuCompoundPai __global const btGpuChildShape* gpuChildShapes, __global const float4* gpuCompoundSepNormalsOut, __global const int* gpuHasCompoundSepNormalsOut, - __global Contact4* restrict globalContactsOut, + __global struct b3Contact4Data* restrict globalContactsOut, counter32_t nGlobalContactsOut, int numCompoundPairs, int maxContactCapacity) { @@ -1170,9 +1155,9 @@ __kernel void clipCompoundsHullHullKernel( __global const int4* gpuCompoundPai AppendInc( nGlobalContactsOut, dstIdx ); if ((dstIdx+nReducedContacts) < maxContactCapacity) { - __global Contact4* c = globalContactsOut+ dstIdx; + __global struct b3Contact4Data* c = globalContactsOut+ dstIdx; c->m_worldNormal = normal; - c->m_coeffs = (u32)(0.f*0xffff) | ((u32)(0.7f*0xffff)<<16); + c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff); c->m_batchIdx = pairIndex; int bodyA = gpuCompoundPairs[pairIndex].x; int bodyB = gpuCompoundPairs[pairIndex].y; @@ -1200,7 +1185,7 @@ __kernel void sphereSphereCollisionKernel( __global const int4* pairs, __global const btCollidableGpu* collidables, __global const float4* separatingNormals, __global const int* hasSeparatingAxis, - __global Contact4* restrict globalContactsOut, + __global struct b3Contact4Data* restrict globalContactsOut, counter32_t nGlobalContactsOut, int numPairs) { @@ -1246,9 +1231,9 @@ __kernel void sphereSphereCollisionKernel( __global const int4* pairs, if (dstIdx < numPairs) { - __global Contact4* c = &globalContactsOut[dstIdx]; + __global struct b3Contact4Data* c = &globalContactsOut[dstIdx]; c->m_worldNormal = normalOnSurfaceB; - c->m_coeffs = (u32)(0.f*0xffff) | ((u32)(0.7f*0xffff)<<16); + c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff); c->m_batchIdx = pairIndex; int bodyA = pairs[pairIndex].x; int bodyB = pairs[pairIndex].y; @@ -1275,7 +1260,7 @@ __kernel void clipHullHullConcaveConvexKernel( __global int4* concavePairsIn, __global const int* indices, __global const btGpuChildShape* gpuChildShapes, __global const float4* separatingNormals, - __global Contact4* restrict globalContactsOut, + __global struct b3Contact4Data* restrict globalContactsOut, counter32_t nGlobalContactsOut, int numConcavePairs) { @@ -1479,9 +1464,9 @@ __kernel void clipHullHullConcaveConvexKernel( __global int4* concavePairsIn, AppendInc( nGlobalContactsOut, dstIdx ); //if ((dstIdx+nReducedContacts) < capacity) { - __global Contact4* c = globalContactsOut+ dstIdx; + __global struct b3Contact4Data* c = globalContactsOut+ dstIdx; c->m_worldNormal = normal; - c->m_coeffs = (u32)(0.f*0xffff) | ((u32)(0.7f*0xffff)<<16); + c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff); c->m_batchIdx = pairIndex; int bodyA = concavePairsIn[pairIndex].x; int bodyB = concavePairsIn[pairIndex].y; @@ -1747,7 +1732,7 @@ __kernel void clipFacesAndContactReductionKernel( __global int4* pairs, __global const BodyData* rigidBodies, __global const float4* separatingNormals, __global const int* hasSeparatingAxis, - __global Contact4* globalContactsOut, + __global struct b3Contact4Data* globalContactsOut, __global int4* clippingFacesOut, __global float4* worldVertsA1, __global float4* worldNormalsA1, @@ -1860,7 +1845,7 @@ __kernel void newContactReductionKernel( __global int4* pairs, __global const BodyData* rigidBodies, __global const float4* separatingNormals, __global const int* hasSeparatingAxis, - __global Contact4* globalContactsOut, + __global struct b3Contact4Data* globalContactsOut, __global int4* clippingFaces, __global float4* worldVertsB2, volatile __global int* nGlobalContactsOut, @@ -1901,9 +1886,9 @@ __kernel void newContactReductionKernel( __global int4* pairs, if (dstIdx < numPairs) { - __global Contact4* c = &globalContactsOut[dstIdx]; + __global struct b3Contact4Data* c = &globalContactsOut[dstIdx]; c->m_worldNormal = normal; - c->m_coeffs = (u32)(0.f*0xffff) | ((u32)(0.7f*0xffff)<<16); + c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff); c->m_batchIdx = pairIndex; int bodyA = pairs[pairIndex].x; int bodyB = pairs[pairIndex].y; diff --git a/src/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.h b/src/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.h index 2d0b341cc..547b0e374 100644 --- a/src/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.h +++ b/src/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.h @@ -1,26 +1,21 @@ //this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project static const char* satClipKernelsCL= \ "#define TRIANGLE_NUM_CONVEX_FACES 5\n" -"\n" "#define SHAPE_CONVEX_HULL 3\n" "#define SHAPE_PLANE 4\n" "#define SHAPE_CONCAVE_TRIMESH 5\n" "#define SHAPE_COMPOUND_OF_CONVEX_HULLS 6\n" "#define SHAPE_SPHERE 7\n" -"\n" -"\n" "#pragma OPENCL EXTENSION cl_amd_printf : enable\n" "#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n" "#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n" "#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n" "#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n" -"\n" "#ifdef cl_ext_atomic_counters_32\n" "#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable\n" "#else\n" "#define counter32_t volatile __global int*\n" "#endif\n" -"\n" "#define GET_GROUP_IDX get_group_id(0)\n" "#define GET_LOCAL_IDX get_local_id(0)\n" "#define GET_GLOBAL_IDX get_global_id(0)\n" @@ -34,32 +29,51 @@ static const char* satClipKernelsCL= \ "#define AtomAdd(x, value) atom_add(&(x), value)\n" "#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value )\n" "#define AtomXhg(x, value) atom_xchg ( &(x), value )\n" -"\n" "#define max2 max\n" "#define min2 min\n" -"\n" "typedef unsigned int u32;\n" -"\n" -"\n" -"\n" -"typedef struct\n" +"#ifndef B3_CONTACT4DATA_H\n" +"#define B3_CONTACT4DATA_H\n" +"#ifndef B3_FLOAT4_H\n" +"#define B3_FLOAT4_H\n" +"#ifndef B3_PLATFORM_DEFINITIONS_H\n" +"#define B3_PLATFORM_DEFINITIONS_H\n" +"struct MyTest\n" "{\n" -" float4 m_worldPos[4];\n" -" float4 m_worldNormal; // w: m_nPoints\n" -"\n" -" u32 m_coeffs;\n" -" u32 m_batchIdx;\n" +" int bla;\n" +"};\n" +"#endif\n" +"#ifdef __cplusplus\n" +"#else//bla\n" +" typedef float4 b3Float4;\n" +"#endif \n" +"#endif //B3_FLOAT4_H\n" +"typedef struct b3Contact4Data b3Contact4Data_t;\n" +"struct b3Contact4Data\n" +"{\n" +" b3Float4 m_worldPos[4];\n" +"// b3Float4 m_localPosB[4];\n" +" b3Float4 m_worldNormal; // w: m_nPoints\n" +" unsigned short m_restituitionCoeffCmp;\n" +" unsigned short m_frictionCoeffCmp;\n" +" int m_batchIdx;\n" " int m_bodyAPtrAndSignBit;//x:m_bodyAPtr, y:m_bodyBPtr\n" " int m_bodyBPtrAndSignBit;\n" -"\n" " int m_childIndexA;\n" " int m_childIndexB;\n" -" float m_unused1;\n" +" int m_unused1;\n" " int m_unused2;\n" -"\n" -"} Contact4;\n" -"\n" -"\n" +" b3Float4 m_localPosA;\n" +"};\n" +"inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)\n" +"{\n" +" return (int)contact->m_worldNormal.w;\n" +"};\n" +"inline void b3Contact4Data_setNumPoints(struct b3Contact4Data* contact, int numPoints)\n" +"{\n" +" contact->m_worldNormal.w = (float)numPoints;\n" +"};\n" +"#endif //B3_CONTACT4DATA_H\n" "///keep this in sync with btCollidable.h\n" "typedef struct\n" "{\n" @@ -69,7 +83,6 @@ static const char* satClipKernelsCL= \ " int m_shapeIndex;\n" " \n" "} btCollidableGpu;\n" -"\n" "typedef struct\n" "{\n" " float4 m_childPosition;\n" @@ -79,23 +92,18 @@ static const char* satClipKernelsCL= \ " int m_unused1;\n" " int m_unused2;\n" "} btGpuChildShape;\n" -"\n" "#define GET_NPOINTS(x) (x).m_worldNormal.w\n" -"\n" "typedef struct\n" "{\n" " float4 m_pos;\n" " float4 m_quat;\n" " float4 m_linVel;\n" " float4 m_angVel;\n" -"\n" " u32 m_collidableIdx; \n" " float m_invMass;\n" " float m_restituitionCoeff;\n" " float m_frictionCoeff;\n" "} BodyData;\n" -"\n" -"\n" "typedef struct \n" "{\n" " float4 m_localCenter;\n" @@ -112,48 +120,37 @@ static const char* satClipKernelsCL= \ " int m_uniqueEdgesOffset;\n" " int m_numUniqueEdges;\n" " int m_unused;\n" -"\n" "} ConvexPolyhedronCL;\n" -"\n" "typedef struct\n" "{\n" " float4 m_plane;\n" " int m_indexOffset;\n" " int m_numIndices;\n" "} btGpuFace;\n" -"\n" "#define SELECT_UINT4( b, a, condition ) select( b,a,condition )\n" -"\n" "#define make_float4 (float4)\n" "#define make_float2 (float2)\n" "#define make_uint4 (uint4)\n" "#define make_int4 (int4)\n" "#define make_uint2 (uint2)\n" "#define make_int2 (int2)\n" -"\n" -"\n" "__inline\n" "float fastDiv(float numerator, float denominator)\n" "{\n" " return native_divide(numerator, denominator); \n" "// return numerator/denominator; \n" "}\n" -"\n" "__inline\n" "float4 fastDiv4(float4 numerator, float4 denominator)\n" "{\n" " return native_divide(numerator, denominator); \n" "}\n" -"\n" -"\n" "__inline\n" "float4 cross3(float4 a, float4 b)\n" "{\n" " return cross(a,b);\n" "}\n" -"\n" "//#define dot3F4 dot\n" -"\n" "__inline\n" "float dot3F4(float4 a, float4 b)\n" "{\n" @@ -161,35 +158,23 @@ static const char* satClipKernelsCL= \ " float4 b1 = make_float4(b.xyz,0.f);\n" " return dot(a1, b1);\n" "}\n" -"\n" "__inline\n" "float4 fastNormalize4(float4 v)\n" "{\n" " return fast_normalize(v);\n" "}\n" -"\n" -"\n" "///////////////////////////////////////\n" "// Quaternion\n" "///////////////////////////////////////\n" -"\n" "typedef float4 Quaternion;\n" -"\n" "__inline\n" "Quaternion qtMul(Quaternion a, Quaternion b);\n" -"\n" "__inline\n" "Quaternion qtNormalize(Quaternion in);\n" -"\n" "__inline\n" "float4 qtRotate(Quaternion q, float4 vec);\n" -"\n" "__inline\n" "Quaternion qtInvert(Quaternion q);\n" -"\n" -"\n" -"\n" -"\n" "__inline\n" "Quaternion qtMul(Quaternion a, Quaternion b)\n" "{\n" @@ -200,7 +185,6 @@ static const char* satClipKernelsCL= \ " ans.w = a.w*b.w - dot3F4(a, b);\n" " return ans;\n" "}\n" -"\n" "__inline\n" "Quaternion qtNormalize(Quaternion in)\n" "{\n" @@ -217,35 +201,27 @@ static const char* satClipKernelsCL= \ " float4 out = qtMul(qtMul(q,vcpy),qInv);\n" " return out;\n" "}\n" -"\n" "__inline\n" "Quaternion qtInvert(Quaternion q)\n" "{\n" " return (Quaternion)(-q.xyz, q.w);\n" "}\n" -"\n" "__inline\n" "float4 qtInvRotate(const Quaternion q, float4 vec)\n" "{\n" " return qtRotate( qtInvert( q ), vec );\n" "}\n" -"\n" "__inline\n" "float4 transform(const float4* p, const float4* translation, const Quaternion* orientation)\n" "{\n" " return qtRotate( *orientation, *p ) + (*translation);\n" "}\n" -"\n" -"\n" -"\n" "__inline\n" "float4 normalize3(const float4 a)\n" "{\n" " float4 n = make_float4(a.x, a.y, a.z, 0.f);\n" " return fastNormalize4( n );\n" "}\n" -"\n" -"\n" "__inline float4 lerp3(const float4 a,const float4 b, float t)\n" "{\n" " return make_float4( a.x + (b.x - a.x) * t,\n" @@ -253,9 +229,6 @@ static const char* satClipKernelsCL= \ " a.z + (b.z - a.z) * t,\n" " 0.f);\n" "}\n" -"\n" -"\n" -"\n" "// Clips a face to the back of a plane, return the number of vertices out, stored in ppVtxOut\n" "int clipFaceGlobal(__global const float4* pVtxIn, int numVertsIn, float4 planeNormalWS,float planeEqWS, __global float4* ppVtxOut)\n" "{\n" @@ -303,9 +276,6 @@ static const char* satClipKernelsCL= \ " }\n" " return numVertsOut;\n" "}\n" -"\n" -"\n" -"\n" "// Clips a face to the back of a plane, return the number of vertices out, stored in ppVtxOut\n" "int clipFace(const float4* pVtxIn, int numVertsIn, float4 planeNormalWS,float planeEqWS, float4* ppVtxOut)\n" "{\n" @@ -316,18 +286,14 @@ static const char* satClipKernelsCL= \ "//double-check next test\n" "// if (numVertsIn < 2)\n" "// return 0;\n" -"\n" " float4 firstVertex=pVtxIn[numVertsIn-1];\n" " float4 endVertex = pVtxIn[0];\n" " \n" " ds = dot3F4(planeNormalWS,firstVertex)+planeEqWS;\n" -"\n" " for (ve = 0; ve < numVertsIn; ve++)\n" " {\n" " endVertex=pVtxIn[ve];\n" -"\n" " de = dot3F4(planeNormalWS,endVertex)+planeEqWS;\n" -"\n" " if (ds<0)\n" " {\n" " if (de<0)\n" @@ -355,8 +321,6 @@ static const char* satClipKernelsCL= \ " }\n" " return numVertsOut;\n" "}\n" -"\n" -"\n" "int clipFaceAgainstHull(const float4 separatingNormal, __global const ConvexPolyhedronCL* hullA, \n" " const float4 posA, const Quaternion ornA, float4* worldVertsB1, int numWorldVertsB1,\n" " float4* worldVertsB2, int capacityWorldVertsB2,\n" @@ -368,13 +332,11 @@ static const char* satClipKernelsCL= \ " int contactCapacity)\n" "{\n" " int numContactsOut = 0;\n" -"\n" " float4* pVtxIn = worldVertsB1;\n" " float4* pVtxOut = worldVertsB2;\n" " \n" " int numVertsIn = numWorldVertsB1;\n" " int numVertsOut = 0;\n" -"\n" " int closestFaceA=-1;\n" " {\n" " float dmin = FLT_MAX;\n" @@ -396,9 +358,7 @@ static const char* satClipKernelsCL= \ " }\n" " if (closestFaceA<0)\n" " return numContactsOut;\n" -"\n" " btGpuFace polyA = faces[hullA->m_faceOffset+closestFaceA];\n" -"\n" " // clip polygon to back of planes of all faces of hull A that are adjacent to witness face\n" " int numVerticesA = polyA.m_numIndices;\n" " for(int e0=0;e0m_faceOffset+closestFaceA];\n" -"\n" " // clip polygon to back of planes of all faces of hull A that are adjacent to witness face\n" " int numVerticesA = polyA.m_numIndices;\n" " for(int e0=0;e0m_numFaces;face++)\n" " {\n" @@ -592,7 +531,6 @@ static const char* satClipKernelsCL= \ " }\n" " }\n" " }\n" -"\n" " {\n" " const btGpuFace polyB = faces[hullB->m_faceOffset+closestFaceB];\n" " const int numVertices = polyB.m_numIndices;\n" @@ -602,7 +540,6 @@ static const char* satClipKernelsCL= \ " worldVertsB1[numWorldVertsB1++] = transform(&b,&posB,&ornB);\n" " }\n" " }\n" -"\n" " if (closestFaceB>=0)\n" " {\n" " numContactsOut = clipFaceAgainstHull(separatingNormal, hullA, \n" @@ -611,11 +548,8 @@ static const char* satClipKernelsCL= \ " faces,\n" " indices,localContactsOut,localContactCapacity);\n" " }\n" -"\n" " return numContactsOut;\n" "}\n" -"\n" -"\n" "int clipHullAgainstHullLocalA(const float4 separatingNormal,\n" " const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n" " const float4 posA, const Quaternion ornA,const float4 posB, const Quaternion ornB, \n" @@ -632,11 +566,8 @@ static const char* satClipKernelsCL= \ "{\n" " int numContactsOut = 0;\n" " int numWorldVertsB1= 0;\n" -"\n" -"\n" " int closestFaceB=-1;\n" " float dmax = -FLT_MAX;\n" -"\n" " {\n" " for(int face=0;facem_numFaces;face++)\n" " {\n" @@ -651,7 +582,6 @@ static const char* satClipKernelsCL= \ " }\n" " }\n" " }\n" -"\n" " {\n" " const btGpuFace polyB = facesB[hullB->m_faceOffset+closestFaceB];\n" " const int numVertices = polyB.m_numIndices;\n" @@ -661,7 +591,6 @@ static const char* satClipKernelsCL= \ " worldVertsB1[numWorldVertsB1++] = transform(&b,&posB,&ornB);\n" " }\n" " }\n" -"\n" " if (closestFaceB>=0)\n" " {\n" " numContactsOut = clipFaceAgainstHullLocalA(separatingNormal, hullA, \n" @@ -671,15 +600,12 @@ static const char* satClipKernelsCL= \ " verticesB,facesB,indicesB,\n" " localContactsOut,localContactCapacity);\n" " }\n" -"\n" " return numContactsOut;\n" "}\n" -"\n" "#define PARALLEL_SUM(v, n) for(int j=1; j v[i+offset].y)? v[i]: v[i+offset]; }\n" "#define REDUCE_MIN(v, n) {int i=0; for(int offset=0; offset a[ie].x )? a[0].x: a[ie].x;\n" @@ -856,14 +766,12 @@ static const char* satClipKernelsCL= \ " a[0].z = (a[0].z > a[ie].z )? a[0].z: a[ie].z;\n" " a[0].w = (a[0].w > a[ie].w )? a[0].w: a[ie].w;\n" " }\n" -"\n" " idx[0] = (int)a[0].x & 0xff;\n" " idx[1] = (int)a[0].y & 0xff;\n" " idx[2] = (int)a[0].z & 0xff;\n" " idx[3] = (int)a[0].w & 0xff;\n" " }\n" " }\n" -"\n" " {\n" " float2 h[64];\n" " PARALLEL_DO( h[ie] = make_float2((float)ie, p[ie].w), nPoints );\n" @@ -871,26 +779,20 @@ static const char* satClipKernelsCL= \ " max00 = h[0];\n" " }\n" " }\n" -"\n" " contactIdx[0] = idx[0];\n" " contactIdx[1] = idx[1];\n" " contactIdx[2] = idx[2];\n" " contactIdx[3] = idx[3];\n" -"\n" -"\n" " return 4;\n" " }\n" "}\n" -"\n" -"\n" -"\n" "__kernel void extractManifoldAndAddContactKernel(__global const int4* pairs, \n" " __global const BodyData* rigidBodies, \n" " __global const float4* closestPointsWorld,\n" " __global const float4* separatingNormalsWorld,\n" " __global const int* contactCounts,\n" " __global const int* contactOffsets,\n" -" __global Contact4* restrict contactsOut,\n" +" __global struct b3Contact4Data* restrict contactsOut,\n" " counter32_t nContactsOut,\n" " int numPairs,\n" " int pairIndex\n" @@ -908,22 +810,19 @@ static const char* satClipKernelsCL= \ " {\n" " localPoints[i] = pointsIn[i];\n" " }\n" -"\n" " int contactIdx[4];// = {-1,-1,-1,-1};\n" " contactIdx[0] = -1;\n" " contactIdx[1] = -1;\n" " contactIdx[2] = -1;\n" " contactIdx[3] = -1;\n" -"\n" " int nContacts = extractManifoldSequential(localPoints, nPoints, normal, contactIdx);\n" -"\n" " int dstIdx;\n" " AppendInc( nContactsOut, dstIdx );\n" " //if ((dstIdx+nContacts) < capacity)\n" " {\n" -" __global Contact4* c = contactsOut + dstIdx;\n" +" __global struct b3Contact4Data* c = contactsOut + dstIdx;\n" " c->m_worldNormal = normal;\n" -" c->m_coeffs = (u32)(0.f*0xffff) | ((u32)(0.7f*0xffff)<<16);\n" +" c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" " c->m_batchIdx = idx;\n" " int bodyA = pairs[pairIndex].x;\n" " int bodyB = pairs[pairIndex].y;\n" @@ -939,15 +838,12 @@ static const char* satClipKernelsCL= \ " }\n" " }\n" "}\n" -"\n" -"\n" "void trInverse(float4 translationIn, Quaternion orientationIn,\n" " float4* translationOut, Quaternion* orientationOut)\n" "{\n" " *orientationOut = qtInvert(orientationIn);\n" " *translationOut = qtRotate(*orientationOut, -translationIn);\n" "}\n" -"\n" "void trMul(float4 translationA, Quaternion orientationA,\n" " float4 translationB, Quaternion orientationB,\n" " float4* translationOut, Quaternion* orientationOut)\n" @@ -955,10 +851,6 @@ static const char* satClipKernelsCL= \ " *orientationOut = qtMul(orientationA,orientationB);\n" " *translationOut = transform(&translationB,&translationA,&orientationA);\n" "}\n" -"\n" -"\n" -"\n" -"\n" "__kernel void clipHullHullKernel( __global int4* pairs, \n" " __global const BodyData* rigidBodies, \n" " __global const btCollidableGpu* collidables,\n" @@ -969,43 +861,35 @@ static const char* satClipKernelsCL= \ " __global const int* indices,\n" " __global const float4* separatingNormals,\n" " __global const int* hasSeparatingAxis,\n" -" __global Contact4* restrict globalContactsOut,\n" +" __global struct b3Contact4Data* restrict globalContactsOut,\n" " counter32_t nGlobalContactsOut,\n" " int numPairs,\n" " int contactCapacity)\n" "{\n" -"\n" " int i = get_global_id(0);\n" " int pairIndex = i;\n" " \n" " float4 worldVertsB1[64];\n" " float4 worldVertsB2[64];\n" " int capacityWorldVerts = 64; \n" -"\n" " float4 localContactsOut[64];\n" " int localContactCapacity=64;\n" " \n" " float minDist = -1e30f;\n" " float maxDist = 0.02f;\n" -"\n" " if (im_worldNormal = normal;\n" -" c->m_coeffs = (u32)(0.f*0xffff) | ((u32)(0.7f*0xffff)<<16);\n" +" c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" " c->m_batchIdx = pairIndex;\n" " int bodyA = pairs[pairIndex].x;\n" " int bodyB = pairs[pairIndex].y;\n" @@ -1046,7 +928,6 @@ static const char* satClipKernelsCL= \ " c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB;\n" " c->m_childIndexA = -1;\n" " c->m_childIndexB = -1;\n" -"\n" " for (int i=0;im_worldPos[i] = pointsIn[contactIdx[i]];\n" @@ -1057,10 +938,7 @@ static const char* satClipKernelsCL= \ " }// if (numContactsOut>0)\n" " }// if (hasSeparatingAxis[i])\n" " }// if (im_worldNormal = normal;\n" -" c->m_coeffs = (u32)(0.f*0xffff) | ((u32)(0.7f*0xffff)<<16);\n" +" c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" " c->m_batchIdx = pairIndex;\n" " int bodyA = gpuCompoundPairs[pairIndex].x;\n" " int bodyB = gpuCompoundPairs[pairIndex].y;\n" @@ -1189,21 +1061,16 @@ static const char* satClipKernelsCL= \ " }// if (numContactsOut>0)\n" " }// if (gpuHasCompoundSepNormalsOut[i])\n" " }// if (im_worldNormal = normalOnSurfaceB;\n" -" c->m_coeffs = (u32)(0.f*0xffff) | ((u32)(0.7f*0xffff)<<16);\n" +" c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" " c->m_batchIdx = pairIndex;\n" " int bodyA = pairs[pairIndex].x;\n" " int bodyB = pairs[pairIndex].y;\n" @@ -1256,14 +1121,12 @@ static const char* satClipKernelsCL= \ " c->m_worldPos[0] = contactPosB;\n" " c->m_childIndexA = -1;\n" " c->m_childIndexB = -1;\n" -"\n" " GET_NPOINTS(*c) = 1;\n" " }//if (dstIdx < numPairs)\n" " }//if ( len <= (radiusA+radiusB))\n" " }//SHAPE_SPHERE SHAPE_SPHERE\n" " }//if (im_worldNormal = normal;\n" -" c->m_coeffs = (u32)(0.f*0xffff) | ((u32)(0.7f*0xffff)<<16);\n" +" c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" " c->m_batchIdx = pairIndex;\n" " int bodyA = concavePairsIn[pairIndex].x;\n" " int bodyB = concavePairsIn[pairIndex].y;\n" @@ -1498,12 +1342,6 @@ static const char* satClipKernelsCL= \ " }// if (numContactsOut>0)\n" " }// if (i0)\n" " {\n" -"\n" " __global float4* pointsIn = &worldVertsB2[pairIndex*vertexFaceCapacity];\n" " float4 normal = -separatingNormals[i];\n" " \n" @@ -1899,21 +1713,17 @@ static const char* satClipKernelsCL= \ " \n" " if (dstIdx < numPairs)\n" " {\n" -"\n" -" __global Contact4* c = &globalContactsOut[dstIdx];\n" +" __global struct b3Contact4Data* c = &globalContactsOut[dstIdx];\n" " c->m_worldNormal = normal;\n" -" c->m_coeffs = (u32)(0.f*0xffff) | ((u32)(0.7f*0xffff)<<16);\n" +" c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" " c->m_batchIdx = pairIndex;\n" " int bodyA = pairs[pairIndex].x;\n" " int bodyB = pairs[pairIndex].y;\n" -"\n" " pairs[pairIndex].w = dstIdx;\n" -"\n" " c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA;\n" " c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB;\n" " c->m_childIndexA =-1;\n" " c->m_childIndexB =-1;\n" -"\n" " switch (nReducedContacts)\n" " {\n" " case 4:\n" @@ -1939,10 +1749,7 @@ static const char* satClipKernelsCL= \ " }// if (numContactsOut>0)\n" " }// if (hasSeparatingAxis[i])\n" " }// if (im_numVertices;\n" -"\n" " const float4 localDir = qtInvRotate(orn,*dir);\n" " float offset = dot(pos,*dir);\n" " for(int i=0;im_numVertices;\n" -"\n" " const float4 localDir = qtInvRotate(orn,*dir);\n" " float offset = dot(pos,*dir);\n" " for(int i=0;i1e-6f || fabs(v.y)>1e-6f || fabs(v.z)>1e-6f)\n" " return false;\n" " return true;\n" "}\n" -"\n" -"\n" -"\n" "bool findSeparatingAxisLocalA( const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n" " const float4 posA1,\n" " const float4 ornA,\n" @@ -298,7 +243,6 @@ static const char* satKernelsCL= \ " const float4* uniqueEdgesA, \n" " const btGpuFace* facesA,\n" " const int* indicesA,\n" -"\n" " __global const float4* verticesB, \n" " __global const float4* uniqueEdgesB, \n" " __global const btGpuFace* facesB,\n" @@ -307,7 +251,6 @@ static const char* satKernelsCL= \ " float* dmin)\n" "{\n" " int i = get_global_id(0);\n" -"\n" " float4 posA = posA1;\n" " posA.w = 0.f;\n" " float4 posB = posB1;\n" @@ -339,7 +282,6 @@ static const char* satKernelsCL= \ " }\n" " return true;\n" "}\n" -"\n" "bool findSeparatingAxisLocalB( __global const ConvexPolyhedronCL* hullA, const ConvexPolyhedronCL* hullB, \n" " const float4 posA1,\n" " const float4 ornA,\n" @@ -358,7 +300,6 @@ static const char* satKernelsCL= \ " float* dmin)\n" "{\n" " int i = get_global_id(0);\n" -"\n" " float4 posA = posA1;\n" " posA.w = 0.f;\n" " float4 posB = posB1;\n" @@ -390,9 +331,6 @@ static const char* satKernelsCL= \ " }\n" " return true;\n" "}\n" -"\n" -"\n" -"\n" "bool findSeparatingAxisEdgeEdgeLocalA( const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n" " const float4 posA1,\n" " const float4 ornA,\n" @@ -411,36 +349,28 @@ static const char* satKernelsCL= \ " float* dmin)\n" "{\n" " int i = get_global_id(0);\n" -"\n" " float4 posA = posA1;\n" " posA.w = 0.f;\n" " float4 posB = posB1;\n" " posB.w = 0.f;\n" -"\n" " int curPlaneTests=0;\n" -"\n" " int curEdgeEdge = 0;\n" " // Test edges\n" " for(int e0=0;e0m_numUniqueEdges;e0++)\n" " {\n" " const float4 edge0 = uniqueEdgesA[hullA->m_uniqueEdgesOffset+e0];\n" " float4 edge0World = qtRotate(ornA,edge0);\n" -"\n" " for(int e1=0;e1m_numUniqueEdges;e1++)\n" " {\n" " const float4 edge1 = uniqueEdgesB[hullB->m_uniqueEdgesOffset+e1];\n" " float4 edge1World = qtRotate(ornB,edge1);\n" -"\n" -"\n" " float4 crossje = cross3(edge0World,edge1World);\n" -"\n" " curEdgeEdge++;\n" " if(!IsAlmostZero(crossje))\n" " {\n" " crossje = normalize3(crossje);\n" " if (dot3F4(DeltaC2,crossje)<0)\n" " crossje *= -1.f;\n" -"\n" " float dist;\n" " bool result = true;\n" " {\n" @@ -456,10 +386,8 @@ static const char* satKernelsCL= \ " float d1 = Max1 - Min0;\n" " dist = d00.0f)\n" " {\n" @@ -477,8 +403,6 @@ static const char* satKernelsCL= \ " }\n" " return true;\n" "}\n" -"\n" -"\n" "inline bool TestSepAxis(__global const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n" " const float4 posA,const float4 ornA,\n" " const float4 posB,const float4 ornB,\n" @@ -488,17 +412,13 @@ static const char* satKernelsCL= \ " float Min1,Max1;\n" " project(hullA,posA,ornA,sep_axis,vertices, &Min0, &Max0);\n" " project(hullB,posB,ornB, sep_axis,vertices, &Min1, &Max1);\n" -"\n" " if(Max0m_numFaces;\n" " // Test normals from hullA\n" @@ -545,8 +463,6 @@ static const char* satKernelsCL= \ " }\n" " }\n" " }\n" -"\n" -"\n" " if((dot3F4(-DeltaC2,*sep))>0.0f)\n" " {\n" " *sep = -(*sep);\n" @@ -554,10 +470,6 @@ static const char* satKernelsCL= \ " \n" " return true;\n" "}\n" -"\n" -"\n" -"\n" -"\n" "bool findSeparatingAxisEdgeEdge( __global const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n" " const float4 posA1,\n" " const float4 ornA,\n" @@ -572,29 +484,22 @@ static const char* satKernelsCL= \ " float* dmin)\n" "{\n" " int i = get_global_id(0);\n" -"\n" " float4 posA = posA1;\n" " posA.w = 0.f;\n" " float4 posB = posB1;\n" " posB.w = 0.f;\n" -"\n" " int curPlaneTests=0;\n" -"\n" " int curEdgeEdge = 0;\n" " // Test edges\n" " for(int e0=0;e0m_numUniqueEdges;e0++)\n" " {\n" " const float4 edge0 = uniqueEdges[hullA->m_uniqueEdgesOffset+e0];\n" " float4 edge0World = qtRotate(ornA,edge0);\n" -"\n" " for(int e1=0;e1m_numUniqueEdges;e1++)\n" " {\n" " const float4 edge1 = uniqueEdges[hullB->m_uniqueEdgesOffset+e1];\n" " float4 edge1World = qtRotate(ornB,edge1);\n" -"\n" -"\n" " float4 crossje = cross3(edge0World,edge1World);\n" -"\n" " curEdgeEdge++;\n" " if(!IsAlmostZero(crossje))\n" " {\n" @@ -617,10 +522,8 @@ static const char* satKernelsCL= \ " float d1 = Max1 - Min0;\n" " dist = d00.0f)\n" " {\n" @@ -638,8 +539,6 @@ static const char* satKernelsCL= \ " }\n" " return true;\n" "}\n" -"\n" -"\n" "// work-in-progress\n" "__kernel void processCompoundPairsKernel( __global const int4* gpuCompoundPairs,\n" " __global const BodyData* rigidBodies, \n" @@ -656,13 +555,11 @@ static const char* satKernelsCL= \ " int numCompoundPairs\n" " )\n" "{\n" -"\n" " int i = get_global_id(0);\n" " if (i=numConcavePairs)\n" " return;\n" " int pairIdx = i;\n" -"\n" " int bodyIndexA = concavePairs[i].x;\n" " int bodyIndexB = concavePairs[i].y;\n" -"\n" " int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" " int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" -"\n" " int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" " int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" -"\n" " if (collidables[collidableIndexB].m_shapeType!=SHAPE_CONVEX_HULL&&\n" " collidables[collidableIndexB].m_shapeType!=SHAPE_COMPOUND_OF_CONVEX_HULLS)\n" " {\n" " concavePairs[pairIdx].w = -1;\n" " return;\n" " }\n" -"\n" -"\n" -"\n" " int numFacesA = convexShapes[shapeIndexA].m_numFaces;\n" " int numActualConcaveConvexTests = 0;\n" " \n" @@ -1100,12 +951,10 @@ static const char* satKernelsCL= \ " bool overlap = false;\n" " \n" " ConvexPolyhedronCL convexPolyhedronA;\n" -"\n" " //add 3 vertices of the triangle\n" " convexPolyhedronA.m_numVertices = 3;\n" " convexPolyhedronA.m_vertexOffset = 0;\n" " float4 localCenter = make_float4(0.f,0.f,0.f,0.f);\n" -"\n" " btGpuFace face = faces[convexShapes[shapeIndexA].m_faceOffset+f];\n" " float4 triMinAabb, triMaxAabb;\n" " btAabbCL triAabb;\n" @@ -1122,9 +971,7 @@ static const char* satKernelsCL= \ " \n" " triAabb.m_min = min(triAabb.m_min,vert); \n" " triAabb.m_max = max(triAabb.m_max,vert); \n" -"\n" " }\n" -"\n" " overlap = true;\n" " overlap = (triAabb.m_min.x > aabbs[bodyIndexB].m_max.x || triAabb.m_max.x < aabbs[bodyIndexB].m_min.x) ? false : overlap;\n" " overlap = (triAabb.m_min.z > aabbs[bodyIndexB].m_max.z || triAabb.m_max.z < aabbs[bodyIndexB].m_min.z) ? false : overlap;\n" @@ -1135,10 +982,8 @@ static const char* satKernelsCL= \ " float dmin = FLT_MAX;\n" " int hasSeparatingAxis=5;\n" " float4 sepAxis=make_float4(1,2,3,4);\n" -"\n" " int localCC=0;\n" " numActualConcaveConvexTests++;\n" -"\n" " //a triangle has 3 unique edges\n" " convexPolyhedronA.m_numUniqueEdges = 3;\n" " convexPolyhedronA.m_uniqueEdgesOffset = 0;\n" @@ -1147,8 +992,6 @@ static const char* satKernelsCL= \ " uniqueEdgesA[0] = (verticesA[1]-verticesA[0]);\n" " uniqueEdgesA[1] = (verticesA[2]-verticesA[1]);\n" " uniqueEdgesA[2] = (verticesA[0]-verticesA[2]);\n" -"\n" -"\n" " convexPolyhedronA.m_faceOffset = 0;\n" " \n" " float4 normal = make_float4(face.m_plane.x,face.m_plane.y,face.m_plane.z,0.f);\n" @@ -1157,7 +1000,6 @@ static const char* satKernelsCL= \ " int indicesA[3+3+2+2+2];\n" " int curUsedIndices=0;\n" " int fidx=0;\n" -"\n" " //front size of triangle\n" " {\n" " facesA[fidx].m_indexOffset=curUsedIndices;\n" @@ -1189,7 +1031,6 @@ static const char* satKernelsCL= \ " facesA[fidx].m_numIndices=3;\n" " }\n" " fidx++;\n" -"\n" " bool addEdgePlanes = true;\n" " if (addEdgePlanes)\n" " {\n" @@ -1202,7 +1043,6 @@ static const char* satKernelsCL= \ " \n" " float4 edgeNormal = normalize(cross(normal,v1-v0));\n" " float c = -dot(edgeNormal,v0);\n" -"\n" " facesA[fidx].m_numIndices = 2;\n" " facesA[fidx].m_indexOffset=curUsedIndices;\n" " indicesA[curUsedIndices++]=i;\n" @@ -1218,22 +1058,15 @@ static const char* satKernelsCL= \ " }\n" " convexPolyhedronA.m_numFaces = TRIANGLE_NUM_CONVEX_FACES;\n" " convexPolyhedronA.m_localCenter = localCenter*(1.f/3.f);\n" -"\n" -"\n" " float4 posA = rigidBodies[bodyIndexA].m_pos;\n" " posA.w = 0.f;\n" " float4 posB = rigidBodies[bodyIndexB].m_pos;\n" " posB.w = 0.f;\n" -"\n" " float4 ornA = rigidBodies[bodyIndexA].m_quat;\n" " float4 ornB =rigidBodies[bodyIndexB].m_quat;\n" -"\n" " \n" -"\n" -"\n" " ///////////////////\n" " ///compound shape support\n" -"\n" " if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)\n" " {\n" " int compoundChild = concavePairs[pairIdx].w;\n" @@ -1248,14 +1081,11 @@ static const char* satKernelsCL= \ " shapeIndexB = collidables[childColIndexB].m_shapeIndex;\n" " }\n" " //////////////////\n" -"\n" " float4 c0local = convexPolyhedronA.m_localCenter;\n" " float4 c0 = transform(&c0local, &posA, &ornA);\n" " float4 c1local = convexShapes[shapeIndexB].m_localCenter;\n" " float4 c1 = transform(&c1local,&posB,&ornB);\n" " const float4 DeltaC2 = c0 - c1;\n" -"\n" -"\n" " bool sepA = findSeparatingAxisLocalA( &convexPolyhedronA, &convexShapes[shapeIndexB],\n" " posA,ornA,\n" " posB,ornB,\n" @@ -1276,7 +1106,6 @@ static const char* satKernelsCL= \ " vertices,uniqueEdges,faces,indices,\n" " verticesA,uniqueEdgesA,facesA,indicesA,\n" " &sepAxis,&dmin);\n" -"\n" " if (!sepB)\n" " {\n" " hasSeparatingAxis = 0;\n" diff --git a/src/Bullet3OpenCL/ParallelPrimitives/kernels/BoundSearchKernelsCL.h b/src/Bullet3OpenCL/ParallelPrimitives/kernels/BoundSearchKernelsCL.h index f85b19074..9c9e84713 100644 --- a/src/Bullet3OpenCL/ParallelPrimitives/kernels/BoundSearchKernelsCL.h +++ b/src/Bullet3OpenCL/ParallelPrimitives/kernels/BoundSearchKernelsCL.h @@ -2,59 +2,45 @@ static const char* boundSearchKernelsCL= \ "/*\n" "Copyright (c) 2012 Advanced Micro Devices, Inc. \n" -"\n" "This software is provided 'as-is', without any express or implied warranty.\n" "In no event will the authors be held liable for any damages arising from the use of this software.\n" "Permission is granted to anyone to use this software for any purpose, \n" "including commercial applications, and to alter it and redistribute it freely, \n" "subject to the following restrictions:\n" -"\n" "1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" "2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" "3. This notice may not be removed or altered from any source distribution.\n" "*/\n" "//Originally written by Takahiro Harada\n" -"\n" -"\n" "typedef unsigned int u32;\n" "#define GET_GROUP_IDX get_group_id(0)\n" "#define GET_LOCAL_IDX get_local_id(0)\n" "#define GET_GLOBAL_IDX get_global_id(0)\n" "#define GET_GROUP_SIZE get_local_size(0)\n" "#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)\n" -"\n" "typedef struct\n" "{\n" " u32 m_key; \n" " u32 m_value;\n" "}SortData;\n" -"\n" -"\n" -"\n" "typedef struct\n" "{\n" " u32 m_nSrc;\n" " u32 m_nDst;\n" " u32 m_padding[2];\n" "} ConstBuffer;\n" -"\n" -"\n" -"\n" "__attribute__((reqd_work_group_size(64,1,1)))\n" "__kernel\n" "void SearchSortDataLowerKernel(__global SortData* src, __global u32 *dst, \n" " unsigned int nSrc, unsigned int nDst)\n" "{\n" " int gIdx = GET_GLOBAL_IDX;\n" -"\n" " if( gIdx < nSrc )\n" " {\n" " SortData first; first.m_key = (u32)(-1); first.m_value = (u32)(-1);\n" " SortData end; end.m_key = nDst; end.m_value = nDst;\n" -"\n" " SortData iData = (gIdx==0)? first: src[gIdx-1];\n" " SortData jData = (gIdx==nSrc)? end: src[gIdx];\n" -"\n" " if( iData.m_key != jData.m_key )\n" " {\n" "// for(u32 k=iData.m_key+1; k<=min(jData.m_key, nDst-1); k++)\n" @@ -65,23 +51,18 @@ static const char* boundSearchKernelsCL= \ " }\n" " }\n" "}\n" -"\n" -"\n" "__attribute__((reqd_work_group_size(64,1,1)))\n" "__kernel\n" "void SearchSortDataUpperKernel(__global SortData* src, __global u32 *dst, \n" " unsigned int nSrc, unsigned int nDst)\n" "{\n" " int gIdx = GET_GLOBAL_IDX+1;\n" -"\n" " if( gIdx < nSrc+1 )\n" " {\n" " SortData first; first.m_key = 0; first.m_value = 0;\n" " SortData end; end.m_key = nDst; end.m_value = nDst;\n" -"\n" " SortData iData = src[gIdx-1];\n" " SortData jData = (gIdx==nSrc)? end: src[gIdx];\n" -"\n" " if( iData.m_key != jData.m_key )\n" " {\n" " u32 k = iData.m_key;\n" @@ -91,7 +72,6 @@ static const char* boundSearchKernelsCL= \ " }\n" " }\n" "}\n" -"\n" "__attribute__((reqd_work_group_size(64,1,1)))\n" "__kernel\n" "void SubtractKernel(__global u32* A, __global u32 *B, __global u32 *C, \n" @@ -99,11 +79,9 @@ static const char* boundSearchKernelsCL= \ "{\n" " int gIdx = GET_GLOBAL_IDX;\n" " \n" -"\n" " if( gIdx < nDst )\n" " {\n" " C[gIdx] = A[gIdx] - B[gIdx];\n" " }\n" "}\n" -"\n" ; diff --git a/src/Bullet3OpenCL/ParallelPrimitives/kernels/FillKernelsCL.h b/src/Bullet3OpenCL/ParallelPrimitives/kernels/FillKernelsCL.h index 25ed76d9a..4f8b96e48 100644 --- a/src/Bullet3OpenCL/ParallelPrimitives/kernels/FillKernelsCL.h +++ b/src/Bullet3OpenCL/ParallelPrimitives/kernels/FillKernelsCL.h @@ -2,23 +2,18 @@ static const char* fillKernelsCL= \ "/*\n" "Copyright (c) 2012 Advanced Micro Devices, Inc. \n" -"\n" "This software is provided 'as-is', without any express or implied warranty.\n" "In no event will the authors be held liable for any damages arising from the use of this software.\n" "Permission is granted to anyone to use this software for any purpose, \n" "including commercial applications, and to alter it and redistribute it freely, \n" "subject to the following restrictions:\n" -"\n" "1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" "2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" "3. This notice may not be removed or altered from any source distribution.\n" "*/\n" "//Originally written by Takahiro Harada\n" -"\n" -"\n" "#pragma OPENCL EXTENSION cl_amd_printf : enable\n" "#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n" -"\n" "typedef unsigned int u32;\n" "#define GET_GROUP_IDX get_group_id(0)\n" "#define GET_LOCAL_IDX get_local_id(0)\n" @@ -28,11 +23,9 @@ static const char* fillKernelsCL= \ "#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE)\n" "#define AtomInc(x) atom_inc(&(x))\n" "#define AtomInc1(x, out) out = atom_inc(&(x))\n" -"\n" "#define make_uint4 (uint4)\n" "#define make_uint2 (uint2)\n" "#define make_int2 (int2)\n" -"\n" "typedef struct\n" "{\n" " union\n" @@ -45,66 +38,54 @@ static const char* fillKernelsCL= \ " int m_n;\n" " int m_padding[2];\n" "} ConstBuffer;\n" -"\n" -"\n" "__kernel\n" "__attribute__((reqd_work_group_size(64,1,1)))\n" "void FillIntKernel(__global int* dstInt, int num_elements, int value, const int offset)\n" "{\n" " int gIdx = GET_GLOBAL_IDX;\n" -"\n" " if( gIdx < num_elements )\n" " {\n" " dstInt[ offset+gIdx ] = value;\n" " }\n" "}\n" -"\n" "__kernel\n" "__attribute__((reqd_work_group_size(64,1,1)))\n" "void FillFloatKernel(__global float* dstFloat, int num_elements, float value, const int offset)\n" "{\n" " int gIdx = GET_GLOBAL_IDX;\n" -"\n" " if( gIdx < num_elements )\n" " {\n" " dstFloat[ offset+gIdx ] = value;\n" " }\n" "}\n" -"\n" "__kernel\n" "__attribute__((reqd_work_group_size(64,1,1)))\n" "void FillUnsignedIntKernel(__global unsigned int* dstInt, const int num, const unsigned int value, const int offset)\n" "{\n" " int gIdx = GET_GLOBAL_IDX;\n" -"\n" " if( gIdx < num )\n" " {\n" " dstInt[ offset+gIdx ] = value;\n" " }\n" "}\n" -"\n" "__kernel\n" "__attribute__((reqd_work_group_size(64,1,1)))\n" "void FillInt2Kernel(__global int2* dstInt2, const int num, const int2 value, const int offset)\n" "{\n" " int gIdx = GET_GLOBAL_IDX;\n" -"\n" " if( gIdx < num )\n" " {\n" " dstInt2[ gIdx + offset] = make_int2( value.x, value.y );\n" " }\n" "}\n" -"\n" "__kernel\n" "__attribute__((reqd_work_group_size(64,1,1)))\n" "void FillInt4Kernel(__global int4* dstInt4, const int num, const int4 value, const int offset)\n" "{\n" " int gIdx = GET_GLOBAL_IDX;\n" -"\n" " if( gIdx < num )\n" " {\n" " dstInt4[ offset+gIdx ] = value;\n" " }\n" "}\n" -"\n" ; diff --git a/src/Bullet3OpenCL/ParallelPrimitives/kernels/PrefixScanKernelsCL.h b/src/Bullet3OpenCL/ParallelPrimitives/kernels/PrefixScanKernelsCL.h index 2c6190973..27baab833 100644 --- a/src/Bullet3OpenCL/ParallelPrimitives/kernels/PrefixScanKernelsCL.h +++ b/src/Bullet3OpenCL/ParallelPrimitives/kernels/PrefixScanKernelsCL.h @@ -2,33 +2,27 @@ static const char* prefixScanKernelsCL= \ "/*\n" "Copyright (c) 2012 Advanced Micro Devices, Inc. \n" -"\n" "This software is provided 'as-is', without any express or implied warranty.\n" "In no event will the authors be held liable for any damages arising from the use of this software.\n" "Permission is granted to anyone to use this software for any purpose, \n" "including commercial applications, and to alter it and redistribute it freely, \n" "subject to the following restrictions:\n" -"\n" "1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" "2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" "3. This notice may not be removed or altered from any source distribution.\n" "*/\n" "//Originally written by Takahiro Harada\n" -"\n" -"\n" "typedef unsigned int u32;\n" "#define GET_GROUP_IDX get_group_id(0)\n" "#define GET_LOCAL_IDX get_local_id(0)\n" "#define GET_GLOBAL_IDX get_global_id(0)\n" "#define GET_GROUP_SIZE get_local_size(0)\n" "#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)\n" -"\n" "// takahiro end\n" "#define WG_SIZE 128 \n" "#define m_numElems x\n" "#define m_numBlocks y\n" "#define m_numScanBlocks z\n" -"\n" "/*typedef struct\n" "{\n" " uint m_numElems;\n" @@ -37,7 +31,6 @@ static const char* prefixScanKernelsCL= \ " uint m_padding[1];\n" "} ConstBuffer;\n" "*/\n" -"\n" "u32 ScanExclusive(__local u32* data, u32 n, int lIdx, int lSize)\n" "{\n" " u32 blocksum;\n" @@ -52,17 +45,13 @@ static const char* prefixScanKernelsCL= \ " data[bi] += data[ai];\n" " }\n" " }\n" -"\n" " GROUP_LDS_BARRIER;\n" -"\n" " if( lIdx == 0 )\n" " {\n" " blocksum = data[ n-1 ];\n" " data[ n-1 ] = 0;\n" " }\n" -"\n" " GROUP_LDS_BARRIER;\n" -"\n" " offset >>= 1;\n" " for(int nActive=1; nActive>=1 )\n" " {\n" @@ -77,27 +66,20 @@ static const char* prefixScanKernelsCL= \ " }\n" " }\n" " GROUP_LDS_BARRIER;\n" -"\n" " return blocksum;\n" "}\n" -"\n" "__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" "__kernel\n" "void LocalScanKernel(__global u32* dst, __global u32 *src, __global u32 *sumBuffer,\n" " uint4 cb)\n" "{\n" " __local u32 ldsData[WG_SIZE*2];\n" -"\n" " int gIdx = GET_GLOBAL_IDX;\n" " int lIdx = GET_LOCAL_IDX;\n" -"\n" " ldsData[2*lIdx] = ( 2*gIdx < cb.m_numElems )? src[2*gIdx]: 0;\n" " ldsData[2*lIdx + 1] = ( 2*gIdx+1 < cb.m_numElems )? src[2*gIdx + 1]: 0;\n" -"\n" " u32 sum = ScanExclusive(ldsData, WG_SIZE*2, GET_LOCAL_IDX, GET_GROUP_SIZE);\n" -"\n" " if( lIdx == 0 ) sumBuffer[GET_GROUP_IDX] = sum;\n" -"\n" " if( (2*gIdx) < cb.m_numElems )\n" " {\n" " dst[2*gIdx] = ldsData[2*lIdx];\n" @@ -107,25 +89,20 @@ static const char* prefixScanKernelsCL= \ " dst[2*gIdx + 1] = ldsData[2*lIdx + 1];\n" " }\n" "}\n" -"\n" "__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" "__kernel\n" "void AddOffsetKernel(__global u32 *dst, __global u32 *blockSum, uint4 cb)\n" "{\n" " const u32 blockSize = WG_SIZE*2;\n" -"\n" " int myIdx = GET_GROUP_IDX+1;\n" " int lIdx = GET_LOCAL_IDX;\n" -"\n" " u32 iBlockSum = blockSum[myIdx];\n" -"\n" " int endValue = min((myIdx+1)*(blockSize), cb.m_numElems);\n" " for(int i=myIdx*blockSize+lIdx; i>= 1;\n" " for(int nActive=1; nActive>=1 )\n" " {\n" @@ -77,27 +66,20 @@ static const char* prefixScanKernelsFloat4CL= \ " }\n" " }\n" " GROUP_LDS_BARRIER;\n" -"\n" " return blocksum;\n" "}\n" -"\n" "__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" "__kernel\n" "void LocalScanKernel(__global float4* dst, __global float4* src, __global float4* sumBuffer, uint4 cb)\n" "{\n" " __local float4 ldsData[WG_SIZE*2];\n" -"\n" " int gIdx = GET_GLOBAL_IDX;\n" " int lIdx = GET_LOCAL_IDX;\n" -"\n" " ldsData[2*lIdx] = ( 2*gIdx < cb.m_numElems )? src[2*gIdx]: 0;\n" " ldsData[2*lIdx + 1] = ( 2*gIdx+1 < cb.m_numElems )? src[2*gIdx + 1]: 0;\n" -"\n" " float4 sum = ScanExclusiveFloat4(ldsData, WG_SIZE*2, GET_LOCAL_IDX, GET_GROUP_SIZE);\n" -"\n" " if( lIdx == 0 ) \n" " sumBuffer[GET_GROUP_IDX] = sum;\n" -"\n" " if( (2*gIdx) < cb.m_numElems )\n" " {\n" " dst[2*gIdx] = ldsData[2*lIdx];\n" @@ -107,25 +89,20 @@ static const char* prefixScanKernelsFloat4CL= \ " dst[2*gIdx + 1] = ldsData[2*lIdx + 1];\n" " }\n" "}\n" -"\n" "__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" "__kernel\n" "void AddOffsetKernel(__global float4* dst, __global float4* blockSum, uint4 cb)\n" "{\n" " const u32 blockSize = WG_SIZE*2;\n" -"\n" " int myIdx = GET_GROUP_IDX+1;\n" " int lIdx = GET_LOCAL_IDX;\n" -"\n" " float4 iBlockSum = blockSum[myIdx];\n" -"\n" " int endValue = min((myIdx+1)*(blockSize), cb.m_numElems);\n" " for(int i=myIdx*blockSize+lIdx; i>(keyIdx*8)) & 0xff;}\n" -"\n" "u32 bit8Scan(u32 v)\n" "{\n" " return (v<<8) + (v<<16) + (v<<24);\n" "}\n" -"\n" "//===\n" -"\n" -"\n" -"\n" -"\n" "#define MY_HISTOGRAM(idx) localHistogramMat[(idx)*WG_SIZE+lIdx]\n" -"\n" -"\n" "__kernel\n" "__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" "void StreamCountKernel( __global u32* gSrc, __global u32* histogramOut, int4 cb )\n" "{\n" " __local u32 localHistogramMat[NUM_BUCKET*WG_SIZE];\n" -"\n" " u32 gIdx = GET_GLOBAL_IDX;\n" " u32 lIdx = GET_LOCAL_IDX;\n" " u32 wgIdx = GET_GROUP_IDX;\n" @@ -219,21 +178,15 @@ static const char* radixSort32KernelsCL= \ " const int n = cb.m_n;\n" " const int nWGs = cb.m_nWGs;\n" " const int nBlocksPerWG = cb.m_nBlocksPerWG;\n" -"\n" " for(int i=0; i>(startBit+ibit)) & 0x3, \n" " (sortData[2]>>(startBit+ibit)) & 0x3, \n" " (sortData[3]>>(startBit+ibit)) & 0x3);\n" -"\n" " u32 key4;\n" " u32 sKeyPacked[4] = { 0, 0, 0, 0 };\n" " {\n" @@ -470,22 +395,17 @@ static const char* radixSort32KernelsCL= \ " sKeyPacked[1] |= 1<<(8*b.y);\n" " sKeyPacked[2] |= 1<<(8*b.z);\n" " sKeyPacked[3] |= 1<<(8*b.w);\n" -"\n" " key4 = sKeyPacked[0] + sKeyPacked[1] + sKeyPacked[2] + sKeyPacked[3];\n" " }\n" -"\n" " u32 rankPacked;\n" " u32 sumPacked;\n" " {\n" " rankPacked = localPrefixSum( key4, lIdx, &sumPacked, ldsSortData, WG_SIZE );\n" " }\n" -"\n" " GROUP_LDS_BARRIER;\n" -"\n" " u32 newOffset[4] = { 0,0,0,0 };\n" " {\n" " u32 sumScanned = bit8Scan( sumPacked );\n" -"\n" " u32 scannedKeys[4];\n" " scannedKeys[0] = 1<<(8*b.x);\n" " scannedKeys[1] = 1<<(8*b.y);\n" @@ -500,7 +420,6 @@ static const char* radixSort32KernelsCL= \ " sum4 += tmp;\n" " }\n" " }\n" -"\n" " {\n" " u32 sumPlusRank = sumScanned + rankPacked;\n" " { u32 ie = b.x;\n" @@ -521,31 +440,23 @@ static const char* radixSort32KernelsCL= \ " }\n" " }\n" " }\n" -"\n" -"\n" " GROUP_LDS_BARRIER;\n" -"\n" " {\n" " ldsSortData[newOffset[0]] = sortData[0];\n" " ldsSortData[newOffset[1]] = sortData[1];\n" " ldsSortData[newOffset[2]] = sortData[2];\n" " ldsSortData[newOffset[3]] = sortData[3];\n" -"\n" " GROUP_LDS_BARRIER;\n" -"\n" " u32 dstAddr = 4*lIdx;\n" " sortData[0] = ldsSortData[dstAddr+0];\n" " sortData[1] = ldsSortData[dstAddr+1];\n" " sortData[2] = ldsSortData[dstAddr+2];\n" " sortData[3] = ldsSortData[dstAddr+3];\n" -"\n" " GROUP_LDS_BARRIER;\n" " }\n" " }\n" "}\n" -"\n" "#define SET_HISTOGRAM(setIdx, key) ldsSortData[(setIdx)*NUM_BUCKET+key]\n" -"\n" "__kernel\n" "__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" "void SortAndScatterKernel( __global const u32* restrict gSrc, __global const u32* rHistogram, __global u32* restrict gDst, int4 cb )\n" @@ -553,34 +464,25 @@ static const char* radixSort32KernelsCL= \ " __local u32 ldsSortData[WG_SIZE*ELEMENTS_PER_WORK_ITEM+16];\n" " __local u32 localHistogramToCarry[NUM_BUCKET];\n" " __local u32 localHistogram[NUM_BUCKET*2];\n" -"\n" " u32 gIdx = GET_GLOBAL_IDX;\n" " u32 lIdx = GET_LOCAL_IDX;\n" " u32 wgIdx = GET_GROUP_IDX;\n" " u32 wgSize = GET_GROUP_SIZE;\n" -"\n" " const int n = cb.m_n;\n" " const int nWGs = cb.m_nWGs;\n" " const int startBit = cb.m_startBit;\n" " const int nBlocksPerWG = cb.m_nBlocksPerWG;\n" -"\n" " if( lIdx < (NUM_BUCKET) )\n" " {\n" " localHistogramToCarry[lIdx] = rHistogram[lIdx*nWGs + wgIdx];\n" " }\n" -"\n" " GROUP_LDS_BARRIER;\n" -"\n" " const int blockSize = ELEMENTS_PER_WORK_ITEM*WG_SIZE;\n" -"\n" " int nBlocks = n/blockSize - nBlocksPerWG*wgIdx;\n" -"\n" " int addr = blockSize*nBlocksPerWG*wgIdx + ELEMENTS_PER_WORK_ITEM*lIdx;\n" -"\n" " for(int iblock=0; iblock>startBit) & 0xf;\n" -"\n" " { // create histogram\n" " u32 setIdx = lIdx/16;\n" " if( lIdx < NUM_BUCKET )\n" @@ -603,12 +502,10 @@ static const char* radixSort32KernelsCL= \ " }\n" " ldsSortData[lIdx] = 0;\n" " GROUP_LDS_BARRIER;\n" -"\n" " for(int i=0; i>(startBit+ibit)) & 0x3, \n" " (sortData[2]>>(startBit+ibit)) & 0x3, \n" " (sortData[3]>>(startBit+ibit)) & 0x3);\n" -"\n" " u32 key4;\n" " u32 sKeyPacked[4] = { 0, 0, 0, 0 };\n" " {\n" @@ -707,22 +597,17 @@ static const char* radixSort32KernelsCL= \ " sKeyPacked[1] |= 1<<(8*b.y);\n" " sKeyPacked[2] |= 1<<(8*b.z);\n" " sKeyPacked[3] |= 1<<(8*b.w);\n" -"\n" " key4 = sKeyPacked[0] + sKeyPacked[1] + sKeyPacked[2] + sKeyPacked[3];\n" " }\n" -"\n" " u32 rankPacked;\n" " u32 sumPacked;\n" " {\n" " rankPacked = localPrefixSum( key4, lIdx, &sumPacked, ldsSortData, WG_SIZE );\n" " }\n" -"\n" " GROUP_LDS_BARRIER;\n" -"\n" " u32 newOffset[4] = { 0,0,0,0 };\n" " {\n" " u32 sumScanned = bit8Scan( sumPacked );\n" -"\n" " u32 scannedKeys[4];\n" " scannedKeys[0] = 1<<(8*b.x);\n" " scannedKeys[1] = 1<<(8*b.y);\n" @@ -737,7 +622,6 @@ static const char* radixSort32KernelsCL= \ " sum4 += tmp;\n" " }\n" " }\n" -"\n" " {\n" " u32 sumPlusRank = sumScanned + rankPacked;\n" " { u32 ie = b.x;\n" @@ -758,42 +642,30 @@ static const char* radixSort32KernelsCL= \ " }\n" " }\n" " }\n" -"\n" -"\n" " GROUP_LDS_BARRIER;\n" -"\n" " {\n" " ldsSortData[newOffset[0]] = sortData[0];\n" " ldsSortData[newOffset[1]] = sortData[1];\n" " ldsSortData[newOffset[2]] = sortData[2];\n" " ldsSortData[newOffset[3]] = sortData[3];\n" -"\n" " ldsSortVal[newOffset[0]] = sortVal[0];\n" " ldsSortVal[newOffset[1]] = sortVal[1];\n" " ldsSortVal[newOffset[2]] = sortVal[2];\n" " ldsSortVal[newOffset[3]] = sortVal[3];\n" -"\n" " GROUP_LDS_BARRIER;\n" -"\n" " u32 dstAddr = 4*lIdx;\n" " sortData[0] = ldsSortData[dstAddr+0];\n" " sortData[1] = ldsSortData[dstAddr+1];\n" " sortData[2] = ldsSortData[dstAddr+2];\n" " sortData[3] = ldsSortData[dstAddr+3];\n" -"\n" " sortVal[0] = ldsSortVal[dstAddr+0];\n" " sortVal[1] = ldsSortVal[dstAddr+1];\n" " sortVal[2] = ldsSortVal[dstAddr+2];\n" " sortVal[3] = ldsSortVal[dstAddr+3];\n" -"\n" " GROUP_LDS_BARRIER;\n" " }\n" " }\n" "}\n" -"\n" -"\n" -"\n" -"\n" "__kernel\n" "__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" "void SortAndScatterSortDataKernel( __global const SortDataCL* restrict gSrc, __global const u32* rHistogram, __global SortDataCL* restrict gDst, int4 cb)\n" @@ -802,39 +674,28 @@ static const char* radixSort32KernelsCL= \ " __local int ldsSortVal[WG_SIZE*ELEMENTS_PER_WORK_ITEM+16];\n" " __local u32 localHistogramToCarry[NUM_BUCKET];\n" " __local u32 localHistogram[NUM_BUCKET*2];\n" -"\n" " u32 gIdx = GET_GLOBAL_IDX;\n" " u32 lIdx = GET_LOCAL_IDX;\n" " u32 wgIdx = GET_GROUP_IDX;\n" " u32 wgSize = GET_GROUP_SIZE;\n" -"\n" " const int n = cb.m_n;\n" " const int nWGs = cb.m_nWGs;\n" " const int startBit = cb.m_startBit;\n" " const int nBlocksPerWG = cb.m_nBlocksPerWG;\n" -"\n" " if( lIdx < (NUM_BUCKET) )\n" " {\n" " localHistogramToCarry[lIdx] = rHistogram[lIdx*nWGs + wgIdx];\n" " }\n" -"\n" " GROUP_LDS_BARRIER;\n" " \n" -"\n" " const int blockSize = ELEMENTS_PER_WORK_ITEM*WG_SIZE;\n" -"\n" " int nBlocks = n/blockSize - nBlocksPerWG*wgIdx;\n" -"\n" " int addr = blockSize*nBlocksPerWG*wgIdx + ELEMENTS_PER_WORK_ITEM*lIdx;\n" -"\n" " for(int iblock=0; iblock>startBit) & 0xf;\n" -"\n" " { // create histogram\n" " u32 setIdx = lIdx/16;\n" " if( lIdx < NUM_BUCKET )\n" @@ -862,12 +720,10 @@ static const char* radixSort32KernelsCL= \ " }\n" " ldsSortData[lIdx] = 0;\n" " GROUP_LDS_BARRIER;\n" -"\n" " for(int i=0; i0)\n" @@ -996,11 +839,9 @@ static const char* radixSort32KernelsCL= \ " \n" " for (int c=0;c0)\n" @@ -1045,11 +883,9 @@ static const char* radixSort32KernelsCL= \ " \n" " for (int c=0;c 0.0f)\n" " {\n" " float t = (-B - sqrt(D))/A;\n" -"\n" " if ( (t >= 0.0f) && (t < (*hitFraction)) )\n" " {\n" " *hitFraction = t;\n" @@ -243,7 +196,6 @@ static const char* rayCastKernelCL= \ " }\n" " return false;\n" "}\n" -"\n" "float4 setInterpolate3(float4 from, float4 to, float t)\n" "{\n" " float s = 1.0f - t;\n" @@ -252,7 +204,6 @@ static const char* rayCastKernelCL= \ " result.w = 0.f; \n" " return result; \n" "}\n" -"\n" "__kernel void rayCastKernel( \n" " int numRays, \n" " const __global b3RayInfo* rays, \n" @@ -263,23 +214,18 @@ static const char* rayCastKernelCL= \ " __global const b3GpuFace* faces,\n" " __global const ConvexPolyhedronCL* convexShapes )\n" "{\n" -"\n" " int i = get_global_id(0);\n" " if (i>=numRays)\n" " return;\n" -"\n" " hitResults[i].m_hitFraction = 1.f;\n" -"\n" " float4 rayFrom = rays[i].m_from;\n" " float4 rayTo = rays[i].m_to;\n" " float hitFraction = 1.f;\n" " float4 hitPoint;\n" " float4 hitNormal;\n" " int hitBodyIndex= -1;\n" -"\n" " int cachedCollidableIndex = -1;\n" " Collidable cachedCollidable;\n" -"\n" " for (int b=0;b=0)\n" " {\n" " hitPoint = setInterpolate3(rayFrom, rayTo,hitFraction);\n" @@ -336,6 +280,5 @@ static const char* rayCastKernelCL= \ " hitResults[i].m_hitNormal = normalize(hitNormal);\n" " hitResults[i].m_hitResult0 = hitBodyIndex;\n" " }\n" -"\n" "}\n" ; diff --git a/src/Bullet3OpenCL/RigidBody/b3GpuBatchingPgsSolver.cpp b/src/Bullet3OpenCL/RigidBody/b3GpuBatchingPgsSolver.cpp index 8b8aa751e..66514fb9e 100644 --- a/src/Bullet3OpenCL/RigidBody/b3GpuBatchingPgsSolver.cpp +++ b/src/Bullet3OpenCL/RigidBody/b3GpuBatchingPgsSolver.cpp @@ -148,8 +148,8 @@ b3GpuBatchingPgsSolver::b3GpuBatchingPgsSolver(cl_context ctx,cl_device_id devic cl_program solveFrictionProg= b3OpenCLUtils::compileCLProgramFromString( ctx, device, solveFrictionSource, &pErrNum,additionalMacros, B3_SOLVER_FRICTION_KERNEL_PATH); b3Assert(solveFrictionProg); - //cl_program solverSetup2Prog= b3OpenCLUtils::compileCLProgramFromString( ctx, device, solverSetup2Source, &pErrNum,additionalMacros, B3_SOLVER_SETUP2_KERNEL_PATH); - cl_program solverSetup2Prog= b3OpenCLUtils::compileCLProgramFromString( ctx, device, 0, &pErrNum,additionalMacros, B3_SOLVER_SETUP2_KERNEL_PATH,true); + cl_program solverSetup2Prog= b3OpenCLUtils::compileCLProgramFromString( ctx, device, solverSetup2Source, &pErrNum,additionalMacros, B3_SOLVER_SETUP2_KERNEL_PATH); + b3Assert(solverSetup2Prog); @@ -886,7 +886,7 @@ void b3GpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem if (b3GpuBatchContacts) { B3_PROFILE("gpu batchContacts"); - maxNumBatches = 50;//250; + maxNumBatches = 250;//250; m_data->m_solverGPU->batchContacts( m_data->m_pBufContactOutGPU, nContacts, m_data->m_solverGPU->m_numConstraints, m_data->m_solverGPU->m_offsets, csCfg.m_staticIdx ); } else { diff --git a/src/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.cpp b/src/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.cpp index 24d6befc4..be662b192 100644 --- a/src/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.cpp +++ b/src/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.cpp @@ -33,7 +33,7 @@ subject to the following restrictions: #define B3_RIGIDBODY_INTEGRATE_PATH "src/Bullet3OpenCL/RigidBody/kernels/integrateKernel.cl" #define B3_RIGIDBODY_UPDATEAABB_PATH "src/Bullet3OpenCL/RigidBody/kernels/updateAabbsKernel.cl" -bool useDbvt = false; +bool useDbvt = false;//true; bool useBullet2CpuSolver = true; bool dumpContactStats = false; diff --git a/src/Bullet3OpenCL/RigidBody/kernels/batchingKernels.cl b/src/Bullet3OpenCL/RigidBody/kernels/batchingKernels.cl index ef091021a..1c8c48a58 100644 --- a/src/Bullet3OpenCL/RigidBody/kernels/batchingKernels.cl +++ b/src/Bullet3OpenCL/RigidBody/kernels/batchingKernels.cl @@ -13,6 +13,7 @@ subject to the following restrictions: */ //Originally written by Takahiro Harada +#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Contact4Data.h" #pragma OPENCL EXTENSION cl_amd_printf : enable #pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable @@ -64,22 +65,7 @@ typedef unsigned char u8; -typedef struct -{ - float4 m_worldPos[4]; - float4 m_worldNormal; - u32 m_coeffs; - int m_batchIdx; - int m_bodyA;//sign bit set for fixed objects - int m_bodyB; - - int m_childIndexA; - int m_childIndexB; - int m_unused1; - int m_unused2; - -}Contact4; typedef struct { @@ -133,7 +119,7 @@ u32 tryWrite(__local u32* buff, int idx) } // batching on the GPU -__kernel void CreateBatches( __global const Contact4* gConstraints, __global Contact4* gConstraintsOut, +__kernel void CreateBatches( __global const struct b3Contact4Data* gConstraints, __global struct b3Contact4Data* gConstraintsOut, __global const u32* gN, __global const u32* gStart, int m_staticIdx ) { @@ -186,8 +172,8 @@ __kernel void CreateBatches( __global const Contact4* gConstraints, __global Con int dstIdx; AtomInc1( ldsRingEnd, dstIdx ); - int a = gConstraints[m_start+srcIdx].m_bodyA; - int b = gConstraints[m_start+srcIdx].m_bodyB; + int a = gConstraints[m_start+srcIdx].m_bodyAPtrAndSignBit; + int b = gConstraints[m_start+srcIdx].m_bodyBPtrAndSignBit; ldsRingElem[dstIdx].m_a = (a>b)? b:a; ldsRingElem[dstIdx].m_b = (a>b)? a:b; ldsRingElem[dstIdx].m_idx = srcIdx; diff --git a/src/Bullet3OpenCL/RigidBody/kernels/batchingKernels.h b/src/Bullet3OpenCL/RigidBody/kernels/batchingKernels.h index 7bcbc4f5e..b26580bd4 100644 --- a/src/Bullet3OpenCL/RigidBody/kernels/batchingKernels.h +++ b/src/Bullet3OpenCL/RigidBody/kernels/batchingKernels.h @@ -2,37 +2,71 @@ static const char* batchingKernelsCL= \ "/*\n" "Copyright (c) 2012 Advanced Micro Devices, Inc. \n" -"\n" "This software is provided 'as-is', without any express or implied warranty.\n" "In no event will the authors be held liable for any damages arising from the use of this software.\n" "Permission is granted to anyone to use this software for any purpose, \n" "including commercial applications, and to alter it and redistribute it freely, \n" "subject to the following restrictions:\n" -"\n" "1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" "2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" "3. This notice may not be removed or altered from any source distribution.\n" "*/\n" "//Originally written by Takahiro Harada\n" -"\n" -"\n" +"#ifndef B3_CONTACT4DATA_H\n" +"#define B3_CONTACT4DATA_H\n" +"#ifndef B3_FLOAT4_H\n" +"#define B3_FLOAT4_H\n" +"#ifndef B3_PLATFORM_DEFINITIONS_H\n" +"#define B3_PLATFORM_DEFINITIONS_H\n" +"struct MyTest\n" +"{\n" +" int bla;\n" +"};\n" +"#endif\n" +"#ifdef __cplusplus\n" +"#else//bla\n" +" typedef float4 b3Float4;\n" +"#endif \n" +"#endif //B3_FLOAT4_H\n" +"typedef struct b3Contact4Data b3Contact4Data_t;\n" +"struct b3Contact4Data\n" +"{\n" +" b3Float4 m_worldPos[4];\n" +"// b3Float4 m_localPosB[4];\n" +" b3Float4 m_worldNormal; // w: m_nPoints\n" +" unsigned short m_restituitionCoeffCmp;\n" +" unsigned short m_frictionCoeffCmp;\n" +" int m_batchIdx;\n" +" int m_bodyAPtrAndSignBit;//x:m_bodyAPtr, y:m_bodyBPtr\n" +" int m_bodyBPtrAndSignBit;\n" +" int m_childIndexA;\n" +" int m_childIndexB;\n" +" int m_unused1;\n" +" int m_unused2;\n" +" b3Float4 m_localPosA;\n" +"};\n" +"inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)\n" +"{\n" +" return (int)contact->m_worldNormal.w;\n" +"};\n" +"inline void b3Contact4Data_setNumPoints(struct b3Contact4Data* contact, int numPoints)\n" +"{\n" +" contact->m_worldNormal.w = (float)numPoints;\n" +"};\n" +"#endif //B3_CONTACT4DATA_H\n" "#pragma OPENCL EXTENSION cl_amd_printf : enable\n" "#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n" "#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n" "#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n" "#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n" -"\n" "#ifdef cl_ext_atomic_counters_32\n" "#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable\n" "#else\n" "#define counter32_t volatile __global int*\n" "#endif\n" -"\n" -"\n" "typedef unsigned int u32;\n" "typedef unsigned short u16;\n" "typedef unsigned char u8;\n" -"\n" "#define GET_GROUP_IDX get_group_id(0)\n" "#define GET_LOCAL_IDX get_local_id(0)\n" "#define GET_GLOBAL_IDX get_global_id(0)\n" @@ -46,43 +80,16 @@ static const char* batchingKernelsCL= \ "#define AtomAdd(x, value) atom_add(&(x), value)\n" "#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value )\n" "#define AtomXhg(x, value) atom_xchg ( &(x), value )\n" -"\n" -"\n" "#define SELECT_UINT4( b, a, condition ) select( b,a,condition )\n" -"\n" "#define make_float4 (float4)\n" "#define make_float2 (float2)\n" "#define make_uint4 (uint4)\n" "#define make_int4 (int4)\n" "#define make_uint2 (uint2)\n" "#define make_int2 (int2)\n" -"\n" -"\n" "#define max2 max\n" "#define min2 min\n" -"\n" -"\n" "#define WG_SIZE 64\n" -"\n" -"\n" -"\n" -"typedef struct \n" -"{\n" -" float4 m_worldPos[4];\n" -" float4 m_worldNormal;\n" -" u32 m_coeffs;\n" -" int m_batchIdx;\n" -"\n" -" int m_bodyA;//sign bit set for fixed objects\n" -" int m_bodyB;\n" -"\n" -" int m_childIndexA;\n" -" int m_childIndexB;\n" -" int m_unused1;\n" -" int m_unused2;\n" -"\n" -"}Contact4;\n" -"\n" "typedef struct \n" "{\n" " int m_n;\n" @@ -90,24 +97,19 @@ static const char* batchingKernelsCL= \ " int m_staticIdx;\n" " int m_paddings[1];\n" "} ConstBuffer;\n" -"\n" "typedef struct \n" "{\n" " int m_a;\n" " int m_b;\n" " u32 m_idx;\n" "}Elem;\n" -"\n" "#define STACK_SIZE (WG_SIZE*10)\n" "//#define STACK_SIZE (WG_SIZE)\n" "#define RING_SIZE 1024\n" "#define RING_SIZE_MASK (RING_SIZE-1)\n" "#define CHECK_SIZE (WG_SIZE)\n" -"\n" -"\n" "#define GET_RING_CAPACITY (RING_SIZE - ldsRingEnd)\n" "#define RING_END ldsTmp\n" -"\n" "u32 readBuf(__local u32* buff, int idx)\n" "{\n" " idx = idx % (32*CHECK_SIZE);\n" @@ -115,7 +117,6 @@ static const char* batchingKernelsCL= \ " int bufIdx = idx/32;\n" " return buff[bufIdx] & (1<> bitIdx)&1) == 0;\n" "}\n" -"\n" "// batching on the GPU\n" -"__kernel void CreateBatches( __global const Contact4* gConstraints, __global Contact4* gConstraintsOut,\n" +"__kernel void CreateBatches( __global const struct b3Contact4Data* gConstraints, __global struct b3Contact4Data* gConstraintsOut,\n" " __global const u32* gN, __global const u32* gStart, \n" " int m_staticIdx )\n" "{\n" @@ -148,7 +147,6 @@ static const char* batchingKernelsCL= \ " __local u32 ldsFixedBuffer[CHECK_SIZE];\n" " __local u32 ldsGEnd;\n" " __local u32 ldsDstEnd;\n" -"\n" " int wgIdx = GET_GROUP_IDX;\n" " int lIdx = GET_LOCAL_IDX;\n" " \n" @@ -168,7 +166,6 @@ static const char* batchingKernelsCL= \ " for(int ie=0; ie<50; ie++)\n" " {\n" " ldsFixedBuffer[lIdx] = 0;\n" -"\n" " for(int giter=0; giter<4; giter++)\n" " {\n" " int ringCap = GET_RING_CAPACITY;\n" @@ -188,8 +185,8 @@ static const char* batchingKernelsCL= \ " int dstIdx;\n" " AtomInc1( ldsRingEnd, dstIdx );\n" " \n" -" int a = gConstraints[m_start+srcIdx].m_bodyA;\n" -" int b = gConstraints[m_start+srcIdx].m_bodyB;\n" +" int a = gConstraints[m_start+srcIdx].m_bodyAPtrAndSignBit;\n" +" int b = gConstraints[m_start+srcIdx].m_bodyBPtrAndSignBit;\n" " ldsRingElem[dstIdx].m_a = (a>b)? b:a;\n" " ldsRingElem[dstIdx].m_b = (a>b)? a:b;\n" " ldsRingElem[dstIdx].m_idx = srcIdx;\n" @@ -198,37 +195,31 @@ static const char* batchingKernelsCL= \ " ringCap = GET_RING_CAPACITY;\n" " }\n" " }\n" -"\n" " GROUP_LDS_BARRIER;\n" " \n" " // 2. fill stack\n" " __local Elem* dst = ldsRingElem;\n" " if( lIdx == 0 ) RING_END = 0;\n" -"\n" " int srcIdx=lIdx;\n" " int end = ldsRingEnd;\n" -"\n" " {\n" " for(int ii=0; iim_worldNormal.w;\n" +"};\n" +"inline void b3Contact4Data_setNumPoints(struct b3Contact4Data* contact, int numPoints)\n" +"{\n" +" contact->m_worldNormal.w = (float)numPoints;\n" +"};\n" +"#endif //B3_CONTACT4DATA_H\n" "#pragma OPENCL EXTENSION cl_amd_printf : enable\n" "#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n" "#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n" "#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n" "#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n" -"\n" "#ifdef cl_ext_atomic_counters_32\n" "#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable\n" "#else\n" "#define counter32_t volatile __global int*\n" "#endif\n" -"\n" "#define SIMD_WIDTH 64\n" -"\n" "typedef unsigned int u32;\n" "typedef unsigned short u16;\n" "typedef unsigned char u8;\n" -"\n" "#define GET_GROUP_IDX get_group_id(0)\n" "#define GET_LOCAL_IDX get_local_id(0)\n" "#define GET_GLOBAL_IDX get_global_id(0)\n" @@ -47,43 +81,16 @@ static const char* batchingKernelsNewCL= \ "#define AtomAdd(x, value) atom_add(&(x), value)\n" "#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value )\n" "#define AtomXhg(x, value) atom_xchg ( &(x), value )\n" -"\n" -"\n" "#define SELECT_UINT4( b, a, condition ) select( b,a,condition )\n" -"\n" "#define make_float4 (float4)\n" "#define make_float2 (float2)\n" "#define make_uint4 (uint4)\n" "#define make_int4 (int4)\n" "#define make_uint2 (uint2)\n" "#define make_int2 (int2)\n" -"\n" -"\n" "#define max2 max\n" "#define min2 min\n" -"\n" -"\n" "#define WG_SIZE 64\n" -"\n" -"\n" -"\n" -"typedef struct \n" -"{\n" -" float4 m_worldPos[4];\n" -" float4 m_worldNormal;\n" -" u32 m_coeffs;\n" -" int m_batchIdx;\n" -"\n" -" int m_bodyAPtrAndSignBit;//sign bit set for fixed objects\n" -" int m_bodyBPtrAndSignBit;\n" -"\n" -" int m_childIndexA;\n" -" int m_childIndexB;\n" -" int m_unused1;\n" -" int m_unused2;\n" -"\n" -"}Contact4;\n" -"\n" "typedef struct \n" "{\n" " int m_n;\n" @@ -91,20 +98,14 @@ static const char* batchingKernelsNewCL= \ " int m_staticIdx;\n" " int m_paddings[1];\n" "} ConstBuffer;\n" -"\n" "typedef struct \n" "{\n" " int m_a;\n" " int m_b;\n" " u32 m_idx;\n" "}Elem;\n" -"\n" -"\n" -"\n" -"\n" -"\n" "// batching on the GPU\n" -"__kernel void CreateBatchesBruteForce( __global Contact4* gConstraints, __global const u32* gN, __global const u32* gStart, int m_staticIdx )\n" +"__kernel void CreateBatchesBruteForce( __global struct b3Contact4Data* gConstraints, __global const u32* gN, __global const u32* gStart, int m_staticIdx )\n" "{\n" " int wgIdx = GET_GROUP_IDX;\n" " int lIdx = GET_LOCAL_IDX;\n" @@ -122,13 +123,7 @@ static const char* batchingKernelsNewCL= \ " }\n" " }\n" "}\n" -"\n" -"\n" "#define CHECK_SIZE (WG_SIZE)\n" -"\n" -"\n" -"\n" -"\n" "u32 readBuf(__local u32* buff, int idx)\n" "{\n" " idx = idx % (32*CHECK_SIZE);\n" @@ -136,7 +131,6 @@ static const char* batchingKernelsNewCL= \ " int bufIdx = idx/32;\n" " return buff[bufIdx] & (1<> bitIdx)&1) == 0;\n" "}\n" -"\n" -"\n" "// batching on the GPU\n" -"__kernel void CreateBatchesNew( __global Contact4* gConstraints, __global const u32* gN, __global const u32* gStart, int staticIdx )\n" +"__kernel void CreateBatchesNew( __global struct b3Contact4Data* gConstraints, __global const u32* gN, __global const u32* gStart, int staticIdx )\n" "{\n" " int wgIdx = GET_GROUP_IDX;\n" " int lIdx = GET_LOCAL_IDX;\n" " const int numConstraints = gN[wgIdx];\n" " const int m_start = gStart[wgIdx];\n" -" \n" +" b3Contact4Data_t tmp;\n" " \n" " __local u32 ldsFixedBuffer[CHECK_SIZE];\n" " \n" @@ -175,12 +166,11 @@ static const char* batchingKernelsNewCL= \ " {\n" " \n" " \n" -" __global Contact4* cs = &gConstraints[m_start]; \n" +" __global struct b3Contact4Data* cs = &gConstraints[m_start]; \n" " \n" " \n" " int numValidConstraints = 0;\n" " int batchIdx = 0;\n" -"\n" " while( numValidConstraints < numConstraints)\n" " {\n" " int nCurrentBatch = 0;\n" @@ -188,10 +178,8 @@ static const char* batchingKernelsNewCL= \ " \n" " for(int i=0; im_quat);\n" " return newTrans;\n" "}*/\n" -"\n" -"\n" -"\n" -"\n" "__inline\n" "float4 cross3(float4 a, float4 b)\n" "{\n" " return cross(a,b);\n" "}\n" -"\n" "__inline\n" "float4 fastNormalize4(float4 v)\n" "{\n" " v = mymake_float4(v.xyz,0.f);\n" " return fast_normalize(v);\n" "}\n" -"\n" -"\n" "__inline\n" "Quaternion qtMul(Quaternion a, Quaternion b);\n" -"\n" "__inline\n" "Quaternion qtNormalize(Quaternion in);\n" -"\n" "__inline\n" "float4 qtRotate(Quaternion q, float4 vec);\n" -"\n" "__inline\n" "Quaternion qtInvert(Quaternion q);\n" -"\n" -"\n" -"\n" -"\n" "__inline\n" "Quaternion qtMul(Quaternion a, Quaternion b)\n" "{\n" @@ -240,7 +177,6 @@ static const char* solveConstraintRowsCL= \ " ans.w = a.w*b.w - dot3F4(a, b);\n" " return ans;\n" "}\n" -"\n" "__inline\n" "Quaternion qtNormalize(Quaternion in)\n" "{\n" @@ -257,30 +193,23 @@ static const char* solveConstraintRowsCL= \ " float4 out = qtMul(qtMul(q,vcpy),qInv);\n" " return out;\n" "}\n" -"\n" "__inline\n" "Quaternion qtInvert(Quaternion q)\n" "{\n" " return (Quaternion)(-q.xyz, q.w);\n" "}\n" -"\n" -"\n" "__inline void internalApplyImpulse(__global b3GpuSolverBody* body, float4 linearComponent, float4 angularComponent,float impulseMagnitude)\n" "{\n" " body->m_deltaLinearVelocity += linearComponent*impulseMagnitude*body->m_linearFactor;\n" " body->m_deltaAngularVelocity += angularComponent*(impulseMagnitude*body->m_angularFactor);\n" "}\n" -"\n" -"\n" "void resolveSingleConstraintRowGeneric(__global b3GpuSolverBody* body1, __global b3GpuSolverBody* body2, __global b3SolverConstraint* c)\n" "{\n" " float deltaImpulse = c->m_rhs-c->m_appliedImpulse*c->m_cfm;\n" " float deltaVel1Dotn = dot3F4(c->m_contactNormal,body1->m_deltaLinearVelocity) + dot3F4(c->m_relpos1CrossNormal,body1->m_deltaAngularVelocity);\n" " float deltaVel2Dotn = -dot3F4(c->m_contactNormal,body2->m_deltaLinearVelocity) + dot3F4(c->m_relpos2CrossNormal,body2->m_deltaAngularVelocity);\n" -"\n" " deltaImpulse -= deltaVel1Dotn*c->m_jacDiagABInv;\n" " deltaImpulse -= deltaVel2Dotn*c->m_jacDiagABInv;\n" -"\n" " float sum = c->m_appliedImpulse + deltaImpulse;\n" " if (sum < c->m_lowerLimit)\n" " {\n" @@ -296,12 +225,9 @@ static const char* solveConstraintRowsCL= \ " {\n" " c->m_appliedImpulse = sum;\n" " }\n" -"\n" " internalApplyImpulse(body1,c->m_contactNormal*body1->m_invMass,c->m_angularComponentA,deltaImpulse);\n" " internalApplyImpulse(body2,-c->m_contactNormal*body2->m_invMass,c->m_angularComponentB,deltaImpulse);\n" -"\n" "}\n" -"\n" "__kernel void solveJointConstraintRows(__global b3GpuSolverBody* solverBodies,\n" " __global b3BatchConstraint* batchConstraints,\n" " __global b3SolverConstraint* rows,\n" @@ -315,7 +241,6 @@ static const char* solveConstraintRowsCL= \ " int b = get_global_id(0);\n" " if (b>=numConstraintsInBatch)\n" " return;\n" -"\n" " __global b3BatchConstraint* c = &batchConstraints[b+batchOffset];\n" " int originalConstraintIndex = c->m_originalConstraintIndex;\n" " if (constraints[originalConstraintIndex].m_flags&B3_CONSTRAINT_FLAG_ENABLED)\n" @@ -329,16 +254,13 @@ static const char* solveConstraintRowsCL= \ " }\n" " }\n" "};\n" -"\n" "__kernel void initSolverBodies(__global b3GpuSolverBody* solverBodies,__global b3RigidBodyCL* bodiesCL, int numBodies)\n" "{\n" " int i = get_global_id(0);\n" " if (i>=numBodies)\n" " return;\n" -"\n" " __global b3GpuSolverBody* solverBody = &solverBodies[i];\n" " __global b3RigidBodyCL* bodyCL = &bodiesCL[i];\n" -"\n" " solverBody->m_deltaLinearVelocity = (float4)(0.f,0.f,0.f,0.f);\n" " solverBody->m_deltaAngularVelocity = (float4)(0.f,0.f,0.f,0.f);\n" " solverBody->m_pushVelocity = (float4)(0.f,0.f,0.f,0.f);\n" @@ -350,7 +272,6 @@ static const char* solveConstraintRowsCL= \ " solverBody->m_linearVelocity = bodyCL->m_linVel;\n" " solverBody->m_angularVelocity = bodyCL->m_angVel;\n" "}\n" -"\n" "__kernel void breakViolatedConstraintsKernel(__global b3GpuGenericConstraint* constraints, __global unsigned int* numConstraintRows, __global unsigned int* rowOffsets, __global b3SolverConstraint* rows, int numConstraints)\n" "{\n" " int cid = get_global_id(0);\n" @@ -370,17 +291,12 @@ static const char* solveConstraintRowsCL= \ " }\n" " }\n" "}\n" -"\n" -"\n" -"\n" "__kernel void getInfo1Kernel(__global unsigned int* infos, __global b3GpuGenericConstraint* constraints, int numConstraints)\n" "{\n" " int i = get_global_id(0);\n" " if (i>=numConstraints)\n" " return;\n" -"\n" " __global b3GpuGenericConstraint* constraint = &constraints[i];\n" -"\n" " switch (constraint->m_constraintType)\n" " {\n" " case B3_GPU_POINT2POINT_CONSTRAINT_TYPE:\n" @@ -398,7 +314,6 @@ static const char* solveConstraintRowsCL= \ " }\n" " }\n" "}\n" -"\n" "__kernel void initBatchConstraintsKernel(__global unsigned int* numConstraintRows, __global unsigned int* rowOffsets, \n" " __global b3BatchConstraint* batchConstraints, \n" " __global b3GpuGenericConstraint* constraints,\n" @@ -408,26 +323,18 @@ static const char* solveConstraintRowsCL= \ " int i = get_global_id(0);\n" " if (i>=numConstraints)\n" " return;\n" -"\n" " int rbA = constraints[i].m_rbA;\n" " int rbB = constraints[i].m_rbB;\n" -"\n" " batchConstraints[i].m_bodyAPtrAndSignBit = bodies[rbA].m_invMass? rbA : -rbA;\n" " batchConstraints[i].m_bodyBPtrAndSignBit = bodies[rbB].m_invMass? rbB : -rbB;\n" " batchConstraints[i].m_batchId = -1;\n" " batchConstraints[i].m_originalConstraintIndex = i;\n" -"\n" "}\n" -"\n" -"\n" -"\n" -"\n" "typedef struct\n" "{\n" " // integrator parameters: frames per second (1/stepsize), default error\n" " // reduction parameter (0..1).\n" " float fps,erp;\n" -"\n" " // for the first and second body, pointers to two (linear and angular)\n" " // n*3 jacobian sub matrices, stored by rows. these matrices will have\n" " // been initialized to 0 on entry. if the second body is zero then the\n" @@ -441,7 +348,6 @@ static const char* solveConstraintRowsCL= \ " {\n" " __global float4* m_J1angularAxisFloat4;\n" " __global float* m_J1angularAxis;\n" -"\n" " };\n" " union\n" " {\n" @@ -455,17 +361,14 @@ static const char* solveConstraintRowsCL= \ " };\n" " // elements to jump from one row to the next in J's\n" " int rowskip;\n" -"\n" " // right hand sides of the equation J*v = c + cfm * lambda. cfm is the\n" " // \"constraint force mixing\" vector. c is set to zero on entry, cfm is\n" " // set to a constant value (typically very small or zero) value on entry.\n" " __global float* m_constraintError;\n" " __global float* cfm;\n" -"\n" " // lo and hi limits for variables (set to -/+ infinity on entry).\n" " __global float* m_lowerLimit;\n" " __global float* m_upperLimit;\n" -"\n" " // findex vector for variables. see the LCP solver interface for a\n" " // description of what this does. this is set to -1 on entry.\n" " // note that the returned indexes are relative to the first index of\n" @@ -473,39 +376,28 @@ static const char* solveConstraintRowsCL= \ " __global int *findex;\n" " // number of solver iterations\n" " int m_numIterations;\n" -"\n" " //damping of the velocity\n" " float m_damping;\n" "} b3GpuConstraintInfo2;\n" -"\n" -"\n" "void getSkewSymmetricMatrix(float4 vecIn, __global float4* v0,__global float4* v1,__global float4* v2)\n" "{\n" " *v0 = (float4)(0. ,-vecIn.z ,vecIn.y,0.f);\n" " *v1 = (float4)(vecIn.z ,0. ,-vecIn.x,0.f);\n" " *v2 = (float4)(-vecIn.y ,vecIn.x ,0.f,0.f);\n" "}\n" -"\n" -"\n" "void getInfo2Point2Point(__global b3GpuGenericConstraint* constraint,b3GpuConstraintInfo2* info,__global b3RigidBodyCL* bodies)\n" "{\n" " float4 posA = bodies[constraint->m_rbA].m_pos;\n" " Quaternion rotA = bodies[constraint->m_rbA].m_quat;\n" -"\n" " float4 posB = bodies[constraint->m_rbB].m_pos;\n" " Quaternion rotB = bodies[constraint->m_rbB].m_quat;\n" -"\n" -"\n" -"\n" " // anchor points in global coordinates with respect to body PORs.\n" " \n" " // set jacobian\n" " info->m_J1linearAxis[0] = 1;\n" " info->m_J1linearAxis[info->rowskip+1] = 1;\n" " info->m_J1linearAxis[2*info->rowskip+2] = 1;\n" -"\n" " float4 a1 = qtRotate(rotA,constraint->m_pivotInA);\n" -"\n" " {\n" " __global float4* angular0 = (__global float4*)(info->m_J1angularAxis);\n" " __global float4* angular1 = (__global float4*)(info->m_J1angularAxis+info->rowskip);\n" @@ -533,18 +425,15 @@ static const char* solveConstraintRowsCL= \ " // set right hand side\n" "// float currERP = (m_flags & B3_P2P_FLAGS_ERP) ? m_erp : info->erp;\n" " float currERP = info->erp;\n" -"\n" " float k = info->fps * currERP;\n" " int j;\n" " float4 result = a2 + posB - a1 - posA;\n" " float* resultPtr = &result;\n" -"\n" " for (j=0; j<3; j++)\n" " {\n" " info->m_constraintError[j*info->rowskip] = k * (resultPtr[j]);\n" " }\n" "}\n" -"\n" "Quaternion nearest( Quaternion first, Quaternion qd)\n" "{\n" " Quaternion diff,sum;\n" @@ -555,7 +444,6 @@ static const char* solveConstraintRowsCL= \ " return qd;\n" " return (-qd);\n" "}\n" -"\n" "float b3Acos(float x) \n" "{ \n" " if (x<-1) \n" @@ -564,7 +452,6 @@ static const char* solveConstraintRowsCL= \ " x=1;\n" " return acos(x); \n" "}\n" -"\n" "float getAngle(Quaternion orn)\n" "{\n" " if (orn.w>=1.f)\n" @@ -572,7 +459,6 @@ static const char* solveConstraintRowsCL= \ " float s = 2.f * b3Acos(orn.w);\n" " return s;\n" "}\n" -"\n" "void calculateDiffAxisAngleQuaternion( Quaternion orn0,Quaternion orn1a,float4* axis,float* angle)\n" "{\n" " Quaternion orn1 = nearest(orn0,orn1a);\n" @@ -588,17 +474,12 @@ static const char* solveConstraintRowsCL= \ " else\n" " *axis /= sqrt(len);\n" "}\n" -"\n" -"\n" -"\n" "void getInfo2FixedOrientation(__global b3GpuGenericConstraint* constraint,b3GpuConstraintInfo2* info,__global b3RigidBodyCL* bodies, int start_row)\n" "{\n" " Quaternion worldOrnA = bodies[constraint->m_rbA].m_quat;\n" " Quaternion worldOrnB = bodies[constraint->m_rbB].m_quat;\n" -"\n" " int s = info->rowskip;\n" " int start_index = start_row * s;\n" -"\n" " // 3 rows to make body rotations equal\n" " info->m_J1angularAxis[start_index] = 1;\n" " info->m_J1angularAxis[start_index + s + 1] = 1;\n" @@ -626,16 +507,12 @@ static const char* solveConstraintRowsCL= \ " info->m_constraintError[(3+j)*info->rowskip] = k * resultPtr[j];\n" " }\n" " \n" -"\n" "}\n" -"\n" -"\n" "__kernel void writeBackVelocitiesKernel(__global b3RigidBodyCL* bodies,__global b3GpuSolverBody* solverBodies,int numBodies)\n" "{\n" " int i = get_global_id(0);\n" " if (i>=numBodies)\n" " return;\n" -"\n" " if (bodies[i].m_invMass)\n" " {\n" "// if (length(solverBodies[i].m_deltaLinearVelocity)=numConstraints)\n" " return;\n" @@ -675,17 +549,12 @@ static const char* solveConstraintRowsCL= \ " \n" " __global b3SolverConstraint* currentConstraintRow = &solverConstraintRows[constraintRowOffsets[i]];\n" " __global b3GpuGenericConstraint* constraint = &constraints[i];\n" -"\n" " __global b3RigidBodyCL* rbA = &bodies[ constraint->m_rbA];\n" " __global b3RigidBodyCL* rbB = &bodies[ constraint->m_rbB];\n" -"\n" " int solverBodyIdA = constraint->m_rbA;\n" " int solverBodyIdB = constraint->m_rbB;\n" -"\n" " __global b3GpuSolverBody* bodyAPtr = &solverBodies[solverBodyIdA];\n" " __global b3GpuSolverBody* bodyBPtr = &solverBodies[solverBodyIdB];\n" -"\n" -"\n" " if (rbA->m_invMass)\n" " {\n" " batchConstraints[i].m_bodyAPtrAndSignBit = solverBodyIdA;\n" @@ -695,7 +564,6 @@ static const char* solveConstraintRowsCL= \ "// m_staticIdx = 0;\n" " batchConstraints[i].m_bodyAPtrAndSignBit = -solverBodyIdA;\n" " }\n" -"\n" " if (rbB->m_invMass)\n" " {\n" " batchConstraints[i].m_bodyBPtrAndSignBit = solverBodyIdB;\n" @@ -705,14 +573,11 @@ static const char* solveConstraintRowsCL= \ "// m_staticIdx = 0;\n" " batchConstraints[i].m_bodyBPtrAndSignBit = -solverBodyIdB;\n" " }\n" -"\n" " if (info1)\n" " {\n" " int overrideNumSolverIterations = 0;//constraint->getOverrideNumSolverIterations() > 0 ? constraint->getOverrideNumSolverIterations() : infoGlobal.m_numIterations;\n" "// if (overrideNumSolverIterations>m_maxOverrideNumSolverIterations)\n" " // m_maxOverrideNumSolverIterations = overrideNumSolverIterations;\n" -"\n" -"\n" " int j;\n" " for ( j=0;jm_deltaLinearVelocity = (float4)(0,0,0,0);\n" " bodyAPtr->m_deltaAngularVelocity = (float4)(0,0,0,0);\n" " bodyAPtr->m_pushVelocity = (float4)(0,0,0,0);\n" @@ -755,12 +618,8 @@ static const char* solveConstraintRowsCL= \ " bodyBPtr->m_deltaAngularVelocity = (float4)(0,0,0,0);\n" " bodyBPtr->m_pushVelocity = (float4)(0,0,0,0);\n" " bodyBPtr->m_turnVelocity = (float4)(0,0,0,0);\n" -"\n" " int rowskip = sizeof(b3SolverConstraint)/sizeof(float);//check this\n" -"\n" " \n" -"\n" -"\n" " b3GpuConstraintInfo2 info2;\n" " info2.fps = 1.f/timeStep;\n" " info2.erp = globalErp;\n" @@ -769,7 +628,6 @@ static const char* solveConstraintRowsCL= \ " info2.m_J2linearAxisFloat4 = 0;\n" " info2.m_J2angularAxisFloat4 = ¤tConstraintRow->m_relpos2CrossNormal;\n" " info2.rowskip = sizeof(b3SolverConstraint)/sizeof(float);//check this\n" -"\n" " ///the size of b3SolverConstraint needs be a multiple of float\n" "// b3Assert(info2.rowskip*sizeof(float)== sizeof(b3SolverConstraint));\n" " info2.m_constraintError = ¤tConstraintRow->m_rhs;\n" @@ -779,7 +637,6 @@ static const char* solveConstraintRowsCL= \ " info2.m_lowerLimit = ¤tConstraintRow->m_lowerLimit;\n" " info2.m_upperLimit = ¤tConstraintRow->m_upperLimit;\n" " info2.m_numIterations = globalNumIterations;\n" -"\n" " switch (constraint->m_constraintType)\n" " {\n" " case B3_GPU_POINT2POINT_CONSTRAINT_TYPE:\n" @@ -790,37 +647,29 @@ static const char* solveConstraintRowsCL= \ " case B3_GPU_FIXED_CONSTRAINT_TYPE:\n" " {\n" " getInfo2Point2Point(constraint,&info2,bodies);\n" -"\n" " getInfo2FixedOrientation(constraint,&info2,bodies,3);\n" -"\n" " break;\n" " }\n" -"\n" " default:\n" " {\n" " }\n" " }\n" -"\n" " ///finalize the constraint setup\n" " for ( j=0;jm_upperLimit>=constraint->m_breakingImpulseThreshold)\n" " {\n" " solverConstraint->m_upperLimit = constraint->m_breakingImpulseThreshold;\n" " }\n" -"\n" " if (solverConstraint->m_lowerLimit<=-constraint->m_breakingImpulseThreshold)\n" " {\n" " solverConstraint->m_lowerLimit = -constraint->m_breakingImpulseThreshold;\n" " }\n" -"\n" "// solverConstraint->m_originalContactPoint = constraint;\n" " \n" " Matrix3x3 invInertiaWorldA= inertias[constraint->m_rbA].m_invInertiaWorld;\n" " {\n" -"\n" " //float4 angularFactorA(1,1,1);\n" " float4 ftorqueAxis1 = solverConstraint->m_relpos1CrossNormal;\n" " solverConstraint->m_angularComponentA = mtMul1(invInertiaWorldA,ftorqueAxis1);//*angularFactorA;\n" @@ -828,11 +677,9 @@ static const char* solveConstraintRowsCL= \ " \n" " Matrix3x3 invInertiaWorldB= inertias[constraint->m_rbB].m_invInertiaWorld;\n" " {\n" -"\n" " float4 ftorqueAxis2 = solverConstraint->m_relpos2CrossNormal;\n" " solverConstraint->m_angularComponentB = mtMul1(invInertiaWorldB,ftorqueAxis2);//*constraint->m_rbB.getAngularFactor();\n" " }\n" -"\n" " {\n" " //it is ok to use solverConstraint->m_contactNormal instead of -solverConstraint->m_contactNormal\n" " //because it gets multiplied iMJlB\n" @@ -840,7 +687,6 @@ static const char* solveConstraintRowsCL= \ " float4 iMJaA = mtMul3(solverConstraint->m_relpos1CrossNormal,invInertiaWorldA);\n" " float4 iMJlB = solverConstraint->m_contactNormal*rbB->m_invMass;//sign of normal?\n" " float4 iMJaB = mtMul3(solverConstraint->m_relpos2CrossNormal,invInertiaWorldB);\n" -"\n" " float sum = dot3F4(iMJlA,solverConstraint->m_contactNormal);\n" " sum += dot3F4(iMJaA,solverConstraint->m_relpos1CrossNormal);\n" " sum += dot3F4(iMJlB,solverConstraint->m_contactNormal);\n" @@ -854,17 +700,13 @@ static const char* solveConstraintRowsCL= \ " solverConstraint->m_jacDiagABInv = 0.f;\n" " }\n" " }\n" -"\n" -"\n" " ///fix rhs\n" " ///todo: add force/torque accelerators\n" " {\n" " float rel_vel;\n" " float vel1Dotn = dot3F4(solverConstraint->m_contactNormal,rbA->m_linVel) + dot3F4(solverConstraint->m_relpos1CrossNormal,rbA->m_angVel);\n" " float vel2Dotn = -dot3F4(solverConstraint->m_contactNormal,rbB->m_linVel) + dot3F4(solverConstraint->m_relpos2CrossNormal,rbB->m_angVel);\n" -"\n" " rel_vel = vel1Dotn+vel2Dotn;\n" -"\n" " float restitution = 0.f;\n" " float positionalError = solverConstraint->m_rhs;//already filled in by getConstraintInfo2\n" " float velocityError = restitution - rel_vel * info2.m_damping;\n" @@ -872,7 +714,6 @@ static const char* solveConstraintRowsCL= \ " float velocityImpulse = velocityError *solverConstraint->m_jacDiagABInv;\n" " solverConstraint->m_rhs = penetrationImpulse+velocityImpulse;\n" " solverConstraint->m_appliedImpulse = 0.f;\n" -"\n" " }\n" " }\n" " }\n" diff --git a/src/Bullet3OpenCL/RigidBody/kernels/solveContact.cl b/src/Bullet3OpenCL/RigidBody/kernels/solveContact.cl index c877dbdd3..a8d4e1c08 100644 --- a/src/Bullet3OpenCL/RigidBody/kernels/solveContact.cl +++ b/src/Bullet3OpenCL/RigidBody/kernels/solveContact.cl @@ -204,22 +204,7 @@ typedef struct u32 m_paddings[1]; } Constraint4; -typedef struct -{ - float4 m_worldPos[4]; - float4 m_worldNormal; - u32 m_coeffs; - int m_batchIdx; - int m_bodyAPtrAndSignBit; - int m_bodyBPtrAndSignBit; - - int m_childIndexA; - int m_childIndexB; - int m_unused1; - int m_unused2; - -} Contact4; typedef struct { diff --git a/src/Bullet3OpenCL/RigidBody/kernels/solveContact.h b/src/Bullet3OpenCL/RigidBody/kernels/solveContact.h index b05019489..cb836040d 100644 --- a/src/Bullet3OpenCL/RigidBody/kernels/solveContact.h +++ b/src/Bullet3OpenCL/RigidBody/kernels/solveContact.h @@ -2,37 +2,29 @@ static const char* solveContactCL= \ "/*\n" "Copyright (c) 2012 Advanced Micro Devices, Inc. \n" -"\n" "This software is provided 'as-is', without any express or implied warranty.\n" "In no event will the authors be held liable for any damages arising from the use of this software.\n" "Permission is granted to anyone to use this software for any purpose, \n" "including commercial applications, and to alter it and redistribute it freely, \n" "subject to the following restrictions:\n" -"\n" "1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" "2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" "3. This notice may not be removed or altered from any source distribution.\n" "*/\n" "//Originally written by Takahiro Harada\n" -"\n" -"\n" "//#pragma OPENCL EXTENSION cl_amd_printf : enable\n" "#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n" "#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n" "#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n" "#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n" -"\n" -"\n" "#ifdef cl_ext_atomic_counters_32\n" "#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable\n" "#else\n" "#define counter32_t volatile global int*\n" "#endif\n" -"\n" "typedef unsigned int u32;\n" "typedef unsigned short u16;\n" "typedef unsigned char u8;\n" -"\n" "#define GET_GROUP_IDX get_group_id(0)\n" "#define GET_LOCAL_IDX get_local_id(0)\n" "#define GET_GLOBAL_IDX get_global_id(0)\n" @@ -46,43 +38,28 @@ static const char* solveContactCL= \ "#define AtomAdd(x, value) atom_add(&(x), value)\n" "#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value )\n" "#define AtomXhg(x, value) atom_xchg ( &(x), value )\n" -"\n" -"\n" "#define SELECT_UINT4( b, a, condition ) select( b,a,condition )\n" -"\n" "#define mymake_float4 (float4)\n" "//#define make_float2 (float2)\n" "//#define make_uint4 (uint4)\n" "//#define make_int4 (int4)\n" "//#define make_uint2 (uint2)\n" "//#define make_int2 (int2)\n" -"\n" -"\n" "#define max2 max\n" "#define min2 min\n" -"\n" -"\n" "///////////////////////////////////////\n" "// Vector\n" "///////////////////////////////////////\n" -"\n" -"\n" -"\n" -"\n" "__inline\n" "float4 fastNormalize4(float4 v)\n" "{\n" " return fast_normalize(v);\n" "}\n" -"\n" -"\n" -"\n" "__inline\n" "float4 cross3(float4 a, float4 b)\n" "{\n" " return cross(a,b);\n" "}\n" -"\n" "__inline\n" "float dot3F4(float4 a, float4 b)\n" "{\n" @@ -90,10 +67,6 @@ static const char* solveContactCL= \ " float4 b1 = mymake_float4(b.xyz,0.f);\n" " return dot(a1, b1);\n" "}\n" -"\n" -"\n" -"\n" -"\n" "__inline\n" "float4 normalize3(const float4 a)\n" "{\n" @@ -102,33 +75,17 @@ static const char* solveContactCL= \ "// float length = sqrtf(dot3F4(a, a));\n" "// return 1.f/length * a;\n" "}\n" -"\n" -"\n" -"\n" -"\n" "///////////////////////////////////////\n" "// Matrix3x3\n" "///////////////////////////////////////\n" -"\n" "typedef struct\n" "{\n" " float4 m_row[3];\n" "}Matrix3x3;\n" -"\n" -"\n" -"\n" -"\n" -"\n" -"\n" "__inline\n" "float4 mtMul1(Matrix3x3 a, float4 b);\n" -"\n" "__inline\n" "float4 mtMul3(float4 a, Matrix3x3 b);\n" -"\n" -"\n" -"\n" -"\n" "__inline\n" "float4 mtMul1(Matrix3x3 a, float4 b)\n" "{\n" @@ -139,54 +96,39 @@ static const char* solveContactCL= \ " ans.w = 0.f;\n" " return ans;\n" "}\n" -"\n" "__inline\n" "float4 mtMul3(float4 a, Matrix3x3 b)\n" "{\n" " float4 colx = mymake_float4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n" " float4 coly = mymake_float4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n" " float4 colz = mymake_float4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n" -"\n" " float4 ans;\n" " ans.x = dot3F4( a, colx );\n" " ans.y = dot3F4( a, coly );\n" " ans.z = dot3F4( a, colz );\n" " return ans;\n" "}\n" -"\n" "///////////////////////////////////////\n" "// Quaternion\n" "///////////////////////////////////////\n" -"\n" "typedef float4 Quaternion;\n" -"\n" -"\n" -"\n" -"\n" -"\n" -"\n" -"\n" "#define WG_SIZE 64\n" -"\n" "typedef struct\n" "{\n" " float4 m_pos;\n" " Quaternion m_quat;\n" " float4 m_linVel;\n" " float4 m_angVel;\n" -"\n" " u32 m_shapeIdx;\n" " float m_invMass;\n" " float m_restituitionCoeff;\n" " float m_frictionCoeff;\n" "} Body;\n" -"\n" "typedef struct\n" "{\n" " Matrix3x3 m_invInertia;\n" " Matrix3x3 m_initInvInertia;\n" "} Shape;\n" -"\n" "typedef struct\n" "{\n" " float4 m_linear;\n" @@ -195,34 +137,13 @@ static const char* solveContactCL= \ " float m_jacCoeffInv[4];\n" " float m_b[4];\n" " float m_appliedRambdaDt[4];\n" -"\n" " float m_fJacCoeffInv[2]; \n" " float m_fAppliedRambdaDt[2]; \n" -"\n" " u32 m_bodyA;\n" " u32 m_bodyB;\n" -"\n" " int m_batchIdx;\n" " u32 m_paddings[1];\n" "} Constraint4;\n" -"\n" -"typedef struct\n" -"{\n" -" float4 m_worldPos[4];\n" -" float4 m_worldNormal;\n" -" u32 m_coeffs;\n" -" int m_batchIdx;\n" -"\n" -" int m_bodyAPtrAndSignBit;\n" -" int m_bodyBPtrAndSignBit;\n" -" \n" -" int m_childIndexA;\n" -" int m_childIndexB;\n" -" int m_unused1;\n" -" int m_unused2;\n" -"\n" -"} Contact4;\n" -"\n" "typedef struct\n" "{\n" " int m_nConstraints;\n" @@ -231,7 +152,6 @@ static const char* solveContactCL= \ " int m_nSplit;\n" "// int m_paddings[1];\n" "} ConstBuffer;\n" -"\n" "typedef struct\n" "{\n" " int m_solveFriction;\n" @@ -240,27 +160,20 @@ static const char* solveContactCL= \ " int m_nSplit;\n" "// int m_paddings[1];\n" "} ConstBufferBatchSolve;\n" -"\n" "void setLinearAndAngular( float4 n, float4 r0, float4 r1, float4* linear, float4* angular0, float4* angular1);\n" -"\n" "void setLinearAndAngular( float4 n, float4 r0, float4 r1, float4* linear, float4* angular0, float4* angular1)\n" "{\n" " *linear = mymake_float4(-n.xyz,0.f);\n" " *angular0 = -cross3(r0, n);\n" " *angular1 = cross3(r1, n);\n" "}\n" -"\n" "float calcRelVel( float4 l0, float4 l1, float4 a0, float4 a1, float4 linVel0, float4 angVel0, float4 linVel1, float4 angVel1 );\n" -"\n" "float calcRelVel( float4 l0, float4 l1, float4 a0, float4 a1, float4 linVel0, float4 angVel0, float4 linVel1, float4 angVel1 )\n" "{\n" " return dot3F4(l0, linVel0) + dot3F4(a0, angVel0) + dot3F4(l1, linVel1) + dot3F4(a1, angVel1);\n" "}\n" -"\n" -"\n" "float calcJacCoeff(const float4 linear0, const float4 linear1, const float4 angular0, const float4 angular1,\n" " float invMass0, const Matrix3x3* invInertia0, float invMass1, const Matrix3x3* invInertia1);\n" -"\n" "float calcJacCoeff(const float4 linear0, const float4 linear1, const float4 angular0, const float4 angular1,\n" " float invMass0, const Matrix3x3* invInertia0, float invMass1, const Matrix3x3* invInertia1)\n" "{\n" @@ -271,32 +184,25 @@ static const char* solveContactCL= \ " float jmj3 = dot3F4(mtMul3(angular1,*invInertia1), angular1);\n" " return -1.f/(jmj0+jmj1+jmj2+jmj3);\n" "}\n" -"\n" -"\n" "void solveContact(__global Constraint4* cs,\n" " float4 posA, float4* linVelA, float4* angVelA, float invMassA, Matrix3x3 invInertiaA,\n" " float4 posB, float4* linVelB, float4* angVelB, float invMassB, Matrix3x3 invInertiaB);\n" -"\n" "void solveContact(__global Constraint4* cs,\n" " float4 posA, float4* linVelA, float4* angVelA, float invMassA, Matrix3x3 invInertiaA,\n" " float4 posB, float4* linVelB, float4* angVelB, float invMassB, Matrix3x3 invInertiaB)\n" "{\n" " float minRambdaDt = 0;\n" " float maxRambdaDt = FLT_MAX;\n" -"\n" " for(int ic=0; ic<4; ic++)\n" " {\n" " if( cs->m_jacCoeffInv[ic] == 0.f ) continue;\n" -"\n" " float4 angular0, angular1, linear;\n" " float4 r0 = cs->m_worldPos[ic] - posA;\n" " float4 r1 = cs->m_worldPos[ic] - posB;\n" " setLinearAndAngular( -cs->m_linear, r0, r1, &linear, &angular0, &angular1 );\n" -"\n" " float rambdaDt = calcRelVel( cs->m_linear, -cs->m_linear, angular0, angular1, \n" " *linVelA, *angVelA, *linVelB, *angVelB ) + cs->m_b[ic];\n" " rambdaDt *= cs->m_jacCoeffInv[ic];\n" -"\n" " {\n" " float prevSum = cs->m_appliedRambdaDt[ic];\n" " float updated = prevSum;\n" @@ -306,19 +212,16 @@ static const char* solveContactCL= \ " rambdaDt = updated - prevSum;\n" " cs->m_appliedRambdaDt[ic] = updated;\n" " }\n" -"\n" " float4 linImp0 = invMassA*linear*rambdaDt;\n" " float4 linImp1 = invMassB*(-linear)*rambdaDt;\n" " float4 angImp0 = mtMul1(invInertiaA, angular0)*rambdaDt;\n" " float4 angImp1 = mtMul1(invInertiaB, angular1)*rambdaDt;\n" -"\n" " *linVelA += linImp0;\n" " *angVelA += angImp0;\n" " *linVelB += linImp1;\n" " *angVelB += angImp1;\n" " }\n" "}\n" -"\n" "void btPlaneSpace1 (const float4* n, float4* p, float4* q);\n" " void btPlaneSpace1 (const float4* n, float4* p, float4* q)\n" "{\n" @@ -347,29 +250,24 @@ static const char* solveContactCL= \ " q[0].z = a*k;\n" " }\n" "}\n" -"\n" "void solveContactConstraint(__global Body* gBodies, __global Shape* gShapes, __global Constraint4* ldsCs);\n" "void solveContactConstraint(__global Body* gBodies, __global Shape* gShapes, __global Constraint4* ldsCs)\n" "{\n" " //float frictionCoeff = ldsCs[0].m_linear.w;\n" " int aIdx = ldsCs[0].m_bodyA;\n" " int bIdx = ldsCs[0].m_bodyB;\n" -"\n" " float4 posA = gBodies[aIdx].m_pos;\n" " float4 linVelA = gBodies[aIdx].m_linVel;\n" " float4 angVelA = gBodies[aIdx].m_angVel;\n" " float invMassA = gBodies[aIdx].m_invMass;\n" " Matrix3x3 invInertiaA = gShapes[aIdx].m_invInertia;\n" -"\n" " float4 posB = gBodies[bIdx].m_pos;\n" " float4 linVelB = gBodies[bIdx].m_linVel;\n" " float4 angVelB = gBodies[bIdx].m_angVel;\n" " float invMassB = gBodies[bIdx].m_invMass;\n" " Matrix3x3 invInertiaB = gShapes[bIdx].m_invInertia;\n" -"\n" " solveContact( ldsCs, posA, &linVelA, &angVelA, invMassA, invInertiaA,\n" " posB, &linVelB, &angVelB, invMassB, invInertiaB );\n" -"\n" " if (gBodies[aIdx].m_invMass)\n" " {\n" " gBodies[aIdx].m_linVel = linVelA;\n" @@ -390,27 +288,18 @@ static const char* solveContactCL= \ " gBodies[bIdx].m_angVel = mymake_float4(0,0,0,0);\n" " \n" " }\n" -"\n" "}\n" -"\n" -"\n" -"\n" "typedef struct \n" "{\n" " int m_valInt0;\n" " int m_valInt1;\n" " int m_valInt2;\n" " int m_valInt3;\n" -"\n" " float m_val0;\n" " float m_val1;\n" " float m_val2;\n" " float m_val3;\n" "} SolverDebugInfo;\n" -"\n" -"\n" -"\n" -"\n" "__kernel\n" "__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" "void BatchSolveKernelContact(__global Body* gBodies,\n" @@ -427,33 +316,26 @@ static const char* solveContactCL= \ " __local int ldsCurBatch;\n" " __local int ldsNextBatch;\n" " __local int ldsStart;\n" -"\n" " int lIdx = GET_LOCAL_IDX;\n" " int wgIdx = GET_GROUP_IDX;\n" -"\n" "// int gIdx = GET_GLOBAL_IDX;\n" "// debugInfo[gIdx].m_valInt0 = gIdx;\n" " //debugInfo[gIdx].m_valInt1 = GET_GROUP_SIZE;\n" -"\n" -"\n" " int zIdx = (wgIdx/((nSplit.x*nSplit.y)/4))*2+((cellBatch&4)>>2);\n" " int remain= (wgIdx%((nSplit.x*nSplit.y)/4));\n" " int yIdx = (remain/(nSplit.x/2))*2 + ((cellBatch&2)>>1);\n" " int xIdx = (remain%(nSplit.x/2))*2 + (cellBatch&1);\n" " int cellIdx = xIdx+yIdx*nSplit.x+zIdx*(nSplit.x*nSplit.y);\n" -"\n" " //int xIdx = (wgIdx/(nSplit/2))*2 + (bIdx&1);\n" " //int yIdx = (wgIdx%(nSplit/2))*2 + (bIdx>>1);\n" " //int cellIdx = xIdx+yIdx*nSplit;\n" " \n" " if( gN[cellIdx] == 0 ) \n" " return;\n" -"\n" " \n" " \n" " const int start = gOffsets[cellIdx];\n" " const int end = start + gN[cellIdx];\n" -"\n" " \n" " \n" " \n" @@ -463,10 +345,7 @@ static const char* solveContactCL= \ " ldsNextBatch = 0;\n" " ldsStart = start;\n" " }\n" -"\n" -"\n" " GROUP_LDS_BARRIER;\n" -"\n" " int idx=ldsStart+lIdx;\n" " while (ldsCurBatch < maxBatch)\n" " {\n" @@ -475,7 +354,6 @@ static const char* solveContactCL= \ " if (gConstraints[idx].m_batchIdx == ldsCurBatch)\n" " {\n" " solveContactConstraint( gBodies, gShapes, &gConstraints[idx] );\n" -"\n" " idx+=64;\n" " } else\n" " {\n" diff --git a/src/Bullet3OpenCL/RigidBody/kernels/solveFriction.cl b/src/Bullet3OpenCL/RigidBody/kernels/solveFriction.cl index 21a302813..b4181b52c 100644 --- a/src/Bullet3OpenCL/RigidBody/kernels/solveFriction.cl +++ b/src/Bullet3OpenCL/RigidBody/kernels/solveFriction.cl @@ -204,22 +204,7 @@ typedef struct u32 m_paddings[1]; } Constraint4; -typedef struct -{ - float4 m_worldPos[4]; - float4 m_worldNormal; - u32 m_coeffs; - int m_batchIdx; - int m_bodyAPtrAndSignBit; - int m_bodyBPtrAndSignBit; - - int m_childIndexA; - int m_childIndexB; - int m_unused1; - int m_unused2; - -} Contact4; typedef struct { diff --git a/src/Bullet3OpenCL/RigidBody/kernels/solveFriction.h b/src/Bullet3OpenCL/RigidBody/kernels/solveFriction.h index 90097914c..02d31f0d0 100644 --- a/src/Bullet3OpenCL/RigidBody/kernels/solveFriction.h +++ b/src/Bullet3OpenCL/RigidBody/kernels/solveFriction.h @@ -2,37 +2,29 @@ static const char* solveFrictionCL= \ "/*\n" "Copyright (c) 2012 Advanced Micro Devices, Inc. \n" -"\n" "This software is provided 'as-is', without any express or implied warranty.\n" "In no event will the authors be held liable for any damages arising from the use of this software.\n" "Permission is granted to anyone to use this software for any purpose, \n" "including commercial applications, and to alter it and redistribute it freely, \n" "subject to the following restrictions:\n" -"\n" "1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" "2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" "3. This notice may not be removed or altered from any source distribution.\n" "*/\n" "//Originally written by Takahiro Harada\n" -"\n" -"\n" "//#pragma OPENCL EXTENSION cl_amd_printf : enable\n" "#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n" "#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n" "#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n" "#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n" -"\n" -"\n" "#ifdef cl_ext_atomic_counters_32\n" "#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable\n" "#else\n" "#define counter32_t volatile global int*\n" "#endif\n" -"\n" "typedef unsigned int u32;\n" "typedef unsigned short u16;\n" "typedef unsigned char u8;\n" -"\n" "#define GET_GROUP_IDX get_group_id(0)\n" "#define GET_LOCAL_IDX get_local_id(0)\n" "#define GET_GLOBAL_IDX get_global_id(0)\n" @@ -46,43 +38,28 @@ static const char* solveFrictionCL= \ "#define AtomAdd(x, value) atom_add(&(x), value)\n" "#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value )\n" "#define AtomXhg(x, value) atom_xchg ( &(x), value )\n" -"\n" -"\n" "#define SELECT_UINT4( b, a, condition ) select( b,a,condition )\n" -"\n" "#define mymake_float4 (float4)\n" "//#define make_float2 (float2)\n" "//#define make_uint4 (uint4)\n" "//#define make_int4 (int4)\n" "//#define make_uint2 (uint2)\n" "//#define make_int2 (int2)\n" -"\n" -"\n" "#define max2 max\n" "#define min2 min\n" -"\n" -"\n" "///////////////////////////////////////\n" "// Vector\n" "///////////////////////////////////////\n" -"\n" -"\n" -"\n" -"\n" "__inline\n" "float4 fastNormalize4(float4 v)\n" "{\n" " return fast_normalize(v);\n" "}\n" -"\n" -"\n" -"\n" "__inline\n" "float4 cross3(float4 a, float4 b)\n" "{\n" " return cross(a,b);\n" "}\n" -"\n" "__inline\n" "float dot3F4(float4 a, float4 b)\n" "{\n" @@ -90,10 +67,6 @@ static const char* solveFrictionCL= \ " float4 b1 = mymake_float4(b.xyz,0.f);\n" " return dot(a1, b1);\n" "}\n" -"\n" -"\n" -"\n" -"\n" "__inline\n" "float4 normalize3(const float4 a)\n" "{\n" @@ -102,33 +75,17 @@ static const char* solveFrictionCL= \ "// float length = sqrtf(dot3F4(a, a));\n" "// return 1.f/length * a;\n" "}\n" -"\n" -"\n" -"\n" -"\n" "///////////////////////////////////////\n" "// Matrix3x3\n" "///////////////////////////////////////\n" -"\n" "typedef struct\n" "{\n" " float4 m_row[3];\n" "}Matrix3x3;\n" -"\n" -"\n" -"\n" -"\n" -"\n" -"\n" "__inline\n" "float4 mtMul1(Matrix3x3 a, float4 b);\n" -"\n" "__inline\n" "float4 mtMul3(float4 a, Matrix3x3 b);\n" -"\n" -"\n" -"\n" -"\n" "__inline\n" "float4 mtMul1(Matrix3x3 a, float4 b)\n" "{\n" @@ -139,54 +96,39 @@ static const char* solveFrictionCL= \ " ans.w = 0.f;\n" " return ans;\n" "}\n" -"\n" "__inline\n" "float4 mtMul3(float4 a, Matrix3x3 b)\n" "{\n" " float4 colx = mymake_float4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n" " float4 coly = mymake_float4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n" " float4 colz = mymake_float4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n" -"\n" " float4 ans;\n" " ans.x = dot3F4( a, colx );\n" " ans.y = dot3F4( a, coly );\n" " ans.z = dot3F4( a, colz );\n" " return ans;\n" "}\n" -"\n" "///////////////////////////////////////\n" "// Quaternion\n" "///////////////////////////////////////\n" -"\n" "typedef float4 Quaternion;\n" -"\n" -"\n" -"\n" -"\n" -"\n" -"\n" -"\n" "#define WG_SIZE 64\n" -"\n" "typedef struct\n" "{\n" " float4 m_pos;\n" " Quaternion m_quat;\n" " float4 m_linVel;\n" " float4 m_angVel;\n" -"\n" " u32 m_shapeIdx;\n" " float m_invMass;\n" " float m_restituitionCoeff;\n" " float m_frictionCoeff;\n" "} Body;\n" -"\n" "typedef struct\n" "{\n" " Matrix3x3 m_invInertia;\n" " Matrix3x3 m_initInvInertia;\n" "} Shape;\n" -"\n" "typedef struct\n" "{\n" " float4 m_linear;\n" @@ -195,34 +137,13 @@ static const char* solveFrictionCL= \ " float m_jacCoeffInv[4];\n" " float m_b[4];\n" " float m_appliedRambdaDt[4];\n" -"\n" " float m_fJacCoeffInv[2]; \n" " float m_fAppliedRambdaDt[2]; \n" -"\n" " u32 m_bodyA;\n" " u32 m_bodyB;\n" -"\n" " int m_batchIdx;\n" " u32 m_paddings[1];\n" "} Constraint4;\n" -"\n" -"typedef struct\n" -"{\n" -" float4 m_worldPos[4];\n" -" float4 m_worldNormal;\n" -" u32 m_coeffs;\n" -" int m_batchIdx;\n" -"\n" -" int m_bodyAPtrAndSignBit;\n" -" int m_bodyBPtrAndSignBit;\n" -"\n" -" int m_childIndexA;\n" -" int m_childIndexB;\n" -" int m_unused1;\n" -" int m_unused2;\n" -"\n" -"} Contact4;\n" -"\n" "typedef struct\n" "{\n" " int m_nConstraints;\n" @@ -231,7 +152,6 @@ static const char* solveFrictionCL= \ " int m_nSplit;\n" "// int m_paddings[1];\n" "} ConstBuffer;\n" -"\n" "typedef struct\n" "{\n" " int m_solveFriction;\n" @@ -240,27 +160,20 @@ static const char* solveFrictionCL= \ " int m_nSplit;\n" "// int m_paddings[1];\n" "} ConstBufferBatchSolve;\n" -"\n" "void setLinearAndAngular( float4 n, float4 r0, float4 r1, float4* linear, float4* angular0, float4* angular1);\n" -"\n" "void setLinearAndAngular( float4 n, float4 r0, float4 r1, float4* linear, float4* angular0, float4* angular1)\n" "{\n" " *linear = mymake_float4(-n.xyz,0.f);\n" " *angular0 = -cross3(r0, n);\n" " *angular1 = cross3(r1, n);\n" "}\n" -"\n" "float calcRelVel( float4 l0, float4 l1, float4 a0, float4 a1, float4 linVel0, float4 angVel0, float4 linVel1, float4 angVel1 );\n" -"\n" "float calcRelVel( float4 l0, float4 l1, float4 a0, float4 a1, float4 linVel0, float4 angVel0, float4 linVel1, float4 angVel1 )\n" "{\n" " return dot3F4(l0, linVel0) + dot3F4(a0, angVel0) + dot3F4(l1, linVel1) + dot3F4(a1, angVel1);\n" "}\n" -"\n" -"\n" "float calcJacCoeff(const float4 linear0, const float4 linear1, const float4 angular0, const float4 angular1,\n" " float invMass0, const Matrix3x3* invInertia0, float invMass1, const Matrix3x3* invInertia1);\n" -"\n" "float calcJacCoeff(const float4 linear0, const float4 linear1, const float4 angular0, const float4 angular1,\n" " float invMass0, const Matrix3x3* invInertia0, float invMass1, const Matrix3x3* invInertia1)\n" "{\n" @@ -299,33 +212,26 @@ static const char* solveFrictionCL= \ " q[0].z = a*k;\n" " }\n" "}\n" -"\n" -"\n" "void solveFrictionConstraint(__global Body* gBodies, __global Shape* gShapes, __global Constraint4* ldsCs);\n" "void solveFrictionConstraint(__global Body* gBodies, __global Shape* gShapes, __global Constraint4* ldsCs)\n" "{\n" " float frictionCoeff = ldsCs[0].m_linear.w;\n" " int aIdx = ldsCs[0].m_bodyA;\n" " int bIdx = ldsCs[0].m_bodyB;\n" -"\n" -"\n" " float4 posA = gBodies[aIdx].m_pos;\n" " float4 linVelA = gBodies[aIdx].m_linVel;\n" " float4 angVelA = gBodies[aIdx].m_angVel;\n" " float invMassA = gBodies[aIdx].m_invMass;\n" " Matrix3x3 invInertiaA = gShapes[aIdx].m_invInertia;\n" -"\n" " float4 posB = gBodies[bIdx].m_pos;\n" " float4 linVelB = gBodies[bIdx].m_linVel;\n" " float4 angVelB = gBodies[bIdx].m_angVel;\n" " float invMassB = gBodies[bIdx].m_invMass;\n" " Matrix3x3 invInertiaB = gShapes[bIdx].m_invInertia;\n" " \n" -"\n" " {\n" " float maxRambdaDt[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX};\n" " float minRambdaDt[4] = {0.f,0.f,0.f,0.f};\n" -"\n" " float sum = 0;\n" " for(int j=0; j<4; j++)\n" " {\n" @@ -337,7 +243,6 @@ static const char* solveFrictionCL= \ " maxRambdaDt[j] = frictionCoeff*sum;\n" " minRambdaDt[j] = -maxRambdaDt[j];\n" " }\n" -"\n" " \n" "// solveFriction( ldsCs, posA, &linVelA, &angVelA, invMassA, invInertiaA,\n" "// posB, &linVelB, &angVelB, invMassB, invInertiaB, maxRambdaDt, minRambdaDt );\n" @@ -397,11 +302,9 @@ static const char* solveFrictionCL= \ " }\n" " }\n" " }\n" -"\n" " \n" " \n" " }\n" -"\n" " if (gBodies[aIdx].m_invMass)\n" " {\n" " gBodies[aIdx].m_linVel = linVelA;\n" @@ -421,25 +324,18 @@ static const char* solveFrictionCL= \ " gBodies[bIdx].m_angVel = mymake_float4(0,0,0,0);\n" " }\n" " \n" -"\n" "}\n" -"\n" "typedef struct \n" "{\n" " int m_valInt0;\n" " int m_valInt1;\n" " int m_valInt2;\n" " int m_valInt3;\n" -"\n" " float m_val0;\n" " float m_val1;\n" " float m_val2;\n" " float m_val3;\n" "} SolverDebugInfo;\n" -"\n" -"\n" -"\n" -"\n" "__kernel\n" "__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" "void BatchSolveKernelFriction(__global Body* gBodies,\n" @@ -456,28 +352,21 @@ static const char* solveFrictionCL= \ " __local int ldsCurBatch;\n" " __local int ldsNextBatch;\n" " __local int ldsStart;\n" -"\n" " int lIdx = GET_LOCAL_IDX;\n" " int wgIdx = GET_GROUP_IDX;\n" -"\n" "// int gIdx = GET_GLOBAL_IDX;\n" "// debugInfo[gIdx].m_valInt0 = gIdx;\n" " //debugInfo[gIdx].m_valInt1 = GET_GROUP_SIZE;\n" -"\n" -"\n" " int zIdx = (wgIdx/((nSplit.x*nSplit.y)/4))*2+((cellBatch&4)>>2);\n" " int remain= (wgIdx%((nSplit.x*nSplit.y)/4));\n" " int yIdx = (remain/(nSplit.x/2))*2 + ((cellBatch&2)>>1);\n" " int xIdx = (remain%(nSplit.x/2))*2 + (cellBatch&1);\n" " int cellIdx = xIdx+yIdx*nSplit.x+zIdx*(nSplit.x*nSplit.y);\n" -"\n" " \n" " if( gN[cellIdx] == 0 ) \n" " return;\n" -"\n" " const int start = gOffsets[cellIdx];\n" " const int end = start + gN[cellIdx];\n" -"\n" " \n" " if( lIdx == 0 )\n" " {\n" @@ -485,10 +374,7 @@ static const char* solveFrictionCL= \ " ldsNextBatch = 0;\n" " ldsStart = start;\n" " }\n" -"\n" -"\n" " GROUP_LDS_BARRIER;\n" -"\n" " int idx=ldsStart+lIdx;\n" " while (ldsCurBatch < maxBatch)\n" " {\n" @@ -496,9 +382,7 @@ static const char* solveFrictionCL= \ " {\n" " if (gConstraints[idx].m_batchIdx == ldsCurBatch)\n" " {\n" -"\n" " solveFrictionConstraint( gBodies, gShapes, &gConstraints[idx] );\n" -"\n" " idx+=64;\n" " } else\n" " {\n" diff --git a/src/Bullet3OpenCL/RigidBody/kernels/solverSetup.cl b/src/Bullet3OpenCL/RigidBody/kernels/solverSetup.cl index 73e431acd..6fed05031 100644 --- a/src/Bullet3OpenCL/RigidBody/kernels/solverSetup.cl +++ b/src/Bullet3OpenCL/RigidBody/kernels/solverSetup.cl @@ -14,6 +14,7 @@ subject to the following restrictions: */ //Originally written by Takahiro Harada +#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Contact4Data.h" #pragma OPENCL EXTENSION cl_amd_printf : enable #pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable @@ -403,22 +404,7 @@ typedef struct u32 m_paddings[1]; } Constraint4; -typedef struct -{ - float4 m_worldPos[4]; - float4 m_worldNormal; - u32 m_coeffs; - int m_batchIdx; - int m_bodyAPtrAndSignBit; - int m_bodyBPtrAndSignBit; - - int m_childIndexA; - int m_childIndexB; - int m_unused1; - int m_unused2; - -} Contact4; typedef struct { @@ -525,7 +511,7 @@ void btPlaneSpace1 (float4 n, float4* p, float4* q); void setConstraint4( const float4 posA, const float4 linVelA, const float4 angVelA, float invMassA, const Matrix3x3 invInertiaA, const float4 posB, const float4 linVelB, const float4 angVelB, float invMassB, const Matrix3x3 invInertiaB, - __global Contact4* src, float dt, float positionDrift, float positionConstraintCoeff, + __global struct b3Contact4Data* src, float dt, float positionDrift, float positionConstraintCoeff, Constraint4* dstC ) { dstC->m_bodyA = abs(src->m_bodyAPtrAndSignBit); @@ -622,7 +608,7 @@ typedef struct __kernel __attribute__((reqd_work_group_size(WG_SIZE,1,1))) -void ContactToConstraintKernel(__global Contact4* gContact, __global Body* gBodies, __global Shape* gShapes, __global Constraint4* gConstraintOut, +void ContactToConstraintKernel(__global struct b3Contact4Data* gContact, __global Body* gBodies, __global Shape* gShapes, __global Constraint4* gConstraintOut, int nContacts, float dt, float positionDrift, diff --git a/src/Bullet3OpenCL/RigidBody/kernels/solverSetup.h b/src/Bullet3OpenCL/RigidBody/kernels/solverSetup.h index 7bda1f710..3cba942d1 100644 --- a/src/Bullet3OpenCL/RigidBody/kernels/solverSetup.h +++ b/src/Bullet3OpenCL/RigidBody/kernels/solverSetup.h @@ -2,37 +2,71 @@ static const char* solverSetupCL= \ "/*\n" "Copyright (c) 2012 Advanced Micro Devices, Inc. \n" -"\n" "This software is provided 'as-is', without any express or implied warranty.\n" "In no event will the authors be held liable for any damages arising from the use of this software.\n" "Permission is granted to anyone to use this software for any purpose, \n" "including commercial applications, and to alter it and redistribute it freely, \n" "subject to the following restrictions:\n" -"\n" "1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" "2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" "3. This notice may not be removed or altered from any source distribution.\n" "*/\n" "//Originally written by Takahiro Harada\n" -"\n" -"\n" +"#ifndef B3_CONTACT4DATA_H\n" +"#define B3_CONTACT4DATA_H\n" +"#ifndef B3_FLOAT4_H\n" +"#define B3_FLOAT4_H\n" +"#ifndef B3_PLATFORM_DEFINITIONS_H\n" +"#define B3_PLATFORM_DEFINITIONS_H\n" +"struct MyTest\n" +"{\n" +" int bla;\n" +"};\n" +"#endif\n" +"#ifdef __cplusplus\n" +"#else//bla\n" +" typedef float4 b3Float4;\n" +"#endif \n" +"#endif //B3_FLOAT4_H\n" +"typedef struct b3Contact4Data b3Contact4Data_t;\n" +"struct b3Contact4Data\n" +"{\n" +" b3Float4 m_worldPos[4];\n" +"// b3Float4 m_localPosB[4];\n" +" b3Float4 m_worldNormal; // w: m_nPoints\n" +" unsigned short m_restituitionCoeffCmp;\n" +" unsigned short m_frictionCoeffCmp;\n" +" int m_batchIdx;\n" +" int m_bodyAPtrAndSignBit;//x:m_bodyAPtr, y:m_bodyBPtr\n" +" int m_bodyBPtrAndSignBit;\n" +" int m_childIndexA;\n" +" int m_childIndexB;\n" +" int m_unused1;\n" +" int m_unused2;\n" +" b3Float4 m_localPosA;\n" +"};\n" +"inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)\n" +"{\n" +" return (int)contact->m_worldNormal.w;\n" +"};\n" +"inline void b3Contact4Data_setNumPoints(struct b3Contact4Data* contact, int numPoints)\n" +"{\n" +" contact->m_worldNormal.w = (float)numPoints;\n" +"};\n" +"#endif //B3_CONTACT4DATA_H\n" "#pragma OPENCL EXTENSION cl_amd_printf : enable\n" "#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n" "#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n" "#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n" "#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n" -"\n" -"\n" "#ifdef cl_ext_atomic_counters_32\n" "#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable\n" "#else\n" "#define counter32_t volatile global int*\n" "#endif\n" -"\n" "typedef unsigned int u32;\n" "typedef unsigned short u16;\n" "typedef unsigned char u8;\n" -"\n" "#define GET_GROUP_IDX get_group_id(0)\n" "#define GET_LOCAL_IDX get_local_id(0)\n" "#define GET_GLOBAL_IDX get_global_id(0)\n" @@ -46,22 +80,15 @@ static const char* solverSetupCL= \ "#define AtomAdd(x, value) atom_add(&(x), value)\n" "#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value )\n" "#define AtomXhg(x, value) atom_xchg ( &(x), value )\n" -"\n" -"\n" "#define SELECT_UINT4( b, a, condition ) select( b,a,condition )\n" -"\n" "#define make_float4 (float4)\n" "#define make_float2 (float2)\n" "#define make_uint4 (uint4)\n" "#define make_int4 (int4)\n" "#define make_uint2 (uint2)\n" "#define make_int2 (int2)\n" -"\n" -"\n" "#define max2 max\n" "#define min2 min\n" -"\n" -"\n" "///////////////////////////////////////\n" "// Vector\n" "///////////////////////////////////////\n" @@ -71,52 +98,43 @@ static const char* solverSetupCL= \ " return native_divide(numerator, denominator); \n" "// return numerator/denominator; \n" "}\n" -"\n" "__inline\n" "float4 fastDiv4(float4 numerator, float4 denominator)\n" "{\n" " return native_divide(numerator, denominator); \n" "}\n" -"\n" "__inline\n" "float fastSqrtf(float f2)\n" "{\n" " return native_sqrt(f2);\n" "// return sqrt(f2);\n" "}\n" -"\n" "__inline\n" "float fastRSqrt(float f2)\n" "{\n" " return native_rsqrt(f2);\n" "}\n" -"\n" "__inline\n" "float fastLength4(float4 v)\n" "{\n" " return fast_length(v);\n" "}\n" -"\n" "__inline\n" "float4 fastNormalize4(float4 v)\n" "{\n" " return fast_normalize(v);\n" "}\n" -"\n" -"\n" "__inline\n" "float sqrtf(float a)\n" "{\n" "// return sqrt(a);\n" " return native_sqrt(a);\n" "}\n" -"\n" "__inline\n" "float4 cross3(float4 a, float4 b)\n" "{\n" " return cross(a,b);\n" "}\n" -"\n" "__inline\n" "float dot3F4(float4 a, float4 b)\n" "{\n" @@ -124,26 +142,22 @@ static const char* solverSetupCL= \ " float4 b1 = make_float4(b.xyz,0.f);\n" " return dot(a1, b1);\n" "}\n" -"\n" "__inline\n" "float length3(const float4 a)\n" "{\n" " return sqrtf(dot3F4(a,a));\n" "}\n" -"\n" "__inline\n" "float dot4(const float4 a, const float4 b)\n" "{\n" " return dot( a, b );\n" "}\n" -"\n" "// for height\n" "__inline\n" "float dot3w1(const float4 point, const float4 eqn)\n" "{\n" " return dot3F4(point,eqn) + eqn.w;\n" "}\n" -"\n" "__inline\n" "float4 normalize3(const float4 a)\n" "{\n" @@ -152,14 +166,12 @@ static const char* solverSetupCL= \ "// float length = sqrtf(dot3F4(a, a));\n" "// return 1.f/length * a;\n" "}\n" -"\n" "__inline\n" "float4 normalize4(const float4 a)\n" "{\n" " float length = sqrtf(dot4(a, a));\n" " return 1.f/length * a;\n" "}\n" -"\n" "__inline\n" "float4 createEquation(const float4 a, const float4 b, const float4 c)\n" "{\n" @@ -170,34 +182,25 @@ static const char* solverSetupCL= \ " eqn.w = -dot3F4(eqn,a);\n" " return eqn;\n" "}\n" -"\n" "///////////////////////////////////////\n" "// Matrix3x3\n" "///////////////////////////////////////\n" -"\n" "typedef struct\n" "{\n" " float4 m_row[3];\n" "}Matrix3x3;\n" -"\n" "__inline\n" "Matrix3x3 mtZero();\n" -"\n" "__inline\n" "Matrix3x3 mtIdentity();\n" -"\n" "__inline\n" "Matrix3x3 mtTranspose(Matrix3x3 m);\n" -"\n" "__inline\n" "Matrix3x3 mtMul(Matrix3x3 a, Matrix3x3 b);\n" -"\n" "__inline\n" "float4 mtMul1(Matrix3x3 a, float4 b);\n" -"\n" "__inline\n" "float4 mtMul3(float4 a, Matrix3x3 b);\n" -"\n" "__inline\n" "Matrix3x3 mtZero()\n" "{\n" @@ -207,7 +210,6 @@ static const char* solverSetupCL= \ " m.m_row[2] = (float4)(0.f);\n" " return m;\n" "}\n" -"\n" "__inline\n" "Matrix3x3 mtIdentity()\n" "{\n" @@ -217,7 +219,6 @@ static const char* solverSetupCL= \ " m.m_row[2] = (float4)(0,0,1,0);\n" " return m;\n" "}\n" -"\n" "__inline\n" "Matrix3x3 mtTranspose(Matrix3x3 m)\n" "{\n" @@ -227,7 +228,6 @@ static const char* solverSetupCL= \ " out.m_row[2] = (float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);\n" " return out;\n" "}\n" -"\n" "__inline\n" "Matrix3x3 mtMul(Matrix3x3 a, Matrix3x3 b)\n" "{\n" @@ -248,7 +248,6 @@ static const char* solverSetupCL= \ " }\n" " return ans;\n" "}\n" -"\n" "__inline\n" "float4 mtMul1(Matrix3x3 a, float4 b)\n" "{\n" @@ -259,44 +258,32 @@ static const char* solverSetupCL= \ " ans.w = 0.f;\n" " return ans;\n" "}\n" -"\n" "__inline\n" "float4 mtMul3(float4 a, Matrix3x3 b)\n" "{\n" " float4 colx = make_float4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n" " float4 coly = make_float4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n" " float4 colz = make_float4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n" -"\n" " float4 ans;\n" " ans.x = dot3F4( a, colx );\n" " ans.y = dot3F4( a, coly );\n" " ans.z = dot3F4( a, colz );\n" " return ans;\n" "}\n" -"\n" "///////////////////////////////////////\n" "// Quaternion\n" "///////////////////////////////////////\n" -"\n" "typedef float4 Quaternion;\n" -"\n" "__inline\n" "Quaternion qtMul(Quaternion a, Quaternion b);\n" -"\n" "__inline\n" "Quaternion qtNormalize(Quaternion in);\n" -"\n" "__inline\n" "float4 qtRotate(Quaternion q, float4 vec);\n" -"\n" "__inline\n" "Quaternion qtInvert(Quaternion q);\n" -"\n" "__inline\n" "Matrix3x3 qtGetRotationMatrix(Quaternion q);\n" -"\n" -"\n" -"\n" "__inline\n" "Quaternion qtMul(Quaternion a, Quaternion b)\n" "{\n" @@ -307,7 +294,6 @@ static const char* solverSetupCL= \ " ans.w = a.w*b.w - dot3F4(a, b);\n" " return ans;\n" "}\n" -"\n" "__inline\n" "Quaternion qtNormalize(Quaternion in)\n" "{\n" @@ -324,67 +310,52 @@ static const char* solverSetupCL= \ " float4 out = qtMul(qtMul(q,vcpy),qInv);\n" " return out;\n" "}\n" -"\n" "__inline\n" "Quaternion qtInvert(Quaternion q)\n" "{\n" " return (Quaternion)(-q.xyz, q.w);\n" "}\n" -"\n" "__inline\n" "float4 qtInvRotate(const Quaternion q, float4 vec)\n" "{\n" " return qtRotate( qtInvert( q ), vec );\n" "}\n" -"\n" "__inline\n" "Matrix3x3 qtGetRotationMatrix(Quaternion quat)\n" "{\n" " float4 quat2 = (float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f);\n" " Matrix3x3 out;\n" -"\n" " out.m_row[0].x=1-2*quat2.y-2*quat2.z;\n" " out.m_row[0].y=2*quat.x*quat.y-2*quat.w*quat.z;\n" " out.m_row[0].z=2*quat.x*quat.z+2*quat.w*quat.y;\n" " out.m_row[0].w = 0.f;\n" -"\n" " out.m_row[1].x=2*quat.x*quat.y+2*quat.w*quat.z;\n" " out.m_row[1].y=1-2*quat2.x-2*quat2.z;\n" " out.m_row[1].z=2*quat.y*quat.z-2*quat.w*quat.x;\n" " out.m_row[1].w = 0.f;\n" -"\n" " out.m_row[2].x=2*quat.x*quat.z-2*quat.w*quat.y;\n" " out.m_row[2].y=2*quat.y*quat.z+2*quat.w*quat.x;\n" " out.m_row[2].z=1-2*quat2.x-2*quat2.y;\n" " out.m_row[2].w = 0.f;\n" -"\n" " return out;\n" "}\n" -"\n" -"\n" -"\n" -"\n" "#define WG_SIZE 64\n" -"\n" "typedef struct\n" "{\n" " float4 m_pos;\n" " Quaternion m_quat;\n" " float4 m_linVel;\n" " float4 m_angVel;\n" -"\n" " u32 m_shapeIdx;\n" " float m_invMass;\n" " float m_restituitionCoeff;\n" " float m_frictionCoeff;\n" "} Body;\n" -"\n" "typedef struct\n" "{\n" " Matrix3x3 m_invInertia;\n" " Matrix3x3 m_initInvInertia;\n" "} Shape;\n" -"\n" "typedef struct\n" "{\n" " float4 m_linear;\n" @@ -393,34 +364,13 @@ static const char* solverSetupCL= \ " float m_jacCoeffInv[4];\n" " float m_b[4];\n" " float m_appliedRambdaDt[4];\n" -"\n" " float m_fJacCoeffInv[2]; \n" " float m_fAppliedRambdaDt[2]; \n" -"\n" " u32 m_bodyA;\n" " u32 m_bodyB;\n" -"\n" " int m_batchIdx;\n" " u32 m_paddings[1];\n" "} Constraint4;\n" -"\n" -"typedef struct\n" -"{\n" -" float4 m_worldPos[4];\n" -" float4 m_worldNormal;\n" -" u32 m_coeffs;\n" -" int m_batchIdx;\n" -"\n" -" int m_bodyAPtrAndSignBit;\n" -" int m_bodyBPtrAndSignBit;\n" -"\n" -" int m_childIndexA;\n" -" int m_childIndexB;\n" -" int m_unused1;\n" -" int m_unused2;\n" -"\n" -"} Contact4;\n" -"\n" "typedef struct\n" "{\n" " int m_nConstraints;\n" @@ -429,7 +379,6 @@ static const char* solverSetupCL= \ " int m_nSplit;\n" "// int m_paddings[1];\n" "} ConstBuffer;\n" -"\n" "typedef struct\n" "{\n" " int m_solveFriction;\n" @@ -438,22 +387,16 @@ static const char* solverSetupCL= \ " int m_nSplit;\n" "// int m_paddings[1];\n" "} ConstBufferBatchSolve;\n" -"\n" -"\n" "void setLinearAndAngular( float4 n, float4 r0, float4 r1, float4* linear, float4* angular0, float4* angular1)\n" "{\n" " *linear = make_float4(-n.xyz,0.f);\n" " *angular0 = -cross3(r0, n);\n" " *angular1 = cross3(r1, n);\n" "}\n" -"\n" -"\n" "float calcRelVel( float4 l0, float4 l1, float4 a0, float4 a1, float4 linVel0, float4 angVel0, float4 linVel1, float4 angVel1 )\n" "{\n" " return dot3F4(l0, linVel0) + dot3F4(a0, angVel0) + dot3F4(l1, linVel1) + dot3F4(a1, angVel1);\n" "}\n" -"\n" -"\n" "float calcJacCoeff(const float4 linear0, const float4 linear1, const float4 angular0, const float4 angular1,\n" " float invMass0, const Matrix3x3* invInertia0, float invMass1, const Matrix3x3* invInertia1)\n" "{\n" @@ -464,27 +407,18 @@ static const char* solverSetupCL= \ " float jmj3 = dot3F4(mtMul3(angular1,*invInertia1), angular1);\n" " return -1.f/(jmj0+jmj1+jmj2+jmj3);\n" "}\n" -"\n" -"\n" -"\n" " \n" -"\n" -"\n" "typedef struct \n" "{\n" " int m_valInt0;\n" " int m_valInt1;\n" " int m_valInt2;\n" " int m_valInt3;\n" -"\n" " float m_val0;\n" " float m_val1;\n" " float m_val2;\n" " float m_val3;\n" "} SolverDebugInfo;\n" -"\n" -"\n" -"\n" "typedef struct\n" "{\n" " int m_nContacts;\n" @@ -492,8 +426,6 @@ static const char* solverSetupCL= \ " float m_scale;\n" " int m_nSplit;\n" "} ConstBufferSSD;\n" -"\n" -"\n" "void btPlaneSpace1 (float4 n, float4* p, float4* q);\n" " void btPlaneSpace1 (float4 n, float4* p, float4* q)\n" "{\n" @@ -522,84 +454,68 @@ static const char* solverSetupCL= \ " q[0].z = a*k;\n" " }\n" "}\n" -"\n" -"\n" "void setConstraint4( const float4 posA, const float4 linVelA, const float4 angVelA, float invMassA, const Matrix3x3 invInertiaA,\n" " const float4 posB, const float4 linVelB, const float4 angVelB, float invMassB, const Matrix3x3 invInertiaB, \n" -" __global Contact4* src, float dt, float positionDrift, float positionConstraintCoeff,\n" +" __global struct b3Contact4Data* src, float dt, float positionDrift, float positionConstraintCoeff,\n" " Constraint4* dstC )\n" "{\n" " dstC->m_bodyA = abs(src->m_bodyAPtrAndSignBit);\n" " dstC->m_bodyB = abs(src->m_bodyBPtrAndSignBit);\n" -"\n" " float dtInv = 1.f/dt;\n" " for(int ic=0; ic<4; ic++)\n" " {\n" " dstC->m_appliedRambdaDt[ic] = 0.f;\n" " }\n" " dstC->m_fJacCoeffInv[0] = dstC->m_fJacCoeffInv[1] = 0.f;\n" -"\n" -"\n" " dstC->m_linear = -src->m_worldNormal;\n" " dstC->m_linear.w = 0.7f ;//src->getFrictionCoeff() );\n" " for(int ic=0; ic<4; ic++)\n" " {\n" " float4 r0 = src->m_worldPos[ic] - posA;\n" " float4 r1 = src->m_worldPos[ic] - posB;\n" -"\n" " if( ic >= src->m_worldNormal.w )//npoints\n" " {\n" " dstC->m_jacCoeffInv[ic] = 0.f;\n" " continue;\n" " }\n" -"\n" " float relVelN;\n" " {\n" " float4 linear, angular0, angular1;\n" " setLinearAndAngular(src->m_worldNormal, r0, r1, &linear, &angular0, &angular1);\n" -"\n" " dstC->m_jacCoeffInv[ic] = calcJacCoeff(linear, -linear, angular0, angular1,\n" " invMassA, &invInertiaA, invMassB, &invInertiaB );\n" -"\n" " relVelN = calcRelVel(linear, -linear, angular0, angular1,\n" " linVelA, angVelA, linVelB, angVelB);\n" -"\n" " float e = 0.f;//src->getRestituitionCoeff();\n" " if( relVelN*relVelN < 0.004f ) e = 0.f;\n" -"\n" " dstC->m_b[ic] = e*relVelN;\n" " //float penetration = src->m_worldPos[ic].w;\n" " dstC->m_b[ic] += (src->m_worldPos[ic].w + positionDrift)*positionConstraintCoeff*dtInv;\n" " dstC->m_appliedRambdaDt[ic] = 0.f;\n" " }\n" " }\n" -"\n" " if( src->m_worldNormal.w > 0 )//npoints\n" " { // prepare friction\n" " float4 center = make_float4(0.f);\n" " for(int i=0; im_worldNormal.w; i++) \n" " center += src->m_worldPos[i];\n" " center /= (float)src->m_worldNormal.w;\n" -"\n" " float4 tangent[2];\n" " btPlaneSpace1(src->m_worldNormal,&tangent[0],&tangent[1]);\n" " \n" " float4 r[2];\n" " r[0] = center - posA;\n" " r[1] = center - posB;\n" -"\n" " for(int i=0; i<2; i++)\n" " {\n" " float4 linear, angular0, angular1;\n" " setLinearAndAngular(tangent[i], r[0], r[1], &linear, &angular0, &angular1);\n" -"\n" " dstC->m_fJacCoeffInv[i] = calcJacCoeff(linear, -linear, angular0, angular1,\n" " invMassA, &invInertiaA, invMassB, &invInertiaB );\n" " dstC->m_fAppliedRambdaDt[i] = 0.f;\n" " }\n" " dstC->m_center = center;\n" " }\n" -"\n" " for(int i=0; i<4; i++)\n" " {\n" " if( im_worldNormal.w )\n" @@ -612,7 +528,6 @@ static const char* solverSetupCL= \ " }\n" " }\n" "}\n" -"\n" "typedef struct\n" "{\n" " int m_nContacts;\n" @@ -620,10 +535,9 @@ static const char* solverSetupCL= \ " float m_positionDrift;\n" " float m_positionConstraintCoeff;\n" "} ConstBufferCTC;\n" -"\n" "__kernel\n" "__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" -"void ContactToConstraintKernel(__global Contact4* gContact, __global Body* gBodies, __global Shape* gShapes, __global Constraint4* gConstraintOut, \n" +"void ContactToConstraintKernel(__global struct b3Contact4Data* gContact, __global Body* gBodies, __global Shape* gShapes, __global Constraint4* gConstraintOut, \n" "int nContacts,\n" "float dt,\n" "float positionDrift,\n" @@ -636,33 +550,23 @@ static const char* solverSetupCL= \ " {\n" " int aIdx = abs(gContact[gIdx].m_bodyAPtrAndSignBit);\n" " int bIdx = abs(gContact[gIdx].m_bodyBPtrAndSignBit);\n" -"\n" " float4 posA = gBodies[aIdx].m_pos;\n" " float4 linVelA = gBodies[aIdx].m_linVel;\n" " float4 angVelA = gBodies[aIdx].m_angVel;\n" " float invMassA = gBodies[aIdx].m_invMass;\n" " Matrix3x3 invInertiaA = gShapes[aIdx].m_invInertia;\n" -"\n" " float4 posB = gBodies[bIdx].m_pos;\n" " float4 linVelB = gBodies[bIdx].m_linVel;\n" " float4 angVelB = gBodies[bIdx].m_angVel;\n" " float invMassB = gBodies[bIdx].m_invMass;\n" " Matrix3x3 invInertiaB = gShapes[bIdx].m_invInertia;\n" -"\n" " Constraint4 cs;\n" -"\n" " setConstraint4( posA, linVelA, angVelA, invMassA, invInertiaA, posB, linVelB, angVelB, invMassB, invInertiaB,\n" " &gContact[gIdx], dt, positionDrift, positionConstraintCoeff,\n" " &cs );\n" " \n" " cs.m_batchIdx = gContact[gIdx].m_batchIdx;\n" -"\n" " gConstraintOut[gIdx] = cs;\n" " }\n" "}\n" -"\n" -"\n" -"\n" -"\n" -"\n" ; diff --git a/src/Bullet3OpenCL/RigidBody/kernels/solverSetup2.cl b/src/Bullet3OpenCL/RigidBody/kernels/solverSetup2.cl index 5fbc24da8..518f708ce 100644 --- a/src/Bullet3OpenCL/RigidBody/kernels/solverSetup2.cl +++ b/src/Bullet3OpenCL/RigidBody/kernels/solverSetup2.cl @@ -14,6 +14,8 @@ subject to the following restrictions: //Originally written by Takahiro Harada +#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Contact4Data.h" + #pragma OPENCL EXTENSION cl_amd_printf : enable #pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable #pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable @@ -377,22 +379,7 @@ typedef struct u32 m_paddings[1]; } Constraint4; -typedef struct -{ - float4 m_worldPos[4]; - float4 m_worldNormal; - u32 m_coeffs; - int m_batchIdx; - int m_bodyAPtrAndSignBit; - int m_bodyBPtrAndSignBit; - - int m_childIndexA; - int m_childIndexB; - int m_unused1; - int m_unused2; - -} Contact4; typedef struct { @@ -435,7 +422,7 @@ typedef struct // others __kernel __attribute__((reqd_work_group_size(WG_SIZE,1,1))) -void ReorderContactKernel(__global Contact4* in, __global Contact4* out, __global int2* sortData, int4 cb ) +void ReorderContactKernel(__global struct b3Contact4Data* in, __global struct b3Contact4Data* out, __global int2* sortData, int4 cb ) { int nContacts = cb.x; int gIdx = GET_GLOBAL_IDX; @@ -448,7 +435,7 @@ void ReorderContactKernel(__global Contact4* in, __global Contact4* out, __globa } __kernel __attribute__((reqd_work_group_size(WG_SIZE,1,1))) -void SetDeterminismSortDataChildShapeB(__global Contact4* contactsIn, __global int2* sortDataOut, int nContacts) +void SetDeterminismSortDataChildShapeB(__global struct b3Contact4Data* contactsIn, __global int2* sortDataOut, int nContacts) { int gIdx = GET_GLOBAL_IDX; @@ -462,7 +449,7 @@ void SetDeterminismSortDataChildShapeB(__global Contact4* contactsIn, __global i } __kernel __attribute__((reqd_work_group_size(WG_SIZE,1,1))) -void SetDeterminismSortDataChildShapeA(__global Contact4* contactsIn, __global int2* sortDataInOut, int nContacts) +void SetDeterminismSortDataChildShapeA(__global struct b3Contact4Data* contactsIn, __global int2* sortDataInOut, int nContacts) { int gIdx = GET_GLOBAL_IDX; @@ -478,7 +465,7 @@ void SetDeterminismSortDataChildShapeA(__global Contact4* contactsIn, __global i } __kernel __attribute__((reqd_work_group_size(WG_SIZE,1,1))) -void SetDeterminismSortDataBodyA(__global Contact4* contactsIn, __global int2* sortDataInOut, int nContacts) +void SetDeterminismSortDataBodyA(__global struct b3Contact4Data* contactsIn, __global int2* sortDataInOut, int nContacts) { int gIdx = GET_GLOBAL_IDX; @@ -496,7 +483,7 @@ void SetDeterminismSortDataBodyA(__global Contact4* contactsIn, __global int2* s __kernel __attribute__((reqd_work_group_size(WG_SIZE,1,1))) -void SetDeterminismSortDataBodyB(__global Contact4* contactsIn, __global int2* sortDataInOut, int nContacts) +void SetDeterminismSortDataBodyB(__global struct b3Contact4Data* contactsIn, __global int2* sortDataInOut, int nContacts) { int gIdx = GET_GLOBAL_IDX; @@ -552,7 +539,7 @@ static __constant const int gridTable8x8[] = __kernel __attribute__((reqd_work_group_size(WG_SIZE,1,1))) -void SetSortDataKernel(__global Contact4* gContact, __global Body* gBodies, __global int2* gSortDataOut, +void SetSortDataKernel(__global struct b3Contact4Data* gContact, __global Body* gBodies, __global int2* gSortDataOut, int nContacts,float scale,int4 nSplit,int staticIdx) { @@ -613,7 +600,7 @@ int nContacts,float scale,int4 nSplit,int staticIdx) __kernel __attribute__((reqd_work_group_size(WG_SIZE,1,1))) -void CopyConstraintKernel(__global Contact4* gIn, __global Contact4* gOut, int4 cb ) +void CopyConstraintKernel(__global struct b3Contact4Data* gIn, __global struct b3Contact4Data* gOut, int4 cb ) { int gIdx = GET_GLOBAL_IDX; if( gIdx < cb.x ) diff --git a/src/Bullet3OpenCL/RigidBody/kernels/solverSetup2.h b/src/Bullet3OpenCL/RigidBody/kernels/solverSetup2.h index 7d9961005..8dbadde70 100644 --- a/src/Bullet3OpenCL/RigidBody/kernels/solverSetup2.h +++ b/src/Bullet3OpenCL/RigidBody/kernels/solverSetup2.h @@ -2,37 +2,71 @@ static const char* solverSetup2CL= \ "/*\n" "Copyright (c) 2012 Advanced Micro Devices, Inc. \n" -"\n" "This software is provided 'as-is', without any express or implied warranty.\n" "In no event will the authors be held liable for any damages arising from the use of this software.\n" "Permission is granted to anyone to use this software for any purpose, \n" "including commercial applications, and to alter it and redistribute it freely, \n" "subject to the following restrictions:\n" -"\n" "1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" "2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" "3. This notice may not be removed or altered from any source distribution.\n" "*/\n" "//Originally written by Takahiro Harada\n" -"\n" -"\n" +"#ifndef B3_CONTACT4DATA_H\n" +"#define B3_CONTACT4DATA_H\n" +"#ifndef B3_FLOAT4_H\n" +"#define B3_FLOAT4_H\n" +"#ifndef B3_PLATFORM_DEFINITIONS_H\n" +"#define B3_PLATFORM_DEFINITIONS_H\n" +"struct MyTest\n" +"{\n" +" int bla;\n" +"};\n" +"#endif\n" +"#ifdef __cplusplus\n" +"#else//bla\n" +" typedef float4 b3Float4;\n" +"#endif \n" +"#endif //B3_FLOAT4_H\n" +"typedef struct b3Contact4Data b3Contact4Data_t;\n" +"struct b3Contact4Data\n" +"{\n" +" b3Float4 m_worldPos[4];\n" +"// b3Float4 m_localPosB[4];\n" +" b3Float4 m_worldNormal; // w: m_nPoints\n" +" unsigned short m_restituitionCoeffCmp;\n" +" unsigned short m_frictionCoeffCmp;\n" +" int m_batchIdx;\n" +" int m_bodyAPtrAndSignBit;//x:m_bodyAPtr, y:m_bodyBPtr\n" +" int m_bodyBPtrAndSignBit;\n" +" int m_childIndexA;\n" +" int m_childIndexB;\n" +" int m_unused1;\n" +" int m_unused2;\n" +" b3Float4 m_localPosA;\n" +"};\n" +"inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)\n" +"{\n" +" return (int)contact->m_worldNormal.w;\n" +"};\n" +"inline void b3Contact4Data_setNumPoints(struct b3Contact4Data* contact, int numPoints)\n" +"{\n" +" contact->m_worldNormal.w = (float)numPoints;\n" +"};\n" +"#endif //B3_CONTACT4DATA_H\n" "#pragma OPENCL EXTENSION cl_amd_printf : enable\n" "#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n" "#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n" "#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n" "#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n" -"\n" -"\n" "#ifdef cl_ext_atomic_counters_32\n" "#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable\n" "#else\n" "#define counter32_t volatile global int*\n" "#endif\n" -"\n" "typedef unsigned int u32;\n" "typedef unsigned short u16;\n" "typedef unsigned char u8;\n" -"\n" "#define GET_GROUP_IDX get_group_id(0)\n" "#define GET_LOCAL_IDX get_local_id(0)\n" "#define GET_GLOBAL_IDX get_global_id(0)\n" @@ -46,22 +80,15 @@ static const char* solverSetup2CL= \ "#define AtomAdd(x, value) atom_add(&(x), value)\n" "#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value )\n" "#define AtomXhg(x, value) atom_xchg ( &(x), value )\n" -"\n" -"\n" "#define SELECT_UINT4( b, a, condition ) select( b,a,condition )\n" -"\n" "#define make_float4 (float4)\n" "#define make_float2 (float2)\n" "#define make_uint4 (uint4)\n" "#define make_int4 (int4)\n" "#define make_uint2 (uint2)\n" "#define make_int2 (int2)\n" -"\n" -"\n" "#define max2 max\n" "#define min2 min\n" -"\n" -"\n" "///////////////////////////////////////\n" "// Vector\n" "///////////////////////////////////////\n" @@ -71,52 +98,43 @@ static const char* solverSetup2CL= \ " return native_divide(numerator, denominator); \n" "// return numerator/denominator; \n" "}\n" -"\n" "__inline\n" "float4 fastDiv4(float4 numerator, float4 denominator)\n" "{\n" " return native_divide(numerator, denominator); \n" "}\n" -"\n" "__inline\n" "float fastSqrtf(float f2)\n" "{\n" " return native_sqrt(f2);\n" "// return sqrt(f2);\n" "}\n" -"\n" "__inline\n" "float fastRSqrt(float f2)\n" "{\n" " return native_rsqrt(f2);\n" "}\n" -"\n" "__inline\n" "float fastLength4(float4 v)\n" "{\n" " return fast_length(v);\n" "}\n" -"\n" "__inline\n" "float4 fastNormalize4(float4 v)\n" "{\n" " return fast_normalize(v);\n" "}\n" -"\n" -"\n" "__inline\n" "float sqrtf(float a)\n" "{\n" "// return sqrt(a);\n" " return native_sqrt(a);\n" "}\n" -"\n" "__inline\n" "float4 cross3(float4 a, float4 b)\n" "{\n" " return cross(a,b);\n" "}\n" -"\n" "__inline\n" "float dot3F4(float4 a, float4 b)\n" "{\n" @@ -124,26 +142,22 @@ static const char* solverSetup2CL= \ " float4 b1 = make_float4(b.xyz,0.f);\n" " return dot(a1, b1);\n" "}\n" -"\n" "__inline\n" "float length3(const float4 a)\n" "{\n" " return sqrtf(dot3F4(a,a));\n" "}\n" -"\n" "__inline\n" "float dot4(const float4 a, const float4 b)\n" "{\n" " return dot( a, b );\n" "}\n" -"\n" "// for height\n" "__inline\n" "float dot3w1(const float4 point, const float4 eqn)\n" "{\n" " return dot3F4(point,eqn) + eqn.w;\n" "}\n" -"\n" "__inline\n" "float4 normalize3(const float4 a)\n" "{\n" @@ -152,14 +166,12 @@ static const char* solverSetup2CL= \ "// float length = sqrtf(dot3F4(a, a));\n" "// return 1.f/length * a;\n" "}\n" -"\n" "__inline\n" "float4 normalize4(const float4 a)\n" "{\n" " float length = sqrtf(dot4(a, a));\n" " return 1.f/length * a;\n" "}\n" -"\n" "__inline\n" "float4 createEquation(const float4 a, const float4 b, const float4 c)\n" "{\n" @@ -170,34 +182,25 @@ static const char* solverSetup2CL= \ " eqn.w = -dot3F4(eqn,a);\n" " return eqn;\n" "}\n" -"\n" "///////////////////////////////////////\n" "// Matrix3x3\n" "///////////////////////////////////////\n" -"\n" "typedef struct\n" "{\n" " float4 m_row[3];\n" "}Matrix3x3;\n" -"\n" "__inline\n" "Matrix3x3 mtZero();\n" -"\n" "__inline\n" "Matrix3x3 mtIdentity();\n" -"\n" "__inline\n" "Matrix3x3 mtTranspose(Matrix3x3 m);\n" -"\n" "__inline\n" "Matrix3x3 mtMul(Matrix3x3 a, Matrix3x3 b);\n" -"\n" "__inline\n" "float4 mtMul1(Matrix3x3 a, float4 b);\n" -"\n" "__inline\n" "float4 mtMul3(float4 a, Matrix3x3 b);\n" -"\n" "__inline\n" "Matrix3x3 mtZero()\n" "{\n" @@ -207,7 +210,6 @@ static const char* solverSetup2CL= \ " m.m_row[2] = (float4)(0.f);\n" " return m;\n" "}\n" -"\n" "__inline\n" "Matrix3x3 mtIdentity()\n" "{\n" @@ -217,7 +219,6 @@ static const char* solverSetup2CL= \ " m.m_row[2] = (float4)(0,0,1,0);\n" " return m;\n" "}\n" -"\n" "__inline\n" "Matrix3x3 mtTranspose(Matrix3x3 m)\n" "{\n" @@ -227,7 +228,6 @@ static const char* solverSetup2CL= \ " out.m_row[2] = (float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);\n" " return out;\n" "}\n" -"\n" "__inline\n" "Matrix3x3 mtMul(Matrix3x3 a, Matrix3x3 b)\n" "{\n" @@ -248,7 +248,6 @@ static const char* solverSetup2CL= \ " }\n" " return ans;\n" "}\n" -"\n" "__inline\n" "float4 mtMul1(Matrix3x3 a, float4 b)\n" "{\n" @@ -259,43 +258,30 @@ static const char* solverSetup2CL= \ " ans.w = 0.f;\n" " return ans;\n" "}\n" -"\n" "__inline\n" "float4 mtMul3(float4 a, Matrix3x3 b)\n" "{\n" " float4 colx = make_float4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n" " float4 coly = make_float4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n" " float4 colz = make_float4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n" -"\n" " float4 ans;\n" " ans.x = dot3F4( a, colx );\n" " ans.y = dot3F4( a, coly );\n" " ans.z = dot3F4( a, colz );\n" " return ans;\n" "}\n" -"\n" "///////////////////////////////////////\n" "// Quaternion\n" "///////////////////////////////////////\n" -"\n" "typedef float4 Quaternion;\n" -"\n" "__inline\n" "Quaternion qtMul(Quaternion a, Quaternion b);\n" -"\n" "__inline\n" "Quaternion qtNormalize(Quaternion in);\n" -"\n" "__inline\n" "float4 qtRotate(Quaternion q, float4 vec);\n" -"\n" "__inline\n" "Quaternion qtInvert(Quaternion q);\n" -"\n" -"\n" -"\n" -"\n" -"\n" "__inline\n" "Quaternion qtMul(Quaternion a, Quaternion b)\n" "{\n" @@ -306,7 +292,6 @@ static const char* solverSetup2CL= \ " ans.w = a.w*b.w - dot3F4(a, b);\n" " return ans;\n" "}\n" -"\n" "__inline\n" "Quaternion qtNormalize(Quaternion in)\n" "{\n" @@ -323,43 +308,33 @@ static const char* solverSetup2CL= \ " float4 out = qtMul(qtMul(q,vcpy),qInv);\n" " return out;\n" "}\n" -"\n" "__inline\n" "Quaternion qtInvert(Quaternion q)\n" "{\n" " return (Quaternion)(-q.xyz, q.w);\n" "}\n" -"\n" "__inline\n" "float4 qtInvRotate(const Quaternion q, float4 vec)\n" "{\n" " return qtRotate( qtInvert( q ), vec );\n" "}\n" -"\n" -"\n" -"\n" -"\n" "#define WG_SIZE 64\n" -"\n" "typedef struct\n" "{\n" " float4 m_pos;\n" " Quaternion m_quat;\n" " float4 m_linVel;\n" " float4 m_angVel;\n" -"\n" " u32 m_shapeIdx;\n" " float m_invMass;\n" " float m_restituitionCoeff;\n" " float m_frictionCoeff;\n" "} Body;\n" -"\n" "typedef struct\n" "{\n" " Matrix3x3 m_invInertia;\n" " Matrix3x3 m_initInvInertia;\n" "} Shape;\n" -"\n" "typedef struct\n" "{\n" " float4 m_linear;\n" @@ -368,34 +343,13 @@ static const char* solverSetup2CL= \ " float m_jacCoeffInv[4];\n" " float m_b[4];\n" " float m_appliedRambdaDt[4];\n" -"\n" " float m_fJacCoeffInv[2]; \n" " float m_fAppliedRambdaDt[2]; \n" -"\n" " u32 m_bodyA;\n" " u32 m_bodyB;\n" -"\n" " int m_batchIdx;\n" " u32 m_paddings[1];\n" "} Constraint4;\n" -"\n" -"typedef struct\n" -"{\n" -" float4 m_worldPos[4];\n" -" float4 m_worldNormal;\n" -" u32 m_coeffs;\n" -" int m_batchIdx;\n" -"\n" -" int m_bodyAPtrAndSignBit;\n" -" int m_bodyBPtrAndSignBit;\n" -"\n" -" int m_childIndexA;\n" -" int m_childIndexB;\n" -" int m_unused1;\n" -" int m_unused2;\n" -"\n" -"} Contact4;\n" -"\n" "typedef struct\n" "{\n" " int m_nConstraints;\n" @@ -404,7 +358,6 @@ static const char* solverSetup2CL= \ " int m_nSplit;\n" "// int m_paddings[1];\n" "} ConstBuffer;\n" -"\n" "typedef struct\n" "{\n" " int m_solveFriction;\n" @@ -413,47 +366,35 @@ static const char* solverSetup2CL= \ " int m_nSplit;\n" "// int m_paddings[1];\n" "} ConstBufferBatchSolve;\n" -"\n" -"\n" " \n" -"\n" -"\n" "typedef struct \n" "{\n" " int m_valInt0;\n" " int m_valInt1;\n" " int m_valInt2;\n" " int m_valInt3;\n" -"\n" " float m_val0;\n" " float m_val1;\n" " float m_val2;\n" " float m_val3;\n" "} SolverDebugInfo;\n" -"\n" -"\n" -"\n" -"\n" "// others\n" "__kernel\n" "__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" -"void ReorderContactKernel(__global Contact4* in, __global Contact4* out, __global int2* sortData, int4 cb )\n" +"void ReorderContactKernel(__global struct b3Contact4Data* in, __global struct b3Contact4Data* out, __global int2* sortData, int4 cb )\n" "{\n" " int nContacts = cb.x;\n" " int gIdx = GET_GLOBAL_IDX;\n" -"\n" " if( gIdx < nContacts )\n" " {\n" " int srcIdx = sortData[gIdx].y;\n" " out[gIdx] = in[srcIdx];\n" " }\n" "}\n" -"\n" "__kernel __attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" -"void SetDeterminismSortDataChildShapeB(__global Contact4* contactsIn, __global int2* sortDataOut, int nContacts)\n" +"void SetDeterminismSortDataChildShapeB(__global struct b3Contact4Data* contactsIn, __global int2* sortDataOut, int nContacts)\n" "{\n" " int gIdx = GET_GLOBAL_IDX;\n" -"\n" " if( gIdx < nContacts )\n" " {\n" " int2 sd;\n" @@ -462,12 +403,10 @@ static const char* solverSetup2CL= \ " sortDataOut[gIdx] = sd;\n" " }\n" "}\n" -"\n" "__kernel __attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" -"void SetDeterminismSortDataChildShapeA(__global Contact4* contactsIn, __global int2* sortDataInOut, int nContacts)\n" +"void SetDeterminismSortDataChildShapeA(__global struct b3Contact4Data* contactsIn, __global int2* sortDataInOut, int nContacts)\n" "{\n" " int gIdx = GET_GLOBAL_IDX;\n" -"\n" " if( gIdx < nContacts )\n" " {\n" " int2 sdIn;\n" @@ -478,12 +417,10 @@ static const char* solverSetup2CL= \ " sortDataInOut[gIdx] = sdOut;\n" " }\n" "}\n" -"\n" "__kernel __attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" -"void SetDeterminismSortDataBodyA(__global Contact4* contactsIn, __global int2* sortDataInOut, int nContacts)\n" +"void SetDeterminismSortDataBodyA(__global struct b3Contact4Data* contactsIn, __global int2* sortDataInOut, int nContacts)\n" "{\n" " int gIdx = GET_GLOBAL_IDX;\n" -"\n" " if( gIdx < nContacts )\n" " {\n" " int2 sdIn;\n" @@ -494,14 +431,11 @@ static const char* solverSetup2CL= \ " sortDataInOut[gIdx] = sdOut;\n" " }\n" "}\n" -"\n" -"\n" "__kernel\n" "__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" -"void SetDeterminismSortDataBodyB(__global Contact4* contactsIn, __global int2* sortDataInOut, int nContacts)\n" +"void SetDeterminismSortDataBodyB(__global struct b3Contact4Data* contactsIn, __global int2* sortDataInOut, int nContacts)\n" "{\n" " int gIdx = GET_GLOBAL_IDX;\n" -"\n" " if( gIdx < nContacts )\n" " {\n" " int2 sdIn;\n" @@ -512,10 +446,6 @@ static const char* solverSetup2CL= \ " sortDataInOut[gIdx] = sdOut;\n" " }\n" "}\n" -"\n" -"\n" -"\n" -"\n" "typedef struct\n" "{\n" " int m_nContacts;\n" @@ -523,8 +453,6 @@ static const char* solverSetup2CL= \ " float m_scale;\n" " int m_nSplit;\n" "} ConstBufferSSD;\n" -"\n" -"\n" "static __constant const int gridTable4x4[] = \n" "{\n" " 0,1,17,16,\n" @@ -532,7 +460,6 @@ static const char* solverSetup2CL= \ " 17,18,32,3,\n" " 16,19,3,34\n" "};\n" -"\n" "static __constant const int gridTable8x8[] = \n" "{\n" " 0, 2, 3, 16, 17, 18, 19, 1,\n" @@ -545,18 +472,12 @@ static const char* solverSetup2CL= \ " 197,27,214,213,212,199,198,196\n" " \n" "};\n" -"\n" -"\n" -"\n" -"\n" "#define USE_SPATIAL_BATCHING 1\n" "#define USE_4x4_GRID 1\n" -"\n" "__kernel\n" "__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" -"void SetSortDataKernel(__global Contact4* gContact, __global Body* gBodies, __global int2* gSortDataOut, \n" +"void SetSortDataKernel(__global struct b3Contact4Data* gContact, __global Body* gBodies, __global int2* gSortDataOut, \n" "int nContacts,float scale,int4 nSplit,int staticIdx)\n" -"\n" "{\n" " int gIdx = GET_GLOBAL_IDX;\n" " \n" @@ -564,13 +485,10 @@ static const char* solverSetup2CL= \ " {\n" " int aPtrAndSignBit = gContact[gIdx].m_bodyAPtrAndSignBit;\n" " int bPtrAndSignBit = gContact[gIdx].m_bodyBPtrAndSignBit;\n" -"\n" " int aIdx = abs(aPtrAndSignBit );\n" " int bIdx = abs(bPtrAndSignBit);\n" -"\n" " bool aStatic = (aPtrAndSignBit<0) ||(aPtrAndSignBit==staticIdx);\n" " bool bStatic = (bPtrAndSignBit<0) ||(bPtrAndSignBit==staticIdx);\n" -"\n" "#if USE_SPATIAL_BATCHING \n" " int idx = (aStatic)? bIdx: aIdx;\n" " float4 p = gBodies[idx].m_pos;\n" @@ -587,7 +505,6 @@ static const char* solverSetup2CL= \ " aa = bb;\n" " if (bStatic)\n" " bb = aa;\n" -"\n" " int gridIndex = aa + bb*4;\n" " int newIndex = gridTable4x4[gridIndex];\n" " #else//USE_4x4_GRID\n" @@ -597,13 +514,10 @@ static const char* solverSetup2CL= \ " aa = bb;\n" " if (bStatic)\n" " bb = aa;\n" -"\n" " int gridIndex = aa + bb*8;\n" " int newIndex = gridTable8x8[gridIndex];\n" " #endif//USE_4x4_GRID\n" "#endif//USE_SPATIAL_BATCHING\n" -"\n" -"\n" " gSortDataOut[gIdx].x = newIndex;\n" " gSortDataOut[gIdx].y = gIdx;\n" " }\n" @@ -612,10 +526,9 @@ static const char* solverSetup2CL= \ " gSortDataOut[gIdx].x = 0xffffffff;\n" " }\n" "}\n" -"\n" "__kernel\n" "__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" -"void CopyConstraintKernel(__global Contact4* gIn, __global Contact4* gOut, int4 cb )\n" +"void CopyConstraintKernel(__global struct b3Contact4Data* gIn, __global struct b3Contact4Data* gOut, int4 cb )\n" "{\n" " int gIdx = GET_GLOBAL_IDX;\n" " if( gIdx < cb.x )\n" @@ -623,7 +536,4 @@ static const char* solverSetup2CL= \ " gOut[gIdx] = gIn[gIdx];\n" " }\n" "}\n" -"\n" -"\n" -"\n" ; diff --git a/src/Bullet3OpenCL/RigidBody/kernels/solverUtils.cl b/src/Bullet3OpenCL/RigidBody/kernels/solverUtils.cl index 19e58220f..ab0346d98 100644 --- a/src/Bullet3OpenCL/RigidBody/kernels/solverUtils.cl +++ b/src/Bullet3OpenCL/RigidBody/kernels/solverUtils.cl @@ -13,6 +13,8 @@ subject to the following restrictions: */ //Originally written by Erwin Coumans +#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Contact4Data.h" + #pragma OPENCL EXTENSION cl_amd_printf : enable #pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable #pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable @@ -380,25 +382,10 @@ typedef struct u32 m_paddings; } Constraint4; -typedef struct -{ - float4 m_worldPos[4]; - float4 m_worldNormal; - u32 m_coeffs; - int m_batchIdx; - - int m_bodyAPtrAndSignBit; - int m_bodyBPtrAndSignBit; - - int m_childIndexA; - int m_childIndexB; - int m_unused1; - int m_unused2; - -} Contact4; -__kernel void CountBodiesKernel(__global Contact4* manifoldPtr, __global unsigned int* bodyCount, __global int2* contactConstraintOffsets, int numContactManifolds, int fixedBodyIndex) + +__kernel void CountBodiesKernel(__global struct b3Contact4Data* manifoldPtr, __global unsigned int* bodyCount, __global int2* contactConstraintOffsets, int numContactManifolds, int fixedBodyIndex) { int i = GET_GLOBAL_IDX; @@ -844,7 +831,7 @@ __kernel void UpdateBodyVelocitiesKernel(__global Body* gBodies,__global int* of void setConstraint4( const float4 posA, const float4 linVelA, const float4 angVelA, float invMassA, const Matrix3x3 invInertiaA, const float4 posB, const float4 linVelB, const float4 angVelB, float invMassB, const Matrix3x3 invInertiaB, - __global Contact4* src, float dt, float positionDrift, float positionConstraintCoeff,float countA, float countB, + __global struct b3Contact4Data* src, float dt, float positionDrift, float positionConstraintCoeff,float countA, float countB, Constraint4* dstC ) { dstC->m_bodyA = abs(src->m_bodyAPtrAndSignBit); @@ -934,7 +921,7 @@ void setConstraint4( const float4 posA, const float4 linVelA, const float4 angVe __kernel __attribute__((reqd_work_group_size(WG_SIZE,1,1))) -void ContactToConstraintSplitKernel(__global const Contact4* gContact, __global const Body* gBodies, __global const Shape* gShapes, __global Constraint4* gConstraintOut, +void ContactToConstraintSplitKernel(__global const struct b3Contact4Data* gContact, __global const Body* gBodies, __global const Shape* gShapes, __global Constraint4* gConstraintOut, __global const unsigned int* bodyCount, int nContacts, float dt, diff --git a/src/Bullet3OpenCL/RigidBody/kernels/solverUtils.h b/src/Bullet3OpenCL/RigidBody/kernels/solverUtils.h index 2c31dc31d..4c8a43843 100644 --- a/src/Bullet3OpenCL/RigidBody/kernels/solverUtils.h +++ b/src/Bullet3OpenCL/RigidBody/kernels/solverUtils.h @@ -2,36 +2,71 @@ static const char* solverUtilsCL= \ "/*\n" "Copyright (c) 2013 Advanced Micro Devices, Inc. \n" -"\n" "This software is provided 'as-is', without any express or implied warranty.\n" "In no event will the authors be held liable for any damages arising from the use of this software.\n" "Permission is granted to anyone to use this software for any purpose, \n" "including commercial applications, and to alter it and redistribute it freely, \n" "subject to the following restrictions:\n" -"\n" "1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" "2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" "3. This notice may not be removed or altered from any source distribution.\n" "*/\n" "//Originally written by Erwin Coumans\n" -"\n" +"#ifndef B3_CONTACT4DATA_H\n" +"#define B3_CONTACT4DATA_H\n" +"#ifndef B3_FLOAT4_H\n" +"#define B3_FLOAT4_H\n" +"#ifndef B3_PLATFORM_DEFINITIONS_H\n" +"#define B3_PLATFORM_DEFINITIONS_H\n" +"struct MyTest\n" +"{\n" +" int bla;\n" +"};\n" +"#endif\n" +"#ifdef __cplusplus\n" +"#else//bla\n" +" typedef float4 b3Float4;\n" +"#endif \n" +"#endif //B3_FLOAT4_H\n" +"typedef struct b3Contact4Data b3Contact4Data_t;\n" +"struct b3Contact4Data\n" +"{\n" +" b3Float4 m_worldPos[4];\n" +"// b3Float4 m_localPosB[4];\n" +" b3Float4 m_worldNormal; // w: m_nPoints\n" +" unsigned short m_restituitionCoeffCmp;\n" +" unsigned short m_frictionCoeffCmp;\n" +" int m_batchIdx;\n" +" int m_bodyAPtrAndSignBit;//x:m_bodyAPtr, y:m_bodyBPtr\n" +" int m_bodyBPtrAndSignBit;\n" +" int m_childIndexA;\n" +" int m_childIndexB;\n" +" int m_unused1;\n" +" int m_unused2;\n" +" b3Float4 m_localPosA;\n" +"};\n" +"inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)\n" +"{\n" +" return (int)contact->m_worldNormal.w;\n" +"};\n" +"inline void b3Contact4Data_setNumPoints(struct b3Contact4Data* contact, int numPoints)\n" +"{\n" +" contact->m_worldNormal.w = (float)numPoints;\n" +"};\n" +"#endif //B3_CONTACT4DATA_H\n" "#pragma OPENCL EXTENSION cl_amd_printf : enable\n" "#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n" "#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n" "#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n" "#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n" -"\n" -"\n" "#ifdef cl_ext_atomic_counters_32\n" "#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable\n" "#else\n" "#define counter32_t volatile global int*\n" "#endif\n" -"\n" "typedef unsigned int u32;\n" "typedef unsigned short u16;\n" "typedef unsigned char u8;\n" -"\n" "#define GET_GROUP_IDX get_group_id(0)\n" "#define GET_LOCAL_IDX get_local_id(0)\n" "#define GET_GLOBAL_IDX get_global_id(0)\n" @@ -45,22 +80,15 @@ static const char* solverUtilsCL= \ "#define AtomAdd(x, value) atom_add(&(x), value)\n" "#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value )\n" "#define AtomXhg(x, value) atom_xchg ( &(x), value )\n" -"\n" -"\n" "#define SELECT_UINT4( b, a, condition ) select( b,a,condition )\n" -"\n" "#define make_float4 (float4)\n" "#define make_float2 (float2)\n" "#define make_uint4 (uint4)\n" "#define make_int4 (int4)\n" "#define make_uint2 (uint2)\n" "#define make_int2 (int2)\n" -"\n" -"\n" "#define max2 max\n" "#define min2 min\n" -"\n" -"\n" "///////////////////////////////////////\n" "// Vector\n" "///////////////////////////////////////\n" @@ -70,57 +98,47 @@ static const char* solverUtilsCL= \ " return native_divide(numerator, denominator); \n" "// return numerator/denominator; \n" "}\n" -"\n" "__inline\n" "float4 fastDiv4(float4 numerator, float4 denominator)\n" "{\n" " return native_divide(numerator, denominator); \n" "}\n" -"\n" "__inline\n" "float fastSqrtf(float f2)\n" "{\n" " return native_sqrt(f2);\n" "// return sqrt(f2);\n" "}\n" -"\n" "__inline\n" "float fastRSqrt(float f2)\n" "{\n" " return native_rsqrt(f2);\n" "}\n" -"\n" "__inline\n" "float fastLength4(float4 v)\n" "{\n" " return fast_length(v);\n" "}\n" -"\n" "__inline\n" "float4 fastNormalize4(float4 v)\n" "{\n" " return fast_normalize(v);\n" "}\n" -"\n" -"\n" "__inline\n" "float sqrtf(float a)\n" "{\n" "// return sqrt(a);\n" " return native_sqrt(a);\n" "}\n" -"\n" "__inline\n" "float4 cross3(float4 a1, float4 b1)\n" "{\n" -"\n" " float4 a=make_float4(a1.xyz,0.f);\n" " float4 b=make_float4(b1.xyz,0.f);\n" " //float4 a=a1;\n" " //float4 b=b1;\n" " return cross(a,b);\n" "}\n" -"\n" "__inline\n" "float dot3F4(float4 a, float4 b)\n" "{\n" @@ -128,26 +146,22 @@ static const char* solverUtilsCL= \ " float4 b1 = make_float4(b.xyz,0.f);\n" " return dot(a1, b1);\n" "}\n" -"\n" "__inline\n" "float length3(const float4 a)\n" "{\n" " return sqrtf(dot3F4(a,a));\n" "}\n" -"\n" "__inline\n" "float dot4(const float4 a, const float4 b)\n" "{\n" " return dot( a, b );\n" "}\n" -"\n" "// for height\n" "__inline\n" "float dot3w1(const float4 point, const float4 eqn)\n" "{\n" " return dot3F4(point,eqn) + eqn.w;\n" "}\n" -"\n" "__inline\n" "float4 normalize3(const float4 a)\n" "{\n" @@ -156,14 +170,12 @@ static const char* solverUtilsCL= \ "// float length = sqrtf(dot3F4(a, a));\n" "// return 1.f/length * a;\n" "}\n" -"\n" "__inline\n" "float4 normalize4(const float4 a)\n" "{\n" " float length = sqrtf(dot4(a, a));\n" " return 1.f/length * a;\n" "}\n" -"\n" "__inline\n" "float4 createEquation(const float4 a, const float4 b, const float4 c)\n" "{\n" @@ -174,34 +186,25 @@ static const char* solverUtilsCL= \ " eqn.w = -dot3F4(eqn,a);\n" " return eqn;\n" "}\n" -"\n" "///////////////////////////////////////\n" "// Matrix3x3\n" "///////////////////////////////////////\n" -"\n" "typedef struct\n" "{\n" " float4 m_row[3];\n" "}Matrix3x3;\n" -"\n" "__inline\n" "Matrix3x3 mtZero();\n" -"\n" "__inline\n" "Matrix3x3 mtIdentity();\n" -"\n" "__inline\n" "Matrix3x3 mtTranspose(Matrix3x3 m);\n" -"\n" "__inline\n" "Matrix3x3 mtMul(Matrix3x3 a, Matrix3x3 b);\n" -"\n" "__inline\n" "float4 mtMul1(Matrix3x3 a, float4 b);\n" -"\n" "__inline\n" "float4 mtMul3(float4 a, Matrix3x3 b);\n" -"\n" "__inline\n" "Matrix3x3 mtZero()\n" "{\n" @@ -211,7 +214,6 @@ static const char* solverUtilsCL= \ " m.m_row[2] = (float4)(0.f);\n" " return m;\n" "}\n" -"\n" "__inline\n" "Matrix3x3 mtIdentity()\n" "{\n" @@ -221,7 +223,6 @@ static const char* solverUtilsCL= \ " m.m_row[2] = (float4)(0,0,1,0);\n" " return m;\n" "}\n" -"\n" "__inline\n" "Matrix3x3 mtTranspose(Matrix3x3 m)\n" "{\n" @@ -231,7 +232,6 @@ static const char* solverUtilsCL= \ " out.m_row[2] = (float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);\n" " return out;\n" "}\n" -"\n" "__inline\n" "Matrix3x3 mtMul(Matrix3x3 a, Matrix3x3 b)\n" "{\n" @@ -252,7 +252,6 @@ static const char* solverUtilsCL= \ " }\n" " return ans;\n" "}\n" -"\n" "__inline\n" "float4 mtMul1(Matrix3x3 a, float4 b)\n" "{\n" @@ -263,43 +262,30 @@ static const char* solverUtilsCL= \ " ans.w = 0.f;\n" " return ans;\n" "}\n" -"\n" "__inline\n" "float4 mtMul3(float4 a, Matrix3x3 b)\n" "{\n" " float4 colx = make_float4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n" " float4 coly = make_float4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n" " float4 colz = make_float4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n" -"\n" " float4 ans;\n" " ans.x = dot3F4( a, colx );\n" " ans.y = dot3F4( a, coly );\n" " ans.z = dot3F4( a, colz );\n" " return ans;\n" "}\n" -"\n" "///////////////////////////////////////\n" "// Quaternion\n" "///////////////////////////////////////\n" -"\n" "typedef float4 Quaternion;\n" -"\n" "__inline\n" "Quaternion qtMul(Quaternion a, Quaternion b);\n" -"\n" "__inline\n" "Quaternion qtNormalize(Quaternion in);\n" -"\n" "__inline\n" "float4 qtRotate(Quaternion q, float4 vec);\n" -"\n" "__inline\n" "Quaternion qtInvert(Quaternion q);\n" -"\n" -"\n" -"\n" -"\n" -"\n" "__inline\n" "Quaternion qtMul(Quaternion a, Quaternion b)\n" "{\n" @@ -310,7 +296,6 @@ static const char* solverUtilsCL= \ " ans.w = a.w*b.w - dot3F4(a, b);\n" " return ans;\n" "}\n" -"\n" "__inline\n" "Quaternion qtNormalize(Quaternion in)\n" "{\n" @@ -327,43 +312,33 @@ static const char* solverUtilsCL= \ " float4 out = qtMul(qtMul(q,vcpy),qInv);\n" " return out;\n" "}\n" -"\n" "__inline\n" "Quaternion qtInvert(Quaternion q)\n" "{\n" " return (Quaternion)(-q.xyz, q.w);\n" "}\n" -"\n" "__inline\n" "float4 qtInvRotate(const Quaternion q, float4 vec)\n" "{\n" " return qtRotate( qtInvert( q ), vec );\n" "}\n" -"\n" -"\n" -"\n" -"\n" "#define WG_SIZE 64\n" -"\n" "typedef struct\n" "{\n" " float4 m_pos;\n" " Quaternion m_quat;\n" " float4 m_linVel;\n" " float4 m_angVel;\n" -"\n" " u32 m_shapeIdx;\n" " float m_invMass;\n" " float m_restituitionCoeff;\n" " float m_frictionCoeff;\n" "} Body;\n" -"\n" "typedef struct\n" "{\n" " Matrix3x3 m_invInertia;\n" " Matrix3x3 m_initInvInertia;\n" "} Shape;\n" -"\n" "typedef struct\n" "{\n" " float4 m_linear;\n" @@ -372,35 +347,14 @@ static const char* solverUtilsCL= \ " float m_jacCoeffInv[4];\n" " float m_b[4];\n" " float m_appliedRambdaDt[4];\n" -"\n" " float m_fJacCoeffInv[2]; \n" " float m_fAppliedRambdaDt[2]; \n" -"\n" " u32 m_bodyA;\n" " u32 m_bodyB;\n" " int m_batchIdx;\n" " u32 m_paddings;\n" "} Constraint4;\n" -"\n" -"typedef struct\n" -"{\n" -" float4 m_worldPos[4];\n" -" float4 m_worldNormal;\n" -" u32 m_coeffs;\n" -" int m_batchIdx;\n" -"\n" -" int m_bodyAPtrAndSignBit;\n" -" int m_bodyBPtrAndSignBit;\n" -"\n" -" int m_childIndexA;\n" -" int m_childIndexB;\n" -" int m_unused1;\n" -" int m_unused2;\n" -"\n" -"} Contact4;\n" -"\n" -"\n" -"__kernel void CountBodiesKernel(__global Contact4* manifoldPtr, __global unsigned int* bodyCount, __global int2* contactConstraintOffsets, int numContactManifolds, int fixedBodyIndex)\n" +"__kernel void CountBodiesKernel(__global struct b3Contact4Data* manifoldPtr, __global unsigned int* bodyCount, __global int2* contactConstraintOffsets, int numContactManifolds, int fixedBodyIndex)\n" "{\n" " int i = GET_GLOBAL_IDX;\n" " \n" @@ -423,7 +377,6 @@ static const char* solverUtilsCL= \ " } \n" " }\n" "}\n" -"\n" "__kernel void ClearVelocitiesKernel(__global float4* linearVelocities,__global float4* angularVelocities, int numSplitBodies)\n" "{\n" " int i = GET_GLOBAL_IDX;\n" @@ -434,8 +387,6 @@ static const char* solverUtilsCL= \ " angularVelocities[i] = make_float4(0);\n" " }\n" "}\n" -"\n" -"\n" "__kernel void AverageVelocitiesKernel(__global Body* gBodies,__global int* offsetSplitBodies,__global const unsigned int* bodyCount,\n" "__global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities, int numBodies)\n" "{\n" @@ -465,23 +416,16 @@ static const char* solverUtilsCL= \ " }//bodies[i].m_invMass\n" " }//im_jacCoeffInv[ic] == 0.f ) continue;\n" -"\n" " float4 angular0, angular1, linear;\n" " float4 r0 = cs->m_worldPos[ic] - posA;\n" " float4 r1 = cs->m_worldPos[ic] - posB;\n" " setLinearAndAngular( -cs->m_linear, r0, r1, &linear, &angular0, &angular1 );\n" " \n" -"\n" -"\n" " float rambdaDt = calcRelVel( cs->m_linear, -cs->m_linear, angular0, angular1, \n" " *linVelA+*dLinVelA, *angVelA+*dAngVelA, *linVelB+*dLinVelB, *angVelB+*dAngVelB ) + cs->m_b[ic];\n" " rambdaDt *= cs->m_jacCoeffInv[ic];\n" -"\n" " \n" " {\n" " float prevSum = cs->m_appliedRambdaDt[ic];\n" @@ -560,13 +492,11 @@ static const char* solverUtilsCL= \ " rambdaDt = updated - prevSum;\n" " cs->m_appliedRambdaDt[ic] = updated;\n" " }\n" -"\n" " \n" " float4 linImp0 = invMassA*linear*rambdaDt;\n" " float4 linImp1 = invMassB*(-linear)*rambdaDt;\n" " float4 angImp0 = mtMul1(invInertiaA, angular0)*rambdaDt;\n" " float4 angImp1 = mtMul1(invInertiaB, angular1)*rambdaDt;\n" -"\n" " \n" " if (invMassA)\n" " {\n" @@ -580,32 +510,24 @@ static const char* solverUtilsCL= \ " }\n" " }\n" "}\n" -"\n" -"\n" "// solveContactConstraint( gBodies, gShapes, &gConstraints[i] ,contactConstraintOffsets,offsetSplitBodies, deltaLinearVelocities, deltaAngularVelocities);\n" -"\n" -"\n" "void solveContactConstraint(__global Body* gBodies, __global Shape* gShapes, __global Constraint4* ldsCs, \n" "__global int2* contactConstraintOffsets,__global unsigned int* offsetSplitBodies,\n" "__global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities)\n" "{\n" -"\n" " //float frictionCoeff = ldsCs[0].m_linear.w;\n" " int aIdx = ldsCs[0].m_bodyA;\n" " int bIdx = ldsCs[0].m_bodyB;\n" -"\n" " float4 posA = gBodies[aIdx].m_pos;\n" " float4 linVelA = gBodies[aIdx].m_linVel;\n" " float4 angVelA = gBodies[aIdx].m_angVel;\n" " float invMassA = gBodies[aIdx].m_invMass;\n" " Matrix3x3 invInertiaA = gShapes[aIdx].m_invInertia;\n" -"\n" " float4 posB = gBodies[bIdx].m_pos;\n" " float4 linVelB = gBodies[bIdx].m_linVel;\n" " float4 angVelB = gBodies[bIdx].m_angVel;\n" " float invMassB = gBodies[bIdx].m_invMass;\n" " Matrix3x3 invInertiaB = gShapes[bIdx].m_invInertia;\n" -"\n" " \n" " float4 dLinVelA = make_float4(0,0,0,0);\n" " float4 dAngVelA = make_float4(0,0,0,0);\n" @@ -621,20 +543,16 @@ static const char* solverUtilsCL= \ " dLinVelA = deltaLinearVelocities[splitIndexA];\n" " dAngVelA = deltaAngularVelocities[splitIndexA];\n" " }\n" -"\n" " int bodyOffsetB = offsetSplitBodies[bIdx];\n" " int constraintOffsetB = contactConstraintOffsets[0].y;\n" " int splitIndexB= bodyOffsetB+constraintOffsetB;\n" -"\n" " if (invMassB)\n" " {\n" " dLinVelB = deltaLinearVelocities[splitIndexB];\n" " dAngVelB = deltaAngularVelocities[splitIndexB];\n" " }\n" -"\n" " solveContact( ldsCs, posA, &linVelA, &angVelA, invMassA, invInertiaA,\n" " posB, &linVelB, &angVelB, invMassB, invInertiaB ,&dLinVelA, &dAngVelA, &dLinVelB, &dAngVelB);\n" -"\n" " if (invMassA)\n" " {\n" " deltaLinearVelocities[splitIndexA] = dLinVelA;\n" @@ -645,10 +563,7 @@ static const char* solverUtilsCL= \ " deltaLinearVelocities[splitIndexB] = dLinVelB;\n" " deltaAngularVelocities[splitIndexB] = dAngVelB;\n" " }\n" -"\n" "}\n" -"\n" -"\n" "__kernel void SolveContactJacobiKernel(__global Constraint4* gConstraints, __global Body* gBodies, __global Shape* gShapes ,\n" "__global int2* contactConstraintOffsets,__global unsigned int* offsetSplitBodies,__global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities,\n" "float deltaTime, float positionDrift, float positionConstraintCoeff, int fixedBodyIndex, int numManifolds\n" @@ -660,10 +575,6 @@ static const char* solverUtilsCL= \ " solveContactConstraint( gBodies, gShapes, &gConstraints[i] ,&contactConstraintOffsets[i],offsetSplitBodies, deltaLinearVelocities, deltaAngularVelocities);\n" " }\n" "}\n" -"\n" -"\n" -"\n" -"\n" "void solveFrictionConstraint(__global Body* gBodies, __global Shape* gShapes, __global Constraint4* ldsCs,\n" " __global int2* contactConstraintOffsets,__global unsigned int* offsetSplitBodies,\n" " __global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities)\n" @@ -671,21 +582,17 @@ static const char* solverUtilsCL= \ " float frictionCoeff = 0.7f;//ldsCs[0].m_linear.w;\n" " int aIdx = ldsCs[0].m_bodyA;\n" " int bIdx = ldsCs[0].m_bodyB;\n" -"\n" -"\n" " float4 posA = gBodies[aIdx].m_pos;\n" " float4 linVelA = gBodies[aIdx].m_linVel;\n" " float4 angVelA = gBodies[aIdx].m_angVel;\n" " float invMassA = gBodies[aIdx].m_invMass;\n" " Matrix3x3 invInertiaA = gShapes[aIdx].m_invInertia;\n" -"\n" " float4 posB = gBodies[bIdx].m_pos;\n" " float4 linVelB = gBodies[bIdx].m_linVel;\n" " float4 angVelB = gBodies[bIdx].m_angVel;\n" " float invMassB = gBodies[bIdx].m_invMass;\n" " Matrix3x3 invInertiaB = gShapes[bIdx].m_invInertia;\n" " \n" -"\n" " float4 dLinVelA = make_float4(0,0,0,0);\n" " float4 dAngVelA = make_float4(0,0,0,0);\n" " float4 dLinVelB = make_float4(0,0,0,0);\n" @@ -700,24 +607,17 @@ static const char* solverUtilsCL= \ " dLinVelA = deltaLinearVelocities[splitIndexA];\n" " dAngVelA = deltaAngularVelocities[splitIndexA];\n" " }\n" -"\n" " int bodyOffsetB = offsetSplitBodies[bIdx];\n" " int constraintOffsetB = contactConstraintOffsets[0].y;\n" " int splitIndexB= bodyOffsetB+constraintOffsetB;\n" -"\n" " if (invMassB)\n" " {\n" " dLinVelB = deltaLinearVelocities[splitIndexB];\n" " dAngVelB = deltaAngularVelocities[splitIndexB];\n" " }\n" -"\n" -"\n" -"\n" -"\n" " {\n" " float maxRambdaDt[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX};\n" " float minRambdaDt[4] = {0.f,0.f,0.f,0.f};\n" -"\n" " float sum = 0;\n" " for(int j=0; j<4; j++)\n" " {\n" @@ -729,7 +629,6 @@ static const char* solverUtilsCL= \ " maxRambdaDt[j] = frictionCoeff*sum;\n" " minRambdaDt[j] = -maxRambdaDt[j];\n" " }\n" -"\n" " \n" "// solveFriction( ldsCs, posA, &linVelA, &angVelA, invMassA, invInertiaA,\n" "// posB, &linVelB, &angVelB, invMassB, invInertiaB, maxRambdaDt, minRambdaDt );\n" @@ -789,11 +688,9 @@ static const char* solverUtilsCL= \ " }\n" " }\n" " }\n" -"\n" " \n" " \n" " }\n" -"\n" " if (invMassA)\n" " {\n" " deltaLinearVelocities[splitIndexA] = dLinVelA;\n" @@ -805,10 +702,7 @@ static const char* solverUtilsCL= \ " deltaAngularVelocities[splitIndexB] = dAngVelB;\n" " }\n" " \n" -"\n" "}\n" -"\n" -"\n" "__kernel void SolveFrictionJacobiKernel(__global Constraint4* gConstraints, __global Body* gBodies, __global Shape* gShapes ,\n" " __global int2* contactConstraintOffsets,__global unsigned int* offsetSplitBodies,\n" " __global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities,\n" @@ -821,8 +715,6 @@ static const char* solverUtilsCL= \ " solveFrictionConstraint( gBodies, gShapes, &gConstraints[i] ,&contactConstraintOffsets[i],offsetSplitBodies, deltaLinearVelocities, deltaAngularVelocities);\n" " }\n" "}\n" -"\n" -"\n" "__kernel void UpdateBodyVelocitiesKernel(__global Body* gBodies,__global int* offsetSplitBodies,__global const unsigned int* bodyCount,\n" " __global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities, int numBodies)\n" "{\n" @@ -841,85 +733,68 @@ static const char* solverUtilsCL= \ " }\n" " }\n" "}\n" -"\n" -"\n" -"\n" "void setConstraint4( const float4 posA, const float4 linVelA, const float4 angVelA, float invMassA, const Matrix3x3 invInertiaA,\n" " const float4 posB, const float4 linVelB, const float4 angVelB, float invMassB, const Matrix3x3 invInertiaB, \n" -" __global Contact4* src, float dt, float positionDrift, float positionConstraintCoeff,float countA, float countB,\n" +" __global struct b3Contact4Data* src, float dt, float positionDrift, float positionConstraintCoeff,float countA, float countB,\n" " Constraint4* dstC )\n" "{\n" " dstC->m_bodyA = abs(src->m_bodyAPtrAndSignBit);\n" " dstC->m_bodyB = abs(src->m_bodyBPtrAndSignBit);\n" -"\n" " float dtInv = 1.f/dt;\n" " for(int ic=0; ic<4; ic++)\n" " {\n" " dstC->m_appliedRambdaDt[ic] = 0.f;\n" " }\n" " dstC->m_fJacCoeffInv[0] = dstC->m_fJacCoeffInv[1] = 0.f;\n" -"\n" -"\n" " dstC->m_linear = -src->m_worldNormal;\n" " dstC->m_linear.w = 0.7f ;//src->getFrictionCoeff() );\n" " for(int ic=0; ic<4; ic++)\n" " {\n" " float4 r0 = src->m_worldPos[ic] - posA;\n" " float4 r1 = src->m_worldPos[ic] - posB;\n" -"\n" " if( ic >= src->m_worldNormal.w )//npoints\n" " {\n" " dstC->m_jacCoeffInv[ic] = 0.f;\n" " continue;\n" " }\n" -"\n" " float relVelN;\n" " {\n" " float4 linear, angular0, angular1;\n" " setLinearAndAngular(src->m_worldNormal, r0, r1, &linear, &angular0, &angular1);\n" -"\n" " dstC->m_jacCoeffInv[ic] = calcJacCoeff(linear, -linear, angular0, angular1,\n" " invMassA, &invInertiaA, invMassB, &invInertiaB , countA, countB);\n" -"\n" " relVelN = calcRelVel(linear, -linear, angular0, angular1,\n" " linVelA, angVelA, linVelB, angVelB);\n" -"\n" " float e = 0.f;//src->getRestituitionCoeff();\n" " if( relVelN*relVelN < 0.004f ) e = 0.f;\n" -"\n" " dstC->m_b[ic] = e*relVelN;\n" " //float penetration = src->m_worldPos[ic].w;\n" " dstC->m_b[ic] += (src->m_worldPos[ic].w + positionDrift)*positionConstraintCoeff*dtInv;\n" " dstC->m_appliedRambdaDt[ic] = 0.f;\n" " }\n" " }\n" -"\n" " if( src->m_worldNormal.w > 0 )//npoints\n" " { // prepare friction\n" " float4 center = make_float4(0.f);\n" " for(int i=0; im_worldNormal.w; i++) \n" " center += src->m_worldPos[i];\n" " center /= (float)src->m_worldNormal.w;\n" -"\n" " float4 tangent[2];\n" " btPlaneSpace1(src->m_worldNormal,&tangent[0],&tangent[1]);\n" " \n" " float4 r[2];\n" " r[0] = center - posA;\n" " r[1] = center - posB;\n" -"\n" " for(int i=0; i<2; i++)\n" " {\n" " float4 linear, angular0, angular1;\n" " setLinearAndAngular(tangent[i], r[0], r[1], &linear, &angular0, &angular1);\n" -"\n" " dstC->m_fJacCoeffInv[i] = calcJacCoeff(linear, -linear, angular0, angular1,\n" " invMassA, &invInertiaA, invMassB, &invInertiaB ,countA, countB);\n" " dstC->m_fAppliedRambdaDt[i] = 0.f;\n" " }\n" " dstC->m_center = center;\n" " }\n" -"\n" " for(int i=0; i<4; i++)\n" " {\n" " if( im_worldNormal.w )\n" @@ -932,11 +807,9 @@ static const char* solverUtilsCL= \ " }\n" " }\n" "}\n" -"\n" -"\n" "__kernel\n" "__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" -"void ContactToConstraintSplitKernel(__global const Contact4* gContact, __global const Body* gBodies, __global const Shape* gShapes, __global Constraint4* gConstraintOut, \n" +"void ContactToConstraintSplitKernel(__global const struct b3Contact4Data* gContact, __global const Body* gBodies, __global const Shape* gShapes, __global Constraint4* gConstraintOut, \n" "__global const unsigned int* bodyCount,\n" "int nContacts,\n" "float dt,\n" @@ -950,30 +823,24 @@ static const char* solverUtilsCL= \ " {\n" " int aIdx = abs(gContact[gIdx].m_bodyAPtrAndSignBit);\n" " int bIdx = abs(gContact[gIdx].m_bodyBPtrAndSignBit);\n" -"\n" " float4 posA = gBodies[aIdx].m_pos;\n" " float4 linVelA = gBodies[aIdx].m_linVel;\n" " float4 angVelA = gBodies[aIdx].m_angVel;\n" " float invMassA = gBodies[aIdx].m_invMass;\n" " Matrix3x3 invInertiaA = gShapes[aIdx].m_invInertia;\n" -"\n" " float4 posB = gBodies[bIdx].m_pos;\n" " float4 linVelB = gBodies[bIdx].m_linVel;\n" " float4 angVelB = gBodies[bIdx].m_angVel;\n" " float invMassB = gBodies[bIdx].m_invMass;\n" " Matrix3x3 invInertiaB = gShapes[bIdx].m_invInertia;\n" -"\n" " Constraint4 cs;\n" -"\n" " float countA = invMassA ? (float)bodyCount[aIdx] : 1;\n" " float countB = invMassB ? (float)bodyCount[bIdx] : 1;\n" -"\n" " setConstraint4( posA, linVelA, angVelA, invMassA, invInertiaA, posB, linVelB, angVelB, invMassB, invInertiaB,\n" " &gContact[gIdx], dt, positionDrift, positionConstraintCoeff,countA,countB,\n" " &cs );\n" " \n" " cs.m_batchIdx = gContact[gIdx].m_batchIdx;\n" -"\n" " gConstraintOut[gIdx] = cs;\n" " }\n" "}\n" diff --git a/src/Bullet3OpenCL/RigidBody/kernels/updateAabbsKernel.h b/src/Bullet3OpenCL/RigidBody/kernels/updateAabbsKernel.h index 899615af9..eef8e4659 100644 --- a/src/Bullet3OpenCL/RigidBody/kernels/updateAabbsKernel.h +++ b/src/Bullet3OpenCL/RigidBody/kernels/updateAabbsKernel.h @@ -1,15 +1,12 @@ //this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project static const char* updateAabbsKernelCL= \ "#define SHAPE_CONVEX_HULL 3\n" -"\n" "typedef float4 Quaternion;\n" -"\n" "__inline\n" "float4 cross3(float4 a, float4 b)\n" "{\n" " return cross(a,b);\n" "}\n" -"\n" "__inline\n" "float dot3F4(float4 a, float4 b)\n" "{\n" @@ -17,8 +14,6 @@ static const char* updateAabbsKernelCL= \ " float4 b1 = (float4)(b.xyz,0.f);\n" " return dot(a1, b1);\n" "}\n" -"\n" -"\n" "__inline\n" "Quaternion qtMul(Quaternion a, Quaternion b)\n" "{\n" @@ -28,13 +23,11 @@ static const char* updateAabbsKernelCL= \ " ans.w = a.w*b.w - dot3F4(a, b);\n" " return ans;\n" "}\n" -"\n" "__inline\n" "Quaternion qtInvert(Quaternion q)\n" "{\n" " return (Quaternion)(-q.xyz, q.w);\n" "}\n" -"\n" "__inline\n" "float4 qtRotate(Quaternion q, float4 vec)\n" "{\n" @@ -44,34 +37,27 @@ static const char* updateAabbsKernelCL= \ " float4 out = qtMul(qtMul(q,vcpy),qInv);\n" " return out;\n" "}\n" -"\n" "__inline\n" "float4 transform(const float4* p, const float4* translation, const Quaternion* orientation)\n" "{\n" " return qtRotate( *orientation, *p ) + (*translation);\n" "}\n" -"\n" "typedef struct\n" "{\n" " float4 m_row[3];\n" "} Matrix3x3;\n" -"\n" "typedef unsigned int u32;\n" -"\n" -"\n" "typedef struct\n" "{\n" " float4 m_pos;\n" " float4 m_quat;\n" " float4 m_linVel;\n" " float4 m_angVel;\n" -"\n" " u32 m_collidableIdx;\n" " float m_invMass;\n" " float m_restituitionCoeff;\n" " float m_frictionCoeff;\n" "} Body;\n" -"\n" "typedef struct Collidable\n" "{\n" " int m_unused1;\n" @@ -79,40 +65,30 @@ static const char* updateAabbsKernelCL= \ " int m_shapeType;\n" " int m_shapeIndex;\n" "} Collidable;\n" -"\n" -"\n" "typedef struct\n" "{\n" " Matrix3x3 m_invInertia;\n" " Matrix3x3 m_initInvInertia;\n" "} Shape;\n" -"\n" -"\n" "__inline\n" "Matrix3x3 qtGetRotationMatrix(float4 quat)\n" "{\n" " float4 quat2 = (float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f);\n" " Matrix3x3 out;\n" -"\n" " out.m_row[0].x=fabs(1-2*quat2.y-2*quat2.z);\n" " out.m_row[0].y=fabs(2*quat.x*quat.y-2*quat.w*quat.z);\n" " out.m_row[0].z=fabs(2*quat.x*quat.z+2*quat.w*quat.y);\n" " out.m_row[0].w = 0.f;\n" -"\n" " out.m_row[1].x=fabs(2*quat.x*quat.y+2*quat.w*quat.z);\n" " out.m_row[1].y=fabs(1-2*quat2.x-2*quat2.z);\n" " out.m_row[1].z=fabs(2*quat.y*quat.z-2*quat.w*quat.x);\n" " out.m_row[1].w = 0.f;\n" -"\n" " out.m_row[2].x=fabs(2*quat.x*quat.z-2*quat.w*quat.y);\n" " out.m_row[2].y=fabs(2*quat.y*quat.z+2*quat.w*quat.x);\n" " out.m_row[2].z=fabs(1-2*quat2.x-2*quat2.y);\n" " out.m_row[2].w = 0.f;\n" -"\n" " return out;\n" "}\n" -"\n" -"\n" "typedef struct \n" "{\n" " float fx;\n" @@ -120,7 +96,6 @@ static const char* updateAabbsKernelCL= \ " float fz;\n" " int uw;\n" "} btAABBCL;\n" -"\n" "__inline\n" "Matrix3x3 mtTranspose(Matrix3x3 m)\n" "{\n" @@ -130,9 +105,6 @@ static const char* updateAabbsKernelCL= \ " out.m_row[2] = (float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);\n" " return out;\n" "}\n" -"\n" -"\n" -"\n" "__inline\n" "Matrix3x3 mtMul(Matrix3x3 a, Matrix3x3 b)\n" "{\n" @@ -153,8 +125,6 @@ static const char* updateAabbsKernelCL= \ " }\n" " return ans;\n" "}\n" -"\n" -"\n" "__kernel void initializeGpuAabbsFull( const int numNodes, __global Body* gBodies,__global Collidable* collidables, __global btAABBCL* plocalShapeAABB, __global btAABBCL* pAABB)\n" "{\n" " int nodeID = get_global_id(0);\n" diff --git a/test/OpenCL/KernelLaunch/main.cpp b/test/OpenCL/KernelLaunch/main.cpp new file mode 100644 index 000000000..a22ee3b77 --- /dev/null +++ b/test/OpenCL/KernelLaunch/main.cpp @@ -0,0 +1,221 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2011 Advanced Micro Devices, Inc. http://bulletphysics.org + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + +///original author: Erwin Coumans + +#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h" +#include "Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h" +#include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h" +#include +#include + +#include "Bullet3Common/b3Vector3.h" + +typedef b3Vector3 b3Float4; +typedef struct b3Contact4Data b3Contact4Data_t; +struct b3Contact4Data +{ + b3Float4 m_worldPos[4]; + b3Float4 m_localPosA[4]; + b3Float4 m_localPosB[4]; + b3Float4 m_worldNormal; // w: m_nPoints + unsigned short m_restituitionCoeffCmp; + unsigned short m_frictionCoeffCmp; + int m_batchIdx; + int m_bodyAPtrAndSignBit;//x:m_bodyAPtr, y:m_bodyBPtr + int m_bodyBPtrAndSignBit; + int m_childIndexA; + int m_childIndexB; + int m_unused1; + int m_unused2; + +}; + + +#define MSTRINGIFY(A) #A + +static const char* s_testKernelString= MSTRINGIFY( + +struct MyTest +{ + int bla; +}; + +typedef float4 b3Float4; +typedef struct b3Contact4Data b3Contact4Data_t; +struct b3Contact4Data +{ + b3Float4 m_worldPos[4]; + b3Float4 m_localPosA[4]; + b3Float4 m_localPosB[4]; + b3Float4 m_worldNormal; // w: m_nPoints + unsigned short m_restituitionCoeffCmp; + unsigned short m_frictionCoeffCmp; + int m_batchIdx; + int m_bodyAPtrAndSignBit;//x:m_bodyAPtr, y:m_bodyBPtr + int m_bodyBPtrAndSignBit; + int m_childIndexA; + int m_childIndexB; + int m_unused1; + int m_unused2; + +}; +inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact) +{ + return (int)contact->m_worldNormal.w; +}; +inline void b3Contact4Data_setNumPoints(struct b3Contact4Data* contact, int numPoints) +{ + contact->m_worldNormal.w = (float)numPoints; +}; + +typedef volatile __global int* my_counter32_t; + + +__kernel void testKernel( __global int* testData, __global b3Contact4Data_t* contactData, my_counter32_t numElements) +{ + int id = get_local_id(0); + int sz = sizeof(b3Contact4Data_t); + testData[id]=sz; + + __private b3Contact4Data_t tmp; + if (id==0) + { + tmp = contactData[1]; + contactData[1] = contactData[0]; + contactData[0] = tmp; + } +} + + + +); + + + +#include "Bullet3Common/b3Logging.h" + + +void myprintf(const char* msg) +{ + //OutputDebugStringA(msg); + printf(msg); +} + +int main(int argc, char* argv[]) +{ + b3SetCustomPrintfFunc(myprintf); + //b3SetCustomWarningMessageFunc(myprintf); + //b3SetCustomErrorMessageFunc(myprintf); + + b3Printf("test b3Printf\n"); + b3Warning("test warning\n"); + b3Error("test error\n"); + + int ciErrNum = 0; + + cl_device_type deviceType = CL_DEVICE_TYPE_GPU; + const char* vendorSDK = b3OpenCLUtils::getSdkVendorName(); + + b3Printf("This program was compiled using the %s OpenCL SDK\n",vendorSDK); + int numPlatforms = b3OpenCLUtils::getNumPlatforms(); + b3Printf("Num Platforms = %d\n", numPlatforms); + + for (int i=0;i deviceElements(context,queue); + b3OpenCLArray atomicCounter(context,queue); + b3OpenCLArray deviceContacts(context,queue); + b3AlignedObjectArray hostContacts; + + b3Contact4Data tmp; + int sz = sizeof(b3Contact4Data); + memset(&tmp,1,sz); + deviceContacts.push_back(tmp); + b3Contact4Data tmp2 = tmp; + memset(&tmp,2,sz); + deviceContacts.push_back(tmp); + b3Contact4Data tmp3 = tmp; + + + atomicCounter.push_back(0); + deviceElements.resize(numWorkItems); + b3LauncherCL run(queue,testKernel); + run.setBuffer(deviceElements.getBufferCL()); + run.setBuffer(deviceContacts.getBufferCL()); + run.setBuffer(atomicCounter.getBufferCL()); + + run.launch1D(numWorkItems); + + b3AlignedObjectArray hostElements; + deviceElements.copyToHost(hostElements); + deviceContacts.copyToHost(hostContacts); + tmp2 = hostContacts[0]; + tmp3 = hostContacts[1]; + + + printf("...\n"); + + } else + { + printf("kernel failed to compile\n"); + } + + + + } + } + + clReleaseContext(context); + } + + b3Printf("\npress \n"); + getchar(); + return 0; +} diff --git a/test/OpenCL/KernelLaunch/premake4.lua b/test/OpenCL/KernelLaunch/premake4.lua new file mode 100644 index 000000000..dda79ace5 --- /dev/null +++ b/test/OpenCL/KernelLaunch/premake4.lua @@ -0,0 +1,33 @@ +function createProject(vendor) + + hasCL = findOpenCL(vendor) + + if (hasCL) then + + project ("Test_OpenCL_kernel_launch_" .. vendor) + + initOpenCL(vendor) + + language "C++" + + + kind "ConsoleApp" + targetdir "../../../bin" + + includedirs {"../../../src"} + + files { + "main.cpp", + "../../../src/Bullet3OpenCL/Initialize/b3OpenCLUtils.cpp", + "../../../src/Bullet3Common/b3AlignedAllocator.cpp", + "../../../src/Bullet3OpenCL/Initialize/b3OpenCLUtils.h", + "../../../src/Bullet3Common/b3Logging.cpp", + } + + end +end +createProject("clew") +createProject("Apple") +createProject("AMD") +createProject("Intel") +createProject("NVIDIA")