change lcpp Lua preprocessor, to keep #defines and comments, remove empty lines
Remove duplicate data in b3Contact4 (now in b3Contact4Data, shared between CPU/C++ and OpenCL). OpenCL kernels use #include "Bullet3Collision/NarrowPhaseCollision/shared/b3Contact4Data.h". Increase the number of batches back to 250 (from 50); this hard-coded number still needs to be fixed (see https://github.com/erwincoumans/bullet3/issues/12). Work towards GJK/EPA, in addition to SAT/clipping (early on).
This commit is contained in:
@@ -2,10 +2,7 @@
|
||||
static const char* createShadowMapInstancingFragmentShader= \
|
||||
"#version 330\n"
|
||||
"precision highp float;\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"layout(location = 0) out float fragmentdepth;\n"
|
||||
"\n"
|
||||
"void main(void)\n"
|
||||
"{\n"
|
||||
" fragmentdepth = gl_FragCoord.z;\n"
|
||||
|
||||
@@ -2,8 +2,6 @@
|
||||
static const char* createShadowMapInstancingVertexShader= \
|
||||
"#version 330\n"
|
||||
"precision highp float;\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"layout (location = 0) in vec4 position;\n"
|
||||
"layout (location = 1) in vec4 instance_position;\n"
|
||||
"layout (location = 2) in vec4 instance_quaternion;\n"
|
||||
@@ -11,11 +9,7 @@ static const char* createShadowMapInstancingVertexShader= \
|
||||
"layout (location = 4) in vec3 vertexnormal;\n"
|
||||
"layout (location = 5) in vec4 instance_color;\n"
|
||||
"layout (location = 6) in vec3 instance_scale;\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"uniform mat4 depthMVP;\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"vec4 quatMul ( in vec4 q1, in vec4 q2 )\n"
|
||||
"{\n"
|
||||
" vec3 im = q1.w * q2.xyz + q1.xyz * q2.w + cross ( q1.xyz, q2.xyz );\n"
|
||||
@@ -23,7 +17,6 @@ static const char* createShadowMapInstancingVertexShader= \
|
||||
" float re = dot ( dt, vec4 ( -1.0, -1.0, -1.0, 1.0 ) );\n"
|
||||
" return vec4 ( im, re );\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"vec4 quatFromAxisAngle(vec4 axis, in float angle)\n"
|
||||
"{\n"
|
||||
" float cah = cos(angle*0.5);\n"
|
||||
@@ -45,8 +38,6 @@ static const char* createShadowMapInstancingVertexShader= \
|
||||
" vec4 temp = quatMul ( q, p );\n"
|
||||
" return quatMul ( temp, vec4 ( -q.x, -q.y, -q.z, q.w ) );\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"void main(void)\n"
|
||||
"{\n"
|
||||
" vec4 q = instance_quaternion;\n"
|
||||
@@ -54,5 +45,4 @@ static const char* createShadowMapInstancingVertexShader= \
|
||||
" vec4 vertexPos = depthMVP * vec4( (instance_position+localcoord).xyz,1);\n"
|
||||
" gl_Position = vertexPos;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
;
|
||||
|
||||
@@ -2,26 +2,21 @@
|
||||
static const char* instancingFragmentShader= \
|
||||
"#version 330\n"
|
||||
"precision highp float;\n"
|
||||
"\n"
|
||||
"in Fragment\n"
|
||||
"{\n"
|
||||
" vec4 color;\n"
|
||||
"} fragment;\n"
|
||||
"\n"
|
||||
"in Vert\n"
|
||||
"{\n"
|
||||
" vec2 texcoord;\n"
|
||||
"} vert;\n"
|
||||
"\n"
|
||||
"uniform sampler2D Diffuse;\n"
|
||||
"in vec3 lightDir,normal,ambient;\n"
|
||||
"out vec4 color;\n"
|
||||
"\n"
|
||||
"void main_textured(void)\n"
|
||||
"{\n"
|
||||
" color = vec4(0.1,0.2,0.3,0.3);\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"void main(void)\n"
|
||||
"{\n"
|
||||
" vec4 texel = fragment.color*texture(Diffuse,vert.texcoord);//fragment.color;\n"
|
||||
|
||||
@@ -2,8 +2,6 @@
|
||||
static const char* instancingVertexShader= \
|
||||
"#version 330\n"
|
||||
"precision highp float;\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"layout (location = 0) in vec4 position;\n"
|
||||
"layout (location = 1) in vec4 instance_position;\n"
|
||||
"layout (location = 2) in vec4 instance_quaternion;\n"
|
||||
@@ -11,22 +9,16 @@ static const char* instancingVertexShader= \
|
||||
"layout (location = 4) in vec3 vertexnormal;\n"
|
||||
"layout (location = 5) in vec4 instance_color;\n"
|
||||
"layout (location = 6) in vec3 instance_scale;\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"uniform mat4 ModelViewMatrix;\n"
|
||||
"uniform mat4 ProjectionMatrix;\n"
|
||||
"\n"
|
||||
"out Fragment\n"
|
||||
"{\n"
|
||||
" vec4 color;\n"
|
||||
"} fragment;\n"
|
||||
"\n"
|
||||
"out Vert\n"
|
||||
"{\n"
|
||||
" vec2 texcoord;\n"
|
||||
"} vert;\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"vec4 quatMul ( in vec4 q1, in vec4 q2 )\n"
|
||||
"{\n"
|
||||
" vec3 im = q1.w * q2.xyz + q1.xyz * q2.w + cross ( q1.xyz, q2.xyz );\n"
|
||||
@@ -34,7 +26,6 @@ static const char* instancingVertexShader= \
|
||||
" float re = dot ( dt, vec4 ( -1.0, -1.0, -1.0, 1.0 ) );\n"
|
||||
" return vec4 ( im, re );\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"vec4 quatFromAxisAngle(vec4 axis, in float angle)\n"
|
||||
"{\n"
|
||||
" float cah = cos(angle*0.5);\n"
|
||||
@@ -56,9 +47,7 @@ static const char* instancingVertexShader= \
|
||||
" vec4 temp = quatMul ( q, p );\n"
|
||||
" return quatMul ( temp, vec4 ( -q.x, -q.y, -q.z, q.w ) );\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"out vec3 lightDir,normal,ambient;\n"
|
||||
"\n"
|
||||
"void main(void)\n"
|
||||
"{\n"
|
||||
" vec4 q = instance_quaternion;\n"
|
||||
@@ -68,18 +57,15 @@ static const char* instancingVertexShader= \
|
||||
" vec4 local_normal = (quatRotate3( vertexnormal,q));\n"
|
||||
" vec3 light_pos = vec3(-0.3,0.1,0.1);\n"
|
||||
" normal = local_normal.xyz;//normalize(ModelViewMatrix * local_normal).xyz;\n"
|
||||
"\n"
|
||||
" lightDir = normalize(light_pos);//gl_LightSource[0].position.xyz));\n"
|
||||
"// lightDir = normalize(vec3(gl_LightSource[0].position));\n"
|
||||
" \n"
|
||||
" vec4 axis = vec4(1,1,1,0);\n"
|
||||
" vec4 localcoord = quatRotate3( position.xyz*instance_scale,q);\n"
|
||||
" vec4 vertexPos = ProjectionMatrix * ModelViewMatrix *(instance_position+localcoord);\n"
|
||||
"\n"
|
||||
" gl_Position = vertexPos;\n"
|
||||
" \n"
|
||||
" fragment.color = instance_color;\n"
|
||||
" vert.texcoord = uvcoords;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
;
|
||||
|
||||
@@ -2,22 +2,16 @@
|
||||
static const char* pointSpriteFragmentShader= \
|
||||
"#version 330\n"
|
||||
"precision highp float;\n"
|
||||
"\n"
|
||||
"in Fragment\n"
|
||||
"{\n"
|
||||
" vec4 color;\n"
|
||||
"} fragment;\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"in vec3 ambient;\n"
|
||||
"\n"
|
||||
"out vec4 color;\n"
|
||||
"\n"
|
||||
"void main_textured(void)\n"
|
||||
"{\n"
|
||||
" color = fragment.color;//texture2D(Diffuse,vert.texcoord);//fragment.color;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"void main(void)\n"
|
||||
"{\n"
|
||||
" vec3 N;\n"
|
||||
|
||||
@@ -2,34 +2,23 @@
|
||||
static const char* pointSpriteVertexShader= \
|
||||
"#version 330\n"
|
||||
"precision highp float;\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"layout (location = 0) in vec4 position;\n"
|
||||
"layout (location = 1) in vec4 instance_position;\n"
|
||||
"layout (location = 3) in vec2 uvcoords;\n"
|
||||
"layout (location = 4) in vec3 vertexnormal;\n"
|
||||
"layout (location = 5) in vec4 instance_color;\n"
|
||||
"layout (location = 6) in vec3 instance_scale;\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"uniform float screenWidth = 700.f;\n"
|
||||
"uniform mat4 ModelViewMatrix;\n"
|
||||
"uniform mat4 ProjectionMatrix;\n"
|
||||
"\n"
|
||||
"out Fragment\n"
|
||||
"{\n"
|
||||
" vec4 color;\n"
|
||||
"} fragment;\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"//\n"
|
||||
"// vector rotation via quaternion\n"
|
||||
"//\n"
|
||||
"\n"
|
||||
"out vec3 ambient;\n"
|
||||
"\n"
|
||||
"void main(void)\n"
|
||||
"{\n"
|
||||
" ambient = vec3(0.3,.3,0.3);\n"
|
||||
@@ -41,7 +30,6 @@ static const char* pointSpriteVertexShader= \
|
||||
" float dist = length(posEye);\n"
|
||||
" float pointRadius = 1.f;\n"
|
||||
" gl_PointSize = instance_scale.x * pointRadius * (screenWidth / dist);\n"
|
||||
"\n"
|
||||
" gl_Position = vertexPos;\n"
|
||||
" \n"
|
||||
" fragment.color = instance_color;\n"
|
||||
|
||||
@@ -2,27 +2,19 @@
|
||||
static const char* useShadowMapInstancingFragmentShader= \
|
||||
"#version 330 core\n"
|
||||
"//precision highp float;\n"
|
||||
"\n"
|
||||
"in Fragment\n"
|
||||
"{\n"
|
||||
" vec4 color;\n"
|
||||
"} fragment;\n"
|
||||
"\n"
|
||||
"in Vert\n"
|
||||
"{\n"
|
||||
" vec2 texcoord;\n"
|
||||
"} vert;\n"
|
||||
"\n"
|
||||
"uniform sampler2D Diffuse;\n"
|
||||
"uniform sampler2DShadow shadowMap;\n"
|
||||
"\n"
|
||||
"in vec3 lightDir,normal,ambient;\n"
|
||||
"in vec4 ShadowCoord;\n"
|
||||
"\n"
|
||||
"out vec4 color;\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"void main(void)\n"
|
||||
"{\n"
|
||||
" vec4 texel = fragment.color*texture(Diffuse,vert.texcoord);//fragment.color;\n"
|
||||
@@ -41,8 +33,6 @@ static const char* useShadowMapInstancingFragmentShader= \
|
||||
" \n"
|
||||
" float bias = 0.005*tan(acos(intensity));\n"
|
||||
" bias = clamp(bias, 0,0.01);\n"
|
||||
"\n"
|
||||
"\n"
|
||||
" float visibility = texture(shadowMap, vec3(ShadowCoord.xy,(ShadowCoord.z-bias)/ShadowCoord.w));\n"
|
||||
" \n"
|
||||
" intensity*=2;\n"
|
||||
|
||||
@@ -2,8 +2,6 @@
|
||||
static const char* useShadowMapInstancingVertexShader= \
|
||||
"#version 330 \n"
|
||||
"precision highp float;\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"layout (location = 0) in vec4 position;\n"
|
||||
"layout (location = 1) in vec4 instance_position;\n"
|
||||
"layout (location = 2) in vec4 instance_quaternion;\n"
|
||||
@@ -11,26 +9,19 @@ static const char* useShadowMapInstancingVertexShader= \
|
||||
"layout (location = 4) in vec3 vertexnormal;\n"
|
||||
"layout (location = 5) in vec4 instance_color;\n"
|
||||
"layout (location = 6) in vec3 instance_scale;\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"uniform mat4 ModelViewMatrix;\n"
|
||||
"uniform mat4 ProjectionMatrix;\n"
|
||||
"uniform mat4 DepthBiasModelViewProjectionMatrix;\n"
|
||||
"uniform mat4 MVP;\n"
|
||||
"\n"
|
||||
"out vec4 ShadowCoord;\n"
|
||||
"\n"
|
||||
"out Fragment\n"
|
||||
"{\n"
|
||||
" vec4 color;\n"
|
||||
"} fragment;\n"
|
||||
"\n"
|
||||
"out Vert\n"
|
||||
"{\n"
|
||||
" vec2 texcoord;\n"
|
||||
"} vert;\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"vec4 quatMul ( in vec4 q1, in vec4 q2 )\n"
|
||||
"{\n"
|
||||
" vec3 im = q1.w * q2.xyz + q1.xyz * q2.w + cross ( q1.xyz, q2.xyz );\n"
|
||||
@@ -38,7 +29,6 @@ static const char* useShadowMapInstancingVertexShader= \
|
||||
" float re = dot ( dt, vec4 ( -1.0, -1.0, -1.0, 1.0 ) );\n"
|
||||
" return vec4 ( im, re );\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"vec4 quatFromAxisAngle(vec4 axis, in float angle)\n"
|
||||
"{\n"
|
||||
" float cah = cos(angle*0.5);\n"
|
||||
@@ -60,9 +50,7 @@ static const char* useShadowMapInstancingVertexShader= \
|
||||
" vec4 temp = quatMul ( q, p );\n"
|
||||
" return quatMul ( temp, vec4 ( -q.x, -q.y, -q.z, q.w ) );\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"out vec3 lightDir,normal,ambient;\n"
|
||||
"\n"
|
||||
"void main(void)\n"
|
||||
"{\n"
|
||||
" vec4 q = instance_quaternion;\n"
|
||||
@@ -72,18 +60,14 @@ static const char* useShadowMapInstancingVertexShader= \
|
||||
" vec4 worldNormal = (quatRotate3( vertexnormal,q));\n"
|
||||
" vec3 light_pos = vec3(-5.f,100,-40);\n"
|
||||
" normal = normalize(worldNormal).xyz;\n"
|
||||
"\n"
|
||||
" lightDir = normalize(light_pos);//gl_LightSource[0].position.xyz));\n"
|
||||
" \n"
|
||||
" vec4 axis = vec4(1,1,1,0);\n"
|
||||
" vec4 localcoord = quatRotate3( position.xyz*instance_scale,q);\n"
|
||||
" vec4 vertexPos = MVP* vec4((instance_position+localcoord).xyz,1);\n"
|
||||
"\n"
|
||||
" gl_Position = vertexPos;\n"
|
||||
" ShadowCoord = DepthBiasModelViewProjectionMatrix * vec4((instance_position+localcoord).xyz,1);\n"
|
||||
"\n"
|
||||
" fragment.color = instance_color;\n"
|
||||
" vert.texcoord = uvcoords;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
;
|
||||
|
||||
@@ -98,7 +98,7 @@ lcpp.LCPP_LUA = false -- whether to use lcpp to preprocess Lua code (l
|
||||
lcpp.LCPP_FFI = true -- whether to use lcpp as LuaJIT ffi PreProcessor (if used in luaJIT)
|
||||
lcpp.LCPP_TEST = false -- whether to run lcpp unit tests when loading lcpp module
|
||||
lcpp.ENV = {} -- static predefines (env-like)
|
||||
lcpp.FAST = false -- perf. tweaks when enabled. con: breaks minor stuff like __LINE__ macros
|
||||
lcpp.FAST = true -- perf. tweaks when enabled. con: breaks minor stuff like __LINE__ macros
|
||||
lcpp.DEBUG = false
|
||||
|
||||
-- PREDEFINES
|
||||
@@ -438,7 +438,7 @@ local function processLine(state, line)
|
||||
|
||||
|
||||
--[[ APPLY MACROS ]]--
|
||||
line = state:apply(line);
|
||||
--line = state:apply(line);
|
||||
|
||||
return line
|
||||
end
|
||||
@@ -470,7 +470,7 @@ local function processLine2(state, line)
|
||||
if elseif_ then state:elseBlock(state:parseExpr(elseif_)) end
|
||||
if else_ then state:elseBlock(true) end
|
||||
if endif then state:closeBlock() end
|
||||
return -- remove structural directives
|
||||
return line
|
||||
end
|
||||
end
|
||||
end
|
||||
@@ -520,7 +520,7 @@ local function processLine2(state, line)
|
||||
state:define(macroname, replacement)
|
||||
end
|
||||
|
||||
return
|
||||
return line
|
||||
end
|
||||
|
||||
-- ignore, because we dont have any pragma directives yet
|
||||
@@ -537,7 +537,7 @@ local function processLine2(state, line)
|
||||
|
||||
|
||||
--[[ APPLY MACROS ]]--
|
||||
line = state:apply(line);
|
||||
--line = state:apply(line);
|
||||
|
||||
return line
|
||||
end
|
||||
@@ -551,8 +551,11 @@ local function doWork(state)
|
||||
local input = state:getLine()
|
||||
if not input then break end
|
||||
local output = processLine(state, input)
|
||||
if not lcpp.FAST and not output then output = "" end -- output empty skipped lines
|
||||
if not lcpp.FAST and not output then
|
||||
output = "" end -- output empty skipped lines
|
||||
|
||||
if lcpp.DEBUG then output = output.." -- "..input end -- input as comment when DEBUG
|
||||
|
||||
if output then coroutine.yield(output) end
|
||||
end
|
||||
if (oldIndent ~= state:getIndent()) then error("indentation level must be balanced within a file. was:"..oldIndent.." is:"..state:getIndent()) end
|
||||
|
||||
@@ -116,7 +116,8 @@ if not _OPTIONS["ios"] then
|
||||
include "../Demos3/GpuGuiInitialize"
|
||||
|
||||
include "../test/OpenCL/BasicInitialize"
|
||||
-- include "../test/OpenCL/BroadphaseCollision"
|
||||
include "../test/OpenCL/KernelLaunch"--
|
||||
include "../test/OpenCL/BroadphaseCollision"
|
||||
-- include "../test/OpenCL/NarrowphaseCollision"
|
||||
include "../test/OpenCL/ParallelPrimitives"
|
||||
include "../test/OpenCL/RadixSortBenchmark"
|
||||
@@ -149,6 +150,7 @@ if not _OPTIONS["ios"] then
|
||||
end
|
||||
|
||||
|
||||
|
||||
if _OPTIONS["bullet2gpu"] then
|
||||
include "../src/LinearMath"
|
||||
include "../src/BulletCollision"
|
||||
|
||||
@@ -23,23 +23,6 @@ B3_ATTRIBUTE_ALIGNED16(struct) b3Contact4 : public b3Contact4Data
|
||||
{
|
||||
B3_DECLARE_ALIGNED_ALLOCATOR();
|
||||
|
||||
b3Vector3 m_worldPos[4];
|
||||
b3Vector3 m_worldNormal;
|
||||
// float m_restituitionCoeff;
|
||||
// float m_frictionCoeff;
|
||||
unsigned short m_restituitionCoeffCmp;
|
||||
unsigned short m_frictionCoeffCmp;
|
||||
int m_batchIdx;
|
||||
|
||||
int m_bodyAPtrAndSignBit;
|
||||
int m_bodyBPtrAndSignBit;
|
||||
|
||||
int m_childIndexA;
|
||||
int m_childIndexB;
|
||||
|
||||
int m_unused1;
|
||||
int m_unused2;
|
||||
|
||||
int getBodyA()const {return abs(m_bodyAPtrAndSignBit);}
|
||||
int getBodyB()const {return abs(m_bodyBPtrAndSignBit);}
|
||||
bool isBodyAFixed()const { return m_bodyAPtrAndSignBit<0;}
|
||||
|
||||
@@ -3,13 +3,16 @@
|
||||
|
||||
#include "Bullet3Common/shared/b3Float4.h"
|
||||
|
||||
typedef struct
|
||||
typedef struct b3Contact4Data b3Contact4Data_t;
|
||||
|
||||
struct b3Contact4Data
|
||||
{
|
||||
b3Float4 m_worldPos[4];
|
||||
// b3Float4 m_localPosB[4];
|
||||
b3Float4 m_worldNormal; // w: m_nPoints
|
||||
unsigned int m_coeffs;
|
||||
unsigned int m_batchIdx;
|
||||
|
||||
unsigned short m_restituitionCoeffCmp;
|
||||
unsigned short m_frictionCoeffCmp;
|
||||
int m_batchIdx;
|
||||
int m_bodyAPtrAndSignBit;//x:m_bodyAPtr, y:m_bodyBPtr
|
||||
int m_bodyBPtrAndSignBit;
|
||||
|
||||
@@ -18,6 +21,19 @@ typedef struct
|
||||
int m_unused1;
|
||||
int m_unused2;
|
||||
|
||||
} b3Contact4Data;
|
||||
b3Float4 m_localPosA;
|
||||
};
|
||||
|
||||
// Returns the value stored in contact->m_worldNormal.w, truncated to int.
// The struct's field comment marks that w component as the point count.
inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)
|
||||
{
|
||||
return (int)contact->m_worldNormal.w;
|
||||
};
|
||||
|
||||
// Stores numPoints, converted to float, in contact->m_worldNormal.w.
// The struct's field comment marks that w component as the point count.
inline void b3Contact4Data_setNumPoints(struct b3Contact4Data* contact, int numPoints)
|
||||
{
|
||||
contact->m_worldNormal.w = (float)numPoints;
|
||||
};
|
||||
|
||||
|
||||
|
||||
#endif //B3_CONTACT4DATA_H
|
||||
@@ -2,22 +2,18 @@
|
||||
static const char* sapFastCL= \
|
||||
"/*\n"
|
||||
"Copyright (c) 2012 Advanced Micro Devices, Inc. \n"
|
||||
"\n"
|
||||
"This software is provided 'as-is', without any express or implied warranty.\n"
|
||||
"In no event will the authors be held liable for any damages arising from the use of this software.\n"
|
||||
"Permission is granted to anyone to use this software for any purpose, \n"
|
||||
"including commercial applications, and to alter it and redistribute it freely, \n"
|
||||
"subject to the following restrictions:\n"
|
||||
"\n"
|
||||
"1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n"
|
||||
"2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n"
|
||||
"3. This notice may not be removed or altered from any source distribution.\n"
|
||||
"*/\n"
|
||||
"//Originally written by Erwin Coumans\n"
|
||||
"\n"
|
||||
"#define NEW_PAIR_MARKER -1\n"
|
||||
"#define REMOVED_PAIR_MARKER -2\n"
|
||||
"\n"
|
||||
"typedef struct \n"
|
||||
"{\n"
|
||||
" union\n"
|
||||
@@ -33,7 +29,6 @@ static const char* sapFastCL= \
|
||||
" int m_maxIndices[4];\n"
|
||||
" };\n"
|
||||
"} btAabbCL;\n"
|
||||
"\n"
|
||||
"typedef struct \n"
|
||||
"{\n"
|
||||
" union\n"
|
||||
@@ -41,7 +36,6 @@ static const char* sapFastCL= \
|
||||
" unsigned int m_key;\n"
|
||||
" unsigned int x;\n"
|
||||
" };\n"
|
||||
"\n"
|
||||
" union\n"
|
||||
" {\n"
|
||||
" unsigned int m_value;\n"
|
||||
@@ -49,8 +43,6 @@ static const char* sapFastCL= \
|
||||
" \n"
|
||||
" };\n"
|
||||
"}b3SortData;\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"/// conservative test for overlap between two aabbs\n"
|
||||
"bool TestAabbAgainstAabb2(const btAabbCL* aabb1, __local const btAabbCL* aabb2);\n"
|
||||
"bool TestAabbAgainstAabb2(const btAabbCL* aabb1, __local const btAabbCL* aabb2)\n"
|
||||
@@ -65,7 +57,6 @@ static const char* sapFastCL= \
|
||||
" overlap = (aabb1->m_min.y > aabb2->m_max.y || aabb1->m_max.y < aabb2->m_min.y) ? false : overlap;\n"
|
||||
" return overlap;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__kernel void computePairsIncremental3dSapKernel( __global const uint2* objectMinMaxIndexGPUaxis0,\n"
|
||||
" __global const uint2* objectMinMaxIndexGPUaxis1,\n"
|
||||
" __global const uint2* objectMinMaxIndexGPUaxis2,\n"
|
||||
@@ -88,7 +79,6 @@ static const char* sapFastCL= \
|
||||
" int i = get_global_id(0);\n"
|
||||
" if (i>=numObjects)\n"
|
||||
" return;\n"
|
||||
"\n"
|
||||
" __global const uint2* objectMinMaxIndexGPU[3][2];\n"
|
||||
" objectMinMaxIndexGPU[0][0]=objectMinMaxIndexGPUaxis0;\n"
|
||||
" objectMinMaxIndexGPU[1][0]=objectMinMaxIndexGPUaxis1;\n"
|
||||
@@ -96,7 +86,6 @@ static const char* sapFastCL= \
|
||||
" objectMinMaxIndexGPU[0][1]=objectMinMaxIndexGPUaxis0prev;\n"
|
||||
" objectMinMaxIndexGPU[1][1]=objectMinMaxIndexGPUaxis1prev;\n"
|
||||
" objectMinMaxIndexGPU[2][1]=objectMinMaxIndexGPUaxis2prev;\n"
|
||||
"\n"
|
||||
" __global const b3SortData* sortedAxisGPU[3][2];\n"
|
||||
" sortedAxisGPU[0][0] = sortedAxisGPU0;\n"
|
||||
" sortedAxisGPU[1][0] = sortedAxisGPU1;\n"
|
||||
@@ -104,20 +93,16 @@ static const char* sapFastCL= \
|
||||
" sortedAxisGPU[0][1] = sortedAxisGPU0prev;\n"
|
||||
" sortedAxisGPU[1][1] = sortedAxisGPU1prev;\n"
|
||||
" sortedAxisGPU[2][1] = sortedAxisGPU2prev;\n"
|
||||
"\n"
|
||||
" int m_currentBuffer = 0;\n"
|
||||
"\n"
|
||||
" for (int axis=0;axis<3;axis++)\n"
|
||||
" {\n"
|
||||
" //int i = checkObjects[a];\n"
|
||||
"\n"
|
||||
" unsigned int curMinIndex = objectMinMaxIndexGPU[axis][m_currentBuffer][i].x;\n"
|
||||
" unsigned int curMaxIndex = objectMinMaxIndexGPU[axis][m_currentBuffer][i].y;\n"
|
||||
" unsigned int prevMinIndex = objectMinMaxIndexGPU[axis][1-m_currentBuffer][i].x;\n"
|
||||
" int dmin = curMinIndex - prevMinIndex;\n"
|
||||
" \n"
|
||||
" unsigned int prevMaxIndex = objectMinMaxIndexGPU[axis][1-m_currentBuffer][i].y;\n"
|
||||
"\n"
|
||||
" int dmax = curMaxIndex - prevMaxIndex;\n"
|
||||
" \n"
|
||||
" for (int otherbuffer = 0;otherbuffer<2;otherbuffer++)\n"
|
||||
@@ -132,23 +117,18 @@ static const char* sapFastCL= \
|
||||
" if (otherIndex!=i)\n"
|
||||
" {\n"
|
||||
" bool otherIsMax = ((otherIndex2&1)!=0);\n"
|
||||
"\n"
|
||||
" if (otherIsMax)\n"
|
||||
" {\n"
|
||||
" \n"
|
||||
" bool overlap = true;\n"
|
||||
"\n"
|
||||
" for (int ax=0;ax<3;ax++)\n"
|
||||
" {\n"
|
||||
" if ((objectMinMaxIndexGPU[ax][m_currentBuffer][i].x > objectMinMaxIndexGPU[ax][m_currentBuffer][otherIndex].y) ||\n"
|
||||
" (objectMinMaxIndexGPU[ax][m_currentBuffer][i].y < objectMinMaxIndexGPU[ax][m_currentBuffer][otherIndex].x))\n"
|
||||
" overlap=false;\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" // b3Assert(overlap2==overlap);\n"
|
||||
"\n"
|
||||
" bool prevOverlap = true;\n"
|
||||
"\n"
|
||||
" for (int ax=0;ax<3;ax++)\n"
|
||||
" {\n"
|
||||
" if ((objectMinMaxIndexGPU[ax][1-m_currentBuffer][i].x > objectMinMaxIndexGPU[ax][1-m_currentBuffer][otherIndex].y) ||\n"
|
||||
@@ -156,11 +136,8 @@ static const char* sapFastCL= \
|
||||
" prevOverlap=false;\n"
|
||||
" }\n"
|
||||
" \n"
|
||||
"\n"
|
||||
" //b3Assert(overlap==overlap2);\n"
|
||||
" \n"
|
||||
"\n"
|
||||
"\n"
|
||||
" if (dmin<0)\n"
|
||||
" {\n"
|
||||
" if (overlap && !prevOverlap)\n"
|
||||
@@ -185,10 +162,8 @@ static const char* sapFastCL= \
|
||||
" addedHostPairsGPU[curPair].y = newPair.y;\n"
|
||||
" addedHostPairsGPU[curPair].z = NEW_PAIR_MARKER;\n"
|
||||
" addedHostPairsGPU[curPair].w = NEW_PAIR_MARKER;\n"
|
||||
"\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" }\n"
|
||||
" } \n"
|
||||
" else\n"
|
||||
@@ -216,7 +191,6 @@ static const char* sapFastCL= \
|
||||
" removedHostPairsGPU[curPair].y = removedPair.y;\n"
|
||||
" removedHostPairsGPU[curPair].z = REMOVED_PAIR_MARKER;\n"
|
||||
" removedHostPairsGPU[curPair].w = REMOVED_PAIR_MARKER;\n"
|
||||
"\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
@@ -240,7 +214,6 @@ static const char* sapFastCL= \
|
||||
" {\n"
|
||||
" \n"
|
||||
" bool overlap = true;\n"
|
||||
"\n"
|
||||
" for (int ax=0;ax<3;ax++)\n"
|
||||
" {\n"
|
||||
" if ((objectMinMaxIndexGPU[ax][m_currentBuffer][i].x > objectMinMaxIndexGPU[ax][m_currentBuffer][otherIndex].y) ||\n"
|
||||
@@ -248,9 +221,7 @@ static const char* sapFastCL= \
|
||||
" overlap=false;\n"
|
||||
" }\n"
|
||||
" //b3Assert(overlap2==overlap);\n"
|
||||
"\n"
|
||||
" bool prevOverlap = true;\n"
|
||||
"\n"
|
||||
" for (int ax=0;ax<3;ax++)\n"
|
||||
" {\n"
|
||||
" if ((objectMinMaxIndexGPU[ax][1-m_currentBuffer][i].x > objectMinMaxIndexGPU[ax][1-m_currentBuffer][otherIndex].y) ||\n"
|
||||
@@ -258,7 +229,6 @@ static const char* sapFastCL= \
|
||||
" prevOverlap=false;\n"
|
||||
" }\n"
|
||||
" \n"
|
||||
"\n"
|
||||
" if (dmax>0)\n"
|
||||
" {\n"
|
||||
" if (overlap && !prevOverlap)\n"
|
||||
@@ -283,7 +253,6 @@ static const char* sapFastCL= \
|
||||
" addedHostPairsGPU[curPair].y = newPair.y;\n"
|
||||
" addedHostPairsGPU[curPair].z = NEW_PAIR_MARKER;\n"
|
||||
" addedHostPairsGPU[curPair].w = NEW_PAIR_MARKER;\n"
|
||||
"\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
" \n"
|
||||
@@ -326,16 +295,12 @@ static const char* sapFastCL= \
|
||||
" }\n"
|
||||
" }//for (int otherbuffer\n"
|
||||
" }//for (int axis=0;\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"//computePairsKernelBatchWrite\n"
|
||||
"__kernel void computePairsKernel( __global const btAabbCL* aabbs, volatile __global int4* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs)\n"
|
||||
"{\n"
|
||||
" int i = get_global_id(0);\n"
|
||||
" int localId = get_local_id(0);\n"
|
||||
"\n"
|
||||
" __local int numActiveWgItems[1];\n"
|
||||
" __local int breakRequest[1];\n"
|
||||
" __local btAabbCL localAabbs[128];// = aabbs[i];\n"
|
||||
@@ -411,7 +376,6 @@ static const char* sapFastCL= \
|
||||
" tmpPair.y = myPairs[p].y;\n"
|
||||
" tmpPair.z = NEW_PAIR_MARKER;\n"
|
||||
" tmpPair.w = NEW_PAIR_MARKER;\n"
|
||||
"\n"
|
||||
" pairsOut[curPair+p] = tmpPair; //flush to main memory\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
|
||||
@@ -2,21 +2,17 @@
|
||||
static const char* sapCL= \
|
||||
"/*\n"
|
||||
"Copyright (c) 2012 Advanced Micro Devices, Inc. \n"
|
||||
"\n"
|
||||
"This software is provided 'as-is', without any express or implied warranty.\n"
|
||||
"In no event will the authors be held liable for any damages arising from the use of this software.\n"
|
||||
"Permission is granted to anyone to use this software for any purpose, \n"
|
||||
"including commercial applications, and to alter it and redistribute it freely, \n"
|
||||
"subject to the following restrictions:\n"
|
||||
"\n"
|
||||
"1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n"
|
||||
"2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n"
|
||||
"3. This notice may not be removed or altered from any source distribution.\n"
|
||||
"*/\n"
|
||||
"//Originally written by Erwin Coumans\n"
|
||||
"\n"
|
||||
"#define NEW_PAIR_MARKER -1\n"
|
||||
"\n"
|
||||
"typedef struct \n"
|
||||
"{\n"
|
||||
" union\n"
|
||||
@@ -32,8 +28,6 @@ static const char* sapCL= \
|
||||
" int m_maxIndices[4];\n"
|
||||
" };\n"
|
||||
"} btAabbCL;\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"/// conservative test for overlap between two aabbs\n"
|
||||
"bool TestAabbAgainstAabb2(const btAabbCL* aabb1, __local const btAabbCL* aabb2);\n"
|
||||
"bool TestAabbAgainstAabb2(const btAabbCL* aabb1, __local const btAabbCL* aabb2)\n"
|
||||
@@ -53,7 +47,6 @@ static const char* sapCL= \
|
||||
" overlap = (aabb1->m_min.y > aabb2->m_max.y || aabb1->m_max.y < aabb2->m_min.y) ? false : overlap;\n"
|
||||
" return overlap;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"bool TestAabbAgainstAabb2Global(const btAabbCL* aabb1, __global const btAabbCL* aabb2);\n"
|
||||
"bool TestAabbAgainstAabb2Global(const btAabbCL* aabb1, __global const btAabbCL* aabb2)\n"
|
||||
"{\n"
|
||||
@@ -63,18 +56,14 @@ static const char* sapCL= \
|
||||
" overlap = (aabb1->m_min.y > aabb2->m_max.y || aabb1->m_max.y < aabb2->m_min.y) ? false : overlap;\n"
|
||||
" return overlap;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"__kernel void computePairsKernelTwoArrays( __global const btAabbCL* unsortedAabbs, __global const btAabbCL* sortedAabbs, volatile __global int4* pairsOut,volatile __global int* pairCount, int numUnsortedAabbs, int numSortedAabbs, int axis, int maxPairs)\n"
|
||||
"{\n"
|
||||
" int i = get_global_id(0);\n"
|
||||
" if (i>=numUnsortedAabbs)\n"
|
||||
" return;\n"
|
||||
"\n"
|
||||
" int j = get_global_id(1);\n"
|
||||
" if (j>=numSortedAabbs)\n"
|
||||
" return;\n"
|
||||
"\n"
|
||||
" if (TestAabbAgainstAabb2GlobalGlobal(&unsortedAabbs[i],&sortedAabbs[j]))\n"
|
||||
" {\n"
|
||||
" int4 myPair;\n"
|
||||
@@ -92,8 +81,6 @@ static const char* sapCL= \
|
||||
" myPair.y = yIndex;\n"
|
||||
" myPair.z = NEW_PAIR_MARKER;\n"
|
||||
" myPair.w = NEW_PAIR_MARKER;\n"
|
||||
"\n"
|
||||
"\n"
|
||||
" int curPair = atomic_inc (pairCount);\n"
|
||||
" if (curPair<maxPairs)\n"
|
||||
" {\n"
|
||||
@@ -101,7 +88,6 @@ static const char* sapCL= \
|
||||
" }\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__kernel void computePairsKernelOriginal( __global const btAabbCL* aabbs, volatile __global int4* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs)\n"
|
||||
"{\n"
|
||||
" int i = get_global_id(0);\n"
|
||||
@@ -120,7 +106,6 @@ static const char* sapCL= \
|
||||
" myPair.y = aabbs[j].m_minIndices[3];\n"
|
||||
" myPair.z = NEW_PAIR_MARKER;\n"
|
||||
" myPair.w = NEW_PAIR_MARKER;\n"
|
||||
"\n"
|
||||
" int curPair = atomic_inc (pairCount);\n"
|
||||
" if (curPair<maxPairs)\n"
|
||||
" {\n"
|
||||
@@ -129,18 +114,12 @@ static const char* sapCL= \
|
||||
" }\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"__kernel void computePairsKernelBarrier( __global const btAabbCL* aabbs, volatile __global int4* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs)\n"
|
||||
"{\n"
|
||||
" int i = get_global_id(0);\n"
|
||||
" int localId = get_local_id(0);\n"
|
||||
"\n"
|
||||
" __local int numActiveWgItems[1];\n"
|
||||
" __local int breakRequest[1];\n"
|
||||
"\n"
|
||||
" if (localId==0)\n"
|
||||
" {\n"
|
||||
" numActiveWgItems[0] = 0;\n"
|
||||
@@ -150,7 +129,6 @@ static const char* sapCL= \
|
||||
" atomic_inc(numActiveWgItems);\n"
|
||||
" barrier(CLK_LOCAL_MEM_FENCE);\n"
|
||||
" int localBreak = 0;\n"
|
||||
"\n"
|
||||
" int j=i+1;\n"
|
||||
" do\n"
|
||||
" {\n"
|
||||
@@ -186,7 +164,6 @@ static const char* sapCL= \
|
||||
" myPair.y = aabbs[j].m_minIndices[3];\n"
|
||||
" myPair.z = NEW_PAIR_MARKER;\n"
|
||||
" myPair.w = NEW_PAIR_MARKER;\n"
|
||||
"\n"
|
||||
" int curPair = atomic_inc (pairCount);\n"
|
||||
" if (curPair<maxPairs)\n"
|
||||
" {\n"
|
||||
@@ -195,16 +172,12 @@ static const char* sapCL= \
|
||||
" }\n"
|
||||
" }\n"
|
||||
" j++;\n"
|
||||
"\n"
|
||||
" } while (breakRequest[0]<numActiveWgItems[0]);\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"__kernel void computePairsKernelLocalSharedMemory( __global const btAabbCL* aabbs, volatile __global int4* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs)\n"
|
||||
"{\n"
|
||||
" int i = get_global_id(0);\n"
|
||||
" int localId = get_local_id(0);\n"
|
||||
"\n"
|
||||
" __local int numActiveWgItems[1];\n"
|
||||
" __local int breakRequest[1];\n"
|
||||
" __local btAabbCL localAabbs[128];// = aabbs[i];\n"
|
||||
@@ -264,7 +237,6 @@ static const char* sapCL= \
|
||||
" myPair.y = localAabbs[localCount+localId+1].m_minIndices[3];\n"
|
||||
" myPair.z = NEW_PAIR_MARKER;\n"
|
||||
" myPair.w = NEW_PAIR_MARKER;\n"
|
||||
"\n"
|
||||
" int curPair = atomic_inc (pairCount);\n"
|
||||
" if (curPair<maxPairs)\n"
|
||||
" {\n"
|
||||
@@ -274,7 +246,6 @@ static const char* sapCL= \
|
||||
" }\n"
|
||||
" \n"
|
||||
" barrier(CLK_LOCAL_MEM_FENCE);\n"
|
||||
"\n"
|
||||
" localCount++;\n"
|
||||
" if (localCount==64)\n"
|
||||
" {\n"
|
||||
@@ -288,10 +259,6 @@ static const char* sapCL= \
|
||||
" } while (breakRequest[0]<numActiveWgItems[0]);\n"
|
||||
" \n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"//http://stereopsis.com/radix.html\n"
|
||||
"unsigned int FloatFlip(float fl);\n"
|
||||
"unsigned int FloatFlip(float fl)\n"
|
||||
@@ -307,10 +274,6 @@ static const char* sapCL= \
|
||||
" unsigned int fl = f ^ mask;\n"
|
||||
" return *(float*)&fl;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"__kernel void copyAabbsKernel( __global const btAabbCL* allAabbs, __global btAabbCL* destAabbs, int numObjects)\n"
|
||||
"{\n"
|
||||
" int i = get_global_id(0);\n"
|
||||
@@ -320,8 +283,6 @@ static const char* sapCL= \
|
||||
" destAabbs[i] = allAabbs[src];\n"
|
||||
" destAabbs[i].m_maxIndices[3] = src;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"__kernel void flipFloatKernel( __global const btAabbCL* aabbs, volatile __global int2* sortData, int numObjects, int axis)\n"
|
||||
"{\n"
|
||||
" int i = get_global_id(0);\n"
|
||||
@@ -332,19 +293,13 @@ static const char* sapCL= \
|
||||
" sortData[i].y = i;\n"
|
||||
" \n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"__kernel void scatterKernel( __global const btAabbCL* aabbs, volatile __global const int2* sortData, __global btAabbCL* sortedAabbs, int numObjects)\n"
|
||||
"{\n"
|
||||
" int i = get_global_id(0);\n"
|
||||
" if (i>=numObjects)\n"
|
||||
" return;\n"
|
||||
"\n"
|
||||
" sortedAabbs[i] = aabbs[sortData[i].y];\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"__kernel void prepareSumVarianceKernel( __global const btAabbCL* aabbs, __global float4* sum, __global float4* sum2,int numAabbs)\n"
|
||||
"{\n"
|
||||
" int i = get_global_id(0);\n"
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
|
||||
#if 0
|
||||
/*
|
||||
Bullet Continuous Collision Detection and Physics Library
|
||||
@@ -18,77 +19,22 @@ subject to the following restrictions:
|
||||
#include "b3ContactCache.h"
|
||||
#include "Bullet3Common/b3Transform.h"
|
||||
|
||||
#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Contact4Data.h"
|
||||
|
||||
b3Scalar gContactBreakingThreshold = b3Scalar(0.02);
|
||||
b3Scalar m_contactBreakingThreshold;
|
||||
b3Scalar m_contactProcessingThreshold;
|
||||
|
||||
///gContactCalcArea3Points will approximate the convex hull area using 3 points
|
||||
///when setting it to false, it will use 4 points to compute the area: it is more accurate but slower
|
||||
bool gContactCalcArea3Points = true;
|
||||
|
||||
|
||||
///Default constructor: initializes m_index1a to 0; the body is otherwise empty.
b3ContactCache::b3ContactCache()
|
||||
:m_index1a(0)
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
///Debug-only helper, compiled only when DEBUG_PERSISTENCY is defined.
///Prints the number of cached points (m_cachedPoints) and then, for every cached point,
///the value of its m_userPersistentData member to stdout.
///NOTE(review): m_userPersistentData is printed with %x; if that member is a pointer
///(clearUserCache stores it in a void*), %p would be the matching conversion -- confirm.
#ifdef DEBUG_PERSISTENCY
|
||||
#include <stdio.h>
|
||||
void b3ContactCache::DebugPersistency()
|
||||
{
|
||||
int i;
|
||||
printf("DebugPersistency : numPoints %d\n",m_cachedPoints);
|
||||
for (i=0;i<m_cachedPoints;i++)
|
||||
{
|
||||
printf("m_pointCache[%d].m_userPersistentData = %x\n",i,m_pointCache[i].m_userPersistentData);
|
||||
}
|
||||
}
|
||||
#endif //DEBUG_PERSISTENCY
|
||||
|
||||
///Releases the user data attached to a contact point.
///If pt.m_userPersistentData is non-null and gContactDestroyedCallback is set, the callback is
///invoked with that pointer and the member is then reset to 0. When no callback is installed the
///pointer is left untouched. Nothing happens when the pointer is already null.
///@param pt contact point whose m_userPersistentData is handed to the callback and cleared
void b3ContactCache::clearUserCache(btManifoldPoint& pt)
|
||||
{
|
||||
|
||||
void* oldPtr = pt.m_userPersistentData;
|
||||
if (oldPtr)
|
||||
{
|
||||
#ifdef DEBUG_PERSISTENCY
|
||||
//debug check: count how many cached points share this user pointer and report duplicates
|
||||
int i;
|
||||
|
||||
int occurance = 0;
|
||||
for (i=0;i<m_cachedPoints;i++)
|
||||
{
|
||||
if (m_pointCache[i].m_userPersistentData == oldPtr)
|
||||
{
|
||||
occurance++;
|
||||
if (occurance>1)
|
||||
printf("error in clearUserCache\n");
|
||||
}
|
||||
}
|
||||
//NOTE(review): this assertion fails as soon as one cached point holds oldPtr, which looks
|
||||
//stricter than the "more than one" error printed above -- confirm the intended bound.
|
||||
btAssert(occurance<=0);
|
||||
#endif //DEBUG_PERSISTENCY
|
||||
|
||||
//hand the pointer to the user callback first, then clear it on the point
|
||||
if (pt.m_userPersistentData && gContactDestroyedCallback)
|
||||
{
|
||||
(*gContactDestroyedCallback)(pt.m_userPersistentData);
|
||||
pt.m_userPersistentData = 0;
|
||||
}
|
||||
|
||||
#ifdef DEBUG_PERSISTENCY
|
||||
DebugPersistency();
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
static inline b3Scalar calcArea4Points(const btVector3 &p0,const btVector3 &p1,const btVector3 &p2,const btVector3 &p3)
|
||||
static inline b3Scalar calcArea4Points(const b3Vector3 &p0,const b3Vector3 &p1,const b3Vector3 &p2,const b3Vector3 &p3)
|
||||
{
|
||||
// It calculates possible 3 area constructed from random 4 points and returns the biggest one.
|
||||
|
||||
btVector3 a[3],b[3];
|
||||
b3Vector3 a[3],b[3];
|
||||
a[0] = p0 - p1;
|
||||
a[1] = p0 - p2;
|
||||
a[2] = p0 - p3;
|
||||
@@ -97,14 +43,16 @@ static inline b3Scalar calcArea4Points(const btVector3 &p0,const btVector3 &p1,c
|
||||
b[2] = p1 - p2;
|
||||
|
||||
//todo: Following 3 cross production can be easily optimized by SIMD.
|
||||
btVector3 tmp0 = a[0].cross(b[0]);
|
||||
btVector3 tmp1 = a[1].cross(b[1]);
|
||||
btVector3 tmp2 = a[2].cross(b[2]);
|
||||
b3Vector3 tmp0 = a[0].cross(b[0]);
|
||||
b3Vector3 tmp1 = a[1].cross(b[1]);
|
||||
b3Vector3 tmp2 = a[2].cross(b[2]);
|
||||
|
||||
return btMax(btMax(tmp0.length2(),tmp1.length2()),tmp2.length2());
|
||||
return b3Max(b3Max(tmp0.length2(),tmp1.length2()),tmp2.length2());
|
||||
}
|
||||
#if 0
|
||||
|
||||
int b3ContactCache::sortCachedPoints(const btManifoldPoint& pt)
|
||||
//using localPointA for all points
|
||||
int b3ContactCache::sortCachedPoints(const b3Vector3& pt)
|
||||
{
|
||||
//calculate 4 possible cases areas, and take biggest area
|
||||
//also need to keep 'deepest'
|
||||
@@ -129,32 +77,32 @@ int b3ContactCache::sortCachedPoints(const btManifoldPoint& pt)
|
||||
{
|
||||
if (maxPenetrationIndex != 0)
|
||||
{
|
||||
btVector3 a0 = pt.m_localPointA-m_pointCache[1].m_localPointA;
|
||||
btVector3 b0 = m_pointCache[3].m_localPointA-m_pointCache[2].m_localPointA;
|
||||
btVector3 cross = a0.cross(b0);
|
||||
b3Vector3 a0 = pt.m_localPointA-m_pointCache[1].m_localPointA;
|
||||
b3Vector3 b0 = m_pointCache[3].m_localPointA-m_pointCache[2].m_localPointA;
|
||||
b3Vector3 cross = a0.cross(b0);
|
||||
res0 = cross.length2();
|
||||
}
|
||||
if (maxPenetrationIndex != 1)
|
||||
{
|
||||
btVector3 a1 = pt.m_localPointA-m_pointCache[0].m_localPointA;
|
||||
btVector3 b1 = m_pointCache[3].m_localPointA-m_pointCache[2].m_localPointA;
|
||||
btVector3 cross = a1.cross(b1);
|
||||
b3Vector3 a1 = pt.m_localPointA-m_pointCache[0].m_localPointA;
|
||||
b3Vector3 b1 = m_pointCache[3].m_localPointA-m_pointCache[2].m_localPointA;
|
||||
b3Vector3 cross = a1.cross(b1);
|
||||
res1 = cross.length2();
|
||||
}
|
||||
|
||||
if (maxPenetrationIndex != 2)
|
||||
{
|
||||
btVector3 a2 = pt.m_localPointA-m_pointCache[0].m_localPointA;
|
||||
btVector3 b2 = m_pointCache[3].m_localPointA-m_pointCache[1].m_localPointA;
|
||||
btVector3 cross = a2.cross(b2);
|
||||
b3Vector3 a2 = pt.m_localPointA-m_pointCache[0].m_localPointA;
|
||||
b3Vector3 b2 = m_pointCache[3].m_localPointA-m_pointCache[1].m_localPointA;
|
||||
b3Vector3 cross = a2.cross(b2);
|
||||
res2 = cross.length2();
|
||||
}
|
||||
|
||||
if (maxPenetrationIndex != 3)
|
||||
{
|
||||
btVector3 a3 = pt.m_localPointA-m_pointCache[0].m_localPointA;
|
||||
btVector3 b3 = m_pointCache[2].m_localPointA-m_pointCache[1].m_localPointA;
|
||||
btVector3 cross = a3.cross(b3);
|
||||
b3Vector3 a3 = pt.m_localPointA-m_pointCache[0].m_localPointA;
|
||||
b3Vector3 b3 = m_pointCache[2].m_localPointA-m_pointCache[1].m_localPointA;
|
||||
b3Vector3 cross = a3.cross(b3);
|
||||
res3 = cross.length2();
|
||||
}
|
||||
}
|
||||
@@ -176,23 +124,23 @@ int b3ContactCache::sortCachedPoints(const btManifoldPoint& pt)
|
||||
res3 = calcArea4Points(pt.m_localPointA,m_pointCache[0].m_localPointA,m_pointCache[1].m_localPointA,m_pointCache[2].m_localPointA);
|
||||
}
|
||||
}
|
||||
btVector4 maxvec(res0,res1,res2,res3);
|
||||
b3Vector4 maxvec(res0,res1,res2,res3);
|
||||
int biggestarea = maxvec.closestAxis4();
|
||||
return biggestarea;
|
||||
|
||||
}
|
||||
|
||||
|
||||
int b3ContactCache::getCacheEntry(const btManifoldPoint& newPoint) const
|
||||
int b3ContactCache::getCacheEntry(const b3Vector3& newPoint) const
|
||||
{
|
||||
b3Scalar shortestDist = getContactBreakingThreshold() * getContactBreakingThreshold();
|
||||
int size = getNumContacts();
|
||||
int nearestPoint = -1;
|
||||
for( int i = 0; i < size; i++ )
|
||||
{
|
||||
const btManifoldPoint &mp = m_pointCache[i];
|
||||
const b3Vector3 &mp = m_pointCache[i];
|
||||
|
||||
btVector3 diffA = mp.m_localPointA- newPoint.m_localPointA;
|
||||
b3Vector3 diffA = mp.m_localPointA- newPoint.m_localPointA;
|
||||
const b3Scalar distToManiPoint = diffA.dot(diffA);
|
||||
if( distToManiPoint < shortestDist )
|
||||
{
|
||||
@@ -203,12 +151,9 @@ int b3ContactCache::getCacheEntry(const btManifoldPoint& newPoint) const
|
||||
return nearestPoint;
|
||||
}
|
||||
|
||||
int b3ContactCache::addManifoldPoint(const btManifoldPoint& newPoint, bool isPredictive)
|
||||
int b3ContactCache::addManifoldPoint(const b3Vector3& newPoint)
|
||||
{
|
||||
if (!isPredictive)
|
||||
{
|
||||
btAssert(validContactDistance(newPoint));
|
||||
}
|
||||
b3Assert(validContactDistance(newPoint));
|
||||
|
||||
int insertIndex = getNumContacts();
|
||||
if (insertIndex == MANIFOLD_CACHE_SIZE)
|
||||
@@ -230,74 +175,81 @@ int b3ContactCache::addManifoldPoint(const btManifoldPoint& newPoint, bool isPre
|
||||
if (insertIndex<0)
|
||||
insertIndex=0;
|
||||
|
||||
btAssert(m_pointCache[insertIndex].m_userPersistentData==0);
|
||||
//b3Assert(m_pointCache[insertIndex].m_userPersistentData==0);
|
||||
m_pointCache[insertIndex] = newPoint;
|
||||
return insertIndex;
|
||||
}
|
||||
|
||||
b3Scalar b3ContactCache::getContactBreakingThreshold() const
|
||||
#endif
|
||||
|
||||
bool b3ContactCache::validContactDistance(const b3Vector3& pt)
|
||||
{
|
||||
return m_contactBreakingThreshold;
|
||||
return pt.w <= gContactBreakingThreshold;
|
||||
}
|
||||
|
||||
|
||||
|
||||
void b3ContactCache::refreshContactPoints(const btTransform& trA,const btTransform& trB)
|
||||
///Removes contact point i from newContactCache by swapping it with the last point and
///decrementing the point count.
///The m_localPosA, m_localPosB and m_worldPos entries of slot i and of the last slot are exchanged
///(skipped when i already is the last slot); the count is then set to numContacts-1 through
///b3Contact4Data_setNumPoints. The order of the remaining points is therefore not preserved.
///@param newContactCache contact data that is modified in place
///@param i index of the point to remove
///NOTE(review): i is not range-checked here and the count is decremented even when it is 0;
///presumably callers only pass 0 <= i < numContacts -- confirm.
void b3ContactCache::removeContactPoint(struct b3Contact4Data& newContactCache,int i)
|
||||
{
|
||||
int numContacts = b3Contact4Data_getNumPoints(&newContactCache);
|
||||
//move the last point into the freed slot unless the removed point is the last one
|
||||
if (i!=(numContacts-1))
|
||||
{
|
||||
b3Swap(newContactCache.m_localPosA[i],newContactCache.m_localPosA[numContacts-1]);
|
||||
b3Swap(newContactCache.m_localPosB[i],newContactCache.m_localPosB[numContacts-1]);
|
||||
b3Swap(newContactCache.m_worldPos[i],newContactCache.m_worldPos[numContacts-1]);
|
||||
}
|
||||
b3Contact4Data_setNumPoints(&newContactCache,numContacts-1);
|
||||
|
||||
}
|
||||
|
||||
void b3ContactCache::refreshContactPoints(const b3Transform& trA,const b3Transform& trB, struct b3Contact4Data& contacts)
|
||||
{
|
||||
|
||||
int numContacts = b3Contact4Data_getNumPoints(&contacts);
|
||||
|
||||
|
||||
int i;
|
||||
#ifdef DEBUG_PERSISTENCY
|
||||
printf("refreshContactPoints posA = (%f,%f,%f) posB = (%f,%f,%f)\n",
|
||||
trA.getOrigin().getX(),
|
||||
trA.getOrigin().getY(),
|
||||
trA.getOrigin().getZ(),
|
||||
trB.getOrigin().getX(),
|
||||
trB.getOrigin().getY(),
|
||||
trB.getOrigin().getZ());
|
||||
#endif //DEBUG_PERSISTENCY
|
||||
/// first refresh worldspace positions and distance
|
||||
for (i=getNumContacts()-1;i>=0;i--)
|
||||
for (i=numContacts-1;i>=0;i--)
|
||||
{
|
||||
btManifoldPoint &manifoldPoint = m_pointCache[i];
|
||||
manifoldPoint.m_positionWorldOnA = trA( manifoldPoint.m_localPointA );
|
||||
manifoldPoint.m_positionWorldOnB = trB( manifoldPoint.m_localPointB );
|
||||
manifoldPoint.m_distance1 = (manifoldPoint.m_positionWorldOnA - manifoldPoint.m_positionWorldOnB).dot(manifoldPoint.m_normalWorldOnB);
|
||||
manifoldPoint.m_lifeTime++;
|
||||
b3Vector3 worldPosA = trA( contacts.m_localPosA[i]);
|
||||
b3Vector3 worldPosB = trB( contacts.m_localPosB[i]);
|
||||
contacts.m_worldPos[i] = worldPosB;
|
||||
float distance = (worldPosA - worldPosB).dot(contacts.m_worldNormal);
|
||||
contacts.m_worldPos[i].w = distance;
|
||||
}
|
||||
|
||||
/// then
|
||||
b3Scalar distance2d;
|
||||
btVector3 projectedDifference,projectedPoint;
|
||||
for (i=getNumContacts()-1;i>=0;i--)
|
||||
b3Vector3 projectedDifference,projectedPoint;
|
||||
for (i=numContacts-1;i>=0;i--)
|
||||
{
|
||||
|
||||
btManifoldPoint &manifoldPoint = m_pointCache[i];
|
||||
b3Vector3 worldPosA = trA( contacts.m_localPosA[i]);
|
||||
b3Vector3 worldPosB = trB( contacts.m_localPosB[i]);
|
||||
b3Vector3&pt = contacts.m_worldPos[i];
|
||||
//contact becomes invalid when signed distance exceeds margin (projected on contactnormal direction)
|
||||
if (!validContactDistance(manifoldPoint))
|
||||
if (!validContactDistance(pt))
|
||||
{
|
||||
removeContactPoint(i);
|
||||
removeContactPoint(contacts,i);
|
||||
} else
|
||||
{
|
||||
//contact also becomes invalid when relative movement orthogonal to normal exceeds margin
|
||||
projectedPoint = manifoldPoint.m_positionWorldOnA - manifoldPoint.m_normalWorldOnB * manifoldPoint.m_distance1;
|
||||
projectedDifference = manifoldPoint.m_positionWorldOnB - projectedPoint;
|
||||
projectedPoint = contacts.m_worldPos[i] - contacts.m_worldNormal * contacts.m_worldPos[i].w;
|
||||
projectedDifference = contacts.m_worldPos[i] - projectedPoint;
|
||||
distance2d = projectedDifference.dot(projectedDifference);
|
||||
if (distance2d > getContactBreakingThreshold()*getContactBreakingThreshold() )
|
||||
if (distance2d > gContactBreakingThreshold*gContactBreakingThreshold )
|
||||
{
|
||||
removeContactPoint(i);
|
||||
removeContactPoint(contacts,i);
|
||||
} else
|
||||
{
|
||||
//contact point processed callback
|
||||
if (gContactProcessedCallback)
|
||||
(*gContactProcessedCallback)(manifoldPoint,(void*)m_body0,(void*)m_body1);
|
||||
////contact point processed callback
|
||||
//if (gContactProcessedCallback)
|
||||
// (*gContactProcessedCallback)(manifoldPoint,(void*)m_body0,(void*)m_body1);
|
||||
}
|
||||
}
|
||||
}
|
||||
#ifdef DEBUG_PERSISTENCY
|
||||
DebugPersistency();
|
||||
#endif //
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
|
||||
/*
|
||||
Bullet Continuous Collision Detection and Physics Library
|
||||
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
|
||||
Copyright (c) 2003-2013 Erwin Coumans http://bulletphysics.org
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
@@ -18,23 +18,15 @@ subject to the following restrictions:
|
||||
#define B3_CONTACT_CACHE_H
|
||||
|
||||
|
||||
#include "LinearMath/btVector3.h"
|
||||
#include "LinearMath/btTransform.h"
|
||||
#include "btManifoldPoint.h"
|
||||
class btCollisionObject;
|
||||
#include "LinearMath/btAlignedAllocator.h"
|
||||
#include "Bullet3Common/b3Vector3.h"
|
||||
#include "Bullet3Common/b3Transform.h"
|
||||
#include "Bullet3Common/b3AlignedAllocator.h"
|
||||
|
||||
struct btCollisionResult;
|
||||
|
||||
///maximum contact breaking and merging threshold
|
||||
extern b3Scalar gContactBreakingThreshold;
|
||||
|
||||
//the enum starts at 1024 to avoid type conflicts with btTypedConstraint
|
||||
enum btContactManifoldTypes
|
||||
{
|
||||
MIN_CONTACT_MANIFOLD_TYPE = 1024,
|
||||
BT_PERSISTENT_MANIFOLD_TYPE
|
||||
};
|
||||
|
||||
|
||||
#define MANIFOLD_CACHE_SIZE 4
|
||||
|
||||
@@ -45,8 +37,6 @@ enum btContactManifoldTypes
|
||||
///reduces the cache to 4 points when more than 4 points are added, using the following rules:
|
||||
///the contact point with the deepest penetration is always kept, and it tries to maximize the area covered by the points
|
||||
///note that some pairs of objects might have more than one contact manifold.
|
||||
|
||||
|
||||
B3_ATTRIBUTE_ALIGNED16( class) b3ContactCache
|
||||
{
|
||||
|
||||
@@ -54,163 +44,36 @@ B3_ATTRIBUTE_ALIGNED16( class) b3ContactCache
|
||||
|
||||
|
||||
/// sort cached points so most isolated points come first
|
||||
int sortCachedPoints(const btManifoldPoint& pt);
|
||||
int sortCachedPoints(const b3Vector3& pt);
|
||||
|
||||
|
||||
int findContactPoint(const btManifoldPoint* unUsed, int numUnused,const btManifoldPoint& pt);
|
||||
|
||||
public:
|
||||
|
||||
BT_DECLARE_ALIGNED_ALLOCATOR();
|
||||
B3_DECLARE_ALIGNED_ALLOCATOR();
|
||||
|
||||
|
||||
int m_index1a;
|
||||
|
||||
b3ContactCache();
|
||||
int addManifoldPoint( const b3Vector3& newPoint);
|
||||
|
||||
b3ContactCache(const btCollisionObject* body0,const btCollisionObject* body1,int , b3Scalar contactBreakingThreshold,b3Scalar contactProcessingThreshold)
|
||||
: btTypedObject(BT_PERSISTENT_MANIFOLD_TYPE),
|
||||
m_body0(body0),m_body1(body1),m_cachedPoints(0),
|
||||
m_contactBreakingThreshold(contactBreakingThreshold),
|
||||
m_contactProcessingThreshold(contactProcessingThreshold)
|
||||
/*void replaceContactPoint(const b3Vector3& newPoint,int insertIndex)
|
||||
{
|
||||
}
|
||||
|
||||
B3_FORCE_INLINE const btCollisionObject* getBody0() const { return m_body0;}
|
||||
B3_FORCE_INLINE const btCollisionObject* getBody1() const { return m_body1;}
|
||||
|
||||
void setBodies(const btCollisionObject* body0,const btCollisionObject* body1)
|
||||
{
|
||||
m_body0 = body0;
|
||||
m_body1 = body1;
|
||||
}
|
||||
|
||||
void clearUserCache(btManifoldPoint& pt);
|
||||
|
||||
#ifdef DEBUG_PERSISTENCY
|
||||
void DebugPersistency();
|
||||
#endif //
|
||||
|
||||
B3_FORCE_INLINE int getNumContacts() const { return m_cachedPoints;}
|
||||
/// the setNumContacts API is usually not used, except when you gather/fill all contacts manually
|
||||
void setNumContacts(int cachedPoints)
|
||||
{
|
||||
m_cachedPoints = cachedPoints;
|
||||
}
|
||||
|
||||
|
||||
B3_FORCE_INLINE const btManifoldPoint& getContactPoint(int index) const
|
||||
{
|
||||
btAssert(index < m_cachedPoints);
|
||||
return m_pointCache[index];
|
||||
}
|
||||
|
||||
B3_FORCE_INLINE btManifoldPoint& getContactPoint(int index)
|
||||
{
|
||||
btAssert(index < m_cachedPoints);
|
||||
return m_pointCache[index];
|
||||
}
|
||||
|
||||
|
||||
void setContactBreakingThreshold(b3Scalar contactBreakingThreshold)
|
||||
{
|
||||
m_contactBreakingThreshold = contactBreakingThreshold;
|
||||
}
|
||||
|
||||
void setContactProcessingThreshold(b3Scalar contactProcessingThreshold)
|
||||
{
|
||||
m_contactProcessingThreshold = contactProcessingThreshold;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
int getCacheEntry(const btManifoldPoint& newPoint) const;
|
||||
|
||||
int addManifoldPoint( const btManifoldPoint& newPoint, bool isPredictive=false);
|
||||
|
||||
void removeContactPoint (int index)
|
||||
{
|
||||
clearUserCache(m_pointCache[index]);
|
||||
|
||||
int lastUsedIndex = getNumContacts() - 1;
|
||||
// m_pointCache[index] = m_pointCache[lastUsedIndex];
|
||||
if(index != lastUsedIndex)
|
||||
{
|
||||
m_pointCache[index] = m_pointCache[lastUsedIndex];
|
||||
//get rid of duplicated userPersistentData pointer
|
||||
m_pointCache[lastUsedIndex].m_userPersistentData = 0;
|
||||
m_pointCache[lastUsedIndex].m_appliedImpulse = 0.f;
|
||||
m_pointCache[lastUsedIndex].m_lateralFrictionInitialized = false;
|
||||
m_pointCache[lastUsedIndex].m_appliedImpulseLateral1 = 0.f;
|
||||
m_pointCache[lastUsedIndex].m_appliedImpulseLateral2 = 0.f;
|
||||
m_pointCache[lastUsedIndex].m_lifeTime = 0;
|
||||
}
|
||||
|
||||
btAssert(m_pointCache[lastUsedIndex].m_userPersistentData==0);
|
||||
m_cachedPoints--;
|
||||
}
|
||||
void replaceContactPoint(const btManifoldPoint& newPoint,int insertIndex)
|
||||
{
|
||||
btAssert(validContactDistance(newPoint));
|
||||
|
||||
#define MAINTAIN_PERSISTENCY 1
|
||||
#ifdef MAINTAIN_PERSISTENCY
|
||||
int lifeTime = m_pointCache[insertIndex].getLifeTime();
|
||||
b3Scalar appliedImpulse = m_pointCache[insertIndex].m_appliedImpulse;
|
||||
b3Scalar appliedLateralImpulse1 = m_pointCache[insertIndex].m_appliedImpulseLateral1;
|
||||
b3Scalar appliedLateralImpulse2 = m_pointCache[insertIndex].m_appliedImpulseLateral2;
|
||||
// bool isLateralFrictionInitialized = m_pointCache[insertIndex].m_lateralFrictionInitialized;
|
||||
|
||||
|
||||
|
||||
btAssert(lifeTime>=0);
|
||||
void* cache = m_pointCache[insertIndex].m_userPersistentData;
|
||||
|
||||
b3Assert(validContactDistance(newPoint));
|
||||
m_pointCache[insertIndex] = newPoint;
|
||||
|
||||
m_pointCache[insertIndex].m_userPersistentData = cache;
|
||||
m_pointCache[insertIndex].m_appliedImpulse = appliedImpulse;
|
||||
m_pointCache[insertIndex].m_appliedImpulseLateral1 = appliedLateralImpulse1;
|
||||
m_pointCache[insertIndex].m_appliedImpulseLateral2 = appliedLateralImpulse2;
|
||||
|
||||
m_pointCache[insertIndex].m_appliedImpulse = appliedImpulse;
|
||||
m_pointCache[insertIndex].m_appliedImpulseLateral1 = appliedLateralImpulse1;
|
||||
m_pointCache[insertIndex].m_appliedImpulseLateral2 = appliedLateralImpulse2;
|
||||
|
||||
|
||||
m_pointCache[insertIndex].m_lifeTime = lifeTime;
|
||||
#else
|
||||
clearUserCache(m_pointCache[insertIndex]);
|
||||
m_pointCache[insertIndex] = newPoint;
|
||||
|
||||
#endif
|
||||
}
|
||||
*/
|
||||
|
||||
|
||||
bool validContactDistance(const btManifoldPoint& pt) const
|
||||
{
|
||||
return pt.m_distance1 <= getContactBreakingThreshold();
|
||||
}
|
||||
|
||||
static bool validContactDistance(const b3Vector3& pt);
|
||||
|
||||
/// calculates new worldspace coordinates and depth, and rejects points that exceed the collision margin
|
||||
void refreshContactPoints( const btTransform& trA,const btTransform& trB);
|
||||
|
||||
|
||||
B3_FORCE_INLINE void clearManifold()
|
||||
{
|
||||
int i;
|
||||
for (i=0;i<m_cachedPoints;i++)
|
||||
{
|
||||
clearUserCache(m_pointCache[i]);
|
||||
}
|
||||
m_cachedPoints = 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
;
|
||||
static void refreshContactPoints( const b3Transform& trA,const b3Transform& trB, struct b3Contact4Data& newContactCache);
|
||||
|
||||
static void removeContactPoint(struct b3Contact4Data& newContactCache,int i);
|
||||
|
||||
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -19,13 +19,14 @@ subject to the following restrictions:
|
||||
///And contact clipping based on work from Simon Hobbs
|
||||
|
||||
//#define B3_DEBUG_SAT_FACE
|
||||
//#define CHECK_ON_HOST
|
||||
|
||||
int b3g_actualSATPairTests=0;
|
||||
|
||||
#include "b3ConvexHullContact.h"
|
||||
#include <string.h>//memcpy
|
||||
#include "b3ConvexPolyhedronCL.h"
|
||||
|
||||
#include "Bullet3OpenCL/NarrowphaseCollision/b3ContactCache.h"
|
||||
|
||||
typedef b3AlignedObjectArray<b3Vector3> b3VertexArray;
|
||||
|
||||
@@ -1603,7 +1604,7 @@ int computeContactConvexConvex( b3AlignedObjectArray<b3Int4>& pairs,
|
||||
transB.setRotation(rigidBodies[bodyIndexB].m_quat);
|
||||
float maximumDistanceSquared = 1e30f;
|
||||
|
||||
b3Vector3 resultPointOnB;
|
||||
b3Vector3 resultPointOnBWorld;
|
||||
b3Vector3 sepAxis2(0,1,0);
|
||||
b3Scalar distance2 = 1e30f;
|
||||
|
||||
@@ -1618,7 +1619,7 @@ int computeContactConvexConvex( b3AlignedObjectArray<b3Int4>& pairs,
|
||||
maximumDistanceSquared,
|
||||
sepAxis2,
|
||||
distance2,
|
||||
resultPointOnB);
|
||||
resultPointOnBWorld);
|
||||
|
||||
|
||||
if (result2)
|
||||
@@ -1627,31 +1628,58 @@ int computeContactConvexConvex( b3AlignedObjectArray<b3Int4>& pairs,
|
||||
{
|
||||
contactIndex = nGlobalContactsOut;
|
||||
globalContactsOut.expand();
|
||||
b3Contact4& contact = globalContactsOut.at(nGlobalContactsOut);
|
||||
contact.m_batchIdx = 0;//i;
|
||||
contact.m_bodyAPtrAndSignBit = (rigidBodies.at(bodyIndexA).m_invMass==0)? -bodyIndexA:bodyIndexA;
|
||||
contact.m_bodyBPtrAndSignBit = (rigidBodies.at(bodyIndexB).m_invMass==0)? -bodyIndexB:bodyIndexB;
|
||||
b3Contact4& newContact = globalContactsOut.at(nGlobalContactsOut);
|
||||
newContact.m_batchIdx = 0;//i;
|
||||
newContact.m_bodyAPtrAndSignBit = (rigidBodies.at(bodyIndexA).m_invMass==0)? -bodyIndexA:bodyIndexA;
|
||||
newContact.m_bodyBPtrAndSignBit = (rigidBodies.at(bodyIndexB).m_invMass==0)? -bodyIndexB:bodyIndexB;
|
||||
|
||||
contact.m_frictionCoeffCmp = 45874;
|
||||
contact.m_restituitionCoeffCmp = 0;
|
||||
newContact.m_frictionCoeffCmp = 45874;
|
||||
newContact.m_restituitionCoeffCmp = 0;
|
||||
|
||||
|
||||
int numPoints = 1;
|
||||
if (pairs[pairIndex].z>=0)
|
||||
int numPoints = 0;
|
||||
if (0)//pairs[pairIndex].z>=0)
|
||||
{
|
||||
printf("add existing points?\n");
|
||||
//printf("add existing points?\n");
|
||||
//refresh
|
||||
|
||||
int numOldPoints = oldContacts[pairs[pairIndex].z].getNPoints();
|
||||
if (numOldPoints)
|
||||
{
|
||||
newContact = oldContacts[pairs[pairIndex].z];
|
||||
//b3ContactCache::refreshContactPoints(transA,transB,newContact);
|
||||
}
|
||||
numPoints = b3Contact4Data_getNumPoints(&newContact);
|
||||
|
||||
}
|
||||
for (int p=0;p<numPoints;p++)
|
||||
|
||||
/*
|
||||
int insertIndex = m_manifoldPtr->getCacheEntry(newPt);
|
||||
if (insertIndex >= 0)
|
||||
{
|
||||
resultPointOnB.w = distance2;
|
||||
//const btManifoldPoint& oldPoint = m_manifoldPtr->getContactPoint(insertIndex);
|
||||
m_manifoldPtr->replaceContactPoint(newPt,insertIndex);
|
||||
} else
|
||||
{
|
||||
insertIndex = m_manifoldPtr->addManifoldPoint(newPt);
|
||||
}
|
||||
*/
|
||||
|
||||
contact.m_worldPos[p] = resultPointOnB;
|
||||
|
||||
contact.m_worldNormal = -sepAxis2;
|
||||
int p=numPoints;
|
||||
if (numPoints<3)
|
||||
{
|
||||
numPoints++;
|
||||
}
|
||||
{
|
||||
resultPointOnBWorld.w = distance2;
|
||||
newContact.m_worldPos[p] = resultPointOnBWorld;
|
||||
b3Vector3 resultPointOnAWorld = resultPointOnBWorld+distance2*sepAxis2;
|
||||
//newContact.m_localPosA[p] = transA.inverse()*resultPointOnAWorld;
|
||||
// newContact.m_localPosB[p] = transB.inverse()*resultPointOnBWorld;
|
||||
newContact.m_worldNormal = sepAxis2;
|
||||
}
|
||||
//printf("bodyIndexA %d,bodyIndexB %d,normal=%f,%f,%f numPoints %d\n",bodyIndexA,bodyIndexB,normalOnSurfaceB.x,normalOnSurfaceB.y,normalOnSurfaceB.z,numPoints);
|
||||
contact.m_worldNormal.w = (b3Scalar)numPoints;
|
||||
newContact.m_worldNormal.w = (b3Scalar)numPoints;
|
||||
nGlobalContactsOut++;
|
||||
} else
|
||||
{
|
||||
@@ -1797,7 +1825,7 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>*
|
||||
return;
|
||||
|
||||
|
||||
//#define CHECK_ON_HOST
|
||||
|
||||
#ifdef CHECK_ON_HOST
|
||||
b3AlignedObjectArray<b3YetAnotherAabb> hostAabbs;
|
||||
clAabbsWS.copyToHost(hostAabbs);
|
||||
@@ -1909,9 +1937,12 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>*
|
||||
hostCollidables[collidableIndexB].m_shapeType == SHAPE_CONVEX_HULL)
|
||||
{
|
||||
//printf("hostPairs[i].z=%d\n",hostPairs[i].z);
|
||||
int contactIndex = computeContactConvexConvex(hostPairs,i,bodyIndexA,bodyIndexB,collidableIndexA,collidableIndexB,hostBodyBuf,
|
||||
hostCollidables,hostConvexData,hostVertices,hostUniqueEdges,hostIndices,hostFaces,hostContacts,nContacts,maxContactCapacity,
|
||||
oldHostContacts);
|
||||
int contactIndex = computeContactConvexConvex2(i,bodyIndexA,bodyIndexB,collidableIndexA,collidableIndexB,hostBodyBuf,
|
||||
hostCollidables,hostConvexData,hostVertices,hostUniqueEdges,hostIndices,hostFaces,hostContacts,nContacts,maxContactCapacity,oldHostContacts);
|
||||
//int contactIndex = computeContactConvexConvex(hostPairs,i,bodyIndexA,bodyIndexB,collidableIndexA,collidableIndexB,hostBodyBuf,
|
||||
// hostCollidables,hostConvexData,hostVertices,hostUniqueEdges,hostIndices,hostFaces,hostContacts,nContacts,maxContactCapacity,
|
||||
// oldHostContacts);
|
||||
|
||||
|
||||
if (contactIndex>=0)
|
||||
{
|
||||
|
||||
@@ -2,17 +2,13 @@
|
||||
static const char* bvhTraversalKernelCL= \
|
||||
"//keep this enum in sync with the CPU version (in btCollidable.h)\n"
|
||||
"//written by Erwin Coumans\n"
|
||||
"\n"
|
||||
"#define SHAPE_CONVEX_HULL 3\n"
|
||||
"#define SHAPE_CONCAVE_TRIMESH 5\n"
|
||||
"#define TRIANGLE_NUM_CONVEX_FACES 5\n"
|
||||
"#define SHAPE_COMPOUND_OF_CONVEX_HULLS 6\n"
|
||||
"#define SHAPE_SPHERE 7\n"
|
||||
"\n"
|
||||
"typedef unsigned int u32;\n"
|
||||
"\n"
|
||||
"#define MAX_NUM_PARTS_IN_BITS 10\n"
|
||||
"\n"
|
||||
"///btQuantizedBvhNode is a compressed aabb node, 16 bytes.\n"
|
||||
"///Node can be used for leafnode or internal node. Leafnodes can point to 32-bit triangle index (non-negative range).\n"
|
||||
"typedef struct\n"
|
||||
@@ -23,7 +19,6 @@ static const char* bvhTraversalKernelCL= \
|
||||
" //4 bytes\n"
|
||||
" int m_escapeIndexOrTriangleIndex;\n"
|
||||
"} btQuantizedBvhNode;\n"
|
||||
"\n"
|
||||
"typedef struct\n"
|
||||
"{\n"
|
||||
" float4 m_aabbMin;\n"
|
||||
@@ -33,9 +28,7 @@ static const char* bvhTraversalKernelCL= \
|
||||
" int m_numSubTrees;\n"
|
||||
" int m_nodeOffset;\n"
|
||||
" int m_subTreeOffset;\n"
|
||||
"\n"
|
||||
"} b3BvhInfo;\n"
|
||||
"\n"
|
||||
"/*\n"
|
||||
" bool isLeafNode() const\n"
|
||||
" {\n"
|
||||
@@ -62,7 +55,6 @@ static const char* bvhTraversalKernelCL= \
|
||||
" return (m_escapeIndexOrTriangleIndex>>(31-MAX_NUM_PARTS_IN_BITS));\n"
|
||||
" }\n"
|
||||
"*/\n"
|
||||
"\n"
|
||||
"int getTriangleIndex(const btQuantizedBvhNode* rootNode)\n"
|
||||
"{\n"
|
||||
" unsigned int x=0;\n"
|
||||
@@ -70,7 +62,6 @@ static const char* bvhTraversalKernelCL= \
|
||||
" // Get only the lower bits where the triangle index is stored\n"
|
||||
" return (rootNode->m_escapeIndexOrTriangleIndex&~(y));\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"int isLeaf(const btQuantizedBvhNode* rootNode)\n"
|
||||
"{\n"
|
||||
" //skipindex is negative (internal node), triangleindex >=0 (leafnode)\n"
|
||||
@@ -81,7 +72,6 @@ static const char* bvhTraversalKernelCL= \
|
||||
"{\n"
|
||||
" return -rootNode->m_escapeIndexOrTriangleIndex;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"typedef struct\n"
|
||||
"{\n"
|
||||
" //12 bytes\n"
|
||||
@@ -93,7 +83,6 @@ static const char* bvhTraversalKernelCL= \
|
||||
" int m_subtreeSize;\n"
|
||||
" int m_padding[3];\n"
|
||||
"} btBvhSubtreeInfo;\n"
|
||||
"\n"
|
||||
"///keep this in sync with btCollidable.h\n"
|
||||
"typedef struct\n"
|
||||
"{\n"
|
||||
@@ -103,7 +92,6 @@ static const char* bvhTraversalKernelCL= \
|
||||
" int m_shapeIndex;\n"
|
||||
" \n"
|
||||
"} btCollidableGpu;\n"
|
||||
"\n"
|
||||
"typedef struct\n"
|
||||
"{\n"
|
||||
" float4 m_childPosition;\n"
|
||||
@@ -113,21 +101,17 @@ static const char* bvhTraversalKernelCL= \
|
||||
" int m_unused1;\n"
|
||||
" int m_unused2;\n"
|
||||
"} btGpuChildShape;\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"typedef struct\n"
|
||||
"{\n"
|
||||
" float4 m_pos;\n"
|
||||
" float4 m_quat;\n"
|
||||
" float4 m_linVel;\n"
|
||||
" float4 m_angVel;\n"
|
||||
"\n"
|
||||
" u32 m_collidableIdx;\n"
|
||||
" float m_invMass;\n"
|
||||
" float m_restituitionCoeff;\n"
|
||||
" float m_frictionCoeff;\n"
|
||||
"} BodyData;\n"
|
||||
"\n"
|
||||
"typedef struct \n"
|
||||
"{\n"
|
||||
" union\n"
|
||||
@@ -143,8 +127,6 @@ static const char* bvhTraversalKernelCL= \
|
||||
" int m_maxIndices[4];\n"
|
||||
" };\n"
|
||||
"} btAabbCL;\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"int testQuantizedAabbAgainstQuantizedAabb(\n"
|
||||
" const unsigned short int* aabbMin1,\n"
|
||||
" const unsigned short int* aabbMax1,\n"
|
||||
@@ -170,13 +152,10 @@ static const char* bvhTraversalKernelCL= \
|
||||
" //overlap = ((aabbMin1[1] > aabbMax2[1]) || (aabbMax1[1] < aabbMin2[1])) ? 0 : overlap;\n"
|
||||
" //return overlap;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"void quantizeWithClamp(unsigned short* out, float4 point2,int isMax, float4 bvhAabbMin, float4 bvhAabbMax, float4 bvhQuantization)\n"
|
||||
"{\n"
|
||||
" float4 clampedPoint = max(point2,bvhAabbMin);\n"
|
||||
" clampedPoint = min (clampedPoint, bvhAabbMax);\n"
|
||||
"\n"
|
||||
" float4 v = (clampedPoint - bvhAabbMin) * bvhQuantization;\n"
|
||||
" if (isMax)\n"
|
||||
" {\n"
|
||||
@@ -189,10 +168,7 @@ static const char* bvhTraversalKernelCL= \
|
||||
" out[1] = (unsigned short) (((unsigned short)(v.y) & 0xfffe));\n"
|
||||
" out[2] = (unsigned short) (((unsigned short)(v.z) & 0xfffe));\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"// work-in-progress\n"
|
||||
"__kernel void bvhTraversalKernel( __global const int4* pairs, \n"
|
||||
" __global const BodyData* rigidBodies, \n"
|
||||
@@ -223,7 +199,6 @@ static const char* bvhTraversalKernelCL= \
|
||||
" \n"
|
||||
" if (collidables[collidableIndexA].m_shapeType!=SHAPE_CONCAVE_TRIMESH)\n"
|
||||
" return;\n"
|
||||
"\n"
|
||||
" int shapeTypeB = collidables[collidableIndexB].m_shapeType;\n"
|
||||
" \n"
|
||||
" if (shapeTypeB!=SHAPE_CONVEX_HULL &&\n"
|
||||
@@ -231,9 +206,7 @@ static const char* bvhTraversalKernelCL= \
|
||||
" shapeTypeB!=SHAPE_COMPOUND_OF_CONVEX_HULLS\n"
|
||||
" )\n"
|
||||
" return;\n"
|
||||
"\n"
|
||||
" b3BvhInfo bvhInfo = bvhInfos[collidables[collidableIndexA].m_numChildShapes];\n"
|
||||
"\n"
|
||||
" float4 bvhAabbMin = bvhInfo.m_aabbMin;\n"
|
||||
" float4 bvhAabbMax = bvhInfo.m_aabbMax;\n"
|
||||
" float4 bvhQuantization = bvhInfo.m_quantization;\n"
|
||||
@@ -241,7 +214,6 @@ static const char* bvhTraversalKernelCL= \
|
||||
" __global const btBvhSubtreeInfo* subtreeHeaders = &subtreeHeadersRoot[bvhInfo.m_subTreeOffset];\n"
|
||||
" __global const btQuantizedBvhNode* quantizedNodes = &quantizedNodesRoot[bvhInfo.m_nodeOffset];\n"
|
||||
" \n"
|
||||
"\n"
|
||||
" unsigned short int quantizedQueryAabbMin[3];\n"
|
||||
" unsigned short int quantizedQueryAabbMax[3];\n"
|
||||
" quantizeWithClamp(quantizedQueryAabbMin,aabbs[bodyIndexB].m_min,false,bvhAabbMin, bvhAabbMax,bvhQuantization);\n"
|
||||
@@ -308,6 +280,5 @@ static const char* bvhTraversalKernelCL= \
|
||||
" }\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
"}\n"
|
||||
;
|
||||
|
||||
@@ -345,7 +345,7 @@ void computeContactSphereConvex(int pairIndex,
|
||||
__global const float4* convexVertices,
|
||||
__global const int* convexIndices,
|
||||
__global const btGpuFace* faces,
|
||||
__global b3Contact4Data* restrict globalContactsOut,
|
||||
__global struct b3Contact4Data* restrict globalContactsOut,
|
||||
counter32_t nGlobalContactsOut,
|
||||
int maxContactCapacity,
|
||||
float4 spherePos2,
|
||||
@@ -466,9 +466,9 @@ void computeContactSphereConvex(int pairIndex,
|
||||
|
||||
if (1)//dstIdx < maxContactCapacity)
|
||||
{
|
||||
__global b3Contact4Data* c = &globalContactsOut[dstIdx];
|
||||
__global struct b3Contact4Data* c = &globalContactsOut[dstIdx];
|
||||
c->m_worldNormal = normalOnSurfaceB1;
|
||||
c->m_coeffs = (u32)(0.f*0xffff) | ((u32)(0.7f*0xffff)<<16);
|
||||
c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);
|
||||
c->m_batchIdx = pairIndex;
|
||||
c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA;
|
||||
c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB;
|
||||
@@ -590,7 +590,7 @@ int computeContactPlaneConvex(int pairIndex,
|
||||
__global const float4* convexVertices,
|
||||
__global const int* convexIndices,
|
||||
__global const btGpuFace* faces,
|
||||
__global b3Contact4Data* restrict globalContactsOut,
|
||||
__global struct b3Contact4Data* restrict globalContactsOut,
|
||||
counter32_t nGlobalContactsOut,
|
||||
int maxContactCapacity,
|
||||
float4 posB,
|
||||
@@ -692,11 +692,11 @@ int computeContactPlaneConvex(int pairIndex,
|
||||
if (dstIdx < maxContactCapacity)
|
||||
{
|
||||
resultIndex = dstIdx;
|
||||
__global b3Contact4Data* c = &globalContactsOut[dstIdx];
|
||||
__global struct b3Contact4Data* c = &globalContactsOut[dstIdx];
|
||||
c->m_worldNormal = planeNormalWorld;
|
||||
//c->setFrictionCoeff(0.7);
|
||||
//c->setRestituitionCoeff(0.f);
|
||||
c->m_coeffs = (u32)(0.f*0xffff) | ((u32)(0.7f*0xffff)<<16);
|
||||
c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);
|
||||
c->m_batchIdx = pairIndex;
|
||||
c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA;
|
||||
c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB;
|
||||
@@ -732,7 +732,7 @@ void computeContactPlaneSphere(int pairIndex,
|
||||
__global const BodyData* rigidBodies,
|
||||
__global const btCollidableGpu* collidables,
|
||||
__global const btGpuFace* faces,
|
||||
__global b3Contact4Data* restrict globalContactsOut,
|
||||
__global struct b3Contact4Data* restrict globalContactsOut,
|
||||
counter32_t nGlobalContactsOut,
|
||||
int maxContactCapacity)
|
||||
{
|
||||
@@ -775,9 +775,9 @@ void computeContactPlaneSphere(int pairIndex,
|
||||
|
||||
if (dstIdx < maxContactCapacity)
|
||||
{
|
||||
__global b3Contact4Data* c = &globalContactsOut[dstIdx];
|
||||
__global struct b3Contact4Data* c = &globalContactsOut[dstIdx];
|
||||
c->m_worldNormal = normalOnSurfaceB1;
|
||||
c->m_coeffs = (u32)(0.f*0xffff) | ((u32)(0.7f*0xffff)<<16);
|
||||
c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);
|
||||
c->m_batchIdx = pairIndex;
|
||||
c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA;
|
||||
c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB;
|
||||
@@ -798,7 +798,7 @@ __kernel void primitiveContactsKernel( __global int4* pairs,
|
||||
__global const float4* uniqueEdges,
|
||||
__global const btGpuFace* faces,
|
||||
__global const int* indices,
|
||||
__global b3Contact4Data* restrict globalContactsOut,
|
||||
__global struct b3Contact4Data* restrict globalContactsOut,
|
||||
counter32_t nGlobalContactsOut,
|
||||
int numPairs, int maxContactCapacity)
|
||||
{
|
||||
@@ -953,9 +953,9 @@ __kernel void primitiveContactsKernel( __global int4* pairs,
|
||||
|
||||
if (dstIdx < maxContactCapacity)
|
||||
{
|
||||
__global b3Contact4Data* c = &globalContactsOut[dstIdx];
|
||||
__global struct b3Contact4Data* c = &globalContactsOut[dstIdx];
|
||||
c->m_worldNormal = -normalOnSurfaceB;
|
||||
c->m_coeffs = (u32)(0.f*0xffff) | ((u32)(0.7f*0xffff)<<16);
|
||||
c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);
|
||||
c->m_batchIdx = pairIndex;
|
||||
int bodyA = pairs[pairIndex].x;
|
||||
int bodyB = pairs[pairIndex].y;
|
||||
@@ -987,7 +987,7 @@ __kernel void processCompoundPairsPrimitivesKernel( __global const int4* gpuCo
|
||||
__global const int* indices,
|
||||
__global btAabbCL* aabbs,
|
||||
__global const btGpuChildShape* gpuChildShapes,
|
||||
__global b3Contact4Data* restrict globalContactsOut,
|
||||
__global struct b3Contact4Data* restrict globalContactsOut,
|
||||
counter32_t nGlobalContactsOut,
|
||||
int numCompoundPairs, int maxContactCapacity
|
||||
)
|
||||
@@ -1166,7 +1166,7 @@ void computeContactSphereTriangle(int pairIndex,
|
||||
__global const BodyData* rigidBodies,
|
||||
__global const btCollidableGpu* collidables,
|
||||
const float4* triangleVertices,
|
||||
__global b3Contact4Data* restrict globalContactsOut,
|
||||
__global struct b3Contact4Data* restrict globalContactsOut,
|
||||
counter32_t nGlobalContactsOut,
|
||||
int maxContactCapacity,
|
||||
float4 spherePos2,
|
||||
@@ -1293,9 +1293,9 @@ void computeContactSphereTriangle(int pairIndex,
|
||||
|
||||
if (dstIdx < maxContactCapacity)
|
||||
{
|
||||
__global b3Contact4Data* c = &globalContactsOut[dstIdx];
|
||||
__global struct b3Contact4Data* c = &globalContactsOut[dstIdx];
|
||||
c->m_worldNormal = normalOnSurfaceB1;
|
||||
c->m_coeffs = (u32)(0.f*0xffff) | ((u32)(0.7f*0xffff)<<16);
|
||||
c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);
|
||||
c->m_batchIdx = pairIndex;
|
||||
c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA;
|
||||
c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB;
|
||||
@@ -1325,7 +1325,7 @@ __kernel void findConcaveSphereContactsKernel( __global int4* concavePairs,
|
||||
__global const btGpuFace* faces,
|
||||
__global const int* indices,
|
||||
__global btAabbCL* aabbs,
|
||||
__global b3Contact4Data* restrict globalContactsOut,
|
||||
__global struct b3Contact4Data* restrict globalContactsOut,
|
||||
counter32_t nGlobalContactsOut,
|
||||
int numConcavePairs, int maxContactCapacity
|
||||
)
|
||||
|
||||
@@ -1,67 +1,62 @@
|
||||
//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project
|
||||
static const char* primitiveContactsKernelsCL= \
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"#ifndef B3_CONTACT4DATA_H\n"
|
||||
"#define B3_CONTACT4DATA_H\n"
|
||||
"#ifndef B3_FLOAT4_H\n"
|
||||
"#define B3_FLOAT4_H\n"
|
||||
"#ifndef B3_PLATFORM_DEFINITIONS_H\n"
|
||||
"#define B3_PLATFORM_DEFINITIONS_H\n"
|
||||
"struct MyTest\n"
|
||||
"{\n"
|
||||
" int bla;\n"
|
||||
"};\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"#endif\n"
|
||||
"#ifdef __cplusplus\n"
|
||||
"#else//bla\n"
|
||||
" typedef float4 b3Float4;\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"typedef struct\n"
|
||||
"#endif \n"
|
||||
"#endif //B3_FLOAT4_H\n"
|
||||
"typedef struct b3Contact4Data b3Contact4Data_t;\n"
|
||||
"struct b3Contact4Data\n"
|
||||
"{\n"
|
||||
" b3Float4 m_worldPos[4];\n"
|
||||
"// b3Float4 m_localPosB[4];\n"
|
||||
" b3Float4 m_worldNormal; // w: m_nPoints\n"
|
||||
" unsigned int m_coeffs;\n"
|
||||
" unsigned int m_batchIdx;\n"
|
||||
"\n"
|
||||
" unsigned short m_restituitionCoeffCmp;\n"
|
||||
" unsigned short m_frictionCoeffCmp;\n"
|
||||
" int m_batchIdx;\n"
|
||||
" int m_bodyAPtrAndSignBit;//x:m_bodyAPtr, y:m_bodyBPtr\n"
|
||||
" int m_bodyBPtrAndSignBit;\n"
|
||||
"\n"
|
||||
" int m_childIndexA;\n"
|
||||
" int m_childIndexB;\n"
|
||||
" int m_unused1;\n"
|
||||
" int m_unused2;\n"
|
||||
"\n"
|
||||
"} b3Contact4Data;\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
" b3Float4 m_localPosA;\n"
|
||||
"};\n"
|
||||
"inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)\n"
|
||||
"{\n"
|
||||
" return (int)contact->m_worldNormal.w;\n"
|
||||
"};\n"
|
||||
"inline void b3Contact4Data_setNumPoints(struct b3Contact4Data* contact, int numPoints)\n"
|
||||
"{\n"
|
||||
" contact->m_worldNormal.w = (float)numPoints;\n"
|
||||
"};\n"
|
||||
"#endif //B3_CONTACT4DATA_H\n"
|
||||
"#define SHAPE_CONVEX_HULL 3\n"
|
||||
"#define SHAPE_PLANE 4\n"
|
||||
"#define SHAPE_CONCAVE_TRIMESH 5\n"
|
||||
"#define SHAPE_COMPOUND_OF_CONVEX_HULLS 6\n"
|
||||
"#define SHAPE_SPHERE 7\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"#pragma OPENCL EXTENSION cl_amd_printf : enable\n"
|
||||
"#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n"
|
||||
"#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n"
|
||||
"#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n"
|
||||
"#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n"
|
||||
"\n"
|
||||
"#ifdef cl_ext_atomic_counters_32\n"
|
||||
"#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable\n"
|
||||
"#else\n"
|
||||
"#define counter32_t volatile __global int*\n"
|
||||
"#endif\n"
|
||||
"\n"
|
||||
"#define GET_GROUP_IDX get_group_id(0)\n"
|
||||
"#define GET_LOCAL_IDX get_local_id(0)\n"
|
||||
"#define GET_GLOBAL_IDX get_global_id(0)\n"
|
||||
@@ -75,15 +70,9 @@ static const char* primitiveContactsKernelsCL= \
|
||||
"#define AtomAdd(x, value) atom_add(&(x), value)\n"
|
||||
"#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value )\n"
|
||||
"#define AtomXhg(x, value) atom_xchg ( &(x), value )\n"
|
||||
"\n"
|
||||
"#define max2 max\n"
|
||||
"#define min2 min\n"
|
||||
"\n"
|
||||
"typedef unsigned int u32;\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"typedef struct \n"
|
||||
"{\n"
|
||||
" union\n"
|
||||
@@ -99,7 +88,6 @@ static const char* primitiveContactsKernelsCL= \
|
||||
" int m_maxIndices[4];\n"
|
||||
" };\n"
|
||||
"} btAabbCL;\n"
|
||||
"\n"
|
||||
"///keep this in sync with btCollidable.h\n"
|
||||
"typedef struct\n"
|
||||
"{\n"
|
||||
@@ -109,7 +97,6 @@ static const char* primitiveContactsKernelsCL= \
|
||||
" int m_shapeIndex;\n"
|
||||
" \n"
|
||||
"} btCollidableGpu;\n"
|
||||
"\n"
|
||||
"typedef struct\n"
|
||||
"{\n"
|
||||
" float4 m_childPosition;\n"
|
||||
@@ -119,23 +106,18 @@ static const char* primitiveContactsKernelsCL= \
|
||||
" int m_unused1;\n"
|
||||
" int m_unused2;\n"
|
||||
"} btGpuChildShape;\n"
|
||||
"\n"
|
||||
"#define GET_NPOINTS(x) (x).m_worldNormal.w\n"
|
||||
"\n"
|
||||
"typedef struct\n"
|
||||
"{\n"
|
||||
" float4 m_pos;\n"
|
||||
" float4 m_quat;\n"
|
||||
" float4 m_linVel;\n"
|
||||
" float4 m_angVel;\n"
|
||||
"\n"
|
||||
" u32 m_collidableIdx; \n"
|
||||
" float m_invMass;\n"
|
||||
" float m_restituitionCoeff;\n"
|
||||
" float m_frictionCoeff;\n"
|
||||
"} BodyData;\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"typedef struct \n"
|
||||
"{\n"
|
||||
" float4 m_localCenter;\n"
|
||||
@@ -152,48 +134,37 @@ static const char* primitiveContactsKernelsCL= \
|
||||
" int m_uniqueEdgesOffset;\n"
|
||||
" int m_numUniqueEdges;\n"
|
||||
" int m_unused;\n"
|
||||
"\n"
|
||||
"} ConvexPolyhedronCL;\n"
|
||||
"\n"
|
||||
"typedef struct\n"
|
||||
"{\n"
|
||||
" float4 m_plane;\n"
|
||||
" int m_indexOffset;\n"
|
||||
" int m_numIndices;\n"
|
||||
"} btGpuFace;\n"
|
||||
"\n"
|
||||
"#define SELECT_UINT4( b, a, condition ) select( b,a,condition )\n"
|
||||
"\n"
|
||||
"#define make_float4 (float4)\n"
|
||||
"#define make_float2 (float2)\n"
|
||||
"#define make_uint4 (uint4)\n"
|
||||
"#define make_int4 (int4)\n"
|
||||
"#define make_uint2 (uint2)\n"
|
||||
"#define make_int2 (int2)\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float fastDiv(float numerator, float denominator)\n"
|
||||
"{\n"
|
||||
" return native_divide(numerator, denominator); \n"
|
||||
"// return numerator/denominator; \n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float4 fastDiv4(float4 numerator, float4 denominator)\n"
|
||||
"{\n"
|
||||
" return native_divide(numerator, denominator); \n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float4 cross3(float4 a, float4 b)\n"
|
||||
"{\n"
|
||||
" return cross(a,b);\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"//#define dot3F4 dot\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float dot3F4(float4 a, float4 b)\n"
|
||||
"{\n"
|
||||
@@ -201,35 +172,23 @@ static const char* primitiveContactsKernelsCL= \
|
||||
" float4 b1 = make_float4(b.xyz,0.f);\n"
|
||||
" return dot(a1, b1);\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float4 fastNormalize4(float4 v)\n"
|
||||
"{\n"
|
||||
" return fast_normalize(v);\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"///////////////////////////////////////\n"
|
||||
"// Quaternion\n"
|
||||
"///////////////////////////////////////\n"
|
||||
"\n"
|
||||
"typedef float4 Quaternion;\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"Quaternion qtMul(Quaternion a, Quaternion b);\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"Quaternion qtNormalize(Quaternion in);\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float4 qtRotate(Quaternion q, float4 vec);\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"Quaternion qtInvert(Quaternion q);\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"Quaternion qtMul(Quaternion a, Quaternion b)\n"
|
||||
"{\n"
|
||||
@@ -240,7 +199,6 @@ static const char* primitiveContactsKernelsCL= \
|
||||
" ans.w = a.w*b.w - dot3F4(a, b);\n"
|
||||
" return ans;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"Quaternion qtNormalize(Quaternion in)\n"
|
||||
"{\n"
|
||||
@@ -257,32 +215,27 @@ static const char* primitiveContactsKernelsCL= \
|
||||
" float4 out = qtMul(qtMul(q,vcpy),qInv);\n"
|
||||
" return out;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"Quaternion qtInvert(Quaternion q)\n"
|
||||
"{\n"
|
||||
" return (Quaternion)(-q.xyz, q.w);\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float4 qtInvRotate(const Quaternion q, float4 vec)\n"
|
||||
"{\n"
|
||||
" return qtRotate( qtInvert( q ), vec );\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float4 transform(const float4* p, const float4* translation, const Quaternion* orientation)\n"
|
||||
"{\n"
|
||||
" return qtRotate( *orientation, *p ) + (*translation);\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"void trInverse(float4 translationIn, Quaternion orientationIn,\n"
|
||||
" float4* translationOut, Quaternion* orientationOut)\n"
|
||||
"{\n"
|
||||
" *orientationOut = qtInvert(orientationIn);\n"
|
||||
" *translationOut = qtRotate(*orientationOut, -translationIn);\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"void trMul(float4 translationA, Quaternion orientationA,\n"
|
||||
" float4 translationB, Quaternion orientationB,\n"
|
||||
" float4* translationOut, Quaternion* orientationOut)\n"
|
||||
@@ -290,17 +243,12 @@ static const char* primitiveContactsKernelsCL= \
|
||||
" *orientationOut = qtMul(orientationA,orientationB);\n"
|
||||
" *translationOut = transform(&translationB,&translationA,&orientationA);\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float4 normalize3(const float4 a)\n"
|
||||
"{\n"
|
||||
" float4 n = make_float4(a.x, a.y, a.z, 0.f);\n"
|
||||
" return fastNormalize4( n );\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"__inline float4 lerp3(const float4 a,const float4 b, float t)\n"
|
||||
"{\n"
|
||||
" return make_float4( a.x + (b.x - a.x) * t,\n"
|
||||
@@ -308,8 +256,6 @@ static const char* primitiveContactsKernelsCL= \
|
||||
" a.z + (b.z - a.z) * t,\n"
|
||||
" 0.f);\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"float signedDistanceFromPointToPlane(float4 point, float4 planeEqn, float4* closestPointOnFace)\n"
|
||||
"{\n"
|
||||
" float4 n = (float4)(planeEqn.x, planeEqn.y, planeEqn.z, 0);\n"
|
||||
@@ -317,9 +263,6 @@ static const char* primitiveContactsKernelsCL= \
|
||||
" *closestPointOnFace = point - dist * n;\n"
|
||||
" return dist;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"inline bool IsPointInPolygon(float4 p, \n"
|
||||
" const btGpuFace* face,\n"
|
||||
" __global const float4* baseVertex,\n"
|
||||
@@ -331,17 +274,14 @@ static const char* primitiveContactsKernelsCL= \
|
||||
" float4 ab;\n"
|
||||
" float4 ap;\n"
|
||||
" float4 v;\n"
|
||||
"\n"
|
||||
" float4 plane = make_float4(face->m_plane.x,face->m_plane.y,face->m_plane.z,0.f);\n"
|
||||
" \n"
|
||||
" if (face->m_numIndices<2)\n"
|
||||
" return false;\n"
|
||||
" \n"
|
||||
" \n"
|
||||
" float4 v0 = baseVertex[convexIndices[face->m_indexOffset + face->m_numIndices-1]];\n"
|
||||
" \n"
|
||||
" b = v0;\n"
|
||||
"\n"
|
||||
" for(unsigned i=0; i != face->m_numIndices; ++i)\n"
|
||||
" {\n"
|
||||
" a = b;\n"
|
||||
@@ -350,7 +290,6 @@ static const char* primitiveContactsKernelsCL= \
|
||||
" ab = b-a;\n"
|
||||
" ap = p-a;\n"
|
||||
" v = cross3(ab,plane);\n"
|
||||
"\n"
|
||||
" if (dot(ap, v) > 0.f)\n"
|
||||
" {\n"
|
||||
" float ab_m2 = dot(ab, ab);\n"
|
||||
@@ -375,10 +314,6 @@ static const char* primitiveContactsKernelsCL= \
|
||||
" }\n"
|
||||
" return true;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"void computeContactSphereConvex(int pairIndex,\n"
|
||||
" int bodyIndexA, int bodyIndexB, \n"
|
||||
" int collidableIndexA, int collidableIndexB, \n"
|
||||
@@ -388,7 +323,7 @@ static const char* primitiveContactsKernelsCL= \
|
||||
" __global const float4* convexVertices,\n"
|
||||
" __global const int* convexIndices,\n"
|
||||
" __global const btGpuFace* faces,\n"
|
||||
" __global b3Contact4Data* restrict globalContactsOut,\n"
|
||||
" __global struct b3Contact4Data* restrict globalContactsOut,\n"
|
||||
" counter32_t nGlobalContactsOut,\n"
|
||||
" int maxContactCapacity,\n"
|
||||
" float4 spherePos2,\n"
|
||||
@@ -397,25 +332,19 @@ static const char* primitiveContactsKernelsCL= \
|
||||
" float4 quat\n"
|
||||
" )\n"
|
||||
"{\n"
|
||||
"\n"
|
||||
" float4 invPos;\n"
|
||||
" float4 invOrn;\n"
|
||||
"\n"
|
||||
" trInverse(pos,quat, &invPos,&invOrn);\n"
|
||||
"\n"
|
||||
" float4 spherePos = transform(&spherePos2,&invPos,&invOrn);\n"
|
||||
"\n"
|
||||
" int shapeIndex = collidables[collidableIndexB].m_shapeIndex;\n"
|
||||
" int numFaces = convexShapes[shapeIndex].m_numFaces;\n"
|
||||
" float4 closestPnt = (float4)(0, 0, 0, 0);\n"
|
||||
" float4 hitNormalWorld = (float4)(0, 0, 0, 0);\n"
|
||||
" float minDist = -1000000.f;\n"
|
||||
" bool bCollide = true;\n"
|
||||
"\n"
|
||||
" for ( int f = 0; f < numFaces; f++ )\n"
|
||||
" {\n"
|
||||
" btGpuFace face = faces[convexShapes[shapeIndex].m_faceOffset+f];\n"
|
||||
"\n"
|
||||
" // set up a plane equation \n"
|
||||
" float4 planeEqn;\n"
|
||||
" float4 n1 = face.m_plane;\n"
|
||||
@@ -427,21 +356,17 @@ static const char* primitiveContactsKernelsCL= \
|
||||
" // compute a signed distance from the vertex in cloth to the face of rigidbody.\n"
|
||||
" float4 pntReturn;\n"
|
||||
" float dist = signedDistanceFromPointToPlane(spherePos, planeEqn, &pntReturn);\n"
|
||||
"\n"
|
||||
" // If the distance is positive, the plane is a separating plane. \n"
|
||||
" if ( dist > radius )\n"
|
||||
" {\n"
|
||||
" bCollide = false;\n"
|
||||
" break;\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
"\n"
|
||||
" if (dist>0)\n"
|
||||
" {\n"
|
||||
" //might hit an edge or vertex\n"
|
||||
" float4 out;\n"
|
||||
" float4 zeroPos = make_float4(0,0,0,0);\n"
|
||||
"\n"
|
||||
" bool isInPoly = IsPointInPolygon(spherePos,\n"
|
||||
" &face,\n"
|
||||
" &convexVertices[convexShapes[shapeIndex].m_vertexOffset],\n"
|
||||
@@ -489,8 +414,6 @@ static const char* primitiveContactsKernelsCL= \
|
||||
" \n"
|
||||
" }\n"
|
||||
" \n"
|
||||
" \n"
|
||||
"\n"
|
||||
" if (bCollide && minDist > -10000)\n"
|
||||
" {\n"
|
||||
" float4 normalOnSurfaceB1 = qtRotate(quat,-hitNormalWorld);\n"
|
||||
@@ -500,35 +423,28 @@ static const char* primitiveContactsKernelsCL= \
|
||||
" if (actualDepth<=0.f)\n"
|
||||
" {\n"
|
||||
" \n"
|
||||
"\n"
|
||||
" pOnB1.w = actualDepth;\n"
|
||||
"\n"
|
||||
" int dstIdx;\n"
|
||||
" AppendInc( nGlobalContactsOut, dstIdx );\n"
|
||||
" \n"
|
||||
" \n"
|
||||
" if (1)//dstIdx < maxContactCapacity)\n"
|
||||
" {\n"
|
||||
" __global b3Contact4Data* c = &globalContactsOut[dstIdx];\n"
|
||||
" __global struct b3Contact4Data* c = &globalContactsOut[dstIdx];\n"
|
||||
" c->m_worldNormal = normalOnSurfaceB1;\n"
|
||||
" c->m_coeffs = (u32)(0.f*0xffff) | ((u32)(0.7f*0xffff)<<16);\n"
|
||||
" c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n"
|
||||
" c->m_batchIdx = pairIndex;\n"
|
||||
" c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA;\n"
|
||||
" c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB;\n"
|
||||
" c->m_worldPos[0] = pOnB1;\n"
|
||||
" c->m_childIndexA = -1;\n"
|
||||
" c->m_childIndexB = -1;\n"
|
||||
"\n"
|
||||
" GET_NPOINTS(*c) = 1;\n"
|
||||
" } \n"
|
||||
"\n"
|
||||
" }\n"
|
||||
" }//if (hasCollision)\n"
|
||||
"\n"
|
||||
"}\n"
|
||||
" \n"
|
||||
"\n"
|
||||
"\n"
|
||||
"int extractManifoldSequential(const float4* p, int nPoints, float4 nearNormal, int4* contactIdx)\n"
|
||||
"{\n"
|
||||
" if( nPoints == 0 )\n"
|
||||
@@ -621,9 +537,7 @@ static const char* primitiveContactsKernelsCL= \
|
||||
" return 4;\n"
|
||||
" \n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"#define MAX_PLANE_CONVEX_POINTS 64\n"
|
||||
"\n"
|
||||
"int computeContactPlaneConvex(int pairIndex,\n"
|
||||
" int bodyIndexA, int bodyIndexB, \n"
|
||||
" int collidableIndexA, int collidableIndexB, \n"
|
||||
@@ -633,7 +547,7 @@ static const char* primitiveContactsKernelsCL= \
|
||||
" __global const float4* convexVertices,\n"
|
||||
" __global const int* convexIndices,\n"
|
||||
" __global const btGpuFace* faces,\n"
|
||||
" __global b3Contact4Data* restrict globalContactsOut,\n"
|
||||
" __global struct b3Contact4Data* restrict globalContactsOut,\n"
|
||||
" counter32_t nGlobalContactsOut,\n"
|
||||
" int maxContactCapacity,\n"
|
||||
" float4 posB,\n"
|
||||
@@ -641,7 +555,6 @@ static const char* primitiveContactsKernelsCL= \
|
||||
" )\n"
|
||||
"{\n"
|
||||
" int resultIndex=-1;\n"
|
||||
"\n"
|
||||
" int shapeIndex = collidables[collidableIndexB].m_shapeIndex;\n"
|
||||
" __global const ConvexPolyhedronCL* hullB = &convexShapes[shapeIndex];\n"
|
||||
" \n"
|
||||
@@ -649,10 +562,8 @@ static const char* primitiveContactsKernelsCL= \
|
||||
" posA = rigidBodies[bodyIndexA].m_pos;\n"
|
||||
" Quaternion ornA;\n"
|
||||
" ornA = rigidBodies[bodyIndexA].m_quat;\n"
|
||||
"\n"
|
||||
" int numContactsOut = 0;\n"
|
||||
" int numWorldVertsB1= 0;\n"
|
||||
"\n"
|
||||
" float4 planeEq;\n"
|
||||
" planeEq = faces[collidables[collidableIndexA].m_shapeIndex].m_plane;\n"
|
||||
" float4 planeNormal = make_float4(planeEq.x,planeEq.y,planeEq.z,0.f);\n"
|
||||
@@ -675,17 +586,12 @@ static const char* primitiveContactsKernelsCL= \
|
||||
" trMul(invPosB,invOrnB,posA,ornA,&planeInConvexPos1,&planeInConvexOrn1); \n"
|
||||
" }\n"
|
||||
" \n"
|
||||
" \n"
|
||||
" float4 planeNormalInConvex = qtRotate(planeInConvexOrn1,-planeNormal);\n"
|
||||
" float maxDot = -1e30;\n"
|
||||
" int hitVertex=-1;\n"
|
||||
" float4 hitVtx;\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
" float4 contactPoints[MAX_PLANE_CONVEX_POINTS];\n"
|
||||
" int numPoints = 0;\n"
|
||||
"\n"
|
||||
" int4 contactIdx;\n"
|
||||
" contactIdx=make_int4(0,1,2,3);\n"
|
||||
" \n"
|
||||
@@ -694,8 +600,6 @@ static const char* primitiveContactsKernelsCL= \
|
||||
" {\n"
|
||||
" float4 vtx = convexVertices[hullB->m_vertexOffset+i];\n"
|
||||
" float curDot = dot(vtx,planeNormalInConvex);\n"
|
||||
"\n"
|
||||
"\n"
|
||||
" if (curDot>maxDot)\n"
|
||||
" {\n"
|
||||
" hitVertex=i;\n"
|
||||
@@ -705,7 +609,6 @@ static const char* primitiveContactsKernelsCL= \
|
||||
" if (numPoints==MAX_PLANE_CONVEX_POINTS)\n"
|
||||
" numPoints--;\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" if (numPoints<MAX_PLANE_CONVEX_POINTS)\n"
|
||||
" {\n"
|
||||
" float4 vtxWorld = transform(&vtx, &posB, &ornB);\n"
|
||||
@@ -718,34 +621,29 @@ static const char* primitiveContactsKernelsCL= \
|
||||
" numPoints++;\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" int numReducedPoints = numPoints;\n"
|
||||
" if (numPoints>4)\n"
|
||||
" {\n"
|
||||
" numReducedPoints = extractManifoldSequential( contactPoints, numPoints, planeNormalInConvex, &contactIdx);\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" if (numReducedPoints>0)\n"
|
||||
" {\n"
|
||||
" int dstIdx;\n"
|
||||
" AppendInc( nGlobalContactsOut, dstIdx );\n"
|
||||
"\n"
|
||||
" if (dstIdx < maxContactCapacity)\n"
|
||||
" {\n"
|
||||
" resultIndex = dstIdx;\n"
|
||||
" __global b3Contact4Data* c = &globalContactsOut[dstIdx];\n"
|
||||
" __global struct b3Contact4Data* c = &globalContactsOut[dstIdx];\n"
|
||||
" c->m_worldNormal = planeNormalWorld;\n"
|
||||
" //c->setFrictionCoeff(0.7);\n"
|
||||
" //c->setRestituitionCoeff(0.f);\n"
|
||||
" c->m_coeffs = (u32)(0.f*0xffff) | ((u32)(0.7f*0xffff)<<16);\n"
|
||||
" c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n"
|
||||
" c->m_batchIdx = pairIndex;\n"
|
||||
" c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA;\n"
|
||||
" c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB;\n"
|
||||
" c->m_childIndexA = -1;\n"
|
||||
" c->m_childIndexB = -1;\n"
|
||||
"\n"
|
||||
" switch (numReducedPoints)\n"
|
||||
" {\n"
|
||||
" case 4:\n"
|
||||
@@ -764,18 +662,15 @@ static const char* primitiveContactsKernelsCL= \
|
||||
" GET_NPOINTS(*c) = numReducedPoints;\n"
|
||||
" }//if (dstIdx < numPairs)\n"
|
||||
" } \n"
|
||||
"\n"
|
||||
" return resultIndex;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"void computeContactPlaneSphere(int pairIndex,\n"
|
||||
" int bodyIndexA, int bodyIndexB, \n"
|
||||
" int collidableIndexA, int collidableIndexB, \n"
|
||||
" __global const BodyData* rigidBodies, \n"
|
||||
" __global const btCollidableGpu* collidables,\n"
|
||||
" __global const btGpuFace* faces,\n"
|
||||
" __global b3Contact4Data* restrict globalContactsOut,\n"
|
||||
" __global struct b3Contact4Data* restrict globalContactsOut,\n"
|
||||
" counter32_t nGlobalContactsOut,\n"
|
||||
" int maxContactCapacity)\n"
|
||||
"{\n"
|
||||
@@ -812,15 +707,14 @@ static const char* primitiveContactsKernelsCL= \
|
||||
" float4 normalOnSurfaceB1 = qtRotate(ornA1,planeNormal1);\n"
|
||||
" float4 pOnB1 = vtxInPlaneWorld1+normalOnSurfaceB1*distance;\n"
|
||||
" pOnB1.w = distance;\n"
|
||||
"\n"
|
||||
" int dstIdx;\n"
|
||||
" AppendInc( nGlobalContactsOut, dstIdx );\n"
|
||||
" \n"
|
||||
" if (dstIdx < maxContactCapacity)\n"
|
||||
" {\n"
|
||||
" __global b3Contact4Data* c = &globalContactsOut[dstIdx];\n"
|
||||
" __global struct b3Contact4Data* c = &globalContactsOut[dstIdx];\n"
|
||||
" c->m_worldNormal = normalOnSurfaceB1;\n"
|
||||
" c->m_coeffs = (u32)(0.f*0xffff) | ((u32)(0.7f*0xffff)<<16);\n"
|
||||
" c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n"
|
||||
" c->m_batchIdx = pairIndex;\n"
|
||||
" c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA;\n"
|
||||
" c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB;\n"
|
||||
@@ -831,8 +725,6 @@ static const char* primitiveContactsKernelsCL= \
|
||||
" }//if (dstIdx < numPairs)\n"
|
||||
" }//if (hasCollision)\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"__kernel void primitiveContactsKernel( __global int4* pairs, \n"
|
||||
" __global const BodyData* rigidBodies, \n"
|
||||
" __global const btCollidableGpu* collidables,\n"
|
||||
@@ -841,27 +733,23 @@ static const char* primitiveContactsKernelsCL= \
|
||||
" __global const float4* uniqueEdges,\n"
|
||||
" __global const btGpuFace* faces,\n"
|
||||
" __global const int* indices,\n"
|
||||
" __global b3Contact4Data* restrict globalContactsOut,\n"
|
||||
" __global struct b3Contact4Data* restrict globalContactsOut,\n"
|
||||
" counter32_t nGlobalContactsOut,\n"
|
||||
" int numPairs, int maxContactCapacity)\n"
|
||||
"{\n"
|
||||
"\n"
|
||||
" int i = get_global_id(0);\n"
|
||||
" int pairIndex = i;\n"
|
||||
" \n"
|
||||
" float4 worldVertsB1[64];\n"
|
||||
" float4 worldVertsB2[64];\n"
|
||||
" int capacityWorldVerts = 64; \n"
|
||||
"\n"
|
||||
" float4 localContactsOut[64];\n"
|
||||
" int localContactCapacity=64;\n"
|
||||
" \n"
|
||||
" float minDist = -1e30f;\n"
|
||||
" float maxDist = 0.02f;\n"
|
||||
"\n"
|
||||
" if (i<numPairs)\n"
|
||||
" {\n"
|
||||
"\n"
|
||||
" int bodyIndexA = pairs[i].x;\n"
|
||||
" int bodyIndexB = pairs[i].y;\n"
|
||||
" \n"
|
||||
@@ -871,7 +759,6 @@ static const char* primitiveContactsKernelsCL= \
|
||||
" if (collidables[collidableIndexA].m_shapeType == SHAPE_PLANE &&\n"
|
||||
" collidables[collidableIndexB].m_shapeType == SHAPE_CONVEX_HULL)\n"
|
||||
" {\n"
|
||||
"\n"
|
||||
" float4 posB;\n"
|
||||
" posB = rigidBodies[bodyIndexB].m_pos;\n"
|
||||
" Quaternion ornB;\n"
|
||||
@@ -881,31 +768,22 @@ static const char* primitiveContactsKernelsCL= \
|
||||
" faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity, posB,ornB);\n"
|
||||
" if (contactIndex>=0)\n"
|
||||
" pairs[pairIndex].z = contactIndex;\n"
|
||||
"\n"
|
||||
" return;\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
"\n"
|
||||
" if (collidables[collidableIndexA].m_shapeType == SHAPE_CONVEX_HULL &&\n"
|
||||
" collidables[collidableIndexB].m_shapeType == SHAPE_PLANE)\n"
|
||||
" {\n"
|
||||
"\n"
|
||||
" float4 posA;\n"
|
||||
" posA = rigidBodies[bodyIndexA].m_pos;\n"
|
||||
" Quaternion ornA;\n"
|
||||
" ornA = rigidBodies[bodyIndexA].m_quat;\n"
|
||||
"\n"
|
||||
"\n"
|
||||
" int contactIndex = computeContactPlaneConvex( pairIndex, bodyIndexB,bodyIndexA, collidableIndexB,collidableIndexA, \n"
|
||||
" rigidBodies,collidables,convexShapes,vertices,indices,\n"
|
||||
" faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity,posA,ornA);\n"
|
||||
"\n"
|
||||
" if (contactIndex>=0)\n"
|
||||
" pairs[pairIndex].z = contactIndex;\n"
|
||||
"\n"
|
||||
" return;\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" if (collidables[collidableIndexA].m_shapeType == SHAPE_PLANE &&\n"
|
||||
" collidables[collidableIndexB].m_shapeType == SHAPE_SPHERE)\n"
|
||||
" {\n"
|
||||
@@ -913,23 +791,16 @@ static const char* primitiveContactsKernelsCL= \
|
||||
" rigidBodies,collidables,faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity);\n"
|
||||
" return;\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
"\n"
|
||||
" if (collidables[collidableIndexA].m_shapeType == SHAPE_SPHERE &&\n"
|
||||
" collidables[collidableIndexB].m_shapeType == SHAPE_PLANE)\n"
|
||||
" {\n"
|
||||
"\n"
|
||||
"\n"
|
||||
" computeContactPlaneSphere( pairIndex, bodyIndexB,bodyIndexA, collidableIndexB,collidableIndexA, \n"
|
||||
" rigidBodies,collidables,\n"
|
||||
" faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity);\n"
|
||||
"\n"
|
||||
" return;\n"
|
||||
" }\n"
|
||||
" \n"
|
||||
" \n"
|
||||
"\n"
|
||||
" \n"
|
||||
" if (collidables[collidableIndexA].m_shapeType == SHAPE_SPHERE &&\n"
|
||||
" collidables[collidableIndexB].m_shapeType == SHAPE_CONVEX_HULL)\n"
|
||||
" {\n"
|
||||
@@ -938,14 +809,11 @@ static const char* primitiveContactsKernelsCL= \
|
||||
" float sphereRadius = collidables[collidableIndexA].m_radius;\n"
|
||||
" float4 convexPos = rigidBodies[bodyIndexB].m_pos;\n"
|
||||
" float4 convexOrn = rigidBodies[bodyIndexB].m_quat;\n"
|
||||
"\n"
|
||||
" computeContactSphereConvex(pairIndex, bodyIndexA, bodyIndexB, collidableIndexA, collidableIndexB, \n"
|
||||
" rigidBodies,collidables,convexShapes,vertices,indices,faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity,\n"
|
||||
" spherePos,sphereRadius,convexPos,convexOrn);\n"
|
||||
"\n"
|
||||
" return;\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" if (collidables[collidableIndexA].m_shapeType == SHAPE_CONVEX_HULL &&\n"
|
||||
" collidables[collidableIndexB].m_shapeType == SHAPE_SPHERE)\n"
|
||||
" {\n"
|
||||
@@ -954,7 +822,6 @@ static const char* primitiveContactsKernelsCL= \
|
||||
" float sphereRadius = collidables[collidableIndexB].m_radius;\n"
|
||||
" float4 convexPos = rigidBodies[bodyIndexA].m_pos;\n"
|
||||
" float4 convexOrn = rigidBodies[bodyIndexA].m_quat;\n"
|
||||
"\n"
|
||||
" computeContactSphereConvex(pairIndex, bodyIndexB, bodyIndexA, collidableIndexB, collidableIndexA, \n"
|
||||
" rigidBodies,collidables,convexShapes,vertices,indices,faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity,\n"
|
||||
" spherePos,sphereRadius,convexPos,convexOrn);\n"
|
||||
@@ -974,7 +841,6 @@ static const char* primitiveContactsKernelsCL= \
|
||||
" float radiusB = collidables[collidableIndexB].m_radius;\n"
|
||||
" float4 posA = rigidBodies[bodyIndexA].m_pos;\n"
|
||||
" float4 posB = rigidBodies[bodyIndexB].m_pos;\n"
|
||||
"\n"
|
||||
" float4 diff = posA-posB;\n"
|
||||
" float len = length(diff);\n"
|
||||
" \n"
|
||||
@@ -996,9 +862,9 @@ static const char* primitiveContactsKernelsCL= \
|
||||
" \n"
|
||||
" if (dstIdx < maxContactCapacity)\n"
|
||||
" {\n"
|
||||
" __global b3Contact4Data* c = &globalContactsOut[dstIdx];\n"
|
||||
" __global struct b3Contact4Data* c = &globalContactsOut[dstIdx];\n"
|
||||
" c->m_worldNormal = -normalOnSurfaceB;\n"
|
||||
" c->m_coeffs = (u32)(0.f*0xffff) | ((u32)(0.7f*0xffff)<<16);\n"
|
||||
" c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n"
|
||||
" c->m_batchIdx = pairIndex;\n"
|
||||
" int bodyA = pairs[pairIndex].x;\n"
|
||||
" int bodyB = pairs[pairIndex].y;\n"
|
||||
@@ -1010,15 +876,10 @@ static const char* primitiveContactsKernelsCL= \
|
||||
" GET_NPOINTS(*c) = 1;\n"
|
||||
" }//if (dstIdx < numPairs)\n"
|
||||
" }//if ( len <= (radiusA+radiusB))\n"
|
||||
"\n"
|
||||
" return;\n"
|
||||
" }//SHAPE_SPHERE SHAPE_SPHERE\n"
|
||||
"\n"
|
||||
" }// if (i<numPairs)\n"
|
||||
"\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"// work-in-progress\n"
|
||||
"__kernel void processCompoundPairsPrimitivesKernel( __global const int4* gpuCompoundPairs,\n"
|
||||
" __global const BodyData* rigidBodies, \n"
|
||||
@@ -1030,18 +891,16 @@ static const char* primitiveContactsKernelsCL= \
|
||||
" __global const int* indices,\n"
|
||||
" __global btAabbCL* aabbs,\n"
|
||||
" __global const btGpuChildShape* gpuChildShapes,\n"
|
||||
" __global b3Contact4Data* restrict globalContactsOut,\n"
|
||||
" __global struct b3Contact4Data* restrict globalContactsOut,\n"
|
||||
" counter32_t nGlobalContactsOut,\n"
|
||||
" int numCompoundPairs, int maxContactCapacity\n"
|
||||
" )\n"
|
||||
"{\n"
|
||||
"\n"
|
||||
" int i = get_global_id(0);\n"
|
||||
" if (i<numCompoundPairs)\n"
|
||||
" {\n"
|
||||
" int bodyIndexA = gpuCompoundPairs[i].x;\n"
|
||||
" int bodyIndexB = gpuCompoundPairs[i].y;\n"
|
||||
"\n"
|
||||
" int childShapeIndexA = gpuCompoundPairs[i].z;\n"
|
||||
" int childShapeIndexB = gpuCompoundPairs[i].w;\n"
|
||||
" \n"
|
||||
@@ -1087,26 +946,21 @@ static const char* primitiveContactsKernelsCL= \
|
||||
" \n"
|
||||
" int shapeTypeA = collidables[collidableIndexA].m_shapeType;\n"
|
||||
" int shapeTypeB = collidables[collidableIndexB].m_shapeType;\n"
|
||||
"\n"
|
||||
" int pairIndex = i;\n"
|
||||
" if ((shapeTypeA == SHAPE_PLANE) && (shapeTypeB==SHAPE_CONVEX_HULL))\n"
|
||||
" {\n"
|
||||
"\n"
|
||||
" computeContactPlaneConvex( pairIndex, bodyIndexA,bodyIndexB, collidableIndexA,collidableIndexB, \n"
|
||||
" rigidBodies,collidables,convexShapes,vertices,indices,\n"
|
||||
" faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity,posB,ornB);\n"
|
||||
" return;\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" if ((shapeTypeA == SHAPE_CONVEX_HULL) && (shapeTypeB==SHAPE_PLANE))\n"
|
||||
" {\n"
|
||||
"\n"
|
||||
" computeContactPlaneConvex( pairIndex, bodyIndexB,bodyIndexA, collidableIndexB,collidableIndexA, \n"
|
||||
" rigidBodies,collidables,convexShapes,vertices,indices,\n"
|
||||
" faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity,posA,ornA);\n"
|
||||
" return;\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" if ((shapeTypeA == SHAPE_CONVEX_HULL) && (shapeTypeB == SHAPE_SPHERE))\n"
|
||||
" {\n"
|
||||
" float4 spherePos = rigidBodies[bodyIndexB].m_pos;\n"
|
||||
@@ -1120,16 +974,13 @@ static const char* primitiveContactsKernelsCL= \
|
||||
" \n"
|
||||
" return;\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" if ((shapeTypeA == SHAPE_SPHERE) && (shapeTypeB == SHAPE_CONVEX_HULL))\n"
|
||||
" {\n"
|
||||
"\n"
|
||||
" float4 spherePos = rigidBodies[bodyIndexA].m_pos;\n"
|
||||
" float sphereRadius = collidables[collidableIndexA].m_radius;\n"
|
||||
" float4 convexPos = posB;\n"
|
||||
" float4 convexOrn = ornB;\n"
|
||||
" \n"
|
||||
" \n"
|
||||
" computeContactSphereConvex(pairIndex, bodyIndexA, bodyIndexB, collidableIndexA, collidableIndexB, \n"
|
||||
" rigidBodies,collidables,convexShapes,vertices,indices,faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity,\n"
|
||||
" spherePos,sphereRadius,convexPos,convexOrn);\n"
|
||||
@@ -1138,30 +989,23 @@ static const char* primitiveContactsKernelsCL= \
|
||||
" }\n"
|
||||
" }// if (i<numCompoundPairs)\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"bool pointInTriangle(const float4* vertices, const float4* normal, float4 *p )\n"
|
||||
"{\n"
|
||||
"\n"
|
||||
" const float4* p1 = &vertices[0];\n"
|
||||
" const float4* p2 = &vertices[1];\n"
|
||||
" const float4* p3 = &vertices[2];\n"
|
||||
"\n"
|
||||
" float4 edge1; edge1 = (*p2 - *p1);\n"
|
||||
" float4 edge2; edge2 = ( *p3 - *p2 );\n"
|
||||
" float4 edge3; edge3 = ( *p1 - *p3 );\n"
|
||||
" \n"
|
||||
" \n"
|
||||
" float4 p1_to_p; p1_to_p = ( *p - *p1 );\n"
|
||||
" float4 p2_to_p; p2_to_p = ( *p - *p2 );\n"
|
||||
" float4 p3_to_p; p3_to_p = ( *p - *p3 );\n"
|
||||
"\n"
|
||||
" float4 edge1_normal; edge1_normal = ( cross(edge1,*normal));\n"
|
||||
" float4 edge2_normal; edge2_normal = ( cross(edge2,*normal));\n"
|
||||
" float4 edge3_normal; edge3_normal = ( cross(edge3,*normal));\n"
|
||||
" \n"
|
||||
" \n"
|
||||
" \n"
|
||||
" float r1, r2, r3;\n"
|
||||
" r1 = dot(edge1_normal,p1_to_p );\n"
|
||||
" r2 = dot(edge2_normal,p2_to_p );\n"
|
||||
@@ -1172,10 +1016,7 @@ static const char* primitiveContactsKernelsCL= \
|
||||
" if ( r1 <= 0 && r2 <= 0 && r3 <= 0 ) \n"
|
||||
" return true;\n"
|
||||
" return false;\n"
|
||||
"\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"float segmentSqrDistance(float4 from, float4 to,float4 p, float4* nearest) \n"
|
||||
"{\n"
|
||||
" float4 diff = p - from;\n"
|
||||
@@ -1201,15 +1042,13 @@ static const char* primitiveContactsKernelsCL= \
|
||||
" *nearest = from + t*v;\n"
|
||||
" return dot(diff,diff); \n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"void computeContactSphereTriangle(int pairIndex,\n"
|
||||
" int bodyIndexA, int bodyIndexB,\n"
|
||||
" int collidableIndexA, int collidableIndexB, \n"
|
||||
" __global const BodyData* rigidBodies, \n"
|
||||
" __global const btCollidableGpu* collidables,\n"
|
||||
" const float4* triangleVertices,\n"
|
||||
" __global b3Contact4Data* restrict globalContactsOut,\n"
|
||||
" __global struct b3Contact4Data* restrict globalContactsOut,\n"
|
||||
" counter32_t nGlobalContactsOut,\n"
|
||||
" int maxContactCapacity,\n"
|
||||
" float4 spherePos2,\n"
|
||||
@@ -1219,10 +1058,8 @@ static const char* primitiveContactsKernelsCL= \
|
||||
" int faceIndex\n"
|
||||
" )\n"
|
||||
"{\n"
|
||||
"\n"
|
||||
" float4 invPos;\n"
|
||||
" float4 invOrn;\n"
|
||||
"\n"
|
||||
" trInverse(pos,quat, &invPos,&invOrn);\n"
|
||||
" float4 spherePos = transform(&spherePos2,&invPos,&invOrn);\n"
|
||||
" int numFaces = 3;\n"
|
||||
@@ -1231,12 +1068,9 @@ static const char* primitiveContactsKernelsCL= \
|
||||
" float minDist = -1000000.f;\n"
|
||||
" bool bCollide = false;\n"
|
||||
" \n"
|
||||
" \n"
|
||||
" //////////////////////////////////////\n"
|
||||
"\n"
|
||||
" float4 sphereCenter;\n"
|
||||
" sphereCenter = spherePos;\n"
|
||||
"\n"
|
||||
" const float4* vertices = triangleVertices;\n"
|
||||
" float contactBreakingThreshold = 0.f;//todo?\n"
|
||||
" float radiusWithThreshold = radius + contactBreakingThreshold;\n"
|
||||
@@ -1252,7 +1086,6 @@ static const char* primitiveContactsKernelsCL= \
|
||||
" p1ToCenter = sphereCenter - vertices[0];\n"
|
||||
" \n"
|
||||
" float distanceFromPlane = dot(p1ToCenter,normal);\n"
|
||||
"\n"
|
||||
" if (distanceFromPlane < 0.f)\n"
|
||||
" {\n"
|
||||
" //triangle facing the other way\n"
|
||||
@@ -1260,7 +1093,6 @@ static const char* primitiveContactsKernelsCL= \
|
||||
" normal *= -1.f;\n"
|
||||
" }\n"
|
||||
" hitNormalWorld = normal;\n"
|
||||
"\n"
|
||||
" bool isInsideContactPlane = distanceFromPlane < radiusWithThreshold;\n"
|
||||
" \n"
|
||||
" // Check for contact / intersection\n"
|
||||
@@ -1284,7 +1116,6 @@ static const char* primitiveContactsKernelsCL= \
|
||||
" {\n"
|
||||
" float4 pa =vertices[i];\n"
|
||||
" float4 pb = vertices[(i+1)%3];\n"
|
||||
"\n"
|
||||
" float distanceSqr = segmentSqrDistance(pa,pb,sphereCenter, &nearestOnEdge);\n"
|
||||
" if (distanceSqr < contactCapsuleRadiusSqr) \n"
|
||||
" {\n"
|
||||
@@ -1297,10 +1128,8 @@ static const char* primitiveContactsKernelsCL= \
|
||||
" }\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" if (hasContact) \n"
|
||||
" {\n"
|
||||
"\n"
|
||||
" closestPnt = contactPoint;\n"
|
||||
" float4 contactToCenter = sphereCenter - contactPoint;\n"
|
||||
" minDist = length(contactToCenter);\n"
|
||||
@@ -1311,10 +1140,7 @@ static const char* primitiveContactsKernelsCL= \
|
||||
" }\n"
|
||||
" \n"
|
||||
" }\n"
|
||||
"\n"
|
||||
"\n"
|
||||
" /////////////////////////////////////\n"
|
||||
"\n"
|
||||
" if (bCollide && minDist > -10000)\n"
|
||||
" {\n"
|
||||
" \n"
|
||||
@@ -1322,13 +1148,11 @@ static const char* primitiveContactsKernelsCL= \
|
||||
" float4 pOnB1 = transform(&closestPnt,&pos,&quat);\n"
|
||||
" float actualDepth = minDist-radius;\n"
|
||||
" \n"
|
||||
" \n"
|
||||
" if (actualDepth<=0.f)\n"
|
||||
" {\n"
|
||||
" pOnB1.w = actualDepth;\n"
|
||||
" int dstIdx;\n"
|
||||
" \n"
|
||||
" \n"
|
||||
" float lenSqr = dot3F4(normalOnSurfaceB1,normalOnSurfaceB1);\n"
|
||||
" if (lenSqr>FLT_EPSILON)\n"
|
||||
" {\n"
|
||||
@@ -1336,28 +1160,21 @@ static const char* primitiveContactsKernelsCL= \
|
||||
" \n"
|
||||
" if (dstIdx < maxContactCapacity)\n"
|
||||
" {\n"
|
||||
" __global b3Contact4Data* c = &globalContactsOut[dstIdx];\n"
|
||||
" __global struct b3Contact4Data* c = &globalContactsOut[dstIdx];\n"
|
||||
" c->m_worldNormal = normalOnSurfaceB1;\n"
|
||||
" c->m_coeffs = (u32)(0.f*0xffff) | ((u32)(0.7f*0xffff)<<16);\n"
|
||||
" c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n"
|
||||
" c->m_batchIdx = pairIndex;\n"
|
||||
" c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA;\n"
|
||||
" c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB;\n"
|
||||
" c->m_worldPos[0] = pOnB1;\n"
|
||||
"\n"
|
||||
" c->m_childIndexA = -1;\n"
|
||||
" c->m_childIndexB = faceIndex;\n"
|
||||
"\n"
|
||||
" GET_NPOINTS(*c) = 1;\n"
|
||||
" } \n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" }\n"
|
||||
" }//if (hasCollision)\n"
|
||||
"\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"// work-in-progress\n"
|
||||
"__kernel void findConcaveSphereContactsKernel( __global int4* concavePairs,\n"
|
||||
" __global const BodyData* rigidBodies,\n"
|
||||
@@ -1368,26 +1185,21 @@ static const char* primitiveContactsKernelsCL= \
|
||||
" __global const btGpuFace* faces,\n"
|
||||
" __global const int* indices,\n"
|
||||
" __global btAabbCL* aabbs,\n"
|
||||
" __global b3Contact4Data* restrict globalContactsOut,\n"
|
||||
" __global struct b3Contact4Data* restrict globalContactsOut,\n"
|
||||
" counter32_t nGlobalContactsOut,\n"
|
||||
" int numConcavePairs, int maxContactCapacity\n"
|
||||
" )\n"
|
||||
"{\n"
|
||||
"\n"
|
||||
" int i = get_global_id(0);\n"
|
||||
" if (i>=numConcavePairs)\n"
|
||||
" return;\n"
|
||||
" int pairIdx = i;\n"
|
||||
"\n"
|
||||
" int bodyIndexA = concavePairs[i].x;\n"
|
||||
" int bodyIndexB = concavePairs[i].y;\n"
|
||||
"\n"
|
||||
" int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n"
|
||||
" int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n"
|
||||
"\n"
|
||||
" int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n"
|
||||
" int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n"
|
||||
"\n"
|
||||
" if (collidables[collidableIndexB].m_shapeType==SHAPE_SPHERE)\n"
|
||||
" {\n"
|
||||
" int f = concavePairs[i].z;\n"
|
||||
@@ -1400,18 +1212,15 @@ static const char* primitiveContactsKernelsCL= \
|
||||
" float4 vert = vertices[convexShapes[shapeIndexA].m_vertexOffset+index];\n"
|
||||
" verticesA[i] = vert;\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" float4 spherePos = rigidBodies[bodyIndexB].m_pos;\n"
|
||||
" float sphereRadius = collidables[collidableIndexB].m_radius;\n"
|
||||
" float4 convexPos = rigidBodies[bodyIndexA].m_pos;\n"
|
||||
" float4 convexOrn = rigidBodies[bodyIndexA].m_quat;\n"
|
||||
"\n"
|
||||
" computeContactSphereTriangle(i, bodyIndexB, bodyIndexA, collidableIndexB, collidableIndexA, \n"
|
||||
" rigidBodies,collidables,\n"
|
||||
" verticesA,\n"
|
||||
" globalContactsOut, nGlobalContactsOut,maxContactCapacity,\n"
|
||||
" spherePos,sphereRadius,convexPos,convexOrn, f);\n"
|
||||
"\n"
|
||||
" return;\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
|
||||
@@ -41,22 +41,7 @@ typedef unsigned int u32;
|
||||
|
||||
|
||||
|
||||
typedef struct
|
||||
{
|
||||
float4 m_worldPos[4];
|
||||
float4 m_worldNormal; // w: m_nPoints
|
||||
|
||||
u32 m_coeffs;
|
||||
u32 m_batchIdx;
|
||||
int m_bodyAPtrAndSignBit;//x:m_bodyAPtr, y:m_bodyBPtr
|
||||
int m_bodyBPtrAndSignBit;
|
||||
|
||||
int m_childIndexA;
|
||||
int m_childIndexB;
|
||||
float m_unused1;
|
||||
int m_unused2;
|
||||
|
||||
} Contact4;
|
||||
#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Contact4Data.h"
|
||||
|
||||
|
||||
///keep this in sync with btCollidable.h
|
||||
@@ -891,7 +876,7 @@ __kernel void extractManifoldAndAddContactKernel(__global const int4* pairs,
|
||||
__global const float4* separatingNormalsWorld,
|
||||
__global const int* contactCounts,
|
||||
__global const int* contactOffsets,
|
||||
__global Contact4* restrict contactsOut,
|
||||
__global struct b3Contact4Data* restrict contactsOut,
|
||||
counter32_t nContactsOut,
|
||||
int numPairs,
|
||||
int pairIndex
|
||||
@@ -922,9 +907,9 @@ __kernel void extractManifoldAndAddContactKernel(__global const int4* pairs,
|
||||
AppendInc( nContactsOut, dstIdx );
|
||||
//if ((dstIdx+nContacts) < capacity)
|
||||
{
|
||||
__global Contact4* c = contactsOut + dstIdx;
|
||||
__global struct b3Contact4Data* c = contactsOut + dstIdx;
|
||||
c->m_worldNormal = normal;
|
||||
c->m_coeffs = (u32)(0.f*0xffff) | ((u32)(0.7f*0xffff)<<16);
|
||||
c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);
|
||||
c->m_batchIdx = idx;
|
||||
int bodyA = pairs[pairIndex].x;
|
||||
int bodyB = pairs[pairIndex].y;
|
||||
@@ -970,7 +955,7 @@ __kernel void clipHullHullKernel( __global int4* pairs,
|
||||
__global const int* indices,
|
||||
__global const float4* separatingNormals,
|
||||
__global const int* hasSeparatingAxis,
|
||||
__global Contact4* restrict globalContactsOut,
|
||||
__global struct b3Contact4Data* restrict globalContactsOut,
|
||||
counter32_t nGlobalContactsOut,
|
||||
int numPairs,
|
||||
int contactCapacity)
|
||||
@@ -1037,9 +1022,9 @@ __kernel void clipHullHullKernel( __global int4* pairs,
|
||||
{
|
||||
pairs[pairIndex].z = dstIdx;
|
||||
|
||||
__global Contact4* c = globalContactsOut+ dstIdx;
|
||||
__global struct b3Contact4Data* c = globalContactsOut+ dstIdx;
|
||||
c->m_worldNormal = normal;
|
||||
c->m_coeffs = (u32)(0.f*0xffff) | ((u32)(0.7f*0xffff)<<16);
|
||||
c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);
|
||||
c->m_batchIdx = pairIndex;
|
||||
int bodyA = pairs[pairIndex].x;
|
||||
int bodyB = pairs[pairIndex].y;
|
||||
@@ -1073,7 +1058,7 @@ __kernel void clipCompoundsHullHullKernel( __global const int4* gpuCompoundPai
|
||||
__global const btGpuChildShape* gpuChildShapes,
|
||||
__global const float4* gpuCompoundSepNormalsOut,
|
||||
__global const int* gpuHasCompoundSepNormalsOut,
|
||||
__global Contact4* restrict globalContactsOut,
|
||||
__global struct b3Contact4Data* restrict globalContactsOut,
|
||||
counter32_t nGlobalContactsOut,
|
||||
int numCompoundPairs, int maxContactCapacity)
|
||||
{
|
||||
@@ -1170,9 +1155,9 @@ __kernel void clipCompoundsHullHullKernel( __global const int4* gpuCompoundPai
|
||||
AppendInc( nGlobalContactsOut, dstIdx );
|
||||
if ((dstIdx+nReducedContacts) < maxContactCapacity)
|
||||
{
|
||||
__global Contact4* c = globalContactsOut+ dstIdx;
|
||||
__global struct b3Contact4Data* c = globalContactsOut+ dstIdx;
|
||||
c->m_worldNormal = normal;
|
||||
c->m_coeffs = (u32)(0.f*0xffff) | ((u32)(0.7f*0xffff)<<16);
|
||||
c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);
|
||||
c->m_batchIdx = pairIndex;
|
||||
int bodyA = gpuCompoundPairs[pairIndex].x;
|
||||
int bodyB = gpuCompoundPairs[pairIndex].y;
|
||||
@@ -1200,7 +1185,7 @@ __kernel void sphereSphereCollisionKernel( __global const int4* pairs,
|
||||
__global const btCollidableGpu* collidables,
|
||||
__global const float4* separatingNormals,
|
||||
__global const int* hasSeparatingAxis,
|
||||
__global Contact4* restrict globalContactsOut,
|
||||
__global struct b3Contact4Data* restrict globalContactsOut,
|
||||
counter32_t nGlobalContactsOut,
|
||||
int numPairs)
|
||||
{
|
||||
@@ -1246,9 +1231,9 @@ __kernel void sphereSphereCollisionKernel( __global const int4* pairs,
|
||||
|
||||
if (dstIdx < numPairs)
|
||||
{
|
||||
__global Contact4* c = &globalContactsOut[dstIdx];
|
||||
__global struct b3Contact4Data* c = &globalContactsOut[dstIdx];
|
||||
c->m_worldNormal = normalOnSurfaceB;
|
||||
c->m_coeffs = (u32)(0.f*0xffff) | ((u32)(0.7f*0xffff)<<16);
|
||||
c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);
|
||||
c->m_batchIdx = pairIndex;
|
||||
int bodyA = pairs[pairIndex].x;
|
||||
int bodyB = pairs[pairIndex].y;
|
||||
@@ -1275,7 +1260,7 @@ __kernel void clipHullHullConcaveConvexKernel( __global int4* concavePairsIn,
|
||||
__global const int* indices,
|
||||
__global const btGpuChildShape* gpuChildShapes,
|
||||
__global const float4* separatingNormals,
|
||||
__global Contact4* restrict globalContactsOut,
|
||||
__global struct b3Contact4Data* restrict globalContactsOut,
|
||||
counter32_t nGlobalContactsOut,
|
||||
int numConcavePairs)
|
||||
{
|
||||
@@ -1479,9 +1464,9 @@ __kernel void clipHullHullConcaveConvexKernel( __global int4* concavePairsIn,
|
||||
AppendInc( nGlobalContactsOut, dstIdx );
|
||||
//if ((dstIdx+nReducedContacts) < capacity)
|
||||
{
|
||||
__global Contact4* c = globalContactsOut+ dstIdx;
|
||||
__global struct b3Contact4Data* c = globalContactsOut+ dstIdx;
|
||||
c->m_worldNormal = normal;
|
||||
c->m_coeffs = (u32)(0.f*0xffff) | ((u32)(0.7f*0xffff)<<16);
|
||||
c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);
|
||||
c->m_batchIdx = pairIndex;
|
||||
int bodyA = concavePairsIn[pairIndex].x;
|
||||
int bodyB = concavePairsIn[pairIndex].y;
|
||||
@@ -1747,7 +1732,7 @@ __kernel void clipFacesAndContactReductionKernel( __global int4* pairs,
|
||||
__global const BodyData* rigidBodies,
|
||||
__global const float4* separatingNormals,
|
||||
__global const int* hasSeparatingAxis,
|
||||
__global Contact4* globalContactsOut,
|
||||
__global struct b3Contact4Data* globalContactsOut,
|
||||
__global int4* clippingFacesOut,
|
||||
__global float4* worldVertsA1,
|
||||
__global float4* worldNormalsA1,
|
||||
@@ -1860,7 +1845,7 @@ __kernel void newContactReductionKernel( __global int4* pairs,
|
||||
__global const BodyData* rigidBodies,
|
||||
__global const float4* separatingNormals,
|
||||
__global const int* hasSeparatingAxis,
|
||||
__global Contact4* globalContactsOut,
|
||||
__global struct b3Contact4Data* globalContactsOut,
|
||||
__global int4* clippingFaces,
|
||||
__global float4* worldVertsB2,
|
||||
volatile __global int* nGlobalContactsOut,
|
||||
@@ -1901,9 +1886,9 @@ __kernel void newContactReductionKernel( __global int4* pairs,
|
||||
if (dstIdx < numPairs)
|
||||
{
|
||||
|
||||
__global Contact4* c = &globalContactsOut[dstIdx];
|
||||
__global struct b3Contact4Data* c = &globalContactsOut[dstIdx];
|
||||
c->m_worldNormal = normal;
|
||||
c->m_coeffs = (u32)(0.f*0xffff) | ((u32)(0.7f*0xffff)<<16);
|
||||
c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);
|
||||
c->m_batchIdx = pairIndex;
|
||||
int bodyA = pairs[pairIndex].x;
|
||||
int bodyB = pairs[pairIndex].y;
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -2,17 +2,11 @@
|
||||
static const char* satKernelsCL= \
|
||||
"//keep this enum in sync with the CPU version (in btCollidable.h)\n"
|
||||
"//written by Erwin Coumans\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"#define SHAPE_CONVEX_HULL 3\n"
|
||||
"#define SHAPE_CONCAVE_TRIMESH 5\n"
|
||||
"#define TRIANGLE_NUM_CONVEX_FACES 5\n"
|
||||
"#define SHAPE_COMPOUND_OF_CONVEX_HULLS 6\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"typedef unsigned int u32;\n"
|
||||
"\n"
|
||||
"///keep this in sync with btCollidable.h\n"
|
||||
"typedef struct\n"
|
||||
"{\n"
|
||||
@@ -22,7 +16,6 @@ static const char* satKernelsCL= \
|
||||
" int m_shapeIndex;\n"
|
||||
" \n"
|
||||
"} btCollidableGpu;\n"
|
||||
"\n"
|
||||
"typedef struct\n"
|
||||
"{\n"
|
||||
" float4 m_childPosition;\n"
|
||||
@@ -32,22 +25,17 @@ static const char* satKernelsCL= \
|
||||
" int m_unused1;\n"
|
||||
" int m_unused2;\n"
|
||||
"} btGpuChildShape;\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"typedef struct\n"
|
||||
"{\n"
|
||||
" float4 m_pos;\n"
|
||||
" float4 m_quat;\n"
|
||||
" float4 m_linVel;\n"
|
||||
" float4 m_angVel;\n"
|
||||
"\n"
|
||||
" u32 m_collidableIdx;\n"
|
||||
" float m_invMass;\n"
|
||||
" float m_restituitionCoeff;\n"
|
||||
" float m_frictionCoeff;\n"
|
||||
"} BodyData;\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"typedef struct \n"
|
||||
"{\n"
|
||||
" float4 m_localCenter;\n"
|
||||
@@ -59,13 +47,11 @@ static const char* satKernelsCL= \
|
||||
" int m_faceOffset;\n"
|
||||
" int m_numFaces;\n"
|
||||
" int m_numVertices;\n"
|
||||
"\n"
|
||||
" int m_vertexOffset;\n"
|
||||
" int m_uniqueEdgesOffset;\n"
|
||||
" int m_numUniqueEdges;\n"
|
||||
" int m_unused;\n"
|
||||
"} ConvexPolyhedronCL;\n"
|
||||
"\n"
|
||||
"typedef struct \n"
|
||||
"{\n"
|
||||
" union\n"
|
||||
@@ -81,35 +67,27 @@ static const char* satKernelsCL= \
|
||||
" int m_maxIndices[4];\n"
|
||||
" };\n"
|
||||
"} btAabbCL;\n"
|
||||
"\n"
|
||||
"typedef struct\n"
|
||||
"{\n"
|
||||
" float4 m_plane;\n"
|
||||
" int m_indexOffset;\n"
|
||||
" int m_numIndices;\n"
|
||||
"} btGpuFace;\n"
|
||||
"\n"
|
||||
"#define make_float4 (float4)\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float4 cross3(float4 a, float4 b)\n"
|
||||
"{\n"
|
||||
" return cross(a,b);\n"
|
||||
" \n"
|
||||
" \n"
|
||||
"// float4 a1 = make_float4(a.xyz,0.f);\n"
|
||||
"// float4 b1 = make_float4(b.xyz,0.f);\n"
|
||||
"\n"
|
||||
"// return cross(a1,b1);\n"
|
||||
"\n"
|
||||
"//float4 c = make_float4(a.y*b.z - a.z*b.y,a.z*b.x - a.x*b.z,a.x*b.y - a.y*b.x,0.f);\n"
|
||||
" \n"
|
||||
" // float4 c = make_float4(a.y*b.z - a.z*b.y,1.f,a.x*b.y - a.y*b.x,0.f);\n"
|
||||
" \n"
|
||||
" //return c;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float dot3F4(float4 a, float4 b)\n"
|
||||
"{\n"
|
||||
@@ -117,36 +95,24 @@ static const char* satKernelsCL= \
|
||||
" float4 b1 = make_float4(b.xyz,0.f);\n"
|
||||
" return dot(a1, b1);\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float4 fastNormalize4(float4 v)\n"
|
||||
"{\n"
|
||||
" v = make_float4(v.xyz,0.f);\n"
|
||||
" return fast_normalize(v);\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"///////////////////////////////////////\n"
|
||||
"// Quaternion\n"
|
||||
"///////////////////////////////////////\n"
|
||||
"\n"
|
||||
"typedef float4 Quaternion;\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"Quaternion qtMul(Quaternion a, Quaternion b);\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"Quaternion qtNormalize(Quaternion in);\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float4 qtRotate(Quaternion q, float4 vec);\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"Quaternion qtInvert(Quaternion q);\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"Quaternion qtMul(Quaternion a, Quaternion b)\n"
|
||||
"{\n"
|
||||
@@ -157,7 +123,6 @@ static const char* satKernelsCL= \
|
||||
" ans.w = a.w*b.w - dot3F4(a, b);\n"
|
||||
" return ans;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"Quaternion qtNormalize(Quaternion in)\n"
|
||||
"{\n"
|
||||
@@ -174,41 +139,33 @@ static const char* satKernelsCL= \
|
||||
" float4 out = qtMul(qtMul(q,vcpy),qInv);\n"
|
||||
" return out;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"Quaternion qtInvert(Quaternion q)\n"
|
||||
"{\n"
|
||||
" return (Quaternion)(-q.xyz, q.w);\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float4 qtInvRotate(const Quaternion q, float4 vec)\n"
|
||||
"{\n"
|
||||
" return qtRotate( qtInvert( q ), vec );\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float4 transform(const float4* p, const float4* translation, const Quaternion* orientation)\n"
|
||||
"{\n"
|
||||
" return qtRotate( *orientation, *p ) + (*translation);\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float4 normalize3(const float4 a)\n"
|
||||
"{\n"
|
||||
" float4 n = make_float4(a.x, a.y, a.z, 0.f);\n"
|
||||
" return fastNormalize4( n );\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"inline void projectLocal(const ConvexPolyhedronCL* hull, const float4 pos, const float4 orn, \n"
|
||||
"const float4* dir, const float4* vertices, float* min, float* max)\n"
|
||||
"{\n"
|
||||
" min[0] = FLT_MAX;\n"
|
||||
" max[0] = -FLT_MAX;\n"
|
||||
" int numVerts = hull->m_numVertices;\n"
|
||||
"\n"
|
||||
" const float4 localDir = qtInvRotate(orn,*dir);\n"
|
||||
" float offset = dot(pos,*dir);\n"
|
||||
" for(int i=0;i<numVerts;i++)\n"
|
||||
@@ -228,14 +185,12 @@ static const char* satKernelsCL= \
|
||||
" min[0] += offset;\n"
|
||||
" max[0] += offset;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"inline void project(__global const ConvexPolyhedronCL* hull, const float4 pos, const float4 orn, \n"
|
||||
"const float4* dir, __global const float4* vertices, float* min, float* max)\n"
|
||||
"{\n"
|
||||
" min[0] = FLT_MAX;\n"
|
||||
" max[0] = -FLT_MAX;\n"
|
||||
" int numVerts = hull->m_numVertices;\n"
|
||||
"\n"
|
||||
" const float4 localDir = qtInvRotate(orn,*dir);\n"
|
||||
" float offset = dot(pos,*dir);\n"
|
||||
" for(int i=0;i<numVerts;i++)\n"
|
||||
@@ -255,7 +210,6 @@ static const char* satKernelsCL= \
|
||||
" min[0] += offset;\n"
|
||||
" max[0] += offset;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"inline bool TestSepAxisLocalA(const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n"
|
||||
" const float4 posA,const float4 ornA,\n"
|
||||
" const float4 posB,const float4 ornB,\n"
|
||||
@@ -265,28 +219,19 @@ static const char* satKernelsCL= \
|
||||
" float Min1,Max1;\n"
|
||||
" projectLocal(hullA,posA,ornA,sep_axis,verticesA, &Min0, &Max0);\n"
|
||||
" project(hullB,posB,ornB, sep_axis,verticesB, &Min1, &Max1);\n"
|
||||
"\n"
|
||||
" if(Max0<Min1 || Max1<Min0)\n"
|
||||
" return false;\n"
|
||||
"\n"
|
||||
" float d0 = Max0 - Min1;\n"
|
||||
" float d1 = Max1 - Min0;\n"
|
||||
" *depth = d0<d1 ? d0:d1;\n"
|
||||
" return true;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"inline bool IsAlmostZero(const float4 v)\n"
|
||||
"{\n"
|
||||
" if(fabs(v.x)>1e-6f || fabs(v.y)>1e-6f || fabs(v.z)>1e-6f)\n"
|
||||
" return false;\n"
|
||||
" return true;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"bool findSeparatingAxisLocalA( const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n"
|
||||
" const float4 posA1,\n"
|
||||
" const float4 ornA,\n"
|
||||
@@ -298,7 +243,6 @@ static const char* satKernelsCL= \
|
||||
" const float4* uniqueEdgesA, \n"
|
||||
" const btGpuFace* facesA,\n"
|
||||
" const int* indicesA,\n"
|
||||
"\n"
|
||||
" __global const float4* verticesB, \n"
|
||||
" __global const float4* uniqueEdgesB, \n"
|
||||
" __global const btGpuFace* facesB,\n"
|
||||
@@ -307,7 +251,6 @@ static const char* satKernelsCL= \
|
||||
" float* dmin)\n"
|
||||
"{\n"
|
||||
" int i = get_global_id(0);\n"
|
||||
"\n"
|
||||
" float4 posA = posA1;\n"
|
||||
" posA.w = 0.f;\n"
|
||||
" float4 posB = posB1;\n"
|
||||
@@ -339,7 +282,6 @@ static const char* satKernelsCL= \
|
||||
" }\n"
|
||||
" return true;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"bool findSeparatingAxisLocalB( __global const ConvexPolyhedronCL* hullA, const ConvexPolyhedronCL* hullB, \n"
|
||||
" const float4 posA1,\n"
|
||||
" const float4 ornA,\n"
|
||||
@@ -358,7 +300,6 @@ static const char* satKernelsCL= \
|
||||
" float* dmin)\n"
|
||||
"{\n"
|
||||
" int i = get_global_id(0);\n"
|
||||
"\n"
|
||||
" float4 posA = posA1;\n"
|
||||
" posA.w = 0.f;\n"
|
||||
" float4 posB = posB1;\n"
|
||||
@@ -390,9 +331,6 @@ static const char* satKernelsCL= \
|
||||
" }\n"
|
||||
" return true;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"bool findSeparatingAxisEdgeEdgeLocalA( const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n"
|
||||
" const float4 posA1,\n"
|
||||
" const float4 ornA,\n"
|
||||
@@ -411,36 +349,28 @@ static const char* satKernelsCL= \
|
||||
" float* dmin)\n"
|
||||
"{\n"
|
||||
" int i = get_global_id(0);\n"
|
||||
"\n"
|
||||
" float4 posA = posA1;\n"
|
||||
" posA.w = 0.f;\n"
|
||||
" float4 posB = posB1;\n"
|
||||
" posB.w = 0.f;\n"
|
||||
"\n"
|
||||
" int curPlaneTests=0;\n"
|
||||
"\n"
|
||||
" int curEdgeEdge = 0;\n"
|
||||
" // Test edges\n"
|
||||
" for(int e0=0;e0<hullA->m_numUniqueEdges;e0++)\n"
|
||||
" {\n"
|
||||
" const float4 edge0 = uniqueEdgesA[hullA->m_uniqueEdgesOffset+e0];\n"
|
||||
" float4 edge0World = qtRotate(ornA,edge0);\n"
|
||||
"\n"
|
||||
" for(int e1=0;e1<hullB->m_numUniqueEdges;e1++)\n"
|
||||
" {\n"
|
||||
" const float4 edge1 = uniqueEdgesB[hullB->m_uniqueEdgesOffset+e1];\n"
|
||||
" float4 edge1World = qtRotate(ornB,edge1);\n"
|
||||
"\n"
|
||||
"\n"
|
||||
" float4 crossje = cross3(edge0World,edge1World);\n"
|
||||
"\n"
|
||||
" curEdgeEdge++;\n"
|
||||
" if(!IsAlmostZero(crossje))\n"
|
||||
" {\n"
|
||||
" crossje = normalize3(crossje);\n"
|
||||
" if (dot3F4(DeltaC2,crossje)<0)\n"
|
||||
" crossje *= -1.f;\n"
|
||||
"\n"
|
||||
" float dist;\n"
|
||||
" bool result = true;\n"
|
||||
" {\n"
|
||||
@@ -456,10 +386,8 @@ static const char* satKernelsCL= \
|
||||
" float d1 = Max1 - Min0;\n"
|
||||
" dist = d0<d1 ? d0:d1;\n"
|
||||
" result = true;\n"
|
||||
"\n"
|
||||
" }\n"
|
||||
" \n"
|
||||
"\n"
|
||||
" if(dist<*dmin)\n"
|
||||
" {\n"
|
||||
" *dmin = dist;\n"
|
||||
@@ -467,18 +395,14 @@ static const char* satKernelsCL= \
|
||||
" }\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" }\n"
|
||||
" \n"
|
||||
" \n"
|
||||
" if((dot3F4(-DeltaC2,*sep))>0.0f)\n"
|
||||
" {\n"
|
||||
" *sep = -(*sep);\n"
|
||||
" }\n"
|
||||
" return true;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"inline bool TestSepAxis(__global const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n"
|
||||
" const float4 posA,const float4 ornA,\n"
|
||||
" const float4 posB,const float4 ornB,\n"
|
||||
@@ -488,17 +412,13 @@ static const char* satKernelsCL= \
|
||||
" float Min1,Max1;\n"
|
||||
" project(hullA,posA,ornA,sep_axis,vertices, &Min0, &Max0);\n"
|
||||
" project(hullB,posB,ornB, sep_axis,vertices, &Min1, &Max1);\n"
|
||||
"\n"
|
||||
" if(Max0<Min1 || Max1<Min0)\n"
|
||||
" return false;\n"
|
||||
"\n"
|
||||
" float d0 = Max0 - Min1;\n"
|
||||
" float d1 = Max1 - Min0;\n"
|
||||
" *depth = d0<d1 ? d0:d1;\n"
|
||||
" return true;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"bool findSeparatingAxis( __global const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n"
|
||||
" const float4 posA1,\n"
|
||||
" const float4 ornA,\n"
|
||||
@@ -513,14 +433,12 @@ static const char* satKernelsCL= \
|
||||
" float* dmin)\n"
|
||||
"{\n"
|
||||
" int i = get_global_id(0);\n"
|
||||
"\n"
|
||||
" float4 posA = posA1;\n"
|
||||
" posA.w = 0.f;\n"
|
||||
" float4 posB = posB1;\n"
|
||||
" posB.w = 0.f;\n"
|
||||
" \n"
|
||||
" int curPlaneTests=0;\n"
|
||||
"\n"
|
||||
" {\n"
|
||||
" int numFacesA = hullA->m_numFaces;\n"
|
||||
" // Test normals from hullA\n"
|
||||
@@ -545,8 +463,6 @@ static const char* satKernelsCL= \
|
||||
" }\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
"\n"
|
||||
" if((dot3F4(-DeltaC2,*sep))>0.0f)\n"
|
||||
" {\n"
|
||||
" *sep = -(*sep);\n"
|
||||
@@ -554,10 +470,6 @@ static const char* satKernelsCL= \
|
||||
" \n"
|
||||
" return true;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"bool findSeparatingAxisEdgeEdge( __global const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n"
|
||||
" const float4 posA1,\n"
|
||||
" const float4 ornA,\n"
|
||||
@@ -572,29 +484,22 @@ static const char* satKernelsCL= \
|
||||
" float* dmin)\n"
|
||||
"{\n"
|
||||
" int i = get_global_id(0);\n"
|
||||
"\n"
|
||||
" float4 posA = posA1;\n"
|
||||
" posA.w = 0.f;\n"
|
||||
" float4 posB = posB1;\n"
|
||||
" posB.w = 0.f;\n"
|
||||
"\n"
|
||||
" int curPlaneTests=0;\n"
|
||||
"\n"
|
||||
" int curEdgeEdge = 0;\n"
|
||||
" // Test edges\n"
|
||||
" for(int e0=0;e0<hullA->m_numUniqueEdges;e0++)\n"
|
||||
" {\n"
|
||||
" const float4 edge0 = uniqueEdges[hullA->m_uniqueEdgesOffset+e0];\n"
|
||||
" float4 edge0World = qtRotate(ornA,edge0);\n"
|
||||
"\n"
|
||||
" for(int e1=0;e1<hullB->m_numUniqueEdges;e1++)\n"
|
||||
" {\n"
|
||||
" const float4 edge1 = uniqueEdges[hullB->m_uniqueEdgesOffset+e1];\n"
|
||||
" float4 edge1World = qtRotate(ornB,edge1);\n"
|
||||
"\n"
|
||||
"\n"
|
||||
" float4 crossje = cross3(edge0World,edge1World);\n"
|
||||
"\n"
|
||||
" curEdgeEdge++;\n"
|
||||
" if(!IsAlmostZero(crossje))\n"
|
||||
" {\n"
|
||||
@@ -617,10 +522,8 @@ static const char* satKernelsCL= \
|
||||
" float d1 = Max1 - Min0;\n"
|
||||
" dist = d0<d1 ? d0:d1;\n"
|
||||
" result = true;\n"
|
||||
"\n"
|
||||
" }\n"
|
||||
" \n"
|
||||
"\n"
|
||||
" if(dist<*dmin)\n"
|
||||
" {\n"
|
||||
" *dmin = dist;\n"
|
||||
@@ -628,18 +531,14 @@ static const char* satKernelsCL= \
|
||||
" }\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" }\n"
|
||||
" \n"
|
||||
" \n"
|
||||
" if((dot3F4(-DeltaC2,*sep))>0.0f)\n"
|
||||
" {\n"
|
||||
" *sep = -(*sep);\n"
|
||||
" }\n"
|
||||
" return true;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"// work-in-progress\n"
|
||||
"__kernel void processCompoundPairsKernel( __global const int4* gpuCompoundPairs,\n"
|
||||
" __global const BodyData* rigidBodies, \n"
|
||||
@@ -656,13 +555,11 @@ static const char* satKernelsCL= \
|
||||
" int numCompoundPairs\n"
|
||||
" )\n"
|
||||
"{\n"
|
||||
"\n"
|
||||
" int i = get_global_id(0);\n"
|
||||
" if (i<numCompoundPairs)\n"
|
||||
" {\n"
|
||||
" int bodyIndexA = gpuCompoundPairs[i].x;\n"
|
||||
" int bodyIndexB = gpuCompoundPairs[i].y;\n"
|
||||
"\n"
|
||||
" int childShapeIndexA = gpuCompoundPairs[i].z;\n"
|
||||
" int childShapeIndexB = gpuCompoundPairs[i].w;\n"
|
||||
" \n"
|
||||
@@ -711,12 +608,10 @@ static const char* satKernelsCL= \
|
||||
" int shapeTypeA = collidables[collidableIndexA].m_shapeType;\n"
|
||||
" int shapeTypeB = collidables[collidableIndexB].m_shapeType;\n"
|
||||
" \n"
|
||||
"\n"
|
||||
" if ((shapeTypeA != SHAPE_CONVEX_HULL) || (shapeTypeB != SHAPE_CONVEX_HULL))\n"
|
||||
" {\n"
|
||||
" return;\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" int hasSeparatingAxis = 5;\n"
|
||||
" \n"
|
||||
" int numFacesA = convexShapes[shapeIndexA].m_numFaces;\n"
|
||||
@@ -737,7 +632,6 @@ static const char* satKernelsCL= \
|
||||
" } else\n"
|
||||
" {\n"
|
||||
" bool sepB = findSeparatingAxis( &convexShapes[shapeIndexB],&convexShapes[shapeIndexA],posB,ornB,posA,ornA,DeltaC2,vertices,uniqueEdges,faces,indices,&sepNormal,&dmin);\n"
|
||||
"\n"
|
||||
" if (!sepB)\n"
|
||||
" {\n"
|
||||
" hasSeparatingAxis = 0;\n"
|
||||
@@ -756,7 +650,6 @@ static const char* satKernelsCL= \
|
||||
" }\n"
|
||||
" \n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"// work-in-progress\n"
|
||||
"__kernel void findCompoundPairsKernel( __global const int4* pairs, \n"
|
||||
" __global const BodyData* rigidBodies, \n"
|
||||
@@ -774,48 +667,36 @@ static const char* satKernelsCL= \
|
||||
" int maxNumCompoundPairsCapacity\n"
|
||||
" )\n"
|
||||
"{\n"
|
||||
"\n"
|
||||
" int i = get_global_id(0);\n"
|
||||
"\n"
|
||||
" if (i<numPairs)\n"
|
||||
" {\n"
|
||||
" int bodyIndexA = pairs[i].x;\n"
|
||||
" int bodyIndexB = pairs[i].y;\n"
|
||||
"\n"
|
||||
" int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n"
|
||||
" int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n"
|
||||
"\n"
|
||||
" int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n"
|
||||
" int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n"
|
||||
"\n"
|
||||
"\n"
|
||||
" //once the broadphase avoids static-static pairs, we can remove this test\n"
|
||||
" if ((rigidBodies[bodyIndexA].m_invMass==0) &&(rigidBodies[bodyIndexB].m_invMass==0))\n"
|
||||
" {\n"
|
||||
" return;\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" if ((collidables[collidableIndexA].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) ||(collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS))\n"
|
||||
" {\n"
|
||||
"\n"
|
||||
" if (collidables[collidableIndexA].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) \n"
|
||||
" {\n"
|
||||
"\n"
|
||||
" int numChildrenA = collidables[collidableIndexA].m_numChildShapes;\n"
|
||||
" for (int c=0;c<numChildrenA;c++)\n"
|
||||
" {\n"
|
||||
" int childShapeIndexA = collidables[collidableIndexA].m_shapeIndex+c;\n"
|
||||
" int childColIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex;\n"
|
||||
"\n"
|
||||
" float4 posA = rigidBodies[bodyIndexA].m_pos;\n"
|
||||
" float4 ornA = rigidBodies[bodyIndexA].m_quat;\n"
|
||||
" float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition;\n"
|
||||
" float4 childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation;\n"
|
||||
" float4 newPosA = qtRotate(ornA,childPosA)+posA;\n"
|
||||
" float4 newOrnA = qtMul(ornA,childOrnA);\n"
|
||||
"\n"
|
||||
" int shapeIndexA = collidables[childColIndexA].m_shapeIndex;\n"
|
||||
"\n"
|
||||
" if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)\n"
|
||||
" {\n"
|
||||
" int numChildrenB = collidables[collidableIndexB].m_numChildShapes;\n"
|
||||
@@ -829,9 +710,7 @@ static const char* satKernelsCL= \
|
||||
" float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;\n"
|
||||
" float4 newPosB = transform(&childPosB,&posB,&ornB);\n"
|
||||
" float4 newOrnB = qtMul(ornB,childOrnB);\n"
|
||||
"\n"
|
||||
" int shapeIndexB = collidables[childColIndexB].m_shapeIndex;\n"
|
||||
"\n"
|
||||
" if (1)\n"
|
||||
" {\n"
|
||||
" int numFacesA = convexShapes[shapeIndexA].m_numFaces;\n"
|
||||
@@ -847,7 +726,6 @@ static const char* satKernelsCL= \
|
||||
" float4 ornB =newOrnB;\n"
|
||||
" float4 c1 = transform(&c1local,&posB,&ornB);\n"
|
||||
" const float4 DeltaC2 = c0 - c1;\n"
|
||||
"\n"
|
||||
" {//\n"
|
||||
" int compoundPairIdx = atomic_inc(numCompoundPairsOut);\n"
|
||||
" if (compoundPairIdx<maxNumCompoundPairsCapacity)\n"
|
||||
@@ -875,7 +753,6 @@ static const char* satKernelsCL= \
|
||||
" float4 ornB = rigidBodies[bodyIndexB].m_quat;\n"
|
||||
" float4 c1 = transform(&c1local,&posB,&ornB);\n"
|
||||
" const float4 DeltaC2 = c0 - c1;\n"
|
||||
"\n"
|
||||
" {\n"
|
||||
" int compoundPairIdx = atomic_inc(numCompoundPairsOut);\n"
|
||||
" if (compoundPairIdx<maxNumCompoundPairsCapacity)\n"
|
||||
@@ -902,12 +779,8 @@ static const char* satKernelsCL= \
|
||||
" float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;\n"
|
||||
" float4 newPosB = qtRotate(ornB,childPosB)+posB;\n"
|
||||
" float4 newOrnB = qtMul(ornB,childOrnB);\n"
|
||||
"\n"
|
||||
" int shapeIndexB = collidables[childColIndexB].m_shapeIndex;\n"
|
||||
"\n"
|
||||
"\n"
|
||||
" //////////////////////////////////////\n"
|
||||
"\n"
|
||||
" if (1)\n"
|
||||
" {\n"
|
||||
" int numFacesA = convexShapes[shapeIndexA].m_numFaces;\n"
|
||||
@@ -938,7 +811,6 @@ static const char* satKernelsCL= \
|
||||
" }//fi ((collidables[collidableIndexA].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) ||(collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS))\n"
|
||||
" }//i<numPairs\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"// work-in-progress\n"
|
||||
"__kernel void findSeparatingAxisKernel( __global const int4* pairs, \n"
|
||||
" __global const BodyData* rigidBodies, \n"
|
||||
@@ -954,16 +826,13 @@ static const char* satKernelsCL= \
|
||||
" int numPairs\n"
|
||||
" )\n"
|
||||
"{\n"
|
||||
"\n"
|
||||
" int i = get_global_id(0);\n"
|
||||
" \n"
|
||||
" if (i<numPairs)\n"
|
||||
" {\n"
|
||||
" \n"
|
||||
" \n"
|
||||
" int bodyIndexA = pairs[i].x;\n"
|
||||
" int bodyIndexB = pairs[i].y;\n"
|
||||
"\n"
|
||||
" int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n"
|
||||
" int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n"
|
||||
" \n"
|
||||
@@ -978,7 +847,6 @@ static const char* satKernelsCL= \
|
||||
" return;\n"
|
||||
" }\n"
|
||||
" \n"
|
||||
"\n"
|
||||
" if ((collidables[collidableIndexA].m_shapeType!=SHAPE_CONVEX_HULL) ||(collidables[collidableIndexB].m_shapeType!=SHAPE_CONVEX_HULL))\n"
|
||||
" {\n"
|
||||
" hasSeparatingAxis[i] = 0;\n"
|
||||
@@ -990,11 +858,8 @@ static const char* satKernelsCL= \
|
||||
" hasSeparatingAxis[i] = 0;\n"
|
||||
" return;\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" int numFacesA = convexShapes[shapeIndexA].m_numFaces;\n"
|
||||
"\n"
|
||||
" float dmin = FLT_MAX;\n"
|
||||
"\n"
|
||||
" float4 posA = rigidBodies[bodyIndexA].m_pos;\n"
|
||||
" posA.w = 0.f;\n"
|
||||
" float4 posB = rigidBodies[bodyIndexB].m_pos;\n"
|
||||
@@ -1024,7 +889,6 @@ static const char* satKernelsCL= \
|
||||
" DeltaC2,\n"
|
||||
" vertices,uniqueEdges,faces,\n"
|
||||
" indices,&sepNormal,&dmin);\n"
|
||||
"\n"
|
||||
" if (!sepB)\n"
|
||||
" {\n"
|
||||
" hasSeparatingAxis[i] = 0;\n"
|
||||
@@ -1047,12 +911,7 @@ static const char* satKernelsCL= \
|
||||
" }\n"
|
||||
" \n"
|
||||
" }\n"
|
||||
"\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"// work-in-progress\n"
|
||||
"__kernel void findConcaveSeparatingAxisKernel( __global int4* concavePairs,\n"
|
||||
" __global const BodyData* rigidBodies,\n"
|
||||
@@ -1068,30 +927,22 @@ static const char* satKernelsCL= \
|
||||
" int numConcavePairs\n"
|
||||
" )\n"
|
||||
"{\n"
|
||||
"\n"
|
||||
" int i = get_global_id(0);\n"
|
||||
" if (i>=numConcavePairs)\n"
|
||||
" return;\n"
|
||||
" int pairIdx = i;\n"
|
||||
"\n"
|
||||
" int bodyIndexA = concavePairs[i].x;\n"
|
||||
" int bodyIndexB = concavePairs[i].y;\n"
|
||||
"\n"
|
||||
" int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n"
|
||||
" int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n"
|
||||
"\n"
|
||||
" int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n"
|
||||
" int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n"
|
||||
"\n"
|
||||
" if (collidables[collidableIndexB].m_shapeType!=SHAPE_CONVEX_HULL&&\n"
|
||||
" collidables[collidableIndexB].m_shapeType!=SHAPE_COMPOUND_OF_CONVEX_HULLS)\n"
|
||||
" {\n"
|
||||
" concavePairs[pairIdx].w = -1;\n"
|
||||
" return;\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
" int numFacesA = convexShapes[shapeIndexA].m_numFaces;\n"
|
||||
" int numActualConcaveConvexTests = 0;\n"
|
||||
" \n"
|
||||
@@ -1100,12 +951,10 @@ static const char* satKernelsCL= \
|
||||
" bool overlap = false;\n"
|
||||
" \n"
|
||||
" ConvexPolyhedronCL convexPolyhedronA;\n"
|
||||
"\n"
|
||||
" //add 3 vertices of the triangle\n"
|
||||
" convexPolyhedronA.m_numVertices = 3;\n"
|
||||
" convexPolyhedronA.m_vertexOffset = 0;\n"
|
||||
" float4 localCenter = make_float4(0.f,0.f,0.f,0.f);\n"
|
||||
"\n"
|
||||
" btGpuFace face = faces[convexShapes[shapeIndexA].m_faceOffset+f];\n"
|
||||
" float4 triMinAabb, triMaxAabb;\n"
|
||||
" btAabbCL triAabb;\n"
|
||||
@@ -1122,9 +971,7 @@ static const char* satKernelsCL= \
|
||||
" \n"
|
||||
" triAabb.m_min = min(triAabb.m_min,vert); \n"
|
||||
" triAabb.m_max = max(triAabb.m_max,vert); \n"
|
||||
"\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" overlap = true;\n"
|
||||
" overlap = (triAabb.m_min.x > aabbs[bodyIndexB].m_max.x || triAabb.m_max.x < aabbs[bodyIndexB].m_min.x) ? false : overlap;\n"
|
||||
" overlap = (triAabb.m_min.z > aabbs[bodyIndexB].m_max.z || triAabb.m_max.z < aabbs[bodyIndexB].m_min.z) ? false : overlap;\n"
|
||||
@@ -1135,10 +982,8 @@ static const char* satKernelsCL= \
|
||||
" float dmin = FLT_MAX;\n"
|
||||
" int hasSeparatingAxis=5;\n"
|
||||
" float4 sepAxis=make_float4(1,2,3,4);\n"
|
||||
"\n"
|
||||
" int localCC=0;\n"
|
||||
" numActualConcaveConvexTests++;\n"
|
||||
"\n"
|
||||
" //a triangle has 3 unique edges\n"
|
||||
" convexPolyhedronA.m_numUniqueEdges = 3;\n"
|
||||
" convexPolyhedronA.m_uniqueEdgesOffset = 0;\n"
|
||||
@@ -1147,8 +992,6 @@ static const char* satKernelsCL= \
|
||||
" uniqueEdgesA[0] = (verticesA[1]-verticesA[0]);\n"
|
||||
" uniqueEdgesA[1] = (verticesA[2]-verticesA[1]);\n"
|
||||
" uniqueEdgesA[2] = (verticesA[0]-verticesA[2]);\n"
|
||||
"\n"
|
||||
"\n"
|
||||
" convexPolyhedronA.m_faceOffset = 0;\n"
|
||||
" \n"
|
||||
" float4 normal = make_float4(face.m_plane.x,face.m_plane.y,face.m_plane.z,0.f);\n"
|
||||
@@ -1157,7 +1000,6 @@ static const char* satKernelsCL= \
|
||||
" int indicesA[3+3+2+2+2];\n"
|
||||
" int curUsedIndices=0;\n"
|
||||
" int fidx=0;\n"
|
||||
"\n"
|
||||
" //front size of triangle\n"
|
||||
" {\n"
|
||||
" facesA[fidx].m_indexOffset=curUsedIndices;\n"
|
||||
@@ -1189,7 +1031,6 @@ static const char* satKernelsCL= \
|
||||
" facesA[fidx].m_numIndices=3;\n"
|
||||
" }\n"
|
||||
" fidx++;\n"
|
||||
"\n"
|
||||
" bool addEdgePlanes = true;\n"
|
||||
" if (addEdgePlanes)\n"
|
||||
" {\n"
|
||||
@@ -1202,7 +1043,6 @@ static const char* satKernelsCL= \
|
||||
" \n"
|
||||
" float4 edgeNormal = normalize(cross(normal,v1-v0));\n"
|
||||
" float c = -dot(edgeNormal,v0);\n"
|
||||
"\n"
|
||||
" facesA[fidx].m_numIndices = 2;\n"
|
||||
" facesA[fidx].m_indexOffset=curUsedIndices;\n"
|
||||
" indicesA[curUsedIndices++]=i;\n"
|
||||
@@ -1218,22 +1058,15 @@ static const char* satKernelsCL= \
|
||||
" }\n"
|
||||
" convexPolyhedronA.m_numFaces = TRIANGLE_NUM_CONVEX_FACES;\n"
|
||||
" convexPolyhedronA.m_localCenter = localCenter*(1.f/3.f);\n"
|
||||
"\n"
|
||||
"\n"
|
||||
" float4 posA = rigidBodies[bodyIndexA].m_pos;\n"
|
||||
" posA.w = 0.f;\n"
|
||||
" float4 posB = rigidBodies[bodyIndexB].m_pos;\n"
|
||||
" posB.w = 0.f;\n"
|
||||
"\n"
|
||||
" float4 ornA = rigidBodies[bodyIndexA].m_quat;\n"
|
||||
" float4 ornB =rigidBodies[bodyIndexB].m_quat;\n"
|
||||
" \n"
|
||||
" \n"
|
||||
"\n"
|
||||
"\n"
|
||||
" ///////////////////\n"
|
||||
" ///compound shape support\n"
|
||||
"\n"
|
||||
" if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)\n"
|
||||
" {\n"
|
||||
" int compoundChild = concavePairs[pairIdx].w;\n"
|
||||
@@ -1248,14 +1081,11 @@ static const char* satKernelsCL= \
|
||||
" shapeIndexB = collidables[childColIndexB].m_shapeIndex;\n"
|
||||
" }\n"
|
||||
" //////////////////\n"
|
||||
"\n"
|
||||
" float4 c0local = convexPolyhedronA.m_localCenter;\n"
|
||||
" float4 c0 = transform(&c0local, &posA, &ornA);\n"
|
||||
" float4 c1local = convexShapes[shapeIndexB].m_localCenter;\n"
|
||||
" float4 c1 = transform(&c1local,&posB,&ornB);\n"
|
||||
" const float4 DeltaC2 = c0 - c1;\n"
|
||||
"\n"
|
||||
"\n"
|
||||
" bool sepA = findSeparatingAxisLocalA( &convexPolyhedronA, &convexShapes[shapeIndexB],\n"
|
||||
" posA,ornA,\n"
|
||||
" posB,ornB,\n"
|
||||
@@ -1276,7 +1106,6 @@ static const char* satKernelsCL= \
|
||||
" vertices,uniqueEdges,faces,indices,\n"
|
||||
" verticesA,uniqueEdgesA,facesA,indicesA,\n"
|
||||
" &sepAxis,&dmin);\n"
|
||||
"\n"
|
||||
" if (!sepB)\n"
|
||||
" {\n"
|
||||
" hasSeparatingAxis = 0;\n"
|
||||
|
||||
@@ -2,59 +2,45 @@
|
||||
static const char* boundSearchKernelsCL= \
|
||||
"/*\n"
|
||||
"Copyright (c) 2012 Advanced Micro Devices, Inc. \n"
|
||||
"\n"
|
||||
"This software is provided 'as-is', without any express or implied warranty.\n"
|
||||
"In no event will the authors be held liable for any damages arising from the use of this software.\n"
|
||||
"Permission is granted to anyone to use this software for any purpose, \n"
|
||||
"including commercial applications, and to alter it and redistribute it freely, \n"
|
||||
"subject to the following restrictions:\n"
|
||||
"\n"
|
||||
"1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n"
|
||||
"2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n"
|
||||
"3. This notice may not be removed or altered from any source distribution.\n"
|
||||
"*/\n"
|
||||
"//Originally written by Takahiro Harada\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"typedef unsigned int u32;\n"
|
||||
"#define GET_GROUP_IDX get_group_id(0)\n"
|
||||
"#define GET_LOCAL_IDX get_local_id(0)\n"
|
||||
"#define GET_GLOBAL_IDX get_global_id(0)\n"
|
||||
"#define GET_GROUP_SIZE get_local_size(0)\n"
|
||||
"#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)\n"
|
||||
"\n"
|
||||
"typedef struct\n"
|
||||
"{\n"
|
||||
" u32 m_key; \n"
|
||||
" u32 m_value;\n"
|
||||
"}SortData;\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"typedef struct\n"
|
||||
"{\n"
|
||||
" u32 m_nSrc;\n"
|
||||
" u32 m_nDst;\n"
|
||||
" u32 m_padding[2];\n"
|
||||
"} ConstBuffer;\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"__attribute__((reqd_work_group_size(64,1,1)))\n"
|
||||
"__kernel\n"
|
||||
"void SearchSortDataLowerKernel(__global SortData* src, __global u32 *dst, \n"
|
||||
" unsigned int nSrc, unsigned int nDst)\n"
|
||||
"{\n"
|
||||
" int gIdx = GET_GLOBAL_IDX;\n"
|
||||
"\n"
|
||||
" if( gIdx < nSrc )\n"
|
||||
" {\n"
|
||||
" SortData first; first.m_key = (u32)(-1); first.m_value = (u32)(-1);\n"
|
||||
" SortData end; end.m_key = nDst; end.m_value = nDst;\n"
|
||||
"\n"
|
||||
" SortData iData = (gIdx==0)? first: src[gIdx-1];\n"
|
||||
" SortData jData = (gIdx==nSrc)? end: src[gIdx];\n"
|
||||
"\n"
|
||||
" if( iData.m_key != jData.m_key )\n"
|
||||
" {\n"
|
||||
"// for(u32 k=iData.m_key+1; k<=min(jData.m_key, nDst-1); k++)\n"
|
||||
@@ -65,23 +51,18 @@ static const char* boundSearchKernelsCL= \
|
||||
" }\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"__attribute__((reqd_work_group_size(64,1,1)))\n"
|
||||
"__kernel\n"
|
||||
"void SearchSortDataUpperKernel(__global SortData* src, __global u32 *dst, \n"
|
||||
" unsigned int nSrc, unsigned int nDst)\n"
|
||||
"{\n"
|
||||
" int gIdx = GET_GLOBAL_IDX+1;\n"
|
||||
"\n"
|
||||
" if( gIdx < nSrc+1 )\n"
|
||||
" {\n"
|
||||
" SortData first; first.m_key = 0; first.m_value = 0;\n"
|
||||
" SortData end; end.m_key = nDst; end.m_value = nDst;\n"
|
||||
"\n"
|
||||
" SortData iData = src[gIdx-1];\n"
|
||||
" SortData jData = (gIdx==nSrc)? end: src[gIdx];\n"
|
||||
"\n"
|
||||
" if( iData.m_key != jData.m_key )\n"
|
||||
" {\n"
|
||||
" u32 k = iData.m_key;\n"
|
||||
@@ -91,7 +72,6 @@ static const char* boundSearchKernelsCL= \
|
||||
" }\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__attribute__((reqd_work_group_size(64,1,1)))\n"
|
||||
"__kernel\n"
|
||||
"void SubtractKernel(__global u32* A, __global u32 *B, __global u32 *C, \n"
|
||||
@@ -99,11 +79,9 @@ static const char* boundSearchKernelsCL= \
|
||||
"{\n"
|
||||
" int gIdx = GET_GLOBAL_IDX;\n"
|
||||
" \n"
|
||||
"\n"
|
||||
" if( gIdx < nDst )\n"
|
||||
" {\n"
|
||||
" C[gIdx] = A[gIdx] - B[gIdx];\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
;
|
||||
|
||||
@@ -2,23 +2,18 @@
|
||||
static const char* fillKernelsCL= \
|
||||
"/*\n"
|
||||
"Copyright (c) 2012 Advanced Micro Devices, Inc. \n"
|
||||
"\n"
|
||||
"This software is provided 'as-is', without any express or implied warranty.\n"
|
||||
"In no event will the authors be held liable for any damages arising from the use of this software.\n"
|
||||
"Permission is granted to anyone to use this software for any purpose, \n"
|
||||
"including commercial applications, and to alter it and redistribute it freely, \n"
|
||||
"subject to the following restrictions:\n"
|
||||
"\n"
|
||||
"1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n"
|
||||
"2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n"
|
||||
"3. This notice may not be removed or altered from any source distribution.\n"
|
||||
"*/\n"
|
||||
"//Originally written by Takahiro Harada\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"#pragma OPENCL EXTENSION cl_amd_printf : enable\n"
|
||||
"#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n"
|
||||
"\n"
|
||||
"typedef unsigned int u32;\n"
|
||||
"#define GET_GROUP_IDX get_group_id(0)\n"
|
||||
"#define GET_LOCAL_IDX get_local_id(0)\n"
|
||||
@@ -28,11 +23,9 @@ static const char* fillKernelsCL= \
|
||||
"#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE)\n"
|
||||
"#define AtomInc(x) atom_inc(&(x))\n"
|
||||
"#define AtomInc1(x, out) out = atom_inc(&(x))\n"
|
||||
"\n"
|
||||
"#define make_uint4 (uint4)\n"
|
||||
"#define make_uint2 (uint2)\n"
|
||||
"#define make_int2 (int2)\n"
|
||||
"\n"
|
||||
"typedef struct\n"
|
||||
"{\n"
|
||||
" union\n"
|
||||
@@ -45,66 +38,54 @@ static const char* fillKernelsCL= \
|
||||
" int m_n;\n"
|
||||
" int m_padding[2];\n"
|
||||
"} ConstBuffer;\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"__kernel\n"
|
||||
"__attribute__((reqd_work_group_size(64,1,1)))\n"
|
||||
"void FillIntKernel(__global int* dstInt, int num_elements, int value, const int offset)\n"
|
||||
"{\n"
|
||||
" int gIdx = GET_GLOBAL_IDX;\n"
|
||||
"\n"
|
||||
" if( gIdx < num_elements )\n"
|
||||
" {\n"
|
||||
" dstInt[ offset+gIdx ] = value;\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__kernel\n"
|
||||
"__attribute__((reqd_work_group_size(64,1,1)))\n"
|
||||
"void FillFloatKernel(__global float* dstFloat, int num_elements, float value, const int offset)\n"
|
||||
"{\n"
|
||||
" int gIdx = GET_GLOBAL_IDX;\n"
|
||||
"\n"
|
||||
" if( gIdx < num_elements )\n"
|
||||
" {\n"
|
||||
" dstFloat[ offset+gIdx ] = value;\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__kernel\n"
|
||||
"__attribute__((reqd_work_group_size(64,1,1)))\n"
|
||||
"void FillUnsignedIntKernel(__global unsigned int* dstInt, const int num, const unsigned int value, const int offset)\n"
|
||||
"{\n"
|
||||
" int gIdx = GET_GLOBAL_IDX;\n"
|
||||
"\n"
|
||||
" if( gIdx < num )\n"
|
||||
" {\n"
|
||||
" dstInt[ offset+gIdx ] = value;\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__kernel\n"
|
||||
"__attribute__((reqd_work_group_size(64,1,1)))\n"
|
||||
"void FillInt2Kernel(__global int2* dstInt2, const int num, const int2 value, const int offset)\n"
|
||||
"{\n"
|
||||
" int gIdx = GET_GLOBAL_IDX;\n"
|
||||
"\n"
|
||||
" if( gIdx < num )\n"
|
||||
" {\n"
|
||||
" dstInt2[ gIdx + offset] = make_int2( value.x, value.y );\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__kernel\n"
|
||||
"__attribute__((reqd_work_group_size(64,1,1)))\n"
|
||||
"void FillInt4Kernel(__global int4* dstInt4, const int num, const int4 value, const int offset)\n"
|
||||
"{\n"
|
||||
" int gIdx = GET_GLOBAL_IDX;\n"
|
||||
"\n"
|
||||
" if( gIdx < num )\n"
|
||||
" {\n"
|
||||
" dstInt4[ offset+gIdx ] = value;\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
;
|
||||
|
||||
@@ -2,33 +2,27 @@
|
||||
static const char* prefixScanKernelsCL= \
|
||||
"/*\n"
|
||||
"Copyright (c) 2012 Advanced Micro Devices, Inc. \n"
|
||||
"\n"
|
||||
"This software is provided 'as-is', without any express or implied warranty.\n"
|
||||
"In no event will the authors be held liable for any damages arising from the use of this software.\n"
|
||||
"Permission is granted to anyone to use this software for any purpose, \n"
|
||||
"including commercial applications, and to alter it and redistribute it freely, \n"
|
||||
"subject to the following restrictions:\n"
|
||||
"\n"
|
||||
"1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n"
|
||||
"2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n"
|
||||
"3. This notice may not be removed or altered from any source distribution.\n"
|
||||
"*/\n"
|
||||
"//Originally written by Takahiro Harada\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"typedef unsigned int u32;\n"
|
||||
"#define GET_GROUP_IDX get_group_id(0)\n"
|
||||
"#define GET_LOCAL_IDX get_local_id(0)\n"
|
||||
"#define GET_GLOBAL_IDX get_global_id(0)\n"
|
||||
"#define GET_GROUP_SIZE get_local_size(0)\n"
|
||||
"#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)\n"
|
||||
"\n"
|
||||
"// takahiro end\n"
|
||||
"#define WG_SIZE 128 \n"
|
||||
"#define m_numElems x\n"
|
||||
"#define m_numBlocks y\n"
|
||||
"#define m_numScanBlocks z\n"
|
||||
"\n"
|
||||
"/*typedef struct\n"
|
||||
"{\n"
|
||||
" uint m_numElems;\n"
|
||||
@@ -37,7 +31,6 @@ static const char* prefixScanKernelsCL= \
|
||||
" uint m_padding[1];\n"
|
||||
"} ConstBuffer;\n"
|
||||
"*/\n"
|
||||
"\n"
|
||||
"u32 ScanExclusive(__local u32* data, u32 n, int lIdx, int lSize)\n"
|
||||
"{\n"
|
||||
" u32 blocksum;\n"
|
||||
@@ -52,17 +45,13 @@ static const char* prefixScanKernelsCL= \
|
||||
" data[bi] += data[ai];\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" GROUP_LDS_BARRIER;\n"
|
||||
"\n"
|
||||
" if( lIdx == 0 )\n"
|
||||
" {\n"
|
||||
" blocksum = data[ n-1 ];\n"
|
||||
" data[ n-1 ] = 0;\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" GROUP_LDS_BARRIER;\n"
|
||||
"\n"
|
||||
" offset >>= 1;\n"
|
||||
" for(int nActive=1; nActive<n; nActive<<=1, offset>>=1 )\n"
|
||||
" {\n"
|
||||
@@ -77,27 +66,20 @@ static const char* prefixScanKernelsCL= \
|
||||
" }\n"
|
||||
" }\n"
|
||||
" GROUP_LDS_BARRIER;\n"
|
||||
"\n"
|
||||
" return blocksum;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n"
|
||||
"__kernel\n"
|
||||
"void LocalScanKernel(__global u32* dst, __global u32 *src, __global u32 *sumBuffer,\n"
|
||||
" uint4 cb)\n"
|
||||
"{\n"
|
||||
" __local u32 ldsData[WG_SIZE*2];\n"
|
||||
"\n"
|
||||
" int gIdx = GET_GLOBAL_IDX;\n"
|
||||
" int lIdx = GET_LOCAL_IDX;\n"
|
||||
"\n"
|
||||
" ldsData[2*lIdx] = ( 2*gIdx < cb.m_numElems )? src[2*gIdx]: 0;\n"
|
||||
" ldsData[2*lIdx + 1] = ( 2*gIdx+1 < cb.m_numElems )? src[2*gIdx + 1]: 0;\n"
|
||||
"\n"
|
||||
" u32 sum = ScanExclusive(ldsData, WG_SIZE*2, GET_LOCAL_IDX, GET_GROUP_SIZE);\n"
|
||||
"\n"
|
||||
" if( lIdx == 0 ) sumBuffer[GET_GROUP_IDX] = sum;\n"
|
||||
"\n"
|
||||
" if( (2*gIdx) < cb.m_numElems )\n"
|
||||
" {\n"
|
||||
" dst[2*gIdx] = ldsData[2*lIdx];\n"
|
||||
@@ -107,25 +89,20 @@ static const char* prefixScanKernelsCL= \
|
||||
" dst[2*gIdx + 1] = ldsData[2*lIdx + 1];\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n"
|
||||
"__kernel\n"
|
||||
"void AddOffsetKernel(__global u32 *dst, __global u32 *blockSum, uint4 cb)\n"
|
||||
"{\n"
|
||||
" const u32 blockSize = WG_SIZE*2;\n"
|
||||
"\n"
|
||||
" int myIdx = GET_GROUP_IDX+1;\n"
|
||||
" int lIdx = GET_LOCAL_IDX;\n"
|
||||
"\n"
|
||||
" u32 iBlockSum = blockSum[myIdx];\n"
|
||||
"\n"
|
||||
" int endValue = min((myIdx+1)*(blockSize), cb.m_numElems);\n"
|
||||
" for(int i=myIdx*blockSize+lIdx; i<endValue; i+=GET_GROUP_SIZE)\n"
|
||||
" {\n"
|
||||
" dst[i] += iBlockSum;\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n"
|
||||
"__kernel\n"
|
||||
"void TopLevelScanKernel(__global u32* dst, uint4 cb)\n"
|
||||
@@ -134,21 +111,16 @@ static const char* prefixScanKernelsCL= \
|
||||
" int gIdx = GET_GLOBAL_IDX;\n"
|
||||
" int lIdx = GET_LOCAL_IDX;\n"
|
||||
" int lSize = GET_GROUP_SIZE;\n"
|
||||
"\n"
|
||||
" for(int i=lIdx; i<cb.m_numScanBlocks; i+=lSize )\n"
|
||||
" {\n"
|
||||
" ldsData[i] = (i<cb.m_numBlocks)? dst[i]:0;\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" GROUP_LDS_BARRIER;\n"
|
||||
"\n"
|
||||
" u32 sum = ScanExclusive(ldsData, cb.m_numScanBlocks, GET_LOCAL_IDX, GET_GROUP_SIZE);\n"
|
||||
"\n"
|
||||
" for(int i=lIdx; i<cb.m_numBlocks; i+=lSize )\n"
|
||||
" {\n"
|
||||
" dst[i] = ldsData[i];\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" if( gIdx == 0 )\n"
|
||||
" {\n"
|
||||
" dst[cb.m_numBlocks] = sum;\n"
|
||||
|
||||
@@ -2,33 +2,27 @@
|
||||
static const char* prefixScanKernelsFloat4CL= \
|
||||
"/*\n"
|
||||
"Copyright (c) 2012 Advanced Micro Devices, Inc. \n"
|
||||
"\n"
|
||||
"This software is provided 'as-is', without any express or implied warranty.\n"
|
||||
"In no event will the authors be held liable for any damages arising from the use of this software.\n"
|
||||
"Permission is granted to anyone to use this software for any purpose, \n"
|
||||
"including commercial applications, and to alter it and redistribute it freely, \n"
|
||||
"subject to the following restrictions:\n"
|
||||
"\n"
|
||||
"1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n"
|
||||
"2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n"
|
||||
"3. This notice may not be removed or altered from any source distribution.\n"
|
||||
"*/\n"
|
||||
"//Originally written by Takahiro Harada\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"typedef unsigned int u32;\n"
|
||||
"#define GET_GROUP_IDX get_group_id(0)\n"
|
||||
"#define GET_LOCAL_IDX get_local_id(0)\n"
|
||||
"#define GET_GLOBAL_IDX get_global_id(0)\n"
|
||||
"#define GET_GROUP_SIZE get_local_size(0)\n"
|
||||
"#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)\n"
|
||||
"\n"
|
||||
"// takahiro end\n"
|
||||
"#define WG_SIZE 128 \n"
|
||||
"#define m_numElems x\n"
|
||||
"#define m_numBlocks y\n"
|
||||
"#define m_numScanBlocks z\n"
|
||||
"\n"
|
||||
"/*typedef struct\n"
|
||||
"{\n"
|
||||
" uint m_numElems;\n"
|
||||
@@ -37,7 +31,6 @@ static const char* prefixScanKernelsFloat4CL= \
|
||||
" uint m_padding[1];\n"
|
||||
"} ConstBuffer;\n"
|
||||
"*/\n"
|
||||
"\n"
|
||||
"float4 ScanExclusiveFloat4(__local float4* data, u32 n, int lIdx, int lSize)\n"
|
||||
"{\n"
|
||||
" float4 blocksum;\n"
|
||||
@@ -52,17 +45,13 @@ static const char* prefixScanKernelsFloat4CL= \
|
||||
" data[bi] += data[ai];\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" GROUP_LDS_BARRIER;\n"
|
||||
"\n"
|
||||
" if( lIdx == 0 )\n"
|
||||
" {\n"
|
||||
" blocksum = data[ n-1 ];\n"
|
||||
" data[ n-1 ] = 0;\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" GROUP_LDS_BARRIER;\n"
|
||||
"\n"
|
||||
" offset >>= 1;\n"
|
||||
" for(int nActive=1; nActive<n; nActive<<=1, offset>>=1 )\n"
|
||||
" {\n"
|
||||
@@ -77,27 +66,20 @@ static const char* prefixScanKernelsFloat4CL= \
|
||||
" }\n"
|
||||
" }\n"
|
||||
" GROUP_LDS_BARRIER;\n"
|
||||
"\n"
|
||||
" return blocksum;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n"
|
||||
"__kernel\n"
|
||||
"void LocalScanKernel(__global float4* dst, __global float4* src, __global float4* sumBuffer, uint4 cb)\n"
|
||||
"{\n"
|
||||
" __local float4 ldsData[WG_SIZE*2];\n"
|
||||
"\n"
|
||||
" int gIdx = GET_GLOBAL_IDX;\n"
|
||||
" int lIdx = GET_LOCAL_IDX;\n"
|
||||
"\n"
|
||||
" ldsData[2*lIdx] = ( 2*gIdx < cb.m_numElems )? src[2*gIdx]: 0;\n"
|
||||
" ldsData[2*lIdx + 1] = ( 2*gIdx+1 < cb.m_numElems )? src[2*gIdx + 1]: 0;\n"
|
||||
"\n"
|
||||
" float4 sum = ScanExclusiveFloat4(ldsData, WG_SIZE*2, GET_LOCAL_IDX, GET_GROUP_SIZE);\n"
|
||||
"\n"
|
||||
" if( lIdx == 0 ) \n"
|
||||
" sumBuffer[GET_GROUP_IDX] = sum;\n"
|
||||
"\n"
|
||||
" if( (2*gIdx) < cb.m_numElems )\n"
|
||||
" {\n"
|
||||
" dst[2*gIdx] = ldsData[2*lIdx];\n"
|
||||
@@ -107,25 +89,20 @@ static const char* prefixScanKernelsFloat4CL= \
|
||||
" dst[2*gIdx + 1] = ldsData[2*lIdx + 1];\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n"
|
||||
"__kernel\n"
|
||||
"void AddOffsetKernel(__global float4* dst, __global float4* blockSum, uint4 cb)\n"
|
||||
"{\n"
|
||||
" const u32 blockSize = WG_SIZE*2;\n"
|
||||
"\n"
|
||||
" int myIdx = GET_GROUP_IDX+1;\n"
|
||||
" int lIdx = GET_LOCAL_IDX;\n"
|
||||
"\n"
|
||||
" float4 iBlockSum = blockSum[myIdx];\n"
|
||||
"\n"
|
||||
" int endValue = min((myIdx+1)*(blockSize), cb.m_numElems);\n"
|
||||
" for(int i=myIdx*blockSize+lIdx; i<endValue; i+=GET_GROUP_SIZE)\n"
|
||||
" {\n"
|
||||
" dst[i] += iBlockSum;\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n"
|
||||
"__kernel\n"
|
||||
"void TopLevelScanKernel(__global float4* dst, uint4 cb)\n"
|
||||
@@ -134,21 +111,16 @@ static const char* prefixScanKernelsFloat4CL= \
|
||||
" int gIdx = GET_GLOBAL_IDX;\n"
|
||||
" int lIdx = GET_LOCAL_IDX;\n"
|
||||
" int lSize = GET_GROUP_SIZE;\n"
|
||||
"\n"
|
||||
" for(int i=lIdx; i<cb.m_numScanBlocks; i+=lSize )\n"
|
||||
" {\n"
|
||||
" ldsData[i] = (i<cb.m_numBlocks)? dst[i]:0;\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" GROUP_LDS_BARRIER;\n"
|
||||
"\n"
|
||||
" float4 sum = ScanExclusiveFloat4(ldsData, cb.m_numScanBlocks, GET_LOCAL_IDX, GET_GROUP_SIZE);\n"
|
||||
"\n"
|
||||
" for(int i=lIdx; i<cb.m_numBlocks; i+=lSize )\n"
|
||||
" {\n"
|
||||
" dst[i] = ldsData[i];\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" if( gIdx == 0 )\n"
|
||||
" {\n"
|
||||
" dst[cb.m_numBlocks] = sum;\n"
|
||||
|
||||
@@ -3,24 +3,19 @@ static const char* radixSort32KernelsCL= \
|
||||
"/*\n"
|
||||
"Bullet Continuous Collision Detection and Physics Library\n"
|
||||
"Copyright (c) 2011 Advanced Micro Devices, Inc. http://bulletphysics.org\n"
|
||||
"\n"
|
||||
"This software is provided 'as-is', without any express or implied warranty.\n"
|
||||
"In no event will the authors be held liable for any damages arising from the use of this software.\n"
|
||||
"Permission is granted to anyone to use this software for any purpose, \n"
|
||||
"including commercial applications, and to alter it and redistribute it freely, \n"
|
||||
"subject to the following restrictions:\n"
|
||||
"\n"
|
||||
"1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n"
|
||||
"2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n"
|
||||
"3. This notice may not be removed or altered from any source distribution.\n"
|
||||
"*/\n"
|
||||
"//Author Takahiro Harada\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"//#pragma OPENCL EXTENSION cl_amd_printf : enable\n"
|
||||
"#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n"
|
||||
"#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n"
|
||||
"\n"
|
||||
"typedef unsigned int u32;\n"
|
||||
"#define GET_GROUP_IDX get_group_id(0)\n"
|
||||
"#define GET_LOCAL_IDX get_local_id(0)\n"
|
||||
@@ -31,38 +26,27 @@ static const char* radixSort32KernelsCL= \
|
||||
"#define AtomInc(x) atom_inc(&(x))\n"
|
||||
"#define AtomInc1(x, out) out = atom_inc(&(x))\n"
|
||||
"#define AtomAdd(x, value) atom_add(&(x), value)\n"
|
||||
"\n"
|
||||
"#define SELECT_UINT4( b, a, condition ) select( b,a,condition )\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"#define make_uint4 (uint4)\n"
|
||||
"#define make_uint2 (uint2)\n"
|
||||
"#define make_int2 (int2)\n"
|
||||
"\n"
|
||||
"#define WG_SIZE 64\n"
|
||||
"#define ELEMENTS_PER_WORK_ITEM (256/WG_SIZE)\n"
|
||||
"#define BITS_PER_PASS 4\n"
|
||||
"#define NUM_BUCKET (1<<BITS_PER_PASS)\n"
|
||||
"typedef uchar u8;\n"
|
||||
"\n"
|
||||
"// this isn't optimization for VLIW. But just reducing writes. \n"
|
||||
"#define USE_2LEVEL_REDUCE 1\n"
|
||||
"\n"
|
||||
"//#define CHECK_BOUNDARY 1\n"
|
||||
"\n"
|
||||
"//#define NV_GPU 1\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"// Cypress\n"
|
||||
"#define nPerWI 16\n"
|
||||
"// Cayman\n"
|
||||
"//#define nPerWI 20\n"
|
||||
"\n"
|
||||
"#define m_n x\n"
|
||||
"#define m_nWGs y\n"
|
||||
"#define m_startBit z\n"
|
||||
"#define m_nBlocksPerWG w\n"
|
||||
"\n"
|
||||
"/*\n"
|
||||
"typedef struct\n"
|
||||
"{\n"
|
||||
@@ -72,14 +56,11 @@ static const char* radixSort32KernelsCL= \
|
||||
" int m_nBlocksPerWG;\n"
|
||||
"} ConstBuffer;\n"
|
||||
"*/\n"
|
||||
"\n"
|
||||
"typedef struct\n"
|
||||
"{\n"
|
||||
" unsigned int m_key;\n"
|
||||
" unsigned int m_value;\n"
|
||||
"} SortDataCL;\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"uint prefixScanVectorEx( uint4* data )\n"
|
||||
"{\n"
|
||||
" u32 sum = 0;\n"
|
||||
@@ -97,16 +78,13 @@ static const char* radixSort32KernelsCL= \
|
||||
" sum += tmp;\n"
|
||||
" return sum;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"u32 localPrefixSum( u32 pData, uint lIdx, uint* totalSum, __local u32* sorterSharedMemory, int wgSize /*64 or 128*/ )\n"
|
||||
"{\n"
|
||||
" { // Set data\n"
|
||||
" sorterSharedMemory[lIdx] = 0;\n"
|
||||
" sorterSharedMemory[lIdx+wgSize] = pData;\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" GROUP_LDS_BARRIER;\n"
|
||||
"\n"
|
||||
" { // Prefix sum\n"
|
||||
" int idx = 2*lIdx + (wgSize+1);\n"
|
||||
"#if defined(USE_2LEVEL_REDUCE)\n"
|
||||
@@ -118,13 +96,11 @@ static const char* radixSort32KernelsCL= \
|
||||
" u2 = sorterSharedMemory[idx-1];\n"
|
||||
" AtomAdd( sorterSharedMemory[idx], u0+u1+u2 ); \n"
|
||||
" GROUP_MEM_FENCE;\n"
|
||||
"\n"
|
||||
" u0 = sorterSharedMemory[idx-12];\n"
|
||||
" u1 = sorterSharedMemory[idx-8];\n"
|
||||
" u2 = sorterSharedMemory[idx-4];\n"
|
||||
" AtomAdd( sorterSharedMemory[idx], u0+u1+u2 ); \n"
|
||||
" GROUP_MEM_FENCE;\n"
|
||||
"\n"
|
||||
" u0 = sorterSharedMemory[idx-48];\n"
|
||||
" u1 = sorterSharedMemory[idx-32];\n"
|
||||
" u2 = sorterSharedMemory[idx-16];\n"
|
||||
@@ -135,7 +111,6 @@ static const char* radixSort32KernelsCL= \
|
||||
" sorterSharedMemory[idx] += sorterSharedMemory[idx-64];\n"
|
||||
" GROUP_MEM_FENCE;\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" sorterSharedMemory[idx-1] += sorterSharedMemory[idx-2];\n"
|
||||
" GROUP_MEM_FENCE;\n"
|
||||
" }\n"
|
||||
@@ -159,20 +134,16 @@ static const char* radixSort32KernelsCL= \
|
||||
" sorterSharedMemory[idx] += sorterSharedMemory[idx-64];\n"
|
||||
" GROUP_MEM_FENCE;\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" sorterSharedMemory[idx-1] += sorterSharedMemory[idx-2];\n"
|
||||
" GROUP_MEM_FENCE;\n"
|
||||
" }\n"
|
||||
"#endif\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" GROUP_LDS_BARRIER;\n"
|
||||
"\n"
|
||||
" *totalSum = sorterSharedMemory[wgSize*2-1];\n"
|
||||
" u32 addValue = sorterSharedMemory[lIdx+wgSize-1];\n"
|
||||
" return addValue;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"//__attribute__((reqd_work_group_size(128,1,1)))\n"
|
||||
"uint4 localPrefixSum128V( uint4 pData, uint lIdx, uint* totalSum, __local u32* sorterSharedMemory )\n"
|
||||
"{\n"
|
||||
@@ -180,8 +151,6 @@ static const char* radixSort32KernelsCL= \
|
||||
" u32 rank = localPrefixSum( s4, lIdx, totalSum, sorterSharedMemory, 128 );\n"
|
||||
" return pData + make_uint4( rank, rank, rank, rank );\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"//__attribute__((reqd_work_group_size(64,1,1)))\n"
|
||||
"uint4 localPrefixSum64V( uint4 pData, uint lIdx, uint* totalSum, __local u32* sorterSharedMemory )\n"
|
||||
"{\n"
|
||||
@@ -189,28 +158,18 @@ static const char* radixSort32KernelsCL= \
|
||||
" u32 rank = localPrefixSum( s4, lIdx, totalSum, sorterSharedMemory, 64 );\n"
|
||||
" return pData + make_uint4( rank, rank, rank, rank );\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"u32 unpack4Key( u32 key, int keyIdx ){ return (key>>(keyIdx*8)) & 0xff;}\n"
|
||||
"\n"
|
||||
"u32 bit8Scan(u32 v)\n"
|
||||
"{\n"
|
||||
" return (v<<8) + (v<<16) + (v<<24);\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"//===\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"#define MY_HISTOGRAM(idx) localHistogramMat[(idx)*WG_SIZE+lIdx]\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"__kernel\n"
|
||||
"__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n"
|
||||
"void StreamCountKernel( __global u32* gSrc, __global u32* histogramOut, int4 cb )\n"
|
||||
"{\n"
|
||||
" __local u32 localHistogramMat[NUM_BUCKET*WG_SIZE];\n"
|
||||
"\n"
|
||||
" u32 gIdx = GET_GLOBAL_IDX;\n"
|
||||
" u32 lIdx = GET_LOCAL_IDX;\n"
|
||||
" u32 wgIdx = GET_GROUP_IDX;\n"
|
||||
@@ -219,21 +178,15 @@ static const char* radixSort32KernelsCL= \
|
||||
" const int n = cb.m_n;\n"
|
||||
" const int nWGs = cb.m_nWGs;\n"
|
||||
" const int nBlocksPerWG = cb.m_nBlocksPerWG;\n"
|
||||
"\n"
|
||||
" for(int i=0; i<NUM_BUCKET; i++)\n"
|
||||
" {\n"
|
||||
" MY_HISTOGRAM(i) = 0;\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" GROUP_LDS_BARRIER;\n"
|
||||
"\n"
|
||||
" const int blockSize = ELEMENTS_PER_WORK_ITEM*WG_SIZE;\n"
|
||||
" u32 localKey;\n"
|
||||
"\n"
|
||||
" int nBlocks = (n)/blockSize - nBlocksPerWG*wgIdx;\n"
|
||||
"\n"
|
||||
" int addr = blockSize*nBlocksPerWG*wgIdx + ELEMENTS_PER_WORK_ITEM*lIdx;\n"
|
||||
"\n"
|
||||
" for(int iblock=0; iblock<min(nBlocksPerWG, nBlocks); iblock++, addr+=blockSize)\n"
|
||||
" {\n"
|
||||
" // MY_HISTOGRAM( localKeys.x ) ++ is much expensive than atomic add as it requires read and write while atomics can just add on AMD\n"
|
||||
@@ -254,7 +207,6 @@ static const char* radixSort32KernelsCL= \
|
||||
" }\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" GROUP_LDS_BARRIER;\n"
|
||||
" \n"
|
||||
" if( lIdx < NUM_BUCKET )\n"
|
||||
@@ -267,13 +219,11 @@ static const char* radixSort32KernelsCL= \
|
||||
" histogramOut[lIdx*nWGs+wgIdx] = sum;\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__kernel\n"
|
||||
"__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n"
|
||||
"void StreamCountSortDataKernel( __global SortDataCL* gSrc, __global u32* histogramOut, int4 cb )\n"
|
||||
"{\n"
|
||||
" __local u32 localHistogramMat[NUM_BUCKET*WG_SIZE];\n"
|
||||
"\n"
|
||||
" u32 gIdx = GET_GLOBAL_IDX;\n"
|
||||
" u32 lIdx = GET_LOCAL_IDX;\n"
|
||||
" u32 wgIdx = GET_GROUP_IDX;\n"
|
||||
@@ -282,21 +232,15 @@ static const char* radixSort32KernelsCL= \
|
||||
" const int n = cb.m_n;\n"
|
||||
" const int nWGs = cb.m_nWGs;\n"
|
||||
" const int nBlocksPerWG = cb.m_nBlocksPerWG;\n"
|
||||
"\n"
|
||||
" for(int i=0; i<NUM_BUCKET; i++)\n"
|
||||
" {\n"
|
||||
" MY_HISTOGRAM(i) = 0;\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" GROUP_LDS_BARRIER;\n"
|
||||
"\n"
|
||||
" const int blockSize = ELEMENTS_PER_WORK_ITEM*WG_SIZE;\n"
|
||||
" u32 localKey;\n"
|
||||
"\n"
|
||||
" int nBlocks = (n)/blockSize - nBlocksPerWG*wgIdx;\n"
|
||||
"\n"
|
||||
" int addr = blockSize*nBlocksPerWG*wgIdx + ELEMENTS_PER_WORK_ITEM*lIdx;\n"
|
||||
"\n"
|
||||
" for(int iblock=0; iblock<min(nBlocksPerWG, nBlocks); iblock++, addr+=blockSize)\n"
|
||||
" {\n"
|
||||
" // MY_HISTOGRAM( localKeys.x ) ++ is much expensive than atomic add as it requires read and write while atomics can just add on AMD\n"
|
||||
@@ -317,7 +261,6 @@ static const char* radixSort32KernelsCL= \
|
||||
" }\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" GROUP_LDS_BARRIER;\n"
|
||||
" \n"
|
||||
" if( lIdx < NUM_BUCKET )\n"
|
||||
@@ -330,20 +273,16 @@ static const char* radixSort32KernelsCL= \
|
||||
" histogramOut[lIdx*nWGs+wgIdx] = sum;\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"#define nPerLane (nPerWI/4)\n"
|
||||
"\n"
|
||||
"// NUM_BUCKET*nWGs < 128*nPerWI\n"
|
||||
"__kernel\n"
|
||||
"__attribute__((reqd_work_group_size(128,1,1)))\n"
|
||||
"void PrefixScanKernel( __global u32* wHistogram1, int4 cb )\n"
|
||||
"{\n"
|
||||
" __local u32 ldsTopScanData[128*2];\n"
|
||||
"\n"
|
||||
" u32 lIdx = GET_LOCAL_IDX;\n"
|
||||
" u32 wgIdx = GET_GROUP_IDX;\n"
|
||||
" const int nWGs = cb.m_nWGs;\n"
|
||||
"\n"
|
||||
" u32 data[nPerWI];\n"
|
||||
" for(int i=0; i<nPerWI; i++)\n"
|
||||
" {\n"
|
||||
@@ -351,9 +290,7 @@ static const char* radixSort32KernelsCL= \
|
||||
" if( (nPerWI*lIdx+i) < NUM_BUCKET*nWGs )\n"
|
||||
" data[i] = wHistogram1[nPerWI*lIdx+i];\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" uint4 myData = make_uint4(0,0,0,0);\n"
|
||||
"\n"
|
||||
" for(int i=0; i<nPerLane; i++)\n"
|
||||
" {\n"
|
||||
" myData.x += data[nPerLane*0+i];\n"
|
||||
@@ -361,10 +298,8 @@ static const char* radixSort32KernelsCL= \
|
||||
" myData.z += data[nPerLane*2+i];\n"
|
||||
" myData.w += data[nPerLane*3+i];\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" uint totalSum;\n"
|
||||
" uint4 scanned = localPrefixSum128V( myData, lIdx, &totalSum, ldsTopScanData );\n"
|
||||
"\n"
|
||||
"// for(int j=0; j<4; j++) // somehow it introduces a lot of branches\n"
|
||||
" { int j = 0;\n"
|
||||
" u32 sum = 0;\n"
|
||||
@@ -402,7 +337,6 @@ static const char* radixSort32KernelsCL= \
|
||||
" sum += tmp;\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" for(int i=0; i<nPerLane; i++)\n"
|
||||
" {\n"
|
||||
" data[nPerLane*0+i] += scanned.x;\n"
|
||||
@@ -410,7 +344,6 @@ static const char* radixSort32KernelsCL= \
|
||||
" data[nPerLane*2+i] += scanned.z;\n"
|
||||
" data[nPerLane*3+i] += scanned.w;\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" for(int i=0; i<nPerWI; i++)\n"
|
||||
" {\n"
|
||||
" int index = nPerWI*lIdx+i;\n"
|
||||
@@ -418,7 +351,6 @@ static const char* radixSort32KernelsCL= \
|
||||
" wHistogram1[nPerWI*lIdx+i] = data[i];\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"// 4 scan, 4 exchange\n"
|
||||
"void sort4Bits(u32 sortData[4], int startBit, int lIdx, __local u32* ldsSortData)\n"
|
||||
"{\n"
|
||||
@@ -433,26 +365,20 @@ static const char* radixSort32KernelsCL= \
|
||||
" uint4 localAddr = make_uint4(lIdx*4+0,lIdx*4+1,lIdx*4+2,lIdx*4+3);\n"
|
||||
" uint4 dstAddr = localAddr - prefixSum + make_uint4( total, total, total, total );\n"
|
||||
" dstAddr = SELECT_UINT4( prefixSum, dstAddr, cmpResult != make_uint4(0, 0, 0, 0) );\n"
|
||||
"\n"
|
||||
" GROUP_LDS_BARRIER;\n"
|
||||
"\n"
|
||||
" ldsSortData[dstAddr.x] = sortData[0];\n"
|
||||
" ldsSortData[dstAddr.y] = sortData[1];\n"
|
||||
" ldsSortData[dstAddr.z] = sortData[2];\n"
|
||||
" ldsSortData[dstAddr.w] = sortData[3];\n"
|
||||
"\n"
|
||||
" GROUP_LDS_BARRIER;\n"
|
||||
"\n"
|
||||
" sortData[0] = ldsSortData[localAddr.x];\n"
|
||||
" sortData[1] = ldsSortData[localAddr.y];\n"
|
||||
" sortData[2] = ldsSortData[localAddr.z];\n"
|
||||
" sortData[3] = ldsSortData[localAddr.w];\n"
|
||||
"\n"
|
||||
" GROUP_LDS_BARRIER;\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"// 2 scan, 2 exchange\n"
|
||||
"void sort4Bits1(u32 sortData[4], int startBit, int lIdx, __local u32* ldsSortData)\n"
|
||||
"{\n"
|
||||
@@ -462,7 +388,6 @@ static const char* radixSort32KernelsCL= \
|
||||
" (sortData[1]>>(startBit+ibit)) & 0x3, \n"
|
||||
" (sortData[2]>>(startBit+ibit)) & 0x3, \n"
|
||||
" (sortData[3]>>(startBit+ibit)) & 0x3);\n"
|
||||
"\n"
|
||||
" u32 key4;\n"
|
||||
" u32 sKeyPacked[4] = { 0, 0, 0, 0 };\n"
|
||||
" {\n"
|
||||
@@ -470,22 +395,17 @@ static const char* radixSort32KernelsCL= \
|
||||
" sKeyPacked[1] |= 1<<(8*b.y);\n"
|
||||
" sKeyPacked[2] |= 1<<(8*b.z);\n"
|
||||
" sKeyPacked[3] |= 1<<(8*b.w);\n"
|
||||
"\n"
|
||||
" key4 = sKeyPacked[0] + sKeyPacked[1] + sKeyPacked[2] + sKeyPacked[3];\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" u32 rankPacked;\n"
|
||||
" u32 sumPacked;\n"
|
||||
" {\n"
|
||||
" rankPacked = localPrefixSum( key4, lIdx, &sumPacked, ldsSortData, WG_SIZE );\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" GROUP_LDS_BARRIER;\n"
|
||||
"\n"
|
||||
" u32 newOffset[4] = { 0,0,0,0 };\n"
|
||||
" {\n"
|
||||
" u32 sumScanned = bit8Scan( sumPacked );\n"
|
||||
"\n"
|
||||
" u32 scannedKeys[4];\n"
|
||||
" scannedKeys[0] = 1<<(8*b.x);\n"
|
||||
" scannedKeys[1] = 1<<(8*b.y);\n"
|
||||
@@ -500,7 +420,6 @@ static const char* radixSort32KernelsCL= \
|
||||
" sum4 += tmp;\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" {\n"
|
||||
" u32 sumPlusRank = sumScanned + rankPacked;\n"
|
||||
" { u32 ie = b.x;\n"
|
||||
@@ -521,31 +440,23 @@ static const char* radixSort32KernelsCL= \
|
||||
" }\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
"\n"
|
||||
" GROUP_LDS_BARRIER;\n"
|
||||
"\n"
|
||||
" {\n"
|
||||
" ldsSortData[newOffset[0]] = sortData[0];\n"
|
||||
" ldsSortData[newOffset[1]] = sortData[1];\n"
|
||||
" ldsSortData[newOffset[2]] = sortData[2];\n"
|
||||
" ldsSortData[newOffset[3]] = sortData[3];\n"
|
||||
"\n"
|
||||
" GROUP_LDS_BARRIER;\n"
|
||||
"\n"
|
||||
" u32 dstAddr = 4*lIdx;\n"
|
||||
" sortData[0] = ldsSortData[dstAddr+0];\n"
|
||||
" sortData[1] = ldsSortData[dstAddr+1];\n"
|
||||
" sortData[2] = ldsSortData[dstAddr+2];\n"
|
||||
" sortData[3] = ldsSortData[dstAddr+3];\n"
|
||||
"\n"
|
||||
" GROUP_LDS_BARRIER;\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"#define SET_HISTOGRAM(setIdx, key) ldsSortData[(setIdx)*NUM_BUCKET+key]\n"
|
||||
"\n"
|
||||
"__kernel\n"
|
||||
"__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n"
|
||||
"void SortAndScatterKernel( __global const u32* restrict gSrc, __global const u32* rHistogram, __global u32* restrict gDst, int4 cb )\n"
|
||||
@@ -553,34 +464,25 @@ static const char* radixSort32KernelsCL= \
|
||||
" __local u32 ldsSortData[WG_SIZE*ELEMENTS_PER_WORK_ITEM+16];\n"
|
||||
" __local u32 localHistogramToCarry[NUM_BUCKET];\n"
|
||||
" __local u32 localHistogram[NUM_BUCKET*2];\n"
|
||||
"\n"
|
||||
" u32 gIdx = GET_GLOBAL_IDX;\n"
|
||||
" u32 lIdx = GET_LOCAL_IDX;\n"
|
||||
" u32 wgIdx = GET_GROUP_IDX;\n"
|
||||
" u32 wgSize = GET_GROUP_SIZE;\n"
|
||||
"\n"
|
||||
" const int n = cb.m_n;\n"
|
||||
" const int nWGs = cb.m_nWGs;\n"
|
||||
" const int startBit = cb.m_startBit;\n"
|
||||
" const int nBlocksPerWG = cb.m_nBlocksPerWG;\n"
|
||||
"\n"
|
||||
" if( lIdx < (NUM_BUCKET) )\n"
|
||||
" {\n"
|
||||
" localHistogramToCarry[lIdx] = rHistogram[lIdx*nWGs + wgIdx];\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" GROUP_LDS_BARRIER;\n"
|
||||
"\n"
|
||||
" const int blockSize = ELEMENTS_PER_WORK_ITEM*WG_SIZE;\n"
|
||||
"\n"
|
||||
" int nBlocks = n/blockSize - nBlocksPerWG*wgIdx;\n"
|
||||
"\n"
|
||||
" int addr = blockSize*nBlocksPerWG*wgIdx + ELEMENTS_PER_WORK_ITEM*lIdx;\n"
|
||||
"\n"
|
||||
" for(int iblock=0; iblock<min(nBlocksPerWG, nBlocks); iblock++, addr+=blockSize)\n"
|
||||
" {\n"
|
||||
" u32 myHistogram = 0;\n"
|
||||
"\n"
|
||||
" u32 sortData[ELEMENTS_PER_WORK_ITEM];\n"
|
||||
" for(int i=0; i<ELEMENTS_PER_WORK_ITEM; i++)\n"
|
||||
"#if defined(CHECK_BOUNDARY)\n"
|
||||
@@ -588,13 +490,10 @@ static const char* radixSort32KernelsCL= \
|
||||
"#else\n"
|
||||
" sortData[i] = gSrc[ addr+i ];\n"
|
||||
"#endif\n"
|
||||
"\n"
|
||||
" sort4Bits(sortData, startBit, lIdx, ldsSortData);\n"
|
||||
"\n"
|
||||
" u32 keys[ELEMENTS_PER_WORK_ITEM];\n"
|
||||
" for(int i=0; i<ELEMENTS_PER_WORK_ITEM; i++)\n"
|
||||
" keys[i] = (sortData[i]>>startBit) & 0xf;\n"
|
||||
"\n"
|
||||
" { // create histogram\n"
|
||||
" u32 setIdx = lIdx/16;\n"
|
||||
" if( lIdx < NUM_BUCKET )\n"
|
||||
@@ -603,12 +502,10 @@ static const char* radixSort32KernelsCL= \
|
||||
" }\n"
|
||||
" ldsSortData[lIdx] = 0;\n"
|
||||
" GROUP_LDS_BARRIER;\n"
|
||||
"\n"
|
||||
" for(int i=0; i<ELEMENTS_PER_WORK_ITEM; i++)\n"
|
||||
"#if defined(CHECK_BOUNDARY)\n"
|
||||
" if( addr+i < n )\n"
|
||||
"#endif\n"
|
||||
"\n"
|
||||
"#if defined(NV_GPU)\n"
|
||||
" SET_HISTOGRAM( setIdx, keys[i] )++;\n"
|
||||
"#else\n"
|
||||
@@ -629,13 +526,11 @@ static const char* radixSort32KernelsCL= \
|
||||
" localHistogram[hIdx] = sum;\n"
|
||||
" }\n"
|
||||
" GROUP_LDS_BARRIER;\n"
|
||||
"\n"
|
||||
"#if defined(USE_2LEVEL_REDUCE)\n"
|
||||
" if( lIdx < NUM_BUCKET )\n"
|
||||
" {\n"
|
||||
" localHistogram[hIdx] = localHistogram[hIdx-1];\n"
|
||||
" GROUP_MEM_FENCE;\n"
|
||||
"\n"
|
||||
" u32 u0, u1, u2;\n"
|
||||
" u0 = localHistogram[hIdx-3];\n"
|
||||
" u1 = localHistogram[hIdx-2];\n"
|
||||
@@ -665,7 +560,6 @@ static const char* radixSort32KernelsCL= \
|
||||
"#endif\n"
|
||||
" GROUP_LDS_BARRIER;\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" {\n"
|
||||
" for(int ie=0; ie<ELEMENTS_PER_WORK_ITEM; ie++)\n"
|
||||
" {\n"
|
||||
@@ -679,9 +573,7 @@ static const char* radixSort32KernelsCL= \
|
||||
" gDst[ groupOffset + myIdx ] = sortData[ie];\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" GROUP_LDS_BARRIER;\n"
|
||||
"\n"
|
||||
" if( lIdx < NUM_BUCKET )\n"
|
||||
" {\n"
|
||||
" localHistogramToCarry[lIdx] += myHistogram;\n"
|
||||
@@ -689,7 +581,6 @@ static const char* radixSort32KernelsCL= \
|
||||
" GROUP_LDS_BARRIER;\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"// 2 scan, 2 exchange\n"
|
||||
"void sort4Bits1KeyValue(u32 sortData[4], int sortVal[4], int startBit, int lIdx, __local u32* ldsSortData, __local int *ldsSortVal)\n"
|
||||
"{\n"
|
||||
@@ -699,7 +590,6 @@ static const char* radixSort32KernelsCL= \
|
||||
" (sortData[1]>>(startBit+ibit)) & 0x3, \n"
|
||||
" (sortData[2]>>(startBit+ibit)) & 0x3, \n"
|
||||
" (sortData[3]>>(startBit+ibit)) & 0x3);\n"
|
||||
"\n"
|
||||
" u32 key4;\n"
|
||||
" u32 sKeyPacked[4] = { 0, 0, 0, 0 };\n"
|
||||
" {\n"
|
||||
@@ -707,22 +597,17 @@ static const char* radixSort32KernelsCL= \
|
||||
" sKeyPacked[1] |= 1<<(8*b.y);\n"
|
||||
" sKeyPacked[2] |= 1<<(8*b.z);\n"
|
||||
" sKeyPacked[3] |= 1<<(8*b.w);\n"
|
||||
"\n"
|
||||
" key4 = sKeyPacked[0] + sKeyPacked[1] + sKeyPacked[2] + sKeyPacked[3];\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" u32 rankPacked;\n"
|
||||
" u32 sumPacked;\n"
|
||||
" {\n"
|
||||
" rankPacked = localPrefixSum( key4, lIdx, &sumPacked, ldsSortData, WG_SIZE );\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" GROUP_LDS_BARRIER;\n"
|
||||
"\n"
|
||||
" u32 newOffset[4] = { 0,0,0,0 };\n"
|
||||
" {\n"
|
||||
" u32 sumScanned = bit8Scan( sumPacked );\n"
|
||||
"\n"
|
||||
" u32 scannedKeys[4];\n"
|
||||
" scannedKeys[0] = 1<<(8*b.x);\n"
|
||||
" scannedKeys[1] = 1<<(8*b.y);\n"
|
||||
@@ -737,7 +622,6 @@ static const char* radixSort32KernelsCL= \
|
||||
" sum4 += tmp;\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" {\n"
|
||||
" u32 sumPlusRank = sumScanned + rankPacked;\n"
|
||||
" { u32 ie = b.x;\n"
|
||||
@@ -758,42 +642,30 @@ static const char* radixSort32KernelsCL= \
|
||||
" }\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
"\n"
|
||||
" GROUP_LDS_BARRIER;\n"
|
||||
"\n"
|
||||
" {\n"
|
||||
" ldsSortData[newOffset[0]] = sortData[0];\n"
|
||||
" ldsSortData[newOffset[1]] = sortData[1];\n"
|
||||
" ldsSortData[newOffset[2]] = sortData[2];\n"
|
||||
" ldsSortData[newOffset[3]] = sortData[3];\n"
|
||||
"\n"
|
||||
" ldsSortVal[newOffset[0]] = sortVal[0];\n"
|
||||
" ldsSortVal[newOffset[1]] = sortVal[1];\n"
|
||||
" ldsSortVal[newOffset[2]] = sortVal[2];\n"
|
||||
" ldsSortVal[newOffset[3]] = sortVal[3];\n"
|
||||
"\n"
|
||||
" GROUP_LDS_BARRIER;\n"
|
||||
"\n"
|
||||
" u32 dstAddr = 4*lIdx;\n"
|
||||
" sortData[0] = ldsSortData[dstAddr+0];\n"
|
||||
" sortData[1] = ldsSortData[dstAddr+1];\n"
|
||||
" sortData[2] = ldsSortData[dstAddr+2];\n"
|
||||
" sortData[3] = ldsSortData[dstAddr+3];\n"
|
||||
"\n"
|
||||
" sortVal[0] = ldsSortVal[dstAddr+0];\n"
|
||||
" sortVal[1] = ldsSortVal[dstAddr+1];\n"
|
||||
" sortVal[2] = ldsSortVal[dstAddr+2];\n"
|
||||
" sortVal[3] = ldsSortVal[dstAddr+3];\n"
|
||||
"\n"
|
||||
" GROUP_LDS_BARRIER;\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"__kernel\n"
|
||||
"__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n"
|
||||
"void SortAndScatterSortDataKernel( __global const SortDataCL* restrict gSrc, __global const u32* rHistogram, __global SortDataCL* restrict gDst, int4 cb)\n"
|
||||
@@ -802,39 +674,28 @@ static const char* radixSort32KernelsCL= \
|
||||
" __local int ldsSortVal[WG_SIZE*ELEMENTS_PER_WORK_ITEM+16];\n"
|
||||
" __local u32 localHistogramToCarry[NUM_BUCKET];\n"
|
||||
" __local u32 localHistogram[NUM_BUCKET*2];\n"
|
||||
"\n"
|
||||
" u32 gIdx = GET_GLOBAL_IDX;\n"
|
||||
" u32 lIdx = GET_LOCAL_IDX;\n"
|
||||
" u32 wgIdx = GET_GROUP_IDX;\n"
|
||||
" u32 wgSize = GET_GROUP_SIZE;\n"
|
||||
"\n"
|
||||
" const int n = cb.m_n;\n"
|
||||
" const int nWGs = cb.m_nWGs;\n"
|
||||
" const int startBit = cb.m_startBit;\n"
|
||||
" const int nBlocksPerWG = cb.m_nBlocksPerWG;\n"
|
||||
"\n"
|
||||
" if( lIdx < (NUM_BUCKET) )\n"
|
||||
" {\n"
|
||||
" localHistogramToCarry[lIdx] = rHistogram[lIdx*nWGs + wgIdx];\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" GROUP_LDS_BARRIER;\n"
|
||||
" \n"
|
||||
"\n"
|
||||
" const int blockSize = ELEMENTS_PER_WORK_ITEM*WG_SIZE;\n"
|
||||
"\n"
|
||||
" int nBlocks = n/blockSize - nBlocksPerWG*wgIdx;\n"
|
||||
"\n"
|
||||
" int addr = blockSize*nBlocksPerWG*wgIdx + ELEMENTS_PER_WORK_ITEM*lIdx;\n"
|
||||
"\n"
|
||||
" for(int iblock=0; iblock<min(nBlocksPerWG, nBlocks); iblock++, addr+=blockSize)\n"
|
||||
" {\n"
|
||||
"\n"
|
||||
" u32 myHistogram = 0;\n"
|
||||
"\n"
|
||||
" int sortData[ELEMENTS_PER_WORK_ITEM];\n"
|
||||
" int sortVal[ELEMENTS_PER_WORK_ITEM];\n"
|
||||
"\n"
|
||||
" for(int i=0; i<ELEMENTS_PER_WORK_ITEM; i++)\n"
|
||||
"#if defined(CHECK_BOUNDARY)\n"
|
||||
" {\n"
|
||||
@@ -847,13 +708,10 @@ static const char* radixSort32KernelsCL= \
|
||||
" sortVal[i] = gSrc[ addr+i ].m_value;\n"
|
||||
" }\n"
|
||||
"#endif\n"
|
||||
"\n"
|
||||
" sort4Bits1KeyValue(sortData, sortVal, startBit, lIdx, ldsSortData, ldsSortVal);\n"
|
||||
"\n"
|
||||
" u32 keys[ELEMENTS_PER_WORK_ITEM];\n"
|
||||
" for(int i=0; i<ELEMENTS_PER_WORK_ITEM; i++)\n"
|
||||
" keys[i] = (sortData[i]>>startBit) & 0xf;\n"
|
||||
"\n"
|
||||
" { // create histogram\n"
|
||||
" u32 setIdx = lIdx/16;\n"
|
||||
" if( lIdx < NUM_BUCKET )\n"
|
||||
@@ -862,12 +720,10 @@ static const char* radixSort32KernelsCL= \
|
||||
" }\n"
|
||||
" ldsSortData[lIdx] = 0;\n"
|
||||
" GROUP_LDS_BARRIER;\n"
|
||||
"\n"
|
||||
" for(int i=0; i<ELEMENTS_PER_WORK_ITEM; i++)\n"
|
||||
"#if defined(CHECK_BOUNDARY)\n"
|
||||
" if( addr+i < n )\n"
|
||||
"#endif\n"
|
||||
"\n"
|
||||
"#if defined(NV_GPU)\n"
|
||||
" SET_HISTOGRAM( setIdx, keys[i] )++;\n"
|
||||
"#else\n"
|
||||
@@ -888,13 +744,11 @@ static const char* radixSort32KernelsCL= \
|
||||
" localHistogram[hIdx] = sum;\n"
|
||||
" }\n"
|
||||
" GROUP_LDS_BARRIER;\n"
|
||||
"\n"
|
||||
"#if defined(USE_2LEVEL_REDUCE)\n"
|
||||
" if( lIdx < NUM_BUCKET )\n"
|
||||
" {\n"
|
||||
" localHistogram[hIdx] = localHistogram[hIdx-1];\n"
|
||||
" GROUP_MEM_FENCE;\n"
|
||||
"\n"
|
||||
" u32 u0, u1, u2;\n"
|
||||
" u0 = localHistogram[hIdx-3];\n"
|
||||
" u1 = localHistogram[hIdx-2];\n"
|
||||
@@ -924,7 +778,6 @@ static const char* radixSort32KernelsCL= \
|
||||
"#endif\n"
|
||||
" GROUP_LDS_BARRIER;\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" {\n"
|
||||
" for(int ie=0; ie<ELEMENTS_PER_WORK_ITEM; ie++)\n"
|
||||
" {\n"
|
||||
@@ -958,9 +811,7 @@ static const char* radixSort32KernelsCL= \
|
||||
"#endif\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" GROUP_LDS_BARRIER;\n"
|
||||
"\n"
|
||||
" if( lIdx < NUM_BUCKET )\n"
|
||||
" {\n"
|
||||
" localHistogramToCarry[lIdx] += myHistogram;\n"
|
||||
@@ -968,13 +819,6 @@ static const char* radixSort32KernelsCL= \
|
||||
" GROUP_LDS_BARRIER;\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"__kernel\n"
|
||||
"__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n"
|
||||
"void SortAndScatterSortDataKernelSerial( __global const SortDataCL* restrict gSrc, __global const u32* rHistogram, __global SortDataCL* restrict gDst, int4 cb)\n"
|
||||
@@ -988,7 +832,6 @@ static const char* radixSort32KernelsCL= \
|
||||
" const int n = cb.m_n;\n"
|
||||
" const int nWGs = cb.m_nWGs;\n"
|
||||
" const int nBlocksPerWG = cb.m_nBlocksPerWG;\n"
|
||||
"\n"
|
||||
" int counter[NUM_BUCKET];\n"
|
||||
" \n"
|
||||
" if (realLocalIdx>0)\n"
|
||||
@@ -996,11 +839,9 @@ static const char* radixSort32KernelsCL= \
|
||||
" \n"
|
||||
" for (int c=0;c<NUM_BUCKET;c++)\n"
|
||||
" counter[c]=0;\n"
|
||||
"\n"
|
||||
" const int blockSize = ELEMENTS_PER_WORK_ITEM*WG_SIZE;\n"
|
||||
" \n"
|
||||
" int nBlocks = (n)/blockSize - nBlocksPerWG*wgIdx;\n"
|
||||
"\n"
|
||||
" for(int iblock=0; iblock<min(nBlocksPerWG, nBlocks); iblock++)\n"
|
||||
" {\n"
|
||||
" for (int lIdx=0;lIdx<WG_SIZE;lIdx++)\n"
|
||||
@@ -1022,8 +863,6 @@ static const char* radixSort32KernelsCL= \
|
||||
" }\n"
|
||||
" \n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"__kernel\n"
|
||||
"__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n"
|
||||
"void SortAndScatterKernelSerial( __global const u32* restrict gSrc, __global const u32* rHistogram, __global u32* restrict gDst, int4 cb )\n"
|
||||
@@ -1037,7 +876,6 @@ static const char* radixSort32KernelsCL= \
|
||||
" const int n = cb.m_n;\n"
|
||||
" const int nWGs = cb.m_nWGs;\n"
|
||||
" const int nBlocksPerWG = cb.m_nBlocksPerWG;\n"
|
||||
"\n"
|
||||
" int counter[NUM_BUCKET];\n"
|
||||
" \n"
|
||||
" if (realLocalIdx>0)\n"
|
||||
@@ -1045,11 +883,9 @@ static const char* radixSort32KernelsCL= \
|
||||
" \n"
|
||||
" for (int c=0;c<NUM_BUCKET;c++)\n"
|
||||
" counter[c]=0;\n"
|
||||
"\n"
|
||||
" const int blockSize = ELEMENTS_PER_WORK_ITEM*WG_SIZE;\n"
|
||||
" \n"
|
||||
" int nBlocks = (n)/blockSize - nBlocksPerWG*wgIdx;\n"
|
||||
"\n"
|
||||
" for(int iblock=0; iblock<min(nBlocksPerWG, nBlocks); iblock++)\n"
|
||||
" {\n"
|
||||
" for (int lIdx=0;lIdx<WG_SIZE;lIdx++)\n"
|
||||
|
||||
@@ -5,14 +5,11 @@ static const char* rayCastKernelCL= \
|
||||
"#define SHAPE_CONCAVE_TRIMESH 5\n"
|
||||
"#define SHAPE_COMPOUND_OF_CONVEX_HULLS 6\n"
|
||||
"#define SHAPE_SPHERE 7\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"typedef struct\n"
|
||||
"{\n"
|
||||
" float4 m_from;\n"
|
||||
" float4 m_to;\n"
|
||||
"} b3RayInfo;\n"
|
||||
"\n"
|
||||
"typedef struct\n"
|
||||
"{\n"
|
||||
" float m_hitFraction;\n"
|
||||
@@ -22,20 +19,17 @@ static const char* rayCastKernelCL= \
|
||||
" float4 m_hitPoint;\n"
|
||||
" float4 m_hitNormal;\n"
|
||||
"} b3RayHit;\n"
|
||||
"\n"
|
||||
"typedef struct\n"
|
||||
"{\n"
|
||||
" float4 m_pos;\n"
|
||||
" float4 m_quat;\n"
|
||||
" float4 m_linVel;\n"
|
||||
" float4 m_angVel;\n"
|
||||
"\n"
|
||||
" unsigned int m_collidableIdx;\n"
|
||||
" float m_invMass;\n"
|
||||
" float m_restituitionCoeff;\n"
|
||||
" float m_frictionCoeff;\n"
|
||||
"} Body;\n"
|
||||
"\n"
|
||||
"typedef struct Collidable\n"
|
||||
"{\n"
|
||||
" union {\n"
|
||||
@@ -46,53 +40,37 @@ static const char* rayCastKernelCL= \
|
||||
" int m_shapeType;\n"
|
||||
" int m_shapeIndex;\n"
|
||||
"} Collidable;\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"typedef struct \n"
|
||||
"{\n"
|
||||
" float4 m_localCenter;\n"
|
||||
" float4 m_extents;\n"
|
||||
" float4 mC;\n"
|
||||
" float4 mE;\n"
|
||||
"\n"
|
||||
" float m_radius;\n"
|
||||
" int m_faceOffset;\n"
|
||||
" int m_numFaces;\n"
|
||||
" int m_numVertices;\n"
|
||||
"\n"
|
||||
" int m_vertexOffset;\n"
|
||||
" int m_uniqueEdgesOffset;\n"
|
||||
" int m_numUniqueEdges;\n"
|
||||
" int m_unused;\n"
|
||||
"\n"
|
||||
"} ConvexPolyhedronCL;\n"
|
||||
"\n"
|
||||
"typedef struct\n"
|
||||
"{\n"
|
||||
" float4 m_plane;\n"
|
||||
" int m_indexOffset;\n"
|
||||
" int m_numIndices;\n"
|
||||
"} b3GpuFace;\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"///////////////////////////////////////\n"
|
||||
"// Quaternion\n"
|
||||
"///////////////////////////////////////\n"
|
||||
"\n"
|
||||
"typedef float4 Quaternion;\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
" Quaternion qtMul(Quaternion a, Quaternion b);\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
" Quaternion qtNormalize(Quaternion in);\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
" Quaternion qtInvert(Quaternion q);\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
" float dot3F4(float4 a, float4 b)\n"
|
||||
"{\n"
|
||||
@@ -100,8 +78,6 @@ static const char* rayCastKernelCL= \
|
||||
" float4 b1 = (float4)(b.xyz,0.f);\n"
|
||||
" return dot(a1, b1);\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
" Quaternion qtMul(Quaternion a, Quaternion b)\n"
|
||||
"{\n"
|
||||
@@ -112,7 +88,6 @@ static const char* rayCastKernelCL= \
|
||||
" ans.w = a.w*b.w - dot3F4(a, b);\n"
|
||||
" return ans;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
" Quaternion qtNormalize(Quaternion in)\n"
|
||||
"{\n"
|
||||
@@ -130,39 +105,28 @@ static const char* rayCastKernelCL= \
|
||||
" out = qtMul(out,qInv);\n"
|
||||
" return out;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
" Quaternion qtInvert(Quaternion q)\n"
|
||||
"{\n"
|
||||
" return (Quaternion)(-q.xyz, q.w);\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
" float4 qtInvRotate(const Quaternion q, float4 vec)\n"
|
||||
"{\n"
|
||||
" return qtRotate( qtInvert( q ), vec );\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"void trInverse(float4 translationIn, Quaternion orientationIn,\n"
|
||||
" float4* translationOut, Quaternion* orientationOut)\n"
|
||||
"{\n"
|
||||
" *orientationOut = qtInvert(orientationIn);\n"
|
||||
" *translationOut = qtRotate(*orientationOut, -translationIn);\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"bool rayConvex(float4 rayFromLocal, float4 rayToLocal, int numFaces, int faceOffset,\n"
|
||||
" __global const b3GpuFace* faces, float* hitFraction, float4* hitNormal)\n"
|
||||
"{\n"
|
||||
" rayFromLocal.w = 0.f;\n"
|
||||
" rayToLocal.w = 0.f;\n"
|
||||
" bool result = true;\n"
|
||||
"\n"
|
||||
" float exitFraction = hitFraction[0];\n"
|
||||
" float enterFraction = -0.3f;\n"
|
||||
" float4 curHitNormal = (float4)(0,0,0,0);\n"
|
||||
@@ -200,12 +164,10 @@ static const char* rayCastKernelCL= \
|
||||
" if (exitFraction <= enterFraction)\n"
|
||||
" result = false;\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" if (enterFraction < 0.f)\n"
|
||||
" {\n"
|
||||
" result = false;\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" if (result)\n"
|
||||
" { \n"
|
||||
" hitFraction[0] = enterFraction;\n"
|
||||
@@ -213,12 +175,6 @@ static const char* rayCastKernelCL= \
|
||||
" }\n"
|
||||
" return result;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"bool sphere_intersect(float4 spherePos, float radius, float4 rayFrom, float4 rayTo, float* hitFraction)\n"
|
||||
"{\n"
|
||||
" float4 rs = rayFrom - spherePos;\n"
|
||||
@@ -228,13 +184,10 @@ static const char* rayCastKernelCL= \
|
||||
" float A = dot(rayDir,rayDir);\n"
|
||||
" float B = dot(rs, rayDir);\n"
|
||||
" float C = dot(rs, rs) - (radius * radius);\n"
|
||||
"\n"
|
||||
" float D = B * B - A*C;\n"
|
||||
"\n"
|
||||
" if (D > 0.0f)\n"
|
||||
" {\n"
|
||||
" float t = (-B - sqrt(D))/A;\n"
|
||||
"\n"
|
||||
" if ( (t >= 0.0f) && (t < (*hitFraction)) )\n"
|
||||
" {\n"
|
||||
" *hitFraction = t;\n"
|
||||
@@ -243,7 +196,6 @@ static const char* rayCastKernelCL= \
|
||||
" }\n"
|
||||
" return false;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"float4 setInterpolate3(float4 from, float4 to, float t)\n"
|
||||
"{\n"
|
||||
" float s = 1.0f - t;\n"
|
||||
@@ -252,7 +204,6 @@ static const char* rayCastKernelCL= \
|
||||
" result.w = 0.f; \n"
|
||||
" return result; \n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__kernel void rayCastKernel( \n"
|
||||
" int numRays, \n"
|
||||
" const __global b3RayInfo* rays, \n"
|
||||
@@ -263,23 +214,18 @@ static const char* rayCastKernelCL= \
|
||||
" __global const b3GpuFace* faces,\n"
|
||||
" __global const ConvexPolyhedronCL* convexShapes )\n"
|
||||
"{\n"
|
||||
"\n"
|
||||
" int i = get_global_id(0);\n"
|
||||
" if (i>=numRays)\n"
|
||||
" return;\n"
|
||||
"\n"
|
||||
" hitResults[i].m_hitFraction = 1.f;\n"
|
||||
"\n"
|
||||
" float4 rayFrom = rays[i].m_from;\n"
|
||||
" float4 rayTo = rays[i].m_to;\n"
|
||||
" float hitFraction = 1.f;\n"
|
||||
" float4 hitPoint;\n"
|
||||
" float4 hitNormal;\n"
|
||||
" int hitBodyIndex= -1;\n"
|
||||
"\n"
|
||||
" int cachedCollidableIndex = -1;\n"
|
||||
" Collidable cachedCollidable;\n"
|
||||
"\n"
|
||||
" for (int b=0;b<numBodies;b++)\n"
|
||||
" {\n"
|
||||
" if (hitResults[i].m_hitResult2==b)\n"
|
||||
@@ -294,7 +240,6 @@ static const char* rayCastKernelCL= \
|
||||
" }\n"
|
||||
" if (cachedCollidable.m_shapeType == SHAPE_CONVEX_HULL)\n"
|
||||
" {\n"
|
||||
"\n"
|
||||
" float4 invPos = (float4)(0,0,0,0);\n"
|
||||
" float4 invOrn = (float4)(0,0,0,0);\n"
|
||||
" float4 rayFromLocal = (float4)(0,0,0,0);\n"
|
||||
@@ -327,7 +272,6 @@ static const char* rayCastKernelCL= \
|
||||
" }\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" if (hitBodyIndex>=0)\n"
|
||||
" {\n"
|
||||
" hitPoint = setInterpolate3(rayFrom, rayTo,hitFraction);\n"
|
||||
@@ -336,6 +280,5 @@ static const char* rayCastKernelCL= \
|
||||
" hitResults[i].m_hitNormal = normalize(hitNormal);\n"
|
||||
" hitResults[i].m_hitResult0 = hitBodyIndex;\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
"}\n"
|
||||
;
|
||||
|
||||
@@ -148,8 +148,8 @@ b3GpuBatchingPgsSolver::b3GpuBatchingPgsSolver(cl_context ctx,cl_device_id devic
|
||||
cl_program solveFrictionProg= b3OpenCLUtils::compileCLProgramFromString( ctx, device, solveFrictionSource, &pErrNum,additionalMacros, B3_SOLVER_FRICTION_KERNEL_PATH);
|
||||
b3Assert(solveFrictionProg);
|
||||
|
||||
//cl_program solverSetup2Prog= b3OpenCLUtils::compileCLProgramFromString( ctx, device, solverSetup2Source, &pErrNum,additionalMacros, B3_SOLVER_SETUP2_KERNEL_PATH);
|
||||
cl_program solverSetup2Prog= b3OpenCLUtils::compileCLProgramFromString( ctx, device, 0, &pErrNum,additionalMacros, B3_SOLVER_SETUP2_KERNEL_PATH,true);
|
||||
cl_program solverSetup2Prog= b3OpenCLUtils::compileCLProgramFromString( ctx, device, solverSetup2Source, &pErrNum,additionalMacros, B3_SOLVER_SETUP2_KERNEL_PATH);
|
||||
|
||||
|
||||
b3Assert(solverSetup2Prog);
|
||||
|
||||
@@ -886,7 +886,7 @@ void b3GpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
||||
if (b3GpuBatchContacts)
|
||||
{
|
||||
B3_PROFILE("gpu batchContacts");
|
||||
maxNumBatches = 50;//250;
|
||||
maxNumBatches = 250;//250;
|
||||
m_data->m_solverGPU->batchContacts( m_data->m_pBufContactOutGPU, nContacts, m_data->m_solverGPU->m_numConstraints, m_data->m_solverGPU->m_offsets, csCfg.m_staticIdx );
|
||||
} else
|
||||
{
|
||||
|
||||
@@ -33,7 +33,7 @@ subject to the following restrictions:
|
||||
#define B3_RIGIDBODY_INTEGRATE_PATH "src/Bullet3OpenCL/RigidBody/kernels/integrateKernel.cl"
|
||||
#define B3_RIGIDBODY_UPDATEAABB_PATH "src/Bullet3OpenCL/RigidBody/kernels/updateAabbsKernel.cl"
|
||||
|
||||
bool useDbvt = false;
|
||||
bool useDbvt = false;//true;
|
||||
bool useBullet2CpuSolver = true;
|
||||
bool dumpContactStats = false;
|
||||
|
||||
|
||||
@@ -13,6 +13,7 @@ subject to the following restrictions:
|
||||
*/
|
||||
//Originally written by Takahiro Harada
|
||||
|
||||
#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Contact4Data.h"
|
||||
|
||||
#pragma OPENCL EXTENSION cl_amd_printf : enable
|
||||
#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable
|
||||
@@ -64,22 +65,7 @@ typedef unsigned char u8;
|
||||
|
||||
|
||||
|
||||
typedef struct
|
||||
{
|
||||
float4 m_worldPos[4];
|
||||
float4 m_worldNormal;
|
||||
u32 m_coeffs;
|
||||
int m_batchIdx;
|
||||
|
||||
int m_bodyA;//sign bit set for fixed objects
|
||||
int m_bodyB;
|
||||
|
||||
int m_childIndexA;
|
||||
int m_childIndexB;
|
||||
int m_unused1;
|
||||
int m_unused2;
|
||||
|
||||
}Contact4;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
@@ -133,7 +119,7 @@ u32 tryWrite(__local u32* buff, int idx)
|
||||
}
|
||||
|
||||
// batching on the GPU
|
||||
__kernel void CreateBatches( __global const Contact4* gConstraints, __global Contact4* gConstraintsOut,
|
||||
__kernel void CreateBatches( __global const struct b3Contact4Data* gConstraints, __global struct b3Contact4Data* gConstraintsOut,
|
||||
__global const u32* gN, __global const u32* gStart,
|
||||
int m_staticIdx )
|
||||
{
|
||||
@@ -186,8 +172,8 @@ __kernel void CreateBatches( __global const Contact4* gConstraints, __global Con
|
||||
int dstIdx;
|
||||
AtomInc1( ldsRingEnd, dstIdx );
|
||||
|
||||
int a = gConstraints[m_start+srcIdx].m_bodyA;
|
||||
int b = gConstraints[m_start+srcIdx].m_bodyB;
|
||||
int a = gConstraints[m_start+srcIdx].m_bodyAPtrAndSignBit;
|
||||
int b = gConstraints[m_start+srcIdx].m_bodyBPtrAndSignBit;
|
||||
ldsRingElem[dstIdx].m_a = (a>b)? b:a;
|
||||
ldsRingElem[dstIdx].m_b = (a>b)? a:b;
|
||||
ldsRingElem[dstIdx].m_idx = srcIdx;
|
||||
|
||||
@@ -2,37 +2,71 @@
|
||||
static const char* batchingKernelsCL= \
|
||||
"/*\n"
|
||||
"Copyright (c) 2012 Advanced Micro Devices, Inc. \n"
|
||||
"\n"
|
||||
"This software is provided 'as-is', without any express or implied warranty.\n"
|
||||
"In no event will the authors be held liable for any damages arising from the use of this software.\n"
|
||||
"Permission is granted to anyone to use this software for any purpose, \n"
|
||||
"including commercial applications, and to alter it and redistribute it freely, \n"
|
||||
"subject to the following restrictions:\n"
|
||||
"\n"
|
||||
"1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n"
|
||||
"2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n"
|
||||
"3. This notice may not be removed or altered from any source distribution.\n"
|
||||
"*/\n"
|
||||
"//Originally written by Takahiro Harada\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"#ifndef B3_CONTACT4DATA_H\n"
|
||||
"#define B3_CONTACT4DATA_H\n"
|
||||
"#ifndef B3_FLOAT4_H\n"
|
||||
"#define B3_FLOAT4_H\n"
|
||||
"#ifndef B3_PLATFORM_DEFINITIONS_H\n"
|
||||
"#define B3_PLATFORM_DEFINITIONS_H\n"
|
||||
"struct MyTest\n"
|
||||
"{\n"
|
||||
" int bla;\n"
|
||||
"};\n"
|
||||
"#endif\n"
|
||||
"#ifdef __cplusplus\n"
|
||||
"#else//bla\n"
|
||||
" typedef float4 b3Float4;\n"
|
||||
"#endif \n"
|
||||
"#endif //B3_FLOAT4_H\n"
|
||||
"typedef struct b3Contact4Data b3Contact4Data_t;\n"
|
||||
"struct b3Contact4Data\n"
|
||||
"{\n"
|
||||
" b3Float4 m_worldPos[4];\n"
|
||||
"// b3Float4 m_localPosB[4];\n"
|
||||
" b3Float4 m_worldNormal; // w: m_nPoints\n"
|
||||
" unsigned short m_restituitionCoeffCmp;\n"
|
||||
" unsigned short m_frictionCoeffCmp;\n"
|
||||
" int m_batchIdx;\n"
|
||||
" int m_bodyAPtrAndSignBit;//x:m_bodyAPtr, y:m_bodyBPtr\n"
|
||||
" int m_bodyBPtrAndSignBit;\n"
|
||||
" int m_childIndexA;\n"
|
||||
" int m_childIndexB;\n"
|
||||
" int m_unused1;\n"
|
||||
" int m_unused2;\n"
|
||||
" b3Float4 m_localPosA;\n"
|
||||
"};\n"
|
||||
"inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)\n"
|
||||
"{\n"
|
||||
" return (int)contact->m_worldNormal.w;\n"
|
||||
"};\n"
|
||||
"inline void b3Contact4Data_setNumPoints(struct b3Contact4Data* contact, int numPoints)\n"
|
||||
"{\n"
|
||||
" contact->m_worldNormal.w = (float)numPoints;\n"
|
||||
"};\n"
|
||||
"#endif //B3_CONTACT4DATA_H\n"
|
||||
"#pragma OPENCL EXTENSION cl_amd_printf : enable\n"
|
||||
"#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n"
|
||||
"#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n"
|
||||
"#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n"
|
||||
"#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n"
|
||||
"\n"
|
||||
"#ifdef cl_ext_atomic_counters_32\n"
|
||||
"#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable\n"
|
||||
"#else\n"
|
||||
"#define counter32_t volatile __global int*\n"
|
||||
"#endif\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"typedef unsigned int u32;\n"
|
||||
"typedef unsigned short u16;\n"
|
||||
"typedef unsigned char u8;\n"
|
||||
"\n"
|
||||
"#define GET_GROUP_IDX get_group_id(0)\n"
|
||||
"#define GET_LOCAL_IDX get_local_id(0)\n"
|
||||
"#define GET_GLOBAL_IDX get_global_id(0)\n"
|
||||
@@ -46,43 +80,16 @@ static const char* batchingKernelsCL= \
|
||||
"#define AtomAdd(x, value) atom_add(&(x), value)\n"
|
||||
"#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value )\n"
|
||||
"#define AtomXhg(x, value) atom_xchg ( &(x), value )\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"#define SELECT_UINT4( b, a, condition ) select( b,a,condition )\n"
|
||||
"\n"
|
||||
"#define make_float4 (float4)\n"
|
||||
"#define make_float2 (float2)\n"
|
||||
"#define make_uint4 (uint4)\n"
|
||||
"#define make_int4 (int4)\n"
|
||||
"#define make_uint2 (uint2)\n"
|
||||
"#define make_int2 (int2)\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"#define max2 max\n"
|
||||
"#define min2 min\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"#define WG_SIZE 64\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"typedef struct \n"
|
||||
"{\n"
|
||||
" float4 m_worldPos[4];\n"
|
||||
" float4 m_worldNormal;\n"
|
||||
" u32 m_coeffs;\n"
|
||||
" int m_batchIdx;\n"
|
||||
"\n"
|
||||
" int m_bodyA;//sign bit set for fixed objects\n"
|
||||
" int m_bodyB;\n"
|
||||
"\n"
|
||||
" int m_childIndexA;\n"
|
||||
" int m_childIndexB;\n"
|
||||
" int m_unused1;\n"
|
||||
" int m_unused2;\n"
|
||||
"\n"
|
||||
"}Contact4;\n"
|
||||
"\n"
|
||||
"typedef struct \n"
|
||||
"{\n"
|
||||
" int m_n;\n"
|
||||
@@ -90,24 +97,19 @@ static const char* batchingKernelsCL= \
|
||||
" int m_staticIdx;\n"
|
||||
" int m_paddings[1];\n"
|
||||
"} ConstBuffer;\n"
|
||||
"\n"
|
||||
"typedef struct \n"
|
||||
"{\n"
|
||||
" int m_a;\n"
|
||||
" int m_b;\n"
|
||||
" u32 m_idx;\n"
|
||||
"}Elem;\n"
|
||||
"\n"
|
||||
"#define STACK_SIZE (WG_SIZE*10)\n"
|
||||
"//#define STACK_SIZE (WG_SIZE)\n"
|
||||
"#define RING_SIZE 1024\n"
|
||||
"#define RING_SIZE_MASK (RING_SIZE-1)\n"
|
||||
"#define CHECK_SIZE (WG_SIZE)\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"#define GET_RING_CAPACITY (RING_SIZE - ldsRingEnd)\n"
|
||||
"#define RING_END ldsTmp\n"
|
||||
"\n"
|
||||
"u32 readBuf(__local u32* buff, int idx)\n"
|
||||
"{\n"
|
||||
" idx = idx % (32*CHECK_SIZE);\n"
|
||||
@@ -115,7 +117,6 @@ static const char* batchingKernelsCL= \
|
||||
" int bufIdx = idx/32;\n"
|
||||
" return buff[bufIdx] & (1<<bitIdx);\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"void writeBuf(__local u32* buff, int idx)\n"
|
||||
"{\n"
|
||||
" idx = idx % (32*CHECK_SIZE);\n"
|
||||
@@ -124,7 +125,6 @@ static const char* batchingKernelsCL= \
|
||||
"// buff[bufIdx] |= (1<<bitIdx);\n"
|
||||
" atom_or( &buff[bufIdx], (1<<bitIdx) );\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"u32 tryWrite(__local u32* buff, int idx)\n"
|
||||
"{\n"
|
||||
" idx = idx % (32*CHECK_SIZE);\n"
|
||||
@@ -133,9 +133,8 @@ static const char* batchingKernelsCL= \
|
||||
" u32 ans = (u32)atom_or( &buff[bufIdx], (1<<bitIdx) );\n"
|
||||
" return ((ans >> bitIdx)&1) == 0;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"// batching on the GPU\n"
|
||||
"__kernel void CreateBatches( __global const Contact4* gConstraints, __global Contact4* gConstraintsOut,\n"
|
||||
"__kernel void CreateBatches( __global const struct b3Contact4Data* gConstraints, __global struct b3Contact4Data* gConstraintsOut,\n"
|
||||
" __global const u32* gN, __global const u32* gStart, \n"
|
||||
" int m_staticIdx )\n"
|
||||
"{\n"
|
||||
@@ -148,7 +147,6 @@ static const char* batchingKernelsCL= \
|
||||
" __local u32 ldsFixedBuffer[CHECK_SIZE];\n"
|
||||
" __local u32 ldsGEnd;\n"
|
||||
" __local u32 ldsDstEnd;\n"
|
||||
"\n"
|
||||
" int wgIdx = GET_GROUP_IDX;\n"
|
||||
" int lIdx = GET_LOCAL_IDX;\n"
|
||||
" \n"
|
||||
@@ -168,7 +166,6 @@ static const char* batchingKernelsCL= \
|
||||
" for(int ie=0; ie<50; ie++)\n"
|
||||
" {\n"
|
||||
" ldsFixedBuffer[lIdx] = 0;\n"
|
||||
"\n"
|
||||
" for(int giter=0; giter<4; giter++)\n"
|
||||
" {\n"
|
||||
" int ringCap = GET_RING_CAPACITY;\n"
|
||||
@@ -188,8 +185,8 @@ static const char* batchingKernelsCL= \
|
||||
" int dstIdx;\n"
|
||||
" AtomInc1( ldsRingEnd, dstIdx );\n"
|
||||
" \n"
|
||||
" int a = gConstraints[m_start+srcIdx].m_bodyA;\n"
|
||||
" int b = gConstraints[m_start+srcIdx].m_bodyB;\n"
|
||||
" int a = gConstraints[m_start+srcIdx].m_bodyAPtrAndSignBit;\n"
|
||||
" int b = gConstraints[m_start+srcIdx].m_bodyBPtrAndSignBit;\n"
|
||||
" ldsRingElem[dstIdx].m_a = (a>b)? b:a;\n"
|
||||
" ldsRingElem[dstIdx].m_b = (a>b)? a:b;\n"
|
||||
" ldsRingElem[dstIdx].m_idx = srcIdx;\n"
|
||||
@@ -198,37 +195,31 @@ static const char* batchingKernelsCL= \
|
||||
" ringCap = GET_RING_CAPACITY;\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" GROUP_LDS_BARRIER;\n"
|
||||
" \n"
|
||||
" // 2. fill stack\n"
|
||||
" __local Elem* dst = ldsRingElem;\n"
|
||||
" if( lIdx == 0 ) RING_END = 0;\n"
|
||||
"\n"
|
||||
" int srcIdx=lIdx;\n"
|
||||
" int end = ldsRingEnd;\n"
|
||||
"\n"
|
||||
" {\n"
|
||||
" for(int ii=0; ii<end; ii+=WG_SIZE, srcIdx+=WG_SIZE)\n"
|
||||
" {\n"
|
||||
" Elem e;\n"
|
||||
" if(srcIdx<end) e = ldsRingElem[srcIdx];\n"
|
||||
" bool done = (srcIdx<end)?false:true;\n"
|
||||
"\n"
|
||||
" for(int i=lIdx; i<CHECK_SIZE; i+=WG_SIZE) ldsCheckBuffer[lIdx] = 0;\n"
|
||||
" \n"
|
||||
" if( !done )\n"
|
||||
" {\n"
|
||||
" int aUsed = readBuf( ldsFixedBuffer, abs(e.m_a));\n"
|
||||
" int bUsed = readBuf( ldsFixedBuffer, abs(e.m_b));\n"
|
||||
"\n"
|
||||
" if( aUsed==0 && bUsed==0 )\n"
|
||||
" {\n"
|
||||
" int aAvailable=1;\n"
|
||||
" int bAvailable=1;\n"
|
||||
" int ea = abs(e.m_a);\n"
|
||||
" int eb = abs(e.m_b);\n"
|
||||
"\n"
|
||||
" bool aStatic = (e.m_a<0) ||(ea==m_staticIdx);\n"
|
||||
" bool bStatic = (e.m_b<0) ||(eb==m_staticIdx);\n"
|
||||
" \n"
|
||||
@@ -239,7 +230,6 @@ static const char* batchingKernelsCL= \
|
||||
" \n"
|
||||
" //aAvailable = aStatic? 1: aAvailable;\n"
|
||||
" //bAvailable = bStatic? 1: bAvailable;\n"
|
||||
"\n"
|
||||
" bool success = (aAvailable && bAvailable);\n"
|
||||
" if(success)\n"
|
||||
" {\n"
|
||||
@@ -252,7 +242,6 @@ static const char* batchingKernelsCL= \
|
||||
" done = success;\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" // put it aside\n"
|
||||
" if(srcIdx<end)\n"
|
||||
" {\n"
|
||||
@@ -272,7 +261,6 @@ static const char* batchingKernelsCL= \
|
||||
" dst[dstIdx] = e;\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" // if filled, flush\n"
|
||||
" if( ldsStackEnd == STACK_SIZE )\n"
|
||||
" {\n"
|
||||
@@ -284,18 +272,14 @@ static const char* batchingKernelsCL= \
|
||||
" gConstraintsOut[ dstIdx ].m_batchIdx = ie;\n"
|
||||
" }\n"
|
||||
" if( lIdx == 0 ) ldsStackEnd = 0;\n"
|
||||
"\n"
|
||||
" //for(int i=lIdx; i<CHECK_SIZE; i+=WG_SIZE) \n"
|
||||
" ldsFixedBuffer[lIdx] = 0;\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" if( lIdx == 0 ) ldsRingEnd = RING_END;\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" GROUP_LDS_BARRIER;\n"
|
||||
"\n"
|
||||
" for(int i=lIdx; i<ldsStackEnd; i+=WG_SIZE)\n"
|
||||
" {\n"
|
||||
" int idx = m_start + ldsStackIdx[i];\n"
|
||||
@@ -303,7 +287,6 @@ static const char* batchingKernelsCL= \
|
||||
" gConstraintsOut[ dstIdx ] = gConstraints[ idx ];\n"
|
||||
" gConstraintsOut[ dstIdx ].m_batchIdx = ie;\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" // in case it couldn't consume any pair. Flush them\n"
|
||||
" // todo. Serial batch worth while?\n"
|
||||
" if( ldsStackEnd == 0 )\n"
|
||||
@@ -318,38 +301,11 @@ static const char* batchingKernelsCL= \
|
||||
" GROUP_LDS_BARRIER;\n"
|
||||
" if( lIdx == 0 ) ldsRingEnd = 0;\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" if( lIdx == 0 ) ldsStackEnd = 0;\n"
|
||||
"\n"
|
||||
" GROUP_LDS_BARRIER;\n"
|
||||
"\n"
|
||||
" // termination\n"
|
||||
" if( ldsGEnd == m_n && ldsRingEnd == 0 )\n"
|
||||
" break;\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
;
|
||||
|
||||
@@ -13,6 +13,7 @@ subject to the following restrictions:
|
||||
*/
|
||||
//Originally written by Erwin Coumans
|
||||
|
||||
#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Contact4Data.h"
|
||||
|
||||
#pragma OPENCL EXTENSION cl_amd_printf : enable
|
||||
#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable
|
||||
@@ -65,22 +66,7 @@ typedef unsigned char u8;
|
||||
|
||||
|
||||
|
||||
typedef struct
|
||||
{
|
||||
float4 m_worldPos[4];
|
||||
float4 m_worldNormal;
|
||||
u32 m_coeffs;
|
||||
int m_batchIdx;
|
||||
|
||||
int m_bodyAPtrAndSignBit;//sign bit set for fixed objects
|
||||
int m_bodyBPtrAndSignBit;
|
||||
|
||||
int m_childIndexA;
|
||||
int m_childIndexB;
|
||||
int m_unused1;
|
||||
int m_unused2;
|
||||
|
||||
}Contact4;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
@@ -102,7 +88,7 @@ typedef struct
|
||||
|
||||
|
||||
// batching on the GPU
|
||||
__kernel void CreateBatchesBruteForce( __global Contact4* gConstraints, __global const u32* gN, __global const u32* gStart, int m_staticIdx )
|
||||
__kernel void CreateBatchesBruteForce( __global struct b3Contact4Data* gConstraints, __global const u32* gN, __global const u32* gStart, int m_staticIdx )
|
||||
{
|
||||
int wgIdx = GET_GROUP_IDX;
|
||||
int lIdx = GET_LOCAL_IDX;
|
||||
@@ -155,13 +141,13 @@ u32 tryWrite(__local u32* buff, int idx)
|
||||
|
||||
|
||||
// batching on the GPU
|
||||
__kernel void CreateBatchesNew( __global Contact4* gConstraints, __global const u32* gN, __global const u32* gStart, int staticIdx )
|
||||
__kernel void CreateBatchesNew( __global struct b3Contact4Data* gConstraints, __global const u32* gN, __global const u32* gStart, int staticIdx )
|
||||
{
|
||||
int wgIdx = GET_GROUP_IDX;
|
||||
int lIdx = GET_LOCAL_IDX;
|
||||
const int numConstraints = gN[wgIdx];
|
||||
const int m_start = gStart[wgIdx];
|
||||
|
||||
b3Contact4Data_t tmp;
|
||||
|
||||
__local u32 ldsFixedBuffer[CHECK_SIZE];
|
||||
|
||||
@@ -173,7 +159,7 @@ __kernel void CreateBatchesNew( __global Contact4* gConstraints, __global const
|
||||
{
|
||||
|
||||
|
||||
__global Contact4* cs = &gConstraints[m_start];
|
||||
__global struct b3Contact4Data* cs = &gConstraints[m_start];
|
||||
|
||||
|
||||
int numValidConstraints = 0;
|
||||
@@ -214,11 +200,51 @@ __kernel void CreateBatchesNew( __global Contact4* gConstraints, __global const
|
||||
|
||||
if (i!=numValidConstraints)
|
||||
{
|
||||
//btSwap(cs[i],cs[numValidConstraints]);
|
||||
|
||||
Contact4 tmp = cs[i];
|
||||
cs[i] = cs[numValidConstraints];
|
||||
cs[numValidConstraints] = tmp;
|
||||
// tmp = cs[i];
|
||||
// cs[i] = cs[numValidConstraints];
|
||||
// cs[numValidConstraints] = tmp;
|
||||
|
||||
#ifdef CHECK_SIZE
|
||||
tmp.m_worldPos[0] = cs[i].m_worldPos[0];
|
||||
tmp.m_worldPos[1] = cs[i].m_worldPos[1];
|
||||
tmp.m_worldPos[2] = cs[i].m_worldPos[2];
|
||||
tmp.m_worldPos[3] = cs[i].m_worldPos[3];
|
||||
tmp.m_worldNormal = cs[i].m_worldNormal;
|
||||
tmp.m_restituitionCoeffCmp = cs[i].m_restituitionCoeffCmp;
|
||||
tmp.m_frictionCoeffCmp = cs[i].m_frictionCoeffCmp;
|
||||
tmp.m_batchIdx = cs[i].m_batchIdx;
|
||||
tmp.m_bodyAPtrAndSignBit = cs[i].m_bodyAPtrAndSignBit;
|
||||
tmp.m_bodyBPtrAndSignBit = cs[i].m_bodyBPtrAndSignBit;
|
||||
tmp.m_childIndexA = cs[i].m_childIndexA;
|
||||
tmp.m_childIndexB = cs[i].m_childIndexB;
|
||||
|
||||
cs[i].m_worldPos[0] = cs[numValidConstraints].m_worldPos[0];
|
||||
cs[i].m_worldPos[1] = cs[numValidConstraints].m_worldPos[1];
|
||||
cs[i].m_worldPos[2] = cs[numValidConstraints].m_worldPos[2];
|
||||
cs[i].m_worldPos[3] = cs[numValidConstraints].m_worldPos[3];
|
||||
cs[i].m_worldNormal = cs[numValidConstraints].m_worldNormal;
|
||||
cs[i].m_restituitionCoeffCmp = cs[numValidConstraints].m_restituitionCoeffCmp;
|
||||
cs[i].m_frictionCoeffCmp = cs[numValidConstraints].m_frictionCoeffCmp;
|
||||
cs[i].m_batchIdx = cs[numValidConstraints].m_batchIdx;
|
||||
cs[i].m_bodyAPtrAndSignBit = cs[numValidConstraints].m_bodyAPtrAndSignBit;
|
||||
cs[i].m_bodyBPtrAndSignBit = cs[numValidConstraints].m_bodyBPtrAndSignBit;
|
||||
cs[i].m_childIndexA = cs[numValidConstraints].m_childIndexA;
|
||||
cs[i].m_childIndexB = cs[numValidConstraints].m_childIndexB;
|
||||
|
||||
cs[numValidConstraints].m_worldPos[0] = tmp.m_worldPos[0];
|
||||
cs[numValidConstraints].m_worldPos[1] = tmp.m_worldPos[1];
|
||||
cs[numValidConstraints].m_worldPos[2] = tmp.m_worldPos[2];
|
||||
cs[numValidConstraints].m_worldPos[3] = tmp.m_worldPos[3];
|
||||
cs[numValidConstraints].m_worldNormal = tmp.m_worldNormal;
|
||||
cs[numValidConstraints].m_restituitionCoeffCmp = tmp.m_restituitionCoeffCmp;
|
||||
cs[numValidConstraints].m_frictionCoeffCmp = tmp.m_frictionCoeffCmp;
|
||||
cs[numValidConstraints].m_batchIdx = tmp.m_batchIdx;
|
||||
cs[numValidConstraints].m_bodyAPtrAndSignBit = tmp.m_bodyAPtrAndSignBit;
|
||||
cs[numValidConstraints].m_bodyBPtrAndSignBit = tmp.m_bodyBPtrAndSignBit;
|
||||
cs[numValidConstraints].m_childIndexA = tmp.m_childIndexA;
|
||||
cs[numValidConstraints].m_childIndexB = tmp.m_childIndexB;
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -2,38 +2,72 @@
|
||||
static const char* batchingKernelsNewCL= \
|
||||
"/*\n"
|
||||
"Copyright (c) 2012 Advanced Micro Devices, Inc. \n"
|
||||
"\n"
|
||||
"This software is provided 'as-is', without any express or implied warranty.\n"
|
||||
"In no event will the authors be held liable for any damages arising from the use of this software.\n"
|
||||
"Permission is granted to anyone to use this software for any purpose, \n"
|
||||
"including commercial applications, and to alter it and redistribute it freely, \n"
|
||||
"subject to the following restrictions:\n"
|
||||
"\n"
|
||||
"1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n"
|
||||
"2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n"
|
||||
"3. This notice may not be removed or altered from any source distribution.\n"
|
||||
"*/\n"
|
||||
"//Originally written by Erwin Coumans\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"#ifndef B3_CONTACT4DATA_H\n"
|
||||
"#define B3_CONTACT4DATA_H\n"
|
||||
"#ifndef B3_FLOAT4_H\n"
|
||||
"#define B3_FLOAT4_H\n"
|
||||
"#ifndef B3_PLATFORM_DEFINITIONS_H\n"
|
||||
"#define B3_PLATFORM_DEFINITIONS_H\n"
|
||||
"struct MyTest\n"
|
||||
"{\n"
|
||||
" int bla;\n"
|
||||
"};\n"
|
||||
"#endif\n"
|
||||
"#ifdef __cplusplus\n"
|
||||
"#else//bla\n"
|
||||
" typedef float4 b3Float4;\n"
|
||||
"#endif \n"
|
||||
"#endif //B3_FLOAT4_H\n"
|
||||
"typedef struct b3Contact4Data b3Contact4Data_t;\n"
|
||||
"struct b3Contact4Data\n"
|
||||
"{\n"
|
||||
" b3Float4 m_worldPos[4];\n"
|
||||
"// b3Float4 m_localPosB[4];\n"
|
||||
" b3Float4 m_worldNormal; // w: m_nPoints\n"
|
||||
" unsigned short m_restituitionCoeffCmp;\n"
|
||||
" unsigned short m_frictionCoeffCmp;\n"
|
||||
" int m_batchIdx;\n"
|
||||
" int m_bodyAPtrAndSignBit;//x:m_bodyAPtr, y:m_bodyBPtr\n"
|
||||
" int m_bodyBPtrAndSignBit;\n"
|
||||
" int m_childIndexA;\n"
|
||||
" int m_childIndexB;\n"
|
||||
" int m_unused1;\n"
|
||||
" int m_unused2;\n"
|
||||
" b3Float4 m_localPosA;\n"
|
||||
"};\n"
|
||||
"inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)\n"
|
||||
"{\n"
|
||||
" return (int)contact->m_worldNormal.w;\n"
|
||||
"};\n"
|
||||
"inline void b3Contact4Data_setNumPoints(struct b3Contact4Data* contact, int numPoints)\n"
|
||||
"{\n"
|
||||
" contact->m_worldNormal.w = (float)numPoints;\n"
|
||||
"};\n"
|
||||
"#endif //B3_CONTACT4DATA_H\n"
|
||||
"#pragma OPENCL EXTENSION cl_amd_printf : enable\n"
|
||||
"#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n"
|
||||
"#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n"
|
||||
"#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n"
|
||||
"#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n"
|
||||
"\n"
|
||||
"#ifdef cl_ext_atomic_counters_32\n"
|
||||
"#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable\n"
|
||||
"#else\n"
|
||||
"#define counter32_t volatile __global int*\n"
|
||||
"#endif\n"
|
||||
"\n"
|
||||
"#define SIMD_WIDTH 64\n"
|
||||
"\n"
|
||||
"typedef unsigned int u32;\n"
|
||||
"typedef unsigned short u16;\n"
|
||||
"typedef unsigned char u8;\n"
|
||||
"\n"
|
||||
"#define GET_GROUP_IDX get_group_id(0)\n"
|
||||
"#define GET_LOCAL_IDX get_local_id(0)\n"
|
||||
"#define GET_GLOBAL_IDX get_global_id(0)\n"
|
||||
@@ -47,43 +81,16 @@ static const char* batchingKernelsNewCL= \
|
||||
"#define AtomAdd(x, value) atom_add(&(x), value)\n"
|
||||
"#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value )\n"
|
||||
"#define AtomXhg(x, value) atom_xchg ( &(x), value )\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"#define SELECT_UINT4( b, a, condition ) select( b,a,condition )\n"
|
||||
"\n"
|
||||
"#define make_float4 (float4)\n"
|
||||
"#define make_float2 (float2)\n"
|
||||
"#define make_uint4 (uint4)\n"
|
||||
"#define make_int4 (int4)\n"
|
||||
"#define make_uint2 (uint2)\n"
|
||||
"#define make_int2 (int2)\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"#define max2 max\n"
|
||||
"#define min2 min\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"#define WG_SIZE 64\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"typedef struct \n"
|
||||
"{\n"
|
||||
" float4 m_worldPos[4];\n"
|
||||
" float4 m_worldNormal;\n"
|
||||
" u32 m_coeffs;\n"
|
||||
" int m_batchIdx;\n"
|
||||
"\n"
|
||||
" int m_bodyAPtrAndSignBit;//sign bit set for fixed objects\n"
|
||||
" int m_bodyBPtrAndSignBit;\n"
|
||||
"\n"
|
||||
" int m_childIndexA;\n"
|
||||
" int m_childIndexB;\n"
|
||||
" int m_unused1;\n"
|
||||
" int m_unused2;\n"
|
||||
"\n"
|
||||
"}Contact4;\n"
|
||||
"\n"
|
||||
"typedef struct \n"
|
||||
"{\n"
|
||||
" int m_n;\n"
|
||||
@@ -91,20 +98,14 @@ static const char* batchingKernelsNewCL= \
|
||||
" int m_staticIdx;\n"
|
||||
" int m_paddings[1];\n"
|
||||
"} ConstBuffer;\n"
|
||||
"\n"
|
||||
"typedef struct \n"
|
||||
"{\n"
|
||||
" int m_a;\n"
|
||||
" int m_b;\n"
|
||||
" u32 m_idx;\n"
|
||||
"}Elem;\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"// batching on the GPU\n"
|
||||
"__kernel void CreateBatchesBruteForce( __global Contact4* gConstraints, __global const u32* gN, __global const u32* gStart, int m_staticIdx )\n"
|
||||
"__kernel void CreateBatchesBruteForce( __global struct b3Contact4Data* gConstraints, __global const u32* gN, __global const u32* gStart, int m_staticIdx )\n"
|
||||
"{\n"
|
||||
" int wgIdx = GET_GROUP_IDX;\n"
|
||||
" int lIdx = GET_LOCAL_IDX;\n"
|
||||
@@ -122,13 +123,7 @@ static const char* batchingKernelsNewCL= \
|
||||
" }\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"#define CHECK_SIZE (WG_SIZE)\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"u32 readBuf(__local u32* buff, int idx)\n"
|
||||
"{\n"
|
||||
" idx = idx % (32*CHECK_SIZE);\n"
|
||||
@@ -136,7 +131,6 @@ static const char* batchingKernelsNewCL= \
|
||||
" int bufIdx = idx/32;\n"
|
||||
" return buff[bufIdx] & (1<<bitIdx);\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"void writeBuf(__local u32* buff, int idx)\n"
|
||||
"{\n"
|
||||
" idx = idx % (32*CHECK_SIZE);\n"
|
||||
@@ -145,7 +139,6 @@ static const char* batchingKernelsNewCL= \
|
||||
" buff[bufIdx] |= (1<<bitIdx);\n"
|
||||
" //atom_or( &buff[bufIdx], (1<<bitIdx) );\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"u32 tryWrite(__local u32* buff, int idx)\n"
|
||||
"{\n"
|
||||
" idx = idx % (32*CHECK_SIZE);\n"
|
||||
@@ -154,16 +147,14 @@ static const char* batchingKernelsNewCL= \
|
||||
" u32 ans = (u32)atom_or( &buff[bufIdx], (1<<bitIdx) );\n"
|
||||
" return ((ans >> bitIdx)&1) == 0;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"// batching on the GPU\n"
|
||||
"__kernel void CreateBatchesNew( __global Contact4* gConstraints, __global const u32* gN, __global const u32* gStart, int staticIdx )\n"
|
||||
"__kernel void CreateBatchesNew( __global struct b3Contact4Data* gConstraints, __global const u32* gN, __global const u32* gStart, int staticIdx )\n"
|
||||
"{\n"
|
||||
" int wgIdx = GET_GROUP_IDX;\n"
|
||||
" int lIdx = GET_LOCAL_IDX;\n"
|
||||
" const int numConstraints = gN[wgIdx];\n"
|
||||
" const int m_start = gStart[wgIdx];\n"
|
||||
" \n"
|
||||
" b3Contact4Data_t tmp;\n"
|
||||
" \n"
|
||||
" __local u32 ldsFixedBuffer[CHECK_SIZE];\n"
|
||||
" \n"
|
||||
@@ -175,12 +166,11 @@ static const char* batchingKernelsNewCL= \
|
||||
" {\n"
|
||||
" \n"
|
||||
" \n"
|
||||
" __global Contact4* cs = &gConstraints[m_start]; \n"
|
||||
" __global struct b3Contact4Data* cs = &gConstraints[m_start]; \n"
|
||||
" \n"
|
||||
" \n"
|
||||
" int numValidConstraints = 0;\n"
|
||||
" int batchIdx = 0;\n"
|
||||
"\n"
|
||||
" while( numValidConstraints < numConstraints)\n"
|
||||
" {\n"
|
||||
" int nCurrentBatch = 0;\n"
|
||||
@@ -188,10 +178,8 @@ static const char* batchingKernelsNewCL= \
|
||||
" \n"
|
||||
" for(int i=0; i<CHECK_SIZE; i++) \n"
|
||||
" ldsFixedBuffer[i] = 0; \n"
|
||||
"\n"
|
||||
" for(int i=numValidConstraints; i<numConstraints; i++)\n"
|
||||
" {\n"
|
||||
"\n"
|
||||
" int bodyAS = cs[i].m_bodyAPtrAndSignBit;\n"
|
||||
" int bodyBS = cs[i].m_bodyBPtrAndSignBit;\n"
|
||||
" int bodyA = abs(bodyAS);\n"
|
||||
@@ -211,19 +199,51 @@ static const char* batchingKernelsNewCL= \
|
||||
" {\n"
|
||||
" writeBuf( ldsFixedBuffer, bodyB );\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" cs[i].m_batchIdx = batchIdx;\n"
|
||||
"\n"
|
||||
" if (i!=numValidConstraints)\n"
|
||||
" {\n"
|
||||
" //btSwap(cs[i],cs[numValidConstraints]);\n"
|
||||
" \n"
|
||||
" Contact4 tmp = cs[i];\n"
|
||||
" cs[i] = cs[numValidConstraints];\n"
|
||||
" cs[numValidConstraints] = tmp;\n"
|
||||
" \n"
|
||||
"// tmp = cs[i];\n"
|
||||
"// cs[i] = cs[numValidConstraints];\n"
|
||||
"// cs[numValidConstraints] = tmp;\n"
|
||||
"#ifdef CHECK_SIZE\n"
|
||||
" tmp.m_worldPos[0] = cs[i].m_worldPos[0];\n"
|
||||
" tmp.m_worldPos[1] = cs[i].m_worldPos[1];\n"
|
||||
" tmp.m_worldPos[2] = cs[i].m_worldPos[2];\n"
|
||||
" tmp.m_worldPos[3] = cs[i].m_worldPos[3];\n"
|
||||
" tmp.m_worldNormal = cs[i].m_worldNormal;\n"
|
||||
" tmp.m_restituitionCoeffCmp = cs[i].m_restituitionCoeffCmp;\n"
|
||||
" tmp.m_frictionCoeffCmp = cs[i].m_frictionCoeffCmp;\n"
|
||||
" tmp.m_batchIdx = cs[i].m_batchIdx;\n"
|
||||
" tmp.m_bodyAPtrAndSignBit = cs[i].m_bodyAPtrAndSignBit;\n"
|
||||
" tmp.m_bodyBPtrAndSignBit = cs[i].m_bodyBPtrAndSignBit;\n"
|
||||
" tmp.m_childIndexA = cs[i].m_childIndexA;\n"
|
||||
" tmp.m_childIndexB = cs[i].m_childIndexB;\n"
|
||||
" cs[i].m_worldPos[0] = cs[numValidConstraints].m_worldPos[0];\n"
|
||||
" cs[i].m_worldPos[1] = cs[numValidConstraints].m_worldPos[1];\n"
|
||||
" cs[i].m_worldPos[2] = cs[numValidConstraints].m_worldPos[2];\n"
|
||||
" cs[i].m_worldPos[3] = cs[numValidConstraints].m_worldPos[3];\n"
|
||||
" cs[i].m_worldNormal = cs[numValidConstraints].m_worldNormal;\n"
|
||||
" cs[i].m_restituitionCoeffCmp = cs[numValidConstraints].m_restituitionCoeffCmp;\n"
|
||||
" cs[i].m_frictionCoeffCmp = cs[numValidConstraints].m_frictionCoeffCmp;\n"
|
||||
" cs[i].m_batchIdx = cs[numValidConstraints].m_batchIdx;\n"
|
||||
" cs[i].m_bodyAPtrAndSignBit = cs[numValidConstraints].m_bodyAPtrAndSignBit;\n"
|
||||
" cs[i].m_bodyBPtrAndSignBit = cs[numValidConstraints].m_bodyBPtrAndSignBit;\n"
|
||||
" cs[i].m_childIndexA = cs[numValidConstraints].m_childIndexA;\n"
|
||||
" cs[i].m_childIndexB = cs[numValidConstraints].m_childIndexB;\n"
|
||||
" cs[numValidConstraints].m_worldPos[0] = tmp.m_worldPos[0];\n"
|
||||
" cs[numValidConstraints].m_worldPos[1] = tmp.m_worldPos[1];\n"
|
||||
" cs[numValidConstraints].m_worldPos[2] = tmp.m_worldPos[2];\n"
|
||||
" cs[numValidConstraints].m_worldPos[3] = tmp.m_worldPos[3];\n"
|
||||
" cs[numValidConstraints].m_worldNormal = tmp.m_worldNormal;\n"
|
||||
" cs[numValidConstraints].m_restituitionCoeffCmp = tmp.m_restituitionCoeffCmp;\n"
|
||||
" cs[numValidConstraints].m_frictionCoeffCmp = tmp.m_frictionCoeffCmp;\n"
|
||||
" cs[numValidConstraints].m_batchIdx = tmp.m_batchIdx;\n"
|
||||
" cs[numValidConstraints].m_bodyAPtrAndSignBit = tmp.m_bodyAPtrAndSignBit;\n"
|
||||
" cs[numValidConstraints].m_bodyBPtrAndSignBit = tmp.m_bodyBPtrAndSignBit;\n"
|
||||
" cs[numValidConstraints].m_childIndexA = tmp.m_childIndexA;\n"
|
||||
" cs[numValidConstraints].m_childIndexB = tmp.m_childIndexB;\n"
|
||||
"#endif\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" numValidConstraints++;\n"
|
||||
" \n"
|
||||
" nCurrentBatch++;\n"
|
||||
|
||||
@@ -2,19 +2,16 @@
|
||||
static const char* integrateKernelCL= \
|
||||
"/*\n"
|
||||
"Copyright (c) 2013 Advanced Micro Devices, Inc. \n"
|
||||
"\n"
|
||||
"This software is provided 'as-is', without any express or implied warranty.\n"
|
||||
"In no event will the authors be held liable for any damages arising from the use of this software.\n"
|
||||
"Permission is granted to anyone to use this software for any purpose, \n"
|
||||
"including commercial applications, and to alter it and redistribute it freely, \n"
|
||||
"subject to the following restrictions:\n"
|
||||
"\n"
|
||||
"1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n"
|
||||
"2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n"
|
||||
"3. This notice may not be removed or altered from any source distribution.\n"
|
||||
"*/\n"
|
||||
"//Originally written by Erwin Coumans\n"
|
||||
"\n"
|
||||
"float4 quatMult(float4 q1, float4 q2)\n"
|
||||
"{\n"
|
||||
" float4 q;\n"
|
||||
@@ -24,7 +21,6 @@ static const char* integrateKernelCL= \
|
||||
" q.w = q1.w * q2.w - q1.x * q2.x - q1.y * q2.y - q1.z * q2.z; \n"
|
||||
" return q;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"float4 quatNorm(float4 q)\n"
|
||||
"{\n"
|
||||
" float len = native_sqrt(dot(q, q));\n"
|
||||
@@ -39,24 +35,17 @@ static const char* integrateKernelCL= \
|
||||
" }\n"
|
||||
" return q;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"typedef struct\n"
|
||||
"{\n"
|
||||
" float4 m_pos;\n"
|
||||
" float4 m_quat;\n"
|
||||
" float4 m_linVel;\n"
|
||||
" float4 m_angVel;\n"
|
||||
"\n"
|
||||
" unsigned int m_collidableIdx;\n"
|
||||
" float m_invMass;\n"
|
||||
" float m_restituitionCoeff;\n"
|
||||
" float m_frictionCoeff;\n"
|
||||
"} Body;\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"__kernel void \n"
|
||||
" integrateTransformsKernel( __global Body* bodies,const int numNodes, float timeStep, float angularDamping, float4 gravityAcceleration)\n"
|
||||
"{\n"
|
||||
@@ -92,12 +81,10 @@ static const char* integrateKernelCL= \
|
||||
" float4 dorn = axis;\n"
|
||||
" dorn.w = native_cos(fAngle * timeStep * 0.5f);\n"
|
||||
" float4 orn0 = bodies[nodeID].m_quat;\n"
|
||||
"\n"
|
||||
" float4 predictedOrn = quatMult(dorn, orn0);\n"
|
||||
" predictedOrn = quatNorm(predictedOrn);\n"
|
||||
" bodies[nodeID].m_quat=predictedOrn;\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" //linear velocity \n"
|
||||
" bodies[nodeID].m_pos += bodies[nodeID].m_linVel * timeStep;\n"
|
||||
" \n"
|
||||
|
||||
@@ -2,56 +2,37 @@
|
||||
static const char* solveConstraintRowsCL= \
|
||||
"/*\n"
|
||||
"Copyright (c) 2013 Advanced Micro Devices, Inc. \n"
|
||||
"\n"
|
||||
"This software is provided 'as-is', without any express or implied warranty.\n"
|
||||
"In no event will the authors be held liable for any damages arising from the use of this software.\n"
|
||||
"Permission is granted to anyone to use this software for any purpose, \n"
|
||||
"including commercial applications, and to alter it and redistribute it freely, \n"
|
||||
"subject to the following restrictions:\n"
|
||||
"\n"
|
||||
"1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n"
|
||||
"2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n"
|
||||
"3. This notice may not be removed or altered from any source distribution.\n"
|
||||
"*/\n"
|
||||
"//Originally written by Erwin Coumans\n"
|
||||
"\n"
|
||||
"#define B3_CONSTRAINT_FLAG_ENABLED 1\n"
|
||||
"\n"
|
||||
"#define B3_GPU_POINT2POINT_CONSTRAINT_TYPE 3\n"
|
||||
"#define B3_GPU_FIXED_CONSTRAINT_TYPE 4\n"
|
||||
"\n"
|
||||
"#define MOTIONCLAMP 100000 //unused, for debugging/safety in case constraint solver fails\n"
|
||||
"#define B3_INFINITY 1e30f\n"
|
||||
"\n"
|
||||
"#define mymake_float4 (float4)\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"__inline float dot3F4(float4 a, float4 b)\n"
|
||||
"{\n"
|
||||
" float4 a1 = mymake_float4(a.xyz,0.f);\n"
|
||||
" float4 b1 = mymake_float4(b.xyz,0.f);\n"
|
||||
" return dot(a1, b1);\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"typedef float4 Quaternion;\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"typedef struct\n"
|
||||
"{\n"
|
||||
" float4 m_row[3];\n"
|
||||
"}Matrix3x3;\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float4 mtMul1(Matrix3x3 a, float4 b);\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float4 mtMul3(float4 a, Matrix3x3 b);\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float4 mtMul1(Matrix3x3 a, float4 b)\n"
|
||||
"{\n"
|
||||
@@ -62,36 +43,28 @@ static const char* solveConstraintRowsCL= \
|
||||
" ans.w = 0.f;\n"
|
||||
" return ans;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float4 mtMul3(float4 a, Matrix3x3 b)\n"
|
||||
"{\n"
|
||||
" float4 colx = mymake_float4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n"
|
||||
" float4 coly = mymake_float4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n"
|
||||
" float4 colz = mymake_float4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n"
|
||||
"\n"
|
||||
" float4 ans;\n"
|
||||
" ans.x = dot3F4( a, colx );\n"
|
||||
" ans.y = dot3F4( a, coly );\n"
|
||||
" ans.z = dot3F4( a, colz );\n"
|
||||
" return ans;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"typedef struct\n"
|
||||
"{\n"
|
||||
" Matrix3x3 m_invInertiaWorld;\n"
|
||||
" Matrix3x3 m_initInvInertia;\n"
|
||||
"} BodyInertia;\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"typedef struct\n"
|
||||
"{\n"
|
||||
" Matrix3x3 m_basis;//orientation\n"
|
||||
" float4 m_origin;//transform\n"
|
||||
"}b3Transform;\n"
|
||||
"\n"
|
||||
"typedef struct\n"
|
||||
"{\n"
|
||||
"// b3Transform m_worldTransformUnused;\n"
|
||||
@@ -104,38 +77,30 @@ static const char* solveConstraintRowsCL= \
|
||||
" float4 m_turnVelocity;\n"
|
||||
" float4 m_linearVelocity;\n"
|
||||
" float4 m_angularVelocity;\n"
|
||||
"\n"
|
||||
" union \n"
|
||||
" {\n"
|
||||
" void* m_originalBody;\n"
|
||||
" int m_originalBodyIndex;\n"
|
||||
" };\n"
|
||||
" int padding[3];\n"
|
||||
"\n"
|
||||
"} b3GpuSolverBody;\n"
|
||||
"\n"
|
||||
"typedef struct\n"
|
||||
"{\n"
|
||||
" float4 m_pos;\n"
|
||||
" Quaternion m_quat;\n"
|
||||
" float4 m_linVel;\n"
|
||||
" float4 m_angVel;\n"
|
||||
"\n"
|
||||
" unsigned int m_shapeIdx;\n"
|
||||
" float m_invMass;\n"
|
||||
" float m_restituitionCoeff;\n"
|
||||
" float m_frictionCoeff;\n"
|
||||
"} b3RigidBodyCL;\n"
|
||||
"\n"
|
||||
"typedef struct\n"
|
||||
"{\n"
|
||||
"\n"
|
||||
" float4 m_relpos1CrossNormal;\n"
|
||||
" float4 m_contactNormal;\n"
|
||||
"\n"
|
||||
" float4 m_relpos2CrossNormal;\n"
|
||||
" //float4 m_contactNormal2;//usually m_contactNormal2 == -m_contactNormal\n"
|
||||
"\n"
|
||||
" float4 m_angularComponentA;\n"
|
||||
" float4 m_angularComponentB;\n"
|
||||
" \n"
|
||||
@@ -152,15 +117,11 @@ static const char* solveConstraintRowsCL= \
|
||||
" float m_upperLimit;\n"
|
||||
" float m_rhsPenetration;\n"
|
||||
" int m_originalConstraint;\n"
|
||||
"\n"
|
||||
"\n"
|
||||
" int m_overrideNumSolverIterations;\n"
|
||||
" int m_frictionIndex;\n"
|
||||
" int m_solverBodyIdA;\n"
|
||||
" int m_solverBodyIdB;\n"
|
||||
"\n"
|
||||
"} b3SolverConstraint;\n"
|
||||
"\n"
|
||||
"typedef struct \n"
|
||||
"{\n"
|
||||
" int m_bodyAPtrAndSignBit;\n"
|
||||
@@ -168,28 +129,18 @@ static const char* solveConstraintRowsCL= \
|
||||
" int m_originalConstraintIndex;\n"
|
||||
" int m_batchId;\n"
|
||||
"} b3BatchConstraint;\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"typedef struct \n"
|
||||
"{\n"
|
||||
" int m_constraintType;\n"
|
||||
" int m_rbA;\n"
|
||||
" int m_rbB;\n"
|
||||
" float m_breakingImpulseThreshold;\n"
|
||||
"\n"
|
||||
" float4 m_pivotInA;\n"
|
||||
" float4 m_pivotInB;\n"
|
||||
" Quaternion m_relTargetAB;\n"
|
||||
"\n"
|
||||
" int m_flags;\n"
|
||||
" int m_padding[3];\n"
|
||||
"} b3GpuGenericConstraint;\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"/*b3Transform getWorldTransform(b3RigidBodyCL* rb)\n"
|
||||
"{\n"
|
||||
" b3Transform newTrans;\n"
|
||||
@@ -197,39 +148,25 @@ static const char* solveConstraintRowsCL= \
|
||||
" newTrans.setRotation(rb->m_quat);\n"
|
||||
" return newTrans;\n"
|
||||
"}*/\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float4 cross3(float4 a, float4 b)\n"
|
||||
"{\n"
|
||||
" return cross(a,b);\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float4 fastNormalize4(float4 v)\n"
|
||||
"{\n"
|
||||
" v = mymake_float4(v.xyz,0.f);\n"
|
||||
" return fast_normalize(v);\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"Quaternion qtMul(Quaternion a, Quaternion b);\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"Quaternion qtNormalize(Quaternion in);\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float4 qtRotate(Quaternion q, float4 vec);\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"Quaternion qtInvert(Quaternion q);\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"Quaternion qtMul(Quaternion a, Quaternion b)\n"
|
||||
"{\n"
|
||||
@@ -240,7 +177,6 @@ static const char* solveConstraintRowsCL= \
|
||||
" ans.w = a.w*b.w - dot3F4(a, b);\n"
|
||||
" return ans;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"Quaternion qtNormalize(Quaternion in)\n"
|
||||
"{\n"
|
||||
@@ -257,30 +193,23 @@ static const char* solveConstraintRowsCL= \
|
||||
" float4 out = qtMul(qtMul(q,vcpy),qInv);\n"
|
||||
" return out;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"Quaternion qtInvert(Quaternion q)\n"
|
||||
"{\n"
|
||||
" return (Quaternion)(-q.xyz, q.w);\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"__inline void internalApplyImpulse(__global b3GpuSolverBody* body, float4 linearComponent, float4 angularComponent,float impulseMagnitude)\n"
|
||||
"{\n"
|
||||
" body->m_deltaLinearVelocity += linearComponent*impulseMagnitude*body->m_linearFactor;\n"
|
||||
" body->m_deltaAngularVelocity += angularComponent*(impulseMagnitude*body->m_angularFactor);\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"void resolveSingleConstraintRowGeneric(__global b3GpuSolverBody* body1, __global b3GpuSolverBody* body2, __global b3SolverConstraint* c)\n"
|
||||
"{\n"
|
||||
" float deltaImpulse = c->m_rhs-c->m_appliedImpulse*c->m_cfm;\n"
|
||||
" float deltaVel1Dotn = dot3F4(c->m_contactNormal,body1->m_deltaLinearVelocity) + dot3F4(c->m_relpos1CrossNormal,body1->m_deltaAngularVelocity);\n"
|
||||
" float deltaVel2Dotn = -dot3F4(c->m_contactNormal,body2->m_deltaLinearVelocity) + dot3F4(c->m_relpos2CrossNormal,body2->m_deltaAngularVelocity);\n"
|
||||
"\n"
|
||||
" deltaImpulse -= deltaVel1Dotn*c->m_jacDiagABInv;\n"
|
||||
" deltaImpulse -= deltaVel2Dotn*c->m_jacDiagABInv;\n"
|
||||
"\n"
|
||||
" float sum = c->m_appliedImpulse + deltaImpulse;\n"
|
||||
" if (sum < c->m_lowerLimit)\n"
|
||||
" {\n"
|
||||
@@ -296,12 +225,9 @@ static const char* solveConstraintRowsCL= \
|
||||
" {\n"
|
||||
" c->m_appliedImpulse = sum;\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" internalApplyImpulse(body1,c->m_contactNormal*body1->m_invMass,c->m_angularComponentA,deltaImpulse);\n"
|
||||
" internalApplyImpulse(body2,-c->m_contactNormal*body2->m_invMass,c->m_angularComponentB,deltaImpulse);\n"
|
||||
"\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__kernel void solveJointConstraintRows(__global b3GpuSolverBody* solverBodies,\n"
|
||||
" __global b3BatchConstraint* batchConstraints,\n"
|
||||
" __global b3SolverConstraint* rows,\n"
|
||||
@@ -315,7 +241,6 @@ static const char* solveConstraintRowsCL= \
|
||||
" int b = get_global_id(0);\n"
|
||||
" if (b>=numConstraintsInBatch)\n"
|
||||
" return;\n"
|
||||
"\n"
|
||||
" __global b3BatchConstraint* c = &batchConstraints[b+batchOffset];\n"
|
||||
" int originalConstraintIndex = c->m_originalConstraintIndex;\n"
|
||||
" if (constraints[originalConstraintIndex].m_flags&B3_CONSTRAINT_FLAG_ENABLED)\n"
|
||||
@@ -329,16 +254,13 @@ static const char* solveConstraintRowsCL= \
|
||||
" }\n"
|
||||
" }\n"
|
||||
"};\n"
|
||||
"\n"
|
||||
"__kernel void initSolverBodies(__global b3GpuSolverBody* solverBodies,__global b3RigidBodyCL* bodiesCL, int numBodies)\n"
|
||||
"{\n"
|
||||
" int i = get_global_id(0);\n"
|
||||
" if (i>=numBodies)\n"
|
||||
" return;\n"
|
||||
"\n"
|
||||
" __global b3GpuSolverBody* solverBody = &solverBodies[i];\n"
|
||||
" __global b3RigidBodyCL* bodyCL = &bodiesCL[i];\n"
|
||||
"\n"
|
||||
" solverBody->m_deltaLinearVelocity = (float4)(0.f,0.f,0.f,0.f);\n"
|
||||
" solverBody->m_deltaAngularVelocity = (float4)(0.f,0.f,0.f,0.f);\n"
|
||||
" solverBody->m_pushVelocity = (float4)(0.f,0.f,0.f,0.f);\n"
|
||||
@@ -350,7 +272,6 @@ static const char* solveConstraintRowsCL= \
|
||||
" solverBody->m_linearVelocity = bodyCL->m_linVel;\n"
|
||||
" solverBody->m_angularVelocity = bodyCL->m_angVel;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__kernel void breakViolatedConstraintsKernel(__global b3GpuGenericConstraint* constraints, __global unsigned int* numConstraintRows, __global unsigned int* rowOffsets, __global b3SolverConstraint* rows, int numConstraints)\n"
|
||||
"{\n"
|
||||
" int cid = get_global_id(0);\n"
|
||||
@@ -370,17 +291,12 @@ static const char* solveConstraintRowsCL= \
|
||||
" }\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"__kernel void getInfo1Kernel(__global unsigned int* infos, __global b3GpuGenericConstraint* constraints, int numConstraints)\n"
|
||||
"{\n"
|
||||
" int i = get_global_id(0);\n"
|
||||
" if (i>=numConstraints)\n"
|
||||
" return;\n"
|
||||
"\n"
|
||||
" __global b3GpuGenericConstraint* constraint = &constraints[i];\n"
|
||||
"\n"
|
||||
" switch (constraint->m_constraintType)\n"
|
||||
" {\n"
|
||||
" case B3_GPU_POINT2POINT_CONSTRAINT_TYPE:\n"
|
||||
@@ -398,7 +314,6 @@ static const char* solveConstraintRowsCL= \
|
||||
" }\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__kernel void initBatchConstraintsKernel(__global unsigned int* numConstraintRows, __global unsigned int* rowOffsets, \n"
|
||||
" __global b3BatchConstraint* batchConstraints, \n"
|
||||
" __global b3GpuGenericConstraint* constraints,\n"
|
||||
@@ -408,26 +323,18 @@ static const char* solveConstraintRowsCL= \
|
||||
" int i = get_global_id(0);\n"
|
||||
" if (i>=numConstraints)\n"
|
||||
" return;\n"
|
||||
"\n"
|
||||
" int rbA = constraints[i].m_rbA;\n"
|
||||
" int rbB = constraints[i].m_rbB;\n"
|
||||
"\n"
|
||||
" batchConstraints[i].m_bodyAPtrAndSignBit = bodies[rbA].m_invMass? rbA : -rbA;\n"
|
||||
" batchConstraints[i].m_bodyBPtrAndSignBit = bodies[rbB].m_invMass? rbB : -rbB;\n"
|
||||
" batchConstraints[i].m_batchId = -1;\n"
|
||||
" batchConstraints[i].m_originalConstraintIndex = i;\n"
|
||||
"\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"typedef struct\n"
|
||||
"{\n"
|
||||
" // integrator parameters: frames per second (1/stepsize), default error\n"
|
||||
" // reduction parameter (0..1).\n"
|
||||
" float fps,erp;\n"
|
||||
"\n"
|
||||
" // for the first and second body, pointers to two (linear and angular)\n"
|
||||
" // n*3 jacobian sub matrices, stored by rows. these matrices will have\n"
|
||||
" // been initialized to 0 on entry. if the second body is zero then the\n"
|
||||
@@ -441,7 +348,6 @@ static const char* solveConstraintRowsCL= \
|
||||
" {\n"
|
||||
" __global float4* m_J1angularAxisFloat4;\n"
|
||||
" __global float* m_J1angularAxis;\n"
|
||||
"\n"
|
||||
" };\n"
|
||||
" union\n"
|
||||
" {\n"
|
||||
@@ -455,17 +361,14 @@ static const char* solveConstraintRowsCL= \
|
||||
" };\n"
|
||||
" // elements to jump from one row to the next in J's\n"
|
||||
" int rowskip;\n"
|
||||
"\n"
|
||||
" // right hand sides of the equation J*v = c + cfm * lambda. cfm is the\n"
|
||||
" // \"constraint force mixing\" vector. c is set to zero on entry, cfm is\n"
|
||||
" // set to a constant value (typically very small or zero) value on entry.\n"
|
||||
" __global float* m_constraintError;\n"
|
||||
" __global float* cfm;\n"
|
||||
"\n"
|
||||
" // lo and hi limits for variables (set to -/+ infinity on entry).\n"
|
||||
" __global float* m_lowerLimit;\n"
|
||||
" __global float* m_upperLimit;\n"
|
||||
"\n"
|
||||
" // findex vector for variables. see the LCP solver interface for a\n"
|
||||
" // description of what this does. this is set to -1 on entry.\n"
|
||||
" // note that the returned indexes are relative to the first index of\n"
|
||||
@@ -473,39 +376,28 @@ static const char* solveConstraintRowsCL= \
|
||||
" __global int *findex;\n"
|
||||
" // number of solver iterations\n"
|
||||
" int m_numIterations;\n"
|
||||
"\n"
|
||||
" //damping of the velocity\n"
|
||||
" float m_damping;\n"
|
||||
"} b3GpuConstraintInfo2;\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"void getSkewSymmetricMatrix(float4 vecIn, __global float4* v0,__global float4* v1,__global float4* v2)\n"
|
||||
"{\n"
|
||||
" *v0 = (float4)(0. ,-vecIn.z ,vecIn.y,0.f);\n"
|
||||
" *v1 = (float4)(vecIn.z ,0. ,-vecIn.x,0.f);\n"
|
||||
" *v2 = (float4)(-vecIn.y ,vecIn.x ,0.f,0.f);\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"void getInfo2Point2Point(__global b3GpuGenericConstraint* constraint,b3GpuConstraintInfo2* info,__global b3RigidBodyCL* bodies)\n"
|
||||
"{\n"
|
||||
" float4 posA = bodies[constraint->m_rbA].m_pos;\n"
|
||||
" Quaternion rotA = bodies[constraint->m_rbA].m_quat;\n"
|
||||
"\n"
|
||||
" float4 posB = bodies[constraint->m_rbB].m_pos;\n"
|
||||
" Quaternion rotB = bodies[constraint->m_rbB].m_quat;\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
" // anchor points in global coordinates with respect to body PORs.\n"
|
||||
" \n"
|
||||
" // set jacobian\n"
|
||||
" info->m_J1linearAxis[0] = 1;\n"
|
||||
" info->m_J1linearAxis[info->rowskip+1] = 1;\n"
|
||||
" info->m_J1linearAxis[2*info->rowskip+2] = 1;\n"
|
||||
"\n"
|
||||
" float4 a1 = qtRotate(rotA,constraint->m_pivotInA);\n"
|
||||
"\n"
|
||||
" {\n"
|
||||
" __global float4* angular0 = (__global float4*)(info->m_J1angularAxis);\n"
|
||||
" __global float4* angular1 = (__global float4*)(info->m_J1angularAxis+info->rowskip);\n"
|
||||
@@ -533,18 +425,15 @@ static const char* solveConstraintRowsCL= \
|
||||
" // set right hand side\n"
|
||||
"// float currERP = (m_flags & B3_P2P_FLAGS_ERP) ? m_erp : info->erp;\n"
|
||||
" float currERP = info->erp;\n"
|
||||
"\n"
|
||||
" float k = info->fps * currERP;\n"
|
||||
" int j;\n"
|
||||
" float4 result = a2 + posB - a1 - posA;\n"
|
||||
" float* resultPtr = &result;\n"
|
||||
"\n"
|
||||
" for (j=0; j<3; j++)\n"
|
||||
" {\n"
|
||||
" info->m_constraintError[j*info->rowskip] = k * (resultPtr[j]);\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"Quaternion nearest( Quaternion first, Quaternion qd)\n"
|
||||
"{\n"
|
||||
" Quaternion diff,sum;\n"
|
||||
@@ -555,7 +444,6 @@ static const char* solveConstraintRowsCL= \
|
||||
" return qd;\n"
|
||||
" return (-qd);\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"float b3Acos(float x) \n"
|
||||
"{ \n"
|
||||
" if (x<-1) \n"
|
||||
@@ -564,7 +452,6 @@ static const char* solveConstraintRowsCL= \
|
||||
" x=1;\n"
|
||||
" return acos(x); \n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"float getAngle(Quaternion orn)\n"
|
||||
"{\n"
|
||||
" if (orn.w>=1.f)\n"
|
||||
@@ -572,7 +459,6 @@ static const char* solveConstraintRowsCL= \
|
||||
" float s = 2.f * b3Acos(orn.w);\n"
|
||||
" return s;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"void calculateDiffAxisAngleQuaternion( Quaternion orn0,Quaternion orn1a,float4* axis,float* angle)\n"
|
||||
"{\n"
|
||||
" Quaternion orn1 = nearest(orn0,orn1a);\n"
|
||||
@@ -588,17 +474,12 @@ static const char* solveConstraintRowsCL= \
|
||||
" else\n"
|
||||
" *axis /= sqrt(len);\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"void getInfo2FixedOrientation(__global b3GpuGenericConstraint* constraint,b3GpuConstraintInfo2* info,__global b3RigidBodyCL* bodies, int start_row)\n"
|
||||
"{\n"
|
||||
" Quaternion worldOrnA = bodies[constraint->m_rbA].m_quat;\n"
|
||||
" Quaternion worldOrnB = bodies[constraint->m_rbB].m_quat;\n"
|
||||
"\n"
|
||||
" int s = info->rowskip;\n"
|
||||
" int start_index = start_row * s;\n"
|
||||
"\n"
|
||||
" // 3 rows to make body rotations equal\n"
|
||||
" info->m_J1angularAxis[start_index] = 1;\n"
|
||||
" info->m_J1angularAxis[start_index + s + 1] = 1;\n"
|
||||
@@ -626,16 +507,12 @@ static const char* solveConstraintRowsCL= \
|
||||
" info->m_constraintError[(3+j)*info->rowskip] = k * resultPtr[j];\n"
|
||||
" }\n"
|
||||
" \n"
|
||||
"\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"__kernel void writeBackVelocitiesKernel(__global b3RigidBodyCL* bodies,__global b3GpuSolverBody* solverBodies,int numBodies)\n"
|
||||
"{\n"
|
||||
" int i = get_global_id(0);\n"
|
||||
" if (i>=numBodies)\n"
|
||||
" return;\n"
|
||||
"\n"
|
||||
" if (bodies[i].m_invMass)\n"
|
||||
" {\n"
|
||||
"// if (length(solverBodies[i].m_deltaLinearVelocity)<MOTIONCLAMP)\n"
|
||||
@@ -648,8 +525,6 @@ static const char* solveConstraintRowsCL= \
|
||||
" } \n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"__kernel void getInfo2Kernel(__global b3SolverConstraint* solverConstraintRows, \n"
|
||||
" __global unsigned int* infos, \n"
|
||||
" __global unsigned int* constraintRowOffsets, \n"
|
||||
@@ -665,7 +540,6 @@ static const char* solveConstraintRowsCL= \
|
||||
" int globalNumIterations,\n"
|
||||
" int numConstraints)\n"
|
||||
"{\n"
|
||||
"\n"
|
||||
" int i = get_global_id(0);\n"
|
||||
" if (i>=numConstraints)\n"
|
||||
" return;\n"
|
||||
@@ -675,17 +549,12 @@ static const char* solveConstraintRowsCL= \
|
||||
" \n"
|
||||
" __global b3SolverConstraint* currentConstraintRow = &solverConstraintRows[constraintRowOffsets[i]];\n"
|
||||
" __global b3GpuGenericConstraint* constraint = &constraints[i];\n"
|
||||
"\n"
|
||||
" __global b3RigidBodyCL* rbA = &bodies[ constraint->m_rbA];\n"
|
||||
" __global b3RigidBodyCL* rbB = &bodies[ constraint->m_rbB];\n"
|
||||
"\n"
|
||||
" int solverBodyIdA = constraint->m_rbA;\n"
|
||||
" int solverBodyIdB = constraint->m_rbB;\n"
|
||||
"\n"
|
||||
" __global b3GpuSolverBody* bodyAPtr = &solverBodies[solverBodyIdA];\n"
|
||||
" __global b3GpuSolverBody* bodyBPtr = &solverBodies[solverBodyIdB];\n"
|
||||
"\n"
|
||||
"\n"
|
||||
" if (rbA->m_invMass)\n"
|
||||
" {\n"
|
||||
" batchConstraints[i].m_bodyAPtrAndSignBit = solverBodyIdA;\n"
|
||||
@@ -695,7 +564,6 @@ static const char* solveConstraintRowsCL= \
|
||||
"// m_staticIdx = 0;\n"
|
||||
" batchConstraints[i].m_bodyAPtrAndSignBit = -solverBodyIdA;\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" if (rbB->m_invMass)\n"
|
||||
" {\n"
|
||||
" batchConstraints[i].m_bodyBPtrAndSignBit = solverBodyIdB;\n"
|
||||
@@ -705,14 +573,11 @@ static const char* solveConstraintRowsCL= \
|
||||
"// m_staticIdx = 0;\n"
|
||||
" batchConstraints[i].m_bodyBPtrAndSignBit = -solverBodyIdB;\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" if (info1)\n"
|
||||
" {\n"
|
||||
" int overrideNumSolverIterations = 0;//constraint->getOverrideNumSolverIterations() > 0 ? constraint->getOverrideNumSolverIterations() : infoGlobal.m_numIterations;\n"
|
||||
"// if (overrideNumSolverIterations>m_maxOverrideNumSolverIterations)\n"
|
||||
" // m_maxOverrideNumSolverIterations = overrideNumSolverIterations;\n"
|
||||
"\n"
|
||||
"\n"
|
||||
" int j;\n"
|
||||
" for ( j=0;j<info1;j++)\n"
|
||||
" {\n"
|
||||
@@ -728,7 +593,6 @@ static const char* solveConstraintRowsCL= \
|
||||
" currentConstraintRow[j].m_jacDiagABInv = 0.f;\n"
|
||||
" currentConstraintRow[j].m_lowerLimit = 0.f;\n"
|
||||
" currentConstraintRow[j].m_upperLimit = 0.f;\n"
|
||||
"\n"
|
||||
" currentConstraintRow[j].m_originalConstraint = i;\n"
|
||||
" currentConstraintRow[j].m_overrideNumSolverIterations = 0;\n"
|
||||
" currentConstraintRow[j].m_relpos1CrossNormal = (float4)(0,0,0,0);\n"
|
||||
@@ -746,7 +610,6 @@ static const char* solveConstraintRowsCL= \
|
||||
" currentConstraintRow[j].m_solverBodyIdB = solverBodyIdB;\n"
|
||||
" currentConstraintRow[j].m_overrideNumSolverIterations = overrideNumSolverIterations; \n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" bodyAPtr->m_deltaLinearVelocity = (float4)(0,0,0,0);\n"
|
||||
" bodyAPtr->m_deltaAngularVelocity = (float4)(0,0,0,0);\n"
|
||||
" bodyAPtr->m_pushVelocity = (float4)(0,0,0,0);\n"
|
||||
@@ -755,12 +618,8 @@ static const char* solveConstraintRowsCL= \
|
||||
" bodyBPtr->m_deltaAngularVelocity = (float4)(0,0,0,0);\n"
|
||||
" bodyBPtr->m_pushVelocity = (float4)(0,0,0,0);\n"
|
||||
" bodyBPtr->m_turnVelocity = (float4)(0,0,0,0);\n"
|
||||
"\n"
|
||||
" int rowskip = sizeof(b3SolverConstraint)/sizeof(float);//check this\n"
|
||||
" \n"
|
||||
" \n"
|
||||
"\n"
|
||||
"\n"
|
||||
" b3GpuConstraintInfo2 info2;\n"
|
||||
" info2.fps = 1.f/timeStep;\n"
|
||||
" info2.erp = globalErp;\n"
|
||||
@@ -769,7 +628,6 @@ static const char* solveConstraintRowsCL= \
|
||||
" info2.m_J2linearAxisFloat4 = 0;\n"
|
||||
" info2.m_J2angularAxisFloat4 = &currentConstraintRow->m_relpos2CrossNormal;\n"
|
||||
" info2.rowskip = sizeof(b3SolverConstraint)/sizeof(float);//check this\n"
|
||||
"\n"
|
||||
" ///the size of b3SolverConstraint needs be a multiple of float\n"
|
||||
"// b3Assert(info2.rowskip*sizeof(float)== sizeof(b3SolverConstraint));\n"
|
||||
" info2.m_constraintError = &currentConstraintRow->m_rhs;\n"
|
||||
@@ -779,7 +637,6 @@ static const char* solveConstraintRowsCL= \
|
||||
" info2.m_lowerLimit = &currentConstraintRow->m_lowerLimit;\n"
|
||||
" info2.m_upperLimit = &currentConstraintRow->m_upperLimit;\n"
|
||||
" info2.m_numIterations = globalNumIterations;\n"
|
||||
"\n"
|
||||
" switch (constraint->m_constraintType)\n"
|
||||
" {\n"
|
||||
" case B3_GPU_POINT2POINT_CONSTRAINT_TYPE:\n"
|
||||
@@ -790,37 +647,29 @@ static const char* solveConstraintRowsCL= \
|
||||
" case B3_GPU_FIXED_CONSTRAINT_TYPE:\n"
|
||||
" {\n"
|
||||
" getInfo2Point2Point(constraint,&info2,bodies);\n"
|
||||
"\n"
|
||||
" getInfo2FixedOrientation(constraint,&info2,bodies,3);\n"
|
||||
"\n"
|
||||
" break;\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" default:\n"
|
||||
" {\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" ///finalize the constraint setup\n"
|
||||
" for ( j=0;j<info1;j++)\n"
|
||||
" {\n"
|
||||
"			__global b3SolverConstraint* solverConstraint = &currentConstraintRow[j];\n"
|
||||
"\n"
|
||||
" if (solverConstraint->m_upperLimit>=constraint->m_breakingImpulseThreshold)\n"
|
||||
" {\n"
|
||||
" solverConstraint->m_upperLimit = constraint->m_breakingImpulseThreshold;\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" if (solverConstraint->m_lowerLimit<=-constraint->m_breakingImpulseThreshold)\n"
|
||||
" {\n"
|
||||
" solverConstraint->m_lowerLimit = -constraint->m_breakingImpulseThreshold;\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
"// solverConstraint->m_originalContactPoint = constraint;\n"
|
||||
" \n"
|
||||
" Matrix3x3 invInertiaWorldA= inertias[constraint->m_rbA].m_invInertiaWorld;\n"
|
||||
" {\n"
|
||||
"\n"
|
||||
" //float4 angularFactorA(1,1,1);\n"
|
||||
" float4 ftorqueAxis1 = solverConstraint->m_relpos1CrossNormal;\n"
|
||||
" solverConstraint->m_angularComponentA = mtMul1(invInertiaWorldA,ftorqueAxis1);//*angularFactorA;\n"
|
||||
@@ -828,11 +677,9 @@ static const char* solveConstraintRowsCL= \
|
||||
" \n"
|
||||
" Matrix3x3 invInertiaWorldB= inertias[constraint->m_rbB].m_invInertiaWorld;\n"
|
||||
" {\n"
|
||||
"\n"
|
||||
" float4 ftorqueAxis2 = solverConstraint->m_relpos2CrossNormal;\n"
|
||||
" solverConstraint->m_angularComponentB = mtMul1(invInertiaWorldB,ftorqueAxis2);//*constraint->m_rbB.getAngularFactor();\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" {\n"
|
||||
" //it is ok to use solverConstraint->m_contactNormal instead of -solverConstraint->m_contactNormal\n"
|
||||
" //because it gets multiplied iMJlB\n"
|
||||
@@ -840,7 +687,6 @@ static const char* solveConstraintRowsCL= \
|
||||
" float4 iMJaA = mtMul3(solverConstraint->m_relpos1CrossNormal,invInertiaWorldA);\n"
|
||||
" float4 iMJlB = solverConstraint->m_contactNormal*rbB->m_invMass;//sign of normal?\n"
|
||||
" float4 iMJaB = mtMul3(solverConstraint->m_relpos2CrossNormal,invInertiaWorldB);\n"
|
||||
"\n"
|
||||
" float sum = dot3F4(iMJlA,solverConstraint->m_contactNormal);\n"
|
||||
" sum += dot3F4(iMJaA,solverConstraint->m_relpos1CrossNormal);\n"
|
||||
" sum += dot3F4(iMJlB,solverConstraint->m_contactNormal);\n"
|
||||
@@ -854,17 +700,13 @@ static const char* solveConstraintRowsCL= \
|
||||
" solverConstraint->m_jacDiagABInv = 0.f;\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
"\n"
|
||||
" ///fix rhs\n"
|
||||
" ///todo: add force/torque accelerators\n"
|
||||
" {\n"
|
||||
" float rel_vel;\n"
|
||||
" float vel1Dotn = dot3F4(solverConstraint->m_contactNormal,rbA->m_linVel) + dot3F4(solverConstraint->m_relpos1CrossNormal,rbA->m_angVel);\n"
|
||||
" float vel2Dotn = -dot3F4(solverConstraint->m_contactNormal,rbB->m_linVel) + dot3F4(solverConstraint->m_relpos2CrossNormal,rbB->m_angVel);\n"
|
||||
"\n"
|
||||
" rel_vel = vel1Dotn+vel2Dotn;\n"
|
||||
"\n"
|
||||
" float restitution = 0.f;\n"
|
||||
" float positionalError = solverConstraint->m_rhs;//already filled in by getConstraintInfo2\n"
|
||||
" float velocityError = restitution - rel_vel * info2.m_damping;\n"
|
||||
@@ -872,7 +714,6 @@ static const char* solveConstraintRowsCL= \
|
||||
" float velocityImpulse = velocityError *solverConstraint->m_jacDiagABInv;\n"
|
||||
" solverConstraint->m_rhs = penetrationImpulse+velocityImpulse;\n"
|
||||
" solverConstraint->m_appliedImpulse = 0.f;\n"
|
||||
"\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
|
||||
@@ -204,22 +204,7 @@ typedef struct
|
||||
u32 m_paddings[1];
|
||||
} Constraint4;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
float4 m_worldPos[4];
|
||||
float4 m_worldNormal;
|
||||
u32 m_coeffs;
|
||||
int m_batchIdx;
|
||||
|
||||
int m_bodyAPtrAndSignBit;
|
||||
int m_bodyBPtrAndSignBit;
|
||||
|
||||
int m_childIndexA;
|
||||
int m_childIndexB;
|
||||
int m_unused1;
|
||||
int m_unused2;
|
||||
|
||||
} Contact4;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
|
||||
@@ -2,37 +2,29 @@
|
||||
static const char* solveContactCL= \
|
||||
"/*\n"
|
||||
"Copyright (c) 2012 Advanced Micro Devices, Inc. \n"
|
||||
"\n"
|
||||
"This software is provided 'as-is', without any express or implied warranty.\n"
|
||||
"In no event will the authors be held liable for any damages arising from the use of this software.\n"
|
||||
"Permission is granted to anyone to use this software for any purpose, \n"
|
||||
"including commercial applications, and to alter it and redistribute it freely, \n"
|
||||
"subject to the following restrictions:\n"
|
||||
"\n"
|
||||
"1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n"
|
||||
"2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n"
|
||||
"3. This notice may not be removed or altered from any source distribution.\n"
|
||||
"*/\n"
|
||||
"//Originally written by Takahiro Harada\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"//#pragma OPENCL EXTENSION cl_amd_printf : enable\n"
|
||||
"#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n"
|
||||
"#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n"
|
||||
"#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n"
|
||||
"#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"#ifdef cl_ext_atomic_counters_32\n"
|
||||
"#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable\n"
|
||||
"#else\n"
|
||||
"#define counter32_t volatile global int*\n"
|
||||
"#endif\n"
|
||||
"\n"
|
||||
"typedef unsigned int u32;\n"
|
||||
"typedef unsigned short u16;\n"
|
||||
"typedef unsigned char u8;\n"
|
||||
"\n"
|
||||
"#define GET_GROUP_IDX get_group_id(0)\n"
|
||||
"#define GET_LOCAL_IDX get_local_id(0)\n"
|
||||
"#define GET_GLOBAL_IDX get_global_id(0)\n"
|
||||
@@ -46,43 +38,28 @@ static const char* solveContactCL= \
|
||||
"#define AtomAdd(x, value) atom_add(&(x), value)\n"
|
||||
"#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value )\n"
|
||||
"#define AtomXhg(x, value) atom_xchg ( &(x), value )\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"#define SELECT_UINT4( b, a, condition ) select( b,a,condition )\n"
|
||||
"\n"
|
||||
"#define mymake_float4 (float4)\n"
|
||||
"//#define make_float2 (float2)\n"
|
||||
"//#define make_uint4 (uint4)\n"
|
||||
"//#define make_int4 (int4)\n"
|
||||
"//#define make_uint2 (uint2)\n"
|
||||
"//#define make_int2 (int2)\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"#define max2 max\n"
|
||||
"#define min2 min\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"///////////////////////////////////////\n"
|
||||
"// Vector\n"
|
||||
"///////////////////////////////////////\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float4 fastNormalize4(float4 v)\n"
|
||||
"{\n"
|
||||
" return fast_normalize(v);\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float4 cross3(float4 a, float4 b)\n"
|
||||
"{\n"
|
||||
" return cross(a,b);\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float dot3F4(float4 a, float4 b)\n"
|
||||
"{\n"
|
||||
@@ -90,10 +67,6 @@ static const char* solveContactCL= \
|
||||
" float4 b1 = mymake_float4(b.xyz,0.f);\n"
|
||||
" return dot(a1, b1);\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float4 normalize3(const float4 a)\n"
|
||||
"{\n"
|
||||
@@ -102,33 +75,17 @@ static const char* solveContactCL= \
|
||||
"// float length = sqrtf(dot3F4(a, a));\n"
|
||||
"// return 1.f/length * a;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"///////////////////////////////////////\n"
|
||||
"// Matrix3x3\n"
|
||||
"///////////////////////////////////////\n"
|
||||
"\n"
|
||||
"typedef struct\n"
|
||||
"{\n"
|
||||
" float4 m_row[3];\n"
|
||||
"}Matrix3x3;\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float4 mtMul1(Matrix3x3 a, float4 b);\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float4 mtMul3(float4 a, Matrix3x3 b);\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float4 mtMul1(Matrix3x3 a, float4 b)\n"
|
||||
"{\n"
|
||||
@@ -139,54 +96,39 @@ static const char* solveContactCL= \
|
||||
" ans.w = 0.f;\n"
|
||||
" return ans;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float4 mtMul3(float4 a, Matrix3x3 b)\n"
|
||||
"{\n"
|
||||
" float4 colx = mymake_float4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n"
|
||||
" float4 coly = mymake_float4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n"
|
||||
" float4 colz = mymake_float4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n"
|
||||
"\n"
|
||||
" float4 ans;\n"
|
||||
" ans.x = dot3F4( a, colx );\n"
|
||||
" ans.y = dot3F4( a, coly );\n"
|
||||
" ans.z = dot3F4( a, colz );\n"
|
||||
" return ans;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"///////////////////////////////////////\n"
|
||||
"// Quaternion\n"
|
||||
"///////////////////////////////////////\n"
|
||||
"\n"
|
||||
"typedef float4 Quaternion;\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"#define WG_SIZE 64\n"
|
||||
"\n"
|
||||
"typedef struct\n"
|
||||
"{\n"
|
||||
" float4 m_pos;\n"
|
||||
" Quaternion m_quat;\n"
|
||||
" float4 m_linVel;\n"
|
||||
" float4 m_angVel;\n"
|
||||
"\n"
|
||||
" u32 m_shapeIdx;\n"
|
||||
" float m_invMass;\n"
|
||||
" float m_restituitionCoeff;\n"
|
||||
" float m_frictionCoeff;\n"
|
||||
"} Body;\n"
|
||||
"\n"
|
||||
"typedef struct\n"
|
||||
"{\n"
|
||||
" Matrix3x3 m_invInertia;\n"
|
||||
" Matrix3x3 m_initInvInertia;\n"
|
||||
"} Shape;\n"
|
||||
"\n"
|
||||
"typedef struct\n"
|
||||
"{\n"
|
||||
" float4 m_linear;\n"
|
||||
@@ -195,34 +137,13 @@ static const char* solveContactCL= \
|
||||
" float m_jacCoeffInv[4];\n"
|
||||
" float m_b[4];\n"
|
||||
" float m_appliedRambdaDt[4];\n"
|
||||
"\n"
|
||||
" float m_fJacCoeffInv[2]; \n"
|
||||
" float m_fAppliedRambdaDt[2]; \n"
|
||||
"\n"
|
||||
" u32 m_bodyA;\n"
|
||||
" u32 m_bodyB;\n"
|
||||
"\n"
|
||||
" int m_batchIdx;\n"
|
||||
" u32 m_paddings[1];\n"
|
||||
"} Constraint4;\n"
|
||||
"\n"
|
||||
"typedef struct\n"
|
||||
"{\n"
|
||||
" float4 m_worldPos[4];\n"
|
||||
" float4 m_worldNormal;\n"
|
||||
" u32 m_coeffs;\n"
|
||||
" int m_batchIdx;\n"
|
||||
"\n"
|
||||
" int m_bodyAPtrAndSignBit;\n"
|
||||
" int m_bodyBPtrAndSignBit;\n"
|
||||
" \n"
|
||||
" int m_childIndexA;\n"
|
||||
" int m_childIndexB;\n"
|
||||
" int m_unused1;\n"
|
||||
" int m_unused2;\n"
|
||||
"\n"
|
||||
"} Contact4;\n"
|
||||
"\n"
|
||||
"typedef struct\n"
|
||||
"{\n"
|
||||
" int m_nConstraints;\n"
|
||||
@@ -231,7 +152,6 @@ static const char* solveContactCL= \
|
||||
" int m_nSplit;\n"
|
||||
"// int m_paddings[1];\n"
|
||||
"} ConstBuffer;\n"
|
||||
"\n"
|
||||
"typedef struct\n"
|
||||
"{\n"
|
||||
" int m_solveFriction;\n"
|
||||
@@ -240,27 +160,20 @@ static const char* solveContactCL= \
|
||||
" int m_nSplit;\n"
|
||||
"// int m_paddings[1];\n"
|
||||
"} ConstBufferBatchSolve;\n"
|
||||
"\n"
|
||||
"void setLinearAndAngular( float4 n, float4 r0, float4 r1, float4* linear, float4* angular0, float4* angular1);\n"
|
||||
"\n"
|
||||
"void setLinearAndAngular( float4 n, float4 r0, float4 r1, float4* linear, float4* angular0, float4* angular1)\n"
|
||||
"{\n"
|
||||
" *linear = mymake_float4(-n.xyz,0.f);\n"
|
||||
" *angular0 = -cross3(r0, n);\n"
|
||||
" *angular1 = cross3(r1, n);\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"float calcRelVel( float4 l0, float4 l1, float4 a0, float4 a1, float4 linVel0, float4 angVel0, float4 linVel1, float4 angVel1 );\n"
|
||||
"\n"
|
||||
"float calcRelVel( float4 l0, float4 l1, float4 a0, float4 a1, float4 linVel0, float4 angVel0, float4 linVel1, float4 angVel1 )\n"
|
||||
"{\n"
|
||||
" return dot3F4(l0, linVel0) + dot3F4(a0, angVel0) + dot3F4(l1, linVel1) + dot3F4(a1, angVel1);\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"float calcJacCoeff(const float4 linear0, const float4 linear1, const float4 angular0, const float4 angular1,\n"
|
||||
" float invMass0, const Matrix3x3* invInertia0, float invMass1, const Matrix3x3* invInertia1);\n"
|
||||
"\n"
|
||||
"float calcJacCoeff(const float4 linear0, const float4 linear1, const float4 angular0, const float4 angular1,\n"
|
||||
" float invMass0, const Matrix3x3* invInertia0, float invMass1, const Matrix3x3* invInertia1)\n"
|
||||
"{\n"
|
||||
@@ -271,32 +184,25 @@ static const char* solveContactCL= \
|
||||
" float jmj3 = dot3F4(mtMul3(angular1,*invInertia1), angular1);\n"
|
||||
" return -1.f/(jmj0+jmj1+jmj2+jmj3);\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"void solveContact(__global Constraint4* cs,\n"
|
||||
" float4 posA, float4* linVelA, float4* angVelA, float invMassA, Matrix3x3 invInertiaA,\n"
|
||||
" float4 posB, float4* linVelB, float4* angVelB, float invMassB, Matrix3x3 invInertiaB);\n"
|
||||
"\n"
|
||||
"void solveContact(__global Constraint4* cs,\n"
|
||||
" float4 posA, float4* linVelA, float4* angVelA, float invMassA, Matrix3x3 invInertiaA,\n"
|
||||
" float4 posB, float4* linVelB, float4* angVelB, float invMassB, Matrix3x3 invInertiaB)\n"
|
||||
"{\n"
|
||||
" float minRambdaDt = 0;\n"
|
||||
" float maxRambdaDt = FLT_MAX;\n"
|
||||
"\n"
|
||||
" for(int ic=0; ic<4; ic++)\n"
|
||||
" {\n"
|
||||
" if( cs->m_jacCoeffInv[ic] == 0.f ) continue;\n"
|
||||
"\n"
|
||||
" float4 angular0, angular1, linear;\n"
|
||||
" float4 r0 = cs->m_worldPos[ic] - posA;\n"
|
||||
" float4 r1 = cs->m_worldPos[ic] - posB;\n"
|
||||
" setLinearAndAngular( -cs->m_linear, r0, r1, &linear, &angular0, &angular1 );\n"
|
||||
"\n"
|
||||
" float rambdaDt = calcRelVel( cs->m_linear, -cs->m_linear, angular0, angular1, \n"
|
||||
" *linVelA, *angVelA, *linVelB, *angVelB ) + cs->m_b[ic];\n"
|
||||
" rambdaDt *= cs->m_jacCoeffInv[ic];\n"
|
||||
"\n"
|
||||
" {\n"
|
||||
" float prevSum = cs->m_appliedRambdaDt[ic];\n"
|
||||
" float updated = prevSum;\n"
|
||||
@@ -306,19 +212,16 @@ static const char* solveContactCL= \
|
||||
" rambdaDt = updated - prevSum;\n"
|
||||
" cs->m_appliedRambdaDt[ic] = updated;\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" float4 linImp0 = invMassA*linear*rambdaDt;\n"
|
||||
" float4 linImp1 = invMassB*(-linear)*rambdaDt;\n"
|
||||
" float4 angImp0 = mtMul1(invInertiaA, angular0)*rambdaDt;\n"
|
||||
" float4 angImp1 = mtMul1(invInertiaB, angular1)*rambdaDt;\n"
|
||||
"\n"
|
||||
" *linVelA += linImp0;\n"
|
||||
" *angVelA += angImp0;\n"
|
||||
" *linVelB += linImp1;\n"
|
||||
" *angVelB += angImp1;\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"void btPlaneSpace1 (const float4* n, float4* p, float4* q);\n"
|
||||
" void btPlaneSpace1 (const float4* n, float4* p, float4* q)\n"
|
||||
"{\n"
|
||||
@@ -347,29 +250,24 @@ static const char* solveContactCL= \
|
||||
" q[0].z = a*k;\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"void solveContactConstraint(__global Body* gBodies, __global Shape* gShapes, __global Constraint4* ldsCs);\n"
|
||||
"void solveContactConstraint(__global Body* gBodies, __global Shape* gShapes, __global Constraint4* ldsCs)\n"
|
||||
"{\n"
|
||||
" //float frictionCoeff = ldsCs[0].m_linear.w;\n"
|
||||
" int aIdx = ldsCs[0].m_bodyA;\n"
|
||||
" int bIdx = ldsCs[0].m_bodyB;\n"
|
||||
"\n"
|
||||
" float4 posA = gBodies[aIdx].m_pos;\n"
|
||||
" float4 linVelA = gBodies[aIdx].m_linVel;\n"
|
||||
" float4 angVelA = gBodies[aIdx].m_angVel;\n"
|
||||
" float invMassA = gBodies[aIdx].m_invMass;\n"
|
||||
" Matrix3x3 invInertiaA = gShapes[aIdx].m_invInertia;\n"
|
||||
"\n"
|
||||
" float4 posB = gBodies[bIdx].m_pos;\n"
|
||||
" float4 linVelB = gBodies[bIdx].m_linVel;\n"
|
||||
" float4 angVelB = gBodies[bIdx].m_angVel;\n"
|
||||
" float invMassB = gBodies[bIdx].m_invMass;\n"
|
||||
" Matrix3x3 invInertiaB = gShapes[bIdx].m_invInertia;\n"
|
||||
"\n"
|
||||
" solveContact( ldsCs, posA, &linVelA, &angVelA, invMassA, invInertiaA,\n"
|
||||
" posB, &linVelB, &angVelB, invMassB, invInertiaB );\n"
|
||||
"\n"
|
||||
" if (gBodies[aIdx].m_invMass)\n"
|
||||
" {\n"
|
||||
" gBodies[aIdx].m_linVel = linVelA;\n"
|
||||
@@ -390,27 +288,18 @@ static const char* solveContactCL= \
|
||||
" gBodies[bIdx].m_angVel = mymake_float4(0,0,0,0);\n"
|
||||
" \n"
|
||||
" }\n"
|
||||
"\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"typedef struct \n"
|
||||
"{\n"
|
||||
" int m_valInt0;\n"
|
||||
" int m_valInt1;\n"
|
||||
" int m_valInt2;\n"
|
||||
" int m_valInt3;\n"
|
||||
"\n"
|
||||
" float m_val0;\n"
|
||||
" float m_val1;\n"
|
||||
" float m_val2;\n"
|
||||
" float m_val3;\n"
|
||||
"} SolverDebugInfo;\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"__kernel\n"
|
||||
"__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n"
|
||||
"void BatchSolveKernelContact(__global Body* gBodies,\n"
|
||||
@@ -427,21 +316,16 @@ static const char* solveContactCL= \
|
||||
" __local int ldsCurBatch;\n"
|
||||
" __local int ldsNextBatch;\n"
|
||||
" __local int ldsStart;\n"
|
||||
"\n"
|
||||
" int lIdx = GET_LOCAL_IDX;\n"
|
||||
" int wgIdx = GET_GROUP_IDX;\n"
|
||||
"\n"
|
||||
"// int gIdx = GET_GLOBAL_IDX;\n"
|
||||
"// debugInfo[gIdx].m_valInt0 = gIdx;\n"
|
||||
" //debugInfo[gIdx].m_valInt1 = GET_GROUP_SIZE;\n"
|
||||
"\n"
|
||||
"\n"
|
||||
" int zIdx = (wgIdx/((nSplit.x*nSplit.y)/4))*2+((cellBatch&4)>>2);\n"
|
||||
" int remain= (wgIdx%((nSplit.x*nSplit.y)/4));\n"
|
||||
" int yIdx = (remain/(nSplit.x/2))*2 + ((cellBatch&2)>>1);\n"
|
||||
" int xIdx = (remain%(nSplit.x/2))*2 + (cellBatch&1);\n"
|
||||
" int cellIdx = xIdx+yIdx*nSplit.x+zIdx*(nSplit.x*nSplit.y);\n"
|
||||
"\n"
|
||||
" //int xIdx = (wgIdx/(nSplit/2))*2 + (bIdx&1);\n"
|
||||
" //int yIdx = (wgIdx%(nSplit/2))*2 + (bIdx>>1);\n"
|
||||
" //int cellIdx = xIdx+yIdx*nSplit;\n"
|
||||
@@ -450,23 +334,18 @@ static const char* solveContactCL= \
|
||||
" return;\n"
|
||||
" \n"
|
||||
" \n"
|
||||
" \n"
|
||||
" const int start = gOffsets[cellIdx];\n"
|
||||
" const int end = start + gN[cellIdx];\n"
|
||||
" \n"
|
||||
" \n"
|
||||
" \n"
|
||||
" \n"
|
||||
" if( lIdx == 0 )\n"
|
||||
" {\n"
|
||||
" ldsCurBatch = 0;\n"
|
||||
" ldsNextBatch = 0;\n"
|
||||
" ldsStart = start;\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
"\n"
|
||||
" GROUP_LDS_BARRIER;\n"
|
||||
"\n"
|
||||
" int idx=ldsStart+lIdx;\n"
|
||||
" while (ldsCurBatch < maxBatch)\n"
|
||||
" {\n"
|
||||
@@ -475,7 +354,6 @@ static const char* solveContactCL= \
|
||||
" if (gConstraints[idx].m_batchIdx == ldsCurBatch)\n"
|
||||
" {\n"
|
||||
" solveContactConstraint( gBodies, gShapes, &gConstraints[idx] );\n"
|
||||
"\n"
|
||||
" idx+=64;\n"
|
||||
" } else\n"
|
||||
" {\n"
|
||||
|
||||
@@ -204,22 +204,7 @@ typedef struct
|
||||
u32 m_paddings[1];
|
||||
} Constraint4;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
float4 m_worldPos[4];
|
||||
float4 m_worldNormal;
|
||||
u32 m_coeffs;
|
||||
int m_batchIdx;
|
||||
|
||||
int m_bodyAPtrAndSignBit;
|
||||
int m_bodyBPtrAndSignBit;
|
||||
|
||||
int m_childIndexA;
|
||||
int m_childIndexB;
|
||||
int m_unused1;
|
||||
int m_unused2;
|
||||
|
||||
} Contact4;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
|
||||
@@ -2,37 +2,29 @@
|
||||
static const char* solveFrictionCL= \
|
||||
"/*\n"
|
||||
"Copyright (c) 2012 Advanced Micro Devices, Inc. \n"
|
||||
"\n"
|
||||
"This software is provided 'as-is', without any express or implied warranty.\n"
|
||||
"In no event will the authors be held liable for any damages arising from the use of this software.\n"
|
||||
"Permission is granted to anyone to use this software for any purpose, \n"
|
||||
"including commercial applications, and to alter it and redistribute it freely, \n"
|
||||
"subject to the following restrictions:\n"
|
||||
"\n"
|
||||
"1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n"
|
||||
"2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n"
|
||||
"3. This notice may not be removed or altered from any source distribution.\n"
|
||||
"*/\n"
|
||||
"//Originally written by Takahiro Harada\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"//#pragma OPENCL EXTENSION cl_amd_printf : enable\n"
|
||||
"#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n"
|
||||
"#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n"
|
||||
"#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n"
|
||||
"#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"#ifdef cl_ext_atomic_counters_32\n"
|
||||
"#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable\n"
|
||||
"#else\n"
|
||||
"#define counter32_t volatile global int*\n"
|
||||
"#endif\n"
|
||||
"\n"
|
||||
"typedef unsigned int u32;\n"
|
||||
"typedef unsigned short u16;\n"
|
||||
"typedef unsigned char u8;\n"
|
||||
"\n"
|
||||
"#define GET_GROUP_IDX get_group_id(0)\n"
|
||||
"#define GET_LOCAL_IDX get_local_id(0)\n"
|
||||
"#define GET_GLOBAL_IDX get_global_id(0)\n"
|
||||
@@ -46,43 +38,28 @@ static const char* solveFrictionCL= \
|
||||
"#define AtomAdd(x, value) atom_add(&(x), value)\n"
|
||||
"#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value )\n"
|
||||
"#define AtomXhg(x, value) atom_xchg ( &(x), value )\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"#define SELECT_UINT4( b, a, condition ) select( b,a,condition )\n"
|
||||
"\n"
|
||||
"#define mymake_float4 (float4)\n"
|
||||
"//#define make_float2 (float2)\n"
|
||||
"//#define make_uint4 (uint4)\n"
|
||||
"//#define make_int4 (int4)\n"
|
||||
"//#define make_uint2 (uint2)\n"
|
||||
"//#define make_int2 (int2)\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"#define max2 max\n"
|
||||
"#define min2 min\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"///////////////////////////////////////\n"
|
||||
"// Vector\n"
|
||||
"///////////////////////////////////////\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float4 fastNormalize4(float4 v)\n"
|
||||
"{\n"
|
||||
" return fast_normalize(v);\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float4 cross3(float4 a, float4 b)\n"
|
||||
"{\n"
|
||||
" return cross(a,b);\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float dot3F4(float4 a, float4 b)\n"
|
||||
"{\n"
|
||||
@@ -90,10 +67,6 @@ static const char* solveFrictionCL= \
|
||||
" float4 b1 = mymake_float4(b.xyz,0.f);\n"
|
||||
" return dot(a1, b1);\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float4 normalize3(const float4 a)\n"
|
||||
"{\n"
|
||||
@@ -102,33 +75,17 @@ static const char* solveFrictionCL= \
|
||||
"// float length = sqrtf(dot3F4(a, a));\n"
|
||||
"// return 1.f/length * a;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"///////////////////////////////////////\n"
|
||||
"// Matrix3x3\n"
|
||||
"///////////////////////////////////////\n"
|
||||
"\n"
|
||||
"typedef struct\n"
|
||||
"{\n"
|
||||
" float4 m_row[3];\n"
|
||||
"}Matrix3x3;\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float4 mtMul1(Matrix3x3 a, float4 b);\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float4 mtMul3(float4 a, Matrix3x3 b);\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float4 mtMul1(Matrix3x3 a, float4 b)\n"
|
||||
"{\n"
|
||||
@@ -139,54 +96,39 @@ static const char* solveFrictionCL= \
|
||||
" ans.w = 0.f;\n"
|
||||
" return ans;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float4 mtMul3(float4 a, Matrix3x3 b)\n"
|
||||
"{\n"
|
||||
" float4 colx = mymake_float4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n"
|
||||
" float4 coly = mymake_float4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n"
|
||||
" float4 colz = mymake_float4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n"
|
||||
"\n"
|
||||
" float4 ans;\n"
|
||||
" ans.x = dot3F4( a, colx );\n"
|
||||
" ans.y = dot3F4( a, coly );\n"
|
||||
" ans.z = dot3F4( a, colz );\n"
|
||||
" return ans;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"///////////////////////////////////////\n"
|
||||
"// Quaternion\n"
|
||||
"///////////////////////////////////////\n"
|
||||
"\n"
|
||||
"typedef float4 Quaternion;\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"#define WG_SIZE 64\n"
|
||||
"\n"
|
||||
"typedef struct\n"
|
||||
"{\n"
|
||||
" float4 m_pos;\n"
|
||||
" Quaternion m_quat;\n"
|
||||
" float4 m_linVel;\n"
|
||||
" float4 m_angVel;\n"
|
||||
"\n"
|
||||
" u32 m_shapeIdx;\n"
|
||||
" float m_invMass;\n"
|
||||
" float m_restituitionCoeff;\n"
|
||||
" float m_frictionCoeff;\n"
|
||||
"} Body;\n"
|
||||
"\n"
|
||||
"typedef struct\n"
|
||||
"{\n"
|
||||
" Matrix3x3 m_invInertia;\n"
|
||||
" Matrix3x3 m_initInvInertia;\n"
|
||||
"} Shape;\n"
|
||||
"\n"
|
||||
"typedef struct\n"
|
||||
"{\n"
|
||||
" float4 m_linear;\n"
|
||||
@@ -195,34 +137,13 @@ static const char* solveFrictionCL= \
|
||||
" float m_jacCoeffInv[4];\n"
|
||||
" float m_b[4];\n"
|
||||
" float m_appliedRambdaDt[4];\n"
|
||||
"\n"
|
||||
" float m_fJacCoeffInv[2]; \n"
|
||||
" float m_fAppliedRambdaDt[2]; \n"
|
||||
"\n"
|
||||
" u32 m_bodyA;\n"
|
||||
" u32 m_bodyB;\n"
|
||||
"\n"
|
||||
" int m_batchIdx;\n"
|
||||
" u32 m_paddings[1];\n"
|
||||
"} Constraint4;\n"
|
||||
"\n"
|
||||
"typedef struct\n"
|
||||
"{\n"
|
||||
" float4 m_worldPos[4];\n"
|
||||
" float4 m_worldNormal;\n"
|
||||
" u32 m_coeffs;\n"
|
||||
" int m_batchIdx;\n"
|
||||
"\n"
|
||||
" int m_bodyAPtrAndSignBit;\n"
|
||||
" int m_bodyBPtrAndSignBit;\n"
|
||||
"\n"
|
||||
" int m_childIndexA;\n"
|
||||
" int m_childIndexB;\n"
|
||||
" int m_unused1;\n"
|
||||
" int m_unused2;\n"
|
||||
"\n"
|
||||
"} Contact4;\n"
|
||||
"\n"
|
||||
"typedef struct\n"
|
||||
"{\n"
|
||||
" int m_nConstraints;\n"
|
||||
@@ -231,7 +152,6 @@ static const char* solveFrictionCL= \
|
||||
" int m_nSplit;\n"
|
||||
"// int m_paddings[1];\n"
|
||||
"} ConstBuffer;\n"
|
||||
"\n"
|
||||
"typedef struct\n"
|
||||
"{\n"
|
||||
" int m_solveFriction;\n"
|
||||
@@ -240,27 +160,20 @@ static const char* solveFrictionCL= \
|
||||
" int m_nSplit;\n"
|
||||
"// int m_paddings[1];\n"
|
||||
"} ConstBufferBatchSolve;\n"
|
||||
"\n"
|
||||
"void setLinearAndAngular( float4 n, float4 r0, float4 r1, float4* linear, float4* angular0, float4* angular1);\n"
|
||||
"\n"
|
||||
"void setLinearAndAngular( float4 n, float4 r0, float4 r1, float4* linear, float4* angular0, float4* angular1)\n"
|
||||
"{\n"
|
||||
" *linear = mymake_float4(-n.xyz,0.f);\n"
|
||||
" *angular0 = -cross3(r0, n);\n"
|
||||
" *angular1 = cross3(r1, n);\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"float calcRelVel( float4 l0, float4 l1, float4 a0, float4 a1, float4 linVel0, float4 angVel0, float4 linVel1, float4 angVel1 );\n"
|
||||
"\n"
|
||||
"float calcRelVel( float4 l0, float4 l1, float4 a0, float4 a1, float4 linVel0, float4 angVel0, float4 linVel1, float4 angVel1 )\n"
|
||||
"{\n"
|
||||
" return dot3F4(l0, linVel0) + dot3F4(a0, angVel0) + dot3F4(l1, linVel1) + dot3F4(a1, angVel1);\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"float calcJacCoeff(const float4 linear0, const float4 linear1, const float4 angular0, const float4 angular1,\n"
|
||||
" float invMass0, const Matrix3x3* invInertia0, float invMass1, const Matrix3x3* invInertia1);\n"
|
||||
"\n"
|
||||
"float calcJacCoeff(const float4 linear0, const float4 linear1, const float4 angular0, const float4 angular1,\n"
|
||||
" float invMass0, const Matrix3x3* invInertia0, float invMass1, const Matrix3x3* invInertia1)\n"
|
||||
"{\n"
|
||||
@@ -299,33 +212,26 @@ static const char* solveFrictionCL= \
|
||||
" q[0].z = a*k;\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"void solveFrictionConstraint(__global Body* gBodies, __global Shape* gShapes, __global Constraint4* ldsCs);\n"
|
||||
"void solveFrictionConstraint(__global Body* gBodies, __global Shape* gShapes, __global Constraint4* ldsCs)\n"
|
||||
"{\n"
|
||||
" float frictionCoeff = ldsCs[0].m_linear.w;\n"
|
||||
" int aIdx = ldsCs[0].m_bodyA;\n"
|
||||
" int bIdx = ldsCs[0].m_bodyB;\n"
|
||||
"\n"
|
||||
"\n"
|
||||
" float4 posA = gBodies[aIdx].m_pos;\n"
|
||||
" float4 linVelA = gBodies[aIdx].m_linVel;\n"
|
||||
" float4 angVelA = gBodies[aIdx].m_angVel;\n"
|
||||
" float invMassA = gBodies[aIdx].m_invMass;\n"
|
||||
" Matrix3x3 invInertiaA = gShapes[aIdx].m_invInertia;\n"
|
||||
"\n"
|
||||
" float4 posB = gBodies[bIdx].m_pos;\n"
|
||||
" float4 linVelB = gBodies[bIdx].m_linVel;\n"
|
||||
" float4 angVelB = gBodies[bIdx].m_angVel;\n"
|
||||
" float invMassB = gBodies[bIdx].m_invMass;\n"
|
||||
" Matrix3x3 invInertiaB = gShapes[bIdx].m_invInertia;\n"
|
||||
" \n"
|
||||
"\n"
|
||||
" {\n"
|
||||
" float maxRambdaDt[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX};\n"
|
||||
" float minRambdaDt[4] = {0.f,0.f,0.f,0.f};\n"
|
||||
"\n"
|
||||
" float sum = 0;\n"
|
||||
" for(int j=0; j<4; j++)\n"
|
||||
" {\n"
|
||||
@@ -338,7 +244,6 @@ static const char* solveFrictionCL= \
|
||||
" minRambdaDt[j] = -maxRambdaDt[j];\n"
|
||||
" }\n"
|
||||
" \n"
|
||||
" \n"
|
||||
"// solveFriction( ldsCs, posA, &linVelA, &angVelA, invMassA, invInertiaA,\n"
|
||||
"// posB, &linVelB, &angVelB, invMassB, invInertiaB, maxRambdaDt, minRambdaDt );\n"
|
||||
" \n"
|
||||
@@ -399,9 +304,7 @@ static const char* solveFrictionCL= \
|
||||
" }\n"
|
||||
" \n"
|
||||
" \n"
|
||||
" \n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" if (gBodies[aIdx].m_invMass)\n"
|
||||
" {\n"
|
||||
" gBodies[aIdx].m_linVel = linVelA;\n"
|
||||
@@ -421,25 +324,18 @@ static const char* solveFrictionCL= \
|
||||
" gBodies[bIdx].m_angVel = mymake_float4(0,0,0,0);\n"
|
||||
" }\n"
|
||||
" \n"
|
||||
"\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"typedef struct \n"
|
||||
"{\n"
|
||||
" int m_valInt0;\n"
|
||||
" int m_valInt1;\n"
|
||||
" int m_valInt2;\n"
|
||||
" int m_valInt3;\n"
|
||||
"\n"
|
||||
" float m_val0;\n"
|
||||
" float m_val1;\n"
|
||||
" float m_val2;\n"
|
||||
" float m_val3;\n"
|
||||
"} SolverDebugInfo;\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"__kernel\n"
|
||||
"__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n"
|
||||
"void BatchSolveKernelFriction(__global Body* gBodies,\n"
|
||||
@@ -456,39 +352,29 @@ static const char* solveFrictionCL= \
|
||||
" __local int ldsCurBatch;\n"
|
||||
" __local int ldsNextBatch;\n"
|
||||
" __local int ldsStart;\n"
|
||||
"\n"
|
||||
" int lIdx = GET_LOCAL_IDX;\n"
|
||||
" int wgIdx = GET_GROUP_IDX;\n"
|
||||
"\n"
|
||||
"// int gIdx = GET_GLOBAL_IDX;\n"
|
||||
"// debugInfo[gIdx].m_valInt0 = gIdx;\n"
|
||||
" //debugInfo[gIdx].m_valInt1 = GET_GROUP_SIZE;\n"
|
||||
"\n"
|
||||
"\n"
|
||||
" int zIdx = (wgIdx/((nSplit.x*nSplit.y)/4))*2+((cellBatch&4)>>2);\n"
|
||||
" int remain= (wgIdx%((nSplit.x*nSplit.y)/4));\n"
|
||||
" int yIdx = (remain/(nSplit.x/2))*2 + ((cellBatch&2)>>1);\n"
|
||||
" int xIdx = (remain%(nSplit.x/2))*2 + (cellBatch&1);\n"
|
||||
" int cellIdx = xIdx+yIdx*nSplit.x+zIdx*(nSplit.x*nSplit.y);\n"
|
||||
" \n"
|
||||
" \n"
|
||||
" if( gN[cellIdx] == 0 ) \n"
|
||||
" return;\n"
|
||||
"\n"
|
||||
" const int start = gOffsets[cellIdx];\n"
|
||||
" const int end = start + gN[cellIdx];\n"
|
||||
" \n"
|
||||
" \n"
|
||||
" if( lIdx == 0 )\n"
|
||||
" {\n"
|
||||
" ldsCurBatch = 0;\n"
|
||||
" ldsNextBatch = 0;\n"
|
||||
" ldsStart = start;\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
"\n"
|
||||
" GROUP_LDS_BARRIER;\n"
|
||||
"\n"
|
||||
" int idx=ldsStart+lIdx;\n"
|
||||
" while (ldsCurBatch < maxBatch)\n"
|
||||
" {\n"
|
||||
@@ -496,9 +382,7 @@ static const char* solveFrictionCL= \
|
||||
" {\n"
|
||||
" if (gConstraints[idx].m_batchIdx == ldsCurBatch)\n"
|
||||
" {\n"
|
||||
"\n"
|
||||
" solveFrictionConstraint( gBodies, gShapes, &gConstraints[idx] );\n"
|
||||
"\n"
|
||||
" idx+=64;\n"
|
||||
" } else\n"
|
||||
" {\n"
|
||||
|
||||
@@ -14,6 +14,7 @@ subject to the following restrictions:
|
||||
*/
|
||||
//Originally written by Takahiro Harada
|
||||
|
||||
#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Contact4Data.h"
|
||||
|
||||
#pragma OPENCL EXTENSION cl_amd_printf : enable
|
||||
#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable
|
||||
@@ -403,22 +404,7 @@ typedef struct
|
||||
u32 m_paddings[1];
|
||||
} Constraint4;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
float4 m_worldPos[4];
|
||||
float4 m_worldNormal;
|
||||
u32 m_coeffs;
|
||||
int m_batchIdx;
|
||||
|
||||
int m_bodyAPtrAndSignBit;
|
||||
int m_bodyBPtrAndSignBit;
|
||||
|
||||
int m_childIndexA;
|
||||
int m_childIndexB;
|
||||
int m_unused1;
|
||||
int m_unused2;
|
||||
|
||||
} Contact4;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
@@ -525,7 +511,7 @@ void btPlaneSpace1 (float4 n, float4* p, float4* q);
|
||||
|
||||
void setConstraint4( const float4 posA, const float4 linVelA, const float4 angVelA, float invMassA, const Matrix3x3 invInertiaA,
|
||||
const float4 posB, const float4 linVelB, const float4 angVelB, float invMassB, const Matrix3x3 invInertiaB,
|
||||
__global Contact4* src, float dt, float positionDrift, float positionConstraintCoeff,
|
||||
__global struct b3Contact4Data* src, float dt, float positionDrift, float positionConstraintCoeff,
|
||||
Constraint4* dstC )
|
||||
{
|
||||
dstC->m_bodyA = abs(src->m_bodyAPtrAndSignBit);
|
||||
@@ -622,7 +608,7 @@ typedef struct
|
||||
|
||||
__kernel
|
||||
__attribute__((reqd_work_group_size(WG_SIZE,1,1)))
|
||||
void ContactToConstraintKernel(__global Contact4* gContact, __global Body* gBodies, __global Shape* gShapes, __global Constraint4* gConstraintOut,
|
||||
void ContactToConstraintKernel(__global struct b3Contact4Data* gContact, __global Body* gBodies, __global Shape* gShapes, __global Constraint4* gConstraintOut,
|
||||
int nContacts,
|
||||
float dt,
|
||||
float positionDrift,
|
||||
|
||||
@@ -2,37 +2,71 @@
|
||||
static const char* solverSetupCL= \
|
||||
"/*\n"
|
||||
"Copyright (c) 2012 Advanced Micro Devices, Inc. \n"
|
||||
"\n"
|
||||
"This software is provided 'as-is', without any express or implied warranty.\n"
|
||||
"In no event will the authors be held liable for any damages arising from the use of this software.\n"
|
||||
"Permission is granted to anyone to use this software for any purpose, \n"
|
||||
"including commercial applications, and to alter it and redistribute it freely, \n"
|
||||
"subject to the following restrictions:\n"
|
||||
"\n"
|
||||
"1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n"
|
||||
"2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n"
|
||||
"3. This notice may not be removed or altered from any source distribution.\n"
|
||||
"*/\n"
|
||||
"//Originally written by Takahiro Harada\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"#ifndef B3_CONTACT4DATA_H\n"
|
||||
"#define B3_CONTACT4DATA_H\n"
|
||||
"#ifndef B3_FLOAT4_H\n"
|
||||
"#define B3_FLOAT4_H\n"
|
||||
"#ifndef B3_PLATFORM_DEFINITIONS_H\n"
|
||||
"#define B3_PLATFORM_DEFINITIONS_H\n"
|
||||
"struct MyTest\n"
|
||||
"{\n"
|
||||
" int bla;\n"
|
||||
"};\n"
|
||||
"#endif\n"
|
||||
"#ifdef __cplusplus\n"
|
||||
"#else//bla\n"
|
||||
" typedef float4 b3Float4;\n"
|
||||
"#endif \n"
|
||||
"#endif //B3_FLOAT4_H\n"
|
||||
"typedef struct b3Contact4Data b3Contact4Data_t;\n"
|
||||
"struct b3Contact4Data\n"
|
||||
"{\n"
|
||||
" b3Float4 m_worldPos[4];\n"
|
||||
"// b3Float4 m_localPosB[4];\n"
|
||||
" b3Float4 m_worldNormal; // w: m_nPoints\n"
|
||||
" unsigned short m_restituitionCoeffCmp;\n"
|
||||
" unsigned short m_frictionCoeffCmp;\n"
|
||||
" int m_batchIdx;\n"
|
||||
" int m_bodyAPtrAndSignBit;//x:m_bodyAPtr, y:m_bodyBPtr\n"
|
||||
" int m_bodyBPtrAndSignBit;\n"
|
||||
" int m_childIndexA;\n"
|
||||
" int m_childIndexB;\n"
|
||||
" int m_unused1;\n"
|
||||
" int m_unused2;\n"
|
||||
" b3Float4 m_localPosA;\n"
|
||||
"};\n"
|
||||
"inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)\n"
|
||||
"{\n"
|
||||
" return (int)contact->m_worldNormal.w;\n"
|
||||
"};\n"
|
||||
"inline void b3Contact4Data_setNumPoints(struct b3Contact4Data* contact, int numPoints)\n"
|
||||
"{\n"
|
||||
" contact->m_worldNormal.w = (float)numPoints;\n"
|
||||
"};\n"
|
||||
"#endif //B3_CONTACT4DATA_H\n"
|
||||
"#pragma OPENCL EXTENSION cl_amd_printf : enable\n"
|
||||
"#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n"
|
||||
"#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n"
|
||||
"#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n"
|
||||
"#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"#ifdef cl_ext_atomic_counters_32\n"
|
||||
"#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable\n"
|
||||
"#else\n"
|
||||
"#define counter32_t volatile global int*\n"
|
||||
"#endif\n"
|
||||
"\n"
|
||||
"typedef unsigned int u32;\n"
|
||||
"typedef unsigned short u16;\n"
|
||||
"typedef unsigned char u8;\n"
|
||||
"\n"
|
||||
"#define GET_GROUP_IDX get_group_id(0)\n"
|
||||
"#define GET_LOCAL_IDX get_local_id(0)\n"
|
||||
"#define GET_GLOBAL_IDX get_global_id(0)\n"
|
||||
@@ -46,22 +80,15 @@ static const char* solverSetupCL= \
|
||||
"#define AtomAdd(x, value) atom_add(&(x), value)\n"
|
||||
"#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value )\n"
|
||||
"#define AtomXhg(x, value) atom_xchg ( &(x), value )\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"#define SELECT_UINT4( b, a, condition ) select( b,a,condition )\n"
|
||||
"\n"
|
||||
"#define make_float4 (float4)\n"
|
||||
"#define make_float2 (float2)\n"
|
||||
"#define make_uint4 (uint4)\n"
|
||||
"#define make_int4 (int4)\n"
|
||||
"#define make_uint2 (uint2)\n"
|
||||
"#define make_int2 (int2)\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"#define max2 max\n"
|
||||
"#define min2 min\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"///////////////////////////////////////\n"
|
||||
"// Vector\n"
|
||||
"///////////////////////////////////////\n"
|
||||
@@ -71,52 +98,43 @@ static const char* solverSetupCL= \
|
||||
" return native_divide(numerator, denominator); \n"
|
||||
"// return numerator/denominator; \n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float4 fastDiv4(float4 numerator, float4 denominator)\n"
|
||||
"{\n"
|
||||
" return native_divide(numerator, denominator); \n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float fastSqrtf(float f2)\n"
|
||||
"{\n"
|
||||
" return native_sqrt(f2);\n"
|
||||
"// return sqrt(f2);\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float fastRSqrt(float f2)\n"
|
||||
"{\n"
|
||||
" return native_rsqrt(f2);\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float fastLength4(float4 v)\n"
|
||||
"{\n"
|
||||
" return fast_length(v);\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float4 fastNormalize4(float4 v)\n"
|
||||
"{\n"
|
||||
" return fast_normalize(v);\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float sqrtf(float a)\n"
|
||||
"{\n"
|
||||
"// return sqrt(a);\n"
|
||||
" return native_sqrt(a);\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float4 cross3(float4 a, float4 b)\n"
|
||||
"{\n"
|
||||
" return cross(a,b);\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float dot3F4(float4 a, float4 b)\n"
|
||||
"{\n"
|
||||
@@ -124,26 +142,22 @@ static const char* solverSetupCL= \
|
||||
" float4 b1 = make_float4(b.xyz,0.f);\n"
|
||||
" return dot(a1, b1);\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float length3(const float4 a)\n"
|
||||
"{\n"
|
||||
" return sqrtf(dot3F4(a,a));\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float dot4(const float4 a, const float4 b)\n"
|
||||
"{\n"
|
||||
" return dot( a, b );\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"// for height\n"
|
||||
"__inline\n"
|
||||
"float dot3w1(const float4 point, const float4 eqn)\n"
|
||||
"{\n"
|
||||
" return dot3F4(point,eqn) + eqn.w;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float4 normalize3(const float4 a)\n"
|
||||
"{\n"
|
||||
@@ -152,14 +166,12 @@ static const char* solverSetupCL= \
|
||||
"// float length = sqrtf(dot3F4(a, a));\n"
|
||||
"// return 1.f/length * a;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float4 normalize4(const float4 a)\n"
|
||||
"{\n"
|
||||
" float length = sqrtf(dot4(a, a));\n"
|
||||
" return 1.f/length * a;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float4 createEquation(const float4 a, const float4 b, const float4 c)\n"
|
||||
"{\n"
|
||||
@@ -170,34 +182,25 @@ static const char* solverSetupCL= \
|
||||
" eqn.w = -dot3F4(eqn,a);\n"
|
||||
" return eqn;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"///////////////////////////////////////\n"
|
||||
"// Matrix3x3\n"
|
||||
"///////////////////////////////////////\n"
|
||||
"\n"
|
||||
"typedef struct\n"
|
||||
"{\n"
|
||||
" float4 m_row[3];\n"
|
||||
"}Matrix3x3;\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"Matrix3x3 mtZero();\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"Matrix3x3 mtIdentity();\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"Matrix3x3 mtTranspose(Matrix3x3 m);\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"Matrix3x3 mtMul(Matrix3x3 a, Matrix3x3 b);\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float4 mtMul1(Matrix3x3 a, float4 b);\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float4 mtMul3(float4 a, Matrix3x3 b);\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"Matrix3x3 mtZero()\n"
|
||||
"{\n"
|
||||
@@ -207,7 +210,6 @@ static const char* solverSetupCL= \
|
||||
" m.m_row[2] = (float4)(0.f);\n"
|
||||
" return m;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"Matrix3x3 mtIdentity()\n"
|
||||
"{\n"
|
||||
@@ -217,7 +219,6 @@ static const char* solverSetupCL= \
|
||||
" m.m_row[2] = (float4)(0,0,1,0);\n"
|
||||
" return m;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"Matrix3x3 mtTranspose(Matrix3x3 m)\n"
|
||||
"{\n"
|
||||
@@ -227,7 +228,6 @@ static const char* solverSetupCL= \
|
||||
" out.m_row[2] = (float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);\n"
|
||||
" return out;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"Matrix3x3 mtMul(Matrix3x3 a, Matrix3x3 b)\n"
|
||||
"{\n"
|
||||
@@ -248,7 +248,6 @@ static const char* solverSetupCL= \
|
||||
" }\n"
|
||||
" return ans;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float4 mtMul1(Matrix3x3 a, float4 b)\n"
|
||||
"{\n"
|
||||
@@ -259,44 +258,32 @@ static const char* solverSetupCL= \
|
||||
" ans.w = 0.f;\n"
|
||||
" return ans;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float4 mtMul3(float4 a, Matrix3x3 b)\n"
|
||||
"{\n"
|
||||
" float4 colx = make_float4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n"
|
||||
" float4 coly = make_float4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n"
|
||||
" float4 colz = make_float4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n"
|
||||
"\n"
|
||||
" float4 ans;\n"
|
||||
" ans.x = dot3F4( a, colx );\n"
|
||||
" ans.y = dot3F4( a, coly );\n"
|
||||
" ans.z = dot3F4( a, colz );\n"
|
||||
" return ans;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"///////////////////////////////////////\n"
|
||||
"// Quaternion\n"
|
||||
"///////////////////////////////////////\n"
|
||||
"\n"
|
||||
"typedef float4 Quaternion;\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"Quaternion qtMul(Quaternion a, Quaternion b);\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"Quaternion qtNormalize(Quaternion in);\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float4 qtRotate(Quaternion q, float4 vec);\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"Quaternion qtInvert(Quaternion q);\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"Matrix3x3 qtGetRotationMatrix(Quaternion q);\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"Quaternion qtMul(Quaternion a, Quaternion b)\n"
|
||||
"{\n"
|
||||
@@ -307,7 +294,6 @@ static const char* solverSetupCL= \
|
||||
" ans.w = a.w*b.w - dot3F4(a, b);\n"
|
||||
" return ans;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"Quaternion qtNormalize(Quaternion in)\n"
|
||||
"{\n"
|
||||
@@ -324,67 +310,52 @@ static const char* solverSetupCL= \
|
||||
" float4 out = qtMul(qtMul(q,vcpy),qInv);\n"
|
||||
" return out;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"Quaternion qtInvert(Quaternion q)\n"
|
||||
"{\n"
|
||||
" return (Quaternion)(-q.xyz, q.w);\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float4 qtInvRotate(const Quaternion q, float4 vec)\n"
|
||||
"{\n"
|
||||
" return qtRotate( qtInvert( q ), vec );\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"Matrix3x3 qtGetRotationMatrix(Quaternion quat)\n"
|
||||
"{\n"
|
||||
" float4 quat2 = (float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f);\n"
|
||||
" Matrix3x3 out;\n"
|
||||
"\n"
|
||||
" out.m_row[0].x=1-2*quat2.y-2*quat2.z;\n"
|
||||
" out.m_row[0].y=2*quat.x*quat.y-2*quat.w*quat.z;\n"
|
||||
" out.m_row[0].z=2*quat.x*quat.z+2*quat.w*quat.y;\n"
|
||||
" out.m_row[0].w = 0.f;\n"
|
||||
"\n"
|
||||
" out.m_row[1].x=2*quat.x*quat.y+2*quat.w*quat.z;\n"
|
||||
" out.m_row[1].y=1-2*quat2.x-2*quat2.z;\n"
|
||||
" out.m_row[1].z=2*quat.y*quat.z-2*quat.w*quat.x;\n"
|
||||
" out.m_row[1].w = 0.f;\n"
|
||||
"\n"
|
||||
" out.m_row[2].x=2*quat.x*quat.z-2*quat.w*quat.y;\n"
|
||||
" out.m_row[2].y=2*quat.y*quat.z+2*quat.w*quat.x;\n"
|
||||
" out.m_row[2].z=1-2*quat2.x-2*quat2.y;\n"
|
||||
" out.m_row[2].w = 0.f;\n"
|
||||
"\n"
|
||||
" return out;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"#define WG_SIZE 64\n"
|
||||
"\n"
|
||||
"typedef struct\n"
|
||||
"{\n"
|
||||
" float4 m_pos;\n"
|
||||
" Quaternion m_quat;\n"
|
||||
" float4 m_linVel;\n"
|
||||
" float4 m_angVel;\n"
|
||||
"\n"
|
||||
" u32 m_shapeIdx;\n"
|
||||
" float m_invMass;\n"
|
||||
" float m_restituitionCoeff;\n"
|
||||
" float m_frictionCoeff;\n"
|
||||
"} Body;\n"
|
||||
"\n"
|
||||
"typedef struct\n"
|
||||
"{\n"
|
||||
" Matrix3x3 m_invInertia;\n"
|
||||
" Matrix3x3 m_initInvInertia;\n"
|
||||
"} Shape;\n"
|
||||
"\n"
|
||||
"typedef struct\n"
|
||||
"{\n"
|
||||
" float4 m_linear;\n"
|
||||
@@ -393,34 +364,13 @@ static const char* solverSetupCL= \
|
||||
" float m_jacCoeffInv[4];\n"
|
||||
" float m_b[4];\n"
|
||||
" float m_appliedRambdaDt[4];\n"
|
||||
"\n"
|
||||
" float m_fJacCoeffInv[2]; \n"
|
||||
" float m_fAppliedRambdaDt[2]; \n"
|
||||
"\n"
|
||||
" u32 m_bodyA;\n"
|
||||
" u32 m_bodyB;\n"
|
||||
"\n"
|
||||
" int m_batchIdx;\n"
|
||||
" u32 m_paddings[1];\n"
|
||||
"} Constraint4;\n"
|
||||
"\n"
|
||||
"typedef struct\n"
|
||||
"{\n"
|
||||
" float4 m_worldPos[4];\n"
|
||||
" float4 m_worldNormal;\n"
|
||||
" u32 m_coeffs;\n"
|
||||
" int m_batchIdx;\n"
|
||||
"\n"
|
||||
" int m_bodyAPtrAndSignBit;\n"
|
||||
" int m_bodyBPtrAndSignBit;\n"
|
||||
"\n"
|
||||
" int m_childIndexA;\n"
|
||||
" int m_childIndexB;\n"
|
||||
" int m_unused1;\n"
|
||||
" int m_unused2;\n"
|
||||
"\n"
|
||||
"} Contact4;\n"
|
||||
"\n"
|
||||
"typedef struct\n"
|
||||
"{\n"
|
||||
" int m_nConstraints;\n"
|
||||
@@ -429,7 +379,6 @@ static const char* solverSetupCL= \
|
||||
" int m_nSplit;\n"
|
||||
"// int m_paddings[1];\n"
|
||||
"} ConstBuffer;\n"
|
||||
"\n"
|
||||
"typedef struct\n"
|
||||
"{\n"
|
||||
" int m_solveFriction;\n"
|
||||
@@ -438,22 +387,16 @@ static const char* solverSetupCL= \
|
||||
" int m_nSplit;\n"
|
||||
"// int m_paddings[1];\n"
|
||||
"} ConstBufferBatchSolve;\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"void setLinearAndAngular( float4 n, float4 r0, float4 r1, float4* linear, float4* angular0, float4* angular1)\n"
|
||||
"{\n"
|
||||
" *linear = make_float4(-n.xyz,0.f);\n"
|
||||
" *angular0 = -cross3(r0, n);\n"
|
||||
" *angular1 = cross3(r1, n);\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"float calcRelVel( float4 l0, float4 l1, float4 a0, float4 a1, float4 linVel0, float4 angVel0, float4 linVel1, float4 angVel1 )\n"
|
||||
"{\n"
|
||||
" return dot3F4(l0, linVel0) + dot3F4(a0, angVel0) + dot3F4(l1, linVel1) + dot3F4(a1, angVel1);\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"float calcJacCoeff(const float4 linear0, const float4 linear1, const float4 angular0, const float4 angular1,\n"
|
||||
" float invMass0, const Matrix3x3* invInertia0, float invMass1, const Matrix3x3* invInertia1)\n"
|
||||
"{\n"
|
||||
@@ -465,26 +408,17 @@ static const char* solverSetupCL= \
|
||||
" return -1.f/(jmj0+jmj1+jmj2+jmj3);\n"
|
||||
"}\n"
|
||||
" \n"
|
||||
"\n"
|
||||
"\n"
|
||||
" \n"
|
||||
"\n"
|
||||
"\n"
|
||||
"typedef struct \n"
|
||||
"{\n"
|
||||
" int m_valInt0;\n"
|
||||
" int m_valInt1;\n"
|
||||
" int m_valInt2;\n"
|
||||
" int m_valInt3;\n"
|
||||
"\n"
|
||||
" float m_val0;\n"
|
||||
" float m_val1;\n"
|
||||
" float m_val2;\n"
|
||||
" float m_val3;\n"
|
||||
"} SolverDebugInfo;\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"typedef struct\n"
|
||||
"{\n"
|
||||
" int m_nContacts;\n"
|
||||
@@ -492,8 +426,6 @@ static const char* solverSetupCL= \
|
||||
" float m_scale;\n"
|
||||
" int m_nSplit;\n"
|
||||
"} ConstBufferSSD;\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"void btPlaneSpace1 (float4 n, float4* p, float4* q);\n"
|
||||
" void btPlaneSpace1 (float4 n, float4* p, float4* q)\n"
|
||||
"{\n"
|
||||
@@ -522,84 +454,68 @@ static const char* solverSetupCL= \
|
||||
" q[0].z = a*k;\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"void setConstraint4( const float4 posA, const float4 linVelA, const float4 angVelA, float invMassA, const Matrix3x3 invInertiaA,\n"
|
||||
" const float4 posB, const float4 linVelB, const float4 angVelB, float invMassB, const Matrix3x3 invInertiaB, \n"
|
||||
" __global Contact4* src, float dt, float positionDrift, float positionConstraintCoeff,\n"
|
||||
" __global struct b3Contact4Data* src, float dt, float positionDrift, float positionConstraintCoeff,\n"
|
||||
" Constraint4* dstC )\n"
|
||||
"{\n"
|
||||
" dstC->m_bodyA = abs(src->m_bodyAPtrAndSignBit);\n"
|
||||
" dstC->m_bodyB = abs(src->m_bodyBPtrAndSignBit);\n"
|
||||
"\n"
|
||||
" float dtInv = 1.f/dt;\n"
|
||||
" for(int ic=0; ic<4; ic++)\n"
|
||||
" {\n"
|
||||
" dstC->m_appliedRambdaDt[ic] = 0.f;\n"
|
||||
" }\n"
|
||||
" dstC->m_fJacCoeffInv[0] = dstC->m_fJacCoeffInv[1] = 0.f;\n"
|
||||
"\n"
|
||||
"\n"
|
||||
" dstC->m_linear = -src->m_worldNormal;\n"
|
||||
" dstC->m_linear.w = 0.7f ;//src->getFrictionCoeff() );\n"
|
||||
" for(int ic=0; ic<4; ic++)\n"
|
||||
" {\n"
|
||||
" float4 r0 = src->m_worldPos[ic] - posA;\n"
|
||||
" float4 r1 = src->m_worldPos[ic] - posB;\n"
|
||||
"\n"
|
||||
" if( ic >= src->m_worldNormal.w )//npoints\n"
|
||||
" {\n"
|
||||
" dstC->m_jacCoeffInv[ic] = 0.f;\n"
|
||||
" continue;\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" float relVelN;\n"
|
||||
" {\n"
|
||||
" float4 linear, angular0, angular1;\n"
|
||||
" setLinearAndAngular(src->m_worldNormal, r0, r1, &linear, &angular0, &angular1);\n"
|
||||
"\n"
|
||||
" dstC->m_jacCoeffInv[ic] = calcJacCoeff(linear, -linear, angular0, angular1,\n"
|
||||
" invMassA, &invInertiaA, invMassB, &invInertiaB );\n"
|
||||
"\n"
|
||||
" relVelN = calcRelVel(linear, -linear, angular0, angular1,\n"
|
||||
" linVelA, angVelA, linVelB, angVelB);\n"
|
||||
"\n"
|
||||
" float e = 0.f;//src->getRestituitionCoeff();\n"
|
||||
" if( relVelN*relVelN < 0.004f ) e = 0.f;\n"
|
||||
"\n"
|
||||
" dstC->m_b[ic] = e*relVelN;\n"
|
||||
" //float penetration = src->m_worldPos[ic].w;\n"
|
||||
" dstC->m_b[ic] += (src->m_worldPos[ic].w + positionDrift)*positionConstraintCoeff*dtInv;\n"
|
||||
" dstC->m_appliedRambdaDt[ic] = 0.f;\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" if( src->m_worldNormal.w > 0 )//npoints\n"
|
||||
" { // prepare friction\n"
|
||||
" float4 center = make_float4(0.f);\n"
|
||||
" for(int i=0; i<src->m_worldNormal.w; i++) \n"
|
||||
" center += src->m_worldPos[i];\n"
|
||||
" center /= (float)src->m_worldNormal.w;\n"
|
||||
"\n"
|
||||
" float4 tangent[2];\n"
|
||||
" btPlaneSpace1(src->m_worldNormal,&tangent[0],&tangent[1]);\n"
|
||||
" \n"
|
||||
" float4 r[2];\n"
|
||||
" r[0] = center - posA;\n"
|
||||
" r[1] = center - posB;\n"
|
||||
"\n"
|
||||
" for(int i=0; i<2; i++)\n"
|
||||
" {\n"
|
||||
" float4 linear, angular0, angular1;\n"
|
||||
" setLinearAndAngular(tangent[i], r[0], r[1], &linear, &angular0, &angular1);\n"
|
||||
"\n"
|
||||
" dstC->m_fJacCoeffInv[i] = calcJacCoeff(linear, -linear, angular0, angular1,\n"
|
||||
" invMassA, &invInertiaA, invMassB, &invInertiaB );\n"
|
||||
" dstC->m_fAppliedRambdaDt[i] = 0.f;\n"
|
||||
" }\n"
|
||||
" dstC->m_center = center;\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" for(int i=0; i<4; i++)\n"
|
||||
" {\n"
|
||||
" if( i<src->m_worldNormal.w )\n"
|
||||
@@ -612,7 +528,6 @@ static const char* solverSetupCL= \
|
||||
" }\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"typedef struct\n"
|
||||
"{\n"
|
||||
" int m_nContacts;\n"
|
||||
@@ -620,10 +535,9 @@ static const char* solverSetupCL= \
|
||||
" float m_positionDrift;\n"
|
||||
" float m_positionConstraintCoeff;\n"
|
||||
"} ConstBufferCTC;\n"
|
||||
"\n"
|
||||
"__kernel\n"
|
||||
"__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n"
|
||||
"void ContactToConstraintKernel(__global Contact4* gContact, __global Body* gBodies, __global Shape* gShapes, __global Constraint4* gConstraintOut, \n"
|
||||
"void ContactToConstraintKernel(__global struct b3Contact4Data* gContact, __global Body* gBodies, __global Shape* gShapes, __global Constraint4* gConstraintOut, \n"
|
||||
"int nContacts,\n"
|
||||
"float dt,\n"
|
||||
"float positionDrift,\n"
|
||||
@@ -636,33 +550,23 @@ static const char* solverSetupCL= \
|
||||
" {\n"
|
||||
" int aIdx = abs(gContact[gIdx].m_bodyAPtrAndSignBit);\n"
|
||||
" int bIdx = abs(gContact[gIdx].m_bodyBPtrAndSignBit);\n"
|
||||
"\n"
|
||||
" float4 posA = gBodies[aIdx].m_pos;\n"
|
||||
" float4 linVelA = gBodies[aIdx].m_linVel;\n"
|
||||
" float4 angVelA = gBodies[aIdx].m_angVel;\n"
|
||||
" float invMassA = gBodies[aIdx].m_invMass;\n"
|
||||
" Matrix3x3 invInertiaA = gShapes[aIdx].m_invInertia;\n"
|
||||
"\n"
|
||||
" float4 posB = gBodies[bIdx].m_pos;\n"
|
||||
" float4 linVelB = gBodies[bIdx].m_linVel;\n"
|
||||
" float4 angVelB = gBodies[bIdx].m_angVel;\n"
|
||||
" float invMassB = gBodies[bIdx].m_invMass;\n"
|
||||
" Matrix3x3 invInertiaB = gShapes[bIdx].m_invInertia;\n"
|
||||
"\n"
|
||||
" Constraint4 cs;\n"
|
||||
"\n"
|
||||
" setConstraint4( posA, linVelA, angVelA, invMassA, invInertiaA, posB, linVelB, angVelB, invMassB, invInertiaB,\n"
|
||||
" &gContact[gIdx], dt, positionDrift, positionConstraintCoeff,\n"
|
||||
" &cs );\n"
|
||||
" \n"
|
||||
" cs.m_batchIdx = gContact[gIdx].m_batchIdx;\n"
|
||||
"\n"
|
||||
" gConstraintOut[gIdx] = cs;\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
;
|
||||
|
||||
@@ -14,6 +14,8 @@ subject to the following restrictions:
|
||||
//Originally written by Takahiro Harada
|
||||
|
||||
|
||||
#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Contact4Data.h"
|
||||
|
||||
#pragma OPENCL EXTENSION cl_amd_printf : enable
|
||||
#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable
|
||||
#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
|
||||
@@ -377,22 +379,7 @@ typedef struct
|
||||
u32 m_paddings[1];
|
||||
} Constraint4;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
float4 m_worldPos[4];
|
||||
float4 m_worldNormal;
|
||||
u32 m_coeffs;
|
||||
int m_batchIdx;
|
||||
|
||||
int m_bodyAPtrAndSignBit;
|
||||
int m_bodyBPtrAndSignBit;
|
||||
|
||||
int m_childIndexA;
|
||||
int m_childIndexB;
|
||||
int m_unused1;
|
||||
int m_unused2;
|
||||
|
||||
} Contact4;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
@@ -435,7 +422,7 @@ typedef struct
|
||||
// others
|
||||
__kernel
|
||||
__attribute__((reqd_work_group_size(WG_SIZE,1,1)))
|
||||
void ReorderContactKernel(__global Contact4* in, __global Contact4* out, __global int2* sortData, int4 cb )
|
||||
void ReorderContactKernel(__global struct b3Contact4Data* in, __global struct b3Contact4Data* out, __global int2* sortData, int4 cb )
|
||||
{
|
||||
int nContacts = cb.x;
|
||||
int gIdx = GET_GLOBAL_IDX;
|
||||
@@ -448,7 +435,7 @@ void ReorderContactKernel(__global Contact4* in, __global Contact4* out, __globa
|
||||
}
|
||||
|
||||
__kernel __attribute__((reqd_work_group_size(WG_SIZE,1,1)))
|
||||
void SetDeterminismSortDataChildShapeB(__global Contact4* contactsIn, __global int2* sortDataOut, int nContacts)
|
||||
void SetDeterminismSortDataChildShapeB(__global struct b3Contact4Data* contactsIn, __global int2* sortDataOut, int nContacts)
|
||||
{
|
||||
int gIdx = GET_GLOBAL_IDX;
|
||||
|
||||
@@ -462,7 +449,7 @@ void SetDeterminismSortDataChildShapeB(__global Contact4* contactsIn, __global i
|
||||
}
|
||||
|
||||
__kernel __attribute__((reqd_work_group_size(WG_SIZE,1,1)))
|
||||
void SetDeterminismSortDataChildShapeA(__global Contact4* contactsIn, __global int2* sortDataInOut, int nContacts)
|
||||
void SetDeterminismSortDataChildShapeA(__global struct b3Contact4Data* contactsIn, __global int2* sortDataInOut, int nContacts)
|
||||
{
|
||||
int gIdx = GET_GLOBAL_IDX;
|
||||
|
||||
@@ -478,7 +465,7 @@ void SetDeterminismSortDataChildShapeA(__global Contact4* contactsIn, __global i
|
||||
}
|
||||
|
||||
__kernel __attribute__((reqd_work_group_size(WG_SIZE,1,1)))
|
||||
void SetDeterminismSortDataBodyA(__global Contact4* contactsIn, __global int2* sortDataInOut, int nContacts)
|
||||
void SetDeterminismSortDataBodyA(__global struct b3Contact4Data* contactsIn, __global int2* sortDataInOut, int nContacts)
|
||||
{
|
||||
int gIdx = GET_GLOBAL_IDX;
|
||||
|
||||
@@ -496,7 +483,7 @@ void SetDeterminismSortDataBodyA(__global Contact4* contactsIn, __global int2* s
|
||||
|
||||
__kernel
|
||||
__attribute__((reqd_work_group_size(WG_SIZE,1,1)))
|
||||
void SetDeterminismSortDataBodyB(__global Contact4* contactsIn, __global int2* sortDataInOut, int nContacts)
|
||||
void SetDeterminismSortDataBodyB(__global struct b3Contact4Data* contactsIn, __global int2* sortDataInOut, int nContacts)
|
||||
{
|
||||
int gIdx = GET_GLOBAL_IDX;
|
||||
|
||||
@@ -552,7 +539,7 @@ static __constant const int gridTable8x8[] =
|
||||
|
||||
__kernel
|
||||
__attribute__((reqd_work_group_size(WG_SIZE,1,1)))
|
||||
void SetSortDataKernel(__global Contact4* gContact, __global Body* gBodies, __global int2* gSortDataOut,
|
||||
void SetSortDataKernel(__global struct b3Contact4Data* gContact, __global Body* gBodies, __global int2* gSortDataOut,
|
||||
int nContacts,float scale,int4 nSplit,int staticIdx)
|
||||
|
||||
{
|
||||
@@ -613,7 +600,7 @@ int nContacts,float scale,int4 nSplit,int staticIdx)
|
||||
|
||||
__kernel
|
||||
__attribute__((reqd_work_group_size(WG_SIZE,1,1)))
|
||||
void CopyConstraintKernel(__global Contact4* gIn, __global Contact4* gOut, int4 cb )
|
||||
void CopyConstraintKernel(__global struct b3Contact4Data* gIn, __global struct b3Contact4Data* gOut, int4 cb )
|
||||
{
|
||||
int gIdx = GET_GLOBAL_IDX;
|
||||
if( gIdx < cb.x )
|
||||
|
||||
@@ -2,37 +2,71 @@
|
||||
static const char* solverSetup2CL= \
|
||||
"/*\n"
|
||||
"Copyright (c) 2012 Advanced Micro Devices, Inc. \n"
|
||||
"\n"
|
||||
"This software is provided 'as-is', without any express or implied warranty.\n"
|
||||
"In no event will the authors be held liable for any damages arising from the use of this software.\n"
|
||||
"Permission is granted to anyone to use this software for any purpose, \n"
|
||||
"including commercial applications, and to alter it and redistribute it freely, \n"
|
||||
"subject to the following restrictions:\n"
|
||||
"\n"
|
||||
"1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n"
|
||||
"2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n"
|
||||
"3. This notice may not be removed or altered from any source distribution.\n"
|
||||
"*/\n"
|
||||
"//Originally written by Takahiro Harada\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"#ifndef B3_CONTACT4DATA_H\n"
|
||||
"#define B3_CONTACT4DATA_H\n"
|
||||
"#ifndef B3_FLOAT4_H\n"
|
||||
"#define B3_FLOAT4_H\n"
|
||||
"#ifndef B3_PLATFORM_DEFINITIONS_H\n"
|
||||
"#define B3_PLATFORM_DEFINITIONS_H\n"
|
||||
"struct MyTest\n"
|
||||
"{\n"
|
||||
" int bla;\n"
|
||||
"};\n"
|
||||
"#endif\n"
|
||||
"#ifdef __cplusplus\n"
|
||||
"#else//bla\n"
|
||||
" typedef float4 b3Float4;\n"
|
||||
"#endif \n"
|
||||
"#endif //B3_FLOAT4_H\n"
|
||||
"typedef struct b3Contact4Data b3Contact4Data_t;\n"
|
||||
"struct b3Contact4Data\n"
|
||||
"{\n"
|
||||
" b3Float4 m_worldPos[4];\n"
|
||||
"// b3Float4 m_localPosB[4];\n"
|
||||
" b3Float4 m_worldNormal; // w: m_nPoints\n"
|
||||
" unsigned short m_restituitionCoeffCmp;\n"
|
||||
" unsigned short m_frictionCoeffCmp;\n"
|
||||
" int m_batchIdx;\n"
|
||||
" int m_bodyAPtrAndSignBit;//x:m_bodyAPtr, y:m_bodyBPtr\n"
|
||||
" int m_bodyBPtrAndSignBit;\n"
|
||||
" int m_childIndexA;\n"
|
||||
" int m_childIndexB;\n"
|
||||
" int m_unused1;\n"
|
||||
" int m_unused2;\n"
|
||||
" b3Float4 m_localPosA;\n"
|
||||
"};\n"
|
||||
"inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)\n"
|
||||
"{\n"
|
||||
" return (int)contact->m_worldNormal.w;\n"
|
||||
"};\n"
|
||||
"inline void b3Contact4Data_setNumPoints(struct b3Contact4Data* contact, int numPoints)\n"
|
||||
"{\n"
|
||||
" contact->m_worldNormal.w = (float)numPoints;\n"
|
||||
"};\n"
|
||||
"#endif //B3_CONTACT4DATA_H\n"
|
||||
"#pragma OPENCL EXTENSION cl_amd_printf : enable\n"
|
||||
"#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n"
|
||||
"#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n"
|
||||
"#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n"
|
||||
"#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"#ifdef cl_ext_atomic_counters_32\n"
|
||||
"#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable\n"
|
||||
"#else\n"
|
||||
"#define counter32_t volatile global int*\n"
|
||||
"#endif\n"
|
||||
"\n"
|
||||
"typedef unsigned int u32;\n"
|
||||
"typedef unsigned short u16;\n"
|
||||
"typedef unsigned char u8;\n"
|
||||
"\n"
|
||||
"#define GET_GROUP_IDX get_group_id(0)\n"
|
||||
"#define GET_LOCAL_IDX get_local_id(0)\n"
|
||||
"#define GET_GLOBAL_IDX get_global_id(0)\n"
|
||||
@@ -46,22 +80,15 @@ static const char* solverSetup2CL= \
|
||||
"#define AtomAdd(x, value) atom_add(&(x), value)\n"
|
||||
"#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value )\n"
|
||||
"#define AtomXhg(x, value) atom_xchg ( &(x), value )\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"#define SELECT_UINT4( b, a, condition ) select( b,a,condition )\n"
|
||||
"\n"
|
||||
"#define make_float4 (float4)\n"
|
||||
"#define make_float2 (float2)\n"
|
||||
"#define make_uint4 (uint4)\n"
|
||||
"#define make_int4 (int4)\n"
|
||||
"#define make_uint2 (uint2)\n"
|
||||
"#define make_int2 (int2)\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"#define max2 max\n"
|
||||
"#define min2 min\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"///////////////////////////////////////\n"
|
||||
"// Vector\n"
|
||||
"///////////////////////////////////////\n"
|
||||
@@ -71,52 +98,43 @@ static const char* solverSetup2CL= \
|
||||
" return native_divide(numerator, denominator); \n"
|
||||
"// return numerator/denominator; \n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float4 fastDiv4(float4 numerator, float4 denominator)\n"
|
||||
"{\n"
|
||||
" return native_divide(numerator, denominator); \n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float fastSqrtf(float f2)\n"
|
||||
"{\n"
|
||||
" return native_sqrt(f2);\n"
|
||||
"// return sqrt(f2);\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float fastRSqrt(float f2)\n"
|
||||
"{\n"
|
||||
" return native_rsqrt(f2);\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float fastLength4(float4 v)\n"
|
||||
"{\n"
|
||||
" return fast_length(v);\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float4 fastNormalize4(float4 v)\n"
|
||||
"{\n"
|
||||
" return fast_normalize(v);\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float sqrtf(float a)\n"
|
||||
"{\n"
|
||||
"// return sqrt(a);\n"
|
||||
" return native_sqrt(a);\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float4 cross3(float4 a, float4 b)\n"
|
||||
"{\n"
|
||||
" return cross(a,b);\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float dot3F4(float4 a, float4 b)\n"
|
||||
"{\n"
|
||||
@@ -124,26 +142,22 @@ static const char* solverSetup2CL= \
|
||||
" float4 b1 = make_float4(b.xyz,0.f);\n"
|
||||
" return dot(a1, b1);\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float length3(const float4 a)\n"
|
||||
"{\n"
|
||||
" return sqrtf(dot3F4(a,a));\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float dot4(const float4 a, const float4 b)\n"
|
||||
"{\n"
|
||||
" return dot( a, b );\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"// for height\n"
|
||||
"__inline\n"
|
||||
"float dot3w1(const float4 point, const float4 eqn)\n"
|
||||
"{\n"
|
||||
" return dot3F4(point,eqn) + eqn.w;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float4 normalize3(const float4 a)\n"
|
||||
"{\n"
|
||||
@@ -152,14 +166,12 @@ static const char* solverSetup2CL= \
|
||||
"// float length = sqrtf(dot3F4(a, a));\n"
|
||||
"// return 1.f/length * a;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float4 normalize4(const float4 a)\n"
|
||||
"{\n"
|
||||
" float length = sqrtf(dot4(a, a));\n"
|
||||
" return 1.f/length * a;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float4 createEquation(const float4 a, const float4 b, const float4 c)\n"
|
||||
"{\n"
|
||||
@@ -170,34 +182,25 @@ static const char* solverSetup2CL= \
|
||||
" eqn.w = -dot3F4(eqn,a);\n"
|
||||
" return eqn;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"///////////////////////////////////////\n"
|
||||
"// Matrix3x3\n"
|
||||
"///////////////////////////////////////\n"
|
||||
"\n"
|
||||
"typedef struct\n"
|
||||
"{\n"
|
||||
" float4 m_row[3];\n"
|
||||
"}Matrix3x3;\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"Matrix3x3 mtZero();\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"Matrix3x3 mtIdentity();\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"Matrix3x3 mtTranspose(Matrix3x3 m);\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"Matrix3x3 mtMul(Matrix3x3 a, Matrix3x3 b);\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float4 mtMul1(Matrix3x3 a, float4 b);\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float4 mtMul3(float4 a, Matrix3x3 b);\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"Matrix3x3 mtZero()\n"
|
||||
"{\n"
|
||||
@@ -207,7 +210,6 @@ static const char* solverSetup2CL= \
|
||||
" m.m_row[2] = (float4)(0.f);\n"
|
||||
" return m;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"Matrix3x3 mtIdentity()\n"
|
||||
"{\n"
|
||||
@@ -217,7 +219,6 @@ static const char* solverSetup2CL= \
|
||||
" m.m_row[2] = (float4)(0,0,1,0);\n"
|
||||
" return m;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"Matrix3x3 mtTranspose(Matrix3x3 m)\n"
|
||||
"{\n"
|
||||
@@ -227,7 +228,6 @@ static const char* solverSetup2CL= \
|
||||
" out.m_row[2] = (float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);\n"
|
||||
" return out;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"Matrix3x3 mtMul(Matrix3x3 a, Matrix3x3 b)\n"
|
||||
"{\n"
|
||||
@@ -248,7 +248,6 @@ static const char* solverSetup2CL= \
|
||||
" }\n"
|
||||
" return ans;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float4 mtMul1(Matrix3x3 a, float4 b)\n"
|
||||
"{\n"
|
||||
@@ -259,43 +258,30 @@ static const char* solverSetup2CL= \
|
||||
" ans.w = 0.f;\n"
|
||||
" return ans;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float4 mtMul3(float4 a, Matrix3x3 b)\n"
|
||||
"{\n"
|
||||
" float4 colx = make_float4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n"
|
||||
" float4 coly = make_float4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n"
|
||||
" float4 colz = make_float4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n"
|
||||
"\n"
|
||||
" float4 ans;\n"
|
||||
" ans.x = dot3F4( a, colx );\n"
|
||||
" ans.y = dot3F4( a, coly );\n"
|
||||
" ans.z = dot3F4( a, colz );\n"
|
||||
" return ans;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"///////////////////////////////////////\n"
|
||||
"// Quaternion\n"
|
||||
"///////////////////////////////////////\n"
|
||||
"\n"
|
||||
"typedef float4 Quaternion;\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"Quaternion qtMul(Quaternion a, Quaternion b);\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"Quaternion qtNormalize(Quaternion in);\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float4 qtRotate(Quaternion q, float4 vec);\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"Quaternion qtInvert(Quaternion q);\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"Quaternion qtMul(Quaternion a, Quaternion b)\n"
|
||||
"{\n"
|
||||
@@ -306,7 +292,6 @@ static const char* solverSetup2CL= \
|
||||
" ans.w = a.w*b.w - dot3F4(a, b);\n"
|
||||
" return ans;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"Quaternion qtNormalize(Quaternion in)\n"
|
||||
"{\n"
|
||||
@@ -323,43 +308,33 @@ static const char* solverSetup2CL= \
|
||||
" float4 out = qtMul(qtMul(q,vcpy),qInv);\n"
|
||||
" return out;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"Quaternion qtInvert(Quaternion q)\n"
|
||||
"{\n"
|
||||
" return (Quaternion)(-q.xyz, q.w);\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float4 qtInvRotate(const Quaternion q, float4 vec)\n"
|
||||
"{\n"
|
||||
" return qtRotate( qtInvert( q ), vec );\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"#define WG_SIZE 64\n"
|
||||
"\n"
|
||||
"typedef struct\n"
|
||||
"{\n"
|
||||
" float4 m_pos;\n"
|
||||
" Quaternion m_quat;\n"
|
||||
" float4 m_linVel;\n"
|
||||
" float4 m_angVel;\n"
|
||||
"\n"
|
||||
" u32 m_shapeIdx;\n"
|
||||
" float m_invMass;\n"
|
||||
" float m_restituitionCoeff;\n"
|
||||
" float m_frictionCoeff;\n"
|
||||
"} Body;\n"
|
||||
"\n"
|
||||
"typedef struct\n"
|
||||
"{\n"
|
||||
" Matrix3x3 m_invInertia;\n"
|
||||
" Matrix3x3 m_initInvInertia;\n"
|
||||
"} Shape;\n"
|
||||
"\n"
|
||||
"typedef struct\n"
|
||||
"{\n"
|
||||
" float4 m_linear;\n"
|
||||
@@ -368,34 +343,13 @@ static const char* solverSetup2CL= \
|
||||
" float m_jacCoeffInv[4];\n"
|
||||
" float m_b[4];\n"
|
||||
" float m_appliedRambdaDt[4];\n"
|
||||
"\n"
|
||||
" float m_fJacCoeffInv[2]; \n"
|
||||
" float m_fAppliedRambdaDt[2]; \n"
|
||||
"\n"
|
||||
" u32 m_bodyA;\n"
|
||||
" u32 m_bodyB;\n"
|
||||
"\n"
|
||||
" int m_batchIdx;\n"
|
||||
" u32 m_paddings[1];\n"
|
||||
"} Constraint4;\n"
|
||||
"\n"
|
||||
"typedef struct\n"
|
||||
"{\n"
|
||||
" float4 m_worldPos[4];\n"
|
||||
" float4 m_worldNormal;\n"
|
||||
" u32 m_coeffs;\n"
|
||||
" int m_batchIdx;\n"
|
||||
"\n"
|
||||
" int m_bodyAPtrAndSignBit;\n"
|
||||
" int m_bodyBPtrAndSignBit;\n"
|
||||
"\n"
|
||||
" int m_childIndexA;\n"
|
||||
" int m_childIndexB;\n"
|
||||
" int m_unused1;\n"
|
||||
" int m_unused2;\n"
|
||||
"\n"
|
||||
"} Contact4;\n"
|
||||
"\n"
|
||||
"typedef struct\n"
|
||||
"{\n"
|
||||
" int m_nConstraints;\n"
|
||||
@@ -404,7 +358,6 @@ static const char* solverSetup2CL= \
|
||||
" int m_nSplit;\n"
|
||||
"// int m_paddings[1];\n"
|
||||
"} ConstBuffer;\n"
|
||||
"\n"
|
||||
"typedef struct\n"
|
||||
"{\n"
|
||||
" int m_solveFriction;\n"
|
||||
@@ -414,46 +367,34 @@ static const char* solverSetup2CL= \
|
||||
"// int m_paddings[1];\n"
|
||||
"} ConstBufferBatchSolve;\n"
|
||||
" \n"
|
||||
"\n"
|
||||
" \n"
|
||||
"\n"
|
||||
"\n"
|
||||
"typedef struct \n"
|
||||
"{\n"
|
||||
" int m_valInt0;\n"
|
||||
" int m_valInt1;\n"
|
||||
" int m_valInt2;\n"
|
||||
" int m_valInt3;\n"
|
||||
"\n"
|
||||
" float m_val0;\n"
|
||||
" float m_val1;\n"
|
||||
" float m_val2;\n"
|
||||
" float m_val3;\n"
|
||||
"} SolverDebugInfo;\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"// others\n"
|
||||
"__kernel\n"
|
||||
"__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n"
|
||||
"void ReorderContactKernel(__global Contact4* in, __global Contact4* out, __global int2* sortData, int4 cb )\n"
|
||||
"void ReorderContactKernel(__global struct b3Contact4Data* in, __global struct b3Contact4Data* out, __global int2* sortData, int4 cb )\n"
|
||||
"{\n"
|
||||
" int nContacts = cb.x;\n"
|
||||
" int gIdx = GET_GLOBAL_IDX;\n"
|
||||
"\n"
|
||||
" if( gIdx < nContacts )\n"
|
||||
" {\n"
|
||||
" int srcIdx = sortData[gIdx].y;\n"
|
||||
" out[gIdx] = in[srcIdx];\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__kernel __attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n"
|
||||
"void SetDeterminismSortDataChildShapeB(__global Contact4* contactsIn, __global int2* sortDataOut, int nContacts)\n"
|
||||
"void SetDeterminismSortDataChildShapeB(__global struct b3Contact4Data* contactsIn, __global int2* sortDataOut, int nContacts)\n"
|
||||
"{\n"
|
||||
" int gIdx = GET_GLOBAL_IDX;\n"
|
||||
"\n"
|
||||
" if( gIdx < nContacts )\n"
|
||||
" {\n"
|
||||
" int2 sd;\n"
|
||||
@@ -462,12 +403,10 @@ static const char* solverSetup2CL= \
|
||||
" sortDataOut[gIdx] = sd;\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__kernel __attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n"
|
||||
"void SetDeterminismSortDataChildShapeA(__global Contact4* contactsIn, __global int2* sortDataInOut, int nContacts)\n"
|
||||
"void SetDeterminismSortDataChildShapeA(__global struct b3Contact4Data* contactsIn, __global int2* sortDataInOut, int nContacts)\n"
|
||||
"{\n"
|
||||
" int gIdx = GET_GLOBAL_IDX;\n"
|
||||
"\n"
|
||||
" if( gIdx < nContacts )\n"
|
||||
" {\n"
|
||||
" int2 sdIn;\n"
|
||||
@@ -478,12 +417,10 @@ static const char* solverSetup2CL= \
|
||||
" sortDataInOut[gIdx] = sdOut;\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__kernel __attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n"
|
||||
"void SetDeterminismSortDataBodyA(__global Contact4* contactsIn, __global int2* sortDataInOut, int nContacts)\n"
|
||||
"void SetDeterminismSortDataBodyA(__global struct b3Contact4Data* contactsIn, __global int2* sortDataInOut, int nContacts)\n"
|
||||
"{\n"
|
||||
" int gIdx = GET_GLOBAL_IDX;\n"
|
||||
"\n"
|
||||
" if( gIdx < nContacts )\n"
|
||||
" {\n"
|
||||
" int2 sdIn;\n"
|
||||
@@ -494,14 +431,11 @@ static const char* solverSetup2CL= \
|
||||
" sortDataInOut[gIdx] = sdOut;\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"__kernel\n"
|
||||
"__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n"
|
||||
"void SetDeterminismSortDataBodyB(__global Contact4* contactsIn, __global int2* sortDataInOut, int nContacts)\n"
|
||||
"void SetDeterminismSortDataBodyB(__global struct b3Contact4Data* contactsIn, __global int2* sortDataInOut, int nContacts)\n"
|
||||
"{\n"
|
||||
" int gIdx = GET_GLOBAL_IDX;\n"
|
||||
"\n"
|
||||
" if( gIdx < nContacts )\n"
|
||||
" {\n"
|
||||
" int2 sdIn;\n"
|
||||
@@ -512,10 +446,6 @@ static const char* solverSetup2CL= \
|
||||
" sortDataInOut[gIdx] = sdOut;\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"typedef struct\n"
|
||||
"{\n"
|
||||
" int m_nContacts;\n"
|
||||
@@ -523,8 +453,6 @@ static const char* solverSetup2CL= \
|
||||
" float m_scale;\n"
|
||||
" int m_nSplit;\n"
|
||||
"} ConstBufferSSD;\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"static __constant const int gridTable4x4[] = \n"
|
||||
"{\n"
|
||||
" 0,1,17,16,\n"
|
||||
@@ -532,7 +460,6 @@ static const char* solverSetup2CL= \
|
||||
" 17,18,32,3,\n"
|
||||
" 16,19,3,34\n"
|
||||
"};\n"
|
||||
"\n"
|
||||
"static __constant const int gridTable8x8[] = \n"
|
||||
"{\n"
|
||||
" 0, 2, 3, 16, 17, 18, 19, 1,\n"
|
||||
@@ -545,18 +472,12 @@ static const char* solverSetup2CL= \
|
||||
" 197,27,214,213,212,199,198,196\n"
|
||||
" \n"
|
||||
"};\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"#define USE_SPATIAL_BATCHING 1\n"
|
||||
"#define USE_4x4_GRID 1\n"
|
||||
"\n"
|
||||
"__kernel\n"
|
||||
"__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n"
|
||||
"void SetSortDataKernel(__global Contact4* gContact, __global Body* gBodies, __global int2* gSortDataOut, \n"
|
||||
"void SetSortDataKernel(__global struct b3Contact4Data* gContact, __global Body* gBodies, __global int2* gSortDataOut, \n"
|
||||
"int nContacts,float scale,int4 nSplit,int staticIdx)\n"
|
||||
"\n"
|
||||
"{\n"
|
||||
" int gIdx = GET_GLOBAL_IDX;\n"
|
||||
" \n"
|
||||
@@ -564,13 +485,10 @@ static const char* solverSetup2CL= \
|
||||
" {\n"
|
||||
" int aPtrAndSignBit = gContact[gIdx].m_bodyAPtrAndSignBit;\n"
|
||||
" int bPtrAndSignBit = gContact[gIdx].m_bodyBPtrAndSignBit;\n"
|
||||
"\n"
|
||||
" int aIdx = abs(aPtrAndSignBit );\n"
|
||||
" int bIdx = abs(bPtrAndSignBit);\n"
|
||||
"\n"
|
||||
" bool aStatic = (aPtrAndSignBit<0) ||(aPtrAndSignBit==staticIdx);\n"
|
||||
" bool bStatic = (bPtrAndSignBit<0) ||(bPtrAndSignBit==staticIdx);\n"
|
||||
"\n"
|
||||
"#if USE_SPATIAL_BATCHING \n"
|
||||
" int idx = (aStatic)? bIdx: aIdx;\n"
|
||||
" float4 p = gBodies[idx].m_pos;\n"
|
||||
@@ -587,7 +505,6 @@ static const char* solverSetup2CL= \
|
||||
" aa = bb;\n"
|
||||
" if (bStatic)\n"
|
||||
" bb = aa;\n"
|
||||
"\n"
|
||||
" int gridIndex = aa + bb*4;\n"
|
||||
" int newIndex = gridTable4x4[gridIndex];\n"
|
||||
" #else//USE_4x4_GRID\n"
|
||||
@@ -597,13 +514,10 @@ static const char* solverSetup2CL= \
|
||||
" aa = bb;\n"
|
||||
" if (bStatic)\n"
|
||||
" bb = aa;\n"
|
||||
"\n"
|
||||
" int gridIndex = aa + bb*8;\n"
|
||||
" int newIndex = gridTable8x8[gridIndex];\n"
|
||||
" #endif//USE_4x4_GRID\n"
|
||||
"#endif//USE_SPATIAL_BATCHING\n"
|
||||
"\n"
|
||||
"\n"
|
||||
" gSortDataOut[gIdx].x = newIndex;\n"
|
||||
" gSortDataOut[gIdx].y = gIdx;\n"
|
||||
" }\n"
|
||||
@@ -612,10 +526,9 @@ static const char* solverSetup2CL= \
|
||||
" gSortDataOut[gIdx].x = 0xffffffff;\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__kernel\n"
|
||||
"__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n"
|
||||
"void CopyConstraintKernel(__global Contact4* gIn, __global Contact4* gOut, int4 cb )\n"
|
||||
"void CopyConstraintKernel(__global struct b3Contact4Data* gIn, __global struct b3Contact4Data* gOut, int4 cb )\n"
|
||||
"{\n"
|
||||
" int gIdx = GET_GLOBAL_IDX;\n"
|
||||
" if( gIdx < cb.x )\n"
|
||||
@@ -623,7 +536,4 @@ static const char* solverSetup2CL= \
|
||||
" gOut[gIdx] = gIn[gIdx];\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
;
|
||||
|
||||
@@ -13,6 +13,8 @@ subject to the following restrictions:
|
||||
*/
|
||||
//Originally written by Erwin Coumans
|
||||
|
||||
#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Contact4Data.h"
|
||||
|
||||
#pragma OPENCL EXTENSION cl_amd_printf : enable
|
||||
#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable
|
||||
#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
|
||||
@@ -380,25 +382,10 @@ typedef struct
|
||||
u32 m_paddings;
|
||||
} Constraint4;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
float4 m_worldPos[4];
|
||||
float4 m_worldNormal;
|
||||
u32 m_coeffs;
|
||||
int m_batchIdx;
|
||||
|
||||
int m_bodyAPtrAndSignBit;
|
||||
int m_bodyBPtrAndSignBit;
|
||||
|
||||
int m_childIndexA;
|
||||
int m_childIndexB;
|
||||
int m_unused1;
|
||||
int m_unused2;
|
||||
|
||||
} Contact4;
|
||||
|
||||
|
||||
__kernel void CountBodiesKernel(__global Contact4* manifoldPtr, __global unsigned int* bodyCount, __global int2* contactConstraintOffsets, int numContactManifolds, int fixedBodyIndex)
|
||||
|
||||
__kernel void CountBodiesKernel(__global struct b3Contact4Data* manifoldPtr, __global unsigned int* bodyCount, __global int2* contactConstraintOffsets, int numContactManifolds, int fixedBodyIndex)
|
||||
{
|
||||
int i = GET_GLOBAL_IDX;
|
||||
|
||||
@@ -844,7 +831,7 @@ __kernel void UpdateBodyVelocitiesKernel(__global Body* gBodies,__global int* of
|
||||
|
||||
void setConstraint4( const float4 posA, const float4 linVelA, const float4 angVelA, float invMassA, const Matrix3x3 invInertiaA,
|
||||
const float4 posB, const float4 linVelB, const float4 angVelB, float invMassB, const Matrix3x3 invInertiaB,
|
||||
__global Contact4* src, float dt, float positionDrift, float positionConstraintCoeff,float countA, float countB,
|
||||
__global struct b3Contact4Data* src, float dt, float positionDrift, float positionConstraintCoeff,float countA, float countB,
|
||||
Constraint4* dstC )
|
||||
{
|
||||
dstC->m_bodyA = abs(src->m_bodyAPtrAndSignBit);
|
||||
@@ -934,7 +921,7 @@ void setConstraint4( const float4 posA, const float4 linVelA, const float4 angVe
|
||||
|
||||
__kernel
|
||||
__attribute__((reqd_work_group_size(WG_SIZE,1,1)))
|
||||
void ContactToConstraintSplitKernel(__global const Contact4* gContact, __global const Body* gBodies, __global const Shape* gShapes, __global Constraint4* gConstraintOut,
|
||||
void ContactToConstraintSplitKernel(__global const struct b3Contact4Data* gContact, __global const Body* gBodies, __global const Shape* gShapes, __global Constraint4* gConstraintOut,
|
||||
__global const unsigned int* bodyCount,
|
||||
int nContacts,
|
||||
float dt,
|
||||
|
||||
@@ -2,36 +2,71 @@
|
||||
static const char* solverUtilsCL= \
|
||||
"/*\n"
|
||||
"Copyright (c) 2013 Advanced Micro Devices, Inc. \n"
|
||||
"\n"
|
||||
"This software is provided 'as-is', without any express or implied warranty.\n"
|
||||
"In no event will the authors be held liable for any damages arising from the use of this software.\n"
|
||||
"Permission is granted to anyone to use this software for any purpose, \n"
|
||||
"including commercial applications, and to alter it and redistribute it freely, \n"
|
||||
"subject to the following restrictions:\n"
|
||||
"\n"
|
||||
"1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n"
|
||||
"2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n"
|
||||
"3. This notice may not be removed or altered from any source distribution.\n"
|
||||
"*/\n"
|
||||
"//Originally written by Erwin Coumans\n"
|
||||
"\n"
|
||||
"#ifndef B3_CONTACT4DATA_H\n"
|
||||
"#define B3_CONTACT4DATA_H\n"
|
||||
"#ifndef B3_FLOAT4_H\n"
|
||||
"#define B3_FLOAT4_H\n"
|
||||
"#ifndef B3_PLATFORM_DEFINITIONS_H\n"
|
||||
"#define B3_PLATFORM_DEFINITIONS_H\n"
|
||||
"struct MyTest\n"
|
||||
"{\n"
|
||||
" int bla;\n"
|
||||
"};\n"
|
||||
"#endif\n"
|
||||
"#ifdef __cplusplus\n"
|
||||
"#else//bla\n"
|
||||
" typedef float4 b3Float4;\n"
|
||||
"#endif \n"
|
||||
"#endif //B3_FLOAT4_H\n"
|
||||
"typedef struct b3Contact4Data b3Contact4Data_t;\n"
|
||||
"struct b3Contact4Data\n"
|
||||
"{\n"
|
||||
" b3Float4 m_worldPos[4];\n"
|
||||
"// b3Float4 m_localPosB[4];\n"
|
||||
" b3Float4 m_worldNormal; // w: m_nPoints\n"
|
||||
" unsigned short m_restituitionCoeffCmp;\n"
|
||||
" unsigned short m_frictionCoeffCmp;\n"
|
||||
" int m_batchIdx;\n"
|
||||
" int m_bodyAPtrAndSignBit;//x:m_bodyAPtr, y:m_bodyBPtr\n"
|
||||
" int m_bodyBPtrAndSignBit;\n"
|
||||
" int m_childIndexA;\n"
|
||||
" int m_childIndexB;\n"
|
||||
" int m_unused1;\n"
|
||||
" int m_unused2;\n"
|
||||
" b3Float4 m_localPosA;\n"
|
||||
"};\n"
|
||||
"inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)\n"
|
||||
"{\n"
|
||||
" return (int)contact->m_worldNormal.w;\n"
|
||||
"};\n"
|
||||
"inline void b3Contact4Data_setNumPoints(struct b3Contact4Data* contact, int numPoints)\n"
|
||||
"{\n"
|
||||
" contact->m_worldNormal.w = (float)numPoints;\n"
|
||||
"};\n"
|
||||
"#endif //B3_CONTACT4DATA_H\n"
|
||||
"#pragma OPENCL EXTENSION cl_amd_printf : enable\n"
|
||||
"#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n"
|
||||
"#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n"
|
||||
"#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n"
|
||||
"#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"#ifdef cl_ext_atomic_counters_32\n"
|
||||
"#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable\n"
|
||||
"#else\n"
|
||||
"#define counter32_t volatile global int*\n"
|
||||
"#endif\n"
|
||||
"\n"
|
||||
"typedef unsigned int u32;\n"
|
||||
"typedef unsigned short u16;\n"
|
||||
"typedef unsigned char u8;\n"
|
||||
"\n"
|
||||
"#define GET_GROUP_IDX get_group_id(0)\n"
|
||||
"#define GET_LOCAL_IDX get_local_id(0)\n"
|
||||
"#define GET_GLOBAL_IDX get_global_id(0)\n"
|
||||
@@ -45,22 +80,15 @@ static const char* solverUtilsCL= \
|
||||
"#define AtomAdd(x, value) atom_add(&(x), value)\n"
|
||||
"#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value )\n"
|
||||
"#define AtomXhg(x, value) atom_xchg ( &(x), value )\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"#define SELECT_UINT4( b, a, condition ) select( b,a,condition )\n"
|
||||
"\n"
|
||||
"#define make_float4 (float4)\n"
|
||||
"#define make_float2 (float2)\n"
|
||||
"#define make_uint4 (uint4)\n"
|
||||
"#define make_int4 (int4)\n"
|
||||
"#define make_uint2 (uint2)\n"
|
||||
"#define make_int2 (int2)\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"#define max2 max\n"
|
||||
"#define min2 min\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"///////////////////////////////////////\n"
|
||||
"// Vector\n"
|
||||
"///////////////////////////////////////\n"
|
||||
@@ -70,57 +98,47 @@ static const char* solverUtilsCL= \
|
||||
" return native_divide(numerator, denominator); \n"
|
||||
"// return numerator/denominator; \n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float4 fastDiv4(float4 numerator, float4 denominator)\n"
|
||||
"{\n"
|
||||
" return native_divide(numerator, denominator); \n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float fastSqrtf(float f2)\n"
|
||||
"{\n"
|
||||
" return native_sqrt(f2);\n"
|
||||
"// return sqrt(f2);\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float fastRSqrt(float f2)\n"
|
||||
"{\n"
|
||||
" return native_rsqrt(f2);\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float fastLength4(float4 v)\n"
|
||||
"{\n"
|
||||
" return fast_length(v);\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float4 fastNormalize4(float4 v)\n"
|
||||
"{\n"
|
||||
" return fast_normalize(v);\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float sqrtf(float a)\n"
|
||||
"{\n"
|
||||
"// return sqrt(a);\n"
|
||||
" return native_sqrt(a);\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float4 cross3(float4 a1, float4 b1)\n"
|
||||
"{\n"
|
||||
"\n"
|
||||
" float4 a=make_float4(a1.xyz,0.f);\n"
|
||||
" float4 b=make_float4(b1.xyz,0.f);\n"
|
||||
" //float4 a=a1;\n"
|
||||
" //float4 b=b1;\n"
|
||||
" return cross(a,b);\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float dot3F4(float4 a, float4 b)\n"
|
||||
"{\n"
|
||||
@@ -128,26 +146,22 @@ static const char* solverUtilsCL= \
|
||||
" float4 b1 = make_float4(b.xyz,0.f);\n"
|
||||
" return dot(a1, b1);\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float length3(const float4 a)\n"
|
||||
"{\n"
|
||||
" return sqrtf(dot3F4(a,a));\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float dot4(const float4 a, const float4 b)\n"
|
||||
"{\n"
|
||||
" return dot( a, b );\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"// for height\n"
|
||||
"__inline\n"
|
||||
"float dot3w1(const float4 point, const float4 eqn)\n"
|
||||
"{\n"
|
||||
" return dot3F4(point,eqn) + eqn.w;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float4 normalize3(const float4 a)\n"
|
||||
"{\n"
|
||||
@@ -156,14 +170,12 @@ static const char* solverUtilsCL= \
|
||||
"// float length = sqrtf(dot3F4(a, a));\n"
|
||||
"// return 1.f/length * a;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float4 normalize4(const float4 a)\n"
|
||||
"{\n"
|
||||
" float length = sqrtf(dot4(a, a));\n"
|
||||
" return 1.f/length * a;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float4 createEquation(const float4 a, const float4 b, const float4 c)\n"
|
||||
"{\n"
|
||||
@@ -174,34 +186,25 @@ static const char* solverUtilsCL= \
|
||||
" eqn.w = -dot3F4(eqn,a);\n"
|
||||
" return eqn;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"///////////////////////////////////////\n"
|
||||
"// Matrix3x3\n"
|
||||
"///////////////////////////////////////\n"
|
||||
"\n"
|
||||
"typedef struct\n"
|
||||
"{\n"
|
||||
" float4 m_row[3];\n"
|
||||
"}Matrix3x3;\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"Matrix3x3 mtZero();\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"Matrix3x3 mtIdentity();\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"Matrix3x3 mtTranspose(Matrix3x3 m);\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"Matrix3x3 mtMul(Matrix3x3 a, Matrix3x3 b);\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float4 mtMul1(Matrix3x3 a, float4 b);\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float4 mtMul3(float4 a, Matrix3x3 b);\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"Matrix3x3 mtZero()\n"
|
||||
"{\n"
|
||||
@@ -211,7 +214,6 @@ static const char* solverUtilsCL= \
|
||||
" m.m_row[2] = (float4)(0.f);\n"
|
||||
" return m;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"Matrix3x3 mtIdentity()\n"
|
||||
"{\n"
|
||||
@@ -221,7 +223,6 @@ static const char* solverUtilsCL= \
|
||||
" m.m_row[2] = (float4)(0,0,1,0);\n"
|
||||
" return m;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"Matrix3x3 mtTranspose(Matrix3x3 m)\n"
|
||||
"{\n"
|
||||
@@ -231,7 +232,6 @@ static const char* solverUtilsCL= \
|
||||
" out.m_row[2] = (float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);\n"
|
||||
" return out;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"Matrix3x3 mtMul(Matrix3x3 a, Matrix3x3 b)\n"
|
||||
"{\n"
|
||||
@@ -252,7 +252,6 @@ static const char* solverUtilsCL= \
|
||||
" }\n"
|
||||
" return ans;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float4 mtMul1(Matrix3x3 a, float4 b)\n"
|
||||
"{\n"
|
||||
@@ -263,43 +262,30 @@ static const char* solverUtilsCL= \
|
||||
" ans.w = 0.f;\n"
|
||||
" return ans;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float4 mtMul3(float4 a, Matrix3x3 b)\n"
|
||||
"{\n"
|
||||
" float4 colx = make_float4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n"
|
||||
" float4 coly = make_float4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n"
|
||||
" float4 colz = make_float4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n"
|
||||
"\n"
|
||||
" float4 ans;\n"
|
||||
" ans.x = dot3F4( a, colx );\n"
|
||||
" ans.y = dot3F4( a, coly );\n"
|
||||
" ans.z = dot3F4( a, colz );\n"
|
||||
" return ans;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"///////////////////////////////////////\n"
|
||||
"// Quaternion\n"
|
||||
"///////////////////////////////////////\n"
|
||||
"\n"
|
||||
"typedef float4 Quaternion;\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"Quaternion qtMul(Quaternion a, Quaternion b);\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"Quaternion qtNormalize(Quaternion in);\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float4 qtRotate(Quaternion q, float4 vec);\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"Quaternion qtInvert(Quaternion q);\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"Quaternion qtMul(Quaternion a, Quaternion b)\n"
|
||||
"{\n"
|
||||
@@ -310,7 +296,6 @@ static const char* solverUtilsCL= \
|
||||
" ans.w = a.w*b.w - dot3F4(a, b);\n"
|
||||
" return ans;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"Quaternion qtNormalize(Quaternion in)\n"
|
||||
"{\n"
|
||||
@@ -327,43 +312,33 @@ static const char* solverUtilsCL= \
|
||||
" float4 out = qtMul(qtMul(q,vcpy),qInv);\n"
|
||||
" return out;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"Quaternion qtInvert(Quaternion q)\n"
|
||||
"{\n"
|
||||
" return (Quaternion)(-q.xyz, q.w);\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float4 qtInvRotate(const Quaternion q, float4 vec)\n"
|
||||
"{\n"
|
||||
" return qtRotate( qtInvert( q ), vec );\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"#define WG_SIZE 64\n"
|
||||
"\n"
|
||||
"typedef struct\n"
|
||||
"{\n"
|
||||
" float4 m_pos;\n"
|
||||
" Quaternion m_quat;\n"
|
||||
" float4 m_linVel;\n"
|
||||
" float4 m_angVel;\n"
|
||||
"\n"
|
||||
" u32 m_shapeIdx;\n"
|
||||
" float m_invMass;\n"
|
||||
" float m_restituitionCoeff;\n"
|
||||
" float m_frictionCoeff;\n"
|
||||
"} Body;\n"
|
||||
"\n"
|
||||
"typedef struct\n"
|
||||
"{\n"
|
||||
" Matrix3x3 m_invInertia;\n"
|
||||
" Matrix3x3 m_initInvInertia;\n"
|
||||
"} Shape;\n"
|
||||
"\n"
|
||||
"typedef struct\n"
|
||||
"{\n"
|
||||
" float4 m_linear;\n"
|
||||
@@ -372,35 +347,14 @@ static const char* solverUtilsCL= \
|
||||
" float m_jacCoeffInv[4];\n"
|
||||
" float m_b[4];\n"
|
||||
" float m_appliedRambdaDt[4];\n"
|
||||
"\n"
|
||||
" float m_fJacCoeffInv[2]; \n"
|
||||
" float m_fAppliedRambdaDt[2]; \n"
|
||||
"\n"
|
||||
" u32 m_bodyA;\n"
|
||||
" u32 m_bodyB;\n"
|
||||
" int m_batchIdx;\n"
|
||||
" u32 m_paddings;\n"
|
||||
"} Constraint4;\n"
|
||||
"\n"
|
||||
"typedef struct\n"
|
||||
"{\n"
|
||||
" float4 m_worldPos[4];\n"
|
||||
" float4 m_worldNormal;\n"
|
||||
" u32 m_coeffs;\n"
|
||||
" int m_batchIdx;\n"
|
||||
"\n"
|
||||
" int m_bodyAPtrAndSignBit;\n"
|
||||
" int m_bodyBPtrAndSignBit;\n"
|
||||
"\n"
|
||||
" int m_childIndexA;\n"
|
||||
" int m_childIndexB;\n"
|
||||
" int m_unused1;\n"
|
||||
" int m_unused2;\n"
|
||||
"\n"
|
||||
"} Contact4;\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"__kernel void CountBodiesKernel(__global Contact4* manifoldPtr, __global unsigned int* bodyCount, __global int2* contactConstraintOffsets, int numContactManifolds, int fixedBodyIndex)\n"
|
||||
"__kernel void CountBodiesKernel(__global struct b3Contact4Data* manifoldPtr, __global unsigned int* bodyCount, __global int2* contactConstraintOffsets, int numContactManifolds, int fixedBodyIndex)\n"
|
||||
"{\n"
|
||||
" int i = GET_GLOBAL_IDX;\n"
|
||||
" \n"
|
||||
@@ -423,7 +377,6 @@ static const char* solverUtilsCL= \
|
||||
" } \n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__kernel void ClearVelocitiesKernel(__global float4* linearVelocities,__global float4* angularVelocities, int numSplitBodies)\n"
|
||||
"{\n"
|
||||
" int i = GET_GLOBAL_IDX;\n"
|
||||
@@ -434,8 +387,6 @@ static const char* solverUtilsCL= \
|
||||
" angularVelocities[i] = make_float4(0);\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"__kernel void AverageVelocitiesKernel(__global Body* gBodies,__global int* offsetSplitBodies,__global const unsigned int* bodyCount,\n"
|
||||
"__global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities, int numBodies)\n"
|
||||
"{\n"
|
||||
@@ -465,23 +416,16 @@ static const char* solverUtilsCL= \
|
||||
" }//bodies[i].m_invMass\n"
|
||||
" }//i<numBodies\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"void setLinearAndAngular( float4 n, float4 r0, float4 r1, float4* linear, float4* angular0, float4* angular1)\n"
|
||||
"{\n"
|
||||
" *linear = make_float4(-n.xyz,0.f);\n"
|
||||
" *angular0 = -cross3(r0, n);\n"
|
||||
" *angular1 = cross3(r1, n);\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"float calcRelVel( float4 l0, float4 l1, float4 a0, float4 a1, float4 linVel0, float4 angVel0, float4 linVel1, float4 angVel1 )\n"
|
||||
"{\n"
|
||||
" return dot3F4(l0, linVel0) + dot3F4(a0, angVel0) + dot3F4(l1, linVel1) + dot3F4(a1, angVel1);\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"float calcJacCoeff(const float4 linear0, const float4 linear1, const float4 angular0, const float4 angular1,\n"
|
||||
" float invMass0, const Matrix3x3* invInertia0, float invMass1, const Matrix3x3* invInertia1, float countA, float countB)\n"
|
||||
"{\n"
|
||||
@@ -492,8 +436,6 @@ static const char* solverUtilsCL= \
|
||||
" float jmj3 = dot3F4(mtMul3(angular1,*invInertia1), angular1);\n"
|
||||
" return -1.f/((jmj0+jmj1)*countA+(jmj2+jmj3)*countB);\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"void btPlaneSpace1 (float4 n, float4* p, float4* q);\n"
|
||||
" void btPlaneSpace1 (float4 n, float4* p, float4* q)\n"
|
||||
"{\n"
|
||||
@@ -522,11 +464,6 @@ static const char* solverUtilsCL= \
|
||||
" q[0].z = a*k;\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"void solveContact(__global Constraint4* cs,\n"
|
||||
" float4 posA, float4* linVelA, float4* angVelA, float invMassA, Matrix3x3 invInertiaA,\n"
|
||||
" float4 posB, float4* linVelB, float4* angVelB, float invMassB, Matrix3x3 invInertiaB,\n"
|
||||
@@ -534,23 +471,18 @@ static const char* solverUtilsCL= \
|
||||
"{\n"
|
||||
" float minRambdaDt = 0;\n"
|
||||
" float maxRambdaDt = FLT_MAX;\n"
|
||||
"\n"
|
||||
" for(int ic=0; ic<4; ic++)\n"
|
||||
" {\n"
|
||||
" if( cs->m_jacCoeffInv[ic] == 0.f ) continue;\n"
|
||||
"\n"
|
||||
" float4 angular0, angular1, linear;\n"
|
||||
" float4 r0 = cs->m_worldPos[ic] - posA;\n"
|
||||
" float4 r1 = cs->m_worldPos[ic] - posB;\n"
|
||||
" setLinearAndAngular( -cs->m_linear, r0, r1, &linear, &angular0, &angular1 );\n"
|
||||
" \n"
|
||||
"\n"
|
||||
"\n"
|
||||
" float rambdaDt = calcRelVel( cs->m_linear, -cs->m_linear, angular0, angular1, \n"
|
||||
" *linVelA+*dLinVelA, *angVelA+*dAngVelA, *linVelB+*dLinVelB, *angVelB+*dAngVelB ) + cs->m_b[ic];\n"
|
||||
" rambdaDt *= cs->m_jacCoeffInv[ic];\n"
|
||||
" \n"
|
||||
" \n"
|
||||
" {\n"
|
||||
" float prevSum = cs->m_appliedRambdaDt[ic];\n"
|
||||
" float updated = prevSum;\n"
|
||||
@@ -561,13 +493,11 @@ static const char* solverUtilsCL= \
|
||||
" cs->m_appliedRambdaDt[ic] = updated;\n"
|
||||
" }\n"
|
||||
" \n"
|
||||
" \n"
|
||||
" float4 linImp0 = invMassA*linear*rambdaDt;\n"
|
||||
" float4 linImp1 = invMassB*(-linear)*rambdaDt;\n"
|
||||
" float4 angImp0 = mtMul1(invInertiaA, angular0)*rambdaDt;\n"
|
||||
" float4 angImp1 = mtMul1(invInertiaB, angular1)*rambdaDt;\n"
|
||||
" \n"
|
||||
" \n"
|
||||
" if (invMassA)\n"
|
||||
" {\n"
|
||||
" *dLinVelA += linImp0;\n"
|
||||
@@ -580,33 +510,25 @@ static const char* solverUtilsCL= \
|
||||
" }\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"// solveContactConstraint( gBodies, gShapes, &gConstraints[i] ,contactConstraintOffsets,offsetSplitBodies, deltaLinearVelocities, deltaAngularVelocities);\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"void solveContactConstraint(__global Body* gBodies, __global Shape* gShapes, __global Constraint4* ldsCs, \n"
|
||||
"__global int2* contactConstraintOffsets,__global unsigned int* offsetSplitBodies,\n"
|
||||
"__global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities)\n"
|
||||
"{\n"
|
||||
"\n"
|
||||
" //float frictionCoeff = ldsCs[0].m_linear.w;\n"
|
||||
" int aIdx = ldsCs[0].m_bodyA;\n"
|
||||
" int bIdx = ldsCs[0].m_bodyB;\n"
|
||||
"\n"
|
||||
" float4 posA = gBodies[aIdx].m_pos;\n"
|
||||
" float4 linVelA = gBodies[aIdx].m_linVel;\n"
|
||||
" float4 angVelA = gBodies[aIdx].m_angVel;\n"
|
||||
" float invMassA = gBodies[aIdx].m_invMass;\n"
|
||||
" Matrix3x3 invInertiaA = gShapes[aIdx].m_invInertia;\n"
|
||||
"\n"
|
||||
" float4 posB = gBodies[bIdx].m_pos;\n"
|
||||
" float4 linVelB = gBodies[bIdx].m_linVel;\n"
|
||||
" float4 angVelB = gBodies[bIdx].m_angVel;\n"
|
||||
" float invMassB = gBodies[bIdx].m_invMass;\n"
|
||||
" Matrix3x3 invInertiaB = gShapes[bIdx].m_invInertia;\n"
|
||||
" \n"
|
||||
" \n"
|
||||
" float4 dLinVelA = make_float4(0,0,0,0);\n"
|
||||
" float4 dAngVelA = make_float4(0,0,0,0);\n"
|
||||
" float4 dLinVelB = make_float4(0,0,0,0);\n"
|
||||
@@ -621,20 +543,16 @@ static const char* solverUtilsCL= \
|
||||
" dLinVelA = deltaLinearVelocities[splitIndexA];\n"
|
||||
" dAngVelA = deltaAngularVelocities[splitIndexA];\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" int bodyOffsetB = offsetSplitBodies[bIdx];\n"
|
||||
" int constraintOffsetB = contactConstraintOffsets[0].y;\n"
|
||||
" int splitIndexB= bodyOffsetB+constraintOffsetB;\n"
|
||||
"\n"
|
||||
" if (invMassB)\n"
|
||||
" {\n"
|
||||
" dLinVelB = deltaLinearVelocities[splitIndexB];\n"
|
||||
" dAngVelB = deltaAngularVelocities[splitIndexB];\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" solveContact( ldsCs, posA, &linVelA, &angVelA, invMassA, invInertiaA,\n"
|
||||
" posB, &linVelB, &angVelB, invMassB, invInertiaB ,&dLinVelA, &dAngVelA, &dLinVelB, &dAngVelB);\n"
|
||||
"\n"
|
||||
" if (invMassA)\n"
|
||||
" {\n"
|
||||
" deltaLinearVelocities[splitIndexA] = dLinVelA;\n"
|
||||
@@ -645,10 +563,7 @@ static const char* solverUtilsCL= \
|
||||
" deltaLinearVelocities[splitIndexB] = dLinVelB;\n"
|
||||
" deltaAngularVelocities[splitIndexB] = dAngVelB;\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"__kernel void SolveContactJacobiKernel(__global Constraint4* gConstraints, __global Body* gBodies, __global Shape* gShapes ,\n"
|
||||
"__global int2* contactConstraintOffsets,__global unsigned int* offsetSplitBodies,__global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities,\n"
|
||||
"float deltaTime, float positionDrift, float positionConstraintCoeff, int fixedBodyIndex, int numManifolds\n"
|
||||
@@ -660,10 +575,6 @@ static const char* solverUtilsCL= \
|
||||
" solveContactConstraint( gBodies, gShapes, &gConstraints[i] ,&contactConstraintOffsets[i],offsetSplitBodies, deltaLinearVelocities, deltaAngularVelocities);\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"void solveFrictionConstraint(__global Body* gBodies, __global Shape* gShapes, __global Constraint4* ldsCs,\n"
|
||||
" __global int2* contactConstraintOffsets,__global unsigned int* offsetSplitBodies,\n"
|
||||
" __global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities)\n"
|
||||
@@ -671,21 +582,17 @@ static const char* solverUtilsCL= \
|
||||
" float frictionCoeff = 0.7f;//ldsCs[0].m_linear.w;\n"
|
||||
" int aIdx = ldsCs[0].m_bodyA;\n"
|
||||
" int bIdx = ldsCs[0].m_bodyB;\n"
|
||||
"\n"
|
||||
"\n"
|
||||
" float4 posA = gBodies[aIdx].m_pos;\n"
|
||||
" float4 linVelA = gBodies[aIdx].m_linVel;\n"
|
||||
" float4 angVelA = gBodies[aIdx].m_angVel;\n"
|
||||
" float invMassA = gBodies[aIdx].m_invMass;\n"
|
||||
" Matrix3x3 invInertiaA = gShapes[aIdx].m_invInertia;\n"
|
||||
"\n"
|
||||
" float4 posB = gBodies[bIdx].m_pos;\n"
|
||||
" float4 linVelB = gBodies[bIdx].m_linVel;\n"
|
||||
" float4 angVelB = gBodies[bIdx].m_angVel;\n"
|
||||
" float invMassB = gBodies[bIdx].m_invMass;\n"
|
||||
" Matrix3x3 invInertiaB = gShapes[bIdx].m_invInertia;\n"
|
||||
" \n"
|
||||
"\n"
|
||||
" float4 dLinVelA = make_float4(0,0,0,0);\n"
|
||||
" float4 dAngVelA = make_float4(0,0,0,0);\n"
|
||||
" float4 dLinVelB = make_float4(0,0,0,0);\n"
|
||||
@@ -700,24 +607,17 @@ static const char* solverUtilsCL= \
|
||||
" dLinVelA = deltaLinearVelocities[splitIndexA];\n"
|
||||
" dAngVelA = deltaAngularVelocities[splitIndexA];\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" int bodyOffsetB = offsetSplitBodies[bIdx];\n"
|
||||
" int constraintOffsetB = contactConstraintOffsets[0].y;\n"
|
||||
" int splitIndexB= bodyOffsetB+constraintOffsetB;\n"
|
||||
"\n"
|
||||
" if (invMassB)\n"
|
||||
" {\n"
|
||||
" dLinVelB = deltaLinearVelocities[splitIndexB];\n"
|
||||
" dAngVelB = deltaAngularVelocities[splitIndexB];\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
" {\n"
|
||||
" float maxRambdaDt[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX};\n"
|
||||
" float minRambdaDt[4] = {0.f,0.f,0.f,0.f};\n"
|
||||
"\n"
|
||||
" float sum = 0;\n"
|
||||
" for(int j=0; j<4; j++)\n"
|
||||
" {\n"
|
||||
@@ -730,7 +630,6 @@ static const char* solverUtilsCL= \
|
||||
" minRambdaDt[j] = -maxRambdaDt[j];\n"
|
||||
" }\n"
|
||||
" \n"
|
||||
" \n"
|
||||
"// solveFriction( ldsCs, posA, &linVelA, &angVelA, invMassA, invInertiaA,\n"
|
||||
"// posB, &linVelB, &angVelB, invMassB, invInertiaB, maxRambdaDt, minRambdaDt );\n"
|
||||
" \n"
|
||||
@@ -791,9 +690,7 @@ static const char* solverUtilsCL= \
|
||||
" }\n"
|
||||
" \n"
|
||||
" \n"
|
||||
" \n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" if (invMassA)\n"
|
||||
" {\n"
|
||||
" deltaLinearVelocities[splitIndexA] = dLinVelA;\n"
|
||||
@@ -805,10 +702,7 @@ static const char* solverUtilsCL= \
|
||||
" deltaAngularVelocities[splitIndexB] = dAngVelB;\n"
|
||||
" }\n"
|
||||
" \n"
|
||||
"\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"__kernel void SolveFrictionJacobiKernel(__global Constraint4* gConstraints, __global Body* gBodies, __global Shape* gShapes ,\n"
|
||||
" __global int2* contactConstraintOffsets,__global unsigned int* offsetSplitBodies,\n"
|
||||
" __global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities,\n"
|
||||
@@ -821,8 +715,6 @@ static const char* solverUtilsCL= \
|
||||
" solveFrictionConstraint( gBodies, gShapes, &gConstraints[i] ,&contactConstraintOffsets[i],offsetSplitBodies, deltaLinearVelocities, deltaAngularVelocities);\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"__kernel void UpdateBodyVelocitiesKernel(__global Body* gBodies,__global int* offsetSplitBodies,__global const unsigned int* bodyCount,\n"
|
||||
" __global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities, int numBodies)\n"
|
||||
"{\n"
|
||||
@@ -841,85 +733,68 @@ static const char* solverUtilsCL= \
|
||||
" }\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"void setConstraint4( const float4 posA, const float4 linVelA, const float4 angVelA, float invMassA, const Matrix3x3 invInertiaA,\n"
|
||||
" const float4 posB, const float4 linVelB, const float4 angVelB, float invMassB, const Matrix3x3 invInertiaB, \n"
|
||||
" __global Contact4* src, float dt, float positionDrift, float positionConstraintCoeff,float countA, float countB,\n"
|
||||
" __global struct b3Contact4Data* src, float dt, float positionDrift, float positionConstraintCoeff,float countA, float countB,\n"
|
||||
" Constraint4* dstC )\n"
|
||||
"{\n"
|
||||
" dstC->m_bodyA = abs(src->m_bodyAPtrAndSignBit);\n"
|
||||
" dstC->m_bodyB = abs(src->m_bodyBPtrAndSignBit);\n"
|
||||
"\n"
|
||||
" float dtInv = 1.f/dt;\n"
|
||||
" for(int ic=0; ic<4; ic++)\n"
|
||||
" {\n"
|
||||
" dstC->m_appliedRambdaDt[ic] = 0.f;\n"
|
||||
" }\n"
|
||||
" dstC->m_fJacCoeffInv[0] = dstC->m_fJacCoeffInv[1] = 0.f;\n"
|
||||
"\n"
|
||||
"\n"
|
||||
" dstC->m_linear = -src->m_worldNormal;\n"
|
||||
" dstC->m_linear.w = 0.7f ;//src->getFrictionCoeff() );\n"
|
||||
" for(int ic=0; ic<4; ic++)\n"
|
||||
" {\n"
|
||||
" float4 r0 = src->m_worldPos[ic] - posA;\n"
|
||||
" float4 r1 = src->m_worldPos[ic] - posB;\n"
|
||||
"\n"
|
||||
" if( ic >= src->m_worldNormal.w )//npoints\n"
|
||||
" {\n"
|
||||
" dstC->m_jacCoeffInv[ic] = 0.f;\n"
|
||||
" continue;\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" float relVelN;\n"
|
||||
" {\n"
|
||||
" float4 linear, angular0, angular1;\n"
|
||||
" setLinearAndAngular(src->m_worldNormal, r0, r1, &linear, &angular0, &angular1);\n"
|
||||
"\n"
|
||||
" dstC->m_jacCoeffInv[ic] = calcJacCoeff(linear, -linear, angular0, angular1,\n"
|
||||
" invMassA, &invInertiaA, invMassB, &invInertiaB , countA, countB);\n"
|
||||
"\n"
|
||||
" relVelN = calcRelVel(linear, -linear, angular0, angular1,\n"
|
||||
" linVelA, angVelA, linVelB, angVelB);\n"
|
||||
"\n"
|
||||
" float e = 0.f;//src->getRestituitionCoeff();\n"
|
||||
" if( relVelN*relVelN < 0.004f ) e = 0.f;\n"
|
||||
"\n"
|
||||
" dstC->m_b[ic] = e*relVelN;\n"
|
||||
" //float penetration = src->m_worldPos[ic].w;\n"
|
||||
" dstC->m_b[ic] += (src->m_worldPos[ic].w + positionDrift)*positionConstraintCoeff*dtInv;\n"
|
||||
" dstC->m_appliedRambdaDt[ic] = 0.f;\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" if( src->m_worldNormal.w > 0 )//npoints\n"
|
||||
" { // prepare friction\n"
|
||||
" float4 center = make_float4(0.f);\n"
|
||||
" for(int i=0; i<src->m_worldNormal.w; i++) \n"
|
||||
" center += src->m_worldPos[i];\n"
|
||||
" center /= (float)src->m_worldNormal.w;\n"
|
||||
"\n"
|
||||
" float4 tangent[2];\n"
|
||||
" btPlaneSpace1(src->m_worldNormal,&tangent[0],&tangent[1]);\n"
|
||||
" \n"
|
||||
" float4 r[2];\n"
|
||||
" r[0] = center - posA;\n"
|
||||
" r[1] = center - posB;\n"
|
||||
"\n"
|
||||
" for(int i=0; i<2; i++)\n"
|
||||
" {\n"
|
||||
" float4 linear, angular0, angular1;\n"
|
||||
" setLinearAndAngular(tangent[i], r[0], r[1], &linear, &angular0, &angular1);\n"
|
||||
"\n"
|
||||
" dstC->m_fJacCoeffInv[i] = calcJacCoeff(linear, -linear, angular0, angular1,\n"
|
||||
" invMassA, &invInertiaA, invMassB, &invInertiaB ,countA, countB);\n"
|
||||
" dstC->m_fAppliedRambdaDt[i] = 0.f;\n"
|
||||
" }\n"
|
||||
" dstC->m_center = center;\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" for(int i=0; i<4; i++)\n"
|
||||
" {\n"
|
||||
" if( i<src->m_worldNormal.w )\n"
|
||||
@@ -932,11 +807,9 @@ static const char* solverUtilsCL= \
|
||||
" }\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"__kernel\n"
|
||||
"__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n"
|
||||
"void ContactToConstraintSplitKernel(__global const Contact4* gContact, __global const Body* gBodies, __global const Shape* gShapes, __global Constraint4* gConstraintOut, \n"
|
||||
"void ContactToConstraintSplitKernel(__global const struct b3Contact4Data* gContact, __global const Body* gBodies, __global const Shape* gShapes, __global Constraint4* gConstraintOut, \n"
|
||||
"__global const unsigned int* bodyCount,\n"
|
||||
"int nContacts,\n"
|
||||
"float dt,\n"
|
||||
@@ -950,30 +823,24 @@ static const char* solverUtilsCL= \
|
||||
" {\n"
|
||||
" int aIdx = abs(gContact[gIdx].m_bodyAPtrAndSignBit);\n"
|
||||
" int bIdx = abs(gContact[gIdx].m_bodyBPtrAndSignBit);\n"
|
||||
"\n"
|
||||
" float4 posA = gBodies[aIdx].m_pos;\n"
|
||||
" float4 linVelA = gBodies[aIdx].m_linVel;\n"
|
||||
" float4 angVelA = gBodies[aIdx].m_angVel;\n"
|
||||
" float invMassA = gBodies[aIdx].m_invMass;\n"
|
||||
" Matrix3x3 invInertiaA = gShapes[aIdx].m_invInertia;\n"
|
||||
"\n"
|
||||
" float4 posB = gBodies[bIdx].m_pos;\n"
|
||||
" float4 linVelB = gBodies[bIdx].m_linVel;\n"
|
||||
" float4 angVelB = gBodies[bIdx].m_angVel;\n"
|
||||
" float invMassB = gBodies[bIdx].m_invMass;\n"
|
||||
" Matrix3x3 invInertiaB = gShapes[bIdx].m_invInertia;\n"
|
||||
"\n"
|
||||
" Constraint4 cs;\n"
|
||||
"\n"
|
||||
" float countA = invMassA ? (float)bodyCount[aIdx] : 1;\n"
|
||||
" float countB = invMassB ? (float)bodyCount[bIdx] : 1;\n"
|
||||
"\n"
|
||||
" setConstraint4( posA, linVelA, angVelA, invMassA, invInertiaA, posB, linVelB, angVelB, invMassB, invInertiaB,\n"
|
||||
" &gContact[gIdx], dt, positionDrift, positionConstraintCoeff,countA,countB,\n"
|
||||
" &cs );\n"
|
||||
" \n"
|
||||
" cs.m_batchIdx = gContact[gIdx].m_batchIdx;\n"
|
||||
"\n"
|
||||
" gConstraintOut[gIdx] = cs;\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
|
||||
@@ -1,15 +1,12 @@
|
||||
//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project
|
||||
static const char* updateAabbsKernelCL= \
|
||||
"#define SHAPE_CONVEX_HULL 3\n"
|
||||
"\n"
|
||||
"typedef float4 Quaternion;\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float4 cross3(float4 a, float4 b)\n"
|
||||
"{\n"
|
||||
" return cross(a,b);\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float dot3F4(float4 a, float4 b)\n"
|
||||
"{\n"
|
||||
@@ -17,8 +14,6 @@ static const char* updateAabbsKernelCL= \
|
||||
" float4 b1 = (float4)(b.xyz,0.f);\n"
|
||||
" return dot(a1, b1);\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"Quaternion qtMul(Quaternion a, Quaternion b)\n"
|
||||
"{\n"
|
||||
@@ -28,13 +23,11 @@ static const char* updateAabbsKernelCL= \
|
||||
" ans.w = a.w*b.w - dot3F4(a, b);\n"
|
||||
" return ans;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"Quaternion qtInvert(Quaternion q)\n"
|
||||
"{\n"
|
||||
" return (Quaternion)(-q.xyz, q.w);\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float4 qtRotate(Quaternion q, float4 vec)\n"
|
||||
"{\n"
|
||||
@@ -44,34 +37,27 @@ static const char* updateAabbsKernelCL= \
|
||||
" float4 out = qtMul(qtMul(q,vcpy),qInv);\n"
|
||||
" return out;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"float4 transform(const float4* p, const float4* translation, const Quaternion* orientation)\n"
|
||||
"{\n"
|
||||
" return qtRotate( *orientation, *p ) + (*translation);\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"typedef struct\n"
|
||||
"{\n"
|
||||
" float4 m_row[3];\n"
|
||||
"} Matrix3x3;\n"
|
||||
"\n"
|
||||
"typedef unsigned int u32;\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"typedef struct\n"
|
||||
"{\n"
|
||||
" float4 m_pos;\n"
|
||||
" float4 m_quat;\n"
|
||||
" float4 m_linVel;\n"
|
||||
" float4 m_angVel;\n"
|
||||
"\n"
|
||||
" u32 m_collidableIdx;\n"
|
||||
" float m_invMass;\n"
|
||||
" float m_restituitionCoeff;\n"
|
||||
" float m_frictionCoeff;\n"
|
||||
"} Body;\n"
|
||||
"\n"
|
||||
"typedef struct Collidable\n"
|
||||
"{\n"
|
||||
" int m_unused1;\n"
|
||||
@@ -79,40 +65,30 @@ static const char* updateAabbsKernelCL= \
|
||||
" int m_shapeType;\n"
|
||||
" int m_shapeIndex;\n"
|
||||
"} Collidable;\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"typedef struct\n"
|
||||
"{\n"
|
||||
" Matrix3x3 m_invInertia;\n"
|
||||
" Matrix3x3 m_initInvInertia;\n"
|
||||
"} Shape;\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"Matrix3x3 qtGetRotationMatrix(float4 quat)\n"
|
||||
"{\n"
|
||||
" float4 quat2 = (float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f);\n"
|
||||
" Matrix3x3 out;\n"
|
||||
"\n"
|
||||
" out.m_row[0].x=fabs(1-2*quat2.y-2*quat2.z);\n"
|
||||
" out.m_row[0].y=fabs(2*quat.x*quat.y-2*quat.w*quat.z);\n"
|
||||
" out.m_row[0].z=fabs(2*quat.x*quat.z+2*quat.w*quat.y);\n"
|
||||
" out.m_row[0].w = 0.f;\n"
|
||||
"\n"
|
||||
" out.m_row[1].x=fabs(2*quat.x*quat.y+2*quat.w*quat.z);\n"
|
||||
" out.m_row[1].y=fabs(1-2*quat2.x-2*quat2.z);\n"
|
||||
" out.m_row[1].z=fabs(2*quat.y*quat.z-2*quat.w*quat.x);\n"
|
||||
" out.m_row[1].w = 0.f;\n"
|
||||
"\n"
|
||||
" out.m_row[2].x=fabs(2*quat.x*quat.z-2*quat.w*quat.y);\n"
|
||||
" out.m_row[2].y=fabs(2*quat.y*quat.z+2*quat.w*quat.x);\n"
|
||||
" out.m_row[2].z=fabs(1-2*quat2.x-2*quat2.y);\n"
|
||||
" out.m_row[2].w = 0.f;\n"
|
||||
"\n"
|
||||
" return out;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"typedef struct \n"
|
||||
"{\n"
|
||||
" float fx;\n"
|
||||
@@ -120,7 +96,6 @@ static const char* updateAabbsKernelCL= \
|
||||
" float fz;\n"
|
||||
" int uw;\n"
|
||||
"} btAABBCL;\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"Matrix3x3 mtTranspose(Matrix3x3 m)\n"
|
||||
"{\n"
|
||||
@@ -130,9 +105,6 @@ static const char* updateAabbsKernelCL= \
|
||||
" out.m_row[2] = (float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);\n"
|
||||
" return out;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"__inline\n"
|
||||
"Matrix3x3 mtMul(Matrix3x3 a, Matrix3x3 b)\n"
|
||||
"{\n"
|
||||
@@ -153,8 +125,6 @@ static const char* updateAabbsKernelCL= \
|
||||
" }\n"
|
||||
" return ans;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"__kernel void initializeGpuAabbsFull( const int numNodes, __global Body* gBodies,__global Collidable* collidables, __global btAABBCL* plocalShapeAABB, __global btAABBCL* pAABB)\n"
|
||||
"{\n"
|
||||
" int nodeID = get_global_id(0);\n"
|
||||
|
||||
221
test/OpenCL/KernelLaunch/main.cpp
Normal file
221
test/OpenCL/KernelLaunch/main.cpp
Normal file
@@ -0,0 +1,221 @@
|
||||
/*
|
||||
Bullet Continuous Collision Detection and Physics Library
|
||||
Copyright (c) 2011 Advanced Micro Devices, Inc. http://bulletphysics.org
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
|
||||
///original author: Erwin Coumans
|
||||
|
||||
#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h"
|
||||
#include "Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h"
|
||||
#include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h"
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "Bullet3Common/b3Vector3.h"
|
||||
|
||||
typedef b3Vector3 b3Float4;
|
||||
typedef struct b3Contact4Data b3Contact4Data_t;
|
||||
struct b3Contact4Data
|
||||
{
|
||||
b3Float4 m_worldPos[4];
|
||||
b3Float4 m_localPosA[4];
|
||||
b3Float4 m_localPosB[4];
|
||||
b3Float4 m_worldNormal; // w: m_nPoints
|
||||
unsigned short m_restituitionCoeffCmp;
|
||||
unsigned short m_frictionCoeffCmp;
|
||||
int m_batchIdx;
|
||||
int m_bodyAPtrAndSignBit;//x:m_bodyAPtr, y:m_bodyBPtr
|
||||
int m_bodyBPtrAndSignBit;
|
||||
int m_childIndexA;
|
||||
int m_childIndexB;
|
||||
int m_unused1;
|
||||
int m_unused2;
|
||||
|
||||
};
|
||||
|
||||
|
||||
// Turns its single argument into a string literal. Comments inside the
// argument are removed by the preprocessor before stringification and every
// run of whitespace collapses to one space, so the resulting OpenCL source is
// a single line of text.
#define MSTRINGIFY(A) #A

// OpenCL source of the test kernel, embedded as a string. It redeclares
// b3Contact4Data on the device side (with float4 as b3Float4) and provides
// testKernel, which
//  - writes sizeof(b3Contact4Data_t) into testData[get_local_id(0)], and
//  - lets work item 0 swap contactData[0] and contactData[1].
// Only tokens and comments may appear inside the macro argument: anything else
// (for example stray separator characters) would end up in the kernel source.
static const char* s_testKernelString= MSTRINGIFY(

struct MyTest
{
	int bla;
};

typedef float4 b3Float4;
typedef struct b3Contact4Data b3Contact4Data_t;
struct b3Contact4Data
{
	b3Float4	m_worldPos[4];
	b3Float4	m_localPosA[4];
	b3Float4	m_localPosB[4];
	b3Float4	m_worldNormal;	// w: m_nPoints
	unsigned short	m_restituitionCoeffCmp;
	unsigned short	m_frictionCoeffCmp;
	int m_batchIdx;
	int m_bodyAPtrAndSignBit;
	int m_bodyBPtrAndSignBit;
	int m_childIndexA;
	int m_childIndexB;
	int m_unused1;
	int m_unused2;

};
inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)
{
	return (int)contact->m_worldNormal.w;
};
inline void b3Contact4Data_setNumPoints(struct b3Contact4Data* contact, int numPoints)
{
	contact->m_worldNormal.w = (float)numPoints;
};

typedef volatile __global int* my_counter32_t;


__kernel void testKernel( __global int* testData, __global b3Contact4Data_t* contactData, my_counter32_t numElements)
{
	int id = get_local_id(0);
	int sz = sizeof(b3Contact4Data_t);
	testData[id]=sz;

	__private b3Contact4Data_t tmp;
	if (id==0)
	{
		tmp = contactData[1];
		contactData[1] = contactData[0];
		contactData[0] = tmp;
	}
}

);
|
||||
|
||||
|
||||
|
||||
#include "Bullet3Common/b3Logging.h"
|
||||
|
||||
|
||||
// Print callback installed in main() through b3SetCustomPrintfFunc.
// Writes msg to stdout exactly as given. The text is never interpreted as a
// printf format string, so a '%' inside msg (for example in a device or vendor
// name) cannot make printf read arguments that were never passed.
// A null msg is ignored.
void myprintf(const char* msg)
{
	if (!msg)
	{
		return;
	}
	//OutputDebugStringA(msg);
	fputs(msg, stdout);
}
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
b3SetCustomPrintfFunc(myprintf);
|
||||
//b3SetCustomWarningMessageFunc(myprintf);
|
||||
//b3SetCustomErrorMessageFunc(myprintf);
|
||||
|
||||
b3Printf("test b3Printf\n");
|
||||
b3Warning("test warning\n");
|
||||
b3Error("test error\n");
|
||||
|
||||
int ciErrNum = 0;
|
||||
|
||||
cl_device_type deviceType = CL_DEVICE_TYPE_GPU;
|
||||
const char* vendorSDK = b3OpenCLUtils::getSdkVendorName();
|
||||
|
||||
b3Printf("This program was compiled using the %s OpenCL SDK\n",vendorSDK);
|
||||
int numPlatforms = b3OpenCLUtils::getNumPlatforms();
|
||||
b3Printf("Num Platforms = %d\n", numPlatforms);
|
||||
|
||||
for (int i=0;i<numPlatforms;i++)
|
||||
{
|
||||
cl_platform_id platform = b3OpenCLUtils::getPlatform(i);
|
||||
b3OpenCLPlatformInfo platformInfo;
|
||||
b3OpenCLUtils::getPlatformInfo(platform,&platformInfo);
|
||||
b3Printf("--------------------------------\n");
|
||||
b3Printf("Platform info for platform nr %d:\n",i);
|
||||
b3Printf(" CL_PLATFORM_VENDOR: \t\t\t%s\n",platformInfo.m_platformVendor);
|
||||
b3Printf(" CL_PLATFORM_NAME: \t\t\t%s\n",platformInfo.m_platformName);
|
||||
b3Printf(" CL_PLATFORM_VERSION: \t\t\t%s\n",platformInfo.m_platformVersion);
|
||||
|
||||
cl_context context = b3OpenCLUtils::createContextFromPlatform(platform,deviceType,&ciErrNum);
|
||||
if (context)
|
||||
{
|
||||
int numDevices = b3OpenCLUtils::getNumDevices(context);
|
||||
b3Printf("Num Devices = %d\n", numDevices);
|
||||
for (int j=0;j<numDevices;j++)
|
||||
{
|
||||
cl_device_id dev = b3OpenCLUtils::getDevice(context,j);
|
||||
b3OpenCLDeviceInfo devInfo;
|
||||
b3OpenCLUtils::getDeviceInfo(dev,&devInfo);
|
||||
b3OpenCLUtils::printDeviceInfo(dev);
|
||||
|
||||
int errNum;
|
||||
|
||||
cl_command_queue queue = clCreateCommandQueue(context, dev, 0, &errNum);
|
||||
|
||||
|
||||
cl_program pairBenchProg=0;
|
||||
|
||||
cl_kernel testKernel = b3OpenCLUtils::compileCLKernelFromString(context,dev,s_testKernelString,"testKernel",&errNum,pairBenchProg);
|
||||
if (testKernel)
|
||||
{
|
||||
printf("kernel compiled ok\n");
|
||||
|
||||
int numWorkItems = 64;
|
||||
b3OpenCLArray<int> deviceElements(context,queue);
|
||||
b3OpenCLArray<int> atomicCounter(context,queue);
|
||||
b3OpenCLArray<b3Contact4Data> deviceContacts(context,queue);
|
||||
b3AlignedObjectArray<b3Contact4Data> hostContacts;
|
||||
|
||||
b3Contact4Data tmp;
|
||||
int sz = sizeof(b3Contact4Data);
|
||||
memset(&tmp,1,sz);
|
||||
deviceContacts.push_back(tmp);
|
||||
b3Contact4Data tmp2 = tmp;
|
||||
memset(&tmp,2,sz);
|
||||
deviceContacts.push_back(tmp);
|
||||
b3Contact4Data tmp3 = tmp;
|
||||
|
||||
|
||||
atomicCounter.push_back(0);
|
||||
deviceElements.resize(numWorkItems);
|
||||
b3LauncherCL run(queue,testKernel);
|
||||
run.setBuffer(deviceElements.getBufferCL());
|
||||
run.setBuffer(deviceContacts.getBufferCL());
|
||||
run.setBuffer(atomicCounter.getBufferCL());
|
||||
|
||||
run.launch1D(numWorkItems);
|
||||
|
||||
b3AlignedObjectArray<int> hostElements;
|
||||
deviceElements.copyToHost(hostElements);
|
||||
deviceContacts.copyToHost(hostContacts);
|
||||
tmp2 = hostContacts[0];
|
||||
tmp3 = hostContacts[1];
|
||||
|
||||
|
||||
printf("...\n");
|
||||
|
||||
} else
|
||||
{
|
||||
printf("kernel failed to compile\n");
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
clReleaseContext(context);
|
||||
}
|
||||
|
||||
b3Printf("\npress <Enter>\n");
|
||||
getchar();
|
||||
return 0;
|
||||
}
|
||||
33
test/OpenCL/KernelLaunch/premake4.lua
Normal file
33
test/OpenCL/KernelLaunch/premake4.lua
Normal file
@@ -0,0 +1,33 @@
|
||||
-- Defines the "Test_OpenCL_kernel_launch_<vendor>" console project for one
-- OpenCL vendor. Nothing is generated when findOpenCL reports that the
-- vendor's OpenCL SDK is not available.
-- findOpenCL and initOpenCL are not defined in this file; they are expected to
-- be provided by the including build script.
function createProject(vendor)

	-- local: the result is only needed inside this function
	local hasCL = findOpenCL(vendor)

	if (not hasCL) then
		return
	end

	project ("Test_OpenCL_kernel_launch_" .. vendor)

	initOpenCL(vendor)

	language "C++"

	kind "ConsoleApp"
	targetdir "../../../bin"

	includedirs {"../../../src"}

	files {
		"main.cpp",
		"../../../src/Bullet3OpenCL/Initialize/b3OpenCLUtils.cpp",
		"../../../src/Bullet3Common/b3AlignedAllocator.cpp",
		"../../../src/Bullet3OpenCL/Initialize/b3OpenCLUtils.h",
		"../../../src/Bullet3Common/b3Logging.cpp",
	}

end
|
||||
-- Generate one test project per supported OpenCL vendor, in this order.
for _, vendor in ipairs({"clew", "Apple", "AMD", "Intel", "NVIDIA"}) do
	createProject(vendor)
end
|
||||
Reference in New Issue
Block a user