share more data structures and code between OpenCL kernels and C/C++ code on CPU (non-OpenCL)

integrateSingleTransform
fix bug in registerRigidBody (could lead to random crashes, especially when performing picking/adding rigid bodies afterwards)
This commit is contained in:
erwin coumans
2013-11-07 15:49:57 -08:00
parent e20cb22832
commit e85bae5fe3
19 changed files with 270 additions and 111 deletions

View File

@@ -15,10 +15,14 @@
#include "GpuRigidBodyDemoInternalData.h" #include "GpuRigidBodyDemoInternalData.h"
#include "Bullet3Collision/BroadPhaseCollision/b3DynamicBvhBroadphase.h" #include "Bullet3Collision/BroadPhaseCollision/b3DynamicBvhBroadphase.h"
#include "Bullet3Collision/NarrowPhaseCollision/b3RigidBodyCL.h" #include "Bullet3Collision/NarrowPhaseCollision/b3RigidBodyCL.h"
#include "Bullet3OpenCL/RigidBody/b3GpuNarrowPhaseInternalData.h"
static b3KeyboardCallback oldCallback = 0; static b3KeyboardCallback oldCallback = 0;
extern bool gReset; extern bool gReset;
bool convertOnCpu = false;
#define MSTRINGIFY(A) #A #define MSTRINGIFY(A) #A
static const char* s_rigidBodyKernelString = MSTRINGIFY( static const char* s_rigidBodyKernelString = MSTRINGIFY(
@@ -213,15 +217,36 @@ void GpuRigidBodyDemo::clientMoveAndDisplay()
if (numObjects) if (numObjects)
{ {
B3_PROFILE("cl2gl_convert"); if (convertOnCpu)
int ciErrNum = 0; {
cl_mem bodies = m_data->m_rigidBodyPipeline->getBodyBuffer(); b3GpuNarrowPhaseInternalData* npData = m_data->m_np->getInternalData();
b3LauncherCL launch(m_clData->m_clQueue,m_data->m_copyTransformsToVBOKernel); npData->m_bodyBufferGPU->copyToHost(*npData->m_bodyBufferCPU);
launch.setBuffer(bodies);
launch.setBuffer(m_data->m_instancePosOrnColor->getBufferCL()); b3AlignedObjectArray<b3Vector4> vboCPU;
launch.setConst(numObjects); m_data->m_instancePosOrnColor->copyToHost(vboCPU);
launch.launch1D(numObjects);
oclCHECKERROR(ciErrNum, CL_SUCCESS); for (int i=0;i<numObjects;i++)
{
b3Vector4 pos = (const b3Vector4&)npData->m_bodyBufferCPU->at(i).m_pos;
b3Quat orn = npData->m_bodyBufferCPU->at(i).m_quat;
pos.w = 1.f;
vboCPU[i] = pos;
vboCPU[i + numObjects] = (b3Vector4&)orn;
}
m_data->m_instancePosOrnColor->copyFromHost(vboCPU);
} else
{
B3_PROFILE("cl2gl_convert");
int ciErrNum = 0;
cl_mem bodies = m_data->m_rigidBodyPipeline->getBodyBuffer();
b3LauncherCL launch(m_clData->m_clQueue,m_data->m_copyTransformsToVBOKernel);
launch.setBuffer(bodies);
launch.setBuffer(m_data->m_instancePosOrnColor->getBufferCL());
launch.setConst(numObjects);
launch.launch1D(numObjects);
oclCHECKERROR(ciErrNum, CL_SUCCESS);
}
} }
if (animate && numObjects) if (animate && numObjects)

View File

@@ -12,6 +12,9 @@ struct MyTest
#else #else
#define b3AtomicInc atomic_inc #define b3AtomicInc atomic_inc
#define b3Fabs fabs #define b3Fabs fabs
#define b3Sqrt native_sqrt
#define b3Sin native_sin
#define b3Cos native_cos
#endif #endif
#endif #endif

View File

@@ -31,7 +31,7 @@ inline float4 b3FastNormalize4(float4 v)
} }
inline b3Quat b3QuatMul(b3Quat a, b3Quat b); inline b3Quat b3QuatMul(b3Quat a, b3Quat b);
inline b3Quat b3QuatNormalize(b3QuatConstArg in); inline b3Quat b3QuatNormalized(b3QuatConstArg in);
inline b3Quat b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec); inline b3Quat b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec);
inline b3Quat b3QuatInvert(b3QuatConstArg q); inline b3Quat b3QuatInvert(b3QuatConstArg q);
inline b3Quat b3QuatMul(b3QuatConstArg a, b3QuatConstArg b) inline b3Quat b3QuatMul(b3QuatConstArg a, b3QuatConstArg b)
@@ -44,8 +44,10 @@ inline b3Quat b3QuatMul(b3QuatConstArg a, b3QuatConstArg b)
return ans; return ans;
} }
inline b3Quat b3QuatNormalize(b3QuatConstArg in) inline b3Quat b3QuatNormalized(b3QuatConstArg in)
{ {
b3Quat q;
q=in;
//return b3FastNormalize4(in); //return b3FastNormalize4(in);
float len = native_sqrt(dot(q, q)); float len = native_sqrt(dot(q, q));
if(len > 0.f) if(len > 0.f)

View File

@@ -3,7 +3,63 @@
#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h" #include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h"
inline void b3IntegrateTransform( b3RigidBodyData* body, float timeStep, float angularDamping, b3Float4ConstArg gravityAcceleration)
inline void integrateSingleTransform( __global b3RigidBodyData_t* bodies,int nodeID, float timeStep, float angularDamping, b3Float4ConstArg gravityAcceleration)
{
if (bodies[nodeID].m_invMass != 0.f)
{
float BT_GPU_ANGULAR_MOTION_THRESHOLD = (0.25f * 3.14159254f);
//angular velocity
{
b3Float4 axis;
//add some hardcoded angular damping
bodies[nodeID].m_angVel.x *= angularDamping;
bodies[nodeID].m_angVel.y *= angularDamping;
bodies[nodeID].m_angVel.z *= angularDamping;
b3Float4 angvel = bodies[nodeID].m_angVel;
float fAngle = b3Sqrt(b3Dot3F4(angvel, angvel));
//limit the angular motion
if(fAngle*timeStep > BT_GPU_ANGULAR_MOTION_THRESHOLD)
{
fAngle = BT_GPU_ANGULAR_MOTION_THRESHOLD / timeStep;
}
if(fAngle < 0.001f)
{
// use Taylor's expansions of sync function
axis = angvel * (0.5f*timeStep-(timeStep*timeStep*timeStep)*0.020833333333f * fAngle * fAngle);
}
else
{
// sync(fAngle) = sin(c*fAngle)/t
axis = angvel * ( b3Sin(0.5f * fAngle * timeStep) / fAngle);
}
b3Quat dorn;
dorn.x = axis.x;
dorn.y = axis.y;
dorn.z = axis.z;
dorn.w = b3Cos(fAngle * timeStep * 0.5f);
b3Quat orn0 = bodies[nodeID].m_quat;
b3Quat predictedOrn = b3QuatMul(dorn, orn0);
predictedOrn = b3QuatNormalized(predictedOrn);
bodies[nodeID].m_quat=predictedOrn;
}
//linear velocity
bodies[nodeID].m_pos += bodies[nodeID].m_linVel * timeStep;
//apply gravity
bodies[nodeID].m_linVel += gravityAcceleration * timeStep;
}
}
inline void b3IntegrateTransform( __global b3RigidBodyData_t* body, float timeStep, float angularDamping, b3Float4ConstArg gravityAcceleration)
{ {
float BT_GPU_ANGULAR_MOTION_THRESHOLD = (0.25f * 3.14159254f); float BT_GPU_ANGULAR_MOTION_THRESHOLD = (0.25f * 3.14159254f);
@@ -53,4 +109,5 @@ inline void b3IntegrateTransform( b3RigidBodyData* body, float timeStep, float a
body->m_pos += body->m_linVel * timeStep; body->m_pos += body->m_linVel * timeStep;
} }
} }

View File

@@ -14,6 +14,9 @@ static const char* primitiveContactsKernelsCL= \
"#else\n" "#else\n"
"#define b3AtomicInc atomic_inc\n" "#define b3AtomicInc atomic_inc\n"
"#define b3Fabs fabs\n" "#define b3Fabs fabs\n"
"#define b3Sqrt native_sqrt\n"
"#define b3Sin native_sin\n"
"#define b3Cos native_cos\n"
"#endif\n" "#endif\n"
"#endif\n" "#endif\n"
"#ifdef __cplusplus\n" "#ifdef __cplusplus\n"

View File

@@ -46,6 +46,9 @@ static const char* satClipKernelsCL= \
"#else\n" "#else\n"
"#define b3AtomicInc atomic_inc\n" "#define b3AtomicInc atomic_inc\n"
"#define b3Fabs fabs\n" "#define b3Fabs fabs\n"
"#define b3Sqrt native_sqrt\n"
"#define b3Sin native_sin\n"
"#define b3Cos native_cos\n"
"#endif\n" "#endif\n"
"#endif\n" "#endif\n"
"#ifdef __cplusplus\n" "#ifdef __cplusplus\n"

View File

@@ -155,6 +155,9 @@ static const char* satKernelsCL= \
"#else\n" "#else\n"
"#define b3AtomicInc atomic_inc\n" "#define b3AtomicInc atomic_inc\n"
"#define b3Fabs fabs\n" "#define b3Fabs fabs\n"
"#define b3Sqrt native_sqrt\n"
"#define b3Sin native_sin\n"
"#define b3Cos native_cos\n"
"#endif\n" "#endif\n"
"#endif\n" "#endif\n"
"#ifdef __cplusplus\n" "#ifdef __cplusplus\n"
@@ -210,7 +213,7 @@ static const char* satKernelsCL= \
"}\n" "}\n"
" \n" " \n"
"inline b3Quat b3QuatMul(b3Quat a, b3Quat b);\n" "inline b3Quat b3QuatMul(b3Quat a, b3Quat b);\n"
"inline b3Quat b3QuatNormalize(b3QuatConstArg in);\n" "inline b3Quat b3QuatNormalized(b3QuatConstArg in);\n"
"inline b3Quat b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec);\n" "inline b3Quat b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec);\n"
"inline b3Quat b3QuatInvert(b3QuatConstArg q);\n" "inline b3Quat b3QuatInvert(b3QuatConstArg q);\n"
"inline b3Quat b3QuatMul(b3QuatConstArg a, b3QuatConstArg b)\n" "inline b3Quat b3QuatMul(b3QuatConstArg a, b3QuatConstArg b)\n"
@@ -222,8 +225,10 @@ static const char* satKernelsCL= \
" ans.w = a.w*b.w - b3Dot3F4(a, b);\n" " ans.w = a.w*b.w - b3Dot3F4(a, b);\n"
" return ans;\n" " return ans;\n"
"}\n" "}\n"
"inline b3Quat b3QuatNormalize(b3QuatConstArg in)\n" "inline b3Quat b3QuatNormalized(b3QuatConstArg in)\n"
"{\n" "{\n"
" b3Quat q;\n"
" q=in;\n"
" //return b3FastNormalize4(in);\n" " //return b3FastNormalize4(in);\n"
" float len = native_sqrt(dot(q, q));\n" " float len = native_sqrt(dot(q, q));\n"
" if(len > 0.f)\n" " if(len > 0.f)\n"

View File

@@ -908,7 +908,7 @@ int b3GpuNarrowPhase::registerRigidBody(int collidableIndex, float mass, const f
return -1; return -1;
} }
m_data->m_bodyBufferGPU->resize(m_data->m_numAcceleratedRigidBodies+1); m_data->m_bodyBufferCPU->resize(m_data->m_numAcceleratedRigidBodies+1);
b3RigidBodyCL& body = m_data->m_bodyBufferCPU->at(m_data->m_numAcceleratedRigidBodies); b3RigidBodyCL& body = m_data->m_bodyBufferCPU->at(m_data->m_numAcceleratedRigidBodies);

View File

@@ -97,6 +97,11 @@ public:
return m_data; return m_data;
} }
b3GpuNarrowPhaseInternalData* getInternalData()
{
return m_data;
}
const struct b3SapAabb& getLocalSpaceAabb(int collidableIndex) const; const struct b3SapAabb& getLocalSpaceAabb(int collidableIndex) const;
}; };

View File

@@ -40,6 +40,7 @@ subject to the following restrictions:
bool dumpContactStats = false; bool dumpContactStats = false;
bool calcWorldSpaceAabbOnCpu = true; bool calcWorldSpaceAabbOnCpu = true;
bool useCalculateOverlappingPairsHost = true; bool useCalculateOverlappingPairsHost = true;
bool integrateOnCpu = true;
#else #else
bool useDbvt = false;//true; bool useDbvt = false;//true;
@@ -47,6 +48,8 @@ subject to the following restrictions:
bool dumpContactStats = false; bool dumpContactStats = false;
bool calcWorldSpaceAabbOnCpu = false;//true; bool calcWorldSpaceAabbOnCpu = false;//true;
bool useCalculateOverlappingPairsHost = false; bool useCalculateOverlappingPairsHost = false;
bool integrateOnCpu = false;
#endif #endif
#ifdef TEST_OTHER_GPU_SOLVER #ifdef TEST_OTHER_GPU_SOLVER
@@ -63,7 +66,9 @@ subject to the following restrictions:
#include "Bullet3Collision/NarrowPhaseCollision/b3Config.h" #include "Bullet3Collision/NarrowPhaseCollision/b3Config.h"
#include "Bullet3OpenCL/Raycast/b3GpuRaycast.h" #include "Bullet3OpenCL/Raycast/b3GpuRaycast.h"
#include "Bullet3Dynamics/shared/b3IntegrateTransforms.h"
#include "Bullet3OpenCL/RigidBody/b3GpuNarrowPhaseInternalData.h"
b3GpuRigidBodyPipeline::b3GpuRigidBodyPipeline(cl_context ctx,cl_device_id device, cl_command_queue q,class b3GpuNarrowPhase* narrowphase, class b3GpuSapBroadphase* broadphaseSap , struct b3DynamicBvhBroadphase* broadphaseDbvt, const b3Config& config) b3GpuRigidBodyPipeline::b3GpuRigidBodyPipeline(cl_context ctx,cl_device_id device, cl_command_queue q,class b3GpuNarrowPhase* narrowphase, class b3GpuSapBroadphase* broadphaseSap , struct b3DynamicBvhBroadphase* broadphaseDbvt, const b3Config& config)
{ {
@@ -430,22 +435,39 @@ void b3GpuRigidBodyPipeline::stepSimulation(float deltaTime)
} }
void b3GpuRigidBodyPipeline::integrate(float timeStep) void b3GpuRigidBodyPipeline::integrate(float timeStep)
{ {
//integrate //integrate
b3LauncherCL launcher(m_data->m_queue,m_data->m_integrateTransformsKernel);
launcher.setBuffer(m_data->m_narrowphase->getBodiesGpu());
int numBodies = m_data->m_narrowphase->getNumRigidBodies(); int numBodies = m_data->m_narrowphase->getNumRigidBodies();
launcher.setConst(numBodies);
launcher.setConst(timeStep);
float angularDamp = 0.99f; float angularDamp = 0.99f;
launcher.setConst(angularDamp);
launcher.setConst(m_data->m_gravity);
launcher.launch1D(numBodies); if (integrateOnCpu)
{
if(numBodies)
{
b3GpuNarrowPhaseInternalData* npData = m_data->m_narrowphase->getInternalData();
npData->m_bodyBufferGPU->copyToHost(*npData->m_bodyBufferCPU);
b3RigidBodyData_t* bodies = &npData->m_bodyBufferCPU->at(0);
for (int nodeID=0;nodeID<numBodies;nodeID++)
{
integrateSingleTransform( bodies,nodeID, timeStep, angularDamp, m_data->m_gravity);
}
npData->m_bodyBufferGPU->copyFromHost(*npData->m_bodyBufferCPU);
}
} else
{
b3LauncherCL launcher(m_data->m_queue,m_data->m_integrateTransformsKernel);
launcher.setBuffer(m_data->m_narrowphase->getBodiesGpu());
launcher.setConst(numBodies);
launcher.setConst(timeStep);
launcher.setConst(angularDamp);
launcher.setConst(m_data->m_gravity);
launcher.launch1D(numBodies);
}
} }

View File

@@ -18,7 +18,7 @@ subject to the following restrictions:
///useNewBatchingKernel is a rewritten kernel using just a single thread of the warp, for experiments ///useNewBatchingKernel is a rewritten kernel using just a single thread of the warp, for experiments
bool useNewBatchingKernel = false; bool useNewBatchingKernel = false;
bool convertConstraintOnCpu = false;//true; bool convertConstraintOnCpu = false;
#define B3_SOLVER_SETUP_KERNEL_PATH "src/Bullet3OpenCL/RigidBody/kernels/solverSetup.cl" #define B3_SOLVER_SETUP_KERNEL_PATH "src/Bullet3OpenCL/RigidBody/kernels/solverSetup.cl"
#define B3_SOLVER_SETUP2_KERNEL_PATH "src/Bullet3OpenCL/RigidBody/kernels/solverSetup2.cl" #define B3_SOLVER_SETUP2_KERNEL_PATH "src/Bullet3OpenCL/RigidBody/kernels/solverSetup2.cl"

View File

@@ -26,6 +26,9 @@ static const char* batchingKernelsCL= \
"#else\n" "#else\n"
"#define b3AtomicInc atomic_inc\n" "#define b3AtomicInc atomic_inc\n"
"#define b3Fabs fabs\n" "#define b3Fabs fabs\n"
"#define b3Sqrt native_sqrt\n"
"#define b3Sin native_sin\n"
"#define b3Cos native_cos\n"
"#endif\n" "#endif\n"
"#endif\n" "#endif\n"
"#ifdef __cplusplus\n" "#ifdef __cplusplus\n"

View File

@@ -26,6 +26,9 @@ static const char* batchingKernelsNewCL= \
"#else\n" "#else\n"
"#define b3AtomicInc atomic_inc\n" "#define b3AtomicInc atomic_inc\n"
"#define b3Fabs fabs\n" "#define b3Fabs fabs\n"
"#define b3Sqrt native_sqrt\n"
"#define b3Sin native_sin\n"
"#define b3Cos native_cos\n"
"#endif\n" "#endif\n"
"#endif\n" "#endif\n"
"#ifdef __cplusplus\n" "#ifdef __cplusplus\n"

View File

@@ -16,19 +16,7 @@ subject to the following restrictions:
#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h" #include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h"
float4 quatMult(float4 q1, float4 q2) #include "Bullet3Dynamics/shared/b3IntegrateTransforms.h"
{
float4 q;
q.x = q1.w * q2.x + q1.x * q2.w + q1.y * q2.z - q1.z * q2.y;
q.y = q1.w * q2.y + q1.y * q2.w + q1.z * q2.x - q1.x * q2.z;
q.z = q1.w * q2.z + q1.z * q2.w + q1.x * q2.y - q1.y * q2.x;
q.w = q1.w * q2.w - q1.x * q2.x - q1.y * q2.y - q1.z * q2.z;
return q;
}
@@ -36,48 +24,9 @@ __kernel void
integrateTransformsKernel( __global b3RigidBodyData_t* bodies,const int numNodes, float timeStep, float angularDamping, float4 gravityAcceleration) integrateTransformsKernel( __global b3RigidBodyData_t* bodies,const int numNodes, float timeStep, float angularDamping, float4 gravityAcceleration)
{ {
int nodeID = get_global_id(0); int nodeID = get_global_id(0);
float BT_GPU_ANGULAR_MOTION_THRESHOLD = (0.25f * 3.14159254f);
if( nodeID < numNodes && (bodies[nodeID].m_invMass != 0.f)) if( nodeID < numNodes)
{ {
//angular velocity integrateSingleTransform(bodies,nodeID, timeStep, angularDamping,gravityAcceleration);
{
float4 axis;
//add some hardcoded angular damping
bodies[nodeID].m_angVel.x *= angularDamping;
bodies[nodeID].m_angVel.y *= angularDamping;
bodies[nodeID].m_angVel.z *= angularDamping;
float4 angvel = bodies[nodeID].m_angVel;
float fAngle = native_sqrt(dot(angvel, angvel));
//limit the angular motion
if(fAngle*timeStep > BT_GPU_ANGULAR_MOTION_THRESHOLD)
{
fAngle = BT_GPU_ANGULAR_MOTION_THRESHOLD / timeStep;
}
if(fAngle < 0.001f)
{
// use Taylor's expansions of sync function
axis = angvel * (0.5f*timeStep-(timeStep*timeStep*timeStep)*0.020833333333f * fAngle * fAngle);
}
else
{
// sync(fAngle) = sin(c*fAngle)/t
axis = angvel * ( native_sin(0.5f * fAngle * timeStep) / fAngle);
}
float4 dorn = axis;
dorn.w = native_cos(fAngle * timeStep * 0.5f);
float4 orn0 = bodies[nodeID].m_quat;
float4 predictedOrn = quatMult(dorn, orn0);
predictedOrn = b3QuatNormalize(predictedOrn);
bodies[nodeID].m_quat=predictedOrn;
}
//linear velocity
bodies[nodeID].m_pos += bodies[nodeID].m_linVel * timeStep;
//apply gravity
bodies[nodeID].m_linVel += gravityAcceleration * timeStep;
} }
} }

View File

@@ -26,6 +26,9 @@ static const char* integrateKernelCL= \
"#else\n" "#else\n"
"#define b3AtomicInc atomic_inc\n" "#define b3AtomicInc atomic_inc\n"
"#define b3Fabs fabs\n" "#define b3Fabs fabs\n"
"#define b3Sqrt native_sqrt\n"
"#define b3Sin native_sin\n"
"#define b3Cos native_cos\n"
"#endif\n" "#endif\n"
"#endif\n" "#endif\n"
"#ifdef __cplusplus\n" "#ifdef __cplusplus\n"
@@ -79,7 +82,7 @@ static const char* integrateKernelCL= \
"}\n" "}\n"
" \n" " \n"
"inline b3Quat b3QuatMul(b3Quat a, b3Quat b);\n" "inline b3Quat b3QuatMul(b3Quat a, b3Quat b);\n"
"inline b3Quat b3QuatNormalize(b3QuatConstArg in);\n" "inline b3Quat b3QuatNormalized(b3QuatConstArg in);\n"
"inline b3Quat b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec);\n" "inline b3Quat b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec);\n"
"inline b3Quat b3QuatInvert(b3QuatConstArg q);\n" "inline b3Quat b3QuatInvert(b3QuatConstArg q);\n"
"inline b3Quat b3QuatMul(b3QuatConstArg a, b3QuatConstArg b)\n" "inline b3Quat b3QuatMul(b3QuatConstArg a, b3QuatConstArg b)\n"
@@ -91,8 +94,10 @@ static const char* integrateKernelCL= \
" ans.w = a.w*b.w - b3Dot3F4(a, b);\n" " ans.w = a.w*b.w - b3Dot3F4(a, b);\n"
" return ans;\n" " return ans;\n"
"}\n" "}\n"
"inline b3Quat b3QuatNormalize(b3QuatConstArg in)\n" "inline b3Quat b3QuatNormalized(b3QuatConstArg in)\n"
"{\n" "{\n"
" b3Quat q;\n"
" q=in;\n"
" //return b3FastNormalize4(in);\n" " //return b3FastNormalize4(in);\n"
" float len = native_sqrt(dot(q, q));\n" " float len = native_sqrt(dot(q, q));\n"
" if(len > 0.f)\n" " if(len > 0.f)\n"
@@ -273,32 +278,26 @@ static const char* integrateKernelCL= \
"};\n" "};\n"
"#endif //B3_RIGIDBODY_DATA_H\n" "#endif //B3_RIGIDBODY_DATA_H\n"
" \n" " \n"
"float4 quatMult(float4 q1, float4 q2)\n" "#ifndef B3_RIGIDBODY_DATA_H\n"
"#endif //B3_RIGIDBODY_DATA_H\n"
" \n"
"inline void integrateSingleTransform( __global b3RigidBodyData_t* bodies,int nodeID, float timeStep, float angularDamping, b3Float4ConstArg gravityAcceleration)\n"
"{\n" "{\n"
" float4 q;\n" " \n"
" q.x = q1.w * q2.x + q1.x * q2.w + q1.y * q2.z - q1.z * q2.y;\n" " if (bodies[nodeID].m_invMass != 0.f)\n"
" q.y = q1.w * q2.y + q1.y * q2.w + q1.z * q2.x - q1.x * q2.z;\n"
" q.z = q1.w * q2.z + q1.z * q2.w + q1.x * q2.y - q1.y * q2.x;\n"
" q.w = q1.w * q2.w - q1.x * q2.x - q1.y * q2.y - q1.z * q2.z; \n"
" return q;\n"
"}\n"
"__kernel void \n"
" integrateTransformsKernel( __global b3RigidBodyData_t* bodies,const int numNodes, float timeStep, float angularDamping, float4 gravityAcceleration)\n"
"{\n"
" int nodeID = get_global_id(0);\n"
" float BT_GPU_ANGULAR_MOTION_THRESHOLD = (0.25f * 3.14159254f);\n"
" if( nodeID < numNodes && (bodies[nodeID].m_invMass != 0.f))\n"
" {\n" " {\n"
" float BT_GPU_ANGULAR_MOTION_THRESHOLD = (0.25f * 3.14159254f);\n"
" //angular velocity\n" " //angular velocity\n"
" {\n" " {\n"
" float4 axis;\n" " b3Float4 axis;\n"
" //add some hardcoded angular damping\n" " //add some hardcoded angular damping\n"
" bodies[nodeID].m_angVel.x *= angularDamping;\n" " bodies[nodeID].m_angVel.x *= angularDamping;\n"
" bodies[nodeID].m_angVel.y *= angularDamping;\n" " bodies[nodeID].m_angVel.y *= angularDamping;\n"
" bodies[nodeID].m_angVel.z *= angularDamping;\n" " bodies[nodeID].m_angVel.z *= angularDamping;\n"
" \n" " \n"
" float4 angvel = bodies[nodeID].m_angVel;\n" " b3Float4 angvel = bodies[nodeID].m_angVel;\n"
" float fAngle = native_sqrt(dot(angvel, angvel));\n" " float fAngle = b3Sqrt(b3Dot3F4(angvel, angvel));\n"
" \n"
" //limit the angular motion\n" " //limit the angular motion\n"
" if(fAngle*timeStep > BT_GPU_ANGULAR_MOTION_THRESHOLD)\n" " if(fAngle*timeStep > BT_GPU_ANGULAR_MOTION_THRESHOLD)\n"
" {\n" " {\n"
@@ -312,13 +311,17 @@ static const char* integrateKernelCL= \
" else\n" " else\n"
" {\n" " {\n"
" // sync(fAngle) = sin(c*fAngle)/t\n" " // sync(fAngle) = sin(c*fAngle)/t\n"
" axis = angvel * ( native_sin(0.5f * fAngle * timeStep) / fAngle);\n" " axis = angvel * ( b3Sin(0.5f * fAngle * timeStep) / fAngle);\n"
" }\n" " }\n"
" float4 dorn = axis;\n" " \n"
" dorn.w = native_cos(fAngle * timeStep * 0.5f);\n" " b3Quat dorn;\n"
" float4 orn0 = bodies[nodeID].m_quat;\n" " dorn.x = axis.x;\n"
" float4 predictedOrn = quatMult(dorn, orn0);\n" " dorn.y = axis.y;\n"
" predictedOrn = b3QuatNormalize(predictedOrn);\n" " dorn.z = axis.z;\n"
" dorn.w = b3Cos(fAngle * timeStep * 0.5f);\n"
" b3Quat orn0 = bodies[nodeID].m_quat;\n"
" b3Quat predictedOrn = b3QuatMul(dorn, orn0);\n"
" predictedOrn = b3QuatNormalized(predictedOrn);\n"
" bodies[nodeID].m_quat=predictedOrn;\n" " bodies[nodeID].m_quat=predictedOrn;\n"
" }\n" " }\n"
" //linear velocity \n" " //linear velocity \n"
@@ -328,5 +331,65 @@ static const char* integrateKernelCL= \
" bodies[nodeID].m_linVel += gravityAcceleration * timeStep;\n" " bodies[nodeID].m_linVel += gravityAcceleration * timeStep;\n"
" \n" " \n"
" }\n" " }\n"
" \n"
"}\n"
"inline void b3IntegrateTransform( __global b3RigidBodyData_t* body, float timeStep, float angularDamping, b3Float4ConstArg gravityAcceleration)\n"
"{\n"
" float BT_GPU_ANGULAR_MOTION_THRESHOLD = (0.25f * 3.14159254f);\n"
" \n"
" if( (body->m_invMass != 0.f))\n"
" {\n"
" //angular velocity\n"
" {\n"
" b3Float4 axis;\n"
" //add some hardcoded angular damping\n"
" body->m_angVel.x *= angularDamping;\n"
" body->m_angVel.y *= angularDamping;\n"
" body->m_angVel.z *= angularDamping;\n"
" \n"
" b3Float4 angvel = body->m_angVel;\n"
" float fAngle = b3Sqrt(b3Dot(angvel, angvel));\n"
" //limit the angular motion\n"
" if(fAngle*timeStep > BT_GPU_ANGULAR_MOTION_THRESHOLD)\n"
" {\n"
" fAngle = BT_GPU_ANGULAR_MOTION_THRESHOLD / timeStep;\n"
" }\n"
" if(fAngle < 0.001f)\n"
" {\n"
" // use Taylor's expansions of sync function\n"
" axis = angvel * (0.5f*timeStep-(timeStep*timeStep*timeStep)*0.020833333333f * fAngle * fAngle);\n"
" }\n"
" else\n"
" {\n"
" // sync(fAngle) = sin(c*fAngle)/t\n"
" axis = angvel * ( b3Sin(0.5f * fAngle * timeStep) / fAngle);\n"
" }\n"
" b3Quat dorn;\n"
" dorn.x = axis.x;\n"
" dorn.y = axis.y;\n"
" dorn.z = axis.z;\n"
" dorn.w = b3Cos(fAngle * timeStep * 0.5f);\n"
" b3Quat orn0 = body->m_quat;\n"
" b3Quat predictedOrn = b3QuatMul(dorn, orn0);\n"
" predictedOrn = b3QuatNormalized(predictedOrn);\n"
" body->m_quat=predictedOrn;\n"
" }\n"
" //apply gravity\n"
" body->m_linVel += gravityAcceleration * timeStep;\n"
" //linear velocity \n"
" body->m_pos += body->m_linVel * timeStep;\n"
" \n"
" }\n"
" \n"
"}\n"
"__kernel void \n"
" integrateTransformsKernel( __global b3RigidBodyData_t* bodies,const int numNodes, float timeStep, float angularDamping, float4 gravityAcceleration)\n"
"{\n"
" int nodeID = get_global_id(0);\n"
" \n"
" if( nodeID < numNodes)\n"
" {\n"
" integrateSingleTransform(bodies,nodeID, timeStep, angularDamping,gravityAcceleration);\n"
" }\n"
"}\n" "}\n"
; ;

View File

@@ -26,6 +26,9 @@ static const char* solverSetupCL= \
"#else\n" "#else\n"
"#define b3AtomicInc atomic_inc\n" "#define b3AtomicInc atomic_inc\n"
"#define b3Fabs fabs\n" "#define b3Fabs fabs\n"
"#define b3Sqrt native_sqrt\n"
"#define b3Sin native_sin\n"
"#define b3Cos native_cos\n"
"#endif\n" "#endif\n"
"#endif\n" "#endif\n"
"#ifdef __cplusplus\n" "#ifdef __cplusplus\n"
@@ -141,7 +144,7 @@ static const char* solverSetupCL= \
"}\n" "}\n"
" \n" " \n"
"inline b3Quat b3QuatMul(b3Quat a, b3Quat b);\n" "inline b3Quat b3QuatMul(b3Quat a, b3Quat b);\n"
"inline b3Quat b3QuatNormalize(b3QuatConstArg in);\n" "inline b3Quat b3QuatNormalized(b3QuatConstArg in);\n"
"inline b3Quat b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec);\n" "inline b3Quat b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec);\n"
"inline b3Quat b3QuatInvert(b3QuatConstArg q);\n" "inline b3Quat b3QuatInvert(b3QuatConstArg q);\n"
"inline b3Quat b3QuatMul(b3QuatConstArg a, b3QuatConstArg b)\n" "inline b3Quat b3QuatMul(b3QuatConstArg a, b3QuatConstArg b)\n"
@@ -153,8 +156,10 @@ static const char* solverSetupCL= \
" ans.w = a.w*b.w - b3Dot3F4(a, b);\n" " ans.w = a.w*b.w - b3Dot3F4(a, b);\n"
" return ans;\n" " return ans;\n"
"}\n" "}\n"
"inline b3Quat b3QuatNormalize(b3QuatConstArg in)\n" "inline b3Quat b3QuatNormalized(b3QuatConstArg in)\n"
"{\n" "{\n"
" b3Quat q;\n"
" q=in;\n"
" //return b3FastNormalize4(in);\n" " //return b3FastNormalize4(in);\n"
" float len = native_sqrt(dot(q, q));\n" " float len = native_sqrt(dot(q, q));\n"
" if(len > 0.f)\n" " if(len > 0.f)\n"

View File

@@ -26,6 +26,9 @@ static const char* solverSetup2CL= \
"#else\n" "#else\n"
"#define b3AtomicInc atomic_inc\n" "#define b3AtomicInc atomic_inc\n"
"#define b3Fabs fabs\n" "#define b3Fabs fabs\n"
"#define b3Sqrt native_sqrt\n"
"#define b3Sin native_sin\n"
"#define b3Cos native_cos\n"
"#endif\n" "#endif\n"
"#endif\n" "#endif\n"
"#ifdef __cplusplus\n" "#ifdef __cplusplus\n"

View File

@@ -26,6 +26,9 @@ static const char* solverUtilsCL= \
"#else\n" "#else\n"
"#define b3AtomicInc atomic_inc\n" "#define b3AtomicInc atomic_inc\n"
"#define b3Fabs fabs\n" "#define b3Fabs fabs\n"
"#define b3Sqrt native_sqrt\n"
"#define b3Sin native_sin\n"
"#define b3Cos native_cos\n"
"#endif\n" "#endif\n"
"#endif\n" "#endif\n"
"#ifdef __cplusplus\n" "#ifdef __cplusplus\n"

View File

@@ -16,6 +16,9 @@ static const char* updateAabbsKernelCL= \
"#else\n" "#else\n"
"#define b3AtomicInc atomic_inc\n" "#define b3AtomicInc atomic_inc\n"
"#define b3Fabs fabs\n" "#define b3Fabs fabs\n"
"#define b3Sqrt native_sqrt\n"
"#define b3Sin native_sin\n"
"#define b3Cos native_cos\n"
"#endif\n" "#endif\n"
"#endif\n" "#endif\n"
"#ifdef __cplusplus\n" "#ifdef __cplusplus\n"
@@ -71,7 +74,7 @@ static const char* updateAabbsKernelCL= \
"}\n" "}\n"
" \n" " \n"
"inline b3Quat b3QuatMul(b3Quat a, b3Quat b);\n" "inline b3Quat b3QuatMul(b3Quat a, b3Quat b);\n"
"inline b3Quat b3QuatNormalize(b3QuatConstArg in);\n" "inline b3Quat b3QuatNormalized(b3QuatConstArg in);\n"
"inline b3Quat b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec);\n" "inline b3Quat b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec);\n"
"inline b3Quat b3QuatInvert(b3QuatConstArg q);\n" "inline b3Quat b3QuatInvert(b3QuatConstArg q);\n"
"inline b3Quat b3QuatMul(b3QuatConstArg a, b3QuatConstArg b)\n" "inline b3Quat b3QuatMul(b3QuatConstArg a, b3QuatConstArg b)\n"
@@ -83,8 +86,10 @@ static const char* updateAabbsKernelCL= \
" ans.w = a.w*b.w - b3Dot3F4(a, b);\n" " ans.w = a.w*b.w - b3Dot3F4(a, b);\n"
" return ans;\n" " return ans;\n"
"}\n" "}\n"
"inline b3Quat b3QuatNormalize(b3QuatConstArg in)\n" "inline b3Quat b3QuatNormalized(b3QuatConstArg in)\n"
"{\n" "{\n"
" b3Quat q;\n"
" q=in;\n"
" //return b3FastNormalize4(in);\n" " //return b3FastNormalize4(in);\n"
" float len = native_sqrt(dot(q, q));\n" " float len = native_sqrt(dot(q, q));\n"
" if(len > 0.f)\n" " if(len > 0.f)\n"