diff --git a/Demos3/GpuDemos/rigidbody/GpuRigidBodyDemo.cpp b/Demos3/GpuDemos/rigidbody/GpuRigidBodyDemo.cpp index 0521244ca..4d99cb339 100644 --- a/Demos3/GpuDemos/rigidbody/GpuRigidBodyDemo.cpp +++ b/Demos3/GpuDemos/rigidbody/GpuRigidBodyDemo.cpp @@ -15,10 +15,14 @@ #include "GpuRigidBodyDemoInternalData.h" #include "Bullet3Collision/BroadPhaseCollision/b3DynamicBvhBroadphase.h" #include "Bullet3Collision/NarrowPhaseCollision/b3RigidBodyCL.h" +#include "Bullet3OpenCL/RigidBody/b3GpuNarrowPhaseInternalData.h" + static b3KeyboardCallback oldCallback = 0; extern bool gReset; +bool convertOnCpu = false; + #define MSTRINGIFY(A) #A static const char* s_rigidBodyKernelString = MSTRINGIFY( @@ -213,15 +217,36 @@ void GpuRigidBodyDemo::clientMoveAndDisplay() if (numObjects) { - B3_PROFILE("cl2gl_convert"); - int ciErrNum = 0; - cl_mem bodies = m_data->m_rigidBodyPipeline->getBodyBuffer(); - b3LauncherCL launch(m_clData->m_clQueue,m_data->m_copyTransformsToVBOKernel); - launch.setBuffer(bodies); - launch.setBuffer(m_data->m_instancePosOrnColor->getBufferCL()); - launch.setConst(numObjects); - launch.launch1D(numObjects); - oclCHECKERROR(ciErrNum, CL_SUCCESS); + if (convertOnCpu) + { + b3GpuNarrowPhaseInternalData* npData = m_data->m_np->getInternalData(); + npData->m_bodyBufferGPU->copyToHost(*npData->m_bodyBufferCPU); + + b3AlignedObjectArray vboCPU; + m_data->m_instancePosOrnColor->copyToHost(vboCPU); + + for (int i=0;im_bodyBufferCPU->at(i).m_pos; + b3Quat orn = npData->m_bodyBufferCPU->at(i).m_quat; + pos.w = 1.f; + vboCPU[i] = pos; + vboCPU[i + numObjects] = (b3Vector4&)orn; + } + m_data->m_instancePosOrnColor->copyFromHost(vboCPU); + + } else + { + B3_PROFILE("cl2gl_convert"); + int ciErrNum = 0; + cl_mem bodies = m_data->m_rigidBodyPipeline->getBodyBuffer(); + b3LauncherCL launch(m_clData->m_clQueue,m_data->m_copyTransformsToVBOKernel); + launch.setBuffer(bodies); + launch.setBuffer(m_data->m_instancePosOrnColor->getBufferCL()); + launch.setConst(numObjects); + launch.launch1D(numObjects); + oclCHECKERROR(ciErrNum, CL_SUCCESS); + } } if (animate && numObjects) diff --git a/src/Bullet3Common/shared/b3PlatformDefinitions.h b/src/Bullet3Common/shared/b3PlatformDefinitions.h index 2c62236f0..51af689f8 100644 --- a/src/Bullet3Common/shared/b3PlatformDefinitions.h +++ b/src/Bullet3Common/shared/b3PlatformDefinitions.h @@ -12,6 +12,9 @@ struct MyTest #else #define b3AtomicInc atomic_inc #define b3Fabs fabs +#define b3Sqrt native_sqrt +#define b3Sin native_sin +#define b3Cos native_cos #endif #endif diff --git a/src/Bullet3Common/shared/b3Quat.h b/src/Bullet3Common/shared/b3Quat.h index 49b3d5b77..8f2fe8301 100644 --- a/src/Bullet3Common/shared/b3Quat.h +++ b/src/Bullet3Common/shared/b3Quat.h @@ -31,7 +31,7 @@ inline float4 b3FastNormalize4(float4 v) } inline b3Quat b3QuatMul(b3Quat a, b3Quat b); -inline b3Quat b3QuatNormalize(b3QuatConstArg in); +inline b3Quat b3QuatNormalized(b3QuatConstArg in); inline b3Quat b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec); inline b3Quat b3QuatInvert(b3QuatConstArg q); inline b3Quat b3QuatMul(b3QuatConstArg a, b3QuatConstArg b) @@ -44,8 +44,10 @@ inline b3Quat b3QuatMul(b3QuatConstArg a, b3QuatConstArg b) return ans; } -inline b3Quat b3QuatNormalize(b3QuatConstArg in) +inline b3Quat b3QuatNormalized(b3QuatConstArg in) { + b3Quat q; + q=in; //return b3FastNormalize4(in); float len = native_sqrt(dot(q, q)); if(len > 0.f) diff --git a/src/Bullet3Dynamics/shared/b3IntegrateTransforms.h b/src/Bullet3Dynamics/shared/b3IntegrateTransforms.h index 6eeec3964..fd8d529f3 100644 --- a/src/Bullet3Dynamics/shared/b3IntegrateTransforms.h +++ b/src/Bullet3Dynamics/shared/b3IntegrateTransforms.h @@ -3,7 +3,63 @@ #include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h" -inline void b3IntegrateTransform( b3RigidBodyData* body, float timeStep, float angularDamping, b3Float4ConstArg gravityAcceleration) + +inline void integrateSingleTransform( __global b3RigidBodyData_t* bodies,int nodeID, float timeStep, float angularDamping, b3Float4ConstArg gravityAcceleration) +{ + + if (bodies[nodeID].m_invMass != 0.f) + { + float BT_GPU_ANGULAR_MOTION_THRESHOLD = (0.25f * 3.14159254f); + + //angular velocity + { + b3Float4 axis; + //add some hardcoded angular damping + bodies[nodeID].m_angVel.x *= angularDamping; + bodies[nodeID].m_angVel.y *= angularDamping; + bodies[nodeID].m_angVel.z *= angularDamping; + + b3Float4 angvel = bodies[nodeID].m_angVel; + + float fAngle = b3Sqrt(b3Dot3F4(angvel, angvel)); + + //limit the angular motion + if(fAngle*timeStep > BT_GPU_ANGULAR_MOTION_THRESHOLD) + { + fAngle = BT_GPU_ANGULAR_MOTION_THRESHOLD / timeStep; + } + if(fAngle < 0.001f) + { + // use Taylor's expansions of sync function + axis = angvel * (0.5f*timeStep-(timeStep*timeStep*timeStep)*0.020833333333f * fAngle * fAngle); + } + else + { + // sync(fAngle) = sin(c*fAngle)/t + axis = angvel * ( b3Sin(0.5f * fAngle * timeStep) / fAngle); + } + + b3Quat dorn; + dorn.x = axis.x; + dorn.y = axis.y; + dorn.z = axis.z; + dorn.w = b3Cos(fAngle * timeStep * 0.5f); + b3Quat orn0 = bodies[nodeID].m_quat; + b3Quat predictedOrn = b3QuatMul(dorn, orn0); + predictedOrn = b3QuatNormalized(predictedOrn); + bodies[nodeID].m_quat=predictedOrn; + } + //linear velocity + bodies[nodeID].m_pos += bodies[nodeID].m_linVel * timeStep; + + //apply gravity + bodies[nodeID].m_linVel += gravityAcceleration * timeStep; + + } + +} + +inline void b3IntegrateTransform( __global b3RigidBodyData_t* body, float timeStep, float angularDamping, b3Float4ConstArg gravityAcceleration) { float BT_GPU_ANGULAR_MOTION_THRESHOLD = (0.25f * 3.14159254f); @@ -53,4 +109,5 @@ inline void b3IntegrateTransform( b3RigidBodyData* body, float timeStep, float a body->m_pos += body->m_linVel * timeStep; } + } diff --git a/src/Bullet3OpenCL/NarrowphaseCollision/kernels/primitiveContacts.h b/src/Bullet3OpenCL/NarrowphaseCollision/kernels/primitiveContacts.h index b21de63f9..03f0480d1 100644 --- a/src/Bullet3OpenCL/NarrowphaseCollision/kernels/primitiveContacts.h +++ b/src/Bullet3OpenCL/NarrowphaseCollision/kernels/primitiveContacts.h @@ -14,6 +14,9 @@ static const char* primitiveContactsKernelsCL= \ "#else\n" "#define b3AtomicInc atomic_inc\n" "#define b3Fabs fabs\n" +"#define b3Sqrt native_sqrt\n" +"#define b3Sin native_sin\n" +"#define b3Cos native_cos\n" "#endif\n" "#endif\n" "#ifdef __cplusplus\n" diff --git a/src/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.h b/src/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.h index 65d36410c..e9ad48d7c 100644 --- a/src/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.h +++ b/src/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.h @@ -46,6 +46,9 @@ static const char* satClipKernelsCL= \ "#else\n" "#define b3AtomicInc atomic_inc\n" "#define b3Fabs fabs\n" +"#define b3Sqrt native_sqrt\n" +"#define b3Sin native_sin\n" +"#define b3Cos native_cos\n" "#endif\n" "#endif\n" "#ifdef __cplusplus\n" diff --git a/src/Bullet3OpenCL/NarrowphaseCollision/kernels/satKernels.h b/src/Bullet3OpenCL/NarrowphaseCollision/kernels/satKernels.h index de850b116..9ff2f67ea 100644 --- a/src/Bullet3OpenCL/NarrowphaseCollision/kernels/satKernels.h +++ b/src/Bullet3OpenCL/NarrowphaseCollision/kernels/satKernels.h @@ -155,6 +155,9 @@ static const char* satKernelsCL= \ "#else\n" "#define b3AtomicInc atomic_inc\n" "#define b3Fabs fabs\n" +"#define b3Sqrt native_sqrt\n" +"#define b3Sin native_sin\n" +"#define b3Cos native_cos\n" "#endif\n" "#endif\n" "#ifdef __cplusplus\n" @@ -210,7 +213,7 @@ static const char* satKernelsCL= \ "}\n" " \n" "inline b3Quat b3QuatMul(b3Quat a, b3Quat b);\n" -"inline b3Quat b3QuatNormalize(b3QuatConstArg in);\n" +"inline b3Quat b3QuatNormalized(b3QuatConstArg in);\n" "inline b3Quat b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec);\n" "inline b3Quat b3QuatInvert(b3QuatConstArg q);\n" "inline b3Quat b3QuatMul(b3QuatConstArg a, b3QuatConstArg b)\n" @@ -222,8 +225,10 @@ static const char* satKernelsCL= \ " ans.w = a.w*b.w - b3Dot3F4(a, b);\n" " return ans;\n" "}\n" -"inline b3Quat b3QuatNormalize(b3QuatConstArg in)\n" +"inline b3Quat b3QuatNormalized(b3QuatConstArg in)\n" "{\n" +" b3Quat q;\n" +" q=in;\n" " //return b3FastNormalize4(in);\n" " float len = native_sqrt(dot(q, q));\n" " if(len > 0.f)\n" diff --git a/src/Bullet3OpenCL/RigidBody/b3GpuNarrowPhase.cpp b/src/Bullet3OpenCL/RigidBody/b3GpuNarrowPhase.cpp index d52ea4a97..d086bc628 100644 --- a/src/Bullet3OpenCL/RigidBody/b3GpuNarrowPhase.cpp +++ b/src/Bullet3OpenCL/RigidBody/b3GpuNarrowPhase.cpp @@ -908,7 +908,7 @@ int b3GpuNarrowPhase::registerRigidBody(int collidableIndex, float mass, const f return -1; } - m_data->m_bodyBufferGPU->resize(m_data->m_numAcceleratedRigidBodies+1); + m_data->m_bodyBufferCPU->resize(m_data->m_numAcceleratedRigidBodies+1); b3RigidBodyCL& body = m_data->m_bodyBufferCPU->at(m_data->m_numAcceleratedRigidBodies); diff --git a/src/Bullet3OpenCL/RigidBody/b3GpuNarrowPhase.h b/src/Bullet3OpenCL/RigidBody/b3GpuNarrowPhase.h index e29e362ce..6d315be6d 100644 --- a/src/Bullet3OpenCL/RigidBody/b3GpuNarrowPhase.h +++ b/src/Bullet3OpenCL/RigidBody/b3GpuNarrowPhase.h @@ -97,6 +97,11 @@ public: return m_data; } + b3GpuNarrowPhaseInternalData* getInternalData() + { + return m_data; + } + const struct b3SapAabb& getLocalSpaceAabb(int collidableIndex) const; }; diff --git a/src/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.cpp b/src/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.cpp index 07a1d8084..dbe81d500 100644 --- a/src/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.cpp +++ b/src/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.cpp @@ -40,6 +40,7 @@ subject to the following restrictions: bool dumpContactStats = false; bool calcWorldSpaceAabbOnCpu = true; bool useCalculateOverlappingPairsHost = true; + bool integrateOnCpu = true; #else bool useDbvt = false;//true; @@ -47,6 +48,8 @@ subject to the following restrictions: bool dumpContactStats = false; bool calcWorldSpaceAabbOnCpu = false;//true; bool useCalculateOverlappingPairsHost = false; + bool integrateOnCpu = false; + #endif #ifdef TEST_OTHER_GPU_SOLVER @@ -63,7 +66,9 @@ subject to the following restrictions: #include "Bullet3Collision/NarrowPhaseCollision/b3Config.h" #include "Bullet3OpenCL/Raycast/b3GpuRaycast.h" - + +#include "Bullet3Dynamics/shared/b3IntegrateTransforms.h" +#include "Bullet3OpenCL/RigidBody/b3GpuNarrowPhaseInternalData.h" b3GpuRigidBodyPipeline::b3GpuRigidBodyPipeline(cl_context ctx,cl_device_id device, cl_command_queue q,class b3GpuNarrowPhase* narrowphase, class b3GpuSapBroadphase* broadphaseSap , struct b3DynamicBvhBroadphase* broadphaseDbvt, const b3Config& config) { @@ -430,22 +435,39 @@ void b3GpuRigidBodyPipeline::stepSimulation(float deltaTime) } + void b3GpuRigidBodyPipeline::integrate(float timeStep) { //integrate - - b3LauncherCL launcher(m_data->m_queue,m_data->m_integrateTransformsKernel); - launcher.setBuffer(m_data->m_narrowphase->getBodiesGpu()); int numBodies = m_data->m_narrowphase->getNumRigidBodies(); - launcher.setConst(numBodies); - launcher.setConst(timeStep); float angularDamp = 0.99f; - launcher.setConst(angularDamp); - - - launcher.setConst(m_data->m_gravity); - launcher.launch1D(numBodies); + if (integrateOnCpu) + { + if(numBodies) + { + b3GpuNarrowPhaseInternalData* npData = m_data->m_narrowphase->getInternalData(); + npData->m_bodyBufferGPU->copyToHost(*npData->m_bodyBufferCPU); + + b3RigidBodyData_t* bodies = &npData->m_bodyBufferCPU->at(0); + + for (int nodeID=0;nodeIDm_gravity); + } + npData->m_bodyBufferGPU->copyFromHost(*npData->m_bodyBufferCPU); + } + } else + { + b3LauncherCL launcher(m_data->m_queue,m_data->m_integrateTransformsKernel); + launcher.setBuffer(m_data->m_narrowphase->getBodiesGpu()); + + launcher.setConst(numBodies); + launcher.setConst(timeStep); + launcher.setConst(angularDamp); + launcher.setConst(m_data->m_gravity); + launcher.launch1D(numBodies); + } } diff --git a/src/Bullet3OpenCL/RigidBody/b3Solver.cpp b/src/Bullet3OpenCL/RigidBody/b3Solver.cpp index db11067d9..83625d585 100644 --- a/src/Bullet3OpenCL/RigidBody/b3Solver.cpp +++ b/src/Bullet3OpenCL/RigidBody/b3Solver.cpp @@ -18,7 +18,7 @@ subject to the following restrictions: ///useNewBatchingKernel is a rewritten kernel using just a single thread of the warp, for experiments bool useNewBatchingKernel = false; -bool convertConstraintOnCpu = false;//true; +bool convertConstraintOnCpu = false; #define B3_SOLVER_SETUP_KERNEL_PATH "src/Bullet3OpenCL/RigidBody/kernels/solverSetup.cl" #define B3_SOLVER_SETUP2_KERNEL_PATH "src/Bullet3OpenCL/RigidBody/kernels/solverSetup2.cl" diff --git a/src/Bullet3OpenCL/RigidBody/kernels/batchingKernels.h b/src/Bullet3OpenCL/RigidBody/kernels/batchingKernels.h index a19c57e40..2b910591a 100644 --- a/src/Bullet3OpenCL/RigidBody/kernels/batchingKernels.h +++ b/src/Bullet3OpenCL/RigidBody/kernels/batchingKernels.h @@ -26,6 +26,9 @@ static const char* batchingKernelsCL= \ "#else\n" "#define b3AtomicInc atomic_inc\n" "#define b3Fabs fabs\n" +"#define b3Sqrt native_sqrt\n" +"#define b3Sin native_sin\n" +"#define b3Cos native_cos\n" "#endif\n" "#endif\n" "#ifdef __cplusplus\n" diff --git a/src/Bullet3OpenCL/RigidBody/kernels/batchingKernelsNew.h b/src/Bullet3OpenCL/RigidBody/kernels/batchingKernelsNew.h index 175e16d1a..91c4d828b 100644 --- a/src/Bullet3OpenCL/RigidBody/kernels/batchingKernelsNew.h +++ b/src/Bullet3OpenCL/RigidBody/kernels/batchingKernelsNew.h @@ -26,6 +26,9 @@ static const char* batchingKernelsNewCL= \ "#else\n" "#define b3AtomicInc atomic_inc\n" "#define b3Fabs fabs\n" +"#define b3Sqrt native_sqrt\n" +"#define b3Sin native_sin\n" +"#define b3Cos native_cos\n" "#endif\n" "#endif\n" "#ifdef __cplusplus\n" diff --git a/src/Bullet3OpenCL/RigidBody/kernels/integrateKernel.cl b/src/Bullet3OpenCL/RigidBody/kernels/integrateKernel.cl index d589efdab..e22bc9bc3 100644 --- a/src/Bullet3OpenCL/RigidBody/kernels/integrateKernel.cl +++ b/src/Bullet3OpenCL/RigidBody/kernels/integrateKernel.cl @@ -16,19 +16,7 @@ subject to the following restrictions: #include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h" -float4 quatMult(float4 q1, float4 q2) -{ - float4 q; - q.x = q1.w * q2.x + q1.x * q2.w + q1.y * q2.z - q1.z * q2.y; - q.y = q1.w * q2.y + q1.y * q2.w + q1.z * q2.x - q1.x * q2.z; - q.z = q1.w * q2.z + q1.z * q2.w + q1.x * q2.y - q1.y * q2.x; - q.w = q1.w * q2.w - q1.x * q2.x - q1.y * q2.y - q1.z * q2.z; - return q; -} - - - - +#include "Bullet3Dynamics/shared/b3IntegrateTransforms.h" @@ -36,48 +24,9 @@ __kernel void integrateTransformsKernel( __global b3RigidBodyData_t* bodies,const int numNodes, float timeStep, float angularDamping, float4 gravityAcceleration) { int nodeID = get_global_id(0); - float BT_GPU_ANGULAR_MOTION_THRESHOLD = (0.25f * 3.14159254f); - if( nodeID < numNodes && (bodies[nodeID].m_invMass != 0.f)) + + if( nodeID < numNodes) { - //angular velocity - { - float4 axis; - //add some hardcoded angular damping - bodies[nodeID].m_angVel.x *= angularDamping; - bodies[nodeID].m_angVel.y *= angularDamping; - bodies[nodeID].m_angVel.z *= angularDamping; - - float4 angvel = bodies[nodeID].m_angVel; - float fAngle = native_sqrt(dot(angvel, angvel)); - //limit the angular motion - if(fAngle*timeStep > BT_GPU_ANGULAR_MOTION_THRESHOLD) - { - fAngle = BT_GPU_ANGULAR_MOTION_THRESHOLD / timeStep; - } - if(fAngle < 0.001f) - { - // use Taylor's expansions of sync function - axis = angvel * (0.5f*timeStep-(timeStep*timeStep*timeStep)*0.020833333333f * fAngle * fAngle); - } - else - { - // sync(fAngle) = sin(c*fAngle)/t - axis = angvel * ( native_sin(0.5f * fAngle * timeStep) / fAngle); - } - float4 dorn = axis; - dorn.w = native_cos(fAngle * timeStep * 0.5f); - float4 orn0 = bodies[nodeID].m_quat; - - float4 predictedOrn = quatMult(dorn, orn0); - predictedOrn = b3QuatNormalize(predictedOrn); - bodies[nodeID].m_quat=predictedOrn; - } - - //linear velocity - bodies[nodeID].m_pos += bodies[nodeID].m_linVel * timeStep; - - //apply gravity - bodies[nodeID].m_linVel += gravityAcceleration * timeStep; - + integrateSingleTransform(bodies,nodeID, timeStep, angularDamping,gravityAcceleration); } } diff --git a/src/Bullet3OpenCL/RigidBody/kernels/integrateKernel.h b/src/Bullet3OpenCL/RigidBody/kernels/integrateKernel.h index 360478e13..6460d65ec 100644 --- a/src/Bullet3OpenCL/RigidBody/kernels/integrateKernel.h +++ b/src/Bullet3OpenCL/RigidBody/kernels/integrateKernel.h @@ -26,6 +26,9 @@ static const char* integrateKernelCL= \ "#else\n" "#define b3AtomicInc atomic_inc\n" "#define b3Fabs fabs\n" +"#define b3Sqrt native_sqrt\n" +"#define b3Sin native_sin\n" +"#define b3Cos native_cos\n" "#endif\n" "#endif\n" "#ifdef __cplusplus\n" @@ -79,7 +82,7 @@ static const char* integrateKernelCL= \ "}\n" " \n" "inline b3Quat b3QuatMul(b3Quat a, b3Quat b);\n" -"inline b3Quat b3QuatNormalize(b3QuatConstArg in);\n" +"inline b3Quat b3QuatNormalized(b3QuatConstArg in);\n" "inline b3Quat b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec);\n" "inline b3Quat b3QuatInvert(b3QuatConstArg q);\n" "inline b3Quat b3QuatMul(b3QuatConstArg a, b3QuatConstArg b)\n" @@ -91,8 +94,10 @@ static const char* integrateKernelCL= \ " ans.w = a.w*b.w - b3Dot3F4(a, b);\n" " return ans;\n" "}\n" -"inline b3Quat b3QuatNormalize(b3QuatConstArg in)\n" +"inline b3Quat b3QuatNormalized(b3QuatConstArg in)\n" "{\n" +" b3Quat q;\n" +" q=in;\n" " //return b3FastNormalize4(in);\n" " float len = native_sqrt(dot(q, q));\n" " if(len > 0.f)\n" @@ -273,32 +278,26 @@ static const char* integrateKernelCL= \ "};\n" "#endif //B3_RIGIDBODY_DATA_H\n" " \n" -"float4 quatMult(float4 q1, float4 q2)\n" +"#ifndef B3_RIGIDBODY_DATA_H\n" +"#endif //B3_RIGIDBODY_DATA_H\n" +" \n" +"inline void integrateSingleTransform( __global b3RigidBodyData_t* bodies,int nodeID, float timeStep, float angularDamping, b3Float4ConstArg gravityAcceleration)\n" "{\n" -" float4 q;\n" -" q.x = q1.w * q2.x + q1.x * q2.w + q1.y * q2.z - q1.z * q2.y;\n" -" q.y = q1.w * q2.y + q1.y * q2.w + q1.z * q2.x - q1.x * q2.z;\n" -" q.z = q1.w * q2.z + q1.z * q2.w + q1.x * q2.y - q1.y * q2.x;\n" -" q.w = q1.w * q2.w - q1.x * q2.x - q1.y * q2.y - q1.z * q2.z; \n" -" return q;\n" -"}\n" -"__kernel void \n" -" integrateTransformsKernel( __global b3RigidBodyData_t* bodies,const int numNodes, float timeStep, float angularDamping, float4 gravityAcceleration)\n" -"{\n" -" int nodeID = get_global_id(0);\n" -" float BT_GPU_ANGULAR_MOTION_THRESHOLD = (0.25f * 3.14159254f);\n" -" if( nodeID < numNodes && (bodies[nodeID].m_invMass != 0.f))\n" +" \n" +" if (bodies[nodeID].m_invMass != 0.f)\n" " {\n" +" float BT_GPU_ANGULAR_MOTION_THRESHOLD = (0.25f * 3.14159254f);\n" " //angular velocity\n" " {\n" -" float4 axis;\n" +" b3Float4 axis;\n" " //add some hardcoded angular damping\n" " bodies[nodeID].m_angVel.x *= angularDamping;\n" " bodies[nodeID].m_angVel.y *= angularDamping;\n" " bodies[nodeID].m_angVel.z *= angularDamping;\n" " \n" -" float4 angvel = bodies[nodeID].m_angVel;\n" -" float fAngle = native_sqrt(dot(angvel, angvel));\n" +" b3Float4 angvel = bodies[nodeID].m_angVel;\n" +" float fAngle = b3Sqrt(b3Dot3F4(angvel, angvel));\n" +" \n" " //limit the angular motion\n" " if(fAngle*timeStep > BT_GPU_ANGULAR_MOTION_THRESHOLD)\n" " {\n" @@ -312,13 +311,17 @@ static const char* integrateKernelCL= \ " else\n" " {\n" " // sync(fAngle) = sin(c*fAngle)/t\n" -" axis = angvel * ( native_sin(0.5f * fAngle * timeStep) / fAngle);\n" +" axis = angvel * ( b3Sin(0.5f * fAngle * timeStep) / fAngle);\n" " }\n" -" float4 dorn = axis;\n" -" dorn.w = native_cos(fAngle * timeStep * 0.5f);\n" -" float4 orn0 = bodies[nodeID].m_quat;\n" -" float4 predictedOrn = quatMult(dorn, orn0);\n" -" predictedOrn = b3QuatNormalize(predictedOrn);\n" +" \n" +" b3Quat dorn;\n" +" dorn.x = axis.x;\n" +" dorn.y = axis.y;\n" +" dorn.z = axis.z;\n" +" dorn.w = b3Cos(fAngle * timeStep * 0.5f);\n" +" b3Quat orn0 = bodies[nodeID].m_quat;\n" +" b3Quat predictedOrn = b3QuatMul(dorn, orn0);\n" +" predictedOrn = b3QuatNormalized(predictedOrn);\n" " bodies[nodeID].m_quat=predictedOrn;\n" " }\n" " //linear velocity \n" @@ -328,5 +331,65 @@ static const char* integrateKernelCL= \ " bodies[nodeID].m_linVel += gravityAcceleration * timeStep;\n" " \n" " }\n" +" \n" +"}\n" +"inline void b3IntegrateTransform( __global b3RigidBodyData_t* body, float timeStep, float angularDamping, b3Float4ConstArg gravityAcceleration)\n" +"{\n" +" float BT_GPU_ANGULAR_MOTION_THRESHOLD = (0.25f * 3.14159254f);\n" +" \n" +" if( (body->m_invMass != 0.f))\n" +" {\n" +" //angular velocity\n" +" {\n" +" b3Float4 axis;\n" +" //add some hardcoded angular damping\n" +" body->m_angVel.x *= angularDamping;\n" +" body->m_angVel.y *= angularDamping;\n" +" body->m_angVel.z *= angularDamping;\n" +" \n" +" b3Float4 angvel = body->m_angVel;\n" +" float fAngle = b3Sqrt(b3Dot(angvel, angvel));\n" +" //limit the angular motion\n" +" if(fAngle*timeStep > BT_GPU_ANGULAR_MOTION_THRESHOLD)\n" +" {\n" +" fAngle = BT_GPU_ANGULAR_MOTION_THRESHOLD / timeStep;\n" +" }\n" +" if(fAngle < 0.001f)\n" +" {\n" +" // use Taylor's expansions of sync function\n" +" axis = angvel * (0.5f*timeStep-(timeStep*timeStep*timeStep)*0.020833333333f * fAngle * fAngle);\n" +" }\n" +" else\n" +" {\n" +" // sync(fAngle) = sin(c*fAngle)/t\n" +" axis = angvel * ( b3Sin(0.5f * fAngle * timeStep) / fAngle);\n" +" }\n" +" b3Quat dorn;\n" +" dorn.x = axis.x;\n" +" dorn.y = axis.y;\n" +" dorn.z = axis.z;\n" +" dorn.w = b3Cos(fAngle * timeStep * 0.5f);\n" +" b3Quat orn0 = body->m_quat;\n" +" b3Quat predictedOrn = b3QuatMul(dorn, orn0);\n" +" predictedOrn = b3QuatNormalized(predictedOrn);\n" +" body->m_quat=predictedOrn;\n" +" }\n" +" //apply gravity\n" +" body->m_linVel += gravityAcceleration * timeStep;\n" +" //linear velocity \n" +" body->m_pos += body->m_linVel * timeStep;\n" +" \n" +" }\n" +" \n" +"}\n" +"__kernel void \n" +" integrateTransformsKernel( __global b3RigidBodyData_t* bodies,const int numNodes, float timeStep, float angularDamping, float4 gravityAcceleration)\n" +"{\n" +" int nodeID = get_global_id(0);\n" +" \n" +" if( nodeID < numNodes)\n" +" {\n" +" integrateSingleTransform(bodies,nodeID, timeStep, angularDamping,gravityAcceleration);\n" +" }\n" "}\n" ; diff --git a/src/Bullet3OpenCL/RigidBody/kernels/solverSetup.h b/src/Bullet3OpenCL/RigidBody/kernels/solverSetup.h index 226607745..d854dfe97 100644 --- a/src/Bullet3OpenCL/RigidBody/kernels/solverSetup.h +++ b/src/Bullet3OpenCL/RigidBody/kernels/solverSetup.h @@ -26,6 +26,9 @@ static const char* solverSetupCL= \ "#else\n" "#define b3AtomicInc atomic_inc\n" "#define b3Fabs fabs\n" +"#define b3Sqrt native_sqrt\n" +"#define b3Sin native_sin\n" +"#define b3Cos native_cos\n" "#endif\n" "#endif\n" "#ifdef __cplusplus\n" @@ -141,7 +144,7 @@ static const char* solverSetupCL= \ "}\n" " \n" "inline b3Quat b3QuatMul(b3Quat a, b3Quat b);\n" -"inline b3Quat b3QuatNormalize(b3QuatConstArg in);\n" +"inline b3Quat b3QuatNormalized(b3QuatConstArg in);\n" "inline b3Quat b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec);\n" "inline b3Quat b3QuatInvert(b3QuatConstArg q);\n" "inline b3Quat b3QuatMul(b3QuatConstArg a, b3QuatConstArg b)\n" @@ -153,8 +156,10 @@ static const char* solverSetupCL= \ " ans.w = a.w*b.w - b3Dot3F4(a, b);\n" " return ans;\n" "}\n" -"inline b3Quat b3QuatNormalize(b3QuatConstArg in)\n" +"inline b3Quat b3QuatNormalized(b3QuatConstArg in)\n" "{\n" +" b3Quat q;\n" +" q=in;\n" " //return b3FastNormalize4(in);\n" " float len = native_sqrt(dot(q, q));\n" " if(len > 0.f)\n" diff --git a/src/Bullet3OpenCL/RigidBody/kernels/solverSetup2.h b/src/Bullet3OpenCL/RigidBody/kernels/solverSetup2.h index d3df012c1..c16c71685 100644 --- a/src/Bullet3OpenCL/RigidBody/kernels/solverSetup2.h +++ b/src/Bullet3OpenCL/RigidBody/kernels/solverSetup2.h @@ -26,6 +26,9 @@ static const char* solverSetup2CL= \ "#else\n" "#define b3AtomicInc atomic_inc\n" "#define b3Fabs fabs\n" +"#define b3Sqrt native_sqrt\n" +"#define b3Sin native_sin\n" +"#define b3Cos native_cos\n" "#endif\n" "#endif\n" "#ifdef __cplusplus\n" diff --git a/src/Bullet3OpenCL/RigidBody/kernels/solverUtils.h b/src/Bullet3OpenCL/RigidBody/kernels/solverUtils.h index e2a907e63..c0263f4fb 100644 --- a/src/Bullet3OpenCL/RigidBody/kernels/solverUtils.h +++ b/src/Bullet3OpenCL/RigidBody/kernels/solverUtils.h @@ -26,6 +26,9 @@ static const char* solverUtilsCL= \ "#else\n" "#define b3AtomicInc atomic_inc\n" "#define b3Fabs fabs\n" +"#define b3Sqrt native_sqrt\n" +"#define b3Sin native_sin\n" +"#define b3Cos native_cos\n" "#endif\n" "#endif\n" "#ifdef __cplusplus\n" diff --git a/src/Bullet3OpenCL/RigidBody/kernels/updateAabbsKernel.h b/src/Bullet3OpenCL/RigidBody/kernels/updateAabbsKernel.h index d66ccdd36..a560dc324 100644 --- a/src/Bullet3OpenCL/RigidBody/kernels/updateAabbsKernel.h +++ b/src/Bullet3OpenCL/RigidBody/kernels/updateAabbsKernel.h @@ -16,6 +16,9 @@ static const char* updateAabbsKernelCL= \ "#else\n" "#define b3AtomicInc atomic_inc\n" "#define b3Fabs fabs\n" +"#define b3Sqrt native_sqrt\n" +"#define b3Sin native_sin\n" +"#define b3Cos native_cos\n" "#endif\n" "#endif\n" "#ifdef __cplusplus\n" @@ -71,7 +74,7 @@ static const char* updateAabbsKernelCL= \ "}\n" " \n" "inline b3Quat b3QuatMul(b3Quat a, b3Quat b);\n" -"inline b3Quat b3QuatNormalize(b3QuatConstArg in);\n" +"inline b3Quat b3QuatNormalized(b3QuatConstArg in);\n" "inline b3Quat b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec);\n" "inline b3Quat b3QuatInvert(b3QuatConstArg q);\n" "inline b3Quat b3QuatMul(b3QuatConstArg a, b3QuatConstArg b)\n" @@ -83,8 +86,10 @@ static const char* updateAabbsKernelCL= \ " ans.w = a.w*b.w - b3Dot3F4(a, b);\n" " return ans;\n" "}\n" -"inline b3Quat b3QuatNormalize(b3QuatConstArg in)\n" +"inline b3Quat b3QuatNormalized(b3QuatConstArg in)\n" "{\n" +" b3Quat q;\n" +" q=in;\n" " //return b3FastNormalize4(in);\n" " float len = native_sqrt(dot(q, q));\n" " if(len > 0.f)\n"