From 9a7414f4e979333cecfc44e374492e15a47c7c0b Mon Sep 17 00:00:00 2001 From: erwin coumans Date: Fri, 15 Mar 2013 16:27:23 -0700 Subject: [PATCH] cleanup of gpu rigid body (removed all Adl stuff) --- build/stringify.bat | 2 + demo/gpudemo/GpuDemo.cpp | 4 +- demo/gpudemo/GpuDemo.h | 6 +- demo/gpudemo/broadphase/PairBench.cpp | 6 +- demo/gpudemo/main_opengl3core.cpp | 5 + demo/gpudemo/rigidbody/GpuRigidBodyDemo.cpp | 262 +++++++++ demo/gpudemo/rigidbody/GpuRigidBodyDemo.h | 43 ++ .../gpu_broadphase/host/btGpuSapBroadphase.h | 15 +- opencl/gpu_broadphase/host/btSapAabb.h | 18 + opencl/gpu_rigidbody/host/btConfig.h | 34 ++ .../gpu_rigidbody/host/btGpuNarrowPhase.cpp | 504 ++++++++++++++++++ opencl/gpu_rigidbody/host/btGpuNarrowPhase.h | 76 +++ .../host/btGpuRigidBodyPipeline.cpp | 171 ++++++ .../host/btGpuRigidBodyPipeline.h | 38 ++ .../host/btGpuRigidBodyPipelineInternalData.h | 26 + .../gpu_rigidbody/kernels/integrateKernel.cl | 89 ++++ .../gpu_rigidbody/kernels/integrateKernel.h | 93 ++++ opencl/gpu_sat/host/btCollidable.h | 12 + opencl/gpu_sat/host/btRigidBodyCL.h | 2 +- 19 files changed, 1383 insertions(+), 23 deletions(-) create mode 100644 demo/gpudemo/rigidbody/GpuRigidBodyDemo.cpp create mode 100644 demo/gpudemo/rigidbody/GpuRigidBodyDemo.h create mode 100644 opencl/gpu_broadphase/host/btSapAabb.h create mode 100644 opencl/gpu_rigidbody/host/btConfig.h create mode 100644 opencl/gpu_rigidbody/host/btGpuNarrowPhase.cpp create mode 100644 opencl/gpu_rigidbody/host/btGpuNarrowPhase.h create mode 100644 opencl/gpu_rigidbody/host/btGpuRigidBodyPipeline.cpp create mode 100644 opencl/gpu_rigidbody/host/btGpuRigidBodyPipeline.h create mode 100644 opencl/gpu_rigidbody/host/btGpuRigidBodyPipelineInternalData.h create mode 100644 opencl/gpu_rigidbody/kernels/integrateKernel.cl create mode 100644 opencl/gpu_rigidbody/kernels/integrateKernel.h diff --git a/build/stringify.bat b/build/stringify.bat index 0c04729f6..74fd737ee 100644 --- a/build/stringify.bat +++ b/build/stringify.bat @@ -14,6 +14,8 @@ premake4 --file=stringifyKernel.lua --kernelfile="../opencl/gpu_broadphase/kerne premake4 --file=stringifyKernel.lua --kernelfile="../opencl/gpu_sat/kernels/sat.cl" --headerfile="../opencl/gpu_sat/kernels/satKernels.h" --stringname="satKernelsCL" stringify premake4 --file=stringifyKernel.lua --kernelfile="../opencl/gpu_sat/kernels/satClipHullContacts.cl" --headerfile="../opencl/gpu_sat/kernels/satClipHullContacts.h" --stringname="satClipKernelsCL" stringify +premake4 --file=stringifyKernel.lua --kernelfile="../opencl/gpu_rigidbody/kernels/integrateKernel.cl" --headerfile="../opencl/gpu_rigidbody/kernels/integrateKernel.h" --stringname="integrateKernelCL" stringify + pause \ No newline at end of file diff --git a/demo/gpudemo/GpuDemo.cpp b/demo/gpudemo/GpuDemo.cpp index afaef84da..ff257294c 100644 --- a/demo/gpudemo/GpuDemo.cpp +++ b/demo/gpudemo/GpuDemo.cpp @@ -40,9 +40,9 @@ void GpuDemo::initCL(int preferredDeviceIndex, int preferredPlatformIndex) int ciErrNum = 0; //#ifdef CL_PLATFORM_INTEL - cl_device_type deviceType = CL_DEVICE_TYPE_ALL; + //cl_device_type deviceType = CL_DEVICE_TYPE_ALL; //#else - //cl_device_type deviceType = CL_DEVICE_TYPE_CPU; + cl_device_type deviceType = CL_DEVICE_TYPE_CPU; //#endif cl_platform_id platformId; diff --git a/demo/gpudemo/GpuDemo.h b/demo/gpudemo/GpuDemo.h index b4dd85d8f..0815b79ac 100644 --- a/demo/gpudemo/GpuDemo.h +++ b/demo/gpudemo/GpuDemo.h @@ -34,9 +34,9 @@ public: :useOpenCL(false),//true), preferredOpenCLPlatformIndex(-1), preferredOpenCLDeviceIndex(-1), - arraySizeX(10), - arraySizeY(10 ), - arraySizeZ(10), + arraySizeX(2), + arraySizeY(2 ), + arraySizeZ(2), m_useConcaveMesh(false), gapX(4.3), gapY(4.0), diff --git a/demo/gpudemo/broadphase/PairBench.cpp b/demo/gpudemo/broadphase/PairBench.cpp index 84f480eb9..50b4a9fb3 100644 --- a/demo/gpudemo/broadphase/PairBench.cpp +++ b/demo/gpudemo/broadphase/PairBench.cpp @@ -11,12 +11,12 @@ #include "OpenGLWindow/GLInstanceRendererInternalData.h" #include "parallel_primitives/host/btLauncherCL.h" -btKeyboardCallback oldCallback = 0; +static btKeyboardCallback oldCallback = 0; extern bool gReset; #define MSTRINGIFY(A) #A -const char* s_pairBenchKernelString = MSTRINGIFY( +static const char* s_pairBenchKernelString = MSTRINGIFY( __kernel void moveObjectsKernel(__global float4* posOrnColors, int numObjects) { int iGID = get_global_id(0); @@ -134,7 +134,7 @@ PairBench::~PairBench() -void PairKeyboardCallback(int key, int state) +static void PairKeyboardCallback(int key, int state) { if (key=='R' && state) { diff --git a/demo/gpudemo/main_opengl3core.cpp b/demo/gpudemo/main_opengl3core.cpp index b3e58543c..654165b42 100644 --- a/demo/gpudemo/main_opengl3core.cpp +++ b/demo/gpudemo/main_opengl3core.cpp @@ -25,6 +25,7 @@ #include "gwenUserInterface.h" #include "ParticleDemo.h" #include "broadphase/PairBench.h" +#include "rigidbody/GpuRigidBodyDemo.h" //#include "BroadphaseBenchmark.h" @@ -63,8 +64,11 @@ GpuDemo::CreateFunc* allDemos[]= //BroadphaseBenchmark::CreateFunc, //GpuBoxDemo::CreateFunc, PairBench::MyCreateFunc, + GpuRigidBodyDemo::MyCreateFunc, + ParticleDemo::MyCreateFunc, + //SpheresDemo::CreateFunc, //GpuCompoundDemo::CreateFunc, //EmptyDemo::CreateFunc, @@ -664,6 +668,7 @@ int main(int argc, char* argv[]) demo->exitPhysics(); + CProfileManager::CleanupMemory(); delete demo; if (f) fclose(f); diff --git a/demo/gpudemo/rigidbody/GpuRigidBodyDemo.cpp b/demo/gpudemo/rigidbody/GpuRigidBodyDemo.cpp new file mode 100644 index 000000000..eb578f097 --- /dev/null +++ b/demo/gpudemo/rigidbody/GpuRigidBodyDemo.cpp @@ -0,0 +1,262 @@ +#include "GpuRigidBodyDemo.h" +#include "BulletCommon/btQuickprof.h" +#include "OpenGLWindow/ShapeData.h" +#include "OpenGLWindow/GLInstancingRenderer.h" +#include "BulletCommon/btQuaternion.h" +#include "OpenGLWindow/btgWindowInterface.h" +#include "gpu_broadphase/host/btGpuSapBroadphase.h" +#include "../GpuDemoInternalData.h" +#include "basic_initialize/btOpenCLUtils.h" +#include "OpenGLWindow/OpenGLInclude.h" +#include "OpenGLWindow/GLInstanceRendererInternalData.h" +#include "parallel_primitives/host/btLauncherCL.h" +#include "gpu_rigidbody/host/btGpuRigidBodyPipeline.h" +#include "gpu_rigidbody/host/btGpuNarrowPhase.h" +#include "gpu_rigidbody/host/btConfig.h" + +static btKeyboardCallback oldCallback = 0; +extern bool gReset; + +#define MSTRINGIFY(A) #A + +static const char* s_rigidBodyKernelString = MSTRINGIFY( + +typedef struct +{ + float4 m_pos; + float4 m_quat; + float4 m_linVel; + float4 m_angVel; + unsigned int m_collidableIdx; + float m_invMass; + float m_restituitionCoeff; + float m_frictionCoeff; +} Body; + +__kernel void + copyTransformsToVBOKernel( __global Body* gBodies, __global float4* posOrnColor, const int numNodes) +{ + int nodeID = get_global_id(0); + if( nodeID < numNodes ) + { + posOrnColor[nodeID] = (float4) (gBodies[nodeID].m_pos.xyz,1.0); + posOrnColor[nodeID + numNodes] = gBodies[nodeID].m_quat; + } +} +); + + +struct GpuRigidBodyDemoInternalData +{ + + cl_kernel m_copyTransformsToVBOKernel; + + btOpenCLArray* m_instancePosOrnColor; + + class btGpuRigidBodyPipeline* m_rigidBodyPipeline; + + btGpuNarrowPhase* m_np; + btGpuSapBroadphase* m_bp; + + GpuRigidBodyDemoInternalData() + :m_instancePosOrnColor(0), + m_copyTransformsToVBOKernel(0), m_rigidBodyPipeline(0), + m_np(0), + m_bp(0) + { + } +}; + + +GpuRigidBodyDemo::GpuRigidBodyDemo() +:m_instancingRenderer(0), +m_window(0) +{ + m_data = new GpuRigidBodyDemoInternalData; +} +GpuRigidBodyDemo::~GpuRigidBodyDemo() +{ + + delete m_data; +} + + + + + + + +static void PairKeyboardCallback(int key, int state) +{ + if (key=='R' && state) + { + gReset = true; + } + + //btDefaultKeyboardCallback(key,state); + oldCallback(key,state); +} + + + +void GpuRigidBodyDemo::initPhysics(const ConstructionInfo& ci) +{ + initCL(ci.preferredOpenCLDeviceIndex,ci.preferredOpenCLPlatformIndex); + if (m_clData->m_clContext) + { + int errNum=0; + + cl_program rbProg=0; + m_data->m_copyTransformsToVBOKernel = btOpenCLUtils::compileCLKernelFromString(m_clData->m_clContext,m_clData->m_clDevice,s_rigidBodyKernelString,"copyTransformsToVBOKernel",&errNum,rbProg); + + btConfig config; + btGpuNarrowPhase* np = new btGpuNarrowPhase(m_clData->m_clContext,m_clData->m_clDevice,m_clData->m_clQueue,config); + btGpuSapBroadphase* bp = new btGpuSapBroadphase(m_clData->m_clContext,m_clData->m_clDevice,m_clData->m_clQueue); + m_data->m_np = np; + m_data->m_bp = bp; + + m_data->m_rigidBodyPipeline = new btGpuRigidBodyPipeline(m_clData->m_clContext,m_clData->m_clDevice,m_clData->m_clQueue, np, bp); + + int strideInBytes = 9*sizeof(float); + int numVertices = sizeof(cube_vertices)/strideInBytes; + int numIndices = sizeof(cube_vertices)/sizeof(int); + //int shapeId = ci.m_instancingRenderer->registerShape(&cube_vertices[0],numVertices,cube_indices,numIndices); + int shapeId = ci.m_instancingRenderer->registerShape(&cube_vertices[0],numVertices,cube_indices,numIndices); + int group=1; + int mask=1; + int index=10; + float scaling[4] = {1,1,1,1}; + + int colIndex = np->registerConvexHullShape(&cube_vertices[0],strideInBytes,numVertices, scaling); + + float mass = 1.f; + for (int i=0;iregisterGraphicsInstance(shapeId,position,orn,color,scaling); + int pid = m_data->m_rigidBodyPipeline->registerPhysicsInstance(mass,position,orn,colIndex,index); + + index++; + } + } + } + np->writeAllBodiesToGpu(); + } + + + + + + if (ci.m_window) + { + m_window = ci.m_window; + oldCallback = ci.m_window->getKeyboardCallback(); + ci.m_window->setKeyboardCallback(PairKeyboardCallback); + + } + + m_instancingRenderer = ci.m_instancingRenderer; + + + float camPos[4]={15.5,12.5,15.5,0}; + m_instancingRenderer->setCameraTargetPosition(camPos); + m_instancingRenderer->setCameraDistance(60); + + m_instancingRenderer->writeTransforms(); + + + +} + +void GpuRigidBodyDemo::exitPhysics() +{ + delete m_data->m_instancePosOrnColor; + delete m_data->m_rigidBodyPipeline; + + m_window->setKeyboardCallback(oldCallback); + + delete m_data->m_np; + m_data->m_np = 0; + delete m_data->m_bp; + m_data->m_bp = 0; + + exitCL(); +} + + +void GpuRigidBodyDemo::renderScene() +{ + m_instancingRenderer->RenderScene(); +} + +void GpuRigidBodyDemo::clientMoveAndDisplay() +{ + bool animate=true; + int numObjects= m_instancingRenderer->getInternalData()->m_totalNumInstances; + btVector4* positions = 0; + if (animate) + { + GLuint vbo = m_instancingRenderer->getInternalData()->m_vbo; + int arraySizeInBytes = numObjects * (3)*sizeof(btVector4); + glBindBuffer(GL_ARRAY_BUFFER, vbo); + cl_bool blocking= CL_TRUE; + positions= (btVector4*)glMapBufferRange( GL_ARRAY_BUFFER,m_instancingRenderer->getMaxShapeCapacity(),arraySizeInBytes, GL_MAP_WRITE_BIT|GL_MAP_READ_BIT );//GL_READ_WRITE);//GL_WRITE_ONLY + GLint err = glGetError(); + assert(err==GL_NO_ERROR); + if (!m_data->m_instancePosOrnColor) + { + m_data->m_instancePosOrnColor = new btOpenCLArray(m_clData->m_clContext,m_clData->m_clQueue); + m_data->m_instancePosOrnColor->resize(3*numObjects); + m_data->m_instancePosOrnColor->copyFromHostPointer(positions,3*numObjects,0); + } + } + + m_data->m_rigidBodyPipeline->stepSimulation(1./60.f); + + { + int ciErrNum = 0; + + + ciErrNum = 0;//clSetKernelArg(fpio.m_copyTransformsToVBOKernel, 2, sizeof(cl_mem), (void*)&fpio.m_clObjectsBuffer); + + cl_mem bodies = m_data->m_rigidBodyPipeline->getBodyBuffer(); + btLauncherCL launch(m_clData->m_clQueue,m_data->m_copyTransformsToVBOKernel); + launch.setBuffer(bodies); + launch.setBuffer(m_data->m_instancePosOrnColor->getBufferCL()); + launch.setConst(numObjects); + + launch.launch1D(numObjects); + //ciErrNum = clSetKernelArg(fpio.m_copyTransformsToVBOKernel, 3, sizeof(cl_mem), (void*)&bodies); + //ciErrNum = clSetKernelArg(fpio.m_copyTransformsToVBOKernel, 1, sizeof(int), &fpio.m_numObjects); + + if (numObjects) + { + size_t workGroupSize = 64; + size_t numWorkItems = workGroupSize*((numObjects+ (workGroupSize)) / workGroupSize); + + //ciErrNum = clEnqueueNDRangeKernel(fpio.m_cqCommandQue, fpio.m_copyTransformsToVBOKernel, 1, NULL, &numWorkItems, &workGroupSize,0 ,0 ,0); + oclCHECKERROR(ciErrNum, CL_SUCCESS); + } + + } + + if (animate) + { + GLint err = glGetError(); + assert(err==GL_NO_ERROR); + m_data->m_instancePosOrnColor->copyToHostPointer(positions,3*numObjects,0); + glUnmapBuffer( GL_ARRAY_BUFFER); + + err = glGetError(); + assert(err==GL_NO_ERROR); + } + +} diff --git a/demo/gpudemo/rigidbody/GpuRigidBodyDemo.h b/demo/gpudemo/rigidbody/GpuRigidBodyDemo.h new file mode 100644 index 000000000..a5c5f4569 --- /dev/null +++ b/demo/gpudemo/rigidbody/GpuRigidBodyDemo.h @@ -0,0 +1,43 @@ +#ifndef GPU_RIGID_BODY_DEMO_H +#define GPU_RIGID_BODY_DEMO_H + +#include "../GpuDemo.h" + +class GpuRigidBodyDemo : public GpuDemo +{ + + class GLInstancingRenderer* m_instancingRenderer; + class btgWindowInterface* m_window; + + struct GpuRigidBodyDemoInternalData* m_data; + +public: + + GpuRigidBodyDemo(); + virtual ~GpuRigidBodyDemo(); + + virtual void initPhysics(const ConstructionInfo& ci); + + virtual void exitPhysics(); + + virtual const char* getName() + { + return "GRBD"; + } + static GpuDemo* MyCreateFunc() + { + GpuDemo* demo = new GpuRigidBodyDemo; + return demo; + } + + + + virtual void renderScene(); + + virtual void clientMoveAndDisplay(); + + +}; + +#endif //GPU_RIGID_BODY_DEMO_H + diff --git a/opencl/gpu_broadphase/host/btGpuSapBroadphase.h b/opencl/gpu_broadphase/host/btGpuSapBroadphase.h index e7483726d..c119b8ce7 100644 --- a/opencl/gpu_broadphase/host/btGpuSapBroadphase.h +++ b/opencl/gpu_broadphase/host/btGpuSapBroadphase.h @@ -6,20 +6,7 @@ class btVector3; #include "parallel_primitives/host/btRadixSort32CL.h" -struct btSapAabb -{ - union - { - float m_min[4]; - int m_minIndices[4]; - }; - union - { - float m_max[4]; - int m_signedMaxIndices[4]; - //unsigned int m_unsignedMaxIndices[4]; - }; -}; +#include "btSapAabb.h" diff --git a/opencl/gpu_broadphase/host/btSapAabb.h b/opencl/gpu_broadphase/host/btSapAabb.h new file mode 100644 index 000000000..3354ffe9e --- /dev/null +++ b/opencl/gpu_broadphase/host/btSapAabb.h @@ -0,0 +1,18 @@ +#ifndef BT_SAP_AABB_H +#define BT_SAP_AABB_H + +struct btSapAabb +{ + union + { + float m_min[4]; + int m_minIndices[4]; + }; + union + { + float m_max[4]; + int m_signedMaxIndices[4]; + }; +}; + +#endif //BT_SAP_AABB_H diff --git a/opencl/gpu_rigidbody/host/btConfig.h b/opencl/gpu_rigidbody/host/btConfig.h new file mode 100644 index 000000000..71b1f1b6c --- /dev/null +++ b/opencl/gpu_rigidbody/host/btConfig.h @@ -0,0 +1,34 @@ +#ifndef BT_CONFIG_H +#define BT_CONFIG_H + +struct btConfig +{ + int m_maxConvexBodies; + int m_maxConvexShapes; + int m_maxBroadphasePairs; + + int m_maxVerticesPerFace; + int m_maxFacesPerShape; + int m_maxConvexVertices; + int m_maxConvexIndices; + int m_maxConvexUniqueEdges; + + int m_maxCompoundChildShapes; + + btConfig() + :m_maxConvexBodies(128*1024), + m_maxConvexShapes(8192), + m_maxVerticesPerFace(64), + m_maxFacesPerShape(64), + m_maxConvexVertices(8192), + m_maxConvexIndices(8192), + m_maxConvexUniqueEdges(8192), + m_maxCompoundChildShapes(8192)//?? + { + m_maxBroadphasePairs = 16*m_maxConvexBodies; + } +}; + + +#endif//BT_CONFIG_H + diff --git a/opencl/gpu_rigidbody/host/btGpuNarrowPhase.cpp b/opencl/gpu_rigidbody/host/btGpuNarrowPhase.cpp new file mode 100644 index 000000000..43068e086 --- /dev/null +++ b/opencl/gpu_rigidbody/host/btGpuNarrowPhase.cpp @@ -0,0 +1,504 @@ +#include "btGpuNarrowPhase.h" + + +#include "parallel_primitives/host/btOpenCLArray.h" +#include "../../gpu_sat/host/btConvexPolyhedronCL.h" +#include "../../gpu_sat/host/ConvexHullContact.h" +#include "../../gpu_broadphase/host/btSapAabb.h" +#include +#include "btConfig.h" + +struct btGpuNarrowPhaseInternalData +{ + btAlignedObjectArray* m_convexData; + + btAlignedObjectArray m_convexPolyhedra; + btAlignedObjectArray m_uniqueEdges; + btAlignedObjectArray m_convexVertices; + btAlignedObjectArray m_convexIndices; + + btOpenCLArray* m_convexPolyhedraGPU; + btOpenCLArray* m_uniqueEdgesGPU; + btOpenCLArray* m_convexVerticesGPU; + btOpenCLArray* m_convexIndicesGPU; + + btOpenCLArray* m_worldVertsB1GPU; + btOpenCLArray* m_clippingFacesOutGPU; + btOpenCLArray* m_worldNormalsAGPU; + btOpenCLArray* m_worldVertsA1GPU; + btOpenCLArray* m_worldVertsB2GPU; + + btAlignedObjectArray m_cpuChildShapes; + btOpenCLArray* m_gpuChildShapes; + + btAlignedObjectArray m_convexFaces; + btOpenCLArray* m_convexFacesGPU; + + GpuSatCollision* m_gpuSatCollision; + + btAlignedObjectArray* m_pBufPairsCPU; + + btOpenCLArray* m_convexPairsOutGPU; + btOpenCLArray* m_planePairs; + + btOpenCLArray* m_pBufContactOutGPU; + btAlignedObjectArray* m_pBufContactOutCPU; + + + btAlignedObjectArray* m_bodyBufferCPU; + btOpenCLArray* m_bodyBufferGPU; + + btAlignedObjectArray* m_inertiaBufferCPU; + btOpenCLArray* m_inertiaBufferGPU; + + int m_numAcceleratedShapes; + int m_numAcceleratedRigidBodies; + + btAlignedObjectArray m_collidablesCPU; + btOpenCLArray* m_collidablesGPU; + + btOpenCLArray* m_localShapeAABBGPU; + btAlignedObjectArray* m_localShapeAABBCPU; + + btConfig m_config; + +}; + + + + + +btGpuNarrowPhase::btGpuNarrowPhase(cl_context ctx, cl_device_id device, cl_command_queue queue, const btConfig& config) +:m_data(0) ,m_planeBodyIndex(-1),m_static0Index(-1), +m_context(ctx), +m_device(device), +m_queue(queue) +{ + + m_data = new btGpuNarrowPhaseInternalData(); + memset(m_data,0,sizeof(btGpuNarrowPhaseInternalData)); + + m_data->m_config = config; + + m_data->m_gpuSatCollision = new GpuSatCollision(ctx,device,queue); + m_data->m_pBufPairsCPU = new btAlignedObjectArray; + m_data->m_pBufPairsCPU->resize(config.m_maxBroadphasePairs); + + m_data->m_convexPairsOutGPU = new btOpenCLArray(ctx,queue,config.m_maxBroadphasePairs,false); + m_data->m_planePairs = new btOpenCLArray(ctx,queue,config.m_maxBroadphasePairs,false); + + m_data->m_pBufContactOutCPU = new btAlignedObjectArray(); + m_data->m_pBufContactOutCPU->resize(config.m_maxBroadphasePairs); + m_data->m_bodyBufferCPU = new btAlignedObjectArray(); + m_data->m_bodyBufferCPU->resize(config.m_maxConvexBodies); + + m_data->m_inertiaBufferCPU = new btAlignedObjectArray(); + m_data->m_inertiaBufferCPU->resize(config.m_maxConvexBodies); + + m_data->m_pBufContactOutGPU = new btOpenCLArray(ctx,queue, config.m_maxBroadphasePairs,true); + btContact4 test = m_data->m_pBufContactOutGPU->forcedAt(0); + + m_data->m_inertiaBufferGPU = new btOpenCLArray(ctx,queue,config.m_maxConvexBodies,false); + m_data->m_collidablesGPU = new btOpenCLArray(ctx,queue,config.m_maxConvexShapes); + + m_data->m_localShapeAABBCPU = new btAlignedObjectArray; + m_data->m_localShapeAABBGPU = new btOpenCLArray(ctx,queue,config.m_maxConvexShapes); + + + //m_data->m_solverDataGPU = adl::Solver::allocate(ctx,queue, config.m_maxBroadphasePairs,false); + m_data->m_bodyBufferGPU = new btOpenCLArray(ctx,queue, config.m_maxConvexBodies,false); + + m_data->m_convexFacesGPU = new btOpenCLArray(ctx,queue,config.m_maxConvexShapes*config.m_maxFacesPerShape,false); + m_data->m_gpuChildShapes = new btOpenCLArray(ctx,queue,config.m_maxCompoundChildShapes,false); + + m_data->m_convexPolyhedraGPU = new btOpenCLArray(ctx,queue,config.m_maxConvexShapes,false); + m_data->m_uniqueEdgesGPU = new btOpenCLArray(ctx,queue,config.m_maxConvexUniqueEdges,true); + m_data->m_convexVerticesGPU = new btOpenCLArray(ctx,queue,config.m_maxConvexVertices,true); + m_data->m_convexIndicesGPU = new btOpenCLArray(ctx,queue,config.m_maxConvexIndices,true); + + + m_data->m_worldVertsB1GPU = new btOpenCLArray(ctx,queue,config.m_maxConvexBodies*config.m_maxVerticesPerFace); + m_data->m_clippingFacesOutGPU = new btOpenCLArray(ctx,queue,config.m_maxConvexBodies); + m_data->m_worldNormalsAGPU = new btOpenCLArray(ctx,queue,config.m_maxConvexBodies); + m_data->m_worldVertsA1GPU = new btOpenCLArray(ctx,queue,config.m_maxConvexBodies*config.m_maxVerticesPerFace); + m_data->m_worldVertsB2GPU = new btOpenCLArray(ctx,queue,config.m_maxConvexBodies*config.m_maxVerticesPerFace); + + + + m_data->m_convexData = new btAlignedObjectArray(); + + + m_data->m_convexData->resize(config.m_maxConvexShapes); + m_data->m_convexPolyhedra.resize(config.m_maxConvexShapes); + + m_data->m_numAcceleratedShapes = 0; + m_data->m_numAcceleratedRigidBodies = 0; + + //m_data->m_contactCGPU = new btOpenCLArray(ctx,queue,config.m_maxBroadphasePairs,false); + //m_data->m_frictionCGPU = new btOpenCLArray::allocateFrictionConstraint( m_data->m_deviceCL, config.m_maxBroadphasePairs); + +} + + +btGpuNarrowPhase::~btGpuNarrowPhase() +{ + delete m_data->m_gpuSatCollision; + delete m_data->m_pBufPairsCPU; + delete m_data->m_convexPairsOutGPU; + delete m_data->m_planePairs; + delete m_data->m_pBufContactOutCPU; + delete m_data->m_bodyBufferCPU; + delete m_data->m_inertiaBufferCPU; + delete m_data->m_pBufContactOutGPU; + delete m_data->m_inertiaBufferGPU; + delete m_data->m_collidablesGPU; + delete m_data->m_localShapeAABBCPU; + delete m_data->m_localShapeAABBGPU; + delete m_data->m_bodyBufferGPU; + delete m_data->m_convexFacesGPU; + delete m_data->m_gpuChildShapes; + delete m_data->m_convexPolyhedraGPU; + delete m_data->m_uniqueEdgesGPU; + delete m_data->m_convexVerticesGPU; + delete m_data->m_convexIndicesGPU; + delete m_data->m_worldVertsB1GPU; + delete m_data->m_clippingFacesOutGPU; + delete m_data->m_worldNormalsAGPU; + delete m_data->m_worldVertsA1GPU; + delete m_data->m_worldVertsB2GPU; + + + delete m_data->m_convexData; + delete m_data; +} + + +int btGpuNarrowPhase::allocateCollidable() +{ + int curSize = m_data->m_collidablesCPU.size(); + m_data->m_collidablesCPU.expand(); + return curSize; +} + + + +int btGpuNarrowPhase::registerConvexHullShape(btConvexUtility* convexPtr,btCollidable& col) +{ + m_data->m_convexData->resize(m_data->m_numAcceleratedShapes+1); + m_data->m_convexPolyhedra.resize(m_data->m_numAcceleratedShapes+1); + + + btConvexPolyhedronCL& convex = m_data->m_convexPolyhedra.at(m_data->m_convexPolyhedra.size()-1); + convex.mC = convexPtr->mC; + convex.mE = convexPtr->mE; + convex.m_extents= convexPtr->m_extents; + convex.m_localCenter = convexPtr->m_localCenter; + convex.m_radius = convexPtr->m_radius; + + convex.m_numUniqueEdges = convexPtr->m_uniqueEdges.size(); + int edgeOffset = m_data->m_uniqueEdges.size(); + convex.m_uniqueEdgesOffset = edgeOffset; + + m_data->m_uniqueEdges.resize(edgeOffset+convex.m_numUniqueEdges); + + //convex data here + int i; + for ( i=0;im_uniqueEdges.size();i++) + { + m_data->m_uniqueEdges[edgeOffset+i] = convexPtr->m_uniqueEdges[i]; + } + + int faceOffset = m_data->m_convexFaces.size(); + convex.m_faceOffset = faceOffset; + convex.m_numFaces = convexPtr->m_faces.size(); + m_data->m_convexFaces.resize(faceOffset+convex.m_numFaces); + for (i=0;im_faces.size();i++) + { + m_data->m_convexFaces[convex.m_faceOffset+i].m_plane[0] = convexPtr->m_faces[i].m_plane[0]; + m_data->m_convexFaces[convex.m_faceOffset+i].m_plane[1] = convexPtr->m_faces[i].m_plane[1]; + m_data->m_convexFaces[convex.m_faceOffset+i].m_plane[2] = convexPtr->m_faces[i].m_plane[2]; + m_data->m_convexFaces[convex.m_faceOffset+i].m_plane[3] = convexPtr->m_faces[i].m_plane[3]; + int indexOffset = m_data->m_convexIndices.size(); + int numIndices = convexPtr->m_faces[i].m_indices.size(); + m_data->m_convexFaces[convex.m_faceOffset+i].m_numIndices = numIndices; + m_data->m_convexFaces[convex.m_faceOffset+i].m_indexOffset = indexOffset; + m_data->m_convexIndices.resize(indexOffset+numIndices); + for (int p=0;pm_convexIndices[indexOffset+p] = convexPtr->m_faces[i].m_indices[p]; + } + } + + convex.m_numVertices = convexPtr->m_vertices.size(); + int vertexOffset = m_data->m_convexVertices.size(); + convex.m_vertexOffset =vertexOffset; + m_data->m_convexVertices.resize(vertexOffset+convex.m_numVertices); + for (int i=0;im_vertices.size();i++) + { + m_data->m_convexVertices[vertexOffset+i] = convexPtr->m_vertices[i]; + } + + (*m_data->m_convexData)[m_data->m_numAcceleratedShapes] = convexPtr; + + m_data->m_convexFacesGPU->copyFromHost(m_data->m_convexFaces); + + m_data->m_convexPolyhedraGPU->copyFromHost(m_data->m_convexPolyhedra); + m_data->m_uniqueEdgesGPU->copyFromHost(m_data->m_uniqueEdges); + m_data->m_convexVerticesGPU->copyFromHost(m_data->m_convexVertices); + m_data->m_convexIndicesGPU->copyFromHost(m_data->m_convexIndices); + + + return m_data->m_numAcceleratedShapes++; +} + + +int btGpuNarrowPhase::registerConvexHullShape(const float* vertices, int strideInBytes, int numVertices, const float* scaling) +{ + btAlignedObjectArray verts; + + unsigned char* vts = (unsigned char*) vertices; + for (int i=0;iinitializePolyhedralFeatures(&verts[0],verts.size(),merge); + } + + int collidableIndex = registerConvexHullShape(utilPtr); + return collidableIndex; +} + +int btGpuNarrowPhase::registerConvexHullShape(btConvexUtility* utilPtr) +{ + int collidableIndex = allocateCollidable(); + btCollidable& col = getCollidableCpu(collidableIndex); + col.m_shapeType = SHAPE_CONVEX_HULL; + col.m_shapeIndex = -1; + + + { + btVector3 localCenter(0,0,0); + for (int i=0;im_vertices.size();i++) + localCenter+=utilPtr->m_vertices[i]; + localCenter*= (1.f/utilPtr->m_vertices.size()); + utilPtr->m_localCenter = localCenter; + + col.m_shapeIndex = registerConvexHullShape(utilPtr,col); + } + + if (col.m_shapeIndex>=0) + { + btSapAabb aabb; + + btVector3 myAabbMin(1e30f,1e30f,1e30f); + btVector3 myAabbMax(-1e30f,-1e30f,-1e30f); + + for (int i=0;im_vertices.size();i++) + { + myAabbMin.setMin(utilPtr->m_vertices[i]); + myAabbMax.setMax(utilPtr->m_vertices[i]); + } + aabb.m_min[0] = myAabbMin[0]; + aabb.m_min[1] = myAabbMin[1]; + aabb.m_min[2] = myAabbMin[2]; + aabb.m_minIndices[3] = 0; + + aabb.m_max[0] = myAabbMax[0]; + aabb.m_max[1] = myAabbMax[1]; + aabb.m_max[2] = myAabbMax[2]; + aabb.m_signedMaxIndices[3] = 0; + + m_data->m_localShapeAABBCPU->push_back(aabb); + m_data->m_localShapeAABBGPU->push_back(aabb); + } + + return collidableIndex; + +} + + + + + + + +cl_mem btGpuNarrowPhase::getBodiesGpu() +{ + return (cl_mem)m_data->m_bodyBufferGPU->getBufferCL(); +} + + +int btGpuNarrowPhase::getNumBodiesGpu() const +{ + return m_data->m_bodyBufferGPU->size(); +} + +cl_mem btGpuNarrowPhase::getBodyInertiasGpu() +{ + return (cl_mem)m_data->m_inertiaBufferGPU->getBufferCL(); +} + +int btGpuNarrowPhase::getNumBodyInertiasGpu() const +{ + return m_data->m_inertiaBufferGPU->size(); +} + + +btCollidable& btGpuNarrowPhase::getCollidableCpu(int collidableIndex) +{ + return m_data->m_collidablesCPU[collidableIndex]; +} + +const btCollidable& btGpuNarrowPhase::getCollidableCpu(int collidableIndex) const +{ + return m_data->m_collidablesCPU[collidableIndex]; +} + +cl_mem btGpuNarrowPhase::getCollidablesGpu() +{ + return m_data->m_collidablesGPU->getBufferCL(); +} + +int btGpuNarrowPhase::getNumCollidablesGpu() const +{ + return m_data->m_collidablesGPU->size(); +} + + + + + +int btGpuNarrowPhase::getNumContactsGpu() const +{ + return m_data->m_pBufContactOutGPU->size(); +} +cl_mem btGpuNarrowPhase::getContactsGpu() +{ + return m_data->m_pBufContactOutGPU->getBufferCL(); +} + + +void btGpuNarrowPhase::computeContacts(cl_mem broadphasePairs, int numBroadphasePairs, cl_mem aabbs, int numObjects) +{ + +} + +const btSapAabb& btGpuNarrowPhase::getLocalSpaceAabb(int collidableIndex) const +{ + return m_data->m_localShapeAABBCPU->at(collidableIndex); +} + + + + + +int btGpuNarrowPhase::registerRigidBody(int collidableIndex, float mass, const float* position, const float* orientation , const float* aabbMinPtr, const float* aabbMaxPtr,bool writeToGpu) +{ + btVector3 aabbMin(aabbMinPtr[0],aabbMinPtr[1],aabbMinPtr[2]); + btVector3 aabbMax (aabbMaxPtr[0],aabbMaxPtr[1],aabbMaxPtr[2]); + + btAssert(m_data->m_numAcceleratedRigidBodies< (m_data->m_config.m_maxConvexBodies-1)); + + m_data->m_bodyBufferGPU->resize(m_data->m_numAcceleratedRigidBodies+1); + + btRigidBodyCL& body = m_data->m_bodyBufferCPU->at(m_data->m_numAcceleratedRigidBodies); + + float friction = 1.f; + float restitution = 0.f; + + body.m_frictionCoeff = friction; + body.m_restituitionCoeff = restitution; + body.m_angVel.setZero(); + body.m_linVel.setValue(0,-1,0);//.setZero(); + body.m_pos.setValue(position[0],position[1],position[2]); + body.m_quat.setValue(orientation[0],orientation[1],orientation[2],orientation[3]); + body.m_collidableIdx = collidableIndex; + if (collidableIndex>=0) + { +// body.m_shapeType = m_data->m_collidablesCPU.at(collidableIndex).m_shapeType; + } else + { + // body.m_shapeType = CollisionShape::SHAPE_PLANE; + m_planeBodyIndex = m_data->m_numAcceleratedRigidBodies; + } + //body.m_shapeType = shapeType; + + + body.m_invMass = mass? 1.f/mass : 0.f; + + if (writeToGpu) + { + m_data->m_bodyBufferGPU->copyFromHostPointer(&body,1,m_data->m_numAcceleratedRigidBodies); + } + + btInertiaCL& shapeInfo = m_data->m_inertiaBufferCPU->at(m_data->m_numAcceleratedRigidBodies); + + if (mass==0.f) + { + if (m_data->m_numAcceleratedRigidBodies==0) + m_static0Index = 0; + + shapeInfo.m_initInvInertia.setValue(0,0,0,0,0,0,0,0,0); + shapeInfo.m_invInertiaWorld.setValue(0,0,0,0,0,0,0,0,0); + } else + { + + assert(body.m_collidableIdx>=0); + + //approximate using the aabb of the shape + + //Aabb aabb = (*m_data->m_shapePointers)[shapeIndex]->m_aabb; + btVector3 halfExtents = (aabbMax-aabbMin);//*0.5f;//fake larger inertia makes demos more stable ;-) + + btVector3 localInertia; + + float lx=2.f*halfExtents[0]; + float ly=2.f*halfExtents[1]; + float lz=2.f*halfExtents[2]; + + localInertia.setValue( (mass/12.0f) * (ly*ly + lz*lz), + (mass/12.0f) * (lx*lx + lz*lz), + (mass/12.0f) * (lx*lx + ly*ly)); + + btVector3 invLocalInertia; + invLocalInertia[0] = 1.f/localInertia[0]; + invLocalInertia[1] = 1.f/localInertia[1]; + invLocalInertia[2] = 1.f/localInertia[2]; + invLocalInertia[3] = 0.f; + + shapeInfo.m_initInvInertia.setValue( + invLocalInertia[0], 0, 0, + 0, invLocalInertia[1], 0, + 0, 0, invLocalInertia[2]); + + btMatrix3x3 m (body.m_quat); + + shapeInfo.m_invInertiaWorld = m.scaled(invLocalInertia) * m.transpose(); + + } + + if (writeToGpu) + m_data->m_inertiaBufferGPU->copyFromHostPointer(&shapeInfo,1,m_data->m_numAcceleratedRigidBodies); + + + + return m_data->m_numAcceleratedRigidBodies++; +} + +void btGpuNarrowPhase::writeAllBodiesToGpu() +{ + m_data->m_bodyBufferGPU->resize(m_data->m_numAcceleratedRigidBodies); + m_data->m_inertiaBufferGPU->resize(m_data->m_numAcceleratedRigidBodies); + + m_data->m_bodyBufferGPU->copyFromHostPointer(&m_data->m_bodyBufferCPU->at(0),m_data->m_numAcceleratedRigidBodies); + m_data->m_inertiaBufferGPU->copyFromHostPointer(&m_data->m_inertiaBufferCPU->at(0),m_data->m_numAcceleratedRigidBodies); + + m_data->m_collidablesGPU->copyFromHost(m_data->m_collidablesCPU); + +} diff --git a/opencl/gpu_rigidbody/host/btGpuNarrowPhase.h b/opencl/gpu_rigidbody/host/btGpuNarrowPhase.h new file mode 100644 index 000000000..face8b290 --- /dev/null +++ b/opencl/gpu_rigidbody/host/btGpuNarrowPhase.h @@ -0,0 +1,76 @@ +#ifndef BT_GPU_NARROWPHASE_H +#define BT_GPU_NARROWPHASE_H + +#include "../../gpu_sat/host/btCollidable.h" +#include "basic_initialize/btOpenCLInclude.h" +#include "BulletCommon/btAlignedObjectArray.h" +#include "BulletCommon/btVector3.h" + +class btGpuNarrowPhase +{ +protected: + + struct btGpuNarrowPhaseInternalData* m_data; + int m_acceleratedCompanionShapeIndex; + int m_planeBodyIndex; + int m_static0Index; + + cl_context m_context; + cl_device_id m_device; + cl_command_queue m_queue; + + +public: + + + + + btGpuNarrowPhase(cl_context vtx, cl_device_id dev, cl_command_queue q, const struct btConfig& config); + + virtual ~btGpuNarrowPhase(void); + + + int registerCompoundShape(btAlignedObjectArray* childShapes); + int registerFace(const btVector3& faceNormal, float faceConstant); + int registerConcaveMeshShape(btAlignedObjectArray* vertices, btAlignedObjectArray* indices, btCollidable& col, const float* scaling); + int registerConcaveMeshShape(class objLoader* obj, btCollidable& col, const float* scaling); + + //do they need to be merged? + int registerConvexHullShape(class btConvexUtility* convexPtr, btCollidable& col); + int registerConvexHullShape(btConvexUtility* utilPtr); + int registerConvexHullShape(const float* vertices, int strideInBytes, int numVertices, const float* scaling); + + int registerConvexHeightfield(class ConvexHeightField* convexShape,btCollidable& col); + int registerRigidBody(int collidableIndex, float mass, const float* position, const float* orientation, const float* aabbMin, const float* aabbMax,bool writeToGpu); + void setObjectTransform(const float* position, const float* orientation , int bodyIndex); + + void writeAllBodiesToGpu(); + + void readbackAllBodiesToCpu(); + void getObjectTransformFromCpu(float* position, float* orientation , int bodyIndex) const; + + virtual void computeContacts(cl_mem broadphasePairs, int numBroadphasePairs, cl_mem aabbs, int numObjects); + + + cl_mem getBodiesGpu(); + int getNumBodiesGpu() const; + + cl_mem getBodyInertiasGpu(); + int getNumBodyInertiasGpu() const; + + cl_mem getCollidablesGpu(); + int getNumCollidablesGpu() const; + + cl_mem getContactsGpu(); + int getNumContactsGpu() const; + + int allocateCollidable(); + + btCollidable& getCollidableCpu(int collidableIndex); + const btCollidable& getCollidableCpu(int collidableIndex) const; + + const struct btSapAabb& getLocalSpaceAabb(int collidableIndex) const; +}; + +#endif //BT_GPU_NARROWPHASE_H + diff --git a/opencl/gpu_rigidbody/host/btGpuRigidBodyPipeline.cpp b/opencl/gpu_rigidbody/host/btGpuRigidBodyPipeline.cpp new file mode 100644 index 000000000..3e43b6236 --- /dev/null +++ b/opencl/gpu_rigidbody/host/btGpuRigidBodyPipeline.cpp @@ -0,0 +1,171 @@ +#include "btGpuRigidBodyPipeline.h" +#include "btGpuRigidBodyPipelineInternalData.h" +#include "../kernels/integrateKernel.h" +#include "../../basic_initialize/btOpenCLUtils.h" +#include "btGpuNarrowPhase.h" +#include "BulletGeometry/btAabbUtil2.h" +#include "../../gpu_broadphase/host/btSapAabb.h" +#include "../../gpu_broadphase/host/btGpuSapBroadphase.h" +#include "parallel_primitives/host/btLauncherCL.h" + +btGpuRigidBodyPipeline::btGpuRigidBodyPipeline(cl_context ctx,cl_device_id device, cl_command_queue q,class btGpuNarrowPhase* narrowphase, class btGpuSapBroadphase* broadphaseSap ) +{ + m_data = new btGpuRigidBodyPipelineInternalData; + m_data->m_context = ctx; + m_data->m_device = device; + m_data->m_queue = q; + + m_data->m_broadphaseSap = broadphaseSap; + m_data->m_narrowphase = narrowphase; + + cl_int errNum=0; + + { + cl_program prog = btOpenCLUtils::compileCLProgramFromString(m_data->m_context,m_data->m_device,integrateKernelCL,&errNum,"","opencl/gpu_rigidbody/kernels/integrateKernel.cl"); + btAssert(errNum==CL_SUCCESS); + m_data->m_integrateTransformsKernel = btOpenCLUtils::compileCLKernelFromString(m_data->m_context, m_data->m_device,integrateKernelCL, "integrateTransformsKernel",&errNum,prog); + btAssert(errNum==CL_SUCCESS); + clReleaseProgram(prog); + } + + +} + +btGpuRigidBodyPipeline::~btGpuRigidBodyPipeline() +{ + clReleaseKernel(m_data->m_integrateTransformsKernel); + + delete m_data; +} + +void btGpuRigidBodyPipeline::stepSimulation(float deltaTime) +{ + btLauncherCL launcher(m_data->m_queue,m_data->m_integrateTransformsKernel); + //integrateTransformsKernel( __global Body* bodies,const int numNodes, float timeStep, float angularDamping) + + launcher.setBuffer(m_data->m_narrowphase->getBodiesGpu()); + int numBodies = m_data->m_narrowphase->getNumBodiesGpu(); + launcher.setConst(numBodies); + float timeStep = 1./60.f; + launcher.setConst(timeStep); + float angularDamp = 0.99f; + launcher.setConst(angularDamp); + launcher.launch1D(numBodies); + +} + + +cl_mem btGpuRigidBodyPipeline::getBodyBuffer() +{ + return m_data->m_narrowphase->getBodiesGpu(); +} + +int btGpuRigidBodyPipeline::getNumBodies() const +{ + return m_data->m_narrowphase->getNumBodiesGpu(); +} + + + +int btGpuRigidBodyPipeline::registerConvexPolyhedron(btConvexUtility* utilPtr) +{ +/* + int collidableIndex = m_narrowphaseAndSolver->allocateCollidable(); + + btCollidable& col = m_narrowphaseAndSolver->getCollidableCpu(collidableIndex); + col.m_shapeType = CollisionShape::SHAPE_CONVEX_HULL; + col.m_shapeIndex = -1; + + + if (m_narrowphaseAndSolver) + { + btVector3 localCenter(0,0,0); + for (int i=0;im_vertices.size();i++) + localCenter+=utilPtr->m_vertices[i]; + localCenter*= (1.f/utilPtr->m_vertices.size()); + utilPtr->m_localCenter = localCenter; + + col.m_shapeIndex = m_narrowphaseAndSolver->registerConvexHullShape(utilPtr,col); + } + + if (col.m_shapeIndex>=0) + { + btAABBHost aabbMin, aabbMax; + btVector3 myAabbMin(1e30f,1e30f,1e30f); + btVector3 myAabbMax(-1e30f,-1e30f,-1e30f); + + for (int i=0;im_vertices.size();i++) + { + myAabbMin.setMin(utilPtr->m_vertices[i]); + myAabbMax.setMax(utilPtr->m_vertices[i]); + } + aabbMin.fx = myAabbMin[0];//s_convexHeightField->m_aabb.m_min.x; + aabbMin.fy = myAabbMin[1];//s_convexHeightField->m_aabb.m_min.y; + aabbMin.fz= myAabbMin[2];//s_convexHeightField->m_aabb.m_min.z; + aabbMin.uw = 0; + + aabbMax.fx = myAabbMax[0];//s_convexHeightField->m_aabb.m_max.x; + aabbMax.fy = myAabbMax[1];//s_convexHeightField->m_aabb.m_max.y; + aabbMax.fz= myAabbMax[2];//s_convexHeightField->m_aabb.m_max.z; + aabbMax.uw = 0; + + m_data->m_localShapeAABBCPU->push_back(aabbMin); + m_data->m_localShapeAABBGPU->push_back(aabbMin); + + m_data->m_localShapeAABBCPU->push_back(aabbMax); + m_data->m_localShapeAABBGPU->push_back(aabbMax); + + //m_data->m_localShapeAABB->copyFromHostPointer(&aabbMin,1,shapeIndex*2); + //m_data->m_localShapeAABB->copyFromHostPointer(&aabbMax,1,shapeIndex*2+1); + clFinish(g_cqCommandQue); + } + + delete[] eqn; + + + + return collidableIndex; + */ + return 0; +} + + +int btGpuRigidBodyPipeline::registerPhysicsInstance(float mass, const float* position, const float* orientation, int collidableIndex, int userIndex) +{ + btVector3 aabbMin(0,0,0),aabbMax(0,0,0); + if (collidableIndex>=0) + { + btSapAabb localAabb = m_data->m_narrowphase->getLocalSpaceAabb(collidableIndex); + btVector3 localAabbMin(localAabb.m_min[0],localAabb.m_min[1],localAabb.m_min[2]); + btVector3 localAabbMax(localAabb.m_max[0],localAabb.m_max[1],localAabb.m_max[2]); + + btScalar margin = 0.01f; + btTransform t; + t.setIdentity(); + t.setOrigin(btVector3(position[0],position[1],position[2])); + t.setRotation(btQuaternion(orientation[0],orientation[1],orientation[2],orientation[3])); + btTransformAabb(localAabbMin,localAabbMax, margin,t,aabbMin,aabbMax); + if (mass) + { + m_data->m_broadphaseSap->createProxy(aabbMin,aabbMax,userIndex,1,1);//m_dispatcher); + } else + { + m_data->m_broadphaseSap->createLargeProxy(aabbMin,aabbMax,userIndex,1,1);//m_dispatcher); + } + } + + bool writeToGpu = false; + int bodyIndex = -1; + + + bodyIndex = m_data->m_narrowphase->registerRigidBody(collidableIndex,mass,position,orientation,&aabbMin.getX(),&aabbMax.getX(),writeToGpu); + + /* + if (mass>0.f) + m_numDynamicPhysicsInstances++; + + m_numPhysicsInstances++; + */ + + return bodyIndex; +} \ No newline at end of file diff --git a/opencl/gpu_rigidbody/host/btGpuRigidBodyPipeline.h b/opencl/gpu_rigidbody/host/btGpuRigidBodyPipeline.h new file mode 100644 index 000000000..1fae98a50 --- /dev/null +++ b/opencl/gpu_rigidbody/host/btGpuRigidBodyPipeline.h @@ -0,0 +1,38 @@ +#ifndef BT_GPU_RIGIDBODY_PIPELINE_H +#define BT_GPU_RIGIDBODY_PIPELINE_H + +#include "../../basic_initialize/btOpenCLInclude.h" + +class btGpuRigidBodyPipeline +{ +protected: + struct btGpuRigidBodyPipelineInternalData* m_data; + + int allocateCollidable(); + +public: + + btGpuRigidBodyPipeline(cl_context ctx,cl_device_id device, cl_command_queue q , class btGpuNarrowPhase* narrowphase, class btGpuSapBroadphase* broadphaseSap); + virtual ~btGpuRigidBodyPipeline(); + + void stepSimulation(float deltaTime); + + int registerConvexPolyhedron(class btConvexUtility* convex); + + //int registerConvexPolyhedron(const float* vertices, int strideInBytes, int numVertices, const float* scaling); + //int registerSphereShape(float radius); + //int registerPlaneShape(const btVector3& planeNormal, float planeConstant); + + //int registerConcaveMesh(btAlignedObjectArray* vertices, btAlignedObjectArray* indices, const float* scaling); + //int registerCompoundShape(btAlignedObjectArray* childShapes); + + + int registerPhysicsInstance(float mass, const float* position, const float* orientation, int collisionShapeIndex, int userData); + + cl_mem getBodyBuffer(); + + int getNumBodies() const; + +}; + +#endif //BT_GPU_RIGIDBODY_PIPELINE_H \ No newline at end of file diff --git a/opencl/gpu_rigidbody/host/btGpuRigidBodyPipelineInternalData.h b/opencl/gpu_rigidbody/host/btGpuRigidBodyPipelineInternalData.h new file mode 100644 index 000000000..93925cb1a --- /dev/null +++ b/opencl/gpu_rigidbody/host/btGpuRigidBodyPipelineInternalData.h @@ -0,0 +1,26 @@ +#ifndef BT_GPU_RIGIDBODY_PIPELINE_INTERNAL_DATA_H +#define BT_GPU_RIGIDBODY_PIPELINE_INTERNAL_DATA_H + +#include "../../basic_initialize/btOpenCLInclude.h" +#include "BulletCommon/btAlignedObjectArray.h" + +#include "../../parallel_primitives/host/btOpenCLArray.h" +#include "../../gpu_sat/host/btCollidable.h" + +struct btGpuRigidBodyPipelineInternalData +{ + + cl_context m_context; + cl_device_id m_device; + cl_command_queue m_queue; + + cl_kernel m_integrateTransformsKernel; + + class btGpuSapBroadphase* m_broadphaseSap; + + class btGpuNarrowPhase* m_narrowphase; + +}; + +#endif //BT_GPU_RIGIDBODY_PIPELINE_INTERNAL_DATA_H + diff --git a/opencl/gpu_rigidbody/kernels/integrateKernel.cl b/opencl/gpu_rigidbody/kernels/integrateKernel.cl new file mode 100644 index 000000000..5b5bff268 --- /dev/null +++ b/opencl/gpu_rigidbody/kernels/integrateKernel.cl @@ -0,0 +1,89 @@ + +float4 quatMult(float4 q1, float4 q2) +{ + float4 q; + q.x = q1.w * q2.x + q1.x * q2.w + q1.y * q2.z - q1.z * q2.y; + q.y = q1.w * q2.y + q1.y * q2.w + q1.z * q2.x - q1.x * q2.z; + q.z = q1.w * q2.z + q1.z * q2.w + q1.x * q2.y - q1.y * q2.x; + q.w = q1.w * q2.w - q1.x * q2.x - q1.y * q2.y - q1.z * q2.z; + return q; +} + +float4 quatNorm(float4 q) +{ + float len = native_sqrt(dot(q, q)); + if(len > 0.f) + { + q *= 1.f / len; + } + else + { + q.x = q.y = q.z = 0.f; + q.w = 1.f; + } + return q; +} + + +typedef struct +{ + float4 m_pos; + float4 m_quat; + float4 m_linVel; + float4 m_angVel; + + unsigned int m_collidableIdx; + float m_invMass; + float m_restituitionCoeff; + float m_frictionCoeff; +} Body; + + + + +__kernel void + integrateTransformsKernel( __global Body* bodies,const int numNodes, float timeStep, float angularDamping) +{ + int nodeID = get_global_id(0); + float BT_GPU_ANGULAR_MOTION_THRESHOLD = (0.25f * 3.14159254f); + if( nodeID < numNodes ) + { + if (1) + { + float4 axis; + //add some hardcoded angular damping + bodies[nodeID].m_angVel.x *= angularDamping; + bodies[nodeID].m_angVel.y *= angularDamping; + bodies[nodeID].m_angVel.z *= angularDamping; + + float4 angvel = bodies[nodeID].m_angVel; + float fAngle = native_sqrt(dot(angvel, angvel)); + //limit the angular motion + if(fAngle*timeStep > BT_GPU_ANGULAR_MOTION_THRESHOLD) + { + fAngle = BT_GPU_ANGULAR_MOTION_THRESHOLD / timeStep; + } + if(fAngle < 0.001f) + { + // use Taylor's expansions of sync function + axis = angvel * (0.5f*timeStep-(timeStep*timeStep*timeStep)*0.020833333333f * fAngle * fAngle); + } + else + { + // sync(fAngle) = sin(c*fAngle)/t + axis = angvel * ( native_sin(0.5f * fAngle * timeStep) / fAngle); + } + float4 dorn = axis; + dorn.w = native_cos(fAngle * timeStep * 0.5f); + float4 orn0 = bodies[nodeID].m_quat; + + float4 predictedOrn = quatMult(dorn, orn0); + predictedOrn = quatNorm(predictedOrn); + bodies[nodeID].m_quat=predictedOrn; + } + + //linear velocity + bodies[nodeID].m_pos += bodies[nodeID].m_linVel * timeStep; + + } +} diff --git a/opencl/gpu_rigidbody/kernels/integrateKernel.h b/opencl/gpu_rigidbody/kernels/integrateKernel.h new file mode 100644 index 000000000..6ace9b1aa --- /dev/null +++ b/opencl/gpu_rigidbody/kernels/integrateKernel.h @@ -0,0 +1,93 @@ +//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project +static const char* integrateKernelCL= \ +"\n" +"float4 quatMult(float4 q1, float4 q2)\n" +"{\n" +" float4 q;\n" +" q.x = q1.w * q2.x + q1.x * q2.w + q1.y * q2.z - q1.z * q2.y;\n" +" q.y = q1.w * q2.y + q1.y * q2.w + q1.z * q2.x - q1.x * q2.z;\n" +" q.z = q1.w * q2.z + q1.z * q2.w + q1.x * q2.y - q1.y * q2.x;\n" +" q.w = q1.w * q2.w - q1.x * q2.x - q1.y * q2.y - q1.z * q2.z; \n" +" return q;\n" +"}\n" +"\n" +"float4 quatNorm(float4 q)\n" +"{\n" +" float len = native_sqrt(dot(q, q));\n" +" if(len > 0.f)\n" +" {\n" +" q *= 1.f / len;\n" +" }\n" +" else\n" +" {\n" +" q.x = q.y = q.z = 0.f;\n" +" q.w = 1.f;\n" +" }\n" +" return q;\n" +"}\n" +"\n" +"\n" +"typedef struct\n" +"{\n" +" float4 m_pos;\n" +" float4 m_quat;\n" +" float4 m_linVel;\n" +" float4 m_angVel;\n" +"\n" +" unsigned int m_collidableIdx;\n" +" float m_invMass;\n" +" float m_restituitionCoeff;\n" +" float m_frictionCoeff;\n" +"} Body;\n" +"\n" +"\n" +"\n" +"\n" +"__kernel void \n" +" integrateTransformsKernel( __global Body* bodies,const int numNodes, float timeStep, float angularDamping)\n" +"{\n" +" int nodeID = get_global_id(0);\n" +" float BT_GPU_ANGULAR_MOTION_THRESHOLD = (0.25f * 3.14159254f);\n" +" if( nodeID < numNodes )\n" +" {\n" +" if (1)\n" +" {\n" +" float4 axis;\n" +" //add some hardcoded angular damping\n" +" bodies[nodeID].m_angVel.x *= angularDamping;\n" +" bodies[nodeID].m_angVel.y *= angularDamping;\n" +" bodies[nodeID].m_angVel.z *= angularDamping;\n" +" \n" +" float4 angvel = bodies[nodeID].m_angVel;\n" +" float fAngle = native_sqrt(dot(angvel, angvel));\n" +" //limit the angular motion\n" +" if(fAngle*timeStep > BT_GPU_ANGULAR_MOTION_THRESHOLD)\n" +" {\n" +" fAngle = BT_GPU_ANGULAR_MOTION_THRESHOLD / timeStep;\n" +" }\n" +" if(fAngle < 0.001f)\n" +" {\n" +" // use Taylor's expansions of sync function\n" +" axis = angvel * (0.5f*timeStep-(timeStep*timeStep*timeStep)*0.020833333333f * fAngle * fAngle);\n" +" }\n" +" else\n" +" {\n" +" // sync(fAngle) = sin(c*fAngle)/t\n" +" axis = angvel * ( native_sin(0.5f * fAngle * timeStep) / fAngle);\n" +" }\n" +" float4 dorn = axis;\n" +" dorn.w = native_cos(fAngle * timeStep * 0.5f);\n" +" float4 orn0 = bodies[nodeID].m_quat;\n" +"\n" +" float4 predictedOrn = quatMult(dorn, orn0);\n" +" predictedOrn = quatNorm(predictedOrn);\n" +" bodies[nodeID].m_quat=predictedOrn;\n" +" }\n" +"\n" +" //linear velocity \n" +" bodies[nodeID].m_pos += bodies[nodeID].m_linVel * timeStep;\n" +" \n" +" }\n" +"}\n" +"\n" +; diff --git a/opencl/gpu_sat/host/btCollidable.h b/opencl/gpu_sat/host/btCollidable.h index 6209671de..e9633ca42 100644 --- a/opencl/gpu_sat/host/btCollidable.h +++ b/opencl/gpu_sat/host/btCollidable.h @@ -2,6 +2,18 @@ #ifndef BT_COLLIDABLE_H #define BT_COLLIDABLE_H +enum btShapeTypes +{ + SHAPE_HEIGHT_FIELD=1, + SHAPE_CONVEX_HEIGHT_FIELD=2, + SHAPE_CONVEX_HULL=3, + SHAPE_PLANE=4, + SHAPE_CONCAVE_TRIMESH=5, + SHAPE_COMPOUND_OF_CONVEX_HULLS=6, + SHAPE_SPHERE=7, + MAX_NUM_SHAPE_TYPES, +}; + struct btCollidable { int m_numChildShapes; diff --git a/opencl/gpu_sat/host/btRigidBodyCL.h b/opencl/gpu_sat/host/btRigidBodyCL.h index 4a2c360f8..e91e4ad09 100644 --- a/opencl/gpu_sat/host/btRigidBodyCL.h +++ b/opencl/gpu_sat/host/btRigidBodyCL.h @@ -25,7 +25,7 @@ ATTRIBUTE_ALIGNED16(struct) btRigidBodyCL }; -struct Inertia +struct btInertiaCL { btMatrix3x3 m_invInertiaWorld; btMatrix3x3 m_initInvInertia;