530 lines
14 KiB
C++
530 lines
14 KiB
C++
/*
|
|
Copyright (c) 2012 Advanced Micro Devices, Inc.
|
|
|
|
This software is provided 'as-is', without any express or implied warranty.
|
|
In no event will the authors be held liable for any damages arising from the use of this software.
|
|
Permission is granted to anyone to use this software for any purpose,
|
|
including commercial applications, and to alter it and redistribute it freely,
|
|
subject to the following restrictions:
|
|
|
|
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
|
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
|
3. This notice may not be removed or altered from any source distribution.
|
|
*/
|
|
//Originally written by Erwin Coumans
|
|
|
|
#include "OpenGLInclude.h"
|
|
|
|
#include "CLPhysicsDemo.h"
|
|
#include "LinearMath/btAlignedObjectArray.h"
|
|
#include "DemoSettings.h"
|
|
#include "../basic_initialize/btOpenCLUtils.h"
|
|
#include "../opengl_interop/btOpenCLGLInteropBuffer.h"
|
|
#include "../broadphase_benchmark/findPairsOpenCL.h"
|
|
#include "LinearMath/btVector3.h"
|
|
#include "LinearMath/btQuaternion.h"
|
|
#include "LinearMath/btMatrix3x3.h"
|
|
#include "../../opencl/gpu_rigidbody_pipeline/btGpuNarrowPhaseAndSolver.h"
|
|
#include "../../opencl/gpu_rigidbody_pipeline/btConvexUtility.h"
|
|
#include "../../dynamics/basic_demo/ConvexHeightFieldShape.h"
|
|
#include "../broadphase_benchmark/btGridBroadphaseCl.h"
|
|
#include "LinearMath/btQuickprof.h"
|
|
|
|
|
|
#define MSTRINGIFY(A) #A
|
|
static char* interopKernelString =
|
|
#include "../broadphase_benchmark/integrateKernel.cl"
|
|
|
|
#define INTEROPKERNEL_SRC_PATH "../../opencl/broadphase_benchmark/integrateKernel.cl"
|
|
|
|
cl_kernel g_integrateTransformsKernel;
|
|
|
|
|
|
|
|
bool runOpenCLKernels = true;
|
|
|
|
|
|
btGpuNarrowphaseAndSolver* narrowphaseAndSolver = 0;
|
|
ConvexHeightField* s_convexHeightField = 0 ;
|
|
btOpenCLGLInteropBuffer* g_interopBuffer = 0;
|
|
|
|
extern GLuint cube_vbo;
|
|
extern int VBOsize;
|
|
|
|
cl_mem clBuffer=0;
|
|
char* hostPtr=0;
|
|
cl_bool blocking= CL_TRUE;
|
|
|
|
|
|
|
|
btFindPairsIO gFpIO;
|
|
|
|
cl_context g_cxMainContext;
|
|
cl_command_queue g_cqCommandQue;
|
|
cl_device_id g_device;
|
|
|
|
cl_mem gLinVelMem=0;
|
|
cl_mem gAngVelMem=0;
|
|
cl_mem gBodyTimes=0;
|
|
|
|
#include <Adl/Adl.h>
|
|
|
|
adl::DeviceCL* g_deviceCL=0;
|
|
|
|
struct btAABBHost //keep this in sync with btAABBCL!
|
|
{
|
|
float fx;
|
|
float fy;
|
|
float fz;
|
|
unsigned int uw;
|
|
};
|
|
|
|
struct InternalData
|
|
{
|
|
adl::Buffer<btVector3>* m_linVelBuf;
|
|
adl::Buffer<btVector3>* m_angVelBuf;
|
|
adl::Buffer<float>* m_bodyTimes;
|
|
bool m_useInterop;
|
|
btGridBroadphaseCl* m_Broadphase;
|
|
|
|
adl::Buffer<btAABBHost>* m_localShapeAABB;
|
|
|
|
btVector3* m_linVelHost;
|
|
btVector3* m_angVelHost;
|
|
float* m_bodyTimesHost;
|
|
|
|
InternalData():m_linVelBuf(0),m_angVelBuf(0),m_bodyTimes(0),m_useInterop(0),m_Broadphase(0)
|
|
{
|
|
m_linVelHost= new btVector3[MAX_CONVEX_BODIES_CL];
|
|
m_angVelHost = new btVector3[MAX_CONVEX_BODIES_CL];
|
|
m_bodyTimesHost = new float[MAX_CONVEX_BODIES_CL];
|
|
}
|
|
~InternalData()
|
|
{
|
|
delete[] m_linVelHost;
|
|
delete[] m_angVelHost;
|
|
delete[] m_bodyTimesHost;
|
|
|
|
}
|
|
};
|
|
|
|
|
|
void InitCL(int preferredDeviceIndex, int preferredPlatformIndex, bool useInterop)
|
|
{
|
|
void* glCtx=0;
|
|
void* glDC = 0;
|
|
|
|
#ifdef _WIN32
|
|
glCtx = wglGetCurrentContext();
|
|
#else //!_WIN32
|
|
GLXContext glCtx = glXGetCurrentContext();
|
|
#endif //!_WIN32
|
|
glDC = wglGetCurrentDC();
|
|
|
|
int ciErrNum = 0;
|
|
#ifdef CL_PLATFORM_INTEL
|
|
cl_device_type deviceType = CL_DEVICE_TYPE_ALL;
|
|
#else
|
|
cl_device_type deviceType = CL_DEVICE_TYPE_GPU;
|
|
#endif
|
|
|
|
|
|
|
|
if (useInterop)
|
|
{
|
|
g_cxMainContext = btOpenCLUtils::createContextFromType(deviceType, &ciErrNum, glCtx, glDC);
|
|
} else
|
|
{
|
|
g_cxMainContext = btOpenCLUtils::createContextFromType(deviceType, &ciErrNum, 0,0,preferredDeviceIndex, preferredPlatformIndex);
|
|
}
|
|
|
|
|
|
oclCHECKERROR(ciErrNum, CL_SUCCESS);
|
|
|
|
int numDev = btOpenCLUtils::getNumDevices(g_cxMainContext);
|
|
|
|
if (numDev>0)
|
|
{
|
|
g_device= btOpenCLUtils::getDevice(g_cxMainContext,0);
|
|
btOpenCLDeviceInfo clInfo;
|
|
btOpenCLUtils::getDeviceInfo(g_device,clInfo);
|
|
btOpenCLUtils::printDeviceInfo(g_device);
|
|
g_cqCommandQue = clCreateCommandQueue(g_cxMainContext, g_device, 0, &ciErrNum);
|
|
oclCHECKERROR(ciErrNum, CL_SUCCESS);
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
CLPhysicsDemo::CLPhysicsDemo(Win32OpenGLWindow* renderer)
|
|
{
|
|
m_numCollisionShapes=0;
|
|
m_numPhysicsInstances=0;
|
|
|
|
m_data = new InternalData;
|
|
}
|
|
|
|
CLPhysicsDemo::~CLPhysicsDemo()
|
|
{
|
|
|
|
}
|
|
|
|
|
|
void CLPhysicsDemo::writeBodiesToGpu()
|
|
{
|
|
if (narrowphaseAndSolver)
|
|
narrowphaseAndSolver->writeAllBodiesToGpu();
|
|
}
|
|
|
|
int CLPhysicsDemo::registerCollisionShape(const float* vertices, int strideInBytes, int numVertices, const float* scaling)
|
|
{
|
|
btAlignedObjectArray<btVector3> verts;
|
|
|
|
unsigned char* vts = (unsigned char*) vertices;
|
|
for (int i=0;i<numVertices;i++)
|
|
{
|
|
float* vertex = (float*) &vts[i*strideInBytes];
|
|
verts.push_back(btVector3(vertex[0]*scaling[0],vertex[1]*scaling[1],vertex[2]*scaling[2]));
|
|
}
|
|
|
|
btConvexUtility util;
|
|
bool merge = true;
|
|
util.initializePolyhedralFeatures(verts,merge);
|
|
|
|
int numFaces= util.m_faces.size();
|
|
float4* eqn = new float4[numFaces];
|
|
for (int i=0;i<numFaces;i++)
|
|
{
|
|
eqn[i].x = util.m_faces[i].m_plane[0];
|
|
eqn[i].y = util.m_faces[i].m_plane[1];
|
|
eqn[i].z = util.m_faces[i].m_plane[2];
|
|
eqn[i].w = util.m_faces[i].m_plane[3];
|
|
}
|
|
printf("numFaces = %d\n", numFaces);
|
|
|
|
|
|
s_convexHeightField = new ConvexHeightField(eqn,numFaces);
|
|
|
|
int shapeIndex=-1;
|
|
|
|
if (narrowphaseAndSolver)
|
|
shapeIndex = narrowphaseAndSolver->registerShape(s_convexHeightField);
|
|
|
|
if (shapeIndex>=0)
|
|
{
|
|
btAABBHost aabbMin, aabbMax;
|
|
aabbMin.fx = s_convexHeightField->m_aabb.m_min.x;
|
|
aabbMin.fy = s_convexHeightField->m_aabb.m_min.y;
|
|
aabbMin.fz= s_convexHeightField->m_aabb.m_min.z;
|
|
aabbMin.uw = shapeIndex;
|
|
|
|
aabbMax.fx = s_convexHeightField->m_aabb.m_max.x;
|
|
aabbMax.fy = s_convexHeightField->m_aabb.m_max.y;
|
|
aabbMax.fz= s_convexHeightField->m_aabb.m_max.z;
|
|
aabbMax.uw = shapeIndex;
|
|
|
|
m_data->m_localShapeAABB->write(&aabbMin,1,shapeIndex*2);
|
|
m_data->m_localShapeAABB->write(&aabbMax,1,shapeIndex*2+1);
|
|
adl::DeviceUtils::waitForCompletion( g_deviceCL );
|
|
}
|
|
|
|
m_numCollisionShapes++;
|
|
delete[] eqn;
|
|
return shapeIndex;
|
|
}
|
|
|
|
int CLPhysicsDemo::registerPhysicsInstance(float mass, const float* position, const float* orientation, int collisionShapeIndex, void* userPointer)
|
|
{
|
|
btVector3 aabbMin(position[0],position[0],position[0]);
|
|
btVector3 aabbMax = aabbMin;
|
|
aabbMin -= btVector3(1.f,1.f,1.f);
|
|
aabbMax += btVector3(1.f,1.f,1.f);
|
|
|
|
if (collisionShapeIndex>=0)
|
|
{
|
|
btBroadphaseProxy* proxy = m_data->m_Broadphase->createProxy(aabbMin,aabbMax,collisionShapeIndex,userPointer,1,1,0,0);//m_dispatcher);
|
|
}
|
|
|
|
bool writeToGpu = false;
|
|
int bodyIndex = -1;
|
|
|
|
if (narrowphaseAndSolver)
|
|
bodyIndex = narrowphaseAndSolver->registerRigidBody(collisionShapeIndex,mass,position,orientation,writeToGpu);
|
|
|
|
m_numPhysicsInstances++;
|
|
return bodyIndex;
|
|
}
|
|
|
|
|
|
|
|
void CLPhysicsDemo::init(int preferredDevice, int preferredPlatform, bool useInterop)
|
|
{
|
|
|
|
InitCL(-1,-1,useInterop);
|
|
|
|
#define CUSTOM_CL_INITIALIZATION
|
|
#ifdef CUSTOM_CL_INITIALIZATION
|
|
g_deviceCL = new adl::DeviceCL();
|
|
g_deviceCL->m_deviceIdx = g_device;
|
|
g_deviceCL->m_context = g_cxMainContext;
|
|
g_deviceCL->m_commandQueue = g_cqCommandQue;
|
|
g_deviceCL->m_kernelManager = new adl::KernelManager;
|
|
|
|
#else
|
|
DeviceUtils::Config cfg;
|
|
cfg.m_type = DeviceUtils::Config::DEVICE_CPU;
|
|
g_deviceCL = DeviceUtils::allocate( TYPE_CL, cfg );
|
|
#endif
|
|
|
|
//adl::Solver<adl::TYPE_CL>::allocate(g_deviceCL->allocate(
|
|
m_data->m_linVelBuf = new adl::Buffer<btVector3>(g_deviceCL,MAX_CONVEX_BODIES_CL);
|
|
m_data->m_angVelBuf = new adl::Buffer<btVector3>(g_deviceCL,MAX_CONVEX_BODIES_CL);
|
|
m_data->m_bodyTimes = new adl::Buffer<float>(g_deviceCL,MAX_CONVEX_BODIES_CL);
|
|
|
|
m_data->m_localShapeAABB = new adl::Buffer<btAABBHost>(g_deviceCL,MAX_CONVEX_SHAPES_CL);
|
|
|
|
gLinVelMem = (cl_mem)m_data->m_linVelBuf->m_ptr;
|
|
gAngVelMem = (cl_mem)m_data->m_angVelBuf->m_ptr;
|
|
gBodyTimes = (cl_mem)m_data->m_bodyTimes->m_ptr;
|
|
|
|
|
|
|
|
|
|
narrowphaseAndSolver = new btGpuNarrowphaseAndSolver(g_deviceCL);
|
|
|
|
|
|
|
|
int maxObjects = btMax(256,MAX_CONVEX_BODIES_CL);
|
|
int maxPairsSmallProxy = 32;
|
|
btOverlappingPairCache* overlappingPairCache=0;
|
|
|
|
m_data->m_Broadphase = new btGridBroadphaseCl(overlappingPairCache,btVector3(4.f, 4.f, 4.f), 128, 128, 128,maxObjects, maxObjects, maxPairsSmallProxy, 100.f, 128,
|
|
g_cxMainContext ,g_device,g_cqCommandQue, g_deviceCL);
|
|
|
|
|
|
|
|
cl_program prog = btOpenCLUtils::compileCLProgramFromString(g_cxMainContext,g_device,interopKernelString,0,"",INTEROPKERNEL_SRC_PATH);
|
|
g_integrateTransformsKernel = btOpenCLUtils::compileCLKernelFromString(g_cxMainContext, g_device,interopKernelString, "integrateTransformsKernel" ,0,prog);
|
|
|
|
|
|
initFindPairs(gFpIO, g_cxMainContext, g_device, g_cqCommandQue, MAX_CONVEX_BODIES_CL);
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
void CLPhysicsDemo::writeVelocitiesToGpu()
|
|
{
|
|
m_data->m_linVelBuf->write(m_data->m_linVelHost,MAX_CONVEX_BODIES_CL);
|
|
m_data->m_angVelBuf->write(m_data->m_angVelHost,MAX_CONVEX_BODIES_CL);
|
|
m_data->m_bodyTimes->write(m_data->m_bodyTimesHost,MAX_CONVEX_BODIES_CL);
|
|
adl::DeviceUtils::waitForCompletion( g_deviceCL );
|
|
}
|
|
|
|
|
|
void CLPhysicsDemo::setupInterop()
|
|
{
|
|
m_data->m_useInterop = true;
|
|
|
|
g_interopBuffer = new btOpenCLGLInteropBuffer(g_cxMainContext,g_cqCommandQue,cube_vbo);
|
|
clFinish(g_cqCommandQue);
|
|
}
|
|
|
|
void CLPhysicsDemo::cleanup()
|
|
{
|
|
delete narrowphaseAndSolver;
|
|
|
|
delete m_data->m_linVelBuf;
|
|
delete m_data->m_angVelBuf;
|
|
delete m_data->m_bodyTimes;
|
|
delete m_data->m_localShapeAABB;
|
|
|
|
delete m_data->m_Broadphase;
|
|
delete m_data;
|
|
|
|
delete g_deviceCL->m_kernelManager;
|
|
delete g_deviceCL;
|
|
|
|
m_data=0;
|
|
g_deviceCL=0;
|
|
delete g_interopBuffer;
|
|
delete s_convexHeightField;
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
void CLPhysicsDemo::stepSimulation()
|
|
{
|
|
BT_PROFILE("simulationLoop");
|
|
|
|
{
|
|
BT_PROFILE("glFinish");
|
|
glFinish();
|
|
}
|
|
cl_int ciErrNum = CL_SUCCESS;
|
|
|
|
|
|
if(m_data->m_useInterop)
|
|
{
|
|
clBuffer = g_interopBuffer->getCLBUffer();
|
|
BT_PROFILE("clEnqueueAcquireGLObjects");
|
|
ciErrNum = clEnqueueAcquireGLObjects(g_cqCommandQue, 1, &clBuffer, 0, 0, NULL);
|
|
adl::DeviceUtils::waitForCompletion( g_deviceCL );
|
|
} else
|
|
{
|
|
|
|
glBindBuffer(GL_ARRAY_BUFFER, cube_vbo);
|
|
glFlush();
|
|
|
|
BT_PROFILE("glMapBuffer and clEnqueueWriteBuffer");
|
|
|
|
blocking= CL_TRUE;
|
|
hostPtr= (char*)glMapBuffer( GL_ARRAY_BUFFER,GL_READ_WRITE);//GL_WRITE_ONLY
|
|
if (!clBuffer)
|
|
{
|
|
clBuffer = clCreateBuffer(g_cxMainContext, CL_MEM_READ_WRITE, VBOsize, 0, &ciErrNum);
|
|
}
|
|
adl::DeviceUtils::waitForCompletion( g_deviceCL );
|
|
oclCHECKERROR(ciErrNum, CL_SUCCESS);
|
|
|
|
ciErrNum = clEnqueueWriteBuffer ( g_cqCommandQue,
|
|
clBuffer,
|
|
blocking,
|
|
0,
|
|
VBOsize,
|
|
hostPtr,0,0,0
|
|
);
|
|
adl::DeviceUtils::waitForCompletion( g_deviceCL );
|
|
}
|
|
|
|
|
|
|
|
oclCHECKERROR(ciErrNum, CL_SUCCESS);
|
|
if (runOpenCLKernels && m_numPhysicsInstances)
|
|
{
|
|
|
|
gFpIO.m_numObjects = m_numPhysicsInstances;
|
|
gFpIO.m_positionOffset = SHAPE_VERTEX_BUFFER_SIZE/4;
|
|
gFpIO.m_clObjectsBuffer = clBuffer;
|
|
gFpIO.m_dAABB = m_data->m_Broadphase->m_dAABB;
|
|
gFpIO.m_dlocalShapeAABB = (cl_mem)m_data->m_localShapeAABB->m_ptr;
|
|
gFpIO.m_numOverlap = 0;
|
|
{
|
|
BT_PROFILE("setupGpuAabbs");
|
|
setupGpuAabbsFull(gFpIO,narrowphaseAndSolver->getBodiesGpu() );
|
|
}
|
|
if (1)
|
|
{
|
|
BT_PROFILE("calculateOverlappingPairs");
|
|
m_data->m_Broadphase->calculateOverlappingPairs(0, m_numPhysicsInstances);
|
|
gFpIO.m_dAllOverlappingPairs = m_data->m_Broadphase->m_dAllOverlappingPairs;
|
|
gFpIO.m_numOverlap = m_data->m_Broadphase->m_numPrefixSum;
|
|
}
|
|
|
|
//printf("gFpIO.m_numOverlap = %d\n",gFpIO.m_numOverlap );
|
|
if (gFpIO.m_numOverlap>=0 && gFpIO.m_numOverlap<MAX_BROADPHASE_COLLISION_CL)
|
|
{
|
|
colorPairsOpenCL(gFpIO);
|
|
|
|
if (1)
|
|
{
|
|
{
|
|
//BT_PROFILE("setupBodies");
|
|
if (narrowphaseAndSolver)
|
|
setupBodies(gFpIO, gLinVelMem, gAngVelMem, narrowphaseAndSolver->getBodiesGpu(), narrowphaseAndSolver->getBodyInertiasGpu());
|
|
}
|
|
if (gFpIO.m_numOverlap)
|
|
{
|
|
BT_PROFILE("computeContactsAndSolver");
|
|
if (narrowphaseAndSolver)
|
|
narrowphaseAndSolver->computeContactsAndSolver(gFpIO.m_dAllOverlappingPairs,gFpIO.m_numOverlap);
|
|
}
|
|
|
|
{
|
|
BT_PROFILE("copyBodyVelocities");
|
|
if (narrowphaseAndSolver)
|
|
copyBodyVelocities(gFpIO, gLinVelMem, gAngVelMem, narrowphaseAndSolver->getBodiesGpu(), narrowphaseAndSolver->getBodyInertiasGpu());
|
|
}
|
|
}
|
|
|
|
} else
|
|
{
|
|
printf("error, gFpIO.m_numOverlap = %d\n",gFpIO.m_numOverlap);
|
|
btAssert(0);
|
|
}
|
|
|
|
|
|
{
|
|
BT_PROFILE("integrateTransforms");
|
|
|
|
if (runOpenCLKernels)
|
|
{
|
|
int numObjects = m_numPhysicsInstances;
|
|
int offset = SHAPE_VERTEX_BUFFER_SIZE/4;
|
|
|
|
ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 0, sizeof(int), &offset);
|
|
ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 1, sizeof(int), &numObjects);
|
|
ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 2, sizeof(cl_mem), (void*)&clBuffer );
|
|
|
|
ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 3, sizeof(cl_mem), (void*)&gLinVelMem);
|
|
ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 4, sizeof(cl_mem), (void*)&gAngVelMem);
|
|
ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 5, sizeof(cl_mem), (void*)&gBodyTimes);
|
|
|
|
|
|
|
|
|
|
size_t workGroupSize = 64;
|
|
size_t numWorkItems = workGroupSize*((m_numPhysicsInstances + (workGroupSize)) / workGroupSize);
|
|
|
|
if (workGroupSize>numWorkItems)
|
|
workGroupSize=numWorkItems;
|
|
|
|
ciErrNum = clEnqueueNDRangeKernel(g_cqCommandQue, g_integrateTransformsKernel, 1, NULL, &numWorkItems, &workGroupSize,0 ,0 ,0);
|
|
oclCHECKERROR(ciErrNum, CL_SUCCESS);
|
|
}
|
|
}
|
|
|
|
|
|
}
|
|
|
|
if(m_data->m_useInterop)
|
|
{
|
|
BT_PROFILE("clEnqueueReleaseGLObjects");
|
|
ciErrNum = clEnqueueReleaseGLObjects(g_cqCommandQue, 1, &clBuffer, 0, 0, 0);
|
|
adl::DeviceUtils::waitForCompletion( g_deviceCL );
|
|
}
|
|
else
|
|
{
|
|
BT_PROFILE("clEnqueueReadBuffer clReleaseMemObject and glUnmapBuffer");
|
|
ciErrNum = clEnqueueReadBuffer ( g_cqCommandQue,
|
|
clBuffer,
|
|
blocking,
|
|
0,
|
|
VBOsize,
|
|
hostPtr,0,0,0);
|
|
|
|
//clReleaseMemObject(clBuffer);
|
|
adl::DeviceUtils::waitForCompletion( g_deviceCL );
|
|
glUnmapBuffer( GL_ARRAY_BUFFER);
|
|
glFlush();
|
|
}
|
|
|
|
oclCHECKERROR(ciErrNum, CL_SUCCESS);
|
|
|
|
|
|
if (runOpenCLKernels)
|
|
{
|
|
BT_PROFILE("clFinish");
|
|
clFinish(g_cqCommandQue);
|
|
}
|
|
|
|
|
|
}
|