Apply a patch that updates vertex position on GPU for the OpenCL version, by Dongsoo Han (saggita), work-in-progress

Removed the unused OpenCL kernels
Add example how to cache binary kernels, see SoftDemo compiled with OpenCL AMD using msvc/vs2008_opencl.bat
This commit is contained in:
erwin.coumans
2011-12-20 18:02:48 +00:00
parent 3035ee69c4
commit fff709635e
4 changed files with 524 additions and 73 deletions

View File

@@ -52,7 +52,7 @@ static btRigidBody* staticBody = 0;
static float waveheight = 5.f;
const float TRIANGLE_SIZE=8.f;
unsigned int current_demo=29;
unsigned int current_demo=7;
#define DEMO_MODE_TIMEOUT 15.f //15 seconds for each demo
@@ -67,6 +67,151 @@ const int maxNumObjects = 32760;
#define CUBE_HALF_EXTENTS 1.5
#define EXTRA_HEIGHT -10.f
#ifdef USE_AMD_OPENCL
#include "btOpenCLUtils.h"
#include "BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolver_OpenCL.h"
#include "BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolver_OpenCLSIMDAware.h"
#include "BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverVertexBuffer_OpenGL.h"
btOpenCLSoftBodySolver* g_openCLSIMDSolver=0;
btSoftBodySolverOutputCLtoCPU* g_softBodyOutput = 0;
cl_context g_cxMainContext;
cl_device_id g_cdDevice;
cl_command_queue g_cqCommandQue;
void initCL( void* glCtx, void* glDC )
{
int ciErrNum = 0;
#if defined(CL_PLATFORM_MINI_CL)
cl_device_type deviceType = CL_DEVICE_TYPE_CPU;//or use CL_DEVICE_TYPE_DEBUG to debug MiniCL
#elif defined(CL_PLATFORM_INTEL)
cl_device_type deviceType = CL_DEVICE_TYPE_CPU;
#elif defined(CL_PLATFORM_AMD)
cl_device_type deviceType = CL_DEVICE_TYPE_GPU;
#elif defined(CL_PLATFORM_NVIDIA)
cl_device_type deviceType = CL_DEVICE_TYPE_GPU;
#else
#ifdef __APPLE__
cl_device_type deviceType = CL_DEVICE_TYPE_ALL;//GPU;
#else
cl_device_type deviceType = CL_DEVICE_TYPE_CPU;//CL_DEVICE_TYPE_ALL
#endif//__APPLE__
#endif
g_cxMainContext = btOpenCLUtils::createContextFromType(deviceType, &ciErrNum, glCtx, glDC);
oclCHECKERROR(ciErrNum, CL_SUCCESS);
int numDev = btOpenCLUtils::getNumDevices(g_cxMainContext);
if (!numDev)
{
btAssert(0);
exit(0);//this is just a demo, exit now
}
g_cdDevice = btOpenCLUtils::getDevice(g_cxMainContext,0);
oclCHECKERROR(ciErrNum, CL_SUCCESS);
btOpenCLDeviceInfo clInfo;
btOpenCLUtils::getDeviceInfo(g_cdDevice,clInfo);
btOpenCLUtils::printDeviceInfo(g_cdDevice);
// create a command-queue
g_cqCommandQue = clCreateCommandQueue(g_cxMainContext, g_cdDevice, 0, &ciErrNum);
oclCHECKERROR(ciErrNum, CL_SUCCESS);
}
class CachingCLFunctions : public CLFunctions
{
protected:
cl_device_id m_device;
const char* strip(const char* name, const char* pattern);
public:
CachingCLFunctions(cl_command_queue cqCommandQue, cl_context cxMainContext) :
CLFunctions(cqCommandQue,cxMainContext)
{
size_t actualSize;
cl_int retval = clGetCommandQueueInfo ( cqCommandQue, CL_QUEUE_DEVICE, sizeof(cl_device_id),
&m_device, &actualSize);
}
/**
* Compile a compute shader kernel from a string and return the appropriate cl_kernel object.
*/
virtual cl_kernel compileCLKernelFromString( const char* kernelSource, const char* kernelName, const char* additionalMacros , const char* orgSrcFileNameForCaching)
{
char srcFileNameForCaching[1024];
sprintf(srcFileNameForCaching,"%s/%s","../../src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL",orgSrcFileNameForCaching);
btAssert(additionalMacros);
btAssert(srcFileNameForCaching && strlen(srcFileNameForCaching));
printf("compiling kernelName: %s ",kernelName);
cl_kernel kernel=0;
cl_int ciErrNum;
size_t program_length = strlen(kernelSource);
cl_program m_cpProgram = btOpenCLUtils::compileCLProgramFromString(m_cxMainContext, m_device, kernelSource, &ciErrNum, additionalMacros,srcFileNameForCaching);
// Create the kernel
kernel = clCreateKernel(m_cpProgram, kernelName, &ciErrNum);
if (ciErrNum != CL_SUCCESS)
{
const char* msg = "";
switch(ciErrNum)
{
case CL_INVALID_PROGRAM:
msg = "Program is not a valid program object.";
break;
case CL_INVALID_PROGRAM_EXECUTABLE:
msg = "There is no successfully built executable for program.";
break;
case CL_INVALID_KERNEL_NAME:
msg = "kernel_name is not found in program.";
break;
case CL_INVALID_KERNEL_DEFINITION:
msg = "the function definition for __kernel function given by kernel_name such as the number of arguments, the argument types are not the same for all devices for which the program executable has been built.";
break;
case CL_INVALID_VALUE:
msg = "kernel_name is NULL.";
break;
case CL_OUT_OF_HOST_MEMORY:
msg = "Failure to allocate resources required by the OpenCL implementation on the host.";
break;
default:
{
}
}
printf("Error in clCreateKernel for kernel '%s', error is \"%s\", Line %u in file %s !!!\n\n", kernelName, msg, __LINE__, __FILE__);
#ifndef BT_SUPPRESS_OPENCL_ASSERTS
btAssert(0);
#endif //BT_SUPPRESS_OPENCL_ASSERTS
m_kernelCompilationFailures++;
return 0;
}
printf("ready. \n");
if (!kernel)
m_kernelCompilationFailures++;
return kernel;
}
};
#endif //USE_AMD_OPENCL
//
void SoftDemo::createStack( btCollisionShape* boxShape, float halfCubeSize, int size, float zPos )
{
@@ -409,6 +554,50 @@ static void Init_Impact(SoftDemo* pdemo)
pdemo->localCreateRigidBody(10,startTransform,new btBoxShape(btVector3(2,2,2)));
}
static void Init_CapsuleCollision(SoftDemo* pdemo)
{
#ifdef USE_AMD_OPENCL
btAlignedObjectArray<btSoftBody*> emptyArray;
if (g_openCLSIMDSolver)
g_openCLSIMDSolver->optimize(emptyArray);
#endif //USE_AMD_OPENCL
//TRACEDEMO
const btScalar s=4;
const btScalar h=6;
const int r=20;
btTransform startTransform;
startTransform.setIdentity();
startTransform.setOrigin(btVector3(0,h-2,0));
btCollisionShape* capsuleShape= new btCapsuleShapeX(1,5);
// capsuleShape->setMargin( 0.5 );
// capsule->setLocalScaling(btVector3(5,1,1));
// btRigidBody* body=pdemo->localCreateRigidBody(20,startTransform,capsuleShape);
btRigidBody* body=pdemo->localCreateRigidBody(0,startTransform,capsuleShape);
// body->setFriction( 0.8f );
int fixed=0;//4+8;
btSoftBody* psb=btSoftBodyHelpers::CreatePatch(pdemo->m_softBodyWorldInfo,btVector3(-s,h,-s),
btVector3(+s,h,-s),
btVector3(-s,h,+s),
btVector3(+s,h,+s),r,r,fixed,true);
pdemo->getSoftDynamicsWorld()->addSoftBody(psb);
// psb->setTotalMass(1);
psb->m_cfg.piterations = 10;
psb->m_cfg.citerations = 10;
psb->m_cfg.diterations = 10;
// psb->m_cfg.viterations = 10;
// psb->appendAnchor(0,body);
// psb->appendAnchor(r-1,body);
// pdemo->m_cutting=true;
}
//
// Collide
//
@@ -1318,7 +1507,8 @@ static void Init_TetraCube(SoftDemo* pdemo)
Init_Ropes,
Init_RopeAttach,
Init_ClothAttach,
Init_Sticks,
Init_Sticks,
Init_CapsuleCollision,
Init_Collide,
Init_Collide2,
Init_Collide3,
@@ -1499,6 +1689,11 @@ void SoftDemo::clientMoveAndDisplay()
#endif
#ifdef USE_AMD_OPENCL
if (g_openCLSIMDSolver)
g_openCLSIMDSolver->copyBackToSoftBodies();
#endif //USE_AMD_OPENCL
if(m_drag)
{
m_node->m_v*=0;
@@ -1615,26 +1810,29 @@ void SoftDemo::renderme()
/* Cast rays */
{
m_clock.reset();
btVector3* org=&origins[0];
btScalar* fraction=&fractions[0];
btSoftBody** psbs=&sbs[0];
btSoftBody::sRayCast results;
for(int i=0,ni=origins.size(),nb=sbs.size();i<ni;++i)
if (sbs.size())
{
for(int ib=0;ib<nb;++ib)
btVector3* org=&origins[0];
btScalar* fraction=&fractions[0];
btSoftBody** psbs=&sbs[0];
btSoftBody::sRayCast results;
for(int i=0,ni=origins.size(),nb=sbs.size();i<ni;++i)
{
btVector3 rayFrom = *org;
btVector3 rayTo = rayFrom+dir*rayLength;
if(psbs[ib]->rayTest(rayFrom,rayTo,results))
for(int ib=0;ib<nb;++ib)
{
*fraction=results.fraction;
btVector3 rayFrom = *org;
btVector3 rayTo = rayFrom+dir*rayLength;
if(psbs[ib]->rayTest(rayFrom,rayTo,results))
{
*fraction=results.fraction;
}
}
++org;++fraction;
}
++org;++fraction;
long ms=btMax<long>(m_clock.getTimeMilliseconds(),1);
long rayperseconds=(1000*(origins.size()*sbs.size()))/ms;
printf("%d ms (%d rays/s)\r\n",int(ms),int(rayperseconds));
}
long ms=btMax<long>(m_clock.getTimeMilliseconds(),1);
long rayperseconds=(1000*(origins.size()*sbs.size()))/ms;
printf("%d ms (%d rays/s)\r\n",int(ms),int(rayperseconds));
}
/* Draw rays */
const btVector3 c[]={ origins[0],
@@ -1994,7 +2192,35 @@ void SoftDemo::initPhysics()
m_solver = solver;
btDiscreteDynamicsWorld* world = new btSoftRigidDynamicsWorld(m_dispatcher,m_broadphase,m_solver,m_collisionConfiguration);
btSoftBodySolver* softBodySolver = 0;
#ifdef USE_AMD_OPENCL
static bool once = true;
if (once)
{
once=false;
initCL(0,0);
}
if( g_openCLSIMDSolver )
delete g_openCLSIMDSolver;
if( g_softBodyOutput )
delete g_softBodyOutput;
if (0)
{
g_openCLSIMDSolver = new btOpenCLSoftBodySolverSIMDAware( g_cqCommandQue, g_cxMainContext);
// g_openCLSIMDSolver = new btOpenCLSoftBodySolver( g_cqCommandQue, g_cxMainContext);
g_openCLSIMDSolver->setCLFunctions(new CachingCLFunctions(g_cqCommandQue, g_cxMainContext));
}
softBodySolver = g_openCLSIMDSolver;
g_softBodyOutput = new btSoftBodySolverOutputCLtoCPU;
#endif //USE_AMD_OPENCL
btDiscreteDynamicsWorld* world = new btSoftRigidDynamicsWorld(m_dispatcher,m_broadphase,m_solver,m_collisionConfiguration,softBodySolver);
m_dynamicsWorld = world;
m_dynamicsWorld->setInternalTickCallback(pickingPreTickCallback,this,true);
@@ -2003,11 +2229,6 @@ void SoftDemo::initPhysics()
m_dynamicsWorld->setGravity(btVector3(0,-10,0));
m_softBodyWorldInfo.m_gravity.setValue(0,-10,0);
// clientResetScene();
m_softBodyWorldInfo.m_sparsesdf.Initialize();