Synchronize changes from branches/GpuClothAMD to trunk

Main improvements are: GPU cloth collision detection against a capsule shape,
OpenCL-OpenGL interoperability (keeping data buffers on the GPU), and bug fixes.
Thanks to Lee Howes.
This commit is contained in:
erwin.coumans
2011-02-27 09:07:07 +00:00
parent ec1bd45f4f
commit d52f58edd8
37 changed files with 3267 additions and 2481 deletions

View File

@@ -29,10 +29,11 @@ static char* spPlatformVendor =
"Unknown Vendor";
#endif
#ifndef CL_PLATFORM_MINI_CL
#include "CL/cl_gl.h"
#endif
cl_context btOclCommon::createContextFromType(cl_device_type deviceType, cl_int* pErrNum)
cl_context btOclCommon::createContextFromType(cl_device_type deviceType, cl_int* pErrNum, void* pGLContext, void* pGLDC )
{
cl_uint numPlatforms;
cl_platform_id platform = NULL;
@@ -76,12 +77,27 @@ cl_context btOclCommon::createContextFromType(cl_device_type deviceType, cl_int*
* If we could find our platform, use it. Otherwise pass a NULL and get whatever the
* implementation thinks we should be using.
*/
cl_context_properties cps[3] =
cl_context_properties cps[7] =
{
CL_CONTEXT_PLATFORM,
(cl_context_properties)platform,
0
(cl_context_properties)platform,
0,
0,
0,
0,
0
};
#ifndef CL_PLATFORM_MINI_CL
// If we have a gl context then enable interop
if( pGLContext )
{
cps[2] = CL_GL_CONTEXT_KHR;
cps[3] = (cl_context_properties)pGLContext;
cps[4] = CL_WGL_HDC_KHR;
cps[5] = (cl_context_properties)pGLDC;
}
#endif //CL_PLATFORM_MINI_CL
/* Use NULL for backward compatibility */
cl_context_properties* cprops = (NULL == platform) ? NULL : cps;
cl_context retContext = clCreateContextFromType(cprops,

View File

@@ -34,7 +34,10 @@ subject to the following restrictions:
class btOclCommon
{
public:
static cl_context createContextFromType(cl_device_type deviceType, cl_int* pErrNum);
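// The CL context optionally takes a GL context, which enables CL-GL interop. The handles are passed as
// generic void* because we don't really want this code to have to understand GL types.
// pGLCtx is an HGLRC on _WIN32 or a GLXContext otherwise.
// pGLDC is the device-context handle that accompanies pGLCtx (passed to OpenCL as CL_WGL_HDC_KHR).
// NOTE(review): only the WGL device-context property is set by the implementation - confirm GLX handling.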
static cl_context createContextFromType(cl_device_type deviceType, cl_int* pErrNum, void* pGLCtx = 0, void* pGLDC = 0);
};

View File

@@ -87,7 +87,32 @@ cl_device_id btOclGetMaxFlopsDev(cl_context cxMainContext)
cl_uint clock_frequency;
clGetDeviceInfo(cdDevices[current_device], CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(clock_frequency), &clock_frequency, NULL);
max_flops = compute_units * clock_frequency;
cl_device_type device_type;
clGetDeviceInfo(cdDevices[current_device], CL_DEVICE_TYPE, sizeof(device_type), &device_type, NULL);
int SIMDmultiplier = 1;
if( device_type == CL_DEVICE_TYPE_CPU )
{
// For simplicity, assume the CPU is executing 4-wide (single-precision) SSE instructions,
// hence the multiplier of 4. The real throughput will of course depend on the kernel.
SIMDmultiplier = 4;
} else if( device_type == CL_DEVICE_TYPE_GPU ) {
// Approximation to GPU compute power
// As long as this beats the CPU number that's the important thing, really
#if defined(CL_PLATFORM_AMD)
// 16 processing elements, 5 ALUs each
SIMDmultiplier = 80;
#elif defined(CL_PLATFORM_NVIDIA)
// 8 processing elements, dual issue - pre-Fermi at least
SIMDmultiplier = 16;
#else
SIMDmultiplier = 1;
#endif
}
max_flops = compute_units * clock_frequency * SIMDmultiplier;
++current_device;
while( current_device < device_count )