Synchronize changes from branches/GpuClothAMD to trunk
Main improvements are: GPU cloth collision detection against a capsule shape, OpenCL-OpenGL interoperability (keeping data buffers on GPU), and bug fixes. Thanks to Lee Howes.
This commit is contained in:
@@ -29,10 +29,11 @@ static char* spPlatformVendor =
|
||||
"Unknown Vendor";
|
||||
#endif
|
||||
|
||||
#ifndef CL_PLATFORM_MINI_CL
|
||||
#include "CL/cl_gl.h"
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
cl_context btOclCommon::createContextFromType(cl_device_type deviceType, cl_int* pErrNum)
|
||||
cl_context btOclCommon::createContextFromType(cl_device_type deviceType, cl_int* pErrNum, void* pGLContext, void* pGLDC )
|
||||
{
|
||||
cl_uint numPlatforms;
|
||||
cl_platform_id platform = NULL;
|
||||
@@ -76,12 +77,27 @@ cl_context btOclCommon::createContextFromType(cl_device_type deviceType, cl_int*
|
||||
* If we could find our platform, use it. Otherwise pass a NULL and get whatever the
|
||||
* implementation thinks we should be using.
|
||||
*/
|
||||
cl_context_properties cps[3] =
|
||||
cl_context_properties cps[7] =
|
||||
{
|
||||
CL_CONTEXT_PLATFORM,
|
||||
(cl_context_properties)platform,
|
||||
0
|
||||
(cl_context_properties)platform,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0
|
||||
};
|
||||
#ifndef CL_PLATFORM_MINI_CL
|
||||
// If we have a gl context then enable interop
|
||||
if( pGLContext )
|
||||
{
|
||||
cps[2] = CL_GL_CONTEXT_KHR;
|
||||
cps[3] = (cl_context_properties)pGLContext;
|
||||
cps[4] = CL_WGL_HDC_KHR;
|
||||
cps[5] = (cl_context_properties)pGLDC;
|
||||
}
|
||||
#endif //CL_PLATFORM_MINI_CL
|
||||
|
||||
/* Use NULL for backward compatibility */
|
||||
cl_context_properties* cprops = (NULL == platform) ? NULL : cps;
|
||||
cl_context retContext = clCreateContextFromType(cprops,
|
||||
|
||||
@@ -34,7 +34,10 @@ subject to the following restrictions:
|
||||
class btOclCommon
|
||||
{
|
||||
public:
|
||||
static cl_context createContextFromType(cl_device_type deviceType, cl_int* pErrNum);
|
||||
// CL Context optionally takes a GL context. This is a generic type because we don't really want this code
|
||||
// to have to understand GL types.
|
||||
// It is a HGLRC in _WIN32 or a GLXContext otherwise.
|
||||
static cl_context createContextFromType(cl_device_type deviceType, cl_int* pErrNum, void* pGLCtx = 0, void* pGLDC = 0);
|
||||
};
|
||||
|
||||
|
||||
|
||||
@@ -87,7 +87,32 @@ cl_device_id btOclGetMaxFlopsDev(cl_context cxMainContext)
|
||||
cl_uint clock_frequency;
|
||||
clGetDeviceInfo(cdDevices[current_device], CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(clock_frequency), &clock_frequency, NULL);
|
||||
|
||||
max_flops = compute_units * clock_frequency;
|
||||
cl_device_type device_type;
|
||||
clGetDeviceInfo(cdDevices[current_device], CL_DEVICE_TYPE, sizeof(device_type), &device_type, NULL);
|
||||
|
||||
int SIMDmultiplier = 1;
|
||||
|
||||
if( device_type == CL_DEVICE_TYPE_CPU )
|
||||
{
|
||||
// For simplicity assume that the CPU is running single SSE instructions
|
||||
// This will of course depend on the kernel
|
||||
SIMDmultiplier = 4;
|
||||
} else if( device_type == CL_DEVICE_TYPE_GPU ) {
|
||||
// Approximation to GPU compute power
|
||||
// As long as this beats the CPU number that's the important thing, really
|
||||
#if defined(CL_PLATFORM_AMD)
|
||||
// 16 processing elements, 5 ALUs each
|
||||
SIMDmultiplier = 80;
|
||||
#elif defined(CL_PLATFORM_NVIDIA)
|
||||
// 8 processing elements, dual issue - pre-Fermi at least
|
||||
SIMDmultiplier = 16;
|
||||
#else
|
||||
SIMDmultiplier = 1;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
max_flops = compute_units * clock_frequency * SIMDmultiplier;
|
||||
++current_device;
|
||||
|
||||
while( current_device < device_count )
|
||||
|
||||
Reference in New Issue
Block a user