Synchronize changes from branches/GpuClothAMD to trunk

Main improvements are: GPU cloth collision detection against a capsule shape, OpenCL-OpenGL interoperability (keeping data buffers on GPU), and bug fixes. Thanks to Lee Howes.
2011-02-27 09:07:07 +00:00
parent ec1bd45f4f
commit d52f58edd8
37 changed files with 3267 additions and 2481 deletions
--- a/Demos/SharedOpenCL/btOclCommon.cpp
+++ b/Demos/SharedOpenCL/btOclCommon.cpp
@@ -29,10 +29,11 @@ static char* spPlatformVendor =
 "Unknown Vendor";
 #endif

+#ifndef CL_PLATFORM_MINI_CL
+#include "CL/cl_gl.h"
+#endif

-
-
-cl_context btOclCommon::createContextFromType(cl_device_type deviceType, cl_int* pErrNum)
+cl_context btOclCommon::createContextFromType(cl_device_type deviceType, cl_int* pErrNum, void* pGLContext, void* pGLDC )
 {
    cl_uint numPlatforms;    
 	cl_platform_id platform = NULL;    
@@ -76,12 +77,27 @@ cl_context btOclCommon::createContextFromType(cl_device_type deviceType, cl_int*
 	 * If we could find our platform, use it. Otherwise pass a NULL and get whatever the     
 	 * implementation thinks we should be using.     
 	 */
-    cl_context_properties cps[3] =     
+    cl_context_properties cps[7] =     
 	{        
 		CL_CONTEXT_PLATFORM,         
-		(cl_context_properties)platform,         
-		0    
+		(cl_context_properties)platform, 
+		0,
+		0,
+		0,
+		0,
+		0
 	};    
+#ifndef CL_PLATFORM_MINI_CL
+	// If we have a gl context then enable interop
+	if( pGLContext )
+	{
+		cps[2] = CL_GL_CONTEXT_KHR;
+		cps[3] = (cl_context_properties)pGLContext;
+		cps[4] = CL_WGL_HDC_KHR;
+		cps[5] = (cl_context_properties)pGLDC;
+	}
+#endif //CL_PLATFORM_MINI_CL
+
 	/* Use NULL for backward compatibility */    
 	cl_context_properties* cprops = (NULL == platform) ? NULL : cps;
    cl_context retContext = clCreateContextFromType(cprops, 
--- a/Demos/SharedOpenCL/btOclCommon.h
+++ b/Demos/SharedOpenCL/btOclCommon.h
@@ -34,7 +34,10 @@ subject to the following restrictions:
 class btOclCommon
 {
 public:
-	static cl_context createContextFromType(cl_device_type deviceType, cl_int* pErrNum);
+	// CL Context optionally takes a GL context. This is a generic type because we don't really want this code
+	// to have to understand GL types.
+	// It is a HGLRC in _WIN32 or a GLXContext otherwise.
+	static cl_context createContextFromType(cl_device_type deviceType, cl_int* pErrNum, void* pGLCtx = 0, void* pGLDC = 0);
 };


--- a/Demos/SharedOpenCL/btOclUtils.cpp
+++ b/Demos/SharedOpenCL/btOclUtils.cpp
@@ -87,7 +87,32 @@ cl_device_id btOclGetMaxFlopsDev(cl_context cxMainContext)
    cl_uint clock_frequency;
    clGetDeviceInfo(cdDevices[current_device], CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(clock_frequency), &clock_frequency, NULL);
    
-	max_flops = compute_units * clock_frequency;
+	cl_device_type device_type;
+    clGetDeviceInfo(cdDevices[current_device], CL_DEVICE_TYPE, sizeof(device_type), &device_type, NULL);
+
+	int SIMDmultiplier = 1;
+
+	if( device_type == CL_DEVICE_TYPE_CPU )
+	{
+		// For simplicity assume that the CPU is running single SSE instructions
+		// This will of course depend on the kernel
+		SIMDmultiplier = 4;
+	} else if( device_type == CL_DEVICE_TYPE_GPU ) {
+		// Approximation to GPU compute power
+		// As long as this beats the CPU number that's the important thing, really
+#if defined(CL_PLATFORM_AMD)
+		// 16 processing elements, 5 ALUs each
+		SIMDmultiplier = 80;
+#elif defined(CL_PLATFORM_NVIDIA)
+		// 8 processing elements, dual issue - pre-Fermi at least
+		SIMDmultiplier = 16;
+#else
+		SIMDmultiplier = 1;
+#endif
+	}
+
+
+	max_flops = compute_units * clock_frequency * SIMDmultiplier;
 	++current_device;

 	while( current_device < device_count )