From 7d4e2873e22687af07aaac25100357c755329b8d Mon Sep 17 00:00:00 2001 From: "erwin.coumans" Date: Mon, 8 Feb 2010 22:42:58 +0000 Subject: [PATCH] move some recent MiniCL work to trunk --- Demos/CMakeLists.txt | 36 +- Demos/MiniCL_VectorAdd/CMakeLists.txt | 2 +- Demos/MiniCL_VectorAdd/Jamfile | 5 - Demos/MiniCL_VectorAdd/MiniCL_VectorAdd.cpp | 18 +- Demos/MiniCL_VectorAdd/VectorAddKernels.cl | 47 + src/BulletMultiThreaded/CMakeLists.txt | 2 + .../BulletMultiThreaded}/MiniCL.cpp | 858 +++++++++++------- .../MiniCLTask/MiniCLTask.cpp | 66 +- .../MiniCLTask/MiniCLTask.h | 47 +- .../MiniCLTaskScheduler.cpp | 312 ++++++- src/BulletMultiThreaded/MiniCLTaskScheduler.h | 35 +- src/BulletMultiThreaded/PlatformDefinitions.h | 2 + src/BulletMultiThreaded/PpuAddressSpace.h | 4 +- src/MiniCL/cl_MiniCL_Defs.h | 263 ++++++ src/MiniCL/cl_platform.h | 498 +++++----- 15 files changed, 1471 insertions(+), 724 deletions(-) delete mode 100644 Demos/MiniCL_VectorAdd/Jamfile create mode 100644 Demos/MiniCL_VectorAdd/VectorAddKernels.cl rename {Demos/MiniCL_VectorAdd => src/BulletMultiThreaded}/MiniCL.cpp (53%) create mode 100644 src/MiniCL/cl_MiniCL_Defs.h diff --git a/Demos/CMakeLists.txt b/Demos/CMakeLists.txt index 96d5ff1e1..ac5f4a8a1 100644 --- a/Demos/CMakeLists.txt +++ b/Demos/CMakeLists.txt @@ -1,26 +1,27 @@ IF (USE_GLUT) +SET(SharedDemoSubdirs + OpenGL AllBulletDemos ConvexDecompositionDemo Benchmarks HelloWorld + CcdPhysicsDemo ConstraintDemo SliderConstraintDemo GenericJointDemo Raytracer + RagdollDemo ForkLiftDemo BasicDemo Box2dDemo BspDemo MovingConcaveDemo VehicleDemo + UserCollisionAlgorithm CharacterDemo SoftDemo HeightFieldFluidDemo + CollisionInterfaceDemo ConcaveConvexcastDemo SimplexDemo DynamicControlDemo + DoublePrecisionDemo ConcaveDemo CollisionDemo + ContinuousConvexCollision ConcaveRaycastDemo GjkConvexCastDemo + MultiMaterialDemo SerializeDemo InternalEdgeDemo +) + + if (CMAKE_SIZEOF_VOID_P MATCHES "8") - SUBDIRS( OpenGL AllBulletDemos ConvexDecompositionDemo Benchmarks HelloWorld - CcdPhysicsDemo ConstraintDemo SliderConstraintDemo GenericJointDemo Raytracer - RagdollDemo ForkLiftDemo BasicDemo Box2dDemo BspDemo MovingConcaveDemo VehicleDemo - UserCollisionAlgorithm CharacterDemo SoftDemo HeightFieldFluidDemo - CollisionInterfaceDemo ConcaveConvexcastDemo SimplexDemo DynamicControlDemo - DoublePrecisionDemo ConcaveDemo CollisionDemo - ContinuousConvexCollision ConcaveRaycastDemo GjkConvexCastDemo - MultiMaterialDemo SerializeDemo InternalEdgeDemo + SUBDIRS( OpenGL + ${SharedDemoSubdirs} ) else (CMAKE_SIZEOF_VOID_P MATCHES "8") - SUBDIRS( OpenGL AllBulletDemos ConvexDecompositionDemo Benchmarks HelloWorld - MultiThreadedDemo CcdPhysicsDemo ConstraintDemo SliderConstraintDemo Raytracer - GenericJointDemo RagdollDemo ForkLiftDemo BasicDemo Box2dDemo BspDemo MovingConcaveDemo - VehicleDemo UserCollisionAlgorithm CharacterDemo SoftDemo - HeightFieldFluidDemo - CollisionInterfaceDemo ConcaveConvexcastDemo SimplexDemo DynamicControlDemo - DoublePrecisionDemo ConcaveDemo CollisionDemo - ContinuousConvexCollision ConcaveRaycastDemo GjkConvexCastDemo - MultiMaterialDemo SerializeDemo InternalEdgeDemo + SUBDIRS( OpenGL + ${SharedDemoSubdirs} + MultiThreadedDemo + MiniCL_VectorAdd ) endif (CMAKE_SIZEOF_VOID_P MATCHES "8") @@ -40,6 +41,7 @@ SUBDIRS( GenericJointDemo SerializeDemo SoftDemo + MiniCL_VectorAdd ) ENDIF (USE_GLUT) \ No newline at end of file diff --git a/Demos/MiniCL_VectorAdd/CMakeLists.txt b/Demos/MiniCL_VectorAdd/CMakeLists.txt index 05ae081db..849e18220 100644 --- a/Demos/MiniCL_VectorAdd/CMakeLists.txt +++ b/Demos/MiniCL_VectorAdd/CMakeLists.txt @@ -11,7 +11,7 @@ LINK_LIBRARIES( ADD_EXECUTABLE(AppMiniCLVectorAdd MiniCL_VectorAdd.cpp -MiniCL.cpp +VectorAddKernels.cl ) IF (UNIX) diff --git a/Demos/MiniCL_VectorAdd/Jamfile b/Demos/MiniCL_VectorAdd/Jamfile deleted file mode 100644 index 53ed64874..000000000 --- a/Demos/MiniCL_VectorAdd/Jamfile +++ /dev/null @@ -1,5 +0,0 @@ -SubDir TOP Demos MiniCL_VectorAdd ; - -BulletMiniCLDemo MiniCL_VectorAdd : [ Wildcard *.h *.cpp ] ; - -MsvcIncDirs MiniCL_VectorAdd : "../../src" ; diff --git a/Demos/MiniCL_VectorAdd/MiniCL_VectorAdd.cpp b/Demos/MiniCL_VectorAdd/MiniCL_VectorAdd.cpp index e737b179f..890d1e384 100644 --- a/Demos/MiniCL_VectorAdd/MiniCL_VectorAdd.cpp +++ b/Demos/MiniCL_VectorAdd/MiniCL_VectorAdd.cpp @@ -5,7 +5,13 @@ ///Instead of #include we include ///Apart from this include file, all other code should compile and work on OpenCL compliant implementation -#include +#define USE_MINICL 1 +#ifdef USE_MINICL +#include "MiniCL/cl.h" +#else //USE_MINICL +#include +#endif//USE_MINICL + #include #include #include @@ -170,3 +176,13 @@ int main(int argc, char **argv) free(srcB); free (dst); } + + +#ifdef USE_MINICL +#include "MiniCL/cl_MiniCL_Defs.h" +extern "C" +{ + #include "VectorAddKernels.cl" +} +MINICL_REGISTER(VectorAdd) +#endif//USE_MINICL \ No newline at end of file diff --git a/Demos/MiniCL_VectorAdd/VectorAddKernels.cl b/Demos/MiniCL_VectorAdd/VectorAddKernels.cl new file mode 100644 index 000000000..4e2e836d0 --- /dev/null +++ b/Demos/MiniCL_VectorAdd/VectorAddKernels.cl @@ -0,0 +1,47 @@ +/* +Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org +Copyright (C) 2006 - 2009 Sony Computer Entertainment Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + +///GUID_ARG is only used by MiniCL to pass in the guid used by its get_global_id implementation +#ifndef GUID_ARG +#define GUID_ARG +#endif + +/////////////////////////////////////////////////// +// OpenCL Kernel Function for element by element vector addition +__kernel void VectorAdd(__global const float8* a, __global const float8* b, __global float8* c GUID_ARG) +{ + // get oct-float index into global data array + int iGID = get_global_id(0); + + // read inputs into registers + float8 f8InA = a[iGID]; + float8 f8InB = b[iGID]; + float8 f8Out = (float8)0.0f; + + + // add the vector elements + f8Out.s0 = f8InA.s0 + f8InB.s0; + f8Out.s1 = f8InA.s1 + f8InB.s1; + f8Out.s2 = f8InA.s2 + f8InB.s2; + f8Out.s3 = f8InA.s3 + f8InB.s3; + f8Out.s4 = f8InA.s4 + f8InB.s4; + f8Out.s5 = f8InA.s5 + f8InB.s5; + f8Out.s6 = f8InA.s6 + f8InB.s6; + f8Out.s7 = f8InA.s7 + f8InB.s7; + + // write back out to GMEM + c[get_global_id(0)] = f8Out; +} + diff --git a/src/BulletMultiThreaded/CMakeLists.txt b/src/BulletMultiThreaded/CMakeLists.txt index 0e25f5099..90ac8a467 100644 --- a/src/BulletMultiThreaded/CMakeLists.txt +++ b/src/BulletMultiThreaded/CMakeLists.txt @@ -56,6 +56,7 @@ ADD_LIBRARY(BulletMultiThreaded btGpuUtilsSharedDefs.h #MiniCL provides a small subset of OpenCL + MiniCL.cpp MiniCLTaskScheduler.cpp MiniCLTaskScheduler.h MiniCLTask/MiniCLTask.cpp @@ -63,6 +64,7 @@ ADD_LIBRARY(BulletMultiThreaded ../MiniCL/cl.h ../MiniCL/cl_gl.h ../MiniCL/cl_platform.h + ../MiniCL/cl_MiniCL_Defs.h ) IF (BUILD_SHARED_LIBS) diff --git a/Demos/MiniCL_VectorAdd/MiniCL.cpp b/src/BulletMultiThreaded/MiniCL.cpp similarity index 53% rename from Demos/MiniCL_VectorAdd/MiniCL.cpp rename to src/BulletMultiThreaded/MiniCL.cpp index 9f4595290..78741e6a8 100644 --- a/Demos/MiniCL_VectorAdd/MiniCL.cpp +++ b/src/BulletMultiThreaded/MiniCL.cpp @@ -1,346 +1,512 @@ - -#include -#define __PHYSICS_COMMON_H__ 1 -#ifdef WIN32 -#include "BulletMultiThreaded/Win32ThreadSupport.h" -#else -#include "BulletMultiThreaded/SequentialThreadSupport.h" -#endif -#include "BulletMultiThreaded/MiniCLTaskScheduler.h" -#include "BulletMultiThreaded/MiniCLTask/MiniCLTask.h" -#include "LinearMath/btMinMax.h" - -/* - m_threadSupportCollision = new Win32ThreadSupport(Win32ThreadSupport::Win32ThreadConstructionInfo( - "collision", - processCollisionTask, - createCollisionLocalStoreMemory, - maxNumOutstandingTasks)); - - if (!m_spuCollisionTaskProcess) - m_spuCollisionTaskProcess = new SpuCollisionTaskProcess(m_threadInterface,m_maxNumOutstandingTasks); - - m_spuCollisionTaskProcess->initialize2(dispatchInfo.m_useEpa); - - m_spuCollisionTaskProcess->addWorkToTask(pairPtr,i,endIndex); - - //make sure all SPU work is done - m_spuCollisionTaskProcess->flush2(); - - -*/ - - - -CL_API_ENTRY cl_int CL_API_CALL clGetDeviceInfo( - cl_device_id device , - cl_device_info param_name , - size_t param_value_size , - void * param_value , - size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0 -{ - - switch (param_name) - { - case CL_DEVICE_NAME: - { - char deviceName[] = "CPU"; - int nameLen = strlen(deviceName)+1; - assert(param_value_size>strlen(deviceName)); - if (nameLen < param_value_size) - { - sprintf((char*)param_value,"CPU"); - } else - { - printf("error: param_value_size should be at least %d, but it is %d\n",nameLen,param_value_size); - } - break; - } - case CL_DEVICE_TYPE: - { - if (param_value_size>=sizeof(cl_device_type)) - { - cl_device_type* deviceType = (cl_device_type*)param_value; - *deviceType = CL_DEVICE_TYPE_CPU; - } else - { - printf("error: param_value_size should be at least %d\n",sizeof(cl_device_type)); - } - break; - } - case CL_DEVICE_MAX_COMPUTE_UNITS: - { - if (param_value_size>=sizeof(cl_uint)) - { - cl_uint* numUnits = (cl_uint*)param_value; - *numUnits= 4; - } else - { - printf("error: param_value_size should be at least %d\n",sizeof(cl_uint)); - } - - break; - } - case CL_DEVICE_MAX_WORK_ITEM_SIZES: - { - size_t workitem_size[3]; - - if (param_value_size>=sizeof(workitem_size)) - { - size_t* workItemSize = (size_t*)param_value; - workItemSize[0] = 64; - workItemSize[1] = 24; - workItemSize[2] = 16; - } else - { - printf("error: param_value_size should be at least %d\n",sizeof(cl_uint)); - } - break; - } - default: - { - printf("error: unsupported param_name:%d\n",param_name); - } - } - - - return 0; -} - -CL_API_ENTRY cl_int CL_API_CALL clReleaseMemObject(cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0 -{ - return 0; -} - - - -CL_API_ENTRY cl_int CL_API_CALL clReleaseCommandQueue(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0 -{ - return 0; -} - -CL_API_ENTRY cl_int CL_API_CALL clReleaseProgram(cl_program /* program */) CL_API_SUFFIX__VERSION_1_0 -{ - return 0; -} - -CL_API_ENTRY cl_int CL_API_CALL clReleaseKernel(cl_kernel /* kernel */) CL_API_SUFFIX__VERSION_1_0 -{ - return 0; -} - - -// Enqueued Commands APIs -CL_API_ENTRY cl_int CL_API_CALL clEnqueueReadBuffer(cl_command_queue command_queue , - cl_mem buffer , - cl_bool /* blocking_read */, - size_t /* offset */, - size_t cb , - void * ptr , - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0 -{ - MiniCLTaskScheduler* scheduler = (MiniCLTaskScheduler*) command_queue; - - ///wait for all work items to be completed - scheduler->flush(); - - memcpy(ptr,buffer,cb); - return 0; -} - - -CL_API_ENTRY cl_int CL_API_CALL clEnqueueNDRangeKernel(cl_command_queue /* command_queue */, - cl_kernel clKernel , - cl_uint work_dim , - const size_t * /* global_work_offset */, - const size_t * global_work_size , - const size_t * /* local_work_size */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0 -{ - - - MiniCLKernel* kernel = (MiniCLKernel*) clKernel; - for (int ii=0;iim_scheduler->getMaxNumOutstandingTasks(); - int numWorkItems = global_work_size[ii]; - - //at minimum 64 work items per task - int numWorkItemsPerTask = btMax(64,numWorkItems / maxTask); - - for (int t=0;tm_scheduler->issueTask(t,endIndex,kernel->m_kernelProgramCommandId,(char*)&kernel->m_argData[0][0],kernel->m_argSizes); - t = endIndex; - } - } -/* - - void* bla = 0; - - scheduler->issueTask(bla,2,3); - scheduler->flush(); - - */ - - return 0; -} - -CL_API_ENTRY cl_int CL_API_CALL clSetKernelArg(cl_kernel clKernel , - cl_uint arg_index , - size_t arg_size , - const void * arg_value ) CL_API_SUFFIX__VERSION_1_0 -{ - MiniCLKernel* kernel = (MiniCLKernel* ) clKernel; - assert(arg_size < MINICL_MAX_ARGLENGTH); - if (arg_index>MINI_CL_MAX_ARG) - { - printf("error: clSetKernelArg arg_index (%d) exceeds %d\n",arg_index,MINI_CL_MAX_ARG); - } else - { - if (arg_size>=MINICL_MAX_ARGLENGTH) - { - printf("error: clSetKernelArg argdata too large: %d (maximum is %d)\n",arg_size,MINICL_MAX_ARGLENGTH); - } else - { - memcpy( kernel->m_argData[arg_index],arg_value,arg_size); - kernel->m_argSizes[arg_index] = arg_size; - } - } - return 0; -} - -// Kernel Object APIs -CL_API_ENTRY cl_kernel CL_API_CALL clCreateKernel(cl_program program , - const char * kernel_name , - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0 -{ - MiniCLTaskScheduler* scheduler = (MiniCLTaskScheduler*) program; - MiniCLKernel* kernel = new MiniCLKernel(); - - kernel->m_kernelProgramCommandId = scheduler->findProgramCommandIdByName(kernel_name); - kernel->m_scheduler = scheduler; - - return (cl_kernel)kernel; - -} - - -CL_API_ENTRY cl_int CL_API_CALL clBuildProgram(cl_program /* program */, - cl_uint /* num_devices */, - const cl_device_id * /* device_list */, - const char * /* options */, - void (*pfn_notify)(cl_program /* program */, void * /* user_data */), - void * /* user_data */) CL_API_SUFFIX__VERSION_1_0 -{ - return 0; -} - -CL_API_ENTRY cl_program CL_API_CALL clCreateProgramWithBinary(cl_context context , - cl_uint /* num_devices */, - const cl_device_id * /* device_list */, - const size_t * /* lengths */, - const unsigned char ** /* binaries */, - cl_int * /* binary_status */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0 -{ - return (cl_program)context; -} - - -// Memory Object APIs -CL_API_ENTRY cl_mem CL_API_CALL clCreateBuffer(cl_context /* context */, - cl_mem_flags flags , - size_t size, - void * host_ptr , - cl_int * errcode_ret ) CL_API_SUFFIX__VERSION_1_0 -{ - cl_mem buf = (cl_mem)malloc(size); - if ((flags&CL_MEM_COPY_HOST_PTR) && host_ptr) - { - memcpy(buf,host_ptr,size); - } - return buf; -} - -// Command Queue APIs -CL_API_ENTRY cl_command_queue CL_API_CALL clCreateCommandQueue(cl_context context , - cl_device_id /* device */, - cl_command_queue_properties /* properties */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0 -{ - return (cl_command_queue) context; -} - -extern CL_API_ENTRY cl_int CL_API_CALL clGetContextInfo(cl_context /* context */, - cl_context_info param_name , - size_t param_value_size , - void * param_value, - size_t * param_value_size_ret ) CL_API_SUFFIX__VERSION_1_0 -{ - - switch (param_name) - { - case CL_CONTEXT_DEVICES: - { - if (!param_value_size) - { - *param_value_size_ret = 13; - } else - { - sprintf((char*)param_value,"MiniCL_Test."); - } - break; - }; - default: - { - printf("unsupported\n"); - } - } - - return 0; -} - -CL_API_ENTRY cl_context CL_API_CALL clCreateContextFromType(cl_context_properties * /* properties */, - cl_device_type /* device_type */, - void (*pfn_notify)(const char *, const void *, size_t, void *) /* pfn_notify */, - void * /* user_data */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0 -{ - int maxNumOutstandingTasks = 4; - -#ifdef WIN32 - Win32ThreadSupport* threadSupport = new Win32ThreadSupport(Win32ThreadSupport::Win32ThreadConstructionInfo( - "MiniCL", - processMiniCLTask, //processCollisionTask, - createMiniCLLocalStoreMemory,//createCollisionLocalStoreMemory, - maxNumOutstandingTasks)); -#else - SequentialThreadSupport::SequentialThreadConstructionInfo stc("MiniCL",processMiniCLTask,createMiniCLLocalStoreMemory); - SequentialThreadSupport* threadSupport = new SequentialThreadSupport(stc); - -#endif - - - MiniCLTaskScheduler* scheduler = new MiniCLTaskScheduler(threadSupport,maxNumOutstandingTasks); - - return (cl_context)scheduler; -} - -CL_API_ENTRY cl_int CL_API_CALL clReleaseContext(cl_context context ) CL_API_SUFFIX__VERSION_1_0 -{ - - MiniCLTaskScheduler* scheduler = (MiniCLTaskScheduler*) context; - - btThreadSupportInterface* threadSupport = scheduler->getThreadSupportInterface(); - delete scheduler; - delete threadSupport; - - return 0; -} +/* + Copyright (C) 2010 Sony Computer Entertainment Inc. + All rights reserved. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. + +*/ + + +#include "MiniCL/cl.h" +#define __PHYSICS_COMMON_H__ 1 +#ifdef _WIN32 +#include "BulletMultiThreaded/Win32ThreadSupport.h" +#endif + +#include "BulletMultiThreaded/SequentialThreadSupport.h" +#include "MiniCLTaskScheduler.h" +#include "MiniCLTask/MiniCLTask.h" +#include "LinearMath/btMinMax.h" + +//#define DEBUG_MINICL_KERNELS 1 + + + + +CL_API_ENTRY cl_int CL_API_CALL clGetDeviceInfo( + cl_device_id device , + cl_device_info param_name , + size_t param_value_size , + void * param_value , + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0 +{ + + switch (param_name) + { + case CL_DEVICE_NAME: + { + char deviceName[] = "CPU"; + unsigned int nameLen = strlen(deviceName)+1; + assert(param_value_size>strlen(deviceName)); + if (nameLen < param_value_size) + { + sprintf_s((char*)param_value,param_value_size, "CPU"); + } else + { + printf("error: param_value_size should be at least %d, but it is %d\n",nameLen,param_value_size); + } + break; + } + case CL_DEVICE_TYPE: + { + if (param_value_size>=sizeof(cl_device_type)) + { + cl_device_type* deviceType = (cl_device_type*)param_value; + *deviceType = CL_DEVICE_TYPE_CPU; + } else + { + printf("error: param_value_size should be at least %d\n",sizeof(cl_device_type)); + } + break; + } + case CL_DEVICE_MAX_COMPUTE_UNITS: + { + if (param_value_size>=sizeof(cl_uint)) + { + cl_uint* numUnits = (cl_uint*)param_value; + *numUnits= 4; + } else + { + printf("error: param_value_size should be at least %d\n",sizeof(cl_uint)); + } + + break; + } + case CL_DEVICE_MAX_WORK_ITEM_SIZES: + { + size_t workitem_size[3]; + + if (param_value_size>=sizeof(workitem_size)) + { + size_t* workItemSize = (size_t*)param_value; + workItemSize[0] = 64; + workItemSize[1] = 24; + workItemSize[2] = 16; + } else + { + printf("error: param_value_size should be at least %d\n",sizeof(cl_uint)); + } + break; + } + case CL_DEVICE_MAX_CLOCK_FREQUENCY: + { + cl_uint* clock_frequency = (cl_uint*)param_value; + *clock_frequency = 3*1024; + break; + } + default: + { + printf("error: unsupported param_name:%d\n",param_name); + } + } + + + return 0; +} + +CL_API_ENTRY cl_int CL_API_CALL clReleaseMemObject(cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0 +{ + return 0; +} + + + +CL_API_ENTRY cl_int CL_API_CALL clReleaseCommandQueue(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0 +{ + return 0; +} + +CL_API_ENTRY cl_int CL_API_CALL clReleaseProgram(cl_program /* program */) CL_API_SUFFIX__VERSION_1_0 +{ + return 0; +} + +CL_API_ENTRY cl_int CL_API_CALL clReleaseKernel(cl_kernel /* kernel */) CL_API_SUFFIX__VERSION_1_0 +{ + return 0; +} + + +// Enqueued Commands APIs +CL_API_ENTRY cl_int CL_API_CALL clEnqueueReadBuffer(cl_command_queue command_queue , + cl_mem buffer , + cl_bool /* blocking_read */, + size_t offset , + size_t cb , + void * ptr , + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0 +{ + MiniCLTaskScheduler* scheduler = (MiniCLTaskScheduler*) command_queue; + + ///wait for all work items to be completed + scheduler->flush(); + + memcpy(ptr,(char*)buffer + offset,cb); + return 0; +} + + +CL_API_ENTRY cl_int clGetProgramBuildInfo(cl_program /* program */, + cl_device_id /* device */, + cl_program_build_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0 +{ + + return 0; +} + + +// Program Object APIs +CL_API_ENTRY cl_program +clCreateProgramWithSource(cl_context context , + cl_uint /* count */, + const char ** /* strings */, + const size_t * /* lengths */, + cl_int * errcode_ret ) CL_API_SUFFIX__VERSION_1_0 +{ + *errcode_ret = CL_SUCCESS; + return (cl_program)context; +} + +CL_API_ENTRY cl_int CL_API_CALL clEnqueueWriteBuffer(cl_command_queue command_queue , + cl_mem buffer , + cl_bool /* blocking_read */, + size_t offset, + size_t cb , + const void * ptr , + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0 +{ + MiniCLTaskScheduler* scheduler = (MiniCLTaskScheduler*) command_queue; + + ///wait for all work items to be completed + scheduler->flush(); + + memcpy((char*)buffer + offset, ptr,cb); + return 0; +} + +CL_API_ENTRY cl_int CL_API_CALL clFlush(cl_command_queue command_queue) +{ + MiniCLTaskScheduler* scheduler = (MiniCLTaskScheduler*) command_queue; + ///wait for all work items to be completed + scheduler->flush(); + return 0; +} + + +CL_API_ENTRY cl_int CL_API_CALL clEnqueueNDRangeKernel(cl_command_queue /* command_queue */, + cl_kernel clKernel , + cl_uint work_dim , + const size_t * /* global_work_offset */, + const size_t * global_work_size , + const size_t * /* local_work_size */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0 +{ + + + MiniCLKernel* kernel = (MiniCLKernel*) clKernel; + for (unsigned int ii=0;iim_scheduler->getMaxNumOutstandingTasks(); + int numWorkItems = global_work_size[ii]; + + //at minimum 64 work items per task + int numWorkItemsPerTask = btMax(64,numWorkItems / maxTask); + + for (int t=0;tm_scheduler->issueTask(t, endIndex, kernel); + t = endIndex; + } + } +/* + + void* bla = 0; + + scheduler->issueTask(bla,2,3); + scheduler->flush(); + + */ + + return 0; +} + +#define LOCAL_BUF_SIZE 32768 +static int sLocalMemBuf[LOCAL_BUF_SIZE * 4 + 16]; +static int* spLocalBufCurr = NULL; +static int sLocalBufUsed = LOCAL_BUF_SIZE; // so it will be reset at the first call +static void* localBufMalloc(int size) +{ + int size16 = (size + 15) >> 4; // in 16-byte units + if((sLocalBufUsed + size16) > LOCAL_BUF_SIZE) + { // reset + spLocalBufCurr = sLocalMemBuf; + while((int)spLocalBufCurr & 0x0F) spLocalBufCurr++; // align to 16 bytes + sLocalBufUsed = 0; + } + void* ret = spLocalBufCurr; + spLocalBufCurr += size16 * 4; + sLocalBufUsed += size; + return ret; +} + + + +CL_API_ENTRY cl_int CL_API_CALL clSetKernelArg(cl_kernel clKernel , + cl_uint arg_index , + size_t arg_size , + const void * arg_value ) CL_API_SUFFIX__VERSION_1_0 +{ + MiniCLKernel* kernel = (MiniCLKernel* ) clKernel; + btAssert(arg_size <= MINICL_MAX_ARGLENGTH); + if (arg_index>MINI_CL_MAX_ARG) + { + printf("error: clSetKernelArg arg_index (%d) exceeds %d\n",arg_index,MINI_CL_MAX_ARG); + } else + { +// if (arg_size>=MINICL_MAX_ARGLENGTH) + if (arg_size != MINICL_MAX_ARGLENGTH) + { + printf("error: clSetKernelArg argdata too large: %d (maximum is %d)\n",arg_size,MINICL_MAX_ARGLENGTH); + } + else + { + if(arg_value == NULL) + { // this is only for __local memory qualifier + void* ptr = localBufMalloc(arg_size); + kernel->m_argData[arg_index] = ptr; + } + else + { + memcpy(&(kernel->m_argData[arg_index]), arg_value, arg_size); + } + kernel->m_argSizes[arg_index] = arg_size; + if(arg_index >= kernel->m_numArgs) + { + kernel->m_numArgs = arg_index + 1; + kernel->updateLauncher(); + } + } + } + return 0; +} + +// Kernel Object APIs +CL_API_ENTRY cl_kernel CL_API_CALL clCreateKernel(cl_program program , + const char * kernel_name , + cl_int * errcode_ret ) CL_API_SUFFIX__VERSION_1_0 +{ + MiniCLTaskScheduler* scheduler = (MiniCLTaskScheduler*) program; + MiniCLKernel* kernel = new MiniCLKernel(); + int nameLen = strlen(kernel_name); + if(nameLen >= MINI_CL_MAX_KERNEL_NAME) + { + *errcode_ret = CL_INVALID_KERNEL_NAME; + return NULL; + } + strcpy_s(kernel->m_name, kernel_name); + kernel->m_numArgs = 0; + + //kernel->m_kernelProgramCommandId = scheduler->findProgramCommandIdByName(kernel_name); + //if (kernel->m_kernelProgramCommandId>=0) + //{ + // *errcode_ret = CL_SUCCESS; + //} else + //{ + // *errcode_ret = CL_INVALID_KERNEL_NAME; + //} + kernel->m_scheduler = scheduler; + if(kernel->registerSelf() == NULL) + { + *errcode_ret = CL_INVALID_KERNEL_NAME; + return NULL; + } + else + { + *errcode_ret = CL_SUCCESS; + } + + return (cl_kernel)kernel; + +} + + +CL_API_ENTRY cl_int CL_API_CALL clBuildProgram(cl_program /* program */, + cl_uint /* num_devices */, + const cl_device_id * /* device_list */, + const char * /* options */, + void (*pfn_notify)(cl_program /* program */, void * /* user_data */), + void * /* user_data */) CL_API_SUFFIX__VERSION_1_0 +{ + return CL_SUCCESS; +} + +CL_API_ENTRY cl_program CL_API_CALL clCreateProgramWithBinary(cl_context context , + cl_uint /* num_devices */, + const cl_device_id * /* device_list */, + const size_t * /* lengths */, + const unsigned char ** /* binaries */, + cl_int * /* binary_status */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0 +{ + return (cl_program)context; +} + + +// Memory Object APIs +CL_API_ENTRY cl_mem CL_API_CALL clCreateBuffer(cl_context /* context */, + cl_mem_flags flags , + size_t size, + void * host_ptr , + cl_int * errcode_ret ) CL_API_SUFFIX__VERSION_1_0 +{ + cl_mem buf = (cl_mem)malloc(size); + if ((flags&CL_MEM_COPY_HOST_PTR) && host_ptr) + { + memcpy(buf,host_ptr,size); + } + *errcode_ret = 0; + return buf; +} + +// Command Queue APIs +CL_API_ENTRY cl_command_queue CL_API_CALL clCreateCommandQueue(cl_context context , + cl_device_id /* device */, + cl_command_queue_properties /* properties */, + cl_int * errcode_ret ) CL_API_SUFFIX__VERSION_1_0 +{ + *errcode_ret = 0; + return (cl_command_queue) context; +} + +extern CL_API_ENTRY cl_int CL_API_CALL clGetContextInfo(cl_context /* context */, + cl_context_info param_name , + size_t param_value_size , + void * param_value, + size_t * param_value_size_ret ) CL_API_SUFFIX__VERSION_1_0 +{ + + switch (param_name) + { + case CL_CONTEXT_DEVICES: + { + if (!param_value_size) + { + *param_value_size_ret = 13; + } else + { + sprintf_s((char*)param_value, param_value_size, "MiniCL_Test."); + } + break; + }; + default: + { + printf("unsupported\n"); + } + } + + return 0; +} + +CL_API_ENTRY cl_context CL_API_CALL clCreateContextFromType(cl_context_properties * /* properties */, + cl_device_type /* device_type */, + void (*pfn_notify)(const char *, const void *, size_t, void *) /* pfn_notify */, + void * /* user_data */, + cl_int * errcode_ret ) CL_API_SUFFIX__VERSION_1_0 +{ + int maxNumOutstandingTasks = 4; +// int maxNumOutstandingTasks = 1; + gMiniCLNumOutstandingTasks = maxNumOutstandingTasks; + const int maxNumOfThreadSupports = 8; + static int sUniqueThreadSupportIndex = 0; + static char* sUniqueThreadSupportName[maxNumOfThreadSupports] = + { + "MiniCL_0", "MiniCL_1", "MiniCL_2", "MiniCL_3", "MiniCL_4", "MiniCL_5", "MiniCL_6", "MiniCL_7" + }; + +#ifdef DEBUG_MINICL_KERNELS + SequentialThreadSupport::SequentialThreadConstructionInfo stc("MiniCL",processMiniCLTask,createMiniCLLocalStoreMemory); + SequentialThreadSupport* threadSupport = new SequentialThreadSupport(stc); +#else + +#if _WIN32 + btAssert(sUniqueThreadSupportIndex < maxNumOfThreadSupports); + Win32ThreadSupport* threadSupport = new Win32ThreadSupport(Win32ThreadSupport::Win32ThreadConstructionInfo( +// "MiniCL", + sUniqueThreadSupportName[sUniqueThreadSupportIndex++], + processMiniCLTask, //processCollisionTask, + createMiniCLLocalStoreMemory,//createCollisionLocalStoreMemory, + maxNumOutstandingTasks)); +#else + ///todo: add posix thread support for other platforms + SequentialThreadSupport::SequentialThreadConstructionInfo stc("MiniCL",processMiniCLTask,createMiniCLLocalStoreMemory); + SequentialThreadSupport* threadSupport = new SequentialThreadSupport(stc); +#endif + +#endif //DEBUG_MINICL_KERNELS + + + MiniCLTaskScheduler* scheduler = new MiniCLTaskScheduler(threadSupport,maxNumOutstandingTasks); + + *errcode_ret = 0; + return (cl_context)scheduler; +} + +CL_API_ENTRY cl_int CL_API_CALL clReleaseContext(cl_context context ) CL_API_SUFFIX__VERSION_1_0 +{ + + MiniCLTaskScheduler* scheduler = (MiniCLTaskScheduler*) context; + + btThreadSupportInterface* threadSupport = scheduler->getThreadSupportInterface(); + delete scheduler; + delete threadSupport; + + return 0; +} +extern CL_API_ENTRY cl_int CL_API_CALL +clFinish(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0 +{ + return CL_SUCCESS; +} + + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetKernelWorkGroupInfo(cl_kernel kernel , + cl_device_id /* device */, + cl_kernel_work_group_info wgi/* param_name */, + size_t sz /* param_value_size */, + void * ptr /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0 +{ + if((wgi == CL_KERNEL_WORK_GROUP_SIZE) + &&(sz == sizeof(int)) + &&(ptr != NULL)) + { + MiniCLKernel* miniCLKernel = (MiniCLKernel*)kernel; + MiniCLTaskScheduler* scheduler = miniCLKernel->m_scheduler; + *((int*)ptr) = scheduler->getMaxNumOutstandingTasks(); + return CL_SUCCESS; + } + else + { + return CL_INVALID_VALUE; + } +} diff --git a/src/BulletMultiThreaded/MiniCLTask/MiniCLTask.cpp b/src/BulletMultiThreaded/MiniCLTask/MiniCLTask.cpp index e7f424b1e..0d4799285 100644 --- a/src/BulletMultiThreaded/MiniCLTask/MiniCLTask.cpp +++ b/src/BulletMultiThreaded/MiniCLTask/MiniCLTask.cpp @@ -15,10 +15,12 @@ subject to the following restrictions: #include "MiniCLTask.h" -#include "../PlatformDefinitions.h" -#include "../SpuFakeDma.h" +#include "BulletMultiThreaded/PlatformDefinitions.h" +#include "BulletMultiThreaded/SpuFakeDma.h" #include "LinearMath/btMinMax.h" -#include "BulletMultiThreaded/MiniCLTask/MiniCLTask.h" +#include "MiniCLTask.h" +#include "BulletMultiThreaded/MiniCLTaskScheduler.h" + #ifdef __SPU__ #include @@ -27,9 +29,7 @@ subject to the following restrictions: #define spu_printf printf #endif -#define __kernel -#define __global -#define get_global_id(a) guid +int gMiniCLNumOutstandingTasks = 0; struct MiniCLTask_LocalStoreMemory { @@ -37,65 +37,23 @@ struct MiniCLTask_LocalStoreMemory }; -/////////////////////////////////////////////////// -// OpenCL Kernel Function for element by element vector addition -__kernel void VectorAdd(__global const float8* a, __global const float8* b, __global float8* c, int guid) -{ - // get oct-float index into global data array - int iGID = get_global_id(0); - - // read inputs into registers - float8 f8InA = a[iGID]; - float8 f8InB = b[iGID]; - float8 f8Out = (float8)0.0f; - - // add the vector elements - f8Out.s0 = f8InA.s0 + f8InB.s0; - f8Out.s1 = f8InA.s1 + f8InB.s1; - f8Out.s2 = f8InA.s2 + f8InB.s2; - f8Out.s3 = f8InA.s3 + f8InB.s3; - f8Out.s4 = f8InA.s4 + f8InB.s4; - f8Out.s5 = f8InA.s5 + f8InB.s5; - f8Out.s6 = f8InA.s6 + f8InB.s6; - f8Out.s7 = f8InA.s7 + f8InB.s7; - - // write back out to GMEM - c[get_global_id(0)] = f8Out; -} -/////////////////////////////////////////////////// - - //-- MAIN METHOD void processMiniCLTask(void* userPtr, void* lsMemory) { // BT_PROFILE("processSampleTask"); - //MiniCLTask_LocalStoreMemory* localMemory = (MiniCLTask_LocalStoreMemory*)lsMemory; + MiniCLTask_LocalStoreMemory* localMemory = (MiniCLTask_LocalStoreMemory*)lsMemory; MiniCLTaskDesc* taskDescPtr = (MiniCLTaskDesc*)userPtr; MiniCLTaskDesc& taskDesc = *taskDescPtr; - printf("Compute Unit[%d] executed kernel %d work items [%d..%d)\n",taskDesc.m_taskId,taskDesc.m_kernelProgramId,taskDesc.m_firstWorkUnit,taskDesc.m_lastWorkUnit); - - - switch (taskDesc.m_kernelProgramId) + for (unsigned int i=taskDesc.m_firstWorkUnit;im_launcher(&taskDesc, i); + } +// printf("Compute Unit[%d] executed kernel %d work items [%d..%d)\n",taskDesc.m_taskId,taskDesc.m_kernelProgramId,taskDesc.m_firstWorkUnit,taskDesc.m_lastWorkUnit); + } diff --git a/src/BulletMultiThreaded/MiniCLTask/MiniCLTask.h b/src/BulletMultiThreaded/MiniCLTask/MiniCLTask.h index a8c889ad7..ca09d177a 100644 --- a/src/BulletMultiThreaded/MiniCLTask/MiniCLTask.h +++ b/src/BulletMultiThreaded/MiniCLTask/MiniCLTask.h @@ -16,39 +16,17 @@ subject to the following restrictions: #ifndef MINICL__TASK_H #define MINICL__TASK_H -#include "../PlatformDefinitions.h" +#include "BulletMultiThreaded/PlatformDefinitions.h" #include "LinearMath/btScalar.h" #include "LinearMath/btAlignedAllocator.h" -enum -{ - CMD_MINICL_1= 1, - CMD_MINICL_ADDVECTOR -}; +#define MINICL_MAX_ARGLENGTH (sizeof(void*)) +#define MINI_CL_MAX_ARG 16 +#define MINI_CL_MAX_KERNEL_NAME 256 - - -struct float8 -{ - float s0; - float s1; - float s2; - float s3; - float s4; - float s5; - float s6; - float s7; - - float8(float scalar) - { - s0=s1=s2=s3=s4=s5=s6=s7=scalar; - } -}; - -#define MINICL_MAX_ARGLENGTH 128 -#define MINI_CL_MAX_ARG 8 +struct MiniCLKernel; ATTRIBUTE_ALIGNED16(struct) MiniCLTaskDesc { @@ -62,16 +40,19 @@ ATTRIBUTE_ALIGNED16(struct) MiniCLTaskDesc } } - uint32_t m_taskId; + uint32_t m_taskId; - uint32_t m_kernelProgramId; - uint32_t m_firstWorkUnit; - uint32_t m_lastWorkUnit; + uint32_t m_firstWorkUnit; + uint32_t m_lastWorkUnit; - char m_argData[MINI_CL_MAX_ARG][MINICL_MAX_ARGLENGTH]; - int m_argSizes[MINI_CL_MAX_ARG]; + MiniCLKernel* m_kernel; + + void* m_argData[MINI_CL_MAX_ARG]; + int m_argSizes[MINI_CL_MAX_ARG]; }; +extern "C" int gMiniCLNumOutstandingTasks; + void processMiniCLTask(void* userPtr, void* lsMemory); void* createMiniCLLocalStoreMemory(); diff --git a/src/BulletMultiThreaded/MiniCLTaskScheduler.cpp b/src/BulletMultiThreaded/MiniCLTaskScheduler.cpp index 5543111e4..d1cb5bda2 100644 --- a/src/BulletMultiThreaded/MiniCLTaskScheduler.cpp +++ b/src/BulletMultiThreaded/MiniCLTaskScheduler.cpp @@ -14,6 +14,7 @@ subject to the following restrictions: */ //#define __CELLOS_LV2__ 1 +#define __BT_SKIP_UINT64_H 1 #define USE_SAMPLE_PROCESS 1 #ifdef USE_SAMPLE_PROCESS @@ -43,21 +44,18 @@ void* SamplelsMemoryFunc() #else -#include "btThreadSupportInterface.h" +#include "BulletMultiThreaded/btThreadSupportInterface.h" //# include "SPUAssert.h" #include - +#include "MiniCL/cl_platform.h" extern "C" { extern char SPU_SAMPLE_ELF_SYMBOL[]; } - - - MiniCLTaskScheduler::MiniCLTaskScheduler(btThreadSupportInterface* threadInterface, int maxNumOutstandingTasks) :m_threadInterface(threadInterface), m_maxNumOutstandingTasks(maxNumOutstandingTasks) @@ -66,6 +64,8 @@ m_maxNumOutstandingTasks(maxNumOutstandingTasks) m_taskBusy.resize(m_maxNumOutstandingTasks); m_spuSampleTaskDesc.resize(m_maxNumOutstandingTasks); + m_kernels.resize(0); + for (int i = 0; i < m_maxNumOutstandingTasks; i++) { m_taskBusy[i] = false; @@ -105,7 +105,7 @@ void MiniCLTaskScheduler::initialize() } -void MiniCLTaskScheduler::issueTask(int firstWorkUnit, int lastWorkUnit,int kernelProgramId,char* argData,int* argSizes) +void MiniCLTaskScheduler::issueTask(int firstWorkUnit, int lastWorkUnit, MiniCLKernel* kernel) { #ifdef DEBUG_SPU_TASK_SCHEDULING @@ -120,16 +120,18 @@ void MiniCLTaskScheduler::issueTask(int firstWorkUnit, int lastWorkUnit,int kern // send task description in event message taskDesc.m_firstWorkUnit = firstWorkUnit; taskDesc.m_lastWorkUnit = lastWorkUnit; - taskDesc.m_kernelProgramId = kernelProgramId; + taskDesc.m_kernel = kernel; //some bookkeeping to recognize finished tasks taskDesc.m_taskId = m_currentTask; - for (int i=0;im_numArgs; i++) { - taskDesc.m_argSizes[i] = argSizes[i]; + taskDesc.m_argSizes[i] = kernel->m_argSizes[i]; if (taskDesc.m_argSizes[i]) { - memcpy(&taskDesc.m_argData[i],&argData[MINICL_MAX_ARGLENGTH*i],taskDesc.m_argSizes[i]); + taskDesc.m_argData[i] = kernel->m_argData[i]; +// memcpy(&taskDesc.m_argData[i],&argData[MINICL_MAX_ARGLENGTH*i],taskDesc.m_argSizes[i]); } } } @@ -221,6 +223,296 @@ void MiniCLTaskScheduler::flush() } + + +typedef void (*MiniCLKernelLauncher0)(int); +typedef void (*MiniCLKernelLauncher1)(void*, int); +typedef void (*MiniCLKernelLauncher2)(void*, void*, int); +typedef void (*MiniCLKernelLauncher3)(void*, void*, void*, int); +typedef void (*MiniCLKernelLauncher4)(void*, void*, void*, void*, int); +typedef void (*MiniCLKernelLauncher5)(void*, void*, void*, void*, void*, int); +typedef void (*MiniCLKernelLauncher6)(void*, void*, void*, void*, void*, void*, int); +typedef void (*MiniCLKernelLauncher7)(void*, void*, void*, void*, void*, void*, void*, int); +typedef void (*MiniCLKernelLauncher8)(void*, void*, void*, void*, void*, void*, void*, void*, int); +typedef void (*MiniCLKernelLauncher9)(void*, void*, void*, void*, void*, void*, void*, void*, void*, int); +typedef void (*MiniCLKernelLauncher10)(void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, int); +typedef void (*MiniCLKernelLauncher11)(void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, int); +typedef void (*MiniCLKernelLauncher12)(void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, int); +typedef void (*MiniCLKernelLauncher13)(void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, int); +typedef void (*MiniCLKernelLauncher14)(void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, int); +typedef void (*MiniCLKernelLauncher15)(void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, int); +typedef void (*MiniCLKernelLauncher16)(void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, int); + + +static void kernelLauncher0(MiniCLTaskDesc* taskDesc, int guid) +{ + ((MiniCLKernelLauncher0)(taskDesc->m_kernel->m_launcher))(guid); +} +static void kernelLauncher1(MiniCLTaskDesc* taskDesc, int guid) +{ + ((MiniCLKernelLauncher1)(taskDesc->m_kernel->m_pCode))( taskDesc->m_argData[0], + guid); +} +static void kernelLauncher2(MiniCLTaskDesc* taskDesc, int guid) +{ + ((MiniCLKernelLauncher2)(taskDesc->m_kernel->m_pCode))( taskDesc->m_argData[0], + taskDesc->m_argData[1], + guid); +} +static void kernelLauncher3(MiniCLTaskDesc* taskDesc, int guid) +{ + ((MiniCLKernelLauncher3)(taskDesc->m_kernel->m_pCode))( taskDesc->m_argData[0], + taskDesc->m_argData[1], + taskDesc->m_argData[2], + guid); +} +static void kernelLauncher4(MiniCLTaskDesc* taskDesc, int guid) +{ + ((MiniCLKernelLauncher4)(taskDesc->m_kernel->m_pCode))( taskDesc->m_argData[0], + taskDesc->m_argData[1], + taskDesc->m_argData[2], + taskDesc->m_argData[3], + guid); +} +static void kernelLauncher5(MiniCLTaskDesc* taskDesc, int guid) +{ + ((MiniCLKernelLauncher5)(taskDesc->m_kernel->m_pCode))( taskDesc->m_argData[0], + taskDesc->m_argData[1], + taskDesc->m_argData[2], + taskDesc->m_argData[3], + taskDesc->m_argData[4], + guid); +} +static void kernelLauncher6(MiniCLTaskDesc* taskDesc, int guid) +{ + ((MiniCLKernelLauncher6)(taskDesc->m_kernel->m_pCode))( taskDesc->m_argData[0], + taskDesc->m_argData[1], + taskDesc->m_argData[2], + taskDesc->m_argData[3], + taskDesc->m_argData[4], + taskDesc->m_argData[5], + guid); +} +static void kernelLauncher7(MiniCLTaskDesc* taskDesc, int guid) +{ + ((MiniCLKernelLauncher7)(taskDesc->m_kernel->m_pCode))( taskDesc->m_argData[0], + taskDesc->m_argData[1], + taskDesc->m_argData[2], + taskDesc->m_argData[3], + taskDesc->m_argData[4], + taskDesc->m_argData[5], + taskDesc->m_argData[6], + guid); +} +static void kernelLauncher8(MiniCLTaskDesc* taskDesc, int guid) +{ + ((MiniCLKernelLauncher8)(taskDesc->m_kernel->m_pCode))( taskDesc->m_argData[0], + taskDesc->m_argData[1], + taskDesc->m_argData[2], + taskDesc->m_argData[3], + taskDesc->m_argData[4], + taskDesc->m_argData[5], + taskDesc->m_argData[6], + taskDesc->m_argData[7], + guid); +} +static void kernelLauncher9(MiniCLTaskDesc* taskDesc, int guid) +{ + ((MiniCLKernelLauncher9)(taskDesc->m_kernel->m_pCode))( taskDesc->m_argData[0], + taskDesc->m_argData[1], + taskDesc->m_argData[2], + taskDesc->m_argData[3], + taskDesc->m_argData[4], + taskDesc->m_argData[5], + taskDesc->m_argData[6], + taskDesc->m_argData[7], + taskDesc->m_argData[8], + guid); +} +static void kernelLauncher10(MiniCLTaskDesc* taskDesc, int guid) +{ + ((MiniCLKernelLauncher10)(taskDesc->m_kernel->m_pCode))(taskDesc->m_argData[0], + taskDesc->m_argData[1], + taskDesc->m_argData[2], + taskDesc->m_argData[3], + taskDesc->m_argData[4], + taskDesc->m_argData[5], + taskDesc->m_argData[6], + taskDesc->m_argData[7], + taskDesc->m_argData[8], + taskDesc->m_argData[9], + guid); +} +static void kernelLauncher11(MiniCLTaskDesc* taskDesc, int guid) +{ + ((MiniCLKernelLauncher11)(taskDesc->m_kernel->m_pCode))(taskDesc->m_argData[0], + taskDesc->m_argData[1], + taskDesc->m_argData[2], + taskDesc->m_argData[3], + taskDesc->m_argData[4], + taskDesc->m_argData[5], + taskDesc->m_argData[6], + taskDesc->m_argData[7], + taskDesc->m_argData[8], + taskDesc->m_argData[9], + taskDesc->m_argData[10], + guid); +} +static void kernelLauncher12(MiniCLTaskDesc* taskDesc, int guid) +{ + ((MiniCLKernelLauncher12)(taskDesc->m_kernel->m_pCode))(taskDesc->m_argData[0], + taskDesc->m_argData[1], + taskDesc->m_argData[2], + taskDesc->m_argData[3], + taskDesc->m_argData[4], + taskDesc->m_argData[5], + taskDesc->m_argData[6], + taskDesc->m_argData[7], + taskDesc->m_argData[8], + taskDesc->m_argData[9], + taskDesc->m_argData[10], + taskDesc->m_argData[11], + guid); +} +static void kernelLauncher13(MiniCLTaskDesc* taskDesc, int guid) +{ + ((MiniCLKernelLauncher13)(taskDesc->m_kernel->m_pCode))(taskDesc->m_argData[0], + taskDesc->m_argData[1], + taskDesc->m_argData[2], + taskDesc->m_argData[3], + taskDesc->m_argData[4], + taskDesc->m_argData[5], + taskDesc->m_argData[6], + taskDesc->m_argData[7], + taskDesc->m_argData[8], + taskDesc->m_argData[9], + taskDesc->m_argData[10], + taskDesc->m_argData[11], + taskDesc->m_argData[12], + guid); +} +static void kernelLauncher14(MiniCLTaskDesc* taskDesc, int guid) +{ + ((MiniCLKernelLauncher14)(taskDesc->m_kernel->m_pCode))(taskDesc->m_argData[0], + taskDesc->m_argData[1], + taskDesc->m_argData[2], + taskDesc->m_argData[3], + taskDesc->m_argData[4], + taskDesc->m_argData[5], + taskDesc->m_argData[6], + taskDesc->m_argData[7], + taskDesc->m_argData[8], + taskDesc->m_argData[9], + taskDesc->m_argData[10], + taskDesc->m_argData[11], + taskDesc->m_argData[12], + taskDesc->m_argData[13], + guid); +} +static void kernelLauncher15(MiniCLTaskDesc* taskDesc, int guid) +{ + ((MiniCLKernelLauncher15)(taskDesc->m_kernel->m_pCode))(taskDesc->m_argData[0], + taskDesc->m_argData[1], + taskDesc->m_argData[2], + taskDesc->m_argData[3], + taskDesc->m_argData[4], + taskDesc->m_argData[5], + taskDesc->m_argData[6], + taskDesc->m_argData[7], + taskDesc->m_argData[8], + taskDesc->m_argData[9], + taskDesc->m_argData[10], + taskDesc->m_argData[11], + taskDesc->m_argData[12], + taskDesc->m_argData[13], + taskDesc->m_argData[14], + guid); +} +static void kernelLauncher16(MiniCLTaskDesc* taskDesc, int guid) +{ + ((MiniCLKernelLauncher16)(taskDesc->m_kernel->m_pCode))(taskDesc->m_argData[0], + taskDesc->m_argData[1], + taskDesc->m_argData[2], + taskDesc->m_argData[3], + taskDesc->m_argData[4], + taskDesc->m_argData[5], + taskDesc->m_argData[6], + taskDesc->m_argData[7], + taskDesc->m_argData[8], + taskDesc->m_argData[9], + taskDesc->m_argData[10], + taskDesc->m_argData[11], + taskDesc->m_argData[12], + taskDesc->m_argData[13], + taskDesc->m_argData[14], + taskDesc->m_argData[15], + guid); +} + +static kernelLauncherCB spLauncherList[MINI_CL_MAX_ARG+1] = +{ + kernelLauncher0, + kernelLauncher1, + kernelLauncher2, + kernelLauncher3, + kernelLauncher4, + kernelLauncher5, + kernelLauncher6, + kernelLauncher7, + kernelLauncher8, + kernelLauncher9, + kernelLauncher10, + kernelLauncher11, + kernelLauncher12, + kernelLauncher13, + kernelLauncher14, + kernelLauncher15, + kernelLauncher16 +}; + +void MiniCLKernel::updateLauncher() +{ + m_launcher = spLauncherList[m_numArgs]; +} + +struct MiniCLKernelDescEntry +{ + void* pCode; + char* pName; +}; +static MiniCLKernelDescEntry spKernelDesc[256]; +static int sNumKernelDesc = 0; + +MiniCLKernelDesc::MiniCLKernelDesc(void* pCode, char* pName) +{ + for(int i = 0; i < sNumKernelDesc; i++) + { + if(!strcmp(pName, spKernelDesc[i].pName)) + { // already registered + btAssert(spKernelDesc[i].pCode == pCode); + return; + } + } + spKernelDesc[sNumKernelDesc].pCode = pCode; + spKernelDesc[sNumKernelDesc].pName = pName; + sNumKernelDesc++; +} + + +MiniCLKernel* MiniCLKernel::registerSelf() +{ + m_scheduler->registerKernel(this); + for(int i = 0; i < sNumKernelDesc; i++) + { + if(!strcmp(m_name, spKernelDesc[i].pName)) + { + m_pCode = spKernelDesc[i].pCode; + return this; + } + } + return NULL; +} + #endif diff --git a/src/BulletMultiThreaded/MiniCLTaskScheduler.h b/src/BulletMultiThreaded/MiniCLTaskScheduler.h index 580b509b8..3061a7134 100644 --- a/src/BulletMultiThreaded/MiniCLTaskScheduler.h +++ b/src/BulletMultiThreaded/MiniCLTaskScheduler.h @@ -21,7 +21,7 @@ subject to the following restrictions: #include -#include "PlatformDefinitions.h" +#include "BulletMultiThreaded/PlatformDefinitions.h" #include @@ -30,11 +30,10 @@ subject to the following restrictions: #include "MiniCLTask/MiniCLTask.h" - //just add your commands here, try to keep them globally unique for debugging purposes #define CMD_SAMPLE_TASK_COMMAND 10 - +struct MiniCLKernel; /// MiniCLTaskScheduler handles SPU processing of collision pairs. /// When PPU issues a task, it will look for completed task buffers @@ -44,7 +43,11 @@ class MiniCLTaskScheduler // track task buffers that are being used, and total busy tasks btAlignedObjectArray m_taskBusy; btAlignedObjectArray m_spuSampleTaskDesc; - + + + btAlignedObjectArray m_kernels; + + int m_numBusyTasks; // the current task and the current entry to insert a new work unit @@ -68,7 +71,7 @@ public: ///call initialize in the beginning of the frame, before addCollisionPairToTask void initialize(); - void issueTask(int firstWorkUnit, int lastWorkUnit,int kernelProgramId,char* argData,int* argSizes); + void issueTask(int firstWorkUnit, int lastWorkUnit, MiniCLKernel* kernel); ///call flush to submit potential outstanding work to SPUs and wait for all involved SPUs to be finished void flush(); @@ -78,25 +81,35 @@ public: return m_threadInterface; } - int findProgramCommandIdByName(const char* programName) const - { - return CMD_MINICL_ADDVECTOR;//hardcoded temp value, todo: implement multi-program support - } + int findProgramCommandIdByName(const char* programName) const; int getMaxNumOutstandingTasks() const { return m_maxNumOutstandingTasks; } + + void registerKernel(MiniCLKernel* kernel) + { + m_kernels.push_back(kernel); + } }; +typedef void (*kernelLauncherCB)(MiniCLTaskDesc* taskDesc, int guid); struct MiniCLKernel { MiniCLTaskScheduler* m_scheduler; - int m_kernelProgramCommandId; +// int m_kernelProgramCommandId; - char m_argData[MINI_CL_MAX_ARG][MINICL_MAX_ARGLENGTH]; + char m_name[MINI_CL_MAX_KERNEL_NAME]; + unsigned int m_numArgs; + kernelLauncherCB m_launcher; + void* m_pCode; + void updateLauncher(); + MiniCLKernel* registerSelf(); + + void* m_argData[MINI_CL_MAX_ARG]; int m_argSizes[MINI_CL_MAX_ARG]; }; diff --git a/src/BulletMultiThreaded/PlatformDefinitions.h b/src/BulletMultiThreaded/PlatformDefinitions.h index c9ae7b359..16362f4bc 100644 --- a/src/BulletMultiThreaded/PlatformDefinitions.h +++ b/src/BulletMultiThreaded/PlatformDefinitions.h @@ -19,7 +19,9 @@ typedef union typedef unsigned char uint8_t; #ifndef __PHYSICS_COMMON_H__ +#ifndef __BT_SKIP_UINT64_H typedef unsigned long int uint64_t; +#endif //__BT_SKIP_UINT64_H typedef unsigned int uint32_t; #endif //__PHYSICS_COMMON_H__ typedef unsigned short uint16_t; diff --git a/src/BulletMultiThreaded/PpuAddressSpace.h b/src/BulletMultiThreaded/PpuAddressSpace.h index b96b84d63..f36fdfb3c 100644 --- a/src/BulletMultiThreaded/PpuAddressSpace.h +++ b/src/BulletMultiThreaded/PpuAddressSpace.h @@ -2,11 +2,11 @@ #define __PPU_ADDRESS_SPACE_H -#ifdef WIN32 +#ifdef _WIN32 //stop those casting warnings until we have a better solution for ppu_address_t / void* / uint64 conversions #pragma warning (disable: 4311) #pragma warning (disable: 4312) -#endif //WIN32 +#endif //_WIN32 #if defined(_WIN64) || defined(__LP64__) || defined(__x86_64__) || defined(USE_ADDR64) typedef uint64_t ppu_address_t; diff --git a/src/MiniCL/cl_MiniCL_Defs.h b/src/MiniCL/cl_MiniCL_Defs.h new file mode 100644 index 000000000..437676cd0 --- /dev/null +++ b/src/MiniCL/cl_MiniCL_Defs.h @@ -0,0 +1,263 @@ +/* +Bullet Continuous Collision Detection and Physics Library, Copyright (c) 2007 Erwin Coumans + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. + +*/ + +#include +#include +#include "LinearMath/btScalar.h" + +#include "MiniCL/cl.h" + + +#define __kernel +#define __global +#define __local +#define get_global_id(a) __guid_arg +#define get_local_id(a) ((__guid_arg) % gMiniCLNumOutstandingTasks) +#define get_local_size(a) (gMiniCLNumOutstandingTasks) +#define get_group_id(a) ((__guid_arg) / gMiniCLNumOutstandingTasks) + +#define CLK_LOCAL_MEM_FENCE 0x01 +#define CLK_GLOBAL_MEM_FENCE 0x02 + +static void barrier(unsigned int a) +{ + // TODO : implement +} + +ATTRIBUTE_ALIGNED16(struct) float8 +{ + float s0; + float s1; + float s2; + float s3; + float s4; + float s5; + float s6; + float s7; + + float8(float scalar) + { + s0=s1=s2=s3=s4=s5=s6=s7=scalar; + } +}; + +ATTRIBUTE_ALIGNED16(struct) float4 +{ + float x,y,z,w; + float4() {} + float4(float v) + { + x = y = z = w = v; + } + float4 operator*(const float4& other) + { + float4 tmp; + tmp.x = x*other.x; + tmp.y = y*other.y; + tmp.z = z*other.z; + tmp.w = w*other.w; + return tmp; + } + + float4 operator*(const float& other) + { + float4 tmp; + tmp.x = x*other; + tmp.y = y*other; + tmp.z = z*other; + tmp.w = w*other; + return tmp; + } + + + + float4& operator+=(const float4& other) + { + x += other.x; + y += other.y; + z += other.z; + w += other.w; + return *this; + } + + float4& operator-=(const float4& other) + { + x -= other.x; + y -= other.y; + z -= other.z; + w -= other.w; + return *this; + } + + float4& operator *=(float scalar) + { + x *= scalar; + y *= scalar; + z *= scalar; + w *= scalar; + return (*this); + } + + + + + +}; + +static float4 fabs(const float4& a) +{ + float4 tmp; + tmp.x = a.x < 0.f ? 0.f : a.x; + tmp.y = a.y < 0.f ? 0.f : a.y; + tmp.z = a.z < 0.f ? 0.f : a.z; + tmp.w = a.w < 0.f ? 0.f : a.w; + return tmp; +} +static float4 operator+(const float4& a,const float4& b) +{ + float4 tmp; + tmp.x = a.x + b.x; + tmp.y = a.y + b.y; + tmp.z = a.z + b.z; + tmp.w = a.w + b.w; + return tmp; +} + +static float4 operator-(const float4& a,const float4& b) +{ + float4 tmp; + tmp.x = a.x - b.x; + tmp.y = a.y - b.y; + tmp.z = a.z - b.z; + tmp.w = a.w - b.w; + return tmp; +} +static float4 operator*(float a,const float4& b) +{ + float4 tmp; + tmp.x = a * b.x; + tmp.y = a * b.y; + tmp.z = a * b.z; + tmp.w = a * b.w; + return tmp; +} + + +static float dot(const float4&a ,const float4& b) +{ + float4 tmp; + tmp.x = a.x*b.x; + tmp.y = a.y*b.y; + tmp.z = a.z*b.z; + tmp.w = a.w*b.w; + return tmp.x+tmp.y+tmp.z+tmp.w; +} + +static float4 cross(const float4&a ,const float4& b) +{ + float4 tmp; + tmp.x = a.y*b.z - a.z*b.y; + tmp.y = -a.x*b.z + a.z*b.x; + tmp.z = a.x*b.y - a.y*b.x; + tmp.w = 0.f; + return tmp; +} + +static float max(float a, float b) +{ + return (a >= b) ? a : b; +} + + +static float min(float a, float b) +{ + return (a <= b) ? a : b; +} + +static float fmax(float a, float b) +{ + return (a >= b) ? a : b; +} + +static float fmin(float a, float b) +{ + return (a <= b) ? a : b; +} + +struct int2 +{ + int x,y; +}; + +struct uint2 +{ + unsigned int x,y; +}; + +//typedef int2 uint2; + +typedef unsigned int uint; + +struct int4 +{ + int x,y,z,w; +}; + +struct uint4 +{ + unsigned int x,y,z,w; + uint4() {} + uint4(uint val) { x = y = z = w = val; } + uint4& operator+=(const uint4& other) + { + x += other.x; + y += other.y; + z += other.z; + w += other.w; + return *this; + } +}; +static uint4 operator+(const uint4& a,const uint4& b) +{ + uint4 tmp; + tmp.x = a.x + b.x; + tmp.y = a.y + b.y; + tmp.z = a.z + b.z; + tmp.w = a.w + b.w; + return tmp; +} +static uint4 operator-(const uint4& a,const uint4& b) +{ + uint4 tmp; + tmp.x = a.x - b.x; + tmp.y = a.y - b.y; + tmp.z = a.z - b.z; + tmp.w = a.w - b.w; + return tmp; +} + +#define native_sqrt sqrtf +#define native_sin sinf +#define native_cos cosf +#define native_powr powf + +#define GUID_ARG ,int __guid_arg +#define GUID_ARG_VAL ,__guid_arg + + +#define as_int(a) (*((int*)&(a))) + +extern "C" int gMiniCLNumOutstandingTasks; +// extern "C" void __kernel_func(); \ diff --git a/src/MiniCL/cl_platform.h b/src/MiniCL/cl_platform.h index 572a1ec86..c323e2448 100644 --- a/src/MiniCL/cl_platform.h +++ b/src/MiniCL/cl_platform.h @@ -1,244 +1,254 @@ -/********************************************************************************** - * Copyright (c) 2008-2009 The Khronos Group Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and/or associated documentation files (the - * "Materials"), to deal in the Materials without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Materials, and to - * permit persons to whom the Materials are furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Materials. - * - * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. - **********************************************************************************/ - -#ifndef __CL_PLATFORM_H -#define __CL_PLATFORM_H - -#ifdef __APPLE__ - /* Contains #defines for AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER below */ - #include -#endif - -#ifdef __cplusplus -extern "C" { -#endif - -#define CL_API_ENTRY -#define CL_API_CALL -#ifdef __APPLE__ -#define CL_API_SUFFIX__VERSION_1_0 // AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER -#define CL_EXTENSION_WEAK_LINK __attribute__((weak_import)) -#else -#define CL_API_SUFFIX__VERSION_1_0 -#define CL_EXTENSION_WEAK_LINK -#endif - -#ifdef _WIN32 -typedef signed __int8 int8_t; -typedef unsigned __int8 uint8_t; -typedef signed __int16 int16_t; -typedef unsigned __int16 uint16_t; -typedef signed __int32 int32_t; -typedef unsigned __int32 uint32_t; -typedef signed __int64 int64_t; -typedef unsigned __int64 uint64_t; - -typedef int8_t cl_char; -typedef uint8_t cl_uchar; -typedef int16_t cl_short ; -typedef uint16_t cl_ushort ; -typedef int32_t cl_int ; -typedef uint32_t cl_uint ; -typedef int64_t cl_long ; -typedef uint64_t cl_ulong ; - -typedef uint16_t cl_half ; -typedef float cl_float ; -typedef double cl_double ; - - -typedef int8_t cl_char2[2] ; -typedef int8_t cl_char4[4] ; -typedef int8_t cl_char8[8] ; -typedef int8_t cl_char16[16] ; -typedef uint8_t cl_uchar2[2] ; -typedef uint8_t cl_uchar4[4] ; -typedef uint8_t cl_uchar8[8] ; -typedef uint8_t cl_uchar16[16] ; - -typedef int16_t cl_short2[2] ; -typedef int16_t cl_short4[4] ; -typedef int16_t cl_short8[8] ; -typedef int16_t cl_short16[16] ; -typedef uint16_t cl_ushort2[2] ; -typedef uint16_t cl_ushort4[4] ; -typedef uint16_t cl_ushort8[8] ; -typedef uint16_t cl_ushort16[16] ; - -typedef int32_t cl_int2[2] ; -typedef int32_t cl_int4[4] ; -typedef int32_t cl_int8[8] ; -typedef int32_t cl_int16[16] ; -typedef uint32_t cl_uint2[2] ; -typedef uint32_t cl_uint4[4] ; -typedef uint32_t cl_uint8[8] ; -typedef uint32_t cl_uint16[16] ; - -typedef int64_t cl_long2[2] ; -typedef int64_t cl_long4[4] ; -typedef int64_t cl_long8[8] ; -typedef int64_t cl_long16[16] ; -typedef uint64_t cl_ulong2[2] ; -typedef uint64_t cl_ulong4[4] ; -typedef uint64_t cl_ulong8[8] ; -typedef uint64_t cl_ulong16[16] ; - -typedef float cl_float2[2] ; -typedef float cl_float4[4] ; -typedef float cl_float8[8] ; -typedef float cl_float16[16] ; - -typedef double cl_double2[2] ; -typedef double cl_double4[4] ; -typedef double cl_double8[8] ; -typedef double cl_double16[16] ; - - -#else -#include - -/* scalar types */ -typedef int8_t cl_char; -typedef uint8_t cl_uchar; -typedef int16_t cl_short __attribute__((aligned(2))); -typedef uint16_t cl_ushort __attribute__((aligned(2))); -typedef int32_t cl_int __attribute__((aligned(4))); -typedef uint32_t cl_uint __attribute__((aligned(4))); -typedef int64_t cl_long __attribute__((aligned(8))); -typedef uint64_t cl_ulong __attribute__((aligned(8))); - -typedef uint16_t cl_half __attribute__((aligned(2))); -typedef float cl_float __attribute__((aligned(4))); -typedef double cl_double __attribute__((aligned(8))); - - -/* - * Vector types - * - * Note: OpenCL requires that all types be naturally aligned. - * This means that vector types must be naturally aligned. - * For example, a vector of four floats must be aligned to - * a 16 byte boundary (calculated as 4 * the natural 4-byte - * alignment of the float). The alignment qualifiers here - * will only function properly if your compiler supports them - * and if you don't actively work to defeat them. For example, - * in order for a cl_float4 to be 16 byte aligned in a struct, - * the start of the struct must itself be 16-byte aligned. - * - * Maintaining proper alignment is the user's responsibility. - */ -typedef int8_t cl_char2[2] __attribute__((aligned(2))); -typedef int8_t cl_char4[4] __attribute__((aligned(4))); -typedef int8_t cl_char8[8] __attribute__((aligned(8))); -typedef int8_t cl_char16[16] __attribute__((aligned(16))); -typedef uint8_t cl_uchar2[2] __attribute__((aligned(2))); -typedef uint8_t cl_uchar4[4] __attribute__((aligned(4))); -typedef uint8_t cl_uchar8[8] __attribute__((aligned(8))); -typedef uint8_t cl_uchar16[16] __attribute__((aligned(16))); - -typedef int16_t cl_short2[2] __attribute__((aligned(4))); -typedef int16_t cl_short4[4] __attribute__((aligned(8))); -typedef int16_t cl_short8[8] __attribute__((aligned(16))); -typedef int16_t cl_short16[16] __attribute__((aligned(32))); -typedef uint16_t cl_ushort2[2] __attribute__((aligned(4))); -typedef uint16_t cl_ushort4[4] __attribute__((aligned(8))); -typedef uint16_t cl_ushort8[8] __attribute__((aligned(16))); -typedef uint16_t cl_ushort16[16] __attribute__((aligned(32))); - -typedef int32_t cl_int2[2] __attribute__((aligned(8))); -typedef int32_t cl_int4[4] __attribute__((aligned(16))); -typedef int32_t cl_int8[8] __attribute__((aligned(32))); -typedef int32_t cl_int16[16] __attribute__((aligned(64))); -typedef uint32_t cl_uint2[2] __attribute__((aligned(8))); -typedef uint32_t cl_uint4[4] __attribute__((aligned(16))); -typedef uint32_t cl_uint8[8] __attribute__((aligned(32))); -typedef uint32_t cl_uint16[16] __attribute__((aligned(64))); - -typedef int64_t cl_long2[2] __attribute__((aligned(16))); -typedef int64_t cl_long4[4] __attribute__((aligned(32))); -typedef int64_t cl_long8[8] __attribute__((aligned(64))); -typedef int64_t cl_long16[16] __attribute__((aligned(128))); -typedef uint64_t cl_ulong2[2] __attribute__((aligned(16))); -typedef uint64_t cl_ulong4[4] __attribute__((aligned(32))); -typedef uint64_t cl_ulong8[8] __attribute__((aligned(64))); -typedef uint64_t cl_ulong16[16] __attribute__((aligned(128))); - -typedef float cl_float2[2] __attribute__((aligned(8))); -typedef float cl_float4[4] __attribute__((aligned(16))); -typedef float cl_float8[8] __attribute__((aligned(32))); -typedef float cl_float16[16] __attribute__((aligned(64))); - -typedef double cl_double2[2] __attribute__((aligned(16))); -typedef double cl_double4[4] __attribute__((aligned(32))); -typedef double cl_double8[8] __attribute__((aligned(64))); -typedef double cl_double16[16] __attribute__((aligned(128))); -#endif - -#include - -/* and a few goodies to go with them */ -#define CL_CHAR_BIT 8 -#define CL_SCHAR_MAX 127 -#define CL_SCHAR_MIN (-127-1) -#define CL_CHAR_MAX CL_SCHAR_MAX -#define CL_CHAR_MIN CL_SCHAR_MIN -#define CL_UCHAR_MAX 255 -#define CL_SHRT_MAX 32767 -#define CL_SHRT_MIN (-32767-1) -#define CL_USHRT_MAX 65535 -#define CL_INT_MAX 2147483647 -#define CL_INT_MIN (-2147483647-1) -#define CL_UINT_MAX 0xffffffffU -#define CL_LONG_MAX ((cl_long) 0x7FFFFFFFFFFFFFFFLL) -#define CL_LONG_MIN ((cl_long) -0x7FFFFFFFFFFFFFFFLL - 1LL) -#define CL_ULONG_MAX ((cl_ulong) 0xFFFFFFFFFFFFFFFFULL) - -#define CL_FLT_DIG 6 -#define CL_FLT_MANT_DIG 24 -#define CL_FLT_MAX_10_EXP +38 -#define CL_FLT_MAX_EXP +128 -#define CL_FLT_MIN_10_EXP -37 -#define CL_FLT_MIN_EXP -125 -#define CL_FLT_RADIX 2 -#define CL_FLT_MAX 0x1.fffffep127f -#define CL_FLT_MIN 0x1.0p-126f -#define CL_FLT_EPSILON 0x1.0p-23f - -#define CL_DBL_DIG 15 -#define CL_DBL_MANT_DIG 53 -#define CL_DBL_MAX_10_EXP +308 -#define CL_DBL_MAX_EXP +1024 -#define CL_DBL_MIN_10_EXP -307 -#define CL_DBL_MIN_EXP -1021 -#define CL_DBL_RADIX 2 -#define CL_DBL_MAX 0x1.fffffffffffffp1023 -#define CL_DBL_MIN 0x1.0p-1022 -#define CL_DBL_EPSILON 0x1.0p-52 - -/* There are no vector types for half */ - -#ifdef __cplusplus -} -#endif - -#endif // __CL_PLATFORM_H +/********************************************************************************** + * Copyright (c) 2008-2009 The Khronos Group Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and/or associated documentation files (the + * "Materials"), to deal in the Materials without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Materials, and to + * permit persons to whom the Materials are furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Materials. + * + * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. + **********************************************************************************/ + +#ifndef __CL_PLATFORM_H +#define __CL_PLATFORM_H + +#define CL_PLATFORM_MINI_CL 0x12345 + +struct MiniCLKernelDesc +{ + MiniCLKernelDesc(void* pCode, char* pName); +}; + +#define MINICL_REGISTER(__kernel_func) static MiniCLKernelDesc __kernel_func##Desc(__kernel_func, #__kernel_func); + + +#ifdef __APPLE__ + /* Contains #defines for AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER below */ + #include +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +#define CL_API_ENTRY +#define CL_API_CALL +#ifdef __APPLE__ +#define CL_API_SUFFIX__VERSION_1_0 // AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER +#define CL_EXTENSION_WEAK_LINK __attribute__((weak_import)) +#else +#define CL_API_SUFFIX__VERSION_1_0 +#define CL_EXTENSION_WEAK_LINK +#endif + +#ifdef _WIN32 +typedef signed __int8 int8_t; +typedef unsigned __int8 uint8_t; +typedef signed __int16 int16_t; +typedef unsigned __int16 uint16_t; +typedef signed __int32 int32_t; +typedef unsigned __int32 uint32_t; +typedef signed __int64 int64_t; +typedef unsigned __int64 uint64_t; + +typedef int8_t cl_char; +typedef uint8_t cl_uchar; +typedef int16_t cl_short ; +typedef uint16_t cl_ushort ; +typedef int32_t cl_int ; +typedef uint32_t cl_uint ; +typedef int64_t cl_long ; +typedef uint64_t cl_ulong ; + +typedef uint16_t cl_half ; +typedef float cl_float ; +typedef double cl_double ; + + +typedef int8_t cl_char2[2] ; +typedef int8_t cl_char4[4] ; +typedef int8_t cl_char8[8] ; +typedef int8_t cl_char16[16] ; +typedef uint8_t cl_uchar2[2] ; +typedef uint8_t cl_uchar4[4] ; +typedef uint8_t cl_uchar8[8] ; +typedef uint8_t cl_uchar16[16] ; + +typedef int16_t cl_short2[2] ; +typedef int16_t cl_short4[4] ; +typedef int16_t cl_short8[8] ; +typedef int16_t cl_short16[16] ; +typedef uint16_t cl_ushort2[2] ; +typedef uint16_t cl_ushort4[4] ; +typedef uint16_t cl_ushort8[8] ; +typedef uint16_t cl_ushort16[16] ; + +typedef int32_t cl_int2[2] ; +typedef int32_t cl_int4[4] ; +typedef int32_t cl_int8[8] ; +typedef int32_t cl_int16[16] ; +typedef uint32_t cl_uint2[2] ; +typedef uint32_t cl_uint4[4] ; +typedef uint32_t cl_uint8[8] ; +typedef uint32_t cl_uint16[16] ; + +typedef int64_t cl_long2[2] ; +typedef int64_t cl_long4[4] ; +typedef int64_t cl_long8[8] ; +typedef int64_t cl_long16[16] ; +typedef uint64_t cl_ulong2[2] ; +typedef uint64_t cl_ulong4[4] ; +typedef uint64_t cl_ulong8[8] ; +typedef uint64_t cl_ulong16[16] ; + +typedef float cl_float2[2] ; +typedef float cl_float4[4] ; +typedef float cl_float8[8] ; +typedef float cl_float16[16] ; + +typedef double cl_double2[2] ; +typedef double cl_double4[4] ; +typedef double cl_double8[8] ; +typedef double cl_double16[16] ; + + +#else +#include + +/* scalar types */ +typedef int8_t cl_char; +typedef uint8_t cl_uchar; +typedef int16_t cl_short __attribute__((aligned(2))); +typedef uint16_t cl_ushort __attribute__((aligned(2))); +typedef int32_t cl_int __attribute__((aligned(4))); +typedef uint32_t cl_uint __attribute__((aligned(4))); +typedef int64_t cl_long __attribute__((aligned(8))); +typedef uint64_t cl_ulong __attribute__((aligned(8))); + +typedef uint16_t cl_half __attribute__((aligned(2))); +typedef float cl_float __attribute__((aligned(4))); +typedef double cl_double __attribute__((aligned(8))); + + +/* + * Vector types + * + * Note: OpenCL requires that all types be naturally aligned. + * This means that vector types must be naturally aligned. + * For example, a vector of four floats must be aligned to + * a 16 byte boundary (calculated as 4 * the natural 4-byte + * alignment of the float). The alignment qualifiers here + * will only function properly if your compiler supports them + * and if you don't actively work to defeat them. For example, + * in order for a cl_float4 to be 16 byte aligned in a struct, + * the start of the struct must itself be 16-byte aligned. + * + * Maintaining proper alignment is the user's responsibility. + */ +typedef int8_t cl_char2[2] __attribute__((aligned(2))); +typedef int8_t cl_char4[4] __attribute__((aligned(4))); +typedef int8_t cl_char8[8] __attribute__((aligned(8))); +typedef int8_t cl_char16[16] __attribute__((aligned(16))); +typedef uint8_t cl_uchar2[2] __attribute__((aligned(2))); +typedef uint8_t cl_uchar4[4] __attribute__((aligned(4))); +typedef uint8_t cl_uchar8[8] __attribute__((aligned(8))); +typedef uint8_t cl_uchar16[16] __attribute__((aligned(16))); + +typedef int16_t cl_short2[2] __attribute__((aligned(4))); +typedef int16_t cl_short4[4] __attribute__((aligned(8))); +typedef int16_t cl_short8[8] __attribute__((aligned(16))); +typedef int16_t cl_short16[16] __attribute__((aligned(32))); +typedef uint16_t cl_ushort2[2] __attribute__((aligned(4))); +typedef uint16_t cl_ushort4[4] __attribute__((aligned(8))); +typedef uint16_t cl_ushort8[8] __attribute__((aligned(16))); +typedef uint16_t cl_ushort16[16] __attribute__((aligned(32))); + +typedef int32_t cl_int2[2] __attribute__((aligned(8))); +typedef int32_t cl_int4[4] __attribute__((aligned(16))); +typedef int32_t cl_int8[8] __attribute__((aligned(32))); +typedef int32_t cl_int16[16] __attribute__((aligned(64))); +typedef uint32_t cl_uint2[2] __attribute__((aligned(8))); +typedef uint32_t cl_uint4[4] __attribute__((aligned(16))); +typedef uint32_t cl_uint8[8] __attribute__((aligned(32))); +typedef uint32_t cl_uint16[16] __attribute__((aligned(64))); + +typedef int64_t cl_long2[2] __attribute__((aligned(16))); +typedef int64_t cl_long4[4] __attribute__((aligned(32))); +typedef int64_t cl_long8[8] __attribute__((aligned(64))); +typedef int64_t cl_long16[16] __attribute__((aligned(128))); +typedef uint64_t cl_ulong2[2] __attribute__((aligned(16))); +typedef uint64_t cl_ulong4[4] __attribute__((aligned(32))); +typedef uint64_t cl_ulong8[8] __attribute__((aligned(64))); +typedef uint64_t cl_ulong16[16] __attribute__((aligned(128))); + +typedef float cl_float2[2] __attribute__((aligned(8))); +typedef float cl_float4[4] __attribute__((aligned(16))); +typedef float cl_float8[8] __attribute__((aligned(32))); +typedef float cl_float16[16] __attribute__((aligned(64))); + +typedef double cl_double2[2] __attribute__((aligned(16))); +typedef double cl_double4[4] __attribute__((aligned(32))); +typedef double cl_double8[8] __attribute__((aligned(64))); +typedef double cl_double16[16] __attribute__((aligned(128))); +#endif + +#include + +/* and a few goodies to go with them */ +#define CL_CHAR_BIT 8 +#define CL_SCHAR_MAX 127 +#define CL_SCHAR_MIN (-127-1) +#define CL_CHAR_MAX CL_SCHAR_MAX +#define CL_CHAR_MIN CL_SCHAR_MIN +#define CL_UCHAR_MAX 255 +#define CL_SHRT_MAX 32767 +#define CL_SHRT_MIN (-32767-1) +#define CL_USHRT_MAX 65535 +#define CL_INT_MAX 2147483647 +#define CL_INT_MIN (-2147483647-1) +#define CL_UINT_MAX 0xffffffffU +#define CL_LONG_MAX ((cl_long) 0x7FFFFFFFFFFFFFFFLL) +#define CL_LONG_MIN ((cl_long) -0x7FFFFFFFFFFFFFFFLL - 1LL) +#define CL_ULONG_MAX ((cl_ulong) 0xFFFFFFFFFFFFFFFFULL) + +#define CL_FLT_DIG 6 +#define CL_FLT_MANT_DIG 24 +#define CL_FLT_MAX_10_EXP +38 +#define CL_FLT_MAX_EXP +128 +#define CL_FLT_MIN_10_EXP -37 +#define CL_FLT_MIN_EXP -125 +#define CL_FLT_RADIX 2 +#define CL_FLT_MAX 0x1.fffffep127f +#define CL_FLT_MIN 0x1.0p-126f +#define CL_FLT_EPSILON 0x1.0p-23f + +#define CL_DBL_DIG 15 +#define CL_DBL_MANT_DIG 53 +#define CL_DBL_MAX_10_EXP +308 +#define CL_DBL_MAX_EXP +1024 +#define CL_DBL_MIN_10_EXP -307 +#define CL_DBL_MIN_EXP -1021 +#define CL_DBL_RADIX 2 +#define CL_DBL_MAX 0x1.fffffffffffffp1023 +#define CL_DBL_MIN 0x1.0p-1022 +#define CL_DBL_EPSILON 0x1.0p-52 + +/* There are no vector types for half */ + +#ifdef __cplusplus +} +#endif + +#endif // __CL_PLATFORM_H