diff --git a/Demos/MiniCL_VectorAdd/CMakeLists.txt b/Demos/MiniCL_VectorAdd/CMakeLists.txt new file mode 100644 index 000000000..5951358b1 --- /dev/null +++ b/Demos/MiniCL_VectorAdd/CMakeLists.txt @@ -0,0 +1,16 @@ +# AppMiniCLVectorAdd is a very basic test for MiniCL. + + +INCLUDE_DIRECTORIES( +${BULLET_PHYSICS_SOURCE_DIR}/src } +) + +LINK_LIBRARIES( + BulletMultiThreaded LinearMath +) + +ADD_EXECUTABLE(AppMiniCLVectorAdd +MiniCL_VectorAdd.cpp +MiniCL.cpp +) + diff --git a/Demos/MiniCL_VectorAdd/MiniCL.cpp b/Demos/MiniCL_VectorAdd/MiniCL.cpp new file mode 100644 index 000000000..9f4595290 --- /dev/null +++ b/Demos/MiniCL_VectorAdd/MiniCL.cpp @@ -0,0 +1,346 @@ + +#include +#define __PHYSICS_COMMON_H__ 1 +#ifdef WIN32 +#include "BulletMultiThreaded/Win32ThreadSupport.h" +#else +#include "BulletMultiThreaded/SequentialThreadSupport.h" +#endif +#include "BulletMultiThreaded/MiniCLTaskScheduler.h" +#include "BulletMultiThreaded/MiniCLTask/MiniCLTask.h" +#include "LinearMath/btMinMax.h" + +/* + m_threadSupportCollision = new Win32ThreadSupport(Win32ThreadSupport::Win32ThreadConstructionInfo( + "collision", + processCollisionTask, + createCollisionLocalStoreMemory, + maxNumOutstandingTasks)); + + if (!m_spuCollisionTaskProcess) + m_spuCollisionTaskProcess = new SpuCollisionTaskProcess(m_threadInterface,m_maxNumOutstandingTasks); + + m_spuCollisionTaskProcess->initialize2(dispatchInfo.m_useEpa); + + m_spuCollisionTaskProcess->addWorkToTask(pairPtr,i,endIndex); + + //make sure all SPU work is done + m_spuCollisionTaskProcess->flush2(); + + +*/ + + + +CL_API_ENTRY cl_int CL_API_CALL clGetDeviceInfo( + cl_device_id device , + cl_device_info param_name , + size_t param_value_size , + void * param_value , + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0 +{ + + switch (param_name) + { + case CL_DEVICE_NAME: + { + char deviceName[] = "CPU"; + int nameLen = strlen(deviceName)+1; + assert(param_value_size>strlen(deviceName)); + if (nameLen < param_value_size) + { + sprintf((char*)param_value,"CPU"); + } else + { + printf("error: param_value_size should be at least %d, but it is %d\n",nameLen,param_value_size); + } + break; + } + case CL_DEVICE_TYPE: + { + if (param_value_size>=sizeof(cl_device_type)) + { + cl_device_type* deviceType = (cl_device_type*)param_value; + *deviceType = CL_DEVICE_TYPE_CPU; + } else + { + printf("error: param_value_size should be at least %d\n",sizeof(cl_device_type)); + } + break; + } + case CL_DEVICE_MAX_COMPUTE_UNITS: + { + if (param_value_size>=sizeof(cl_uint)) + { + cl_uint* numUnits = (cl_uint*)param_value; + *numUnits= 4; + } else + { + printf("error: param_value_size should be at least %d\n",sizeof(cl_uint)); + } + + break; + } + case CL_DEVICE_MAX_WORK_ITEM_SIZES: + { + size_t workitem_size[3]; + + if (param_value_size>=sizeof(workitem_size)) + { + size_t* workItemSize = (size_t*)param_value; + workItemSize[0] = 64; + workItemSize[1] = 24; + workItemSize[2] = 16; + } else + { + printf("error: param_value_size should be at least %d\n",sizeof(cl_uint)); + } + break; + } + default: + { + printf("error: unsupported param_name:%d\n",param_name); + } + } + + + return 0; +} + +CL_API_ENTRY cl_int CL_API_CALL clReleaseMemObject(cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0 +{ + return 0; +} + + + +CL_API_ENTRY cl_int CL_API_CALL clReleaseCommandQueue(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0 +{ + return 0; +} + +CL_API_ENTRY cl_int CL_API_CALL clReleaseProgram(cl_program /* program */) CL_API_SUFFIX__VERSION_1_0 +{ + return 0; +} + +CL_API_ENTRY cl_int CL_API_CALL clReleaseKernel(cl_kernel /* kernel */) CL_API_SUFFIX__VERSION_1_0 +{ + return 0; +} + + +// Enqueued Commands APIs +CL_API_ENTRY cl_int CL_API_CALL clEnqueueReadBuffer(cl_command_queue command_queue , + cl_mem buffer , + cl_bool /* blocking_read */, + size_t /* offset */, + size_t cb , + void * ptr , + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0 +{ + MiniCLTaskScheduler* scheduler = (MiniCLTaskScheduler*) command_queue; + + ///wait for all work items to be completed + scheduler->flush(); + + memcpy(ptr,buffer,cb); + return 0; +} + + +CL_API_ENTRY cl_int CL_API_CALL clEnqueueNDRangeKernel(cl_command_queue /* command_queue */, + cl_kernel clKernel , + cl_uint work_dim , + const size_t * /* global_work_offset */, + const size_t * global_work_size , + const size_t * /* local_work_size */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0 +{ + + + MiniCLKernel* kernel = (MiniCLKernel*) clKernel; + for (int ii=0;iim_scheduler->getMaxNumOutstandingTasks(); + int numWorkItems = global_work_size[ii]; + + //at minimum 64 work items per task + int numWorkItemsPerTask = btMax(64,numWorkItems / maxTask); + + for (int t=0;tm_scheduler->issueTask(t,endIndex,kernel->m_kernelProgramCommandId,(char*)&kernel->m_argData[0][0],kernel->m_argSizes); + t = endIndex; + } + } +/* + + void* bla = 0; + + scheduler->issueTask(bla,2,3); + scheduler->flush(); + + */ + + return 0; +} + +CL_API_ENTRY cl_int CL_API_CALL clSetKernelArg(cl_kernel clKernel , + cl_uint arg_index , + size_t arg_size , + const void * arg_value ) CL_API_SUFFIX__VERSION_1_0 +{ + MiniCLKernel* kernel = (MiniCLKernel* ) clKernel; + assert(arg_size < MINICL_MAX_ARGLENGTH); + if (arg_index>MINI_CL_MAX_ARG) + { + printf("error: clSetKernelArg arg_index (%d) exceeds %d\n",arg_index,MINI_CL_MAX_ARG); + } else + { + if (arg_size>=MINICL_MAX_ARGLENGTH) + { + printf("error: clSetKernelArg argdata too large: %d (maximum is %d)\n",arg_size,MINICL_MAX_ARGLENGTH); + } else + { + memcpy( kernel->m_argData[arg_index],arg_value,arg_size); + kernel->m_argSizes[arg_index] = arg_size; + } + } + return 0; +} + +// Kernel Object APIs +CL_API_ENTRY cl_kernel CL_API_CALL clCreateKernel(cl_program program , + const char * kernel_name , + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0 +{ + MiniCLTaskScheduler* scheduler = (MiniCLTaskScheduler*) program; + MiniCLKernel* kernel = new MiniCLKernel(); + + kernel->m_kernelProgramCommandId = scheduler->findProgramCommandIdByName(kernel_name); + kernel->m_scheduler = scheduler; + + return (cl_kernel)kernel; + +} + + +CL_API_ENTRY cl_int CL_API_CALL clBuildProgram(cl_program /* program */, + cl_uint /* num_devices */, + const cl_device_id * /* device_list */, + const char * /* options */, + void (*pfn_notify)(cl_program /* program */, void * /* user_data */), + void * /* user_data */) CL_API_SUFFIX__VERSION_1_0 +{ + return 0; +} + +CL_API_ENTRY cl_program CL_API_CALL clCreateProgramWithBinary(cl_context context , + cl_uint /* num_devices */, + const cl_device_id * /* device_list */, + const size_t * /* lengths */, + const unsigned char ** /* binaries */, + cl_int * /* binary_status */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0 +{ + return (cl_program)context; +} + + +// Memory Object APIs +CL_API_ENTRY cl_mem CL_API_CALL clCreateBuffer(cl_context /* context */, + cl_mem_flags flags , + size_t size, + void * host_ptr , + cl_int * errcode_ret ) CL_API_SUFFIX__VERSION_1_0 +{ + cl_mem buf = (cl_mem)malloc(size); + if ((flags&CL_MEM_COPY_HOST_PTR) && host_ptr) + { + memcpy(buf,host_ptr,size); + } + return buf; +} + +// Command Queue APIs +CL_API_ENTRY cl_command_queue CL_API_CALL clCreateCommandQueue(cl_context context , + cl_device_id /* device */, + cl_command_queue_properties /* properties */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0 +{ + return (cl_command_queue) context; +} + +extern CL_API_ENTRY cl_int CL_API_CALL clGetContextInfo(cl_context /* context */, + cl_context_info param_name , + size_t param_value_size , + void * param_value, + size_t * param_value_size_ret ) CL_API_SUFFIX__VERSION_1_0 +{ + + switch (param_name) + { + case CL_CONTEXT_DEVICES: + { + if (!param_value_size) + { + *param_value_size_ret = 13; + } else + { + sprintf((char*)param_value,"MiniCL_Test."); + } + break; + }; + default: + { + printf("unsupported\n"); + } + } + + return 0; +} + +CL_API_ENTRY cl_context CL_API_CALL clCreateContextFromType(cl_context_properties * /* properties */, + cl_device_type /* device_type */, + void (*pfn_notify)(const char *, const void *, size_t, void *) /* pfn_notify */, + void * /* user_data */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0 +{ + int maxNumOutstandingTasks = 4; + +#ifdef WIN32 + Win32ThreadSupport* threadSupport = new Win32ThreadSupport(Win32ThreadSupport::Win32ThreadConstructionInfo( + "MiniCL", + processMiniCLTask, //processCollisionTask, + createMiniCLLocalStoreMemory,//createCollisionLocalStoreMemory, + maxNumOutstandingTasks)); +#else + SequentialThreadSupport::SequentialThreadConstructionInfo stc("MiniCL",processMiniCLTask,createMiniCLLocalStoreMemory); + SequentialThreadSupport* threadSupport = new SequentialThreadSupport(stc); + +#endif + + + MiniCLTaskScheduler* scheduler = new MiniCLTaskScheduler(threadSupport,maxNumOutstandingTasks); + + return (cl_context)scheduler; +} + +CL_API_ENTRY cl_int CL_API_CALL clReleaseContext(cl_context context ) CL_API_SUFFIX__VERSION_1_0 +{ + + MiniCLTaskScheduler* scheduler = (MiniCLTaskScheduler*) context; + + btThreadSupportInterface* threadSupport = scheduler->getThreadSupportInterface(); + delete scheduler; + delete threadSupport; + + return 0; +} diff --git a/Demos/MiniCL_VectorAdd/MiniCL_VectorAdd.cpp b/Demos/MiniCL_VectorAdd/MiniCL_VectorAdd.cpp new file mode 100644 index 000000000..e737b179f --- /dev/null +++ b/Demos/MiniCL_VectorAdd/MiniCL_VectorAdd.cpp @@ -0,0 +1,172 @@ + +///VectorAdd sample, from the NVidia JumpStart Guide +///http://developer.download.nvidia.com/OpenCL/NVIDIA_OpenCL_JumpStart_Guide.pdf + +///Instead of #include we include +///Apart from this include file, all other code should compile and work on OpenCL compliant implementation + +#include +#include +#include +#include + + +void printDevInfo(cl_device_id device) +{ + char device_string[1024]; + + clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(device_string), &device_string, NULL); + printf( " Device %s:\n", device_string); + + // CL_DEVICE_INFO + cl_device_type type; + clGetDeviceInfo(device, CL_DEVICE_TYPE, sizeof(type), &type, NULL); + if( type & CL_DEVICE_TYPE_CPU ) + printf(" CL_DEVICE_TYPE:\t\t%s\n", "CL_DEVICE_TYPE_CPU"); + if( type & CL_DEVICE_TYPE_GPU ) + printf( " CL_DEVICE_TYPE:\t\t%s\n", "CL_DEVICE_TYPE_GPU"); + if( type & CL_DEVICE_TYPE_ACCELERATOR ) + printf( " CL_DEVICE_TYPE:\t\t%s\n", "CL_DEVICE_TYPE_ACCELERATOR"); + if( type & CL_DEVICE_TYPE_DEFAULT ) + printf( " CL_DEVICE_TYPE:\t\t%s\n", "CL_DEVICE_TYPE_DEFAULT"); + + // CL_DEVICE_MAX_COMPUTE_UNITS + cl_uint compute_units; + clGetDeviceInfo(device, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(compute_units), &compute_units, NULL); + printf( " CL_DEVICE_MAX_COMPUTE_UNITS:\t%d\n", compute_units); + + // CL_DEVICE_MAX_WORK_GROUP_SIZE + size_t workitem_size[3]; + clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(workitem_size), &workitem_size, NULL); + printf( " CL_DEVICE_MAX_WORK_ITEM_SIZES:\t%d / %d / %d \n", workitem_size[0], workitem_size[1], workitem_size[2]); + +} + + + + +// Main function +// ********************************************************************* +int main(int argc, char **argv) +{ + void *srcA, *srcB, *dst; // Host buffers for OpenCL test + cl_context cxGPUContext; // OpenCL context + cl_command_queue cqCommandQue; // OpenCL command que + cl_device_id* cdDevices; // OpenCL device list + cl_program cpProgram; // OpenCL program + cl_kernel ckKernel; // OpenCL kernel + cl_mem cmMemObjs[3]; // OpenCL memory buffer objects: 3 for device + size_t szGlobalWorkSize[1]; // 1D var for Total # of work items + size_t szLocalWorkSize[1]; // 1D var for # of work items in the work group + size_t szParmDataBytes; // Byte size of context information + cl_int ciErr1, ciErr2; // Error code var + int iTestN = 100000 * 8; // Size of Vectors to process + + // set Global and Local work size dimensions + szGlobalWorkSize[0] = iTestN >> 3; // do 8 computations per work item + szLocalWorkSize[0]= iTestN>>3; + + + // Allocate and initialize host arrays + srcA = (void *)malloc (sizeof(cl_float) * iTestN); + srcB = (void *)malloc (sizeof(cl_float) * iTestN); + dst = (void *)malloc (sizeof(cl_float) * iTestN); + + int i; + + // Initialize arrays with some values + for (i=0;i +#else +#include +#define spu_printf printf +#endif + +#define __kernel +#define __global +#define get_global_id(a) guid + +struct MiniCLTask_LocalStoreMemory +{ + +}; + + +/////////////////////////////////////////////////// +// OpenCL Kernel Function for element by element vector addition +__kernel void VectorAdd(__global const float8* a, __global const float8* b, __global float8* c, int guid) +{ + // get oct-float index into global data array + int iGID = get_global_id(0); + + // read inputs into registers + float8 f8InA = a[iGID]; + float8 f8InB = b[iGID]; + float8 f8Out = (float8)0.0f; + + // add the vector elements + f8Out.s0 = f8InA.s0 + f8InB.s0; + f8Out.s1 = f8InA.s1 + f8InB.s1; + f8Out.s2 = f8InA.s2 + f8InB.s2; + f8Out.s3 = f8InA.s3 + f8InB.s3; + f8Out.s4 = f8InA.s4 + f8InB.s4; + f8Out.s5 = f8InA.s5 + f8InB.s5; + f8Out.s6 = f8InA.s6 + f8InB.s6; + f8Out.s7 = f8InA.s7 + f8InB.s7; + + // write back out to GMEM + c[get_global_id(0)] = f8Out; +} +/////////////////////////////////////////////////// + + +//-- MAIN METHOD +void processMiniCLTask(void* userPtr, void* lsMemory) +{ + // BT_PROFILE("processSampleTask"); + + MiniCLTask_LocalStoreMemory* localMemory = (MiniCLTask_LocalStoreMemory*)lsMemory; + + MiniCLTaskDesc* taskDescPtr = (MiniCLTaskDesc*)userPtr; + MiniCLTaskDesc& taskDesc = *taskDescPtr; + + printf("Compute Unit[%d] executed kernel %d work items [%d..%d)\n",taskDesc.m_taskId,taskDesc.m_kernelProgramId,taskDesc.m_firstWorkUnit,taskDesc.m_lastWorkUnit); + + + switch (taskDesc.m_kernelProgramId) + { + case CMD_MINICL_ADDVECTOR: + { + for (unsigned int i=taskDesc.m_firstWorkUnit;i + +#ifdef __SPU__ + + + +void SampleThreadFunc(void* userPtr,void* lsMemory) +{ + //do nothing + printf("hello world\n"); +} + + +void* SamplelsMemoryFunc() +{ + //don't create local store memory, just return 0 + return 0; +} + + +#else + + +#include "btThreadSupportInterface.h" + +//# include "SPUAssert.h" +#include + + + +extern "C" { + extern char SPU_SAMPLE_ELF_SYMBOL[]; +} + + + + + +MiniCLTaskScheduler::MiniCLTaskScheduler(btThreadSupportInterface* threadInterface, int maxNumOutstandingTasks) +:m_threadInterface(threadInterface), +m_maxNumOutstandingTasks(maxNumOutstandingTasks) +{ + + m_taskBusy.resize(m_maxNumOutstandingTasks); + m_spuSampleTaskDesc.resize(m_maxNumOutstandingTasks); + + for (int i = 0; i < m_maxNumOutstandingTasks; i++) + { + m_taskBusy[i] = false; + } + m_numBusyTasks = 0; + m_currentTask = 0; + + m_initialized = false; + + m_threadInterface->startSPU(); + + +} + +MiniCLTaskScheduler::~MiniCLTaskScheduler() +{ + m_threadInterface->stopSPU(); + +} + + + +void MiniCLTaskScheduler::initialize() +{ +#ifdef DEBUG_SPU_TASK_SCHEDULING + printf("MiniCLTaskScheduler::initialize()\n"); +#endif //DEBUG_SPU_TASK_SCHEDULING + + for (int i = 0; i < m_maxNumOutstandingTasks; i++) + { + m_taskBusy[i] = false; + } + m_numBusyTasks = 0; + m_currentTask = 0; + m_initialized = true; + +} + + +void MiniCLTaskScheduler::issueTask(int firstWorkUnit, int lastWorkUnit,int kernelProgramId,char* argData,int* argSizes) +{ + +#ifdef DEBUG_SPU_TASK_SCHEDULING + printf("MiniCLTaskScheduler::issueTask (m_currentTask= %d\)n", m_currentTask); +#endif //DEBUG_SPU_TASK_SCHEDULING + + m_taskBusy[m_currentTask] = true; + m_numBusyTasks++; + + MiniCLTaskDesc& taskDesc = m_spuSampleTaskDesc[m_currentTask]; + { + // send task description in event message + taskDesc.m_firstWorkUnit = firstWorkUnit; + taskDesc.m_lastWorkUnit = lastWorkUnit; + taskDesc.m_kernelProgramId = kernelProgramId; + //some bookkeeping to recognize finished tasks + taskDesc.m_taskId = m_currentTask; + + for (int i=0;isendRequest(1, (ppu_address_t) &taskDesc, m_currentTask); + + // if all tasks busy, wait for spu event to clear the task. + + if (m_numBusyTasks >= m_maxNumOutstandingTasks) + { + unsigned int taskId; + unsigned int outputSize; + + for (int i=0;iwaitForResponse(&taskId, &outputSize); + + //printf("PPU: after issue, received event: %u %d\n", taskId, outputSize); + + postProcess(taskId, outputSize); + + m_taskBusy[taskId] = false; + + m_numBusyTasks--; + } + + // find new task buffer + for (int i = 0; i < m_maxNumOutstandingTasks; i++) + { + if (!m_taskBusy[i]) + { + m_currentTask = i; + break; + } + } +} + + +///Optional PPU-size post processing for each task +void MiniCLTaskScheduler::postProcess(int taskId, int outputSize) +{ + +} + + +void MiniCLTaskScheduler::flush() +{ +#ifdef DEBUG_SPU_TASK_SCHEDULING + printf("\nSpuCollisionTaskProcess::flush()\n"); +#endif //DEBUG_SPU_TASK_SCHEDULING + + + // all tasks are issued, wait for all tasks to be complete + while(m_numBusyTasks > 0) + { +// Consolidating SPU code + unsigned int taskId; + unsigned int outputSize; + + for (int i=0;iwaitForResponse(&taskId, &outputSize); + } + + //printf("PPU: flushing, received event: %u %d\n", taskId, outputSize); + + postProcess(taskId, outputSize); + + m_taskBusy[taskId] = false; + + m_numBusyTasks--; + } + + +} + +#endif + + +#endif //USE_SAMPLE_PROCESS diff --git a/src/BulletMultiThreaded/MiniCLTaskScheduler.h b/src/BulletMultiThreaded/MiniCLTaskScheduler.h new file mode 100644 index 000000000..580b509b8 --- /dev/null +++ b/src/BulletMultiThreaded/MiniCLTaskScheduler.h @@ -0,0 +1,181 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2003-2007 Erwin Coumans http://bulletphysics.com + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + + + +#ifndef MINICL_TASK_SCHEDULER_H +#define MINICL_TASK_SCHEDULER_H + +#include + + +#include "PlatformDefinitions.h" + +#include + +#include "LinearMath/btAlignedObjectArray.h" + + +#include "MiniCLTask/MiniCLTask.h" + + +//just add your commands here, try to keep them globally unique for debugging purposes +#define CMD_SAMPLE_TASK_COMMAND 10 + + + +/// MiniCLTaskScheduler handles SPU processing of collision pairs. +/// When PPU issues a task, it will look for completed task buffers +/// PPU will do postprocessing, dependent on workunit output (not likely) +class MiniCLTaskScheduler +{ + // track task buffers that are being used, and total busy tasks + btAlignedObjectArray m_taskBusy; + btAlignedObjectArray m_spuSampleTaskDesc; + + int m_numBusyTasks; + + // the current task and the current entry to insert a new work unit + int m_currentTask; + + bool m_initialized; + + void postProcess(int taskId, int outputSize); + + class btThreadSupportInterface* m_threadInterface; + + int m_maxNumOutstandingTasks; + + + +public: + MiniCLTaskScheduler(btThreadSupportInterface* threadInterface, int maxNumOutstandingTasks); + + ~MiniCLTaskScheduler(); + + ///call initialize in the beginning of the frame, before addCollisionPairToTask + void initialize(); + + void issueTask(int firstWorkUnit, int lastWorkUnit,int kernelProgramId,char* argData,int* argSizes); + + ///call flush to submit potential outstanding work to SPUs and wait for all involved SPUs to be finished + void flush(); + + class btThreadSupportInterface* getThreadSupportInterface() + { + return m_threadInterface; + } + + int findProgramCommandIdByName(const char* programName) const + { + return CMD_MINICL_ADDVECTOR;//hardcoded temp value, todo: implement multi-program support + } + + int getMaxNumOutstandingTasks() const + { + return m_maxNumOutstandingTasks; + } +}; + + +struct MiniCLKernel +{ + MiniCLTaskScheduler* m_scheduler; + + int m_kernelProgramCommandId; + + char m_argData[MINI_CL_MAX_ARG][MINICL_MAX_ARGLENGTH]; + int m_argSizes[MINI_CL_MAX_ARG]; +}; + + +#if defined(USE_LIBSPE2) && defined(__SPU__) +////////////////////MAIN///////////////////////////// +#include "../SpuLibspe2Support.h" +#include +#include +#include + +void * SamplelsMemoryFunc(); +void SampleThreadFunc(void* userPtr,void* lsMemory); + +//#define DEBUG_LIBSPE2_MAINLOOP + +int main(unsigned long long speid, addr64 argp, addr64 envp) +{ + printf("SPU is up \n"); + + ATTRIBUTE_ALIGNED128(btSpuStatus status); + ATTRIBUTE_ALIGNED16( SpuSampleTaskDesc taskDesc ) ; + unsigned int received_message = Spu_Mailbox_Event_Nothing; + bool shutdown = false; + + cellDmaGet(&status, argp.ull, sizeof(btSpuStatus), DMA_TAG(3), 0, 0); + cellDmaWaitTagStatusAll(DMA_MASK(3)); + + status.m_status = Spu_Status_Free; + status.m_lsMemory.p = SamplelsMemoryFunc(); + + cellDmaLargePut(&status, argp.ull, sizeof(btSpuStatus), DMA_TAG(3), 0, 0); + cellDmaWaitTagStatusAll(DMA_MASK(3)); + + + while (!shutdown) + { + received_message = spu_read_in_mbox(); + + + + switch(received_message) + { + case Spu_Mailbox_Event_Shutdown: + shutdown = true; + break; + case Spu_Mailbox_Event_Task: + // refresh the status +#ifdef DEBUG_LIBSPE2_MAINLOOP + printf("SPU recieved Task \n"); +#endif //DEBUG_LIBSPE2_MAINLOOP + cellDmaGet(&status, argp.ull, sizeof(btSpuStatus), DMA_TAG(3), 0, 0); + cellDmaWaitTagStatusAll(DMA_MASK(3)); + + btAssert(status.m_status==Spu_Status_Occupied); + + cellDmaGet(&taskDesc, status.m_taskDesc.p, sizeof(SpuSampleTaskDesc), DMA_TAG(3), 0, 0); + cellDmaWaitTagStatusAll(DMA_MASK(3)); + + SampleThreadFunc((void*)&taskDesc, reinterpret_cast (taskDesc.m_mainMemoryPtr) ); + break; + case Spu_Mailbox_Event_Nothing: + default: + break; + } + + // set to status free and wait for next task + status.m_status = Spu_Status_Free; + cellDmaLargePut(&status, argp.ull, sizeof(btSpuStatus), DMA_TAG(3), 0, 0); + cellDmaWaitTagStatusAll(DMA_MASK(3)); + + + } + return 0; +} +////////////////////////////////////////////////////// +#endif + + + +#endif // MINICL_TASK_SCHEDULER_H + diff --git a/src/MiniCL/cl.h b/src/MiniCL/cl.h new file mode 100644 index 000000000..b0cda4237 --- /dev/null +++ b/src/MiniCL/cl.h @@ -0,0 +1,865 @@ +/******************************************************************************* + * Copyright (c) 2008-2009 The Khronos Group Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and/or associated documentation files (the + * "Materials"), to deal in the Materials without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Materials, and to + * permit persons to whom the Materials are furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Materials. + * + * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. + ******************************************************************************/ + +#ifndef __OPENCL_CL_H +#define __OPENCL_CL_H + +#ifdef __APPLE__ +#include +#else +#include +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/******************************************************************************/ + +typedef struct _cl_platform_id * cl_platform_id; +typedef struct _cl_device_id * cl_device_id; +typedef struct _cl_context * cl_context; +typedef struct _cl_command_queue * cl_command_queue; +typedef struct _cl_mem * cl_mem; +typedef struct _cl_program * cl_program; +typedef struct _cl_kernel * cl_kernel; +typedef struct _cl_event * cl_event; +typedef struct _cl_sampler * cl_sampler; + +typedef cl_uint cl_bool; /* WARNING! Unlike cl_ types in cl_platform.h, cl_bool is not guaranteed to be the same size as the bool in kernels. */ +typedef cl_ulong cl_bitfield; +typedef cl_bitfield cl_device_type; +typedef cl_uint cl_platform_info; +typedef cl_uint cl_device_info; +typedef cl_bitfield cl_device_address_info; +typedef cl_bitfield cl_device_fp_config; +typedef cl_uint cl_device_mem_cache_type; +typedef cl_uint cl_device_local_mem_type; +typedef cl_bitfield cl_device_exec_capabilities; +typedef cl_bitfield cl_command_queue_properties; + +typedef intptr_t cl_context_properties; +typedef cl_uint cl_context_info; +typedef cl_uint cl_command_queue_info; +typedef cl_uint cl_channel_order; +typedef cl_uint cl_channel_type; +typedef cl_bitfield cl_mem_flags; +typedef cl_uint cl_mem_object_type; +typedef cl_uint cl_mem_info; +typedef cl_uint cl_image_info; +typedef cl_uint cl_addressing_mode; +typedef cl_uint cl_filter_mode; +typedef cl_uint cl_sampler_info; +typedef cl_bitfield cl_map_flags; +typedef cl_uint cl_program_info; +typedef cl_uint cl_program_build_info; +typedef cl_int cl_build_status; +typedef cl_uint cl_kernel_info; +typedef cl_uint cl_kernel_work_group_info; +typedef cl_uint cl_event_info; +typedef cl_uint cl_command_type; +typedef cl_uint cl_profiling_info; + +typedef struct _cl_image_format { + cl_channel_order image_channel_order; + cl_channel_type image_channel_data_type; +} cl_image_format; + +/******************************************************************************/ + +// Error Codes +#define CL_SUCCESS 0 +#define CL_DEVICE_NOT_FOUND -1 +#define CL_DEVICE_NOT_AVAILABLE -2 +#define CL_DEVICE_COMPILER_NOT_AVAILABLE -3 +#define CL_MEM_OBJECT_ALLOCATION_FAILURE -4 +#define CL_OUT_OF_RESOURCES -5 +#define CL_OUT_OF_HOST_MEMORY -6 +#define CL_PROFILING_INFO_NOT_AVAILABLE -7 +#define CL_MEM_COPY_OVERLAP -8 +#define CL_IMAGE_FORMAT_MISMATCH -9 +#define CL_IMAGE_FORMAT_NOT_SUPPORTED -10 +#define CL_BUILD_PROGRAM_FAILURE -11 +#define CL_MAP_FAILURE -12 + +#define CL_INVALID_VALUE -30 +#define CL_INVALID_DEVICE_TYPE -31 +#define CL_INVALID_PLATFORM -32 +#define CL_INVALID_DEVICE -33 +#define CL_INVALID_CONTEXT -34 +#define CL_INVALID_QUEUE_PROPERTIES -35 +#define CL_INVALID_COMMAND_QUEUE -36 +#define CL_INVALID_HOST_PTR -37 +#define CL_INVALID_MEM_OBJECT -38 +#define CL_INVALID_IMAGE_FORMAT_DESCRIPTOR -39 +#define CL_INVALID_IMAGE_SIZE -40 +#define CL_INVALID_SAMPLER -41 +#define CL_INVALID_BINARY -42 +#define CL_INVALID_BUILD_OPTIONS -43 +#define CL_INVALID_PROGRAM -44 +#define CL_INVALID_PROGRAM_EXECUTABLE -45 +#define CL_INVALID_KERNEL_NAME -46 +#define CL_INVALID_KERNEL_DEFINITION -47 +#define CL_INVALID_KERNEL -48 +#define CL_INVALID_ARG_INDEX -49 +#define CL_INVALID_ARG_VALUE -50 +#define CL_INVALID_ARG_SIZE -51 +#define CL_INVALID_KERNEL_ARGS -52 +#define CL_INVALID_WORK_DIMENSION -53 +#define CL_INVALID_WORK_GROUP_SIZE -54 +#define CL_INVALID_WORK_ITEM_SIZE -55 +#define CL_INVALID_GLOBAL_OFFSET -56 +#define CL_INVALID_EVENT_WAIT_LIST -57 +#define CL_INVALID_EVENT -58 +#define CL_INVALID_OPERATION -59 +#define CL_INVALID_GL_OBJECT -60 +#define CL_INVALID_BUFFER_SIZE -61 +#define CL_INVALID_MIP_LEVEL -62 + +// OpenCL Version +#define CL_VERSION_1_0 1 + +// cl_bool +#define CL_FALSE 0 +#define CL_TRUE 1 + +// cl_platform_info +#define CL_PLATFORM_PROFILE 0x0900 +#define CL_PLATFORM_VERSION 0x0901 +#define CL_PLATFORM_NAME 0x0902 +#define CL_PLATFORM_VENDOR 0x0903 +#define CL_PLATFORM_EXTENSIONS 0x0904 + +// cl_device_type - bitfield +#define CL_DEVICE_TYPE_DEFAULT (1 << 0) +#define CL_DEVICE_TYPE_CPU (1 << 1) +#define CL_DEVICE_TYPE_GPU (1 << 2) +#define CL_DEVICE_TYPE_ACCELERATOR (1 << 3) +#define CL_DEVICE_TYPE_ALL 0xFFFFFFFF + +// cl_device_info +#define CL_DEVICE_TYPE 0x1000 +#define CL_DEVICE_VENDOR_ID 0x1001 +#define CL_DEVICE_MAX_COMPUTE_UNITS 0x1002 +#define CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS 0x1003 +#define CL_DEVICE_MAX_WORK_GROUP_SIZE 0x1004 +#define CL_DEVICE_MAX_WORK_ITEM_SIZES 0x1005 +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR 0x1006 +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT 0x1007 +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT 0x1008 +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG 0x1009 +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT 0x100A +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE 0x100B +#define CL_DEVICE_MAX_CLOCK_FREQUENCY 0x100C +#define CL_DEVICE_ADDRESS_BITS 0x100D +#define CL_DEVICE_MAX_READ_IMAGE_ARGS 0x100E +#define CL_DEVICE_MAX_WRITE_IMAGE_ARGS 0x100F +#define CL_DEVICE_MAX_MEM_ALLOC_SIZE 0x1010 +#define CL_DEVICE_IMAGE2D_MAX_WIDTH 0x1011 +#define CL_DEVICE_IMAGE2D_MAX_HEIGHT 0x1012 +#define CL_DEVICE_IMAGE3D_MAX_WIDTH 0x1013 +#define CL_DEVICE_IMAGE3D_MAX_HEIGHT 0x1014 +#define CL_DEVICE_IMAGE3D_MAX_DEPTH 0x1015 +#define CL_DEVICE_IMAGE_SUPPORT 0x1016 +#define CL_DEVICE_MAX_PARAMETER_SIZE 0x1017 +#define CL_DEVICE_MAX_SAMPLERS 0x1018 +#define CL_DEVICE_MEM_BASE_ADDR_ALIGN 0x1019 +#define CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE 0x101A +#define CL_DEVICE_SINGLE_FP_CONFIG 0x101B +#define CL_DEVICE_GLOBAL_MEM_CACHE_TYPE 0x101C +#define CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE 0x101D +#define CL_DEVICE_GLOBAL_MEM_CACHE_SIZE 0x101E +#define CL_DEVICE_GLOBAL_MEM_SIZE 0x101F +#define CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE 0x1020 +#define CL_DEVICE_MAX_CONSTANT_ARGS 0x1021 +#define CL_DEVICE_LOCAL_MEM_TYPE 0x1022 +#define CL_DEVICE_LOCAL_MEM_SIZE 0x1023 +#define CL_DEVICE_ERROR_CORRECTION_SUPPORT 0x1024 +#define CL_DEVICE_PROFILING_TIMER_RESOLUTION 0x1025 +#define CL_DEVICE_ENDIAN_LITTLE 0x1026 +#define CL_DEVICE_AVAILABLE 0x1027 +#define CL_DEVICE_COMPILER_AVAILABLE 0x1028 +#define CL_DEVICE_EXECUTION_CAPABILITIES 0x1029 +#define CL_DEVICE_QUEUE_PROPERTIES 0x102A +#define CL_DEVICE_NAME 0x102B +#define CL_DEVICE_VENDOR 0x102C +#define CL_DRIVER_VERSION 0x102D +#define CL_DEVICE_PROFILE 0x102E +#define CL_DEVICE_VERSION 0x102F +#define CL_DEVICE_EXTENSIONS 0x1030 +#define CL_DEVICE_PLATFORM 0x1031 + +// cl_device_address_info - bitfield +#define CL_DEVICE_ADDRESS_32_BITS (1 << 0) +#define CL_DEVICE_ADDRESS_64_BITS (1 << 1) + +// cl_device_fp_config - bitfield +#define CL_FP_DENORM (1 << 0) +#define CL_FP_INF_NAN (1 << 1) +#define CL_FP_ROUND_TO_NEAREST (1 << 2) +#define CL_FP_ROUND_TO_ZERO (1 << 3) +#define CL_FP_ROUND_TO_INF (1 << 4) +#define CL_FP_FMA (1 << 5) + +// cl_device_mem_cache_type +#define CL_NONE 0x0 +#define CL_READ_ONLY_CACHE 0x1 +#define CL_READ_WRITE_CACHE 0x2 + +// cl_device_local_mem_type +#define CL_LOCAL 0x1 +#define CL_GLOBAL 0x2 + +// cl_device_exec_capabilities - bitfield +#define CL_EXEC_KERNEL (1 << 0) +#define CL_EXEC_NATIVE_KERNEL (1 << 1) + +// cl_command_queue_properties - bitfield +#define CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE (1 << 0) +#define CL_QUEUE_PROFILING_ENABLE (1 << 1) + +// cl_context_info +#define CL_CONTEXT_REFERENCE_COUNT 0x1080 +#define CL_CONTEXT_NUM_DEVICES 0x1081 +#define CL_CONTEXT_DEVICES 0x1082 +#define CL_CONTEXT_PROPERTIES 0x1083 +#define CL_CONTEXT_PLATFORM 0x1084 + +// cl_command_queue_info +#define CL_QUEUE_CONTEXT 0x1090 +#define CL_QUEUE_DEVICE 0x1091 +#define CL_QUEUE_REFERENCE_COUNT 0x1092 +#define CL_QUEUE_PROPERTIES 0x1093 + +// cl_mem_flags - bitfield +#define CL_MEM_READ_WRITE (1 << 0) +#define CL_MEM_WRITE_ONLY (1 << 1) +#define CL_MEM_READ_ONLY (1 << 2) +#define CL_MEM_USE_HOST_PTR (1 << 3) +#define CL_MEM_ALLOC_HOST_PTR (1 << 4) +#define CL_MEM_COPY_HOST_PTR (1 << 5) + +// cl_channel_order +#define CL_R 0x10B0 +#define CL_A 0x10B1 +#define CL_RG 0x10B2 +#define CL_RA 0x10B3 +#define CL_RGB 0x10B4 +#define CL_RGBA 0x10B5 +#define CL_BGRA 0x10B6 +#define CL_ARGB 0x10B7 +#define CL_INTENSITY 0x10B8 +#define CL_LUMINANCE 0x10B9 + +// cl_channel_type +#define CL_SNORM_INT8 0x10D0 +#define CL_SNORM_INT16 0x10D1 +#define CL_UNORM_INT8 0x10D2 +#define CL_UNORM_INT16 0x10D3 +#define CL_UNORM_SHORT_565 0x10D4 +#define CL_UNORM_SHORT_555 0x10D5 +#define CL_UNORM_INT_101010 0x10D6 +#define CL_SIGNED_INT8 0x10D7 +#define CL_SIGNED_INT16 0x10D8 +#define CL_SIGNED_INT32 0x10D9 +#define CL_UNSIGNED_INT8 0x10DA +#define CL_UNSIGNED_INT16 0x10DB +#define CL_UNSIGNED_INT32 0x10DC +#define CL_HALF_FLOAT 0x10DD +#define CL_FLOAT 0x10DE + +// cl_mem_object_type +#define CL_MEM_OBJECT_BUFFER 0x10F0 +#define CL_MEM_OBJECT_IMAGE2D 0x10F1 +#define CL_MEM_OBJECT_IMAGE3D 0x10F2 + +// cl_mem_info +#define CL_MEM_TYPE 0x1100 +#define CL_MEM_FLAGS 0x1101 +#define CL_MEM_SIZE 0x1102 +#define CL_MEM_HOST_PTR 0x1103 +#define CL_MEM_MAP_COUNT 0x1104 +#define CL_MEM_REFERENCE_COUNT 0x1105 +#define CL_MEM_CONTEXT 0x1106 + +// cl_image_info +#define CL_IMAGE_FORMAT 0x1110 +#define CL_IMAGE_ELEMENT_SIZE 0x1111 +#define CL_IMAGE_ROW_PITCH 0x1112 +#define CL_IMAGE_SLICE_PITCH 0x1113 +#define CL_IMAGE_WIDTH 0x1114 +#define CL_IMAGE_HEIGHT 0x1115 +#define CL_IMAGE_DEPTH 0x1116 + +// cl_addressing_mode +#define CL_ADDRESS_NONE 0x1130 +#define CL_ADDRESS_CLAMP_TO_EDGE 0x1131 +#define CL_ADDRESS_CLAMP 0x1132 +#define CL_ADDRESS_REPEAT 0x1133 + +// cl_filter_mode +#define CL_FILTER_NEAREST 0x1140 +#define CL_FILTER_LINEAR 0x1141 + +// cl_sampler_info +#define CL_SAMPLER_REFERENCE_COUNT 0x1150 +#define CL_SAMPLER_CONTEXT 0x1151 +#define CL_SAMPLER_NORMALIZED_COORDS 0x1152 +#define CL_SAMPLER_ADDRESSING_MODE 0x1153 +#define CL_SAMPLER_FILTER_MODE 0x1154 + +// cl_map_flags - bitfield +#define CL_MAP_READ (1 << 0) +#define CL_MAP_WRITE (1 << 1) + +// cl_program_info +#define CL_PROGRAM_REFERENCE_COUNT 0x1160 +#define CL_PROGRAM_CONTEXT 0x1161 +#define CL_PROGRAM_NUM_DEVICES 0x1162 +#define CL_PROGRAM_DEVICES 0x1163 +#define CL_PROGRAM_SOURCE 0x1164 +#define CL_PROGRAM_BINARY_SIZES 0x1165 +#define CL_PROGRAM_BINARIES 0x1166 + +// cl_program_build_info +#define CL_PROGRAM_BUILD_STATUS 0x1181 +#define CL_PROGRAM_BUILD_OPTIONS 0x1182 +#define CL_PROGRAM_BUILD_LOG 0x1183 + +// cl_build_status +#define CL_BUILD_SUCCESS 0 +#define CL_BUILD_NONE -1 +#define CL_BUILD_ERROR -2 +#define CL_BUILD_IN_PROGRESS -3 + +// cl_kernel_info +#define CL_KERNEL_FUNCTION_NAME 0x1190 +#define CL_KERNEL_NUM_ARGS 0x1191 +#define CL_KERNEL_REFERENCE_COUNT 0x1192 +#define CL_KERNEL_CONTEXT 0x1193 +#define CL_KERNEL_PROGRAM 0x1194 + +// cl_kernel_work_group_info +#define CL_KERNEL_WORK_GROUP_SIZE 0x11B0 +#define CL_KERNEL_COMPILE_WORK_GROUP_SIZE 0x11B1 +#define CL_KERNEL_LOCAL_MEM_SIZE 0x11B2 + +// cl_event_info +#define CL_EVENT_COMMAND_QUEUE 0x11D0 +#define CL_EVENT_COMMAND_TYPE 0x11D1 +#define CL_EVENT_REFERENCE_COUNT 0x11D2 +#define CL_EVENT_COMMAND_EXECUTION_STATUS 0x11D3 + +// cl_command_type +#define CL_COMMAND_NDRANGE_KERNEL 0x11F0 +#define CL_COMMAND_TASK 0x11F1 +#define CL_COMMAND_NATIVE_KERNEL 0x11F2 +#define CL_COMMAND_READ_BUFFER 0x11F3 +#define CL_COMMAND_WRITE_BUFFER 0x11F4 +#define CL_COMMAND_COPY_BUFFER 0x11F5 +#define CL_COMMAND_READ_IMAGE 0x11F6 +#define CL_COMMAND_WRITE_IMAGE 0x11F7 +#define CL_COMMAND_COPY_IMAGE 0x11F8 +#define CL_COMMAND_COPY_IMAGE_TO_BUFFER 0x11F9 +#define CL_COMMAND_COPY_BUFFER_TO_IMAGE 0x11FA +#define CL_COMMAND_MAP_BUFFER 0x11FB +#define CL_COMMAND_MAP_IMAGE 0x11FC +#define CL_COMMAND_UNMAP_MEM_OBJECT 0x11FD +#define CL_COMMAND_MARKER 0x11FE +#define CL_COMMAND_WAIT_FOR_EVENTS 0x11FF +#define CL_COMMAND_BARRIER 0x1200 +#define CL_COMMAND_ACQUIRE_GL_OBJECTS 0x1201 +#define CL_COMMAND_RELEASE_GL_OBJECTS 0x1202 + +// command execution status +#define CL_COMPLETE 0x0 +#define CL_RUNNING 0x1 +#define CL_SUBMITTED 0x2 +#define CL_QUEUED 0x3 + +// cl_profiling_info +#define CL_PROFILING_COMMAND_QUEUED 0x1280 +#define CL_PROFILING_COMMAND_SUBMIT 0x1281 +#define CL_PROFILING_COMMAND_START 0x1282 +#define CL_PROFILING_COMMAND_END 0x1283 + +/********************************************************************************************************/ + +// Platform API +extern CL_API_ENTRY cl_int CL_API_CALL +clGetPlatformIDs(cl_uint /* num_entries */, + cl_platform_id * /* platforms */, + cl_uint * /* num_platforms */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetPlatformInfo(cl_platform_id /* platform */, + cl_platform_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +// Device APIs +extern CL_API_ENTRY cl_int CL_API_CALL +clGetDeviceIDs(cl_platform_id /* platform */, + cl_device_type /* device_type */, + cl_uint /* num_entries */, + cl_device_id * /* devices */, + cl_uint * /* num_devices */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetDeviceInfo(cl_device_id /* device */, + cl_device_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +// Context APIs +extern CL_API_ENTRY cl_context CL_API_CALL +clCreateContext(cl_context_properties * /* properties */, + cl_uint /* num_devices */, + const cl_device_id * /* devices */, + void (*pfn_notify)(const char *, const void *, size_t, void *) /* pfn_notify */, + void * /* user_data */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_context CL_API_CALL +clCreateContextFromType(cl_context_properties * /* properties */, + cl_device_type /* device_type */, + void (*pfn_notify)(const char *, const void *, size_t, void *) /* pfn_notify */, + void * /* user_data */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainContext(cl_context /* context */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseContext(cl_context /* context */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetContextInfo(cl_context /* context */, + cl_context_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +// Command Queue APIs +extern CL_API_ENTRY cl_command_queue CL_API_CALL +clCreateCommandQueue(cl_context /* context */, + cl_device_id /* device */, + cl_command_queue_properties /* properties */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainCommandQueue(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseCommandQueue(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetCommandQueueInfo(cl_command_queue /* command_queue */, + cl_command_queue_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetCommandQueueProperty(cl_command_queue /* command_queue */, + cl_command_queue_properties /* properties */, + cl_bool /* enable */, + cl_command_queue_properties * /* old_properties */) CL_API_SUFFIX__VERSION_1_0; + +// Memory Object APIs +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateBuffer(cl_context /* context */, + cl_mem_flags /* flags */, + size_t /* size */, + void * /* host_ptr */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateImage2D(cl_context /* context */, + cl_mem_flags /* flags */, + const cl_image_format * /* image_format */, + size_t /* image_width */, + size_t /* image_height */, + size_t /* image_row_pitch */, + void * /* host_ptr */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateImage3D(cl_context /* context */, + cl_mem_flags /* flags */, + const cl_image_format * /* image_format */, + size_t /* image_width */, + size_t /* image_height */, + size_t /* image_depth */, + size_t /* image_row_pitch */, + size_t /* image_slice_pitch */, + void * /* host_ptr */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainMemObject(cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseMemObject(cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetSupportedImageFormats(cl_context /* context */, + cl_mem_flags /* flags */, + cl_mem_object_type /* image_type */, + cl_uint /* num_entries */, + cl_image_format * /* image_formats */, + cl_uint * /* num_image_formats */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetMemObjectInfo(cl_mem /* memobj */, + cl_mem_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetImageInfo(cl_mem /* image */, + cl_image_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +// Sampler APIs +extern CL_API_ENTRY cl_sampler CL_API_CALL +clCreateSampler(cl_context /* context */, + cl_bool /* normalized_coords */, + cl_addressing_mode /* addressing_mode */, + cl_filter_mode /* filter_mode */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainSampler(cl_sampler /* sampler */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseSampler(cl_sampler /* sampler */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetSamplerInfo(cl_sampler /* sampler */, + cl_sampler_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +// Program Object APIs +extern CL_API_ENTRY cl_program CL_API_CALL +clCreateProgramWithSource(cl_context /* context */, + cl_uint /* count */, + const char ** /* strings */, + const size_t * /* lengths */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_program CL_API_CALL +clCreateProgramWithBinary(cl_context /* context */, + cl_uint /* num_devices */, + const cl_device_id * /* device_list */, + const size_t * /* lengths */, + const unsigned char ** /* binaries */, + cl_int * /* binary_status */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainProgram(cl_program /* program */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseProgram(cl_program /* program */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clBuildProgram(cl_program /* program */, + cl_uint /* num_devices */, + const cl_device_id * /* device_list */, + const char * /* options */, + void (*pfn_notify)(cl_program /* program */, void * /* user_data */), + void * /* user_data */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clUnloadCompiler(void) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetProgramInfo(cl_program /* program */, + cl_program_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetProgramBuildInfo(cl_program /* program */, + cl_device_id /* device */, + cl_program_build_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +// Kernel Object APIs +extern CL_API_ENTRY cl_kernel CL_API_CALL +clCreateKernel(cl_program /* program */, + const char * /* kernel_name */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clCreateKernelsInProgram(cl_program /* program */, + cl_uint /* num_kernels */, + cl_kernel * /* kernels */, + cl_uint * /* num_kernels_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainKernel(cl_kernel /* kernel */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseKernel(cl_kernel /* kernel */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetKernelArg(cl_kernel /* kernel */, + cl_uint /* arg_index */, + size_t /* arg_size */, + const void * /* arg_value */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetKernelInfo(cl_kernel /* kernel */, + cl_kernel_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetKernelWorkGroupInfo(cl_kernel /* kernel */, + cl_device_id /* device */, + cl_kernel_work_group_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +// Event Object APIs +extern CL_API_ENTRY cl_int CL_API_CALL +clWaitForEvents(cl_uint /* num_events */, + const cl_event * /* event_list */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetEventInfo(cl_event /* event */, + cl_event_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainEvent(cl_event /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseEvent(cl_event /* event */) CL_API_SUFFIX__VERSION_1_0; + +// Profiling APIs +extern CL_API_ENTRY cl_int CL_API_CALL +clGetEventProfilingInfo(cl_event /* event */, + cl_profiling_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +// Flush and Finish APIs +extern CL_API_ENTRY cl_int CL_API_CALL +clFlush(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clFinish(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0; + +// Enqueued Commands APIs +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueReadBuffer(cl_command_queue /* command_queue */, + cl_mem /* buffer */, + cl_bool /* blocking_read */, + size_t /* offset */, + size_t /* cb */, + void * /* ptr */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueWriteBuffer(cl_command_queue /* command_queue */, + cl_mem /* buffer */, + cl_bool /* blocking_write */, + size_t /* offset */, + size_t /* cb */, + const void * /* ptr */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueCopyBuffer(cl_command_queue /* command_queue */, + cl_mem /* src_buffer */, + cl_mem /* dst_buffer */, + size_t /* src_offset */, + size_t /* dst_offset */, + size_t /* cb */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueReadImage(cl_command_queue /* command_queue */, + cl_mem /* image */, + cl_bool /* blocking_read */, + const size_t * /* origin[3] */, + const size_t * /* region[3] */, + size_t /* row_pitch */, + size_t /* slice_pitch */, + void * /* ptr */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueWriteImage(cl_command_queue /* command_queue */, + cl_mem /* image */, + cl_bool /* blocking_write */, + const size_t * /* origin[3] */, + const size_t * /* region[3] */, + size_t /* input_row_pitch */, + size_t /* input_slice_pitch */, + const void * /* ptr */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueCopyImage(cl_command_queue /* command_queue */, + cl_mem /* src_image */, + cl_mem /* dst_image */, + const size_t * /* src_origin[3] */, + const size_t * /* dst_origin[3] */, + const size_t * /* region[3] */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueCopyImageToBuffer(cl_command_queue /* command_queue */, + cl_mem /* src_image */, + cl_mem /* dst_buffer */, + const size_t * /* src_origin[3] */, + const size_t * /* region[3] */, + size_t /* dst_offset */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueCopyBufferToImage(cl_command_queue /* command_queue */, + cl_mem /* src_buffer */, + cl_mem /* dst_image */, + size_t /* src_offset */, + const size_t * /* dst_origin[3] */, + const size_t * /* region[3] */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY void * CL_API_CALL +clEnqueueMapBuffer(cl_command_queue /* command_queue */, + cl_mem /* buffer */, + cl_bool /* blocking_map */, + cl_map_flags /* map_flags */, + size_t /* offset */, + size_t /* cb */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY void * CL_API_CALL +clEnqueueMapImage(cl_command_queue /* command_queue */, + cl_mem /* image */, + cl_bool /* blocking_map */, + cl_map_flags /* map_flags */, + const size_t * /* origin[3] */, + const size_t * /* region[3] */, + size_t * /* image_row_pitch */, + size_t * /* image_slice_pitch */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueUnmapMemObject(cl_command_queue /* command_queue */, + cl_mem /* memobj */, + void * /* mapped_ptr */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueNDRangeKernel(cl_command_queue /* command_queue */, + cl_kernel /* kernel */, + cl_uint /* work_dim */, + const size_t * /* global_work_offset */, + const size_t * /* global_work_size */, + const size_t * /* local_work_size */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueTask(cl_command_queue /* command_queue */, + cl_kernel /* kernel */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueNativeKernel(cl_command_queue /* command_queue */, + void (*user_func)(void *), + void * /* args */, + size_t /* cb_args */, + cl_uint /* num_mem_objects */, + const cl_mem * /* mem_list */, + const void ** /* args_mem_loc */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueMarker(cl_command_queue /* command_queue */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueWaitForEvents(cl_command_queue /* command_queue */, + cl_uint /* num_events */, + const cl_event * /* event_list */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueBarrier(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0; + +#ifdef __cplusplus +} +#endif + +#endif // __OPENCL_CL_H + diff --git a/src/MiniCL/cl_gl.h b/src/MiniCL/cl_gl.h new file mode 100644 index 000000000..71bdaaa6e --- /dev/null +++ b/src/MiniCL/cl_gl.h @@ -0,0 +1,113 @@ +/********************************************************************************** + * Copyright (c) 2008-2009 The Khronos Group Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and/or associated documentation files (the + * "Materials"), to deal in the Materials without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Materials, and to + * permit persons to whom the Materials are furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Materials. + * + * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. + **********************************************************************************/ + +#ifndef __OPENCL_CL_GL_H +#define __OPENCL_CL_GL_H + +#ifdef __APPLE__ +#include +#else +#include +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +// NOTE: Make sure that appropriate GL header file is included separately + +typedef cl_uint cl_gl_object_type; +typedef cl_uint cl_gl_texture_info; +typedef cl_uint cl_gl_platform_info; + +// cl_gl_object_type +#define CL_GL_OBJECT_BUFFER 0x2000 +#define CL_GL_OBJECT_TEXTURE2D 0x2001 +#define CL_GL_OBJECT_TEXTURE3D 0x2002 +#define CL_GL_OBJECT_RENDERBUFFER 0x2003 + +// cl_gl_texture_info +#define CL_GL_TEXTURE_TARGET 0x2004 +#define CL_GL_MIPMAP_LEVEL 0x2005 + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateFromGLBuffer(cl_context /* context */, + cl_mem_flags /* flags */, + GLuint /* bufobj */, + int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateFromGLTexture2D(cl_context /* context */, + cl_mem_flags /* flags */, + GLenum /* target */, + GLint /* miplevel */, + GLuint /* texture */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateFromGLTexture3D(cl_context /* context */, + cl_mem_flags /* flags */, + GLenum /* target */, + GLint /* miplevel */, + GLuint /* texture */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateFromGLRenderbuffer(cl_context /* context */, + cl_mem_flags /* flags */, + GLuint /* renderbuffer */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetGLObjectInfo(cl_mem /* memobj */, + cl_gl_object_type * /* gl_object_type */, + GLuint * /* gl_object_name */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetGLTextureInfo(cl_mem /* memobj */, + cl_gl_texture_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueAcquireGLObjects(cl_command_queue /* command_queue */, + cl_uint /* num_objects */, + const cl_mem * /* mem_objects */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueReleaseGLObjects(cl_command_queue /* command_queue */, + cl_uint /* num_objects */, + const cl_mem * /* mem_objects */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +#ifdef __cplusplus +} +#endif + +#endif // __OPENCL_CL_GL_H diff --git a/src/MiniCL/cl_platform.h b/src/MiniCL/cl_platform.h new file mode 100644 index 000000000..522512996 --- /dev/null +++ b/src/MiniCL/cl_platform.h @@ -0,0 +1,244 @@ +/********************************************************************************** + * Copyright (c) 2008-2009 The Khronos Group Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and/or associated documentation files (the + * "Materials"), to deal in the Materials without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Materials, and to + * permit persons to whom the Materials are furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Materials. + * + * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. + **********************************************************************************/ + +#ifndef __CL_PLATFORM_H +#define __CL_PLATFORM_H + +#ifdef __APPLE__ + /* Contains #defines for AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER below */ + #include +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +#define CL_API_ENTRY +#define CL_API_CALL +#ifdef __APPLE__ +#define CL_API_SUFFIX__VERSION_1_0 // AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER +#define CL_EXTENSION_WEAK_LINK __attribute__((weak_import)) +#else +#define CL_API_SUFFIX__VERSION_1_0 +#define CL_EXTENSION_WEAK_LINK +#endif + +#ifdef WIN32 +typedef signed __int8 int8_t; +typedef unsigned __int8 uint8_t; +typedef signed __int16 int16_t; +typedef unsigned __int16 uint16_t; +typedef signed __int32 int32_t; +typedef unsigned __int32 uint32_t; +typedef signed __int64 int64_t; +typedef unsigned __int64 uint64_t; + +typedef int8_t cl_char; +typedef uint8_t cl_uchar; +typedef int16_t cl_short ; +typedef uint16_t cl_ushort ; +typedef int32_t cl_int ; +typedef uint32_t cl_uint ; +typedef int64_t cl_long ; +typedef uint64_t cl_ulong ; + +typedef uint16_t cl_half ; +typedef float cl_float ; +typedef double cl_double ; + + +typedef int8_t cl_char2[2] ; +typedef int8_t cl_char4[4] ; +typedef int8_t cl_char8[8] ; +typedef int8_t cl_char16[16] ; +typedef uint8_t cl_uchar2[2] ; +typedef uint8_t cl_uchar4[4] ; +typedef uint8_t cl_uchar8[8] ; +typedef uint8_t cl_uchar16[16] ; + +typedef int16_t cl_short2[2] ; +typedef int16_t cl_short4[4] ; +typedef int16_t cl_short8[8] ; +typedef int16_t cl_short16[16] ; +typedef uint16_t cl_ushort2[2] ; +typedef uint16_t cl_ushort4[4] ; +typedef uint16_t cl_ushort8[8] ; +typedef uint16_t cl_ushort16[16] ; + +typedef int32_t cl_int2[2] ; +typedef int32_t cl_int4[4] ; +typedef int32_t cl_int8[8] ; +typedef int32_t cl_int16[16] ; +typedef uint32_t cl_uint2[2] ; +typedef uint32_t cl_uint4[4] ; +typedef uint32_t cl_uint8[8] ; +typedef uint32_t cl_uint16[16] ; + +typedef int64_t cl_long2[2] ; +typedef int64_t cl_long4[4] ; +typedef int64_t cl_long8[8] ; +typedef int64_t cl_long16[16] ; +typedef uint64_t cl_ulong2[2] ; +typedef uint64_t cl_ulong4[4] ; +typedef uint64_t cl_ulong8[8] ; +typedef uint64_t cl_ulong16[16] ; + +typedef float cl_float2[2] ; +typedef float cl_float4[4] ; +typedef float cl_float8[8] ; +typedef float cl_float16[16] ; + +typedef double cl_double2[2] ; +typedef double cl_double4[4] ; +typedef double cl_double8[8] ; +typedef double cl_double16[16] ; + + +#else +#include + +/* scalar types */ +typedef int8_t cl_char; +typedef uint8_t cl_uchar; +typedef int16_t cl_short __attribute__((aligned(2))); +typedef uint16_t cl_ushort __attribute__((aligned(2))); +typedef int32_t cl_int __attribute__((aligned(4))); +typedef uint32_t cl_uint __attribute__((aligned(4))); +typedef int64_t cl_long __attribute__((aligned(8))); +typedef uint64_t cl_ulong __attribute__((aligned(8))); + +typedef uint16_t cl_half __attribute__((aligned(2))); +typedef float cl_float __attribute__((aligned(4))); +typedef double cl_double __attribute__((aligned(8))); + + +/* + * Vector types + * + * Note: OpenCL requires that all types be naturally aligned. + * This means that vector types must be naturally aligned. + * For example, a vector of four floats must be aligned to + * a 16 byte boundary (calculated as 4 * the natural 4-byte + * alignment of the float). The alignment qualifiers here + * will only function properly if your compiler supports them + * and if you don't actively work to defeat them. For example, + * in order for a cl_float4 to be 16 byte aligned in a struct, + * the start of the struct must itself be 16-byte aligned. + * + * Maintaining proper alignment is the user's responsibility. + */ +typedef int8_t cl_char2[2] __attribute__((aligned(2))); +typedef int8_t cl_char4[4] __attribute__((aligned(4))); +typedef int8_t cl_char8[8] __attribute__((aligned(8))); +typedef int8_t cl_char16[16] __attribute__((aligned(16))); +typedef uint8_t cl_uchar2[2] __attribute__((aligned(2))); +typedef uint8_t cl_uchar4[4] __attribute__((aligned(4))); +typedef uint8_t cl_uchar8[8] __attribute__((aligned(8))); +typedef uint8_t cl_uchar16[16] __attribute__((aligned(16))); + +typedef int16_t cl_short2[2] __attribute__((aligned(4))); +typedef int16_t cl_short4[4] __attribute__((aligned(8))); +typedef int16_t cl_short8[8] __attribute__((aligned(16))); +typedef int16_t cl_short16[16] __attribute__((aligned(32))); +typedef uint16_t cl_ushort2[2] __attribute__((aligned(4))); +typedef uint16_t cl_ushort4[4] __attribute__((aligned(8))); +typedef uint16_t cl_ushort8[8] __attribute__((aligned(16))); +typedef uint16_t cl_ushort16[16] __attribute__((aligned(32))); + +typedef int32_t cl_int2[2] __attribute__((aligned(8))); +typedef int32_t cl_int4[4] __attribute__((aligned(16))); +typedef int32_t cl_int8[8] __attribute__((aligned(32))); +typedef int32_t cl_int16[16] __attribute__((aligned(64))); +typedef uint32_t cl_uint2[2] __attribute__((aligned(8))); +typedef uint32_t cl_uint4[4] __attribute__((aligned(16))); +typedef uint32_t cl_uint8[8] __attribute__((aligned(32))); +typedef uint32_t cl_uint16[16] __attribute__((aligned(64))); + +typedef int64_t cl_long2[2] __attribute__((aligned(16))); +typedef int64_t cl_long4[4] __attribute__((aligned(32))); +typedef int64_t cl_long8[8] __attribute__((aligned(64))); +typedef int64_t cl_long16[16] __attribute__((aligned(128))); +typedef uint64_t cl_ulong2[2] __attribute__((aligned(16))); +typedef uint64_t cl_ulong4[4] __attribute__((aligned(32))); +typedef uint64_t cl_ulong8[8] __attribute__((aligned(64))); +typedef uint64_t cl_ulong16[16] __attribute__((aligned(128))); + +typedef float cl_float2[2] __attribute__((aligned(8))); +typedef float cl_float4[4] __attribute__((aligned(16))); +typedef float cl_float8[8] __attribute__((aligned(32))); +typedef float cl_float16[16] __attribute__((aligned(64))); + +typedef double cl_double2[2] __attribute__((aligned(16))); +typedef double cl_double4[4] __attribute__((aligned(32))); +typedef double cl_double8[8] __attribute__((aligned(64))); +typedef double cl_double16[16] __attribute__((aligned(128))); +#endif + +#include + +/* and a few goodies to go with them */ +#define CL_CHAR_BIT 8 +#define CL_SCHAR_MAX 127 +#define CL_SCHAR_MIN (-127-1) +#define CL_CHAR_MAX CL_SCHAR_MAX +#define CL_CHAR_MIN CL_SCHAR_MIN +#define CL_UCHAR_MAX 255 +#define CL_SHRT_MAX 32767 +#define CL_SHRT_MIN (-32767-1) +#define CL_USHRT_MAX 65535 +#define CL_INT_MAX 2147483647 +#define CL_INT_MIN (-2147483647-1) +#define CL_UINT_MAX 0xffffffffU +#define CL_LONG_MAX ((cl_long) 0x7FFFFFFFFFFFFFFFLL) +#define CL_LONG_MIN ((cl_long) -0x7FFFFFFFFFFFFFFFLL - 1LL) +#define CL_ULONG_MAX ((cl_ulong) 0xFFFFFFFFFFFFFFFFULL) + +#define CL_FLT_DIG 6 +#define CL_FLT_MANT_DIG 24 +#define CL_FLT_MAX_10_EXP +38 +#define CL_FLT_MAX_EXP +128 +#define CL_FLT_MIN_10_EXP -37 +#define CL_FLT_MIN_EXP -125 +#define CL_FLT_RADIX 2 +#define CL_FLT_MAX 0x1.fffffep127f +#define CL_FLT_MIN 0x1.0p-126f +#define CL_FLT_EPSILON 0x1.0p-23f + +#define CL_DBL_DIG 15 +#define CL_DBL_MANT_DIG 53 +#define CL_DBL_MAX_10_EXP +308 +#define CL_DBL_MAX_EXP +1024 +#define CL_DBL_MIN_10_EXP -307 +#define CL_DBL_MIN_EXP -1021 +#define CL_DBL_RADIX 2 +#define CL_DBL_MAX 0x1.fffffffffffffp1023 +#define CL_DBL_MIN 0x1.0p-1022 +#define CL_DBL_EPSILON 0x1.0p-52 + +/* There are no vector types for half */ + +#ifdef __cplusplus +} +#endif + +#endif // __CL_PLATFORM_H