Added MiniCL, a limited subset of OpenCL, the open standard for parallel programming of heterogeneous systems.

MiniCL includes a cross-platform run-time frontend based on pthreads, Win32 Threads, or libspe2 for Cell SPU.
It is there, to bridge the gap until OpenCL is more widely available.

See Bullet/Demos/VectorAdd, influenced by NVidia OpenCL Jumpstart Guide:
http://developer.download.nvidia.com/OpenCL/NVIDIA_OpenCL_JumpStart_Guide.pdf
This commit is contained in:
erwin.coumans
2009-05-22 01:43:37 +00:00
parent 2f1014268b
commit fb6146f0be
10 changed files with 2361 additions and 0 deletions

View File

@@ -0,0 +1,16 @@
# AppMiniCLVectorAdd is a very basic test for MiniCL.
INCLUDE_DIRECTORIES(
${BULLET_PHYSICS_SOURCE_DIR}/src }
)
LINK_LIBRARIES(
BulletMultiThreaded LinearMath
)
ADD_EXECUTABLE(AppMiniCLVectorAdd
MiniCL_VectorAdd.cpp
MiniCL.cpp
)

View File

@@ -0,0 +1,346 @@
#include <MiniCL/cl.h>
#define __PHYSICS_COMMON_H__ 1
#ifdef WIN32
#include "BulletMultiThreaded/Win32ThreadSupport.h"
#else
#include "BulletMultiThreaded/SequentialThreadSupport.h"
#endif
#include "BulletMultiThreaded/MiniCLTaskScheduler.h"
#include "BulletMultiThreaded/MiniCLTask/MiniCLTask.h"
#include "LinearMath/btMinMax.h"
/*
m_threadSupportCollision = new Win32ThreadSupport(Win32ThreadSupport::Win32ThreadConstructionInfo(
"collision",
processCollisionTask,
createCollisionLocalStoreMemory,
maxNumOutstandingTasks));
if (!m_spuCollisionTaskProcess)
m_spuCollisionTaskProcess = new SpuCollisionTaskProcess(m_threadInterface,m_maxNumOutstandingTasks);
m_spuCollisionTaskProcess->initialize2(dispatchInfo.m_useEpa);
m_spuCollisionTaskProcess->addWorkToTask(pairPtr,i,endIndex);
//make sure all SPU work is done
m_spuCollisionTaskProcess->flush2();
*/
CL_API_ENTRY cl_int CL_API_CALL clGetDeviceInfo(
cl_device_id device ,
cl_device_info param_name ,
size_t param_value_size ,
void * param_value ,
size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0
{
switch (param_name)
{
case CL_DEVICE_NAME:
{
char deviceName[] = "CPU";
int nameLen = strlen(deviceName)+1;
assert(param_value_size>strlen(deviceName));
if (nameLen < param_value_size)
{
sprintf((char*)param_value,"CPU");
} else
{
printf("error: param_value_size should be at least %d, but it is %d\n",nameLen,param_value_size);
}
break;
}
case CL_DEVICE_TYPE:
{
if (param_value_size>=sizeof(cl_device_type))
{
cl_device_type* deviceType = (cl_device_type*)param_value;
*deviceType = CL_DEVICE_TYPE_CPU;
} else
{
printf("error: param_value_size should be at least %d\n",sizeof(cl_device_type));
}
break;
}
case CL_DEVICE_MAX_COMPUTE_UNITS:
{
if (param_value_size>=sizeof(cl_uint))
{
cl_uint* numUnits = (cl_uint*)param_value;
*numUnits= 4;
} else
{
printf("error: param_value_size should be at least %d\n",sizeof(cl_uint));
}
break;
}
case CL_DEVICE_MAX_WORK_ITEM_SIZES:
{
size_t workitem_size[3];
if (param_value_size>=sizeof(workitem_size))
{
size_t* workItemSize = (size_t*)param_value;
workItemSize[0] = 64;
workItemSize[1] = 24;
workItemSize[2] = 16;
} else
{
printf("error: param_value_size should be at least %d\n",sizeof(cl_uint));
}
break;
}
default:
{
printf("error: unsupported param_name:%d\n",param_name);
}
}
return 0;
}
CL_API_ENTRY cl_int CL_API_CALL clReleaseMemObject(cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0
{
return 0;
}
CL_API_ENTRY cl_int CL_API_CALL clReleaseCommandQueue(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0
{
return 0;
}
CL_API_ENTRY cl_int CL_API_CALL clReleaseProgram(cl_program /* program */) CL_API_SUFFIX__VERSION_1_0
{
return 0;
}
CL_API_ENTRY cl_int CL_API_CALL clReleaseKernel(cl_kernel /* kernel */) CL_API_SUFFIX__VERSION_1_0
{
return 0;
}
// Enqueued Commands APIs
CL_API_ENTRY cl_int CL_API_CALL clEnqueueReadBuffer(cl_command_queue command_queue ,
cl_mem buffer ,
cl_bool /* blocking_read */,
size_t /* offset */,
size_t cb ,
void * ptr ,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0
{
MiniCLTaskScheduler* scheduler = (MiniCLTaskScheduler*) command_queue;
///wait for all work items to be completed
scheduler->flush();
memcpy(ptr,buffer,cb);
return 0;
}
CL_API_ENTRY cl_int CL_API_CALL clEnqueueNDRangeKernel(cl_command_queue /* command_queue */,
cl_kernel clKernel ,
cl_uint work_dim ,
const size_t * /* global_work_offset */,
const size_t * global_work_size ,
const size_t * /* local_work_size */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0
{
MiniCLKernel* kernel = (MiniCLKernel*) clKernel;
for (int ii=0;ii<work_dim;ii++)
{
int maxTask = kernel->m_scheduler->getMaxNumOutstandingTasks();
int numWorkItems = global_work_size[ii];
//at minimum 64 work items per task
int numWorkItemsPerTask = btMax(64,numWorkItems / maxTask);
for (int t=0;t<numWorkItems;)
{
//Performance Hint: tweak this number during benchmarking
int endIndex = (t+numWorkItemsPerTask) < numWorkItems ? t+numWorkItemsPerTask : numWorkItems;
kernel->m_scheduler->issueTask(t,endIndex,kernel->m_kernelProgramCommandId,(char*)&kernel->m_argData[0][0],kernel->m_argSizes);
t = endIndex;
}
}
/*
void* bla = 0;
scheduler->issueTask(bla,2,3);
scheduler->flush();
*/
return 0;
}
CL_API_ENTRY cl_int CL_API_CALL clSetKernelArg(cl_kernel clKernel ,
cl_uint arg_index ,
size_t arg_size ,
const void * arg_value ) CL_API_SUFFIX__VERSION_1_0
{
MiniCLKernel* kernel = (MiniCLKernel* ) clKernel;
assert(arg_size < MINICL_MAX_ARGLENGTH);
if (arg_index>MINI_CL_MAX_ARG)
{
printf("error: clSetKernelArg arg_index (%d) exceeds %d\n",arg_index,MINI_CL_MAX_ARG);
} else
{
if (arg_size>=MINICL_MAX_ARGLENGTH)
{
printf("error: clSetKernelArg argdata too large: %d (maximum is %d)\n",arg_size,MINICL_MAX_ARGLENGTH);
} else
{
memcpy( kernel->m_argData[arg_index],arg_value,arg_size);
kernel->m_argSizes[arg_index] = arg_size;
}
}
return 0;
}
// Kernel Object APIs
CL_API_ENTRY cl_kernel CL_API_CALL clCreateKernel(cl_program program ,
const char * kernel_name ,
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0
{
MiniCLTaskScheduler* scheduler = (MiniCLTaskScheduler*) program;
MiniCLKernel* kernel = new MiniCLKernel();
kernel->m_kernelProgramCommandId = scheduler->findProgramCommandIdByName(kernel_name);
kernel->m_scheduler = scheduler;
return (cl_kernel)kernel;
}
CL_API_ENTRY cl_int CL_API_CALL clBuildProgram(cl_program /* program */,
cl_uint /* num_devices */,
const cl_device_id * /* device_list */,
const char * /* options */,
void (*pfn_notify)(cl_program /* program */, void * /* user_data */),
void * /* user_data */) CL_API_SUFFIX__VERSION_1_0
{
return 0;
}
CL_API_ENTRY cl_program CL_API_CALL clCreateProgramWithBinary(cl_context context ,
cl_uint /* num_devices */,
const cl_device_id * /* device_list */,
const size_t * /* lengths */,
const unsigned char ** /* binaries */,
cl_int * /* binary_status */,
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0
{
return (cl_program)context;
}
// Memory Object APIs
CL_API_ENTRY cl_mem CL_API_CALL clCreateBuffer(cl_context /* context */,
cl_mem_flags flags ,
size_t size,
void * host_ptr ,
cl_int * errcode_ret ) CL_API_SUFFIX__VERSION_1_0
{
cl_mem buf = (cl_mem)malloc(size);
if ((flags&CL_MEM_COPY_HOST_PTR) && host_ptr)
{
memcpy(buf,host_ptr,size);
}
return buf;
}
// Command Queue APIs
CL_API_ENTRY cl_command_queue CL_API_CALL clCreateCommandQueue(cl_context context ,
cl_device_id /* device */,
cl_command_queue_properties /* properties */,
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0
{
return (cl_command_queue) context;
}
extern CL_API_ENTRY cl_int CL_API_CALL clGetContextInfo(cl_context /* context */,
cl_context_info param_name ,
size_t param_value_size ,
void * param_value,
size_t * param_value_size_ret ) CL_API_SUFFIX__VERSION_1_0
{
switch (param_name)
{
case CL_CONTEXT_DEVICES:
{
if (!param_value_size)
{
*param_value_size_ret = 13;
} else
{
sprintf((char*)param_value,"MiniCL_Test.");
}
break;
};
default:
{
printf("unsupported\n");
}
}
return 0;
}
CL_API_ENTRY cl_context CL_API_CALL clCreateContextFromType(cl_context_properties * /* properties */,
cl_device_type /* device_type */,
void (*pfn_notify)(const char *, const void *, size_t, void *) /* pfn_notify */,
void * /* user_data */,
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0
{
int maxNumOutstandingTasks = 4;
#ifdef WIN32
Win32ThreadSupport* threadSupport = new Win32ThreadSupport(Win32ThreadSupport::Win32ThreadConstructionInfo(
"MiniCL",
processMiniCLTask, //processCollisionTask,
createMiniCLLocalStoreMemory,//createCollisionLocalStoreMemory,
maxNumOutstandingTasks));
#else
SequentialThreadSupport::SequentialThreadConstructionInfo stc("MiniCL",processMiniCLTask,createMiniCLLocalStoreMemory);
SequentialThreadSupport* threadSupport = new SequentialThreadSupport(stc);
#endif
MiniCLTaskScheduler* scheduler = new MiniCLTaskScheduler(threadSupport,maxNumOutstandingTasks);
return (cl_context)scheduler;
}
CL_API_ENTRY cl_int CL_API_CALL clReleaseContext(cl_context context ) CL_API_SUFFIX__VERSION_1_0
{
MiniCLTaskScheduler* scheduler = (MiniCLTaskScheduler*) context;
btThreadSupportInterface* threadSupport = scheduler->getThreadSupportInterface();
delete scheduler;
delete threadSupport;
return 0;
}

View File

@@ -0,0 +1,172 @@
///VectorAdd sample, from the NVidia JumpStart Guide
///http://developer.download.nvidia.com/OpenCL/NVIDIA_OpenCL_JumpStart_Guide.pdf
///Instead of #include <CL/cl.h> we include <MiniCL/cl.h>
///Apart from this include file, all other code should compile and work on OpenCL compliant implementation
#include <MiniCL/cl.h>
#include <stdio.h>
#include <math.h>
#include <stdlib.h>
void printDevInfo(cl_device_id device)
{
char device_string[1024];
clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(device_string), &device_string, NULL);
printf( " Device %s:\n", device_string);
// CL_DEVICE_INFO
cl_device_type type;
clGetDeviceInfo(device, CL_DEVICE_TYPE, sizeof(type), &type, NULL);
if( type & CL_DEVICE_TYPE_CPU )
printf(" CL_DEVICE_TYPE:\t\t%s\n", "CL_DEVICE_TYPE_CPU");
if( type & CL_DEVICE_TYPE_GPU )
printf( " CL_DEVICE_TYPE:\t\t%s\n", "CL_DEVICE_TYPE_GPU");
if( type & CL_DEVICE_TYPE_ACCELERATOR )
printf( " CL_DEVICE_TYPE:\t\t%s\n", "CL_DEVICE_TYPE_ACCELERATOR");
if( type & CL_DEVICE_TYPE_DEFAULT )
printf( " CL_DEVICE_TYPE:\t\t%s\n", "CL_DEVICE_TYPE_DEFAULT");
// CL_DEVICE_MAX_COMPUTE_UNITS
cl_uint compute_units;
clGetDeviceInfo(device, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(compute_units), &compute_units, NULL);
printf( " CL_DEVICE_MAX_COMPUTE_UNITS:\t%d\n", compute_units);
// CL_DEVICE_MAX_WORK_GROUP_SIZE
size_t workitem_size[3];
clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(workitem_size), &workitem_size, NULL);
printf( " CL_DEVICE_MAX_WORK_ITEM_SIZES:\t%d / %d / %d \n", workitem_size[0], workitem_size[1], workitem_size[2]);
}
// Main function
// *********************************************************************
int main(int argc, char **argv)
{
void *srcA, *srcB, *dst; // Host buffers for OpenCL test
cl_context cxGPUContext; // OpenCL context
cl_command_queue cqCommandQue; // OpenCL command que
cl_device_id* cdDevices; // OpenCL device list
cl_program cpProgram; // OpenCL program
cl_kernel ckKernel; // OpenCL kernel
cl_mem cmMemObjs[3]; // OpenCL memory buffer objects: 3 for device
size_t szGlobalWorkSize[1]; // 1D var for Total # of work items
size_t szLocalWorkSize[1]; // 1D var for # of work items in the work group
size_t szParmDataBytes; // Byte size of context information
cl_int ciErr1, ciErr2; // Error code var
int iTestN = 100000 * 8; // Size of Vectors to process
// set Global and Local work size dimensions
szGlobalWorkSize[0] = iTestN >> 3; // do 8 computations per work item
szLocalWorkSize[0]= iTestN>>3;
// Allocate and initialize host arrays
srcA = (void *)malloc (sizeof(cl_float) * iTestN);
srcB = (void *)malloc (sizeof(cl_float) * iTestN);
dst = (void *)malloc (sizeof(cl_float) * iTestN);
int i;
// Initialize arrays with some values
for (i=0;i<iTestN;i++)
{
((cl_float*)srcA)[i] = cl_float(i);
((cl_float*)srcB)[i] = 2;
((cl_float*)dst)[i]=-1;
}
// Create OpenCL context & context
cxGPUContext = clCreateContextFromType(0, CL_DEVICE_TYPE_CPU, NULL, NULL, &ciErr1); //could also be CL_DEVICE_TYPE_GPU
// Query all devices available to the context
ciErr1 |= clGetContextInfo(cxGPUContext, CL_CONTEXT_DEVICES, 0, NULL, &szParmDataBytes);
cdDevices = (cl_device_id*)malloc(szParmDataBytes);
ciErr1 |= clGetContextInfo(cxGPUContext, CL_CONTEXT_DEVICES, szParmDataBytes, cdDevices, NULL);
if (cdDevices)
{
printDevInfo(cdDevices[0]);
}
// Create a command queue for first device the context reported
cqCommandQue = clCreateCommandQueue(cxGPUContext, cdDevices[0], 0, &ciErr2);
ciErr1 |= ciErr2;
// Allocate the OpenCL source and result buffer memory objects on the device GMEM
cmMemObjs[0] = clCreateBuffer(cxGPUContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(cl_float8) * szGlobalWorkSize[0], srcA, &ciErr2);
ciErr1 |= ciErr2;
cmMemObjs[1] = clCreateBuffer(cxGPUContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(cl_float8) * szGlobalWorkSize[0], srcB, &ciErr2);
ciErr1 |= ciErr2;
cmMemObjs[2] = clCreateBuffer(cxGPUContext, CL_MEM_WRITE_ONLY, sizeof(cl_float8) * szGlobalWorkSize[0], NULL, &ciErr2);
ciErr1 |= ciErr2;
///create kernels from binary
int numDevices = 1;
cl_int err;
::size_t* lengths = (::size_t*) malloc(numDevices * sizeof(::size_t));
const unsigned char** images = (const unsigned char**) malloc(numDevices * sizeof(const void*));
for (i = 0; i < numDevices; ++i) {
images[i] = 0;
lengths[i] = 0;
}
cpProgram = clCreateProgramWithBinary(cxGPUContext, numDevices,cdDevices,lengths, images, 0, &err);
// Build the executable program from a binary
ciErr1 |= clBuildProgram(cpProgram, 0, NULL, NULL, NULL, NULL);
// Create the kernel
ckKernel = clCreateKernel(cpProgram, "VectorAdd", &ciErr1);
// Set the Argument values
ciErr1 |= clSetKernelArg(ckKernel, 0, sizeof(cl_mem), (void*)&cmMemObjs[0]);
ciErr1 |= clSetKernelArg(ckKernel, 1, sizeof(cl_mem), (void*)&cmMemObjs[1]);
ciErr1 |= clSetKernelArg(ckKernel, 2, sizeof(cl_mem), (void*)&cmMemObjs[2]);
// Copy input data from host to GPU and launch kernel
ciErr1 |= clEnqueueNDRangeKernel(cqCommandQue, ckKernel, 1, NULL, szGlobalWorkSize, szLocalWorkSize, 0, NULL, NULL);
// Read back results and check accumulated errors
ciErr1 |= clEnqueueReadBuffer(cqCommandQue, cmMemObjs[2], CL_TRUE, 0, sizeof(cl_float8) * szGlobalWorkSize[0], dst, 0, NULL, NULL);
// Release kernel, program, and memory objects
// NOTE: Most properly this should be done at any of the exit points above, but it is omitted elsewhere for clarity.
free(cdDevices);
clReleaseKernel(ckKernel);
clReleaseProgram(cpProgram);
clReleaseCommandQueue(cqCommandQue);
clReleaseContext(cxGPUContext);
// print the results
int iErrorCount = 0;
for (i = 0; i < iTestN; i++)
{
if (((float*)dst)[i] != ((float*)srcA)[i]+((float*)srcB)[i])
iErrorCount++;
}
if (iErrorCount)
{
printf("MiniCL validation FAILED\n");
} else
{
printf("MiniCL validation SUCCESSFULL\n");
}
// Free host memory, close log and return success
for (i = 0; i < 3; i++)
{
clReleaseMemObject(cmMemObjs[i]);
}
free(srcA);
free(srcB);
free (dst);
}

View File

@@ -0,0 +1,116 @@
/*
Bullet Continuous Collision Detection and Physics Library, Copyright (c) 2007 Erwin Coumans
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#include "MiniCLTask.h"
#include "../PlatformDefinitions.h"
#include "../SpuFakeDma.h"
#include "LinearMath/btMinMax.h"
#include "BulletMultiThreaded/MiniCLTask/MiniCLTask.h"
#ifdef __SPU__
#include <spu_printf.h>
#else
#include <stdio.h>
#define spu_printf printf
#endif
#define __kernel
#define __global
#define get_global_id(a) guid
struct MiniCLTask_LocalStoreMemory
{
};
///////////////////////////////////////////////////
// OpenCL Kernel Function for element by element vector addition
__kernel void VectorAdd(__global const float8* a, __global const float8* b, __global float8* c, int guid)
{
// get oct-float index into global data array
int iGID = get_global_id(0);
// read inputs into registers
float8 f8InA = a[iGID];
float8 f8InB = b[iGID];
float8 f8Out = (float8)0.0f;
// add the vector elements
f8Out.s0 = f8InA.s0 + f8InB.s0;
f8Out.s1 = f8InA.s1 + f8InB.s1;
f8Out.s2 = f8InA.s2 + f8InB.s2;
f8Out.s3 = f8InA.s3 + f8InB.s3;
f8Out.s4 = f8InA.s4 + f8InB.s4;
f8Out.s5 = f8InA.s5 + f8InB.s5;
f8Out.s6 = f8InA.s6 + f8InB.s6;
f8Out.s7 = f8InA.s7 + f8InB.s7;
// write back out to GMEM
c[get_global_id(0)] = f8Out;
}
///////////////////////////////////////////////////
//-- MAIN METHOD
void processMiniCLTask(void* userPtr, void* lsMemory)
{
// BT_PROFILE("processSampleTask");
MiniCLTask_LocalStoreMemory* localMemory = (MiniCLTask_LocalStoreMemory*)lsMemory;
MiniCLTaskDesc* taskDescPtr = (MiniCLTaskDesc*)userPtr;
MiniCLTaskDesc& taskDesc = *taskDescPtr;
printf("Compute Unit[%d] executed kernel %d work items [%d..%d)\n",taskDesc.m_taskId,taskDesc.m_kernelProgramId,taskDesc.m_firstWorkUnit,taskDesc.m_lastWorkUnit);
switch (taskDesc.m_kernelProgramId)
{
case CMD_MINICL_ADDVECTOR:
{
for (unsigned int i=taskDesc.m_firstWorkUnit;i<taskDesc.m_lastWorkUnit;i++)
{
VectorAdd(*(const float8**)&taskDesc.m_argData[0][0],*(const float8**)&taskDesc.m_argData[1][0],*(float8**)&taskDesc.m_argData[2][0],i);
}
break;
}
default:
{
printf("error in processMiniCLTask: unknown command id: %d\n",taskDesc.m_kernelProgramId);
}
};
}
#if defined(__CELLOS_LV2__) || defined (LIBSPE2)
ATTRIBUTE_ALIGNED16(MiniCLTask_LocalStoreMemory gLocalStoreMemory);
void* createMiniCLLocalStoreMemory()
{
return &gLocalStoreMemory;
}
#else
void* createMiniCLLocalStoreMemory()
{
return new MiniCLTask_LocalStoreMemory;
};
#endif

View File

@@ -0,0 +1,81 @@
/*
Bullet Continuous Collision Detection and Physics Library, Copyright (c) 2007 Erwin Coumans
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef MINICL__TASK_H
#define MINICL__TASK_H
#include "../PlatformDefinitions.h"
#include "LinearMath/btScalar.h"
#include "LinearMath/btAlignedAllocator.h"
enum
{
CMD_MINICL_1= 1,
CMD_MINICL_ADDVECTOR
};
struct float8
{
float s0;
float s1;
float s2;
float s3;
float s4;
float s5;
float s6;
float s7;
float8(float scalar)
{
s0=s1=s2=s3=s4=s5=s6=s7=scalar;
}
};
#define MINICL_MAX_ARGLENGTH 128
#define MINI_CL_MAX_ARG 8
ATTRIBUTE_ALIGNED16(struct) MiniCLTaskDesc
{
BT_DECLARE_ALIGNED_ALLOCATOR();
MiniCLTaskDesc()
{
for (int i=0;i<MINI_CL_MAX_ARG;i++)
{
m_argSizes[i]=0;
}
}
uint32_t m_taskId;
uint32_t m_kernelProgramId;
uint32_t m_firstWorkUnit;
uint32_t m_lastWorkUnit;
char m_argData[MINI_CL_MAX_ARG][MINICL_MAX_ARGLENGTH];
int m_argSizes[MINI_CL_MAX_ARG];
};
void processMiniCLTask(void* userPtr, void* lsMemory);
void* createMiniCLLocalStoreMemory();
#endif //MINICL__TASK_H

View File

@@ -0,0 +1,227 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2007 Erwin Coumans http://bulletphysics.com
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
//#define __CELLOS_LV2__ 1
#define USE_SAMPLE_PROCESS 1
#ifdef USE_SAMPLE_PROCESS
#include "MiniCLTaskScheduler.h"
#include <stdio.h>
#ifdef __SPU__
void SampleThreadFunc(void* userPtr,void* lsMemory)
{
//do nothing
printf("hello world\n");
}
void* SamplelsMemoryFunc()
{
//don't create local store memory, just return 0
return 0;
}
#else
#include "btThreadSupportInterface.h"
//# include "SPUAssert.h"
#include <string.h>
extern "C" {
extern char SPU_SAMPLE_ELF_SYMBOL[];
}
MiniCLTaskScheduler::MiniCLTaskScheduler(btThreadSupportInterface* threadInterface, int maxNumOutstandingTasks)
:m_threadInterface(threadInterface),
m_maxNumOutstandingTasks(maxNumOutstandingTasks)
{
m_taskBusy.resize(m_maxNumOutstandingTasks);
m_spuSampleTaskDesc.resize(m_maxNumOutstandingTasks);
for (int i = 0; i < m_maxNumOutstandingTasks; i++)
{
m_taskBusy[i] = false;
}
m_numBusyTasks = 0;
m_currentTask = 0;
m_initialized = false;
m_threadInterface->startSPU();
}
MiniCLTaskScheduler::~MiniCLTaskScheduler()
{
m_threadInterface->stopSPU();
}
void MiniCLTaskScheduler::initialize()
{
#ifdef DEBUG_SPU_TASK_SCHEDULING
printf("MiniCLTaskScheduler::initialize()\n");
#endif //DEBUG_SPU_TASK_SCHEDULING
for (int i = 0; i < m_maxNumOutstandingTasks; i++)
{
m_taskBusy[i] = false;
}
m_numBusyTasks = 0;
m_currentTask = 0;
m_initialized = true;
}
void MiniCLTaskScheduler::issueTask(int firstWorkUnit, int lastWorkUnit,int kernelProgramId,char* argData,int* argSizes)
{
#ifdef DEBUG_SPU_TASK_SCHEDULING
printf("MiniCLTaskScheduler::issueTask (m_currentTask= %d\)n", m_currentTask);
#endif //DEBUG_SPU_TASK_SCHEDULING
m_taskBusy[m_currentTask] = true;
m_numBusyTasks++;
MiniCLTaskDesc& taskDesc = m_spuSampleTaskDesc[m_currentTask];
{
// send task description in event message
taskDesc.m_firstWorkUnit = firstWorkUnit;
taskDesc.m_lastWorkUnit = lastWorkUnit;
taskDesc.m_kernelProgramId = kernelProgramId;
//some bookkeeping to recognize finished tasks
taskDesc.m_taskId = m_currentTask;
for (int i=0;i<MINI_CL_MAX_ARG;i++)
{
taskDesc.m_argSizes[i] = argSizes[i];
if (taskDesc.m_argSizes[i])
{
memcpy(&taskDesc.m_argData[i],&argData[MINICL_MAX_ARGLENGTH*i],taskDesc.m_argSizes[i]);
}
}
}
m_threadInterface->sendRequest(1, (ppu_address_t) &taskDesc, m_currentTask);
// if all tasks busy, wait for spu event to clear the task.
if (m_numBusyTasks >= m_maxNumOutstandingTasks)
{
unsigned int taskId;
unsigned int outputSize;
for (int i=0;i<m_maxNumOutstandingTasks;i++)
{
if (m_taskBusy[i])
{
taskId = i;
break;
}
}
m_threadInterface->waitForResponse(&taskId, &outputSize);
//printf("PPU: after issue, received event: %u %d\n", taskId, outputSize);
postProcess(taskId, outputSize);
m_taskBusy[taskId] = false;
m_numBusyTasks--;
}
// find new task buffer
for (int i = 0; i < m_maxNumOutstandingTasks; i++)
{
if (!m_taskBusy[i])
{
m_currentTask = i;
break;
}
}
}
///Optional PPU-size post processing for each task
void MiniCLTaskScheduler::postProcess(int taskId, int outputSize)
{
}
void MiniCLTaskScheduler::flush()
{
#ifdef DEBUG_SPU_TASK_SCHEDULING
printf("\nSpuCollisionTaskProcess::flush()\n");
#endif //DEBUG_SPU_TASK_SCHEDULING
// all tasks are issued, wait for all tasks to be complete
while(m_numBusyTasks > 0)
{
// Consolidating SPU code
unsigned int taskId;
unsigned int outputSize;
for (int i=0;i<m_maxNumOutstandingTasks;i++)
{
if (m_taskBusy[i])
{
taskId = i;
break;
}
}
{
m_threadInterface->waitForResponse(&taskId, &outputSize);
}
//printf("PPU: flushing, received event: %u %d\n", taskId, outputSize);
postProcess(taskId, outputSize);
m_taskBusy[taskId] = false;
m_numBusyTasks--;
}
}
#endif
#endif //USE_SAMPLE_PROCESS

View File

@@ -0,0 +1,181 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2007 Erwin Coumans http://bulletphysics.com
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef MINICL_TASK_SCHEDULER_H
#define MINICL_TASK_SCHEDULER_H
#include <assert.h>
#include "PlatformDefinitions.h"
#include <stdlib.h>
#include "LinearMath/btAlignedObjectArray.h"
#include "MiniCLTask/MiniCLTask.h"
//just add your commands here, try to keep them globally unique for debugging purposes
#define CMD_SAMPLE_TASK_COMMAND 10
/// MiniCLTaskScheduler handles SPU processing of collision pairs.
/// When PPU issues a task, it will look for completed task buffers
/// PPU will do postprocessing, dependent on workunit output (not likely)
class MiniCLTaskScheduler
{
// track task buffers that are being used, and total busy tasks
btAlignedObjectArray<bool> m_taskBusy;
btAlignedObjectArray<MiniCLTaskDesc> m_spuSampleTaskDesc;
int m_numBusyTasks;
// the current task and the current entry to insert a new work unit
int m_currentTask;
bool m_initialized;
void postProcess(int taskId, int outputSize);
class btThreadSupportInterface* m_threadInterface;
int m_maxNumOutstandingTasks;
public:
MiniCLTaskScheduler(btThreadSupportInterface* threadInterface, int maxNumOutstandingTasks);
~MiniCLTaskScheduler();
///call initialize in the beginning of the frame, before addCollisionPairToTask
void initialize();
void issueTask(int firstWorkUnit, int lastWorkUnit,int kernelProgramId,char* argData,int* argSizes);
///call flush to submit potential outstanding work to SPUs and wait for all involved SPUs to be finished
void flush();
class btThreadSupportInterface* getThreadSupportInterface()
{
return m_threadInterface;
}
int findProgramCommandIdByName(const char* programName) const
{
return CMD_MINICL_ADDVECTOR;//hardcoded temp value, todo: implement multi-program support
}
int getMaxNumOutstandingTasks() const
{
return m_maxNumOutstandingTasks;
}
};
struct MiniCLKernel
{
MiniCLTaskScheduler* m_scheduler;
int m_kernelProgramCommandId;
char m_argData[MINI_CL_MAX_ARG][MINICL_MAX_ARGLENGTH];
int m_argSizes[MINI_CL_MAX_ARG];
};
#if defined(USE_LIBSPE2) && defined(__SPU__)
////////////////////MAIN/////////////////////////////
#include "../SpuLibspe2Support.h"
#include <spu_intrinsics.h>
#include <spu_mfcio.h>
#include <SpuFakeDma.h>
void * SamplelsMemoryFunc();
void SampleThreadFunc(void* userPtr,void* lsMemory);
//#define DEBUG_LIBSPE2_MAINLOOP
int main(unsigned long long speid, addr64 argp, addr64 envp)
{
printf("SPU is up \n");
ATTRIBUTE_ALIGNED128(btSpuStatus status);
ATTRIBUTE_ALIGNED16( SpuSampleTaskDesc taskDesc ) ;
unsigned int received_message = Spu_Mailbox_Event_Nothing;
bool shutdown = false;
cellDmaGet(&status, argp.ull, sizeof(btSpuStatus), DMA_TAG(3), 0, 0);
cellDmaWaitTagStatusAll(DMA_MASK(3));
status.m_status = Spu_Status_Free;
status.m_lsMemory.p = SamplelsMemoryFunc();
cellDmaLargePut(&status, argp.ull, sizeof(btSpuStatus), DMA_TAG(3), 0, 0);
cellDmaWaitTagStatusAll(DMA_MASK(3));
while (!shutdown)
{
received_message = spu_read_in_mbox();
switch(received_message)
{
case Spu_Mailbox_Event_Shutdown:
shutdown = true;
break;
case Spu_Mailbox_Event_Task:
// refresh the status
#ifdef DEBUG_LIBSPE2_MAINLOOP
printf("SPU recieved Task \n");
#endif //DEBUG_LIBSPE2_MAINLOOP
cellDmaGet(&status, argp.ull, sizeof(btSpuStatus), DMA_TAG(3), 0, 0);
cellDmaWaitTagStatusAll(DMA_MASK(3));
btAssert(status.m_status==Spu_Status_Occupied);
cellDmaGet(&taskDesc, status.m_taskDesc.p, sizeof(SpuSampleTaskDesc), DMA_TAG(3), 0, 0);
cellDmaWaitTagStatusAll(DMA_MASK(3));
SampleThreadFunc((void*)&taskDesc, reinterpret_cast<void*> (taskDesc.m_mainMemoryPtr) );
break;
case Spu_Mailbox_Event_Nothing:
default:
break;
}
// set to status free and wait for next task
status.m_status = Spu_Status_Free;
cellDmaLargePut(&status, argp.ull, sizeof(btSpuStatus), DMA_TAG(3), 0, 0);
cellDmaWaitTagStatusAll(DMA_MASK(3));
}
return 0;
}
//////////////////////////////////////////////////////
#endif
#endif // MINICL_TASK_SCHEDULER_H

865
src/MiniCL/cl.h Normal file
View File

@@ -0,0 +1,865 @@
/*******************************************************************************
* Copyright (c) 2008-2009 The Khronos Group Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and/or associated documentation files (the
* "Materials"), to deal in the Materials without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Materials, and to
* permit persons to whom the Materials are furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Materials.
*
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
******************************************************************************/
#ifndef __OPENCL_CL_H
#define __OPENCL_CL_H
#ifdef __APPLE__
#include <MiniCL/cl_platform.h>
#else
#include <MiniCL/cl_platform.h>
#endif
#ifdef __cplusplus
extern "C" {
#endif
/******************************************************************************/
typedef struct _cl_platform_id * cl_platform_id;
typedef struct _cl_device_id * cl_device_id;
typedef struct _cl_context * cl_context;
typedef struct _cl_command_queue * cl_command_queue;
typedef struct _cl_mem * cl_mem;
typedef struct _cl_program * cl_program;
typedef struct _cl_kernel * cl_kernel;
typedef struct _cl_event * cl_event;
typedef struct _cl_sampler * cl_sampler;
typedef cl_uint cl_bool; /* WARNING! Unlike cl_ types in cl_platform.h, cl_bool is not guaranteed to be the same size as the bool in kernels. */
typedef cl_ulong cl_bitfield;
typedef cl_bitfield cl_device_type;
typedef cl_uint cl_platform_info;
typedef cl_uint cl_device_info;
typedef cl_bitfield cl_device_address_info;
typedef cl_bitfield cl_device_fp_config;
typedef cl_uint cl_device_mem_cache_type;
typedef cl_uint cl_device_local_mem_type;
typedef cl_bitfield cl_device_exec_capabilities;
typedef cl_bitfield cl_command_queue_properties;
typedef intptr_t cl_context_properties;
typedef cl_uint cl_context_info;
typedef cl_uint cl_command_queue_info;
typedef cl_uint cl_channel_order;
typedef cl_uint cl_channel_type;
typedef cl_bitfield cl_mem_flags;
typedef cl_uint cl_mem_object_type;
typedef cl_uint cl_mem_info;
typedef cl_uint cl_image_info;
typedef cl_uint cl_addressing_mode;
typedef cl_uint cl_filter_mode;
typedef cl_uint cl_sampler_info;
typedef cl_bitfield cl_map_flags;
typedef cl_uint cl_program_info;
typedef cl_uint cl_program_build_info;
typedef cl_int cl_build_status;
typedef cl_uint cl_kernel_info;
typedef cl_uint cl_kernel_work_group_info;
typedef cl_uint cl_event_info;
typedef cl_uint cl_command_type;
typedef cl_uint cl_profiling_info;
typedef struct _cl_image_format {
cl_channel_order image_channel_order;
cl_channel_type image_channel_data_type;
} cl_image_format;
/******************************************************************************/
// Error Codes
#define CL_SUCCESS 0
#define CL_DEVICE_NOT_FOUND -1
#define CL_DEVICE_NOT_AVAILABLE -2
#define CL_DEVICE_COMPILER_NOT_AVAILABLE -3
#define CL_MEM_OBJECT_ALLOCATION_FAILURE -4
#define CL_OUT_OF_RESOURCES -5
#define CL_OUT_OF_HOST_MEMORY -6
#define CL_PROFILING_INFO_NOT_AVAILABLE -7
#define CL_MEM_COPY_OVERLAP -8
#define CL_IMAGE_FORMAT_MISMATCH -9
#define CL_IMAGE_FORMAT_NOT_SUPPORTED -10
#define CL_BUILD_PROGRAM_FAILURE -11
#define CL_MAP_FAILURE -12
#define CL_INVALID_VALUE -30
#define CL_INVALID_DEVICE_TYPE -31
#define CL_INVALID_PLATFORM -32
#define CL_INVALID_DEVICE -33
#define CL_INVALID_CONTEXT -34
#define CL_INVALID_QUEUE_PROPERTIES -35
#define CL_INVALID_COMMAND_QUEUE -36
#define CL_INVALID_HOST_PTR -37
#define CL_INVALID_MEM_OBJECT -38
#define CL_INVALID_IMAGE_FORMAT_DESCRIPTOR -39
#define CL_INVALID_IMAGE_SIZE -40
#define CL_INVALID_SAMPLER -41
#define CL_INVALID_BINARY -42
#define CL_INVALID_BUILD_OPTIONS -43
#define CL_INVALID_PROGRAM -44
#define CL_INVALID_PROGRAM_EXECUTABLE -45
#define CL_INVALID_KERNEL_NAME -46
#define CL_INVALID_KERNEL_DEFINITION -47
#define CL_INVALID_KERNEL -48
#define CL_INVALID_ARG_INDEX -49
#define CL_INVALID_ARG_VALUE -50
#define CL_INVALID_ARG_SIZE -51
#define CL_INVALID_KERNEL_ARGS -52
#define CL_INVALID_WORK_DIMENSION -53
#define CL_INVALID_WORK_GROUP_SIZE -54
#define CL_INVALID_WORK_ITEM_SIZE -55
#define CL_INVALID_GLOBAL_OFFSET -56
#define CL_INVALID_EVENT_WAIT_LIST -57
#define CL_INVALID_EVENT -58
#define CL_INVALID_OPERATION -59
#define CL_INVALID_GL_OBJECT -60
#define CL_INVALID_BUFFER_SIZE -61
#define CL_INVALID_MIP_LEVEL -62
// OpenCL Version
#define CL_VERSION_1_0 1
// cl_bool
#define CL_FALSE 0
#define CL_TRUE 1
// cl_platform_info
#define CL_PLATFORM_PROFILE 0x0900
#define CL_PLATFORM_VERSION 0x0901
#define CL_PLATFORM_NAME 0x0902
#define CL_PLATFORM_VENDOR 0x0903
#define CL_PLATFORM_EXTENSIONS 0x0904
// cl_device_type - bitfield
#define CL_DEVICE_TYPE_DEFAULT (1 << 0)
#define CL_DEVICE_TYPE_CPU (1 << 1)
#define CL_DEVICE_TYPE_GPU (1 << 2)
#define CL_DEVICE_TYPE_ACCELERATOR (1 << 3)
#define CL_DEVICE_TYPE_ALL 0xFFFFFFFF
// cl_device_info
#define CL_DEVICE_TYPE 0x1000
#define CL_DEVICE_VENDOR_ID 0x1001
#define CL_DEVICE_MAX_COMPUTE_UNITS 0x1002
#define CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS 0x1003
#define CL_DEVICE_MAX_WORK_GROUP_SIZE 0x1004
#define CL_DEVICE_MAX_WORK_ITEM_SIZES 0x1005
#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR 0x1006
#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT 0x1007
#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT 0x1008
#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG 0x1009
#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT 0x100A
#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE 0x100B
#define CL_DEVICE_MAX_CLOCK_FREQUENCY 0x100C
#define CL_DEVICE_ADDRESS_BITS 0x100D
#define CL_DEVICE_MAX_READ_IMAGE_ARGS 0x100E
#define CL_DEVICE_MAX_WRITE_IMAGE_ARGS 0x100F
#define CL_DEVICE_MAX_MEM_ALLOC_SIZE 0x1010
#define CL_DEVICE_IMAGE2D_MAX_WIDTH 0x1011
#define CL_DEVICE_IMAGE2D_MAX_HEIGHT 0x1012
#define CL_DEVICE_IMAGE3D_MAX_WIDTH 0x1013
#define CL_DEVICE_IMAGE3D_MAX_HEIGHT 0x1014
#define CL_DEVICE_IMAGE3D_MAX_DEPTH 0x1015
#define CL_DEVICE_IMAGE_SUPPORT 0x1016
#define CL_DEVICE_MAX_PARAMETER_SIZE 0x1017
#define CL_DEVICE_MAX_SAMPLERS 0x1018
#define CL_DEVICE_MEM_BASE_ADDR_ALIGN 0x1019
#define CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE 0x101A
#define CL_DEVICE_SINGLE_FP_CONFIG 0x101B
#define CL_DEVICE_GLOBAL_MEM_CACHE_TYPE 0x101C
#define CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE 0x101D
#define CL_DEVICE_GLOBAL_MEM_CACHE_SIZE 0x101E
#define CL_DEVICE_GLOBAL_MEM_SIZE 0x101F
#define CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE 0x1020
#define CL_DEVICE_MAX_CONSTANT_ARGS 0x1021
#define CL_DEVICE_LOCAL_MEM_TYPE 0x1022
#define CL_DEVICE_LOCAL_MEM_SIZE 0x1023
#define CL_DEVICE_ERROR_CORRECTION_SUPPORT 0x1024
#define CL_DEVICE_PROFILING_TIMER_RESOLUTION 0x1025
#define CL_DEVICE_ENDIAN_LITTLE 0x1026
#define CL_DEVICE_AVAILABLE 0x1027
#define CL_DEVICE_COMPILER_AVAILABLE 0x1028
#define CL_DEVICE_EXECUTION_CAPABILITIES 0x1029
#define CL_DEVICE_QUEUE_PROPERTIES 0x102A
#define CL_DEVICE_NAME 0x102B
#define CL_DEVICE_VENDOR 0x102C
#define CL_DRIVER_VERSION 0x102D
#define CL_DEVICE_PROFILE 0x102E
#define CL_DEVICE_VERSION 0x102F
#define CL_DEVICE_EXTENSIONS 0x1030
#define CL_DEVICE_PLATFORM 0x1031
// cl_device_address_info - bitfield
#define CL_DEVICE_ADDRESS_32_BITS (1 << 0)
#define CL_DEVICE_ADDRESS_64_BITS (1 << 1)
// cl_device_fp_config - bitfield
#define CL_FP_DENORM (1 << 0)
#define CL_FP_INF_NAN (1 << 1)
#define CL_FP_ROUND_TO_NEAREST (1 << 2)
#define CL_FP_ROUND_TO_ZERO (1 << 3)
#define CL_FP_ROUND_TO_INF (1 << 4)
#define CL_FP_FMA (1 << 5)
// cl_device_mem_cache_type
#define CL_NONE 0x0
#define CL_READ_ONLY_CACHE 0x1
#define CL_READ_WRITE_CACHE 0x2
// cl_device_local_mem_type
#define CL_LOCAL 0x1
#define CL_GLOBAL 0x2
// cl_device_exec_capabilities - bitfield
#define CL_EXEC_KERNEL (1 << 0)
#define CL_EXEC_NATIVE_KERNEL (1 << 1)
// cl_command_queue_properties - bitfield
#define CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE (1 << 0)
#define CL_QUEUE_PROFILING_ENABLE (1 << 1)
// cl_context_info
#define CL_CONTEXT_REFERENCE_COUNT 0x1080
#define CL_CONTEXT_NUM_DEVICES 0x1081
#define CL_CONTEXT_DEVICES 0x1082
#define CL_CONTEXT_PROPERTIES 0x1083
#define CL_CONTEXT_PLATFORM 0x1084
// cl_command_queue_info
#define CL_QUEUE_CONTEXT 0x1090
#define CL_QUEUE_DEVICE 0x1091
#define CL_QUEUE_REFERENCE_COUNT 0x1092
#define CL_QUEUE_PROPERTIES 0x1093
// cl_mem_flags - bitfield
#define CL_MEM_READ_WRITE (1 << 0)
#define CL_MEM_WRITE_ONLY (1 << 1)
#define CL_MEM_READ_ONLY (1 << 2)
#define CL_MEM_USE_HOST_PTR (1 << 3)
#define CL_MEM_ALLOC_HOST_PTR (1 << 4)
#define CL_MEM_COPY_HOST_PTR (1 << 5)
// cl_channel_order
#define CL_R 0x10B0
#define CL_A 0x10B1
#define CL_RG 0x10B2
#define CL_RA 0x10B3
#define CL_RGB 0x10B4
#define CL_RGBA 0x10B5
#define CL_BGRA 0x10B6
#define CL_ARGB 0x10B7
#define CL_INTENSITY 0x10B8
#define CL_LUMINANCE 0x10B9
// cl_channel_type
#define CL_SNORM_INT8 0x10D0
#define CL_SNORM_INT16 0x10D1
#define CL_UNORM_INT8 0x10D2
#define CL_UNORM_INT16 0x10D3
#define CL_UNORM_SHORT_565 0x10D4
#define CL_UNORM_SHORT_555 0x10D5
#define CL_UNORM_INT_101010 0x10D6
#define CL_SIGNED_INT8 0x10D7
#define CL_SIGNED_INT16 0x10D8
#define CL_SIGNED_INT32 0x10D9
#define CL_UNSIGNED_INT8 0x10DA
#define CL_UNSIGNED_INT16 0x10DB
#define CL_UNSIGNED_INT32 0x10DC
#define CL_HALF_FLOAT 0x10DD
#define CL_FLOAT 0x10DE
// cl_mem_object_type
#define CL_MEM_OBJECT_BUFFER 0x10F0
#define CL_MEM_OBJECT_IMAGE2D 0x10F1
#define CL_MEM_OBJECT_IMAGE3D 0x10F2
// cl_mem_info
#define CL_MEM_TYPE 0x1100
#define CL_MEM_FLAGS 0x1101
#define CL_MEM_SIZE 0x1102
#define CL_MEM_HOST_PTR 0x1103
#define CL_MEM_MAP_COUNT 0x1104
#define CL_MEM_REFERENCE_COUNT 0x1105
#define CL_MEM_CONTEXT 0x1106
// cl_image_info
#define CL_IMAGE_FORMAT 0x1110
#define CL_IMAGE_ELEMENT_SIZE 0x1111
#define CL_IMAGE_ROW_PITCH 0x1112
#define CL_IMAGE_SLICE_PITCH 0x1113
#define CL_IMAGE_WIDTH 0x1114
#define CL_IMAGE_HEIGHT 0x1115
#define CL_IMAGE_DEPTH 0x1116
// cl_addressing_mode
#define CL_ADDRESS_NONE 0x1130
#define CL_ADDRESS_CLAMP_TO_EDGE 0x1131
#define CL_ADDRESS_CLAMP 0x1132
#define CL_ADDRESS_REPEAT 0x1133
// cl_filter_mode
#define CL_FILTER_NEAREST 0x1140
#define CL_FILTER_LINEAR 0x1141
// cl_sampler_info
#define CL_SAMPLER_REFERENCE_COUNT 0x1150
#define CL_SAMPLER_CONTEXT 0x1151
#define CL_SAMPLER_NORMALIZED_COORDS 0x1152
#define CL_SAMPLER_ADDRESSING_MODE 0x1153
#define CL_SAMPLER_FILTER_MODE 0x1154
// cl_map_flags - bitfield
#define CL_MAP_READ (1 << 0)
#define CL_MAP_WRITE (1 << 1)
// cl_program_info
#define CL_PROGRAM_REFERENCE_COUNT 0x1160
#define CL_PROGRAM_CONTEXT 0x1161
#define CL_PROGRAM_NUM_DEVICES 0x1162
#define CL_PROGRAM_DEVICES 0x1163
#define CL_PROGRAM_SOURCE 0x1164
#define CL_PROGRAM_BINARY_SIZES 0x1165
#define CL_PROGRAM_BINARIES 0x1166
// cl_program_build_info
#define CL_PROGRAM_BUILD_STATUS 0x1181
#define CL_PROGRAM_BUILD_OPTIONS 0x1182
#define CL_PROGRAM_BUILD_LOG 0x1183
// cl_build_status
#define CL_BUILD_SUCCESS 0
#define CL_BUILD_NONE -1
#define CL_BUILD_ERROR -2
#define CL_BUILD_IN_PROGRESS -3
// cl_kernel_info
#define CL_KERNEL_FUNCTION_NAME 0x1190
#define CL_KERNEL_NUM_ARGS 0x1191
#define CL_KERNEL_REFERENCE_COUNT 0x1192
#define CL_KERNEL_CONTEXT 0x1193
#define CL_KERNEL_PROGRAM 0x1194
// cl_kernel_work_group_info
#define CL_KERNEL_WORK_GROUP_SIZE 0x11B0
#define CL_KERNEL_COMPILE_WORK_GROUP_SIZE 0x11B1
#define CL_KERNEL_LOCAL_MEM_SIZE 0x11B2
// cl_event_info
#define CL_EVENT_COMMAND_QUEUE 0x11D0
#define CL_EVENT_COMMAND_TYPE 0x11D1
#define CL_EVENT_REFERENCE_COUNT 0x11D2
#define CL_EVENT_COMMAND_EXECUTION_STATUS 0x11D3
// cl_command_type
#define CL_COMMAND_NDRANGE_KERNEL 0x11F0
#define CL_COMMAND_TASK 0x11F1
#define CL_COMMAND_NATIVE_KERNEL 0x11F2
#define CL_COMMAND_READ_BUFFER 0x11F3
#define CL_COMMAND_WRITE_BUFFER 0x11F4
#define CL_COMMAND_COPY_BUFFER 0x11F5
#define CL_COMMAND_READ_IMAGE 0x11F6
#define CL_COMMAND_WRITE_IMAGE 0x11F7
#define CL_COMMAND_COPY_IMAGE 0x11F8
#define CL_COMMAND_COPY_IMAGE_TO_BUFFER 0x11F9
#define CL_COMMAND_COPY_BUFFER_TO_IMAGE 0x11FA
#define CL_COMMAND_MAP_BUFFER 0x11FB
#define CL_COMMAND_MAP_IMAGE 0x11FC
#define CL_COMMAND_UNMAP_MEM_OBJECT 0x11FD
#define CL_COMMAND_MARKER 0x11FE
#define CL_COMMAND_WAIT_FOR_EVENTS 0x11FF
#define CL_COMMAND_BARRIER 0x1200
#define CL_COMMAND_ACQUIRE_GL_OBJECTS 0x1201
#define CL_COMMAND_RELEASE_GL_OBJECTS 0x1202
// command execution status
#define CL_COMPLETE 0x0
#define CL_RUNNING 0x1
#define CL_SUBMITTED 0x2
#define CL_QUEUED 0x3
// cl_profiling_info
#define CL_PROFILING_COMMAND_QUEUED 0x1280
#define CL_PROFILING_COMMAND_SUBMIT 0x1281
#define CL_PROFILING_COMMAND_START 0x1282
#define CL_PROFILING_COMMAND_END 0x1283
/********************************************************************************************************/
// Platform API
extern CL_API_ENTRY cl_int CL_API_CALL
clGetPlatformIDs(cl_uint /* num_entries */,
cl_platform_id * /* platforms */,
cl_uint * /* num_platforms */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clGetPlatformInfo(cl_platform_id /* platform */,
cl_platform_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
// Device APIs
extern CL_API_ENTRY cl_int CL_API_CALL
clGetDeviceIDs(cl_platform_id /* platform */,
cl_device_type /* device_type */,
cl_uint /* num_entries */,
cl_device_id * /* devices */,
cl_uint * /* num_devices */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clGetDeviceInfo(cl_device_id /* device */,
cl_device_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
// Context APIs
extern CL_API_ENTRY cl_context CL_API_CALL
clCreateContext(cl_context_properties * /* properties */,
cl_uint /* num_devices */,
const cl_device_id * /* devices */,
void (*pfn_notify)(const char *, const void *, size_t, void *) /* pfn_notify */,
void * /* user_data */,
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_context CL_API_CALL
clCreateContextFromType(cl_context_properties * /* properties */,
cl_device_type /* device_type */,
void (*pfn_notify)(const char *, const void *, size_t, void *) /* pfn_notify */,
void * /* user_data */,
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clRetainContext(cl_context /* context */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clReleaseContext(cl_context /* context */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clGetContextInfo(cl_context /* context */,
cl_context_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
// Command Queue APIs
extern CL_API_ENTRY cl_command_queue CL_API_CALL
clCreateCommandQueue(cl_context /* context */,
cl_device_id /* device */,
cl_command_queue_properties /* properties */,
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clRetainCommandQueue(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clReleaseCommandQueue(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clGetCommandQueueInfo(cl_command_queue /* command_queue */,
cl_command_queue_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clSetCommandQueueProperty(cl_command_queue /* command_queue */,
cl_command_queue_properties /* properties */,
cl_bool /* enable */,
cl_command_queue_properties * /* old_properties */) CL_API_SUFFIX__VERSION_1_0;
// Memory Object APIs
extern CL_API_ENTRY cl_mem CL_API_CALL
clCreateBuffer(cl_context /* context */,
cl_mem_flags /* flags */,
size_t /* size */,
void * /* host_ptr */,
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_mem CL_API_CALL
clCreateImage2D(cl_context /* context */,
cl_mem_flags /* flags */,
const cl_image_format * /* image_format */,
size_t /* image_width */,
size_t /* image_height */,
size_t /* image_row_pitch */,
void * /* host_ptr */,
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_mem CL_API_CALL
clCreateImage3D(cl_context /* context */,
cl_mem_flags /* flags */,
const cl_image_format * /* image_format */,
size_t /* image_width */,
size_t /* image_height */,
size_t /* image_depth */,
size_t /* image_row_pitch */,
size_t /* image_slice_pitch */,
void * /* host_ptr */,
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clRetainMemObject(cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clReleaseMemObject(cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clGetSupportedImageFormats(cl_context /* context */,
cl_mem_flags /* flags */,
cl_mem_object_type /* image_type */,
cl_uint /* num_entries */,
cl_image_format * /* image_formats */,
cl_uint * /* num_image_formats */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clGetMemObjectInfo(cl_mem /* memobj */,
cl_mem_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clGetImageInfo(cl_mem /* image */,
cl_image_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
// Sampler APIs
extern CL_API_ENTRY cl_sampler CL_API_CALL
clCreateSampler(cl_context /* context */,
cl_bool /* normalized_coords */,
cl_addressing_mode /* addressing_mode */,
cl_filter_mode /* filter_mode */,
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clRetainSampler(cl_sampler /* sampler */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clReleaseSampler(cl_sampler /* sampler */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clGetSamplerInfo(cl_sampler /* sampler */,
cl_sampler_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
// Program Object APIs
extern CL_API_ENTRY cl_program CL_API_CALL
clCreateProgramWithSource(cl_context /* context */,
cl_uint /* count */,
const char ** /* strings */,
const size_t * /* lengths */,
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_program CL_API_CALL
clCreateProgramWithBinary(cl_context /* context */,
cl_uint /* num_devices */,
const cl_device_id * /* device_list */,
const size_t * /* lengths */,
const unsigned char ** /* binaries */,
cl_int * /* binary_status */,
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clRetainProgram(cl_program /* program */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clReleaseProgram(cl_program /* program */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clBuildProgram(cl_program /* program */,
cl_uint /* num_devices */,
const cl_device_id * /* device_list */,
const char * /* options */,
void (*pfn_notify)(cl_program /* program */, void * /* user_data */),
void * /* user_data */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clUnloadCompiler(void) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clGetProgramInfo(cl_program /* program */,
cl_program_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clGetProgramBuildInfo(cl_program /* program */,
cl_device_id /* device */,
cl_program_build_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
// Kernel Object APIs
extern CL_API_ENTRY cl_kernel CL_API_CALL
clCreateKernel(cl_program /* program */,
const char * /* kernel_name */,
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clCreateKernelsInProgram(cl_program /* program */,
cl_uint /* num_kernels */,
cl_kernel * /* kernels */,
cl_uint * /* num_kernels_ret */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clRetainKernel(cl_kernel /* kernel */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clReleaseKernel(cl_kernel /* kernel */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clSetKernelArg(cl_kernel /* kernel */,
cl_uint /* arg_index */,
size_t /* arg_size */,
const void * /* arg_value */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clGetKernelInfo(cl_kernel /* kernel */,
cl_kernel_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clGetKernelWorkGroupInfo(cl_kernel /* kernel */,
cl_device_id /* device */,
cl_kernel_work_group_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
// Event Object APIs
extern CL_API_ENTRY cl_int CL_API_CALL
clWaitForEvents(cl_uint /* num_events */,
const cl_event * /* event_list */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clGetEventInfo(cl_event /* event */,
cl_event_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clRetainEvent(cl_event /* event */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clReleaseEvent(cl_event /* event */) CL_API_SUFFIX__VERSION_1_0;
// Profiling APIs
extern CL_API_ENTRY cl_int CL_API_CALL
clGetEventProfilingInfo(cl_event /* event */,
cl_profiling_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
// Flush and Finish APIs
extern CL_API_ENTRY cl_int CL_API_CALL
clFlush(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clFinish(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0;
// Enqueued Commands APIs
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueReadBuffer(cl_command_queue /* command_queue */,
cl_mem /* buffer */,
cl_bool /* blocking_read */,
size_t /* offset */,
size_t /* cb */,
void * /* ptr */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueWriteBuffer(cl_command_queue /* command_queue */,
cl_mem /* buffer */,
cl_bool /* blocking_write */,
size_t /* offset */,
size_t /* cb */,
const void * /* ptr */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueCopyBuffer(cl_command_queue /* command_queue */,
cl_mem /* src_buffer */,
cl_mem /* dst_buffer */,
size_t /* src_offset */,
size_t /* dst_offset */,
size_t /* cb */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueReadImage(cl_command_queue /* command_queue */,
cl_mem /* image */,
cl_bool /* blocking_read */,
const size_t * /* origin[3] */,
const size_t * /* region[3] */,
size_t /* row_pitch */,
size_t /* slice_pitch */,
void * /* ptr */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueWriteImage(cl_command_queue /* command_queue */,
cl_mem /* image */,
cl_bool /* blocking_write */,
const size_t * /* origin[3] */,
const size_t * /* region[3] */,
size_t /* input_row_pitch */,
size_t /* input_slice_pitch */,
const void * /* ptr */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueCopyImage(cl_command_queue /* command_queue */,
cl_mem /* src_image */,
cl_mem /* dst_image */,
const size_t * /* src_origin[3] */,
const size_t * /* dst_origin[3] */,
const size_t * /* region[3] */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueCopyImageToBuffer(cl_command_queue /* command_queue */,
cl_mem /* src_image */,
cl_mem /* dst_buffer */,
const size_t * /* src_origin[3] */,
const size_t * /* region[3] */,
size_t /* dst_offset */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueCopyBufferToImage(cl_command_queue /* command_queue */,
cl_mem /* src_buffer */,
cl_mem /* dst_image */,
size_t /* src_offset */,
const size_t * /* dst_origin[3] */,
const size_t * /* region[3] */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY void * CL_API_CALL
clEnqueueMapBuffer(cl_command_queue /* command_queue */,
cl_mem /* buffer */,
cl_bool /* blocking_map */,
cl_map_flags /* map_flags */,
size_t /* offset */,
size_t /* cb */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */,
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY void * CL_API_CALL
clEnqueueMapImage(cl_command_queue /* command_queue */,
cl_mem /* image */,
cl_bool /* blocking_map */,
cl_map_flags /* map_flags */,
const size_t * /* origin[3] */,
const size_t * /* region[3] */,
size_t * /* image_row_pitch */,
size_t * /* image_slice_pitch */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */,
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueUnmapMemObject(cl_command_queue /* command_queue */,
cl_mem /* memobj */,
void * /* mapped_ptr */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueNDRangeKernel(cl_command_queue /* command_queue */,
cl_kernel /* kernel */,
cl_uint /* work_dim */,
const size_t * /* global_work_offset */,
const size_t * /* global_work_size */,
const size_t * /* local_work_size */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueTask(cl_command_queue /* command_queue */,
cl_kernel /* kernel */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueNativeKernel(cl_command_queue /* command_queue */,
void (*user_func)(void *),
void * /* args */,
size_t /* cb_args */,
cl_uint /* num_mem_objects */,
const cl_mem * /* mem_list */,
const void ** /* args_mem_loc */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueMarker(cl_command_queue /* command_queue */,
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueWaitForEvents(cl_command_queue /* command_queue */,
cl_uint /* num_events */,
const cl_event * /* event_list */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueBarrier(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0;
#ifdef __cplusplus
}
#endif
#endif // __OPENCL_CL_H

113
src/MiniCL/cl_gl.h Normal file
View File

@@ -0,0 +1,113 @@
/**********************************************************************************
* Copyright (c) 2008-2009 The Khronos Group Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and/or associated documentation files (the
* "Materials"), to deal in the Materials without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Materials, and to
* permit persons to whom the Materials are furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Materials.
*
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
**********************************************************************************/
#ifndef __OPENCL_CL_GL_H
#define __OPENCL_CL_GL_H
#ifdef __APPLE__
#include <OpenCL/cl_platform.h>
#else
#include <CL/cl_platform.h>
#endif
#ifdef __cplusplus
extern "C" {
#endif
// NOTE: Make sure that appropriate GL header file is included separately
typedef cl_uint cl_gl_object_type;
typedef cl_uint cl_gl_texture_info;
typedef cl_uint cl_gl_platform_info;
// cl_gl_object_type
#define CL_GL_OBJECT_BUFFER 0x2000
#define CL_GL_OBJECT_TEXTURE2D 0x2001
#define CL_GL_OBJECT_TEXTURE3D 0x2002
#define CL_GL_OBJECT_RENDERBUFFER 0x2003
// cl_gl_texture_info
#define CL_GL_TEXTURE_TARGET 0x2004
#define CL_GL_MIPMAP_LEVEL 0x2005
extern CL_API_ENTRY cl_mem CL_API_CALL
clCreateFromGLBuffer(cl_context /* context */,
cl_mem_flags /* flags */,
GLuint /* bufobj */,
int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_mem CL_API_CALL
clCreateFromGLTexture2D(cl_context /* context */,
cl_mem_flags /* flags */,
GLenum /* target */,
GLint /* miplevel */,
GLuint /* texture */,
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_mem CL_API_CALL
clCreateFromGLTexture3D(cl_context /* context */,
cl_mem_flags /* flags */,
GLenum /* target */,
GLint /* miplevel */,
GLuint /* texture */,
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_mem CL_API_CALL
clCreateFromGLRenderbuffer(cl_context /* context */,
cl_mem_flags /* flags */,
GLuint /* renderbuffer */,
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clGetGLObjectInfo(cl_mem /* memobj */,
cl_gl_object_type * /* gl_object_type */,
GLuint * /* gl_object_name */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clGetGLTextureInfo(cl_mem /* memobj */,
cl_gl_texture_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueAcquireGLObjects(cl_command_queue /* command_queue */,
cl_uint /* num_objects */,
const cl_mem * /* mem_objects */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueReleaseGLObjects(cl_command_queue /* command_queue */,
cl_uint /* num_objects */,
const cl_mem * /* mem_objects */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
#ifdef __cplusplus
}
#endif
#endif // __OPENCL_CL_GL_H

244
src/MiniCL/cl_platform.h Normal file
View File

@@ -0,0 +1,244 @@
/**********************************************************************************
* Copyright (c) 2008-2009 The Khronos Group Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and/or associated documentation files (the
* "Materials"), to deal in the Materials without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Materials, and to
* permit persons to whom the Materials are furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Materials.
*
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
**********************************************************************************/
#ifndef __CL_PLATFORM_H
#define __CL_PLATFORM_H
#ifdef __APPLE__
/* Contains #defines for AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER below */
#include <AvailabilityMacros.h>
#endif
#ifdef __cplusplus
extern "C" {
#endif
#define CL_API_ENTRY
#define CL_API_CALL
#ifdef __APPLE__
#define CL_API_SUFFIX__VERSION_1_0 // AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER
#define CL_EXTENSION_WEAK_LINK __attribute__((weak_import))
#else
#define CL_API_SUFFIX__VERSION_1_0
#define CL_EXTENSION_WEAK_LINK
#endif
#ifdef WIN32
typedef signed __int8 int8_t;
typedef unsigned __int8 uint8_t;
typedef signed __int16 int16_t;
typedef unsigned __int16 uint16_t;
typedef signed __int32 int32_t;
typedef unsigned __int32 uint32_t;
typedef signed __int64 int64_t;
typedef unsigned __int64 uint64_t;
typedef int8_t cl_char;
typedef uint8_t cl_uchar;
typedef int16_t cl_short ;
typedef uint16_t cl_ushort ;
typedef int32_t cl_int ;
typedef uint32_t cl_uint ;
typedef int64_t cl_long ;
typedef uint64_t cl_ulong ;
typedef uint16_t cl_half ;
typedef float cl_float ;
typedef double cl_double ;
typedef int8_t cl_char2[2] ;
typedef int8_t cl_char4[4] ;
typedef int8_t cl_char8[8] ;
typedef int8_t cl_char16[16] ;
typedef uint8_t cl_uchar2[2] ;
typedef uint8_t cl_uchar4[4] ;
typedef uint8_t cl_uchar8[8] ;
typedef uint8_t cl_uchar16[16] ;
typedef int16_t cl_short2[2] ;
typedef int16_t cl_short4[4] ;
typedef int16_t cl_short8[8] ;
typedef int16_t cl_short16[16] ;
typedef uint16_t cl_ushort2[2] ;
typedef uint16_t cl_ushort4[4] ;
typedef uint16_t cl_ushort8[8] ;
typedef uint16_t cl_ushort16[16] ;
typedef int32_t cl_int2[2] ;
typedef int32_t cl_int4[4] ;
typedef int32_t cl_int8[8] ;
typedef int32_t cl_int16[16] ;
typedef uint32_t cl_uint2[2] ;
typedef uint32_t cl_uint4[4] ;
typedef uint32_t cl_uint8[8] ;
typedef uint32_t cl_uint16[16] ;
typedef int64_t cl_long2[2] ;
typedef int64_t cl_long4[4] ;
typedef int64_t cl_long8[8] ;
typedef int64_t cl_long16[16] ;
typedef uint64_t cl_ulong2[2] ;
typedef uint64_t cl_ulong4[4] ;
typedef uint64_t cl_ulong8[8] ;
typedef uint64_t cl_ulong16[16] ;
typedef float cl_float2[2] ;
typedef float cl_float4[4] ;
typedef float cl_float8[8] ;
typedef float cl_float16[16] ;
typedef double cl_double2[2] ;
typedef double cl_double4[4] ;
typedef double cl_double8[8] ;
typedef double cl_double16[16] ;
#else
#include <stdint.h>
/* scalar types */
typedef int8_t cl_char;
typedef uint8_t cl_uchar;
typedef int16_t cl_short __attribute__((aligned(2)));
typedef uint16_t cl_ushort __attribute__((aligned(2)));
typedef int32_t cl_int __attribute__((aligned(4)));
typedef uint32_t cl_uint __attribute__((aligned(4)));
typedef int64_t cl_long __attribute__((aligned(8)));
typedef uint64_t cl_ulong __attribute__((aligned(8)));
typedef uint16_t cl_half __attribute__((aligned(2)));
typedef float cl_float __attribute__((aligned(4)));
typedef double cl_double __attribute__((aligned(8)));
/*
* Vector types
*
* Note: OpenCL requires that all types be naturally aligned.
* This means that vector types must be naturally aligned.
* For example, a vector of four floats must be aligned to
* a 16 byte boundary (calculated as 4 * the natural 4-byte
* alignment of the float). The alignment qualifiers here
* will only function properly if your compiler supports them
* and if you don't actively work to defeat them. For example,
* in order for a cl_float4 to be 16 byte aligned in a struct,
* the start of the struct must itself be 16-byte aligned.
*
* Maintaining proper alignment is the user's responsibility.
*/
typedef int8_t cl_char2[2] __attribute__((aligned(2)));
typedef int8_t cl_char4[4] __attribute__((aligned(4)));
typedef int8_t cl_char8[8] __attribute__((aligned(8)));
typedef int8_t cl_char16[16] __attribute__((aligned(16)));
typedef uint8_t cl_uchar2[2] __attribute__((aligned(2)));
typedef uint8_t cl_uchar4[4] __attribute__((aligned(4)));
typedef uint8_t cl_uchar8[8] __attribute__((aligned(8)));
typedef uint8_t cl_uchar16[16] __attribute__((aligned(16)));
typedef int16_t cl_short2[2] __attribute__((aligned(4)));
typedef int16_t cl_short4[4] __attribute__((aligned(8)));
typedef int16_t cl_short8[8] __attribute__((aligned(16)));
typedef int16_t cl_short16[16] __attribute__((aligned(32)));
typedef uint16_t cl_ushort2[2] __attribute__((aligned(4)));
typedef uint16_t cl_ushort4[4] __attribute__((aligned(8)));
typedef uint16_t cl_ushort8[8] __attribute__((aligned(16)));
typedef uint16_t cl_ushort16[16] __attribute__((aligned(32)));
typedef int32_t cl_int2[2] __attribute__((aligned(8)));
typedef int32_t cl_int4[4] __attribute__((aligned(16)));
typedef int32_t cl_int8[8] __attribute__((aligned(32)));
typedef int32_t cl_int16[16] __attribute__((aligned(64)));
typedef uint32_t cl_uint2[2] __attribute__((aligned(8)));
typedef uint32_t cl_uint4[4] __attribute__((aligned(16)));
typedef uint32_t cl_uint8[8] __attribute__((aligned(32)));
typedef uint32_t cl_uint16[16] __attribute__((aligned(64)));
typedef int64_t cl_long2[2] __attribute__((aligned(16)));
typedef int64_t cl_long4[4] __attribute__((aligned(32)));
typedef int64_t cl_long8[8] __attribute__((aligned(64)));
typedef int64_t cl_long16[16] __attribute__((aligned(128)));
typedef uint64_t cl_ulong2[2] __attribute__((aligned(16)));
typedef uint64_t cl_ulong4[4] __attribute__((aligned(32)));
typedef uint64_t cl_ulong8[8] __attribute__((aligned(64)));
typedef uint64_t cl_ulong16[16] __attribute__((aligned(128)));
typedef float cl_float2[2] __attribute__((aligned(8)));
typedef float cl_float4[4] __attribute__((aligned(16)));
typedef float cl_float8[8] __attribute__((aligned(32)));
typedef float cl_float16[16] __attribute__((aligned(64)));
typedef double cl_double2[2] __attribute__((aligned(16)));
typedef double cl_double4[4] __attribute__((aligned(32)));
typedef double cl_double8[8] __attribute__((aligned(64)));
typedef double cl_double16[16] __attribute__((aligned(128)));
#endif
#include <stddef.h>
/* and a few goodies to go with them */
#define CL_CHAR_BIT 8
#define CL_SCHAR_MAX 127
#define CL_SCHAR_MIN (-127-1)
#define CL_CHAR_MAX CL_SCHAR_MAX
#define CL_CHAR_MIN CL_SCHAR_MIN
#define CL_UCHAR_MAX 255
#define CL_SHRT_MAX 32767
#define CL_SHRT_MIN (-32767-1)
#define CL_USHRT_MAX 65535
#define CL_INT_MAX 2147483647
#define CL_INT_MIN (-2147483647-1)
#define CL_UINT_MAX 0xffffffffU
#define CL_LONG_MAX ((cl_long) 0x7FFFFFFFFFFFFFFFLL)
#define CL_LONG_MIN ((cl_long) -0x7FFFFFFFFFFFFFFFLL - 1LL)
#define CL_ULONG_MAX ((cl_ulong) 0xFFFFFFFFFFFFFFFFULL)
#define CL_FLT_DIG 6
#define CL_FLT_MANT_DIG 24
#define CL_FLT_MAX_10_EXP +38
#define CL_FLT_MAX_EXP +128
#define CL_FLT_MIN_10_EXP -37
#define CL_FLT_MIN_EXP -125
#define CL_FLT_RADIX 2
#define CL_FLT_MAX 0x1.fffffep127f
#define CL_FLT_MIN 0x1.0p-126f
#define CL_FLT_EPSILON 0x1.0p-23f
#define CL_DBL_DIG 15
#define CL_DBL_MANT_DIG 53
#define CL_DBL_MAX_10_EXP +308
#define CL_DBL_MAX_EXP +1024
#define CL_DBL_MIN_10_EXP -307
#define CL_DBL_MIN_EXP -1021
#define CL_DBL_RADIX 2
#define CL_DBL_MAX 0x1.fffffffffffffp1023
#define CL_DBL_MIN 0x1.0p-1022
#define CL_DBL_EPSILON 0x1.0p-52
/* There are no vector types for half */
#ifdef __cplusplus
}
#endif
#endif // __CL_PLATFORM_H