move some recent MiniCL work to trunk

This commit is contained in:
erwin.coumans
2010-02-08 22:42:58 +00:00
parent 52e60c8246
commit 7d4e2873e2
15 changed files with 1471 additions and 724 deletions

View File

@@ -1,26 +1,27 @@
IF (USE_GLUT) IF (USE_GLUT)
SET(SharedDemoSubdirs
OpenGL AllBulletDemos ConvexDecompositionDemo Benchmarks HelloWorld
CcdPhysicsDemo ConstraintDemo SliderConstraintDemo GenericJointDemo Raytracer
RagdollDemo ForkLiftDemo BasicDemo Box2dDemo BspDemo MovingConcaveDemo VehicleDemo
UserCollisionAlgorithm CharacterDemo SoftDemo HeightFieldFluidDemo
CollisionInterfaceDemo ConcaveConvexcastDemo SimplexDemo DynamicControlDemo
DoublePrecisionDemo ConcaveDemo CollisionDemo
ContinuousConvexCollision ConcaveRaycastDemo GjkConvexCastDemo
MultiMaterialDemo SerializeDemo InternalEdgeDemo
)
if (CMAKE_SIZEOF_VOID_P MATCHES "8") if (CMAKE_SIZEOF_VOID_P MATCHES "8")
SUBDIRS( OpenGL AllBulletDemos ConvexDecompositionDemo Benchmarks HelloWorld SUBDIRS( OpenGL
CcdPhysicsDemo ConstraintDemo SliderConstraintDemo GenericJointDemo Raytracer ${SharedDemoSubdirs}
RagdollDemo ForkLiftDemo BasicDemo Box2dDemo BspDemo MovingConcaveDemo VehicleDemo
UserCollisionAlgorithm CharacterDemo SoftDemo HeightFieldFluidDemo
CollisionInterfaceDemo ConcaveConvexcastDemo SimplexDemo DynamicControlDemo
DoublePrecisionDemo ConcaveDemo CollisionDemo
ContinuousConvexCollision ConcaveRaycastDemo GjkConvexCastDemo
MultiMaterialDemo SerializeDemo InternalEdgeDemo
) )
else (CMAKE_SIZEOF_VOID_P MATCHES "8") else (CMAKE_SIZEOF_VOID_P MATCHES "8")
SUBDIRS( OpenGL AllBulletDemos ConvexDecompositionDemo Benchmarks HelloWorld SUBDIRS( OpenGL
MultiThreadedDemo CcdPhysicsDemo ConstraintDemo SliderConstraintDemo Raytracer ${SharedDemoSubdirs}
GenericJointDemo RagdollDemo ForkLiftDemo BasicDemo Box2dDemo BspDemo MovingConcaveDemo MultiThreadedDemo
VehicleDemo UserCollisionAlgorithm CharacterDemo SoftDemo MiniCL_VectorAdd
HeightFieldFluidDemo
CollisionInterfaceDemo ConcaveConvexcastDemo SimplexDemo DynamicControlDemo
DoublePrecisionDemo ConcaveDemo CollisionDemo
ContinuousConvexCollision ConcaveRaycastDemo GjkConvexCastDemo
MultiMaterialDemo SerializeDemo InternalEdgeDemo
) )
endif (CMAKE_SIZEOF_VOID_P MATCHES "8") endif (CMAKE_SIZEOF_VOID_P MATCHES "8")
@@ -40,6 +41,7 @@ SUBDIRS(
GenericJointDemo GenericJointDemo
SerializeDemo SerializeDemo
SoftDemo SoftDemo
MiniCL_VectorAdd
) )
ENDIF (USE_GLUT) ENDIF (USE_GLUT)

View File

@@ -11,7 +11,7 @@ LINK_LIBRARIES(
ADD_EXECUTABLE(AppMiniCLVectorAdd ADD_EXECUTABLE(AppMiniCLVectorAdd
MiniCL_VectorAdd.cpp MiniCL_VectorAdd.cpp
MiniCL.cpp VectorAddKernels.cl
) )
IF (UNIX) IF (UNIX)

View File

@@ -1,5 +0,0 @@
SubDir TOP Demos MiniCL_VectorAdd ;
BulletMiniCLDemo MiniCL_VectorAdd : [ Wildcard *.h *.cpp ] ;
MsvcIncDirs MiniCL_VectorAdd : "../../src" ;

View File

@@ -5,7 +5,13 @@
///Instead of #include <CL/cl.h> we include <MiniCL/cl.h> ///Instead of #include <CL/cl.h> we include <MiniCL/cl.h>
///Apart from this include file, all other code should compile and work on OpenCL compliant implementation ///Apart from this include file, all other code should compile and work on OpenCL compliant implementation
#include <MiniCL/cl.h> #define USE_MINICL 1
#ifdef USE_MINICL
#include "MiniCL/cl.h"
#else //USE_MINICL
#include <CL/cl.h>
#endif//USE_MINICL
#include <stdio.h> #include <stdio.h>
#include <math.h> #include <math.h>
#include <stdlib.h> #include <stdlib.h>
@@ -170,3 +176,13 @@ int main(int argc, char **argv)
free(srcB); free(srcB);
free (dst); free (dst);
} }
#ifdef USE_MINICL
#include "MiniCL/cl_MiniCL_Defs.h"
extern "C"
{
#include "VectorAddKernels.cl"
}
MINICL_REGISTER(VectorAdd)
#endif//USE_MINICL

View File

@@ -0,0 +1,47 @@
/*
Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
Copyright (C) 2006 - 2009 Sony Computer Entertainment Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
///GUID_ARG is only used by MiniCL to pass in the guid used by its get_global_id implementation
#ifndef GUID_ARG
#define GUID_ARG
#endif
///////////////////////////////////////////////////
// OpenCL Kernel Function for element by element vector addition
__kernel void VectorAdd(__global const float8* a, __global const float8* b, __global float8* c GUID_ARG)
{
// get oct-float index into global data array
int iGID = get_global_id(0);
// read inputs into registers
float8 f8InA = a[iGID];
float8 f8InB = b[iGID];
float8 f8Out = (float8)0.0f;
// add the vector elements
f8Out.s0 = f8InA.s0 + f8InB.s0;
f8Out.s1 = f8InA.s1 + f8InB.s1;
f8Out.s2 = f8InA.s2 + f8InB.s2;
f8Out.s3 = f8InA.s3 + f8InB.s3;
f8Out.s4 = f8InA.s4 + f8InB.s4;
f8Out.s5 = f8InA.s5 + f8InB.s5;
f8Out.s6 = f8InA.s6 + f8InB.s6;
f8Out.s7 = f8InA.s7 + f8InB.s7;
// write back out to GMEM
c[get_global_id(0)] = f8Out;
}

View File

@@ -56,6 +56,7 @@ ADD_LIBRARY(BulletMultiThreaded
btGpuUtilsSharedDefs.h btGpuUtilsSharedDefs.h
#MiniCL provides a small subset of OpenCL #MiniCL provides a small subset of OpenCL
MiniCL.cpp
MiniCLTaskScheduler.cpp MiniCLTaskScheduler.cpp
MiniCLTaskScheduler.h MiniCLTaskScheduler.h
MiniCLTask/MiniCLTask.cpp MiniCLTask/MiniCLTask.cpp
@@ -63,6 +64,7 @@ ADD_LIBRARY(BulletMultiThreaded
../MiniCL/cl.h ../MiniCL/cl.h
../MiniCL/cl_gl.h ../MiniCL/cl_gl.h
../MiniCL/cl_platform.h ../MiniCL/cl_platform.h
../MiniCL/cl_MiniCL_Defs.h
) )
IF (BUILD_SHARED_LIBS) IF (BUILD_SHARED_LIBS)

View File

@@ -1,346 +1,512 @@
/*
#include <MiniCL/cl.h> Copyright (C) 2010 Sony Computer Entertainment Inc.
#define __PHYSICS_COMMON_H__ 1 All rights reserved.
#ifdef WIN32
#include "BulletMultiThreaded/Win32ThreadSupport.h" This software is provided 'as-is', without any express or implied warranty.
#else In no event will the authors be held liable for any damages arising from the use of this software.
#include "BulletMultiThreaded/SequentialThreadSupport.h" Permission is granted to anyone to use this software for any purpose,
#endif including commercial applications, and to alter it and redistribute it freely,
#include "BulletMultiThreaded/MiniCLTaskScheduler.h" subject to the following restrictions:
#include "BulletMultiThreaded/MiniCLTask/MiniCLTask.h"
#include "LinearMath/btMinMax.h" 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
/* 3. This notice may not be removed or altered from any source distribution.
m_threadSupportCollision = new Win32ThreadSupport(Win32ThreadSupport::Win32ThreadConstructionInfo(
"collision", */
processCollisionTask,
createCollisionLocalStoreMemory,
maxNumOutstandingTasks)); #include "MiniCL/cl.h"
#define __PHYSICS_COMMON_H__ 1
if (!m_spuCollisionTaskProcess) #ifdef _WIN32
m_spuCollisionTaskProcess = new SpuCollisionTaskProcess(m_threadInterface,m_maxNumOutstandingTasks); #include "BulletMultiThreaded/Win32ThreadSupport.h"
#endif
m_spuCollisionTaskProcess->initialize2(dispatchInfo.m_useEpa);
#include "BulletMultiThreaded/SequentialThreadSupport.h"
m_spuCollisionTaskProcess->addWorkToTask(pairPtr,i,endIndex); #include "MiniCLTaskScheduler.h"
#include "MiniCLTask/MiniCLTask.h"
//make sure all SPU work is done #include "LinearMath/btMinMax.h"
m_spuCollisionTaskProcess->flush2();
//#define DEBUG_MINICL_KERNELS 1
*/
CL_API_ENTRY cl_int CL_API_CALL clGetDeviceInfo(
CL_API_ENTRY cl_int CL_API_CALL clGetDeviceInfo( cl_device_id device ,
cl_device_id device , cl_device_info param_name ,
cl_device_info param_name , size_t param_value_size ,
size_t param_value_size , void * param_value ,
void * param_value , size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0
size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0 {
{
switch (param_name)
switch (param_name) {
{ case CL_DEVICE_NAME:
case CL_DEVICE_NAME: {
{ char deviceName[] = "CPU";
char deviceName[] = "CPU"; unsigned int nameLen = strlen(deviceName)+1;
int nameLen = strlen(deviceName)+1; assert(param_value_size>strlen(deviceName));
assert(param_value_size>strlen(deviceName)); if (nameLen < param_value_size)
if (nameLen < param_value_size) {
{ sprintf_s((char*)param_value,param_value_size, "CPU");
sprintf((char*)param_value,"CPU"); } else
} else {
{ printf("error: param_value_size should be at least %d, but it is %d\n",nameLen,param_value_size);
printf("error: param_value_size should be at least %d, but it is %d\n",nameLen,param_value_size); }
} break;
break; }
} case CL_DEVICE_TYPE:
case CL_DEVICE_TYPE: {
{ if (param_value_size>=sizeof(cl_device_type))
if (param_value_size>=sizeof(cl_device_type)) {
{ cl_device_type* deviceType = (cl_device_type*)param_value;
cl_device_type* deviceType = (cl_device_type*)param_value; *deviceType = CL_DEVICE_TYPE_CPU;
*deviceType = CL_DEVICE_TYPE_CPU; } else
} else {
{ printf("error: param_value_size should be at least %d\n",sizeof(cl_device_type));
printf("error: param_value_size should be at least %d\n",sizeof(cl_device_type)); }
} break;
break; }
} case CL_DEVICE_MAX_COMPUTE_UNITS:
case CL_DEVICE_MAX_COMPUTE_UNITS: {
{ if (param_value_size>=sizeof(cl_uint))
if (param_value_size>=sizeof(cl_uint)) {
{ cl_uint* numUnits = (cl_uint*)param_value;
cl_uint* numUnits = (cl_uint*)param_value; *numUnits= 4;
*numUnits= 4; } else
} else {
{ printf("error: param_value_size should be at least %d\n",sizeof(cl_uint));
printf("error: param_value_size should be at least %d\n",sizeof(cl_uint)); }
}
break;
break; }
} case CL_DEVICE_MAX_WORK_ITEM_SIZES:
case CL_DEVICE_MAX_WORK_ITEM_SIZES: {
{ size_t workitem_size[3];
size_t workitem_size[3];
if (param_value_size>=sizeof(workitem_size))
if (param_value_size>=sizeof(workitem_size)) {
{ size_t* workItemSize = (size_t*)param_value;
size_t* workItemSize = (size_t*)param_value; workItemSize[0] = 64;
workItemSize[0] = 64; workItemSize[1] = 24;
workItemSize[1] = 24; workItemSize[2] = 16;
workItemSize[2] = 16; } else
} else {
{ printf("error: param_value_size should be at least %d\n",sizeof(cl_uint));
printf("error: param_value_size should be at least %d\n",sizeof(cl_uint)); }
} break;
break; }
} case CL_DEVICE_MAX_CLOCK_FREQUENCY:
default: {
{ cl_uint* clock_frequency = (cl_uint*)param_value;
printf("error: unsupported param_name:%d\n",param_name); *clock_frequency = 3*1024;
} break;
} }
default:
{
return 0; printf("error: unsupported param_name:%d\n",param_name);
} }
}
CL_API_ENTRY cl_int CL_API_CALL clReleaseMemObject(cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0
{
return 0; return 0;
} }
CL_API_ENTRY cl_int CL_API_CALL clReleaseMemObject(cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0
{
CL_API_ENTRY cl_int CL_API_CALL clReleaseCommandQueue(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0 return 0;
{ }
return 0;
}
CL_API_ENTRY cl_int CL_API_CALL clReleaseProgram(cl_program /* program */) CL_API_SUFFIX__VERSION_1_0 CL_API_ENTRY cl_int CL_API_CALL clReleaseCommandQueue(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0
{ {
return 0; return 0;
} }
CL_API_ENTRY cl_int CL_API_CALL clReleaseKernel(cl_kernel /* kernel */) CL_API_SUFFIX__VERSION_1_0 CL_API_ENTRY cl_int CL_API_CALL clReleaseProgram(cl_program /* program */) CL_API_SUFFIX__VERSION_1_0
{ {
return 0; return 0;
} }
CL_API_ENTRY cl_int CL_API_CALL clReleaseKernel(cl_kernel /* kernel */) CL_API_SUFFIX__VERSION_1_0
// Enqueued Commands APIs {
CL_API_ENTRY cl_int CL_API_CALL clEnqueueReadBuffer(cl_command_queue command_queue , return 0;
cl_mem buffer , }
cl_bool /* blocking_read */,
size_t /* offset */,
size_t cb , // Enqueued Commands APIs
void * ptr , CL_API_ENTRY cl_int CL_API_CALL clEnqueueReadBuffer(cl_command_queue command_queue ,
cl_uint /* num_events_in_wait_list */, cl_mem buffer ,
const cl_event * /* event_wait_list */, cl_bool /* blocking_read */,
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0 size_t offset ,
{ size_t cb ,
MiniCLTaskScheduler* scheduler = (MiniCLTaskScheduler*) command_queue; void * ptr ,
cl_uint /* num_events_in_wait_list */,
///wait for all work items to be completed const cl_event * /* event_wait_list */,
scheduler->flush(); cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0
{
memcpy(ptr,buffer,cb); MiniCLTaskScheduler* scheduler = (MiniCLTaskScheduler*) command_queue;
return 0;
} ///wait for all work items to be completed
scheduler->flush();
CL_API_ENTRY cl_int CL_API_CALL clEnqueueNDRangeKernel(cl_command_queue /* command_queue */, memcpy(ptr,(char*)buffer + offset,cb);
cl_kernel clKernel , return 0;
cl_uint work_dim , }
const size_t * /* global_work_offset */,
const size_t * global_work_size ,
const size_t * /* local_work_size */, CL_API_ENTRY cl_int clGetProgramBuildInfo(cl_program /* program */,
cl_uint /* num_events_in_wait_list */, cl_device_id /* device */,
const cl_event * /* event_wait_list */, cl_program_build_info /* param_name */,
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0 size_t /* param_value_size */,
{ void * /* param_value */,
size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0
{
MiniCLKernel* kernel = (MiniCLKernel*) clKernel;
for (int ii=0;ii<work_dim;ii++) return 0;
{ }
int maxTask = kernel->m_scheduler->getMaxNumOutstandingTasks();
int numWorkItems = global_work_size[ii];
// Program Object APIs
//at minimum 64 work items per task CL_API_ENTRY cl_program
int numWorkItemsPerTask = btMax(64,numWorkItems / maxTask); clCreateProgramWithSource(cl_context context ,
cl_uint /* count */,
for (int t=0;t<numWorkItems;) const char ** /* strings */,
{ const size_t * /* lengths */,
//Performance Hint: tweak this number during benchmarking cl_int * errcode_ret ) CL_API_SUFFIX__VERSION_1_0
int endIndex = (t+numWorkItemsPerTask) < numWorkItems ? t+numWorkItemsPerTask : numWorkItems; {
kernel->m_scheduler->issueTask(t,endIndex,kernel->m_kernelProgramCommandId,(char*)&kernel->m_argData[0][0],kernel->m_argSizes); *errcode_ret = CL_SUCCESS;
t = endIndex; return (cl_program)context;
} }
}
/* CL_API_ENTRY cl_int CL_API_CALL clEnqueueWriteBuffer(cl_command_queue command_queue ,
cl_mem buffer ,
void* bla = 0; cl_bool /* blocking_read */,
size_t offset,
scheduler->issueTask(bla,2,3); size_t cb ,
scheduler->flush(); const void * ptr ,
cl_uint /* num_events_in_wait_list */,
*/ const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0
return 0; {
} MiniCLTaskScheduler* scheduler = (MiniCLTaskScheduler*) command_queue;
CL_API_ENTRY cl_int CL_API_CALL clSetKernelArg(cl_kernel clKernel , ///wait for all work items to be completed
cl_uint arg_index , scheduler->flush();
size_t arg_size ,
const void * arg_value ) CL_API_SUFFIX__VERSION_1_0 memcpy((char*)buffer + offset, ptr,cb);
{ return 0;
MiniCLKernel* kernel = (MiniCLKernel* ) clKernel; }
assert(arg_size < MINICL_MAX_ARGLENGTH);
if (arg_index>MINI_CL_MAX_ARG) CL_API_ENTRY cl_int CL_API_CALL clFlush(cl_command_queue command_queue)
{ {
printf("error: clSetKernelArg arg_index (%d) exceeds %d\n",arg_index,MINI_CL_MAX_ARG); MiniCLTaskScheduler* scheduler = (MiniCLTaskScheduler*) command_queue;
} else ///wait for all work items to be completed
{ scheduler->flush();
if (arg_size>=MINICL_MAX_ARGLENGTH) return 0;
{ }
printf("error: clSetKernelArg argdata too large: %d (maximum is %d)\n",arg_size,MINICL_MAX_ARGLENGTH);
} else
{ CL_API_ENTRY cl_int CL_API_CALL clEnqueueNDRangeKernel(cl_command_queue /* command_queue */,
memcpy( kernel->m_argData[arg_index],arg_value,arg_size); cl_kernel clKernel ,
kernel->m_argSizes[arg_index] = arg_size; cl_uint work_dim ,
} const size_t * /* global_work_offset */,
} const size_t * global_work_size ,
return 0; const size_t * /* local_work_size */,
} cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
// Kernel Object APIs cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0
CL_API_ENTRY cl_kernel CL_API_CALL clCreateKernel(cl_program program , {
const char * kernel_name ,
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0
{ MiniCLKernel* kernel = (MiniCLKernel*) clKernel;
MiniCLTaskScheduler* scheduler = (MiniCLTaskScheduler*) program; for (unsigned int ii=0;ii<work_dim;ii++)
MiniCLKernel* kernel = new MiniCLKernel(); {
int maxTask = kernel->m_scheduler->getMaxNumOutstandingTasks();
kernel->m_kernelProgramCommandId = scheduler->findProgramCommandIdByName(kernel_name); int numWorkItems = global_work_size[ii];
kernel->m_scheduler = scheduler;
//at minimum 64 work items per task
return (cl_kernel)kernel; int numWorkItemsPerTask = btMax(64,numWorkItems / maxTask);
} for (int t=0;t<numWorkItems;)
{
//Performance Hint: tweak this number during benchmarking
CL_API_ENTRY cl_int CL_API_CALL clBuildProgram(cl_program /* program */, int endIndex = (t+numWorkItemsPerTask) < numWorkItems ? t+numWorkItemsPerTask : numWorkItems;
cl_uint /* num_devices */, kernel->m_scheduler->issueTask(t, endIndex, kernel);
const cl_device_id * /* device_list */, t = endIndex;
const char * /* options */, }
void (*pfn_notify)(cl_program /* program */, void * /* user_data */), }
void * /* user_data */) CL_API_SUFFIX__VERSION_1_0 /*
{
return 0; void* bla = 0;
}
scheduler->issueTask(bla,2,3);
CL_API_ENTRY cl_program CL_API_CALL clCreateProgramWithBinary(cl_context context , scheduler->flush();
cl_uint /* num_devices */,
const cl_device_id * /* device_list */, */
const size_t * /* lengths */,
const unsigned char ** /* binaries */, return 0;
cl_int * /* binary_status */, }
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0
{ #define LOCAL_BUF_SIZE 32768
return (cl_program)context; static int sLocalMemBuf[LOCAL_BUF_SIZE * 4 + 16];
} static int* spLocalBufCurr = NULL;
static int sLocalBufUsed = LOCAL_BUF_SIZE; // so it will be reset at the first call
static void* localBufMalloc(int size)
// Memory Object APIs {
CL_API_ENTRY cl_mem CL_API_CALL clCreateBuffer(cl_context /* context */, int size16 = (size + 15) >> 4; // in 16-byte units
cl_mem_flags flags , if((sLocalBufUsed + size16) > LOCAL_BUF_SIZE)
size_t size, { // reset
void * host_ptr , spLocalBufCurr = sLocalMemBuf;
cl_int * errcode_ret ) CL_API_SUFFIX__VERSION_1_0 while((int)spLocalBufCurr & 0x0F) spLocalBufCurr++; // align to 16 bytes
{ sLocalBufUsed = 0;
cl_mem buf = (cl_mem)malloc(size); }
if ((flags&CL_MEM_COPY_HOST_PTR) && host_ptr) void* ret = spLocalBufCurr;
{ spLocalBufCurr += size16 * 4;
memcpy(buf,host_ptr,size); sLocalBufUsed += size;
} return ret;
return buf; }
}
// Command Queue APIs
CL_API_ENTRY cl_command_queue CL_API_CALL clCreateCommandQueue(cl_context context , CL_API_ENTRY cl_int CL_API_CALL clSetKernelArg(cl_kernel clKernel ,
cl_device_id /* device */, cl_uint arg_index ,
cl_command_queue_properties /* properties */, size_t arg_size ,
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0 const void * arg_value ) CL_API_SUFFIX__VERSION_1_0
{ {
return (cl_command_queue) context; MiniCLKernel* kernel = (MiniCLKernel* ) clKernel;
} btAssert(arg_size <= MINICL_MAX_ARGLENGTH);
if (arg_index>MINI_CL_MAX_ARG)
extern CL_API_ENTRY cl_int CL_API_CALL clGetContextInfo(cl_context /* context */, {
cl_context_info param_name , printf("error: clSetKernelArg arg_index (%d) exceeds %d\n",arg_index,MINI_CL_MAX_ARG);
size_t param_value_size , } else
void * param_value, {
size_t * param_value_size_ret ) CL_API_SUFFIX__VERSION_1_0 // if (arg_size>=MINICL_MAX_ARGLENGTH)
{ if (arg_size != MINICL_MAX_ARGLENGTH)
{
switch (param_name) printf("error: clSetKernelArg argdata too large: %d (maximum is %d)\n",arg_size,MINICL_MAX_ARGLENGTH);
{ }
case CL_CONTEXT_DEVICES: else
{ {
if (!param_value_size) if(arg_value == NULL)
{ { // this is only for __local memory qualifier
*param_value_size_ret = 13; void* ptr = localBufMalloc(arg_size);
} else kernel->m_argData[arg_index] = ptr;
{ }
sprintf((char*)param_value,"MiniCL_Test."); else
} {
break; memcpy(&(kernel->m_argData[arg_index]), arg_value, arg_size);
}; }
default: kernel->m_argSizes[arg_index] = arg_size;
{ if(arg_index >= kernel->m_numArgs)
printf("unsupported\n"); {
} kernel->m_numArgs = arg_index + 1;
} kernel->updateLauncher();
}
return 0; }
} }
return 0;
CL_API_ENTRY cl_context CL_API_CALL clCreateContextFromType(cl_context_properties * /* properties */, }
cl_device_type /* device_type */,
void (*pfn_notify)(const char *, const void *, size_t, void *) /* pfn_notify */, // Kernel Object APIs
void * /* user_data */, CL_API_ENTRY cl_kernel CL_API_CALL clCreateKernel(cl_program program ,
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0 const char * kernel_name ,
{ cl_int * errcode_ret ) CL_API_SUFFIX__VERSION_1_0
int maxNumOutstandingTasks = 4; {
MiniCLTaskScheduler* scheduler = (MiniCLTaskScheduler*) program;
#ifdef WIN32 MiniCLKernel* kernel = new MiniCLKernel();
Win32ThreadSupport* threadSupport = new Win32ThreadSupport(Win32ThreadSupport::Win32ThreadConstructionInfo( int nameLen = strlen(kernel_name);
"MiniCL", if(nameLen >= MINI_CL_MAX_KERNEL_NAME)
processMiniCLTask, //processCollisionTask, {
createMiniCLLocalStoreMemory,//createCollisionLocalStoreMemory, *errcode_ret = CL_INVALID_KERNEL_NAME;
maxNumOutstandingTasks)); return NULL;
#else }
SequentialThreadSupport::SequentialThreadConstructionInfo stc("MiniCL",processMiniCLTask,createMiniCLLocalStoreMemory); strcpy_s(kernel->m_name, kernel_name);
SequentialThreadSupport* threadSupport = new SequentialThreadSupport(stc); kernel->m_numArgs = 0;
#endif //kernel->m_kernelProgramCommandId = scheduler->findProgramCommandIdByName(kernel_name);
//if (kernel->m_kernelProgramCommandId>=0)
//{
MiniCLTaskScheduler* scheduler = new MiniCLTaskScheduler(threadSupport,maxNumOutstandingTasks); // *errcode_ret = CL_SUCCESS;
//} else
return (cl_context)scheduler; //{
} // *errcode_ret = CL_INVALID_KERNEL_NAME;
//}
CL_API_ENTRY cl_int CL_API_CALL clReleaseContext(cl_context context ) CL_API_SUFFIX__VERSION_1_0 kernel->m_scheduler = scheduler;
{ if(kernel->registerSelf() == NULL)
{
MiniCLTaskScheduler* scheduler = (MiniCLTaskScheduler*) context; *errcode_ret = CL_INVALID_KERNEL_NAME;
return NULL;
btThreadSupportInterface* threadSupport = scheduler->getThreadSupportInterface(); }
delete scheduler; else
delete threadSupport; {
*errcode_ret = CL_SUCCESS;
return 0; }
}
return (cl_kernel)kernel;
}
CL_API_ENTRY cl_int CL_API_CALL clBuildProgram(cl_program /* program */,
cl_uint /* num_devices */,
const cl_device_id * /* device_list */,
const char * /* options */,
void (*pfn_notify)(cl_program /* program */, void * /* user_data */),
void * /* user_data */) CL_API_SUFFIX__VERSION_1_0
{
return CL_SUCCESS;
}
CL_API_ENTRY cl_program CL_API_CALL clCreateProgramWithBinary(cl_context context ,
cl_uint /* num_devices */,
const cl_device_id * /* device_list */,
const size_t * /* lengths */,
const unsigned char ** /* binaries */,
cl_int * /* binary_status */,
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0
{
return (cl_program)context;
}
// Memory Object APIs
CL_API_ENTRY cl_mem CL_API_CALL clCreateBuffer(cl_context /* context */,
cl_mem_flags flags ,
size_t size,
void * host_ptr ,
cl_int * errcode_ret ) CL_API_SUFFIX__VERSION_1_0
{
cl_mem buf = (cl_mem)malloc(size);
if ((flags&CL_MEM_COPY_HOST_PTR) && host_ptr)
{
memcpy(buf,host_ptr,size);
}
*errcode_ret = 0;
return buf;
}
// Command Queue APIs
CL_API_ENTRY cl_command_queue CL_API_CALL clCreateCommandQueue(cl_context context ,
cl_device_id /* device */,
cl_command_queue_properties /* properties */,
cl_int * errcode_ret ) CL_API_SUFFIX__VERSION_1_0
{
*errcode_ret = 0;
return (cl_command_queue) context;
}
extern CL_API_ENTRY cl_int CL_API_CALL clGetContextInfo(cl_context /* context */,
cl_context_info param_name ,
size_t param_value_size ,
void * param_value,
size_t * param_value_size_ret ) CL_API_SUFFIX__VERSION_1_0
{
switch (param_name)
{
case CL_CONTEXT_DEVICES:
{
if (!param_value_size)
{
*param_value_size_ret = 13;
} else
{
sprintf_s((char*)param_value, param_value_size, "MiniCL_Test.");
}
break;
};
default:
{
printf("unsupported\n");
}
}
return 0;
}
CL_API_ENTRY cl_context CL_API_CALL clCreateContextFromType(cl_context_properties * /* properties */,
cl_device_type /* device_type */,
void (*pfn_notify)(const char *, const void *, size_t, void *) /* pfn_notify */,
void * /* user_data */,
cl_int * errcode_ret ) CL_API_SUFFIX__VERSION_1_0
{
int maxNumOutstandingTasks = 4;
// int maxNumOutstandingTasks = 1;
gMiniCLNumOutstandingTasks = maxNumOutstandingTasks;
const int maxNumOfThreadSupports = 8;
static int sUniqueThreadSupportIndex = 0;
static char* sUniqueThreadSupportName[maxNumOfThreadSupports] =
{
"MiniCL_0", "MiniCL_1", "MiniCL_2", "MiniCL_3", "MiniCL_4", "MiniCL_5", "MiniCL_6", "MiniCL_7"
};
#ifdef DEBUG_MINICL_KERNELS
SequentialThreadSupport::SequentialThreadConstructionInfo stc("MiniCL",processMiniCLTask,createMiniCLLocalStoreMemory);
SequentialThreadSupport* threadSupport = new SequentialThreadSupport(stc);
#else
#if _WIN32
btAssert(sUniqueThreadSupportIndex < maxNumOfThreadSupports);
Win32ThreadSupport* threadSupport = new Win32ThreadSupport(Win32ThreadSupport::Win32ThreadConstructionInfo(
// "MiniCL",
sUniqueThreadSupportName[sUniqueThreadSupportIndex++],
processMiniCLTask, //processCollisionTask,
createMiniCLLocalStoreMemory,//createCollisionLocalStoreMemory,
maxNumOutstandingTasks));
#else
///todo: add posix thread support for other platforms
SequentialThreadSupport::SequentialThreadConstructionInfo stc("MiniCL",processMiniCLTask,createMiniCLLocalStoreMemory);
SequentialThreadSupport* threadSupport = new SequentialThreadSupport(stc);
#endif
#endif //DEBUG_MINICL_KERNELS
MiniCLTaskScheduler* scheduler = new MiniCLTaskScheduler(threadSupport,maxNumOutstandingTasks);
*errcode_ret = 0;
return (cl_context)scheduler;
}
CL_API_ENTRY cl_int CL_API_CALL clReleaseContext(cl_context context ) CL_API_SUFFIX__VERSION_1_0
{
MiniCLTaskScheduler* scheduler = (MiniCLTaskScheduler*) context;
btThreadSupportInterface* threadSupport = scheduler->getThreadSupportInterface();
delete scheduler;
delete threadSupport;
return 0;
}
extern CL_API_ENTRY cl_int CL_API_CALL
clFinish(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0
{
return CL_SUCCESS;
}
extern CL_API_ENTRY cl_int CL_API_CALL
clGetKernelWorkGroupInfo(cl_kernel kernel ,
cl_device_id /* device */,
cl_kernel_work_group_info wgi/* param_name */,
size_t sz /* param_value_size */,
void * ptr /* param_value */,
size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0
{
if((wgi == CL_KERNEL_WORK_GROUP_SIZE)
&&(sz == sizeof(int))
&&(ptr != NULL))
{
MiniCLKernel* miniCLKernel = (MiniCLKernel*)kernel;
MiniCLTaskScheduler* scheduler = miniCLKernel->m_scheduler;
*((int*)ptr) = scheduler->getMaxNumOutstandingTasks();
return CL_SUCCESS;
}
else
{
return CL_INVALID_VALUE;
}
}

View File

@@ -15,10 +15,12 @@ subject to the following restrictions:
#include "MiniCLTask.h" #include "MiniCLTask.h"
#include "../PlatformDefinitions.h" #include "BulletMultiThreaded/PlatformDefinitions.h"
#include "../SpuFakeDma.h" #include "BulletMultiThreaded/SpuFakeDma.h"
#include "LinearMath/btMinMax.h" #include "LinearMath/btMinMax.h"
#include "BulletMultiThreaded/MiniCLTask/MiniCLTask.h" #include "MiniCLTask.h"
#include "BulletMultiThreaded/MiniCLTaskScheduler.h"
#ifdef __SPU__ #ifdef __SPU__
#include <spu_printf.h> #include <spu_printf.h>
@@ -27,9 +29,7 @@ subject to the following restrictions:
#define spu_printf printf #define spu_printf printf
#endif #endif
#define __kernel int gMiniCLNumOutstandingTasks = 0;
#define __global
#define get_global_id(a) guid
struct MiniCLTask_LocalStoreMemory struct MiniCLTask_LocalStoreMemory
{ {
@@ -37,65 +37,23 @@ struct MiniCLTask_LocalStoreMemory
}; };
///////////////////////////////////////////////////
// OpenCL Kernel Function for element by element vector addition
__kernel void VectorAdd(__global const float8* a, __global const float8* b, __global float8* c, int guid)
{
// get oct-float index into global data array
int iGID = get_global_id(0);
// read inputs into registers
float8 f8InA = a[iGID];
float8 f8InB = b[iGID];
float8 f8Out = (float8)0.0f;
// add the vector elements
f8Out.s0 = f8InA.s0 + f8InB.s0;
f8Out.s1 = f8InA.s1 + f8InB.s1;
f8Out.s2 = f8InA.s2 + f8InB.s2;
f8Out.s3 = f8InA.s3 + f8InB.s3;
f8Out.s4 = f8InA.s4 + f8InB.s4;
f8Out.s5 = f8InA.s5 + f8InB.s5;
f8Out.s6 = f8InA.s6 + f8InB.s6;
f8Out.s7 = f8InA.s7 + f8InB.s7;
// write back out to GMEM
c[get_global_id(0)] = f8Out;
}
///////////////////////////////////////////////////
//-- MAIN METHOD //-- MAIN METHOD
void processMiniCLTask(void* userPtr, void* lsMemory) void processMiniCLTask(void* userPtr, void* lsMemory)
{ {
// BT_PROFILE("processSampleTask"); // BT_PROFILE("processSampleTask");
//MiniCLTask_LocalStoreMemory* localMemory = (MiniCLTask_LocalStoreMemory*)lsMemory; MiniCLTask_LocalStoreMemory* localMemory = (MiniCLTask_LocalStoreMemory*)lsMemory;
MiniCLTaskDesc* taskDescPtr = (MiniCLTaskDesc*)userPtr; MiniCLTaskDesc* taskDescPtr = (MiniCLTaskDesc*)userPtr;
MiniCLTaskDesc& taskDesc = *taskDescPtr; MiniCLTaskDesc& taskDesc = *taskDescPtr;
printf("Compute Unit[%d] executed kernel %d work items [%d..%d)\n",taskDesc.m_taskId,taskDesc.m_kernelProgramId,taskDesc.m_firstWorkUnit,taskDesc.m_lastWorkUnit); for (unsigned int i=taskDesc.m_firstWorkUnit;i<taskDesc.m_lastWorkUnit;i++)
switch (taskDesc.m_kernelProgramId)
{ {
case CMD_MINICL_ADDVECTOR: taskDesc.m_kernel->m_launcher(&taskDesc, i);
{ }
for (unsigned int i=taskDesc.m_firstWorkUnit;i<taskDesc.m_lastWorkUnit;i++)
{
VectorAdd(*(const float8**)&taskDesc.m_argData[0][0],*(const float8**)&taskDesc.m_argData[1][0],*(float8**)&taskDesc.m_argData[2][0],i);
}
break;
}
default:
{
printf("error in processMiniCLTask: unknown command id: %d\n",taskDesc.m_kernelProgramId);
}
};
// printf("Compute Unit[%d] executed kernel %d work items [%d..%d)\n",taskDesc.m_taskId,taskDesc.m_kernelProgramId,taskDesc.m_firstWorkUnit,taskDesc.m_lastWorkUnit);
} }

View File

@@ -16,39 +16,17 @@ subject to the following restrictions:
#ifndef MINICL__TASK_H #ifndef MINICL__TASK_H
#define MINICL__TASK_H #define MINICL__TASK_H
#include "../PlatformDefinitions.h" #include "BulletMultiThreaded/PlatformDefinitions.h"
#include "LinearMath/btScalar.h" #include "LinearMath/btScalar.h"
#include "LinearMath/btAlignedAllocator.h" #include "LinearMath/btAlignedAllocator.h"
enum #define MINICL_MAX_ARGLENGTH (sizeof(void*))
{ #define MINI_CL_MAX_ARG 16
CMD_MINICL_1= 1, #define MINI_CL_MAX_KERNEL_NAME 256
CMD_MINICL_ADDVECTOR
};
struct MiniCLKernel;
struct float8
{
float s0;
float s1;
float s2;
float s3;
float s4;
float s5;
float s6;
float s7;
float8(float scalar)
{
s0=s1=s2=s3=s4=s5=s6=s7=scalar;
}
};
#define MINICL_MAX_ARGLENGTH 128
#define MINI_CL_MAX_ARG 8
ATTRIBUTE_ALIGNED16(struct) MiniCLTaskDesc ATTRIBUTE_ALIGNED16(struct) MiniCLTaskDesc
{ {
@@ -62,16 +40,19 @@ ATTRIBUTE_ALIGNED16(struct) MiniCLTaskDesc
} }
} }
uint32_t m_taskId; uint32_t m_taskId;
uint32_t m_kernelProgramId; uint32_t m_firstWorkUnit;
uint32_t m_firstWorkUnit; uint32_t m_lastWorkUnit;
uint32_t m_lastWorkUnit;
char m_argData[MINI_CL_MAX_ARG][MINICL_MAX_ARGLENGTH]; MiniCLKernel* m_kernel;
int m_argSizes[MINI_CL_MAX_ARG];
void* m_argData[MINI_CL_MAX_ARG];
int m_argSizes[MINI_CL_MAX_ARG];
}; };
extern "C" int gMiniCLNumOutstandingTasks;
void processMiniCLTask(void* userPtr, void* lsMemory); void processMiniCLTask(void* userPtr, void* lsMemory);
void* createMiniCLLocalStoreMemory(); void* createMiniCLLocalStoreMemory();

View File

@@ -14,6 +14,7 @@ subject to the following restrictions:
*/ */
//#define __CELLOS_LV2__ 1 //#define __CELLOS_LV2__ 1
#define __BT_SKIP_UINT64_H 1
#define USE_SAMPLE_PROCESS 1 #define USE_SAMPLE_PROCESS 1
#ifdef USE_SAMPLE_PROCESS #ifdef USE_SAMPLE_PROCESS
@@ -43,21 +44,18 @@ void* SamplelsMemoryFunc()
#else #else
#include "btThreadSupportInterface.h" #include "BulletMultiThreaded/btThreadSupportInterface.h"
//# include "SPUAssert.h" //# include "SPUAssert.h"
#include <string.h> #include <string.h>
#include "MiniCL/cl_platform.h"
extern "C" { extern "C" {
extern char SPU_SAMPLE_ELF_SYMBOL[]; extern char SPU_SAMPLE_ELF_SYMBOL[];
} }
MiniCLTaskScheduler::MiniCLTaskScheduler(btThreadSupportInterface* threadInterface, int maxNumOutstandingTasks) MiniCLTaskScheduler::MiniCLTaskScheduler(btThreadSupportInterface* threadInterface, int maxNumOutstandingTasks)
:m_threadInterface(threadInterface), :m_threadInterface(threadInterface),
m_maxNumOutstandingTasks(maxNumOutstandingTasks) m_maxNumOutstandingTasks(maxNumOutstandingTasks)
@@ -66,6 +64,8 @@ m_maxNumOutstandingTasks(maxNumOutstandingTasks)
m_taskBusy.resize(m_maxNumOutstandingTasks); m_taskBusy.resize(m_maxNumOutstandingTasks);
m_spuSampleTaskDesc.resize(m_maxNumOutstandingTasks); m_spuSampleTaskDesc.resize(m_maxNumOutstandingTasks);
m_kernels.resize(0);
for (int i = 0; i < m_maxNumOutstandingTasks; i++) for (int i = 0; i < m_maxNumOutstandingTasks; i++)
{ {
m_taskBusy[i] = false; m_taskBusy[i] = false;
@@ -105,7 +105,7 @@ void MiniCLTaskScheduler::initialize()
} }
void MiniCLTaskScheduler::issueTask(int firstWorkUnit, int lastWorkUnit,int kernelProgramId,char* argData,int* argSizes) void MiniCLTaskScheduler::issueTask(int firstWorkUnit, int lastWorkUnit, MiniCLKernel* kernel)
{ {
#ifdef DEBUG_SPU_TASK_SCHEDULING #ifdef DEBUG_SPU_TASK_SCHEDULING
@@ -120,16 +120,18 @@ void MiniCLTaskScheduler::issueTask(int firstWorkUnit, int lastWorkUnit,int kern
// send task description in event message // send task description in event message
taskDesc.m_firstWorkUnit = firstWorkUnit; taskDesc.m_firstWorkUnit = firstWorkUnit;
taskDesc.m_lastWorkUnit = lastWorkUnit; taskDesc.m_lastWorkUnit = lastWorkUnit;
taskDesc.m_kernelProgramId = kernelProgramId; taskDesc.m_kernel = kernel;
//some bookkeeping to recognize finished tasks //some bookkeeping to recognize finished tasks
taskDesc.m_taskId = m_currentTask; taskDesc.m_taskId = m_currentTask;
for (int i=0;i<MINI_CL_MAX_ARG;i++) // for (int i=0;i<MINI_CL_MAX_ARG;i++)
for (unsigned int i=0; i < kernel->m_numArgs; i++)
{ {
taskDesc.m_argSizes[i] = argSizes[i]; taskDesc.m_argSizes[i] = kernel->m_argSizes[i];
if (taskDesc.m_argSizes[i]) if (taskDesc.m_argSizes[i])
{ {
memcpy(&taskDesc.m_argData[i],&argData[MINICL_MAX_ARGLENGTH*i],taskDesc.m_argSizes[i]); taskDesc.m_argData[i] = kernel->m_argData[i];
// memcpy(&taskDesc.m_argData[i],&argData[MINICL_MAX_ARGLENGTH*i],taskDesc.m_argSizes[i]);
} }
} }
} }
@@ -221,6 +223,296 @@ void MiniCLTaskScheduler::flush()
} }
typedef void (*MiniCLKernelLauncher0)(int);
typedef void (*MiniCLKernelLauncher1)(void*, int);
typedef void (*MiniCLKernelLauncher2)(void*, void*, int);
typedef void (*MiniCLKernelLauncher3)(void*, void*, void*, int);
typedef void (*MiniCLKernelLauncher4)(void*, void*, void*, void*, int);
typedef void (*MiniCLKernelLauncher5)(void*, void*, void*, void*, void*, int);
typedef void (*MiniCLKernelLauncher6)(void*, void*, void*, void*, void*, void*, int);
typedef void (*MiniCLKernelLauncher7)(void*, void*, void*, void*, void*, void*, void*, int);
typedef void (*MiniCLKernelLauncher8)(void*, void*, void*, void*, void*, void*, void*, void*, int);
typedef void (*MiniCLKernelLauncher9)(void*, void*, void*, void*, void*, void*, void*, void*, void*, int);
typedef void (*MiniCLKernelLauncher10)(void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, int);
typedef void (*MiniCLKernelLauncher11)(void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, int);
typedef void (*MiniCLKernelLauncher12)(void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, int);
typedef void (*MiniCLKernelLauncher13)(void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, int);
typedef void (*MiniCLKernelLauncher14)(void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, int);
typedef void (*MiniCLKernelLauncher15)(void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, int);
typedef void (*MiniCLKernelLauncher16)(void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, int);
static void kernelLauncher0(MiniCLTaskDesc* taskDesc, int guid)
{
((MiniCLKernelLauncher0)(taskDesc->m_kernel->m_launcher))(guid);
}
static void kernelLauncher1(MiniCLTaskDesc* taskDesc, int guid)
{
((MiniCLKernelLauncher1)(taskDesc->m_kernel->m_pCode))( taskDesc->m_argData[0],
guid);
}
static void kernelLauncher2(MiniCLTaskDesc* taskDesc, int guid)
{
((MiniCLKernelLauncher2)(taskDesc->m_kernel->m_pCode))( taskDesc->m_argData[0],
taskDesc->m_argData[1],
guid);
}
static void kernelLauncher3(MiniCLTaskDesc* taskDesc, int guid)
{
((MiniCLKernelLauncher3)(taskDesc->m_kernel->m_pCode))( taskDesc->m_argData[0],
taskDesc->m_argData[1],
taskDesc->m_argData[2],
guid);
}
static void kernelLauncher4(MiniCLTaskDesc* taskDesc, int guid)
{
((MiniCLKernelLauncher4)(taskDesc->m_kernel->m_pCode))( taskDesc->m_argData[0],
taskDesc->m_argData[1],
taskDesc->m_argData[2],
taskDesc->m_argData[3],
guid);
}
static void kernelLauncher5(MiniCLTaskDesc* taskDesc, int guid)
{
((MiniCLKernelLauncher5)(taskDesc->m_kernel->m_pCode))( taskDesc->m_argData[0],
taskDesc->m_argData[1],
taskDesc->m_argData[2],
taskDesc->m_argData[3],
taskDesc->m_argData[4],
guid);
}
static void kernelLauncher6(MiniCLTaskDesc* taskDesc, int guid)
{
((MiniCLKernelLauncher6)(taskDesc->m_kernel->m_pCode))( taskDesc->m_argData[0],
taskDesc->m_argData[1],
taskDesc->m_argData[2],
taskDesc->m_argData[3],
taskDesc->m_argData[4],
taskDesc->m_argData[5],
guid);
}
static void kernelLauncher7(MiniCLTaskDesc* taskDesc, int guid)
{
((MiniCLKernelLauncher7)(taskDesc->m_kernel->m_pCode))( taskDesc->m_argData[0],
taskDesc->m_argData[1],
taskDesc->m_argData[2],
taskDesc->m_argData[3],
taskDesc->m_argData[4],
taskDesc->m_argData[5],
taskDesc->m_argData[6],
guid);
}
static void kernelLauncher8(MiniCLTaskDesc* taskDesc, int guid)
{
((MiniCLKernelLauncher8)(taskDesc->m_kernel->m_pCode))( taskDesc->m_argData[0],
taskDesc->m_argData[1],
taskDesc->m_argData[2],
taskDesc->m_argData[3],
taskDesc->m_argData[4],
taskDesc->m_argData[5],
taskDesc->m_argData[6],
taskDesc->m_argData[7],
guid);
}
static void kernelLauncher9(MiniCLTaskDesc* taskDesc, int guid)
{
((MiniCLKernelLauncher9)(taskDesc->m_kernel->m_pCode))( taskDesc->m_argData[0],
taskDesc->m_argData[1],
taskDesc->m_argData[2],
taskDesc->m_argData[3],
taskDesc->m_argData[4],
taskDesc->m_argData[5],
taskDesc->m_argData[6],
taskDesc->m_argData[7],
taskDesc->m_argData[8],
guid);
}
static void kernelLauncher10(MiniCLTaskDesc* taskDesc, int guid)
{
((MiniCLKernelLauncher10)(taskDesc->m_kernel->m_pCode))(taskDesc->m_argData[0],
taskDesc->m_argData[1],
taskDesc->m_argData[2],
taskDesc->m_argData[3],
taskDesc->m_argData[4],
taskDesc->m_argData[5],
taskDesc->m_argData[6],
taskDesc->m_argData[7],
taskDesc->m_argData[8],
taskDesc->m_argData[9],
guid);
}
static void kernelLauncher11(MiniCLTaskDesc* taskDesc, int guid)
{
((MiniCLKernelLauncher11)(taskDesc->m_kernel->m_pCode))(taskDesc->m_argData[0],
taskDesc->m_argData[1],
taskDesc->m_argData[2],
taskDesc->m_argData[3],
taskDesc->m_argData[4],
taskDesc->m_argData[5],
taskDesc->m_argData[6],
taskDesc->m_argData[7],
taskDesc->m_argData[8],
taskDesc->m_argData[9],
taskDesc->m_argData[10],
guid);
}
static void kernelLauncher12(MiniCLTaskDesc* taskDesc, int guid)
{
((MiniCLKernelLauncher12)(taskDesc->m_kernel->m_pCode))(taskDesc->m_argData[0],
taskDesc->m_argData[1],
taskDesc->m_argData[2],
taskDesc->m_argData[3],
taskDesc->m_argData[4],
taskDesc->m_argData[5],
taskDesc->m_argData[6],
taskDesc->m_argData[7],
taskDesc->m_argData[8],
taskDesc->m_argData[9],
taskDesc->m_argData[10],
taskDesc->m_argData[11],
guid);
}
static void kernelLauncher13(MiniCLTaskDesc* taskDesc, int guid)
{
((MiniCLKernelLauncher13)(taskDesc->m_kernel->m_pCode))(taskDesc->m_argData[0],
taskDesc->m_argData[1],
taskDesc->m_argData[2],
taskDesc->m_argData[3],
taskDesc->m_argData[4],
taskDesc->m_argData[5],
taskDesc->m_argData[6],
taskDesc->m_argData[7],
taskDesc->m_argData[8],
taskDesc->m_argData[9],
taskDesc->m_argData[10],
taskDesc->m_argData[11],
taskDesc->m_argData[12],
guid);
}
static void kernelLauncher14(MiniCLTaskDesc* taskDesc, int guid)
{
((MiniCLKernelLauncher14)(taskDesc->m_kernel->m_pCode))(taskDesc->m_argData[0],
taskDesc->m_argData[1],
taskDesc->m_argData[2],
taskDesc->m_argData[3],
taskDesc->m_argData[4],
taskDesc->m_argData[5],
taskDesc->m_argData[6],
taskDesc->m_argData[7],
taskDesc->m_argData[8],
taskDesc->m_argData[9],
taskDesc->m_argData[10],
taskDesc->m_argData[11],
taskDesc->m_argData[12],
taskDesc->m_argData[13],
guid);
}
static void kernelLauncher15(MiniCLTaskDesc* taskDesc, int guid)
{
((MiniCLKernelLauncher15)(taskDesc->m_kernel->m_pCode))(taskDesc->m_argData[0],
taskDesc->m_argData[1],
taskDesc->m_argData[2],
taskDesc->m_argData[3],
taskDesc->m_argData[4],
taskDesc->m_argData[5],
taskDesc->m_argData[6],
taskDesc->m_argData[7],
taskDesc->m_argData[8],
taskDesc->m_argData[9],
taskDesc->m_argData[10],
taskDesc->m_argData[11],
taskDesc->m_argData[12],
taskDesc->m_argData[13],
taskDesc->m_argData[14],
guid);
}
static void kernelLauncher16(MiniCLTaskDesc* taskDesc, int guid)
{
((MiniCLKernelLauncher16)(taskDesc->m_kernel->m_pCode))(taskDesc->m_argData[0],
taskDesc->m_argData[1],
taskDesc->m_argData[2],
taskDesc->m_argData[3],
taskDesc->m_argData[4],
taskDesc->m_argData[5],
taskDesc->m_argData[6],
taskDesc->m_argData[7],
taskDesc->m_argData[8],
taskDesc->m_argData[9],
taskDesc->m_argData[10],
taskDesc->m_argData[11],
taskDesc->m_argData[12],
taskDesc->m_argData[13],
taskDesc->m_argData[14],
taskDesc->m_argData[15],
guid);
}
static kernelLauncherCB spLauncherList[MINI_CL_MAX_ARG+1] =
{
kernelLauncher0,
kernelLauncher1,
kernelLauncher2,
kernelLauncher3,
kernelLauncher4,
kernelLauncher5,
kernelLauncher6,
kernelLauncher7,
kernelLauncher8,
kernelLauncher9,
kernelLauncher10,
kernelLauncher11,
kernelLauncher12,
kernelLauncher13,
kernelLauncher14,
kernelLauncher15,
kernelLauncher16
};
void MiniCLKernel::updateLauncher()
{
m_launcher = spLauncherList[m_numArgs];
}
struct MiniCLKernelDescEntry
{
void* pCode;
char* pName;
};
static MiniCLKernelDescEntry spKernelDesc[256];
static int sNumKernelDesc = 0;
MiniCLKernelDesc::MiniCLKernelDesc(void* pCode, char* pName)
{
for(int i = 0; i < sNumKernelDesc; i++)
{
if(!strcmp(pName, spKernelDesc[i].pName))
{ // already registered
btAssert(spKernelDesc[i].pCode == pCode);
return;
}
}
spKernelDesc[sNumKernelDesc].pCode = pCode;
spKernelDesc[sNumKernelDesc].pName = pName;
sNumKernelDesc++;
}
MiniCLKernel* MiniCLKernel::registerSelf()
{
m_scheduler->registerKernel(this);
for(int i = 0; i < sNumKernelDesc; i++)
{
if(!strcmp(m_name, spKernelDesc[i].pName))
{
m_pCode = spKernelDesc[i].pCode;
return this;
}
}
return NULL;
}
#endif #endif

View File

@@ -21,7 +21,7 @@ subject to the following restrictions:
#include <assert.h> #include <assert.h>
#include "PlatformDefinitions.h" #include "BulletMultiThreaded/PlatformDefinitions.h"
#include <stdlib.h> #include <stdlib.h>
@@ -30,11 +30,10 @@ subject to the following restrictions:
#include "MiniCLTask/MiniCLTask.h" #include "MiniCLTask/MiniCLTask.h"
//just add your commands here, try to keep them globally unique for debugging purposes //just add your commands here, try to keep them globally unique for debugging purposes
#define CMD_SAMPLE_TASK_COMMAND 10 #define CMD_SAMPLE_TASK_COMMAND 10
struct MiniCLKernel;
/// MiniCLTaskScheduler handles SPU processing of collision pairs. /// MiniCLTaskScheduler handles SPU processing of collision pairs.
/// When PPU issues a task, it will look for completed task buffers /// When PPU issues a task, it will look for completed task buffers
@@ -44,7 +43,11 @@ class MiniCLTaskScheduler
// track task buffers that are being used, and total busy tasks // track task buffers that are being used, and total busy tasks
btAlignedObjectArray<bool> m_taskBusy; btAlignedObjectArray<bool> m_taskBusy;
btAlignedObjectArray<MiniCLTaskDesc> m_spuSampleTaskDesc; btAlignedObjectArray<MiniCLTaskDesc> m_spuSampleTaskDesc;
btAlignedObjectArray<const MiniCLKernel*> m_kernels;
int m_numBusyTasks; int m_numBusyTasks;
// the current task and the current entry to insert a new work unit // the current task and the current entry to insert a new work unit
@@ -68,7 +71,7 @@ public:
///call initialize in the beginning of the frame, before addCollisionPairToTask ///call initialize in the beginning of the frame, before addCollisionPairToTask
void initialize(); void initialize();
void issueTask(int firstWorkUnit, int lastWorkUnit,int kernelProgramId,char* argData,int* argSizes); void issueTask(int firstWorkUnit, int lastWorkUnit, MiniCLKernel* kernel);
///call flush to submit potential outstanding work to SPUs and wait for all involved SPUs to be finished ///call flush to submit potential outstanding work to SPUs and wait for all involved SPUs to be finished
void flush(); void flush();
@@ -78,25 +81,35 @@ public:
return m_threadInterface; return m_threadInterface;
} }
int findProgramCommandIdByName(const char* programName) const int findProgramCommandIdByName(const char* programName) const;
{
return CMD_MINICL_ADDVECTOR;//hardcoded temp value, todo: implement multi-program support
}
int getMaxNumOutstandingTasks() const int getMaxNumOutstandingTasks() const
{ {
return m_maxNumOutstandingTasks; return m_maxNumOutstandingTasks;
} }
void registerKernel(MiniCLKernel* kernel)
{
m_kernels.push_back(kernel);
}
}; };
typedef void (*kernelLauncherCB)(MiniCLTaskDesc* taskDesc, int guid);
struct MiniCLKernel struct MiniCLKernel
{ {
MiniCLTaskScheduler* m_scheduler; MiniCLTaskScheduler* m_scheduler;
int m_kernelProgramCommandId; // int m_kernelProgramCommandId;
char m_argData[MINI_CL_MAX_ARG][MINICL_MAX_ARGLENGTH]; char m_name[MINI_CL_MAX_KERNEL_NAME];
unsigned int m_numArgs;
kernelLauncherCB m_launcher;
void* m_pCode;
void updateLauncher();
MiniCLKernel* registerSelf();
void* m_argData[MINI_CL_MAX_ARG];
int m_argSizes[MINI_CL_MAX_ARG]; int m_argSizes[MINI_CL_MAX_ARG];
}; };

View File

@@ -19,7 +19,9 @@ typedef union
typedef unsigned char uint8_t; typedef unsigned char uint8_t;
#ifndef __PHYSICS_COMMON_H__ #ifndef __PHYSICS_COMMON_H__
#ifndef __BT_SKIP_UINT64_H
typedef unsigned long int uint64_t; typedef unsigned long int uint64_t;
#endif //__BT_SKIP_UINT64_H
typedef unsigned int uint32_t; typedef unsigned int uint32_t;
#endif //__PHYSICS_COMMON_H__ #endif //__PHYSICS_COMMON_H__
typedef unsigned short uint16_t; typedef unsigned short uint16_t;

View File

@@ -2,11 +2,11 @@
#define __PPU_ADDRESS_SPACE_H #define __PPU_ADDRESS_SPACE_H
#ifdef WIN32 #ifdef _WIN32
//stop those casting warnings until we have a better solution for ppu_address_t / void* / uint64 conversions //stop those casting warnings until we have a better solution for ppu_address_t / void* / uint64 conversions
#pragma warning (disable: 4311) #pragma warning (disable: 4311)
#pragma warning (disable: 4312) #pragma warning (disable: 4312)
#endif //WIN32 #endif //_WIN32
#if defined(_WIN64) || defined(__LP64__) || defined(__x86_64__) || defined(USE_ADDR64) #if defined(_WIN64) || defined(__LP64__) || defined(__x86_64__) || defined(USE_ADDR64)
typedef uint64_t ppu_address_t; typedef uint64_t ppu_address_t;

263
src/MiniCL/cl_MiniCL_Defs.h Normal file
View File

@@ -0,0 +1,263 @@
/*
Bullet Continuous Collision Detection and Physics Library, Copyright (c) 2007 Erwin Coumans
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#include <float.h>
#include <math.h>
#include "LinearMath/btScalar.h"
#include "MiniCL/cl.h"
#define __kernel
#define __global
#define __local
#define get_global_id(a) __guid_arg
#define get_local_id(a) ((__guid_arg) % gMiniCLNumOutstandingTasks)
#define get_local_size(a) (gMiniCLNumOutstandingTasks)
#define get_group_id(a) ((__guid_arg) / gMiniCLNumOutstandingTasks)
#define CLK_LOCAL_MEM_FENCE 0x01
#define CLK_GLOBAL_MEM_FENCE 0x02
static void barrier(unsigned int a)
{
// TODO : implement
}
ATTRIBUTE_ALIGNED16(struct) float8
{
float s0;
float s1;
float s2;
float s3;
float s4;
float s5;
float s6;
float s7;
float8(float scalar)
{
s0=s1=s2=s3=s4=s5=s6=s7=scalar;
}
};
ATTRIBUTE_ALIGNED16(struct) float4
{
float x,y,z,w;
float4() {}
float4(float v)
{
x = y = z = w = v;
}
float4 operator*(const float4& other)
{
float4 tmp;
tmp.x = x*other.x;
tmp.y = y*other.y;
tmp.z = z*other.z;
tmp.w = w*other.w;
return tmp;
}
float4 operator*(const float& other)
{
float4 tmp;
tmp.x = x*other;
tmp.y = y*other;
tmp.z = z*other;
tmp.w = w*other;
return tmp;
}
float4& operator+=(const float4& other)
{
x += other.x;
y += other.y;
z += other.z;
w += other.w;
return *this;
}
float4& operator-=(const float4& other)
{
x -= other.x;
y -= other.y;
z -= other.z;
w -= other.w;
return *this;
}
float4& operator *=(float scalar)
{
x *= scalar;
y *= scalar;
z *= scalar;
w *= scalar;
return (*this);
}
};
static float4 fabs(const float4& a)
{
float4 tmp;
tmp.x = a.x < 0.f ? 0.f : a.x;
tmp.y = a.y < 0.f ? 0.f : a.y;
tmp.z = a.z < 0.f ? 0.f : a.z;
tmp.w = a.w < 0.f ? 0.f : a.w;
return tmp;
}
static float4 operator+(const float4& a,const float4& b)
{
float4 tmp;
tmp.x = a.x + b.x;
tmp.y = a.y + b.y;
tmp.z = a.z + b.z;
tmp.w = a.w + b.w;
return tmp;
}
static float4 operator-(const float4& a,const float4& b)
{
float4 tmp;
tmp.x = a.x - b.x;
tmp.y = a.y - b.y;
tmp.z = a.z - b.z;
tmp.w = a.w - b.w;
return tmp;
}
static float4 operator*(float a,const float4& b)
{
float4 tmp;
tmp.x = a * b.x;
tmp.y = a * b.y;
tmp.z = a * b.z;
tmp.w = a * b.w;
return tmp;
}
static float dot(const float4&a ,const float4& b)
{
float4 tmp;
tmp.x = a.x*b.x;
tmp.y = a.y*b.y;
tmp.z = a.z*b.z;
tmp.w = a.w*b.w;
return tmp.x+tmp.y+tmp.z+tmp.w;
}
static float4 cross(const float4&a ,const float4& b)
{
float4 tmp;
tmp.x = a.y*b.z - a.z*b.y;
tmp.y = -a.x*b.z + a.z*b.x;
tmp.z = a.x*b.y - a.y*b.x;
tmp.w = 0.f;
return tmp;
}
static float max(float a, float b)
{
return (a >= b) ? a : b;
}
static float min(float a, float b)
{
return (a <= b) ? a : b;
}
static float fmax(float a, float b)
{
return (a >= b) ? a : b;
}
static float fmin(float a, float b)
{
return (a <= b) ? a : b;
}
struct int2
{
int x,y;
};
struct uint2
{
unsigned int x,y;
};
//typedef int2 uint2;
typedef unsigned int uint;
struct int4
{
int x,y,z,w;
};
struct uint4
{
unsigned int x,y,z,w;
uint4() {}
uint4(uint val) { x = y = z = w = val; }
uint4& operator+=(const uint4& other)
{
x += other.x;
y += other.y;
z += other.z;
w += other.w;
return *this;
}
};
static uint4 operator+(const uint4& a,const uint4& b)
{
uint4 tmp;
tmp.x = a.x + b.x;
tmp.y = a.y + b.y;
tmp.z = a.z + b.z;
tmp.w = a.w + b.w;
return tmp;
}
static uint4 operator-(const uint4& a,const uint4& b)
{
uint4 tmp;
tmp.x = a.x - b.x;
tmp.y = a.y - b.y;
tmp.z = a.z - b.z;
tmp.w = a.w - b.w;
return tmp;
}
#define native_sqrt sqrtf
#define native_sin sinf
#define native_cos cosf
#define native_powr powf
#define GUID_ARG ,int __guid_arg
#define GUID_ARG_VAL ,__guid_arg
#define as_int(a) (*((int*)&(a)))
extern "C" int gMiniCLNumOutstandingTasks;
// extern "C" void __kernel_func(); \

View File

@@ -1,244 +1,254 @@
/********************************************************************************** /**********************************************************************************
* Copyright (c) 2008-2009 The Khronos Group Inc. * Copyright (c) 2008-2009 The Khronos Group Inc.
* *
* Permission is hereby granted, free of charge, to any person obtaining a * Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and/or associated documentation files (the * copy of this software and/or associated documentation files (the
* "Materials"), to deal in the Materials without restriction, including * "Materials"), to deal in the Materials without restriction, including
* without limitation the rights to use, copy, modify, merge, publish, * without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Materials, and to * distribute, sublicense, and/or sell copies of the Materials, and to
* permit persons to whom the Materials are furnished to do so, subject to * permit persons to whom the Materials are furnished to do so, subject to
* the following conditions: * the following conditions:
* *
* The above copyright notice and this permission notice shall be included * The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Materials. * in all copies or substantial portions of the Materials.
* *
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
**********************************************************************************/ **********************************************************************************/
#ifndef __CL_PLATFORM_H #ifndef __CL_PLATFORM_H
#define __CL_PLATFORM_H #define __CL_PLATFORM_H
#ifdef __APPLE__ #define CL_PLATFORM_MINI_CL 0x12345
/* Contains #defines for AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER below */
#include <AvailabilityMacros.h> struct MiniCLKernelDesc
#endif {
MiniCLKernelDesc(void* pCode, char* pName);
#ifdef __cplusplus };
extern "C" {
#endif #define MINICL_REGISTER(__kernel_func) static MiniCLKernelDesc __kernel_func##Desc(__kernel_func, #__kernel_func);
#define CL_API_ENTRY
#define CL_API_CALL #ifdef __APPLE__
#ifdef __APPLE__ /* Contains #defines for AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER below */
#define CL_API_SUFFIX__VERSION_1_0 // AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER #include <AvailabilityMacros.h>
#define CL_EXTENSION_WEAK_LINK __attribute__((weak_import)) #endif
#else
#define CL_API_SUFFIX__VERSION_1_0 #ifdef __cplusplus
#define CL_EXTENSION_WEAK_LINK extern "C" {
#endif #endif
#ifdef _WIN32 #define CL_API_ENTRY
typedef signed __int8 int8_t; #define CL_API_CALL
typedef unsigned __int8 uint8_t; #ifdef __APPLE__
typedef signed __int16 int16_t; #define CL_API_SUFFIX__VERSION_1_0 // AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER
typedef unsigned __int16 uint16_t; #define CL_EXTENSION_WEAK_LINK __attribute__((weak_import))
typedef signed __int32 int32_t; #else
typedef unsigned __int32 uint32_t; #define CL_API_SUFFIX__VERSION_1_0
typedef signed __int64 int64_t; #define CL_EXTENSION_WEAK_LINK
typedef unsigned __int64 uint64_t; #endif
typedef int8_t cl_char; #ifdef _WIN32
typedef uint8_t cl_uchar; typedef signed __int8 int8_t;
typedef int16_t cl_short ; typedef unsigned __int8 uint8_t;
typedef uint16_t cl_ushort ; typedef signed __int16 int16_t;
typedef int32_t cl_int ; typedef unsigned __int16 uint16_t;
typedef uint32_t cl_uint ; typedef signed __int32 int32_t;
typedef int64_t cl_long ; typedef unsigned __int32 uint32_t;
typedef uint64_t cl_ulong ; typedef signed __int64 int64_t;
typedef unsigned __int64 uint64_t;
typedef uint16_t cl_half ;
typedef float cl_float ; typedef int8_t cl_char;
typedef double cl_double ; typedef uint8_t cl_uchar;
typedef int16_t cl_short ;
typedef uint16_t cl_ushort ;
typedef int8_t cl_char2[2] ; typedef int32_t cl_int ;
typedef int8_t cl_char4[4] ; typedef uint32_t cl_uint ;
typedef int8_t cl_char8[8] ; typedef int64_t cl_long ;
typedef int8_t cl_char16[16] ; typedef uint64_t cl_ulong ;
typedef uint8_t cl_uchar2[2] ;
typedef uint8_t cl_uchar4[4] ; typedef uint16_t cl_half ;
typedef uint8_t cl_uchar8[8] ; typedef float cl_float ;
typedef uint8_t cl_uchar16[16] ; typedef double cl_double ;
typedef int16_t cl_short2[2] ;
typedef int16_t cl_short4[4] ; typedef int8_t cl_char2[2] ;
typedef int16_t cl_short8[8] ; typedef int8_t cl_char4[4] ;
typedef int16_t cl_short16[16] ; typedef int8_t cl_char8[8] ;
typedef uint16_t cl_ushort2[2] ; typedef int8_t cl_char16[16] ;
typedef uint16_t cl_ushort4[4] ; typedef uint8_t cl_uchar2[2] ;
typedef uint16_t cl_ushort8[8] ; typedef uint8_t cl_uchar4[4] ;
typedef uint16_t cl_ushort16[16] ; typedef uint8_t cl_uchar8[8] ;
typedef uint8_t cl_uchar16[16] ;
typedef int32_t cl_int2[2] ;
typedef int32_t cl_int4[4] ; typedef int16_t cl_short2[2] ;
typedef int32_t cl_int8[8] ; typedef int16_t cl_short4[4] ;
typedef int32_t cl_int16[16] ; typedef int16_t cl_short8[8] ;
typedef uint32_t cl_uint2[2] ; typedef int16_t cl_short16[16] ;
typedef uint32_t cl_uint4[4] ; typedef uint16_t cl_ushort2[2] ;
typedef uint32_t cl_uint8[8] ; typedef uint16_t cl_ushort4[4] ;
typedef uint32_t cl_uint16[16] ; typedef uint16_t cl_ushort8[8] ;
typedef uint16_t cl_ushort16[16] ;
typedef int64_t cl_long2[2] ;
typedef int64_t cl_long4[4] ; typedef int32_t cl_int2[2] ;
typedef int64_t cl_long8[8] ; typedef int32_t cl_int4[4] ;
typedef int64_t cl_long16[16] ; typedef int32_t cl_int8[8] ;
typedef uint64_t cl_ulong2[2] ; typedef int32_t cl_int16[16] ;
typedef uint64_t cl_ulong4[4] ; typedef uint32_t cl_uint2[2] ;
typedef uint64_t cl_ulong8[8] ; typedef uint32_t cl_uint4[4] ;
typedef uint64_t cl_ulong16[16] ; typedef uint32_t cl_uint8[8] ;
typedef uint32_t cl_uint16[16] ;
typedef float cl_float2[2] ;
typedef float cl_float4[4] ; typedef int64_t cl_long2[2] ;
typedef float cl_float8[8] ; typedef int64_t cl_long4[4] ;
typedef float cl_float16[16] ; typedef int64_t cl_long8[8] ;
typedef int64_t cl_long16[16] ;
typedef double cl_double2[2] ; typedef uint64_t cl_ulong2[2] ;
typedef double cl_double4[4] ; typedef uint64_t cl_ulong4[4] ;
typedef double cl_double8[8] ; typedef uint64_t cl_ulong8[8] ;
typedef double cl_double16[16] ; typedef uint64_t cl_ulong16[16] ;
typedef float cl_float2[2] ;
#else typedef float cl_float4[4] ;
#include <stdint.h> typedef float cl_float8[8] ;
typedef float cl_float16[16] ;
/* scalar types */
typedef int8_t cl_char; typedef double cl_double2[2] ;
typedef uint8_t cl_uchar; typedef double cl_double4[4] ;
typedef int16_t cl_short __attribute__((aligned(2))); typedef double cl_double8[8] ;
typedef uint16_t cl_ushort __attribute__((aligned(2))); typedef double cl_double16[16] ;
typedef int32_t cl_int __attribute__((aligned(4)));
typedef uint32_t cl_uint __attribute__((aligned(4)));
typedef int64_t cl_long __attribute__((aligned(8))); #else
typedef uint64_t cl_ulong __attribute__((aligned(8))); #include <stdint.h>
typedef uint16_t cl_half __attribute__((aligned(2))); /* scalar types */
typedef float cl_float __attribute__((aligned(4))); typedef int8_t cl_char;
typedef double cl_double __attribute__((aligned(8))); typedef uint8_t cl_uchar;
typedef int16_t cl_short __attribute__((aligned(2)));
typedef uint16_t cl_ushort __attribute__((aligned(2)));
/* typedef int32_t cl_int __attribute__((aligned(4)));
* Vector types typedef uint32_t cl_uint __attribute__((aligned(4)));
* typedef int64_t cl_long __attribute__((aligned(8)));
* Note: OpenCL requires that all types be naturally aligned. typedef uint64_t cl_ulong __attribute__((aligned(8)));
* This means that vector types must be naturally aligned.
* For example, a vector of four floats must be aligned to typedef uint16_t cl_half __attribute__((aligned(2)));
* a 16 byte boundary (calculated as 4 * the natural 4-byte typedef float cl_float __attribute__((aligned(4)));
* alignment of the float). The alignment qualifiers here typedef double cl_double __attribute__((aligned(8)));
* will only function properly if your compiler supports them
* and if you don't actively work to defeat them. For example,
* in order for a cl_float4 to be 16 byte aligned in a struct, /*
* the start of the struct must itself be 16-byte aligned. * Vector types
* *
* Maintaining proper alignment is the user's responsibility. * Note: OpenCL requires that all types be naturally aligned.
*/ * This means that vector types must be naturally aligned.
typedef int8_t cl_char2[2] __attribute__((aligned(2))); * For example, a vector of four floats must be aligned to
typedef int8_t cl_char4[4] __attribute__((aligned(4))); * a 16 byte boundary (calculated as 4 * the natural 4-byte
typedef int8_t cl_char8[8] __attribute__((aligned(8))); * alignment of the float). The alignment qualifiers here
typedef int8_t cl_char16[16] __attribute__((aligned(16))); * will only function properly if your compiler supports them
typedef uint8_t cl_uchar2[2] __attribute__((aligned(2))); * and if you don't actively work to defeat them. For example,
typedef uint8_t cl_uchar4[4] __attribute__((aligned(4))); * in order for a cl_float4 to be 16 byte aligned in a struct,
typedef uint8_t cl_uchar8[8] __attribute__((aligned(8))); * the start of the struct must itself be 16-byte aligned.
typedef uint8_t cl_uchar16[16] __attribute__((aligned(16))); *
* Maintaining proper alignment is the user's responsibility.
typedef int16_t cl_short2[2] __attribute__((aligned(4))); */
typedef int16_t cl_short4[4] __attribute__((aligned(8))); typedef int8_t cl_char2[2] __attribute__((aligned(2)));
typedef int16_t cl_short8[8] __attribute__((aligned(16))); typedef int8_t cl_char4[4] __attribute__((aligned(4)));
typedef int16_t cl_short16[16] __attribute__((aligned(32))); typedef int8_t cl_char8[8] __attribute__((aligned(8)));
typedef uint16_t cl_ushort2[2] __attribute__((aligned(4))); typedef int8_t cl_char16[16] __attribute__((aligned(16)));
typedef uint16_t cl_ushort4[4] __attribute__((aligned(8))); typedef uint8_t cl_uchar2[2] __attribute__((aligned(2)));
typedef uint16_t cl_ushort8[8] __attribute__((aligned(16))); typedef uint8_t cl_uchar4[4] __attribute__((aligned(4)));
typedef uint16_t cl_ushort16[16] __attribute__((aligned(32))); typedef uint8_t cl_uchar8[8] __attribute__((aligned(8)));
typedef uint8_t cl_uchar16[16] __attribute__((aligned(16)));
typedef int32_t cl_int2[2] __attribute__((aligned(8)));
typedef int32_t cl_int4[4] __attribute__((aligned(16))); typedef int16_t cl_short2[2] __attribute__((aligned(4)));
typedef int32_t cl_int8[8] __attribute__((aligned(32))); typedef int16_t cl_short4[4] __attribute__((aligned(8)));
typedef int32_t cl_int16[16] __attribute__((aligned(64))); typedef int16_t cl_short8[8] __attribute__((aligned(16)));
typedef uint32_t cl_uint2[2] __attribute__((aligned(8))); typedef int16_t cl_short16[16] __attribute__((aligned(32)));
typedef uint32_t cl_uint4[4] __attribute__((aligned(16))); typedef uint16_t cl_ushort2[2] __attribute__((aligned(4)));
typedef uint32_t cl_uint8[8] __attribute__((aligned(32))); typedef uint16_t cl_ushort4[4] __attribute__((aligned(8)));
typedef uint32_t cl_uint16[16] __attribute__((aligned(64))); typedef uint16_t cl_ushort8[8] __attribute__((aligned(16)));
typedef uint16_t cl_ushort16[16] __attribute__((aligned(32)));
typedef int64_t cl_long2[2] __attribute__((aligned(16)));
typedef int64_t cl_long4[4] __attribute__((aligned(32))); typedef int32_t cl_int2[2] __attribute__((aligned(8)));
typedef int64_t cl_long8[8] __attribute__((aligned(64))); typedef int32_t cl_int4[4] __attribute__((aligned(16)));
typedef int64_t cl_long16[16] __attribute__((aligned(128))); typedef int32_t cl_int8[8] __attribute__((aligned(32)));
typedef uint64_t cl_ulong2[2] __attribute__((aligned(16))); typedef int32_t cl_int16[16] __attribute__((aligned(64)));
typedef uint64_t cl_ulong4[4] __attribute__((aligned(32))); typedef uint32_t cl_uint2[2] __attribute__((aligned(8)));
typedef uint64_t cl_ulong8[8] __attribute__((aligned(64))); typedef uint32_t cl_uint4[4] __attribute__((aligned(16)));
typedef uint64_t cl_ulong16[16] __attribute__((aligned(128))); typedef uint32_t cl_uint8[8] __attribute__((aligned(32)));
typedef uint32_t cl_uint16[16] __attribute__((aligned(64)));
typedef float cl_float2[2] __attribute__((aligned(8)));
typedef float cl_float4[4] __attribute__((aligned(16))); typedef int64_t cl_long2[2] __attribute__((aligned(16)));
typedef float cl_float8[8] __attribute__((aligned(32))); typedef int64_t cl_long4[4] __attribute__((aligned(32)));
typedef float cl_float16[16] __attribute__((aligned(64))); typedef int64_t cl_long8[8] __attribute__((aligned(64)));
typedef int64_t cl_long16[16] __attribute__((aligned(128)));
typedef double cl_double2[2] __attribute__((aligned(16))); typedef uint64_t cl_ulong2[2] __attribute__((aligned(16)));
typedef double cl_double4[4] __attribute__((aligned(32))); typedef uint64_t cl_ulong4[4] __attribute__((aligned(32)));
typedef double cl_double8[8] __attribute__((aligned(64))); typedef uint64_t cl_ulong8[8] __attribute__((aligned(64)));
typedef double cl_double16[16] __attribute__((aligned(128))); typedef uint64_t cl_ulong16[16] __attribute__((aligned(128)));
#endif
typedef float cl_float2[2] __attribute__((aligned(8)));
#include <stddef.h> typedef float cl_float4[4] __attribute__((aligned(16)));
typedef float cl_float8[8] __attribute__((aligned(32)));
/* and a few goodies to go with them */ typedef float cl_float16[16] __attribute__((aligned(64)));
#define CL_CHAR_BIT 8
#define CL_SCHAR_MAX 127 typedef double cl_double2[2] __attribute__((aligned(16)));
#define CL_SCHAR_MIN (-127-1) typedef double cl_double4[4] __attribute__((aligned(32)));
#define CL_CHAR_MAX CL_SCHAR_MAX typedef double cl_double8[8] __attribute__((aligned(64)));
#define CL_CHAR_MIN CL_SCHAR_MIN typedef double cl_double16[16] __attribute__((aligned(128)));
#define CL_UCHAR_MAX 255 #endif
#define CL_SHRT_MAX 32767
#define CL_SHRT_MIN (-32767-1) #include <stddef.h>
#define CL_USHRT_MAX 65535
#define CL_INT_MAX 2147483647 /* and a few goodies to go with them */
#define CL_INT_MIN (-2147483647-1) #define CL_CHAR_BIT 8
#define CL_UINT_MAX 0xffffffffU #define CL_SCHAR_MAX 127
#define CL_LONG_MAX ((cl_long) 0x7FFFFFFFFFFFFFFFLL) #define CL_SCHAR_MIN (-127-1)
#define CL_LONG_MIN ((cl_long) -0x7FFFFFFFFFFFFFFFLL - 1LL) #define CL_CHAR_MAX CL_SCHAR_MAX
#define CL_ULONG_MAX ((cl_ulong) 0xFFFFFFFFFFFFFFFFULL) #define CL_CHAR_MIN CL_SCHAR_MIN
#define CL_UCHAR_MAX 255
#define CL_FLT_DIG 6 #define CL_SHRT_MAX 32767
#define CL_FLT_MANT_DIG 24 #define CL_SHRT_MIN (-32767-1)
#define CL_FLT_MAX_10_EXP +38 #define CL_USHRT_MAX 65535
#define CL_FLT_MAX_EXP +128 #define CL_INT_MAX 2147483647
#define CL_FLT_MIN_10_EXP -37 #define CL_INT_MIN (-2147483647-1)
#define CL_FLT_MIN_EXP -125 #define CL_UINT_MAX 0xffffffffU
#define CL_FLT_RADIX 2 #define CL_LONG_MAX ((cl_long) 0x7FFFFFFFFFFFFFFFLL)
#define CL_FLT_MAX 0x1.fffffep127f #define CL_LONG_MIN ((cl_long) -0x7FFFFFFFFFFFFFFFLL - 1LL)
#define CL_FLT_MIN 0x1.0p-126f #define CL_ULONG_MAX ((cl_ulong) 0xFFFFFFFFFFFFFFFFULL)
#define CL_FLT_EPSILON 0x1.0p-23f
#define CL_FLT_DIG 6
#define CL_DBL_DIG 15 #define CL_FLT_MANT_DIG 24
#define CL_DBL_MANT_DIG 53 #define CL_FLT_MAX_10_EXP +38
#define CL_DBL_MAX_10_EXP +308 #define CL_FLT_MAX_EXP +128
#define CL_DBL_MAX_EXP +1024 #define CL_FLT_MIN_10_EXP -37
#define CL_DBL_MIN_10_EXP -307 #define CL_FLT_MIN_EXP -125
#define CL_DBL_MIN_EXP -1021 #define CL_FLT_RADIX 2
#define CL_DBL_RADIX 2 #define CL_FLT_MAX 0x1.fffffep127f
#define CL_DBL_MAX 0x1.fffffffffffffp1023 #define CL_FLT_MIN 0x1.0p-126f
#define CL_DBL_MIN 0x1.0p-1022 #define CL_FLT_EPSILON 0x1.0p-23f
#define CL_DBL_EPSILON 0x1.0p-52
#define CL_DBL_DIG 15
/* There are no vector types for half */ #define CL_DBL_MANT_DIG 53
#define CL_DBL_MAX_10_EXP +308
#ifdef __cplusplus #define CL_DBL_MAX_EXP +1024
} #define CL_DBL_MIN_10_EXP -307
#endif #define CL_DBL_MIN_EXP -1021
#define CL_DBL_RADIX 2
#endif // __CL_PLATFORM_H #define CL_DBL_MAX 0x1.fffffffffffffp1023
#define CL_DBL_MIN 0x1.0p-1022
#define CL_DBL_EPSILON 0x1.0p-52
/* There are no vector types for half */
#ifdef __cplusplus
}
#endif
#endif // __CL_PLATFORM_H