move some recent MiniCL work to trunk
This commit is contained in:
@@ -1,26 +1,27 @@
|
||||
|
||||
IF (USE_GLUT)
|
||||
|
||||
SET(SharedDemoSubdirs
|
||||
OpenGL AllBulletDemos ConvexDecompositionDemo Benchmarks HelloWorld
|
||||
CcdPhysicsDemo ConstraintDemo SliderConstraintDemo GenericJointDemo Raytracer
|
||||
RagdollDemo ForkLiftDemo BasicDemo Box2dDemo BspDemo MovingConcaveDemo VehicleDemo
|
||||
UserCollisionAlgorithm CharacterDemo SoftDemo HeightFieldFluidDemo
|
||||
CollisionInterfaceDemo ConcaveConvexcastDemo SimplexDemo DynamicControlDemo
|
||||
DoublePrecisionDemo ConcaveDemo CollisionDemo
|
||||
ContinuousConvexCollision ConcaveRaycastDemo GjkConvexCastDemo
|
||||
MultiMaterialDemo SerializeDemo InternalEdgeDemo
|
||||
)
|
||||
|
||||
|
||||
if (CMAKE_SIZEOF_VOID_P MATCHES "8")
|
||||
SUBDIRS( OpenGL AllBulletDemos ConvexDecompositionDemo Benchmarks HelloWorld
|
||||
CcdPhysicsDemo ConstraintDemo SliderConstraintDemo GenericJointDemo Raytracer
|
||||
RagdollDemo ForkLiftDemo BasicDemo Box2dDemo BspDemo MovingConcaveDemo VehicleDemo
|
||||
UserCollisionAlgorithm CharacterDemo SoftDemo HeightFieldFluidDemo
|
||||
CollisionInterfaceDemo ConcaveConvexcastDemo SimplexDemo DynamicControlDemo
|
||||
DoublePrecisionDemo ConcaveDemo CollisionDemo
|
||||
ContinuousConvexCollision ConcaveRaycastDemo GjkConvexCastDemo
|
||||
MultiMaterialDemo SerializeDemo InternalEdgeDemo
|
||||
SUBDIRS( OpenGL
|
||||
${SharedDemoSubdirs}
|
||||
)
|
||||
else (CMAKE_SIZEOF_VOID_P MATCHES "8")
|
||||
SUBDIRS( OpenGL AllBulletDemos ConvexDecompositionDemo Benchmarks HelloWorld
|
||||
MultiThreadedDemo CcdPhysicsDemo ConstraintDemo SliderConstraintDemo Raytracer
|
||||
GenericJointDemo RagdollDemo ForkLiftDemo BasicDemo Box2dDemo BspDemo MovingConcaveDemo
|
||||
VehicleDemo UserCollisionAlgorithm CharacterDemo SoftDemo
|
||||
HeightFieldFluidDemo
|
||||
CollisionInterfaceDemo ConcaveConvexcastDemo SimplexDemo DynamicControlDemo
|
||||
DoublePrecisionDemo ConcaveDemo CollisionDemo
|
||||
ContinuousConvexCollision ConcaveRaycastDemo GjkConvexCastDemo
|
||||
MultiMaterialDemo SerializeDemo InternalEdgeDemo
|
||||
SUBDIRS( OpenGL
|
||||
${SharedDemoSubdirs}
|
||||
MultiThreadedDemo
|
||||
MiniCL_VectorAdd
|
||||
)
|
||||
endif (CMAKE_SIZEOF_VOID_P MATCHES "8")
|
||||
|
||||
@@ -40,6 +41,7 @@ SUBDIRS(
|
||||
GenericJointDemo
|
||||
SerializeDemo
|
||||
SoftDemo
|
||||
MiniCL_VectorAdd
|
||||
)
|
||||
|
||||
ENDIF (USE_GLUT)
|
||||
@@ -11,7 +11,7 @@ LINK_LIBRARIES(
|
||||
|
||||
ADD_EXECUTABLE(AppMiniCLVectorAdd
|
||||
MiniCL_VectorAdd.cpp
|
||||
MiniCL.cpp
|
||||
VectorAddKernels.cl
|
||||
)
|
||||
|
||||
IF (UNIX)
|
||||
|
||||
@@ -1,5 +0,0 @@
|
||||
SubDir TOP Demos MiniCL_VectorAdd ;
|
||||
|
||||
BulletMiniCLDemo MiniCL_VectorAdd : [ Wildcard *.h *.cpp ] ;
|
||||
|
||||
MsvcIncDirs MiniCL_VectorAdd : "../../src" ;
|
||||
@@ -5,7 +5,13 @@
|
||||
///Instead of #include <CL/cl.h> we include <MiniCL/cl.h>
|
||||
///Apart from this include file, all other code should compile and work on OpenCL compliant implementation
|
||||
|
||||
#include <MiniCL/cl.h>
|
||||
#define USE_MINICL 1
|
||||
#ifdef USE_MINICL
|
||||
#include "MiniCL/cl.h"
|
||||
#else //USE_MINICL
|
||||
#include <CL/cl.h>
|
||||
#endif//USE_MINICL
|
||||
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
#include <stdlib.h>
|
||||
@@ -170,3 +176,13 @@ int main(int argc, char **argv)
|
||||
free(srcB);
|
||||
free (dst);
|
||||
}
|
||||
|
||||
|
||||
#ifdef USE_MINICL
|
||||
#include "MiniCL/cl_MiniCL_Defs.h"
|
||||
extern "C"
|
||||
{
|
||||
#include "VectorAddKernels.cl"
|
||||
}
|
||||
MINICL_REGISTER(VectorAdd)
|
||||
#endif//USE_MINICL
|
||||
47
Demos/MiniCL_VectorAdd/VectorAddKernels.cl
Normal file
47
Demos/MiniCL_VectorAdd/VectorAddKernels.cl
Normal file
@@ -0,0 +1,47 @@
|
||||
/*
|
||||
Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
|
||||
Copyright (C) 2006 - 2009 Sony Computer Entertainment Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
|
||||
///GUID_ARG is only used by MiniCL to pass in the guid used by its get_global_id implementation
|
||||
#ifndef GUID_ARG
|
||||
#define GUID_ARG
|
||||
#endif
|
||||
|
||||
///////////////////////////////////////////////////
|
||||
// OpenCL Kernel Function for element by element vector addition
|
||||
__kernel void VectorAdd(__global const float8* a, __global const float8* b, __global float8* c GUID_ARG)
|
||||
{
|
||||
// get oct-float index into global data array
|
||||
int iGID = get_global_id(0);
|
||||
|
||||
// read inputs into registers
|
||||
float8 f8InA = a[iGID];
|
||||
float8 f8InB = b[iGID];
|
||||
float8 f8Out = (float8)0.0f;
|
||||
|
||||
|
||||
// add the vector elements
|
||||
f8Out.s0 = f8InA.s0 + f8InB.s0;
|
||||
f8Out.s1 = f8InA.s1 + f8InB.s1;
|
||||
f8Out.s2 = f8InA.s2 + f8InB.s2;
|
||||
f8Out.s3 = f8InA.s3 + f8InB.s3;
|
||||
f8Out.s4 = f8InA.s4 + f8InB.s4;
|
||||
f8Out.s5 = f8InA.s5 + f8InB.s5;
|
||||
f8Out.s6 = f8InA.s6 + f8InB.s6;
|
||||
f8Out.s7 = f8InA.s7 + f8InB.s7;
|
||||
|
||||
// write back out to GMEM
|
||||
c[get_global_id(0)] = f8Out;
|
||||
}
|
||||
|
||||
@@ -56,6 +56,7 @@ ADD_LIBRARY(BulletMultiThreaded
|
||||
btGpuUtilsSharedDefs.h
|
||||
|
||||
#MiniCL provides a small subset of OpenCL
|
||||
MiniCL.cpp
|
||||
MiniCLTaskScheduler.cpp
|
||||
MiniCLTaskScheduler.h
|
||||
MiniCLTask/MiniCLTask.cpp
|
||||
@@ -63,6 +64,7 @@ ADD_LIBRARY(BulletMultiThreaded
|
||||
../MiniCL/cl.h
|
||||
../MiniCL/cl_gl.h
|
||||
../MiniCL/cl_platform.h
|
||||
../MiniCL/cl_MiniCL_Defs.h
|
||||
)
|
||||
|
||||
IF (BUILD_SHARED_LIBS)
|
||||
|
||||
@@ -1,346 +1,512 @@
|
||||
|
||||
#include <MiniCL/cl.h>
|
||||
#define __PHYSICS_COMMON_H__ 1
|
||||
#ifdef WIN32
|
||||
#include "BulletMultiThreaded/Win32ThreadSupport.h"
|
||||
#else
|
||||
#include "BulletMultiThreaded/SequentialThreadSupport.h"
|
||||
#endif
|
||||
#include "BulletMultiThreaded/MiniCLTaskScheduler.h"
|
||||
#include "BulletMultiThreaded/MiniCLTask/MiniCLTask.h"
|
||||
#include "LinearMath/btMinMax.h"
|
||||
|
||||
/*
|
||||
m_threadSupportCollision = new Win32ThreadSupport(Win32ThreadSupport::Win32ThreadConstructionInfo(
|
||||
"collision",
|
||||
processCollisionTask,
|
||||
createCollisionLocalStoreMemory,
|
||||
maxNumOutstandingTasks));
|
||||
|
||||
if (!m_spuCollisionTaskProcess)
|
||||
m_spuCollisionTaskProcess = new SpuCollisionTaskProcess(m_threadInterface,m_maxNumOutstandingTasks);
|
||||
|
||||
m_spuCollisionTaskProcess->initialize2(dispatchInfo.m_useEpa);
|
||||
|
||||
m_spuCollisionTaskProcess->addWorkToTask(pairPtr,i,endIndex);
|
||||
|
||||
//make sure all SPU work is done
|
||||
m_spuCollisionTaskProcess->flush2();
|
||||
|
||||
|
||||
*/
|
||||
|
||||
|
||||
|
||||
CL_API_ENTRY cl_int CL_API_CALL clGetDeviceInfo(
|
||||
cl_device_id device ,
|
||||
cl_device_info param_name ,
|
||||
size_t param_value_size ,
|
||||
void * param_value ,
|
||||
size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0
|
||||
{
|
||||
|
||||
switch (param_name)
|
||||
{
|
||||
case CL_DEVICE_NAME:
|
||||
{
|
||||
char deviceName[] = "CPU";
|
||||
int nameLen = strlen(deviceName)+1;
|
||||
assert(param_value_size>strlen(deviceName));
|
||||
if (nameLen < param_value_size)
|
||||
{
|
||||
sprintf((char*)param_value,"CPU");
|
||||
} else
|
||||
{
|
||||
printf("error: param_value_size should be at least %d, but it is %d\n",nameLen,param_value_size);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case CL_DEVICE_TYPE:
|
||||
{
|
||||
if (param_value_size>=sizeof(cl_device_type))
|
||||
{
|
||||
cl_device_type* deviceType = (cl_device_type*)param_value;
|
||||
*deviceType = CL_DEVICE_TYPE_CPU;
|
||||
} else
|
||||
{
|
||||
printf("error: param_value_size should be at least %d\n",sizeof(cl_device_type));
|
||||
}
|
||||
break;
|
||||
}
|
||||
case CL_DEVICE_MAX_COMPUTE_UNITS:
|
||||
{
|
||||
if (param_value_size>=sizeof(cl_uint))
|
||||
{
|
||||
cl_uint* numUnits = (cl_uint*)param_value;
|
||||
*numUnits= 4;
|
||||
} else
|
||||
{
|
||||
printf("error: param_value_size should be at least %d\n",sizeof(cl_uint));
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
case CL_DEVICE_MAX_WORK_ITEM_SIZES:
|
||||
{
|
||||
size_t workitem_size[3];
|
||||
|
||||
if (param_value_size>=sizeof(workitem_size))
|
||||
{
|
||||
size_t* workItemSize = (size_t*)param_value;
|
||||
workItemSize[0] = 64;
|
||||
workItemSize[1] = 24;
|
||||
workItemSize[2] = 16;
|
||||
} else
|
||||
{
|
||||
printf("error: param_value_size should be at least %d\n",sizeof(cl_uint));
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
{
|
||||
printf("error: unsupported param_name:%d\n",param_name);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
CL_API_ENTRY cl_int CL_API_CALL clReleaseMemObject(cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
CL_API_ENTRY cl_int CL_API_CALL clReleaseCommandQueue(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
CL_API_ENTRY cl_int CL_API_CALL clReleaseProgram(cl_program /* program */) CL_API_SUFFIX__VERSION_1_0
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
CL_API_ENTRY cl_int CL_API_CALL clReleaseKernel(cl_kernel /* kernel */) CL_API_SUFFIX__VERSION_1_0
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
// Enqueued Commands APIs
|
||||
CL_API_ENTRY cl_int CL_API_CALL clEnqueueReadBuffer(cl_command_queue command_queue ,
|
||||
cl_mem buffer ,
|
||||
cl_bool /* blocking_read */,
|
||||
size_t /* offset */,
|
||||
size_t cb ,
|
||||
void * ptr ,
|
||||
cl_uint /* num_events_in_wait_list */,
|
||||
const cl_event * /* event_wait_list */,
|
||||
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0
|
||||
{
|
||||
MiniCLTaskScheduler* scheduler = (MiniCLTaskScheduler*) command_queue;
|
||||
|
||||
///wait for all work items to be completed
|
||||
scheduler->flush();
|
||||
|
||||
memcpy(ptr,buffer,cb);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
CL_API_ENTRY cl_int CL_API_CALL clEnqueueNDRangeKernel(cl_command_queue /* command_queue */,
|
||||
cl_kernel clKernel ,
|
||||
cl_uint work_dim ,
|
||||
const size_t * /* global_work_offset */,
|
||||
const size_t * global_work_size ,
|
||||
const size_t * /* local_work_size */,
|
||||
cl_uint /* num_events_in_wait_list */,
|
||||
const cl_event * /* event_wait_list */,
|
||||
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0
|
||||
{
|
||||
|
||||
|
||||
MiniCLKernel* kernel = (MiniCLKernel*) clKernel;
|
||||
for (int ii=0;ii<work_dim;ii++)
|
||||
{
|
||||
int maxTask = kernel->m_scheduler->getMaxNumOutstandingTasks();
|
||||
int numWorkItems = global_work_size[ii];
|
||||
|
||||
//at minimum 64 work items per task
|
||||
int numWorkItemsPerTask = btMax(64,numWorkItems / maxTask);
|
||||
|
||||
for (int t=0;t<numWorkItems;)
|
||||
{
|
||||
//Performance Hint: tweak this number during benchmarking
|
||||
int endIndex = (t+numWorkItemsPerTask) < numWorkItems ? t+numWorkItemsPerTask : numWorkItems;
|
||||
kernel->m_scheduler->issueTask(t,endIndex,kernel->m_kernelProgramCommandId,(char*)&kernel->m_argData[0][0],kernel->m_argSizes);
|
||||
t = endIndex;
|
||||
}
|
||||
}
|
||||
/*
|
||||
|
||||
void* bla = 0;
|
||||
|
||||
scheduler->issueTask(bla,2,3);
|
||||
scheduler->flush();
|
||||
|
||||
*/
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
CL_API_ENTRY cl_int CL_API_CALL clSetKernelArg(cl_kernel clKernel ,
|
||||
cl_uint arg_index ,
|
||||
size_t arg_size ,
|
||||
const void * arg_value ) CL_API_SUFFIX__VERSION_1_0
|
||||
{
|
||||
MiniCLKernel* kernel = (MiniCLKernel* ) clKernel;
|
||||
assert(arg_size < MINICL_MAX_ARGLENGTH);
|
||||
if (arg_index>MINI_CL_MAX_ARG)
|
||||
{
|
||||
printf("error: clSetKernelArg arg_index (%d) exceeds %d\n",arg_index,MINI_CL_MAX_ARG);
|
||||
} else
|
||||
{
|
||||
if (arg_size>=MINICL_MAX_ARGLENGTH)
|
||||
{
|
||||
printf("error: clSetKernelArg argdata too large: %d (maximum is %d)\n",arg_size,MINICL_MAX_ARGLENGTH);
|
||||
} else
|
||||
{
|
||||
memcpy( kernel->m_argData[arg_index],arg_value,arg_size);
|
||||
kernel->m_argSizes[arg_index] = arg_size;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Kernel Object APIs
|
||||
CL_API_ENTRY cl_kernel CL_API_CALL clCreateKernel(cl_program program ,
|
||||
const char * kernel_name ,
|
||||
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0
|
||||
{
|
||||
MiniCLTaskScheduler* scheduler = (MiniCLTaskScheduler*) program;
|
||||
MiniCLKernel* kernel = new MiniCLKernel();
|
||||
|
||||
kernel->m_kernelProgramCommandId = scheduler->findProgramCommandIdByName(kernel_name);
|
||||
kernel->m_scheduler = scheduler;
|
||||
|
||||
return (cl_kernel)kernel;
|
||||
|
||||
}
|
||||
|
||||
|
||||
CL_API_ENTRY cl_int CL_API_CALL clBuildProgram(cl_program /* program */,
|
||||
cl_uint /* num_devices */,
|
||||
const cl_device_id * /* device_list */,
|
||||
const char * /* options */,
|
||||
void (*pfn_notify)(cl_program /* program */, void * /* user_data */),
|
||||
void * /* user_data */) CL_API_SUFFIX__VERSION_1_0
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
CL_API_ENTRY cl_program CL_API_CALL clCreateProgramWithBinary(cl_context context ,
|
||||
cl_uint /* num_devices */,
|
||||
const cl_device_id * /* device_list */,
|
||||
const size_t * /* lengths */,
|
||||
const unsigned char ** /* binaries */,
|
||||
cl_int * /* binary_status */,
|
||||
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0
|
||||
{
|
||||
return (cl_program)context;
|
||||
}
|
||||
|
||||
|
||||
// Memory Object APIs
|
||||
CL_API_ENTRY cl_mem CL_API_CALL clCreateBuffer(cl_context /* context */,
|
||||
cl_mem_flags flags ,
|
||||
size_t size,
|
||||
void * host_ptr ,
|
||||
cl_int * errcode_ret ) CL_API_SUFFIX__VERSION_1_0
|
||||
{
|
||||
cl_mem buf = (cl_mem)malloc(size);
|
||||
if ((flags&CL_MEM_COPY_HOST_PTR) && host_ptr)
|
||||
{
|
||||
memcpy(buf,host_ptr,size);
|
||||
}
|
||||
return buf;
|
||||
}
|
||||
|
||||
// Command Queue APIs
|
||||
CL_API_ENTRY cl_command_queue CL_API_CALL clCreateCommandQueue(cl_context context ,
|
||||
cl_device_id /* device */,
|
||||
cl_command_queue_properties /* properties */,
|
||||
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0
|
||||
{
|
||||
return (cl_command_queue) context;
|
||||
}
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL clGetContextInfo(cl_context /* context */,
|
||||
cl_context_info param_name ,
|
||||
size_t param_value_size ,
|
||||
void * param_value,
|
||||
size_t * param_value_size_ret ) CL_API_SUFFIX__VERSION_1_0
|
||||
{
|
||||
|
||||
switch (param_name)
|
||||
{
|
||||
case CL_CONTEXT_DEVICES:
|
||||
{
|
||||
if (!param_value_size)
|
||||
{
|
||||
*param_value_size_ret = 13;
|
||||
} else
|
||||
{
|
||||
sprintf((char*)param_value,"MiniCL_Test.");
|
||||
}
|
||||
break;
|
||||
};
|
||||
default:
|
||||
{
|
||||
printf("unsupported\n");
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
CL_API_ENTRY cl_context CL_API_CALL clCreateContextFromType(cl_context_properties * /* properties */,
|
||||
cl_device_type /* device_type */,
|
||||
void (*pfn_notify)(const char *, const void *, size_t, void *) /* pfn_notify */,
|
||||
void * /* user_data */,
|
||||
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0
|
||||
{
|
||||
int maxNumOutstandingTasks = 4;
|
||||
|
||||
#ifdef WIN32
|
||||
Win32ThreadSupport* threadSupport = new Win32ThreadSupport(Win32ThreadSupport::Win32ThreadConstructionInfo(
|
||||
"MiniCL",
|
||||
processMiniCLTask, //processCollisionTask,
|
||||
createMiniCLLocalStoreMemory,//createCollisionLocalStoreMemory,
|
||||
maxNumOutstandingTasks));
|
||||
#else
|
||||
SequentialThreadSupport::SequentialThreadConstructionInfo stc("MiniCL",processMiniCLTask,createMiniCLLocalStoreMemory);
|
||||
SequentialThreadSupport* threadSupport = new SequentialThreadSupport(stc);
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
MiniCLTaskScheduler* scheduler = new MiniCLTaskScheduler(threadSupport,maxNumOutstandingTasks);
|
||||
|
||||
return (cl_context)scheduler;
|
||||
}
|
||||
|
||||
CL_API_ENTRY cl_int CL_API_CALL clReleaseContext(cl_context context ) CL_API_SUFFIX__VERSION_1_0
|
||||
{
|
||||
|
||||
MiniCLTaskScheduler* scheduler = (MiniCLTaskScheduler*) context;
|
||||
|
||||
btThreadSupportInterface* threadSupport = scheduler->getThreadSupportInterface();
|
||||
delete scheduler;
|
||||
delete threadSupport;
|
||||
|
||||
return 0;
|
||||
}
|
||||
/*
|
||||
Copyright (C) 2010 Sony Computer Entertainment Inc.
|
||||
All rights reserved.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
|
||||
*/
|
||||
|
||||
|
||||
#include "MiniCL/cl.h"
|
||||
#define __PHYSICS_COMMON_H__ 1
|
||||
#ifdef _WIN32
|
||||
#include "BulletMultiThreaded/Win32ThreadSupport.h"
|
||||
#endif
|
||||
|
||||
#include "BulletMultiThreaded/SequentialThreadSupport.h"
|
||||
#include "MiniCLTaskScheduler.h"
|
||||
#include "MiniCLTask/MiniCLTask.h"
|
||||
#include "LinearMath/btMinMax.h"
|
||||
|
||||
//#define DEBUG_MINICL_KERNELS 1
|
||||
|
||||
|
||||
|
||||
|
||||
CL_API_ENTRY cl_int CL_API_CALL clGetDeviceInfo(
|
||||
cl_device_id device ,
|
||||
cl_device_info param_name ,
|
||||
size_t param_value_size ,
|
||||
void * param_value ,
|
||||
size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0
|
||||
{
|
||||
|
||||
switch (param_name)
|
||||
{
|
||||
case CL_DEVICE_NAME:
|
||||
{
|
||||
char deviceName[] = "CPU";
|
||||
unsigned int nameLen = strlen(deviceName)+1;
|
||||
assert(param_value_size>strlen(deviceName));
|
||||
if (nameLen < param_value_size)
|
||||
{
|
||||
sprintf_s((char*)param_value,param_value_size, "CPU");
|
||||
} else
|
||||
{
|
||||
printf("error: param_value_size should be at least %d, but it is %d\n",nameLen,param_value_size);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case CL_DEVICE_TYPE:
|
||||
{
|
||||
if (param_value_size>=sizeof(cl_device_type))
|
||||
{
|
||||
cl_device_type* deviceType = (cl_device_type*)param_value;
|
||||
*deviceType = CL_DEVICE_TYPE_CPU;
|
||||
} else
|
||||
{
|
||||
printf("error: param_value_size should be at least %d\n",sizeof(cl_device_type));
|
||||
}
|
||||
break;
|
||||
}
|
||||
case CL_DEVICE_MAX_COMPUTE_UNITS:
|
||||
{
|
||||
if (param_value_size>=sizeof(cl_uint))
|
||||
{
|
||||
cl_uint* numUnits = (cl_uint*)param_value;
|
||||
*numUnits= 4;
|
||||
} else
|
||||
{
|
||||
printf("error: param_value_size should be at least %d\n",sizeof(cl_uint));
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
case CL_DEVICE_MAX_WORK_ITEM_SIZES:
|
||||
{
|
||||
size_t workitem_size[3];
|
||||
|
||||
if (param_value_size>=sizeof(workitem_size))
|
||||
{
|
||||
size_t* workItemSize = (size_t*)param_value;
|
||||
workItemSize[0] = 64;
|
||||
workItemSize[1] = 24;
|
||||
workItemSize[2] = 16;
|
||||
} else
|
||||
{
|
||||
printf("error: param_value_size should be at least %d\n",sizeof(cl_uint));
|
||||
}
|
||||
break;
|
||||
}
|
||||
case CL_DEVICE_MAX_CLOCK_FREQUENCY:
|
||||
{
|
||||
cl_uint* clock_frequency = (cl_uint*)param_value;
|
||||
*clock_frequency = 3*1024;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
{
|
||||
printf("error: unsupported param_name:%d\n",param_name);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
CL_API_ENTRY cl_int CL_API_CALL clReleaseMemObject(cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
CL_API_ENTRY cl_int CL_API_CALL clReleaseCommandQueue(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
CL_API_ENTRY cl_int CL_API_CALL clReleaseProgram(cl_program /* program */) CL_API_SUFFIX__VERSION_1_0
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
CL_API_ENTRY cl_int CL_API_CALL clReleaseKernel(cl_kernel /* kernel */) CL_API_SUFFIX__VERSION_1_0
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
// Enqueued Commands APIs
|
||||
CL_API_ENTRY cl_int CL_API_CALL clEnqueueReadBuffer(cl_command_queue command_queue ,
|
||||
cl_mem buffer ,
|
||||
cl_bool /* blocking_read */,
|
||||
size_t offset ,
|
||||
size_t cb ,
|
||||
void * ptr ,
|
||||
cl_uint /* num_events_in_wait_list */,
|
||||
const cl_event * /* event_wait_list */,
|
||||
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0
|
||||
{
|
||||
MiniCLTaskScheduler* scheduler = (MiniCLTaskScheduler*) command_queue;
|
||||
|
||||
///wait for all work items to be completed
|
||||
scheduler->flush();
|
||||
|
||||
memcpy(ptr,(char*)buffer + offset,cb);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
CL_API_ENTRY cl_int clGetProgramBuildInfo(cl_program /* program */,
|
||||
cl_device_id /* device */,
|
||||
cl_program_build_info /* param_name */,
|
||||
size_t /* param_value_size */,
|
||||
void * /* param_value */,
|
||||
size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0
|
||||
{
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
// Program Object APIs
|
||||
CL_API_ENTRY cl_program
|
||||
clCreateProgramWithSource(cl_context context ,
|
||||
cl_uint /* count */,
|
||||
const char ** /* strings */,
|
||||
const size_t * /* lengths */,
|
||||
cl_int * errcode_ret ) CL_API_SUFFIX__VERSION_1_0
|
||||
{
|
||||
*errcode_ret = CL_SUCCESS;
|
||||
return (cl_program)context;
|
||||
}
|
||||
|
||||
CL_API_ENTRY cl_int CL_API_CALL clEnqueueWriteBuffer(cl_command_queue command_queue ,
|
||||
cl_mem buffer ,
|
||||
cl_bool /* blocking_read */,
|
||||
size_t offset,
|
||||
size_t cb ,
|
||||
const void * ptr ,
|
||||
cl_uint /* num_events_in_wait_list */,
|
||||
const cl_event * /* event_wait_list */,
|
||||
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0
|
||||
{
|
||||
MiniCLTaskScheduler* scheduler = (MiniCLTaskScheduler*) command_queue;
|
||||
|
||||
///wait for all work items to be completed
|
||||
scheduler->flush();
|
||||
|
||||
memcpy((char*)buffer + offset, ptr,cb);
|
||||
return 0;
|
||||
}
|
||||
|
||||
CL_API_ENTRY cl_int CL_API_CALL clFlush(cl_command_queue command_queue)
|
||||
{
|
||||
MiniCLTaskScheduler* scheduler = (MiniCLTaskScheduler*) command_queue;
|
||||
///wait for all work items to be completed
|
||||
scheduler->flush();
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
CL_API_ENTRY cl_int CL_API_CALL clEnqueueNDRangeKernel(cl_command_queue /* command_queue */,
|
||||
cl_kernel clKernel ,
|
||||
cl_uint work_dim ,
|
||||
const size_t * /* global_work_offset */,
|
||||
const size_t * global_work_size ,
|
||||
const size_t * /* local_work_size */,
|
||||
cl_uint /* num_events_in_wait_list */,
|
||||
const cl_event * /* event_wait_list */,
|
||||
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0
|
||||
{
|
||||
|
||||
|
||||
MiniCLKernel* kernel = (MiniCLKernel*) clKernel;
|
||||
for (unsigned int ii=0;ii<work_dim;ii++)
|
||||
{
|
||||
int maxTask = kernel->m_scheduler->getMaxNumOutstandingTasks();
|
||||
int numWorkItems = global_work_size[ii];
|
||||
|
||||
//at minimum 64 work items per task
|
||||
int numWorkItemsPerTask = btMax(64,numWorkItems / maxTask);
|
||||
|
||||
for (int t=0;t<numWorkItems;)
|
||||
{
|
||||
//Performance Hint: tweak this number during benchmarking
|
||||
int endIndex = (t+numWorkItemsPerTask) < numWorkItems ? t+numWorkItemsPerTask : numWorkItems;
|
||||
kernel->m_scheduler->issueTask(t, endIndex, kernel);
|
||||
t = endIndex;
|
||||
}
|
||||
}
|
||||
/*
|
||||
|
||||
void* bla = 0;
|
||||
|
||||
scheduler->issueTask(bla,2,3);
|
||||
scheduler->flush();
|
||||
|
||||
*/
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define LOCAL_BUF_SIZE 32768
|
||||
static int sLocalMemBuf[LOCAL_BUF_SIZE * 4 + 16];
|
||||
static int* spLocalBufCurr = NULL;
|
||||
static int sLocalBufUsed = LOCAL_BUF_SIZE; // so it will be reset at the first call
|
||||
static void* localBufMalloc(int size)
|
||||
{
|
||||
int size16 = (size + 15) >> 4; // in 16-byte units
|
||||
if((sLocalBufUsed + size16) > LOCAL_BUF_SIZE)
|
||||
{ // reset
|
||||
spLocalBufCurr = sLocalMemBuf;
|
||||
while((int)spLocalBufCurr & 0x0F) spLocalBufCurr++; // align to 16 bytes
|
||||
sLocalBufUsed = 0;
|
||||
}
|
||||
void* ret = spLocalBufCurr;
|
||||
spLocalBufCurr += size16 * 4;
|
||||
sLocalBufUsed += size;
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
|
||||
CL_API_ENTRY cl_int CL_API_CALL clSetKernelArg(cl_kernel clKernel ,
|
||||
cl_uint arg_index ,
|
||||
size_t arg_size ,
|
||||
const void * arg_value ) CL_API_SUFFIX__VERSION_1_0
|
||||
{
|
||||
MiniCLKernel* kernel = (MiniCLKernel* ) clKernel;
|
||||
btAssert(arg_size <= MINICL_MAX_ARGLENGTH);
|
||||
if (arg_index>MINI_CL_MAX_ARG)
|
||||
{
|
||||
printf("error: clSetKernelArg arg_index (%d) exceeds %d\n",arg_index,MINI_CL_MAX_ARG);
|
||||
} else
|
||||
{
|
||||
// if (arg_size>=MINICL_MAX_ARGLENGTH)
|
||||
if (arg_size != MINICL_MAX_ARGLENGTH)
|
||||
{
|
||||
printf("error: clSetKernelArg argdata too large: %d (maximum is %d)\n",arg_size,MINICL_MAX_ARGLENGTH);
|
||||
}
|
||||
else
|
||||
{
|
||||
if(arg_value == NULL)
|
||||
{ // this is only for __local memory qualifier
|
||||
void* ptr = localBufMalloc(arg_size);
|
||||
kernel->m_argData[arg_index] = ptr;
|
||||
}
|
||||
else
|
||||
{
|
||||
memcpy(&(kernel->m_argData[arg_index]), arg_value, arg_size);
|
||||
}
|
||||
kernel->m_argSizes[arg_index] = arg_size;
|
||||
if(arg_index >= kernel->m_numArgs)
|
||||
{
|
||||
kernel->m_numArgs = arg_index + 1;
|
||||
kernel->updateLauncher();
|
||||
}
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Kernel Object APIs
|
||||
CL_API_ENTRY cl_kernel CL_API_CALL clCreateKernel(cl_program program ,
|
||||
const char * kernel_name ,
|
||||
cl_int * errcode_ret ) CL_API_SUFFIX__VERSION_1_0
|
||||
{
|
||||
MiniCLTaskScheduler* scheduler = (MiniCLTaskScheduler*) program;
|
||||
MiniCLKernel* kernel = new MiniCLKernel();
|
||||
int nameLen = strlen(kernel_name);
|
||||
if(nameLen >= MINI_CL_MAX_KERNEL_NAME)
|
||||
{
|
||||
*errcode_ret = CL_INVALID_KERNEL_NAME;
|
||||
return NULL;
|
||||
}
|
||||
strcpy_s(kernel->m_name, kernel_name);
|
||||
kernel->m_numArgs = 0;
|
||||
|
||||
//kernel->m_kernelProgramCommandId = scheduler->findProgramCommandIdByName(kernel_name);
|
||||
//if (kernel->m_kernelProgramCommandId>=0)
|
||||
//{
|
||||
// *errcode_ret = CL_SUCCESS;
|
||||
//} else
|
||||
//{
|
||||
// *errcode_ret = CL_INVALID_KERNEL_NAME;
|
||||
//}
|
||||
kernel->m_scheduler = scheduler;
|
||||
if(kernel->registerSelf() == NULL)
|
||||
{
|
||||
*errcode_ret = CL_INVALID_KERNEL_NAME;
|
||||
return NULL;
|
||||
}
|
||||
else
|
||||
{
|
||||
*errcode_ret = CL_SUCCESS;
|
||||
}
|
||||
|
||||
return (cl_kernel)kernel;
|
||||
|
||||
}
|
||||
|
||||
|
||||
CL_API_ENTRY cl_int CL_API_CALL clBuildProgram(cl_program /* program */,
|
||||
cl_uint /* num_devices */,
|
||||
const cl_device_id * /* device_list */,
|
||||
const char * /* options */,
|
||||
void (*pfn_notify)(cl_program /* program */, void * /* user_data */),
|
||||
void * /* user_data */) CL_API_SUFFIX__VERSION_1_0
|
||||
{
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
CL_API_ENTRY cl_program CL_API_CALL clCreateProgramWithBinary(cl_context context ,
|
||||
cl_uint /* num_devices */,
|
||||
const cl_device_id * /* device_list */,
|
||||
const size_t * /* lengths */,
|
||||
const unsigned char ** /* binaries */,
|
||||
cl_int * /* binary_status */,
|
||||
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0
|
||||
{
|
||||
return (cl_program)context;
|
||||
}
|
||||
|
||||
|
||||
// Memory Object APIs
|
||||
CL_API_ENTRY cl_mem CL_API_CALL clCreateBuffer(cl_context /* context */,
|
||||
cl_mem_flags flags ,
|
||||
size_t size,
|
||||
void * host_ptr ,
|
||||
cl_int * errcode_ret ) CL_API_SUFFIX__VERSION_1_0
|
||||
{
|
||||
cl_mem buf = (cl_mem)malloc(size);
|
||||
if ((flags&CL_MEM_COPY_HOST_PTR) && host_ptr)
|
||||
{
|
||||
memcpy(buf,host_ptr,size);
|
||||
}
|
||||
*errcode_ret = 0;
|
||||
return buf;
|
||||
}
|
||||
|
||||
// Command Queue APIs
|
||||
CL_API_ENTRY cl_command_queue CL_API_CALL clCreateCommandQueue(cl_context context ,
|
||||
cl_device_id /* device */,
|
||||
cl_command_queue_properties /* properties */,
|
||||
cl_int * errcode_ret ) CL_API_SUFFIX__VERSION_1_0
|
||||
{
|
||||
*errcode_ret = 0;
|
||||
return (cl_command_queue) context;
|
||||
}
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL clGetContextInfo(cl_context /* context */,
|
||||
cl_context_info param_name ,
|
||||
size_t param_value_size ,
|
||||
void * param_value,
|
||||
size_t * param_value_size_ret ) CL_API_SUFFIX__VERSION_1_0
|
||||
{
|
||||
|
||||
switch (param_name)
|
||||
{
|
||||
case CL_CONTEXT_DEVICES:
|
||||
{
|
||||
if (!param_value_size)
|
||||
{
|
||||
*param_value_size_ret = 13;
|
||||
} else
|
||||
{
|
||||
sprintf_s((char*)param_value, param_value_size, "MiniCL_Test.");
|
||||
}
|
||||
break;
|
||||
};
|
||||
default:
|
||||
{
|
||||
printf("unsupported\n");
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
CL_API_ENTRY cl_context CL_API_CALL clCreateContextFromType(cl_context_properties * /* properties */,
|
||||
cl_device_type /* device_type */,
|
||||
void (*pfn_notify)(const char *, const void *, size_t, void *) /* pfn_notify */,
|
||||
void * /* user_data */,
|
||||
cl_int * errcode_ret ) CL_API_SUFFIX__VERSION_1_0
|
||||
{
|
||||
int maxNumOutstandingTasks = 4;
|
||||
// int maxNumOutstandingTasks = 1;
|
||||
gMiniCLNumOutstandingTasks = maxNumOutstandingTasks;
|
||||
const int maxNumOfThreadSupports = 8;
|
||||
static int sUniqueThreadSupportIndex = 0;
|
||||
static char* sUniqueThreadSupportName[maxNumOfThreadSupports] =
|
||||
{
|
||||
"MiniCL_0", "MiniCL_1", "MiniCL_2", "MiniCL_3", "MiniCL_4", "MiniCL_5", "MiniCL_6", "MiniCL_7"
|
||||
};
|
||||
|
||||
#ifdef DEBUG_MINICL_KERNELS
|
||||
SequentialThreadSupport::SequentialThreadConstructionInfo stc("MiniCL",processMiniCLTask,createMiniCLLocalStoreMemory);
|
||||
SequentialThreadSupport* threadSupport = new SequentialThreadSupport(stc);
|
||||
#else
|
||||
|
||||
#if _WIN32
|
||||
btAssert(sUniqueThreadSupportIndex < maxNumOfThreadSupports);
|
||||
Win32ThreadSupport* threadSupport = new Win32ThreadSupport(Win32ThreadSupport::Win32ThreadConstructionInfo(
|
||||
// "MiniCL",
|
||||
sUniqueThreadSupportName[sUniqueThreadSupportIndex++],
|
||||
processMiniCLTask, //processCollisionTask,
|
||||
createMiniCLLocalStoreMemory,//createCollisionLocalStoreMemory,
|
||||
maxNumOutstandingTasks));
|
||||
#else
|
||||
///todo: add posix thread support for other platforms
|
||||
SequentialThreadSupport::SequentialThreadConstructionInfo stc("MiniCL",processMiniCLTask,createMiniCLLocalStoreMemory);
|
||||
SequentialThreadSupport* threadSupport = new SequentialThreadSupport(stc);
|
||||
#endif
|
||||
|
||||
#endif //DEBUG_MINICL_KERNELS
|
||||
|
||||
|
||||
MiniCLTaskScheduler* scheduler = new MiniCLTaskScheduler(threadSupport,maxNumOutstandingTasks);
|
||||
|
||||
*errcode_ret = 0;
|
||||
return (cl_context)scheduler;
|
||||
}
|
||||
|
||||
CL_API_ENTRY cl_int CL_API_CALL clReleaseContext(cl_context context ) CL_API_SUFFIX__VERSION_1_0
|
||||
{
|
||||
|
||||
MiniCLTaskScheduler* scheduler = (MiniCLTaskScheduler*) context;
|
||||
|
||||
btThreadSupportInterface* threadSupport = scheduler->getThreadSupportInterface();
|
||||
delete scheduler;
|
||||
delete threadSupport;
|
||||
|
||||
return 0;
|
||||
}
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clFinish(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0
|
||||
{
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clGetKernelWorkGroupInfo(cl_kernel kernel ,
|
||||
cl_device_id /* device */,
|
||||
cl_kernel_work_group_info wgi/* param_name */,
|
||||
size_t sz /* param_value_size */,
|
||||
void * ptr /* param_value */,
|
||||
size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0
|
||||
{
|
||||
if((wgi == CL_KERNEL_WORK_GROUP_SIZE)
|
||||
&&(sz == sizeof(int))
|
||||
&&(ptr != NULL))
|
||||
{
|
||||
MiniCLKernel* miniCLKernel = (MiniCLKernel*)kernel;
|
||||
MiniCLTaskScheduler* scheduler = miniCLKernel->m_scheduler;
|
||||
*((int*)ptr) = scheduler->getMaxNumOutstandingTasks();
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
else
|
||||
{
|
||||
return CL_INVALID_VALUE;
|
||||
}
|
||||
}
|
||||
@@ -15,10 +15,12 @@ subject to the following restrictions:
|
||||
|
||||
|
||||
#include "MiniCLTask.h"
|
||||
#include "../PlatformDefinitions.h"
|
||||
#include "../SpuFakeDma.h"
|
||||
#include "BulletMultiThreaded/PlatformDefinitions.h"
|
||||
#include "BulletMultiThreaded/SpuFakeDma.h"
|
||||
#include "LinearMath/btMinMax.h"
|
||||
#include "BulletMultiThreaded/MiniCLTask/MiniCLTask.h"
|
||||
#include "MiniCLTask.h"
|
||||
#include "BulletMultiThreaded/MiniCLTaskScheduler.h"
|
||||
|
||||
|
||||
#ifdef __SPU__
|
||||
#include <spu_printf.h>
|
||||
@@ -27,9 +29,7 @@ subject to the following restrictions:
|
||||
#define spu_printf printf
|
||||
#endif
|
||||
|
||||
#define __kernel
|
||||
#define __global
|
||||
#define get_global_id(a) guid
|
||||
int gMiniCLNumOutstandingTasks = 0;
|
||||
|
||||
struct MiniCLTask_LocalStoreMemory
|
||||
{
|
||||
@@ -37,65 +37,23 @@ struct MiniCLTask_LocalStoreMemory
|
||||
};
|
||||
|
||||
|
||||
///////////////////////////////////////////////////
|
||||
// OpenCL Kernel Function for element by element vector addition
|
||||
__kernel void VectorAdd(__global const float8* a, __global const float8* b, __global float8* c, int guid)
|
||||
{
|
||||
// get oct-float index into global data array
|
||||
int iGID = get_global_id(0);
|
||||
|
||||
// read inputs into registers
|
||||
float8 f8InA = a[iGID];
|
||||
float8 f8InB = b[iGID];
|
||||
float8 f8Out = (float8)0.0f;
|
||||
|
||||
// add the vector elements
|
||||
f8Out.s0 = f8InA.s0 + f8InB.s0;
|
||||
f8Out.s1 = f8InA.s1 + f8InB.s1;
|
||||
f8Out.s2 = f8InA.s2 + f8InB.s2;
|
||||
f8Out.s3 = f8InA.s3 + f8InB.s3;
|
||||
f8Out.s4 = f8InA.s4 + f8InB.s4;
|
||||
f8Out.s5 = f8InA.s5 + f8InB.s5;
|
||||
f8Out.s6 = f8InA.s6 + f8InB.s6;
|
||||
f8Out.s7 = f8InA.s7 + f8InB.s7;
|
||||
|
||||
// write back out to GMEM
|
||||
c[get_global_id(0)] = f8Out;
|
||||
}
|
||||
///////////////////////////////////////////////////
|
||||
|
||||
|
||||
//-- MAIN METHOD
|
||||
void processMiniCLTask(void* userPtr, void* lsMemory)
|
||||
{
|
||||
// BT_PROFILE("processSampleTask");
|
||||
|
||||
//MiniCLTask_LocalStoreMemory* localMemory = (MiniCLTask_LocalStoreMemory*)lsMemory;
|
||||
MiniCLTask_LocalStoreMemory* localMemory = (MiniCLTask_LocalStoreMemory*)lsMemory;
|
||||
|
||||
MiniCLTaskDesc* taskDescPtr = (MiniCLTaskDesc*)userPtr;
|
||||
MiniCLTaskDesc& taskDesc = *taskDescPtr;
|
||||
|
||||
printf("Compute Unit[%d] executed kernel %d work items [%d..%d)\n",taskDesc.m_taskId,taskDesc.m_kernelProgramId,taskDesc.m_firstWorkUnit,taskDesc.m_lastWorkUnit);
|
||||
|
||||
|
||||
switch (taskDesc.m_kernelProgramId)
|
||||
for (unsigned int i=taskDesc.m_firstWorkUnit;i<taskDesc.m_lastWorkUnit;i++)
|
||||
{
|
||||
case CMD_MINICL_ADDVECTOR:
|
||||
{
|
||||
for (unsigned int i=taskDesc.m_firstWorkUnit;i<taskDesc.m_lastWorkUnit;i++)
|
||||
{
|
||||
VectorAdd(*(const float8**)&taskDesc.m_argData[0][0],*(const float8**)&taskDesc.m_argData[1][0],*(float8**)&taskDesc.m_argData[2][0],i);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
{
|
||||
printf("error in processMiniCLTask: unknown command id: %d\n",taskDesc.m_kernelProgramId);
|
||||
|
||||
}
|
||||
};
|
||||
taskDesc.m_kernel->m_launcher(&taskDesc, i);
|
||||
}
|
||||
|
||||
// printf("Compute Unit[%d] executed kernel %d work items [%d..%d)\n",taskDesc.m_taskId,taskDesc.m_kernelProgramId,taskDesc.m_firstWorkUnit,taskDesc.m_lastWorkUnit);
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -16,39 +16,17 @@ subject to the following restrictions:
|
||||
#ifndef MINICL__TASK_H
|
||||
#define MINICL__TASK_H
|
||||
|
||||
#include "../PlatformDefinitions.h"
|
||||
#include "BulletMultiThreaded/PlatformDefinitions.h"
|
||||
#include "LinearMath/btScalar.h"
|
||||
|
||||
#include "LinearMath/btAlignedAllocator.h"
|
||||
|
||||
|
||||
enum
|
||||
{
|
||||
CMD_MINICL_1= 1,
|
||||
CMD_MINICL_ADDVECTOR
|
||||
};
|
||||
#define MINICL_MAX_ARGLENGTH (sizeof(void*))
|
||||
#define MINI_CL_MAX_ARG 16
|
||||
#define MINI_CL_MAX_KERNEL_NAME 256
|
||||
|
||||
|
||||
|
||||
struct float8
|
||||
{
|
||||
float s0;
|
||||
float s1;
|
||||
float s2;
|
||||
float s3;
|
||||
float s4;
|
||||
float s5;
|
||||
float s6;
|
||||
float s7;
|
||||
|
||||
float8(float scalar)
|
||||
{
|
||||
s0=s1=s2=s3=s4=s5=s6=s7=scalar;
|
||||
}
|
||||
};
|
||||
|
||||
#define MINICL_MAX_ARGLENGTH 128
|
||||
#define MINI_CL_MAX_ARG 8
|
||||
struct MiniCLKernel;
|
||||
|
||||
ATTRIBUTE_ALIGNED16(struct) MiniCLTaskDesc
|
||||
{
|
||||
@@ -62,16 +40,19 @@ ATTRIBUTE_ALIGNED16(struct) MiniCLTaskDesc
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t m_taskId;
|
||||
uint32_t m_taskId;
|
||||
|
||||
uint32_t m_kernelProgramId;
|
||||
uint32_t m_firstWorkUnit;
|
||||
uint32_t m_lastWorkUnit;
|
||||
uint32_t m_firstWorkUnit;
|
||||
uint32_t m_lastWorkUnit;
|
||||
|
||||
char m_argData[MINI_CL_MAX_ARG][MINICL_MAX_ARGLENGTH];
|
||||
int m_argSizes[MINI_CL_MAX_ARG];
|
||||
MiniCLKernel* m_kernel;
|
||||
|
||||
void* m_argData[MINI_CL_MAX_ARG];
|
||||
int m_argSizes[MINI_CL_MAX_ARG];
|
||||
};
|
||||
|
||||
extern "C" int gMiniCLNumOutstandingTasks;
|
||||
|
||||
|
||||
void processMiniCLTask(void* userPtr, void* lsMemory);
|
||||
void* createMiniCLLocalStoreMemory();
|
||||
|
||||
@@ -14,6 +14,7 @@ subject to the following restrictions:
|
||||
*/
|
||||
|
||||
//#define __CELLOS_LV2__ 1
|
||||
#define __BT_SKIP_UINT64_H 1
|
||||
|
||||
#define USE_SAMPLE_PROCESS 1
|
||||
#ifdef USE_SAMPLE_PROCESS
|
||||
@@ -43,21 +44,18 @@ void* SamplelsMemoryFunc()
|
||||
#else
|
||||
|
||||
|
||||
#include "btThreadSupportInterface.h"
|
||||
#include "BulletMultiThreaded/btThreadSupportInterface.h"
|
||||
|
||||
//# include "SPUAssert.h"
|
||||
#include <string.h>
|
||||
|
||||
|
||||
#include "MiniCL/cl_platform.h"
|
||||
|
||||
extern "C" {
|
||||
extern char SPU_SAMPLE_ELF_SYMBOL[];
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
MiniCLTaskScheduler::MiniCLTaskScheduler(btThreadSupportInterface* threadInterface, int maxNumOutstandingTasks)
|
||||
:m_threadInterface(threadInterface),
|
||||
m_maxNumOutstandingTasks(maxNumOutstandingTasks)
|
||||
@@ -66,6 +64,8 @@ m_maxNumOutstandingTasks(maxNumOutstandingTasks)
|
||||
m_taskBusy.resize(m_maxNumOutstandingTasks);
|
||||
m_spuSampleTaskDesc.resize(m_maxNumOutstandingTasks);
|
||||
|
||||
m_kernels.resize(0);
|
||||
|
||||
for (int i = 0; i < m_maxNumOutstandingTasks; i++)
|
||||
{
|
||||
m_taskBusy[i] = false;
|
||||
@@ -105,7 +105,7 @@ void MiniCLTaskScheduler::initialize()
|
||||
}
|
||||
|
||||
|
||||
void MiniCLTaskScheduler::issueTask(int firstWorkUnit, int lastWorkUnit,int kernelProgramId,char* argData,int* argSizes)
|
||||
void MiniCLTaskScheduler::issueTask(int firstWorkUnit, int lastWorkUnit, MiniCLKernel* kernel)
|
||||
{
|
||||
|
||||
#ifdef DEBUG_SPU_TASK_SCHEDULING
|
||||
@@ -120,16 +120,18 @@ void MiniCLTaskScheduler::issueTask(int firstWorkUnit, int lastWorkUnit,int kern
|
||||
// send task description in event message
|
||||
taskDesc.m_firstWorkUnit = firstWorkUnit;
|
||||
taskDesc.m_lastWorkUnit = lastWorkUnit;
|
||||
taskDesc.m_kernelProgramId = kernelProgramId;
|
||||
taskDesc.m_kernel = kernel;
|
||||
//some bookkeeping to recognize finished tasks
|
||||
taskDesc.m_taskId = m_currentTask;
|
||||
|
||||
for (int i=0;i<MINI_CL_MAX_ARG;i++)
|
||||
// for (int i=0;i<MINI_CL_MAX_ARG;i++)
|
||||
for (unsigned int i=0; i < kernel->m_numArgs; i++)
|
||||
{
|
||||
taskDesc.m_argSizes[i] = argSizes[i];
|
||||
taskDesc.m_argSizes[i] = kernel->m_argSizes[i];
|
||||
if (taskDesc.m_argSizes[i])
|
||||
{
|
||||
memcpy(&taskDesc.m_argData[i],&argData[MINICL_MAX_ARGLENGTH*i],taskDesc.m_argSizes[i]);
|
||||
taskDesc.m_argData[i] = kernel->m_argData[i];
|
||||
// memcpy(&taskDesc.m_argData[i],&argData[MINICL_MAX_ARGLENGTH*i],taskDesc.m_argSizes[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -221,6 +223,296 @@ void MiniCLTaskScheduler::flush()
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
typedef void (*MiniCLKernelLauncher0)(int);
|
||||
typedef void (*MiniCLKernelLauncher1)(void*, int);
|
||||
typedef void (*MiniCLKernelLauncher2)(void*, void*, int);
|
||||
typedef void (*MiniCLKernelLauncher3)(void*, void*, void*, int);
|
||||
typedef void (*MiniCLKernelLauncher4)(void*, void*, void*, void*, int);
|
||||
typedef void (*MiniCLKernelLauncher5)(void*, void*, void*, void*, void*, int);
|
||||
typedef void (*MiniCLKernelLauncher6)(void*, void*, void*, void*, void*, void*, int);
|
||||
typedef void (*MiniCLKernelLauncher7)(void*, void*, void*, void*, void*, void*, void*, int);
|
||||
typedef void (*MiniCLKernelLauncher8)(void*, void*, void*, void*, void*, void*, void*, void*, int);
|
||||
typedef void (*MiniCLKernelLauncher9)(void*, void*, void*, void*, void*, void*, void*, void*, void*, int);
|
||||
typedef void (*MiniCLKernelLauncher10)(void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, int);
|
||||
typedef void (*MiniCLKernelLauncher11)(void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, int);
|
||||
typedef void (*MiniCLKernelLauncher12)(void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, int);
|
||||
typedef void (*MiniCLKernelLauncher13)(void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, int);
|
||||
typedef void (*MiniCLKernelLauncher14)(void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, int);
|
||||
typedef void (*MiniCLKernelLauncher15)(void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, int);
|
||||
typedef void (*MiniCLKernelLauncher16)(void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, int);
|
||||
|
||||
|
||||
static void kernelLauncher0(MiniCLTaskDesc* taskDesc, int guid)
|
||||
{
|
||||
((MiniCLKernelLauncher0)(taskDesc->m_kernel->m_launcher))(guid);
|
||||
}
|
||||
static void kernelLauncher1(MiniCLTaskDesc* taskDesc, int guid)
|
||||
{
|
||||
((MiniCLKernelLauncher1)(taskDesc->m_kernel->m_pCode))( taskDesc->m_argData[0],
|
||||
guid);
|
||||
}
|
||||
static void kernelLauncher2(MiniCLTaskDesc* taskDesc, int guid)
|
||||
{
|
||||
((MiniCLKernelLauncher2)(taskDesc->m_kernel->m_pCode))( taskDesc->m_argData[0],
|
||||
taskDesc->m_argData[1],
|
||||
guid);
|
||||
}
|
||||
static void kernelLauncher3(MiniCLTaskDesc* taskDesc, int guid)
|
||||
{
|
||||
((MiniCLKernelLauncher3)(taskDesc->m_kernel->m_pCode))( taskDesc->m_argData[0],
|
||||
taskDesc->m_argData[1],
|
||||
taskDesc->m_argData[2],
|
||||
guid);
|
||||
}
|
||||
static void kernelLauncher4(MiniCLTaskDesc* taskDesc, int guid)
|
||||
{
|
||||
((MiniCLKernelLauncher4)(taskDesc->m_kernel->m_pCode))( taskDesc->m_argData[0],
|
||||
taskDesc->m_argData[1],
|
||||
taskDesc->m_argData[2],
|
||||
taskDesc->m_argData[3],
|
||||
guid);
|
||||
}
|
||||
static void kernelLauncher5(MiniCLTaskDesc* taskDesc, int guid)
|
||||
{
|
||||
((MiniCLKernelLauncher5)(taskDesc->m_kernel->m_pCode))( taskDesc->m_argData[0],
|
||||
taskDesc->m_argData[1],
|
||||
taskDesc->m_argData[2],
|
||||
taskDesc->m_argData[3],
|
||||
taskDesc->m_argData[4],
|
||||
guid);
|
||||
}
|
||||
static void kernelLauncher6(MiniCLTaskDesc* taskDesc, int guid)
|
||||
{
|
||||
((MiniCLKernelLauncher6)(taskDesc->m_kernel->m_pCode))( taskDesc->m_argData[0],
|
||||
taskDesc->m_argData[1],
|
||||
taskDesc->m_argData[2],
|
||||
taskDesc->m_argData[3],
|
||||
taskDesc->m_argData[4],
|
||||
taskDesc->m_argData[5],
|
||||
guid);
|
||||
}
|
||||
static void kernelLauncher7(MiniCLTaskDesc* taskDesc, int guid)
|
||||
{
|
||||
((MiniCLKernelLauncher7)(taskDesc->m_kernel->m_pCode))( taskDesc->m_argData[0],
|
||||
taskDesc->m_argData[1],
|
||||
taskDesc->m_argData[2],
|
||||
taskDesc->m_argData[3],
|
||||
taskDesc->m_argData[4],
|
||||
taskDesc->m_argData[5],
|
||||
taskDesc->m_argData[6],
|
||||
guid);
|
||||
}
|
||||
static void kernelLauncher8(MiniCLTaskDesc* taskDesc, int guid)
|
||||
{
|
||||
((MiniCLKernelLauncher8)(taskDesc->m_kernel->m_pCode))( taskDesc->m_argData[0],
|
||||
taskDesc->m_argData[1],
|
||||
taskDesc->m_argData[2],
|
||||
taskDesc->m_argData[3],
|
||||
taskDesc->m_argData[4],
|
||||
taskDesc->m_argData[5],
|
||||
taskDesc->m_argData[6],
|
||||
taskDesc->m_argData[7],
|
||||
guid);
|
||||
}
|
||||
static void kernelLauncher9(MiniCLTaskDesc* taskDesc, int guid)
|
||||
{
|
||||
((MiniCLKernelLauncher9)(taskDesc->m_kernel->m_pCode))( taskDesc->m_argData[0],
|
||||
taskDesc->m_argData[1],
|
||||
taskDesc->m_argData[2],
|
||||
taskDesc->m_argData[3],
|
||||
taskDesc->m_argData[4],
|
||||
taskDesc->m_argData[5],
|
||||
taskDesc->m_argData[6],
|
||||
taskDesc->m_argData[7],
|
||||
taskDesc->m_argData[8],
|
||||
guid);
|
||||
}
|
||||
static void kernelLauncher10(MiniCLTaskDesc* taskDesc, int guid)
|
||||
{
|
||||
((MiniCLKernelLauncher10)(taskDesc->m_kernel->m_pCode))(taskDesc->m_argData[0],
|
||||
taskDesc->m_argData[1],
|
||||
taskDesc->m_argData[2],
|
||||
taskDesc->m_argData[3],
|
||||
taskDesc->m_argData[4],
|
||||
taskDesc->m_argData[5],
|
||||
taskDesc->m_argData[6],
|
||||
taskDesc->m_argData[7],
|
||||
taskDesc->m_argData[8],
|
||||
taskDesc->m_argData[9],
|
||||
guid);
|
||||
}
|
||||
static void kernelLauncher11(MiniCLTaskDesc* taskDesc, int guid)
|
||||
{
|
||||
((MiniCLKernelLauncher11)(taskDesc->m_kernel->m_pCode))(taskDesc->m_argData[0],
|
||||
taskDesc->m_argData[1],
|
||||
taskDesc->m_argData[2],
|
||||
taskDesc->m_argData[3],
|
||||
taskDesc->m_argData[4],
|
||||
taskDesc->m_argData[5],
|
||||
taskDesc->m_argData[6],
|
||||
taskDesc->m_argData[7],
|
||||
taskDesc->m_argData[8],
|
||||
taskDesc->m_argData[9],
|
||||
taskDesc->m_argData[10],
|
||||
guid);
|
||||
}
|
||||
static void kernelLauncher12(MiniCLTaskDesc* taskDesc, int guid)
|
||||
{
|
||||
((MiniCLKernelLauncher12)(taskDesc->m_kernel->m_pCode))(taskDesc->m_argData[0],
|
||||
taskDesc->m_argData[1],
|
||||
taskDesc->m_argData[2],
|
||||
taskDesc->m_argData[3],
|
||||
taskDesc->m_argData[4],
|
||||
taskDesc->m_argData[5],
|
||||
taskDesc->m_argData[6],
|
||||
taskDesc->m_argData[7],
|
||||
taskDesc->m_argData[8],
|
||||
taskDesc->m_argData[9],
|
||||
taskDesc->m_argData[10],
|
||||
taskDesc->m_argData[11],
|
||||
guid);
|
||||
}
|
||||
static void kernelLauncher13(MiniCLTaskDesc* taskDesc, int guid)
|
||||
{
|
||||
((MiniCLKernelLauncher13)(taskDesc->m_kernel->m_pCode))(taskDesc->m_argData[0],
|
||||
taskDesc->m_argData[1],
|
||||
taskDesc->m_argData[2],
|
||||
taskDesc->m_argData[3],
|
||||
taskDesc->m_argData[4],
|
||||
taskDesc->m_argData[5],
|
||||
taskDesc->m_argData[6],
|
||||
taskDesc->m_argData[7],
|
||||
taskDesc->m_argData[8],
|
||||
taskDesc->m_argData[9],
|
||||
taskDesc->m_argData[10],
|
||||
taskDesc->m_argData[11],
|
||||
taskDesc->m_argData[12],
|
||||
guid);
|
||||
}
|
||||
static void kernelLauncher14(MiniCLTaskDesc* taskDesc, int guid)
|
||||
{
|
||||
((MiniCLKernelLauncher14)(taskDesc->m_kernel->m_pCode))(taskDesc->m_argData[0],
|
||||
taskDesc->m_argData[1],
|
||||
taskDesc->m_argData[2],
|
||||
taskDesc->m_argData[3],
|
||||
taskDesc->m_argData[4],
|
||||
taskDesc->m_argData[5],
|
||||
taskDesc->m_argData[6],
|
||||
taskDesc->m_argData[7],
|
||||
taskDesc->m_argData[8],
|
||||
taskDesc->m_argData[9],
|
||||
taskDesc->m_argData[10],
|
||||
taskDesc->m_argData[11],
|
||||
taskDesc->m_argData[12],
|
||||
taskDesc->m_argData[13],
|
||||
guid);
|
||||
}
|
||||
static void kernelLauncher15(MiniCLTaskDesc* taskDesc, int guid)
|
||||
{
|
||||
((MiniCLKernelLauncher15)(taskDesc->m_kernel->m_pCode))(taskDesc->m_argData[0],
|
||||
taskDesc->m_argData[1],
|
||||
taskDesc->m_argData[2],
|
||||
taskDesc->m_argData[3],
|
||||
taskDesc->m_argData[4],
|
||||
taskDesc->m_argData[5],
|
||||
taskDesc->m_argData[6],
|
||||
taskDesc->m_argData[7],
|
||||
taskDesc->m_argData[8],
|
||||
taskDesc->m_argData[9],
|
||||
taskDesc->m_argData[10],
|
||||
taskDesc->m_argData[11],
|
||||
taskDesc->m_argData[12],
|
||||
taskDesc->m_argData[13],
|
||||
taskDesc->m_argData[14],
|
||||
guid);
|
||||
}
|
||||
static void kernelLauncher16(MiniCLTaskDesc* taskDesc, int guid)
|
||||
{
|
||||
((MiniCLKernelLauncher16)(taskDesc->m_kernel->m_pCode))(taskDesc->m_argData[0],
|
||||
taskDesc->m_argData[1],
|
||||
taskDesc->m_argData[2],
|
||||
taskDesc->m_argData[3],
|
||||
taskDesc->m_argData[4],
|
||||
taskDesc->m_argData[5],
|
||||
taskDesc->m_argData[6],
|
||||
taskDesc->m_argData[7],
|
||||
taskDesc->m_argData[8],
|
||||
taskDesc->m_argData[9],
|
||||
taskDesc->m_argData[10],
|
||||
taskDesc->m_argData[11],
|
||||
taskDesc->m_argData[12],
|
||||
taskDesc->m_argData[13],
|
||||
taskDesc->m_argData[14],
|
||||
taskDesc->m_argData[15],
|
||||
guid);
|
||||
}
|
||||
|
||||
static kernelLauncherCB spLauncherList[MINI_CL_MAX_ARG+1] =
|
||||
{
|
||||
kernelLauncher0,
|
||||
kernelLauncher1,
|
||||
kernelLauncher2,
|
||||
kernelLauncher3,
|
||||
kernelLauncher4,
|
||||
kernelLauncher5,
|
||||
kernelLauncher6,
|
||||
kernelLauncher7,
|
||||
kernelLauncher8,
|
||||
kernelLauncher9,
|
||||
kernelLauncher10,
|
||||
kernelLauncher11,
|
||||
kernelLauncher12,
|
||||
kernelLauncher13,
|
||||
kernelLauncher14,
|
||||
kernelLauncher15,
|
||||
kernelLauncher16
|
||||
};
|
||||
|
||||
void MiniCLKernel::updateLauncher()
|
||||
{
|
||||
m_launcher = spLauncherList[m_numArgs];
|
||||
}
|
||||
|
||||
struct MiniCLKernelDescEntry
|
||||
{
|
||||
void* pCode;
|
||||
char* pName;
|
||||
};
|
||||
static MiniCLKernelDescEntry spKernelDesc[256];
|
||||
static int sNumKernelDesc = 0;
|
||||
|
||||
MiniCLKernelDesc::MiniCLKernelDesc(void* pCode, char* pName)
|
||||
{
|
||||
for(int i = 0; i < sNumKernelDesc; i++)
|
||||
{
|
||||
if(!strcmp(pName, spKernelDesc[i].pName))
|
||||
{ // already registered
|
||||
btAssert(spKernelDesc[i].pCode == pCode);
|
||||
return;
|
||||
}
|
||||
}
|
||||
spKernelDesc[sNumKernelDesc].pCode = pCode;
|
||||
spKernelDesc[sNumKernelDesc].pName = pName;
|
||||
sNumKernelDesc++;
|
||||
}
|
||||
|
||||
|
||||
MiniCLKernel* MiniCLKernel::registerSelf()
|
||||
{
|
||||
m_scheduler->registerKernel(this);
|
||||
for(int i = 0; i < sNumKernelDesc; i++)
|
||||
{
|
||||
if(!strcmp(m_name, spKernelDesc[i].pName))
|
||||
{
|
||||
m_pCode = spKernelDesc[i].pCode;
|
||||
return this;
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
@@ -21,7 +21,7 @@ subject to the following restrictions:
|
||||
#include <assert.h>
|
||||
|
||||
|
||||
#include "PlatformDefinitions.h"
|
||||
#include "BulletMultiThreaded/PlatformDefinitions.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
@@ -30,11 +30,10 @@ subject to the following restrictions:
|
||||
|
||||
#include "MiniCLTask/MiniCLTask.h"
|
||||
|
||||
|
||||
//just add your commands here, try to keep them globally unique for debugging purposes
|
||||
#define CMD_SAMPLE_TASK_COMMAND 10
|
||||
|
||||
|
||||
struct MiniCLKernel;
|
||||
|
||||
/// MiniCLTaskScheduler handles SPU processing of collision pairs.
|
||||
/// When PPU issues a task, it will look for completed task buffers
|
||||
@@ -44,7 +43,11 @@ class MiniCLTaskScheduler
|
||||
// track task buffers that are being used, and total busy tasks
|
||||
btAlignedObjectArray<bool> m_taskBusy;
|
||||
btAlignedObjectArray<MiniCLTaskDesc> m_spuSampleTaskDesc;
|
||||
|
||||
|
||||
|
||||
btAlignedObjectArray<const MiniCLKernel*> m_kernels;
|
||||
|
||||
|
||||
int m_numBusyTasks;
|
||||
|
||||
// the current task and the current entry to insert a new work unit
|
||||
@@ -68,7 +71,7 @@ public:
|
||||
///call initialize in the beginning of the frame, before addCollisionPairToTask
|
||||
void initialize();
|
||||
|
||||
void issueTask(int firstWorkUnit, int lastWorkUnit,int kernelProgramId,char* argData,int* argSizes);
|
||||
void issueTask(int firstWorkUnit, int lastWorkUnit, MiniCLKernel* kernel);
|
||||
|
||||
///call flush to submit potential outstanding work to SPUs and wait for all involved SPUs to be finished
|
||||
void flush();
|
||||
@@ -78,25 +81,35 @@ public:
|
||||
return m_threadInterface;
|
||||
}
|
||||
|
||||
int findProgramCommandIdByName(const char* programName) const
|
||||
{
|
||||
return CMD_MINICL_ADDVECTOR;//hardcoded temp value, todo: implement multi-program support
|
||||
}
|
||||
int findProgramCommandIdByName(const char* programName) const;
|
||||
|
||||
int getMaxNumOutstandingTasks() const
|
||||
{
|
||||
return m_maxNumOutstandingTasks;
|
||||
}
|
||||
|
||||
void registerKernel(MiniCLKernel* kernel)
|
||||
{
|
||||
m_kernels.push_back(kernel);
|
||||
}
|
||||
};
|
||||
|
||||
typedef void (*kernelLauncherCB)(MiniCLTaskDesc* taskDesc, int guid);
|
||||
|
||||
struct MiniCLKernel
|
||||
{
|
||||
MiniCLTaskScheduler* m_scheduler;
|
||||
|
||||
int m_kernelProgramCommandId;
|
||||
// int m_kernelProgramCommandId;
|
||||
|
||||
char m_argData[MINI_CL_MAX_ARG][MINICL_MAX_ARGLENGTH];
|
||||
char m_name[MINI_CL_MAX_KERNEL_NAME];
|
||||
unsigned int m_numArgs;
|
||||
kernelLauncherCB m_launcher;
|
||||
void* m_pCode;
|
||||
void updateLauncher();
|
||||
MiniCLKernel* registerSelf();
|
||||
|
||||
void* m_argData[MINI_CL_MAX_ARG];
|
||||
int m_argSizes[MINI_CL_MAX_ARG];
|
||||
};
|
||||
|
||||
|
||||
@@ -19,7 +19,9 @@ typedef union
|
||||
|
||||
typedef unsigned char uint8_t;
|
||||
#ifndef __PHYSICS_COMMON_H__
|
||||
#ifndef __BT_SKIP_UINT64_H
|
||||
typedef unsigned long int uint64_t;
|
||||
#endif //__BT_SKIP_UINT64_H
|
||||
typedef unsigned int uint32_t;
|
||||
#endif //__PHYSICS_COMMON_H__
|
||||
typedef unsigned short uint16_t;
|
||||
|
||||
@@ -2,11 +2,11 @@
|
||||
#define __PPU_ADDRESS_SPACE_H
|
||||
|
||||
|
||||
#ifdef WIN32
|
||||
#ifdef _WIN32
|
||||
//stop those casting warnings until we have a better solution for ppu_address_t / void* / uint64 conversions
|
||||
#pragma warning (disable: 4311)
|
||||
#pragma warning (disable: 4312)
|
||||
#endif //WIN32
|
||||
#endif //_WIN32
|
||||
|
||||
#if defined(_WIN64) || defined(__LP64__) || defined(__x86_64__) || defined(USE_ADDR64)
|
||||
typedef uint64_t ppu_address_t;
|
||||
|
||||
263
src/MiniCL/cl_MiniCL_Defs.h
Normal file
263
src/MiniCL/cl_MiniCL_Defs.h
Normal file
@@ -0,0 +1,263 @@
|
||||
/*
|
||||
Bullet Continuous Collision Detection and Physics Library, Copyright (c) 2007 Erwin Coumans
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
|
||||
*/
|
||||
|
||||
#include <float.h>
|
||||
#include <math.h>
|
||||
#include "LinearMath/btScalar.h"
|
||||
|
||||
#include "MiniCL/cl.h"
|
||||
|
||||
|
||||
#define __kernel
|
||||
#define __global
|
||||
#define __local
|
||||
#define get_global_id(a) __guid_arg
|
||||
#define get_local_id(a) ((__guid_arg) % gMiniCLNumOutstandingTasks)
|
||||
#define get_local_size(a) (gMiniCLNumOutstandingTasks)
|
||||
#define get_group_id(a) ((__guid_arg) / gMiniCLNumOutstandingTasks)
|
||||
|
||||
#define CLK_LOCAL_MEM_FENCE 0x01
|
||||
#define CLK_GLOBAL_MEM_FENCE 0x02
|
||||
|
||||
static void barrier(unsigned int a)
|
||||
{
|
||||
// TODO : implement
|
||||
}
|
||||
|
||||
ATTRIBUTE_ALIGNED16(struct) float8
|
||||
{
|
||||
float s0;
|
||||
float s1;
|
||||
float s2;
|
||||
float s3;
|
||||
float s4;
|
||||
float s5;
|
||||
float s6;
|
||||
float s7;
|
||||
|
||||
float8(float scalar)
|
||||
{
|
||||
s0=s1=s2=s3=s4=s5=s6=s7=scalar;
|
||||
}
|
||||
};
|
||||
|
||||
ATTRIBUTE_ALIGNED16(struct) float4
|
||||
{
|
||||
float x,y,z,w;
|
||||
float4() {}
|
||||
float4(float v)
|
||||
{
|
||||
x = y = z = w = v;
|
||||
}
|
||||
float4 operator*(const float4& other)
|
||||
{
|
||||
float4 tmp;
|
||||
tmp.x = x*other.x;
|
||||
tmp.y = y*other.y;
|
||||
tmp.z = z*other.z;
|
||||
tmp.w = w*other.w;
|
||||
return tmp;
|
||||
}
|
||||
|
||||
float4 operator*(const float& other)
|
||||
{
|
||||
float4 tmp;
|
||||
tmp.x = x*other;
|
||||
tmp.y = y*other;
|
||||
tmp.z = z*other;
|
||||
tmp.w = w*other;
|
||||
return tmp;
|
||||
}
|
||||
|
||||
|
||||
|
||||
float4& operator+=(const float4& other)
|
||||
{
|
||||
x += other.x;
|
||||
y += other.y;
|
||||
z += other.z;
|
||||
w += other.w;
|
||||
return *this;
|
||||
}
|
||||
|
||||
float4& operator-=(const float4& other)
|
||||
{
|
||||
x -= other.x;
|
||||
y -= other.y;
|
||||
z -= other.z;
|
||||
w -= other.w;
|
||||
return *this;
|
||||
}
|
||||
|
||||
float4& operator *=(float scalar)
|
||||
{
|
||||
x *= scalar;
|
||||
y *= scalar;
|
||||
z *= scalar;
|
||||
w *= scalar;
|
||||
return (*this);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
};
|
||||
|
||||
static float4 fabs(const float4& a)
|
||||
{
|
||||
float4 tmp;
|
||||
tmp.x = a.x < 0.f ? 0.f : a.x;
|
||||
tmp.y = a.y < 0.f ? 0.f : a.y;
|
||||
tmp.z = a.z < 0.f ? 0.f : a.z;
|
||||
tmp.w = a.w < 0.f ? 0.f : a.w;
|
||||
return tmp;
|
||||
}
|
||||
static float4 operator+(const float4& a,const float4& b)
|
||||
{
|
||||
float4 tmp;
|
||||
tmp.x = a.x + b.x;
|
||||
tmp.y = a.y + b.y;
|
||||
tmp.z = a.z + b.z;
|
||||
tmp.w = a.w + b.w;
|
||||
return tmp;
|
||||
}
|
||||
|
||||
static float4 operator-(const float4& a,const float4& b)
|
||||
{
|
||||
float4 tmp;
|
||||
tmp.x = a.x - b.x;
|
||||
tmp.y = a.y - b.y;
|
||||
tmp.z = a.z - b.z;
|
||||
tmp.w = a.w - b.w;
|
||||
return tmp;
|
||||
}
|
||||
static float4 operator*(float a,const float4& b)
|
||||
{
|
||||
float4 tmp;
|
||||
tmp.x = a * b.x;
|
||||
tmp.y = a * b.y;
|
||||
tmp.z = a * b.z;
|
||||
tmp.w = a * b.w;
|
||||
return tmp;
|
||||
}
|
||||
|
||||
|
||||
static float dot(const float4&a ,const float4& b)
|
||||
{
|
||||
float4 tmp;
|
||||
tmp.x = a.x*b.x;
|
||||
tmp.y = a.y*b.y;
|
||||
tmp.z = a.z*b.z;
|
||||
tmp.w = a.w*b.w;
|
||||
return tmp.x+tmp.y+tmp.z+tmp.w;
|
||||
}
|
||||
|
||||
static float4 cross(const float4&a ,const float4& b)
|
||||
{
|
||||
float4 tmp;
|
||||
tmp.x = a.y*b.z - a.z*b.y;
|
||||
tmp.y = -a.x*b.z + a.z*b.x;
|
||||
tmp.z = a.x*b.y - a.y*b.x;
|
||||
tmp.w = 0.f;
|
||||
return tmp;
|
||||
}
|
||||
|
||||
static float max(float a, float b)
|
||||
{
|
||||
return (a >= b) ? a : b;
|
||||
}
|
||||
|
||||
|
||||
static float min(float a, float b)
|
||||
{
|
||||
return (a <= b) ? a : b;
|
||||
}
|
||||
|
||||
static float fmax(float a, float b)
|
||||
{
|
||||
return (a >= b) ? a : b;
|
||||
}
|
||||
|
||||
static float fmin(float a, float b)
|
||||
{
|
||||
return (a <= b) ? a : b;
|
||||
}
|
||||
|
||||
struct int2
|
||||
{
|
||||
int x,y;
|
||||
};
|
||||
|
||||
struct uint2
|
||||
{
|
||||
unsigned int x,y;
|
||||
};
|
||||
|
||||
//typedef int2 uint2;
|
||||
|
||||
typedef unsigned int uint;
|
||||
|
||||
struct int4
|
||||
{
|
||||
int x,y,z,w;
|
||||
};
|
||||
|
||||
struct uint4
|
||||
{
|
||||
unsigned int x,y,z,w;
|
||||
uint4() {}
|
||||
uint4(uint val) { x = y = z = w = val; }
|
||||
uint4& operator+=(const uint4& other)
|
||||
{
|
||||
x += other.x;
|
||||
y += other.y;
|
||||
z += other.z;
|
||||
w += other.w;
|
||||
return *this;
|
||||
}
|
||||
};
|
||||
static uint4 operator+(const uint4& a,const uint4& b)
|
||||
{
|
||||
uint4 tmp;
|
||||
tmp.x = a.x + b.x;
|
||||
tmp.y = a.y + b.y;
|
||||
tmp.z = a.z + b.z;
|
||||
tmp.w = a.w + b.w;
|
||||
return tmp;
|
||||
}
|
||||
static uint4 operator-(const uint4& a,const uint4& b)
|
||||
{
|
||||
uint4 tmp;
|
||||
tmp.x = a.x - b.x;
|
||||
tmp.y = a.y - b.y;
|
||||
tmp.z = a.z - b.z;
|
||||
tmp.w = a.w - b.w;
|
||||
return tmp;
|
||||
}
|
||||
|
||||
#define native_sqrt sqrtf
|
||||
#define native_sin sinf
|
||||
#define native_cos cosf
|
||||
#define native_powr powf
|
||||
|
||||
#define GUID_ARG ,int __guid_arg
|
||||
#define GUID_ARG_VAL ,__guid_arg
|
||||
|
||||
|
||||
#define as_int(a) (*((int*)&(a)))
|
||||
|
||||
extern "C" int gMiniCLNumOutstandingTasks;
|
||||
// extern "C" void __kernel_func(); \
|
||||
@@ -1,244 +1,254 @@
|
||||
/**********************************************************************************
|
||||
* Copyright (c) 2008-2009 The Khronos Group Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and/or associated documentation files (the
|
||||
* "Materials"), to deal in the Materials without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Materials, and to
|
||||
* permit persons to whom the Materials are furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Materials.
|
||||
*
|
||||
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
|
||||
**********************************************************************************/
|
||||
|
||||
#ifndef __CL_PLATFORM_H
|
||||
#define __CL_PLATFORM_H
|
||||
|
||||
#ifdef __APPLE__
|
||||
/* Contains #defines for AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER below */
|
||||
#include <AvailabilityMacros.h>
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define CL_API_ENTRY
|
||||
#define CL_API_CALL
|
||||
#ifdef __APPLE__
|
||||
#define CL_API_SUFFIX__VERSION_1_0 // AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER
|
||||
#define CL_EXTENSION_WEAK_LINK __attribute__((weak_import))
|
||||
#else
|
||||
#define CL_API_SUFFIX__VERSION_1_0
|
||||
#define CL_EXTENSION_WEAK_LINK
|
||||
#endif
|
||||
|
||||
#ifdef _WIN32
|
||||
typedef signed __int8 int8_t;
|
||||
typedef unsigned __int8 uint8_t;
|
||||
typedef signed __int16 int16_t;
|
||||
typedef unsigned __int16 uint16_t;
|
||||
typedef signed __int32 int32_t;
|
||||
typedef unsigned __int32 uint32_t;
|
||||
typedef signed __int64 int64_t;
|
||||
typedef unsigned __int64 uint64_t;
|
||||
|
||||
typedef int8_t cl_char;
|
||||
typedef uint8_t cl_uchar;
|
||||
typedef int16_t cl_short ;
|
||||
typedef uint16_t cl_ushort ;
|
||||
typedef int32_t cl_int ;
|
||||
typedef uint32_t cl_uint ;
|
||||
typedef int64_t cl_long ;
|
||||
typedef uint64_t cl_ulong ;
|
||||
|
||||
typedef uint16_t cl_half ;
|
||||
typedef float cl_float ;
|
||||
typedef double cl_double ;
|
||||
|
||||
|
||||
typedef int8_t cl_char2[2] ;
|
||||
typedef int8_t cl_char4[4] ;
|
||||
typedef int8_t cl_char8[8] ;
|
||||
typedef int8_t cl_char16[16] ;
|
||||
typedef uint8_t cl_uchar2[2] ;
|
||||
typedef uint8_t cl_uchar4[4] ;
|
||||
typedef uint8_t cl_uchar8[8] ;
|
||||
typedef uint8_t cl_uchar16[16] ;
|
||||
|
||||
typedef int16_t cl_short2[2] ;
|
||||
typedef int16_t cl_short4[4] ;
|
||||
typedef int16_t cl_short8[8] ;
|
||||
typedef int16_t cl_short16[16] ;
|
||||
typedef uint16_t cl_ushort2[2] ;
|
||||
typedef uint16_t cl_ushort4[4] ;
|
||||
typedef uint16_t cl_ushort8[8] ;
|
||||
typedef uint16_t cl_ushort16[16] ;
|
||||
|
||||
typedef int32_t cl_int2[2] ;
|
||||
typedef int32_t cl_int4[4] ;
|
||||
typedef int32_t cl_int8[8] ;
|
||||
typedef int32_t cl_int16[16] ;
|
||||
typedef uint32_t cl_uint2[2] ;
|
||||
typedef uint32_t cl_uint4[4] ;
|
||||
typedef uint32_t cl_uint8[8] ;
|
||||
typedef uint32_t cl_uint16[16] ;
|
||||
|
||||
typedef int64_t cl_long2[2] ;
|
||||
typedef int64_t cl_long4[4] ;
|
||||
typedef int64_t cl_long8[8] ;
|
||||
typedef int64_t cl_long16[16] ;
|
||||
typedef uint64_t cl_ulong2[2] ;
|
||||
typedef uint64_t cl_ulong4[4] ;
|
||||
typedef uint64_t cl_ulong8[8] ;
|
||||
typedef uint64_t cl_ulong16[16] ;
|
||||
|
||||
typedef float cl_float2[2] ;
|
||||
typedef float cl_float4[4] ;
|
||||
typedef float cl_float8[8] ;
|
||||
typedef float cl_float16[16] ;
|
||||
|
||||
typedef double cl_double2[2] ;
|
||||
typedef double cl_double4[4] ;
|
||||
typedef double cl_double8[8] ;
|
||||
typedef double cl_double16[16] ;
|
||||
|
||||
|
||||
#else
|
||||
#include <stdint.h>
|
||||
|
||||
/* scalar types */
|
||||
typedef int8_t cl_char;
|
||||
typedef uint8_t cl_uchar;
|
||||
typedef int16_t cl_short __attribute__((aligned(2)));
|
||||
typedef uint16_t cl_ushort __attribute__((aligned(2)));
|
||||
typedef int32_t cl_int __attribute__((aligned(4)));
|
||||
typedef uint32_t cl_uint __attribute__((aligned(4)));
|
||||
typedef int64_t cl_long __attribute__((aligned(8)));
|
||||
typedef uint64_t cl_ulong __attribute__((aligned(8)));
|
||||
|
||||
typedef uint16_t cl_half __attribute__((aligned(2)));
|
||||
typedef float cl_float __attribute__((aligned(4)));
|
||||
typedef double cl_double __attribute__((aligned(8)));
|
||||
|
||||
|
||||
/*
|
||||
* Vector types
|
||||
*
|
||||
* Note: OpenCL requires that all types be naturally aligned.
|
||||
* This means that vector types must be naturally aligned.
|
||||
* For example, a vector of four floats must be aligned to
|
||||
* a 16 byte boundary (calculated as 4 * the natural 4-byte
|
||||
* alignment of the float). The alignment qualifiers here
|
||||
* will only function properly if your compiler supports them
|
||||
* and if you don't actively work to defeat them. For example,
|
||||
* in order for a cl_float4 to be 16 byte aligned in a struct,
|
||||
* the start of the struct must itself be 16-byte aligned.
|
||||
*
|
||||
* Maintaining proper alignment is the user's responsibility.
|
||||
*/
|
||||
typedef int8_t cl_char2[2] __attribute__((aligned(2)));
|
||||
typedef int8_t cl_char4[4] __attribute__((aligned(4)));
|
||||
typedef int8_t cl_char8[8] __attribute__((aligned(8)));
|
||||
typedef int8_t cl_char16[16] __attribute__((aligned(16)));
|
||||
typedef uint8_t cl_uchar2[2] __attribute__((aligned(2)));
|
||||
typedef uint8_t cl_uchar4[4] __attribute__((aligned(4)));
|
||||
typedef uint8_t cl_uchar8[8] __attribute__((aligned(8)));
|
||||
typedef uint8_t cl_uchar16[16] __attribute__((aligned(16)));
|
||||
|
||||
typedef int16_t cl_short2[2] __attribute__((aligned(4)));
|
||||
typedef int16_t cl_short4[4] __attribute__((aligned(8)));
|
||||
typedef int16_t cl_short8[8] __attribute__((aligned(16)));
|
||||
typedef int16_t cl_short16[16] __attribute__((aligned(32)));
|
||||
typedef uint16_t cl_ushort2[2] __attribute__((aligned(4)));
|
||||
typedef uint16_t cl_ushort4[4] __attribute__((aligned(8)));
|
||||
typedef uint16_t cl_ushort8[8] __attribute__((aligned(16)));
|
||||
typedef uint16_t cl_ushort16[16] __attribute__((aligned(32)));
|
||||
|
||||
typedef int32_t cl_int2[2] __attribute__((aligned(8)));
|
||||
typedef int32_t cl_int4[4] __attribute__((aligned(16)));
|
||||
typedef int32_t cl_int8[8] __attribute__((aligned(32)));
|
||||
typedef int32_t cl_int16[16] __attribute__((aligned(64)));
|
||||
typedef uint32_t cl_uint2[2] __attribute__((aligned(8)));
|
||||
typedef uint32_t cl_uint4[4] __attribute__((aligned(16)));
|
||||
typedef uint32_t cl_uint8[8] __attribute__((aligned(32)));
|
||||
typedef uint32_t cl_uint16[16] __attribute__((aligned(64)));
|
||||
|
||||
typedef int64_t cl_long2[2] __attribute__((aligned(16)));
|
||||
typedef int64_t cl_long4[4] __attribute__((aligned(32)));
|
||||
typedef int64_t cl_long8[8] __attribute__((aligned(64)));
|
||||
typedef int64_t cl_long16[16] __attribute__((aligned(128)));
|
||||
typedef uint64_t cl_ulong2[2] __attribute__((aligned(16)));
|
||||
typedef uint64_t cl_ulong4[4] __attribute__((aligned(32)));
|
||||
typedef uint64_t cl_ulong8[8] __attribute__((aligned(64)));
|
||||
typedef uint64_t cl_ulong16[16] __attribute__((aligned(128)));
|
||||
|
||||
typedef float cl_float2[2] __attribute__((aligned(8)));
|
||||
typedef float cl_float4[4] __attribute__((aligned(16)));
|
||||
typedef float cl_float8[8] __attribute__((aligned(32)));
|
||||
typedef float cl_float16[16] __attribute__((aligned(64)));
|
||||
|
||||
typedef double cl_double2[2] __attribute__((aligned(16)));
|
||||
typedef double cl_double4[4] __attribute__((aligned(32)));
|
||||
typedef double cl_double8[8] __attribute__((aligned(64)));
|
||||
typedef double cl_double16[16] __attribute__((aligned(128)));
|
||||
#endif
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
/* and a few goodies to go with them */
|
||||
#define CL_CHAR_BIT 8
|
||||
#define CL_SCHAR_MAX 127
|
||||
#define CL_SCHAR_MIN (-127-1)
|
||||
#define CL_CHAR_MAX CL_SCHAR_MAX
|
||||
#define CL_CHAR_MIN CL_SCHAR_MIN
|
||||
#define CL_UCHAR_MAX 255
|
||||
#define CL_SHRT_MAX 32767
|
||||
#define CL_SHRT_MIN (-32767-1)
|
||||
#define CL_USHRT_MAX 65535
|
||||
#define CL_INT_MAX 2147483647
|
||||
#define CL_INT_MIN (-2147483647-1)
|
||||
#define CL_UINT_MAX 0xffffffffU
|
||||
#define CL_LONG_MAX ((cl_long) 0x7FFFFFFFFFFFFFFFLL)
|
||||
#define CL_LONG_MIN ((cl_long) -0x7FFFFFFFFFFFFFFFLL - 1LL)
|
||||
#define CL_ULONG_MAX ((cl_ulong) 0xFFFFFFFFFFFFFFFFULL)
|
||||
|
||||
#define CL_FLT_DIG 6
|
||||
#define CL_FLT_MANT_DIG 24
|
||||
#define CL_FLT_MAX_10_EXP +38
|
||||
#define CL_FLT_MAX_EXP +128
|
||||
#define CL_FLT_MIN_10_EXP -37
|
||||
#define CL_FLT_MIN_EXP -125
|
||||
#define CL_FLT_RADIX 2
|
||||
#define CL_FLT_MAX 0x1.fffffep127f
|
||||
#define CL_FLT_MIN 0x1.0p-126f
|
||||
#define CL_FLT_EPSILON 0x1.0p-23f
|
||||
|
||||
#define CL_DBL_DIG 15
|
||||
#define CL_DBL_MANT_DIG 53
|
||||
#define CL_DBL_MAX_10_EXP +308
|
||||
#define CL_DBL_MAX_EXP +1024
|
||||
#define CL_DBL_MIN_10_EXP -307
|
||||
#define CL_DBL_MIN_EXP -1021
|
||||
#define CL_DBL_RADIX 2
|
||||
#define CL_DBL_MAX 0x1.fffffffffffffp1023
|
||||
#define CL_DBL_MIN 0x1.0p-1022
|
||||
#define CL_DBL_EPSILON 0x1.0p-52
|
||||
|
||||
/* There are no vector types for half */
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // __CL_PLATFORM_H
|
||||
/**********************************************************************************
|
||||
* Copyright (c) 2008-2009 The Khronos Group Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and/or associated documentation files (the
|
||||
* "Materials"), to deal in the Materials without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Materials, and to
|
||||
* permit persons to whom the Materials are furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Materials.
|
||||
*
|
||||
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
|
||||
**********************************************************************************/
|
||||
|
||||
#ifndef __CL_PLATFORM_H
|
||||
#define __CL_PLATFORM_H
|
||||
|
||||
#define CL_PLATFORM_MINI_CL 0x12345
|
||||
|
||||
struct MiniCLKernelDesc
|
||||
{
|
||||
MiniCLKernelDesc(void* pCode, char* pName);
|
||||
};
|
||||
|
||||
#define MINICL_REGISTER(__kernel_func) static MiniCLKernelDesc __kernel_func##Desc(__kernel_func, #__kernel_func);
|
||||
|
||||
|
||||
#ifdef __APPLE__
|
||||
/* Contains #defines for AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER below */
|
||||
#include <AvailabilityMacros.h>
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define CL_API_ENTRY
|
||||
#define CL_API_CALL
|
||||
#ifdef __APPLE__
|
||||
#define CL_API_SUFFIX__VERSION_1_0 // AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER
|
||||
#define CL_EXTENSION_WEAK_LINK __attribute__((weak_import))
|
||||
#else
|
||||
#define CL_API_SUFFIX__VERSION_1_0
|
||||
#define CL_EXTENSION_WEAK_LINK
|
||||
#endif
|
||||
|
||||
#ifdef _WIN32
|
||||
typedef signed __int8 int8_t;
|
||||
typedef unsigned __int8 uint8_t;
|
||||
typedef signed __int16 int16_t;
|
||||
typedef unsigned __int16 uint16_t;
|
||||
typedef signed __int32 int32_t;
|
||||
typedef unsigned __int32 uint32_t;
|
||||
typedef signed __int64 int64_t;
|
||||
typedef unsigned __int64 uint64_t;
|
||||
|
||||
typedef int8_t cl_char;
|
||||
typedef uint8_t cl_uchar;
|
||||
typedef int16_t cl_short ;
|
||||
typedef uint16_t cl_ushort ;
|
||||
typedef int32_t cl_int ;
|
||||
typedef uint32_t cl_uint ;
|
||||
typedef int64_t cl_long ;
|
||||
typedef uint64_t cl_ulong ;
|
||||
|
||||
typedef uint16_t cl_half ;
|
||||
typedef float cl_float ;
|
||||
typedef double cl_double ;
|
||||
|
||||
|
||||
typedef int8_t cl_char2[2] ;
|
||||
typedef int8_t cl_char4[4] ;
|
||||
typedef int8_t cl_char8[8] ;
|
||||
typedef int8_t cl_char16[16] ;
|
||||
typedef uint8_t cl_uchar2[2] ;
|
||||
typedef uint8_t cl_uchar4[4] ;
|
||||
typedef uint8_t cl_uchar8[8] ;
|
||||
typedef uint8_t cl_uchar16[16] ;
|
||||
|
||||
typedef int16_t cl_short2[2] ;
|
||||
typedef int16_t cl_short4[4] ;
|
||||
typedef int16_t cl_short8[8] ;
|
||||
typedef int16_t cl_short16[16] ;
|
||||
typedef uint16_t cl_ushort2[2] ;
|
||||
typedef uint16_t cl_ushort4[4] ;
|
||||
typedef uint16_t cl_ushort8[8] ;
|
||||
typedef uint16_t cl_ushort16[16] ;
|
||||
|
||||
typedef int32_t cl_int2[2] ;
|
||||
typedef int32_t cl_int4[4] ;
|
||||
typedef int32_t cl_int8[8] ;
|
||||
typedef int32_t cl_int16[16] ;
|
||||
typedef uint32_t cl_uint2[2] ;
|
||||
typedef uint32_t cl_uint4[4] ;
|
||||
typedef uint32_t cl_uint8[8] ;
|
||||
typedef uint32_t cl_uint16[16] ;
|
||||
|
||||
typedef int64_t cl_long2[2] ;
|
||||
typedef int64_t cl_long4[4] ;
|
||||
typedef int64_t cl_long8[8] ;
|
||||
typedef int64_t cl_long16[16] ;
|
||||
typedef uint64_t cl_ulong2[2] ;
|
||||
typedef uint64_t cl_ulong4[4] ;
|
||||
typedef uint64_t cl_ulong8[8] ;
|
||||
typedef uint64_t cl_ulong16[16] ;
|
||||
|
||||
typedef float cl_float2[2] ;
|
||||
typedef float cl_float4[4] ;
|
||||
typedef float cl_float8[8] ;
|
||||
typedef float cl_float16[16] ;
|
||||
|
||||
typedef double cl_double2[2] ;
|
||||
typedef double cl_double4[4] ;
|
||||
typedef double cl_double8[8] ;
|
||||
typedef double cl_double16[16] ;
|
||||
|
||||
|
||||
#else
|
||||
#include <stdint.h>
|
||||
|
||||
/* scalar types */
|
||||
typedef int8_t cl_char;
|
||||
typedef uint8_t cl_uchar;
|
||||
typedef int16_t cl_short __attribute__((aligned(2)));
|
||||
typedef uint16_t cl_ushort __attribute__((aligned(2)));
|
||||
typedef int32_t cl_int __attribute__((aligned(4)));
|
||||
typedef uint32_t cl_uint __attribute__((aligned(4)));
|
||||
typedef int64_t cl_long __attribute__((aligned(8)));
|
||||
typedef uint64_t cl_ulong __attribute__((aligned(8)));
|
||||
|
||||
typedef uint16_t cl_half __attribute__((aligned(2)));
|
||||
typedef float cl_float __attribute__((aligned(4)));
|
||||
typedef double cl_double __attribute__((aligned(8)));
|
||||
|
||||
|
||||
/*
|
||||
* Vector types
|
||||
*
|
||||
* Note: OpenCL requires that all types be naturally aligned.
|
||||
* This means that vector types must be naturally aligned.
|
||||
* For example, a vector of four floats must be aligned to
|
||||
* a 16 byte boundary (calculated as 4 * the natural 4-byte
|
||||
* alignment of the float). The alignment qualifiers here
|
||||
* will only function properly if your compiler supports them
|
||||
* and if you don't actively work to defeat them. For example,
|
||||
* in order for a cl_float4 to be 16 byte aligned in a struct,
|
||||
* the start of the struct must itself be 16-byte aligned.
|
||||
*
|
||||
* Maintaining proper alignment is the user's responsibility.
|
||||
*/
|
||||
typedef int8_t cl_char2[2] __attribute__((aligned(2)));
|
||||
typedef int8_t cl_char4[4] __attribute__((aligned(4)));
|
||||
typedef int8_t cl_char8[8] __attribute__((aligned(8)));
|
||||
typedef int8_t cl_char16[16] __attribute__((aligned(16)));
|
||||
typedef uint8_t cl_uchar2[2] __attribute__((aligned(2)));
|
||||
typedef uint8_t cl_uchar4[4] __attribute__((aligned(4)));
|
||||
typedef uint8_t cl_uchar8[8] __attribute__((aligned(8)));
|
||||
typedef uint8_t cl_uchar16[16] __attribute__((aligned(16)));
|
||||
|
||||
typedef int16_t cl_short2[2] __attribute__((aligned(4)));
|
||||
typedef int16_t cl_short4[4] __attribute__((aligned(8)));
|
||||
typedef int16_t cl_short8[8] __attribute__((aligned(16)));
|
||||
typedef int16_t cl_short16[16] __attribute__((aligned(32)));
|
||||
typedef uint16_t cl_ushort2[2] __attribute__((aligned(4)));
|
||||
typedef uint16_t cl_ushort4[4] __attribute__((aligned(8)));
|
||||
typedef uint16_t cl_ushort8[8] __attribute__((aligned(16)));
|
||||
typedef uint16_t cl_ushort16[16] __attribute__((aligned(32)));
|
||||
|
||||
typedef int32_t cl_int2[2] __attribute__((aligned(8)));
|
||||
typedef int32_t cl_int4[4] __attribute__((aligned(16)));
|
||||
typedef int32_t cl_int8[8] __attribute__((aligned(32)));
|
||||
typedef int32_t cl_int16[16] __attribute__((aligned(64)));
|
||||
typedef uint32_t cl_uint2[2] __attribute__((aligned(8)));
|
||||
typedef uint32_t cl_uint4[4] __attribute__((aligned(16)));
|
||||
typedef uint32_t cl_uint8[8] __attribute__((aligned(32)));
|
||||
typedef uint32_t cl_uint16[16] __attribute__((aligned(64)));
|
||||
|
||||
typedef int64_t cl_long2[2] __attribute__((aligned(16)));
|
||||
typedef int64_t cl_long4[4] __attribute__((aligned(32)));
|
||||
typedef int64_t cl_long8[8] __attribute__((aligned(64)));
|
||||
typedef int64_t cl_long16[16] __attribute__((aligned(128)));
|
||||
typedef uint64_t cl_ulong2[2] __attribute__((aligned(16)));
|
||||
typedef uint64_t cl_ulong4[4] __attribute__((aligned(32)));
|
||||
typedef uint64_t cl_ulong8[8] __attribute__((aligned(64)));
|
||||
typedef uint64_t cl_ulong16[16] __attribute__((aligned(128)));
|
||||
|
||||
typedef float cl_float2[2] __attribute__((aligned(8)));
|
||||
typedef float cl_float4[4] __attribute__((aligned(16)));
|
||||
typedef float cl_float8[8] __attribute__((aligned(32)));
|
||||
typedef float cl_float16[16] __attribute__((aligned(64)));
|
||||
|
||||
typedef double cl_double2[2] __attribute__((aligned(16)));
|
||||
typedef double cl_double4[4] __attribute__((aligned(32)));
|
||||
typedef double cl_double8[8] __attribute__((aligned(64)));
|
||||
typedef double cl_double16[16] __attribute__((aligned(128)));
|
||||
#endif
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
/* and a few goodies to go with them */
|
||||
#define CL_CHAR_BIT 8
|
||||
#define CL_SCHAR_MAX 127
|
||||
#define CL_SCHAR_MIN (-127-1)
|
||||
#define CL_CHAR_MAX CL_SCHAR_MAX
|
||||
#define CL_CHAR_MIN CL_SCHAR_MIN
|
||||
#define CL_UCHAR_MAX 255
|
||||
#define CL_SHRT_MAX 32767
|
||||
#define CL_SHRT_MIN (-32767-1)
|
||||
#define CL_USHRT_MAX 65535
|
||||
#define CL_INT_MAX 2147483647
|
||||
#define CL_INT_MIN (-2147483647-1)
|
||||
#define CL_UINT_MAX 0xffffffffU
|
||||
#define CL_LONG_MAX ((cl_long) 0x7FFFFFFFFFFFFFFFLL)
|
||||
#define CL_LONG_MIN ((cl_long) -0x7FFFFFFFFFFFFFFFLL - 1LL)
|
||||
#define CL_ULONG_MAX ((cl_ulong) 0xFFFFFFFFFFFFFFFFULL)
|
||||
|
||||
#define CL_FLT_DIG 6
|
||||
#define CL_FLT_MANT_DIG 24
|
||||
#define CL_FLT_MAX_10_EXP +38
|
||||
#define CL_FLT_MAX_EXP +128
|
||||
#define CL_FLT_MIN_10_EXP -37
|
||||
#define CL_FLT_MIN_EXP -125
|
||||
#define CL_FLT_RADIX 2
|
||||
#define CL_FLT_MAX 0x1.fffffep127f
|
||||
#define CL_FLT_MIN 0x1.0p-126f
|
||||
#define CL_FLT_EPSILON 0x1.0p-23f
|
||||
|
||||
#define CL_DBL_DIG 15
|
||||
#define CL_DBL_MANT_DIG 53
|
||||
#define CL_DBL_MAX_10_EXP +308
|
||||
#define CL_DBL_MAX_EXP +1024
|
||||
#define CL_DBL_MIN_10_EXP -307
|
||||
#define CL_DBL_MIN_EXP -1021
|
||||
#define CL_DBL_RADIX 2
|
||||
#define CL_DBL_MAX 0x1.fffffffffffffp1023
|
||||
#define CL_DBL_MIN 0x1.0p-1022
|
||||
#define CL_DBL_EPSILON 0x1.0p-52
|
||||
|
||||
/* There are no vector types for half */
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // __CL_PLATFORM_H
|
||||
|
||||
Reference in New Issue
Block a user