move some recent MiniCL work to trunk

This commit is contained in:
erwin.coumans
2010-02-08 22:42:58 +00:00
parent 52e60c8246
commit 7d4e2873e2
15 changed files with 1471 additions and 724 deletions

View File

@@ -11,7 +11,7 @@ LINK_LIBRARIES(
ADD_EXECUTABLE(AppMiniCLVectorAdd
MiniCL_VectorAdd.cpp
MiniCL.cpp
VectorAddKernels.cl
)
IF (UNIX)

View File

@@ -1,5 +0,0 @@
SubDir TOP Demos MiniCL_VectorAdd ;
BulletMiniCLDemo MiniCL_VectorAdd : [ Wildcard *.h *.cpp ] ;
MsvcIncDirs MiniCL_VectorAdd : "../../src" ;

View File

@@ -1,346 +0,0 @@
#include <MiniCL/cl.h>
#define __PHYSICS_COMMON_H__ 1
#ifdef WIN32
#include "BulletMultiThreaded/Win32ThreadSupport.h"
#else
#include "BulletMultiThreaded/SequentialThreadSupport.h"
#endif
#include "BulletMultiThreaded/MiniCLTaskScheduler.h"
#include "BulletMultiThreaded/MiniCLTask/MiniCLTask.h"
#include "LinearMath/btMinMax.h"
/*
m_threadSupportCollision = new Win32ThreadSupport(Win32ThreadSupport::Win32ThreadConstructionInfo(
"collision",
processCollisionTask,
createCollisionLocalStoreMemory,
maxNumOutstandingTasks));
if (!m_spuCollisionTaskProcess)
m_spuCollisionTaskProcess = new SpuCollisionTaskProcess(m_threadInterface,m_maxNumOutstandingTasks);
m_spuCollisionTaskProcess->initialize2(dispatchInfo.m_useEpa);
m_spuCollisionTaskProcess->addWorkToTask(pairPtr,i,endIndex);
//make sure all SPU work is done
m_spuCollisionTaskProcess->flush2();
*/
/// MiniCL implementation of clGetDeviceInfo for its single emulated device.
///
/// Supported queries (param_name):
///  - CL_DEVICE_NAME               : writes the NUL-terminated string "CPU"
///  - CL_DEVICE_TYPE               : writes CL_DEVICE_TYPE_CPU as a cl_device_type
///  - CL_DEVICE_MAX_COMPUTE_UNITS  : writes 4 as a cl_uint
///  - CL_DEVICE_MAX_WORK_ITEM_SIZES: writes {64, 24, 16} as three size_t values
///
/// @param device            ignored
/// @param param_name        which property to query
/// @param param_value_size  size in bytes of the buffer behind param_value
/// @param param_value       destination buffer; must be non-null
/// @return always 0. A too-small buffer or an unsupported param_name is only
///         reported on stdout and param_value is left untouched.
CL_API_ENTRY cl_int CL_API_CALL clGetDeviceInfo(
    cl_device_id            device ,
    cl_device_info          param_name ,
    size_t                  param_value_size ,
    void *                  param_value ,
    size_t *                /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0
{
    switch (param_name)
    {
    case CL_DEVICE_NAME:
        {
            const char deviceName[] = "CPU";
            // sizeof counts the terminating NUL, i.e. the exact number of bytes written.
            const size_t nameLen = sizeof(deviceName);
            assert(param_value_size >= nameLen);
            // A buffer of exactly nameLen bytes is large enough.
            if (param_value_size >= nameLen)
            {
                memcpy(param_value, deviceName, nameLen);
            } else
            {
                printf("error: param_value_size should be at least %d, but it is %d\n",(int)nameLen,(int)param_value_size);
            }
            break;
        }
    case CL_DEVICE_TYPE:
        {
            if (param_value_size >= sizeof(cl_device_type))
            {
                cl_device_type* deviceType = (cl_device_type*)param_value;
                *deviceType = CL_DEVICE_TYPE_CPU;
            } else
            {
                // size_t is cast explicitly: passing it to %d directly is undefined on LP64/LLP64.
                printf("error: param_value_size should be at least %d\n",(int)sizeof(cl_device_type));
            }
            break;
        }
    case CL_DEVICE_MAX_COMPUTE_UNITS:
        {
            if (param_value_size >= sizeof(cl_uint))
            {
                cl_uint* numUnits = (cl_uint*)param_value;
                *numUnits = 4;
            } else
            {
                printf("error: param_value_size should be at least %d\n",(int)sizeof(cl_uint));
            }
            break;
        }
    case CL_DEVICE_MAX_WORK_ITEM_SIZES:
        {
            // One size_t per work-item dimension.
            const size_t requiredSize = 3 * sizeof(size_t);
            if (param_value_size >= requiredSize)
            {
                size_t* workItemSize = (size_t*)param_value;
                workItemSize[0] = 64;
                workItemSize[1] = 24;
                workItemSize[2] = 16;
            } else
            {
                // Report the size that is actually required (three size_t values).
                printf("error: param_value_size should be at least %d\n",(int)requiredSize);
            }
            break;
        }
    default:
        {
            printf("error: unsupported param_name:%u\n",(unsigned int)param_name);
            break;
        }
    }
    return 0;
}
/// Releases a buffer created by clCreateBuffer.
///
/// clCreateBuffer in this file allocates every buffer with malloc(), so the
/// storage is returned with free(); previously it was leaked. MiniCL keeps no
/// reference count, so the handle is invalid after the first release.
/// NOTE(review): callers must make sure no issued task still uses the buffer
/// (e.g. by reading results back first) - nothing here waits for pending work.
///
/// @param memobj buffer handle; a null handle is accepted and ignored
/// @return always 0
CL_API_ENTRY cl_int CL_API_CALL clReleaseMemObject(cl_mem memobj) CL_API_SUFFIX__VERSION_1_0
{
    // free(NULL) is a no-op, so no explicit null check is needed.
    free(memobj);
    return 0;
}
/// No-op release of a command queue.
/// clCreateCommandQueue hands back the context pointer itself, so there is no
/// separate queue object to destroy here.
/// @return always 0
CL_API_ENTRY cl_int CL_API_CALL clReleaseCommandQueue(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0
{
    const cl_int status = 0;
    return status;
}
/// No-op release of a program.
/// clCreateProgramWithBinary returns the context pointer as the program
/// handle, so no program object exists that could be freed here.
/// @return always 0
CL_API_ENTRY cl_int CL_API_CALL clReleaseProgram(cl_program /* program */) CL_API_SUFFIX__VERSION_1_0
{
    const cl_int status = 0;
    return status;
}
/// Destroys a kernel object created by clCreateKernel.
///
/// clCreateKernel allocates a MiniCLKernel with new; it is deleted here
/// (previously it was leaked). MiniCL keeps no reference count, so the handle
/// is invalid after the first release.
/// NOTE(review): clEnqueueNDRangeKernel passes a pointer into the kernel's
/// argument storage to the scheduler; callers should release the kernel only
/// after the issued work has completed - confirm whether issueTask copies it.
///
/// @param kernel kernel handle; a null handle is accepted and ignored
/// @return always 0
CL_API_ENTRY cl_int CL_API_CALL clReleaseKernel(cl_kernel kernel) CL_API_SUFFIX__VERSION_1_0
{
    // Deleting a null pointer is a no-op.
    delete (MiniCLKernel*) kernel;
    return 0;
}
// Enqueued Commands APIs
/// Copies cb bytes, starting offset bytes into buffer, to host memory at ptr.
///
/// The command queue is the MiniCLTaskScheduler itself; it is flushed first so
/// the copy happens after flush() returns. The read is therefore always
/// performed synchronously, whatever blocking_read says.
///
/// @param command_queue scheduler handle (as returned by clCreateCommandQueue)
/// @param buffer        source buffer (as returned by clCreateBuffer)
/// @param offset        byte offset into buffer at which reading starts
/// @param cb            number of bytes to copy
/// @param ptr           destination host memory, at least cb bytes
/// @return always 0
CL_API_ENTRY cl_int CL_API_CALL clEnqueueReadBuffer(cl_command_queue     command_queue ,
    cl_mem               buffer ,
    cl_bool             /* blocking_read */,
    size_t               offset ,
    size_t               cb ,
    void *               ptr ,
    cl_uint             /* num_events_in_wait_list */,
    const cl_event *    /* event_wait_list */,
    cl_event *          /* event */) CL_API_SUFFIX__VERSION_1_0
{
    MiniCLTaskScheduler* scheduler = (MiniCLTaskScheduler*) command_queue;

    ///wait for all work items to be completed
    scheduler->flush();

    // Honour the requested offset instead of always reading from the start of the buffer.
    const char* src = (const char*)buffer + offset;
    memcpy(ptr, src, cb);
    return 0;
}
/// Splits the global work size of each dimension into contiguous index ranges
/// and issues one scheduler task per range.
///
/// For every dimension in [0, work_dim) the range [0, global_work_size[dim]) is
/// cut into chunks of max(64, numWorkItems / maxOutstandingTasks) items; each
/// chunk is handed to the kernel's scheduler together with the kernel's
/// program command id, its argument data and argument sizes.
/// global_work_offset, local_work_size and the event parameters are ignored.
///
/// @return always 0
CL_API_ENTRY cl_int CL_API_CALL clEnqueueNDRangeKernel(cl_command_queue /* command_queue */,
    cl_kernel         clKernel ,
    cl_uint           work_dim ,
    const size_t *   /* global_work_offset */,
    const size_t *    global_work_size ,
    const size_t *   /* local_work_size */,
    cl_uint          /* num_events_in_wait_list */,
    const cl_event * /* event_wait_list */,
    cl_event *       /* event */) CL_API_SUFFIX__VERSION_1_0
{
    MiniCLKernel* kernel = (MiniCLKernel*) clKernel;

    for (cl_uint dim = 0; dim < work_dim; ++dim)
    {
        int maxTask = kernel->m_scheduler->getMaxNumOutstandingTasks();
        int numWorkItems = global_work_size[dim];

        // Performance hint: tweak the minimum chunk size (64 work items per task) during benchmarking.
        int chunkSize = btMax(64, numWorkItems / maxTask);

        int begin = 0;
        while (begin < numWorkItems)
        {
            // Clamp the last chunk to the end of the range.
            int end = begin + chunkSize;
            if (end >= numWorkItems)
            {
                end = numWorkItems;
            }
            kernel->m_scheduler->issueTask(begin, end, kernel->m_kernelProgramCommandId, (char*)&kernel->m_argData[0][0], kernel->m_argSizes);
            begin = end;
        }
    }
    return 0;
}
/// Stores a copy of one kernel argument inside the MiniCLKernel.
///
/// arg_size bytes are copied from arg_value into the kernel's argument slot
/// arg_index and the size is recorded next to it.
///
/// @param clKernel  kernel handle created by clCreateKernel
/// @param arg_index zero-based slot; must be smaller than MINI_CL_MAX_ARG
///                  (assumes the kernel holds MINI_CL_MAX_ARG slots - the
///                  previous '>' test let the index equal to the limit through)
/// @param arg_size  size of the argument in bytes; must be smaller than
///                  MINICL_MAX_ARGLENGTH
/// @param arg_value pointer to the argument data
/// @return always 0; invalid indices or sizes are reported on stdout and the
///         argument is not stored
CL_API_ENTRY cl_int CL_API_CALL clSetKernelArg(cl_kernel    clKernel ,
    cl_uint      arg_index ,
    size_t       arg_size ,
    const void * arg_value ) CL_API_SUFFIX__VERSION_1_0
{
    MiniCLKernel* kernel = (MiniCLKernel* ) clKernel;
    assert(arg_size < MINICL_MAX_ARGLENGTH);

    if (arg_index >= MINI_CL_MAX_ARG)
    {
        printf("error: clSetKernelArg arg_index (%u) must be smaller than %d\n",(unsigned int)arg_index,(int)MINI_CL_MAX_ARG);
        return 0;
    }

    if (arg_size >= MINICL_MAX_ARGLENGTH)
    {
        // size_t is cast explicitly: passing it to %d directly is undefined on LP64/LLP64.
        printf("error: clSetKernelArg argdata too large: %d (maximum is %d)\n",(int)arg_size,(int)MINICL_MAX_ARGLENGTH);
        return 0;
    }

    memcpy(kernel->m_argData[arg_index], arg_value, arg_size);
    kernel->m_argSizes[arg_index] = arg_size;
    return 0;
}
// Kernel Object APIs
/// Creates a MiniCLKernel bound to the scheduler behind the program handle.
///
/// The program handle is treated as the MiniCLTaskScheduler; kernel_name is
/// resolved to a program command id through findProgramCommandIdByName.
/// errcode_ret is not written.
///
/// @param program     scheduler handle (as returned by clCreateProgramWithBinary)
/// @param kernel_name name of the kernel to look up
/// @return the newly allocated kernel, cast to cl_kernel
CL_API_ENTRY cl_kernel CL_API_CALL clCreateKernel(cl_program       program ,
    const char *     kernel_name ,
    cl_int *        /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0
{
    MiniCLTaskScheduler* taskScheduler = (MiniCLTaskScheduler*) program;

    MiniCLKernel* newKernel = new MiniCLKernel();
    const int commandId = taskScheduler->findProgramCommandIdByName(kernel_name);
    newKernel->m_kernelProgramCommandId = commandId;
    newKernel->m_scheduler = taskScheduler;

    return (cl_kernel)newKernel;
}
/// No-op program build: every argument, including pfn_notify, is ignored and
/// the callback is never invoked.
/// @return always 0
CL_API_ENTRY cl_int CL_API_CALL clBuildProgram(cl_program           /* program */,
    cl_uint              /* num_devices */,
    const cl_device_id * /* device_list */,
    const char *         /* options */,
    void (*pfn_notify)(cl_program /* program */, void * /* user_data */),
    void *               /* user_data */) CL_API_SUFFIX__VERSION_1_0
{
    const cl_int status = 0;
    return status;
}
/// Returns the context pointer reinterpreted as the program handle.
/// The device list, binaries and status/error out-parameters are ignored and
/// never written.
CL_API_ENTRY cl_program CL_API_CALL clCreateProgramWithBinary(cl_context                     context ,
    cl_uint                        /* num_devices */,
    const cl_device_id *           /* device_list */,
    const size_t *                 /* lengths */,
    const unsigned char **         /* binaries */,
    cl_int *                       /* binary_status */,
    cl_int *                       /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0
{
    // Program and context share one underlying object.
    cl_program programHandle = (cl_program)context;
    return programHandle;
}
// Memory Object APIs
/// Allocates a buffer of size bytes in host memory with malloc().
///
/// When flags contains CL_MEM_COPY_HOST_PTR and host_ptr is non-null, the
/// buffer is initialised with size bytes copied from host_ptr.
///
/// @param flags       only CL_MEM_COPY_HOST_PTR is inspected
/// @param size        buffer size in bytes
/// @param host_ptr    optional source for the initial contents
/// @param errcode_ret optional; receives 0 on success or -4
///                    (the OpenCL value of CL_MEM_OBJECT_ALLOCATION_FAILURE)
///                    when the allocation failed
/// @return the buffer handle, or null when malloc() returned null (which an
///         implementation may also do for size == 0)
CL_API_ENTRY cl_mem CL_API_CALL clCreateBuffer(cl_context   /* context */,
    cl_mem_flags flags ,
    size_t       size,
    void *       host_ptr ,
    cl_int *     errcode_ret ) CL_API_SUFFIX__VERSION_1_0
{
    cl_mem buf = (cl_mem)malloc(size);
    if (!buf)
    {
        // Never memcpy into a failed allocation; report the failure instead.
        if (errcode_ret)
        {
            *errcode_ret = -4; // CL_MEM_OBJECT_ALLOCATION_FAILURE
        }
        return 0;
    }

    if ((flags&CL_MEM_COPY_HOST_PTR) && host_ptr)
    {
        memcpy(buf,host_ptr,size);
    }

    if (errcode_ret)
    {
        *errcode_ret = 0; // success, matching the 0 returned by the other entry points
    }
    return buf;
}
// Command Queue APIs
/// Returns the context pointer reinterpreted as the command queue handle.
/// The device, properties and errcode_ret arguments are ignored.
CL_API_ENTRY cl_command_queue CL_API_CALL clCreateCommandQueue(cl_context                     context ,
    cl_device_id                   /* device */,
    cl_command_queue_properties    /* properties */,
    cl_int *                       /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0
{
    // Queue and context share one underlying object.
    cl_command_queue queueHandle = (cl_command_queue) context;
    return queueHandle;
}
/// MiniCL implementation of clGetContextInfo; only CL_CONTEXT_DEVICES is supported.
///
/// The "device list" is the placeholder string "MiniCL_Test." (13 bytes
/// including the terminating NUL).
///  - param_value_size == 0 : size query, nothing is written to param_value.
///  - param_value_size != 0 : the placeholder is copied to param_value when the
///    buffer is non-null and at least 13 bytes large; otherwise an error is
///    printed and the buffer is left untouched (it used to be overrun).
/// In both cases the required size is stored in *param_value_size_ret when
/// that pointer is non-null.
///
/// @return always 0; unsupported queries are reported on stdout
extern CL_API_ENTRY cl_int CL_API_CALL clGetContextInfo(cl_context         /* context */,
    cl_context_info    param_name ,
    size_t             param_value_size ,
    void *             param_value,
    size_t *           param_value_size_ret ) CL_API_SUFFIX__VERSION_1_0
{
    switch (param_name)
    {
    case CL_CONTEXT_DEVICES:
        {
            static const char deviceList[] = "MiniCL_Test.";
            // sizeof counts the terminating NUL: 13 bytes in total.
            const size_t requiredSize = sizeof(deviceList);

            // Callers may legitimately pass a null size pointer on the second (data) call.
            if (param_value_size_ret)
            {
                *param_value_size_ret = requiredSize;
            }

            if (!param_value_size)
            {
                // Pure size query.
                break;
            }

            if (param_value && param_value_size >= requiredSize)
            {
                memcpy(param_value, deviceList, requiredSize);
            } else
            {
                printf("error: clGetContextInfo needs a non-null param_value of at least %d bytes\n",(int)requiredSize);
            }
            break;
        }
    default:
        {
            printf("unsupported\n");
            break;
        }
    }

    return 0;
}
/// Creates the MiniCL "context": a MiniCLTaskScheduler with its thread support.
///
/// On WIN32 a Win32ThreadSupport is created, otherwise a
/// SequentialThreadSupport; both are named "MiniCL" and run processMiniCLTask
/// with createMiniCLLocalStoreMemory. The scheduler is created for 4
/// outstanding tasks and returned cast to cl_context.
/// All parameters (properties, device type, callback, user data, errcode_ret)
/// are ignored.
CL_API_ENTRY cl_context CL_API_CALL clCreateContextFromType(cl_context_properties * /* properties */,
    cl_device_type          /* device_type */,
    void (*pfn_notify)(const char *, const void *, size_t, void *) /* pfn_notify */,
    void *                  /* user_data */,
    cl_int *                /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0
{
    const int numOutstandingTasks = 4;

#ifdef WIN32
    Win32ThreadSupport::Win32ThreadConstructionInfo threadInfo(
        "MiniCL",
        processMiniCLTask,
        createMiniCLLocalStoreMemory,
        numOutstandingTasks);
    Win32ThreadSupport* threadSupport = new Win32ThreadSupport(threadInfo);
#else
    SequentialThreadSupport::SequentialThreadConstructionInfo threadInfo("MiniCL",processMiniCLTask,createMiniCLLocalStoreMemory);
    SequentialThreadSupport* threadSupport = new SequentialThreadSupport(threadInfo);
#endif

    MiniCLTaskScheduler* taskScheduler = new MiniCLTaskScheduler(threadSupport,numOutstandingTasks);
    return (cl_context)taskScheduler;
}
/// Destroys the scheduler behind the context handle and its thread support.
///
/// The thread-support pointer is fetched from the scheduler before the
/// scheduler is deleted; the scheduler is deleted first, the thread support
/// second.
///
/// @param context handle returned by clCreateContextFromType
/// @return always 0
CL_API_ENTRY cl_int CL_API_CALL clReleaseContext(cl_context  context ) CL_API_SUFFIX__VERSION_1_0
{
    MiniCLTaskScheduler* taskScheduler = (MiniCLTaskScheduler*) context;

    // Must be read while the scheduler is still alive.
    btThreadSupportInterface* threads = taskScheduler->getThreadSupportInterface();

    delete taskScheduler;
    delete threads;

    return 0;
}

View File

@@ -5,7 +5,13 @@
///Instead of #include <CL/cl.h> we include <MiniCL/cl.h>
///Apart from this include file, all other code should compile and work on OpenCL compliant implementation
#include <MiniCL/cl.h>
#define USE_MINICL 1
#ifdef USE_MINICL
#include "MiniCL/cl.h"
#else //USE_MINICL
#include <CL/cl.h>
#endif//USE_MINICL
#include <stdio.h>
#include <math.h>
#include <stdlib.h>
@@ -170,3 +176,13 @@ int main(int argc, char **argv)
free(srcB);
free (dst);
}
// MiniCL-only section: compiled only when USE_MINICL is defined.
// The kernel source file is pulled directly into this translation unit with
// C linkage, so the VectorAdd kernel is compiled as ordinary host code.
#ifdef USE_MINICL
#include "MiniCL/cl_MiniCL_Defs.h"
extern "C"
{
#include "VectorAddKernels.cl"
}
// NOTE(review): presumably makes the VectorAdd kernel known to MiniCL by name
// so clCreateKernel can find it - confirm against cl_MiniCL_Defs.h.
MINICL_REGISTER(VectorAdd)
#endif//USE_MINICL

View File

@@ -0,0 +1,47 @@
/*
Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
Copyright (C) 2006 - 2009 Sony Computer Entertainment Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
///GUID_ARG is only used by MiniCL to pass in the guid used by its get_global_id implementation
#ifndef GUID_ARG
#define GUID_ARG
#endif
///////////////////////////////////////////////////
// OpenCL Kernel Function for element by element vector addition
__kernel void VectorAdd(__global const float8* a, __global const float8* b, __global float8* c GUID_ARG)
{
    // index of the float8 element this work item is responsible for
    int gid = get_global_id(0);

    // load both operands
    float8 lhs = a[gid];
    float8 rhs = b[gid];
    float8 sum = (float8)0.0f;

    // component-wise addition of the eight lanes
    sum.s0 = lhs.s0 + rhs.s0;
    sum.s1 = lhs.s1 + rhs.s1;
    sum.s2 = lhs.s2 + rhs.s2;
    sum.s3 = lhs.s3 + rhs.s3;
    sum.s4 = lhs.s4 + rhs.s4;
    sum.s5 = lhs.s5 + rhs.s5;
    sum.s6 = lhs.s6 + rhs.s6;
    sum.s7 = lhs.s7 + rhs.s7;

    // store the result at the same index in the output array
    c[gid] = sum;
}