Files
bullet3/Extras/RigidBodyGpuPipeline/opencl/primitives/Adl/CL/AdlCL.inl

385 lines
10 KiB
C++

/*
Copyright (c) 2012 Advanced Micro Devices, Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
//Originally written by Takahiro Harada
#pragma comment(lib,"OpenCL.lib")
#include <CL/cl.h>
#include <CL/cl_ext.h>
#include <CL/cl_platform.h>
namespace adl
{
struct DeviceCL : public Device
{
typedef DeviceUtils::Config Config;
__inline
DeviceCL() : Device( TYPE_CL ), m_kernelManager(0){}
__inline
void* getContext() const { return m_context; }
__inline
void initialize(const Config& cfg);
__inline
void release();
template<typename T>
__inline
void allocate(Buffer<T>* buf, int nElems, BufferBase::BufferType type);
template<typename T>
__inline
void deallocate(Buffer<T>* buf);
template<typename T>
__inline
void copy(Buffer<T>* dst, const Buffer<T>* src, int nElems,int srcOffsetNElems = 0,int dstOffsetNElems = 0);
template<typename T>
__inline
void copy(T* dst, const Buffer<T>* src, int nElems, int srcOffsetNElems = 0);
template<typename T>
__inline
void copy(Buffer<T>* dst, const T* src, int nElems, int dstOffsetNElems = 0);
__inline
void waitForCompletion() const;
__inline
void getDeviceName( char nameOut[128] ) const;
__inline
static
int getNDevices();
__inline
Kernel* getKernel(const char* fileName, const char* funcName, const char* option = NULL, const char* src = NULL, bool cacheKernel = true )const;
enum
{
MAX_NUM_DEVICES = 6,
};
cl_context m_context;
cl_command_queue m_commandQueue;
cl_device_id m_deviceIdx;
KernelManager* m_kernelManager;
};
//===
//===
void DeviceCL::initialize(const Config& cfg)
{
// DeviceUtils::create( cfg, (DeviceCL*)this );
{
// dd = new DeviceCL();
DeviceCL* deviceData = (DeviceCL*)this;
// cl_device_type deviceType = (driverType == DRIVER_HARDWARE)? CL_DEVICE_TYPE_GPU:CL_DEVICE_TYPE_CPU;
cl_device_type deviceType = (cfg.m_type== Config::DEVICE_GPU)? CL_DEVICE_TYPE_GPU: CL_DEVICE_TYPE_CPU;
// int numContextQueuePairsToCreate = 1;
bool enableProfiling = false;
#ifdef _DEBUG
enableProfiling = true;
#endif
cl_int status;
cl_platform_id platform;
{
cl_uint nPlatforms = 0;
status = clGetPlatformIDs(0, NULL, &nPlatforms);
ADLASSERT( status == CL_SUCCESS );
cl_platform_id pIdx[5];
status = clGetPlatformIDs(nPlatforms, pIdx, NULL);
ADLASSERT( status == CL_SUCCESS );
cl_uint atiIdx = -1;
cl_uint intelIdx = -1;
cl_uint nvIdx = -1;
for(cl_uint i=0; i<nPlatforms; i++)
{
char buff[512];
status = clGetPlatformInfo( pIdx[i], CL_PLATFORM_VENDOR, 512, buff, 0 );
ADLASSERT( status == CL_SUCCESS );
//skip the platform if there are no devices available
cl_uint numDevice;
status = clGetDeviceIDs( pIdx[i], deviceType, 0, NULL, &numDevice );
if (numDevice>0)
{
if( strcmp( buff, "NVIDIA Corporation" )==0 ) nvIdx = i;
if( strcmp( buff, "Advanced Micro Devices, Inc." )==0 ) atiIdx = i;
if( strcmp( buff, "Intel(R) Corporation" )==0 ) intelIdx = i;
}
}
if( deviceType == CL_DEVICE_TYPE_GPU )
{
switch( cfg.m_vendor )
{
case DeviceUtils::Config::VD_AMD:
if( atiIdx == -1 && nvIdx != -1 ) goto USE_NV_GPU;
USE_AMD_GPU:
ADLASSERT(atiIdx != -1 );
platform = pIdx[atiIdx];
break;
case DeviceUtils::Config::VD_NV:
if( atiIdx != -1 && nvIdx == -1 ) goto USE_AMD_GPU;
USE_NV_GPU:
ADLASSERT(nvIdx != -1 );
platform = pIdx[nvIdx];
break;
default:
ADLASSERT(0);
break;
};
}
else if( deviceType == CL_DEVICE_TYPE_CPU )
{
switch( cfg.m_vendor )
{
case DeviceUtils::Config::VD_AMD:
ADLASSERT(atiIdx != -1 );
platform = pIdx[atiIdx];
break;
case DeviceUtils::Config::VD_INTEL:
ADLASSERT(intelIdx != -1 );
platform = pIdx[intelIdx];
break;
default:
ADLASSERT(0);
break;
};
}
}
cl_uint numDevice;
status = clGetDeviceIDs( platform, deviceType, 0, NULL, &numDevice );
// ADLASSERT( cfg.m_deviceIdx < (int)numDevice );
debugPrintf("CL: %d %s Devices ", numDevice, (deviceType==CL_DEVICE_TYPE_GPU)? "GPU":"CPU");
// numContextQueuePairsToCreate = min( (int)numDevice, numContextQueuePairsToCreate );
// numContextQueuePairsToCreate = ( (int)numDevice < numContextQueuePairsToCreate )? numDevice : numContextQueuePairsToCreate;
cl_device_id deviceIds[ MAX_NUM_DEVICES ];
status = clGetDeviceIDs( platform, deviceType, numDevice, deviceIds, NULL );
ADLASSERT( status == CL_SUCCESS );
{ int i = min( (int)numDevice-1, cfg.m_deviceIdx );
m_deviceIdx = deviceIds[i];
deviceData->m_context = clCreateContext( NULL, 1, &deviceData->m_deviceIdx, NULL, NULL, &status );
ADLASSERT( status == CL_SUCCESS );
char buff[512];
status = clGetDeviceInfo( deviceData->m_deviceIdx, CL_DEVICE_NAME, sizeof(buff), &buff, NULL );
ADLASSERT( status == CL_SUCCESS );
debugPrintf("[%s]\n", buff);
deviceData->m_commandQueue = clCreateCommandQueue( deviceData->m_context, deviceData->m_deviceIdx, (enableProfiling)?CL_QUEUE_PROFILING_ENABLE:NULL, NULL );
ADLASSERT( status == CL_SUCCESS );
// status = clSetCommandQueueProperty( commandQueue, CL_QUEUE_PROFILING_ENABLE, CL_TRUE, 0 );
// CLASSERT( status == CL_SUCCESS );
if(0)
{
cl_bool image_support;
clGetDeviceInfo(deviceData->m_deviceIdx, CL_DEVICE_IMAGE_SUPPORT, sizeof(image_support), &image_support, NULL);
debugPrintf(" CL_DEVICE_IMAGE_SUPPORT : %s\n", image_support?"Yes":"No");
}
}
}
m_kernelManager = new KernelManager;
}
void DeviceCL::release()
{
clReleaseCommandQueue( m_commandQueue );
clReleaseContext( m_context );
if( m_kernelManager ) delete m_kernelManager;
}
template<typename T>
void DeviceCL::allocate(Buffer<T>* buf, int nElems, BufferBase::BufferType type)
{
buf->m_device = this;
buf->m_size = nElems;
buf->m_ptr = 0;
if( type == BufferBase::BUFFER_CONST ) return;
#if defined(ADL_CL_DUMP_MEMORY_LOG)
char deviceName[256];
getDeviceName( deviceName );
printf( "adlCLMemoryLog %s : %3.2fMB Allocation: %3.2fKB ", deviceName, m_memoryUsage/1024.f/1024.f, sizeof(T)*nElems/1024.f );
fflush( stdout );
#endif
int sz=sizeof(T)*nElems;
cl_int status = 0;
if( type == BufferBase::BUFFER_ZERO_COPY )
buf->m_ptr = (T*)clCreateBuffer( m_context, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, sz, 0, &status );
else if( type == BufferBase::BUFFER_RAW )
buf->m_ptr = (T*)clCreateBuffer( m_context, CL_MEM_WRITE_ONLY, sz, 0, &status );
else
buf->m_ptr = (T*)clCreateBuffer( m_context, CL_MEM_READ_WRITE, sz, 0, &status );
m_memoryUsage += buf->m_size*sizeof(T);
#if defined(ADL_CL_DUMP_MEMORY_LOG)
printf( "%s\n", (status==CL_SUCCESS)? "Succeed": "Failed" );
fflush( stdout );
#endif
ADLASSERT( status == CL_SUCCESS );
}
template<typename T>
void DeviceCL::deallocate(Buffer<T>* buf)
{
if( buf->m_ptr )
{
m_memoryUsage -= buf->m_size*sizeof(T);
clReleaseMemObject( (cl_mem)buf->m_ptr );
}
buf->m_device = 0;
buf->m_size = 0;
buf->m_ptr = 0;
}
template<typename T>
void DeviceCL::copy(Buffer<T>* dst, const Buffer<T>* src, int nElems,int srcOffsetNElems,int dstOffsetNElems )
{
if( dst->m_device->m_type == TYPE_CL && src->m_device->m_type == TYPE_CL )
{
cl_int status = 0;
status = clEnqueueCopyBuffer( m_commandQueue, (cl_mem)src->m_ptr, (cl_mem)dst->m_ptr, sizeof(T)*srcOffsetNElems, sizeof(T)*dstOffsetNElems, sizeof(T)*nElems, 0, 0, 0 );
ADLASSERT( status == CL_SUCCESS );
}
else if( src->m_device->m_type == TYPE_HOST )
{
ADLASSERT( dst->getType() == TYPE_CL );
dst->write( src->m_ptr, nElems );
}
else if( dst->m_device->m_type == TYPE_HOST )
{
ADLASSERT( src->getType() == TYPE_CL );
src->read( dst->m_ptr, nElems );
}
else
{
ADLASSERT( 0 );
}
}
template<typename T>
void DeviceCL::copy(T* dst, const Buffer<T>* src, int nElems, int srcOffsetNElems )
{
cl_int status = 0;
status = clEnqueueReadBuffer( m_commandQueue, (cl_mem)src->m_ptr, 0, sizeof(T)*srcOffsetNElems, sizeof(T)*nElems,
dst, 0,0,0 );
ADLASSERT( status == CL_SUCCESS );
}
template<typename T>
void DeviceCL::copy(Buffer<T>* dst, const T* src, int nElems, int dstOffsetNElems )
{
cl_int status = 0;
int sz=sizeof(T)*nElems;
status = clEnqueueWriteBuffer( m_commandQueue, (cl_mem)dst->m_ptr, 0, sizeof(T)*dstOffsetNElems, sz,
src, 0,0,0 );
ADLASSERT( status == CL_SUCCESS );
}
void DeviceCL::waitForCompletion() const
{
clFinish( m_commandQueue );
}
int DeviceCL::getNDevices()
{
cl_device_type deviceType = CL_DEVICE_TYPE_GPU;
cl_int status;
cl_platform_id platform;
{
cl_uint nPlatforms = 0;
status = clGetPlatformIDs(0, NULL, &nPlatforms);
ADLASSERT( status == CL_SUCCESS );
cl_platform_id pIdx[5];
status = clGetPlatformIDs(nPlatforms, pIdx, NULL);
ADLASSERT( status == CL_SUCCESS );
cl_uint nvIdx = -1;
cl_uint atiIdx = -1;
for(cl_uint i=0; i<nPlatforms; i++)
{
char buff[512];
status = clGetPlatformInfo( pIdx[i], CL_PLATFORM_VENDOR, 512, buff, 0 );
ADLASSERT( status == CL_SUCCESS );
if( strcmp( buff, "NVIDIA Corporation" )==0 ) nvIdx = i;
if( strcmp( buff, "Advanced Micro Devices, Inc." )==0 ) atiIdx = i;
}
if( deviceType == CL_DEVICE_TYPE_GPU )
{
if( nvIdx != -1 ) platform = pIdx[nvIdx];
else platform = pIdx[atiIdx];
}
else if( deviceType == CL_DEVICE_TYPE_CPU )
{
platform = pIdx[atiIdx];
}
}
cl_uint numDevice;
status = clGetDeviceIDs( platform, deviceType, 0, NULL, &numDevice );
ADLASSERT( status == CL_SUCCESS );
return numDevice;
}
void DeviceCL::getDeviceName( char nameOut[128] ) const
{
cl_int status;
status = clGetDeviceInfo( m_deviceIdx, CL_DEVICE_NAME, sizeof(char)*128, nameOut, NULL );
ADLASSERT( status == CL_SUCCESS );
}
Kernel* DeviceCL::getKernel(const char* fileName, const char* funcName, const char* option, const char* src, bool cacheKernel )const
{
return m_kernelManager->query( this, fileName, funcName, option, src, cacheKernel );
}
};