Add the GPU rigid body pipeline from https://github.com/erwincoumans/experiments as a Bullet 3.x preview for Bullet 2.80
This commit is contained in:
19
Extras/RigidBodyGpuPipeline/opencl/primitives/Adl/Adl.cpp
Normal file
19
Extras/RigidBodyGpuPipeline/opencl/primitives/Adl/Adl.cpp
Normal file
@@ -0,0 +1,19 @@
|
||||
/*
|
||||
Copyright (c) 2012 Advanced Micro Devices, Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
//Originally written by Takahiro Harada
|
||||
|
||||
|
||||
#include <Adl/Adl.h>
|
||||
|
||||
//KernelManager* KernelManager::s_kManager = NULL;
|
||||
235
Extras/RigidBodyGpuPipeline/opencl/primitives/Adl/Adl.h
Normal file
235
Extras/RigidBodyGpuPipeline/opencl/primitives/Adl/Adl.h
Normal file
@@ -0,0 +1,235 @@
|
||||
/*
|
||||
Copyright (c) 2012 Advanced Micro Devices, Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
//Originally written by Takahiro Harada
|
||||
|
||||
|
||||
#ifndef ADL_H
|
||||
#define ADL_H
|
||||
|
||||
#pragma warning( disable : 4996 )
|
||||
#include <Adl/AdlConfig.h>
|
||||
#include <Adl/AdlError.h>
|
||||
#include <algorithm>
|
||||
|
||||
#ifndef max
|
||||
#define max(a,b) (((a) > (b)) ? (a) : (b))
|
||||
#endif
|
||||
|
||||
#ifndef min
|
||||
#define min(a,b) (((a) < (b)) ? (a) : (b))
|
||||
#endif
|
||||
|
||||
namespace adl
|
||||
{
|
||||
|
||||
// Backend selector: tells which concrete device implementation an object
// belongs to. The numeric values are spelled out because they are fixed.
enum DeviceType
{
	TYPE_CL   = 0,
	TYPE_DX11 = 1,
	TYPE_HOST = 2
};
|
||||
|
||||
|
||||
struct Device;
|
||||
|
||||
// Non-template base of Buffer<T>; it only carries the BufferType enumeration
// so the buffer kinds can be named without a template argument.
struct BufferBase
{
	enum BufferType
	{
		// plain buffer
		BUFFER,

		// kinds used by the DX backend
		BUFFER_CONST,
		BUFFER_STAGING,
		BUFFER_APPEND,
		BUFFER_RAW,
		BUFFER_W_COUNTER,
		BUFFER_INDEX,
		BUFFER_VERTEX,

		// kind used by the CL backend
		BUFFER_ZERO_COPY
	};
};
|
||||
|
||||
class DeviceUtils
|
||||
{
|
||||
public:
|
||||
struct Config
|
||||
{
|
||||
enum DeviceType
|
||||
{
|
||||
DEVICE_GPU,
|
||||
DEVICE_CPU,
|
||||
};
|
||||
|
||||
// for CL
|
||||
enum DeviceVendor
|
||||
{
|
||||
VD_AMD,
|
||||
VD_INTEL,
|
||||
VD_NV,
|
||||
};
|
||||
|
||||
Config() : m_type(DEVICE_GPU), m_deviceIdx(0), m_vendor(VD_AMD){}
|
||||
|
||||
DeviceType m_type;
|
||||
int m_deviceIdx;
|
||||
DeviceVendor m_vendor;
|
||||
};
|
||||
|
||||
__inline
|
||||
static
|
||||
int getNDevices( DeviceType type );
|
||||
__inline
|
||||
static Device* allocate( DeviceType type, Config& cfg );
|
||||
__inline
|
||||
static void deallocate( Device* deviceData );
|
||||
__inline
|
||||
static void waitForCompletion( const Device* deviceData );
|
||||
};
|
||||
|
||||
//==========================
|
||||
// DeviceData
|
||||
//==========================
|
||||
struct Kernel;
|
||||
|
||||
struct Device
|
||||
{
|
||||
typedef DeviceUtils::Config Config;
|
||||
|
||||
Device( DeviceType type ) : m_type( type ), m_memoryUsage(0)
|
||||
{
|
||||
}
|
||||
|
||||
virtual void* getContext() const { return 0; }
|
||||
virtual void initialize(const Config& cfg){}
|
||||
virtual void release(){}
|
||||
virtual void waitForCompletion() const {}
|
||||
virtual void getDeviceName( char nameOut[128] ) const {}
|
||||
virtual Kernel* getKernel(const char* fileName, const char* funcName, const char* option = NULL, const char* src = NULL, bool cacheKernel = true ) const { ADLASSERT(0); return 0;}
|
||||
virtual unsigned int getUsedMemory() const { return m_memoryUsage; }
|
||||
|
||||
DeviceType m_type;
|
||||
unsigned int m_memoryUsage;
|
||||
};
|
||||
|
||||
//==========================
|
||||
// Buffer
|
||||
//==========================
|
||||
|
||||
template<typename T>
|
||||
struct HostBuffer;
|
||||
// overload each deviceDatas
|
||||
template<typename T>
|
||||
struct Buffer : public BufferBase
|
||||
{
|
||||
__inline
|
||||
Buffer();
|
||||
__inline
|
||||
Buffer(const Device* device, int nElems, BufferType type = BUFFER );
|
||||
__inline
|
||||
virtual ~Buffer();
|
||||
|
||||
__inline
|
||||
void setRawPtr( const Device* device, T* ptr, int size, BufferType type = BUFFER );
|
||||
__inline
|
||||
void allocate(const Device* device, int nElems, BufferType type = BUFFER );
|
||||
__inline
|
||||
void write(T* hostSrcPtr, int nElems, int dstOffsetNElems = 0);
|
||||
__inline
|
||||
void read(T* hostDstPtr, int nElems, int srcOffsetNElems = 0) const;
|
||||
__inline
|
||||
void write(Buffer<T>& src, int nElems);
|
||||
__inline
|
||||
void read(Buffer<T>& dst, int nElems) const;
|
||||
// __inline
|
||||
// Buffer<T>& operator = (const Buffer<T>& buffer);
|
||||
__inline
|
||||
int getSize() const { return m_size; }
|
||||
|
||||
DeviceType getType() const { ADLASSERT( m_device ); return m_device->m_type; }
|
||||
|
||||
|
||||
const Device* m_device;
|
||||
int m_size;
|
||||
T* m_ptr;
|
||||
// for DX11
|
||||
void* m_uav;
|
||||
void* m_srv;
|
||||
bool m_allocated; // todo. move this to a bit
|
||||
};
|
||||
|
||||
class BufferUtils
|
||||
{
|
||||
public:
|
||||
template<DeviceType TYPE, bool COPY, typename T>
|
||||
__inline
|
||||
static
|
||||
typename Buffer<T>* map(const Device* device, const Buffer<T>* in, int copySize = -1);
|
||||
|
||||
template<bool COPY, typename T>
|
||||
__inline
|
||||
static
|
||||
void unmap( Buffer<T>* native, const Buffer<T>* orig, int copySize = -1 );
|
||||
};
|
||||
|
||||
//==========================
|
||||
// HostBuffer
|
||||
//==========================
|
||||
struct DeviceHost;
|
||||
|
||||
template<typename T>
|
||||
struct HostBuffer : public Buffer<T>
|
||||
{
|
||||
__inline
|
||||
HostBuffer():Buffer<T>(){}
|
||||
__inline
|
||||
HostBuffer(const Device* device, int nElems, BufferType type = BUFFER ) : Buffer<T>(device, nElems, type) {}
|
||||
// HostBuffer(const Device* deviceData, T* rawPtr, int nElems);
|
||||
|
||||
|
||||
__inline
|
||||
T& operator[](int idx);
|
||||
__inline
|
||||
const T& operator[](int idx) const;
|
||||
__inline
|
||||
T* begin() { return m_ptr; }
|
||||
|
||||
__inline
|
||||
HostBuffer<T>& operator = (const Buffer<T>& device);
|
||||
};
|
||||
|
||||
};
|
||||
|
||||
#include <Adl/AdlKernel.h>
|
||||
#if defined(ADL_ENABLE_CL)
|
||||
#include <Adl/CL/AdlCL.inl>
|
||||
#endif
|
||||
#if defined(ADL_ENABLE_DX11)
|
||||
#include <Adl/DX11/AdlDX11.inl>
|
||||
#endif
|
||||
|
||||
#include <Adl/Host/AdlHost.inl>
|
||||
#include <Adl/AdlKernel.inl>
|
||||
#include <Adl/Adl.inl>
|
||||
|
||||
|
||||
#include <Adl/AdlStopwatch.h>
|
||||
|
||||
#include <Adl/Host/AdlStopwatchHost.inl>
|
||||
#include <Adl/AdlStopwatch.inl>
|
||||
|
||||
#endif
|
||||
344
Extras/RigidBodyGpuPipeline/opencl/primitives/Adl/Adl.inl
Normal file
344
Extras/RigidBodyGpuPipeline/opencl/primitives/Adl/Adl.inl
Normal file
@@ -0,0 +1,344 @@
|
||||
/*
|
||||
Copyright (c) 2012 Advanced Micro Devices, Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
//Originally written by Takahiro Harada
|
||||
|
||||
|
||||
namespace adl
|
||||
{
|
||||
|
||||
// Returns the device count reported by the requested backend. Backends that
// are compiled out, and every other type, report a single device.
int DeviceUtils::getNDevices( DeviceType type )
{
#if defined(ADL_ENABLE_CL)
	if( type == TYPE_CL )
	{
		return DeviceCL::getNDevices();
	}
#endif
#if defined(ADL_ENABLE_DX11)
	if( type == TYPE_DX11 )
	{
		return DeviceDX11::getNDevices();
	}
#endif
	return 1;
}
|
||||
|
||||
// Creates a device of the requested backend and initializes it with cfg.
//
// Returns the new device, or 0 when `type` names a backend that is not
// compiled in (after asserting in debug builds). The caller owns the result
// and hands it back through DeviceUtils::deallocate().
Device* DeviceUtils::allocate( DeviceType type, Config& cfg )
{
	// Start from a null pointer so the unsupported path below never touches
	// an indeterminate value.
	Device* deviceData = 0;
	switch( type )
	{
#if defined(ADL_ENABLE_CL)
	case TYPE_CL:
		deviceData = new DeviceCL();
		break;
#endif
#if defined(ADL_ENABLE_DX11)
	case TYPE_DX11:
		deviceData = new DeviceDX11();
		break;
#endif
	case TYPE_HOST:
		deviceData = new DeviceHost();
		break;
	default:
		ADLASSERT( 0 );
		break;
	};

	// ADLASSERT does not stop release builds, so bail out explicitly rather
	// than calling initialize() through a pointer that was never set.
	if( deviceData == 0 )
	{
		return 0;
	}

	deviceData->initialize( cfg );
	return deviceData;
}
|
||||
|
||||
// Tears down and deletes a device. In debug builds it first asserts that the
// device reports no memory still in use.
void DeviceUtils::deallocate( Device* deviceData )
{
	const unsigned int bytesStillInUse = deviceData->getUsedMemory();
	ADLASSERT( bytesStillInUse == 0 );

	deviceData->release();
	delete deviceData;
}
|
||||
|
||||
void DeviceUtils::waitForCompletion( const Device* deviceData )
|
||||
{
|
||||
deviceData->waitForCompletion();
|
||||
}
|
||||
|
||||
// Backend dispatch for Buffer operations.
//
// SELECT_DEVICEDATA( type, func )        switches on `type` and calls `func`
//                                        on the enclosing object's m_device,
//                                        cast to the matching backend class.
// SELECT_DEVICEDATA1( deviceData, func ) does the same for an explicit device
//                                        pointer, switching on its m_type.
//
// `func` is a complete member call such as `copy( dst, src, n )`. Unknown
// types hit ADLASSERT(0). The host backend is always available; the CL and
// DX11 cases exist only when the backend is compiled in, which is expressed
// once through the per-backend case helpers below instead of four copies of
// each switch.
#if defined(ADL_ENABLE_CL)
#define ADL_DEVICEDATA_CASE_CL( dev, func ) case TYPE_CL: ((DeviceCL*)dev)->func; break;
#else
#define ADL_DEVICEDATA_CASE_CL( dev, func )
#endif

#if defined(ADL_ENABLE_DX11)
#define ADL_DEVICEDATA_CASE_DX11( dev, func ) case TYPE_DX11: ((DeviceDX11*)dev)->func; break;
#else
#define ADL_DEVICEDATA_CASE_DX11( dev, func )
#endif

#define SELECT_DEVICEDATA( type, func ) \
	switch( type ) \
	{ \
	ADL_DEVICEDATA_CASE_CL( m_device, func ) \
	ADL_DEVICEDATA_CASE_DX11( m_device, func ) \
	case TYPE_HOST: ((DeviceHost*)m_device)->func; break; \
	default: ADLASSERT(0); break; \
	}

#define SELECT_DEVICEDATA1( deviceData, func ) \
	switch( deviceData->m_type ) \
	{ \
	ADL_DEVICEDATA_CASE_CL( deviceData, func ) \
	ADL_DEVICEDATA_CASE_DX11( deviceData, func ) \
	case TYPE_HOST: ((DeviceHost*)deviceData)->func; break; \
	default: ADLASSERT(0); break; \
	}
|
||||
|
||||
template<typename T>
|
||||
Buffer<T>::Buffer()
|
||||
{
|
||||
m_device = 0;
|
||||
m_size = 0;
|
||||
m_ptr = 0;
|
||||
|
||||
m_uav = 0;
|
||||
m_srv = 0;
|
||||
|
||||
m_allocated = false;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
Buffer<T>::Buffer(const Device* deviceData, int nElems, BufferType type )
|
||||
{
|
||||
m_device = 0;
|
||||
allocate( deviceData, nElems, type );
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
Buffer<T>::~Buffer()
|
||||
{
|
||||
if( m_allocated )
|
||||
{
|
||||
if( m_device )
|
||||
SELECT_DEVICEDATA( m_device->m_type, deallocate( this ) );
|
||||
}
|
||||
|
||||
m_device = 0;
|
||||
m_ptr = 0;
|
||||
m_size = 0;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void Buffer<T>::setRawPtr( const Device* device, T* ptr, int size, BufferType type )
|
||||
{
|
||||
ADLASSERT( m_device == 0 );
|
||||
ADLASSERT( type == BUFFER ); // todo. implement
|
||||
ADLASSERT( device->m_type != TYPE_DX11 ); // todo. implement set srv, uav
|
||||
|
||||
m_device = device;
|
||||
m_ptr = ptr;
|
||||
m_size = size;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void Buffer<T>::allocate(const Device* deviceData, int nElems, BufferType type )
|
||||
{
|
||||
ADLASSERT( m_device == 0 );
|
||||
m_device = deviceData;
|
||||
m_size = 0;
|
||||
m_ptr = 0;
|
||||
|
||||
m_uav = 0;
|
||||
m_srv = 0;
|
||||
|
||||
SELECT_DEVICEDATA( m_device->m_type, allocate( this, nElems, type ) );
|
||||
m_allocated = true;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void Buffer<T>::write(T* hostPtr, int nElems, int offsetNElems)
|
||||
{
|
||||
ADLASSERT( nElems+offsetNElems <= m_size );
|
||||
SELECT_DEVICEDATA( m_device->m_type, copy(this, hostPtr, nElems, offsetNElems) );
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void Buffer<T>::read(T* hostPtr, int nElems, int offsetNElems) const
|
||||
{
|
||||
SELECT_DEVICEDATA( m_device->m_type, copy(hostPtr,this, nElems, offsetNElems) );
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void Buffer<T>::write(Buffer<T>& src, int nElems)
|
||||
{
|
||||
ADLASSERT( nElems <= m_size );
|
||||
SELECT_DEVICEDATA( m_device->m_type, copy(this, &src, nElems) );
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void Buffer<T>::read(Buffer<T>& dst, int nElems) const
|
||||
{
|
||||
SELECT_DEVICEDATA( m_device->m_type, copy(&dst, this, nElems) );
|
||||
}
|
||||
/*
|
||||
template<typename T>
|
||||
Buffer<T>& Buffer<T>::operator = ( const Buffer<T>& buffer )
|
||||
{
|
||||
// ADLASSERT( buffer.m_size <= m_size );
|
||||
|
||||
SELECT_DEVICEDATA( m_device->m_type, copy(this, &buffer, min2( m_size, buffer.m_size) ) );
|
||||
|
||||
return *this;
|
||||
}
|
||||
*/
|
||||
|
||||
template<DeviceType TYPE, bool COPY, typename T>
|
||||
__inline
|
||||
static
|
||||
typename Buffer<T>* BufferUtils::map(const Device* device, const Buffer<T>* in, int copySize)
|
||||
{
|
||||
Buffer<T>* native;
|
||||
ADLASSERT( device->m_type == TYPE );
|
||||
|
||||
if( in->getType() == TYPE )
|
||||
native = (Buffer<T>*)in;
|
||||
else
|
||||
{
|
||||
ADLASSERT( copySize <= in->getSize() );
|
||||
copySize = (copySize==-1)? in->getSize() : copySize;
|
||||
|
||||
native = new Buffer<T>( device, copySize );
|
||||
if( COPY )
|
||||
{
|
||||
if( in->getType() == TYPE_HOST )
|
||||
native->write( in->m_ptr, copySize );
|
||||
else if( native->getType() == TYPE_HOST )
|
||||
{
|
||||
in->read( native->m_ptr, copySize );
|
||||
DeviceUtils::waitForCompletion( in->m_device );
|
||||
}
|
||||
else
|
||||
{
|
||||
T* tmp = new T[copySize];
|
||||
in->read( tmp, copySize );
|
||||
DeviceUtils::waitForCompletion( in->m_device );
|
||||
native->write( tmp, copySize );
|
||||
DeviceUtils::waitForCompletion( native->m_device );
|
||||
delete [] tmp;
|
||||
}
|
||||
}
|
||||
}
|
||||
return native;
|
||||
}
|
||||
|
||||
template<bool COPY, typename T>
|
||||
__inline
|
||||
static
|
||||
void BufferUtils::unmap( Buffer<T>* native, const Buffer<T>* orig, int copySize )
|
||||
{
|
||||
if( native != orig )
|
||||
{
|
||||
if( COPY )
|
||||
{
|
||||
copySize = (copySize==-1)? orig->getSize() : copySize;
|
||||
ADLASSERT( copySize <= orig->getSize() );
|
||||
if( orig->getType() == TYPE_HOST )
|
||||
{
|
||||
native->read( orig->m_ptr, copySize );
|
||||
DeviceUtils::waitForCompletion( native->m_device );
|
||||
}
|
||||
else if( native->getType() == TYPE_HOST )
|
||||
{
|
||||
Buffer<T>* dst = (Buffer<T>*)orig;
|
||||
dst->write( native->m_ptr, copySize );
|
||||
DeviceUtils::waitForCompletion( dst->m_device );
|
||||
}
|
||||
else
|
||||
{
|
||||
T* tmp = new T[copySize];
|
||||
native->read( tmp, copySize );
|
||||
DeviceUtils::waitForCompletion( native->m_device );
|
||||
Buffer<T>* dst = (Buffer<T>*)orig;
|
||||
dst->write( tmp, copySize );
|
||||
DeviceUtils::waitForCompletion( dst->m_device );
|
||||
delete [] tmp;
|
||||
}
|
||||
}
|
||||
delete native;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template<typename T>
|
||||
T& HostBuffer<T>::operator[](int idx)
|
||||
{
|
||||
return m_ptr[idx];
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
const T& HostBuffer<T>::operator[](int idx) const
|
||||
{
|
||||
return m_ptr[idx];
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
HostBuffer<T>& HostBuffer<T>::operator = ( const Buffer<T>& device )
|
||||
{
|
||||
ADLASSERT( device.m_size <= m_size );
|
||||
|
||||
SELECT_DEVICEDATA1( device.m_device, copy( m_ptr, &device, device.m_size ) );
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
#undef SELECT_DEVICEDATA
|
||||
|
||||
};
|
||||
@@ -0,0 +1,27 @@
|
||||
/*
|
||||
Copyright (c) 2012 Advanced Micro Devices, Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
//Originally written by Takahiro Harada
|
||||
|
||||
|
||||
|
||||
//ADL_ENABLE_CL and ADL_ENABLE_DX11 can be set in the build system using C/C++ preprocessor defines
|
||||
//#define ADL_ENABLE_CL
|
||||
//#define ADL_ENABLE_DX11
|
||||
|
||||
//#define ADL_CL_FORCE_UNCACHE_KERNEL
|
||||
#define ADL_CL_DUMP_MEMORY_LOG
|
||||
|
||||
//load the kernels from string instead of loading them from file
|
||||
#define ADL_LOAD_KERNEL_FROM_STRING
|
||||
#define ADL_DUMP_DX11_ERROR
|
||||
80
Extras/RigidBodyGpuPipeline/opencl/primitives/Adl/AdlError.h
Normal file
80
Extras/RigidBodyGpuPipeline/opencl/primitives/Adl/AdlError.h
Normal file
@@ -0,0 +1,80 @@
|
||||
/*
|
||||
Copyright (c) 2012 Advanced Micro Devices, Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
//Originally written by Takahiro Harada
|
||||
|
||||
|
||||
#ifndef ADL_ERROR_H
|
||||
#define ADL_ERROR_H
|
||||
|
||||
#if defined(ADL_DUMP_DX11_ERROR)
|
||||
#include <windows.h>
|
||||
#endif
|
||||
#ifdef _DEBUG
|
||||
#include <assert.h>
|
||||
#include <stdarg.h>
|
||||
#include <stdio.h>
|
||||
#endif
|
||||
|
||||
|
||||
namespace adl
|
||||
{
|
||||
|
||||
// ADLASSERT(x): debug builds break into the debugger when x is false;
// release builds still evaluate x (so side effects are kept) but ignore it.
//
// Both forms are wrapped in do { } while(0) so the macro behaves like one
// ordinary statement: `if( c ) ADLASSERT( x ); else ...` compiles, and the
// macro's inner `if` can never capture a following `else`. Invoke it with a
// trailing semicolon.
#ifdef _DEBUG
#define ADLASSERT(x) do { if(!(x)){ __debugbreak(); } } while(0)
#else
#define ADLASSERT(x) do { if(x){} } while(0)
#endif
|
||||
|
||||
// COMPILE_TIME_ASSERT(x): in debug builds, fails compilation when the
// constant expression x is false; expands to nothing in release builds.
// Usable wherever a block statement is allowed.
//
// A false condition yields a negative array bound, which every compiler
// rejects (a zero-length array is accepted as an extension by some), and a
// typedef is used so that no variable is created or read.
#ifdef _DEBUG
#define COMPILE_TIME_ASSERT(x) { typedef char compileTimeAssertFailed[(x) ? 1 : -1]; }
#else
#define COMPILE_TIME_ASSERT(x)
#endif
|
||||
|
||||
// printf-style diagnostic output.
//
// Debug builds: with ADL_DUMP_DX11_ERROR defined the message is formatted
// into a fixed buffer and sent to the debugger output, otherwise it is
// printed with vprintf. Release builds: does nothing.
#ifdef _DEBUG
	__inline
	void debugPrintf(const char *fmt, ...)
	{
		va_list arg;
		va_start(arg, fmt);
#if defined(ADL_DUMP_DX11_ERROR)
		const int size = 1024*10;
		char buf[size];
		// _TRUNCATE: an over-long message is cut to fit instead of invoking
		// the CRT invalid-parameter handler.
		_vsnprintf_s( buf, size, _TRUNCATE, fmt, arg );
		// The ANSI entry point is called explicitly so the narrow buffer can
		// be passed as is in both UNICODE and non-UNICODE builds; no wide
		// copy of the message is needed.
		OutputDebugStringA( buf );
#else
		vprintf(fmt, arg);
#endif
		va_end(arg);
	}
#else
	__inline
	void debugPrintf(const char *fmt, ...)
	{
	}
#endif
|
||||
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
142
Extras/RigidBodyGpuPipeline/opencl/primitives/Adl/AdlKernel.h
Normal file
142
Extras/RigidBodyGpuPipeline/opencl/primitives/Adl/AdlKernel.h
Normal file
@@ -0,0 +1,142 @@
|
||||
/*
|
||||
Copyright (c) 2012 Advanced Micro Devices, Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
//Originally written by Takahiro Harada
|
||||
|
||||
|
||||
#ifndef ADL_KERNEL_H
|
||||
#define ADL_KERNEL_H
|
||||
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <fstream>
|
||||
|
||||
namespace adl
|
||||
{
|
||||
|
||||
//==========================
|
||||
// Kernel
|
||||
//==========================
|
||||
struct Kernel
|
||||
{
|
||||
DeviceType m_type;
|
||||
void* m_kernel;
|
||||
};
|
||||
|
||||
//==========================
|
||||
// KernelManager
|
||||
//==========================
|
||||
class KernelManager
|
||||
{
|
||||
public:
|
||||
typedef std::map<std::string, Kernel*> KMap;
|
||||
|
||||
__inline
|
||||
~KernelManager();
|
||||
|
||||
__inline
|
||||
// static
|
||||
Kernel* query(const Device* dd, const char* fileName, const char* funcName, const char* option = NULL, const char* src = NULL,
|
||||
bool cacheKernel = true);
|
||||
|
||||
public:
|
||||
KMap m_map;
|
||||
};
|
||||
|
||||
//==========================
|
||||
// Launcher
|
||||
//==========================
|
||||
class Launcher
|
||||
{
|
||||
public:
|
||||
struct BufferInfo
|
||||
{
|
||||
BufferInfo(){}
|
||||
template<typename T>
|
||||
BufferInfo(Buffer<T>* buff, bool isReadOnly = false): m_buffer(buff), m_isReadOnly(isReadOnly){}
|
||||
|
||||
void* m_buffer;
|
||||
bool m_isReadOnly;
|
||||
};
|
||||
|
||||
__inline
|
||||
Launcher(const Device* dd, char* fileName, char* funcName, char* option = NULL);
|
||||
__inline
|
||||
Launcher(const Device* dd, Kernel* kernel);
|
||||
__inline
|
||||
void setBuffers( BufferInfo* buffInfo, int n );
|
||||
template<typename T>
|
||||
__inline
|
||||
void setConst( Buffer<T>& constBuff, const T& consts );
|
||||
__inline
|
||||
void launch1D( int numThreads, int localSize = 64 );
|
||||
__inline
|
||||
void launch2D( int numThreadsX, int numThreadsY, int localSizeX = 8, int localSizeY = 8 );
|
||||
|
||||
public:
|
||||
enum
|
||||
{
|
||||
CONST_BUFFER_SIZE = 512,
|
||||
};
|
||||
|
||||
const Device* m_deviceData;
|
||||
Kernel* m_kernel;
|
||||
int m_idx;
|
||||
int m_idxRw;
|
||||
};
|
||||
|
||||
template<DeviceType TYPE>
|
||||
class KernelBuilder
|
||||
{
|
||||
public:
|
||||
|
||||
__inline
|
||||
KernelBuilder(): m_ptr(0){}
|
||||
|
||||
__inline
|
||||
void setFromFile( const Device* deviceData, const char* fileName, const char* option = NULL, bool addExtension = false,
|
||||
bool cacheKernel = true);
|
||||
|
||||
__inline
|
||||
void setFromSrc( const Device* deviceData, const char* src, const char* option = NULL );
|
||||
|
||||
__inline
|
||||
void setFromSrcCached( const Device* deviceData, const char* src, const char* fileName, const char* option );
|
||||
|
||||
|
||||
__inline
|
||||
void createKernel( const char* funcName, Kernel& kernelOut );
|
||||
|
||||
__inline
|
||||
~KernelBuilder();
|
||||
// todo. implemement in kernel destructor?
|
||||
__inline
|
||||
static void deleteKernel( Kernel& kernel );
|
||||
|
||||
private:
|
||||
enum
|
||||
{
|
||||
MAX_PATH_LENGTH = 260,
|
||||
};
|
||||
const Device* m_deviceData;
|
||||
#ifdef UNICODE
|
||||
wchar_t m_path[MAX_PATH_LENGTH];
|
||||
#else
|
||||
char m_path[MAX_PATH_LENGTH];
|
||||
#endif
|
||||
void* m_ptr;
|
||||
};
|
||||
|
||||
};
|
||||
|
||||
#endif //ADL_KERNEL_H
|
||||
223
Extras/RigidBodyGpuPipeline/opencl/primitives/Adl/AdlKernel.inl
Normal file
223
Extras/RigidBodyGpuPipeline/opencl/primitives/Adl/AdlKernel.inl
Normal file
@@ -0,0 +1,223 @@
|
||||
/*
|
||||
Copyright (c) 2012 Advanced Micro Devices, Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
//Originally written by Takahiro Harada
|
||||
|
||||
|
||||
|
||||
#ifdef ADL_ENABLE_CL
|
||||
#include <Adl/CL/AdlKernelUtilsCL.inl>
|
||||
#endif
|
||||
#ifdef ADL_ENABLE_DX11
|
||||
#include <Adl/DX11/AdlKernelUtilsDX11.inl>
|
||||
#endif
|
||||
|
||||
namespace adl
|
||||
{
|
||||
|
||||
//==========================
|
||||
// KernelManager
|
||||
//==========================
|
||||
// Returns the kernel `funcName` for device `dd`, building it on first use.
//
// Kernels are cached in m_map under a key made of the device context
// pointer, the file name with the backend's extension (".cl" / ".hlsl"), the
// function name and, when given, the build option string. On a cache miss a
// new Kernel is built from `src` if provided (optionally through the cached
// path when cacheKernel is set, CL only), otherwise from `fileName`, and is
// stored in the map. The returned Kernel stays owned by the manager.
//
// Progress is reported on stdout ("compiling kernel <name> ready").
Kernel* KernelManager::query(const Device* dd, const char* fileName, const char* funcName, const char* option, const char* src,
	bool cacheKernel)
{
	printf("compiling kernel %s",funcName);
	const int charSize = 1024*2;

	// Start with an empty string so the key below is well defined even when
	// the device type is not handled by any case.
	char fullFineName[charSize];
	fullFineName[0] = '\0';
	switch( dd->m_type )
	{
	// Each case label lives inside its backend guard so a disabled backend
	// ends up in `default` instead of falling through into the next case.
#if defined(ADL_ENABLE_CL)
	case TYPE_CL:
		sprintf_s(fullFineName,charSize,"%s.cl", fileName);
		break;
#endif
#if defined(ADL_ENABLE_DX11)
	case TYPE_DX11:
		sprintf_s(fullFineName,charSize,"%s.hlsl", fileName);
		break;
#endif
	default:
		ADLASSERT(0);
		break;
	};

	// The context is formatted as a pointer (%p): casting it to int drops
	// the upper bits on 64-bit targets, which could make different contexts
	// share a key.
	char mapName[charSize];
	if( option )
	{
		sprintf_s(mapName, charSize, "%p%s%s%s", dd->getContext(), fullFineName, funcName, option);
	}
	else
	{
		sprintf_s(mapName, charSize, "%p%s%s", dd->getContext(), fullFineName, funcName);
	}

	std::string str(mapName);

	KMap::iterator iter = m_map.find( str );

	Kernel* kernelOut;
	if( iter == m_map.end() )
	{
		kernelOut = new Kernel();

		switch( dd->m_type )
		{
#if defined(ADL_ENABLE_CL)
		case TYPE_CL:
			{
				KernelBuilder<TYPE_CL> builder;
				if( src )
				{
					if (cacheKernel)
					{
						builder.setFromSrcCached( dd, src, fileName, option );
					}
					else
					{
						builder.setFromSrc( dd, src, option );
					}
				}
				else
				{
					builder.setFromFile( dd, fileName, option, true, cacheKernel );
				}
				builder.createKernel( funcName, *kernelOut );
			}
			break;
#endif
#if defined(ADL_ENABLE_DX11)
		case TYPE_DX11:
			{
				KernelBuilder<TYPE_DX11> builder;
				if( src )
				{
					builder.setFromSrc( dd, src, option );
				}
				else
				{
					builder.setFromFile( dd, fileName, option, true, cacheKernel );
				}
				builder.createKernel( funcName, *kernelOut );
			}
			break;
#endif
		default:
			// NOTE(review): on this path the value-initialized Kernel is
			// still cached and returned, as before.
			ADLASSERT(0);
			break;
		};
		m_map.insert( KMap::value_type(str,kernelOut) );
	}
	else
	{
		kernelOut = iter->second;
	}

	printf(" ready\n");
	return kernelOut;
}
|
||||
|
||||
// Destroys every cached kernel: the backend object is released through the
// matching KernelBuilder, then the Kernel itself is deleted.
KernelManager::~KernelManager()
{
	for(KMap::iterator iter = m_map.begin(); iter != m_map.end(); ++iter)
	{
		Kernel* k = iter->second;
		switch( k->m_type )
		{
#if defined(ADL_ENABLE_CL)
		case TYPE_CL:
			KernelBuilder<TYPE_CL>::deleteKernel( *k );
			break;
#endif
#if defined(ADL_ENABLE_DX11)
		case TYPE_DX11:
			KernelBuilder<TYPE_DX11>::deleteKernel( *k );
			break;
#endif
		default:
			ADLASSERT(0);
			break;
		};
		// query() allocates every entry with new, so the Kernel object is
		// freed on all paths, including the unknown-backend one where it
		// used to be leaked.
		delete k;
	}
}
|
||||
|
||||
//==========================
|
||||
// Launcher
|
||||
//==========================
|
||||
|
||||
// Backend dispatch for Launcher operations.
//
// SELECT_LAUNCHER( type, func ) switches on `type` and calls the static
// function `func` (a complete call such as `setBuffers( this, info, n )`) on
// the launcher class of the matching backend. Types without a compiled-in
// backend hit ADLASSERT(0).
//
// The per-backend case helpers below expand to nothing when the backend is
// disabled, so the switch is written once instead of once per combination
// of enabled backends.
#if defined(ADL_ENABLE_CL)
#define ADL_LAUNCHER_CASE_CL( func ) case TYPE_CL: LauncherCL::func; break;
#else
#define ADL_LAUNCHER_CASE_CL( func )
#endif

#if defined(ADL_ENABLE_DX11)
#define ADL_LAUNCHER_CASE_DX11( func ) case TYPE_DX11: LauncherDX11::func; break;
#else
#define ADL_LAUNCHER_CASE_DX11( func )
#endif

#define SELECT_LAUNCHER( type, func ) \
	switch( type ) \
	{ \
	ADL_LAUNCHER_CASE_CL( func ) \
	ADL_LAUNCHER_CASE_DX11( func ) \
	default: ADLASSERT(0); break; \
	};
|
||||
|
||||
// Creates a launcher bound to device `dd`. The kernel is obtained from the device
// through dd->getKernel( fileName, funcName, option ); both argument counters start at 0.
Launcher::Launcher(const Device *dd, char *fileName, char *funcName, char *option)
{
	m_idxRw = 0;
	m_idx = 0;
	m_deviceData = dd;
	m_kernel = dd->getKernel( fileName, funcName, option );
}
|
||||
|
||||
// Creates a launcher for an already built `kernel` on device `dd`;
// both argument counters start at 0.
Launcher::Launcher(const Device* dd, Kernel* kernel)
{
	m_idxRw = 0;
	m_idx = 0;
	m_deviceData = dd;
	m_kernel = kernel;
}
|
||||
|
||||
// Hands the `n` entries of `buffInfo` to the setBuffers() of the launcher
// implementation selected by the type of the bound device.
void Launcher::setBuffers( BufferInfo* buffInfo, int n )
{
	SELECT_LAUNCHER(
		m_deviceData->m_type,
		setBuffers( this, buffInfo, n ) );
}
|
||||
|
||||
template<typename T>
|
||||
void Launcher::setConst( Buffer<T>& constBuff, const T& consts )
|
||||
{
|
||||
SELECT_LAUNCHER( m_deviceData->m_type, setConst( this, constBuff, consts ) );
|
||||
}
|
||||
|
||||
void Launcher::launch1D( int numThreads, int localSize )
|
||||
{
|
||||
SELECT_LAUNCHER( m_deviceData->m_type, launch2D( this, numThreads, 1, localSize, 1 ) );
|
||||
}
|
||||
|
||||
void Launcher::launch2D( int numThreadsX, int numThreadsY, int localSizeX, int localSizeY )
|
||||
{
|
||||
SELECT_LAUNCHER( m_deviceData->m_type, launch2D( this, numThreadsX, numThreadsY, localSizeX, localSizeY ) );
|
||||
}
|
||||
|
||||
#undef SELECT_LAUNCHER
|
||||
|
||||
};
|
||||
@@ -0,0 +1,81 @@
|
||||
/*
|
||||
Copyright (c) 2012 Advanced Micro Devices, Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
//Originally written by Takahiro Harada
|
||||
|
||||
|
||||
|
||||
#include <windows.h>
|
||||
|
||||
namespace adl
|
||||
{
|
||||
|
||||
struct StopwatchBase
|
||||
{
|
||||
__inline
|
||||
StopwatchBase(): m_device(0){}
|
||||
__inline
|
||||
StopwatchBase( const Device* deviceData ){ init(deviceData); }
|
||||
__inline
|
||||
virtual ~StopwatchBase(){}
|
||||
|
||||
__inline
|
||||
virtual void init( const Device* deviceData ) = 0;
|
||||
__inline
|
||||
virtual void start() = 0;
|
||||
__inline
|
||||
virtual void split() = 0;
|
||||
__inline
|
||||
virtual void stop() = 0;
|
||||
__inline
|
||||
virtual float getMs(int index=0) = 0;
|
||||
__inline
|
||||
virtual void getMs( float* times, int capacity ) = 0;
|
||||
__inline
|
||||
int getNIntervals() const{ return m_idx-1;}
|
||||
|
||||
enum
|
||||
{
|
||||
CAPACITY = 64,
|
||||
};
|
||||
|
||||
const Device* m_device;
|
||||
int m_idx;
|
||||
};
|
||||
|
||||
struct Stopwatch
|
||||
{
|
||||
__inline
|
||||
Stopwatch( const Device* deviceData = NULL ) { m_impl=0; if(deviceData) init(deviceData);}
|
||||
__inline
|
||||
~Stopwatch();
|
||||
|
||||
__inline
|
||||
void init( const Device* deviceData );
|
||||
__inline
|
||||
void start(){if(!m_impl) init(0); m_impl->start();}
|
||||
__inline
|
||||
void split(){m_impl->split();}
|
||||
__inline
|
||||
void stop(){m_impl->stop();}
|
||||
__inline
|
||||
float getMs(){ return m_impl->getMs();}
|
||||
__inline
|
||||
void getMs( float* times, int capacity ){m_impl->getMs(times, capacity);}
|
||||
__inline
|
||||
int getNIntervals() const{return m_impl->getNIntervals();}
|
||||
|
||||
StopwatchBase* m_impl;
|
||||
};
|
||||
|
||||
};
|
||||
@@ -0,0 +1,59 @@
|
||||
/*
|
||||
Copyright (c) 2012 Advanced Micro Devices, Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
//Originally written by Takahiro Harada
|
||||
|
||||
|
||||
namespace adl
|
||||
{
|
||||
|
||||
void Stopwatch::init( const Device* deviceData )
|
||||
{
|
||||
ADLASSERT( m_impl == 0 );
|
||||
|
||||
if( deviceData )
|
||||
{
|
||||
switch( deviceData->m_type )
|
||||
{
|
||||
#if defined(ADL_ENABLE_CL)
|
||||
case TYPE_CL:
|
||||
m_impl = new StopwatchHost;//StopwatchCL
|
||||
break;
|
||||
#endif
|
||||
#if defined(ADL_ENABLE_DX11)
|
||||
case TYPE_DX11:
|
||||
m_impl = new StopwatchHost;//StopwatchDX11;
|
||||
break;
|
||||
#endif
|
||||
case TYPE_HOST:
|
||||
m_impl = new StopwatchHost;
|
||||
break;
|
||||
default:
|
||||
ADLASSERT(0);
|
||||
break;
|
||||
};
|
||||
}
|
||||
else
|
||||
{
|
||||
m_impl = new StopwatchHost;
|
||||
}
|
||||
m_impl->init( deviceData );
|
||||
}
|
||||
|
||||
// Destroys the owned implementation, if one was ever created.
Stopwatch::~Stopwatch()
{
	if( m_impl != 0 )
	{
		delete m_impl;
	}
}
|
||||
|
||||
};
|
||||
384
Extras/RigidBodyGpuPipeline/opencl/primitives/Adl/CL/AdlCL.inl
Normal file
384
Extras/RigidBodyGpuPipeline/opencl/primitives/Adl/CL/AdlCL.inl
Normal file
@@ -0,0 +1,384 @@
|
||||
/*
|
||||
Copyright (c) 2012 Advanced Micro Devices, Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
//Originally written by Takahiro Harada
|
||||
|
||||
|
||||
|
||||
#pragma comment(lib,"OpenCL.lib")
|
||||
#include <CL/cl.h>
|
||||
#include <CL/cl_ext.h>
|
||||
#include <CL/cl_platform.h>
|
||||
|
||||
namespace adl
|
||||
{
|
||||
|
||||
struct DeviceCL : public Device
|
||||
{
|
||||
typedef DeviceUtils::Config Config;
|
||||
|
||||
|
||||
__inline
|
||||
DeviceCL() : Device( TYPE_CL ), m_kernelManager(0){}
|
||||
__inline
|
||||
void* getContext() const { return m_context; }
|
||||
__inline
|
||||
void initialize(const Config& cfg);
|
||||
__inline
|
||||
void release();
|
||||
|
||||
template<typename T>
|
||||
__inline
|
||||
void allocate(Buffer<T>* buf, int nElems, BufferBase::BufferType type);
|
||||
|
||||
template<typename T>
|
||||
__inline
|
||||
void deallocate(Buffer<T>* buf);
|
||||
|
||||
template<typename T>
|
||||
__inline
|
||||
void copy(Buffer<T>* dst, const Buffer<T>* src, int nElems,int srcOffsetNElems = 0,int dstOffsetNElems = 0);
|
||||
|
||||
template<typename T>
|
||||
__inline
|
||||
void copy(T* dst, const Buffer<T>* src, int nElems, int srcOffsetNElems = 0);
|
||||
|
||||
template<typename T>
|
||||
__inline
|
||||
void copy(Buffer<T>* dst, const T* src, int nElems, int dstOffsetNElems = 0);
|
||||
|
||||
__inline
|
||||
void waitForCompletion() const;
|
||||
|
||||
__inline
|
||||
void getDeviceName( char nameOut[128] ) const;
|
||||
|
||||
__inline
|
||||
static
|
||||
int getNDevices();
|
||||
|
||||
__inline
|
||||
Kernel* getKernel(const char* fileName, const char* funcName, const char* option = NULL, const char* src = NULL, bool cacheKernel = true )const;
|
||||
|
||||
|
||||
enum
|
||||
{
|
||||
MAX_NUM_DEVICES = 6,
|
||||
};
|
||||
|
||||
cl_context m_context;
|
||||
cl_command_queue m_commandQueue;
|
||||
|
||||
cl_device_id m_deviceIdx;
|
||||
|
||||
KernelManager* m_kernelManager;
|
||||
};
|
||||
|
||||
//===
|
||||
//===
|
||||
|
||||
void DeviceCL::initialize(const Config& cfg)
|
||||
{
|
||||
// DeviceUtils::create( cfg, (DeviceCL*)this );
|
||||
{
|
||||
// dd = new DeviceCL();
|
||||
|
||||
DeviceCL* deviceData = (DeviceCL*)this;
|
||||
|
||||
// cl_device_type deviceType = (driverType == DRIVER_HARDWARE)? CL_DEVICE_TYPE_GPU:CL_DEVICE_TYPE_CPU;
|
||||
cl_device_type deviceType = (cfg.m_type== Config::DEVICE_GPU)? CL_DEVICE_TYPE_GPU: CL_DEVICE_TYPE_CPU;
|
||||
// int numContextQueuePairsToCreate = 1;
|
||||
bool enableProfiling = false;
|
||||
#ifdef _DEBUG
|
||||
enableProfiling = true;
|
||||
#endif
|
||||
cl_int status;
|
||||
|
||||
cl_platform_id platform;
|
||||
{
|
||||
cl_uint nPlatforms = 0;
|
||||
status = clGetPlatformIDs(0, NULL, &nPlatforms);
|
||||
ADLASSERT( status == CL_SUCCESS );
|
||||
|
||||
cl_platform_id pIdx[5];
|
||||
status = clGetPlatformIDs(nPlatforms, pIdx, NULL);
|
||||
ADLASSERT( status == CL_SUCCESS );
|
||||
|
||||
cl_uint atiIdx = -1;
|
||||
cl_uint intelIdx = -1;
|
||||
cl_uint nvIdx = -1;
|
||||
|
||||
for(cl_uint i=0; i<nPlatforms; i++)
|
||||
{
|
||||
char buff[512];
|
||||
status = clGetPlatformInfo( pIdx[i], CL_PLATFORM_VENDOR, 512, buff, 0 );
|
||||
ADLASSERT( status == CL_SUCCESS );
|
||||
|
||||
//skip the platform if there are no devices available
|
||||
cl_uint numDevice;
|
||||
status = clGetDeviceIDs( pIdx[i], deviceType, 0, NULL, &numDevice );
|
||||
if (numDevice>0)
|
||||
{
|
||||
if( strcmp( buff, "NVIDIA Corporation" )==0 ) nvIdx = i;
|
||||
if( strcmp( buff, "Advanced Micro Devices, Inc." )==0 ) atiIdx = i;
|
||||
if( strcmp( buff, "Intel(R) Corporation" )==0 ) intelIdx = i;
|
||||
}
|
||||
}
|
||||
|
||||
if( deviceType == CL_DEVICE_TYPE_GPU )
|
||||
{
|
||||
switch( cfg.m_vendor )
|
||||
{
|
||||
case DeviceUtils::Config::VD_AMD:
|
||||
if( atiIdx == -1 && nvIdx != -1 ) goto USE_NV_GPU;
|
||||
USE_AMD_GPU:
|
||||
ADLASSERT(atiIdx != -1 );
|
||||
platform = pIdx[atiIdx];
|
||||
break;
|
||||
case DeviceUtils::Config::VD_NV:
|
||||
if( atiIdx != -1 && nvIdx == -1 ) goto USE_AMD_GPU;
|
||||
USE_NV_GPU:
|
||||
ADLASSERT(nvIdx != -1 );
|
||||
platform = pIdx[nvIdx];
|
||||
break;
|
||||
default:
|
||||
ADLASSERT(0);
|
||||
break;
|
||||
};
|
||||
}
|
||||
else if( deviceType == CL_DEVICE_TYPE_CPU )
|
||||
{
|
||||
switch( cfg.m_vendor )
|
||||
{
|
||||
case DeviceUtils::Config::VD_AMD:
|
||||
ADLASSERT(atiIdx != -1 );
|
||||
platform = pIdx[atiIdx];
|
||||
break;
|
||||
case DeviceUtils::Config::VD_INTEL:
|
||||
ADLASSERT(intelIdx != -1 );
|
||||
platform = pIdx[intelIdx];
|
||||
break;
|
||||
default:
|
||||
ADLASSERT(0);
|
||||
break;
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
cl_uint numDevice;
|
||||
status = clGetDeviceIDs( platform, deviceType, 0, NULL, &numDevice );
|
||||
|
||||
// ADLASSERT( cfg.m_deviceIdx < (int)numDevice );
|
||||
|
||||
debugPrintf("CL: %d %s Devices ", numDevice, (deviceType==CL_DEVICE_TYPE_GPU)? "GPU":"CPU");
|
||||
|
||||
// numContextQueuePairsToCreate = min( (int)numDevice, numContextQueuePairsToCreate );
|
||||
// numContextQueuePairsToCreate = ( (int)numDevice < numContextQueuePairsToCreate )? numDevice : numContextQueuePairsToCreate;
|
||||
|
||||
cl_device_id deviceIds[ MAX_NUM_DEVICES ];
|
||||
|
||||
status = clGetDeviceIDs( platform, deviceType, numDevice, deviceIds, NULL );
|
||||
ADLASSERT( status == CL_SUCCESS );
|
||||
|
||||
{ int i = min( (int)numDevice-1, cfg.m_deviceIdx );
|
||||
m_deviceIdx = deviceIds[i];
|
||||
deviceData->m_context = clCreateContext( NULL, 1, &deviceData->m_deviceIdx, NULL, NULL, &status );
|
||||
ADLASSERT( status == CL_SUCCESS );
|
||||
|
||||
char buff[512];
|
||||
status = clGetDeviceInfo( deviceData->m_deviceIdx, CL_DEVICE_NAME, sizeof(buff), &buff, NULL );
|
||||
ADLASSERT( status == CL_SUCCESS );
|
||||
|
||||
debugPrintf("[%s]\n", buff);
|
||||
|
||||
deviceData->m_commandQueue = clCreateCommandQueue( deviceData->m_context, deviceData->m_deviceIdx, (enableProfiling)?CL_QUEUE_PROFILING_ENABLE:NULL, NULL );
|
||||
|
||||
ADLASSERT( status == CL_SUCCESS );
|
||||
|
||||
// status = clSetCommandQueueProperty( commandQueue, CL_QUEUE_PROFILING_ENABLE, CL_TRUE, 0 );
|
||||
// CLASSERT( status == CL_SUCCESS );
|
||||
|
||||
if(0)
|
||||
{
|
||||
cl_bool image_support;
|
||||
clGetDeviceInfo(deviceData->m_deviceIdx, CL_DEVICE_IMAGE_SUPPORT, sizeof(image_support), &image_support, NULL);
|
||||
debugPrintf(" CL_DEVICE_IMAGE_SUPPORT : %s\n", image_support?"Yes":"No");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
m_kernelManager = new KernelManager;
|
||||
}
|
||||
|
||||
void DeviceCL::release()
|
||||
{
|
||||
clReleaseCommandQueue( m_commandQueue );
|
||||
clReleaseContext( m_context );
|
||||
|
||||
if( m_kernelManager ) delete m_kernelManager;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void DeviceCL::allocate(Buffer<T>* buf, int nElems, BufferBase::BufferType type)
|
||||
{
|
||||
buf->m_device = this;
|
||||
buf->m_size = nElems;
|
||||
buf->m_ptr = 0;
|
||||
|
||||
if( type == BufferBase::BUFFER_CONST ) return;
|
||||
|
||||
#if defined(ADL_CL_DUMP_MEMORY_LOG)
|
||||
char deviceName[256];
|
||||
getDeviceName( deviceName );
|
||||
printf( "adlCLMemoryLog %s : %3.2fMB Allocation: %3.2fKB ", deviceName, m_memoryUsage/1024.f/1024.f, sizeof(T)*nElems/1024.f );
|
||||
fflush( stdout );
|
||||
#endif
|
||||
|
||||
int sz=sizeof(T)*nElems;
|
||||
|
||||
cl_int status = 0;
|
||||
if( type == BufferBase::BUFFER_ZERO_COPY )
|
||||
buf->m_ptr = (T*)clCreateBuffer( m_context, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, sz, 0, &status );
|
||||
else if( type == BufferBase::BUFFER_RAW )
|
||||
buf->m_ptr = (T*)clCreateBuffer( m_context, CL_MEM_WRITE_ONLY, sz, 0, &status );
|
||||
else
|
||||
buf->m_ptr = (T*)clCreateBuffer( m_context, CL_MEM_READ_WRITE, sz, 0, &status );
|
||||
|
||||
m_memoryUsage += buf->m_size*sizeof(T);
|
||||
#if defined(ADL_CL_DUMP_MEMORY_LOG)
|
||||
printf( "%s\n", (status==CL_SUCCESS)? "Succeed": "Failed" );
|
||||
fflush( stdout );
|
||||
#endif
|
||||
ADLASSERT( status == CL_SUCCESS );
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void DeviceCL::deallocate(Buffer<T>* buf)
|
||||
{
|
||||
if( buf->m_ptr )
|
||||
{
|
||||
m_memoryUsage -= buf->m_size*sizeof(T);
|
||||
clReleaseMemObject( (cl_mem)buf->m_ptr );
|
||||
}
|
||||
buf->m_device = 0;
|
||||
buf->m_size = 0;
|
||||
buf->m_ptr = 0;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void DeviceCL::copy(Buffer<T>* dst, const Buffer<T>* src, int nElems,int srcOffsetNElems,int dstOffsetNElems )
|
||||
{
|
||||
if( dst->m_device->m_type == TYPE_CL && src->m_device->m_type == TYPE_CL )
|
||||
{
|
||||
cl_int status = 0;
|
||||
status = clEnqueueCopyBuffer( m_commandQueue, (cl_mem)src->m_ptr, (cl_mem)dst->m_ptr, sizeof(T)*srcOffsetNElems, sizeof(T)*dstOffsetNElems, sizeof(T)*nElems, 0, 0, 0 );
|
||||
ADLASSERT( status == CL_SUCCESS );
|
||||
}
|
||||
else if( src->m_device->m_type == TYPE_HOST )
|
||||
{
|
||||
ADLASSERT( dst->getType() == TYPE_CL );
|
||||
dst->write( src->m_ptr, nElems );
|
||||
}
|
||||
else if( dst->m_device->m_type == TYPE_HOST )
|
||||
{
|
||||
ADLASSERT( src->getType() == TYPE_CL );
|
||||
src->read( dst->m_ptr, nElems );
|
||||
}
|
||||
else
|
||||
{
|
||||
ADLASSERT( 0 );
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void DeviceCL::copy(T* dst, const Buffer<T>* src, int nElems, int srcOffsetNElems )
|
||||
{
|
||||
cl_int status = 0;
|
||||
status = clEnqueueReadBuffer( m_commandQueue, (cl_mem)src->m_ptr, 0, sizeof(T)*srcOffsetNElems, sizeof(T)*nElems,
|
||||
dst, 0,0,0 );
|
||||
ADLASSERT( status == CL_SUCCESS );
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void DeviceCL::copy(Buffer<T>* dst, const T* src, int nElems, int dstOffsetNElems )
|
||||
{
|
||||
cl_int status = 0;
|
||||
int sz=sizeof(T)*nElems;
|
||||
status = clEnqueueWriteBuffer( m_commandQueue, (cl_mem)dst->m_ptr, 0, sizeof(T)*dstOffsetNElems, sz,
|
||||
src, 0,0,0 );
|
||||
ADLASSERT( status == CL_SUCCESS );
|
||||
}
|
||||
|
||||
void DeviceCL::waitForCompletion() const
|
||||
{
|
||||
clFinish( m_commandQueue );
|
||||
}
|
||||
|
||||
int DeviceCL::getNDevices()
|
||||
{
|
||||
cl_device_type deviceType = CL_DEVICE_TYPE_GPU;
|
||||
cl_int status;
|
||||
|
||||
cl_platform_id platform;
|
||||
{
|
||||
cl_uint nPlatforms = 0;
|
||||
status = clGetPlatformIDs(0, NULL, &nPlatforms);
|
||||
ADLASSERT( status == CL_SUCCESS );
|
||||
|
||||
cl_platform_id pIdx[5];
|
||||
status = clGetPlatformIDs(nPlatforms, pIdx, NULL);
|
||||
ADLASSERT( status == CL_SUCCESS );
|
||||
|
||||
cl_uint nvIdx = -1;
|
||||
cl_uint atiIdx = -1;
|
||||
for(cl_uint i=0; i<nPlatforms; i++)
|
||||
{
|
||||
char buff[512];
|
||||
status = clGetPlatformInfo( pIdx[i], CL_PLATFORM_VENDOR, 512, buff, 0 );
|
||||
ADLASSERT( status == CL_SUCCESS );
|
||||
|
||||
if( strcmp( buff, "NVIDIA Corporation" )==0 ) nvIdx = i;
|
||||
if( strcmp( buff, "Advanced Micro Devices, Inc." )==0 ) atiIdx = i;
|
||||
}
|
||||
|
||||
if( deviceType == CL_DEVICE_TYPE_GPU )
|
||||
{
|
||||
if( nvIdx != -1 ) platform = pIdx[nvIdx];
|
||||
else platform = pIdx[atiIdx];
|
||||
}
|
||||
else if( deviceType == CL_DEVICE_TYPE_CPU )
|
||||
{
|
||||
platform = pIdx[atiIdx];
|
||||
}
|
||||
}
|
||||
|
||||
cl_uint numDevice;
|
||||
status = clGetDeviceIDs( platform, deviceType, 0, NULL, &numDevice );
|
||||
ADLASSERT( status == CL_SUCCESS );
|
||||
|
||||
return numDevice;
|
||||
}
|
||||
|
||||
void DeviceCL::getDeviceName( char nameOut[128] ) const
|
||||
{
|
||||
cl_int status;
|
||||
status = clGetDeviceInfo( m_deviceIdx, CL_DEVICE_NAME, sizeof(char)*128, nameOut, NULL );
|
||||
ADLASSERT( status == CL_SUCCESS );
|
||||
}
|
||||
|
||||
// Returns the kernel `funcName` by asking the kernel manager's query() with
// this device and the unchanged fileName / option / src / cacheKernel arguments.
Kernel* DeviceCL::getKernel(const char* fileName, const char* funcName, const char* option, const char* src, bool cacheKernel )const
{
	Kernel* found = m_kernelManager->query( this, fileName, funcName, option, src, cacheKernel );
	return found;
}
|
||||
|
||||
};
|
||||
@@ -0,0 +1,541 @@
|
||||
/*
|
||||
Copyright (c) 2012 Advanced Micro Devices, Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
//Originally written by Takahiro Harada
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
namespace adl
|
||||
{
|
||||
|
||||
struct KernelCL : public Kernel
|
||||
{
|
||||
cl_kernel& getKernel() { return (cl_kernel&)m_kernel; }
|
||||
};
|
||||
|
||||
// Returns the part of `name` that follows the last occurrence of `pattern`
// (e.g. the file name after the last path separator). If `pattern` does not
// occur, or is empty, `name` itself is returned. The result points into `name`.
static const char* strip(const char* name, const char* pattern)
{
	const size_t patlen = strlen(pattern);
	// An empty pattern matches at every position without advancing,
	// which used to spin forever; there is nothing to strip in that case.
	if( patlen == 0 ) return name;

	const char* tail = name;
	for( const char* hit = strstr( tail, pattern ); hit != 0; hit = strstr( tail, pattern ) )
	{
		// Continue the search right behind this occurrence.
		tail = hit + patlen;
	}
	return tail;
}
|
||||
|
||||
static bool isFileUpToDate(const char* binaryFileName,const char* srcFileName)
|
||||
|
||||
{
|
||||
bool fileUpToDate = false;
|
||||
|
||||
bool binaryFileValid=false;
|
||||
FILETIME modtimeBinary;
|
||||
|
||||
int nameLength = (int)strlen(binaryFileName)+1;
|
||||
#ifdef UNICODE
|
||||
WCHAR* fName = new WCHAR[nameLength];
|
||||
MultiByteToWideChar(CP_ACP,0,binaryFileName,-1, fName, nameLength);
|
||||
HANDLE binaryFileHandle = CreateFile(fName,GENERIC_READ,0,0,OPEN_EXISTING,FILE_ATTRIBUTE_NORMAL,0);
|
||||
delete [] fName;
|
||||
#else
|
||||
HANDLE binaryFileHandle = CreateFile(binaryFileName,GENERIC_READ,0,0,OPEN_EXISTING,FILE_ATTRIBUTE_NORMAL,0);
|
||||
#endif
|
||||
if (binaryFileHandle ==INVALID_HANDLE_VALUE)
|
||||
{
|
||||
DWORD errorCode;
|
||||
errorCode = GetLastError();
|
||||
switch (errorCode)
|
||||
{
|
||||
case ERROR_FILE_NOT_FOUND:
|
||||
{
|
||||
debugPrintf("\nCached file not found %s\n", binaryFileName);
|
||||
break;
|
||||
}
|
||||
case ERROR_PATH_NOT_FOUND:
|
||||
{
|
||||
debugPrintf("\nCached file path not found %s\n", binaryFileName);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
{
|
||||
debugPrintf("\nFailed reading cached file with errorCode = %d\n", errorCode);
|
||||
}
|
||||
}
|
||||
} else
|
||||
{
|
||||
if (GetFileTime(binaryFileHandle, NULL, NULL, &modtimeBinary)==0)
|
||||
{
|
||||
DWORD errorCode;
|
||||
errorCode = GetLastError();
|
||||
debugPrintf("\nGetFileTime errorCode = %d\n", errorCode);
|
||||
} else
|
||||
{
|
||||
binaryFileValid = true;
|
||||
}
|
||||
CloseHandle(binaryFileHandle);
|
||||
}
|
||||
|
||||
if (binaryFileValid)
|
||||
{
|
||||
#ifdef UNICODE
|
||||
int nameLength = (int)strlen(srcFileName)+1;
|
||||
WCHAR* fName = new WCHAR[nameLength];
|
||||
MultiByteToWideChar(CP_ACP,0,srcFileName,-1, fName, nameLength);
|
||||
HANDLE srcFileHandle = CreateFile(fName,GENERIC_READ,0,0,OPEN_EXISTING,FILE_ATTRIBUTE_NORMAL,0);
|
||||
delete [] fName;
|
||||
#else
|
||||
HANDLE srcFileHandle = CreateFile(srcFileName,GENERIC_READ,0,0,OPEN_EXISTING,FILE_ATTRIBUTE_NORMAL,0);
|
||||
#endif
|
||||
if (srcFileHandle!=INVALID_HANDLE_VALUE)
|
||||
{
|
||||
FILETIME modtimeSrc;
|
||||
if (GetFileTime(srcFileHandle, NULL, NULL, &modtimeSrc)==0)
|
||||
{
|
||||
DWORD errorCode;
|
||||
errorCode = GetLastError();
|
||||
debugPrintf("\nGetFileTime errorCode = %d\n", errorCode);
|
||||
}
|
||||
if ( ( modtimeSrc.dwHighDateTime < modtimeBinary.dwHighDateTime)
|
||||
||(( modtimeSrc.dwHighDateTime == modtimeBinary.dwHighDateTime)&&(modtimeSrc.dwLowDateTime <= modtimeBinary.dwLowDateTime)))
|
||||
{
|
||||
fileUpToDate=true;
|
||||
} else
|
||||
{
|
||||
debugPrintf("\nCached binary file found (%s), but out-of-date\n",binaryFileName);
|
||||
}
|
||||
CloseHandle(srcFileHandle);
|
||||
}
|
||||
else
|
||||
{
|
||||
#ifdef _DEBUG
|
||||
DWORD errorCode;
|
||||
errorCode = GetLastError();
|
||||
switch (errorCode)
|
||||
{
|
||||
case ERROR_FILE_NOT_FOUND:
|
||||
{
|
||||
debugPrintf("\nSrc file not found %s\n", srcFileName);
|
||||
break;
|
||||
}
|
||||
case ERROR_PATH_NOT_FOUND:
|
||||
{
|
||||
debugPrintf("\nSrc path not found %s\n", srcFileName);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
{
|
||||
debugPrintf("\nnSrc file reading errorCode = %d\n", errorCode);
|
||||
}
|
||||
}
|
||||
ADLASSERT(0);
|
||||
#else
|
||||
//if we cannot find the source, assume it is OK in release builds
|
||||
fileUpToDate = true;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
return fileUpToDate;
|
||||
}
|
||||
|
||||
template<>
|
||||
void KernelBuilder<TYPE_CL>::setFromFile( const Device* deviceData, const char* fileName, const char* option, bool addExtension,
|
||||
bool cacheKernel)
|
||||
{
|
||||
m_deviceData = deviceData;
|
||||
|
||||
char fileNameWithExtension[256];
|
||||
|
||||
if( addExtension )
|
||||
sprintf_s( fileNameWithExtension, "%s.cl", fileName );
|
||||
else
|
||||
sprintf_s( fileNameWithExtension, "%s", fileName );
|
||||
|
||||
class File
|
||||
{
|
||||
public:
|
||||
__inline
|
||||
bool open(const char* fileNameWithExtension)
|
||||
{
|
||||
size_t size;
|
||||
char* str;
|
||||
|
||||
// Open file stream
|
||||
std::fstream f(fileNameWithExtension, (std::fstream::in | std::fstream::binary));
|
||||
|
||||
// Check if we have opened file stream
|
||||
if (f.is_open()) {
|
||||
size_t sizeFile;
|
||||
// Find the stream size
|
||||
f.seekg(0, std::fstream::end);
|
||||
size = sizeFile = (size_t)f.tellg();
|
||||
f.seekg(0, std::fstream::beg);
|
||||
|
||||
str = new char[size + 1];
|
||||
if (!str) {
|
||||
f.close();
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Read file
|
||||
f.read(str, sizeFile);
|
||||
f.close();
|
||||
str[size] = '\0';
|
||||
|
||||
m_source = str;
|
||||
|
||||
delete[] str;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
const std::string& getSource() const {return m_source;}
|
||||
|
||||
private:
|
||||
std::string m_source;
|
||||
};
|
||||
|
||||
cl_program& program = (cl_program&)m_ptr;
|
||||
cl_int status = 0;
|
||||
|
||||
bool cacheBinary = cacheKernel;
|
||||
#if defined(ADL_CL_FORCE_UNCACHE_KERNEL)
|
||||
cacheBinary = false;
|
||||
#endif
|
||||
|
||||
char binaryFileName[512];
|
||||
{
|
||||
char deviceName[256];
|
||||
deviceData->getDeviceName(deviceName);
|
||||
char driverVersion[256];
|
||||
const DeviceCL* dd = (const DeviceCL*) deviceData;
|
||||
clGetDeviceInfo(dd->m_deviceIdx, CL_DRIVER_VERSION, 256, &driverVersion, NULL);
|
||||
const char* strippedFileName = strip(fileName,"\\");
|
||||
strippedFileName = strip(strippedFileName,"/");
|
||||
|
||||
sprintf_s(binaryFileName,"cache/%s.%s.%s.bin",strippedFileName, deviceName,driverVersion );
|
||||
}
|
||||
|
||||
bool upToDate = isFileUpToDate(binaryFileName,fileNameWithExtension);
|
||||
|
||||
if( cacheBinary && upToDate)
|
||||
{
|
||||
FILE* file = fopen(binaryFileName, "rb");
|
||||
|
||||
if( file )
|
||||
{
|
||||
fseek( file, 0L, SEEK_END );
|
||||
size_t binarySize = ftell( file );
|
||||
|
||||
rewind( file );
|
||||
char* binary = new char[binarySize];
|
||||
fread( binary, sizeof(char), binarySize, file );
|
||||
fclose( file );
|
||||
|
||||
if (binarySize)
|
||||
{
|
||||
const DeviceCL* dd = (const DeviceCL*) deviceData;
|
||||
program = clCreateProgramWithBinary( dd->m_context, 1, &dd->m_deviceIdx, &binarySize, (const unsigned char**)&binary, 0, &status );
|
||||
ADLASSERT( status == CL_SUCCESS );
|
||||
status = clBuildProgram( program, 1, &dd->m_deviceIdx, option, 0, 0 );
|
||||
ADLASSERT( status == CL_SUCCESS );
|
||||
if( status != CL_SUCCESS )
|
||||
{
|
||||
char *build_log;
|
||||
size_t ret_val_size;
|
||||
clGetProgramBuildInfo(program, dd->m_deviceIdx, CL_PROGRAM_BUILD_LOG, 0, NULL, &ret_val_size);
|
||||
build_log = new char[ret_val_size+1];
|
||||
clGetProgramBuildInfo(program, dd->m_deviceIdx, CL_PROGRAM_BUILD_LOG, ret_val_size, build_log, NULL);
|
||||
|
||||
build_log[ret_val_size] = '\0';
|
||||
|
||||
debugPrintf("%s\n", build_log);
|
||||
|
||||
delete build_log;
|
||||
ADLASSERT(0);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
if( !m_ptr )
|
||||
{
|
||||
File kernelFile;
|
||||
ADLASSERT( kernelFile.open( fileNameWithExtension ) );
|
||||
const char* source = kernelFile.getSource().c_str();
|
||||
setFromSrc( m_deviceData, source, option );
|
||||
|
||||
if( cacheBinary )
|
||||
{ // write to binary
|
||||
size_t binarySize;
|
||||
status = clGetProgramInfo( program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &binarySize, 0 );
|
||||
ADLASSERT( status == CL_SUCCESS );
|
||||
|
||||
char* binary = new char[binarySize];
|
||||
|
||||
status = clGetProgramInfo( program, CL_PROGRAM_BINARIES, sizeof(char*), &binary, 0 );
|
||||
ADLASSERT( status == CL_SUCCESS );
|
||||
|
||||
{
|
||||
FILE* file = fopen(binaryFileName, "wb");
|
||||
if (file)
|
||||
{
|
||||
fwrite( binary, sizeof(char), binarySize, file );
|
||||
fclose( file );
|
||||
}
|
||||
}
|
||||
|
||||
delete [] binary;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<>
|
||||
void KernelBuilder<TYPE_CL>::setFromSrcCached( const Device* deviceData, const char* src, const char* fileName, const char* option )
|
||||
{
|
||||
m_deviceData = deviceData;
|
||||
|
||||
bool cacheBinary = true;
|
||||
cl_program& program = (cl_program&)m_ptr;
|
||||
cl_int status = 0;
|
||||
|
||||
char binaryFileName[512];
|
||||
{
|
||||
char deviceName[256];
|
||||
deviceData->getDeviceName(deviceName);
|
||||
char driverVersion[256];
|
||||
const DeviceCL* dd = (const DeviceCL*) deviceData;
|
||||
clGetDeviceInfo(dd->m_deviceIdx, CL_DRIVER_VERSION, 256, &driverVersion, NULL);
|
||||
|
||||
const char* strippedFileName = strip(fileName,"\\");
|
||||
strippedFileName = strip(strippedFileName,"/");
|
||||
|
||||
sprintf_s(binaryFileName,"cache/%s.%s.%s.bin",strippedFileName, deviceName,driverVersion );
|
||||
}
|
||||
|
||||
|
||||
char fileNameWithExtension[256];
|
||||
sprintf_s(fileNameWithExtension,"%s.cl",fileName, ".cl");
|
||||
|
||||
bool upToDate = isFileUpToDate(binaryFileName,fileNameWithExtension);
|
||||
|
||||
|
||||
if( cacheBinary )
|
||||
{
|
||||
|
||||
bool fileUpToDate = isFileUpToDate(binaryFileName,fileNameWithExtension);
|
||||
|
||||
if( fileUpToDate)
|
||||
{
|
||||
FILE* file = fopen(binaryFileName, "rb");
|
||||
if (file)
|
||||
{
|
||||
fseek( file, 0L, SEEK_END );
|
||||
size_t binarySize = ftell( file );
|
||||
rewind( file );
|
||||
char* binary = new char[binarySize];
|
||||
fread( binary, sizeof(char), binarySize, file );
|
||||
fclose( file );
|
||||
|
||||
const DeviceCL* dd = (const DeviceCL*) deviceData;
|
||||
program = clCreateProgramWithBinary( dd->m_context, 1, &dd->m_deviceIdx, &binarySize, (const unsigned char**)&binary, 0, &status );
|
||||
ADLASSERT( status == CL_SUCCESS );
|
||||
status = clBuildProgram( program, 1, &dd->m_deviceIdx, option, 0, 0 );
|
||||
ADLASSERT( status == CL_SUCCESS );
|
||||
|
||||
if( status != CL_SUCCESS )
|
||||
{
|
||||
char *build_log;
|
||||
size_t ret_val_size;
|
||||
clGetProgramBuildInfo(program, dd->m_deviceIdx, CL_PROGRAM_BUILD_LOG, 0, NULL, &ret_val_size);
|
||||
build_log = new char[ret_val_size+1];
|
||||
clGetProgramBuildInfo(program, dd->m_deviceIdx, CL_PROGRAM_BUILD_LOG, ret_val_size, build_log, NULL);
|
||||
|
||||
build_log[ret_val_size] = '\0';
|
||||
|
||||
debugPrintf("%s\n", build_log);
|
||||
|
||||
delete build_log;
|
||||
ADLASSERT(0);
|
||||
}
|
||||
delete[] binary;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if( !m_ptr )
|
||||
{
|
||||
|
||||
setFromSrc( deviceData, src, option );
|
||||
|
||||
if( cacheBinary )
|
||||
{ // write to binary
|
||||
cl_uint numAssociatedDevices;
|
||||
status = clGetProgramInfo( program, CL_PROGRAM_NUM_DEVICES, sizeof(cl_uint), &numAssociatedDevices, 0 );
|
||||
ADLASSERT( status == CL_SUCCESS );
|
||||
if (numAssociatedDevices==1)
|
||||
{
|
||||
|
||||
|
||||
size_t binarySize;
|
||||
status = clGetProgramInfo( program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &binarySize, 0 );
|
||||
ADLASSERT( status == CL_SUCCESS );
|
||||
|
||||
char* binary = new char[binarySize];
|
||||
|
||||
status = clGetProgramInfo( program, CL_PROGRAM_BINARIES, sizeof(char*), &binary, 0 );
|
||||
ADLASSERT( status == CL_SUCCESS );
|
||||
|
||||
{
|
||||
FILE* file = fopen(binaryFileName, "wb");
|
||||
if (file)
|
||||
{
|
||||
fwrite( binary, sizeof(char), binarySize, file );
|
||||
fclose( file );
|
||||
}
|
||||
}
|
||||
|
||||
delete [] binary;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template<>
|
||||
void KernelBuilder<TYPE_CL>::setFromSrc( const Device* deviceData, const char* src, const char* option )
|
||||
{
|
||||
ADLASSERT( deviceData->m_type == TYPE_CL );
|
||||
m_deviceData = deviceData;
|
||||
const DeviceCL* dd = (const DeviceCL*) deviceData;
|
||||
|
||||
cl_program& program = (cl_program&)m_ptr;
|
||||
cl_int status = 0;
|
||||
size_t srcSize[] = {strlen( src )};
|
||||
program = clCreateProgramWithSource( dd->m_context, 1, &src, srcSize, &status );
|
||||
ADLASSERT( status == CL_SUCCESS );
|
||||
status = clBuildProgram( program, 1, &dd->m_deviceIdx, option, NULL, NULL );
|
||||
if( status != CL_SUCCESS )
|
||||
{
|
||||
char *build_log;
|
||||
size_t ret_val_size;
|
||||
clGetProgramBuildInfo(program, dd->m_deviceIdx, CL_PROGRAM_BUILD_LOG, 0, NULL, &ret_val_size);
|
||||
build_log = new char[ret_val_size+1];
|
||||
clGetProgramBuildInfo(program, dd->m_deviceIdx, CL_PROGRAM_BUILD_LOG, ret_val_size, build_log, NULL);
|
||||
|
||||
build_log[ret_val_size] = '\0';
|
||||
|
||||
debugPrintf("%s\n", build_log);
|
||||
printf("%s\n", build_log);
|
||||
|
||||
ADLASSERT(0);
|
||||
delete build_log;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
template<>
|
||||
KernelBuilder<TYPE_CL>::~KernelBuilder()
|
||||
{
|
||||
cl_program program = (cl_program)m_ptr;
|
||||
clReleaseProgram( program );
|
||||
}
|
||||
|
||||
template<>
|
||||
void KernelBuilder<TYPE_CL>::createKernel( const char* funcName, Kernel& kernelOut )
|
||||
{
|
||||
KernelCL* clKernel = (KernelCL*)&kernelOut;
|
||||
|
||||
cl_program program = (cl_program)m_ptr;
|
||||
cl_int status = 0;
|
||||
clKernel->getKernel() = clCreateKernel(program, funcName, &status );
|
||||
ADLASSERT( status == CL_SUCCESS );
|
||||
|
||||
kernelOut.m_type = TYPE_CL;
|
||||
}
|
||||
|
||||
template<>
|
||||
void KernelBuilder<TYPE_CL>::deleteKernel( Kernel& kernel )
|
||||
{
|
||||
KernelCL* clKernel = (KernelCL*)&kernel;
|
||||
clReleaseKernel( clKernel->getKernel() );
|
||||
}
|
||||
|
||||
|
||||
|
||||
class LauncherCL
|
||||
{
|
||||
public:
|
||||
typedef Launcher::BufferInfo BufferInfo;
|
||||
|
||||
__inline
|
||||
static void setBuffers( Launcher* launcher, BufferInfo* buffInfo, int n );
|
||||
template<typename T>
|
||||
__inline
|
||||
static void setConst( Launcher* launcher, Buffer<T>& constBuff, const T& consts );
|
||||
__inline
|
||||
static void launch2D( Launcher* launcher, int numThreadsX, int numThreadsY, int localSizeX, int localSizeY );
|
||||
};
|
||||
|
||||
// Binds the first n entries of buffInfo as cl_mem kernel arguments, one per
// argument slot, advancing launcher->m_idx for each.
void LauncherCL::setBuffers( Launcher* launcher, BufferInfo* buffInfo, int n )
{
    KernelCL* kernel = (KernelCL*)launcher->m_kernel;

    BufferInfo* info = buffInfo;
    for( int remaining = n; remaining > 0; --remaining, ++info )
    {
        // Only m_ptr is read, so the element type used for the cast is irrelevant.
        Buffer<int>* buffer = (Buffer<int>*)info->m_buffer;
        cl_int err = clSetKernelArg( kernel->getKernel(), launcher->m_idx++, sizeof(cl_mem), &buffer->m_ptr );
        ADLASSERT( err == CL_SUCCESS );
    }
}
|
||||
|
||||
template<typename T>
|
||||
void LauncherCL::setConst( Launcher* launcher, Buffer<T>& constBuff, const T& consts )
|
||||
{
|
||||
KernelCL* clKernel = (KernelCL*)launcher->m_kernel;
|
||||
int sz=sizeof(T);
|
||||
cl_int status = clSetKernelArg( clKernel->getKernel(), launcher->m_idx++, sz, &consts );
|
||||
ADLASSERT( status == CL_SUCCESS );
|
||||
}
|
||||
|
||||
// Enqueues the launcher's kernel on the device command queue over a 2D range.
// Each global dimension is the thread count rounded up to a whole number of
// work-groups, with at least one work-group per dimension.
void LauncherCL::launch2D( Launcher* launcher, int numThreadsX, int numThreadsY, int localSizeX, int localSizeY )
{
    KernelCL* kernel = (KernelCL*)launcher->m_kernel;
    const DeviceCL* device = (const DeviceCL*)launcher->m_deviceData;

    size_t localSize[3] = { (size_t)localSizeX, (size_t)localSizeY, 1 };
    size_t globalSize[3] = { 1, 1, 1 };

    for( int axis = 0; axis < 2; axis++ )
    {
        const int numThreads = (axis == 0) ? numThreadsX : numThreadsY;

        // ceil( numThreads / localSize ), evaluated in size_t arithmetic
        size_t numGroups = numThreads / localSize[axis];
        if( numThreads % localSize[axis] )
            numGroups += 1;

        globalSize[axis] = max( (size_t)1, numGroups );
        globalSize[axis] *= localSize[axis];
    }

    cl_int err = clEnqueueNDRangeKernel( device->m_commandQueue,
        kernel->getKernel(), 2, NULL, globalSize, localSize, 0,0,0 );
    ADLASSERT( err == CL_SUCCESS );
}
|
||||
|
||||
|
||||
};
|
||||
@@ -0,0 +1,512 @@
|
||||
/*
|
||||
Copyright (c) 2012 Advanced Micro Devices, Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
//Originally written by Takahiro Harada
|
||||
|
||||
|
||||
#include <windows.h>
|
||||
#include <d3d11.h>
|
||||
#include <d3dx11.h>
|
||||
#include <d3dcompiler.h>
|
||||
#include <DXGI.h>
|
||||
#pragma comment(lib,"d3dx11.lib")
|
||||
#pragma comment(lib,"d3d11.lib")
|
||||
#pragma comment(lib,"DXGI.lib")
|
||||
|
||||
namespace adl
|
||||
{
|
||||
|
||||
#define u32 unsigned int
|
||||
|
||||
struct DeviceDX11 : public Device
|
||||
{
|
||||
typedef DeviceUtils::Config Config;
|
||||
|
||||
|
||||
__inline
|
||||
DeviceDX11() : Device( TYPE_DX11 ), m_kernelManager(0){}
|
||||
__inline
|
||||
void* getContext() const { return m_context; }
|
||||
__inline
|
||||
void initialize(const Config& cfg);
|
||||
__inline
|
||||
void release();
|
||||
|
||||
template<typename T>
|
||||
__inline
|
||||
void allocate(Buffer<T>* buf, int nElems, BufferBase::BufferType type);
|
||||
|
||||
template<typename T>
|
||||
__inline
|
||||
void deallocate(Buffer<T>* buf);
|
||||
|
||||
template<typename T>
|
||||
__inline
|
||||
void copy(Buffer<T>* dst, const Buffer<T>* src, int nElems);
|
||||
|
||||
template<typename T>
|
||||
__inline
|
||||
void copy(T* dst, const Buffer<T>* src, int nElems, int srcOffsetNElems = 0);
|
||||
|
||||
template<typename T>
|
||||
__inline
|
||||
void copy(Buffer<T>* dst, const T* src, int nElems, int dstOffsetNElems = 0);
|
||||
|
||||
__inline
|
||||
void waitForCompletion() const;
|
||||
|
||||
__inline
|
||||
void getDeviceName( char nameOut[128] ) const;
|
||||
|
||||
__inline
|
||||
static
|
||||
int getNDevices();
|
||||
|
||||
__inline
|
||||
Kernel* getKernel(const char* fileName, const char* funcName, const char* option = NULL, const char* src = NULL, bool cacheKernel = true )const;
|
||||
|
||||
|
||||
ID3D11DeviceContext* m_context;
|
||||
ID3D11Device* m_device;
|
||||
IDXGISwapChain* m_swapChain;
|
||||
|
||||
KernelManager* m_kernelManager;
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
struct BufferDX11 : public Buffer<T>
|
||||
{
|
||||
ID3D11Buffer* getBuffer() { return (ID3D11Buffer*)m_ptr; }
|
||||
ID3D11UnorderedAccessView* getUAV() { return (ID3D11UnorderedAccessView*)m_uav; }
|
||||
ID3D11ShaderResourceView* getSRV() { return (ID3D11ShaderResourceView*)m_srv; }
|
||||
|
||||
ID3D11Buffer** getBufferPtr() { return (ID3D11Buffer**)&m_ptr; }
|
||||
ID3D11UnorderedAccessView** getUAVPtr() { return (ID3D11UnorderedAccessView**)&m_uav; }
|
||||
ID3D11ShaderResourceView** getSRVPtr() { return (ID3D11ShaderResourceView**)&m_srv; }
|
||||
};
|
||||
|
||||
#define SAFE_RELEASE(p) { if(p) { (p)->Release(); (p)=NULL; } }
|
||||
|
||||
|
||||
void DeviceDX11::initialize(const Config& cfg)
|
||||
{
|
||||
DeviceDX11* deviceData = this;
|
||||
|
||||
HRESULT hr = S_OK;
|
||||
UINT createDeviceFlg = 0;
|
||||
#ifdef _DEBUG
|
||||
createDeviceFlg |= D3D11_CREATE_DEVICE_DEBUG;
|
||||
#endif
|
||||
D3D_FEATURE_LEVEL fl[] = {
|
||||
D3D_FEATURE_LEVEL_11_0,
|
||||
D3D_FEATURE_LEVEL_10_1,
|
||||
D3D_FEATURE_LEVEL_10_0
|
||||
};
|
||||
|
||||
typedef HRESULT (WINAPI * LPD3D11CREATEDEVICE)( IDXGIAdapter*, D3D_DRIVER_TYPE, HMODULE, u32, D3D_FEATURE_LEVEL*, UINT, u32, ID3D11Device**, D3D_FEATURE_LEVEL*, ID3D11DeviceContext** );
|
||||
|
||||
HMODULE moduleD3D11 = 0;
|
||||
#ifdef UNICODE
|
||||
moduleD3D11 = LoadLibrary( L"d3d11.dll" );
|
||||
#else
|
||||
moduleD3D11 = LoadLibrary( "d3d11.dll" );
|
||||
#endif
|
||||
ADLASSERT( moduleD3D11 );
|
||||
|
||||
LPD3D11CREATEDEVICE _DynamicD3D11CreateDevice;
|
||||
_DynamicD3D11CreateDevice = ( LPD3D11CREATEDEVICE )GetProcAddress( moduleD3D11, "D3D11CreateDevice" );
|
||||
|
||||
D3D_DRIVER_TYPE type = D3D_DRIVER_TYPE_HARDWARE;
|
||||
// http://msdn.microsoft.com/en-us/library/ff476082(v=VS.85).aspx
|
||||
// If you set the pAdapter parameter to a non-NULL value, you must also set the DriverType parameter to the D3D_DRIVER_TYPE_UNKNOWN value. If you set the pAdapter parameter to a non-NULL value and the DriverType parameter to the D3D_DRIVER_TYPE_HARDWARE value, D3D11CreateDevice returns an HRESULT of E_INVALIDARG.
|
||||
type = D3D_DRIVER_TYPE_UNKNOWN;
|
||||
/*
|
||||
// Create a hardware Direct3D 11 device
|
||||
hr = _DynamicD3D11CreateDevice( NULL,
|
||||
type, NULL, createDeviceFlg,
|
||||
fl, _countof(fl), D3D11_SDK_VERSION, &deviceData->m_device, NULL, &deviceData->m_context );
|
||||
*/
|
||||
IDXGIAdapter* adapter = NULL;
|
||||
{// get adapter of the index
|
||||
IDXGIFactory* factory = NULL;
|
||||
int targetAdapterIdx = cfg.m_deviceIdx;//min( cfg.m_deviceIdx, getNDevices()-1 );
|
||||
CreateDXGIFactory( __uuidof(IDXGIFactory), (void**)&factory );
|
||||
|
||||
u32 i = 0;
|
||||
while( factory->EnumAdapters( i, &adapter ) != DXGI_ERROR_NOT_FOUND )
|
||||
{
|
||||
if( i== targetAdapterIdx ) break;
|
||||
i++;
|
||||
}
|
||||
factory->Release();
|
||||
}
|
||||
|
||||
// Create a hardware Direct3D 11 device
|
||||
hr = D3D11CreateDevice( adapter,
|
||||
type,
|
||||
NULL, createDeviceFlg,
|
||||
fl, _countof(fl), D3D11_SDK_VERSION, &deviceData->m_device, NULL, &deviceData->m_context );
|
||||
|
||||
ADLASSERT( hr == S_OK );
|
||||
|
||||
// Check if the hardware device supports Compute Shader 4.0
|
||||
D3D11_FEATURE_DATA_D3D10_X_HARDWARE_OPTIONS hwopts;
|
||||
deviceData->m_device->CheckFeatureSupport(D3D11_FEATURE_D3D10_X_HARDWARE_OPTIONS, &hwopts, sizeof(hwopts));
|
||||
|
||||
if( !hwopts.ComputeShaders_Plus_RawAndStructuredBuffers_Via_Shader_4_x )
|
||||
{
|
||||
SAFE_RELEASE( deviceData->m_context );
|
||||
SAFE_RELEASE( deviceData->m_device );
|
||||
|
||||
debugPrintf("DX11 GPU is not present\n");
|
||||
ADLASSERT( 0 );
|
||||
}
|
||||
|
||||
m_kernelManager = new KernelManager;
|
||||
}
|
||||
|
||||
void DeviceDX11::release()
|
||||
{
|
||||
SAFE_RELEASE( m_context );
|
||||
SAFE_RELEASE( m_device );
|
||||
|
||||
if( m_kernelManager ) delete m_kernelManager;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void DeviceDX11::allocate(Buffer<T>* buf, int nElems, BufferBase::BufferType type)
|
||||
{
|
||||
ADLASSERT( type != BufferBase::BUFFER_ZERO_COPY );
|
||||
|
||||
DeviceDX11* deviceData = this;
|
||||
buf->m_device = deviceData;
|
||||
buf->m_size = nElems;
|
||||
BufferDX11<T>* dBuf = (BufferDX11<T>*)buf;
|
||||
|
||||
// if( type & BufferBase::BUFFER )
|
||||
{
|
||||
HRESULT hr = S_OK;
|
||||
|
||||
if( type == BufferBase::BUFFER_CONST )
|
||||
{
|
||||
ADLASSERT( nElems == 1 );
|
||||
D3D11_BUFFER_DESC constant_buffer_desc;
|
||||
ZeroMemory( &constant_buffer_desc, sizeof(constant_buffer_desc) );
|
||||
// constant_buffer_desc.ByteWidth = NEXTMULTIPLEOF( sizeof(T), 16 );
|
||||
constant_buffer_desc.ByteWidth = (((sizeof(T))/(16) + (((sizeof(T))%(16)==0)?0:1))*(16));
|
||||
// constant_buffer_desc.Usage = D3D11_USAGE_DYNAMIC;
|
||||
// constant_buffer_desc.BindFlags = D3D11_BIND_CONSTANT_BUFFER;
|
||||
// constant_buffer_desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
|
||||
constant_buffer_desc.Usage = D3D11_USAGE_DEFAULT;
|
||||
constant_buffer_desc.BindFlags = D3D11_BIND_CONSTANT_BUFFER;
|
||||
constant_buffer_desc.CPUAccessFlags = 0;
|
||||
|
||||
hr = deviceData->m_device->CreateBuffer( &constant_buffer_desc, NULL, dBuf->getBufferPtr() );
|
||||
ADLASSERT( hr == S_OK );
|
||||
return;
|
||||
}
|
||||
|
||||
D3D11_BUFFER_DESC buffer_desc;
|
||||
ZeroMemory(&buffer_desc, sizeof(buffer_desc));
|
||||
buffer_desc.ByteWidth = nElems * sizeof(T);
|
||||
|
||||
if( type != BufferBase::BUFFER_RAW )
|
||||
{
|
||||
buffer_desc.StructureByteStride = sizeof(T);
|
||||
// buffer_desc.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
|
||||
}
|
||||
|
||||
if( type == BufferBase::BUFFER_STAGING )
|
||||
{
|
||||
buffer_desc.Usage = D3D11_USAGE_STAGING;
|
||||
buffer_desc.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
|
||||
}
|
||||
else if( type == BufferBase::BUFFER_INDEX )
|
||||
{
|
||||
buffer_desc.Usage = D3D11_USAGE_DEFAULT;
|
||||
buffer_desc.BindFlags = D3D11_BIND_INDEX_BUFFER;
|
||||
}
|
||||
else if( type == BufferBase::BUFFER_VERTEX )
|
||||
{
|
||||
buffer_desc.Usage = D3D11_USAGE_DEFAULT;
|
||||
buffer_desc.BindFlags = D3D11_BIND_VERTEX_BUFFER;
|
||||
}
|
||||
else
|
||||
{
|
||||
buffer_desc.Usage = D3D11_USAGE_DEFAULT;
|
||||
|
||||
buffer_desc.BindFlags = D3D11_BIND_UNORDERED_ACCESS | D3D11_BIND_SHADER_RESOURCE;
|
||||
buffer_desc.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_STRUCTURED;
|
||||
|
||||
// check this
|
||||
if(type == BufferBase::BUFFER_RAW)
|
||||
{
|
||||
// buffer_desc.BindFlags |= D3D11_BIND_INDEX_BUFFER | D3D11_BIND_VERTEX_BUFFER;
|
||||
buffer_desc.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_ALLOW_RAW_VIEWS | D3D11_RESOURCE_MISC_DRAWINDIRECT_ARGS; // need this to be used for DispatchIndirect
|
||||
}
|
||||
}
|
||||
hr = deviceData->m_device->CreateBuffer(&buffer_desc, NULL, dBuf->getBufferPtr());
|
||||
|
||||
ADLASSERT( hr == S_OK );
|
||||
|
||||
if( type == BufferBase::BUFFER_INDEX ) return;
|
||||
|
||||
if( type == BufferBase::BUFFER ||
|
||||
type == BufferBase::BUFFER_RAW ||
|
||||
type == BufferBase::BUFFER_W_COUNTER )
|
||||
{
|
||||
// Create UAVs for all CS buffers
|
||||
D3D11_UNORDERED_ACCESS_VIEW_DESC uavbuffer_desc;
|
||||
ZeroMemory(&uavbuffer_desc, sizeof(uavbuffer_desc));
|
||||
uavbuffer_desc.ViewDimension = D3D11_UAV_DIMENSION_BUFFER;
|
||||
|
||||
if( type == BufferBase::BUFFER_RAW )
|
||||
{
|
||||
uavbuffer_desc.Format = DXGI_FORMAT_R32_TYPELESS;
|
||||
uavbuffer_desc.Buffer.Flags = D3D11_BUFFER_UAV_FLAG_RAW;
|
||||
uavbuffer_desc.Buffer.NumElements = buffer_desc.ByteWidth / 4;
|
||||
}
|
||||
else
|
||||
{
|
||||
uavbuffer_desc.Format = DXGI_FORMAT_UNKNOWN;
|
||||
uavbuffer_desc.Buffer.NumElements = nElems;
|
||||
}
|
||||
|
||||
if( type == BufferBase::BUFFER_W_COUNTER )
|
||||
{
|
||||
uavbuffer_desc.Buffer.Flags = D3D11_BUFFER_UAV_FLAG_COUNTER;
|
||||
}
|
||||
|
||||
hr = deviceData->m_device->CreateUnorderedAccessView(dBuf->getBuffer(), &uavbuffer_desc, dBuf->getUAVPtr());
|
||||
ADLASSERT( hr == S_OK );
|
||||
|
||||
// Create SRVs for all CS buffers
|
||||
D3D11_SHADER_RESOURCE_VIEW_DESC srvbuffer_desc;
|
||||
ZeroMemory(&srvbuffer_desc, sizeof(srvbuffer_desc));
|
||||
if( type == BufferBase::BUFFER_RAW )
|
||||
{
|
||||
ADLASSERT( sizeof(T) <= 16 );
|
||||
srvbuffer_desc.Format = DXGI_FORMAT_R32_UINT;
|
||||
srvbuffer_desc.Buffer.ElementWidth = nElems;
|
||||
// if ( buffer_desc.MiscFlags & D3D11_RESOURCE_MISC_BUFFER_ALLOW_RAW_VIEWS )
|
||||
// {
|
||||
// srvbuffer_desc.Format = DXGI_FORMAT_R32_TYPELESS;
|
||||
// srvbuffer_desc.BufferEx.Flags = D3D11_BUFFEREX_SRV_FLAG_RAW;
|
||||
// srvbuffer_desc.BufferEx.NumElements = buffer_desc.ByteWidth / 4;
|
||||
}
|
||||
else
|
||||
{
|
||||
srvbuffer_desc.Format = DXGI_FORMAT_UNKNOWN;
|
||||
srvbuffer_desc.Buffer.ElementWidth = nElems;
|
||||
}
|
||||
srvbuffer_desc.ViewDimension = D3D11_SRV_DIMENSION_BUFFER;
|
||||
|
||||
hr = deviceData->m_device->CreateShaderResourceView(dBuf->getBuffer(), &srvbuffer_desc, dBuf->getSRVPtr());
|
||||
ADLASSERT( hr == S_OK );
|
||||
}
|
||||
else if( type == BufferBase::BUFFER_APPEND )
|
||||
{
|
||||
D3D11_UNORDERED_ACCESS_VIEW_DESC desc;
|
||||
ZeroMemory( &desc, sizeof(desc) );
|
||||
desc.ViewDimension = D3D11_UAV_DIMENSION_BUFFER;
|
||||
desc.Buffer.FirstElement = 0;
|
||||
|
||||
desc.Buffer.Flags = D3D11_BUFFER_UAV_FLAG_APPEND;
|
||||
|
||||
desc.Format = DXGI_FORMAT_UNKNOWN; // Format must be must be DXGI_FORMAT_UNKNOWN, when creating a View of a Structured Buffer
|
||||
desc.Buffer.NumElements = buffer_desc.ByteWidth / buffer_desc.StructureByteStride;
|
||||
|
||||
hr = deviceData->m_device->CreateUnorderedAccessView( dBuf->getBuffer(), &desc, dBuf->getUAVPtr() );
|
||||
ADLASSERT( hr == S_OK );
|
||||
}
|
||||
}
|
||||
// else
|
||||
// {
|
||||
// ADLASSERT(0);
|
||||
// }
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void DeviceDX11::deallocate(Buffer<T>* buf)
|
||||
{
|
||||
BufferDX11<T>* dBuf = (BufferDX11<T>*)buf;
|
||||
|
||||
if( dBuf->getBuffer() )
|
||||
{
|
||||
dBuf->getBuffer()->Release();
|
||||
dBuf->m_ptr = NULL;
|
||||
}
|
||||
if( dBuf->getUAV() )
|
||||
{
|
||||
dBuf->getUAV()->Release();
|
||||
dBuf->m_uav = NULL;
|
||||
}
|
||||
if( dBuf->getSRV() )
|
||||
{
|
||||
dBuf->getSRV()->Release();
|
||||
dBuf->m_srv = NULL;
|
||||
}
|
||||
buf->m_device = 0;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void DeviceDX11::copy(Buffer<T>* dst, const Buffer<T>* src, int nElems)
|
||||
{
|
||||
if( dst->m_device->m_type == TYPE_DX11 || src->m_device->m_type == TYPE_DX11 )
|
||||
{
|
||||
DeviceDX11* deviceData = this;
|
||||
BufferDX11<T>* dDst = (BufferDX11<T>*)dst;
|
||||
BufferDX11<T>* dSrc = (BufferDX11<T>*)src;
|
||||
|
||||
D3D11_MAPPED_SUBRESOURCE MappedVelResource = {0};
|
||||
|
||||
D3D11_BOX destRegion;
|
||||
destRegion.left = 0*sizeof(T);
|
||||
destRegion.front = 0;
|
||||
destRegion.top = 0;
|
||||
destRegion.bottom = 1;
|
||||
destRegion.back = 1;
|
||||
destRegion.right = (0+nElems)*sizeof(T);
|
||||
|
||||
deviceData->m_context->CopySubresourceRegion(
|
||||
dDst->getBuffer(),
|
||||
0, 0, 0, 0,
|
||||
dSrc->getBuffer(),
|
||||
0,
|
||||
&destRegion );
|
||||
|
||||
}
|
||||
else if( src->m_device->m_type == TYPE_HOST )
|
||||
{
|
||||
ADLASSERT( dst->getType() == TYPE_DX11 );
|
||||
dst->write( src->m_ptr, nElems );
|
||||
}
|
||||
else if( dst->m_device->m_type == TYPE_HOST )
|
||||
{
|
||||
ADLASSERT( src->getType() == TYPE_DX11 );
|
||||
src->read( dst->m_ptr, nElems );
|
||||
}
|
||||
else
|
||||
{
|
||||
ADLASSERT( 0 );
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void DeviceDX11::copy(T* dst, const Buffer<T>* src, int nElems, int srcOffsetNElems)
|
||||
{
|
||||
DeviceDX11* deviceData = this;
|
||||
BufferDX11<T>* dSrc = (BufferDX11<T>*)src;
|
||||
Buffer<T> sBuf( deviceData, nElems, BufferBase::BUFFER_STAGING );
|
||||
BufferDX11<T>* dStagingBuf = (BufferDX11<T>*)&sBuf;
|
||||
|
||||
|
||||
ID3D11Buffer *StagingBuffer = dStagingBuf->getBuffer();
|
||||
D3D11_MAPPED_SUBRESOURCE MappedVelResource = {0};
|
||||
|
||||
D3D11_BOX destRegion;
|
||||
destRegion.left = srcOffsetNElems*sizeof(T);
|
||||
destRegion.front = 0;
|
||||
destRegion.top = 0;
|
||||
destRegion.bottom = 1;
|
||||
destRegion.back = 1;
|
||||
destRegion.right = (srcOffsetNElems+nElems)*sizeof(T);
|
||||
|
||||
deviceData->m_context->CopySubresourceRegion(
|
||||
StagingBuffer,
|
||||
0, 0, 0, 0,
|
||||
dSrc->getBuffer(),
|
||||
0,
|
||||
&destRegion);
|
||||
|
||||
deviceData->m_context->Map(StagingBuffer, 0, D3D11_MAP_READ, 0, &MappedVelResource);
|
||||
memcpy(dst, MappedVelResource.pData, nElems*sizeof(T));
|
||||
deviceData->m_context->Unmap(StagingBuffer, 0);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void DeviceDX11::copy(Buffer<T>* dst, const T* src, int nElems, int dstOffsetNElems)
|
||||
{
|
||||
BufferDX11<T>* dBuf = (BufferDX11<T>*)dst;
|
||||
|
||||
DeviceDX11* deviceData = this;
|
||||
|
||||
D3D11_BOX destRegion;
|
||||
destRegion.left = dstOffsetNElems*sizeof(T);
|
||||
destRegion.front = 0;
|
||||
destRegion.top = 0;
|
||||
destRegion.bottom = 1;
|
||||
destRegion.back = 1;
|
||||
destRegion.right = (dstOffsetNElems+nElems)*sizeof(T);
|
||||
deviceData->m_context->UpdateSubresource(dBuf->getBuffer(), 0, &destRegion, src, 0, 0);
|
||||
}
|
||||
|
||||
void DeviceDX11::waitForCompletion() const
|
||||
{
|
||||
const DeviceDX11* deviceData = this;
|
||||
|
||||
ID3D11Query* syncQuery;
|
||||
D3D11_QUERY_DESC qDesc;
|
||||
qDesc.Query = D3D11_QUERY_EVENT;
|
||||
qDesc.MiscFlags = 0;
|
||||
deviceData->m_device->CreateQuery( &qDesc, &syncQuery );
|
||||
deviceData->m_context->End( syncQuery );
|
||||
while( deviceData->m_context->GetData( syncQuery, 0,0,0 ) == S_FALSE ){}
|
||||
syncQuery->Release();
|
||||
}
|
||||
|
||||
int DeviceDX11::getNDevices()
|
||||
{
|
||||
IDXGIFactory1* factory = NULL;
|
||||
IDXGIAdapter1* adapter = NULL;
|
||||
CreateDXGIFactory1( __uuidof(IDXGIFactory1), (void**)&factory );
|
||||
|
||||
u32 i = 0;
|
||||
while( factory->EnumAdapters1( i, &adapter ) != DXGI_ERROR_NOT_FOUND )
|
||||
{
|
||||
i++;
|
||||
}
|
||||
|
||||
factory->Release();
|
||||
return i;
|
||||
}
|
||||
|
||||
void DeviceDX11::getDeviceName( char nameOut[128] ) const
|
||||
{
|
||||
IDXGIAdapter* adapter;// = getAdapterFromDevice( this );
|
||||
{
|
||||
IDXGIDevice* pDXGIDevice;
|
||||
|
||||
ADLASSERT( m_device->QueryInterface(__uuidof(IDXGIDevice), (void **)&pDXGIDevice) == S_OK );
|
||||
ADLASSERT( pDXGIDevice->GetParent(__uuidof(IDXGIAdapter), (void **)&adapter) == S_OK );
|
||||
|
||||
pDXGIDevice->Release();
|
||||
}
|
||||
DXGI_ADAPTER_DESC adapterDesc;
|
||||
adapter->GetDesc( &adapterDesc );
|
||||
|
||||
// wcstombs( nameOut, adapterDesc.Description, 128 );
|
||||
size_t i;
|
||||
wcstombs_s( &i, nameOut, 128, adapterDesc.Description, 128 );
|
||||
}
|
||||
|
||||
// Forwards the request to the kernel manager's query() for this device and
// returns its result.
Kernel* DeviceDX11::getKernel(const char* fileName, const char* funcName, const char* option, const char* src, bool cacheKernel ) const
{
    Kernel* kernel = m_kernelManager->query( this, fileName, funcName, option, src, cacheKernel );
    return kernel;
}
|
||||
|
||||
#undef u32
|
||||
|
||||
#undef SAFE_RELEASE
|
||||
|
||||
};
|
||||
@@ -0,0 +1,348 @@
|
||||
/*
|
||||
Copyright (c) 2012 Advanced Micro Devices, Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
//Originally written by Takahiro Harada
|
||||
|
||||
|
||||
|
||||
namespace adl
|
||||
{
|
||||
|
||||
#define SAFE_RELEASE(p) { if(p) { (p)->Release(); (p)=NULL; } }
|
||||
|
||||
struct KernelDX11 : public Kernel
|
||||
{
|
||||
ID3D11ComputeShader* getKernel() { return (ID3D11ComputeShader*)m_kernel; }
|
||||
ID3D11ComputeShader** getKernelPtr() { return (ID3D11ComputeShader**)&m_kernel; }
|
||||
};
|
||||
|
||||
|
||||
__inline
|
||||
#ifdef UNICODE
|
||||
HRESULT FindDXSDKShaderFileCch( __in_ecount(cchDest) WCHAR* strDestPath,
|
||||
int cchDest,
|
||||
__in LPCWSTR strFilename )
|
||||
#else
|
||||
HRESULT FindDXSDKShaderFileCch( __in_ecount(cchDest) CHAR* strDestPath,
|
||||
int cchDest,
|
||||
__in LPCSTR strFilename )
|
||||
#endif
|
||||
{
|
||||
if( NULL == strFilename || strFilename[0] == 0 || NULL == strDestPath || cchDest < 10 )
|
||||
return E_INVALIDARG;
|
||||
|
||||
// Get the exe name, and exe path
|
||||
#ifdef UNICODE
|
||||
WCHAR strExePath[MAX_PATH] =
|
||||
#else
|
||||
CHAR strExePath[MAX_PATH] =
|
||||
#endif
|
||||
{
|
||||
0
|
||||
};
|
||||
#ifdef UNICODE
|
||||
WCHAR strExeName[MAX_PATH] =
|
||||
#else
|
||||
CHAR strExeName[MAX_PATH] =
|
||||
#endif
|
||||
{
|
||||
0
|
||||
};
|
||||
#ifdef UNICODE
|
||||
WCHAR* strLastSlash = NULL;
|
||||
#else
|
||||
CHAR* strLastSlash = NULL;
|
||||
#endif
|
||||
GetModuleFileName( NULL, strExePath, MAX_PATH );
|
||||
strExePath[MAX_PATH - 1] = 0;
|
||||
#ifdef UNICODE
|
||||
strLastSlash = wcsrchr( strExePath, TEXT( '\\' ) );
|
||||
#else
|
||||
strLastSlash = strrchr( strExePath, TEXT( '\\' ) );
|
||||
#endif
|
||||
if( strLastSlash )
|
||||
{
|
||||
#ifdef UNICODE
|
||||
wcscpy_s( strExeName, MAX_PATH, &strLastSlash[1] );
|
||||
#else
|
||||
|
||||
#endif
|
||||
// Chop the exe name from the exe path
|
||||
*strLastSlash = 0;
|
||||
|
||||
// Chop the .exe from the exe name
|
||||
#ifdef UNICODE
|
||||
strLastSlash = wcsrchr( strExeName, TEXT( '.' ) );
|
||||
#else
|
||||
strLastSlash = strrchr( strExeName, TEXT( '.' ) );
|
||||
#endif
|
||||
if( strLastSlash )
|
||||
*strLastSlash = 0;
|
||||
}
|
||||
|
||||
// Search in directories:
|
||||
// .\
|
||||
// %EXE_DIR%\..\..\%EXE_NAME%
|
||||
#ifdef UNICODE
|
||||
wcscpy_s( strDestPath, cchDest, strFilename );
|
||||
#else
|
||||
strcpy_s( strDestPath, cchDest, strFilename );
|
||||
#endif
|
||||
if( GetFileAttributes( strDestPath ) != 0xFFFFFFFF )
|
||||
return S_OK;
|
||||
|
||||
// swprintf_s( strDestPath, cchDest, L"%s\\..\\..\\%s\\%s", strExePath, strExeName, strFilename );
|
||||
#ifdef UNICODE
|
||||
swprintf_s( strDestPath, cchDest, L"%s\\..\\%s\\%s", strExePath, strExeName, strFilename );
|
||||
#else
|
||||
sprintf_s( strDestPath, cchDest, "%s\\..\\%s\\%s", strExePath, strExeName, strFilename );
|
||||
#endif
|
||||
if( GetFileAttributes( strDestPath ) != 0xFFFFFFFF )
|
||||
return S_OK;
|
||||
|
||||
// On failure, return the file as the path but also return an error code
|
||||
#ifdef UNICODE
|
||||
wcscpy_s( strDestPath, cchDest, strFilename );
|
||||
#else
|
||||
strcpy_s( strDestPath, cchDest, strFilename );
|
||||
#endif
|
||||
|
||||
ADLASSERT( 0 );
|
||||
|
||||
return E_FAIL;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
template<>
|
||||
void KernelBuilder<TYPE_DX11>::setFromFile( const Device* deviceData, const char* fileName, const char* option, bool addExtension,
|
||||
bool cacheKernel)
|
||||
{
|
||||
char fileNameWithExtension[256];
|
||||
|
||||
if( addExtension )
|
||||
sprintf_s( fileNameWithExtension, "%s.hlsl", fileName );
|
||||
else
|
||||
sprintf_s( fileNameWithExtension, "%s", fileName );
|
||||
|
||||
m_deviceData = deviceData;
|
||||
|
||||
int nameLength = (int)strlen(fileNameWithExtension)+1;
|
||||
#ifdef UNICODE
|
||||
WCHAR* wfileNameWithExtension = new WCHAR[nameLength];
|
||||
#else
|
||||
CHAR* wfileNameWithExtension = new CHAR[nameLength];
|
||||
#endif
|
||||
memset(wfileNameWithExtension,0,nameLength);
|
||||
#ifdef UNICODE
|
||||
MultiByteToWideChar(CP_ACP,0,fileNameWithExtension,-1, wfileNameWithExtension, nameLength);
|
||||
#else
|
||||
sprintf_s(wfileNameWithExtension, nameLength, "%s", fileNameWithExtension);
|
||||
#endif
|
||||
// swprintf_s(wfileNameWithExtension, nameLength*2, L"%s", fileNameWithExtension);
|
||||
|
||||
HRESULT hr;
|
||||
|
||||
// Finds the correct path for the shader file.
|
||||
// This is only required for this sample to be run correctly from within the Sample Browser,
|
||||
// in your own projects, these lines could be removed safely
|
||||
hr = FindDXSDKShaderFileCch( m_path, MAX_PATH, wfileNameWithExtension );
|
||||
|
||||
delete [] wfileNameWithExtension;
|
||||
|
||||
ADLASSERT( hr == S_OK );
|
||||
}
|
||||
|
||||
template<>
|
||||
void KernelBuilder<TYPE_DX11>::setFromSrc( const Device* deviceData, const char* src, const char* option )
|
||||
{
|
||||
m_deviceData = deviceData;
|
||||
m_ptr = (void*)src;
|
||||
m_path[0] = '0';
|
||||
}
|
||||
|
||||
template<>
|
||||
KernelBuilder<TYPE_DX11>::~KernelBuilder()
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
template<>
|
||||
void KernelBuilder<TYPE_DX11>::createKernel( const char* funcName, Kernel& kernelOut )
|
||||
{
|
||||
const DeviceDX11* deviceData = (const DeviceDX11*)m_deviceData;
|
||||
KernelDX11* dxKernel = (KernelDX11*)&kernelOut;
|
||||
HRESULT hr;
|
||||
|
||||
DWORD dwShaderFlags = D3DCOMPILE_ENABLE_STRICTNESS;
|
||||
#if defined( DEBUG ) || defined( _DEBUG )
|
||||
// Set the D3DCOMPILE_DEBUG flag to embed debug information in the shaders.
|
||||
// Setting this flag improves the shader debugging experience, but still allows
|
||||
// the shaders to be optimized and to run exactly the way they will run in
|
||||
// the release configuration of this program.
|
||||
dwShaderFlags |= D3DCOMPILE_DEBUG;
|
||||
#endif
|
||||
|
||||
const D3D_SHADER_MACRO defines[] =
|
||||
{
|
||||
#ifdef USE_STRUCTURED_BUFFERS
|
||||
"USE_STRUCTURED_BUFFERS", "1",
|
||||
#endif
|
||||
|
||||
#ifdef TEST_DOUBLE
|
||||
"TEST_DOUBLE", "1",
|
||||
#endif
|
||||
NULL, NULL
|
||||
};
|
||||
|
||||
// We generally prefer to use the higher CS shader profile when possible as CS 5.0 is better performance on 11-class hardware
|
||||
LPCSTR pProfile = ( deviceData->m_device->GetFeatureLevel() >= D3D_FEATURE_LEVEL_11_0 ) ? "cs_5_0" : "cs_4_0";
|
||||
|
||||
ID3DBlob* pErrorBlob = NULL;
|
||||
ID3DBlob* pBlob = NULL;
|
||||
if( m_path[0] == '0' )
|
||||
{
|
||||
char* src = (char*)m_ptr;
|
||||
hr = D3DX11CompileFromMemory( src, strlen(src), 0, defines, NULL, funcName, pProfile,
|
||||
dwShaderFlags, NULL, NULL, &pBlob, &pErrorBlob, NULL );
|
||||
}
|
||||
else
|
||||
{
|
||||
hr = D3DX11CompileFromFile( m_path, defines, NULL, funcName, pProfile,
|
||||
dwShaderFlags, NULL, NULL, &pBlob, &pErrorBlob, NULL );
|
||||
}
|
||||
|
||||
if ( FAILED(hr) )
|
||||
{
|
||||
debugPrintf("%s", (char*)pErrorBlob->GetBufferPointer());
|
||||
}
|
||||
ADLASSERT( hr == S_OK );
|
||||
|
||||
hr = deviceData->m_device->CreateComputeShader( pBlob->GetBufferPointer(), pBlob->GetBufferSize(), NULL,
|
||||
dxKernel->getKernelPtr() );
|
||||
|
||||
#if defined(DEBUG) || defined(PROFILE)
|
||||
if ( kernelOut.m_kernel )
|
||||
kernelOut.m_kernel->SetPrivateData( WKPDID_D3DDebugObjectName, lstrlenA(pFunctionName), pFunctionName );
|
||||
#endif
|
||||
|
||||
SAFE_RELEASE( pErrorBlob );
|
||||
SAFE_RELEASE( pBlob );
|
||||
|
||||
kernelOut.m_type = TYPE_DX11;
|
||||
}
|
||||
|
||||
template<>
|
||||
void KernelBuilder<TYPE_DX11>::deleteKernel( Kernel& kernel )
|
||||
{
|
||||
KernelDX11* dxKernel = (KernelDX11*)&kernel;
|
||||
|
||||
if( kernel.m_kernel )
|
||||
{
|
||||
dxKernel->getKernel()->Release();
|
||||
kernel.m_kernel = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
class LauncherDX11
|
||||
{
|
||||
public:
|
||||
typedef Launcher::BufferInfo BufferInfo;
|
||||
|
||||
__inline
|
||||
static void setBuffers( Launcher* launcher, BufferInfo* buffInfo, int n );
|
||||
template<typename T>
|
||||
__inline
|
||||
static void setConst( Launcher* launcher, Buffer<T>& constBuff, const T& consts );
|
||||
__inline
|
||||
static void launch2D( Launcher* launcher, int numThreadsX, int numThreadsY, int localSizeX, int localSizeY );
|
||||
};
|
||||
|
||||
// Binds the first n entries of buffInfo to the compute stage. Read-only buffers
// are bound as shader resource views at slot launcher->m_idx, all others as
// unordered access views at slot launcher->m_idxRw; the slot counter used is
// advanced after each binding.
void LauncherDX11::setBuffers( Launcher* launcher, BufferInfo* buffInfo, int n )
{
    ID3D11DeviceContext* context = ((const DeviceDX11*)launcher->m_deviceData)->m_context;

    for(int i=0; i<n; i++)
    {
        // Only the view handles are read, so the element type used for the cast is irrelevant.
        BufferDX11<int>* dxBuf = (BufferDX11<int>*)buffInfo[i].m_buffer;

        if( !buffInfo[i].m_isReadOnly )
        {
            // todo. cannot initialize append buffer with proper counter value which is the last arg
            context->CSSetUnorderedAccessViews( launcher->m_idxRw++, 1, dxBuf->getUAVPtr(), 0 );
            continue;
        }

        context->CSSetShaderResources( launcher->m_idx++, 1, dxBuf->getSRVPtr() );
    }
}
|
||||
|
||||
template<typename T>
|
||||
void LauncherDX11::setConst( Launcher* launcher, Buffer<T>& constBuff, const T& consts )
|
||||
{
|
||||
KernelDX11* dxKernel = (KernelDX11*)launcher->m_kernel;
|
||||
const DeviceDX11* dddx = (const DeviceDX11*)launcher->m_deviceData;
|
||||
BufferDX11<T>* dBuf = (BufferDX11<T>*)&constBuff;
|
||||
/*
|
||||
D3D11_MAPPED_SUBRESOURCE MappedResource;
|
||||
dddx->m_context->Map( dBuf->getBuffer(), 0, D3D11_MAP_WRITE_DISCARD, 0, &MappedResource );
|
||||
memcpy( MappedResource.pData, &consts, sizeof(T) );
|
||||
dddx->m_context->Unmap( dBuf->getBuffer(), 0 );
|
||||
*/
|
||||
|
||||
dddx->m_context->UpdateSubresource( dBuf->getBuffer(), 0, NULL, &consts, 0, 0 );
|
||||
|
||||
dddx->m_context->CSSetConstantBuffers( 0, 1, dBuf->getBufferPtr() );
|
||||
}
|
||||
|
||||
// Dispatches the launcher's kernel over numThreadsX x numThreadsY threads using
// thread groups of localSizeX x localSizeY, then unbinds the shader and the
// UAV/SRV slots that were counted in launcher->m_idxRw / launcher->m_idx.
void LauncherDX11::launch2D( Launcher* launcher, int numThreadsX, int numThreadsY, int localSizeX, int localSizeY )
{
	KernelDX11* kernelDx = (KernelDX11*)launcher->m_kernel;
	const DeviceDX11* device = (const DeviceDX11*)launcher->m_deviceData;

	device->m_context->CSSetShader( kernelDx->getKernel(), NULL, 0 );

	//	number of groups per axis: rounded up, never less than one
	const int groupsX = max( 1, (numThreadsX/localSizeX) + ((numThreadsX%localSizeX) ? 1 : 0) );
	const int groupsY = max( 1, (numThreadsY/localSizeY) + ((numThreadsY%localSizeY) ? 1 : 0) );
	const int groupsZ = 1;

	device->m_context->Dispatch( groupsX, groupsY, groupsZ );

	//	set 0 to registers
	device->m_context->CSSetShader( NULL, NULL, 0 );

	if( launcher->m_idxRw )
	{
		//	at most 16 UAV slots are cleared
		ID3D11UnorderedAccessView* nullUavs[ 16 ] = { 0 };
		device->m_context->CSSetUnorderedAccessViews( 0,
			min( (unsigned int)launcher->m_idxRw, sizeof(nullUavs)/sizeof(*nullUavs) ), nullUavs, NULL );
	}

	if( launcher->m_idx )
	{
		//	at most 16 SRV slots are cleared
		ID3D11ShaderResourceView* nullSrvs[16] = { 0 };
		device->m_context->CSSetShaderResources( 0,
			min( (unsigned int)launcher->m_idx, sizeof(nullSrvs)/sizeof(*nullSrvs) ), nullSrvs );
	}
}
|
||||
|
||||
#undef SAFE_RELEASE
|
||||
|
||||
};
|
||||
@@ -0,0 +1,131 @@
|
||||
/*
|
||||
Copyright (c) 2012 Advanced Micro Devices, Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
//Originally written by Takahiro Harada
|
||||
|
||||
|
||||
|
||||
namespace adl
|
||||
{
|
||||
|
||||
struct StopwatchDX11 : public StopwatchBase
|
||||
{
|
||||
public:
|
||||
__inline
|
||||
StopwatchDX11() : StopwatchBase(){}
|
||||
__inline
|
||||
~StopwatchDX11();
|
||||
|
||||
__inline
|
||||
void init( const Device* deviceData );
|
||||
__inline
|
||||
void start();
|
||||
__inline
|
||||
void split();
|
||||
__inline
|
||||
void stop();
|
||||
__inline
|
||||
float getMs(int index=0);
|
||||
__inline
|
||||
void getMs( float* times, int capacity );
|
||||
|
||||
public:
|
||||
ID3D11Query* m_tQuery[CAPACITY+1];
|
||||
ID3D11Query* m_fQuery;
|
||||
UINT64 m_t[CAPACITY];
|
||||
};
|
||||
|
||||
void StopwatchDX11::init( const Device* deviceData )
|
||||
{
|
||||
ADLASSERT( deviceData->m_type == TYPE_DX11 );
|
||||
m_device = deviceData;
|
||||
{
|
||||
D3D11_QUERY_DESC qDesc;
|
||||
qDesc.Query = D3D11_QUERY_TIMESTAMP_DISJOINT;
|
||||
qDesc.MiscFlags = 0;
|
||||
((const DeviceDX11*)m_device)->m_device->CreateQuery( &qDesc, &m_fQuery );
|
||||
}
|
||||
for(int i=0; i<CAPACITY+1; i++)
|
||||
{
|
||||
D3D11_QUERY_DESC qDesc;
|
||||
qDesc.Query = D3D11_QUERY_TIMESTAMP;
|
||||
qDesc.MiscFlags = 0;
|
||||
((const DeviceDX11*)m_device)->m_device->CreateQuery( &qDesc, &m_tQuery[i] );
|
||||
}
|
||||
}
|
||||
|
||||
StopwatchDX11::~StopwatchDX11()
|
||||
{
|
||||
m_fQuery->Release();
|
||||
for(int i=0; i<CAPACITY+1; i++)
|
||||
{
|
||||
m_tQuery[i]->Release();
|
||||
}
|
||||
}
|
||||
|
||||
void StopwatchDX11::start()
|
||||
{
|
||||
m_idx = 0;
|
||||
((const DeviceDX11*)m_device)->m_context->Begin( m_fQuery );
|
||||
((const DeviceDX11*)m_device)->m_context->End( m_tQuery[m_idx++] );
|
||||
}
|
||||
|
||||
void StopwatchDX11::split()
|
||||
{
|
||||
if( m_idx < CAPACITY )
|
||||
((const DeviceDX11*)m_device)->m_context->End( m_tQuery[m_idx++] );
|
||||
}
|
||||
|
||||
void StopwatchDX11::stop()
|
||||
{
|
||||
((const DeviceDX11*)m_device)->m_context->End( m_tQuery[m_idx++] );
|
||||
((const DeviceDX11*)m_device)->m_context->End( m_fQuery );
|
||||
}
|
||||
|
||||
float StopwatchDX11::getMs(int index)
|
||||
{
|
||||
D3D11_QUERY_DATA_TIMESTAMP_DISJOINT d;
|
||||
// m_deviceData->m_context->End( m_fQuery );
|
||||
while( ((const DeviceDX11*)m_device)->m_context->GetData( m_fQuery, &d,sizeof(D3D11_QUERY_DATA_TIMESTAMP_DISJOINT),0 ) == S_FALSE ) {}
|
||||
|
||||
while( ((const DeviceDX11*)m_device)->m_context->GetData( m_tQuery[0], &m_t[index],sizeof(UINT64),0 ) == S_FALSE ){}
|
||||
while( ((const DeviceDX11*)m_device)->m_context->GetData( m_tQuery[1], &m_t[index+1],sizeof(UINT64),0 ) == S_FALSE ){}
|
||||
|
||||
ADLASSERT( d.Disjoint == false );
|
||||
|
||||
float elapsedMs = (m_t[index+1] - m_t[index])/(float)d.Frequency*1000;
|
||||
return elapsedMs;
|
||||
|
||||
}
|
||||
|
||||
void StopwatchDX11::getMs( float* times, int capacity )
|
||||
{
|
||||
ADLASSERT( capacity <= CAPACITY );
|
||||
|
||||
D3D11_QUERY_DATA_TIMESTAMP_DISJOINT d;
|
||||
while( ((const DeviceDX11*)m_device)->m_context->GetData( m_fQuery, &d,sizeof(D3D11_QUERY_DATA_TIMESTAMP_DISJOINT),0 ) == S_FALSE ) {}
|
||||
|
||||
for(int i=0; i<m_idx; i++)
|
||||
{
|
||||
while( ((const DeviceDX11*)m_device)->m_context->GetData( m_tQuery[i], &m_t[i],sizeof(UINT64),0 ) == S_FALSE ){}
|
||||
}
|
||||
|
||||
ADLASSERT( d.Disjoint == false );
|
||||
|
||||
for(int i=0; i<capacity; i++)
|
||||
{
|
||||
times[i] = (m_t[i+1] - m_t[i])/(float)d.Frequency*1000;
|
||||
}
|
||||
}
|
||||
|
||||
};
|
||||
@@ -0,0 +1,107 @@
|
||||
/*
|
||||
Copyright (c) 2012 Advanced Micro Devices, Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
//Originally written by Takahiro Harada
|
||||
|
||||
|
||||
|
||||
namespace adl
|
||||
{
|
||||
|
||||
struct DeviceHost : public Device
|
||||
{
|
||||
DeviceHost() : Device( TYPE_HOST ){}
|
||||
|
||||
__inline
|
||||
void initialize(const Config& cfg);
|
||||
__inline
|
||||
void release();
|
||||
|
||||
template<typename T>
|
||||
__inline
|
||||
void allocate(Buffer<T>* buf, int nElems, BufferBase::BufferType type);
|
||||
|
||||
template<typename T>
|
||||
__inline
|
||||
void deallocate(Buffer<T>* buf);
|
||||
|
||||
template<typename T>
|
||||
__inline
|
||||
void copy(Buffer<T>* dst, const Buffer<T>* src, int nElems);
|
||||
|
||||
template<typename T>
|
||||
__inline
|
||||
void copy(T* dst, const Buffer<T>* src, int nElems, int offsetNElems = 0);
|
||||
|
||||
template<typename T>
|
||||
__inline
|
||||
void copy(Buffer<T>* dst, const T* src, int nElems, int offsetNElems = 0);
|
||||
|
||||
__inline
|
||||
void waitForCompletion() const;
|
||||
};
|
||||
|
||||
// The host device has nothing to set up; cfg is not used.
void DeviceHost::initialize(const Config& cfg)
{
	//	intentionally empty
}
|
||||
|
||||
// The host device holds no resources of its own, so there is nothing to free.
void DeviceHost::release()
{
	//	intentionally empty
}
|
||||
|
||||
template<typename T>
|
||||
void DeviceHost::allocate(Buffer<T>* buf, int nElems, BufferBase::BufferType type)
|
||||
{
|
||||
buf->m_device = this;
|
||||
|
||||
if( type == BufferBase::BUFFER_CONST ) return;
|
||||
|
||||
buf->m_ptr = new T[nElems];
|
||||
ADLASSERT( buf->m_ptr );
|
||||
buf->m_size = nElems;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void DeviceHost::deallocate(Buffer<T>* buf)
|
||||
{
|
||||
if( buf->m_ptr ) delete [] buf->m_ptr;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void DeviceHost::copy(Buffer<T>* dst, const Buffer<T>* src, int nElems)
|
||||
{
|
||||
copy( dst, src->m_ptr, nElems );
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void DeviceHost::copy(T* dst, const Buffer<T>* src, int nElems, int srcOffsetNElems)
|
||||
{
|
||||
ADLASSERT( src->getType() == TYPE_HOST );
|
||||
memcpy( dst, src->m_ptr+srcOffsetNElems, nElems*sizeof(T) );
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void DeviceHost::copy(Buffer<T>* dst, const T* src, int nElems, int dstOffsetNElems)
|
||||
{
|
||||
ADLASSERT( dst->getType() == TYPE_HOST );
|
||||
memcpy( dst->m_ptr+dstOffsetNElems, src, nElems*sizeof(T) );
|
||||
}
|
||||
|
||||
void DeviceHost::waitForCompletion() const
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
};
|
||||
@@ -0,0 +1,119 @@
|
||||
/*
|
||||
Copyright (c) 2012 Advanced Micro Devices, Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
//Originally written by Takahiro Harada
|
||||
|
||||
#ifdef _WIN32
|
||||
#include <windows.h>
|
||||
#else
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
|
||||
namespace adl
|
||||
{
|
||||
|
||||
class StopwatchHost : public StopwatchBase
|
||||
{
|
||||
public:
|
||||
__inline
|
||||
StopwatchHost();
|
||||
__inline
|
||||
void init( const Device* deviceData );
|
||||
__inline
|
||||
void start();
|
||||
__inline
|
||||
void split();
|
||||
__inline
|
||||
void stop();
|
||||
__inline
|
||||
float getMs(int index=0);
|
||||
__inline
|
||||
void getMs( float* times, int capacity );
|
||||
|
||||
private:
|
||||
#ifdef _WIN32
|
||||
LARGE_INTEGER m_frequency;
|
||||
LARGE_INTEGER m_t[CAPACITY];
|
||||
#else
|
||||
struct timeval mStartTime;
|
||||
timeval m_t[CAPACITY];
|
||||
#endif
|
||||
};
|
||||
|
||||
__inline
|
||||
StopwatchHost::StopwatchHost()
|
||||
: StopwatchBase()
|
||||
{
|
||||
}
|
||||
|
||||
__inline
|
||||
void StopwatchHost::init( const Device* deviceData )
|
||||
{
|
||||
m_device = deviceData;
|
||||
#ifdef _WIN32
|
||||
QueryPerformanceFrequency( &m_frequency );
|
||||
#else
|
||||
gettimeofday(&mStartTime, 0);
|
||||
#endif
|
||||
}
|
||||
|
||||
__inline
|
||||
void StopwatchHost::start()
|
||||
{
|
||||
m_idx = 0;
|
||||
#ifdef _WIN32
|
||||
QueryPerformanceCounter(&m_t[m_idx++]);
|
||||
#else
|
||||
gettimeofday(&m_t[m_idx++], 0);
|
||||
#endif
|
||||
}
|
||||
|
||||
__inline
|
||||
void StopwatchHost::split()
|
||||
{
|
||||
#ifdef _WIN32
|
||||
QueryPerformanceCounter(&m_t[m_idx++]);
|
||||
#else
|
||||
gettimeofday(&m_t[m_idx++], 0);
|
||||
#endif
|
||||
}
|
||||
|
||||
__inline
|
||||
void StopwatchHost::stop()
|
||||
{
|
||||
split();
|
||||
}
|
||||
|
||||
__inline
|
||||
float StopwatchHost::getMs(int index)
|
||||
{
|
||||
#ifdef _WIN32
|
||||
return (float)(1000*(m_t[index+1].QuadPart - m_t[index].QuadPart))/m_frequency.QuadPart;
|
||||
#else
|
||||
return (m_t[index+1].tv_sec - m_t[index].tv_sec) * 1000 +
|
||||
(m_t[index+1].tv_usec - m_t[index].tv_usec) / 1000;
|
||||
#endif
|
||||
}
|
||||
|
||||
__inline
|
||||
void StopwatchHost::getMs(float* times, int capacity)
|
||||
{
|
||||
for(int i=0; i<capacity; i++) times[i] = 0.f;
|
||||
|
||||
for(int i=0; i<min(capacity, m_idx-1); i++)
|
||||
{
|
||||
times[i] = getMs(i);
|
||||
}
|
||||
}
|
||||
|
||||
};
|
||||
Reference in New Issue
Block a user