Add the GPU rigid body pipeline from https://github.com/erwincoumans/experiments as a Bullet 3.x preview for Bullet 2.80

This commit is contained in:
erwin.coumans
2012-03-05 00:54:32 +00:00
parent 73c4646b40
commit 571af41cf6
257 changed files with 55106 additions and 0 deletions

View File

@@ -0,0 +1,19 @@
/*
Copyright (c) 2012 Advanced Micro Devices, Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
//Originally written by Takahiro Harada
#include <Adl/Adl.h>
//KernelManager* KernelManager::s_kManager = NULL;

View File

@@ -0,0 +1,235 @@
/*
Copyright (c) 2012 Advanced Micro Devices, Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
//Originally written by Takahiro Harada
#ifndef ADL_H
#define ADL_H
#pragma warning( disable : 4996 )
#include <Adl/AdlConfig.h>
#include <Adl/AdlError.h>
#include <algorithm>
// Fallback function-like max/min macros, defined only when the toolchain has not
// already provided them.
// Each argument appears twice in the expansion, so arguments with side effects
// are evaluated twice.
// NOTE(review): these are defined after <algorithm> is included; any later use of
// the identifiers max/min (including std::max/std::min) will be macro-expanded —
// confirm that is intended.
#ifndef max
#define max(a,b) (((a) > (b)) ? (a) : (b))
#endif
#ifndef min
#define min(a,b) (((a) < (b)) ? (a) : (b))
#endif
namespace adl
{
// Identifies which back end a Device (and everything created on it) belongs to.
enum DeviceType
{
    TYPE_CL   = 0,  // OpenCL back end (DeviceCL)
    TYPE_DX11 = 1,  // DirectX 11 back end (DeviceDX11)
    TYPE_HOST = 2,  // host back end (DeviceHost)
};
struct Device;
// Non-template base of Buffer<T>; it only carries the buffer-kind enumeration.
struct BufferBase
{
    // Kind of storage requested when a buffer is allocated.
    enum BufferType
    {
        BUFFER           = 0,

        // for dx
        BUFFER_CONST     = 1,
        BUFFER_STAGING   = 2,
        BUFFER_APPEND    = 3,
        BUFFER_RAW       = 4,
        BUFFER_W_COUNTER = 5,
        BUFFER_INDEX     = 6,
        BUFFER_VERTEX    = 7,

        // for cl
        BUFFER_ZERO_COPY = 8,
    };
};
// Static helpers for creating, destroying and synchronising Device objects.
class DeviceUtils
{
public:
// Settings handed to Device::initialize() when a device is created.
struct Config
{
// Note: this nested enum shares its name with adl::DeviceType and hides it inside Config.
enum DeviceType
{
DEVICE_GPU,
DEVICE_CPU,
};
// for CL
enum DeviceVendor
{
VD_AMD,
VD_INTEL,
VD_NV,
};
// Defaults: GPU device, device index 0, AMD vendor.
Config() : m_type(DEVICE_GPU), m_deviceIdx(0), m_vendor(VD_AMD){}
DeviceType m_type;
int m_deviceIdx;
DeviceVendor m_vendor;
};
// Returns the number of devices reported for the given back end.
__inline
static
int getNDevices( DeviceType type );
// Creates a device of the given back end and calls initialize( cfg ) on it.
__inline
static Device* allocate( DeviceType type, Config& cfg );
// Calls release() on the device and deletes it.
__inline
static void deallocate( Device* deviceData );
// Forwards to deviceData->waitForCompletion().
__inline
static void waitForCompletion( const Device* deviceData );
};
//==========================
// DeviceData
//==========================
struct Kernel;
// Polymorphic base class of every compute back end (CL, DX11, host).
// The default implementations are no-ops so a back end only overrides what it supports.
struct Device
{
    typedef DeviceUtils::Config Config;

    Device( DeviceType type ) : m_type( type ), m_memoryUsage(0)
    {
    }

    // Devices are destroyed through a Device* (see DeviceUtils::deallocate), so the
    // destructor must be virtual for the derived destructor to run.
    virtual ~Device(){}

    // Back-end specific context handle; 0 when the back end has none.
    virtual void* getContext() const { return 0; }
    // Sets the device up according to cfg.
    virtual void initialize(const Config& cfg){}
    // Frees whatever initialize() acquired.
    virtual void release(){}
    // Blocks until previously issued work has finished.
    virtual void waitForCompletion() const {}
    // Writes the device name into nameOut.
    virtual void getDeviceName( char nameOut[128] ) const {}
    // Looks up (or builds) a kernel; the base implementation asserts and returns 0.
    virtual Kernel* getKernel(const char* fileName, const char* funcName, const char* option = NULL, const char* src = NULL, bool cacheKernel = true ) const { ADLASSERT(0); return 0;}
    // Returns the value of m_memoryUsage.
    virtual unsigned int getUsedMemory() const { return m_memoryUsage; }

    DeviceType m_type;          // back end this device belongs to
    unsigned int m_memoryUsage; // DeviceUtils::deallocate asserts this is 0
};
//==========================
// Buffer
//==========================
template<typename T>
struct HostBuffer;
// overload each deviceDatas
// Typed buffer bound to a Device. Allocation, deallocation and copies are
// dispatched to the back end identified by m_device->m_type.
template<typename T>
struct Buffer : public BufferBase
{
// Creates an empty buffer that is not bound to any device.
__inline
Buffer();
// Creates a buffer and immediately allocates nElems elements on device.
__inline
Buffer(const Device* device, int nElems, BufferType type = BUFFER );
// Frees the storage only if it was obtained through allocate().
__inline
virtual ~Buffer();
// Binds the buffer to externally provided storage; the buffer must not be bound yet.
__inline
void setRawPtr( const Device* device, T* ptr, int size, BufferType type = BUFFER );
// Allocates nElems elements on device; the buffer must not be bound yet.
__inline
void allocate(const Device* device, int nElems, BufferType type = BUFFER );
// Copies nElems elements from host memory into this buffer at dstOffsetNElems.
__inline
void write(T* hostSrcPtr, int nElems, int dstOffsetNElems = 0);
// Copies nElems elements starting at srcOffsetNElems from this buffer into host memory.
__inline
void read(T* hostDstPtr, int nElems, int srcOffsetNElems = 0) const;
// Copies nElems elements from src into this buffer.
__inline
void write(Buffer<T>& src, int nElems);
// Copies nElems elements from this buffer into dst.
__inline
void read(Buffer<T>& dst, int nElems) const;
// __inline
// Buffer<T>& operator = (const Buffer<T>& buffer);
__inline
int getSize() const { return m_size; }
// Back end of the owning device; asserts that the buffer is bound to a device.
DeviceType getType() const { ADLASSERT( m_device ); return m_device->m_type; }
const Device* m_device;
int m_size; // NOTE(review): presumably the element count filled in by the back end — confirm
T* m_ptr;
// for DX11
void* m_uav;
void* m_srv;
bool m_allocated; // todo. move this to a bit
};
class BufferUtils
{
public:
template<DeviceType TYPE, bool COPY, typename T>
__inline
static
typename Buffer<T>* map(const Device* device, const Buffer<T>* in, int copySize = -1);
template<bool COPY, typename T>
__inline
static
void unmap( Buffer<T>* native, const Buffer<T>* orig, int copySize = -1 );
};
//==========================
// HostBuffer
//==========================
struct DeviceHost;
// Buffer with element access through operator[] and begin().
// Names inherited from the dependent base Buffer<T> are qualified explicitly
// (BufferBase::..., this->...) so the template also compiles under two-phase lookup.
template<typename T>
struct HostBuffer : public Buffer<T>
{
    __inline
    HostBuffer():Buffer<T>(){}
    // Allocates nElems elements on device.
    __inline
    HostBuffer(const Device* device, int nElems, BufferBase::BufferType type = BufferBase::BUFFER ) : Buffer<T>(device, nElems, type) {}
    // HostBuffer(const Device* deviceData, T* rawPtr, int nElems);
    // Unchecked element access.
    __inline
    T& operator[](int idx);
    __inline
    const T& operator[](int idx) const;
    // Pointer to the first element.
    __inline
    T* begin() { return this->m_ptr; }
    // Copies the contents of a buffer into this buffer's storage.
    __inline
    HostBuffer<T>& operator = (const Buffer<T>& device);
};
};
#include <Adl/AdlKernel.h>
#if defined(ADL_ENABLE_CL)
#include <Adl/CL/AdlCL.inl>
#endif
#if defined(ADL_ENABLE_DX11)
#include <Adl/DX11/AdlDX11.inl>
#endif
#include <Adl/Host/AdlHost.inl>
#include <Adl/AdlKernel.inl>
#include <Adl/Adl.inl>
#include <Adl/AdlStopwatch.h>
#include <Adl/Host/AdlStopwatchHost.inl>
#include <Adl/AdlStopwatch.inl>
#endif

View File

@@ -0,0 +1,344 @@
/*
Copyright (c) 2012 Advanced Micro Devices, Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
//Originally written by Takahiro Harada
namespace adl
{
// Returns how many devices the requested back end reports.
// Back ends that are not compiled in, and the host back end, report one device.
int DeviceUtils::getNDevices( DeviceType type )
{
    (void)type; // unused when neither CL nor DX11 is enabled
#if defined(ADL_ENABLE_CL)
    if( type == TYPE_CL )
        return DeviceCL::getNDevices();
#endif
#if defined(ADL_ENABLE_DX11)
    if( type == TYPE_DX11 )
        return DeviceDX11::getNDevices();
#endif
    return 1;
}
// Creates a device for the requested back end and initialises it with cfg.
// Returns 0 (after asserting) when the back end is unknown or not compiled in,
// instead of calling initialize() through an uninitialised pointer.
// The caller owns the result and releases it with DeviceUtils::deallocate().
Device* DeviceUtils::allocate( DeviceType type, Config& cfg )
{
    Device* deviceData = 0;
    switch( type )
    {
#if defined(ADL_ENABLE_CL)
    case TYPE_CL:
        deviceData = new DeviceCL();
        break;
#endif
#if defined(ADL_ENABLE_DX11)
    case TYPE_DX11:
        deviceData = new DeviceDX11();
        break;
#endif
    case TYPE_HOST:
        deviceData = new DeviceHost();
        break;
    default:
        ADLASSERT( 0 );
        break;
    }

    if( deviceData == 0 )
        return 0;

    deviceData->initialize( cfg );
    return deviceData;
}
// Releases and deletes a device created by DeviceUtils::allocate().
// A null pointer is ignored, mirroring the behaviour of delete.
// Asserts that the device reports no memory in use.
void DeviceUtils::deallocate( Device* deviceData )
{
    if( deviceData == 0 )
        return;

    ADLASSERT( deviceData->getUsedMemory() == 0 );
    deviceData->release();
    delete deviceData;
}
// Forwards to the device's own waitForCompletion(); deviceData must not be null.
void DeviceUtils::waitForCompletion( const Device* deviceData )
{
deviceData->waitForCompletion();
}
// Dispatch helpers: cast a Device pointer to its concrete back-end type and invoke
// `func` on it. Four variants exist, one per combination of enabled back ends
// (DX11+CL, DX11 only, CL only, host only); unknown types hit ADLASSERT(0).
//  - SELECT_DEVICEDATA( type, func )        casts the member m_device, so it can only
//                                           be used where m_device is in scope.
//  - SELECT_DEVICEDATA1( deviceData, func ) casts the pointer passed as first argument.
// Both expand to a switch statement.
#if defined(ADL_ENABLE_DX11)
#if defined(ADL_ENABLE_CL)
#define SELECT_DEVICEDATA( type, func ) \
switch( type ) \
{ \
case TYPE_CL: ((DeviceCL*)m_device)->func; break; \
case TYPE_DX11: ((DeviceDX11*)m_device)->func; break; \
case TYPE_HOST: ((DeviceHost*)m_device)->func; break; \
default: ADLASSERT(0); break; \
}
#define SELECT_DEVICEDATA1( deviceData, func ) \
switch( deviceData->m_type ) \
{ \
case TYPE_CL: ((DeviceCL*)deviceData)->func; break; \
case TYPE_DX11: ((DeviceDX11*)deviceData)->func; break; \
case TYPE_HOST: ((DeviceHost*)deviceData)->func; break; \
default: ADLASSERT(0); break; \
}
#else
#define SELECT_DEVICEDATA( type, func ) \
switch( type ) \
{ \
case TYPE_DX11: ((DeviceDX11*)m_device)->func; break; \
case TYPE_HOST: ((DeviceHost*)m_device)->func; break; \
default: ADLASSERT(0); break; \
}
#define SELECT_DEVICEDATA1( deviceData, func ) \
switch( deviceData->m_type ) \
{ \
case TYPE_DX11: ((DeviceDX11*)deviceData)->func; break; \
case TYPE_HOST: ((DeviceHost*)deviceData)->func; break; \
default: ADLASSERT(0); break; \
}
#endif
#else
#if defined(ADL_ENABLE_CL)
#define SELECT_DEVICEDATA( type, func ) \
switch( type ) \
{ \
case TYPE_CL: ((DeviceCL*)m_device)->func; break; \
case TYPE_HOST: ((DeviceHost*)m_device)->func; break; \
default: ADLASSERT(0); break; \
}
#define SELECT_DEVICEDATA1( deviceData, func ) \
switch( deviceData->m_type ) \
{ \
case TYPE_CL: ((DeviceCL*)deviceData)->func; break; \
case TYPE_HOST: ((DeviceHost*)deviceData)->func; break; \
default: ADLASSERT(0); break; \
}
#else
#define SELECT_DEVICEDATA( type, func ) \
switch( type ) \
{ \
case TYPE_HOST: ((DeviceHost*)m_device)->func; break; \
default: ADLASSERT(0); break; \
}
#define SELECT_DEVICEDATA1( deviceData, func ) \
switch( deviceData->m_type ) \
{ \
case TYPE_HOST: ((DeviceHost*)deviceData)->func; break; \
default: ADLASSERT(0); break; \
}
#endif
#endif
// Default constructor: an empty buffer that is bound to no device and owns no storage.
template<typename T>
Buffer<T>::Buffer()
    : m_device( 0 )
    , m_size( 0 )
    , m_ptr( 0 )
    , m_uav( 0 )
    , m_srv( 0 )
    , m_allocated( false )
{
}
// Allocating constructor: binds the buffer to deviceData and allocates nElems elements.
// m_device starts out null because allocate() asserts that the buffer is still unbound;
// allocate() then sets every other member.
template<typename T>
Buffer<T>::Buffer(const Device* deviceData, int nElems, BufferType type )
    : m_device( 0 )
{
    allocate( deviceData, nElems, type );
}
// Destructor: hands the storage back to the owning device, but only when it was
// obtained through allocate() and the buffer is still bound to a device.
template<typename T>
Buffer<T>::~Buffer()
{
    if( m_allocated && m_device )
    {
        SELECT_DEVICEDATA( m_device->m_type, deallocate( this ) );
    }

    // Leave the object in the unbound state.
    m_size = 0;
    m_ptr = 0;
    m_device = 0;
}
// Binds this (still unbound) buffer to storage supplied by the caller.
// Only plain BUFFER storage on non-DX11 devices is supported so far.
// m_allocated is not touched here.
template<typename T>
void Buffer<T>::setRawPtr( const Device* device, T* ptr, int size, BufferType type )
{
    // Preconditions.
    ADLASSERT( m_device == 0 );
    ADLASSERT( type == BUFFER ); // todo. implement
    ADLASSERT( device->m_type != TYPE_DX11 ); // todo. implement set srv, uav

    m_size = size;
    m_ptr = ptr;
    m_device = device;
}
// Binds this (still unbound) buffer to deviceData and asks that back end to allocate
// nElems elements of the requested kind. Marks the storage as owned afterwards.
template<typename T>
void Buffer<T>::allocate(const Device* deviceData, int nElems, BufferType type )
{
    ADLASSERT( m_device == 0 );

    // Reset all handles before the back end fills them in.
    m_srv = 0;
    m_uav = 0;
    m_ptr = 0;
    m_size = 0;

    // The dispatch macro reads m_device, so it has to be set first.
    m_device = deviceData;
    SELECT_DEVICEDATA( m_device->m_type, allocate( this, nElems, type ) );

    m_allocated = true;
}
// Copies nElems elements from hostPtr into this buffer, starting at element offsetNElems.
// Asserts that the destination range fits into the buffer.
template<typename T>
void Buffer<T>::write(T* hostPtr, int nElems, int offsetNElems)
{
ADLASSERT( nElems+offsetNElems <= m_size );
SELECT_DEVICEDATA( m_device->m_type, copy(this, hostPtr, nElems, offsetNElems) );
}
// Copies nElems elements, starting at element offsetNElems of this buffer, into hostPtr.
// Asserts that the source range lies inside the buffer, matching the check in write().
template<typename T>
void Buffer<T>::read(T* hostPtr, int nElems, int offsetNElems) const
{
    ADLASSERT( nElems+offsetNElems <= m_size );
    SELECT_DEVICEDATA( m_device->m_type, copy(hostPtr,this, nElems, offsetNElems) );
}
// Copies the first nElems elements of src into this buffer using this buffer's device.
// Asserts that nElems fits into this buffer.
template<typename T>
void Buffer<T>::write(Buffer<T>& src, int nElems)
{
ADLASSERT( nElems <= m_size );
SELECT_DEVICEDATA( m_device->m_type, copy(this, &src, nElems) );
}
// Copies the first nElems elements of this buffer into dst using this buffer's device.
// Asserts that nElems does not exceed this buffer's size, matching the check in write().
template<typename T>
void Buffer<T>::read(Buffer<T>& dst, int nElems) const
{
    ADLASSERT( nElems <= m_size );
    SELECT_DEVICEDATA( m_device->m_type, copy(&dst, this, nElems) );
}
/*
template<typename T>
Buffer<T>& Buffer<T>::operator = ( const Buffer<T>& buffer )
{
// ADLASSERT( buffer.m_size <= m_size );
SELECT_DEVICEDATA( m_device->m_type, copy(this, &buffer, min2( m_size, buffer.m_size) ) );
return *this;
}
*/
template<DeviceType TYPE, bool COPY, typename T>
__inline
static
typename Buffer<T>* BufferUtils::map(const Device* device, const Buffer<T>* in, int copySize)
{
Buffer<T>* native;
ADLASSERT( device->m_type == TYPE );
if( in->getType() == TYPE )
native = (Buffer<T>*)in;
else
{
ADLASSERT( copySize <= in->getSize() );
copySize = (copySize==-1)? in->getSize() : copySize;
native = new Buffer<T>( device, copySize );
if( COPY )
{
if( in->getType() == TYPE_HOST )
native->write( in->m_ptr, copySize );
else if( native->getType() == TYPE_HOST )
{
in->read( native->m_ptr, copySize );
DeviceUtils::waitForCompletion( in->m_device );
}
else
{
T* tmp = new T[copySize];
in->read( tmp, copySize );
DeviceUtils::waitForCompletion( in->m_device );
native->write( tmp, copySize );
DeviceUtils::waitForCompletion( native->m_device );
delete [] tmp;
}
}
}
return native;
}
// Counterpart of BufferUtils::map(). Does nothing when `native` is `orig` itself.
// Otherwise, when COPY is true, copySize elements (all of `orig` for -1) are copied
// from `native` back into `orig`, staging through host memory if neither side is a
// host buffer; the temporary `native` buffer is then deleted.
// `static` is intentionally absent: it is not permitted on an out-of-class definition.
template<bool COPY, typename T>
__inline
void BufferUtils::unmap( Buffer<T>* native, const Buffer<T>* orig, int copySize )
{
    if( native == orig )
        return;

    if( COPY )
    {
        copySize = (copySize==-1)? orig->getSize() : copySize;
        ADLASSERT( copySize <= orig->getSize() );

        if( orig->getType() == TYPE_HOST )
        {
            // device -> host
            native->read( orig->m_ptr, copySize );
            DeviceUtils::waitForCompletion( native->m_device );
        }
        else if( native->getType() == TYPE_HOST )
        {
            // host -> device
            Buffer<T>* dst = (Buffer<T>*)orig;
            dst->write( native->m_ptr, copySize );
            DeviceUtils::waitForCompletion( dst->m_device );
        }
        else
        {
            // device -> device: stage through host memory
            T* tmp = new T[copySize];
            native->read( tmp, copySize );
            DeviceUtils::waitForCompletion( native->m_device );
            Buffer<T>* dst = (Buffer<T>*)orig;
            dst->write( tmp, copySize );
            DeviceUtils::waitForCompletion( dst->m_device );
            delete [] tmp;
        }
    }
    delete native;
}
// Unchecked mutable element access.
// m_ptr lives in the dependent base Buffer<T>, hence the explicit this->.
template<typename T>
T& HostBuffer<T>::operator[](int idx)
{
    return this->m_ptr[idx];
}
// Unchecked read-only element access.
// m_ptr lives in the dependent base Buffer<T>, hence the explicit this->.
template<typename T>
const T& HostBuffer<T>::operator[](int idx) const
{
    return this->m_ptr[idx];
}
// Copies all device.m_size elements of `device` into this buffer's storage, using the
// copy() of the device that owns the source buffer. Asserts that the source fits.
// Inherited members are reached through this-> because the base Buffer<T> is dependent.
template<typename T>
HostBuffer<T>& HostBuffer<T>::operator = ( const Buffer<T>& device )
{
    ADLASSERT( device.m_size <= this->m_size );
    SELECT_DEVICEDATA1( device.m_device, copy( this->m_ptr, &device, device.m_size ) );
    return *this;
}
#undef SELECT_DEVICEDATA
};

View File

@@ -0,0 +1,27 @@
/*
Copyright (c) 2012 Advanced Micro Devices, Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
//Originally written by Takahiro Harada
//ADL_ENABLE_CL and ADL_ENABLE_DX11 can be set in the build system using C/C++ preprocessor defines
//#define ADL_ENABLE_CL
//#define ADL_ENABLE_DX11
//#define ADL_CL_FORCE_UNCACHE_KERNEL
#define ADL_CL_DUMP_MEMORY_LOG
//load the kernels from string instead of loading them from file
#define ADL_LOAD_KERNEL_FROM_STRING
#define ADL_DUMP_DX11_ERROR

View File

@@ -0,0 +1,80 @@
/*
Copyright (c) 2012 Advanced Micro Devices, Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
//Originally written by Takahiro Harada
#ifndef ADL_ERROR_H
#define ADL_ERROR_H
#if defined(ADL_DUMP_DX11_ERROR)
#include <windows.h>
#endif
#ifdef _DEBUG
#include <assert.h>
#include <stdarg.h>
#include <stdio.h>
#endif
namespace adl
{
// ADLASSERT(x): in _DEBUG builds breaks into the debugger when x is false.
// In other builds the expression is still evaluated (inside an empty if) but nothing happens.
#ifdef _DEBUG
#define ADLASSERT(x) if(!(x)){__debugbreak(); }
#else
#define ADLASSERT(x) if(x){}
#endif
// COMPILE_TIME_ASSERT(x): in _DEBUG builds declares a local array of size x, so the
// check relies on the compiler rejecting a zero-sized array when x is false.
// Expands to nothing in other builds.
#ifdef _DEBUG
#define COMPILE_TIME_ASSERT(x) {int compileTimeAssertFailed[x]; compileTimeAssertFailed[0];}
#else
#define COMPILE_TIME_ASSERT(x)
#endif
#ifdef _DEBUG
// printf-style diagnostic output, active in _DEBUG builds only.
// With ADL_DUMP_DX11_ERROR defined the formatted text is sent to OutputDebugString
// (converted to wide characters first in UNICODE builds); otherwise it goes to stdout.
__inline
void debugPrintf(const char *fmt, ...)
{
    va_list args;
    va_start(args, fmt);
#if defined(ADL_DUMP_DX11_ERROR)
    const int bufferChars = 1024*10;
    char text[bufferChars];
    vsprintf_s( text, bufferChars, fmt, args );
#ifdef UNICODE
    WCHAR wideText[bufferChars];
    // First call only queries the number of wide characters required.
    int wideChars = MultiByteToWideChar(0,0,text,-1,wideText,0);
    MultiByteToWideChar(0,0,text,-1,wideText,wideChars);
    OutputDebugString( wideText );
#else
    OutputDebugString( text );
#endif
#else
    vprintf(fmt, args);
#endif
    va_end(args);
}
#else
// Non-debug builds: the call compiles to an empty function.
__inline
void debugPrintf(const char *fmt, ...)
{
}
#endif
};
#endif

View File

@@ -0,0 +1,142 @@
/*
Copyright (c) 2012 Advanced Micro Devices, Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
//Originally written by Takahiro Harada
#ifndef ADL_KERNEL_H
#define ADL_KERNEL_H
#include <map>
#include <string>
#include <fstream>
namespace adl
{
//==========================
// Kernel
//==========================
// Handle to a compiled kernel together with the back end that created it.
struct Kernel
{
DeviceType m_type; // back end; used to pick the matching KernelBuilder when the kernel is deleted
void* m_kernel; // opaque back-end kernel object
};
//==========================
// KernelManager
//==========================
// Cache of compiled kernels, keyed by a string built from the device context,
// file name, function name and build options.
class KernelManager
{
public:
typedef std::map<std::string, Kernel*> KMap;
// Deletes the kernels stored in the cache.
__inline
~KernelManager();
// Returns the cached kernel for the given key, building and caching it on a miss.
__inline
// static
Kernel* query(const Device* dd, const char* fileName, const char* funcName, const char* option = NULL, const char* src = NULL,
bool cacheKernel = true);
public:
KMap m_map;
};
//==========================
// Launcher
//==========================
// Binds buffers and constants to a kernel and launches it on a device.
// Every operation is dispatched to the launcher of the device's back end.
class Launcher
{
public:
    // Type-erased reference to a Buffer<T> plus its access mode.
    struct BufferInfo
    {
        // Default-constructed entries are null and writable rather than indeterminate.
        BufferInfo(): m_buffer(0), m_isReadOnly(false){}
        template<typename T>
        BufferInfo(Buffer<T>* buff, bool isReadOnly = false): m_buffer(buff), m_isReadOnly(isReadOnly){}

        void* m_buffer;
        bool m_isReadOnly;
    };

    // Obtains the kernel from the device via Device::getKernel().
    __inline
    Launcher(const Device* dd, char* fileName, char* funcName, char* option = NULL);
    // Uses an already built kernel.
    __inline
    Launcher(const Device* dd, Kernel* kernel);

    // Binds n buffers described by buffInfo.
    __inline
    void setBuffers( BufferInfo* buffInfo, int n );
    // Binds a constant block, using constBuff as its backing buffer.
    template<typename T>
    __inline
    void setConst( Buffer<T>& constBuff, const T& consts );
    // Launches numThreads threads; implemented as a 2D launch with a Y extent of 1.
    __inline
    void launch1D( int numThreads, int localSize = 64 );
    __inline
    void launch2D( int numThreadsX, int numThreadsY, int localSizeX = 8, int localSizeY = 8 );

public:
    enum
    {
        CONST_BUFFER_SIZE = 512,
    };

    const Device* m_deviceData;
    Kernel* m_kernel;
    int m_idx;   // starts at 0; advanced by the back-end launcher
    int m_idxRw; // starts at 0; advanced by the back-end launcher
};
// Compiles kernel source for one back end (TYPE) and creates Kernel objects from it.
// The member functions are implemented per back end elsewhere.
template<DeviceType TYPE>
class KernelBuilder
{
public:
    // Starts with no device, an empty path and no program, so no member is left
    // indeterminate before one of the setFrom*() functions is called.
    __inline
    KernelBuilder(): m_deviceData(0), m_ptr(0){ m_path[0] = 0; }

    // Loads the program from a file.
    __inline
    void setFromFile( const Device* deviceData, const char* fileName, const char* option = NULL, bool addExtension = false,
        bool cacheKernel = true);
    // Builds the program from an in-memory source string.
    __inline
    void setFromSrc( const Device* deviceData, const char* src, const char* option = NULL );
    // Like setFromSrc(), but also takes the file name used for caching.
    __inline
    void setFromSrcCached( const Device* deviceData, const char* src, const char* fileName, const char* option );
    // Creates the kernel for entry point funcName and stores it in kernelOut.
    __inline
    void createKernel( const char* funcName, Kernel& kernelOut );
    __inline
    ~KernelBuilder();
    // todo. implement in kernel destructor?
    __inline
    static void deleteKernel( Kernel& kernel );

private:
    enum
    {
        MAX_PATH_LENGTH = 260,
    };
    const Device* m_deviceData;
#ifdef UNICODE
    wchar_t m_path[MAX_PATH_LENGTH];
#else
    char m_path[MAX_PATH_LENGTH];
#endif
    void* m_ptr;
};
};
#endif //ADL_KERNEL_H

View File

@@ -0,0 +1,223 @@
/*
Copyright (c) 2012 Advanced Micro Devices, Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
//Originally written by Takahiro Harada
#ifdef ADL_ENABLE_CL
#include <Adl/CL/AdlKernelUtilsCL.inl>
#endif
#ifdef ADL_ENABLE_DX11
#include <Adl/DX11/AdlKernelUtilsDX11.inl>
#endif
namespace adl
{
//==========================
// KernelManager
//==========================
// Returns the kernel `funcName` from `fileName` for device dd, building it on first use.
//  dd          device the kernel is built for; its type selects the back end.
//  fileName    kernel file name without extension (".cl" / ".hlsl" is appended for the key).
//  funcName    entry point inside the program.
//  option      optional build options; part of the cache key.
//  src         optional in-memory source; when null the kernel is loaded from file.
//  cacheKernel forwarded to the builder (for CL it also selects setFromSrcCached).
// The returned Kernel is owned by this manager and freed in its destructor.
Kernel* KernelManager::query(const Device* dd, const char* fileName, const char* funcName, const char* option, const char* src,
    bool cacheKernel)
{
    printf("compiling kernel %s",funcName);
    const int charSize = 1024*2;

    // Start from an empty string so the cache key never reads uninitialised memory
    // when the device type is not handled below.
    char fullFineName[charSize];
    fullFineName[0] = 0;
    switch( dd->m_type )
    {
#if defined(ADL_ENABLE_CL)
    case TYPE_CL:
        sprintf_s(fullFineName,charSize,"%s.cl", fileName);
        break;
#endif
#if defined(ADL_ENABLE_DX11)
    case TYPE_DX11:
        sprintf_s(fullFineName,charSize,"%s.hlsl", fileName);
        break;
#endif
    default:
        ADLASSERT(0);
        break;
    }

    // Cache key: context + file + function (+ options). The context is formatted with
    // %p so the full pointer value is used; casting it to int would truncate it on
    // 64-bit targets and could make two contexts share a key.
    char mapName[charSize];
    if( option )
        sprintf_s(mapName, charSize, "%p%s%s%s", dd->getContext(), fullFineName, funcName, option);
    else
        sprintf_s(mapName, charSize, "%p%s%s", dd->getContext(), fullFineName, funcName);

    std::string str(mapName);
    KMap::iterator iter = m_map.find( str );
    Kernel* kernelOut;
    if( iter == m_map.end() )
    {
        // Cache miss: build the kernel with the builder of the device's back end.
        kernelOut = new Kernel();
        switch( dd->m_type )
        {
#if defined(ADL_ENABLE_CL)
        case TYPE_CL:
            {
                KernelBuilder<TYPE_CL> builder;
                if( src )
                {
                    if (cacheKernel)
                    {
                        builder.setFromSrcCached( dd, src, fileName, option );
                    }
                    else
                    {
                        builder.setFromSrc( dd, src, option );
                    }
                }
                else
                {
                    builder.setFromFile( dd, fileName, option, true, cacheKernel );
                }
                builder.createKernel( funcName, *kernelOut );
            }
            break;
#endif
#if defined(ADL_ENABLE_DX11)
        case TYPE_DX11:
            {
                KernelBuilder<TYPE_DX11> builder;
                if( src )
                    builder.setFromSrc( dd, src, option );
                else
                    builder.setFromFile( dd, fileName, option, true, cacheKernel );
                builder.createKernel( funcName, *kernelOut );
            }
            break;
#endif
        default:
            ADLASSERT(0);
            break;
        }
        m_map.insert( KMap::value_type(str,kernelOut) );
    }
    else
    {
        kernelOut = iter->second;
    }
    printf(" ready\n");
    return kernelOut;
}
// Destroys every cached kernel: the back-end object is released through the matching
// KernelBuilder, then the Kernel wrapper itself is deleted.
// The wrapper is deleted for every entry, including ones whose type is not handled
// (which still trigger the assertion), so nothing is leaked.
KernelManager::~KernelManager()
{
    for(KMap::iterator iter = m_map.begin(); iter != m_map.end(); ++iter)
    {
        Kernel* k = iter->second;
        switch( k->m_type )
        {
#if defined(ADL_ENABLE_CL)
        case TYPE_CL:
            KernelBuilder<TYPE_CL>::deleteKernel( *k );
            break;
#endif
#if defined(ADL_ENABLE_DX11)
        case TYPE_DX11:
            KernelBuilder<TYPE_DX11>::deleteKernel( *k );
            break;
#endif
        default:
            ADLASSERT(0);
            break;
        }
        delete k;
    }
}
//==========================
// Launcher
//==========================
// SELECT_LAUNCHER( type, func ): calls the static function `func` of the launcher class
// that matches the device type (LauncherCL / LauncherDX11). One variant is defined per
// combination of enabled back ends; device types without a launcher hit ADLASSERT(0).
// The expansion already ends in ';'.
#if defined(ADL_ENABLE_DX11)
#if defined(ADL_ENABLE_CL)
#define SELECT_LAUNCHER( type, func ) \
switch( type ) \
{ \
case TYPE_CL: LauncherCL::func; break; \
case TYPE_DX11: LauncherDX11::func; break; \
default: ADLASSERT(0); break; \
};
#else
#define SELECT_LAUNCHER( type, func ) \
switch( type ) \
{ \
case TYPE_DX11: LauncherDX11::func; break; \
default: ADLASSERT(0); break; \
};
#endif
#else
#if defined(ADL_ENABLE_CL)
#define SELECT_LAUNCHER( type, func ) \
switch( type ) \
{ \
case TYPE_CL: LauncherCL::func; break; \
default: ADLASSERT(0); break; \
};
#else
#define SELECT_LAUNCHER( type, func ) \
switch( type ) \
{ \
default: ADLASSERT(0); break; \
};
#endif
#endif
// Creates a launcher for the kernel `funcName` in `fileName`, obtained from the device
// through Device::getKernel(). Both binding indices start at zero.
Launcher::Launcher(const Device *dd, char *fileName, char *funcName, char *option)
    : m_deviceData( dd )
    , m_kernel( dd->getKernel( fileName, funcName, option ) )
    , m_idx( 0 )
    , m_idxRw( 0 )
{
}
// Creates a launcher for an already built kernel. Both binding indices start at zero.
Launcher::Launcher(const Device* dd, Kernel* kernel)
    : m_deviceData( dd )
    , m_kernel( kernel )
    , m_idx( 0 )
    , m_idxRw( 0 )
{
}
// Binds the n buffers described by buffInfo via the launcher of the device's back end.
void Launcher::setBuffers( BufferInfo* buffInfo, int n )
{
SELECT_LAUNCHER( m_deviceData->m_type, setBuffers( this, buffInfo, n ) );
}
// Binds the constant block `consts`, passing constBuff along to the back-end launcher.
template<typename T>
void Launcher::setConst( Buffer<T>& constBuff, const T& consts )
{
SELECT_LAUNCHER( m_deviceData->m_type, setConst( this, constBuff, consts ) );
}
// 1D launch, expressed as a 2D launch of numThreads x 1 threads with a local size of localSize x 1.
void Launcher::launch1D( int numThreads, int localSize )
{
SELECT_LAUNCHER( m_deviceData->m_type, launch2D( this, numThreads, 1, localSize, 1 ) );
}
// 2D launch with the given thread counts and local sizes, dispatched to the back-end launcher.
void Launcher::launch2D( int numThreadsX, int numThreadsY, int localSizeX, int localSizeY )
{
SELECT_LAUNCHER( m_deviceData->m_type, launch2D( this, numThreadsX, numThreadsY, localSizeX, localSizeY ) );
}
#undef SELECT_LAUNCHER
};

View File

@@ -0,0 +1,81 @@
/*
Copyright (c) 2012 Advanced Micro Devices, Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
//Originally written by Takahiro Harada
#include <windows.h>
namespace adl
{
// Abstract interface implemented by the per-back-end stopwatches.
struct StopwatchBase
{
    __inline
    StopwatchBase(): m_device(0), m_idx(0){}
    // Only records the device. init() is pure virtual and therefore must not be called
    // from this constructor; the owner calls init() on the fully constructed object.
    __inline
    StopwatchBase( const Device* deviceData ): m_device(deviceData), m_idx(0){}
    __inline
    virtual ~StopwatchBase(){}

    // Prepares the stopwatch for the given device.
    __inline
    virtual void init( const Device* deviceData ) = 0;
    __inline
    virtual void start() = 0;
    __inline
    virtual void split() = 0;
    __inline
    virtual void stop() = 0;
    // Time of one interval in milliseconds.
    __inline
    virtual float getMs(int index=0) = 0;
    // Fills `times` with up to `capacity` interval times in milliseconds.
    __inline
    virtual void getMs( float* times, int capacity ) = 0;
    // Number of intervals, derived from m_idx.
    __inline
    int getNIntervals() const{ return m_idx-1;}

    enum
    {
        CAPACITY = 64,
    };

    const Device* m_device;
    int m_idx; // maintained by the implementations; starts at 0
};
struct Stopwatch
{
__inline
Stopwatch( const Device* deviceData = NULL ) { m_impl=0; if(deviceData) init(deviceData);}
__inline
~Stopwatch();
__inline
void init( const Device* deviceData );
__inline
void start(){if(!m_impl) init(0); m_impl->start();}
__inline
void split(){m_impl->split();}
__inline
void stop(){m_impl->stop();}
__inline
float getMs(){ return m_impl->getMs();}
__inline
void getMs( float* times, int capacity ){m_impl->getMs(times, capacity);}
__inline
int getNIntervals() const{return m_impl->getNIntervals();}
StopwatchBase* m_impl;
};
};

View File

@@ -0,0 +1,59 @@
/*
Copyright (c) 2012 Advanced Micro Devices, Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
//Originally written by Takahiro Harada
namespace adl
{
// Creates the stopwatch implementation for deviceData and initialises it.
// Every supported back end currently uses StopwatchHost, as does a null device.
// Must be called at most once (asserts that no implementation exists yet).
// For a device type that is unknown or not compiled in, the function asserts and
// returns without creating an implementation instead of dereferencing a null pointer.
void Stopwatch::init( const Device* deviceData )
{
    ADLASSERT( m_impl == 0 );
    if( deviceData )
    {
        switch( deviceData->m_type )
        {
#if defined(ADL_ENABLE_CL)
        case TYPE_CL:
            m_impl = new StopwatchHost;//StopwatchCL
            break;
#endif
#if defined(ADL_ENABLE_DX11)
        case TYPE_DX11:
            m_impl = new StopwatchHost;//StopwatchDX11;
            break;
#endif
        case TYPE_HOST:
            m_impl = new StopwatchHost;
            break;
        default:
            ADLASSERT(0);
            break;
        }
    }
    else
    {
        m_impl = new StopwatchHost;
    }

    if( m_impl == 0 )
        return;

    m_impl->init( deviceData );
}
// Destroys the implementation, if one was ever created.
Stopwatch::~Stopwatch()
{
    // delete on a null pointer is a no-op, so no explicit check is needed.
    delete m_impl;
}
};

View File

@@ -0,0 +1,384 @@
/*
Copyright (c) 2012 Advanced Micro Devices, Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
//Originally written by Takahiro Harada
#pragma comment(lib,"OpenCL.lib")
#include <CL/cl.h>
#include <CL/cl_ext.h>
#include <CL/cl_platform.h>
namespace adl
{
struct DeviceCL : public Device
{
typedef DeviceUtils::Config Config;
__inline
DeviceCL() : Device( TYPE_CL ), m_kernelManager(0){}
__inline
void* getContext() const { return m_context; }
__inline
void initialize(const Config& cfg);
__inline
void release();
template<typename T>
__inline
void allocate(Buffer<T>* buf, int nElems, BufferBase::BufferType type);
template<typename T>
__inline
void deallocate(Buffer<T>* buf);
template<typename T>
__inline
void copy(Buffer<T>* dst, const Buffer<T>* src, int nElems,int srcOffsetNElems = 0,int dstOffsetNElems = 0);
template<typename T>
__inline
void copy(T* dst, const Buffer<T>* src, int nElems, int srcOffsetNElems = 0);
template<typename T>
__inline
void copy(Buffer<T>* dst, const T* src, int nElems, int dstOffsetNElems = 0);
__inline
void waitForCompletion() const;
__inline
void getDeviceName( char nameOut[128] ) const;
__inline
static
int getNDevices();
__inline
Kernel* getKernel(const char* fileName, const char* funcName, const char* option = NULL, const char* src = NULL, bool cacheKernel = true )const;
enum
{
MAX_NUM_DEVICES = 6,
};
cl_context m_context;
cl_command_queue m_commandQueue;
cl_device_id m_deviceIdx;
KernelManager* m_kernelManager;
};
//===
//===
void DeviceCL::initialize(const Config& cfg)
{
// DeviceUtils::create( cfg, (DeviceCL*)this );
{
// dd = new DeviceCL();
DeviceCL* deviceData = (DeviceCL*)this;
// cl_device_type deviceType = (driverType == DRIVER_HARDWARE)? CL_DEVICE_TYPE_GPU:CL_DEVICE_TYPE_CPU;
cl_device_type deviceType = (cfg.m_type== Config::DEVICE_GPU)? CL_DEVICE_TYPE_GPU: CL_DEVICE_TYPE_CPU;
// int numContextQueuePairsToCreate = 1;
bool enableProfiling = false;
#ifdef _DEBUG
enableProfiling = true;
#endif
cl_int status;
cl_platform_id platform;
{
cl_uint nPlatforms = 0;
status = clGetPlatformIDs(0, NULL, &nPlatforms);
ADLASSERT( status == CL_SUCCESS );
cl_platform_id pIdx[5];
status = clGetPlatformIDs(nPlatforms, pIdx, NULL);
ADLASSERT( status == CL_SUCCESS );
cl_uint atiIdx = -1;
cl_uint intelIdx = -1;
cl_uint nvIdx = -1;
for(cl_uint i=0; i<nPlatforms; i++)
{
char buff[512];
status = clGetPlatformInfo( pIdx[i], CL_PLATFORM_VENDOR, 512, buff, 0 );
ADLASSERT( status == CL_SUCCESS );
//skip the platform if there are no devices available
cl_uint numDevice;
status = clGetDeviceIDs( pIdx[i], deviceType, 0, NULL, &numDevice );
if (numDevice>0)
{
if( strcmp( buff, "NVIDIA Corporation" )==0 ) nvIdx = i;
if( strcmp( buff, "Advanced Micro Devices, Inc." )==0 ) atiIdx = i;
if( strcmp( buff, "Intel(R) Corporation" )==0 ) intelIdx = i;
}
}
if( deviceType == CL_DEVICE_TYPE_GPU )
{
switch( cfg.m_vendor )
{
case DeviceUtils::Config::VD_AMD:
if( atiIdx == -1 && nvIdx != -1 ) goto USE_NV_GPU;
USE_AMD_GPU:
ADLASSERT(atiIdx != -1 );
platform = pIdx[atiIdx];
break;
case DeviceUtils::Config::VD_NV:
if( atiIdx != -1 && nvIdx == -1 ) goto USE_AMD_GPU;
USE_NV_GPU:
ADLASSERT(nvIdx != -1 );
platform = pIdx[nvIdx];
break;
default:
ADLASSERT(0);
break;
};
}
else if( deviceType == CL_DEVICE_TYPE_CPU )
{
switch( cfg.m_vendor )
{
case DeviceUtils::Config::VD_AMD:
ADLASSERT(atiIdx != -1 );
platform = pIdx[atiIdx];
break;
case DeviceUtils::Config::VD_INTEL:
ADLASSERT(intelIdx != -1 );
platform = pIdx[intelIdx];
break;
default:
ADLASSERT(0);
break;
};
}
}
cl_uint numDevice;
status = clGetDeviceIDs( platform, deviceType, 0, NULL, &numDevice );
// ADLASSERT( cfg.m_deviceIdx < (int)numDevice );
debugPrintf("CL: %d %s Devices ", numDevice, (deviceType==CL_DEVICE_TYPE_GPU)? "GPU":"CPU");
// numContextQueuePairsToCreate = min( (int)numDevice, numContextQueuePairsToCreate );
// numContextQueuePairsToCreate = ( (int)numDevice < numContextQueuePairsToCreate )? numDevice : numContextQueuePairsToCreate;
cl_device_id deviceIds[ MAX_NUM_DEVICES ];
status = clGetDeviceIDs( platform, deviceType, numDevice, deviceIds, NULL );
ADLASSERT( status == CL_SUCCESS );
{ int i = min( (int)numDevice-1, cfg.m_deviceIdx );
m_deviceIdx = deviceIds[i];
deviceData->m_context = clCreateContext( NULL, 1, &deviceData->m_deviceIdx, NULL, NULL, &status );
ADLASSERT( status == CL_SUCCESS );
char buff[512];
status = clGetDeviceInfo( deviceData->m_deviceIdx, CL_DEVICE_NAME, sizeof(buff), &buff, NULL );
ADLASSERT( status == CL_SUCCESS );
debugPrintf("[%s]\n", buff);
deviceData->m_commandQueue = clCreateCommandQueue( deviceData->m_context, deviceData->m_deviceIdx, (enableProfiling)?CL_QUEUE_PROFILING_ENABLE:NULL, NULL );
ADLASSERT( status == CL_SUCCESS );
// status = clSetCommandQueueProperty( commandQueue, CL_QUEUE_PROFILING_ENABLE, CL_TRUE, 0 );
// CLASSERT( status == CL_SUCCESS );
if(0)
{
cl_bool image_support;
clGetDeviceInfo(deviceData->m_deviceIdx, CL_DEVICE_IMAGE_SUPPORT, sizeof(image_support), &image_support, NULL);
debugPrintf(" CL_DEVICE_IMAGE_SUPPORT : %s\n", image_support?"Yes":"No");
}
}
}
m_kernelManager = new KernelManager;
}
/// Releases the command queue, the context and the kernel manager.
/// Each handle is reset after it is released, so a second call to release()
/// does not release or delete the same object twice.
void DeviceCL::release()
{
	if( m_commandQueue )
	{
		clReleaseCommandQueue( m_commandQueue );
		m_commandQueue = 0;
	}
	if( m_context )
	{
		clReleaseContext( m_context );
		m_context = 0;
	}
	delete m_kernelManager;	// deleting a null pointer is a no-op
	m_kernelManager = 0;
}
/// Binds buf to this device and creates an OpenCL buffer of nElems elements of T.
///
/// BUFFER_CONST gets no device memory (m_ptr stays null).
/// BUFFER_ZERO_COPY uses CL_MEM_ALLOC_HOST_PTR, BUFFER_RAW uses CL_MEM_WRITE_ONLY,
/// every other type uses CL_MEM_READ_WRITE.
/// m_memoryUsage is only increased when the allocation succeeded, which matches
/// deallocate() subtracting only for non-null buffers.
template<typename T>
void DeviceCL::allocate(Buffer<T>* buf, int nElems, BufferBase::BufferType type)
{
	buf->m_device = this;
	buf->m_size = nElems;
	buf->m_ptr = 0;
	if( type == BufferBase::BUFFER_CONST ) return;

#if defined(ADL_CL_DUMP_MEMORY_LOG)
	char deviceName[256];
	getDeviceName( deviceName );
	printf( "adlCLMemoryLog %s : %3.2fMB Allocation: %3.2fKB ", deviceName, m_memoryUsage/1024.f/1024.f, sizeof(T)*nElems/1024.f );
	fflush( stdout );
#endif

	// Compute the size in size_t; an int would truncate allocations of 2GB and more.
	const size_t sz = sizeof(T)*(size_t)nElems;

	cl_mem_flags flags = CL_MEM_READ_WRITE;
	if( type == BufferBase::BUFFER_ZERO_COPY )
		flags = CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR;
	else if( type == BufferBase::BUFFER_RAW )
		flags = CL_MEM_WRITE_ONLY;

	cl_int status = 0;
	buf->m_ptr = (T*)clCreateBuffer( m_context, flags, sz, 0, &status );
	if( status == CL_SUCCESS )
		m_memoryUsage += sz;

#if defined(ADL_CL_DUMP_MEMORY_LOG)
	printf( "%s\n", (status==CL_SUCCESS)? "Succeed": "Failed" );
	fflush( stdout );
#endif
	ADLASSERT( status == CL_SUCCESS );
}
/// Frees the OpenCL memory object owned by buf (if any), updates the memory
/// usage counter and clears the buffer's pointer, size and device fields.
template<typename T>
void DeviceCL::deallocate(Buffer<T>* buf)
{
	cl_mem memObject = (cl_mem)buf->m_ptr;
	if( memObject != 0 )
	{
		m_memoryUsage -= buf->m_size*sizeof(T);
		clReleaseMemObject( memObject );
	}

	buf->m_ptr = 0;
	buf->m_size = 0;
	buf->m_device = 0;
}
/// Copies nElems elements from src to dst.
///
/// CL -> CL   : device-side clEnqueueCopyBuffer using both element offsets.
/// host -> CL : forwarded to dst->write().
/// CL -> host : forwarded to src->read().
/// Any other combination asserts.
/// NOTE(review): the two host paths do not forward srcOffsetNElems /
/// dstOffsetNElems — confirm whether that is intended.
template<typename T>
void DeviceCL::copy(Buffer<T>* dst, const Buffer<T>* src, int nElems,int srcOffsetNElems,int dstOffsetNElems )
{
	const bool dstOnCL = ( dst->m_device->m_type == TYPE_CL );
	const bool srcOnCL = ( src->m_device->m_type == TYPE_CL );
	const bool srcOnHost = ( src->m_device->m_type == TYPE_HOST );
	const bool dstOnHost = ( dst->m_device->m_type == TYPE_HOST );

	if( dstOnCL && srcOnCL )
	{
		const size_t srcOffsetBytes = sizeof(T)*srcOffsetNElems;
		const size_t dstOffsetBytes = sizeof(T)*dstOffsetNElems;
		const size_t sizeBytes = sizeof(T)*nElems;
		cl_int status = clEnqueueCopyBuffer( m_commandQueue, (cl_mem)src->m_ptr, (cl_mem)dst->m_ptr, srcOffsetBytes, dstOffsetBytes, sizeBytes, 0, 0, 0 );
		ADLASSERT( status == CL_SUCCESS );
		return;
	}

	if( srcOnHost )
	{
		ADLASSERT( dst->getType() == TYPE_CL );
		dst->write( src->m_ptr, nElems );
		return;
	}

	if( dstOnHost )
	{
		ADLASSERT( src->getType() == TYPE_CL );
		src->read( dst->m_ptr, nElems );
		return;
	}

	// Unsupported device combination.
	ADLASSERT( 0 );
}
/// Enqueues a read of nElems elements, starting srcOffsetNElems elements into
/// src, into the host array dst.
/// The read is enqueued with blocking_read = CL_FALSE, so dst is only
/// guaranteed to be filled once the queue has completed (see waitForCompletion()).
template<typename T>
void DeviceCL::copy(T* dst, const Buffer<T>* src, int nElems, int srcOffsetNElems )
{
	const size_t offsetBytes = sizeof(T)*srcOffsetNElems;
	const size_t sizeBytes = sizeof(T)*nElems;

	cl_int status = clEnqueueReadBuffer( m_commandQueue, (cl_mem)src->m_ptr, CL_FALSE, offsetBytes, sizeBytes, dst, 0, 0, 0 );
	ADLASSERT( status == CL_SUCCESS );
}
/// Enqueues a write of nElems elements from the host array src into dst,
/// starting dstOffsetNElems elements into the device buffer.
/// The write is enqueued with blocking_write = CL_FALSE, so src must stay
/// valid until the queue has completed (see waitForCompletion()).
template<typename T>
void DeviceCL::copy(Buffer<T>* dst, const T* src, int nElems, int dstOffsetNElems )
{
	// Byte counts are kept in size_t; an int would truncate transfers of 2GB and more.
	const size_t offsetBytes = sizeof(T)*dstOffsetNElems;
	const size_t sizeBytes = sizeof(T)*nElems;

	cl_int status = clEnqueueWriteBuffer( m_commandQueue, (cl_mem)dst->m_ptr, CL_FALSE, offsetBytes, sizeBytes, src, 0, 0, 0 );
	ADLASSERT( status == CL_SUCCESS );
}
void DeviceCL::waitForCompletion() const
{
clFinish( m_commandQueue );
}
int DeviceCL::getNDevices()
{
cl_device_type deviceType = CL_DEVICE_TYPE_GPU;
cl_int status;
cl_platform_id platform;
{
cl_uint nPlatforms = 0;
status = clGetPlatformIDs(0, NULL, &nPlatforms);
ADLASSERT( status == CL_SUCCESS );
cl_platform_id pIdx[5];
status = clGetPlatformIDs(nPlatforms, pIdx, NULL);
ADLASSERT( status == CL_SUCCESS );
cl_uint nvIdx = -1;
cl_uint atiIdx = -1;
for(cl_uint i=0; i<nPlatforms; i++)
{
char buff[512];
status = clGetPlatformInfo( pIdx[i], CL_PLATFORM_VENDOR, 512, buff, 0 );
ADLASSERT( status == CL_SUCCESS );
if( strcmp( buff, "NVIDIA Corporation" )==0 ) nvIdx = i;
if( strcmp( buff, "Advanced Micro Devices, Inc." )==0 ) atiIdx = i;
}
if( deviceType == CL_DEVICE_TYPE_GPU )
{
if( nvIdx != -1 ) platform = pIdx[nvIdx];
else platform = pIdx[atiIdx];
}
else if( deviceType == CL_DEVICE_TYPE_CPU )
{
platform = pIdx[atiIdx];
}
}
cl_uint numDevice;
status = clGetDeviceIDs( platform, deviceType, 0, NULL, &numDevice );
ADLASSERT( status == CL_SUCCESS );
return numDevice;
}
/// Queries CL_DEVICE_NAME of the selected device and stores it in nameOut,
/// which must provide room for 128 characters.
void DeviceCL::getDeviceName( char nameOut[128] ) const
{
	const size_t capacityBytes = sizeof(char)*128;
	cl_int status = clGetDeviceInfo( m_deviceIdx, CL_DEVICE_NAME, capacityBytes, nameOut, NULL );
	ADLASSERT( status == CL_SUCCESS );
}
/// Forwards the kernel request to the kernel manager created by initialize().
Kernel* DeviceCL::getKernel(const char* fileName, const char* funcName, const char* option, const char* src, bool cacheKernel )const
{
	Kernel* kernel = m_kernelManager->query( this, fileName, funcName, option, src, cacheKernel );
	return kernel;
}
};

View File

@@ -0,0 +1,541 @@
/*
Copyright (c) 2012 Advanced Micro Devices, Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
//Originally written by Takahiro Harada
namespace adl
{
// OpenCL view of a Kernel: gives typed access to the handle stored in m_kernel.
struct KernelCL : public Kernel
{
// Reinterprets the m_kernel storage as a cl_kernel lvalue, so callers can both
// read the handle and assign a new one through the returned reference.
cl_kernel& getKernel() { return (cl_kernel&)m_kernel; }
};
/// Returns a pointer into name just past the last occurrence of pattern,
/// or name itself when pattern does not occur.
/// An empty pattern matches everywhere and would never advance the search,
/// so it is treated as "nothing to strip".
static const char* strip(const char* name, const char* pattern)
{
	const size_t patlen = strlen(pattern);
	if( patlen == 0 ) return name;

	const char* tail = name;
	const char* hit;
	while( (hit = strstr(tail, pattern)) != 0 )
	{
		tail = hit + patlen;
	}
	return tail;
}
static bool isFileUpToDate(const char* binaryFileName,const char* srcFileName)
{
bool fileUpToDate = false;
bool binaryFileValid=false;
FILETIME modtimeBinary;
int nameLength = (int)strlen(binaryFileName)+1;
#ifdef UNICODE
WCHAR* fName = new WCHAR[nameLength];
MultiByteToWideChar(CP_ACP,0,binaryFileName,-1, fName, nameLength);
HANDLE binaryFileHandle = CreateFile(fName,GENERIC_READ,0,0,OPEN_EXISTING,FILE_ATTRIBUTE_NORMAL,0);
delete [] fName;
#else
HANDLE binaryFileHandle = CreateFile(binaryFileName,GENERIC_READ,0,0,OPEN_EXISTING,FILE_ATTRIBUTE_NORMAL,0);
#endif
if (binaryFileHandle ==INVALID_HANDLE_VALUE)
{
DWORD errorCode;
errorCode = GetLastError();
switch (errorCode)
{
case ERROR_FILE_NOT_FOUND:
{
debugPrintf("\nCached file not found %s\n", binaryFileName);
break;
}
case ERROR_PATH_NOT_FOUND:
{
debugPrintf("\nCached file path not found %s\n", binaryFileName);
break;
}
default:
{
debugPrintf("\nFailed reading cached file with errorCode = %d\n", errorCode);
}
}
} else
{
if (GetFileTime(binaryFileHandle, NULL, NULL, &modtimeBinary)==0)
{
DWORD errorCode;
errorCode = GetLastError();
debugPrintf("\nGetFileTime errorCode = %d\n", errorCode);
} else
{
binaryFileValid = true;
}
CloseHandle(binaryFileHandle);
}
if (binaryFileValid)
{
#ifdef UNICODE
int nameLength = (int)strlen(srcFileName)+1;
WCHAR* fName = new WCHAR[nameLength];
MultiByteToWideChar(CP_ACP,0,srcFileName,-1, fName, nameLength);
HANDLE srcFileHandle = CreateFile(fName,GENERIC_READ,0,0,OPEN_EXISTING,FILE_ATTRIBUTE_NORMAL,0);
delete [] fName;
#else
HANDLE srcFileHandle = CreateFile(srcFileName,GENERIC_READ,0,0,OPEN_EXISTING,FILE_ATTRIBUTE_NORMAL,0);
#endif
if (srcFileHandle!=INVALID_HANDLE_VALUE)
{
FILETIME modtimeSrc;
if (GetFileTime(srcFileHandle, NULL, NULL, &modtimeSrc)==0)
{
DWORD errorCode;
errorCode = GetLastError();
debugPrintf("\nGetFileTime errorCode = %d\n", errorCode);
}
if ( ( modtimeSrc.dwHighDateTime < modtimeBinary.dwHighDateTime)
||(( modtimeSrc.dwHighDateTime == modtimeBinary.dwHighDateTime)&&(modtimeSrc.dwLowDateTime <= modtimeBinary.dwLowDateTime)))
{
fileUpToDate=true;
} else
{
debugPrintf("\nCached binary file found (%s), but out-of-date\n",binaryFileName);
}
CloseHandle(srcFileHandle);
}
else
{
#ifdef _DEBUG
DWORD errorCode;
errorCode = GetLastError();
switch (errorCode)
{
case ERROR_FILE_NOT_FOUND:
{
debugPrintf("\nSrc file not found %s\n", srcFileName);
break;
}
case ERROR_PATH_NOT_FOUND:
{
debugPrintf("\nSrc path not found %s\n", srcFileName);
break;
}
default:
{
debugPrintf("\nnSrc file reading errorCode = %d\n", errorCode);
}
}
ADLASSERT(0);
#else
//if we cannot find the source, assume it is OK in release builds
fileUpToDate = true;
#endif
}
}
return fileUpToDate;
}
/// Builds the OpenCL program for a kernel source file.
///
/// When caching is enabled and cache/<file>.<device>.<driver>.bin is at least as
/// new as the source, the program is created from that binary. Otherwise the
/// source file is compiled with setFromSrc() and, when caching is enabled, the
/// resulting binary is written back to the cache file.
template<>
void KernelBuilder<TYPE_CL>::setFromFile( const Device* deviceData, const char* fileName, const char* option, bool addExtension,
	bool cacheKernel)
{
	m_deviceData = deviceData;

	char fileNameWithExtension[256];
	if( addExtension )
		sprintf_s( fileNameWithExtension, "%s.cl", fileName );
	else
		sprintf_s( fileNameWithExtension, "%s", fileName );

	// Minimal helper that loads a whole file into a std::string.
	class File
	{
	public:
		__inline
		bool open(const char* fileNameWithExtension)
		{
			std::fstream f(fileNameWithExtension, (std::fstream::in | std::fstream::binary));
			if (!f.is_open())
				return false;

			// Find the stream size, then read the file straight into the string.
			f.seekg(0, std::fstream::end);
			const size_t size = (size_t)f.tellg();
			f.seekg(0, std::fstream::beg);

			m_source.assign(size, '\0');
			if (size)
				f.read(&m_source[0], size);
			f.close();
			return true;
		}
		const std::string& getSource() const {return m_source;}
	private:
		std::string m_source;
	};

	cl_program& program = (cl_program&)m_ptr;
	cl_int status = 0;
	const DeviceCL* dd = (const DeviceCL*) deviceData;

	bool cacheBinary = cacheKernel;
#if defined(ADL_CL_FORCE_UNCACHE_KERNEL)
	cacheBinary = false;
#endif

	char binaryFileName[512];
	{
		char deviceName[256];
		deviceData->getDeviceName(deviceName);
		char driverVersion[256] = "";
		clGetDeviceInfo(dd->m_deviceIdx, CL_DRIVER_VERSION, 256, driverVersion, NULL);
		// Drop any directory part, whichever separator was used.
		const char* strippedFileName = strip(fileName,"\\");
		strippedFileName = strip(strippedFileName,"/");
		sprintf_s(binaryFileName,"cache/%s.%s.%s.bin",strippedFileName, deviceName,driverVersion );
	}

	// Only touch the file system for the freshness check when caching is wanted.
	if( cacheBinary && isFileUpToDate(binaryFileName,fileNameWithExtension) )
	{
		FILE* file = fopen(binaryFileName, "rb");
		if( file )
		{
			fseek( file, 0L, SEEK_END );
			const long fileSize = ftell( file );
			rewind( file );

			size_t binarySize = (fileSize > 0)? (size_t)fileSize : 0;
			char* binary = new char[binarySize];
			// A short read means the cache file is unusable.
			if( fread( binary, sizeof(char), binarySize, file ) != binarySize )
				binarySize = 0;
			fclose( file );

			if (binarySize)
			{
				program = clCreateProgramWithBinary( dd->m_context, 1, &dd->m_deviceIdx, &binarySize, (const unsigned char**)&binary, 0, &status );
				ADLASSERT( status == CL_SUCCESS );
				status = clBuildProgram( program, 1, &dd->m_deviceIdx, option, 0, 0 );
				if( status != CL_SUCCESS )
				{
					// Print the build log before asserting so the reason is visible.
					size_t ret_val_size = 0;
					clGetProgramBuildInfo(program, dd->m_deviceIdx, CL_PROGRAM_BUILD_LOG, 0, NULL, &ret_val_size);
					char* build_log = new char[ret_val_size+1];
					clGetProgramBuildInfo(program, dd->m_deviceIdx, CL_PROGRAM_BUILD_LOG, ret_val_size, build_log, NULL);
					build_log[ret_val_size] = '\0';
					debugPrintf("%s\n", build_log);
					delete [] build_log;
					ADLASSERT(0);
				}
			}
			delete [] binary;
		}
	}

	if( !m_ptr )
	{
		File kernelFile;
		// Keep the call outside the assert so it cannot be compiled away with it.
		const bool opened = kernelFile.open( fileNameWithExtension );
		ADLASSERT( opened );
		if( !opened ) return;

		const char* source = kernelFile.getSource().c_str();
		setFromSrc( m_deviceData, source, option );

		if( cacheBinary )
		{	// write to binary
			size_t binarySize = 0;
			status = clGetProgramInfo( program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &binarySize, 0 );
			ADLASSERT( status == CL_SUCCESS );
			if( status == CL_SUCCESS && binarySize )
			{
				char* binary = new char[binarySize];
				status = clGetProgramInfo( program, CL_PROGRAM_BINARIES, sizeof(char*), &binary, 0 );
				ADLASSERT( status == CL_SUCCESS );
				// Never write a cache file from a failed query.
				if( status == CL_SUCCESS )
				{
					FILE* file = fopen(binaryFileName, "wb");
					if (file)
					{
						fwrite( binary, sizeof(char), binarySize, file );
						fclose( file );
					}
				}
				delete [] binary;
			}
		}
	}
}
/// Builds the OpenCL program from an in-memory source string, using the binary cache.
///
/// fileName is only used to name the cache entry and to locate <fileName>.cl for
/// the freshness check. When the cached binary is up to date it is loaded;
/// otherwise src is compiled with setFromSrc() and the binary is written back
/// (only when the program is associated with exactly one device).
template<>
void KernelBuilder<TYPE_CL>::setFromSrcCached( const Device* deviceData, const char* src, const char* fileName, const char* option )
{
	m_deviceData = deviceData;
	const bool cacheBinary = true;
	cl_program& program = (cl_program&)m_ptr;
	cl_int status = 0;
	const DeviceCL* dd = (const DeviceCL*) deviceData;

	char binaryFileName[512];
	{
		char deviceName[256];
		deviceData->getDeviceName(deviceName);
		char driverVersion[256] = "";
		clGetDeviceInfo(dd->m_deviceIdx, CL_DRIVER_VERSION, 256, driverVersion, NULL);
		// Drop any directory part, whichever separator was used.
		const char* strippedFileName = strip(fileName,"\\");
		strippedFileName = strip(strippedFileName,"/");
		sprintf_s(binaryFileName,"cache/%s.%s.%s.bin",strippedFileName, deviceName,driverVersion );
	}

	char fileNameWithExtension[256];
	sprintf_s(fileNameWithExtension,"%s.cl",fileName);

	// The freshness check is done once; it touches the file system and logs.
	if( cacheBinary && isFileUpToDate(binaryFileName,fileNameWithExtension) )
	{
		FILE* file = fopen(binaryFileName, "rb");
		if (file)
		{
			fseek( file, 0L, SEEK_END );
			const long fileSize = ftell( file );
			rewind( file );

			size_t binarySize = (fileSize > 0)? (size_t)fileSize : 0;
			char* binary = new char[binarySize];
			// A short read means the cache file is unusable.
			if( fread( binary, sizeof(char), binarySize, file ) != binarySize )
				binarySize = 0;
			fclose( file );

			if (binarySize)
			{
				program = clCreateProgramWithBinary( dd->m_context, 1, &dd->m_deviceIdx, &binarySize, (const unsigned char**)&binary, 0, &status );
				ADLASSERT( status == CL_SUCCESS );
				status = clBuildProgram( program, 1, &dd->m_deviceIdx, option, 0, 0 );
				if( status != CL_SUCCESS )
				{
					// Print the build log before asserting so the reason is visible.
					size_t ret_val_size = 0;
					clGetProgramBuildInfo(program, dd->m_deviceIdx, CL_PROGRAM_BUILD_LOG, 0, NULL, &ret_val_size);
					char* build_log = new char[ret_val_size+1];
					clGetProgramBuildInfo(program, dd->m_deviceIdx, CL_PROGRAM_BUILD_LOG, ret_val_size, build_log, NULL);
					build_log[ret_val_size] = '\0';
					debugPrintf("%s\n", build_log);
					delete [] build_log;
					ADLASSERT(0);
				}
			}
			delete [] binary;
		}
	}

	if( !m_ptr )
	{
		setFromSrc( deviceData, src, option );

		if( cacheBinary )
		{	// write to binary
			cl_uint numAssociatedDevices = 0;
			status = clGetProgramInfo( program, CL_PROGRAM_NUM_DEVICES, sizeof(cl_uint), &numAssociatedDevices, 0 );
			ADLASSERT( status == CL_SUCCESS );
			if (status == CL_SUCCESS && numAssociatedDevices==1)
			{
				size_t binarySize = 0;
				status = clGetProgramInfo( program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &binarySize, 0 );
				ADLASSERT( status == CL_SUCCESS );
				if( status == CL_SUCCESS && binarySize )
				{
					char* binary = new char[binarySize];
					status = clGetProgramInfo( program, CL_PROGRAM_BINARIES, sizeof(char*), &binary, 0 );
					ADLASSERT( status == CL_SUCCESS );
					// Never write a cache file from a failed query.
					if( status == CL_SUCCESS )
					{
						FILE* file = fopen(binaryFileName, "wb");
						if (file)
						{
							fwrite( binary, sizeof(char), binarySize, file );
							fclose( file );
						}
					}
					delete [] binary;
				}
			}
		}
	}
}
/// Creates an OpenCL program from the source string src and builds it for the
/// device held by deviceData. On a build failure the build log is printed and
/// the function asserts.
template<>
void KernelBuilder<TYPE_CL>::setFromSrc( const Device* deviceData, const char* src, const char* option )
{
	ADLASSERT( deviceData->m_type == TYPE_CL );
	m_deviceData = deviceData;
	const DeviceCL* dd = (const DeviceCL*) deviceData;
	cl_program& program = (cl_program&)m_ptr;

	cl_int status = 0;
	size_t srcSize[] = {strlen( src )};
	program = clCreateProgramWithSource( dd->m_context, 1, &src, srcSize, &status );
	ADLASSERT( status == CL_SUCCESS );

	status = clBuildProgram( program, 1, &dd->m_deviceIdx, option, NULL, NULL );
	if( status != CL_SUCCESS )
	{
		// First query the log length, then fetch the text.
		size_t ret_val_size = 0;
		clGetProgramBuildInfo(program, dd->m_deviceIdx, CL_PROGRAM_BUILD_LOG, 0, NULL, &ret_val_size);
		char* build_log = new char[ret_val_size+1];
		clGetProgramBuildInfo(program, dd->m_deviceIdx, CL_PROGRAM_BUILD_LOG, ret_val_size, build_log, NULL);
		build_log[ret_val_size] = '\0';
		debugPrintf("%s\n", build_log);
		printf("%s\n", build_log);
		// Allocated with new[], so it must be released with delete[].
		delete [] build_log;
		ADLASSERT(0);
	}
}
/// Releases the OpenCL program stored in m_ptr.
template<>
KernelBuilder<TYPE_CL>::~KernelBuilder()
{
	clReleaseProgram( (cl_program)m_ptr );
}
/// Creates the kernel named funcName from the built program and stores the
/// handle in kernelOut, which is tagged as a TYPE_CL kernel.
template<>
void KernelBuilder<TYPE_CL>::createKernel( const char* funcName, Kernel& kernelOut )
{
	cl_int status = 0;
	cl_kernel& handle = ((KernelCL*)&kernelOut)->getKernel();
	handle = clCreateKernel( (cl_program)m_ptr, funcName, &status );
	ADLASSERT( status == CL_SUCCESS );

	kernelOut.m_type = TYPE_CL;
}
/// Releases the OpenCL kernel handle stored in kernel.
template<>
void KernelBuilder<TYPE_CL>::deleteKernel( Kernel& kernel )
{
	cl_kernel& handle = ((KernelCL*)&kernel)->getKernel();
	clReleaseKernel( handle );
}
// OpenCL back end for Launcher: static helpers that bind kernel arguments and
// enqueue the kernel held by a Launcher.
class LauncherCL
{
public:
typedef Launcher::BufferInfo BufferInfo;
// Binds n buffers as consecutive kernel arguments, advancing launcher->m_idx.
__inline
static void setBuffers( Launcher* launcher, BufferInfo* buffInfo, int n );
// Binds the value consts as the next kernel argument (passed by value).
template<typename T>
__inline
static void setConst( Launcher* launcher, Buffer<T>& constBuff, const T& consts );
// Enqueues the kernel over a 2D range rounded up to whole work-groups.
__inline
static void launch2D( Launcher* launcher, int numThreadsX, int numThreadsY, int localSizeX, int localSizeY );
};
/// Binds the n buffers in buffInfo as consecutive cl_mem kernel arguments,
/// starting at launcher->m_idx and advancing it once per buffer.
void LauncherCL::setBuffers( Launcher* launcher, BufferInfo* buffInfo, int n )
{
	cl_kernel& kernel = ((KernelCL*)launcher->m_kernel)->getKernel();

	int i = 0;
	while( i < n )
	{
		// The element type is irrelevant here: only the m_ptr slot holding the cl_mem is used.
		Buffer<int>* buff = (Buffer<int>*)buffInfo[i].m_buffer;
		cl_int status = clSetKernelArg( kernel, launcher->m_idx++, sizeof(cl_mem), &buff->m_ptr );
		ADLASSERT( status == CL_SUCCESS );
		++i;
	}
}
/// Passes consts by value as the next kernel argument and advances launcher->m_idx.
/// constBuff is not used by this implementation.
template<typename T>
void LauncherCL::setConst( Launcher* launcher, Buffer<T>& constBuff, const T& consts )
{
	cl_kernel& kernel = ((KernelCL*)launcher->m_kernel)->getKernel();
	const int argBytes = sizeof(T);
	cl_int status = clSetKernelArg( kernel, launcher->m_idx++, argBytes, &consts );
	ADLASSERT( status == CL_SUCCESS );
}
/// Enqueues the launcher's kernel over a 2D range.
/// In each dimension the global size is the thread count rounded up to a whole
/// number of work-groups (at least one group) of the given local size.
void LauncherCL::launch2D( Launcher* launcher, int numThreadsX, int numThreadsY, int localSizeX, int localSizeY )
{
	KernelCL* clKernel = (KernelCL*)launcher->m_kernel;
	const DeviceCL* ddcl = (const DeviceCL*)launcher->m_deviceData;

	size_t lRange[3] = {1,1,1};
	size_t gRange[3] = {1,1,1};
	lRange[0] = localSizeX;
	lRange[1] = localSizeY;

	const int numThreads[2] = { numThreadsX, numThreadsY };
	for(int axis=0; axis<2; axis++)
	{
		// Number of work-groups: quotient plus one more group for any remainder.
		const size_t nGroups = (numThreads[axis]/lRange[axis]) + ((numThreads[axis]%lRange[axis])? 1: 0);
		gRange[axis] = max((size_t)1, nGroups);
		gRange[axis] *= lRange[axis];
	}

	cl_int status = clEnqueueNDRangeKernel( ddcl->m_commandQueue,
		clKernel->getKernel(), 2, NULL, gRange, lRange, 0,0,0 );
	ADLASSERT( status == CL_SUCCESS );
}
};

View File

@@ -0,0 +1,512 @@
/*
Copyright (c) 2012 Advanced Micro Devices, Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
//Originally written by Takahiro Harada
#include <windows.h>
#include <d3d11.h>
#include <d3dx11.h>
#include <d3dcompiler.h>
#include <DXGI.h>
#pragma comment(lib,"d3dx11.lib")
#pragma comment(lib,"d3d11.lib")
#pragma comment(lib,"DXGI.lib")
namespace adl
{
#define u32 unsigned int
struct DeviceDX11 : public Device
{
typedef DeviceUtils::Config Config;
__inline
DeviceDX11() : Device( TYPE_DX11 ), m_kernelManager(0){}
__inline
void* getContext() const { return m_context; }
__inline
void initialize(const Config& cfg);
__inline
void release();
template<typename T>
__inline
void allocate(Buffer<T>* buf, int nElems, BufferBase::BufferType type);
template<typename T>
__inline
void deallocate(Buffer<T>* buf);
template<typename T>
__inline
void copy(Buffer<T>* dst, const Buffer<T>* src, int nElems);
template<typename T>
__inline
void copy(T* dst, const Buffer<T>* src, int nElems, int srcOffsetNElems = 0);
template<typename T>
__inline
void copy(Buffer<T>* dst, const T* src, int nElems, int dstOffsetNElems = 0);
__inline
void waitForCompletion() const;
__inline
void getDeviceName( char nameOut[128] ) const;
__inline
static
int getNDevices();
__inline
Kernel* getKernel(const char* fileName, const char* funcName, const char* option = NULL, const char* src = NULL, bool cacheKernel = true )const;
ID3D11DeviceContext* m_context;
ID3D11Device* m_device;
IDXGISwapChain* m_swapChain;
KernelManager* m_kernelManager;
};
/// Typed view over Buffer<T> for the DX11 back end: exposes the raw pointer
/// slots of the buffer as the D3D11 interfaces they hold.
/// Members inherited from the dependent base are accessed through this-> so the
/// template also compiles under standard two-phase name lookup.
template<typename T>
struct BufferDX11 : public Buffer<T>
{
	ID3D11Buffer* getBuffer() { return (ID3D11Buffer*)this->m_ptr; }
	ID3D11UnorderedAccessView* getUAV() { return (ID3D11UnorderedAccessView*)this->m_uav; }
	ID3D11ShaderResourceView* getSRV() { return (ID3D11ShaderResourceView*)this->m_srv; }

	// Address-of variants, used as output parameters of the D3D11 Create* calls.
	ID3D11Buffer** getBufferPtr() { return (ID3D11Buffer**)&this->m_ptr; }
	ID3D11UnorderedAccessView** getUAVPtr() { return (ID3D11UnorderedAccessView**)&this->m_uav; }
	ID3D11ShaderResourceView** getSRVPtr() { return (ID3D11ShaderResourceView**)&this->m_srv; }
};
#define SAFE_RELEASE(p) { if(p) { (p)->Release(); (p)=NULL; } }
void DeviceDX11::initialize(const Config& cfg)
{
DeviceDX11* deviceData = this;
HRESULT hr = S_OK;
UINT createDeviceFlg = 0;
#ifdef _DEBUG
createDeviceFlg |= D3D11_CREATE_DEVICE_DEBUG;
#endif
D3D_FEATURE_LEVEL fl[] = {
D3D_FEATURE_LEVEL_11_0,
D3D_FEATURE_LEVEL_10_1,
D3D_FEATURE_LEVEL_10_0
};
typedef HRESULT (WINAPI * LPD3D11CREATEDEVICE)( IDXGIAdapter*, D3D_DRIVER_TYPE, HMODULE, u32, D3D_FEATURE_LEVEL*, UINT, u32, ID3D11Device**, D3D_FEATURE_LEVEL*, ID3D11DeviceContext** );
HMODULE moduleD3D11 = 0;
#ifdef UNICODE
moduleD3D11 = LoadLibrary( L"d3d11.dll" );
#else
moduleD3D11 = LoadLibrary( "d3d11.dll" );
#endif
ADLASSERT( moduleD3D11 );
LPD3D11CREATEDEVICE _DynamicD3D11CreateDevice;
_DynamicD3D11CreateDevice = ( LPD3D11CREATEDEVICE )GetProcAddress( moduleD3D11, "D3D11CreateDevice" );
D3D_DRIVER_TYPE type = D3D_DRIVER_TYPE_HARDWARE;
// http://msdn.microsoft.com/en-us/library/ff476082(v=VS.85).aspx
// If you set the pAdapter parameter to a non-NULL value, you must also set the DriverType parameter to the D3D_DRIVER_TYPE_UNKNOWN value. If you set the pAdapter parameter to a non-NULL value and the DriverType parameter to the D3D_DRIVER_TYPE_HARDWARE value, D3D11CreateDevice returns an HRESULT of E_INVALIDARG.
type = D3D_DRIVER_TYPE_UNKNOWN;
/*
// Create a hardware Direct3D 11 device
hr = _DynamicD3D11CreateDevice( NULL,
type, NULL, createDeviceFlg,
fl, _countof(fl), D3D11_SDK_VERSION, &deviceData->m_device, NULL, &deviceData->m_context );
*/
IDXGIAdapter* adapter = NULL;
{// get adapter of the index
IDXGIFactory* factory = NULL;
int targetAdapterIdx = cfg.m_deviceIdx;//min( cfg.m_deviceIdx, getNDevices()-1 );
CreateDXGIFactory( __uuidof(IDXGIFactory), (void**)&factory );
u32 i = 0;
while( factory->EnumAdapters( i, &adapter ) != DXGI_ERROR_NOT_FOUND )
{
if( i== targetAdapterIdx ) break;
i++;
}
factory->Release();
}
// Create a hardware Direct3D 11 device
hr = D3D11CreateDevice( adapter,
type,
NULL, createDeviceFlg,
fl, _countof(fl), D3D11_SDK_VERSION, &deviceData->m_device, NULL, &deviceData->m_context );
ADLASSERT( hr == S_OK );
// Check if the hardware device supports Compute Shader 4.0
D3D11_FEATURE_DATA_D3D10_X_HARDWARE_OPTIONS hwopts;
deviceData->m_device->CheckFeatureSupport(D3D11_FEATURE_D3D10_X_HARDWARE_OPTIONS, &hwopts, sizeof(hwopts));
if( !hwopts.ComputeShaders_Plus_RawAndStructuredBuffers_Via_Shader_4_x )
{
SAFE_RELEASE( deviceData->m_context );
SAFE_RELEASE( deviceData->m_device );
debugPrintf("DX11 GPU is not present\n");
ADLASSERT( 0 );
}
m_kernelManager = new KernelManager;
}
/// Releases the immediate context, the device and the kernel manager.
/// Every pointer is reset afterwards, so a second call to release() does not
/// release or delete the same object twice.
void DeviceDX11::release()
{
	SAFE_RELEASE( m_context );
	SAFE_RELEASE( m_device );
	delete m_kernelManager;	// deleting a null pointer is a no-op
	m_kernelManager = 0;
}
// Binds buf to this device and creates the D3D11 buffer for nElems elements of T,
// plus the views that the buffer type requires:
//   BUFFER_CONST                       : constant buffer, no views (nElems must be 1)
//   BUFFER_STAGING                     : CPU-readable staging buffer, no views
//   BUFFER_INDEX / BUFFER_VERTEX       : index / vertex buffer, no views
//   BUFFER, BUFFER_RAW, BUFFER_W_COUNTER : UAV and SRV
//   BUFFER_APPEND                      : append UAV only
// BUFFER_ZERO_COPY is not supported by this back end (asserts).
template<typename T>
void DeviceDX11::allocate(Buffer<T>* buf, int nElems, BufferBase::BufferType type)
{
ADLASSERT( type != BufferBase::BUFFER_ZERO_COPY );
DeviceDX11* deviceData = this;
buf->m_device = deviceData;
buf->m_size = nElems;
BufferDX11<T>* dBuf = (BufferDX11<T>*)buf;
// if( type & BufferBase::BUFFER )
{
HRESULT hr = S_OK;
// Constant buffers take a separate, early-return path.
if( type == BufferBase::BUFFER_CONST )
{
ADLASSERT( nElems == 1 );
D3D11_BUFFER_DESC constant_buffer_desc;
ZeroMemory( &constant_buffer_desc, sizeof(constant_buffer_desc) );
// constant_buffer_desc.ByteWidth = NEXTMULTIPLEOF( sizeof(T), 16 );
// Round sizeof(T) up to the next multiple of 16 bytes.
constant_buffer_desc.ByteWidth = (((sizeof(T))/(16) + (((sizeof(T))%(16)==0)?0:1))*(16));
// constant_buffer_desc.Usage = D3D11_USAGE_DYNAMIC;
// constant_buffer_desc.BindFlags = D3D11_BIND_CONSTANT_BUFFER;
// constant_buffer_desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
constant_buffer_desc.Usage = D3D11_USAGE_DEFAULT;
constant_buffer_desc.BindFlags = D3D11_BIND_CONSTANT_BUFFER;
constant_buffer_desc.CPUAccessFlags = 0;
hr = deviceData->m_device->CreateBuffer( &constant_buffer_desc, NULL, dBuf->getBufferPtr() );
ADLASSERT( hr == S_OK );
return;
}
// Common description for every other buffer type.
D3D11_BUFFER_DESC buffer_desc;
ZeroMemory(&buffer_desc, sizeof(buffer_desc));
buffer_desc.ByteWidth = nElems * sizeof(T);
// Everything except raw buffers records the element stride.
if( type != BufferBase::BUFFER_RAW )
{
buffer_desc.StructureByteStride = sizeof(T);
// buffer_desc.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
}
// Usage / bind / misc flags per buffer type.
if( type == BufferBase::BUFFER_STAGING )
{
buffer_desc.Usage = D3D11_USAGE_STAGING;
buffer_desc.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
}
else if( type == BufferBase::BUFFER_INDEX )
{
buffer_desc.Usage = D3D11_USAGE_DEFAULT;
buffer_desc.BindFlags = D3D11_BIND_INDEX_BUFFER;
}
else if( type == BufferBase::BUFFER_VERTEX )
{
buffer_desc.Usage = D3D11_USAGE_DEFAULT;
buffer_desc.BindFlags = D3D11_BIND_VERTEX_BUFFER;
}
else
{
// Compute buffers: bindable as UAV and SRV, structured by default.
buffer_desc.Usage = D3D11_USAGE_DEFAULT;
buffer_desc.BindFlags = D3D11_BIND_UNORDERED_ACCESS | D3D11_BIND_SHADER_RESOURCE;
buffer_desc.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_STRUCTURED;
// check this
// Raw buffers replace the structured flag set just above.
if(type == BufferBase::BUFFER_RAW)
{
// buffer_desc.BindFlags |= D3D11_BIND_INDEX_BUFFER | D3D11_BIND_VERTEX_BUFFER;
buffer_desc.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_ALLOW_RAW_VIEWS | D3D11_RESOURCE_MISC_DRAWINDIRECT_ARGS; // need this to be used for DispatchIndirect
}
}
hr = deviceData->m_device->CreateBuffer(&buffer_desc, NULL, dBuf->getBufferPtr());
ADLASSERT( hr == S_OK );
// Index buffers need no views. Staging and vertex buffers match neither view
// branch below, so they also end up without views.
if( type == BufferBase::BUFFER_INDEX ) return;
if( type == BufferBase::BUFFER ||
type == BufferBase::BUFFER_RAW ||
type == BufferBase::BUFFER_W_COUNTER )
{
// Create UAVs for all CS buffers
D3D11_UNORDERED_ACCESS_VIEW_DESC uavbuffer_desc;
ZeroMemory(&uavbuffer_desc, sizeof(uavbuffer_desc));
uavbuffer_desc.ViewDimension = D3D11_UAV_DIMENSION_BUFFER;
if( type == BufferBase::BUFFER_RAW )
{
// Raw views address the buffer as 32-bit words.
uavbuffer_desc.Format = DXGI_FORMAT_R32_TYPELESS;
uavbuffer_desc.Buffer.Flags = D3D11_BUFFER_UAV_FLAG_RAW;
uavbuffer_desc.Buffer.NumElements = buffer_desc.ByteWidth / 4;
}
else
{
uavbuffer_desc.Format = DXGI_FORMAT_UNKNOWN;
uavbuffer_desc.Buffer.NumElements = nElems;
}
if( type == BufferBase::BUFFER_W_COUNTER )
{
uavbuffer_desc.Buffer.Flags = D3D11_BUFFER_UAV_FLAG_COUNTER;
}
hr = deviceData->m_device->CreateUnorderedAccessView(dBuf->getBuffer(), &uavbuffer_desc, dBuf->getUAVPtr());
ADLASSERT( hr == S_OK );
// Create SRVs for all CS buffers
D3D11_SHADER_RESOURCE_VIEW_DESC srvbuffer_desc;
ZeroMemory(&srvbuffer_desc, sizeof(srvbuffer_desc));
if( type == BufferBase::BUFFER_RAW )
{
ADLASSERT( sizeof(T) <= 16 );
srvbuffer_desc.Format = DXGI_FORMAT_R32_UINT;
srvbuffer_desc.Buffer.ElementWidth = nElems;
// if ( buffer_desc.MiscFlags & D3D11_RESOURCE_MISC_BUFFER_ALLOW_RAW_VIEWS )
// {
// srvbuffer_desc.Format = DXGI_FORMAT_R32_TYPELESS;
// srvbuffer_desc.BufferEx.Flags = D3D11_BUFFEREX_SRV_FLAG_RAW;
// srvbuffer_desc.BufferEx.NumElements = buffer_desc.ByteWidth / 4;
}
else
{
srvbuffer_desc.Format = DXGI_FORMAT_UNKNOWN;
srvbuffer_desc.Buffer.ElementWidth = nElems;
}
srvbuffer_desc.ViewDimension = D3D11_SRV_DIMENSION_BUFFER;
hr = deviceData->m_device->CreateShaderResourceView(dBuf->getBuffer(), &srvbuffer_desc, dBuf->getSRVPtr());
ADLASSERT( hr == S_OK );
}
else if( type == BufferBase::BUFFER_APPEND )
{
// Append buffers only get a UAV, covering every element of the buffer.
D3D11_UNORDERED_ACCESS_VIEW_DESC desc;
ZeroMemory( &desc, sizeof(desc) );
desc.ViewDimension = D3D11_UAV_DIMENSION_BUFFER;
desc.Buffer.FirstElement = 0;
desc.Buffer.Flags = D3D11_BUFFER_UAV_FLAG_APPEND;
desc.Format = DXGI_FORMAT_UNKNOWN; // Format must be DXGI_FORMAT_UNKNOWN when creating a view of a structured buffer
desc.Buffer.NumElements = buffer_desc.ByteWidth / buffer_desc.StructureByteStride;
hr = deviceData->m_device->CreateUnorderedAccessView( dBuf->getBuffer(), &desc, dBuf->getUAVPtr() );
ADLASSERT( hr == S_OK );
}
}
// else
// {
// ADLASSERT(0);
// }
}
/// Releases the D3D11 buffer and its UAV/SRV (whichever exist), clears the
/// corresponding pointers and detaches buf from this device.
template<typename T>
void DeviceDX11::deallocate(Buffer<T>* buf)
{
	BufferDX11<T>* dBuf = (BufferDX11<T>*)buf;

	ID3D11Buffer* buffer = dBuf->getBuffer();
	if( buffer )
	{
		buffer->Release();
		dBuf->m_ptr = NULL;
	}

	ID3D11UnorderedAccessView* uav = dBuf->getUAV();
	if( uav )
	{
		uav->Release();
		dBuf->m_uav = NULL;
	}

	ID3D11ShaderResourceView* srv = dBuf->getSRV();
	if( srv )
	{
		srv->Release();
		dBuf->m_srv = NULL;
	}

	buf->m_device = 0;
}
/// Copies the first nElems elements from src to dst.
///
/// DX11 -> DX11 : GPU-side CopySubresourceRegion.
/// host -> DX11 : forwarded to dst->write().
/// DX11 -> host : forwarded to src->read().
/// Any other combination asserts.
template<typename T>
void DeviceDX11::copy(Buffer<T>* dst, const Buffer<T>* src, int nElems)
{
	// Both sides must be DX11 buffers for the GPU copy; with "||" a host buffer
	// would be treated as an ID3D11Buffer and the host branches below would be
	// unreachable.
	if( dst->m_device->m_type == TYPE_DX11 && src->m_device->m_type == TYPE_DX11 )
	{
		BufferDX11<T>* dDst = (BufferDX11<T>*)dst;
		BufferDX11<T>* dSrc = (BufferDX11<T>*)src;

		// Byte range of the source to copy; a buffer is a 1D resource, so the
		// other extents are the unit box.
		D3D11_BOX srcRegion;
		srcRegion.left = 0;
		srcRegion.right = nElems*sizeof(T);
		srcRegion.top = 0;
		srcRegion.bottom = 1;
		srcRegion.front = 0;
		srcRegion.back = 1;

		m_context->CopySubresourceRegion(
			dDst->getBuffer(),
			0, 0, 0, 0,
			dSrc->getBuffer(),
			0,
			&srcRegion );
	}
	else if( src->m_device->m_type == TYPE_HOST )
	{
		ADLASSERT( dst->getType() == TYPE_DX11 );
		dst->write( src->m_ptr, nElems );
	}
	else if( dst->m_device->m_type == TYPE_HOST )
	{
		ADLASSERT( src->getType() == TYPE_DX11 );
		src->read( dst->m_ptr, nElems );
	}
	else
	{
		ADLASSERT( 0 );
	}
}
template<typename T>
void DeviceDX11::copy(T* dst, const Buffer<T>* src, int nElems, int srcOffsetNElems)
{
DeviceDX11* deviceData = this;
BufferDX11<T>* dSrc = (BufferDX11<T>*)src;
Buffer<T> sBuf( deviceData, nElems, BufferBase::BUFFER_STAGING );
BufferDX11<T>* dStagingBuf = (BufferDX11<T>*)&sBuf;
ID3D11Buffer *StagingBuffer = dStagingBuf->getBuffer();
D3D11_MAPPED_SUBRESOURCE MappedVelResource = {0};
D3D11_BOX destRegion;
destRegion.left = srcOffsetNElems*sizeof(T);
destRegion.front = 0;
destRegion.top = 0;
destRegion.bottom = 1;
destRegion.back = 1;
destRegion.right = (srcOffsetNElems+nElems)*sizeof(T);
deviceData->m_context->CopySubresourceRegion(
StagingBuffer,
0, 0, 0, 0,
dSrc->getBuffer(),
0,
&destRegion);
deviceData->m_context->Map(StagingBuffer, 0, D3D11_MAP_READ, 0, &MappedVelResource);
memcpy(dst, MappedVelResource.pData, nElems*sizeof(T));
deviceData->m_context->Unmap(StagingBuffer, 0);
}
// Uploads nElems elements from the host array src into the DX11 buffer dst,
// starting at element dstOffsetNElems, via UpdateSubresource.
template<typename T>
void DeviceDX11::copy(Buffer<T>* dst, const T* src, int nElems, int dstOffsetNElems)
{
	BufferDX11<T>* target = (BufferDX11<T>*)dst;

	// One-dimensional box describing the destination byte range.
	D3D11_BOX byteRange;
	byteRange.top = 0;
	byteRange.bottom = 1;
	byteRange.front = 0;
	byteRange.back = 1;
	byteRange.left = dstOffsetNElems*sizeof(T);
	byteRange.right = (dstOffsetNElems+nElems)*sizeof(T);

	this->m_context->UpdateSubresource(target->getBuffer(), 0, &byteRange, src, 0, 0);
}
void DeviceDX11::waitForCompletion() const
{
const DeviceDX11* deviceData = this;
ID3D11Query* syncQuery;
D3D11_QUERY_DESC qDesc;
qDesc.Query = D3D11_QUERY_EVENT;
qDesc.MiscFlags = 0;
deviceData->m_device->CreateQuery( &qDesc, &syncQuery );
deviceData->m_context->End( syncQuery );
while( deviceData->m_context->GetData( syncQuery, 0,0,0 ) == S_FALSE ){}
syncQuery->Release();
}
int DeviceDX11::getNDevices()
{
IDXGIFactory1* factory = NULL;
IDXGIAdapter1* adapter = NULL;
CreateDXGIFactory1( __uuidof(IDXGIFactory1), (void**)&factory );
u32 i = 0;
while( factory->EnumAdapters1( i, &adapter ) != DXGI_ERROR_NOT_FOUND )
{
i++;
}
factory->Release();
return i;
}
void DeviceDX11::getDeviceName( char nameOut[128] ) const
{
IDXGIAdapter* adapter;// = getAdapterFromDevice( this );
{
IDXGIDevice* pDXGIDevice;
ADLASSERT( m_device->QueryInterface(__uuidof(IDXGIDevice), (void **)&pDXGIDevice) == S_OK );
ADLASSERT( pDXGIDevice->GetParent(__uuidof(IDXGIAdapter), (void **)&adapter) == S_OK );
pDXGIDevice->Release();
}
DXGI_ADAPTER_DESC adapterDesc;
adapter->GetDesc( &adapterDesc );
// wcstombs( nameOut, adapterDesc.Description, 128 );
size_t i;
wcstombs_s( &i, nameOut, 128, adapterDesc.Description, 128 );
}
// Looks up a kernel through this device's kernel manager; all arguments are
// forwarded unchanged, together with this device.
Kernel* DeviceDX11::getKernel(const char* fileName, const char* funcName, const char* option, const char* src, bool cacheKernel ) const
{
	Kernel* kernel = m_kernelManager->query( this, fileName, funcName, option, src, cacheKernel );
	return kernel;
}
#undef u32
#undef SAFE_RELEASE
};

View File

@@ -0,0 +1,348 @@
/*
Copyright (c) 2012 Advanced Micro Devices, Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
//Originally written by Takahiro Harada
namespace adl
{
#define SAFE_RELEASE(p) { if(p) { (p)->Release(); (p)=NULL; } }
// DX11 view of a Kernel: exposes the generic m_kernel handle as a compute
// shader pointer.
struct KernelDX11 : public Kernel
{
	// The kernel handle, reinterpreted as a compute shader.
	ID3D11ComputeShader* getKernel()
	{
		return (ID3D11ComputeShader*)m_kernel;
	}

	// Address of the kernel handle, for APIs that write the shader pointer out.
	ID3D11ComputeShader** getKernelPtr()
	{
		return (ID3D11ComputeShader**)&m_kernel;
	}
};
// Resolves the on-disk location of a shader file.
//
// strDestPath  receives the resolved path (buffer of cchDest characters).
// cchDest      capacity of strDestPath in characters; must be at least 10.
// strFilename  file name to look for; must be non-empty.
//
// Search order:
//   1. strFilename as given
//   2. <exe directory>\..\<exe name without extension>\<strFilename>
//
// Returns S_OK when a candidate exists, E_INVALIDARG for bad arguments, and
// E_FAIL (with strDestPath set to strFilename) when nothing was found.
//
// Fix: the non-UNICODE build never copied the executable name, so candidate 2
// was always built with an empty directory component in ANSI builds.
__inline
#ifdef UNICODE
HRESULT FindDXSDKShaderFileCch( __in_ecount(cchDest) WCHAR* strDestPath,
								int cchDest,
								__in LPCWSTR strFilename )
#else
HRESULT FindDXSDKShaderFileCch( __in_ecount(cchDest) CHAR* strDestPath,
								int cchDest,
								__in LPCSTR strFilename )
#endif
{
	if( NULL == strFilename || strFilename[0] == 0 || NULL == strDestPath || cchDest < 10 )
		return E_INVALIDARG;

	// Get the exe name, and exe path
	TCHAR strExePath[MAX_PATH] = { 0 };
	TCHAR strExeName[MAX_PATH] = { 0 };
	TCHAR* strLastSlash = NULL;

	GetModuleFileName( NULL, strExePath, MAX_PATH );
	strExePath[MAX_PATH - 1] = 0;
#ifdef UNICODE
	strLastSlash = wcsrchr( strExePath, TEXT( '\\' ) );
#else
	strLastSlash = strrchr( strExePath, TEXT( '\\' ) );
#endif
	if( strLastSlash )
	{
		// Keep everything after the last slash as the exe name
#ifdef UNICODE
		wcscpy_s( strExeName, MAX_PATH, &strLastSlash[1] );
#else
		strcpy_s( strExeName, MAX_PATH, &strLastSlash[1] );
#endif
		// Chop the exe name from the exe path
		*strLastSlash = 0;

		// Chop the .exe from the exe name
#ifdef UNICODE
		strLastSlash = wcsrchr( strExeName, TEXT( '.' ) );
#else
		strLastSlash = strrchr( strExeName, TEXT( '.' ) );
#endif
		if( strLastSlash )
			*strLastSlash = 0;
	}

	// Candidate 1: the file name exactly as given
#ifdef UNICODE
	wcscpy_s( strDestPath, cchDest, strFilename );
#else
	strcpy_s( strDestPath, cchDest, strFilename );
#endif
	if( GetFileAttributes( strDestPath ) != 0xFFFFFFFF )
		return S_OK;

	// Candidate 2: %EXE_DIR%/../%EXE_NAME%/<file>
#ifdef UNICODE
	swprintf_s( strDestPath, cchDest, L"%s\\..\\%s\\%s", strExePath, strExeName, strFilename );
#else
	sprintf_s( strDestPath, cchDest, "%s\\..\\%s\\%s", strExePath, strExeName, strFilename );
#endif
	if( GetFileAttributes( strDestPath ) != 0xFFFFFFFF )
		return S_OK;

	// On failure, return the file as the path but also return an error code
#ifdef UNICODE
	wcscpy_s( strDestPath, cchDest, strFilename );
#else
	strcpy_s( strDestPath, cchDest, strFilename );
#endif
	ADLASSERT( 0 );
	return E_FAIL;
}
// Points the builder at a shader source file.
//
// deviceData    device the kernels will be created on (stored in m_deviceData).
// fileName      shader file name; ".hlsl" is appended when addExtension is true.
// option        not used by this implementation.
// addExtension  whether to append the ".hlsl" extension.
// cacheKernel   not used by this implementation.
//
// The name is converted to the build's native character type and resolved with
// FindDXSDKShaderFileCch; the result is stored in m_path.
//
// Fix: the temporary name buffer is now cleared by its full size in bytes; the
// previous memset cleared only nameLength bytes, i.e. part of the buffer when
// the character type is wider than one byte.
template<>
void KernelBuilder<TYPE_DX11>::setFromFile( const Device* deviceData, const char* fileName, const char* option, bool addExtension,
	bool cacheKernel)
{
	char fileNameWithExtension[256];
	if( addExtension )
		sprintf_s( fileNameWithExtension, "%s.hlsl", fileName );
	else
		sprintf_s( fileNameWithExtension, "%s", fileName );

	m_deviceData = deviceData;

	// Length including the terminating null.
	int nameLength = (int)strlen(fileNameWithExtension)+1;
#ifdef UNICODE
	WCHAR* nativeName = new WCHAR[nameLength];
#else
	CHAR* nativeName = new CHAR[nameLength];
#endif
	memset( nativeName, 0, nameLength*sizeof(nativeName[0]) );
#ifdef UNICODE
	MultiByteToWideChar(CP_ACP,0,fileNameWithExtension,-1, nativeName, nameLength);
#else
	sprintf_s(nativeName, nameLength, "%s", fileNameWithExtension);
#endif

	// Finds the correct path for the shader file.
	HRESULT hr = FindDXSDKShaderFileCch( m_path, MAX_PATH, nativeName );
	delete [] nativeName;
	ADLASSERT( hr == S_OK );
}
// Points the builder at in-memory shader source instead of a file.
// The source pointer is stored as-is (not copied), and the first character of
// m_path is set to the character '0' to mark the builder as source-based.
// The option argument is not used here.
template<>
void KernelBuilder<TYPE_DX11>::setFromSrc( const Device* deviceData, const char* src, const char* option )
{
	// Marker character checked when the kernel is compiled.
	m_path[0] = '0';

	m_ptr = (void*)src;
	m_deviceData = deviceData;
}
// The DX11 specialisation's destructor performs no work.
template<>
KernelBuilder<TYPE_DX11>::~KernelBuilder() {}
// Compiles the entry point funcName and creates a compute shader for it.
//
// funcName   name of the shader entry point to compile.
// kernelOut  receives the created shader in m_kernel and TYPE_DX11 in m_type.
//            If compilation or shader creation fails, m_kernel is set to NULL.
//
// The source comes from memory (m_ptr) when m_path starts with the marker
// character '0', otherwise from the file named by m_path.
//
// Fixes relative to the previous version:
// - the error blob is only printed when the compiler actually produced one;
// - a failed compile returns early instead of dereferencing a null blob;
// - the result of CreateComputeShader is checked;
// - the debug-name code referred to an undeclared identifier and to the
//   untyped kernel handle; it now uses funcName and the typed accessor.
template<>
void KernelBuilder<TYPE_DX11>::createKernel( const char* funcName, Kernel& kernelOut )
{
	const DeviceDX11* deviceData = (const DeviceDX11*)m_deviceData;
	KernelDX11* dxKernel = (KernelDX11*)&kernelOut;
	HRESULT hr;

	DWORD dwShaderFlags = D3DCOMPILE_ENABLE_STRICTNESS;
#if defined( DEBUG ) || defined( _DEBUG )
	// Set the D3DCOMPILE_DEBUG flag to embed debug information in the shaders.
	dwShaderFlags |= D3DCOMPILE_DEBUG;
#endif

	const D3D_SHADER_MACRO defines[] =
	{
#ifdef USE_STRUCTURED_BUFFERS
		"USE_STRUCTURED_BUFFERS", "1",
#endif
#ifdef TEST_DOUBLE
		"TEST_DOUBLE", "1",
#endif
		NULL, NULL
	};

	// Use the cs_5_0 profile on feature level 11.0 or higher, cs_4_0 otherwise.
	LPCSTR pProfile = ( deviceData->m_device->GetFeatureLevel() >= D3D_FEATURE_LEVEL_11_0 ) ? "cs_5_0" : "cs_4_0";

	ID3DBlob* pErrorBlob = NULL;
	ID3DBlob* pBlob = NULL;
	if( m_path[0] == '0' )
	{
		char* src = (char*)m_ptr;
		hr = D3DX11CompileFromMemory( src, strlen(src), 0, defines, NULL, funcName, pProfile,
			dwShaderFlags, NULL, NULL, &pBlob, &pErrorBlob, NULL );
	}
	else
	{
		hr = D3DX11CompileFromFile( m_path, defines, NULL, funcName, pProfile,
			dwShaderFlags, NULL, NULL, &pBlob, &pErrorBlob, NULL );
	}

	if( FAILED(hr) || pBlob == NULL )
	{
		// The error blob can be absent, e.g. when there is no compiler output.
		if( pErrorBlob )
			debugPrintf("%s", (char*)pErrorBlob->GetBufferPointer());
		ADLASSERT( 0 );
		SAFE_RELEASE( pErrorBlob );
		SAFE_RELEASE( pBlob );
		kernelOut.m_kernel = NULL;
		kernelOut.m_type = TYPE_DX11;
		return;
	}
	ADLASSERT( hr == S_OK );

	hr = deviceData->m_device->CreateComputeShader( pBlob->GetBufferPointer(), pBlob->GetBufferSize(), NULL,
		dxKernel->getKernelPtr() );
	ADLASSERT( hr == S_OK );
	if( FAILED(hr) )
		kernelOut.m_kernel = NULL;

#if defined(DEBUG) || defined(PROFILE)
	// Attach the entry-point name to the shader object for debugging tools.
	if ( dxKernel->getKernel() )
		dxKernel->getKernel()->SetPrivateData( WKPDID_D3DDebugObjectName, lstrlenA(funcName), funcName );
#endif

	SAFE_RELEASE( pErrorBlob );
	SAFE_RELEASE( pBlob );
	kernelOut.m_type = TYPE_DX11;
}
// Releases the compute shader held by kernel, if any, and clears the handle.
template<>
void KernelBuilder<TYPE_DX11>::deleteKernel( Kernel& kernel )
{
	// Nothing to do for a kernel that holds no shader.
	if( !kernel.m_kernel )
		return;

	KernelDX11* shaderOwner = (KernelDX11*)&kernel;
	shaderOwner->getKernel()->Release();
	kernel.m_kernel = NULL;
}
// Static DX11 back-end helpers operating on a Launcher: binding buffers,
// uploading constants and dispatching a 2D launch.
class LauncherDX11
{
	public:
		typedef Launcher::BufferInfo BufferInfo;

		// Binds n buffers described by buffInfo to the compute stage.
		static __inline void setBuffers( Launcher* launcher, BufferInfo* buffInfo, int n );

		// Uploads consts into constBuff and binds it as a constant buffer.
		template<typename T>
		static __inline void setConst( Launcher* launcher, Buffer<T>& constBuff, const T& consts );

		// Dispatches the launcher's kernel over a 2D range of threads.
		static __inline void launch2D( Launcher* launcher, int numThreadsX, int numThreadsY, int localSizeX, int localSizeY );
};
// Binds n buffers to the compute stage of the launcher's device context.
// Read-only buffers are bound through their SRV at the next free m_idx slot;
// all others through their UAV at the next free m_idxRw slot. Both slot
// counters on the launcher are advanced as buffers are bound.
void LauncherDX11::setBuffers( Launcher* launcher, BufferInfo* buffInfo, int n )
{
	const DeviceDX11* device = (const DeviceDX11*)launcher->m_deviceData;

	for(int i=0; i<n; i++)
	{
		const BufferInfo& info = buffInfo[i];
		// Only the view pointers are used, so the element type is irrelevant.
		BufferDX11<int>* view = (BufferDX11<int>*)info.m_buffer;

		if( !info.m_isReadOnly )
		{
			// todo. cannot initialize append buffer with proper counter value which is the last arg
			device->m_context->CSSetUnorderedAccessViews( launcher->m_idxRw++, 1, view->getUAVPtr(), 0 );
		}
		else
		{
			device->m_context->CSSetShaderResources( launcher->m_idx++, 1, view->getSRVPtr() );
		}
	}
}
// Copies consts into the buffer constBuff with UpdateSubresource and binds
// that buffer as compute-stage constant buffer slot 0.
// (A Map/memcpy/Unmap upload was previously sketched here in a comment;
// UpdateSubresource is what the code uses.)
template<typename T>
void LauncherDX11::setConst( Launcher* launcher, Buffer<T>& constBuff, const T& consts )
{
	const DeviceDX11* device = (const DeviceDX11*)launcher->m_deviceData;
	BufferDX11<T>* cb = (BufferDX11<T>*)&constBuff;

	device->m_context->UpdateSubresource( cb->getBuffer(), 0, NULL, &consts, 0, 0 );
	device->m_context->CSSetConstantBuffers( 0, 1, cb->getBufferPtr() );
}
// Dispatches the launcher's kernel over numThreadsX x numThreadsY threads
// using thread groups of localSizeX x localSizeY, then unbinds the shader and
// the UAV/SRV slots that were in use.
void LauncherDX11::launch2D( Launcher* launcher, int numThreadsX, int numThreadsY, int localSizeX, int localSizeY )
{
	KernelDX11* shader = (KernelDX11*)launcher->m_kernel;
	const DeviceDX11* device = (const DeviceDX11*)launcher->m_deviceData;

	device->m_context->CSSetShader( shader->getKernel(), NULL, 0 );

	// Number of groups per axis: thread count divided by group size, rounded
	// up, and never less than one.
	int groupsX = numThreadsX/localSizeX;
	if( numThreadsX%localSizeX )
		groupsX += 1;
	if( groupsX < 1 )
		groupsX = 1;

	int groupsY = numThreadsY/localSizeY;
	if( numThreadsY%localSizeY )
		groupsY += 1;
	if( groupsY < 1 )
		groupsY = 1;

	const int groupsZ = 1;
	device->m_context->Dispatch( groupsX, groupsY, groupsZ );

	// set 0 to registers
	device->m_context->CSSetShader( NULL, NULL, 0 );

	if( launcher->m_idxRw )
	{
		ID3D11UnorderedAccessView* nullUavs[ 16 ] = { 0 };
		const unsigned int maxUavs = sizeof(nullUavs)/sizeof(*nullUavs);
		unsigned int nUavs = (unsigned int)launcher->m_idxRw;
		if( nUavs > maxUavs )
			nUavs = maxUavs;
		device->m_context->CSSetUnorderedAccessViews( 0, nUavs, nullUavs, NULL );
	}

	if( launcher->m_idx )
	{
		ID3D11ShaderResourceView* nullSrvs[16] = { 0 };
		const unsigned int maxSrvs = sizeof(nullSrvs)/sizeof(*nullSrvs);
		unsigned int nSrvs = (unsigned int)launcher->m_idx;
		if( nSrvs > maxSrvs )
			nSrvs = maxSrvs;
		device->m_context->CSSetShaderResources( 0, nSrvs, nullSrvs );
	}
}
#undef SAFE_RELEASE
};

View File

@@ -0,0 +1,131 @@
/*
Copyright (c) 2012 Advanced Micro Devices, Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
//Originally written by Takahiro Harada
namespace adl
{
struct StopwatchDX11 : public StopwatchBase
{
public:
__inline
StopwatchDX11() : StopwatchBase(){}
__inline
~StopwatchDX11();
__inline
void init( const Device* deviceData );
__inline
void start();
__inline
void split();
__inline
void stop();
__inline
float getMs(int index=0);
__inline
void getMs( float* times, int capacity );
public:
ID3D11Query* m_tQuery[CAPACITY+1];
ID3D11Query* m_fQuery;
UINT64 m_t[CAPACITY];
};
void StopwatchDX11::init( const Device* deviceData )
{
ADLASSERT( deviceData->m_type == TYPE_DX11 );
m_device = deviceData;
{
D3D11_QUERY_DESC qDesc;
qDesc.Query = D3D11_QUERY_TIMESTAMP_DISJOINT;
qDesc.MiscFlags = 0;
((const DeviceDX11*)m_device)->m_device->CreateQuery( &qDesc, &m_fQuery );
}
for(int i=0; i<CAPACITY+1; i++)
{
D3D11_QUERY_DESC qDesc;
qDesc.Query = D3D11_QUERY_TIMESTAMP;
qDesc.MiscFlags = 0;
((const DeviceDX11*)m_device)->m_device->CreateQuery( &qDesc, &m_tQuery[i] );
}
}
StopwatchDX11::~StopwatchDX11()
{
m_fQuery->Release();
for(int i=0; i<CAPACITY+1; i++)
{
m_tQuery[i]->Release();
}
}
void StopwatchDX11::start()
{
m_idx = 0;
((const DeviceDX11*)m_device)->m_context->Begin( m_fQuery );
((const DeviceDX11*)m_device)->m_context->End( m_tQuery[m_idx++] );
}
void StopwatchDX11::split()
{
if( m_idx < CAPACITY )
((const DeviceDX11*)m_device)->m_context->End( m_tQuery[m_idx++] );
}
void StopwatchDX11::stop()
{
((const DeviceDX11*)m_device)->m_context->End( m_tQuery[m_idx++] );
((const DeviceDX11*)m_device)->m_context->End( m_fQuery );
}
float StopwatchDX11::getMs(int index)
{
D3D11_QUERY_DATA_TIMESTAMP_DISJOINT d;
// m_deviceData->m_context->End( m_fQuery );
while( ((const DeviceDX11*)m_device)->m_context->GetData( m_fQuery, &d,sizeof(D3D11_QUERY_DATA_TIMESTAMP_DISJOINT),0 ) == S_FALSE ) {}
while( ((const DeviceDX11*)m_device)->m_context->GetData( m_tQuery[0], &m_t[index],sizeof(UINT64),0 ) == S_FALSE ){}
while( ((const DeviceDX11*)m_device)->m_context->GetData( m_tQuery[1], &m_t[index+1],sizeof(UINT64),0 ) == S_FALSE ){}
ADLASSERT( d.Disjoint == false );
float elapsedMs = (m_t[index+1] - m_t[index])/(float)d.Frequency*1000;
return elapsedMs;
}
void StopwatchDX11::getMs( float* times, int capacity )
{
ADLASSERT( capacity <= CAPACITY );
D3D11_QUERY_DATA_TIMESTAMP_DISJOINT d;
while( ((const DeviceDX11*)m_device)->m_context->GetData( m_fQuery, &d,sizeof(D3D11_QUERY_DATA_TIMESTAMP_DISJOINT),0 ) == S_FALSE ) {}
for(int i=0; i<m_idx; i++)
{
while( ((const DeviceDX11*)m_device)->m_context->GetData( m_tQuery[i], &m_t[i],sizeof(UINT64),0 ) == S_FALSE ){}
}
ADLASSERT( d.Disjoint == false );
for(int i=0; i<capacity; i++)
{
times[i] = (m_t[i+1] - m_t[i])/(float)d.Frequency*1000;
}
}
};

View File

@@ -0,0 +1,107 @@
/*
Copyright (c) 2012 Advanced Micro Devices, Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
//Originally written by Takahiro Harada
namespace adl
{
// Device implementation of type TYPE_HOST. Buffers are plain arrays in host
// memory and copies are done with memcpy.
struct DeviceHost : public Device
{
	DeviceHost() : Device( TYPE_HOST ){}

	// Set-up / tear-down hooks.
	__inline void initialize(const Config& cfg);
	__inline void release();

	// Buffer storage management.
	template<typename T>
	__inline void allocate(Buffer<T>* buf, int nElems, BufferBase::BufferType type);

	template<typename T>
	__inline void deallocate(Buffer<T>* buf);

	// Buffer-to-buffer copy of nElems elements.
	template<typename T>
	__inline void copy(Buffer<T>* dst, const Buffer<T>* src, int nElems);

	// Buffer-to-array copy, reading from element offsetNElems of src.
	template<typename T>
	__inline void copy(T* dst, const Buffer<T>* src, int nElems, int offsetNElems = 0);

	// Array-to-buffer copy, writing from element offsetNElems of dst.
	template<typename T>
	__inline void copy(Buffer<T>* dst, const T* src, int nElems, int offsetNElems = 0);

	__inline void waitForCompletion() const;
};
// The host device performs no work on initialisation; cfg is ignored.
void DeviceHost::initialize(const Config& cfg) {}
// The host device performs no work on release.
void DeviceHost::release() {}
// Attaches buf to this device and, unless the buffer type is BUFFER_CONST,
// gives it a new array of nElems elements and records that size.
// For BUFFER_CONST only the device pointer is set.
template<typename T>
void DeviceHost::allocate(Buffer<T>* buf, int nElems, BufferBase::BufferType type)
{
	buf->m_device = this;

	if( type != BufferBase::BUFFER_CONST )
	{
		buf->m_ptr = new T[nElems];
		ADLASSERT( buf->m_ptr );
		buf->m_size = nElems;
	}
}
// Frees the array owned by buf, if any, and clears the pointer.
//
// Fix: m_ptr is reset after delete[], as DeviceDX11::deallocate does for its
// handles, so the buffer no longer keeps a dangling pointer and a second
// deallocate call becomes a no-op instead of a double delete.
template<typename T>
void DeviceHost::deallocate(Buffer<T>* buf)
{
	if( buf->m_ptr )
	{
		delete [] buf->m_ptr;
		buf->m_ptr = 0;
	}
}
// Buffer-to-buffer copy of nElems elements: hands the source buffer's storage
// to the array-to-buffer overload.
template<typename T>
void DeviceHost::copy(Buffer<T>* dst, const Buffer<T>* src, int nElems)
{
	const T* srcElems = src->m_ptr;
	copy( dst, srcElems, nElems );
}
// Copies nElems elements out of the host buffer src, starting at element
// srcOffsetNElems, into the array dst. Asserts that src is a host buffer.
template<typename T>
void DeviceHost::copy(T* dst, const Buffer<T>* src, int nElems, int srcOffsetNElems)
{
	ADLASSERT( src->getType() == TYPE_HOST );

	const T* firstElem = src->m_ptr+srcOffsetNElems;
	memcpy( dst, firstElem, nElems*sizeof(T) );
}
// Copies nElems elements from the array src into the host buffer dst,
// starting at element dstOffsetNElems. Asserts that dst is a host buffer.
template<typename T>
void DeviceHost::copy(Buffer<T>* dst, const T* src, int nElems, int dstOffsetNElems)
{
	ADLASSERT( dst->getType() == TYPE_HOST );

	T* firstElem = dst->m_ptr+dstOffsetNElems;
	memcpy( firstElem, src, nElems*sizeof(T) );
}
// The host device has nothing to wait for; this is a no-op.
void DeviceHost::waitForCompletion() const {}
};

View File

@@ -0,0 +1,119 @@
/*
Copyright (c) 2012 Advanced Micro Devices, Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
//Originally written by Takahiro Harada
#ifdef _WIN32
#include <windows.h>
#else
#include <sys/time.h>
#endif
namespace adl
{
// CPU-side stopwatch. Timestamps come from QueryPerformanceCounter on Windows
// and from gettimeofday elsewhere; up to CAPACITY timestamps are stored.
class StopwatchHost : public StopwatchBase
{
	public:
		__inline StopwatchHost();

		__inline void init( const Device* deviceData );

		// start() records the first timestamp, split()/stop() record further ones.
		__inline void start();
		__inline void split();
		__inline void stop();

		// Elapsed milliseconds between timestamps index and index+1.
		__inline float getMs(int index=0);
		// Elapsed milliseconds of the recorded intervals; unused entries are 0.
		__inline void getMs( float* times, int capacity );

	private:
#ifdef _WIN32
		LARGE_INTEGER m_frequency;		// filled in by init()
		LARGE_INTEGER m_t[CAPACITY];	// recorded counter values
#else
		struct timeval mStartTime;		// time at which init() was called
		timeval m_t[CAPACITY];			// recorded times
#endif
};
// Only constructs the StopwatchBase part; no timer state is set up here.
__inline
StopwatchHost::StopwatchHost() : StopwatchBase() {}
// Stores the device and captures the timer reference data: the performance
// counter frequency on Windows, the current time of day elsewhere.
__inline
void StopwatchHost::init( const Device* deviceData )
{
#ifdef _WIN32
	QueryPerformanceFrequency( &m_frequency );
#else
	gettimeofday(&mStartTime, 0);
#endif
	m_device = deviceData;
}
// Begins a measurement: records the first timestamp in slot 0 and leaves
// m_idx at 1.
__inline
void StopwatchHost::start()
{
#ifdef _WIN32
	QueryPerformanceCounter(&m_t[0]);
#else
	gettimeofday(&m_t[0], 0);
#endif
	m_idx = 1;
}
__inline
void StopwatchHost::split()
{
#ifdef _WIN32
QueryPerformanceCounter(&m_t[m_idx++]);
#else
gettimeofday(&m_t[m_idx++], 0);
#endif
}
__inline
void StopwatchHost::stop()
{
split();
}
// Returns the elapsed time in milliseconds between timestamp `index` and
// timestamp `index+1`.
//
// Fix (non-Windows path): the microsecond part is now converted with
// floating-point arithmetic. The previous integer division by 1000 discarded
// everything below one millisecond, so short intervals were reported as 0.
__inline
float StopwatchHost::getMs(int index)
{
#ifdef _WIN32
	return (float)(1000*(m_t[index+1].QuadPart - m_t[index].QuadPart))/m_frequency.QuadPart;
#else
	const float secondsAsMs = (float)(m_t[index+1].tv_sec - m_t[index].tv_sec) * 1000.0f;
	const float microsAsMs = (float)(m_t[index+1].tv_usec - m_t[index].tv_usec) / 1000.0f;
	return secondsAsMs + microsAsMs;
#endif
}
// Fills times[0..capacity-1]: the first min(capacity, m_idx-1) entries get the
// elapsed milliseconds of the corresponding interval, the rest are 0.
__inline
void StopwatchHost::getMs(float* times, int capacity)
{
	// Number of complete intervals available, limited to the output size.
	const int nRecorded = m_idx-1;
	const int nIntervals = (capacity < nRecorded) ? capacity : nRecorded;

	for(int i=0; i<capacity; i++)
	{
		times[i] = 0.f;
	}
	for(int i=0; i<nIntervals; i++)
	{
		times[i] = getMs(i);
	}
}
};