Add the GPU rigid body pipeline from https://github.com/erwincoumans/experiments as a Bullet 3.x preview for Bullet 2.80
This commit is contained in:
@@ -0,0 +1,73 @@
|
||||
/*
|
||||
Copyright (c) 2012 Advanced Micro Devices, Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
//Originally written by Takahiro Harada
|
||||
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <Adl/Adl.h>
|
||||
#include <AdlPrimitives/Math/Math.h>
|
||||
#include <AdlPrimitives/Sort/SortData.h>
|
||||
#include <AdlPrimitives/Fill/Fill.h>
|
||||
|
||||
namespace adl
|
||||
{
|
||||
|
||||
/// Non-template base shared by every BoundSearch<TYPE> variant.
/// It only carries the enumeration that selects what execute() computes.
class BoundSearchBase
{
	public:
		/// Kind of search performed by BoundSearch<TYPE>::execute().
		enum Option
		{
			BOUND_LOWER = 0,	///< dst[key] = index of the first source element carrying that key
			BOUND_UPPER = 1,	///< dst[key] = index one past the last source element carrying that key
			COUNT = 2		///< dst[key] = upper bound minus lower bound
		};
};
|
||||
|
||||
template<DeviceType TYPE>
|
||||
class BoundSearch : public BoundSearchBase
|
||||
{
|
||||
public:
|
||||
typedef Launcher::BufferInfo BufferInfo;
|
||||
|
||||
struct Data
|
||||
{
|
||||
const Device* m_device;
|
||||
Kernel* m_lowerSortDataKernel;
|
||||
Kernel* m_upperSortDataKernel;
|
||||
Kernel* m_subtractKernel;
|
||||
Buffer<int4>* m_constBuffer;
|
||||
Buffer<u32>* m_lower;
|
||||
Buffer<u32>* m_upper;
|
||||
typename Fill<TYPE>::Data* m_fillData;
|
||||
};
|
||||
|
||||
static
|
||||
Data* allocate(const Device* deviceData, int maxSize = 0);
|
||||
|
||||
static
|
||||
void deallocate(Data* data);
|
||||
|
||||
// src has to be src[i].m_key <= src[i+1].m_key
|
||||
static
|
||||
void execute(Data* data, Buffer<SortData>& src, u32 nSrc, Buffer<u32>& dst, u32 nDst, Option option = BOUND_LOWER );
|
||||
|
||||
// static
|
||||
// void execute(Data* data, Buffer<u32>& src, Buffer<u32>& dst, int n, Option option = );
|
||||
};
|
||||
|
||||
#include <AdlPrimitives/Search/BoundSearchHost.inl>
|
||||
#include <AdlPrimitives/Search/BoundSearch.inl>
|
||||
|
||||
};
|
||||
@@ -0,0 +1,128 @@
|
||||
/*
|
||||
Copyright (c) 2012 Advanced Micro Devices, Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
//Originally written by Takahiro Harada
|
||||
|
||||
// Kernel file path handed to Device::getKernel() by BoundSearch<TYPE>::allocate().
// NOTE(review): uses Windows-style separators and no file extension; presumably
// the device layer appends the extension - confirm before porting.
#define PATH "..\\..\\opencl\\primitives\\AdlPrimitives\\Search\\BoundSearchKernels"
|
||||
// Entry point used for the BOUND_LOWER option.
#define KERNEL0 "SearchSortDataLowerKernel"
|
||||
// Entry point used for the BOUND_UPPER option.
#define KERNEL1 "SearchSortDataUpperKernel"
|
||||
// Entry point used by the COUNT option to subtract lower from upper bounds.
#define KERNEL2 "SubtractKernel"
|
||||
|
||||
#include <AdlPrimitives/Search/BoundSearchKernelsCL.h>
|
||||
#include <AdlPrimitives/Search/BoundSearchKernelsDX11.h>
|
||||
|
||||
template<DeviceType TYPE>
|
||||
typename BoundSearch<TYPE>::Data* BoundSearch<TYPE>::allocate(const Device* device, int maxSize)
|
||||
{
|
||||
ADLASSERT( TYPE == device->m_type );
|
||||
|
||||
const char* src[] =
|
||||
#if defined(ADL_LOAD_KERNEL_FROM_STRING)
|
||||
{boundSearchKernelsCL, boundSearchKernelsDX11};
|
||||
#else
|
||||
{0,0};
|
||||
#endif
|
||||
|
||||
Data* data = new Data;
|
||||
|
||||
data->m_device = device;
|
||||
data->m_lowerSortDataKernel = device->getKernel( PATH, KERNEL0, 0, src[TYPE] );
|
||||
data->m_upperSortDataKernel = device->getKernel( PATH, KERNEL1, 0, src[TYPE] );
|
||||
data->m_constBuffer = new Buffer<int4>( device, 1, BufferBase::BUFFER_CONST );
|
||||
if( maxSize )
|
||||
{
|
||||
data->m_subtractKernel = device->getKernel( PATH, KERNEL2, 0, src[TYPE] );
|
||||
}
|
||||
data->m_lower = (maxSize == 0)? 0: new Buffer<u32>( device, maxSize );
|
||||
data->m_upper = (maxSize == 0)? 0: new Buffer<u32>( device, maxSize );
|
||||
data->m_fillData = (maxSize == 0)? 0: Fill<TYPE>::allocate( device );
|
||||
|
||||
return data;
|
||||
}
|
||||
|
||||
template<DeviceType TYPE>
|
||||
void BoundSearch<TYPE>::deallocate(Data* data)
|
||||
{
|
||||
delete data->m_constBuffer;
|
||||
if( data->m_lower ) delete data->m_lower;
|
||||
if( data->m_upper ) delete data->m_upper;
|
||||
if( data->m_fillData ) Fill<TYPE>::deallocate( data->m_fillData );
|
||||
delete data;
|
||||
}
|
||||
|
||||
template<DeviceType TYPE>
|
||||
void BoundSearch<TYPE>::execute(Data* data, Buffer<SortData>& src, u32 nSrc, Buffer<u32>& dst, u32 nDst, Option option )
|
||||
{
|
||||
int4 constBuffer;
|
||||
constBuffer.x = nSrc;
|
||||
constBuffer.y = nDst;
|
||||
|
||||
Buffer<SortData>* srcNative = BufferUtils::map<TYPE, true>( data->m_device, &src );
|
||||
Buffer<u32>* dstNative = BufferUtils::map<TYPE, false>( data->m_device, &dst );
|
||||
|
||||
if( option == BOUND_LOWER )
|
||||
{
|
||||
BufferInfo bInfo[] = { BufferInfo( srcNative, true ), BufferInfo( dstNative ) };
|
||||
|
||||
Launcher launcher( data->m_device, data->m_lowerSortDataKernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) );
|
||||
launcher.setConst( *data->m_constBuffer, constBuffer );
|
||||
launcher.launch1D( nSrc, 64 );
|
||||
}
|
||||
else if( option == BOUND_UPPER )
|
||||
{
|
||||
BufferInfo bInfo[] = { BufferInfo( srcNative, true ), BufferInfo( dstNative ) };
|
||||
|
||||
Launcher launcher( data->m_device, data->m_upperSortDataKernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) );
|
||||
launcher.setConst( *data->m_constBuffer, constBuffer );
|
||||
launcher.launch1D( nSrc+1, 64 );
|
||||
}
|
||||
else if( option == COUNT )
|
||||
{
|
||||
ADLASSERT( data->m_lower );
|
||||
ADLASSERT( data->m_upper );
|
||||
ADLASSERT( data->m_lower->getSize() <= (int)nDst );
|
||||
ADLASSERT( data->m_upper->getSize() <= (int)nDst );
|
||||
|
||||
int zero = 0;
|
||||
Fill<TYPE>::execute( data->m_fillData, (Buffer<int>&)*data->m_lower, zero, nDst );
|
||||
Fill<TYPE>::execute( data->m_fillData, (Buffer<int>&)*data->m_upper, zero, nDst );
|
||||
|
||||
execute( data, src, nSrc, *data->m_lower, nDst, BOUND_LOWER );
|
||||
execute( data, src, nSrc, *data->m_upper, nDst, BOUND_UPPER );
|
||||
|
||||
{
|
||||
BufferInfo bInfo[] = { BufferInfo( data->m_upper, true ), BufferInfo( data->m_lower, true ), BufferInfo( dstNative ) };
|
||||
|
||||
Launcher launcher( data->m_device, data->m_subtractKernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) );
|
||||
launcher.setConst( *data->m_constBuffer, constBuffer );
|
||||
launcher.launch1D( nDst, 64 );
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
ADLASSERT( 0 );
|
||||
}
|
||||
|
||||
BufferUtils::unmap<false>( srcNative, &src );
|
||||
BufferUtils::unmap<true>( dstNative, &dst );
|
||||
}
|
||||
|
||||
|
||||
#undef PATH
|
||||
#undef KERNEL0
|
||||
#undef KERNEL1
|
||||
#undef KERNEL2
|
||||
|
||||
@@ -0,0 +1,111 @@
|
||||
/*
|
||||
Copyright (c) 2012 Advanced Micro Devices, Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
//Originally written by Takahiro Harada
|
||||
|
||||
|
||||
template<>
|
||||
class BoundSearch<TYPE_HOST> : public BoundSearchBase
|
||||
{
|
||||
public:
|
||||
typedef Launcher::BufferInfo BufferInfo;
|
||||
|
||||
struct Data
|
||||
{
|
||||
const Device* m_device;
|
||||
};
|
||||
|
||||
static
|
||||
Data* allocate(const Device* deviceData, int maxSize = 0)
|
||||
{
|
||||
ADLASSERT( deviceData->m_type == TYPE_HOST );
|
||||
Data* data = new Data;
|
||||
data->m_device = deviceData;
|
||||
return data;
|
||||
}
|
||||
|
||||
static
|
||||
void deallocate(Data* data)
|
||||
{
|
||||
delete data;
|
||||
}
|
||||
|
||||
static
|
||||
void execute(Data* data, Buffer<SortData>& rawSrc, u32 nSrc, Buffer<u32>& rawDst, u32 nDst, Option option = BOUND_LOWER)
|
||||
{
|
||||
ADLASSERT( rawSrc.getType() == TYPE_HOST );
|
||||
ADLASSERT( rawDst.getType() == TYPE_HOST );
|
||||
|
||||
HostBuffer<SortData>& src = *(HostBuffer<SortData>*)&rawSrc;
|
||||
HostBuffer<u32>& dst = *(HostBuffer<u32>*)&rawDst;
|
||||
|
||||
for(int i=0; i<nSrc-1; i++)
|
||||
ADLASSERT( src[i].m_key <= src[i+1].m_key );
|
||||
|
||||
if( option == BOUND_LOWER )
|
||||
{
|
||||
for(u32 i=0; i<nSrc; i++)
|
||||
{
|
||||
SortData& iData = (i==0)? SortData(-1,-1): src[i-1];
|
||||
SortData& jData = (i==nSrc)? SortData(nDst, nDst): src[i];
|
||||
|
||||
if( iData.m_key != jData.m_key )
|
||||
{
|
||||
// for(u32 k=iData.m_key+1; k<=min(jData.m_key,nDst-1); k++)
|
||||
u32 k = jData.m_key;
|
||||
{
|
||||
dst[k] = i;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else if( option == BOUND_UPPER )
|
||||
{
|
||||
for(u32 i=0; i<nSrc+1; i++)
|
||||
{
|
||||
SortData& iData = (i==0)? SortData(0,0): src[i-1];
|
||||
SortData& jData = (i==nSrc)? SortData(nDst, nDst): src[i];
|
||||
|
||||
if( iData.m_key != jData.m_key )
|
||||
{
|
||||
// for(u32 k=iData.m_key; k<min(jData.m_key,nDst); k++)
|
||||
u32 k = iData.m_key;
|
||||
{
|
||||
dst[k] = i;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else if( option == COUNT )
|
||||
{
|
||||
HostBuffer<u32> lower( data->m_device, nDst );
|
||||
HostBuffer<u32> upper( data->m_device, nDst );
|
||||
|
||||
for(u32 i=0; i<nDst; i++) { lower[i] = upper[i] = 0; }
|
||||
|
||||
execute( data, rawSrc, nSrc, lower, nDst, BOUND_LOWER );
|
||||
execute( data, rawSrc, nSrc, upper, nDst, BOUND_UPPER );
|
||||
|
||||
for(u32 i=0; i<nDst; i++) { dst[i] = upper[i] - lower[i]; }
|
||||
}
|
||||
else
|
||||
{
|
||||
ADLASSERT( 0 );
|
||||
}
|
||||
}
|
||||
|
||||
// static
|
||||
// void execute(Data* data, Buffer<u32>& src, Buffer<u32>& dst, int n, Option option = );
|
||||
};
|
||||
|
||||
|
||||
@@ -0,0 +1,112 @@
|
||||
/*
|
||||
Copyright (c) 2012 Advanced Micro Devices, Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
//Originally written by Takahiro Harada
|
||||
|
||||
|
||||
// 32-bit unsigned shorthand used by all kernels in this file.
typedef unsigned int u32;
|
||||
// Work-item index helpers (dimension 0 only). The kernels in this file only
// use GET_GLOBAL_IDX; the remaining macros are not referenced here.
#define GET_GROUP_IDX get_group_id(0)
|
||||
#define GET_LOCAL_IDX get_local_id(0)
|
||||
#define GET_GLOBAL_IDX get_global_id(0)
|
||||
#define GET_GROUP_SIZE get_local_size(0)
|
||||
// Work-group barrier with a local-memory fence.
#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)
|
||||
|
||||
// Key/value pair searched by the kernels; only m_key is compared.
typedef struct SortData
{
	u32 m_key;		// key the source buffer is sorted by
	u32 m_value;	// payload, not interpreted by the search kernels
} SortData;
|
||||
|
||||
|
||||
|
||||
// Per-launch constants passed by value to every kernel.
typedef struct ConstBuffer
{
	u32 m_nSrc;			// number of source elements
	u32 m_nDst;			// number of destination entries
	u32 m_padding[2];	// pads the struct to four 32-bit words
} ConstBuffer;
|
||||
|
||||
|
||||
|
||||
// Lower-bound pass: work item gIdx compares src[gIdx] with its predecessor and,
// where the key changes, records gIdx as the first index of that key.
// dst entries of keys that do not occur in src are left untouched.
__attribute__((reqd_work_group_size(64,1,1)))
__kernel
void SearchSortDataLowerKernel(__global SortData* src, __global u32 *dst, 
					ConstBuffer cb)
{
	// Unsigned index so the comparisons against nSrc do not mix signedness.
	u32 gIdx = GET_GLOBAL_IDX;
	u32 nSrc = cb.m_nSrc;
	u32 nDst = cb.m_nDst;

	if( gIdx < nSrc )
	{
		// Sentinel predecessor of the first element.
		SortData first; first.m_key = (u32)(-1); first.m_value = (u32)(-1);

		SortData iData = (gIdx==0)? first: src[gIdx-1];
		SortData jData = src[gIdx];

		if( iData.m_key != jData.m_key )
		{
//			for(u32 k=iData.m_key+1; k<=min(jData.m_key, nDst-1); k++)
			u32 k = jData.m_key;
			// Keys outside [0, nDst) have no slot in dst.
			if( k < nDst )
			{
				dst[k] = gIdx;
			}
		}
	}
}
|
||||
|
||||
|
||||
// Upper-bound pass: work item gIdx (0..nSrc inclusive) looks at the boundary
// between src[gIdx-1] and src[gIdx] and, where the key changes, records gIdx as
// one past the last index of the preceding key.
__attribute__((reqd_work_group_size(64,1,1)))
__kernel
void SearchSortDataUpperKernel(__global SortData* src, __global u32 *dst, 
					ConstBuffer cb)
{
	// Unsigned index so the comparisons against nSrc do not mix signedness.
	u32 gIdx = GET_GLOBAL_IDX;
	u32 nSrc = cb.m_nSrc;
	u32 nDst = cb.m_nDst;

	if( gIdx < nSrc+1 )
	{
		// Sentinels before the first and after the last element.
		SortData first; first.m_key = 0; first.m_value = 0;
		SortData end; end.m_key = nDst; end.m_value = nDst;

		SortData iData = (gIdx==0)? first: src[gIdx-1];
		SortData jData = (gIdx==nSrc)? end: src[gIdx];

		if( iData.m_key != jData.m_key )
		{
//			for(u32 k=iData.m_key; k<min(jData.m_key, nDst); k++)
			u32 k = iData.m_key;
			// Keys outside [0, nDst) have no slot in dst.
			if( k < nDst )
			{
				dst[k] = gIdx;
			}
		}
	}
}
|
||||
|
||||
// Element-wise C = A - B over the first cb.m_nDst entries.
__attribute__((reqd_work_group_size(64,1,1)))
__kernel
void SubtractKernel(__global u32* A, __global u32 *B, __global u32 *C, 
					ConstBuffer cb)
{
	int gIdx = GET_GLOBAL_IDX;
	u32 count = cb.m_nDst;

	// Work items past the end of the buffers have nothing to do.
	if( !( gIdx < count ) ) return;

	C[gIdx] = A[gIdx] - B[gIdx];
}
|
||||
|
||||
@@ -0,0 +1,104 @@
|
||||
/*
|
||||
Copyright (c) 2012 Advanced Micro Devices, Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
//Originally written by Takahiro Harada
|
||||
|
||||
|
||||
// 32-bit unsigned shorthand used by all kernels in this file.
typedef uint u32;
|
||||
|
||||
// Thread index helpers; they rely on the parameter names declared by DEFAULT_ARGS.
#define GET_GROUP_IDX groupIdx.x
|
||||
#define GET_LOCAL_IDX localIdx.x
|
||||
#define GET_GLOBAL_IDX globalIdx.x
|
||||
#define GROUP_LDS_BARRIER GroupMemoryBarrierWithGroupSync()
|
||||
// Parameter list shared by every compute entry point in this file.
#define DEFAULT_ARGS uint3 globalIdx : SV_DispatchThreadID, uint3 localIdx : SV_GroupThreadID, uint3 groupIdx : SV_GroupID
|
||||
// Atomic increment helpers; not referenced by the kernels in this file.
#define AtomInc(x) InterlockedAdd(x, 1)
|
||||
#define AtomInc1(x, out) InterlockedAdd(x, 1, out)
|
||||
|
||||
|
||||
|
||||
// Key/value pair searched by the kernels; only m_key is compared.
struct SortData
{
	u32 m_key;		// key the source buffer is sorted by
	u32 m_value;	// payload, not interpreted by the search kernels
};
|
||||
|
||||
|
||||
|
||||
// Per-dispatch constants. The host side uploads a single int4 (16 bytes), so
// the padding is declared as a uint2: an array such as "u32 m_padding[2]"
// would place every element in its own 16-byte register and make the buffer
// larger than what the host provides.
cbuffer SortCB : register( b0 )
{
	u32 m_nSrc;			// number of source elements
	u32 m_nDst;			// number of destination entries
	uint2 m_padding;	// fills the remaining 8 bytes of the 16-byte register
};
|
||||
|
||||
|
||||
// Source elements read by the search kernels (shader resource slot t0).
StructuredBuffer<SortData> src : register( t0 );
|
||||
// Per-key output entries written by the search kernels (UAV slot u0).
RWStructuredBuffer<u32> dst : register( u0 );
|
||||
|
||||
|
||||
// Lower-bound pass: thread gIdx compares src[gIdx] with its predecessor and,
// where the key changes, records gIdx as the first index of that key.
// dst entries of keys that do not occur in src are left untouched.
[numthreads(64, 1, 1)]
void SearchSortDataLowerKernel( DEFAULT_ARGS )
{
	// Unsigned index so the comparisons against nSrc do not mix signedness.
	u32 gIdx = GET_GLOBAL_IDX;
	u32 nSrc = m_nSrc;
	u32 nDst = m_nDst;

	if( gIdx < nSrc )
	{
		SortData iData;
		SortData jData;

		// Sentinel predecessor of the first element.
		if( gIdx==0 ) iData.m_key = iData.m_value = (u32)-1;
		else iData = src[gIdx-1];

		jData = src[gIdx];

		if( iData.m_key != jData.m_key )
		{
//			for(u32 k=iData.m_key+1; k<=min(jData.m_key, nDst-1); k++)
			u32 k = jData.m_key;
			// Keys outside [0, nDst) have no slot in dst.
			if( k < nDst )
			{
				dst[k] = gIdx;
			}
		}
	}
}
|
||||
|
||||
// Upper-bound pass: thread gIdx (0..nSrc inclusive) looks at the boundary
// between src[gIdx-1] and src[gIdx] and, where the key changes, records gIdx as
// one past the last index of the preceding key.
[numthreads(64, 1, 1)]
void SearchSortDataUpperKernel( DEFAULT_ARGS )
{
	// Unsigned index so the comparisons against nSrc do not mix signedness.
	u32 gIdx = GET_GLOBAL_IDX;
	u32 nSrc = m_nSrc;
	u32 nDst = m_nDst;

	if( gIdx < nSrc+1 )
	{
		SortData iData;
		SortData jData;

		// Sentinel before the first element.
		if( gIdx==0 ) iData.m_key = iData.m_value = 0;
		else iData = src[gIdx-1];

		// Sentinel after the last element.
		if( gIdx==nSrc ) jData.m_key = jData.m_value = nDst;
		else jData = src[gIdx];

		if( iData.m_key != jData.m_key )
		{
//			for(u32 k=iData.m_key; k<min(jData.m_key, nDst); k++)
			u32 k = iData.m_key;
			// Keys outside [0, nDst) have no slot in dst.
			if( k < nDst )
			{
				dst[k] = gIdx;
			}
		}
	}
}
|
||||
|
||||
@@ -0,0 +1,102 @@
|
||||
// Embedded OpenCL source of the bound-search kernels. BoundSearch<TYPE>::allocate()
// hands this string to Device::getKernel() when ADL_LOAD_KERNEL_FROM_STRING is defined.
// The search kernels use an unsigned work-item index and skip keys that are not
// smaller than nDst, so an out-of-range key cannot write outside of dst.
static const char* boundSearchKernelsCL= \
"/*\n"
"		2011 Takahiro Harada\n"
"*/\n"
"\n"
"typedef unsigned int u32;\n"
"#define GET_GROUP_IDX get_group_id(0)\n"
"#define GET_LOCAL_IDX get_local_id(0)\n"
"#define GET_GLOBAL_IDX get_global_id(0)\n"
"#define GET_GROUP_SIZE get_local_size(0)\n"
"#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)\n"
"\n"
"typedef struct\n"
"{\n"
"	u32 m_key; \n"
"	u32 m_value;\n"
"}SortData;\n"
"\n"
"\n"
"\n"
"typedef struct\n"
"{\n"
"	u32 m_nSrc;\n"
"	u32 m_nDst;\n"
"	u32 m_padding[2];\n"
"} ConstBuffer;\n"
"\n"
"\n"
"\n"
"__attribute__((reqd_work_group_size(64,1,1)))\n"
"__kernel\n"
"void SearchSortDataLowerKernel(__global SortData* src, __global u32 *dst, \n"
"					ConstBuffer cb)\n"
"{\n"
"	u32 gIdx = GET_GLOBAL_IDX;\n"
"	u32 nSrc = cb.m_nSrc;\n"
"	u32 nDst = cb.m_nDst;\n"
"\n"
"	if( gIdx < nSrc )\n"
"	{\n"
"		SortData first; first.m_key = (u32)(-1); first.m_value = (u32)(-1);\n"
"\n"
"		SortData iData = (gIdx==0)? first: src[gIdx-1];\n"
"		SortData jData = src[gIdx];\n"
"\n"
"		if( iData.m_key != jData.m_key )\n"
"		{\n"
"//			for(u32 k=iData.m_key+1; k<=min(jData.m_key, nDst-1); k++)\n"
"			u32 k = jData.m_key;\n"
"			if( k < nDst )\n"
"			{\n"
"				dst[k] = gIdx;\n"
"			}\n"
"		}\n"
"	}\n"
"}\n"
"\n"
"\n"
"__attribute__((reqd_work_group_size(64,1,1)))\n"
"__kernel\n"
"void SearchSortDataUpperKernel(__global SortData* src, __global u32 *dst, \n"
"					ConstBuffer cb)\n"
"{\n"
"	u32 gIdx = GET_GLOBAL_IDX;\n"
"	u32 nSrc = cb.m_nSrc;\n"
"	u32 nDst = cb.m_nDst;\n"
"\n"
"	if( gIdx < nSrc+1 )\n"
"	{\n"
"		SortData first; first.m_key = 0; first.m_value = 0;\n"
"		SortData end; end.m_key = nDst; end.m_value = nDst;\n"
"\n"
"		SortData iData = (gIdx==0)? first: src[gIdx-1];\n"
"		SortData jData = (gIdx==nSrc)? end: src[gIdx];\n"
"\n"
"		if( iData.m_key != jData.m_key )\n"
"		{\n"
"//			for(u32 k=iData.m_key; k<min(jData.m_key, nDst); k++)\n"
"			u32 k = iData.m_key;\n"
"			if( k < nDst )\n"
"			{\n"
"				dst[k] = gIdx;\n"
"			}\n"
"		}\n"
"	}\n"
"}\n"
"\n"
"__attribute__((reqd_work_group_size(64,1,1)))\n"
"__kernel\n"
"void SubtractKernel(__global u32* A, __global u32 *B, __global u32 *C, \n"
"					ConstBuffer cb)\n"
"{\n"
"	u32 gIdx = GET_GLOBAL_IDX;\n"
"	u32 nDst = cb.m_nDst;\n"
"\n"
"	if( gIdx < nDst )\n"
"	{\n"
"		C[gIdx] = A[gIdx] - B[gIdx];\n"
"	}\n"
"}\n"
"\n"
;
|
||||
@@ -0,0 +1,94 @@
|
||||
// Embedded HLSL source of the bound-search kernels. BoundSearch<TYPE>::allocate()
// hands this string to Device::getKernel() when ADL_LOAD_KERNEL_FROM_STRING is defined.
// The kernels use an unsigned thread index and skip keys that are not smaller than
// nDst; the constant buffer padding is a uint2 so the buffer is exactly 16 bytes,
// matching the int4 uploaded by the host.
// NOTE(review): this source has no SubtractKernel entry point although allocate()
// requests one when maxSize != 0 - confirm how COUNT is meant to work on DX11.
static const char* boundSearchKernelsDX11= \
"/*\n"
"		2011 Takahiro Harada\n"
"*/\n"
"\n"
"typedef uint u32;\n"
"\n"
"#define GET_GROUP_IDX groupIdx.x\n"
"#define GET_LOCAL_IDX localIdx.x\n"
"#define GET_GLOBAL_IDX globalIdx.x\n"
"#define GROUP_LDS_BARRIER GroupMemoryBarrierWithGroupSync()\n"
"#define DEFAULT_ARGS uint3 globalIdx : SV_DispatchThreadID, uint3 localIdx : SV_GroupThreadID, uint3 groupIdx : SV_GroupID\n"
"#define AtomInc(x) InterlockedAdd(x, 1)\n"
"#define AtomInc1(x, out) InterlockedAdd(x, 1, out)\n"
"\n"
"\n"
"\n"
"typedef struct\n"
"{\n"
"	u32 m_key; \n"
"	u32 m_value;\n"
"}SortData;\n"
"\n"
"\n"
"\n"
"cbuffer SortCB : register( b0 )\n"
"{\n"
"	u32 m_nSrc;\n"
"	u32 m_nDst;\n"
"	uint2 m_padding;\n"
"};\n"
"\n"
"\n"
"StructuredBuffer<SortData> src : register( t0 );\n"
"RWStructuredBuffer<u32> dst : register( u0 );\n"
"\n"
"\n"
"[numthreads(64, 1, 1)]\n"
"void SearchSortDataLowerKernel( DEFAULT_ARGS )\n"
"{\n"
"	u32 gIdx = GET_GLOBAL_IDX;\n"
"	u32 nSrc = m_nSrc;\n"
"	u32 nDst = m_nDst;\n"
"\n"
"	if( gIdx < nSrc )\n"
"	{\n"
"		SortData iData;\n"
"		SortData jData;\n"
"		if( gIdx==0 ) iData.m_key = iData.m_value = (u32)-1;\n"
"		else iData = src[gIdx-1];\n"
"\n"
"		jData = src[gIdx];\n"
"\n"
"		if( iData.m_key != jData.m_key )\n"
"		{\n"
"//			for(u32 k=iData.m_key+1; k<=min(jData.m_key, nDst-1); k++)\n"
"			u32 k = jData.m_key;\n"
"			if( k < nDst )\n"
"			{\n"
"				dst[k] = gIdx;\n"
"			}\n"
"		}\n"
"	}\n"
"}\n"
"\n"
"[numthreads(64, 1, 1)]\n"
"void SearchSortDataUpperKernel( DEFAULT_ARGS )\n"
"{\n"
"	u32 gIdx = GET_GLOBAL_IDX;\n"
"	u32 nSrc = m_nSrc;\n"
"	u32 nDst = m_nDst;\n"
"\n"
"	if( gIdx < nSrc+1 )\n"
"	{\n"
"		SortData iData;\n"
"		SortData jData;\n"
"		if( gIdx==0 ) iData.m_key = iData.m_value = 0;\n"
"		else iData = src[gIdx-1];\n"
"\n"
"		if( gIdx==nSrc ) jData.m_key = jData.m_value = nDst;\n"
"		else jData = src[gIdx];\n"
"\n"
"		if( iData.m_key != jData.m_key )\n"
"		{\n"
"//			for(u32 k=iData.m_key; k<min(jData.m_key, nDst); k++)\n"
"			u32 k = iData.m_key;\n"
"			if( k < nDst )\n"
"			{\n"
"				dst[k] = gIdx;\n"
"			}\n"
"		}\n"
"	}\n"
"}\n"
"\n"
;
|
||||
Reference in New Issue
Block a user