bt -> b3 and BT -> B3 rename for content and filenames
This commit is contained in:
@@ -20,12 +20,12 @@ subject to the following restrictions:
|
||||
#define KERNEL2 "SubtractKernel"
|
||||
|
||||
|
||||
#include "btBoundSearchCL.h"
|
||||
#include "b3BoundSearchCL.h"
|
||||
#include "../../basic_initialize/b3OpenCLUtils.h"
|
||||
#include "btLauncherCL.h"
|
||||
#include "b3LauncherCL.h"
|
||||
#include "../kernels/BoundSearchKernelsCL.h"
|
||||
|
||||
btBoundSearchCL::btBoundSearchCL(cl_context ctx, cl_device_id device, cl_command_queue queue, int maxSize)
|
||||
b3BoundSearchCL::b3BoundSearchCL(cl_context ctx, cl_device_id device, cl_command_queue queue, int maxSize)
|
||||
:m_context(ctx),
|
||||
m_device(device),
|
||||
m_queue(queue)
|
||||
@@ -38,31 +38,31 @@ btBoundSearchCL::btBoundSearchCL(cl_context ctx, cl_device_id device, cl_command
|
||||
const char* kernelSource = boundSearchKernelsCL;
|
||||
|
||||
cl_program boundSearchProg = b3OpenCLUtils::compileCLProgramFromString( ctx, device, kernelSource, &pErrNum,additionalMacros, BOUNDSEARCH_PATH);
|
||||
btAssert(boundSearchProg);
|
||||
b3Assert(boundSearchProg);
|
||||
|
||||
m_lowerSortDataKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "SearchSortDataLowerKernel", &pErrNum, boundSearchProg,additionalMacros );
|
||||
btAssert(m_lowerSortDataKernel );
|
||||
b3Assert(m_lowerSortDataKernel );
|
||||
|
||||
m_upperSortDataKernel= b3OpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "SearchSortDataUpperKernel", &pErrNum, boundSearchProg,additionalMacros );
|
||||
btAssert(m_upperSortDataKernel);
|
||||
b3Assert(m_upperSortDataKernel);
|
||||
|
||||
m_subtractKernel = 0;
|
||||
|
||||
if( maxSize )
|
||||
{
|
||||
m_subtractKernel= b3OpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "SubtractKernel", &pErrNum, boundSearchProg,additionalMacros );
|
||||
btAssert(m_subtractKernel);
|
||||
b3Assert(m_subtractKernel);
|
||||
}
|
||||
|
||||
//m_constBuffer = new btOpenCLArray<btInt4>( device, 1, BufferBase::BUFFER_CONST );
|
||||
//m_constBuffer = new b3OpenCLArray<b3Int4>( device, 1, BufferBase::BUFFER_CONST );
|
||||
|
||||
m_lower = (maxSize == 0)? 0: new btOpenCLArray<unsigned int>(ctx,queue,maxSize );
|
||||
m_upper = (maxSize == 0)? 0: new btOpenCLArray<unsigned int>(ctx,queue, maxSize );
|
||||
m_lower = (maxSize == 0)? 0: new b3OpenCLArray<unsigned int>(ctx,queue,maxSize );
|
||||
m_upper = (maxSize == 0)? 0: new b3OpenCLArray<unsigned int>(ctx,queue, maxSize );
|
||||
|
||||
m_filler = new btFillCL(ctx,device,queue);
|
||||
m_filler = new b3FillCL(ctx,device,queue);
|
||||
}
|
||||
|
||||
btBoundSearchCL::~btBoundSearchCL()
|
||||
b3BoundSearchCL::~b3BoundSearchCL()
|
||||
{
|
||||
|
||||
delete m_lower;
|
||||
@@ -77,18 +77,18 @@ btBoundSearchCL::~btBoundSearchCL()
|
||||
}
|
||||
|
||||
|
||||
void btBoundSearchCL::execute(btOpenCLArray<btSortData>& src, int nSrc, btOpenCLArray<unsigned int>& dst, int nDst, Option option )
|
||||
void b3BoundSearchCL::execute(b3OpenCLArray<b3SortData>& src, int nSrc, b3OpenCLArray<unsigned int>& dst, int nDst, Option option )
|
||||
{
|
||||
btInt4 constBuffer;
|
||||
b3Int4 constBuffer;
|
||||
constBuffer.x = nSrc;
|
||||
constBuffer.y = nDst;
|
||||
|
||||
if( option == BOUND_LOWER )
|
||||
{
|
||||
btBufferInfoCL bInfo[] = { btBufferInfoCL( src.getBufferCL(), true ), btBufferInfoCL( dst.getBufferCL()) };
|
||||
b3BufferInfoCL bInfo[] = { b3BufferInfoCL( src.getBufferCL(), true ), b3BufferInfoCL( dst.getBufferCL()) };
|
||||
|
||||
btLauncherCL launcher( m_queue, m_lowerSortDataKernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) );
|
||||
b3LauncherCL launcher( m_queue, m_lowerSortDataKernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
|
||||
launcher.setConst( nSrc );
|
||||
launcher.setConst( nDst );
|
||||
|
||||
@@ -96,10 +96,10 @@ void btBoundSearchCL::execute(btOpenCLArray<btSortData>& src, int nSrc, btOpenCL
|
||||
}
|
||||
else if( option == BOUND_UPPER )
|
||||
{
|
||||
btBufferInfoCL bInfo[] = { btBufferInfoCL( src.getBufferCL(), true ), btBufferInfoCL( dst.getBufferCL() ) };
|
||||
b3BufferInfoCL bInfo[] = { b3BufferInfoCL( src.getBufferCL(), true ), b3BufferInfoCL( dst.getBufferCL() ) };
|
||||
|
||||
btLauncherCL launcher(m_queue, m_upperSortDataKernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) );
|
||||
b3LauncherCL launcher(m_queue, m_upperSortDataKernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
|
||||
launcher.setConst( nSrc );
|
||||
launcher.setConst( nDst );
|
||||
|
||||
@@ -107,10 +107,10 @@ void btBoundSearchCL::execute(btOpenCLArray<btSortData>& src, int nSrc, btOpenCL
|
||||
}
|
||||
else if( option == COUNT )
|
||||
{
|
||||
btAssert( m_lower );
|
||||
btAssert( m_upper );
|
||||
btAssert( m_lower->capacity() <= (int)nDst );
|
||||
btAssert( m_upper->capacity() <= (int)nDst );
|
||||
b3Assert( m_lower );
|
||||
b3Assert( m_upper );
|
||||
b3Assert( m_lower->capacity() <= (int)nDst );
|
||||
b3Assert( m_upper->capacity() <= (int)nDst );
|
||||
|
||||
int zero = 0;
|
||||
m_filler->execute( *m_lower, zero, nDst );
|
||||
@@ -120,10 +120,10 @@ void btBoundSearchCL::execute(btOpenCLArray<btSortData>& src, int nSrc, btOpenCL
|
||||
execute( src, nSrc, *m_upper, nDst, BOUND_UPPER );
|
||||
|
||||
{
|
||||
btBufferInfoCL bInfo[] = { btBufferInfoCL( m_upper->getBufferCL(), true ), btBufferInfoCL( m_lower->getBufferCL(), true ), btBufferInfoCL( dst.getBufferCL() ) };
|
||||
b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_upper->getBufferCL(), true ), b3BufferInfoCL( m_lower->getBufferCL(), true ), b3BufferInfoCL( dst.getBufferCL() ) };
|
||||
|
||||
btLauncherCL launcher( m_queue, m_subtractKernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) );
|
||||
b3LauncherCL launcher( m_queue, m_subtractKernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
|
||||
launcher.setConst( nSrc );
|
||||
launcher.setConst( nDst );
|
||||
|
||||
@@ -132,21 +132,21 @@ void btBoundSearchCL::execute(btOpenCLArray<btSortData>& src, int nSrc, btOpenCL
|
||||
}
|
||||
else
|
||||
{
|
||||
btAssert( 0 );
|
||||
b3Assert( 0 );
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
void btBoundSearchCL::executeHost( b3AlignedObjectArray<btSortData>& src, int nSrc,
|
||||
void b3BoundSearchCL::executeHost( b3AlignedObjectArray<b3SortData>& src, int nSrc,
|
||||
b3AlignedObjectArray<unsigned int>& dst, int nDst, Option option )
|
||||
{
|
||||
|
||||
|
||||
for(int i=0; i<nSrc-1; i++)
|
||||
btAssert( src[i].m_key <= src[i+1].m_key );
|
||||
b3Assert( src[i].m_key <= src[i+1].m_key );
|
||||
|
||||
btSortData minData,zeroData,maxData;
|
||||
b3SortData minData,zeroData,maxData;
|
||||
minData.m_key = -1;
|
||||
minData.m_value = -1;
|
||||
zeroData.m_key=0;
|
||||
@@ -158,8 +158,8 @@ void btBoundSearchCL::executeHost( b3AlignedObjectArray<btSortData>& src, int nS
|
||||
{
|
||||
for(int i=0; i<nSrc; i++)
|
||||
{
|
||||
btSortData& iData = (i==0)? minData: src[i-1];
|
||||
btSortData& jData = (i==nSrc)? maxData: src[i];
|
||||
b3SortData& iData = (i==0)? minData: src[i-1];
|
||||
b3SortData& jData = (i==nSrc)? maxData: src[i];
|
||||
|
||||
if( iData.m_key != jData.m_key )
|
||||
{
|
||||
@@ -174,8 +174,8 @@ void btBoundSearchCL::executeHost( b3AlignedObjectArray<btSortData>& src, int nS
|
||||
{
|
||||
for(int i=1; i<nSrc+1; i++)
|
||||
{
|
||||
btSortData& iData = src[i-1];
|
||||
btSortData& jData = (i==nSrc)? maxData: src[i];
|
||||
b3SortData& iData = src[i-1];
|
||||
b3SortData& jData = (i==nSrc)? maxData: src[i];
|
||||
|
||||
if( iData.m_key != jData.m_key )
|
||||
{
|
||||
@@ -208,6 +208,6 @@ void btBoundSearchCL::executeHost( b3AlignedObjectArray<btSortData>& src, int nS
|
||||
}
|
||||
else
|
||||
{
|
||||
btAssert( 0 );
|
||||
b3Assert( 0 );
|
||||
}
|
||||
}
|
||||
@@ -13,8 +13,8 @@ subject to the following restrictions:
|
||||
*/
|
||||
//Originally written by Takahiro Harada
|
||||
|
||||
#ifndef BT_BOUNDSEARCH_H
|
||||
#define BT_BOUNDSEARCH_H
|
||||
#ifndef B3_BOUNDSEARCH_H
|
||||
#define B3_BOUNDSEARCH_H
|
||||
|
||||
#pragma once
|
||||
|
||||
@@ -24,10 +24,10 @@ subject to the following restrictions:
|
||||
#include <AdlPrimitives/Fill/Fill.h>
|
||||
*/
|
||||
|
||||
#include "btOpenCLArray.h"
|
||||
#include "btFillCL.h"
|
||||
#include "btRadixSort32CL.h" //for btSortData (perhaps move it?)
|
||||
class btBoundSearchCL
|
||||
#include "b3OpenCLArray.h"
|
||||
#include "b3FillCL.h"
|
||||
#include "b3RadixSort32CL.h" //for b3SortData (perhaps move it?)
|
||||
class b3BoundSearchCL
|
||||
{
|
||||
public:
|
||||
|
||||
@@ -47,21 +47,21 @@ class btBoundSearchCL
|
||||
cl_kernel m_upperSortDataKernel;
|
||||
cl_kernel m_subtractKernel;
|
||||
|
||||
btOpenCLArray<btInt4>* m_constbtOpenCLArray;
|
||||
btOpenCLArray<unsigned int>* m_lower;
|
||||
btOpenCLArray<unsigned int>* m_upper;
|
||||
b3OpenCLArray<b3Int4>* m_constbtOpenCLArray;
|
||||
b3OpenCLArray<unsigned int>* m_lower;
|
||||
b3OpenCLArray<unsigned int>* m_upper;
|
||||
|
||||
btFillCL* m_filler;
|
||||
b3FillCL* m_filler;
|
||||
|
||||
btBoundSearchCL(cl_context context, cl_device_id device, cl_command_queue queue, int size);
|
||||
b3BoundSearchCL(cl_context context, cl_device_id device, cl_command_queue queue, int size);
|
||||
|
||||
virtual ~btBoundSearchCL();
|
||||
virtual ~b3BoundSearchCL();
|
||||
|
||||
// src has to be src[i].m_key <= src[i+1].m_key
|
||||
void execute( btOpenCLArray<btSortData>& src, int nSrc, btOpenCLArray<unsigned int>& dst, int nDst, Option option = BOUND_LOWER );
|
||||
void execute( b3OpenCLArray<b3SortData>& src, int nSrc, b3OpenCLArray<unsigned int>& dst, int nDst, Option option = BOUND_LOWER );
|
||||
|
||||
void executeHost( b3AlignedObjectArray<btSortData>& src, int nSrc, b3AlignedObjectArray<unsigned int>& dst, int nDst, Option option = BOUND_LOWER);
|
||||
void executeHost( b3AlignedObjectArray<b3SortData>& src, int nSrc, b3AlignedObjectArray<unsigned int>& dst, int nDst, Option option = BOUND_LOWER);
|
||||
};
|
||||
|
||||
|
||||
#endif //BT_BOUNDSEARCH_H
|
||||
#endif //B3_BOUNDSEARCH_H
|
||||
19
opencl/parallel_primitives/host/b3BufferInfoCL.h
Normal file
19
opencl/parallel_primitives/host/b3BufferInfoCL.h
Normal file
@@ -0,0 +1,19 @@
|
||||
|
||||
#ifndef B3_BUFFER_INFO_CL_H
|
||||
#define B3_BUFFER_INFO_CL_H
|
||||
|
||||
#include "b3OpenCLArray.h"
|
||||
|
||||
|
||||
struct b3BufferInfoCL
|
||||
{
|
||||
//b3BufferInfoCL(){}
|
||||
|
||||
// template<typename T>
|
||||
b3BufferInfoCL(cl_mem buff, bool isReadOnly = false): m_clBuffer(buff), m_isReadOnly(isReadOnly){}
|
||||
|
||||
cl_mem m_clBuffer;
|
||||
bool m_isReadOnly;
|
||||
};
|
||||
|
||||
#endif //B3_BUFFER_INFO_CL_H
|
||||
@@ -1,13 +1,13 @@
|
||||
#include "btFillCL.h"
|
||||
#include "b3FillCL.h"
|
||||
#include "../../basic_initialize/b3OpenCLUtils.h"
|
||||
#include "btBufferInfoCL.h"
|
||||
#include "btLauncherCL.h"
|
||||
#include "b3BufferInfoCL.h"
|
||||
#include "b3LauncherCL.h"
|
||||
|
||||
#define FILL_CL_PROGRAM_PATH "opencl/parallel_primitives/kernels/FillKernels.cl"
|
||||
|
||||
#include "../kernels/FillKernelsCL.h"
|
||||
|
||||
btFillCL::btFillCL(cl_context ctx, cl_device_id device, cl_command_queue queue)
|
||||
b3FillCL::b3FillCL(cl_context ctx, cl_device_id device, cl_command_queue queue)
|
||||
:m_commandQueue(queue)
|
||||
{
|
||||
const char* kernelSource = fillKernelsCL;
|
||||
@@ -15,25 +15,25 @@ btFillCL::btFillCL(cl_context ctx, cl_device_id device, cl_command_queue queue)
|
||||
const char* additionalMacros = "";
|
||||
|
||||
cl_program fillProg = b3OpenCLUtils::compileCLProgramFromString( ctx, device, kernelSource, &pErrNum,additionalMacros, FILL_CL_PROGRAM_PATH);
|
||||
btAssert(fillProg);
|
||||
b3Assert(fillProg);
|
||||
|
||||
m_fillIntKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "FillIntKernel", &pErrNum, fillProg,additionalMacros );
|
||||
btAssert(m_fillIntKernel);
|
||||
b3Assert(m_fillIntKernel);
|
||||
|
||||
m_fillUnsignedIntKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "FillUnsignedIntKernel", &pErrNum, fillProg,additionalMacros );
|
||||
btAssert(m_fillIntKernel);
|
||||
b3Assert(m_fillIntKernel);
|
||||
|
||||
m_fillFloatKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "FillFloatKernel", &pErrNum, fillProg,additionalMacros );
|
||||
btAssert(m_fillFloatKernel);
|
||||
b3Assert(m_fillFloatKernel);
|
||||
|
||||
|
||||
|
||||
m_fillKernelInt2 = b3OpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "FillInt2Kernel", &pErrNum, fillProg,additionalMacros );
|
||||
btAssert(m_fillKernelInt2);
|
||||
b3Assert(m_fillKernelInt2);
|
||||
|
||||
}
|
||||
|
||||
btFillCL::~btFillCL()
|
||||
b3FillCL::~b3FillCL()
|
||||
{
|
||||
clReleaseKernel(m_fillKernelInt2);
|
||||
clReleaseKernel(m_fillIntKernel);
|
||||
@@ -42,12 +42,12 @@ btFillCL::~btFillCL()
|
||||
|
||||
}
|
||||
|
||||
void btFillCL::execute(btOpenCLArray<float>& src, const float value, int n, int offset)
|
||||
void b3FillCL::execute(b3OpenCLArray<float>& src, const float value, int n, int offset)
|
||||
{
|
||||
btAssert( n>0 );
|
||||
b3Assert( n>0 );
|
||||
|
||||
{
|
||||
btLauncherCL launcher( m_commandQueue, m_fillFloatKernel );
|
||||
b3LauncherCL launcher( m_commandQueue, m_fillFloatKernel );
|
||||
launcher.setBuffer( src.getBufferCL());
|
||||
launcher.setConst( n );
|
||||
launcher.setConst( value );
|
||||
@@ -57,13 +57,13 @@ void btFillCL::execute(btOpenCLArray<float>& src, const float value, int n, int
|
||||
}
|
||||
}
|
||||
|
||||
void btFillCL::execute(btOpenCLArray<int>& src, const int value, int n, int offset)
|
||||
void b3FillCL::execute(b3OpenCLArray<int>& src, const int value, int n, int offset)
|
||||
{
|
||||
btAssert( n>0 );
|
||||
b3Assert( n>0 );
|
||||
|
||||
|
||||
{
|
||||
btLauncherCL launcher( m_commandQueue, m_fillIntKernel );
|
||||
b3LauncherCL launcher( m_commandQueue, m_fillIntKernel );
|
||||
launcher.setBuffer(src.getBufferCL());
|
||||
launcher.setConst( n);
|
||||
launcher.setConst( value);
|
||||
@@ -73,15 +73,15 @@ void btFillCL::execute(btOpenCLArray<int>& src, const int value, int n, int offs
|
||||
}
|
||||
|
||||
|
||||
void btFillCL::execute(btOpenCLArray<unsigned int>& src, const unsigned int value, int n, int offset)
|
||||
void b3FillCL::execute(b3OpenCLArray<unsigned int>& src, const unsigned int value, int n, int offset)
|
||||
{
|
||||
btAssert( n>0 );
|
||||
b3Assert( n>0 );
|
||||
|
||||
{
|
||||
btBufferInfoCL bInfo[] = { btBufferInfoCL( src.getBufferCL() ) };
|
||||
b3BufferInfoCL bInfo[] = { b3BufferInfoCL( src.getBufferCL() ) };
|
||||
|
||||
btLauncherCL launcher( m_commandQueue, m_fillUnsignedIntKernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) );
|
||||
b3LauncherCL launcher( m_commandQueue, m_fillUnsignedIntKernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
|
||||
launcher.setConst( n );
|
||||
launcher.setConst(value);
|
||||
launcher.setConst(offset);
|
||||
@@ -90,7 +90,7 @@ void btFillCL::execute(btOpenCLArray<unsigned int>& src, const unsigned int valu
|
||||
}
|
||||
}
|
||||
|
||||
void btFillCL::executeHost(b3AlignedObjectArray<btInt2> &src, const btInt2 &value, int n, int offset)
|
||||
void b3FillCL::executeHost(b3AlignedObjectArray<b3Int2> &src, const b3Int2 &value, int n, int offset)
|
||||
{
|
||||
for (int i=0;i<n;i++)
|
||||
{
|
||||
@@ -98,7 +98,7 @@ void btFillCL::executeHost(b3AlignedObjectArray<btInt2> &src, const btInt2 &valu
|
||||
}
|
||||
}
|
||||
|
||||
void btFillCL::executeHost(b3AlignedObjectArray<int> &src, const int value, int n, int offset)
|
||||
void b3FillCL::executeHost(b3AlignedObjectArray<int> &src, const int value, int n, int offset)
|
||||
{
|
||||
for (int i=0;i<n;i++)
|
||||
{
|
||||
@@ -106,16 +106,16 @@ void btFillCL::executeHost(b3AlignedObjectArray<int> &src, const int value, int
|
||||
}
|
||||
}
|
||||
|
||||
void btFillCL::execute(btOpenCLArray<btInt2> &src, const btInt2 &value, int n, int offset)
|
||||
void b3FillCL::execute(b3OpenCLArray<b3Int2> &src, const b3Int2 &value, int n, int offset)
|
||||
{
|
||||
btAssert( n>0 );
|
||||
b3Assert( n>0 );
|
||||
|
||||
|
||||
{
|
||||
btBufferInfoCL bInfo[] = { btBufferInfoCL( src.getBufferCL() ) };
|
||||
b3BufferInfoCL bInfo[] = { b3BufferInfoCL( src.getBufferCL() ) };
|
||||
|
||||
btLauncherCL launcher(m_commandQueue, m_fillKernelInt2);
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) );
|
||||
b3LauncherCL launcher(m_commandQueue, m_fillKernelInt2);
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
|
||||
launcher.setConst(n);
|
||||
launcher.setConst(value);
|
||||
launcher.setConst(offset);
|
||||
63
opencl/parallel_primitives/host/b3FillCL.h
Normal file
63
opencl/parallel_primitives/host/b3FillCL.h
Normal file
@@ -0,0 +1,63 @@
|
||||
#ifndef B3_FILL_CL_H
|
||||
#define B3_FILL_CL_H
|
||||
|
||||
#include "b3OpenCLArray.h"
|
||||
#include "Bullet3Common/b3Scalar.h"
|
||||
|
||||
#include "b3Int2.h"
|
||||
#include "b3Int4.h"
|
||||
|
||||
|
||||
class b3FillCL
|
||||
{
|
||||
|
||||
cl_command_queue m_commandQueue;
|
||||
|
||||
cl_kernel m_fillKernelInt2;
|
||||
cl_kernel m_fillIntKernel;
|
||||
cl_kernel m_fillUnsignedIntKernel;
|
||||
cl_kernel m_fillFloatKernel;
|
||||
|
||||
public:
|
||||
|
||||
struct b3ConstData
|
||||
{
|
||||
union
|
||||
{
|
||||
b3Int4 m_data;
|
||||
b3UnsignedInt4 m_UnsignedData;
|
||||
};
|
||||
int m_offset;
|
||||
int m_n;
|
||||
int m_padding[2];
|
||||
};
|
||||
|
||||
protected:
|
||||
|
||||
public:
|
||||
|
||||
b3FillCL(cl_context ctx, cl_device_id device, cl_command_queue queue);
|
||||
|
||||
virtual ~b3FillCL();
|
||||
|
||||
void execute(b3OpenCLArray<unsigned int>& src, const unsigned int value, int n, int offset = 0);
|
||||
|
||||
void execute(b3OpenCLArray<int>& src, const int value, int n, int offset = 0);
|
||||
|
||||
void execute(b3OpenCLArray<float>& src, const float value, int n, int offset = 0);
|
||||
|
||||
void execute(b3OpenCLArray<b3Int2>& src, const b3Int2& value, int n, int offset = 0);
|
||||
|
||||
void executeHost(b3AlignedObjectArray<b3Int2> &src, const b3Int2 &value, int n, int offset);
|
||||
|
||||
void executeHost(b3AlignedObjectArray<int> &src, const int value, int n, int offset);
|
||||
|
||||
// void execute(b3OpenCLArray<b3Int4>& src, const b3Int4& value, int n, int offset = 0);
|
||||
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#endif //B3_FILL_CL_H
|
||||
@@ -1,7 +1,7 @@
|
||||
#ifndef BT_INT2_H
|
||||
#define BT_INT2_H
|
||||
#ifndef B3_INT2_H
|
||||
#define B3_INT2_H
|
||||
|
||||
struct btUnsignedInt2
|
||||
struct b3UnsignedInt2
|
||||
{
|
||||
union
|
||||
{
|
||||
@@ -16,7 +16,7 @@ struct btUnsignedInt2
|
||||
};
|
||||
};
|
||||
|
||||
struct btInt2
|
||||
struct b3Int2
|
||||
{
|
||||
union
|
||||
{
|
||||
@@ -1,11 +1,11 @@
|
||||
#ifndef BT_INT4_H
|
||||
#define BT_INT4_H
|
||||
#ifndef B3_INT4_H
|
||||
#define B3_INT4_H
|
||||
|
||||
#include "Bullet3Common/b3Scalar.h"
|
||||
|
||||
ATTRIBUTE_ALIGNED16(struct) btUnsignedInt4
|
||||
ATTRIBUTE_ALIGNED16(struct) b3UnsignedInt4
|
||||
{
|
||||
BT_DECLARE_ALIGNED_ALLOCATOR();
|
||||
B3_DECLARE_ALIGNED_ALLOCATOR();
|
||||
|
||||
union
|
||||
{
|
||||
@@ -20,9 +20,9 @@ ATTRIBUTE_ALIGNED16(struct) btUnsignedInt4
|
||||
};
|
||||
};
|
||||
|
||||
ATTRIBUTE_ALIGNED16(struct) btInt4
|
||||
ATTRIBUTE_ALIGNED16(struct) b3Int4
|
||||
{
|
||||
BT_DECLARE_ALIGNED_ALLOCATOR();
|
||||
B3_DECLARE_ALIGNED_ALLOCATOR();
|
||||
|
||||
union
|
||||
{
|
||||
@@ -37,19 +37,19 @@ ATTRIBUTE_ALIGNED16(struct) btInt4
|
||||
};
|
||||
};
|
||||
|
||||
SIMD_FORCE_INLINE btInt4 btMakeInt4(int x, int y, int z, int w = 0)
|
||||
SIMD_FORCE_INLINE b3Int4 b3MakeInt4(int x, int y, int z, int w = 0)
|
||||
{
|
||||
btInt4 v;
|
||||
b3Int4 v;
|
||||
v.s[0] = x; v.s[1] = y; v.s[2] = z; v.s[3] = w;
|
||||
return v;
|
||||
}
|
||||
|
||||
SIMD_FORCE_INLINE btUnsignedInt4 btMakeUnsignedInt4(unsigned int x, unsigned int y, unsigned int z, unsigned int w = 0)
|
||||
SIMD_FORCE_INLINE b3UnsignedInt4 b3MakeUnsignedInt4(unsigned int x, unsigned int y, unsigned int z, unsigned int w = 0)
|
||||
{
|
||||
btUnsignedInt4 v;
|
||||
b3UnsignedInt4 v;
|
||||
v.s[0] = x; v.s[1] = y; v.s[2] = z; v.s[3] = w;
|
||||
return v;
|
||||
}
|
||||
|
||||
|
||||
#endif //BT_INT4_H
|
||||
#endif //B3_INT4_H
|
||||
@@ -1,17 +1,17 @@
|
||||
|
||||
#ifndef BT_LAUNCHER_CL_H
|
||||
#define BT_LAUNCHER_CL_H
|
||||
#ifndef B3_LAUNCHER_CL_H
|
||||
#define B3_LAUNCHER_CL_H
|
||||
|
||||
#include "btBufferInfoCL.h"
|
||||
#include "b3BufferInfoCL.h"
|
||||
#include "Bullet3Common/b3MinMax.h"
|
||||
#include "btOpenCLArray.h"
|
||||
#include "b3OpenCLArray.h"
|
||||
#include <stdio.h>
|
||||
|
||||
#ifdef _WIN32
|
||||
#pragma warning(disable :4996)
|
||||
#endif
|
||||
#define BT_CL_MAX_ARG_SIZE 16
|
||||
struct btKernelArgData
|
||||
#define B3_CL_MAX_ARG_SIZE 16
|
||||
struct b3KernelArgData
|
||||
{
|
||||
int m_isBuffer;
|
||||
int m_argIndex;
|
||||
@@ -19,28 +19,28 @@ struct btKernelArgData
|
||||
union
|
||||
{
|
||||
cl_mem m_clBuffer;
|
||||
unsigned char m_argData[BT_CL_MAX_ARG_SIZE];
|
||||
unsigned char m_argData[B3_CL_MAX_ARG_SIZE];
|
||||
};
|
||||
|
||||
};
|
||||
|
||||
class btLauncherCL
|
||||
class b3LauncherCL
|
||||
{
|
||||
|
||||
cl_command_queue m_commandQueue;
|
||||
cl_kernel m_kernel;
|
||||
int m_idx;
|
||||
|
||||
b3AlignedObjectArray<btKernelArgData> m_kernelArguments;
|
||||
b3AlignedObjectArray<b3KernelArgData> m_kernelArguments;
|
||||
|
||||
|
||||
int m_serializationSizeInBytes;
|
||||
|
||||
public:
|
||||
|
||||
b3AlignedObjectArray<btOpenCLArray<unsigned char>* > m_arrays;
|
||||
b3AlignedObjectArray<b3OpenCLArray<unsigned char>* > m_arrays;
|
||||
|
||||
btLauncherCL(cl_command_queue queue, cl_kernel kernel)
|
||||
b3LauncherCL(cl_command_queue queue, cl_kernel kernel)
|
||||
:m_commandQueue(queue),
|
||||
m_kernel(kernel),
|
||||
m_idx(0)
|
||||
@@ -48,7 +48,7 @@ class btLauncherCL
|
||||
m_serializationSizeInBytes = sizeof(int);
|
||||
}
|
||||
|
||||
virtual ~btLauncherCL()
|
||||
virtual ~b3LauncherCL()
|
||||
{
|
||||
for (int i=0;i<m_arrays.size();i++)
|
||||
{
|
||||
@@ -59,7 +59,7 @@ class btLauncherCL
|
||||
inline void setBuffer( cl_mem clBuffer)
|
||||
{
|
||||
|
||||
btKernelArgData kernelArg;
|
||||
b3KernelArgData kernelArg;
|
||||
kernelArg.m_argIndex = m_idx;
|
||||
kernelArg.m_isBuffer = 1;
|
||||
kernelArg.m_clBuffer = clBuffer;
|
||||
@@ -75,23 +75,23 @@ class btLauncherCL
|
||||
¶m_value,
|
||||
&actualSizeInBytes);
|
||||
|
||||
btAssert( err == CL_SUCCESS );
|
||||
b3Assert( err == CL_SUCCESS );
|
||||
kernelArg.m_argSizeInBytes = param_value;
|
||||
|
||||
m_kernelArguments.push_back(kernelArg);
|
||||
m_serializationSizeInBytes+= sizeof(btKernelArgData);
|
||||
m_serializationSizeInBytes+= sizeof(b3KernelArgData);
|
||||
m_serializationSizeInBytes+=param_value;
|
||||
|
||||
cl_int status = clSetKernelArg( m_kernel, m_idx++, sizeof(cl_mem), &clBuffer);
|
||||
btAssert( status == CL_SUCCESS );
|
||||
b3Assert( status == CL_SUCCESS );
|
||||
}
|
||||
|
||||
|
||||
inline void setBuffers( btBufferInfoCL* buffInfo, int n )
|
||||
inline void setBuffers( b3BufferInfoCL* buffInfo, int n )
|
||||
{
|
||||
for(int i=0; i<n; i++)
|
||||
{
|
||||
btKernelArgData kernelArg;
|
||||
b3KernelArgData kernelArg;
|
||||
kernelArg.m_argIndex = m_idx;
|
||||
kernelArg.m_isBuffer = 1;
|
||||
kernelArg.m_clBuffer = buffInfo[i].m_clBuffer;
|
||||
@@ -107,15 +107,15 @@ class btLauncherCL
|
||||
¶m_value,
|
||||
&actualSizeInBytes);
|
||||
|
||||
btAssert( err == CL_SUCCESS );
|
||||
b3Assert( err == CL_SUCCESS );
|
||||
kernelArg.m_argSizeInBytes = param_value;
|
||||
|
||||
m_kernelArguments.push_back(kernelArg);
|
||||
m_serializationSizeInBytes+= sizeof(btKernelArgData);
|
||||
m_serializationSizeInBytes+= sizeof(b3KernelArgData);
|
||||
m_serializationSizeInBytes+=param_value;
|
||||
|
||||
cl_int status = clSetKernelArg( m_kernel, m_idx++, sizeof(cl_mem), &buffInfo[i].m_clBuffer);
|
||||
btAssert( status == CL_SUCCESS );
|
||||
b3Assert( status == CL_SUCCESS );
|
||||
}
|
||||
}
|
||||
|
||||
@@ -133,12 +133,12 @@ class btLauncherCL
|
||||
|
||||
for (int i=0;i<numArguments;i++)
|
||||
{
|
||||
btKernelArgData* arg = (btKernelArgData*)&buf[index];
|
||||
b3KernelArgData* arg = (b3KernelArgData*)&buf[index];
|
||||
|
||||
index+=sizeof(btKernelArgData);
|
||||
index+=sizeof(b3KernelArgData);
|
||||
if (arg->m_isBuffer)
|
||||
{
|
||||
btOpenCLArray<unsigned char>* clData = new btOpenCLArray<unsigned char>(ctx,m_commandQueue, arg->m_argSizeInBytes);
|
||||
b3OpenCLArray<unsigned char>* clData = new b3OpenCLArray<unsigned char>(ctx,m_commandQueue, arg->m_argSizeInBytes);
|
||||
clData->resize(arg->m_argSizeInBytes);
|
||||
|
||||
clData->copyFromHostPointer(&buf[index], arg->m_argSizeInBytes);
|
||||
@@ -148,12 +148,12 @@ class btLauncherCL
|
||||
m_arrays.push_back(clData);
|
||||
|
||||
cl_int status = clSetKernelArg( m_kernel, m_idx++, sizeof(cl_mem), &arg->m_clBuffer);
|
||||
btAssert( status == CL_SUCCESS );
|
||||
b3Assert( status == CL_SUCCESS );
|
||||
index+=arg->m_argSizeInBytes;
|
||||
} else
|
||||
{
|
||||
cl_int status = clSetKernelArg( m_kernel, m_idx++, arg->m_argSizeInBytes, &arg->m_argData);
|
||||
btAssert( status == CL_SUCCESS );
|
||||
b3Assert( status == CL_SUCCESS );
|
||||
}
|
||||
m_kernelArguments.push_back(*arg);
|
||||
}
|
||||
@@ -176,7 +176,7 @@ class btLauncherCL
|
||||
|
||||
for (int ii=0;ii<numArguments;ii++)
|
||||
{
|
||||
btKernelArgData* argGold = (btKernelArgData*)&goldBuffer[index];
|
||||
b3KernelArgData* argGold = (b3KernelArgData*)&goldBuffer[index];
|
||||
|
||||
if (m_kernelArguments[ii].m_argSizeInBytes != argGold->m_argSizeInBytes)
|
||||
{
|
||||
@@ -194,7 +194,7 @@ class btLauncherCL
|
||||
return -3;
|
||||
}
|
||||
}
|
||||
index+=sizeof(btKernelArgData);
|
||||
index+=sizeof(b3KernelArgData);
|
||||
|
||||
if (argGold->m_isBuffer)
|
||||
{
|
||||
@@ -209,7 +209,7 @@ class btLauncherCL
|
||||
cl_int status = 0;
|
||||
status = clEnqueueReadBuffer( m_commandQueue, m_kernelArguments[ii].m_clBuffer, CL_TRUE, 0, m_kernelArguments[ii].m_argSizeInBytes,
|
||||
memBuf, 0,0,0 );
|
||||
btAssert( status==CL_SUCCESS );
|
||||
b3Assert( status==CL_SUCCESS );
|
||||
clFinish(m_commandQueue);
|
||||
|
||||
for (int b=0;b<m_kernelArguments[ii].m_argSizeInBytes;b++)
|
||||
@@ -256,7 +256,7 @@ class btLauncherCL
|
||||
|
||||
assert(destBufferCapacity>=m_serializationSizeInBytes);
|
||||
|
||||
//todo: use the btSerializer for this to allow for 32/64bit, endianness etc
|
||||
//todo: use the b3Serializer for this to allow for 32/64bit, endianness etc
|
||||
int numArguments = m_kernelArguments.size();
|
||||
int curBufferSize = 0;
|
||||
int* dest = (int*)&destBuffer[curBufferSize];
|
||||
@@ -267,16 +267,16 @@ class btLauncherCL
|
||||
|
||||
for (int i=0;i<this->m_kernelArguments.size();i++)
|
||||
{
|
||||
btKernelArgData* arg = (btKernelArgData*) &destBuffer[curBufferSize];
|
||||
b3KernelArgData* arg = (b3KernelArgData*) &destBuffer[curBufferSize];
|
||||
*arg = m_kernelArguments[i];
|
||||
curBufferSize+=sizeof(btKernelArgData);
|
||||
curBufferSize+=sizeof(b3KernelArgData);
|
||||
if (arg->m_isBuffer==1)
|
||||
{
|
||||
//copy the OpenCL buffer content
|
||||
cl_int status = 0;
|
||||
status = clEnqueueReadBuffer( m_commandQueue, arg->m_clBuffer, 0, 0, arg->m_argSizeInBytes,
|
||||
&destBuffer[curBufferSize], 0,0,0 );
|
||||
btAssert( status==CL_SUCCESS );
|
||||
b3Assert( status==CL_SUCCESS );
|
||||
clFinish(m_commandQueue);
|
||||
curBufferSize+=arg->m_argSizeInBytes;
|
||||
}
|
||||
@@ -317,18 +317,18 @@ class btLauncherCL
|
||||
inline void setConst( const T& consts )
|
||||
{
|
||||
int sz=sizeof(T);
|
||||
btAssert(sz<=BT_CL_MAX_ARG_SIZE);
|
||||
btKernelArgData kernelArg;
|
||||
b3Assert(sz<=B3_CL_MAX_ARG_SIZE);
|
||||
b3KernelArgData kernelArg;
|
||||
kernelArg.m_argIndex = m_idx;
|
||||
kernelArg.m_isBuffer = 0;
|
||||
T* destArg = (T*)kernelArg.m_argData;
|
||||
*destArg = consts;
|
||||
kernelArg.m_argSizeInBytes = sizeof(T);
|
||||
m_kernelArguments.push_back(kernelArg);
|
||||
m_serializationSizeInBytes+=sizeof(btKernelArgData);
|
||||
m_serializationSizeInBytes+=sizeof(b3KernelArgData);
|
||||
|
||||
cl_int status = clSetKernelArg( m_kernel, m_idx++, sz, &consts );
|
||||
btAssert( status == CL_SUCCESS );
|
||||
b3Assert( status == CL_SUCCESS );
|
||||
}
|
||||
|
||||
inline void launch1D( int numThreads, int localSize = 64)
|
||||
@@ -342,9 +342,9 @@ class btLauncherCL
|
||||
size_t lRange[3] = {1,1,1};
|
||||
lRange[0] = localSizeX;
|
||||
lRange[1] = localSizeY;
|
||||
gRange[0] = btMax((size_t)1, (numThreadsX/lRange[0])+(!(numThreadsX%lRange[0])?0:1));
|
||||
gRange[0] = b3Max((size_t)1, (numThreadsX/lRange[0])+(!(numThreadsX%lRange[0])?0:1));
|
||||
gRange[0] *= lRange[0];
|
||||
gRange[1] = btMax((size_t)1, (numThreadsY/lRange[1])+(!(numThreadsY%lRange[1])?0:1));
|
||||
gRange[1] = b3Max((size_t)1, (numThreadsY/lRange[1])+(!(numThreadsY%lRange[1])?0:1));
|
||||
gRange[1] *= lRange[1];
|
||||
|
||||
cl_int status = clEnqueueNDRangeKernel( m_commandQueue,
|
||||
@@ -353,11 +353,11 @@ class btLauncherCL
|
||||
{
|
||||
printf("Error: OpenCL status = %d\n",status);
|
||||
}
|
||||
btAssert( status == CL_SUCCESS );
|
||||
b3Assert( status == CL_SUCCESS );
|
||||
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
|
||||
#endif //BT_LAUNCHER_CL_H
|
||||
#endif //B3_LAUNCHER_CL_H
|
||||
@@ -1,11 +1,11 @@
|
||||
#ifndef BT_OPENCL_ARRAY_H
|
||||
#define BT_OPENCL_ARRAY_H
|
||||
#ifndef B3_OPENCL_ARRAY_H
|
||||
#define B3_OPENCL_ARRAY_H
|
||||
|
||||
#include "Bullet3Common/b3AlignedObjectArray.h"
|
||||
#include "../../basic_initialize/b3OpenCLInclude.h"
|
||||
|
||||
template <typename T>
|
||||
class btOpenCLArray
|
||||
class b3OpenCLArray
|
||||
{
|
||||
int m_size;
|
||||
int m_capacity;
|
||||
@@ -28,7 +28,7 @@ class btOpenCLArray
|
||||
m_capacity=0;
|
||||
}
|
||||
|
||||
btOpenCLArray<T>& operator=(const btOpenCLArray<T>& src);
|
||||
b3OpenCLArray<T>& operator=(const b3OpenCLArray<T>& src);
|
||||
|
||||
SIMD_FORCE_INLINE int allocSize(int size)
|
||||
{
|
||||
@@ -37,7 +37,7 @@ class btOpenCLArray
|
||||
|
||||
public:
|
||||
|
||||
btOpenCLArray(cl_context ctx, cl_command_queue queue, int initialCapacity=0, bool allowGrowingCapacity=true)
|
||||
b3OpenCLArray(cl_context ctx, cl_command_queue queue, int initialCapacity=0, bool allowGrowingCapacity=true)
|
||||
:m_size(0), m_capacity(0),m_clBuffer(0),
|
||||
m_clContext(ctx),m_commandQueue(queue),
|
||||
m_ownsMemory(true),m_allowGrowingCapacity(true)
|
||||
@@ -61,7 +61,7 @@ public:
|
||||
}
|
||||
|
||||
// we could enable this assignment, but need to make sure to avoid accidental deep copies
|
||||
// btOpenCLArray<T>& operator=(const b3AlignedObjectArray<T>& src)
|
||||
// b3OpenCLArray<T>& operator=(const b3AlignedObjectArray<T>& src)
|
||||
// {
|
||||
// copyFromArray(src);
|
||||
// return *this;
|
||||
@@ -74,7 +74,7 @@ public:
|
||||
}
|
||||
|
||||
|
||||
virtual ~btOpenCLArray()
|
||||
virtual ~b3OpenCLArray()
|
||||
{
|
||||
deallocate();
|
||||
m_size=0;
|
||||
@@ -94,8 +94,8 @@ public:
|
||||
|
||||
SIMD_FORCE_INLINE T forcedAt(int n) const
|
||||
{
|
||||
btAssert(n>=0);
|
||||
btAssert(n<capacity());
|
||||
b3Assert(n>=0);
|
||||
b3Assert(n<capacity());
|
||||
T elem;
|
||||
copyToHostPointer(&elem,1,n,true);
|
||||
return elem;
|
||||
@@ -103,8 +103,8 @@ public:
|
||||
|
||||
SIMD_FORCE_INLINE T at(int n) const
|
||||
{
|
||||
btAssert(n>=0);
|
||||
btAssert(n<size());
|
||||
b3Assert(n>=0);
|
||||
b3Assert(n<size());
|
||||
T elem;
|
||||
copyToHostPointer(&elem,1,n,true);
|
||||
return elem;
|
||||
@@ -152,18 +152,18 @@ public:
|
||||
//create a new OpenCL buffer
|
||||
int memSizeInBytes = sizeof(T)*_Count;
|
||||
cl_mem buf = clCreateBuffer(m_clContext, CL_MEM_READ_WRITE, memSizeInBytes, NULL, &ciErrNum);
|
||||
btAssert(ciErrNum==CL_SUCCESS);
|
||||
b3Assert(ciErrNum==CL_SUCCESS);
|
||||
|
||||
//#define BT_ALWAYS_INITIALIZE_OPENCL_BUFFERS
|
||||
#ifdef BT_ALWAYS_INITIALIZE_OPENCL_BUFFERS
|
||||
//#define B3_ALWAYS_INITIALIZE_OPENCL_BUFFERS
|
||||
#ifdef B3_ALWAYS_INITIALIZE_OPENCL_BUFFERS
|
||||
unsigned char* src = (unsigned char*)malloc(memSizeInBytes);
|
||||
for (int i=0;i<memSizeInBytes;i++)
|
||||
src[i] = 0xbb;
|
||||
ciErrNum = clEnqueueWriteBuffer( m_commandQueue, buf, CL_TRUE, 0, memSizeInBytes, src, 0,0,0 );
|
||||
btAssert(ciErrNum==CL_SUCCESS);
|
||||
b3Assert(ciErrNum==CL_SUCCESS);
|
||||
clFinish(m_commandQueue);
|
||||
free(src);
|
||||
#endif //BT_ALWAYS_INITIALIZE_OPENCL_BUFFERS
|
||||
#endif //B3_ALWAYS_INITIALIZE_OPENCL_BUFFERS
|
||||
|
||||
if (copyOldContents)
|
||||
copyToCL(buf, size());
|
||||
@@ -177,7 +177,7 @@ public:
|
||||
} else
|
||||
{
|
||||
//fail: assert and
|
||||
btAssert(0);
|
||||
b3Assert(0);
|
||||
deallocate();
|
||||
}
|
||||
}
|
||||
@@ -189,19 +189,19 @@ public:
|
||||
if (numElements<=0)
|
||||
return;
|
||||
|
||||
btAssert(m_clBuffer);
|
||||
btAssert(destination);
|
||||
b3Assert(m_clBuffer);
|
||||
b3Assert(destination);
|
||||
|
||||
//likely some error, destination is same as source
|
||||
btAssert(m_clBuffer != destination);
|
||||
b3Assert(m_clBuffer != destination);
|
||||
|
||||
btAssert((firstElem+numElements)<=m_size);
|
||||
b3Assert((firstElem+numElements)<=m_size);
|
||||
|
||||
cl_int status = 0;
|
||||
|
||||
|
||||
btAssert(numElements>0);
|
||||
btAssert(numElements<=m_size);
|
||||
b3Assert(numElements>0);
|
||||
b3Assert(numElements<=m_size);
|
||||
|
||||
int srcOffsetBytes = sizeof(T)*firstElem;
|
||||
int dstOffsetInBytes = sizeof(T)*dstOffsetInElems;
|
||||
@@ -209,7 +209,7 @@ public:
|
||||
status = clEnqueueCopyBuffer( m_commandQueue, m_clBuffer, destination,
|
||||
srcOffsetBytes, dstOffsetInBytes, sizeof(T)*numElements, 0, 0, 0 );
|
||||
|
||||
btAssert( status == CL_SUCCESS );
|
||||
b3Assert( status == CL_SUCCESS );
|
||||
}
|
||||
|
||||
void copyFromHost(const b3AlignedObjectArray<T>& srcArray, bool waitForCompletion=true)
|
||||
@@ -225,13 +225,13 @@ public:
|
||||
|
||||
void copyFromHostPointer(const T* src, int numElems, int destFirstElem= 0, bool waitForCompletion=true)
|
||||
{
|
||||
btAssert(numElems+destFirstElem <= capacity());
|
||||
b3Assert(numElems+destFirstElem <= capacity());
|
||||
|
||||
cl_int status = 0;
|
||||
int sizeInBytes=sizeof(T)*numElems;
|
||||
status = clEnqueueWriteBuffer( m_commandQueue, m_clBuffer, 0, sizeof(T)*destFirstElem, sizeInBytes,
|
||||
src, 0,0,0 );
|
||||
btAssert(status == CL_SUCCESS );
|
||||
b3Assert(status == CL_SUCCESS );
|
||||
if (waitForCompletion)
|
||||
clFinish(m_commandQueue);
|
||||
|
||||
@@ -247,18 +247,18 @@ public:
|
||||
|
||||
void copyToHostPointer(T* destPtr, int numElem, int srcFirstElem=0, bool waitForCompletion=true) const
|
||||
{
|
||||
btAssert(numElem+srcFirstElem <= capacity());
|
||||
b3Assert(numElem+srcFirstElem <= capacity());
|
||||
|
||||
cl_int status = 0;
|
||||
status = clEnqueueReadBuffer( m_commandQueue, m_clBuffer, 0, sizeof(T)*srcFirstElem, sizeof(T)*numElem,
|
||||
destPtr, 0,0,0 );
|
||||
btAssert( status==CL_SUCCESS );
|
||||
b3Assert( status==CL_SUCCESS );
|
||||
|
||||
if (waitForCompletion)
|
||||
clFinish(m_commandQueue);
|
||||
}
|
||||
|
||||
void copyFromOpenCLArray(const btOpenCLArray& src)
|
||||
void copyFromOpenCLArray(const b3OpenCLArray& src)
|
||||
{
|
||||
int newSize = src.size();
|
||||
resize(newSize);
|
||||
@@ -271,4 +271,4 @@ public:
|
||||
};
|
||||
|
||||
|
||||
#endif //BT_OPENCL_ARRAY_H
|
||||
#endif //B3_OPENCL_ARRAY_H
|
||||
@@ -1,32 +1,32 @@
|
||||
#include "btPrefixScanCL.h"
|
||||
#include "btFillCL.h"
|
||||
#define BT_PREFIXSCAN_PROG_PATH "opencl/parallel_primitives/kernels/PrefixScanKernels.cl"
|
||||
#include "b3PrefixScanCL.h"
|
||||
#include "b3FillCL.h"
|
||||
#define B3_PREFIXSCAN_PROG_PATH "opencl/parallel_primitives/kernels/PrefixScanKernels.cl"
|
||||
|
||||
#include "btLauncherCL.h"
|
||||
#include "b3LauncherCL.h"
|
||||
#include "../../basic_initialize/b3OpenCLUtils.h"
|
||||
#include "../kernels/PrefixScanKernelsCL.h"
|
||||
|
||||
btPrefixScanCL::btPrefixScanCL(cl_context ctx, cl_device_id device, cl_command_queue queue, int size)
|
||||
b3PrefixScanCL::b3PrefixScanCL(cl_context ctx, cl_device_id device, cl_command_queue queue, int size)
|
||||
:m_commandQueue(queue)
|
||||
{
|
||||
const char* scanKernelSource = prefixScanKernelsCL;
|
||||
cl_int pErrNum;
|
||||
char* additionalMacros=0;
|
||||
|
||||
m_workBuffer = new btOpenCLArray<unsigned int>(ctx,queue,size);
|
||||
cl_program scanProg = b3OpenCLUtils::compileCLProgramFromString( ctx, device, scanKernelSource, &pErrNum,additionalMacros, BT_PREFIXSCAN_PROG_PATH);
|
||||
btAssert(scanProg);
|
||||
m_workBuffer = new b3OpenCLArray<unsigned int>(ctx,queue,size);
|
||||
cl_program scanProg = b3OpenCLUtils::compileCLProgramFromString( ctx, device, scanKernelSource, &pErrNum,additionalMacros, B3_PREFIXSCAN_PROG_PATH);
|
||||
b3Assert(scanProg);
|
||||
|
||||
m_localScanKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, scanKernelSource, "LocalScanKernel", &pErrNum, scanProg,additionalMacros );
|
||||
btAssert(m_localScanKernel );
|
||||
b3Assert(m_localScanKernel );
|
||||
m_blockSumKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, scanKernelSource, "TopLevelScanKernel", &pErrNum, scanProg,additionalMacros );
|
||||
btAssert(m_blockSumKernel );
|
||||
b3Assert(m_blockSumKernel );
|
||||
m_propagationKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, scanKernelSource, "AddOffsetKernel", &pErrNum, scanProg,additionalMacros );
|
||||
btAssert(m_propagationKernel );
|
||||
b3Assert(m_propagationKernel );
|
||||
}
|
||||
|
||||
|
||||
btPrefixScanCL::~btPrefixScanCL()
|
||||
b3PrefixScanCL::~b3PrefixScanCL()
|
||||
{
|
||||
delete m_workBuffer;
|
||||
clReleaseKernel(m_localScanKernel);
|
||||
@@ -35,7 +35,7 @@ btPrefixScanCL::~btPrefixScanCL()
|
||||
}
|
||||
|
||||
template<class T>
|
||||
T btNextPowerOf2(T n)
|
||||
T b3NextPowerOf2(T n)
|
||||
{
|
||||
n -= 1;
|
||||
for(int i=0; i<sizeof(T)*8; i++)
|
||||
@@ -43,37 +43,37 @@ T btNextPowerOf2(T n)
|
||||
return n+1;
|
||||
}
|
||||
|
||||
void btPrefixScanCL::execute(btOpenCLArray<unsigned int>& src, btOpenCLArray<unsigned int>& dst, int n, unsigned int* sum)
|
||||
void b3PrefixScanCL::execute(b3OpenCLArray<unsigned int>& src, b3OpenCLArray<unsigned int>& dst, int n, unsigned int* sum)
|
||||
{
|
||||
|
||||
// btAssert( data->m_option == EXCLUSIVE );
|
||||
// b3Assert( data->m_option == EXCLUSIVE );
|
||||
const unsigned int numBlocks = (const unsigned int)( (n+BLOCK_SIZE*2-1)/(BLOCK_SIZE*2) );
|
||||
|
||||
dst.resize(src.size());
|
||||
m_workBuffer->resize(src.size());
|
||||
|
||||
btInt4 constBuffer;
|
||||
b3Int4 constBuffer;
|
||||
constBuffer.x = n;
|
||||
constBuffer.y = numBlocks;
|
||||
constBuffer.z = (int)btNextPowerOf2( numBlocks );
|
||||
constBuffer.z = (int)b3NextPowerOf2( numBlocks );
|
||||
|
||||
btOpenCLArray<unsigned int>* srcNative = &src;
|
||||
btOpenCLArray<unsigned int>* dstNative = &dst;
|
||||
b3OpenCLArray<unsigned int>* srcNative = &src;
|
||||
b3OpenCLArray<unsigned int>* dstNative = &dst;
|
||||
|
||||
{
|
||||
btBufferInfoCL bInfo[] = { btBufferInfoCL( dstNative->getBufferCL() ), btBufferInfoCL( srcNative->getBufferCL() ), btBufferInfoCL( m_workBuffer->getBufferCL() ) };
|
||||
b3BufferInfoCL bInfo[] = { b3BufferInfoCL( dstNative->getBufferCL() ), b3BufferInfoCL( srcNative->getBufferCL() ), b3BufferInfoCL( m_workBuffer->getBufferCL() ) };
|
||||
|
||||
btLauncherCL launcher( m_commandQueue, m_localScanKernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) );
|
||||
b3LauncherCL launcher( m_commandQueue, m_localScanKernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
|
||||
launcher.setConst( constBuffer );
|
||||
launcher.launch1D( numBlocks*BLOCK_SIZE, BLOCK_SIZE );
|
||||
}
|
||||
|
||||
{
|
||||
btBufferInfoCL bInfo[] = { btBufferInfoCL( m_workBuffer->getBufferCL() ) };
|
||||
b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_workBuffer->getBufferCL() ) };
|
||||
|
||||
btLauncherCL launcher( m_commandQueue, m_blockSumKernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) );
|
||||
b3LauncherCL launcher( m_commandQueue, m_blockSumKernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
|
||||
launcher.setConst( constBuffer );
|
||||
launcher.launch1D( BLOCK_SIZE, BLOCK_SIZE );
|
||||
}
|
||||
@@ -81,9 +81,9 @@ void btPrefixScanCL::execute(btOpenCLArray<unsigned int>& src, btOpenCLArray<uns
|
||||
|
||||
if( numBlocks > 1 )
|
||||
{
|
||||
btBufferInfoCL bInfo[] = { btBufferInfoCL( dstNative->getBufferCL() ), btBufferInfoCL( m_workBuffer->getBufferCL() ) };
|
||||
btLauncherCL launcher( m_commandQueue, m_propagationKernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) );
|
||||
b3BufferInfoCL bInfo[] = { b3BufferInfoCL( dstNative->getBufferCL() ), b3BufferInfoCL( m_workBuffer->getBufferCL() ) };
|
||||
b3LauncherCL launcher( m_commandQueue, m_propagationKernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
|
||||
launcher.setConst( constBuffer );
|
||||
launcher.launch1D( (numBlocks-1)*BLOCK_SIZE, BLOCK_SIZE );
|
||||
}
|
||||
@@ -98,7 +98,7 @@ void btPrefixScanCL::execute(btOpenCLArray<unsigned int>& src, btOpenCLArray<uns
|
||||
}
|
||||
|
||||
|
||||
void btPrefixScanCL::executeHost(b3AlignedObjectArray<unsigned int>& src, b3AlignedObjectArray<unsigned int>& dst, int n, unsigned int* sum)
|
||||
void b3PrefixScanCL::executeHost(b3AlignedObjectArray<unsigned int>& src, b3AlignedObjectArray<unsigned int>& dst, int n, unsigned int* sum)
|
||||
{
|
||||
unsigned int s = 0;
|
||||
//if( data->m_option == EXCLUSIVE )
|
||||
@@ -1,12 +1,12 @@
|
||||
|
||||
#ifndef BT_PREFIX_SCAN_CL_H
|
||||
#define BT_PREFIX_SCAN_CL_H
|
||||
#ifndef B3_PREFIX_SCAN_CL_H
|
||||
#define B3_PREFIX_SCAN_CL_H
|
||||
|
||||
#include "btOpenCLArray.h"
|
||||
#include "btBufferInfoCL.h"
|
||||
#include "b3OpenCLArray.h"
|
||||
#include "b3BufferInfoCL.h"
|
||||
#include "Bullet3Common/b3AlignedObjectArray.h"
|
||||
|
||||
class btPrefixScanCL
|
||||
class b3PrefixScanCL
|
||||
{
|
||||
enum
|
||||
{
|
||||
@@ -21,17 +21,17 @@ class btPrefixScanCL
|
||||
cl_kernel m_blockSumKernel;
|
||||
cl_kernel m_propagationKernel;
|
||||
|
||||
btOpenCLArray<unsigned int>* m_workBuffer;
|
||||
b3OpenCLArray<unsigned int>* m_workBuffer;
|
||||
|
||||
|
||||
public:
|
||||
|
||||
btPrefixScanCL(cl_context ctx, cl_device_id device, cl_command_queue queue,int size=0);
|
||||
b3PrefixScanCL(cl_context ctx, cl_device_id device, cl_command_queue queue,int size=0);
|
||||
|
||||
virtual ~btPrefixScanCL();
|
||||
virtual ~b3PrefixScanCL();
|
||||
|
||||
void execute(btOpenCLArray<unsigned int>& src, btOpenCLArray<unsigned int>& dst, int n, unsigned int* sum = 0);
|
||||
void execute(b3OpenCLArray<unsigned int>& src, b3OpenCLArray<unsigned int>& dst, int n, unsigned int* sum = 0);
|
||||
void executeHost(b3AlignedObjectArray<unsigned int>& src, b3AlignedObjectArray<unsigned int>& dst, int n, unsigned int* sum);
|
||||
};
|
||||
|
||||
#endif //BT_PREFIX_SCAN_CL_H
|
||||
#endif //B3_PREFIX_SCAN_CL_H
|
||||
@@ -1,27 +1,27 @@
|
||||
|
||||
#include "btRadixSort32CL.h"
|
||||
#include "btLauncherCL.h"
|
||||
#include "b3RadixSort32CL.h"
|
||||
#include "b3LauncherCL.h"
|
||||
#include "../../basic_initialize/b3OpenCLUtils.h"
|
||||
#include "btPrefixScanCL.h"
|
||||
#include "btFillCL.h"
|
||||
#include "b3PrefixScanCL.h"
|
||||
#include "b3FillCL.h"
|
||||
|
||||
#define RADIXSORT32_PATH "opencl/parallel_primitives/kernels/RadixSort32Kernels.cl"
|
||||
|
||||
#include "../kernels/RadixSort32KernelsCL.h"
|
||||
|
||||
btRadixSort32CL::btRadixSort32CL(cl_context ctx, cl_device_id device, cl_command_queue queue, int initialCapacity)
|
||||
b3RadixSort32CL::b3RadixSort32CL(cl_context ctx, cl_device_id device, cl_command_queue queue, int initialCapacity)
|
||||
:m_commandQueue(queue)
|
||||
{
|
||||
btOpenCLDeviceInfo info;
|
||||
b3OpenCLDeviceInfo info;
|
||||
b3OpenCLUtils::getDeviceInfo(device,&info);
|
||||
m_deviceCPU = (info.m_deviceType & CL_DEVICE_TYPE_CPU)!=0;
|
||||
|
||||
m_workBuffer1 = new btOpenCLArray<unsigned int>(ctx,queue);
|
||||
m_workBuffer2 = new btOpenCLArray<unsigned int>(ctx,queue);
|
||||
m_workBuffer3 = new btOpenCLArray<btSortData>(ctx,queue);
|
||||
m_workBuffer3a = new btOpenCLArray<unsigned int>(ctx,queue);
|
||||
m_workBuffer4 = new btOpenCLArray<btSortData>(ctx,queue);
|
||||
m_workBuffer4a = new btOpenCLArray<unsigned int>(ctx,queue);
|
||||
m_workBuffer1 = new b3OpenCLArray<unsigned int>(ctx,queue);
|
||||
m_workBuffer2 = new b3OpenCLArray<unsigned int>(ctx,queue);
|
||||
m_workBuffer3 = new b3OpenCLArray<b3SortData>(ctx,queue);
|
||||
m_workBuffer3a = new b3OpenCLArray<unsigned int>(ctx,queue);
|
||||
m_workBuffer4 = new b3OpenCLArray<b3SortData>(ctx,queue);
|
||||
m_workBuffer4a = new b3OpenCLArray<unsigned int>(ctx,queue);
|
||||
|
||||
|
||||
if (initialCapacity>0)
|
||||
@@ -33,8 +33,8 @@ btRadixSort32CL::btRadixSort32CL(cl_context ctx, cl_device_id device, cl_command
|
||||
m_workBuffer4a->resize(initialCapacity);
|
||||
}
|
||||
|
||||
m_scan = new btPrefixScanCL(ctx,device,queue);
|
||||
m_fill = new btFillCL(ctx,device,queue);
|
||||
m_scan = new b3PrefixScanCL(ctx,device,queue);
|
||||
m_fill = new b3FillCL(ctx,device,queue);
|
||||
|
||||
const char* additionalMacros = "";
|
||||
const char* srcFileNameForCaching="";
|
||||
@@ -43,15 +43,15 @@ btRadixSort32CL::btRadixSort32CL(cl_context ctx, cl_device_id device, cl_command
|
||||
const char* kernelSource = radixSort32KernelsCL;
|
||||
|
||||
cl_program sortProg = b3OpenCLUtils::compileCLProgramFromString( ctx, device, kernelSource, &pErrNum,additionalMacros, RADIXSORT32_PATH);
|
||||
btAssert(sortProg);
|
||||
b3Assert(sortProg);
|
||||
|
||||
m_streamCountSortDataKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "StreamCountSortDataKernel", &pErrNum, sortProg,additionalMacros );
|
||||
btAssert(m_streamCountSortDataKernel );
|
||||
b3Assert(m_streamCountSortDataKernel );
|
||||
|
||||
|
||||
|
||||
m_streamCountKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "StreamCountKernel", &pErrNum, sortProg,additionalMacros );
|
||||
btAssert(m_streamCountKernel);
|
||||
b3Assert(m_streamCountKernel);
|
||||
|
||||
|
||||
|
||||
@@ -59,23 +59,23 @@ btRadixSort32CL::btRadixSort32CL(cl_context ctx, cl_device_id device, cl_command
|
||||
{
|
||||
|
||||
m_sortAndScatterSortDataKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "SortAndScatterSortDataKernelSerial", &pErrNum, sortProg,additionalMacros );
|
||||
btAssert(m_sortAndScatterSortDataKernel);
|
||||
b3Assert(m_sortAndScatterSortDataKernel);
|
||||
m_sortAndScatterKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "SortAndScatterKernelSerial", &pErrNum, sortProg,additionalMacros );
|
||||
btAssert(m_sortAndScatterKernel);
|
||||
b3Assert(m_sortAndScatterKernel);
|
||||
} else
|
||||
{
|
||||
m_sortAndScatterSortDataKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "SortAndScatterSortDataKernel", &pErrNum, sortProg,additionalMacros );
|
||||
btAssert(m_sortAndScatterSortDataKernel);
|
||||
b3Assert(m_sortAndScatterSortDataKernel);
|
||||
m_sortAndScatterKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "SortAndScatterKernel", &pErrNum, sortProg,additionalMacros );
|
||||
btAssert(m_sortAndScatterKernel);
|
||||
b3Assert(m_sortAndScatterKernel);
|
||||
}
|
||||
|
||||
m_prefixScanKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "PrefixScanKernel", &pErrNum, sortProg,additionalMacros );
|
||||
btAssert(m_prefixScanKernel);
|
||||
b3Assert(m_prefixScanKernel);
|
||||
|
||||
}
|
||||
|
||||
btRadixSort32CL::~btRadixSort32CL()
|
||||
b3RadixSort32CL::~b3RadixSort32CL()
|
||||
{
|
||||
delete m_scan;
|
||||
delete m_fill;
|
||||
@@ -93,7 +93,7 @@ btRadixSort32CL::~btRadixSort32CL()
|
||||
clReleaseKernel(m_prefixScanKernel);
|
||||
}
|
||||
|
||||
void btRadixSort32CL::executeHost(b3AlignedObjectArray<btSortData>& inout, int sortBits /* = 32 */)
|
||||
void b3RadixSort32CL::executeHost(b3AlignedObjectArray<b3SortData>& inout, int sortBits /* = 32 */)
|
||||
{
|
||||
int n = inout.size();
|
||||
const int BITS_PER_PASS = 8;
|
||||
@@ -103,10 +103,10 @@ void btRadixSort32CL::executeHost(b3AlignedObjectArray<btSortData>& inout, int s
|
||||
int tables[NUM_TABLES];
|
||||
int counter[NUM_TABLES];
|
||||
|
||||
btSortData* src = &inout[0];
|
||||
b3AlignedObjectArray<btSortData> workbuffer;
|
||||
b3SortData* src = &inout[0];
|
||||
b3AlignedObjectArray<b3SortData> workbuffer;
|
||||
workbuffer.resize(inout.size());
|
||||
btSortData* dst = &workbuffer[0];
|
||||
b3SortData* dst = &workbuffer[0];
|
||||
|
||||
int count=0;
|
||||
for(int startBit=0; startBit<sortBits; startBit+=BITS_PER_PASS)
|
||||
@@ -152,21 +152,21 @@ void btRadixSort32CL::executeHost(b3AlignedObjectArray<btSortData>& inout, int s
|
||||
counter[tableIdx] ++;
|
||||
}
|
||||
|
||||
btSwap( src, dst );
|
||||
b3Swap( src, dst );
|
||||
count++;
|
||||
}
|
||||
|
||||
if (count&1)
|
||||
{
|
||||
btAssert(0);//need to copy
|
||||
b3Assert(0);//need to copy
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
void btRadixSort32CL::executeHost(btOpenCLArray<btSortData>& keyValuesInOut, int sortBits /* = 32 */)
|
||||
void b3RadixSort32CL::executeHost(b3OpenCLArray<b3SortData>& keyValuesInOut, int sortBits /* = 32 */)
|
||||
{
|
||||
|
||||
b3AlignedObjectArray<btSortData> inout;
|
||||
b3AlignedObjectArray<b3SortData> inout;
|
||||
keyValuesInOut.copyToHost(inout);
|
||||
|
||||
executeHost(inout,sortBits);
|
||||
@@ -174,8 +174,8 @@ void btRadixSort32CL::executeHost(btOpenCLArray<btSortData>& keyValuesInOut, int
|
||||
keyValuesInOut.copyFromHost(inout);
|
||||
}
|
||||
|
||||
void btRadixSort32CL::execute(btOpenCLArray<unsigned int>& keysIn, btOpenCLArray<unsigned int>& keysOut, btOpenCLArray<unsigned int>& valuesIn,
|
||||
btOpenCLArray<unsigned int>& valuesOut, int n, int sortBits)
|
||||
void b3RadixSort32CL::execute(b3OpenCLArray<unsigned int>& keysIn, b3OpenCLArray<unsigned int>& keysOut, b3OpenCLArray<unsigned int>& valuesIn,
|
||||
b3OpenCLArray<unsigned int>& valuesOut, int n, int sortBits)
|
||||
{
|
||||
|
||||
}
|
||||
@@ -184,7 +184,7 @@ void btRadixSort32CL::execute(btOpenCLArray<unsigned int>& keysIn, btOpenCLArray
|
||||
//#define DEBUG_RADIXSORT2
|
||||
|
||||
|
||||
void btRadixSort32CL::execute(btOpenCLArray<btSortData>& keyValuesInOut, int sortBits /* = 32 */)
|
||||
void b3RadixSort32CL::execute(b3OpenCLArray<b3SortData>& keyValuesInOut, int sortBits /* = 32 */)
|
||||
{
|
||||
|
||||
int originalSize = keyValuesInOut.size();
|
||||
@@ -194,7 +194,7 @@ void btRadixSort32CL::execute(btOpenCLArray<btSortData>& keyValuesInOut, int sor
|
||||
int dataAlignment = DATA_ALIGNMENT;
|
||||
|
||||
#ifdef DEBUG_RADIXSORT2
|
||||
b3AlignedObjectArray<btSortData> test2;
|
||||
b3AlignedObjectArray<b3SortData> test2;
|
||||
keyValuesInOut.copyToHost(test2);
|
||||
printf("numElem = %d\n",test2.size());
|
||||
for (int i=0;i<test2.size();i++)
|
||||
@@ -204,20 +204,20 @@ void btRadixSort32CL::execute(btOpenCLArray<btSortData>& keyValuesInOut, int sor
|
||||
}
|
||||
#endif //DEBUG_RADIXSORT2
|
||||
|
||||
btOpenCLArray<btSortData>* src = 0;
|
||||
b3OpenCLArray<b3SortData>* src = 0;
|
||||
|
||||
if (workingSize%dataAlignment)
|
||||
{
|
||||
workingSize += dataAlignment-(workingSize%dataAlignment);
|
||||
m_workBuffer4->copyFromOpenCLArray(keyValuesInOut);
|
||||
m_workBuffer4->resize(workingSize);
|
||||
btSortData fillValue;
|
||||
b3SortData fillValue;
|
||||
fillValue.m_key = 0xffffffff;
|
||||
fillValue.m_value = 0xffffffff;
|
||||
|
||||
#define USE_BTFILL
|
||||
#ifdef USE_BTFILL
|
||||
m_fill->execute((btOpenCLArray<btInt2>&)*m_workBuffer4,(btInt2&)fillValue,workingSize-originalSize,originalSize);
|
||||
m_fill->execute((b3OpenCLArray<b3Int2>&)*m_workBuffer4,(b3Int2&)fillValue,workingSize-originalSize,originalSize);
|
||||
#else
|
||||
//fill the remaining bits (very slow way, todo: fill on GPU/OpenCL side)
|
||||
|
||||
@@ -234,7 +234,7 @@ void btRadixSort32CL::execute(btOpenCLArray<btSortData>& keyValuesInOut, int sor
|
||||
m_workBuffer4->resize(0);
|
||||
}
|
||||
|
||||
btAssert( workingSize%DATA_ALIGNMENT == 0 );
|
||||
b3Assert( workingSize%DATA_ALIGNMENT == 0 );
|
||||
int minCap = NUM_BUCKET*NUM_WGS;
|
||||
|
||||
|
||||
@@ -245,20 +245,20 @@ void btRadixSort32CL::execute(btOpenCLArray<btSortData>& keyValuesInOut, int sor
|
||||
|
||||
|
||||
// ADLASSERT( ELEMENTS_PER_WORK_ITEM == 4 );
|
||||
btAssert( BITS_PER_PASS == 4 );
|
||||
btAssert( WG_SIZE == 64 );
|
||||
btAssert( (sortBits&0x3) == 0 );
|
||||
b3Assert( BITS_PER_PASS == 4 );
|
||||
b3Assert( WG_SIZE == 64 );
|
||||
b3Assert( (sortBits&0x3) == 0 );
|
||||
|
||||
|
||||
|
||||
btOpenCLArray<btSortData>* dst = m_workBuffer3;
|
||||
b3OpenCLArray<b3SortData>* dst = m_workBuffer3;
|
||||
|
||||
btOpenCLArray<unsigned int>* srcHisto = m_workBuffer1;
|
||||
btOpenCLArray<unsigned int>* destHisto = m_workBuffer2;
|
||||
b3OpenCLArray<unsigned int>* srcHisto = m_workBuffer1;
|
||||
b3OpenCLArray<unsigned int>* destHisto = m_workBuffer2;
|
||||
|
||||
|
||||
int nWGs = NUM_WGS;
|
||||
btConstData cdata;
|
||||
b3ConstData cdata;
|
||||
|
||||
{
|
||||
int blockSize = ELEMENTS_PER_WORK_ITEM*WG_SIZE;//set at 256
|
||||
@@ -294,10 +294,10 @@ void btRadixSort32CL::execute(btOpenCLArray<btSortData>& keyValuesInOut, int sor
|
||||
|
||||
if (src->size())
|
||||
{
|
||||
btBufferInfoCL bInfo[] = { btBufferInfoCL( src->getBufferCL(), true ), btBufferInfoCL( srcHisto->getBufferCL() ) };
|
||||
btLauncherCL launcher(m_commandQueue, m_streamCountSortDataKernel);
|
||||
b3BufferInfoCL bInfo[] = { b3BufferInfoCL( src->getBufferCL(), true ), b3BufferInfoCL( srcHisto->getBufferCL() ) };
|
||||
b3LauncherCL launcher(m_commandQueue, m_streamCountSortDataKernel);
|
||||
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
|
||||
launcher.setConst( cdata );
|
||||
|
||||
int num = NUM_WGS*WG_SIZE;
|
||||
@@ -328,9 +328,9 @@ void btRadixSort32CL::execute(btOpenCLArray<btSortData>& keyValuesInOut, int sor
|
||||
|
||||
if (fastScan)
|
||||
{// prefix scan group histogram
|
||||
btBufferInfoCL bInfo[] = { btBufferInfoCL( srcHisto->getBufferCL() ) };
|
||||
btLauncherCL launcher( m_commandQueue, m_prefixScanKernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) );
|
||||
b3BufferInfoCL bInfo[] = { b3BufferInfoCL( srcHisto->getBufferCL() ) };
|
||||
b3LauncherCL launcher( m_commandQueue, m_prefixScanKernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
|
||||
launcher.setConst( cdata );
|
||||
launcher.launch1D( 128, 128 );
|
||||
destHisto = srcHisto;
|
||||
@@ -362,9 +362,9 @@ void btRadixSort32CL::execute(btOpenCLArray<btSortData>& keyValuesInOut, int sor
|
||||
|
||||
if (src->size())
|
||||
{// local sort and distribute
|
||||
btBufferInfoCL bInfo[] = { btBufferInfoCL( src->getBufferCL(), true ), btBufferInfoCL( destHisto->getBufferCL(), true ), btBufferInfoCL( dst->getBufferCL() )};
|
||||
btLauncherCL launcher( m_commandQueue, m_sortAndScatterSortDataKernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) );
|
||||
b3BufferInfoCL bInfo[] = { b3BufferInfoCL( src->getBufferCL(), true ), b3BufferInfoCL( destHisto->getBufferCL(), true ), b3BufferInfoCL( dst->getBufferCL() )};
|
||||
b3LauncherCL launcher( m_commandQueue, m_sortAndScatterSortDataKernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
|
||||
launcher.setConst( cdata );
|
||||
launcher.launch1D( nWGs*WG_SIZE, WG_SIZE );
|
||||
|
||||
@@ -379,8 +379,8 @@ void btRadixSort32CL::execute(btOpenCLArray<btSortData>& keyValuesInOut, int sor
|
||||
int startBit = ib;
|
||||
|
||||
destHisto->copyToHost(testHist);
|
||||
b3AlignedObjectArray<btSortData> srcHost;
|
||||
b3AlignedObjectArray<btSortData> dstHost;
|
||||
b3AlignedObjectArray<b3SortData> srcHost;
|
||||
b3AlignedObjectArray<b3SortData> dstHost;
|
||||
dstHost.resize(src->size());
|
||||
|
||||
src->copyToHost(srcHost);
|
||||
@@ -405,11 +405,11 @@ void btRadixSort32CL::execute(btOpenCLArray<btSortData>& keyValuesInOut, int sor
|
||||
int counter2[NUM_TABLES]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
|
||||
|
||||
int tables[NUM_TABLES];
|
||||
b3AlignedObjectArray<btSortData> dstHostOK;
|
||||
b3AlignedObjectArray<b3SortData> dstHostOK;
|
||||
dstHostOK.resize(src->size());
|
||||
|
||||
destHisto->copyToHost(testHist);
|
||||
b3AlignedObjectArray<btSortData> srcHost;
|
||||
b3AlignedObjectArray<b3SortData> srcHost;
|
||||
src->copyToHost(srcHost);
|
||||
|
||||
int blockSize = 256;
|
||||
@@ -435,7 +435,7 @@ void btRadixSort32CL::execute(btOpenCLArray<btSortData>& keyValuesInOut, int sor
|
||||
}
|
||||
|
||||
|
||||
b3AlignedObjectArray<btSortData> dstHost;
|
||||
b3AlignedObjectArray<b3SortData> dstHost;
|
||||
dstHost.resize(src->size());
|
||||
|
||||
|
||||
@@ -449,7 +449,7 @@ void btRadixSort32CL::execute(btOpenCLArray<btSortData>& keyValuesInOut, int sor
|
||||
|
||||
int nBlocks = (n)/blockSize - nBlocksPerWG*wgIdx;
|
||||
|
||||
for(int iblock=0; iblock<btMin(cdata.m_nBlocksPerWG, nBlocks); iblock++)
|
||||
for(int iblock=0; iblock<b3Min(cdata.m_nBlocksPerWG, nBlocks); iblock++)
|
||||
{
|
||||
for (int lIdx = 0;lIdx < 64;lIdx++)
|
||||
{
|
||||
@@ -470,7 +470,7 @@ void btRadixSort32CL::execute(btOpenCLArray<btSortData>& keyValuesInOut, int sor
|
||||
|
||||
int destIndex = testHist[tableIdx*NUM_WGS+wgIdx] + counter[tableIdx];
|
||||
|
||||
btSortData ok = dstHostOK[destIndex];
|
||||
b3SortData ok = dstHostOK[destIndex];
|
||||
|
||||
if (ok.m_key != srcHost[i].m_key)
|
||||
{
|
||||
@@ -512,8 +512,8 @@ void btRadixSort32CL::execute(btOpenCLArray<btSortData>& keyValuesInOut, int sor
|
||||
printf("testHist[%d]=%d\n",i,testHist[i]);
|
||||
}
|
||||
#endif //DEBUG_RADIXSORT
|
||||
btSwap(src, dst );
|
||||
btSwap(srcHisto,destHisto);
|
||||
b3Swap(src, dst );
|
||||
b3Swap(srcHisto,destHisto);
|
||||
|
||||
#ifdef DEBUG_RADIXSORT2
|
||||
keyValuesInOut.copyToHost(test2);
|
||||
@@ -537,7 +537,7 @@ void btRadixSort32CL::execute(btOpenCLArray<btSortData>& keyValuesInOut, int sor
|
||||
|
||||
if (count&1)
|
||||
{
|
||||
btAssert(0);//need to copy from workbuffer to keyValuesInOut
|
||||
b3Assert(0);//need to copy from workbuffer to keyValuesInOut
|
||||
}
|
||||
|
||||
if (m_workBuffer4->size())
|
||||
@@ -565,7 +565,7 @@ void btRadixSort32CL::execute(btOpenCLArray<btSortData>& keyValuesInOut, int sor
|
||||
|
||||
|
||||
|
||||
void btRadixSort32CL::execute(btOpenCLArray<unsigned int>& keysInOut, int sortBits /* = 32 */)
|
||||
void b3RadixSort32CL::execute(b3OpenCLArray<unsigned int>& keysInOut, int sortBits /* = 32 */)
|
||||
{
|
||||
int originalSize = keysInOut.size();
|
||||
int workingSize = originalSize;
|
||||
@@ -573,7 +573,7 @@ void btRadixSort32CL::execute(btOpenCLArray<unsigned int>& keysInOut, int sortBi
|
||||
|
||||
int dataAlignment = DATA_ALIGNMENT;
|
||||
|
||||
btOpenCLArray<unsigned int>* src = 0;
|
||||
b3OpenCLArray<unsigned int>* src = 0;
|
||||
|
||||
if (workingSize%dataAlignment)
|
||||
{
|
||||
@@ -593,7 +593,7 @@ void btRadixSort32CL::execute(btOpenCLArray<unsigned int>& keysInOut, int sortBi
|
||||
|
||||
|
||||
|
||||
btAssert( workingSize%DATA_ALIGNMENT == 0 );
|
||||
b3Assert( workingSize%DATA_ALIGNMENT == 0 );
|
||||
int minCap = NUM_BUCKET*NUM_WGS;
|
||||
|
||||
|
||||
@@ -605,20 +605,20 @@ void btRadixSort32CL::execute(btOpenCLArray<unsigned int>& keysInOut, int sortBi
|
||||
m_workBuffer3a->resize(workingSize);
|
||||
|
||||
// ADLASSERT( ELEMENTS_PER_WORK_ITEM == 4 );
|
||||
btAssert( BITS_PER_PASS == 4 );
|
||||
btAssert( WG_SIZE == 64 );
|
||||
btAssert( (sortBits&0x3) == 0 );
|
||||
b3Assert( BITS_PER_PASS == 4 );
|
||||
b3Assert( WG_SIZE == 64 );
|
||||
b3Assert( (sortBits&0x3) == 0 );
|
||||
|
||||
|
||||
|
||||
btOpenCLArray<unsigned int>* dst = m_workBuffer3a;
|
||||
b3OpenCLArray<unsigned int>* dst = m_workBuffer3a;
|
||||
|
||||
btOpenCLArray<unsigned int>* srcHisto = m_workBuffer1;
|
||||
btOpenCLArray<unsigned int>* destHisto = m_workBuffer2;
|
||||
b3OpenCLArray<unsigned int>* srcHisto = m_workBuffer1;
|
||||
b3OpenCLArray<unsigned int>* destHisto = m_workBuffer2;
|
||||
|
||||
|
||||
int nWGs = NUM_WGS;
|
||||
btConstData cdata;
|
||||
b3ConstData cdata;
|
||||
|
||||
{
|
||||
int blockSize = ELEMENTS_PER_WORK_ITEM*WG_SIZE;//set at 256
|
||||
@@ -641,10 +641,10 @@ void btRadixSort32CL::execute(btOpenCLArray<unsigned int>& keysInOut, int sortBi
|
||||
|
||||
if (src->size())
|
||||
{
|
||||
btBufferInfoCL bInfo[] = { btBufferInfoCL( src->getBufferCL(), true ), btBufferInfoCL( srcHisto->getBufferCL() ) };
|
||||
btLauncherCL launcher(m_commandQueue, m_streamCountKernel);
|
||||
b3BufferInfoCL bInfo[] = { b3BufferInfoCL( src->getBufferCL(), true ), b3BufferInfoCL( srcHisto->getBufferCL() ) };
|
||||
b3LauncherCL launcher(m_commandQueue, m_streamCountKernel);
|
||||
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
|
||||
launcher.setConst( cdata );
|
||||
|
||||
int num = NUM_WGS*WG_SIZE;
|
||||
@@ -663,9 +663,9 @@ void btRadixSort32CL::execute(btOpenCLArray<unsigned int>& keysInOut, int sortBi
|
||||
|
||||
if (fastScan)
|
||||
{// prefix scan group histogram
|
||||
btBufferInfoCL bInfo[] = { btBufferInfoCL( srcHisto->getBufferCL() ) };
|
||||
btLauncherCL launcher( m_commandQueue, m_prefixScanKernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) );
|
||||
b3BufferInfoCL bInfo[] = { b3BufferInfoCL( srcHisto->getBufferCL() ) };
|
||||
b3LauncherCL launcher( m_commandQueue, m_prefixScanKernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
|
||||
launcher.setConst( cdata );
|
||||
launcher.launch1D( 128, 128 );
|
||||
destHisto = srcHisto;
|
||||
@@ -677,23 +677,23 @@ void btRadixSort32CL::execute(btOpenCLArray<unsigned int>& keysInOut, int sortBi
|
||||
|
||||
if (src->size())
|
||||
{// local sort and distribute
|
||||
btBufferInfoCL bInfo[] = { btBufferInfoCL( src->getBufferCL(), true ), btBufferInfoCL( destHisto->getBufferCL(), true ), btBufferInfoCL( dst->getBufferCL() )};
|
||||
btLauncherCL launcher( m_commandQueue, m_sortAndScatterKernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) );
|
||||
b3BufferInfoCL bInfo[] = { b3BufferInfoCL( src->getBufferCL(), true ), b3BufferInfoCL( destHisto->getBufferCL(), true ), b3BufferInfoCL( dst->getBufferCL() )};
|
||||
b3LauncherCL launcher( m_commandQueue, m_sortAndScatterKernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
|
||||
launcher.setConst( cdata );
|
||||
launcher.launch1D( nWGs*WG_SIZE, WG_SIZE );
|
||||
|
||||
}
|
||||
|
||||
btSwap(src, dst );
|
||||
btSwap(srcHisto,destHisto);
|
||||
b3Swap(src, dst );
|
||||
b3Swap(srcHisto,destHisto);
|
||||
|
||||
count++;
|
||||
}
|
||||
|
||||
if (count&1)
|
||||
{
|
||||
btAssert(0);//need to copy from workbuffer to keyValuesInOut
|
||||
b3Assert(0);//need to copy from workbuffer to keyValuesInOut
|
||||
}
|
||||
|
||||
if (m_workBuffer4a->size())
|
||||
85
opencl/parallel_primitives/host/b3RadixSort32CL.h
Normal file
85
opencl/parallel_primitives/host/b3RadixSort32CL.h
Normal file
@@ -0,0 +1,85 @@
|
||||
|
||||
#ifndef B3_RADIXSORT32_H
|
||||
#define B3_RADIXSORT32_H
|
||||
|
||||
#include "b3OpenCLArray.h"
|
||||
|
||||
struct b3SortData
|
||||
{
|
||||
int m_key;
|
||||
int m_value;
|
||||
};
|
||||
#include "b3BufferInfoCL.h"
|
||||
|
||||
class b3RadixSort32CL
|
||||
{
|
||||
|
||||
b3OpenCLArray<unsigned int>* m_workBuffer1;
|
||||
b3OpenCLArray<unsigned int>* m_workBuffer2;
|
||||
|
||||
b3OpenCLArray<b3SortData>* m_workBuffer3;
|
||||
b3OpenCLArray<b3SortData>* m_workBuffer4;
|
||||
|
||||
b3OpenCLArray<unsigned int>* m_workBuffer3a;
|
||||
b3OpenCLArray<unsigned int>* m_workBuffer4a;
|
||||
|
||||
cl_command_queue m_commandQueue;
|
||||
|
||||
cl_kernel m_streamCountSortDataKernel;
|
||||
cl_kernel m_streamCountKernel;
|
||||
|
||||
cl_kernel m_prefixScanKernel;
|
||||
cl_kernel m_sortAndScatterSortDataKernel;
|
||||
cl_kernel m_sortAndScatterKernel;
|
||||
|
||||
|
||||
bool m_deviceCPU;
|
||||
|
||||
class b3PrefixScanCL* m_scan;
|
||||
class b3FillCL* m_fill;
|
||||
|
||||
public:
|
||||
struct b3ConstData
|
||||
{
|
||||
int m_n;
|
||||
int m_nWGs;
|
||||
int m_startBit;
|
||||
int m_nBlocksPerWG;
|
||||
};
|
||||
enum
|
||||
{
|
||||
DATA_ALIGNMENT = 256,
|
||||
WG_SIZE = 64,
|
||||
BLOCK_SIZE = 256,
|
||||
ELEMENTS_PER_WORK_ITEM = (BLOCK_SIZE/WG_SIZE),
|
||||
BITS_PER_PASS = 4,
|
||||
NUM_BUCKET=(1<<BITS_PER_PASS),
|
||||
// if you change this, change nPerWI in kernel as well
|
||||
NUM_WGS = 20*6, // cypress
|
||||
// NUM_WGS = 24*6, // cayman
|
||||
// NUM_WGS = 32*4, // nv
|
||||
};
|
||||
|
||||
|
||||
private:
|
||||
|
||||
|
||||
public:
|
||||
|
||||
b3RadixSort32CL(cl_context ctx, cl_device_id device, cl_command_queue queue, int initialCapacity =0);
|
||||
|
||||
virtual ~b3RadixSort32CL();
|
||||
|
||||
void execute(b3OpenCLArray<unsigned int>& keysIn, b3OpenCLArray<unsigned int>& keysOut, b3OpenCLArray<unsigned int>& valuesIn,
|
||||
b3OpenCLArray<unsigned int>& valuesOut, int n, int sortBits = 32);
|
||||
|
||||
///keys only
|
||||
void execute(b3OpenCLArray<unsigned int>& keysInOut, int sortBits = 32 );
|
||||
|
||||
void execute(b3OpenCLArray<b3SortData>& keyValuesInOut, int sortBits = 32 );
|
||||
void executeHost(b3OpenCLArray<b3SortData>& keyValuesInOut, int sortBits = 32);
|
||||
void executeHost(b3AlignedObjectArray<b3SortData>& keyValuesInOut, int sortBits = 32);
|
||||
|
||||
};
|
||||
#endif //B3_RADIXSORT32_H
|
||||
|
||||
@@ -1,19 +0,0 @@
|
||||
|
||||
#ifndef BT_BUFFER_INFO_CL_H
|
||||
#define BT_BUFFER_INFO_CL_H
|
||||
|
||||
#include "btOpenCLArray.h"
|
||||
|
||||
|
||||
struct btBufferInfoCL
|
||||
{
|
||||
//btBufferInfoCL(){}
|
||||
|
||||
// template<typename T>
|
||||
btBufferInfoCL(cl_mem buff, bool isReadOnly = false): m_clBuffer(buff), m_isReadOnly(isReadOnly){}
|
||||
|
||||
cl_mem m_clBuffer;
|
||||
bool m_isReadOnly;
|
||||
};
|
||||
|
||||
#endif //BT_BUFFER_INFO_CL_H
|
||||
@@ -1,63 +0,0 @@
|
||||
#ifndef BT_FILL_CL_H
|
||||
#define BT_FILL_CL_H
|
||||
|
||||
#include "btOpenCLArray.h"
|
||||
#include "Bullet3Common/b3Scalar.h"
|
||||
|
||||
#include "btInt2.h"
|
||||
#include "btInt4.h"
|
||||
|
||||
|
||||
class btFillCL
|
||||
{
|
||||
|
||||
cl_command_queue m_commandQueue;
|
||||
|
||||
cl_kernel m_fillKernelInt2;
|
||||
cl_kernel m_fillIntKernel;
|
||||
cl_kernel m_fillUnsignedIntKernel;
|
||||
cl_kernel m_fillFloatKernel;
|
||||
|
||||
public:
|
||||
|
||||
struct btConstData
|
||||
{
|
||||
union
|
||||
{
|
||||
btInt4 m_data;
|
||||
btUnsignedInt4 m_UnsignedData;
|
||||
};
|
||||
int m_offset;
|
||||
int m_n;
|
||||
int m_padding[2];
|
||||
};
|
||||
|
||||
protected:
|
||||
|
||||
public:
|
||||
|
||||
btFillCL(cl_context ctx, cl_device_id device, cl_command_queue queue);
|
||||
|
||||
virtual ~btFillCL();
|
||||
|
||||
void execute(btOpenCLArray<unsigned int>& src, const unsigned int value, int n, int offset = 0);
|
||||
|
||||
void execute(btOpenCLArray<int>& src, const int value, int n, int offset = 0);
|
||||
|
||||
void execute(btOpenCLArray<float>& src, const float value, int n, int offset = 0);
|
||||
|
||||
void execute(btOpenCLArray<btInt2>& src, const btInt2& value, int n, int offset = 0);
|
||||
|
||||
void executeHost(b3AlignedObjectArray<btInt2> &src, const btInt2 &value, int n, int offset);
|
||||
|
||||
void executeHost(b3AlignedObjectArray<int> &src, const int value, int n, int offset);
|
||||
|
||||
// void execute(btOpenCLArray<btInt4>& src, const btInt4& value, int n, int offset = 0);
|
||||
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#endif //BT_FILL_CL_H
|
||||
@@ -1,85 +0,0 @@
|
||||
|
||||
#ifndef BT_RADIXSORT32_H
|
||||
#define BT_RADIXSORT32_H
|
||||
|
||||
#include "btOpenCLArray.h"
|
||||
|
||||
struct btSortData
|
||||
{
|
||||
int m_key;
|
||||
int m_value;
|
||||
};
|
||||
#include "btBufferInfoCL.h"
|
||||
|
||||
class btRadixSort32CL
|
||||
{
|
||||
|
||||
btOpenCLArray<unsigned int>* m_workBuffer1;
|
||||
btOpenCLArray<unsigned int>* m_workBuffer2;
|
||||
|
||||
btOpenCLArray<btSortData>* m_workBuffer3;
|
||||
btOpenCLArray<btSortData>* m_workBuffer4;
|
||||
|
||||
btOpenCLArray<unsigned int>* m_workBuffer3a;
|
||||
btOpenCLArray<unsigned int>* m_workBuffer4a;
|
||||
|
||||
cl_command_queue m_commandQueue;
|
||||
|
||||
cl_kernel m_streamCountSortDataKernel;
|
||||
cl_kernel m_streamCountKernel;
|
||||
|
||||
cl_kernel m_prefixScanKernel;
|
||||
cl_kernel m_sortAndScatterSortDataKernel;
|
||||
cl_kernel m_sortAndScatterKernel;
|
||||
|
||||
|
||||
bool m_deviceCPU;
|
||||
|
||||
class btPrefixScanCL* m_scan;
|
||||
class btFillCL* m_fill;
|
||||
|
||||
public:
|
||||
struct btConstData
|
||||
{
|
||||
int m_n;
|
||||
int m_nWGs;
|
||||
int m_startBit;
|
||||
int m_nBlocksPerWG;
|
||||
};
|
||||
enum
|
||||
{
|
||||
DATA_ALIGNMENT = 256,
|
||||
WG_SIZE = 64,
|
||||
BLOCK_SIZE = 256,
|
||||
ELEMENTS_PER_WORK_ITEM = (BLOCK_SIZE/WG_SIZE),
|
||||
BITS_PER_PASS = 4,
|
||||
NUM_BUCKET=(1<<BITS_PER_PASS),
|
||||
// if you change this, change nPerWI in kernel as well
|
||||
NUM_WGS = 20*6, // cypress
|
||||
// NUM_WGS = 24*6, // cayman
|
||||
// NUM_WGS = 32*4, // nv
|
||||
};
|
||||
|
||||
|
||||
private:
|
||||
|
||||
|
||||
public:
|
||||
|
||||
btRadixSort32CL(cl_context ctx, cl_device_id device, cl_command_queue queue, int initialCapacity =0);
|
||||
|
||||
virtual ~btRadixSort32CL();
|
||||
|
||||
void execute(btOpenCLArray<unsigned int>& keysIn, btOpenCLArray<unsigned int>& keysOut, btOpenCLArray<unsigned int>& valuesIn,
|
||||
btOpenCLArray<unsigned int>& valuesOut, int n, int sortBits = 32);
|
||||
|
||||
///keys only
|
||||
void execute(btOpenCLArray<unsigned int>& keysInOut, int sortBits = 32 );
|
||||
|
||||
void execute(btOpenCLArray<btSortData>& keyValuesInOut, int sortBits = 32 );
|
||||
void executeHost(btOpenCLArray<btSortData>& keyValuesInOut, int sortBits = 32);
|
||||
void executeHost(b3AlignedObjectArray<btSortData>& keyValuesInOut, int sortBits = 32);
|
||||
|
||||
};
|
||||
#endif //BT_RADIXSORT32_H
|
||||
|
||||
Reference in New Issue
Block a user