add some command-line options

--use_jacobi
--allow_opencl_cpu

b3LauncherCL constructor takes string, to make it easier to determine failing OpenCL kernel
b3SetCustomErrorMessageFunc, printf error and exit(0)
This commit is contained in:
erwin coumans
2013-11-19 13:42:53 -08:00
parent 54909160a7
commit 26dfaa441e
27 changed files with 491 additions and 448 deletions

View File

@@ -87,7 +87,7 @@ void b3BoundSearchCL::execute(b3OpenCLArray<b3SortData>& src, int nSrc, b3OpenCL
{
b3BufferInfoCL bInfo[] = { b3BufferInfoCL( src.getBufferCL(), true ), b3BufferInfoCL( dst.getBufferCL()) };
b3LauncherCL launcher( m_queue, m_lowerSortDataKernel );
b3LauncherCL launcher( m_queue, m_lowerSortDataKernel,"m_lowerSortDataKernel" );
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
launcher.setConst( nSrc );
launcher.setConst( nDst );
@@ -98,7 +98,7 @@ void b3BoundSearchCL::execute(b3OpenCLArray<b3SortData>& src, int nSrc, b3OpenCL
{
b3BufferInfoCL bInfo[] = { b3BufferInfoCL( src.getBufferCL(), true ), b3BufferInfoCL( dst.getBufferCL() ) };
b3LauncherCL launcher(m_queue, m_upperSortDataKernel );
b3LauncherCL launcher(m_queue, m_upperSortDataKernel,"m_upperSortDataKernel" );
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
launcher.setConst( nSrc );
launcher.setConst( nDst );
@@ -122,7 +122,7 @@ void b3BoundSearchCL::execute(b3OpenCLArray<b3SortData>& src, int nSrc, b3OpenCL
{
b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_upper->getBufferCL(), true ), b3BufferInfoCL( m_lower->getBufferCL(), true ), b3BufferInfoCL( dst.getBufferCL() ) };
b3LauncherCL launcher( m_queue, m_subtractKernel );
b3LauncherCL launcher( m_queue, m_subtractKernel ,"m_subtractKernel");
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
launcher.setConst( nSrc );
launcher.setConst( nDst );

View File

@@ -47,7 +47,7 @@ void b3FillCL::execute(b3OpenCLArray<float>& src, const float value, int n, int
b3Assert( n>0 );
{
b3LauncherCL launcher( m_commandQueue, m_fillFloatKernel );
b3LauncherCL launcher( m_commandQueue, m_fillFloatKernel,"m_fillFloatKernel" );
launcher.setBuffer( src.getBufferCL());
launcher.setConst( n );
launcher.setConst( value );
@@ -63,7 +63,7 @@ void b3FillCL::execute(b3OpenCLArray<int>& src, const int value, int n, int offs
{
b3LauncherCL launcher( m_commandQueue, m_fillIntKernel );
b3LauncherCL launcher( m_commandQueue, m_fillIntKernel ,"m_fillIntKernel");
launcher.setBuffer(src.getBufferCL());
launcher.setConst( n);
launcher.setConst( value);
@@ -80,7 +80,7 @@ void b3FillCL::execute(b3OpenCLArray<unsigned int>& src, const unsigned int valu
{
b3BufferInfoCL bInfo[] = { b3BufferInfoCL( src.getBufferCL() ) };
b3LauncherCL launcher( m_commandQueue, m_fillUnsignedIntKernel );
b3LauncherCL launcher( m_commandQueue, m_fillUnsignedIntKernel,"m_fillUnsignedIntKernel" );
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
launcher.setConst( n );
launcher.setConst(value);
@@ -114,7 +114,7 @@ void b3FillCL::execute(b3OpenCLArray<b3Int2> &src, const b3Int2 &value, int n, i
{
b3BufferInfoCL bInfo[] = { b3BufferInfoCL( src.getBufferCL() ) };
b3LauncherCL launcher(m_commandQueue, m_fillKernelInt2);
b3LauncherCL launcher(m_commandQueue, m_fillKernelInt2,"m_fillKernelInt2");
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
launcher.setConst(n);
launcher.setConst(value);

View File

@@ -0,0 +1,287 @@
#include "b3LauncherCL.h"
bool gDebugLauncherCL = false;
b3LauncherCL::b3LauncherCL(cl_command_queue queue, cl_kernel kernel, const char* name)
:m_commandQueue(queue),
m_kernel(kernel),
m_idx(0),
m_enableSerialization(false),
m_name(name)
{
if (gDebugLauncherCL)
{
static int counter = 0;
printf("[%d] Prepare to launch OpenCL kernel %s\n", counter++, name);
}
m_serializationSizeInBytes = sizeof(int);
}
b3LauncherCL::~b3LauncherCL()
{
for (int i=0;i<m_arrays.size();i++)
{
clReleaseMemObject(m_arrays[i]->getBufferCL());
}
if (gDebugLauncherCL)
{
static int counter = 0;
printf("[%d] Finished launching OpenCL kernel %s [%d]\n", counter++,m_name);
}
}
void b3LauncherCL::setBuffer( cl_mem clBuffer)
{
if (m_enableSerialization)
{
b3KernelArgData kernelArg;
kernelArg.m_argIndex = m_idx;
kernelArg.m_isBuffer = 1;
kernelArg.m_clBuffer = clBuffer;
cl_mem_info param_name = CL_MEM_SIZE;
size_t param_value;
size_t sizeInBytes = sizeof(size_t);
size_t actualSizeInBytes;
cl_int err;
err = clGetMemObjectInfo ( kernelArg.m_clBuffer,
param_name,
sizeInBytes,
&param_value,
&actualSizeInBytes);
b3Assert( err == CL_SUCCESS );
kernelArg.m_argSizeInBytes = param_value;
m_kernelArguments.push_back(kernelArg);
m_serializationSizeInBytes+= sizeof(b3KernelArgData);
m_serializationSizeInBytes+=param_value;
}
cl_int status = clSetKernelArg( m_kernel, m_idx++, sizeof(cl_mem), &clBuffer);
b3Assert( status == CL_SUCCESS );
}
void b3LauncherCL::setBuffers( b3BufferInfoCL* buffInfo, int n )
{
for(int i=0; i<n; i++)
{
if (m_enableSerialization)
{
b3KernelArgData kernelArg;
kernelArg.m_argIndex = m_idx;
kernelArg.m_isBuffer = 1;
kernelArg.m_clBuffer = buffInfo[i].m_clBuffer;
cl_mem_info param_name = CL_MEM_SIZE;
size_t param_value;
size_t sizeInBytes = sizeof(size_t);
size_t actualSizeInBytes;
cl_int err;
err = clGetMemObjectInfo ( kernelArg.m_clBuffer,
param_name,
sizeInBytes,
&param_value,
&actualSizeInBytes);
b3Assert( err == CL_SUCCESS );
kernelArg.m_argSizeInBytes = param_value;
m_kernelArguments.push_back(kernelArg);
m_serializationSizeInBytes+= sizeof(b3KernelArgData);
m_serializationSizeInBytes+=param_value;
}
cl_int status = clSetKernelArg( m_kernel, m_idx++, sizeof(cl_mem), &buffInfo[i].m_clBuffer);
b3Assert( status == CL_SUCCESS );
}
}
int b3LauncherCL::deserializeArgs(unsigned char* buf, int bufSize, cl_context ctx)
{
int index=0;
int numArguments = *(int*) &buf[index];
index+=sizeof(int);
for (int i=0;i<numArguments;i++)
{
b3KernelArgData* arg = (b3KernelArgData*)&buf[index];
index+=sizeof(b3KernelArgData);
if (arg->m_isBuffer)
{
b3OpenCLArray<unsigned char>* clData = new b3OpenCLArray<unsigned char>(ctx,m_commandQueue, arg->m_argSizeInBytes);
clData->resize(arg->m_argSizeInBytes);
clData->copyFromHostPointer(&buf[index], arg->m_argSizeInBytes);
arg->m_clBuffer = clData->getBufferCL();
m_arrays.push_back(clData);
cl_int status = clSetKernelArg( m_kernel, m_idx++, sizeof(cl_mem), &arg->m_clBuffer);
b3Assert( status == CL_SUCCESS );
index+=arg->m_argSizeInBytes;
} else
{
cl_int status = clSetKernelArg( m_kernel, m_idx++, arg->m_argSizeInBytes, &arg->m_argData);
b3Assert( status == CL_SUCCESS );
}
m_kernelArguments.push_back(*arg);
}
m_serializationSizeInBytes = index;
return index;
}
int b3LauncherCL::validateResults(unsigned char* goldBuffer, int goldBufferCapacity, cl_context ctx)
{
int index=0;
int numArguments = *(int*) &goldBuffer[index];
index+=sizeof(int);
if (numArguments != m_kernelArguments.size())
{
printf("failed validation: expected %d arguments, found %d\n",numArguments, m_kernelArguments.size());
return -1;
}
for (int ii=0;ii<numArguments;ii++)
{
b3KernelArgData* argGold = (b3KernelArgData*)&goldBuffer[index];
if (m_kernelArguments[ii].m_argSizeInBytes != argGold->m_argSizeInBytes)
{
printf("failed validation: argument %d sizeInBytes expected: %d, found %d\n",ii, argGold->m_argSizeInBytes, m_kernelArguments[ii].m_argSizeInBytes);
return -2;
}
{
int expected = argGold->m_isBuffer;
int found = m_kernelArguments[ii].m_isBuffer;
if (expected != found)
{
printf("failed validation: argument %d isBuffer expected: %d, found %d\n",ii,expected, found);
return -3;
}
}
index+=sizeof(b3KernelArgData);
if (argGold->m_isBuffer)
{
unsigned char* memBuf= (unsigned char*) malloc(m_kernelArguments[ii].m_argSizeInBytes);
unsigned char* goldBuf = &goldBuffer[index];
for (int j=0;j<m_kernelArguments[j].m_argSizeInBytes;j++)
{
memBuf[j] = 0xaa;
}
cl_int status = 0;
status = clEnqueueReadBuffer( m_commandQueue, m_kernelArguments[ii].m_clBuffer, CL_TRUE, 0, m_kernelArguments[ii].m_argSizeInBytes,
memBuf, 0,0,0 );
b3Assert( status==CL_SUCCESS );
clFinish(m_commandQueue);
for (int b=0;b<m_kernelArguments[ii].m_argSizeInBytes;b++)
{
int expected = goldBuf[b];
int found = memBuf[b];
if (expected != found)
{
printf("failed validation: argument %d OpenCL data at byte position %d expected: %d, found %d\n",
ii, b, expected, found);
return -4;
}
}
index+=argGold->m_argSizeInBytes;
} else
{
//compare content
for (int b=0;b<m_kernelArguments[ii].m_argSizeInBytes;b++)
{
int expected = argGold->m_argData[b];
int found =m_kernelArguments[ii].m_argData[b];
if (expected != found)
{
printf("failed validation: argument %d const data at byte position %d expected: %d, found %d\n",
ii, b, expected, found);
return -5;
}
}
}
}
return index;
}
int b3LauncherCL::serializeArguments(unsigned char* destBuffer, int destBufferCapacity)
{
//initialize to known values
for (int i=0;i<destBufferCapacity;i++)
destBuffer[i] = 0xec;
assert(destBufferCapacity>=m_serializationSizeInBytes);
//todo: use the b3Serializer for this to allow for 32/64bit, endianness etc
int numArguments = m_kernelArguments.size();
int curBufferSize = 0;
int* dest = (int*)&destBuffer[curBufferSize];
*dest = numArguments;
curBufferSize += sizeof(int);
for (int i=0;i<this->m_kernelArguments.size();i++)
{
b3KernelArgData* arg = (b3KernelArgData*) &destBuffer[curBufferSize];
*arg = m_kernelArguments[i];
curBufferSize+=sizeof(b3KernelArgData);
if (arg->m_isBuffer==1)
{
//copy the OpenCL buffer content
cl_int status = 0;
status = clEnqueueReadBuffer( m_commandQueue, arg->m_clBuffer, 0, 0, arg->m_argSizeInBytes,
&destBuffer[curBufferSize], 0,0,0 );
b3Assert( status==CL_SUCCESS );
clFinish(m_commandQueue);
curBufferSize+=arg->m_argSizeInBytes;
}
}
return curBufferSize;
}
void b3LauncherCL::serializeToFile(const char* fileName, int numWorkItems)
{
int num = numWorkItems;
int buffSize = getSerializationBufferSize();
unsigned char* buf = new unsigned char[buffSize+sizeof(int)];
for (int i=0;i<buffSize+1;i++)
{
unsigned char* ptr = (unsigned char*)&buf[i];
*ptr = 0xff;
}
// int actualWrite = serializeArguments(buf,buffSize);
// unsigned char* cptr = (unsigned char*)&buf[buffSize];
// printf("buf[buffSize] = %d\n",*cptr);
assert(buf[buffSize]==0xff);//check for buffer overrun
int* ptr = (int*)&buf[buffSize];
*ptr = num;
FILE* f = fopen(fileName,"wb");
fwrite(buf,buffSize+sizeof(int),1,f);
fclose(f);
delete[] buf;
}

View File

@@ -39,286 +39,31 @@ class b3LauncherCL
int m_serializationSizeInBytes;
bool m_enableSerialization;
const char* m_name;
public:
b3AlignedObjectArray<b3OpenCLArray<unsigned char>* > m_arrays;
b3LauncherCL(cl_command_queue queue, cl_kernel kernel)
:m_commandQueue(queue),
m_kernel(kernel),
m_idx(0),
m_enableSerialization(false)
{
m_serializationSizeInBytes = sizeof(int);
}
b3LauncherCL(cl_command_queue queue, cl_kernel kernel, const char* name);
virtual ~b3LauncherCL()
{
for (int i=0;i<m_arrays.size();i++)
{
clReleaseMemObject(m_arrays[i]->getBufferCL());
}
}
virtual ~b3LauncherCL();
void setBuffer( cl_mem clBuffer);
inline void setBuffer( cl_mem clBuffer)
{
if (m_enableSerialization)
{
b3KernelArgData kernelArg;
kernelArg.m_argIndex = m_idx;
kernelArg.m_isBuffer = 1;
kernelArg.m_clBuffer = clBuffer;
cl_mem_info param_name = CL_MEM_SIZE;
size_t param_value;
size_t sizeInBytes = sizeof(size_t);
size_t actualSizeInBytes;
cl_int err;
err = clGetMemObjectInfo ( kernelArg.m_clBuffer,
param_name,
sizeInBytes,
&param_value,
&actualSizeInBytes);
b3Assert( err == CL_SUCCESS );
kernelArg.m_argSizeInBytes = param_value;
m_kernelArguments.push_back(kernelArg);
m_serializationSizeInBytes+= sizeof(b3KernelArgData);
m_serializationSizeInBytes+=param_value;
}
cl_int status = clSetKernelArg( m_kernel, m_idx++, sizeof(cl_mem), &clBuffer);
b3Assert( status == CL_SUCCESS );
}
inline void setBuffers( b3BufferInfoCL* buffInfo, int n )
{
for(int i=0; i<n; i++)
{
if (m_enableSerialization)
{
b3KernelArgData kernelArg;
kernelArg.m_argIndex = m_idx;
kernelArg.m_isBuffer = 1;
kernelArg.m_clBuffer = buffInfo[i].m_clBuffer;
cl_mem_info param_name = CL_MEM_SIZE;
size_t param_value;
size_t sizeInBytes = sizeof(size_t);
size_t actualSizeInBytes;
cl_int err;
err = clGetMemObjectInfo ( kernelArg.m_clBuffer,
param_name,
sizeInBytes,
&param_value,
&actualSizeInBytes);
b3Assert( err == CL_SUCCESS );
kernelArg.m_argSizeInBytes = param_value;
m_kernelArguments.push_back(kernelArg);
m_serializationSizeInBytes+= sizeof(b3KernelArgData);
m_serializationSizeInBytes+=param_value;
}
cl_int status = clSetKernelArg( m_kernel, m_idx++, sizeof(cl_mem), &buffInfo[i].m_clBuffer);
b3Assert( status == CL_SUCCESS );
}
}
void setBuffers( b3BufferInfoCL* buffInfo, int n );
int getSerializationBufferSize() const
{
return m_serializationSizeInBytes;
}
inline int deserializeArgs(unsigned char* buf, int bufSize, cl_context ctx)
{
int index=0;
int numArguments = *(int*) &buf[index];
index+=sizeof(int);
for (int i=0;i<numArguments;i++)
{
b3KernelArgData* arg = (b3KernelArgData*)&buf[index];
int deserializeArgs(unsigned char* buf, int bufSize, cl_context ctx);
index+=sizeof(b3KernelArgData);
if (arg->m_isBuffer)
{
b3OpenCLArray<unsigned char>* clData = new b3OpenCLArray<unsigned char>(ctx,m_commandQueue, arg->m_argSizeInBytes);
clData->resize(arg->m_argSizeInBytes);
clData->copyFromHostPointer(&buf[index], arg->m_argSizeInBytes);
arg->m_clBuffer = clData->getBufferCL();
m_arrays.push_back(clData);
cl_int status = clSetKernelArg( m_kernel, m_idx++, sizeof(cl_mem), &arg->m_clBuffer);
b3Assert( status == CL_SUCCESS );
index+=arg->m_argSizeInBytes;
} else
{
cl_int status = clSetKernelArg( m_kernel, m_idx++, arg->m_argSizeInBytes, &arg->m_argData);
b3Assert( status == CL_SUCCESS );
}
m_kernelArguments.push_back(*arg);
}
m_serializationSizeInBytes = index;
return index;
}
inline int validateResults(unsigned char* goldBuffer, int goldBufferCapacity, cl_context ctx)
{
int index=0;
int numArguments = *(int*) &goldBuffer[index];
index+=sizeof(int);
if (numArguments != m_kernelArguments.size())
{
printf("failed validation: expected %d arguments, found %d\n",numArguments, m_kernelArguments.size());
return -1;
}
for (int ii=0;ii<numArguments;ii++)
{
b3KernelArgData* argGold = (b3KernelArgData*)&goldBuffer[index];
if (m_kernelArguments[ii].m_argSizeInBytes != argGold->m_argSizeInBytes)
{
printf("failed validation: argument %d sizeInBytes expected: %d, found %d\n",ii, argGold->m_argSizeInBytes, m_kernelArguments[ii].m_argSizeInBytes);
return -2;
}
{
int expected = argGold->m_isBuffer;
int found = m_kernelArguments[ii].m_isBuffer;
if (expected != found)
{
printf("failed validation: argument %d isBuffer expected: %d, found %d\n",ii,expected, found);
return -3;
}
}
index+=sizeof(b3KernelArgData);
if (argGold->m_isBuffer)
{
unsigned char* memBuf= (unsigned char*) malloc(m_kernelArguments[ii].m_argSizeInBytes);
unsigned char* goldBuf = &goldBuffer[index];
for (int j=0;j<m_kernelArguments[j].m_argSizeInBytes;j++)
{
memBuf[j] = 0xaa;
}
cl_int status = 0;
status = clEnqueueReadBuffer( m_commandQueue, m_kernelArguments[ii].m_clBuffer, CL_TRUE, 0, m_kernelArguments[ii].m_argSizeInBytes,
memBuf, 0,0,0 );
b3Assert( status==CL_SUCCESS );
clFinish(m_commandQueue);
for (int b=0;b<m_kernelArguments[ii].m_argSizeInBytes;b++)
{
int expected = goldBuf[b];
int found = memBuf[b];
if (expected != found)
{
printf("failed validation: argument %d OpenCL data at byte position %d expected: %d, found %d\n",
ii, b, expected, found);
return -4;
}
}
index+=argGold->m_argSizeInBytes;
} else
{
//compare content
for (int b=0;b<m_kernelArguments[ii].m_argSizeInBytes;b++)
{
int expected = argGold->m_argData[b];
int found =m_kernelArguments[ii].m_argData[b];
if (expected != found)
{
printf("failed validation: argument %d const data at byte position %d expected: %d, found %d\n",
ii, b, expected, found);
return -5;
}
}
}
}
return index;
}
inline int serializeArguments(unsigned char* destBuffer, int destBufferCapacity)
{
//initialize to known values
for (int i=0;i<destBufferCapacity;i++)
destBuffer[i] = 0xec;
assert(destBufferCapacity>=m_serializationSizeInBytes);
//todo: use the b3Serializer for this to allow for 32/64bit, endianness etc
int numArguments = m_kernelArguments.size();
int curBufferSize = 0;
int* dest = (int*)&destBuffer[curBufferSize];
*dest = numArguments;
curBufferSize += sizeof(int);
for (int i=0;i<this->m_kernelArguments.size();i++)
{
b3KernelArgData* arg = (b3KernelArgData*) &destBuffer[curBufferSize];
*arg = m_kernelArguments[i];
curBufferSize+=sizeof(b3KernelArgData);
if (arg->m_isBuffer==1)
{
//copy the OpenCL buffer content
cl_int status = 0;
status = clEnqueueReadBuffer( m_commandQueue, arg->m_clBuffer, 0, 0, arg->m_argSizeInBytes,
&destBuffer[curBufferSize], 0,0,0 );
b3Assert( status==CL_SUCCESS );
clFinish(m_commandQueue);
curBufferSize+=arg->m_argSizeInBytes;
}
}
return curBufferSize;
}
void serializeToFile(const char* fileName, int numWorkItems)
{
int num = numWorkItems;
int buffSize = getSerializationBufferSize();
unsigned char* buf = new unsigned char[buffSize+sizeof(int)];
for (int i=0;i<buffSize+1;i++)
{
unsigned char* ptr = (unsigned char*)&buf[i];
*ptr = 0xff;
}
// int actualWrite = serializeArguments(buf,buffSize);
// unsigned char* cptr = (unsigned char*)&buf[buffSize];
// printf("buf[buffSize] = %d\n",*cptr);
assert(buf[buffSize]==0xff);//check for buffer overrun
int* ptr = (int*)&buf[buffSize];
*ptr = num;
FILE* f = fopen(fileName,"wb");
fwrite(buf,buffSize+sizeof(int),1,f);
fclose(f);
delete[] buf;
}
inline int validateResults(unsigned char* goldBuffer, int goldBufferCapacity, cl_context ctx);
int serializeArguments(unsigned char* destBuffer, int destBufferCapacity);
void serializeToFile(const char* fileName, int numWorkItems);
template<typename T>
inline void setConst( const T& consts )

View File

@@ -63,7 +63,7 @@ void b3PrefixScanCL::execute(b3OpenCLArray<unsigned int>& src, b3OpenCLArray<uns
{
b3BufferInfoCL bInfo[] = { b3BufferInfoCL( dstNative->getBufferCL() ), b3BufferInfoCL( srcNative->getBufferCL() ), b3BufferInfoCL( m_workBuffer->getBufferCL() ) };
b3LauncherCL launcher( m_commandQueue, m_localScanKernel );
b3LauncherCL launcher( m_commandQueue, m_localScanKernel,"m_localScanKernel" );
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
launcher.setConst( constBuffer );
launcher.launch1D( numBlocks*BLOCK_SIZE, BLOCK_SIZE );
@@ -72,7 +72,7 @@ void b3PrefixScanCL::execute(b3OpenCLArray<unsigned int>& src, b3OpenCLArray<uns
{
b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_workBuffer->getBufferCL() ) };
b3LauncherCL launcher( m_commandQueue, m_blockSumKernel );
b3LauncherCL launcher( m_commandQueue, m_blockSumKernel,"m_blockSumKernel" );
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
launcher.setConst( constBuffer );
launcher.launch1D( BLOCK_SIZE, BLOCK_SIZE );
@@ -82,7 +82,7 @@ void b3PrefixScanCL::execute(b3OpenCLArray<unsigned int>& src, b3OpenCLArray<uns
if( numBlocks > 1 )
{
b3BufferInfoCL bInfo[] = { b3BufferInfoCL( dstNative->getBufferCL() ), b3BufferInfoCL( m_workBuffer->getBufferCL() ) };
b3LauncherCL launcher( m_commandQueue, m_propagationKernel );
b3LauncherCL launcher( m_commandQueue, m_propagationKernel,"m_propagationKernel" );
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
launcher.setConst( constBuffer );
launcher.launch1D( (numBlocks-1)*BLOCK_SIZE, BLOCK_SIZE );

View File

@@ -63,7 +63,7 @@ void b3PrefixScanFloat4CL::execute(b3OpenCLArray<b3Vector3>& src, b3OpenCLArray<
{
b3BufferInfoCL bInfo[] = { b3BufferInfoCL( dstNative->getBufferCL() ), b3BufferInfoCL( srcNative->getBufferCL() ), b3BufferInfoCL( m_workBuffer->getBufferCL() ) };
b3LauncherCL launcher( m_commandQueue, m_localScanKernel );
b3LauncherCL launcher( m_commandQueue, m_localScanKernel ,"m_localScanKernel");
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
launcher.setConst( constBuffer );
launcher.launch1D( numBlocks*BLOCK_SIZE, BLOCK_SIZE );
@@ -72,7 +72,7 @@ void b3PrefixScanFloat4CL::execute(b3OpenCLArray<b3Vector3>& src, b3OpenCLArray<
{
b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_workBuffer->getBufferCL() ) };
b3LauncherCL launcher( m_commandQueue, m_blockSumKernel );
b3LauncherCL launcher( m_commandQueue, m_blockSumKernel ,"m_blockSumKernel");
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
launcher.setConst( constBuffer );
launcher.launch1D( BLOCK_SIZE, BLOCK_SIZE );
@@ -82,7 +82,7 @@ void b3PrefixScanFloat4CL::execute(b3OpenCLArray<b3Vector3>& src, b3OpenCLArray<
if( numBlocks > 1 )
{
b3BufferInfoCL bInfo[] = { b3BufferInfoCL( dstNative->getBufferCL() ), b3BufferInfoCL( m_workBuffer->getBufferCL() ) };
b3LauncherCL launcher( m_commandQueue, m_propagationKernel );
b3LauncherCL launcher( m_commandQueue, m_propagationKernel ,"m_propagationKernel");
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
launcher.setConst( constBuffer );
launcher.launch1D( (numBlocks-1)*BLOCK_SIZE, BLOCK_SIZE );

View File

@@ -294,7 +294,7 @@ void b3RadixSort32CL::execute(b3OpenCLArray<b3SortData>& keyValuesInOut, int sor
if (src->size())
{
b3BufferInfoCL bInfo[] = { b3BufferInfoCL( src->getBufferCL(), true ), b3BufferInfoCL( srcHisto->getBufferCL() ) };
b3LauncherCL launcher(m_commandQueue, m_streamCountSortDataKernel);
b3LauncherCL launcher(m_commandQueue, m_streamCountSortDataKernel,"m_streamCountSortDataKernel");
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
launcher.setConst( cdata );
@@ -328,7 +328,7 @@ void b3RadixSort32CL::execute(b3OpenCLArray<b3SortData>& keyValuesInOut, int sor
if (fastScan)
{// prefix scan group histogram
b3BufferInfoCL bInfo[] = { b3BufferInfoCL( srcHisto->getBufferCL() ) };
b3LauncherCL launcher( m_commandQueue, m_prefixScanKernel );
b3LauncherCL launcher( m_commandQueue, m_prefixScanKernel,"m_prefixScanKernel" );
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
launcher.setConst( cdata );
launcher.launch1D( 128, 128 );
@@ -362,7 +362,7 @@ void b3RadixSort32CL::execute(b3OpenCLArray<b3SortData>& keyValuesInOut, int sor
if (src->size())
{// local sort and distribute
b3BufferInfoCL bInfo[] = { b3BufferInfoCL( src->getBufferCL(), true ), b3BufferInfoCL( destHisto->getBufferCL(), true ), b3BufferInfoCL( dst->getBufferCL() )};
b3LauncherCL launcher( m_commandQueue, m_sortAndScatterSortDataKernel );
b3LauncherCL launcher( m_commandQueue, m_sortAndScatterSortDataKernel,"m_sortAndScatterSortDataKernel" );
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
launcher.setConst( cdata );
launcher.launch1D( nWGs*WG_SIZE, WG_SIZE );
@@ -641,7 +641,7 @@ void b3RadixSort32CL::execute(b3OpenCLArray<unsigned int>& keysInOut, int sortBi
if (src->size())
{
b3BufferInfoCL bInfo[] = { b3BufferInfoCL( src->getBufferCL(), true ), b3BufferInfoCL( srcHisto->getBufferCL() ) };
b3LauncherCL launcher(m_commandQueue, m_streamCountKernel);
b3LauncherCL launcher(m_commandQueue, m_streamCountKernel,"m_streamCountKernel");
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
launcher.setConst( cdata );
@@ -662,7 +662,7 @@ void b3RadixSort32CL::execute(b3OpenCLArray<unsigned int>& keysInOut, int sortBi
if (fastScan)
{// prefix scan group histogram
b3BufferInfoCL bInfo[] = { b3BufferInfoCL( srcHisto->getBufferCL() ) };
b3LauncherCL launcher( m_commandQueue, m_prefixScanKernel );
b3LauncherCL launcher( m_commandQueue, m_prefixScanKernel,"m_prefixScanKernel" );
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
launcher.setConst( cdata );
launcher.launch1D( 128, 128 );
@@ -676,7 +676,7 @@ void b3RadixSort32CL::execute(b3OpenCLArray<unsigned int>& keysInOut, int sortBi
if (src->size())
{// local sort and distribute
b3BufferInfoCL bInfo[] = { b3BufferInfoCL( src->getBufferCL(), true ), b3BufferInfoCL( destHisto->getBufferCL(), true ), b3BufferInfoCL( dst->getBufferCL() )};
b3LauncherCL launcher( m_commandQueue, m_sortAndScatterKernel );
b3LauncherCL launcher( m_commandQueue, m_sortAndScatterKernel ,"m_sortAndScatterKernel");
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
launcher.setConst( cdata );
launcher.launch1D( nWGs*WG_SIZE, WG_SIZE );