Compute the actual local workgroup size instead of using a hard-coded value

This commit is contained in:
erwin coumans
2013-05-01 10:04:56 -07:00
parent 6d21d10cb4
commit 3438d1c8f6
3 changed files with 13 additions and 0 deletions

View File

@@ -15,6 +15,13 @@ void bitonicSortNv(cl_mem pKey, int arrayLength, b3BitonicSortInfo& info)
info.dir = (info.dir != 0); info.dir = (info.dir != 0);
cl_int ciErrNum; cl_int ciErrNum;
size_t localWorkSize, globalWorkSize; size_t localWorkSize, globalWorkSize;
int res = -1;
cl_int clerr=clGetKernelWorkGroupInfo (info.bitonicSortLocal1,info.dev,CL_KERNEL_WORK_GROUP_SIZE,sizeof(size_t),&res,NULL);
if((clerr==CL_SUCCESS)&&(res>0))
info.localSizeLimit=res;
if(arrayLength <= info.localSizeLimit) if(arrayLength <= info.localSizeLimit)
{ {
b3Assert( ( arrayLength) % info.localSizeLimit == 0); b3Assert( ( arrayLength) % info.localSizeLimit == 0);

View File

@@ -6,15 +6,20 @@
struct b3BitonicSortInfo struct b3BitonicSortInfo
{ {
cl_command_queue m_cqCommandQue; cl_command_queue m_cqCommandQue;
cl_device_id dev;
cl_kernel bitonicSortLocal; cl_kernel bitonicSortLocal;
cl_kernel bitonicSortLocal1; cl_kernel bitonicSortLocal1;
cl_kernel bitonicSortMergeGlobal; cl_kernel bitonicSortMergeGlobal;
cl_kernel bitonicSortMergeLocal; cl_kernel bitonicSortMergeLocal;
unsigned int dir; unsigned int dir;
unsigned int localSizeLimit; unsigned int localSizeLimit;
b3BitonicSortInfo() b3BitonicSortInfo()
{ {
dev = 0;
m_cqCommandQue = 0;
bitonicSortLocal=0; bitonicSortLocal=0;
bitonicSortLocal1=0; bitonicSortLocal1=0;
bitonicSortMergeGlobal=0; bitonicSortMergeGlobal=0;

View File

@@ -95,6 +95,7 @@ int main(int argc, char* argv[])
info.bitonicSortMergeLocal = b3OpenCLUtils::compileCLKernelFromString(context,dev,kernelSource,"kBitonicSortCellIdMergeLocal",&ciErrNum,0,""); info.bitonicSortMergeLocal = b3OpenCLUtils::compileCLKernelFromString(context,dev,kernelSource,"kBitonicSortCellIdMergeLocal",&ciErrNum,0,"");
oclCHECKERROR(ciErrNum, CL_SUCCESS); oclCHECKERROR(ciErrNum, CL_SUCCESS);
info.m_cqCommandQue = g_cqCommandQue; info.m_cqCommandQue = g_cqCommandQue;
info.dev = dev;
b3OpenCLArray<b3Int2> keyValuesGPU(context,g_cqCommandQue); b3OpenCLArray<b3Int2> keyValuesGPU(context,g_cqCommandQue);
b3AlignedObjectArray<b3Int2> keyValuesCPU; b3AlignedObjectArray<b3Int2> keyValuesCPU;