From 3438d1c8f6487d0f9d47db56ff84030521b76fbc Mon Sep 17 00:00:00 2001 From: erwin coumans Date: Wed, 1 May 2013 10:04:56 -0700 Subject: [PATCH] compute actual local workgroup size, instead of a hard-coded value --- test/OpenCL/BitonicSort/b3BitonicSort.cpp | 7 +++++++ test/OpenCL/BitonicSort/b3BitonicSort.h | 5 +++++ test/OpenCL/BitonicSort/main.cpp | 1 + 3 files changed, 13 insertions(+) diff --git a/test/OpenCL/BitonicSort/b3BitonicSort.cpp b/test/OpenCL/BitonicSort/b3BitonicSort.cpp index 83e2cf0ff..68e2843d6 100644 --- a/test/OpenCL/BitonicSort/b3BitonicSort.cpp +++ b/test/OpenCL/BitonicSort/b3BitonicSort.cpp @@ -15,6 +15,13 @@ void bitonicSortNv(cl_mem pKey, int arrayLength, b3BitonicSortInfo& info) info.dir = (info.dir != 0); cl_int ciErrNum; size_t localWorkSize, globalWorkSize; + int res = -1; + + + cl_int clerr=clGetKernelWorkGroupInfo (info.bitonicSortLocal1,info.dev,CL_KERNEL_WORK_GROUP_SIZE,sizeof(size_t),&res,NULL); + if((clerr==CL_SUCCESS)&&(res>0)) + info.localSizeLimit=res; + if(arrayLength <= info.localSizeLimit) { b3Assert( ( arrayLength) % info.localSizeLimit == 0); diff --git a/test/OpenCL/BitonicSort/b3BitonicSort.h b/test/OpenCL/BitonicSort/b3BitonicSort.h index ebd4ecb1e..dd25b0138 100644 --- a/test/OpenCL/BitonicSort/b3BitonicSort.h +++ b/test/OpenCL/BitonicSort/b3BitonicSort.h @@ -6,15 +6,20 @@ struct b3BitonicSortInfo { cl_command_queue m_cqCommandQue; + cl_device_id dev; + cl_kernel bitonicSortLocal; cl_kernel bitonicSortLocal1; cl_kernel bitonicSortMergeGlobal; cl_kernel bitonicSortMergeLocal; + unsigned int dir; unsigned int localSizeLimit; b3BitonicSortInfo() { + dev = 0; + m_cqCommandQue = 0; bitonicSortLocal=0; bitonicSortLocal1=0; bitonicSortMergeGlobal=0; diff --git a/test/OpenCL/BitonicSort/main.cpp b/test/OpenCL/BitonicSort/main.cpp index fb1494aa0..4f9a4d2c7 100644 --- a/test/OpenCL/BitonicSort/main.cpp +++ b/test/OpenCL/BitonicSort/main.cpp @@ -95,6 +95,7 @@ int main(int argc, char* argv[]) info.bitonicSortMergeLocal = b3OpenCLUtils::compileCLKernelFromString(context,dev,kernelSource,"kBitonicSortCellIdMergeLocal",&ciErrNum,0,""); oclCHECKERROR(ciErrNum, CL_SUCCESS); info.m_cqCommandQue = g_cqCommandQue; + info.dev = dev; b3OpenCLArray keyValuesGPU(context,g_cqCommandQue); b3AlignedObjectArray keyValuesCPU;