#include #include #include #include "btOclUtils.h" ////////////////////////////////////////////////////////////////////////////// //! Gets the id of the nth device from the context //! //! @return the id or -1 when out of range //! @param cxMainContext OpenCL context //! @param device_idx index of the device of interest ////////////////////////////////////////////////////////////////////////////// cl_device_id btOclGetDev(cl_context cxMainContext, unsigned int nr) { size_t szParmDataBytes; cl_device_id* cdDevices; // get the list of GPU devices associated with context clGetContextInfo(cxMainContext, CL_CONTEXT_DEVICES, 0, NULL, &szParmDataBytes); if( szParmDataBytes / sizeof(cl_device_id) < nr ) { return (cl_device_id)-1; } cdDevices = (cl_device_id*) malloc(szParmDataBytes); clGetContextInfo(cxMainContext, CL_CONTEXT_DEVICES, szParmDataBytes, cdDevices, NULL); cl_device_id device = cdDevices[nr]; free(cdDevices); return device; } ////////////////////////////////////////////////////////////////////////////// //! Gets the id of device with maximal FLOPS from the context //! //! @return the id //! @param cxMainContext OpenCL context ////////////////////////////////////////////////////////////////////////////// cl_device_id btOclGetMaxFlopsDev(cl_context cxMainContext) { size_t szParmDataBytes; cl_device_id* cdDevices; // get the list of GPU devices associated with context clGetContextInfo(cxMainContext, CL_CONTEXT_DEVICES, 0, NULL, &szParmDataBytes); cdDevices = (cl_device_id*) malloc(szParmDataBytes); size_t device_count = szParmDataBytes / sizeof(cl_device_id); clGetContextInfo(cxMainContext, CL_CONTEXT_DEVICES, szParmDataBytes, cdDevices, NULL); cl_device_id max_flops_device = cdDevices[0]; int max_flops = 0; size_t current_device = 0; // CL_DEVICE_MAX_COMPUTE_UNITS cl_uint compute_units; clGetDeviceInfo(cdDevices[current_device], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(compute_units), &compute_units, NULL); // CL_DEVICE_MAX_CLOCK_FREQUENCY cl_uint clock_frequency; clGetDeviceInfo(cdDevices[current_device], CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(clock_frequency), &clock_frequency, NULL); max_flops = compute_units * clock_frequency; ++current_device; while( current_device < device_count ) { // CL_DEVICE_MAX_COMPUTE_UNITS cl_uint compute_units; clGetDeviceInfo(cdDevices[current_device], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(compute_units), &compute_units, NULL); // CL_DEVICE_MAX_CLOCK_FREQUENCY cl_uint clock_frequency; clGetDeviceInfo(cdDevices[current_device], CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(clock_frequency), &clock_frequency, NULL); int flops = compute_units * clock_frequency; if( flops > max_flops ) { max_flops = flops; max_flops_device = cdDevices[current_device]; } ++current_device; } free(cdDevices); return max_flops_device; } ////////////////////////////////////////////////////////////////////////////// //! Loads a Program file and prepends the cPreamble to the code. //! //! @return the source string if succeeded, 0 otherwise //! @param cFilename program filename //! @param cPreamble code that is prepended to the loaded file, typically a set of #defines or a header //! @param szFinalLength returned length of the code string ////////////////////////////////////////////////////////////////////////////// char* btOclLoadProgSource(const char* cFilename, const char* cPreamble, size_t* szFinalLength) { // locals FILE* pFileStream = NULL; size_t szSourceLength; // open the OpenCL source code file #ifdef _WIN32 // Windows version if(fopen_s(&pFileStream, cFilename, "rb") != 0) { return NULL; } #else // Linux version pFileStream = fopen(cFilename, "rb"); if(pFileStream == 0) { return NULL; } #endif size_t szPreambleLength = strlen(cPreamble); // get the length of the source code fseek(pFileStream, 0, SEEK_END); szSourceLength = ftell(pFileStream); fseek(pFileStream, 0, SEEK_SET); // allocate a buffer for the source code string and read it in char* cSourceString = (char *)malloc(szSourceLength + szPreambleLength + 1); memcpy(cSourceString, cPreamble, szPreambleLength); fread((cSourceString) + szPreambleLength, szSourceLength, 1, pFileStream); // close the file and return the total length of the combined (preamble + source) string fclose(pFileStream); if(szFinalLength != 0) { *szFinalLength = szSourceLength + szPreambleLength; } cSourceString[szSourceLength + szPreambleLength] = '\0'; return cSourceString; } ////////////////////////////////////////////////////////////////////////////// //! Gets the id of the first device from the context //! //! @return the id //! @param cxMainContext OpenCL context ////////////////////////////////////////////////////////////////////////////// cl_device_id btOclGetFirstDev(cl_context cxMainContext) { size_t szParmDataBytes; cl_device_id* cdDevices; // get the list of GPU devices associated with context clGetContextInfo(cxMainContext, CL_CONTEXT_DEVICES, 0, NULL, &szParmDataBytes); cdDevices = (cl_device_id*) malloc(szParmDataBytes); clGetContextInfo(cxMainContext, CL_CONTEXT_DEVICES, szParmDataBytes, cdDevices, NULL); cl_device_id first = cdDevices[0]; free(cdDevices); return first; } ////////////////////////////////////////////////////////////////////////////// //! Print info about the device //! //! @param device OpenCL id of the device ////////////////////////////////////////////////////////////////////////////// void btOclPrintDevInfo(cl_device_id device) { char device_string[1024]; bool nv_device_attibute_query = false; // CL_DEVICE_NAME clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(device_string), &device_string, NULL); printf(" CL_DEVICE_NAME: \t\t\t%s\n", device_string); // CL_DEVICE_VENDOR clGetDeviceInfo(device, CL_DEVICE_VENDOR, sizeof(device_string), &device_string, NULL); printf(" CL_DEVICE_VENDOR: \t\t\t%s\n", device_string); // CL_DRIVER_VERSION clGetDeviceInfo(device, CL_DRIVER_VERSION, sizeof(device_string), &device_string, NULL); printf(" CL_DRIVER_VERSION: \t\t\t%s\n", device_string); // CL_DEVICE_INFO cl_device_type type; clGetDeviceInfo(device, CL_DEVICE_TYPE, sizeof(type), &type, NULL); if( type & CL_DEVICE_TYPE_CPU ) printf(" CL_DEVICE_TYPE:\t\t\t%s\n", "CL_DEVICE_TYPE_CPU"); if( type & CL_DEVICE_TYPE_GPU ) printf(" CL_DEVICE_TYPE:\t\t\t%s\n", "CL_DEVICE_TYPE_GPU"); if( type & CL_DEVICE_TYPE_ACCELERATOR ) printf(" CL_DEVICE_TYPE:\t\t\t%s\n", "CL_DEVICE_TYPE_ACCELERATOR"); if( type & CL_DEVICE_TYPE_DEFAULT ) printf(" CL_DEVICE_TYPE:\t\t\t%s\n", "CL_DEVICE_TYPE_DEFAULT"); // CL_DEVICE_MAX_COMPUTE_UNITS cl_uint compute_units; clGetDeviceInfo(device, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(compute_units), &compute_units, NULL); printf(" CL_DEVICE_MAX_COMPUTE_UNITS:\t\t%u\n", compute_units); // CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS size_t workitem_dims; clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(workitem_dims), &workitem_dims, NULL); printf(" CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS:\t%u\n", workitem_dims); // CL_DEVICE_MAX_WORK_ITEM_SIZES size_t workitem_size[3]; clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(workitem_size), &workitem_size, NULL); printf(" CL_DEVICE_MAX_WORK_ITEM_SIZES:\t%u / %u / %u \n", workitem_size[0], workitem_size[1], workitem_size[2]); // CL_DEVICE_MAX_WORK_GROUP_SIZE size_t workgroup_size; clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(workgroup_size), &workgroup_size, NULL); printf(" CL_DEVICE_MAX_WORK_GROUP_SIZE:\t%u\n", workgroup_size); // CL_DEVICE_MAX_CLOCK_FREQUENCY cl_uint clock_frequency; clGetDeviceInfo(device, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(clock_frequency), &clock_frequency, NULL); printf(" CL_DEVICE_MAX_CLOCK_FREQUENCY:\t%u MHz\n", clock_frequency); // CL_DEVICE_ADDRESS_BITS cl_uint addr_bits; clGetDeviceInfo(device, CL_DEVICE_ADDRESS_BITS, sizeof(addr_bits), &addr_bits, NULL); printf(" CL_DEVICE_ADDRESS_BITS:\t\t%u\n", addr_bits); // CL_DEVICE_MAX_MEM_ALLOC_SIZE cl_ulong max_mem_alloc_size; clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(max_mem_alloc_size), &max_mem_alloc_size, NULL); printf(" CL_DEVICE_MAX_MEM_ALLOC_SIZE:\t\t%u MByte\n", (unsigned int)(max_mem_alloc_size / (1024 * 1024))); // CL_DEVICE_GLOBAL_MEM_SIZE cl_ulong mem_size; clGetDeviceInfo(device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(mem_size), &mem_size, NULL); printf(" CL_DEVICE_GLOBAL_MEM_SIZE:\t\t%u MByte\n", (unsigned int)(mem_size / (1024 * 1024))); // CL_DEVICE_ERROR_CORRECTION_SUPPORT cl_bool error_correction_support; clGetDeviceInfo(device, CL_DEVICE_ERROR_CORRECTION_SUPPORT, sizeof(error_correction_support), &error_correction_support, NULL); printf(" CL_DEVICE_ERROR_CORRECTION_SUPPORT:\t%s\n", error_correction_support == CL_TRUE ? "yes" : "no"); // CL_DEVICE_LOCAL_MEM_TYPE cl_device_local_mem_type local_mem_type; clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_TYPE, sizeof(local_mem_type), &local_mem_type, NULL); printf(" CL_DEVICE_LOCAL_MEM_TYPE:\t\t%s\n", local_mem_type == 1 ? "local" : "global"); // CL_DEVICE_LOCAL_MEM_SIZE clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(mem_size), &mem_size, NULL); printf(" CL_DEVICE_LOCAL_MEM_SIZE:\t\t%u KByte\n", (unsigned int)(mem_size / 1024)); // CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE clGetDeviceInfo(device, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof(mem_size), &mem_size, NULL); printf(" CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE:\t%u KByte\n", (unsigned int)(mem_size / 1024)); // CL_DEVICE_QUEUE_PROPERTIES cl_command_queue_properties queue_properties; clGetDeviceInfo(device, CL_DEVICE_QUEUE_PROPERTIES, sizeof(queue_properties), &queue_properties, NULL); if( queue_properties & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE ) printf(" CL_DEVICE_QUEUE_PROPERTIES:\t\t%s\n", "CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE"); if( queue_properties & CL_QUEUE_PROFILING_ENABLE ) printf(" CL_DEVICE_QUEUE_PROPERTIES:\t\t%s\n", "CL_QUEUE_PROFILING_ENABLE"); // CL_DEVICE_IMAGE_SUPPORT cl_bool image_support; clGetDeviceInfo(device, CL_DEVICE_IMAGE_SUPPORT, sizeof(image_support), &image_support, NULL); printf(" CL_DEVICE_IMAGE_SUPPORT:\t\t%u\n", image_support); // CL_DEVICE_MAX_READ_IMAGE_ARGS cl_uint max_read_image_args; clGetDeviceInfo(device, CL_DEVICE_MAX_READ_IMAGE_ARGS, sizeof(max_read_image_args), &max_read_image_args, NULL); printf(" CL_DEVICE_MAX_READ_IMAGE_ARGS:\t%u\n", max_read_image_args); // CL_DEVICE_MAX_WRITE_IMAGE_ARGS cl_uint max_write_image_args; clGetDeviceInfo(device, CL_DEVICE_MAX_WRITE_IMAGE_ARGS, sizeof(max_write_image_args), &max_write_image_args, NULL); printf(" CL_DEVICE_MAX_WRITE_IMAGE_ARGS:\t%u\n", max_write_image_args); // CL_DEVICE_IMAGE2D_MAX_WIDTH, CL_DEVICE_IMAGE2D_MAX_HEIGHT, CL_DEVICE_IMAGE3D_MAX_WIDTH, CL_DEVICE_IMAGE3D_MAX_HEIGHT, CL_DEVICE_IMAGE3D_MAX_DEPTH size_t szMaxDims[5]; printf("\n CL_DEVICE_IMAGE "); clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof(size_t), &szMaxDims[0], NULL); printf("\t\t\t2D_MAX_WIDTH\t %u\n", szMaxDims[0]); clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof(size_t), &szMaxDims[1], NULL); printf("\t\t\t\t\t2D_MAX_HEIGHT\t %u\n", szMaxDims[1]); clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_WIDTH, sizeof(size_t), &szMaxDims[2], NULL); printf("\t\t\t\t\t3D_MAX_WIDTH\t %u\n", szMaxDims[2]); clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_HEIGHT, sizeof(size_t), &szMaxDims[3], NULL); printf("\t\t\t\t\t3D_MAX_HEIGHT\t %u\n", szMaxDims[3]); clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_DEPTH, sizeof(size_t), &szMaxDims[4], NULL); printf("\t\t\t\t\t3D_MAX_DEPTH\t %u\n", szMaxDims[4]); // CL_DEVICE_EXTENSIONS: get device extensions, and if any then parse & log the string onto separate lines clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS, sizeof(device_string), &device_string, NULL); if (device_string != 0) { printf("\n CL_DEVICE_EXTENSIONS:%s\n",device_string); } else { printf(" CL_DEVICE_EXTENSIONS: None\n"); } // CL_DEVICE_PREFERRED_VECTOR_WIDTH_ printf(" CL_DEVICE_PREFERRED_VECTOR_WIDTH_\t"); cl_uint vec_width [6]; clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, sizeof(cl_uint), &vec_width[0], NULL); clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, sizeof(cl_uint), &vec_width[1], NULL); clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, sizeof(cl_uint), &vec_width[2], NULL); clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, sizeof(cl_uint), &vec_width[3], NULL); clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, sizeof(cl_uint), &vec_width[4], NULL); clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, sizeof(cl_uint), &vec_width[5], NULL); printf("CHAR %u, SHORT %u, INT %u, FLOAT %u, DOUBLE %u\n\n\n", vec_width[0], vec_width[1], vec_width[2], vec_width[3], vec_width[4]); }