/* Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org Copyright (C) 2006 - 2010 Sony Computer Entertainment Inc. This software is provided 'as-is', without any express or implied warranty. In no event will the authors be held liable for any damages arising from the use of this software. Permission is granted to anyone to use this software for any purpose, including commercial applications, and to alter it and redistribute it freely, subject to the following restrictions: 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. 3. This notice may not be removed or altered from any source distribution. */ #include #include #include #include "btOclUtils.h" ////////////////////////////////////////////////////////////////////////////// //! Gets the id of the nth device from the context //! //! @return the id or -1 when out of range //! @param cxMainContext OpenCL context //! @param device_idx index of the device of interest ////////////////////////////////////////////////////////////////////////////// cl_device_id btOclGetDev(cl_context cxMainContext, unsigned int nr) { size_t szParmDataBytes; cl_device_id* cdDevices; // get the list of GPU devices associated with context clGetContextInfo(cxMainContext, CL_CONTEXT_DEVICES, 0, NULL, &szParmDataBytes); if( szParmDataBytes / sizeof(cl_device_id) < nr ) { return (cl_device_id)-1; } cdDevices = (cl_device_id*) malloc(szParmDataBytes); clGetContextInfo(cxMainContext, CL_CONTEXT_DEVICES, szParmDataBytes, cdDevices, NULL); cl_device_id device = cdDevices[nr]; free(cdDevices); return device; } ////////////////////////////////////////////////////////////////////////////// //! Gets the id of device with maximal FLOPS from the context //! //! @return the id //! @param cxMainContext OpenCL context ////////////////////////////////////////////////////////////////////////////// cl_device_id btOclGetMaxFlopsDev(cl_context cxMainContext) { size_t szParmDataBytes; cl_device_id* cdDevices; // get the list of GPU devices associated with context clGetContextInfo(cxMainContext, CL_CONTEXT_DEVICES, 0, NULL, &szParmDataBytes); cdDevices = (cl_device_id*) malloc(szParmDataBytes); size_t device_count = szParmDataBytes / sizeof(cl_device_id); clGetContextInfo(cxMainContext, CL_CONTEXT_DEVICES, szParmDataBytes, cdDevices, NULL); cl_device_id max_flops_device = cdDevices[0]; int max_flops = 0; size_t current_device = 0; // CL_DEVICE_MAX_COMPUTE_UNITS cl_uint compute_units; clGetDeviceInfo(cdDevices[current_device], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(compute_units), &compute_units, NULL); // CL_DEVICE_MAX_CLOCK_FREQUENCY cl_uint clock_frequency; clGetDeviceInfo(cdDevices[current_device], CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(clock_frequency), &clock_frequency, NULL); cl_device_type device_type; clGetDeviceInfo(cdDevices[current_device], CL_DEVICE_TYPE, sizeof(device_type), &device_type, NULL); int SIMDmultiplier = 1; if( device_type == CL_DEVICE_TYPE_CPU ) { // For simplicity assume that the CPU is running single SSE instructions // This will of course depend on the kernel SIMDmultiplier = 4; } else if( device_type == CL_DEVICE_TYPE_GPU ) { // Approximation to GPU compute power // As long as this beats the CPU number that's the important thing, really #if defined(CL_PLATFORM_AMD) // 16 processing elements, 5 ALUs each SIMDmultiplier = 80; #elif defined(CL_PLATFORM_NVIDIA) // 8 processing elements, dual issue - pre-Fermi at least SIMDmultiplier = 16; #else SIMDmultiplier = 1; #endif } max_flops = compute_units * clock_frequency * SIMDmultiplier; ++current_device; while( current_device < device_count ) { // CL_DEVICE_MAX_COMPUTE_UNITS cl_uint compute_units; clGetDeviceInfo(cdDevices[current_device], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(compute_units), &compute_units, NULL); // CL_DEVICE_MAX_CLOCK_FREQUENCY cl_uint clock_frequency; clGetDeviceInfo(cdDevices[current_device], CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(clock_frequency), &clock_frequency, NULL); int flops = compute_units * clock_frequency; if( flops > max_flops ) { max_flops = flops; max_flops_device = cdDevices[current_device]; } ++current_device; } free(cdDevices); return max_flops_device; } ////////////////////////////////////////////////////////////////////////////// //! Loads a Program file and prepends the cPreamble to the code. //! //! @return the source string if succeeded, 0 otherwise //! @param cFilename program filename //! @param cPreamble code that is prepended to the loaded file, typically a set of #defines or a header //! @param szFinalLength returned length of the code string ////////////////////////////////////////////////////////////////////////////// char* btOclLoadProgSource(const char* cFilename, const char* cPreamble, size_t* szFinalLength) { // locals FILE* pFileStream = NULL; size_t szSourceLength; // open the OpenCL source code file pFileStream = fopen(cFilename, "rb"); if(pFileStream == 0) { return NULL; } size_t szPreambleLength = strlen(cPreamble); // get the length of the source code fseek(pFileStream, 0, SEEK_END); szSourceLength = ftell(pFileStream); fseek(pFileStream, 0, SEEK_SET); // allocate a buffer for the source code string and read it in char* cSourceString = (char *)malloc(szSourceLength + szPreambleLength + 1); memcpy(cSourceString, cPreamble, szPreambleLength); fread((cSourceString) + szPreambleLength, szSourceLength, 1, pFileStream); // close the file and return the total length of the combined (preamble + source) string fclose(pFileStream); if(szFinalLength != 0) { *szFinalLength = szSourceLength + szPreambleLength; } cSourceString[szSourceLength + szPreambleLength] = '\0'; return cSourceString; } ////////////////////////////////////////////////////////////////////////////// //! Gets the id of the first device from the context //! //! @return the id //! @param cxMainContext OpenCL context ////////////////////////////////////////////////////////////////////////////// cl_device_id btOclGetFirstDev(cl_context cxMainContext) { size_t szParmDataBytes; cl_device_id* cdDevices; // get the list of GPU devices associated with context clGetContextInfo(cxMainContext, CL_CONTEXT_DEVICES, 0, NULL, &szParmDataBytes); cdDevices = (cl_device_id*) malloc(szParmDataBytes); clGetContextInfo(cxMainContext, CL_CONTEXT_DEVICES, szParmDataBytes, cdDevices, NULL); cl_device_id first = cdDevices[0]; free(cdDevices); return first; } ////////////////////////////////////////////////////////////////////////////// //! Print info about the device //! //! @param device OpenCL id of the device ////////////////////////////////////////////////////////////////////////////// void btOclPrintDevInfo(cl_device_id device) { char device_string[1024]; bool nv_device_attibute_query = false; // CL_DEVICE_NAME clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(device_string), &device_string, NULL); printf(" CL_DEVICE_NAME: \t\t\t%s\n", device_string); // CL_DEVICE_VENDOR clGetDeviceInfo(device, CL_DEVICE_VENDOR, sizeof(device_string), &device_string, NULL); printf(" CL_DEVICE_VENDOR: \t\t\t%s\n", device_string); // CL_DRIVER_VERSION clGetDeviceInfo(device, CL_DRIVER_VERSION, sizeof(device_string), &device_string, NULL); printf(" CL_DRIVER_VERSION: \t\t\t%s\n", device_string); // CL_DEVICE_INFO cl_device_type type; clGetDeviceInfo(device, CL_DEVICE_TYPE, sizeof(type), &type, NULL); if( type & CL_DEVICE_TYPE_CPU ) printf(" CL_DEVICE_TYPE:\t\t\t%s\n", "CL_DEVICE_TYPE_CPU"); if( type & CL_DEVICE_TYPE_GPU ) printf(" CL_DEVICE_TYPE:\t\t\t%s\n", "CL_DEVICE_TYPE_GPU"); if( type & CL_DEVICE_TYPE_ACCELERATOR ) printf(" CL_DEVICE_TYPE:\t\t\t%s\n", "CL_DEVICE_TYPE_ACCELERATOR"); if( type & CL_DEVICE_TYPE_DEFAULT ) printf(" CL_DEVICE_TYPE:\t\t\t%s\n", "CL_DEVICE_TYPE_DEFAULT"); // CL_DEVICE_MAX_COMPUTE_UNITS cl_uint compute_units; clGetDeviceInfo(device, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(compute_units), &compute_units, NULL); printf(" CL_DEVICE_MAX_COMPUTE_UNITS:\t\t%u\n", compute_units); // CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS size_t workitem_dims; clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(workitem_dims), &workitem_dims, NULL); printf(" CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS:\t%u\n", workitem_dims); // CL_DEVICE_MAX_WORK_ITEM_SIZES size_t workitem_size[3]; clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(workitem_size), &workitem_size, NULL); printf(" CL_DEVICE_MAX_WORK_ITEM_SIZES:\t%u / %u / %u \n", workitem_size[0], workitem_size[1], workitem_size[2]); // CL_DEVICE_MAX_WORK_GROUP_SIZE size_t workgroup_size; clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(workgroup_size), &workgroup_size, NULL); printf(" CL_DEVICE_MAX_WORK_GROUP_SIZE:\t%u\n", workgroup_size); // CL_DEVICE_MAX_CLOCK_FREQUENCY cl_uint clock_frequency; clGetDeviceInfo(device, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(clock_frequency), &clock_frequency, NULL); printf(" CL_DEVICE_MAX_CLOCK_FREQUENCY:\t%u MHz\n", clock_frequency); // CL_DEVICE_ADDRESS_BITS cl_uint addr_bits; clGetDeviceInfo(device, CL_DEVICE_ADDRESS_BITS, sizeof(addr_bits), &addr_bits, NULL); printf(" CL_DEVICE_ADDRESS_BITS:\t\t%u\n", addr_bits); // CL_DEVICE_MAX_MEM_ALLOC_SIZE cl_ulong max_mem_alloc_size; clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(max_mem_alloc_size), &max_mem_alloc_size, NULL); printf(" CL_DEVICE_MAX_MEM_ALLOC_SIZE:\t\t%u MByte\n", (unsigned int)(max_mem_alloc_size / (1024 * 1024))); // CL_DEVICE_GLOBAL_MEM_SIZE cl_ulong mem_size; clGetDeviceInfo(device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(mem_size), &mem_size, NULL); printf(" CL_DEVICE_GLOBAL_MEM_SIZE:\t\t%u MByte\n", (unsigned int)(mem_size / (1024 * 1024))); // CL_DEVICE_ERROR_CORRECTION_SUPPORT cl_bool error_correction_support; clGetDeviceInfo(device, CL_DEVICE_ERROR_CORRECTION_SUPPORT, sizeof(error_correction_support), &error_correction_support, NULL); printf(" CL_DEVICE_ERROR_CORRECTION_SUPPORT:\t%s\n", error_correction_support == CL_TRUE ? "yes" : "no"); // CL_DEVICE_LOCAL_MEM_TYPE cl_device_local_mem_type local_mem_type; clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_TYPE, sizeof(local_mem_type), &local_mem_type, NULL); printf(" CL_DEVICE_LOCAL_MEM_TYPE:\t\t%s\n", local_mem_type == 1 ? "local" : "global"); // CL_DEVICE_LOCAL_MEM_SIZE clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(mem_size), &mem_size, NULL); printf(" CL_DEVICE_LOCAL_MEM_SIZE:\t\t%u KByte\n", (unsigned int)(mem_size / 1024)); // CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE clGetDeviceInfo(device, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof(mem_size), &mem_size, NULL); printf(" CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE:\t%u KByte\n", (unsigned int)(mem_size / 1024)); // CL_DEVICE_QUEUE_PROPERTIES cl_command_queue_properties queue_properties; clGetDeviceInfo(device, CL_DEVICE_QUEUE_PROPERTIES, sizeof(queue_properties), &queue_properties, NULL); if( queue_properties & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE ) printf(" CL_DEVICE_QUEUE_PROPERTIES:\t\t%s\n", "CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE"); if( queue_properties & CL_QUEUE_PROFILING_ENABLE ) printf(" CL_DEVICE_QUEUE_PROPERTIES:\t\t%s\n", "CL_QUEUE_PROFILING_ENABLE"); // CL_DEVICE_IMAGE_SUPPORT cl_bool image_support; clGetDeviceInfo(device, CL_DEVICE_IMAGE_SUPPORT, sizeof(image_support), &image_support, NULL); printf(" CL_DEVICE_IMAGE_SUPPORT:\t\t%u\n", image_support); // CL_DEVICE_MAX_READ_IMAGE_ARGS cl_uint max_read_image_args; clGetDeviceInfo(device, CL_DEVICE_MAX_READ_IMAGE_ARGS, sizeof(max_read_image_args), &max_read_image_args, NULL); printf(" CL_DEVICE_MAX_READ_IMAGE_ARGS:\t%u\n", max_read_image_args); // CL_DEVICE_MAX_WRITE_IMAGE_ARGS cl_uint max_write_image_args; clGetDeviceInfo(device, CL_DEVICE_MAX_WRITE_IMAGE_ARGS, sizeof(max_write_image_args), &max_write_image_args, NULL); printf(" CL_DEVICE_MAX_WRITE_IMAGE_ARGS:\t%u\n", max_write_image_args); // CL_DEVICE_IMAGE2D_MAX_WIDTH, CL_DEVICE_IMAGE2D_MAX_HEIGHT, CL_DEVICE_IMAGE3D_MAX_WIDTH, CL_DEVICE_IMAGE3D_MAX_HEIGHT, CL_DEVICE_IMAGE3D_MAX_DEPTH size_t szMaxDims[5]; printf("\n CL_DEVICE_IMAGE "); clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof(size_t), &szMaxDims[0], NULL); printf("\t\t\t2D_MAX_WIDTH\t %u\n", szMaxDims[0]); clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof(size_t), &szMaxDims[1], NULL); printf("\t\t\t\t\t2D_MAX_HEIGHT\t %u\n", szMaxDims[1]); clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_WIDTH, sizeof(size_t), &szMaxDims[2], NULL); printf("\t\t\t\t\t3D_MAX_WIDTH\t %u\n", szMaxDims[2]); clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_HEIGHT, sizeof(size_t), &szMaxDims[3], NULL); printf("\t\t\t\t\t3D_MAX_HEIGHT\t %u\n", szMaxDims[3]); clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_DEPTH, sizeof(size_t), &szMaxDims[4], NULL); printf("\t\t\t\t\t3D_MAX_DEPTH\t %u\n", szMaxDims[4]); // CL_DEVICE_EXTENSIONS: get device extensions, and if any then parse & log the string onto separate lines clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS, sizeof(device_string), &device_string, NULL); if (device_string != 0) { printf("\n CL_DEVICE_EXTENSIONS:%s\n",device_string); } else { printf(" CL_DEVICE_EXTENSIONS: None\n"); } // CL_DEVICE_PREFERRED_VECTOR_WIDTH_ printf(" CL_DEVICE_PREFERRED_VECTOR_WIDTH_\t"); cl_uint vec_width [6]; clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, sizeof(cl_uint), &vec_width[0], NULL); clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, sizeof(cl_uint), &vec_width[1], NULL); clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, sizeof(cl_uint), &vec_width[2], NULL); clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, sizeof(cl_uint), &vec_width[3], NULL); clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, sizeof(cl_uint), &vec_width[4], NULL); clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, sizeof(cl_uint), &vec_width[5], NULL); printf("CHAR %u, SHORT %u, INT %u, FLOAT %u, DOUBLE %u\n\n\n", vec_width[0], vec_width[1], vec_width[2], vec_width[3], vec_width[4]); }