primarily to distribute project files that work. CMake is still better supported in general (not all demos/libs have been ported) Revert a recent change about warnings: %zu doesn't work in printf, %d does
368 lines
15 KiB
C++
368 lines
15 KiB
C++
/*
|
|
Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
|
|
Copyright (C) 2006 - 2010 Sony Computer Entertainment Inc.
|
|
|
|
This software is provided 'as-is', without any express or implied warranty.
|
|
In no event will the authors be held liable for any damages arising from the use of this software.
|
|
Permission is granted to anyone to use this software for any purpose,
|
|
including commercial applications, and to alter it and redistribute it freely,
|
|
subject to the following restrictions:
|
|
|
|
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
|
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
|
3. This notice may not be removed or altered from any source distribution.
|
|
*/
|
|
|
|
#include <stdlib.h>
|
|
#include <stdio.h>
|
|
#include <string.h>
|
|
|
|
|
|
|
|
|
|
#include "btOclUtils.h"
|
|
|
|
|
|
|
|
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
//! Gets the id of the nth device from the context
|
|
//!
|
|
//! @return the id or -1 when out of range
|
|
//! @param cxMainContext OpenCL context
|
|
//! @param device_idx index of the device of interest
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
cl_device_id btOclGetDev(cl_context cxMainContext, unsigned int nr)
|
|
{
|
|
size_t szParmDataBytes;
|
|
cl_device_id* cdDevices;
|
|
|
|
// get the list of GPU devices associated with context
|
|
clGetContextInfo(cxMainContext, CL_CONTEXT_DEVICES, 0, NULL, &szParmDataBytes);
|
|
|
|
if( szParmDataBytes / sizeof(cl_device_id) < nr ) {
|
|
return (cl_device_id)-1;
|
|
}
|
|
|
|
cdDevices = (cl_device_id*) malloc(szParmDataBytes);
|
|
|
|
clGetContextInfo(cxMainContext, CL_CONTEXT_DEVICES, szParmDataBytes, cdDevices, NULL);
|
|
|
|
cl_device_id device = cdDevices[nr];
|
|
free(cdDevices);
|
|
|
|
return device;
|
|
}
|
|
|
|
|
|
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
//! Gets the id of device with maximal FLOPS from the context
|
|
//!
|
|
//! @return the id
|
|
//! @param cxMainContext OpenCL context
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
cl_device_id btOclGetMaxFlopsDev(cl_context cxMainContext)
|
|
{
|
|
size_t szParmDataBytes;
|
|
cl_device_id* cdDevices;
|
|
|
|
// get the list of GPU devices associated with context
|
|
clGetContextInfo(cxMainContext, CL_CONTEXT_DEVICES, 0, NULL, &szParmDataBytes);
|
|
cdDevices = (cl_device_id*) malloc(szParmDataBytes);
|
|
size_t device_count = szParmDataBytes / sizeof(cl_device_id);
|
|
|
|
clGetContextInfo(cxMainContext, CL_CONTEXT_DEVICES, szParmDataBytes, cdDevices, NULL);
|
|
|
|
cl_device_id max_flops_device = cdDevices[0];
|
|
int max_flops = 0;
|
|
|
|
size_t current_device = 0;
|
|
|
|
// CL_DEVICE_MAX_COMPUTE_UNITS
|
|
cl_uint compute_units;
|
|
clGetDeviceInfo(cdDevices[current_device], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(compute_units), &compute_units, NULL);
|
|
|
|
// CL_DEVICE_MAX_CLOCK_FREQUENCY
|
|
cl_uint clock_frequency;
|
|
clGetDeviceInfo(cdDevices[current_device], CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(clock_frequency), &clock_frequency, NULL);
|
|
|
|
cl_device_type device_type;
|
|
clGetDeviceInfo(cdDevices[current_device], CL_DEVICE_TYPE, sizeof(device_type), &device_type, NULL);
|
|
|
|
int SIMDmultiplier = 1;
|
|
|
|
if( device_type == CL_DEVICE_TYPE_CPU )
|
|
{
|
|
// For simplicity assume that the CPU is running single SSE instructions
|
|
// This will of course depend on the kernel
|
|
SIMDmultiplier = 4;
|
|
} else if( device_type == CL_DEVICE_TYPE_GPU ) {
|
|
// Approximation to GPU compute power
|
|
// As long as this beats the CPU number that's the important thing, really
|
|
#if defined(CL_PLATFORM_INTEL)
|
|
// SSE - 4, AVX1,2 - 8 : TODO: detect AVX?
|
|
SIMDmultiplier = 4;
|
|
#elif defined(CL_PLATFORM_AMD)
|
|
// 16 processing elements, 5 ALUs each
|
|
SIMDmultiplier = 80;
|
|
#elif defined(CL_PLATFORM_NVIDIA)
|
|
// 8 processing elements, dual issue - pre-Fermi at least
|
|
SIMDmultiplier = 16;
|
|
#else
|
|
SIMDmultiplier = 1;
|
|
#endif
|
|
}
|
|
|
|
|
|
max_flops = compute_units * clock_frequency * SIMDmultiplier;
|
|
++current_device;
|
|
|
|
while( current_device < device_count )
|
|
{
|
|
// CL_DEVICE_MAX_COMPUTE_UNITS
|
|
cl_uint compute_units;
|
|
clGetDeviceInfo(cdDevices[current_device], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(compute_units), &compute_units, NULL);
|
|
|
|
// CL_DEVICE_MAX_CLOCK_FREQUENCY
|
|
cl_uint clock_frequency;
|
|
clGetDeviceInfo(cdDevices[current_device], CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(clock_frequency), &clock_frequency, NULL);
|
|
|
|
int flops = compute_units * clock_frequency;
|
|
if( flops > max_flops )
|
|
{
|
|
max_flops = flops;
|
|
max_flops_device = cdDevices[current_device];
|
|
}
|
|
++current_device;
|
|
}
|
|
|
|
free(cdDevices);
|
|
|
|
return max_flops_device;
|
|
}
|
|
|
|
|
|
//////////////////////////////////////////////////////////////////////////////
//! Loads a Program file and prepends the cPreamble to the code.
//!
//! The returned buffer is allocated with malloc() and NUL-terminated; the
//! caller owns it and releases it with free().
//!
//! @return the source string if succeeded, 0 otherwise (file cannot be
//!         opened, its size cannot be determined, out of memory, short read)
//! @param cFilename        program filename
//! @param cPreamble        code that is prepended to the loaded file, typically a set of #defines or a header;
//!                         a null pointer is treated like an empty string
//! @param szFinalLength    returned length of the code string (preamble + file, without the terminator);
//!                         may be 0 when the length is not needed, and is only written on success
//////////////////////////////////////////////////////////////////////////////
char* btOclLoadProgSource(const char* cFilename, const char* cPreamble, size_t* szFinalLength)
{
	if (cFilename == NULL)
	{
		return NULL;
	}

	// a missing preamble behaves like an empty one instead of crashing strlen()
	if (cPreamble == NULL)
	{
		cPreamble = "";
	}
	size_t szPreambleLength = strlen(cPreamble);

	// open the OpenCL source code file
	FILE* pFileStream = fopen(cFilename, "rb");
	if (pFileStream == NULL)
	{
		return NULL;
	}

	char* cSourceString = NULL;

	// get the length of the source code; ftell() reports failure as -1, which
	// must not be turned into a huge size_t
	long lSourceLength = -1;
	if (fseek(pFileStream, 0, SEEK_END) == 0)
	{
		lSourceLength = ftell(pFileStream);
	}

	if (lSourceLength >= 0 && fseek(pFileStream, 0, SEEK_SET) == 0)
	{
		size_t szSourceLength = (size_t)lSourceLength;
		size_t szTotalLength = szPreambleLength + szSourceLength;

		// allocate a buffer for preamble + source + terminator and read the file in
		cSourceString = (char*)malloc(szTotalLength + 1);
		if (cSourceString != NULL)
		{
			memcpy(cSourceString, cPreamble, szPreambleLength);

			// reading byte-wise makes the return value the number of bytes read,
			// so a short read is detected (and an empty file is not an error)
			if (fread(cSourceString + szPreambleLength, 1, szSourceLength, pFileStream) == szSourceLength)
			{
				cSourceString[szTotalLength] = '\0';
				if (szFinalLength != NULL)
				{
					*szFinalLength = szTotalLength;
				}
			}
			else
			{
				free(cSourceString);
				cSourceString = NULL;
			}
		}
	}

	// the file is closed on every path once it has been opened
	fclose(pFileStream);

	return cSourceString;
}
|
|
|
|
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
//! Gets the id of the first device from the context
|
|
//!
|
|
//! @return the id
|
|
//! @param cxMainContext OpenCL context
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
cl_device_id btOclGetFirstDev(cl_context cxMainContext)
|
|
{
|
|
size_t szParmDataBytes;
|
|
cl_device_id* cdDevices;
|
|
|
|
// get the list of GPU devices associated with context
|
|
clGetContextInfo(cxMainContext, CL_CONTEXT_DEVICES, 0, NULL, &szParmDataBytes);
|
|
cdDevices = (cl_device_id*) malloc(szParmDataBytes);
|
|
|
|
clGetContextInfo(cxMainContext, CL_CONTEXT_DEVICES, szParmDataBytes, cdDevices, NULL);
|
|
|
|
cl_device_id first = cdDevices[0];
|
|
free(cdDevices);
|
|
|
|
return first;
|
|
}
|
|
|
|
|
|
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
//! Print info about the device
|
|
//!
|
|
//! @param device OpenCL id of the device
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
void btOclPrintDevInfo(cl_device_id device)
|
|
{
|
|
char device_string[1024];
|
|
bool nv_device_attibute_query = false;
|
|
|
|
// CL_DEVICE_NAME
|
|
clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(device_string), &device_string, NULL);
|
|
printf(" CL_DEVICE_NAME: \t\t\t%s\n", device_string);
|
|
|
|
// CL_DEVICE_VENDOR
|
|
clGetDeviceInfo(device, CL_DEVICE_VENDOR, sizeof(device_string), &device_string, NULL);
|
|
printf(" CL_DEVICE_VENDOR: \t\t\t%s\n", device_string);
|
|
|
|
// CL_DRIVER_VERSION
|
|
clGetDeviceInfo(device, CL_DRIVER_VERSION, sizeof(device_string), &device_string, NULL);
|
|
printf(" CL_DRIVER_VERSION: \t\t\t%s\n", device_string);
|
|
|
|
// CL_DEVICE_INFO
|
|
cl_device_type type;
|
|
clGetDeviceInfo(device, CL_DEVICE_TYPE, sizeof(type), &type, NULL);
|
|
if( type & CL_DEVICE_TYPE_CPU )
|
|
printf(" CL_DEVICE_TYPE:\t\t\t%s\n", "CL_DEVICE_TYPE_CPU");
|
|
if( type & CL_DEVICE_TYPE_GPU )
|
|
printf(" CL_DEVICE_TYPE:\t\t\t%s\n", "CL_DEVICE_TYPE_GPU");
|
|
if( type & CL_DEVICE_TYPE_ACCELERATOR )
|
|
printf(" CL_DEVICE_TYPE:\t\t\t%s\n", "CL_DEVICE_TYPE_ACCELERATOR");
|
|
if( type & CL_DEVICE_TYPE_DEFAULT )
|
|
printf(" CL_DEVICE_TYPE:\t\t\t%s\n", "CL_DEVICE_TYPE_DEFAULT");
|
|
|
|
// CL_DEVICE_MAX_COMPUTE_UNITS
|
|
cl_uint compute_units;
|
|
clGetDeviceInfo(device, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(compute_units), &compute_units, NULL);
|
|
printf(" CL_DEVICE_MAX_COMPUTE_UNITS:\t\t%u\n", compute_units);
|
|
|
|
// CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS
|
|
size_t workitem_dims;
|
|
clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(workitem_dims), &workitem_dims, NULL);
|
|
printf(" CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS:\t%d\n", workitem_dims);
|
|
|
|
// CL_DEVICE_MAX_WORK_ITEM_SIZES
|
|
size_t workitem_size[3];
|
|
clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(workitem_size), &workitem_size, NULL);
|
|
printf(" CL_DEVICE_MAX_WORK_ITEM_SIZES:\t%d / %d / %d \n", workitem_size[0], workitem_size[1], workitem_size[2]);
|
|
|
|
// CL_DEVICE_MAX_WORK_GROUP_SIZE
|
|
size_t workgroup_size;
|
|
clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(workgroup_size), &workgroup_size, NULL);
|
|
printf(" CL_DEVICE_MAX_WORK_GROUP_SIZE:\t%d\n", workgroup_size);
|
|
|
|
// CL_DEVICE_MAX_CLOCK_FREQUENCY
|
|
cl_uint clock_frequency;
|
|
clGetDeviceInfo(device, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(clock_frequency), &clock_frequency, NULL);
|
|
printf(" CL_DEVICE_MAX_CLOCK_FREQUENCY:\t%u MHz\n", clock_frequency);
|
|
|
|
// CL_DEVICE_ADDRESS_BITS
|
|
cl_uint addr_bits;
|
|
clGetDeviceInfo(device, CL_DEVICE_ADDRESS_BITS, sizeof(addr_bits), &addr_bits, NULL);
|
|
printf(" CL_DEVICE_ADDRESS_BITS:\t\t%u\n", addr_bits);
|
|
|
|
// CL_DEVICE_MAX_MEM_ALLOC_SIZE
|
|
cl_ulong max_mem_alloc_size;
|
|
clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(max_mem_alloc_size), &max_mem_alloc_size, NULL);
|
|
printf(" CL_DEVICE_MAX_MEM_ALLOC_SIZE:\t\t%u MByte\n", (unsigned int)(max_mem_alloc_size / (1024 * 1024)));
|
|
|
|
// CL_DEVICE_GLOBAL_MEM_SIZE
|
|
cl_ulong mem_size;
|
|
clGetDeviceInfo(device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(mem_size), &mem_size, NULL);
|
|
printf(" CL_DEVICE_GLOBAL_MEM_SIZE:\t\t%u MByte\n", (unsigned int)(mem_size / (1024 * 1024)));
|
|
|
|
// CL_DEVICE_ERROR_CORRECTION_SUPPORT
|
|
cl_bool error_correction_support;
|
|
clGetDeviceInfo(device, CL_DEVICE_ERROR_CORRECTION_SUPPORT, sizeof(error_correction_support), &error_correction_support, NULL);
|
|
printf(" CL_DEVICE_ERROR_CORRECTION_SUPPORT:\t%s\n", error_correction_support == CL_TRUE ? "yes" : "no");
|
|
|
|
// CL_DEVICE_LOCAL_MEM_TYPE
|
|
cl_device_local_mem_type local_mem_type;
|
|
clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_TYPE, sizeof(local_mem_type), &local_mem_type, NULL);
|
|
printf(" CL_DEVICE_LOCAL_MEM_TYPE:\t\t%s\n", local_mem_type == 1 ? "local" : "global");
|
|
|
|
// CL_DEVICE_LOCAL_MEM_SIZE
|
|
clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(mem_size), &mem_size, NULL);
|
|
printf(" CL_DEVICE_LOCAL_MEM_SIZE:\t\t%u KByte\n", (unsigned int)(mem_size / 1024));
|
|
|
|
// CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE
|
|
clGetDeviceInfo(device, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof(mem_size), &mem_size, NULL);
|
|
printf(" CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE:\t%u KByte\n", (unsigned int)(mem_size / 1024));
|
|
|
|
// CL_DEVICE_QUEUE_PROPERTIES
|
|
cl_command_queue_properties queue_properties;
|
|
clGetDeviceInfo(device, CL_DEVICE_QUEUE_PROPERTIES, sizeof(queue_properties), &queue_properties, NULL);
|
|
if( queue_properties & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE )
|
|
printf(" CL_DEVICE_QUEUE_PROPERTIES:\t\t%s\n", "CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE");
|
|
if( queue_properties & CL_QUEUE_PROFILING_ENABLE )
|
|
printf(" CL_DEVICE_QUEUE_PROPERTIES:\t\t%s\n", "CL_QUEUE_PROFILING_ENABLE");
|
|
|
|
// CL_DEVICE_IMAGE_SUPPORT
|
|
cl_bool image_support;
|
|
clGetDeviceInfo(device, CL_DEVICE_IMAGE_SUPPORT, sizeof(image_support), &image_support, NULL);
|
|
printf(" CL_DEVICE_IMAGE_SUPPORT:\t\t%u\n", image_support);
|
|
|
|
// CL_DEVICE_MAX_READ_IMAGE_ARGS
|
|
cl_uint max_read_image_args;
|
|
clGetDeviceInfo(device, CL_DEVICE_MAX_READ_IMAGE_ARGS, sizeof(max_read_image_args), &max_read_image_args, NULL);
|
|
printf(" CL_DEVICE_MAX_READ_IMAGE_ARGS:\t%u\n", max_read_image_args);
|
|
|
|
// CL_DEVICE_MAX_WRITE_IMAGE_ARGS
|
|
cl_uint max_write_image_args;
|
|
clGetDeviceInfo(device, CL_DEVICE_MAX_WRITE_IMAGE_ARGS, sizeof(max_write_image_args), &max_write_image_args, NULL);
|
|
printf(" CL_DEVICE_MAX_WRITE_IMAGE_ARGS:\t%u\n", max_write_image_args);
|
|
|
|
// CL_DEVICE_IMAGE2D_MAX_WIDTH, CL_DEVICE_IMAGE2D_MAX_HEIGHT, CL_DEVICE_IMAGE3D_MAX_WIDTH, CL_DEVICE_IMAGE3D_MAX_HEIGHT, CL_DEVICE_IMAGE3D_MAX_DEPTH
|
|
size_t szMaxDims[5];
|
|
printf("\n CL_DEVICE_IMAGE <dim>");
|
|
clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof(size_t), &szMaxDims[0], NULL);
|
|
printf("\t\t\t2D_MAX_WIDTH\t %d\n", szMaxDims[0]);
|
|
clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof(size_t), &szMaxDims[1], NULL);
|
|
printf("\t\t\t\t\t2D_MAX_HEIGHT\t %d\n", szMaxDims[1]);
|
|
clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_WIDTH, sizeof(size_t), &szMaxDims[2], NULL);
|
|
printf("\t\t\t\t\t3D_MAX_WIDTH\t %d\n", szMaxDims[2]);
|
|
clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_HEIGHT, sizeof(size_t), &szMaxDims[3], NULL);
|
|
printf("\t\t\t\t\t3D_MAX_HEIGHT\t %d\n", szMaxDims[3]);
|
|
clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_DEPTH, sizeof(size_t), &szMaxDims[4], NULL);
|
|
printf("\t\t\t\t\t3D_MAX_DEPTH\t %d\n", szMaxDims[4]);
|
|
|
|
// CL_DEVICE_EXTENSIONS: get device extensions, and if any then parse & log the string onto separate lines
|
|
clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS, sizeof(device_string), &device_string, NULL);
|
|
if (device_string != 0)
|
|
{
|
|
printf("\n CL_DEVICE_EXTENSIONS:%s\n",device_string);
|
|
}
|
|
else
|
|
{
|
|
printf(" CL_DEVICE_EXTENSIONS: None\n");
|
|
}
|
|
|
|
// CL_DEVICE_PREFERRED_VECTOR_WIDTH_<type>
|
|
printf(" CL_DEVICE_PREFERRED_VECTOR_WIDTH_<t>\t");
|
|
cl_uint vec_width [6];
|
|
clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, sizeof(cl_uint), &vec_width[0], NULL);
|
|
clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, sizeof(cl_uint), &vec_width[1], NULL);
|
|
clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, sizeof(cl_uint), &vec_width[2], NULL);
|
|
clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, sizeof(cl_uint), &vec_width[3], NULL);
|
|
clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, sizeof(cl_uint), &vec_width[4], NULL);
|
|
clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, sizeof(cl_uint), &vec_width[5], NULL);
|
|
printf("CHAR %u, SHORT %u, INT %u, FLOAT %u, DOUBLE %u\n\n\n",
|
|
vec_width[0], vec_width[1], vec_width[2], vec_width[3], vec_width[4]);
|
|
}
|