///VectorAdd sample, from the NVidia JumpStart Guide ///http://developer.download.nvidia.com/OpenCL/NVIDIA_OpenCL_JumpStart_Guide.pdf ///Instead of #include we include ///Apart from this include file, all other code should compile and work on OpenCL compliant implementation #include #include #include #include void printDevInfo(cl_device_id device) { char device_string[1024]; clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(device_string), &device_string, NULL); printf( " Device %s:\n", device_string); // CL_DEVICE_INFO cl_device_type type; clGetDeviceInfo(device, CL_DEVICE_TYPE, sizeof(type), &type, NULL); if( type & CL_DEVICE_TYPE_CPU ) printf(" CL_DEVICE_TYPE:\t\t%s\n", "CL_DEVICE_TYPE_CPU"); if( type & CL_DEVICE_TYPE_GPU ) printf( " CL_DEVICE_TYPE:\t\t%s\n", "CL_DEVICE_TYPE_GPU"); if( type & CL_DEVICE_TYPE_ACCELERATOR ) printf( " CL_DEVICE_TYPE:\t\t%s\n", "CL_DEVICE_TYPE_ACCELERATOR"); if( type & CL_DEVICE_TYPE_DEFAULT ) printf( " CL_DEVICE_TYPE:\t\t%s\n", "CL_DEVICE_TYPE_DEFAULT"); // CL_DEVICE_MAX_COMPUTE_UNITS cl_uint compute_units; clGetDeviceInfo(device, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(compute_units), &compute_units, NULL); printf( " CL_DEVICE_MAX_COMPUTE_UNITS:\t%d\n", compute_units); // CL_DEVICE_MAX_WORK_GROUP_SIZE size_t workitem_size[3]; clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(workitem_size), &workitem_size, NULL); printf( " CL_DEVICE_MAX_WORK_ITEM_SIZES:\t%d / %d / %d \n", workitem_size[0], workitem_size[1], workitem_size[2]); } // Main function // ********************************************************************* int main(int argc, char **argv) { void *srcA, *srcB, *dst; // Host buffers for OpenCL test cl_context cxGPUContext; // OpenCL context cl_command_queue cqCommandQue; // OpenCL command que cl_device_id* cdDevices; // OpenCL device list cl_program cpProgram; // OpenCL program cl_kernel ckKernel; // OpenCL kernel cl_mem cmMemObjs[3]; // OpenCL memory buffer objects: 3 for device size_t szGlobalWorkSize[1]; // 1D var for Total # of work items size_t szLocalWorkSize[1]; // 1D var for # of work items in the work group size_t szParmDataBytes; // Byte size of context information cl_int ciErr1, ciErr2; // Error code var int iTestN = 100000 * 8; // Size of Vectors to process // set Global and Local work size dimensions szGlobalWorkSize[0] = iTestN >> 3; // do 8 computations per work item szLocalWorkSize[0]= iTestN>>3; // Allocate and initialize host arrays srcA = (void *)malloc (sizeof(cl_float) * iTestN); srcB = (void *)malloc (sizeof(cl_float) * iTestN); dst = (void *)malloc (sizeof(cl_float) * iTestN); int i; // Initialize arrays with some values for (i=0;i