fix: some files didn't have the svn:eol-style native property yet

2010-03-06 15:23:36 +00:00
parent 4fd48ac691
commit 81f04a4d48
641 changed files with 301123 additions and 301123 deletions
--- a/Demos/MiniCL_VectorAdd/MiniCL_VectorAdd.cpp
+++ b/Demos/MiniCL_VectorAdd/MiniCL_VectorAdd.cpp
@@ -1,188 +1,188 @@
-
-///VectorAdd sample, from the NVidia JumpStart Guide
-///http://developer.download.nvidia.com/OpenCL/NVIDIA_OpenCL_JumpStart_Guide.pdf
-
-///Instead of #include <CL/cl.h> we include <MiniCL/cl.h>
-///Apart from this include file, all other code should compile and work on OpenCL compliant implementation
-
-#define USE_MINICL 1
-#ifdef USE_MINICL
-#include "MiniCL/cl.h"
-#else //USE_MINICL
-#include <CL/cl.h>
-#endif//USE_MINICL
-
-#include <stdio.h>
-#include <math.h>
-#include <stdlib.h>
-
-
-void printDevInfo(cl_device_id device)
-{
-    char device_string[1024];
-	
-    clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(device_string), &device_string, NULL);
-    printf(  " Device %s:\n", device_string);
-
-    // CL_DEVICE_INFO
-    cl_device_type type;
-    clGetDeviceInfo(device, CL_DEVICE_TYPE, sizeof(type), &type, NULL);
-    if( type & CL_DEVICE_TYPE_CPU )
-        printf(" CL_DEVICE_TYPE:\t\t%s\n", "CL_DEVICE_TYPE_CPU");
-    if( type & CL_DEVICE_TYPE_GPU )
-        printf(  " CL_DEVICE_TYPE:\t\t%s\n", "CL_DEVICE_TYPE_GPU");
-    if( type & CL_DEVICE_TYPE_ACCELERATOR )
-        printf(  " CL_DEVICE_TYPE:\t\t%s\n", "CL_DEVICE_TYPE_ACCELERATOR");
-    if( type & CL_DEVICE_TYPE_DEFAULT )
-        printf(  " CL_DEVICE_TYPE:\t\t%s\n", "CL_DEVICE_TYPE_DEFAULT");
-    
-    // CL_DEVICE_MAX_COMPUTE_UNITS
-    cl_uint compute_units;
-    clGetDeviceInfo(device, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(compute_units), &compute_units, NULL);
-    printf(  " CL_DEVICE_MAX_COMPUTE_UNITS:\t%d\n", compute_units);
-
-    // CL_DEVICE_MAX_WORK_GROUP_SIZE
-    size_t workitem_size[3];
-    clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(workitem_size), &workitem_size, NULL);
-    printf(  " CL_DEVICE_MAX_WORK_ITEM_SIZES:\t%d / %d / %d \n", workitem_size[0], workitem_size[1], workitem_size[2]);
-    
-}
-
-
-
-
-// Main function 
-// *********************************************************************
-int main(int argc, char **argv)
-{
-	void *srcA, *srcB, *dst;        // Host buffers for OpenCL test
-    cl_context cxGPUContext;       // OpenCL context
-    cl_command_queue cqCommandQue;  // OpenCL command que
-    cl_device_id* cdDevices;        // OpenCL device list    
-    cl_program cpProgram;           // OpenCL program
-    cl_kernel ckKernel;             // OpenCL kernel
-    cl_mem cmMemObjs[3];            // OpenCL memory buffer objects:  3 for device
-    size_t szGlobalWorkSize[1];     // 1D var for Total # of work items
-    size_t szLocalWorkSize[1];		// 1D var for # of work items in the work group	
-    size_t szParmDataBytes;			// Byte size of context information
-    cl_int ciErr1, ciErr2;			// Error code var
-    int iTestN = 100000 * 8;		// Size of Vectors to process
-
-    // set Global and Local work size dimensions
-    szGlobalWorkSize[0] = iTestN >> 3;  // do 8 computations per work item
-    szLocalWorkSize[0]= iTestN>>3;
-
-
-    // Allocate and initialize host arrays
-    srcA = (void *)malloc (sizeof(cl_float) * iTestN);
-    srcB = (void *)malloc (sizeof(cl_float) * iTestN);
-    dst = (void *)malloc (sizeof(cl_float) * iTestN);
-
-	int i;
-
-	// Initialize arrays with some values
-	for (i=0;i<iTestN;i++)
-	{
-		((cl_float*)srcA)[i] = cl_float(i);
-		((cl_float*)srcB)[i] = 2;
-		((cl_float*)dst)[i]=-1;
-	}
-
-    // Create OpenCL context & context
-    cxGPUContext = clCreateContextFromType(0, CL_DEVICE_TYPE_CPU, NULL, NULL, &ciErr1); //could also be CL_DEVICE_TYPE_GPU
-	
-    // Query all devices available to the context
-    ciErr1 |= clGetContextInfo(cxGPUContext, CL_CONTEXT_DEVICES, 0, NULL, &szParmDataBytes);
-    cdDevices = (cl_device_id*)malloc(szParmDataBytes);
-    ciErr1 |= clGetContextInfo(cxGPUContext, CL_CONTEXT_DEVICES, szParmDataBytes, cdDevices, NULL);
-	if (cdDevices)
-	{
-		printDevInfo(cdDevices[0]);
-	}
-
-    // Create a command queue for first device the context reported
-    cqCommandQue = clCreateCommandQueue(cxGPUContext, cdDevices[0], 0, &ciErr2);
-    ciErr1 |= ciErr2; 
-
-    // Allocate the OpenCL source and result buffer memory objects on the device GMEM
-    cmMemObjs[0] = clCreateBuffer(cxGPUContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(cl_float8) * szGlobalWorkSize[0], srcA, &ciErr2);
-    ciErr1 |= ciErr2;
-    cmMemObjs[1] = clCreateBuffer(cxGPUContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(cl_float8) * szGlobalWorkSize[0], srcB, &ciErr2);
-    ciErr1 |= ciErr2;
-    cmMemObjs[2] = clCreateBuffer(cxGPUContext, CL_MEM_WRITE_ONLY, sizeof(cl_float8) * szGlobalWorkSize[0], NULL, &ciErr2);
-    ciErr1 |= ciErr2;
-
-///create kernels from binary
-	int numDevices = 1;
-	cl_int err;
-	::size_t* lengths = (::size_t*) malloc(numDevices * sizeof(::size_t));
-	const unsigned char** images = (const unsigned char**) malloc(numDevices * sizeof(const void*));
-
-	for (i = 0; i < numDevices; ++i) {
-		images[i] = 0;
-		lengths[i] = 0;
-	}
-
-	cpProgram = clCreateProgramWithBinary(cxGPUContext, numDevices,cdDevices,lengths, images, 0, &err);
-
-	// Build the executable program from a binary
-	ciErr1 |= clBuildProgram(cpProgram, 0, NULL, NULL, NULL, NULL);
-
-    // Create the kernel
-    ckKernel = clCreateKernel(cpProgram, "VectorAdd", &ciErr1);
-    
-    // Set the Argument values
-    ciErr1 |= clSetKernelArg(ckKernel, 0, sizeof(cl_mem), (void*)&cmMemObjs[0]);
-    ciErr1 |= clSetKernelArg(ckKernel, 1, sizeof(cl_mem), (void*)&cmMemObjs[1]);
-    ciErr1 |= clSetKernelArg(ckKernel, 2, sizeof(cl_mem), (void*)&cmMemObjs[2]);
-
-    // Copy input data from host to GPU and launch kernel 
-    ciErr1 |= clEnqueueNDRangeKernel(cqCommandQue, ckKernel, 1, NULL, szGlobalWorkSize, szLocalWorkSize, 0, NULL, NULL);
-
-    // Read back results and check accumulated errors
-    ciErr1 |= clEnqueueReadBuffer(cqCommandQue, cmMemObjs[2], CL_TRUE, 0, sizeof(cl_float8) * szGlobalWorkSize[0], dst, 0, NULL, NULL);
-
-    // Release kernel, program, and memory objects
-	// NOTE:  Most properly this should be done at any of the exit points above, but it is omitted elsewhere for clarity.
-    free(cdDevices);
-	clReleaseKernel(ckKernel);  
-    clReleaseProgram(cpProgram);
-    clReleaseCommandQueue(cqCommandQue);
-    clReleaseContext(cxGPUContext);
-
-
-    // print the results
-    int iErrorCount = 0;
-    for (i = 0; i < iTestN; i++) 
-    {
-		if (((float*)dst)[i] != ((float*)srcA)[i]+((float*)srcB)[i])
-			iErrorCount++;
-    }
-	
-	if (iErrorCount)
-	{
-		printf("MiniCL validation FAILED\n");
-	} else
-	{
-		printf("MiniCL validation SUCCESSFULL\n");
-	}
-    // Free host memory, close log and return success
-	for (i = 0; i < 3; i++)
-    {
-        clReleaseMemObject(cmMemObjs[i]);
-    }
-
-    free(srcA); 
-    free(srcB);
-    free (dst);
-}
-
-
-#ifdef USE_MINICL
-#include "MiniCL/cl_MiniCL_Defs.h"
-extern "C"
-{
-	#include "VectorAddKernels.cl"
-}
-MINICL_REGISTER(VectorAdd)
+
+///VectorAdd sample, from the NVidia JumpStart Guide
+///http://developer.download.nvidia.com/OpenCL/NVIDIA_OpenCL_JumpStart_Guide.pdf
+
+///Instead of #include <CL/cl.h> we include <MiniCL/cl.h>
+///Apart from this include file, all other code should compile and work on an OpenCL-compliant implementation
+
+#define USE_MINICL 1
+#ifdef USE_MINICL
+#include "MiniCL/cl.h"
+#else //USE_MINICL
+#include <CL/cl.h>
+#endif//USE_MINICL
+
+#include <stdio.h>
+#include <math.h>
+#include <stdlib.h>
+
+/// Prints the name, type, compute-unit count and max work-item sizes of `device` to stdout; clGetDeviceInfo return codes are not checked.
+void printDevInfo(cl_device_id device)
+{
+    char device_string[1024]; // receives the device name
+	
+    clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(device_string), &device_string, NULL);
+    printf(  " Device %s:\n", device_string);
+
+    // CL_DEVICE_TYPE: each flag is tested separately, so more than one line may be printed
+    cl_device_type type;
+    clGetDeviceInfo(device, CL_DEVICE_TYPE, sizeof(type), &type, NULL);
+    if( type & CL_DEVICE_TYPE_CPU )
+        printf(" CL_DEVICE_TYPE:\t\t%s\n", "CL_DEVICE_TYPE_CPU");
+    if( type & CL_DEVICE_TYPE_GPU )
+        printf(  " CL_DEVICE_TYPE:\t\t%s\n", "CL_DEVICE_TYPE_GPU");
+    if( type & CL_DEVICE_TYPE_ACCELERATOR )
+        printf(  " CL_DEVICE_TYPE:\t\t%s\n", "CL_DEVICE_TYPE_ACCELERATOR");
+    if( type & CL_DEVICE_TYPE_DEFAULT )
+        printf(  " CL_DEVICE_TYPE:\t\t%s\n", "CL_DEVICE_TYPE_DEFAULT");
+    
+    // CL_DEVICE_MAX_COMPUTE_UNITS
+    cl_uint compute_units;
+    clGetDeviceInfo(device, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(compute_units), &compute_units, NULL);
+    printf(  " CL_DEVICE_MAX_COMPUTE_UNITS:\t%d\n", compute_units); // NOTE(review): %d is paired with a cl_uint; presumably unsigned, so %u would match - confirm
+
+    // CL_DEVICE_MAX_WORK_ITEM_SIZES (three values are read)
+    size_t workitem_size[3];
+    clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(workitem_size), &workitem_size, NULL);
+    printf(  " CL_DEVICE_MAX_WORK_ITEM_SIZES:\t%d / %d / %d \n", workitem_size[0], workitem_size[1], workitem_size[2]); // NOTE(review): %d is given size_t arguments; mismatched where size_t is wider than int
+    
+}
+
+
+
+
+// Main function: adds two float vectors with the "VectorAdd" kernel and validates the result on the host
+// *********************************************************************
+int main(int argc, char **argv)
+{
+	void *srcA, *srcB, *dst;        // Host buffers for OpenCL test
+    cl_context cxGPUContext;       // OpenCL context
+    cl_command_queue cqCommandQue;  // OpenCL command queue
+    cl_device_id* cdDevices;        // OpenCL device list    
+    cl_program cpProgram;           // OpenCL program
+    cl_kernel ckKernel;             // OpenCL kernel
+    cl_mem cmMemObjs[3];            // OpenCL memory buffer objects: [0] = srcA copy, [1] = srcB copy, [2] = result
+    size_t szGlobalWorkSize[1];     // 1D var for Total # of work items
+    size_t szLocalWorkSize[1];		// 1D var for # of work items in the work group	
+    size_t szParmDataBytes;			// Byte size of the context's device list
+    cl_int ciErr1, ciErr2;			// Error codes: ciErr1 collects results via |=, ciErr2 receives per-call status
+    int iTestN = 100000 * 8;		// Size of Vectors to process
+
+    // set Global and Local work size dimensions
+    szGlobalWorkSize[0] = iTestN >> 3;  // do 8 computations per work item
+    szLocalWorkSize[0]= iTestN>>3; // same value as the global work size
+
+
+    // Allocate and initialize host arrays (allocation results are not checked)
+    srcA = (void *)malloc (sizeof(cl_float) * iTestN);
+    srcB = (void *)malloc (sizeof(cl_float) * iTestN);
+    dst = (void *)malloc (sizeof(cl_float) * iTestN);
+
+	int i;
+
+	// Initialize arrays with some values: srcA[i] = i, srcB[i] = 2, dst[i] = -1
+	for (i=0;i<iTestN;i++)
+	{
+		((cl_float*)srcA)[i] = cl_float(i);
+		((cl_float*)srcB)[i] = 2;
+		((cl_float*)dst)[i]=-1;
+	}
+
+    // Create OpenCL context for CPU devices
+    cxGPUContext = clCreateContextFromType(0, CL_DEVICE_TYPE_CPU, NULL, NULL, &ciErr1); //could also be CL_DEVICE_TYPE_GPU
+	
+    // Query all devices available to the context (first call fetches the byte size, second call the list)
+    ciErr1 |= clGetContextInfo(cxGPUContext, CL_CONTEXT_DEVICES, 0, NULL, &szParmDataBytes);
+    cdDevices = (cl_device_id*)malloc(szParmDataBytes);
+    ciErr1 |= clGetContextInfo(cxGPUContext, CL_CONTEXT_DEVICES, szParmDataBytes, cdDevices, NULL);
+	if (cdDevices)
+	{
+		printDevInfo(cdDevices[0]);
+	}
+
+    // Create a command queue for first device the context reported
+    cqCommandQue = clCreateCommandQueue(cxGPUContext, cdDevices[0], 0, &ciErr2); // NOTE(review): cdDevices[0] is read here without the null check used above
+    ciErr1 |= ciErr2; 
+
+    // Allocate the OpenCL source and result buffer memory objects on the device GMEM
+    cmMemObjs[0] = clCreateBuffer(cxGPUContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(cl_float8) * szGlobalWorkSize[0], srcA, &ciErr2);
+    ciErr1 |= ciErr2;
+    cmMemObjs[1] = clCreateBuffer(cxGPUContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(cl_float8) * szGlobalWorkSize[0], srcB, &ciErr2);
+    ciErr1 |= ciErr2;
+    cmMemObjs[2] = clCreateBuffer(cxGPUContext, CL_MEM_WRITE_ONLY, sizeof(cl_float8) * szGlobalWorkSize[0], NULL, &ciErr2);
+    ciErr1 |= ciErr2;
+
+///create program from binary (a null image of length 0 is passed for the single device)
+	int numDevices = 1;
+	cl_int err; // receives the clCreateProgramWithBinary status; not examined afterwards
+	::size_t* lengths = (::size_t*) malloc(numDevices * sizeof(::size_t)); // NOTE(review): lengths and images are not freed in this function
+	const unsigned char** images = (const unsigned char**) malloc(numDevices * sizeof(const void*));
+
+	for (i = 0; i < numDevices; ++i) {
+		images[i] = 0;
+		lengths[i] = 0;
+	}
+
+	cpProgram = clCreateProgramWithBinary(cxGPUContext, numDevices,cdDevices,lengths, images, 0, &err);
+
+	// Build the executable program from a binary
+	ciErr1 |= clBuildProgram(cpProgram, 0, NULL, NULL, NULL, NULL);
+
+    // Create the kernel
+    ckKernel = clCreateKernel(cpProgram, "VectorAdd", &ciErr1); // NOTE(review): writes ciErr1 directly, replacing the value accumulated so far
+    
+    // Set the Argument values
+    ciErr1 |= clSetKernelArg(ckKernel, 0, sizeof(cl_mem), (void*)&cmMemObjs[0]);
+    ciErr1 |= clSetKernelArg(ckKernel, 1, sizeof(cl_mem), (void*)&cmMemObjs[1]);
+    ciErr1 |= clSetKernelArg(ckKernel, 2, sizeof(cl_mem), (void*)&cmMemObjs[2]);
+
+    // Launch the kernel (the input data was supplied at buffer creation via CL_MEM_COPY_HOST_PTR)
+    ciErr1 |= clEnqueueNDRangeKernel(cqCommandQue, ckKernel, 1, NULL, szGlobalWorkSize, szLocalWorkSize, 0, NULL, NULL);
+
+    // Read back results into dst (CL_TRUE is passed as the blocking flag); NOTE(review): ciErr1 is never tested afterwards
+    ciErr1 |= clEnqueueReadBuffer(cqCommandQue, cmMemObjs[2], CL_TRUE, 0, sizeof(cl_float8) * szGlobalWorkSize[0], dst, 0, NULL, NULL);
+
+    // Release device list, kernel, program, command queue and context (memory objects are released further below)
+	// NOTE:  Most properly this should be done at any of the exit points above, but it is omitted elsewhere for clarity.
+    free(cdDevices);
+	clReleaseKernel(ckKernel);  
+    clReleaseProgram(cpProgram);
+    clReleaseCommandQueue(cqCommandQue);
+    clReleaseContext(cxGPUContext);
+
+
+    // Validate on the host: count every i where dst[i] != srcA[i] + srcB[i]
+    int iErrorCount = 0;
+    for (i = 0; i < iTestN; i++) 
+    {
+		if (((float*)dst)[i] != ((float*)srcA)[i]+((float*)srcB)[i])
+			iErrorCount++;
+    }
+	
+	if (iErrorCount)
+	{
+		printf("MiniCL validation FAILED\n");
+	} else
+	{
+		printf("MiniCL validation SUCCESSFULL\n");
+	}
+    // Release the OpenCL memory objects, then free host memory
+	for (i = 0; i < 3; i++)
+    {
+        clReleaseMemObject(cmMemObjs[i]);
+    }
+
+    free(srcA); 
+    free(srcB);
+    free (dst);
+}
+
+
+#ifdef USE_MINICL
+#include "MiniCL/cl_MiniCL_Defs.h"
+extern "C"
+{
+	#include "VectorAddKernels.cl" // the kernel source is included into this translation unit inside the extern "C" block
+}
+MINICL_REGISTER(VectorAdd) // presumably makes VectorAdd available by name to clCreateKernel - confirm against cl_MiniCL_Defs.h
 #endif//USE_MINICL
--- a/Demos/MiniCL_VectorAdd/VectorAddKernels.cl
+++ b/Demos/MiniCL_VectorAdd/VectorAddKernels.cl
@@ -1,47 +1,47 @@
-/*
-Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
-Copyright (C) 2006 - 2009 Sony Computer Entertainment Inc. 
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose, 
-including commercial applications, and to alter it and redistribute it freely, 
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-
-///GUID_ARG is only used by MiniCL to pass in the guid used by its get_global_id implementation
-#ifndef GUID_ARG
-#define GUID_ARG
-#endif
-
-///////////////////////////////////////////////////
-// OpenCL Kernel Function for element by element vector addition
-__kernel void VectorAdd(__global const float8* a, __global const float8* b, __global float8* c GUID_ARG)
-{
-    // get oct-float index into global data array
-    int iGID = get_global_id(0);
-
-    // read inputs into registers
-    float8 f8InA = a[iGID];
-    float8 f8InB = b[iGID];
-    float8 f8Out = (float8)0.0f;
-    
-    
-    // add the vector elements
-    f8Out.s0 = f8InA.s0 + f8InB.s0;
-    f8Out.s1 = f8InA.s1 + f8InB.s1;
-    f8Out.s2 = f8InA.s2 + f8InB.s2;
-    f8Out.s3 = f8InA.s3 + f8InB.s3;
-    f8Out.s4 = f8InA.s4 + f8InB.s4;
-    f8Out.s5 = f8InA.s5 + f8InB.s5;
-    f8Out.s6 = f8InA.s6 + f8InB.s6;
-    f8Out.s7 = f8InA.s7 + f8InB.s7;
-
-    // write back out to GMEM
-    c[get_global_id(0)] = f8Out;
-}
-
+/*
+Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
+Copyright (C) 2006 - 2009 Sony Computer Entertainment Inc. 
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+///GUID_ARG is only used by MiniCL to pass in the guid used by its get_global_id implementation
+#ifndef GUID_ARG
+#define GUID_ARG
+#endif
+
+///////////////////////////////////////////////////
+// OpenCL Kernel Function for element by element vector addition: c[id] = a[id] + b[id] for the float8 at this work item's global id
+__kernel void VectorAdd(__global const float8* a, __global const float8* b, __global float8* c GUID_ARG)
+{
+    // get oct-float index into global data array
+    int iGID = get_global_id(0);
+
+    // read inputs into registers
+    float8 f8InA = a[iGID];
+    float8 f8InB = b[iGID];
+    float8 f8Out = (float8)0.0f;
+    
+    
+    // add the vector elements, one component (s0..s7) at a time
+    f8Out.s0 = f8InA.s0 + f8InB.s0;
+    f8Out.s1 = f8InA.s1 + f8InB.s1;
+    f8Out.s2 = f8InA.s2 + f8InB.s2;
+    f8Out.s3 = f8InA.s3 + f8InB.s3;
+    f8Out.s4 = f8InA.s4 + f8InB.s4;
+    f8Out.s5 = f8InA.s5 + f8InB.s5;
+    f8Out.s6 = f8InA.s6 + f8InB.s6;
+    f8Out.s7 = f8InA.s7 + f8InB.s7;
+
+    // write back out to GMEM
+    c[get_global_id(0)] = f8Out; // the global id is queried again here instead of reusing iGID
+}
+