Files
bullet3/test/OpenCL/BitonicSort/main.cpp
erwin coumans b32ae0c75c move some files to shared folders
use b3Aabb for compound vs compound on host (for testing, towards BVH)
2013-08-14 17:48:12 -07:00

197 lines
6.3 KiB
C++

/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2011 Advanced Micro Devices, Inc. http://bulletphysics.org
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
///original author: Erwin Coumans
#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h"
#include "Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h"
#include "Bullet3Common/shared/b3Int2.h"
#include "../btgui/Timing/b3Clock.h"
#include "b3BitonicSort.h"
#include <stdio.h>
int numSuccess=0;
int numFailed=0;
cl_context g_cxMainContext;
cl_command_queue g_cqCommandQue;
#define MSTRINGIFY(A) #A
static const char* kernelSource=
#include "BitonicSort.cl"
static bool compareFunc(const b3Int2& p, const b3Int2& q)
{
return (p.x < q.x) || ((p.x == q.x) && ((p.y < q.y)));
}
int main(int argc, char* argv[])
{
int ciErrNum = 0;
b3Clock clock;
cl_device_type deviceType = CL_DEVICE_TYPE_GPU;//ALL;
const char* vendorSDK = b3OpenCLUtils::getSdkVendorName();
printf("This program was compiled using the %s OpenCL SDK\n",vendorSDK);
int numPlatforms = b3OpenCLUtils::getNumPlatforms();
printf("Num Platforms = %d\n", numPlatforms);
for (int i=0;i<numPlatforms;i++)
{
cl_platform_id platform = b3OpenCLUtils::getPlatform(i);
b3OpenCLPlatformInfo platformInfo;
b3OpenCLUtils::getPlatformInfo(platform,&platformInfo);
printf("--------------------------------\n");
printf("Platform info for platform nr %d:\n",i);
printf(" CL_PLATFORM_VENDOR: \t\t\t%s\n",platformInfo.m_platformVendor);
printf(" CL_PLATFORM_NAME: \t\t\t%s\n",platformInfo.m_platformName);
printf(" CL_PLATFORM_VERSION: \t\t\t%s\n",platformInfo.m_platformVersion);
cl_context context = b3OpenCLUtils::createContextFromPlatform(platform,deviceType,&ciErrNum);
if (context)
{
int numDevices = b3OpenCLUtils::getNumDevices(context);
printf("Num Devices = %d\n", numDevices);
for (int j=0;j<numDevices;j++)
{
cl_device_id dev = b3OpenCLUtils::getDevice(context,j);
b3OpenCLDeviceInfo devInfo;
b3OpenCLUtils::getDeviceInfo(dev,&devInfo);
printf("m_deviceName = %s\n",devInfo.m_deviceName);
//b3OpenCLUtils::printDeviceInfo(dev);
g_cqCommandQue = clCreateCommandQueue(context, dev, 0, &ciErrNum);
oclCHECKERROR(ciErrNum, CL_SUCCESS);
b3BitonicSortInfo info;
info.bitonicSortLocal = b3OpenCLUtils::compileCLKernelFromString(context,dev,kernelSource,"kBitonicSortCellIdLocal",&ciErrNum,0,"");
oclCHECKERROR(ciErrNum, CL_SUCCESS);
info.bitonicSortLocal1 = b3OpenCLUtils::compileCLKernelFromString(context,dev,kernelSource,"kBitonicSortCellIdLocal1",&ciErrNum,0,"");
oclCHECKERROR(ciErrNum, CL_SUCCESS);
info.bitonicSortMergeGlobal = b3OpenCLUtils::compileCLKernelFromString(context,dev,kernelSource,"kBitonicSortCellIdMergeGlobal",&ciErrNum,0,"");
oclCHECKERROR(ciErrNum, CL_SUCCESS);
info.bitonicSortMergeLocal = b3OpenCLUtils::compileCLKernelFromString(context,dev,kernelSource,"kBitonicSortCellIdMergeLocal",&ciErrNum,0,"");
oclCHECKERROR(ciErrNum, CL_SUCCESS);
info.m_cqCommandQue = g_cqCommandQue;
info.dev = dev;
b3OpenCLArray<b3Int2> keyValuesGPU(context,g_cqCommandQue);
b3AlignedObjectArray<b3Int2> keyValuesCPU;
b3AlignedObjectArray<b3Int2> keyValuesGold;
int numValues = 8*1024*1024;//2048;//1024;
keyValuesCPU.resize(numValues);
for (int i=0;i<numValues;i++)
{
b3Int2 v;
v.x = numValues+1-i;
v.y = i*i;
keyValuesCPU[i] = v;
}
keyValuesGPU.copyFromHost(keyValuesCPU);
keyValuesGPU.copyToHost(keyValuesGold);
keyValuesGold.quickSort(compareFunc);
unsigned int batch = 1;
unsigned int arrayLength = keyValuesGPU.size();
for (int i=0;i<10;i++)
{
keyValuesGPU.copyFromHost(keyValuesCPU);
clFinish(info.m_cqCommandQue);
unsigned long pre=clock.getTimeMilliseconds();
bitonicSortNv(keyValuesGPU.getBufferCL(), arrayLength, info);
clFinish(info.m_cqCommandQue);
unsigned long post=clock.getTimeMilliseconds();
printf("GPU sort took %d ms\n",post-pre);
}
keyValuesGPU.copyToHost(keyValuesCPU);
int success=1;
for (int i=0;i<numValues;i++)
{
if (keyValuesCPU[i].x != keyValuesGold[i].x)
success = 0;
if (keyValuesCPU[i].y != keyValuesGold[i].y)
success = 0;
}
if (success)
{
printf("Correct\n");
numSuccess++;
} else
{
printf("Sort Failed\n");
numFailed++;
}
}
clReleaseContext(context);
}
}
///Easier method to initialize OpenCL using createContextFromType for a GPU
deviceType = CL_DEVICE_TYPE_GPU;
void* glCtx=0;
void* glDC = 0;
printf("Initialize OpenCL using b3OpenCLUtils::createContextFromType for CL_DEVICE_TYPE_GPU\n");
g_cxMainContext = b3OpenCLUtils::createContextFromType(deviceType, &ciErrNum, glCtx, glDC);
oclCHECKERROR(ciErrNum, CL_SUCCESS);
if (g_cxMainContext)
{
int numDev = b3OpenCLUtils::getNumDevices(g_cxMainContext);
for (int i=0;i<numDev;i++)
{
cl_device_id device;
device = b3OpenCLUtils::getDevice(g_cxMainContext,i);
b3OpenCLDeviceInfo clInfo;
b3OpenCLUtils::getDeviceInfo(device,&clInfo);
b3OpenCLUtils::printDeviceInfo(device);
// create a command-queue
g_cqCommandQue = clCreateCommandQueue(g_cxMainContext, device, 0, &ciErrNum);
oclCHECKERROR(ciErrNum, CL_SUCCESS);
//normally you would create and execute kernels using this command queue
clReleaseCommandQueue(g_cqCommandQue);
}
clReleaseContext(g_cxMainContext);
}
else {
printf("No OpenCL capable GPU found!");
}
printf("numSuccess=%d\n",numSuccess);
printf("numFailed=%d\n",numFailed);
printf("press <Enter>\n");
getchar();
return 0;
}