Removed to bitonic sort and ios, they were not used and confuse Bram :)
http://bulletphysics.org/Bullet/phpBB3/viewtopic.php?t=9570
This commit is contained in:
@@ -13,11 +13,6 @@
|
||||
end
|
||||
|
||||
|
||||
newoption
|
||||
{
|
||||
trigger = "ios",
|
||||
description = "Enable iOS target (requires xcode4)"
|
||||
}
|
||||
|
||||
newoption
|
||||
{
|
||||
@@ -64,23 +59,11 @@
|
||||
postfix=""
|
||||
|
||||
if _ACTION == "xcode4" then
|
||||
if _OPTIONS["ios"] then
|
||||
postfix = "ios";
|
||||
xcodebuildsettings
|
||||
{
|
||||
'CODE_SIGN_IDENTITY = "iPhone Developer"',
|
||||
"SDKROOT = iphoneos",
|
||||
'ARCHS = "armv7"',
|
||||
'TARGETED_DEVICE_FAMILY = "1,2"',
|
||||
'VALID_ARCHS = "armv7"',
|
||||
}
|
||||
else
|
||||
xcodebuildsettings
|
||||
{
|
||||
'ARCHS = "$(ARCHS_STANDARD_32_BIT) $(ARCHS_STANDARD_64_BIT)"',
|
||||
'VALID_ARCHS = "x86_64 i386"',
|
||||
}
|
||||
end
|
||||
xcodebuildsettings
|
||||
{
|
||||
'ARCHS = "$(ARCHS_STANDARD_32_BIT) $(ARCHS_STANDARD_64_BIT)"',
|
||||
'VALID_ARCHS = "x86_64 i386"',
|
||||
}
|
||||
end
|
||||
|
||||
|
||||
@@ -99,8 +82,6 @@
|
||||
|
||||
language "C++"
|
||||
|
||||
if not _OPTIONS["ios"] then
|
||||
|
||||
|
||||
include "../Demos3/GpuDemos"
|
||||
-- include "../Demos3/CpuDemos"
|
||||
@@ -126,7 +107,7 @@ include "../Demos3/SimpleOpenGL3"
|
||||
-- include "../test/OpenCL/NarrowphaseCollision"
|
||||
include "../test/OpenCL/ParallelPrimitives"
|
||||
include "../test/OpenCL/RadixSortBenchmark"
|
||||
include "../test/OpenCL/BitonicSort"
|
||||
|
||||
|
||||
include "../src/Bullet3Dynamics"
|
||||
include "../src/Bullet3Common"
|
||||
@@ -166,4 +147,3 @@ include "../Demos3/SimpleOpenGL3"
|
||||
include "../Demos3"
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
@@ -1,171 +0,0 @@
|
||||
MSTRINGIFY(
|
||||
/*
|
||||
* Copyright 1993-2009 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* NVIDIA Corporation and its licensors retain all intellectual property and
|
||||
* proprietary rights in and to this software and related documentation.
|
||||
* Any use, reproduction, disclosure, or distribution of this software
|
||||
* and related documentation without an express license agreement from
|
||||
* NVIDIA Corporation is strictly prohibited.
|
||||
*
|
||||
* Please refer to the applicable NVIDIA end user license agreement (EULA)
|
||||
* associated with this source code for terms and conditions that govern
|
||||
* your use of this NVIDIA software.
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
|
||||
inline void ComparatorPrivate(int2* keyA, int2* keyB, uint dir)
|
||||
{
|
||||
if((keyA[0].x > keyB[0].x) == dir)
|
||||
{
|
||||
int2 tmp = *keyA;
|
||||
*keyA = *keyB;
|
||||
*keyB = tmp;
|
||||
}
|
||||
}
|
||||
|
||||
inline void ComparatorLocal(__local int2* keyA, __local int2* keyB, uint dir)
|
||||
{
|
||||
if((keyA[0].x > keyB[0].x) == dir)
|
||||
{
|
||||
int2 tmp = *keyA;
|
||||
*keyA = *keyB;
|
||||
*keyB = tmp;
|
||||
}
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Monolithic bitonic sort kernel for short arrays fitting into local memory
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
__kernel void kBitonicSortCellIdLocal(__global int2* pKey, uint arrayLength, uint dir GUID_ARG)
|
||||
{
|
||||
__local int2 l_key[1024U];
|
||||
int localSizeLimit = get_local_size(0) * 2;
|
||||
|
||||
//Offset to the beginning of subbatch and load data
|
||||
pKey += get_group_id(0) * localSizeLimit + get_local_id(0);
|
||||
l_key[get_local_id(0) + 0] = pKey[ 0];
|
||||
l_key[get_local_id(0) + (localSizeLimit / 2)] = pKey[(localSizeLimit / 2)];
|
||||
|
||||
for(uint size = 2; size < arrayLength; size <<= 1)
|
||||
{
|
||||
//Bitonic merge
|
||||
uint ddd = dir ^ ( (get_local_id(0) & (size / 2)) != 0 );
|
||||
for(uint stride = size / 2; stride > 0; stride >>= 1)
|
||||
{
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
uint pos = 2 * get_local_id(0) - (get_local_id(0) & (stride - 1));
|
||||
ComparatorLocal(&l_key[pos + 0], &l_key[pos + stride], ddd);
|
||||
}
|
||||
}
|
||||
|
||||
//ddd == dir for the last bitonic merge step
|
||||
{
|
||||
for(uint stride = arrayLength / 2; stride > 0; stride >>= 1)
|
||||
{
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
uint pos = 2 * get_local_id(0) - (get_local_id(0) & (stride - 1));
|
||||
ComparatorLocal(&l_key[pos + 0], &l_key[pos + stride], dir);
|
||||
}
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
pKey[ 0] = l_key[get_local_id(0) + 0];
|
||||
pKey[(localSizeLimit / 2)] = l_key[get_local_id(0) + (localSizeLimit / 2)];
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Bitonic sort kernel for large arrays (not fitting into local memory)
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//Bottom-level bitonic sort
|
||||
//Almost the same as bitonicSortLocal with the only exception
|
||||
//of even / odd subarrays (of LOCAL_SIZE_LIMIT points) being
|
||||
//sorted in opposite directions
|
||||
__kernel void kBitonicSortCellIdLocal1(__global int2* pKey GUID_ARG)
|
||||
{
|
||||
__local int2 l_key[1024U];
|
||||
uint localSizeLimit = get_local_size(0) * 2;
|
||||
|
||||
//Offset to the beginning of subarray and load data
|
||||
pKey += get_group_id(0) * localSizeLimit + get_local_id(0);
|
||||
l_key[get_local_id(0) + 0] = pKey[ 0];
|
||||
l_key[get_local_id(0) + (localSizeLimit / 2)] = pKey[(localSizeLimit / 2)];
|
||||
|
||||
uint comparatorI = get_global_id(0) & ((localSizeLimit / 2) - 1);
|
||||
|
||||
for(uint size = 2; size < localSizeLimit; size <<= 1)
|
||||
{
|
||||
//Bitonic merge
|
||||
uint ddd = (comparatorI & (size / 2)) != 0;
|
||||
for(uint stride = size / 2; stride > 0; stride >>= 1)
|
||||
{
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
uint pos = 2 * get_local_id(0) - (get_local_id(0) & (stride - 1));
|
||||
ComparatorLocal(&l_key[pos + 0], &l_key[pos + stride], ddd);
|
||||
}
|
||||
}
|
||||
|
||||
//Odd / even arrays of localSizeLimit elements
|
||||
//sorted in opposite directions
|
||||
{
|
||||
uint ddd = (get_group_id(0) & 1);
|
||||
for(uint stride = localSizeLimit / 2; stride > 0; stride >>= 1)
|
||||
{
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
uint pos = 2 * get_local_id(0) - (get_local_id(0) & (stride - 1));
|
||||
ComparatorLocal(&l_key[pos + 0], &l_key[pos + stride], ddd);
|
||||
}
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
pKey[ 0] = l_key[get_local_id(0) + 0];
|
||||
pKey[(localSizeLimit / 2)] = l_key[get_local_id(0) + (localSizeLimit / 2)];
|
||||
}
|
||||
|
||||
//Bitonic merge iteration for 'stride' >= LOCAL_SIZE_LIMIT
|
||||
__kernel void kBitonicSortCellIdMergeGlobal(__global int2* pKey, uint arrayLength, uint size, uint stride, uint dir GUID_ARG)
|
||||
{
|
||||
uint global_comparatorI = get_global_id(0);
|
||||
uint comparatorI = global_comparatorI & (arrayLength / 2 - 1);
|
||||
|
||||
//Bitonic merge
|
||||
uint ddd = dir ^ ( (comparatorI & (size / 2)) != 0 );
|
||||
uint pos = 2 * global_comparatorI - (global_comparatorI & (stride - 1));
|
||||
|
||||
int2 keyA = pKey[pos + 0];
|
||||
int2 keyB = pKey[pos + stride];
|
||||
|
||||
ComparatorPrivate(&keyA, &keyB, ddd);
|
||||
|
||||
pKey[pos + 0] = keyA;
|
||||
pKey[pos + stride] = keyB;
|
||||
}
|
||||
|
||||
//Combined bitonic merge steps for
|
||||
//'size' > LOCAL_SIZE_LIMIT and 'stride' = [1 .. LOCAL_SIZE_LIMIT / 2]
|
||||
__kernel void kBitonicSortCellIdMergeLocal(__global int2* pKey, uint arrayLength, uint stride, uint size, uint dir GUID_ARG)
|
||||
{
|
||||
__local int2 l_key[1024U];
|
||||
int localSizeLimit = get_local_size(0) * 2;
|
||||
|
||||
pKey += get_group_id(0) * localSizeLimit + get_local_id(0);
|
||||
l_key[get_local_id(0) + 0] = pKey[ 0];
|
||||
l_key[get_local_id(0) + (localSizeLimit / 2)] = pKey[(localSizeLimit / 2)];
|
||||
|
||||
//Bitonic merge
|
||||
uint comparatorI = get_global_id(0) & ((arrayLength / 2) - 1);
|
||||
uint ddd = dir ^ ( (comparatorI & (size / 2)) != 0 );
|
||||
for(; stride > 0; stride >>= 1)
|
||||
{
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
uint pos = 2 * get_local_id(0) - (get_local_id(0) & (stride - 1));
|
||||
ComparatorLocal(&l_key[pos + 0], &l_key[pos + stride], ddd);
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
pKey[ 0] = l_key[get_local_id(0) + 0];
|
||||
pKey[(localSizeLimit / 2)] = l_key[get_local_id(0) + (localSizeLimit / 2)];
|
||||
}
|
||||
);
|
||||
@@ -1,90 +0,0 @@
|
||||
|
||||
#include "b3BitonicSort.h"
|
||||
#include "Bullet3Common/b3Scalar.h"
|
||||
|
||||
|
||||
//Note: logically shared with BitonicSort OpenCL code!
|
||||
// TODO : get parameter from OpenCL and pass it to kernel (needed for platforms other than NVIDIA)
|
||||
|
||||
void bitonicSortNv(cl_mem pKey, int arrayLength, b3BitonicSortInfo& info)
|
||||
{
|
||||
|
||||
if(arrayLength < 2)
|
||||
return;
|
||||
//Only power-of-two array lengths are supported so far
|
||||
info.dir = (info.dir != 0);
|
||||
cl_int ciErrNum;
|
||||
size_t localWorkSize, globalWorkSize;
|
||||
int res = -1;
|
||||
|
||||
|
||||
cl_int clerr=clGetKernelWorkGroupInfo (info.bitonicSortLocal1,info.dev,CL_KERNEL_WORK_GROUP_SIZE,sizeof(size_t),&res,NULL);
|
||||
if((clerr==CL_SUCCESS)&&(res>0))
|
||||
info.localSizeLimit=res;
|
||||
|
||||
if(arrayLength <= info.localSizeLimit)
|
||||
{
|
||||
b3Assert( ( arrayLength) % info.localSizeLimit == 0);
|
||||
//Launch bitonicSortLocal
|
||||
ciErrNum = clSetKernelArg(info.bitonicSortLocal, 0, sizeof(cl_mem), (void *)&pKey);
|
||||
ciErrNum |= clSetKernelArg(info.bitonicSortLocal, 1, sizeof(cl_uint), (void *)&arrayLength);
|
||||
ciErrNum |= clSetKernelArg(info.bitonicSortLocal, 2, sizeof(cl_uint), (void *)&info.dir);
|
||||
oclCHECKERROR(ciErrNum, CL_SUCCESS);
|
||||
|
||||
localWorkSize = info.localSizeLimit / 2;
|
||||
globalWorkSize = arrayLength / 2;
|
||||
ciErrNum = clEnqueueNDRangeKernel(info.m_cqCommandQue, info.bitonicSortLocal, 1, NULL, &globalWorkSize, &localWorkSize, 0, NULL, NULL);
|
||||
oclCHECKERROR(ciErrNum, CL_SUCCESS);
|
||||
}
|
||||
else
|
||||
{
|
||||
//Launch bitonicSortLocal1
|
||||
ciErrNum = clSetKernelArg(info.bitonicSortLocal1, 0, sizeof(cl_mem), (void *)&pKey);
|
||||
oclCHECKERROR(ciErrNum, CL_SUCCESS);
|
||||
|
||||
localWorkSize = info.localSizeLimit / 2;
|
||||
globalWorkSize = arrayLength / 2;
|
||||
ciErrNum = clEnqueueNDRangeKernel(info.m_cqCommandQue, info.bitonicSortLocal1, 1, NULL, &globalWorkSize, &localWorkSize, 0, NULL, NULL);
|
||||
oclCHECKERROR(ciErrNum, CL_SUCCESS);
|
||||
|
||||
for(unsigned int size = 2 * info.localSizeLimit; size <= arrayLength; size <<= 1)
|
||||
{
|
||||
for(unsigned stride = size / 2; stride > 0; stride >>= 1)
|
||||
{
|
||||
if(stride >= info.localSizeLimit)
|
||||
{
|
||||
//Launch bitonicMergeGlobal
|
||||
ciErrNum = clSetKernelArg(info.bitonicSortMergeGlobal, 0, sizeof(cl_mem), (void *)&pKey);
|
||||
ciErrNum |= clSetKernelArg(info.bitonicSortMergeGlobal, 1, sizeof(cl_uint), (void *)&arrayLength);
|
||||
ciErrNum |= clSetKernelArg(info.bitonicSortMergeGlobal, 2, sizeof(cl_uint), (void *)&size);
|
||||
ciErrNum |= clSetKernelArg(info.bitonicSortMergeGlobal, 3, sizeof(cl_uint), (void *)&stride);
|
||||
ciErrNum |= clSetKernelArg(info.bitonicSortMergeGlobal, 4, sizeof(cl_uint), (void *)&info.dir);
|
||||
oclCHECKERROR(ciErrNum, CL_SUCCESS);
|
||||
|
||||
localWorkSize = info.localSizeLimit / 4;
|
||||
globalWorkSize = arrayLength / 2;
|
||||
|
||||
ciErrNum = clEnqueueNDRangeKernel(info.m_cqCommandQue, info.bitonicSortMergeGlobal, 1, NULL, &globalWorkSize, &localWorkSize, 0, NULL, NULL);
|
||||
oclCHECKERROR(ciErrNum, CL_SUCCESS);
|
||||
}
|
||||
else
|
||||
{
|
||||
//Launch bitonicMergeLocal
|
||||
ciErrNum = clSetKernelArg(info.bitonicSortMergeLocal, 0, sizeof(cl_mem), (void *)&pKey);
|
||||
ciErrNum |= clSetKernelArg(info.bitonicSortMergeLocal, 1, sizeof(cl_uint), (void *)&arrayLength);
|
||||
ciErrNum |= clSetKernelArg(info.bitonicSortMergeLocal, 2, sizeof(cl_uint), (void *)&stride);
|
||||
ciErrNum |= clSetKernelArg(info.bitonicSortMergeLocal, 3, sizeof(cl_uint), (void *)&size);
|
||||
ciErrNum |= clSetKernelArg(info.bitonicSortMergeLocal, 4, sizeof(cl_uint), (void *)&info.dir);
|
||||
oclCHECKERROR(ciErrNum, CL_SUCCESS);
|
||||
|
||||
localWorkSize = info.localSizeLimit / 2;
|
||||
globalWorkSize = arrayLength / 2;
|
||||
|
||||
ciErrNum = clEnqueueNDRangeKernel(info.m_cqCommandQue, info.bitonicSortMergeLocal, 1, NULL, &globalWorkSize, &localWorkSize, 0, NULL, NULL);
|
||||
oclCHECKERROR(ciErrNum, CL_SUCCESS);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,35 +0,0 @@
|
||||
#ifndef B3_BITONIC_SORT_H
|
||||
#define B3_BITONIC_SORT_H
|
||||
|
||||
#include "Bullet3OpenCL/Initialize/b3OpenCLInclude.h"
|
||||
|
||||
struct b3BitonicSortInfo
|
||||
{
|
||||
cl_command_queue m_cqCommandQue;
|
||||
cl_device_id dev;
|
||||
|
||||
cl_kernel bitonicSortLocal;
|
||||
cl_kernel bitonicSortLocal1;
|
||||
cl_kernel bitonicSortMergeGlobal;
|
||||
cl_kernel bitonicSortMergeLocal;
|
||||
|
||||
unsigned int dir;
|
||||
unsigned int localSizeLimit;
|
||||
|
||||
b3BitonicSortInfo()
|
||||
{
|
||||
dev = 0;
|
||||
m_cqCommandQue = 0;
|
||||
bitonicSortLocal=0;
|
||||
bitonicSortLocal1=0;
|
||||
bitonicSortMergeGlobal=0;
|
||||
bitonicSortMergeLocal=0;
|
||||
dir = 1;
|
||||
localSizeLimit = 1024U;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
void bitonicSortNv(cl_mem pKey, int arrayLength, b3BitonicSortInfo& info);
|
||||
|
||||
#endif //B3_BITONIC_SORT_H
|
||||
@@ -1,196 +0,0 @@
|
||||
/*
|
||||
Bullet Continuous Collision Detection and Physics Library
|
||||
Copyright (c) 2011 Advanced Micro Devices, Inc. http://bulletphysics.org
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
|
||||
///original author: Erwin Coumans
|
||||
|
||||
#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h"
|
||||
#include "Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h"
|
||||
#include "Bullet3Common/shared/b3Int2.h"
|
||||
|
||||
#include "../btgui/Timing/b3Clock.h"
|
||||
#include "b3BitonicSort.h"
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
int numSuccess=0;
|
||||
int numFailed=0;
|
||||
|
||||
cl_context g_cxMainContext;
|
||||
cl_command_queue g_cqCommandQue;
|
||||
|
||||
#define MSTRINGIFY(A) #A
|
||||
static const char* kernelSource=
|
||||
#include "BitonicSort.cl"
|
||||
|
||||
|
||||
|
||||
|
||||
static bool compareFunc(const b3Int2& p, const b3Int2& q)
|
||||
{
|
||||
return (p.x < q.x) || ((p.x == q.x) && ((p.y < q.y)));
|
||||
}
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
int ciErrNum = 0;
|
||||
|
||||
b3Clock clock;
|
||||
|
||||
|
||||
cl_device_type deviceType = CL_DEVICE_TYPE_GPU;//ALL;
|
||||
const char* vendorSDK = b3OpenCLUtils::getSdkVendorName();
|
||||
|
||||
printf("This program was compiled using the %s OpenCL SDK\n",vendorSDK);
|
||||
int numPlatforms = b3OpenCLUtils::getNumPlatforms();
|
||||
printf("Num Platforms = %d\n", numPlatforms);
|
||||
|
||||
for (int i=0;i<numPlatforms;i++)
|
||||
{
|
||||
cl_platform_id platform = b3OpenCLUtils::getPlatform(i);
|
||||
b3OpenCLPlatformInfo platformInfo;
|
||||
b3OpenCLUtils::getPlatformInfo(platform,&platformInfo);
|
||||
printf("--------------------------------\n");
|
||||
printf("Platform info for platform nr %d:\n",i);
|
||||
printf(" CL_PLATFORM_VENDOR: \t\t\t%s\n",platformInfo.m_platformVendor);
|
||||
printf(" CL_PLATFORM_NAME: \t\t\t%s\n",platformInfo.m_platformName);
|
||||
printf(" CL_PLATFORM_VERSION: \t\t\t%s\n",platformInfo.m_platformVersion);
|
||||
|
||||
cl_context context = b3OpenCLUtils::createContextFromPlatform(platform,deviceType,&ciErrNum);
|
||||
|
||||
if (context)
|
||||
{
|
||||
int numDevices = b3OpenCLUtils::getNumDevices(context);
|
||||
printf("Num Devices = %d\n", numDevices);
|
||||
for (int j=0;j<numDevices;j++)
|
||||
{
|
||||
cl_device_id dev = b3OpenCLUtils::getDevice(context,j);
|
||||
b3OpenCLDeviceInfo devInfo;
|
||||
b3OpenCLUtils::getDeviceInfo(dev,&devInfo);
|
||||
printf("m_deviceName = %s\n",devInfo.m_deviceName);
|
||||
//b3OpenCLUtils::printDeviceInfo(dev);
|
||||
|
||||
g_cqCommandQue = clCreateCommandQueue(context, dev, 0, &ciErrNum);
|
||||
oclCHECKERROR(ciErrNum, CL_SUCCESS);
|
||||
|
||||
b3BitonicSortInfo info;
|
||||
|
||||
info.bitonicSortLocal = b3OpenCLUtils::compileCLKernelFromString(context,dev,kernelSource,"kBitonicSortCellIdLocal",&ciErrNum,0,"");
|
||||
oclCHECKERROR(ciErrNum, CL_SUCCESS);
|
||||
info.bitonicSortLocal1 = b3OpenCLUtils::compileCLKernelFromString(context,dev,kernelSource,"kBitonicSortCellIdLocal1",&ciErrNum,0,"");
|
||||
oclCHECKERROR(ciErrNum, CL_SUCCESS);
|
||||
info.bitonicSortMergeGlobal = b3OpenCLUtils::compileCLKernelFromString(context,dev,kernelSource,"kBitonicSortCellIdMergeGlobal",&ciErrNum,0,"");
|
||||
oclCHECKERROR(ciErrNum, CL_SUCCESS);
|
||||
info.bitonicSortMergeLocal = b3OpenCLUtils::compileCLKernelFromString(context,dev,kernelSource,"kBitonicSortCellIdMergeLocal",&ciErrNum,0,"");
|
||||
oclCHECKERROR(ciErrNum, CL_SUCCESS);
|
||||
info.m_cqCommandQue = g_cqCommandQue;
|
||||
info.dev = dev;
|
||||
|
||||
b3OpenCLArray<b3Int2> keyValuesGPU(context,g_cqCommandQue);
|
||||
b3AlignedObjectArray<b3Int2> keyValuesCPU;
|
||||
b3AlignedObjectArray<b3Int2> keyValuesGold;
|
||||
int numValues = 8*1024*1024;//2048;//1024;
|
||||
keyValuesCPU.resize(numValues);
|
||||
for (int i=0;i<numValues;i++)
|
||||
{
|
||||
b3Int2 v;
|
||||
v.x = numValues+1-i;
|
||||
v.y = i*i;
|
||||
keyValuesCPU[i] = v;
|
||||
}
|
||||
keyValuesGPU.copyFromHost(keyValuesCPU);
|
||||
keyValuesGPU.copyToHost(keyValuesGold);
|
||||
keyValuesGold.quickSort(compareFunc);
|
||||
|
||||
unsigned int batch = 1;
|
||||
unsigned int arrayLength = keyValuesGPU.size();
|
||||
|
||||
for (int i=0;i<10;i++)
|
||||
{
|
||||
keyValuesGPU.copyFromHost(keyValuesCPU);
|
||||
clFinish(info.m_cqCommandQue);
|
||||
unsigned long pre=clock.getTimeMilliseconds();
|
||||
bitonicSortNv(keyValuesGPU.getBufferCL(), arrayLength, info);
|
||||
clFinish(info.m_cqCommandQue);
|
||||
unsigned long post=clock.getTimeMilliseconds();
|
||||
printf("GPU sort took %d ms\n",post-pre);
|
||||
}
|
||||
keyValuesGPU.copyToHost(keyValuesCPU);
|
||||
int success=1;
|
||||
for (int i=0;i<numValues;i++)
|
||||
{
|
||||
if (keyValuesCPU[i].x != keyValuesGold[i].x)
|
||||
success = 0;
|
||||
if (keyValuesCPU[i].y != keyValuesGold[i].y)
|
||||
success = 0;
|
||||
}
|
||||
if (success)
|
||||
{
|
||||
printf("Correct\n");
|
||||
numSuccess++;
|
||||
} else
|
||||
{
|
||||
printf("Sort Failed\n");
|
||||
numFailed++;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
clReleaseContext(context);
|
||||
}
|
||||
}
|
||||
|
||||
///Easier method to initialize OpenCL using createContextFromType for a GPU
|
||||
deviceType = CL_DEVICE_TYPE_GPU;
|
||||
|
||||
void* glCtx=0;
|
||||
void* glDC = 0;
|
||||
printf("Initialize OpenCL using b3OpenCLUtils::createContextFromType for CL_DEVICE_TYPE_GPU\n");
|
||||
g_cxMainContext = b3OpenCLUtils::createContextFromType(deviceType, &ciErrNum, glCtx, glDC);
|
||||
oclCHECKERROR(ciErrNum, CL_SUCCESS);
|
||||
|
||||
if (g_cxMainContext)
|
||||
{
|
||||
int numDev = b3OpenCLUtils::getNumDevices(g_cxMainContext);
|
||||
|
||||
for (int i=0;i<numDev;i++)
|
||||
{
|
||||
cl_device_id device;
|
||||
device = b3OpenCLUtils::getDevice(g_cxMainContext,i);
|
||||
b3OpenCLDeviceInfo clInfo;
|
||||
b3OpenCLUtils::getDeviceInfo(device,&clInfo);
|
||||
b3OpenCLUtils::printDeviceInfo(device);
|
||||
// create a command-queue
|
||||
g_cqCommandQue = clCreateCommandQueue(g_cxMainContext, device, 0, &ciErrNum);
|
||||
oclCHECKERROR(ciErrNum, CL_SUCCESS);
|
||||
//normally you would create and execute kernels using this command queue
|
||||
|
||||
|
||||
clReleaseCommandQueue(g_cqCommandQue);
|
||||
}
|
||||
|
||||
clReleaseContext(g_cxMainContext);
|
||||
|
||||
}
|
||||
else {
|
||||
printf("No OpenCL capable GPU found!");
|
||||
}
|
||||
|
||||
printf("numSuccess=%d\n",numSuccess);
|
||||
printf("numFailed=%d\n",numFailed);
|
||||
|
||||
printf("press <Enter>\n");
|
||||
getchar();
|
||||
return 0;
|
||||
}
|
||||
@@ -1,39 +0,0 @@
|
||||
function createProject(vendor)
|
||||
|
||||
hasCL = findOpenCL(vendor)
|
||||
|
||||
if (hasCL) then
|
||||
|
||||
project ("Test_BitonicSort_" .. vendor)
|
||||
|
||||
initOpenCL(vendor)
|
||||
|
||||
language "C++"
|
||||
|
||||
|
||||
kind "ConsoleApp"
|
||||
targetdir "../../../bin"
|
||||
|
||||
includedirs {"../../../src"}
|
||||
|
||||
files {
|
||||
"main.cpp",
|
||||
"b3BitonicSort.cpp",
|
||||
"../../../src/Bullet3Common/b3AlignedAllocator.cpp",
|
||||
"../../../src/Bullet3Common/b3AlignedAllocator.h",
|
||||
"../../../src/Bullet3OpenCL/Initialize/b3OpenCLUtils.cpp",
|
||||
"../../../src/Bullet3OpenCL/Initialize/b3OpenCLUtils.h",
|
||||
"../../../src/Bullet3Common/b3Logging.cpp",
|
||||
"../../../src/Bullet3Common/b3Logging.h",
|
||||
"../../../btgui/Timing/b3Clock.cpp",
|
||||
"../../../btgui/Timing/b3Clock.h",
|
||||
}
|
||||
|
||||
end
|
||||
end
|
||||
|
||||
createProject("clew")
|
||||
createProject("Apple")
|
||||
createProject("AMD")
|
||||
createProject("Intel")
|
||||
createProject("NVIDIA")
|
||||
Reference in New Issue
Block a user