diff --git a/build3/premake4.lua b/build3/premake4.lua index 9cf51b0ad..fe22efb2e 100644 --- a/build3/premake4.lua +++ b/build3/premake4.lua @@ -13,11 +13,6 @@ end - newoption - { - trigger = "ios", - description = "Enable iOS target (requires xcode4)" - } newoption { @@ -64,23 +59,11 @@ postfix="" if _ACTION == "xcode4" then - if _OPTIONS["ios"] then - postfix = "ios"; - xcodebuildsettings - { - 'CODE_SIGN_IDENTITY = "iPhone Developer"', - "SDKROOT = iphoneos", - 'ARCHS = "armv7"', - 'TARGETED_DEVICE_FAMILY = "1,2"', - 'VALID_ARCHS = "armv7"', - } - else - xcodebuildsettings - { - 'ARCHS = "$(ARCHS_STANDARD_32_BIT) $(ARCHS_STANDARD_64_BIT)"', - 'VALID_ARCHS = "x86_64 i386"', - } - end + xcodebuildsettings + { + 'ARCHS = "$(ARCHS_STANDARD_32_BIT) $(ARCHS_STANDARD_64_BIT)"', + 'VALID_ARCHS = "x86_64 i386"', + } end @@ -99,8 +82,6 @@ language "C++" -if not _OPTIONS["ios"] then - include "../Demos3/GpuDemos" -- include "../Demos3/CpuDemos" @@ -126,7 +107,7 @@ include "../Demos3/SimpleOpenGL3" -- include "../test/OpenCL/NarrowphaseCollision" include "../test/OpenCL/ParallelPrimitives" include "../test/OpenCL/RadixSortBenchmark" - include "../test/OpenCL/BitonicSort" + include "../src/Bullet3Dynamics" include "../src/Bullet3Common" @@ -166,4 +147,3 @@ include "../Demos3/SimpleOpenGL3" include "../Demos3" end - end diff --git a/test/OpenCL/BitonicSort/BitonicSort.cl b/test/OpenCL/BitonicSort/BitonicSort.cl deleted file mode 100644 index dce9a5435..000000000 --- a/test/OpenCL/BitonicSort/BitonicSort.cl +++ /dev/null @@ -1,171 +0,0 @@ -MSTRINGIFY( -/* - * Copyright 1993-2009 NVIDIA Corporation. All rights reserved. - * - * NVIDIA Corporation and its licensors retain all intellectual property and - * proprietary rights in and to this software and related documentation. - * Any use, reproduction, disclosure, or distribution of this software - * and related documentation without an express license agreement from - * NVIDIA Corporation is strictly prohibited. - * - * Please refer to the applicable NVIDIA end user license agreement (EULA) - * associated with this source code for terms and conditions that govern - * your use of this NVIDIA software. - * - */ - - - -inline void ComparatorPrivate(int2* keyA, int2* keyB, uint dir) -{ - if((keyA[0].x > keyB[0].x) == dir) - { - int2 tmp = *keyA; - *keyA = *keyB; - *keyB = tmp; - } -} - -inline void ComparatorLocal(__local int2* keyA, __local int2* keyB, uint dir) -{ - if((keyA[0].x > keyB[0].x) == dir) - { - int2 tmp = *keyA; - *keyA = *keyB; - *keyB = tmp; - } -} - -//////////////////////////////////////////////////////////////////////////////// -// Monolithic bitonic sort kernel for short arrays fitting into local memory -//////////////////////////////////////////////////////////////////////////////// -__kernel void kBitonicSortCellIdLocal(__global int2* pKey, uint arrayLength, uint dir GUID_ARG) -{ - __local int2 l_key[1024U]; - int localSizeLimit = get_local_size(0) * 2; - - //Offset to the beginning of subbatch and load data - pKey += get_group_id(0) * localSizeLimit + get_local_id(0); - l_key[get_local_id(0) + 0] = pKey[ 0]; - l_key[get_local_id(0) + (localSizeLimit / 2)] = pKey[(localSizeLimit / 2)]; - - for(uint size = 2; size < arrayLength; size <<= 1) - { - //Bitonic merge - uint ddd = dir ^ ( (get_local_id(0) & (size / 2)) != 0 ); - for(uint stride = size / 2; stride > 0; stride >>= 1) - { - barrier(CLK_LOCAL_MEM_FENCE); - uint pos = 2 * get_local_id(0) - (get_local_id(0) & (stride - 1)); - ComparatorLocal(&l_key[pos + 0], &l_key[pos + stride], ddd); - } - } - - //ddd == dir for the last bitonic merge step - { - for(uint stride = arrayLength / 2; stride > 0; stride >>= 1) - { - barrier(CLK_LOCAL_MEM_FENCE); - uint pos = 2 * get_local_id(0) - (get_local_id(0) & (stride - 1)); - ComparatorLocal(&l_key[pos + 0], &l_key[pos + stride], dir); - } - } - - barrier(CLK_LOCAL_MEM_FENCE); - pKey[ 0] = l_key[get_local_id(0) + 0]; - pKey[(localSizeLimit / 2)] = l_key[get_local_id(0) + (localSizeLimit / 2)]; -} - -//////////////////////////////////////////////////////////////////////////////// -// Bitonic sort kernel for large arrays (not fitting into local memory) -//////////////////////////////////////////////////////////////////////////////// -//Bottom-level bitonic sort -//Almost the same as bitonicSortLocal with the only exception -//of even / odd subarrays (of LOCAL_SIZE_LIMIT points) being -//sorted in opposite directions -__kernel void kBitonicSortCellIdLocal1(__global int2* pKey GUID_ARG) -{ - __local int2 l_key[1024U]; - uint localSizeLimit = get_local_size(0) * 2; - - //Offset to the beginning of subarray and load data - pKey += get_group_id(0) * localSizeLimit + get_local_id(0); - l_key[get_local_id(0) + 0] = pKey[ 0]; - l_key[get_local_id(0) + (localSizeLimit / 2)] = pKey[(localSizeLimit / 2)]; - - uint comparatorI = get_global_id(0) & ((localSizeLimit / 2) - 1); - - for(uint size = 2; size < localSizeLimit; size <<= 1) - { - //Bitonic merge - uint ddd = (comparatorI & (size / 2)) != 0; - for(uint stride = size / 2; stride > 0; stride >>= 1) - { - barrier(CLK_LOCAL_MEM_FENCE); - uint pos = 2 * get_local_id(0) - (get_local_id(0) & (stride - 1)); - ComparatorLocal(&l_key[pos + 0], &l_key[pos + stride], ddd); - } - } - - //Odd / even arrays of localSizeLimit elements - //sorted in opposite directions - { - uint ddd = (get_group_id(0) & 1); - for(uint stride = localSizeLimit / 2; stride > 0; stride >>= 1) - { - barrier(CLK_LOCAL_MEM_FENCE); - uint pos = 2 * get_local_id(0) - (get_local_id(0) & (stride - 1)); - ComparatorLocal(&l_key[pos + 0], &l_key[pos + stride], ddd); - } - } - - barrier(CLK_LOCAL_MEM_FENCE); - pKey[ 0] = l_key[get_local_id(0) + 0]; - pKey[(localSizeLimit / 2)] = l_key[get_local_id(0) + (localSizeLimit / 2)]; -} - -//Bitonic merge iteration for 'stride' >= LOCAL_SIZE_LIMIT -__kernel void kBitonicSortCellIdMergeGlobal(__global int2* pKey, uint arrayLength, uint size, uint stride, uint dir GUID_ARG) -{ - uint global_comparatorI = get_global_id(0); - uint comparatorI = global_comparatorI & (arrayLength / 2 - 1); - - //Bitonic merge - uint ddd = dir ^ ( (comparatorI & (size / 2)) != 0 ); - uint pos = 2 * global_comparatorI - (global_comparatorI & (stride - 1)); - - int2 keyA = pKey[pos + 0]; - int2 keyB = pKey[pos + stride]; - - ComparatorPrivate(&keyA, &keyB, ddd); - - pKey[pos + 0] = keyA; - pKey[pos + stride] = keyB; -} - -//Combined bitonic merge steps for -//'size' > LOCAL_SIZE_LIMIT and 'stride' = [1 .. LOCAL_SIZE_LIMIT / 2] -__kernel void kBitonicSortCellIdMergeLocal(__global int2* pKey, uint arrayLength, uint stride, uint size, uint dir GUID_ARG) -{ - __local int2 l_key[1024U]; - int localSizeLimit = get_local_size(0) * 2; - - pKey += get_group_id(0) * localSizeLimit + get_local_id(0); - l_key[get_local_id(0) + 0] = pKey[ 0]; - l_key[get_local_id(0) + (localSizeLimit / 2)] = pKey[(localSizeLimit / 2)]; - - //Bitonic merge - uint comparatorI = get_global_id(0) & ((arrayLength / 2) - 1); - uint ddd = dir ^ ( (comparatorI & (size / 2)) != 0 ); - for(; stride > 0; stride >>= 1) - { - barrier(CLK_LOCAL_MEM_FENCE); - uint pos = 2 * get_local_id(0) - (get_local_id(0) & (stride - 1)); - ComparatorLocal(&l_key[pos + 0], &l_key[pos + stride], ddd); - } - - barrier(CLK_LOCAL_MEM_FENCE); - pKey[ 0] = l_key[get_local_id(0) + 0]; - pKey[(localSizeLimit / 2)] = l_key[get_local_id(0) + (localSizeLimit / 2)]; -} -); \ No newline at end of file diff --git a/test/OpenCL/BitonicSort/b3BitonicSort.cpp b/test/OpenCL/BitonicSort/b3BitonicSort.cpp deleted file mode 100644 index 68e2843d6..000000000 --- a/test/OpenCL/BitonicSort/b3BitonicSort.cpp +++ /dev/null @@ -1,90 +0,0 @@ - -#include "b3BitonicSort.h" -#include "Bullet3Common/b3Scalar.h" - - -//Note: logically shared with BitonicSort OpenCL code! -// TODO : get parameter from OpenCL and pass it to kernel (needed for platforms other than NVIDIA) - -void bitonicSortNv(cl_mem pKey, int arrayLength, b3BitonicSortInfo& info) -{ - - if(arrayLength < 2) - return; - //Only power-of-two array lengths are supported so far - info.dir = (info.dir != 0); - cl_int ciErrNum; - size_t localWorkSize, globalWorkSize; - int res = -1; - - - cl_int clerr=clGetKernelWorkGroupInfo (info.bitonicSortLocal1,info.dev,CL_KERNEL_WORK_GROUP_SIZE,sizeof(size_t),&res,NULL); - if((clerr==CL_SUCCESS)&&(res>0)) - info.localSizeLimit=res; - - if(arrayLength <= info.localSizeLimit) - { - b3Assert( ( arrayLength) % info.localSizeLimit == 0); - //Launch bitonicSortLocal - ciErrNum = clSetKernelArg(info.bitonicSortLocal, 0, sizeof(cl_mem), (void *)&pKey); - ciErrNum |= clSetKernelArg(info.bitonicSortLocal, 1, sizeof(cl_uint), (void *)&arrayLength); - ciErrNum |= clSetKernelArg(info.bitonicSortLocal, 2, sizeof(cl_uint), (void *)&info.dir); - oclCHECKERROR(ciErrNum, CL_SUCCESS); - - localWorkSize = info.localSizeLimit / 2; - globalWorkSize = arrayLength / 2; - ciErrNum = clEnqueueNDRangeKernel(info.m_cqCommandQue, info.bitonicSortLocal, 1, NULL, &globalWorkSize, &localWorkSize, 0, NULL, NULL); - oclCHECKERROR(ciErrNum, CL_SUCCESS); - } - else - { - //Launch bitonicSortLocal1 - ciErrNum = clSetKernelArg(info.bitonicSortLocal1, 0, sizeof(cl_mem), (void *)&pKey); - oclCHECKERROR(ciErrNum, CL_SUCCESS); - - localWorkSize = info.localSizeLimit / 2; - globalWorkSize = arrayLength / 2; - ciErrNum = clEnqueueNDRangeKernel(info.m_cqCommandQue, info.bitonicSortLocal1, 1, NULL, &globalWorkSize, &localWorkSize, 0, NULL, NULL); - oclCHECKERROR(ciErrNum, CL_SUCCESS); - - for(unsigned int size = 2 * info.localSizeLimit; size <= arrayLength; size <<= 1) - { - for(unsigned stride = size / 2; stride > 0; stride >>= 1) - { - if(stride >= info.localSizeLimit) - { - //Launch bitonicMergeGlobal - ciErrNum = clSetKernelArg(info.bitonicSortMergeGlobal, 0, sizeof(cl_mem), (void *)&pKey); - ciErrNum |= clSetKernelArg(info.bitonicSortMergeGlobal, 1, sizeof(cl_uint), (void *)&arrayLength); - ciErrNum |= clSetKernelArg(info.bitonicSortMergeGlobal, 2, sizeof(cl_uint), (void *)&size); - ciErrNum |= clSetKernelArg(info.bitonicSortMergeGlobal, 3, sizeof(cl_uint), (void *)&stride); - ciErrNum |= clSetKernelArg(info.bitonicSortMergeGlobal, 4, sizeof(cl_uint), (void *)&info.dir); - oclCHECKERROR(ciErrNum, CL_SUCCESS); - - localWorkSize = info.localSizeLimit / 4; - globalWorkSize = arrayLength / 2; - - ciErrNum = clEnqueueNDRangeKernel(info.m_cqCommandQue, info.bitonicSortMergeGlobal, 1, NULL, &globalWorkSize, &localWorkSize, 0, NULL, NULL); - oclCHECKERROR(ciErrNum, CL_SUCCESS); - } - else - { - //Launch bitonicMergeLocal - ciErrNum = clSetKernelArg(info.bitonicSortMergeLocal, 0, sizeof(cl_mem), (void *)&pKey); - ciErrNum |= clSetKernelArg(info.bitonicSortMergeLocal, 1, sizeof(cl_uint), (void *)&arrayLength); - ciErrNum |= clSetKernelArg(info.bitonicSortMergeLocal, 2, sizeof(cl_uint), (void *)&stride); - ciErrNum |= clSetKernelArg(info.bitonicSortMergeLocal, 3, sizeof(cl_uint), (void *)&size); - ciErrNum |= clSetKernelArg(info.bitonicSortMergeLocal, 4, sizeof(cl_uint), (void *)&info.dir); - oclCHECKERROR(ciErrNum, CL_SUCCESS); - - localWorkSize = info.localSizeLimit / 2; - globalWorkSize = arrayLength / 2; - - ciErrNum = clEnqueueNDRangeKernel(info.m_cqCommandQue, info.bitonicSortMergeLocal, 1, NULL, &globalWorkSize, &localWorkSize, 0, NULL, NULL); - oclCHECKERROR(ciErrNum, CL_SUCCESS); - break; - } - } - } - } -} \ No newline at end of file diff --git a/test/OpenCL/BitonicSort/b3BitonicSort.h b/test/OpenCL/BitonicSort/b3BitonicSort.h deleted file mode 100644 index dd25b0138..000000000 --- a/test/OpenCL/BitonicSort/b3BitonicSort.h +++ /dev/null @@ -1,35 +0,0 @@ -#ifndef B3_BITONIC_SORT_H -#define B3_BITONIC_SORT_H - -#include "Bullet3OpenCL/Initialize/b3OpenCLInclude.h" - -struct b3BitonicSortInfo -{ - cl_command_queue m_cqCommandQue; - cl_device_id dev; - - cl_kernel bitonicSortLocal; - cl_kernel bitonicSortLocal1; - cl_kernel bitonicSortMergeGlobal; - cl_kernel bitonicSortMergeLocal; - - unsigned int dir; - unsigned int localSizeLimit; - - b3BitonicSortInfo() - { - dev = 0; - m_cqCommandQue = 0; - bitonicSortLocal=0; - bitonicSortLocal1=0; - bitonicSortMergeGlobal=0; - bitonicSortMergeLocal=0; - dir = 1; - localSizeLimit = 1024U; - } -}; - - -void bitonicSortNv(cl_mem pKey, int arrayLength, b3BitonicSortInfo& info); - -#endif //B3_BITONIC_SORT_H diff --git a/test/OpenCL/BitonicSort/main.cpp b/test/OpenCL/BitonicSort/main.cpp deleted file mode 100644 index 643f1ddae..000000000 --- a/test/OpenCL/BitonicSort/main.cpp +++ /dev/null @@ -1,196 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2011 Advanced Micro Devices, Inc. http://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -///original author: Erwin Coumans - -#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h" -#include "Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h" -#include "Bullet3Common/shared/b3Int2.h" - -#include "../btgui/Timing/b3Clock.h" -#include "b3BitonicSort.h" - -#include - -int numSuccess=0; -int numFailed=0; - -cl_context g_cxMainContext; -cl_command_queue g_cqCommandQue; - -#define MSTRINGIFY(A) #A -static const char* kernelSource= -#include "BitonicSort.cl" - - - - -static bool compareFunc(const b3Int2& p, const b3Int2& q) -{ - return (p.x < q.x) || ((p.x == q.x) && ((p.y < q.y))); -} - -int main(int argc, char* argv[]) -{ - int ciErrNum = 0; - - b3Clock clock; - - - cl_device_type deviceType = CL_DEVICE_TYPE_GPU;//ALL; - const char* vendorSDK = b3OpenCLUtils::getSdkVendorName(); - - printf("This program was compiled using the %s OpenCL SDK\n",vendorSDK); - int numPlatforms = b3OpenCLUtils::getNumPlatforms(); - printf("Num Platforms = %d\n", numPlatforms); - - for (int i=0;i keyValuesGPU(context,g_cqCommandQue); - b3AlignedObjectArray keyValuesCPU; - b3AlignedObjectArray keyValuesGold; - int numValues = 8*1024*1024;//2048;//1024; - keyValuesCPU.resize(numValues); - for (int i=0;i\n"); - getchar(); - return 0; -} diff --git a/test/OpenCL/BitonicSort/premake4.lua b/test/OpenCL/BitonicSort/premake4.lua deleted file mode 100644 index c45828a61..000000000 --- a/test/OpenCL/BitonicSort/premake4.lua +++ /dev/null @@ -1,39 +0,0 @@ -function createProject(vendor) - - hasCL = findOpenCL(vendor) - - if (hasCL) then - - project ("Test_BitonicSort_" .. vendor) - - initOpenCL(vendor) - - language "C++" - - - kind "ConsoleApp" - targetdir "../../../bin" - - includedirs {"../../../src"} - - files { - "main.cpp", - "b3BitonicSort.cpp", - "../../../src/Bullet3Common/b3AlignedAllocator.cpp", - "../../../src/Bullet3Common/b3AlignedAllocator.h", - "../../../src/Bullet3OpenCL/Initialize/b3OpenCLUtils.cpp", - "../../../src/Bullet3OpenCL/Initialize/b3OpenCLUtils.h", - "../../../src/Bullet3Common/b3Logging.cpp", - "../../../src/Bullet3Common/b3Logging.h", - "../../../btgui/Timing/b3Clock.cpp", - "../../../btgui/Timing/b3Clock.h", - } - - end -end - -createProject("clew") -createProject("Apple") -createProject("AMD") -createProject("Intel") -createProject("NVIDIA")