Add the GPU rigid body pipeline from https://github.com/erwincoumans/experiments as a Bullet 3.x preview for Bullet 2.80

This commit is contained in:
erwin.coumans
2012-03-05 00:54:32 +00:00
parent 73c4646b40
commit 571af41cf6
257 changed files with 55106 additions and 0 deletions

View File

@@ -0,0 +1,349 @@
MSTRINGIFY(
// Converts an integer cell coordinate into a linear cell index.
// pParams[1] is reinterpreted as an int4 whose x/y/z hold the grid dimensions.
// Every coordinate is wrapped by masking with (dimension - 1); this acts as a
// modulo only when the dimensions are powers of two (assumed - confirm on host).
int getPosHash(int4 gridPos, __global float4* pParams)
{
	const int4 dims = *((__global int4*)(pParams + 1));
	const int cx = gridPos.x & (dims.x - 1);
	const int cy = gridPos.y & (dims.y - 1);
	const int cz = gridPos.z & (dims.z - 1);
	// linear layout: x varies fastest then y then z
	return cz * dims.y * dims.x + cy * dims.x + cx;
}
// Returns the grid cell that contains worldPos.
// pParams[0].xyz are per-axis scale factors applied to the position (the host
// fills them with inverse cell sizes) and pParams[1] reinterpreted as int4
// holds the grid dimensions. Each coordinate is floored and then wrapped by
// masking with (dimension - 1).
// The w component of the result is not a coordinate; it is set to 0 so that
// callers never receive an uninitialized value.
int4 getGridPos(float4 worldPos, __global float4* pParams)
{
	const float4 scale = pParams[0];
	const int4 gridDim = *((__global int4*)(pParams + 1));
	int4 gridPos;
	gridPos.x = (int)floor(worldPos.x * scale.x) & (gridDim.x - 1);
	gridPos.y = (int)floor(worldPos.y * scale.y) & (gridDim.y - 1);
	gridPos.z = (int)floor(worldPos.z * scale.z) & (gridDim.z - 1);
	gridPos.w = 0;
	return gridPos;
}
// One work-item per body: computes the hash of the grid cell that contains the
// centre of the body AABB and stores (hash and body index) in pHash[index].
// pAABB holds two float4 per body: minimum at 2*i and maximum at 2*i + 1.
__kernel void kCalcHashAABB(int numObjects, __global float4* pAABB, __global int2* pHash, __global float4* pParams GUID_ARG)
{
	const int index = get_global_id(0);
	if(index < numObjects)
	{
		const float4 lo = pAABB[index*2];
		const float4 hi = pAABB[index*2 + 1];
		float4 centre;
		centre.x = (lo.x + hi.x) * 0.5f;
		centre.y = (lo.y + hi.y) * 0.5f;
		centre.z = (lo.z + hi.z) * 0.5f;
		centre.w = 0.f;
		// locate the cell and linearize it
		const int4 cell = getGridPos(centre, pParams);
		// x is the cell hash and y the body index
		int2 entry;
		entry.x = getPosHash(cell, pParams);
		entry.y = index;
		pHash[index] = entry;
	}
}
// One work-item per grid cell: marks the cell as empty by storing -1.
__kernel void kClearCellStart(	int numCells,
								__global int* pCellStart GUID_ARG)
{
	const int cell = get_global_id(0);
	if(cell < numCells)
	{
		pCellStart[cell] = -1;
	}
}
// One work-item per entry of pHash. An entry starts a new cell when its hash
// differs from the hash of the previous entry; its position is then written to
// cellStart[hash]. This is meaningful only if pHash is ordered by hash (the host
// sorts it before launching this kernel - confirm against the caller).
// The local array has 513 slots: slot 0 holds the hash preceding the work-group
// and slot (local id + 1) the hash of each work-item so work-groups of up to
// 512 items fit.
__kernel void kFindCellStart(int numObjects, __global int2* pHash, __global int* cellStart GUID_ARG)
{
	__local int sharedHash[513];
	const int index = get_global_id(0);
	const int localIdx = get_local_id(0);
	const int inRange = (index < numObjects);
	int myHash = 0;
	if(inRange)
	{
		myHash = pHash[index].x;
		// publish the hash so the next work-item can compare against it
		sharedHash[localIdx + 1] = myHash;
		if((localIdx == 0) && (index > 0))
		{
			// the first work-item of a group fetches the hash of its predecessor
			sharedHash[0] = pHash[index-1].x;
		}
	}
	// every work-item reaches the barrier including those past numObjects
	barrier(CLK_LOCAL_MEM_FENCE);
	if(inRange)
	{
		if((index == 0) || (myHash != sharedHash[localIdx]))
		{
			cellStart[myHash] = index;
		}
	}
}
// Returns 1 when the two boxes overlap or touch on all three axes and 0 otherwise.
// Only the x/y/z components are compared. The tests are written as negated <=
// so that a NaN coordinate yields 0 exactly like a chained && of <= would.
int testAABBOverlap(float4 min0, float4 max0, float4 min1, float4 max1)
{
	if(!(min0.x <= max1.x) || !(min1.x <= max0.x))
	{
		return 0;
	}
	if(!(min0.y <= max1.y) || !(min1.y <= max0.y))
	{
		return 0;
	}
	if(!(min0.z <= max1.z) || !(min1.z <= max0.z))
	{
		return 0;
	}
	return 1;
}
// Collects overlaps between one body and the bodies stored in one grid cell.
//
// index is the position of the body in pHash; pHash[i].x is a cell hash and
// pHash[i].y the index of the body in pAABB. The w component of each AABB
// minimum carries the handle index of the body as raw integer bits.
//
// Pairs are kept per handle in pPairBuff[start .. start + curr) where
// (start and curr) come from pPairBuffStartCurr[handle]. Flags on stored entries:
//   0x20000000 - pair was added during this pass
//   0x40000000 - pair existed already and was found again during this pass
// Only bodies whose pAABB index is smaller than that of the current body are
// tested so a pair is recorded by one of its two bodies only.
// At most pParams[1].w (max bodies per cell) entries of the cell are visited.
void findPairsInCell(	int numObjects,
						int4 gridPos,
						int index,
						__global int2* pHash,
						__global int* pCellStart,
						__global float4* pAABB,
						__global int* pPairBuff,
						__global int2* pPairBuffStartCurr,
						__global float4* pParams)
{
	int4 pGridDim = *((__global int4*)(pParams + 1));
	int maxBodiesPerCell = pGridDim.w;
	int gridHash = getPosHash(gridPos, pParams);
	// get start of bucket for this cell
	int bucketStart = pCellStart[gridHash];
	if (bucketStart == -1)
	{
		return;   // cell empty
	}
	// fetch the body this work-item is responsible for
	int2 sortedData = pHash[index];
	int unsorted_indx = sortedData.y;
	float4 min0 = pAABB[unsorted_indx*2 + 0];
	float4 max0 = pAABB[unsorted_indx*2 + 1];
	int handleIndex = as_int(min0.w);
	int2 start_curr = pPairBuffStartCurr[handleIndex];
	int start = start_curr.x;
	int curr = start_curr.y;
	// Only the start of the next handle is needed. It is read as a single int
	// because the host sizes this buffer as 2 * maxHandles + 1 ints: a full int2
	// load at handleIndex + 1 would touch one int past the end for the last handle.
	__global int* pStartCurrInts = (__global int*)pPairBuffStartCurr;
	int nextStart = pStartCurrInts[(handleIndex + 1) * 2];
	int curr_max = nextStart - start - 1;
	int bucketEnd = bucketStart + maxBodiesPerCell;
	bucketEnd = (bucketEnd > numObjects) ? numObjects : bucketEnd;
	// iterate over bodies in this cell
	for(int index2 = bucketStart; index2 < bucketEnd; index2++)
	{
		int2 cellData = pHash[index2];
		if (cellData.x != gridHash)
		{
			break;   // no longer in same bucket
		}
		int unsorted_indx2 = cellData.y;
		if (unsorted_indx2 < unsorted_indx) // skips self and the mirrored pair
		{
			float4 min1 = pAABB[unsorted_indx2*2 + 0];
			float4 max1 = pAABB[unsorted_indx2*2 + 1];
			if(testAABBOverlap(min0, max0, min1, max1))
			{
				int handleIndex2 = as_int(min1.w);
				int k;
				// look for the pair among the ones already stored
				for(k = 0; k < curr; k++)
				{
					int old_pair = pPairBuff[start+k] & (~0x60000000);
					if(old_pair == handleIndex2)
					{
						pPairBuff[start+k] |= 0x40000000;
						break;
					}
				}
				if(k == curr)
				{
					// not stored yet: append unless the slice is full
					if(curr >= curr_max)
					{ // not a good solution but it avoids writing out of range
						break;
					}
					pPairBuff[start+curr] = handleIndex2 | 0x20000000;
					curr++;
				}
			}
		}
	}
	// publish the updated pair count for this handle
	int2 newStartCurr;
	newStartCurr.x = start;
	newStartCurr.y = curr;
	pPairBuffStartCurr[handleIndex] = newStartCurr;
	return;
}
// One work-item per entry of pHash: locates the grid cell of the body centre
// and calls findPairsInCell for that cell and its 26 neighbours.
// Neighbour coordinates may leave the grid; getPosHash wraps them by masking.
__kernel void kFindOverlappingPairs(	int numObjects,
										__global float4* pAABB,
										__global int2* pHash,
										__global int* pCellStart,
										__global int* pPairBuff,
										__global int2* pPairBuffStartCurr,
										__global float4* pParams GUID_ARG)
{
	int index = get_global_id(0);
	if(index >= numObjects)
	{
		return;
	}
	int2 sortedData = pHash[index];
	int unsorted_indx = sortedData.y;
	float4 bbMin = pAABB[unsorted_indx*2 + 0];
	float4 bbMax = pAABB[unsorted_indx*2 + 1];
	float4 pos;
	pos.x = (bbMin.x + bbMax.x) * 0.5f;
	pos.y = (bbMin.y + bbMax.y) * 0.5f;
	pos.z = (bbMin.z + bbMax.z) * 0.5f;
	// w is unused by getGridPos; set it as kCalcHashAABB does so that no
	// uninitialized component is passed by value
	pos.w = 0.f;
	// get address in grid
	int4 gridPosA = getGridPos(pos, pParams);
	int4 gridPosB;
	gridPosB.w = 0;
	// examine only neighbouring cells
	for(int z=-1; z<=1; z++)
	{
		gridPosB.z = gridPosA.z + z;
		for(int y=-1; y<=1; y++)
		{
			gridPosB.y = gridPosA.y + y;
			for(int x=-1; x<=1; x++)
			{
				gridPosB.x = gridPosA.x + x;
				findPairsInCell(numObjects, gridPosB, index, pHash, pCellStart, pAABB, pPairBuff, pPairBuffStartCurr, pParams);
			}
		}
	}
}
// One work-item per entry of pHash: tests the body against each of the
// numLarge AABBs stored after the first numObjects AABBs in pAABB and records
// overlaps in the pair slice of the body.
// Stored entries use the same flags as findPairsInCell:
//   0x20000000 - pair was added during this pass
//   0x40000000 - pair existed already and was found again during this pass
// pCellStart is part of the argument list but is not used by this kernel.
__kernel void kFindPairsLarge(	int numObjects,
								__global float4* pAABB,
								__global int2* pHash,
								__global int* pCellStart,
								__global int* pPairBuff,
								__global int2* pPairBuffStartCurr,
								uint numLarge GUID_ARG)
{
	int index = get_global_id(0);
	if(index >= numObjects)
	{
		return;
	}
	int2 sortedData = pHash[index];
	int unsorted_indx = sortedData.y;
	float4 min0 = pAABB[unsorted_indx*2 + 0];
	float4 max0 = pAABB[unsorted_indx*2 + 1];
	// the w component of the AABB minimum carries the handle index as raw bits
	int handleIndex = as_int(min0.w);
	int2 start_curr = pPairBuffStartCurr[handleIndex];
	int start = start_curr.x;
	int curr = start_curr.y;
	// Only the start of the next handle is needed. It is read as a single int
	// because the host sizes this buffer as 2 * maxHandles + 1 ints: a full int2
	// load at handleIndex + 1 would touch one int past the end for the last handle.
	__global int* pStartCurrInts = (__global int*)pPairBuffStartCurr;
	int nextStart = pStartCurrInts[(handleIndex + 1) * 2];
	int curr_max = nextStart - start - 1;
	for(uint i = 0; i < numLarge; i++)
	{
		int indx2 = numObjects + i;
		float4 min1 = pAABB[indx2*2 + 0];
		float4 max1 = pAABB[indx2*2 + 1];
		if(testAABBOverlap(min0, max0, min1, max1))
		{
			int k;
			int handleIndex2 = as_int(min1.w);
			// look for the pair among the ones already stored
			for(k = 0; k < curr; k++)
			{
				int old_pair = pPairBuff[start+k] & (~0x60000000);
				if(old_pair == handleIndex2)
				{
					pPairBuff[start+k] |= 0x40000000;
					break;
				}
			}
			if(k == curr)
			{
				// Check the capacity before storing (same order as findPairsInCell)
				// so that no entry is written that the pair count does not cover.
				if(curr >= curr_max)
				{ // not a good solution but it avoids writing out of range
					break;
				}
				pPairBuff[start+curr] = handleIndex2 | 0x20000000;
				curr++;
			}
		}
	}
	// publish the updated pair count for this handle
	int2 newStartCurr;
	newStartCurr.x = start;
	newStartCurr.y = curr;
	pPairBuffStartCurr[handleIndex] = newStartCurr;
	return;
}
// One work-item per body: counts the stored pairs of the body that do not carry
// the 0x40000000 flag and writes the count to pPairScan[index + 1].
// The handle index of the body is taken from the w component of its AABB minimum.
__kernel void kComputePairCacheChanges(	int numObjects,
										__global int* pPairBuff,
										__global int2* pPairBuffStartCurr,
										__global int* pPairScan,
										__global float4* pAABB GUID_ARG)
{
	const int index = get_global_id(0);
	if(index >= numObjects)
	{
		return;
	}
	const int handleIndex = as_int(pAABB[index * 2].w);
	const int2 slice = pPairBuffStartCurr[handleIndex];
	const int first = slice.x;
	const int count = slice.y;
	int changed = 0;
	for(int k = 0; k < count; k++)
	{
		if((pPairBuff[first + k] & 0x40000000) == 0)
		{
			changed++;
		}
	}
	pPairScan[index+1] = changed;
}
// One work-item per body. Walks the stored pairs of the body once and
//  - copies every entry without the 0x40000000 flag to pPairOut starting at
//    offset pPairScan[index + 1]
//  - compacts in place the entries that carry 0x20000000 or 0x40000000 with
//    both flags cleared; entries with neither flag are dropped
// The new entry count is written back to pPairBuffStartCurr.
__kernel void kSqueezeOverlappingPairBuff(	int numObjects,
											__global int* pPairBuff,
											__global int2* pPairBuffStartCurr,
											__global int* pPairScan,
											__global int* pPairOut,
											__global float4* pAABB GUID_ARG)
{
	const int index = get_global_id(0);
	if(index >= numObjects)
	{
		return;
	}
	const int handleIndex = as_int(pAABB[index * 2].w);
	const int2 slice = pPairBuffStartCurr[handleIndex];
	const int first = slice.x;
	const int count = slice.y;
	const int outBase = pPairScan[index+1];
	int emitted = 0;
	int kept = 0;
	for(int k = 0; k < count; k++)
	{
		const int entry = pPairBuff[first + k];
		if(!(entry & 0x40000000))
		{
			pPairOut[outBase + emitted] = entry;
			emitted++;
		}
		if(entry & 0x60000000)
		{
			// kept never exceeds k so this only overwrites entries already read
			pPairBuff[first + kept] = entry & (~0x60000000);
			kept++;
		}
	}
	int2 newStartCurr;
	newStartCurr.x = first;
	newStartCurr.y = kept;
	pPairBuffStartCurr[handleIndex] = newStartCurr;
}
);

View File

@@ -0,0 +1,697 @@
/*
Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
Copyright (C) 2006 - 2009 Sony Computer Entertainment Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#include "LinearMath/btAlignedAllocator.h"
#include "LinearMath/btQuickprof.h"
#include "BulletCollision/BroadphaseCollision/btOverlappingPairCache.h"
#include "../basic_initialize/btOpenCLUtils.h"
#include "bt3dGridBroadphaseOCL.h"
#include <stdio.h>
#include <string.h>
#include "Adl/Adl.h"
#include <AdlPrimitives/Scan/PrefixScan.h>
#include <AdlPrimitives/Sort/RadixSort32.h>
#include <AdlPrimitives/Sort/RadixSort.h>
// When defined, the host methods in this file call clFinish() after launching
// their kernels, so each stage completes before the next one starts.
#define ADD_BLOCKING_CL_FINISH_FOR_BENCHMARK
// Path of the kernel source file; passed as the last argument of
// btOpenCLUtils::compileCLProgramFromString() in initCL().
#define GRID_OCL_PATH "..\\..\\opencl\\3dGridBroadphase\\Shared\\bt3dGridBroadphaseOCL.cl"
// Turns its argument into a string literal. The .cl file wraps the whole kernel
// source in MSTRINGIFY( ... ); so that including it below embeds the source as
// a C string and also supplies the terminating semicolon.
#define MSTRINGIFY(A) #A
static const char* spProgramSource =
#include "bt3dGridBroadphaseOCL.cl"
// NOTE(review): the Adl objects below are file-scope variables (most with
// external linkage), not class members. They are assigned in the constructor and
// freed in the destructor, so they are shared by every bt3dGridBroadphaseOCL
// instance - creating more than one instance looks unsafe; confirm.
// Prefix-scan state used by scanOverlappingPairBuff().
adl::PrefixScan<adl::TYPE_CL>::Data* gData1=0;
// Device buffer used together with m_dPairScanChanged by scanOverlappingPairBuff().
// Despite the m_ prefix this is a global, not a member.
adl::Buffer<unsigned int>* m_srcClBuffer=0;
// Not referenced anywhere else in the code visible here.
struct MySortData
{
int key;
int value;
};
// Radix-sort state for the OpenCL path of sortHash().
adl::RadixSort32<adl::TYPE_CL>::Data* dataC = 0;
// Radix-sort state for the host (USE_HOST) path of sortHash().
adl::RadixSort<adl::TYPE_HOST>::Data* dataHost = 0;
// Values written into m_srcClBuffer by the constructor.
static unsigned int infElem = 0x2fffffff;
static unsigned int zeroEl = 0;
// Wraps to the largest unsigned value; not referenced in the code visible here.
static unsigned int minusOne= -1;
/// Builds the OpenCL side of the broadphase on top of btGpu3DGridBroadphase.
///
/// The grid and proxy-count arguments are forwarded unchanged to the base class.
/// context / device / queue are stored (not created) by initCL(). When deviceCL
/// is null a private adl::DeviceCL wrapping the same context, device and queue
/// is created and later destroyed by the destructor; otherwise the caller keeps
/// ownership of the device passed in.
///
/// NOTE(review): the Adl sort/scan state is stored in file-scope globals, so a
/// second instance overwrites the pointers of the first one - confirm that only
/// one instance is ever alive.
bt3dGridBroadphaseOCL::bt3dGridBroadphaseOCL(	btOverlappingPairCache* overlappingPairCache,
												const btVector3& cellSize,
												int gridSizeX, int gridSizeY, int gridSizeZ,
												int maxSmallProxies, int maxLargeProxies, int maxPairsPerSmallProxy,
												btScalar maxSmallProxySize,
												int maxSmallProxiesPerCell,
												cl_context context, cl_device_id device, cl_command_queue queue,
												adl::DeviceCL* deviceCL
											) :
	btGpu3DGridBroadphase(overlappingPairCache, cellSize, gridSizeX, gridSizeY, gridSizeZ, maxSmallProxies, maxLargeProxies, maxPairsPerSmallProxy, maxSmallProxySize, maxSmallProxiesPerCell)
{
	// raw OpenCL state: program, buffers, initial buffer contents, kernels
	initCL(context, device, queue);
	allocateBuffers();
	prefillBuffers();
	initKernels();

	// Adl host device
	adl::DeviceUtils::Config cfg;
	m_deviceHost = adl::DeviceUtils::allocate( adl::TYPE_HOST, cfg );

	// Adl OpenCL device: use the caller's or wrap the given context/device/queue
	m_ownsDevice = (deviceCL == 0);
	if (m_ownsDevice)
	{
		deviceCL = new adl::DeviceCL;
		deviceCL->m_context = context;
		deviceCL->m_deviceIdx = device;
		deviceCL->m_commandQueue = queue;
		deviceCL->m_kernelManager = new adl::KernelManager;
	}
	m_deviceCL = deviceCL;

	// the device-side sort and scan are sized for at least 256K elements
	const int minSize = 256*1024;
	const int maxSortBuffer = (maxSmallProxies >= minSize) ? maxSmallProxies : minSize;

	// seed three elements of the scan source buffer: 0 at the front, infElem at
	// position maxSmallProxies and 0 right after it
	m_srcClBuffer = new adl::Buffer<unsigned int> (m_deviceCL,maxSmallProxies+2);
	m_srcClBuffer->write(&zeroEl,1,0);
	m_srcClBuffer->write(&infElem,1,maxSmallProxies);
	m_srcClBuffer->write(&zeroEl,1,maxSmallProxies+1);
	m_deviceCL->waitForCompletion();

	gData1 = adl::PrefixScan<adl::TYPE_CL>::allocate( m_deviceCL, maxSortBuffer+2,adl::PrefixScanBase::EXCLUSIVE );
	dataHost = adl::RadixSort<adl::TYPE_HOST>::allocate( m_deviceHost, maxSmallProxies+2 );
	dataC = adl::RadixSort32<adl::TYPE_CL>::allocate( m_deviceCL, maxSortBuffer+2 );
}
/// Releases everything the constructor created: the Adl sort/scan state, the
/// Adl host device, the scan source buffer, the OpenCL kernels, program and
/// buffers, and - only when it was created by the constructor - the Adl OpenCL
/// device. The context, device id and command queue belong to the caller and
/// are left untouched.
bt3dGridBroadphaseOCL::~bt3dGridBroadphaseOCL()
{
	//btSimpleBroadphase will free memory of btSortedOverlappingPairCache, because m_ownsPairCache
	assert(m_bInitialized);

	// Adl objects live in file-scope pointers; reset them so that nothing is
	// left dangling after this instance is gone
	adl::RadixSort<adl::TYPE_HOST>::deallocate(dataHost);
	dataHost = 0;
	adl::PrefixScan<adl::TYPE_CL>::deallocate(gData1);
	gData1 = 0;
	adl::RadixSort32<adl::TYPE_CL>::deallocate(dataC);
	dataC = 0;
	adl::DeviceUtils::deallocate(m_deviceHost);
	m_deviceHost = 0;
	delete m_srcClBuffer;
	m_srcClBuffer = 0;

	// kernels created by initKernel()
	for (int i = 0; i < GRID3DOCL_KERNEL_TOTAL; i++)
	{
		if (m_kernels[i].m_kernel)
		{
			clReleaseKernel(m_kernels[i].m_kernel);
			m_kernels[i].m_kernel = 0;
		}
	}
	// program compiled by initCL()
	if (m_cpProgram)
	{
		clReleaseProgram(m_cpProgram);
		m_cpProgram = 0;
	}
	// buffers created by allocateBuffers()
	cl_mem* buffers[] =
	{
		&m_dBodiesHash, &m_dCellStart, &m_dPairBuff, &m_dPairBuffStartCurr, &m_dAABB,
		&m_dPairScanChanged, &m_dPairsChanged, &m_dPairsContiguous, &m_dBpParams
	};
	const int numBuffers = (int)(sizeof(buffers) / sizeof(buffers[0]));
	for (int i = 0; i < numBuffers; i++)
	{
		if (*buffers[i])
		{
			clReleaseMemObject(*buffers[i]);
			*buffers[i] = 0;
		}
	}

	if (m_ownsDevice)
	{
		delete m_deviceCL->m_kernelManager;
		delete m_deviceCL;
	}
	m_deviceCL = 0;
}
#ifdef CL_PLATFORM_MINI_CL
// MiniCL only: forward-declare the kernel entry points so that initCL() can
// register them explicitly with MINICL_REGISTER.
// Background: with MSVC9 the static constructors that normally perform this
// registration are not run when the variables are defined in a library and never
// referenced (apparently an optimization); other compilers may behave the same.
// Registering the kernels a second time is safe.
#define MINICL_DECLARE(a) extern "C" void a();
MINICL_DECLARE(kCalcHashAABB)
MINICL_DECLARE(kClearCellStart)
MINICL_DECLARE(kFindCellStart)
MINICL_DECLARE(kFindOverlappingPairs)
MINICL_DECLARE(kFindPairsLarge)
MINICL_DECLARE(kComputePairCacheChanges)
MINICL_DECLARE(kSqueezeOverlappingPairBuff)
#undef MINICL_DECLARE
#endif
/// Stores the caller-provided context, device and command queue and compiles
/// the embedded kernel source into m_cpProgram.
/// The three handles must be non-null (asserted); they are not retained or
/// released by this class.
/// "OK" is printed only when a program object was obtained; otherwise the error
/// code is reported and an assertion fires.
void bt3dGridBroadphaseOCL::initCL(cl_context context, cl_device_id device, cl_command_queue queue)
{
#ifdef CL_PLATFORM_MINI_CL
	// call constructors here
	MINICL_REGISTER(kCalcHashAABB)
	MINICL_REGISTER(kClearCellStart)
	MINICL_REGISTER(kFindCellStart)
	MINICL_REGISTER(kFindOverlappingPairs)
	MINICL_REGISTER(kFindPairsLarge)
	MINICL_REGISTER(kComputePairCacheChanges)
	MINICL_REGISTER(kSqueezeOverlappingPairBuff)
#endif
	btAssert(context);
	m_cxMainContext = context;
	btAssert(device);
	m_cdDevice = device;
	btAssert(queue);
	m_cqCommandQue = queue;
	// initialized so that the diagnostic below never prints an indeterminate
	// value if the helper does not write the error code on every path
	cl_int ciErrNum = CL_SUCCESS;
	// adjacent string literals concatenate: the option is "-DGUID_ARG=", i.e.
	// GUID_ARG expands to nothing inside the kernels
	m_cpProgram = btOpenCLUtils::compileCLProgramFromString(m_cxMainContext,m_cdDevice,spProgramSource, &ciErrNum,"-DGUID_ARG=""""",GRID_OCL_PATH);
	if (!m_cpProgram)
	{
		// presumably the helper returns a null program on failure - verify
		printf("3D GRID OCL Error : program build failed (%d)\n", ciErrNum);
		btAssert(m_cpProgram);
		return;
	}
	printf("OK\n");
}
void bt3dGridBroadphaseOCL::initKernels()
{
initKernel(GRID3DOCL_KERNEL_CALC_HASH_AABB, "kCalcHashAABB");
setKernelArg(GRID3DOCL_KERNEL_CALC_HASH_AABB, 1, sizeof(cl_mem),(void*)&m_dAABB);
setKernelArg(GRID3DOCL_KERNEL_CALC_HASH_AABB, 2, sizeof(cl_mem),(void*)&m_dBodiesHash);
setKernelArg(GRID3DOCL_KERNEL_CALC_HASH_AABB, 3, sizeof(cl_mem),(void*)&m_dBpParams);
initKernel(GRID3DOCL_KERNEL_CLEAR_CELL_START, "kClearCellStart");
setKernelArg(GRID3DOCL_KERNEL_CLEAR_CELL_START, 1, sizeof(cl_mem),(void*)&m_dCellStart);
initKernel(GRID3DOCL_KERNEL_FIND_CELL_START, "kFindCellStart");
setKernelArg(GRID3DOCL_KERNEL_FIND_CELL_START, 1, sizeof(cl_mem),(void*)&m_dBodiesHash);
setKernelArg(GRID3DOCL_KERNEL_FIND_CELL_START, 2, sizeof(cl_mem),(void*)&m_dCellStart);
initKernel(GRID3DOCL_KERNEL_FIND_OVERLAPPING_PAIRS, "kFindOverlappingPairs");
setKernelArg(GRID3DOCL_KERNEL_FIND_OVERLAPPING_PAIRS, 1, sizeof(cl_mem),(void*)&m_dAABB);
setKernelArg(GRID3DOCL_KERNEL_FIND_OVERLAPPING_PAIRS, 2, sizeof(cl_mem),(void*)&m_dBodiesHash);
setKernelArg(GRID3DOCL_KERNEL_FIND_OVERLAPPING_PAIRS, 3, sizeof(cl_mem),(void*)&m_dCellStart);
setKernelArg(GRID3DOCL_KERNEL_FIND_OVERLAPPING_PAIRS, 4, sizeof(cl_mem),(void*)&m_dPairBuff);
setKernelArg(GRID3DOCL_KERNEL_FIND_OVERLAPPING_PAIRS, 5, sizeof(cl_mem),(void*)&m_dPairBuffStartCurr);
setKernelArg(GRID3DOCL_KERNEL_FIND_OVERLAPPING_PAIRS, 6, sizeof(cl_mem),(void*)&m_dBpParams);
initKernel(GRID3DOCL_KERNEL_FIND_PAIRS_LARGE, "kFindPairsLarge");
setKernelArg(GRID3DOCL_KERNEL_FIND_PAIRS_LARGE, 1, sizeof(cl_mem),(void*)&m_dAABB);
setKernelArg(GRID3DOCL_KERNEL_FIND_PAIRS_LARGE, 2, sizeof(cl_mem),(void*)&m_dBodiesHash);
setKernelArg(GRID3DOCL_KERNEL_FIND_PAIRS_LARGE, 3, sizeof(cl_mem),(void*)&m_dCellStart);
setKernelArg(GRID3DOCL_KERNEL_FIND_PAIRS_LARGE, 4, sizeof(cl_mem),(void*)&m_dPairBuff);
setKernelArg(GRID3DOCL_KERNEL_FIND_PAIRS_LARGE, 5, sizeof(cl_mem),(void*)&m_dPairBuffStartCurr);
initKernel(GRID3DOCL_KERNEL_COMPUTE_CACHE_CHANGES, "kComputePairCacheChanges");
setKernelArg(GRID3DOCL_KERNEL_COMPUTE_CACHE_CHANGES, 1, sizeof(cl_mem),(void*)&m_dPairBuff);
setKernelArg(GRID3DOCL_KERNEL_COMPUTE_CACHE_CHANGES, 2, sizeof(cl_mem),(void*)&m_dPairBuffStartCurr);
setKernelArg(GRID3DOCL_KERNEL_COMPUTE_CACHE_CHANGES, 3, sizeof(cl_mem),(void*)&m_dPairScanChanged);
setKernelArg(GRID3DOCL_KERNEL_COMPUTE_CACHE_CHANGES, 4, sizeof(cl_mem),(void*)&m_dAABB);
initKernel(GRID3DOCL_KERNEL_SQUEEZE_PAIR_BUFF, "kSqueezeOverlappingPairBuff");
setKernelArg(GRID3DOCL_KERNEL_SQUEEZE_PAIR_BUFF, 1, sizeof(cl_mem),(void*)&m_dPairBuff);
setKernelArg(GRID3DOCL_KERNEL_SQUEEZE_PAIR_BUFF, 2, sizeof(cl_mem),(void*)&m_dPairBuffStartCurr);
setKernelArg(GRID3DOCL_KERNEL_SQUEEZE_PAIR_BUFF, 3, sizeof(cl_mem),(void*)&m_dPairScanChanged);
setKernelArg(GRID3DOCL_KERNEL_SQUEEZE_PAIR_BUFF, 4, sizeof(cl_mem),(void*)&m_dPairsChanged);
setKernelArg(GRID3DOCL_KERNEL_SQUEEZE_PAIR_BUFF, 5, sizeof(cl_mem),(void*)&m_dAABB);
}
void bt3dGridBroadphaseOCL::allocateBuffers()
{
cl_int ciErrNum;
unsigned int memSize;
// current version of bitonic sort works for power of 2 arrays only, so ...
m_hashSize = 1;
for(int bit = 1; bit < 32; bit++)
{
if(m_hashSize >= m_maxHandles)
{
break;
}
m_hashSize <<= 1;
}
memSize = m_hashSize * 2 * sizeof(unsigned int);
if (memSize < 1024*1024)
memSize = 1024*1024;
m_dBodiesHash = clCreateBuffer(m_cxMainContext, CL_MEM_READ_WRITE, memSize, NULL, &ciErrNum);
GRID3DOCL_CHECKERROR(ciErrNum, CL_SUCCESS);
memSize = m_numCells * sizeof(unsigned int);
m_dCellStart = clCreateBuffer(m_cxMainContext, CL_MEM_READ_WRITE, memSize, NULL, &ciErrNum);
GRID3DOCL_CHECKERROR(ciErrNum, CL_SUCCESS);
memSize = m_maxHandles * m_maxPairsPerBody * sizeof(unsigned int);
m_dPairBuff = clCreateBuffer(m_cxMainContext, CL_MEM_READ_WRITE, memSize, NULL, &ciErrNum);
GRID3DOCL_CHECKERROR(ciErrNum, CL_SUCCESS);
memSize = (m_maxHandles * 2 + 1) * sizeof(unsigned int);
m_dPairBuffStartCurr = clCreateBuffer(m_cxMainContext, CL_MEM_READ_WRITE, memSize, NULL, &ciErrNum);
GRID3DOCL_CHECKERROR(ciErrNum, CL_SUCCESS);
unsigned int numAABB = m_maxHandles + m_maxLargeHandles;
memSize = numAABB * sizeof(float) * 4 * 2;
m_dAABB = clCreateBuffer(m_cxMainContext, CL_MEM_READ_WRITE, memSize, NULL, &ciErrNum);
GRID3DOCL_CHECKERROR(ciErrNum, CL_SUCCESS);
memSize = (m_maxHandles + 2) * sizeof(unsigned int);
m_dPairScanChanged = clCreateBuffer(m_cxMainContext, CL_MEM_READ_WRITE, memSize, NULL, &ciErrNum);
GRID3DOCL_CHECKERROR(ciErrNum, CL_SUCCESS);
memSize = m_maxHandles * m_maxPairsPerBody * sizeof(unsigned int);
m_dPairsChanged = clCreateBuffer(m_cxMainContext, CL_MEM_READ_WRITE, memSize, NULL, &ciErrNum);
GRID3DOCL_CHECKERROR(ciErrNum, CL_SUCCESS);
m_dPairsContiguous = clCreateBuffer(m_cxMainContext, CL_MEM_READ_WRITE, memSize, NULL, &ciErrNum);
GRID3DOCL_CHECKERROR(ciErrNum, CL_SUCCESS);
memSize = 3 * 4 * sizeof(float);
m_dBpParams = clCreateBuffer(m_cxMainContext, CL_MEM_READ_WRITE, memSize, NULL, &ciErrNum);
GRID3DOCL_CHECKERROR(ciErrNum, CL_SUCCESS);
}
/// Uploads the initial contents of the hash, pair-start and pair buffers.
/// The host hash array is filled with 0xFF bytes first and the host pair array
/// with zeros; m_hPairBuffStartCurr is uploaded as it currently is.
void bt3dGridBroadphaseOCL::prefillBuffers()
{
	// every hash entry starts out as all-ones
	memset(m_hBodiesHash, 0xFF, m_maxHandles*2*sizeof(unsigned int));
	copyArrayToDevice(m_dBodiesHash, m_hBodiesHash, m_maxHandles * 2 * sizeof(unsigned int));

	// the device array is padded to m_hashSize entries; fill the padding by
	// re-sending the beginning of the (all-ones) host array behind the first part
	const int paddingEntries = m_hashSize - m_maxHandles;
	if(paddingEntries != 0)
	{
		copyArrayToDevice(m_dBodiesHash, m_hBodiesHash, paddingEntries * 2 * sizeof(unsigned int), m_maxHandles * 2 * sizeof(unsigned int), 0);
	}

	copyArrayToDevice(m_dPairBuffStartCurr, m_hPairBuffStartCurr, (m_maxHandles * 2 + 1) * sizeof(unsigned int));

	// no stored pairs initially
	memset(m_hPairBuff, 0x00, m_maxHandles * m_maxPairsPerBody * sizeof(unsigned int));
	copyArrayToDevice(m_dPairBuff, m_hPairBuff, m_maxHandles * m_maxPairsPerBody * sizeof(unsigned int));
}
void bt3dGridBroadphaseOCL::initKernel(int kernelId, char* pName)
{
cl_int ciErrNum;
cl_kernel kernel = clCreateKernel(m_cpProgram, pName, &ciErrNum);
GRID3DOCL_CHECKERROR(ciErrNum, CL_SUCCESS);
size_t wgSize;
ciErrNum = clGetKernelWorkGroupInfo(kernel, m_cdDevice, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &wgSize, NULL);
GRID3DOCL_CHECKERROR(ciErrNum, CL_SUCCESS);
m_kernels[kernelId].m_Id = kernelId;
m_kernels[kernelId].m_kernel = kernel;
m_kernels[kernelId].m_name = pName;
m_kernels[kernelId].m_workgroupSize = (int)wgSize;
return;
}
void bt3dGridBroadphaseOCL::runKernelWithWorkgroupSize(int kernelId, int globalSize)
{
if(globalSize <= 0)
{
return;
}
cl_kernel kernelFunc = m_kernels[kernelId].m_kernel;
cl_int ciErrNum = clSetKernelArg(kernelFunc, 0, sizeof(int), (void*)&globalSize);
GRID3DOCL_CHECKERROR(ciErrNum, CL_SUCCESS);
int workgroupSize = btMin(64,m_kernels[kernelId].m_workgroupSize);
if(workgroupSize <= 0)
{ // let OpenCL library calculate workgroup size
size_t globalWorkSize[2];
globalWorkSize[0] = globalSize;
globalWorkSize[1] = 1;
ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue, kernelFunc, 1, NULL, globalWorkSize, NULL, 0,0,0 );
}
else
{
size_t localWorkSize[2], globalWorkSize[2];
//workgroupSize = btMin(workgroupSize, globalSize);
int num_t = globalSize / workgroupSize;
int num_g = num_t * workgroupSize;
if(num_g < globalSize)
{
num_t++;
}
localWorkSize[0] = workgroupSize;
globalWorkSize[0] = num_t * workgroupSize;
localWorkSize[1] = 1;
globalWorkSize[1] = 1;
ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue, kernelFunc, 1, NULL, globalWorkSize, localWorkSize, 0,0,0 );
}
GRID3DOCL_CHECKERROR(ciErrNum, CL_SUCCESS);
ciErrNum = clFlush(m_cqCommandQue);
GRID3DOCL_CHECKERROR(ciErrNum, CL_SUCCESS);
}
void bt3dGridBroadphaseOCL::setKernelArg(int kernelId, int argNum, int argSize, void* argPtr)
{
cl_int ciErrNum;
ciErrNum = clSetKernelArg(m_kernels[kernelId].m_kernel, argNum, argSize, argPtr);
GRID3DOCL_CHECKERROR(ciErrNum, CL_SUCCESS);
}
/// Blocking host-to-device copy of size bytes, from host + hostOffs to byte
/// offset devOffs of the device buffer. A size of zero is a no-op.
void bt3dGridBroadphaseOCL::copyArrayToDevice(cl_mem device, const void* host, unsigned int size, int devOffs, int hostOffs)
{
	if (size == 0)
	{
		return;
	}
	char* source = (char*)host + hostOffs;
	// CL_TRUE: the call returns only after the data has been copied
	const cl_int status = clEnqueueWriteBuffer(m_cqCommandQue, device, CL_TRUE, devOffs, size, source, 0, NULL, NULL);
	GRID3DOCL_CHECKERROR(status, CL_SUCCESS);
}
void bt3dGridBroadphaseOCL::copyArrayFromDevice(void* host, const cl_mem device, unsigned int size, int hostOffs, int devOffs)
{
if (size)
{
cl_int ciErrNum;
char* pHost = (char*)host + hostOffs;
ciErrNum = clEnqueueReadBuffer(m_cqCommandQue, device, CL_TRUE, devOffs, size, pHost, 0, NULL, NULL);
GRID3DOCL_CHECKERROR(ciErrNum, CL_SUCCESS);
}
}
//
// overrides
//
/// Lets the base class fill m_hAABB, then uploads the boxes of all current
/// small and large handles (two bt3DGrid3F1U per handle) to m_dAABB.
void bt3dGridBroadphaseOCL::prepareAABB()
{
	btGpu3DGridBroadphase::prepareAABB();
	const unsigned int aabbBytes = sizeof(bt3DGrid3F1U) * 2 * (m_numHandles + m_numLargeHandles);
	copyArrayToDevice(m_dAABB, m_hAABB, aabbBytes);
}
/// Forwards hostParams to the base class, then uploads the subset the kernels
/// read to m_dBpParams: the inverse cell sizes as one float4 followed by the
/// grid dimensions as one int4 whose fourth component carries the maximum
/// number of bodies per cell.
void bt3dGridBroadphaseOCL::setParameters(bt3DGridBroadphaseParams* hostParams)
{
	btGpu3DGridBroadphase::setParameters(hostParams);

	// must match what the kernels read through pParams[0] and pParams[1]
	struct btParamsBpOCL
	{
		float m_invCellSize[4];
		int m_gridSize[4];
	};
	btParamsBpOCL deviceParams;

	deviceParams.m_invCellSize[0] = m_params.m_invCellSizeX;
	deviceParams.m_invCellSize[1] = m_params.m_invCellSizeY;
	deviceParams.m_invCellSize[2] = m_params.m_invCellSizeZ;
	deviceParams.m_invCellSize[3] = 0.f;

	deviceParams.m_gridSize[0] = m_params.m_gridSizeX;
	deviceParams.m_gridSize[1] = m_params.m_gridSizeY;
	deviceParams.m_gridSize[2] = m_params.m_gridSizeZ;
	deviceParams.m_gridSize[3] = m_params.m_maxBodiesPerCell;

	copyArrayToDevice(m_dBpParams, &deviceParams, sizeof(btParamsBpOCL));
}
/// Runs kCalcHashAABB over the current small handles. With
/// ADD_BLOCKING_CL_FINISH_FOR_BENCHMARK defined the call waits for completion.
/// Switching the "#if 1" to 0 selects the base-class implementation instead.
void bt3dGridBroadphaseOCL::calcHashAABB()
{
	BT_PROFILE("calcHashAABB");
#if 1
	// device path
	runKernelWithWorkgroupSize(GRID3DOCL_KERNEL_CALC_HASH_AABB, m_numHandles);
#ifdef ADD_BLOCKING_CL_FINISH_FOR_BENCHMARK
	clFinish(m_cqCommandQue);
#endif //ADD_BLOCKING_CL_FINISH_FOR_BENCHMARK
#else
	// base-class fallback
	btGpu3DGridBroadphase::calcHashAABB();
#endif
}
/// Sorts the (hash, body index) entries of m_dBodiesHash.
/// Three mutually exclusive paths are selected at compile time:
///  - CL_PLATFORM_MINI_CL: base-class sort on the host array, then upload
///  - USE_HOST (disabled by default): download, Adl host radix sort, upload
///  - otherwise: Adl RadixSort32 directly on the device buffer
void bt3dGridBroadphaseOCL::sortHash()
{
BT_PROFILE("sortHash");
#ifdef CL_PLATFORM_MINI_CL
// NOTE(review): the download below is commented out, so the base-class sort
// works on whatever m_hBodiesHash already contains - confirm this is intended.
//copyArrayFromDevice(m_hBodiesHash, m_dBodiesHash, m_numHandles * 2 * sizeof(unsigned int));
btGpu3DGridBroadphase::sortHash();
copyArrayToDevice(m_dBodiesHash, m_hBodiesHash, m_numHandles * 2 * sizeof(unsigned int));
#else
//#define USE_HOST
#ifdef USE_HOST
copyArrayFromDevice(m_hBodiesHash, m_dBodiesHash, m_numHandles * 2 * sizeof(unsigned int));
//adl::Buffer<unsigned int> keysIn,keysOut,valuesIn,valuesOut;
///adl::RadixSort32<adl::TYPE_CL>::execute(dataC,keysIn,keysOut,valuesIn,valuesOut,m_numHandles);
// wrap the host array in an Adl buffer without copying it
adl::HostBuffer<adl::SortData> inoutHost;
inoutHost.m_device = m_deviceHost;
inoutHost.m_ptr = (adl::SortData*)m_hBodiesHash;
inoutHost.m_size = m_numHandles;
adl::RadixSort<adl::TYPE_HOST>::execute(dataHost, inoutHost,m_numHandles);
copyArrayToDevice(m_dBodiesHash, m_hBodiesHash, m_numHandles * 2 * sizeof(unsigned int));
#else
{
// make sure the hash kernel has finished before sorting
clFinish(m_cqCommandQue);
BT_PROFILE("RadixSort32::execute");
// wrap the existing cl_mem in an Adl buffer without copying it
adl::Buffer<adl::SortData> inout;
inout.m_device = this->m_deviceCL;
inout.m_size = m_numHandles;
inout.m_ptr = (adl::SortData*)m_dBodiesHash;
// round the element count up to a multiple of the sort's DATA_ALIGNMENT
int actualHandles = m_numHandles;
int dataAlignment = adl::RadixSort32<adl::TYPE_CL>::DATA_ALIGNMENT;
if (actualHandles%dataAlignment)
{
actualHandles += dataAlignment-(actualHandles%dataAlignment);
}
// NOTE(review): the sort covers actualHandles entries, i.e. possibly entries
// beyond m_numHandles that were not written this frame (prefilled values or
// leftovers from an earlier frame) - verify these cannot be sorted in front
// of valid entries.
adl::RadixSort32<adl::TYPE_CL>::execute(dataC,inout, actualHandles);
#ifdef ADD_BLOCKING_CL_FINISH_FOR_BENCHMARK
clFinish(m_cqCommandQue);
#endif //ADD_BLOCKING_CL_FINISH_FOR_BENCHMARK
}
{
// the sorted data stays on the device; the download is disabled
//BT_PROFILE("copyArrayFromDevice");
//copyArrayFromDevice(m_hBodiesHash, m_dBodiesHash, m_numHandles * 2 * sizeof(unsigned int));
#ifdef ADD_BLOCKING_CL_FINISH_FOR_BENCHMARK
clFinish(m_cqCommandQue);
#endif //ADD_BLOCKING_CL_FINISH_FOR_BENCHMARK
}
#endif //USE_HOST
#endif
return;
}
/// Fills m_dCellStart. With MiniCL the base class computes the table on the
/// host and it is uploaded; otherwise kClearCellStart (one work-item per cell)
/// is followed by kFindCellStart (one work-item per small handle).
/// Switching the outer "#if 1" to 0 selects the base-class implementation only.
void bt3dGridBroadphaseOCL::findCellStart()
{
#if 1
	BT_PROFILE("findCellStart");
#if defined(CL_PLATFORM_MINI_CL)
	// host computation followed by an upload
	btGpu3DGridBroadphase::findCellStart();
	copyArrayToDevice(m_dCellStart, m_hCellStart, m_numCells * sizeof(unsigned int));
#else
	// device computation: reset every cell, then record the cell starts
	runKernelWithWorkgroupSize(GRID3DOCL_KERNEL_CLEAR_CELL_START, m_numCells);
	runKernelWithWorkgroupSize(GRID3DOCL_KERNEL_FIND_CELL_START, m_numHandles);
#endif
#ifdef ADD_BLOCKING_CL_FINISH_FOR_BENCHMARK
	clFinish(m_cqCommandQue);
#endif
#else
	btGpu3DGridBroadphase::findCellStart();
#endif
}
/// Runs kFindOverlappingPairs with one work-item per small handle.
/// Switching the "#if 1" to 0 selects the base-class implementation followed by
/// an upload of the pair-start and pair buffers.
void bt3dGridBroadphaseOCL::findOverlappingPairs()
{
#if 1
	// device path
	BT_PROFILE("findOverlappingPairs");
	runKernelWithWorkgroupSize(GRID3DOCL_KERNEL_FIND_OVERLAPPING_PAIRS, m_numHandles);
#ifdef ADD_BLOCKING_CL_FINISH_FOR_BENCHMARK
	clFinish(m_cqCommandQue);
#endif
#else
	// base-class fallback; results must be pushed to the device afterwards
	btGpu3DGridBroadphase::findOverlappingPairs();
	copyArrayToDevice(m_dPairBuffStartCurr, m_hPairBuffStartCurr, (m_maxHandles * 2 + 1) * sizeof(unsigned int));
	copyArrayToDevice(m_dPairBuff, m_hPairBuff, m_maxHandles * m_maxPairsPerBody * sizeof(unsigned int));
#endif
}
/// Tests every small handle against the large handles on the device.
/// The kernel is launched only when at least one large handle exists; the
/// number of large handles is passed as kernel argument 6.
/// Switching the "#if 1" to 0 selects the base-class implementation instead.
void bt3dGridBroadphaseOCL::findPairsLarge()
{
	BT_PROFILE("findPairsLarge");
#if 1
	if(m_numLargeHandles != 0)
	{
		setKernelArg(GRID3DOCL_KERNEL_FIND_PAIRS_LARGE, 6, sizeof(int),(void*)&m_numLargeHandles);
		runKernelWithWorkgroupSize(GRID3DOCL_KERNEL_FIND_PAIRS_LARGE, m_numHandles);
	}
#ifdef ADD_BLOCKING_CL_FINISH_FOR_BENCHMARK
	clFinish(m_cqCommandQue);
#endif
#else
	btGpu3DGridBroadphase::findPairsLarge();
#endif
}
/// Runs kComputePairCacheChanges with one work-item per small handle and
/// downloads the resulting m_numHandles + 2 counters into m_hPairScanChanged.
/// Switching the "#if 1" to 0 computes on the host and uploads instead.
void bt3dGridBroadphaseOCL::computePairCacheChanges()
{
	BT_PROFILE("computePairCacheChanges");
#if 1
	// device path: compute, then mirror the counters on the host
	runKernelWithWorkgroupSize(GRID3DOCL_KERNEL_COMPUTE_CACHE_CHANGES, m_numHandles);
#ifdef ADD_BLOCKING_CL_FINISH_FOR_BENCHMARK
	clFinish(m_cqCommandQue);
#endif
	copyArrayFromDevice( m_hPairScanChanged,m_dPairScanChanged, sizeof(unsigned int)*(m_numHandles + 2));
#else
	// base-class fallback: compute on the host, then mirror on the device
	btGpu3DGridBroadphase::computePairCacheChanges();
	copyArrayToDevice(m_dPairScanChanged, m_hPairScanChanged, sizeof(unsigned int)*(m_numHandles + 2));
#endif
}
// Declared here but not referenced by the code visible in this function.
extern cl_device_type deviceType;
/// Runs a prefix scan over the per-handle change counters on the device using
/// Adl, stores the scan's reported sum in m_numPrefixSum and, when copyToCpu is
/// true, downloads m_numHandles + 2 elements of m_dPairScanChanged into
/// m_hPairScanChanged.
/// The "#if 0" branch is a disabled host-side alternative.
void bt3dGridBroadphaseOCL::scanOverlappingPairBuff(bool copyToCpu)
{
//Intel/CPU version doesn't handle Adl scan well
#if 0
{
copyArrayFromDevice(m_hPairScanChanged, m_dPairScanChanged, sizeof(unsigned int)*(m_numHandles + 2));
btGpu3DGridBroadphase::scanOverlappingPairBuff();
copyArrayToDevice(m_dPairScanChanged, m_hPairScanChanged, sizeof(unsigned int)*(m_numHandles + 2));
m_numPrefixSum = m_hPairScanChanged[m_numHandles+1];
clFinish(m_cqCommandQue);
//memset(m_hPairScanChanged,0,sizeof(int)*m_maxHandles + 2);
}
#else
{
// copyArrayFromDevice(m_hPairScanChanged, m_dPairScanChanged, sizeof(unsigned int)*(m_numHandles + 2));
// btGpu3DGridBroadphase::scanOverlappingPairBuff();
// wrap m_dPairScanChanged in an Adl buffer without copying it
adl::Buffer<unsigned int> destBuffer;
{
BT_PROFILE("copy GPU->GPU");
destBuffer.m_ptr = (unsigned int*)m_dPairScanChanged;
destBuffer.m_device = m_deviceCL;
// NOTE(review): m_size is set to a byte count here, whereas sortHash() sets
// m_size of its Adl buffer to an element count - confirm which one Adl expects.
destBuffer.m_size = sizeof(unsigned int)*(m_numHandles+2);
// NOTE(review): presumably copy(dst, src, count, dstOffset, srcOffset), i.e.
// m_numHandles counters are copied from destBuffer into m_srcClBuffer, both
// starting at element 1 - verify against the Adl DeviceCL::copy signature.
m_deviceCL->copy(m_srcClBuffer, &destBuffer,m_numHandles,1,1);
#ifdef ADD_BLOCKING_CL_FINISH_FOR_BENCHMARK
clFinish(m_cqCommandQue);
#endif
}
{
BT_PROFILE("PrefixScan");
// scan m_numHandles + 2 elements of m_srcClBuffer into destBuffer; the last
// argument receives the sum reported by the scan
adl::PrefixScan<adl::TYPE_CL>::execute(gData1,*m_srcClBuffer,destBuffer, m_numHandles+2,&m_numPrefixSum);
#ifdef ADD_BLOCKING_CL_FINISH_FOR_BENCHMARK
clFinish(m_cqCommandQue);
#endif
//if (m_numPrefixSum>0x1000)
// {
// printf("error m_numPrefixSum==%d\n",m_numPrefixSum);
// }
}
// disabled verification of the device result against the host result
#if 0
unsigned int* verifyhPairScanChanged = new unsigned int[m_maxHandles + 2];
memset(verifyhPairScanChanged,0,sizeof(int)*m_maxHandles + 2);
copyArrayFromDevice(verifyhPairScanChanged, m_dPairScanChanged, sizeof(unsigned int)*(m_numHandles + 2));
clFinish(m_cqCommandQue);
/*for (int i=0;i<m_numHandles+2;i++)
{
if (verifyhPairScanChanged[i] != m_hPairScanChanged[i])
{
printf("hello!\n");
}
}
*/
#endif
if (1)
{
//the data
if (copyToCpu)
{
BT_PROFILE("copy GPU -> CPU");
copyArrayFromDevice(m_hPairScanChanged, m_dPairScanChanged, sizeof(unsigned int)*(m_numHandles + 2));
#ifdef ADD_BLOCKING_CL_FINISH_FOR_BENCHMARK
clFinish(m_cqCommandQue);
#endif
}
}
}
#endif
}
/// Runs kSqueezeOverlappingPairBuff with one work-item per small handle.
/// The changed pairs remain on the device (m_dPairsChanged); nothing is
/// downloaded here.
/// Switching the "#if 1" to 0 selects the base-class implementation instead.
void bt3dGridBroadphaseOCL::squeezeOverlappingPairBuff()
{
	BT_PROFILE("btCuda_squeezeOverlappingPairBuff");
#if 1
	// device path
	runKernelWithWorkgroupSize(GRID3DOCL_KERNEL_SQUEEZE_PAIR_BUFF, m_numHandles);
#ifdef ADD_BLOCKING_CL_FINISH_FOR_BENCHMARK
	clFinish(m_cqCommandQue);
#endif
#else
	// base-class fallback
	btGpu3DGridBroadphase::squeezeOverlappingPairBuff();
#endif
}
/// Resets the base-class pool, then re-uploads the initial contents of the
/// device-side hash, pair-start and pair buffers.
void bt3dGridBroadphaseOCL::resetPool(btDispatcher* dispatcher)
{
	// host state first, then the device buffers
	btGpu3DGridBroadphase::resetPool(dispatcher);
	this->prefillBuffers();
}

View File

@@ -0,0 +1,146 @@
/*
Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
Copyright (C) 2006 - 2009 Sony Computer Entertainment Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef BT3DGRIDBROADPHASEOCL_H
#define BT3DGRIDBROADPHASEOCL_H
// Select the OpenCL header: MiniCL when USE_MINICL is defined, otherwise the
// platform's OpenCL header.
#ifdef __APPLE__
// NOTE(review): both branches below include the MiniCL header, so USE_MINICL
// has no effect on Apple platforms. Presumably the #else branch was meant to
// include the system OpenCL header - confirm before changing.
#ifdef USE_MINICL
#include <MiniCL/cl.h>
#else
#include <MiniCL/cl.h>
#endif
//CL_PLATFORM_MINI_CL could be defined in build system
#else
//#include <GL/glew.h>
// standard utility and system includes
#ifdef USE_MINICL
#include <MiniCL/cl.h>
#else
#include <CL/cl.h>
#endif
// Extra CL/GL include
//#include <CL/cl_gl.h>
#endif //__APPLE__
// Forward declarations of the Adl device types; this header only stores
// pointers to them, so the Adl headers are not needed here.
namespace adl
{
	struct Device;
	struct DeviceCL;
}
#include "BulletCollision/BroadphaseCollision/btSimpleBroadphase.h"
#include "btGpu3DGridBroadphaseSharedTypes.h"
#include "btGpu3DGridBroadphase.h"
// Compares (a) against the expected value (b); on mismatch prints (a) and
// triggers btAssert. Wrapped in do { } while(0) so the macro expands to a
// single statement and is safe inside unbraced if/else.
// NOTE: (a) is evaluated more than once - do not pass expressions with side effects.
#define GRID3DOCL_CHECKERROR(a, b) do { if((a)!=(b)) { printf("3D GRID OCL Error : %d\n", (a)); btAssert((a) == (b)); } } while(0)
// Identifiers of the broadphase kernels. The values are consecutive starting
// at 0, and GRID3DOCL_KERNEL_TOTAL (the count) sizes the m_kernels array of
// bt3dGridBroadphaseOCL.
enum
{
GRID3DOCL_KERNEL_CALC_HASH_AABB = 0,
GRID3DOCL_KERNEL_CLEAR_CELL_START,
GRID3DOCL_KERNEL_FIND_CELL_START,
GRID3DOCL_KERNEL_FIND_OVERLAPPING_PAIRS,
GRID3DOCL_KERNEL_FIND_PAIRS_LARGE,
GRID3DOCL_KERNEL_COMPUTE_CACHE_CHANGES,
GRID3DOCL_KERNEL_SQUEEZE_PAIR_BUFF,
// number of kernels; must stay last
GRID3DOCL_KERNEL_TOTAL
};
// Bookkeeping record for one OpenCL kernel; bt3dGridBroadphaseOCL keeps an
// array of these with GRID3DOCL_KERNEL_TOTAL entries.
struct bt3dGridOCLKernelInfo
{
// presumably the GRID3DOCL_KERNEL_* id of this entry - TODO confirm against initKernel()
int m_Id;
// OpenCL kernel handle
cl_kernel m_kernel;
// kernel name; NOTE(review): ownership of the string is not visible from this header
char* m_name;
// work-group size recorded for this kernel
int m_workgroupSize;
};
///The bt3dGridBroadphaseOCL uses OpenCL-capable GPU to compute overlapping pairs
///It derives from btGpu3DGridBroadphase and overrides that class's virtual
///pipeline stages (see the "overrides" section below).
class bt3dGridBroadphaseOCL : public btGpu3DGridBroadphase
{
protected:
int m_hashSize;
// OpenCL context / device / queue / program handles used by this broadphase
cl_context m_cxMainContext;
cl_device_id m_cdDevice;
cl_command_queue m_cqCommandQue;
cl_program m_cpProgram;
// one record per GRID3DOCL_KERNEL_* id
bt3dGridOCLKernelInfo m_kernels[GRID3DOCL_KERNEL_TOTAL];
// data buffers
// (the m_d* names mirror the m_h* host arrays of btGpu3DGridBroadphase;
// presumably these are their device-side counterparts - TODO confirm in the .cpp)
cl_mem m_dBodiesHash;
cl_mem m_dCellStart;
cl_mem m_dPairBuff;
cl_mem m_dPairBuffStartCurr;
public:
// AABB buffer; public, unlike the other buffers
cl_mem m_dAABB;
protected:
cl_mem m_dPairScanChanged;
cl_mem m_dPairsChanged;
cl_mem m_dPairsContiguous;
cl_mem m_dBpParams;
adl::Device* m_deviceHost;
adl::DeviceCL* m_deviceCL;
// NOTE(review): presumably true when this object created m_deviceCL itself - confirm in the .cpp
bool m_ownsDevice;
public:
unsigned int m_numPrefixSum;
// Parameters up to maxSmallProxiesPerCell match the btGpu3DGridBroadphase
// constructor that takes a pair cache. The OpenCL handles and deviceCL are
// optional; they default to NULL / 0.
bt3dGridBroadphaseOCL( btOverlappingPairCache* overlappingPairCache,
const btVector3& cellSize,
int gridSizeX, int gridSizeY, int gridSizeZ,
int maxSmallProxies, int maxLargeProxies, int maxPairsPerSmallProxy,
btScalar maxSmallProxySize,
int maxSmallProxiesPerCell = 8,
cl_context context = NULL,
cl_device_id device = NULL,
cl_command_queue queue = NULL,
adl::DeviceCL* deviceCL = 0
);
virtual ~bt3dGridBroadphaseOCL();
protected:
// setup helpers
void initCL(cl_context context, cl_device_id device, cl_command_queue queue);
void initKernels();
void allocateBuffers();
void prefillBuffers();
void initKernel(int kernelId, char* pName);
// low-level buffer / kernel helpers
void allocateArray(void** devPtr, unsigned int size);
void freeArray(void* devPtr);
void runKernelWithWorkgroupSize(int kernelId, int globalSize);
void setKernelArg(int kernelId, int argNum, int argSize, void* argPtr);
// NOTE: the offset parameters are ordered (devOffs, hostOffs) here ...
void copyArrayToDevice(cl_mem device, const void* host, unsigned int size, int devOffs = 0, int hostOffs = 0);
// ... but (hostOffs, devOffs) here
void copyArrayFromDevice(void* host, const cl_mem device, unsigned int size, int hostOffs = 0, int devOffs = 0);
// overrides
virtual void setParameters(bt3DGridBroadphaseParams* hostParams);
virtual void prepareAABB();
virtual void calcHashAABB();
virtual void sortHash();
virtual void findCellStart();
virtual void findOverlappingPairs();
virtual void findPairsLarge();
virtual void computePairCacheChanges();
virtual void scanOverlappingPairBuff(bool copyToCpu=true);
virtual void squeezeOverlappingPairBuff();
virtual void resetPool(btDispatcher* dispatcher);
};
#endif //BT3DGRIDBROADPHASEOCL_H

View File

@@ -0,0 +1,626 @@
/*
Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
Copyright (C) 2006, 2009 Sony Computer Entertainment Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
///The 3 following lines include the CPU implementation of the kernels, keep them in this order.
#include "btGpuDefines.h"
#include "btGpuUtilsSharedDefs.h"
#include "btGpuUtilsSharedCode.h"
#include "LinearMath/btAlignedAllocator.h"
#include "LinearMath/btQuickprof.h"
#include "BulletCollision/BroadphaseCollision/btOverlappingPairCache.h"
#include "btGpuDefines.h"
#include "btGpuUtilsSharedDefs.h"
#include "btGpu3DGridBroadphaseSharedDefs.h"
#include "btGpu3DGridBroadphase.h"
#include <string.h> //for memset
#include <stdio.h>
// File-local copy of the grid parameters; btGpu3DGridBroadphase::setParameters()
// overwrites it with the host parameters.
static bt3DGridBroadphaseParams s3DGridBroadphaseParams;
// Constructs the broadphase with an internally allocated
// btHashedOverlappingPairCache (16-byte aligned allocation, placement new)
// that is handed to the btSimpleBroadphase base. All remaining setup is done
// by _initialize().
btGpu3DGridBroadphase::btGpu3DGridBroadphase( const btVector3& cellSize,
int gridSizeX, int gridSizeY, int gridSizeZ,
int maxSmallProxies, int maxLargeProxies, int maxPairsPerBody,
btScalar maxSmallProxySize,
int maxBodiesPerCell) :
btSimpleBroadphase(maxSmallProxies,
// new (btAlignedAlloc(sizeof(btSortedOverlappingPairCache),16)) btSortedOverlappingPairCache),
new (btAlignedAlloc(sizeof(btHashedOverlappingPairCache),16)) btHashedOverlappingPairCache),
m_bInitialized(false),
m_numBodies(0)
{
_initialize(cellSize, gridSizeX, gridSizeY, gridSizeZ,
maxSmallProxies, maxLargeProxies, maxPairsPerBody,
maxSmallProxySize, maxBodiesPerCell);
}
// Constructs the broadphase around a caller-supplied pair cache, which is
// forwarded to the btSimpleBroadphase base. All remaining setup is done by
// _initialize().
// NOTE(review): _initialize() sets m_ownsPairCache = true unconditionally, so
// the supplied cache appears to be treated as owned by this object - confirm
// that callers do not free it themselves.
btGpu3DGridBroadphase::btGpu3DGridBroadphase( btOverlappingPairCache* overlappingPairCache,
const btVector3& cellSize,
int gridSizeX, int gridSizeY, int gridSizeZ,
int maxSmallProxies, int maxLargeProxies, int maxPairsPerBody,
btScalar maxSmallProxySize,
int maxBodiesPerCell) :
btSimpleBroadphase(maxSmallProxies, overlappingPairCache),
m_bInitialized(false),
m_numBodies(0)
{
_initialize(cellSize, gridSizeX, gridSizeY, gridSizeZ,
maxSmallProxies, maxLargeProxies, maxPairsPerBody,
maxSmallProxySize, maxBodiesPerCell);
}
// Releases the host arrays and the large-proxy pool through _finalize().
btGpu3DGridBroadphase::~btGpu3DGridBroadphase()
{
//btSimpleBroadphase will free memory of btSortedOverlappingPairCache, because m_ownsPairCache
// the object must have gone through _initialize(); _finalize() asserts this as well
assert(m_bInitialized);
_finalize();
}
// Returns the largest power of two that does not exceed val, i.e.
// 2^n with 2^(n+1) > val >= 2^n. Only bits 30..1 are probed, so the result
// is 1 whenever none of those bits is set (this includes val == 0 and val == 1).
int btGpu3DGridBroadphase::getFloorPowOfTwo(int val)
{
	int bit = 0x40000000;
	int shiftsLeft = 30;
	// walk down from bit 30 until a set bit is hit or bit 0 is reached
	while((shiftsLeft > 0) && ((bit & val) == 0))
	{
		bit >>= 1;
		shiftsLeft--;
	}
	return bit;
}
void btGpu3DGridBroadphase::_initialize( const btVector3& cellSize,
int gridSizeX, int gridSizeY, int gridSizeZ,
int maxSmallProxies, int maxLargeProxies, int maxPairsPerBody,
btScalar maxSmallProxySize,
int maxBodiesPerCell)
{
// set various paramerers
m_ownsPairCache = true;
m_params.m_gridSizeX = getFloorPowOfTwo(gridSizeX);
m_params.m_gridSizeY = getFloorPowOfTwo(gridSizeY);
m_params.m_gridSizeZ = getFloorPowOfTwo(gridSizeZ);
m_params.m_numCells = m_params.m_gridSizeX * m_params.m_gridSizeY * m_params.m_gridSizeZ;
m_numCells = m_params.m_numCells;
m_params.m_invCellSizeX = btScalar(1.f) / cellSize[0];
m_params.m_invCellSizeY = btScalar(1.f) / cellSize[1];
m_params.m_invCellSizeZ = btScalar(1.f) / cellSize[2];
m_maxRadius = maxSmallProxySize * btScalar(0.5f);
m_params.m_numBodies = m_numBodies;
m_params.m_maxBodiesPerCell = maxBodiesPerCell;
m_numLargeHandles = 0;
m_maxLargeHandles = maxLargeProxies;
m_maxPairsPerBody = maxPairsPerBody;
m_LastLargeHandleIndex = -1;
assert(!m_bInitialized);
// allocate host storage
m_hBodiesHash = new unsigned int[m_maxHandles * 2];
memset(m_hBodiesHash, 0x00, m_maxHandles*2*sizeof(unsigned int));
m_hCellStart = new unsigned int[m_params.m_numCells];
memset(m_hCellStart, 0x00, m_params.m_numCells * sizeof(unsigned int));
m_hPairBuffStartCurr = new unsigned int[m_maxHandles * 2 + 2];
// --------------- for now, init with m_maxPairsPerBody for each body
m_hPairBuffStartCurr[0] = 0;
m_hPairBuffStartCurr[1] = 0;
for(int i = 1; i <= m_maxHandles; i++)
{
m_hPairBuffStartCurr[i * 2] = m_hPairBuffStartCurr[(i-1) * 2] + m_maxPairsPerBody;
m_hPairBuffStartCurr[i * 2 + 1] = 0;
}
//----------------
unsigned int numAABB = m_maxHandles + m_maxLargeHandles;
m_hAABB = new bt3DGrid3F1U[numAABB * 2]; // AABB Min & Max
m_hPairBuff = new unsigned int[m_maxHandles * m_maxPairsPerBody];
memset(m_hPairBuff, 0x00, m_maxHandles * m_maxPairsPerBody * sizeof(unsigned int)); // needed?
m_hPairScanChanged = new unsigned int[m_maxHandles + 2];
memset(m_hPairScanChanged,0,sizeof(int)*m_maxHandles + 2);
m_hPairsChanged = new unsigned int[m_maxHandles * m_maxPairsPerBody];
memset(m_hPairsChanged,0,sizeof(int)*(m_maxHandles * m_maxPairsPerBody));
m_hAllOverlappingPairs= new MyUint2[m_maxHandles * m_maxPairsPerBody];
memset(m_hAllOverlappingPairs,0,sizeof(MyUint2)*(m_maxHandles * m_maxPairsPerBody));
// large proxies
// allocate handles buffer and put all handles on free list
m_pLargeHandlesRawPtr = btAlignedAlloc(sizeof(btSimpleBroadphaseProxy) * m_maxLargeHandles, 16);
m_pLargeHandles = new(m_pLargeHandlesRawPtr) btSimpleBroadphaseProxy[m_maxLargeHandles];
m_firstFreeLargeHandle = 0;
{
for (int i = m_firstFreeLargeHandle; i < m_maxLargeHandles; i++)
{
m_pLargeHandles[i].SetNextFree(i + 1);
m_pLargeHandles[i].m_uniqueId = m_maxHandles+2+i;
}
m_pLargeHandles[m_maxLargeHandles - 1].SetNextFree(0);
}
// debug data
m_numPairsAdded = 0;
m_numOverflows = 0;
m_bInitialized = true;
}
// Frees everything allocated by _initialize() and marks the object as
// uninitialized. Must only be called on an initialized object.
void btGpu3DGridBroadphase::_finalize()
{
assert(m_bInitialized);
// host arrays allocated with new[]
delete [] m_hBodiesHash;
delete [] m_hCellStart;
delete [] m_hPairBuffStartCurr;
delete [] m_hAABB;
delete [] m_hPairBuff;
delete [] m_hPairScanChanged;
delete [] m_hPairsChanged;
delete [] m_hAllOverlappingPairs;
// large-proxy pool: only the raw aligned block is released, no destructors are run
btAlignedFree(m_pLargeHandlesRawPtr);
m_bInitialized = false;
}
// Runs one broadphase update. The base-class update is executed first; then,
// if there is at least one small proxy, the grid pipeline stages are run in
// the fixed order below (each inside its own profiling scope), followed by
// the large/large pass. With no small proxies only the large/large pass runs.
// The stages are virtual, so a derived class may substitute its own versions.
void btGpu3DGridBroadphase::calculateOverlappingPairs(btDispatcher* dispatcher)
{
btSimpleBroadphase::calculateOverlappingPairs(dispatcher);
if(m_numHandles <= 0)
{
// no small proxies: only large/large pairs can exist
BT_PROFILE("addLarge2LargePairsToCache");
addLarge2LargePairsToCache(dispatcher);
return;
}
// update constants
{
BT_PROFILE("setParameters");
setParameters(&m_params);
}
// prepare AABB array
{
BT_PROFILE("prepareAABB");
prepareAABB();
}
// calculate hash
{
BT_PROFILE("calcHashAABB");
calcHashAABB();
}
{
BT_PROFILE("sortHash");
// sort bodies based on hash
sortHash();
}
// find start of each cell
{
BT_PROFILE("findCellStart");
findCellStart();
}
{
BT_PROFILE("findOverlappingPairs");
// findOverlappingPairs (small/small)
findOverlappingPairs();
}
// findOverlappingPairs (small/large)
{
BT_PROFILE("findPairsLarge");
findPairsLarge();
}
// add pairs to CPU cache
{
BT_PROFILE("computePairCacheChanges");
computePairCacheChanges();
}
{
BT_PROFILE("scanOverlappingPairBuff");
scanOverlappingPairBuff();
}
{
BT_PROFILE("squeezeOverlappingPairBuff");
squeezeOverlappingPairBuff();
}
{
BT_PROFILE("addPairsToCache");
addPairsToCache(dispatcher);
}
// find and add large/large pairs to CPU cache
{
BT_PROFILE("addLarge2LargePairsToCache");
addLarge2LargePairsToCache(dispatcher);
}
return;
}
// Applies the per-body list of changed pairs to the CPU pair cache.
// For small body i the changed entries are
// m_hPairsChanged[scan[i+1] .. scan[i+2]) with scan = m_hPairScanChanged.
// An entry carrying BT_3DGRID_PAIR_NEW_FLG is added to the cache, any other
// entry is removed. Partner indices >= m_maxHandles refer to large proxies.
// Updates the m_numPairsAdded / m_numPairsRemoved counters.
void btGpu3DGridBroadphase::addPairsToCache(btDispatcher* dispatcher)
{
	m_numPairsAdded = 0;
	m_numPairsRemoved = 0;
	for(int body = 0; body < m_numHandles; body++)
	{
		const unsigned int firstEntry = m_hPairScanChanged[body + 1];
		const unsigned int entryCount = m_hPairScanChanged[body + 2] - firstEntry;
		if(entryCount == 0)
		{
			continue;
		}
		const unsigned int* entries = m_hPairsChanged + firstEntry;
		// the handle index of this body is stored in the .uw field of its AABB minimum
		const unsigned int ownHandle = m_hAABB[body * 2].uw;
		btSimpleBroadphaseProxy* proxy0 = &m_pHandles[ownHandle];
		for(unsigned int e = 0; e < entryCount; e++)
		{
			const unsigned int packed = entries[e];
			// strip the flag bits to obtain the partner's handle index
			unsigned int index1 = packed & (~BT_3DGRID_PAIR_ANY_FLG);
			btSimpleBroadphaseProxy* proxy1;
			if(index1 >= (unsigned int)m_maxHandles)
			{
				// partner lives in the large-proxy pool
				index1 -= m_maxHandles;
				btAssert((index1 >= 0) && (index1 < (unsigned int)m_maxLargeHandles));
				proxy1 = &m_pLargeHandles[index1];
			}
			else
			{
				proxy1 = &m_pHandles[index1];
			}
			if((packed & BT_3DGRID_PAIR_NEW_FLG) != 0)
			{
				m_pairCache->addOverlappingPair(proxy0,proxy1);
				m_numPairsAdded++;
			}
			else
			{
				m_pairCache->removeOverlappingPair(proxy0,proxy1,dispatcher);
				m_numPairsRemoved++;
			}
		}
	}
}
// Creates a broadphase proxy for the given AABB. Proxies classified as
// "small" by isLargeProxy() are delegated to btSimpleBroadphase; "large"
// ones are constructed in place in the large-proxy pool.
// Returns 0 (after asserting) when the large-proxy pool is exhausted.
btBroadphaseProxy* btGpu3DGridBroadphase::createProxy(  const btVector3& aabbMin,  const btVector3& aabbMax,int shapeType,void* userPtr ,short int collisionFilterGroup,short int collisionFilterMask, btDispatcher* dispatcher,void* multiSapProxy)
{
	if(!isLargeProxy(aabbMin, aabbMax))
	{
		return btSimpleBroadphase::createProxy(aabbMin, aabbMax, shapeType, userPtr, collisionFilterGroup, collisionFilterMask, dispatcher, multiSapProxy);
	}

	if (m_numLargeHandles >= m_maxLargeHandles)
	{
		///you have to increase the cell size, so 'large' proxies become 'small' proxies (fitting a cell)
		btAssert(0);
		return 0; //should never happen, but don't let the game crash ;-)
	}
	btAssert((aabbMin[0]<= aabbMax[0]) && (aabbMin[1]<= aabbMax[1]) && (aabbMin[2]<= aabbMax[2]));

	// take a slot from the free list and construct the proxy in that slot
	const int slot = allocLargeHandle();
	btBroadphaseProxy* largeProxy = new (&m_pLargeHandles[slot])btSimpleBroadphaseProxy(aabbMin,aabbMax,shapeType,userPtr,collisionFilterGroup,collisionFilterMask,multiSapProxy);
	return largeProxy;
}
// Destroys a proxy. Small proxies are handed to btSimpleBroadphase; large
// ones are returned to the large-proxy free list and every cached pair that
// contains them is removed.
void btGpu3DGridBroadphase::destroyProxy(btBroadphaseProxy* proxy, btDispatcher* dispatcher)
{
	if(!isLargeProxy(proxy))
	{
		btSimpleBroadphase::destroyProxy(proxy, dispatcher);
		return;
	}

	btSimpleBroadphaseProxy* largeProxy = static_cast<btSimpleBroadphaseProxy*>(proxy);
	freeLargeHandle(largeProxy);
	m_pairCache->removeOverlappingPairsContainingProxy(proxy,dispatcher);
}
// Re-initializes m_hPairBuffStartCurr: entry [2*i] receives the start offset
// i * m_maxPairsPerBody and entry [2*i+1] (the current pair count) is zeroed,
// for i = 0 .. m_maxHandles. The dispatcher argument is not used.
void btGpu3DGridBroadphase::resetPool(btDispatcher* dispatcher)
{
	unsigned int* startCurr = m_hPairBuffStartCurr;
	startCurr[0] = 0;
	startCurr[1] = 0;
	unsigned int offset = 0;
	for(int body = 1; body <= m_maxHandles; body++)
	{
		offset += m_maxPairsPerBody;
		startCurr[body * 2] = offset;
		startCurr[body * 2 + 1] = 0;
	}
}
bool btGpu3DGridBroadphase::isLargeProxy(const btVector3& aabbMin, const btVector3& aabbMax)
{
btVector3 diag = aabbMax - aabbMin;
///use the bounding sphere radius of this bounding box, to include rotation
btScalar radius = diag.length() * btScalar(0.5f);
return (radius > m_maxRadius);
}
// Classifies an existing proxy by its unique id: ids of m_maxHandles+2 and
// above are the ones _initialize() assigns to the large-proxy pool.
bool btGpu3DGridBroadphase::isLargeProxy(btBroadphaseProxy* proxy)
{
	const int firstLargeUid = m_maxHandles + 2;
	return proxy->getUid() >= firstLargeUid;
}
// Brute-force pass over all pairs of large-proxy slots up to
// m_LastLargeHandleIndex: overlapping pairs that are missing from the cache
// are added, cached pairs that no longer overlap are removed.
// NOTE(review): slots are visited without checking whether they are in use.
void btGpu3DGridBroadphase::addLarge2LargePairsToCache(btDispatcher* dispatcher)
{
	if (m_numLargeHandles <= 0)
	{
		return;
	}

	int lastVisited = -1;
	for(int a = 0; a <= m_LastLargeHandleIndex; a++)
	{
		btSimpleBroadphaseProxy* proxy0 = &m_pLargeHandles[a];
		lastVisited = a;
		for(int b = a + 1; b <= m_LastLargeHandleIndex; b++)
		{
			btSimpleBroadphaseProxy* proxy1 = &m_pLargeHandles[b];
			btAssert(proxy0 != proxy1);
			btSimpleBroadphaseProxy* p0 = getSimpleProxyFromProxy(proxy0);
			btSimpleBroadphaseProxy* p1 = getSimpleProxyFromProxy(proxy1);
			const bool overlapping = aabbOverlap(p0,p1);
			// one cache lookup per pair, exactly as in both original branches
			const bool cached = (m_pairCache->findPair(proxy0,proxy1) != 0);
			if(overlapping && !cached)
			{
				m_pairCache->addOverlappingPair(proxy0,proxy1);
			}
			else if(!overlapping && cached)
			{
				m_pairCache->removeOverlappingPair(proxy0,proxy1,dispatcher);
			}
		}
	}
	m_LastLargeHandleIndex = lastVisited;
}
// Ray query: the small proxies are handled by btSimpleBroadphase::rayTest(),
// afterwards every large-proxy slot up to m_LastLargeHandleIndex is passed to
// the callback without any culling.
// NOTE(review): slots are not checked for being in use before process() is called.
void btGpu3DGridBroadphase::rayTest(const btVector3& rayFrom,const btVector3& rayTo, btBroadphaseRayCallback& rayCallback)
{
	btSimpleBroadphase::rayTest(rayFrom, rayTo, rayCallback);

	int slot = 0;
	while(slot <= m_LastLargeHandleIndex)
	{
		rayCallback.process(&m_pLargeHandles[slot]);
		++slot;
	}
}
//
// overrides for CPU version
//
void btGpu3DGridBroadphase::prepareAABB()
{
BT_PROFILE("prepareAABB");
bt3DGrid3F1U* pBB = m_hAABB;
int i;
int new_largest_index = -1;
unsigned int num_small = 0;
for(i = 0; i <= m_LastHandleIndex; i++)
{
btSimpleBroadphaseProxy* proxy0 = &m_pHandles[i];
new_largest_index = i;
pBB->fx = proxy0->m_aabbMin.getX();
pBB->fy = proxy0->m_aabbMin.getY();
pBB->fz = proxy0->m_aabbMin.getZ();
pBB->uw = i;
pBB++;
pBB->fx = proxy0->m_aabbMax.getX();
pBB->fy = proxy0->m_aabbMax.getY();
pBB->fz = proxy0->m_aabbMax.getZ();
pBB->uw = num_small;
pBB++;
num_small++;
}
m_LastHandleIndex = new_largest_index;
new_largest_index = -1;
unsigned int num_large = 0;
for(i = 0; i <= m_LastLargeHandleIndex; i++)
{
btSimpleBroadphaseProxy* proxy0 = &m_pLargeHandles[i];
new_largest_index = i;
pBB->fx = proxy0->m_aabbMin.getX();
pBB->fy = proxy0->m_aabbMin.getY();
pBB->fz = proxy0->m_aabbMin.getZ();
pBB->uw = i + m_maxHandles;
pBB++;
pBB->fx = proxy0->m_aabbMax.getX();
pBB->fy = proxy0->m_aabbMax.getY();
pBB->fz = proxy0->m_aabbMax.getZ();
pBB->uw = num_large + m_maxHandles;
pBB++;
num_large++;
}
m_LastLargeHandleIndex = new_largest_index;
// paranoid checks
btAssert(num_small == m_numHandles);
btAssert(num_large == m_numLargeHandles);
return;
}
// CPU version: copies the host parameters by value into the file-local
// s3DGridBroadphaseParams.
void btGpu3DGridBroadphase::setParameters(bt3DGridBroadphaseParams* hostParams)
{
	const bt3DGridBroadphaseParams& source = *hostParams;
	s3DGridBroadphaseParams = source;
}
// CPU version: forwards the host AABB and hash arrays, together with the
// number of small proxies, to btGpu_calcHashAABB().
void btGpu3DGridBroadphase::calcHashAABB()
{
	BT_PROFILE("bt3DGrid_calcHashAABB");
	btGpu_calcHashAABB(
		m_hAABB,
		m_hBodiesHash,
		m_numHandles);
}
// CPU version: sorts the (hash, index) records in m_hBodiesHash in place by
// descending hash value, using a local recursive quicksort.
// m_hBodiesHash is viewed as an array of m_numHandles two-word records.
void btGpu3DGridBroadphase::sortHash()
{
	class bt3DGridHashKey
	{
	public:
		unsigned int hash;
		unsigned int index;
		// Sorts pData[lo..hi] (inclusive bounds, requires lo <= hi) by descending hash.
		void quickSort(bt3DGridHashKey* pData, int lo, int hi)
		{
			int i=lo, j=hi;
			bt3DGridHashKey x = pData[(lo+hi)/2];
			do
			{
				while(pData[i].hash > x.hash) i++;
				while(x.hash > pData[j].hash) j--;
				if(i <= j)
				{
					bt3DGridHashKey t = pData[i];
					pData[i] = pData[j];
					pData[j] = t;
					i++; j--;
				}
			} while(i <= j);
			if(lo < j) pData->quickSort(pData, lo, j);
			if(i < hi) pData->quickSort(pData, i, hi);
		}
	};
	BT_PROFILE("bt3DGrid_sortHash");
	// Fewer than two records are already sorted. For zero records the
	// quicksort would be entered with hi == -1 and read pData[-1].
	if(m_numHandles < 2)
	{
		return;
	}
	bt3DGridHashKey* pHash = (bt3DGridHashKey*)m_hBodiesHash;
	pHash->quickSort(pHash, 0, m_numHandles - 1);
	return;
}
// CPU version: forwards the hash array, the cell-start array, the number of
// small proxies and the cell count to btGpu_findCellStart().
void btGpu3DGridBroadphase::findCellStart()
{
	BT_PROFILE("bt3DGrid_findCellStart");
	btGpu_findCellStart(
		m_hBodiesHash,
		m_hCellStart,
		m_numHandles,
		m_params.m_numCells);
}
// CPU version of the small/small stage: forwards the host arrays and the
// number of small proxies to btGpu_findOverlappingPairs().
void btGpu3DGridBroadphase::findOverlappingPairs()
{
	BT_PROFILE("bt3DGrid_findOverlappingPairs");
	btGpu_findOverlappingPairs(
		m_hAABB,
		m_hBodiesHash,
		m_hCellStart,
		m_hPairBuff,
		m_hPairBuffStartCurr,
		m_numHandles);
}
// CPU version of the small/large stage: forwards the host arrays plus the
// small- and large-proxy counts to btGpu_findPairsLarge().
void btGpu3DGridBroadphase::findPairsLarge()
{
	BT_PROFILE("bt3DGrid_findPairsLarge");
	btGpu_findPairsLarge(
		m_hAABB,
		m_hBodiesHash,
		m_hCellStart,
		m_hPairBuff,
		m_hPairBuffStartCurr,
		m_numHandles,
		m_numLargeHandles);
}
// CPU version: forwards the pair buffer, its start/current table, the scan
// array, the AABB array and the number of small proxies to
// btGpu_computePairCacheChanges().
void btGpu3DGridBroadphase::computePairCacheChanges()
{
	BT_PROFILE("bt3DGrid_computePairCacheChanges");
	btGpu_computePairCacheChanges(
		m_hPairBuff,
		m_hPairBuffStartCurr,
		m_hPairScanChanged,
		m_hAABB,
		m_numHandles);
}
// CPU version: turns m_hPairScanChanged[0 .. m_numHandles+1] into an
// exclusive prefix sum in place. Entry 0 is forced to zero first, so the last
// entry ends up holding the sum of entries 1 .. m_numHandles.
// The copyToCpu flag is not used by this implementation.
void btGpu3DGridBroadphase::scanOverlappingPairBuff(bool copyToCpu)
{
	BT_PROFILE("bt3DGrid_scanOverlappingPairBuff");
	m_hPairScanChanged[0]=0;
	unsigned int running = 0;
	const int lastSlot = m_numHandles + 1;
	for(int slot = 0; slot <= lastSlot; ++slot)
	{
		const unsigned int next = running + m_hPairScanChanged[slot];
		m_hPairScanChanged[slot] = running;
		running = next;
	}
}
// CPU version: forwards the pair buffer, its start/current table, the scanned
// change counts, the output array, the AABB array and the number of small
// proxies to btGpu_squeezeOverlappingPairBuff().
// NOTE(review): the output goes to m_hAllOverlappingPairs (reinterpreted as
// unsigned int*), whereas addPairsToCache() reads m_hPairsChanged - confirm
// this is intended for the CPU path.
void btGpu3DGridBroadphase::squeezeOverlappingPairBuff()
{
	BT_PROFILE("bt3DGrid_squeezeOverlappingPairBuff");
	// earlier variant wrote to m_hPairsChanged:
	//btGpu_squeezeOverlappingPairBuff(m_hPairBuff, m_hPairBuffStartCurr, m_hPairScanChanged, m_hPairsChanged, m_hAABB, m_numHandles);
	unsigned int* squeezed = (unsigned int*)m_hAllOverlappingPairs;
	btGpu_squeezeOverlappingPairBuff(
		m_hPairBuff,
		m_hPairBuffStartCurr,
		m_hPairScanChanged,
		squeezed,
		m_hAABB,
		m_numHandles);
}
#include "btGpu3DGridBroadphaseSharedCode.h"

View File

@@ -0,0 +1,154 @@
/*
Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
Copyright (C) 2006, 2009 Sony Computer Entertainment Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
//----------------------------------------------------------------------------------------
#ifndef BTGPU3DGRIDBROADPHASE_H
#define BTGPU3DGRIDBROADPHASE_H
//----------------------------------------------------------------------------------------
#include "BulletCollision/BroadphaseCollision/btSimpleBroadphase.h"
#include "btGpu3DGridBroadphaseSharedTypes.h"
// Two-component integer record; btGpu3DGridBroadphase::getOverlap() returns
// an array of these.
// NOTE: despite the "Uint" in the name both members are signed int.
struct MyUint2
{
int x;
int y;
};
//----------------------------------------------------------------------------------------
///The btGpu3DGridBroadphase uses GPU-style code compiled for CPU to compute overlapping pairs
///Small proxies are managed by the btSimpleBroadphase base class; proxies that
///are too big for a grid cell ("large" proxies) live in a separate pool owned
///by this class.
class btGpu3DGridBroadphase : public btSimpleBroadphase
{
protected:
	bool			m_bInitialized;	// set by _initialize(), cleared by _finalize()
	unsigned int	m_numBodies;
	unsigned int	m_numCells;
	unsigned int	m_maxPairsPerBody;
	unsigned int	m_maxBodiesPerCell;
	bt3DGridBroadphaseParams m_params;
	btScalar		m_maxRadius;	// bounding radius above which a proxy counts as large
	// CPU data
	unsigned int*	m_hBodiesHash;			// (hash, index) record per small proxy
	unsigned int*	m_hCellStart;			// one entry per grid cell
	unsigned int*	m_hPairBuffStartCurr;	// (start, current count) record per small proxy
	bt3DGrid3F1U*	m_hAABB;				// (min, max) record per proxy, small ones first
	unsigned int*	m_hPairBuff;
	unsigned int*	m_hPairScanChanged;
	unsigned int*	m_hPairsChanged;
	MyUint2*		m_hAllOverlappingPairs;
	// large proxies
	int		m_numLargeHandles;			// number of large handles currently in use
	int		m_maxLargeHandles;			// capacity of the large-proxy pool
	int		m_LastLargeHandleIndex;		// highest pool slot handed out so far, -1 if none
	btSimpleBroadphaseProxy* m_pLargeHandles;
	void* m_pLargeHandlesRawPtr;		// raw aligned allocation backing m_pLargeHandles
	int		m_firstFreeLargeHandle;		// head of the free list threaded through the pool

	// Takes the first slot off the large-proxy free list and returns its index.
	// The caller must make sure the pool is not exhausted.
	int allocLargeHandle()
	{
		btAssert(m_numLargeHandles < m_maxLargeHandles);
		int freeLargeHandle = m_firstFreeLargeHandle;
		m_firstFreeLargeHandle = m_pLargeHandles[freeLargeHandle].GetNextFree();
		m_numLargeHandles++;
		if(freeLargeHandle > m_LastLargeHandleIndex)
		{
			m_LastLargeHandleIndex = freeLargeHandle;
		}
		return freeLargeHandle;
	}
	// Puts a large proxy back at the head of the free list and clears its client object.
	void freeLargeHandle(btSimpleBroadphaseProxy* proxy)
	{
		int handle = int(proxy - m_pLargeHandles);
		// the slot must lie inside the large-proxy pool (previously checked against
		// m_maxHandles, the capacity of the small-proxy pool)
		btAssert((handle >= 0) && (handle < m_maxLargeHandles));
		if(handle == m_LastLargeHandleIndex)
		{
			m_LastLargeHandleIndex--;
		}
		proxy->SetNextFree(m_firstFreeLargeHandle);
		m_firstFreeLargeHandle = handle;
		proxy->m_clientObject = 0;
		m_numLargeHandles--;
	}
	// size-based classification of a new AABB
	bool isLargeProxy(const btVector3& aabbMin,  const btVector3& aabbMax);
	// id-based classification of an existing proxy
	bool isLargeProxy(btBroadphaseProxy* proxy);
	// debug
	unsigned int	m_numPairsAdded;
	unsigned int	m_numPairsRemoved;
	unsigned int	m_numOverflows;
	//
public:
	// Returns the entry m_hPairScanChanged[m_numHandles+1], i.e. the total left
	// in the last scanned slot by scanOverlappingPairBuff().
	virtual int getNumOverlap()
	{
		return m_hPairScanChanged[m_numHandles+1];
	}
	// Returns the host array of squeezed pair records.
	virtual MyUint2* getOverlap()
	{
		return m_hAllOverlappingPairs;
	}

	// NOTE : for better results gridSizeX, gridSizeY and gridSizeZ should be powers of 2
	// (other values are rounded down to a power of two)
	btGpu3DGridBroadphase(const btVector3& cellSize,
					   int gridSizeX, int gridSizeY, int gridSizeZ,
					   int maxSmallProxies, int maxLargeProxies, int maxPairsPerBody,
					   btScalar maxSmallProxySize,
					   int maxBodiesPerCell = 8);
	btGpu3DGridBroadphase(	btOverlappingPairCache* overlappingPairCache,
						const btVector3& cellSize,
						int gridSizeX, int gridSizeY, int gridSizeZ,
						int maxSmallProxies, int maxLargeProxies, int maxPairsPerBody,
						btScalar maxSmallProxySize,
						int maxBodiesPerCell = 8);
	virtual ~btGpu3DGridBroadphase();
	virtual void	calculateOverlappingPairs(btDispatcher* dispatcher);

	virtual btBroadphaseProxy*	createProxy(const btVector3& aabbMin,  const btVector3& aabbMax,int shapeType,void* userPtr ,short int collisionFilterGroup,short int collisionFilterMask, btDispatcher* dispatcher,void* multiSapProxy);
	virtual void	destroyProxy(btBroadphaseProxy* proxy,btDispatcher* dispatcher);
	virtual void	rayTest(const btVector3& rayFrom,const btVector3& rayTo, btBroadphaseRayCallback& rayCallback);
	virtual void	resetPool(btDispatcher* dispatcher);

	static int		getFloorPowOfTwo(int val); // returns 2^n : 2^(n+1) > val >= 2^n

protected:
	void _initialize(	const btVector3& cellSize,
						int gridSizeX, int gridSizeY, int gridSizeZ,
						int maxSmallProxies, int maxLargeProxies, int maxPairsPerBody,
						btScalar maxSmallProxySize,
						int maxBodiesPerCell);
	void _finalize();
	void addPairsToCache(btDispatcher* dispatcher);
	void addLarge2LargePairsToCache(btDispatcher* dispatcher);
	// overrides for CPU version
	// (pipeline stages called in sequence by calculateOverlappingPairs())
	virtual void setParameters(bt3DGridBroadphaseParams* hostParams);
	virtual void prepareAABB();
	virtual void calcHashAABB();
	virtual void sortHash();
	virtual void findCellStart();
	virtual void findOverlappingPairs();
	virtual void findPairsLarge();
	virtual void computePairCacheChanges();
	virtual void scanOverlappingPairBuff(bool copyToCpu=true);
	virtual void squeezeOverlappingPairBuff();
};
//----------------------------------------------------------------------------------------
#endif //BTGPU3DGRIDBROADPHASE_H
//----------------------------------------------------------------------------------------
//----------------------------------------------------------------------------------------
//----------------------------------------------------------------------------------------

View File

@@ -0,0 +1,428 @@
/*
Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
Copyright (C) 2006, 2009 Sony Computer Entertainment Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
//----------------------------------------------------------------------------------------
//----------------------------------------------------------------------------------------
//----------------------------------------------------------------------------------------
//----------------------------------------------------------------------------------------
//----------------------------------------------------------------------------------------
// K E R N E L F U N C T I O N S
//----------------------------------------------------------------------------------------
//----------------------------------------------------------------------------------------
//----------------------------------------------------------------------------------------
//----------------------------------------------------------------------------------------
//----------------------------------------------------------------------------------------
//----------------------------------------------------------------------------------------
// Converts a world-space position into integer grid coordinates: each
// component is scaled by the inverse cell size, floored, and masked with
// (gridSize - 1), which wraps it into the grid for power-of-two grid sizes.
BT_GPU___device__ int3 bt3DGrid_calcGridPos(float4 p)
{
	const int cellX = (int)floor(p.x * BT_GPU_params.m_invCellSizeX);
	const int cellY = (int)floor(p.y * BT_GPU_params.m_invCellSizeY);
	const int cellZ = (int)floor(p.z * BT_GPU_params.m_invCellSizeZ);
	int3 gridPos;
	gridPos.x = cellX & (BT_GPU_params.m_gridSizeX - 1);
	gridPos.y = cellY & (BT_GPU_params.m_gridSizeY - 1);
	gridPos.z = cellZ & (BT_GPU_params.m_gridSizeZ - 1);
	return gridPos;
} // bt3DGrid_calcGridPos()
//----------------------------------------------------------------------------------------
// Computes the linear cell address for grid coordinates. Each coordinate is
// first masked with (gridSize - 1), i.e. wrapped around rather than clamped,
// and the address is then (z * sizeY * sizeX) + (y * sizeX) + x.
BT_GPU___device__ uint bt3DGrid_calcGridHash(int3 gridPos)
{
	gridPos.x = gridPos.x & (BT_GPU_params.m_gridSizeX - 1);
	gridPos.y = gridPos.y & (BT_GPU_params.m_gridSizeY - 1);
	gridPos.z = gridPos.z & (BT_GPU_params.m_gridSizeZ - 1);
	return	  BT_GPU___mul24(BT_GPU___mul24(gridPos.z, BT_GPU_params.m_gridSizeY), BT_GPU_params.m_gridSizeX)
			+ BT_GPU___mul24(gridPos.y, BT_GPU_params.m_gridSizeX)
			+ gridPos.x;
} // bt3DGrid_calcGridHash()
//----------------------------------------------------------------------------------------
// One thread per body: hashes the centre of the body's AABB into a grid cell
// and stores the pair (cell hash, body index) in pHash[index].
// pAABB holds two records per body (min at 2*index, max at 2*index + 1).
BT_GPU___global__ void calcHashAABBD(bt3DGrid3F1U* pAABB, uint2* pHash, uint numBodies)
{
	int index = BT_GPU___mul24(BT_GPU_blockIdx.x, BT_GPU_blockDim.x) + BT_GPU_threadIdx.x;
	if(index >= (int)numBodies)
	{
		return;
	}
	bt3DGrid3F1U lower = pAABB[index*2];
	bt3DGrid3F1U upper = pAABB[index*2 + 1];
	// centre of the box (the w component is left unset, as before)
	float4 centre;
	centre.x = (lower.fx + upper.fx) * 0.5f;
	centre.y = (lower.fy + upper.fy) * 0.5f;
	centre.z = (lower.fz + upper.fz) * 0.5f;
	// get address in grid
	int3 cell = bt3DGrid_calcGridPos(centre);
	uint cellHash = bt3DGrid_calcGridHash(cell);
	// store grid hash and body index
	pHash[index] = BT_GPU_make_uint2(cellHash, index);
} // calcHashAABBD()
//----------------------------------------------------------------------------------------
// One thread per entry of the sorted hash array: whenever an entry's hash
// differs from the hash of the preceding entry (or it is the very first
// entry), its position is recorded as the start of that cell in cellStart.
// NOTE(review): threads with index >= numBodies return before the barrier
// below, and sharedHash has room for 256 threads + 1 - confirm that the
// launch configuration respects both assumptions.
BT_GPU___global__ void findCellStartD(uint2* pHash, uint* cellStart, uint numBodies)
{
int index = BT_GPU___mul24(BT_GPU_blockIdx.x, BT_GPU_blockDim.x) + BT_GPU_threadIdx.x;
if(index >= (int)numBodies)
{
return;
}
uint2 sortedData = pHash[index];
// Load hash data into shared memory so that we can look
// at neighboring body's hash value without loading
// two hash values per thread
BT_GPU___shared__ uint sharedHash[257];
// slot 0 is reserved for the last hash of the previous block, hence the +1
sharedHash[BT_GPU_threadIdx.x+1] = sortedData.x;
if((index > 0) && (BT_GPU_threadIdx.x == 0))
{
// first thread in block must load neighbor body hash
volatile uint2 prevData = pHash[index-1];
sharedHash[0] = prevData.x;
}
BT_GPU___syncthreads();
// sharedHash[threadIdx.x] is the hash of the entry just before this one
if((index == 0) || (sortedData.x != sharedHash[BT_GPU_threadIdx.x]))
{
cellStart[sortedData.x] = index;
}
} // findCellStartD()
//----------------------------------------------------------------------------------------
// Returns 1 when the two axis-aligned boxes (min0,max0) and (min1,max1)
// overlap or touch on all three axes, otherwise 0.
BT_GPU___device__ uint cudaTestAABBOverlap(bt3DGrid3F1U min0, bt3DGrid3F1U max0, bt3DGrid3F1U min1, bt3DGrid3F1U max1)
{
	// per-axis interval tests; kept in "<=" form so NaN inputs behave as before
	const bool overlapX = (min0.fx <= max1.fx) && (min1.fx <= max0.fx);
	const bool overlapY = (min0.fy <= max1.fy) && (min1.fy <= max0.fy);
	const bool overlapZ = (min0.fz <= max1.fz) && (min1.fz <= max0.fz);
	return (overlapX && overlapY && overlapZ) ? 1 : 0;
} // cudaTestAABBOverlap()
//----------------------------------------------------------------------------------------
// Tests the body at sorted position `index` against the bodies stored in the
// grid cell `gridPos` and records overlaps in that body's section of pPairBuff.
//   pHash               sorted (cell hash, body index) records
//   pCellStart          first sorted position of each cell, 0xffffffff for an empty cell
//   pAABB               (min, max) record per body; min.uw is the handle index
//   pPairBuff           per-handle pair lists, entries carry flag bits
//   pPairBuffStartCurr  per-handle (start offset, current count) records
// A partner already present in the list is marked with BT_3DGRID_PAIR_FOUND_FLG;
// a new partner is appended with BT_3DGRID_PAIR_NEW_FLG. Only partners with a
// smaller body index are considered, so each pair is recorded once.
BT_GPU___device__ void findPairsInCell( int3 gridPos,
uint index,
uint2* pHash,
uint* pCellStart,
bt3DGrid3F1U* pAABB,
uint* pPairBuff,
uint2* pPairBuffStartCurr,
uint numBodies)
{
uint gridHash = bt3DGrid_calcGridHash(gridPos);
// get start of bucket for this cell
uint bucketStart = pCellStart[gridHash];
if (bucketStart == 0xffffffff)
{
return; // cell empty
}
// iterate over bodies in this cell
uint2 sortedData = pHash[index];
uint unsorted_indx = sortedData.y;
bt3DGrid3F1U min0 = BT_GPU_FETCH(pAABB, unsorted_indx*2);
bt3DGrid3F1U max0 = BT_GPU_FETCH(pAABB, unsorted_indx*2 + 1);
uint handleIndex = min0.uw;
uint2 start_curr = pPairBuffStartCurr[handleIndex];
uint start = start_curr.x;
uint curr = start_curr.y;
// capacity of this handle's section: distance to the next handle's start, minus one
uint2 start_curr_next = pPairBuffStartCurr[handleIndex+1];
uint curr_max = start_curr_next.x - start - 1;
// at most m_maxBodiesPerCell entries of the bucket are examined
uint bucketEnd = bucketStart + BT_GPU_params.m_maxBodiesPerCell;
bucketEnd = (bucketEnd > numBodies) ? numBodies : bucketEnd;
for(uint index2 = bucketStart; index2 < bucketEnd; index2++)
{
uint2 cellData = pHash[index2];
if (cellData.x != gridHash)
{
break; // no longer in same bucket
}
uint unsorted_indx2 = cellData.y;
if (unsorted_indx2 < unsorted_indx) // check not colliding with self
{
bt3DGrid3F1U min1 = BT_GPU_FETCH(pAABB, unsorted_indx2*2);
bt3DGrid3F1U max1 = BT_GPU_FETCH(pAABB, unsorted_indx2*2 + 1);
if(cudaTestAABBOverlap(min0, max0, min1, max1))
{
uint handleIndex2 = min1.uw;
uint k;
// look for the partner among the pairs already stored for this handle
for(k = 0; k < curr; k++)
{
uint old_pair = pPairBuff[start+k] & (~BT_3DGRID_PAIR_ANY_FLG);
if(old_pair == handleIndex2)
{
pPairBuff[start+k] |= BT_3DGRID_PAIR_FOUND_FLG;
break;
}
}
// k == curr means the partner was not found: append it as a new pair
if(k == curr)
{
if(curr >= curr_max)
{ // not a good solution, but let's avoid crash
break;
}
pPairBuff[start+curr] = handleIndex2 | BT_3DGRID_PAIR_NEW_FLG;
curr++;
}
}
}
}
// write back the (possibly increased) pair count
pPairBuffStartCurr[handleIndex] = BT_GPU_make_uint2(start, curr);
return;
} // findPairsInCell()
//----------------------------------------------------------------------------------------
// One thread per sorted hash entry: locates the grid cell that contains the
// centre of the body's AABB and runs findPairsInCell() on that cell and its
// 26 neighbours (x innermost, z outermost).
BT_GPU___global__ void findOverlappingPairsD(	bt3DGrid3F1U* pAABB, uint2* pHash, uint* pCellStart,
												uint* pPairBuff, uint2* pPairBuffStartCurr, uint numBodies)
{
	int index = BT_GPU___mul24(BT_GPU_blockIdx.x, BT_GPU_blockDim.x) + BT_GPU_threadIdx.x;
	if(index >= (int)numBodies)
	{
		return;
	}
	uint2 hashEntry = pHash[index];
	uint bodyIndex = hashEntry.y;
	bt3DGrid3F1U lower = BT_GPU_FETCH(pAABB, bodyIndex*2);
	bt3DGrid3F1U upper = BT_GPU_FETCH(pAABB, bodyIndex*2 + 1);
	// centre of the box (the w component is left unset, as before)
	float4 centre;
	centre.x = (lower.fx + upper.fx) * 0.5f;
	centre.y = (lower.fy + upper.fy) * 0.5f;
	centre.z = (lower.fz + upper.fz) * 0.5f;
	// get address in grid
	int3 homeCell = bt3DGrid_calcGridPos(centre);
	// examine only neighbouring cells
	for(int dz = -1; dz <= 1; dz++)
	{
		for(int dy = -1; dy <= 1; dy++)
		{
			for(int dx = -1; dx <= 1; dx++)
			{
				findPairsInCell(homeCell + BT_GPU_make_int3(dx, dy, dz), index, pHash, pCellStart, pAABB, pPairBuff, pPairBuffStartCurr, numBodies);
			}
		}
	}
} // findOverlappingPairsD()
//----------------------------------------------------------------------------------------
// Kernel: one thread per entry of pHash.
// Tests the AABB of the body referenced by that entry against the numLarge AABBs that are
// stored in pAABB after the first numBodies bodies (body i occupies entries 2*i and 2*i+1).
// For every overlap the pair list of the body (pPairBuff[start .. start+curr-1], located through
// pPairBuffStartCurr[handle]) is updated:
//   - a pair that is already listed gets BT_3DGRID_PAIR_FOUND_FLG,
//   - a pair that is not listed yet is appended with BT_3DGRID_PAIR_NEW_FLG.
// pHash       : per-entry (x, y) pairs, .y is the index of the body inside pAABB
// pCellStart  : not used by this kernel
// numBodies   : number of threads that do work / number of regular bodies
// numLarge    : number of large bodies appended to pAABB
BT_GPU___global__ void findPairsLargeD(	bt3DGrid3F1U* pAABB, uint2* pHash, uint* pCellStart, uint* pPairBuff, 
										uint2* pPairBuffStartCurr, uint numBodies, uint numLarge)
{
	int index = BT_GPU___mul24(BT_GPU_blockIdx.x, BT_GPU_blockDim.x) + BT_GPU_threadIdx.x;
	if(index >= (int)numBodies)
	{
		return;
	}
	uint2 sortedData = pHash[index];
	uint unsorted_indx = sortedData.y;
	bt3DGrid3F1U min0 = BT_GPU_FETCH(pAABB, unsorted_indx*2);
	bt3DGrid3F1U max0 = BT_GPU_FETCH(pAABB, unsorted_indx*2 + 1);
	// the .uw member of the min entry carries the handle index of the body
	uint handleIndex =  min0.uw;
	uint2 start_curr = pPairBuffStartCurr[handleIndex];
	uint start = start_curr.x;
	uint curr = start_curr.y;
	// the list of this handle ends where the list of the next handle starts;
	// one slot is kept in reserve, exactly as in findPairsInCell()
	uint2 start_curr_next = pPairBuffStartCurr[handleIndex+1];
	uint curr_max = start_curr_next.x - start - 1;
	for(uint i = 0; i < numLarge; i++)
	{
		uint indx2 = numBodies + i;
		bt3DGrid3F1U min1 = BT_GPU_FETCH(pAABB, indx2*2);
		bt3DGrid3F1U max1 = BT_GPU_FETCH(pAABB, indx2*2 + 1);
		if(cudaTestAABBOverlap(min0, max0, min1, max1))
		{
			uint k;
			uint handleIndex2 =  min1.uw;
			// look for the pair among the entries that are already listed
			for(k = 0; k < curr; k++)
			{
				uint old_pair = pPairBuff[start+k] & (~BT_3DGRID_PAIR_ANY_FLG);
				if(old_pair == handleIndex2)
				{
					pPairBuff[start+k] |= BT_3DGRID_PAIR_FOUND_FLG;
					break;
				}
			}
			if(k == curr)
			{
				// check the capacity BEFORE storing, so that a full list is never written to
				// (same order as in findPairsInCell())
				if(curr >= curr_max) 
				{ // not a good solution, but let's avoid crash
					break;
				}
				pPairBuff[start+curr] = handleIndex2 | BT_3DGRID_PAIR_NEW_FLG;
				curr++;
			}
		}
	}
	// publish the new length of the pair list
	pPairBuffStartCurr[handleIndex] = BT_GPU_make_uint2(start, curr);
    return;
} // findPairsLargeD()
//----------------------------------------------------------------------------------------
// Kernel: one thread per body.
// Counts the entries of the body's pair list that carry at least one of the
// BT_3DGRID_PAIR_FOUND_FLG / BT_3DGRID_PAIR_NEW_FLG bits and stores the count in pPairScan[index+1].
BT_GPU___global__ void computePairCacheChangesD(uint* pPairBuff, uint2* pPairBuffStartCurr, 
												uint* pPairScan, bt3DGrid3F1U* pAABB, uint numBodies)
{
	const int index = BT_GPU___mul24(BT_GPU_blockIdx.x, BT_GPU_blockDim.x) + BT_GPU_threadIdx.x;
	if(index < (int)numBodies)
	{
		// handle index of the body is stored in the .uw member of its min AABB entry
		bt3DGrid3F1U aabbMin = pAABB[index * 2];
		const uint handle = aabbMin.uw;
		// (first slot, number of used slots) of the pair list of this handle
		uint2 listInfo = pPairBuffStartCurr[handle];
		const uint first = listInfo.x;
		const uint used = listInfo.y;
		uint flagged = 0;
		for(uint slot = 0; slot < used; slot++)
		{
			if(pPairBuff[first + slot] & BT_3DGRID_PAIR_ANY_FLG)
			{
				flagged++;
			}
		}
		pPairScan[index+1] = flagged;
	}
} // computePairCacheChangesD()
//----------------------------------------------------------------------------------------
// Kernel: one thread per body.
// Walks the pair list of the body and, for every entry that carries a FOUND or NEW flag,
//   - writes the pair (handle of this body, handle of the other body) to pPairOut,
//     starting at offset pPairScan[index+1],
//   - keeps the entry in the list with its flags cleared.
// Entries without a flag are dropped; the list is compacted in place and its new length is
// stored back into pPairBuffStartCurr.
BT_GPU___global__ void squeezeOverlappingPairBuffD(uint* pPairBuff, uint2* pPairBuffStartCurr, uint* pPairScan,
												   uint2* pPairOut, bt3DGrid3F1U* pAABB, uint numBodies)
{
	const int index = BT_GPU___mul24(BT_GPU_blockIdx.x, BT_GPU_blockDim.x) + BT_GPU_threadIdx.x;
	if(index < (int)numBodies)
	{
		bt3DGrid3F1U aabbMin = pAABB[index * 2];
		const uint handle = aabbMin.uw;
		uint2 listInfo = pPairBuffStartCurr[handle];
		const uint first = listInfo.x;
		const uint used = listInfo.y;
		uint2* pDst = pPairOut + pPairScan[index+1];
		uint kept = 0;
		for(uint slot = 0; slot < used; slot++)
		{
			const uint entry = pPairBuff[first + slot];
			if(!(entry & BT_3DGRID_PAIR_ANY_FLG))
			{
				continue; // pair was neither found nor added this time : drop it
			}
			const uint otherHandle = entry & (~BT_3DGRID_PAIR_ANY_FLG);
			pDst->x = handle;
			pDst->y = otherHandle;
			pDst++;
			// kept <= slot, so the write never overtakes the read position
			pPairBuff[first + kept] = otherHandle;
			kept++;
		}
		pPairBuffStartCurr[handle] = BT_GPU_make_uint2(first, kept);
	}
} // squeezeOverlappingPairBuffD()
//----------------------------------------------------------------------------------------
//----------------------------------------------------------------------------------------
//----------------------------------------------------------------------------------------
//----------------------------------------------------------------------------------------
// E N D O F K E R N E L F U N C T I O N S
//----------------------------------------------------------------------------------------
//----------------------------------------------------------------------------------------
//----------------------------------------------------------------------------------------
//----------------------------------------------------------------------------------------
extern "C"
{
//----------------------------------------------------------------------------------------
// Host entry point: runs the calcHashAABBD kernel for numBodies elements, at most 256 threads per block
void BT_GPU_PREF(calcHashAABB)(bt3DGrid3F1U* pAABB, unsigned int* hash, unsigned int numBodies)
{
	int threadsPerBlock;
	int blockCount;
	BT_GPU_PREF(computeGridSize)(numBodies, 256, blockCount, threadsPerBlock);
	// the kernel works on the hash buffer as an array of (x, y) pairs
	uint2* hashPairs = (uint2*)hash;
	BT_GPU_EXECKERNEL(blockCount, threadsPerBlock, calcHashAABBD, (pAABB, hashPairs, numBodies));
	// report a failed kernel invocation
	BT_GPU_CHECK_ERROR("calcHashAABBD kernel execution failed");
} // calcHashAABB()
//----------------------------------------------------------------------------------------
// Host entry point: sets every byte of the numCells entries of cellStart to 0xff,
// then runs the findCellStartD kernel for numBodies elements, at most 256 threads per block
void BT_GPU_PREF(findCellStart)(unsigned int* hash, unsigned int* cellStart, unsigned int numBodies, unsigned int numCells)
{
	// all bits set marks every cell as unset before the kernel fills in the real values
	BT_GPU_SAFE_CALL(BT_GPU_Memset(cellStart, 0xffffffff, numCells*sizeof(uint)));
	int threadsPerBlock;
	int blockCount;
	BT_GPU_PREF(computeGridSize)(numBodies, 256, blockCount, threadsPerBlock);
	uint2* hashPairs = (uint2*)hash;
	uint* cellStartOut = (uint*)cellStart;
	BT_GPU_EXECKERNEL(blockCount, threadsPerBlock, findCellStartD, (hashPairs, cellStartOut, numBodies));
	BT_GPU_CHECK_ERROR("Kernel execution failed: findCellStartD");
} // findCellStart()
//----------------------------------------------------------------------------------------
// Host entry point: runs the findOverlappingPairsD kernel for numBodies elements, at most 64 threads per block.
// When B_CUDA_USE_TEX is set, the AABB array (two entries per body) is bound to pAABBTex around the launch.
void BT_GPU_PREF(findOverlappingPairs)(bt3DGrid3F1U* pAABB, unsigned int* pHash, unsigned int* pCellStart, unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int numBodies)
{
#if B_CUDA_USE_TEX
	BT_GPU_SAFE_CALL(cudaBindTexture(0, pAABBTex, pAABB, numBodies * 2 * sizeof(bt3DGrid3F1U)));
#endif
	int threadsPerBlock;
	int blockCount;
	BT_GPU_PREF(computeGridSize)(numBodies, 64, blockCount, threadsPerBlock);
	// pHash and pPairBuffStartCurr are consumed by the kernel as arrays of (x, y) pairs
	uint2* hashPairs = (uint2*)pHash;
	uint2* startCurr = (uint2*)pPairBuffStartCurr;
	BT_GPU_EXECKERNEL(blockCount, threadsPerBlock, findOverlappingPairsD, (pAABB,hashPairs,(uint*)pCellStart,(uint*)pPairBuff,startCurr,numBodies));
	BT_GPU_CHECK_ERROR("Kernel execution failed: bt_CudaFindOverlappingPairsD");
#if B_CUDA_USE_TEX
	BT_GPU_SAFE_CALL(cudaUnbindTexture(pAABBTex));
#endif
} // findOverlappingPairs()
//----------------------------------------------------------------------------------------
// Host entry point: runs the findPairsLargeD kernel for numBodies elements, at most 64 threads per block.
// pAABB holds the AABBs of numBodies + numLarge bodies (two entries per body); when B_CUDA_USE_TEX
// is set, that whole range is bound to pAABBTex around the launch.
void BT_GPU_PREF(findPairsLarge)(bt3DGrid3F1U* pAABB, unsigned int* pHash, unsigned int* pCellStart, unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int numBodies, unsigned int numLarge)
{
#if B_CUDA_USE_TEX
	BT_GPU_SAFE_CALL(cudaBindTexture(0, pAABBTex, pAABB, (numBodies+numLarge) * 2 * sizeof(bt3DGrid3F1U)));
#endif
	int threadsPerBlock;
	int blockCount;
	BT_GPU_PREF(computeGridSize)(numBodies, 64, blockCount, threadsPerBlock);
	// pHash and pPairBuffStartCurr are consumed by the kernel as arrays of (x, y) pairs
	uint2* hashPairs = (uint2*)pHash;
	uint2* startCurr = (uint2*)pPairBuffStartCurr;
	BT_GPU_EXECKERNEL(blockCount, threadsPerBlock, findPairsLargeD, (pAABB,hashPairs,(uint*)pCellStart,(uint*)pPairBuff,startCurr,numBodies,numLarge));
	BT_GPU_CHECK_ERROR("Kernel execution failed: btCuda_findPairsLargeD");
#if B_CUDA_USE_TEX
	BT_GPU_SAFE_CALL(cudaUnbindTexture(pAABBTex));
#endif
} // findPairsLarge()
//----------------------------------------------------------------------------------------
// Host entry point: runs the computePairCacheChangesD kernel for numBodies elements, at most 256 threads per block
void BT_GPU_PREF(computePairCacheChanges)(unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int* pPairScan, bt3DGrid3F1U* pAABB, unsigned int numBodies)
{
	int threadsPerBlock;
	int blockCount;
	BT_GPU_PREF(computeGridSize)(numBodies, 256, blockCount, threadsPerBlock);
	// pPairBuffStartCurr is consumed by the kernel as an array of (start, curr) pairs
	uint2* startCurr = (uint2*)pPairBuffStartCurr;
	BT_GPU_EXECKERNEL(blockCount, threadsPerBlock, computePairCacheChangesD, ((uint*)pPairBuff,startCurr,(uint*)pPairScan,pAABB,numBodies));
	BT_GPU_CHECK_ERROR("Kernel execution failed: btCudaComputePairCacheChangesD");
} // computePairCacheChanges()
//----------------------------------------------------------------------------------------
// Host entry point: runs the squeezeOverlappingPairBuffD kernel for numBodies elements, at most 256 threads per block
void BT_GPU_PREF(squeezeOverlappingPairBuff)(unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int* pPairScan, unsigned int* pPairOut, bt3DGrid3F1U* pAABB, unsigned int numBodies)
{
	int threadsPerBlock;
	int blockCount;
	BT_GPU_PREF(computeGridSize)(numBodies, 256, blockCount, threadsPerBlock);
	// pPairBuffStartCurr and pPairOut are consumed by the kernel as arrays of (x, y) pairs
	uint2* startCurr = (uint2*)pPairBuffStartCurr;
	uint2* pairsOut = (uint2*)pPairOut;
	BT_GPU_EXECKERNEL(blockCount, threadsPerBlock, squeezeOverlappingPairBuffD, ((uint*)pPairBuff,startCurr,(uint*)pPairScan,pairsOut,pAABB,numBodies));
	BT_GPU_CHECK_ERROR("Kernel execution failed: btCudaSqueezeOverlappingPairBuffD");
} // squeezeOverlappingPairBuff()
//------------------------------------------------------------------------------------------------
} // extern "C"
//------------------------------------------------------------------------------------------------
//------------------------------------------------------------------------------------------------
//------------------------------------------------------------------------------------------------

View File

@@ -0,0 +1,61 @@
/*
Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
Copyright (C) 2006, 2009 Sony Computer Entertainment Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
//----------------------------------------------------------------------------------------
// Shared definitions for GPU-based 3D Grid collision detection broadphase
//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
// Keep this file free from Bullet headers
// it is included into both CUDA and CPU code
//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
//----------------------------------------------------------------------------------------
#ifndef BTGPU3DGRIDBROADPHASESHAREDDEFS_H
#define BTGPU3DGRIDBROADPHASESHAREDDEFS_H
//----------------------------------------------------------------------------------------
#include "btGpu3DGridBroadphaseSharedTypes.h"
//----------------------------------------------------------------------------------------
// Host-side entry points of the 3D grid broadphase. The actual name of every function is
// produced by BT_GPU_PREF, so a definition of BT_GPU_PREF must be visible before this header.
// Buffers are passed as plain unsigned int pointers; the implementations cast hash and
// start/curr buffers to arrays of uint2 before handing them to the kernels.
extern "C"
{
//----------------------------------------------------------------------------------------
// runs the calcHashAABBD kernel for numBodies elements
void BT_GPU_PREF(calcHashAABB)(bt3DGrid3F1U* pAABB, unsigned int* hash, unsigned int numBodies);
// fills the numCells entries of cellStart with 0xff bytes, then runs the findCellStartD kernel
void BT_GPU_PREF(findCellStart)(unsigned int* hash, unsigned int* cellStart, unsigned int numBodies, unsigned int numCells);
// runs the findOverlappingPairsD kernel : pair search in the grid cell of a body and in the adjacent cells
void BT_GPU_PREF(findOverlappingPairs)(bt3DGrid3F1U* pAABB, unsigned int* pHash, unsigned int* pCellStart, unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int numBodies);
// runs the findPairsLargeD kernel : tests each of the numBodies bodies against the numLarge bodies stored after them in pAABB
void BT_GPU_PREF(findPairsLarge)(bt3DGrid3F1U* pAABB, unsigned int* pHash, unsigned int* pCellStart, unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int numBodies, unsigned int numLarge);
// runs the computePairCacheChangesD kernel : per body, the number of flagged pairs is written to pPairScan[body + 1]
void BT_GPU_PREF(computePairCacheChanges)(unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int* pPairScan, bt3DGrid3F1U* pAABB, unsigned int numBodies);
// runs the squeezeOverlappingPairBuffD kernel : flagged pairs are written to pPairOut (two unsigned ints per pair) and the pair lists are compacted
void BT_GPU_PREF(squeezeOverlappingPairBuff)(unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int* pPairScan, unsigned int* pPairOut, bt3DGrid3F1U* pAABB, unsigned int numBodies);
//----------------------------------------------------------------------------------------
} // extern "C"
//----------------------------------------------------------------------------------------
#endif // BTGPU3DGRIDBROADPHASESHAREDDEFS_H

View File

@@ -0,0 +1,64 @@
/*
Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
Copyright (C) 2006, 2009 Sony Computer Entertainment Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
//----------------------------------------------------------------------------------------
// Shared type definitions for GPU-based 3D Grid collision detection broadphase
//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
// Keep this file free from Bullet headers
// it is included into both CUDA and CPU code
//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
//----------------------------------------------------------------------------------------
#ifndef BTGPU3DGRIDBROADPHASESHAREDTYPES_H
#define BTGPU3DGRIDBROADPHASESHAREDTYPES_H
//----------------------------------------------------------------------------------------
// Flag bits stored in the upper bits of a pair buffer entry; the remaining bits hold the
// handle index of the other body of the pair.
// FOUND : the pair was already listed and has been detected again
#define BT_3DGRID_PAIR_FOUND_FLG (0x40000000)
// NEW : the pair has just been appended to the list
#define BT_3DGRID_PAIR_NEW_FLG   (0x20000000)
// mask of both flags, used to test for any flag and to strip the flags from an entry
#define BT_3DGRID_PAIR_ANY_FLG   (BT_3DGRID_PAIR_FOUND_FLG | BT_3DGRID_PAIR_NEW_FLG)
//----------------------------------------------------------------------------------------
// Parameter block of the 3D grid broadphase, shared between host and kernel code
// (the kernels read it through BT_GPU_params).
// NOTE(review): the meaning of most members is inferred from their names - confirm against
// the code that fills the structure.
struct bt3DGridBroadphaseParams
{
// presumably the number of grid cells along X, Y and Z
unsigned int	m_gridSizeX;
unsigned int	m_gridSizeY;
unsigned int	m_gridSizeZ;
// presumably the total number of grid cells
unsigned int	m_numCells;
// presumably 1 / cell size along X, Y and Z
float			m_invCellSizeX;
float			m_invCellSizeY;
float			m_invCellSizeZ;
// presumably the number of bodies handled by the grid
unsigned int	m_numBodies;
// upper limit for the number of hash entries findPairsInCell() scans in one cell
unsigned int	m_maxBodiesPerCell;
};
//----------------------------------------------------------------------------------------
// Three floats plus one unsigned int.
// The kernels store an AABB as two consecutive elements of this type : element 2*i holds the
// minimum corner of body i, element 2*i+1 the maximum corner.
struct bt3DGrid3F1U
{
// x, y, z coordinates of the corner
float			fx;
float			fy;
float			fz;
// user word; in the minimum corner element the kernels read the handle index of the body from it
unsigned int	uw;
};
//----------------------------------------------------------------------------------------
#endif // BTGPU3DGRIDBROADPHASESHAREDTYPES_H

View File

@@ -0,0 +1,211 @@
/*
Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
Copyright (C) 2006, 2009 Sony Computer Entertainment Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
// definitions for "GPU on CPU" code
#ifndef BT_GPU_DEFINES_H
#define BT_GPU_DEFINES_H
// Plain CPU replacements for the scalar / vector types that the shared kernel code expects
// (uint, int2, uint2, int3, uint3, float4, float3). They are simple aggregates of their components.
typedef unsigned int uint;
// two signed int components
struct int2
{
int x, y;
};
// two unsigned int components
struct uint2
{
unsigned int x, y;
};
// three signed int components
struct int3
{
int x, y, z;
};
// three unsigned int components
struct uint3
{
unsigned int x, y, z;
};
// four float components
struct float4
{
float x, y, z, w;
};
// three float components
struct float3
{
float x, y, z;
};
// CPU definitions of the BT_GPU_xxx qualifiers and intrinsics used by the shared kernel code
// device functions become ordinary inline functions
#define BT_GPU___device__ inline
// the data qualifiers expand to nothing
#define BT_GPU___devdata__
#define BT_GPU___constant__
// NOTE : both macros evaluate their arguments more than once
#define BT_GPU_max(a, b) ((a) > (b) ? (a) : (b))
#define BT_GPU_min(a, b) ((a) < (b) ? (a) : (b))
// name of the parameter block read by the kernels (s3DGridBroadphaseParams is defined elsewhere)
#define BT_GPU_params s3DGridBroadphaseParams
// plain multiplication
#define BT_GPU___mul24(a, b) ((a)*(b))
// kernels become ordinary inline functions
#define BT_GPU___global__ inline
// shared variables become static variables
#define BT_GPU___shared__ static
// expands to nothing
#define BT_GPU___syncthreads()
// SIMD_PI is not defined in this file - presumably it comes from the Bullet headers
#define CUDART_PI_F SIMD_PI
// builds a uint2 from its two components
static inline uint2 bt3dGrid_make_uint2(unsigned int x, unsigned int y)
{
	uint2 result = { x, y };
	return result;
}
#define BT_GPU_make_uint2(x, y) bt3dGrid_make_uint2(x, y)
// builds an int3 from its three components
static inline int3 bt3dGrid_make_int3(int x, int y, int z)
{
	int3 result = { x, y, z };
	return result;
}
#define BT_GPU_make_int3(x, y, z) bt3dGrid_make_int3(x, y, z)
// builds a float3 from its three components
static inline float3 bt3dGrid_make_float3(float x, float y, float z)
{
	float3 result = { x, y, z };
	return result;
}
#define BT_GPU_make_float3(x, y, z) bt3dGrid_make_float3(x, y, z)
// builds a float3 from the x, y, z components of a float4 (w is dropped)
static inline float3 bt3dGrid_make_float34(float4 f)
{
	float3 result = { f.x, f.y, f.z };
	return result;
}
#define BT_GPU_make_float34(f) bt3dGrid_make_float34(f)
// builds a float3 with all three components set to f
static inline float3 bt3dGrid_make_float31(float f)
{
	float3 result = { f, f, f };
	return result;
}
#define BT_GPU_make_float31(x) bt3dGrid_make_float31(x)
// builds a float4 from a float3 (x, y, z) and a scalar (w)
static inline float4 bt3dGrid_make_float42(float3 v, float f)
{
	float4 result = { v.x, v.y, v.z, f };
	return result;
}
#define BT_GPU_make_float42(a, b) bt3dGrid_make_float42(a, b)
// builds a float4 from its four components
static inline float4 bt3dGrid_make_float44(float a, float b, float c, float d)
{
	float4 result = { a, b, c, d };
	return result;
}
#define BT_GPU_make_float44(a, b, c, d) bt3dGrid_make_float44(a, b, c, d)
// component-wise sum of two int3 values
inline int3 operator+(int3 a, int3 b)
{
	int3 sum;
	sum.x = a.x + b.x;
	sum.y = a.y + b.y;
	sum.z = a.z + b.z;
	return sum;
}
// component-wise sum of two float4 values
inline float4 operator+(const float4& a, const float4& b)
{
	float4 sum = { a.x+b.x, a.y+b.y, a.z+b.z, a.w+b.w };
	return sum;
}
// scales all four components of a by fact
inline float4 operator*(const float4& a, float fact)
{
	float4 scaled = { a.x*fact, a.y*fact, a.z*fact, a.w*fact };
	return scaled;
}
// scalar * vector : scales all four components of a by fact.
// The vector is taken by const reference so that const objects and temporaries
// (for example the result of another operator) are accepted as well.
inline float4 operator*(float fact, const float4& a)
{
	return (a * fact);
}
// scales a in place by fact and returns a
inline float4& operator*=(float4& a, float fact)
{
	a.x = a.x*fact;
	a.y = a.y*fact;
	a.z = a.z*fact;
	a.w = a.w*fact;
	return a;
}
// adds b to a in place and returns a
inline float4& operator+=(float4& a, const float4& b)
{
	return a = a + b;
}
// component-wise sum of two float3 values
inline float3 operator+(const float3& a, const float3& b)
{
	float3 sum = { a.x+b.x, a.y+b.y, a.z+b.z };
	return sum;
}
// component-wise difference of two float3 values
inline float3 operator-(const float3& a, const float3& b)
{
	float3 diff = { a.x-b.x, a.y-b.y, a.z-b.z };
	return diff;
}
static inline float bt3dGrid_dot(float3& a, float3& b)
{
return a.x*b.x+a.y*b.y+a.z*b.z;
}
#define BT_GPU_dot(a,b) bt3dGrid_dot(a,b)
static inline float bt3dGrid_dot4(float4& a, float4& b)
{
return a.x*b.x+a.y*b.y+a.z*b.z+a.w*b.w;
}
#define BT_GPU_dot4(a,b) bt3dGrid_dot4(a,b)
// cross product a x b of two float3 values
static inline float3 bt3dGrid_cross(const float3& a, const float3& b)
{
	float3 result = { a.y*b.z-a.z*b.y, -a.x*b.z+a.z*b.x, a.x*b.y-a.y*b.x };
	return result;
}
#define BT_GPU_cross(a,b) bt3dGrid_cross(a,b)
// scales all three components of a by fact
inline float3 operator*(const float3& a, float fact)
{
	float3 scaled = { a.x*fact, a.y*fact, a.z*fact };
	return scaled;
}
// adds b to a in place and returns a
inline float3& operator+=(float3& a, const float3& b)
{
	return a = a + b;
}
// subtracts b from a in place and returns a
inline float3& operator-=(float3& a, const float3& b)
{
	return a = a - b;
}
// scales a in place by fact and returns a
inline float3& operator*=(float3& a, float fact)
{
	return a = a * fact;
}
// negates all three components of v
inline float3 operator-(const float3& v)
{
	float3 negated = { -v.x, -v.y, -v.z };
	return negated;
}
// CPU definitions of the BT_GPU_xxx memory access, naming and kernel launch macros
// fetches are plain array accesses
#define BT_GPU_FETCH(a, b) a[b]
#define BT_GPU_FETCH4(a, b) a[b]
// prefix for the names of the host entry points, e.g. BT_GPU_PREF(iDivUp) -> btGpu_iDivUp
#define BT_GPU_PREF(func) btGpu_##func
// no error checking : the call is simply executed
#define BT_GPU_SAFE_CALL(func) func
#define BT_GPU_Memset memset
// copies c bytes from b into the variable a
#define BT_GPU_MemcpyToSymbol(a, b, c) memcpy(&a, b, c)
// texture binding expands to nothing
#define BT_GPU_BindTexture(a, b, c, d)
#define BT_GPU_UnbindTexture(a)
// emulated block index, block size and thread index (BT_GPU_EXECKERNEL only sets the .x members);
// being static, every translation unit that includes this header gets its own copy
static uint2 s_blockIdx, s_blockDim, s_threadIdx;
#define BT_GPU_blockIdx s_blockIdx
#define BT_GPU_blockDim s_blockDim
#define BT_GPU_threadIdx s_threadIdx
// serial kernel launch : calls kfunc once for every thread of every block, one after another.
// NOTE : the macro declares the loop variables nb and nt, so the numb, numt and args
// expressions must not use variables with these names
#define BT_GPU_EXECKERNEL(numb, numt, kfunc, args) {s_blockDim.x=numt;for(int nb=0;nb<numb;nb++){s_blockIdx.x=nb;for(int nt=0;nt<numt;nt++){s_threadIdx.x=nt;kfunc args;}}}
// error check expands to nothing
#define BT_GPU_CHECK_ERROR(s)
#endif //BT_GPU_DEFINES_H

View File

@@ -0,0 +1,55 @@
/*
Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
Copyright (C) 2006, 2009 Sony Computer Entertainment Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
//----------------------------------------------------------------------------------------
// Shared code for GPU-based utilities
//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
// Keep this file free from Bullet headers
// will be compiled by both CPU and CUDA compilers
// file with definitions of BT_GPU_xxx should be included first
//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
//----------------------------------------------------------------------------------------
#include "btGpuUtilsSharedDefs.h"
//----------------------------------------------------------------------------------------
extern "C"
{
//----------------------------------------------------------------------------------------
// Integer division a / b, rounded up to the next integer when the division leaves a remainder.
// b must not be zero.
int BT_GPU_PREF(iDivUp)(int a, int b)
{
	const int quotient = a / b;
	const int remainder = a % b;
	if(remainder != 0)
	{
		return quotient + 1;
	}
	return quotient;
} // iDivUp()
//----------------------------------------------------------------------------------------
// compute grid and thread block size for a given number of elements
//   n          : number of elements to process
//   blockSize  : maximal number of threads per block
//   numBlocks  : (out) number of blocks, so that numBlocks * numThreads >= n
//   numThreads : (out) number of threads per block, min(blockSize, n) but never less than 1
// For n <= 0 the result is numBlocks = 0 and numThreads = 1. The previous code computed
// numThreads = 0 in that case and then divided by zero inside iDivUp().
// NOTE(review): a launch with zero blocks is a no-op for the CPU emulation; callers that launch
// on a real device should presumably skip the launch when numBlocks is 0 - confirm.
void BT_GPU_PREF(computeGridSize)(int n, int blockSize, int &numBlocks, int &numThreads)
{
	numThreads = BT_GPU_min(blockSize, n);
	if(numThreads < 1)
	{
		// empty input or non-positive block size : keep the divisor below valid
		numThreads = 1;
	}
	numBlocks = (n > 0) ? BT_GPU_PREF(iDivUp)(n, numThreads) : 0;
} // computeGridSize()
//----------------------------------------------------------------------------------------
} // extern "C"

View File

@@ -0,0 +1,52 @@
/*
Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
// Shared definitions for GPU-based utilities
//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
// Keep this file free from Bullet headers
// it is included into both CUDA and CPU code
// file with definitions of BT_GPU_xxx should be included first
//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
#ifndef BTGPUUTILSDHAREDDEFS_H
#define BTGPUUTILSDHAREDDEFS_H
// Host-side utility entry points. The actual name of every function is produced by BT_GPU_PREF,
// so a definition of BT_GPU_PREF must be visible before this header.
extern "C"
{
//Round a / b to nearest higher integer value
int BT_GPU_PREF(iDivUp)(int a, int b);
// compute grid and thread block size for a given number of elements
void BT_GPU_PREF(computeGridSize)(int n, int blockSize, int &numBlocks, int &numThreads);
// NOTE(review): the functions below are only declared here; judging by their names they
// allocate / free device memory and copy data between host and device - confirm against
// the implementation
void BT_GPU_PREF(allocateArray)(void** devPtr, unsigned int size);
void BT_GPU_PREF(freeArray)(void* devPtr);
void BT_GPU_PREF(copyArrayFromDevice)(void* host, const void* device, unsigned int size);
void BT_GPU_PREF(copyArrayToDevice)(void* device, const void* host, unsigned int size);
// NOTE(review): presumably OpenGL buffer object interop (vbo = buffer object name) - confirm
void BT_GPU_PREF(registerGLBufferObject(unsigned int vbo));
void* BT_GPU_PREF(mapGLBufferObject(unsigned int vbo));
void BT_GPU_PREF(unmapGLBufferObject(unsigned int vbo));
} // extern "C"
#endif // BTGPUUTILSDHAREDDEFS_H