Add the GPU rigid body pipeline from https://github.com/erwincoumans/experiments as a Bullet 3.x preview for Bullet 2.80
This commit is contained in:
@@ -0,0 +1,349 @@
|
||||
|
||||
MSTRINGIFY(
|
||||
|
||||
// Maps an integer grid cell coordinate to a linear cell index (the cell hash).
//   gridPos - cell coordinate; only x, y and z are read
//   pParams - parameter block; pParams[1] is reinterpreted as int4 whose
//             x, y, z components are the grid dimensions
// Each coordinate is wrapped with a bitwise AND against (dimension - 1). That is
// a modulo only when the dimensions are powers of two (assumed, not checked here).
int getPosHash(int4 gridPos, __global float4* pParams)
{
	int4 dims = *((__global int4*)(pParams + 1));
	int cx = gridPos.x & (dims.x - 1);
	int cy = gridPos.y & (dims.y - 1);
	int cz = gridPos.z & (dims.z - 1);
	// linearise with z as the slowest and x as the fastest varying axis
	return cz * dims.y * dims.x + cy * dims.x + cx;
}
|
||||
|
||||
// Converts a world-space position into integer grid cell coordinates.
//   worldPos - position; only x, y and z are read
//   pParams  - parameter block; pParams[0].xyz are the per-axis factors the
//              position is multiplied by before flooring, pParams[1]
//              reinterpreted as int4 holds the grid dimensions
// Coordinates are wrapped with a bitwise AND against (dimension - 1).
// The w component of the returned vector is never assigned.
int4 getGridPos(float4 worldPos, __global float4* pParams)
{
	int4 dims = *((__global int4*)(pParams + 1));
	float4 scale = pParams[0];
	int4 cell;
	cell.x = ((int)floor(worldPos.x * scale.x)) & (dims.x - 1);
	cell.y = ((int)floor(worldPos.y * scale.y)) & (dims.y - 1);
	cell.z = ((int)floor(worldPos.z * scale.z)) & (dims.z - 1);
	return cell;
}
|
||||
|
||||
|
||||
// Kernel: for each body, hash the grid cell that contains the centre of its AABB.
//   numObjects - number of bodies; work-items with a larger global id do nothing
//   pAABB      - two float4 per body: minimum at [2*i], maximum at [2*i + 1]
//   pHash      - output, one int2 per body: x = cell hash, y = body index
//   pParams    - grid parameters consumed by getGridPos / getPosHash
__kernel void kCalcHashAABB(int numObjects, __global float4* pAABB, __global int2* pHash, __global float4* pParams GUID_ARG)
{
	int body = get_global_id(0);
	if(body < numObjects)
	{
		float4 lo = pAABB[body*2];
		float4 hi = pAABB[body*2 + 1];
		// centre of the box; w is not part of the position and is zeroed
		float4 centre;
		centre.x = (lo.x + hi.x) * 0.5f;
		centre.y = (lo.y + hi.y) * 0.5f;
		centre.z = (lo.z + hi.z) * 0.5f;
		centre.w = 0.f;
		int4 cell = getGridPos(centre, pParams);
		// pair the cell hash with the body index
		int2 entry;
		entry.x = getPosHash(cell, pParams);
		entry.y = body;
		pHash[body] = entry;
	}
}
|
||||
|
||||
// Kernel: marks every grid cell as empty by storing -1 in pCellStart.
//   numCells   - number of entries to reset; extra work-items do nothing
//   pCellStart - per-cell start index array
__kernel void kClearCellStart(int numCells, __global int* pCellStart GUID_ARG)
{
	int cell = get_global_id(0);
	if(cell < numCells)
	{
		pCellStart[cell] = -1;
	}
}
|
||||
|
||||
// Kernel: for every distinct hash in pHash, stores the index of its first entry.
//   numObjects - number of valid entries in pHash
//   pHash      - (hash, body index) pairs; entries with equal hash are expected
//                to be adjacent (the host sorts the array before this kernel)
//   cellStart  - output: cellStart[hash] = index of the first entry with that hash
// Entry i starts a run when i == 0 or its hash differs from the hash of entry i-1.
// Hashes are staged in local memory, shifted by one slot, so each work-item can
// read its predecessor's hash; slot 0 holds the last hash of the previous
// work-group. The array has 513 slots, i.e. room for at most 512 work-items.
__kernel void kFindCellStart(int numObjects, __global int2* pHash, __global int* cellStart GUID_ARG)
{
	__local int sharedHash[513];
	int index = get_global_id(0);
	int lid = get_local_id(0);
	int inRange = (index < numObjects);
	int2 entry;
	if(inRange)
	{
		entry = pHash[index];
		sharedHash[lid + 1] = entry.x;
		if((lid == 0) && (index > 0))
		{
			// first work-item of the group fetches the neighbouring group's last hash
			sharedHash[0] = pHash[index-1].x;
		}
	}
	// reached by every work-item of the group, including the out-of-range ones
	barrier(CLK_LOCAL_MEM_FENCE);
	if(!inRange)
	{
		return;
	}
	if((index == 0) || (entry.x != sharedHash[lid]))
	{
		cellStart[entry.x] = index;
	}
}
|
||||
|
||||
// Returns non-zero when the boxes [min0, max0] and [min1, max1] overlap on the
// x, y and z axes (boxes that merely touch count as overlapping), otherwise 0.
// The w components are not looked at.
int testAABBOverlap(float4 min0, float4 max0, float4 min1, float4 max1)
{
	int overlapX = (min0.x <= max1.x) && (min1.x <= max0.x);
	if(!overlapX)
	{
		return 0;
	}
	int overlapY = (min0.y <= max1.y) && (min1.y <= max0.y);
	if(!overlapY)
	{
		return 0;
	}
	return (min0.z <= max1.z) && (min1.z <= max0.z);
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// Tests the body at sorted position 'index' against the bodies stored in grid
// cell 'gridPos' and records overlaps in that body's pair list.
//   numObjects         - number of entries in pHash
//   gridPos            - cell to scan (wrapped by getPosHash)
//   index              - position of the current body in the sorted pHash array
//   pHash              - (cell hash, body index) pairs
//   pCellStart         - first pHash index of each cell, -1 when the cell is empty
//   pAABB              - two float4 per body (min, max); min.w carries the handle
//                        index as raw integer bits
//   pPairBuff          - pair storage shared by all bodies
//   pPairBuffStartCurr - per handle: x = start of its slice in pPairBuff,
//                        y = number of entries currently stored
//   pParams            - grid parameters; pParams[1].w (as int) limits how many
//                        entries of a cell are scanned
// Only bodies with a smaller body index than the current one are considered.
// An overlap already present in the list gets bit 0x40000000 set; a new one is
// appended with bit 0x20000000 set. At most (slice size - 1) entries are stored;
// when that limit is hit the scan of this cell stops.
void findPairsInCell(	int numObjects,
						int4 gridPos,
						int index,
						__global int2* pHash,
						__global int* pCellStart,
						__global float4* pAABB,
						__global int* pPairBuff,
						__global int2* pPairBuffStartCurr,
						__global float4* pParams)
{
	int4 gridInfo = *((__global int4*)(pParams + 1));
	int cellHash = getPosHash(gridPos, pParams);
	int first = pCellStart[cellHash];
	if(first == -1)
	{
		return; // nothing stored in this cell
	}
	// the body handled by this work-item
	int bodyA = pHash[index].y;
	float4 minA = pAABB[bodyA*2 + 0];
	float4 maxA = pAABB[bodyA*2 + 1];
	int handleA = as_int(minA.w);
	// its slice of the pair buffer
	int2 slice = pPairBuffStartCurr[handleA];
	int start = slice.x;
	int count = slice.y;
	int capacity = pPairBuffStartCurr[handleA+1].x - start - 1;
	// scan window: at most gridInfo.w entries, clamped to the array size
	int last = first + gridInfo.w;
	if(last > numObjects)
	{
		last = numObjects;
	}
	for(int j = first; j < last; j++)
	{
		int2 other = pHash[j];
		if(other.x != cellHash)
		{
			break; // ran past the entries of this cell
		}
		int bodyB = other.y;
		if(bodyB >= bodyA)
		{
			continue; // skips the body itself and pairs handled by the other body
		}
		float4 minB = pAABB[bodyB*2 + 0];
		float4 maxB = pAABB[bodyB*2 + 1];
		if(!testAABBOverlap(minA, maxA, minB, maxB))
		{
			continue;
		}
		int handleB = as_int(minB.w);
		// look for the pair among the entries already stored
		int found = 0;
		for(int k = 0; k < count; k++)
		{
			if((pPairBuff[start+k] & (~0x60000000)) == handleB)
			{
				pPairBuff[start+k] |= 0x40000000;
				found = 1;
				break;
			}
		}
		if(found)
		{
			continue;
		}
		if(count >= capacity)
		{
			break; // slice is full: give up on this cell instead of overflowing
		}
		pPairBuff[start+count] = handleB | 0x20000000;
		count++;
	}
	// publish the updated entry count
	int2 updated;
	updated.x = start;
	updated.y = count;
	pPairBuffStartCurr[handleA] = updated;
}
|
||||
|
||||
// Kernel: for each body, scans its own grid cell and the 26 surrounding cells
// for overlapping bodies (see findPairsInCell for how pairs are recorded).
//   numObjects         - number of bodies; extra work-items do nothing
//   pAABB              - two float4 per body (min, max)
//   pHash              - (cell hash, body index) pairs, indexed by global id
//   pCellStart         - first pHash index of each cell
//   pPairBuff          - pair storage
//   pPairBuffStartCurr - per-handle slice start / entry count
//   pParams            - grid parameters
__kernel void kFindOverlappingPairs(	int numObjects,
										__global float4* pAABB,
										__global int2* pHash,
										__global int* pCellStart,
										__global int* pPairBuff,
										__global int2* pPairBuffStartCurr,
										__global float4* pParams GUID_ARG)
{
	int index = get_global_id(0);
	if(index < numObjects)
	{
		int body = pHash[index].y;
		float4 lo = pAABB[body*2 + 0];
		float4 hi = pAABB[body*2 + 1];
		// cell containing the centre of the box (w is not used by getGridPos)
		float4 centre;
		centre.x = (lo.x + hi.x) * 0.5f;
		centre.y = (lo.y + hi.y) * 0.5f;
		centre.z = (lo.z + hi.z) * 0.5f;
		int4 home = getGridPos(centre, pParams);
		// visit the 3x3x3 block of cells around it
		int4 cell;
		for(int dz = -1; dz <= 1; dz++)
		{
			cell.z = home.z + dz;
			for(int dy = -1; dy <= 1; dy++)
			{
				cell.y = home.y + dy;
				for(int dx = -1; dx <= 1; dx++)
				{
					cell.x = home.x + dx;
					findPairsInCell(numObjects, cell, index, pHash, pCellStart, pAABB, pPairBuff, pPairBuffStartCurr, pParams);
				}
			}
		}
	}
}
|
||||
|
||||
|
||||
// Kernel: tests every (small) body against all large bodies and records the
// overlaps in the small body's pair list.
//   numObjects         - number of small bodies; extra work-items do nothing
//   pAABB              - two float4 per body (min, max); the boxes of the large
//                        bodies are read from body indices numObjects + i;
//                        min.w carries the handle index as raw integer bits
//   pHash              - (cell hash, body index) pairs, indexed by global id
//   pCellStart         - not used by this kernel
//   pPairBuff          - pair storage
//   pPairBuffStartCurr - per handle: x = slice start, y = number of entries
//   numLarge           - number of large bodies
// An overlap already present in the list gets bit 0x40000000 set; a new one is
// appended with bit 0x20000000 set. At most (slice size - 1) entries are stored.
__kernel void kFindPairsLarge(	int numObjects,
								__global float4* pAABB,
								__global int2* pHash,
								__global int* pCellStart,
								__global int* pPairBuff,
								__global int2* pPairBuffStartCurr,
								uint numLarge GUID_ARG)
{
	int index = get_global_id(0);
	if(index >= numObjects)
	{
		return;
	}
	int2 sortedData = pHash[index];
	int unsorted_indx = sortedData.y;
	float4 min0 = pAABB[unsorted_indx*2 + 0];
	float4 max0 = pAABB[unsorted_indx*2 + 1];
	int handleIndex = as_int(min0.w);
	int2 start_curr = pPairBuffStartCurr[handleIndex];
	int start = start_curr.x;
	int curr = start_curr.y;
	int2 start_curr_next = pPairBuffStartCurr[handleIndex+1];
	int curr_max = start_curr_next.x - start - 1;
	for(uint i = 0; i < numLarge; i++)
	{
		int indx2 = numObjects + i;
		float4 min1 = pAABB[indx2*2 + 0];
		float4 max1 = pAABB[indx2*2 + 1];
		if(!testAABBOverlap(min0, max0, min1, max1))
		{
			continue;
		}
		int handleIndex2 = as_int(min1.w);
		// look for the pair among the entries already stored
		int k;
		for(k = 0; k < curr; k++)
		{
			int old_pair = pPairBuff[start+k] & (~0x60000000);
			if(old_pair == handleIndex2)
			{
				pPairBuff[start+k] |= 0x40000000;
				break;
			}
		}
		if(k == curr)
		{
			// Check the capacity BEFORE writing, exactly like findPairsInCell does.
			// Previously the entry was written first and then left uncounted when
			// the list was full, i.e. a store outside the counted range.
			if(curr >= curr_max)
			{ // not a good solution, but let's avoid crash
				break;
			}
			pPairBuff[start+curr] = handleIndex2 | 0x20000000;
			curr++;
		}
	}
	// publish the updated entry count
	int2 newStartCurr;
	newStartCurr.x = start;
	newStartCurr.y = curr;
	pPairBuffStartCurr[handleIndex] = newStartCurr;
}
|
||||
|
||||
// Kernel: counts, per body, the pair-list entries that do NOT carry bit
// 0x40000000 and stores the count in pPairScan[index + 1].
//   numObjects         - number of bodies; extra work-items do nothing
//   pPairBuff          - pair storage
//   pPairBuffStartCurr - per handle: x = slice start, y = number of entries
//   pPairScan          - output counts, shifted by one element
//   pAABB              - two float4 per body; min.w carries the handle index
__kernel void kComputePairCacheChanges(	int numObjects,
										__global int* pPairBuff,
										__global int2* pPairBuffStartCurr,
										__global int* pPairScan,
										__global float4* pAABB GUID_ARG)
{
	int index = get_global_id(0);
	if(index < numObjects)
	{
		int handle = as_int(pAABB[index * 2].w);
		int2 slice = pPairBuffStartCurr[handle];
		int begin = slice.x;
		int count = slice.y;
		int changed = 0;
		for(int k = 0; k < count; k++)
		{
			if((pPairBuff[begin + k] & 0x40000000) == 0)
			{
				changed++;
			}
		}
		pPairScan[index+1] = changed;
	}
}
|
||||
|
||||
// Kernel: per body, (1) copies every pair entry without bit 0x40000000 to the
// output list and (2) compacts the body's pair list in place.
//   numObjects         - number of bodies; extra work-items do nothing
//   pPairBuff          - pair storage, compacted in place
//   pPairBuffStartCurr - per handle: x = slice start, y = number of entries;
//                        y is rewritten with the compacted count
//   pPairScan          - pPairScan[index + 1] is the offset into pPairOut at
//                        which this body writes its entries
//   pPairOut           - output list of copied entries (flag bits kept)
//   pAABB              - two float4 per body; min.w carries the handle index
// During compaction an entry survives when it carries bit 0x20000000 or
// 0x40000000; both bits are cleared in the surviving copy. Entries with
// neither bit are dropped from the list.
__kernel void kSqueezeOverlappingPairBuff(	int numObjects,
											__global int* pPairBuff,
											__global int2* pPairBuffStartCurr,
											__global int* pPairScan,
											__global int* pPairOut,
											__global float4* pAABB GUID_ARG)
{
	int index = get_global_id(0);
	if(index < numObjects)
	{
		int handle = as_int(pAABB[index * 2].w);
		int2 slice = pPairBuffStartCurr[handle];
		int start = slice.x;
		int count = slice.y;
		__global int* list = pPairBuff + start;
		__global int* changedOut = pPairOut + pPairScan[index+1];
		int kept = 0;
		for(int k = 0; k < count; k++)
		{
			int entry = list[k];
			if(!(entry & 0x40000000))
			{
				*changedOut = entry;
				changedOut++;
			}
			if(entry & 0x60000000)
			{
				// kept <= k, so this never overwrites an entry not yet read
				list[kept] = entry & (~0x60000000);
				kept++;
			}
		}
		int2 updated;
		updated.x = start;
		updated.y = kept;
		pPairBuffStartCurr[handle] = updated;
	}
}
|
||||
|
||||
|
||||
|
||||
|
||||
);
|
||||
@@ -0,0 +1,697 @@
|
||||
/*
|
||||
Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
|
||||
Copyright (C) 2006 - 2009 Sony Computer Entertainment Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
|
||||
|
||||
#include "LinearMath/btAlignedAllocator.h"
|
||||
#include "LinearMath/btQuickprof.h"
|
||||
#include "BulletCollision/BroadphaseCollision/btOverlappingPairCache.h"
|
||||
#include "../basic_initialize/btOpenCLUtils.h"
|
||||
|
||||
#include "bt3dGridBroadphaseOCL.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include "Adl/Adl.h"
|
||||
#include <AdlPrimitives/Scan/PrefixScan.h>
|
||||
#include <AdlPrimitives/Sort/RadixSort32.h>
|
||||
#include <AdlPrimitives/Sort/RadixSort.h>
|
||||
|
||||
#define ADD_BLOCKING_CL_FINISH_FOR_BENCHMARK
|
||||
|
||||
#define GRID_OCL_PATH "..\\..\\opencl\\3dGridBroadphase\\Shared\\bt3dGridBroadphaseOCL.cl"
|
||||
|
||||
|
||||
#define MSTRINGIFY(A) #A
|
||||
|
||||
static const char* spProgramSource =
|
||||
#include "bt3dGridBroadphaseOCL.cl"
|
||||
|
||||
// File-scope Adl state. These are plain globals (not class members, despite the
// m_ prefix on one of them): the bt3dGridBroadphaseOCL constructor assigns them
// and the destructor releases them.

// workspace of the OpenCL prefix scan
adl::PrefixScan<adl::TYPE_CL>::Data* gData1 = NULL;

// OpenCL buffer of maxSmallProxies + 2 unsigned ints, created in the constructor
adl::Buffer<unsigned int>* m_srcClBuffer = NULL;

// key/value pair layout; not referenced in the visible part of this file
struct MySortData
{
	int key;
	int value;
};

// workspace of the OpenCL 32-bit radix sort
adl::RadixSort32<adl::TYPE_CL>::Data* dataC = NULL;

// workspace of the host radix sort
adl::RadixSort<adl::TYPE_HOST>::Data* dataHost = NULL;

// constants written into m_srcClBuffer by the constructor
static unsigned int infElem = 0x2FFFFFFFu;
static unsigned int zeroEl = 0u;

// all bits set
static unsigned int minusOne = ~0u;
|
||||
|
||||
|
||||
// Builds the OpenCL grid broadphase.
// The first ten arguments are forwarded unchanged to btGpu3DGridBroadphase.
//   context, device, queue - OpenCL objects used for the program, buffers and kernels
//   deviceCL               - Adl device to use; when null, one is created here from
//                            context/device/queue and deleted in the destructor
// Besides the class members this also fills the file-scope Adl objects
// (m_srcClBuffer, gData1, dataHost, dataC).
bt3dGridBroadphaseOCL::bt3dGridBroadphaseOCL(	btOverlappingPairCache* overlappingPairCache,
												const btVector3& cellSize,
												int gridSizeX, int gridSizeY, int gridSizeZ,
												int maxSmallProxies, int maxLargeProxies, int maxPairsPerSmallProxy,
												btScalar maxSmallProxySize,
												int maxSmallProxiesPerCell,
												cl_context context, cl_device_id device, cl_command_queue queue,
												adl::DeviceCL* deviceCL
												) :
	btGpu3DGridBroadphase(overlappingPairCache, cellSize, gridSizeX, gridSizeY, gridSizeZ, maxSmallProxies, maxLargeProxies, maxPairsPerSmallProxy, maxSmallProxySize, maxSmallProxiesPerCell)
{
	// OpenCL side: program, device buffers, their initial contents, kernels
	initCL(context, device, queue);
	allocateBuffers();
	prefillBuffers();
	initKernels();

	// Adl host device
	adl::DeviceUtils::Config hostConfig;
	m_deviceHost = adl::DeviceUtils::allocate( adl::TYPE_HOST, hostConfig );

	// Adl OpenCL device: use the caller's, or wrap the given CL objects ourselves
	m_ownsDevice = (deviceCL == 0);
	if (m_ownsDevice)
	{
		deviceCL = new adl::DeviceCL;
		deviceCL->m_context = context;
		deviceCL->m_deviceIdx = device;
		deviceCL->m_commandQueue = queue;
		deviceCL->m_kernelManager = new adl::KernelManager;
	}
	m_deviceCL = deviceCL;

	// the OpenCL scan/sort workspaces are sized for at least 256K elements
	const int minSortCapacity = 256*1024;
	const int sortCapacity = (maxSmallProxies > minSortCapacity) ? maxSmallProxies : minSortCapacity;

	// element 0 = 0, element [maxSmallProxies] = infElem, element [maxSmallProxies + 1] = 0
	m_srcClBuffer = new adl::Buffer<unsigned int> (m_deviceCL, maxSmallProxies+2);
	m_srcClBuffer->write(&zeroEl, 1, 0);
	m_srcClBuffer->write(&infElem, 1, maxSmallProxies);
	m_srcClBuffer->write(&zeroEl, 1, maxSmallProxies+1);
	m_deviceCL->waitForCompletion();

	gData1 = adl::PrefixScan<adl::TYPE_CL>::allocate( m_deviceCL, sortCapacity+2, adl::PrefixScanBase::EXCLUSIVE );
	dataHost = adl::RadixSort<adl::TYPE_HOST>::allocate( m_deviceHost, maxSmallProxies+2 );
	dataC = adl::RadixSort32<adl::TYPE_CL>::allocate( m_deviceCL, sortCapacity+2 );
}
|
||||
|
||||
|
||||
|
||||
// Releases everything the constructor created: the Adl sort/scan workspaces,
// the Adl host device, the OpenCL kernels, buffers and program owned by this
// object, and - only when it was created here - the Adl OpenCL device.
// NOTE(review): the kernel/buffer/program releases were added because nothing in
// the visible code released them; confirm no other code path releases them too.
bt3dGridBroadphaseOCL::~bt3dGridBroadphaseOCL()
{
	//btSimpleBroadphase will free memory of btSortedOverlappingPairCache, because m_ownsPairCache
	assert(m_bInitialized);

	// Adl helpers (they reference m_deviceHost / m_deviceCL, so they go first)
	adl::RadixSort<adl::TYPE_HOST>::deallocate(dataHost);
	adl::PrefixScan<adl::TYPE_CL>::deallocate(gData1);
	adl::RadixSort32<adl::TYPE_CL>::deallocate(dataC);
	adl::DeviceUtils::deallocate(m_deviceHost);
	delete m_srcClBuffer;

	cl_int ciErrNum;

	// kernels created by initKernels()
	const int kernelIds[] =
	{
		GRID3DOCL_KERNEL_CALC_HASH_AABB,
		GRID3DOCL_KERNEL_CLEAR_CELL_START,
		GRID3DOCL_KERNEL_FIND_CELL_START,
		GRID3DOCL_KERNEL_FIND_OVERLAPPING_PAIRS,
		GRID3DOCL_KERNEL_FIND_PAIRS_LARGE,
		GRID3DOCL_KERNEL_COMPUTE_CACHE_CHANGES,
		GRID3DOCL_KERNEL_SQUEEZE_PAIR_BUFF
	};
	const int numKernelIds = (int)(sizeof(kernelIds) / sizeof(kernelIds[0]));
	for (int i = 0; i < numKernelIds; i++)
	{
		cl_kernel kernel = m_kernels[kernelIds[i]].m_kernel;
		if (kernel)
		{
			ciErrNum = clReleaseKernel(kernel);
			GRID3DOCL_CHECKERROR(ciErrNum, CL_SUCCESS);
			m_kernels[kernelIds[i]].m_kernel = 0;
		}
	}

	// device buffers created by allocateBuffers()
	cl_mem* buffers[] =
	{
		&m_dBodiesHash,
		&m_dCellStart,
		&m_dPairBuff,
		&m_dPairBuffStartCurr,
		&m_dAABB,
		&m_dPairScanChanged,
		&m_dPairsChanged,
		&m_dPairsContiguous,
		&m_dBpParams
	};
	const int numBuffers = (int)(sizeof(buffers) / sizeof(buffers[0]));
	for (int i = 0; i < numBuffers; i++)
	{
		if (*buffers[i])
		{
			ciErrNum = clReleaseMemObject(*buffers[i]);
			GRID3DOCL_CHECKERROR(ciErrNum, CL_SUCCESS);
			*buffers[i] = 0;
		}
	}

	// program built by initCL()
	if (m_cpProgram)
	{
		ciErrNum = clReleaseProgram(m_cpProgram);
		GRID3DOCL_CHECKERROR(ciErrNum, CL_SUCCESS);
		m_cpProgram = 0;
	}

	// the Adl OpenCL device is deleted only when the constructor created it
	if (m_ownsDevice)
	{
		delete m_deviceCL->m_kernelManager;
		delete m_deviceCL;
	}
}
|
||||
|
||||
#ifdef CL_PLATFORM_MINI_CL
|
||||
// there is a problem with MSVC9 : static constructors are not called if variables defined in library and are not used
|
||||
// looks like it is because of optimization
|
||||
// probably this will happen with other compilers as well
|
||||
// so to make it robust, register kernels again (it is safe)
|
||||
#define MINICL_DECLARE(a) extern "C" void a();
|
||||
MINICL_DECLARE(kCalcHashAABB)
|
||||
MINICL_DECLARE(kClearCellStart)
|
||||
MINICL_DECLARE(kFindCellStart)
|
||||
MINICL_DECLARE(kFindOverlappingPairs)
|
||||
MINICL_DECLARE(kFindPairsLarge)
|
||||
MINICL_DECLARE(kComputePairCacheChanges)
|
||||
MINICL_DECLARE(kSqueezeOverlappingPairBuff)
|
||||
#undef MINICL_DECLARE
|
||||
#endif
|
||||
|
||||
// Stores the OpenCL context, device and command queue and builds the kernel
// program from the embedded source string (spProgramSource).
// With MiniCL the kernels are registered explicitly first.
// The build result is now verified: the error code starts out as CL_SUCCESS, is
// checked after the build, and "OK" is printed only when a program was returned
// without error (it used to be printed unconditionally).
void bt3dGridBroadphaseOCL::initCL(cl_context context, cl_device_id device, cl_command_queue queue)
{

#ifdef CL_PLATFORM_MINI_CL
	// call constructors here
	MINICL_REGISTER(kCalcHashAABB)
	MINICL_REGISTER(kClearCellStart)
	MINICL_REGISTER(kFindCellStart)
	MINICL_REGISTER(kFindOverlappingPairs)
	MINICL_REGISTER(kFindPairsLarge)
	MINICL_REGISTER(kComputePairCacheChanges)
	MINICL_REGISTER(kSqueezeOverlappingPairBuff)
#endif

	// initialised so the check below is well defined even if the helper
	// does not touch it on success
	cl_int ciErrNum = CL_SUCCESS;

	btAssert(context);
	m_cxMainContext = context;
	btAssert(device);
	m_cdDevice = device;
	btAssert(queue);
	m_cqCommandQue = queue;

	// GUID_ARG is defined to nothing for the OpenCL build of the kernels
	m_cpProgram = btOpenCLUtils::compileCLProgramFromString(m_cxMainContext,m_cdDevice,spProgramSource, &ciErrNum,"-DGUID_ARG=""""",GRID_OCL_PATH);
	GRID3DOCL_CHECKERROR(ciErrNum, CL_SUCCESS);
	btAssert(m_cpProgram);

	if (m_cpProgram && (ciErrNum == CL_SUCCESS))
	{
		printf("OK\n");
	}
	else
	{
		printf("bt3dGridBroadphaseOCL: building the OpenCL program failed (error %d)\n", (int)ciErrNum);
	}
}
|
||||
|
||||
|
||||
void bt3dGridBroadphaseOCL::initKernels()
|
||||
{
|
||||
initKernel(GRID3DOCL_KERNEL_CALC_HASH_AABB, "kCalcHashAABB");
|
||||
setKernelArg(GRID3DOCL_KERNEL_CALC_HASH_AABB, 1, sizeof(cl_mem),(void*)&m_dAABB);
|
||||
setKernelArg(GRID3DOCL_KERNEL_CALC_HASH_AABB, 2, sizeof(cl_mem),(void*)&m_dBodiesHash);
|
||||
setKernelArg(GRID3DOCL_KERNEL_CALC_HASH_AABB, 3, sizeof(cl_mem),(void*)&m_dBpParams);
|
||||
|
||||
initKernel(GRID3DOCL_KERNEL_CLEAR_CELL_START, "kClearCellStart");
|
||||
setKernelArg(GRID3DOCL_KERNEL_CLEAR_CELL_START, 1, sizeof(cl_mem),(void*)&m_dCellStart);
|
||||
|
||||
initKernel(GRID3DOCL_KERNEL_FIND_CELL_START, "kFindCellStart");
|
||||
setKernelArg(GRID3DOCL_KERNEL_FIND_CELL_START, 1, sizeof(cl_mem),(void*)&m_dBodiesHash);
|
||||
setKernelArg(GRID3DOCL_KERNEL_FIND_CELL_START, 2, sizeof(cl_mem),(void*)&m_dCellStart);
|
||||
|
||||
initKernel(GRID3DOCL_KERNEL_FIND_OVERLAPPING_PAIRS, "kFindOverlappingPairs");
|
||||
setKernelArg(GRID3DOCL_KERNEL_FIND_OVERLAPPING_PAIRS, 1, sizeof(cl_mem),(void*)&m_dAABB);
|
||||
setKernelArg(GRID3DOCL_KERNEL_FIND_OVERLAPPING_PAIRS, 2, sizeof(cl_mem),(void*)&m_dBodiesHash);
|
||||
setKernelArg(GRID3DOCL_KERNEL_FIND_OVERLAPPING_PAIRS, 3, sizeof(cl_mem),(void*)&m_dCellStart);
|
||||
setKernelArg(GRID3DOCL_KERNEL_FIND_OVERLAPPING_PAIRS, 4, sizeof(cl_mem),(void*)&m_dPairBuff);
|
||||
setKernelArg(GRID3DOCL_KERNEL_FIND_OVERLAPPING_PAIRS, 5, sizeof(cl_mem),(void*)&m_dPairBuffStartCurr);
|
||||
setKernelArg(GRID3DOCL_KERNEL_FIND_OVERLAPPING_PAIRS, 6, sizeof(cl_mem),(void*)&m_dBpParams);
|
||||
|
||||
initKernel(GRID3DOCL_KERNEL_FIND_PAIRS_LARGE, "kFindPairsLarge");
|
||||
setKernelArg(GRID3DOCL_KERNEL_FIND_PAIRS_LARGE, 1, sizeof(cl_mem),(void*)&m_dAABB);
|
||||
setKernelArg(GRID3DOCL_KERNEL_FIND_PAIRS_LARGE, 2, sizeof(cl_mem),(void*)&m_dBodiesHash);
|
||||
setKernelArg(GRID3DOCL_KERNEL_FIND_PAIRS_LARGE, 3, sizeof(cl_mem),(void*)&m_dCellStart);
|
||||
setKernelArg(GRID3DOCL_KERNEL_FIND_PAIRS_LARGE, 4, sizeof(cl_mem),(void*)&m_dPairBuff);
|
||||
setKernelArg(GRID3DOCL_KERNEL_FIND_PAIRS_LARGE, 5, sizeof(cl_mem),(void*)&m_dPairBuffStartCurr);
|
||||
|
||||
initKernel(GRID3DOCL_KERNEL_COMPUTE_CACHE_CHANGES, "kComputePairCacheChanges");
|
||||
setKernelArg(GRID3DOCL_KERNEL_COMPUTE_CACHE_CHANGES, 1, sizeof(cl_mem),(void*)&m_dPairBuff);
|
||||
setKernelArg(GRID3DOCL_KERNEL_COMPUTE_CACHE_CHANGES, 2, sizeof(cl_mem),(void*)&m_dPairBuffStartCurr);
|
||||
setKernelArg(GRID3DOCL_KERNEL_COMPUTE_CACHE_CHANGES, 3, sizeof(cl_mem),(void*)&m_dPairScanChanged);
|
||||
setKernelArg(GRID3DOCL_KERNEL_COMPUTE_CACHE_CHANGES, 4, sizeof(cl_mem),(void*)&m_dAABB);
|
||||
|
||||
initKernel(GRID3DOCL_KERNEL_SQUEEZE_PAIR_BUFF, "kSqueezeOverlappingPairBuff");
|
||||
setKernelArg(GRID3DOCL_KERNEL_SQUEEZE_PAIR_BUFF, 1, sizeof(cl_mem),(void*)&m_dPairBuff);
|
||||
setKernelArg(GRID3DOCL_KERNEL_SQUEEZE_PAIR_BUFF, 2, sizeof(cl_mem),(void*)&m_dPairBuffStartCurr);
|
||||
setKernelArg(GRID3DOCL_KERNEL_SQUEEZE_PAIR_BUFF, 3, sizeof(cl_mem),(void*)&m_dPairScanChanged);
|
||||
setKernelArg(GRID3DOCL_KERNEL_SQUEEZE_PAIR_BUFF, 4, sizeof(cl_mem),(void*)&m_dPairsChanged);
|
||||
setKernelArg(GRID3DOCL_KERNEL_SQUEEZE_PAIR_BUFF, 5, sizeof(cl_mem),(void*)&m_dAABB);
|
||||
|
||||
}
|
||||
|
||||
|
||||
void bt3dGridBroadphaseOCL::allocateBuffers()
|
||||
{
|
||||
cl_int ciErrNum;
|
||||
unsigned int memSize;
|
||||
// current version of bitonic sort works for power of 2 arrays only, so ...
|
||||
m_hashSize = 1;
|
||||
for(int bit = 1; bit < 32; bit++)
|
||||
{
|
||||
if(m_hashSize >= m_maxHandles)
|
||||
{
|
||||
break;
|
||||
}
|
||||
m_hashSize <<= 1;
|
||||
}
|
||||
memSize = m_hashSize * 2 * sizeof(unsigned int);
|
||||
if (memSize < 1024*1024)
|
||||
memSize = 1024*1024;
|
||||
|
||||
m_dBodiesHash = clCreateBuffer(m_cxMainContext, CL_MEM_READ_WRITE, memSize, NULL, &ciErrNum);
|
||||
GRID3DOCL_CHECKERROR(ciErrNum, CL_SUCCESS);
|
||||
|
||||
memSize = m_numCells * sizeof(unsigned int);
|
||||
m_dCellStart = clCreateBuffer(m_cxMainContext, CL_MEM_READ_WRITE, memSize, NULL, &ciErrNum);
|
||||
GRID3DOCL_CHECKERROR(ciErrNum, CL_SUCCESS);
|
||||
|
||||
memSize = m_maxHandles * m_maxPairsPerBody * sizeof(unsigned int);
|
||||
m_dPairBuff = clCreateBuffer(m_cxMainContext, CL_MEM_READ_WRITE, memSize, NULL, &ciErrNum);
|
||||
GRID3DOCL_CHECKERROR(ciErrNum, CL_SUCCESS);
|
||||
|
||||
memSize = (m_maxHandles * 2 + 1) * sizeof(unsigned int);
|
||||
m_dPairBuffStartCurr = clCreateBuffer(m_cxMainContext, CL_MEM_READ_WRITE, memSize, NULL, &ciErrNum);
|
||||
GRID3DOCL_CHECKERROR(ciErrNum, CL_SUCCESS);
|
||||
|
||||
unsigned int numAABB = m_maxHandles + m_maxLargeHandles;
|
||||
memSize = numAABB * sizeof(float) * 4 * 2;
|
||||
m_dAABB = clCreateBuffer(m_cxMainContext, CL_MEM_READ_WRITE, memSize, NULL, &ciErrNum);
|
||||
GRID3DOCL_CHECKERROR(ciErrNum, CL_SUCCESS);
|
||||
|
||||
memSize = (m_maxHandles + 2) * sizeof(unsigned int);
|
||||
m_dPairScanChanged = clCreateBuffer(m_cxMainContext, CL_MEM_READ_WRITE, memSize, NULL, &ciErrNum);
|
||||
GRID3DOCL_CHECKERROR(ciErrNum, CL_SUCCESS);
|
||||
|
||||
memSize = m_maxHandles * m_maxPairsPerBody * sizeof(unsigned int);
|
||||
m_dPairsChanged = clCreateBuffer(m_cxMainContext, CL_MEM_READ_WRITE, memSize, NULL, &ciErrNum);
|
||||
GRID3DOCL_CHECKERROR(ciErrNum, CL_SUCCESS);
|
||||
|
||||
m_dPairsContiguous = clCreateBuffer(m_cxMainContext, CL_MEM_READ_WRITE, memSize, NULL, &ciErrNum);
|
||||
GRID3DOCL_CHECKERROR(ciErrNum, CL_SUCCESS);
|
||||
|
||||
memSize = 3 * 4 * sizeof(float);
|
||||
m_dBpParams = clCreateBuffer(m_cxMainContext, CL_MEM_READ_WRITE, memSize, NULL, &ciErrNum);
|
||||
GRID3DOCL_CHECKERROR(ciErrNum, CL_SUCCESS);
|
||||
}
|
||||
|
||||
void bt3dGridBroadphaseOCL::prefillBuffers()
|
||||
{
|
||||
memset(m_hBodiesHash, 0xFF, m_maxHandles*2*sizeof(unsigned int));
|
||||
copyArrayToDevice(m_dBodiesHash, m_hBodiesHash, m_maxHandles * 2 * sizeof(unsigned int));
|
||||
// now fill the rest (bitonic sorting works with size == pow of 2)
|
||||
int remainder = m_hashSize - m_maxHandles;
|
||||
if(remainder)
|
||||
{
|
||||
copyArrayToDevice(m_dBodiesHash, m_hBodiesHash, remainder * 2 * sizeof(unsigned int), m_maxHandles * 2 * sizeof(unsigned int), 0);
|
||||
}
|
||||
copyArrayToDevice(m_dPairBuffStartCurr, m_hPairBuffStartCurr, (m_maxHandles * 2 + 1) * sizeof(unsigned int));
|
||||
memset(m_hPairBuff, 0x00, m_maxHandles * m_maxPairsPerBody * sizeof(unsigned int));
|
||||
copyArrayToDevice(m_dPairBuff, m_hPairBuff, m_maxHandles * m_maxPairsPerBody * sizeof(unsigned int));
|
||||
}
|
||||
|
||||
|
||||
void bt3dGridBroadphaseOCL::initKernel(int kernelId, char* pName)
|
||||
{
|
||||
|
||||
cl_int ciErrNum;
|
||||
cl_kernel kernel = clCreateKernel(m_cpProgram, pName, &ciErrNum);
|
||||
GRID3DOCL_CHECKERROR(ciErrNum, CL_SUCCESS);
|
||||
size_t wgSize;
|
||||
ciErrNum = clGetKernelWorkGroupInfo(kernel, m_cdDevice, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &wgSize, NULL);
|
||||
GRID3DOCL_CHECKERROR(ciErrNum, CL_SUCCESS);
|
||||
m_kernels[kernelId].m_Id = kernelId;
|
||||
m_kernels[kernelId].m_kernel = kernel;
|
||||
m_kernels[kernelId].m_name = pName;
|
||||
m_kernels[kernelId].m_workgroupSize = (int)wgSize;
|
||||
return;
|
||||
}
|
||||
|
||||
void bt3dGridBroadphaseOCL::runKernelWithWorkgroupSize(int kernelId, int globalSize)
|
||||
{
|
||||
if(globalSize <= 0)
|
||||
{
|
||||
return;
|
||||
}
|
||||
cl_kernel kernelFunc = m_kernels[kernelId].m_kernel;
|
||||
cl_int ciErrNum = clSetKernelArg(kernelFunc, 0, sizeof(int), (void*)&globalSize);
|
||||
GRID3DOCL_CHECKERROR(ciErrNum, CL_SUCCESS);
|
||||
int workgroupSize = btMin(64,m_kernels[kernelId].m_workgroupSize);
|
||||
|
||||
if(workgroupSize <= 0)
|
||||
{ // let OpenCL library calculate workgroup size
|
||||
size_t globalWorkSize[2];
|
||||
globalWorkSize[0] = globalSize;
|
||||
globalWorkSize[1] = 1;
|
||||
ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue, kernelFunc, 1, NULL, globalWorkSize, NULL, 0,0,0 );
|
||||
}
|
||||
else
|
||||
{
|
||||
size_t localWorkSize[2], globalWorkSize[2];
|
||||
//workgroupSize = btMin(workgroupSize, globalSize);
|
||||
int num_t = globalSize / workgroupSize;
|
||||
int num_g = num_t * workgroupSize;
|
||||
if(num_g < globalSize)
|
||||
{
|
||||
num_t++;
|
||||
}
|
||||
localWorkSize[0] = workgroupSize;
|
||||
globalWorkSize[0] = num_t * workgroupSize;
|
||||
localWorkSize[1] = 1;
|
||||
globalWorkSize[1] = 1;
|
||||
ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue, kernelFunc, 1, NULL, globalWorkSize, localWorkSize, 0,0,0 );
|
||||
}
|
||||
GRID3DOCL_CHECKERROR(ciErrNum, CL_SUCCESS);
|
||||
ciErrNum = clFlush(m_cqCommandQue);
|
||||
GRID3DOCL_CHECKERROR(ciErrNum, CL_SUCCESS);
|
||||
}
|
||||
|
||||
|
||||
void bt3dGridBroadphaseOCL::setKernelArg(int kernelId, int argNum, int argSize, void* argPtr)
|
||||
{
|
||||
cl_int ciErrNum;
|
||||
ciErrNum = clSetKernelArg(m_kernels[kernelId].m_kernel, argNum, argSize, argPtr);
|
||||
GRID3DOCL_CHECKERROR(ciErrNum, CL_SUCCESS);
|
||||
}
|
||||
|
||||
|
||||
// Blocking upload of 'size' bytes from host memory to a device buffer.
//   device   - destination buffer
//   host     - source memory
//   size     - number of bytes; nothing happens when it is 0
//   devOffs  - byte offset into the device buffer
//   hostOffs - byte offset into the host memory
void bt3dGridBroadphaseOCL::copyArrayToDevice(cl_mem device, const void* host, unsigned int size, int devOffs, int hostOffs)
{
	if (!size)
	{
		return;
	}
	const char* src = (const char*)host + hostOffs;
	// CL_TRUE: the call returns only after the data has been copied
	cl_int ciErrNum = clEnqueueWriteBuffer(m_cqCommandQue, device, CL_TRUE, devOffs, size, src, 0, NULL, NULL);
	GRID3DOCL_CHECKERROR(ciErrNum, CL_SUCCESS);
}
|
||||
|
||||
void bt3dGridBroadphaseOCL::copyArrayFromDevice(void* host, const cl_mem device, unsigned int size, int hostOffs, int devOffs)
|
||||
{
|
||||
if (size)
|
||||
{
|
||||
cl_int ciErrNum;
|
||||
char* pHost = (char*)host + hostOffs;
|
||||
ciErrNum = clEnqueueReadBuffer(m_cqCommandQue, device, CL_TRUE, devOffs, size, pHost, 0, NULL, NULL);
|
||||
GRID3DOCL_CHECKERROR(ciErrNum, CL_SUCCESS);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
//
|
||||
// overrides
|
||||
//
|
||||
|
||||
|
||||
void bt3dGridBroadphaseOCL::prepareAABB()
|
||||
{
|
||||
btGpu3DGridBroadphase::prepareAABB();
|
||||
copyArrayToDevice(m_dAABB, m_hAABB, sizeof(bt3DGrid3F1U) * 2 * (m_numHandles + m_numLargeHandles));
|
||||
return;
|
||||
}
|
||||
|
||||
// Forwards the parameters to the base class and uploads the subset the kernels
// read: four floats (inverse cell size x, y, z and 0) followed by four ints
// (grid size x, y, z and the maximum number of bodies per cell). This is the
// layout the kernels access as pParams[0] and pParams[1].
void bt3dGridBroadphaseOCL::setParameters(bt3DGridBroadphaseParams* hostParams)
{
	btGpu3DGridBroadphase::setParameters(hostParams);

	// mirror of the device-side parameter block
	struct btParamsBpOCL
	{
		float m_invCellSize[4];
		int m_gridSize[4];
	};

	btParamsBpOCL deviceParams;

	float* invCell = deviceParams.m_invCellSize;
	invCell[0] = m_params.m_invCellSizeX;
	invCell[1] = m_params.m_invCellSizeY;
	invCell[2] = m_params.m_invCellSizeZ;
	invCell[3] = 0.f;

	int* grid = deviceParams.m_gridSize;
	grid[0] = m_params.m_gridSizeX;
	grid[1] = m_params.m_gridSizeY;
	grid[2] = m_params.m_gridSizeZ;
	grid[3] = m_params.m_maxBodiesPerCell;

	copyArrayToDevice(m_dBpParams, &deviceParams, sizeof(btParamsBpOCL));
}
|
||||
|
||||
|
||||
void bt3dGridBroadphaseOCL::calcHashAABB()
|
||||
{
|
||||
BT_PROFILE("calcHashAABB");
|
||||
#if 1
|
||||
runKernelWithWorkgroupSize(GRID3DOCL_KERNEL_CALC_HASH_AABB, m_numHandles);
|
||||
#ifdef ADD_BLOCKING_CL_FINISH_FOR_BENCHMARK
|
||||
clFinish(m_cqCommandQue);
|
||||
#endif //ADD_BLOCKING_CL_FINISH_FOR_BENCHMARK
|
||||
|
||||
#else
|
||||
btGpu3DGridBroadphase::calcHashAABB();
|
||||
#endif
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
void bt3dGridBroadphaseOCL::sortHash()
|
||||
{
|
||||
BT_PROFILE("sortHash");
|
||||
#ifdef CL_PLATFORM_MINI_CL
|
||||
//copyArrayFromDevice(m_hBodiesHash, m_dBodiesHash, m_numHandles * 2 * sizeof(unsigned int));
|
||||
btGpu3DGridBroadphase::sortHash();
|
||||
copyArrayToDevice(m_dBodiesHash, m_hBodiesHash, m_numHandles * 2 * sizeof(unsigned int));
|
||||
#else
|
||||
|
||||
//#define USE_HOST
|
||||
#ifdef USE_HOST
|
||||
copyArrayFromDevice(m_hBodiesHash, m_dBodiesHash, m_numHandles * 2 * sizeof(unsigned int));
|
||||
//adl::Buffer<unsigned int> keysIn,keysOut,valuesIn,valuesOut;
|
||||
///adl::RadixSort32<adl::TYPE_CL>::execute(dataC,keysIn,keysOut,valuesIn,valuesOut,m_numHandles);
|
||||
adl::HostBuffer<adl::SortData> inoutHost;
|
||||
inoutHost.m_device = m_deviceHost;
|
||||
inoutHost.m_ptr = (adl::SortData*)m_hBodiesHash;
|
||||
inoutHost.m_size = m_numHandles;
|
||||
adl::RadixSort<adl::TYPE_HOST>::execute(dataHost, inoutHost,m_numHandles);
|
||||
copyArrayToDevice(m_dBodiesHash, m_hBodiesHash, m_numHandles * 2 * sizeof(unsigned int));
|
||||
#else
|
||||
{
|
||||
clFinish(m_cqCommandQue);
|
||||
BT_PROFILE("RadixSort32::execute");
|
||||
adl::Buffer<adl::SortData> inout;
|
||||
inout.m_device = this->m_deviceCL;
|
||||
inout.m_size = m_numHandles;
|
||||
inout.m_ptr = (adl::SortData*)m_dBodiesHash;
|
||||
int actualHandles = m_numHandles;
|
||||
int dataAlignment = adl::RadixSort32<adl::TYPE_CL>::DATA_ALIGNMENT;
|
||||
|
||||
if (actualHandles%dataAlignment)
|
||||
{
|
||||
actualHandles += dataAlignment-(actualHandles%dataAlignment);
|
||||
}
|
||||
|
||||
adl::RadixSort32<adl::TYPE_CL>::execute(dataC,inout, actualHandles);
|
||||
#ifdef ADD_BLOCKING_CL_FINISH_FOR_BENCHMARK
|
||||
clFinish(m_cqCommandQue);
|
||||
#endif //ADD_BLOCKING_CL_FINISH_FOR_BENCHMARK
|
||||
}
|
||||
{
|
||||
//BT_PROFILE("copyArrayFromDevice");
|
||||
//copyArrayFromDevice(m_hBodiesHash, m_dBodiesHash, m_numHandles * 2 * sizeof(unsigned int));
|
||||
#ifdef ADD_BLOCKING_CL_FINISH_FOR_BENCHMARK
|
||||
clFinish(m_cqCommandQue);
|
||||
#endif //ADD_BLOCKING_CL_FINISH_FOR_BENCHMARK
|
||||
}
|
||||
|
||||
|
||||
#endif //USE_HOST
|
||||
#endif
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
void bt3dGridBroadphaseOCL::findCellStart()
|
||||
{
|
||||
#if 1
|
||||
BT_PROFILE("findCellStart");
|
||||
|
||||
#if defined(CL_PLATFORM_MINI_CL)
|
||||
btGpu3DGridBroadphase::findCellStart();
|
||||
copyArrayToDevice(m_dCellStart, m_hCellStart, m_numCells * sizeof(unsigned int));
|
||||
#else
|
||||
runKernelWithWorkgroupSize(GRID3DOCL_KERNEL_CLEAR_CELL_START, m_numCells);
|
||||
runKernelWithWorkgroupSize(GRID3DOCL_KERNEL_FIND_CELL_START, m_numHandles);
|
||||
#endif
|
||||
#ifdef ADD_BLOCKING_CL_FINISH_FOR_BENCHMARK
|
||||
clFinish(m_cqCommandQue);
|
||||
#endif
|
||||
|
||||
#else
|
||||
btGpu3DGridBroadphase::findCellStart();
|
||||
#endif
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
void bt3dGridBroadphaseOCL::findOverlappingPairs()
|
||||
{
|
||||
#if 1
|
||||
BT_PROFILE("findOverlappingPairs");
|
||||
runKernelWithWorkgroupSize(GRID3DOCL_KERNEL_FIND_OVERLAPPING_PAIRS, m_numHandles);
|
||||
#ifdef ADD_BLOCKING_CL_FINISH_FOR_BENCHMARK
|
||||
clFinish(m_cqCommandQue);
|
||||
#endif
|
||||
|
||||
#else
|
||||
btGpu3DGridBroadphase::findOverlappingPairs();
|
||||
copyArrayToDevice(m_dPairBuffStartCurr, m_hPairBuffStartCurr, (m_maxHandles * 2 + 1) * sizeof(unsigned int));
|
||||
copyArrayToDevice(m_dPairBuff, m_hPairBuff, m_maxHandles * m_maxPairsPerBody * sizeof(unsigned int));
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
void bt3dGridBroadphaseOCL::findPairsLarge()
|
||||
{
|
||||
BT_PROFILE("findPairsLarge");
|
||||
#if 1
|
||||
if(m_numLargeHandles)
|
||||
{
|
||||
setKernelArg(GRID3DOCL_KERNEL_FIND_PAIRS_LARGE, 6, sizeof(int),(void*)&m_numLargeHandles);
|
||||
runKernelWithWorkgroupSize(GRID3DOCL_KERNEL_FIND_PAIRS_LARGE, m_numHandles);
|
||||
}
|
||||
#ifdef ADD_BLOCKING_CL_FINISH_FOR_BENCHMARK
|
||||
clFinish(m_cqCommandQue);
|
||||
#endif
|
||||
|
||||
#else
|
||||
btGpu3DGridBroadphase::findPairsLarge();
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
void bt3dGridBroadphaseOCL::computePairCacheChanges()
|
||||
{
|
||||
BT_PROFILE("computePairCacheChanges");
|
||||
#if 1
|
||||
runKernelWithWorkgroupSize(GRID3DOCL_KERNEL_COMPUTE_CACHE_CHANGES, m_numHandles);
|
||||
#ifdef ADD_BLOCKING_CL_FINISH_FOR_BENCHMARK
|
||||
clFinish(m_cqCommandQue);
|
||||
#endif
|
||||
copyArrayFromDevice( m_hPairScanChanged,m_dPairScanChanged, sizeof(unsigned int)*(m_numHandles + 2));
|
||||
|
||||
#else
|
||||
btGpu3DGridBroadphase::computePairCacheChanges();
|
||||
copyArrayToDevice(m_dPairScanChanged, m_hPairScanChanged, sizeof(unsigned int)*(m_numHandles + 2));
|
||||
|
||||
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
// Defined in another translation unit.
// NOTE(review): none of the functions that follow in this file reference it -
// confirm whether this declaration is still needed.
extern cl_device_type deviceType;
|
||||
|
||||
void bt3dGridBroadphaseOCL::scanOverlappingPairBuff(bool copyToCpu)
|
||||
{
|
||||
|
||||
//Intel/CPU version doesn't handlel Adl scan well
|
||||
#if 0
|
||||
{
|
||||
copyArrayFromDevice(m_hPairScanChanged, m_dPairScanChanged, sizeof(unsigned int)*(m_numHandles + 2));
|
||||
btGpu3DGridBroadphase::scanOverlappingPairBuff();
|
||||
copyArrayToDevice(m_dPairScanChanged, m_hPairScanChanged, sizeof(unsigned int)*(m_numHandles + 2));
|
||||
m_numPrefixSum = m_hPairScanChanged[m_numHandles+1];
|
||||
clFinish(m_cqCommandQue);
|
||||
//memset(m_hPairScanChanged,0,sizeof(int)*m_maxHandles + 2);
|
||||
}
|
||||
#else
|
||||
{
|
||||
|
||||
// copyArrayFromDevice(m_hPairScanChanged, m_dPairScanChanged, sizeof(unsigned int)*(m_numHandles + 2));
|
||||
// btGpu3DGridBroadphase::scanOverlappingPairBuff();
|
||||
|
||||
adl::Buffer<unsigned int> destBuffer;
|
||||
|
||||
{
|
||||
BT_PROFILE("copy GPU->GPU");
|
||||
|
||||
destBuffer.m_ptr = (unsigned int*)m_dPairScanChanged;
|
||||
destBuffer.m_device = m_deviceCL;
|
||||
destBuffer.m_size = sizeof(unsigned int)*(m_numHandles+2);
|
||||
m_deviceCL->copy(m_srcClBuffer, &destBuffer,m_numHandles,1,1);
|
||||
|
||||
#ifdef ADD_BLOCKING_CL_FINISH_FOR_BENCHMARK
|
||||
clFinish(m_cqCommandQue);
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
{
|
||||
BT_PROFILE("PrefixScan");
|
||||
|
||||
adl::PrefixScan<adl::TYPE_CL>::execute(gData1,*m_srcClBuffer,destBuffer, m_numHandles+2,&m_numPrefixSum);
|
||||
|
||||
#ifdef ADD_BLOCKING_CL_FINISH_FOR_BENCHMARK
|
||||
clFinish(m_cqCommandQue);
|
||||
#endif
|
||||
//if (m_numPrefixSum>0x1000)
|
||||
// {
|
||||
// printf("error m_numPrefixSum==%d\n",m_numPrefixSum);
|
||||
// }
|
||||
|
||||
}
|
||||
|
||||
#if 0
|
||||
unsigned int* verifyhPairScanChanged = new unsigned int[m_maxHandles + 2];
|
||||
memset(verifyhPairScanChanged,0,sizeof(int)*m_maxHandles + 2);
|
||||
|
||||
copyArrayFromDevice(verifyhPairScanChanged, m_dPairScanChanged, sizeof(unsigned int)*(m_numHandles + 2));
|
||||
clFinish(m_cqCommandQue);
|
||||
|
||||
/*for (int i=0;i<m_numHandles+2;i++)
|
||||
{
|
||||
if (verifyhPairScanChanged[i] != m_hPairScanChanged[i])
|
||||
{
|
||||
printf("hello!\n");
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
if (1)
|
||||
{
|
||||
|
||||
//the data
|
||||
if (copyToCpu)
|
||||
{
|
||||
BT_PROFILE("copy GPU -> CPU");
|
||||
copyArrayFromDevice(m_hPairScanChanged, m_dPairScanChanged, sizeof(unsigned int)*(m_numHandles + 2));
|
||||
#ifdef ADD_BLOCKING_CL_FINISH_FOR_BENCHMARK
|
||||
clFinish(m_cqCommandQue);
|
||||
#endif
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
void bt3dGridBroadphaseOCL::squeezeOverlappingPairBuff()
|
||||
{
|
||||
BT_PROFILE("btCuda_squeezeOverlappingPairBuff");
|
||||
#if 1
|
||||
runKernelWithWorkgroupSize(GRID3DOCL_KERNEL_SQUEEZE_PAIR_BUFF, m_numHandles);
|
||||
// btCuda_squeezeOverlappingPairBuff(m_dPairBuff, m_dPairBuffStartCurr, m_dPairScanChanged, m_dPairsChanged, m_dAABB, m_numHandles);
|
||||
|
||||
//copyArrayFromDevice(m_hPairsChanged, m_dPairsChanged, sizeof(unsigned int) * m_numPrefixSum);//m_hPairScanChanged[m_numHandles+1]); //gSum
|
||||
#ifdef ADD_BLOCKING_CL_FINISH_FOR_BENCHMARK
|
||||
clFinish(m_cqCommandQue);
|
||||
#endif
|
||||
|
||||
#else
|
||||
btGpu3DGridBroadphase::squeezeOverlappingPairBuff();
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/// Resets the host-side pair buffer bookkeeping through the base class and
/// then calls prefillBuffers().
/// @param dispatcher  forwarded unchanged to the base class.
void bt3dGridBroadphaseOCL::resetPool(btDispatcher* dispatcher)
{
	// host state first ...
	btGpu3DGridBroadphase::resetPool(dispatcher);
	// ... then prefillBuffers(), as in the original call order
	prefillBuffers();
}
|
||||
|
||||
|
||||
@@ -0,0 +1,146 @@
|
||||
/*
|
||||
Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
|
||||
Copyright (C) 2006 - 2009 Sony Computer Entertainment Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
|
||||
|
||||
|
||||
#ifndef BT3DGRIDBROADPHASEOCL_H
|
||||
#define BT3DGRIDBROADPHASEOCL_H
|
||||
|
||||
// Select the OpenCL header:
//  - MiniCL when USE_MINICL is defined (any platform),
//  - the OpenCL framework header on Apple platforms,
//  - the Khronos header everywhere else.
// Previously the Apple branch included <MiniCL/cl.h> in both cases, so the
// native header could never be selected there.
//CL_PLATFORM_MINI_CL could be defined in build system
#if defined(USE_MINICL)
#include <MiniCL/cl.h>
#elif defined(__APPLE__)
#include <OpenCL/cl.h>
#else
#include <CL/cl.h>
#endif
|
||||
|
||||
// Forward declarations of the ADL device types; this header only stores
// pointers to them, so the full ADL headers are not needed here.
namespace adl
{
	struct Device;
	struct DeviceCL;
}
|
||||
|
||||
#include "BulletCollision/BroadphaseCollision/btSimpleBroadphase.h"
|
||||
#include "btGpu3DGridBroadphaseSharedTypes.h"
|
||||
#include "btGpu3DGridBroadphase.h"
|
||||
|
||||
|
||||
/// Compares an OpenCL status code `a` with the expected value `b`; on mismatch
/// prints the code and triggers btAssert.
/// Wrapped in do { } while (0) so it behaves as a single statement (safe in an
/// unbraced if/else), and each argument is evaluated exactly once.
/// Callers must terminate the invocation with a semicolon.
#define GRID3DOCL_CHECKERROR(a, b) \
	do \
	{ \
		const int grid3dOclStatus_ = (int)(a); \
		const int grid3dOclExpected_ = (int)(b); \
		if (grid3dOclStatus_ != grid3dOclExpected_) \
		{ \
			printf("3D GRID OCL Error : %d\n", grid3dOclStatus_); \
			btAssert(grid3dOclStatus_ == grid3dOclExpected_); \
		} \
	} while (0)
|
||||
|
||||
// Kernel identifiers. They are used as indices into
// bt3dGridBroadphaseOCL::m_kernels and as the kernelId argument of
// runKernelWithWorkgroupSize() / setKernelArg(); values are consecutive from 0.
enum
{
	GRID3DOCL_KERNEL_CALC_HASH_AABB = 0,		// hash of each body from its AABB
	GRID3DOCL_KERNEL_CLEAR_CELL_START,			// first pass of findCellStart()
	GRID3DOCL_KERNEL_FIND_CELL_START,			// second pass of findCellStart()
	GRID3DOCL_KERNEL_FIND_OVERLAPPING_PAIRS,	// findOverlappingPairs()
	GRID3DOCL_KERNEL_FIND_PAIRS_LARGE,			// findPairsLarge()
	GRID3DOCL_KERNEL_COMPUTE_CACHE_CHANGES,		// computePairCacheChanges()
	GRID3DOCL_KERNEL_SQUEEZE_PAIR_BUFF,			// squeezeOverlappingPairBuff()
	GRID3DOCL_KERNEL_TOTAL						// number of kernels, not a kernel itself
};
|
||||
|
||||
struct bt3dGridOCLKernelInfo
|
||||
{
|
||||
int m_Id;
|
||||
cl_kernel m_kernel;
|
||||
char* m_name;
|
||||
int m_workgroupSize;
|
||||
};
|
||||
|
||||
|
||||
///The bt3dGridBroadphaseOCL uses OpenCL-capable GPU to compute overlapping pairs
|
||||
|
||||
class bt3dGridBroadphaseOCL : public btGpu3DGridBroadphase
|
||||
{
|
||||
protected:
|
||||
int m_hashSize;
|
||||
cl_context m_cxMainContext;
|
||||
cl_device_id m_cdDevice;
|
||||
cl_command_queue m_cqCommandQue;
|
||||
cl_program m_cpProgram;
|
||||
bt3dGridOCLKernelInfo m_kernels[GRID3DOCL_KERNEL_TOTAL];
|
||||
// data buffers
|
||||
cl_mem m_dBodiesHash;
|
||||
cl_mem m_dCellStart;
|
||||
cl_mem m_dPairBuff;
|
||||
cl_mem m_dPairBuffStartCurr;
|
||||
public:
|
||||
cl_mem m_dAABB;
|
||||
protected:
|
||||
cl_mem m_dPairScanChanged;
|
||||
cl_mem m_dPairsChanged;
|
||||
cl_mem m_dPairsContiguous;
|
||||
cl_mem m_dBpParams;
|
||||
|
||||
adl::Device* m_deviceHost;
|
||||
adl::DeviceCL* m_deviceCL;
|
||||
bool m_ownsDevice;
|
||||
|
||||
|
||||
public:
|
||||
unsigned int m_numPrefixSum;
|
||||
|
||||
bt3dGridBroadphaseOCL( btOverlappingPairCache* overlappingPairCache,
|
||||
const btVector3& cellSize,
|
||||
int gridSizeX, int gridSizeY, int gridSizeZ,
|
||||
int maxSmallProxies, int maxLargeProxies, int maxPairsPerSmallProxy,
|
||||
btScalar maxSmallProxySize,
|
||||
int maxSmallProxiesPerCell = 8,
|
||||
cl_context context = NULL,
|
||||
cl_device_id device = NULL,
|
||||
cl_command_queue queue = NULL,
|
||||
adl::DeviceCL* deviceCL = 0
|
||||
);
|
||||
virtual ~bt3dGridBroadphaseOCL();
|
||||
|
||||
protected:
|
||||
void initCL(cl_context context, cl_device_id device, cl_command_queue queue);
|
||||
void initKernels();
|
||||
void allocateBuffers();
|
||||
void prefillBuffers();
|
||||
void initKernel(int kernelId, char* pName);
|
||||
void allocateArray(void** devPtr, unsigned int size);
|
||||
void freeArray(void* devPtr);
|
||||
void runKernelWithWorkgroupSize(int kernelId, int globalSize);
|
||||
void setKernelArg(int kernelId, int argNum, int argSize, void* argPtr);
|
||||
void copyArrayToDevice(cl_mem device, const void* host, unsigned int size, int devOffs = 0, int hostOffs = 0);
|
||||
void copyArrayFromDevice(void* host, const cl_mem device, unsigned int size, int hostOffs = 0, int devOffs = 0);
|
||||
|
||||
// overrides
|
||||
virtual void setParameters(bt3DGridBroadphaseParams* hostParams);
|
||||
virtual void prepareAABB();
|
||||
virtual void calcHashAABB();
|
||||
virtual void sortHash();
|
||||
virtual void findCellStart();
|
||||
virtual void findOverlappingPairs();
|
||||
virtual void findPairsLarge();
|
||||
virtual void computePairCacheChanges();
|
||||
virtual void scanOverlappingPairBuff(bool copyToCpu=true);
|
||||
virtual void squeezeOverlappingPairBuff();
|
||||
virtual void resetPool(btDispatcher* dispatcher);
|
||||
};
|
||||
|
||||
#endif //BT3DGRIDBROADPHASEOCL_H
|
||||
@@ -0,0 +1,626 @@
|
||||
/*
|
||||
Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
|
||||
Copyright (C) 2006, 2009 Sony Computer Entertainment Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
|
||||
///The 3 following lines include the CPU implementation of the kernels, keep them in this order.
|
||||
#include "btGpuDefines.h"
|
||||
#include "btGpuUtilsSharedDefs.h"
|
||||
#include "btGpuUtilsSharedCode.h"
|
||||
|
||||
|
||||
|
||||
#include "LinearMath/btAlignedAllocator.h"
|
||||
#include "LinearMath/btQuickprof.h"
|
||||
#include "BulletCollision/BroadphaseCollision/btOverlappingPairCache.h"
|
||||
|
||||
|
||||
|
||||
#include "btGpuDefines.h"
|
||||
#include "btGpuUtilsSharedDefs.h"
|
||||
|
||||
#include "btGpu3DGridBroadphaseSharedDefs.h"
|
||||
|
||||
#include "btGpu3DGridBroadphase.h"
|
||||
#include <string.h> //for memset
|
||||
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
|
||||
|
||||
// File-local copy of the grid parameters, overwritten by
// btGpu3DGridBroadphase::setParameters(). Presumably read by the shared CPU
// kernel code included at the bottom of this file
// (btGpu3DGridBroadphaseSharedCode.h) - TODO confirm.
static bt3DGridBroadphaseParams s3DGridBroadphaseParams;
|
||||
|
||||
|
||||
|
||||
/// Constructs the broadphase with its own btHashedOverlappingPairCache, which
/// is placement-constructed in 16-byte aligned memory and handed to
/// btSimpleBroadphase. All remaining set-up happens in _initialize().
btGpu3DGridBroadphase::btGpu3DGridBroadphase(	const btVector3& cellSize,
												int gridSizeX, int gridSizeY, int gridSizeZ,
												int maxSmallProxies, int maxLargeProxies, int maxPairsPerBody,
												btScalar maxSmallProxySize,
												int maxBodiesPerCell)
	: btSimpleBroadphase(maxSmallProxies,
						 new (btAlignedAlloc(sizeof(btHashedOverlappingPairCache), 16)) btHashedOverlappingPairCache),
	  m_bInitialized(false),
	  m_numBodies(0)
{
	_initialize(cellSize,
				gridSizeX, gridSizeY, gridSizeZ,
				maxSmallProxies, maxLargeProxies, maxPairsPerBody,
				maxSmallProxySize,
				maxBodiesPerCell);
}
|
||||
|
||||
|
||||
|
||||
/// Constructs the broadphase around a caller-supplied pair cache, which is
/// passed straight to btSimpleBroadphase. All remaining set-up happens in
/// _initialize().
btGpu3DGridBroadphase::btGpu3DGridBroadphase(	btOverlappingPairCache* overlappingPairCache,
												const btVector3& cellSize,
												int gridSizeX, int gridSizeY, int gridSizeZ,
												int maxSmallProxies, int maxLargeProxies, int maxPairsPerBody,
												btScalar maxSmallProxySize,
												int maxBodiesPerCell)
	: btSimpleBroadphase(maxSmallProxies, overlappingPairCache),
	  m_bInitialized(false),
	  m_numBodies(0)
{
	_initialize(cellSize,
				gridSizeX, gridSizeY, gridSizeZ,
				maxSmallProxies, maxLargeProxies, maxPairsPerBody,
				maxSmallProxySize,
				maxBodiesPerCell);
}
|
||||
|
||||
|
||||
|
||||
/// Releases the host buffers through _finalize().
/// The pair cache is not released here: btSimpleBroadphase frees it because
/// m_ownsPairCache is set.
btGpu3DGridBroadphase::~btGpu3DGridBroadphase()
{
	assert(m_bInitialized);
	_finalize();
}
|
||||
|
||||
// returns 2^n : 2^(n+1) > val >= 2^n
|
||||
int btGpu3DGridBroadphase::getFloorPowOfTwo(int val)
|
||||
{
|
||||
int mask = 0x40000000;
|
||||
for(int k = 0; k < 30; k++, mask >>= 1)
|
||||
{
|
||||
if(mask & val)
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
return mask;
|
||||
}
|
||||
|
||||
|
||||
|
||||
void btGpu3DGridBroadphase::_initialize( const btVector3& cellSize,
|
||||
int gridSizeX, int gridSizeY, int gridSizeZ,
|
||||
int maxSmallProxies, int maxLargeProxies, int maxPairsPerBody,
|
||||
btScalar maxSmallProxySize,
|
||||
int maxBodiesPerCell)
|
||||
{
|
||||
// set various paramerers
|
||||
m_ownsPairCache = true;
|
||||
m_params.m_gridSizeX = getFloorPowOfTwo(gridSizeX);
|
||||
m_params.m_gridSizeY = getFloorPowOfTwo(gridSizeY);
|
||||
m_params.m_gridSizeZ = getFloorPowOfTwo(gridSizeZ);
|
||||
m_params.m_numCells = m_params.m_gridSizeX * m_params.m_gridSizeY * m_params.m_gridSizeZ;
|
||||
m_numCells = m_params.m_numCells;
|
||||
m_params.m_invCellSizeX = btScalar(1.f) / cellSize[0];
|
||||
m_params.m_invCellSizeY = btScalar(1.f) / cellSize[1];
|
||||
m_params.m_invCellSizeZ = btScalar(1.f) / cellSize[2];
|
||||
m_maxRadius = maxSmallProxySize * btScalar(0.5f);
|
||||
m_params.m_numBodies = m_numBodies;
|
||||
m_params.m_maxBodiesPerCell = maxBodiesPerCell;
|
||||
|
||||
m_numLargeHandles = 0;
|
||||
m_maxLargeHandles = maxLargeProxies;
|
||||
|
||||
m_maxPairsPerBody = maxPairsPerBody;
|
||||
|
||||
m_LastLargeHandleIndex = -1;
|
||||
|
||||
assert(!m_bInitialized);
|
||||
|
||||
// allocate host storage
|
||||
m_hBodiesHash = new unsigned int[m_maxHandles * 2];
|
||||
memset(m_hBodiesHash, 0x00, m_maxHandles*2*sizeof(unsigned int));
|
||||
|
||||
m_hCellStart = new unsigned int[m_params.m_numCells];
|
||||
memset(m_hCellStart, 0x00, m_params.m_numCells * sizeof(unsigned int));
|
||||
|
||||
m_hPairBuffStartCurr = new unsigned int[m_maxHandles * 2 + 2];
|
||||
// --------------- for now, init with m_maxPairsPerBody for each body
|
||||
m_hPairBuffStartCurr[0] = 0;
|
||||
m_hPairBuffStartCurr[1] = 0;
|
||||
for(int i = 1; i <= m_maxHandles; i++)
|
||||
{
|
||||
m_hPairBuffStartCurr[i * 2] = m_hPairBuffStartCurr[(i-1) * 2] + m_maxPairsPerBody;
|
||||
m_hPairBuffStartCurr[i * 2 + 1] = 0;
|
||||
}
|
||||
//----------------
|
||||
unsigned int numAABB = m_maxHandles + m_maxLargeHandles;
|
||||
m_hAABB = new bt3DGrid3F1U[numAABB * 2]; // AABB Min & Max
|
||||
|
||||
m_hPairBuff = new unsigned int[m_maxHandles * m_maxPairsPerBody];
|
||||
memset(m_hPairBuff, 0x00, m_maxHandles * m_maxPairsPerBody * sizeof(unsigned int)); // needed?
|
||||
|
||||
m_hPairScanChanged = new unsigned int[m_maxHandles + 2];
|
||||
memset(m_hPairScanChanged,0,sizeof(int)*m_maxHandles + 2);
|
||||
|
||||
m_hPairsChanged = new unsigned int[m_maxHandles * m_maxPairsPerBody];
|
||||
memset(m_hPairsChanged,0,sizeof(int)*(m_maxHandles * m_maxPairsPerBody));
|
||||
|
||||
m_hAllOverlappingPairs= new MyUint2[m_maxHandles * m_maxPairsPerBody];
|
||||
memset(m_hAllOverlappingPairs,0,sizeof(MyUint2)*(m_maxHandles * m_maxPairsPerBody));
|
||||
|
||||
|
||||
// large proxies
|
||||
|
||||
// allocate handles buffer and put all handles on free list
|
||||
m_pLargeHandlesRawPtr = btAlignedAlloc(sizeof(btSimpleBroadphaseProxy) * m_maxLargeHandles, 16);
|
||||
m_pLargeHandles = new(m_pLargeHandlesRawPtr) btSimpleBroadphaseProxy[m_maxLargeHandles];
|
||||
m_firstFreeLargeHandle = 0;
|
||||
{
|
||||
for (int i = m_firstFreeLargeHandle; i < m_maxLargeHandles; i++)
|
||||
{
|
||||
m_pLargeHandles[i].SetNextFree(i + 1);
|
||||
m_pLargeHandles[i].m_uniqueId = m_maxHandles+2+i;
|
||||
}
|
||||
m_pLargeHandles[m_maxLargeHandles - 1].SetNextFree(0);
|
||||
}
|
||||
|
||||
// debug data
|
||||
m_numPairsAdded = 0;
|
||||
m_numOverflows = 0;
|
||||
|
||||
|
||||
m_bInitialized = true;
|
||||
}
|
||||
|
||||
|
||||
|
||||
void btGpu3DGridBroadphase::_finalize()
|
||||
{
|
||||
assert(m_bInitialized);
|
||||
delete [] m_hBodiesHash;
|
||||
delete [] m_hCellStart;
|
||||
delete [] m_hPairBuffStartCurr;
|
||||
delete [] m_hAABB;
|
||||
delete [] m_hPairBuff;
|
||||
delete [] m_hPairScanChanged;
|
||||
delete [] m_hPairsChanged;
|
||||
delete [] m_hAllOverlappingPairs;
|
||||
btAlignedFree(m_pLargeHandlesRawPtr);
|
||||
m_bInitialized = false;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/// Runs one complete broadphase update.
/// After the base-class update, the grid pipeline (parameters, AABBs, hash,
/// sort, cell starts, small/small pairs, small/large pairs, cache changes,
/// scan, squeeze, pair cache update) is executed only when there is at least
/// one small proxy. Large/large pairs are processed in every case.
/// Each stage is a virtual call wrapped in its own profiling scope.
void btGpu3DGridBroadphase::calculateOverlappingPairs(btDispatcher* dispatcher)
{
	btSimpleBroadphase::calculateOverlappingPairs(dispatcher);

	if (m_numHandles > 0)
	{
		{
			// update constants
			BT_PROFILE("setParameters");
			setParameters(&m_params);
		}
		{
			// prepare AABB array
			BT_PROFILE("prepareAABB");
			prepareAABB();
		}
		{
			// calculate hash
			BT_PROFILE("calcHashAABB");
			calcHashAABB();
		}
		{
			// sort bodies based on hash
			BT_PROFILE("sortHash");
			sortHash();
		}
		{
			// find start of each cell
			BT_PROFILE("findCellStart");
			findCellStart();
		}
		{
			// findOverlappingPairs (small/small)
			BT_PROFILE("findOverlappingPairs");
			findOverlappingPairs();
		}
		{
			// findOverlappingPairs (small/large)
			BT_PROFILE("findPairsLarge");
			findPairsLarge();
		}
		{
			// add pairs to CPU cache
			BT_PROFILE("computePairCacheChanges");
			computePairCacheChanges();
		}
		{
			BT_PROFILE("scanOverlappingPairBuff");
			scanOverlappingPairBuff();
		}
		{
			BT_PROFILE("squeezeOverlappingPairBuff");
			squeezeOverlappingPairBuff();
		}
		{
			BT_PROFILE("addPairsToCache");
			addPairsToCache(dispatcher);
		}
	}

	{
		// find and add large/large pairs to CPU cache
		BT_PROFILE("addLarge2LargePairsToCache");
		addLarge2LargePairsToCache(dispatcher);
	}
}
|
||||
|
||||
|
||||
|
||||
/// Applies the changed pairs to the pair cache.
/// For small proxy slot i, the entries m_hPairScanChanged[i+1] .. [i+2] delimit
/// its range inside m_hPairsChanged. Every entry is the index of the other
/// proxy combined with flag bits: with BT_3DGRID_PAIR_NEW_FLG set the pair is
/// added, otherwise it is removed. Indices >= m_maxHandles refer to the
/// large-proxy pool. m_numPairsAdded / m_numPairsRemoved are recounted.
/// @param dispatcher  passed to removeOverlappingPair().
void btGpu3DGridBroadphase::addPairsToCache(btDispatcher* dispatcher)
{
	m_numPairsAdded = 0;
	m_numPairsRemoved = 0;

	for (int slot = 0; slot < m_numHandles; ++slot)
	{
		const unsigned int rangeBegin = m_hPairScanChanged[slot + 1];
		const unsigned int rangeCount = m_hPairScanChanged[slot + 2] - rangeBegin;
		if (rangeCount == 0)
		{
			continue;
		}

		unsigned int* changes = m_hPairsChanged + rangeBegin;
		// the 'uw' field of the min entry holds the handle index of this slot
		unsigned int handle0 = m_hAABB[slot * 2].uw;
		btSimpleBroadphaseProxy* proxy0 = &m_pHandles[handle0];

		for (unsigned int k = 0; k < rangeCount; ++k)
		{
			const unsigned int flagged = changes[k];
			unsigned int handle1 = flagged & (~BT_3DGRID_PAIR_ANY_FLG);

			btSimpleBroadphaseProxy* proxy1;
			if (handle1 >= (unsigned int)m_maxHandles)
			{
				// large proxy: rebase the index into the large pool
				handle1 -= m_maxHandles;
				btAssert(handle1 < (unsigned int)m_maxLargeHandles);
				proxy1 = &m_pLargeHandles[handle1];
			}
			else
			{
				proxy1 = &m_pHandles[handle1];
			}

			if (flagged & BT_3DGRID_PAIR_NEW_FLG)
			{
				m_pairCache->addOverlappingPair(proxy0, proxy1);
				m_numPairsAdded++;
			}
			else
			{
				m_pairCache->removeOverlappingPair(proxy0, proxy1, dispatcher);
				m_numPairsRemoved++;
			}
		}
	}
}
|
||||
|
||||
|
||||
|
||||
/// Creates a proxy for the given AABB.
/// Proxies that isLargeProxy() classifies as large are placement-constructed in
/// the large-proxy pool; all others are created by btSimpleBroadphase.
/// @return the new proxy, or 0 when the large-proxy pool is exhausted.
btBroadphaseProxy* btGpu3DGridBroadphase::createProxy(  const btVector3& aabbMin,  const btVector3& aabbMax,int shapeType,void* userPtr ,short int collisionFilterGroup,short int collisionFilterMask, btDispatcher* dispatcher,void* multiSapProxy)
{
	if (!isLargeProxy(aabbMin, aabbMax))
	{
		return btSimpleBroadphase::createProxy(aabbMin, aabbMax, shapeType, userPtr, collisionFilterGroup, collisionFilterMask, dispatcher, multiSapProxy);
	}

	if (m_numLargeHandles >= m_maxLargeHandles)
	{
		///you have to increase the cell size, so 'large' proxies become 'small' proxies (fitting a cell)
		btAssert(0);
		return 0; //should never happen, but don't let the game crash ;-)
	}

	btAssert((aabbMin[0]<= aabbMax[0]) && (aabbMin[1]<= aabbMax[1]) && (aabbMin[2]<= aabbMax[2]));

	const int slot = allocLargeHandle();
	btBroadphaseProxy* created = new (&m_pLargeHandles[slot]) btSimpleBroadphaseProxy(aabbMin, aabbMax, shapeType, userPtr, collisionFilterGroup, collisionFilterMask, multiSapProxy);
	return created;
}
|
||||
|
||||
|
||||
|
||||
/// Destroys a proxy.
/// A large proxy is returned to the large-proxy free list and all pairs that
/// contain it are removed from the pair cache; a small proxy is handled by
/// btSimpleBroadphase.
void btGpu3DGridBroadphase::destroyProxy(btBroadphaseProxy* proxy, btDispatcher* dispatcher)
{
	if (!isLargeProxy(proxy))
	{
		btSimpleBroadphase::destroyProxy(proxy, dispatcher);
		return;
	}

	btSimpleBroadphaseProxy* largeProxy = static_cast<btSimpleBroadphaseProxy*>(proxy);
	freeLargeHandle(largeProxy);
	m_pairCache->removeOverlappingPairsContainingProxy(proxy, dispatcher);
}
|
||||
|
||||
|
||||
|
||||
/// Resets the pair buffer bookkeeping to its initial layout: body i starts at
/// offset i * m_maxPairsPerBody (entry [2*i]) and its second entry [2*i+1] is 0.
/// @param dispatcher  unused.
void btGpu3DGridBroadphase::resetPool(btDispatcher* dispatcher)
{
	unsigned int* entry = m_hPairBuffStartCurr;
	entry[0] = 0;
	entry[1] = 0;
	for (int body = 1; body <= m_maxHandles; ++body)
	{
		// each start offset is the previous one plus the per-body reservation
		entry[2] = entry[0] + m_maxPairsPerBody;
		entry[3] = 0;
		entry += 2;
	}
}
|
||||
|
||||
|
||||
|
||||
bool btGpu3DGridBroadphase::isLargeProxy(const btVector3& aabbMin, const btVector3& aabbMax)
|
||||
{
|
||||
btVector3 diag = aabbMax - aabbMin;
|
||||
///use the bounding sphere radius of this bounding box, to include rotation
|
||||
btScalar radius = diag.length() * btScalar(0.5f);
|
||||
return (radius > m_maxRadius);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/// Classifies an existing proxy by its unique id: _initialize() assigns ids
/// starting at m_maxHandles + 2 to the large-proxy pool, so any id at or above
/// that value is treated as a large proxy.
bool btGpu3DGridBroadphase::isLargeProxy(btBroadphaseProxy* proxy)
{
	const int firstLargeUid = m_maxHandles + 2;
	return (proxy->getUid() >= firstLargeUid);
}
|
||||
|
||||
|
||||
|
||||
/// Brute-force test of every pair of large-proxy slots up to
/// m_LastLargeHandleIndex. Overlapping pairs that are missing from the pair
/// cache are added; cached pairs that no longer overlap are removed.
/// m_LastLargeHandleIndex is rewritten with the last visited slot.
/// @param dispatcher  passed to removeOverlappingPair().
void btGpu3DGridBroadphase::addLarge2LargePairsToCache(btDispatcher* dispatcher)
{
	if (m_numLargeHandles <= 0)
	{
		return;
	}

	int lastVisited = -1;
	for (int a = 0; a <= m_LastLargeHandleIndex; ++a)
	{
		btSimpleBroadphaseProxy* proxy0 = m_pLargeHandles + a;
		lastVisited = a;

		for (int b = a + 1; b <= m_LastLargeHandleIndex; ++b)
		{
			btSimpleBroadphaseProxy* proxy1 = m_pLargeHandles + b;
			btAssert(proxy0 != proxy1);

			btSimpleBroadphaseProxy* simple0 = getSimpleProxyFromProxy(proxy0);
			btSimpleBroadphaseProxy* simple1 = getSimpleProxyFromProxy(proxy1);

			const bool overlapping = aabbOverlap(simple0, simple1);
			const bool cached = (m_pairCache->findPair(proxy0, proxy1) != 0);

			if (overlapping && !cached)
			{
				m_pairCache->addOverlappingPair(proxy0, proxy1);
			}
			else if (!overlapping && cached)
			{
				m_pairCache->removeOverlappingPair(proxy0, proxy1, dispatcher);
			}
		}
	}
	m_LastLargeHandleIndex = lastVisited;
}
|
||||
|
||||
|
||||
|
||||
void btGpu3DGridBroadphase::rayTest(const btVector3& rayFrom,const btVector3& rayTo, btBroadphaseRayCallback& rayCallback)
|
||||
{
|
||||
btSimpleBroadphase::rayTest(rayFrom, rayTo, rayCallback);
|
||||
for (int i=0; i <= m_LastLargeHandleIndex; i++)
|
||||
{
|
||||
btSimpleBroadphaseProxy* proxy = &m_pLargeHandles[i];
|
||||
rayCallback.process(proxy);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
//
|
||||
// overrides for CPU version
|
||||
//
|
||||
|
||||
|
||||
|
||||
void btGpu3DGridBroadphase::prepareAABB()
|
||||
{
|
||||
BT_PROFILE("prepareAABB");
|
||||
bt3DGrid3F1U* pBB = m_hAABB;
|
||||
int i;
|
||||
int new_largest_index = -1;
|
||||
unsigned int num_small = 0;
|
||||
for(i = 0; i <= m_LastHandleIndex; i++)
|
||||
{
|
||||
btSimpleBroadphaseProxy* proxy0 = &m_pHandles[i];
|
||||
new_largest_index = i;
|
||||
pBB->fx = proxy0->m_aabbMin.getX();
|
||||
pBB->fy = proxy0->m_aabbMin.getY();
|
||||
pBB->fz = proxy0->m_aabbMin.getZ();
|
||||
pBB->uw = i;
|
||||
pBB++;
|
||||
pBB->fx = proxy0->m_aabbMax.getX();
|
||||
pBB->fy = proxy0->m_aabbMax.getY();
|
||||
pBB->fz = proxy0->m_aabbMax.getZ();
|
||||
pBB->uw = num_small;
|
||||
pBB++;
|
||||
num_small++;
|
||||
}
|
||||
m_LastHandleIndex = new_largest_index;
|
||||
new_largest_index = -1;
|
||||
unsigned int num_large = 0;
|
||||
for(i = 0; i <= m_LastLargeHandleIndex; i++)
|
||||
{
|
||||
btSimpleBroadphaseProxy* proxy0 = &m_pLargeHandles[i];
|
||||
new_largest_index = i;
|
||||
pBB->fx = proxy0->m_aabbMin.getX();
|
||||
pBB->fy = proxy0->m_aabbMin.getY();
|
||||
pBB->fz = proxy0->m_aabbMin.getZ();
|
||||
pBB->uw = i + m_maxHandles;
|
||||
pBB++;
|
||||
pBB->fx = proxy0->m_aabbMax.getX();
|
||||
pBB->fy = proxy0->m_aabbMax.getY();
|
||||
pBB->fz = proxy0->m_aabbMax.getZ();
|
||||
pBB->uw = num_large + m_maxHandles;
|
||||
pBB++;
|
||||
num_large++;
|
||||
}
|
||||
m_LastLargeHandleIndex = new_largest_index;
|
||||
// paranoid checks
|
||||
btAssert(num_small == m_numHandles);
|
||||
btAssert(num_large == m_numLargeHandles);
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/// CPU version: copies the parameter block into the file-local
/// s3DGridBroadphaseParams.
/// @param hostParams  parameters to copy; must not be null.
void btGpu3DGridBroadphase::setParameters(bt3DGridBroadphaseParams* hostParams)
{
	const bt3DGridBroadphaseParams& source = *hostParams;
	s3DGridBroadphaseParams = source;
}
|
||||
|
||||
|
||||
|
||||
void btGpu3DGridBroadphase::calcHashAABB()
|
||||
{
|
||||
BT_PROFILE("bt3DGrid_calcHashAABB");
|
||||
btGpu_calcHashAABB(m_hAABB, m_hBodiesHash, m_numHandles);
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
void btGpu3DGridBroadphase::sortHash()
|
||||
{
|
||||
class bt3DGridHashKey
|
||||
{
|
||||
public:
|
||||
unsigned int hash;
|
||||
unsigned int index;
|
||||
void quickSort(bt3DGridHashKey* pData, int lo, int hi)
|
||||
{
|
||||
int i=lo, j=hi;
|
||||
bt3DGridHashKey x = pData[(lo+hi)/2];
|
||||
do
|
||||
{
|
||||
while(pData[i].hash > x.hash) i++;
|
||||
while(x.hash > pData[j].hash) j--;
|
||||
if(i <= j)
|
||||
{
|
||||
bt3DGridHashKey t = pData[i];
|
||||
pData[i] = pData[j];
|
||||
pData[j] = t;
|
||||
i++; j--;
|
||||
}
|
||||
} while(i <= j);
|
||||
if(lo < j) pData->quickSort(pData, lo, j);
|
||||
if(i < hi) pData->quickSort(pData, i, hi);
|
||||
}
|
||||
};
|
||||
BT_PROFILE("bt3DGrid_sortHash");
|
||||
bt3DGridHashKey* pHash = (bt3DGridHashKey*)m_hBodiesHash;
|
||||
pHash->quickSort(pHash, 0, m_numHandles - 1);
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
void btGpu3DGridBroadphase::findCellStart()
|
||||
{
|
||||
BT_PROFILE("bt3DGrid_findCellStart");
|
||||
btGpu_findCellStart(m_hBodiesHash, m_hCellStart, m_numHandles, m_params.m_numCells);
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
void btGpu3DGridBroadphase::findOverlappingPairs()
|
||||
{
|
||||
BT_PROFILE("bt3DGrid_findOverlappingPairs");
|
||||
btGpu_findOverlappingPairs(m_hAABB, m_hBodiesHash, m_hCellStart, m_hPairBuff, m_hPairBuffStartCurr, m_numHandles);
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
void btGpu3DGridBroadphase::findPairsLarge()
|
||||
{
|
||||
BT_PROFILE("bt3DGrid_findPairsLarge");
|
||||
btGpu_findPairsLarge(m_hAABB, m_hBodiesHash, m_hCellStart, m_hPairBuff, m_hPairBuffStartCurr, m_numHandles, m_numLargeHandles);
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
void btGpu3DGridBroadphase::computePairCacheChanges()
|
||||
{
|
||||
BT_PROFILE("bt3DGrid_computePairCacheChanges");
|
||||
btGpu_computePairCacheChanges(m_hPairBuff, m_hPairBuffStartCurr, m_hPairScanChanged, m_hAABB, m_numHandles);
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
void btGpu3DGridBroadphase::scanOverlappingPairBuff(bool copyToCpu)
|
||||
{
|
||||
BT_PROFILE("bt3DGrid_scanOverlappingPairBuff");
|
||||
unsigned int sum = 0;
|
||||
m_hPairScanChanged[0]=0;
|
||||
for(int i = 0; i <= m_numHandles+1; i++)
|
||||
{
|
||||
unsigned int delta = m_hPairScanChanged[i];
|
||||
m_hPairScanChanged[i] = sum;
|
||||
sum += delta;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
void btGpu3DGridBroadphase::squeezeOverlappingPairBuff()
|
||||
{
|
||||
BT_PROFILE("bt3DGrid_squeezeOverlappingPairBuff");
|
||||
//btGpu_squeezeOverlappingPairBuff(m_hPairBuff, m_hPairBuffStartCurr, m_hPairScanChanged, m_hPairsChanged, m_hAABB, m_numHandles);
|
||||
btGpu_squeezeOverlappingPairBuff(m_hPairBuff, m_hPairBuffStartCurr, m_hPairScanChanged, (unsigned int*)m_hAllOverlappingPairs, m_hAABB, m_numHandles);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
#include "btGpu3DGridBroadphaseSharedCode.h"
|
||||
|
||||
@@ -0,0 +1,154 @@
|
||||
/*
|
||||
Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
|
||||
Copyright (C) 2006, 2009 Sony Computer Entertainment Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
#ifndef BTGPU3DGRIDBROADPHASE_H
|
||||
#define BTGPU3DGRIDBROADPHASE_H
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
#include "BulletCollision/BroadphaseCollision/btSimpleBroadphase.h"
|
||||
|
||||
#include "btGpu3DGridBroadphaseSharedTypes.h"
|
||||
// Two-component integer record used for the overlapping pair list returned
// by btGpu3DGridBroadphase::getOverlap().
// Note: despite the name, both members are signed int.
struct MyUint2
{
	int x, y;
};
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
///The btGpu3DGridBroadphase uses GPU-style code compiled for CPU to compute overlapping pairs
|
||||
|
||||
class btGpu3DGridBroadphase : public btSimpleBroadphase
|
||||
{
|
||||
protected:
|
||||
bool m_bInitialized;
|
||||
unsigned int m_numBodies;
|
||||
unsigned int m_numCells;
|
||||
unsigned int m_maxPairsPerBody;
|
||||
unsigned int m_maxBodiesPerCell;
|
||||
bt3DGridBroadphaseParams m_params;
|
||||
btScalar m_maxRadius;
|
||||
// CPU data
|
||||
unsigned int* m_hBodiesHash;
|
||||
unsigned int* m_hCellStart;
|
||||
unsigned int* m_hPairBuffStartCurr;
|
||||
bt3DGrid3F1U* m_hAABB;
|
||||
unsigned int* m_hPairBuff;
|
||||
unsigned int* m_hPairScanChanged;
|
||||
unsigned int* m_hPairsChanged;
|
||||
MyUint2* m_hAllOverlappingPairs;
|
||||
// large proxies
|
||||
int m_numLargeHandles;
|
||||
int m_maxLargeHandles;
|
||||
int m_LastLargeHandleIndex;
|
||||
btSimpleBroadphaseProxy* m_pLargeHandles;
|
||||
void* m_pLargeHandlesRawPtr;
|
||||
int m_firstFreeLargeHandle;
|
||||
int allocLargeHandle()
|
||||
{
|
||||
btAssert(m_numLargeHandles < m_maxLargeHandles);
|
||||
int freeLargeHandle = m_firstFreeLargeHandle;
|
||||
m_firstFreeLargeHandle = m_pLargeHandles[freeLargeHandle].GetNextFree();
|
||||
m_numLargeHandles++;
|
||||
if(freeLargeHandle > m_LastLargeHandleIndex)
|
||||
{
|
||||
m_LastLargeHandleIndex = freeLargeHandle;
|
||||
}
|
||||
return freeLargeHandle;
|
||||
}
|
||||
void freeLargeHandle(btSimpleBroadphaseProxy* proxy)
|
||||
{
|
||||
int handle = int(proxy - m_pLargeHandles);
|
||||
btAssert((handle >= 0) && (handle < m_maxHandles));
|
||||
if(handle == m_LastLargeHandleIndex)
|
||||
{
|
||||
m_LastLargeHandleIndex--;
|
||||
}
|
||||
proxy->SetNextFree(m_firstFreeLargeHandle);
|
||||
m_firstFreeLargeHandle = handle;
|
||||
proxy->m_clientObject = 0;
|
||||
m_numLargeHandles--;
|
||||
}
|
||||
bool isLargeProxy(const btVector3& aabbMin, const btVector3& aabbMax);
|
||||
bool isLargeProxy(btBroadphaseProxy* proxy);
|
||||
// debug
|
||||
unsigned int m_numPairsAdded;
|
||||
unsigned int m_numPairsRemoved;
|
||||
unsigned int m_numOverflows;
|
||||
//
|
||||
public:
|
||||
virtual int getNumOverlap()
|
||||
{
|
||||
return m_hPairScanChanged[m_numHandles+1];
|
||||
}
|
||||
virtual MyUint2* getOverlap()
|
||||
{
|
||||
return m_hAllOverlappingPairs;
|
||||
}
|
||||
// NOTE : for better results gridSizeX, gridSizeY and gridSizeZ should be powers of 2
|
||||
btGpu3DGridBroadphase(const btVector3& cellSize,
|
||||
int gridSizeX, int gridSizeY, int gridSizeZ,
|
||||
int maxSmallProxies, int maxLargeProxies, int maxPairsPerBody,
|
||||
btScalar maxSmallProxySize,
|
||||
int maxBodiesPerCell = 8);
|
||||
btGpu3DGridBroadphase( btOverlappingPairCache* overlappingPairCache,
|
||||
const btVector3& cellSize,
|
||||
int gridSizeX, int gridSizeY, int gridSizeZ,
|
||||
int maxSmallProxies, int maxLargeProxies, int maxPairsPerBody,
|
||||
btScalar maxSmallProxySize,
|
||||
int maxBodiesPerCell = 8);
|
||||
virtual ~btGpu3DGridBroadphase();
|
||||
virtual void calculateOverlappingPairs(btDispatcher* dispatcher);
|
||||
|
||||
virtual btBroadphaseProxy* createProxy(const btVector3& aabbMin, const btVector3& aabbMax,int shapeType,void* userPtr ,short int collisionFilterGroup,short int collisionFilterMask, btDispatcher* dispatcher,void* multiSapProxy);
|
||||
virtual void destroyProxy(btBroadphaseProxy* proxy,btDispatcher* dispatcher);
|
||||
virtual void rayTest(const btVector3& rayFrom,const btVector3& rayTo, btBroadphaseRayCallback& rayCallback);
|
||||
virtual void resetPool(btDispatcher* dispatcher);
|
||||
|
||||
static int getFloorPowOfTwo(int val); // returns 2^n : 2^(n+1) > val >= 2^n
|
||||
|
||||
protected:
|
||||
void _initialize( const btVector3& cellSize,
|
||||
int gridSizeX, int gridSizeY, int gridSizeZ,
|
||||
int maxSmallProxies, int maxLargeProxies, int maxPairsPerBody,
|
||||
btScalar maxSmallProxySize,
|
||||
int maxBodiesPerCell);
|
||||
void _finalize();
|
||||
void addPairsToCache(btDispatcher* dispatcher);
|
||||
void addLarge2LargePairsToCache(btDispatcher* dispatcher);
|
||||
|
||||
// overrides for CPU version
|
||||
virtual void setParameters(bt3DGridBroadphaseParams* hostParams);
|
||||
virtual void prepareAABB();
|
||||
virtual void calcHashAABB();
|
||||
virtual void sortHash();
|
||||
virtual void findCellStart();
|
||||
virtual void findOverlappingPairs();
|
||||
virtual void findPairsLarge();
|
||||
virtual void computePairCacheChanges();
|
||||
virtual void scanOverlappingPairBuff(bool copyToCpu=true);
|
||||
virtual void squeezeOverlappingPairBuff();
|
||||
};
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
#endif //BTGPU3DGRIDBROADPHASE_H
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------------------
|
||||
@@ -0,0 +1,428 @@
|
||||
/*
|
||||
Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
|
||||
Copyright (C) 2006, 2009 Sony Computer Entertainment Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------------------
|
||||
// K E R N E L F U N C T I O N S
|
||||
//----------------------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
// calculate position in uniform grid
|
||||
BT_GPU___device__ int3 bt3DGrid_calcGridPos(float4 p)
|
||||
{
|
||||
int3 gridPos;
|
||||
gridPos.x = (int)floor(p.x * BT_GPU_params.m_invCellSizeX) & (BT_GPU_params.m_gridSizeX - 1);
|
||||
gridPos.y = (int)floor(p.y * BT_GPU_params.m_invCellSizeY) & (BT_GPU_params.m_gridSizeY - 1);
|
||||
gridPos.z = (int)floor(p.z * BT_GPU_params.m_invCellSizeZ) & (BT_GPU_params.m_gridSizeZ - 1);
|
||||
return gridPos;
|
||||
} // bt3DGrid_calcGridPos()
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
// calculate address in grid from position (clamping to edges)
|
||||
BT_GPU___device__ uint bt3DGrid_calcGridHash(int3 gridPos)
|
||||
{
|
||||
gridPos.x &= (BT_GPU_params.m_gridSizeX - 1);
|
||||
gridPos.y &= (BT_GPU_params.m_gridSizeY - 1);
|
||||
gridPos.z &= (BT_GPU_params.m_gridSizeZ - 1);
|
||||
return BT_GPU___mul24(BT_GPU___mul24(gridPos.z, BT_GPU_params.m_gridSizeY), BT_GPU_params.m_gridSizeX) + BT_GPU___mul24(gridPos.y, BT_GPU_params.m_gridSizeX) + gridPos.x;
|
||||
} // bt3DGrid_calcGridHash()
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
// calculate grid hash value for each body using its AABB
|
||||
BT_GPU___global__ void calcHashAABBD(bt3DGrid3F1U* pAABB, uint2* pHash, uint numBodies)
|
||||
{
|
||||
int index = BT_GPU___mul24(BT_GPU_blockIdx.x, BT_GPU_blockDim.x) + BT_GPU_threadIdx.x;
|
||||
if(index >= (int)numBodies)
|
||||
{
|
||||
return;
|
||||
}
|
||||
bt3DGrid3F1U bbMin = pAABB[index*2];
|
||||
bt3DGrid3F1U bbMax = pAABB[index*2 + 1];
|
||||
float4 pos;
|
||||
pos.x = (bbMin.fx + bbMax.fx) * 0.5f;
|
||||
pos.y = (bbMin.fy + bbMax.fy) * 0.5f;
|
||||
pos.z = (bbMin.fz + bbMax.fz) * 0.5f;
|
||||
// get address in grid
|
||||
int3 gridPos = bt3DGrid_calcGridPos(pos);
|
||||
uint gridHash = bt3DGrid_calcGridHash(gridPos);
|
||||
// store grid hash and body index
|
||||
pHash[index] = BT_GPU_make_uint2(gridHash, index);
|
||||
} // calcHashAABBD()
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
// For every cell that contains at least one body, records the position in
// the hash array at which that cell's run of entries begins.
//
// pHash     : (cell hash, body index) pairs; the comparison with the previous
//             entry assumes equal hashes are adjacent (presumably sorted by
//             the caller - confirm)
// cellStart : output, indexed by cell hash; only cells that occur are written
// numBodies : number of entries in pHash
BT_GPU___global__ void findCellStartD(uint2* pHash, uint* cellStart, uint numBodies)
{
	int index = BT_GPU___mul24(BT_GPU_blockIdx.x, BT_GPU_blockDim.x) + BT_GPU_threadIdx.x;
	if(index >= (int)numBodies)
	{
		return;
	}
	uint2 sortedData = pHash[index];
	const uint myHash = sortedData.x;
	// Each thread publishes its own hash one slot to the right, so that slot
	// [threadIdx.x] holds the hash of the previous entry. 257 slots allow
	// blocks of up to 256 threads.
	BT_GPU___shared__ uint sharedHash[257];
	sharedHash[BT_GPU_threadIdx.x+1] = myHash;
	if((index > 0) && (BT_GPU_threadIdx.x == 0))
	{
		// slot 0 belongs to the entry just before this block
		volatile uint2 prevData = pHash[index-1];
		sharedHash[0] = prevData.x;
	}
	BT_GPU___syncthreads();
	// index 0 is tested first, so the unset slot 0 is never read for it
	const bool startsNewCell = (index == 0) || (myHash != sharedHash[BT_GPU_threadIdx.x]);
	if(startsNewCell)
	{
		cellStart[myHash] = index;
	}
} // findCellStartD()
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
// Returns 1 when the two boxes (min0,max0) and (min1,max1) overlap on all
// three axes, 0 otherwise. Touching boxes count as overlapping because the
// comparisons are inclusive. The uw members are ignored.
BT_GPU___device__ uint cudaTestAABBOverlap(bt3DGrid3F1U min0, bt3DGrid3F1U max0, bt3DGrid3F1U min1, bt3DGrid3F1U max1)
{
	const bool overlapX = (min0.fx <= max1.fx) && (min1.fx <= max0.fx);
	const bool overlapY = (min0.fy <= max1.fy) && (min1.fy <= max0.fy);
	const bool overlapZ = (min0.fz <= max1.fz) && (min1.fz <= max0.fz);
	return overlapX && overlapY && overlapZ;
} // cudaTestAABBOverlap()
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
// Tests the body at sorted position `index` against the bodies stored in the
// grid cell `gridPos` and records overlaps in that body's pair list.
//
// gridPos            : cell to examine (wrapped by bt3DGrid_calcGridHash)
// index              : position of the current body in pHash
// pHash              : (cell hash, body index) pairs
// pCellStart         : first pHash position per cell, 0xffffffff for an empty cell
// pAABB              : two entries per body (min, max); min.uw is the handle index
// pPairBuff          : pair entries: partner handle index plus BT_3DGRID_PAIR_* flags
// pPairBuffStartCurr : per handle, x = first slot in pPairBuff, y = slots in use
// numBodies          : number of entries in pHash
//
// A partner already present is marked FOUND; a new partner is appended with
// NEW. Only partners with a smaller body index are considered, which also
// skips the body itself.
BT_GPU___device__ void findPairsInCell(	int3	gridPos,
										uint	index,
										uint2*	pHash,
										uint*	pCellStart,
										bt3DGrid3F1U* pAABB,
										uint*	pPairBuff,
										uint2*	pPairBuffStartCurr,
										uint	numBodies)
{
	uint gridHash = bt3DGrid_calcGridHash(gridPos);
	// get start of bucket for this cell
	uint bucketStart = pCellStart[gridHash];
	if(bucketStart == 0xffffffff)
	{
		return; // cell empty
	}
	// box and pair list of the body we are working for
	uint2 sortedData = pHash[index];
	uint unsorted_indx = sortedData.y;
	bt3DGrid3F1U min0 = BT_GPU_FETCH(pAABB, unsorted_indx*2);
	bt3DGrid3F1U max0 = BT_GPU_FETCH(pAABB, unsorted_indx*2 + 1);
	uint handleIndex = min0.uw;
	uint2 start_curr = pPairBuffStartCurr[handleIndex];
	uint start = start_curr.x;
	uint curr = start_curr.y;
	// capacity: the slots up to the next handle's start, minus one kept in reserve
	uint2 start_curr_next = pPairBuffStartCurr[handleIndex+1];
	uint curr_max = start_curr_next.x - start - 1;
	// never look at more than m_maxBodiesPerCell entries, nor past the array
	uint bucketEnd = bucketStart + BT_GPU_params.m_maxBodiesPerCell;
	if(bucketEnd > numBodies)
	{
		bucketEnd = numBodies;
	}
	for(uint slot = bucketStart; slot < bucketEnd; slot++)
	{
		uint2 cellData = pHash[slot];
		if(cellData.x != gridHash)
		{
			break; // no longer in same bucket
		}
		uint unsorted_indx2 = cellData.y;
		if(unsorted_indx2 >= unsorted_indx)
		{
			continue; // self, or a pair that the other body reports
		}
		bt3DGrid3F1U min1 = BT_GPU_FETCH(pAABB, unsorted_indx2*2);
		bt3DGrid3F1U max1 = BT_GPU_FETCH(pAABB, unsorted_indx2*2 + 1);
		if(!cudaTestAABBOverlap(min0, max0, min1, max1))
		{
			continue;
		}
		uint handleIndex2 = min1.uw;
		// look for the partner among the entries already stored
		bool alreadyStored = false;
		for(uint k = 0; k < curr; k++)
		{
			uint old_pair = pPairBuff[start+k] & (~BT_3DGRID_PAIR_ANY_FLG);
			if(old_pair == handleIndex2)
			{
				pPairBuff[start+k] |= BT_3DGRID_PAIR_FOUND_FLG;
				alreadyStored = true;
				break;
			}
		}
		if(alreadyStored)
		{
			continue;
		}
		if(curr >= curr_max)
		{ // not a good solution, but let's avoid crash
			break;
		}
		pPairBuff[start+curr] = handleIndex2 | BT_3DGRID_PAIR_NEW_FLG;
		curr++;
	}
	pPairBuffStartCurr[handleIndex] = BT_GPU_make_uint2(start, curr);
	return;
} // findPairsInCell()
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
// For each body, searches the 3x3x3 block of grid cells around the cell that
// holds the centre of its AABB and records overlapping partners through
// findPairsInCell().
//
// pAABB              : two entries per body (min, max)
// pHash              : (cell hash, body index) pairs
// pCellStart         : first pHash position per cell
// pPairBuff          : pair entries, updated in place
// pPairBuffStartCurr : per-handle (start, count) into pPairBuff, updated in place
// numBodies          : number of entries in pHash; extra threads do nothing
BT_GPU___global__ void findOverlappingPairsD(	bt3DGrid3F1U* pAABB, uint2* pHash, uint* pCellStart,
												uint* pPairBuff, uint2* pPairBuffStartCurr, uint numBodies)
{
	int index = BT_GPU___mul24(BT_GPU_blockIdx.x, BT_GPU_blockDim.x) + BT_GPU_threadIdx.x;
	if(index >= (int)numBodies)
	{
		return;
	}
	uint2 sortedData = pHash[index];
	uint unsorted_indx = sortedData.y;
	bt3DGrid3F1U bbMin = BT_GPU_FETCH(pAABB, unsorted_indx*2);
	bt3DGrid3F1U bbMax = BT_GPU_FETCH(pAABB, unsorted_indx*2 + 1);
	float4 pos;
	pos.x = (bbMin.fx + bbMax.fx) * 0.5f;
	pos.y = (bbMin.fy + bbMax.fy) * 0.5f;
	pos.z = (bbMin.fz + bbMax.fz) * 0.5f;
	// w is unused by the grid lookup, but pos is passed by value, so give it
	// a defined value instead of copying an uninitialized float
	pos.w = 0.0f;
	// get address in grid
	int3 gridPos = bt3DGrid_calcGridPos(pos);
	// examine only neighbouring cells
	for(int z=-1; z<=1; z++) {
		for(int y=-1; y<=1; y++) {
			for(int x=-1; x<=1; x++) {
				findPairsInCell(gridPos + BT_GPU_make_int3(x, y, z), index, pHash, pCellStart, pAABB, pPairBuff, pPairBuffStartCurr, numBodies);
			}
		}
	}
} // findOverlappingPairsD()
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
// Tests each body against every "large" box and records overlaps in the
// body's pair list.
//
// pAABB              : two entries per box (min, max); the large boxes follow
//                      the numBodies regular ones in the same array
// pHash              : (cell hash, body index) pairs
// pCellStart         : not used here
// pPairBuff          : pair entries: partner handle index plus BT_3DGRID_PAIR_* flags
// pPairBuffStartCurr : per handle, x = first slot in pPairBuff, y = slots in use
// numBodies          : number of regular bodies; extra threads do nothing
// numLarge           : number of large boxes
//
// A partner already present is marked FOUND; a new partner is appended with NEW.
BT_GPU___global__ void findPairsLargeD(	bt3DGrid3F1U* pAABB, uint2* pHash, uint* pCellStart, uint* pPairBuff,
										uint2* pPairBuffStartCurr, uint numBodies, uint numLarge)
{
	int index = BT_GPU___mul24(BT_GPU_blockIdx.x, BT_GPU_blockDim.x) + BT_GPU_threadIdx.x;
	if(index >= (int)numBodies)
	{
		return;
	}
	uint2 sortedData = pHash[index];
	uint unsorted_indx = sortedData.y;
	bt3DGrid3F1U min0 = BT_GPU_FETCH(pAABB, unsorted_indx*2);
	bt3DGrid3F1U max0 = BT_GPU_FETCH(pAABB, unsorted_indx*2 + 1);
	uint handleIndex = min0.uw;
	uint2 start_curr = pPairBuffStartCurr[handleIndex];
	uint start = start_curr.x;
	uint curr = start_curr.y;
	// capacity: the slots up to the next handle's start, minus one kept in reserve
	uint2 start_curr_next = pPairBuffStartCurr[handleIndex+1];
	uint curr_max = start_curr_next.x - start - 1;
	for(uint i = 0; i < numLarge; i++)
	{
		uint indx2 = numBodies + i;
		bt3DGrid3F1U min1 = BT_GPU_FETCH(pAABB, indx2*2);
		bt3DGrid3F1U max1 = BT_GPU_FETCH(pAABB, indx2*2 + 1);
		if(cudaTestAABBOverlap(min0, max0, min1, max1))
		{
			uint k;
			uint handleIndex2 = min1.uw;
			for(k = 0; k < curr; k++)
			{
				uint old_pair = pPairBuff[start+k] & (~BT_3DGRID_PAIR_ANY_FLG);
				if(old_pair == handleIndex2)
				{
					pPairBuff[start+k] |= BT_3DGRID_PAIR_FOUND_FLG;
					break;
				}
			}
			if(k == curr)
			{
				// Check the capacity before storing, as findPairsInCell() does,
				// so that a full list is left untouched instead of receiving an
				// entry that is never counted.
				if(curr >= curr_max)
				{ // not a good solution, but let's avoid crash
					break;
				}
				pPairBuff[start+curr] = handleIndex2 | BT_3DGRID_PAIR_NEW_FLG;
				curr++;
			}
		}
	}
	pPairBuffStartCurr[handleIndex] = BT_GPU_make_uint2(start, curr);
	return;
} // findPairsLargeD()
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
// Counts, for each body, the pair-list entries that carry at least one of the
// BT_3DGRID_PAIR_* flags (FOUND or NEW) and stores the count at
// pPairScan[index + 1]. Slot 0 of pPairScan is not written here.
//
// pPairBuff          : pair entries
// pPairBuffStartCurr : per handle, x = first slot in pPairBuff, y = slots in use
// pPairScan          : output counts, shifted by one slot
// pAABB              : two entries per body; the uw of the min entry is the handle index
// numBodies          : number of bodies; extra threads do nothing
BT_GPU___global__ void computePairCacheChangesD(uint* pPairBuff, uint2* pPairBuffStartCurr,
												uint* pPairScan, bt3DGrid3F1U* pAABB, uint numBodies)
{
	int index = BT_GPU___mul24(BT_GPU_blockIdx.x, BT_GPU_blockDim.x) + BT_GPU_threadIdx.x;
	if(index >= (int)numBodies)
	{
		return;
	}
	bt3DGrid3F1U bbMin = pAABB[index * 2];
	uint handleIndex = bbMin.uw;
	uint2 start_curr = pPairBuffStartCurr[handleIndex];
	uint* entries = pPairBuff + start_curr.x;
	uint numEntries = start_curr.y;
	uint flagged = 0;
	for(uint k = 0; k < numEntries; k++)
	{
		if(entries[k] & BT_3DGRID_PAIR_ANY_FLG)
		{
			flagged++;
		}
	}
	pPairScan[index+1] = flagged;
} // computePairCacheChangesD()
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
// For each body, copies its flagged pair entries (FOUND or NEW) to the output
// list and compacts the body's own pair list so that only those entries
// remain, with the flags cleared.
//
// pPairBuff          : pair entries, compacted in place
// pPairBuffStartCurr : per handle (start, count); count is replaced by the
//                      number of entries kept
// pPairScan          : pPairScan[index + 1] is the body's first output slot
// pPairOut           : output, (own handle index, partner handle index) pairs
// pAABB              : two entries per body; the uw of the min entry is the handle index
// numBodies          : number of bodies; extra threads do nothing
//
// Assumes pPairOut and pPairBuff do not overlap.
BT_GPU___global__ void squeezeOverlappingPairBuffD(uint* pPairBuff, uint2* pPairBuffStartCurr, uint* pPairScan,
												   uint2* pPairOut, bt3DGrid3F1U* pAABB, uint numBodies)
{
	int index = BT_GPU___mul24(BT_GPU_blockIdx.x, BT_GPU_blockDim.x) + BT_GPU_threadIdx.x;
	if(index >= (int)numBodies)
	{
		return;
	}
	bt3DGrid3F1U bbMin = pAABB[index * 2];
	uint handleIndex = bbMin.uw;
	uint2 start_curr = pPairBuffStartCurr[handleIndex];
	uint start = start_curr.x;
	uint curr = start_curr.y;
	uint* entries = pPairBuff + start;
	uint2* outPair = pPairOut + pPairScan[index+1];
	uint kept = 0;
	for(uint k = 0; k < curr; k++)
	{
		uint raw = entries[k];
		if(!(raw & BT_3DGRID_PAIR_ANY_FLG))
		{
			continue; // neither seen again nor new: drop it
		}
		uint partner = raw & (~BT_3DGRID_PAIR_ANY_FLG);
		// report the pair
		outPair->x = handleIndex;
		outPair->y = partner;
		outPair++;
		// keep it, unflagged, at the front of the list (kept <= k)
		entries[kept] = partner;
		kept++;
	}
	pPairBuffStartCurr[handleIndex] = BT_GPU_make_uint2(start, kept);
} // squeezeOverlappingPairBuffD()
|
||||
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------------------
|
||||
// E N D O F K E R N E L F U N C T I O N S
|
||||
//----------------------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
extern "C"
|
||||
{
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
// Host-side launcher for calcHashAABBD.
// `hash` is handed to the kernel as an array of uint2, i.e. two unsigned
// ints per body. 256 is passed to computeGridSize as the block-size
// argument (presumably threads per block - confirm).
void BT_GPU_PREF(calcHashAABB)(bt3DGrid3F1U* pAABB, unsigned int* hash, unsigned int numBodies)
{
	int numBlocks, numThreads;
	BT_GPU_PREF(computeGridSize)(numBodies, 256, numBlocks, numThreads);
	uint2* hashPairs = (uint2*)hash;
	// execute the kernel
	BT_GPU_EXECKERNEL(numBlocks, numThreads, calcHashAABBD, (pAABB, hashPairs, numBodies));
	// check if kernel invocation generated an error
	BT_GPU_CHECK_ERROR("calcHashAABBD kernel execution failed");
} // calcHashAABB()
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
// Host-side launcher for findCellStartD.
// Every byte of cellStart is first set to 0xff, so each of the numCells
// entries reads 0xffffffff ("empty") until the kernel overwrites it.
void BT_GPU_PREF(findCellStart)(unsigned int* hash, unsigned int* cellStart, unsigned int numBodies, unsigned int numCells)
{
	int numBlocks, numThreads;
	BT_GPU_PREF(computeGridSize)(numBodies, 256, numBlocks, numThreads);
	BT_GPU_SAFE_CALL(BT_GPU_Memset(cellStart, 0xffffffff, numCells*sizeof(uint)));
	uint2* hashPairs = (uint2*)hash;
	BT_GPU_EXECKERNEL(numBlocks, numThreads, findCellStartD, (hashPairs, cellStart, numBodies));
	BT_GPU_CHECK_ERROR("Kernel execution failed: findCellStartD");
} // findCellStart()
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
// Host-side launcher for findOverlappingPairsD.
// pHash and pPairBuffStartCurr are handed to the kernel as arrays of uint2.
// When B_CUDA_USE_TEX is set, the AABB array is bound to a texture for the
// duration of the launch.
void BT_GPU_PREF(findOverlappingPairs)(bt3DGrid3F1U* pAABB, unsigned int* pHash, unsigned int* pCellStart, unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int numBodies)
{
#if B_CUDA_USE_TEX
	BT_GPU_SAFE_CALL(cudaBindTexture(0, pAABBTex, pAABB, numBodies * 2 * sizeof(bt3DGrid3F1U)));
#endif
	int numBlocks, numThreads;
	BT_GPU_PREF(computeGridSize)(numBodies, 64, numBlocks, numThreads);
	uint2* hashPairs = (uint2*)pHash;
	uint2* startCurr = (uint2*)pPairBuffStartCurr;
	BT_GPU_EXECKERNEL(numBlocks, numThreads, findOverlappingPairsD, (pAABB, hashPairs, pCellStart, pPairBuff, startCurr, numBodies));
	BT_GPU_CHECK_ERROR("Kernel execution failed: bt_CudaFindOverlappingPairsD");
#if B_CUDA_USE_TEX
	BT_GPU_SAFE_CALL(cudaUnbindTexture(pAABBTex));
#endif
} // findOverlappingPairs()
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
// Host-side launcher for findPairsLargeD.
// The AABB array covers numBodies regular boxes followed by numLarge large
// ones; with B_CUDA_USE_TEX set, that whole range is bound to a texture for
// the duration of the launch.
void BT_GPU_PREF(findPairsLarge)(bt3DGrid3F1U* pAABB, unsigned int* pHash, unsigned int* pCellStart, unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int numBodies, unsigned int numLarge)
{
#if B_CUDA_USE_TEX
	BT_GPU_SAFE_CALL(cudaBindTexture(0, pAABBTex, pAABB, (numBodies+numLarge) * 2 * sizeof(bt3DGrid3F1U)));
#endif
	int numBlocks, numThreads;
	BT_GPU_PREF(computeGridSize)(numBodies, 64, numBlocks, numThreads);
	uint2* hashPairs = (uint2*)pHash;
	uint2* startCurr = (uint2*)pPairBuffStartCurr;
	BT_GPU_EXECKERNEL(numBlocks, numThreads, findPairsLargeD, (pAABB, hashPairs, pCellStart, pPairBuff, startCurr, numBodies, numLarge));
	BT_GPU_CHECK_ERROR("Kernel execution failed: btCuda_findPairsLargeD");
#if B_CUDA_USE_TEX
	BT_GPU_SAFE_CALL(cudaUnbindTexture(pAABBTex));
#endif
} // findPairsLarge()
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
// Host-side launcher for computePairCacheChangesD.
// pPairBuffStartCurr is handed to the kernel as an array of uint2.
void BT_GPU_PREF(computePairCacheChanges)(unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int* pPairScan, bt3DGrid3F1U* pAABB, unsigned int numBodies)
{
	int numBlocks, numThreads;
	BT_GPU_PREF(computeGridSize)(numBodies, 256, numBlocks, numThreads);
	uint2* startCurr = (uint2*)pPairBuffStartCurr;
	BT_GPU_EXECKERNEL(numBlocks, numThreads, computePairCacheChangesD, (pPairBuff, startCurr, pPairScan, pAABB, numBodies));
	BT_GPU_CHECK_ERROR("Kernel execution failed: btCudaComputePairCacheChangesD");
} // computePairCacheChanges()
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
// Host-side launcher for squeezeOverlappingPairBuffD.
// pPairBuffStartCurr and pPairOut are handed to the kernel as arrays of uint2.
void BT_GPU_PREF(squeezeOverlappingPairBuff)(unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int* pPairScan, unsigned int* pPairOut, bt3DGrid3F1U* pAABB, unsigned int numBodies)
{
	int numBlocks, numThreads;
	BT_GPU_PREF(computeGridSize)(numBodies, 256, numBlocks, numThreads);
	uint2* startCurr = (uint2*)pPairBuffStartCurr;
	uint2* pairsOut = (uint2*)pPairOut;
	BT_GPU_EXECKERNEL(numBlocks, numThreads, squeezeOverlappingPairBuffD, (pPairBuff, startCurr, pPairScan, pairsOut, pAABB, numBodies));
	BT_GPU_CHECK_ERROR("Kernel execution failed: btCudaSqueezeOverlappingPairBuffD");
} // squeezeOverlappingPairBuff()
|
||||
|
||||
//------------------------------------------------------------------------------------------------
|
||||
|
||||
} // extern "C"
|
||||
|
||||
//------------------------------------------------------------------------------------------------
|
||||
//------------------------------------------------------------------------------------------------
|
||||
//------------------------------------------------------------------------------------------------
|
||||
@@ -0,0 +1,61 @@
|
||||
/*
|
||||
Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
|
||||
Copyright (C) 2006, 2009 Sony Computer Entertainment Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
// Shared definitions for GPU-based 3D Grid collision detection broadphase
|
||||
|
||||
//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
// Keep this file free from Bullet headers
|
||||
// it is included into both CUDA and CPU code
|
||||
//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
#ifndef BTGPU3DGRIDBROADPHASESHAREDDEFS_H
|
||||
#define BTGPU3DGRIDBROADPHASESHAREDDEFS_H
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
#include "btGpu3DGridBroadphaseSharedTypes.h"
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
extern "C"
|
||||
{
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
void BT_GPU_PREF(calcHashAABB)(bt3DGrid3F1U* pAABB, unsigned int* hash, unsigned int numBodies);
|
||||
|
||||
void BT_GPU_PREF(findCellStart)(unsigned int* hash, unsigned int* cellStart, unsigned int numBodies, unsigned int numCells);
|
||||
|
||||
void BT_GPU_PREF(findOverlappingPairs)(bt3DGrid3F1U* pAABB, unsigned int* pHash, unsigned int* pCellStart, unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int numBodies);
|
||||
|
||||
void BT_GPU_PREF(findPairsLarge)(bt3DGrid3F1U* pAABB, unsigned int* pHash, unsigned int* pCellStart, unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int numBodies, unsigned int numLarge);
|
||||
|
||||
void BT_GPU_PREF(computePairCacheChanges)(unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int* pPairScan, bt3DGrid3F1U* pAABB, unsigned int numBodies);
|
||||
|
||||
void BT_GPU_PREF(squeezeOverlappingPairBuff)(unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int* pPairScan, unsigned int* pPairOut, bt3DGrid3F1U* pAABB, unsigned int numBodies);
|
||||
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
} // extern "C"
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
#endif // BTGPU3DGRIDBROADPHASESHAREDDEFS_H
|
||||
|
||||
@@ -0,0 +1,64 @@
|
||||
/*
|
||||
Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
|
||||
Copyright (C) 2006, 2009 Sony Computer Entertainment Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
// Shared definitions for GPU-based 3D Grid collision detection broadphase
|
||||
|
||||
//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
// Keep this file free from Bullet headers
|
||||
// it is included into both CUDA and CPU code
|
||||
//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
#ifndef BTGPU3DGRIDBROADPHASESHAREDTYPES_H
|
||||
#define BTGPU3DGRIDBROADPHASESHAREDTYPES_H
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
// Flag bits stored in a pair-buffer entry next to the partner's handle index.
// FOUND : the entry was already stored and the overlap was detected again
// NEW   : the entry was appended during the current search
// ANY   : mask covering both flags; clear it to recover the handle index
#define BT_3DGRID_PAIR_FOUND_FLG	(0x40000000)
#define BT_3DGRID_PAIR_NEW_FLG		(0x20000000)
#define BT_3DGRID_PAIR_ANY_FLG		(BT_3DGRID_PAIR_FOUND_FLG | BT_3DGRID_PAIR_NEW_FLG)
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
// Grid configuration read by the broadphase kernels.
struct bt3DGridBroadphaseParams
{
	// grid dimensions in cells; the kernels mask cell coordinates with (size - 1)
	unsigned int m_gridSizeX, m_gridSizeY, m_gridSizeZ;
	// presumably the total cell count - confirm against the code that fills it
	unsigned int m_numCells;
	// factors that turn a position into a cell coordinate, one per axis
	float m_invCellSizeX, m_invCellSizeY, m_invCellSizeZ;
	// presumably the number of bodies in the grid - confirm against the code that fills it
	unsigned int m_numBodies;
	// upper limit on the entries examined per cell during the pair search
	unsigned int m_maxBodiesPerCell;
};
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
// Three floats plus one unsigned int: one corner of an AABB.
// The AABB arrays hold two of these per box (minimum, then maximum); the
// kernels read `uw` of the minimum entry as the box's handle index.
struct bt3DGrid3F1U
{
	float fx, fy, fz;
	unsigned int uw;
};
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
#endif // BTGPU3DGRIDBROADPHASESHAREDTYPES_H
|
||||
|
||||
@@ -0,0 +1,211 @@
|
||||
/*
|
||||
Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
|
||||
Copyright (C) 2006, 2009 Sony Computer Entertainment Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
|
||||
|
||||
|
||||
// definitions for "GPU on CPU" code
|
||||
|
||||
|
||||
#ifndef BT_GPU_DEFINES_H
|
||||
#define BT_GPU_DEFINES_H
|
||||
|
||||
// short alias for unsigned int
typedef unsigned int uint;
|
||||
|
||||
// Two-component signed integer vector.
struct int2
{
	int x;
	int y;
};
|
||||
|
||||
// Two-component unsigned integer vector.
struct uint2
{
	unsigned int x;
	unsigned int y;
};
|
||||
|
||||
// Three-component signed integer vector.
struct int3
{
	int x;
	int y;
	int z;
};
|
||||
|
||||
// Three-component unsigned integer vector.
struct uint3
{
	unsigned int x;
	unsigned int y;
	unsigned int z;
};
|
||||
|
||||
// Four-component float vector.
struct float4
{
	float x;
	float y;
	float z;
	float w;
};
|
||||
|
||||
// Three-component float vector.
struct float3
{
	float x;
	float y;
	float z;
};
|
||||
|
||||
|
||||
// CPU stand-ins for CUDA-style qualifiers and intrinsics (this header holds the
// definitions for the "GPU on CPU" code path).
// device-function qualifier -> plain inline function
#define BT_GPU___device__ inline
|
||||
// device-data qualifier -> expands to nothing
#define BT_GPU___devdata__
|
||||
// constant-memory qualifier -> expands to nothing
#define BT_GPU___constant__
|
||||
// NOTE: max/min evaluate their arguments twice; do not pass expressions with side effects
#define BT_GPU_max(a, b) ((a) > (b) ? (a) : (b))
|
||||
#define BT_GPU_min(a, b) ((a) < (b) ? (a) : (b))
|
||||
// refers to a symbol named s3DGridBroadphaseParams, which is not defined in this header
#define BT_GPU_params s3DGridBroadphaseParams
|
||||
// 24-bit multiply intrinsic -> ordinary multiplication
#define BT_GPU___mul24(a, b) ((a)*(b))
|
||||
// kernel qualifier -> plain inline function
#define BT_GPU___global__ inline
|
||||
// shared-memory qualifier -> static storage
#define BT_GPU___shared__ static
|
||||
// thread barrier -> expands to nothing
#define BT_GPU___syncthreads()
|
||||
// NOTE(review): SIMD_PI is not defined in this header; it must be provided by whoever includes it
#define CUDART_PI_F SIMD_PI
|
||||
|
||||
// Builds a uint2 from its two components (x first, then y).
static inline uint2 bt3dGrid_make_uint2(unsigned int x, unsigned int y)
{
	uint2 result = { x, y };
	return result;
}
// BT_GPU_ alias for the helper above
#define BT_GPU_make_uint2(x, y) bt3dGrid_make_uint2(x, y)
|
||||
|
||||
// Builds an int3 from its three components.
static inline int3 bt3dGrid_make_int3(int x, int y, int z)
{
	int3 result = { x, y, z };
	return result;
}
// BT_GPU_ alias for the helper above
#define BT_GPU_make_int3(x, y, z) bt3dGrid_make_int3(x, y, z)
|
||||
|
||||
// Builds a float3 from its three components.
static inline float3 bt3dGrid_make_float3(float x, float y, float z)
{
	float3 result = { x, y, z };
	return result;
}
// BT_GPU_ alias for the helper above
#define BT_GPU_make_float3(x, y, z) bt3dGrid_make_float3(x, y, z)
|
||||
|
||||
// Builds a float3 from the x, y, z components of a float4; the w component is dropped.
static inline float3 bt3dGrid_make_float34(float4 f)
{
	float3 result = { f.x, f.y, f.z };
	return result;
}
// BT_GPU_ alias for the helper above
#define BT_GPU_make_float34(f) bt3dGrid_make_float34(f)
|
||||
|
||||
// Builds a float3 with all three components set to the same value f.
static inline float3 bt3dGrid_make_float31(float f)
{
	float3 result = { f, f, f };
	return result;
}
// BT_GPU_ alias for the helper above
#define BT_GPU_make_float31(x) bt3dGrid_make_float31(x)
|
||||
|
||||
// Builds a float4 whose x, y, z come from v and whose w is f.
static inline float4 bt3dGrid_make_float42(float3 v, float f)
{
	float4 result = { v.x, v.y, v.z, f };
	return result;
}
// BT_GPU_ alias for the helper above
#define BT_GPU_make_float42(a, b) bt3dGrid_make_float42(a, b)
|
||||
|
||||
// Builds a float4 from four scalars, assigned to x, y, z, w in that order.
static inline float4 bt3dGrid_make_float44(float a, float b, float c, float d)
{
	float4 result = { a, b, c, d };
	return result;
}
// BT_GPU_ alias for the helper above
#define BT_GPU_make_float44(a, b, c, d) bt3dGrid_make_float44(a, b, c, d)
|
||||
|
||||
// Component-wise sum of two int3 values.
inline int3 operator+(int3 a, int3 b)
{
	const int sumX = a.x + b.x;
	const int sumY = a.y + b.y;
	const int sumZ = a.z + b.z;
	return bt3dGrid_make_int3(sumX, sumY, sumZ);
}
|
||||
|
||||
// Component-wise sum of two float4 values (all four components, including w).
inline float4 operator+(const float4& a, const float4& b)
{
	float4 sum = { a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w };
	return sum;
}
|
||||
// Scales every component of a float4 (including w) by fact.
inline float4 operator*(const float4& a, float fact)
{
	float4 scaled = { a.x * fact, a.y * fact, a.z * fact, a.w * fact };
	return scaled;
}
|
||||
// Scalar-on-the-left form of float4 scaling; forwards to operator*(const float4&, float).
// The vector is taken by const reference so that const objects and temporaries
// (e.g. 2.f * (a + b)) are accepted, matching the vector-on-the-left overload.
inline float4 operator*(float fact, const float4& a)
{
	return (a * fact);
}
|
||||
// Scales a in place by fact and returns a reference to it.
inline float4& operator*=(float4& a, float fact)
{
	return a = fact * a;
}
|
||||
// Adds b to a in place (component-wise) and returns a reference to a.
inline float4& operator+=(float4& a, const float4& b)
{
	return a = a + b;
}
|
||||
|
||||
// Component-wise sum of two float3 values.
inline float3 operator+(const float3& a, const float3& b)
{
	float3 sum = { a.x + b.x, a.y + b.y, a.z + b.z };
	return sum;
}
|
||||
// Component-wise difference a - b of two float3 values.
inline float3 operator-(const float3& a, const float3& b)
{
	float3 diff = { a.x - b.x, a.y - b.y, a.z - b.z };
	return diff;
}
|
||||
static inline float bt3dGrid_dot(float3& a, float3& b)
|
||||
{
|
||||
return a.x*b.x+a.y*b.y+a.z*b.z;
|
||||
}
|
||||
#define BT_GPU_dot(a,b) bt3dGrid_dot(a,b)
|
||||
|
||||
static inline float bt3dGrid_dot4(float4& a, float4& b)
|
||||
{
|
||||
return a.x*b.x+a.y*b.y+a.z*b.z+a.w*b.w;
|
||||
}
|
||||
#define BT_GPU_dot4(a,b) bt3dGrid_dot4(a,b)
|
||||
|
||||
// Cross product a x b of two float3 values.
static inline float3 bt3dGrid_cross(const float3& a, const float3& b)
{
	float3 result;
	result.x = a.y*b.z-a.z*b.y;
	result.y = -a.x*b.z+a.z*b.x;
	result.z = a.x*b.y-a.y*b.x;
	return result;
}
// BT_GPU_ alias for the helper above
#define BT_GPU_cross(a,b) bt3dGrid_cross(a,b)
|
||||
|
||||
|
||||
// Scales every component of a float3 by fact.
inline float3 operator*(const float3& a, float fact)
{
	float3 scaled = { a.x * fact, a.y * fact, a.z * fact };
	return scaled;
}
|
||||
|
||||
|
||||
// Adds b to a in place (component-wise) and returns a reference to a.
inline float3& operator+=(float3& a, const float3& b)
{
	return a = a + b;
}
|
||||
// Subtracts b from a in place (component-wise) and returns a reference to a.
inline float3& operator-=(float3& a, const float3& b)
{
	return a = a - b;
}
|
||||
// Scales a in place by fact and returns a reference to it.
inline float3& operator*=(float3& a, float fact)
{
	return a = a * fact;
}
|
||||
// Unary minus: returns v with every component negated.
inline float3 operator-(const float3& v)
{
	float3 negated = { -v.x, -v.y, -v.z };
	return negated;
}
|
||||
|
||||
|
||||
// Fetches become plain array indexing here.
// NOTE: the arguments are not parenthesised in the expansion.
#define BT_GPU_FETCH(a, b) a[b]
|
||||
#define BT_GPU_FETCH4(a, b) a[b]
|
||||
// prepends btGpu_ to a function name, e.g. BT_GPU_PREF(iDivUp) -> btGpu_iDivUp
#define BT_GPU_PREF(func) btGpu_##func
|
||||
// expands to the wrapped expression unchanged
#define BT_GPU_SAFE_CALL(func) func
|
||||
// NOTE(review): memset/memcpy need their standard header, which this file does not include itself
#define BT_GPU_Memset memset
|
||||
// copies c bytes from b into the object a (the address of a is taken by the macro)
#define BT_GPU_MemcpyToSymbol(a, b, c) memcpy(&a, b, c)
|
||||
// texture bind/unbind expand to nothing
#define BT_GPU_BindTexture(a, b, c, d)
|
||||
#define BT_GPU_UnbindTexture(a)
|
||||
|
||||
// File-static block/thread index variables that BT_GPU_EXECKERNEL sets before each kernel call.
// Being static in a header, every translation unit that includes this file gets its own copy.
static uint2 s_blockIdx, s_blockDim, s_threadIdx;
|
||||
#define BT_GPU_blockIdx s_blockIdx
|
||||
#define BT_GPU_blockDim s_blockDim
|
||||
#define BT_GPU_threadIdx s_threadIdx
|
||||
// Runs a "kernel" serially: sets s_blockDim.x to numt, then for each block index in [0, numb)
// and each thread index in [0, numt) sets s_blockIdx.x / s_threadIdx.x and calls kfunc with
// args (args must carry its own parentheses). Only the .x members are written.
// NOTE: expands to a bare { ... } block that declares loop variables nb and nt; numb and numt
// are evaluated on every loop test.
#define BT_GPU_EXECKERNEL(numb, numt, kfunc, args) {s_blockDim.x=numt;for(int nb=0;nb<numb;nb++){s_blockIdx.x=nb;for(int nt=0;nt<numt;nt++){s_threadIdx.x=nt;kfunc args;}}}
|
||||
|
||||
// error check expands to nothing
#define BT_GPU_CHECK_ERROR(s)
|
||||
|
||||
|
||||
#endif //BT_GPU_DEFINES_H
|
||||
@@ -0,0 +1,55 @@
|
||||
/*
|
||||
Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
|
||||
Copyright (C) 2006, 2009 Sony Computer Entertainment Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
// Shared code for GPU-based utilities
|
||||
|
||||
//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
// Keep this file free from Bullet headers
|
||||
// will be compiled by both CPU and CUDA compilers
|
||||
// file with definitions of BT_GPU_xxx should be included first
|
||||
//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
#include "btGpuUtilsSharedDefs.h"
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
extern "C"
|
||||
{
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
//Round a / b to nearest higher integer value
|
||||
int BT_GPU_PREF(iDivUp)(int a, int b)
|
||||
{
|
||||
return (a % b != 0) ? (a / b + 1) : (a / b);
|
||||
} // iDivUp()
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
// compute grid and thread block size for a given number of elements
|
||||
void BT_GPU_PREF(computeGridSize)(int n, int blockSize, int &numBlocks, int &numThreads)
|
||||
{
|
||||
numThreads = BT_GPU_min(blockSize, n);
|
||||
numBlocks = BT_GPU_PREF(iDivUp)(n, numThreads);
|
||||
} // computeGridSize()
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
} // extern "C"
|
||||
|
||||
@@ -0,0 +1,52 @@
|
||||
/*
|
||||
Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
|
||||
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
|
||||
// Shared definitions for GPU-based utilities
|
||||
|
||||
//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
// Keep this file free from Bullet headers
|
||||
// it is included into both CUDA and CPU code
|
||||
// file with definitions of BT_GPU_xxx should be included first
|
||||
//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
|
||||
#ifndef BTGPUUTILSDHAREDDEFS_H
|
||||
#define BTGPUUTILSDHAREDDEFS_H
|
||||
|
||||
|
||||
extern "C"
|
||||
{
|
||||
|
||||
|
||||
//Round a / b to nearest higher integer value
|
||||
int BT_GPU_PREF(iDivUp)(int a, int b);
|
||||
|
||||
// compute grid and thread block size for a given number of elements
|
||||
void BT_GPU_PREF(computeGridSize)(int n, int blockSize, int &numBlocks, int &numThreads);
|
||||
|
||||
void BT_GPU_PREF(allocateArray)(void** devPtr, unsigned int size);
|
||||
void BT_GPU_PREF(freeArray)(void* devPtr);
|
||||
void BT_GPU_PREF(copyArrayFromDevice)(void* host, const void* device, unsigned int size);
|
||||
void BT_GPU_PREF(copyArrayToDevice)(void* device, const void* host, unsigned int size);
|
||||
void BT_GPU_PREF(registerGLBufferObject(unsigned int vbo));
|
||||
void* BT_GPU_PREF(mapGLBufferObject(unsigned int vbo));
|
||||
void BT_GPU_PREF(unmapGLBufferObject(unsigned int vbo));
|
||||
|
||||
|
||||
} // extern "C"
|
||||
|
||||
|
||||
#endif // BTGPUUTILSDHAREDDEFS_H
|
||||
|
||||
Reference in New Issue
Block a user