more work towards re-enabling grid broadphase.
This commit is contained in:
@@ -48,9 +48,9 @@ public:
|
|||||||
arraySizeZ(10),
|
arraySizeZ(10),
|
||||||
#else
|
#else
|
||||||
|
|
||||||
arraySizeX(1),
|
arraySizeX(30),
|
||||||
arraySizeY(1),
|
arraySizeY(30),
|
||||||
arraySizeZ(1),
|
arraySizeZ(30),
|
||||||
#endif
|
#endif
|
||||||
m_useConcaveMesh(false),
|
m_useConcaveMesh(false),
|
||||||
gapX(16.3),
|
gapX(16.3),
|
||||||
|
|||||||
@@ -115,7 +115,8 @@ void GpuRigidBodyDemo::initPhysics(const ConstructionInfo& ci)
|
|||||||
|
|
||||||
m_data->m_config.m_maxConvexBodies = b3Max(m_data->m_config.m_maxConvexBodies,ci.arraySizeX*ci.arraySizeY*ci.arraySizeZ+10);
|
m_data->m_config.m_maxConvexBodies = b3Max(m_data->m_config.m_maxConvexBodies,ci.arraySizeX*ci.arraySizeY*ci.arraySizeZ+10);
|
||||||
m_data->m_config.m_maxConvexShapes = m_data->m_config.m_maxConvexBodies;
|
m_data->m_config.m_maxConvexShapes = m_data->m_config.m_maxConvexBodies;
|
||||||
m_data->m_config.m_maxBroadphasePairs = 16*m_data->m_config.m_maxConvexBodies;
|
int maxPairsPerBody = 128;
|
||||||
|
m_data->m_config.m_maxBroadphasePairs = maxPairsPerBody*m_data->m_config.m_maxConvexBodies;
|
||||||
m_data->m_config.m_maxContactCapacity = m_data->m_config.m_maxBroadphasePairs;
|
m_data->m_config.m_maxContactCapacity = m_data->m_config.m_maxBroadphasePairs;
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
#include "../../btgui/OpenGLWindow/SimpleOpenGL3App.h"
|
#include "../../btgui/OpenGLWindow/SimpleOpenGL3App.h"
|
||||||
#include "Bullet3Common/b3Vector3.h"
|
#include "Bullet3Common/b3Vector3.h"
|
||||||
#include "assert.h"
|
#include "assert.h"
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
int main(int argc, char* argv[])
|
int main(int argc, char* argv[])
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -12,6 +12,10 @@ premake4 --file=stringifyKernel.lua --kernelfile="../src/Bullet3OpenCL/ParallelP
|
|||||||
premake4 --file=stringifyKernel.lua --kernelfile="../src/Bullet3OpenCL/BroadphaseCollision/kernels/sap.cl" --headerfile="../src/Bullet3OpenCL/BroadphaseCollision/kernels/sapKernels.h" --stringname="sapCL" stringify
|
premake4 --file=stringifyKernel.lua --kernelfile="../src/Bullet3OpenCL/BroadphaseCollision/kernels/sap.cl" --headerfile="../src/Bullet3OpenCL/BroadphaseCollision/kernels/sapKernels.h" --stringname="sapCL" stringify
|
||||||
premake4 --file=stringifyKernel.lua --kernelfile="../src/Bullet3OpenCL/BroadphaseCollision/kernels/sapFast.cl" --headerfile="../src/Bullet3OpenCL/BroadphaseCollision/kernels/sapFastKernels.h" --stringname="sapFastCL" stringify
|
premake4 --file=stringifyKernel.lua --kernelfile="../src/Bullet3OpenCL/BroadphaseCollision/kernels/sapFast.cl" --headerfile="../src/Bullet3OpenCL/BroadphaseCollision/kernels/sapFastKernels.h" --stringname="sapFastCL" stringify
|
||||||
|
|
||||||
|
premake4 --file=stringifyKernel.lua --kernelfile="../src/Bullet3OpenCL/BroadphaseCollision/kernels/gridBroadphase.cl" --headerfile="../src/Bullet3OpenCL/BroadphaseCollision/kernels/gridBroadphaseKernels.h" --stringname="gridBroadphaseCL" stringify
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
premake4 --file=stringifyKernel.lua --kernelfile="../src/Bullet3OpenCL/NarrowphaseCollision/kernels/sat.cl" --headerfile="../src/Bullet3OpenCL/NarrowphaseCollision/kernels/satKernels.h" --stringname="satKernelsCL" stringify
|
premake4 --file=stringifyKernel.lua --kernelfile="../src/Bullet3OpenCL/NarrowphaseCollision/kernels/sat.cl" --headerfile="../src/Bullet3OpenCL/NarrowphaseCollision/kernels/satKernels.h" --stringname="satKernelsCL" stringify
|
||||||
premake4 --file=stringifyKernel.lua --kernelfile="../src/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.cl" --headerfile="../src/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.h" --stringname="satClipKernelsCL" stringify
|
premake4 --file=stringifyKernel.lua --kernelfile="../src/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.cl" --headerfile="../src/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.h" --stringname="satClipKernelsCL" stringify
|
||||||
premake4 --file=stringifyKernel.lua --kernelfile="../src/Bullet3OpenCL/NarrowphaseCollision/kernels/primitiveContacts.cl" --headerfile="../src/Bullet3OpenCL/NarrowphaseCollision/kernels/primitiveContacts.h" --stringname="primitiveContactsKernelsCL" stringify
|
premake4 --file=stringifyKernel.lua --kernelfile="../src/Bullet3OpenCL/NarrowphaseCollision/kernels/primitiveContacts.cl" --headerfile="../src/Bullet3OpenCL/NarrowphaseCollision/kernels/primitiveContacts.h" --stringname="primitiveContactsKernelsCL" stringify
|
||||||
|
|||||||
@@ -1,17 +1,99 @@
|
|||||||
|
|
||||||
#include "b3GpuGridBroadphase.h"
|
#include "b3GpuGridBroadphase.h"
|
||||||
#include "Bullet3Geometry/b3AabbUtil.h"
|
#include "Bullet3Geometry/b3AabbUtil.h"
|
||||||
|
#include "kernels/gridBroadphaseKernels.h"
|
||||||
|
|
||||||
|
//#include "kernels/gridBroadphase.cl"
|
||||||
|
|
||||||
|
|
||||||
|
#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h"
|
||||||
|
#include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h"
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#define B3_GRID_BROADPHASE_PATH "src/Bullet3OpenCL/BroadphaseCollision/kernels/gridBroadphase.cl"
|
||||||
|
|
||||||
|
cl_kernel kCalcHashAABB;
|
||||||
|
cl_kernel kClearCellStart;
|
||||||
|
cl_kernel kFindCellStart;
|
||||||
|
cl_kernel kFindOverlappingPairs;
|
||||||
|
|
||||||
|
|
||||||
|
cl_kernel kFindPairsLarge;
|
||||||
|
cl_kernel kComputePairCacheChanges;
|
||||||
|
cl_kernel kSqueezeOverlappingPairBuff;
|
||||||
|
|
||||||
|
|
||||||
|
int maxPairsPerBody = 32;
|
||||||
|
int maxBodiesPerCell = 1024;//??
|
||||||
|
|
||||||
b3GpuGridBroadphase::b3GpuGridBroadphase(cl_context ctx,cl_device_id device, cl_command_queue q )
|
b3GpuGridBroadphase::b3GpuGridBroadphase(cl_context ctx,cl_device_id device, cl_command_queue q )
|
||||||
:m_context(ctx),
|
:m_context(ctx),
|
||||||
m_device(device),
|
m_device(device),
|
||||||
m_queue(q),
|
m_queue(q),
|
||||||
m_allAabbsGPU(ctx,q),
|
m_allAabbsGPU(ctx,q),
|
||||||
m_gpuPairs(ctx,q)
|
m_gpuPairs(ctx,q),
|
||||||
|
m_hashGpu(ctx,q),
|
||||||
|
m_paramsGPU(ctx,q),
|
||||||
|
m_cellStartGpu(ctx,q)
|
||||||
{
|
{
|
||||||
|
|
||||||
|
|
||||||
|
b3Vector3 gridSize = b3MakeVector3(3,3,3);
|
||||||
|
b3Vector3 invGridSize = b3MakeVector3(1.f/gridSize[0],1.f/gridSize[1],1.f/gridSize[2]);
|
||||||
|
|
||||||
|
m_paramsCPU.m_gridSize[0] = 128;
|
||||||
|
m_paramsCPU.m_gridSize[1] = 128;
|
||||||
|
m_paramsCPU.m_gridSize[2] = 128;
|
||||||
|
m_paramsCPU.m_gridSize[3] = maxBodiesPerCell;
|
||||||
|
m_paramsCPU.setMaxBodiesPerCell(maxBodiesPerCell);
|
||||||
|
m_paramsCPU.m_invCellSize[0] = invGridSize[0];
|
||||||
|
m_paramsCPU.m_invCellSize[1] = invGridSize[1];
|
||||||
|
m_paramsCPU.m_invCellSize[2] = invGridSize[2];
|
||||||
|
m_paramsCPU.m_invCellSize[3] = 0.f;
|
||||||
|
m_paramsGPU.push_back(m_paramsCPU);
|
||||||
|
|
||||||
|
cl_int errNum=0;
|
||||||
|
cl_program gridProg = b3OpenCLUtils::compileCLProgramFromString(m_context,m_device,0,&errNum,"",B3_GRID_BROADPHASE_PATH,true);
|
||||||
|
b3Assert(errNum==CL_SUCCESS);
|
||||||
|
|
||||||
|
kCalcHashAABB = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,gridBroadphaseCL, "kCalcHashAABB",&errNum,gridProg);
|
||||||
|
b3Assert(errNum==CL_SUCCESS);
|
||||||
|
|
||||||
|
kClearCellStart = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,gridBroadphaseCL, "kClearCellStart",&errNum,gridProg);
|
||||||
|
b3Assert(errNum==CL_SUCCESS);
|
||||||
|
|
||||||
|
kFindCellStart = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,gridBroadphaseCL, "kFindCellStart",&errNum,gridProg);
|
||||||
|
b3Assert(errNum==CL_SUCCESS);
|
||||||
|
|
||||||
|
|
||||||
|
kFindOverlappingPairs = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,gridBroadphaseCL, "kFindOverlappingPairs",&errNum,gridProg);
|
||||||
|
b3Assert(errNum==CL_SUCCESS);
|
||||||
|
|
||||||
|
kFindPairsLarge = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,gridBroadphaseCL, "kFindPairsLarge",&errNum,gridProg);
|
||||||
|
b3Assert(errNum==CL_SUCCESS);
|
||||||
|
|
||||||
|
kComputePairCacheChanges = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,gridBroadphaseCL, "kComputePairCacheChanges",&errNum,gridProg);
|
||||||
|
b3Assert(errNum==CL_SUCCESS);
|
||||||
|
|
||||||
|
kSqueezeOverlappingPairBuff = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,gridBroadphaseCL, "kSqueezeOverlappingPairBuff",&errNum,gridProg);
|
||||||
|
b3Assert(errNum==CL_SUCCESS);
|
||||||
|
|
||||||
|
m_sorter = new b3RadixSort32CL(m_context,m_device,m_queue);
|
||||||
|
|
||||||
}
|
}
|
||||||
b3GpuGridBroadphase::~b3GpuGridBroadphase()
|
b3GpuGridBroadphase::~b3GpuGridBroadphase()
|
||||||
{
|
{
|
||||||
|
clReleaseKernel( kCalcHashAABB);
|
||||||
|
clReleaseKernel( kClearCellStart);
|
||||||
|
clReleaseKernel( kFindCellStart);
|
||||||
|
clReleaseKernel( kFindOverlappingPairs);
|
||||||
|
|
||||||
|
clReleaseKernel( kFindPairsLarge);
|
||||||
|
clReleaseKernel( kComputePairCacheChanges);
|
||||||
|
clReleaseKernel( kSqueezeOverlappingPairBuff);
|
||||||
|
delete m_sorter;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -34,6 +116,109 @@ void b3GpuGridBroadphase::createLargeProxy(const b3Vector3& aabbMin, const b3Ve
|
|||||||
void b3GpuGridBroadphase::calculateOverlappingPairs(int maxPairs)
|
void b3GpuGridBroadphase::calculateOverlappingPairs(int maxPairs)
|
||||||
{
|
{
|
||||||
calculateOverlappingPairsHost(maxPairs);
|
calculateOverlappingPairsHost(maxPairs);
|
||||||
|
return;
|
||||||
|
int numAabbs = m_allAabbsGPU.size();
|
||||||
|
if (numAabbs)
|
||||||
|
{
|
||||||
|
m_hashGpu.resize(numAabbs);
|
||||||
|
{
|
||||||
|
b3LauncherCL launch(m_queue,kCalcHashAABB);
|
||||||
|
launch.setConst(numAabbs);
|
||||||
|
launch.setBuffer(m_allAabbsGPU.getBufferCL());
|
||||||
|
launch.setBuffer(m_hashGpu.getBufferCL());
|
||||||
|
launch.setBuffer(this->m_paramsGPU.getBufferCL());
|
||||||
|
launch.launch1D(numAabbs);
|
||||||
|
}
|
||||||
|
|
||||||
|
m_sorter->execute(m_hashGpu);
|
||||||
|
|
||||||
|
int numCells = this->m_paramsCPU.m_gridSize[0]*this->m_paramsCPU.m_gridSize[1]*this->m_paramsCPU.m_gridSize[2];
|
||||||
|
m_cellStartGpu.resize(numCells);
|
||||||
|
//b3AlignedObjectArray<int > cellStartCpu;
|
||||||
|
|
||||||
|
|
||||||
|
{
|
||||||
|
b3LauncherCL launch(m_queue,kClearCellStart);
|
||||||
|
launch.setConst(numCells);
|
||||||
|
launch.setBuffer(m_cellStartGpu.getBufferCL());
|
||||||
|
launch.launch1D(numCells);
|
||||||
|
//m_cellStartGpu.copyToHost(cellStartCpu);
|
||||||
|
//printf("??\n");
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
{
|
||||||
|
|
||||||
|
b3LauncherCL launch(m_queue,kFindCellStart);
|
||||||
|
launch.setConst(numAabbs);
|
||||||
|
launch.setBuffer(m_hashGpu.getBufferCL());
|
||||||
|
launch.setBuffer(m_cellStartGpu.getBufferCL());
|
||||||
|
launch.launch1D(numAabbs);
|
||||||
|
//m_cellStartGpu.copyToHost(cellStartCpu);
|
||||||
|
//printf("??\n");
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
|
||||||
|
|
||||||
|
b3OpenCLArray<b3Int2> pairsGpu2(m_context,m_queue);
|
||||||
|
b3OpenCLArray<unsigned int> pairsGpu(m_context,m_queue);
|
||||||
|
b3OpenCLArray<unsigned int> pairStartCurGpu(m_context,m_queue);
|
||||||
|
b3AlignedObjectArray<unsigned int> pairStartCpu;
|
||||||
|
|
||||||
|
m_gpuPairs.resize(numAabbs*maxPairsPerBody);
|
||||||
|
pairsGpu2.resize(numAabbs*maxPairsPerBody);
|
||||||
|
pairsGpu.resize(numAabbs*maxPairsPerBody);
|
||||||
|
pairStartCurGpu.resize(numAabbs*2+2);
|
||||||
|
|
||||||
|
pairStartCpu.resize(numAabbs*2+2);
|
||||||
|
|
||||||
|
pairStartCpu[0] = 0;
|
||||||
|
pairStartCpu[1] = 0;
|
||||||
|
for(int i = 1; i <= numAabbs; i++)
|
||||||
|
{
|
||||||
|
pairStartCpu[i * 2] = pairStartCpu[(i-1) * 2] + maxPairsPerBody;
|
||||||
|
pairStartCpu[i * 2 + 1] = 0;
|
||||||
|
}
|
||||||
|
pairStartCurGpu.copyFromHost(pairStartCpu);
|
||||||
|
|
||||||
|
b3OpenCLArray<int> pairCount(m_context,m_queue);
|
||||||
|
pairCount.push_back(0);
|
||||||
|
|
||||||
|
b3LauncherCL launch(m_queue,kFindOverlappingPairs);
|
||||||
|
launch.setConst(numAabbs);
|
||||||
|
launch.setBuffer(m_allAabbsGPU.getBufferCL());
|
||||||
|
launch.setBuffer(m_hashGpu.getBufferCL());
|
||||||
|
launch.setBuffer(m_cellStartGpu.getBufferCL());
|
||||||
|
launch.setBuffer(pairsGpu.getBufferCL());
|
||||||
|
launch.setBuffer(pairStartCurGpu.getBufferCL());
|
||||||
|
launch.setBuffer(m_paramsGPU.getBufferCL());
|
||||||
|
//launch.setBuffer(0);
|
||||||
|
launch.setBuffer(pairCount.getBufferCL());
|
||||||
|
launch.setBuffer(m_gpuPairs.getBufferCL());
|
||||||
|
|
||||||
|
launch.launch1D(numAabbs);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
int actualCount = pairCount.at(0);
|
||||||
|
//b3AlignedObjectArray<b3Int4> pairsCpu;
|
||||||
|
m_gpuPairs.resize(actualCount);
|
||||||
|
//m_gpuPairs.copyToHost(pairsCpu);
|
||||||
|
//printf("?!?\n");
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
//calculateOverlappingPairsHost(maxPairs);
|
||||||
}
|
}
|
||||||
void b3GpuGridBroadphase::calculateOverlappingPairsHost(int maxPairs)
|
void b3GpuGridBroadphase::calculateOverlappingPairsHost(int maxPairs)
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -2,6 +2,25 @@
|
|||||||
#define B3_GPU_GRID_BROADPHASE_H
|
#define B3_GPU_GRID_BROADPHASE_H
|
||||||
|
|
||||||
#include "b3GpuBroadphaseInterface.h"
|
#include "b3GpuBroadphaseInterface.h"
|
||||||
|
#include "Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h"
|
||||||
|
|
||||||
|
struct b3ParamsGridBroadphaseCL
|
||||||
|
{
|
||||||
|
|
||||||
|
float m_invCellSize[4];
|
||||||
|
int m_gridSize[4];
|
||||||
|
|
||||||
|
int getMaxBodiesPerCell() const
|
||||||
|
{
|
||||||
|
return m_gridSize[3];
|
||||||
|
}
|
||||||
|
|
||||||
|
void setMaxBodiesPerCell(int maxOverlap)
|
||||||
|
{
|
||||||
|
m_gridSize[3] = maxOverlap;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
class b3GpuGridBroadphase : public b3GpuBroadphaseInterface
|
class b3GpuGridBroadphase : public b3GpuBroadphaseInterface
|
||||||
{
|
{
|
||||||
@@ -16,6 +35,15 @@ protected:
|
|||||||
b3AlignedObjectArray<b3Int4> m_hostPairs;
|
b3AlignedObjectArray<b3Int4> m_hostPairs;
|
||||||
b3OpenCLArray<b3Int4> m_gpuPairs;
|
b3OpenCLArray<b3Int4> m_gpuPairs;
|
||||||
|
|
||||||
|
b3OpenCLArray<b3SortData> m_hashGpu;
|
||||||
|
b3OpenCLArray<int> m_cellStartGpu;
|
||||||
|
|
||||||
|
|
||||||
|
b3ParamsGridBroadphaseCL m_paramsCPU;
|
||||||
|
b3OpenCLArray<b3ParamsGridBroadphaseCL> m_paramsGPU;
|
||||||
|
|
||||||
|
class b3RadixSort32CL* m_sorter;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
|
||||||
b3GpuGridBroadphase(cl_context ctx,cl_device_id device, cl_command_queue q );
|
b3GpuGridBroadphase(cl_context ctx,cl_device_id device, cl_command_queue q );
|
||||||
|
|||||||
374
src/Bullet3OpenCL/BroadphaseCollision/kernels/gridBroadphase.cl
Normal file
374
src/Bullet3OpenCL/BroadphaseCollision/kernels/gridBroadphase.cl
Normal file
@@ -0,0 +1,374 @@
|
|||||||
|
|
||||||
|
|
||||||
|
int getPosHash(int4 gridPos, __global float4* pParams)
|
||||||
|
{
|
||||||
|
int4 gridDim = *((__global int4*)(pParams + 1));
|
||||||
|
gridPos.x &= gridDim.x - 1;
|
||||||
|
gridPos.y &= gridDim.y - 1;
|
||||||
|
gridPos.z &= gridDim.z - 1;
|
||||||
|
int hash = gridPos.z * gridDim.y * gridDim.x + gridPos.y * gridDim.x + gridPos.x;
|
||||||
|
return hash;
|
||||||
|
}
|
||||||
|
|
||||||
|
int4 getGridPos(float4 worldPos, __global float4* pParams)
|
||||||
|
{
|
||||||
|
int4 gridPos;
|
||||||
|
int4 gridDim = *((__global int4*)(pParams + 1));
|
||||||
|
gridPos.x = (int)floor(worldPos.x * pParams[0].x) & (gridDim.x - 1);
|
||||||
|
gridPos.y = (int)floor(worldPos.y * pParams[0].y) & (gridDim.y - 1);
|
||||||
|
gridPos.z = (int)floor(worldPos.z * pParams[0].z) & (gridDim.z - 1);
|
||||||
|
return gridPos;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// calculate grid hash value for each body using its AABB
|
||||||
|
__kernel void kCalcHashAABB(int numObjects, __global float4* pAABB, __global int2* pHash, __global float4* pParams )
|
||||||
|
{
|
||||||
|
int index = get_global_id(0);
|
||||||
|
if(index >= numObjects)
|
||||||
|
{
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
float4 bbMin = pAABB[index*2];
|
||||||
|
float4 bbMax = pAABB[index*2 + 1];
|
||||||
|
float4 pos;
|
||||||
|
pos.x = (bbMin.x + bbMax.x) * 0.5f;
|
||||||
|
pos.y = (bbMin.y + bbMax.y) * 0.5f;
|
||||||
|
pos.z = (bbMin.z + bbMax.z) * 0.5f;
|
||||||
|
pos.w = 0.f;
|
||||||
|
// get address in grid
|
||||||
|
int4 gridPos = getGridPos(pos, pParams);
|
||||||
|
int gridHash = getPosHash(gridPos, pParams);
|
||||||
|
// store grid hash and body index
|
||||||
|
int2 hashVal;
|
||||||
|
hashVal.x = gridHash;
|
||||||
|
hashVal.y = index;
|
||||||
|
pHash[index] = hashVal;
|
||||||
|
}
|
||||||
|
|
||||||
|
__kernel void kClearCellStart( int numCells,
|
||||||
|
__global int* pCellStart )
|
||||||
|
{
|
||||||
|
int index = get_global_id(0);
|
||||||
|
if(index >= numCells)
|
||||||
|
{
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
pCellStart[index] = -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
__kernel void kFindCellStart(int numObjects, __global int2* pHash, __global int* cellStart )
|
||||||
|
{
|
||||||
|
__local int sharedHash[513];
|
||||||
|
int index = get_global_id(0);
|
||||||
|
int2 sortedData;
|
||||||
|
|
||||||
|
if(index < numObjects)
|
||||||
|
{
|
||||||
|
sortedData = pHash[index];
|
||||||
|
// Load hash data into shared memory so that we can look
|
||||||
|
// at neighboring body's hash value without loading
|
||||||
|
// two hash values per thread
|
||||||
|
sharedHash[get_local_id(0) + 1] = sortedData.x;
|
||||||
|
if((index > 0) && (get_local_id(0) == 0))
|
||||||
|
{
|
||||||
|
// first thread in block must load neighbor body hash
|
||||||
|
sharedHash[0] = pHash[index-1].x;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
barrier(CLK_LOCAL_MEM_FENCE);
|
||||||
|
if(index < numObjects)
|
||||||
|
{
|
||||||
|
if((index == 0) || (sortedData.x != sharedHash[get_local_id(0)]))
|
||||||
|
{
|
||||||
|
cellStart[sortedData.x] = index;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int testAABBOverlap(float4 min0, float4 max0, float4 min1, float4 max1)
|
||||||
|
{
|
||||||
|
return (min0.x <= max1.x)&& (min1.x <= max0.x) &&
|
||||||
|
(min0.y <= max1.y)&& (min1.y <= max0.y) &&
|
||||||
|
(min0.z <= max1.z)&& (min1.z <= max0.z);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
//search for AABB 'index' against other AABBs' in this cell
|
||||||
|
void findPairsInCell( int numObjects,
|
||||||
|
int4 gridPos,
|
||||||
|
int index,
|
||||||
|
__global int2* pHash,
|
||||||
|
__global int* pCellStart,
|
||||||
|
__global float4* pAABB,
|
||||||
|
__global int* pPairBuff,
|
||||||
|
__global int2* pPairBuffStartCurr,
|
||||||
|
__global float4* pParams,
|
||||||
|
volatile __global int* pairCount,
|
||||||
|
__global int4* pPairBuff2
|
||||||
|
)
|
||||||
|
{
|
||||||
|
int4 pGridDim = *((__global int4*)(pParams + 1));
|
||||||
|
int maxBodiesPerCell = pGridDim.w;
|
||||||
|
int gridHash = getPosHash(gridPos, pParams);
|
||||||
|
// get start of bucket for this cell
|
||||||
|
int bucketStart = pCellStart[gridHash];
|
||||||
|
if (bucketStart == -1)
|
||||||
|
{
|
||||||
|
return; // cell empty
|
||||||
|
}
|
||||||
|
// iterate over bodies in this cell
|
||||||
|
int2 sortedData = pHash[index];
|
||||||
|
int unsorted_indx = sortedData.y;
|
||||||
|
float4 min0 = pAABB[unsorted_indx*2 + 0];
|
||||||
|
float4 max0 = pAABB[unsorted_indx*2 + 1];
|
||||||
|
int handleIndex = as_int(min0.w);
|
||||||
|
int2 start_curr = pPairBuffStartCurr[handleIndex];
|
||||||
|
int start = start_curr.x;
|
||||||
|
int curr = start_curr.y;
|
||||||
|
int2 start_curr_next = pPairBuffStartCurr[handleIndex+1];
|
||||||
|
int curr_max = start_curr_next.x - start - 1;
|
||||||
|
int bucketEnd = bucketStart + maxBodiesPerCell;
|
||||||
|
bucketEnd = (bucketEnd > numObjects) ? numObjects : bucketEnd;
|
||||||
|
for(int index2 = bucketStart; index2 < bucketEnd; index2++)
|
||||||
|
{
|
||||||
|
int2 cellData = pHash[index2];
|
||||||
|
if (cellData.x != gridHash)
|
||||||
|
{
|
||||||
|
break; // no longer in same bucket
|
||||||
|
}
|
||||||
|
int unsorted_indx2 = cellData.y;
|
||||||
|
//if (unsorted_indx2 < unsorted_indx) // check not colliding with self
|
||||||
|
if (unsorted_indx2 != unsorted_indx) // check not colliding with self
|
||||||
|
{
|
||||||
|
float4 min1 = pAABB[unsorted_indx2*2 + 0];
|
||||||
|
float4 max1 = pAABB[unsorted_indx2*2 + 1];
|
||||||
|
if(testAABBOverlap(min0, max0, min1, max1))
|
||||||
|
{
|
||||||
|
if (pairCount)
|
||||||
|
{
|
||||||
|
int handleIndex2 = as_int(min1.w);
|
||||||
|
//if (handleIndex<handleIndex2)
|
||||||
|
{
|
||||||
|
int curPair = atomic_add(pairCount,1);
|
||||||
|
int4 newpair;
|
||||||
|
newpair.x = handleIndex2;
|
||||||
|
newpair.y = handleIndex;
|
||||||
|
newpair.z = -1;
|
||||||
|
newpair.w = -1;
|
||||||
|
pPairBuff2[curPair] = newpair;
|
||||||
|
}
|
||||||
|
|
||||||
|
} else
|
||||||
|
{
|
||||||
|
int handleIndex2 = as_int(min1.w);
|
||||||
|
int k;
|
||||||
|
for(k = 0; k < curr; k++)
|
||||||
|
{
|
||||||
|
int old_pair = pPairBuff[start+k] & (~0x60000000);
|
||||||
|
if(old_pair == handleIndex2)
|
||||||
|
{
|
||||||
|
pPairBuff[start+k] |= 0x40000000;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if(k == curr)
|
||||||
|
{
|
||||||
|
if(curr >= curr_max)
|
||||||
|
{ // not a good solution, but let's avoid crash
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
pPairBuff[start+curr] = handleIndex2 | 0x20000000;
|
||||||
|
curr++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!pairCount)
|
||||||
|
{
|
||||||
|
int2 newStartCurr;
|
||||||
|
newStartCurr.x = start;
|
||||||
|
newStartCurr.y = curr;
|
||||||
|
pPairBuffStartCurr[handleIndex] = newStartCurr;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
__kernel void kFindOverlappingPairs( int numObjects,
|
||||||
|
__global float4* pAABB,
|
||||||
|
__global int2* pHash,
|
||||||
|
__global int* pCellStart,
|
||||||
|
__global int* pPairBuff,
|
||||||
|
__global int2* pPairBuffStartCurr,
|
||||||
|
__global float4* pParams ,
|
||||||
|
volatile __global int* pairCount,
|
||||||
|
__global int4* pPairBuff2
|
||||||
|
)
|
||||||
|
|
||||||
|
{
|
||||||
|
int index = get_global_id(0);
|
||||||
|
if(index >= numObjects)
|
||||||
|
{
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
int2 sortedData = pHash[index];
|
||||||
|
int unsorted_indx = sortedData.y;
|
||||||
|
float4 bbMin = pAABB[unsorted_indx*2 + 0];
|
||||||
|
float4 bbMax = pAABB[unsorted_indx*2 + 1];
|
||||||
|
float4 pos;
|
||||||
|
pos.x = (bbMin.x + bbMax.x) * 0.5f;
|
||||||
|
pos.y = (bbMin.y + bbMax.y) * 0.5f;
|
||||||
|
pos.z = (bbMin.z + bbMax.z) * 0.5f;
|
||||||
|
// get address in grid
|
||||||
|
int4 gridPosA = getGridPos(pos, pParams);
|
||||||
|
int4 gridPosB;
|
||||||
|
// examine only neighbouring cells
|
||||||
|
for(int z=0; z<=1; z++)
|
||||||
|
{
|
||||||
|
gridPosB.z = gridPosA.z + z;
|
||||||
|
for(int y=0; y<=1; y++)
|
||||||
|
{
|
||||||
|
gridPosB.y = gridPosA.y + y;
|
||||||
|
for(int x=0; x<=1; x++)
|
||||||
|
{
|
||||||
|
gridPosB.x = gridPosA.x + x;
|
||||||
|
findPairsInCell(numObjects, gridPosB, index, pHash, pCellStart, pAABB, pPairBuff, pPairBuffStartCurr, pParams, pairCount,pPairBuff2);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
__kernel void kFindPairsLarge( int numObjects,
|
||||||
|
__global float4* pAABB,
|
||||||
|
__global int2* pHash,
|
||||||
|
__global int* pCellStart,
|
||||||
|
__global int* pPairBuff,
|
||||||
|
__global int2* pPairBuffStartCurr,
|
||||||
|
uint numLarge )
|
||||||
|
{
|
||||||
|
int index = get_global_id(0);
|
||||||
|
if(index >= numObjects)
|
||||||
|
{
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
int2 sortedData = pHash[index];
|
||||||
|
int unsorted_indx = sortedData.y;
|
||||||
|
float4 min0 = pAABB[unsorted_indx*2 + 0];
|
||||||
|
float4 max0 = pAABB[unsorted_indx*2 + 1];
|
||||||
|
int handleIndex = as_int(min0.w);
|
||||||
|
int2 start_curr = pPairBuffStartCurr[handleIndex];
|
||||||
|
int start = start_curr.x;
|
||||||
|
int curr = start_curr.y;
|
||||||
|
int2 start_curr_next = pPairBuffStartCurr[handleIndex+1];
|
||||||
|
int curr_max = start_curr_next.x - start - 1;
|
||||||
|
for(uint i = 0; i < numLarge; i++)
|
||||||
|
{
|
||||||
|
int indx2 = numObjects + i;
|
||||||
|
float4 min1 = pAABB[indx2*2 + 0];
|
||||||
|
float4 max1 = pAABB[indx2*2 + 1];
|
||||||
|
if(testAABBOverlap(min0, max0, min1, max1))
|
||||||
|
{
|
||||||
|
int k;
|
||||||
|
int handleIndex2 = as_int(min1.w);
|
||||||
|
for(k = 0; k < curr; k++)
|
||||||
|
{
|
||||||
|
int old_pair = pPairBuff[start+k] & (~0x60000000);
|
||||||
|
if(old_pair == handleIndex2)
|
||||||
|
{
|
||||||
|
pPairBuff[start+k] |= 0x40000000;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if(k == curr)
|
||||||
|
{
|
||||||
|
pPairBuff[start+curr] = handleIndex2 | 0x20000000;
|
||||||
|
if(curr >= curr_max)
|
||||||
|
{ // not a good solution, but let's avoid crash
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
curr++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
int2 newStartCurr;
|
||||||
|
newStartCurr.x = start;
|
||||||
|
newStartCurr.y = curr;
|
||||||
|
pPairBuffStartCurr[handleIndex] = newStartCurr;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
__kernel void kComputePairCacheChanges( int numObjects,
|
||||||
|
__global int* pPairBuff,
|
||||||
|
__global int2* pPairBuffStartCurr,
|
||||||
|
__global int* pPairScan,
|
||||||
|
__global float4* pAABB )
|
||||||
|
{
|
||||||
|
int index = get_global_id(0);
|
||||||
|
if(index >= numObjects)
|
||||||
|
{
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
float4 bbMin = pAABB[index * 2];
|
||||||
|
int handleIndex = as_int(bbMin.w);
|
||||||
|
int2 start_curr = pPairBuffStartCurr[handleIndex];
|
||||||
|
int start = start_curr.x;
|
||||||
|
int curr = start_curr.y;
|
||||||
|
__global int *pInp = pPairBuff + start;
|
||||||
|
int num_changes = 0;
|
||||||
|
for(int k = 0; k < curr; k++, pInp++)
|
||||||
|
{
|
||||||
|
if(!((*pInp) & 0x40000000))
|
||||||
|
{
|
||||||
|
num_changes++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
pPairScan[index+1] = num_changes;
|
||||||
|
}
|
||||||
|
|
||||||
|
__kernel void kSqueezeOverlappingPairBuff( int numObjects,
|
||||||
|
__global int* pPairBuff,
|
||||||
|
__global int2* pPairBuffStartCurr,
|
||||||
|
__global int* pPairScan,
|
||||||
|
__global int* pPairOut,
|
||||||
|
__global float4* pAABB )
|
||||||
|
{
|
||||||
|
int index = get_global_id(0);
|
||||||
|
if(index >= numObjects)
|
||||||
|
{
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
float4 bbMin = pAABB[index * 2];
|
||||||
|
int handleIndex = as_int(bbMin.w);
|
||||||
|
int2 start_curr = pPairBuffStartCurr[handleIndex];
|
||||||
|
int start = start_curr.x;
|
||||||
|
int curr = start_curr.y;
|
||||||
|
__global int* pInp = pPairBuff + start;
|
||||||
|
__global int* pOut = pPairOut + pPairScan[index+1];
|
||||||
|
__global int* pOut2 = pInp;
|
||||||
|
int num = 0;
|
||||||
|
for(int k = 0; k < curr; k++, pInp++)
|
||||||
|
{
|
||||||
|
if(!((*pInp) & 0x40000000))
|
||||||
|
{
|
||||||
|
*pOut = *pInp;
|
||||||
|
pOut++;
|
||||||
|
}
|
||||||
|
if((*pInp) & 0x60000000)
|
||||||
|
{
|
||||||
|
*pOut2 = (*pInp) & (~0x60000000);
|
||||||
|
pOut2++;
|
||||||
|
num++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
int2 newStartCurr;
|
||||||
|
newStartCurr.x = start;
|
||||||
|
newStartCurr.y = num;
|
||||||
|
pPairBuffStartCurr[handleIndex] = newStartCurr;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -0,0 +1,331 @@
|
|||||||
|
//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project
|
||||||
|
static const char* gridBroadphaseCL= \
|
||||||
|
"#pragma OPENCL EXTENSION cl_amd_printf : enable\n"
|
||||||
|
"int getPosHash(int4 gridPos, __global float4* pParams)\n"
|
||||||
|
"{\n"
|
||||||
|
" int4 gridDim = *((__global int4*)(pParams + 1));\n"
|
||||||
|
" gridPos.x &= gridDim.x - 1;\n"
|
||||||
|
" gridPos.y &= gridDim.y - 1;\n"
|
||||||
|
" gridPos.z &= gridDim.z - 1;\n"
|
||||||
|
" int hash = gridPos.z * gridDim.y * gridDim.x + gridPos.y * gridDim.x + gridPos.x;\n"
|
||||||
|
" return hash;\n"
|
||||||
|
"} \n"
|
||||||
|
"int4 getGridPos(float4 worldPos, __global float4* pParams)\n"
|
||||||
|
"{\n"
|
||||||
|
" int4 gridPos;\n"
|
||||||
|
" int4 gridDim = *((__global int4*)(pParams + 1));\n"
|
||||||
|
" gridPos.x = (int)floor(worldPos.x * pParams[0].x) & (gridDim.x - 1);\n"
|
||||||
|
" gridPos.y = (int)floor(worldPos.y * pParams[0].y) & (gridDim.y - 1);\n"
|
||||||
|
" gridPos.z = (int)floor(worldPos.z * pParams[0].z) & (gridDim.z - 1);\n"
|
||||||
|
" return gridPos;\n"
|
||||||
|
"}\n"
|
||||||
|
"// calculate grid hash value for each body using its AABB\n"
|
||||||
|
"__kernel void kCalcHashAABB(int numObjects, __global float4* pAABB, __global int2* pHash, __global float4* pParams )\n"
|
||||||
|
"{\n"
|
||||||
|
" int index = get_global_id(0);\n"
|
||||||
|
" if(index >= numObjects)\n"
|
||||||
|
" {\n"
|
||||||
|
" return;\n"
|
||||||
|
" }\n"
|
||||||
|
" float4 bbMin = pAABB[index*2];\n"
|
||||||
|
" float4 bbMax = pAABB[index*2 + 1];\n"
|
||||||
|
" float4 pos;\n"
|
||||||
|
" pos.x = (bbMin.x + bbMax.x) * 0.5f;\n"
|
||||||
|
" pos.y = (bbMin.y + bbMax.y) * 0.5f;\n"
|
||||||
|
" pos.z = (bbMin.z + bbMax.z) * 0.5f;\n"
|
||||||
|
" pos.w = 0.f;\n"
|
||||||
|
" // get address in grid\n"
|
||||||
|
" int4 gridPos = getGridPos(pos, pParams);\n"
|
||||||
|
" int gridHash = getPosHash(gridPos, pParams);\n"
|
||||||
|
" // store grid hash and body index\n"
|
||||||
|
" int2 hashVal;\n"
|
||||||
|
" hashVal.x = gridHash;\n"
|
||||||
|
" hashVal.y = index;\n"
|
||||||
|
" pHash[index] = hashVal;\n"
|
||||||
|
"}\n"
|
||||||
|
"__kernel void kClearCellStart( int numCells, \n"
|
||||||
|
" __global int* pCellStart )\n"
|
||||||
|
"{\n"
|
||||||
|
" int index = get_global_id(0);\n"
|
||||||
|
" \n"
|
||||||
|
" if(index >= numCells)\n"
|
||||||
|
" {\n"
|
||||||
|
" return;\n"
|
||||||
|
" }\n"
|
||||||
|
" pCellStart[index] = -1;\n"
|
||||||
|
"}\n"
|
||||||
|
"__kernel void kFindCellStart(int numObjects, __global int2* pHash, __global int* cellStart )\n"
|
||||||
|
"{\n"
|
||||||
|
" __local int sharedHash[513];\n"
|
||||||
|
" int index = get_global_id(0);\n"
|
||||||
|
" int2 sortedData;\n"
|
||||||
|
" if(index < numObjects)\n"
|
||||||
|
" {\n"
|
||||||
|
" sortedData = pHash[index];\n"
|
||||||
|
" // Load hash data into shared memory so that we can look \n"
|
||||||
|
" // at neighboring body's hash value without loading\n"
|
||||||
|
" // two hash values per thread\n"
|
||||||
|
" sharedHash[get_local_id(0) + 1] = sortedData.x;\n"
|
||||||
|
" if((index > 0) && (get_local_id(0) == 0))\n"
|
||||||
|
" {\n"
|
||||||
|
" printf(\"%d sharedHash!\\n\", index);\n"
|
||||||
|
" // first thread in block must load neighbor body hash\n"
|
||||||
|
" sharedHash[0] = pHash[index-1].x;\n"
|
||||||
|
" }\n"
|
||||||
|
" }\n"
|
||||||
|
" barrier(CLK_LOCAL_MEM_FENCE);\n"
|
||||||
|
" if(index < numObjects)\n"
|
||||||
|
" {\n"
|
||||||
|
" if((index == 0) || (sortedData.x != sharedHash[get_local_id(0)]))\n"
|
||||||
|
" {\n"
|
||||||
|
" printf(\"%d cellStart!\\n\", index);\n"
|
||||||
|
" cellStart[sortedData.x] = index;\n"
|
||||||
|
" }\n"
|
||||||
|
" }\n"
|
||||||
|
"}\n"
|
||||||
|
"int testAABBOverlap(float4 min0, float4 max0, float4 min1, float4 max1)\n"
|
||||||
|
"{\n"
|
||||||
|
" return (min0.x <= max1.x)&& (min1.x <= max0.x) && \n"
|
||||||
|
" (min0.y <= max1.y)&& (min1.y <= max0.y) && \n"
|
||||||
|
" (min0.z <= max1.z)&& (min1.z <= max0.z); \n"
|
||||||
|
"}\n"
|
||||||
|
"void findPairsInCell( int numObjects,\n"
|
||||||
|
" int4 gridPos,\n"
|
||||||
|
" int index,\n"
|
||||||
|
" __global int2* pHash,\n"
|
||||||
|
" __global int* pCellStart,\n"
|
||||||
|
" __global float4* pAABB, \n"
|
||||||
|
" __global int* pPairBuff,\n"
|
||||||
|
" __global int2* pPairBuffStartCurr,\n"
|
||||||
|
" __global float4* pParams)\n"
|
||||||
|
"{\n"
|
||||||
|
" int4 pGridDim = *((__global int4*)(pParams + 1));\n"
|
||||||
|
" int maxBodiesPerCell = pGridDim.w;\n"
|
||||||
|
" int gridHash = getPosHash(gridPos, pParams);\n"
|
||||||
|
" // get start of bucket for this cell\n"
|
||||||
|
" int bucketStart = pCellStart[gridHash];\n"
|
||||||
|
" if (bucketStart == -1)\n"
|
||||||
|
" {\n"
|
||||||
|
" return; // cell empty\n"
|
||||||
|
" }\n"
|
||||||
|
" // iterate over bodies in this cell\n"
|
||||||
|
" int2 sortedData = pHash[index];\n"
|
||||||
|
" int unsorted_indx = sortedData.y;\n"
|
||||||
|
" float4 min0 = pAABB[unsorted_indx*2 + 0]; \n"
|
||||||
|
" float4 max0 = pAABB[unsorted_indx*2 + 1];\n"
|
||||||
|
" int handleIndex = as_int(min0.w);\n"
|
||||||
|
" int2 start_curr = pPairBuffStartCurr[handleIndex];\n"
|
||||||
|
" int start = start_curr.x;\n"
|
||||||
|
" int curr = start_curr.y;\n"
|
||||||
|
" int2 start_curr_next = pPairBuffStartCurr[handleIndex+1];\n"
|
||||||
|
" int curr_max = start_curr_next.x - start - 1;\n"
|
||||||
|
" int bucketEnd = bucketStart + maxBodiesPerCell;\n"
|
||||||
|
" bucketEnd = (bucketEnd > numObjects) ? numObjects : bucketEnd;\n"
|
||||||
|
" for(int index2 = bucketStart; index2 < bucketEnd; index2++) \n"
|
||||||
|
" {\n"
|
||||||
|
" int2 cellData = pHash[index2];\n"
|
||||||
|
" if (cellData.x != gridHash)\n"
|
||||||
|
" {\n"
|
||||||
|
" break; // no longer in same bucket\n"
|
||||||
|
" }\n"
|
||||||
|
" int unsorted_indx2 = cellData.y;\n"
|
||||||
|
" if (unsorted_indx2 < unsorted_indx) // check not colliding with self\n"
|
||||||
|
" { \n"
|
||||||
|
" float4 min1 = pAABB[unsorted_indx2*2 + 0];\n"
|
||||||
|
" float4 max1 = pAABB[unsorted_indx2*2 + 1];\n"
|
||||||
|
" if(testAABBOverlap(min0, max0, min1, max1))\n"
|
||||||
|
" {\n"
|
||||||
|
" int handleIndex2 = as_int(min1.w);\n"
|
||||||
|
" int k;\n"
|
||||||
|
" for(k = 0; k < curr; k++)\n"
|
||||||
|
" {\n"
|
||||||
|
" int old_pair = pPairBuff[start+k] & (~0x60000000);\n"
|
||||||
|
" if(old_pair == handleIndex2)\n"
|
||||||
|
" {\n"
|
||||||
|
" pPairBuff[start+k] |= 0x40000000;\n"
|
||||||
|
" break;\n"
|
||||||
|
" }\n"
|
||||||
|
" }\n"
|
||||||
|
" if(k == curr)\n"
|
||||||
|
" {\n"
|
||||||
|
" if(curr >= curr_max) \n"
|
||||||
|
" { // not a good solution, but let's avoid crash\n"
|
||||||
|
" break;\n"
|
||||||
|
" }\n"
|
||||||
|
" pPairBuff[start+curr] = handleIndex2 | 0x20000000;\n"
|
||||||
|
" curr++;\n"
|
||||||
|
" }\n"
|
||||||
|
" }\n"
|
||||||
|
" }\n"
|
||||||
|
" }\n"
|
||||||
|
" int2 newStartCurr;\n"
|
||||||
|
" newStartCurr.x = start;\n"
|
||||||
|
" newStartCurr.y = curr;\n"
|
||||||
|
" pPairBuffStartCurr[handleIndex] = newStartCurr;\n"
|
||||||
|
" return;\n"
|
||||||
|
"}\n"
|
||||||
|
"__kernel void kFindOverlappingPairs( int numObjects,\n"
|
||||||
|
" __global float4* pAABB, \n"
|
||||||
|
" __global int2* pHash, \n"
|
||||||
|
" __global int* pCellStart, \n"
|
||||||
|
" __global int* pPairBuff, \n"
|
||||||
|
" __global int2* pPairBuffStartCurr, \n"
|
||||||
|
" __global float4* pParams )\n"
|
||||||
|
"{\n"
|
||||||
|
" int index = get_global_id(0);\n"
|
||||||
|
" if(index >= numObjects)\n"
|
||||||
|
" {\n"
|
||||||
|
" return;\n"
|
||||||
|
" }\n"
|
||||||
|
" int2 sortedData = pHash[index];\n"
|
||||||
|
" int unsorted_indx = sortedData.y;\n"
|
||||||
|
" float4 bbMin = pAABB[unsorted_indx*2 + 0];\n"
|
||||||
|
" float4 bbMax = pAABB[unsorted_indx*2 + 1];\n"
|
||||||
|
" float4 pos;\n"
|
||||||
|
" pos.x = (bbMin.x + bbMax.x) * 0.5f;\n"
|
||||||
|
" pos.y = (bbMin.y + bbMax.y) * 0.5f;\n"
|
||||||
|
" pos.z = (bbMin.z + bbMax.z) * 0.5f;\n"
|
||||||
|
" // get address in grid\n"
|
||||||
|
" int4 gridPosA = getGridPos(pos, pParams);\n"
|
||||||
|
" int4 gridPosB; \n"
|
||||||
|
" // examine only neighbouring cells\n"
|
||||||
|
" for(int z=-1; z<=1; z++) \n"
|
||||||
|
" {\n"
|
||||||
|
" gridPosB.z = gridPosA.z + z;\n"
|
||||||
|
" for(int y=-1; y<=1; y++) \n"
|
||||||
|
" {\n"
|
||||||
|
" gridPosB.y = gridPosA.y + y;\n"
|
||||||
|
" for(int x=-1; x<=1; x++) \n"
|
||||||
|
" {\n"
|
||||||
|
" gridPosB.x = gridPosA.x + x;\n"
|
||||||
|
" findPairsInCell(numObjects, gridPosB, index, pHash, pCellStart, pAABB, pPairBuff, pPairBuffStartCurr, pParams);\n"
|
||||||
|
" }\n"
|
||||||
|
" }\n"
|
||||||
|
" }\n"
|
||||||
|
"}\n"
|
||||||
|
"__kernel void kFindPairsLarge( int numObjects, \n"
|
||||||
|
" __global float4* pAABB, \n"
|
||||||
|
" __global int2* pHash, \n"
|
||||||
|
" __global int* pCellStart, \n"
|
||||||
|
" __global int* pPairBuff, \n"
|
||||||
|
" __global int2* pPairBuffStartCurr, \n"
|
||||||
|
" uint numLarge )\n"
|
||||||
|
"{\n"
|
||||||
|
" int index = get_global_id(0);\n"
|
||||||
|
" if(index >= numObjects)\n"
|
||||||
|
" {\n"
|
||||||
|
" return;\n"
|
||||||
|
" }\n"
|
||||||
|
" int2 sortedData = pHash[index];\n"
|
||||||
|
" int unsorted_indx = sortedData.y;\n"
|
||||||
|
" float4 min0 = pAABB[unsorted_indx*2 + 0];\n"
|
||||||
|
" float4 max0 = pAABB[unsorted_indx*2 + 1];\n"
|
||||||
|
" int handleIndex = as_int(min0.w);\n"
|
||||||
|
" int2 start_curr = pPairBuffStartCurr[handleIndex];\n"
|
||||||
|
" int start = start_curr.x;\n"
|
||||||
|
" int curr = start_curr.y;\n"
|
||||||
|
" int2 start_curr_next = pPairBuffStartCurr[handleIndex+1];\n"
|
||||||
|
" int curr_max = start_curr_next.x - start - 1;\n"
|
||||||
|
" for(uint i = 0; i < numLarge; i++)\n"
|
||||||
|
" {\n"
|
||||||
|
" int indx2 = numObjects + i;\n"
|
||||||
|
" float4 min1 = pAABB[indx2*2 + 0];\n"
|
||||||
|
" float4 max1 = pAABB[indx2*2 + 1];\n"
|
||||||
|
" if(testAABBOverlap(min0, max0, min1, max1))\n"
|
||||||
|
" {\n"
|
||||||
|
" int k;\n"
|
||||||
|
" int handleIndex2 = as_int(min1.w);\n"
|
||||||
|
" for(k = 0; k < curr; k++)\n"
|
||||||
|
" {\n"
|
||||||
|
" int old_pair = pPairBuff[start+k] & (~0x60000000);\n"
|
||||||
|
" if(old_pair == handleIndex2)\n"
|
||||||
|
" {\n"
|
||||||
|
" pPairBuff[start+k] |= 0x40000000;\n"
|
||||||
|
" break;\n"
|
||||||
|
" }\n"
|
||||||
|
" }\n"
|
||||||
|
" if(k == curr)\n"
|
||||||
|
" {\n"
|
||||||
|
" pPairBuff[start+curr] = handleIndex2 | 0x20000000;\n"
|
||||||
|
" if(curr >= curr_max) \n"
|
||||||
|
" { // not a good solution, but let's avoid crash\n"
|
||||||
|
" break;\n"
|
||||||
|
" }\n"
|
||||||
|
" curr++;\n"
|
||||||
|
" }\n"
|
||||||
|
" }\n"
|
||||||
|
" }\n"
|
||||||
|
" int2 newStartCurr;\n"
|
||||||
|
" newStartCurr.x = start;\n"
|
||||||
|
" newStartCurr.y = curr;\n"
|
||||||
|
" pPairBuffStartCurr[handleIndex] = newStartCurr;\n"
|
||||||
|
" return;\n"
|
||||||
|
"}\n"
|
||||||
|
"__kernel void kComputePairCacheChanges( int numObjects,\n"
|
||||||
|
" __global int* pPairBuff, \n"
|
||||||
|
" __global int2* pPairBuffStartCurr, \n"
|
||||||
|
" __global int* pPairScan, \n"
|
||||||
|
" __global float4* pAABB )\n"
|
||||||
|
"{\n"
|
||||||
|
" int index = get_global_id(0);\n"
|
||||||
|
" if(index >= numObjects)\n"
|
||||||
|
" {\n"
|
||||||
|
" return;\n"
|
||||||
|
" }\n"
|
||||||
|
" float4 bbMin = pAABB[index * 2];\n"
|
||||||
|
" int handleIndex = as_int(bbMin.w);\n"
|
||||||
|
" int2 start_curr = pPairBuffStartCurr[handleIndex];\n"
|
||||||
|
" int start = start_curr.x;\n"
|
||||||
|
" int curr = start_curr.y;\n"
|
||||||
|
" __global int *pInp = pPairBuff + start;\n"
|
||||||
|
" int num_changes = 0;\n"
|
||||||
|
" for(int k = 0; k < curr; k++, pInp++)\n"
|
||||||
|
" {\n"
|
||||||
|
" if(!((*pInp) & 0x40000000))\n"
|
||||||
|
" {\n"
|
||||||
|
" num_changes++;\n"
|
||||||
|
" }\n"
|
||||||
|
" }\n"
|
||||||
|
" pPairScan[index+1] = num_changes;\n"
|
||||||
|
"} \n"
|
||||||
|
"__kernel void kSqueezeOverlappingPairBuff( int numObjects,\n"
|
||||||
|
" __global int* pPairBuff, \n"
|
||||||
|
" __global int2* pPairBuffStartCurr, \n"
|
||||||
|
" __global int* pPairScan,\n"
|
||||||
|
" __global int* pPairOut, \n"
|
||||||
|
" __global float4* pAABB )\n"
|
||||||
|
"{\n"
|
||||||
|
" int index = get_global_id(0);\n"
|
||||||
|
" if(index >= numObjects)\n"
|
||||||
|
" {\n"
|
||||||
|
" return;\n"
|
||||||
|
" }\n"
|
||||||
|
" float4 bbMin = pAABB[index * 2];\n"
|
||||||
|
" int handleIndex = as_int(bbMin.w);\n"
|
||||||
|
" int2 start_curr = pPairBuffStartCurr[handleIndex];\n"
|
||||||
|
" int start = start_curr.x;\n"
|
||||||
|
" int curr = start_curr.y;\n"
|
||||||
|
" __global int* pInp = pPairBuff + start;\n"
|
||||||
|
" __global int* pOut = pPairOut + pPairScan[index+1];\n"
|
||||||
|
" __global int* pOut2 = pInp;\n"
|
||||||
|
" int num = 0; \n"
|
||||||
|
" for(int k = 0; k < curr; k++, pInp++)\n"
|
||||||
|
" {\n"
|
||||||
|
" if(!((*pInp) & 0x40000000))\n"
|
||||||
|
" {\n"
|
||||||
|
" *pOut = *pInp;\n"
|
||||||
|
" pOut++;\n"
|
||||||
|
" }\n"
|
||||||
|
" if((*pInp) & 0x60000000)\n"
|
||||||
|
" {\n"
|
||||||
|
" *pOut2 = (*pInp) & (~0x60000000);\n"
|
||||||
|
" pOut2++;\n"
|
||||||
|
" num++;\n"
|
||||||
|
" }\n"
|
||||||
|
" }\n"
|
||||||
|
" int2 newStartCurr;\n"
|
||||||
|
" newStartCurr.x = start;\n"
|
||||||
|
" newStartCurr.y = num;\n"
|
||||||
|
" pPairBuffStartCurr[handleIndex] = newStartCurr;\n"
|
||||||
|
"}\n"
|
||||||
|
;
|
||||||
Reference in New Issue
Block a user