Accelerate GPU raycaster with PLBVH.
This commit is contained in:
@@ -8,6 +8,11 @@
|
||||
#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h"
|
||||
#include "Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h"
|
||||
#include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h"
|
||||
#include "Bullet3OpenCL/ParallelPrimitives/b3FillCL.h"
|
||||
#include "Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h"
|
||||
#include "Bullet3OpenCL/BroadphaseCollision/b3GpuBroadphaseInterface.h"
|
||||
#include "Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvh.h"
|
||||
|
||||
#include "Bullet3OpenCL/Raycast/kernels/rayCastKernels.h"
|
||||
|
||||
|
||||
@@ -20,7 +25,24 @@ struct b3GpuRaycastInternalData
|
||||
cl_context m_context;
|
||||
cl_device_id m_device;
|
||||
cl_command_queue m_q;
|
||||
cl_kernel m_raytraceKernel;
|
||||
cl_kernel m_raytraceKernel;
|
||||
cl_kernel m_raytracePairsKernel;
|
||||
cl_kernel m_findRayRigidPairIndexRanges;
|
||||
|
||||
b3GpuParallelLinearBvh* m_plbvh;
|
||||
b3RadixSort32CL* m_radixSorter;
|
||||
b3FillCL* m_fill;
|
||||
|
||||
//1 element per ray
|
||||
b3OpenCLArray<b3RayInfo>* m_gpuRays;
|
||||
b3OpenCLArray<b3RayHit>* m_gpuHitResults;
|
||||
b3OpenCLArray<int>* m_firstRayRigidPairIndexPerRay;
|
||||
b3OpenCLArray<int>* m_numRayRigidPairsPerRay;
|
||||
|
||||
//1 element per (ray index, rigid index) pair
|
||||
b3OpenCLArray<int>* m_gpuNumRayRigidPairs;
|
||||
b3OpenCLArray<b3Int2>* m_gpuRayRigidPairs;
|
||||
|
||||
int m_test;
|
||||
};
|
||||
|
||||
@@ -31,7 +53,19 @@ b3GpuRaycast::b3GpuRaycast(cl_context ctx,cl_device_id device, cl_command_queue
|
||||
m_data->m_device = device;
|
||||
m_data->m_q = q;
|
||||
m_data->m_raytraceKernel = 0;
|
||||
m_data->m_raytracePairsKernel = 0;
|
||||
m_data->m_findRayRigidPairIndexRanges = 0;
|
||||
|
||||
m_data->m_plbvh = new b3GpuParallelLinearBvh(ctx, device, q);
|
||||
m_data->m_radixSorter = new b3RadixSort32CL(ctx, device, q);
|
||||
m_data->m_fill = new b3FillCL(ctx, device, q);
|
||||
|
||||
m_data->m_gpuRays = new b3OpenCLArray<b3RayInfo>(ctx, q);
|
||||
m_data->m_gpuHitResults = new b3OpenCLArray<b3RayHit>(ctx, q);
|
||||
m_data->m_firstRayRigidPairIndexPerRay = new b3OpenCLArray<int>(ctx, q);
|
||||
m_data->m_numRayRigidPairsPerRay = new b3OpenCLArray<int>(ctx, q);
|
||||
m_data->m_gpuNumRayRigidPairs = new b3OpenCLArray<int>(ctx, q);
|
||||
m_data->m_gpuRayRigidPairs = new b3OpenCLArray<b3Int2>(ctx, q);
|
||||
|
||||
{
|
||||
cl_int errNum=0;
|
||||
@@ -39,6 +73,10 @@ b3GpuRaycast::b3GpuRaycast(cl_context ctx,cl_device_id device, cl_command_queue
|
||||
b3Assert(errNum==CL_SUCCESS);
|
||||
m_data->m_raytraceKernel = b3OpenCLUtils::compileCLKernelFromString(m_data->m_context, m_data->m_device,rayCastKernelCL, "rayCastKernel",&errNum,prog);
|
||||
b3Assert(errNum==CL_SUCCESS);
|
||||
m_data->m_raytracePairsKernel = b3OpenCLUtils::compileCLKernelFromString(m_data->m_context, m_data->m_device,rayCastKernelCL, "rayCastPairsKernel",&errNum,prog);
|
||||
b3Assert(errNum==CL_SUCCESS);
|
||||
m_data->m_findRayRigidPairIndexRanges = b3OpenCLUtils::compileCLKernelFromString(m_data->m_context, m_data->m_device,rayCastKernelCL, "findRayRigidPairIndexRanges",&errNum,prog);
|
||||
b3Assert(errNum==CL_SUCCESS);
|
||||
clReleaseProgram(prog);
|
||||
}
|
||||
|
||||
@@ -48,6 +86,20 @@ b3GpuRaycast::b3GpuRaycast(cl_context ctx,cl_device_id device, cl_command_queue
|
||||
b3GpuRaycast::~b3GpuRaycast()
|
||||
{
|
||||
clReleaseKernel(m_data->m_raytraceKernel);
|
||||
clReleaseKernel(m_data->m_raytracePairsKernel);
|
||||
clReleaseKernel(m_data->m_findRayRigidPairIndexRanges);
|
||||
|
||||
delete m_data->m_plbvh;
|
||||
delete m_data->m_radixSorter;
|
||||
delete m_data->m_fill;
|
||||
|
||||
delete m_data->m_gpuRays;
|
||||
delete m_data->m_gpuHitResults;
|
||||
delete m_data->m_firstRayRigidPairIndexPerRay;
|
||||
delete m_data->m_numRayRigidPairsPerRay;
|
||||
delete m_data->m_gpuNumRayRigidPairs;
|
||||
delete m_data->m_gpuRayRigidPairs;
|
||||
|
||||
delete m_data;
|
||||
}
|
||||
|
||||
@@ -206,27 +258,32 @@ void b3GpuRaycast::castRaysHost(const b3AlignedObjectArray<b3RayInfo>& rays, b3A
|
||||
}
|
||||
///todo: add some acceleration structure (AABBs, tree etc)
|
||||
void b3GpuRaycast::castRays(const b3AlignedObjectArray<b3RayInfo>& rays, b3AlignedObjectArray<b3RayHit>& hitResults,
|
||||
int numBodies,const struct b3RigidBodyData* bodies, int numCollidables, const struct b3Collidable* collidables, const struct b3GpuNarrowPhaseInternalData* narrowphaseData)
|
||||
int numBodies,const struct b3RigidBodyData* bodies, int numCollidables, const struct b3Collidable* collidables,
|
||||
const struct b3GpuNarrowPhaseInternalData* narrowphaseData, class b3GpuBroadphaseInterface* broadphase)
|
||||
{
|
||||
|
||||
//castRaysHost(rays,hitResults,numBodies,bodies,numCollidables,collidables,narrowphaseData);
|
||||
|
||||
B3_PROFILE("castRaysGPU");
|
||||
|
||||
b3OpenCLArray<b3RayInfo> gpuRays(m_data->m_context,m_data->m_q);
|
||||
b3OpenCLArray<b3RayHit> gpuHitResults(m_data->m_context,m_data->m_q);
|
||||
|
||||
|
||||
{
|
||||
B3_PROFILE("raycast copyFromHost");
|
||||
gpuRays.copyFromHost(rays);
|
||||
|
||||
|
||||
gpuHitResults.resize(hitResults.size());
|
||||
gpuHitResults.copyFromHost(hitResults);
|
||||
m_data->m_gpuRays->copyFromHost(rays);
|
||||
m_data->m_gpuHitResults->copyFromHost(hitResults);
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
int numRays = hitResults.size();
|
||||
{
|
||||
m_data->m_firstRayRigidPairIndexPerRay->resize(numRays);
|
||||
m_data->m_numRayRigidPairsPerRay->resize(numRays);
|
||||
|
||||
m_data->m_gpuNumRayRigidPairs->resize(1);
|
||||
m_data->m_gpuRayRigidPairs->resize(numRays * 16);
|
||||
}
|
||||
|
||||
//run kernel
|
||||
const bool USE_BRUTE_FORCE_RAYCAST = false;
|
||||
if(USE_BRUTE_FORCE_RAYCAST)
|
||||
{
|
||||
B3_PROFILE("raycast launch1D");
|
||||
|
||||
@@ -234,8 +291,8 @@ void b3GpuRaycast::castRays(const b3AlignedObjectArray<b3RayInfo>& rays, b3Align
|
||||
int numRays = rays.size();
|
||||
launcher.setConst(numRays);
|
||||
|
||||
launcher.setBuffer(gpuRays.getBufferCL());
|
||||
launcher.setBuffer(gpuHitResults.getBufferCL());
|
||||
launcher.setBuffer(m_data->m_gpuRays->getBufferCL());
|
||||
launcher.setBuffer(m_data->m_gpuHitResults->getBufferCL());
|
||||
|
||||
launcher.setConst(numBodies);
|
||||
launcher.setBuffer(narrowphaseData->m_bodyBufferGPU->getBufferCL());
|
||||
@@ -246,11 +303,90 @@ void b3GpuRaycast::castRays(const b3AlignedObjectArray<b3RayInfo>& rays, b3Align
|
||||
launcher.launch1D(numRays);
|
||||
clFinish(m_data->m_q);
|
||||
}
|
||||
else
|
||||
{
|
||||
//printf("broadphase->getAllAabbsGPU().size(): %d \n", broadphase->getAllAabbsGPU().size());
|
||||
m_data->m_plbvh->build( broadphase->getAllAabbsGPU() );
|
||||
|
||||
m_data->m_plbvh->testRaysAgainstBvhAabbs(*m_data->m_gpuRays, *m_data->m_gpuNumRayRigidPairs, *m_data->m_gpuRayRigidPairs);
|
||||
|
||||
int numRayRigidPairs = -1;
|
||||
m_data->m_gpuNumRayRigidPairs->copyToHostPointer(&numRayRigidPairs, 1);
|
||||
if( numRayRigidPairs > m_data->m_gpuRayRigidPairs->size() )
|
||||
{
|
||||
numRayRigidPairs = m_data->m_gpuRayRigidPairs->size();
|
||||
m_data->m_gpuNumRayRigidPairs->copyFromHostPointer(&numRayRigidPairs, 1);
|
||||
}
|
||||
|
||||
m_data->m_gpuRayRigidPairs->resize(numRayRigidPairs); //Radix sort needs b3OpenCLArray::size() to be correct
|
||||
|
||||
//Sort ray-rigid pairs by ray index
|
||||
{
|
||||
B3_PROFILE("sort ray-rigid pairs");
|
||||
m_data->m_radixSorter->execute( *reinterpret_cast< b3OpenCLArray<b3SortData>* >(m_data->m_gpuRayRigidPairs) );
|
||||
}
|
||||
|
||||
//detect start,count of each ray pair
|
||||
{
|
||||
B3_PROFILE("detect ray-rigid pair index ranges");
|
||||
|
||||
{
|
||||
B3_PROFILE("reset ray-rigid pair index ranges");
|
||||
|
||||
m_data->m_fill->execute(*m_data->m_firstRayRigidPairIndexPerRay, numRayRigidPairs, numRays); //atomic_min used to find first index
|
||||
m_data->m_fill->execute(*m_data->m_numRayRigidPairsPerRay, 0, numRays);
|
||||
clFinish(m_data->m_q);
|
||||
}
|
||||
|
||||
b3BufferInfoCL bufferInfo[] =
|
||||
{
|
||||
b3BufferInfoCL( m_data->m_gpuRayRigidPairs->getBufferCL() ),
|
||||
|
||||
b3BufferInfoCL( m_data->m_firstRayRigidPairIndexPerRay->getBufferCL() ),
|
||||
b3BufferInfoCL( m_data->m_numRayRigidPairsPerRay->getBufferCL() )
|
||||
};
|
||||
|
||||
b3LauncherCL launcher(m_data->m_q, m_data->m_findRayRigidPairIndexRanges, "m_findRayRigidPairIndexRanges");
|
||||
launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) );
|
||||
launcher.setConst(numRayRigidPairs);
|
||||
|
||||
launcher.launch1D(numRayRigidPairs);
|
||||
clFinish(m_data->m_q);
|
||||
}
|
||||
|
||||
{
|
||||
B3_PROFILE("ray-rigid intersection");
|
||||
|
||||
b3BufferInfoCL bufferInfo[] =
|
||||
{
|
||||
b3BufferInfoCL( m_data->m_gpuRays->getBufferCL() ),
|
||||
b3BufferInfoCL( m_data->m_gpuHitResults->getBufferCL() ),
|
||||
b3BufferInfoCL( m_data->m_firstRayRigidPairIndexPerRay->getBufferCL() ),
|
||||
b3BufferInfoCL( m_data->m_numRayRigidPairsPerRay->getBufferCL() ),
|
||||
|
||||
b3BufferInfoCL( narrowphaseData->m_bodyBufferGPU->getBufferCL() ),
|
||||
b3BufferInfoCL( narrowphaseData->m_collidablesGPU->getBufferCL() ),
|
||||
b3BufferInfoCL( narrowphaseData->m_convexFacesGPU->getBufferCL() ),
|
||||
b3BufferInfoCL( narrowphaseData->m_convexPolyhedraGPU->getBufferCL() ),
|
||||
|
||||
b3BufferInfoCL( m_data->m_gpuRayRigidPairs->getBufferCL() )
|
||||
};
|
||||
|
||||
b3LauncherCL launcher(m_data->m_q, m_data->m_raytracePairsKernel, "m_raytracePairsKernel");
|
||||
launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) );
|
||||
launcher.setConst(numRays);
|
||||
|
||||
launcher.launch1D(numRays);
|
||||
clFinish(m_data->m_q);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
//copy results
|
||||
{
|
||||
B3_PROFILE("raycast copyToHost");
|
||||
gpuHitResults.copyToHost(hitResults);
|
||||
m_data->m_gpuHitResults->copyToHost(hitResults);
|
||||
}
|
||||
|
||||
}
|
||||
Reference in New Issue
Block a user