Accelerate GPU raycaster with PLBVH.
This commit is contained in:
@@ -308,7 +308,7 @@ void GpuRaytraceScene::renderScene2()
|
|||||||
{
|
{
|
||||||
B3_PROFILE("cast primary rays");
|
B3_PROFILE("cast primary rays");
|
||||||
//m_raycaster->castRaysHost(primaryRays, hits, this->m_data->m_np->getNumRigidBodies(), m_data->m_np->getBodiesCpu(), m_data->m_np->getNumCollidablesGpu(), m_data->m_np->getCollidablesCpu(),m_data->m_np->getInternalData());
|
//m_raycaster->castRaysHost(primaryRays, hits, this->m_data->m_np->getNumRigidBodies(), m_data->m_np->getBodiesCpu(), m_data->m_np->getNumCollidablesGpu(), m_data->m_np->getCollidablesCpu(),m_data->m_np->getInternalData());
|
||||||
m_raycaster->castRays(primaryRays, hits, this->m_data->m_np->getNumRigidBodies(), m_data->m_np->getBodiesCpu(), m_data->m_np->getNumCollidablesGpu(), m_data->m_np->getCollidablesCpu(), m_data->m_np->getInternalData());
|
m_raycaster->castRays(primaryRays, hits, this->m_data->m_np->getNumRigidBodies(), m_data->m_np->getBodiesCpu(), m_data->m_np->getNumCollidablesGpu(), m_data->m_np->getCollidablesCpu(), m_data->m_np->getInternalData(), m_data->m_bp);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -350,7 +350,7 @@ void GpuRaytraceScene::renderScene2()
|
|||||||
{
|
{
|
||||||
B3_PROFILE("cast shadow rays");
|
B3_PROFILE("cast shadow rays");
|
||||||
//m_raycaster->castRaysHost(primaryRays, hits, this->m_data->m_np->getNumRigidBodies(), m_data->m_np->getBodiesCpu(), m_data->m_np->getNumCollidablesGpu(), m_data->m_np->getCollidablesCpu());
|
//m_raycaster->castRaysHost(primaryRays, hits, this->m_data->m_np->getNumRigidBodies(), m_data->m_np->getBodiesCpu(), m_data->m_np->getNumCollidablesGpu(), m_data->m_np->getCollidablesCpu());
|
||||||
m_raycaster->castRays(shadowRays, shadowHits, this->m_data->m_np->getNumRigidBodies(), m_data->m_np->getBodiesCpu(), m_data->m_np->getNumCollidablesGpu(), m_data->m_np->getCollidablesCpu(), m_data->m_np->getInternalData());
|
m_raycaster->castRays(shadowRays, shadowHits, this->m_data->m_np->getNumRigidBodies(), m_data->m_np->getBodiesCpu(), m_data->m_np->getNumCollidablesGpu(), m_data->m_np->getCollidablesCpu(), m_data->m_np->getInternalData(), m_data->m_bp);
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -18,6 +18,7 @@ subject to the following restrictions:
|
|||||||
#include "Bullet3OpenCL/BroadphaseCollision/b3SapAabb.h"
|
#include "Bullet3OpenCL/BroadphaseCollision/b3SapAabb.h"
|
||||||
#include "Bullet3Common/shared/b3Int2.h"
|
#include "Bullet3Common/shared/b3Int2.h"
|
||||||
#include "Bullet3Common/shared/b3Int4.h"
|
#include "Bullet3Common/shared/b3Int4.h"
|
||||||
|
#include "Bullet3Collision/NarrowPhaseCollision/b3RaycastInfo.h"
|
||||||
|
|
||||||
#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h"
|
#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h"
|
||||||
#include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h"
|
#include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h"
|
||||||
@@ -59,6 +60,7 @@ class b3GpuParallelLinearBvh
|
|||||||
cl_kernel m_determineInternalNodeAabbsKernel;
|
cl_kernel m_determineInternalNodeAabbsKernel;
|
||||||
|
|
||||||
cl_kernel m_plbvhCalculateOverlappingPairsKernel;
|
cl_kernel m_plbvhCalculateOverlappingPairsKernel;
|
||||||
|
cl_kernel m_plbvhRayTraverseKernel;
|
||||||
|
|
||||||
b3FillCL m_fill;
|
b3FillCL m_fill;
|
||||||
b3RadixSort32CL m_radixSorter;
|
b3RadixSort32CL m_radixSorter;
|
||||||
@@ -79,6 +81,7 @@ class b3GpuParallelLinearBvh
|
|||||||
b3OpenCLArray<int> m_leafNodeParentNodes;
|
b3OpenCLArray<int> m_leafNodeParentNodes;
|
||||||
b3OpenCLArray<b3SortData> m_mortonCodesAndAabbIndicies; //m_key = morton code, m_value == aabb index
|
b3OpenCLArray<b3SortData> m_mortonCodesAndAabbIndicies; //m_key = morton code, m_value == aabb index
|
||||||
b3OpenCLArray<b3SapAabb> m_mergedAabb;
|
b3OpenCLArray<b3SapAabb> m_mergedAabb;
|
||||||
|
b3OpenCLArray<b3SapAabb> m_leafNodeAabbs;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
b3GpuParallelLinearBvh(cl_context context, cl_device_id device, cl_command_queue queue) :
|
b3GpuParallelLinearBvh(cl_context context, cl_device_id device, cl_command_queue queue) :
|
||||||
@@ -94,7 +97,8 @@ public:
|
|||||||
m_internalNodeParentNodes(context, queue),
|
m_internalNodeParentNodes(context, queue),
|
||||||
m_leafNodeParentNodes(context, queue),
|
m_leafNodeParentNodes(context, queue),
|
||||||
m_mortonCodesAndAabbIndicies(context, queue),
|
m_mortonCodesAndAabbIndicies(context, queue),
|
||||||
m_mergedAabb(context, queue)
|
m_mergedAabb(context, queue),
|
||||||
|
m_leafNodeAabbs(context, queue)
|
||||||
{
|
{
|
||||||
const char CL_PROGRAM_PATH[] = "src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvh.cl";
|
const char CL_PROGRAM_PATH[] = "src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvh.cl";
|
||||||
|
|
||||||
@@ -115,6 +119,8 @@ public:
|
|||||||
|
|
||||||
m_plbvhCalculateOverlappingPairsKernel = b3OpenCLUtils::compileCLKernelFromString( context, device, kernelSource, "plbvhCalculateOverlappingPairs", &error, m_parallelLinearBvhProgram, additionalMacros );
|
m_plbvhCalculateOverlappingPairsKernel = b3OpenCLUtils::compileCLKernelFromString( context, device, kernelSource, "plbvhCalculateOverlappingPairs", &error, m_parallelLinearBvhProgram, additionalMacros );
|
||||||
b3Assert(m_plbvhCalculateOverlappingPairsKernel);
|
b3Assert(m_plbvhCalculateOverlappingPairsKernel);
|
||||||
|
m_plbvhRayTraverseKernel = b3OpenCLUtils::compileCLKernelFromString( context, device, kernelSource, "plbvhRayTraverse", &error, m_parallelLinearBvhProgram, additionalMacros );
|
||||||
|
b3Assert(m_plbvhRayTraverseKernel);
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual ~b3GpuParallelLinearBvh()
|
virtual ~b3GpuParallelLinearBvh()
|
||||||
@@ -125,6 +131,7 @@ public:
|
|||||||
clReleaseKernel(m_determineInternalNodeAabbsKernel);
|
clReleaseKernel(m_determineInternalNodeAabbsKernel);
|
||||||
|
|
||||||
clReleaseKernel(m_plbvhCalculateOverlappingPairsKernel);
|
clReleaseKernel(m_plbvhCalculateOverlappingPairsKernel);
|
||||||
|
clReleaseKernel(m_plbvhRayTraverseKernel);
|
||||||
|
|
||||||
clReleaseProgram(m_parallelLinearBvhProgram);
|
clReleaseProgram(m_parallelLinearBvhProgram);
|
||||||
}
|
}
|
||||||
@@ -148,8 +155,12 @@ public:
|
|||||||
m_leafNodeParentNodes.resize(numLeaves);
|
m_leafNodeParentNodes.resize(numLeaves);
|
||||||
m_mortonCodesAndAabbIndicies.resize(numLeaves);
|
m_mortonCodesAndAabbIndicies.resize(numLeaves);
|
||||||
m_mergedAabb.resize(numLeaves);
|
m_mergedAabb.resize(numLeaves);
|
||||||
|
m_leafNodeAabbs.resize(numLeaves);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//
|
||||||
|
m_leafNodeAabbs.copyFromOpenCLArray(worldSpaceAabbs);
|
||||||
|
|
||||||
//Determine number of levels in the binary tree( numLevels = ceil( log2(numLeaves) ) )
|
//Determine number of levels in the binary tree( numLevels = ceil( log2(numLeaves) ) )
|
||||||
//The number of levels is equivalent to the number of bits needed to uniquely identify each node(including both internal and leaf nodes)
|
//The number of levels is equivalent to the number of bits needed to uniquely identify each node(including both internal and leaf nodes)
|
||||||
int numLevels = 0;
|
int numLevels = 0;
|
||||||
@@ -168,7 +179,7 @@ public:
|
|||||||
if(0) printf("numLeaves, numLevels, mostSignificantBit: %d, %d, %d \n", numLeaves, numLevels, mostSignificantBit);
|
if(0) printf("numLeaves, numLevels, mostSignificantBit: %d, %d, %d \n", numLeaves, numLevels, mostSignificantBit);
|
||||||
}
|
}
|
||||||
|
|
||||||
//Determine number of nodes per level, use prefix sum to get offsets of each level, and send to GPU
|
//Determine number of internal nodes per level, use prefix sum to get offsets of each level, and send to GPU
|
||||||
{
|
{
|
||||||
B3_PROFILE("Determine number of nodes per level");
|
B3_PROFILE("Determine number of nodes per level");
|
||||||
|
|
||||||
@@ -329,7 +340,7 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
//For each internal node, check children to get its AABB; start from the
|
//For each internal node, check children to get its AABB; start from the
|
||||||
//last level and move towards the root
|
//last level, which contains the leaves, and move towards the root
|
||||||
{
|
{
|
||||||
B3_PROFILE("Set AABBs");
|
B3_PROFILE("Set AABBs");
|
||||||
|
|
||||||
@@ -416,10 +427,12 @@ public:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
//Max number of pairs is out_overlappingPairs.size()
|
///b3GpuParallelLinearBvh::build() must be called before this function. calculateOverlappingPairs() uses
|
||||||
//If the number of overlapping pairs is < out_overlappingPairs.size(), the array is resized
|
///the worldSpaceAabbs parameter of b3GpuParallelLinearBvh::build() as the query AABBs.
|
||||||
void calculateOverlappingPairs(const b3OpenCLArray<b3SapAabb>& worldSpaceAabbs,
|
///@param out_numPairs If number of pairs exceeds the max number of pairs, this is clamped to the max number.
|
||||||
b3OpenCLArray<int>& out_numPairs, b3OpenCLArray<b3Int4>& out_overlappingPairs)
|
///@param out_overlappingPairs The size() of this array is used to determine the max number of pairs.
|
||||||
|
///If the number of overlapping pairs is < out_overlappingPairs.size(), out_overlappingPairs is resized.
|
||||||
|
void calculateOverlappingPairs(b3OpenCLArray<int>& out_numPairs, b3OpenCLArray<b3Int4>& out_overlappingPairs)
|
||||||
{
|
{
|
||||||
b3Assert( out_numPairs.size() == 1 );
|
b3Assert( out_numPairs.size() == 1 );
|
||||||
|
|
||||||
@@ -431,11 +444,11 @@ public:
|
|||||||
{
|
{
|
||||||
B3_PROFILE("PLBVH calculateOverlappingPairs");
|
B3_PROFILE("PLBVH calculateOverlappingPairs");
|
||||||
|
|
||||||
int numQueryAabbs = worldSpaceAabbs.size();
|
int numQueryAabbs = m_leafNodeAabbs.size();
|
||||||
|
|
||||||
b3BufferInfoCL bufferInfo[] =
|
b3BufferInfoCL bufferInfo[] =
|
||||||
{
|
{
|
||||||
b3BufferInfoCL( worldSpaceAabbs.getBufferCL() ),
|
b3BufferInfoCL( m_leafNodeAabbs.getBufferCL() ),
|
||||||
|
|
||||||
b3BufferInfoCL( m_internalNodeChildNodes.getBufferCL() ),
|
b3BufferInfoCL( m_internalNodeChildNodes.getBufferCL() ),
|
||||||
b3BufferInfoCL( m_internalNodeAabbs.getBufferCL() ),
|
b3BufferInfoCL( m_internalNodeAabbs.getBufferCL() ),
|
||||||
@@ -468,6 +481,53 @@ public:
|
|||||||
|
|
||||||
out_overlappingPairs.resize(numPairs);
|
out_overlappingPairs.resize(numPairs);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
///@param out_numRigidRayPairs Array of length 1; contains the number of detected ray-rigid AABB intersections;
|
||||||
|
///this value may be greater than out_rayRigidPairs.size() if out_rayRigidPairs is not large enough.
|
||||||
|
///@param out_rayRigidPairs Contains an array of rays intersecting rigid AABBs; x == ray index, y == rigid body index.
|
||||||
|
///If the size of this array is insufficient to hold all ray-rigid AABB intersections, additional intersections are discarded.
|
||||||
|
void testRaysAgainstBvhAabbs(const b3OpenCLArray<b3RayInfo>& rays,
|
||||||
|
b3OpenCLArray<int>& out_numRayRigidPairs, b3OpenCLArray<b3Int2>& out_rayRigidPairs)
|
||||||
|
{
|
||||||
|
B3_PROFILE("PLBVH testRaysAgainstBvhAabbs()");
|
||||||
|
|
||||||
|
int numRays = rays.size();
|
||||||
|
int maxRayRigidPairs = out_rayRigidPairs.size();
|
||||||
|
|
||||||
|
int reset = 0;
|
||||||
|
out_numRayRigidPairs.copyFromHostPointer(&reset, 1);
|
||||||
|
|
||||||
|
b3BufferInfoCL bufferInfo[] =
|
||||||
|
{
|
||||||
|
b3BufferInfoCL( m_leafNodeAabbs.getBufferCL() ),
|
||||||
|
b3BufferInfoCL( m_internalNodeChildNodes.getBufferCL() ),
|
||||||
|
b3BufferInfoCL( m_internalNodeAabbs.getBufferCL() ),
|
||||||
|
b3BufferInfoCL( m_internalNodeLeafIndexRanges.getBufferCL() ),
|
||||||
|
b3BufferInfoCL( m_mortonCodesAndAabbIndicies.getBufferCL() ),
|
||||||
|
|
||||||
|
b3BufferInfoCL( rays.getBufferCL() ),
|
||||||
|
|
||||||
|
b3BufferInfoCL( out_numRayRigidPairs.getBufferCL() ),
|
||||||
|
b3BufferInfoCL( out_rayRigidPairs.getBufferCL() )
|
||||||
|
};
|
||||||
|
|
||||||
|
b3LauncherCL launcher(m_queue, m_plbvhRayTraverseKernel, "m_plbvhRayTraverseKernel");
|
||||||
|
launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) );
|
||||||
|
launcher.setConst(maxRayRigidPairs);
|
||||||
|
launcher.setConst(numRays);
|
||||||
|
|
||||||
|
launcher.launch1D(numRays);
|
||||||
|
clFinish(m_queue);
|
||||||
|
|
||||||
|
|
||||||
|
//
|
||||||
|
int numRayRigidPairs = -1;
|
||||||
|
out_numRayRigidPairs.copyToHostPointer(&numRayRigidPairs, 1);
|
||||||
|
|
||||||
|
if(numRayRigidPairs > maxRayRigidPairs)
|
||||||
|
b3Error("Error running out of rayRigid pairs: numRayRigidPairs = %d, maxRayRigidPairs = %d.\n", numRayRigidPairs, maxRayRigidPairs);
|
||||||
|
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -61,7 +61,7 @@ public:
|
|||||||
|
|
||||||
//
|
//
|
||||||
m_overlappingPairsGpu.resize(maxPairs);
|
m_overlappingPairsGpu.resize(maxPairs);
|
||||||
m_plbvh.calculateOverlappingPairs(m_aabbsGpu, m_tempNumPairs, m_overlappingPairsGpu);
|
m_plbvh.calculateOverlappingPairs(m_tempNumPairs, m_overlappingPairsGpu);
|
||||||
}
|
}
|
||||||
virtual void calculateOverlappingPairsHost(int maxPairs)
|
virtual void calculateOverlappingPairsHost(int maxPairs)
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -15,6 +15,7 @@ typedef float b3Scalar;
|
|||||||
typedef float4 b3Vector3;
|
typedef float4 b3Vector3;
|
||||||
#define b3Max max
|
#define b3Max max
|
||||||
#define b3Min min
|
#define b3Min min
|
||||||
|
#define b3Sqrt sqrt
|
||||||
|
|
||||||
typedef struct
|
typedef struct
|
||||||
{
|
{
|
||||||
@@ -388,3 +389,161 @@ __kernel void plbvhCalculateOverlappingPairs(__global b3AabbCL* rigidAabbs,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
//From rayCastKernels.cl
|
||||||
|
typedef struct
|
||||||
|
{
|
||||||
|
float4 m_from;
|
||||||
|
float4 m_to;
|
||||||
|
} b3RayInfo;
|
||||||
|
|
||||||
|
typedef struct
|
||||||
|
{
|
||||||
|
float m_hitFraction;
|
||||||
|
int m_hitResult0;
|
||||||
|
int m_hitResult1;
|
||||||
|
int m_hitResult2;
|
||||||
|
float4 m_hitPoint;
|
||||||
|
float4 m_hitNormal;
|
||||||
|
} b3RayHit;
|
||||||
|
//From rayCastKernels.cl
|
||||||
|
|
||||||
|
b3Vector3 b3Vector3_normalize(b3Vector3 v)
|
||||||
|
{
|
||||||
|
b3Vector3 normal = (b3Vector3){v.x, v.y, v.z, 0.f};
|
||||||
|
return normalize(normal); //OpenCL normalize == vector4 normalize
|
||||||
|
}
|
||||||
|
b3Scalar b3Vector3_length2(b3Vector3 v) { return v.x*v.x + v.y*v.y + v.z*v.z; }
|
||||||
|
b3Scalar b3Vector3_dot(b3Vector3 a, b3Vector3 b) { return a.x*b.x + a.y*b.y + a.z*b.z; }
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
|
||||||
|
int rayIntersectsAabb_optimized(b3Vector3 rayFrom, b3Vector3 rayTo, b3Vector3 rayNormalizedDirection, b3AabbCL aabb)
|
||||||
|
{
|
||||||
|
// not functional -- need to fix
|
||||||
|
|
||||||
|
//aabb is considered as 3 pairs of 2 planes( {x_min, x_max}, {y_min, y_max}, {z_min, z_max} )
|
||||||
|
//t_min is the first intersection, t_max is the second intersection
|
||||||
|
b3Vector3 inverseRayDirection = (b3Vector3){1.0f, 1.0f, 1.0f, 0.0f} / rayNormalizedDirection;
|
||||||
|
int4 sign = isless( inverseRayDirection, (b3Vector3){0.0f, 0.0f, 0.0f, 0.0f} ); //isless(x,y) returns (x < y)
|
||||||
|
|
||||||
|
//select(b, a, condition) == condition ? a : b
|
||||||
|
b3Vector3 t_min = ( select(aabb.m_min, aabb.m_max, sign) - rayFrom ) * inverseRayDirection;
|
||||||
|
b3Vector3 t_max = ( select(aabb.m_min, aabb.m_max, (int4){1,1,1,1} - sign) - rayFrom ) * inverseRayDirection;
|
||||||
|
|
||||||
|
b3Scalar t_min_final = 0.0f;
|
||||||
|
b3Scalar t_max_final = b3Sqrt( b3Vector3_length2(rayTo - rayFrom) );
|
||||||
|
|
||||||
|
//Must use fmin()/fmax(); if one of the parameters is NaN, then the parameter that is not NaN is returned.
|
||||||
|
//Behavior of min()/max() with NaNs is undefined. (See OpenCL Specification 1.2 [6.12.2] and [6.12.4])
|
||||||
|
//Since the innermost fmin()/fmax() is always not NaN, this should never return NaN
|
||||||
|
t_min_final = fmax( t_min.z, fmax(t_min.y, fmax(t_min.x, t_min_final)) );
|
||||||
|
t_max_final = fmin( t_max.z, fmin(t_max.y, fmin(t_max.x, t_max_final)) );
|
||||||
|
|
||||||
|
return (t_min_final <= t_max_final);
|
||||||
|
}
|
||||||
|
**/
|
||||||
|
|
||||||
|
void rayPlanePairTest(b3Scalar rayStart, b3Scalar rayNormalizedDirection,
|
||||||
|
b3Scalar planeMin, b3Scalar planeMax,
|
||||||
|
b3Scalar* out_t_min, b3Scalar* out_t_max)
|
||||||
|
{
|
||||||
|
if(rayNormalizedDirection < 0.0f)
|
||||||
|
{
|
||||||
|
//max is closer, min is farther
|
||||||
|
*out_t_min = (planeMax - rayStart) / rayNormalizedDirection;
|
||||||
|
*out_t_max = (planeMin - rayStart) / rayNormalizedDirection;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
//min is closer, max is farther
|
||||||
|
*out_t_min = (planeMin - rayStart) / rayNormalizedDirection;
|
||||||
|
*out_t_max = (planeMax - rayStart) / rayNormalizedDirection;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
int rayIntersectsAabb(b3Vector3 rayFrom, b3Vector3 rayTo, b3Vector3 rayNormalizedDirection, b3AabbCL aabb)
|
||||||
|
{
|
||||||
|
b3Scalar t_min_x, t_min_y, t_min_z;
|
||||||
|
b3Scalar t_max_x, t_max_y, t_max_z;
|
||||||
|
|
||||||
|
rayPlanePairTest(rayFrom.x, rayNormalizedDirection.x, aabb.m_min.x, aabb.m_max.x, &t_min_x, &t_max_x);
|
||||||
|
rayPlanePairTest(rayFrom.y, rayNormalizedDirection.y, aabb.m_min.y, aabb.m_max.y, &t_min_y, &t_max_y);
|
||||||
|
rayPlanePairTest(rayFrom.z, rayNormalizedDirection.z, aabb.m_min.z, aabb.m_max.z, &t_min_z, &t_max_z);
|
||||||
|
|
||||||
|
b3Scalar t_min_final = 0.0f;
|
||||||
|
b3Scalar t_max_final = b3Sqrt( b3Vector3_length2(rayTo - rayFrom) );
|
||||||
|
|
||||||
|
t_min_final = fmax( t_min_z, fmax(t_min_y, fmax(t_min_x, t_min_final)) );
|
||||||
|
t_max_final = fmin( t_max_z, fmin(t_max_y, fmin(t_max_x, t_max_final)) );
|
||||||
|
|
||||||
|
return (t_min_final <= t_max_final);
|
||||||
|
}
|
||||||
|
|
||||||
|
__kernel void plbvhRayTraverse(__global b3AabbCL* rigidAabbs,
|
||||||
|
__global int2* internalNodeChildIndices,
|
||||||
|
__global b3AabbCL* internalNodeAabbs,
|
||||||
|
__global int2* internalNodeLeafIndexRanges,
|
||||||
|
__global SortDataCL* mortonCodesAndAabbIndices,
|
||||||
|
|
||||||
|
__global b3RayInfo* rays,
|
||||||
|
|
||||||
|
__global int* out_numRayRigidPairs,
|
||||||
|
__global int2* out_rayRigidPairs,
|
||||||
|
int maxRayRigidPairs, int numRays)
|
||||||
|
{
|
||||||
|
int rayIndex = get_global_id(0);
|
||||||
|
if(rayIndex >= numRays) return;
|
||||||
|
|
||||||
|
b3Vector3 rayFrom = rays[rayIndex].m_from;
|
||||||
|
b3Vector3 rayTo = rays[rayIndex].m_to;
|
||||||
|
b3Vector3 rayNormalizedDirection = b3Vector3_normalize(rays[rayIndex].m_to - rays[rayIndex].m_from);
|
||||||
|
|
||||||
|
int stack[B3_PLVBH_TRAVERSE_MAX_STACK_SIZE];
|
||||||
|
|
||||||
|
//Starting by placing only the root node index, 0, in the stack causes it to be detected as a leaf node(see isLeafNode() in loop)
|
||||||
|
int stackSize = 2;
|
||||||
|
stack[0] = internalNodeChildIndices[B3_PLBVH_ROOT_NODE_INDEX].x;
|
||||||
|
stack[1] = internalNodeChildIndices[B3_PLBVH_ROOT_NODE_INDEX].y;
|
||||||
|
|
||||||
|
while(stackSize)
|
||||||
|
{
|
||||||
|
int internalOrLeafNodeIndex = stack[ stackSize - 1 ];
|
||||||
|
--stackSize;
|
||||||
|
|
||||||
|
int isLeaf = isLeafNode(internalOrLeafNodeIndex); //Internal node if false
|
||||||
|
int bvhNodeIndex = getIndexWithInternalNodeMarkerRemoved(internalOrLeafNodeIndex);
|
||||||
|
|
||||||
|
//bvhRigidIndex is not used if internal node
|
||||||
|
int bvhRigidIndex = (isLeaf) ? mortonCodesAndAabbIndices[bvhNodeIndex].m_value : -1;
|
||||||
|
|
||||||
|
b3AabbCL bvhNodeAabb = (isLeaf) ? rigidAabbs[bvhRigidIndex] : internalNodeAabbs[bvhNodeIndex];
|
||||||
|
|
||||||
|
if( rayIntersectsAabb(rayFrom, rayTo, rayNormalizedDirection, bvhNodeAabb) )
|
||||||
|
{
|
||||||
|
if(isLeaf)
|
||||||
|
{
|
||||||
|
int2 rayRigidPair;
|
||||||
|
rayRigidPair.x = rayIndex;
|
||||||
|
rayRigidPair.y = rigidAabbs[bvhRigidIndex].m_minIndices[3];
|
||||||
|
|
||||||
|
int pairIndex = atomic_inc(out_numRayRigidPairs);
|
||||||
|
if(pairIndex < maxRayRigidPairs) out_rayRigidPairs[pairIndex] = rayRigidPair;
|
||||||
|
}
|
||||||
|
|
||||||
|
if(!isLeaf) //Internal node
|
||||||
|
{
|
||||||
|
if(stackSize + 2 > B3_PLVBH_TRAVERSE_MAX_STACK_SIZE)
|
||||||
|
{
|
||||||
|
//Error
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
stack[ stackSize++ ] = internalNodeChildIndices[bvhNodeIndex].x;
|
||||||
|
stack[ stackSize++ ] = internalNodeChildIndices[bvhNodeIndex].y;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -15,6 +15,7 @@ static const char* parallelLinearBvhCL= \
|
|||||||
"typedef float4 b3Vector3;\n"
|
"typedef float4 b3Vector3;\n"
|
||||||
"#define b3Max max\n"
|
"#define b3Max max\n"
|
||||||
"#define b3Min min\n"
|
"#define b3Min min\n"
|
||||||
|
"#define b3Sqrt sqrt\n"
|
||||||
"typedef struct\n"
|
"typedef struct\n"
|
||||||
"{\n"
|
"{\n"
|
||||||
" unsigned int m_key;\n"
|
" unsigned int m_key;\n"
|
||||||
@@ -372,4 +373,151 @@ static const char* parallelLinearBvhCL= \
|
|||||||
" \n"
|
" \n"
|
||||||
" }\n"
|
" }\n"
|
||||||
"}\n"
|
"}\n"
|
||||||
|
"//From rayCastKernels.cl\n"
|
||||||
|
"typedef struct\n"
|
||||||
|
"{\n"
|
||||||
|
" float4 m_from;\n"
|
||||||
|
" float4 m_to;\n"
|
||||||
|
"} b3RayInfo;\n"
|
||||||
|
"typedef struct\n"
|
||||||
|
"{\n"
|
||||||
|
" float m_hitFraction;\n"
|
||||||
|
" int m_hitResult0;\n"
|
||||||
|
" int m_hitResult1;\n"
|
||||||
|
" int m_hitResult2;\n"
|
||||||
|
" float4 m_hitPoint;\n"
|
||||||
|
" float4 m_hitNormal;\n"
|
||||||
|
"} b3RayHit;\n"
|
||||||
|
"//From rayCastKernels.cl\n"
|
||||||
|
"b3Vector3 b3Vector3_normalize(b3Vector3 v)\n"
|
||||||
|
"{\n"
|
||||||
|
" b3Vector3 normal = (b3Vector3){v.x, v.y, v.z, 0.f};\n"
|
||||||
|
" return normalize(normal); //OpenCL normalize == vector4 normalize\n"
|
||||||
|
"}\n"
|
||||||
|
"b3Scalar b3Vector3_length2(b3Vector3 v) { return v.x*v.x + v.y*v.y + v.z*v.z; }\n"
|
||||||
|
"b3Scalar b3Vector3_dot(b3Vector3 a, b3Vector3 b) { return a.x*b.x + a.y*b.y + a.z*b.z; }\n"
|
||||||
|
"/**\n"
|
||||||
|
"int rayIntersectsAabb_optimized(b3Vector3 rayFrom, b3Vector3 rayTo, b3Vector3 rayNormalizedDirection, b3AabbCL aabb)\n"
|
||||||
|
"{\n"
|
||||||
|
" // not functional -- need to fix\n"
|
||||||
|
" //aabb is considered as 3 pairs of 2 planes( {x_min, x_max}, {y_min, y_max}, {z_min, z_max} )\n"
|
||||||
|
" //t_min is the first intersection, t_max is the second intersection\n"
|
||||||
|
" b3Vector3 inverseRayDirection = (b3Vector3){1.0f, 1.0f, 1.0f, 0.0f} / rayNormalizedDirection;\n"
|
||||||
|
" int4 sign = isless( inverseRayDirection, (b3Vector3){0.0f, 0.0f, 0.0f, 0.0f} ); //isless(x,y) returns (x < y)\n"
|
||||||
|
" \n"
|
||||||
|
" //select(b, a, condition) == condition ? a : b\n"
|
||||||
|
" b3Vector3 t_min = ( select(aabb.m_min, aabb.m_max, sign) - rayFrom ) * inverseRayDirection;\n"
|
||||||
|
" b3Vector3 t_max = ( select(aabb.m_min, aabb.m_max, (int4){1,1,1,1} - sign) - rayFrom ) * inverseRayDirection;\n"
|
||||||
|
" b3Scalar t_min_final = 0.0f;\n"
|
||||||
|
" b3Scalar t_max_final = b3Sqrt( b3Vector3_length2(rayTo - rayFrom) );\n"
|
||||||
|
" \n"
|
||||||
|
" //Must use fmin()/fmax(); if one of the parameters is NaN, then the parameter that is not NaN is returned. \n"
|
||||||
|
" //Behavior of min()/max() with NaNs is undefined. (See OpenCL Specification 1.2 [6.12.2] and [6.12.4])\n"
|
||||||
|
" //Since the innermost fmin()/fmax() is always not NaN, this should never return NaN\n"
|
||||||
|
" t_min_final = fmax( t_min.z, fmax(t_min.y, fmax(t_min.x, t_min_final)) );\n"
|
||||||
|
" t_max_final = fmin( t_max.z, fmin(t_max.y, fmin(t_max.x, t_max_final)) );\n"
|
||||||
|
" \n"
|
||||||
|
" return (t_min_final <= t_max_final);\n"
|
||||||
|
"}\n"
|
||||||
|
"**/\n"
|
||||||
|
"void rayPlanePairTest(b3Scalar rayStart, b3Scalar rayNormalizedDirection,\n"
|
||||||
|
" b3Scalar planeMin, b3Scalar planeMax, \n"
|
||||||
|
" b3Scalar* out_t_min, b3Scalar* out_t_max)\n"
|
||||||
|
"{\n"
|
||||||
|
" if(rayNormalizedDirection < 0.0f)\n"
|
||||||
|
" {\n"
|
||||||
|
" //max is closer, min is farther\n"
|
||||||
|
" *out_t_min = (planeMax - rayStart) / rayNormalizedDirection;\n"
|
||||||
|
" *out_t_max = (planeMin - rayStart) / rayNormalizedDirection;\n"
|
||||||
|
" }\n"
|
||||||
|
" else\n"
|
||||||
|
" {\n"
|
||||||
|
" //min is closer, max is farther\n"
|
||||||
|
" *out_t_min = (planeMin - rayStart) / rayNormalizedDirection;\n"
|
||||||
|
" *out_t_max = (planeMax - rayStart) / rayNormalizedDirection;\n"
|
||||||
|
" }\n"
|
||||||
|
"}\n"
|
||||||
|
"int rayIntersectsAabb(b3Vector3 rayFrom, b3Vector3 rayTo, b3Vector3 rayNormalizedDirection, b3AabbCL aabb)\n"
|
||||||
|
"{\n"
|
||||||
|
" b3Scalar t_min_x, t_min_y, t_min_z;\n"
|
||||||
|
" b3Scalar t_max_x, t_max_y, t_max_z;\n"
|
||||||
|
" \n"
|
||||||
|
" rayPlanePairTest(rayFrom.x, rayNormalizedDirection.x, aabb.m_min.x, aabb.m_max.x, &t_min_x, &t_max_x);\n"
|
||||||
|
" rayPlanePairTest(rayFrom.y, rayNormalizedDirection.y, aabb.m_min.y, aabb.m_max.y, &t_min_y, &t_max_y);\n"
|
||||||
|
" rayPlanePairTest(rayFrom.z, rayNormalizedDirection.z, aabb.m_min.z, aabb.m_max.z, &t_min_z, &t_max_z);\n"
|
||||||
|
" \n"
|
||||||
|
" b3Scalar t_min_final = 0.0f;\n"
|
||||||
|
" b3Scalar t_max_final = b3Sqrt( b3Vector3_length2(rayTo - rayFrom) );\n"
|
||||||
|
" \n"
|
||||||
|
" t_min_final = fmax( t_min_z, fmax(t_min_y, fmax(t_min_x, t_min_final)) );\n"
|
||||||
|
" t_max_final = fmin( t_max_z, fmin(t_max_y, fmin(t_max_x, t_max_final)) );\n"
|
||||||
|
" \n"
|
||||||
|
" return (t_min_final <= t_max_final);\n"
|
||||||
|
"}\n"
|
||||||
|
"__kernel void plbvhRayTraverse(__global b3AabbCL* rigidAabbs,\n"
|
||||||
|
" __global int2* internalNodeChildIndices, \n"
|
||||||
|
" __global b3AabbCL* internalNodeAabbs,\n"
|
||||||
|
" __global int2* internalNodeLeafIndexRanges,\n"
|
||||||
|
" __global SortDataCL* mortonCodesAndAabbIndices,\n"
|
||||||
|
" \n"
|
||||||
|
" __global b3RayInfo* rays,\n"
|
||||||
|
" \n"
|
||||||
|
" __global int* out_numRayRigidPairs, \n"
|
||||||
|
" __global int2* out_rayRigidPairs,\n"
|
||||||
|
" int maxRayRigidPairs, int numRays)\n"
|
||||||
|
"{\n"
|
||||||
|
" int rayIndex = get_global_id(0);\n"
|
||||||
|
" if(rayIndex >= numRays) return;\n"
|
||||||
|
" \n"
|
||||||
|
" b3Vector3 rayFrom = rays[rayIndex].m_from;\n"
|
||||||
|
" b3Vector3 rayTo = rays[rayIndex].m_to;\n"
|
||||||
|
" b3Vector3 rayNormalizedDirection = b3Vector3_normalize(rays[rayIndex].m_to - rays[rayIndex].m_from);\n"
|
||||||
|
" \n"
|
||||||
|
" int stack[B3_PLVBH_TRAVERSE_MAX_STACK_SIZE];\n"
|
||||||
|
" \n"
|
||||||
|
" //Starting by placing only the root node index, 0, in the stack causes it to be detected as a leaf node(see isLeafNode() in loop)\n"
|
||||||
|
" int stackSize = 2;\n"
|
||||||
|
" stack[0] = internalNodeChildIndices[B3_PLBVH_ROOT_NODE_INDEX].x;\n"
|
||||||
|
" stack[1] = internalNodeChildIndices[B3_PLBVH_ROOT_NODE_INDEX].y;\n"
|
||||||
|
" \n"
|
||||||
|
" while(stackSize)\n"
|
||||||
|
" {\n"
|
||||||
|
" int internalOrLeafNodeIndex = stack[ stackSize - 1 ];\n"
|
||||||
|
" --stackSize;\n"
|
||||||
|
" \n"
|
||||||
|
" int isLeaf = isLeafNode(internalOrLeafNodeIndex); //Internal node if false\n"
|
||||||
|
" int bvhNodeIndex = getIndexWithInternalNodeMarkerRemoved(internalOrLeafNodeIndex);\n"
|
||||||
|
" \n"
|
||||||
|
" //bvhRigidIndex is not used if internal node\n"
|
||||||
|
" int bvhRigidIndex = (isLeaf) ? mortonCodesAndAabbIndices[bvhNodeIndex].m_value : -1;\n"
|
||||||
|
" \n"
|
||||||
|
" b3AabbCL bvhNodeAabb = (isLeaf) ? rigidAabbs[bvhRigidIndex] : internalNodeAabbs[bvhNodeIndex];\n"
|
||||||
|
" \n"
|
||||||
|
" if( rayIntersectsAabb(rayFrom, rayTo, rayNormalizedDirection, bvhNodeAabb) )\n"
|
||||||
|
" {\n"
|
||||||
|
" if(isLeaf)\n"
|
||||||
|
" {\n"
|
||||||
|
" int2 rayRigidPair;\n"
|
||||||
|
" rayRigidPair.x = rayIndex;\n"
|
||||||
|
" rayRigidPair.y = rigidAabbs[bvhRigidIndex].m_minIndices[3];\n"
|
||||||
|
" \n"
|
||||||
|
" int pairIndex = atomic_inc(out_numRayRigidPairs);\n"
|
||||||
|
" if(pairIndex < maxRayRigidPairs) out_rayRigidPairs[pairIndex] = rayRigidPair;\n"
|
||||||
|
" }\n"
|
||||||
|
" \n"
|
||||||
|
" if(!isLeaf) //Internal node\n"
|
||||||
|
" {\n"
|
||||||
|
" if(stackSize + 2 > B3_PLVBH_TRAVERSE_MAX_STACK_SIZE)\n"
|
||||||
|
" {\n"
|
||||||
|
" //Error\n"
|
||||||
|
" }\n"
|
||||||
|
" else\n"
|
||||||
|
" {\n"
|
||||||
|
" stack[ stackSize++ ] = internalNodeChildIndices[bvhNodeIndex].x;\n"
|
||||||
|
" stack[ stackSize++ ] = internalNodeChildIndices[bvhNodeIndex].y;\n"
|
||||||
|
" }\n"
|
||||||
|
" }\n"
|
||||||
|
" }\n"
|
||||||
|
" }\n"
|
||||||
|
"}\n"
|
||||||
;
|
;
|
||||||
|
|||||||
@@ -8,6 +8,11 @@
|
|||||||
#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h"
|
#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h"
|
||||||
#include "Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h"
|
#include "Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h"
|
||||||
#include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h"
|
#include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h"
|
||||||
|
#include "Bullet3OpenCL/ParallelPrimitives/b3FillCL.h"
|
||||||
|
#include "Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h"
|
||||||
|
#include "Bullet3OpenCL/BroadphaseCollision/b3GpuBroadphaseInterface.h"
|
||||||
|
#include "Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvh.h"
|
||||||
|
|
||||||
#include "Bullet3OpenCL/Raycast/kernels/rayCastKernels.h"
|
#include "Bullet3OpenCL/Raycast/kernels/rayCastKernels.h"
|
||||||
|
|
||||||
|
|
||||||
@@ -20,7 +25,24 @@ struct b3GpuRaycastInternalData
|
|||||||
cl_context m_context;
|
cl_context m_context;
|
||||||
cl_device_id m_device;
|
cl_device_id m_device;
|
||||||
cl_command_queue m_q;
|
cl_command_queue m_q;
|
||||||
cl_kernel m_raytraceKernel;
|
cl_kernel m_raytraceKernel;
|
||||||
|
cl_kernel m_raytracePairsKernel;
|
||||||
|
cl_kernel m_findRayRigidPairIndexRanges;
|
||||||
|
|
||||||
|
b3GpuParallelLinearBvh* m_plbvh;
|
||||||
|
b3RadixSort32CL* m_radixSorter;
|
||||||
|
b3FillCL* m_fill;
|
||||||
|
|
||||||
|
//1 element per ray
|
||||||
|
b3OpenCLArray<b3RayInfo>* m_gpuRays;
|
||||||
|
b3OpenCLArray<b3RayHit>* m_gpuHitResults;
|
||||||
|
b3OpenCLArray<int>* m_firstRayRigidPairIndexPerRay;
|
||||||
|
b3OpenCLArray<int>* m_numRayRigidPairsPerRay;
|
||||||
|
|
||||||
|
//1 element per (ray index, rigid index) pair
|
||||||
|
b3OpenCLArray<int>* m_gpuNumRayRigidPairs;
|
||||||
|
b3OpenCLArray<b3Int2>* m_gpuRayRigidPairs;
|
||||||
|
|
||||||
int m_test;
|
int m_test;
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -31,7 +53,19 @@ b3GpuRaycast::b3GpuRaycast(cl_context ctx,cl_device_id device, cl_command_queue
|
|||||||
m_data->m_device = device;
|
m_data->m_device = device;
|
||||||
m_data->m_q = q;
|
m_data->m_q = q;
|
||||||
m_data->m_raytraceKernel = 0;
|
m_data->m_raytraceKernel = 0;
|
||||||
|
m_data->m_raytracePairsKernel = 0;
|
||||||
|
m_data->m_findRayRigidPairIndexRanges = 0;
|
||||||
|
|
||||||
|
m_data->m_plbvh = new b3GpuParallelLinearBvh(ctx, device, q);
|
||||||
|
m_data->m_radixSorter = new b3RadixSort32CL(ctx, device, q);
|
||||||
|
m_data->m_fill = new b3FillCL(ctx, device, q);
|
||||||
|
|
||||||
|
m_data->m_gpuRays = new b3OpenCLArray<b3RayInfo>(ctx, q);
|
||||||
|
m_data->m_gpuHitResults = new b3OpenCLArray<b3RayHit>(ctx, q);
|
||||||
|
m_data->m_firstRayRigidPairIndexPerRay = new b3OpenCLArray<int>(ctx, q);
|
||||||
|
m_data->m_numRayRigidPairsPerRay = new b3OpenCLArray<int>(ctx, q);
|
||||||
|
m_data->m_gpuNumRayRigidPairs = new b3OpenCLArray<int>(ctx, q);
|
||||||
|
m_data->m_gpuRayRigidPairs = new b3OpenCLArray<b3Int2>(ctx, q);
|
||||||
|
|
||||||
{
|
{
|
||||||
cl_int errNum=0;
|
cl_int errNum=0;
|
||||||
@@ -39,6 +73,10 @@ b3GpuRaycast::b3GpuRaycast(cl_context ctx,cl_device_id device, cl_command_queue
|
|||||||
b3Assert(errNum==CL_SUCCESS);
|
b3Assert(errNum==CL_SUCCESS);
|
||||||
m_data->m_raytraceKernel = b3OpenCLUtils::compileCLKernelFromString(m_data->m_context, m_data->m_device,rayCastKernelCL, "rayCastKernel",&errNum,prog);
|
m_data->m_raytraceKernel = b3OpenCLUtils::compileCLKernelFromString(m_data->m_context, m_data->m_device,rayCastKernelCL, "rayCastKernel",&errNum,prog);
|
||||||
b3Assert(errNum==CL_SUCCESS);
|
b3Assert(errNum==CL_SUCCESS);
|
||||||
|
m_data->m_raytracePairsKernel = b3OpenCLUtils::compileCLKernelFromString(m_data->m_context, m_data->m_device,rayCastKernelCL, "rayCastPairsKernel",&errNum,prog);
|
||||||
|
b3Assert(errNum==CL_SUCCESS);
|
||||||
|
m_data->m_findRayRigidPairIndexRanges = b3OpenCLUtils::compileCLKernelFromString(m_data->m_context, m_data->m_device,rayCastKernelCL, "findRayRigidPairIndexRanges",&errNum,prog);
|
||||||
|
b3Assert(errNum==CL_SUCCESS);
|
||||||
clReleaseProgram(prog);
|
clReleaseProgram(prog);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -48,6 +86,20 @@ b3GpuRaycast::b3GpuRaycast(cl_context ctx,cl_device_id device, cl_command_queue
|
|||||||
b3GpuRaycast::~b3GpuRaycast()
|
b3GpuRaycast::~b3GpuRaycast()
|
||||||
{
|
{
|
||||||
clReleaseKernel(m_data->m_raytraceKernel);
|
clReleaseKernel(m_data->m_raytraceKernel);
|
||||||
|
clReleaseKernel(m_data->m_raytracePairsKernel);
|
||||||
|
clReleaseKernel(m_data->m_findRayRigidPairIndexRanges);
|
||||||
|
|
||||||
|
delete m_data->m_plbvh;
|
||||||
|
delete m_data->m_radixSorter;
|
||||||
|
delete m_data->m_fill;
|
||||||
|
|
||||||
|
delete m_data->m_gpuRays;
|
||||||
|
delete m_data->m_gpuHitResults;
|
||||||
|
delete m_data->m_firstRayRigidPairIndexPerRay;
|
||||||
|
delete m_data->m_numRayRigidPairsPerRay;
|
||||||
|
delete m_data->m_gpuNumRayRigidPairs;
|
||||||
|
delete m_data->m_gpuRayRigidPairs;
|
||||||
|
|
||||||
delete m_data;
|
delete m_data;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -206,27 +258,32 @@ void b3GpuRaycast::castRaysHost(const b3AlignedObjectArray<b3RayInfo>& rays, b3A
|
|||||||
}
|
}
|
||||||
///todo: add some acceleration structure (AABBs, tree etc)
|
///todo: add some acceleration structure (AABBs, tree etc)
|
||||||
void b3GpuRaycast::castRays(const b3AlignedObjectArray<b3RayInfo>& rays, b3AlignedObjectArray<b3RayHit>& hitResults,
|
void b3GpuRaycast::castRays(const b3AlignedObjectArray<b3RayInfo>& rays, b3AlignedObjectArray<b3RayHit>& hitResults,
|
||||||
int numBodies,const struct b3RigidBodyData* bodies, int numCollidables, const struct b3Collidable* collidables, const struct b3GpuNarrowPhaseInternalData* narrowphaseData)
|
int numBodies,const struct b3RigidBodyData* bodies, int numCollidables, const struct b3Collidable* collidables,
|
||||||
|
const struct b3GpuNarrowPhaseInternalData* narrowphaseData, class b3GpuBroadphaseInterface* broadphase)
|
||||||
{
|
{
|
||||||
|
|
||||||
//castRaysHost(rays,hitResults,numBodies,bodies,numCollidables,collidables,narrowphaseData);
|
//castRaysHost(rays,hitResults,numBodies,bodies,numCollidables,collidables,narrowphaseData);
|
||||||
|
|
||||||
B3_PROFILE("castRaysGPU");
|
B3_PROFILE("castRaysGPU");
|
||||||
|
|
||||||
b3OpenCLArray<b3RayInfo> gpuRays(m_data->m_context,m_data->m_q);
|
|
||||||
b3OpenCLArray<b3RayHit> gpuHitResults(m_data->m_context,m_data->m_q);
|
|
||||||
|
|
||||||
{
|
{
|
||||||
B3_PROFILE("raycast copyFromHost");
|
B3_PROFILE("raycast copyFromHost");
|
||||||
gpuRays.copyFromHost(rays);
|
m_data->m_gpuRays->copyFromHost(rays);
|
||||||
|
m_data->m_gpuHitResults->copyFromHost(hitResults);
|
||||||
|
|
||||||
gpuHitResults.resize(hitResults.size());
|
|
||||||
gpuHitResults.copyFromHost(hitResults);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int numRays = hitResults.size();
|
||||||
|
{
|
||||||
|
m_data->m_firstRayRigidPairIndexPerRay->resize(numRays);
|
||||||
|
m_data->m_numRayRigidPairsPerRay->resize(numRays);
|
||||||
|
|
||||||
|
m_data->m_gpuNumRayRigidPairs->resize(1);
|
||||||
|
m_data->m_gpuRayRigidPairs->resize(numRays * 16);
|
||||||
|
}
|
||||||
|
|
||||||
//run kernel
|
//run kernel
|
||||||
|
const bool USE_BRUTE_FORCE_RAYCAST = false;
|
||||||
|
if(USE_BRUTE_FORCE_RAYCAST)
|
||||||
{
|
{
|
||||||
B3_PROFILE("raycast launch1D");
|
B3_PROFILE("raycast launch1D");
|
||||||
|
|
||||||
@@ -234,8 +291,8 @@ void b3GpuRaycast::castRays(const b3AlignedObjectArray<b3RayInfo>& rays, b3Align
|
|||||||
int numRays = rays.size();
|
int numRays = rays.size();
|
||||||
launcher.setConst(numRays);
|
launcher.setConst(numRays);
|
||||||
|
|
||||||
launcher.setBuffer(gpuRays.getBufferCL());
|
launcher.setBuffer(m_data->m_gpuRays->getBufferCL());
|
||||||
launcher.setBuffer(gpuHitResults.getBufferCL());
|
launcher.setBuffer(m_data->m_gpuHitResults->getBufferCL());
|
||||||
|
|
||||||
launcher.setConst(numBodies);
|
launcher.setConst(numBodies);
|
||||||
launcher.setBuffer(narrowphaseData->m_bodyBufferGPU->getBufferCL());
|
launcher.setBuffer(narrowphaseData->m_bodyBufferGPU->getBufferCL());
|
||||||
@@ -246,11 +303,90 @@ void b3GpuRaycast::castRays(const b3AlignedObjectArray<b3RayInfo>& rays, b3Align
|
|||||||
launcher.launch1D(numRays);
|
launcher.launch1D(numRays);
|
||||||
clFinish(m_data->m_q);
|
clFinish(m_data->m_q);
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
//printf("broadphase->getAllAabbsGPU().size(): %d \n", broadphase->getAllAabbsGPU().size());
|
||||||
|
m_data->m_plbvh->build( broadphase->getAllAabbsGPU() );
|
||||||
|
|
||||||
|
m_data->m_plbvh->testRaysAgainstBvhAabbs(*m_data->m_gpuRays, *m_data->m_gpuNumRayRigidPairs, *m_data->m_gpuRayRigidPairs);
|
||||||
|
|
||||||
|
int numRayRigidPairs = -1;
|
||||||
|
m_data->m_gpuNumRayRigidPairs->copyToHostPointer(&numRayRigidPairs, 1);
|
||||||
|
if( numRayRigidPairs > m_data->m_gpuRayRigidPairs->size() )
|
||||||
|
{
|
||||||
|
numRayRigidPairs = m_data->m_gpuRayRigidPairs->size();
|
||||||
|
m_data->m_gpuNumRayRigidPairs->copyFromHostPointer(&numRayRigidPairs, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
m_data->m_gpuRayRigidPairs->resize(numRayRigidPairs); //Radix sort needs b3OpenCLArray::size() to be correct
|
||||||
|
|
||||||
|
//Sort ray-rigid pairs by ray index
|
||||||
|
{
|
||||||
|
B3_PROFILE("sort ray-rigid pairs");
|
||||||
|
m_data->m_radixSorter->execute( *reinterpret_cast< b3OpenCLArray<b3SortData>* >(m_data->m_gpuRayRigidPairs) );
|
||||||
|
}
|
||||||
|
|
||||||
|
//detect start,count of each ray pair
|
||||||
|
{
|
||||||
|
B3_PROFILE("detect ray-rigid pair index ranges");
|
||||||
|
|
||||||
|
{
|
||||||
|
B3_PROFILE("reset ray-rigid pair index ranges");
|
||||||
|
|
||||||
|
m_data->m_fill->execute(*m_data->m_firstRayRigidPairIndexPerRay, numRayRigidPairs, numRays); //atomic_min used to find first index
|
||||||
|
m_data->m_fill->execute(*m_data->m_numRayRigidPairsPerRay, 0, numRays);
|
||||||
|
clFinish(m_data->m_q);
|
||||||
|
}
|
||||||
|
|
||||||
|
b3BufferInfoCL bufferInfo[] =
|
||||||
|
{
|
||||||
|
b3BufferInfoCL( m_data->m_gpuRayRigidPairs->getBufferCL() ),
|
||||||
|
|
||||||
|
b3BufferInfoCL( m_data->m_firstRayRigidPairIndexPerRay->getBufferCL() ),
|
||||||
|
b3BufferInfoCL( m_data->m_numRayRigidPairsPerRay->getBufferCL() )
|
||||||
|
};
|
||||||
|
|
||||||
|
b3LauncherCL launcher(m_data->m_q, m_data->m_findRayRigidPairIndexRanges, "m_findRayRigidPairIndexRanges");
|
||||||
|
launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) );
|
||||||
|
launcher.setConst(numRayRigidPairs);
|
||||||
|
|
||||||
|
launcher.launch1D(numRayRigidPairs);
|
||||||
|
clFinish(m_data->m_q);
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
B3_PROFILE("ray-rigid intersection");
|
||||||
|
|
||||||
|
b3BufferInfoCL bufferInfo[] =
|
||||||
|
{
|
||||||
|
b3BufferInfoCL( m_data->m_gpuRays->getBufferCL() ),
|
||||||
|
b3BufferInfoCL( m_data->m_gpuHitResults->getBufferCL() ),
|
||||||
|
b3BufferInfoCL( m_data->m_firstRayRigidPairIndexPerRay->getBufferCL() ),
|
||||||
|
b3BufferInfoCL( m_data->m_numRayRigidPairsPerRay->getBufferCL() ),
|
||||||
|
|
||||||
|
b3BufferInfoCL( narrowphaseData->m_bodyBufferGPU->getBufferCL() ),
|
||||||
|
b3BufferInfoCL( narrowphaseData->m_collidablesGPU->getBufferCL() ),
|
||||||
|
b3BufferInfoCL( narrowphaseData->m_convexFacesGPU->getBufferCL() ),
|
||||||
|
b3BufferInfoCL( narrowphaseData->m_convexPolyhedraGPU->getBufferCL() ),
|
||||||
|
|
||||||
|
b3BufferInfoCL( m_data->m_gpuRayRigidPairs->getBufferCL() )
|
||||||
|
};
|
||||||
|
|
||||||
|
b3LauncherCL launcher(m_data->m_q, m_data->m_raytracePairsKernel, "m_raytracePairsKernel");
|
||||||
|
launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) );
|
||||||
|
launcher.setConst(numRays);
|
||||||
|
|
||||||
|
launcher.launch1D(numRays);
|
||||||
|
clFinish(m_data->m_q);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
//copy results
|
//copy results
|
||||||
{
|
{
|
||||||
B3_PROFILE("raycast copyToHost");
|
B3_PROFILE("raycast copyToHost");
|
||||||
gpuHitResults.copyToHost(hitResults);
|
m_data->m_gpuHitResults->copyToHost(hitResults);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
@@ -23,8 +23,7 @@ public:
|
|||||||
|
|
||||||
void castRays(const b3AlignedObjectArray<b3RayInfo>& rays, b3AlignedObjectArray<b3RayHit>& hitResults,
|
void castRays(const b3AlignedObjectArray<b3RayInfo>& rays, b3AlignedObjectArray<b3RayHit>& hitResults,
|
||||||
int numBodies,const struct b3RigidBodyData* bodies, int numCollidables, const struct b3Collidable* collidables,
|
int numBodies,const struct b3RigidBodyData* bodies, int numCollidables, const struct b3Collidable* collidables,
|
||||||
const struct b3GpuNarrowPhaseInternalData* narrowphaseData
|
const struct b3GpuNarrowPhaseInternalData* narrowphaseData, class b3GpuBroadphaseInterface* broadphase);
|
||||||
);
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -337,3 +337,103 @@ __kernel void rayCastKernel(
|
|||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
__kernel void findRayRigidPairIndexRanges(__global int2* rayRigidPairs,
|
||||||
|
__global int* out_firstRayRigidPairIndexPerRay,
|
||||||
|
__global int* out_numRayRigidPairsPerRay,
|
||||||
|
int numRayRigidPairs)
|
||||||
|
{
|
||||||
|
int rayRigidPairIndex = get_global_id(0);
|
||||||
|
if (rayRigidPairIndex >= numRayRigidPairs) return;
|
||||||
|
|
||||||
|
int rayIndex = rayRigidPairs[rayRigidPairIndex].x;
|
||||||
|
|
||||||
|
atomic_min(&out_firstRayRigidPairIndexPerRay[rayIndex], rayRigidPairIndex);
|
||||||
|
atomic_inc(&out_numRayRigidPairsPerRay[rayIndex]);
|
||||||
|
}
|
||||||
|
|
||||||
|
__kernel void rayCastPairsKernel(const __global b3RayInfo* rays,
|
||||||
|
__global b3RayHit* hitResults,
|
||||||
|
__global int* firstRayRigidPairIndexPerRay,
|
||||||
|
__global int* numRayRigidPairsPerRay,
|
||||||
|
|
||||||
|
__global Body* bodies,
|
||||||
|
__global Collidable* collidables,
|
||||||
|
__global const b3GpuFace* faces,
|
||||||
|
__global const ConvexPolyhedronCL* convexShapes,
|
||||||
|
|
||||||
|
__global int2* rayRigidPairs,
|
||||||
|
int numRays)
|
||||||
|
{
|
||||||
|
int i = get_global_id(0);
|
||||||
|
if (i >= numRays) return;
|
||||||
|
|
||||||
|
float4 rayFrom = rays[i].m_from;
|
||||||
|
float4 rayTo = rays[i].m_to;
|
||||||
|
|
||||||
|
hitResults[i].m_hitFraction = 1.f;
|
||||||
|
|
||||||
|
float hitFraction = 1.f;
|
||||||
|
float4 hitPoint;
|
||||||
|
float4 hitNormal;
|
||||||
|
int hitBodyIndex = -1;
|
||||||
|
|
||||||
|
//
|
||||||
|
for(int pair = 0; pair < numRayRigidPairsPerRay[i]; ++pair)
|
||||||
|
{
|
||||||
|
int rayRigidPairIndex = pair + firstRayRigidPairIndexPerRay[i];
|
||||||
|
int b = rayRigidPairs[rayRigidPairIndex].y;
|
||||||
|
|
||||||
|
if (hitResults[i].m_hitResult2 == b) continue;
|
||||||
|
|
||||||
|
Body body = bodies[b];
|
||||||
|
Collidable rigidCollidable = collidables[body.m_collidableIdx];
|
||||||
|
|
||||||
|
float4 pos = body.m_pos;
|
||||||
|
float4 orn = body.m_quat;
|
||||||
|
|
||||||
|
if (rigidCollidable.m_shapeType == SHAPE_CONVEX_HULL)
|
||||||
|
{
|
||||||
|
float4 invPos = (float4)(0,0,0,0);
|
||||||
|
float4 invOrn = (float4)(0,0,0,0);
|
||||||
|
float4 rayFromLocal = (float4)(0,0,0,0);
|
||||||
|
float4 rayToLocal = (float4)(0,0,0,0);
|
||||||
|
invOrn = qtInvert(orn);
|
||||||
|
invPos = qtRotate(invOrn, -pos);
|
||||||
|
rayFromLocal = qtRotate( invOrn, rayFrom ) + invPos;
|
||||||
|
rayToLocal = qtRotate( invOrn, rayTo) + invPos;
|
||||||
|
rayFromLocal.w = 0.f;
|
||||||
|
rayToLocal.w = 0.f;
|
||||||
|
int numFaces = convexShapes[rigidCollidable.m_shapeIndex].m_numFaces;
|
||||||
|
int faceOffset = convexShapes[rigidCollidable.m_shapeIndex].m_faceOffset;
|
||||||
|
|
||||||
|
if (numFaces && rayConvex(rayFromLocal, rayToLocal, numFaces, faceOffset,faces, &hitFraction, &hitNormal))
|
||||||
|
{
|
||||||
|
hitBodyIndex = b;
|
||||||
|
hitPoint = setInterpolate3(rayFrom, rayTo, hitFraction);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (rigidCollidable.m_shapeType == SHAPE_SPHERE)
|
||||||
|
{
|
||||||
|
float radius = rigidCollidable.m_radius;
|
||||||
|
|
||||||
|
if (sphere_intersect(pos, radius, rayFrom, rayTo, &hitFraction))
|
||||||
|
{
|
||||||
|
hitBodyIndex = b;
|
||||||
|
hitPoint = setInterpolate3(rayFrom, rayTo, hitFraction);
|
||||||
|
hitNormal = (float4) (hitPoint - bodies[b].m_pos);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (hitBodyIndex >= 0)
|
||||||
|
{
|
||||||
|
hitResults[i].m_hitFraction = hitFraction;
|
||||||
|
hitResults[i].m_hitPoint = hitPoint;
|
||||||
|
hitResults[i].m_hitNormal = normalize(hitNormal);
|
||||||
|
hitResults[i].m_hitResult0 = hitBodyIndex;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|||||||
@@ -281,4 +281,101 @@ static const char* rayCastKernelCL= \
|
|||||||
" hitResults[i].m_hitResult0 = hitBodyIndex;\n"
|
" hitResults[i].m_hitResult0 = hitBodyIndex;\n"
|
||||||
" }\n"
|
" }\n"
|
||||||
"}\n"
|
"}\n"
|
||||||
|
"__kernel void findRayRigidPairIndexRanges(__global int2* rayRigidPairs, \n"
|
||||||
|
" __global int* out_firstRayRigidPairIndexPerRay,\n"
|
||||||
|
" __global int* out_numRayRigidPairsPerRay,\n"
|
||||||
|
" int numRayRigidPairs)\n"
|
||||||
|
"{\n"
|
||||||
|
" int rayRigidPairIndex = get_global_id(0);\n"
|
||||||
|
" if (rayRigidPairIndex >= numRayRigidPairs) return;\n"
|
||||||
|
" \n"
|
||||||
|
" int rayIndex = rayRigidPairs[rayRigidPairIndex].x;\n"
|
||||||
|
" \n"
|
||||||
|
" atomic_min(&out_firstRayRigidPairIndexPerRay[rayIndex], rayRigidPairIndex);\n"
|
||||||
|
" atomic_inc(&out_numRayRigidPairsPerRay[rayIndex]);\n"
|
||||||
|
"}\n"
|
||||||
|
"__kernel void rayCastPairsKernel(const __global b3RayInfo* rays, \n"
|
||||||
|
" __global b3RayHit* hitResults, \n"
|
||||||
|
" __global int* firstRayRigidPairIndexPerRay,\n"
|
||||||
|
" __global int* numRayRigidPairsPerRay,\n"
|
||||||
|
" \n"
|
||||||
|
" __global Body* bodies,\n"
|
||||||
|
" __global Collidable* collidables,\n"
|
||||||
|
" __global const b3GpuFace* faces,\n"
|
||||||
|
" __global const ConvexPolyhedronCL* convexShapes,\n"
|
||||||
|
" \n"
|
||||||
|
" __global int2* rayRigidPairs,\n"
|
||||||
|
" int numRays)\n"
|
||||||
|
"{\n"
|
||||||
|
" int i = get_global_id(0);\n"
|
||||||
|
" if (i >= numRays) return;\n"
|
||||||
|
" \n"
|
||||||
|
" float4 rayFrom = rays[i].m_from;\n"
|
||||||
|
" float4 rayTo = rays[i].m_to;\n"
|
||||||
|
" \n"
|
||||||
|
" hitResults[i].m_hitFraction = 1.f;\n"
|
||||||
|
" \n"
|
||||||
|
" float hitFraction = 1.f;\n"
|
||||||
|
" float4 hitPoint;\n"
|
||||||
|
" float4 hitNormal;\n"
|
||||||
|
" int hitBodyIndex = -1;\n"
|
||||||
|
" \n"
|
||||||
|
" //\n"
|
||||||
|
" for(int pair = 0; pair < numRayRigidPairsPerRay[i]; ++pair)\n"
|
||||||
|
" {\n"
|
||||||
|
" int rayRigidPairIndex = pair + firstRayRigidPairIndexPerRay[i];\n"
|
||||||
|
" int b = rayRigidPairs[rayRigidPairIndex].y;\n"
|
||||||
|
" \n"
|
||||||
|
" if (hitResults[i].m_hitResult2 == b) continue;\n"
|
||||||
|
" \n"
|
||||||
|
" Body body = bodies[b];\n"
|
||||||
|
" Collidable rigidCollidable = collidables[body.m_collidableIdx];\n"
|
||||||
|
" \n"
|
||||||
|
" float4 pos = body.m_pos;\n"
|
||||||
|
" float4 orn = body.m_quat;\n"
|
||||||
|
" \n"
|
||||||
|
" if (rigidCollidable.m_shapeType == SHAPE_CONVEX_HULL)\n"
|
||||||
|
" {\n"
|
||||||
|
" float4 invPos = (float4)(0,0,0,0);\n"
|
||||||
|
" float4 invOrn = (float4)(0,0,0,0);\n"
|
||||||
|
" float4 rayFromLocal = (float4)(0,0,0,0);\n"
|
||||||
|
" float4 rayToLocal = (float4)(0,0,0,0);\n"
|
||||||
|
" invOrn = qtInvert(orn);\n"
|
||||||
|
" invPos = qtRotate(invOrn, -pos);\n"
|
||||||
|
" rayFromLocal = qtRotate( invOrn, rayFrom ) + invPos;\n"
|
||||||
|
" rayToLocal = qtRotate( invOrn, rayTo) + invPos;\n"
|
||||||
|
" rayFromLocal.w = 0.f;\n"
|
||||||
|
" rayToLocal.w = 0.f;\n"
|
||||||
|
" int numFaces = convexShapes[rigidCollidable.m_shapeIndex].m_numFaces;\n"
|
||||||
|
" int faceOffset = convexShapes[rigidCollidable.m_shapeIndex].m_faceOffset;\n"
|
||||||
|
" \n"
|
||||||
|
" if (numFaces && rayConvex(rayFromLocal, rayToLocal, numFaces, faceOffset,faces, &hitFraction, &hitNormal))\n"
|
||||||
|
" {\n"
|
||||||
|
" hitBodyIndex = b;\n"
|
||||||
|
" hitPoint = setInterpolate3(rayFrom, rayTo, hitFraction);\n"
|
||||||
|
" }\n"
|
||||||
|
" }\n"
|
||||||
|
" \n"
|
||||||
|
" if (rigidCollidable.m_shapeType == SHAPE_SPHERE)\n"
|
||||||
|
" {\n"
|
||||||
|
" float radius = rigidCollidable.m_radius;\n"
|
||||||
|
" \n"
|
||||||
|
" if (sphere_intersect(pos, radius, rayFrom, rayTo, &hitFraction))\n"
|
||||||
|
" {\n"
|
||||||
|
" hitBodyIndex = b;\n"
|
||||||
|
" hitPoint = setInterpolate3(rayFrom, rayTo, hitFraction);\n"
|
||||||
|
" hitNormal = (float4) (hitPoint - bodies[b].m_pos);\n"
|
||||||
|
" }\n"
|
||||||
|
" }\n"
|
||||||
|
" }\n"
|
||||||
|
" \n"
|
||||||
|
" if (hitBodyIndex >= 0)\n"
|
||||||
|
" {\n"
|
||||||
|
" hitResults[i].m_hitFraction = hitFraction;\n"
|
||||||
|
" hitResults[i].m_hitPoint = hitPoint;\n"
|
||||||
|
" hitResults[i].m_hitNormal = normalize(hitNormal);\n"
|
||||||
|
" hitResults[i].m_hitResult0 = hitBodyIndex;\n"
|
||||||
|
" }\n"
|
||||||
|
" \n"
|
||||||
|
"}\n"
|
||||||
;
|
;
|
||||||
|
|||||||
@@ -703,6 +703,6 @@ void b3GpuRigidBodyPipeline::castRays(const b3AlignedObjectArray<b3RayInfo>& ray
|
|||||||
{
|
{
|
||||||
this->m_data->m_raycaster->castRays(rays,hitResults,
|
this->m_data->m_raycaster->castRays(rays,hitResults,
|
||||||
getNumBodies(),this->m_data->m_narrowphase->getBodiesCpu(),
|
getNumBodies(),this->m_data->m_narrowphase->getBodiesCpu(),
|
||||||
m_data->m_narrowphase->getNumCollidablesGpu(), m_data->m_narrowphase->getCollidablesCpu(), m_data->m_narrowphase->getInternalData()
|
m_data->m_narrowphase->getNumCollidablesGpu(), m_data->m_narrowphase->getCollidablesCpu(),
|
||||||
);
|
m_data->m_narrowphase->getInternalData(), m_data->m_broadphaseSap);
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user