Accelerate GPU raycaster with PLBVH.

2014-02-23 20:40:58 -08:00
parent e955192971
commit e4fbd5332d
10 changed files with 732 additions and 33 deletions
--- a/src/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvh.h
+++ b/src/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvh.h
@@ -18,6 +18,7 @@ subject to the following restrictions:
 #include "Bullet3OpenCL/BroadphaseCollision/b3SapAabb.h"
 #include "Bullet3Common/shared/b3Int2.h"
 #include "Bullet3Common/shared/b3Int4.h"
+#include "Bullet3Collision/NarrowPhaseCollision/b3RaycastInfo.h"

 #include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h"
 #include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h"
@@ -59,6 +60,7 @@ class b3GpuParallelLinearBvh
 	cl_kernel m_determineInternalNodeAabbsKernel;
 	
 	cl_kernel m_plbvhCalculateOverlappingPairsKernel;
+	cl_kernel m_plbvhRayTraverseKernel;

 	b3FillCL m_fill;
 	b3RadixSort32CL m_radixSorter;
@@ -79,6 +81,7 @@ class b3GpuParallelLinearBvh
 	b3OpenCLArray<int> m_leafNodeParentNodes;
 	b3OpenCLArray<b3SortData> m_mortonCodesAndAabbIndicies;		//m_key = morton code, m_value == aabb index
 	b3OpenCLArray<b3SapAabb> m_mergedAabb;
+	b3OpenCLArray<b3SapAabb> m_leafNodeAabbs;
 	
 public:
 	b3GpuParallelLinearBvh(cl_context context, cl_device_id device, cl_command_queue queue) :
@@ -94,7 +97,8 @@ public:
 		m_internalNodeParentNodes(context, queue),
 		m_leafNodeParentNodes(context, queue),
 		m_mortonCodesAndAabbIndicies(context, queue),
-		m_mergedAabb(context, queue)
+		m_mergedAabb(context, queue),
+		m_leafNodeAabbs(context, queue)
 	{
 		const char CL_PROGRAM_PATH[] = "src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvh.cl";
 		
@@ -115,6 +119,8 @@ public:
 		
 		m_plbvhCalculateOverlappingPairsKernel = b3OpenCLUtils::compileCLKernelFromString( context, device, kernelSource, "plbvhCalculateOverlappingPairs", &error, m_parallelLinearBvhProgram, additionalMacros );
 		b3Assert(m_plbvhCalculateOverlappingPairsKernel);
+		m_plbvhRayTraverseKernel = b3OpenCLUtils::compileCLKernelFromString( context, device, kernelSource, "plbvhRayTraverse", &error, m_parallelLinearBvhProgram, additionalMacros );
+		b3Assert(m_plbvhRayTraverseKernel);
 	}
 	
 	virtual ~b3GpuParallelLinearBvh() 
@@ -125,6 +131,7 @@ public:
 		clReleaseKernel(m_determineInternalNodeAabbsKernel);
 		
 		clReleaseKernel(m_plbvhCalculateOverlappingPairsKernel);
+		clReleaseKernel(m_plbvhRayTraverseKernel);
 		
 		clReleaseProgram(m_parallelLinearBvhProgram);
 	}
@@ -148,8 +155,12 @@ public:
 			m_leafNodeParentNodes.resize(numLeaves);
 			m_mortonCodesAndAabbIndicies.resize(numLeaves);
 			m_mergedAabb.resize(numLeaves);
+			m_leafNodeAabbs.resize(numLeaves);
 		}
 		
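+		//Keep a copy of the AABBs; calculateOverlappingPairs() and testRaysAgainstBvhAabbs() read m_leafNodeAabbs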
+		m_leafNodeAabbs.copyFromOpenCLArray(worldSpaceAabbs);
+		
 		//Determine number of levels in the binary tree( numLevels = ceil( log2(numLeaves) ) )
 		//The number of levels is equivalent to the number of bits needed to uniquely identify each node(including both internal and leaf nodes)
 		int numLevels = 0;
@@ -168,7 +179,7 @@ public:
 			if(0) printf("numLeaves, numLevels, mostSignificantBit: %d, %d, %d \n", numLeaves, numLevels, mostSignificantBit);
 		}
 		
-		//Determine number of nodes per level, use prefix sum to get offsets of each level, and send to GPU
+		//Determine number of internal nodes per level, use prefix sum to get offsets of each level, and send to GPU
 		{
 			B3_PROFILE("Determine number of nodes per level");
 			
@@ -329,7 +340,7 @@ public:
 		}
 		
 		//For each internal node, check children to get its AABB; start from the 
-		//last level and move towards the root
+		//last level, which contains the leaves, and move towards the root
 		{
 			B3_PROFILE("Set AABBs");
 		
@@ -416,10 +427,12 @@ public:
 		}
 	}
 	
-	//Max number of pairs is out_overlappingPairs.size()
-	//If the number of overlapping pairs is < out_overlappingPairs.size(), the array is resized
-	void calculateOverlappingPairs(const b3OpenCLArray<b3SapAabb>& worldSpaceAabbs, 
-									b3OpenCLArray<int>& out_numPairs, b3OpenCLArray<b3Int4>& out_overlappingPairs)
+	///b3GpuParallelLinearBvh::build() must be called before this function. calculateOverlappingPairs() uses
+	///the worldSpaceAabbs parameter of b3GpuParallelLinearBvh::build() as the query AABBs.
+	///@param out_numPairs If number of pairs exceeds the max number of pairs, this is clamped to the max number.
+	///@param out_overlappingPairs The size() of this array is used to determine the max number of pairs.
+	///If the number of overlapping pairs is < out_overlappingPairs.size(), out_overlappingPairs is resized.
+	void calculateOverlappingPairs(b3OpenCLArray<int>& out_numPairs, b3OpenCLArray<b3Int4>& out_overlappingPairs)
 	{
 		b3Assert( out_numPairs.size() == 1 );
 		
@@ -431,11 +444,11 @@ public:
 		{
 			B3_PROFILE("PLBVH calculateOverlappingPairs");
 		
-			int numQueryAabbs = worldSpaceAabbs.size();
+			int numQueryAabbs = m_leafNodeAabbs.size();
 			
 			b3BufferInfoCL bufferInfo[] = 
 			{
-				b3BufferInfoCL( worldSpaceAabbs.getBufferCL() ),
+				b3BufferInfoCL( m_leafNodeAabbs.getBufferCL() ),
 				
 				b3BufferInfoCL( m_internalNodeChildNodes.getBufferCL() ),
 				b3BufferInfoCL( m_internalNodeAabbs.getBufferCL() ),
@@ -468,6 +481,53 @@ public:
 		
 		out_overlappingPairs.resize(numPairs);
 	}
+	
+	///Launches m_plbvhRayTraverseKernel through launch1D(numRays), passing the BVH node arrays, rays, and both outputs.
+	///@param out_numRayRigidPairs Array of length 1; contains the number of detected ray-rigid AABB intersections; this value may be greater than out_rayRigidPairs.size() if out_rayRigidPairs is not large enough.
+	///@param out_rayRigidPairs Contains an array of rays intersecting rigid AABBs; x == ray index, y == rigid body index.
+	///If the size of this array is insufficient to hold all ray-rigid AABB intersections, additional intersections are discarded.
+	void testRaysAgainstBvhAabbs(const b3OpenCLArray<b3RayInfo>& rays, 
+								b3OpenCLArray<int>& out_numRayRigidPairs, b3OpenCLArray<b3Int2>& out_rayRigidPairs)
+	{
+		B3_PROFILE("PLBVH testRaysAgainstBvhAabbs()");
+		//NOTE(review): the kernel reads m_leafNodeAabbs and the internal node arrays, so build() presumably must run first - confirm
+		int numRays = rays.size();
+		int maxRayRigidPairs = out_rayRigidPairs.size();
+		//Write 0 into out_numRayRigidPairs before the kernel runs
+		int reset = 0;
+		out_numRayRigidPairs.copyFromHostPointer(&reset, 1);
+		//Buffers handed to the kernel through setBuffers(), in array order
+		b3BufferInfoCL bufferInfo[] = 
+		{
+			b3BufferInfoCL( m_leafNodeAabbs.getBufferCL() ),
+			b3BufferInfoCL( m_internalNodeChildNodes.getBufferCL() ),
+			b3BufferInfoCL( m_internalNodeAabbs.getBufferCL() ),
+			b3BufferInfoCL( m_internalNodeLeafIndexRanges.getBufferCL() ),
+			b3BufferInfoCL( m_mortonCodesAndAabbIndicies.getBufferCL() ),
+			
+			b3BufferInfoCL( rays.getBufferCL() ),
+			
+			b3BufferInfoCL( out_numRayRigidPairs.getBufferCL() ),
+			b3BufferInfoCL( out_rayRigidPairs.getBufferCL() )
+		};
+		
+		b3LauncherCL launcher(m_queue, m_plbvhRayTraverseKernel, "m_plbvhRayTraverseKernel");
+		launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) );
+		launcher.setConst(maxRayRigidPairs);
+		launcher.setConst(numRays);
+		
+		launcher.launch1D(numRays);
+		clFinish(m_queue);
+		
+		
+		//Copy the pair count back to the host; exceeding maxRayRigidPairs is only reported through b3Error
+		int numRayRigidPairs = -1;
+		out_numRayRigidPairs.copyToHostPointer(&numRayRigidPairs, 1);
+		
+		if(numRayRigidPairs > maxRayRigidPairs)
+			b3Error("Error running out of rayRigid pairs: numRayRigidPairs = %d, maxRayRigidPairs = %d.\n", numRayRigidPairs, maxRayRigidPairs);
+		
+	}
 };

 #endif