add preliminary GPU ray cast (brute-force slow, only against sphere, allocates OpenCL buffers at every cast)

2013-05-28 18:39:53 -07:00
parent f55473d586
commit a886a978b7
7 changed files with 458 additions and 117 deletions
--- a/src/Bullet3OpenCL/Raycast/b3GpuRaycast.cpp
+++ b/src/Bullet3OpenCL/Raycast/b3GpuRaycast.cpp
@@ -0,0 +1,151 @@
+
+#include "b3GpuRaycast.h"
+#include "Bullet3OpenCL/NarrowphaseCollision/b3Collidable.h"
+#include "Bullet3Collision/NarrowPhaseCollision/b3RigidBodyCL.h"
+#include "Bullet3Common/b3Quickprof.h"
+#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h"
+#include "Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h"
+#include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h"
+
+
+
+#define B3_RAYCAST_PATH "src/Bullet3OpenCL/Raycast/kernels/rayCastKernels.cl"
+
+
+
+struct b3GpuRaycastInternalData
+{
+	cl_context m_context;
+	cl_device_id m_device;
+	cl_command_queue  m_q;
+	cl_kernel	m_raytraceKernel;
+	int m_test;
+};
+
+b3GpuRaycast::b3GpuRaycast(cl_context ctx,cl_device_id device, cl_command_queue  q)
+{
+	m_data = new b3GpuRaycastInternalData;
+	m_data->m_context = ctx;
+	m_data->m_device = device;
+	m_data->m_q = q;
+	m_data->m_raytraceKernel = 0;
+
+	char* rayCastKernelCL= 0;
+
+	{
+		cl_int errNum=0;
+		cl_program prog = b3OpenCLUtils::compileCLProgramFromString(m_data->m_context,m_data->m_device,rayCastKernelCL,&errNum,"",B3_RAYCAST_PATH);
+		b3Assert(errNum==CL_SUCCESS);
+		m_data->m_raytraceKernel = b3OpenCLUtils::compileCLKernelFromString(m_data->m_context, m_data->m_device,rayCastKernelCL, "rayCastKernel",&errNum,prog);
+		b3Assert(errNum==CL_SUCCESS);
+		clReleaseProgram(prog);
+	}
+
+
+}
+
+b3GpuRaycast::~b3GpuRaycast()
+{
+	clReleaseKernel(m_data->m_raytraceKernel);
+	delete m_data;
+}
+
+bool sphere_intersect(const b3Vector3& spherePos,  b3Scalar radius, const b3Vector3& rayFrom, const b3Vector3& rayTo)
+{
+    // rs = ray.org - sphere.center
+    const b3Vector3& rs = rayFrom - spherePos;
+	b3Vector3 rayDir = rayTo-rayFrom;//rayFrom-rayTo;
+	rayDir.normalize();
+
+    float B = b3Dot(rs, rayDir);
+    float C = b3Dot(rs, rs) - (radius * radius);
+    float D = B * B - C;
+
+    if (D > 0.0)
+    {
+        float t = -B - sqrt(D);
+        if ( (t > 0.0))// && (t < isect.t) )
+        {
+            return true;//isect.t = t;
+		}
+	}
+	return false;
+}
+
+
+void b3GpuRaycast::castRaysHost(const b3AlignedObjectArray<b3RayInfo>& rays,	b3AlignedObjectArray<b3RayHit>& hitResults,
+		int numBodies,const struct b3RigidBodyCL* bodies, int numCollidables,const struct b3Collidable* collidables)
+{
+
+//	return castRays(rays,hitResults,numBodies,bodies,numCollidables,collidables);
+
+	B3_PROFILE("castRaysHost");
+
+	for (int r=0;r<rays.size();r++)
+	{
+		b3Vector3 rayFrom = rays[r].m_from;
+		b3Vector3 rayTo = rays[r].m_to;
+
+		//if there is a hit, color the pixels
+		bool hits  = false;
+
+		for (int b=0;b<numBodies && !hits;b++)
+		{
+				
+			const b3Vector3& pos = bodies[b].m_pos;
+			const b3Quaternion& orn = bodies[b].m_quat;
+			
+			b3Scalar radius = 1;
+
+			if (sphere_intersect(pos,  radius, rayFrom, rayTo))
+				hits = true;
+		}
+		if (hits)
+			hitResults[r].m_hitFraction = 0.f;
+
+	}
+}
+
+void b3GpuRaycast::castRays(const b3AlignedObjectArray<b3RayInfo>& rays,	b3AlignedObjectArray<b3RayHit>& hitResults,
+		int numBodies,const struct b3RigidBodyCL* bodies, int numCollidables, const struct b3Collidable* collidables)
+{
+	B3_PROFILE("castRaysGPU");
+
+	b3OpenCLArray<b3RayInfo> gpuRays(m_data->m_context,m_data->m_q);
+	gpuRays.copyFromHost(rays);
+
+	b3OpenCLArray<b3RayHit> gpuHitResults(m_data->m_context,m_data->m_q);
+	gpuHitResults.resize(hitResults.size());
+
+	b3OpenCLArray<b3RigidBodyCL> gpuBodies(m_data->m_context,m_data->m_q);
+	gpuBodies.resize(numBodies);
+	gpuBodies.copyFromHostPointer(bodies,numBodies);
+
+	b3OpenCLArray<b3Collidable> gpuCollidables(m_data->m_context,m_data->m_q);
+	gpuCollidables.resize(numCollidables);
+	gpuCollidables.copyFromHostPointer(collidables,numCollidables);
+
+
+	//run kernel
+	{
+		B3_PROFILE("raycast launch1D");
+
+		b3LauncherCL launcher(m_data->m_q,m_data->m_raytraceKernel);
+		int numRays = rays.size();
+		launcher.setConst(numRays);
+
+		launcher.setBuffer(gpuRays.getBufferCL());
+		launcher.setBuffer(gpuHitResults.getBufferCL());
+
+		launcher.setConst(numBodies);
+		launcher.setBuffer(gpuBodies.getBufferCL());
+		launcher.setBuffer(gpuCollidables.getBufferCL());
+
+		launcher.launch1D(numRays);
+		clFinish(m_data->m_q);
+	}
+
+	//copy results
+	gpuHitResults.copyToHost(hitResults);
+
+}
--- a/src/Bullet3OpenCL/Raycast/b3GpuRaycast.h
+++ b/src/Bullet3OpenCL/Raycast/b3GpuRaycast.h
@@ -0,0 +1,68 @@
+#ifndef B3_GPU_RAYCAST_H
+#define B3_GPU_RAYCAST_H
+
+#include "Bullet3Common/b3Vector3.h"
+#include "Bullet3OpenCL/Initialize/b3OpenCLInclude.h"
+
+#include "Bullet3Common/b3AlignedObjectArray.h"
+
+struct b3RayInfo
+{
+	b3Vector3 m_from;
+	b3Vector3 m_to;
+};
+
+struct b3RayHit
+{
+		b3Scalar	m_hitFraction;
+		int	m_hitResult0;
+		int	m_hitResult1;
+		int	m_hitResult2;
+		b3Vector3 m_hitPoint;
+		b3Vector3 m_hitNormal;
+};
+
+class b3GpuRaycast
+{
+protected:
+	struct b3GpuRaycastInternalData* m_data;
+public:
+	b3GpuRaycast(cl_context ctx,cl_device_id device, cl_command_queue  q);
+	virtual ~b3GpuRaycast();
+
+	void castRaysHost(const b3AlignedObjectArray<b3RayInfo>& raysIn,	b3AlignedObjectArray<b3RayHit>& hitResults,
+		int numBodies, const struct b3RigidBodyCL* bodies, int numCollidables, const struct b3Collidable* collidables);
+
+	void castRays(const b3AlignedObjectArray<b3RayInfo>& rays,	b3AlignedObjectArray<b3RayHit>& hitResults,
+		int numBodies,const struct b3RigidBodyCL* bodies, int numCollidables, const struct b3Collidable* collidables);
+	
+/*			const b3OpenCLArray<b3RigidBodyCL>* bodyBuf,
+			b3OpenCLArray<b3Contact4>* contactOut, int& nContacts,
+			int maxContactCapacity,
+			const b3OpenCLArray<b3ConvexPolyhedronCL>& hostConvexData,
+			const b3OpenCLArray<b3Vector3>& vertices,
+			const b3OpenCLArray<b3Vector3>& uniqueEdges,
+			const b3OpenCLArray<b3GpuFace>& faces,
+			const b3OpenCLArray<int>& indices,
+			const b3OpenCLArray<b3Collidable>& gpuCollidables,
+			const b3OpenCLArray<b3GpuChildShape>& gpuChildShapes,
+
+			const b3OpenCLArray<b3YetAnotherAabb>& clAabbs,
+           b3OpenCLArray<b3Vector3>& worldVertsB1GPU,
+           b3OpenCLArray<b3Int4>& clippingFacesOutGPU,
+           b3OpenCLArray<b3Vector3>& worldNormalsAGPU,
+           b3OpenCLArray<b3Vector3>& worldVertsA1GPU,
+           b3OpenCLArray<b3Vector3>& worldVertsB2GPU,
+		   b3AlignedObjectArray<class b3OptimizedBvh*>& bvhData,
+		   b3OpenCLArray<b3QuantizedBvhNode>*	treeNodesGPU,
+			b3OpenCLArray<b3BvhSubtreeInfo>*	subTreesGPU,
+			b3OpenCLArray<b3BvhInfo>*	bvhInfo,
+			int numObjects,
+			int maxTriConvexPairCapacity,
+			b3OpenCLArray<b3Int4>& triangleConvexPairs,
+			int& numTriConvexPairsOut
+			*/
+		
+};
+
+#endif //B3_GPU_RAYCAST_H
--- a/src/Bullet3OpenCL/Raycast/kernels/rayCastKernels.cl
+++ b/src/Bullet3OpenCL/Raycast/kernels/rayCastKernels.cl
@@ -0,0 +1,96 @@
+
+typedef struct
+{
+	float4 m_from;
+	float4 m_to;
+} b3RayInfo;
+
+typedef struct
+{
+		float m_hitFraction;
+		int	m_hitResult0;
+		int	m_hitResult1;
+		int	m_hitResult2;
+		float4	m_hitPoint;
+		float4	m_hitNormal;
+} b3RayHit;
+
+typedef struct
+{
+	float4 m_pos;
+	float4 m_quat;
+	float4 m_linVel;
+	float4 m_angVel;
+
+	unsigned int m_collidableIdx;
+	float m_invMass;
+	float m_restituitionCoeff;
+	float m_frictionCoeff;
+} Body;
+
+typedef struct Collidable
+{
+	int m_unused1;
+	int m_unused2;
+	int m_shapeType;
+	int m_shapeIndex;
+} Collidable;
+
+bool sphere_intersect(float4 spherePos,  float radius, float4 rayFrom, float4 rayTo)
+{
+    // rs = ray.org - sphere.center
+  float4 rs = rayFrom - spherePos;
+  rs.w = 0.f;
+	float4 rayDir = (rayTo-rayFrom);
+	rayDir.w = 0.f;
+	rayDir = normalize(rayDir);
+
+  float B = dot(rs, rayDir);
+  float C = dot(rs, rs) - (radius * radius);
+  float D = B * B - C;
+
+    if (D > 0.0)
+    {
+        float t = -B - sqrt(D);
+        if ( (t > 0.0))// && (t < isect.t) )
+        {
+            return true;//isect.t = t;
+		}
+	}
+	return false;
+}
+
+__kernel void rayCastKernel(  
+	int numRays, 
+	const __global b3RayInfo* rays, 
+	__global b3RayHit* hits, 
+	const int numBodies, 
+	__global Body* bodies,
+	__global Collidable* collidables)
+{
+
+	bool hit=false;
+
+	int i = get_global_id(0);
+	if (i<numRays)
+	{
+		hits[i].m_hitFraction = 1.f;
+
+		float4 rayFrom = rays[i].m_from;
+		float4 rayTo = rays[i].m_to;
+		
+		for (int b=0;b<numBodies;b++)
+		{
+					
+				float4 pos = bodies[b].m_pos;
+	//		float4 orn = bodies[b].m_quat;
+				
+				float radius = 1.f;
+	
+				if (sphere_intersect(pos,  radius, rayFrom, rayTo))
+					hit = true;
+		}
+		if (hit)
+					hits[i].m_hitFraction = 0.f;
+	}
+}
--- a/src/Bullet3OpenCL/RigidBody/b3GpuNarrowPhase.cpp
+++ b/src/Bullet3OpenCL/RigidBody/b3GpuNarrowPhase.cpp
@@ -678,6 +678,10 @@ cl_mem	b3GpuNarrowPhase::getBodiesGpu()
 	return (cl_mem)m_data->m_bodyBufferGPU->getBufferCL();
 }

+const struct b3RigidBodyCL* b3GpuNarrowPhase::getBodiesCpu() const
+{
+	return &m_data->m_bodyBufferCPU->at(0);
+};

 int	b3GpuNarrowPhase::getNumBodiesGpu() const
 {
@@ -710,6 +714,12 @@ cl_mem b3GpuNarrowPhase::getCollidablesGpu()
 	return m_data->m_collidablesGPU->getBufferCL();
 }

+const struct b3Collidable* b3GpuNarrowPhase::getCollidablesCpu() const
+{
+	return &m_data->m_collidablesCPU[0];
+}
+
+
 cl_mem	b3GpuNarrowPhase::getAabbBufferGpu()
 {
 	return m_data->m_localShapeAABBGPU->getBufferCL();
--- a/src/Bullet3OpenCL/RigidBody/b3GpuNarrowPhase.h
+++ b/src/Bullet3OpenCL/RigidBody/b3GpuNarrowPhase.h
@@ -56,12 +56,15 @@ public:
 	

 	cl_mem	getBodiesGpu();
+	const struct b3RigidBodyCL* getBodiesCpu() const;
+
 	int	getNumBodiesGpu() const;

 	cl_mem	getBodyInertiasGpu();
 	int	getNumBodyInertiasGpu() const;

 	cl_mem	getCollidablesGpu();
+	const struct b3Collidable* getCollidablesCpu() const;
 	int		getNumCollidablesGpu() const;