add preliminary GPU ray cast (brute-force slow, only against sphere, allocates OpenCL buffers at every cast)

This commit is contained in:
erwin coumans
2013-05-28 18:39:53 -07:00
parent f55473d586
commit a886a978b7
7 changed files with 458 additions and 117 deletions

View File

@@ -0,0 +1,151 @@
#include "b3GpuRaycast.h"
#include "Bullet3OpenCL/NarrowphaseCollision/b3Collidable.h"
#include "Bullet3Collision/NarrowPhaseCollision/b3RigidBodyCL.h"
#include "Bullet3Common/b3Quickprof.h"
#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h"
#include "Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h"
#include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h"
#define B3_RAYCAST_PATH "src/Bullet3OpenCL/Raycast/kernels/rayCastKernels.cl"
struct b3GpuRaycastInternalData
{
cl_context m_context;
cl_device_id m_device;
cl_command_queue m_q;
cl_kernel m_raytraceKernel;
int m_test;
};
b3GpuRaycast::b3GpuRaycast(cl_context ctx,cl_device_id device, cl_command_queue q)
{
m_data = new b3GpuRaycastInternalData;
m_data->m_context = ctx;
m_data->m_device = device;
m_data->m_q = q;
m_data->m_raytraceKernel = 0;
char* rayCastKernelCL= 0;
{
cl_int errNum=0;
cl_program prog = b3OpenCLUtils::compileCLProgramFromString(m_data->m_context,m_data->m_device,rayCastKernelCL,&errNum,"",B3_RAYCAST_PATH);
b3Assert(errNum==CL_SUCCESS);
m_data->m_raytraceKernel = b3OpenCLUtils::compileCLKernelFromString(m_data->m_context, m_data->m_device,rayCastKernelCL, "rayCastKernel",&errNum,prog);
b3Assert(errNum==CL_SUCCESS);
clReleaseProgram(prog);
}
}
b3GpuRaycast::~b3GpuRaycast()
{
clReleaseKernel(m_data->m_raytraceKernel);
delete m_data;
}
bool sphere_intersect(const b3Vector3& spherePos, b3Scalar radius, const b3Vector3& rayFrom, const b3Vector3& rayTo)
{
// rs = ray.org - sphere.center
const b3Vector3& rs = rayFrom - spherePos;
b3Vector3 rayDir = rayTo-rayFrom;//rayFrom-rayTo;
rayDir.normalize();
float B = b3Dot(rs, rayDir);
float C = b3Dot(rs, rs) - (radius * radius);
float D = B * B - C;
if (D > 0.0)
{
float t = -B - sqrt(D);
if ( (t > 0.0))// && (t < isect.t) )
{
return true;//isect.t = t;
}
}
return false;
}
void b3GpuRaycast::castRaysHost(const b3AlignedObjectArray<b3RayInfo>& rays, b3AlignedObjectArray<b3RayHit>& hitResults,
int numBodies,const struct b3RigidBodyCL* bodies, int numCollidables,const struct b3Collidable* collidables)
{
// return castRays(rays,hitResults,numBodies,bodies,numCollidables,collidables);
B3_PROFILE("castRaysHost");
for (int r=0;r<rays.size();r++)
{
b3Vector3 rayFrom = rays[r].m_from;
b3Vector3 rayTo = rays[r].m_to;
//if there is a hit, color the pixels
bool hits = false;
for (int b=0;b<numBodies && !hits;b++)
{
const b3Vector3& pos = bodies[b].m_pos;
const b3Quaternion& orn = bodies[b].m_quat;
b3Scalar radius = 1;
if (sphere_intersect(pos, radius, rayFrom, rayTo))
hits = true;
}
if (hits)
hitResults[r].m_hitFraction = 0.f;
}
}
void b3GpuRaycast::castRays(const b3AlignedObjectArray<b3RayInfo>& rays, b3AlignedObjectArray<b3RayHit>& hitResults,
int numBodies,const struct b3RigidBodyCL* bodies, int numCollidables, const struct b3Collidable* collidables)
{
B3_PROFILE("castRaysGPU");
b3OpenCLArray<b3RayInfo> gpuRays(m_data->m_context,m_data->m_q);
gpuRays.copyFromHost(rays);
b3OpenCLArray<b3RayHit> gpuHitResults(m_data->m_context,m_data->m_q);
gpuHitResults.resize(hitResults.size());
b3OpenCLArray<b3RigidBodyCL> gpuBodies(m_data->m_context,m_data->m_q);
gpuBodies.resize(numBodies);
gpuBodies.copyFromHostPointer(bodies,numBodies);
b3OpenCLArray<b3Collidable> gpuCollidables(m_data->m_context,m_data->m_q);
gpuCollidables.resize(numCollidables);
gpuCollidables.copyFromHostPointer(collidables,numCollidables);
//run kernel
{
B3_PROFILE("raycast launch1D");
b3LauncherCL launcher(m_data->m_q,m_data->m_raytraceKernel);
int numRays = rays.size();
launcher.setConst(numRays);
launcher.setBuffer(gpuRays.getBufferCL());
launcher.setBuffer(gpuHitResults.getBufferCL());
launcher.setConst(numBodies);
launcher.setBuffer(gpuBodies.getBufferCL());
launcher.setBuffer(gpuCollidables.getBufferCL());
launcher.launch1D(numRays);
clFinish(m_data->m_q);
}
//copy results
gpuHitResults.copyToHost(hitResults);
}

View File

@@ -0,0 +1,68 @@
#ifndef B3_GPU_RAYCAST_H
#define B3_GPU_RAYCAST_H
#include "Bullet3Common/b3Vector3.h"
#include "Bullet3OpenCL/Initialize/b3OpenCLInclude.h"
#include "Bullet3Common/b3AlignedObjectArray.h"
struct b3RayInfo
{
b3Vector3 m_from;
b3Vector3 m_to;
};
struct b3RayHit
{
b3Scalar m_hitFraction;
int m_hitResult0;
int m_hitResult1;
int m_hitResult2;
b3Vector3 m_hitPoint;
b3Vector3 m_hitNormal;
};
class b3GpuRaycast
{
protected:
struct b3GpuRaycastInternalData* m_data;
public:
b3GpuRaycast(cl_context ctx,cl_device_id device, cl_command_queue q);
virtual ~b3GpuRaycast();
void castRaysHost(const b3AlignedObjectArray<b3RayInfo>& raysIn, b3AlignedObjectArray<b3RayHit>& hitResults,
int numBodies, const struct b3RigidBodyCL* bodies, int numCollidables, const struct b3Collidable* collidables);
void castRays(const b3AlignedObjectArray<b3RayInfo>& rays, b3AlignedObjectArray<b3RayHit>& hitResults,
int numBodies,const struct b3RigidBodyCL* bodies, int numCollidables, const struct b3Collidable* collidables);
/* const b3OpenCLArray<b3RigidBodyCL>* bodyBuf,
b3OpenCLArray<b3Contact4>* contactOut, int& nContacts,
int maxContactCapacity,
const b3OpenCLArray<b3ConvexPolyhedronCL>& hostConvexData,
const b3OpenCLArray<b3Vector3>& vertices,
const b3OpenCLArray<b3Vector3>& uniqueEdges,
const b3OpenCLArray<b3GpuFace>& faces,
const b3OpenCLArray<int>& indices,
const b3OpenCLArray<b3Collidable>& gpuCollidables,
const b3OpenCLArray<b3GpuChildShape>& gpuChildShapes,
const b3OpenCLArray<b3YetAnotherAabb>& clAabbs,
b3OpenCLArray<b3Vector3>& worldVertsB1GPU,
b3OpenCLArray<b3Int4>& clippingFacesOutGPU,
b3OpenCLArray<b3Vector3>& worldNormalsAGPU,
b3OpenCLArray<b3Vector3>& worldVertsA1GPU,
b3OpenCLArray<b3Vector3>& worldVertsB2GPU,
b3AlignedObjectArray<class b3OptimizedBvh*>& bvhData,
b3OpenCLArray<b3QuantizedBvhNode>* treeNodesGPU,
b3OpenCLArray<b3BvhSubtreeInfo>* subTreesGPU,
b3OpenCLArray<b3BvhInfo>* bvhInfo,
int numObjects,
int maxTriConvexPairCapacity,
b3OpenCLArray<b3Int4>& triangleConvexPairs,
int& numTriConvexPairsOut
*/
};
#endif //B3_GPU_RAYCAST_H

View File

@@ -0,0 +1,96 @@
typedef struct
{
float4 m_from;
float4 m_to;
} b3RayInfo;
typedef struct
{
float m_hitFraction;
int m_hitResult0;
int m_hitResult1;
int m_hitResult2;
float4 m_hitPoint;
float4 m_hitNormal;
} b3RayHit;
typedef struct
{
float4 m_pos;
float4 m_quat;
float4 m_linVel;
float4 m_angVel;
unsigned int m_collidableIdx;
float m_invMass;
float m_restituitionCoeff;
float m_frictionCoeff;
} Body;
typedef struct Collidable
{
int m_unused1;
int m_unused2;
int m_shapeType;
int m_shapeIndex;
} Collidable;
bool sphere_intersect(float4 spherePos, float radius, float4 rayFrom, float4 rayTo)
{
// rs = ray.org - sphere.center
float4 rs = rayFrom - spherePos;
rs.w = 0.f;
float4 rayDir = (rayTo-rayFrom);
rayDir.w = 0.f;
rayDir = normalize(rayDir);
float B = dot(rs, rayDir);
float C = dot(rs, rs) - (radius * radius);
float D = B * B - C;
if (D > 0.0)
{
float t = -B - sqrt(D);
if ( (t > 0.0))// && (t < isect.t) )
{
return true;//isect.t = t;
}
}
return false;
}
__kernel void rayCastKernel(
int numRays,
const __global b3RayInfo* rays,
__global b3RayHit* hits,
const int numBodies,
__global Body* bodies,
__global Collidable* collidables)
{
bool hit=false;
int i = get_global_id(0);
if (i<numRays)
{
hits[i].m_hitFraction = 1.f;
float4 rayFrom = rays[i].m_from;
float4 rayTo = rays[i].m_to;
for (int b=0;b<numBodies;b++)
{
float4 pos = bodies[b].m_pos;
// float4 orn = bodies[b].m_quat;
float radius = 1.f;
if (sphere_intersect(pos, radius, rayFrom, rayTo))
hit = true;
}
if (hit)
hits[i].m_hitFraction = 0.f;
}
}

View File

@@ -678,6 +678,10 @@ cl_mem b3GpuNarrowPhase::getBodiesGpu()
return (cl_mem)m_data->m_bodyBufferGPU->getBufferCL();
}
const struct b3RigidBodyCL* b3GpuNarrowPhase::getBodiesCpu() const
{
return &m_data->m_bodyBufferCPU->at(0);
};
int b3GpuNarrowPhase::getNumBodiesGpu() const
{
@@ -710,6 +714,12 @@ cl_mem b3GpuNarrowPhase::getCollidablesGpu()
return m_data->m_collidablesGPU->getBufferCL();
}
const struct b3Collidable* b3GpuNarrowPhase::getCollidablesCpu() const
{
return &m_data->m_collidablesCPU[0];
}
cl_mem b3GpuNarrowPhase::getAabbBufferGpu()
{
return m_data->m_localShapeAABBGPU->getBufferCL();

View File

@@ -56,12 +56,15 @@ public:
cl_mem getBodiesGpu();
const struct b3RigidBodyCL* getBodiesCpu() const;
int getNumBodiesGpu() const;
cl_mem getBodyInertiasGpu();
int getNumBodyInertiasGpu() const;
cl_mem getCollidablesGpu();
const struct b3Collidable* getCollidablesCpu() const;
int getNumCollidablesGpu() const;