add preliminary GPU ray cast (brute-force slow, only against sphere, allocates OpenCL buffers at every cast)

This commit is contained in:
erwin coumans
2013-05-28 18:39:53 -07:00
parent f55473d586
commit a886a978b7
7 changed files with 458 additions and 117 deletions

View File

@@ -19,11 +19,15 @@
#include "../gwenUserInterface.h"
#include "Bullet3Dynamics/ConstraintSolver/b3Point2PointConstraint.h"
#include "OpenGLWindow/GLPrimitiveRenderer.h"
#include "Bullet3OpenCL/RayCast/b3GpuRayCast.h"
void GpuConvexScene::setupScene(const ConstructionInfo& ci)
{
m_primRenderer = ci.m_primRenderer;
m_raycaster = new b3GpuRaycast(m_clData->m_clContext,m_clData->m_clDevice,m_clData->m_clQueue);
int index=0;
createStaticEnvironment(ci);
@@ -46,6 +50,12 @@ void GpuConvexScene::setupScene(const ConstructionInfo& ci)
ci.m_gui->setStatusBarMessage(msg,true);
}
void GpuConvexScene::destroyScene()
{
delete m_raycaster;
m_raycaster = 0;
}
int GpuConvexScene::createDynamicsObjects(const ConstructionInfo& ci)
{
int strideInBytes = 9*sizeof(float);
@@ -236,27 +246,6 @@ GpuRaytraceScene::~GpuRaytraceScene()
}
bool sphere_intersect(const b3Vector3& spherePos, b3Scalar radius, const b3Vector3& rayFrom, const b3Vector3& rayTo)
{
// rs = ray.org - sphere.center
const b3Vector3& rs = rayFrom - spherePos;
b3Vector3 rayDir = rayTo-rayFrom;//rayFrom-rayTo;
rayDir.normalize();
float B = b3Dot(rs, rayDir);
float C = b3Dot(rs, rs) - (radius * radius);
float D = B * B - C;
if (D > 0.0)
{
float t = -B - sqrt(D);
if ( (t > 0.0))// && (t < isect.t) )
{
return true;//isect.t = t;
}
}
return false;
}
int GpuRaytraceScene::createDynamicsObjects(const ConstructionInfo& ci)
{
@@ -284,14 +273,14 @@ int GpuRaytraceScene::createDynamicsObjects(const ConstructionInfo& ci)
//int colIndex = m_data->m_np->registerSphereShape(1);
for (int i=0;i<1;i++)
for (int i=0;i<10;i++)
//for (int i=0;i<ci.arraySizeX;i++)
{
//for (int j=0;j<ci.arraySizeY;j++)
for (int j=0;j<5;j++)
for (int j=0;j<10;j++)
{
// for (int k=0;k<ci.arraySizeZ;k++)
for (int k=0;k<1;k++)
for (int k=0;k<10;k++)
{
float mass = 1.f;
if (j==0)//ci.arraySizeY-1)
@@ -328,6 +317,8 @@ int GpuRaytraceScene::createDynamicsObjects(const ConstructionInfo& ci)
}
//create primary rays
b3AlignedObjectArray<b3RayInfo> rays;
void GpuRaytraceScene::renderScene()
{
@@ -339,109 +330,125 @@ void GpuRaytraceScene::renderScene()
m_instancingRenderer->updateCamera();
//generate primary rays
float top = 1.f;
float bottom = -1.f;
float nearPlane = 1.f;
float farPlane = 1000.f;
float tanFov = (top-bottom)*0.5f / nearPlane;
float screenWidth = m_instancingRenderer->getScreenWidth();
float screenHeight = m_instancingRenderer->getScreenHeight();
float fov = 2. * atanf (tanFov);
float aspect = screenWidth / screenHeight;
b3Vector3 rayFrom, camTarget;
m_instancingRenderer->getCameraPosition(rayFrom);
m_instancingRenderer->getCameraTargetPosition(camTarget);
b3Vector3 rayForward = camTarget-rayFrom;
rayForward.normalize();
rayForward*= farPlane;
b3Vector3 rightOffset;
b3Vector3 vertical(0.f,1.f,0.f);
b3Vector3 hor;
hor = rayForward.cross(vertical);
hor.normalize();
vertical = hor.cross(rayForward);
vertical.normalize();
float tanfov = tanf(0.5f*fov);
hor *= aspect*2.f * farPlane * tanfov;
vertical *= 2.f * farPlane * tanfov;
b3Vector3 rayToCenter = rayFrom + rayForward;
float texWidth = m_raytraceData->textureWidth;
float texHeight = m_raytraceData->textureHeight;
float widthFactor = (screenWidth/texWidth);
float heightFactor = (screenHeight/texHeight);
//should be screenwidth/height
b3Vector3 dHor = hor * 1./float(screenWidth);
b3Vector3 dVert = vertical * 1./float(screenHeight);
b3Transform rayFromTrans;
rayFromTrans.setIdentity();
rayFromTrans.setOrigin(rayFrom);
b3Transform rayFromLocal;
b3Transform rayToLocal;
//cast primary rays
m_data->m_np->readbackAllBodiesToCpu();
for (int x=0;x<m_raytraceData->textureWidth;x++)
{
for (int y=0;y<m_raytraceData->textureHeight;y++)
B3_PROFILE("Generate primary rays");
float top = 1.f;
float bottom = -1.f;
float nearPlane = 1.f;
float farPlane = 1000.f;
float tanFov = (top-bottom)*0.5f / nearPlane;
float screenWidth = m_instancingRenderer->getScreenWidth();
float screenHeight = m_instancingRenderer->getScreenHeight();
float fov = 2. * atanf (tanFov);
float aspect = screenWidth / screenHeight;
b3Vector3 rayFrom, camTarget;
m_instancingRenderer->getCameraPosition(rayFrom);
m_instancingRenderer->getCameraTargetPosition(camTarget);
b3Vector3 rayForward = camTarget-rayFrom;
rayForward.normalize();
rayForward*= farPlane;
b3Vector3 rightOffset;
b3Vector3 vertical(0.f,1.f,0.f);
b3Vector3 hor;
hor = rayForward.cross(vertical);
hor.normalize();
vertical = hor.cross(rayForward);
vertical.normalize();
float tanfov = tanf(0.5f*fov);
hor *= aspect*2.f * farPlane * tanfov;
vertical *= 2.f * farPlane * tanfov;
b3Vector3 rayToCenter = rayFrom + rayForward;
float texWidth = m_raytraceData->textureWidth;
float texHeight = m_raytraceData->textureHeight;
float widthFactor = (screenWidth/texWidth);
float heightFactor = (screenHeight/texHeight);
//should be screenwidth/height
b3Vector3 dHor = hor * 1./float(screenWidth);
b3Vector3 dVert = vertical * 1./float(screenHeight);
b3Transform rayFromTrans;
rayFromTrans.setIdentity();
rayFromTrans.setOrigin(rayFrom);
b3Transform rayFromLocal;
b3Transform rayToLocal;
m_data->m_np->readbackAllBodiesToCpu();
//create primary rays
rays.resize(m_raytraceData->textureWidth*m_raytraceData->textureHeight);
b3Vector3 rayTo;
b3RayInfo ray;
{
b3Vector3 rayTo = rayToCenter - 0.5f * hor + 0.5f * vertical;
rayTo += x * dHor*widthFactor;
rayTo -= y * dVert*heightFactor;
//if there is a hit, color the pixels
int numBodies = m_data->m_rigidBodyPipeline->getNumBodies();
bool hits = false;
for (int i=0;i<numBodies && !hits;i++)
for (int x=0;x<m_raytraceData->textureWidth;x++)
{
b3Vector3 pos;
b3Quaternion orn;
m_data->m_np->getObjectTransformFromCpu(pos,orn,i);
b3Scalar radius = 1;
for (int y=0;y<m_raytraceData->textureHeight;y++)
{
hits = sphere_intersect(pos, radius, rayFrom, rayTo);
rayTo = rayToCenter - 0.5f * hor + 0.5f * vertical;
rayTo += x * dHor*widthFactor;
rayTo -= y * dVert*heightFactor;
ray.m_from = rayFrom;
ray.m_to = rayTo;
rays[x+m_raytraceData->textureWidth*y] = ray;
}
}
if (hits)
{
m_raytraceData->m_texels[(x+m_raytraceData->textureWidth*y)*3+0] = 255;
m_raytraceData->m_texels[(x+m_raytraceData->textureWidth*y)*3+1] = 0;
m_raytraceData->m_texels[(x+m_raytraceData->textureWidth*y)*3+2] = 0;
} else
{
m_raytraceData->m_texels[(x+m_raytraceData->textureWidth*y)*3+0] = 0;
m_raytraceData->m_texels[(x+m_raytraceData->textureWidth*y)*3+1] = 0;
m_raytraceData->m_texels[(x+m_raytraceData->textureWidth*y)*3+2] = 0;
}
}
}
b3AlignedObjectArray<b3RayHit> hits;
hits.resize(rays.size());
{
B3_PROFILE("init hits");
for (int i=0;i<hits.size();i++)
{
hits[i].m_hitFraction = 1.f;
}
}
m_raycaster->castRaysHost(rays, hits, this->m_data->m_np->getNumBodiesGpu(), m_data->m_np->getBodiesCpu(), m_data->m_np->getNumCollidablesGpu(), m_data->m_np->getCollidablesCpu());
{
B3_PROFILE("write texels");
for (int i=0;i<hits.size();i++)
{
bool hit = hits[i].m_hitFraction < 1.f;
if (hit)
{
m_raytraceData->m_texels[(i)*3+0] = 255;
m_raytraceData->m_texels[(i)*3+1] = 0;
m_raytraceData->m_texels[(i)*3+2] = 0;
} else
{
m_raytraceData->m_texels[(i)*3+0] = 0;
m_raytraceData->m_texels[(i)*3+1] = 0;
m_raytraceData->m_texels[(i)*3+2] = 0;
}
}
}
GLint err;
err = glGetError();

View File

@@ -8,9 +8,13 @@ class GpuConvexScene : public GpuRigidBodyDemo
protected:
class GLPrimitiveRenderer* m_primRenderer;
class b3GpuRaycast* m_raycaster;
public:
GpuConvexScene() :m_primRenderer(0) {}
GpuConvexScene() :m_primRenderer(0), m_raycaster(0)
{
}
virtual ~GpuConvexScene(){}
virtual const char* getName()
{
@@ -25,6 +29,8 @@ public:
virtual void setupScene(const ConstructionInfo& ci);
virtual void destroyScene();
virtual int createDynamicsObjects(const ConstructionInfo& ci);
virtual int createDynamicsObjects2(const ConstructionInfo& ci,const float* vertices, int numVertices, const int* indices,int numIndices);

View File

@@ -0,0 +1,151 @@
#include "b3GpuRaycast.h"
#include "Bullet3OpenCL/NarrowphaseCollision/b3Collidable.h"
#include "Bullet3Collision/NarrowPhaseCollision/b3RigidBodyCL.h"
#include "Bullet3Common/b3Quickprof.h"
#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h"
#include "Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h"
#include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h"
#define B3_RAYCAST_PATH "src/Bullet3OpenCL/Raycast/kernels/rayCastKernels.cl"
struct b3GpuRaycastInternalData
{
cl_context m_context;
cl_device_id m_device;
cl_command_queue m_q;
cl_kernel m_raytraceKernel;
int m_test;
};
b3GpuRaycast::b3GpuRaycast(cl_context ctx,cl_device_id device, cl_command_queue q)
{
m_data = new b3GpuRaycastInternalData;
m_data->m_context = ctx;
m_data->m_device = device;
m_data->m_q = q;
m_data->m_raytraceKernel = 0;
char* rayCastKernelCL= 0;
{
cl_int errNum=0;
cl_program prog = b3OpenCLUtils::compileCLProgramFromString(m_data->m_context,m_data->m_device,rayCastKernelCL,&errNum,"",B3_RAYCAST_PATH);
b3Assert(errNum==CL_SUCCESS);
m_data->m_raytraceKernel = b3OpenCLUtils::compileCLKernelFromString(m_data->m_context, m_data->m_device,rayCastKernelCL, "rayCastKernel",&errNum,prog);
b3Assert(errNum==CL_SUCCESS);
clReleaseProgram(prog);
}
}
b3GpuRaycast::~b3GpuRaycast()
{
clReleaseKernel(m_data->m_raytraceKernel);
delete m_data;
}
bool sphere_intersect(const b3Vector3& spherePos, b3Scalar radius, const b3Vector3& rayFrom, const b3Vector3& rayTo)
{
// rs = ray.org - sphere.center
const b3Vector3& rs = rayFrom - spherePos;
b3Vector3 rayDir = rayTo-rayFrom;//rayFrom-rayTo;
rayDir.normalize();
float B = b3Dot(rs, rayDir);
float C = b3Dot(rs, rs) - (radius * radius);
float D = B * B - C;
if (D > 0.0)
{
float t = -B - sqrt(D);
if ( (t > 0.0))// && (t < isect.t) )
{
return true;//isect.t = t;
}
}
return false;
}
void b3GpuRaycast::castRaysHost(const b3AlignedObjectArray<b3RayInfo>& rays, b3AlignedObjectArray<b3RayHit>& hitResults,
int numBodies,const struct b3RigidBodyCL* bodies, int numCollidables,const struct b3Collidable* collidables)
{
// return castRays(rays,hitResults,numBodies,bodies,numCollidables,collidables);
B3_PROFILE("castRaysHost");
for (int r=0;r<rays.size();r++)
{
b3Vector3 rayFrom = rays[r].m_from;
b3Vector3 rayTo = rays[r].m_to;
//if there is a hit, color the pixels
bool hits = false;
for (int b=0;b<numBodies && !hits;b++)
{
const b3Vector3& pos = bodies[b].m_pos;
const b3Quaternion& orn = bodies[b].m_quat;
b3Scalar radius = 1;
if (sphere_intersect(pos, radius, rayFrom, rayTo))
hits = true;
}
if (hits)
hitResults[r].m_hitFraction = 0.f;
}
}
void b3GpuRaycast::castRays(const b3AlignedObjectArray<b3RayInfo>& rays, b3AlignedObjectArray<b3RayHit>& hitResults,
int numBodies,const struct b3RigidBodyCL* bodies, int numCollidables, const struct b3Collidable* collidables)
{
B3_PROFILE("castRaysGPU");
b3OpenCLArray<b3RayInfo> gpuRays(m_data->m_context,m_data->m_q);
gpuRays.copyFromHost(rays);
b3OpenCLArray<b3RayHit> gpuHitResults(m_data->m_context,m_data->m_q);
gpuHitResults.resize(hitResults.size());
b3OpenCLArray<b3RigidBodyCL> gpuBodies(m_data->m_context,m_data->m_q);
gpuBodies.resize(numBodies);
gpuBodies.copyFromHostPointer(bodies,numBodies);
b3OpenCLArray<b3Collidable> gpuCollidables(m_data->m_context,m_data->m_q);
gpuCollidables.resize(numCollidables);
gpuCollidables.copyFromHostPointer(collidables,numCollidables);
//run kernel
{
B3_PROFILE("raycast launch1D");
b3LauncherCL launcher(m_data->m_q,m_data->m_raytraceKernel);
int numRays = rays.size();
launcher.setConst(numRays);
launcher.setBuffer(gpuRays.getBufferCL());
launcher.setBuffer(gpuHitResults.getBufferCL());
launcher.setConst(numBodies);
launcher.setBuffer(gpuBodies.getBufferCL());
launcher.setBuffer(gpuCollidables.getBufferCL());
launcher.launch1D(numRays);
clFinish(m_data->m_q);
}
//copy results
gpuHitResults.copyToHost(hitResults);
}

View File

@@ -0,0 +1,68 @@
#ifndef B3_GPU_RAYCAST_H
#define B3_GPU_RAYCAST_H
#include "Bullet3Common/b3Vector3.h"
#include "Bullet3OpenCL/Initialize/b3OpenCLInclude.h"
#include "Bullet3Common/b3AlignedObjectArray.h"
struct b3RayInfo
{
b3Vector3 m_from;
b3Vector3 m_to;
};
struct b3RayHit
{
b3Scalar m_hitFraction;
int m_hitResult0;
int m_hitResult1;
int m_hitResult2;
b3Vector3 m_hitPoint;
b3Vector3 m_hitNormal;
};
class b3GpuRaycast
{
protected:
struct b3GpuRaycastInternalData* m_data;
public:
b3GpuRaycast(cl_context ctx,cl_device_id device, cl_command_queue q);
virtual ~b3GpuRaycast();
void castRaysHost(const b3AlignedObjectArray<b3RayInfo>& raysIn, b3AlignedObjectArray<b3RayHit>& hitResults,
int numBodies, const struct b3RigidBodyCL* bodies, int numCollidables, const struct b3Collidable* collidables);
void castRays(const b3AlignedObjectArray<b3RayInfo>& rays, b3AlignedObjectArray<b3RayHit>& hitResults,
int numBodies,const struct b3RigidBodyCL* bodies, int numCollidables, const struct b3Collidable* collidables);
/* const b3OpenCLArray<b3RigidBodyCL>* bodyBuf,
b3OpenCLArray<b3Contact4>* contactOut, int& nContacts,
int maxContactCapacity,
const b3OpenCLArray<b3ConvexPolyhedronCL>& hostConvexData,
const b3OpenCLArray<b3Vector3>& vertices,
const b3OpenCLArray<b3Vector3>& uniqueEdges,
const b3OpenCLArray<b3GpuFace>& faces,
const b3OpenCLArray<int>& indices,
const b3OpenCLArray<b3Collidable>& gpuCollidables,
const b3OpenCLArray<b3GpuChildShape>& gpuChildShapes,
const b3OpenCLArray<b3YetAnotherAabb>& clAabbs,
b3OpenCLArray<b3Vector3>& worldVertsB1GPU,
b3OpenCLArray<b3Int4>& clippingFacesOutGPU,
b3OpenCLArray<b3Vector3>& worldNormalsAGPU,
b3OpenCLArray<b3Vector3>& worldVertsA1GPU,
b3OpenCLArray<b3Vector3>& worldVertsB2GPU,
b3AlignedObjectArray<class b3OptimizedBvh*>& bvhData,
b3OpenCLArray<b3QuantizedBvhNode>* treeNodesGPU,
b3OpenCLArray<b3BvhSubtreeInfo>* subTreesGPU,
b3OpenCLArray<b3BvhInfo>* bvhInfo,
int numObjects,
int maxTriConvexPairCapacity,
b3OpenCLArray<b3Int4>& triangleConvexPairs,
int& numTriConvexPairsOut
*/
};
#endif //B3_GPU_RAYCAST_H

View File

@@ -0,0 +1,96 @@
typedef struct
{
float4 m_from;
float4 m_to;
} b3RayInfo;
typedef struct
{
float m_hitFraction;
int m_hitResult0;
int m_hitResult1;
int m_hitResult2;
float4 m_hitPoint;
float4 m_hitNormal;
} b3RayHit;
typedef struct
{
float4 m_pos;
float4 m_quat;
float4 m_linVel;
float4 m_angVel;
unsigned int m_collidableIdx;
float m_invMass;
float m_restituitionCoeff;
float m_frictionCoeff;
} Body;
typedef struct Collidable
{
int m_unused1;
int m_unused2;
int m_shapeType;
int m_shapeIndex;
} Collidable;
bool sphere_intersect(float4 spherePos, float radius, float4 rayFrom, float4 rayTo)
{
// rs = ray.org - sphere.center
float4 rs = rayFrom - spherePos;
rs.w = 0.f;
float4 rayDir = (rayTo-rayFrom);
rayDir.w = 0.f;
rayDir = normalize(rayDir);
float B = dot(rs, rayDir);
float C = dot(rs, rs) - (radius * radius);
float D = B * B - C;
if (D > 0.0)
{
float t = -B - sqrt(D);
if ( (t > 0.0))// && (t < isect.t) )
{
return true;//isect.t = t;
}
}
return false;
}
__kernel void rayCastKernel(
int numRays,
const __global b3RayInfo* rays,
__global b3RayHit* hits,
const int numBodies,
__global Body* bodies,
__global Collidable* collidables)
{
bool hit=false;
int i = get_global_id(0);
if (i<numRays)
{
hits[i].m_hitFraction = 1.f;
float4 rayFrom = rays[i].m_from;
float4 rayTo = rays[i].m_to;
for (int b=0;b<numBodies;b++)
{
float4 pos = bodies[b].m_pos;
// float4 orn = bodies[b].m_quat;
float radius = 1.f;
if (sphere_intersect(pos, radius, rayFrom, rayTo))
hit = true;
}
if (hit)
hits[i].m_hitFraction = 0.f;
}
}

View File

@@ -678,6 +678,10 @@ cl_mem b3GpuNarrowPhase::getBodiesGpu()
return (cl_mem)m_data->m_bodyBufferGPU->getBufferCL();
}
const struct b3RigidBodyCL* b3GpuNarrowPhase::getBodiesCpu() const
{
return &m_data->m_bodyBufferCPU->at(0);
};
int b3GpuNarrowPhase::getNumBodiesGpu() const
{
@@ -710,6 +714,12 @@ cl_mem b3GpuNarrowPhase::getCollidablesGpu()
return m_data->m_collidablesGPU->getBufferCL();
}
const struct b3Collidable* b3GpuNarrowPhase::getCollidablesCpu() const
{
return &m_data->m_collidablesCPU[0];
}
cl_mem b3GpuNarrowPhase::getAabbBufferGpu()
{
return m_data->m_localShapeAABBGPU->getBufferCL();

View File

@@ -56,12 +56,15 @@ public:
cl_mem getBodiesGpu();
const struct b3RigidBodyCL* getBodiesCpu() const;
int getNumBodiesGpu() const;
cl_mem getBodyInertiasGpu();
int getNumBodyInertiasGpu() const;
cl_mem getCollidablesGpu();
const struct b3Collidable* getCollidablesCpu() const;
int getNumCollidablesGpu() const;