Merge branch 'master' of https://github.com/erwincoumans/bullet3

2013-06-20 12:55:35 -07:00
parent 13a4084adf 92f2f330df
commit 47403c7e6f
66 changed files with 1026 additions and 914 deletions
--- a/src/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.cpp
+++ b/src/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.cpp
@@ -2,7 +2,7 @@
 #include "b3GpuSapBroadphase.h"
 #include "Bullet3Common/b3Vector3.h"
 #include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h"
-#include "Bullet3Common/b3Quickprof.h"
+
 #include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h"
 #include "kernels/sapKernels.h"
 #include "kernels/sapFastKernels.h"
--- a/src/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.cpp
+++ b/src/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.cpp
@@ -28,7 +28,7 @@ int b3g_actualSATPairTests=0;


 typedef b3AlignedObjectArray<b3Vector3> b3VertexArray;
-#include "Bullet3Common/b3Quickprof.h"
+

 #include <float.h> //for FLT_MAX
 #include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h"
--- a/src/Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.cpp
+++ b/src/Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.cpp
@@ -319,10 +319,10 @@ void b3RadixSort32CL::execute(b3OpenCLArray<b3SortData>& keyValuesInOut, int sor
 	

 //fast prefix scan is not working properly on Mac OSX yet
-#ifdef _WIN32
-	bool fastScan=!m_deviceCPU;//only use fast scan on GPU
-#else
+#ifdef __APPLE__
 	bool fastScan=false;
+#else
+	bool fastScan=!m_deviceCPU;//only use fast scan on GPU
 #endif

 		if (fastScan)
@@ -653,11 +653,10 @@ void b3RadixSort32CL::execute(b3OpenCLArray<unsigned int>& keysInOut, int sortBi
        

 //fast prefix scan is not working properly on Mac OSX yet
-#ifdef _WIN32
-	bool fastScan=!m_deviceCPU;
-	
+#ifdef __APPLE__
+	bool fastScan=false;	
 #else
-	bool fastScan=false;
+	bool fastScan=!m_deviceCPU;
 #endif

 		if (fastScan)
--- a/src/Bullet3OpenCL/Raycast/b3GpuRaycast.cpp
+++ b/src/Bullet3OpenCL/Raycast/b3GpuRaycast.cpp
@@ -2,7 +2,9 @@
 #include "b3GpuRaycast.h"
 #include "Bullet3OpenCL/NarrowphaseCollision/b3Collidable.h"
 #include "Bullet3Collision/NarrowPhaseCollision/b3RigidBodyCL.h"
-#include "Bullet3Common/b3Quickprof.h"
+#include "Bullet3OpenCL/RigidBody/b3GpuNarrowPhaseInternalData.h"
+
+
 #include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h"
 #include "Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h"
 #include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h"
@@ -73,9 +75,57 @@ bool sphere_intersect(const b3Vector3& spherePos,  b3Scalar radius, const b3Vect
 	return false;
 }

+bool rayConvex(const b3Vector3& rayFromLocal, const b3Vector3& rayToLocal, const b3ConvexPolyhedronCL& poly, // clips the segment rayFromLocal->rayToLocal against the face planes of poly
+	const b3AlignedObjectArray<b3GpuFace>& faces,  float& hitFraction, b3Vector3& hitNormal) // returns true on a hit and only then overwrites hitFraction and hitNormal
+{
+	float exitFraction = hitFraction; // the incoming hitFraction is the initial upper bound of the clipped interval
+	float enterFraction = -0.1f; // negative start value; stays negative unless some plane is crossed from its non-negative to its negative side
+	b3Vector3 curHitNormal(0,0,0);
+	for (int i=0;i<poly.m_numFaces;i++)
+	{
+		const b3GpuFace& face = faces[poly.m_faceOffset+i]; // this shape's faces occupy [m_faceOffset, m_faceOffset+m_numFaces) of the shared array
+		float fromPlaneDist = b3Dot(rayFromLocal,face.m_plane)+face.m_plane.w; // plane equation evaluated at the segment start
+		float toPlaneDist = b3Dot(rayToLocal,face.m_plane)+face.m_plane.w; // plane equation evaluated at the segment end
+		if (fromPlaneDist<0.f)
+		{
+			if (toPlaneDist >= 0.f)
+			{
+				float fraction = fromPlaneDist / (fromPlaneDist-toPlaneDist); // parameter at which the segment crosses this plane
+				if (exitFraction>fraction)
+				{
+					exitFraction = fraction; // keep the smallest negative-to-non-negative crossing
+				}
+			} 			
+		} else
+		{
+			if (toPlaneDist<0.f)
+			{
+				float fraction = fromPlaneDist / (fromPlaneDist-toPlaneDist);
+				if (enterFraction <= fraction)
+				{
+					enterFraction = fraction; // keep the largest non-negative-to-negative crossing
+					curHitNormal = face.m_plane; // remember the plane that produced it
+					curHitNormal.w = 0.f;
+				}
+			} else
+			{
+				return false; // both endpoints are on the non-negative side of this plane: reported as a miss
+			}
+		}
+		if (exitFraction <= enterFraction)
+			return false; // clipped interval is empty
+	}
+
+	if (enterFraction < 0.f)
+		return false; // no entering crossing was recorded
+
+	hitFraction = enterFraction;
+	hitNormal = curHitNormal; // NOTE(review): this is the face plane normal as stored, i.e. in the same space as the ray arguments - confirm callers expect that
+	return true;
+}

 void b3GpuRaycast::castRaysHost(const b3AlignedObjectArray<b3RayInfo>& rays,	b3AlignedObjectArray<b3RayHit>& hitResults,
-		int numBodies,const struct b3RigidBodyCL* bodies, int numCollidables,const struct b3Collidable* collidables)
+		int numBodies,const struct b3RigidBodyCL* bodies, int numCollidables,const struct b3Collidable* collidables, const struct b3GpuNarrowPhaseInternalData* narrowphaseData)
 {

 //	return castRays(rays,hitResults,numBodies,bodies,numCollidables,collidables);
@@ -88,6 +138,7 @@ void b3GpuRaycast::castRaysHost(const b3AlignedObjectArray<b3RayInfo>& rays,	b3A
 		float hitFraction = hitResults[r].m_hitFraction;

 		int hitBodyIndex= -1;
+		b3Vector3 hitNormal;

 		for (int b=0;b<numBodies;b++)
 		{
@@ -103,9 +154,34 @@ void b3GpuRaycast::castRaysHost(const b3AlignedObjectArray<b3RayInfo>& rays,	b3A
 					if (sphere_intersect(pos,  radius, rayFrom, rayTo,hitFraction))
 					{
 						hitBodyIndex = b;
+						b3Vector3 hitPoint;
+						hitPoint.setInterpolate3(rays[r].m_from, rays[r].m_to,hitFraction);
+						hitNormal = (hitPoint-bodies[b].m_pos).normalize();
 					}
 				}
+			case SHAPE_CONVEX_HULL:
+				{

+					b3Transform convexWorldTransform;
+					convexWorldTransform.setIdentity();
+					convexWorldTransform.setOrigin(bodies[b].m_pos);
+					convexWorldTransform.setRotation(bodies[b].m_quat);
+					b3Transform convexWorld2Local = convexWorldTransform.inverse();
+
+					b3Vector3 rayFromLocal = convexWorld2Local(rayFrom);
+					b3Vector3 rayToLocal = convexWorld2Local(rayTo);
+					
+					
+					int shapeIndex = collidables[bodies[b].m_collidableIdx].m_shapeIndex;
+					const b3ConvexPolyhedronCL& poly = narrowphaseData->m_convexPolyhedra[shapeIndex];
+					if (rayConvex(rayFromLocal, rayToLocal,poly,narrowphaseData->m_convexFaces, hitFraction, hitNormal))
+					{
+						hitBodyIndex = b;
+					}
+
+					
+					break;
+				}
 			default:
 				{
 					static bool once=true;
@@ -122,7 +198,7 @@ void b3GpuRaycast::castRaysHost(const b3AlignedObjectArray<b3RayInfo>& rays,	b3A

 			hitResults[r].m_hitFraction = hitFraction;
 			hitResults[r].m_hitPoint.setInterpolate3(rays[r].m_from, rays[r].m_to,hitFraction);
-			hitResults[r].m_hitNormal = (hitResults[r].m_hitPoint-bodies[hitBodyIndex].m_pos).normalize();
+			hitResults[r].m_hitNormal = hitNormal;
 			hitResults[r].m_hitResult0 = hitBodyIndex;
 		}

@@ -130,8 +206,9 @@ void b3GpuRaycast::castRaysHost(const b3AlignedObjectArray<b3RayInfo>& rays,	b3A
 }

 void b3GpuRaycast::castRays(const b3AlignedObjectArray<b3RayInfo>& rays,	b3AlignedObjectArray<b3RayHit>& hitResults,
-		int numBodies,const struct b3RigidBodyCL* bodies, int numCollidables, const struct b3Collidable* collidables)
+		int numBodies,const struct b3RigidBodyCL* bodies, int numCollidables, const struct b3Collidable* collidables, const struct b3GpuNarrowPhaseInternalData* narrowphaseData)
 {
+	
 	B3_PROFILE("castRaysGPU");

 	b3OpenCLArray<b3RayInfo> gpuRays(m_data->m_context,m_data->m_q);
@@ -141,14 +218,6 @@ void b3GpuRaycast::castRays(const b3AlignedObjectArray<b3RayInfo>& rays,	b3Align
 	gpuHitResults.resize(hitResults.size());
 	gpuHitResults.copyFromHost(hitResults);

-	b3OpenCLArray<b3RigidBodyCL> gpuBodies(m_data->m_context,m_data->m_q);
-	gpuBodies.resize(numBodies);
-	gpuBodies.copyFromHostPointer(bodies,numBodies);
-
-	b3OpenCLArray<b3Collidable> gpuCollidables(m_data->m_context,m_data->m_q);
-	gpuCollidables.resize(numCollidables);
-	gpuCollidables.copyFromHostPointer(collidables,numCollidables);
-

 	//run kernel
 	{
@@ -162,9 +231,11 @@ void b3GpuRaycast::castRays(const b3AlignedObjectArray<b3RayInfo>& rays,	b3Align
 		launcher.setBuffer(gpuHitResults.getBufferCL());

 		launcher.setConst(numBodies);
-		launcher.setBuffer(gpuBodies.getBufferCL());
-		launcher.setBuffer(gpuCollidables.getBufferCL());
-
+		launcher.setBuffer(narrowphaseData->m_bodyBufferGPU->getBufferCL());
+		launcher.setBuffer(narrowphaseData->m_collidablesGPU->getBufferCL());
+		launcher.setBuffer(narrowphaseData->m_convexFacesGPU->getBufferCL());
+		launcher.setBuffer(narrowphaseData->m_convexPolyhedraGPU->getBufferCL());
+		
 		launcher.launch1D(numRays);
 		clFinish(m_data->m_q);
 	}
--- a/src/Bullet3OpenCL/Raycast/b3GpuRaycast.h
+++ b/src/Bullet3OpenCL/Raycast/b3GpuRaycast.h
@@ -18,10 +18,13 @@ public:
 	virtual ~b3GpuRaycast();

 	void castRaysHost(const b3AlignedObjectArray<b3RayInfo>& raysIn,	b3AlignedObjectArray<b3RayHit>& hitResults,
-		int numBodies, const struct b3RigidBodyCL* bodies, int numCollidables, const struct b3Collidable* collidables);
+		int numBodies, const struct b3RigidBodyCL* bodies, int numCollidables, const struct b3Collidable* collidables,
+		const struct b3GpuNarrowPhaseInternalData* narrowphaseData);

 	void castRays(const b3AlignedObjectArray<b3RayInfo>& rays,	b3AlignedObjectArray<b3RayHit>& hitResults,
-		int numBodies,const struct b3RigidBodyCL* bodies, int numCollidables, const struct b3Collidable* collidables);
+		int numBodies,const struct b3RigidBodyCL* bodies, int numCollidables, const struct b3Collidable* collidables,
+		const struct b3GpuNarrowPhaseInternalData* narrowphaseData
+		);
 	
 /*			const b3OpenCLArray<b3RigidBodyCL>* bodyBuf,
 			b3OpenCLArray<b3Contact4>* contactOut, int& nContacts,
--- a/src/Bullet3OpenCL/Raycast/kernels/rayCastKernels.cl
+++ b/src/Bullet3OpenCL/Raycast/kernels/rayCastKernels.cl
@@ -47,6 +47,184 @@ typedef struct Collidable
 } Collidable;


+typedef struct  // device-side view of one convex shape; presumably must match the host b3ConvexPolyhedronCL layout - TODO confirm
+{
+	float4		m_localCenter;
+	float4		m_extents;
+	float4		mC;
+	float4		mE;
+	
+	float			m_radius;
+	int	m_faceOffset; // index of this shape's first entry in the faces buffer (read by rayCastKernel)
+	int m_numFaces; // number of consecutive faces belonging to this shape (read by rayCastKernel)
+	int	m_numVertices;
+	
+	int m_vertexOffset;
+	int	m_uniqueEdgesOffset;
+	int	m_numUniqueEdges;
+	int m_unused;
+
+} ConvexPolyhedronCL;
+
+typedef struct // one face of a convex shape as consumed by rayConvex
+{
+	float4 m_plane; // rayConvex evaluates dot(point.xyz, m_plane.xyz) + m_plane.w and reports m_plane.xyz as the hit normal
+	int m_indexOffset; // not read by the ray cast code in this file
+	int m_numIndices; // not read by the ray cast code in this file
+} b3GpuFace;
+
+
+
+///////////////////////////////////////
+//	Quaternion
+///////////////////////////////////////
+
+typedef float4 Quaternion;
+
+__inline
+Quaternion qtMul(Quaternion a, Quaternion b);
+
+__inline
+Quaternion qtNormalize(Quaternion in);
+
+__inline
+float4 qtRotate(Quaternion q, float4 vec);
+
+__inline
+Quaternion qtInvert(Quaternion q);
+
+
+__inline
+float dot3F4(float4 a, float4 b) // dot product of the xyz lanes only
+{
+	float4 a1 = (float4)(a.xyz,0.f); // copy with w forced to zero so it cannot contribute
+	float4 b1 = (float4)(b.xyz,0.f);
+	return dot(a1, b1);
+}
+
+
+__inline
+Quaternion qtMul(Quaternion a, Quaternion b) // quaternion product a*b with the vector part in xyz and the scalar part in w
+{
+	Quaternion ans;
+	ans = cross( a, b ); // xyz = a.xyz x b.xyz
+	ans += a.w*b+b.w*a; // adds the scaled vector parts; the w lane produced here is replaced below
+//	ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);
+	ans.w = a.w*b.w - dot3F4(a, b); // scalar part
+	return ans;
+}
+
+__inline
+Quaternion qtNormalize(Quaternion in) // returns in scaled to unit length over all four lanes
+{
+	return fast_normalize(in); // fast_ built-in variant; the exact length()-based form is kept below, commented out
+//	in /= length( in );
+//	return in;
+}
+__inline
+float4 qtRotate(Quaternion q, float4 vec) // computes q * (vec.xyz,0) * qtInvert(q)
+{
+	Quaternion qInv = qtInvert( q );
+	float4 vcpy = vec;
+	vcpy.w = 0.f; // the w lane of vec is ignored
+	float4 out = qtMul(qtMul(q,vcpy),qInv);
+	return out;
+}
+
+__inline
+Quaternion qtInvert(Quaternion q) // returns the conjugate of q; no division by the squared length is performed
+{
+	return (Quaternion)(-q.xyz, q.w); // negate xyz, keep w
+}
+
+__inline
+float4 qtInvRotate(const Quaternion q, float4 vec) // applies qtRotate with qtInvert(q) instead of q
+{
+	return qtRotate( qtInvert( q ), vec );
+}
+
+__inline
+float4 transform(const float4* p, const float4* translation, const Quaternion* orientation) // rotates *p by *orientation, then adds *translation
+{
+	return qtRotate( *orientation, *p ) + (*translation); // qtRotate zeroes p's w, so the result's w comes from the rotation product plus translation->w
+}
+
+void	trInverse(float4 translationIn, Quaternion orientationIn, // writes the inverse of the transform (translationIn, orientationIn) to the two out pointers
+		float4* translationOut, Quaternion* orientationOut)
+{
+	*orientationOut = qtInvert(orientationIn);
+	*translationOut = qtRotate(*orientationOut, -translationIn); // negated translation rotated by the inverted orientation
+}
+
+void	trMul(float4 translationA, Quaternion orientationA, // composes transform A with transform B and writes the result to the two out pointers
+						float4 translationB, Quaternion orientationB,
+		float4* translationOut, Quaternion* orientationOut)
+{
+	*orientationOut = qtMul(orientationA,orientationB);
+	*translationOut = transform(&translationB,&translationA,&orientationA); // B's translation rotated by A's orientation, plus A's translation
+}
+
+
+
+bool rayConvex(float4 rayFromLocal, float4 rayToLocal, int numFaces, int faceOffset, // clips the segment rayFromLocal->rayToLocal against faces[faceOffset .. faceOffset+numFaces)
+	__global const b3GpuFace* faces, float* hitFraction, float4* hitNormal) // returns true on a hit and only then writes *hitFraction and *hitNormal
+{
+	rayFromLocal.w = 0.f; // zero w so the 4-wide dot() below only sums the xyz products
+	rayToLocal.w = 0.f;
+  bool result = true; // cleared instead of returning early; the loop condition stops iterating once it is false
+  
+	float exitFraction = *hitFraction; // the incoming *hitFraction is the initial upper bound of the clipped interval
+	float enterFraction = -0.1f; // negative start value; stays negative unless an entering crossing is recorded
+	float4 curHitNormal = (float4)(0,0,0,0);
+	for (int i=0;i<numFaces && result;i++)
+	{
+		b3GpuFace face = faces[faceOffset+i];
+		float fromPlaneDist = dot(rayFromLocal,face.m_plane)+face.m_plane.w; // plane equation evaluated at the segment start
+		float toPlaneDist = dot(rayToLocal,face.m_plane)+face.m_plane.w; // plane equation evaluated at the segment end
+		if (fromPlaneDist<0.f)
+		{
+			if (toPlaneDist >= 0.f)
+			{
+				float fraction = fromPlaneDist / (fromPlaneDist-toPlaneDist); // parameter at which the segment crosses this plane
+				if (exitFraction>fraction)
+				{
+					exitFraction = fraction; // keep the smallest exiting crossing
+				}
+			} 			
+		} else
+		{
+			if (toPlaneDist<0.f)
+			{
+				float fraction = fromPlaneDist / (fromPlaneDist-toPlaneDist);
+				if (enterFraction <= fraction)
+				{
+					enterFraction = fraction; // keep the largest entering crossing
+					curHitNormal = face.m_plane; // remember the plane that produced it
+					curHitNormal.w = 0.f;
+				}
+			} else
+			{
+				result = false; // both endpoints are on the non-negative side of this plane: miss
+			}
+		}
+		if (exitFraction <= enterFraction)
+			result = false; // clipped interval is empty
+	}
+	
+	result = result && (enterFraction >= 0.f); // a hit needs a recorded entering crossing; same acceptance rule as the host-side rayConvex
+	
+	if (result)
+	{	
+		*hitFraction = enterFraction;
+		*hitNormal = curHitNormal;
+	}
+	return result;
+}
+
+
+
+
+

 bool sphere_intersect(float4 spherePos,  float radius, float4 rayFrom, float4 rayTo, float* hitFraction)
 {
@@ -88,10 +266,11 @@ __kernel void rayCastKernel(
 	__global b3RayHit* hitResults, 
 	const int numBodies, 
 	__global Body* bodies,
-	__global Collidable* collidables)
+	__global Collidable* collidables,
+	__global const b3GpuFace* faces,
+	__global const ConvexPolyhedronCL* convexShapes	)
 {

-
 	int i = get_global_id(0);
 	if (i<numRays)
 	{
@@ -100,6 +279,8 @@ __kernel void rayCastKernel(
 		float4 rayFrom = rays[i].m_from;
 		float4 rayTo = rays[i].m_to;
 		float hitFraction = 1.f;
+		float4 hitPoint;
+		float4 hitNormal;
 		int hitBodyIndex= -1;
 		
 		int cachedCollidableIndex = -1;		
@@ -109,7 +290,7 @@ __kernel void rayCastKernel(
 		{
 					
 				float4 pos = bodies[b].m_pos;
-	//		float4 orn = bodies[b].m_quat;
+				float4 orn = bodies[b].m_quat;
 				if (cachedCollidableIndex !=bodies[b].m_collidableIdx)
 				{
 						cachedCollidableIndex = bodies[b].m_collidableIdx;
@@ -123,15 +304,38 @@ __kernel void rayCastKernel(
 					if (sphere_intersect(pos,  radius, rayFrom, rayTo, &hitFraction))
 					{
 						hitBodyIndex = b;
+						hitPoint = setInterpolate3(rayFrom, rayTo,hitFraction);
+						hitNormal = (float4) (hitPoint-bodies[b].m_pos);
 					}
 				}
+				
+				if (cachedCollidable.m_shapeType == SHAPE_CONVEX_HULL)
+				{
+				
+					float4 invPos = (float4)(0,0,0,0);
+					float4 invOrn = (float4)(0,0,0,0);
+					float4 rayFromLocal = (float4)(0,0,0,0);
+					float4 rayToLocal = (float4)(0,0,0,0);
+					
+					trInverse(pos,orn, &invPos, &invOrn);
+					rayFromLocal = transform(&rayFrom, &invPos, &invOrn);
+					rayToLocal = transform(&rayTo, &invPos, &invOrn);
+					
+					int numFaces = convexShapes[cachedCollidable.m_shapeIndex].m_numFaces;
+					int faceOffset = convexShapes[cachedCollidable.m_shapeIndex].m_faceOffset;
+					
+					if (rayConvex(rayFromLocal, rayToLocal, numFaces, faceOffset,faces, &hitFraction, &hitNormal))
+					{
+						hitBodyIndex = b;
+					}
+				}
+			
 		}
 		
 		if (hitBodyIndex>=0)
 		{
 			hitResults[i].m_hitFraction = hitFraction;
-			hitResults[i].m_hitPoint = setInterpolate3(rayFrom, rayTo,hitFraction);
-			float4 hitNormal = (float4) (hitResults[i].m_hitPoint-bodies[hitBodyIndex].m_pos);
+			hitResults[i].m_hitPoint = hitPoint;
 			hitResults[i].m_hitNormal = normalize(hitNormal);
 			hitResults[i].m_hitResult0 = hitBodyIndex;
 		}
--- a/src/Bullet3OpenCL/Raycast/kernels/rayCastKernels.h
+++ b/src/Bullet3OpenCL/Raycast/kernels/rayCastKernels.h
@@ -49,6 +49,184 @@ static const char* rayCastKernelCL= \
 "} Collidable;\n"
 "\n"
 "\n"
+"typedef struct  \n"
+"{\n"
+"	float4		m_localCenter;\n"
+"	float4		m_extents;\n"
+"	float4		mC;\n"
+"	float4		mE;\n"
+"	\n"
+"	float			m_radius;\n"
+"	int	m_faceOffset;\n"
+"	int m_numFaces;\n"
+"	int	m_numVertices;\n"
+"	\n"
+"	int m_vertexOffset;\n"
+"	int	m_uniqueEdgesOffset;\n"
+"	int	m_numUniqueEdges;\n"
+"	int m_unused;\n"
+"\n"
+"} ConvexPolyhedronCL;\n"
+"\n"
+"typedef struct\n"
+"{\n"
+"	float4 m_plane;\n"
+"	int m_indexOffset;\n"
+"	int m_numIndices;\n"
+"} b3GpuFace;\n"
+"\n"
+"\n"
+"\n"
+"///////////////////////////////////////\n"
+"//	Quaternion\n"
+"///////////////////////////////////////\n"
+"\n"
+"typedef float4 Quaternion;\n"
+"\n"
+"__inline\n"
+"Quaternion qtMul(Quaternion a, Quaternion b);\n"
+"\n"
+"__inline\n"
+"Quaternion qtNormalize(Quaternion in);\n"
+"\n"
+"__inline\n"
+"float4 qtRotate(Quaternion q, float4 vec);\n"
+"\n"
+"__inline\n"
+"Quaternion qtInvert(Quaternion q);\n"
+"\n"
+"\n"
+"__inline\n"
+"float dot3F4(float4 a, float4 b)\n"
+"{\n"
+"	float4 a1 = (float4)(a.xyz,0.f);\n"
+"	float4 b1 = (float4)(b.xyz,0.f);\n"
+"	return dot(a1, b1);\n"
+"}\n"
+"\n"
+"\n"
+"__inline\n"
+"Quaternion qtMul(Quaternion a, Quaternion b)\n"
+"{\n"
+"	Quaternion ans;\n"
+"	ans = cross( a, b );\n"
+"	ans += a.w*b+b.w*a;\n"
+"//	ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n"
+"	ans.w = a.w*b.w - dot3F4(a, b);\n"
+"	return ans;\n"
+"}\n"
+"\n"
+"__inline\n"
+"Quaternion qtNormalize(Quaternion in)\n"
+"{\n"
+"	return fast_normalize(in);\n"
+"//	in /= length( in );\n"
+"//	return in;\n"
+"}\n"
+"__inline\n"
+"float4 qtRotate(Quaternion q, float4 vec)\n"
+"{\n"
+"	Quaternion qInv = qtInvert( q );\n"
+"	float4 vcpy = vec;\n"
+"	vcpy.w = 0.f;\n"
+"	float4 out = qtMul(qtMul(q,vcpy),qInv);\n"
+"	return out;\n"
+"}\n"
+"\n"
+"__inline\n"
+"Quaternion qtInvert(Quaternion q)\n"
+"{\n"
+"	return (Quaternion)(-q.xyz, q.w);\n"
+"}\n"
+"\n"
+"__inline\n"
+"float4 qtInvRotate(const Quaternion q, float4 vec)\n"
+"{\n"
+"	return qtRotate( qtInvert( q ), vec );\n"
+"}\n"
+"\n"
+"__inline\n"
+"float4 transform(const float4* p, const float4* translation, const Quaternion* orientation)\n"
+"{\n"
+"	return qtRotate( *orientation, *p ) + (*translation);\n"
+"}\n"
+"\n"
+"void	trInverse(float4 translationIn, Quaternion orientationIn,\n"
+"		float4* translationOut, Quaternion* orientationOut)\n"
+"{\n"
+"	*orientationOut = qtInvert(orientationIn);\n"
+"	*translationOut = qtRotate(*orientationOut, -translationIn);\n"
+"}\n"
+"\n"
+"void	trMul(float4 translationA, Quaternion orientationA,\n"
+"						float4 translationB, Quaternion orientationB,\n"
+"		float4* translationOut, Quaternion* orientationOut)\n"
+"{\n"
+"	*orientationOut = qtMul(orientationA,orientationB);\n"
+"	*translationOut = transform(&translationB,&translationA,&orientationA);\n"
+"}\n"
+"\n"
+"\n"
+"\n"
+"bool rayConvex(float4 rayFromLocal, float4 rayToLocal, int numFaces, int faceOffset,\n" // stringified copy of the rayConvex kernel helper from rayCastKernels.cl
+"	__global const b3GpuFace* faces, float* hitFraction, float4* hitNormal)\n"
+"{\n"
+"	rayFromLocal.w = 0.f;\n"
+"	rayToLocal.w = 0.f;\n"
+"  bool result = true;\n"
+"  \n"
+"	float exitFraction = *hitFraction;\n"
+"	float enterFraction = -0.1f;\n"
+"	float4 curHitNormal = (float4)(0,0,0,0);\n"
+"	for (int i=0;i<numFaces && result;i++)\n"
+"	{\n"
+"		b3GpuFace face = faces[faceOffset+i];\n"
+"		float fromPlaneDist = dot(rayFromLocal,face.m_plane)+face.m_plane.w;\n"
+"		float toPlaneDist = dot(rayToLocal,face.m_plane)+face.m_plane.w;\n"
+"		if (fromPlaneDist<0.f)\n"
+"		{\n"
+"			if (toPlaneDist >= 0.f)\n"
+"			{\n"
+"				float fraction = fromPlaneDist / (fromPlaneDist-toPlaneDist);\n"
+"				if (exitFraction>fraction)\n"
+"				{\n"
+"					exitFraction = fraction;\n"
+"				}\n"
+"			} 			\n"
+"		} else\n"
+"		{\n"
+"			if (toPlaneDist<0.f)\n"
+"			{\n"
+"				float fraction = fromPlaneDist / (fromPlaneDist-toPlaneDist);\n"
+"				if (enterFraction <= fraction)\n"
+"				{\n"
+"					enterFraction = fraction;\n"
+"					curHitNormal = face.m_plane;\n"
+"					curHitNormal.w = 0.f;\n"
+"				}\n"
+"			} else\n"
+"			{\n"
+"				result = false;\n"
+"			}\n"
+"		}\n"
+"		if (exitFraction <= enterFraction)\n"
+"			result = false;\n"
+"	}\n"
+"	\n"
+"	result = result && (enterFraction >= 0.f);\n" // a hit needs a non-negative entry fraction, as in the host-side rayConvex; '< 0.f' rejected every real hit
+"	\n"
+"	if (result)\n"
+"	{	\n"
+"		*hitFraction = enterFraction;\n"
+"		*hitNormal = curHitNormal;\n"
+"	}\n"
+"	return result;\n"
+"}\n"
+"\n"
+"\n"
+"\n"
+"\n"
+"\n"
 "\n"
 "bool sphere_intersect(float4 spherePos,  float radius, float4 rayFrom, float4 rayTo, float* hitFraction)\n"
 "{\n"
@@ -90,10 +268,11 @@ static const char* rayCastKernelCL= \
 "	__global b3RayHit* hitResults, \n"
 "	const int numBodies, \n"
 "	__global Body* bodies,\n"
-"	__global Collidable* collidables)\n"
+"	__global Collidable* collidables,\n"
+"	__global const b3GpuFace* faces,\n"
+"	__global const ConvexPolyhedronCL* convexShapes	)\n"
 "{\n"
 "\n"
-"\n"
 "	int i = get_global_id(0);\n"
 "	if (i<numRays)\n"
 "	{\n"
@@ -102,6 +281,8 @@ static const char* rayCastKernelCL= \
 "		float4 rayFrom = rays[i].m_from;\n"
 "		float4 rayTo = rays[i].m_to;\n"
 "		float hitFraction = 1.f;\n"
+"		float4 hitPoint;\n"
+"		float4 hitNormal;\n"
 "		int hitBodyIndex= -1;\n"
 "		\n"
 "		int cachedCollidableIndex = -1;		\n"
@@ -111,7 +292,7 @@ static const char* rayCastKernelCL= \
 "		{\n"
 "					\n"
 "				float4 pos = bodies[b].m_pos;\n"
-"	//		float4 orn = bodies[b].m_quat;\n"
+"				float4 orn = bodies[b].m_quat;\n"
 "				if (cachedCollidableIndex !=bodies[b].m_collidableIdx)\n"
 "				{\n"
 "						cachedCollidableIndex = bodies[b].m_collidableIdx;\n"
@@ -125,15 +306,38 @@ static const char* rayCastKernelCL= \
 "					if (sphere_intersect(pos,  radius, rayFrom, rayTo, &hitFraction))\n"
 "					{\n"
 "						hitBodyIndex = b;\n"
+"						hitPoint = setInterpolate3(rayFrom, rayTo,hitFraction);\n"
+"						hitNormal = (float4) (hitPoint-bodies[b].m_pos);\n"
 "					}\n"
 "				}\n"
+"				\n"
+"				if (cachedCollidable.m_shapeType == SHAPE_CONVEX_HULL)\n"
+"				{\n"
+"				\n"
+"					float4 invPos = (float4)(0,0,0,0);\n"
+"					float4 invOrn = (float4)(0,0,0,0);\n"
+"					float4 rayFromLocal = (float4)(0,0,0,0);\n"
+"					float4 rayToLocal = (float4)(0,0,0,0);\n"
+"					\n"
+"					trInverse(pos,orn, &invPos, &invOrn);\n"
+"					rayFromLocal = transform(&rayFrom, &invPos, &invOrn);\n"
+"					rayToLocal = transform(&rayTo, &invPos, &invOrn);\n"
+"					\n"
+"					int numFaces = convexShapes[cachedCollidable.m_shapeIndex].m_numFaces;\n"
+"					int faceOffset = convexShapes[cachedCollidable.m_shapeIndex].m_faceOffset;\n"
+"					\n"
+"					if (rayConvex(rayFromLocal, rayToLocal, numFaces, faceOffset,faces, &hitFraction, &hitNormal))\n"
+"					{\n"
+"						hitBodyIndex = b;\n"
+"					}\n"
+"				}\n"
+"			\n"
 "		}\n"
 "		\n"
 "		if (hitBodyIndex>=0)\n"
 "		{\n"
 "			hitResults[i].m_hitFraction = hitFraction;\n"
-"			hitResults[i].m_hitPoint = setInterpolate3(rayFrom, rayTo,hitFraction);\n"
-"			float4 hitNormal = (float4) (hitResults[i].m_hitPoint-bodies[hitBodyIndex].m_pos);\n"
+"			hitResults[i].m_hitPoint = hitPoint;\n"
 "			hitResults[i].m_hitNormal = normalize(hitNormal);\n"
 "			hitResults[i].m_hitResult0 = hitBodyIndex;\n"
 "		}\n"
--- a/src/Bullet3OpenCL/RigidBody/b3Config.h
+++ b/src/Bullet3OpenCL/RigidBody/b3Config.h
@@ -19,7 +19,7 @@ struct	b3Config
 	int m_maxTriConvexPairCapacity;

 	b3Config()
-		:m_maxConvexBodies(128*1024),
+		:m_maxConvexBodies(32*1024),
 		m_maxVerticesPerFace(64),
 		m_maxFacesPerShape(12),
 		m_maxConvexVertices(8192),
@@ -29,7 +29,7 @@ struct	b3Config
 		m_maxTriConvexPairCapacity(256*1024)
 	{
 		m_maxConvexShapes = m_maxConvexBodies;
-		m_maxBroadphasePairs = 8*m_maxConvexBodies;
+		m_maxBroadphasePairs = 12*m_maxConvexBodies;
 		m_maxContactCapacity = m_maxBroadphasePairs;
 	}
 };
--- a/src/Bullet3OpenCL/RigidBody/b3GpuBatchingPgsSolver.cpp
+++ b/src/Bullet3OpenCL/RigidBody/b3GpuBatchingPgsSolver.cpp
@@ -5,7 +5,7 @@ bool b3GpuSolveConstraint = true;

 #include "b3GpuBatchingPgsSolver.h"
 #include "Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h"
-#include "Bullet3Common/b3Quickprof.h"
+
 #include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h"
 #include "Bullet3OpenCL/ParallelPrimitives/b3BoundSearchCL.h"
 #include "Bullet3OpenCL/ParallelPrimitives/b3PrefixScanCL.h"
--- a/src/Bullet3OpenCL/RigidBody/b3GpuNarrowPhase.cpp
+++ b/src/Bullet3OpenCL/RigidBody/b3GpuNarrowPhase.cpp
@@ -12,73 +12,7 @@
 #include "Bullet3Geometry/b3AabbUtil.h"
 #include "Bullet3OpenCL/NarrowphaseCollision/b3BvhInfo.h"

-struct b3GpuNarrowPhaseInternalData
-{
-	b3AlignedObjectArray<b3ConvexUtility*>* m_convexData;
-    
-	b3AlignedObjectArray<b3ConvexPolyhedronCL> m_convexPolyhedra;
-	b3AlignedObjectArray<b3Vector3> m_uniqueEdges;
-	b3AlignedObjectArray<b3Vector3> m_convexVertices;
-	b3AlignedObjectArray<int> m_convexIndices;
-    
-	b3OpenCLArray<b3ConvexPolyhedronCL>* m_convexPolyhedraGPU;
-	b3OpenCLArray<b3Vector3>* m_uniqueEdgesGPU;
-	b3OpenCLArray<b3Vector3>* m_convexVerticesGPU;
-	b3OpenCLArray<int>* m_convexIndicesGPU;
-    
-    b3OpenCLArray<b3Vector3>* m_worldVertsB1GPU;
-    b3OpenCLArray<b3Int4>* m_clippingFacesOutGPU;
-    b3OpenCLArray<b3Vector3>* m_worldNormalsAGPU;
-    b3OpenCLArray<b3Vector3>* m_worldVertsA1GPU;
-    b3OpenCLArray<b3Vector3>* m_worldVertsB2GPU;
-    
-	b3AlignedObjectArray<b3GpuChildShape> m_cpuChildShapes;
-	b3OpenCLArray<b3GpuChildShape>*	m_gpuChildShapes;
-    
-	b3AlignedObjectArray<b3GpuFace> m_convexFaces;
-	b3OpenCLArray<b3GpuFace>* m_convexFacesGPU;
-    
-	GpuSatCollision*	m_gpuSatCollision;
-	    
-	b3AlignedObjectArray<b3Int2>* m_pBufPairsCPU;
-    
-	//b3OpenCLArray<b3Int2>* m_convexPairsOutGPU;
-	//b3OpenCLArray<b3Int2>* m_planePairs;
-    
-	b3OpenCLArray<b3Contact4>* m_pBufContactOutGPU;
-	b3AlignedObjectArray<b3Contact4>* m_pBufContactOutCPU;
-	
-    
-	b3AlignedObjectArray<b3RigidBodyCL>* m_bodyBufferCPU;
-	b3OpenCLArray<b3RigidBodyCL>* m_bodyBufferGPU;
-    
-	b3AlignedObjectArray<b3InertiaCL>*	m_inertiaBufferCPU;
-	b3OpenCLArray<b3InertiaCL>*	m_inertiaBufferGPU;
-    
-	int m_numAcceleratedShapes;
-	int m_numAcceleratedRigidBodies;
-    
-	b3AlignedObjectArray<b3Collidable>	m_collidablesCPU;
-	b3OpenCLArray<b3Collidable>*	m_collidablesGPU;
-
-	b3OpenCLArray<b3SapAabb>* m_localShapeAABBGPU;
-	b3AlignedObjectArray<b3SapAabb>* m_localShapeAABBCPU;
-
-	b3AlignedObjectArray<class b3OptimizedBvh*> m_bvhData;
-
-	b3AlignedObjectArray<b3QuantizedBvhNode>	m_treeNodesCPU;
-	b3AlignedObjectArray<b3BvhSubtreeInfo>	m_subTreesCPU;
-
-	b3AlignedObjectArray<b3BvhInfo>	m_bvhInfoCPU;
-	b3OpenCLArray<b3BvhInfo>*			m_bvhInfoGPU;
-	
-	b3OpenCLArray<b3QuantizedBvhNode>*	m_treeNodesGPU;
-	b3OpenCLArray<b3BvhSubtreeInfo>*	m_subTreesGPU;
-	
-
-	b3Config	m_config;
-    
-};
+#include "b3GpuNarrowPhaseInternalData.h"



--- a/src/Bullet3OpenCL/RigidBody/b3GpuNarrowPhase.h
+++ b/src/Bullet3OpenCL/RigidBody/b3GpuNarrowPhase.h
@@ -91,6 +91,11 @@ public:
 	b3Collidable& getCollidableCpu(int collidableIndex);
 	const b3Collidable& getCollidableCpu(int collidableIndex) const;

+	const b3GpuNarrowPhaseInternalData*	getInternalData() const // read-only access to the narrowphase's internal data block
+	{
+			return m_data; // hands out the stored pointer itself; nothing is copied
+	}
+
 	const struct b3SapAabb& getLocalSpaceAabb(int collidableIndex) const;
 };

--- a/src/Bullet3OpenCL/RigidBody/b3GpuNarrowPhaseInternalData.h
+++ b/src/Bullet3OpenCL/RigidBody/b3GpuNarrowPhaseInternalData.h
@@ -0,0 +1,93 @@
+
+#ifndef B3_GPU_NARROWPHASE_INTERNAL_DATA_H
+#define B3_GPU_NARROWPHASE_INTERNAL_DATA_H
+
+#include "Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h"
+#include "Bullet3OpenCL/NarrowphaseCollision/b3ConvexPolyhedronCL.h"
+#include "b3Config.h"
+#include "Bullet3OpenCL/NarrowphaseCollision/b3Collidable.h"
+#include "Bullet3OpenCL/Initialize/b3OpenCLInclude.h"
+#include "Bullet3Common/b3AlignedObjectArray.h"
+#include "Bullet3Common/b3Vector3.h"
+
+#include "Bullet3Collision/NarrowPhaseCollision/b3RigidBodyCL.h"
+#include "Bullet3Collision/NarrowPhaseCollision/b3Contact4.h"
+#include "Bullet3OpenCL/BroadphaseCollision/b3SapAabb.h"
+
+#include "Bullet3OpenCL/NarrowphaseCollision/b3QuantizedBvh.h"
+#include "Bullet3OpenCL/NarrowphaseCollision/b3BvhInfo.h"
+#include "Bullet3Common/b3Int4.h"
+#include "Bullet3Common/b3Int2.h"
+
+
+class b3ConvexUtility;
+
+struct b3GpuNarrowPhaseInternalData // aggregate of narrowphase storage; members are b3AlignedObjectArray (host) or b3OpenCLArray pointers (OpenCL buffers)
+{
+	b3AlignedObjectArray<b3ConvexUtility*>* m_convexData;
+    
+	b3AlignedObjectArray<b3ConvexPolyhedronCL> m_convexPolyhedra; // indexed with b3Collidable::m_shapeIndex by b3GpuRaycast::castRaysHost
+	b3AlignedObjectArray<b3Vector3> m_uniqueEdges;
+	b3AlignedObjectArray<b3Vector3> m_convexVertices;
+	b3AlignedObjectArray<int> m_convexIndices;
+    
+	b3OpenCLArray<b3ConvexPolyhedronCL>* m_convexPolyhedraGPU; // bound as the convexShapes kernel buffer by b3GpuRaycast::castRays
+	b3OpenCLArray<b3Vector3>* m_uniqueEdgesGPU;
+	b3OpenCLArray<b3Vector3>* m_convexVerticesGPU;
+	b3OpenCLArray<int>* m_convexIndicesGPU;
+    
+    b3OpenCLArray<b3Vector3>* m_worldVertsB1GPU;
+    b3OpenCLArray<b3Int4>* m_clippingFacesOutGPU;
+    b3OpenCLArray<b3Vector3>* m_worldNormalsAGPU;
+    b3OpenCLArray<b3Vector3>* m_worldVertsA1GPU;
+    b3OpenCLArray<b3Vector3>* m_worldVertsB2GPU;
+    
+	b3AlignedObjectArray<b3GpuChildShape> m_cpuChildShapes;
+	b3OpenCLArray<b3GpuChildShape>*	m_gpuChildShapes;
+    
+	b3AlignedObjectArray<b3GpuFace> m_convexFaces; // each shape's faces start at b3ConvexPolyhedronCL::m_faceOffset (see host rayConvex)
+	b3OpenCLArray<b3GpuFace>* m_convexFacesGPU; // bound as the faces kernel buffer by b3GpuRaycast::castRays
+    
+	struct GpuSatCollision*	m_gpuSatCollision; // elaborated type specifier: usable here without the GpuSatCollision definition
+	    
+	b3AlignedObjectArray<b3Int2>* m_pBufPairsCPU;
+    
+	//b3OpenCLArray<b3Int2>* m_convexPairsOutGPU;
+	//b3OpenCLArray<b3Int2>* m_planePairs;
+    
+	b3OpenCLArray<b3Contact4>* m_pBufContactOutGPU;
+	b3AlignedObjectArray<b3Contact4>* m_pBufContactOutCPU;
+	
+    
+	b3AlignedObjectArray<b3RigidBodyCL>* m_bodyBufferCPU;
+	b3OpenCLArray<b3RigidBodyCL>* m_bodyBufferGPU; // bound as the bodies kernel buffer by b3GpuRaycast::castRays
+    
+	b3AlignedObjectArray<b3InertiaCL>*	m_inertiaBufferCPU;
+	b3OpenCLArray<b3InertiaCL>*	m_inertiaBufferGPU;
+    
+	int m_numAcceleratedShapes;
+	int m_numAcceleratedRigidBodies;
+    
+	b3AlignedObjectArray<b3Collidable>	m_collidablesCPU;
+	b3OpenCLArray<b3Collidable>*	m_collidablesGPU; // bound as the collidables kernel buffer by b3GpuRaycast::castRays
+
+	b3OpenCLArray<b3SapAabb>* m_localShapeAABBGPU;
+	b3AlignedObjectArray<b3SapAabb>* m_localShapeAABBCPU;
+
+	b3AlignedObjectArray<class b3OptimizedBvh*> m_bvhData;
+
+	b3AlignedObjectArray<b3QuantizedBvhNode>	m_treeNodesCPU;
+	b3AlignedObjectArray<b3BvhSubtreeInfo>	m_subTreesCPU;
+
+	b3AlignedObjectArray<b3BvhInfo>	m_bvhInfoCPU;
+	b3OpenCLArray<b3BvhInfo>*			m_bvhInfoGPU;
+	
+	b3OpenCLArray<b3QuantizedBvhNode>*	m_treeNodesGPU;
+	b3OpenCLArray<b3BvhSubtreeInfo>*	m_subTreesGPU;
+	
+
+	b3Config	m_config;
+    
+};
+
+#endif //B3_GPU_NARROWPHASE_INTERNAL_DATA_H
--- a/src/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.cpp
+++ b/src/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.cpp
@@ -31,7 +31,6 @@ bool dumpContactStats = false;
 #include "b3GpuBatchingPgsSolver.h"
 #include "b3Solver.h"

-#include "Bullet3Common/b3Quickprof.h"
 #include "b3Config.h"
 #include "Bullet3OpenCL/Raycast/b3GpuRaycast.h"

@@ -458,6 +457,9 @@ int		b3GpuRigidBodyPipeline::registerPhysicsInstance(float mass, const float* po

 void	b3GpuRigidBodyPipeline::castRays(const b3AlignedObjectArray<b3RayInfo>& rays,	b3AlignedObjectArray<b3RayHit>& hitResults)
 {
-	this->m_data->m_raycaster->castRays(rays,hitResults,getNumBodies(),this->m_data->m_narrowphase->getBodiesCpu(),m_data->m_narrowphase->getNumCollidablesGpu(), m_data->m_narrowphase->getCollidablesCpu());
+	this->m_data->m_raycaster->castRays(rays,hitResults, // forwards the query to the raycaster, which fills hitResults
+		getNumBodies(),this->m_data->m_narrowphase->getBodiesCpu(),
+		m_data->m_narrowphase->getNumCollidablesGpu(), m_data->m_narrowphase->getCollidablesCpu(),	m_data->m_narrowphase->getInternalData() // NOTE(review): the count comes from the *Gpu getter but the array from the *Cpu getter - confirm they agree
+		);
 }

--- a/src/Bullet3OpenCL/RigidBody/b3Solver.cpp
+++ b/src/Bullet3OpenCL/RigidBody/b3Solver.cpp
@@ -37,7 +37,6 @@ bool useNewBatchingKernel = true;
 #include "kernels/batchingKernelsNew.h"


-#include "Bullet3Common/b3Quickprof.h"
 #include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h"
 #include "Bullet3Common/b3Vector3.h"