added pgs/jacobi cpu solver making the gpu rigid body pipeline work again (aside from running the solver on CPU)

2013-03-15 18:44:55 -07:00
parent d49e9fd44d
commit d91d18b5f5
24 changed files with 5154 additions and 95 deletions
--- a/opencl/gpu_rigidbody/kernels/integrateKernel.cl
+++ b/opencl/gpu_rigidbody/kernels/integrateKernel.cl
@@ -46,9 +46,9 @@ __kernel void
 {
 	int nodeID = get_global_id(0);
 	float BT_GPU_ANGULAR_MOTION_THRESHOLD = (0.25f * 3.14159254f);
-	if( nodeID < numNodes )
+	if( nodeID < numNodes && (bodies[nodeID].m_invMass != 0.f))
 	{
-		if (1)
+		//angular velocity
 		{
 			float4 axis;
 			//add some hardcoded angular damping
@@ -82,8 +82,12 @@ __kernel void
 			bodies[nodeID].m_quat=predictedOrn;
 		}

-	//linear velocity		
+		//linear velocity		
 		bodies[nodeID].m_pos +=  bodies[nodeID].m_linVel * timeStep;
 		
+		//apply gravity
+		float4 gravityAcceleration = (float4)(0.f,-9.8f,0.f,0.f);
+		bodies[nodeID].m_linVel += gravityAcceleration * timeStep;
+		
 	}
 }
--- a/opencl/gpu_rigidbody/kernels/integrateKernel.h
+++ b/opencl/gpu_rigidbody/kernels/integrateKernel.h
@@ -48,9 +48,9 @@ static const char* integrateKernelCL= \
 "{\n"
 "	int nodeID = get_global_id(0);\n"
 "	float BT_GPU_ANGULAR_MOTION_THRESHOLD = (0.25f * 3.14159254f);\n"
-"	if( nodeID < numNodes )\n"
+"	if( nodeID < numNodes && (bodies[nodeID].m_invMass != 0.f))\n"
 "	{\n"
-"		if (1)\n"
+"		//angular velocity\n"
 "		{\n"
 "			float4 axis;\n"
 "			//add some hardcoded angular damping\n"
@@ -84,9 +84,13 @@ static const char* integrateKernelCL= \
 "			bodies[nodeID].m_quat=predictedOrn;\n"
 "		}\n"
 "\n"
-"	//linear velocity		\n"
+"		//linear velocity		\n"
 "		bodies[nodeID].m_pos +=  bodies[nodeID].m_linVel * timeStep;\n"
 "		\n"
+"		//apply gravity\n"
+"		float4 gravityAcceleration = (float4)(0.f,-9.8f,0.f,0.f);\n"
+"		bodies[nodeID].m_linVel += gravityAcceleration * timeStep;\n"
+"		\n"
 "	}\n"
 "}\n"
 "\n"
--- a/opencl/gpu_rigidbody/kernels/updateAabbsKernel.cl
+++ b/opencl/gpu_rigidbody/kernels/updateAabbsKernel.cl
@@ -0,0 +1,195 @@
+
+#define SHAPE_CONVEX_HULL 3
+
+typedef float4 Quaternion;
+
+__inline
+float4 cross3(float4 a, float4 b)
+{
+	return cross(a,b);
+}
+
+__inline
+float dot3F4(float4 a, float4 b)
+{
+	float4 a1 = (float4)(a.xyz,0.f);
+	float4 b1 = (float4)(b.xyz,0.f);
+	return dot(a1, b1);
+}
+
+
+__inline
+Quaternion qtMul(Quaternion a, Quaternion b)
+{
+	Quaternion ans;
+	ans = cross3( a, b );
+	ans += a.w*b+b.w*a;
+	ans.w = a.w*b.w - dot3F4(a, b);
+	return ans;
+}
+
+__inline
+Quaternion qtInvert(Quaternion q)
+{
+	return (Quaternion)(-q.xyz, q.w);
+}
+
+__inline
+float4 qtRotate(Quaternion q, float4 vec)
+{
+	Quaternion qInv = qtInvert( q );
+	float4 vcpy = vec;
+	vcpy.w = 0.f;
+	float4 out = qtMul(qtMul(q,vcpy),qInv);
+	return out;
+}
+
+__inline
+float4 transform(const float4* p, const float4* translation, const Quaternion* orientation)
+{
+	return qtRotate( *orientation, *p ) + (*translation);
+}
+
+typedef struct
+{
+	float4	m_row[3];
+} Matrix3x3;
+
+typedef unsigned int u32;
+
+
+typedef struct
+{
+	float4 m_pos;
+	float4 m_quat;
+	float4 m_linVel;
+	float4 m_angVel;
+
+	u32 m_collidableIdx;
+	float m_invMass;
+	float m_restituitionCoeff;
+	float m_frictionCoeff;
+} Body;
+
+typedef struct Collidable
+{
+	int m_unused1;
+	int m_unused2;
+	int m_shapeType;
+	int m_shapeIndex;
+} Collidable;
+
+
+typedef struct
+{
+	Matrix3x3 m_invInertia;
+	Matrix3x3 m_initInvInertia;
+} Shape;
+
+
+__inline
+Matrix3x3 qtGetRotationMatrix(float4 quat)
+{
+	float4 quat2 = (float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f);
+	Matrix3x3 out;
+
+	out.m_row[0].x=fabs(1-2*quat2.y-2*quat2.z);
+	out.m_row[0].y=fabs(2*quat.x*quat.y-2*quat.w*quat.z);
+	out.m_row[0].z=fabs(2*quat.x*quat.z+2*quat.w*quat.y);
+	out.m_row[0].w = 0.f;
+
+	out.m_row[1].x=fabs(2*quat.x*quat.y+2*quat.w*quat.z);
+	out.m_row[1].y=fabs(1-2*quat2.x-2*quat2.z);
+	out.m_row[1].z=fabs(2*quat.y*quat.z-2*quat.w*quat.x);
+	out.m_row[1].w = 0.f;
+
+	out.m_row[2].x=fabs(2*quat.x*quat.z-2*quat.w*quat.y);
+	out.m_row[2].y=fabs(2*quat.y*quat.z+2*quat.w*quat.x);
+	out.m_row[2].z=fabs(1-2*quat2.x-2*quat2.y);
+	out.m_row[2].w = 0.f;
+
+	return out;
+}
+
+
+typedef struct 
+{
+	float			fx;
+	float			fy;
+	float			fz;
+	int	uw;
+} btAABBCL;
+
+__inline
+Matrix3x3 mtTranspose(Matrix3x3 m)
+{
+	Matrix3x3 out;
+	out.m_row[0] = (float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f);
+	out.m_row[1] = (float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f);
+	out.m_row[2] = (float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);
+	return out;
+}
+
+
+
+__inline
+Matrix3x3 mtMul(Matrix3x3 a, Matrix3x3 b)
+{
+	Matrix3x3 transB;
+	transB = mtTranspose( b );
+	Matrix3x3 ans;
+	//	why this doesn't run when 0ing in the for{}
+	a.m_row[0].w = 0.f;
+	a.m_row[1].w = 0.f;
+	a.m_row[2].w = 0.f;
+	for(int i=0; i<3; i++)
+	{
+//	a.m_row[i].w = 0.f;
+		ans.m_row[i].x = dot3F4(a.m_row[i],transB.m_row[0]);
+		ans.m_row[i].y = dot3F4(a.m_row[i],transB.m_row[1]);
+		ans.m_row[i].z = dot3F4(a.m_row[i],transB.m_row[2]);
+		ans.m_row[i].w = 0.f;
+	}
+	return ans;
+}
+
+
+__kernel void initializeGpuAabbsFull(  const int numNodes, __global Body* gBodies,__global Collidable* collidables, __global btAABBCL* plocalShapeAABB, __global btAABBCL* pAABB)
+{
+	int nodeID = get_global_id(0);
+		
+	if( nodeID < numNodes )
+	{
+		float4 position = gBodies[nodeID].m_pos;
+		float4 orientation = gBodies[nodeID].m_quat;
+		
+			
+		int collidableIndex = gBodies[nodeID].m_collidableIdx;
+		int shapeIndex = collidables[collidableIndex].m_shapeIndex;
+			
+		if (shapeIndex>=0)
+		{
+			btAABBCL minAabb = plocalShapeAABB[collidableIndex*2];
+			btAABBCL maxAabb = plocalShapeAABB[collidableIndex*2+1];
+				
+			float4 halfExtents = ((float4)(maxAabb.fx - minAabb.fx,maxAabb.fy - minAabb.fy,maxAabb.fz - minAabb.fz,0.f))*0.5f;
+			float4 localCenter = ((float4)(maxAabb.fx + minAabb.fx,maxAabb.fy + minAabb.fy,maxAabb.fz + minAabb.fz,0.f))*0.5f;
+				
+			float4 worldCenter = transform(&localCenter,&position,&orientation);
+				
+			Matrix3x3 abs_b = qtGetRotationMatrix(orientation);
+			float4 extent = (float4) (	dot(abs_b.m_row[0],halfExtents),dot(abs_b.m_row[1],halfExtents),dot(abs_b.m_row[2],halfExtents),0.f);
+			
+	
+			pAABB[nodeID*2].fx = worldCenter.x-extent.x;
+			pAABB[nodeID*2].fy = worldCenter.y-extent.y;
+			pAABB[nodeID*2].fz = worldCenter.z-extent.z;
+			pAABB[nodeID*2].uw = nodeID;
+	
+			pAABB[nodeID*2+1].fx = worldCenter.x+extent.x;
+			pAABB[nodeID*2+1].fy = worldCenter.y+extent.y;
+			pAABB[nodeID*2+1].fz = worldCenter.z+extent.z;
+			pAABB[nodeID*2+1].uw = gBodies[nodeID].m_invMass==0.f? 0 : 1;
+		}
+	} 
+}
--- a/opencl/gpu_rigidbody/kernels/updateAabbsKernel.h
+++ b/opencl/gpu_rigidbody/kernels/updateAabbsKernel.h
@@ -0,0 +1,199 @@
+//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project
+static const char* updateAabbsKernelCL= \
+"\n"
+"#define SHAPE_CONVEX_HULL 3\n"
+"\n"
+"typedef float4 Quaternion;\n"
+"\n"
+"__inline\n"
+"float4 cross3(float4 a, float4 b)\n"
+"{\n"
+"	return cross(a,b);\n"
+"}\n"
+"\n"
+"__inline\n"
+"float dot3F4(float4 a, float4 b)\n"
+"{\n"
+"	float4 a1 = (float4)(a.xyz,0.f);\n"
+"	float4 b1 = (float4)(b.xyz,0.f);\n"
+"	return dot(a1, b1);\n"
+"}\n"
+"\n"
+"\n"
+"__inline\n"
+"Quaternion qtMul(Quaternion a, Quaternion b)\n"
+"{\n"
+"	Quaternion ans;\n"
+"	ans = cross3( a, b );\n"
+"	ans += a.w*b+b.w*a;\n"
+"	ans.w = a.w*b.w - dot3F4(a, b);\n"
+"	return ans;\n"
+"}\n"
+"\n"
+"__inline\n"
+"Quaternion qtInvert(Quaternion q)\n"
+"{\n"
+"	return (Quaternion)(-q.xyz, q.w);\n"
+"}\n"
+"\n"
+"__inline\n"
+"float4 qtRotate(Quaternion q, float4 vec)\n"
+"{\n"
+"	Quaternion qInv = qtInvert( q );\n"
+"	float4 vcpy = vec;\n"
+"	vcpy.w = 0.f;\n"
+"	float4 out = qtMul(qtMul(q,vcpy),qInv);\n"
+"	return out;\n"
+"}\n"
+"\n"
+"__inline\n"
+"float4 transform(const float4* p, const float4* translation, const Quaternion* orientation)\n"
+"{\n"
+"	return qtRotate( *orientation, *p ) + (*translation);\n"
+"}\n"
+"\n"
+"typedef struct\n"
+"{\n"
+"	float4	m_row[3];\n"
+"} Matrix3x3;\n"
+"\n"
+"typedef unsigned int u32;\n"
+"\n"
+"\n"
+"typedef struct\n"
+"{\n"
+"	float4 m_pos;\n"
+"	float4 m_quat;\n"
+"	float4 m_linVel;\n"
+"	float4 m_angVel;\n"
+"\n"
+"	u32 m_collidableIdx;\n"
+"	float m_invMass;\n"
+"	float m_restituitionCoeff;\n"
+"	float m_frictionCoeff;\n"
+"} Body;\n"
+"\n"
+"typedef struct Collidable\n"
+"{\n"
+"	int m_unused1;\n"
+"	int m_unused2;\n"
+"	int m_shapeType;\n"
+"	int m_shapeIndex;\n"
+"} Collidable;\n"
+"\n"
+"\n"
+"typedef struct\n"
+"{\n"
+"	Matrix3x3 m_invInertia;\n"
+"	Matrix3x3 m_initInvInertia;\n"
+"} Shape;\n"
+"\n"
+"\n"
+"__inline\n"
+"Matrix3x3 qtGetRotationMatrix(float4 quat)\n"
+"{\n"
+"	float4 quat2 = (float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f);\n"
+"	Matrix3x3 out;\n"
+"\n"
+"	out.m_row[0].x=fabs(1-2*quat2.y-2*quat2.z);\n"
+"	out.m_row[0].y=fabs(2*quat.x*quat.y-2*quat.w*quat.z);\n"
+"	out.m_row[0].z=fabs(2*quat.x*quat.z+2*quat.w*quat.y);\n"
+"	out.m_row[0].w = 0.f;\n"
+"\n"
+"	out.m_row[1].x=fabs(2*quat.x*quat.y+2*quat.w*quat.z);\n"
+"	out.m_row[1].y=fabs(1-2*quat2.x-2*quat2.z);\n"
+"	out.m_row[1].z=fabs(2*quat.y*quat.z-2*quat.w*quat.x);\n"
+"	out.m_row[1].w = 0.f;\n"
+"\n"
+"	out.m_row[2].x=fabs(2*quat.x*quat.z-2*quat.w*quat.y);\n"
+"	out.m_row[2].y=fabs(2*quat.y*quat.z+2*quat.w*quat.x);\n"
+"	out.m_row[2].z=fabs(1-2*quat2.x-2*quat2.y);\n"
+"	out.m_row[2].w = 0.f;\n"
+"\n"
+"	return out;\n"
+"}\n"
+"\n"
+"\n"
+"typedef struct \n"
+"{\n"
+"	float			fx;\n"
+"	float			fy;\n"
+"	float			fz;\n"
+"	int	uw;\n"
+"} btAABBCL;\n"
+"\n"
+"__inline\n"
+"Matrix3x3 mtTranspose(Matrix3x3 m)\n"
+"{\n"
+"	Matrix3x3 out;\n"
+"	out.m_row[0] = (float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f);\n"
+"	out.m_row[1] = (float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f);\n"
+"	out.m_row[2] = (float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);\n"
+"	return out;\n"
+"}\n"
+"\n"
+"\n"
+"\n"
+"__inline\n"
+"Matrix3x3 mtMul(Matrix3x3 a, Matrix3x3 b)\n"
+"{\n"
+"	Matrix3x3 transB;\n"
+"	transB = mtTranspose( b );\n"
+"	Matrix3x3 ans;\n"
+"	//	why this doesn't run when 0ing in the for{}\n"
+"	a.m_row[0].w = 0.f;\n"
+"	a.m_row[1].w = 0.f;\n"
+"	a.m_row[2].w = 0.f;\n"
+"	for(int i=0; i<3; i++)\n"
+"	{\n"
+"//	a.m_row[i].w = 0.f;\n"
+"		ans.m_row[i].x = dot3F4(a.m_row[i],transB.m_row[0]);\n"
+"		ans.m_row[i].y = dot3F4(a.m_row[i],transB.m_row[1]);\n"
+"		ans.m_row[i].z = dot3F4(a.m_row[i],transB.m_row[2]);\n"
+"		ans.m_row[i].w = 0.f;\n"
+"	}\n"
+"	return ans;\n"
+"}\n"
+"\n"
+"\n"
+"__kernel void initializeGpuAabbsFull(  const int numNodes, __global Body* gBodies,__global Collidable* collidables, __global btAABBCL* plocalShapeAABB, __global btAABBCL* pAABB)\n"
+"{\n"
+"	int nodeID = get_global_id(0);\n"
+"		\n"
+"	if( nodeID < numNodes )\n"
+"	{\n"
+"		float4 position = gBodies[nodeID].m_pos;\n"
+"		float4 orientation = gBodies[nodeID].m_quat;\n"
+"		\n"
+"			\n"
+"		int collidableIndex = gBodies[nodeID].m_collidableIdx;\n"
+"		int shapeIndex = collidables[collidableIndex].m_shapeIndex;\n"
+"			\n"
+"		if (shapeIndex>=0)\n"
+"		{\n"
+"			btAABBCL minAabb = plocalShapeAABB[collidableIndex*2];\n"
+"			btAABBCL maxAabb = plocalShapeAABB[collidableIndex*2+1];\n"
+"				\n"
+"			float4 halfExtents = ((float4)(maxAabb.fx - minAabb.fx,maxAabb.fy - minAabb.fy,maxAabb.fz - minAabb.fz,0.f))*0.5f;\n"
+"			float4 localCenter = ((float4)(maxAabb.fx + minAabb.fx,maxAabb.fy + minAabb.fy,maxAabb.fz + minAabb.fz,0.f))*0.5f;\n"
+"				\n"
+"			float4 worldCenter = transform(&localCenter,&position,&orientation);\n"
+"				\n"
+"			Matrix3x3 abs_b = qtGetRotationMatrix(orientation);\n"
+"			float4 extent = (float4) (	dot(abs_b.m_row[0],halfExtents),dot(abs_b.m_row[1],halfExtents),dot(abs_b.m_row[2],halfExtents),0.f);\n"
+"			\n"
+"	\n"
+"			pAABB[nodeID*2].fx = worldCenter.x-extent.x;\n"
+"			pAABB[nodeID*2].fy = worldCenter.y-extent.y;\n"
+"			pAABB[nodeID*2].fz = worldCenter.z-extent.z;\n"
+"			pAABB[nodeID*2].uw = nodeID;\n"
+"	\n"
+"			pAABB[nodeID*2+1].fx = worldCenter.x+extent.x;\n"
+"			pAABB[nodeID*2+1].fy = worldCenter.y+extent.y;\n"
+"			pAABB[nodeID*2+1].fz = worldCenter.z+extent.z;\n"
+"			pAABB[nodeID*2+1].uw = gBodies[nodeID].m_invMass==0.f? 0 : 1;\n"
+"		}\n"
+"	} \n"
+"}\n"
+"\n"
+;