Added GPU SoftBody constraint solvers for DirectX 11 (Direct Compute) and OpenCL, thanks to AMD.

See also issue 390 (http://code.google.com/p/bullet/issues/detail?id=390). Added Demos/DX11ClothDemo; an OpenCL cloth demo will follow soon.
2010-07-20 16:09:53 +00:00
parent 5fd08505ba
commit 11fa2e8b43
99 changed files with 117195 additions and 0 deletions
--- a/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/ApplyForces.hlsl
+++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/ApplyForces.hlsl
@@ -0,0 +1,95 @@
+MSTRINGIFY(
+
+// Constants read by ApplyForcesKernel.
+cbuffer ApplyForcesCB : register( b0 )
+{
+	// Node count: threads whose ID is not below this value do no work
+	unsigned int numNodes;
+	// Multiplies the per-cloth acceleration and the node inverse mass in the kernel (presumably the solver time step)
+	float solverdt;
+	// The squared relative wind speed must exceed this before any wind force is computed
+	float epsilon;
+	// Not read by the kernel; presumably pads the buffer to 16 bytes - confirm against the host struct
+	int padding3;
+};
+
+
+// Per-vertex inputs indexed by node ID. The cloth identifier is used as the index into the per-cloth arrays.
+StructuredBuffer<int> g_vertexClothIdentifier : register( t0 );
+StructuredBuffer<float4> g_vertexNormal : register( t1 );
+StructuredBuffer<float> g_vertexArea : register( t2 );
+StructuredBuffer<float> g_vertexInverseMass : register( t3 );
+// Per-cloth inputs indexed by the value read from g_vertexClothIdentifier.
+// TODO: These could be combined into a lift/drag factor array along with medium density
+StructuredBuffer<float> g_clothLiftFactor : register( t4 );
+StructuredBuffer<float> g_clothDragFactor : register( t5 );
+StructuredBuffer<float4> g_clothWindVelocity : register( t6 );
+StructuredBuffer<float4> g_clothAcceleration : register( t7 );
+StructuredBuffer<float> g_clothMediumDensity : register( t8 );
+
+// Per-vertex state that the kernel both reads and writes; only xyz is read and w is written as zero.
+RWStructuredBuffer<float4> g_vertexForceAccumulator : register( u0 );
+RWStructuredBuffer<float4> g_vertexVelocity : register( u1 );
+
+// Returns a scaled by dot(v, a). This equals the projection of v onto a
+// only when a has unit length; the function itself does not normalise a.
+float3 projectOnAxis( float3 v, float3 a )
+{
+	float scale = dot(v, a);
+	return a * scale;
+}
+
+// One thread per node. For nodes with positive inverse mass this kernel:
+//  1. adds clothAcceleration*solverdt to the node velocity and stores it;
+//  2. when the squared speed relative to the cloth wind velocity exceeds epsilon
+//     builds a force from the lift and drag factors and folds it into the
+//     force accumulator.
+// Nodes that fail any of the tests keep their existing accumulator value.
+[numthreads(128, 1, 1)]
+void
+ApplyForcesKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
+{
+	unsigned int vertexIndex = DTid.x;
+	if( vertexIndex >= numNodes )
+		return;
+
+	// Only nodes with a strictly positive inverse mass are processed
+	float inverseMass = g_vertexInverseMass[vertexIndex];
+	if( !(inverseMass > 0.0f) )
+		return;
+
+	// Per-vertex state
+	int clothIndex = g_vertexClothIdentifier[vertexIndex];
+	float3 velocity = g_vertexVelocity[vertexIndex].xyz;
+	float3 surfaceNormal = g_vertexNormal[vertexIndex].xyz;
+	float vertexArea = g_vertexArea[vertexIndex];
+	float3 accumulatedForce = g_vertexForceAccumulator[vertexIndex].xyz;
+
+	// Per-cloth parameters
+	float3 acceleration = g_clothAcceleration[clothIndex].xyz;
+	float3 windVelocity = g_clothWindVelocity[clothIndex].xyz;
+	float lift = g_clothLiftFactor[clothIndex];
+	float drag = g_clothDragFactor[clothIndex];
+	float density = g_clothMediumDensity[clothIndex];
+
+	// The acceleration goes straight into the velocity instead of via the force accumulator
+	velocity += (acceleration*solverdt);
+	g_vertexVelocity[vertexIndex] = float4(velocity, 0.f);
+
+	float3 relativeVelocity = velocity - windVelocity;
+	float speedSquared = dot(relativeVelocity, relativeVelocity);
+	if( !(speedSquared > epsilon) )
+		return;
+
+	// Flip the normal so that it does not oppose the relative velocity
+	if( dot(surfaceNormal, relativeVelocity) < 0 )
+		surfaceNormal = surfaceNormal * -1.f;
+
+	float normalSpeed = dot(surfaceNormal, relativeVelocity);
+	if( !(normalSpeed > 0) )
+		return;
+
+	// Common factor: area * normal speed * speed squared / 2, scaled by the medium density
+	float halfAreaTerm = vertexArea * normalSpeed * speedSquared / 2.f;
+	float densityTerm = halfAreaTerm * density;
+
+	// Lift acts against the (flipped) normal and drag against the relative velocity
+	float3 windForce = float3(0.f, 0.f, 0.f);
+	windForce += surfaceNormal * (-densityTerm * lift);
+	windForce += normalize(relativeVelocity)*(-densityTerm * drag);
+
+	float dtim = solverdt * inverseMass;
+	float3 velocityChange = windForce * dtim;
+
+	// If the velocity change this force would cause is larger than the current
+	// velocity the force is not added. Instead the accumulator is reduced by the
+	// velocity component along the force direction divided by dtim
+	// (CPU reference: m_nodesf[i] -= ProjectOnAxis(m_nodesv[i], force.normalized())/dtim).
+	float3 updatedForce;
+	if( dot(velocityChange, velocityChange) > dot(velocity, velocity) )
+		updatedForce = accumulatedForce - (projectOnAxis(velocity, normalize(windForce))/dtim);
+	else
+		updatedForce = accumulatedForce + windForce;
+
+	g_vertexForceAccumulator[vertexIndex] = float4(updatedForce, 0.0f);
+}
+
+);
--- a/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/Integrate.hlsl
+++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/Integrate.hlsl
@@ -0,0 +1,41 @@
+MSTRINGIFY(
+
+// Constants read by IntegrateKernel.
+cbuffer IntegrateCB : register( b0 )
+{
+	// Node count: threads whose ID is not below this value do no work
+	int numNodes;
+	// Multiplies force*inverseMass and then the velocity (presumably the solver time step)
+	float solverdt;
+	// Not read by the kernel; presumably padding to 16 bytes - confirm against the host struct
+	int padding1;
+	int padding2;
+};
+
+// Inverse mass for each vertex
+StructuredBuffer<float> g_vertexInverseMasses : register( t0 );
+
+// Per-vertex state. Positions, velocities and forces are read and rewritten;
+// previous positions are only written.
+RWStructuredBuffer<float4> g_vertexPositions : register( u0 );
+RWStructuredBuffer<float4> g_vertexVelocity : register( u1 );
+RWStructuredBuffer<float4> g_vertexPreviousPositions : register( u2 );
+RWStructuredBuffer<float4> g_vertexForceAccumulator : register( u3 );
+
+// One thread per node: saves the current position as the previous position,
+// advances the velocity by force*inverseMass*solverdt, advances the position
+// by the new velocity times solverdt and clears the force accumulator.
+[numthreads(128, 1, 1)]
+void
+IntegrateKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
+{
+	int vertexIndex = DTid.x;
+	if( vertexIndex >= numNodes )
+		return;
+
+	float3 startPosition = g_vertexPositions[vertexIndex].xyz;
+	float3 newVelocity = g_vertexVelocity[vertexIndex].xyz;
+	float3 accumulatedForce = g_vertexForceAccumulator[vertexIndex].xyz;
+	float invMass = g_vertexInverseMasses[vertexIndex];
+
+	// Velocity is updated first so the position step uses the new velocity
+	newVelocity += accumulatedForce * invMass * solverdt;
+	float3 endPosition = startPosition + newVelocity * solverdt;
+
+	g_vertexPreviousPositions[vertexIndex] = float4(startPosition, 0.f);
+	g_vertexForceAccumulator[vertexIndex] = float4(0.f, 0.f, 0.f, 0.0f);
+	g_vertexPositions[vertexIndex] = float4(endPosition, 0.f);
+	g_vertexVelocity[vertexIndex] = float4(newVelocity, 0.f);
+}
+
+);
--- a/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/OutputToVertexArray.hlsl
+++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/OutputToVertexArray.hlsl
@@ -0,0 +1,63 @@
+MSTRINGIFY(
+
+// Constants shared by both OutputToVertexArray kernels.
+// Offsets and strides index g_vertexBuffer and so are counted in float elements.
+cbuffer OutputToVertexArrayCB : register( b0 )
+{
+	// Added to the thread ID to form the index into the source position/normal arrays
+	int startNode;
+	// Number of nodes to copy: threads whose ID is not below this value do no work
+	int numNodes;
+	// The x component of node i is written at i * positionStride + positionOffset
+	int positionOffset;
+	int positionStride;
+	
+	// The x component of the normal of node i is written at i * normalStride + normalOffset
+	int normalOffset;	
+	int normalStride;
+	// Not read by the kernels; presumably padding to a 16 byte multiple - confirm against the host struct
+	int padding1;
+	int padding2;
+};
+
+
+// Source data; only the xyz components are copied out
+StructuredBuffer<float4> g_vertexPositions : register( t0 );
+StructuredBuffer<float4> g_vertexNormals : register( t1 );
+
+// Destination addressed one float at a time
+RWBuffer<float> g_vertexBuffer : register( u0 );
+
+
+// One thread per node: copies the xyz of the position and of the normal of
+// node (thread ID + startNode) into g_vertexBuffer at the strided
+// destinations described by the constant buffer.
+[numthreads(128, 1, 1)]
+void
+OutputToVertexArrayWithNormalsKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
+{
+	int localIndex = DTid.x;
+	if( localIndex >= numNodes )
+		return;
+
+	int sourceIndex = localIndex + startNode;
+	float4 sourcePosition = g_vertexPositions[sourceIndex];
+	float4 sourceNormal = g_vertexNormals[sourceIndex];
+
+	// Strides and offsets are in floats because the destination is a float buffer
+	int positionBase = localIndex * positionStride + positionOffset;
+	g_vertexBuffer[positionBase] = sourcePosition.x;
+	g_vertexBuffer[positionBase+1] = sourcePosition.y;
+	g_vertexBuffer[positionBase+2] = sourcePosition.z;
+
+	int normalBase = localIndex * normalStride + normalOffset;
+	g_vertexBuffer[normalBase] = sourceNormal.x;
+	g_vertexBuffer[normalBase+1] = sourceNormal.y;
+	g_vertexBuffer[normalBase+2] = sourceNormal.z;
+}
+
+// One thread per node: copies the xyz of the position of node
+// (thread ID + startNode) into g_vertexBuffer at
+// threadID * positionStride + positionOffset. Normals are not touched.
+[numthreads(128, 1, 1)]
+void 
+OutputToVertexArrayWithoutNormalsKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
+{
+	int nodeID = DTid.x;
+	if( nodeID < numNodes )
+	{			
+		// The normal buffer is deliberately not read here: this variant only outputs positions
+		float4 position = g_vertexPositions[nodeID + startNode];
+		
+		// Stride and offset are counted in floats because the destination is a float buffer
+		int positionDestination = nodeID * positionStride + positionOffset;		
+		g_vertexBuffer[positionDestination] = position.x;
+		g_vertexBuffer[positionDestination+1] = position.y;
+		g_vertexBuffer[positionDestination+2] = position.z;		
+	}
+}
+);
--- a/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/PrepareLinks.hlsl
+++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/PrepareLinks.hlsl
@@ -0,0 +1,44 @@
+MSTRINGIFY(
+
+// Constants read by PrepareLinksKernel.
+cbuffer PrepareLinksCB : register( b0 )
+{
+	// Link count: threads whose ID is not below this value do no work
+	int numLinks;
+	// Not read by the kernel; presumably padding to 16 bytes - confirm against the host struct
+	int padding0;
+	int padding1;
+	int padding2;
+};
+
+// Node indices for each link
+StructuredBuffer<int2> g_linksVertexIndices : register( t0 );
+// Per-link factor that multiplies the squared link vector length before the reciprocal is taken
+StructuredBuffer<float> g_linksMassLSC : register( t1 );
+// Per-node previous positions from which the link vector is formed
+StructuredBuffer<float4> g_nodesPreviousPosition : register( t2 );
+
+// Per-link outputs. Despite its name g_linksCurrentLength receives the vector
+// between the two previous positions and not a scalar length.
+RWStructuredBuffer<float> g_linksLengthRatio : register( u0 );
+RWStructuredBuffer<float4> g_linksCurrentLength : register( u1 );
+
+// One thread per link: stores the vector between the previous positions of the
+// two link nodes (node1 - node0) and the reciprocal of
+// dot(vector, vector) * massLSC.
+// The reciprocal is guarded: a denominator of exactly zero (massLSC is zero or
+// the two previous positions coincide) stores a ratio of zero instead of an
+// infinity. An infinite ratio turns into NaN as soon as a consumer multiplies
+// it by zero, which is what happens for a link whose nodes both have zero
+// inverse mass.
+[numthreads(128, 1, 1)]
+void 
+PrepareLinksKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
+{
+	int linkID = DTid.x;
+	if( linkID < numLinks )
+	{	
+		int2 nodeIndices = g_linksVertexIndices[linkID];
+		int node0 = nodeIndices.x;
+		int node1 = nodeIndices.y;
+		
+		float4 nodePreviousPosition0 = g_nodesPreviousPosition[node0];
+		float4 nodePreviousPosition1 = g_nodesPreviousPosition[node1];
+
+		float massLSC = g_linksMassLSC[linkID];
+		
+		// Full float4 difference: the w components take part in the dot product below
+		float4 linkCurrentLength = nodePreviousPosition1 - nodePreviousPosition0;
+		
+		float denominator = dot(linkCurrentLength, linkCurrentLength)*massLSC;
+		// Non-zero denominators (including negative ones) behave exactly as before
+		float linkLengthRatio = (denominator != 0.0f) ? (1.f/denominator) : 0.0f;
+		
+		g_linksCurrentLength[linkID] = linkCurrentLength;
+		g_linksLengthRatio[linkID] = linkLengthRatio;		
+	}
+}
+);
--- a/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/SolvePositions.hlsl
+++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/SolvePositions.hlsl
@@ -0,0 +1,55 @@
+MSTRINGIFY(
+
+// Constants read by SolvePositionsFromLinksKernel.
+cbuffer SolvePositionsFromLinksKernelCB : register( b0 )
+{
+	// Added to the thread ID to form the link index
+	int startLink;
+	// Number of links in this dispatch: threads whose ID is not below this value do no work
+	int numLinks;
+	// Scales the positional correction factor
+	float kst;
+	// Not read by the kernel
+	float ti;
+};
+
+// Node indices for each link
+StructuredBuffer<int2> g_linksVertexIndices : register( t0 );
+
+// Per-link data; links whose massLSC is not positive are skipped
+StructuredBuffer<float> g_linksMassLSC : register( t1 );
+StructuredBuffer<float> g_linksRestLengthSquared : register( t2 );
+// Per-vertex inverse mass that weights the correction applied to each end of a link
+StructuredBuffer<float> g_verticesInverseMass : register( t3 );
+
+// Positions are read and rewritten for both nodes of every processed link
+RWStructuredBuffer<float4> g_vertexPositions : register( u0 );
+
+// One thread per link in the range [startLink, startLink + numLinks).
+// Moves both nodes of the link along the vector between them so that the
+// squared distance approaches the squared rest length. Each node moves in
+// proportion to its inverse mass.
+// NOTE(review): the position updates are plain read-modify-writes, so the host
+// presumably dispatches links in batches that share no nodes - confirm.
+[numthreads(128, 1, 1)]
+void
+SolvePositionsFromLinksKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
+{
+	if( DTid.x >= numLinks )
+		return;
+
+	int linkIndex = DTid.x + startLink;
+
+	// Links with a non-positive massLSC are left alone
+	float linkMassLSC = g_linksMassLSC[linkIndex];
+	if( !(linkMassLSC > 0.0f) )
+		return;
+
+	float restLengthSq = g_linksRestLengthSquared[linkIndex];
+	int2 endpoints = g_linksVertexIndices[linkIndex];
+
+	float3 positionA = g_vertexPositions[endpoints.x].xyz;
+	float3 positionB = g_vertexPositions[endpoints.y].xyz;
+	float inverseMassA = g_verticesInverseMass[endpoints.x];
+	float inverseMassB = g_verticesInverseMass[endpoints.y];
+
+	float3 separation = positionB - positionA;
+	float separationSq = dot(separation, separation);
+
+	// Relative squared-length error scaled by the link factor and the stiffness
+	float correction = ((restLengthSq - separationSq)/(linkMassLSC*(restLengthSq+separationSq)))*kst;
+
+	positionA = positionA - separation*(correction*inverseMassA);
+	positionB = positionB + separation*(correction*inverseMassB);
+
+	g_vertexPositions[endpoints.x] = float4(positionA, 0.f);
+	g_vertexPositions[endpoints.y] = float4(positionB, 0.f);
+}
+
+);
--- a/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/UpdateConstants.hlsl
+++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/UpdateConstants.hlsl
@@ -0,0 +1,48 @@
+MSTRINGIFY(
+
+// Constants read by UpdateConstantsKernel.
+cbuffer UpdateConstantsCB : register( b0 )
+{
+	// Link count: threads whose ID is not below this value do no work
+	int numLinks;
+	// Not read by the kernel; presumably padding to 16 bytes - confirm against the host struct
+	int padding0;
+	int padding1;
+	int padding2;
+};
+
+// Node indices for each link
+StructuredBuffer<int2> g_linksVertexIndices : register( t0 );
+// Per-vertex positions (only xyz is read) and inverse masses
+StructuredBuffer<float4> g_vertexPositions : register( t1 );
+StructuredBuffer<float> g_vertexInverseMasses : register( t2 );
+// Per-link coefficient used as the divisor of the summed inverse masses
+StructuredBuffer<float> g_linksMaterialLSC : register( t3 );
+
+// Per-link outputs derived from the current positions and masses
+RWStructuredBuffer<float> g_linksMassLSC : register( u0 );
+RWStructuredBuffer<float> g_linksRestLengthSquared : register( u1 );
+RWStructuredBuffer<float> g_linksRestLengths : register( u2 );
+
+// One thread per link: derives the per-link constants from the current vertex
+// positions and inverse masses. Stores the distance between the two nodes as
+// the rest length together with its square and stores the sum of the two
+// inverse masses divided by the link stiffness coefficient as massLSC.
+[numthreads(128, 1, 1)]
+void
+UpdateConstantsKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
+{
+	int linkIndex = DTid.x;
+	if( linkIndex >= numLinks )
+		return;
+
+	int2 endpoints = g_linksVertexIndices[linkIndex];
+	float stiffness = g_linksMaterialLSC[ linkIndex ];
+
+	float3 separation = g_vertexPositions[endpoints.x].xyz - g_vertexPositions[endpoints.y].xyz;
+	float inverseMassA = g_vertexInverseMasses[endpoints.x];
+	float inverseMassB = g_vertexInverseMasses[endpoints.y];
+
+	// The squared rest length is formed from the rounded square root to keep
+	// the stored values numerically identical to the original formulation
+	float restLength = sqrt(dot(separation, separation));
+
+	g_linksRestLengths[linkIndex] = restLength;
+	g_linksMassLSC[linkIndex] = (inverseMassA + inverseMassB)/stiffness;
+	g_linksRestLengthSquared[linkIndex] = restLength*restLength;
+}
+
+);
--- a/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/UpdateNodes.hlsl
+++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/UpdateNodes.hlsl
@@ -0,0 +1,49 @@
+MSTRINGIFY(
+
+// Constants read by updateVelocitiesFromPositionsWithVelocitiesKernel.
+cbuffer UpdateVelocitiesFromPositionsWithVelocitiesCB : register( b0 )
+{
+	// Node count: threads whose ID is not below this value do no work
+	int numNodes;
+	// Multiplies the position difference (presumably the inverse of the solver time step)
+	float isolverdt;
+	// Not read by the kernel; presumably padding to 16 bytes - confirm against the host struct
+	int padding1;
+	int padding2;
+};
+
+
+// Per-vertex inputs; only xyz of the positions is read
+StructuredBuffer<float4> g_vertexPositions : register( t0 );
+StructuredBuffer<float4> g_vertexPreviousPositions : register( t1 );
+// Maps each vertex to the index used for the per-cloth arrays below
+StructuredBuffer<int> g_vertexClothIndices : register( t2 );
+StructuredBuffer<float> g_clothVelocityCorrectionCoefficients : register( t3 );
+StructuredBuffer<float> g_clothDampingFactor : register( t4 );
+
+// Velocities are read and rewritten; forces are only cleared
+RWStructuredBuffer<float4> g_vertexVelocities : register( u0 );
+RWStructuredBuffer<float4> g_vertexForces : register( u1 );
+
+
+// One thread per node: adds the position change since the previous position
+// (scaled by the cloth velocity correction coefficient and by isolverdt) to
+// the existing velocity, damps the result by (1 - dampingFactor) and clears
+// the force of the node.
+[numthreads(128, 1, 1)]
+void
+updateVelocitiesFromPositionsWithVelocitiesKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
+{
+	int vertexIndex = DTid.x;
+	if( vertexIndex >= numNodes )
+		return;
+
+	// Per-cloth parameters for the cloth that owns this vertex
+	int owningCloth = g_vertexClothIndices[vertexIndex];
+	float correctionScale = g_clothVelocityCorrectionCoefficients[owningCloth];
+	float retainedFraction = (1.f - g_clothDampingFactor[owningCloth]);
+
+	float3 displacement = g_vertexPositions[vertexIndex].xyz - g_vertexPreviousPositions[vertexIndex].xyz;
+	float3 updatedVelocity = g_vertexVelocities[vertexIndex].xyz;
+
+	updatedVelocity += displacement*correctionScale*isolverdt;
+	// Damp the velocity
+	updatedVelocity *= retainedFraction;
+
+	g_vertexVelocities[vertexIndex] = float4(updatedVelocity, 0.f);
+	g_vertexForces[vertexIndex] = float4(0.f, 0.f, 0.f, 0.f);
+}
+
+);
--- a/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/UpdateNormals.hlsl
+++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/UpdateNormals.hlsl
@@ -0,0 +1,98 @@
+MSTRINGIFY(
+
+// Constants shared by the three kernels in this file.
+cbuffer UpdateSoftBodiesCB : register( b0 )
+{
+	// Node count used by the reset and normalize kernels
+	unsigned int numNodes;
+	// Added to the thread ID to form the face index in UpdateSoftBodiesKernel
+	unsigned int startFace;
+	// Number of faces in the dispatch of UpdateSoftBodiesKernel
+	unsigned int numFaces;
+	// Not read by any kernel in this file
+	float epsilon;
+};
+
+
+// Node indices for each triangle; only the x, y and z components are read
+StructuredBuffer<int4> g_triangleVertexIndexSet : register( t0 );
+StructuredBuffer<float4> g_vertexPositions : register( t1 );
+// Per-vertex divisor applied to the accumulated area in NormalizeNormalsAndAreasKernel
+StructuredBuffer<int> g_vertexTriangleCount : register( t2 );
+
+// Per-vertex accumulators (reset then summed per face then normalised) and per-triangle outputs
+RWStructuredBuffer<float4> g_vertexNormals : register( u0 );
+RWStructuredBuffer<float> g_vertexArea : register( u1 );
+RWStructuredBuffer<float4> g_triangleNormals : register( u2 );
+RWStructuredBuffer<float> g_triangleArea : register( u3 );
+
+
+// One thread per node: zeroes the normal and the area of the node so that
+// they can be accumulated afresh.
+[numthreads(128, 1, 1)]
+void
+ResetNormalsAndAreasKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
+{
+	uint vertexIndex = DTid.x;
+	if( vertexIndex >= numNodes )
+		return;
+
+	g_vertexNormals[vertexIndex] = float4(0.0f, 0.0f, 0.0f, 0.0f);
+	g_vertexArea[vertexIndex] = 0.0f;
+}
+
+
+// One thread per face in the range [startFace, startFace + numFaces).
+// Computes the cross product of two triangle edges and its length. The
+// normalised cross product and the length are stored per triangle. The
+// unnormalised cross product and the length are added to the normal and area
+// accumulators of each of the three triangle vertices.
+// NOTE(review): the per-vertex accumulation is a plain read-modify-write, so
+// the host presumably dispatches faces in batches that share no vertices - confirm.
+[numthreads(128, 1, 1)]
+void
+UpdateSoftBodiesKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
+{
+	if( DTid.x >= numFaces )
+		return;
+
+	int faceIndex = DTid.x + startFace;
+
+	int4 corners = g_triangleVertexIndexSet[ faceIndex ];
+	int cornerA = corners.x;
+	int cornerB = corners.y;
+	int cornerC = corners.z;
+
+	// All accumulator reads happen before any write
+	float3 summedNormalA = g_vertexNormals[cornerA].xyz;
+	float3 summedNormalB = g_vertexNormals[cornerB].xyz;
+	float3 summedNormalC = g_vertexNormals[cornerC].xyz;
+	float summedAreaA = g_vertexArea[cornerA];
+	float summedAreaB = g_vertexArea[cornerB];
+	float summedAreaC = g_vertexArea[cornerC];
+
+	float3 positionA = g_vertexPositions[cornerA].xyz;
+	float3 edgeAB = g_vertexPositions[cornerB].xyz - positionA;
+	float3 edgeAC = g_vertexPositions[cornerC].xyz - positionA;
+
+	// The length of the edge cross product is what this file stores as the triangle area
+	float3 crossProduct = cross(edgeAB.xyz, edgeAC.xyz);
+	float crossLength = length(crossProduct);
+
+	g_triangleNormals[faceIndex] = float4(normalize(crossProduct), 0.f);
+	g_triangleArea[faceIndex] = crossLength;
+
+	g_vertexNormals[cornerA] = float4(summedNormalA + crossProduct, 0.f);
+	g_vertexNormals[cornerB] = float4(summedNormalB + crossProduct, 0.f);
+	g_vertexNormals[cornerC] = float4(summedNormalC + crossProduct, 0.f);
+
+	g_vertexArea[cornerA] = summedAreaA + crossLength;
+	g_vertexArea[cornerB] = summedAreaB + crossLength;
+	g_vertexArea[cornerC] = summedAreaC + crossLength;
+}
+
+// One thread per node: normalises the accumulated vertex normal and divides
+// the accumulated vertex area by the triangle count of the vertex.
+// Degenerate inputs are guarded so that no NaN is written: a zero-length
+// normal is stored unchanged and a vertex with a triangle count of zero or
+// less gets an area of zero.
+[numthreads(128, 1, 1)]
+void 
+NormalizeNormalsAndAreasKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
+{
+	if( DTid.x < numNodes )
+	{
+		float4 normal = g_vertexNormals[DTid.x];
+		float area = g_vertexArea[DTid.x];
+		int numTriangles = g_vertexTriangleCount[DTid.x];
+		
+		// normalize() of a zero vector is not a number so only normalise when there is a direction
+		float vectorLength = length(normal);
+		if( vectorLength > 0.0f )
+			normal = normalize(normal);
+		
+		// Avoid dividing by zero for vertices that belong to no triangle
+		float averagedArea = 0.0f;
+		if( numTriangles > 0 )
+			averagedArea = area/float(numTriangles);
+		
+		g_vertexNormals[DTid.x] = normal;
+		g_vertexArea[DTid.x] = averagedArea;
+	}
+}
+
+);
--- a/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/UpdatePositions.hlsl
+++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/UpdatePositions.hlsl
@@ -0,0 +1,44 @@
+MSTRINGIFY(
+
+// Constants read by updateVelocitiesFromPositionsWithoutVelocitiesKernel.
+cbuffer UpdateVelocitiesFromPositionsWithoutVelocitiesCB : register( b0 )
+{
+	// Node count: threads whose ID is not below this value do no work
+	int numNodes;
+	// Multiplies the position difference (presumably the inverse of the solver time step)
+	float isolverdt;
+	// Not read by the kernel; presumably padding to 16 bytes - confirm against the host struct
+	int padding1;
+	int padding2;
+};
+
+
+// Per-vertex inputs; only xyz of the positions is read
+StructuredBuffer<float4> g_vertexPositions : register( t0 );
+StructuredBuffer<float4> g_vertexPreviousPositions : register( t1 );
+// Maps each vertex to the index used for the per-cloth damping array
+StructuredBuffer<int> g_vertexClothIndices : register( t2 );
+StructuredBuffer<float> g_clothDampingFactor : register( t3 );
+
+// Velocities are overwritten and forces are cleared
+RWStructuredBuffer<float4> g_vertexVelocities : register( u0 );
+RWStructuredBuffer<float4> g_vertexForces : register( u1 );
+
+
+// One thread per node: replaces the velocity of the node with the position
+// change since the previous position scaled by (1 - dampingFactor) and by
+// isolverdt, then clears the force of the node. The existing velocity does not
+// contribute to the result and is therefore not loaded.
+[numthreads(128, 1, 1)]
+void 
+updateVelocitiesFromPositionsWithoutVelocitiesKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
+{
+	int nodeID = DTid.x;
+	if( nodeID < numNodes )
+	{	
+		float3 position = g_vertexPositions[nodeID].xyz;
+		float3 previousPosition = g_vertexPreviousPositions[nodeID].xyz;
+		int clothIndex = g_vertexClothIndices[nodeID];
+		float dampingFactor = g_clothDampingFactor[clothIndex];
+		// Fraction of the velocity that survives damping
+		float velocityCoefficient = (1.f - dampingFactor);
+		
+		float3 difference = position - previousPosition;
+				
+		// The velocity is rebuilt purely from the position change
+		float3 velocity = difference*velocityCoefficient*isolverdt;		
+		
+		g_vertexVelocities[nodeID] = float4(velocity, 0.f);
+		g_vertexForces[nodeID] = float4(0.f, 0.f, 0.f, 0.f);								
+	}
+}
+
+);
--- a/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/UpdatePositionsFromVelocities.hlsl
+++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/UpdatePositionsFromVelocities.hlsl
@@ -0,0 +1,35 @@
+MSTRINGIFY(
+
+// Constants read by UpdatePositionsFromVelocitiesKernel.
+cbuffer UpdatePositionsFromVelocitiesCB : register( b0 )
+{
+	// Node count: threads whose ID is not below this value do no work
+	int numNodes;
+	// Multiplies the velocity when it is added to the previous position (presumably a time step)
+	float solverSDT;
+	// Not read by the kernel; presumably padding to 16 bytes - confirm against the host struct
+	int padding1;
+	int padding2;
+};
+
+
+// Per-vertex velocities; only xyz is read
+StructuredBuffer<float4> g_vertexVelocities : register( t0 );
+
+// Previous positions are read and rewritten; current positions are only written
+RWStructuredBuffer<float4> g_vertexPreviousPositions : register( u0 );
+RWStructuredBuffer<float4> g_vertexCurrentPosition : register( u1 );
+
+
+// One thread per node: advances the previous position by velocity*solverSDT
+// and stores the result as both the current and the previous position.
+[numthreads(128, 1, 1)]
+void
+UpdatePositionsFromVelocitiesKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
+{
+	int nodeIndex = DTid.x;
+	if( nodeIndex >= numNodes )
+		return;
+
+	float3 basePosition = g_vertexPreviousPositions[nodeIndex].xyz;
+	float3 nodeVelocity = g_vertexVelocities[nodeIndex].xyz;
+
+	// The same value goes to both buffers so they agree after this kernel
+	float4 advanced = float4(basePosition + nodeVelocity*solverSDT, 0.f);
+
+	g_vertexCurrentPosition[nodeIndex] = advanced;
+	g_vertexPreviousPositions[nodeIndex] = advanced;
+}
+
+);
--- a/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/VSolveLinks.hlsl
+++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/VSolveLinks.hlsl
@@ -0,0 +1,55 @@
+MSTRINGIFY(
+
+// Constants read by VSolveLinksKernel.
+cbuffer VSolveLinksCB : register( b0 )
+{
+	// Added to the thread ID to form the link index
+	int startLink;
+	// Number of links in this dispatch: threads whose ID is not below this value do no work
+	int numLinks;
+	// Scales the velocity correction factor
+	float kst;
+	// Not read by the kernel; presumably padding to 16 bytes - confirm against the host struct
+	int padding;
+};
+
+// Node indices for each link
+StructuredBuffer<int2> g_linksVertexIndices : register( t0 );
+
+// Per-link scalar factor and per-link vector (only xyz of the vector is read)
+StructuredBuffer<float> g_linksLengthRatio : register( t1 );
+StructuredBuffer<float4> g_linksCurrentLength : register( t2 );
+// Per-vertex inverse mass that weights the velocity change applied to each end of a link
+StructuredBuffer<float> g_vertexInverseMass : register( t3 );
+
+// Velocities are read and rewritten for both nodes of every link
+RWStructuredBuffer<float4> g_vertexVelocity : register( u0 );
+
+// One thread per link in the range [startLink, startLink + numLinks).
+// Projects the velocity difference of the two link nodes onto the stored link
+// vector, scales it by the stored link ratio and by kst, and applies the
+// result along the link vector to both nodes in opposite directions. Each
+// node is weighted by its inverse mass.
+// NOTE(review): the velocity updates are plain read-modify-writes, so the host
+// presumably dispatches links in batches that share no nodes - confirm.
+[numthreads(128, 1, 1)]
+void
+VSolveLinksKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
+{
+	if( DTid.x >= numLinks )
+		return;
+
+	int linkIndex = DTid.x + startLink;
+
+	int2 endpoints = g_linksVertexIndices[linkIndex];
+	float ratio = g_linksLengthRatio[linkIndex];
+	float3 linkVector = g_linksCurrentLength[linkIndex].xyz;
+
+	float3 velocityA = g_vertexVelocity[endpoints.x].xyz;
+	float3 velocityB = g_vertexVelocity[endpoints.y].xyz;
+	float inverseMassA = g_vertexInverseMass[endpoints.x];
+	float inverseMassB = g_vertexInverseMass[endpoints.y];
+
+	// Scalar correction along the link vector
+	float3 relativeVelocity = velocityA - velocityB;
+	float alongLink = dot(linkVector, relativeVelocity);
+	float impulse = -alongLink*ratio*kst;
+
+	velocityA += linkVector*(impulse*inverseMassA);
+	velocityB -= linkVector*(impulse*inverseMassB);
+
+	g_vertexVelocity[endpoints.x] = float4(velocityA, 0.f);
+	g_vertexVelocity[endpoints.y] = float4(velocityB, 0.f);
+}
+
+);