Apply a patch that updates vertex position on GPU for the OpenCL version, by Dongsoo Han (saggita), work-in-progress

Removed the unused OpenCL kernels
Add example how to cache binary kernels, see SoftDemo compiled with OpenCL AMD using msvc/vs2008_opencl.bat
This commit is contained in:
erwin.coumans
2011-12-20 18:03:24 +00:00
parent 5cb40ed6e3
commit 1f7a67b7da
31 changed files with 441 additions and 1386 deletions

View File

@@ -55,7 +55,7 @@ public:
virtual bool onAccelerator(); virtual bool onAccelerator();
virtual bool moveToAccelerator(); virtual bool moveToAccelerator();
virtual bool moveFromAccelerator(); virtual bool moveFromAccelerator(bool bCopy = false, bool bCopyMinimum = true);
}; };

View File

@@ -308,18 +308,42 @@ bool btSoftBodyVertexDataDX11::moveToAccelerator()
return success; return success;
} }
bool btSoftBodyVertexDataDX11::moveFromAccelerator() bool btSoftBodyVertexDataDX11::moveFromAccelerator(bool bCopy, bool bCopyMinimum)
{ {
bool success = true; bool success = true;
success = success && m_dx11ClothIdentifier.moveFromGPU();
success = success && m_dx11VertexPosition.moveFromGPU(); if (!bCopy)
success = success && m_dx11VertexPreviousPosition.moveFromGPU(); {
success = success && m_dx11VertexVelocity.moveFromGPU(); success = success && m_dx11ClothIdentifier.moveFromGPU();
success = success && m_dx11VertexForceAccumulator.moveFromGPU(); success = success && m_dx11VertexPosition.moveFromGPU();
success = success && m_dx11VertexNormal.moveFromGPU(); success = success && m_dx11VertexPreviousPosition.moveFromGPU();
success = success && m_dx11VertexInverseMass.moveFromGPU(); success = success && m_dx11VertexVelocity.moveFromGPU();
success = success && m_dx11VertexArea.moveFromGPU(); success = success && m_dx11VertexForceAccumulator.moveFromGPU();
success = success && m_dx11VertexTriangleCount.moveFromGPU(); success = success && m_dx11VertexNormal.moveFromGPU();
success = success && m_dx11VertexInverseMass.moveFromGPU();
success = success && m_dx11VertexArea.moveFromGPU();
success = success && m_dx11VertexTriangleCount.moveFromGPU();
}
else
{
if (bCopyMinimum)
{
success = success && m_dx11VertexPosition.copyFromGPU();
success = success && m_dx11VertexNormal.copyFromGPU();
}
else
{
success = success && m_dx11ClothIdentifier.copyFromGPU();
success = success && m_dx11VertexPosition.copyFromGPU();
success = success && m_dx11VertexPreviousPosition.copyFromGPU();
success = success && m_dx11VertexVelocity.copyFromGPU();
success = success && m_dx11VertexForceAccumulator.copyFromGPU();
success = success && m_dx11VertexNormal.copyFromGPU();
success = success && m_dx11VertexInverseMass.copyFromGPU();
success = success && m_dx11VertexArea.copyFromGPU();
success = success && m_dx11VertexTriangleCount.copyFromGPU();
}
}
if( success ) if( success )
m_onGPU = true; m_onGPU = true;
@@ -619,10 +643,10 @@ void btDX11SoftBodySolver::releaseKernels()
} }
void btDX11SoftBodySolver::copyBackToSoftBodies() void btDX11SoftBodySolver::copyBackToSoftBodies(bool bMove)
{ {
// Move the vertex data back to the host first // Move the vertex data back to the host first
m_vertexData.moveFromAccelerator(); m_vertexData.moveFromAccelerator(!bMove);
// Loop over soft bodies, copying all the vertex positions back for each body in turn // Loop over soft bodies, copying all the vertex positions back for each body in turn
for( int softBodyIndex = 0; softBodyIndex < m_softBodySet.size(); ++softBodyIndex ) for( int softBodyIndex = 0; softBodyIndex < m_softBodySet.size(); ++softBodyIndex )

View File

@@ -607,7 +607,7 @@ public:
virtual void optimize( btAlignedObjectArray< btSoftBody * > &softBodies , bool forceUpdate=false); virtual void optimize( btAlignedObjectArray< btSoftBody * > &softBodies , bool forceUpdate=false);
virtual void copyBackToSoftBodies(); virtual void copyBackToSoftBodies(bool bMove = true);
virtual void solveConstraints( float solverdt ); virtual void solveConstraints( float solverdt );

View File

@@ -34,7 +34,6 @@ SET(BulletSoftBodyOpenCLSolvers_HDRS
ADD_LIBRARY(BulletSoftBodySolvers_OpenCL_AMD ADD_LIBRARY(BulletSoftBodySolvers_OpenCL_AMD
${BulletSoftBodyOpenCLSolvers_SRCS} ${BulletSoftBodyOpenCLSolvers_SRCS}
${BulletSoftBodyOpenCLSolvers_HDRS} ${BulletSoftBodyOpenCLSolvers_HDRS}
${BulletSoftBodyOpenCLSolvers_OpenCLC}
) )
SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_AMD PROPERTIES VERSION ${BULLET_VERSION}) SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_AMD PROPERTIES VERSION ${BULLET_VERSION})

View File

@@ -45,7 +45,7 @@ SET(BulletSoftBodyOpenCLSolvers_Shaders
) )
foreach(f ${BulletSoftBodyOpenCLSolvers_Shaders}) foreach(f ${BulletSoftBodyOpenCLSolvers_Shaders})
LIST(APPEND BulletSoftBodyOpenCLSolvers_OpenCLC "../OpenCLC/${f}.cl") LIST(APPEND BulletSoftBodyOpenCLSolvers_OpenCLC "../OpenCLC10/${f}.cl")
endforeach(f) endforeach(f)

View File

@@ -25,6 +25,7 @@ subject to the following restrictions:
#include "../OpenCLC10/UpdatePositions.cl" #include "../OpenCLC10/UpdatePositions.cl"
#include "../OpenCLC10/UpdatePositionsFromVelocities.cl" #include "../OpenCLC10/UpdatePositionsFromVelocities.cl"
#include "../OpenCLC10/VSolveLinks.cl" #include "../OpenCLC10/VSolveLinks.cl"
#include "../OpenCLC10/UpdateFixedVertexPositions.cl"
//#include "../OpenCLC10/SolveCollisionsAndUpdateVelocities.cl" //#include "../OpenCLC10/SolveCollisionsAndUpdateVelocities.cl"
@@ -39,7 +40,7 @@ MINICL_REGISTER(ApplyForcesKernel)
MINICL_REGISTER(ResetNormalsAndAreasKernel) MINICL_REGISTER(ResetNormalsAndAreasKernel)
MINICL_REGISTER(NormalizeNormalsAndAreasKernel) MINICL_REGISTER(NormalizeNormalsAndAreasKernel)
MINICL_REGISTER(UpdateSoftBodiesKernel) MINICL_REGISTER(UpdateSoftBodiesKernel)
MINICL_REGISTER(UpdateFixedVertexPositions)
float mydot3a(float4 a, float4 b) float mydot3a(float4 a, float4 b)
{ {

View File

@@ -44,7 +44,7 @@ SET(BulletSoftBodyOpenCLSolvers_Shaders
) )
foreach(f ${BulletSoftBodyOpenCLSolvers_Shaders}) foreach(f ${BulletSoftBodyOpenCLSolvers_Shaders})
LIST(APPEND BulletSoftBodyOpenCLSolvers_OpenCLC "../OpenCLC/${f}.cl") LIST(APPEND BulletSoftBodyOpenCLSolvers_OpenCLC "../OpenCLC10/${f}.cl")
endforeach(f) endforeach(f)

View File

@@ -1,91 +0,0 @@
MSTRINGIFY(
/*#define float3 float4
float dot3(float3 a, float3 b)
{
return a.x*b.x + a.y*b.y + a.z*b.z;
}*/
float3 projectOnAxis( float3 v, float3 a )
{
return (a*dot(v, a));
}
__kernel void
ApplyForcesKernel(
const uint numNodes,
const float solverdt,
const float epsilon,
__global int * g_vertexClothIdentifier,
__global float4 * g_vertexNormal,
__global float * g_vertexArea,
__global float * g_vertexInverseMass,
__global float * g_clothLiftFactor,
__global float * g_clothDragFactor,
__global float4 * g_clothWindVelocity,
__global float4 * g_clothAcceleration,
__global float * g_clothMediumDensity,
__global float4 * g_vertexForceAccumulator,
__global float4 * g_vertexVelocity)
{
unsigned int nodeID = get_global_id(0);
if( nodeID < numNodes )
{
int clothId = g_vertexClothIdentifier[nodeID];
float nodeIM = g_vertexInverseMass[nodeID];
if( nodeIM > 0.0f )
{
float3 nodeV = g_vertexVelocity[nodeID].xyz;
float3 normal = g_vertexNormal[nodeID].xyz;
float area = g_vertexArea[nodeID];
float3 nodeF = g_vertexForceAccumulator[nodeID].xyz;
// Read per-cloth values
float3 clothAcceleration = g_clothAcceleration[clothId].xyz;
float3 clothWindVelocity = g_clothWindVelocity[clothId].xyz;
float liftFactor = g_clothLiftFactor[clothId];
float dragFactor = g_clothDragFactor[clothId];
float mediumDensity = g_clothMediumDensity[clothId];
// Apply the acceleration to the cloth rather than do this via a force
nodeV += (clothAcceleration*solverdt);
g_vertexVelocity[nodeID] = (float4)(nodeV, 0.f);
float3 relativeWindVelocity = nodeV - clothWindVelocity;
float relativeSpeedSquared = dot(relativeWindVelocity, relativeWindVelocity);
if( relativeSpeedSquared > epsilon )
{
// Correct direction of normal relative to wind direction and get dot product
normal = normal * (dot(normal, relativeWindVelocity) < 0 ? -1.f : 1.f);
float dvNormal = dot(normal, relativeWindVelocity);
if( dvNormal > 0 )
{
float3 force = (float3)(0.f, 0.f, 0.f);
float c0 = area * dvNormal * relativeSpeedSquared / 2.f;
float c1 = c0 * mediumDensity;
force += normal * (-c1 * liftFactor);
force += normalize(relativeWindVelocity)*(-c1 * dragFactor);
float dtim = solverdt * nodeIM;
float3 forceDTIM = force * dtim;
float3 nodeFPlusForce = nodeF + force;
// m_nodesf[i] -= ProjectOnAxis(m_nodesv[i], force.normalized())/dtim;
float3 nodeFMinus = nodeF - (projectOnAxis(nodeV, normalize(force))/dtim);
nodeF = nodeFPlusForce;
if( dot(forceDTIM, forceDTIM) > dot(nodeV, nodeV) )
nodeF = nodeFMinus;
g_vertexForceAccumulator[nodeID] = (float4)(nodeF, 0.0f);
}
}
}
}
}
);

View File

@@ -1,80 +0,0 @@
MSTRINGIFY(
#pragma OPENCL EXTENSION cl_amd_printf : enable \n
__kernel void
ComputeBoundsKernel(
int numNodes,
int numSoftBodies,
__global int * g_vertexClothIdentifier,
__global float4 * g_vertexPositions,
volatile __global uint * g_clothMinBounds,
volatile __global uint * g_clothMaxBounds,
volatile __local uint * clothMinBounds,
volatile __local uint * clothMaxBounds)
{
// Init min and max bounds arrays
if( get_local_id(0) < numSoftBodies )
{
clothMinBounds[get_local_id(0)*4] = UINT_MAX;
clothMinBounds[get_local_id(0)*4+1] = UINT_MAX;
clothMinBounds[get_local_id(0)*4+2] = UINT_MAX;
clothMinBounds[get_local_id(0)*4+3] = UINT_MAX;
clothMaxBounds[get_local_id(0)*4] = 0;
clothMaxBounds[get_local_id(0)*4+1] = 0;
clothMaxBounds[get_local_id(0)*4+2] = 0;
clothMaxBounds[get_local_id(0)*4+3] = 0;
}
barrier(CLK_GLOBAL_MEM_FENCE);
int nodeID = get_global_id(0);
if( nodeID < numNodes )
{
int clothIdentifier = g_vertexClothIdentifier[get_global_id(0)];
if( clothIdentifier >= 0 )
{
float3 position = g_vertexPositions[get_global_id(0)].xyz;
/* Reinterpret position as uint */
uint3 positionUInt = (uint3)(as_uint(position.x), as_uint(position.y), as_uint(position.z));
/* Invert sign bit of positives and whole of negatives to allow comparison as unsigned ints */
positionUInt.x ^= (1+~(positionUInt.x >> 31) | 0x80000000);
positionUInt.y ^= (1+~(positionUInt.y >> 31) | 0x80000000);
positionUInt.z ^= (1+~(positionUInt.z >> 31) | 0x80000000);
/* Min/max with the LDS values */
atomic_min(&(clothMinBounds[clothIdentifier*4]), positionUInt.x);
atomic_min(&(clothMinBounds[clothIdentifier*4+1]), positionUInt.y);
atomic_min(&(clothMinBounds[clothIdentifier*4+2]), positionUInt.z);
atomic_max(&(clothMaxBounds[clothIdentifier*4]), positionUInt.x);
atomic_max(&(clothMaxBounds[clothIdentifier*4+1]), positionUInt.y);
atomic_max(&(clothMaxBounds[clothIdentifier*4+2]), positionUInt.z);
}
}
barrier(CLK_GLOBAL_MEM_FENCE);
/* Use global atomics to update the global versions of the data*/
if( get_local_id(0) < numSoftBodies )
{
/*atomic_min(&(g_clothMinBounds[get_local_id(0)].x), clothMinBounds[get_local_id(0)].x);*/
atomic_min(&(g_clothMinBounds[get_local_id(0)*4]), clothMinBounds[get_local_id(0)*4]);
atomic_min(&(g_clothMinBounds[get_local_id(0)*4+1]), clothMinBounds[get_local_id(0)*4+1]);
atomic_min(&(g_clothMinBounds[get_local_id(0)*4+2]), clothMinBounds[get_local_id(0)*4+2]);
atomic_max(&(g_clothMaxBounds[get_local_id(0)*4]), clothMaxBounds[get_local_id(0)*4]);
atomic_max(&(g_clothMaxBounds[get_local_id(0)*4+1]), clothMaxBounds[get_local_id(0)*4+1]);
atomic_max(&(g_clothMaxBounds[get_local_id(0)*4+2]), clothMaxBounds[get_local_id(0)*4+2]);
}
}
);

View File

@@ -1,35 +0,0 @@
MSTRINGIFY(
// Node indices for each link
//#define float3 float4
__kernel void
IntegrateKernel(
const int numNodes,
const float solverdt,
__global float * g_vertexInverseMasses,
__global float4 * g_vertexPositions,
__global float4 * g_vertexVelocity,
__global float4 * g_vertexPreviousPositions,
__global float4 * g_vertexForceAccumulator)
{
int nodeID = get_global_id(0);
if( nodeID < numNodes )
{
float3 position = g_vertexPositions[nodeID].xyz;
float3 velocity = g_vertexVelocity[nodeID].xyz;
float3 force = g_vertexForceAccumulator[nodeID].xyz;
float inverseMass = g_vertexInverseMasses[nodeID];
g_vertexPreviousPositions[nodeID] = (float4)(position, 0.f);
velocity += force * inverseMass * solverdt;
position += velocity * solverdt;
g_vertexForceAccumulator[nodeID] = (float4)(0.f, 0.f, 0.f, 0.0f);
g_vertexPositions[nodeID] = (float4)(position, 0.f);
g_vertexVelocity[nodeID] = (float4)(velocity, 0.f);
}
}
);

View File

@@ -1,46 +0,0 @@
MSTRINGIFY(
cbuffer OutputToVertexArrayCB : register( b0 )
{
int startNode;
int numNodes;
int offsetX;
int strideX;
int offsetN;
int strideN;
int padding1;
int padding2;
};
StructuredBuffer<float4> g_nodesx : register( t0 );
StructuredBuffer<float4> g_nodesn : register( t1 );
RWStructuredBuffer<float> g_vertexBuffer : register( u0 );
[numthreads(128, 1, 1)]
void
OutputToVertexArrayKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
{
int nodeID = DTid.x;
if( nodeID < numNodes )
{
float4 nodeX = g_nodesx[nodeID + startNode];
float4 nodeN = g_nodesn[nodeID + startNode];
// Stride should account for the float->float4 conversion
int positionDestination = nodeID * strideX + offsetX;
g_vertexBuffer[positionDestination] = nodeX.x;
g_vertexBuffer[positionDestination+1] = nodeX.y;
g_vertexBuffer[positionDestination+2] = nodeX.z;
int normalDestination = nodeID * strideN + offsetN;
g_vertexBuffer[normalDestination] = nodeN.x;
g_vertexBuffer[normalDestination+1] = nodeN.y;
g_vertexBuffer[normalDestination+2] = nodeN.z;
}
}
);

View File

@@ -1,34 +0,0 @@
MSTRINGIFY(
__kernel void
PrepareLinksKernel(
const int numLinks,
__global int2 * g_linksVertexIndices,
__global float * g_linksMassLSC,
__global float4 * g_nodesPreviousPosition,
__global float * g_linksLengthRatio,
__global float4 * g_linksCurrentLength)
{
int linkID = get_global_id(0);
if( linkID < numLinks )
{
int2 nodeIndices = g_linksVertexIndices[linkID];
int node0 = nodeIndices.x;
int node1 = nodeIndices.y;
float4 nodePreviousPosition0 = g_nodesPreviousPosition[node0];
float4 nodePreviousPosition1 = g_nodesPreviousPosition[node1];
float massLSC = g_linksMassLSC[linkID];
float4 linkCurrentLength = nodePreviousPosition1 - nodePreviousPosition0;
float linkLengthRatio = dot(linkCurrentLength, linkCurrentLength)*massLSC;
linkLengthRatio = 1.0f/linkLengthRatio;
g_linksCurrentLength[linkID] = linkCurrentLength;
g_linksLengthRatio[linkID] = linkLengthRatio;
}
}
);

View File

@@ -1,195 +0,0 @@
MSTRINGIFY(
typedef struct
{
int firstObject;
int endObject;
} CollisionObjectIndices;
typedef struct
{
float4 shapeTransform[4]; // column major 4x4 matrix
float4 linearVelocity;
float4 angularVelocity;
int softBodyIdentifier;
int collisionShapeType;
// Shape information
// Compressed from the union
float radius;
float halfHeight;
int upAxis;
float margin;
float friction;
int padding0;
} CollisionShapeDescription;
/* From btBroadphaseProxy.h */
__constant int CAPSULE_SHAPE_PROXYTYPE = 10;
/* Multiply column-major matrix against vector */
float4 matrixVectorMul( float4 matrix[4], float4 vector )
{
float4 returnVector;
float4 row0 = (float4)(matrix[0].x, matrix[1].x, matrix[2].x, matrix[3].x);
float4 row1 = (float4)(matrix[0].y, matrix[1].y, matrix[2].y, matrix[3].y);
float4 row2 = (float4)(matrix[0].z, matrix[1].z, matrix[2].z, matrix[3].z);
float4 row3 = (float4)(matrix[0].w, matrix[1].w, matrix[2].w, matrix[3].w);
returnVector.x = dot(row0, vector);
returnVector.y = dot(row1, vector);
returnVector.z = dot(row2, vector);
returnVector.w = dot(row3, vector);
return returnVector;
}
__kernel void
SolveCollisionsAndUpdateVelocitiesKernel(
const int numNodes,
const float isolverdt,
__global int *g_vertexClothIdentifier,
__global float4 *g_vertexPreviousPositions,
__global float * g_perClothFriction,
__global float * g_clothDampingFactor,
__global CollisionObjectIndices * g_perClothCollisionObjectIndices,
__global CollisionShapeDescription * g_collisionObjectDetails,
__global float4 * g_vertexForces,
__global float4 *g_vertexVelocities,
__global float4 *g_vertexPositions)
{
int nodeID = get_global_id(0);
float3 forceOnVertex = (float3)(0.f, 0.f, 0.f);
if( get_global_id(0) < numNodes )
{
int clothIdentifier = g_vertexClothIdentifier[nodeID];
// Abort if this is not a valid cloth
if( clothIdentifier < 0 )
return;
float4 position = (float4)(g_vertexPositions[nodeID].xyz, 1.f);
float4 previousPosition = (float4)(g_vertexPreviousPositions[nodeID].xyz, 1.f);
float3 velocity;
float clothFriction = g_perClothFriction[clothIdentifier];
float dampingFactor = g_clothDampingFactor[clothIdentifier];
float velocityCoefficient = (1.f - dampingFactor);
CollisionObjectIndices collisionObjectIndices = g_perClothCollisionObjectIndices[clothIdentifier];
if( collisionObjectIndices.firstObject != collisionObjectIndices.endObject )
{
velocity = (float3)(15, 0, 0);
/* We have some possible collisions to deal with */
for( int collision = collisionObjectIndices.firstObject; collision < collisionObjectIndices.endObject; ++collision )
{
CollisionShapeDescription shapeDescription = g_collisionObjectDetails[collision];
float colliderFriction = shapeDescription.friction;
if( shapeDescription.collisionShapeType == CAPSULE_SHAPE_PROXYTYPE )
{
/* Colliding with a capsule */
float capsuleHalfHeight = shapeDescription.halfHeight;
float capsuleRadius = shapeDescription.radius;
float capsuleMargin = shapeDescription.margin;
int capsuleupAxis = shapeDescription.upAxis;
/* Four columns of worldTransform matrix */
float4 worldTransform[4];
worldTransform[0] = shapeDescription.shapeTransform[0];
worldTransform[1] = shapeDescription.shapeTransform[1];
worldTransform[2] = shapeDescription.shapeTransform[2];
worldTransform[3] = shapeDescription.shapeTransform[3];
// Correctly define capsule centerline vector
float4 c1 = (float4)(0.f, 0.f, 0.f, 1.f);
float4 c2 = (float4)(0.f, 0.f, 0.f, 1.f);
c1.x = select( 0.f, -capsuleHalfHeight, capsuleupAxis == 0 );
c1.y = select( 0.f, -capsuleHalfHeight, capsuleupAxis == 1 );
c1.z = select( 0.f, -capsuleHalfHeight, capsuleupAxis == 2 );
c2.x = -c1.x;
c2.y = -c1.y;
c2.z = -c1.z;
float4 worldC1 = matrixVectorMul(worldTransform, c1);
float4 worldC2 = matrixVectorMul(worldTransform, c2);
float3 segment = (worldC2 - worldC1).xyz;
/* compute distance of tangent to vertex along line segment in capsule */
float distanceAlongSegment = -( dot( (worldC1 - position).xyz, segment ) / dot(segment, segment) );
float4 closestPoint = (worldC1 + (float4)(segment * distanceAlongSegment, 0.f));
float distanceFromLine = length(position - closestPoint);
float distanceFromC1 = length(worldC1 - position);
float distanceFromC2 = length(worldC2 - position);
/* Final distance from collision, point to push from, direction to push in
for impulse force */
float dist;
float3 normalVector;
if( distanceAlongSegment < 0 )
{
dist = distanceFromC1;
normalVector = normalize(position - worldC1).xyz;
} else if( distanceAlongSegment > 1.f ) {
dist = distanceFromC2;
normalVector = normalize(position - worldC2).xyz;
} else {
dist = distanceFromLine;
normalVector = normalize(position - closestPoint).xyz;
}
float3 colliderLinearVelocity = shapeDescription.linearVelocity.xyz;
float3 colliderAngularVelocity = shapeDescription.angularVelocity.xyz;
float3 velocityOfSurfacePoint = colliderLinearVelocity + cross(colliderAngularVelocity, position.xyz - (float3)(worldTransform[0].w, worldTransform[1].w, worldTransform[2].w));
float minDistance = capsuleRadius + capsuleMargin;
/* In case of no collision, this is the value of velocity */
velocity = (position - previousPosition).xyz * velocityCoefficient * isolverdt;
// Check for a collision
if( dist < minDistance )
{
/* Project back to surface along normal */
position = position + (float4)((minDistance - dist)*normalVector*0.9f, 0.f);
velocity = (position - previousPosition).xyz * velocityCoefficient * isolverdt;
float3 relativeVelocity = velocity - velocityOfSurfacePoint;
float3 p1 = normalize(cross(normalVector, segment));
float3 p2 = normalize(cross(p1, normalVector));
/* Full friction is sum of velocities in each direction of plane */
float3 frictionVector = p1*dot(relativeVelocity, p1) + p2*dot(relativeVelocity, p2);
/* Real friction is peak friction corrected by friction coefficients */
frictionVector = frictionVector * (colliderFriction*clothFriction);
float approachSpeed = dot(relativeVelocity, normalVector);
if( approachSpeed <= 0.0f )
forceOnVertex -= frictionVector;
}
}
}
} else {
/* Update velocity */
float3 difference = position.xyz - previousPosition.xyz;
velocity = difference*velocityCoefficient*isolverdt;
}
g_vertexVelocities[nodeID] = (float4)(velocity, 0.f);
/* Update external force */
g_vertexForces[nodeID] = (float4)(forceOnVertex, 0.f);
g_vertexPositions[nodeID] = (float4)(position.xyz, 0.f);
}
}
);

View File

@@ -1,213 +0,0 @@
MSTRINGIFY(
typedef struct
{
int firstObject;
int endObject;
} CollisionObjectIndices;
typedef struct
{
float4 shapeTransform[4]; /* column major 4x4 matrix */
float4 linearVelocity;
float4 angularVelocity;
int softBodyIdentifier;
int collisionShapeType;
// Shape information
// Compressed from the union
float radius;
float halfHeight;
int upAxis;
float margin;
float friction;
int padding0;
} CollisionShapeDescription;
/* From btBroadphaseProxy.h */
__constant int CAPSULE_SHAPE_PROXYTYPE = 10;
/* Multiply column-major matrix against vector */
float4 matrixVectorMul( float4 matrix[4], float4 vector )
{
float4 returnVector;
float4 row0 = (float4)(matrix[0].x, matrix[1].x, matrix[2].x, matrix[3].x);
float4 row1 = (float4)(matrix[0].y, matrix[1].y, matrix[2].y, matrix[3].y);
float4 row2 = (float4)(matrix[0].z, matrix[1].z, matrix[2].z, matrix[3].z);
float4 row3 = (float4)(matrix[0].w, matrix[1].w, matrix[2].w, matrix[3].w);
returnVector.x = dot(row0, vector);
returnVector.y = dot(row1, vector);
returnVector.z = dot(row2, vector);
returnVector.w = dot(row3, vector);
return returnVector;
}
__kernel void
SolveCollisionsAndUpdateVelocitiesKernel(
const int numNodes,
const float isolverdt,
__global int *g_vertexClothIdentifier,
__global float4 *g_vertexPreviousPositions,
__global float * g_perClothFriction,
__global float * g_clothDampingFactor,
__global CollisionObjectIndices * g_perClothCollisionObjectIndices,
__global CollisionShapeDescription * g_collisionObjectDetails,
__global float4 * g_vertexForces,
__global float4 *g_vertexVelocities,
__global float4 *g_vertexPositions,
__local CollisionShapeDescription *localCollisionShapes)
{
int nodeID = get_global_id(0);
float3 forceOnVertex = (float3)(0.f, 0.f, 0.f);
int clothIdentifier = g_vertexClothIdentifier[nodeID];
// Abort if this is not a valid cloth
if( clothIdentifier < 0 )
return;
float4 position = (float4)(g_vertexPositions[nodeID].xyz, 1.f);
float4 previousPosition = (float4)(g_vertexPreviousPositions[nodeID].xyz, 1.f);
float3 velocity;
float clothFriction = g_perClothFriction[clothIdentifier];
float dampingFactor = g_clothDampingFactor[clothIdentifier];
float velocityCoefficient = (1.f - dampingFactor);
CollisionObjectIndices collisionObjectIndices = g_perClothCollisionObjectIndices[clothIdentifier];
int numObjects = collisionObjectIndices.endObject - collisionObjectIndices.firstObject;
if( numObjects > 0 )
{
/* We have some possible collisions to deal with */
/* First load all of the collision objects into LDS */
int numObjects = collisionObjectIndices.endObject - collisionObjectIndices.firstObject;
if( get_local_id(0) < numObjects )
{
localCollisionShapes[get_local_id(0)] = g_collisionObjectDetails[ collisionObjectIndices.firstObject + get_local_id(0) ];
}
}
/* Safe as the vertices are padded so that not more than one soft body is in a group */
barrier(CLK_LOCAL_MEM_FENCE);
/* Annoyingly, even though I know the flow control is not varying, the compiler will not let me skip this */
if( numObjects > 0 )
{
velocity = (float3)(0, 0, 0);
// We have some possible collisions to deal with
for( int collision = 0; collision < numObjects; ++collision )
{
CollisionShapeDescription shapeDescription = localCollisionShapes[collision];
float colliderFriction = shapeDescription.friction;
if( shapeDescription.collisionShapeType == CAPSULE_SHAPE_PROXYTYPE )
{
/* Colliding with a capsule */
float capsuleHalfHeight = localCollisionShapes[collision].halfHeight;
float capsuleRadius = localCollisionShapes[collision].radius;
float capsuleMargin = localCollisionShapes[collision].margin;
int capsuleupAxis = localCollisionShapes[collision].upAxis;
float4 worldTransform[4];
worldTransform[0] = localCollisionShapes[collision].shapeTransform[0];
worldTransform[1] = localCollisionShapes[collision].shapeTransform[1];
worldTransform[2] = localCollisionShapes[collision].shapeTransform[2];
worldTransform[3] = localCollisionShapes[collision].shapeTransform[3];
// Correctly define capsule centerline vector
float4 c1 = (float4)(0.f, 0.f, 0.f, 1.f);
float4 c2 = (float4)(0.f, 0.f, 0.f, 1.f);
c1.x = select( 0.f, -capsuleHalfHeight, capsuleupAxis == 0 );
c1.y = select( 0.f, -capsuleHalfHeight, capsuleupAxis == 1 );
c1.z = select( 0.f, -capsuleHalfHeight, capsuleupAxis == 2 );
c2.x = -c1.x;
c2.y = -c1.y;
c2.z = -c1.z;
float4 worldC1 = matrixVectorMul(worldTransform, c1);
float4 worldC2 = matrixVectorMul(worldTransform, c2);
float3 segment = (worldC2 - worldC1).xyz;
/* compute distance of tangent to vertex along line segment in capsule */
float distanceAlongSegment = -( dot( (worldC1 - position).xyz, segment ) / dot(segment, segment) );
float4 closestPoint = (worldC1 + (float4)(segment * distanceAlongSegment, 0.f));
float distanceFromLine = length(position - closestPoint);
float distanceFromC1 = length(worldC1 - position);
float distanceFromC2 = length(worldC2 - position);
/* Final distance from collision, point to push from, direction to push in
for impulse force */
float dist;
float3 normalVector;
if( distanceAlongSegment < 0 )
{
dist = distanceFromC1;
normalVector = normalize(position - worldC1).xyz;
} else if( distanceAlongSegment > 1.f ) {
dist = distanceFromC2;
normalVector = normalize(position - worldC2).xyz;
} else {
dist = distanceFromLine;
normalVector = normalize(position - closestPoint).xyz;
}
float3 colliderLinearVelocity = localCollisionShapes[collision].linearVelocity.xyz;
float3 colliderAngularVelocity = localCollisionShapes[collision].angularVelocity.xyz;
float3 velocityOfSurfacePoint = colliderLinearVelocity + cross(colliderAngularVelocity, position.xyz - (float3)(worldTransform[0].w, worldTransform[1].w, worldTransform[2].w));
float minDistance = capsuleRadius + capsuleMargin;
/* In case of no collision, this is the value of velocity */
velocity = (position - previousPosition).xyz * velocityCoefficient * isolverdt;
/* Check for a collision */
if( dist < minDistance )
{
/* Project back to surface along normal */
position = position + (float4)((minDistance - dist)*normalVector*0.9f, 0.f);
velocity = (position - previousPosition).xyz * velocityCoefficient * isolverdt;
float3 relativeVelocity = velocity - velocityOfSurfacePoint;
float3 p1 = normalize(cross(normalVector, segment));
float3 p2 = normalize(cross(p1, normalVector));
/* Full friction is sum of velocities in each direction of plane */
float3 frictionVector = p1*dot(relativeVelocity, p1) + p2*dot(relativeVelocity, p2);
/* Real friction is peak friction corrected by friction coefficients */
frictionVector = frictionVector * (colliderFriction*clothFriction);
float approachSpeed = dot(relativeVelocity, normalVector);
if( approachSpeed <= 0.0f )
forceOnVertex -= frictionVector;
}
}
}
} else {
/* Update velocity */
float3 difference = position.xyz - previousPosition.xyz;
velocity = difference*velocityCoefficient*isolverdt;
}
g_vertexVelocities[nodeID] = (float4)(velocity, 0.f);
/* Update external force */
g_vertexForces[nodeID] = (float4)(forceOnVertex, 0.f);
g_vertexPositions[nodeID] = (float4)(position.xyz, 0.f);
}
);

View File

@@ -1,55 +0,0 @@
MSTRINGIFY(
/*#define float3 float4
float dot3(float3 a, float3 b)
{
return a.x*b.x + a.y*b.y + a.z*b.z;
}*/
__kernel void
SolvePositionsFromLinksKernel(
const int startLink,
const int numLinks,
const float kst,
const float ti,
__global int2 * g_linksVertexIndices,
__global float * g_linksMassLSC,
__global float * g_linksRestLengthSquared,
__global float * g_verticesInverseMass,
__global float4 * g_vertexPositions)
{
int linkID = get_global_id(0) + startLink;
if( get_global_id(0) < numLinks )
{
float massLSC = g_linksMassLSC[linkID];
float restLengthSquared = g_linksRestLengthSquared[linkID];
if( massLSC > 0.0f )
{
int2 nodeIndices = g_linksVertexIndices[linkID];
int node0 = nodeIndices.x;
int node1 = nodeIndices.y;
float3 position0 = g_vertexPositions[node0].xyz;
float3 position1 = g_vertexPositions[node1].xyz;
float inverseMass0 = g_verticesInverseMass[node0];
float inverseMass1 = g_verticesInverseMass[node1];
float3 del = position1 - position0;
float len = dot(del, del);
float k = ((restLengthSquared - len)/(massLSC*(restLengthSquared+len)))*kst;
position0 = position0 - del*(k*inverseMass0);
position1 = position1 + del*(k*inverseMass1);
g_vertexPositions[node0] = (float4)(position0, 0.f);
g_vertexPositions[node1] = (float4)(position1, 0.f);
}
}
}
);

View File

@@ -1,129 +0,0 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
MSTRINGIFY(
float mydot3(float4 a, float4 b)
{
return a.x*b.x + a.y*b.y + a.z*b.z;
}
__kernel void
SolvePositionsFromLinksKernel(
const int startWaveInBatch,
const int numWaves,
const float kst,
const float ti,
__global int2 *g_wavefrontBatchCountsVertexCounts,
__global int *g_vertexAddressesPerWavefront,
__global int2 * g_linksVertexIndices,
__global float * g_linksMassLSC,
__global float * g_linksRestLengthSquared,
__global float * g_verticesInverseMass,
__global float4 * g_vertexPositions,
__local int2 *wavefrontBatchCountsVertexCounts,
__local float4 *vertexPositionSharedData,
__local float *vertexInverseMassSharedData)
{
const int laneInWavefront = (get_global_id(0) & (WAVEFRONT_SIZE-1));
const int wavefront = startWaveInBatch + (get_global_id(0) / WAVEFRONT_SIZE);
const int firstWavefrontInBlock = startWaveInBatch + get_group_id(0) * WAVEFRONT_BLOCK_MULTIPLIER;
const int localWavefront = wavefront - firstWavefrontInBlock;
// Mask out in case there's a stray "wavefront" at the end that's been forced in through the multiplier
if( wavefront < (startWaveInBatch + numWaves) )
{
// Load the batch counts for the wavefronts
int2 batchesAndVerticesWithinWavefront = g_wavefrontBatchCountsVertexCounts[wavefront];
int batchesWithinWavefront = batchesAndVerticesWithinWavefront.x;
int verticesUsedByWave = batchesAndVerticesWithinWavefront.y;
// Load the vertices for the wavefronts
for( int vertex = laneInWavefront; vertex < verticesUsedByWave; vertex+=WAVEFRONT_SIZE )
{
int vertexAddress = g_vertexAddressesPerWavefront[wavefront*MAX_NUM_VERTICES_PER_WAVE + vertex];
vertexPositionSharedData[localWavefront*MAX_NUM_VERTICES_PER_WAVE + vertex] = g_vertexPositions[vertexAddress];
vertexInverseMassSharedData[localWavefront*MAX_NUM_VERTICES_PER_WAVE + vertex] = g_verticesInverseMass[vertexAddress];
}
mem_fence(CLK_LOCAL_MEM_FENCE);
// Loop through the batches performing the solve on each in LDS
int baseDataLocationForWave = WAVEFRONT_SIZE * wavefront * MAX_BATCHES_PER_WAVE;
//for( int batch = 0; batch < batchesWithinWavefront; ++batch )
int batch = 0;
do
{
int baseDataLocation = baseDataLocationForWave + WAVEFRONT_SIZE * batch;
int locationOfValue = baseDataLocation + laneInWavefront;
// These loads should all be perfectly linear across the WF
int2 localVertexIndices = g_linksVertexIndices[locationOfValue];
float massLSC = g_linksMassLSC[locationOfValue];
float restLengthSquared = g_linksRestLengthSquared[locationOfValue];
// LDS vertex addresses based on logical wavefront number in block and loaded index
int vertexAddress0 = MAX_NUM_VERTICES_PER_WAVE * localWavefront + localVertexIndices.x;
int vertexAddress1 = MAX_NUM_VERTICES_PER_WAVE * localWavefront + localVertexIndices.y;
float4 position0 = vertexPositionSharedData[vertexAddress0];
float4 position1 = vertexPositionSharedData[vertexAddress1];
float inverseMass0 = vertexInverseMassSharedData[vertexAddress0];
float inverseMass1 = vertexInverseMassSharedData[vertexAddress1];
float4 del = position1 - position0;
float len = mydot3(del, del);
float k = 0;
if( massLSC > 0.0f )
{
k = ((restLengthSquared - len)/(massLSC*(restLengthSquared+len)))*kst;
}
position0 = position0 - del*(k*inverseMass0);
position1 = position1 + del*(k*inverseMass1);
// Ensure compiler does not re-order memory operations
mem_fence(CLK_LOCAL_MEM_FENCE);
vertexPositionSharedData[vertexAddress0] = position0;
vertexPositionSharedData[vertexAddress1] = position1;
// Ensure compiler does not re-order memory operations
mem_fence(CLK_LOCAL_MEM_FENCE);
++batch;
} while( batch < batchesWithinWavefront );
// Update the global memory vertices for the wavefronts
for( int vertex = laneInWavefront; vertex < verticesUsedByWave; vertex+=WAVEFRONT_SIZE )
{
int vertexAddress = g_vertexAddressesPerWavefront[wavefront*MAX_NUM_VERTICES_PER_WAVE + vertex];
g_vertexPositions[vertexAddress] = (float4)(vertexPositionSharedData[localWavefront*MAX_NUM_VERTICES_PER_WAVE + vertex].xyz, 0.f);
}
}
}
);

View File

@@ -1,44 +0,0 @@
MSTRINGIFY(
/*#define float3 float4
float dot3(float3 a, float3 b)
{
return a.x*b.x + a.y*b.y + a.z*b.z;
}*/
__kernel void
UpdateConstantsKernel(
const int numLinks,
__global int2 * g_linksVertexIndices,
__global float4 * g_vertexPositions,
__global float * g_vertexInverseMasses,
__global float * g_linksMaterialLSC,
__global float * g_linksMassLSC,
__global float * g_linksRestLengthSquared,
__global float * g_linksRestLengths)
{
int linkID = get_global_id(0);
if( linkID < numLinks )
{
int2 nodeIndices = g_linksVertexIndices[linkID];
int node0 = nodeIndices.x;
int node1 = nodeIndices.y;
float linearStiffnessCoefficient = g_linksMaterialLSC[ linkID ];
float3 position0 = g_vertexPositions[node0].xyz;
float3 position1 = g_vertexPositions[node1].xyz;
float inverseMass0 = g_vertexInverseMasses[node0];
float inverseMass1 = g_vertexInverseMasses[node1];
float3 difference = position0 - position1;
float length2 = dot(difference, difference);
float length = sqrt(length2);
g_linksRestLengths[linkID] = length;
g_linksMassLSC[linkID] = (inverseMass0 + inverseMass1)/linearStiffnessCoefficient;
g_linksRestLengthSquared[linkID] = length*length;
}
}
);

View File

@@ -1,40 +0,0 @@
MSTRINGIFY(
//#define float3 float4
__kernel void
updateVelocitiesFromPositionsWithVelocitiesKernel(
int numNodes,
float isolverdt,
__global float4 * g_vertexPositions,
__global float4 * g_vertexPreviousPositions,
__global int * g_vertexClothIndices,
__global float *g_clothVelocityCorrectionCoefficients,
__global float * g_clothDampingFactor,
__global float4 * g_vertexVelocities,
__global float4 * g_vertexForces)
{
int nodeID = get_global_id(0);
if( nodeID < numNodes )
{
float3 position = g_vertexPositions[nodeID].xyz;
float3 previousPosition = g_vertexPreviousPositions[nodeID].xyz;
float3 velocity = g_vertexVelocities[nodeID].xyz;
int clothIndex = g_vertexClothIndices[nodeID];
float velocityCorrectionCoefficient = g_clothVelocityCorrectionCoefficients[clothIndex];
float dampingFactor = g_clothDampingFactor[clothIndex];
float velocityCoefficient = (1.f - dampingFactor);
float3 difference = position - previousPosition;
velocity += difference*velocityCorrectionCoefficient*isolverdt;
// Damp the velocity
velocity *= velocityCoefficient;
g_vertexVelocities[nodeID] = (float4)(velocity, 0.f);
g_vertexForces[nodeID] = (float4)(0.f, 0.f, 0.f, 0.f);
}
}
);

View File

@@ -1,103 +0,0 @@
MSTRINGIFY(
//#define float3 float4
/*float length3(float3 a)
{
a.w = 0;
return length(a);
}
float normalize3(float3 a)
{
a.w = 0;
return normalize(a);
}*/
__kernel void
ResetNormalsAndAreasKernel(
const unsigned int numNodes,
__global float4 * g_vertexNormals,
__global float * g_vertexArea)
{
if( get_global_id(0) < numNodes )
{
g_vertexNormals[get_global_id(0)] = (float4)(0.0f, 0.0f, 0.0f, 0.0f);
g_vertexArea[get_global_id(0)] = 0.0f;
}
}
__kernel void
UpdateSoftBodiesKernel(
const unsigned int startFace,
const unsigned int numFaces,
__global int4 * g_triangleVertexIndexSet,
__global float4 * g_vertexPositions,
__global float4 * g_vertexNormals,
__global float * g_vertexArea,
__global float4 * g_triangleNormals,
__global float * g_triangleArea)
{
int faceID = get_global_id(0) + startFace;
if( get_global_id(0) < numFaces )
{
int4 triangleIndexSet = g_triangleVertexIndexSet[ faceID ];
int nodeIndex0 = triangleIndexSet.x;
int nodeIndex1 = triangleIndexSet.y;
int nodeIndex2 = triangleIndexSet.z;
float3 node0 = g_vertexPositions[nodeIndex0].xyz;
float3 node1 = g_vertexPositions[nodeIndex1].xyz;
float3 node2 = g_vertexPositions[nodeIndex2].xyz;
float3 nodeNormal0 = g_vertexNormals[nodeIndex0].xyz;
float3 nodeNormal1 = g_vertexNormals[nodeIndex1].xyz;
float3 nodeNormal2 = g_vertexNormals[nodeIndex2].xyz;
float vertexArea0 = g_vertexArea[nodeIndex0];
float vertexArea1 = g_vertexArea[nodeIndex1];
float vertexArea2 = g_vertexArea[nodeIndex2];
float3 vector0 = node1 - node0;
float3 vector1 = node2 - node0;
float3 faceNormal = cross(vector0.xyz, vector1.xyz);
float triangleArea = length(faceNormal);
nodeNormal0 = nodeNormal0 + faceNormal;
nodeNormal1 = nodeNormal1 + faceNormal;
nodeNormal2 = nodeNormal2 + faceNormal;
vertexArea0 = vertexArea0 + triangleArea;
vertexArea1 = vertexArea1 + triangleArea;
vertexArea2 = vertexArea2 + triangleArea;
g_triangleNormals[faceID] = (float4)(normalize(faceNormal), 0.f);
g_vertexNormals[nodeIndex0] = (float4)(nodeNormal0, 0.f);
g_vertexNormals[nodeIndex1] = (float4)(nodeNormal1, 0.f);
g_vertexNormals[nodeIndex2] = (float4)(nodeNormal2, 0.f);
g_triangleArea[faceID] = triangleArea;
g_vertexArea[nodeIndex0] = vertexArea0;
g_vertexArea[nodeIndex1] = vertexArea1;
g_vertexArea[nodeIndex2] = vertexArea2;
}
}
__kernel void
NormalizeNormalsAndAreasKernel(
const unsigned int numNodes,
__global int * g_vertexTriangleCount,
__global float4 * g_vertexNormals,
__global float * g_vertexArea)
{
if( get_global_id(0) < numNodes )
{
float4 normal = g_vertexNormals[get_global_id(0)];
float area = g_vertexArea[get_global_id(0)];
int numTriangles = g_vertexTriangleCount[get_global_id(0)];
float vectorLength = length(normal);
g_vertexNormals[get_global_id(0)] = normalize(normal);
g_vertexArea[get_global_id(0)] = area/(float)(numTriangles);
}
}
);

View File

@@ -1,36 +0,0 @@
MSTRINGIFY(
//#define float3 float4
__kernel void
updateVelocitiesFromPositionsWithoutVelocitiesKernel(
const int numNodes,
const float isolverdt,
__global float4 * g_vertexPositions,
__global float4 * g_vertexPreviousPositions,
__global int * g_vertexClothIndices,
__global float * g_clothDampingFactor,
__global float4 * g_vertexVelocities,
__global float4 * g_vertexForces)
{
int nodeID = get_global_id(0);
if( nodeID < numNodes )
{
float3 position = g_vertexPositions[nodeID].xyz;
float3 previousPosition = g_vertexPreviousPositions[nodeID].xyz;
float3 velocity = g_vertexVelocities[nodeID].xyz;
int clothIndex = g_vertexClothIndices[nodeID];
float dampingFactor = g_clothDampingFactor[clothIndex];
float velocityCoefficient = (1.f - dampingFactor);
float3 difference = position - previousPosition;
velocity = difference*velocityCoefficient*isolverdt;
g_vertexVelocities[nodeID] = (float4)(velocity, 0.f);
g_vertexForces[nodeID] = (float4)(0.f, 0.f, 0.f, 0.f);
}
}
);

View File

@@ -1,26 +0,0 @@
MSTRINGIFY(
//#define float3 float4
__kernel void
UpdatePositionsFromVelocitiesKernel(
const int numNodes,
const float solverSDT,
__global float4 * g_vertexVelocities,
__global float4 * g_vertexPreviousPositions,
__global float4 * g_vertexCurrentPosition)
{
int vertexID = get_global_id(0);
if( vertexID < numNodes )
{
float3 previousPosition = g_vertexPreviousPositions[vertexID].xyz;
float3 velocity = g_vertexVelocities[vertexID].xyz;
float3 newPosition = previousPosition + velocity*solverSDT;
g_vertexCurrentPosition[vertexID] = (float4)(newPosition, 0.f);
g_vertexPreviousPositions[vertexID] = (float4)(newPosition, 0.f);
}
}
);

View File

@@ -1,45 +0,0 @@
MSTRINGIFY(
__kernel void
VSolveLinksKernel(
int startLink,
int numLinks,
float kst,
__global int2 * g_linksVertexIndices,
__global float * g_linksLengthRatio,
__global float4 * g_linksCurrentLength,
__global float * g_vertexInverseMass,
__global float4 * g_vertexVelocity)
{
int linkID = get_global_id(0) + startLink;
if( get_global_id(0) < numLinks )
{
int2 nodeIndices = g_linksVertexIndices[linkID];
int node0 = nodeIndices.x;
int node1 = nodeIndices.y;
float linkLengthRatio = g_linksLengthRatio[linkID];
float3 linkCurrentLength = g_linksCurrentLength[linkID].xyz;
float3 vertexVelocity0 = g_vertexVelocity[node0].xyz;
float3 vertexVelocity1 = g_vertexVelocity[node1].xyz;
float vertexInverseMass0 = g_vertexInverseMass[node0];
float vertexInverseMass1 = g_vertexInverseMass[node1];
float3 nodeDifference = vertexVelocity0 - vertexVelocity1;
float dotResult = dot(linkCurrentLength, nodeDifference);
float j = -dotResult*linkLengthRatio*kst;
float3 velocityChange0 = linkCurrentLength*(j*vertexInverseMass0);
float3 velocityChange1 = linkCurrentLength*(j*vertexInverseMass1);
vertexVelocity0 += velocityChange0;
vertexVelocity1 -= velocityChange1;
g_vertexVelocity[node0] = (float4)(vertexVelocity0, 0.f);
g_vertexVelocity[node1] = (float4)(vertexVelocity1, 0.f);
}
}
);

View File

@@ -6,6 +6,18 @@ float adot3(float4 a, float4 b)
return a.x*b.x + a.y*b.y + a.z*b.z; return a.x*b.x + a.y*b.y + a.z*b.z;
} }
float alength3(float4 a)
{
a.w = 0;
return length(a);
}
float4 anormalize3(float4 a)
{
a.w = 0;
return normalize(a);
}
float4 projectOnAxis( float4 v, float4 a ) float4 projectOnAxis( float4 v, float4 a )
{ {
return (a*adot3(v, a)); return (a*adot3(v, a));
@@ -53,36 +65,35 @@ ApplyForcesKernel(
g_vertexVelocity[nodeID] = nodeV; g_vertexVelocity[nodeID] = nodeV;
float4 relativeWindVelocity = nodeV - clothWindVelocity; // Aerodynamics
float relativeSpeedSquared = dot(relativeWindVelocity, relativeWindVelocity); float4 rel_v = nodeV - clothWindVelocity;
float rel_v_len = alength3(rel_v);
float rel_v2 = dot(rel_v, rel_v);
if( relativeSpeedSquared > epsilon ) if( rel_v2 > epsilon )
{ {
// Correct direction of normal relative to wind direction and get dot product float4 rel_v_nrm = anormalize3(rel_v);
normal = normal * (dot(normal, relativeWindVelocity) < 0 ? -1.f : 1.f); float4 nrm = normal;
float dvNormal = dot(normal, relativeWindVelocity);
if( dvNormal > 0 )
{
float4 force = (float4)(0.f, 0.f, 0.f, 0.f);
float c0 = area * dvNormal * relativeSpeedSquared / 2.f;
float c1 = c0 * mediumDensity;
force += normal * (-c1 * liftFactor);
force += normalize(relativeWindVelocity)*(-c1 * dragFactor);
float dtim = solverdt * nodeIM; nrm = nrm * (dot(nrm, rel_v) < 0 ? -1.f : 1.f);
float4 forceDTIM = force * dtim;
float4 nodeFPlusForce = nodeF + force; float4 fDrag = (float4)(0.f, 0.f, 0.f, 0.f);
float4 fLift = (float4)(0.f, 0.f, 0.f, 0.f);
// m_nodesf[i] -= ProjectOnAxis(m_nodesv[i], force.normalized())/dtim; float n_dot_v = dot(nrm, rel_v_nrm);
float4 nodeFMinus = nodeF - (projectOnAxis(nodeV, normalize(force))/dtim);
nodeF = nodeFPlusForce; // drag force
//if( dot(forceDTIM, forceDTIM) > dot(nodeV, nodeV) ) if ( dragFactor > 0.f )
// nodeF = nodeFMinus; fDrag = 0.5f * dragFactor * mediumDensity * rel_v2 * area * n_dot_v * (-1.0f) * rel_v_nrm;
// lift force
// Check angle of attack
// cos(10<EFBFBD>) = 0.98480
if ( 0 < n_dot_v && n_dot_v < 0.98480f)
fLift = 0.5f * liftFactor * mediumDensity * rel_v_len * area * sqrt(1.0f-n_dot_v*n_dot_v) * (cross(cross(nrm, rel_v_nrm), rel_v_nrm));
nodeF += fDrag + fLift;
g_vertexForceAccumulator[nodeID] = nodeF; g_vertexForceAccumulator[nodeID] = nodeF;
}
} }
} }
} }

View File

@@ -0,0 +1,25 @@
MSTRINGIFY(
__kernel void
UpdateFixedVertexPositions(
const uint numNodes,
__global int * g_anchorIndex,
__global float4 * g_vertexPositions,
__global float4 * g_anchorPositions GUID_ARG)
{
unsigned int nodeID = get_global_id(0);
if( nodeID < numNodes )
{
int anchorIndex = g_anchorIndex[nodeID];
float4 position = g_vertexPositions[nodeID];
if ( anchorIndex >= 0 )
{
float4 anchorPosition = g_anchorPositions[anchorIndex];
g_vertexPositions[nodeID] = anchorPosition;
}
}
}
);

View File

@@ -5,17 +5,10 @@
#include "btSoftBodySolverVertexBuffer_OpenGL.h" #include "btSoftBodySolverVertexBuffer_OpenGL.h"
#include "BulletSoftBody/btSoftBody.h" #include "BulletSoftBody/btSoftBody.h"
#if (0)//CL_VERSION_1_1 == 1)
//OpenCL 1.1 kernels use float3
#define MSTRINGIFY(A) #A
static char* OutputToVertexArrayCLString =
#include "OpenCLC/OutputToVertexArray.cl"
#else
////OpenCL 1.0 kernels don't use float3 ////OpenCL 1.0 kernels don't use float3
#define MSTRINGIFY(A) #A #define MSTRINGIFY(A) #A
static char* OutputToVertexArrayCLString = static char* OutputToVertexArrayCLString =
#include "OpenCLC10/OutputToVertexArray.cl" #include "OpenCLC10/OutputToVertexArray.cl"
#endif //CL_VERSION_1_1
#define RELEASE_CL_KERNEL(kernelName) {if( kernelName ){ clReleaseKernel( kernelName ); kernelName = 0; }} #define RELEASE_CL_KERNEL(kernelName) {if( kernelName ){ clReleaseKernel( kernelName ); kernelName = 0; }}
@@ -105,8 +98,8 @@ bool btSoftBodySolverOutputCLtoGL::buildShaders()
if( m_shadersInitialized ) if( m_shadersInitialized )
return true; return true;
outputToVertexArrayWithNormalsKernel = clFunctions.compileCLKernelFromString( OutputToVertexArrayCLString, "OutputToVertexArrayWithNormalsKernel" ); outputToVertexArrayWithNormalsKernel = clFunctions.compileCLKernelFromString( OutputToVertexArrayCLString, "OutputToVertexArrayWithNormalsKernel" ,"","OpenCLC10/OutputToVertexArray.cl");
outputToVertexArrayWithoutNormalsKernel = clFunctions.compileCLKernelFromString( OutputToVertexArrayCLString, "OutputToVertexArrayWithoutNormalsKernel" ); outputToVertexArrayWithoutNormalsKernel = clFunctions.compileCLKernelFromString( OutputToVertexArrayCLString, "OutputToVertexArrayWithoutNormalsKernel" ,"","OpenCLC10/OutputToVertexArray.cl");
if( returnVal ) if( returnVal )

View File

@@ -45,7 +45,7 @@ public:
virtual bool moveToAccelerator(); virtual bool moveToAccelerator();
virtual bool moveFromAccelerator(); virtual bool moveFromAccelerator(bool bCopy = false, bool bCopyMinimum = true);
}; };

View File

@@ -20,10 +20,13 @@ subject to the following restrictions:
#include "btSoftBodySolver_OpenCL.h" #include "btSoftBodySolver_OpenCL.h"
#include "BulletSoftBody/btSoftBodySolverVertexBuffer.h" #include "BulletSoftBody/btSoftBodySolverVertexBuffer.h"
#include "BulletSoftBody/btSoftBody.h" #include "BulletSoftBody/btSoftBody.h"
#include "BulletSoftBody/btSoftBodyInternals.h"
#include "BulletCollision/CollisionShapes/btCapsuleShape.h" #include "BulletCollision/CollisionShapes/btCapsuleShape.h"
#include "BulletCollision/COllisionShapes/btSphereShape.h"
#include "LinearMath/btQuickprof.h" #include "LinearMath/btQuickprof.h"
#include <limits.h> #include <limits.h>
#define BT_SUPPRESS_OPENCL_ASSERTS #define BT_SUPPRESS_OPENCL_ASSERTS
#ifdef USE_MINICL #ifdef USE_MINICL
@@ -36,8 +39,7 @@ subject to the following restrictions:
#endif //__APPLE__ #endif //__APPLE__
#endif//USE_MINICL #endif//USE_MINICL
#define BT_DEFAULT_WORKGROUPSIZE 128 #define BT_DEFAULT_WORKGROUPSIZE 64
#define RELEASE_CL_KERNEL(kernelName) {if( kernelName ){ clReleaseKernel( kernelName ); kernelName = 0; }} #define RELEASE_CL_KERNEL(kernelName) {if( kernelName ){ clReleaseKernel( kernelName ); kernelName = 0; }}
@@ -45,32 +47,6 @@ subject to the following restrictions:
//CL_VERSION_1_1 seems broken on NVidia SDK so just disable it //CL_VERSION_1_1 seems broken on NVidia SDK so just disable it
#if (0)//CL_VERSION_1_1 == 1)
//OpenCL 1.1 kernels use float3
#define MSTRINGIFY(A) #A
static const char* PrepareLinksCLString =
#include "OpenCLC/PrepareLinks.cl"
static const char* UpdatePositionsFromVelocitiesCLString =
#include "OpenCLC/UpdatePositionsFromVelocities.cl"
static const char* SolvePositionsCLString =
#include "OpenCLC/SolvePositions.cl"
static const char* UpdateNodesCLString =
#include "OpenCLC/UpdateNodes.cl"
static const char* UpdatePositionsCLString =
#include "OpenCLC/UpdatePositions.cl"
static const char* UpdateConstantsCLString =
#include "OpenCLC/UpdateConstants.cl"
static const char* IntegrateCLString =
#include "OpenCLC/Integrate.cl"
static const char* ApplyForcesCLString =
#include "OpenCLC/ApplyForces.cl"
static const char* UpdateNormalsCLString =
#include "OpenCLC/UpdateNormals.cl"
static const char* VSolveLinksCLString =
#include "OpenCLC/VSolveLinks.cl"
static const char* SolveCollisionsAndUpdateVelocitiesCLString =
#include "OpenCLC/SolveCollisionsAndUpdateVelocities.cl"
#else
////OpenCL 1.0 kernels don't use float3 ////OpenCL 1.0 kernels don't use float3
#define MSTRINGIFY(A) #A #define MSTRINGIFY(A) #A
static const char* PrepareLinksCLString = static const char* PrepareLinksCLString =
@@ -89,13 +65,14 @@ static const char* IntegrateCLString =
#include "OpenCLC10/Integrate.cl" #include "OpenCLC10/Integrate.cl"
static const char* ApplyForcesCLString = static const char* ApplyForcesCLString =
#include "OpenCLC10/ApplyForces.cl" #include "OpenCLC10/ApplyForces.cl"
static const char* UpdateFixedVertexPositionsCLString =
#include "OpenCLC10/UpdateFixedVertexPositions.cl"
static const char* UpdateNormalsCLString = static const char* UpdateNormalsCLString =
#include "OpenCLC10/UpdateNormals.cl" #include "OpenCLC10/UpdateNormals.cl"
static const char* VSolveLinksCLString = static const char* VSolveLinksCLString =
#include "OpenCLC10/VSolveLinks.cl" #include "OpenCLC10/VSolveLinks.cl"
static const char* SolveCollisionsAndUpdateVelocitiesCLString = static const char* SolveCollisionsAndUpdateVelocitiesCLString =
#include "OpenCLC10/SolveCollisionsAndUpdateVelocities.cl" #include "OpenCLC10/SolveCollisionsAndUpdateVelocities.cl"
#endif //CL_VERSION_1_1
btSoftBodyVertexDataOpenCL::btSoftBodyVertexDataOpenCL( cl_command_queue queue, cl_context ctx) : btSoftBodyVertexDataOpenCL::btSoftBodyVertexDataOpenCL( cl_command_queue queue, cl_context ctx) :
@@ -141,18 +118,42 @@ bool btSoftBodyVertexDataOpenCL::moveToAccelerator()
return success; return success;
} }
bool btSoftBodyVertexDataOpenCL::moveFromAccelerator() bool btSoftBodyVertexDataOpenCL::moveFromAccelerator(bool bCopy, bool bCopyMinimum)
{ {
bool success = true; bool success = true;
success = success && m_clClothIdentifier.moveFromGPU();
success = success && m_clVertexPosition.moveFromGPU(); if (!bCopy)
success = success && m_clVertexPreviousPosition.moveFromGPU(); {
success = success && m_clVertexVelocity.moveFromGPU(); success = success && m_clClothIdentifier.moveFromGPU();
success = success && m_clVertexForceAccumulator.moveFromGPU(); success = success && m_clVertexPosition.moveFromGPU();
success = success && m_clVertexNormal.moveFromGPU(); success = success && m_clVertexPreviousPosition.moveFromGPU();
success = success && m_clVertexInverseMass.moveFromGPU(); success = success && m_clVertexVelocity.moveFromGPU();
success = success && m_clVertexArea.moveFromGPU(); success = success && m_clVertexForceAccumulator.moveFromGPU();
success = success && m_clVertexTriangleCount.moveFromGPU(); success = success && m_clVertexNormal.moveFromGPU();
success = success && m_clVertexInverseMass.moveFromGPU();
success = success && m_clVertexArea.moveFromGPU();
success = success && m_clVertexTriangleCount.moveFromGPU();
}
else
{
if (bCopyMinimum)
{
success = success && m_clVertexPosition.copyFromGPU();
success = success && m_clVertexNormal.copyFromGPU();
}
else
{
success = success && m_clClothIdentifier.copyFromGPU();
success = success && m_clVertexPosition.copyFromGPU();
success = success && m_clVertexPreviousPosition.copyFromGPU();
success = success && m_clVertexVelocity.copyFromGPU();
success = success && m_clVertexForceAccumulator.copyFromGPU();
success = success && m_clVertexNormal.copyFromGPU();
success = success && m_clVertexInverseMass.copyFromGPU();
success = success && m_clVertexArea.copyFromGPU();
success = success && m_clVertexTriangleCount.copyFromGPU();
}
}
if( success ) if( success )
m_onGPU = true; m_onGPU = true;
@@ -160,9 +161,6 @@ bool btSoftBodyVertexDataOpenCL::moveFromAccelerator()
return success; return success;
} }
btSoftBodyLinkDataOpenCL::btSoftBodyLinkDataOpenCL(cl_command_queue queue, cl_context ctx) btSoftBodyLinkDataOpenCL::btSoftBodyLinkDataOpenCL(cl_command_queue queue, cl_context ctx)
:m_cqCommandQue(queue), :m_cqCommandQue(queue),
m_clLinks( queue, ctx, &m_links, false ), m_clLinks( queue, ctx, &m_links, false ),
@@ -602,11 +600,12 @@ void btSoftBodyTriangleDataOpenCL::generateBatches()
btOpenCLSoftBodySolver::btOpenCLSoftBodySolver(cl_command_queue queue, cl_context ctx) : btOpenCLSoftBodySolver::btOpenCLSoftBodySolver(cl_command_queue queue, cl_context ctx, bool bUpdateAchchoredNodePos) :
m_linkData(queue, ctx), m_linkData(queue, ctx),
m_vertexData(queue, ctx), m_vertexData(queue, ctx),
m_triangleData(queue, ctx), m_triangleData(queue, ctx),
m_clFunctions(queue, ctx), m_defaultCLFunctions(queue, ctx),
m_currentCLFunctions(&m_defaultCLFunctions),
m_clPerClothAcceleration(queue, ctx, &m_perClothAcceleration, true ), m_clPerClothAcceleration(queue, ctx, &m_perClothAcceleration, true ),
m_clPerClothWindVelocity(queue, ctx, &m_perClothWindVelocity, true ), m_clPerClothWindVelocity(queue, ctx, &m_perClothWindVelocity, true ),
m_clPerClothDampingFactor(queue,ctx, &m_perClothDampingFactor, true ), m_clPerClothDampingFactor(queue,ctx, &m_perClothDampingFactor, true ),
@@ -617,10 +616,14 @@ btOpenCLSoftBodySolver::btOpenCLSoftBodySolver(cl_command_queue queue, cl_contex
m_clPerClothCollisionObjects( queue, ctx, &m_perClothCollisionObjects, true ), m_clPerClothCollisionObjects( queue, ctx, &m_perClothCollisionObjects, true ),
m_clCollisionObjectDetails( queue, ctx, &m_collisionObjectDetails, true ), m_clCollisionObjectDetails( queue, ctx, &m_collisionObjectDetails, true ),
m_clPerClothFriction( queue, ctx, &m_perClothFriction, false ), m_clPerClothFriction( queue, ctx, &m_perClothFriction, false ),
m_clAnchorPosition( queue, ctx, &m_anchorPosition, true ),
m_clAnchorIndex( queue, ctx, &m_anchorIndex, true),
m_cqCommandQue( queue ), m_cqCommandQue( queue ),
m_cxMainContext(ctx), m_cxMainContext(ctx),
m_defaultWorkGroupSize(BT_DEFAULT_WORKGROUPSIZE) m_defaultWorkGroupSize(BT_DEFAULT_WORKGROUPSIZE),
m_bUpdateAnchoredNodePos(bUpdateAchchoredNodePos)
{ {
// Initial we will clearly need to update solver constants // Initial we will clearly need to update solver constants
// For now this is global for the cloths linked with this solver - we should probably make this body specific // For now this is global for the cloths linked with this solver - we should probably make this body specific
// for performance in future once we understand more clearly when constants need to be updated // for performance in future once we understand more clearly when constants need to be updated
@@ -643,6 +646,7 @@ btOpenCLSoftBodySolver::btOpenCLSoftBodySolver(cl_command_queue queue, cl_contex
m_normalizeNormalsAndAreasKernel = 0; m_normalizeNormalsAndAreasKernel = 0;
m_outputToVertexArrayKernel = 0; m_outputToVertexArrayKernel = 0;
m_applyForcesKernel = 0; m_applyForcesKernel = 0;
m_updateFixedVertexPositionsKernel = 0;
} }
btOpenCLSoftBodySolver::~btOpenCLSoftBodySolver() btOpenCLSoftBodySolver::~btOpenCLSoftBodySolver()
@@ -666,14 +670,16 @@ void btOpenCLSoftBodySolver::releaseKernels()
RELEASE_CL_KERNEL( m_normalizeNormalsAndAreasKernel ); RELEASE_CL_KERNEL( m_normalizeNormalsAndAreasKernel );
RELEASE_CL_KERNEL( m_outputToVertexArrayKernel ); RELEASE_CL_KERNEL( m_outputToVertexArrayKernel );
RELEASE_CL_KERNEL( m_applyForcesKernel ); RELEASE_CL_KERNEL( m_applyForcesKernel );
RELEASE_CL_KERNEL( m_updateFixedVertexPositionsKernel );
m_shadersInitialized = false; m_shadersInitialized = false;
} }
void btOpenCLSoftBodySolver::copyBackToSoftBodies() void btOpenCLSoftBodySolver::copyBackToSoftBodies(bool bMove)
{ {
// Move the vertex data back to the host first // Move the vertex data back to the host first
m_vertexData.moveFromAccelerator(); m_vertexData.moveFromAccelerator(!bMove);
// Loop over soft bodies, copying all the vertex positions back for each body in turn // Loop over soft bodies, copying all the vertex positions back for each body in turn
for( int softBodyIndex = 0; softBodyIndex < m_softBodySet.size(); ++softBodyIndex ) for( int softBodyIndex = 0; softBodyIndex < m_softBodySet.size(); ++softBodyIndex )
@@ -688,15 +694,16 @@ void btOpenCLSoftBodySolver::copyBackToSoftBodies()
for( int vertex = 0; vertex < numVertices; ++vertex ) for( int vertex = 0; vertex < numVertices; ++vertex )
{ {
using Vectormath::Aos::Point3; using Vectormath::Aos::Point3;
Point3 vertexPosition( getVertexData().getVertexPositions()[firstVertex + vertex] ); Point3 vertexPosition( m_vertexData.getVertexPositions()[firstVertex + vertex] );
Point3 normal(m_vertexData.getNormal(firstVertex + vertex));
softBody->m_nodes[vertex].m_x.setX( vertexPosition.getX() ); softBody->m_nodes[vertex].m_x.setX( vertexPosition.getX() );
softBody->m_nodes[vertex].m_x.setY( vertexPosition.getY() ); softBody->m_nodes[vertex].m_x.setY( vertexPosition.getY() );
softBody->m_nodes[vertex].m_x.setZ( vertexPosition.getZ() ); softBody->m_nodes[vertex].m_x.setZ( vertexPosition.getZ() );
softBody->m_nodes[vertex].m_n.setX( vertexPosition.getX() ); softBody->m_nodes[vertex].m_n.setX( normal.getX() );
softBody->m_nodes[vertex].m_n.setY( vertexPosition.getY() ); softBody->m_nodes[vertex].m_n.setY( normal.getY() );
softBody->m_nodes[vertex].m_n.setZ( vertexPosition.getZ() ); softBody->m_nodes[vertex].m_n.setZ( normal.getZ() );
} }
} }
} // btOpenCLSoftBodySolver::copyBackToSoftBodies } // btOpenCLSoftBodySolver::copyBackToSoftBodies
@@ -710,7 +717,10 @@ void btOpenCLSoftBodySolver::optimize( btAlignedObjectArray< btSoftBody * > &sof
getTriangleData().clear(); getTriangleData().clear();
getLinkData().clear(); getLinkData().clear();
m_softBodySet.resize(0); m_softBodySet.resize(0);
m_anchorIndex.clear();
int maxPiterations = 0;
int maxViterations = 0;
for( int softBodyIndex = 0; softBodyIndex < softBodies.size(); ++softBodyIndex ) for( int softBodyIndex = 0; softBodyIndex < softBodies.size(); ++softBodyIndex )
{ {
@@ -759,6 +769,8 @@ void btOpenCLSoftBodySolver::optimize( btAlignedObjectArray< btSoftBody * > &sof
float vertexInverseMass = softBody->m_nodes[vertex].m_im; float vertexInverseMass = softBody->m_nodes[vertex].m_im;
desc.setInverseMass(vertexInverseMass); desc.setInverseMass(vertexInverseMass);
getVertexData().setVertexAt( desc, firstVertex + vertex ); getVertexData().setVertexAt( desc, firstVertex + vertex );
m_anchorIndex.push_back(-1.0);
} }
// Copy triangles similarly // Copy triangles similarly
@@ -805,13 +817,76 @@ void btOpenCLSoftBodySolver::optimize( btAlignedObjectArray< btSoftBody * > &sof
newSoftBody->setMaxTriangles( maxTriangles ); newSoftBody->setMaxTriangles( maxTriangles );
newSoftBody->setFirstLink( firstLink ); newSoftBody->setFirstLink( firstLink );
newSoftBody->setNumLinks( numLinks ); newSoftBody->setNumLinks( numLinks );
// Find maximum piterations and viterations
int piterations = softBody->m_cfg.piterations;
if ( piterations > maxPiterations )
maxPiterations = piterations;
int viterations = softBody->m_cfg.viterations;
if ( viterations > maxViterations )
maxViterations = viterations;
// zero mass
for( int vertex = 0; vertex < numVertices; ++vertex )
{
if ( softBody->m_nodes[vertex].m_im == 0 )
{
AnchorNodeInfoCL nodeInfo;
nodeInfo.clVertexIndex = firstVertex + vertex;
nodeInfo.pNode = &softBody->m_nodes[vertex];
m_anchorNodeInfoArray.push_back(nodeInfo);
}
}
// anchor position
if ( numVertices > 0 )
{
for ( int anchorIndex = 0; anchorIndex < softBody->m_anchors.size(); anchorIndex++ )
{
btSoftBody::Node* anchorNode = softBody->m_anchors[anchorIndex].m_node;
btSoftBody::Node* firstNode = &softBody->m_nodes[0];
AnchorNodeInfoCL nodeInfo;
nodeInfo.clVertexIndex = firstVertex + (int)(anchorNode - firstNode);
nodeInfo.pNode = anchorNode;
m_anchorNodeInfoArray.push_back(nodeInfo);
}
}
} }
m_anchorPosition.clear();
m_anchorPosition.resize(m_anchorNodeInfoArray.size());
for ( int anchorNode = 0; anchorNode < m_anchorNodeInfoArray.size(); anchorNode++ )
{
const AnchorNodeInfoCL& anchorNodeInfo = m_anchorNodeInfoArray[anchorNode];
m_anchorIndex[anchorNodeInfo.clVertexIndex] = anchorNode;
getVertexData().getInverseMass(anchorNodeInfo.clVertexIndex) = 0.0f;
}
updateConstants(0.f); updateConstants(0.f);
// set position and velocity iterations
setNumberOfPositionIterations(maxPiterations);
setNumberOfVelocityIterations(maxViterations);
// set wind velocity
m_perClothWindVelocity.resize( m_softBodySet.size() );
for( int softBodyIndex = 0; softBodyIndex < m_softBodySet.size(); ++softBodyIndex )
{
btSoftBody *softBody = m_softBodySet[softBodyIndex]->getSoftBody();
m_perClothWindVelocity[softBodyIndex] = toVector3(softBody->getWindVelocity());
}
m_clPerClothWindVelocity.changedOnCPU();
// generate batches
m_linkData.generateBatches(); m_linkData.generateBatches();
m_triangleData.generateBatches(); m_triangleData.generateBatches();
@@ -861,7 +936,6 @@ void btOpenCLSoftBodySolver::resetNormalsAndAreas( int numVertices )
void btOpenCLSoftBodySolver::normalizeNormalsAndAreas( int numVertices ) void btOpenCLSoftBodySolver::normalizeNormalsAndAreas( int numVertices )
{ {
cl_int ciErrNum; cl_int ciErrNum;
ciErrNum = clSetKernelArg(m_normalizeNormalsAndAreasKernel, 0, sizeof(int),(void*) &numVertices); ciErrNum = clSetKernelArg(m_normalizeNormalsAndAreasKernel, 0, sizeof(int),(void*) &numVertices);
@@ -882,7 +956,6 @@ void btOpenCLSoftBodySolver::normalizeNormalsAndAreas( int numVertices )
void btOpenCLSoftBodySolver::executeUpdateSoftBodies( int firstTriangle, int numTriangles ) void btOpenCLSoftBodySolver::executeUpdateSoftBodies( int firstTriangle, int numTriangles )
{ {
cl_int ciErrNum; cl_int ciErrNum;
ciErrNum = clSetKernelArg(m_updateSoftBodiesKernel, 0, sizeof(int), (void*) &firstTriangle); ciErrNum = clSetKernelArg(m_updateSoftBodiesKernel, 0, sizeof(int), (void*) &firstTriangle);
ciErrNum = clSetKernelArg(m_updateSoftBodiesKernel, 1, sizeof(int), &numTriangles); ciErrNum = clSetKernelArg(m_updateSoftBodiesKernel, 1, sizeof(int), &numTriangles);
@@ -948,9 +1021,34 @@ void btOpenCLSoftBodySolver::ApplyClampedForce( float solverdt, const Vectormath
} }
} }
void btOpenCLSoftBodySolver::updateFixedVertexPositions()
{
// Ensure data is on accelerator
m_vertexData.moveToAccelerator();
m_clAnchorPosition.moveToGPU();
m_clAnchorIndex.moveToGPU();
cl_int ciErrNum ;
int numVerts = m_vertexData.getNumVertices();
ciErrNum = clSetKernelArg(m_updateFixedVertexPositionsKernel, 0, sizeof(int), &numVerts);
ciErrNum = clSetKernelArg(m_updateFixedVertexPositionsKernel,1, sizeof(cl_mem), &m_clAnchorIndex.m_buffer);
ciErrNum = clSetKernelArg(m_updateFixedVertexPositionsKernel,2, sizeof(cl_mem), &m_vertexData.m_clVertexPosition.m_buffer);
ciErrNum = clSetKernelArg(m_updateFixedVertexPositionsKernel,3, sizeof(cl_mem), &m_clAnchorPosition.m_buffer);
size_t numWorkItems = m_defaultWorkGroupSize*((m_vertexData.getNumVertices() + (m_defaultWorkGroupSize-1)) / m_defaultWorkGroupSize);
if (numWorkItems)
{
ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue,m_updateFixedVertexPositionsKernel, 1, NULL, &numWorkItems, &m_defaultWorkGroupSize, 0,0,0);
if( ciErrNum != CL_SUCCESS )
{
btAssert( 0 && "enqueueNDRangeKernel(m_updateFixedVertexPositionsKernel)");
}
}
}
void btOpenCLSoftBodySolver::applyForces( float solverdt ) void btOpenCLSoftBodySolver::applyForces( float solverdt )
{ {
// Ensure data is on accelerator // Ensure data is on accelerator
m_vertexData.moveToAccelerator(); m_vertexData.moveToAccelerator();
m_clPerClothAcceleration.moveToGPU(); m_clPerClothAcceleration.moveToGPU();
@@ -976,6 +1074,7 @@ void btOpenCLSoftBodySolver::applyForces( float solverdt )
ciErrNum = clSetKernelArg(m_applyForcesKernel,11, sizeof(cl_mem), &m_clPerClothMediumDensity.m_buffer); ciErrNum = clSetKernelArg(m_applyForcesKernel,11, sizeof(cl_mem), &m_clPerClothMediumDensity.m_buffer);
ciErrNum = clSetKernelArg(m_applyForcesKernel,12, sizeof(cl_mem), &m_vertexData.m_clVertexForceAccumulator.m_buffer); ciErrNum = clSetKernelArg(m_applyForcesKernel,12, sizeof(cl_mem), &m_vertexData.m_clVertexForceAccumulator.m_buffer);
ciErrNum = clSetKernelArg(m_applyForcesKernel,13, sizeof(cl_mem), &m_vertexData.m_clVertexVelocity.m_buffer); ciErrNum = clSetKernelArg(m_applyForcesKernel,13, sizeof(cl_mem), &m_vertexData.m_clVertexVelocity.m_buffer);
size_t numWorkItems = m_defaultWorkGroupSize*((m_vertexData.getNumVertices() + (m_defaultWorkGroupSize-1)) / m_defaultWorkGroupSize); size_t numWorkItems = m_defaultWorkGroupSize*((m_vertexData.getNumVertices() + (m_defaultWorkGroupSize-1)) / m_defaultWorkGroupSize);
if (numWorkItems) if (numWorkItems)
{ {
@@ -993,8 +1092,6 @@ void btOpenCLSoftBodySolver::applyForces( float solverdt )
*/ */
void btOpenCLSoftBodySolver::integrate( float solverdt ) void btOpenCLSoftBodySolver::integrate( float solverdt )
{ {
// Ensure data is on accelerator // Ensure data is on accelerator
m_vertexData.moveToAccelerator(); m_vertexData.moveToAccelerator();
@@ -1186,7 +1283,7 @@ void btOpenCLSoftBodySolver::solveConstraints( float solverdt )
updateVelocitiesFromPositionsWithoutVelocities( 1.f/solverdt ); updateVelocitiesFromPositionsWithoutVelocities( 1.f/solverdt );
} }
// Solve drift // Solve position
for( int iteration = 0; iteration < m_numberOfPositionIterations ; ++iteration ) for( int iteration = 0; iteration < m_numberOfPositionIterations ; ++iteration )
{ {
for( int i = 0; i < m_linkData.m_batchStartLengths.size(); ++i ) for( int i = 0; i < m_linkData.m_batchStartLengths.size(); ++i )
@@ -1210,7 +1307,6 @@ void btOpenCLSoftBodySolver::solveConstraints( float solverdt )
// Kernel dispatches // Kernel dispatches
void btOpenCLSoftBodySolver::prepareLinks() void btOpenCLSoftBodySolver::prepareLinks()
{ {
cl_int ciErrNum; cl_int ciErrNum;
int numLinks = m_linkData.getNumLinks(); int numLinks = m_linkData.getNumLinks();
ciErrNum = clSetKernelArg(m_prepareLinksKernel,0, sizeof(int), &numLinks); ciErrNum = clSetKernelArg(m_prepareLinksKernel,0, sizeof(int), &numLinks);
@@ -1231,7 +1327,6 @@ void btOpenCLSoftBodySolver::prepareLinks()
void btOpenCLSoftBodySolver::updatePositionsFromVelocities( float solverdt ) void btOpenCLSoftBodySolver::updatePositionsFromVelocities( float solverdt )
{ {
cl_int ciErrNum; cl_int ciErrNum;
int numVerts = m_vertexData.getNumVertices(); int numVerts = m_vertexData.getNumVertices();
ciErrNum = clSetKernelArg(m_updatePositionsFromVelocitiesKernel,0, sizeof(int), &numVerts); ciErrNum = clSetKernelArg(m_updatePositionsFromVelocitiesKernel,0, sizeof(int), &numVerts);
@@ -1251,7 +1346,6 @@ void btOpenCLSoftBodySolver::updatePositionsFromVelocities( float solverdt )
void btOpenCLSoftBodySolver::solveLinksForPosition( int startLink, int numLinks, float kst, float ti ) void btOpenCLSoftBodySolver::solveLinksForPosition( int startLink, int numLinks, float kst, float ti )
{ {
cl_int ciErrNum; cl_int ciErrNum;
ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,0, sizeof(int), &startLink); ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,0, sizeof(int), &startLink);
ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,1, sizeof(int), &numLinks); ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,1, sizeof(int), &numLinks);
@@ -1275,7 +1369,6 @@ void btOpenCLSoftBodySolver::solveLinksForPosition( int startLink, int numLinks,
void btOpenCLSoftBodySolver::solveLinksForVelocity( int startLink, int numLinks, float kst ) void btOpenCLSoftBodySolver::solveLinksForVelocity( int startLink, int numLinks, float kst )
{ {
cl_int ciErrNum; cl_int ciErrNum;
ciErrNum = clSetKernelArg(m_vSolveLinksKernel, 0, sizeof(int), &startLink); ciErrNum = clSetKernelArg(m_vSolveLinksKernel, 0, sizeof(int), &startLink);
ciErrNum = clSetKernelArg(m_vSolveLinksKernel, 1, sizeof(int), &numLinks); ciErrNum = clSetKernelArg(m_vSolveLinksKernel, 1, sizeof(int), &numLinks);
@@ -1297,7 +1390,6 @@ void btOpenCLSoftBodySolver::solveLinksForVelocity( int startLink, int numLinks,
void btOpenCLSoftBodySolver::updateVelocitiesFromPositionsWithVelocities( float isolverdt ) void btOpenCLSoftBodySolver::updateVelocitiesFromPositionsWithVelocities( float isolverdt )
{ {
cl_int ciErrNum; cl_int ciErrNum;
int numVerts = m_vertexData.getNumVertices(); int numVerts = m_vertexData.getNumVertices();
ciErrNum = clSetKernelArg(m_updateVelocitiesFromPositionsWithVelocitiesKernel,0, sizeof(int), &numVerts); ciErrNum = clSetKernelArg(m_updateVelocitiesFromPositionsWithVelocitiesKernel,0, sizeof(int), &numVerts);
@@ -1322,7 +1414,6 @@ void btOpenCLSoftBodySolver::updateVelocitiesFromPositionsWithVelocities( float
void btOpenCLSoftBodySolver::updateVelocitiesFromPositionsWithoutVelocities( float isolverdt ) void btOpenCLSoftBodySolver::updateVelocitiesFromPositionsWithoutVelocities( float isolverdt )
{ {
cl_int ciErrNum; cl_int ciErrNum;
int numVerts = m_vertexData.getNumVertices(); int numVerts = m_vertexData.getNumVertices();
ciErrNum = clSetKernelArg(m_updateVelocitiesFromPositionsWithoutVelocitiesKernel, 0, sizeof(int), &numVerts); ciErrNum = clSetKernelArg(m_updateVelocitiesFromPositionsWithoutVelocitiesKernel, 0, sizeof(int), &numVerts);
@@ -1347,7 +1438,6 @@ void btOpenCLSoftBodySolver::updateVelocitiesFromPositionsWithoutVelocities( flo
void btOpenCLSoftBodySolver::solveCollisionsAndUpdateVelocities( float isolverdt ) void btOpenCLSoftBodySolver::solveCollisionsAndUpdateVelocities( float isolverdt )
{ {
// Copy kernel parameters to GPU // Copy kernel parameters to GPU
m_vertexData.moveToAccelerator(); m_vertexData.moveToAccelerator();
m_clPerClothFriction.moveToGPU(); m_clPerClothFriction.moveToGPU();
@@ -1355,7 +1445,6 @@ void btOpenCLSoftBodySolver::solveCollisionsAndUpdateVelocities( float isolverdt
m_clPerClothCollisionObjects.moveToGPU(); m_clPerClothCollisionObjects.moveToGPU();
m_clCollisionObjectDetails.moveToGPU(); m_clCollisionObjectDetails.moveToGPU();
cl_int ciErrNum; cl_int ciErrNum;
int numVerts = m_vertexData.getNumVertices(); int numVerts = m_vertexData.getNumVertices();
ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 0, sizeof(int), &numVerts); ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 0, sizeof(int), &numVerts);
@@ -1446,7 +1535,7 @@ void btSoftBodySolverOutputCLtoCPU::copySoftBodyToVertexBuffer( const btSoftBody
cl_kernel CLFunctions::compileCLKernelFromString( const char* kernelSource, const char* kernelName, const char* additionalMacros ) cl_kernel CLFunctions::compileCLKernelFromString( const char* kernelSource, const char* kernelName, const char* additionalMacros ,const char* orgSrcFileNameForCaching)
{ {
printf("compiling kernelName: %s ",kernelName); printf("compiling kernelName: %s ",kernelName);
cl_kernel kernel=0; cl_kernel kernel=0;
@@ -1554,20 +1643,25 @@ void btOpenCLSoftBodySolver::predictMotion( float timeStep )
m_clPerClothCollisionObjects.changedOnCPU(); m_clPerClothCollisionObjects.changedOnCPU();
m_collisionObjectDetails.clear(); m_collisionObjectDetails.clear();
if ( m_bUpdateAnchoredNodePos )
{ {
BT_PROFILE("perClothWindVelocity"); // In OpenCL cloth solver, if softbody node has zero inverse mass(infinite mass) or anchor attached,
// Fill the force arrays with current acceleration data etc // we need to update the node position in case the node or anchor is animated externally.
m_perClothWindVelocity.resize( m_softBodySet.size() ); // If there is no such node, we can eliminate the unnecessary CPU-to-GPU data trasferring.
for( int softBodyIndex = 0; softBodyIndex < m_softBodySet.size(); ++softBodyIndex ) for ( int i = 0; i < m_anchorNodeInfoArray.size(); i++ )
{ {
btSoftBody *softBody = m_softBodySet[softBodyIndex]->getSoftBody(); const AnchorNodeInfoCL& anchorNodeInfo = m_anchorNodeInfoArray[i];
btSoftBody::Node* node = anchorNodeInfo.pNode;
m_perClothWindVelocity[softBodyIndex] = toVector3(softBody->getWindVelocity()); using Vectormath::Aos::Point3;
Point3 pos((float)node->m_x.getX(), (float)node->m_x.getY(), (float)node->m_x.getZ());
m_anchorPosition[i] = pos;
} }
}
{ if ( m_anchorNodeInfoArray.size() > 0 )
BT_PROFILE("changedOnCPU"); m_clAnchorPosition.changedOnCPU();
m_clPerClothWindVelocity.changedOnCPU();
updateFixedVertexPositions();
} }
{ {
@@ -1601,7 +1695,7 @@ static Vectormath::Aos::Transform3 toTransform3( const btTransform &transform )
void btOpenCLAcceleratedSoftBodyInterface::updateBounds( const btVector3 &lowerBound, const btVector3 &upperBound ) void btOpenCLAcceleratedSoftBodyInterface::updateBounds( const btVector3 &lowerBound, const btVector3 &upperBound )
{ {
float scalarMargin = this->getSoftBody()->getCollisionShape()->getMargin(); float scalarMargin = (float)getSoftBody()->getCollisionShape()->getMargin();
btVector3 vectorMargin( scalarMargin, scalarMargin, scalarMargin ); btVector3 vectorMargin( scalarMargin, scalarMargin, scalarMargin );
m_softBody->m_bounds[0] = lowerBound - vectorMargin; m_softBody->m_bounds[0] = lowerBound - vectorMargin;
m_softBody->m_bounds[1] = upperBound + vectorMargin; m_softBody->m_bounds[1] = upperBound + vectorMargin;
@@ -1641,7 +1735,8 @@ void btOpenCLSoftBodySolver::processCollision( btSoftBody *softBody, btCollision
newCollisionShapeDescription.angularVelocity = toVector3(body->getAngularVelocity()); newCollisionShapeDescription.angularVelocity = toVector3(body->getAngularVelocity());
m_collisionObjectDetails.push_back( newCollisionShapeDescription ); m_collisionObjectDetails.push_back( newCollisionShapeDescription );
} else { }
else {
printf("Unsupported collision shape type\n"); printf("Unsupported collision shape type\n");
//btAssert(0 && "Unsupported collision shape type\n"); //btAssert(0 && "Unsupported collision shape type\n");
} }
@@ -1688,32 +1783,36 @@ bool btOpenCLSoftBodySolver::checkInitialized()
bool btOpenCLSoftBodySolver::buildShaders() bool btOpenCLSoftBodySolver::buildShaders()
{ {
// Ensure current kernels are released first
releaseKernels();
if( m_shadersInitialized ) if( m_shadersInitialized )
return true; return true;
m_clFunctions.clearKernelCompilationFailures(); const char* additionalMacros="";
m_prepareLinksKernel = m_clFunctions.compileCLKernelFromString( PrepareLinksCLString, "PrepareLinksKernel" ); // Ensure current kernels are released first
m_updatePositionsFromVelocitiesKernel = m_clFunctions.compileCLKernelFromString( UpdatePositionsFromVelocitiesCLString, "UpdatePositionsFromVelocitiesKernel" ); releaseKernels();
m_solvePositionsFromLinksKernel = m_clFunctions.compileCLKernelFromString( SolvePositionsCLString, "SolvePositionsFromLinksKernel" );
m_vSolveLinksKernel = m_clFunctions.compileCLKernelFromString( VSolveLinksCLString, "VSolveLinksKernel" ); m_currentCLFunctions->clearKernelCompilationFailures();
m_updateVelocitiesFromPositionsWithVelocitiesKernel = m_clFunctions.compileCLKernelFromString( UpdateNodesCLString, "updateVelocitiesFromPositionsWithVelocitiesKernel" );
m_updateVelocitiesFromPositionsWithoutVelocitiesKernel = m_clFunctions.compileCLKernelFromString( UpdatePositionsCLString, "updateVelocitiesFromPositionsWithoutVelocitiesKernel" ); m_prepareLinksKernel = m_currentCLFunctions->compileCLKernelFromString( PrepareLinksCLString, "PrepareLinksKernel",additionalMacros,"OpenCLC10/PrepareLinks.cl" );
m_solveCollisionsAndUpdateVelocitiesKernel = m_clFunctions.compileCLKernelFromString( SolveCollisionsAndUpdateVelocitiesCLString, "SolveCollisionsAndUpdateVelocitiesKernel" ); m_updatePositionsFromVelocitiesKernel = m_currentCLFunctions->compileCLKernelFromString( UpdatePositionsFromVelocitiesCLString, "UpdatePositionsFromVelocitiesKernel" ,additionalMacros,"OpenCLC10/UpdatePositionsFromVelocities.cl");
m_integrateKernel = m_clFunctions.compileCLKernelFromString( IntegrateCLString, "IntegrateKernel" ); m_solvePositionsFromLinksKernel = m_currentCLFunctions->compileCLKernelFromString( SolvePositionsCLString, "SolvePositionsFromLinksKernel",additionalMacros,"OpenCLC10/SolvePositions.cl" );
m_applyForcesKernel = m_clFunctions.compileCLKernelFromString( ApplyForcesCLString, "ApplyForcesKernel" ); m_vSolveLinksKernel = m_currentCLFunctions->compileCLKernelFromString( VSolveLinksCLString, "VSolveLinksKernel" ,additionalMacros,"OpenCLC10/VSolveLinks.cl");
m_updateVelocitiesFromPositionsWithVelocitiesKernel = m_currentCLFunctions->compileCLKernelFromString( UpdateNodesCLString, "updateVelocitiesFromPositionsWithVelocitiesKernel" ,additionalMacros,"OpenCLC10/UpdateNodes.cl");
m_updateVelocitiesFromPositionsWithoutVelocitiesKernel = m_currentCLFunctions->compileCLKernelFromString( UpdatePositionsCLString, "updateVelocitiesFromPositionsWithoutVelocitiesKernel" ,additionalMacros,"OpenCLC10/UpdatePositions.cl");
m_solveCollisionsAndUpdateVelocitiesKernel = m_currentCLFunctions->compileCLKernelFromString( SolveCollisionsAndUpdateVelocitiesCLString, "SolveCollisionsAndUpdateVelocitiesKernel" ,additionalMacros,"OpenCLC10/SolveCollisionsAndUpdateVelocities.cl");
m_integrateKernel = m_currentCLFunctions->compileCLKernelFromString( IntegrateCLString, "IntegrateKernel" ,additionalMacros,"OpenCLC10/Integrate.cl");
m_applyForcesKernel = m_currentCLFunctions->compileCLKernelFromString( ApplyForcesCLString, "ApplyForcesKernel" ,additionalMacros,"OpenCLC10/ApplyForces.cl");
m_updateFixedVertexPositionsKernel = m_currentCLFunctions->compileCLKernelFromString( UpdateFixedVertexPositionsCLString, "UpdateFixedVertexPositions" , additionalMacros, "OpenCLC10/UpdateFixedVertexPositions.cl");
// TODO: Rename to UpdateSoftBodies // TODO: Rename to UpdateSoftBodies
m_resetNormalsAndAreasKernel = m_clFunctions.compileCLKernelFromString( UpdateNormalsCLString, "ResetNormalsAndAreasKernel" ); m_resetNormalsAndAreasKernel = m_currentCLFunctions->compileCLKernelFromString( UpdateNormalsCLString, "ResetNormalsAndAreasKernel" ,additionalMacros,"OpenCLC10/UpdateNormals.cl");
m_normalizeNormalsAndAreasKernel = m_clFunctions.compileCLKernelFromString( UpdateNormalsCLString, "NormalizeNormalsAndAreasKernel" ); m_normalizeNormalsAndAreasKernel = m_currentCLFunctions->compileCLKernelFromString( UpdateNormalsCLString, "NormalizeNormalsAndAreasKernel" ,additionalMacros,"OpenCLC10/UpdateNormals.cl");
m_updateSoftBodiesKernel = m_clFunctions.compileCLKernelFromString( UpdateNormalsCLString, "UpdateSoftBodiesKernel" ); m_updateSoftBodiesKernel = m_currentCLFunctions->compileCLKernelFromString( UpdateNormalsCLString, "UpdateSoftBodiesKernel" ,additionalMacros,"OpenCLC10/UpdateNormals.cl");
if( m_clFunctions.getKernelCompilationFailures()==0 ) if( m_currentCLFunctions->getKernelCompilationFailures()==0 )
m_shadersInitialized = true; m_shadersInitialized = true;
return m_shadersInitialized; return m_shadersInitialized;
} }

View File

@@ -20,6 +20,7 @@ subject to the following restrictions:
#include "vectormath/vmInclude.h" #include "vectormath/vmInclude.h"
#include "BulletSoftBody/btSoftBodySolvers.h" #include "BulletSoftBody/btSoftBodySolvers.h"
#include "BulletSoftBody/btSoftBody.h"
#include "btSoftBodySolverBuffer_OpenCL.h" #include "btSoftBodySolverBuffer_OpenCL.h"
#include "btSoftBodySolverLinkData_OpenCL.h" #include "btSoftBodySolverLinkData_OpenCL.h"
#include "btSoftBodySolverVertexData_OpenCL.h" #include "btSoftBodySolverVertexData_OpenCL.h"
@@ -33,6 +34,7 @@ protected:
int m_kernelCompilationFailures; int m_kernelCompilationFailures;
public: public:
CLFunctions(cl_command_queue cqCommandQue, cl_context cxMainContext) : CLFunctions(cl_command_queue cqCommandQue, cl_context cxMainContext) :
m_cqCommandQue( cqCommandQue ), m_cqCommandQue( cqCommandQue ),
@@ -49,7 +51,7 @@ public:
/** /**
* Compile a compute shader kernel from a string and return the appropriate cl_kernel object. * Compile a compute shader kernel from a string and return the appropriate cl_kernel object.
*/ */
cl_kernel compileCLKernelFromString( const char* kernelSource, const char* kernelName, const char* additionalMacros = "" ); virtual cl_kernel compileCLKernelFromString( const char* kernelSource, const char* kernelName, const char* additionalMacros, const char* srcFileNameForCaching);
void clearKernelCompilationFailures() void clearKernelCompilationFailures()
{ {
@@ -285,7 +287,8 @@ public:
protected: protected:
CLFunctions m_clFunctions; CLFunctions m_defaultCLFunctions;
CLFunctions* m_currentCLFunctions;
/** Variable to define whether we need to update solver constants on the next iteration */ /** Variable to define whether we need to update solver constants on the next iteration */
bool m_updateSolverConstants; bool m_updateSolverConstants;
@@ -350,7 +353,20 @@ protected:
btAlignedObjectArray< float > m_perClothFriction; btAlignedObjectArray< float > m_perClothFriction;
btOpenCLBuffer< float > m_clPerClothFriction; btOpenCLBuffer< float > m_clPerClothFriction;
// anchor node info
struct AnchorNodeInfoCL
{
int clVertexIndex;
btSoftBody::Node* pNode;
};
btAlignedObjectArray<AnchorNodeInfoCL> m_anchorNodeInfoArray;
btAlignedObjectArray<Vectormath::Aos::Point3> m_anchorPosition;
btOpenCLBuffer<Vectormath::Aos::Point3> m_clAnchorPosition;
btAlignedObjectArray<int> m_anchorIndex;
btOpenCLBuffer<int> m_clAnchorIndex;
bool m_bUpdateAnchoredNodePos;
cl_kernel m_prepareLinksKernel; cl_kernel m_prepareLinksKernel;
cl_kernel m_solvePositionsFromLinksKernel; cl_kernel m_solvePositionsFromLinksKernel;
@@ -368,6 +384,7 @@ protected:
cl_kernel m_outputToVertexArrayKernel; cl_kernel m_outputToVertexArrayKernel;
cl_kernel m_applyForcesKernel; cl_kernel m_applyForcesKernel;
cl_kernel m_updateFixedVertexPositionsKernel;
cl_command_queue m_cqCommandQue; cl_command_queue m_cqCommandQue;
cl_context m_cxMainContext; cl_context m_cxMainContext;
@@ -394,6 +411,8 @@ protected:
virtual void applyForces( float solverdt ); virtual void applyForces( float solverdt );
void updateFixedVertexPositions();
/** /**
* Integrate motion on the solver. * Integrate motion on the solver.
*/ */
@@ -430,7 +449,7 @@ protected:
void releaseKernels(); void releaseKernels();
public: public:
btOpenCLSoftBodySolver(cl_command_queue queue,cl_context ctx); btOpenCLSoftBodySolver(cl_command_queue queue,cl_context ctx, bool bUpdateAchchoredNodePos = false);
virtual ~btOpenCLSoftBodySolver(); virtual ~btOpenCLSoftBodySolver();
@@ -456,7 +475,7 @@ public:
virtual void optimize( btAlignedObjectArray< btSoftBody * > &softBodies , bool forceUpdate=false); virtual void optimize( btAlignedObjectArray< btSoftBody * > &softBodies , bool forceUpdate=false);
virtual void copyBackToSoftBodies(); virtual void copyBackToSoftBodies(bool bMove = true);
virtual void solveConstraints( float solverdt ); virtual void solveConstraints( float solverdt );
@@ -475,6 +494,14 @@ public:
return m_defaultWorkGroupSize; return m_defaultWorkGroupSize;
} }
void setCLFunctions(CLFunctions* funcs)
{
if (funcs)
m_currentCLFunctions = funcs;
else
m_currentCLFunctions = &m_defaultCLFunctions;
}
}; // btOpenCLSoftBodySolver }; // btOpenCLSoftBodySolver

View File

@@ -33,32 +33,6 @@ static const size_t workGroupSize = GROUP_SIZE;
//CL_VERSION_1_1 seems broken on NVidia SDK so just disable it //CL_VERSION_1_1 seems broken on NVidia SDK so just disable it
#if (0)//CL_VERSION_1_1 == 1)
//OpenCL 1.1 kernels use float3
#define MSTRINGIFY(A) #A
static const char* UpdatePositionsFromVelocitiesCLString =
#include "OpenCLC/UpdatePositionsFromVelocities.cl"
static const char* SolvePositionsCLString =
#include "OpenCLC/SolvePositionsSIMDBatched.cl"
static const char* UpdateNodesCLString =
#include "OpenCLC/UpdateNodes.cl"
static const char* UpdatePositionsCLString =
#include "OpenCLC/UpdatePositions.cl"
static const char* UpdateConstantsCLString =
#include "OpenCLC/UpdateConstants.cl"
static const char* IntegrateCLString =
#include "OpenCLC/Integrate.cl"
static const char* ApplyForcesCLString =
#include "OpenCLC/ApplyForces.cl"
static const char* UpdateNormalsCLString =
#include "OpenCLC/UpdateNormals.cl"
static const char* VSolveLinksCLString =
#include "OpenCLC/VSolveLinks.cl"
static const char* SolveCollisionsAndUpdateVelocitiesCLString =
#include "OpenCLC/SolveCollisionsAndUpdateVelocitiesSIMDBatched.cl"
static const char* OutputToVertexArrayCLString =
#include "OpenCLC/OutputToVertexArray.cl"
#else
////OpenCL 1.0 kernels don't use float3 ////OpenCL 1.0 kernels don't use float3
#define MSTRINGIFY(A) #A #define MSTRINGIFY(A) #A
static const char* UpdatePositionsFromVelocitiesCLString = static const char* UpdatePositionsFromVelocitiesCLString =
@@ -75,6 +49,8 @@ static const char* IntegrateCLString =
#include "OpenCLC10/Integrate.cl" #include "OpenCLC10/Integrate.cl"
static const char* ApplyForcesCLString = static const char* ApplyForcesCLString =
#include "OpenCLC10/ApplyForces.cl" #include "OpenCLC10/ApplyForces.cl"
static const char* UpdateFixedVertexPositionsCLString =
#include "OpenCLC10/UpdateFixedVertexPositions.cl"
static const char* UpdateNormalsCLString = static const char* UpdateNormalsCLString =
#include "OpenCLC10/UpdateNormals.cl" #include "OpenCLC10/UpdateNormals.cl"
static const char* VSolveLinksCLString = static const char* VSolveLinksCLString =
@@ -83,7 +59,6 @@ static const char* SolveCollisionsAndUpdateVelocitiesCLString =
#include "OpenCLC10/SolveCollisionsAndUpdateVelocitiesSIMDBatched.cl" #include "OpenCLC10/SolveCollisionsAndUpdateVelocitiesSIMDBatched.cl"
static const char* OutputToVertexArrayCLString = static const char* OutputToVertexArrayCLString =
#include "OpenCLC10/OutputToVertexArray.cl" #include "OpenCLC10/OutputToVertexArray.cl"
#endif //CL_VERSION_1_1
@@ -194,8 +169,8 @@ bool btSoftBodyLinkDataOpenCLSIMDAware::moveFromAccelerator()
btOpenCLSoftBodySolverSIMDAware::btOpenCLSoftBodySolverSIMDAware(cl_command_queue queue, cl_context ctx) : btOpenCLSoftBodySolverSIMDAware::btOpenCLSoftBodySolverSIMDAware(cl_command_queue queue, cl_context ctx, bool bUpdateAchchoredNodePos) :
btOpenCLSoftBodySolver( queue, ctx ), btOpenCLSoftBodySolver( queue, ctx, bUpdateAchchoredNodePos ),
m_linkData(queue, ctx) m_linkData(queue, ctx)
{ {
// Initial we will clearly need to update solver constants // Initial we will clearly need to update solver constants
@@ -213,14 +188,17 @@ btOpenCLSoftBodySolverSIMDAware::~btOpenCLSoftBodySolverSIMDAware()
void btOpenCLSoftBodySolverSIMDAware::optimize( btAlignedObjectArray< btSoftBody * > &softBodies ,bool forceUpdate) void btOpenCLSoftBodySolverSIMDAware::optimize( btAlignedObjectArray< btSoftBody * > &softBodies ,bool forceUpdate)
{ {
if( forceUpdate|| m_softBodySet.size() != softBodies.size() ) if( forceUpdate || m_softBodySet.size() != softBodies.size() )
{ {
// Have a change in the soft body set so update, reloading all the data // Have a change in the soft body set so update, reloading all the data
getVertexData().clear(); getVertexData().clear();
getTriangleData().clear(); getTriangleData().clear();
getLinkData().clear(); getLinkData().clear();
m_softBodySet.resize(0); m_softBodySet.resize(0);
m_anchorIndex.clear();
int maxPiterations = 0;
int maxViterations = 0;
for( int softBodyIndex = 0; softBodyIndex < softBodies.size(); ++softBodyIndex ) for( int softBodyIndex = 0; softBodyIndex < softBodies.size(); ++softBodyIndex )
{ {
@@ -238,8 +216,7 @@ void btOpenCLSoftBodySolverSIMDAware::optimize( btAlignedObjectArray< btSoftBody
m_perClothLiftFactor.push_back( softBody->m_cfg.kLF ); m_perClothLiftFactor.push_back( softBody->m_cfg.kLF );
m_perClothDragFactor.push_back( softBody->m_cfg.kDG ); m_perClothDragFactor.push_back( softBody->m_cfg.kDG );
m_perClothMediumDensity.push_back(softBody->getWorldInfo()->air_density); m_perClothMediumDensity.push_back(softBody->getWorldInfo()->air_density);
// Simple init values. Actually we'll put 0 and -1 into them at the appropriate time
m_perClothFriction.push_back( softBody->getFriction() ); m_perClothFriction.push_back( softBody->getFriction() );
m_perClothCollisionObjects.push_back( CollisionObjectIndices(-1, -1) ); m_perClothCollisionObjects.push_back( CollisionObjectIndices(-1, -1) );
@@ -253,6 +230,7 @@ void btOpenCLSoftBodySolverSIMDAware::optimize( btAlignedObjectArray< btSoftBody
// Allocate space for new vertices in all the vertex arrays // Allocate space for new vertices in all the vertex arrays
getVertexData().createVertices( numVertices, softBodyIndex, maxVertices ); getVertexData().createVertices( numVertices, softBodyIndex, maxVertices );
int firstTriangle = getTriangleData().getNumTriangles(); int firstTriangle = getTriangleData().getNumTriangles();
int numTriangles = softBody->m_faces.size(); int numTriangles = softBody->m_faces.size();
int maxTriangles = numTriangles; int maxTriangles = numTriangles;
@@ -272,6 +250,8 @@ void btOpenCLSoftBodySolverSIMDAware::optimize( btAlignedObjectArray< btSoftBody
float vertexInverseMass = softBody->m_nodes[vertex].m_im; float vertexInverseMass = softBody->m_nodes[vertex].m_im;
desc.setInverseMass(vertexInverseMass); desc.setInverseMass(vertexInverseMass);
getVertexData().setVertexAt( desc, firstVertex + vertex ); getVertexData().setVertexAt( desc, firstVertex + vertex );
m_anchorIndex.push_back(-1.0);
} }
// Copy triangles similarly // Copy triangles similarly
@@ -318,17 +298,78 @@ void btOpenCLSoftBodySolverSIMDAware::optimize( btAlignedObjectArray< btSoftBody
newSoftBody->setMaxTriangles( maxTriangles ); newSoftBody->setMaxTriangles( maxTriangles );
newSoftBody->setFirstLink( firstLink ); newSoftBody->setFirstLink( firstLink );
newSoftBody->setNumLinks( numLinks ); newSoftBody->setNumLinks( numLinks );
// Find maximum piterations and viterations
int piterations = softBody->m_cfg.piterations;
if ( piterations > maxPiterations )
maxPiterations = piterations;
int viterations = softBody->m_cfg.viterations;
if ( viterations > maxViterations )
maxViterations = viterations;
// zero mass
for( int vertex = 0; vertex < numVertices; ++vertex )
{
if ( softBody->m_nodes[vertex].m_im == 0 )
{
AnchorNodeInfoCL nodeInfo;
nodeInfo.clVertexIndex = firstVertex + vertex;
nodeInfo.pNode = &softBody->m_nodes[vertex];
m_anchorNodeInfoArray.push_back(nodeInfo);
}
}
// anchor position
if ( numVertices > 0 )
{
for ( int anchorIndex = 0; anchorIndex < softBody->m_anchors.size(); anchorIndex++ )
{
btSoftBody::Node* anchorNode = softBody->m_anchors[anchorIndex].m_node;
btSoftBody::Node* firstNode = &softBody->m_nodes[0];
AnchorNodeInfoCL nodeInfo;
nodeInfo.clVertexIndex = firstVertex + (int)(anchorNode - firstNode);
nodeInfo.pNode = anchorNode;
m_anchorNodeInfoArray.push_back(nodeInfo);
}
}
} }
m_anchorPosition.clear();
m_anchorPosition.resize(m_anchorNodeInfoArray.size());
for ( int anchorNode = 0; anchorNode < m_anchorNodeInfoArray.size(); anchorNode++ )
{
const AnchorNodeInfoCL& anchorNodeInfo = m_anchorNodeInfoArray[anchorNode];
m_anchorIndex[anchorNodeInfo.clVertexIndex] = anchorNode;
getVertexData().getInverseMass(anchorNodeInfo.clVertexIndex) = 0.0f;
}
updateConstants(0.f); updateConstants(0.f);
// set position and velocity iterations
setNumberOfPositionIterations(maxPiterations);
setNumberOfVelocityIterations(maxViterations);
// set wind velocity
m_perClothWindVelocity.resize( m_softBodySet.size() );
for( int softBodyIndex = 0; softBodyIndex < m_softBodySet.size(); ++softBodyIndex )
{
btSoftBody *softBody = m_softBodySet[softBodyIndex]->getSoftBody();
m_perClothWindVelocity[softBodyIndex] = toVector3(softBody->getWindVelocity());
}
m_clPerClothWindVelocity.changedOnCPU();
// generate batches
m_linkData.generateBatches(); m_linkData.generateBatches();
m_triangleData.generateBatches(); m_triangleData.generateBatches();
// Build the shaders to match the batching parameters // Build the shaders to match the batching parameters
buildShaders(); buildShaders();
} }
@@ -509,7 +550,9 @@ bool btOpenCLSoftBodySolverSIMDAware::buildShaders()
if( m_shadersInitialized ) if( m_shadersInitialized )
return true; return true;
m_clFunctions.clearKernelCompilationFailures(); const char* additionalMacros="";
m_currentCLFunctions->clearKernelCompilationFailures();
char *wavefrontMacros = new char[256]; char *wavefrontMacros = new char[256];
@@ -522,22 +565,23 @@ bool btOpenCLSoftBodySolverSIMDAware::buildShaders()
WAVEFRONT_BLOCK_MULTIPLIER, WAVEFRONT_BLOCK_MULTIPLIER,
WAVEFRONT_BLOCK_MULTIPLIER*m_linkData.getWavefrontSize()); WAVEFRONT_BLOCK_MULTIPLIER*m_linkData.getWavefrontSize());
m_updatePositionsFromVelocitiesKernel = m_clFunctions.compileCLKernelFromString( UpdatePositionsFromVelocitiesCLString, "UpdatePositionsFromVelocitiesKernel", "" ); m_updatePositionsFromVelocitiesKernel = m_currentCLFunctions->compileCLKernelFromString( UpdatePositionsFromVelocitiesCLString, "UpdatePositionsFromVelocitiesKernel", additionalMacros,"OpenCLC10/UpdatePositionsFromVelocities.cl");
m_solvePositionsFromLinksKernel = m_clFunctions.compileCLKernelFromString( SolvePositionsCLString, "SolvePositionsFromLinksKernel", wavefrontMacros ); m_solvePositionsFromLinksKernel = m_currentCLFunctions->compileCLKernelFromString( SolvePositionsCLString, "SolvePositionsFromLinksKernel", wavefrontMacros ,"OpenCLC10/SolvePositionsSIMDBatched.cl");
m_updateVelocitiesFromPositionsWithVelocitiesKernel = m_clFunctions.compileCLKernelFromString( UpdateNodesCLString, "updateVelocitiesFromPositionsWithVelocitiesKernel", "" ); m_updateVelocitiesFromPositionsWithVelocitiesKernel = m_currentCLFunctions->compileCLKernelFromString( UpdateNodesCLString, "updateVelocitiesFromPositionsWithVelocitiesKernel", additionalMacros ,"OpenCLC10/UpdateNodes.cl");
m_updateVelocitiesFromPositionsWithoutVelocitiesKernel = m_clFunctions.compileCLKernelFromString( UpdatePositionsCLString, "updateVelocitiesFromPositionsWithoutVelocitiesKernel", "" ); m_updateVelocitiesFromPositionsWithoutVelocitiesKernel = m_currentCLFunctions->compileCLKernelFromString( UpdatePositionsCLString, "updateVelocitiesFromPositionsWithoutVelocitiesKernel", additionalMacros,"OpenCLC10/UpdatePositions.cl");
m_integrateKernel = m_clFunctions.compileCLKernelFromString( IntegrateCLString, "IntegrateKernel", "" ); m_integrateKernel = m_currentCLFunctions->compileCLKernelFromString( IntegrateCLString, "IntegrateKernel", additionalMacros ,"OpenCLC10/Integrate.cl");
m_applyForcesKernel = m_clFunctions.compileCLKernelFromString( ApplyForcesCLString, "ApplyForcesKernel", "" ); m_applyForcesKernel = m_currentCLFunctions->compileCLKernelFromString( ApplyForcesCLString, "ApplyForcesKernel", additionalMacros,"OpenCLC10/ApplyForces.cl" );
m_solveCollisionsAndUpdateVelocitiesKernel = m_clFunctions.compileCLKernelFromString( SolveCollisionsAndUpdateVelocitiesCLString, "SolveCollisionsAndUpdateVelocitiesKernel", "" ); m_updateFixedVertexPositionsKernel = m_currentCLFunctions->compileCLKernelFromString( UpdateFixedVertexPositionsCLString, "UpdateFixedVertexPositions" ,additionalMacros,"OpenCLC10/UpdateFixedVertexPositions.cl");
m_solveCollisionsAndUpdateVelocitiesKernel = m_currentCLFunctions->compileCLKernelFromString( SolveCollisionsAndUpdateVelocitiesCLString, "SolveCollisionsAndUpdateVelocitiesKernel", additionalMacros ,"OpenCLC10/SolveCollisionsAndUpdateVelocitiesSIMDBatched.cl");
// TODO: Rename to UpdateSoftBodies // TODO: Rename to UpdateSoftBodies
m_resetNormalsAndAreasKernel = m_clFunctions.compileCLKernelFromString( UpdateNormalsCLString, "ResetNormalsAndAreasKernel", "" ); m_resetNormalsAndAreasKernel = m_currentCLFunctions->compileCLKernelFromString( UpdateNormalsCLString, "ResetNormalsAndAreasKernel", additionalMacros ,"OpenCLC10/UpdateNormals.cl");
m_normalizeNormalsAndAreasKernel = m_clFunctions.compileCLKernelFromString( UpdateNormalsCLString, "NormalizeNormalsAndAreasKernel", "" ); m_normalizeNormalsAndAreasKernel = m_currentCLFunctions->compileCLKernelFromString( UpdateNormalsCLString, "NormalizeNormalsAndAreasKernel", additionalMacros ,"OpenCLC10/UpdateNormals.cl");
m_updateSoftBodiesKernel = m_clFunctions.compileCLKernelFromString( UpdateNormalsCLString, "UpdateSoftBodiesKernel", "" ); m_updateSoftBodiesKernel = m_currentCLFunctions->compileCLKernelFromString( UpdateNormalsCLString, "UpdateSoftBodiesKernel", additionalMacros ,"OpenCLC10/UpdateNormals.cl");
delete [] wavefrontMacros; delete [] wavefrontMacros;
if( m_clFunctions.getKernelCompilationFailures()==0) if( m_currentCLFunctions->getKernelCompilationFailures()==0)
{ {
m_shadersInitialized = true; m_shadersInitialized = true;
} }

View File

@@ -39,7 +39,7 @@ protected:
bool buildShaders(); virtual bool buildShaders();
void updateConstants( float timeStep ); void updateConstants( float timeStep );
@@ -59,7 +59,7 @@ protected:
///////////////////////////////////// /////////////////////////////////////
public: public:
btOpenCLSoftBodySolverSIMDAware(cl_command_queue queue,cl_context ctx); btOpenCLSoftBodySolverSIMDAware(cl_command_queue queue,cl_context ctx, bool bUpdateAchchoredNodePos = false);
virtual ~btOpenCLSoftBodySolverSIMDAware(); virtual ~btOpenCLSoftBodySolverSIMDAware();

View File

@@ -561,10 +561,14 @@ public:
} }
/** /**
* Move data from host memory from the accelerator. * Move data to host memory from the accelerator if bCopy is false.
* If bCopy is true, copy data to host memory from the accelerator so that data
* won't be moved to accelerator when moveToAccelerator() is called next time.
* If bCopyMinimum is true, only vertex position and normal are copied.
* bCopyMinimum will be meaningful only if bCopy is true.
* The CPU version will always return that it has moved it. * The CPU version will always return that it has moved it.
*/ */
virtual bool moveFromAccelerator() virtual bool moveFromAccelerator(bool bCopy = false, bool bCopyMinimum = true)
{ {
return true; return true;
} }