added missing GPU cloth simulation files and DX11/OpenCL kernels
Thanks to Cameron Hart for the report, see Issue 486
This commit is contained in:
@@ -0,0 +1,83 @@
|
|||||||
|
MSTRINGIFY(
|
||||||
|
|
||||||
|
cbuffer ComputeBoundsCB : register( b0 )
|
||||||
|
{
|
||||||
|
int numNodes;
|
||||||
|
int numSoftBodies;
|
||||||
|
int padding1;
|
||||||
|
int padding2;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Node indices for each link
|
||||||
|
StructuredBuffer<int> g_vertexClothIdentifier : register( t0 );
|
||||||
|
StructuredBuffer<float4> g_vertexPositions : register( t1 );
|
||||||
|
|
||||||
|
RWStructuredBuffer<uint4> g_clothMinBounds : register( u0 );
|
||||||
|
RWStructuredBuffer<uint4> g_clothMaxBounds : register( u1 );
|
||||||
|
|
||||||
|
groupshared uint4 clothMinBounds[256];
|
||||||
|
groupshared uint4 clothMaxBounds[256];
|
||||||
|
|
||||||
|
[numthreads(128, 1, 1)]
|
||||||
|
void
|
||||||
|
ComputeBoundsKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
|
||||||
|
{
|
||||||
|
const unsigned int UINT_MAX = 0xffffffff;
|
||||||
|
|
||||||
|
// Init min and max bounds arrays
|
||||||
|
if( GTid.x < numSoftBodies )
|
||||||
|
{
|
||||||
|
clothMinBounds[GTid.x] = uint4(UINT_MAX, UINT_MAX, UINT_MAX, UINT_MAX);
|
||||||
|
clothMaxBounds[GTid.x] = uint4(0,0,0,0);
|
||||||
|
}
|
||||||
|
|
||||||
|
AllMemoryBarrierWithGroupSync();
|
||||||
|
|
||||||
|
int nodeID = DTid.x;
|
||||||
|
if( nodeID < numNodes )
|
||||||
|
{
|
||||||
|
int clothIdentifier = g_vertexClothIdentifier[nodeID];
|
||||||
|
if( clothIdentifier >= 0 )
|
||||||
|
{
|
||||||
|
float3 position = g_vertexPositions[nodeID].xyz;
|
||||||
|
|
||||||
|
// Reinterpret position as uint
|
||||||
|
uint3 positionUInt = uint3(asuint(position.x), asuint(position.y), asuint(position.z));
|
||||||
|
|
||||||
|
// Invert sign bit of positives and whole of negatives to allow comparison as unsigned ints
|
||||||
|
//positionUInt.x ^= uint((-int(positionUInt.x >> 31) | 0x80000000));
|
||||||
|
//positionUInt.y ^= uint((-int(positionUInt.y >> 31) | 0x80000000));
|
||||||
|
//positionUInt.z ^= uint((-int(positionUInt.z >> 31) | 0x80000000));
|
||||||
|
positionUInt.x ^= (1+~(positionUInt.x >> 31) | 0x80000000);
|
||||||
|
positionUInt.y ^= (1+~(positionUInt.y >> 31) | 0x80000000);
|
||||||
|
positionUInt.z ^= (1+~(positionUInt.z >> 31) | 0x80000000);
|
||||||
|
|
||||||
|
// Min/max with the LDS values
|
||||||
|
InterlockedMin(clothMinBounds[clothIdentifier].x, positionUInt.x);
|
||||||
|
InterlockedMin(clothMinBounds[clothIdentifier].y, positionUInt.y);
|
||||||
|
InterlockedMin(clothMinBounds[clothIdentifier].z, positionUInt.z);
|
||||||
|
|
||||||
|
InterlockedMax(clothMaxBounds[clothIdentifier].x, positionUInt.x);
|
||||||
|
InterlockedMax(clothMaxBounds[clothIdentifier].y, positionUInt.y);
|
||||||
|
InterlockedMax(clothMaxBounds[clothIdentifier].z, positionUInt.z);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
AllMemoryBarrierWithGroupSync();
|
||||||
|
|
||||||
|
|
||||||
|
// Use global atomics to update the global versions of the data
|
||||||
|
if( GTid.x < numSoftBodies )
|
||||||
|
{
|
||||||
|
InterlockedMin(g_clothMinBounds[GTid.x].x, clothMinBounds[GTid.x].x);
|
||||||
|
InterlockedMin(g_clothMinBounds[GTid.x].y, clothMinBounds[GTid.x].y);
|
||||||
|
InterlockedMin(g_clothMinBounds[GTid.x].z, clothMinBounds[GTid.x].z);
|
||||||
|
|
||||||
|
InterlockedMax(g_clothMaxBounds[GTid.x].x, clothMaxBounds[GTid.x].x);
|
||||||
|
InterlockedMax(g_clothMaxBounds[GTid.x].y, clothMaxBounds[GTid.x].y);
|
||||||
|
InterlockedMax(g_clothMaxBounds[GTid.x].z, clothMaxBounds[GTid.x].z);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
);
|
||||||
@@ -0,0 +1,170 @@
|
|||||||
|
MSTRINGIFY(
|
||||||
|
|
||||||
|
cbuffer SolvePositionsFromLinksKernelCB : register( b0 )
|
||||||
|
{
|
||||||
|
unsigned int numNodes;
|
||||||
|
float isolverdt;
|
||||||
|
int padding0;
|
||||||
|
int padding1;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct CollisionObjectIndices
|
||||||
|
{
|
||||||
|
int firstObject;
|
||||||
|
int endObject;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct CollisionShapeDescription
|
||||||
|
{
|
||||||
|
float4x4 shapeTransform;
|
||||||
|
float4 linearVelocity;
|
||||||
|
float4 angularVelocity;
|
||||||
|
|
||||||
|
int softBodyIdentifier;
|
||||||
|
int collisionShapeType;
|
||||||
|
|
||||||
|
|
||||||
|
// Shape information
|
||||||
|
// Compressed from the union
|
||||||
|
float radius;
|
||||||
|
float halfHeight;
|
||||||
|
|
||||||
|
float margin;
|
||||||
|
float friction;
|
||||||
|
|
||||||
|
int padding0;
|
||||||
|
int padding1;
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
// From btBroadphaseProxy.h
|
||||||
|
static const int CAPSULE_SHAPE_PROXYTYPE = 10;
|
||||||
|
|
||||||
|
// Node indices for each link
|
||||||
|
StructuredBuffer<int> g_vertexClothIdentifier : register( t0 );
|
||||||
|
StructuredBuffer<float4> g_vertexPreviousPositions : register( t1 );
|
||||||
|
StructuredBuffer<float> g_perClothFriction : register( t2 );
|
||||||
|
StructuredBuffer<float> g_clothDampingFactor : register( t3 );
|
||||||
|
StructuredBuffer<CollisionObjectIndices> g_perClothCollisionObjectIndices : register( t4 );
|
||||||
|
StructuredBuffer<CollisionShapeDescription> g_collisionObjectDetails : register( t5 );
|
||||||
|
|
||||||
|
RWStructuredBuffer<float4> g_vertexForces : register( u0 );
|
||||||
|
RWStructuredBuffer<float4> g_vertexVelocities : register( u1 );
|
||||||
|
RWStructuredBuffer<float4> g_vertexPositions : register( u2 );
|
||||||
|
|
||||||
|
[numthreads(128, 1, 1)]
|
||||||
|
void
|
||||||
|
SolveCollisionsAndUpdateVelocitiesKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
|
||||||
|
{
|
||||||
|
int nodeID = DTid.x;
|
||||||
|
float3 forceOnVertex = float3(0.f, 0.f, 0.f);
|
||||||
|
if( DTid.x < numNodes )
|
||||||
|
{
|
||||||
|
int clothIdentifier = g_vertexClothIdentifier[nodeID];
|
||||||
|
float4 position = float4(g_vertexPositions[nodeID].xyz, 1.f);
|
||||||
|
float4 previousPosition = float4(g_vertexPreviousPositions[nodeID].xyz, 1.f);
|
||||||
|
float3 velocity;
|
||||||
|
float clothFriction = g_perClothFriction[clothIdentifier];
|
||||||
|
float dampingFactor = g_clothDampingFactor[clothIdentifier];
|
||||||
|
float velocityCoefficient = (1.f - dampingFactor);
|
||||||
|
CollisionObjectIndices collisionObjectIndices = g_perClothCollisionObjectIndices[clothIdentifier];
|
||||||
|
|
||||||
|
if( collisionObjectIndices.firstObject != collisionObjectIndices.endObject )
|
||||||
|
{
|
||||||
|
velocity = float3(15, 0, 0);
|
||||||
|
|
||||||
|
// We have some possible collisions to deal with
|
||||||
|
for( int collision = collisionObjectIndices.firstObject; collision < collisionObjectIndices.endObject; ++collision )
|
||||||
|
{
|
||||||
|
CollisionShapeDescription shapeDescription = g_collisionObjectDetails[collision];
|
||||||
|
float colliderFriction = shapeDescription.friction;
|
||||||
|
|
||||||
|
if( shapeDescription.collisionShapeType == CAPSULE_SHAPE_PROXYTYPE )
|
||||||
|
{
|
||||||
|
// Colliding with a capsule
|
||||||
|
|
||||||
|
float capsuleHalfHeight = shapeDescription.halfHeight;
|
||||||
|
float capsuleRadius = shapeDescription.radius;
|
||||||
|
float capsuleMargin = shapeDescription.margin;
|
||||||
|
float4x4 worldTransform = shapeDescription.shapeTransform;
|
||||||
|
|
||||||
|
float4 c1 = float4(0.f, -capsuleHalfHeight, 0.f, 1.f);
|
||||||
|
float4 c2 = float4(0.f, +capsuleHalfHeight, 0.f, 1.f);
|
||||||
|
float4 worldC1 = mul(worldTransform, c1);
|
||||||
|
float4 worldC2 = mul(worldTransform, c2);
|
||||||
|
float3 segment = (worldC2 - worldC1).xyz;
|
||||||
|
|
||||||
|
// compute distance of tangent to vertex along line segment in capsule
|
||||||
|
float distanceAlongSegment = -( dot( (worldC1 - position).xyz, segment ) / dot(segment, segment) );
|
||||||
|
|
||||||
|
float4 closestPoint = (worldC1 + float4(segment * distanceAlongSegment, 0.f));
|
||||||
|
float distanceFromLine = length(position - closestPoint);
|
||||||
|
float distanceFromC1 = length(worldC1 - position);
|
||||||
|
float distanceFromC2 = length(worldC2 - position);
|
||||||
|
|
||||||
|
// Final distance from collision, point to push from, direction to push in
|
||||||
|
// for impulse force
|
||||||
|
float dist;
|
||||||
|
float3 normalVector;
|
||||||
|
if( distanceAlongSegment < 0 )
|
||||||
|
{
|
||||||
|
dist = distanceFromC1;
|
||||||
|
normalVector = normalize(position - worldC1).xyz;
|
||||||
|
} else if( distanceAlongSegment > 1.f ) {
|
||||||
|
dist = distanceFromC2;
|
||||||
|
normalVector = normalize(position - worldC2).xyz;
|
||||||
|
} else {
|
||||||
|
dist = distanceFromLine;
|
||||||
|
normalVector = normalize(position - closestPoint).xyz;
|
||||||
|
}
|
||||||
|
|
||||||
|
float3 colliderLinearVelocity = shapeDescription.linearVelocity.xyz;
|
||||||
|
float3 colliderAngularVelocity = shapeDescription.angularVelocity.xyz;
|
||||||
|
float3 velocityOfSurfacePoint = colliderLinearVelocity + cross(colliderAngularVelocity, position.xyz - worldTransform._m03_m13_m23);
|
||||||
|
|
||||||
|
float minDistance = capsuleRadius + capsuleMargin;
|
||||||
|
|
||||||
|
// In case of no collision, this is the value of velocity
|
||||||
|
velocity = (position - previousPosition).xyz * velocityCoefficient * isolverdt;
|
||||||
|
|
||||||
|
|
||||||
|
// Check for a collision
|
||||||
|
if( dist < minDistance )
|
||||||
|
{
|
||||||
|
// Project back to surface along normal
|
||||||
|
position = position + float4((minDistance - dist)*normalVector*0.9, 0.f);
|
||||||
|
velocity = (position - previousPosition).xyz * velocityCoefficient * isolverdt;
|
||||||
|
float3 relativeVelocity = velocity - velocityOfSurfacePoint;
|
||||||
|
|
||||||
|
float3 p1 = normalize(cross(normalVector, segment));
|
||||||
|
float3 p2 = normalize(cross(p1, normalVector));
|
||||||
|
// Full friction is sum of velocities in each direction of plane
|
||||||
|
float3 frictionVector = p1*dot(relativeVelocity, p1) + p2*dot(relativeVelocity, p2);
|
||||||
|
|
||||||
|
// Real friction is peak friction corrected by friction coefficients
|
||||||
|
frictionVector = frictionVector * (colliderFriction*clothFriction);
|
||||||
|
|
||||||
|
float approachSpeed = dot(relativeVelocity, normalVector);
|
||||||
|
|
||||||
|
if( approachSpeed <= 0.0 )
|
||||||
|
forceOnVertex -= frictionVector;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Update velocity
|
||||||
|
float3 difference = position.xyz - previousPosition.xyz;
|
||||||
|
velocity = difference*velocityCoefficient*isolverdt;
|
||||||
|
}
|
||||||
|
|
||||||
|
g_vertexVelocities[nodeID] = float4(velocity, 0.f);
|
||||||
|
|
||||||
|
// Update external force
|
||||||
|
g_vertexForces[nodeID] = float4(forceOnVertex, 0.f);
|
||||||
|
|
||||||
|
g_vertexPositions[nodeID] = float4(position.xyz, 0.f);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
);
|
||||||
@@ -0,0 +1,191 @@
|
|||||||
|
MSTRINGIFY(
|
||||||
|
|
||||||
|
cbuffer SolvePositionsFromLinksKernelCB : register( b0 )
|
||||||
|
{
|
||||||
|
unsigned int numNodes;
|
||||||
|
float isolverdt;
|
||||||
|
int padding0;
|
||||||
|
int padding1;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct CollisionObjectIndices
|
||||||
|
{
|
||||||
|
int firstObject;
|
||||||
|
int endObject;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct CollisionShapeDescription
|
||||||
|
{
|
||||||
|
float4x4 shapeTransform;
|
||||||
|
float4 linearVelocity;
|
||||||
|
float4 angularVelocity;
|
||||||
|
|
||||||
|
int softBodyIdentifier;
|
||||||
|
int collisionShapeType;
|
||||||
|
|
||||||
|
|
||||||
|
// Shape information
|
||||||
|
// Compressed from the union
|
||||||
|
float radius;
|
||||||
|
float halfHeight;
|
||||||
|
|
||||||
|
float margin;
|
||||||
|
float friction;
|
||||||
|
|
||||||
|
int padding0;
|
||||||
|
int padding1;
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
// From btBroadphaseProxy.h
|
||||||
|
static const int CAPSULE_SHAPE_PROXYTYPE = 10;
|
||||||
|
|
||||||
|
// Node indices for each link
|
||||||
|
StructuredBuffer<int> g_vertexClothIdentifier : register( t0 );
|
||||||
|
StructuredBuffer<float4> g_vertexPreviousPositions : register( t1 );
|
||||||
|
StructuredBuffer<float> g_perClothFriction : register( t2 );
|
||||||
|
StructuredBuffer<float> g_clothDampingFactor : register( t3 );
|
||||||
|
StructuredBuffer<CollisionObjectIndices> g_perClothCollisionObjectIndices : register( t4 );
|
||||||
|
StructuredBuffer<CollisionShapeDescription> g_collisionObjectDetails : register( t5 );
|
||||||
|
|
||||||
|
RWStructuredBuffer<float4> g_vertexForces : register( u0 );
|
||||||
|
RWStructuredBuffer<float4> g_vertexVelocities : register( u1 );
|
||||||
|
RWStructuredBuffer<float4> g_vertexPositions : register( u2 );
|
||||||
|
|
||||||
|
// A buffer of local collision shapes
|
||||||
|
// TODO: Iterate to support more than 16
|
||||||
|
groupshared CollisionShapeDescription localCollisionShapes[16];
|
||||||
|
|
||||||
|
[numthreads(128, 1, 1)]
|
||||||
|
void
|
||||||
|
SolveCollisionsAndUpdateVelocitiesKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
|
||||||
|
{
|
||||||
|
int nodeID = DTid.x;
|
||||||
|
float3 forceOnVertex = float3(0.f, 0.f, 0.f);
|
||||||
|
|
||||||
|
int clothIdentifier = g_vertexClothIdentifier[nodeID];
|
||||||
|
float4 position = float4(g_vertexPositions[nodeID].xyz, 1.f);
|
||||||
|
float4 previousPosition = float4(g_vertexPreviousPositions[nodeID].xyz, 1.f);
|
||||||
|
float3 velocity;
|
||||||
|
float clothFriction = g_perClothFriction[clothIdentifier];
|
||||||
|
float dampingFactor = g_clothDampingFactor[clothIdentifier];
|
||||||
|
float velocityCoefficient = (1.f - dampingFactor);
|
||||||
|
CollisionObjectIndices collisionObjectIndices = g_perClothCollisionObjectIndices[clothIdentifier];
|
||||||
|
|
||||||
|
int numObjects = collisionObjectIndices.endObject - collisionObjectIndices.firstObject;
|
||||||
|
if( numObjects > 0 )
|
||||||
|
{
|
||||||
|
// We have some possible collisions to deal with
|
||||||
|
|
||||||
|
// First load all of the collision objects into LDS
|
||||||
|
int numObjects = collisionObjectIndices.endObject - collisionObjectIndices.firstObject;
|
||||||
|
if( GTid.x < numObjects )
|
||||||
|
{
|
||||||
|
localCollisionShapes[GTid.x] = g_collisionObjectDetails[ collisionObjectIndices.firstObject + GTid.x ];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Safe as the vertices are padded so that not more than one soft body is in a group
|
||||||
|
AllMemoryBarrierWithGroupSync();
|
||||||
|
|
||||||
|
// Annoyingly, even though I know the flow control is not varying, the compiler will not let me skip this
|
||||||
|
if( numObjects > 0 )
|
||||||
|
{
|
||||||
|
velocity = float3(0, 0, 0);
|
||||||
|
|
||||||
|
|
||||||
|
// We have some possible collisions to deal with
|
||||||
|
for( int collision = 0; collision < numObjects; ++collision )
|
||||||
|
{
|
||||||
|
CollisionShapeDescription shapeDescription = localCollisionShapes[collision];
|
||||||
|
float colliderFriction = shapeDescription.friction;
|
||||||
|
|
||||||
|
if( shapeDescription.collisionShapeType == CAPSULE_SHAPE_PROXYTYPE )
|
||||||
|
{
|
||||||
|
// Colliding with a capsule
|
||||||
|
|
||||||
|
float capsuleHalfHeight = localCollisionShapes[collision].halfHeight;
|
||||||
|
float capsuleRadius = localCollisionShapes[collision].radius;
|
||||||
|
float capsuleMargin = localCollisionShapes[collision].margin;
|
||||||
|
|
||||||
|
float4x4 worldTransform = localCollisionShapes[collision].shapeTransform;
|
||||||
|
|
||||||
|
float4 c1 = float4(0.f, -capsuleHalfHeight, 0.f, 1.f);
|
||||||
|
float4 c2 = float4(0.f, +capsuleHalfHeight, 0.f, 1.f);
|
||||||
|
float4 worldC1 = mul(worldTransform, c1);
|
||||||
|
float4 worldC2 = mul(worldTransform, c2);
|
||||||
|
float3 segment = (worldC2 - worldC1).xyz;
|
||||||
|
|
||||||
|
// compute distance of tangent to vertex along line segment in capsule
|
||||||
|
float distanceAlongSegment = -( dot( (worldC1 - position).xyz, segment ) / dot(segment, segment) );
|
||||||
|
|
||||||
|
float4 closestPoint = (worldC1 + float4(segment * distanceAlongSegment, 0.f));
|
||||||
|
float distanceFromLine = length(position - closestPoint);
|
||||||
|
float distanceFromC1 = length(worldC1 - position);
|
||||||
|
float distanceFromC2 = length(worldC2 - position);
|
||||||
|
|
||||||
|
// Final distance from collision, point to push from, direction to push in
|
||||||
|
// for impulse force
|
||||||
|
float dist;
|
||||||
|
float3 normalVector;
|
||||||
|
if( distanceAlongSegment < 0 )
|
||||||
|
{
|
||||||
|
dist = distanceFromC1;
|
||||||
|
normalVector = normalize(position - worldC1).xyz;
|
||||||
|
} else if( distanceAlongSegment > 1.f ) {
|
||||||
|
dist = distanceFromC2;
|
||||||
|
normalVector = normalize(position - worldC2).xyz;
|
||||||
|
} else {
|
||||||
|
dist = distanceFromLine;
|
||||||
|
normalVector = normalize(position - closestPoint).xyz;
|
||||||
|
}
|
||||||
|
|
||||||
|
float3 colliderLinearVelocity = localCollisionShapes[collision].linearVelocity.xyz;
|
||||||
|
float3 colliderAngularVelocity = localCollisionShapes[collision].angularVelocity.xyz;
|
||||||
|
float3 velocityOfSurfacePoint = colliderLinearVelocity + cross(colliderAngularVelocity, position.xyz - worldTransform._m03_m13_m23);
|
||||||
|
|
||||||
|
float minDistance = capsuleRadius + capsuleMargin;
|
||||||
|
|
||||||
|
// In case of no collision, this is the value of velocity
|
||||||
|
velocity = (position - previousPosition).xyz * velocityCoefficient * isolverdt;
|
||||||
|
|
||||||
|
|
||||||
|
// Check for a collision
|
||||||
|
if( dist < minDistance )
|
||||||
|
{
|
||||||
|
// Project back to surface along normal
|
||||||
|
position = position + float4((minDistance - dist)*normalVector*0.9, 0.f);
|
||||||
|
velocity = (position - previousPosition).xyz * velocityCoefficient * isolverdt;
|
||||||
|
float3 relativeVelocity = velocity - velocityOfSurfacePoint;
|
||||||
|
|
||||||
|
float3 p1 = normalize(cross(normalVector, segment));
|
||||||
|
float3 p2 = normalize(cross(p1, normalVector));
|
||||||
|
// Full friction is sum of velocities in each direction of plane
|
||||||
|
float3 frictionVector = p1*dot(relativeVelocity, p1) + p2*dot(relativeVelocity, p2);
|
||||||
|
|
||||||
|
// Real friction is peak friction corrected by friction coefficients
|
||||||
|
frictionVector = frictionVector * (colliderFriction*clothFriction);
|
||||||
|
|
||||||
|
float approachSpeed = dot(relativeVelocity, normalVector);
|
||||||
|
|
||||||
|
if( approachSpeed <= 0.0 )
|
||||||
|
forceOnVertex -= frictionVector;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Update velocity
|
||||||
|
float3 difference = position.xyz - previousPosition.xyz;
|
||||||
|
velocity = difference*velocityCoefficient*isolverdt;
|
||||||
|
}
|
||||||
|
|
||||||
|
g_vertexVelocities[nodeID] = float4(velocity, 0.f);
|
||||||
|
|
||||||
|
// Update external force
|
||||||
|
g_vertexForces[nodeID] = float4(forceOnVertex, 0.f);
|
||||||
|
|
||||||
|
g_vertexPositions[nodeID] = float4(position.xyz, 0.f);
|
||||||
|
}
|
||||||
|
|
||||||
|
);
|
||||||
@@ -0,0 +1,80 @@
|
|||||||
|
MSTRINGIFY(
|
||||||
|
#pragma OPENCL EXTENSION cl_amd_printf : enable \n
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
__kernel void
|
||||||
|
ComputeBoundsKernel(
|
||||||
|
int numNodes,
|
||||||
|
int numSoftBodies,
|
||||||
|
__global int * g_vertexClothIdentifier,
|
||||||
|
__global float4 * g_vertexPositions,
|
||||||
|
volatile __global uint * g_clothMinBounds,
|
||||||
|
volatile __global uint * g_clothMaxBounds,
|
||||||
|
volatile __local uint * clothMinBounds,
|
||||||
|
volatile __local uint * clothMaxBounds)
|
||||||
|
{
|
||||||
|
// Init min and max bounds arrays
|
||||||
|
if( get_local_id(0) < numSoftBodies )
|
||||||
|
{
|
||||||
|
|
||||||
|
clothMinBounds[get_local_id(0)*4] = UINT_MAX;
|
||||||
|
clothMinBounds[get_local_id(0)*4+1] = UINT_MAX;
|
||||||
|
clothMinBounds[get_local_id(0)*4+2] = UINT_MAX;
|
||||||
|
clothMinBounds[get_local_id(0)*4+3] = UINT_MAX;
|
||||||
|
clothMaxBounds[get_local_id(0)*4] = 0;
|
||||||
|
clothMaxBounds[get_local_id(0)*4+1] = 0;
|
||||||
|
clothMaxBounds[get_local_id(0)*4+2] = 0;
|
||||||
|
clothMaxBounds[get_local_id(0)*4+3] = 0;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
barrier(CLK_GLOBAL_MEM_FENCE);
|
||||||
|
|
||||||
|
int nodeID = get_global_id(0);
|
||||||
|
if( nodeID < numNodes )
|
||||||
|
{
|
||||||
|
int clothIdentifier = g_vertexClothIdentifier[get_global_id(0)];
|
||||||
|
if( clothIdentifier >= 0 )
|
||||||
|
{
|
||||||
|
float3 position = g_vertexPositions[get_global_id(0)].xyz;
|
||||||
|
|
||||||
|
/* Reinterpret position as uint */
|
||||||
|
uint3 positionUInt = (uint3)(as_uint(position.x), as_uint(position.y), as_uint(position.z));
|
||||||
|
|
||||||
|
/* Invert sign bit of positives and whole of negatives to allow comparison as unsigned ints */
|
||||||
|
positionUInt.x ^= (1+~(positionUInt.x >> 31) | 0x80000000);
|
||||||
|
positionUInt.y ^= (1+~(positionUInt.y >> 31) | 0x80000000);
|
||||||
|
positionUInt.z ^= (1+~(positionUInt.z >> 31) | 0x80000000);
|
||||||
|
|
||||||
|
/* Min/max with the LDS values */
|
||||||
|
atomic_min(&(clothMinBounds[clothIdentifier*4]), positionUInt.x);
|
||||||
|
atomic_min(&(clothMinBounds[clothIdentifier*4+1]), positionUInt.y);
|
||||||
|
atomic_min(&(clothMinBounds[clothIdentifier*4+2]), positionUInt.z);
|
||||||
|
|
||||||
|
atomic_max(&(clothMaxBounds[clothIdentifier*4]), positionUInt.x);
|
||||||
|
atomic_max(&(clothMaxBounds[clothIdentifier*4+1]), positionUInt.y);
|
||||||
|
atomic_max(&(clothMaxBounds[clothIdentifier*4+2]), positionUInt.z);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
barrier(CLK_GLOBAL_MEM_FENCE);
|
||||||
|
|
||||||
|
|
||||||
|
/* Use global atomics to update the global versions of the data*/
|
||||||
|
if( get_local_id(0) < numSoftBodies )
|
||||||
|
{
|
||||||
|
/*atomic_min(&(g_clothMinBounds[get_local_id(0)].x), clothMinBounds[get_local_id(0)].x);*/
|
||||||
|
atomic_min(&(g_clothMinBounds[get_local_id(0)*4]), clothMinBounds[get_local_id(0)*4]);
|
||||||
|
atomic_min(&(g_clothMinBounds[get_local_id(0)*4+1]), clothMinBounds[get_local_id(0)*4+1]);
|
||||||
|
atomic_min(&(g_clothMinBounds[get_local_id(0)*4+2]), clothMinBounds[get_local_id(0)*4+2]);
|
||||||
|
|
||||||
|
atomic_max(&(g_clothMaxBounds[get_local_id(0)*4]), clothMaxBounds[get_local_id(0)*4]);
|
||||||
|
atomic_max(&(g_clothMaxBounds[get_local_id(0)*4+1]), clothMaxBounds[get_local_id(0)*4+1]);
|
||||||
|
atomic_max(&(g_clothMaxBounds[get_local_id(0)*4+2]), clothMaxBounds[get_local_id(0)*4+2]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
);
|
||||||
@@ -0,0 +1,57 @@
|
|||||||
|
/*
|
||||||
|
Bullet Continuous Collision Detection and Physics Library
|
||||||
|
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
|
||||||
|
|
||||||
|
This software is provided 'as-is', without any express or implied warranty.
|
||||||
|
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||||
|
Permission is granted to anyone to use this software for any purpose,
|
||||||
|
including commercial applications, and to alter it and redistribute it freely,
|
||||||
|
subject to the following restrictions:
|
||||||
|
|
||||||
|
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||||
|
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||||
|
3. This notice may not be removed or altered from any source distribution.
|
||||||
|
*/
|
||||||
|
|
||||||
|
cbuffer OutputToVertexArrayCB : register( b0 )
|
||||||
|
{
|
||||||
|
int startNode;
|
||||||
|
int numNodes;
|
||||||
|
int offsetX;
|
||||||
|
int strideX;
|
||||||
|
|
||||||
|
int offsetN;
|
||||||
|
int strideN;
|
||||||
|
int padding1;
|
||||||
|
int padding2;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
StructuredBuffer<float4> g_nodesx : register( t0 );
|
||||||
|
StructuredBuffer<float4> g_nodesn : register( t1 );
|
||||||
|
|
||||||
|
RWStructuredBuffer<float> g_vertexBuffer : register( u0 );
|
||||||
|
|
||||||
|
|
||||||
|
[numthreads(128, 1, 1)]
|
||||||
|
void
|
||||||
|
OutputToVertexArrayKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
|
||||||
|
{
|
||||||
|
int nodeID = DTid.x;
|
||||||
|
if( nodeID < numNodes )
|
||||||
|
{
|
||||||
|
float4 nodeX = g_nodesx[nodeID + startNode];
|
||||||
|
float4 nodeN = g_nodesn[nodeID + startNode];
|
||||||
|
|
||||||
|
// Stride should account for the float->float4 conversion
|
||||||
|
int positionDestination = nodeID * strideX + offsetX;
|
||||||
|
g_vertexBuffer[positionDestination] = nodeX.x;
|
||||||
|
g_vertexBuffer[positionDestination+1] = nodeX.y;
|
||||||
|
g_vertexBuffer[positionDestination+2] = nodeX.z;
|
||||||
|
|
||||||
|
int normalDestination = nodeID * strideN + offsetN;
|
||||||
|
g_vertexBuffer[normalDestination] = nodeN.x;
|
||||||
|
g_vertexBuffer[normalDestination+1] = nodeN.y;
|
||||||
|
g_vertexBuffer[normalDestination+2] = nodeN.z;
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,139 @@
|
|||||||
|
/*
|
||||||
|
Bullet Continuous Collision Detection and Physics Library
|
||||||
|
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
|
||||||
|
|
||||||
|
This software is provided 'as-is', without any express or implied warranty.
|
||||||
|
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||||
|
Permission is granted to anyone to use this software for any purpose,
|
||||||
|
including commercial applications, and to alter it and redistribute it freely,
|
||||||
|
subject to the following restrictions:
|
||||||
|
|
||||||
|
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||||
|
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||||
|
3. This notice may not be removed or altered from any source distribution.
|
||||||
|
*/
|
||||||
|
|
||||||
|
MSTRINGIFY(
|
||||||
|
|
||||||
|
float mydot3(float4 a, float4 b)
|
||||||
|
{
|
||||||
|
return a.x*b.x + a.y*b.y + a.z*b.z;
|
||||||
|
}
|
||||||
|
|
||||||
|
__kernel void
|
||||||
|
SolvePositionsFromLinksKernel(
|
||||||
|
const int startWaveInBatch,
|
||||||
|
const int numWaves,
|
||||||
|
const float kst,
|
||||||
|
const float ti,
|
||||||
|
__global int2 *g_wavefrontBatchCountsVertexCounts,
|
||||||
|
__global int *g_vertexAddressesPerWavefront,
|
||||||
|
__global int2 * g_linksVertexIndices,
|
||||||
|
__global float * g_linksMassLSC,
|
||||||
|
__global float * g_linksRestLengthSquared,
|
||||||
|
__global float * g_verticesInverseMass,
|
||||||
|
__global float4 * g_vertexPositions,
|
||||||
|
__local int2 *wavefrontBatchCountsVertexCounts,
|
||||||
|
__local float4 *vertexPositionSharedData,
|
||||||
|
__local float *vertexInverseMassSharedData)
|
||||||
|
{
|
||||||
|
const int laneInWavefront = (get_global_id(0) & (WAVEFRONT_SIZE-1));
|
||||||
|
const int wavefront = startWaveInBatch + (get_global_id(0) / WAVEFRONT_SIZE);
|
||||||
|
const int firstWavefrontInBlock = startWaveInBatch + get_group_id(0) * WAVEFRONT_BLOCK_MULTIPLIER;
|
||||||
|
const int localWavefront = wavefront - firstWavefrontInBlock;
|
||||||
|
|
||||||
|
// Mask out in case there's a stray "wavefront" at the end that's been forced in through the multiplier
|
||||||
|
if( wavefront < (startWaveInBatch + numWaves) )
|
||||||
|
{
|
||||||
|
// Load the batch counts for the wavefronts
|
||||||
|
// Mask out in case there's a stray "wavefront" at the end that's been forced in through the multiplier
|
||||||
|
if( laneInWavefront == 0 )
|
||||||
|
{
|
||||||
|
int2 batchesAndVertexCountsWithinWavefront = g_wavefrontBatchCountsVertexCounts[wavefront];
|
||||||
|
wavefrontBatchCountsVertexCounts[localWavefront] = batchesAndVertexCountsWithinWavefront;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
mem_fence(CLK_LOCAL_MEM_FENCE);
|
||||||
|
|
||||||
|
|
||||||
|
int2 batchesAndVerticesWithinWavefront = wavefrontBatchCountsVertexCounts[localWavefront];
|
||||||
|
int batchesWithinWavefront = batchesAndVerticesWithinWavefront.x;
|
||||||
|
int verticesUsedByWave = batchesAndVerticesWithinWavefront.y;
|
||||||
|
|
||||||
|
// Load the vertices for the wavefronts
|
||||||
|
for( int vertex = laneInWavefront; vertex < verticesUsedByWave; vertex+=WAVEFRONT_SIZE )
|
||||||
|
{
|
||||||
|
int vertexAddress = g_vertexAddressesPerWavefront[wavefront*MAX_NUM_VERTICES_PER_WAVE + vertex];
|
||||||
|
|
||||||
|
vertexPositionSharedData[localWavefront*MAX_NUM_VERTICES_PER_WAVE + vertex] = g_vertexPositions[vertexAddress];
|
||||||
|
vertexInverseMassSharedData[localWavefront*MAX_NUM_VERTICES_PER_WAVE + vertex] = g_verticesInverseMass[vertexAddress];
|
||||||
|
}
|
||||||
|
|
||||||
|
mem_fence(CLK_LOCAL_MEM_FENCE);
|
||||||
|
|
||||||
|
// Loop through the batches performing the solve on each in LDS
|
||||||
|
int baseDataLocationForWave = WAVEFRONT_SIZE * wavefront * MAX_BATCHES_PER_WAVE;
|
||||||
|
|
||||||
|
//for( int batch = 0; batch < batchesWithinWavefront; ++batch )
|
||||||
|
|
||||||
|
int batch = 0;
|
||||||
|
do
|
||||||
|
{
|
||||||
|
int baseDataLocation = baseDataLocationForWave + WAVEFRONT_SIZE * batch;
|
||||||
|
int locationOfValue = baseDataLocation + laneInWavefront;
|
||||||
|
|
||||||
|
|
||||||
|
// These loads should all be perfectly linear across the WF
|
||||||
|
int2 localVertexIndices = g_linksVertexIndices[locationOfValue];
|
||||||
|
float massLSC = g_linksMassLSC[locationOfValue];
|
||||||
|
float restLengthSquared = g_linksRestLengthSquared[locationOfValue];
|
||||||
|
|
||||||
|
// LDS vertex addresses based on logical wavefront number in block and loaded index
|
||||||
|
int vertexAddress0 = MAX_NUM_VERTICES_PER_WAVE * localWavefront + localVertexIndices.x;
|
||||||
|
int vertexAddress1 = MAX_NUM_VERTICES_PER_WAVE * localWavefront + localVertexIndices.y;
|
||||||
|
|
||||||
|
float4 position0 = vertexPositionSharedData[vertexAddress0];
|
||||||
|
float4 position1 = vertexPositionSharedData[vertexAddress1];
|
||||||
|
|
||||||
|
float inverseMass0 = vertexInverseMassSharedData[vertexAddress0];
|
||||||
|
float inverseMass1 = vertexInverseMassSharedData[vertexAddress1];
|
||||||
|
|
||||||
|
float4 del = position1 - position0;
|
||||||
|
float len = mydot3(del, del);
|
||||||
|
|
||||||
|
float k = 0;
|
||||||
|
if( massLSC > 0.0f )
|
||||||
|
{
|
||||||
|
k = ((restLengthSquared - len)/(massLSC*(restLengthSquared+len)))*kst;
|
||||||
|
}
|
||||||
|
|
||||||
|
position0 = position0 - del*(k*inverseMass0);
|
||||||
|
position1 = position1 + del*(k*inverseMass1);
|
||||||
|
|
||||||
|
// Ensure compiler does not re-order memory operations
|
||||||
|
mem_fence(CLK_LOCAL_MEM_FENCE);
|
||||||
|
|
||||||
|
vertexPositionSharedData[vertexAddress0] = position0;
|
||||||
|
vertexPositionSharedData[vertexAddress1] = position1;
|
||||||
|
|
||||||
|
// Ensure compiler does not re-order memory operations
|
||||||
|
mem_fence(CLK_LOCAL_MEM_FENCE);
|
||||||
|
|
||||||
|
|
||||||
|
++batch;
|
||||||
|
} while( batch < batchesWithinWavefront );
|
||||||
|
|
||||||
|
// Update the global memory vertices for the wavefronts
|
||||||
|
for( int vertex = laneInWavefront; vertex < verticesUsedByWave; vertex+=WAVEFRONT_SIZE )
|
||||||
|
{
|
||||||
|
int vertexAddress = g_vertexAddressesPerWavefront[wavefront*MAX_NUM_VERTICES_PER_WAVE + vertex];
|
||||||
|
|
||||||
|
g_vertexPositions[vertexAddress] = (float4)(vertexPositionSharedData[localWavefront*MAX_NUM_VERTICES_PER_WAVE + vertex].xyz, 0.f);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
);
|
||||||
@@ -0,0 +1,195 @@
|
|||||||
|
MSTRINGIFY(
|
||||||
|
|
||||||
|
typedef struct
|
||||||
|
{
|
||||||
|
int firstObject;
|
||||||
|
int endObject;
|
||||||
|
} CollisionObjectIndices;
|
||||||
|
|
||||||
|
typedef struct
|
||||||
|
{
|
||||||
|
float4 shapeTransform[4]; // column major 4x4 matrix
|
||||||
|
float4 linearVelocity;
|
||||||
|
float4 angularVelocity;
|
||||||
|
|
||||||
|
int softBodyIdentifier;
|
||||||
|
int collisionShapeType;
|
||||||
|
|
||||||
|
|
||||||
|
// Shape information
|
||||||
|
// Compressed from the union
|
||||||
|
float radius;
|
||||||
|
float halfHeight;
|
||||||
|
int upAxis;
|
||||||
|
|
||||||
|
float margin;
|
||||||
|
float friction;
|
||||||
|
|
||||||
|
int padding0;
|
||||||
|
|
||||||
|
} CollisionShapeDescription;
|
||||||
|
|
||||||
|
/* From btBroadphaseProxy.h */
|
||||||
|
__constant int CAPSULE_SHAPE_PROXYTYPE = 10;
|
||||||
|
|
||||||
|
/* Multiply column-major matrix against vector */
|
||||||
|
float4 matrixVectorMul( float4 matrix[4], float4 vector )
|
||||||
|
{
|
||||||
|
float4 returnVector;
|
||||||
|
float4 row0 = (float4)(matrix[0].x, matrix[1].x, matrix[2].x, matrix[3].x);
|
||||||
|
float4 row1 = (float4)(matrix[0].y, matrix[1].y, matrix[2].y, matrix[3].y);
|
||||||
|
float4 row2 = (float4)(matrix[0].z, matrix[1].z, matrix[2].z, matrix[3].z);
|
||||||
|
float4 row3 = (float4)(matrix[0].w, matrix[1].w, matrix[2].w, matrix[3].w);
|
||||||
|
returnVector.x = dot(row0, vector);
|
||||||
|
returnVector.y = dot(row1, vector);
|
||||||
|
returnVector.z = dot(row2, vector);
|
||||||
|
returnVector.w = dot(row3, vector);
|
||||||
|
return returnVector;
|
||||||
|
}
|
||||||
|
|
||||||
|
__kernel void
|
||||||
|
SolveCollisionsAndUpdateVelocitiesKernel(
|
||||||
|
const int numNodes,
|
||||||
|
const float isolverdt,
|
||||||
|
__global int *g_vertexClothIdentifier,
|
||||||
|
__global float4 *g_vertexPreviousPositions,
|
||||||
|
__global float * g_perClothFriction,
|
||||||
|
__global float * g_clothDampingFactor,
|
||||||
|
__global CollisionObjectIndices * g_perClothCollisionObjectIndices,
|
||||||
|
__global CollisionShapeDescription * g_collisionObjectDetails,
|
||||||
|
__global float4 * g_vertexForces,
|
||||||
|
__global float4 *g_vertexVelocities,
|
||||||
|
__global float4 *g_vertexPositions)
|
||||||
|
{
|
||||||
|
int nodeID = get_global_id(0);
|
||||||
|
float3 forceOnVertex = (float3)(0.f, 0.f, 0.f);
|
||||||
|
if( get_global_id(0) < numNodes )
|
||||||
|
{
|
||||||
|
int clothIdentifier = g_vertexClothIdentifier[nodeID];
|
||||||
|
|
||||||
|
// Abort if this is not a valid cloth
|
||||||
|
if( clothIdentifier < 0 )
|
||||||
|
return;
|
||||||
|
|
||||||
|
float4 position = (float4)(g_vertexPositions[nodeID].xyz, 1.f);
|
||||||
|
float4 previousPosition = (float4)(g_vertexPreviousPositions[nodeID].xyz, 1.f);
|
||||||
|
float3 velocity;
|
||||||
|
float clothFriction = g_perClothFriction[clothIdentifier];
|
||||||
|
float dampingFactor = g_clothDampingFactor[clothIdentifier];
|
||||||
|
float velocityCoefficient = (1.f - dampingFactor);
|
||||||
|
CollisionObjectIndices collisionObjectIndices = g_perClothCollisionObjectIndices[clothIdentifier];
|
||||||
|
|
||||||
|
if( collisionObjectIndices.firstObject != collisionObjectIndices.endObject )
|
||||||
|
{
|
||||||
|
velocity = (float3)(15, 0, 0);
|
||||||
|
|
||||||
|
/* We have some possible collisions to deal with */
|
||||||
|
for( int collision = collisionObjectIndices.firstObject; collision < collisionObjectIndices.endObject; ++collision )
|
||||||
|
{
|
||||||
|
CollisionShapeDescription shapeDescription = g_collisionObjectDetails[collision];
|
||||||
|
float colliderFriction = shapeDescription.friction;
|
||||||
|
|
||||||
|
if( shapeDescription.collisionShapeType == CAPSULE_SHAPE_PROXYTYPE )
|
||||||
|
{
|
||||||
|
/* Colliding with a capsule */
|
||||||
|
|
||||||
|
float capsuleHalfHeight = shapeDescription.halfHeight;
|
||||||
|
float capsuleRadius = shapeDescription.radius;
|
||||||
|
float capsuleMargin = shapeDescription.margin;
|
||||||
|
int capsuleupAxis = shapeDescription.upAxis;
|
||||||
|
|
||||||
|
/* Four columns of worldTransform matrix */
|
||||||
|
float4 worldTransform[4];
|
||||||
|
worldTransform[0] = shapeDescription.shapeTransform[0];
|
||||||
|
worldTransform[1] = shapeDescription.shapeTransform[1];
|
||||||
|
worldTransform[2] = shapeDescription.shapeTransform[2];
|
||||||
|
worldTransform[3] = shapeDescription.shapeTransform[3];
|
||||||
|
|
||||||
|
// Correctly define capsule centerline vector
|
||||||
|
float4 c1 = (float4)(0.f, 0.f, 0.f, 1.f);
|
||||||
|
float4 c2 = (float4)(0.f, 0.f, 0.f, 1.f);
|
||||||
|
c1.x = select( 0.f, -capsuleHalfHeight, capsuleupAxis == 0 );
|
||||||
|
c1.y = select( 0.f, -capsuleHalfHeight, capsuleupAxis == 1 );
|
||||||
|
c1.z = select( 0.f, -capsuleHalfHeight, capsuleupAxis == 2 );
|
||||||
|
c2.x = -c1.x;
|
||||||
|
c2.y = -c1.y;
|
||||||
|
c2.z = -c1.z;
|
||||||
|
|
||||||
|
float4 worldC1 = matrixVectorMul(worldTransform, c1);
|
||||||
|
float4 worldC2 = matrixVectorMul(worldTransform, c2);
|
||||||
|
float3 segment = (worldC2 - worldC1).xyz;
|
||||||
|
|
||||||
|
/* compute distance of tangent to vertex along line segment in capsule */
|
||||||
|
float distanceAlongSegment = -( dot( (worldC1 - position).xyz, segment ) / dot(segment, segment) );
|
||||||
|
|
||||||
|
float4 closestPoint = (worldC1 + (float4)(segment * distanceAlongSegment, 0.f));
|
||||||
|
float distanceFromLine = length(position - closestPoint);
|
||||||
|
float distanceFromC1 = length(worldC1 - position);
|
||||||
|
float distanceFromC2 = length(worldC2 - position);
|
||||||
|
|
||||||
|
/* Final distance from collision, point to push from, direction to push in
|
||||||
|
for impulse force */
|
||||||
|
float dist;
|
||||||
|
float3 normalVector;
|
||||||
|
if( distanceAlongSegment < 0 )
|
||||||
|
{
|
||||||
|
dist = distanceFromC1;
|
||||||
|
normalVector = normalize(position - worldC1).xyz;
|
||||||
|
} else if( distanceAlongSegment > 1.f ) {
|
||||||
|
dist = distanceFromC2;
|
||||||
|
normalVector = normalize(position - worldC2).xyz;
|
||||||
|
} else {
|
||||||
|
dist = distanceFromLine;
|
||||||
|
normalVector = normalize(position - closestPoint).xyz;
|
||||||
|
}
|
||||||
|
|
||||||
|
float3 colliderLinearVelocity = shapeDescription.linearVelocity.xyz;
|
||||||
|
float3 colliderAngularVelocity = shapeDescription.angularVelocity.xyz;
|
||||||
|
float3 velocityOfSurfacePoint = colliderLinearVelocity + cross(colliderAngularVelocity, position.xyz - (float3)(worldTransform[0].w, worldTransform[1].w, worldTransform[2].w));
|
||||||
|
|
||||||
|
float minDistance = capsuleRadius + capsuleMargin;
|
||||||
|
|
||||||
|
/* In case of no collision, this is the value of velocity */
|
||||||
|
velocity = (position - previousPosition).xyz * velocityCoefficient * isolverdt;
|
||||||
|
|
||||||
|
|
||||||
|
// Check for a collision
|
||||||
|
if( dist < minDistance )
|
||||||
|
{
|
||||||
|
/* Project back to surface along normal */
|
||||||
|
position = position + (float4)((minDistance - dist)*normalVector*0.9f, 0.f);
|
||||||
|
velocity = (position - previousPosition).xyz * velocityCoefficient * isolverdt;
|
||||||
|
float3 relativeVelocity = velocity - velocityOfSurfacePoint;
|
||||||
|
|
||||||
|
float3 p1 = normalize(cross(normalVector, segment));
|
||||||
|
float3 p2 = normalize(cross(p1, normalVector));
|
||||||
|
/* Full friction is sum of velocities in each direction of plane */
|
||||||
|
float3 frictionVector = p1*dot(relativeVelocity, p1) + p2*dot(relativeVelocity, p2);
|
||||||
|
|
||||||
|
/* Real friction is peak friction corrected by friction coefficients */
|
||||||
|
frictionVector = frictionVector * (colliderFriction*clothFriction);
|
||||||
|
|
||||||
|
float approachSpeed = dot(relativeVelocity, normalVector);
|
||||||
|
|
||||||
|
if( approachSpeed <= 0.0f )
|
||||||
|
forceOnVertex -= frictionVector;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
/* Update velocity */
|
||||||
|
float3 difference = position.xyz - previousPosition.xyz;
|
||||||
|
velocity = difference*velocityCoefficient*isolverdt;
|
||||||
|
}
|
||||||
|
|
||||||
|
g_vertexVelocities[nodeID] = (float4)(velocity, 0.f);
|
||||||
|
|
||||||
|
/* Update external force */
|
||||||
|
g_vertexForces[nodeID] = (float4)(forceOnVertex, 0.f);
|
||||||
|
|
||||||
|
g_vertexPositions[nodeID] = (float4)(position.xyz, 0.f);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
);
|
||||||
@@ -0,0 +1,213 @@
|
|||||||
|
MSTRINGIFY(
|
||||||
|
|
||||||
|
typedef struct
|
||||||
|
{
|
||||||
|
int firstObject;
|
||||||
|
int endObject;
|
||||||
|
} CollisionObjectIndices;
|
||||||
|
|
||||||
|
typedef struct
|
||||||
|
{
|
||||||
|
float4 shapeTransform[4]; /* column major 4x4 matrix */
|
||||||
|
float4 linearVelocity;
|
||||||
|
float4 angularVelocity;
|
||||||
|
|
||||||
|
int softBodyIdentifier;
|
||||||
|
int collisionShapeType;
|
||||||
|
|
||||||
|
|
||||||
|
// Shape information
|
||||||
|
// Compressed from the union
|
||||||
|
float radius;
|
||||||
|
float halfHeight;
|
||||||
|
int upAxis;
|
||||||
|
|
||||||
|
float margin;
|
||||||
|
float friction;
|
||||||
|
|
||||||
|
int padding0;
|
||||||
|
|
||||||
|
} CollisionShapeDescription;
|
||||||
|
|
||||||
|
/* From btBroadphaseProxy.h */
|
||||||
|
__constant int CAPSULE_SHAPE_PROXYTYPE = 10;
|
||||||
|
|
||||||
|
|
||||||
|
/* Multiply column-major matrix against vector */
|
||||||
|
float4 matrixVectorMul( float4 matrix[4], float4 vector )
|
||||||
|
{
|
||||||
|
float4 returnVector;
|
||||||
|
float4 row0 = (float4)(matrix[0].x, matrix[1].x, matrix[2].x, matrix[3].x);
|
||||||
|
float4 row1 = (float4)(matrix[0].y, matrix[1].y, matrix[2].y, matrix[3].y);
|
||||||
|
float4 row2 = (float4)(matrix[0].z, matrix[1].z, matrix[2].z, matrix[3].z);
|
||||||
|
float4 row3 = (float4)(matrix[0].w, matrix[1].w, matrix[2].w, matrix[3].w);
|
||||||
|
returnVector.x = dot(row0, vector);
|
||||||
|
returnVector.y = dot(row1, vector);
|
||||||
|
returnVector.z = dot(row2, vector);
|
||||||
|
returnVector.w = dot(row3, vector);
|
||||||
|
return returnVector;
|
||||||
|
}
|
||||||
|
|
||||||
|
__kernel void
|
||||||
|
SolveCollisionsAndUpdateVelocitiesKernel(
|
||||||
|
const int numNodes,
|
||||||
|
const float isolverdt,
|
||||||
|
__global int *g_vertexClothIdentifier,
|
||||||
|
__global float4 *g_vertexPreviousPositions,
|
||||||
|
__global float * g_perClothFriction,
|
||||||
|
__global float * g_clothDampingFactor,
|
||||||
|
__global CollisionObjectIndices * g_perClothCollisionObjectIndices,
|
||||||
|
__global CollisionShapeDescription * g_collisionObjectDetails,
|
||||||
|
__global float4 * g_vertexForces,
|
||||||
|
__global float4 *g_vertexVelocities,
|
||||||
|
__global float4 *g_vertexPositions,
|
||||||
|
__local CollisionShapeDescription *localCollisionShapes)
|
||||||
|
{
|
||||||
|
int nodeID = get_global_id(0);
|
||||||
|
float3 forceOnVertex = (float3)(0.f, 0.f, 0.f);
|
||||||
|
|
||||||
|
int clothIdentifier = g_vertexClothIdentifier[nodeID];
|
||||||
|
|
||||||
|
// Abort if this is not a valid cloth
|
||||||
|
if( clothIdentifier < 0 )
|
||||||
|
return;
|
||||||
|
|
||||||
|
float4 position = (float4)(g_vertexPositions[nodeID].xyz, 1.f);
|
||||||
|
float4 previousPosition = (float4)(g_vertexPreviousPositions[nodeID].xyz, 1.f);
|
||||||
|
float3 velocity;
|
||||||
|
float clothFriction = g_perClothFriction[clothIdentifier];
|
||||||
|
float dampingFactor = g_clothDampingFactor[clothIdentifier];
|
||||||
|
float velocityCoefficient = (1.f - dampingFactor);
|
||||||
|
CollisionObjectIndices collisionObjectIndices = g_perClothCollisionObjectIndices[clothIdentifier];
|
||||||
|
|
||||||
|
int numObjects = collisionObjectIndices.endObject - collisionObjectIndices.firstObject;
|
||||||
|
if( numObjects > 0 )
|
||||||
|
{
|
||||||
|
/* We have some possible collisions to deal with */
|
||||||
|
|
||||||
|
/* First load all of the collision objects into LDS */
|
||||||
|
int numObjects = collisionObjectIndices.endObject - collisionObjectIndices.firstObject;
|
||||||
|
if( get_local_id(0) < numObjects )
|
||||||
|
{
|
||||||
|
localCollisionShapes[get_local_id(0)] = g_collisionObjectDetails[ collisionObjectIndices.firstObject + get_local_id(0) ];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Safe as the vertices are padded so that not more than one soft body is in a group */
|
||||||
|
barrier(CLK_LOCAL_MEM_FENCE);
|
||||||
|
|
||||||
|
/* Annoyingly, even though I know the flow control is not varying, the compiler will not let me skip this */
|
||||||
|
if( numObjects > 0 )
|
||||||
|
{
|
||||||
|
velocity = (float3)(0, 0, 0);
|
||||||
|
|
||||||
|
|
||||||
|
// We have some possible collisions to deal with
|
||||||
|
for( int collision = 0; collision < numObjects; ++collision )
|
||||||
|
{
|
||||||
|
CollisionShapeDescription shapeDescription = localCollisionShapes[collision];
|
||||||
|
float colliderFriction = shapeDescription.friction;
|
||||||
|
|
||||||
|
if( shapeDescription.collisionShapeType == CAPSULE_SHAPE_PROXYTYPE )
|
||||||
|
{
|
||||||
|
/* Colliding with a capsule */
|
||||||
|
|
||||||
|
float capsuleHalfHeight = localCollisionShapes[collision].halfHeight;
|
||||||
|
float capsuleRadius = localCollisionShapes[collision].radius;
|
||||||
|
float capsuleMargin = localCollisionShapes[collision].margin;
|
||||||
|
int capsuleupAxis = localCollisionShapes[collision].upAxis;
|
||||||
|
|
||||||
|
float4 worldTransform[4];
|
||||||
|
worldTransform[0] = localCollisionShapes[collision].shapeTransform[0];
|
||||||
|
worldTransform[1] = localCollisionShapes[collision].shapeTransform[1];
|
||||||
|
worldTransform[2] = localCollisionShapes[collision].shapeTransform[2];
|
||||||
|
worldTransform[3] = localCollisionShapes[collision].shapeTransform[3];
|
||||||
|
|
||||||
|
// Correctly define capsule centerline vector
|
||||||
|
float4 c1 = (float4)(0.f, 0.f, 0.f, 1.f);
|
||||||
|
float4 c2 = (float4)(0.f, 0.f, 0.f, 1.f);
|
||||||
|
c1.x = select( 0.f, -capsuleHalfHeight, capsuleupAxis == 0 );
|
||||||
|
c1.y = select( 0.f, -capsuleHalfHeight, capsuleupAxis == 1 );
|
||||||
|
c1.z = select( 0.f, -capsuleHalfHeight, capsuleupAxis == 2 );
|
||||||
|
c2.x = -c1.x;
|
||||||
|
c2.y = -c1.y;
|
||||||
|
c2.z = -c1.z;
|
||||||
|
|
||||||
|
float4 worldC1 = matrixVectorMul(worldTransform, c1);
|
||||||
|
float4 worldC2 = matrixVectorMul(worldTransform, c2);
|
||||||
|
float3 segment = (worldC2 - worldC1).xyz;
|
||||||
|
|
||||||
|
|
||||||
|
/* compute distance of tangent to vertex along line segment in capsule */
|
||||||
|
float distanceAlongSegment = -( dot( (worldC1 - position).xyz, segment ) / dot(segment, segment) );
|
||||||
|
|
||||||
|
float4 closestPoint = (worldC1 + (float4)(segment * distanceAlongSegment, 0.f));
|
||||||
|
float distanceFromLine = length(position - closestPoint);
|
||||||
|
float distanceFromC1 = length(worldC1 - position);
|
||||||
|
float distanceFromC2 = length(worldC2 - position);
|
||||||
|
|
||||||
|
/* Final distance from collision, point to push from, direction to push in
|
||||||
|
for impulse force */
|
||||||
|
float dist;
|
||||||
|
float3 normalVector;
|
||||||
|
if( distanceAlongSegment < 0 )
|
||||||
|
{
|
||||||
|
dist = distanceFromC1;
|
||||||
|
normalVector = normalize(position - worldC1).xyz;
|
||||||
|
} else if( distanceAlongSegment > 1.f ) {
|
||||||
|
dist = distanceFromC2;
|
||||||
|
normalVector = normalize(position - worldC2).xyz;
|
||||||
|
} else {
|
||||||
|
dist = distanceFromLine;
|
||||||
|
normalVector = normalize(position - closestPoint).xyz;
|
||||||
|
}
|
||||||
|
|
||||||
|
float3 colliderLinearVelocity = localCollisionShapes[collision].linearVelocity.xyz;
|
||||||
|
float3 colliderAngularVelocity = localCollisionShapes[collision].angularVelocity.xyz;
|
||||||
|
float3 velocityOfSurfacePoint = colliderLinearVelocity + cross(colliderAngularVelocity, position.xyz - (float3)(worldTransform[0].w, worldTransform[1].w, worldTransform[2].w));
|
||||||
|
|
||||||
|
float minDistance = capsuleRadius + capsuleMargin;
|
||||||
|
|
||||||
|
/* In case of no collision, this is the value of velocity */
|
||||||
|
velocity = (position - previousPosition).xyz * velocityCoefficient * isolverdt;
|
||||||
|
|
||||||
|
|
||||||
|
/* Check for a collision */
|
||||||
|
if( dist < minDistance )
|
||||||
|
{
|
||||||
|
/* Project back to surface along normal */
|
||||||
|
position = position + (float4)((minDistance - dist)*normalVector*0.9f, 0.f);
|
||||||
|
velocity = (position - previousPosition).xyz * velocityCoefficient * isolverdt;
|
||||||
|
float3 relativeVelocity = velocity - velocityOfSurfacePoint;
|
||||||
|
|
||||||
|
float3 p1 = normalize(cross(normalVector, segment));
|
||||||
|
float3 p2 = normalize(cross(p1, normalVector));
|
||||||
|
/* Full friction is sum of velocities in each direction of plane */
|
||||||
|
float3 frictionVector = p1*dot(relativeVelocity, p1) + p2*dot(relativeVelocity, p2);
|
||||||
|
|
||||||
|
/* Real friction is peak friction corrected by friction coefficients */
|
||||||
|
frictionVector = frictionVector * (colliderFriction*clothFriction);
|
||||||
|
|
||||||
|
float approachSpeed = dot(relativeVelocity, normalVector);
|
||||||
|
|
||||||
|
if( approachSpeed <= 0.0f )
|
||||||
|
forceOnVertex -= frictionVector;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
/* Update velocity */
|
||||||
|
float3 difference = position.xyz - previousPosition.xyz;
|
||||||
|
velocity = difference*velocityCoefficient*isolverdt;
|
||||||
|
}
|
||||||
|
|
||||||
|
g_vertexVelocities[nodeID] = (float4)(velocity, 0.f);
|
||||||
|
|
||||||
|
/* Update external force */
|
||||||
|
g_vertexForces[nodeID] = (float4)(forceOnVertex, 0.f);
|
||||||
|
|
||||||
|
g_vertexPositions[nodeID] = (float4)(position.xyz, 0.f);
|
||||||
|
}
|
||||||
|
|
||||||
|
);
|
||||||
@@ -0,0 +1,82 @@
|
|||||||
|
MSTRINGIFY(
|
||||||
|
#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n
|
||||||
|
#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n
|
||||||
|
|
||||||
|
__kernel void
|
||||||
|
ComputeBoundsKernel(
|
||||||
|
const int numNodes,
|
||||||
|
const int numSoftBodies,
|
||||||
|
__global int * g_vertexClothIdentifier,
|
||||||
|
__global float4 * g_vertexPositions,
|
||||||
|
/* Unfortunately, to get the atomics below to work these arrays cannot be */
|
||||||
|
/* uint4, though that is the layout of the data */
|
||||||
|
/* Therefore this is little-endian-only code */
|
||||||
|
volatile __global uint * g_clothMinBounds,
|
||||||
|
volatile __global uint * g_clothMaxBounds,
|
||||||
|
volatile __local uint * clothMinBounds,
|
||||||
|
volatile __local uint * clothMaxBounds)
|
||||||
|
{
|
||||||
|
// Init min and max bounds arrays
|
||||||
|
if( get_local_id(0) < numSoftBodies )
|
||||||
|
{
|
||||||
|
|
||||||
|
clothMinBounds[get_local_id(0)*4] = UINT_MAX;
|
||||||
|
clothMinBounds[get_local_id(0)*4+1] = UINT_MAX;
|
||||||
|
clothMinBounds[get_local_id(0)*4+2] = UINT_MAX;
|
||||||
|
clothMinBounds[get_local_id(0)*4+3] = UINT_MAX;
|
||||||
|
clothMaxBounds[get_local_id(0)*4] = 0;
|
||||||
|
clothMaxBounds[get_local_id(0)*4+1] = 0;
|
||||||
|
clothMaxBounds[get_local_id(0)*4+2] = 0;
|
||||||
|
clothMaxBounds[get_local_id(0)*4+3] = 0;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
barrier(CLK_LOCAL_MEM_FENCE);
|
||||||
|
|
||||||
|
int nodeID = get_global_id(0);
|
||||||
|
if( nodeID < numNodes )
|
||||||
|
{
|
||||||
|
int clothIdentifier = g_vertexClothIdentifier[nodeID];
|
||||||
|
if( clothIdentifier >= 0 )
|
||||||
|
{
|
||||||
|
|
||||||
|
float4 position = (float4)(g_vertexPositions[nodeID].xyz, 0.f);
|
||||||
|
|
||||||
|
/* Reinterpret position as uint */
|
||||||
|
uint4 positionUInt = (uint4)(as_uint(position.x), as_uint(position.y), as_uint(position.z), 0);
|
||||||
|
|
||||||
|
/* Invert sign bit of positives and whole of negatives to allow comparison as unsigned ints */
|
||||||
|
positionUInt.x ^= (1+~(positionUInt.x >> 31) | 0x80000000);
|
||||||
|
positionUInt.y ^= (1+~(positionUInt.y >> 31) | 0x80000000);
|
||||||
|
positionUInt.z ^= (1+~(positionUInt.z >> 31) | 0x80000000);
|
||||||
|
|
||||||
|
// Min/max with the LDS values
|
||||||
|
atom_min(&(clothMinBounds[clothIdentifier*4]), positionUInt.x);
|
||||||
|
atom_min(&(clothMinBounds[clothIdentifier*4+1]), positionUInt.y);
|
||||||
|
atom_min(&(clothMinBounds[clothIdentifier*4+2]), positionUInt.z);
|
||||||
|
|
||||||
|
atom_max(&(clothMaxBounds[clothIdentifier*4]), positionUInt.x);
|
||||||
|
atom_max(&(clothMaxBounds[clothIdentifier*4+1]), positionUInt.y);
|
||||||
|
atom_max(&(clothMaxBounds[clothIdentifier*4+2]), positionUInt.z);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
barrier(CLK_LOCAL_MEM_FENCE);
|
||||||
|
|
||||||
|
|
||||||
|
/* Use global atomics to update the global versions of the data */
|
||||||
|
if( get_local_id(0) < numSoftBodies )
|
||||||
|
{
|
||||||
|
/*atom_min(&(g_clothMinBounds[get_local_id(0)].x), clothMinBounds[get_local_id(0)].x);*/
|
||||||
|
atom_min(&(g_clothMinBounds[get_local_id(0)*4]), clothMinBounds[get_local_id(0)*4]);
|
||||||
|
atom_min(&(g_clothMinBounds[get_local_id(0)*4+1]), clothMinBounds[get_local_id(0)*4+1]);
|
||||||
|
atom_min(&(g_clothMinBounds[get_local_id(0)*4+2]), clothMinBounds[get_local_id(0)*4+2]);
|
||||||
|
|
||||||
|
atom_max(&(g_clothMaxBounds[get_local_id(0)*4]), clothMaxBounds[get_local_id(0)*4]);
|
||||||
|
atom_max(&(g_clothMaxBounds[get_local_id(0)*4+1]), clothMaxBounds[get_local_id(0)*4+1]);
|
||||||
|
atom_max(&(g_clothMaxBounds[get_local_id(0)*4+2]), clothMaxBounds[get_local_id(0)*4+2]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
);
|
||||||
@@ -0,0 +1,46 @@
|
|||||||
|
MSTRINGIFY(
|
||||||
|
|
||||||
|
__kernel void
|
||||||
|
OutputToVertexArrayWithNormalsKernel(
|
||||||
|
const int startNode, const int numNodes, __global float *g_vertexBuffer,
|
||||||
|
const int positionOffset, const int positionStride, const __global float4* g_vertexPositions,
|
||||||
|
const int normalOffset, const int normalStride, const __global float4* g_vertexNormals )
|
||||||
|
{
|
||||||
|
int nodeID = get_global_id(0);
|
||||||
|
if( nodeID < numNodes )
|
||||||
|
{
|
||||||
|
float4 position = g_vertexPositions[nodeID + startNode];
|
||||||
|
float4 normal = g_vertexNormals[nodeID + startNode];
|
||||||
|
|
||||||
|
// Stride should account for the float->float4 conversion
|
||||||
|
int positionDestination = nodeID * positionStride + positionOffset;
|
||||||
|
g_vertexBuffer[positionDestination] = position.x;
|
||||||
|
g_vertexBuffer[positionDestination+1] = position.y;
|
||||||
|
g_vertexBuffer[positionDestination+2] = position.z;
|
||||||
|
|
||||||
|
int normalDestination = nodeID * normalStride + normalOffset;
|
||||||
|
g_vertexBuffer[normalDestination] = normal.x;
|
||||||
|
g_vertexBuffer[normalDestination+1] = normal.y;
|
||||||
|
g_vertexBuffer[normalDestination+2] = normal.z;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
__kernel void
|
||||||
|
OutputToVertexArrayWithoutNormalsKernel(
|
||||||
|
const int startNode, const int numNodes, __global float *g_vertexBuffer,
|
||||||
|
const int positionOffset, const int positionStride, const __global float4* g_vertexPositions )
|
||||||
|
{
|
||||||
|
int nodeID = get_global_id(0);
|
||||||
|
if( nodeID < numNodes )
|
||||||
|
{
|
||||||
|
float4 position = g_vertexPositions[nodeID + startNode];
|
||||||
|
|
||||||
|
// Stride should account for the float->float4 conversion
|
||||||
|
int positionDestination = nodeID * positionStride + positionOffset;
|
||||||
|
g_vertexBuffer[positionDestination] = position.x;
|
||||||
|
g_vertexBuffer[positionDestination+1] = position.y;
|
||||||
|
g_vertexBuffer[positionDestination+2] = position.z;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
);
|
||||||
@@ -0,0 +1,140 @@
|
|||||||
|
/*
|
||||||
|
Bullet Continuous Collision Detection and Physics Library
|
||||||
|
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
|
||||||
|
|
||||||
|
This software is provided 'as-is', without any express or implied warranty.
|
||||||
|
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||||
|
Permission is granted to anyone to use this software for any purpose,
|
||||||
|
including commercial applications, and to alter it and redistribute it freely,
|
||||||
|
subject to the following restrictions:
|
||||||
|
|
||||||
|
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||||
|
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||||
|
3. This notice may not be removed or altered from any source distribution.
|
||||||
|
*/
|
||||||
|
|
||||||
|
MSTRINGIFY(
|
||||||
|
|
||||||
|
float mydot3(float4 a, float4 b)
|
||||||
|
{
|
||||||
|
return a.x*b.x + a.y*b.y + a.z*b.z;
|
||||||
|
}
|
||||||
|
|
||||||
|
__kernel __attribute__((reqd_work_group_size(WAVEFRONT_BLOCK_MULTIPLIER*WAVEFRONT_SIZE, 1, 1)))
|
||||||
|
void
|
||||||
|
SolvePositionsFromLinksKernel(
|
||||||
|
const int startWaveInBatch,
|
||||||
|
const int numWaves,
|
||||||
|
const float kst,
|
||||||
|
const float ti,
|
||||||
|
__global int2 *g_wavefrontBatchCountsVertexCounts,
|
||||||
|
__global int *g_vertexAddressesPerWavefront,
|
||||||
|
__global int2 * g_linksVertexIndices,
|
||||||
|
__global float * g_linksMassLSC,
|
||||||
|
__global float * g_linksRestLengthSquared,
|
||||||
|
__global float * g_verticesInverseMass,
|
||||||
|
__global float4 * g_vertexPositions,
|
||||||
|
__local int2 *wavefrontBatchCountsVertexCounts,
|
||||||
|
__local float4 *vertexPositionSharedData,
|
||||||
|
__local float *vertexInverseMassSharedData)
|
||||||
|
{
|
||||||
|
const int laneInWavefront = (get_global_id(0) & (WAVEFRONT_SIZE-1));
|
||||||
|
const int wavefront = startWaveInBatch + (get_global_id(0) / WAVEFRONT_SIZE);
|
||||||
|
const int firstWavefrontInBlock = startWaveInBatch + get_group_id(0) * WAVEFRONT_BLOCK_MULTIPLIER;
|
||||||
|
const int localWavefront = wavefront - firstWavefrontInBlock;
|
||||||
|
|
||||||
|
// Mask out in case there's a stray "wavefront" at the end that's been forced in through the multiplier
|
||||||
|
if( wavefront < (startWaveInBatch + numWaves) )
|
||||||
|
{
|
||||||
|
// Load the batch counts for the wavefronts
|
||||||
|
// Mask out in case there's a stray "wavefront" at the end that's been forced in through the multiplier
|
||||||
|
if( laneInWavefront == 0 )
|
||||||
|
{
|
||||||
|
int2 batchesAndVertexCountsWithinWavefront = g_wavefrontBatchCountsVertexCounts[wavefront];
|
||||||
|
wavefrontBatchCountsVertexCounts[localWavefront] = batchesAndVertexCountsWithinWavefront;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
barrier(CLK_LOCAL_MEM_FENCE);
|
||||||
|
|
||||||
|
|
||||||
|
int2 batchesAndVerticesWithinWavefront = wavefrontBatchCountsVertexCounts[localWavefront];
|
||||||
|
int batchesWithinWavefront = batchesAndVerticesWithinWavefront.x;
|
||||||
|
int verticesUsedByWave = batchesAndVerticesWithinWavefront.y;
|
||||||
|
|
||||||
|
// Load the vertices for the wavefronts
|
||||||
|
for( int vertex = laneInWavefront; vertex < verticesUsedByWave; vertex+=WAVEFRONT_SIZE )
|
||||||
|
{
|
||||||
|
int vertexAddress = g_vertexAddressesPerWavefront[wavefront*MAX_NUM_VERTICES_PER_WAVE + vertex];
|
||||||
|
|
||||||
|
vertexPositionSharedData[localWavefront*MAX_NUM_VERTICES_PER_WAVE + vertex] = g_vertexPositions[vertexAddress];
|
||||||
|
vertexInverseMassSharedData[localWavefront*MAX_NUM_VERTICES_PER_WAVE + vertex] = g_verticesInverseMass[vertexAddress];
|
||||||
|
}
|
||||||
|
|
||||||
|
barrier(CLK_LOCAL_MEM_FENCE);
|
||||||
|
|
||||||
|
// Loop through the batches performing the solve on each in LDS
|
||||||
|
int baseDataLocationForWave = WAVEFRONT_SIZE * wavefront * MAX_BATCHES_PER_WAVE;
|
||||||
|
|
||||||
|
//for( int batch = 0; batch < batchesWithinWavefront; ++batch )
|
||||||
|
|
||||||
|
int batch = 0;
|
||||||
|
do
|
||||||
|
{
|
||||||
|
int baseDataLocation = baseDataLocationForWave + WAVEFRONT_SIZE * batch;
|
||||||
|
int locationOfValue = baseDataLocation + laneInWavefront;
|
||||||
|
|
||||||
|
|
||||||
|
// These loads should all be perfectly linear across the WF
|
||||||
|
int2 localVertexIndices = g_linksVertexIndices[locationOfValue];
|
||||||
|
float massLSC = g_linksMassLSC[locationOfValue];
|
||||||
|
float restLengthSquared = g_linksRestLengthSquared[locationOfValue];
|
||||||
|
|
||||||
|
// LDS vertex addresses based on logical wavefront number in block and loaded index
|
||||||
|
int vertexAddress0 = MAX_NUM_VERTICES_PER_WAVE * localWavefront + localVertexIndices.x;
|
||||||
|
int vertexAddress1 = MAX_NUM_VERTICES_PER_WAVE * localWavefront + localVertexIndices.y;
|
||||||
|
|
||||||
|
float4 position0 = vertexPositionSharedData[vertexAddress0];
|
||||||
|
float4 position1 = vertexPositionSharedData[vertexAddress1];
|
||||||
|
|
||||||
|
float inverseMass0 = vertexInverseMassSharedData[vertexAddress0];
|
||||||
|
float inverseMass1 = vertexInverseMassSharedData[vertexAddress1];
|
||||||
|
|
||||||
|
float4 del = position1 - position0;
|
||||||
|
float len = mydot3(del, del);
|
||||||
|
|
||||||
|
float k = 0;
|
||||||
|
if( massLSC > 0.0f )
|
||||||
|
{
|
||||||
|
k = ((restLengthSquared - len)/(massLSC*(restLengthSquared+len)))*kst;
|
||||||
|
}
|
||||||
|
|
||||||
|
position0 = position0 - del*(k*inverseMass0);
|
||||||
|
position1 = position1 + del*(k*inverseMass1);
|
||||||
|
|
||||||
|
// Ensure compiler does not re-order memory operations
|
||||||
|
barrier(CLK_LOCAL_MEM_FENCE);
|
||||||
|
|
||||||
|
vertexPositionSharedData[vertexAddress0] = position0;
|
||||||
|
vertexPositionSharedData[vertexAddress1] = position1;
|
||||||
|
|
||||||
|
// Ensure compiler does not re-order memory operations
|
||||||
|
barrier(CLK_LOCAL_MEM_FENCE);
|
||||||
|
|
||||||
|
|
||||||
|
++batch;
|
||||||
|
} while( batch < batchesWithinWavefront );
|
||||||
|
|
||||||
|
// Update the global memory vertices for the wavefronts
|
||||||
|
for( int vertex = laneInWavefront; vertex < verticesUsedByWave; vertex+=WAVEFRONT_SIZE )
|
||||||
|
{
|
||||||
|
int vertexAddress = g_vertexAddressesPerWavefront[wavefront*MAX_NUM_VERTICES_PER_WAVE + vertex];
|
||||||
|
|
||||||
|
g_vertexPositions[vertexAddress] = (float4)(vertexPositionSharedData[localWavefront*MAX_NUM_VERTICES_PER_WAVE + vertex].xyz, 0.f);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
);
|
||||||
@@ -0,0 +1,204 @@
|
|||||||
|
MSTRINGIFY(
|
||||||
|
|
||||||
|
#pragma OPENCL EXTENSION cl_amd_printf : enable\n
|
||||||
|
|
||||||
|
float mydot3(float4 a, float4 b)
|
||||||
|
{
|
||||||
|
return a.x*b.x + a.y*b.y + a.z*b.z;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
typedef struct
|
||||||
|
{
|
||||||
|
int firstObject;
|
||||||
|
int endObject;
|
||||||
|
} CollisionObjectIndices;
|
||||||
|
|
||||||
|
typedef struct
|
||||||
|
{
|
||||||
|
float4 shapeTransform[4]; // column major 4x4 matrix
|
||||||
|
float4 linearVelocity;
|
||||||
|
float4 angularVelocity;
|
||||||
|
|
||||||
|
int softBodyIdentifier;
|
||||||
|
int collisionShapeType;
|
||||||
|
|
||||||
|
|
||||||
|
// Shape information
|
||||||
|
// Compressed from the union
|
||||||
|
float radius;
|
||||||
|
float halfHeight;
|
||||||
|
int upAxis;
|
||||||
|
|
||||||
|
float margin;
|
||||||
|
float friction;
|
||||||
|
|
||||||
|
int padding0;
|
||||||
|
|
||||||
|
} CollisionShapeDescription;
|
||||||
|
|
||||||
|
// From btBroadphaseProxy.h
|
||||||
|
__constant int CAPSULE_SHAPE_PROXYTYPE = 10;
|
||||||
|
|
||||||
|
// Multiply column-major matrix against vector
|
||||||
|
float4 matrixVectorMul( float4 matrix[4], float4 vector )
|
||||||
|
{
|
||||||
|
float4 returnVector;
|
||||||
|
float4 row0 = (float4)(matrix[0].x, matrix[1].x, matrix[2].x, matrix[3].x);
|
||||||
|
float4 row1 = (float4)(matrix[0].y, matrix[1].y, matrix[2].y, matrix[3].y);
|
||||||
|
float4 row2 = (float4)(matrix[0].z, matrix[1].z, matrix[2].z, matrix[3].z);
|
||||||
|
float4 row3 = (float4)(matrix[0].w, matrix[1].w, matrix[2].w, matrix[3].w);
|
||||||
|
returnVector.x = dot(row0, vector);
|
||||||
|
returnVector.y = dot(row1, vector);
|
||||||
|
returnVector.z = dot(row2, vector);
|
||||||
|
returnVector.w = dot(row3, vector);
|
||||||
|
return returnVector;
|
||||||
|
}
|
||||||
|
|
||||||
|
__kernel void
|
||||||
|
SolveCollisionsAndUpdateVelocitiesKernel(
|
||||||
|
const int numNodes,
|
||||||
|
const float isolverdt,
|
||||||
|
__global int *g_vertexClothIdentifier,
|
||||||
|
__global float4 *g_vertexPreviousPositions,
|
||||||
|
__global float * g_perClothFriction,
|
||||||
|
__global float * g_clothDampingFactor,
|
||||||
|
__global CollisionObjectIndices * g_perClothCollisionObjectIndices,
|
||||||
|
__global CollisionShapeDescription * g_collisionObjectDetails,
|
||||||
|
__global float4 * g_vertexForces,
|
||||||
|
__global float4 *g_vertexVelocities,
|
||||||
|
__global float4 *g_vertexPositions)
|
||||||
|
{
|
||||||
|
int nodeID = get_global_id(0);
|
||||||
|
float4 forceOnVertex = (float4)(0.f, 0.f, 0.f, 0.f);
|
||||||
|
|
||||||
|
if( get_global_id(0) < numNodes )
|
||||||
|
{
|
||||||
|
int clothIdentifier = g_vertexClothIdentifier[nodeID];
|
||||||
|
|
||||||
|
// Abort if this is not a valid cloth
|
||||||
|
if( clothIdentifier < 0 )
|
||||||
|
return;
|
||||||
|
|
||||||
|
|
||||||
|
float4 position = (float4)(g_vertexPositions[nodeID].xyz, 1.f);
|
||||||
|
float4 previousPosition = (float4)(g_vertexPreviousPositions[nodeID].xyz, 1.f);
|
||||||
|
|
||||||
|
float clothFriction = g_perClothFriction[clothIdentifier];
|
||||||
|
float dampingFactor = g_clothDampingFactor[clothIdentifier];
|
||||||
|
float velocityCoefficient = (1.f - dampingFactor);
|
||||||
|
float4 difference = position - previousPosition;
|
||||||
|
float4 velocity = difference*velocityCoefficient*isolverdt;
|
||||||
|
|
||||||
|
CollisionObjectIndices collisionObjectIndices = g_perClothCollisionObjectIndices[clothIdentifier];
|
||||||
|
|
||||||
|
int numObjects = collisionObjectIndices.endObject - collisionObjectIndices.firstObject;
|
||||||
|
|
||||||
|
if( numObjects > 0 )
|
||||||
|
{
|
||||||
|
// We have some possible collisions to deal with
|
||||||
|
for( int collision = collisionObjectIndices.firstObject; collision < collisionObjectIndices.endObject; ++collision )
|
||||||
|
{
|
||||||
|
CollisionShapeDescription shapeDescription = g_collisionObjectDetails[collision];
|
||||||
|
float colliderFriction = shapeDescription.friction;
|
||||||
|
|
||||||
|
if( shapeDescription.collisionShapeType == CAPSULE_SHAPE_PROXYTYPE )
|
||||||
|
{
|
||||||
|
// Colliding with a capsule
|
||||||
|
|
||||||
|
float capsuleHalfHeight = shapeDescription.halfHeight;
|
||||||
|
float capsuleRadius = shapeDescription.radius;
|
||||||
|
float capsuleMargin = shapeDescription.margin;
|
||||||
|
int capsuleupAxis = shapeDescription.upAxis;
|
||||||
|
|
||||||
|
// Four columns of worldTransform matrix
|
||||||
|
float4 worldTransform[4];
|
||||||
|
worldTransform[0] = shapeDescription.shapeTransform[0];
|
||||||
|
worldTransform[1] = shapeDescription.shapeTransform[1];
|
||||||
|
worldTransform[2] = shapeDescription.shapeTransform[2];
|
||||||
|
worldTransform[3] = shapeDescription.shapeTransform[3];
|
||||||
|
|
||||||
|
// Correctly define capsule centerline vector
|
||||||
|
float4 c1 = (float4)(0.f, 0.f, 0.f, 1.f);
|
||||||
|
float4 c2 = (float4)(0.f, 0.f, 0.f, 1.f);
|
||||||
|
c1.x = select( 0.f, -capsuleHalfHeight, capsuleupAxis == 0 );
|
||||||
|
c1.y = select( 0.f, -capsuleHalfHeight, capsuleupAxis == 1 );
|
||||||
|
c1.z = select( 0.f, -capsuleHalfHeight, capsuleupAxis == 2 );
|
||||||
|
c2.x = -c1.x;
|
||||||
|
c2.y = -c1.y;
|
||||||
|
c2.z = -c1.z;
|
||||||
|
|
||||||
|
|
||||||
|
float4 worldC1 = matrixVectorMul(worldTransform, c1);
|
||||||
|
float4 worldC2 = matrixVectorMul(worldTransform, c2);
|
||||||
|
float4 segment = (worldC2 - worldC1);
|
||||||
|
|
||||||
|
// compute distance of tangent to vertex along line segment in capsule
|
||||||
|
float distanceAlongSegment = -( mydot3( (worldC1 - position), segment ) / mydot3(segment, segment) );
|
||||||
|
|
||||||
|
float4 closestPoint = (worldC1 + (float4)(segment * distanceAlongSegment));
|
||||||
|
float distanceFromLine = length(position - closestPoint);
|
||||||
|
float distanceFromC1 = length(worldC1 - position);
|
||||||
|
float distanceFromC2 = length(worldC2 - position);
|
||||||
|
|
||||||
|
// Final distance from collision, point to push from, direction to push in
|
||||||
|
// for impulse force
|
||||||
|
float dist;
|
||||||
|
float4 normalVector;
|
||||||
|
if( distanceAlongSegment < 0 )
|
||||||
|
{
|
||||||
|
dist = distanceFromC1;
|
||||||
|
normalVector = (float4)(normalize(position - worldC1).xyz, 0.f);
|
||||||
|
} else if( distanceAlongSegment > 1.f ) {
|
||||||
|
dist = distanceFromC2;
|
||||||
|
normalVector = (float4)(normalize(position - worldC2).xyz, 0.f);
|
||||||
|
} else {
|
||||||
|
dist = distanceFromLine;
|
||||||
|
normalVector = (float4)(normalize(position - closestPoint).xyz, 0.f);
|
||||||
|
}
|
||||||
|
|
||||||
|
float4 colliderLinearVelocity = shapeDescription.linearVelocity;
|
||||||
|
float4 colliderAngularVelocity = shapeDescription.angularVelocity;
|
||||||
|
float4 velocityOfSurfacePoint = colliderLinearVelocity + cross(colliderAngularVelocity, position - (float4)(worldTransform[0].w, worldTransform[1].w, worldTransform[2].w, 0.f));
|
||||||
|
|
||||||
|
float minDistance = capsuleRadius + capsuleMargin;
|
||||||
|
|
||||||
|
// In case of no collision, this is the value of velocity
|
||||||
|
velocity = (position - previousPosition) * velocityCoefficient * isolverdt;
|
||||||
|
|
||||||
|
|
||||||
|
// Check for a collision
|
||||||
|
if( dist < minDistance )
|
||||||
|
{
|
||||||
|
// Project back to surface along normal
|
||||||
|
position = position + (float4)((minDistance - dist)*normalVector*0.9f);
|
||||||
|
velocity = (position - previousPosition) * velocityCoefficient * isolverdt;
|
||||||
|
float4 relativeVelocity = velocity - velocityOfSurfacePoint;
|
||||||
|
|
||||||
|
float4 p1 = normalize(cross(normalVector, segment));
|
||||||
|
float4 p2 = normalize(cross(p1, normalVector));
|
||||||
|
// Full friction is sum of velocities in each direction of plane
|
||||||
|
float4 frictionVector = p1*mydot3(relativeVelocity, p1) + p2*mydot3(relativeVelocity, p2);
|
||||||
|
|
||||||
|
// Real friction is peak friction corrected by friction coefficients
|
||||||
|
frictionVector = frictionVector * (colliderFriction*clothFriction);
|
||||||
|
|
||||||
|
float approachSpeed = dot(relativeVelocity, normalVector);
|
||||||
|
|
||||||
|
if( approachSpeed <= 0.0f )
|
||||||
|
forceOnVertex -= frictionVector;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
g_vertexVelocities[nodeID] = (float4)(velocity.xyz, 0.f);
|
||||||
|
|
||||||
|
// Update external force
|
||||||
|
g_vertexForces[nodeID] = (float4)(forceOnVertex.xyz, 0.f);
|
||||||
|
|
||||||
|
g_vertexPositions[nodeID] = (float4)(position.xyz, 0.f);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
);
|
||||||
@@ -0,0 +1,219 @@
|
|||||||
|
MSTRINGIFY(
|
||||||
|
float mydot3(float4 a, float4 b)
|
||||||
|
{
|
||||||
|
return a.x*b.x + a.y*b.y + a.z*b.z;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
typedef struct
|
||||||
|
{
|
||||||
|
int firstObject;
|
||||||
|
int endObject;
|
||||||
|
} CollisionObjectIndices;
|
||||||
|
|
||||||
|
typedef struct
|
||||||
|
{
|
||||||
|
float4 shapeTransform[4]; // column major 4x4 matrix
|
||||||
|
float4 linearVelocity;
|
||||||
|
float4 angularVelocity;
|
||||||
|
|
||||||
|
int softBodyIdentifier;
|
||||||
|
int collisionShapeType;
|
||||||
|
|
||||||
|
|
||||||
|
// Shape information
|
||||||
|
// Compressed from the union
|
||||||
|
float radius;
|
||||||
|
float halfHeight;
|
||||||
|
int upAxis;
|
||||||
|
|
||||||
|
float margin;
|
||||||
|
float friction;
|
||||||
|
|
||||||
|
int padding0;
|
||||||
|
|
||||||
|
} CollisionShapeDescription;
|
||||||
|
|
||||||
|
// From btBroadphaseProxy.h
|
||||||
|
__constant int CAPSULE_SHAPE_PROXYTYPE = 10;
|
||||||
|
|
||||||
|
|
||||||
|
/* Multiply column-major matrix against vector */
|
||||||
|
float4 matrixVectorMul( float4 matrix[4], float4 vector )
|
||||||
|
{
|
||||||
|
float4 returnVector;
|
||||||
|
float4 row0 = (float4)(matrix[0].x, matrix[1].x, matrix[2].x, matrix[3].x);
|
||||||
|
float4 row1 = (float4)(matrix[0].y, matrix[1].y, matrix[2].y, matrix[3].y);
|
||||||
|
float4 row2 = (float4)(matrix[0].z, matrix[1].z, matrix[2].z, matrix[3].z);
|
||||||
|
float4 row3 = (float4)(matrix[0].w, matrix[1].w, matrix[2].w, matrix[3].w);
|
||||||
|
returnVector.x = dot(row0, vector);
|
||||||
|
returnVector.y = dot(row1, vector);
|
||||||
|
returnVector.z = dot(row2, vector);
|
||||||
|
returnVector.w = dot(row3, vector);
|
||||||
|
return returnVector;
|
||||||
|
}
|
||||||
|
|
||||||
|
__kernel void
|
||||||
|
SolveCollisionsAndUpdateVelocitiesKernel(
|
||||||
|
const int numNodes,
|
||||||
|
const float isolverdt,
|
||||||
|
__global int *g_vertexClothIdentifier,
|
||||||
|
__global float4 *g_vertexPreviousPositions,
|
||||||
|
__global float * g_perClothFriction,
|
||||||
|
__global float * g_clothDampingFactor,
|
||||||
|
__global CollisionObjectIndices * g_perClothCollisionObjectIndices,
|
||||||
|
__global CollisionShapeDescription * g_collisionObjectDetails,
|
||||||
|
__global float4 * g_vertexForces,
|
||||||
|
__global float4 *g_vertexVelocities,
|
||||||
|
__global float4 *g_vertexPositions,
|
||||||
|
__local CollisionShapeDescription *localCollisionShapes)
|
||||||
|
{
|
||||||
|
int nodeID = get_global_id(0);
|
||||||
|
float4 forceOnVertex = (float4)(0.f, 0.f, 0.f, 0.f);
|
||||||
|
|
||||||
|
int clothIdentifier = g_vertexClothIdentifier[nodeID];
|
||||||
|
|
||||||
|
// Abort if this is not a valid cloth
|
||||||
|
if( clothIdentifier < 0 )
|
||||||
|
return;
|
||||||
|
|
||||||
|
|
||||||
|
float4 position = (float4)(g_vertexPositions[nodeID].xyz, 1.f);
|
||||||
|
float4 previousPosition = (float4)(g_vertexPreviousPositions[nodeID].xyz, 1.f);
|
||||||
|
float clothFriction = g_perClothFriction[clothIdentifier];
|
||||||
|
float dampingFactor = g_clothDampingFactor[clothIdentifier];
|
||||||
|
float velocityCoefficient = (1.f - dampingFactor);
|
||||||
|
|
||||||
|
// Update velocity
|
||||||
|
float4 difference = position - previousPosition;
|
||||||
|
float4 velocity = difference*velocityCoefficient*isolverdt;
|
||||||
|
CollisionObjectIndices collisionObjectIndices = g_perClothCollisionObjectIndices[clothIdentifier];
|
||||||
|
|
||||||
|
int numObjects = collisionObjectIndices.endObject - collisionObjectIndices.firstObject;
|
||||||
|
if( numObjects > 0 )
|
||||||
|
{
|
||||||
|
// We have some possible collisions to deal with
|
||||||
|
|
||||||
|
// First load all of the collision objects into LDS
|
||||||
|
int numObjects = collisionObjectIndices.endObject - collisionObjectIndices.firstObject;
|
||||||
|
if( get_local_id(0) < numObjects )
|
||||||
|
{
|
||||||
|
localCollisionShapes[get_local_id(0)] = g_collisionObjectDetails[ collisionObjectIndices.firstObject + get_local_id(0) ];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Safe as the vertices are padded so that not more than one soft body is in a group
|
||||||
|
barrier(CLK_LOCAL_MEM_FENCE);
|
||||||
|
|
||||||
|
// Annoyingly, even though I know the flow control is not varying, the compiler will not let me skip this
|
||||||
|
if( numObjects > 0 )
|
||||||
|
{
|
||||||
|
|
||||||
|
|
||||||
|
// We have some possible collisions to deal with
|
||||||
|
for( int collision = 0; collision < numObjects; ++collision )
|
||||||
|
{
|
||||||
|
//CollisionShapeDescription shapeDescription = localCollisionShapes[collision];
|
||||||
|
float colliderFriction = localCollisionShapes[collision].friction;
|
||||||
|
|
||||||
|
if( localCollisionShapes[collision].collisionShapeType == CAPSULE_SHAPE_PROXYTYPE )
|
||||||
|
{
|
||||||
|
// Colliding with a capsule
|
||||||
|
|
||||||
|
float capsuleHalfHeight = localCollisionShapes[collision].halfHeight;
|
||||||
|
float capsuleRadius = localCollisionShapes[collision].radius;
|
||||||
|
float capsuleMargin = localCollisionShapes[collision].margin;
|
||||||
|
int capsuleupAxis = localCollisionShapes[collision].upAxis;
|
||||||
|
|
||||||
|
float4 worldTransform[4];
|
||||||
|
worldTransform[0] = localCollisionShapes[collision].shapeTransform[0];
|
||||||
|
worldTransform[1] = localCollisionShapes[collision].shapeTransform[1];
|
||||||
|
worldTransform[2] = localCollisionShapes[collision].shapeTransform[2];
|
||||||
|
worldTransform[3] = localCollisionShapes[collision].shapeTransform[3];
|
||||||
|
|
||||||
|
//float4 c1 = (float4)(0.f, -capsuleHalfHeight, 0.f, 1.f);
|
||||||
|
//float4 c2 = (float4)(0.f, +capsuleHalfHeight, 0.f, 1.f);
|
||||||
|
// Correctly define capsule centerline vector
|
||||||
|
float4 c1 = (float4)(0.f, 0.f, 0.f, 1.f);
|
||||||
|
float4 c2 = (float4)(0.f, 0.f, 0.f, 1.f);
|
||||||
|
c1.x = select( 0.f, -capsuleHalfHeight, capsuleupAxis == 0 );
|
||||||
|
c1.y = select( 0.f, -capsuleHalfHeight, capsuleupAxis == 1 );
|
||||||
|
c1.z = select( 0.f, -capsuleHalfHeight, capsuleupAxis == 2 );
|
||||||
|
c2.x = -c1.x;
|
||||||
|
c2.y = -c1.y;
|
||||||
|
c2.z = -c1.z;
|
||||||
|
|
||||||
|
float4 worldC1 = matrixVectorMul(worldTransform, c1);
|
||||||
|
float4 worldC2 = matrixVectorMul(worldTransform, c2);
|
||||||
|
float4 segment = (worldC2 - worldC1);
|
||||||
|
|
||||||
|
|
||||||
|
// compute distance of tangent to vertex along line segment in capsule
|
||||||
|
float distanceAlongSegment = -( mydot3( (worldC1 - position), segment ) / mydot3(segment, segment) );
|
||||||
|
|
||||||
|
float4 closestPoint = (worldC1 + (float4)(segment * distanceAlongSegment));
|
||||||
|
float distanceFromLine = length(position - closestPoint);
|
||||||
|
float distanceFromC1 = length(worldC1 - position);
|
||||||
|
float distanceFromC2 = length(worldC2 - position);
|
||||||
|
|
||||||
|
// Final distance from collision, point to push from, direction to push in
|
||||||
|
// for impulse force
|
||||||
|
float dist;
|
||||||
|
float4 normalVector;
|
||||||
|
if( distanceAlongSegment < 0 )
|
||||||
|
{
|
||||||
|
dist = distanceFromC1;
|
||||||
|
normalVector = normalize(position - worldC1);
|
||||||
|
} else if( distanceAlongSegment > 1.f ) {
|
||||||
|
dist = distanceFromC2;
|
||||||
|
normalVector = normalize(position - worldC2);
|
||||||
|
} else {
|
||||||
|
dist = distanceFromLine;
|
||||||
|
normalVector = normalize(position - closestPoint);
|
||||||
|
}
|
||||||
|
|
||||||
|
float4 colliderLinearVelocity = localCollisionShapes[collision].linearVelocity;
|
||||||
|
float4 colliderAngularVelocity = localCollisionShapes[collision].angularVelocity;
|
||||||
|
float4 velocityOfSurfacePoint = colliderLinearVelocity + cross(colliderAngularVelocity, position - (float4)(worldTransform[0].w, worldTransform[1].w, worldTransform[2].w, 0.f));
|
||||||
|
|
||||||
|
float minDistance = capsuleRadius + capsuleMargin;
|
||||||
|
|
||||||
|
// In case of no collision, this is the value of velocity
|
||||||
|
velocity = (position - previousPosition) * velocityCoefficient * isolverdt;
|
||||||
|
|
||||||
|
|
||||||
|
// Check for a collision
|
||||||
|
if( dist < minDistance )
|
||||||
|
{
|
||||||
|
// Project back to surface along normal
|
||||||
|
position = position + (float4)((minDistance - dist)*normalVector*0.9f);
|
||||||
|
velocity = (position - previousPosition) * velocityCoefficient * isolverdt;
|
||||||
|
float4 relativeVelocity = velocity - velocityOfSurfacePoint;
|
||||||
|
|
||||||
|
float4 p1 = (float4)(normalize(cross(normalVector, segment)).xyz, 0.f);
|
||||||
|
float4 p2 = (float4)(normalize(cross(p1, normalVector)).xyz, 0.f);
|
||||||
|
// Full friction is sum of velocities in each direction of plane
|
||||||
|
float4 frictionVector = p1*mydot3(relativeVelocity, p1) + p2*mydot3(relativeVelocity, p2);
|
||||||
|
|
||||||
|
// Real friction is peak friction corrected by friction coefficients
|
||||||
|
frictionVector = frictionVector * (colliderFriction*clothFriction);
|
||||||
|
|
||||||
|
float approachSpeed = dot(relativeVelocity, normalVector);
|
||||||
|
|
||||||
|
if( approachSpeed <= 0.0f )
|
||||||
|
forceOnVertex -= frictionVector;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
g_vertexVelocities[nodeID] = (float4)(velocity.xyz, 0.f);
|
||||||
|
|
||||||
|
// Update external force
|
||||||
|
g_vertexForces[nodeID] = (float4)(forceOnVertex.xyz, 0.f);
|
||||||
|
|
||||||
|
g_vertexPositions[nodeID] = (float4)(position.xyz, 0.f);
|
||||||
|
}
|
||||||
|
|
||||||
|
);
|
||||||
@@ -0,0 +1,169 @@
|
|||||||
|
/*
|
||||||
|
Bullet Continuous Collision Detection and Physics Library
|
||||||
|
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
|
||||||
|
|
||||||
|
This software is provided 'as-is', without any express or implied warranty.
|
||||||
|
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||||
|
Permission is granted to anyone to use this software for any purpose,
|
||||||
|
including commercial applications, and to alter it and redistribute it freely,
|
||||||
|
subject to the following restrictions:
|
||||||
|
|
||||||
|
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||||
|
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||||
|
3. This notice may not be removed or altered from any source distribution.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "BulletMultiThreaded/GpuSoftBodySolvers/CPU/btSoftBodySolverData.h"
|
||||||
|
#include "btSoftBodySolverBuffer_OpenCL.h"
|
||||||
|
|
||||||
|
|
||||||
|
#ifndef BT_SOFT_BODY_SOLVER_LINK_DATA_OPENCL_SIMDAWARE_H
|
||||||
|
#define BT_SOFT_BODY_SOLVER_LINK_DATA_OPENCL_SIMDAWARE_H
|
||||||
|
|
||||||
|
|
||||||
|
class btSoftBodyLinkDataOpenCLSIMDAware : public btSoftBodyLinkData
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
bool m_onGPU;
|
||||||
|
|
||||||
|
cl_command_queue m_cqCommandQue;
|
||||||
|
|
||||||
|
const int m_wavefrontSize;
|
||||||
|
const int m_linksPerWorkItem;
|
||||||
|
const int m_maxLinksPerWavefront;
|
||||||
|
int m_maxBatchesWithinWave;
|
||||||
|
int m_maxVerticesWithinWave;
|
||||||
|
int m_numWavefronts;
|
||||||
|
|
||||||
|
int m_maxVertex;
|
||||||
|
|
||||||
|
struct NumBatchesVerticesPair
|
||||||
|
{
|
||||||
|
int numBatches;
|
||||||
|
int numVertices;
|
||||||
|
};
|
||||||
|
|
||||||
|
btAlignedObjectArray<int> m_linksPerWavefront;
|
||||||
|
btAlignedObjectArray<NumBatchesVerticesPair> m_numBatchesAndVerticesWithinWaves;
|
||||||
|
btOpenCLBuffer< NumBatchesVerticesPair > m_clNumBatchesAndVerticesWithinWaves;
|
||||||
|
|
||||||
|
// All arrays here will contain batches of m_maxLinksPerWavefront links
|
||||||
|
// ordered by wavefront.
|
||||||
|
// with either global vertex pairs or local vertex pairs
|
||||||
|
btAlignedObjectArray< int > m_wavefrontVerticesGlobalAddresses; // List of global vertices per wavefront
|
||||||
|
btOpenCLBuffer<int> m_clWavefrontVerticesGlobalAddresses;
|
||||||
|
btAlignedObjectArray< LinkNodePair > m_linkVerticesLocalAddresses; // Vertex pair for the link
|
||||||
|
btOpenCLBuffer<LinkNodePair> m_clLinkVerticesLocalAddresses;
|
||||||
|
btOpenCLBuffer<float> m_clLinkStrength;
|
||||||
|
btOpenCLBuffer<float> m_clLinksMassLSC;
|
||||||
|
btOpenCLBuffer<float> m_clLinksRestLengthSquared;
|
||||||
|
btOpenCLBuffer<float> m_clLinksRestLength;
|
||||||
|
btOpenCLBuffer<float> m_clLinksMaterialLinearStiffnessCoefficient;
|
||||||
|
|
||||||
|
struct BatchPair
|
||||||
|
{
|
||||||
|
int start;
|
||||||
|
int length;
|
||||||
|
|
||||||
|
BatchPair() :
|
||||||
|
start(0),
|
||||||
|
length(0)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
BatchPair( int s, int l ) :
|
||||||
|
start( s ),
|
||||||
|
length( l )
|
||||||
|
{
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Link addressing information for each cloth.
|
||||||
|
* Allows link locations to be computed independently of data batching.
|
||||||
|
*/
|
||||||
|
btAlignedObjectArray< int > m_linkAddresses;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Start and length values for computation batches over link data.
|
||||||
|
*/
|
||||||
|
btAlignedObjectArray< BatchPair > m_wavefrontBatchStartLengths;
|
||||||
|
|
||||||
|
btSoftBodyLinkDataOpenCLSIMDAware(cl_command_queue queue, cl_context ctx);
|
||||||
|
|
||||||
|
virtual ~btSoftBodyLinkDataOpenCLSIMDAware();
|
||||||
|
|
||||||
|
/** Allocate enough space in all link-related arrays to fit numLinks links */
|
||||||
|
virtual void createLinks( int numLinks );
|
||||||
|
|
||||||
|
/** Insert the link described into the correct data structures assuming space has already been allocated by a call to createLinks */
|
||||||
|
virtual void setLinkAt(
|
||||||
|
const LinkDescription &link,
|
||||||
|
int linkIndex );
|
||||||
|
|
||||||
|
virtual bool onAccelerator();
|
||||||
|
|
||||||
|
virtual bool moveToAccelerator();
|
||||||
|
|
||||||
|
virtual bool moveFromAccelerator();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Generate (and later update) the batching for the entire link set.
|
||||||
|
* This redoes a lot of work because it batches the entire set when each cloth is inserted.
|
||||||
|
* In theory we could delay it until just before we need the cloth.
|
||||||
|
* It's a one-off overhead, though, so that is a later optimisation.
|
||||||
|
*/
|
||||||
|
void generateBatches();
|
||||||
|
|
||||||
|
int getMaxVerticesPerWavefront()
|
||||||
|
{
|
||||||
|
return m_maxVerticesWithinWave;
|
||||||
|
}
|
||||||
|
|
||||||
|
int getWavefrontSize()
|
||||||
|
{
|
||||||
|
return m_wavefrontSize;
|
||||||
|
}
|
||||||
|
|
||||||
|
int getLinksPerWorkItem()
|
||||||
|
{
|
||||||
|
return m_linksPerWorkItem;
|
||||||
|
}
|
||||||
|
|
||||||
|
int getMaxLinksPerWavefront()
|
||||||
|
{
|
||||||
|
return m_maxLinksPerWavefront;
|
||||||
|
}
|
||||||
|
|
||||||
|
int getMaxBatchesPerWavefront()
|
||||||
|
{
|
||||||
|
return m_maxBatchesWithinWave;
|
||||||
|
}
|
||||||
|
|
||||||
|
int getNumWavefronts()
|
||||||
|
{
|
||||||
|
return m_numWavefronts;
|
||||||
|
}
|
||||||
|
|
||||||
|
NumBatchesVerticesPair getNumBatchesAndVerticesWithinWavefront( int wavefront )
|
||||||
|
{
|
||||||
|
return m_numBatchesAndVerticesWithinWaves[wavefront];
|
||||||
|
}
|
||||||
|
|
||||||
|
int getVertexGlobalAddresses( int vertexIndex )
|
||||||
|
{
|
||||||
|
return m_wavefrontVerticesGlobalAddresses[vertexIndex];
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get post-batching local addresses of the vertex pair for a link assuming all vertices used by a wavefront are loaded locally.
|
||||||
|
*/
|
||||||
|
LinkNodePair getVertexPairLocalAddresses( int linkIndex )
|
||||||
|
{
|
||||||
|
return m_linkVerticesLocalAddresses[linkIndex];
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#endif // #ifndef BT_SOFT_BODY_SOLVER_LINK_DATA_OPENCL_SIMDAWARE_H
|
||||||
@@ -0,0 +1,133 @@
|
|||||||
|
#include "btSoftBodySolverOutputCLtoGL.h"
|
||||||
|
#include <stdio.h> //@todo: remove the debugging printf at some stage
|
||||||
|
#include "btSoftBodySolver_OpenCL.h"
|
||||||
|
#include "BulletSoftBody/btSoftBodySolverVertexBuffer.h"
|
||||||
|
#include "btSoftBodySolverVertexBuffer_OpenGL.h"
|
||||||
|
#include "BulletSoftBody/btSoftBody.h"
|
||||||
|
|
||||||
|
#if (0)//CL_VERSION_1_1 == 1)
|
||||||
|
//OpenCL 1.1 kernels use float3
|
||||||
|
#define MSTRINGIFY(A) #A
|
||||||
|
static char* OutputToVertexArrayCLString =
|
||||||
|
#include "OpenCLC/OutputToVertexArray.cl"
|
||||||
|
#else
|
||||||
|
////OpenCL 1.0 kernels don't use float3
|
||||||
|
#define MSTRINGIFY(A) #A
|
||||||
|
static char* OutputToVertexArrayCLString =
|
||||||
|
#include "OpenCLC10/OutputToVertexArray.cl"
|
||||||
|
#endif //CL_VERSION_1_1
|
||||||
|
|
||||||
|
|
||||||
|
#define RELEASE_CL_KERNEL(kernelName) {if( kernelName ){ clReleaseKernel( kernelName ); kernelName = 0; }}
|
||||||
|
|
||||||
|
static const size_t workGroupSize = 128;
|
||||||
|
|
||||||
|
void btSoftBodySolverOutputCLtoGL::copySoftBodyToVertexBuffer( const btSoftBody * const softBody, btVertexBufferDescriptor *vertexBuffer )
|
||||||
|
{
|
||||||
|
|
||||||
|
btSoftBodySolver *solver = softBody->getSoftBodySolver();
|
||||||
|
btAssert( solver->getSolverType() == btSoftBodySolver::CL_SOLVER || solver->getSolverType() == btSoftBodySolver::CL_SIMD_SOLVER );
|
||||||
|
btOpenCLSoftBodySolver *dxSolver = static_cast< btOpenCLSoftBodySolver * >( solver );
|
||||||
|
checkInitialized();
|
||||||
|
btOpenCLAcceleratedSoftBodyInterface* currentCloth = dxSolver->findSoftBodyInterface( softBody );
|
||||||
|
btSoftBodyVertexDataOpenCL &vertexData( dxSolver->m_vertexData );
|
||||||
|
|
||||||
|
const int firstVertex = currentCloth->getFirstVertex();
|
||||||
|
const int lastVertex = firstVertex + currentCloth->getNumVertices();
|
||||||
|
|
||||||
|
if( vertexBuffer->getBufferType() == btVertexBufferDescriptor::OPENGL_BUFFER ) {
|
||||||
|
|
||||||
|
const btOpenGLInteropVertexBufferDescriptor *openGLVertexBuffer = static_cast< btOpenGLInteropVertexBufferDescriptor* >(vertexBuffer);
|
||||||
|
cl_int ciErrNum = CL_SUCCESS;
|
||||||
|
|
||||||
|
cl_mem clBuffer = openGLVertexBuffer->getBuffer();
|
||||||
|
cl_kernel outputKernel = outputToVertexArrayWithNormalsKernel;
|
||||||
|
if( !vertexBuffer->hasNormals() )
|
||||||
|
outputKernel = outputToVertexArrayWithoutNormalsKernel;
|
||||||
|
|
||||||
|
ciErrNum = clEnqueueAcquireGLObjects(m_cqCommandQue, 1, &clBuffer, 0, 0, NULL);
|
||||||
|
if( ciErrNum != CL_SUCCESS )
|
||||||
|
{
|
||||||
|
btAssert( 0 && "clEnqueueAcquireGLObjects(copySoftBodyToVertexBuffer)");
|
||||||
|
}
|
||||||
|
|
||||||
|
int numVertices = currentCloth->getNumVertices();
|
||||||
|
|
||||||
|
ciErrNum = clSetKernelArg(outputKernel, 0, sizeof(int), &firstVertex );
|
||||||
|
ciErrNum = clSetKernelArg(outputKernel, 1, sizeof(int), &numVertices );
|
||||||
|
ciErrNum = clSetKernelArg(outputKernel, 2, sizeof(cl_mem), (void*)&clBuffer );
|
||||||
|
if( vertexBuffer->hasVertexPositions() )
|
||||||
|
{
|
||||||
|
int vertexOffset = vertexBuffer->getVertexOffset();
|
||||||
|
int vertexStride = vertexBuffer->getVertexStride();
|
||||||
|
ciErrNum = clSetKernelArg(outputKernel, 3, sizeof(int), &vertexOffset );
|
||||||
|
ciErrNum = clSetKernelArg(outputKernel, 4, sizeof(int), &vertexStride );
|
||||||
|
ciErrNum = clSetKernelArg(outputKernel, 5, sizeof(cl_mem), (void*)&vertexData.m_clVertexPosition.m_buffer );
|
||||||
|
|
||||||
|
}
|
||||||
|
if( vertexBuffer->hasNormals() )
|
||||||
|
{
|
||||||
|
int normalOffset = vertexBuffer->getNormalOffset();
|
||||||
|
int normalStride = vertexBuffer->getNormalStride();
|
||||||
|
ciErrNum = clSetKernelArg(outputKernel, 6, sizeof(int), &normalOffset );
|
||||||
|
ciErrNum = clSetKernelArg(outputKernel, 7, sizeof(int), &normalStride );
|
||||||
|
ciErrNum = clSetKernelArg(outputKernel, 8, sizeof(cl_mem), (void*)&vertexData.m_clVertexNormal.m_buffer );
|
||||||
|
|
||||||
|
}
|
||||||
|
size_t numWorkItems = workGroupSize*((vertexData.getNumVertices() + (workGroupSize-1)) / workGroupSize);
|
||||||
|
ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue, outputKernel, 1, NULL, &numWorkItems, &workGroupSize,0 ,0 ,0);
|
||||||
|
if( ciErrNum != CL_SUCCESS )
|
||||||
|
{
|
||||||
|
btAssert( 0 && "enqueueNDRangeKernel(copySoftBodyToVertexBuffer)");
|
||||||
|
}
|
||||||
|
|
||||||
|
ciErrNum = clEnqueueReleaseGLObjects(m_cqCommandQue, 1, &clBuffer, 0, 0, 0);
|
||||||
|
if( ciErrNum != CL_SUCCESS )
|
||||||
|
{
|
||||||
|
btAssert( 0 && "clEnqueueReleaseGLObjects(copySoftBodyToVertexBuffer)");
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
btAssert( "Undefined output for this solver output" == false );
|
||||||
|
}
|
||||||
|
|
||||||
|
// clFinish in here may not be the best thing. It's possible that we should have a waitForFrameComplete function.
|
||||||
|
clFinish(m_cqCommandQue);
|
||||||
|
|
||||||
|
} // btSoftBodySolverOutputCLtoGL::outputToVertexBuffers
|
||||||
|
|
||||||
|
bool btSoftBodySolverOutputCLtoGL::buildShaders()
|
||||||
|
{
|
||||||
|
// Ensure current kernels are released first
|
||||||
|
releaseKernels();
|
||||||
|
|
||||||
|
bool returnVal = true;
|
||||||
|
|
||||||
|
if( m_shadersInitialized )
|
||||||
|
return true;
|
||||||
|
|
||||||
|
outputToVertexArrayWithNormalsKernel = clFunctions.compileCLKernelFromString( OutputToVertexArrayCLString, "OutputToVertexArrayWithNormalsKernel" );
|
||||||
|
outputToVertexArrayWithoutNormalsKernel = clFunctions.compileCLKernelFromString( OutputToVertexArrayCLString, "OutputToVertexArrayWithoutNormalsKernel" );
|
||||||
|
|
||||||
|
|
||||||
|
if( returnVal )
|
||||||
|
m_shadersInitialized = true;
|
||||||
|
|
||||||
|
return returnVal;
|
||||||
|
} // btSoftBodySolverOutputCLtoGL::buildShaders
|
||||||
|
|
||||||
|
void btSoftBodySolverOutputCLtoGL::releaseKernels()
|
||||||
|
{
|
||||||
|
RELEASE_CL_KERNEL( outputToVertexArrayWithNormalsKernel );
|
||||||
|
RELEASE_CL_KERNEL( outputToVertexArrayWithoutNormalsKernel );
|
||||||
|
|
||||||
|
m_shadersInitialized = false;
|
||||||
|
} // btSoftBodySolverOutputCLtoGL::releaseKernels
|
||||||
|
|
||||||
|
bool btSoftBodySolverOutputCLtoGL::checkInitialized()
|
||||||
|
{
|
||||||
|
if( !m_shadersInitialized )
|
||||||
|
if( buildShaders() )
|
||||||
|
m_shadersInitialized = true;
|
||||||
|
|
||||||
|
return m_shadersInitialized;
|
||||||
|
}
|
||||||
@@ -0,0 +1,62 @@
|
|||||||
|
/*
|
||||||
|
Bullet Continuous Collision Detection and Physics Library
|
||||||
|
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
|
||||||
|
|
||||||
|
This software is provided 'as-is', without any express or implied warranty.
|
||||||
|
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||||
|
Permission is granted to anyone to use this software for any purpose,
|
||||||
|
including commercial applications, and to alter it and redistribute it freely,
|
||||||
|
subject to the following restrictions:
|
||||||
|
|
||||||
|
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||||
|
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||||
|
3. This notice may not be removed or altered from any source distribution.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef BT_SOFT_BODY_SOLVER_OUTPUT_CL_TO_GL_H
|
||||||
|
#define BT_SOFT_BODY_SOLVER_OUTPUT_CL_TO_GL_H
|
||||||
|
|
||||||
|
#include "btSoftBodySolver_OpenCL.h"
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Class to manage movement of data from a solver to a given target.
|
||||||
|
* This version is the CL to GL interop version.
|
||||||
|
*/
|
||||||
|
class btSoftBodySolverOutputCLtoGL : public btSoftBodySolverOutput
|
||||||
|
{
|
||||||
|
protected:
|
||||||
|
cl_command_queue m_cqCommandQue;
|
||||||
|
cl_context m_cxMainContext;
|
||||||
|
CLFunctions clFunctions;
|
||||||
|
|
||||||
|
cl_kernel outputToVertexArrayWithNormalsKernel;
|
||||||
|
cl_kernel outputToVertexArrayWithoutNormalsKernel;
|
||||||
|
|
||||||
|
bool m_shadersInitialized;
|
||||||
|
|
||||||
|
virtual bool checkInitialized();
|
||||||
|
virtual bool buildShaders();
|
||||||
|
void releaseKernels();
|
||||||
|
public:
|
||||||
|
btSoftBodySolverOutputCLtoGL(cl_command_queue cqCommandQue, cl_context cxMainContext) :
|
||||||
|
m_cqCommandQue( cqCommandQue ),
|
||||||
|
m_cxMainContext( cxMainContext ),
|
||||||
|
clFunctions(cqCommandQue, cxMainContext),
|
||||||
|
outputToVertexArrayWithNormalsKernel( 0 ),
|
||||||
|
outputToVertexArrayWithoutNormalsKernel( 0 ),
|
||||||
|
m_shadersInitialized( false )
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual ~btSoftBodySolverOutputCLtoGL()
|
||||||
|
{
|
||||||
|
releaseKernels();
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Output current computed vertex data to the vertex buffers for all cloths in the solver. */
|
||||||
|
virtual void copySoftBodyToVertexBuffer( const btSoftBody * const softBody, btVertexBufferDescriptor *vertexBuffer );
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#endif // #ifndef BT_SOFT_BODY_SOLVER_OUTPUT_CL_TO_GL_H
|
||||||
@@ -0,0 +1,168 @@
|
|||||||
|
/*
|
||||||
|
Bullet Continuous Collision Detection and Physics Library
|
||||||
|
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
|
||||||
|
|
||||||
|
This software is provided 'as-is', without any express or implied warranty.
|
||||||
|
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||||
|
Permission is granted to anyone to use this software for any purpose,
|
||||||
|
including commercial applications, and to alter it and redistribute it freely,
|
||||||
|
subject to the following restrictions:
|
||||||
|
|
||||||
|
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||||
|
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||||
|
3. This notice may not be removed or altered from any source distribution.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef BT_SOFT_BODY_SOLVER_VERTEX_BUFFER_OPENGL_H
|
||||||
|
#define BT_SOFT_BODY_SOLVER_VERTEX_BUFFER_OPENGL_H
|
||||||
|
|
||||||
|
|
||||||
|
#include "BulletSoftBody/btSoftBodySolverVertexBuffer.h"
|
||||||
|
#ifdef USE_MINICL
|
||||||
|
#include "MiniCL/cl.h"
|
||||||
|
#else //USE_MINICL
|
||||||
|
#ifdef __APPLE__
|
||||||
|
#include <OpenCL/OpenCL.h>
|
||||||
|
#else
|
||||||
|
#include <CL/cl.h>
|
||||||
|
#endif //__APPLE__
|
||||||
|
#endif//USE_MINICL
|
||||||
|
#ifndef USE_MINICL
|
||||||
|
#include <CL/cl_gl.h>
|
||||||
|
#endif //USE_MINICL
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef _WIN32//for glut.h
|
||||||
|
#include <windows.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
//think different
|
||||||
|
#if defined(__APPLE__) && !defined (VMDMESA)
|
||||||
|
#include <OpenGL/OpenGL.h>
|
||||||
|
#include <OpenGL/gl.h>
|
||||||
|
#include <OpenGL/glu.h>
|
||||||
|
#include <GLUT/glut.h>
|
||||||
|
#else
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef _WINDOWS
|
||||||
|
#include <windows.h>
|
||||||
|
#include <GL/gl.h>
|
||||||
|
#include <GL/glu.h>
|
||||||
|
#else
|
||||||
|
#include <GL/glut.h>
|
||||||
|
#endif //_WINDOWS
|
||||||
|
#endif //APPLE
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class btOpenGLInteropVertexBufferDescriptor : public btVertexBufferDescriptor
|
||||||
|
{
|
||||||
|
protected:
|
||||||
|
/** OpenCL context */
|
||||||
|
cl_context m_context;
|
||||||
|
|
||||||
|
/** OpenCL command queue */
|
||||||
|
cl_command_queue m_commandQueue;
|
||||||
|
|
||||||
|
/** OpenCL interop buffer */
|
||||||
|
cl_mem m_buffer;
|
||||||
|
|
||||||
|
/** VBO in GL that is the basis of the interop buffer */
|
||||||
|
GLuint m_openGLVBO;
|
||||||
|
|
||||||
|
|
||||||
|
public:
|
||||||
|
/**
|
||||||
|
* context is the OpenCL context this interop buffer will work in.
|
||||||
|
* queue is the command queue that kernels and data movement will be enqueued into.
|
||||||
|
* openGLVBO is the OpenGL vertex buffer data will be copied into.
|
||||||
|
* vertexOffset is the offset in floats to the first vertex.
|
||||||
|
* vertexStride is the stride in floats between vertices.
|
||||||
|
*/
|
||||||
|
btOpenGLInteropVertexBufferDescriptor( cl_command_queue cqCommandQue, cl_context context, GLuint openGLVBO, int vertexOffset, int vertexStride )
|
||||||
|
{
|
||||||
|
#ifndef USE_MINICL
|
||||||
|
cl_int ciErrNum = CL_SUCCESS;
|
||||||
|
m_context = context;
|
||||||
|
m_commandQueue = cqCommandQue;
|
||||||
|
|
||||||
|
m_vertexOffset = vertexOffset;
|
||||||
|
m_vertexStride = vertexStride;
|
||||||
|
|
||||||
|
m_openGLVBO = openGLVBO;
|
||||||
|
|
||||||
|
m_buffer = clCreateFromGLBuffer(m_context, CL_MEM_WRITE_ONLY, openGLVBO, &ciErrNum);
|
||||||
|
if( ciErrNum != CL_SUCCESS )
|
||||||
|
{
|
||||||
|
btAssert( 0 && "clEnqueueAcquireGLObjects(copySoftBodyToVertexBuffer)");
|
||||||
|
}
|
||||||
|
|
||||||
|
m_hasVertexPositions = true;
|
||||||
|
#else
|
||||||
|
btAssert(0);//MiniCL shouldn't get here
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* context is the OpenCL context this interop buffer will work in.
|
||||||
|
* queue is the command queue that kernels and data movement will be enqueued into.
|
||||||
|
* openGLVBO is the OpenGL vertex buffer data will be copied into.
|
||||||
|
* vertexOffset is the offset in floats to the first vertex.
|
||||||
|
* vertexStride is the stride in floats between vertices.
|
||||||
|
* normalOffset is the offset in floats to the first normal.
|
||||||
|
* normalStride is the stride in floats between normals.
|
||||||
|
*/
|
||||||
|
btOpenGLInteropVertexBufferDescriptor( cl_command_queue cqCommandQue, cl_context context, GLuint openGLVBO, int vertexOffset, int vertexStride, int normalOffset, int normalStride )
|
||||||
|
{
|
||||||
|
#ifndef USE_MINICL
|
||||||
|
cl_int ciErrNum = CL_SUCCESS;
|
||||||
|
m_context = context;
|
||||||
|
m_commandQueue = cqCommandQue;
|
||||||
|
|
||||||
|
m_openGLVBO = openGLVBO;
|
||||||
|
|
||||||
|
m_buffer = clCreateFromGLBuffer(m_context, CL_MEM_WRITE_ONLY, openGLVBO, &ciErrNum);
|
||||||
|
if( ciErrNum != CL_SUCCESS )
|
||||||
|
{
|
||||||
|
btAssert( 0 && "clEnqueueAcquireGLObjects(copySoftBodyToVertexBuffer)");
|
||||||
|
}
|
||||||
|
|
||||||
|
m_vertexOffset = vertexOffset;
|
||||||
|
m_vertexStride = vertexStride;
|
||||||
|
m_hasVertexPositions = true;
|
||||||
|
|
||||||
|
m_normalOffset = normalOffset;
|
||||||
|
m_normalStride = normalStride;
|
||||||
|
m_hasNormals = true;
|
||||||
|
#else
|
||||||
|
btAssert(0);
|
||||||
|
#endif //USE_MINICL
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual ~btOpenGLInteropVertexBufferDescriptor()
|
||||||
|
{
|
||||||
|
clReleaseMemObject( m_buffer );
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Return the type of the vertex buffer descriptor.
|
||||||
|
*/
|
||||||
|
virtual BufferTypes getBufferType() const
|
||||||
|
{
|
||||||
|
return OPENGL_BUFFER;
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual cl_context getContext() const
|
||||||
|
{
|
||||||
|
return m_context;
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual cl_mem getBuffer() const
|
||||||
|
{
|
||||||
|
return m_buffer;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif // #ifndef BT_SOFT_BODY_SOLVER_VERTEX_BUFFER_OPENGL_H
|
||||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,82 @@
|
|||||||
|
/*
|
||||||
|
Bullet Continuous Collision Detection and Physics Library
|
||||||
|
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
|
||||||
|
|
||||||
|
This software is provided 'as-is', without any express or implied warranty.
|
||||||
|
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||||
|
Permission is granted to anyone to use this software for any purpose,
|
||||||
|
including commercial applications, and to alter it and redistribute it freely,
|
||||||
|
subject to the following restrictions:
|
||||||
|
|
||||||
|
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||||
|
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||||
|
3. This notice may not be removed or altered from any source distribution.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef BT_SOFT_BODY_SOLVER_OPENCL_SIMDAWARE_H
|
||||||
|
#define BT_SOFT_BODY_SOLVER_OPENCL_SIMDAWARE_H
|
||||||
|
|
||||||
|
#include "stddef.h" //for size_t
|
||||||
|
#include "vectormath/vmInclude.h"
|
||||||
|
|
||||||
|
#include "btSoftBodySolver_OpenCL.h"
|
||||||
|
#include "btSoftBodySolverBuffer_OpenCL.h"
|
||||||
|
#include "btSoftBodySolverLinkData_OpenCLSIMDAware.h"
|
||||||
|
#include "btSoftBodySolverVertexData_OpenCL.h"
|
||||||
|
#include "btSoftBodySolverTriangleData_OpenCL.h"
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class btOpenCLSoftBodySolverSIMDAware : public btOpenCLSoftBodySolver
|
||||||
|
{
|
||||||
|
protected:
|
||||||
|
|
||||||
|
|
||||||
|
btSoftBodyLinkDataOpenCLSIMDAware m_linkData;
|
||||||
|
|
||||||
|
|
||||||
|
bool m_shadersInitialized;
|
||||||
|
|
||||||
|
|
||||||
|
bool buildShaders();
|
||||||
|
|
||||||
|
|
||||||
|
void updateConstants( float timeStep );
|
||||||
|
|
||||||
|
float computeTriangleArea(
|
||||||
|
const Vectormath::Aos::Point3 &vertex0,
|
||||||
|
const Vectormath::Aos::Point3 &vertex1,
|
||||||
|
const Vectormath::Aos::Point3 &vertex2 );
|
||||||
|
|
||||||
|
|
||||||
|
//////////////////////////////////////
|
||||||
|
// Kernel dispatches
|
||||||
|
void solveLinksForPosition( int startLink, int numLinks, float kst, float ti );
|
||||||
|
|
||||||
|
void solveCollisionsAndUpdateVelocities( float isolverdt );
|
||||||
|
// End kernel dispatches
|
||||||
|
/////////////////////////////////////
|
||||||
|
|
||||||
|
public:
|
||||||
|
btOpenCLSoftBodySolverSIMDAware(cl_command_queue queue,cl_context ctx);
|
||||||
|
|
||||||
|
virtual ~btOpenCLSoftBodySolverSIMDAware();
|
||||||
|
|
||||||
|
virtual SolverTypes getSolverType() const
|
||||||
|
{
|
||||||
|
return CL_SIMD_SOLVER;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
virtual btSoftBodyLinkData &getLinkData();
|
||||||
|
|
||||||
|
|
||||||
|
virtual void optimize( btAlignedObjectArray< btSoftBody * > &softBodies , bool forceUpdate=false);
|
||||||
|
|
||||||
|
virtual void solveConstraints( float solverdt );
|
||||||
|
|
||||||
|
}; // btOpenCLSoftBodySolverSIMDAware
|
||||||
|
|
||||||
|
#endif // #ifndef BT_SOFT_BODY_SOLVER_OPENCL_SIMDAWARE_H
|
||||||
Reference in New Issue
Block a user