added missing GPU cloth simulation files and DX11/OpenCL kernels

Thanks to Cameron Hart for the report, see Issue 486
This commit is contained in:
erwin.coumans
2011-02-28 05:29:54 +00:00
parent a522cb98d9
commit 8cb14e178e
19 changed files with 3496 additions and 0 deletions

View File

@@ -0,0 +1,83 @@
MSTRINGIFY(
cbuffer ComputeBoundsCB : register( b0 )
{
int numNodes;
int numSoftBodies;
int padding1;
int padding2;
};
// Node indices for each link
StructuredBuffer<int> g_vertexClothIdentifier : register( t0 );
StructuredBuffer<float4> g_vertexPositions : register( t1 );
RWStructuredBuffer<uint4> g_clothMinBounds : register( u0 );
RWStructuredBuffer<uint4> g_clothMaxBounds : register( u1 );
groupshared uint4 clothMinBounds[256];
groupshared uint4 clothMaxBounds[256];
[numthreads(128, 1, 1)]
void
ComputeBoundsKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
{
const unsigned int UINT_MAX = 0xffffffff;
// Init min and max bounds arrays
if( GTid.x < numSoftBodies )
{
clothMinBounds[GTid.x] = uint4(UINT_MAX, UINT_MAX, UINT_MAX, UINT_MAX);
clothMaxBounds[GTid.x] = uint4(0,0,0,0);
}
AllMemoryBarrierWithGroupSync();
int nodeID = DTid.x;
if( nodeID < numNodes )
{
int clothIdentifier = g_vertexClothIdentifier[nodeID];
if( clothIdentifier >= 0 )
{
float3 position = g_vertexPositions[nodeID].xyz;
// Reinterpret position as uint
uint3 positionUInt = uint3(asuint(position.x), asuint(position.y), asuint(position.z));
// Invert sign bit of positives and whole of negatives to allow comparison as unsigned ints
//positionUInt.x ^= uint((-int(positionUInt.x >> 31) | 0x80000000));
//positionUInt.y ^= uint((-int(positionUInt.y >> 31) | 0x80000000));
//positionUInt.z ^= uint((-int(positionUInt.z >> 31) | 0x80000000));
positionUInt.x ^= (1+~(positionUInt.x >> 31) | 0x80000000);
positionUInt.y ^= (1+~(positionUInt.y >> 31) | 0x80000000);
positionUInt.z ^= (1+~(positionUInt.z >> 31) | 0x80000000);
// Min/max with the LDS values
InterlockedMin(clothMinBounds[clothIdentifier].x, positionUInt.x);
InterlockedMin(clothMinBounds[clothIdentifier].y, positionUInt.y);
InterlockedMin(clothMinBounds[clothIdentifier].z, positionUInt.z);
InterlockedMax(clothMaxBounds[clothIdentifier].x, positionUInt.x);
InterlockedMax(clothMaxBounds[clothIdentifier].y, positionUInt.y);
InterlockedMax(clothMaxBounds[clothIdentifier].z, positionUInt.z);
}
}
AllMemoryBarrierWithGroupSync();
// Use global atomics to update the global versions of the data
if( GTid.x < numSoftBodies )
{
InterlockedMin(g_clothMinBounds[GTid.x].x, clothMinBounds[GTid.x].x);
InterlockedMin(g_clothMinBounds[GTid.x].y, clothMinBounds[GTid.x].y);
InterlockedMin(g_clothMinBounds[GTid.x].z, clothMinBounds[GTid.x].z);
InterlockedMax(g_clothMaxBounds[GTid.x].x, clothMaxBounds[GTid.x].x);
InterlockedMax(g_clothMaxBounds[GTid.x].y, clothMaxBounds[GTid.x].y);
InterlockedMax(g_clothMaxBounds[GTid.x].z, clothMaxBounds[GTid.x].z);
}
}
);

View File

@@ -0,0 +1,170 @@
MSTRINGIFY(
cbuffer SolvePositionsFromLinksKernelCB : register( b0 )
{
unsigned int numNodes;
float isolverdt;
int padding0;
int padding1;
};
struct CollisionObjectIndices
{
int firstObject;
int endObject;
};
struct CollisionShapeDescription
{
float4x4 shapeTransform;
float4 linearVelocity;
float4 angularVelocity;
int softBodyIdentifier;
int collisionShapeType;
// Shape information
// Compressed from the union
float radius;
float halfHeight;
float margin;
float friction;
int padding0;
int padding1;
};
// From btBroadphaseProxy.h
static const int CAPSULE_SHAPE_PROXYTYPE = 10;
// Node indices for each link
StructuredBuffer<int> g_vertexClothIdentifier : register( t0 );
StructuredBuffer<float4> g_vertexPreviousPositions : register( t1 );
StructuredBuffer<float> g_perClothFriction : register( t2 );
StructuredBuffer<float> g_clothDampingFactor : register( t3 );
StructuredBuffer<CollisionObjectIndices> g_perClothCollisionObjectIndices : register( t4 );
StructuredBuffer<CollisionShapeDescription> g_collisionObjectDetails : register( t5 );
RWStructuredBuffer<float4> g_vertexForces : register( u0 );
RWStructuredBuffer<float4> g_vertexVelocities : register( u1 );
RWStructuredBuffer<float4> g_vertexPositions : register( u2 );
[numthreads(128, 1, 1)]
void
SolveCollisionsAndUpdateVelocitiesKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
{
int nodeID = DTid.x;
float3 forceOnVertex = float3(0.f, 0.f, 0.f);
if( DTid.x < numNodes )
{
int clothIdentifier = g_vertexClothIdentifier[nodeID];
float4 position = float4(g_vertexPositions[nodeID].xyz, 1.f);
float4 previousPosition = float4(g_vertexPreviousPositions[nodeID].xyz, 1.f);
float3 velocity;
float clothFriction = g_perClothFriction[clothIdentifier];
float dampingFactor = g_clothDampingFactor[clothIdentifier];
float velocityCoefficient = (1.f - dampingFactor);
CollisionObjectIndices collisionObjectIndices = g_perClothCollisionObjectIndices[clothIdentifier];
if( collisionObjectIndices.firstObject != collisionObjectIndices.endObject )
{
velocity = float3(15, 0, 0);
// We have some possible collisions to deal with
for( int collision = collisionObjectIndices.firstObject; collision < collisionObjectIndices.endObject; ++collision )
{
CollisionShapeDescription shapeDescription = g_collisionObjectDetails[collision];
float colliderFriction = shapeDescription.friction;
if( shapeDescription.collisionShapeType == CAPSULE_SHAPE_PROXYTYPE )
{
// Colliding with a capsule
float capsuleHalfHeight = shapeDescription.halfHeight;
float capsuleRadius = shapeDescription.radius;
float capsuleMargin = shapeDescription.margin;
float4x4 worldTransform = shapeDescription.shapeTransform;
float4 c1 = float4(0.f, -capsuleHalfHeight, 0.f, 1.f);
float4 c2 = float4(0.f, +capsuleHalfHeight, 0.f, 1.f);
float4 worldC1 = mul(worldTransform, c1);
float4 worldC2 = mul(worldTransform, c2);
float3 segment = (worldC2 - worldC1).xyz;
// compute distance of tangent to vertex along line segment in capsule
float distanceAlongSegment = -( dot( (worldC1 - position).xyz, segment ) / dot(segment, segment) );
float4 closestPoint = (worldC1 + float4(segment * distanceAlongSegment, 0.f));
float distanceFromLine = length(position - closestPoint);
float distanceFromC1 = length(worldC1 - position);
float distanceFromC2 = length(worldC2 - position);
// Final distance from collision, point to push from, direction to push in
// for impulse force
float dist;
float3 normalVector;
if( distanceAlongSegment < 0 )
{
dist = distanceFromC1;
normalVector = normalize(position - worldC1).xyz;
} else if( distanceAlongSegment > 1.f ) {
dist = distanceFromC2;
normalVector = normalize(position - worldC2).xyz;
} else {
dist = distanceFromLine;
normalVector = normalize(position - closestPoint).xyz;
}
float3 colliderLinearVelocity = shapeDescription.linearVelocity.xyz;
float3 colliderAngularVelocity = shapeDescription.angularVelocity.xyz;
float3 velocityOfSurfacePoint = colliderLinearVelocity + cross(colliderAngularVelocity, position.xyz - worldTransform._m03_m13_m23);
float minDistance = capsuleRadius + capsuleMargin;
// In case of no collision, this is the value of velocity
velocity = (position - previousPosition).xyz * velocityCoefficient * isolverdt;
// Check for a collision
if( dist < minDistance )
{
// Project back to surface along normal
position = position + float4((minDistance - dist)*normalVector*0.9, 0.f);
velocity = (position - previousPosition).xyz * velocityCoefficient * isolverdt;
float3 relativeVelocity = velocity - velocityOfSurfacePoint;
float3 p1 = normalize(cross(normalVector, segment));
float3 p2 = normalize(cross(p1, normalVector));
// Full friction is sum of velocities in each direction of plane
float3 frictionVector = p1*dot(relativeVelocity, p1) + p2*dot(relativeVelocity, p2);
// Real friction is peak friction corrected by friction coefficients
frictionVector = frictionVector * (colliderFriction*clothFriction);
float approachSpeed = dot(relativeVelocity, normalVector);
if( approachSpeed <= 0.0 )
forceOnVertex -= frictionVector;
}
}
}
} else {
// Update velocity
float3 difference = position.xyz - previousPosition.xyz;
velocity = difference*velocityCoefficient*isolverdt;
}
g_vertexVelocities[nodeID] = float4(velocity, 0.f);
// Update external force
g_vertexForces[nodeID] = float4(forceOnVertex, 0.f);
g_vertexPositions[nodeID] = float4(position.xyz, 0.f);
}
}
);

View File

@@ -0,0 +1,191 @@
MSTRINGIFY(
cbuffer SolvePositionsFromLinksKernelCB : register( b0 )
{
unsigned int numNodes;
float isolverdt;
int padding0;
int padding1;
};
struct CollisionObjectIndices
{
int firstObject;
int endObject;
};
struct CollisionShapeDescription
{
float4x4 shapeTransform;
float4 linearVelocity;
float4 angularVelocity;
int softBodyIdentifier;
int collisionShapeType;
// Shape information
// Compressed from the union
float radius;
float halfHeight;
float margin;
float friction;
int padding0;
int padding1;
};
// From btBroadphaseProxy.h
static const int CAPSULE_SHAPE_PROXYTYPE = 10;
// Node indices for each link
StructuredBuffer<int> g_vertexClothIdentifier : register( t0 );
StructuredBuffer<float4> g_vertexPreviousPositions : register( t1 );
StructuredBuffer<float> g_perClothFriction : register( t2 );
StructuredBuffer<float> g_clothDampingFactor : register( t3 );
StructuredBuffer<CollisionObjectIndices> g_perClothCollisionObjectIndices : register( t4 );
StructuredBuffer<CollisionShapeDescription> g_collisionObjectDetails : register( t5 );
RWStructuredBuffer<float4> g_vertexForces : register( u0 );
RWStructuredBuffer<float4> g_vertexVelocities : register( u1 );
RWStructuredBuffer<float4> g_vertexPositions : register( u2 );
// A buffer of local collision shapes
// TODO: Iterate to support more than 16
groupshared CollisionShapeDescription localCollisionShapes[16];
[numthreads(128, 1, 1)]
void
SolveCollisionsAndUpdateVelocitiesKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
{
int nodeID = DTid.x;
float3 forceOnVertex = float3(0.f, 0.f, 0.f);
int clothIdentifier = g_vertexClothIdentifier[nodeID];
float4 position = float4(g_vertexPositions[nodeID].xyz, 1.f);
float4 previousPosition = float4(g_vertexPreviousPositions[nodeID].xyz, 1.f);
float3 velocity;
float clothFriction = g_perClothFriction[clothIdentifier];
float dampingFactor = g_clothDampingFactor[clothIdentifier];
float velocityCoefficient = (1.f - dampingFactor);
CollisionObjectIndices collisionObjectIndices = g_perClothCollisionObjectIndices[clothIdentifier];
int numObjects = collisionObjectIndices.endObject - collisionObjectIndices.firstObject;
if( numObjects > 0 )
{
// We have some possible collisions to deal with
// First load all of the collision objects into LDS
int numObjects = collisionObjectIndices.endObject - collisionObjectIndices.firstObject;
if( GTid.x < numObjects )
{
localCollisionShapes[GTid.x] = g_collisionObjectDetails[ collisionObjectIndices.firstObject + GTid.x ];
}
}
// Safe as the vertices are padded so that not more than one soft body is in a group
AllMemoryBarrierWithGroupSync();
// Annoyingly, even though I know the flow control is not varying, the compiler will not let me skip this
if( numObjects > 0 )
{
velocity = float3(0, 0, 0);
// We have some possible collisions to deal with
for( int collision = 0; collision < numObjects; ++collision )
{
CollisionShapeDescription shapeDescription = localCollisionShapes[collision];
float colliderFriction = shapeDescription.friction;
if( shapeDescription.collisionShapeType == CAPSULE_SHAPE_PROXYTYPE )
{
// Colliding with a capsule
float capsuleHalfHeight = localCollisionShapes[collision].halfHeight;
float capsuleRadius = localCollisionShapes[collision].radius;
float capsuleMargin = localCollisionShapes[collision].margin;
float4x4 worldTransform = localCollisionShapes[collision].shapeTransform;
float4 c1 = float4(0.f, -capsuleHalfHeight, 0.f, 1.f);
float4 c2 = float4(0.f, +capsuleHalfHeight, 0.f, 1.f);
float4 worldC1 = mul(worldTransform, c1);
float4 worldC2 = mul(worldTransform, c2);
float3 segment = (worldC2 - worldC1).xyz;
// compute distance of tangent to vertex along line segment in capsule
float distanceAlongSegment = -( dot( (worldC1 - position).xyz, segment ) / dot(segment, segment) );
float4 closestPoint = (worldC1 + float4(segment * distanceAlongSegment, 0.f));
float distanceFromLine = length(position - closestPoint);
float distanceFromC1 = length(worldC1 - position);
float distanceFromC2 = length(worldC2 - position);
// Final distance from collision, point to push from, direction to push in
// for impulse force
float dist;
float3 normalVector;
if( distanceAlongSegment < 0 )
{
dist = distanceFromC1;
normalVector = normalize(position - worldC1).xyz;
} else if( distanceAlongSegment > 1.f ) {
dist = distanceFromC2;
normalVector = normalize(position - worldC2).xyz;
} else {
dist = distanceFromLine;
normalVector = normalize(position - closestPoint).xyz;
}
float3 colliderLinearVelocity = localCollisionShapes[collision].linearVelocity.xyz;
float3 colliderAngularVelocity = localCollisionShapes[collision].angularVelocity.xyz;
float3 velocityOfSurfacePoint = colliderLinearVelocity + cross(colliderAngularVelocity, position.xyz - worldTransform._m03_m13_m23);
float minDistance = capsuleRadius + capsuleMargin;
// In case of no collision, this is the value of velocity
velocity = (position - previousPosition).xyz * velocityCoefficient * isolverdt;
// Check for a collision
if( dist < minDistance )
{
// Project back to surface along normal
position = position + float4((minDistance - dist)*normalVector*0.9, 0.f);
velocity = (position - previousPosition).xyz * velocityCoefficient * isolverdt;
float3 relativeVelocity = velocity - velocityOfSurfacePoint;
float3 p1 = normalize(cross(normalVector, segment));
float3 p2 = normalize(cross(p1, normalVector));
// Full friction is sum of velocities in each direction of plane
float3 frictionVector = p1*dot(relativeVelocity, p1) + p2*dot(relativeVelocity, p2);
// Real friction is peak friction corrected by friction coefficients
frictionVector = frictionVector * (colliderFriction*clothFriction);
float approachSpeed = dot(relativeVelocity, normalVector);
if( approachSpeed <= 0.0 )
forceOnVertex -= frictionVector;
}
}
}
} else {
// Update velocity
float3 difference = position.xyz - previousPosition.xyz;
velocity = difference*velocityCoefficient*isolverdt;
}
g_vertexVelocities[nodeID] = float4(velocity, 0.f);
// Update external force
g_vertexForces[nodeID] = float4(forceOnVertex, 0.f);
g_vertexPositions[nodeID] = float4(position.xyz, 0.f);
}
);

View File

@@ -0,0 +1,80 @@
MSTRINGIFY(
#pragma OPENCL EXTENSION cl_amd_printf : enable \n
__kernel void
ComputeBoundsKernel(
int numNodes,
int numSoftBodies,
__global int * g_vertexClothIdentifier,
__global float4 * g_vertexPositions,
volatile __global uint * g_clothMinBounds,
volatile __global uint * g_clothMaxBounds,
volatile __local uint * clothMinBounds,
volatile __local uint * clothMaxBounds)
{
// Init min and max bounds arrays
if( get_local_id(0) < numSoftBodies )
{
clothMinBounds[get_local_id(0)*4] = UINT_MAX;
clothMinBounds[get_local_id(0)*4+1] = UINT_MAX;
clothMinBounds[get_local_id(0)*4+2] = UINT_MAX;
clothMinBounds[get_local_id(0)*4+3] = UINT_MAX;
clothMaxBounds[get_local_id(0)*4] = 0;
clothMaxBounds[get_local_id(0)*4+1] = 0;
clothMaxBounds[get_local_id(0)*4+2] = 0;
clothMaxBounds[get_local_id(0)*4+3] = 0;
}
barrier(CLK_GLOBAL_MEM_FENCE);
int nodeID = get_global_id(0);
if( nodeID < numNodes )
{
int clothIdentifier = g_vertexClothIdentifier[get_global_id(0)];
if( clothIdentifier >= 0 )
{
float3 position = g_vertexPositions[get_global_id(0)].xyz;
/* Reinterpret position as uint */
uint3 positionUInt = (uint3)(as_uint(position.x), as_uint(position.y), as_uint(position.z));
/* Invert sign bit of positives and whole of negatives to allow comparison as unsigned ints */
positionUInt.x ^= (1+~(positionUInt.x >> 31) | 0x80000000);
positionUInt.y ^= (1+~(positionUInt.y >> 31) | 0x80000000);
positionUInt.z ^= (1+~(positionUInt.z >> 31) | 0x80000000);
/* Min/max with the LDS values */
atomic_min(&(clothMinBounds[clothIdentifier*4]), positionUInt.x);
atomic_min(&(clothMinBounds[clothIdentifier*4+1]), positionUInt.y);
atomic_min(&(clothMinBounds[clothIdentifier*4+2]), positionUInt.z);
atomic_max(&(clothMaxBounds[clothIdentifier*4]), positionUInt.x);
atomic_max(&(clothMaxBounds[clothIdentifier*4+1]), positionUInt.y);
atomic_max(&(clothMaxBounds[clothIdentifier*4+2]), positionUInt.z);
}
}
barrier(CLK_GLOBAL_MEM_FENCE);
/* Use global atomics to update the global versions of the data*/
if( get_local_id(0) < numSoftBodies )
{
/*atomic_min(&(g_clothMinBounds[get_local_id(0)].x), clothMinBounds[get_local_id(0)].x);*/
atomic_min(&(g_clothMinBounds[get_local_id(0)*4]), clothMinBounds[get_local_id(0)*4]);
atomic_min(&(g_clothMinBounds[get_local_id(0)*4+1]), clothMinBounds[get_local_id(0)*4+1]);
atomic_min(&(g_clothMinBounds[get_local_id(0)*4+2]), clothMinBounds[get_local_id(0)*4+2]);
atomic_max(&(g_clothMaxBounds[get_local_id(0)*4]), clothMaxBounds[get_local_id(0)*4]);
atomic_max(&(g_clothMaxBounds[get_local_id(0)*4+1]), clothMaxBounds[get_local_id(0)*4+1]);
atomic_max(&(g_clothMaxBounds[get_local_id(0)*4+2]), clothMaxBounds[get_local_id(0)*4+2]);
}
}
);

View File

@@ -0,0 +1,57 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
cbuffer OutputToVertexArrayCB : register( b0 )
{
int startNode;
int numNodes;
int offsetX;
int strideX;
int offsetN;
int strideN;
int padding1;
int padding2;
};
StructuredBuffer<float4> g_nodesx : register( t0 );
StructuredBuffer<float4> g_nodesn : register( t1 );
RWStructuredBuffer<float> g_vertexBuffer : register( u0 );
[numthreads(128, 1, 1)]
void
OutputToVertexArrayKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
{
int nodeID = DTid.x;
if( nodeID < numNodes )
{
float4 nodeX = g_nodesx[nodeID + startNode];
float4 nodeN = g_nodesn[nodeID + startNode];
// Stride should account for the float->float4 conversion
int positionDestination = nodeID * strideX + offsetX;
g_vertexBuffer[positionDestination] = nodeX.x;
g_vertexBuffer[positionDestination+1] = nodeX.y;
g_vertexBuffer[positionDestination+2] = nodeX.z;
int normalDestination = nodeID * strideN + offsetN;
g_vertexBuffer[normalDestination] = nodeN.x;
g_vertexBuffer[normalDestination+1] = nodeN.y;
g_vertexBuffer[normalDestination+2] = nodeN.z;
}
}

View File

@@ -0,0 +1,139 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
MSTRINGIFY(
float mydot3(float4 a, float4 b)
{
return a.x*b.x + a.y*b.y + a.z*b.z;
}
__kernel void
SolvePositionsFromLinksKernel(
const int startWaveInBatch,
const int numWaves,
const float kst,
const float ti,
__global int2 *g_wavefrontBatchCountsVertexCounts,
__global int *g_vertexAddressesPerWavefront,
__global int2 * g_linksVertexIndices,
__global float * g_linksMassLSC,
__global float * g_linksRestLengthSquared,
__global float * g_verticesInverseMass,
__global float4 * g_vertexPositions,
__local int2 *wavefrontBatchCountsVertexCounts,
__local float4 *vertexPositionSharedData,
__local float *vertexInverseMassSharedData)
{
const int laneInWavefront = (get_global_id(0) & (WAVEFRONT_SIZE-1));
const int wavefront = startWaveInBatch + (get_global_id(0) / WAVEFRONT_SIZE);
const int firstWavefrontInBlock = startWaveInBatch + get_group_id(0) * WAVEFRONT_BLOCK_MULTIPLIER;
const int localWavefront = wavefront - firstWavefrontInBlock;
// Mask out in case there's a stray "wavefront" at the end that's been forced in through the multiplier
if( wavefront < (startWaveInBatch + numWaves) )
{
// Load the batch counts for the wavefronts
// Mask out in case there's a stray "wavefront" at the end that's been forced in through the multiplier
if( laneInWavefront == 0 )
{
int2 batchesAndVertexCountsWithinWavefront = g_wavefrontBatchCountsVertexCounts[wavefront];
wavefrontBatchCountsVertexCounts[localWavefront] = batchesAndVertexCountsWithinWavefront;
}
mem_fence(CLK_LOCAL_MEM_FENCE);
int2 batchesAndVerticesWithinWavefront = wavefrontBatchCountsVertexCounts[localWavefront];
int batchesWithinWavefront = batchesAndVerticesWithinWavefront.x;
int verticesUsedByWave = batchesAndVerticesWithinWavefront.y;
// Load the vertices for the wavefronts
for( int vertex = laneInWavefront; vertex < verticesUsedByWave; vertex+=WAVEFRONT_SIZE )
{
int vertexAddress = g_vertexAddressesPerWavefront[wavefront*MAX_NUM_VERTICES_PER_WAVE + vertex];
vertexPositionSharedData[localWavefront*MAX_NUM_VERTICES_PER_WAVE + vertex] = g_vertexPositions[vertexAddress];
vertexInverseMassSharedData[localWavefront*MAX_NUM_VERTICES_PER_WAVE + vertex] = g_verticesInverseMass[vertexAddress];
}
mem_fence(CLK_LOCAL_MEM_FENCE);
// Loop through the batches performing the solve on each in LDS
int baseDataLocationForWave = WAVEFRONT_SIZE * wavefront * MAX_BATCHES_PER_WAVE;
//for( int batch = 0; batch < batchesWithinWavefront; ++batch )
int batch = 0;
do
{
int baseDataLocation = baseDataLocationForWave + WAVEFRONT_SIZE * batch;
int locationOfValue = baseDataLocation + laneInWavefront;
// These loads should all be perfectly linear across the WF
int2 localVertexIndices = g_linksVertexIndices[locationOfValue];
float massLSC = g_linksMassLSC[locationOfValue];
float restLengthSquared = g_linksRestLengthSquared[locationOfValue];
// LDS vertex addresses based on logical wavefront number in block and loaded index
int vertexAddress0 = MAX_NUM_VERTICES_PER_WAVE * localWavefront + localVertexIndices.x;
int vertexAddress1 = MAX_NUM_VERTICES_PER_WAVE * localWavefront + localVertexIndices.y;
float4 position0 = vertexPositionSharedData[vertexAddress0];
float4 position1 = vertexPositionSharedData[vertexAddress1];
float inverseMass0 = vertexInverseMassSharedData[vertexAddress0];
float inverseMass1 = vertexInverseMassSharedData[vertexAddress1];
float4 del = position1 - position0;
float len = mydot3(del, del);
float k = 0;
if( massLSC > 0.0f )
{
k = ((restLengthSquared - len)/(massLSC*(restLengthSquared+len)))*kst;
}
position0 = position0 - del*(k*inverseMass0);
position1 = position1 + del*(k*inverseMass1);
// Ensure compiler does not re-order memory operations
mem_fence(CLK_LOCAL_MEM_FENCE);
vertexPositionSharedData[vertexAddress0] = position0;
vertexPositionSharedData[vertexAddress1] = position1;
// Ensure compiler does not re-order memory operations
mem_fence(CLK_LOCAL_MEM_FENCE);
++batch;
} while( batch < batchesWithinWavefront );
// Update the global memory vertices for the wavefronts
for( int vertex = laneInWavefront; vertex < verticesUsedByWave; vertex+=WAVEFRONT_SIZE )
{
int vertexAddress = g_vertexAddressesPerWavefront[wavefront*MAX_NUM_VERTICES_PER_WAVE + vertex];
g_vertexPositions[vertexAddress] = (float4)(vertexPositionSharedData[localWavefront*MAX_NUM_VERTICES_PER_WAVE + vertex].xyz, 0.f);
}
}
}
);

View File

@@ -0,0 +1,195 @@
MSTRINGIFY(
typedef struct
{
int firstObject;
int endObject;
} CollisionObjectIndices;
typedef struct
{
float4 shapeTransform[4]; // column major 4x4 matrix
float4 linearVelocity;
float4 angularVelocity;
int softBodyIdentifier;
int collisionShapeType;
// Shape information
// Compressed from the union
float radius;
float halfHeight;
int upAxis;
float margin;
float friction;
int padding0;
} CollisionShapeDescription;
/* From btBroadphaseProxy.h */
__constant int CAPSULE_SHAPE_PROXYTYPE = 10;
/* Multiply column-major matrix against vector */
float4 matrixVectorMul( float4 matrix[4], float4 vector )
{
float4 returnVector;
float4 row0 = (float4)(matrix[0].x, matrix[1].x, matrix[2].x, matrix[3].x);
float4 row1 = (float4)(matrix[0].y, matrix[1].y, matrix[2].y, matrix[3].y);
float4 row2 = (float4)(matrix[0].z, matrix[1].z, matrix[2].z, matrix[3].z);
float4 row3 = (float4)(matrix[0].w, matrix[1].w, matrix[2].w, matrix[3].w);
returnVector.x = dot(row0, vector);
returnVector.y = dot(row1, vector);
returnVector.z = dot(row2, vector);
returnVector.w = dot(row3, vector);
return returnVector;
}
__kernel void
SolveCollisionsAndUpdateVelocitiesKernel(
const int numNodes,
const float isolverdt,
__global int *g_vertexClothIdentifier,
__global float4 *g_vertexPreviousPositions,
__global float * g_perClothFriction,
__global float * g_clothDampingFactor,
__global CollisionObjectIndices * g_perClothCollisionObjectIndices,
__global CollisionShapeDescription * g_collisionObjectDetails,
__global float4 * g_vertexForces,
__global float4 *g_vertexVelocities,
__global float4 *g_vertexPositions)
{
int nodeID = get_global_id(0);
float3 forceOnVertex = (float3)(0.f, 0.f, 0.f);
if( get_global_id(0) < numNodes )
{
int clothIdentifier = g_vertexClothIdentifier[nodeID];
// Abort if this is not a valid cloth
if( clothIdentifier < 0 )
return;
float4 position = (float4)(g_vertexPositions[nodeID].xyz, 1.f);
float4 previousPosition = (float4)(g_vertexPreviousPositions[nodeID].xyz, 1.f);
float3 velocity;
float clothFriction = g_perClothFriction[clothIdentifier];
float dampingFactor = g_clothDampingFactor[clothIdentifier];
float velocityCoefficient = (1.f - dampingFactor);
CollisionObjectIndices collisionObjectIndices = g_perClothCollisionObjectIndices[clothIdentifier];
if( collisionObjectIndices.firstObject != collisionObjectIndices.endObject )
{
velocity = (float3)(15, 0, 0);
/* We have some possible collisions to deal with */
for( int collision = collisionObjectIndices.firstObject; collision < collisionObjectIndices.endObject; ++collision )
{
CollisionShapeDescription shapeDescription = g_collisionObjectDetails[collision];
float colliderFriction = shapeDescription.friction;
if( shapeDescription.collisionShapeType == CAPSULE_SHAPE_PROXYTYPE )
{
/* Colliding with a capsule */
float capsuleHalfHeight = shapeDescription.halfHeight;
float capsuleRadius = shapeDescription.radius;
float capsuleMargin = shapeDescription.margin;
int capsuleupAxis = shapeDescription.upAxis;
/* Four columns of worldTransform matrix */
float4 worldTransform[4];
worldTransform[0] = shapeDescription.shapeTransform[0];
worldTransform[1] = shapeDescription.shapeTransform[1];
worldTransform[2] = shapeDescription.shapeTransform[2];
worldTransform[3] = shapeDescription.shapeTransform[3];
// Correctly define capsule centerline vector
float4 c1 = (float4)(0.f, 0.f, 0.f, 1.f);
float4 c2 = (float4)(0.f, 0.f, 0.f, 1.f);
c1.x = select( 0.f, -capsuleHalfHeight, capsuleupAxis == 0 );
c1.y = select( 0.f, -capsuleHalfHeight, capsuleupAxis == 1 );
c1.z = select( 0.f, -capsuleHalfHeight, capsuleupAxis == 2 );
c2.x = -c1.x;
c2.y = -c1.y;
c2.z = -c1.z;
float4 worldC1 = matrixVectorMul(worldTransform, c1);
float4 worldC2 = matrixVectorMul(worldTransform, c2);
float3 segment = (worldC2 - worldC1).xyz;
/* compute distance of tangent to vertex along line segment in capsule */
float distanceAlongSegment = -( dot( (worldC1 - position).xyz, segment ) / dot(segment, segment) );
float4 closestPoint = (worldC1 + (float4)(segment * distanceAlongSegment, 0.f));
float distanceFromLine = length(position - closestPoint);
float distanceFromC1 = length(worldC1 - position);
float distanceFromC2 = length(worldC2 - position);
/* Final distance from collision, point to push from, direction to push in
for impulse force */
float dist;
float3 normalVector;
if( distanceAlongSegment < 0 )
{
dist = distanceFromC1;
normalVector = normalize(position - worldC1).xyz;
} else if( distanceAlongSegment > 1.f ) {
dist = distanceFromC2;
normalVector = normalize(position - worldC2).xyz;
} else {
dist = distanceFromLine;
normalVector = normalize(position - closestPoint).xyz;
}
float3 colliderLinearVelocity = shapeDescription.linearVelocity.xyz;
float3 colliderAngularVelocity = shapeDescription.angularVelocity.xyz;
float3 velocityOfSurfacePoint = colliderLinearVelocity + cross(colliderAngularVelocity, position.xyz - (float3)(worldTransform[0].w, worldTransform[1].w, worldTransform[2].w));
float minDistance = capsuleRadius + capsuleMargin;
/* In case of no collision, this is the value of velocity */
velocity = (position - previousPosition).xyz * velocityCoefficient * isolverdt;
// Check for a collision
if( dist < minDistance )
{
/* Project back to surface along normal */
position = position + (float4)((minDistance - dist)*normalVector*0.9f, 0.f);
velocity = (position - previousPosition).xyz * velocityCoefficient * isolverdt;
float3 relativeVelocity = velocity - velocityOfSurfacePoint;
float3 p1 = normalize(cross(normalVector, segment));
float3 p2 = normalize(cross(p1, normalVector));
/* Full friction is sum of velocities in each direction of plane */
float3 frictionVector = p1*dot(relativeVelocity, p1) + p2*dot(relativeVelocity, p2);
/* Real friction is peak friction corrected by friction coefficients */
frictionVector = frictionVector * (colliderFriction*clothFriction);
float approachSpeed = dot(relativeVelocity, normalVector);
if( approachSpeed <= 0.0f )
forceOnVertex -= frictionVector;
}
}
}
} else {
/* Update velocity */
float3 difference = position.xyz - previousPosition.xyz;
velocity = difference*velocityCoefficient*isolverdt;
}
g_vertexVelocities[nodeID] = (float4)(velocity, 0.f);
/* Update external force */
g_vertexForces[nodeID] = (float4)(forceOnVertex, 0.f);
g_vertexPositions[nodeID] = (float4)(position.xyz, 0.f);
}
}
);

View File

@@ -0,0 +1,213 @@
MSTRINGIFY(
typedef struct
{
int firstObject;
int endObject;
} CollisionObjectIndices;
typedef struct
{
float4 shapeTransform[4]; /* column major 4x4 matrix */
float4 linearVelocity;
float4 angularVelocity;
int softBodyIdentifier;
int collisionShapeType;
// Shape information
// Compressed from the union
float radius;
float halfHeight;
int upAxis;
float margin;
float friction;
int padding0;
} CollisionShapeDescription;
/* From btBroadphaseProxy.h */
__constant int CAPSULE_SHAPE_PROXYTYPE = 10;
/* Multiply column-major matrix against vector */
float4 matrixVectorMul( float4 matrix[4], float4 vector )
{
float4 returnVector;
float4 row0 = (float4)(matrix[0].x, matrix[1].x, matrix[2].x, matrix[3].x);
float4 row1 = (float4)(matrix[0].y, matrix[1].y, matrix[2].y, matrix[3].y);
float4 row2 = (float4)(matrix[0].z, matrix[1].z, matrix[2].z, matrix[3].z);
float4 row3 = (float4)(matrix[0].w, matrix[1].w, matrix[2].w, matrix[3].w);
returnVector.x = dot(row0, vector);
returnVector.y = dot(row1, vector);
returnVector.z = dot(row2, vector);
returnVector.w = dot(row3, vector);
return returnVector;
}
__kernel void
SolveCollisionsAndUpdateVelocitiesKernel(
const int numNodes,
const float isolverdt,
__global int *g_vertexClothIdentifier,
__global float4 *g_vertexPreviousPositions,
__global float * g_perClothFriction,
__global float * g_clothDampingFactor,
__global CollisionObjectIndices * g_perClothCollisionObjectIndices,
__global CollisionShapeDescription * g_collisionObjectDetails,
__global float4 * g_vertexForces,
__global float4 *g_vertexVelocities,
__global float4 *g_vertexPositions,
__local CollisionShapeDescription *localCollisionShapes)
{
int nodeID = get_global_id(0);
float3 forceOnVertex = (float3)(0.f, 0.f, 0.f);
int clothIdentifier = g_vertexClothIdentifier[nodeID];
// Abort if this is not a valid cloth
if( clothIdentifier < 0 )
return;
float4 position = (float4)(g_vertexPositions[nodeID].xyz, 1.f);
float4 previousPosition = (float4)(g_vertexPreviousPositions[nodeID].xyz, 1.f);
float3 velocity;
float clothFriction = g_perClothFriction[clothIdentifier];
float dampingFactor = g_clothDampingFactor[clothIdentifier];
float velocityCoefficient = (1.f - dampingFactor);
CollisionObjectIndices collisionObjectIndices = g_perClothCollisionObjectIndices[clothIdentifier];
int numObjects = collisionObjectIndices.endObject - collisionObjectIndices.firstObject;
if( numObjects > 0 )
{
/* We have some possible collisions to deal with */
/* First load all of the collision objects into LDS */
int numObjects = collisionObjectIndices.endObject - collisionObjectIndices.firstObject;
if( get_local_id(0) < numObjects )
{
localCollisionShapes[get_local_id(0)] = g_collisionObjectDetails[ collisionObjectIndices.firstObject + get_local_id(0) ];
}
}
/* Safe as the vertices are padded so that not more than one soft body is in a group */
barrier(CLK_LOCAL_MEM_FENCE);
/* Annoyingly, even though I know the flow control is not varying, the compiler will not let me skip this */
if( numObjects > 0 )
{
velocity = (float3)(0, 0, 0);
// We have some possible collisions to deal with
for( int collision = 0; collision < numObjects; ++collision )
{
CollisionShapeDescription shapeDescription = localCollisionShapes[collision];
float colliderFriction = shapeDescription.friction;
if( shapeDescription.collisionShapeType == CAPSULE_SHAPE_PROXYTYPE )
{
/* Colliding with a capsule */
float capsuleHalfHeight = localCollisionShapes[collision].halfHeight;
float capsuleRadius = localCollisionShapes[collision].radius;
float capsuleMargin = localCollisionShapes[collision].margin;
int capsuleupAxis = localCollisionShapes[collision].upAxis;
float4 worldTransform[4];
worldTransform[0] = localCollisionShapes[collision].shapeTransform[0];
worldTransform[1] = localCollisionShapes[collision].shapeTransform[1];
worldTransform[2] = localCollisionShapes[collision].shapeTransform[2];
worldTransform[3] = localCollisionShapes[collision].shapeTransform[3];
// Correctly define capsule centerline vector
float4 c1 = (float4)(0.f, 0.f, 0.f, 1.f);
float4 c2 = (float4)(0.f, 0.f, 0.f, 1.f);
c1.x = select( 0.f, -capsuleHalfHeight, capsuleupAxis == 0 );
c1.y = select( 0.f, -capsuleHalfHeight, capsuleupAxis == 1 );
c1.z = select( 0.f, -capsuleHalfHeight, capsuleupAxis == 2 );
c2.x = -c1.x;
c2.y = -c1.y;
c2.z = -c1.z;
float4 worldC1 = matrixVectorMul(worldTransform, c1);
float4 worldC2 = matrixVectorMul(worldTransform, c2);
float3 segment = (worldC2 - worldC1).xyz;
/* compute distance of tangent to vertex along line segment in capsule */
float distanceAlongSegment = -( dot( (worldC1 - position).xyz, segment ) / dot(segment, segment) );
float4 closestPoint = (worldC1 + (float4)(segment * distanceAlongSegment, 0.f));
float distanceFromLine = length(position - closestPoint);
float distanceFromC1 = length(worldC1 - position);
float distanceFromC2 = length(worldC2 - position);
/* Final distance from collision, point to push from, direction to push in
for impulse force */
float dist;
float3 normalVector;
if( distanceAlongSegment < 0 )
{
dist = distanceFromC1;
normalVector = normalize(position - worldC1).xyz;
} else if( distanceAlongSegment > 1.f ) {
dist = distanceFromC2;
normalVector = normalize(position - worldC2).xyz;
} else {
dist = distanceFromLine;
normalVector = normalize(position - closestPoint).xyz;
}
float3 colliderLinearVelocity = localCollisionShapes[collision].linearVelocity.xyz;
float3 colliderAngularVelocity = localCollisionShapes[collision].angularVelocity.xyz;
float3 velocityOfSurfacePoint = colliderLinearVelocity + cross(colliderAngularVelocity, position.xyz - (float3)(worldTransform[0].w, worldTransform[1].w, worldTransform[2].w));
float minDistance = capsuleRadius + capsuleMargin;
/* In case of no collision, this is the value of velocity */
velocity = (position - previousPosition).xyz * velocityCoefficient * isolverdt;
/* Check for a collision */
if( dist < minDistance )
{
/* Project back to surface along normal */
position = position + (float4)((minDistance - dist)*normalVector*0.9f, 0.f);
velocity = (position - previousPosition).xyz * velocityCoefficient * isolverdt;
float3 relativeVelocity = velocity - velocityOfSurfacePoint;
float3 p1 = normalize(cross(normalVector, segment));
float3 p2 = normalize(cross(p1, normalVector));
/* Full friction is sum of velocities in each direction of plane */
float3 frictionVector = p1*dot(relativeVelocity, p1) + p2*dot(relativeVelocity, p2);
/* Real friction is peak friction corrected by friction coefficients */
frictionVector = frictionVector * (colliderFriction*clothFriction);
float approachSpeed = dot(relativeVelocity, normalVector);
if( approachSpeed <= 0.0f )
forceOnVertex -= frictionVector;
}
}
}
} else {
/* Update velocity */
float3 difference = position.xyz - previousPosition.xyz;
velocity = difference*velocityCoefficient*isolverdt;
}
g_vertexVelocities[nodeID] = (float4)(velocity, 0.f);
/* Update external force */
g_vertexForces[nodeID] = (float4)(forceOnVertex, 0.f);
g_vertexPositions[nodeID] = (float4)(position.xyz, 0.f);
}
);

View File

@@ -0,0 +1,82 @@
MSTRINGIFY(
#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n
#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n
__kernel void
ComputeBoundsKernel(
const int numNodes,
const int numSoftBodies,
__global int * g_vertexClothIdentifier,
__global float4 * g_vertexPositions,
/* Unfortunately, to get the atomics below to work these arrays cannot be */
/* uint4, though that is the layout of the data */
/* Therefore this is little-endian-only code */
volatile __global uint * g_clothMinBounds,
volatile __global uint * g_clothMaxBounds,
volatile __local uint * clothMinBounds,
volatile __local uint * clothMaxBounds)
{
// Init min and max bounds arrays
if( get_local_id(0) < numSoftBodies )
{
clothMinBounds[get_local_id(0)*4] = UINT_MAX;
clothMinBounds[get_local_id(0)*4+1] = UINT_MAX;
clothMinBounds[get_local_id(0)*4+2] = UINT_MAX;
clothMinBounds[get_local_id(0)*4+3] = UINT_MAX;
clothMaxBounds[get_local_id(0)*4] = 0;
clothMaxBounds[get_local_id(0)*4+1] = 0;
clothMaxBounds[get_local_id(0)*4+2] = 0;
clothMaxBounds[get_local_id(0)*4+3] = 0;
}
barrier(CLK_LOCAL_MEM_FENCE);
int nodeID = get_global_id(0);
if( nodeID < numNodes )
{
int clothIdentifier = g_vertexClothIdentifier[nodeID];
if( clothIdentifier >= 0 )
{
float4 position = (float4)(g_vertexPositions[nodeID].xyz, 0.f);
/* Reinterpret position as uint */
uint4 positionUInt = (uint4)(as_uint(position.x), as_uint(position.y), as_uint(position.z), 0);
/* Invert sign bit of positives and whole of negatives to allow comparison as unsigned ints */
positionUInt.x ^= (1+~(positionUInt.x >> 31) | 0x80000000);
positionUInt.y ^= (1+~(positionUInt.y >> 31) | 0x80000000);
positionUInt.z ^= (1+~(positionUInt.z >> 31) | 0x80000000);
// Min/max with the LDS values
atom_min(&(clothMinBounds[clothIdentifier*4]), positionUInt.x);
atom_min(&(clothMinBounds[clothIdentifier*4+1]), positionUInt.y);
atom_min(&(clothMinBounds[clothIdentifier*4+2]), positionUInt.z);
atom_max(&(clothMaxBounds[clothIdentifier*4]), positionUInt.x);
atom_max(&(clothMaxBounds[clothIdentifier*4+1]), positionUInt.y);
atom_max(&(clothMaxBounds[clothIdentifier*4+2]), positionUInt.z);
}
}
barrier(CLK_LOCAL_MEM_FENCE);
/* Use global atomics to update the global versions of the data */
if( get_local_id(0) < numSoftBodies )
{
/*atom_min(&(g_clothMinBounds[get_local_id(0)].x), clothMinBounds[get_local_id(0)].x);*/
atom_min(&(g_clothMinBounds[get_local_id(0)*4]), clothMinBounds[get_local_id(0)*4]);
atom_min(&(g_clothMinBounds[get_local_id(0)*4+1]), clothMinBounds[get_local_id(0)*4+1]);
atom_min(&(g_clothMinBounds[get_local_id(0)*4+2]), clothMinBounds[get_local_id(0)*4+2]);
atom_max(&(g_clothMaxBounds[get_local_id(0)*4]), clothMaxBounds[get_local_id(0)*4]);
atom_max(&(g_clothMaxBounds[get_local_id(0)*4+1]), clothMaxBounds[get_local_id(0)*4+1]);
atom_max(&(g_clothMaxBounds[get_local_id(0)*4+2]), clothMaxBounds[get_local_id(0)*4+2]);
}
}
);

View File

@@ -0,0 +1,46 @@
MSTRINGIFY(
__kernel void
OutputToVertexArrayWithNormalsKernel(
const int startNode, const int numNodes, __global float *g_vertexBuffer,
const int positionOffset, const int positionStride, const __global float4* g_vertexPositions,
const int normalOffset, const int normalStride, const __global float4* g_vertexNormals )
{
int nodeID = get_global_id(0);
if( nodeID < numNodes )
{
float4 position = g_vertexPositions[nodeID + startNode];
float4 normal = g_vertexNormals[nodeID + startNode];
// Stride should account for the float->float4 conversion
int positionDestination = nodeID * positionStride + positionOffset;
g_vertexBuffer[positionDestination] = position.x;
g_vertexBuffer[positionDestination+1] = position.y;
g_vertexBuffer[positionDestination+2] = position.z;
int normalDestination = nodeID * normalStride + normalOffset;
g_vertexBuffer[normalDestination] = normal.x;
g_vertexBuffer[normalDestination+1] = normal.y;
g_vertexBuffer[normalDestination+2] = normal.z;
}
}
__kernel void
OutputToVertexArrayWithoutNormalsKernel(
const int startNode, const int numNodes, __global float *g_vertexBuffer,
const int positionOffset, const int positionStride, const __global float4* g_vertexPositions )
{
int nodeID = get_global_id(0);
if( nodeID < numNodes )
{
float4 position = g_vertexPositions[nodeID + startNode];
// Stride should account for the float->float4 conversion
int positionDestination = nodeID * positionStride + positionOffset;
g_vertexBuffer[positionDestination] = position.x;
g_vertexBuffer[positionDestination+1] = position.y;
g_vertexBuffer[positionDestination+2] = position.z;
}
}
);

View File

@@ -0,0 +1,140 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
MSTRINGIFY(
float mydot3(float4 a, float4 b)
{
return a.x*b.x + a.y*b.y + a.z*b.z;
}
__kernel __attribute__((reqd_work_group_size(WAVEFRONT_BLOCK_MULTIPLIER*WAVEFRONT_SIZE, 1, 1)))
void
SolvePositionsFromLinksKernel(
const int startWaveInBatch,
const int numWaves,
const float kst,
const float ti,
__global int2 *g_wavefrontBatchCountsVertexCounts,
__global int *g_vertexAddressesPerWavefront,
__global int2 * g_linksVertexIndices,
__global float * g_linksMassLSC,
__global float * g_linksRestLengthSquared,
__global float * g_verticesInverseMass,
__global float4 * g_vertexPositions,
__local int2 *wavefrontBatchCountsVertexCounts,
__local float4 *vertexPositionSharedData,
__local float *vertexInverseMassSharedData)
{
const int laneInWavefront = (get_global_id(0) & (WAVEFRONT_SIZE-1));
const int wavefront = startWaveInBatch + (get_global_id(0) / WAVEFRONT_SIZE);
const int firstWavefrontInBlock = startWaveInBatch + get_group_id(0) * WAVEFRONT_BLOCK_MULTIPLIER;
const int localWavefront = wavefront - firstWavefrontInBlock;
// Mask out in case there's a stray "wavefront" at the end that's been forced in through the multiplier
if( wavefront < (startWaveInBatch + numWaves) )
{
// Load the batch counts for the wavefronts
// Mask out in case there's a stray "wavefront" at the end that's been forced in through the multiplier
if( laneInWavefront == 0 )
{
int2 batchesAndVertexCountsWithinWavefront = g_wavefrontBatchCountsVertexCounts[wavefront];
wavefrontBatchCountsVertexCounts[localWavefront] = batchesAndVertexCountsWithinWavefront;
}
barrier(CLK_LOCAL_MEM_FENCE);
int2 batchesAndVerticesWithinWavefront = wavefrontBatchCountsVertexCounts[localWavefront];
int batchesWithinWavefront = batchesAndVerticesWithinWavefront.x;
int verticesUsedByWave = batchesAndVerticesWithinWavefront.y;
// Load the vertices for the wavefronts
for( int vertex = laneInWavefront; vertex < verticesUsedByWave; vertex+=WAVEFRONT_SIZE )
{
int vertexAddress = g_vertexAddressesPerWavefront[wavefront*MAX_NUM_VERTICES_PER_WAVE + vertex];
vertexPositionSharedData[localWavefront*MAX_NUM_VERTICES_PER_WAVE + vertex] = g_vertexPositions[vertexAddress];
vertexInverseMassSharedData[localWavefront*MAX_NUM_VERTICES_PER_WAVE + vertex] = g_verticesInverseMass[vertexAddress];
}
barrier(CLK_LOCAL_MEM_FENCE);
// Loop through the batches performing the solve on each in LDS
int baseDataLocationForWave = WAVEFRONT_SIZE * wavefront * MAX_BATCHES_PER_WAVE;
//for( int batch = 0; batch < batchesWithinWavefront; ++batch )
int batch = 0;
do
{
int baseDataLocation = baseDataLocationForWave + WAVEFRONT_SIZE * batch;
int locationOfValue = baseDataLocation + laneInWavefront;
// These loads should all be perfectly linear across the WF
int2 localVertexIndices = g_linksVertexIndices[locationOfValue];
float massLSC = g_linksMassLSC[locationOfValue];
float restLengthSquared = g_linksRestLengthSquared[locationOfValue];
// LDS vertex addresses based on logical wavefront number in block and loaded index
int vertexAddress0 = MAX_NUM_VERTICES_PER_WAVE * localWavefront + localVertexIndices.x;
int vertexAddress1 = MAX_NUM_VERTICES_PER_WAVE * localWavefront + localVertexIndices.y;
float4 position0 = vertexPositionSharedData[vertexAddress0];
float4 position1 = vertexPositionSharedData[vertexAddress1];
float inverseMass0 = vertexInverseMassSharedData[vertexAddress0];
float inverseMass1 = vertexInverseMassSharedData[vertexAddress1];
float4 del = position1 - position0;
float len = mydot3(del, del);
float k = 0;
if( massLSC > 0.0f )
{
k = ((restLengthSquared - len)/(massLSC*(restLengthSquared+len)))*kst;
}
position0 = position0 - del*(k*inverseMass0);
position1 = position1 + del*(k*inverseMass1);
// Ensure compiler does not re-order memory operations
barrier(CLK_LOCAL_MEM_FENCE);
vertexPositionSharedData[vertexAddress0] = position0;
vertexPositionSharedData[vertexAddress1] = position1;
// Ensure compiler does not re-order memory operations
barrier(CLK_LOCAL_MEM_FENCE);
++batch;
} while( batch < batchesWithinWavefront );
// Update the global memory vertices for the wavefronts
for( int vertex = laneInWavefront; vertex < verticesUsedByWave; vertex+=WAVEFRONT_SIZE )
{
int vertexAddress = g_vertexAddressesPerWavefront[wavefront*MAX_NUM_VERTICES_PER_WAVE + vertex];
g_vertexPositions[vertexAddress] = (float4)(vertexPositionSharedData[localWavefront*MAX_NUM_VERTICES_PER_WAVE + vertex].xyz, 0.f);
}
}
}
);

View File

@@ -0,0 +1,204 @@
MSTRINGIFY(
#pragma OPENCL EXTENSION cl_amd_printf : enable\n
float mydot3(float4 a, float4 b)
{
return a.x*b.x + a.y*b.y + a.z*b.z;
}
typedef struct
{
int firstObject;
int endObject;
} CollisionObjectIndices;
typedef struct
{
float4 shapeTransform[4]; // column major 4x4 matrix
float4 linearVelocity;
float4 angularVelocity;
int softBodyIdentifier;
int collisionShapeType;
// Shape information
// Compressed from the union
float radius;
float halfHeight;
int upAxis;
float margin;
float friction;
int padding0;
} CollisionShapeDescription;
// From btBroadphaseProxy.h
__constant int CAPSULE_SHAPE_PROXYTYPE = 10;
// Multiply column-major matrix against vector
float4 matrixVectorMul( float4 matrix[4], float4 vector )
{
float4 returnVector;
float4 row0 = (float4)(matrix[0].x, matrix[1].x, matrix[2].x, matrix[3].x);
float4 row1 = (float4)(matrix[0].y, matrix[1].y, matrix[2].y, matrix[3].y);
float4 row2 = (float4)(matrix[0].z, matrix[1].z, matrix[2].z, matrix[3].z);
float4 row3 = (float4)(matrix[0].w, matrix[1].w, matrix[2].w, matrix[3].w);
returnVector.x = dot(row0, vector);
returnVector.y = dot(row1, vector);
returnVector.z = dot(row2, vector);
returnVector.w = dot(row3, vector);
return returnVector;
}
__kernel void
SolveCollisionsAndUpdateVelocitiesKernel(
const int numNodes,
const float isolverdt,
__global int *g_vertexClothIdentifier,
__global float4 *g_vertexPreviousPositions,
__global float * g_perClothFriction,
__global float * g_clothDampingFactor,
__global CollisionObjectIndices * g_perClothCollisionObjectIndices,
__global CollisionShapeDescription * g_collisionObjectDetails,
__global float4 * g_vertexForces,
__global float4 *g_vertexVelocities,
__global float4 *g_vertexPositions)
{
int nodeID = get_global_id(0);
float4 forceOnVertex = (float4)(0.f, 0.f, 0.f, 0.f);
if( get_global_id(0) < numNodes )
{
int clothIdentifier = g_vertexClothIdentifier[nodeID];
// Abort if this is not a valid cloth
if( clothIdentifier < 0 )
return;
float4 position = (float4)(g_vertexPositions[nodeID].xyz, 1.f);
float4 previousPosition = (float4)(g_vertexPreviousPositions[nodeID].xyz, 1.f);
float clothFriction = g_perClothFriction[clothIdentifier];
float dampingFactor = g_clothDampingFactor[clothIdentifier];
float velocityCoefficient = (1.f - dampingFactor);
float4 difference = position - previousPosition;
float4 velocity = difference*velocityCoefficient*isolverdt;
CollisionObjectIndices collisionObjectIndices = g_perClothCollisionObjectIndices[clothIdentifier];
int numObjects = collisionObjectIndices.endObject - collisionObjectIndices.firstObject;
if( numObjects > 0 )
{
// We have some possible collisions to deal with
for( int collision = collisionObjectIndices.firstObject; collision < collisionObjectIndices.endObject; ++collision )
{
CollisionShapeDescription shapeDescription = g_collisionObjectDetails[collision];
float colliderFriction = shapeDescription.friction;
if( shapeDescription.collisionShapeType == CAPSULE_SHAPE_PROXYTYPE )
{
// Colliding with a capsule
float capsuleHalfHeight = shapeDescription.halfHeight;
float capsuleRadius = shapeDescription.radius;
float capsuleMargin = shapeDescription.margin;
int capsuleupAxis = shapeDescription.upAxis;
// Four columns of worldTransform matrix
float4 worldTransform[4];
worldTransform[0] = shapeDescription.shapeTransform[0];
worldTransform[1] = shapeDescription.shapeTransform[1];
worldTransform[2] = shapeDescription.shapeTransform[2];
worldTransform[3] = shapeDescription.shapeTransform[3];
// Correctly define capsule centerline vector
float4 c1 = (float4)(0.f, 0.f, 0.f, 1.f);
float4 c2 = (float4)(0.f, 0.f, 0.f, 1.f);
c1.x = select( 0.f, -capsuleHalfHeight, capsuleupAxis == 0 );
c1.y = select( 0.f, -capsuleHalfHeight, capsuleupAxis == 1 );
c1.z = select( 0.f, -capsuleHalfHeight, capsuleupAxis == 2 );
c2.x = -c1.x;
c2.y = -c1.y;
c2.z = -c1.z;
float4 worldC1 = matrixVectorMul(worldTransform, c1);
float4 worldC2 = matrixVectorMul(worldTransform, c2);
float4 segment = (worldC2 - worldC1);
// compute distance of tangent to vertex along line segment in capsule
float distanceAlongSegment = -( mydot3( (worldC1 - position), segment ) / mydot3(segment, segment) );
float4 closestPoint = (worldC1 + (float4)(segment * distanceAlongSegment));
float distanceFromLine = length(position - closestPoint);
float distanceFromC1 = length(worldC1 - position);
float distanceFromC2 = length(worldC2 - position);
// Final distance from collision, point to push from, direction to push in
// for impulse force
float dist;
float4 normalVector;
if( distanceAlongSegment < 0 )
{
dist = distanceFromC1;
normalVector = (float4)(normalize(position - worldC1).xyz, 0.f);
} else if( distanceAlongSegment > 1.f ) {
dist = distanceFromC2;
normalVector = (float4)(normalize(position - worldC2).xyz, 0.f);
} else {
dist = distanceFromLine;
normalVector = (float4)(normalize(position - closestPoint).xyz, 0.f);
}
float4 colliderLinearVelocity = shapeDescription.linearVelocity;
float4 colliderAngularVelocity = shapeDescription.angularVelocity;
float4 velocityOfSurfacePoint = colliderLinearVelocity + cross(colliderAngularVelocity, position - (float4)(worldTransform[0].w, worldTransform[1].w, worldTransform[2].w, 0.f));
float minDistance = capsuleRadius + capsuleMargin;
// In case of no collision, this is the value of velocity
velocity = (position - previousPosition) * velocityCoefficient * isolverdt;
// Check for a collision
if( dist < minDistance )
{
// Project back to surface along normal
position = position + (float4)((minDistance - dist)*normalVector*0.9f);
velocity = (position - previousPosition) * velocityCoefficient * isolverdt;
float4 relativeVelocity = velocity - velocityOfSurfacePoint;
float4 p1 = normalize(cross(normalVector, segment));
float4 p2 = normalize(cross(p1, normalVector));
// Full friction is sum of velocities in each direction of plane
float4 frictionVector = p1*mydot3(relativeVelocity, p1) + p2*mydot3(relativeVelocity, p2);
// Real friction is peak friction corrected by friction coefficients
frictionVector = frictionVector * (colliderFriction*clothFriction);
float approachSpeed = dot(relativeVelocity, normalVector);
if( approachSpeed <= 0.0f )
forceOnVertex -= frictionVector;
}
}
}
}
g_vertexVelocities[nodeID] = (float4)(velocity.xyz, 0.f);
// Update external force
g_vertexForces[nodeID] = (float4)(forceOnVertex.xyz, 0.f);
g_vertexPositions[nodeID] = (float4)(position.xyz, 0.f);
}
}
);

View File

@@ -0,0 +1,219 @@
MSTRINGIFY(
float mydot3(float4 a, float4 b)
{
return a.x*b.x + a.y*b.y + a.z*b.z;
}
typedef struct
{
int firstObject;
int endObject;
} CollisionObjectIndices;
typedef struct
{
float4 shapeTransform[4]; // column major 4x4 matrix
float4 linearVelocity;
float4 angularVelocity;
int softBodyIdentifier;
int collisionShapeType;
// Shape information
// Compressed from the union
float radius;
float halfHeight;
int upAxis;
float margin;
float friction;
int padding0;
} CollisionShapeDescription;
// From btBroadphaseProxy.h
__constant int CAPSULE_SHAPE_PROXYTYPE = 10;
/* Multiply column-major matrix against vector */
float4 matrixVectorMul( float4 matrix[4], float4 vector )
{
float4 returnVector;
float4 row0 = (float4)(matrix[0].x, matrix[1].x, matrix[2].x, matrix[3].x);
float4 row1 = (float4)(matrix[0].y, matrix[1].y, matrix[2].y, matrix[3].y);
float4 row2 = (float4)(matrix[0].z, matrix[1].z, matrix[2].z, matrix[3].z);
float4 row3 = (float4)(matrix[0].w, matrix[1].w, matrix[2].w, matrix[3].w);
returnVector.x = dot(row0, vector);
returnVector.y = dot(row1, vector);
returnVector.z = dot(row2, vector);
returnVector.w = dot(row3, vector);
return returnVector;
}
__kernel void
SolveCollisionsAndUpdateVelocitiesKernel(
const int numNodes,
const float isolverdt,
__global int *g_vertexClothIdentifier,
__global float4 *g_vertexPreviousPositions,
__global float * g_perClothFriction,
__global float * g_clothDampingFactor,
__global CollisionObjectIndices * g_perClothCollisionObjectIndices,
__global CollisionShapeDescription * g_collisionObjectDetails,
__global float4 * g_vertexForces,
__global float4 *g_vertexVelocities,
__global float4 *g_vertexPositions,
__local CollisionShapeDescription *localCollisionShapes)
{
int nodeID = get_global_id(0);
float4 forceOnVertex = (float4)(0.f, 0.f, 0.f, 0.f);
int clothIdentifier = g_vertexClothIdentifier[nodeID];
// Abort if this is not a valid cloth
if( clothIdentifier < 0 )
return;
float4 position = (float4)(g_vertexPositions[nodeID].xyz, 1.f);
float4 previousPosition = (float4)(g_vertexPreviousPositions[nodeID].xyz, 1.f);
float clothFriction = g_perClothFriction[clothIdentifier];
float dampingFactor = g_clothDampingFactor[clothIdentifier];
float velocityCoefficient = (1.f - dampingFactor);
// Update velocity
float4 difference = position - previousPosition;
float4 velocity = difference*velocityCoefficient*isolverdt;
CollisionObjectIndices collisionObjectIndices = g_perClothCollisionObjectIndices[clothIdentifier];
int numObjects = collisionObjectIndices.endObject - collisionObjectIndices.firstObject;
if( numObjects > 0 )
{
// We have some possible collisions to deal with
// First load all of the collision objects into LDS
int numObjects = collisionObjectIndices.endObject - collisionObjectIndices.firstObject;
if( get_local_id(0) < numObjects )
{
localCollisionShapes[get_local_id(0)] = g_collisionObjectDetails[ collisionObjectIndices.firstObject + get_local_id(0) ];
}
}
// Safe as the vertices are padded so that not more than one soft body is in a group
barrier(CLK_LOCAL_MEM_FENCE);
// Annoyingly, even though I know the flow control is not varying, the compiler will not let me skip this
if( numObjects > 0 )
{
// We have some possible collisions to deal with
for( int collision = 0; collision < numObjects; ++collision )
{
//CollisionShapeDescription shapeDescription = localCollisionShapes[collision];
float colliderFriction = localCollisionShapes[collision].friction;
if( localCollisionShapes[collision].collisionShapeType == CAPSULE_SHAPE_PROXYTYPE )
{
// Colliding with a capsule
float capsuleHalfHeight = localCollisionShapes[collision].halfHeight;
float capsuleRadius = localCollisionShapes[collision].radius;
float capsuleMargin = localCollisionShapes[collision].margin;
int capsuleupAxis = localCollisionShapes[collision].upAxis;
float4 worldTransform[4];
worldTransform[0] = localCollisionShapes[collision].shapeTransform[0];
worldTransform[1] = localCollisionShapes[collision].shapeTransform[1];
worldTransform[2] = localCollisionShapes[collision].shapeTransform[2];
worldTransform[3] = localCollisionShapes[collision].shapeTransform[3];
//float4 c1 = (float4)(0.f, -capsuleHalfHeight, 0.f, 1.f);
//float4 c2 = (float4)(0.f, +capsuleHalfHeight, 0.f, 1.f);
// Correctly define capsule centerline vector
float4 c1 = (float4)(0.f, 0.f, 0.f, 1.f);
float4 c2 = (float4)(0.f, 0.f, 0.f, 1.f);
c1.x = select( 0.f, -capsuleHalfHeight, capsuleupAxis == 0 );
c1.y = select( 0.f, -capsuleHalfHeight, capsuleupAxis == 1 );
c1.z = select( 0.f, -capsuleHalfHeight, capsuleupAxis == 2 );
c2.x = -c1.x;
c2.y = -c1.y;
c2.z = -c1.z;
float4 worldC1 = matrixVectorMul(worldTransform, c1);
float4 worldC2 = matrixVectorMul(worldTransform, c2);
float4 segment = (worldC2 - worldC1);
// compute distance of tangent to vertex along line segment in capsule
float distanceAlongSegment = -( mydot3( (worldC1 - position), segment ) / mydot3(segment, segment) );
float4 closestPoint = (worldC1 + (float4)(segment * distanceAlongSegment));
float distanceFromLine = length(position - closestPoint);
float distanceFromC1 = length(worldC1 - position);
float distanceFromC2 = length(worldC2 - position);
// Final distance from collision, point to push from, direction to push in
// for impulse force
float dist;
float4 normalVector;
if( distanceAlongSegment < 0 )
{
dist = distanceFromC1;
normalVector = normalize(position - worldC1);
} else if( distanceAlongSegment > 1.f ) {
dist = distanceFromC2;
normalVector = normalize(position - worldC2);
} else {
dist = distanceFromLine;
normalVector = normalize(position - closestPoint);
}
float4 colliderLinearVelocity = localCollisionShapes[collision].linearVelocity;
float4 colliderAngularVelocity = localCollisionShapes[collision].angularVelocity;
float4 velocityOfSurfacePoint = colliderLinearVelocity + cross(colliderAngularVelocity, position - (float4)(worldTransform[0].w, worldTransform[1].w, worldTransform[2].w, 0.f));
float minDistance = capsuleRadius + capsuleMargin;
// In case of no collision, this is the value of velocity
velocity = (position - previousPosition) * velocityCoefficient * isolverdt;
// Check for a collision
if( dist < minDistance )
{
// Project back to surface along normal
position = position + (float4)((minDistance - dist)*normalVector*0.9f);
velocity = (position - previousPosition) * velocityCoefficient * isolverdt;
float4 relativeVelocity = velocity - velocityOfSurfacePoint;
float4 p1 = (float4)(normalize(cross(normalVector, segment)).xyz, 0.f);
float4 p2 = (float4)(normalize(cross(p1, normalVector)).xyz, 0.f);
// Full friction is sum of velocities in each direction of plane
float4 frictionVector = p1*mydot3(relativeVelocity, p1) + p2*mydot3(relativeVelocity, p2);
// Real friction is peak friction corrected by friction coefficients
frictionVector = frictionVector * (colliderFriction*clothFriction);
float approachSpeed = dot(relativeVelocity, normalVector);
if( approachSpeed <= 0.0f )
forceOnVertex -= frictionVector;
}
}
}
}
g_vertexVelocities[nodeID] = (float4)(velocity.xyz, 0.f);
// Update external force
g_vertexForces[nodeID] = (float4)(forceOnVertex.xyz, 0.f);
g_vertexPositions[nodeID] = (float4)(position.xyz, 0.f);
}
);

View File

@@ -0,0 +1,169 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#include "BulletMultiThreaded/GpuSoftBodySolvers/CPU/btSoftBodySolverData.h"
#include "btSoftBodySolverBuffer_OpenCL.h"
#ifndef BT_SOFT_BODY_SOLVER_LINK_DATA_OPENCL_SIMDAWARE_H
#define BT_SOFT_BODY_SOLVER_LINK_DATA_OPENCL_SIMDAWARE_H
class btSoftBodyLinkDataOpenCLSIMDAware : public btSoftBodyLinkData
{
public:
bool m_onGPU;
cl_command_queue m_cqCommandQue;
const int m_wavefrontSize;
const int m_linksPerWorkItem;
const int m_maxLinksPerWavefront;
int m_maxBatchesWithinWave;
int m_maxVerticesWithinWave;
int m_numWavefronts;
int m_maxVertex;
struct NumBatchesVerticesPair
{
int numBatches;
int numVertices;
};
btAlignedObjectArray<int> m_linksPerWavefront;
btAlignedObjectArray<NumBatchesVerticesPair> m_numBatchesAndVerticesWithinWaves;
btOpenCLBuffer< NumBatchesVerticesPair > m_clNumBatchesAndVerticesWithinWaves;
// All arrays here will contain batches of m_maxLinksPerWavefront links
// ordered by wavefront.
// with either global vertex pairs or local vertex pairs
btAlignedObjectArray< int > m_wavefrontVerticesGlobalAddresses; // List of global vertices per wavefront
btOpenCLBuffer<int> m_clWavefrontVerticesGlobalAddresses;
btAlignedObjectArray< LinkNodePair > m_linkVerticesLocalAddresses; // Vertex pair for the link
btOpenCLBuffer<LinkNodePair> m_clLinkVerticesLocalAddresses;
btOpenCLBuffer<float> m_clLinkStrength;
btOpenCLBuffer<float> m_clLinksMassLSC;
btOpenCLBuffer<float> m_clLinksRestLengthSquared;
btOpenCLBuffer<float> m_clLinksRestLength;
btOpenCLBuffer<float> m_clLinksMaterialLinearStiffnessCoefficient;
struct BatchPair
{
int start;
int length;
BatchPair() :
start(0),
length(0)
{
}
BatchPair( int s, int l ) :
start( s ),
length( l )
{
}
};
/**
* Link addressing information for each cloth.
* Allows link locations to be computed independently of data batching.
*/
btAlignedObjectArray< int > m_linkAddresses;
/**
* Start and length values for computation batches over link data.
*/
btAlignedObjectArray< BatchPair > m_wavefrontBatchStartLengths;
btSoftBodyLinkDataOpenCLSIMDAware(cl_command_queue queue, cl_context ctx);
virtual ~btSoftBodyLinkDataOpenCLSIMDAware();
/** Allocate enough space in all link-related arrays to fit numLinks links */
virtual void createLinks( int numLinks );
/** Insert the link described into the correct data structures assuming space has already been allocated by a call to createLinks */
virtual void setLinkAt(
const LinkDescription &link,
int linkIndex );
virtual bool onAccelerator();
virtual bool moveToAccelerator();
virtual bool moveFromAccelerator();
/**
* Generate (and later update) the batching for the entire link set.
* This redoes a lot of work because it batches the entire set when each cloth is inserted.
* In theory we could delay it until just before we need the cloth.
* It's a one-off overhead, though, so that is a later optimisation.
*/
void generateBatches();
int getMaxVerticesPerWavefront()
{
return m_maxVerticesWithinWave;
}
int getWavefrontSize()
{
return m_wavefrontSize;
}
int getLinksPerWorkItem()
{
return m_linksPerWorkItem;
}
int getMaxLinksPerWavefront()
{
return m_maxLinksPerWavefront;
}
int getMaxBatchesPerWavefront()
{
return m_maxBatchesWithinWave;
}
int getNumWavefronts()
{
return m_numWavefronts;
}
NumBatchesVerticesPair getNumBatchesAndVerticesWithinWavefront( int wavefront )
{
return m_numBatchesAndVerticesWithinWaves[wavefront];
}
int getVertexGlobalAddresses( int vertexIndex )
{
return m_wavefrontVerticesGlobalAddresses[vertexIndex];
}
/**
* Get post-batching local addresses of the vertex pair for a link assuming all vertices used by a wavefront are loaded locally.
*/
LinkNodePair getVertexPairLocalAddresses( int linkIndex )
{
return m_linkVerticesLocalAddresses[linkIndex];
}
};
#endif // #ifndef BT_SOFT_BODY_SOLVER_LINK_DATA_OPENCL_SIMDAWARE_H

View File

@@ -0,0 +1,133 @@
#include "btSoftBodySolverOutputCLtoGL.h"
#include <stdio.h> //@todo: remove the debugging printf at some stage
#include "btSoftBodySolver_OpenCL.h"
#include "BulletSoftBody/btSoftBodySolverVertexBuffer.h"
#include "btSoftBodySolverVertexBuffer_OpenGL.h"
#include "BulletSoftBody/btSoftBody.h"
#if (0)//CL_VERSION_1_1 == 1)
//OpenCL 1.1 kernels use float3
#define MSTRINGIFY(A) #A
static char* OutputToVertexArrayCLString =
#include "OpenCLC/OutputToVertexArray.cl"
#else
////OpenCL 1.0 kernels don't use float3
#define MSTRINGIFY(A) #A
static char* OutputToVertexArrayCLString =
#include "OpenCLC10/OutputToVertexArray.cl"
#endif //CL_VERSION_1_1
#define RELEASE_CL_KERNEL(kernelName) {if( kernelName ){ clReleaseKernel( kernelName ); kernelName = 0; }}
static const size_t workGroupSize = 128;
void btSoftBodySolverOutputCLtoGL::copySoftBodyToVertexBuffer( const btSoftBody * const softBody, btVertexBufferDescriptor *vertexBuffer )
{
btSoftBodySolver *solver = softBody->getSoftBodySolver();
btAssert( solver->getSolverType() == btSoftBodySolver::CL_SOLVER || solver->getSolverType() == btSoftBodySolver::CL_SIMD_SOLVER );
btOpenCLSoftBodySolver *dxSolver = static_cast< btOpenCLSoftBodySolver * >( solver );
checkInitialized();
btOpenCLAcceleratedSoftBodyInterface* currentCloth = dxSolver->findSoftBodyInterface( softBody );
btSoftBodyVertexDataOpenCL &vertexData( dxSolver->m_vertexData );
const int firstVertex = currentCloth->getFirstVertex();
const int lastVertex = firstVertex + currentCloth->getNumVertices();
if( vertexBuffer->getBufferType() == btVertexBufferDescriptor::OPENGL_BUFFER ) {
const btOpenGLInteropVertexBufferDescriptor *openGLVertexBuffer = static_cast< btOpenGLInteropVertexBufferDescriptor* >(vertexBuffer);
cl_int ciErrNum = CL_SUCCESS;
cl_mem clBuffer = openGLVertexBuffer->getBuffer();
cl_kernel outputKernel = outputToVertexArrayWithNormalsKernel;
if( !vertexBuffer->hasNormals() )
outputKernel = outputToVertexArrayWithoutNormalsKernel;
ciErrNum = clEnqueueAcquireGLObjects(m_cqCommandQue, 1, &clBuffer, 0, 0, NULL);
if( ciErrNum != CL_SUCCESS )
{
btAssert( 0 && "clEnqueueAcquireGLObjects(copySoftBodyToVertexBuffer)");
}
int numVertices = currentCloth->getNumVertices();
ciErrNum = clSetKernelArg(outputKernel, 0, sizeof(int), &firstVertex );
ciErrNum = clSetKernelArg(outputKernel, 1, sizeof(int), &numVertices );
ciErrNum = clSetKernelArg(outputKernel, 2, sizeof(cl_mem), (void*)&clBuffer );
if( vertexBuffer->hasVertexPositions() )
{
int vertexOffset = vertexBuffer->getVertexOffset();
int vertexStride = vertexBuffer->getVertexStride();
ciErrNum = clSetKernelArg(outputKernel, 3, sizeof(int), &vertexOffset );
ciErrNum = clSetKernelArg(outputKernel, 4, sizeof(int), &vertexStride );
ciErrNum = clSetKernelArg(outputKernel, 5, sizeof(cl_mem), (void*)&vertexData.m_clVertexPosition.m_buffer );
}
if( vertexBuffer->hasNormals() )
{
int normalOffset = vertexBuffer->getNormalOffset();
int normalStride = vertexBuffer->getNormalStride();
ciErrNum = clSetKernelArg(outputKernel, 6, sizeof(int), &normalOffset );
ciErrNum = clSetKernelArg(outputKernel, 7, sizeof(int), &normalStride );
ciErrNum = clSetKernelArg(outputKernel, 8, sizeof(cl_mem), (void*)&vertexData.m_clVertexNormal.m_buffer );
}
size_t numWorkItems = workGroupSize*((vertexData.getNumVertices() + (workGroupSize-1)) / workGroupSize);
ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue, outputKernel, 1, NULL, &numWorkItems, &workGroupSize,0 ,0 ,0);
if( ciErrNum != CL_SUCCESS )
{
btAssert( 0 && "enqueueNDRangeKernel(copySoftBodyToVertexBuffer)");
}
ciErrNum = clEnqueueReleaseGLObjects(m_cqCommandQue, 1, &clBuffer, 0, 0, 0);
if( ciErrNum != CL_SUCCESS )
{
btAssert( 0 && "clEnqueueReleaseGLObjects(copySoftBodyToVertexBuffer)");
}
} else {
btAssert( "Undefined output for this solver output" == false );
}
// clFinish in here may not be the best thing. It's possible that we should have a waitForFrameComplete function.
clFinish(m_cqCommandQue);
} // btSoftBodySolverOutputCLtoGL::outputToVertexBuffers
bool btSoftBodySolverOutputCLtoGL::buildShaders()
{
// Ensure current kernels are released first
releaseKernels();
bool returnVal = true;
if( m_shadersInitialized )
return true;
outputToVertexArrayWithNormalsKernel = clFunctions.compileCLKernelFromString( OutputToVertexArrayCLString, "OutputToVertexArrayWithNormalsKernel" );
outputToVertexArrayWithoutNormalsKernel = clFunctions.compileCLKernelFromString( OutputToVertexArrayCLString, "OutputToVertexArrayWithoutNormalsKernel" );
if( returnVal )
m_shadersInitialized = true;
return returnVal;
} // btSoftBodySolverOutputCLtoGL::buildShaders
void btSoftBodySolverOutputCLtoGL::releaseKernels()
{
RELEASE_CL_KERNEL( outputToVertexArrayWithNormalsKernel );
RELEASE_CL_KERNEL( outputToVertexArrayWithoutNormalsKernel );
m_shadersInitialized = false;
} // btSoftBodySolverOutputCLtoGL::releaseKernels
bool btSoftBodySolverOutputCLtoGL::checkInitialized()
{
if( !m_shadersInitialized )
if( buildShaders() )
m_shadersInitialized = true;
return m_shadersInitialized;
}

View File

@@ -0,0 +1,62 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef BT_SOFT_BODY_SOLVER_OUTPUT_CL_TO_GL_H
#define BT_SOFT_BODY_SOLVER_OUTPUT_CL_TO_GL_H
#include "btSoftBodySolver_OpenCL.h"
/**
* Class to manage movement of data from a solver to a given target.
* This version is the CL to GL interop version.
*/
class btSoftBodySolverOutputCLtoGL : public btSoftBodySolverOutput
{
protected:
cl_command_queue m_cqCommandQue;
cl_context m_cxMainContext;
CLFunctions clFunctions;
cl_kernel outputToVertexArrayWithNormalsKernel;
cl_kernel outputToVertexArrayWithoutNormalsKernel;
bool m_shadersInitialized;
virtual bool checkInitialized();
virtual bool buildShaders();
void releaseKernels();
public:
btSoftBodySolverOutputCLtoGL(cl_command_queue cqCommandQue, cl_context cxMainContext) :
m_cqCommandQue( cqCommandQue ),
m_cxMainContext( cxMainContext ),
clFunctions(cqCommandQue, cxMainContext),
outputToVertexArrayWithNormalsKernel( 0 ),
outputToVertexArrayWithoutNormalsKernel( 0 ),
m_shadersInitialized( false )
{
}
virtual ~btSoftBodySolverOutputCLtoGL()
{
releaseKernels();
}
/** Output current computed vertex data to the vertex buffers for all cloths in the solver. */
virtual void copySoftBodyToVertexBuffer( const btSoftBody * const softBody, btVertexBufferDescriptor *vertexBuffer );
};
#endif // #ifndef BT_SOFT_BODY_SOLVER_OUTPUT_CL_TO_GL_H

View File

@@ -0,0 +1,168 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef BT_SOFT_BODY_SOLVER_VERTEX_BUFFER_OPENGL_H
#define BT_SOFT_BODY_SOLVER_VERTEX_BUFFER_OPENGL_H
#include "BulletSoftBody/btSoftBodySolverVertexBuffer.h"
#ifdef USE_MINICL
#include "MiniCL/cl.h"
#else //USE_MINICL
#ifdef __APPLE__
#include <OpenCL/OpenCL.h>
#else
#include <CL/cl.h>
#endif //__APPLE__
#endif//USE_MINICL
#ifndef USE_MINICL
#include <CL/cl_gl.h>
#endif //USE_MINICL
#ifdef _WIN32//for glut.h
#include <windows.h>
#endif
//think different
#if defined(__APPLE__) && !defined (VMDMESA)
#include <OpenGL/OpenGL.h>
#include <OpenGL/gl.h>
#include <OpenGL/glu.h>
#include <GLUT/glut.h>
#else
#ifdef _WINDOWS
#include <windows.h>
#include <GL/gl.h>
#include <GL/glu.h>
#else
#include <GL/glut.h>
#endif //_WINDOWS
#endif //APPLE
class btOpenGLInteropVertexBufferDescriptor : public btVertexBufferDescriptor
{
protected:
/** OpenCL context */
cl_context m_context;
/** OpenCL command queue */
cl_command_queue m_commandQueue;
/** OpenCL interop buffer */
cl_mem m_buffer;
/** VBO in GL that is the basis of the interop buffer */
GLuint m_openGLVBO;
public:
/**
* context is the OpenCL context this interop buffer will work in.
* queue is the command queue that kernels and data movement will be enqueued into.
* openGLVBO is the OpenGL vertex buffer data will be copied into.
* vertexOffset is the offset in floats to the first vertex.
* vertexStride is the stride in floats between vertices.
*/
btOpenGLInteropVertexBufferDescriptor( cl_command_queue cqCommandQue, cl_context context, GLuint openGLVBO, int vertexOffset, int vertexStride )
{
#ifndef USE_MINICL
cl_int ciErrNum = CL_SUCCESS;
m_context = context;
m_commandQueue = cqCommandQue;
m_vertexOffset = vertexOffset;
m_vertexStride = vertexStride;
m_openGLVBO = openGLVBO;
m_buffer = clCreateFromGLBuffer(m_context, CL_MEM_WRITE_ONLY, openGLVBO, &ciErrNum);
if( ciErrNum != CL_SUCCESS )
{
btAssert( 0 && "clEnqueueAcquireGLObjects(copySoftBodyToVertexBuffer)");
}
m_hasVertexPositions = true;
#else
btAssert(0);//MiniCL shouldn't get here
#endif
}
/**
* context is the OpenCL context this interop buffer will work in.
* queue is the command queue that kernels and data movement will be enqueued into.
* openGLVBO is the OpenGL vertex buffer data will be copied into.
* vertexOffset is the offset in floats to the first vertex.
* vertexStride is the stride in floats between vertices.
* normalOffset is the offset in floats to the first normal.
* normalStride is the stride in floats between normals.
*/
btOpenGLInteropVertexBufferDescriptor( cl_command_queue cqCommandQue, cl_context context, GLuint openGLVBO, int vertexOffset, int vertexStride, int normalOffset, int normalStride )
{
#ifndef USE_MINICL
cl_int ciErrNum = CL_SUCCESS;
m_context = context;
m_commandQueue = cqCommandQue;
m_openGLVBO = openGLVBO;
m_buffer = clCreateFromGLBuffer(m_context, CL_MEM_WRITE_ONLY, openGLVBO, &ciErrNum);
if( ciErrNum != CL_SUCCESS )
{
btAssert( 0 && "clEnqueueAcquireGLObjects(copySoftBodyToVertexBuffer)");
}
m_vertexOffset = vertexOffset;
m_vertexStride = vertexStride;
m_hasVertexPositions = true;
m_normalOffset = normalOffset;
m_normalStride = normalStride;
m_hasNormals = true;
#else
btAssert(0);
#endif //USE_MINICL
}
virtual ~btOpenGLInteropVertexBufferDescriptor()
{
clReleaseMemObject( m_buffer );
}
/**
* Return the type of the vertex buffer descriptor.
*/
virtual BufferTypes getBufferType() const
{
return OPENGL_BUFFER;
}
virtual cl_context getContext() const
{
return m_context;
}
virtual cl_mem getBuffer() const
{
return m_buffer;
}
};
#endif // #ifndef BT_SOFT_BODY_SOLVER_VERTEX_BUFFER_OPENGL_H

View File

@@ -0,0 +1,82 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef BT_SOFT_BODY_SOLVER_OPENCL_SIMDAWARE_H
#define BT_SOFT_BODY_SOLVER_OPENCL_SIMDAWARE_H
#include "stddef.h" //for size_t
#include "vectormath/vmInclude.h"
#include "btSoftBodySolver_OpenCL.h"
#include "btSoftBodySolverBuffer_OpenCL.h"
#include "btSoftBodySolverLinkData_OpenCLSIMDAware.h"
#include "btSoftBodySolverVertexData_OpenCL.h"
#include "btSoftBodySolverTriangleData_OpenCL.h"
class btOpenCLSoftBodySolverSIMDAware : public btOpenCLSoftBodySolver
{
protected:
btSoftBodyLinkDataOpenCLSIMDAware m_linkData;
bool m_shadersInitialized;
bool buildShaders();
void updateConstants( float timeStep );
float computeTriangleArea(
const Vectormath::Aos::Point3 &vertex0,
const Vectormath::Aos::Point3 &vertex1,
const Vectormath::Aos::Point3 &vertex2 );
//////////////////////////////////////
// Kernel dispatches
void solveLinksForPosition( int startLink, int numLinks, float kst, float ti );
void solveCollisionsAndUpdateVelocities( float isolverdt );
// End kernel dispatches
/////////////////////////////////////
public:
btOpenCLSoftBodySolverSIMDAware(cl_command_queue queue,cl_context ctx);
virtual ~btOpenCLSoftBodySolverSIMDAware();
virtual SolverTypes getSolverType() const
{
return CL_SIMD_SOLVER;
}
virtual btSoftBodyLinkData &getLinkData();
virtual void optimize( btAlignedObjectArray< btSoftBody * > &softBodies , bool forceUpdate=false);
virtual void solveConstraints( float solverdt );
}; // btOpenCLSoftBodySolverSIMDAware
#endif // #ifndef BT_SOFT_BODY_SOLVER_OPENCL_SIMDAWARE_H