added OpenCL cloth demo, contributed by AMD.

updated GpuSoftBodySolvers
updated DirectCompute cloth demo
This commit is contained in:
erwin.coumans
2010-08-14 00:56:17 +00:00
parent 40958f2b4a
commit 4f9b450200
72 changed files with 7524 additions and 843 deletions

View File

@@ -0,0 +1,128 @@
MSTRINGIFY(
cbuffer SolvePositionsFromLinksKernelCB : register( b0 )
{
int startWaveInBatch;
int numWaves;
float kst;
float ti;
};
// Number of batches per wavefront stored one element per logical wavefront
StructuredBuffer<int2> g_wavefrontBatchCountsVertexCounts : register( t0 );
// Set of up to maxNumVertices vertex addresses per wavefront
StructuredBuffer<int> g_vertexAddressesPerWavefront : register( t1 );
StructuredBuffer<float> g_verticesInverseMass : register( t2 );
// Per-link data layed out structured in terms of sub batches within wavefronts
StructuredBuffer<int2> g_linksVertexIndices : register( t3 );
StructuredBuffer<float> g_linksMassLSC : register( t4 );
StructuredBuffer<float> g_linksRestLengthSquared : register( t5 );
RWStructuredBuffer<float4> g_vertexPositions : register( u0 );
// Data loaded on a per-wave basis
groupshared int2 wavefrontBatchCountsVertexCounts[WAVEFRONT_BLOCK_MULTIPLIER];
groupshared float4 vertexPositionSharedData[MAX_NUM_VERTICES_PER_WAVE*WAVEFRONT_BLOCK_MULTIPLIER];
groupshared float vertexInverseMassSharedData[MAX_NUM_VERTICES_PER_WAVE*WAVEFRONT_BLOCK_MULTIPLIER];
// Storing the vertex addresses actually slowed things down a little
//groupshared int vertexAddressSharedData[MAX_NUM_VERTICES_PER_WAVE*WAVEFRONT_BLOCK_MULTIPLIER];
[numthreads(BLOCK_SIZE, 1, 1)]
void
SolvePositionsFromLinksKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
{
const int laneInWavefront = (DTid.x & (WAVEFRONT_SIZE-1));
const int wavefront = startWaveInBatch + (DTid.x / WAVEFRONT_SIZE);
const int firstWavefrontInBlock = startWaveInBatch + Gid.x * WAVEFRONT_BLOCK_MULTIPLIER;
const int localWavefront = wavefront - firstWavefrontInBlock;
// Mask out in case there's a stray "wavefront" at the end that's been forced in through the multiplier
if( wavefront < (startWaveInBatch + numWaves) )
{
// Load the batch counts for the wavefronts
// Mask out in case there's a stray "wavefront" at the end that's been forced in through the multiplier
if( laneInWavefront == 0 )
{
int2 batchesAndVertexCountsWithinWavefront = g_wavefrontBatchCountsVertexCounts[firstWavefrontInBlock + localWavefront];
wavefrontBatchCountsVertexCounts[localWavefront] = batchesAndVertexCountsWithinWavefront;
}
int2 batchesAndVerticesWithinWavefront = wavefrontBatchCountsVertexCounts[localWavefront];
int batchesWithinWavefront = batchesAndVerticesWithinWavefront.x;
int verticesUsedByWave = batchesAndVerticesWithinWavefront.y;
// Load the vertices for the wavefronts
for( int vertex = laneInWavefront; vertex < verticesUsedByWave; vertex+=WAVEFRONT_SIZE )
{
int vertexAddress = g_vertexAddressesPerWavefront[wavefront*MAX_NUM_VERTICES_PER_WAVE + vertex];
//vertexAddressSharedData[localWavefront*MAX_NUM_VERTICES_PER_WAVE + vertex] = vertexAddress;
vertexPositionSharedData[localWavefront*MAX_NUM_VERTICES_PER_WAVE + vertex] = g_vertexPositions[vertexAddress];
vertexInverseMassSharedData[localWavefront*MAX_NUM_VERTICES_PER_WAVE + vertex] = g_verticesInverseMass[vertexAddress];
}
// Loop through the batches performing the solve on each in LDS
int baseDataLocationForWave = WAVEFRONT_SIZE * wavefront * MAX_BATCHES_PER_WAVE;
//for( int batch = 0; batch < batchesWithinWavefront; ++batch )
int batch = 0;
do
{
int baseDataLocation = baseDataLocationForWave + WAVEFRONT_SIZE * batch;
int locationOfValue = baseDataLocation + laneInWavefront;
// These loads should all be perfectly linear across the WF
int2 localVertexIndices = g_linksVertexIndices[locationOfValue];
float massLSC = g_linksMassLSC[locationOfValue];
float restLengthSquared = g_linksRestLengthSquared[locationOfValue];
// LDS vertex addresses based on logical wavefront number in block and loaded index
int vertexAddress0 = MAX_NUM_VERTICES_PER_WAVE * localWavefront + localVertexIndices.x;
int vertexAddress1 = MAX_NUM_VERTICES_PER_WAVE * localWavefront + localVertexIndices.y;
float3 position0 = vertexPositionSharedData[vertexAddress0].xyz;
float3 position1 = vertexPositionSharedData[vertexAddress1].xyz;
float inverseMass0 = vertexInverseMassSharedData[vertexAddress0];
float inverseMass1 = vertexInverseMassSharedData[vertexAddress1];
float3 del = position1 - position0;
float len = dot(del, del);
float k = 0;
if( massLSC > 0.0f )
{
k = ((restLengthSquared - len)/(massLSC*(restLengthSquared+len)))*kst;
}
position0 = position0 - del*(k*inverseMass0);
position1 = position1 + del*(k*inverseMass1);
vertexPositionSharedData[vertexAddress0] = float4(position0, 0.f);
vertexPositionSharedData[vertexAddress1] = float4(position1, 0.f);
++batch;
} while( batch < batchesWithinWavefront );
// Update the global memory vertices for the wavefronts
for( int vertex = laneInWavefront; vertex < verticesUsedByWave; vertex+=WAVEFRONT_SIZE )
{
int vertexAddress = g_vertexAddressesPerWavefront[wavefront*MAX_NUM_VERTICES_PER_WAVE + vertex];
g_vertexPositions[vertexAddress] = vertexPositionSharedData[localWavefront*MAX_NUM_VERTICES_PER_WAVE + vertex];
}
}
}
);