From e83b5dac75c2cfe591cf87ea4aabe4cdf8ee30ed Mon Sep 17 00:00:00 2001 From: "erwin.coumans" Date: Wed, 3 Aug 2011 20:21:23 +0000 Subject: [PATCH] fixes in the DX11 cloth (for devices with physical wavefronts smaller than 32); another fix for an out-of-bounds check (there was no assert for this previously) --- .../DX11/HLSL/SolvePositionsSIMDBatched.hlsl | 26 ++++++++++++++----- .../DX11/btSoftBodySolver_DX11SIMDAware.cpp | 2 +- 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/SolvePositionsSIMDBatched.hlsl b/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/SolvePositionsSIMDBatched.hlsl index a67758ff5..4834dc150 100644 --- a/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/SolvePositionsSIMDBatched.hlsl +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/SolvePositionsSIMDBatched.hlsl @@ -1,5 +1,7 @@ MSTRINGIFY( + + cbuffer SolvePositionsFromLinksKernelCB : register( b0 ) { int startWaveInBatch; @@ -41,16 +43,20 @@ SolvePositionsFromLinksKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchT const int firstWavefrontInBlock = startWaveInBatch + Gid.x * WAVEFRONT_BLOCK_MULTIPLIER; const int localWavefront = wavefront - firstWavefrontInBlock; + int batchesWithinWavefront = 0; + int verticesUsedByWave = 0; + int cond = wavefront < (startWaveInBatch + numWaves); + // Mask out in case there's a stray "wavefront" at the end that's been forced in through the multiplier - if( wavefront < (startWaveInBatch + numWaves) ) + if( cond) { // Load the batch counts for the wavefronts int2 batchesAndVerticesWithinWavefront = g_wavefrontBatchCountsVertexCounts[wavefront]; - int batchesWithinWavefront = batchesAndVerticesWithinWavefront.x; - int verticesUsedByWave = batchesAndVerticesWithinWavefront.y; + batchesWithinWavefront = batchesAndVerticesWithinWavefront.x; + verticesUsedByWave = batchesAndVerticesWithinWavefront.y; // Load the vertices for the wavefronts for( int vertex = laneInWavefront; 
vertex < verticesUsedByWave; vertex+=WAVEFRONT_SIZE ) @@ -62,10 +68,13 @@ SolvePositionsFromLinksKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchT vertexInverseMassSharedData[localWavefront*MAX_NUM_VERTICES_PER_WAVE + vertex] = g_verticesInverseMass[vertexAddress]; } + } // Ensure compiler does not re-order memory operations - AllMemoryBarrier(); - + //AllMemoryBarrier(); + AllMemoryBarrierWithGroupSync (); + if( cond) + { // Loop through the batches performing the solve on each in LDS int baseDataLocationForWave = WAVEFRONT_SIZE * wavefront * MAX_BATCHES_PER_WAVE; @@ -128,6 +137,11 @@ SolvePositionsFromLinksKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchT } } + } -); \ No newline at end of file + + + +); + diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolver_DX11SIMDAware.cpp b/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolver_DX11SIMDAware.cpp index 8229d1a29..b74c8d248 100644 --- a/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolver_DX11SIMDAware.cpp +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolver_DX11SIMDAware.cpp @@ -620,7 +620,7 @@ static void generateBatchesOfWavefronts( btAlignedObjectArray < btAlignedObjectA mapOfVerticesInBatches.resize( batch + 1 ); // Resize maps with total number of vertices - mapOfVerticesInBatches[batch].resize( numVertices, false ); + mapOfVerticesInBatches[batch].resize( numVertices+1, false ); // Insert vertices into this batch too for( int link = 0; link < wavefront.size(); ++link )