Stop using wavefront/warp synchronization inside OpenCL/DirectCompute; it is not future-proof.

Similar to this commit for DX11: http://code.google.com/p/bullet/source/detail?spec=svn2343&r=2330
See also: http://forum.beyond3d.com/archive/index.php/t-55751.html
2011-03-15 22:41:55 +00:00
parent 21162f77e3
commit 1a222f642a
2 changed files with 2 additions and 22 deletions
--- a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/SolvePositionsSIMDBatched.cl
+++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/SolvePositionsSIMDBatched.cl
@@ -46,18 +46,8 @@ SolvePositionsFromLinksKernel(
 	if( wavefront < (startWaveInBatch + numWaves) )
 	{	
 		// Load the batch counts for the wavefronts
-		// Mask out in case there's a stray "wavefront" at the end that's been forced in through the multiplier
-		if( laneInWavefront == 0 )
-		{
-			int2 batchesAndVertexCountsWithinWavefront = g_wavefrontBatchCountsVertexCounts[wavefront];
-			wavefrontBatchCountsVertexCounts[localWavefront] = batchesAndVertexCountsWithinWavefront;
-		}
-
 		
-		mem_fence(CLK_LOCAL_MEM_FENCE);
-		
-
-		int2 batchesAndVerticesWithinWavefront = wavefrontBatchCountsVertexCounts[localWavefront];
+		int2 batchesAndVerticesWithinWavefront = g_wavefrontBatchCountsVertexCounts[wavefront];
 		int batchesWithinWavefront = batchesAndVerticesWithinWavefront.x;
 		int verticesUsedByWave = batchesAndVerticesWithinWavefront.y;

--- a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/SolvePositionsSIMDBatched.cl
+++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/SolvePositionsSIMDBatched.cl
@@ -47,18 +47,8 @@ SolvePositionsFromLinksKernel(
 	if( wavefront < (startWaveInBatch + numWaves) )
 	{	
 		// Load the batch counts for the wavefronts
-		// Mask out in case there's a stray "wavefront" at the end that's been forced in through the multiplier
-		if( laneInWavefront == 0 )
-		{
-			int2 batchesAndVertexCountsWithinWavefront = g_wavefrontBatchCountsVertexCounts[wavefront];
-			wavefrontBatchCountsVertexCounts[localWavefront] = batchesAndVertexCountsWithinWavefront;
-		}
-
 		
-		barrier(CLK_LOCAL_MEM_FENCE);
-		
-
-		int2 batchesAndVerticesWithinWavefront = wavefrontBatchCountsVertexCounts[localWavefront];
+		int2 batchesAndVerticesWithinWavefront = g_wavefrontBatchCountsVertexCounts[wavefront];
 		int batchesWithinWavefront = batchesAndVerticesWithinWavefront.x;
 		int verticesUsedByWave = batchesAndVerticesWithinWavefront.y;