From 1a222f642ad1da94af02e0f95af5e04a91a9149e Mon Sep 17 00:00:00 2001 From: "erwin.coumans" Date: Tue, 15 Mar 2011 22:41:55 +0000 Subject: [PATCH] Stop using wavefront/warp synchronization inside OpenCL/DirectCompute, it is not future proof. Similar to this commit for DX11: http://code.google.com/p/bullet/source/detail?spec=svn2343&r=2330 See also http://forum.beyond3d.com/archive/index.php/t-55751.html --- .../OpenCL/OpenCLC/SolvePositionsSIMDBatched.cl | 12 +----------- .../OpenCL/OpenCLC10/SolvePositionsSIMDBatched.cl | 12 +----------- 2 files changed, 2 insertions(+), 22 deletions(-) diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/SolvePositionsSIMDBatched.cl b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/SolvePositionsSIMDBatched.cl index 28255300e..9a45570ac 100644 --- a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/SolvePositionsSIMDBatched.cl +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/SolvePositionsSIMDBatched.cl @@ -46,18 +46,8 @@ SolvePositionsFromLinksKernel( if( wavefront < (startWaveInBatch + numWaves) ) { // Load the batch counts for the wavefronts - // Mask out in case there's a stray "wavefront" at the end that's been forced in through the multiplier - if( laneInWavefront == 0 ) - { - int2 batchesAndVertexCountsWithinWavefront = g_wavefrontBatchCountsVertexCounts[wavefront]; - wavefrontBatchCountsVertexCounts[localWavefront] = batchesAndVertexCountsWithinWavefront; - } - - mem_fence(CLK_LOCAL_MEM_FENCE); - - - int2 batchesAndVerticesWithinWavefront = wavefrontBatchCountsVertexCounts[localWavefront]; + int2 batchesAndVerticesWithinWavefront = g_wavefrontBatchCountsVertexCounts[wavefront]; int batchesWithinWavefront = batchesAndVerticesWithinWavefront.x; int verticesUsedByWave = batchesAndVerticesWithinWavefront.y; diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/SolvePositionsSIMDBatched.cl b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/SolvePositionsSIMDBatched.cl index aaed72988..331634244 100644 --- a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/SolvePositionsSIMDBatched.cl +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/SolvePositionsSIMDBatched.cl @@ -47,18 +47,8 @@ SolvePositionsFromLinksKernel( if( wavefront < (startWaveInBatch + numWaves) ) { // Load the batch counts for the wavefronts - // Mask out in case there's a stray "wavefront" at the end that's been forced in through the multiplier - if( laneInWavefront == 0 ) - { - int2 batchesAndVertexCountsWithinWavefront = g_wavefrontBatchCountsVertexCounts[wavefront]; - wavefrontBatchCountsVertexCounts[localWavefront] = batchesAndVertexCountsWithinWavefront; - } - - barrier(CLK_LOCAL_MEM_FENCE); - - - int2 batchesAndVerticesWithinWavefront = wavefrontBatchCountsVertexCounts[localWavefront]; + int2 batchesAndVerticesWithinWavefront = g_wavefrontBatchCountsVertexCounts[wavefront]; int batchesWithinWavefront = batchesAndVerticesWithinWavefront.x; int verticesUsedByWave = batchesAndVerticesWithinWavefront.y;