fix thread index going out of range bug
This commit is contained in:
@@ -14,7 +14,7 @@
|
||||
// Ogre (www.ogre3d.org).
|
||||
|
||||
#include "btQuickprof.h"
|
||||
|
||||
#include "btThreads.h"
|
||||
|
||||
|
||||
|
||||
@@ -685,6 +685,9 @@ void CProfileManager::dumpAll()
|
||||
|
||||
unsigned int btQuickprofGetCurrentThreadIndex2()
|
||||
{
|
||||
#if BT_THREADSAFE
|
||||
return btGetCurrentThreadIndex();
|
||||
#else // #if BT_THREADSAFE
|
||||
const unsigned int kNullIndex = ~0U;
|
||||
#ifdef _WIN32
|
||||
#if defined(__MINGW32__) || defined(__MINGW64__)
|
||||
@@ -717,6 +720,7 @@ unsigned int btQuickprofGetCurrentThreadIndex2()
|
||||
sThreadIndex = gThreadCounter++;
|
||||
}
|
||||
return sThreadIndex;
|
||||
#endif // #else // #if BT_THREADSAFE
|
||||
}
|
||||
|
||||
void btEnterProfileZoneDefault(const char* name)
|
||||
|
||||
@@ -17,26 +17,25 @@ subject to the following restrictions:
|
||||
#include "btQuickprof.h"
|
||||
#include <algorithm> // for min and max
|
||||
|
||||
#if BT_THREADSAFE
|
||||
|
||||
#if BT_USE_OPENMP
|
||||
#if BT_USE_OPENMP && BT_THREADSAFE
|
||||
|
||||
#include <omp.h>
|
||||
|
||||
#endif // #if BT_USE_OPENMP
|
||||
#endif // #if BT_USE_OPENMP && BT_THREADSAFE
|
||||
|
||||
|
||||
#if BT_USE_PPL
|
||||
#if BT_USE_PPL && BT_THREADSAFE
|
||||
|
||||
// use Microsoft Parallel Patterns Library (installed with Visual Studio 2010 and later)
|
||||
#include <ppl.h> // if you get a compile error here, check whether your version of Visual Studio includes PPL
|
||||
// Visual Studio 2010 and later should come with it
|
||||
#include <concrtrm.h> // for GetProcessorCount()
|
||||
|
||||
#endif // #if BT_USE_PPL
|
||||
#endif // #if BT_USE_PPL && BT_THREADSAFE
|
||||
|
||||
|
||||
#if BT_USE_TBB
|
||||
#if BT_USE_TBB && BT_THREADSAFE
|
||||
|
||||
// use Intel Threading Building Blocks for thread management
|
||||
#define __TBB_NO_IMPLICIT_LINKAGE 1
|
||||
@@ -45,224 +44,10 @@ subject to the following restrictions:
|
||||
#include <tbb/parallel_for.h>
|
||||
#include <tbb/blocked_range.h>
|
||||
|
||||
#endif // #if BT_USE_TBB
|
||||
|
||||
|
||||
static btITaskScheduler* gBtTaskScheduler;
|
||||
static int gThreadsRunningCounter = 0; // useful for detecting if we are trying to do nested parallel-for calls
|
||||
static btSpinMutex gThreadsRunningCounterMutex;
|
||||
|
||||
void btPushThreadsAreRunning()
|
||||
{
|
||||
gThreadsRunningCounterMutex.lock();
|
||||
gThreadsRunningCounter++;
|
||||
gThreadsRunningCounterMutex.unlock();
|
||||
}
|
||||
|
||||
void btPopThreadsAreRunning()
|
||||
{
|
||||
gThreadsRunningCounterMutex.lock();
|
||||
gThreadsRunningCounter--;
|
||||
gThreadsRunningCounterMutex.unlock();
|
||||
}
|
||||
|
||||
bool btThreadsAreRunning()
|
||||
{
|
||||
return gThreadsRunningCounter != 0;
|
||||
}
|
||||
|
||||
|
||||
void btSetTaskScheduler( btITaskScheduler* ts )
|
||||
{
|
||||
gBtTaskScheduler = ts;
|
||||
}
|
||||
|
||||
btITaskScheduler* btGetTaskScheduler()
|
||||
{
|
||||
return gBtTaskScheduler;
|
||||
}
|
||||
|
||||
void btParallelFor( int iBegin, int iEnd, int grainSize, const btIParallelForBody& body )
|
||||
{
|
||||
gBtTaskScheduler->parallelFor( iBegin, iEnd, grainSize, body );
|
||||
}
|
||||
|
||||
|
||||
#if BT_USE_OPENMP
|
||||
///
|
||||
/// btTaskSchedulerOpenMP -- OpenMP task scheduler implementation
|
||||
///
|
||||
class btTaskSchedulerOpenMP : public btITaskScheduler
|
||||
{
|
||||
int m_numThreads;
|
||||
public:
|
||||
btTaskSchedulerOpenMP() : btITaskScheduler( "OpenMP" )
|
||||
{
|
||||
m_numThreads = 0;
|
||||
}
|
||||
virtual int getMaxNumThreads() const BT_OVERRIDE
|
||||
{
|
||||
return omp_get_max_threads();
|
||||
}
|
||||
virtual int getNumThreads() const BT_OVERRIDE
|
||||
{
|
||||
return m_numThreads;
|
||||
}
|
||||
virtual void setNumThreads( int numThreads ) BT_OVERRIDE
|
||||
{
|
||||
m_numThreads = ( std::max )( 1, numThreads );
|
||||
omp_set_num_threads( m_numThreads );
|
||||
}
|
||||
virtual void parallelFor( int iBegin, int iEnd, int grainSize, const btIParallelForBody& body ) BT_OVERRIDE
|
||||
{
|
||||
BT_PROFILE( "parallelFor_OpenMP" );
|
||||
btPushThreadsAreRunning();
|
||||
#pragma omp parallel for schedule( static, 1 )
|
||||
for ( int i = iBegin; i < iEnd; i += grainSize )
|
||||
{
|
||||
BT_PROFILE( "OpenMP_job" );
|
||||
body.forLoop( i, ( std::min )( i + grainSize, iEnd ) );
|
||||
}
|
||||
btPopThreadsAreRunning();
|
||||
}
|
||||
};
|
||||
#endif // #if BT_USE_OPENMP
|
||||
|
||||
|
||||
#if BT_USE_TBB
|
||||
///
|
||||
/// btTaskSchedulerTBB -- task scheduler implemented via Intel Threaded Building Blocks
|
||||
///
|
||||
class btTaskSchedulerTBB : public btITaskScheduler
|
||||
{
|
||||
int m_numThreads;
|
||||
tbb::task_scheduler_init* m_tbbSchedulerInit;
|
||||
|
||||
public:
|
||||
btTaskSchedulerTBB() : btITaskScheduler( "IntelTBB" )
|
||||
{
|
||||
m_numThreads = 0;
|
||||
m_tbbSchedulerInit = NULL;
|
||||
}
|
||||
~btTaskSchedulerTBB()
|
||||
{
|
||||
if ( m_tbbSchedulerInit )
|
||||
{
|
||||
delete m_tbbSchedulerInit;
|
||||
m_tbbSchedulerInit = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
virtual int getMaxNumThreads() const BT_OVERRIDE
|
||||
{
|
||||
return tbb::task_scheduler_init::default_num_threads();
|
||||
}
|
||||
virtual int getNumThreads() const BT_OVERRIDE
|
||||
{
|
||||
return m_numThreads;
|
||||
}
|
||||
virtual void setNumThreads( int numThreads ) BT_OVERRIDE
|
||||
{
|
||||
m_numThreads = ( std::max )( 1, numThreads );
|
||||
if ( m_tbbSchedulerInit )
|
||||
{
|
||||
delete m_tbbSchedulerInit;
|
||||
m_tbbSchedulerInit = NULL;
|
||||
}
|
||||
m_tbbSchedulerInit = new tbb::task_scheduler_init( m_numThreads );
|
||||
}
|
||||
struct BodyAdapter
|
||||
{
|
||||
const btIParallelForBody* mBody;
|
||||
|
||||
void operator()( const tbb::blocked_range<int>& range ) const
|
||||
{
|
||||
BT_PROFILE( "TBB_job" );
|
||||
mBody->forLoop( range.begin(), range.end() );
|
||||
}
|
||||
};
|
||||
virtual void parallelFor( int iBegin, int iEnd, int grainSize, const btIParallelForBody& body ) BT_OVERRIDE
|
||||
{
|
||||
BT_PROFILE( "parallelFor_TBB" );
|
||||
// TBB dispatch
|
||||
BodyAdapter tbbBody;
|
||||
tbbBody.mBody = &body;
|
||||
btPushThreadsAreRunning();
|
||||
tbb::parallel_for( tbb::blocked_range<int>( iBegin, iEnd, grainSize ),
|
||||
tbbBody,
|
||||
tbb::simple_partitioner()
|
||||
);
|
||||
btPopThreadsAreRunning();
|
||||
}
|
||||
};
|
||||
#endif // #if BT_USE_TBB
|
||||
|
||||
#if BT_USE_PPL
|
||||
///
|
||||
/// btTaskSchedulerPPL -- task scheduler implemented via Microsoft Parallel Patterns Lib
|
||||
///
|
||||
class btTaskSchedulerPPL : public btITaskScheduler
|
||||
{
|
||||
int m_numThreads;
|
||||
public:
|
||||
btTaskSchedulerPPL() : btITaskScheduler( "PPL" )
|
||||
{
|
||||
m_numThreads = 0;
|
||||
}
|
||||
virtual int getMaxNumThreads() const BT_OVERRIDE
|
||||
{
|
||||
return concurrency::GetProcessorCount();
|
||||
}
|
||||
virtual int getNumThreads() const BT_OVERRIDE
|
||||
{
|
||||
return m_numThreads;
|
||||
}
|
||||
virtual void setNumThreads( int numThreads ) BT_OVERRIDE
|
||||
{
|
||||
m_numThreads = ( std::max )( 1, numThreads );
|
||||
using namespace concurrency;
|
||||
if ( CurrentScheduler::Id() != -1 )
|
||||
{
|
||||
CurrentScheduler::Detach();
|
||||
}
|
||||
SchedulerPolicy policy;
|
||||
policy.SetConcurrencyLimits( m_numThreads, m_numThreads );
|
||||
CurrentScheduler::Create( policy );
|
||||
}
|
||||
struct BodyAdapter
|
||||
{
|
||||
const btIParallelForBody* mBody;
|
||||
int mGrainSize;
|
||||
int mIndexEnd;
|
||||
|
||||
void operator()( int i ) const
|
||||
{
|
||||
BT_PROFILE( "PPL_job" );
|
||||
mBody->forLoop( i, ( std::min )( i + mGrainSize, mIndexEnd ) );
|
||||
}
|
||||
};
|
||||
virtual void parallelFor( int iBegin, int iEnd, int grainSize, const btIParallelForBody& body ) BT_OVERRIDE
|
||||
{
|
||||
BT_PROFILE( "parallelFor_PPL" );
|
||||
// PPL dispatch
|
||||
BodyAdapter pplBody;
|
||||
pplBody.mBody = &body;
|
||||
pplBody.mGrainSize = grainSize;
|
||||
pplBody.mIndexEnd = iEnd;
|
||||
btPushThreadsAreRunning();
|
||||
// note: MSVC 2010 doesn't support partitioner args, so avoid them
|
||||
concurrency::parallel_for( iBegin,
|
||||
iEnd,
|
||||
grainSize,
|
||||
pplBody
|
||||
);
|
||||
btPopThreadsAreRunning();
|
||||
}
|
||||
};
|
||||
#endif // #if BT_USE_PPL
|
||||
|
||||
#endif // #if BT_USE_TBB && BT_THREADSAFE
|
||||
|
||||
|
||||
#if BT_THREADSAFE
|
||||
//
|
||||
// Lightweight spin-mutex based on atomics
|
||||
// Using ordinary system-provided mutexes like Windows critical sections was noticeably slower
|
||||
@@ -415,28 +200,107 @@ void btSpinMutex::unlock()
|
||||
|
||||
#endif //#else //#elif USE_MSVC_INTRINSICS
|
||||
|
||||
#else //#if BT_THREADSAFE
|
||||
|
||||
// These should not be called ever
|
||||
void btSpinMutex::lock()
|
||||
{
|
||||
btAssert( !"unimplemented btSpinMutex::lock() called" );
|
||||
}
|
||||
|
||||
void btSpinMutex::unlock()
|
||||
{
|
||||
btAssert( !"unimplemented btSpinMutex::unlock() called" );
|
||||
}
|
||||
|
||||
bool btSpinMutex::tryLock()
|
||||
{
|
||||
btAssert( !"unimplemented btSpinMutex::tryLock() called" );
|
||||
return true;
|
||||
}
|
||||
|
||||
#define THREAD_LOCAL_STATIC static
|
||||
|
||||
#endif // #else //#if BT_THREADSAFE
|
||||
|
||||
|
||||
struct ThreadsafeCounter
|
||||
{
|
||||
unsigned int mCounter;
|
||||
btSpinMutex mMutex;
|
||||
|
||||
ThreadsafeCounter() {mCounter=0;}
|
||||
ThreadsafeCounter()
|
||||
{
|
||||
mCounter = 0;
|
||||
--mCounter; // first count should come back 0
|
||||
}
|
||||
|
||||
unsigned int getNext()
|
||||
{
|
||||
// no need to optimize this with atomics, it is only called ONCE per thread!
|
||||
mMutex.lock();
|
||||
unsigned int val = mCounter++;
|
||||
mCounter++;
|
||||
if ( mCounter >= BT_MAX_THREAD_COUNT )
|
||||
{
|
||||
btAssert( !"thread counter exceeded" );
|
||||
// wrap back to the first worker index
|
||||
mCounter = 1;
|
||||
}
|
||||
unsigned int val = mCounter;
|
||||
mMutex.unlock();
|
||||
return val;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
static btITaskScheduler* gBtTaskScheduler;
|
||||
static int gThreadsRunningCounter = 0; // useful for detecting if we are trying to do nested parallel-for calls
|
||||
static btSpinMutex gThreadsRunningCounterMutex;
|
||||
static ThreadsafeCounter gThreadCounter;
|
||||
|
||||
|
||||
// return a unique index per thread, starting with 0 and counting up
|
||||
//
|
||||
// BT_DETECT_BAD_THREAD_INDEX tries to detect when there are multiple threads assigned the same thread index.
|
||||
//
|
||||
// BT_DETECT_BAD_THREAD_INDEX is a developer option to test if
|
||||
// certain assumptions about how the task scheduler manages its threads
|
||||
// holds true.
|
||||
// The main assumption is:
|
||||
// - when the threadpool is resized, the task scheduler either
|
||||
// 1. destroys all worker threads and creates all new ones in the correct number, OR
|
||||
// 2. never destroys a worker thread
|
||||
//
|
||||
// We make that assumption because we can't easily enumerate the worker threads of a task scheduler
|
||||
// to assign nice sequential thread-indexes. We also do not get notified if a worker thread is destroyed,
|
||||
// so we can't tell when a thread-index is no longer being used.
|
||||
// We allocate thread-indexes as needed with a sequential global thread counter.
|
||||
//
|
||||
// Our simple thread-counting scheme falls apart if the task scheduler destroys some threads but
|
||||
// continues to re-use other threads and the application repeatedly resizes the thread pool of the
|
||||
// task scheduler.
|
||||
// In order to prevent the thread-counter from exceeding the global max (BT_MAX_THREAD_COUNT), we
|
||||
// wrap the thread counter back to 1. This should only happen if the worker threads have all been
|
||||
// destroyed and re-created.
|
||||
//
|
||||
// BT_DETECT_BAD_THREAD_INDEX only works for Win32 right now,
|
||||
// but could be adapted to work with pthreads
|
||||
#define BT_DETECT_BAD_THREAD_INDEX 0
|
||||
|
||||
#if BT_DETECT_BAD_THREAD_INDEX
|
||||
|
||||
typedef DWORD ThreadId_t;
|
||||
const static ThreadId_t kInvalidThreadId = 0;
|
||||
ThreadId_t gDebugThreadIds[ BT_MAX_THREAD_COUNT ];
|
||||
|
||||
static ThreadId_t getDebugThreadId()
|
||||
{
|
||||
return GetCurrentThreadId();
|
||||
}
|
||||
|
||||
#endif // #if BT_DETECT_BAD_THREAD_INDEX
|
||||
|
||||
|
||||
// return a unique index per thread, main thread is 0, worker threads are in [1, BT_MAX_THREAD_COUNT)
|
||||
unsigned int btGetCurrentThreadIndex()
|
||||
{
|
||||
const unsigned int kNullIndex = ~0U;
|
||||
@@ -444,7 +308,30 @@ unsigned int btGetCurrentThreadIndex()
|
||||
if ( sThreadIndex == kNullIndex )
|
||||
{
|
||||
sThreadIndex = gThreadCounter.getNext();
|
||||
btAssert( sThreadIndex < BT_MAX_THREAD_COUNT );
|
||||
}
|
||||
#if BT_DETECT_BAD_THREAD_INDEX
|
||||
if ( gBtTaskScheduler && sThreadIndex > 0 )
|
||||
{
|
||||
ThreadId_t tid = getDebugThreadId();
|
||||
// if not set
|
||||
if ( gDebugThreadIds[ sThreadIndex ] == kInvalidThreadId )
|
||||
{
|
||||
// set it
|
||||
gDebugThreadIds[ sThreadIndex ] = tid;
|
||||
}
|
||||
else
|
||||
{
|
||||
if ( gDebugThreadIds[ sThreadIndex ] != tid )
|
||||
{
|
||||
// this could indicate the task scheduler is breaking our assumptions about
|
||||
// how threads are managed when threadpool is resized
|
||||
btAssert( !"there are 2 or more threads with the same thread-index!" );
|
||||
__debugbreak();
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif // #if BT_DETECT_BAD_THREAD_INDEX
|
||||
return sThreadIndex;
|
||||
}
|
||||
|
||||
@@ -453,38 +340,123 @@ bool btIsMainThread()
|
||||
return btGetCurrentThreadIndex() == 0;
|
||||
}
|
||||
|
||||
#else // #if BT_THREADSAFE
|
||||
|
||||
// These should not be called ever
|
||||
void btSpinMutex::lock()
|
||||
void btResetThreadIndexCounter()
|
||||
{
|
||||
btAssert(!"unimplemented btSpinMutex::lock() called");
|
||||
// for when all current worker threads are destroyed
|
||||
btAssert( btIsMainThread() );
|
||||
gThreadCounter.mCounter = 0;
|
||||
}
|
||||
|
||||
void btSpinMutex::unlock()
|
||||
btITaskScheduler::btITaskScheduler( const char* name )
|
||||
{
|
||||
btAssert(!"unimplemented btSpinMutex::unlock() called");
|
||||
m_name = name;
|
||||
m_savedThreadCounter = 0;
|
||||
m_isActive = false;
|
||||
}
|
||||
|
||||
bool btSpinMutex::tryLock()
|
||||
void btITaskScheduler::activate()
|
||||
{
|
||||
btAssert(!"unimplemented btSpinMutex::tryLock() called");
|
||||
return true;
|
||||
// gThreadCounter is used to assign a thread-index to each worker thread in a task scheduler.
|
||||
// The main thread is always thread-index 0, and worker threads are numbered from 1 to 63 (BT_MAX_THREAD_COUNT-1)
|
||||
// The thread-indexes need to be unique amongst the threads that can be running simultaneously.
|
||||
// Since only one task scheduler can be used at a time, it is OK for a pair of threads that belong to different
|
||||
// task schedulers to share the same thread index because they can't be running at the same time.
|
||||
// So each task scheduler needs to keep its own thread counter value
|
||||
if ( !m_isActive )
|
||||
{
|
||||
gThreadCounter.mCounter = m_savedThreadCounter; // restore saved thread counter
|
||||
m_isActive = true;
|
||||
}
|
||||
}
|
||||
|
||||
// non-parallel version of btParallelFor
|
||||
void btITaskScheduler::deactivate()
|
||||
{
|
||||
if ( m_isActive )
|
||||
{
|
||||
m_savedThreadCounter = gThreadCounter.mCounter; // save thread counter
|
||||
m_isActive = false;
|
||||
}
|
||||
}
|
||||
|
||||
void btPushThreadsAreRunning()
|
||||
{
|
||||
gThreadsRunningCounterMutex.lock();
|
||||
gThreadsRunningCounter++;
|
||||
gThreadsRunningCounterMutex.unlock();
|
||||
}
|
||||
|
||||
void btPopThreadsAreRunning()
|
||||
{
|
||||
gThreadsRunningCounterMutex.lock();
|
||||
gThreadsRunningCounter--;
|
||||
gThreadsRunningCounterMutex.unlock();
|
||||
}
|
||||
|
||||
bool btThreadsAreRunning()
|
||||
{
|
||||
return gThreadsRunningCounter != 0;
|
||||
}
|
||||
|
||||
|
||||
void btSetTaskScheduler( btITaskScheduler* ts )
|
||||
{
|
||||
int threadId = btGetCurrentThreadIndex(); // make sure we call this on main thread at least once before any workers run
|
||||
if ( threadId != 0 )
|
||||
{
|
||||
btAssert( !"btSetTaskScheduler must be called from the main thread!" );
|
||||
return;
|
||||
}
|
||||
if ( gBtTaskScheduler )
|
||||
{
|
||||
// deactivate old task scheduler
|
||||
gBtTaskScheduler->deactivate();
|
||||
}
|
||||
gBtTaskScheduler = ts;
|
||||
if ( ts )
|
||||
{
|
||||
// activate new task scheduler
|
||||
ts->activate();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
btITaskScheduler* btGetTaskScheduler()
|
||||
{
|
||||
return gBtTaskScheduler;
|
||||
}
|
||||
|
||||
|
||||
void btParallelFor( int iBegin, int iEnd, int grainSize, const btIParallelForBody& body )
|
||||
{
|
||||
btAssert(!"called btParallelFor in non-threadsafe build. enable BT_THREADSAFE");
|
||||
body.forLoop( iBegin, iEnd );
|
||||
}
|
||||
#if BT_THREADSAFE
|
||||
|
||||
#endif // #if BT_THREADSAFE
|
||||
#if BT_DETECT_BAD_THREAD_INDEX
|
||||
if ( !btThreadsAreRunning() )
|
||||
{
|
||||
// clear out thread ids
|
||||
for ( int i = 0; i < BT_MAX_THREAD_COUNT; ++i )
|
||||
{
|
||||
gDebugThreadIds[ i ] = kInvalidThreadId;
|
||||
}
|
||||
}
|
||||
#endif // #if BT_DETECT_BAD_THREAD_INDEX
|
||||
|
||||
btAssert( gBtTaskScheduler != NULL ); // call btSetTaskScheduler() with a valid task scheduler first!
|
||||
gBtTaskScheduler->parallelFor( iBegin, iEnd, grainSize, body );
|
||||
|
||||
#else // #if BT_THREADSAFE
|
||||
|
||||
// non-parallel version of btParallelFor
|
||||
btAssert( !"called btParallelFor in non-threadsafe build. enable BT_THREADSAFE" );
|
||||
body.forLoop( iBegin, iEnd );
|
||||
|
||||
#endif #else // #if BT_THREADSAFE
|
||||
}
|
||||
|
||||
|
||||
///
|
||||
/// btTaskSchedulerSequential -- non-threaded implementation of task scheduler
|
||||
/// (fallback in case no multi-threaded schedulers are available)
|
||||
/// (really just useful for testing performance of single threaded vs multi)
|
||||
///
|
||||
class btTaskSchedulerSequential : public btITaskScheduler
|
||||
{
|
||||
@@ -500,6 +472,211 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
#if BT_USE_OPENMP && BT_THREADSAFE
|
||||
///
|
||||
/// btTaskSchedulerOpenMP -- wrapper around OpenMP task scheduler
|
||||
///
|
||||
class btTaskSchedulerOpenMP : public btITaskScheduler
|
||||
{
|
||||
int m_numThreads;
|
||||
public:
|
||||
btTaskSchedulerOpenMP() : btITaskScheduler( "OpenMP" )
|
||||
{
|
||||
m_numThreads = 0;
|
||||
}
|
||||
virtual int getMaxNumThreads() const BT_OVERRIDE
|
||||
{
|
||||
return omp_get_max_threads();
|
||||
}
|
||||
virtual int getNumThreads() const BT_OVERRIDE
|
||||
{
|
||||
return m_numThreads;
|
||||
}
|
||||
virtual void setNumThreads( int numThreads ) BT_OVERRIDE
|
||||
{
|
||||
// With OpenMP, because it is a standard with various implementations, we can't
|
||||
// know for sure if every implementation has the same behavior of destroying all
|
||||
// previous threads when resizing the threadpool
|
||||
m_numThreads = ( std::max )( 1, ( std::min )( int( BT_MAX_THREAD_COUNT ), numThreads ) );
|
||||
omp_set_num_threads( 1 ); // hopefully, all previous threads get destroyed here
|
||||
omp_set_num_threads( m_numThreads );
|
||||
m_savedThreadCounter = 0;
|
||||
if ( m_isActive )
|
||||
{
|
||||
btResetThreadIndexCounter();
|
||||
}
|
||||
}
|
||||
virtual void parallelFor( int iBegin, int iEnd, int grainSize, const btIParallelForBody& body ) BT_OVERRIDE
|
||||
{
|
||||
BT_PROFILE( "parallelFor_OpenMP" );
|
||||
btPushThreadsAreRunning();
|
||||
#pragma omp parallel for schedule( static, 1 )
|
||||
for ( int i = iBegin; i < iEnd; i += grainSize )
|
||||
{
|
||||
BT_PROFILE( "OpenMP_job" );
|
||||
body.forLoop( i, ( std::min )( i + grainSize, iEnd ) );
|
||||
}
|
||||
btPopThreadsAreRunning();
|
||||
}
|
||||
};
|
||||
#endif // #if BT_USE_OPENMP && BT_THREADSAFE
|
||||
|
||||
|
||||
#if BT_USE_TBB && BT_THREADSAFE
|
||||
///
|
||||
/// btTaskSchedulerTBB -- wrapper around Intel Threaded Building Blocks task scheduler
|
||||
///
|
||||
class btTaskSchedulerTBB : public btITaskScheduler
|
||||
{
|
||||
int m_numThreads;
|
||||
tbb::task_scheduler_init* m_tbbSchedulerInit;
|
||||
|
||||
public:
|
||||
btTaskSchedulerTBB() : btITaskScheduler( "IntelTBB" )
|
||||
{
|
||||
m_numThreads = 0;
|
||||
m_tbbSchedulerInit = NULL;
|
||||
}
|
||||
~btTaskSchedulerTBB()
|
||||
{
|
||||
if ( m_tbbSchedulerInit )
|
||||
{
|
||||
delete m_tbbSchedulerInit;
|
||||
m_tbbSchedulerInit = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
virtual int getMaxNumThreads() const BT_OVERRIDE
|
||||
{
|
||||
return tbb::task_scheduler_init::default_num_threads();
|
||||
}
|
||||
virtual int getNumThreads() const BT_OVERRIDE
|
||||
{
|
||||
return m_numThreads;
|
||||
}
|
||||
virtual void setNumThreads( int numThreads ) BT_OVERRIDE
|
||||
{
|
||||
m_numThreads = ( std::max )( 1, ( std::min )( int(BT_MAX_THREAD_COUNT), numThreads ) );
|
||||
if ( m_tbbSchedulerInit )
|
||||
{
|
||||
// destroys all previous threads
|
||||
delete m_tbbSchedulerInit;
|
||||
m_tbbSchedulerInit = NULL;
|
||||
}
|
||||
m_tbbSchedulerInit = new tbb::task_scheduler_init( m_numThreads );
|
||||
m_savedThreadCounter = 0;
|
||||
if ( m_isActive )
|
||||
{
|
||||
btResetThreadIndexCounter();
|
||||
}
|
||||
}
|
||||
struct BodyAdapter
|
||||
{
|
||||
const btIParallelForBody* mBody;
|
||||
|
||||
void operator()( const tbb::blocked_range<int>& range ) const
|
||||
{
|
||||
BT_PROFILE( "TBB_job" );
|
||||
mBody->forLoop( range.begin(), range.end() );
|
||||
}
|
||||
};
|
||||
virtual void parallelFor( int iBegin, int iEnd, int grainSize, const btIParallelForBody& body ) BT_OVERRIDE
|
||||
{
|
||||
BT_PROFILE( "parallelFor_TBB" );
|
||||
// TBB dispatch
|
||||
BodyAdapter tbbBody;
|
||||
tbbBody.mBody = &body;
|
||||
btPushThreadsAreRunning();
|
||||
tbb::parallel_for( tbb::blocked_range<int>( iBegin, iEnd, grainSize ),
|
||||
tbbBody,
|
||||
tbb::simple_partitioner()
|
||||
);
|
||||
btPopThreadsAreRunning();
|
||||
}
|
||||
};
|
||||
#endif // #if BT_USE_TBB && BT_THREADSAFE
|
||||
|
||||
|
||||
#if BT_USE_PPL && BT_THREADSAFE
|
||||
///
|
||||
/// btTaskSchedulerPPL -- wrapper around Microsoft Parallel Patterns Lib task scheduler
|
||||
///
|
||||
class btTaskSchedulerPPL : public btITaskScheduler
|
||||
{
|
||||
int m_numThreads;
|
||||
public:
|
||||
btTaskSchedulerPPL() : btITaskScheduler( "PPL" )
|
||||
{
|
||||
m_numThreads = 0;
|
||||
}
|
||||
virtual int getMaxNumThreads() const BT_OVERRIDE
|
||||
{
|
||||
return concurrency::GetProcessorCount();
|
||||
}
|
||||
virtual int getNumThreads() const BT_OVERRIDE
|
||||
{
|
||||
return m_numThreads;
|
||||
}
|
||||
virtual void setNumThreads( int numThreads ) BT_OVERRIDE
|
||||
{
|
||||
// capping the thread count for PPL due to a thread-index issue
|
||||
const int maxThreadCount = (std::min)(int(BT_MAX_THREAD_COUNT), 31);
|
||||
m_numThreads = ( std::max )( 1, ( std::min )( maxThreadCount, numThreads ) );
|
||||
using namespace concurrency;
|
||||
if ( CurrentScheduler::Id() != -1 )
|
||||
{
|
||||
CurrentScheduler::Detach();
|
||||
}
|
||||
SchedulerPolicy policy;
|
||||
{
|
||||
// PPL seems to destroy threads when threadpool is shrunk, but keeps reusing old threads
|
||||
// force it to destroy old threads
|
||||
policy.SetConcurrencyLimits( 1, 1 );
|
||||
CurrentScheduler::Create( policy );
|
||||
CurrentScheduler::Detach();
|
||||
}
|
||||
policy.SetConcurrencyLimits( m_numThreads, m_numThreads );
|
||||
CurrentScheduler::Create( policy );
|
||||
m_savedThreadCounter = 0;
|
||||
if ( m_isActive )
|
||||
{
|
||||
btResetThreadIndexCounter();
|
||||
}
|
||||
}
|
||||
struct BodyAdapter
|
||||
{
|
||||
const btIParallelForBody* mBody;
|
||||
int mGrainSize;
|
||||
int mIndexEnd;
|
||||
|
||||
void operator()( int i ) const
|
||||
{
|
||||
BT_PROFILE( "PPL_job" );
|
||||
mBody->forLoop( i, ( std::min )( i + mGrainSize, mIndexEnd ) );
|
||||
}
|
||||
};
|
||||
virtual void parallelFor( int iBegin, int iEnd, int grainSize, const btIParallelForBody& body ) BT_OVERRIDE
|
||||
{
|
||||
BT_PROFILE( "parallelFor_PPL" );
|
||||
// PPL dispatch
|
||||
BodyAdapter pplBody;
|
||||
pplBody.mBody = &body;
|
||||
pplBody.mGrainSize = grainSize;
|
||||
pplBody.mIndexEnd = iEnd;
|
||||
btPushThreadsAreRunning();
|
||||
// note: MSVC 2010 doesn't support partitioner args, so avoid them
|
||||
concurrency::parallel_for( iBegin,
|
||||
iEnd,
|
||||
grainSize,
|
||||
pplBody
|
||||
);
|
||||
btPopThreadsAreRunning();
|
||||
}
|
||||
};
|
||||
#endif // #if BT_USE_PPL && BT_THREADSAFE
|
||||
|
||||
|
||||
// create a non-threaded task scheduler (always available)
|
||||
btITaskScheduler* btGetSequentialTaskScheduler()
|
||||
{
|
||||
|
||||
@@ -28,6 +28,14 @@ subject to the following restrictions:
|
||||
#define BT_OVERRIDE
|
||||
#endif
|
||||
|
||||
const unsigned int BT_MAX_THREAD_COUNT = 64; // only if BT_THREADSAFE is 1
|
||||
|
||||
// for internal use only
|
||||
bool btIsMainThread();
|
||||
bool btThreadsAreRunning();
|
||||
unsigned int btGetCurrentThreadIndex();
|
||||
void btResetThreadIndexCounter(); // notify that all worker threads have been destroyed
|
||||
|
||||
///
|
||||
/// btSpinMutex -- lightweight spin-mutex implemented with atomic ops, never puts
|
||||
/// a thread to sleep because it is designed to be used with a task scheduler
|
||||
@@ -48,38 +56,41 @@ public:
|
||||
bool tryLock();
|
||||
};
|
||||
|
||||
#if BT_THREADSAFE
|
||||
|
||||
// for internal Bullet use only
|
||||
//
|
||||
// NOTE: btMutex* is for internal Bullet use only
|
||||
//
|
||||
// If BT_THREADSAFE is undefined or 0, should optimize away to nothing.
|
||||
// This is good because for the single-threaded build of Bullet, any calls
|
||||
// to these functions will be optimized out.
|
||||
//
|
||||
// However, for users of the multi-threaded build of Bullet this is kind
|
||||
// of bad because if you call any of these functions from external code
|
||||
// (where BT_THREADSAFE is undefined) you will get unexpected race conditions.
|
||||
//
|
||||
SIMD_FORCE_INLINE void btMutexLock( btSpinMutex* mutex )
|
||||
{
|
||||
#if BT_THREADSAFE
|
||||
mutex->lock();
|
||||
#endif // #if BT_THREADSAFE
|
||||
}
|
||||
|
||||
SIMD_FORCE_INLINE void btMutexUnlock( btSpinMutex* mutex )
|
||||
{
|
||||
#if BT_THREADSAFE
|
||||
mutex->unlock();
|
||||
#endif // #if BT_THREADSAFE
|
||||
}
|
||||
|
||||
SIMD_FORCE_INLINE bool btMutexTryLock( btSpinMutex* mutex )
|
||||
{
|
||||
#if BT_THREADSAFE
|
||||
return mutex->tryLock();
|
||||
#else
|
||||
return true;
|
||||
#endif // #if BT_THREADSAFE
|
||||
}
|
||||
|
||||
// for internal use only
|
||||
bool btIsMainThread();
|
||||
bool btThreadsAreRunning();
|
||||
unsigned int btGetCurrentThreadIndex();
|
||||
const unsigned int BT_MAX_THREAD_COUNT = 64;
|
||||
|
||||
#else
|
||||
|
||||
// for internal Bullet use only
|
||||
// if BT_THREADSAFE is undefined or 0, should optimize away to nothing
|
||||
SIMD_FORCE_INLINE void btMutexLock( btSpinMutex* ) {}
|
||||
SIMD_FORCE_INLINE void btMutexUnlock( btSpinMutex* ) {}
|
||||
SIMD_FORCE_INLINE bool btMutexTryLock( btSpinMutex* ) {return true;}
|
||||
#endif
|
||||
|
||||
//
|
||||
// btIParallelForBody -- subclass this to express work that can be done in parallel
|
||||
@@ -96,16 +107,24 @@ public:
|
||||
//
|
||||
class btITaskScheduler
|
||||
{
|
||||
const char* m_name;
|
||||
public:
|
||||
btITaskScheduler( const char* name ) : m_name( name ) {}
|
||||
btITaskScheduler( const char* name );
|
||||
virtual ~btITaskScheduler() {}
|
||||
const char* getName() const { return m_name; }
|
||||
|
||||
virtual ~btITaskScheduler() {}
|
||||
virtual int getMaxNumThreads() const = 0;
|
||||
virtual int getNumThreads() const = 0;
|
||||
virtual void setNumThreads( int numThreads ) = 0;
|
||||
virtual void parallelFor( int iBegin, int iEnd, int grainSize, const btIParallelForBody& body ) = 0;
|
||||
|
||||
// internal use only
|
||||
virtual void activate();
|
||||
virtual void deactivate();
|
||||
|
||||
protected:
|
||||
const char* m_name;
|
||||
unsigned int m_savedThreadCounter;
|
||||
bool m_isActive;
|
||||
};
|
||||
|
||||
// set the task scheduler to use for all calls to btParallelFor()
|
||||
|
||||
Reference in New Issue
Block a user