diff --git a/src/LinearMath/btQuickprof.cpp b/src/LinearMath/btQuickprof.cpp
index c690b57c8..aed3104a6 100644
--- a/src/LinearMath/btQuickprof.cpp
+++ b/src/LinearMath/btQuickprof.cpp
@@ -14,7 +14,7 @@
 // Ogre (www.ogre3d.org).
 
 #include "btQuickprof.h"
-
+#include "btThreads.h"
 
@@ -685,6 +685,9 @@ void CProfileManager::dumpAll()
 
 unsigned int btQuickprofGetCurrentThreadIndex2()
 {
+#if BT_THREADSAFE
+    return btGetCurrentThreadIndex();
+#else // #if BT_THREADSAFE
     const unsigned int kNullIndex = ~0U;
 
 #ifdef _WIN32
 #if defined(__MINGW32__) || defined(__MINGW64__)
@@ -717,6 +720,7 @@ unsigned int btQuickprofGetCurrentThreadIndex2()
         sThreadIndex = gThreadCounter++;
     }
     return sThreadIndex;
+#endif // #else // #if BT_THREADSAFE
 }
 
 void btEnterProfileZoneDefault(const char* name)
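
With BT_THREADSAFE enabled, btQuickprof now routes its per-thread index through btGetCurrentThreadIndex(), so the profiler and the task schedulers share one index space instead of maintaining two independent counters. The pattern both files rely on (lazily assigning each thread a slot from a shared counter, cached in thread-local storage) reduces to a few lines of portable C++11. The sketch below is illustrative only and is not part of the patch; Bullet keeps its own THREAD_LOCAL_STATIC machinery because it cannot assume C++11 thread_local on every toolchain it supports.

    // minimal sketch of the lazy thread-index pattern (not Bullet code)
    #include <atomic>
    #include <cstdio>
    #include <thread>

    static std::atomic<unsigned int> gNextIndex( 0 );

    unsigned int getThreadIndexSketch()
    {
        // initialized once per thread, on that thread's first call
        static thread_local unsigned int sIndex = gNextIndex.fetch_add( 1 );
        return sIndex;
    }

    int main()
    {
        std::printf( "main: %u\n", getThreadIndexSketch() );  // 0 when main calls first
        std::thread t( []() { std::printf( "worker: %u\n", getThreadIndexSketch() ); } );
        t.join();  // worker prints 1
        return 0;
    }
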
diff --git a/src/LinearMath/btThreads.cpp b/src/LinearMath/btThreads.cpp
index 876bc4bb1..89f157c39 100644
--- a/src/LinearMath/btThreads.cpp
+++ b/src/LinearMath/btThreads.cpp
@@ -17,26 +17,25 @@ subject to the following restrictions:
 
 #include "btQuickprof.h"
 #include <algorithm>  // for min and max
 
-#if BT_THREADSAFE
-
-#if BT_USE_OPENMP
+#if BT_USE_OPENMP && BT_THREADSAFE
 
 #include <omp.h>
 
-#endif // #if BT_USE_OPENMP
+#endif // #if BT_USE_OPENMP && BT_THREADSAFE
 
-#if BT_USE_PPL
+#if BT_USE_PPL && BT_THREADSAFE
 
 // use Microsoft Parallel Patterns Library (installed with Visual Studio 2010 and later)
 #include <ppl.h>  // if you get a compile error here, check whether your version of Visual Studio includes PPL
 // Visual Studio 2010 and later should come with it
 #include <concrtrm.h>  // for GetProcessorCount()
 
-#endif // #if BT_USE_PPL
+#endif // #if BT_USE_PPL && BT_THREADSAFE
 
-#if BT_USE_TBB
+#if BT_USE_TBB && BT_THREADSAFE
 
 // use Intel Threading Building Blocks for thread management
 #define __TBB_NO_IMPLICIT_LINKAGE 1
@@ -45,224 +44,10 @@ subject to the following restrictions:
 #include <tbb/parallel_for.h>
 #include <tbb/blocked_range.h>
 
-#endif // #if BT_USE_TBB
-
-
-static btITaskScheduler* gBtTaskScheduler;
-static int gThreadsRunningCounter = 0;  // useful for detecting if we are trying to do nested parallel-for calls
-static btSpinMutex gThreadsRunningCounterMutex;
-
-void btPushThreadsAreRunning()
-{
-    gThreadsRunningCounterMutex.lock();
-    gThreadsRunningCounter++;
-    gThreadsRunningCounterMutex.unlock();
-}
-
-void btPopThreadsAreRunning()
-{
-    gThreadsRunningCounterMutex.lock();
-    gThreadsRunningCounter--;
-    gThreadsRunningCounterMutex.unlock();
-}
-
-bool btThreadsAreRunning()
-{
-    return gThreadsRunningCounter != 0;
-}
-
-
-void btSetTaskScheduler( btITaskScheduler* ts )
-{
-    gBtTaskScheduler = ts;
-}
-
-btITaskScheduler* btGetTaskScheduler()
-{
-    return gBtTaskScheduler;
-}
-
-void btParallelFor( int iBegin, int iEnd, int grainSize, const btIParallelForBody& body )
-{
-    gBtTaskScheduler->parallelFor( iBegin, iEnd, grainSize, body );
-}
-
-
-#if BT_USE_OPENMP
-///
-/// btTaskSchedulerOpenMP -- OpenMP task scheduler implementation
-///
-class btTaskSchedulerOpenMP : public btITaskScheduler
-{
-    int m_numThreads;
-public:
-    btTaskSchedulerOpenMP() : btITaskScheduler( "OpenMP" )
-    {
-        m_numThreads = 0;
-    }
-    virtual int getMaxNumThreads() const BT_OVERRIDE
-    {
-        return omp_get_max_threads();
-    }
-    virtual int getNumThreads() const BT_OVERRIDE
-    {
-        return m_numThreads;
-    }
-    virtual void setNumThreads( int numThreads ) BT_OVERRIDE
-    {
-        m_numThreads = ( std::max )( 1, numThreads );
-        omp_set_num_threads( m_numThreads );
-    }
-    virtual void parallelFor( int iBegin, int iEnd, int grainSize, const btIParallelForBody& body ) BT_OVERRIDE
-    {
-        BT_PROFILE( "parallelFor_OpenMP" );
-        btPushThreadsAreRunning();
-#pragma omp parallel for schedule( static, 1 )
-        for ( int i = iBegin; i < iEnd; i += grainSize )
-        {
-            BT_PROFILE( "OpenMP_job" );
-            body.forLoop( i, ( std::min )( i + grainSize, iEnd ) );
-        }
-        btPopThreadsAreRunning();
-    }
-};
-#endif // #if BT_USE_OPENMP
-
-
-#if BT_USE_TBB
-///
-/// btTaskSchedulerTBB -- task scheduler implemented via Intel Threaded Building Blocks
-///
-class btTaskSchedulerTBB : public btITaskScheduler
-{
-    int m_numThreads;
-    tbb::task_scheduler_init* m_tbbSchedulerInit;
-
-public:
-    btTaskSchedulerTBB() : btITaskScheduler( "IntelTBB" )
-    {
-        m_numThreads = 0;
-        m_tbbSchedulerInit = NULL;
-    }
-    ~btTaskSchedulerTBB()
-    {
-        if ( m_tbbSchedulerInit )
-        {
-            delete m_tbbSchedulerInit;
-            m_tbbSchedulerInit = NULL;
-        }
-    }
-
-    virtual int getMaxNumThreads() const BT_OVERRIDE
-    {
-        return tbb::task_scheduler_init::default_num_threads();
-    }
-    virtual int getNumThreads() const BT_OVERRIDE
-    {
-        return m_numThreads;
-    }
-    virtual void setNumThreads( int numThreads ) BT_OVERRIDE
-    {
-        m_numThreads = ( std::max )( 1, numThreads );
-        if ( m_tbbSchedulerInit )
-        {
-            delete m_tbbSchedulerInit;
-            m_tbbSchedulerInit = NULL;
-        }
-        m_tbbSchedulerInit = new tbb::task_scheduler_init( m_numThreads );
-    }
-    struct BodyAdapter
-    {
-        const btIParallelForBody* mBody;
-
-        void operator()( const tbb::blocked_range<int>& range ) const
-        {
-            BT_PROFILE( "TBB_job" );
-            mBody->forLoop( range.begin(), range.end() );
-        }
-    };
-    virtual void parallelFor( int iBegin, int iEnd, int grainSize, const btIParallelForBody& body ) BT_OVERRIDE
-    {
-        BT_PROFILE( "parallelFor_TBB" );
-        // TBB dispatch
-        BodyAdapter tbbBody;
-        tbbBody.mBody = &body;
-        btPushThreadsAreRunning();
-        tbb::parallel_for( tbb::blocked_range<int>( iBegin, iEnd, grainSize ),
-                           tbbBody,
-                           tbb::simple_partitioner()
-        );
-        btPopThreadsAreRunning();
-    }
-};
-#endif // #if BT_USE_TBB
-
-#if BT_USE_PPL
-///
-/// btTaskSchedulerPPL -- task scheduler implemented via Microsoft Parallel Patterns Lib
-///
-class btTaskSchedulerPPL : public btITaskScheduler
-{
-    int m_numThreads;
-public:
-    btTaskSchedulerPPL() : btITaskScheduler( "PPL" )
-    {
-        m_numThreads = 0;
-    }
-    virtual int getMaxNumThreads() const BT_OVERRIDE
-    {
-        return concurrency::GetProcessorCount();
-    }
-    virtual int getNumThreads() const BT_OVERRIDE
-    {
-        return m_numThreads;
-    }
-    virtual void setNumThreads( int numThreads ) BT_OVERRIDE
-    {
-        m_numThreads = ( std::max )( 1, numThreads );
-        using namespace concurrency;
-        if ( CurrentScheduler::Id() != -1 )
-        {
-            CurrentScheduler::Detach();
-        }
-        SchedulerPolicy policy;
-        policy.SetConcurrencyLimits( m_numThreads, m_numThreads );
-        CurrentScheduler::Create( policy );
-    }
-    struct BodyAdapter
-    {
-        const btIParallelForBody* mBody;
-        int mGrainSize;
-        int mIndexEnd;
-
-        void operator()( int i ) const
-        {
-            BT_PROFILE( "PPL_job" );
-            mBody->forLoop( i, ( std::min )( i + mGrainSize, mIndexEnd ) );
-        }
-    };
-    virtual void parallelFor( int iBegin, int iEnd, int grainSize, const btIParallelForBody& body ) BT_OVERRIDE
-    {
-        BT_PROFILE( "parallelFor_PPL" );
-        // PPL dispatch
-        BodyAdapter pplBody;
-        pplBody.mBody = &body;
-        pplBody.mGrainSize = grainSize;
-        pplBody.mIndexEnd = iEnd;
-        btPushThreadsAreRunning();
-        // note: MSVC 2010 doesn't support partitioner args, so avoid them
-        concurrency::parallel_for( iBegin,
-                                   iEnd,
-                                   grainSize,
-                                   pplBody
-        );
-        btPopThreadsAreRunning();
-    }
-};
-#endif // #if BT_USE_PPL
-
+#endif // #if BT_USE_TBB && BT_THREADSAFE
 
+#if BT_THREADSAFE
 
 //
 // Lightweight spin-mutex based on atomics
 // Using ordinary system-provided mutexes like Windows critical sections was noticeably slower
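
The comment above is the motivation for btSpinMutex: a kernel mutex that fails to acquire will put the thread to sleep and force a context switch, while a spin-mutex just busy-waits, which is the right trade-off for the short critical sections of a task scheduler. The hunk that follows only reshuffles the implementation between BT_THREADSAFE configurations; for readers who want the shape of such a mutex in one place, here is a minimal sketch against std::atomic_flag (illustrative only; Bullet's actual implementation uses platform atomics such as the MSVC intrinsics referenced below):

    // minimal spin-mutex sketch (not the patch's implementation)
    #include <atomic>

    class SpinMutexSketch
    {
        std::atomic_flag mFlag = ATOMIC_FLAG_INIT;
    public:
        bool tryLock() { return !mFlag.test_and_set( std::memory_order_acquire ); }
        void lock() { while ( mFlag.test_and_set( std::memory_order_acquire ) ) { /* spin, never sleep */ } }
        void unlock() { mFlag.clear( std::memory_order_release ); }
    };
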
@@ -415,28 +200,107 @@ void btSpinMutex::unlock()
 
 #endif //#else //#elif USE_MSVC_INTRINSICS
 
+#else //#if BT_THREADSAFE
+
+// These should not be called ever
+void btSpinMutex::lock()
+{
+    btAssert( !"unimplemented btSpinMutex::lock() called" );
+}
+
+void btSpinMutex::unlock()
+{
+    btAssert( !"unimplemented btSpinMutex::unlock() called" );
+}
+
+bool btSpinMutex::tryLock()
+{
+    btAssert( !"unimplemented btSpinMutex::tryLock() called" );
+    return true;
+}
+
+#define THREAD_LOCAL_STATIC static
+
+#endif // #else //#if BT_THREADSAFE
+
 
 struct ThreadsafeCounter
 {
     unsigned int mCounter;
     btSpinMutex mMutex;
 
-    ThreadsafeCounter() {mCounter=0;}
+    ThreadsafeCounter()
+    {
+        mCounter = 0;
+        --mCounter;  // first count should come back 0
+    }
 
     unsigned int getNext()
    {
         // no need to optimize this with atomics, it is only called ONCE per thread!
         mMutex.lock();
-        unsigned int val = mCounter++;
+        mCounter++;
+        if ( mCounter >= BT_MAX_THREAD_COUNT )
+        {
+            btAssert( !"thread counter exceeded" );
+            // wrap back to the first worker index
+            mCounter = 1;
+        }
+        unsigned int val = mCounter;
         mMutex.unlock();
         return val;
     }
 };
 
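
The reworked ThreadsafeCounter is subtle enough to deserve a trace. The constructor decrements an unsigned zero so mCounter starts at 0xFFFFFFFF; the first getNext() call (made by the main thread) increments it back to 0; workers then receive 1, 2, and so on; and when the counter would reach BT_MAX_THREAD_COUNT it asserts and wraps to 1, so index 0 stays reserved for the main thread. A standalone trace of that arithmetic (mutex omitted, illustrative only):

    #include <cassert>

    int main()
    {
        const unsigned int kMax = 64;              // BT_MAX_THREAD_COUNT in btThreads.h
        unsigned int counter = 0;
        --counter;                                 // wraps to 0xFFFFFFFF, as in the constructor
        auto getNext = [&]() -> unsigned int {
            if ( ++counter >= kMax ) counter = 1;  // wrap to first worker index, never back to 0
            return counter;
        };
        assert( getNext() == 0 );                  // first caller, i.e. the main thread
        for ( unsigned int i = 1; i < kMax; ++i )
            assert( getNext() == i );              // workers get 1..63
        assert( getNext() == 1 );                  // 65th call wraps back to 1, not 0
        return 0;
    }
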
+
+static btITaskScheduler* gBtTaskScheduler;
+static int gThreadsRunningCounter = 0;  // useful for detecting if we are trying to do nested parallel-for calls
+static btSpinMutex gThreadsRunningCounterMutex;
 static ThreadsafeCounter gThreadCounter;
 
-// return a unique index per thread, starting with 0 and counting up
+//
+// BT_DETECT_BAD_THREAD_INDEX tries to detect when there are multiple threads assigned the same thread index.
+//
+// BT_DETECT_BAD_THREAD_INDEX is a developer option to test if
+// certain assumptions about how the task scheduler manages its threads
+// hold true.
+// The main assumption is:
+//   - when the threadpool is resized, the task scheduler either
+//      1. destroys all worker threads and creates all new ones in the correct number, OR
+//      2. never destroys a worker thread
+//
+// We make that assumption because we can't easily enumerate the worker threads of a task scheduler
+// to assign nice sequential thread-indexes. We also do not get notified if a worker thread is destroyed,
+// so we can't tell when a thread-index is no longer being used.
+// We allocate thread-indexes as needed with a sequential global thread counter.
+//
+// Our simple thread-counting scheme falls apart if the task scheduler destroys some threads but
+// continues to re-use other threads and the application repeatedly resizes the thread pool of the
+// task scheduler.
+// In order to prevent the thread-counter from exceeding the global max (BT_MAX_THREAD_COUNT), we
+// wrap the thread counter back to 1. This should only happen if the worker threads have all been
+// destroyed and re-created.
+//
+// BT_DETECT_BAD_THREAD_INDEX only works for Win32 right now,
+// but could be adapted to work with pthreads
+#define BT_DETECT_BAD_THREAD_INDEX 0
+
+#if BT_DETECT_BAD_THREAD_INDEX
+
+typedef DWORD ThreadId_t;
+const static ThreadId_t kInvalidThreadId = 0;
+ThreadId_t gDebugThreadIds[ BT_MAX_THREAD_COUNT ];
+
+static ThreadId_t getDebugThreadId()
+{
+    return GetCurrentThreadId();
+}
+
+#endif // #if BT_DETECT_BAD_THREAD_INDEX
+
+
+// return a unique index per thread, main thread is 0, worker threads are in [1, BT_MAX_THREAD_COUNT)
 unsigned int btGetCurrentThreadIndex()
 {
     const unsigned int kNullIndex = ~0U;
@@ -444,7 +308,30 @@ unsigned int btGetCurrentThreadIndex()
     if ( sThreadIndex == kNullIndex )
     {
         sThreadIndex = gThreadCounter.getNext();
+        btAssert( sThreadIndex < BT_MAX_THREAD_COUNT );
     }
+#if BT_DETECT_BAD_THREAD_INDEX
+    if ( gBtTaskScheduler && sThreadIndex > 0 )
+    {
+        ThreadId_t tid = getDebugThreadId();
+        // if not set
+        if ( gDebugThreadIds[ sThreadIndex ] == kInvalidThreadId )
+        {
+            // set it
+            gDebugThreadIds[ sThreadIndex ] = tid;
+        }
+        else
+        {
+            if ( gDebugThreadIds[ sThreadIndex ] != tid )
+            {
+                // this could indicate the task scheduler is breaking our assumptions about
+                // how threads are managed when threadpool is resized
+                btAssert( !"there are 2 or more threads with the same thread-index!" );
+                __debugbreak();
+            }
+        }
+    }
+#endif // #if BT_DETECT_BAD_THREAD_INDEX
+
     return sThreadIndex;
 }
 
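
As the comment above says, the detector is Win32-only; a pthreads port (hypothetical, not part of this patch) would be straightforward except that pthread_t is an opaque type: it has no portable invalid value to play the role of kInvalidThreadId, and ids must be compared with pthread_equal() rather than ==, so the table would also need a separate occupied flag per slot. Something along these lines:

    // hypothetical pthreads adaptation of the debug helpers above
    #include <pthread.h>

    typedef pthread_t ThreadId_t;

    static bool gDebugThreadIdSet[ 64 ];  // BT_MAX_THREAD_COUNT; no portable "invalid" pthread_t exists

    static ThreadId_t getDebugThreadId()
    {
        return pthread_self();
    }

    static bool sameThreadId( ThreadId_t a, ThreadId_t b )
    {
        return pthread_equal( a, b ) != 0;
    }
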
@@ -453,38 +340,123 @@ bool btIsMainThread()
     return btGetCurrentThreadIndex() == 0;
 }
 
-#else // #if BT_THREADSAFE
-
-// These should not be called ever
-void btSpinMutex::lock()
+void btResetThreadIndexCounter()
 {
-    btAssert(!"unimplemented btSpinMutex::lock() called");
+    // for when all current worker threads are destroyed
+    btAssert( btIsMainThread() );
+    gThreadCounter.mCounter = 0;
 }
 
-void btSpinMutex::unlock()
+btITaskScheduler::btITaskScheduler( const char* name )
 {
-    btAssert(!"unimplemented btSpinMutex::unlock() called");
+    m_name = name;
+    m_savedThreadCounter = 0;
+    m_isActive = false;
 }
 
-bool btSpinMutex::tryLock()
+void btITaskScheduler::activate()
 {
-    btAssert(!"unimplemented btSpinMutex::tryLock() called");
-    return true;
+    // gThreadCounter is used to assign a thread-index to each worker thread in a task scheduler.
+    // The main thread is always thread-index 0, and worker threads are numbered from 1 to 63 (BT_MAX_THREAD_COUNT-1).
+    // The thread-indexes need to be unique amongst the threads that can be running simultaneously.
+    // Since only one task scheduler can be used at a time, it is OK for a pair of threads that belong to different
+    // task schedulers to share the same thread index because they can't be running at the same time.
+    // So each task scheduler needs to keep its own thread counter value.
+    if ( !m_isActive )
+    {
+        gThreadCounter.mCounter = m_savedThreadCounter;  // restore saved thread counter
+        m_isActive = true;
+    }
 }
 
-// non-parallel version of btParallelFor
+void btITaskScheduler::deactivate()
+{
+    if ( m_isActive )
+    {
+        m_savedThreadCounter = gThreadCounter.mCounter;  // save thread counter
+        m_isActive = false;
+    }
+}
+
+void btPushThreadsAreRunning()
+{
+    gThreadsRunningCounterMutex.lock();
+    gThreadsRunningCounter++;
+    gThreadsRunningCounterMutex.unlock();
+}
+
+void btPopThreadsAreRunning()
+{
+    gThreadsRunningCounterMutex.lock();
+    gThreadsRunningCounter--;
+    gThreadsRunningCounterMutex.unlock();
+}
+
+bool btThreadsAreRunning()
+{
+    return gThreadsRunningCounter != 0;
+}
+
+
+void btSetTaskScheduler( btITaskScheduler* ts )
+{
+    int threadId = btGetCurrentThreadIndex();  // make sure we call this on main thread at least once before any workers run
+    if ( threadId != 0 )
+    {
+        btAssert( !"btSetTaskScheduler must be called from the main thread!" );
+        return;
+    }
+    if ( gBtTaskScheduler )
+    {
+        // deactivate old task scheduler
+        gBtTaskScheduler->deactivate();
+    }
+    gBtTaskScheduler = ts;
+    if ( ts )
+    {
+        // activate new task scheduler
+        ts->activate();
+    }
+}
+
+
+btITaskScheduler* btGetTaskScheduler()
+{
+    return gBtTaskScheduler;
+}
+
+
 void btParallelFor( int iBegin, int iEnd, int grainSize, const btIParallelForBody& body )
 {
-    btAssert(!"called btParallelFor in non-threadsafe build. enable BT_THREADSAFE");
-    body.forLoop( iBegin, iEnd );
-}
+#if BT_THREADSAFE
 
-#endif // #if BT_THREADSAFE
+#if BT_DETECT_BAD_THREAD_INDEX
+    if ( !btThreadsAreRunning() )
+    {
+        // clear out thread ids
+        for ( int i = 0; i < BT_MAX_THREAD_COUNT; ++i )
+        {
+            gDebugThreadIds[ i ] = kInvalidThreadId;
+        }
+    }
+#endif // #if BT_DETECT_BAD_THREAD_INDEX
+
+    btAssert( gBtTaskScheduler != NULL );  // call btSetTaskScheduler() with a valid task scheduler first!
+    gBtTaskScheduler->parallelFor( iBegin, iEnd, grainSize, body );
+
+#else // #if BT_THREADSAFE
+
+    // non-parallel version of btParallelFor
+    btAssert( !"called btParallelFor in non-threadsafe build. enable BT_THREADSAFE" );
+    body.forLoop( iBegin, iEnd );
+
+#endif // #else // #if BT_THREADSAFE
+}
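
Everything external code needs is now btSetTaskScheduler() plus btParallelFor(); the schedulers themselves stay internal. A minimal caller looks like the sketch below. It is illustrative only and assumes the usual btIParallelForBody interface, a single virtual forLoop( int iBegin, int iEnd ) const, which is how the schedulers in this file invoke it:

    #include "LinearMath/btThreads.h"

    // a parallel-for body that squares a slice of an array
    class SquareBody : public btIParallelForBody
    {
        float* mData;
    public:
        explicit SquareBody( float* data ) : mData( data ) {}
        void forLoop( int iBegin, int iEnd ) const BT_OVERRIDE
        {
            for ( int i = iBegin; i < iEnd; ++i )
                mData[ i ] *= mData[ i ];
        }
    };

    void initThreading()
    {
        // pick a scheduler once, from the main thread; the sequential one
        // always exists, even in builds without OpenMP/TBB/PPL
        btSetTaskScheduler( btGetSequentialTaskScheduler() );
    }

    void squareAll( float* data, int n )
    {
        btParallelFor( 0, n, 64, SquareBody( data ) );  // jobs of up to 64 elements
    }
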
 
 ///
 /// btTaskSchedulerSequential -- non-threaded implementation of task scheduler
-/// (fallback in case no multi-threaded schedulers are available)
+/// (really just useful for testing performance of single threaded vs multi)
 ///
 class btTaskSchedulerSequential : public btITaskScheduler
 {
@@ -500,6 +472,211 @@ public:
     }
 };
 
+
+#if BT_USE_OPENMP && BT_THREADSAFE
+///
+/// btTaskSchedulerOpenMP -- wrapper around OpenMP task scheduler
+///
+class btTaskSchedulerOpenMP : public btITaskScheduler
+{
+    int m_numThreads;
+public:
+    btTaskSchedulerOpenMP() : btITaskScheduler( "OpenMP" )
+    {
+        m_numThreads = 0;
+    }
+    virtual int getMaxNumThreads() const BT_OVERRIDE
+    {
+        return omp_get_max_threads();
+    }
+    virtual int getNumThreads() const BT_OVERRIDE
+    {
+        return m_numThreads;
+    }
+    virtual void setNumThreads( int numThreads ) BT_OVERRIDE
+    {
+        // Because OpenMP is a standard with various implementations, we can't know
+        // for sure that every implementation destroys all of its previous threads
+        // when the threadpool is resized
+        m_numThreads = ( std::max )( 1, ( std::min )( int( BT_MAX_THREAD_COUNT ), numThreads ) );
+        omp_set_num_threads( 1 );  // hopefully, all previous threads get destroyed here
+        omp_set_num_threads( m_numThreads );
+        m_savedThreadCounter = 0;
+        if ( m_isActive )
+        {
+            btResetThreadIndexCounter();
+        }
+    }
+    virtual void parallelFor( int iBegin, int iEnd, int grainSize, const btIParallelForBody& body ) BT_OVERRIDE
+    {
+        BT_PROFILE( "parallelFor_OpenMP" );
+        btPushThreadsAreRunning();
+#pragma omp parallel for schedule( static, 1 )
+        for ( int i = iBegin; i < iEnd; i += grainSize )
+        {
+            BT_PROFILE( "OpenMP_job" );
+            body.forLoop( i, ( std::min )( i + grainSize, iEnd ) );
+        }
+        btPopThreadsAreRunning();
+    }
+};
+#endif // #if BT_USE_OPENMP && BT_THREADSAFE
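
Note how grainSize chunks the index range in the loop above: each OpenMP job covers at most grainSize indices, and the last chunk is clipped by ( std::min )( i + grainSize, iEnd ). The TBB and PPL wrappers below get the same chunking from tbb::blocked_range and from the step argument of concurrency::parallel_for. A worked trace (illustrative only):

    #include <algorithm>
    #include <cstdio>

    int main()
    {
        const int iBegin = 0, iEnd = 10, grainSize = 4;
        for ( int i = iBegin; i < iEnd; i += grainSize )
            std::printf( "forLoop( %d, %d )\n", i, ( std::min )( i + grainSize, iEnd ) );
        // prints: forLoop( 0, 4 )  forLoop( 4, 8 )  forLoop( 8, 10 )
        return 0;
    }
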
+
+
+#if BT_USE_TBB && BT_THREADSAFE
+///
+/// btTaskSchedulerTBB -- wrapper around Intel Threaded Building Blocks task scheduler
+///
+class btTaskSchedulerTBB : public btITaskScheduler
+{
+    int m_numThreads;
+    tbb::task_scheduler_init* m_tbbSchedulerInit;
+
+public:
+    btTaskSchedulerTBB() : btITaskScheduler( "IntelTBB" )
+    {
+        m_numThreads = 0;
+        m_tbbSchedulerInit = NULL;
+    }
+    ~btTaskSchedulerTBB()
+    {
+        if ( m_tbbSchedulerInit )
+        {
+            delete m_tbbSchedulerInit;
+            m_tbbSchedulerInit = NULL;
+        }
+    }
+
+    virtual int getMaxNumThreads() const BT_OVERRIDE
+    {
+        return tbb::task_scheduler_init::default_num_threads();
+    }
+    virtual int getNumThreads() const BT_OVERRIDE
+    {
+        return m_numThreads;
+    }
+    virtual void setNumThreads( int numThreads ) BT_OVERRIDE
+    {
+        m_numThreads = ( std::max )( 1, ( std::min )( int( BT_MAX_THREAD_COUNT ), numThreads ) );
+        if ( m_tbbSchedulerInit )
+        {
+            // destroys all previous threads
+            delete m_tbbSchedulerInit;
+            m_tbbSchedulerInit = NULL;
+        }
+        m_tbbSchedulerInit = new tbb::task_scheduler_init( m_numThreads );
+        m_savedThreadCounter = 0;
+        if ( m_isActive )
+        {
+            btResetThreadIndexCounter();
+        }
+    }
+    struct BodyAdapter
+    {
+        const btIParallelForBody* mBody;
+
+        void operator()( const tbb::blocked_range<int>& range ) const
+        {
+            BT_PROFILE( "TBB_job" );
+            mBody->forLoop( range.begin(), range.end() );
+        }
+    };
+    virtual void parallelFor( int iBegin, int iEnd, int grainSize, const btIParallelForBody& body ) BT_OVERRIDE
+    {
+        BT_PROFILE( "parallelFor_TBB" );
+        // TBB dispatch
+        BodyAdapter tbbBody;
+        tbbBody.mBody = &body;
+        btPushThreadsAreRunning();
+        tbb::parallel_for( tbb::blocked_range<int>( iBegin, iEnd, grainSize ),
+                           tbbBody,
+                           tbb::simple_partitioner()
+        );
+        btPopThreadsAreRunning();
+    }
+};
+#endif // #if BT_USE_TBB && BT_THREADSAFE
+
+
+#if BT_USE_PPL && BT_THREADSAFE
+///
+/// btTaskSchedulerPPL -- wrapper around Microsoft Parallel Patterns Lib task scheduler
+///
+class btTaskSchedulerPPL : public btITaskScheduler
+{
+    int m_numThreads;
+public:
+    btTaskSchedulerPPL() : btITaskScheduler( "PPL" )
+    {
+        m_numThreads = 0;
+    }
+    virtual int getMaxNumThreads() const BT_OVERRIDE
+    {
+        return concurrency::GetProcessorCount();
+    }
+    virtual int getNumThreads() const BT_OVERRIDE
+    {
+        return m_numThreads;
+    }
+    virtual void setNumThreads( int numThreads ) BT_OVERRIDE
+    {
+        // capping the thread count for PPL due to a thread-index issue
+        const int maxThreadCount = ( std::min )( int( BT_MAX_THREAD_COUNT ), 31 );
+        m_numThreads = ( std::max )( 1, ( std::min )( maxThreadCount, numThreads ) );
+        using namespace concurrency;
+        if ( CurrentScheduler::Id() != -1 )
+        {
+            CurrentScheduler::Detach();
+        }
+        SchedulerPolicy policy;
+        {
+            // PPL seems to destroy threads when threadpool is shrunk, but keeps reusing old threads
+            // force it to destroy old threads
+            policy.SetConcurrencyLimits( 1, 1 );
+            CurrentScheduler::Create( policy );
+            CurrentScheduler::Detach();
+        }
+        policy.SetConcurrencyLimits( m_numThreads, m_numThreads );
+        CurrentScheduler::Create( policy );
+        m_savedThreadCounter = 0;
+        if ( m_isActive )
+        {
+            btResetThreadIndexCounter();
+        }
+    }
+    struct BodyAdapter
+    {
+        const btIParallelForBody* mBody;
+        int mGrainSize;
+        int mIndexEnd;
+
+        void operator()( int i ) const
+        {
+            BT_PROFILE( "PPL_job" );
+            mBody->forLoop( i, ( std::min )( i + mGrainSize, mIndexEnd ) );
+        }
+    };
+    virtual void parallelFor( int iBegin, int iEnd, int grainSize, const btIParallelForBody& body ) BT_OVERRIDE
+    {
+        BT_PROFILE( "parallelFor_PPL" );
+        // PPL dispatch
+        BodyAdapter pplBody;
+        pplBody.mBody = &body;
+        pplBody.mGrainSize = grainSize;
+        pplBody.mIndexEnd = iEnd;
+        btPushThreadsAreRunning();
+        // note: MSVC 2010 doesn't support partitioner args, so avoid them
+        concurrency::parallel_for( iBegin,
+                                   iEnd,
+                                   grainSize,
+                                   pplBody
+        );
+        btPopThreadsAreRunning();
+    }
+};
+#endif // #if BT_USE_PPL && BT_THREADSAFE
+
+
 // create a non-threaded task scheduler (always available)
 btITaskScheduler* btGetSequentialTaskScheduler()
 {
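
All three wrappers now clamp their pool size so that every live thread gets an index inside [0, BT_MAX_THREAD_COUNT). That bound is what makes the index useful: internal code can keep per-thread state in a plain fixed-size array and update it without any locking. The consumer pattern looks roughly like this (a hypothetical helper, not part of the patch; note that the header below flags btGetCurrentThreadIndex() as internal-use-only):

    #include "LinearMath/btThreads.h"

    // lock-free per-thread accumulation into one slot per possible thread index;
    // production code would pad each slot to a cache line to avoid false sharing
    struct PerThreadSums
    {
        float mSum[ BT_MAX_THREAD_COUNT ];

        PerThreadSums()
        {
            for ( unsigned int i = 0; i < BT_MAX_THREAD_COUNT; ++i )
                mSum[ i ] = 0.0f;
        }

        void add( float x )
        {
            // indexes are unique among concurrently running threads, so no lock is needed
            mSum[ btGetCurrentThreadIndex() ] += x;
        }

        float total() const  // only meaningful after the workers have finished
        {
            float t = 0.0f;
            for ( unsigned int i = 0; i < BT_MAX_THREAD_COUNT; ++i )
                t += mSum[ i ];
            return t;
        }
    };
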
diff --git a/src/LinearMath/btThreads.h b/src/LinearMath/btThreads.h
index 284702d71..f8fea8b37 100644
--- a/src/LinearMath/btThreads.h
+++ b/src/LinearMath/btThreads.h
@@ -28,6 +28,14 @@ subject to the following restrictions:
 #define BT_OVERRIDE
 #endif
 
+const unsigned int BT_MAX_THREAD_COUNT = 64;  // only if BT_THREADSAFE is 1
+
+// for internal use only
+bool btIsMainThread();
+bool btThreadsAreRunning();
+unsigned int btGetCurrentThreadIndex();
+void btResetThreadIndexCounter();  // notify that all worker threads have been destroyed
+
 ///
 /// btSpinMutex -- lightweight spin-mutex implemented with atomic ops, never puts
 /// a thread to sleep because it is designed to be used with a task scheduler
@@ -48,39 +56,41 @@ public:
     bool tryLock();
 };
 
-#if BT_THREADSAFE
-// for internal Bullet use only
+//
+// NOTE: btMutex* is for internal Bullet use only
+//
+// If BT_THREADSAFE is undefined or 0, these functions should optimize away to nothing.
+// This is good because for the single-threaded build of Bullet, any calls
+// to these functions will be optimized out.
+//
+// However, for users of the multi-threaded build of Bullet this is kind
+// of bad because if you call any of these functions from external code
+// (where BT_THREADSAFE is undefined) you will get unexpected race conditions.
+//
 SIMD_FORCE_INLINE void btMutexLock( btSpinMutex* mutex )
 {
+#if BT_THREADSAFE
     mutex->lock();
+#endif // #if BT_THREADSAFE
 }
 
 SIMD_FORCE_INLINE void btMutexUnlock( btSpinMutex* mutex )
 {
+#if BT_THREADSAFE
     mutex->unlock();
+#endif // #if BT_THREADSAFE
 }
 
 SIMD_FORCE_INLINE bool btMutexTryLock( btSpinMutex* mutex )
 {
+#if BT_THREADSAFE
     return mutex->tryLock();
+#else
     return true;
+#endif // #if BT_THREADSAFE
 }
 
-// for internal use only
-bool btIsMainThread();
-bool btThreadsAreRunning();
-unsigned int btGetCurrentThreadIndex();
-const unsigned int BT_MAX_THREAD_COUNT = 64;
-
-#else
-
-// for internal Bullet use only
-// if BT_THREADSAFE is undefined or 0, should optimize away to nothing
-SIMD_FORCE_INLINE void btMutexLock( btSpinMutex* ) {}
-SIMD_FORCE_INLINE void btMutexUnlock( btSpinMutex* ) {}
-SIMD_FORCE_INLINE bool btMutexTryLock( btSpinMutex* ) {return true;}
-SIMD_FORCE_INLINE bool btThreadsAreRunning() { return false;}
-#endif
 
 //
 // btIParallelForBody -- subclass this to express work that can be done in parallel
@@ -97,16 +107,24 @@ public:
 //
 // btITaskScheduler
 //
 class btITaskScheduler
 {
-    const char* m_name;
 public:
-    btITaskScheduler( const char* name ) : m_name( name ) {}
+    btITaskScheduler( const char* name );
+    virtual ~btITaskScheduler() {}
     const char* getName() const { return m_name; }
-    virtual ~btITaskScheduler() {}
 
     virtual int getMaxNumThreads() const = 0;
    virtual int getNumThreads() const = 0;
     virtual void setNumThreads( int numThreads ) = 0;
     virtual void parallelFor( int iBegin, int iEnd, int grainSize, const btIParallelForBody& body ) = 0;
+
+    // internal use only
+    virtual void activate();
+    virtual void deactivate();
+
+protected:
+    const char* m_name;
+    unsigned int m_savedThreadCounter;
+    bool m_isActive;
 };
 
 // set the task scheduler to use for all calls to btParallelFor()