From dfe184e8d3bcb7a81f3372c91388895c7e9218cf Mon Sep 17 00:00:00 2001 From: Lunkhound Date: Mon, 22 May 2017 00:47:11 -0700 Subject: [PATCH] Bullet 2 threading refactor: moved parallel-for calls into core libs --- CMakeLists.txt | 42 +- examples/Benchmarks/BenchmarkDemo.cpp | 5 +- examples/ExampleBrowser/CMakeLists.txt | 28 +- .../CommonRigidBodyMTBase.cpp | 576 ++++-------------- examples/MultiThreadedDemo/ParallelFor.h | 336 ---------- src/BulletCollision/CMakeLists.txt | 2 + .../btCollisionDispatcherMt.cpp | 164 +++++ .../btCollisionDispatcherMt.h | 39 ++ .../Dynamics/btDiscreteDynamicsWorldMt.cpp | 165 ++++- .../Dynamics/btDiscreteDynamicsWorldMt.h | 96 ++- .../Dynamics/btSimulationIslandManagerMt.cpp | 39 +- .../Dynamics/btSimulationIslandManagerMt.h | 3 +- src/LinearMath/btThreads.cpp | 304 ++++++++- src/LinearMath/btThreads.h | 60 ++ 14 files changed, 1012 insertions(+), 847 deletions(-) delete mode 100644 examples/MultiThreadedDemo/ParallelFor.h create mode 100644 src/BulletCollision/CollisionDispatch/btCollisionDispatcherMt.cpp create mode 100644 src/BulletCollision/CollisionDispatch/btCollisionDispatcherMt.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 6ad1a9706..65859e663 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -28,7 +28,15 @@ OPTION(USE_DOUBLE_PRECISION "Use double precision" OFF) OPTION(USE_GRAPHICAL_BENCHMARK "Use Graphical Benchmark" ON) OPTION(BUILD_SHARED_LIBS "Use shared libraries" OFF) OPTION(USE_SOFT_BODY_MULTI_BODY_DYNAMICS_WORLD "Use btSoftMultiBodyDynamicsWorld" OFF) -OPTION(BULLET2_USE_THREAD_LOCKS "Build Bullet 2 libraries with mutex locking around certain operations" OFF) + +OPTION(BULLET2_USE_THREAD_LOCKS "Build Bullet 2 libraries with mutex locking around certain operations (required for multi-threading)" OFF) +IF (BULLET2_USE_THREAD_LOCKS) + OPTION(BULLET2_USE_OPEN_MP_MULTITHREADING "Build Bullet 2 with support for multi-threading with OpenMP (requires a compiler with OpenMP support)" OFF) + OPTION(BULLET2_USE_TBB_MULTITHREADING "Build Bullet 2 with support for multi-threading with Intel Threading Building Blocks (requires the TBB library to be already installed)" OFF) + IF (MSVC) + OPTION(BULLET2_USE_PPL_MULTITHREADING "Build Bullet 2 with support for multi-threading with Microsoft Parallel Patterns Library (requires MSVC compiler)" OFF) + ENDIF (MSVC) +ENDIF (BULLET2_USE_THREAD_LOCKS) OPTION(USE_MSVC_INCREMENTAL_LINKING "Use MSVC Incremental Linking" OFF) OPTION(USE_CUSTOM_VECTOR_MATH "Use custom vectormath library" OFF) @@ -208,6 +216,30 @@ IF(BULLET2_USE_THREAD_LOCKS) ENDIF (NOT MSVC) ENDIF (BULLET2_USE_THREAD_LOCKS) +IF (BULLET2_USE_OPEN_MP_MULTITHREADING) + ADD_DEFINITIONS("-DBT_USE_OPENMP=1") + IF (MSVC) + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /openmp") + ELSE (MSVC) + # GCC, Clang + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp") + ENDIF (MSVC) +ENDIF (BULLET2_USE_OPEN_MP_MULTITHREADING) + +IF (BULLET2_USE_TBB_MULTITHREADING) + SET (BULLET2_TBB_INCLUDE_DIR "not found" CACHE PATH "Directory for Intel TBB includes.") + SET (BULLET2_TBB_LIB_DIR "not found" CACHE PATH "Directory for Intel TBB libraries.") + find_library(TBB_LIBRARY tbb PATHS ${BULLET2_TBB_LIB_DIR}) + find_library(TBBMALLOC_LIBRARY tbbmalloc PATHS ${BULLET2_TBB_LIB_DIR}) + ADD_DEFINITIONS("-DBT_USE_TBB=1") + INCLUDE_DIRECTORIES( ${BULLET2_TBB_INCLUDE_DIR} ) + LINK_LIBRARIES( ${TBB_LIBRARY} ${TBBMALLOC_LIBRARY} ) +ENDIF (BULLET2_USE_TBB_MULTITHREADING) + +IF (BULLET2_USE_PPL_MULTITHREADING) + ADD_DEFINITIONS("-DBT_USE_PPL=1") +ENDIF (BULLET2_USE_PPL_MULTITHREADING) + IF (WIN32) OPTION(USE_GLUT "Use Glut" ON) ADD_DEFINITIONS( -D_CRT_SECURE_NO_WARNINGS ) @@ -350,14 +382,6 @@ IF(BUILD_BULLET2_DEMOS) SUBDIRS(examples) ENDIF() - IF (BULLET2_USE_THREAD_LOCKS) - OPTION(BULLET2_MULTITHREADED_OPEN_MP_DEMO "Build Bullet 2 MultithreadedDemo using OpenMP (requires a compiler with OpenMP support)" OFF) - OPTION(BULLET2_MULTITHREADED_TBB_DEMO "Build Bullet 2 MultithreadedDemo using Intel Threading Building Blocks (requires the TBB library to be already installed)" OFF) - IF (MSVC) - OPTION(BULLET2_MULTITHREADED_PPL_DEMO "Build Bullet 2 MultithreadedDemo using Microsoft Parallel Patterns Library (requires MSVC compiler)" OFF) - ENDIF (MSVC) - ENDIF (BULLET2_USE_THREAD_LOCKS) - ENDIF(BUILD_BULLET2_DEMOS) diff --git a/examples/Benchmarks/BenchmarkDemo.cpp b/examples/Benchmarks/BenchmarkDemo.cpp index d9a1b3ef0..ec443de6b 100644 --- a/examples/Benchmarks/BenchmarkDemo.cpp +++ b/examples/Benchmarks/BenchmarkDemo.cpp @@ -32,7 +32,6 @@ subject to the following restrictions: #include "LinearMath/btAlignedObjectArray.h" #include "LinearMath/btTransform.h" -#include "../MultiThreadedDemo/ParallelFor.h" class btDynamicsWorld; @@ -230,7 +229,7 @@ public: } } - struct CastRaysLoopBody + struct CastRaysLoopBody : public btIParallelForBody { btCollisionWorld* mWorld; btRaycastBar2* mRaycasts; @@ -274,7 +273,7 @@ public: { CastRaysLoopBody rayLooper(cw, this); int grainSize = 20; // number of raycasts per task - parallelFor( 0, NUMRAYS, grainSize, rayLooper ); + btParallelFor( 0, NUMRAYS, grainSize, rayLooper ); } else #endif // USE_PARALLEL_RAYCASTS diff --git a/examples/ExampleBrowser/CMakeLists.txt b/examples/ExampleBrowser/CMakeLists.txt index 23c283fcb..4a4db93db 100644 --- a/examples/ExampleBrowser/CMakeLists.txt +++ b/examples/ExampleBrowser/CMakeLists.txt @@ -110,29 +110,6 @@ ELSE(WIN32) ENDIF(APPLE) ENDIF(WIN32) -IF (BULLET2_MULTITHREADED_OPEN_MP_DEMO) - ADD_DEFINITIONS("-DBT_USE_OPENMP=1") - IF (MSVC) - SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /openmp") - ELSE (MSVC) - # GCC, Clang - SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp") - ENDIF (MSVC) -ENDIF (BULLET2_MULTITHREADED_OPEN_MP_DEMO) - -IF (BULLET2_MULTITHREADED_PPL_DEMO) - ADD_DEFINITIONS("-DBT_USE_PPL=1") -ENDIF (BULLET2_MULTITHREADED_PPL_DEMO) - -IF (BULLET2_MULTITHREADED_TBB_DEMO) - SET (BULLET2_TBB_INCLUDE_DIR "not found" CACHE PATH "Directory for Intel TBB includes.") - SET (BULLET2_TBB_LIB_DIR "not found" CACHE PATH "Directory for Intel TBB libraries.") - find_library(TBB_LIBRARY tbb PATHS ${BULLET2_TBB_LIB_DIR}) - find_library(TBBMALLOC_LIBRARY tbbmalloc PATHS ${BULLET2_TBB_LIB_DIR}) - ADD_DEFINITIONS("-DBT_USE_TBB=1") - INCLUDE_DIRECTORIES( ${BULLET2_TBB_INCLUDE_DIR} ) - LINK_LIBRARIES( ${TBB_LIBRARY} ${TBBMALLOC_LIBRARY} ) -ENDIF (BULLET2_MULTITHREADED_TBB_DEMO) SET(ExtendedTutorialsSources ../ExtendedTutorials/Chain.cpp @@ -207,7 +184,6 @@ SET(BulletExampleBrowser_SRCS ../MultiThreadedDemo/MultiThreadedDemo.h ../MultiThreadedDemo/CommonRigidBodyMTBase.cpp ../MultiThreadedDemo/CommonRigidBodyMTBase.h - ../MultiThreadedDemo/ParallelFor.h ../Tutorial/Tutorial.cpp ../Tutorial/Tutorial.h ../Tutorial/Dof6ConstraintTutorial.cpp @@ -386,7 +362,7 @@ ADD_CUSTOM_COMMAND( COMMAND ${CMAKE_COMMAND} ARGS -E copy_directory ${BULLET_PHYSICS_SOURCE_DIR}/data ${PROJECT_BINARY_DIR}/data ) -IF (BULLET2_MULTITHREADED_TBB_DEMO AND WIN32) +IF (BULLET2_USE_TBB_MULTITHREADING AND WIN32) # add a post build command to copy some dlls to the executable directory set(TBB_VC_VER "vc12") set(TBB_VC_ARCH "ia32") @@ -400,7 +376,7 @@ IF (BULLET2_MULTITHREADED_TBB_DEMO AND WIN32) COMMAND ${CMAKE_COMMAND} -E copy_if_different "${BULLET2_TBB_INCLUDE_DIR}/../bin/${TBB_VC_ARCH}/${TBB_VC_VER}/tbbmalloc.dll" $) -ENDIF (BULLET2_MULTITHREADED_TBB_DEMO AND WIN32) +ENDIF (BULLET2_USE_TBB_MULTITHREADING AND WIN32) IF (INTERNAL_ADD_POSTFIX_EXECUTABLE_NAMES) diff --git a/examples/MultiThreadedDemo/CommonRigidBodyMTBase.cpp b/examples/MultiThreadedDemo/CommonRigidBodyMTBase.cpp index 81446ee0b..82797e35e 100644 --- a/examples/MultiThreadedDemo/CommonRigidBodyMTBase.cpp +++ b/examples/MultiThreadedDemo/CommonRigidBodyMTBase.cpp @@ -23,10 +23,10 @@ class btCollisionShape; #include "CommonRigidBodyMTBase.h" #include "../CommonInterfaces/CommonParameterInterface.h" -#include "ParallelFor.h" #include "LinearMath/btAlignedObjectArray.h" #include "LinearMath/btPoolAllocator.h" #include "btBulletCollisionCommon.h" +#include "BulletCollision/CollisionDispatch/btCollisionDispatcherMt.h" #include "BulletDynamics/Dynamics/btSimulationIslandManagerMt.h" // for setSplitIslands() #include "BulletDynamics/Dynamics/btDiscreteDynamicsWorldMt.h" #include "BulletDynamics/ConstraintSolver/btSequentialImpulseConstraintSolver.h" @@ -36,20 +36,6 @@ class btCollisionShape; #include "BulletDynamics/MLCPSolvers/btDantzigSolver.h" #include "BulletDynamics/MLCPSolvers/btLemkeSolver.h" -TaskManager gTaskMgr; - -#define USE_PARALLEL_NARROWPHASE 1 // detect collisions in parallel -#define USE_PARALLEL_ISLAND_SOLVER 1 // solve simulation islands in parallel -#define USE_PARALLEL_CREATE_PREDICTIVE_CONTACTS 1 -#define USE_PARALLEL_INTEGRATE_TRANSFORMS 1 -#define USE_PARALLEL_PREDICT_UNCONSTRAINED_MOTION 1 - -#if defined (_MSC_VER) && _MSC_VER >= 1600 -// give us a compile error if any signatures of overriden methods is changed -#define BT_OVERRIDE override -#else -#define BT_OVERRIDE -#endif static int gNumIslands = 0; @@ -124,7 +110,7 @@ public: }; -Profiler gProfiler; +static Profiler gProfiler; class ProfileHelper { @@ -141,457 +127,84 @@ public: } }; -int gThreadsRunningCounter = 0; -btSpinMutex gThreadsRunningCounterMutex; - -void btPushThreadsAreRunning() +static void profileBeginCallback( btDynamicsWorld *world, btScalar timeStep ) { - gThreadsRunningCounterMutex.lock(); - gThreadsRunningCounter++; - gThreadsRunningCounterMutex.unlock(); + gProfiler.begin( Profiler::kRecordInternalTimeStep ); } -void btPopThreadsAreRunning() +static void profileEndCallback( btDynamicsWorld *world, btScalar timeStep ) { - gThreadsRunningCounterMutex.lock(); - gThreadsRunningCounter--; - gThreadsRunningCounterMutex.unlock(); -} - -bool btThreadsAreRunning() -{ - return gThreadsRunningCounter != 0; + gProfiler.end( Profiler::kRecordInternalTimeStep ); } -#if USE_PARALLEL_NARROWPHASE - -class MyCollisionDispatcher : public btCollisionDispatcher +/// +/// MyCollisionDispatcher -- subclassed for profiling purposes +/// +class MyCollisionDispatcher : public btCollisionDispatcherMt { - btSpinMutex m_manifoldPtrsMutex; - + typedef btCollisionDispatcherMt ParentClass; public: - MyCollisionDispatcher( btCollisionConfiguration* config ) : btCollisionDispatcher( config ) + MyCollisionDispatcher( btCollisionConfiguration* config, int grainSize ) : btCollisionDispatcherMt( config, grainSize ) { } - virtual ~MyCollisionDispatcher() - { - } - - btPersistentManifold* getNewManifold( const btCollisionObject* body0, const btCollisionObject* body1 ) BT_OVERRIDE - { - // added spin-locks - //optional relative contact breaking threshold, turned on by default (use setDispatcherFlags to switch off feature for improved performance) - - btScalar contactBreakingThreshold = ( m_dispatcherFlags & btCollisionDispatcher::CD_USE_RELATIVE_CONTACT_BREAKING_THRESHOLD ) ? - btMin( body0->getCollisionShape()->getContactBreakingThreshold( gContactBreakingThreshold ), body1->getCollisionShape()->getContactBreakingThreshold( gContactBreakingThreshold ) ) - : gContactBreakingThreshold; - - btScalar contactProcessingThreshold = btMin( body0->getContactProcessingThreshold(), body1->getContactProcessingThreshold() ); - - void* mem = m_persistentManifoldPoolAllocator->allocate( sizeof( btPersistentManifold ) ); - if (NULL == mem) - { - //we got a pool memory overflow, by default we fallback to dynamically allocate memory. If we require a contiguous contact pool then assert. - if ( ( m_dispatcherFlags&CD_DISABLE_CONTACTPOOL_DYNAMIC_ALLOCATION ) == 0 ) - { - mem = btAlignedAlloc( sizeof( btPersistentManifold ), 16 ); - } - else - { - btAssert( 0 ); - //make sure to increase the m_defaultMaxPersistentManifoldPoolSize in the btDefaultCollisionConstructionInfo/btDefaultCollisionConfiguration - return 0; - } - } - btPersistentManifold* manifold = new(mem) btPersistentManifold( body0, body1, 0, contactBreakingThreshold, contactProcessingThreshold ); - m_manifoldPtrsMutex.lock(); - manifold->m_index1a = m_manifoldsPtr.size(); - m_manifoldsPtr.push_back( manifold ); - m_manifoldPtrsMutex.unlock(); - - return manifold; - } - - void releaseManifold( btPersistentManifold* manifold ) BT_OVERRIDE - { - clearManifold( manifold ); - - m_manifoldPtrsMutex.lock(); - int findIndex = manifold->m_index1a; - btAssert( findIndex < m_manifoldsPtr.size() ); - m_manifoldsPtr.swap( findIndex, m_manifoldsPtr.size() - 1 ); - m_manifoldsPtr[ findIndex ]->m_index1a = findIndex; - m_manifoldsPtr.pop_back(); - m_manifoldPtrsMutex.unlock(); - - manifold->~btPersistentManifold(); - if ( m_persistentManifoldPoolAllocator->validPtr( manifold ) ) - { - m_persistentManifoldPoolAllocator->freeMemory( manifold ); - } - else - { - btAlignedFree( manifold ); - } - } - - struct Updater - { - btBroadphasePair* mPairArray; - btNearCallback mCallback; - btCollisionDispatcher* mDispatcher; - const btDispatcherInfo* mInfo; - - Updater() - { - mPairArray = NULL; - mCallback = NULL; - mDispatcher = NULL; - mInfo = NULL; - } - void forLoop( int iBegin, int iEnd ) const - { - for ( int i = iBegin; i < iEnd; ++i ) - { - btBroadphasePair* pair = &mPairArray[ i ]; - mCallback( *pair, *mDispatcher, *mInfo ); - } - } - }; - virtual void dispatchAllCollisionPairs( btOverlappingPairCache* pairCache, const btDispatcherInfo& info, btDispatcher* dispatcher ) BT_OVERRIDE { - ProfileHelper prof(Profiler::kRecordDispatchAllCollisionPairs); - int grainSize = 40; // iterations per task - int pairCount = pairCache->getNumOverlappingPairs(); - Updater updater; - updater.mCallback = getNearCallback(); - updater.mPairArray = pairCount > 0 ? pairCache->getOverlappingPairArrayPtr() : NULL; - updater.mDispatcher = this; - updater.mInfo = &info; - - btPushThreadsAreRunning(); - parallelFor( 0, pairCount, grainSize, updater ); - btPopThreadsAreRunning(); - - if (m_manifoldsPtr.size() < 1) - return; - - // reconstruct the manifolds array to ensure determinism - m_manifoldsPtr.resizeNoInitialize(0); - btBroadphasePair* pairs = pairCache->getOverlappingPairArrayPtr(); - for (int i = 0; i < pairCount; ++i) - { - btCollisionAlgorithm* algo = pairs[i].m_algorithm; - if (algo) algo->getAllContactManifolds(m_manifoldsPtr); - } - - // update the indices (used when releasing manifolds) - for (int i = 0; i < m_manifoldsPtr.size(); ++i) - m_manifoldsPtr[i]->m_index1a = i; + ProfileHelper prof( Profiler::kRecordDispatchAllCollisionPairs ); + ParentClass::dispatchAllCollisionPairs( pairCache, info, dispatcher ); } }; -#endif - -#if USE_PARALLEL_ISLAND_SOLVER /// -/// MyConstraintSolverPool - masquerades as a constraint solver, but really it is a threadsafe pool of them. -/// -/// Each solver in the pool is protected by a mutex. When solveGroup is called from a thread, -/// the pool looks for a solver that isn't being used by another thread, locks it, and dispatches the -/// call to the solver. -/// So long as there are at least as many solvers as there are hardware threads, it should never need to -/// spin wait. -/// -class MyConstraintSolverPool : public btConstraintSolver +/// myParallelIslandDispatch -- wrap default parallel dispatch for profiling and to get the number of simulation islands +// +void myParallelIslandDispatch( btAlignedObjectArray* islandsPtr, btSimulationIslandManagerMt::IslandCallback* callback ) { - const static size_t kCacheLineSize = 128; - struct ThreadSolver - { - btConstraintSolver* solver; - btSpinMutex mutex; - char _cachelinePadding[ kCacheLineSize - sizeof( btSpinMutex ) - sizeof( void* ) ]; // keep mutexes from sharing a cache line - }; - btAlignedObjectArray m_solvers; - btConstraintSolverType m_solverType; - - ThreadSolver* getAndLockThreadSolver() - { - while ( true ) - { - for ( int i = 0; i < m_solvers.size(); ++i ) - { - ThreadSolver& solver = m_solvers[ i ]; - if ( solver.mutex.tryLock() ) - { - return &solver; - } - } - } - return NULL; - } - void init( btConstraintSolver** solvers, int numSolvers ) - { - m_solverType = BT_SEQUENTIAL_IMPULSE_SOLVER; - m_solvers.resize( numSolvers ); - for ( int i = 0; i < numSolvers; ++i ) - { - m_solvers[ i ].solver = solvers[ i ]; - } - if ( numSolvers > 0 ) - { - m_solverType = solvers[ 0 ]->getSolverType(); - } - } -public: - // create the solvers for me - explicit MyConstraintSolverPool( int numSolvers ) - { - btAlignedObjectArray solvers; - solvers.reserve( numSolvers ); - for ( int i = 0; i < numSolvers; ++i ) - { - btConstraintSolver* solver = new btSequentialImpulseConstraintSolver(); - solvers.push_back( solver ); - } - init( &solvers[ 0 ], numSolvers ); - } - - // pass in fully constructed solvers (destructor will delete them) - MyConstraintSolverPool( btConstraintSolver** solvers, int numSolvers ) - { - init( solvers, numSolvers ); - } - virtual ~MyConstraintSolverPool() - { - // delete all solvers - for ( int i = 0; i < m_solvers.size(); ++i ) - { - ThreadSolver& solver = m_solvers[ i ]; - delete solver.solver; - solver.solver = NULL; - } - } - - //virtual void prepareSolve( int /* numBodies */, int /* numManifolds */ ) { ; } // does nothing - - ///solve a group of constraints - virtual btScalar solveGroup( btCollisionObject** bodies, - int numBodies, - btPersistentManifold** manifolds, - int numManifolds, - btTypedConstraint** constraints, - int numConstraints, - const btContactSolverInfo& info, - btIDebugDraw* debugDrawer, - btDispatcher* dispatcher - ) - { - ThreadSolver* solver = getAndLockThreadSolver(); - solver->solver->solveGroup( bodies, numBodies, manifolds, numManifolds, constraints, numConstraints, info, debugDrawer, dispatcher ); - solver->mutex.unlock(); - return 0.0f; - } - - //virtual void allSolved( const btContactSolverInfo& /* info */, class btIDebugDraw* /* debugDrawer */ ) { ; } // does nothing - - ///clear internal cached data and reset random seed - virtual void reset() - { - for ( int i = 0; i < m_solvers.size(); ++i ) - { - ThreadSolver& solver = m_solvers[ i ]; - solver.mutex.lock(); - solver.solver->reset(); - solver.mutex.unlock(); - } - } - - virtual btConstraintSolverType getSolverType() const - { - return m_solverType; - } -}; - -struct UpdateIslandDispatcher -{ - btAlignedObjectArray* islandsPtr; - btSimulationIslandManagerMt::IslandCallback* callback; - - void forLoop( int iBegin, int iEnd ) const - { - for ( int i = iBegin; i < iEnd; ++i ) - { - btSimulationIslandManagerMt::Island* island = ( *islandsPtr )[ i ]; - btPersistentManifold** manifolds = island->manifoldArray.size() ? &island->manifoldArray[ 0 ] : NULL; - btTypedConstraint** constraintsPtr = island->constraintArray.size() ? &island->constraintArray[ 0 ] : NULL; - callback->processIsland( &island->bodyArray[ 0 ], - island->bodyArray.size(), - manifolds, - island->manifoldArray.size(), - constraintsPtr, - island->constraintArray.size(), - island->id - ); - } - } -}; - -void parallelIslandDispatch( btAlignedObjectArray* islandsPtr, btSimulationIslandManagerMt::IslandCallback* callback ) -{ - ProfileHelper prof(Profiler::kRecordDispatchIslands); + ProfileHelper prof( Profiler::kRecordDispatchIslands ); gNumIslands = islandsPtr->size(); - int grainSize = 1; // iterations per task - UpdateIslandDispatcher dispatcher; - dispatcher.islandsPtr = islandsPtr; - dispatcher.callback = callback; - btPushThreadsAreRunning(); - parallelFor( 0, islandsPtr->size(), grainSize, dispatcher ); - btPopThreadsAreRunning(); -} -#endif //#if USE_PARALLEL_ISLAND_SOLVER - - -void profileBeginCallback(btDynamicsWorld *world, btScalar timeStep) -{ - gProfiler.begin(Profiler::kRecordInternalTimeStep); + btSimulationIslandManagerMt::parallelIslandDispatch( islandsPtr, callback ); } -void profileEndCallback(btDynamicsWorld *world, btScalar timeStep) -{ - gProfiler.end(Profiler::kRecordInternalTimeStep); -} /// -/// MyDiscreteDynamicsWorld -/// -/// Should function exactly like btDiscreteDynamicsWorld. -/// 3 methods that iterate over all of the rigidbodies can run in parallel: -/// - predictUnconstraintMotion -/// - integrateTransforms -/// - createPredictiveContacts +/// MyDiscreteDynamicsWorld -- subclassed for profiling purposes /// ATTRIBUTE_ALIGNED16( class ) MyDiscreteDynamicsWorld : public btDiscreteDynamicsWorldMt { typedef btDiscreteDynamicsWorld ParentClass; protected: -#if USE_PARALLEL_PREDICT_UNCONSTRAINED_MOTION - struct UpdaterUnconstrainedMotion - { - btScalar timeStep; - btRigidBody** rigidBodies; - - void forLoop( int iBegin, int iEnd ) const - { - for ( int i = iBegin; i < iEnd; ++i ) - { - btRigidBody* body = rigidBodies[ i ]; - if ( !body->isStaticOrKinematicObject() ) - { - //don't integrate/update velocities here, it happens in the constraint solver - body->applyDamping( timeStep ); - body->predictIntegratedTransform( timeStep, body->getInterpolationWorldTransform() ); - } - } - } - }; virtual void predictUnconstraintMotion( btScalar timeStep ) BT_OVERRIDE { ProfileHelper prof( Profiler::kRecordPredictUnconstrainedMotion ); - BT_PROFILE( "predictUnconstraintMotion" ); - int grainSize = 50; // num of iterations per task for TBB - int bodyCount = m_nonStaticRigidBodies.size(); - UpdaterUnconstrainedMotion update; - update.timeStep = timeStep; - update.rigidBodies = bodyCount ? &m_nonStaticRigidBodies[ 0 ] : NULL; - btPushThreadsAreRunning(); - parallelFor( 0, bodyCount, grainSize, update ); - btPopThreadsAreRunning(); + ParentClass::predictUnconstraintMotion( timeStep ); } -#endif // #if USE_PARALLEL_PREDICT_UNCONSTRAINED_MOTION - -#if USE_PARALLEL_CREATE_PREDICTIVE_CONTACTS - struct UpdaterCreatePredictiveContacts - { - btScalar timeStep; - btRigidBody** rigidBodies; - MyDiscreteDynamicsWorld* world; - - void forLoop( int iBegin, int iEnd ) const - { - world->createPredictiveContactsInternal( &rigidBodies[ iBegin ], iEnd - iBegin, timeStep ); - } - }; - - virtual void createPredictiveContacts( btScalar timeStep ) + virtual void createPredictiveContacts( btScalar timeStep ) BT_OVERRIDE { ProfileHelper prof( Profiler::kRecordCreatePredictiveContacts ); - releasePredictiveContacts(); - int grainSize = 50; // num of iterations per task for TBB or OPENMP - if ( int bodyCount = m_nonStaticRigidBodies.size() ) - { - UpdaterCreatePredictiveContacts update; - update.world = this; - update.timeStep = timeStep; - update.rigidBodies = &m_nonStaticRigidBodies[ 0 ]; - btPushThreadsAreRunning(); - parallelFor( 0, bodyCount, grainSize, update ); - btPopThreadsAreRunning(); - } + ParentClass::createPredictiveContacts( timeStep ); } -#endif // #if USE_PARALLEL_CREATE_PREDICTIVE_CONTACTS - -#if USE_PARALLEL_INTEGRATE_TRANSFORMS - struct UpdaterIntegrateTransforms - { - btScalar timeStep; - btRigidBody** rigidBodies; - MyDiscreteDynamicsWorld* world; - - void forLoop( int iBegin, int iEnd ) const - { - world->integrateTransformsInternal( &rigidBodies[ iBegin ], iEnd - iBegin, timeStep ); - } - }; - virtual void integrateTransforms( btScalar timeStep ) BT_OVERRIDE { ProfileHelper prof( Profiler::kRecordIntegrateTransforms ); - BT_PROFILE( "integrateTransforms" ); - int grainSize = 50; // num of iterations per task for TBB or OPENMP - if ( int bodyCount = m_nonStaticRigidBodies.size() ) - { - UpdaterIntegrateTransforms update; - update.world = this; - update.timeStep = timeStep; - update.rigidBodies = &m_nonStaticRigidBodies[ 0 ]; - btPushThreadsAreRunning(); - parallelFor( 0, bodyCount, grainSize, update ); - btPopThreadsAreRunning(); - } + ParentClass::integrateTransforms( timeStep ); } -#endif // #if USE_PARALLEL_INTEGRATE_TRANSFORMS public: BT_DECLARE_ALIGNED_ALLOCATOR(); MyDiscreteDynamicsWorld( btDispatcher* dispatcher, btBroadphaseInterface* pairCache, - btConstraintSolver* constraintSolver, + btConstraintSolverPoolMt* constraintSolver, btCollisionConfiguration* collisionConfiguration ) : btDiscreteDynamicsWorldMt( dispatcher, pairCache, constraintSolver, collisionConfiguration ) { -#if USE_PARALLEL_ISLAND_SOLVER btSimulationIslandManagerMt* islandMgr = static_cast( m_islandManager ); - islandMgr->setIslandDispatchFunction( parallelIslandDispatch ); -#endif //#if USE_PARALLEL_ISLAND_SOLVER + islandMgr->setIslandDispatchFunction( myParallelIslandDispatch ); } }; @@ -625,6 +238,47 @@ btConstraintSolver* createSolverByType( SolverType t ) } +/// +/// btTaskSchedulerManager -- manage a number of task schedulers so we can switch between them +/// +class btTaskSchedulerManager +{ + btAlignedObjectArray m_taskSchedulers; + +public: + btTaskSchedulerManager() {} + void init() + { + addTaskScheduler( btGetSequentialTaskScheduler() ); + addTaskScheduler( btGetOpenMPTaskScheduler() ); + addTaskScheduler( btGetTBBTaskScheduler() ); + addTaskScheduler( btGetPPLTaskScheduler() ); + if ( getNumTaskSchedulers() > 1 ) + { + // prefer a non-sequential scheduler if available + btSetTaskScheduler( m_taskSchedulers[ 1 ] ); + } + else + { + btSetTaskScheduler( m_taskSchedulers[ 0 ] ); + } + btGetTaskScheduler()->setNumThreads( btGetTaskScheduler()->getMaxNumThreads() ); + } + + void addTaskScheduler( btITaskScheduler* ts ) + { + if ( ts ) + { + m_taskSchedulers.push_back( ts ); + } + } + int getNumTaskSchedulers() const { return m_taskSchedulers.size(); } + btITaskScheduler* getTaskScheduler( int i ) { return m_taskSchedulers[ i ]; } +}; + + +static btTaskSchedulerManager gTaskSchedulerMgr; + static bool gMultithreadedWorld = false; static bool gDisplayProfileInfo = false; static SolverType gSolverType = SOLVER_TYPE_SEQUENTIAL_IMPULSE; @@ -652,15 +306,17 @@ CommonRigidBodyMTBase::CommonRigidBodyMTBase( struct GUIHelperInterface* helper { m_multithreadedWorld = false; m_multithreadCapable = false; - gTaskMgr.init(); + if ( gTaskSchedulerMgr.getNumTaskSchedulers() == 0 ) + { + gTaskSchedulerMgr.init(); + } } CommonRigidBodyMTBase::~CommonRigidBodyMTBase() { - gTaskMgr.shutdown(); } -void boolPtrButtonCallback(int buttonId, bool buttonState, void* userPointer) +static void boolPtrButtonCallback(int buttonId, bool buttonState, void* userPointer) { if (bool* val = static_cast(userPointer)) { @@ -668,7 +324,7 @@ void boolPtrButtonCallback(int buttonId, bool buttonState, void* userPointer) } } -void toggleSolverModeCallback(int buttonId, bool buttonState, void* userPointer) +static void toggleSolverModeCallback(int buttonId, bool buttonState, void* userPointer) { if (buttonState) { @@ -687,7 +343,7 @@ void toggleSolverModeCallback(int buttonId, bool buttonState, void* userPointer) } } -void setSolverTypeCallback(int buttonId, bool buttonState, void* userPointer) +static void setSolverTypeCallback(int buttonId, bool buttonState, void* userPointer) { if (buttonId >= 0 && buttonId < SOLVER_TYPE_COUNT) { @@ -695,32 +351,30 @@ void setSolverTypeCallback(int buttonId, bool buttonState, void* userPointer) } } -void apiSelectButtonCallback(int buttonId, bool buttonState, void* userPointer) +static void setNumThreads( int numThreads ) { - gTaskMgr.setApi(static_cast(buttonId)); - if (gTaskMgr.getApi()==TaskManager::apiNone) + int newNumThreads = ( std::min )( numThreads, int( BT_MAX_THREAD_COUNT ) ); + int oldNumThreads = btGetTaskScheduler()->getNumThreads(); + // only call when the thread count is different + if ( newNumThreads != oldNumThreads ) { - gSliderNumThreads = 1.0f; - } - else - { - gSliderNumThreads = float(gTaskMgr.getNumThreads()); + btGetTaskScheduler()->setNumThreads( newNumThreads ); } } -void setThreadCountCallback(float val, void* userPtr) +static void apiSelectButtonCallback(int buttonId, bool buttonState, void* userPointer) { - if (gTaskMgr.getApi()==TaskManager::apiNone) - { - gSliderNumThreads = 1.0f; - } - else - { - gTaskMgr.setNumThreads( int( gSliderNumThreads ) ); - } + // change the task scheduler + btSetTaskScheduler( gTaskSchedulerMgr.getTaskScheduler( buttonId ) ); + setNumThreads( int( gSliderNumThreads ) ); } -void setSolverIterationCountCallback(float val, void* userPtr) +static void setThreadCountCallback(float val, void* userPtr) +{ + setNumThreads( int( gSliderNumThreads ) ); +} + +static void setSolverIterationCountCallback(float val, void* userPtr) { if (btDiscreteDynamicsWorld* world = reinterpret_cast(userPtr)) { @@ -733,6 +387,7 @@ void CommonRigidBodyMTBase::createEmptyDynamicsWorld() gNumIslands = 0; m_solverType = gSolverType; #if BT_THREADSAFE && (BT_USE_OPENMP || BT_USE_PPL || BT_USE_TBB) + btAssert( btGetTaskScheduler() != NULL ); m_multithreadCapable = true; #endif if ( gMultithreadedWorld ) @@ -743,30 +398,24 @@ void CommonRigidBodyMTBase::createEmptyDynamicsWorld() cci.m_defaultMaxCollisionAlgorithmPoolSize = 80000; m_collisionConfiguration = new btDefaultCollisionConfiguration( cci ); -#if USE_PARALLEL_NARROWPHASE - m_dispatcher = new MyCollisionDispatcher( m_collisionConfiguration ); -#else - m_dispatcher = new btCollisionDispatcher( m_collisionConfiguration ); -#endif //USE_PARALLEL_NARROWPHASE - + m_dispatcher = new MyCollisionDispatcher( m_collisionConfiguration, 40 ); m_broadphase = new btDbvtBroadphase(); -#if BT_THREADSAFE && USE_PARALLEL_ISLAND_SOLVER + btConstraintSolverPoolMt* solverPool; { btConstraintSolver* solvers[ BT_MAX_THREAD_COUNT ]; - int maxThreadCount = btMin( int(BT_MAX_THREAD_COUNT), TaskManager::getMaxNumThreads() ); + int maxThreadCount = BT_MAX_THREAD_COUNT; for ( int i = 0; i < maxThreadCount; ++i ) { solvers[ i ] = createSolverByType( m_solverType ); } - m_solver = new MyConstraintSolverPool( solvers, maxThreadCount ); + solverPool = new btConstraintSolverPoolMt( solvers, maxThreadCount ); + m_solver = solverPool; } -#else - m_solver = createSolverByType( m_solverType ); -#endif //#if USE_PARALLEL_ISLAND_SOLVER - btDiscreteDynamicsWorld* world = new MyDiscreteDynamicsWorld( m_dispatcher, m_broadphase, m_solver, m_collisionConfiguration ); + btDiscreteDynamicsWorld* world = new MyDiscreteDynamicsWorld( m_dispatcher, m_broadphase, solverPool, m_collisionConfiguration ); m_dynamicsWorld = world; m_multithreadedWorld = true; + btAssert( btGetTaskScheduler() != NULL ); } else { @@ -886,24 +535,25 @@ void CommonRigidBodyMTBase::createDefaultParameters() if (m_multithreadedWorld) { // create a button for each supported threading API - for (int iApi = 0; iApi < TaskManager::apiCount; ++iApi) + for ( int iApi = 0; iApi < gTaskSchedulerMgr.getNumTaskSchedulers(); ++iApi ) { - TaskManager::Api api = static_cast(iApi); - if (gTaskMgr.isSupported(api)) - { - char str[1024]; - sprintf(str, "API %s", gTaskMgr.getApiName(api)); - ButtonParams button( str, iApi, false ); - button.m_callback = apiSelectButtonCallback; - m_guiHelper->getParameterInterface()->registerButtonParameter( button ); - } + char str[ 1024 ]; + sprintf( str, "API %s", gTaskSchedulerMgr.getTaskScheduler(iApi)->getName() ); + ButtonParams button( str, iApi, false ); + button.m_callback = apiSelectButtonCallback; + m_guiHelper->getParameterInterface()->registerButtonParameter( button ); } { // create a slider to set the number of threads to use - gSliderNumThreads = float(gTaskMgr.getNumThreads()); + int numThreads = btGetTaskScheduler()->getNumThreads(); + // if slider has not been set yet (by another demo), + if ( gSliderNumThreads <= 1.0f ) + { + gSliderNumThreads = float( numThreads ); + } SliderParams slider("Thread count", &gSliderNumThreads); slider.m_minVal = 1.0f; - slider.m_maxVal = float(gTaskMgr.getMaxNumThreads()*2); + slider.m_maxVal = float( BT_MAX_THREAD_COUNT ); slider.m_callback = setThreadCountCallback; slider.m_clampToIntegers = true; m_guiHelper->getParameterInterface()->registerSliderFloatParameter( slider ); @@ -946,14 +596,14 @@ void CommonRigidBodyMTBase::drawScreenText() const btPersistentManifold* man = m_dispatcher->getManifoldByIndexInternal( i ); numContacts += man->getNumContacts(); } - const char* mtApi = TaskManager::getApiName( gTaskMgr.getApi() ); + const char* mtApi = btGetTaskScheduler()->getName(); sprintf( msg, "islands=%d bodies=%d manifolds=%d contacts=%d [%s] threads=%d", gNumIslands, m_dynamicsWorld->getNumCollisionObjects(), numManifolds, numContacts, mtApi, - gTaskMgr.getApi() == TaskManager::apiNone ? 1 : gTaskMgr.getNumThreads() + btGetTaskScheduler()->getNumThreads() ); m_guiHelper->getAppInterface()->drawText( msg, 100, yCoord, 0.4f ); yCoord += yStep; diff --git a/examples/MultiThreadedDemo/ParallelFor.h b/examples/MultiThreadedDemo/ParallelFor.h deleted file mode 100644 index cb1325025..000000000 --- a/examples/MultiThreadedDemo/ParallelFor.h +++ /dev/null @@ -1,336 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/ - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#include //printf debugging -#include - - -// choose threading providers: -#if BT_USE_TBB -#define USE_TBB 1 // use Intel Threading Building Blocks for thread management -#endif - -#if BT_USE_PPL -#define USE_PPL 1 // use Microsoft Parallel Patterns Library (installed with Visual Studio 2010 and later) -#endif // BT_USE_PPL - -#if BT_USE_OPENMP -#define USE_OPENMP 1 // use OpenMP (also need to change compiler options for OpenMP support) -#endif - - -#if USE_OPENMP - -#include - -#endif // #if USE_OPENMP - - -#if USE_PPL - -#include // if you get a compile error here, check whether your version of Visual Studio includes PPL -// Visual Studio 2010 and later should come with it -#include // for GetProcessorCount() -#endif // #if USE_PPL - - -#if USE_TBB - -#define __TBB_NO_IMPLICIT_LINKAGE 1 -#include -#include -#include -#include - -#endif // #if USE_TBB - - - -class TaskManager -{ -public: - enum Api - { - apiNone, - apiOpenMP, - apiTbb, - apiPpl, - apiCount - }; - static const char* getApiName( Api api ) - { - switch ( api ) - { - case apiNone: return "None"; - case apiOpenMP: return "OpenMP"; - case apiTbb: return "Intel TBB"; - case apiPpl: return "MS PPL"; - default: return "unknown"; - } - } - - TaskManager() - { - m_api = apiNone; - m_numThreads = 0; -#if USE_TBB - m_tbbSchedulerInit = NULL; -#endif // #if USE_TBB - } - - Api getApi() const - { - return m_api; - } - - bool isSupported( Api api ) const - { -#if USE_OPENMP - if ( api == apiOpenMP ) - { - return true; - } -#endif -#if USE_TBB - if ( api == apiTbb ) - { - return true; - } -#endif -#if USE_PPL - if ( api == apiPpl ) - { - return true; - } -#endif - // apiNone is always "supported" - return api == apiNone; - } - - void setApi( Api api ) - { - if (isSupported(api)) - { - m_api = api; - } - else - { - // no compile time support for selected API, fallback to "none" - m_api = apiNone; - } - } - - static int getMaxNumThreads() - { -#if USE_OPENMP - return omp_get_max_threads(); -#elif USE_PPL - return concurrency::GetProcessorCount(); -#elif USE_TBB - return tbb::task_scheduler_init::default_num_threads(); -#endif - return 1; - } - - int getNumThreads() const - { - return m_numThreads; - } - - int setNumThreads( int numThreads ) - { - m_numThreads = ( std::max )( 1, numThreads ); - -#if USE_OPENMP - omp_set_num_threads( m_numThreads ); -#endif - -#if USE_PPL - { - using namespace concurrency; - if ( CurrentScheduler::Id() != -1 ) - { - CurrentScheduler::Detach(); - } - SchedulerPolicy policy; - policy.SetConcurrencyLimits( m_numThreads, m_numThreads ); - CurrentScheduler::Create( policy ); - } -#endif - -#if USE_TBB - if ( m_tbbSchedulerInit ) - { - delete m_tbbSchedulerInit; - m_tbbSchedulerInit = NULL; - } - m_tbbSchedulerInit = new tbb::task_scheduler_init( m_numThreads ); -#endif - return m_numThreads; - } - - void init() - { - if (m_numThreads == 0) - { -#if USE_PPL - setApi( apiPpl ); -#endif -#if USE_TBB - setApi( apiTbb ); -#endif -#if USE_OPENMP - setApi( apiOpenMP ); -#endif - setNumThreads(getMaxNumThreads()); - } - else - { - setNumThreads(m_numThreads); - } - } - - void shutdown() - { -#if USE_TBB - if ( m_tbbSchedulerInit ) - { - delete m_tbbSchedulerInit; - m_tbbSchedulerInit = NULL; - } -#endif - } - -private: - Api m_api; - int m_numThreads; -#if USE_TBB - tbb::task_scheduler_init* m_tbbSchedulerInit; -#endif // #if USE_TBB -}; - -extern TaskManager gTaskMgr; - - -inline static void initTaskScheduler() -{ - gTaskMgr.init(); -} - -inline static void cleanupTaskScheduler() -{ - gTaskMgr.shutdown(); -} - - -#if USE_TBB -/// -/// TbbBodyAdapter -- Converts a body object that implements the -/// "forLoop(int iBegin, int iEnd) const" function -/// into a TBB compatible object that takes a tbb::blocked_range type. -/// -template -struct TbbBodyAdapter -{ - const TBody* mBody; - - void operator()( const tbb::blocked_range& range ) const - { - mBody->forLoop( range.begin(), range.end() ); - } -}; -#endif // #if USE_TBB - -#if USE_PPL -/// -/// PplBodyAdapter -- Converts a body object that implements the -/// "forLoop(int iBegin, int iEnd) const" function -/// into a PPL compatible object that implements "void operator()( int ) const" -/// -template -struct PplBodyAdapter -{ - const TBody* mBody; - int mGrainSize; - int mIndexEnd; - - void operator()( int i ) const - { - mBody->forLoop( i, (std::min)(i + mGrainSize, mIndexEnd) ); - } -}; -#endif // #if USE_PPL - - -/// -/// parallelFor -- interface for submitting work expressed as a for loop to the worker threads -/// -template -void parallelFor( int iBegin, int iEnd, int grainSize, const TBody& body ) -{ -#if USE_OPENMP - if ( gTaskMgr.getApi() == TaskManager::apiOpenMP ) - { -#pragma omp parallel for schedule(static, 1) - for ( int i = iBegin; i < iEnd; i += grainSize ) - { - body.forLoop( i, (std::min)( i + grainSize, iEnd ) ); - } - return; - } -#endif // #if USE_OPENMP - -#if USE_PPL - if ( gTaskMgr.getApi() == TaskManager::apiPpl ) - { - // PPL dispatch - PplBodyAdapter pplBody; - pplBody.mBody = &body; - pplBody.mGrainSize = grainSize; - pplBody.mIndexEnd = iEnd; - // note: MSVC 2010 doesn't support partitioner args, so avoid them - concurrency::parallel_for( iBegin, - iEnd, - grainSize, - pplBody - ); - return; - } -#endif //#if USE_PPL - -#if USE_TBB - if ( gTaskMgr.getApi() == TaskManager::apiTbb ) - { - // TBB dispatch - TbbBodyAdapter tbbBody; - tbbBody.mBody = &body; - tbb::parallel_for( tbb::blocked_range( iBegin, iEnd, grainSize ), - tbbBody, - tbb::simple_partitioner() - ); - return; - } -#endif // #if USE_TBB - - { - // run on main thread - body.forLoop( iBegin, iEnd ); - } - -} - - - - diff --git a/src/BulletCollision/CMakeLists.txt b/src/BulletCollision/CMakeLists.txt index 67afc7e0c..85c5fc8b6 100644 --- a/src/BulletCollision/CMakeLists.txt +++ b/src/BulletCollision/CMakeLists.txt @@ -15,6 +15,7 @@ SET(BulletCollision_SRCS CollisionDispatch/btBox2dBox2dCollisionAlgorithm.cpp CollisionDispatch/btBoxBoxDetector.cpp CollisionDispatch/btCollisionDispatcher.cpp + CollisionDispatch/btCollisionDispatcherMt.cpp CollisionDispatch/btCollisionObject.cpp CollisionDispatch/btCollisionWorld.cpp CollisionDispatch/btCollisionWorldImporter.cpp @@ -123,6 +124,7 @@ SET(CollisionDispatch_HDRS CollisionDispatch/btCollisionConfiguration.h CollisionDispatch/btCollisionCreateFunc.h CollisionDispatch/btCollisionDispatcher.h + CollisionDispatch/btCollisionDispatcherMt.h CollisionDispatch/btCollisionObject.h CollisionDispatch/btCollisionObjectWrapper.h CollisionDispatch/btCollisionWorld.h diff --git a/src/BulletCollision/CollisionDispatch/btCollisionDispatcherMt.cpp b/src/BulletCollision/CollisionDispatch/btCollisionDispatcherMt.cpp new file mode 100644 index 000000000..57cd050e5 --- /dev/null +++ b/src/BulletCollision/CollisionDispatch/btCollisionDispatcherMt.cpp @@ -0,0 +1,164 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/ + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + + + +#include "btCollisionDispatcherMt.h" +#include "LinearMath/btQuickprof.h" + +#include "BulletCollision/BroadphaseCollision/btCollisionAlgorithm.h" + +#include "BulletCollision/CollisionShapes/btCollisionShape.h" +#include "BulletCollision/CollisionDispatch/btCollisionObject.h" +#include "BulletCollision/BroadphaseCollision/btOverlappingPairCache.h" +#include "LinearMath/btPoolAllocator.h" +#include "BulletCollision/CollisionDispatch/btCollisionConfiguration.h" +#include "BulletCollision/CollisionDispatch/btCollisionObjectWrapper.h" + + +btCollisionDispatcherMt::btCollisionDispatcherMt( btCollisionConfiguration* config, int grainSize ) + : btCollisionDispatcher( config ) +{ + m_batchUpdating = false; + m_grainSize = grainSize; // iterations per task +} + + +btPersistentManifold* btCollisionDispatcherMt::getNewManifold( const btCollisionObject* body0, const btCollisionObject* body1 ) +{ + //optional relative contact breaking threshold, turned on by default (use setDispatcherFlags to switch off feature for improved performance) + + btScalar contactBreakingThreshold = ( m_dispatcherFlags & btCollisionDispatcher::CD_USE_RELATIVE_CONTACT_BREAKING_THRESHOLD ) ? + btMin( body0->getCollisionShape()->getContactBreakingThreshold( gContactBreakingThreshold ), body1->getCollisionShape()->getContactBreakingThreshold( gContactBreakingThreshold ) ) + : gContactBreakingThreshold; + + btScalar contactProcessingThreshold = btMin( body0->getContactProcessingThreshold(), body1->getContactProcessingThreshold() ); + + void* mem = m_persistentManifoldPoolAllocator->allocate( sizeof( btPersistentManifold ) ); + if ( NULL == mem ) + { + //we got a pool memory overflow, by default we fallback to dynamically allocate memory. If we require a contiguous contact pool then assert. + if ( ( m_dispatcherFlags&CD_DISABLE_CONTACTPOOL_DYNAMIC_ALLOCATION ) == 0 ) + { + mem = btAlignedAlloc( sizeof( btPersistentManifold ), 16 ); + } + else + { + btAssert( 0 ); + //make sure to increase the m_defaultMaxPersistentManifoldPoolSize in the btDefaultCollisionConstructionInfo/btDefaultCollisionConfiguration + return 0; + } + } + btPersistentManifold* manifold = new( mem ) btPersistentManifold( body0, body1, 0, contactBreakingThreshold, contactProcessingThreshold ); + if ( !m_batchUpdating ) + { + // batch updater will update manifold pointers array after finishing, so + // only need to update array when not batch-updating + btAssert( !btThreadsAreRunning() ); + manifold->m_index1a = m_manifoldsPtr.size(); + m_manifoldsPtr.push_back( manifold ); + } + + return manifold; +} + +void btCollisionDispatcherMt::releaseManifold( btPersistentManifold* manifold ) +{ + clearManifold( manifold ); + btAssert( !btThreadsAreRunning() ); + if ( !m_batchUpdating ) + { + // batch updater will update manifold pointers array after finishing, so + // only need to update array when not batch-updating + int findIndex = manifold->m_index1a; + btAssert( findIndex < m_manifoldsPtr.size() ); + m_manifoldsPtr.swap( findIndex, m_manifoldsPtr.size() - 1 ); + m_manifoldsPtr[ findIndex ]->m_index1a = findIndex; + m_manifoldsPtr.pop_back(); + } + + manifold->~btPersistentManifold(); + if ( m_persistentManifoldPoolAllocator->validPtr( manifold ) ) + { + m_persistentManifoldPoolAllocator->freeMemory( manifold ); + } + else + { + btAlignedFree( manifold ); + } +} + +struct CollisionDispatcherUpdater : public btIParallelForBody +{ + btBroadphasePair* mPairArray; + btNearCallback mCallback; + btCollisionDispatcher* mDispatcher; + const btDispatcherInfo* mInfo; + + CollisionDispatcherUpdater() + { + mPairArray = NULL; + mCallback = NULL; + mDispatcher = NULL; + mInfo = NULL; + } + void forLoop( int iBegin, int iEnd ) const + { + for ( int i = iBegin; i < iEnd; ++i ) + { + btBroadphasePair* pair = &mPairArray[ i ]; + mCallback( *pair, *mDispatcher, *mInfo ); + } + } +}; + + +void btCollisionDispatcherMt::dispatchAllCollisionPairs( btOverlappingPairCache* pairCache, const btDispatcherInfo& info, btDispatcher* dispatcher ) +{ + int pairCount = pairCache->getNumOverlappingPairs(); + if ( pairCount == 0 ) + { + return; + } + CollisionDispatcherUpdater updater; + updater.mCallback = getNearCallback(); + updater.mPairArray = pairCache->getOverlappingPairArrayPtr(); + updater.mDispatcher = this; + updater.mInfo = &info; + + m_batchUpdating = true; + btParallelFor( 0, pairCount, m_grainSize, updater ); + m_batchUpdating = false; + + // reconstruct the manifolds array to ensure determinism + m_manifoldsPtr.resizeNoInitialize( 0 ); + + btBroadphasePair* pairs = pairCache->getOverlappingPairArrayPtr(); + for ( int i = 0; i < pairCount; ++i ) + { + if (btCollisionAlgorithm* algo = pairs[ i ].m_algorithm) + { + algo->getAllContactManifolds( m_manifoldsPtr ); + } + } + + // update the indices (used when releasing manifolds) + for ( int i = 0; i < m_manifoldsPtr.size(); ++i ) + { + m_manifoldsPtr[ i ]->m_index1a = i; + } +} + + diff --git a/src/BulletCollision/CollisionDispatch/btCollisionDispatcherMt.h b/src/BulletCollision/CollisionDispatch/btCollisionDispatcherMt.h new file mode 100644 index 000000000..f1d7eafdc --- /dev/null +++ b/src/BulletCollision/CollisionDispatch/btCollisionDispatcherMt.h @@ -0,0 +1,39 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/ + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + +#ifndef BT_COLLISION_DISPATCHER_MT_H +#define BT_COLLISION_DISPATCHER_MT_H + +#include "BulletCollision/CollisionDispatch/btCollisionDispatcher.h" +#include "LinearMath/btThreads.h" + + +class btCollisionDispatcherMt : public btCollisionDispatcher +{ +public: + btCollisionDispatcherMt( btCollisionConfiguration* config, int grainSize = 40 ); + + virtual btPersistentManifold* getNewManifold( const btCollisionObject* body0, const btCollisionObject* body1 ) BT_OVERRIDE; + virtual void releaseManifold( btPersistentManifold* manifold ) BT_OVERRIDE; + + virtual void dispatchAllCollisionPairs( btOverlappingPairCache* pairCache, const btDispatcherInfo& info, btDispatcher* dispatcher ) BT_OVERRIDE; + +protected: + bool m_batchUpdating; + int m_grainSize; +}; + +#endif //BT_COLLISION_DISPATCHER_MT_H + diff --git a/src/BulletDynamics/Dynamics/btDiscreteDynamicsWorldMt.cpp b/src/BulletDynamics/Dynamics/btDiscreteDynamicsWorldMt.cpp index 5e51a994c..faa8193dd 100644 --- a/src/BulletDynamics/Dynamics/btDiscreteDynamicsWorldMt.cpp +++ b/src/BulletDynamics/Dynamics/btDiscreteDynamicsWorldMt.cpp @@ -108,8 +108,105 @@ struct InplaceSolverIslandCallbackMt : public btSimulationIslandManagerMt::Islan }; +/// +/// btConstraintSolverPoolMt +/// -btDiscreteDynamicsWorldMt::btDiscreteDynamicsWorldMt(btDispatcher* dispatcher,btBroadphaseInterface* pairCache,btConstraintSolver* constraintSolver, btCollisionConfiguration* collisionConfiguration) +btConstraintSolverPoolMt::ThreadSolver* btConstraintSolverPoolMt::getAndLockThreadSolver() +{ + int i = btGetCurrentThreadIndex() % m_solvers.size(); + while ( true ) + { + ThreadSolver& solver = m_solvers[ i ]; + if ( solver.mutex.tryLock() ) + { + return &solver; + } + // failed, try the next one + i = ( i + 1 ) % m_solvers.size(); + } + return NULL; +} + +void btConstraintSolverPoolMt::init( btConstraintSolver** solvers, int numSolvers ) +{ + m_solverType = BT_SEQUENTIAL_IMPULSE_SOLVER; + m_solvers.resize( numSolvers ); + for ( int i = 0; i < numSolvers; ++i ) + { + m_solvers[ i ].solver = solvers[ i ]; + } + if ( numSolvers > 0 ) + { + m_solverType = solvers[ 0 ]->getSolverType(); + } +} + +// create the solvers for me +btConstraintSolverPoolMt::btConstraintSolverPoolMt( int numSolvers ) +{ + btAlignedObjectArray solvers; + solvers.reserve( numSolvers ); + for ( int i = 0; i < numSolvers; ++i ) + { + btConstraintSolver* solver = new btSequentialImpulseConstraintSolver(); + solvers.push_back( solver ); + } + init( &solvers[ 0 ], numSolvers ); +} + +// pass in fully constructed solvers (destructor will delete them) +btConstraintSolverPoolMt::btConstraintSolverPoolMt( btConstraintSolver** solvers, int numSolvers ) +{ + init( solvers, numSolvers ); +} + +btConstraintSolverPoolMt::~btConstraintSolverPoolMt() +{ + // delete all solvers + for ( int i = 0; i < m_solvers.size(); ++i ) + { + ThreadSolver& solver = m_solvers[ i ]; + delete solver.solver; + solver.solver = NULL; + } +} + +///solve a group of constraints +btScalar btConstraintSolverPoolMt::solveGroup( btCollisionObject** bodies, + int numBodies, + btPersistentManifold** manifolds, + int numManifolds, + btTypedConstraint** constraints, + int numConstraints, + const btContactSolverInfo& info, + btIDebugDraw* debugDrawer, + btDispatcher* dispatcher +) +{ + ThreadSolver* ts = getAndLockThreadSolver(); + ts->solver->solveGroup( bodies, numBodies, manifolds, numManifolds, constraints, numConstraints, info, debugDrawer, dispatcher ); + ts->mutex.unlock(); + return 0.0f; +} + +void btConstraintSolverPoolMt::reset() +{ + for ( int i = 0; i < m_solvers.size(); ++i ) + { + ThreadSolver& solver = m_solvers[ i ]; + solver.mutex.lock(); + solver.solver->reset(); + solver.mutex.unlock(); + } +} + + +/// +/// btDiscreteDynamicsWorldMt +/// + +btDiscreteDynamicsWorldMt::btDiscreteDynamicsWorldMt(btDispatcher* dispatcher, btBroadphaseInterface* pairCache, btConstraintSolverPoolMt* constraintSolver, btCollisionConfiguration* collisionConfiguration) : btDiscreteDynamicsWorld(dispatcher,pairCache,constraintSolver,collisionConfiguration) { if (m_ownsIslandManager) @@ -124,8 +221,8 @@ btDiscreteDynamicsWorldMt::btDiscreteDynamicsWorldMt(btDispatcher* dispatcher,bt { void* mem = btAlignedAlloc(sizeof(btSimulationIslandManagerMt),16); btSimulationIslandManagerMt* im = new (mem) btSimulationIslandManagerMt(); - m_islandManager = im; im->setMinimumSolverBatchSize( m_solverInfo.m_minimumSolverBatchSize ); + m_islandManager = im; } } @@ -145,7 +242,7 @@ btDiscreteDynamicsWorldMt::~btDiscreteDynamicsWorldMt() } -void btDiscreteDynamicsWorldMt::solveConstraints(btContactSolverInfo& solverInfo) +void btDiscreteDynamicsWorldMt::solveConstraints(btContactSolverInfo& solverInfo) { BT_PROFILE("solveConstraints"); @@ -160,3 +257,65 @@ void btDiscreteDynamicsWorldMt::solveConstraints(btContactSolverInfo& solverInfo } +struct UpdaterUnconstrainedMotion : public btIParallelForBody +{ + btScalar timeStep; + btRigidBody** rigidBodies; + + void forLoop( int iBegin, int iEnd ) const BT_OVERRIDE + { + for ( int i = iBegin; i < iEnd; ++i ) + { + btRigidBody* body = rigidBodies[ i ]; + if ( !body->isStaticOrKinematicObject() ) + { + //don't integrate/update velocities here, it happens in the constraint solver + body->applyDamping( timeStep ); + body->predictIntegratedTransform( timeStep, body->getInterpolationWorldTransform() ); + } + } + } +}; + + +void btDiscreteDynamicsWorldMt::predictUnconstraintMotion( btScalar timeStep ) +{ + BT_PROFILE( "predictUnconstraintMotion" ); + int grainSize = 50; // num of iterations per task for TBB + int bodyCount = m_nonStaticRigidBodies.size(); + UpdaterUnconstrainedMotion update; + update.timeStep = timeStep; + update.rigidBodies = bodyCount ? &m_nonStaticRigidBodies[ 0 ] : NULL; + btParallelFor( 0, bodyCount, grainSize, update ); +} + + +void btDiscreteDynamicsWorldMt::createPredictiveContacts( btScalar timeStep ) +{ + releasePredictiveContacts(); + int grainSize = 50; // num of iterations per task for TBB or OPENMP + if ( int bodyCount = m_nonStaticRigidBodies.size() ) + { + UpdaterCreatePredictiveContacts update; + update.world = this; + update.timeStep = timeStep; + update.rigidBodies = &m_nonStaticRigidBodies[ 0 ]; + btParallelFor( 0, bodyCount, grainSize, update ); + } +} + + +void btDiscreteDynamicsWorldMt::integrateTransforms( btScalar timeStep ) +{ + BT_PROFILE( "integrateTransforms" ); + int grainSize = 50; // num of iterations per task for TBB or OPENMP + if ( int bodyCount = m_nonStaticRigidBodies.size() ) + { + UpdaterIntegrateTransforms update; + update.world = this; + update.timeStep = timeStep; + update.rigidBodies = &m_nonStaticRigidBodies[ 0 ]; + btParallelFor( 0, bodyCount, grainSize, update ); + } +} + diff --git a/src/BulletDynamics/Dynamics/btDiscreteDynamicsWorldMt.h b/src/BulletDynamics/Dynamics/btDiscreteDynamicsWorldMt.h index b28371b51..2f144cdda 100644 --- a/src/BulletDynamics/Dynamics/btDiscreteDynamicsWorldMt.h +++ b/src/BulletDynamics/Dynamics/btDiscreteDynamicsWorldMt.h @@ -18,24 +18,116 @@ subject to the following restrictions: #define BT_DISCRETE_DYNAMICS_WORLD_MT_H #include "btDiscreteDynamicsWorld.h" +#include "btSimulationIslandManagerMt.h" +#include "BulletDynamics/ConstraintSolver/btConstraintSolver.h" struct InplaceSolverIslandCallbackMt; +/// +/// btConstraintSolverPoolMt - masquerades as a constraint solver, but really it is a threadsafe pool of them. +/// +/// Each solver in the pool is protected by a mutex. When solveGroup is called from a thread, +/// the pool looks for a solver that isn't being used by another thread, locks it, and dispatches the +/// call to the solver. +/// So long as there are at least as many solvers as there are hardware threads, it should never need to +/// spin wait. +/// +class btConstraintSolverPoolMt : public btConstraintSolver +{ +public: + // create the solvers for me + explicit btConstraintSolverPoolMt( int numSolvers ); + + // pass in fully constructed solvers (destructor will delete them) + btConstraintSolverPoolMt( btConstraintSolver** solvers, int numSolvers ); + + virtual ~btConstraintSolverPoolMt(); + + ///solve a group of constraints + virtual btScalar solveGroup( btCollisionObject** bodies, + int numBodies, + btPersistentManifold** manifolds, + int numManifolds, + btTypedConstraint** constraints, + int numConstraints, + const btContactSolverInfo& info, + btIDebugDraw* debugDrawer, + btDispatcher* dispatcher + ) BT_OVERRIDE; + + virtual void reset() BT_OVERRIDE; + virtual btConstraintSolverType getSolverType() const BT_OVERRIDE { return m_solverType; } + +private: + const static size_t kCacheLineSize = 128; + struct ThreadSolver + { + btConstraintSolver* solver; + btSpinMutex mutex; + char _cachelinePadding[ kCacheLineSize - sizeof( btSpinMutex ) - sizeof( void* ) ]; // keep mutexes from sharing a cache line + }; + btAlignedObjectArray m_solvers; + btConstraintSolverType m_solverType; + + ThreadSolver* getAndLockThreadSolver(); + void init( btConstraintSolver** solvers, int numSolvers ); +}; + + + /// /// btDiscreteDynamicsWorldMt -- a version of DiscreteDynamicsWorld with some minor changes to support /// solving simulation islands on multiple threads. /// +/// Should function exactly like btDiscreteDynamicsWorld. +/// Also 3 methods that iterate over all of the rigidbodies can run in parallel: +/// - predictUnconstraintMotion +/// - integrateTransforms +/// - createPredictiveContacts +/// ATTRIBUTE_ALIGNED16(class) btDiscreteDynamicsWorldMt : public btDiscreteDynamicsWorld { protected: InplaceSolverIslandCallbackMt* m_solverIslandCallbackMt; - virtual void solveConstraints(btContactSolverInfo& solverInfo); + virtual void solveConstraints(btContactSolverInfo& solverInfo) BT_OVERRIDE; + + virtual void predictUnconstraintMotion( btScalar timeStep ) BT_OVERRIDE; + + struct UpdaterCreatePredictiveContacts : public btIParallelForBody + { + btScalar timeStep; + btRigidBody** rigidBodies; + btDiscreteDynamicsWorldMt* world; + + void forLoop( int iBegin, int iEnd ) const BT_OVERRIDE + { + world->createPredictiveContactsInternal( &rigidBodies[ iBegin ], iEnd - iBegin, timeStep ); + } + }; + virtual void createPredictiveContacts( btScalar timeStep ) BT_OVERRIDE; + + struct UpdaterIntegrateTransforms : public btIParallelForBody + { + btScalar timeStep; + btRigidBody** rigidBodies; + btDiscreteDynamicsWorldMt* world; + + void forLoop( int iBegin, int iEnd ) const BT_OVERRIDE + { + world->integrateTransformsInternal( &rigidBodies[ iBegin ], iEnd - iBegin, timeStep ); + } + }; + virtual void integrateTransforms( btScalar timeStep ) BT_OVERRIDE; public: BT_DECLARE_ALIGNED_ALLOCATOR(); - btDiscreteDynamicsWorldMt(btDispatcher* dispatcher,btBroadphaseInterface* pairCache,btConstraintSolver* constraintSolver,btCollisionConfiguration* collisionConfiguration); + btDiscreteDynamicsWorldMt(btDispatcher* dispatcher, + btBroadphaseInterface* pairCache, + btConstraintSolverPoolMt* constraintSolver, // Note this should be a solver-pool for multi-threading + btCollisionConfiguration* collisionConfiguration + ); virtual ~btDiscreteDynamicsWorldMt(); }; diff --git a/src/BulletDynamics/Dynamics/btSimulationIslandManagerMt.cpp b/src/BulletDynamics/Dynamics/btSimulationIslandManagerMt.cpp index ad63b6ee0..6373f60ff 100644 --- a/src/BulletDynamics/Dynamics/btSimulationIslandManagerMt.cpp +++ b/src/BulletDynamics/Dynamics/btSimulationIslandManagerMt.cpp @@ -15,6 +15,7 @@ subject to the following restrictions: #include "LinearMath/btScalar.h" +#include "LinearMath/btThreads.h" #include "btSimulationIslandManagerMt.h" #include "BulletCollision/BroadphaseCollision/btDispatcher.h" #include "BulletCollision/NarrowPhaseCollision/btPersistentManifold.h" @@ -44,7 +45,7 @@ btSimulationIslandManagerMt::btSimulationIslandManagerMt() { m_minimumSolverBatchSize = calcBatchCost(0, 128, 0); m_batchIslandMinBodyCount = 32; - m_islandDispatch = defaultIslandDispatch; + m_islandDispatch = parallelIslandDispatch; m_batchIsland = NULL; } @@ -545,7 +546,7 @@ void btSimulationIslandManagerMt::mergeIslands() } -void btSimulationIslandManagerMt::defaultIslandDispatch( btAlignedObjectArray* islandsPtr, IslandCallback* callback ) +void btSimulationIslandManagerMt::serialIslandDispatch( btAlignedObjectArray* islandsPtr, IslandCallback* callback ) { // serial dispatch btAlignedObjectArray& islands = *islandsPtr; @@ -565,6 +566,40 @@ void btSimulationIslandManagerMt::defaultIslandDispatch( btAlignedObjectArray* islandsPtr; + btSimulationIslandManagerMt::IslandCallback* callback; + + void forLoop( int iBegin, int iEnd ) const BT_OVERRIDE + { + for ( int i = iBegin; i < iEnd; ++i ) + { + btSimulationIslandManagerMt::Island* island = ( *islandsPtr )[ i ]; + btPersistentManifold** manifolds = island->manifoldArray.size() ? &island->manifoldArray[ 0 ] : NULL; + btTypedConstraint** constraintsPtr = island->constraintArray.size() ? &island->constraintArray[ 0 ] : NULL; + callback->processIsland( &island->bodyArray[ 0 ], + island->bodyArray.size(), + manifolds, + island->manifoldArray.size(), + constraintsPtr, + island->constraintArray.size(), + island->id + ); + } + } +}; + +void btSimulationIslandManagerMt::parallelIslandDispatch( btAlignedObjectArray* islandsPtr, IslandCallback* callback ) +{ + int grainSize = 1; // iterations per task + UpdateIslandDispatcher dispatcher; + dispatcher.islandsPtr = islandsPtr; + dispatcher.callback = callback; + btParallelFor( 0, islandsPtr->size(), grainSize, dispatcher ); +} + + ///@todo: this is random access, it can be walked 'cache friendly'! void btSimulationIslandManagerMt::buildAndProcessIslands( btDispatcher* dispatcher, btCollisionWorld* collisionWorld, diff --git a/src/BulletDynamics/Dynamics/btSimulationIslandManagerMt.h b/src/BulletDynamics/Dynamics/btSimulationIslandManagerMt.h index 117061623..e3e0ea523 100644 --- a/src/BulletDynamics/Dynamics/btSimulationIslandManagerMt.h +++ b/src/BulletDynamics/Dynamics/btSimulationIslandManagerMt.h @@ -59,7 +59,8 @@ public: ) = 0; }; typedef void( *IslandDispatchFunc ) ( btAlignedObjectArray* islands, IslandCallback* callback ); - static void defaultIslandDispatch( btAlignedObjectArray* islands, IslandCallback* callback ); + static void serialIslandDispatch( btAlignedObjectArray* islands, IslandCallback* callback ); + static void parallelIslandDispatch( btAlignedObjectArray* islandsPtr, IslandCallback* callback ); protected: btAlignedObjectArray m_allocatedIslands; // owner of all Islands btAlignedObjectArray m_activeIslands; // islands actively in use diff --git a/src/LinearMath/btThreads.cpp b/src/LinearMath/btThreads.cpp index b72301a2e..3c73255a1 100644 --- a/src/LinearMath/btThreads.cpp +++ b/src/LinearMath/btThreads.cpp @@ -14,6 +14,247 @@ subject to the following restrictions: #include "btThreads.h" +#include // for min and max + +#if BT_THREADSAFE + +#if BT_USE_OPENMP + +#include + +#endif // #if BT_USE_OPENMP + + +#if BT_USE_PPL + +// use Microsoft Parallel Patterns Library (installed with Visual Studio 2010 and later) +#include // if you get a compile error here, check whether your version of Visual Studio includes PPL +// Visual Studio 2010 and later should come with it +#include // for GetProcessorCount() + +#endif // #if BT_USE_PPL + + +#if BT_USE_TBB + +// use Intel Threading Building Blocks for thread management +#define __TBB_NO_IMPLICIT_LINKAGE 1 +#include +#include +#include +#include + +#endif // #if BT_USE_TBB + + +static btITaskScheduler* gBtTaskScheduler; +static int gThreadsRunningCounter = 0; // useful for detecting if we are trying to do nested parallel-for calls +static btSpinMutex gThreadsRunningCounterMutex; + +void btPushThreadsAreRunning() +{ + gThreadsRunningCounterMutex.lock(); + gThreadsRunningCounter++; + gThreadsRunningCounterMutex.unlock(); +} + +void btPopThreadsAreRunning() +{ + gThreadsRunningCounterMutex.lock(); + gThreadsRunningCounter--; + gThreadsRunningCounterMutex.unlock(); +} + +bool btThreadsAreRunning() +{ + return gThreadsRunningCounter != 0; +} + + +void btSetTaskScheduler( btITaskScheduler* ts ) +{ + gBtTaskScheduler = ts; +} + +btITaskScheduler* btGetTaskScheduler() +{ + return gBtTaskScheduler; +} + +void btParallelFor( int iBegin, int iEnd, int grainSize, const btIParallelForBody& body ) +{ + gBtTaskScheduler->parallelFor( iBegin, iEnd, grainSize, body ); +} + + +#if BT_USE_OPENMP +/// +/// btTaskSchedulerOpenMP -- OpenMP task scheduler implementation +/// +class btTaskSchedulerOpenMP : public btITaskScheduler +{ + int m_numThreads; +public: + btTaskSchedulerOpenMP() : btITaskScheduler( "OpenMP" ) + { + m_numThreads = 0; + } + virtual int getMaxNumThreads() const BT_OVERRIDE + { + return omp_get_max_threads(); + } + virtual int getNumThreads() const BT_OVERRIDE + { + return m_numThreads; + } + virtual void setNumThreads( int numThreads ) BT_OVERRIDE + { + m_numThreads = ( std::max )( 1, numThreads ); + omp_set_num_threads( m_numThreads ); + } + virtual void parallelFor( int iBegin, int iEnd, int grainSize, const btIParallelForBody& body ) BT_OVERRIDE + { + btPushThreadsAreRunning(); +#pragma omp parallel for schedule( static, 1 ) + for ( int i = iBegin; i < iEnd; i += grainSize ) + { + body.forLoop( i, ( std::min )( i + grainSize, iEnd ) ); + } + btPopThreadsAreRunning(); + } +}; +#endif // #if BT_USE_OPENMP + + +#if BT_USE_TBB +/// +/// btTaskSchedulerTBB -- task scheduler implemented via Intel Threaded Building Blocks +/// +class btTaskSchedulerTBB : public btITaskScheduler +{ + int m_numThreads; + tbb::task_scheduler_init* m_tbbSchedulerInit; + +public: + btTaskSchedulerTBB() : btITaskScheduler( "IntelTBB" ) + { + m_numThreads = 0; + m_tbbSchedulerInit = NULL; + } + ~btTaskSchedulerTBB() + { + if ( m_tbbSchedulerInit ) + { + delete m_tbbSchedulerInit; + m_tbbSchedulerInit = NULL; + } + } + + virtual int getMaxNumThreads() const BT_OVERRIDE + { + return tbb::task_scheduler_init::default_num_threads(); + } + virtual int getNumThreads() const BT_OVERRIDE + { + return m_numThreads; + } + virtual void setNumThreads( int numThreads ) BT_OVERRIDE + { + m_numThreads = ( std::max )( 1, numThreads ); + if ( m_tbbSchedulerInit ) + { + delete m_tbbSchedulerInit; + m_tbbSchedulerInit = NULL; + } + m_tbbSchedulerInit = new tbb::task_scheduler_init( m_numThreads ); + } + struct BodyAdapter + { + const btIParallelForBody* mBody; + + void operator()( const tbb::blocked_range& range ) const + { + mBody->forLoop( range.begin(), range.end() ); + } + }; + virtual void parallelFor( int iBegin, int iEnd, int grainSize, const btIParallelForBody& body ) BT_OVERRIDE + { + // TBB dispatch + BodyAdapter tbbBody; + tbbBody.mBody = &body; + btPushThreadsAreRunning(); + tbb::parallel_for( tbb::blocked_range( iBegin, iEnd, grainSize ), + tbbBody, + tbb::simple_partitioner() + ); + btPopThreadsAreRunning(); + } +}; +#endif // #if BT_USE_TBB + +#if BT_USE_PPL +/// +/// btTaskSchedulerPPL -- task scheduler implemented via Microsoft Parallel Patterns Lib +/// +class btTaskSchedulerPPL : public btITaskScheduler +{ + int m_numThreads; +public: + btTaskSchedulerPPL() : btITaskScheduler( "PPL" ) + { + m_numThreads = 0; + } + virtual int getMaxNumThreads() const BT_OVERRIDE + { + return concurrency::GetProcessorCount(); + } + virtual int getNumThreads() const BT_OVERRIDE + { + return m_numThreads; + } + virtual void setNumThreads( int numThreads ) BT_OVERRIDE + { + m_numThreads = ( std::max )( 1, numThreads ); + using namespace concurrency; + if ( CurrentScheduler::Id() != -1 ) + { + CurrentScheduler::Detach(); + } + SchedulerPolicy policy; + policy.SetConcurrencyLimits( m_numThreads, m_numThreads ); + CurrentScheduler::Create( policy ); + } + struct BodyAdapter + { + const btIParallelForBody* mBody; + int mGrainSize; + int mIndexEnd; + + void operator()( int i ) const + { + mBody->forLoop( i, ( std::min )( i + mGrainSize, mIndexEnd ) ); + } + }; + virtual void parallelFor( int iBegin, int iEnd, int grainSize, const btIParallelForBody& body ) BT_OVERRIDE + { + // PPL dispatch + BodyAdapter pplBody; + pplBody.mBody = &body; + pplBody.mGrainSize = grainSize; + pplBody.mIndexEnd = iEnd; + btPushThreadsAreRunning(); + // note: MSVC 2010 doesn't support partitioner args, so avoid them + concurrency::parallel_for( iBegin, + iEnd, + grainSize, + pplBody + ); + btPopThreadsAreRunning(); + } +}; +#endif // #if BT_USE_PPL + + // // Lightweight spin-mutex based on atomics @@ -22,8 +263,6 @@ subject to the following restrictions: // context switching. // -#if BT_THREADSAFE - #if __cplusplus >= 201103L // for anything claiming full C++11 compliance, use C++11 atomics @@ -229,3 +468,64 @@ bool btSpinMutex::tryLock() #endif // #if BT_THREADSAFE + +/// +/// btTaskSchedulerSequential -- non-threaded implementation of task scheduler +/// (fallback in case no multi-threaded schedulers are available) +/// +class btTaskSchedulerSequential : public btITaskScheduler +{ +public: + btTaskSchedulerSequential() : btITaskScheduler( "Sequential" ) {} + virtual int getMaxNumThreads() const BT_OVERRIDE { return 1; } + virtual int getNumThreads() const BT_OVERRIDE { return 1; } + virtual void setNumThreads( int numThreads ) BT_OVERRIDE {} + virtual void parallelFor( int iBegin, int iEnd, int grainSize, const btIParallelForBody& body ) BT_OVERRIDE + { + body.forLoop( iBegin, iEnd ); + } +}; + +// create a non-threaded task scheduler (always available) +btITaskScheduler* btGetSequentialTaskScheduler() +{ + static btTaskSchedulerSequential sTaskScheduler; + return &sTaskScheduler; +} + + +// create an OpenMP task scheduler (if available, otherwise returns null) +btITaskScheduler* btGetOpenMPTaskScheduler() +{ +#if BT_USE_OPENMP && BT_THREADSAFE + static btTaskSchedulerOpenMP sTaskScheduler; + return &sTaskScheduler; +#else + return NULL; +#endif +} + + +// create an Intel TBB task scheduler (if available, otherwise returns null) +btITaskScheduler* btGetTBBTaskScheduler() +{ +#if BT_USE_TBB && BT_THREADSAFE + static btTaskSchedulerTBB sTaskScheduler; + return &sTaskScheduler; +#else + return NULL; +#endif +} + + +// create a PPL task scheduler (if available, otherwise returns null) +btITaskScheduler* btGetPPLTaskScheduler() +{ +#if BT_USE_PPL && BT_THREADSAFE + static btTaskSchedulerPPL sTaskScheduler; + return &sTaskScheduler; +#else + return NULL; +#endif +} + diff --git a/src/LinearMath/btThreads.h b/src/LinearMath/btThreads.h index a15e5250b..b592ec3e5 100644 --- a/src/LinearMath/btThreads.h +++ b/src/LinearMath/btThreads.h @@ -19,6 +19,15 @@ subject to the following restrictions: #include "btScalar.h" // has definitions like SIMD_FORCE_INLINE +#if defined (_MSC_VER) && _MSC_VER >= 1600 +// give us a compile error if any signatures of overriden methods is changed +#define BT_OVERRIDE override +#endif + +#ifndef BT_OVERRIDE +#define BT_OVERRIDE +#endif + /// /// btSpinMutex -- lightweight spin-mutex implemented with atomic ops, never puts /// a thread to sleep because it is designed to be used with a task scheduler @@ -59,6 +68,7 @@ SIMD_FORCE_INLINE bool btMutexTryLock( btSpinMutex* mutex ) // for internal use only bool btIsMainThread(); +bool btThreadsAreRunning(); unsigned int btGetCurrentThreadIndex(); const unsigned int BT_MAX_THREAD_COUNT = 64; @@ -71,5 +81,55 @@ SIMD_FORCE_INLINE void btMutexUnlock( btSpinMutex* ) {} SIMD_FORCE_INLINE bool btMutexTryLock( btSpinMutex* ) {return true;} #endif +// +// btIParallelForBody -- subclass this to express work that can be done in parallel +// +class btIParallelForBody +{ +public: + virtual void forLoop( int iBegin, int iEnd ) const = 0; +}; + +// +// btITaskScheduler -- subclass this to implement a task scheduler that can dispatch work to +// worker threads +// +class btITaskScheduler +{ + const char* m_name; +public: + btITaskScheduler( const char* name ) : m_name( name ) {} + const char* getName() const { return m_name; } + + virtual ~btITaskScheduler() {} + virtual int getMaxNumThreads() const = 0; + virtual int getNumThreads() const = 0; + virtual void setNumThreads( int numThreads ) = 0; + virtual void parallelFor( int iBegin, int iEnd, int grainSize, const btIParallelForBody& body ) = 0; +}; + +// set the task scheduler to use for all calls to btParallelFor() +// NOTE: you must set this prior to using any of the multi-threaded "Mt" classes +void btSetTaskScheduler( btITaskScheduler* ts ); + +// get the current task scheduler +btITaskScheduler* btGetTaskScheduler(); + +// get non-threaded task scheduler (always available) +btITaskScheduler* btGetSequentialTaskScheduler(); + +// get OpenMP task scheduler (if available, otherwise returns null) +btITaskScheduler* btGetOpenMPTaskScheduler(); + +// get Intel TBB task scheduler (if available, otherwise returns null) +btITaskScheduler* btGetTBBTaskScheduler(); + +// get PPL task scheduler (if available, otherwise returns null) +btITaskScheduler* btGetPPLTaskScheduler(); + +// btParallelFor -- call this to dispatch work like a for-loop +// (iterations may be done out of order, so no dependencies are allowed) +void btParallelFor( int iBegin, int iEnd, int grainSize, const btIParallelForBody& body ); + #endif //BT_THREADS_H