Bullet 2 threading refactor: moved parallel-for calls into core libs

Lunkhound
2017-05-22 00:47:11 -07:00
parent 2f3844e5db
commit dfe184e8d3
14 changed files with 1012 additions and 847 deletions

View File

@@ -32,7 +32,6 @@ subject to the following restrictions:
#include "LinearMath/btAlignedObjectArray.h"
#include "LinearMath/btTransform.h"
#include "../MultiThreadedDemo/ParallelFor.h"
class btDynamicsWorld;
@@ -230,7 +229,7 @@ public:
}
}
struct CastRaysLoopBody
struct CastRaysLoopBody : public btIParallelForBody
{
btCollisionWorld* mWorld;
btRaycastBar2* mRaycasts;
@@ -274,7 +273,7 @@ public:
{
CastRaysLoopBody rayLooper(cw, this);
int grainSize = 20; // number of raycasts per task
parallelFor( 0, NUMRAYS, grainSize, rayLooper );
btParallelFor( 0, NUMRAYS, grainSize, rayLooper );
}
else
#endif // USE_PARALLEL_RAYCASTS
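
For context, a minimal sketch of the relocated API used in the hunk above: the loop body now derives from btIParallelForBody and is submitted through btParallelFor, both provided by the core library. The body type, its data, and the "LinearMath/btThreads.h" header name are illustrative assumptions, not part of this commit.

// Illustrative only: a stand-in for CastRaysLoopBody showing the btIParallelForBody /
// btParallelFor pattern this commit switches to. The header name is an assumption.
#include "LinearMath/btThreads.h"
#include "LinearMath/btScalar.h"

struct SquareLoopBody : public btIParallelForBody
{
    btScalar* mValues;  // data the loop operates on (hypothetical)
    explicit SquareLoopBody( btScalar* values ) : mValues( values ) {}
    // btParallelFor hands each worker task a sub-range [iBegin, iEnd)
    virtual void forLoop( int iBegin, int iEnd ) const
    {
        for ( int i = iBegin; i < iEnd; ++i )
        {
            mValues[ i ] *= mValues[ i ];
        }
    }
};

void squareAll( btScalar* values, int count )
{
    SquareLoopBody body( values );
    int grainSize = 64;  // iterations per task, like grainSize in the raycast hunk
    btParallelFor( 0, count, grainSize, body );  // replaces the demo-local parallelFor()
}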

View File

@@ -110,29 +110,6 @@ ELSE(WIN32)
ENDIF(APPLE)
ENDIF(WIN32)
IF (BULLET2_MULTITHREADED_OPEN_MP_DEMO)
ADD_DEFINITIONS("-DBT_USE_OPENMP=1")
IF (MSVC)
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /openmp")
ELSE (MSVC)
# GCC, Clang
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp")
ENDIF (MSVC)
ENDIF (BULLET2_MULTITHREADED_OPEN_MP_DEMO)
IF (BULLET2_MULTITHREADED_PPL_DEMO)
ADD_DEFINITIONS("-DBT_USE_PPL=1")
ENDIF (BULLET2_MULTITHREADED_PPL_DEMO)
IF (BULLET2_MULTITHREADED_TBB_DEMO)
SET (BULLET2_TBB_INCLUDE_DIR "not found" CACHE PATH "Directory for Intel TBB includes.")
SET (BULLET2_TBB_LIB_DIR "not found" CACHE PATH "Directory for Intel TBB libraries.")
find_library(TBB_LIBRARY tbb PATHS ${BULLET2_TBB_LIB_DIR})
find_library(TBBMALLOC_LIBRARY tbbmalloc PATHS ${BULLET2_TBB_LIB_DIR})
ADD_DEFINITIONS("-DBT_USE_TBB=1")
INCLUDE_DIRECTORIES( ${BULLET2_TBB_INCLUDE_DIR} )
LINK_LIBRARIES( ${TBB_LIBRARY} ${TBBMALLOC_LIBRARY} )
ENDIF (BULLET2_MULTITHREADED_TBB_DEMO)
SET(ExtendedTutorialsSources
../ExtendedTutorials/Chain.cpp
@@ -207,7 +184,6 @@ SET(BulletExampleBrowser_SRCS
../MultiThreadedDemo/MultiThreadedDemo.h
../MultiThreadedDemo/CommonRigidBodyMTBase.cpp
../MultiThreadedDemo/CommonRigidBodyMTBase.h
../MultiThreadedDemo/ParallelFor.h
../Tutorial/Tutorial.cpp
../Tutorial/Tutorial.h
../Tutorial/Dof6ConstraintTutorial.cpp
@@ -386,7 +362,7 @@ ADD_CUSTOM_COMMAND(
COMMAND ${CMAKE_COMMAND} ARGS -E copy_directory ${BULLET_PHYSICS_SOURCE_DIR}/data ${PROJECT_BINARY_DIR}/data
)
IF (BULLET2_MULTITHREADED_TBB_DEMO AND WIN32)
IF (BULLET2_USE_TBB_MULTITHREADING AND WIN32)
# add a post build command to copy some dlls to the executable directory
set(TBB_VC_VER "vc12")
set(TBB_VC_ARCH "ia32")
@@ -400,7 +376,7 @@ IF (BULLET2_MULTITHREADED_TBB_DEMO AND WIN32)
COMMAND ${CMAKE_COMMAND} -E copy_if_different
"${BULLET2_TBB_INCLUDE_DIR}/../bin/${TBB_VC_ARCH}/${TBB_VC_VER}/tbbmalloc.dll"
$<TARGET_FILE_DIR:App_ExampleBrowser>)
ENDIF (BULLET2_MULTITHREADED_TBB_DEMO AND WIN32)
ENDIF (BULLET2_USE_TBB_MULTITHREADING AND WIN32)
IF (INTERNAL_ADD_POSTFIX_EXECUTABLE_NAMES)
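
The per-demo OpenMP/TBB/PPL configuration removed above now follows the library-level BT_USE_OPENMP / BT_USE_TBB / BT_USE_PPL defines. The sketch below shows the selection pattern implied by the null check in addTaskScheduler() later in this commit; the assumption that a getter returns NULL when its backend was not compiled in is inferred from that check, not stated in the diff.

// Sketch (assumption): each btGet*TaskScheduler() getter returns NULL when the
// corresponding backend was not compiled into the library, so callers probe and
// fall back to the always-available sequential scheduler.
#include "LinearMath/btThreads.h"  // assumed header for the task-scheduler API

btITaskScheduler* pickTaskScheduler()
{
    if ( btITaskScheduler* ts = btGetOpenMPTaskScheduler() ) return ts;
    if ( btITaskScheduler* ts = btGetTBBTaskScheduler() ) return ts;
    if ( btITaskScheduler* ts = btGetPPLTaskScheduler() ) return ts;
    return btGetSequentialTaskScheduler();  // always present
}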

View File

@@ -23,10 +23,10 @@ class btCollisionShape;
#include "CommonRigidBodyMTBase.h"
#include "../CommonInterfaces/CommonParameterInterface.h"
#include "ParallelFor.h"
#include "LinearMath/btAlignedObjectArray.h"
#include "LinearMath/btPoolAllocator.h"
#include "btBulletCollisionCommon.h"
#include "BulletCollision/CollisionDispatch/btCollisionDispatcherMt.h"
#include "BulletDynamics/Dynamics/btSimulationIslandManagerMt.h" // for setSplitIslands()
#include "BulletDynamics/Dynamics/btDiscreteDynamicsWorldMt.h"
#include "BulletDynamics/ConstraintSolver/btSequentialImpulseConstraintSolver.h"
@@ -36,20 +36,6 @@ class btCollisionShape;
#include "BulletDynamics/MLCPSolvers/btDantzigSolver.h"
#include "BulletDynamics/MLCPSolvers/btLemkeSolver.h"
TaskManager gTaskMgr;
#define USE_PARALLEL_NARROWPHASE 1 // detect collisions in parallel
#define USE_PARALLEL_ISLAND_SOLVER 1 // solve simulation islands in parallel
#define USE_PARALLEL_CREATE_PREDICTIVE_CONTACTS 1
#define USE_PARALLEL_INTEGRATE_TRANSFORMS 1
#define USE_PARALLEL_PREDICT_UNCONSTRAINED_MOTION 1
#if defined (_MSC_VER) && _MSC_VER >= 1600
// give us a compile error if any signatures of overridden methods are changed
#define BT_OVERRIDE override
#else
#define BT_OVERRIDE
#endif
static int gNumIslands = 0;
@@ -124,7 +110,7 @@ public:
};
Profiler gProfiler;
static Profiler gProfiler;
class ProfileHelper
{
@@ -141,457 +127,84 @@ public:
}
};
int gThreadsRunningCounter = 0;
btSpinMutex gThreadsRunningCounterMutex;
void btPushThreadsAreRunning()
static void profileBeginCallback( btDynamicsWorld *world, btScalar timeStep )
{
gThreadsRunningCounterMutex.lock();
gThreadsRunningCounter++;
gThreadsRunningCounterMutex.unlock();
gProfiler.begin( Profiler::kRecordInternalTimeStep );
}
void btPopThreadsAreRunning()
static void profileEndCallback( btDynamicsWorld *world, btScalar timeStep )
{
gThreadsRunningCounterMutex.lock();
gThreadsRunningCounter--;
gThreadsRunningCounterMutex.unlock();
}
bool btThreadsAreRunning()
{
return gThreadsRunningCounter != 0;
gProfiler.end( Profiler::kRecordInternalTimeStep );
}
#if USE_PARALLEL_NARROWPHASE
class MyCollisionDispatcher : public btCollisionDispatcher
///
/// MyCollisionDispatcher -- subclassed for profiling purposes
///
class MyCollisionDispatcher : public btCollisionDispatcherMt
{
btSpinMutex m_manifoldPtrsMutex;
typedef btCollisionDispatcherMt ParentClass;
public:
MyCollisionDispatcher( btCollisionConfiguration* config ) : btCollisionDispatcher( config )
MyCollisionDispatcher( btCollisionConfiguration* config, int grainSize ) : btCollisionDispatcherMt( config, grainSize )
{
}
virtual ~MyCollisionDispatcher()
{
}
btPersistentManifold* getNewManifold( const btCollisionObject* body0, const btCollisionObject* body1 ) BT_OVERRIDE
{
// added spin-locks
//optional relative contact breaking threshold, turned on by default (use setDispatcherFlags to switch off feature for improved performance)
btScalar contactBreakingThreshold = ( m_dispatcherFlags & btCollisionDispatcher::CD_USE_RELATIVE_CONTACT_BREAKING_THRESHOLD ) ?
btMin( body0->getCollisionShape()->getContactBreakingThreshold( gContactBreakingThreshold ), body1->getCollisionShape()->getContactBreakingThreshold( gContactBreakingThreshold ) )
: gContactBreakingThreshold;
btScalar contactProcessingThreshold = btMin( body0->getContactProcessingThreshold(), body1->getContactProcessingThreshold() );
void* mem = m_persistentManifoldPoolAllocator->allocate( sizeof( btPersistentManifold ) );
if (NULL == mem)
{
//we got a pool memory overflow, by default we fallback to dynamically allocate memory. If we require a contiguous contact pool then assert.
if ( ( m_dispatcherFlags&CD_DISABLE_CONTACTPOOL_DYNAMIC_ALLOCATION ) == 0 )
{
mem = btAlignedAlloc( sizeof( btPersistentManifold ), 16 );
}
else
{
btAssert( 0 );
//make sure to increase the m_defaultMaxPersistentManifoldPoolSize in the btDefaultCollisionConstructionInfo/btDefaultCollisionConfiguration
return 0;
}
}
btPersistentManifold* manifold = new(mem) btPersistentManifold( body0, body1, 0, contactBreakingThreshold, contactProcessingThreshold );
m_manifoldPtrsMutex.lock();
manifold->m_index1a = m_manifoldsPtr.size();
m_manifoldsPtr.push_back( manifold );
m_manifoldPtrsMutex.unlock();
return manifold;
}
void releaseManifold( btPersistentManifold* manifold ) BT_OVERRIDE
{
clearManifold( manifold );
m_manifoldPtrsMutex.lock();
int findIndex = manifold->m_index1a;
btAssert( findIndex < m_manifoldsPtr.size() );
m_manifoldsPtr.swap( findIndex, m_manifoldsPtr.size() - 1 );
m_manifoldsPtr[ findIndex ]->m_index1a = findIndex;
m_manifoldsPtr.pop_back();
m_manifoldPtrsMutex.unlock();
manifold->~btPersistentManifold();
if ( m_persistentManifoldPoolAllocator->validPtr( manifold ) )
{
m_persistentManifoldPoolAllocator->freeMemory( manifold );
}
else
{
btAlignedFree( manifold );
}
}
struct Updater
{
btBroadphasePair* mPairArray;
btNearCallback mCallback;
btCollisionDispatcher* mDispatcher;
const btDispatcherInfo* mInfo;
Updater()
{
mPairArray = NULL;
mCallback = NULL;
mDispatcher = NULL;
mInfo = NULL;
}
void forLoop( int iBegin, int iEnd ) const
{
for ( int i = iBegin; i < iEnd; ++i )
{
btBroadphasePair* pair = &mPairArray[ i ];
mCallback( *pair, *mDispatcher, *mInfo );
}
}
};
virtual void dispatchAllCollisionPairs( btOverlappingPairCache* pairCache, const btDispatcherInfo& info, btDispatcher* dispatcher ) BT_OVERRIDE
{
ProfileHelper prof(Profiler::kRecordDispatchAllCollisionPairs);
int grainSize = 40; // iterations per task
int pairCount = pairCache->getNumOverlappingPairs();
Updater updater;
updater.mCallback = getNearCallback();
updater.mPairArray = pairCount > 0 ? pairCache->getOverlappingPairArrayPtr() : NULL;
updater.mDispatcher = this;
updater.mInfo = &info;
btPushThreadsAreRunning();
parallelFor( 0, pairCount, grainSize, updater );
btPopThreadsAreRunning();
if (m_manifoldsPtr.size() < 1)
return;
// reconstruct the manifolds array to ensure determinism
m_manifoldsPtr.resizeNoInitialize(0);
btBroadphasePair* pairs = pairCache->getOverlappingPairArrayPtr();
for (int i = 0; i < pairCount; ++i)
{
btCollisionAlgorithm* algo = pairs[i].m_algorithm;
if (algo) algo->getAllContactManifolds(m_manifoldsPtr);
}
// update the indices (used when releasing manifolds)
for (int i = 0; i < m_manifoldsPtr.size(); ++i)
m_manifoldsPtr[i]->m_index1a = i;
ProfileHelper prof( Profiler::kRecordDispatchAllCollisionPairs );
ParentClass::dispatchAllCollisionPairs( pairCache, info, dispatcher );
}
};
#endif
#if USE_PARALLEL_ISLAND_SOLVER
///
/// MyConstraintSolverPool - masquerades as a constraint solver, but really it is a threadsafe pool of them.
///
/// Each solver in the pool is protected by a mutex. When solveGroup is called from a thread,
/// the pool looks for a solver that isn't being used by another thread, locks it, and dispatches the
/// call to the solver.
/// So long as there are at least as many solvers as there are hardware threads, it should never need to
/// spin wait.
///
class MyConstraintSolverPool : public btConstraintSolver
/// myParallelIslandDispatch -- wrap default parallel dispatch for profiling and to get the number of simulation islands
//
void myParallelIslandDispatch( btAlignedObjectArray<btSimulationIslandManagerMt::Island*>* islandsPtr, btSimulationIslandManagerMt::IslandCallback* callback )
{
const static size_t kCacheLineSize = 128;
struct ThreadSolver
{
btConstraintSolver* solver;
btSpinMutex mutex;
char _cachelinePadding[ kCacheLineSize - sizeof( btSpinMutex ) - sizeof( void* ) ]; // keep mutexes from sharing a cache line
};
btAlignedObjectArray<ThreadSolver> m_solvers;
btConstraintSolverType m_solverType;
ThreadSolver* getAndLockThreadSolver()
{
while ( true )
{
for ( int i = 0; i < m_solvers.size(); ++i )
{
ThreadSolver& solver = m_solvers[ i ];
if ( solver.mutex.tryLock() )
{
return &solver;
}
}
}
return NULL;
}
void init( btConstraintSolver** solvers, int numSolvers )
{
m_solverType = BT_SEQUENTIAL_IMPULSE_SOLVER;
m_solvers.resize( numSolvers );
for ( int i = 0; i < numSolvers; ++i )
{
m_solvers[ i ].solver = solvers[ i ];
}
if ( numSolvers > 0 )
{
m_solverType = solvers[ 0 ]->getSolverType();
}
}
public:
// create the solvers for me
explicit MyConstraintSolverPool( int numSolvers )
{
btAlignedObjectArray<btConstraintSolver*> solvers;
solvers.reserve( numSolvers );
for ( int i = 0; i < numSolvers; ++i )
{
btConstraintSolver* solver = new btSequentialImpulseConstraintSolver();
solvers.push_back( solver );
}
init( &solvers[ 0 ], numSolvers );
}
// pass in fully constructed solvers (destructor will delete them)
MyConstraintSolverPool( btConstraintSolver** solvers, int numSolvers )
{
init( solvers, numSolvers );
}
virtual ~MyConstraintSolverPool()
{
// delete all solvers
for ( int i = 0; i < m_solvers.size(); ++i )
{
ThreadSolver& solver = m_solvers[ i ];
delete solver.solver;
solver.solver = NULL;
}
}
//virtual void prepareSolve( int /* numBodies */, int /* numManifolds */ ) { ; } // does nothing
///solve a group of constraints
virtual btScalar solveGroup( btCollisionObject** bodies,
int numBodies,
btPersistentManifold** manifolds,
int numManifolds,
btTypedConstraint** constraints,
int numConstraints,
const btContactSolverInfo& info,
btIDebugDraw* debugDrawer,
btDispatcher* dispatcher
)
{
ThreadSolver* solver = getAndLockThreadSolver();
solver->solver->solveGroup( bodies, numBodies, manifolds, numManifolds, constraints, numConstraints, info, debugDrawer, dispatcher );
solver->mutex.unlock();
return 0.0f;
}
//virtual void allSolved( const btContactSolverInfo& /* info */, class btIDebugDraw* /* debugDrawer */ ) { ; } // does nothing
///clear internal cached data and reset random seed
virtual void reset()
{
for ( int i = 0; i < m_solvers.size(); ++i )
{
ThreadSolver& solver = m_solvers[ i ];
solver.mutex.lock();
solver.solver->reset();
solver.mutex.unlock();
}
}
virtual btConstraintSolverType getSolverType() const
{
return m_solverType;
}
};
struct UpdateIslandDispatcher
{
btAlignedObjectArray<btSimulationIslandManagerMt::Island*>* islandsPtr;
btSimulationIslandManagerMt::IslandCallback* callback;
void forLoop( int iBegin, int iEnd ) const
{
for ( int i = iBegin; i < iEnd; ++i )
{
btSimulationIslandManagerMt::Island* island = ( *islandsPtr )[ i ];
btPersistentManifold** manifolds = island->manifoldArray.size() ? &island->manifoldArray[ 0 ] : NULL;
btTypedConstraint** constraintsPtr = island->constraintArray.size() ? &island->constraintArray[ 0 ] : NULL;
callback->processIsland( &island->bodyArray[ 0 ],
island->bodyArray.size(),
manifolds,
island->manifoldArray.size(),
constraintsPtr,
island->constraintArray.size(),
island->id
);
}
}
};
void parallelIslandDispatch( btAlignedObjectArray<btSimulationIslandManagerMt::Island*>* islandsPtr, btSimulationIslandManagerMt::IslandCallback* callback )
{
ProfileHelper prof(Profiler::kRecordDispatchIslands);
ProfileHelper prof( Profiler::kRecordDispatchIslands );
gNumIslands = islandsPtr->size();
int grainSize = 1; // iterations per task
UpdateIslandDispatcher dispatcher;
dispatcher.islandsPtr = islandsPtr;
dispatcher.callback = callback;
btPushThreadsAreRunning();
parallelFor( 0, islandsPtr->size(), grainSize, dispatcher );
btPopThreadsAreRunning();
}
#endif //#if USE_PARALLEL_ISLAND_SOLVER
void profileBeginCallback(btDynamicsWorld *world, btScalar timeStep)
{
gProfiler.begin(Profiler::kRecordInternalTimeStep);
btSimulationIslandManagerMt::parallelIslandDispatch( islandsPtr, callback );
}
void profileEndCallback(btDynamicsWorld *world, btScalar timeStep)
{
gProfiler.end(Profiler::kRecordInternalTimeStep);
}
///
/// MyDiscreteDynamicsWorld
///
/// Should function exactly like btDiscreteDynamicsWorld.
/// 3 methods that iterate over all of the rigidbodies can run in parallel:
/// - predictUnconstraintMotion
/// - integrateTransforms
/// - createPredictiveContacts
/// MyDiscreteDynamicsWorld -- subclassed for profiling purposes
///
ATTRIBUTE_ALIGNED16( class ) MyDiscreteDynamicsWorld : public btDiscreteDynamicsWorldMt
{
typedef btDiscreteDynamicsWorld ParentClass;
protected:
#if USE_PARALLEL_PREDICT_UNCONSTRAINED_MOTION
struct UpdaterUnconstrainedMotion
{
btScalar timeStep;
btRigidBody** rigidBodies;
void forLoop( int iBegin, int iEnd ) const
{
for ( int i = iBegin; i < iEnd; ++i )
{
btRigidBody* body = rigidBodies[ i ];
if ( !body->isStaticOrKinematicObject() )
{
//don't integrate/update velocities here, it happens in the constraint solver
body->applyDamping( timeStep );
body->predictIntegratedTransform( timeStep, body->getInterpolationWorldTransform() );
}
}
}
};
virtual void predictUnconstraintMotion( btScalar timeStep ) BT_OVERRIDE
{
ProfileHelper prof( Profiler::kRecordPredictUnconstrainedMotion );
BT_PROFILE( "predictUnconstraintMotion" );
int grainSize = 50; // num of iterations per task for TBB
int bodyCount = m_nonStaticRigidBodies.size();
UpdaterUnconstrainedMotion update;
update.timeStep = timeStep;
update.rigidBodies = bodyCount ? &m_nonStaticRigidBodies[ 0 ] : NULL;
btPushThreadsAreRunning();
parallelFor( 0, bodyCount, grainSize, update );
btPopThreadsAreRunning();
ParentClass::predictUnconstraintMotion( timeStep );
}
#endif // #if USE_PARALLEL_PREDICT_UNCONSTRAINED_MOTION
#if USE_PARALLEL_CREATE_PREDICTIVE_CONTACTS
struct UpdaterCreatePredictiveContacts
{
btScalar timeStep;
btRigidBody** rigidBodies;
MyDiscreteDynamicsWorld* world;
void forLoop( int iBegin, int iEnd ) const
{
world->createPredictiveContactsInternal( &rigidBodies[ iBegin ], iEnd - iBegin, timeStep );
}
};
virtual void createPredictiveContacts( btScalar timeStep )
virtual void createPredictiveContacts( btScalar timeStep ) BT_OVERRIDE
{
ProfileHelper prof( Profiler::kRecordCreatePredictiveContacts );
releasePredictiveContacts();
int grainSize = 50; // num of iterations per task for TBB or OPENMP
if ( int bodyCount = m_nonStaticRigidBodies.size() )
{
UpdaterCreatePredictiveContacts update;
update.world = this;
update.timeStep = timeStep;
update.rigidBodies = &m_nonStaticRigidBodies[ 0 ];
btPushThreadsAreRunning();
parallelFor( 0, bodyCount, grainSize, update );
btPopThreadsAreRunning();
}
ParentClass::createPredictiveContacts( timeStep );
}
#endif // #if USE_PARALLEL_CREATE_PREDICTIVE_CONTACTS
#if USE_PARALLEL_INTEGRATE_TRANSFORMS
struct UpdaterIntegrateTransforms
{
btScalar timeStep;
btRigidBody** rigidBodies;
MyDiscreteDynamicsWorld* world;
void forLoop( int iBegin, int iEnd ) const
{
world->integrateTransformsInternal( &rigidBodies[ iBegin ], iEnd - iBegin, timeStep );
}
};
virtual void integrateTransforms( btScalar timeStep ) BT_OVERRIDE
{
ProfileHelper prof( Profiler::kRecordIntegrateTransforms );
BT_PROFILE( "integrateTransforms" );
int grainSize = 50; // num of iterations per task for TBB or OPENMP
if ( int bodyCount = m_nonStaticRigidBodies.size() )
{
UpdaterIntegrateTransforms update;
update.world = this;
update.timeStep = timeStep;
update.rigidBodies = &m_nonStaticRigidBodies[ 0 ];
btPushThreadsAreRunning();
parallelFor( 0, bodyCount, grainSize, update );
btPopThreadsAreRunning();
}
ParentClass::integrateTransforms( timeStep );
}
#endif // #if USE_PARALLEL_INTEGRATE_TRANSFORMS
public:
BT_DECLARE_ALIGNED_ALLOCATOR();
MyDiscreteDynamicsWorld( btDispatcher* dispatcher,
btBroadphaseInterface* pairCache,
btConstraintSolver* constraintSolver,
btConstraintSolverPoolMt* constraintSolver,
btCollisionConfiguration* collisionConfiguration
) :
btDiscreteDynamicsWorldMt( dispatcher, pairCache, constraintSolver, collisionConfiguration )
{
#if USE_PARALLEL_ISLAND_SOLVER
btSimulationIslandManagerMt* islandMgr = static_cast<btSimulationIslandManagerMt*>( m_islandManager );
islandMgr->setIslandDispatchFunction( parallelIslandDispatch );
#endif //#if USE_PARALLEL_ISLAND_SOLVER
islandMgr->setIslandDispatchFunction( myParallelIslandDispatch );
}
};
@@ -625,6 +238,47 @@ btConstraintSolver* createSolverByType( SolverType t )
}
///
/// btTaskSchedulerManager -- manage a number of task schedulers so we can switch between them
///
class btTaskSchedulerManager
{
btAlignedObjectArray<btITaskScheduler*> m_taskSchedulers;
public:
btTaskSchedulerManager() {}
void init()
{
addTaskScheduler( btGetSequentialTaskScheduler() );
addTaskScheduler( btGetOpenMPTaskScheduler() );
addTaskScheduler( btGetTBBTaskScheduler() );
addTaskScheduler( btGetPPLTaskScheduler() );
if ( getNumTaskSchedulers() > 1 )
{
// prefer a non-sequential scheduler if available
btSetTaskScheduler( m_taskSchedulers[ 1 ] );
}
else
{
btSetTaskScheduler( m_taskSchedulers[ 0 ] );
}
btGetTaskScheduler()->setNumThreads( btGetTaskScheduler()->getMaxNumThreads() );
}
void addTaskScheduler( btITaskScheduler* ts )
{
if ( ts )
{
m_taskSchedulers.push_back( ts );
}
}
int getNumTaskSchedulers() const { return m_taskSchedulers.size(); }
btITaskScheduler* getTaskScheduler( int i ) { return m_taskSchedulers[ i ]; }
};
static btTaskSchedulerManager gTaskSchedulerMgr;
static bool gMultithreadedWorld = false;
static bool gDisplayProfileInfo = false;
static SolverType gSolverType = SOLVER_TYPE_SEQUENTIAL_IMPULSE;
@@ -652,15 +306,17 @@ CommonRigidBodyMTBase::CommonRigidBodyMTBase( struct GUIHelperInterface* helper
{
m_multithreadedWorld = false;
m_multithreadCapable = false;
gTaskMgr.init();
if ( gTaskSchedulerMgr.getNumTaskSchedulers() == 0 )
{
gTaskSchedulerMgr.init();
}
}
CommonRigidBodyMTBase::~CommonRigidBodyMTBase()
{
gTaskMgr.shutdown();
}
void boolPtrButtonCallback(int buttonId, bool buttonState, void* userPointer)
static void boolPtrButtonCallback(int buttonId, bool buttonState, void* userPointer)
{
if (bool* val = static_cast<bool*>(userPointer))
{
@@ -668,7 +324,7 @@ void boolPtrButtonCallback(int buttonId, bool buttonState, void* userPointer)
}
}
void toggleSolverModeCallback(int buttonId, bool buttonState, void* userPointer)
static void toggleSolverModeCallback(int buttonId, bool buttonState, void* userPointer)
{
if (buttonState)
{
@@ -687,7 +343,7 @@ void toggleSolverModeCallback(int buttonId, bool buttonState, void* userPointer)
}
}
void setSolverTypeCallback(int buttonId, bool buttonState, void* userPointer)
static void setSolverTypeCallback(int buttonId, bool buttonState, void* userPointer)
{
if (buttonId >= 0 && buttonId < SOLVER_TYPE_COUNT)
{
@@ -695,32 +351,30 @@ void setSolverTypeCallback(int buttonId, bool buttonState, void* userPointer)
}
}
void apiSelectButtonCallback(int buttonId, bool buttonState, void* userPointer)
static void setNumThreads( int numThreads )
{
gTaskMgr.setApi(static_cast<TaskManager::Api>(buttonId));
if (gTaskMgr.getApi()==TaskManager::apiNone)
int newNumThreads = ( std::min )( numThreads, int( BT_MAX_THREAD_COUNT ) );
int oldNumThreads = btGetTaskScheduler()->getNumThreads();
// only call when the thread count is different
if ( newNumThreads != oldNumThreads )
{
gSliderNumThreads = 1.0f;
}
else
{
gSliderNumThreads = float(gTaskMgr.getNumThreads());
btGetTaskScheduler()->setNumThreads( newNumThreads );
}
}
void setThreadCountCallback(float val, void* userPtr)
static void apiSelectButtonCallback(int buttonId, bool buttonState, void* userPointer)
{
if (gTaskMgr.getApi()==TaskManager::apiNone)
{
gSliderNumThreads = 1.0f;
}
else
{
gTaskMgr.setNumThreads( int( gSliderNumThreads ) );
}
// change the task scheduler
btSetTaskScheduler( gTaskSchedulerMgr.getTaskScheduler( buttonId ) );
setNumThreads( int( gSliderNumThreads ) );
}
void setSolverIterationCountCallback(float val, void* userPtr)
static void setThreadCountCallback(float val, void* userPtr)
{
setNumThreads( int( gSliderNumThreads ) );
}
static void setSolverIterationCountCallback(float val, void* userPtr)
{
if (btDiscreteDynamicsWorld* world = reinterpret_cast<btDiscreteDynamicsWorld*>(userPtr))
{
@@ -733,6 +387,7 @@ void CommonRigidBodyMTBase::createEmptyDynamicsWorld()
gNumIslands = 0;
m_solverType = gSolverType;
#if BT_THREADSAFE && (BT_USE_OPENMP || BT_USE_PPL || BT_USE_TBB)
btAssert( btGetTaskScheduler() != NULL );
m_multithreadCapable = true;
#endif
if ( gMultithreadedWorld )
@@ -743,30 +398,24 @@ void CommonRigidBodyMTBase::createEmptyDynamicsWorld()
cci.m_defaultMaxCollisionAlgorithmPoolSize = 80000;
m_collisionConfiguration = new btDefaultCollisionConfiguration( cci );
#if USE_PARALLEL_NARROWPHASE
m_dispatcher = new MyCollisionDispatcher( m_collisionConfiguration );
#else
m_dispatcher = new btCollisionDispatcher( m_collisionConfiguration );
#endif //USE_PARALLEL_NARROWPHASE
m_dispatcher = new MyCollisionDispatcher( m_collisionConfiguration, 40 );
m_broadphase = new btDbvtBroadphase();
#if BT_THREADSAFE && USE_PARALLEL_ISLAND_SOLVER
btConstraintSolverPoolMt* solverPool;
{
btConstraintSolver* solvers[ BT_MAX_THREAD_COUNT ];
int maxThreadCount = btMin( int(BT_MAX_THREAD_COUNT), TaskManager::getMaxNumThreads() );
int maxThreadCount = BT_MAX_THREAD_COUNT;
for ( int i = 0; i < maxThreadCount; ++i )
{
solvers[ i ] = createSolverByType( m_solverType );
}
m_solver = new MyConstraintSolverPool( solvers, maxThreadCount );
solverPool = new btConstraintSolverPoolMt( solvers, maxThreadCount );
m_solver = solverPool;
}
#else
m_solver = createSolverByType( m_solverType );
#endif //#if USE_PARALLEL_ISLAND_SOLVER
btDiscreteDynamicsWorld* world = new MyDiscreteDynamicsWorld( m_dispatcher, m_broadphase, m_solver, m_collisionConfiguration );
btDiscreteDynamicsWorld* world = new MyDiscreteDynamicsWorld( m_dispatcher, m_broadphase, solverPool, m_collisionConfiguration );
m_dynamicsWorld = world;
m_multithreadedWorld = true;
btAssert( btGetTaskScheduler() != NULL );
}
else
{
@@ -886,24 +535,25 @@ void CommonRigidBodyMTBase::createDefaultParameters()
if (m_multithreadedWorld)
{
// create a button for each supported threading API
for (int iApi = 0; iApi < TaskManager::apiCount; ++iApi)
for ( int iApi = 0; iApi < gTaskSchedulerMgr.getNumTaskSchedulers(); ++iApi )
{
TaskManager::Api api = static_cast<TaskManager::Api>(iApi);
if (gTaskMgr.isSupported(api))
{
char str[1024];
sprintf(str, "API %s", gTaskMgr.getApiName(api));
ButtonParams button( str, iApi, false );
button.m_callback = apiSelectButtonCallback;
m_guiHelper->getParameterInterface()->registerButtonParameter( button );
}
char str[ 1024 ];
sprintf( str, "API %s", gTaskSchedulerMgr.getTaskScheduler(iApi)->getName() );
ButtonParams button( str, iApi, false );
button.m_callback = apiSelectButtonCallback;
m_guiHelper->getParameterInterface()->registerButtonParameter( button );
}
{
// create a slider to set the number of threads to use
gSliderNumThreads = float(gTaskMgr.getNumThreads());
int numThreads = btGetTaskScheduler()->getNumThreads();
// if slider has not been set yet (by another demo),
if ( gSliderNumThreads <= 1.0f )
{
gSliderNumThreads = float( numThreads );
}
SliderParams slider("Thread count", &gSliderNumThreads);
slider.m_minVal = 1.0f;
slider.m_maxVal = float(gTaskMgr.getMaxNumThreads()*2);
slider.m_maxVal = float( BT_MAX_THREAD_COUNT );
slider.m_callback = setThreadCountCallback;
slider.m_clampToIntegers = true;
m_guiHelper->getParameterInterface()->registerSliderFloatParameter( slider );
@@ -946,14 +596,14 @@ void CommonRigidBodyMTBase::drawScreenText()
const btPersistentManifold* man = m_dispatcher->getManifoldByIndexInternal( i );
numContacts += man->getNumContacts();
}
const char* mtApi = TaskManager::getApiName( gTaskMgr.getApi() );
const char* mtApi = btGetTaskScheduler()->getName();
sprintf( msg, "islands=%d bodies=%d manifolds=%d contacts=%d [%s] threads=%d",
gNumIslands,
m_dynamicsWorld->getNumCollisionObjects(),
numManifolds,
numContacts,
mtApi,
gTaskMgr.getApi() == TaskManager::apiNone ? 1 : gTaskMgr.getNumThreads()
btGetTaskScheduler()->getNumThreads()
);
m_guiHelper->getAppInterface()->drawText( msg, 100, yCoord, 0.4f );
yCoord += yStep;
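
Condensing the construction path shown in this file's diff, here is a sketch of how the multithreaded world is now assembled entirely from core-library classes. The helper function and its error-free flow are illustrative; the class names, constructor arguments, pool size, and the grain size of 40 are taken from the diff above.

// Illustrative consolidation of the world setup above; not a verbatim excerpt.
#include "btBulletDynamicsCommon.h"
#include "BulletCollision/CollisionDispatch/btCollisionDispatcherMt.h"
#include "BulletDynamics/Dynamics/btDiscreteDynamicsWorldMt.h"
#include "LinearMath/btThreads.h"  // assumed header for BT_MAX_THREAD_COUNT

btDiscreteDynamicsWorld* createMultithreadedWorld()
{
    btDefaultCollisionConstructionInfo cci;
    cci.m_defaultMaxCollisionAlgorithmPoolSize = 80000;  // value used in the diff
    btCollisionConfiguration* config = new btDefaultCollisionConfiguration( cci );

    // parallel narrowphase now lives in the core library; 40 = collision pairs per task
    btDispatcher* dispatcher = new btCollisionDispatcherMt( config, 40 );
    btBroadphaseInterface* broadphase = new btDbvtBroadphase();

    // one solver per possible worker thread, wrapped by the core solver pool
    btConstraintSolver* solvers[ BT_MAX_THREAD_COUNT ];
    for ( int i = 0; i < BT_MAX_THREAD_COUNT; ++i )
    {
        solvers[ i ] = new btSequentialImpulseConstraintSolver();
    }
    btConstraintSolverPoolMt* solverPool = new btConstraintSolverPoolMt( solvers, BT_MAX_THREAD_COUNT );

    return new btDiscreteDynamicsWorldMt( dispatcher, broadphase, solverPool, config );
}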

View File

@@ -1,336 +0,0 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#include <stdio.h> //printf debugging
#include <algorithm>
// choose threading providers:
#if BT_USE_TBB
#define USE_TBB 1 // use Intel Threading Building Blocks for thread management
#endif
#if BT_USE_PPL
#define USE_PPL 1 // use Microsoft Parallel Patterns Library (installed with Visual Studio 2010 and later)
#endif // BT_USE_PPL
#if BT_USE_OPENMP
#define USE_OPENMP 1 // use OpenMP (also need to change compiler options for OpenMP support)
#endif
#if USE_OPENMP
#include <omp.h>
#endif // #if USE_OPENMP
#if USE_PPL
#include <ppl.h> // if you get a compile error here, check whether your version of Visual Studio includes PPL
// Visual Studio 2010 and later should come with it
#include <concrtrm.h> // for GetProcessorCount()
#endif // #if USE_PPL
#if USE_TBB
#define __TBB_NO_IMPLICIT_LINKAGE 1
#include <tbb/tbb.h>
#include <tbb/task_scheduler_init.h>
#include <tbb/parallel_for.h>
#include <tbb/blocked_range.h>
#endif // #if USE_TBB
class TaskManager
{
public:
enum Api
{
apiNone,
apiOpenMP,
apiTbb,
apiPpl,
apiCount
};
static const char* getApiName( Api api )
{
switch ( api )
{
case apiNone: return "None";
case apiOpenMP: return "OpenMP";
case apiTbb: return "Intel TBB";
case apiPpl: return "MS PPL";
default: return "unknown";
}
}
TaskManager()
{
m_api = apiNone;
m_numThreads = 0;
#if USE_TBB
m_tbbSchedulerInit = NULL;
#endif // #if USE_TBB
}
Api getApi() const
{
return m_api;
}
bool isSupported( Api api ) const
{
#if USE_OPENMP
if ( api == apiOpenMP )
{
return true;
}
#endif
#if USE_TBB
if ( api == apiTbb )
{
return true;
}
#endif
#if USE_PPL
if ( api == apiPpl )
{
return true;
}
#endif
// apiNone is always "supported"
return api == apiNone;
}
void setApi( Api api )
{
if (isSupported(api))
{
m_api = api;
}
else
{
// no compile time support for selected API, fallback to "none"
m_api = apiNone;
}
}
static int getMaxNumThreads()
{
#if USE_OPENMP
return omp_get_max_threads();
#elif USE_PPL
return concurrency::GetProcessorCount();
#elif USE_TBB
return tbb::task_scheduler_init::default_num_threads();
#endif
return 1;
}
int getNumThreads() const
{
return m_numThreads;
}
int setNumThreads( int numThreads )
{
m_numThreads = ( std::max )( 1, numThreads );
#if USE_OPENMP
omp_set_num_threads( m_numThreads );
#endif
#if USE_PPL
{
using namespace concurrency;
if ( CurrentScheduler::Id() != -1 )
{
CurrentScheduler::Detach();
}
SchedulerPolicy policy;
policy.SetConcurrencyLimits( m_numThreads, m_numThreads );
CurrentScheduler::Create( policy );
}
#endif
#if USE_TBB
if ( m_tbbSchedulerInit )
{
delete m_tbbSchedulerInit;
m_tbbSchedulerInit = NULL;
}
m_tbbSchedulerInit = new tbb::task_scheduler_init( m_numThreads );
#endif
return m_numThreads;
}
void init()
{
if (m_numThreads == 0)
{
#if USE_PPL
setApi( apiPpl );
#endif
#if USE_TBB
setApi( apiTbb );
#endif
#if USE_OPENMP
setApi( apiOpenMP );
#endif
setNumThreads(getMaxNumThreads());
}
else
{
setNumThreads(m_numThreads);
}
}
void shutdown()
{
#if USE_TBB
if ( m_tbbSchedulerInit )
{
delete m_tbbSchedulerInit;
m_tbbSchedulerInit = NULL;
}
#endif
}
private:
Api m_api;
int m_numThreads;
#if USE_TBB
tbb::task_scheduler_init* m_tbbSchedulerInit;
#endif // #if USE_TBB
};
extern TaskManager gTaskMgr;
inline static void initTaskScheduler()
{
gTaskMgr.init();
}
inline static void cleanupTaskScheduler()
{
gTaskMgr.shutdown();
}
#if USE_TBB
///
/// TbbBodyAdapter -- Converts a body object that implements the
/// "forLoop(int iBegin, int iEnd) const" function
/// into a TBB compatible object that takes a tbb::blocked_range<int> type.
///
template <class TBody>
struct TbbBodyAdapter
{
const TBody* mBody;
void operator()( const tbb::blocked_range<int>& range ) const
{
mBody->forLoop( range.begin(), range.end() );
}
};
#endif // #if USE_TBB
#if USE_PPL
///
/// PplBodyAdapter -- Converts a body object that implements the
/// "forLoop(int iBegin, int iEnd) const" function
/// into a PPL compatible object that implements "void operator()( int ) const"
///
template <class TBody>
struct PplBodyAdapter
{
const TBody* mBody;
int mGrainSize;
int mIndexEnd;
void operator()( int i ) const
{
mBody->forLoop( i, (std::min)(i + mGrainSize, mIndexEnd) );
}
};
#endif // #if USE_PPL
///
/// parallelFor -- interface for submitting work expressed as a for loop to the worker threads
///
template <class TBody>
void parallelFor( int iBegin, int iEnd, int grainSize, const TBody& body )
{
#if USE_OPENMP
if ( gTaskMgr.getApi() == TaskManager::apiOpenMP )
{
#pragma omp parallel for schedule(static, 1)
for ( int i = iBegin; i < iEnd; i += grainSize )
{
body.forLoop( i, (std::min)( i + grainSize, iEnd ) );
}
return;
}
#endif // #if USE_OPENMP
#if USE_PPL
if ( gTaskMgr.getApi() == TaskManager::apiPpl )
{
// PPL dispatch
PplBodyAdapter<TBody> pplBody;
pplBody.mBody = &body;
pplBody.mGrainSize = grainSize;
pplBody.mIndexEnd = iEnd;
// note: MSVC 2010 doesn't support partitioner args, so avoid them
concurrency::parallel_for( iBegin,
iEnd,
grainSize,
pplBody
);
return;
}
#endif //#if USE_PPL
#if USE_TBB
if ( gTaskMgr.getApi() == TaskManager::apiTbb )
{
// TBB dispatch
TbbBodyAdapter<TBody> tbbBody;
tbbBody.mBody = &body;
tbb::parallel_for( tbb::blocked_range<int>( iBegin, iEnd, grainSize ),
tbbBody,
tbb::simple_partitioner()
);
return;
}
#endif // #if USE_TBB
{
// run on main thread
body.forLoop( iBegin, iEnd );
}
}
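
The deleted TaskManager above handled backend selection and thread counts per demo; those responsibilities now sit behind the core btITaskScheduler interface used earlier in this commit. Below is a sketch of the thread-count side, built only from accessors that appear in the diff; the wrapper function itself is an illustrative assumption.

// Illustrative replacement for TaskManager::setNumThreads(), using only calls that
// appear in this commit (btGetTaskScheduler, getNumThreads, setNumThreads,
// getMaxNumThreads); the clamping helper itself is an assumption.
#include "LinearMath/btThreads.h"  // assumed header
#include "LinearMath/btMinMax.h"   // btMin / btMax

void applyThreadCount( int requestedThreads )
{
    btITaskScheduler* ts = btGetTaskScheduler();  // whichever scheduler btSetTaskScheduler() installed
    int n = btMax( 1, btMin( requestedThreads, ts->getMaxNumThreads() ) );
    if ( n != ts->getNumThreads() )  // only resize the thread pool when the count changes
    {
        ts->setNumThreads( n );
    }
}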