Bullet 2 threading refactor: moved parallel-for calls into core libs

This commit is contained in:
Lunkhound
2017-05-22 00:47:11 -07:00
parent 2f3844e5db
commit dfe184e8d3
14 changed files with 1012 additions and 847 deletions

View File

@@ -15,6 +15,7 @@ SET(BulletCollision_SRCS
CollisionDispatch/btBox2dBox2dCollisionAlgorithm.cpp
CollisionDispatch/btBoxBoxDetector.cpp
CollisionDispatch/btCollisionDispatcher.cpp
CollisionDispatch/btCollisionDispatcherMt.cpp
CollisionDispatch/btCollisionObject.cpp
CollisionDispatch/btCollisionWorld.cpp
CollisionDispatch/btCollisionWorldImporter.cpp
@@ -123,6 +124,7 @@ SET(CollisionDispatch_HDRS
CollisionDispatch/btCollisionConfiguration.h
CollisionDispatch/btCollisionCreateFunc.h
CollisionDispatch/btCollisionDispatcher.h
CollisionDispatch/btCollisionDispatcherMt.h
CollisionDispatch/btCollisionObject.h
CollisionDispatch/btCollisionObjectWrapper.h
CollisionDispatch/btCollisionWorld.h

View File

@@ -0,0 +1,164 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#include "btCollisionDispatcherMt.h"
#include "LinearMath/btQuickprof.h"
#include "BulletCollision/BroadphaseCollision/btCollisionAlgorithm.h"
#include "BulletCollision/CollisionShapes/btCollisionShape.h"
#include "BulletCollision/CollisionDispatch/btCollisionObject.h"
#include "BulletCollision/BroadphaseCollision/btOverlappingPairCache.h"
#include "LinearMath/btPoolAllocator.h"
#include "BulletCollision/CollisionDispatch/btCollisionConfiguration.h"
#include "BulletCollision/CollisionDispatch/btCollisionObjectWrapper.h"
/// Construct a dispatcher that runs its narrow-phase pair loop through btParallelFor.
/// @param config     collision configuration, forwarded unchanged to btCollisionDispatcher
/// @param grainSize  number of loop iterations (overlapping pairs) handed to each task
btCollisionDispatcherMt::btCollisionDispatcherMt( btCollisionConfiguration* config, int grainSize )
    : btCollisionDispatcher( config ),
      m_batchUpdating( false ),  // only true while dispatchAllCollisionPairs() is inside its parallel loop
      m_grainSize( grainSize )
{
}
/// Allocate and construct a persistent manifold for the pair (body0, body1).
///
/// Memory comes from the persistent-manifold pool; when the pool is exhausted the
/// manifold is heap allocated, unless CD_DISABLE_CONTACTPOOL_DYNAMIC_ALLOCATION is
/// set, in which case the call asserts and returns 0.
/// Outside of a batch update the new manifold is appended to m_manifoldsPtr and its
/// m_index1a records that position; during a batch update the array is left alone
/// because dispatchAllCollisionPairs() rebuilds it afterwards.
/// @return the new manifold, or 0 when the pool overflowed and dynamic allocation is disabled
btPersistentManifold* btCollisionDispatcherMt::getNewManifold( const btCollisionObject* body0, const btCollisionObject* body1 )
{
    // Contact breaking threshold: the global default, or (optional feature, selected by
    // a dispatcher flag) the smaller of the two per-shape thresholds.
    btScalar breakingThreshold = gContactBreakingThreshold;
    if ( m_dispatcherFlags & btCollisionDispatcher::CD_USE_RELATIVE_CONTACT_BREAKING_THRESHOLD )
    {
        const btScalar shapeThreshold0 = body0->getCollisionShape()->getContactBreakingThreshold( gContactBreakingThreshold );
        const btScalar shapeThreshold1 = body1->getCollisionShape()->getContactBreakingThreshold( gContactBreakingThreshold );
        breakingThreshold = btMin( shapeThreshold0, shapeThreshold1 );
    }
    const btScalar processingThreshold = btMin( body0->getContactProcessingThreshold(), body1->getContactProcessingThreshold() );

    void* storage = m_persistentManifoldPoolAllocator->allocate( sizeof( btPersistentManifold ) );
    if ( storage == NULL )
    {
        // Pool overflow: either fall back to the heap or, when a contiguous contact
        // pool is required, report the failure.
        const bool heapFallbackDisabled = ( m_dispatcherFlags & CD_DISABLE_CONTACTPOOL_DYNAMIC_ALLOCATION ) != 0;
        if ( heapFallbackDisabled )
        {
            btAssert( 0 );
            // increase m_defaultMaxPersistentManifoldPoolSize in
            // btDefaultCollisionConstructionInfo/btDefaultCollisionConfiguration to avoid this
            return 0;
        }
        storage = btAlignedAlloc( sizeof( btPersistentManifold ), 16 );
    }

    btPersistentManifold* created = new( storage ) btPersistentManifold( body0, body1, 0, breakingThreshold, processingThreshold );
    if ( m_batchUpdating )
    {
        // the batch updater rebuilds the manifold pointer array once it has finished
        return created;
    }

    // The shared array is modified here, so no worker threads may be active.
    btAssert( !btThreadsAreRunning() );
    created->m_index1a = m_manifoldsPtr.size();
    m_manifoldsPtr.push_back( created );
    return created;
}
/// Clear, unregister and destroy a manifold previously returned by getNewManifold().
///
/// Outside of a batch update the manifold is removed from m_manifoldsPtr by swapping
/// it with the last element (whose m_index1a is patched to the vacated slot). During a
/// batch update the array is not touched, because dispatchAllCollisionPairs()
/// reconstructs it when the parallel loop has finished.
/// The memory is returned to the manifold pool when it came from there, otherwise to
/// the aligned heap allocator.
void btCollisionDispatcherMt::releaseManifold( btPersistentManifold* manifold )
{
    clearManifold( manifold );
    if ( !m_batchUpdating )
    {
        // Only this path modifies the shared manifold array, so only this path requires
        // that no worker threads are active. (Asserting unconditionally would trip on
        // every release made from inside the parallel loop, where threads are running
        // by design; this matches the placement of the assert in getNewManifold.)
        btAssert( !btThreadsAreRunning() );
        // batch updater will update manifold pointers array after finishing, so
        // only need to update array when not batch-updating
        int findIndex = manifold->m_index1a;
        btAssert( findIndex < m_manifoldsPtr.size() );
        m_manifoldsPtr.swap( findIndex, m_manifoldsPtr.size() - 1 );
        m_manifoldsPtr[ findIndex ]->m_index1a = findIndex;
        m_manifoldsPtr.pop_back();
    }
    // placement-new'd in getNewManifold, so destroy explicitly before freeing the storage
    manifold->~btPersistentManifold();
    if ( m_persistentManifoldPoolAllocator->validPtr( manifold ) )
    {
        m_persistentManifoldPoolAllocator->freeMemory( manifold );
    }
    else
    {
        btAlignedFree( manifold );
    }
}
struct CollisionDispatcherUpdater : public btIParallelForBody
{
btBroadphasePair* mPairArray;
btNearCallback mCallback;
btCollisionDispatcher* mDispatcher;
const btDispatcherInfo* mInfo;
CollisionDispatcherUpdater()
{
mPairArray = NULL;
mCallback = NULL;
mDispatcher = NULL;
mInfo = NULL;
}
void forLoop( int iBegin, int iEnd ) const
{
for ( int i = iBegin; i < iEnd; ++i )
{
btBroadphasePair* pair = &mPairArray[ i ];
mCallback( *pair, *mDispatcher, *mInfo );
}
}
};
/// Run the near callback over every overlapping pair via btParallelFor, then rebuild
/// the manifold pointer array from the pairs' collision algorithms.
///
/// While the parallel loop runs, m_batchUpdating is set so that getNewManifold() and
/// releaseManifold() leave m_manifoldsPtr untouched. The array is afterwards
/// reconstructed in pair order, which keeps its contents deterministic.
/// The `dispatcher` argument is not used; `this` is handed to the callback instead.
void btCollisionDispatcherMt::dispatchAllCollisionPairs( btOverlappingPairCache* pairCache, const btDispatcherInfo& info, btDispatcher* dispatcher )
{
    const int numPairs = pairCache->getNumOverlappingPairs();
    if ( numPairs == 0 )
    {
        return;
    }

    CollisionDispatcherUpdater pairUpdater;
    pairUpdater.mCallback = getNearCallback();
    pairUpdater.mPairArray = pairCache->getOverlappingPairArrayPtr();
    pairUpdater.mDispatcher = this;
    pairUpdater.mInfo = &info;

    m_batchUpdating = true;
    btParallelFor( 0, numPairs, m_grainSize, pairUpdater );
    m_batchUpdating = false;

    // reconstruct the manifolds array to ensure determinism
    m_manifoldsPtr.resizeNoInitialize( 0 );
    btBroadphasePair* pairArray = pairCache->getOverlappingPairArrayPtr();
    for ( int pairIndex = 0; pairIndex < numPairs; ++pairIndex )
    {
        btCollisionAlgorithm* algorithm = pairArray[ pairIndex ].m_algorithm;
        if ( !algorithm )
        {
            continue;  // pair has no algorithm, hence no manifolds to collect
        }
        algorithm->getAllContactManifolds( m_manifoldsPtr );
    }

    // refresh the back-indices that releaseManifold() relies on
    for ( int manifoldIndex = 0; manifoldIndex < m_manifoldsPtr.size(); ++manifoldIndex )
    {
        m_manifoldsPtr[ manifoldIndex ]->m_index1a = manifoldIndex;
    }
}

View File

@@ -0,0 +1,39 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef BT_COLLISION_DISPATCHER_MT_H
#define BT_COLLISION_DISPATCHER_MT_H
#include "BulletCollision/CollisionDispatch/btCollisionDispatcher.h"
#include "LinearMath/btThreads.h"
///
/// btCollisionDispatcherMt -- collision dispatcher whose dispatchAllCollisionPairs()
/// processes the overlapping pairs through btParallelFor.
///
class btCollisionDispatcherMt : public btCollisionDispatcher
{
public:
// grainSize is the number of loop iterations (pairs) given to each parallel-for task
btCollisionDispatcherMt( btCollisionConfiguration* config, int grainSize = 40 );
// creates a manifold; only registers it in the manifold array when not batch-updating
virtual btPersistentManifold* getNewManifold( const btCollisionObject* body0, const btCollisionObject* body1 ) BT_OVERRIDE;
// destroys a manifold; only unregisters it from the manifold array when not batch-updating
virtual void releaseManifold( btPersistentManifold* manifold ) BT_OVERRIDE;
// runs the near callback on all pairs in parallel, then rebuilds the manifold array
virtual void dispatchAllCollisionPairs( btOverlappingPairCache* pairCache, const btDispatcherInfo& info, btDispatcher* dispatcher ) BT_OVERRIDE;
protected:
// true only while dispatchAllCollisionPairs() is executing its parallel loop
bool m_batchUpdating;
// iterations per task, forwarded to btParallelFor
int m_grainSize;
};
#endif //BT_COLLISION_DISPATCHER_MT_H

View File

@@ -108,8 +108,105 @@ struct InplaceSolverIslandCallbackMt : public btSimulationIslandManagerMt::Islan
};
///
/// btConstraintSolverPoolMt
///
btDiscreteDynamicsWorldMt::btDiscreteDynamicsWorldMt(btDispatcher* dispatcher,btBroadphaseInterface* pairCache,btConstraintSolver* constraintSolver, btCollisionConfiguration* collisionConfiguration)
/// Find a pool entry whose mutex can be acquired, lock it and return it.
///
/// The search starts at the slot derived from the current thread index and walks
/// the pool round-robin, spinning until some tryLock() succeeds. The caller must
/// unlock the returned entry's mutex when done.
btConstraintSolverPoolMt::ThreadSolver* btConstraintSolverPoolMt::getAndLockThreadSolver()
{
    const int solverCount = m_solvers.size();
    int slot = btGetCurrentThreadIndex() % solverCount;
    for ( ;; )
    {
        ThreadSolver& candidate = m_solvers[ slot ];
        if ( candidate.mutex.tryLock() )
        {
            return &candidate;
        }
        // busy: move on to the next slot, wrapping around at the end
        slot = ( slot + 1 ) % solverCount;
    }
    // never reached: the loop above only exits by returning
    return NULL;
}
/// Fill the pool with the given solvers (one pool entry per solver).
/// The pool reports the solver type of the first solver, or
/// BT_SEQUENTIAL_IMPULSE_SOLVER when no solvers are supplied.
void btConstraintSolverPoolMt::init( btConstraintSolver** solvers, int numSolvers )
{
    m_solvers.resize( numSolvers );
    for ( int slot = 0; slot < numSolvers; ++slot )
    {
        ThreadSolver& entry = m_solvers[ slot ];
        entry.solver = solvers[ slot ];
    }
    m_solverType = ( numSolvers > 0 ) ? solvers[ 0 ]->getSolverType() : BT_SEQUENTIAL_IMPULSE_SOLVER;
}
/// Create a pool of `numSolvers` btSequentialImpulseConstraintSolver instances.
/// The pool owns the solvers; its destructor deletes them.
/// @param numSolvers  number of solvers to create; values <= 0 produce an empty pool
btConstraintSolverPoolMt::btConstraintSolverPoolMt( int numSolvers )
{
    btAlignedObjectArray<btConstraintSolver*> solvers;
    solvers.reserve( numSolvers );
    for ( int i = 0; i < numSolvers; ++i )
    {
        btConstraintSolver* solver = new btSequentialImpulseConstraintSolver();
        solvers.push_back( solver );
    }
    // Element 0 of an empty array must not be indexed, so pass a null pointer when
    // nothing was created (same guard as used for the rigid body arrays in this file).
    const int createdCount = solvers.size();
    init( createdCount > 0 ? &solvers[ 0 ] : NULL, createdCount );
}
// pass in fully constructed solvers (destructor will delete them)
// solvers:    array of numSolvers solver pointers; the pointers are copied into the pool
// numSolvers: number of entries in the array
btConstraintSolverPoolMt::btConstraintSolverPoolMt( btConstraintSolver** solvers, int numSolvers )
{
init( solvers, numSolvers );
}
/// Delete every solver held by the pool and null out the stored pointers.
btConstraintSolverPoolMt::~btConstraintSolverPoolMt()
{
    const int solverCount = m_solvers.size();
    for ( int slot = 0; slot < solverCount; ++slot )
    {
        delete m_solvers[ slot ].solver;
        m_solvers[ slot ].solver = NULL;
    }
}
/// Solve a group of constraints using whichever pooled solver can be locked.
/// All arguments are forwarded unchanged to the locked solver's solveGroup().
/// @return always 0.0f; the value returned by the underlying solver is not propagated
btScalar btConstraintSolverPoolMt::solveGroup( btCollisionObject** bodies,
    int numBodies,
    btPersistentManifold** manifolds,
    int numManifolds,
    btTypedConstraint** constraints,
    int numConstraints,
    const btContactSolverInfo& info,
    btIDebugDraw* debugDrawer,
    btDispatcher* dispatcher
)
{
    ThreadSolver* const lockedEntry = getAndLockThreadSolver();
    btConstraintSolver* const worker = lockedEntry->solver;
    worker->solveGroup( bodies, numBodies, manifolds, numManifolds, constraints, numConstraints, info, debugDrawer, dispatcher );
    // hand the solver back to the pool
    lockedEntry->mutex.unlock();
    return 0.0f;
}
void btConstraintSolverPoolMt::reset()
{
for ( int i = 0; i < m_solvers.size(); ++i )
{
ThreadSolver& solver = m_solvers[ i ];
solver.mutex.lock();
solver.solver->reset();
solver.mutex.unlock();
}
}
///
/// btDiscreteDynamicsWorldMt
///
btDiscreteDynamicsWorldMt::btDiscreteDynamicsWorldMt(btDispatcher* dispatcher, btBroadphaseInterface* pairCache, btConstraintSolverPoolMt* constraintSolver, btCollisionConfiguration* collisionConfiguration)
: btDiscreteDynamicsWorld(dispatcher,pairCache,constraintSolver,collisionConfiguration)
{
if (m_ownsIslandManager)
@@ -124,8 +221,8 @@ btDiscreteDynamicsWorldMt::btDiscreteDynamicsWorldMt(btDispatcher* dispatcher,bt
{
void* mem = btAlignedAlloc(sizeof(btSimulationIslandManagerMt),16);
btSimulationIslandManagerMt* im = new (mem) btSimulationIslandManagerMt();
m_islandManager = im;
im->setMinimumSolverBatchSize( m_solverInfo.m_minimumSolverBatchSize );
m_islandManager = im;
}
}
@@ -145,7 +242,7 @@ btDiscreteDynamicsWorldMt::~btDiscreteDynamicsWorldMt()
}
void btDiscreteDynamicsWorldMt::solveConstraints(btContactSolverInfo& solverInfo)
void btDiscreteDynamicsWorldMt::solveConstraints(btContactSolverInfo& solverInfo)
{
BT_PROFILE("solveConstraints");
@@ -160,3 +257,65 @@ void btDiscreteDynamicsWorldMt::solveConstraints(btContactSolverInfo& solverInfo
}
/// Parallel-for body: applies damping and predicts the integrated transform for
/// every body in a sub-range that is neither static nor kinematic.
struct UpdaterUnconstrainedMotion : public btIParallelForBody
{
    btScalar timeStep;          // step passed to applyDamping / predictIntegratedTransform
    btRigidBody** rigidBodies;  // array indexed by the loop range

    /// Process bodies with indices in [iBegin, iEnd).
    void forLoop( int iBegin, int iEnd ) const BT_OVERRIDE
    {
        for ( int bodyIndex = iBegin; bodyIndex < iEnd; ++bodyIndex )
        {
            btRigidBody* const rb = rigidBodies[ bodyIndex ];
            if ( rb->isStaticOrKinematicObject() )
            {
                continue;
            }
            // velocities are not integrated/updated here; that happens in the constraint solver
            rb->applyDamping( timeStep );
            rb->predictIntegratedTransform( timeStep, rb->getInterpolationWorldTransform() );
        }
    }
};
/// Apply damping and predict transforms for all non-static rigid bodies using
/// btParallelFor (50 bodies per task). With no bodies the loop range is empty and
/// the body array pointer is null.
void btDiscreteDynamicsWorldMt::predictUnconstraintMotion( btScalar timeStep )
{
    BT_PROFILE( "predictUnconstraintMotion" );
    const int numBodies = m_nonStaticRigidBodies.size();

    UpdaterUnconstrainedMotion motionUpdater;
    motionUpdater.timeStep = timeStep;
    motionUpdater.rigidBodies = NULL;
    if ( numBodies )
    {
        motionUpdater.rigidBodies = &m_nonStaticRigidBodies[ 0 ];
    }

    const int bodiesPerTask = 50;  // num of iterations per task
    btParallelFor( 0, numBodies, bodiesPerTask, motionUpdater );
}
/// Release the previous predictive contacts, then create new ones for all non-static
/// rigid bodies via btParallelFor (50 bodies per task). Each task forwards its
/// sub-range to createPredictiveContactsInternal(). Nothing is dispatched when there
/// are no bodies.
void btDiscreteDynamicsWorldMt::createPredictiveContacts( btScalar timeStep )
{
    // profile zone added for consistency with predictUnconstraintMotion / integrateTransforms
    BT_PROFILE( "createPredictiveContacts" );
    releasePredictiveContacts();
    int grainSize = 50; // num of iterations per task for TBB or OPENMP
    if ( int bodyCount = m_nonStaticRigidBodies.size() )
    {
        UpdaterCreatePredictiveContacts update;
        update.world = this;
        update.timeStep = timeStep;
        update.rigidBodies = &m_nonStaticRigidBodies[ 0 ];
        btParallelFor( 0, bodyCount, grainSize, update );
    }
}
/// Integrate the transforms of all non-static rigid bodies via btParallelFor
/// (50 bodies per task). Each task forwards its sub-range to
/// integrateTransformsInternal(). Nothing is dispatched when there are no bodies.
void btDiscreteDynamicsWorldMt::integrateTransforms( btScalar timeStep )
{
    BT_PROFILE( "integrateTransforms" );
    const int bodiesPerTask = 50;  // num of iterations per task for TBB or OPENMP
    const int numBodies = m_nonStaticRigidBodies.size();
    if ( numBodies == 0 )
    {
        return;
    }

    UpdaterIntegrateTransforms transformUpdater;
    transformUpdater.world = this;
    transformUpdater.timeStep = timeStep;
    transformUpdater.rigidBodies = &m_nonStaticRigidBodies[ 0 ];
    btParallelFor( 0, numBodies, bodiesPerTask, transformUpdater );
}

View File

@@ -18,24 +18,116 @@ subject to the following restrictions:
#define BT_DISCRETE_DYNAMICS_WORLD_MT_H
#include "btDiscreteDynamicsWorld.h"
#include "btSimulationIslandManagerMt.h"
#include "BulletDynamics/ConstraintSolver/btConstraintSolver.h"
struct InplaceSolverIslandCallbackMt;
///
/// btConstraintSolverPoolMt - masquerades as a constraint solver, but really it is a threadsafe pool of them.
///
/// Each solver in the pool is protected by a mutex. When solveGroup is called from a thread,
/// the pool looks for a solver that isn't being used by another thread, locks it, and dispatches the
/// call to the solver.
/// So long as there are at least as many solvers as there are hardware threads, it should never need to
/// spin wait.
///
class btConstraintSolverPoolMt : public btConstraintSolver
{
public:
// create the solvers for me
explicit btConstraintSolverPoolMt( int numSolvers );
// pass in fully constructed solvers (destructor will delete them)
btConstraintSolverPoolMt( btConstraintSolver** solvers, int numSolvers );
virtual ~btConstraintSolverPoolMt();
///solve a group of constraints
/// (locks one pooled solver, forwards all arguments to it, then unlocks it)
virtual btScalar solveGroup( btCollisionObject** bodies,
int numBodies,
btPersistentManifold** manifolds,
int numManifolds,
btTypedConstraint** constraints,
int numConstraints,
const btContactSolverInfo& info,
btIDebugDraw* debugDrawer,
btDispatcher* dispatcher
) BT_OVERRIDE;
// resets every pooled solver, locking each one while it is reset
virtual void reset() BT_OVERRIDE;
// type of the first solver in the pool (set by init)
virtual btConstraintSolverType getSolverType() const BT_OVERRIDE { return m_solverType; }
private:
// size in bytes that each pool entry is padded out to
// NOTE(review): 128 presumably chosen as an upper bound on cache line size — confirm
const static size_t kCacheLineSize = 128;
// one pool entry: a solver plus the mutex that guards it
struct ThreadSolver
{
btConstraintSolver* solver;
btSpinMutex mutex;
char _cachelinePadding[ kCacheLineSize - sizeof( btSpinMutex ) - sizeof( void* ) ]; // keep mutexes from sharing a cache line
};
btAlignedObjectArray<ThreadSolver> m_solvers;
btConstraintSolverType m_solverType;
// spins until some entry's mutex can be taken; caller must unlock it
ThreadSolver* getAndLockThreadSolver();
// shared constructor logic: stores the solver pointers and records the solver type
void init( btConstraintSolver** solvers, int numSolvers );
};
///
/// btDiscreteDynamicsWorldMt -- a version of DiscreteDynamicsWorld with some minor changes to support
/// solving simulation islands on multiple threads.
///
/// Should function exactly like btDiscreteDynamicsWorld.
/// Also 3 methods that iterate over all of the rigidbodies can run in parallel:
/// - predictUnconstraintMotion
/// - integrateTransforms
/// - createPredictiveContacts
///
ATTRIBUTE_ALIGNED16(class) btDiscreteDynamicsWorldMt : public btDiscreteDynamicsWorld
{
protected:
// NOTE(review): type is only forward-declared in this header; presumably the island callback used by solveConstraints — confirm
InplaceSolverIslandCallbackMt* m_solverIslandCallbackMt;
// NOTE(review): solveConstraints is declared twice below (without and with BT_OVERRIDE) — looks like old/new lines of a diff; confirm only one should remain
virtual void solveConstraints(btContactSolverInfo& solverInfo);
virtual void solveConstraints(btContactSolverInfo& solverInfo) BT_OVERRIDE;
virtual void predictUnconstraintMotion( btScalar timeStep ) BT_OVERRIDE;
// parallel-for body: forwards the sub-range [iBegin, iEnd) of rigidBodies to createPredictiveContactsInternal
struct UpdaterCreatePredictiveContacts : public btIParallelForBody
{
btScalar timeStep;
btRigidBody** rigidBodies;
btDiscreteDynamicsWorldMt* world;
void forLoop( int iBegin, int iEnd ) const BT_OVERRIDE
{
world->createPredictiveContactsInternal( &rigidBodies[ iBegin ], iEnd - iBegin, timeStep );
}
};
virtual void createPredictiveContacts( btScalar timeStep ) BT_OVERRIDE;
// parallel-for body: forwards the sub-range [iBegin, iEnd) of rigidBodies to integrateTransformsInternal
struct UpdaterIntegrateTransforms : public btIParallelForBody
{
btScalar timeStep;
btRigidBody** rigidBodies;
btDiscreteDynamicsWorldMt* world;
void forLoop( int iBegin, int iEnd ) const BT_OVERRIDE
{
world->integrateTransformsInternal( &rigidBodies[ iBegin ], iEnd - iBegin, timeStep );
}
};
virtual void integrateTransforms( btScalar timeStep ) BT_OVERRIDE;
public:
BT_DECLARE_ALIGNED_ALLOCATOR();
// NOTE(review): two constructor declarations follow (btConstraintSolver* vs btConstraintSolverPoolMt*) — the first looks like the pre-change line of a diff; confirm
btDiscreteDynamicsWorldMt(btDispatcher* dispatcher,btBroadphaseInterface* pairCache,btConstraintSolver* constraintSolver,btCollisionConfiguration* collisionConfiguration);
btDiscreteDynamicsWorldMt(btDispatcher* dispatcher,
btBroadphaseInterface* pairCache,
btConstraintSolverPoolMt* constraintSolver, // Note this should be a solver-pool for multi-threading
btCollisionConfiguration* collisionConfiguration
);
virtual ~btDiscreteDynamicsWorldMt();
};

View File

@@ -15,6 +15,7 @@ subject to the following restrictions:
#include "LinearMath/btScalar.h"
#include "LinearMath/btThreads.h"
#include "btSimulationIslandManagerMt.h"
#include "BulletCollision/BroadphaseCollision/btDispatcher.h"
#include "BulletCollision/NarrowPhaseCollision/btPersistentManifold.h"
@@ -44,7 +45,7 @@ btSimulationIslandManagerMt::btSimulationIslandManagerMt()
{
m_minimumSolverBatchSize = calcBatchCost(0, 128, 0);
m_batchIslandMinBodyCount = 32;
m_islandDispatch = defaultIslandDispatch;
m_islandDispatch = parallelIslandDispatch;
m_batchIsland = NULL;
}
@@ -545,7 +546,7 @@ void btSimulationIslandManagerMt::mergeIslands()
}
void btSimulationIslandManagerMt::defaultIslandDispatch( btAlignedObjectArray<Island*>* islandsPtr, IslandCallback* callback )
void btSimulationIslandManagerMt::serialIslandDispatch( btAlignedObjectArray<Island*>* islandsPtr, IslandCallback* callback )
{
// serial dispatch
btAlignedObjectArray<Island*>& islands = *islandsPtr;
@@ -565,6 +566,40 @@ void btSimulationIslandManagerMt::defaultIslandDispatch( btAlignedObjectArray<Is
}
}
/// Parallel-for body that hands each island in a sub-range to the island callback.
/// Manifold and constraint array pointers are passed as NULL when the respective
/// array is empty; the body array is always indexed at element 0.
struct UpdateIslandDispatcher : public btIParallelForBody
{
    btAlignedObjectArray<btSimulationIslandManagerMt::Island*>* islandsPtr;  // islands to process
    btSimulationIslandManagerMt::IslandCallback* callback;                   // receives each island

    /// Process islands with indices in [iBegin, iEnd).
    void forLoop( int iBegin, int iEnd ) const BT_OVERRIDE
    {
        btAlignedObjectArray<btSimulationIslandManagerMt::Island*>& islands = *islandsPtr;
        for ( int islandIndex = iBegin; islandIndex < iEnd; ++islandIndex )
        {
            btSimulationIslandManagerMt::Island* current = islands[ islandIndex ];

            const int numManifolds = current->manifoldArray.size();
            btPersistentManifold** manifoldData = NULL;
            if ( numManifolds )
            {
                manifoldData = &current->manifoldArray[ 0 ];
            }

            const int numConstraints = current->constraintArray.size();
            btTypedConstraint** constraintData = NULL;
            if ( numConstraints )
            {
                constraintData = &current->constraintArray[ 0 ];
            }

            callback->processIsland( &current->bodyArray[ 0 ],
                current->bodyArray.size(),
                manifoldData,
                numManifolds,
                constraintData,
                numConstraints,
                current->id
            );
        }
    }
};
/// Dispatch every island to the callback through btParallelFor, one island per task.
void btSimulationIslandManagerMt::parallelIslandDispatch( btAlignedObjectArray<Island*>* islandsPtr, IslandCallback* callback )
{
    UpdateIslandDispatcher islandBody;
    islandBody.islandsPtr = islandsPtr;
    islandBody.callback = callback;

    const int islandsPerTask = 1;  // iterations per task
    const int numIslands = islandsPtr->size();
    btParallelFor( 0, numIslands, islandsPerTask, islandBody );
}
///@todo: this is random access, it can be walked 'cache friendly'!
void btSimulationIslandManagerMt::buildAndProcessIslands( btDispatcher* dispatcher,
btCollisionWorld* collisionWorld,

View File

@@ -59,7 +59,8 @@ public:
) = 0;
};
typedef void( *IslandDispatchFunc ) ( btAlignedObjectArray<Island*>* islands, IslandCallback* callback );
static void defaultIslandDispatch( btAlignedObjectArray<Island*>* islands, IslandCallback* callback );
static void serialIslandDispatch( btAlignedObjectArray<Island*>* islands, IslandCallback* callback );
static void parallelIslandDispatch( btAlignedObjectArray<Island*>* islandsPtr, IslandCallback* callback );
protected:
btAlignedObjectArray<Island*> m_allocatedIslands; // owner of all Islands
btAlignedObjectArray<Island*> m_activeIslands; // islands actively in use

View File

@@ -14,6 +14,247 @@ subject to the following restrictions:
#include "btThreads.h"
#include <algorithm> // for min and max
#if BT_THREADSAFE
#if BT_USE_OPENMP
#include <omp.h>
#endif // #if BT_USE_OPENMP
#if BT_USE_PPL
// use Microsoft Parallel Patterns Library (installed with Visual Studio 2010 and later)
#include <ppl.h> // if you get a compile error here, check whether your version of Visual Studio includes PPL
// Visual Studio 2010 and later should come with it
#include <concrtrm.h> // for GetProcessorCount()
#endif // #if BT_USE_PPL
#if BT_USE_TBB
// use Intel Threading Building Blocks for thread management
#define __TBB_NO_IMPLICIT_LINKAGE 1
#include <tbb/tbb.h>
#include <tbb/task_scheduler_init.h>
#include <tbb/parallel_for.h>
#include <tbb/blocked_range.h>
#endif // #if BT_USE_TBB
// scheduler used by btParallelFor(); null until btSetTaskScheduler() is called
static btITaskScheduler* gBtTaskScheduler;
static int gThreadsRunningCounter = 0; // useful for detecting if we are trying to do nested parallel-for calls
// guards increments/decrements of gThreadsRunningCounter
static btSpinMutex gThreadsRunningCounterMutex;
/// Record that a parallel-for has started: increments the running counter
/// while holding its mutex.
void btPushThreadsAreRunning()
{
    gThreadsRunningCounterMutex.lock();
    gThreadsRunningCounter += 1;
    gThreadsRunningCounterMutex.unlock();
}
/// Record that a parallel-for has finished: decrements the running counter
/// while holding its mutex.
void btPopThreadsAreRunning()
{
    gThreadsRunningCounterMutex.lock();
    gThreadsRunningCounter -= 1;
    gThreadsRunningCounterMutex.unlock();
}
bool btThreadsAreRunning()
{
return gThreadsRunningCounter != 0;
}
// Select the scheduler that btParallelFor() dispatches to.
// Only the pointer is stored; no ownership is taken here.
void btSetTaskScheduler( btITaskScheduler* ts )
{
gBtTaskScheduler = ts;
}
// Return the scheduler last passed to btSetTaskScheduler() (null if none was set).
btITaskScheduler* btGetTaskScheduler()
{
return gBtTaskScheduler;
}
void btParallelFor( int iBegin, int iEnd, int grainSize, const btIParallelForBody& body )
{
gBtTaskScheduler->parallelFor( iBegin, iEnd, grainSize, body );
}
#if BT_USE_OPENMP
///
/// btTaskSchedulerOpenMP -- OpenMP task scheduler implementation
///
class btTaskSchedulerOpenMP : public btITaskScheduler
{
    int m_numThreads;  // value last applied by setNumThreads(); 0 before the first call

public:
    btTaskSchedulerOpenMP()
        : btITaskScheduler( "OpenMP" ),
          m_numThreads( 0 )
    {
    }

    /// Upper limit as reported by omp_get_max_threads().
    virtual int getMaxNumThreads() const BT_OVERRIDE
    {
        const int ompLimit = omp_get_max_threads();
        return ompLimit;
    }

    virtual int getNumThreads() const BT_OVERRIDE
    {
        return m_numThreads;
    }

    /// Apply a thread count (clamped to at least 1) via omp_set_num_threads().
    virtual void setNumThreads( int numThreads ) BT_OVERRIDE
    {
        m_numThreads = ( numThreads < 1 ) ? 1 : numThreads;
        omp_set_num_threads( m_numThreads );
    }

    /// Split [iBegin, iEnd) into chunks of grainSize iterations and run the chunks
    /// as iterations of an OpenMP parallel loop.
    virtual void parallelFor( int iBegin, int iEnd, int grainSize, const btIParallelForBody& body ) BT_OVERRIDE
    {
        btPushThreadsAreRunning();
#pragma omp parallel for schedule( static, 1 )
        for ( int chunkBegin = iBegin; chunkBegin < iEnd; chunkBegin += grainSize )
        {
            // the final chunk may be shorter than grainSize
            const int chunkEnd = ( std::min )( chunkBegin + grainSize, iEnd );
            body.forLoop( chunkBegin, chunkEnd );
        }
        btPopThreadsAreRunning();
    }
};
#endif // #if BT_USE_OPENMP
#if BT_USE_TBB
///
/// btTaskSchedulerTBB -- task scheduler implemented via Intel Threading Building Blocks
///
class btTaskSchedulerTBB : public btITaskScheduler
{
int m_numThreads;
tbb::task_scheduler_init* m_tbbSchedulerInit;
public:
btTaskSchedulerTBB() : btITaskScheduler( "IntelTBB" )
{
m_numThreads = 0;
m_tbbSchedulerInit = NULL;
}
~btTaskSchedulerTBB()
{
if ( m_tbbSchedulerInit )
{
delete m_tbbSchedulerInit;
m_tbbSchedulerInit = NULL;
}
}
virtual int getMaxNumThreads() const BT_OVERRIDE
{
return tbb::task_scheduler_init::default_num_threads();
}
virtual int getNumThreads() const BT_OVERRIDE
{
return m_numThreads;
}
virtual void setNumThreads( int numThreads ) BT_OVERRIDE
{
m_numThreads = ( std::max )( 1, numThreads );
if ( m_tbbSchedulerInit )
{
delete m_tbbSchedulerInit;
m_tbbSchedulerInit = NULL;
}
m_tbbSchedulerInit = new tbb::task_scheduler_init( m_numThreads );
}
struct BodyAdapter
{
const btIParallelForBody* mBody;
void operator()( const tbb::blocked_range<int>& range ) const
{
mBody->forLoop( range.begin(), range.end() );
}
};
virtual void parallelFor( int iBegin, int iEnd, int grainSize, const btIParallelForBody& body ) BT_OVERRIDE
{
// TBB dispatch
BodyAdapter tbbBody;
tbbBody.mBody = &body;
btPushThreadsAreRunning();
tbb::parallel_for( tbb::blocked_range<int>( iBegin, iEnd, grainSize ),
tbbBody,
tbb::simple_partitioner()
);
btPopThreadsAreRunning();
}
};
#endif // #if BT_USE_TBB
#if BT_USE_PPL
///
/// btTaskSchedulerPPL -- task scheduler implemented via Microsoft Parallel Patterns Lib
///
class btTaskSchedulerPPL : public btITaskScheduler
{
    int m_numThreads;  // value last applied by setNumThreads(); 0 before the first call

public:
    btTaskSchedulerPPL()
        : btITaskScheduler( "PPL" ),
          m_numThreads( 0 )
    {
    }

    /// Processor count as reported by the concurrency runtime.
    virtual int getMaxNumThreads() const BT_OVERRIDE
    {
        return concurrency::GetProcessorCount();
    }

    virtual int getNumThreads() const BT_OVERRIDE
    {
        return m_numThreads;
    }

    /// Detach the current scheduler (if one is attached) and create a new one whose
    /// min and max concurrency both equal the requested count (clamped to at least 1).
    virtual void setNumThreads( int numThreads ) BT_OVERRIDE
    {
        m_numThreads = ( numThreads < 1 ) ? 1 : numThreads;
        using namespace concurrency;
        const bool schedulerAttached = ( CurrentScheduler::Id() != -1 );
        if ( schedulerAttached )
        {
            CurrentScheduler::Detach();
        }
        SchedulerPolicy policy;
        policy.SetConcurrencyLimits( m_numThreads, m_numThreads );
        CurrentScheduler::Create( policy );
    }

    /// Functor called once per chunk start index; runs the chunk
    /// [i, min(i + mGrainSize, mIndexEnd)).
    struct BodyAdapter
    {
        const btIParallelForBody* mBody;
        int mGrainSize;
        int mIndexEnd;

        void operator()( int i ) const
        {
            const int chunkEnd = ( std::min )( i + mGrainSize, mIndexEnd );
            mBody->forLoop( i, chunkEnd );
        }
    };

    /// Run body over [iBegin, iEnd) with concurrency::parallel_for, stepping the
    /// index by grainSize so each invocation handles one chunk.
    virtual void parallelFor( int iBegin, int iEnd, int grainSize, const btIParallelForBody& body ) BT_OVERRIDE
    {
        BodyAdapter adapter;
        adapter.mBody = &body;
        adapter.mGrainSize = grainSize;
        adapter.mIndexEnd = iEnd;

        btPushThreadsAreRunning();
        // note: MSVC 2010 doesn't support partitioner args, so avoid them
        concurrency::parallel_for( iBegin, iEnd, grainSize, adapter );
        btPopThreadsAreRunning();
    }
};
#endif // #if BT_USE_PPL
//
// Lightweight spin-mutex based on atomics
@@ -22,8 +263,6 @@ subject to the following restrictions:
// context switching.
//
#if BT_THREADSAFE
#if __cplusplus >= 201103L
// for anything claiming full C++11 compliance, use C++11 atomics
@@ -229,3 +468,64 @@ bool btSpinMutex::tryLock()
#endif // #if BT_THREADSAFE
///
/// btTaskSchedulerSequential -- non-threaded implementation of task scheduler
/// (fallback in case no multi-threaded schedulers are available)
///
class btTaskSchedulerSequential : public btITaskScheduler
{
public:
    btTaskSchedulerSequential()
        : btITaskScheduler( "Sequential" )
    {
    }

    // always exactly one thread: the caller's
    virtual int getMaxNumThreads() const BT_OVERRIDE
    {
        return 1;
    }

    virtual int getNumThreads() const BT_OVERRIDE
    {
        return 1;
    }

    // the requested count is ignored
    virtual void setNumThreads( int numThreads ) BT_OVERRIDE
    {
    }

    // runs the entire range in one call on the calling thread; grainSize is ignored
    virtual void parallelFor( int iBegin, int iEnd, int grainSize, const btIParallelForBody& body ) BT_OVERRIDE
    {
        body.forLoop( iBegin, iEnd );
    }
};
// get the non-threaded task scheduler (always available);
// the instance is a function-local static, so every call returns the same object
btITaskScheduler* btGetSequentialTaskScheduler()
{
    static btTaskSchedulerSequential sSequentialScheduler;
    btITaskScheduler* scheduler = &sSequentialScheduler;
    return scheduler;
}
// get the OpenMP task scheduler; returns null unless built with both
// BT_USE_OPENMP and BT_THREADSAFE
btITaskScheduler* btGetOpenMPTaskScheduler()
{
    btITaskScheduler* scheduler = NULL;
#if BT_USE_OPENMP && BT_THREADSAFE
    static btTaskSchedulerOpenMP sOpenMPScheduler;
    scheduler = &sOpenMPScheduler;
#endif
    return scheduler;
}
// get the Intel TBB task scheduler; returns null unless built with both
// BT_USE_TBB and BT_THREADSAFE
btITaskScheduler* btGetTBBTaskScheduler()
{
    btITaskScheduler* scheduler = NULL;
#if BT_USE_TBB && BT_THREADSAFE
    static btTaskSchedulerTBB sTBBScheduler;
    scheduler = &sTBBScheduler;
#endif
    return scheduler;
}
// get the PPL task scheduler; returns null unless built with both
// BT_USE_PPL and BT_THREADSAFE
btITaskScheduler* btGetPPLTaskScheduler()
{
    btITaskScheduler* scheduler = NULL;
#if BT_USE_PPL && BT_THREADSAFE
    static btTaskSchedulerPPL sPPLScheduler;
    scheduler = &sPPLScheduler;
#endif
    return scheduler;
}

View File

@@ -19,6 +19,15 @@ subject to the following restrictions:
#include "btScalar.h" // has definitions like SIMD_FORCE_INLINE
#if defined (_MSC_VER) && _MSC_VER >= 1600
// give us a compile error if the signature of any overridden method is changed
#define BT_OVERRIDE override
#endif
#ifndef BT_OVERRIDE
#define BT_OVERRIDE
#endif
///
/// btSpinMutex -- lightweight spin-mutex implemented with atomic ops, never puts
/// a thread to sleep because it is designed to be used with a task scheduler
@@ -59,6 +68,7 @@ SIMD_FORCE_INLINE bool btMutexTryLock( btSpinMutex* mutex )
// for internal use only
bool btIsMainThread();
bool btThreadsAreRunning();
unsigned int btGetCurrentThreadIndex();
const unsigned int BT_MAX_THREAD_COUNT = 64;
@@ -71,5 +81,55 @@ SIMD_FORCE_INLINE void btMutexUnlock( btSpinMutex* ) {}
SIMD_FORCE_INLINE bool btMutexTryLock( btSpinMutex* ) {return true;}
#endif
//
// btIParallelForBody -- subclass this to express work that can be done in parallel
//
class btIParallelForBody
{
public:
    // virtual destructor so that destroying a derived body through a base pointer is well defined
    virtual ~btIParallelForBody() {}
    // Execute the iterations with indices in [iBegin, iEnd).
    // Declared const: implementations are invoked through a const reference.
    virtual void forLoop( int iBegin, int iEnd ) const = 0;
};
//
// btITaskScheduler -- subclass this to implement a task scheduler that can dispatch work to
// worker threads
//
class btITaskScheduler
{
// display name supplied by the concrete scheduler; only the pointer is stored
const char* m_name;
public:
btITaskScheduler( const char* name ) : m_name( name ) {}
const char* getName() const { return m_name; }
virtual ~btITaskScheduler() {}
// largest thread count the scheduler can offer
virtual int getMaxNumThreads() const = 0;
// thread count currently in effect
virtual int getNumThreads() const = 0;
// request a thread count
virtual void setNumThreads( int numThreads ) = 0;
// run body over [iBegin, iEnd); grainSize is the number of iterations per task
virtual void parallelFor( int iBegin, int iEnd, int grainSize, const btIParallelForBody& body ) = 0;
};
// set the task scheduler to use for all calls to btParallelFor()
// NOTE: you must set this prior to using any of the multi-threaded "Mt" classes
void btSetTaskScheduler( btITaskScheduler* ts );
// get the current task scheduler
btITaskScheduler* btGetTaskScheduler();
// get non-threaded task scheduler (always available)
btITaskScheduler* btGetSequentialTaskScheduler();
// get OpenMP task scheduler (if available, otherwise returns null)
btITaskScheduler* btGetOpenMPTaskScheduler();
// get Intel TBB task scheduler (if available, otherwise returns null)
btITaskScheduler* btGetTBBTaskScheduler();
// get PPL task scheduler (if available, otherwise returns null)
btITaskScheduler* btGetPPLTaskScheduler();
// btParallelFor -- call this to dispatch work like a for-loop
// (iterations may be done out of order, so no dependencies are allowed)
void btParallelFor( int iBegin, int iEnd, int grainSize, const btIParallelForBody& body );
#endif //BT_THREADS_H