parallel solver: various changes

- threading: adding btSequentialImpulseConstraintSolverMt
 - task scheduler: added parallelSum so that parallel solver can compute residuals
 - CommonRigidBodyMTBase: add slider for solver least squares residual and allow multithreading without needing OpenMP, TBB, or PPL
 - taskScheduler: don't wait for workers to sleep/signal at the end of each parallel block
 - parallel solver: convertContacts split into an allocContactConstraints and setupContactConstraints stage, the latter of which is done in parallel
 - parallel solver: rolling friction is now interleaved along with normal friction
 - parallel solver: batchified split impulse solving + some cleanup
 - parallel solver: sorting batches from largest to smallest
 - parallel solver: added parallel batch creation
 - parallel solver: added warmstartingWriteBackContacts func + other cleanup
 - task scheduler: truncate low bits to preserve determinism with parallelSum
 - parallel solver: reducing dynamic mem allocs and trying to parallelize more of the batch setup
 - parallel solver: parallelize updating constraint batch ids for merging
 - parallel solver: adding debug visualization
 - task scheduler: make TBB task scheduler parallelSum deterministic
 - parallel solver: split batch gen code into separate file; allow selection of batch gen method
 - task scheduler: add sleepWorkerThreadsHint() at end of simulation
 - parallel solver: added grain size per phase
 - task Scheduler: fix for strange threading issue; also no need for main thread to wait for workers to sleep
 - base constraint solver: break out joint setup into separate function for profiling/overriding
 - parallel solver: allow different batching method for contacts vs joints
 - base constraint solver: add convertJoint and convertBodies to make it possible to parallelize joint and body conversion
 - parallel solver: convert joints and bodies in parallel now
 - parallel solver: speed up batch creation with run-length encoding
 - parallel solver: batch gen: run-length expansion in parallel; collect constraint info in parallel
 - parallel solver: adding spatial grid batching method
 - parallel solver: enhancements to spatial grid batching
 - sequential solver: moving code for writing back into functions that derived classes can call
 - parallel solver: do write back of bodies and joints in parallel
 - parallel solver: removed all batching methods except for spatial grid (others were ineffective)
 - parallel solver: added 2D or 3D grid batching options; and a bit of cleanup
 - move btDefaultTaskScheduler into LinearMath project
This commit is contained in:
Lunkhound
2017-06-04 17:57:25 -07:00
parent 94bc897067
commit b8720f2161
25 changed files with 5236 additions and 767 deletions

View File

@@ -226,7 +226,6 @@ SET(BulletExampleBrowser_SRCS
../MultiThreading/b3PosixThreadSupport.cpp
../MultiThreading/b3Win32ThreadSupport.cpp
../MultiThreading/b3ThreadSupportInterface.cpp
../MultiThreading/btTaskScheduler.cpp
../RenderingExamples/TinyRendererSetup.cpp
../RenderingExamples/TimeSeriesCanvas.cpp
../RenderingExamples/TimeSeriesCanvas.h

View File

@@ -29,17 +29,17 @@ class btCollisionShape;
#include "BulletCollision/CollisionDispatch/btCollisionDispatcherMt.h"
#include "BulletDynamics/Dynamics/btSimulationIslandManagerMt.h" // for setSplitIslands()
#include "BulletDynamics/Dynamics/btDiscreteDynamicsWorldMt.h"
#include "BulletDynamics/ConstraintSolver/btSequentialImpulseConstraintSolverMt.h"
#include "BulletDynamics/ConstraintSolver/btSequentialImpulseConstraintSolver.h"
#include "BulletDynamics/ConstraintSolver/btNNCGConstraintSolver.h"
#include "BulletDynamics/MLCPSolvers/btMLCPSolver.h"
#include "BulletDynamics/MLCPSolvers/btSolveProjectedGaussSeidel.h"
#include "BulletDynamics/MLCPSolvers/btDantzigSolver.h"
#include "BulletDynamics/MLCPSolvers/btLemkeSolver.h"
#include "../MultiThreading/btTaskScheduler.h"
static int gNumIslands = 0;
bool gAllowNestedParallelForLoops = false;
class Profiler
{
@@ -52,6 +52,10 @@ public:
kRecordPredictUnconstrainedMotion,
kRecordCreatePredictiveContacts,
kRecordIntegrateTransforms,
kRecordSolverTotal,
kRecordSolverSetup,
kRecordSolverIterations,
kRecordSolverFinish,
kRecordCount
};
@@ -139,6 +143,41 @@ static void profileEndCallback( btDynamicsWorld *world, btScalar timeStep )
}
class MySequentialImpulseConstraintSolverMt : public btSequentialImpulseConstraintSolverMt
{
typedef btSequentialImpulseConstraintSolverMt ParentClass;
public:
BT_DECLARE_ALIGNED_ALLOCATOR();
MySequentialImpulseConstraintSolverMt() {}
// for profiling
virtual btScalar solveGroupCacheFriendlySetup(btCollisionObject** bodies,int numBodies,btPersistentManifold** manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal,btIDebugDraw* debugDrawer) BT_OVERRIDE
{
ProfileHelper prof(Profiler::kRecordSolverSetup);
btScalar ret = ParentClass::solveGroupCacheFriendlySetup(bodies, numBodies, manifoldPtr, numManifolds, constraints, numConstraints, infoGlobal, debugDrawer );
return ret;
}
virtual btScalar solveGroupCacheFriendlyIterations( btCollisionObject** bodies, int numBodies, btPersistentManifold** manifoldPtr, int numManifolds, btTypedConstraint** constraints, int numConstraints, const btContactSolverInfo& infoGlobal, btIDebugDraw* debugDrawer ) BT_OVERRIDE
{
ProfileHelper prof(Profiler::kRecordSolverIterations);
btScalar ret = ParentClass::solveGroupCacheFriendlyIterations(bodies, numBodies, manifoldPtr, numManifolds, constraints, numConstraints, infoGlobal, debugDrawer );
return ret;
}
virtual btScalar solveGroupCacheFriendlyFinish(btCollisionObject** bodies,int numBodies,const btContactSolverInfo& infoGlobal) BT_OVERRIDE
{
ProfileHelper prof(Profiler::kRecordSolverFinish);
btScalar ret = ParentClass::solveGroupCacheFriendlyFinish(bodies, numBodies, infoGlobal);
return ret;
}
virtual btScalar solveGroup(btCollisionObject** bodies, int numBodies, btPersistentManifold** manifold, int numManifolds, btTypedConstraint** constraints, int numConstraints, const btContactSolverInfo& info, btIDebugDraw* debugDrawer, btDispatcher* dispatcher) BT_OVERRIDE
{
ProfileHelper prof(Profiler::kRecordSolverTotal);
btScalar ret = ParentClass::solveGroup(bodies, numBodies, manifold, numManifolds, constraints, numConstraints, info, debugDrawer, dispatcher);
return ret;
}
};
///
/// MyCollisionDispatcher -- subclassed for profiling purposes
///
@@ -218,6 +257,8 @@ btConstraintSolver* createSolverByType( SolverType t )
{
case SOLVER_TYPE_SEQUENTIAL_IMPULSE:
return new btSequentialImpulseConstraintSolver();
case SOLVER_TYPE_SEQUENTIAL_IMPULSE_MT:
return new MySequentialImpulseConstraintSolverMt();
case SOLVER_TYPE_NNCG:
return new btNNCGConstraintSolver();
case SOLVER_TYPE_MLCP_PGS:
@@ -253,7 +294,7 @@ public:
{
addTaskScheduler( btGetSequentialTaskScheduler() );
#if BT_THREADSAFE
if ( btITaskScheduler* ts = createDefaultTaskScheduler() )
if ( btITaskScheduler* ts = btCreateDefaultTaskScheduler() )
{
m_allocatedTaskSchedulers.push_back( ts );
addTaskScheduler( ts );
@@ -310,7 +351,7 @@ static bool gDisplayProfileInfo = true;
static bool gMultithreadedWorld = false;
static bool gDisplayProfileInfo = false;
#endif
static SolverType gSolverType = SOLVER_TYPE_SEQUENTIAL_IMPULSE;
static SolverType gSolverType = SOLVER_TYPE_SEQUENTIAL_IMPULSE_MT;
static int gSolverMode = SOLVER_SIMD |
SOLVER_USE_WARMSTARTING |
// SOLVER_RANDMIZE_ORDER |
@@ -318,9 +359,11 @@ static int gSolverMode = SOLVER_SIMD |
// SOLVER_USE_2_FRICTION_DIRECTIONS |
0;
static btScalar gSliderSolverIterations = 10.0f; // should be int
static btScalar gSliderNumThreads = 1.0f; // should be int
static btScalar gSliderIslandBatchingThreshold = 0.0f; // should be int
static btScalar gSliderMinBatchSize = btScalar(btSequentialImpulseConstraintSolverMt::s_minBatchSize); // should be int
static btScalar gSliderMaxBatchSize = btScalar(btSequentialImpulseConstraintSolverMt::s_maxBatchSize); // should be int
static btScalar gSliderLeastSquaresResidualThreshold = 0.0f;
////////////////////////////////////
CommonRigidBodyMTBase::CommonRigidBodyMTBase( struct GUIHelperInterface* helper )
@@ -419,6 +462,23 @@ void setTaskSchedulerComboBoxCallback(int combobox, const char* item, void* user
}
void setBatchingMethodComboBoxCallback(int combobox, const char* item, void* userPointer)
{
#if BT_THREADSAFE
const char** items = static_cast<const char**>( userPointer );
for ( int i = 0; i < btBatchedConstraints::BATCHING_METHOD_COUNT; ++i )
{
if ( strcmp( item, items[ i ] ) == 0 )
{
// change the task scheduler
btSequentialImpulseConstraintSolverMt::s_contactBatchingMethod = static_cast<btBatchedConstraints::BatchingMethod>( i );
break;
}
}
#endif // #if BT_THREADSAFE
}
static void setThreadCountCallback(float val, void* userPtr)
{
#if BT_THREADSAFE
@@ -435,13 +495,43 @@ static void setSolverIterationCountCallback(float val, void* userPtr)
}
}
static void setLargeIslandManifoldCountCallback( float val, void* userPtr )
{
btSequentialImpulseConstraintSolverMt::s_minimumContactManifoldsForBatching = int( gSliderIslandBatchingThreshold );
}
static void setMinBatchSizeCallback( float val, void* userPtr )
{
gSliderMaxBatchSize = (std::max)(gSliderMinBatchSize, gSliderMaxBatchSize);
btSequentialImpulseConstraintSolverMt::s_minBatchSize = int(gSliderMinBatchSize);
btSequentialImpulseConstraintSolverMt::s_maxBatchSize = int(gSliderMaxBatchSize);
}
static void setMaxBatchSizeCallback( float val, void* userPtr )
{
gSliderMinBatchSize = (std::min)(gSliderMinBatchSize, gSliderMaxBatchSize);
btSequentialImpulseConstraintSolverMt::s_minBatchSize = int(gSliderMinBatchSize);
btSequentialImpulseConstraintSolverMt::s_maxBatchSize = int(gSliderMaxBatchSize);
}
static void setLeastSquaresResidualThresholdCallback( float val, void* userPtr )
{
if (btDiscreteDynamicsWorld* world = reinterpret_cast<btDiscreteDynamicsWorld*>(userPtr))
{
world->getSolverInfo().m_leastSquaresResidualThreshold = gSliderLeastSquaresResidualThreshold;
}
}
void CommonRigidBodyMTBase::createEmptyDynamicsWorld()
{
gNumIslands = 0;
m_solverType = gSolverType;
#if BT_THREADSAFE && (BT_USE_OPENMP || BT_USE_PPL || BT_USE_TBB)
#if BT_THREADSAFE
btAssert( btGetTaskScheduler() != NULL );
m_multithreadCapable = true;
if (NULL != btGetTaskScheduler() && gTaskSchedulerMgr.getNumTaskSchedulers() > 1)
{
m_multithreadCapable = true;
}
#endif
if ( gMultithreadedWorld )
{
@@ -486,7 +576,12 @@ void CommonRigidBodyMTBase::createEmptyDynamicsWorld()
m_broadphase = new btDbvtBroadphase();
m_solver = createSolverByType( m_solverType );
SolverType solverType = m_solverType;
if ( solverType == SOLVER_TYPE_SEQUENTIAL_IMPULSE_MT )
{
solverType = SOLVER_TYPE_SEQUENTIAL_IMPULSE;
}
m_solver = createSolverByType( solverType );
m_dynamicsWorld = new btDiscreteDynamicsWorld( m_dispatcher, m_broadphase, m_solver, m_collisionConfiguration );
}
@@ -494,6 +589,7 @@ void CommonRigidBodyMTBase::createEmptyDynamicsWorld()
m_dynamicsWorld->setInternalTickCallback( profileEndCallback, NULL, false );
m_dynamicsWorld->setGravity( btVector3( 0, -10, 0 ) );
m_dynamicsWorld->getSolverInfo().m_solverMode = gSolverMode;
m_dynamicsWorld->getSolverInfo().m_numIterations = btMax(1, int(gSliderSolverIterations));
createDefaultParameters();
}
@@ -504,16 +600,18 @@ void CommonRigidBodyMTBase::createDefaultParameters()
{
// create a button to toggle multithreaded world
ButtonParams button( "Multithreaded world enable", 0, true );
button.m_initialState = gMultithreadedWorld;
button.m_userPointer = &gMultithreadedWorld;
bool* ptr = &gMultithreadedWorld;
button.m_initialState = *ptr;
button.m_userPointer = ptr;
button.m_callback = boolPtrButtonCallback;
m_guiHelper->getParameterInterface()->registerButtonParameter( button );
}
{
// create a button to toggle profile printing
ButtonParams button( "Display solver info", 0, true );
button.m_initialState = gDisplayProfileInfo;
button.m_userPointer = &gDisplayProfileInfo;
bool* ptr = &gDisplayProfileInfo;
button.m_initialState = *ptr;
button.m_userPointer = ptr;
button.m_callback = boolPtrButtonCallback;
m_guiHelper->getParameterInterface()->registerButtonParameter( button );
}
@@ -544,6 +642,16 @@ void CommonRigidBodyMTBase::createDefaultParameters()
slider.m_clampToIntegers = true;
m_guiHelper->getParameterInterface()->registerSliderFloatParameter( slider );
}
{
// a slider for the solver leastSquaresResidualThreshold (used to run fewer solver iterations when convergence is good)
SliderParams slider( "Solver residual thresh", &gSliderLeastSquaresResidualThreshold );
slider.m_minVal = 0.0f;
slider.m_maxVal = 0.25f;
slider.m_callback = setLeastSquaresResidualThresholdCallback;
slider.m_userPointer = m_dynamicsWorld;
slider.m_clampToIntegers = false;
m_guiHelper->getParameterInterface()->registerSliderFloatParameter( slider );
}
{
ButtonParams button( "Solver use SIMD", 0, true );
button.m_buttonId = SOLVER_SIMD;
@@ -618,20 +726,86 @@ void CommonRigidBodyMTBase::createDefaultParameters()
m_guiHelper->getParameterInterface()->registerComboBox( comboParams );
}
{
// create a slider to set the number of threads to use
int numThreads = btGetTaskScheduler()->getNumThreads();
// if slider has not been set yet (by another demo),
if ( gSliderNumThreads <= 1.0f )
{
// create a slider to set the number of threads to use
int numThreads = btGetTaskScheduler()->getNumThreads();
gSliderNumThreads = float( numThreads );
}
int maxNumThreads = btGetTaskScheduler()->getMaxNumThreads();
SliderParams slider("Thread count", &gSliderNumThreads);
slider.m_minVal = 1.0f;
slider.m_maxVal = float( BT_MAX_THREAD_COUNT );
slider.m_maxVal = float( maxNumThreads );
slider.m_callback = setThreadCountCallback;
slider.m_clampToIntegers = true;
m_guiHelper->getParameterInterface()->registerSliderFloatParameter( slider );
}
{
// a slider for the number of manifolds an island needs to be too large for parallel dispatch
if (gSliderIslandBatchingThreshold < 1.0)
{
gSliderIslandBatchingThreshold = float(btSequentialImpulseConstraintSolverMt::s_minimumContactManifoldsForBatching);
}
SliderParams slider( "IslandBatchThresh", &gSliderIslandBatchingThreshold );
slider.m_minVal = 1.0f;
slider.m_maxVal = 2000.0f;
slider.m_callback = setLargeIslandManifoldCountCallback;
slider.m_userPointer = NULL;
slider.m_clampToIntegers = true;
m_guiHelper->getParameterInterface()->registerSliderFloatParameter( slider );
}
{
// create a combo box for selecting the batching method
static const char* sBatchingMethodComboBoxItems[ btBatchedConstraints::BATCHING_METHOD_COUNT ];
{
sBatchingMethodComboBoxItems[ btBatchedConstraints::BATCHING_METHOD_SPATIAL_GRID_2D ] = "Batching: 2D Grid";
sBatchingMethodComboBoxItems[ btBatchedConstraints::BATCHING_METHOD_SPATIAL_GRID_3D ] = "Batching: 3D Grid";
};
ComboBoxParams comboParams;
comboParams.m_userPointer = sBatchingMethodComboBoxItems;
comboParams.m_numItems = btBatchedConstraints::BATCHING_METHOD_COUNT;
comboParams.m_startItem = static_cast<int>(btSequentialImpulseConstraintSolverMt::s_contactBatchingMethod);
comboParams.m_items = sBatchingMethodComboBoxItems;
comboParams.m_callback = setBatchingMethodComboBoxCallback;
m_guiHelper->getParameterInterface()->registerComboBox( comboParams );
}
{
// a slider for the sequentialImpulseConstraintSolverMt min batch size (when batching)
SliderParams slider( "Min batch size", &gSliderMinBatchSize );
slider.m_minVal = 1.0f;
slider.m_maxVal = 1000.0f;
slider.m_callback = setMinBatchSizeCallback;
slider.m_userPointer = NULL;
slider.m_clampToIntegers = true;
m_guiHelper->getParameterInterface()->registerSliderFloatParameter( slider );
}
{
// a slider for the sequentialImpulseConstraintSolverMt max batch size (when batching)
SliderParams slider( "Max batch size", &gSliderMaxBatchSize );
slider.m_minVal = 1.0f;
slider.m_maxVal = 1000.0f;
slider.m_callback = setMaxBatchSizeCallback;
slider.m_userPointer = NULL;
slider.m_clampToIntegers = true;
m_guiHelper->getParameterInterface()->registerSliderFloatParameter( slider );
}
{
// create a button to toggle debug drawing of batching visualization
ButtonParams button( "Visualize batching", 0, true );
bool* ptr = &btBatchedConstraints::s_debugDrawBatches;
button.m_initialState = *ptr;
button.m_userPointer = ptr;
button.m_callback = boolPtrButtonCallback;
m_guiHelper->getParameterInterface()->registerButtonParameter( button );
}
{
ButtonParams button( "Allow Nested ParallelFor", 0, true );
button.m_initialState = btSequentialImpulseConstraintSolverMt::s_allowNestedParallelForLoops;
button.m_userPointer = &btSequentialImpulseConstraintSolverMt::s_allowNestedParallelForLoops;
button.m_callback = boolPtrButtonCallback;
m_guiHelper->getParameterInterface()->registerButtonParameter( button );
}
#endif // #if BT_THREADSAFE
}
}
@@ -643,6 +817,7 @@ void CommonRigidBodyMTBase::drawScreenText()
int xCoord = 400;
int yCoord = 30;
int yStep = 30;
int indent = 30;
if (m_solverType != gSolverType)
{
sprintf( msg, "restart example to change solver type" );
@@ -721,6 +896,34 @@ void CommonRigidBodyMTBase::drawScreenText()
m_guiHelper->getAppInterface()->drawText( msg, xCoord, yCoord, 0.4f );
yCoord += yStep;
sprintf( msg,
"SolverTotal %5.3f ms",
gProfiler.getAverageTime( Profiler::kRecordSolverTotal )*0.001f
);
m_guiHelper->getAppInterface()->drawText( msg, xCoord, yCoord, 0.4f );
yCoord += yStep;
sprintf( msg,
"SolverSetup %5.3f ms",
gProfiler.getAverageTime( Profiler::kRecordSolverSetup )*0.001f
);
m_guiHelper->getAppInterface()->drawText( msg, xCoord + indent, yCoord, 0.4f );
yCoord += yStep;
sprintf( msg,
"SolverIterations %5.3f ms",
gProfiler.getAverageTime( Profiler::kRecordSolverIterations )*0.001f
);
m_guiHelper->getAppInterface()->drawText( msg, xCoord + indent, yCoord, 0.4f );
yCoord += yStep;
sprintf( msg,
"SolverFinish %5.3f ms",
gProfiler.getAverageTime( Profiler::kRecordSolverFinish )*0.001f
);
m_guiHelper->getAppInterface()->drawText( msg, xCoord + indent, yCoord, 0.4f );
yCoord += yStep;
sprintf( msg,
"PredictUnconstrainedMotion %5.3f ms",
gProfiler.getAverageTime( Profiler::kRecordPredictUnconstrainedMotion )*0.001f

View File

@@ -14,6 +14,7 @@
enum SolverType
{
SOLVER_TYPE_SEQUENTIAL_IMPULSE,
SOLVER_TYPE_SEQUENTIAL_IMPULSE_MT,
SOLVER_TYPE_NNCG,
SOLVER_TYPE_MLCP_PGS,
SOLVER_TYPE_MLCP_DANTZIG,
@@ -27,6 +28,7 @@ inline const char* getSolverTypeName( SolverType t )
switch (t)
{
case SOLVER_TYPE_SEQUENTIAL_IMPULSE: return "SequentialImpulse";
case SOLVER_TYPE_SEQUENTIAL_IMPULSE_MT: return "SequentialImpulseMt";
case SOLVER_TYPE_NNCG: return "NNCG";
case SOLVER_TYPE_MLCP_PGS: return "MLCP ProjectedGaussSeidel";
case SOLVER_TYPE_MLCP_DANTZIG: return "MLCP Dantzig";

View File

@@ -25,10 +25,10 @@ subject to the following restrictions:
static btScalar gSliderStackRows = 8.0f;
static btScalar gSliderStackColumns = 6.0f;
static btScalar gSliderStackHeight = 10.0f;
static btScalar gSliderStackWidth = 1.0f;
static btScalar gSliderStackRows = 1.0f;
static btScalar gSliderStackColumns = 1.0f;
static btScalar gSliderStackHeight = 15.0f;
static btScalar gSliderStackWidth = 8.0f;
static btScalar gSliderGroundHorizontalAmplitude = 0.0f;
static btScalar gSliderGroundVerticalAmplitude = 0.0f;
static btScalar gSliderGroundTilt = 0.0f;
@@ -75,6 +75,21 @@ public:
btScalar tilt = gSliderGroundTilt * SIMD_2_PI / 360.0f;
return btQuaternion( btVector3( 1.0f, 0.0f, 0.0f ), tilt );
}
struct TestSumBody : public btIParallelSumBody
{
virtual btScalar sumLoop( int iBegin, int iEnd ) const BT_OVERRIDE
{
btScalar sum = 0.0f;
for (int i = iBegin; i < iEnd; ++i)
{
if (i > 0)
{
sum += 1.0f / btScalar(i);
}
}
return sum;
}
};
virtual void stepSimulation( float deltaTime ) BT_OVERRIDE
{
if ( m_dynamicsWorld )
@@ -115,6 +130,14 @@ public:
// always step by 1/60 for benchmarking
m_dynamicsWorld->stepSimulation( 1.0f / 60.0f, 0 );
}
#if 0
{
// test parallelSum
TestSumBody testSumBody;
float testSum = btParallelSum( 1, 10000000, 10000, testSumBody );
printf( "sum = %f\n", testSum );
}
#endif
}
virtual void initPhysics() BT_OVERRIDE;

View File

@@ -1,448 +0,0 @@
#include "LinearMath/btTransform.h"
#include "../Utils/b3Clock.h"
#include "LinearMath/btAlignedObjectArray.h"
#include "LinearMath/btThreads.h"
#include "LinearMath/btQuickprof.h"
#include <stdio.h>
#include <algorithm>
typedef void( *btThreadFunc )( void* userPtr, void* lsMemory );
typedef void* ( *btThreadLocalStorageFunc )();
#if BT_THREADSAFE
#if defined( _WIN32 )
#include "b3Win32ThreadSupport.h"
b3ThreadSupportInterface* createThreadSupport( int numThreads, btThreadFunc threadFunc, btThreadLocalStorageFunc localStoreFunc, const char* uniqueName )
{
b3Win32ThreadSupport::Win32ThreadConstructionInfo constructionInfo( uniqueName, threadFunc, localStoreFunc, numThreads );
//constructionInfo.m_priority = 0; // highest priority (the default) -- can cause erratic performance when numThreads > numCores
// we don't want worker threads to be higher priority than the main thread or the main thread could get
// totally shut out and unable to tell the workers to stop
constructionInfo.m_priority = -1; // normal priority
b3Win32ThreadSupport* threadSupport = new b3Win32ThreadSupport( constructionInfo );
return threadSupport;
}
#else // #if defined( _WIN32 )
#include "b3PosixThreadSupport.h"
b3ThreadSupportInterface* createThreadSupport( int numThreads, btThreadFunc threadFunc, btThreadLocalStorageFunc localStoreFunc, const char* uniqueName)
{
b3PosixThreadSupport::ThreadConstructionInfo constructionInfo( uniqueName, threadFunc, localStoreFunc, numThreads );
b3ThreadSupportInterface* threadSupport = new b3PosixThreadSupport( constructionInfo );
return threadSupport;
}
#endif // #else // #if defined( _WIN32 )
///
/// getNumHardwareThreads()
///
///
/// https://stackoverflow.com/questions/150355/programmatically-find-the-number-of-cores-on-a-machine
///
#if __cplusplus >= 201103L
#include <thread>
int getNumHardwareThreads()
{
return std::thread::hardware_concurrency();
}
#elif defined( _WIN32 )
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
int getNumHardwareThreads()
{
// caps out at 32
SYSTEM_INFO info;
GetSystemInfo( &info );
return info.dwNumberOfProcessors;
}
#else
int getNumHardwareThreads()
{
return 0; // don't know
}
#endif
struct WorkerThreadStatus
{
enum Type
{
kInvalid,
kWaitingForWork,
kWorking,
kSleeping,
};
};
struct IJob
{
virtual void executeJob() = 0;
};
class ParallelForJob : public IJob
{
const btIParallelForBody* mBody;
int mBegin;
int mEnd;
public:
ParallelForJob()
{
mBody = NULL;
mBegin = 0;
mEnd = 0;
}
void init( int iBegin, int iEnd, const btIParallelForBody& body )
{
mBody = &body;
mBegin = iBegin;
mEnd = iEnd;
}
virtual void executeJob() BT_OVERRIDE
{
BT_PROFILE( "executeJob" );
// call the functor body to do the work
mBody->forLoop( mBegin, mEnd );
}
};
struct JobContext
{
JobContext()
{
m_queueLock = NULL;
m_headIndex = 0;
m_tailIndex = 0;
m_workersShouldCheckQueue = false;
m_useSpinMutex = false;
}
b3CriticalSection* m_queueLock;
btSpinMutex m_mutex;
volatile bool m_workersShouldCheckQueue;
btAlignedObjectArray<IJob*> m_jobQueue;
bool m_queueIsEmpty;
int m_tailIndex;
int m_headIndex;
bool m_useSpinMutex;
void lockQueue()
{
if ( m_useSpinMutex )
{
m_mutex.lock();
}
else
{
m_queueLock->lock();
}
}
void unlockQueue()
{
if ( m_useSpinMutex )
{
m_mutex.unlock();
}
else
{
m_queueLock->unlock();
}
}
void clearQueue()
{
lockQueue();
m_headIndex = 0;
m_tailIndex = 0;
m_queueIsEmpty = true;
unlockQueue();
m_jobQueue.resizeNoInitialize( 0 );
}
void submitJob( IJob* job )
{
m_jobQueue.push_back( job );
lockQueue();
m_tailIndex++;
m_queueIsEmpty = false;
unlockQueue();
}
IJob* consumeJob()
{
if ( m_queueIsEmpty )
{
// lock free path. even if this is taken erroneously it isn't harmful
return NULL;
}
IJob* job = NULL;
lockQueue();
if ( !m_queueIsEmpty )
{
job = m_jobQueue[ m_headIndex++ ];
if ( m_headIndex == m_tailIndex )
{
m_queueIsEmpty = true;
}
}
unlockQueue();
return job;
}
};
struct WorkerThreadLocalStorage
{
int threadId;
WorkerThreadStatus::Type status;
};
static void WorkerThreadFunc( void* userPtr, void* lsMemory )
{
BT_PROFILE( "WorkerThreadFunc" );
WorkerThreadLocalStorage* localStorage = (WorkerThreadLocalStorage*) lsMemory;
localStorage->status = WorkerThreadStatus::kWaitingForWork;
//printf( "WorkerThreadFunc: worker %d start working\n", localStorage->threadId );
JobContext* jobContext = (JobContext*) userPtr;
while ( jobContext->m_workersShouldCheckQueue )
{
if ( IJob* job = jobContext->consumeJob() )
{
localStorage->status = WorkerThreadStatus::kWorking;
job->executeJob();
localStorage->status = WorkerThreadStatus::kWaitingForWork;
}
else
{
// todo: spin wait a bit to avoid hammering the empty queue
}
}
//printf( "WorkerThreadFunc stop working\n" );
localStorage->status = WorkerThreadStatus::kSleeping;
// go idle
}
static void* WorkerThreadAllocFunc()
{
return new WorkerThreadLocalStorage;
}
class btTaskSchedulerDefault : public btITaskScheduler
{
JobContext m_jobContext;
b3ThreadSupportInterface* m_threadSupport;
btAlignedObjectArray<ParallelForJob> m_jobs;
btSpinMutex m_antiNestingLock; // prevent nested parallel-for
int m_numThreads;
int m_numWorkerThreads;
int m_numWorkersRunning;
public:
btTaskSchedulerDefault() : btITaskScheduler("ThreadSupport")
{
m_threadSupport = NULL;
m_numThreads = getNumHardwareThreads();
// if can't detect number of cores,
if ( m_numThreads == 0 )
{
// take a guess
m_numThreads = 4;
}
m_numWorkerThreads = m_numThreads - 1;
m_numWorkersRunning = 0;
}
virtual ~btTaskSchedulerDefault()
{
shutdown();
}
void init()
{
int maxNumWorkerThreads = BT_MAX_THREAD_COUNT - 1;
m_threadSupport = createThreadSupport( maxNumWorkerThreads, WorkerThreadFunc, WorkerThreadAllocFunc, "TaskScheduler" );
m_jobContext.m_queueLock = m_threadSupport->createCriticalSection();
for ( int i = 0; i < maxNumWorkerThreads; i++ )
{
WorkerThreadLocalStorage* storage = (WorkerThreadLocalStorage*) m_threadSupport->getThreadLocalMemory( i );
btAssert( storage );
storage->threadId = i;
storage->status = WorkerThreadStatus::kSleeping;
}
setWorkersActive( false ); // no work for them yet
}
virtual void shutdown()
{
setWorkersActive( false );
waitForWorkersToSleep();
m_threadSupport->deleteCriticalSection( m_jobContext.m_queueLock );
m_jobContext.m_queueLock = NULL;
delete m_threadSupport;
m_threadSupport = NULL;
}
void setWorkersActive( bool active )
{
m_jobContext.m_workersShouldCheckQueue = active;
}
virtual int getMaxNumThreads() const BT_OVERRIDE
{
return BT_MAX_THREAD_COUNT;
}
virtual int getNumThreads() const BT_OVERRIDE
{
return m_numThreads;
}
virtual void setNumThreads( int numThreads ) BT_OVERRIDE
{
m_numThreads = btMax( btMin(numThreads, int(BT_MAX_THREAD_COUNT)), 1 );
m_numWorkerThreads = m_numThreads - 1;
}
void waitJobs()
{
BT_PROFILE( "waitJobs" );
// have the main thread work until the job queue is empty
for ( ;; )
{
if ( IJob* job = m_jobContext.consumeJob() )
{
job->executeJob();
}
else
{
break;
}
}
// done with jobs for now, tell workers to rest
setWorkersActive( false );
waitForWorkersToSleep();
}
void wakeWorkers()
{
BT_PROFILE( "wakeWorkers" );
btAssert( m_jobContext.m_workersShouldCheckQueue );
// tell each worker thread to start working
for ( int i = 0; i < m_numWorkerThreads; i++ )
{
m_threadSupport->runTask( B3_THREAD_SCHEDULE_TASK, &m_jobContext, i );
m_numWorkersRunning++;
}
}
void waitForWorkersToSleep()
{
BT_PROFILE( "waitForWorkersToSleep" );
while ( m_numWorkersRunning > 0 )
{
int iThread;
int threadStatus;
m_threadSupport->waitForResponse( &iThread, &threadStatus ); // wait for worker threads to finish working
m_numWorkersRunning--;
}
//m_threadSupport->waitForAllTasksToComplete();
for ( int i = 0; i < m_numWorkerThreads; i++ )
{
//m_threadSupport->waitForTaskCompleted( i );
WorkerThreadLocalStorage* storage = (WorkerThreadLocalStorage*) m_threadSupport->getThreadLocalMemory( i );
btAssert( storage );
btAssert( storage->status == WorkerThreadStatus::kSleeping );
}
}
virtual void parallelFor( int iBegin, int iEnd, int grainSize, const btIParallelForBody& body ) BT_OVERRIDE
{
BT_PROFILE( "parallelFor_ThreadSupport" );
btAssert( iEnd >= iBegin );
btAssert( grainSize >= 1 );
int iterationCount = iEnd - iBegin;
if ( iterationCount > grainSize && m_numWorkerThreads > 0 && m_antiNestingLock.tryLock() )
{
int jobCount = ( iterationCount + grainSize - 1 ) / grainSize;
btAssert( jobCount >= 2 ); // need more than one job for multithreading
if ( jobCount > m_jobs.size() )
{
m_jobs.resize( jobCount );
}
if ( jobCount > m_jobContext.m_jobQueue.capacity() )
{
m_jobContext.m_jobQueue.reserve( jobCount );
}
m_jobContext.clearQueue();
// prepare worker threads for incoming work
setWorkersActive( true );
wakeWorkers();
// submit all of the jobs
int iJob = 0;
for ( int i = iBegin; i < iEnd; i += grainSize )
{
btAssert( iJob < jobCount );
int iE = btMin( i + grainSize, iEnd );
ParallelForJob& job = m_jobs[ iJob ];
job.init( i, iE, body );
m_jobContext.submitJob( &job );
iJob++;
}
// put the main thread to work on emptying the job queue and then wait for all workers to finish
waitJobs();
m_antiNestingLock.unlock();
}
else
{
BT_PROFILE( "parallelFor_mainThread" );
// just run on main thread
body.forLoop( iBegin, iEnd );
}
}
};
btITaskScheduler* createDefaultTaskScheduler()
{
btTaskSchedulerDefault* ts = new btTaskSchedulerDefault();
ts->init();
return ts;
}
#else // #if BT_THREADSAFE
btITaskScheduler* createDefaultTaskScheduler()
{
return NULL;
}
#endif // #else // #if BT_THREADSAFE

View File

@@ -1,26 +0,0 @@
/*
Copyright (c) 2003-2014 Erwin Coumans http://bullet.googlecode.com
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef BT_TASK_SCHEDULER_H
#define BT_TASK_SCHEDULER_H
class btITaskScheduler;
btITaskScheduler* createDefaultTaskScheduler();
#endif // BT_TASK_SCHEDULER_H