parallel solver: various changes

- threading: adding btSequentialImpulseConstraintSolverMt
 - task scheduler: added parallelSum so that parallel solver can compute residuals
 - CommonRigidBodyMTBase: add slider for solver least squares residual and allow multithreading without needing OpenMP, TBB, or PPL
 - taskScheduler: don't wait for workers to sleep/signal at the end of each parallel block
 - parallel solver: convertContacts split into an allocContactConstraints and setupContactConstraints stage, the latter of which is done in parallel
 - parallel solver: rolling friction is now interleaved along with normal friction
 - parallel solver: batchified split impulse solving + some cleanup
 - parallel solver: sorting batches from largest to smallest
 - parallel solver: added parallel batch creation
 - parallel solver: added warmstartingWriteBackContacts func + other cleanup
 - task scheduler: truncate low bits to preserve determinism with parallelSum
 - parallel solver: reducing dynamic mem allocs and trying to parallelize more of the batch setup
 - parallel solver: parallelize updating constraint batch ids for merging
 - parallel solver: adding debug visualization
 - task scheduler: make TBB task scheduler parallelSum deterministic
 - parallel solver: split batch gen code into separate file; allow selection of batch gen method
 - task scheduler: add sleepWorkerThreadsHint() at end of simulation
 - parallel solver: added grain size per phase
 - task Scheduler: fix for strange threading issue; also no need for main thread to wait for workers to sleep
 - base constraint solver: break out joint setup into separate function for profiling/overriding
 - parallel solver: allow different batching method for contacts vs joints
 - base constraint solver: add convertJoint and convertBodies to make it possible to parallelize joint and body conversion
 - parallel solver: convert joints and bodies in parallel now
 - parallel solver: speed up batch creation with run-length encoding
 - parallel solver: batch gen: run-length expansion in parallel; collect constraint info in parallel
 - parallel solver: adding spatial grid batching method
 - parallel solver: enhancements to spatial grid batching
 - sequential solver: moving code for writing back into functions that derived classes can call
 - parallel solver: do write back of bodies and joints in parallel
 - parallel solver: removed all batching methods except for spatial grid (others were ineffective)
 - parallel solver: added 2D or 3D grid batching options; and a bit of cleanup
 - move btDefaultTaskScheduler into LinearMath project
This commit is contained in:
Lunkhound
2017-06-04 17:57:25 -07:00
parent 94bc897067
commit b8720f2161
25 changed files with 5236 additions and 767 deletions

View File

@@ -28,14 +28,14 @@ OPTION(USE_GRAPHICAL_BENCHMARK "Use Graphical Benchmark" ON)
OPTION(BUILD_SHARED_LIBS "Use shared libraries" OFF) OPTION(BUILD_SHARED_LIBS "Use shared libraries" OFF)
OPTION(USE_SOFT_BODY_MULTI_BODY_DYNAMICS_WORLD "Use btSoftMultiBodyDynamicsWorld" ON) OPTION(USE_SOFT_BODY_MULTI_BODY_DYNAMICS_WORLD "Use btSoftMultiBodyDynamicsWorld" ON)
OPTION(BULLET2_USE_THREAD_LOCKS "Build Bullet 2 libraries with mutex locking around certain operations (required for multi-threading)" OFF) OPTION(BULLET2_MULTITHREADING "Build Bullet 2 libraries with mutex locking around certain operations (required for multi-threading)" OFF)
IF (BULLET2_USE_THREAD_LOCKS) IF (BULLET2_MULTITHREADING)
OPTION(BULLET2_USE_OPEN_MP_MULTITHREADING "Build Bullet 2 with support for multi-threading with OpenMP (requires a compiler with OpenMP support)" OFF) OPTION(BULLET2_USE_OPEN_MP_MULTITHREADING "Build Bullet 2 with support for multi-threading with OpenMP (requires a compiler with OpenMP support)" OFF)
OPTION(BULLET2_USE_TBB_MULTITHREADING "Build Bullet 2 with support for multi-threading with Intel Threading Building Blocks (requires the TBB library to be already installed)" OFF) OPTION(BULLET2_USE_TBB_MULTITHREADING "Build Bullet 2 with support for multi-threading with Intel Threading Building Blocks (requires the TBB library to be already installed)" OFF)
IF (MSVC) IF (MSVC)
OPTION(BULLET2_USE_PPL_MULTITHREADING "Build Bullet 2 with support for multi-threading with Microsoft Parallel Patterns Library (requires MSVC compiler)" OFF) OPTION(BULLET2_USE_PPL_MULTITHREADING "Build Bullet 2 with support for multi-threading with Microsoft Parallel Patterns Library (requires MSVC compiler)" OFF)
ENDIF (MSVC) ENDIF (MSVC)
ENDIF (BULLET2_USE_THREAD_LOCKS) ENDIF (BULLET2_MULTITHREADING)
IF(NOT WIN32) IF(NOT WIN32)
@@ -225,12 +225,15 @@ IF(USE_GRAPHICAL_BENCHMARK)
ADD_DEFINITIONS( -DUSE_GRAPHICAL_BENCHMARK) ADD_DEFINITIONS( -DUSE_GRAPHICAL_BENCHMARK)
ENDIF (USE_GRAPHICAL_BENCHMARK) ENDIF (USE_GRAPHICAL_BENCHMARK)
IF(BULLET2_USE_THREAD_LOCKS) IF(BULLET2_MULTITHREADING)
ADD_DEFINITIONS( -DBT_THREADSAFE=1 ) ADD_DEFINITIONS( -DBT_THREADSAFE=1 )
IF (NOT MSVC) IF (NOT MSVC)
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
ENDIF (NOT MSVC) ENDIF (NOT MSVC)
ENDIF (BULLET2_USE_THREAD_LOCKS) IF (NOT WIN32)
LINK_LIBRARIES( pthread )
ENDIF (NOT WIN32)
ENDIF (BULLET2_MULTITHREADING)
IF (BULLET2_USE_OPEN_MP_MULTITHREADING) IF (BULLET2_USE_OPEN_MP_MULTITHREADING)
ADD_DEFINITIONS("-DBT_USE_OPENMP=1") ADD_DEFINITIONS("-DBT_USE_OPENMP=1")

View File

@@ -226,7 +226,6 @@ SET(BulletExampleBrowser_SRCS
../MultiThreading/b3PosixThreadSupport.cpp ../MultiThreading/b3PosixThreadSupport.cpp
../MultiThreading/b3Win32ThreadSupport.cpp ../MultiThreading/b3Win32ThreadSupport.cpp
../MultiThreading/b3ThreadSupportInterface.cpp ../MultiThreading/b3ThreadSupportInterface.cpp
../MultiThreading/btTaskScheduler.cpp
../RenderingExamples/TinyRendererSetup.cpp ../RenderingExamples/TinyRendererSetup.cpp
../RenderingExamples/TimeSeriesCanvas.cpp ../RenderingExamples/TimeSeriesCanvas.cpp
../RenderingExamples/TimeSeriesCanvas.h ../RenderingExamples/TimeSeriesCanvas.h

View File

@@ -29,17 +29,17 @@ class btCollisionShape;
#include "BulletCollision/CollisionDispatch/btCollisionDispatcherMt.h" #include "BulletCollision/CollisionDispatch/btCollisionDispatcherMt.h"
#include "BulletDynamics/Dynamics/btSimulationIslandManagerMt.h" // for setSplitIslands() #include "BulletDynamics/Dynamics/btSimulationIslandManagerMt.h" // for setSplitIslands()
#include "BulletDynamics/Dynamics/btDiscreteDynamicsWorldMt.h" #include "BulletDynamics/Dynamics/btDiscreteDynamicsWorldMt.h"
#include "BulletDynamics/ConstraintSolver/btSequentialImpulseConstraintSolverMt.h"
#include "BulletDynamics/ConstraintSolver/btSequentialImpulseConstraintSolver.h" #include "BulletDynamics/ConstraintSolver/btSequentialImpulseConstraintSolver.h"
#include "BulletDynamics/ConstraintSolver/btNNCGConstraintSolver.h" #include "BulletDynamics/ConstraintSolver/btNNCGConstraintSolver.h"
#include "BulletDynamics/MLCPSolvers/btMLCPSolver.h" #include "BulletDynamics/MLCPSolvers/btMLCPSolver.h"
#include "BulletDynamics/MLCPSolvers/btSolveProjectedGaussSeidel.h" #include "BulletDynamics/MLCPSolvers/btSolveProjectedGaussSeidel.h"
#include "BulletDynamics/MLCPSolvers/btDantzigSolver.h" #include "BulletDynamics/MLCPSolvers/btDantzigSolver.h"
#include "BulletDynamics/MLCPSolvers/btLemkeSolver.h" #include "BulletDynamics/MLCPSolvers/btLemkeSolver.h"
#include "../MultiThreading/btTaskScheduler.h"
static int gNumIslands = 0; static int gNumIslands = 0;
bool gAllowNestedParallelForLoops = false;
class Profiler class Profiler
{ {
@@ -52,6 +52,10 @@ public:
kRecordPredictUnconstrainedMotion, kRecordPredictUnconstrainedMotion,
kRecordCreatePredictiveContacts, kRecordCreatePredictiveContacts,
kRecordIntegrateTransforms, kRecordIntegrateTransforms,
kRecordSolverTotal,
kRecordSolverSetup,
kRecordSolverIterations,
kRecordSolverFinish,
kRecordCount kRecordCount
}; };
@@ -139,6 +143,41 @@ static void profileEndCallback( btDynamicsWorld *world, btScalar timeStep )
} }
class MySequentialImpulseConstraintSolverMt : public btSequentialImpulseConstraintSolverMt
{
typedef btSequentialImpulseConstraintSolverMt ParentClass;
public:
BT_DECLARE_ALIGNED_ALLOCATOR();
MySequentialImpulseConstraintSolverMt() {}
// for profiling
virtual btScalar solveGroupCacheFriendlySetup(btCollisionObject** bodies,int numBodies,btPersistentManifold** manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal,btIDebugDraw* debugDrawer) BT_OVERRIDE
{
ProfileHelper prof(Profiler::kRecordSolverSetup);
btScalar ret = ParentClass::solveGroupCacheFriendlySetup(bodies, numBodies, manifoldPtr, numManifolds, constraints, numConstraints, infoGlobal, debugDrawer );
return ret;
}
virtual btScalar solveGroupCacheFriendlyIterations( btCollisionObject** bodies, int numBodies, btPersistentManifold** manifoldPtr, int numManifolds, btTypedConstraint** constraints, int numConstraints, const btContactSolverInfo& infoGlobal, btIDebugDraw* debugDrawer ) BT_OVERRIDE
{
ProfileHelper prof(Profiler::kRecordSolverIterations);
btScalar ret = ParentClass::solveGroupCacheFriendlyIterations(bodies, numBodies, manifoldPtr, numManifolds, constraints, numConstraints, infoGlobal, debugDrawer );
return ret;
}
virtual btScalar solveGroupCacheFriendlyFinish(btCollisionObject** bodies,int numBodies,const btContactSolverInfo& infoGlobal) BT_OVERRIDE
{
ProfileHelper prof(Profiler::kRecordSolverFinish);
btScalar ret = ParentClass::solveGroupCacheFriendlyFinish(bodies, numBodies, infoGlobal);
return ret;
}
virtual btScalar solveGroup(btCollisionObject** bodies, int numBodies, btPersistentManifold** manifold, int numManifolds, btTypedConstraint** constraints, int numConstraints, const btContactSolverInfo& info, btIDebugDraw* debugDrawer, btDispatcher* dispatcher) BT_OVERRIDE
{
ProfileHelper prof(Profiler::kRecordSolverTotal);
btScalar ret = ParentClass::solveGroup(bodies, numBodies, manifold, numManifolds, constraints, numConstraints, info, debugDrawer, dispatcher);
return ret;
}
};
/// ///
/// MyCollisionDispatcher -- subclassed for profiling purposes /// MyCollisionDispatcher -- subclassed for profiling purposes
/// ///
@@ -218,6 +257,8 @@ btConstraintSolver* createSolverByType( SolverType t )
{ {
case SOLVER_TYPE_SEQUENTIAL_IMPULSE: case SOLVER_TYPE_SEQUENTIAL_IMPULSE:
return new btSequentialImpulseConstraintSolver(); return new btSequentialImpulseConstraintSolver();
case SOLVER_TYPE_SEQUENTIAL_IMPULSE_MT:
return new MySequentialImpulseConstraintSolverMt();
case SOLVER_TYPE_NNCG: case SOLVER_TYPE_NNCG:
return new btNNCGConstraintSolver(); return new btNNCGConstraintSolver();
case SOLVER_TYPE_MLCP_PGS: case SOLVER_TYPE_MLCP_PGS:
@@ -253,7 +294,7 @@ public:
{ {
addTaskScheduler( btGetSequentialTaskScheduler() ); addTaskScheduler( btGetSequentialTaskScheduler() );
#if BT_THREADSAFE #if BT_THREADSAFE
if ( btITaskScheduler* ts = createDefaultTaskScheduler() ) if ( btITaskScheduler* ts = btCreateDefaultTaskScheduler() )
{ {
m_allocatedTaskSchedulers.push_back( ts ); m_allocatedTaskSchedulers.push_back( ts );
addTaskScheduler( ts ); addTaskScheduler( ts );
@@ -310,7 +351,7 @@ static bool gDisplayProfileInfo = true;
static bool gMultithreadedWorld = false; static bool gMultithreadedWorld = false;
static bool gDisplayProfileInfo = false; static bool gDisplayProfileInfo = false;
#endif #endif
static SolverType gSolverType = SOLVER_TYPE_SEQUENTIAL_IMPULSE; static SolverType gSolverType = SOLVER_TYPE_SEQUENTIAL_IMPULSE_MT;
static int gSolverMode = SOLVER_SIMD | static int gSolverMode = SOLVER_SIMD |
SOLVER_USE_WARMSTARTING | SOLVER_USE_WARMSTARTING |
// SOLVER_RANDMIZE_ORDER | // SOLVER_RANDMIZE_ORDER |
@@ -318,9 +359,11 @@ static int gSolverMode = SOLVER_SIMD |
// SOLVER_USE_2_FRICTION_DIRECTIONS | // SOLVER_USE_2_FRICTION_DIRECTIONS |
0; 0;
static btScalar gSliderSolverIterations = 10.0f; // should be int static btScalar gSliderSolverIterations = 10.0f; // should be int
static btScalar gSliderNumThreads = 1.0f; // should be int static btScalar gSliderNumThreads = 1.0f; // should be int
static btScalar gSliderIslandBatchingThreshold = 0.0f; // should be int
static btScalar gSliderMinBatchSize = btScalar(btSequentialImpulseConstraintSolverMt::s_minBatchSize); // should be int
static btScalar gSliderMaxBatchSize = btScalar(btSequentialImpulseConstraintSolverMt::s_maxBatchSize); // should be int
static btScalar gSliderLeastSquaresResidualThreshold = 0.0f;
//////////////////////////////////// ////////////////////////////////////
CommonRigidBodyMTBase::CommonRigidBodyMTBase( struct GUIHelperInterface* helper ) CommonRigidBodyMTBase::CommonRigidBodyMTBase( struct GUIHelperInterface* helper )
@@ -419,6 +462,23 @@ void setTaskSchedulerComboBoxCallback(int combobox, const char* item, void* user
} }
void setBatchingMethodComboBoxCallback(int combobox, const char* item, void* userPointer)
{
#if BT_THREADSAFE
const char** items = static_cast<const char**>( userPointer );
for ( int i = 0; i < btBatchedConstraints::BATCHING_METHOD_COUNT; ++i )
{
if ( strcmp( item, items[ i ] ) == 0 )
{
// change the task scheduler
btSequentialImpulseConstraintSolverMt::s_contactBatchingMethod = static_cast<btBatchedConstraints::BatchingMethod>( i );
break;
}
}
#endif // #if BT_THREADSAFE
}
static void setThreadCountCallback(float val, void* userPtr) static void setThreadCountCallback(float val, void* userPtr)
{ {
#if BT_THREADSAFE #if BT_THREADSAFE
@@ -435,13 +495,43 @@ static void setSolverIterationCountCallback(float val, void* userPtr)
} }
} }
static void setLargeIslandManifoldCountCallback( float val, void* userPtr )
{
btSequentialImpulseConstraintSolverMt::s_minimumContactManifoldsForBatching = int( gSliderIslandBatchingThreshold );
}
static void setMinBatchSizeCallback( float val, void* userPtr )
{
gSliderMaxBatchSize = (std::max)(gSliderMinBatchSize, gSliderMaxBatchSize);
btSequentialImpulseConstraintSolverMt::s_minBatchSize = int(gSliderMinBatchSize);
btSequentialImpulseConstraintSolverMt::s_maxBatchSize = int(gSliderMaxBatchSize);
}
static void setMaxBatchSizeCallback( float val, void* userPtr )
{
gSliderMinBatchSize = (std::min)(gSliderMinBatchSize, gSliderMaxBatchSize);
btSequentialImpulseConstraintSolverMt::s_minBatchSize = int(gSliderMinBatchSize);
btSequentialImpulseConstraintSolverMt::s_maxBatchSize = int(gSliderMaxBatchSize);
}
static void setLeastSquaresResidualThresholdCallback( float val, void* userPtr )
{
if (btDiscreteDynamicsWorld* world = reinterpret_cast<btDiscreteDynamicsWorld*>(userPtr))
{
world->getSolverInfo().m_leastSquaresResidualThreshold = gSliderLeastSquaresResidualThreshold;
}
}
void CommonRigidBodyMTBase::createEmptyDynamicsWorld() void CommonRigidBodyMTBase::createEmptyDynamicsWorld()
{ {
gNumIslands = 0; gNumIslands = 0;
m_solverType = gSolverType; m_solverType = gSolverType;
#if BT_THREADSAFE && (BT_USE_OPENMP || BT_USE_PPL || BT_USE_TBB) #if BT_THREADSAFE
btAssert( btGetTaskScheduler() != NULL ); btAssert( btGetTaskScheduler() != NULL );
m_multithreadCapable = true; if (NULL != btGetTaskScheduler() && gTaskSchedulerMgr.getNumTaskSchedulers() > 1)
{
m_multithreadCapable = true;
}
#endif #endif
if ( gMultithreadedWorld ) if ( gMultithreadedWorld )
{ {
@@ -486,7 +576,12 @@ void CommonRigidBodyMTBase::createEmptyDynamicsWorld()
m_broadphase = new btDbvtBroadphase(); m_broadphase = new btDbvtBroadphase();
m_solver = createSolverByType( m_solverType ); SolverType solverType = m_solverType;
if ( solverType == SOLVER_TYPE_SEQUENTIAL_IMPULSE_MT )
{
solverType = SOLVER_TYPE_SEQUENTIAL_IMPULSE;
}
m_solver = createSolverByType( solverType );
m_dynamicsWorld = new btDiscreteDynamicsWorld( m_dispatcher, m_broadphase, m_solver, m_collisionConfiguration ); m_dynamicsWorld = new btDiscreteDynamicsWorld( m_dispatcher, m_broadphase, m_solver, m_collisionConfiguration );
} }
@@ -494,6 +589,7 @@ void CommonRigidBodyMTBase::createEmptyDynamicsWorld()
m_dynamicsWorld->setInternalTickCallback( profileEndCallback, NULL, false ); m_dynamicsWorld->setInternalTickCallback( profileEndCallback, NULL, false );
m_dynamicsWorld->setGravity( btVector3( 0, -10, 0 ) ); m_dynamicsWorld->setGravity( btVector3( 0, -10, 0 ) );
m_dynamicsWorld->getSolverInfo().m_solverMode = gSolverMode; m_dynamicsWorld->getSolverInfo().m_solverMode = gSolverMode;
m_dynamicsWorld->getSolverInfo().m_numIterations = btMax(1, int(gSliderSolverIterations));
createDefaultParameters(); createDefaultParameters();
} }
@@ -504,16 +600,18 @@ void CommonRigidBodyMTBase::createDefaultParameters()
{ {
// create a button to toggle multithreaded world // create a button to toggle multithreaded world
ButtonParams button( "Multithreaded world enable", 0, true ); ButtonParams button( "Multithreaded world enable", 0, true );
button.m_initialState = gMultithreadedWorld; bool* ptr = &gMultithreadedWorld;
button.m_userPointer = &gMultithreadedWorld; button.m_initialState = *ptr;
button.m_userPointer = ptr;
button.m_callback = boolPtrButtonCallback; button.m_callback = boolPtrButtonCallback;
m_guiHelper->getParameterInterface()->registerButtonParameter( button ); m_guiHelper->getParameterInterface()->registerButtonParameter( button );
} }
{ {
// create a button to toggle profile printing // create a button to toggle profile printing
ButtonParams button( "Display solver info", 0, true ); ButtonParams button( "Display solver info", 0, true );
button.m_initialState = gDisplayProfileInfo; bool* ptr = &gDisplayProfileInfo;
button.m_userPointer = &gDisplayProfileInfo; button.m_initialState = *ptr;
button.m_userPointer = ptr;
button.m_callback = boolPtrButtonCallback; button.m_callback = boolPtrButtonCallback;
m_guiHelper->getParameterInterface()->registerButtonParameter( button ); m_guiHelper->getParameterInterface()->registerButtonParameter( button );
} }
@@ -544,6 +642,16 @@ void CommonRigidBodyMTBase::createDefaultParameters()
slider.m_clampToIntegers = true; slider.m_clampToIntegers = true;
m_guiHelper->getParameterInterface()->registerSliderFloatParameter( slider ); m_guiHelper->getParameterInterface()->registerSliderFloatParameter( slider );
} }
{
// a slider for the solver leastSquaresResidualThreshold (used to run fewer solver iterations when convergence is good)
SliderParams slider( "Solver residual thresh", &gSliderLeastSquaresResidualThreshold );
slider.m_minVal = 0.0f;
slider.m_maxVal = 0.25f;
slider.m_callback = setLeastSquaresResidualThresholdCallback;
slider.m_userPointer = m_dynamicsWorld;
slider.m_clampToIntegers = false;
m_guiHelper->getParameterInterface()->registerSliderFloatParameter( slider );
}
{ {
ButtonParams button( "Solver use SIMD", 0, true ); ButtonParams button( "Solver use SIMD", 0, true );
button.m_buttonId = SOLVER_SIMD; button.m_buttonId = SOLVER_SIMD;
@@ -618,20 +726,86 @@ void CommonRigidBodyMTBase::createDefaultParameters()
m_guiHelper->getParameterInterface()->registerComboBox( comboParams ); m_guiHelper->getParameterInterface()->registerComboBox( comboParams );
} }
{ {
// create a slider to set the number of threads to use
int numThreads = btGetTaskScheduler()->getNumThreads();
// if slider has not been set yet (by another demo), // if slider has not been set yet (by another demo),
if ( gSliderNumThreads <= 1.0f ) if ( gSliderNumThreads <= 1.0f )
{ {
// create a slider to set the number of threads to use
int numThreads = btGetTaskScheduler()->getNumThreads();
gSliderNumThreads = float( numThreads ); gSliderNumThreads = float( numThreads );
} }
int maxNumThreads = btGetTaskScheduler()->getMaxNumThreads();
SliderParams slider("Thread count", &gSliderNumThreads); SliderParams slider("Thread count", &gSliderNumThreads);
slider.m_minVal = 1.0f; slider.m_minVal = 1.0f;
slider.m_maxVal = float( BT_MAX_THREAD_COUNT ); slider.m_maxVal = float( maxNumThreads );
slider.m_callback = setThreadCountCallback; slider.m_callback = setThreadCountCallback;
slider.m_clampToIntegers = true; slider.m_clampToIntegers = true;
m_guiHelper->getParameterInterface()->registerSliderFloatParameter( slider ); m_guiHelper->getParameterInterface()->registerSliderFloatParameter( slider );
} }
{
// a slider for the number of manifolds an island needs to be too large for parallel dispatch
if (gSliderIslandBatchingThreshold < 1.0)
{
gSliderIslandBatchingThreshold = float(btSequentialImpulseConstraintSolverMt::s_minimumContactManifoldsForBatching);
}
SliderParams slider( "IslandBatchThresh", &gSliderIslandBatchingThreshold );
slider.m_minVal = 1.0f;
slider.m_maxVal = 2000.0f;
slider.m_callback = setLargeIslandManifoldCountCallback;
slider.m_userPointer = NULL;
slider.m_clampToIntegers = true;
m_guiHelper->getParameterInterface()->registerSliderFloatParameter( slider );
}
{
// create a combo box for selecting the batching method
static const char* sBatchingMethodComboBoxItems[ btBatchedConstraints::BATCHING_METHOD_COUNT ];
{
sBatchingMethodComboBoxItems[ btBatchedConstraints::BATCHING_METHOD_SPATIAL_GRID_2D ] = "Batching: 2D Grid";
sBatchingMethodComboBoxItems[ btBatchedConstraints::BATCHING_METHOD_SPATIAL_GRID_3D ] = "Batching: 3D Grid";
};
ComboBoxParams comboParams;
comboParams.m_userPointer = sBatchingMethodComboBoxItems;
comboParams.m_numItems = btBatchedConstraints::BATCHING_METHOD_COUNT;
comboParams.m_startItem = static_cast<int>(btSequentialImpulseConstraintSolverMt::s_contactBatchingMethod);
comboParams.m_items = sBatchingMethodComboBoxItems;
comboParams.m_callback = setBatchingMethodComboBoxCallback;
m_guiHelper->getParameterInterface()->registerComboBox( comboParams );
}
{
// a slider for the sequentialImpulseConstraintSolverMt min batch size (when batching)
SliderParams slider( "Min batch size", &gSliderMinBatchSize );
slider.m_minVal = 1.0f;
slider.m_maxVal = 1000.0f;
slider.m_callback = setMinBatchSizeCallback;
slider.m_userPointer = NULL;
slider.m_clampToIntegers = true;
m_guiHelper->getParameterInterface()->registerSliderFloatParameter( slider );
}
{
// a slider for the sequentialImpulseConstraintSolverMt max batch size (when batching)
SliderParams slider( "Max batch size", &gSliderMaxBatchSize );
slider.m_minVal = 1.0f;
slider.m_maxVal = 1000.0f;
slider.m_callback = setMaxBatchSizeCallback;
slider.m_userPointer = NULL;
slider.m_clampToIntegers = true;
m_guiHelper->getParameterInterface()->registerSliderFloatParameter( slider );
}
{
// create a button to toggle debug drawing of batching visualization
ButtonParams button( "Visualize batching", 0, true );
bool* ptr = &btBatchedConstraints::s_debugDrawBatches;
button.m_initialState = *ptr;
button.m_userPointer = ptr;
button.m_callback = boolPtrButtonCallback;
m_guiHelper->getParameterInterface()->registerButtonParameter( button );
}
{
ButtonParams button( "Allow Nested ParallelFor", 0, true );
button.m_initialState = btSequentialImpulseConstraintSolverMt::s_allowNestedParallelForLoops;
button.m_userPointer = &btSequentialImpulseConstraintSolverMt::s_allowNestedParallelForLoops;
button.m_callback = boolPtrButtonCallback;
m_guiHelper->getParameterInterface()->registerButtonParameter( button );
}
#endif // #if BT_THREADSAFE #endif // #if BT_THREADSAFE
} }
} }
@@ -643,6 +817,7 @@ void CommonRigidBodyMTBase::drawScreenText()
int xCoord = 400; int xCoord = 400;
int yCoord = 30; int yCoord = 30;
int yStep = 30; int yStep = 30;
int indent = 30;
if (m_solverType != gSolverType) if (m_solverType != gSolverType)
{ {
sprintf( msg, "restart example to change solver type" ); sprintf( msg, "restart example to change solver type" );
@@ -721,6 +896,34 @@ void CommonRigidBodyMTBase::drawScreenText()
m_guiHelper->getAppInterface()->drawText( msg, xCoord, yCoord, 0.4f ); m_guiHelper->getAppInterface()->drawText( msg, xCoord, yCoord, 0.4f );
yCoord += yStep; yCoord += yStep;
sprintf( msg,
"SolverTotal %5.3f ms",
gProfiler.getAverageTime( Profiler::kRecordSolverTotal )*0.001f
);
m_guiHelper->getAppInterface()->drawText( msg, xCoord, yCoord, 0.4f );
yCoord += yStep;
sprintf( msg,
"SolverSetup %5.3f ms",
gProfiler.getAverageTime( Profiler::kRecordSolverSetup )*0.001f
);
m_guiHelper->getAppInterface()->drawText( msg, xCoord + indent, yCoord, 0.4f );
yCoord += yStep;
sprintf( msg,
"SolverIterations %5.3f ms",
gProfiler.getAverageTime( Profiler::kRecordSolverIterations )*0.001f
);
m_guiHelper->getAppInterface()->drawText( msg, xCoord + indent, yCoord, 0.4f );
yCoord += yStep;
sprintf( msg,
"SolverFinish %5.3f ms",
gProfiler.getAverageTime( Profiler::kRecordSolverFinish )*0.001f
);
m_guiHelper->getAppInterface()->drawText( msg, xCoord + indent, yCoord, 0.4f );
yCoord += yStep;
sprintf( msg, sprintf( msg,
"PredictUnconstrainedMotion %5.3f ms", "PredictUnconstrainedMotion %5.3f ms",
gProfiler.getAverageTime( Profiler::kRecordPredictUnconstrainedMotion )*0.001f gProfiler.getAverageTime( Profiler::kRecordPredictUnconstrainedMotion )*0.001f

View File

@@ -14,6 +14,7 @@
enum SolverType enum SolverType
{ {
SOLVER_TYPE_SEQUENTIAL_IMPULSE, SOLVER_TYPE_SEQUENTIAL_IMPULSE,
SOLVER_TYPE_SEQUENTIAL_IMPULSE_MT,
SOLVER_TYPE_NNCG, SOLVER_TYPE_NNCG,
SOLVER_TYPE_MLCP_PGS, SOLVER_TYPE_MLCP_PGS,
SOLVER_TYPE_MLCP_DANTZIG, SOLVER_TYPE_MLCP_DANTZIG,
@@ -27,6 +28,7 @@ inline const char* getSolverTypeName( SolverType t )
switch (t) switch (t)
{ {
case SOLVER_TYPE_SEQUENTIAL_IMPULSE: return "SequentialImpulse"; case SOLVER_TYPE_SEQUENTIAL_IMPULSE: return "SequentialImpulse";
case SOLVER_TYPE_SEQUENTIAL_IMPULSE_MT: return "SequentialImpulseMt";
case SOLVER_TYPE_NNCG: return "NNCG"; case SOLVER_TYPE_NNCG: return "NNCG";
case SOLVER_TYPE_MLCP_PGS: return "MLCP ProjectedGaussSeidel"; case SOLVER_TYPE_MLCP_PGS: return "MLCP ProjectedGaussSeidel";
case SOLVER_TYPE_MLCP_DANTZIG: return "MLCP Dantzig"; case SOLVER_TYPE_MLCP_DANTZIG: return "MLCP Dantzig";

View File

@@ -25,10 +25,10 @@ subject to the following restrictions:
static btScalar gSliderStackRows = 8.0f; static btScalar gSliderStackRows = 1.0f;
static btScalar gSliderStackColumns = 6.0f; static btScalar gSliderStackColumns = 1.0f;
static btScalar gSliderStackHeight = 10.0f; static btScalar gSliderStackHeight = 15.0f;
static btScalar gSliderStackWidth = 1.0f; static btScalar gSliderStackWidth = 8.0f;
static btScalar gSliderGroundHorizontalAmplitude = 0.0f; static btScalar gSliderGroundHorizontalAmplitude = 0.0f;
static btScalar gSliderGroundVerticalAmplitude = 0.0f; static btScalar gSliderGroundVerticalAmplitude = 0.0f;
static btScalar gSliderGroundTilt = 0.0f; static btScalar gSliderGroundTilt = 0.0f;
@@ -75,6 +75,21 @@ public:
btScalar tilt = gSliderGroundTilt * SIMD_2_PI / 360.0f; btScalar tilt = gSliderGroundTilt * SIMD_2_PI / 360.0f;
return btQuaternion( btVector3( 1.0f, 0.0f, 0.0f ), tilt ); return btQuaternion( btVector3( 1.0f, 0.0f, 0.0f ), tilt );
} }
struct TestSumBody : public btIParallelSumBody
{
virtual btScalar sumLoop( int iBegin, int iEnd ) const BT_OVERRIDE
{
btScalar sum = 0.0f;
for (int i = iBegin; i < iEnd; ++i)
{
if (i > 0)
{
sum += 1.0f / btScalar(i);
}
}
return sum;
}
};
virtual void stepSimulation( float deltaTime ) BT_OVERRIDE virtual void stepSimulation( float deltaTime ) BT_OVERRIDE
{ {
if ( m_dynamicsWorld ) if ( m_dynamicsWorld )
@@ -115,6 +130,14 @@ public:
// always step by 1/60 for benchmarking // always step by 1/60 for benchmarking
m_dynamicsWorld->stepSimulation( 1.0f / 60.0f, 0 ); m_dynamicsWorld->stepSimulation( 1.0f / 60.0f, 0 );
} }
#if 0
{
// test parallelSum
TestSumBody testSumBody;
float testSum = btParallelSum( 1, 10000000, 10000, testSumBody );
printf( "sum = %f\n", testSum );
}
#endif
} }
virtual void initPhysics() BT_OVERRIDE; virtual void initPhysics() BT_OVERRIDE;

View File

@@ -1,448 +0,0 @@
#include "LinearMath/btTransform.h"
#include "../Utils/b3Clock.h"
#include "LinearMath/btAlignedObjectArray.h"
#include "LinearMath/btThreads.h"
#include "LinearMath/btQuickprof.h"
#include <stdio.h>
#include <algorithm>
typedef void( *btThreadFunc )( void* userPtr, void* lsMemory );
typedef void* ( *btThreadLocalStorageFunc )();
#if BT_THREADSAFE
#if defined( _WIN32 )
#include "b3Win32ThreadSupport.h"
b3ThreadSupportInterface* createThreadSupport( int numThreads, btThreadFunc threadFunc, btThreadLocalStorageFunc localStoreFunc, const char* uniqueName )
{
b3Win32ThreadSupport::Win32ThreadConstructionInfo constructionInfo( uniqueName, threadFunc, localStoreFunc, numThreads );
//constructionInfo.m_priority = 0; // highest priority (the default) -- can cause erratic performance when numThreads > numCores
// we don't want worker threads to be higher priority than the main thread or the main thread could get
// totally shut out and unable to tell the workers to stop
constructionInfo.m_priority = -1; // normal priority
b3Win32ThreadSupport* threadSupport = new b3Win32ThreadSupport( constructionInfo );
return threadSupport;
}
#else // #if defined( _WIN32 )
#include "b3PosixThreadSupport.h"
b3ThreadSupportInterface* createThreadSupport( int numThreads, btThreadFunc threadFunc, btThreadLocalStorageFunc localStoreFunc, const char* uniqueName)
{
b3PosixThreadSupport::ThreadConstructionInfo constructionInfo( uniqueName, threadFunc, localStoreFunc, numThreads );
b3ThreadSupportInterface* threadSupport = new b3PosixThreadSupport( constructionInfo );
return threadSupport;
}
#endif // #else // #if defined( _WIN32 )
///
/// getNumHardwareThreads()
///
///
/// https://stackoverflow.com/questions/150355/programmatically-find-the-number-of-cores-on-a-machine
///
#if __cplusplus >= 201103L
#include <thread>
int getNumHardwareThreads()
{
return std::thread::hardware_concurrency();
}
#elif defined( _WIN32 )
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
int getNumHardwareThreads()
{
// caps out at 32
SYSTEM_INFO info;
GetSystemInfo( &info );
return info.dwNumberOfProcessors;
}
#else
int getNumHardwareThreads()
{
return 0; // don't know
}
#endif
struct WorkerThreadStatus
{
enum Type
{
kInvalid,
kWaitingForWork,
kWorking,
kSleeping,
};
};
struct IJob
{
virtual void executeJob() = 0;
};
class ParallelForJob : public IJob
{
const btIParallelForBody* mBody;
int mBegin;
int mEnd;
public:
ParallelForJob()
{
mBody = NULL;
mBegin = 0;
mEnd = 0;
}
void init( int iBegin, int iEnd, const btIParallelForBody& body )
{
mBody = &body;
mBegin = iBegin;
mEnd = iEnd;
}
virtual void executeJob() BT_OVERRIDE
{
BT_PROFILE( "executeJob" );
// call the functor body to do the work
mBody->forLoop( mBegin, mEnd );
}
};
struct JobContext
{
JobContext()
{
m_queueLock = NULL;
m_headIndex = 0;
m_tailIndex = 0;
m_workersShouldCheckQueue = false;
m_useSpinMutex = false;
}
b3CriticalSection* m_queueLock;
btSpinMutex m_mutex;
volatile bool m_workersShouldCheckQueue;
btAlignedObjectArray<IJob*> m_jobQueue;
bool m_queueIsEmpty;
int m_tailIndex;
int m_headIndex;
bool m_useSpinMutex;
void lockQueue()
{
if ( m_useSpinMutex )
{
m_mutex.lock();
}
else
{
m_queueLock->lock();
}
}
void unlockQueue()
{
if ( m_useSpinMutex )
{
m_mutex.unlock();
}
else
{
m_queueLock->unlock();
}
}
void clearQueue()
{
lockQueue();
m_headIndex = 0;
m_tailIndex = 0;
m_queueIsEmpty = true;
unlockQueue();
m_jobQueue.resizeNoInitialize( 0 );
}
void submitJob( IJob* job )
{
m_jobQueue.push_back( job );
lockQueue();
m_tailIndex++;
m_queueIsEmpty = false;
unlockQueue();
}
IJob* consumeJob()
{
if ( m_queueIsEmpty )
{
// lock free path. even if this is taken erroneously it isn't harmful
return NULL;
}
IJob* job = NULL;
lockQueue();
if ( !m_queueIsEmpty )
{
job = m_jobQueue[ m_headIndex++ ];
if ( m_headIndex == m_tailIndex )
{
m_queueIsEmpty = true;
}
}
unlockQueue();
return job;
}
};
struct WorkerThreadLocalStorage
{
int threadId;
WorkerThreadStatus::Type status;
};
static void WorkerThreadFunc( void* userPtr, void* lsMemory )
{
BT_PROFILE( "WorkerThreadFunc" );
WorkerThreadLocalStorage* localStorage = (WorkerThreadLocalStorage*) lsMemory;
localStorage->status = WorkerThreadStatus::kWaitingForWork;
//printf( "WorkerThreadFunc: worker %d start working\n", localStorage->threadId );
JobContext* jobContext = (JobContext*) userPtr;
while ( jobContext->m_workersShouldCheckQueue )
{
if ( IJob* job = jobContext->consumeJob() )
{
localStorage->status = WorkerThreadStatus::kWorking;
job->executeJob();
localStorage->status = WorkerThreadStatus::kWaitingForWork;
}
else
{
// todo: spin wait a bit to avoid hammering the empty queue
}
}
//printf( "WorkerThreadFunc stop working\n" );
localStorage->status = WorkerThreadStatus::kSleeping;
// go idle
}
static void* WorkerThreadAllocFunc()
{
return new WorkerThreadLocalStorage;
}
class btTaskSchedulerDefault : public btITaskScheduler
{
JobContext m_jobContext;
b3ThreadSupportInterface* m_threadSupport;
btAlignedObjectArray<ParallelForJob> m_jobs;
btSpinMutex m_antiNestingLock; // prevent nested parallel-for
int m_numThreads;
int m_numWorkerThreads;
int m_numWorkersRunning;
public:
btTaskSchedulerDefault() : btITaskScheduler("ThreadSupport")
{
m_threadSupport = NULL;
m_numThreads = getNumHardwareThreads();
// if can't detect number of cores,
if ( m_numThreads == 0 )
{
// take a guess
m_numThreads = 4;
}
m_numWorkerThreads = m_numThreads - 1;
m_numWorkersRunning = 0;
}
virtual ~btTaskSchedulerDefault()
{
shutdown();
}
void init()
{
int maxNumWorkerThreads = BT_MAX_THREAD_COUNT - 1;
m_threadSupport = createThreadSupport( maxNumWorkerThreads, WorkerThreadFunc, WorkerThreadAllocFunc, "TaskScheduler" );
m_jobContext.m_queueLock = m_threadSupport->createCriticalSection();
for ( int i = 0; i < maxNumWorkerThreads; i++ )
{
WorkerThreadLocalStorage* storage = (WorkerThreadLocalStorage*) m_threadSupport->getThreadLocalMemory( i );
btAssert( storage );
storage->threadId = i;
storage->status = WorkerThreadStatus::kSleeping;
}
setWorkersActive( false ); // no work for them yet
}
virtual void shutdown()
{
setWorkersActive( false );
waitForWorkersToSleep();
m_threadSupport->deleteCriticalSection( m_jobContext.m_queueLock );
m_jobContext.m_queueLock = NULL;
delete m_threadSupport;
m_threadSupport = NULL;
}
void setWorkersActive( bool active )
{
m_jobContext.m_workersShouldCheckQueue = active;
}
virtual int getMaxNumThreads() const BT_OVERRIDE
{
return BT_MAX_THREAD_COUNT;
}
virtual int getNumThreads() const BT_OVERRIDE
{
return m_numThreads;
}
virtual void setNumThreads( int numThreads ) BT_OVERRIDE
{
m_numThreads = btMax( btMin(numThreads, int(BT_MAX_THREAD_COUNT)), 1 );
m_numWorkerThreads = m_numThreads - 1;
}
void waitJobs()
{
BT_PROFILE( "waitJobs" );
// have the main thread work until the job queue is empty
for ( ;; )
{
if ( IJob* job = m_jobContext.consumeJob() )
{
job->executeJob();
}
else
{
break;
}
}
// done with jobs for now, tell workers to rest
setWorkersActive( false );
waitForWorkersToSleep();
}
void wakeWorkers()
{
BT_PROFILE( "wakeWorkers" );
btAssert( m_jobContext.m_workersShouldCheckQueue );
// tell each worker thread to start working
for ( int i = 0; i < m_numWorkerThreads; i++ )
{
m_threadSupport->runTask( B3_THREAD_SCHEDULE_TASK, &m_jobContext, i );
m_numWorkersRunning++;
}
}
void waitForWorkersToSleep()
{
BT_PROFILE( "waitForWorkersToSleep" );
while ( m_numWorkersRunning > 0 )
{
int iThread;
int threadStatus;
m_threadSupport->waitForResponse( &iThread, &threadStatus ); // wait for worker threads to finish working
m_numWorkersRunning--;
}
//m_threadSupport->waitForAllTasksToComplete();
for ( int i = 0; i < m_numWorkerThreads; i++ )
{
//m_threadSupport->waitForTaskCompleted( i );
WorkerThreadLocalStorage* storage = (WorkerThreadLocalStorage*) m_threadSupport->getThreadLocalMemory( i );
btAssert( storage );
btAssert( storage->status == WorkerThreadStatus::kSleeping );
}
}
virtual void parallelFor( int iBegin, int iEnd, int grainSize, const btIParallelForBody& body ) BT_OVERRIDE
{
BT_PROFILE( "parallelFor_ThreadSupport" );
btAssert( iEnd >= iBegin );
btAssert( grainSize >= 1 );
int iterationCount = iEnd - iBegin;
if ( iterationCount > grainSize && m_numWorkerThreads > 0 && m_antiNestingLock.tryLock() )
{
int jobCount = ( iterationCount + grainSize - 1 ) / grainSize;
btAssert( jobCount >= 2 ); // need more than one job for multithreading
if ( jobCount > m_jobs.size() )
{
m_jobs.resize( jobCount );
}
if ( jobCount > m_jobContext.m_jobQueue.capacity() )
{
m_jobContext.m_jobQueue.reserve( jobCount );
}
m_jobContext.clearQueue();
// prepare worker threads for incoming work
setWorkersActive( true );
wakeWorkers();
// submit all of the jobs
int iJob = 0;
for ( int i = iBegin; i < iEnd; i += grainSize )
{
btAssert( iJob < jobCount );
int iE = btMin( i + grainSize, iEnd );
ParallelForJob& job = m_jobs[ iJob ];
job.init( i, iE, body );
m_jobContext.submitJob( &job );
iJob++;
}
// put the main thread to work on emptying the job queue and then wait for all workers to finish
waitJobs();
m_antiNestingLock.unlock();
}
else
{
BT_PROFILE( "parallelFor_mainThread" );
// just run on main thread
body.forLoop( iBegin, iEnd );
}
}
};
btITaskScheduler* createDefaultTaskScheduler()
{
btTaskSchedulerDefault* ts = new btTaskSchedulerDefault();
ts->init();
return ts;
}
#else // #if BT_THREADSAFE
btITaskScheduler* createDefaultTaskScheduler()
{
return NULL;
}
#endif // #else // #if BT_THREADSAFE

View File

@@ -1,26 +0,0 @@
/*
Copyright (c) 2003-2014 Erwin Coumans http://bullet.googlecode.com
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef BT_TASK_SCHEDULER_H
#define BT_TASK_SCHEDULER_H
class btITaskScheduler;
btITaskScheduler* createDefaultTaskScheduler();
#endif // BT_TASK_SCHEDULER_H

View File

@@ -15,6 +15,8 @@ SET(BulletDynamics_SRCS
ConstraintSolver/btHingeConstraint.cpp ConstraintSolver/btHingeConstraint.cpp
ConstraintSolver/btPoint2PointConstraint.cpp ConstraintSolver/btPoint2PointConstraint.cpp
ConstraintSolver/btSequentialImpulseConstraintSolver.cpp ConstraintSolver/btSequentialImpulseConstraintSolver.cpp
ConstraintSolver/btSequentialImpulseConstraintSolverMt.cpp
ConstraintSolver/btBatchedConstraints.cpp
ConstraintSolver/btNNCGConstraintSolver.cpp ConstraintSolver/btNNCGConstraintSolver.cpp
ConstraintSolver/btSliderConstraint.cpp ConstraintSolver/btSliderConstraint.cpp
ConstraintSolver/btSolve2LinearConstraint.cpp ConstraintSolver/btSolve2LinearConstraint.cpp
@@ -62,6 +64,7 @@ SET(ConstraintSolver_HDRS
ConstraintSolver/btJacobianEntry.h ConstraintSolver/btJacobianEntry.h
ConstraintSolver/btPoint2PointConstraint.h ConstraintSolver/btPoint2PointConstraint.h
ConstraintSolver/btSequentialImpulseConstraintSolver.h ConstraintSolver/btSequentialImpulseConstraintSolver.h
ConstraintSolver/btSequentialImpulseConstraintSolverMt.h
ConstraintSolver/btNNCGConstraintSolver.h ConstraintSolver/btNNCGConstraintSolver.h
ConstraintSolver/btSliderConstraint.h ConstraintSolver/btSliderConstraint.h
ConstraintSolver/btSolve2LinearConstraint.h ConstraintSolver/btSolve2LinearConstraint.h

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,66 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef BT_BATCHED_CONSTRAINTS_H
#define BT_BATCHED_CONSTRAINTS_H
#include "LinearMath/btThreads.h"
#include "LinearMath/btAlignedObjectArray.h"
#include "BulletDynamics/ConstraintSolver/btSolverBody.h"
#include "BulletDynamics/ConstraintSolver/btSolverConstraint.h"
class btIDebugDraw;
struct btBatchedConstraints
{
enum BatchingMethod
{
BATCHING_METHOD_SPATIAL_GRID_2D,
BATCHING_METHOD_SPATIAL_GRID_3D,
BATCHING_METHOD_COUNT
};
struct Range
{
int begin;
int end;
Range() : begin( 0 ), end( 0 ) {}
Range( int _beg, int _end ) : begin( _beg ), end( _end ) {}
};
btAlignedObjectArray<int> m_constraintIndices;
btAlignedObjectArray<Range> m_batches; // each batch is a range of indices in the m_constraintIndices array
btAlignedObjectArray<Range> m_phases; // each phase is range of indices in the m_batches array
btAlignedObjectArray<char> m_phaseGrainSize; // max grain size for each phase
btAlignedObjectArray<int> m_phaseOrder; // phases can be done in any order, so we can randomize the order here
btIDebugDraw* m_debugDrawer;
static bool s_debugDrawBatches;
btBatchedConstraints() {m_debugDrawer=NULL;}
void setup( btConstraintArray* constraints,
const btAlignedObjectArray<btSolverBody>& bodies,
BatchingMethod batchingMethod,
int minBatchSize,
int maxBatchSize,
btAlignedObjectArray<char>* scratchMemory
);
bool validate( btConstraintArray* constraints, const btAlignedObjectArray<btSolverBody>& bodies ) const;
};
#endif // BT_BATCHED_CONSTRAINTS_H

View File

@@ -1258,6 +1258,256 @@ void btSequentialImpulseConstraintSolver::convertContacts(btPersistentManifold**
} }
} }
void btSequentialImpulseConstraintSolver::convertJoint(btSolverConstraint* currentConstraintRow,
btTypedConstraint* constraint,
const btTypedConstraint::btConstraintInfo1& info1,
int solverBodyIdA,
int solverBodyIdB,
const btContactSolverInfo& infoGlobal
)
{
const btRigidBody& rbA = constraint->getRigidBodyA();
const btRigidBody& rbB = constraint->getRigidBodyB();
const btSolverBody* bodyAPtr = &m_tmpSolverBodyPool[solverBodyIdA];
const btSolverBody* bodyBPtr = &m_tmpSolverBodyPool[solverBodyIdB];
int overrideNumSolverIterations = constraint->getOverrideNumSolverIterations() > 0 ? constraint->getOverrideNumSolverIterations() : infoGlobal.m_numIterations;
if (overrideNumSolverIterations>m_maxOverrideNumSolverIterations)
m_maxOverrideNumSolverIterations = overrideNumSolverIterations;
for (int j=0;j<info1.m_numConstraintRows;j++)
{
memset(&currentConstraintRow[j],0,sizeof(btSolverConstraint));
currentConstraintRow[j].m_lowerLimit = -SIMD_INFINITY;
currentConstraintRow[j].m_upperLimit = SIMD_INFINITY;
currentConstraintRow[j].m_appliedImpulse = 0.f;
currentConstraintRow[j].m_appliedPushImpulse = 0.f;
currentConstraintRow[j].m_solverBodyIdA = solverBodyIdA;
currentConstraintRow[j].m_solverBodyIdB = solverBodyIdB;
currentConstraintRow[j].m_overrideNumSolverIterations = overrideNumSolverIterations;
}
// these vectors are already cleared in initSolverBody, no need to redundantly clear again
btAssert(bodyAPtr->getDeltaLinearVelocity().isZero());
btAssert(bodyAPtr->getDeltaAngularVelocity().isZero());
btAssert(bodyAPtr->getPushVelocity().isZero());
btAssert(bodyAPtr->getTurnVelocity().isZero());
btAssert(bodyBPtr->getDeltaLinearVelocity().isZero());
btAssert(bodyBPtr->getDeltaAngularVelocity().isZero());
btAssert(bodyBPtr->getPushVelocity().isZero());
btAssert(bodyBPtr->getTurnVelocity().isZero());
//bodyAPtr->internalGetDeltaLinearVelocity().setValue(0.f,0.f,0.f);
//bodyAPtr->internalGetDeltaAngularVelocity().setValue(0.f,0.f,0.f);
//bodyAPtr->internalGetPushVelocity().setValue(0.f,0.f,0.f);
//bodyAPtr->internalGetTurnVelocity().setValue(0.f,0.f,0.f);
//bodyBPtr->internalGetDeltaLinearVelocity().setValue(0.f,0.f,0.f);
//bodyBPtr->internalGetDeltaAngularVelocity().setValue(0.f,0.f,0.f);
//bodyBPtr->internalGetPushVelocity().setValue(0.f,0.f,0.f);
//bodyBPtr->internalGetTurnVelocity().setValue(0.f,0.f,0.f);
btTypedConstraint::btConstraintInfo2 info2;
info2.fps = 1.f/infoGlobal.m_timeStep;
info2.erp = infoGlobal.m_erp;
info2.m_J1linearAxis = currentConstraintRow->m_contactNormal1;
info2.m_J1angularAxis = currentConstraintRow->m_relpos1CrossNormal;
info2.m_J2linearAxis = currentConstraintRow->m_contactNormal2;
info2.m_J2angularAxis = currentConstraintRow->m_relpos2CrossNormal;
info2.rowskip = sizeof(btSolverConstraint)/sizeof(btScalar);//check this
///the size of btSolverConstraint needs be a multiple of btScalar
btAssert(info2.rowskip*sizeof(btScalar)== sizeof(btSolverConstraint));
info2.m_constraintError = &currentConstraintRow->m_rhs;
currentConstraintRow->m_cfm = infoGlobal.m_globalCfm;
info2.m_damping = infoGlobal.m_damping;
info2.cfm = &currentConstraintRow->m_cfm;
info2.m_lowerLimit = &currentConstraintRow->m_lowerLimit;
info2.m_upperLimit = &currentConstraintRow->m_upperLimit;
info2.m_numIterations = infoGlobal.m_numIterations;
constraint->getInfo2(&info2);
///finalize the constraint setup
for (int j=0;j<info1.m_numConstraintRows;j++)
{
btSolverConstraint& solverConstraint = currentConstraintRow[j];
if (solverConstraint.m_upperLimit>=constraint->getBreakingImpulseThreshold())
{
solverConstraint.m_upperLimit = constraint->getBreakingImpulseThreshold();
}
if (solverConstraint.m_lowerLimit<=-constraint->getBreakingImpulseThreshold())
{
solverConstraint.m_lowerLimit = -constraint->getBreakingImpulseThreshold();
}
solverConstraint.m_originalContactPoint = constraint;
{
const btVector3& ftorqueAxis1 = solverConstraint.m_relpos1CrossNormal;
solverConstraint.m_angularComponentA = constraint->getRigidBodyA().getInvInertiaTensorWorld()*ftorqueAxis1*constraint->getRigidBodyA().getAngularFactor();
}
{
const btVector3& ftorqueAxis2 = solverConstraint.m_relpos2CrossNormal;
solverConstraint.m_angularComponentB = constraint->getRigidBodyB().getInvInertiaTensorWorld()*ftorqueAxis2*constraint->getRigidBodyB().getAngularFactor();
}
{
btVector3 iMJlA = solverConstraint.m_contactNormal1*rbA.getInvMass();
btVector3 iMJaA = rbA.getInvInertiaTensorWorld()*solverConstraint.m_relpos1CrossNormal;
btVector3 iMJlB = solverConstraint.m_contactNormal2*rbB.getInvMass();//sign of normal?
btVector3 iMJaB = rbB.getInvInertiaTensorWorld()*solverConstraint.m_relpos2CrossNormal;
btScalar sum = iMJlA.dot(solverConstraint.m_contactNormal1);
sum += iMJaA.dot(solverConstraint.m_relpos1CrossNormal);
sum += iMJlB.dot(solverConstraint.m_contactNormal2);
sum += iMJaB.dot(solverConstraint.m_relpos2CrossNormal);
btScalar fsum = btFabs(sum);
btAssert(fsum > SIMD_EPSILON);
btScalar sorRelaxation = 1.f;//todo: get from globalInfo?
solverConstraint.m_jacDiagABInv = fsum>SIMD_EPSILON?sorRelaxation/sum : 0.f;
}
{
btScalar rel_vel;
btVector3 externalForceImpulseA = bodyAPtr->m_originalBody ? bodyAPtr->m_externalForceImpulse : btVector3(0,0,0);
btVector3 externalTorqueImpulseA = bodyAPtr->m_originalBody ? bodyAPtr->m_externalTorqueImpulse : btVector3(0,0,0);
btVector3 externalForceImpulseB = bodyBPtr->m_originalBody ? bodyBPtr->m_externalForceImpulse : btVector3(0,0,0);
btVector3 externalTorqueImpulseB = bodyBPtr->m_originalBody ?bodyBPtr->m_externalTorqueImpulse : btVector3(0,0,0);
btScalar vel1Dotn = solverConstraint.m_contactNormal1.dot(rbA.getLinearVelocity()+externalForceImpulseA)
+ solverConstraint.m_relpos1CrossNormal.dot(rbA.getAngularVelocity()+externalTorqueImpulseA);
btScalar vel2Dotn = solverConstraint.m_contactNormal2.dot(rbB.getLinearVelocity()+externalForceImpulseB)
+ solverConstraint.m_relpos2CrossNormal.dot(rbB.getAngularVelocity()+externalTorqueImpulseB);
rel_vel = vel1Dotn+vel2Dotn;
btScalar restitution = 0.f;
btScalar positionalError = solverConstraint.m_rhs;//already filled in by getConstraintInfo2
btScalar velocityError = restitution - rel_vel * info2.m_damping;
btScalar penetrationImpulse = positionalError*solverConstraint.m_jacDiagABInv;
btScalar velocityImpulse = velocityError *solverConstraint.m_jacDiagABInv;
solverConstraint.m_rhs = penetrationImpulse+velocityImpulse;
solverConstraint.m_appliedImpulse = 0.f;
}
}
}
void btSequentialImpulseConstraintSolver::convertJoints(btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal)
{
BT_PROFILE("convertJoints");
for (int j=0;j<numConstraints;j++)
{
btTypedConstraint* constraint = constraints[j];
constraint->buildJacobian();
constraint->internalSetAppliedImpulse(0.0f);
}
int totalNumRows = 0;
m_tmpConstraintSizesPool.resizeNoInitialize(numConstraints);
//calculate the total number of contraint rows
for (int i=0;i<numConstraints;i++)
{
btTypedConstraint::btConstraintInfo1& info1 = m_tmpConstraintSizesPool[i];
btJointFeedback* fb = constraints[i]->getJointFeedback();
if (fb)
{
fb->m_appliedForceBodyA.setZero();
fb->m_appliedTorqueBodyA.setZero();
fb->m_appliedForceBodyB.setZero();
fb->m_appliedTorqueBodyB.setZero();
}
if (constraints[i]->isEnabled())
{
constraints[i]->getInfo1(&info1);
} else
{
info1.m_numConstraintRows = 0;
info1.nub = 0;
}
totalNumRows += info1.m_numConstraintRows;
}
m_tmpSolverNonContactConstraintPool.resizeNoInitialize(totalNumRows);
///setup the btSolverConstraints
int currentRow = 0;
for (int i=0;i<numConstraints;i++)
{
const btTypedConstraint::btConstraintInfo1& info1 = m_tmpConstraintSizesPool[i];
if (info1.m_numConstraintRows)
{
btAssert(currentRow<totalNumRows);
btSolverConstraint* currentConstraintRow = &m_tmpSolverNonContactConstraintPool[currentRow];
btTypedConstraint* constraint = constraints[i];
btRigidBody& rbA = constraint->getRigidBodyA();
btRigidBody& rbB = constraint->getRigidBodyB();
int solverBodyIdA = getOrInitSolverBody(rbA,infoGlobal.m_timeStep);
int solverBodyIdB = getOrInitSolverBody(rbB,infoGlobal.m_timeStep);
convertJoint(currentConstraintRow, constraint, info1, solverBodyIdA, solverBodyIdB, infoGlobal);
}
currentRow+=info1.m_numConstraintRows;
}
}
void btSequentialImpulseConstraintSolver::convertBodies(btCollisionObject** bodies, int numBodies, const btContactSolverInfo& infoGlobal)
{
BT_PROFILE("convertBodies");
for (int i = 0; i < numBodies; i++)
{
bodies[i]->setCompanionId(-1);
}
#if BT_THREADSAFE
m_kinematicBodyUniqueIdToSolverBodyTable.resize( 0 );
#endif // BT_THREADSAFE
m_tmpSolverBodyPool.reserve(numBodies+1);
m_tmpSolverBodyPool.resize(0);
//btSolverBody& fixedBody = m_tmpSolverBodyPool.expand();
//initSolverBody(&fixedBody,0);
for (int i=0;i<numBodies;i++)
{
int bodyId = getOrInitSolverBody(*bodies[i],infoGlobal.m_timeStep);
btRigidBody* body = btRigidBody::upcast(bodies[i]);
if (body && body->getInvMass())
{
btSolverBody& solverBody = m_tmpSolverBodyPool[bodyId];
btVector3 gyroForce (0,0,0);
if (body->getFlags()&BT_ENABLE_GYROSCOPIC_FORCE_EXPLICIT)
{
gyroForce = body->computeGyroscopicForceExplicit(infoGlobal.m_maxGyroscopicForce);
solverBody.m_externalTorqueImpulse -= gyroForce*body->getInvInertiaTensorWorld()*infoGlobal.m_timeStep;
}
if (body->getFlags()&BT_ENABLE_GYROSCOPIC_FORCE_IMPLICIT_WORLD)
{
gyroForce = body->computeGyroscopicImpulseImplicit_World(infoGlobal.m_timeStep);
solverBody.m_externalTorqueImpulse += gyroForce;
}
if (body->getFlags()&BT_ENABLE_GYROSCOPIC_FORCE_IMPLICIT_BODY)
{
gyroForce = body->computeGyroscopicImpulseImplicit_Body(infoGlobal.m_timeStep);
solverBody.m_externalTorqueImpulse += gyroForce;
}
}
}
}
btScalar btSequentialImpulseConstraintSolver::solveGroupCacheFriendlySetup(btCollisionObject** bodies, int numBodies, btPersistentManifold** manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal,btIDebugDraw* debugDrawer) btScalar btSequentialImpulseConstraintSolver::solveGroupCacheFriendlySetup(btCollisionObject** bodies, int numBodies, btPersistentManifold** manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal,btIDebugDraw* debugDrawer)
{ {
m_fixedBodyId = -1; m_fixedBodyId = -1;
@@ -1344,250 +1594,13 @@ btScalar btSequentialImpulseConstraintSolver::solveGroupCacheFriendlySetup(btCol
#endif //BT_ADDITIONAL_DEBUG #endif //BT_ADDITIONAL_DEBUG
for (int i = 0; i < numBodies; i++)
{
bodies[i]->setCompanionId(-1);
}
#if BT_THREADSAFE
m_kinematicBodyUniqueIdToSolverBodyTable.resize( 0 );
#endif // BT_THREADSAFE
m_tmpSolverBodyPool.reserve(numBodies+1);
m_tmpSolverBodyPool.resize(0);
//btSolverBody& fixedBody = m_tmpSolverBodyPool.expand();
//initSolverBody(&fixedBody,0);
//convert all bodies //convert all bodies
convertBodies(bodies, numBodies, infoGlobal);
convertJoints(constraints, numConstraints, infoGlobal);
for (int i=0;i<numBodies;i++) convertContacts(manifoldPtr,numManifolds,infoGlobal);
{
int bodyId = getOrInitSolverBody(*bodies[i],infoGlobal.m_timeStep);
btRigidBody* body = btRigidBody::upcast(bodies[i]);
if (body && body->getInvMass())
{
btSolverBody& solverBody = m_tmpSolverBodyPool[bodyId];
btVector3 gyroForce (0,0,0);
if (body->getFlags()&BT_ENABLE_GYROSCOPIC_FORCE_EXPLICIT)
{
gyroForce = body->computeGyroscopicForceExplicit(infoGlobal.m_maxGyroscopicForce);
solverBody.m_externalTorqueImpulse -= gyroForce*body->getInvInertiaTensorWorld()*infoGlobal.m_timeStep;
}
if (body->getFlags()&BT_ENABLE_GYROSCOPIC_FORCE_IMPLICIT_WORLD)
{
gyroForce = body->computeGyroscopicImpulseImplicit_World(infoGlobal.m_timeStep);
solverBody.m_externalTorqueImpulse += gyroForce;
}
if (body->getFlags()&BT_ENABLE_GYROSCOPIC_FORCE_IMPLICIT_BODY)
{
gyroForce = body->computeGyroscopicImpulseImplicit_Body(infoGlobal.m_timeStep);
solverBody.m_externalTorqueImpulse += gyroForce;
}
}
}
if (1)
{
int j;
for (j=0;j<numConstraints;j++)
{
btTypedConstraint* constraint = constraints[j];
constraint->buildJacobian();
constraint->internalSetAppliedImpulse(0.0f);
}
}
//btRigidBody* rb0=0,*rb1=0;
//if (1)
{
{
int totalNumRows = 0;
int i;
m_tmpConstraintSizesPool.resizeNoInitialize(numConstraints);
//calculate the total number of contraint rows
for (i=0;i<numConstraints;i++)
{
btTypedConstraint::btConstraintInfo1& info1 = m_tmpConstraintSizesPool[i];
btJointFeedback* fb = constraints[i]->getJointFeedback();
if (fb)
{
fb->m_appliedForceBodyA.setZero();
fb->m_appliedTorqueBodyA.setZero();
fb->m_appliedForceBodyB.setZero();
fb->m_appliedTorqueBodyB.setZero();
}
if (constraints[i]->isEnabled())
{
constraints[i]->getInfo1(&info1);
} else
{
info1.m_numConstraintRows = 0;
info1.nub = 0;
}
totalNumRows += info1.m_numConstraintRows;
}
m_tmpSolverNonContactConstraintPool.resizeNoInitialize(totalNumRows);
///setup the btSolverConstraints
int currentRow = 0;
for (i=0;i<numConstraints;i++)
{
const btTypedConstraint::btConstraintInfo1& info1 = m_tmpConstraintSizesPool[i];
if (info1.m_numConstraintRows)
{
btAssert(currentRow<totalNumRows);
btSolverConstraint* currentConstraintRow = &m_tmpSolverNonContactConstraintPool[currentRow];
btTypedConstraint* constraint = constraints[i];
btRigidBody& rbA = constraint->getRigidBodyA();
btRigidBody& rbB = constraint->getRigidBodyB();
int solverBodyIdA = getOrInitSolverBody(rbA,infoGlobal.m_timeStep);
int solverBodyIdB = getOrInitSolverBody(rbB,infoGlobal.m_timeStep);
btSolverBody* bodyAPtr = &m_tmpSolverBodyPool[solverBodyIdA];
btSolverBody* bodyBPtr = &m_tmpSolverBodyPool[solverBodyIdB];
int overrideNumSolverIterations = constraint->getOverrideNumSolverIterations() > 0 ? constraint->getOverrideNumSolverIterations() : infoGlobal.m_numIterations;
if (overrideNumSolverIterations>m_maxOverrideNumSolverIterations)
m_maxOverrideNumSolverIterations = overrideNumSolverIterations;
int j;
for ( j=0;j<info1.m_numConstraintRows;j++)
{
memset(&currentConstraintRow[j],0,sizeof(btSolverConstraint));
currentConstraintRow[j].m_lowerLimit = -SIMD_INFINITY;
currentConstraintRow[j].m_upperLimit = SIMD_INFINITY;
currentConstraintRow[j].m_appliedImpulse = 0.f;
currentConstraintRow[j].m_appliedPushImpulse = 0.f;
currentConstraintRow[j].m_solverBodyIdA = solverBodyIdA;
currentConstraintRow[j].m_solverBodyIdB = solverBodyIdB;
currentConstraintRow[j].m_overrideNumSolverIterations = overrideNumSolverIterations;
}
bodyAPtr->internalGetDeltaLinearVelocity().setValue(0.f,0.f,0.f);
bodyAPtr->internalGetDeltaAngularVelocity().setValue(0.f,0.f,0.f);
bodyAPtr->internalGetPushVelocity().setValue(0.f,0.f,0.f);
bodyAPtr->internalGetTurnVelocity().setValue(0.f,0.f,0.f);
bodyBPtr->internalGetDeltaLinearVelocity().setValue(0.f,0.f,0.f);
bodyBPtr->internalGetDeltaAngularVelocity().setValue(0.f,0.f,0.f);
bodyBPtr->internalGetPushVelocity().setValue(0.f,0.f,0.f);
bodyBPtr->internalGetTurnVelocity().setValue(0.f,0.f,0.f);
btTypedConstraint::btConstraintInfo2 info2;
info2.fps = 1.f/infoGlobal.m_timeStep;
info2.erp = infoGlobal.m_erp;
info2.m_J1linearAxis = currentConstraintRow->m_contactNormal1;
info2.m_J1angularAxis = currentConstraintRow->m_relpos1CrossNormal;
info2.m_J2linearAxis = currentConstraintRow->m_contactNormal2;
info2.m_J2angularAxis = currentConstraintRow->m_relpos2CrossNormal;
info2.rowskip = sizeof(btSolverConstraint)/sizeof(btScalar);//check this
///the size of btSolverConstraint needs be a multiple of btScalar
btAssert(info2.rowskip*sizeof(btScalar)== sizeof(btSolverConstraint));
info2.m_constraintError = &currentConstraintRow->m_rhs;
currentConstraintRow->m_cfm = infoGlobal.m_globalCfm;
info2.m_damping = infoGlobal.m_damping;
info2.cfm = &currentConstraintRow->m_cfm;
info2.m_lowerLimit = &currentConstraintRow->m_lowerLimit;
info2.m_upperLimit = &currentConstraintRow->m_upperLimit;
info2.m_numIterations = infoGlobal.m_numIterations;
constraints[i]->getInfo2(&info2);
///finalize the constraint setup
for ( j=0;j<info1.m_numConstraintRows;j++)
{
btSolverConstraint& solverConstraint = currentConstraintRow[j];
if (solverConstraint.m_upperLimit>=constraints[i]->getBreakingImpulseThreshold())
{
solverConstraint.m_upperLimit = constraints[i]->getBreakingImpulseThreshold();
}
if (solverConstraint.m_lowerLimit<=-constraints[i]->getBreakingImpulseThreshold())
{
solverConstraint.m_lowerLimit = -constraints[i]->getBreakingImpulseThreshold();
}
solverConstraint.m_originalContactPoint = constraint;
{
const btVector3& ftorqueAxis1 = solverConstraint.m_relpos1CrossNormal;
solverConstraint.m_angularComponentA = constraint->getRigidBodyA().getInvInertiaTensorWorld()*ftorqueAxis1*constraint->getRigidBodyA().getAngularFactor();
}
{
const btVector3& ftorqueAxis2 = solverConstraint.m_relpos2CrossNormal;
solverConstraint.m_angularComponentB = constraint->getRigidBodyB().getInvInertiaTensorWorld()*ftorqueAxis2*constraint->getRigidBodyB().getAngularFactor();
}
{
btVector3 iMJlA = solverConstraint.m_contactNormal1*rbA.getInvMass();
btVector3 iMJaA = rbA.getInvInertiaTensorWorld()*solverConstraint.m_relpos1CrossNormal;
btVector3 iMJlB = solverConstraint.m_contactNormal2*rbB.getInvMass();//sign of normal?
btVector3 iMJaB = rbB.getInvInertiaTensorWorld()*solverConstraint.m_relpos2CrossNormal;
btScalar sum = iMJlA.dot(solverConstraint.m_contactNormal1);
sum += iMJaA.dot(solverConstraint.m_relpos1CrossNormal);
sum += iMJlB.dot(solverConstraint.m_contactNormal2);
sum += iMJaB.dot(solverConstraint.m_relpos2CrossNormal);
btScalar fsum = btFabs(sum);
btAssert(fsum > SIMD_EPSILON);
btScalar sorRelaxation = 1.f;//todo: get from globalInfo?
solverConstraint.m_jacDiagABInv = fsum>SIMD_EPSILON?sorRelaxation/sum : 0.f;
}
{
btScalar rel_vel;
btVector3 externalForceImpulseA = bodyAPtr->m_originalBody ? bodyAPtr->m_externalForceImpulse : btVector3(0,0,0);
btVector3 externalTorqueImpulseA = bodyAPtr->m_originalBody ? bodyAPtr->m_externalTorqueImpulse : btVector3(0,0,0);
btVector3 externalForceImpulseB = bodyBPtr->m_originalBody ? bodyBPtr->m_externalForceImpulse : btVector3(0,0,0);
btVector3 externalTorqueImpulseB = bodyBPtr->m_originalBody ?bodyBPtr->m_externalTorqueImpulse : btVector3(0,0,0);
btScalar vel1Dotn = solverConstraint.m_contactNormal1.dot(rbA.getLinearVelocity()+externalForceImpulseA)
+ solverConstraint.m_relpos1CrossNormal.dot(rbA.getAngularVelocity()+externalTorqueImpulseA);
btScalar vel2Dotn = solverConstraint.m_contactNormal2.dot(rbB.getLinearVelocity()+externalForceImpulseB)
+ solverConstraint.m_relpos2CrossNormal.dot(rbB.getAngularVelocity()+externalTorqueImpulseB);
rel_vel = vel1Dotn+vel2Dotn;
btScalar restitution = 0.f;
btScalar positionalError = solverConstraint.m_rhs;//already filled in by getConstraintInfo2
btScalar velocityError = restitution - rel_vel * info2.m_damping;
btScalar penetrationImpulse = positionalError*solverConstraint.m_jacDiagABInv;
btScalar velocityImpulse = velocityError *solverConstraint.m_jacDiagABInv;
solverConstraint.m_rhs = penetrationImpulse+velocityImpulse;
solverConstraint.m_appliedImpulse = 0.f;
}
}
}
currentRow+=m_tmpConstraintSizesPool[i].m_numConstraintRows;
}
}
convertContacts(manifoldPtr,numManifolds,infoGlobal);
}
// btContactSolverInfo info = infoGlobal; // btContactSolverInfo info = infoGlobal;
@@ -1627,6 +1640,7 @@ btScalar btSequentialImpulseConstraintSolver::solveGroupCacheFriendlySetup(btCol
btScalar btSequentialImpulseConstraintSolver::solveSingleIteration(int iteration, btCollisionObject** /*bodies */,int /*numBodies*/,btPersistentManifold** /*manifoldPtr*/, int /*numManifolds*/,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal,btIDebugDraw* /*debugDrawer*/) btScalar btSequentialImpulseConstraintSolver::solveSingleIteration(int iteration, btCollisionObject** /*bodies */,int /*numBodies*/,btPersistentManifold** /*manifoldPtr*/, int /*numManifolds*/,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal,btIDebugDraw* /*debugDrawer*/)
{ {
BT_PROFILE("solveSingleIteration");
btScalar leastSquaresResidual = 0.f; btScalar leastSquaresResidual = 0.f;
int numNonContactPool = m_tmpSolverNonContactConstraintPool.size(); int numNonContactPool = m_tmpSolverNonContactConstraintPool.size();
@@ -1805,6 +1819,7 @@ btScalar btSequentialImpulseConstraintSolver::solveSingleIteration(int iteration
void btSequentialImpulseConstraintSolver::solveGroupCacheFriendlySplitImpulseIterations(btCollisionObject** bodies,int numBodies,btPersistentManifold** manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal,btIDebugDraw* debugDrawer) void btSequentialImpulseConstraintSolver::solveGroupCacheFriendlySplitImpulseIterations(btCollisionObject** bodies,int numBodies,btPersistentManifold** manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal,btIDebugDraw* debugDrawer)
{ {
BT_PROFILE("solveGroupCacheFriendlySplitImpulseIterations");
int iteration; int iteration;
if (infoGlobal.m_splitImpulse) if (infoGlobal.m_splitImpulse)
{ {
@@ -1863,14 +1878,9 @@ btScalar btSequentialImpulseConstraintSolver::solveGroupCacheFriendlyIterations(
return 0.f; return 0.f;
} }
btScalar btSequentialImpulseConstraintSolver::solveGroupCacheFriendlyFinish(btCollisionObject** bodies,int numBodies,const btContactSolverInfo& infoGlobal) void btSequentialImpulseConstraintSolver::writeBackContacts(int iBegin, int iEnd, const btContactSolverInfo& infoGlobal)
{ {
int numPoolConstraints = m_tmpSolverContactConstraintPool.size(); for (int j=iBegin; j<iEnd; j++)
int i,j;
if (infoGlobal.m_solverMode & SOLVER_USE_WARMSTARTING)
{
for (j=0;j<numPoolConstraints;j++)
{ {
const btSolverConstraint& solveManifold = m_tmpSolverContactConstraintPool[j]; const btSolverConstraint& solveManifold = m_tmpSolverContactConstraintPool[j];
btManifoldPoint* pt = (btManifoldPoint*) solveManifold.m_originalContactPoint; btManifoldPoint* pt = (btManifoldPoint*) solveManifold.m_originalContactPoint;
@@ -1886,10 +1896,11 @@ btScalar btSequentialImpulseConstraintSolver::solveGroupCacheFriendlyFinish(btCo
} }
//do a callback here? //do a callback here?
} }
} }
numPoolConstraints = m_tmpSolverNonContactConstraintPool.size(); void btSequentialImpulseConstraintSolver::writeBackJoints(int iBegin, int iEnd, const btContactSolverInfo& infoGlobal)
for (j=0;j<numPoolConstraints;j++) {
for (int j=iBegin; j<iEnd; j++)
{ {
const btSolverConstraint& solverConstr = m_tmpSolverNonContactConstraintPool[j]; const btSolverConstraint& solverConstr = m_tmpSolverNonContactConstraintPool[j];
btTypedConstraint* constr = (btTypedConstraint*)solverConstr.m_originalContactPoint; btTypedConstraint* constr = (btTypedConstraint*)solverConstr.m_originalContactPoint;
@@ -1909,10 +1920,12 @@ btScalar btSequentialImpulseConstraintSolver::solveGroupCacheFriendlyFinish(btCo
constr->setEnabled(false); constr->setEnabled(false);
} }
} }
}
void btSequentialImpulseConstraintSolver::writeBackBodies(int iBegin, int iEnd, const btContactSolverInfo& infoGlobal)
for ( i=0;i<m_tmpSolverBodyPool.size();i++) {
for (int i=iBegin; i<iEnd; i++)
{ {
btRigidBody* body = m_tmpSolverBodyPool[i].m_originalBody; btRigidBody* body = m_tmpSolverBodyPool[i].m_originalBody;
if (body) if (body)
@@ -1936,6 +1949,19 @@ btScalar btSequentialImpulseConstraintSolver::solveGroupCacheFriendlyFinish(btCo
m_tmpSolverBodyPool[i].m_originalBody->setCompanionId(-1); m_tmpSolverBodyPool[i].m_originalBody->setCompanionId(-1);
} }
} }
}
btScalar btSequentialImpulseConstraintSolver::solveGroupCacheFriendlyFinish(btCollisionObject** bodies,int numBodies,const btContactSolverInfo& infoGlobal)
{
BT_PROFILE("solveGroupCacheFriendlyFinish");
if (infoGlobal.m_solverMode & SOLVER_USE_WARMSTARTING)
{
writeBackContacts(0, m_tmpSolverContactConstraintPool.size(), infoGlobal);
}
writeBackJoints(0, m_tmpSolverNonContactConstraintPool.size(), infoGlobal);
writeBackBodies(0, m_tmpSolverBodyPool.size(), infoGlobal);
m_tmpSolverContactConstraintPool.resizeNoInitialize(0); m_tmpSolverContactConstraintPool.resizeNoInitialize(0);
m_tmpSolverNonContactConstraintPool.resizeNoInitialize(0); m_tmpSolverNonContactConstraintPool.resizeNoInitialize(0);

View File

@@ -95,6 +95,10 @@ protected:
void convertContact(btPersistentManifold* manifold,const btContactSolverInfo& infoGlobal); void convertContact(btPersistentManifold* manifold,const btContactSolverInfo& infoGlobal);
virtual void convertJoints(btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal);
void convertJoint(btSolverConstraint* destConstraintRow, btTypedConstraint* srcConstraint, const btTypedConstraint::btConstraintInfo1& info1, int solverBodyIdA, int solverBodyIdB, const btContactSolverInfo& infoGlobal);
virtual void convertBodies(btCollisionObject** bodies, int numBodies, const btContactSolverInfo& infoGlobal);
btSimdScalar resolveSplitPenetrationSIMD(btSolverBody& bodyA,btSolverBody& bodyB, const btSolverConstraint& contactConstraint) btSimdScalar resolveSplitPenetrationSIMD(btSolverBody& bodyA,btSolverBody& bodyB, const btSolverConstraint& contactConstraint)
{ {
@@ -121,7 +125,9 @@ protected:
protected: protected:
void writeBackContacts(int iBegin, int iEnd, const btContactSolverInfo& infoGlobal);
void writeBackJoints(int iBegin, int iEnd, const btContactSolverInfo& infoGlobal);
void writeBackBodies(int iBegin, int iEnd, const btContactSolverInfo& infoGlobal);
virtual void solveGroupCacheFriendlySplitImpulseIterations(btCollisionObject** bodies,int numBodies,btPersistentManifold** manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal,btIDebugDraw* debugDrawer); virtual void solveGroupCacheFriendlySplitImpulseIterations(btCollisionObject** bodies,int numBodies,btPersistentManifold** manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal,btIDebugDraw* debugDrawer);
virtual btScalar solveGroupCacheFriendlyFinish(btCollisionObject** bodies,int numBodies,const btContactSolverInfo& infoGlobal); virtual btScalar solveGroupCacheFriendlyFinish(btCollisionObject** bodies,int numBodies,const btContactSolverInfo& infoGlobal);
virtual btScalar solveSingleIteration(int iteration, btCollisionObject** bodies ,int numBodies,btPersistentManifold** manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal,btIDebugDraw* debugDrawer); virtual btScalar solveSingleIteration(int iteration, btCollisionObject** bodies ,int numBodies,btPersistentManifold** manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal,btIDebugDraw* debugDrawer);

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,154 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef BT_SEQUENTIAL_IMPULSE_CONSTRAINT_SOLVER_MT_H
#define BT_SEQUENTIAL_IMPULSE_CONSTRAINT_SOLVER_MT_H
#include "btSequentialImpulseConstraintSolver.h"
#include "btBatchedConstraints.h"
#include "LinearMath/btThreads.h"
///
/// btSequentialImpulseConstraintSolverMt
///
/// A multithreaded variant of the sequential impulse constraint solver. The constraints to be solved are grouped into
/// batches and phases where each batch of constraints within a given phase can be solved in parallel with the rest.
/// Ideally we want as few phases as possible, and each phase should have many batches, and all of the batches should
/// have about the same number of constraints.
/// This method works best on a large island of many constraints.
///
/// Supports all of the features of the normal sequential impulse solver such as:
/// - split penetration impulse
/// - rolling friction
/// - interleaving constraints
/// - warmstarting
/// - 2 friction directions
/// - randomized constraint ordering
/// - early termination when leastSquaresResidualThreshold is satisfied
///
/// When the SOLVER_INTERLEAVE_CONTACT_AND_FRICTION_CONSTRAINTS flag is enabled, unlike the normal SequentialImpulse solver,
/// the rolling friction is interleaved as well.
/// Interleaving the contact penetration constraints with friction reduces the number of parallel loops that need to be done,
/// which reduces threading overhead so it can be a performance win, however, it does seem to produce a less stable simulation,
/// at least on stacks of blocks.
///
/// When the SOLVER_RANDMIZE_ORDER flag is enabled, the ordering of phases, and the ordering of constraints within each batch
/// is randomized, however it does not swap constraints between batches.
/// This is to avoid regenerating the batches for each solver iteration which would be quite costly in performance.
///
/// Note that a non-zero leastSquaresResidualThreshold could possibly affect the determinism of the simulation
/// if the task scheduler's parallelSum operation is non-deterministic. The parallelSum operation can be non-deterministic
/// because floating point addition is not associative due to rounding errors.
/// The task scheduler can and should ensure that the result of any parallelSum operation is deterministic.
///
ATTRIBUTE_ALIGNED16(class) btSequentialImpulseConstraintSolverMt : public btSequentialImpulseConstraintSolver
{
public:
virtual void solveGroupCacheFriendlySplitImpulseIterations(btCollisionObject** bodies,int numBodies,btPersistentManifold** manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal,btIDebugDraw* debugDrawer) BT_OVERRIDE;
virtual btScalar solveSingleIteration(int iteration, btCollisionObject** bodies ,int numBodies,btPersistentManifold** manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal,btIDebugDraw* debugDrawer) BT_OVERRIDE;
virtual btScalar solveGroupCacheFriendlySetup(btCollisionObject** bodies,int numBodies,btPersistentManifold** manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal,btIDebugDraw* debugDrawer) BT_OVERRIDE;
virtual btScalar solveGroupCacheFriendlyFinish(btCollisionObject** bodies, int numBodies, const btContactSolverInfo& infoGlobal) BT_OVERRIDE;
// temp struct used to collect info from persistent manifolds into a cache-friendly struct using multiple threads
struct btContactManifoldCachedInfo
{
static const int MAX_NUM_CONTACT_POINTS = 4;
int numTouchingContacts;
int solverBodyIds[ 2 ];
int contactIndex;
int rollingFrictionIndex;
bool contactHasRollingFriction[ MAX_NUM_CONTACT_POINTS ];
btManifoldPoint* contactPoints[ MAX_NUM_CONTACT_POINTS ];
};
// temp struct used for setting up joint constraints in parallel
struct JointParams
{
int m_solverConstraint;
int m_solverBodyA;
int m_solverBodyB;
};
void internalInitMultipleJoints(btTypedConstraint** constraints, int iBegin, int iEnd);
void internalConvertMultipleJoints( const btAlignedObjectArray<JointParams>& jointParamsArray, btTypedConstraint** constraints, int iBegin, int iEnd, const btContactSolverInfo& infoGlobal );
// parameters to control batching
static bool s_allowNestedParallelForLoops; // whether to allow nested parallel operations
static int s_minimumContactManifoldsForBatching; // don't even try to batch if fewer manifolds than this
static btBatchedConstraints::BatchingMethod s_contactBatchingMethod;
static btBatchedConstraints::BatchingMethod s_jointBatchingMethod;
static int s_minBatchSize; // desired number of constraints per batch
static int s_maxBatchSize;
protected:
static const int CACHE_LINE_SIZE = 64;
btBatchedConstraints m_batchedContactConstraints;
btBatchedConstraints m_batchedJointConstraints;
int m_numFrictionDirections;
bool m_useBatching;
bool m_useObsoleteJointConstraints;
btAlignedObjectArray<btContactManifoldCachedInfo> m_manifoldCachedInfoArray;
btAlignedObjectArray<int> m_rollingFrictionIndexTable; // lookup table mapping contact index to rolling friction index
btSpinMutex m_bodySolverArrayMutex;
char m_antiFalseSharingPadding[CACHE_LINE_SIZE]; // padding to keep mutexes in separate cachelines
btSpinMutex m_kinematicBodyUniqueIdToSolverBodyTableMutex;
btAlignedObjectArray<char> m_scratchMemory;
virtual void randomizeConstraintOrdering( int iteration, int numIterations );
virtual btScalar resolveAllJointConstraints( int iteration );
virtual btScalar resolveAllContactConstraints();
virtual btScalar resolveAllContactFrictionConstraints();
virtual btScalar resolveAllContactConstraintsInterleaved();
virtual btScalar resolveAllRollingFrictionConstraints();
virtual void setupBatchedContactConstraints();
virtual void setupBatchedJointConstraints();
virtual void convertJoints(btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal) BT_OVERRIDE;
virtual void convertContacts(btPersistentManifold** manifoldPtr, int numManifolds, const btContactSolverInfo& infoGlobal) BT_OVERRIDE;
virtual void convertBodies(btCollisionObject** bodies, int numBodies, const btContactSolverInfo& infoGlobal) BT_OVERRIDE;
int getOrInitSolverBodyThreadsafe(btCollisionObject& body, btScalar timeStep);
void allocAllContactConstraints(btPersistentManifold** manifoldPtr, int numManifolds, const btContactSolverInfo& infoGlobal);
void setupAllContactConstraints(const btContactSolverInfo& infoGlobal);
void randomizeBatchedConstraintOrdering( btBatchedConstraints* batchedConstraints );
public:
BT_DECLARE_ALIGNED_ALLOCATOR();
btSequentialImpulseConstraintSolverMt();
virtual ~btSequentialImpulseConstraintSolverMt();
btScalar resolveMultipleJointConstraints( const btAlignedObjectArray<int>& consIndices, int batchBegin, int batchEnd, int iteration );
btScalar resolveMultipleContactConstraints( const btAlignedObjectArray<int>& consIndices, int batchBegin, int batchEnd );
btScalar resolveMultipleContactSplitPenetrationImpulseConstraints( const btAlignedObjectArray<int>& consIndices, int batchBegin, int batchEnd );
btScalar resolveMultipleContactFrictionConstraints( const btAlignedObjectArray<int>& consIndices, int batchBegin, int batchEnd );
btScalar resolveMultipleContactRollingFrictionConstraints( const btAlignedObjectArray<int>& consIndices, int batchBegin, int batchEnd );
btScalar resolveMultipleContactConstraintsInterleaved( const btAlignedObjectArray<int>& consIndices, int batchBegin, int batchEnd );
void internalCollectContactManifoldCachedInfo(btContactManifoldCachedInfo* cachedInfoArray, btPersistentManifold** manifold, int numManifolds, const btContactSolverInfo& infoGlobal);
void internalAllocContactConstraints(const btContactManifoldCachedInfo* cachedInfoArray, int numManifolds);
void internalSetupContactConstraints(int iContact, const btContactSolverInfo& infoGlobal);
void internalConvertBodies(btCollisionObject** bodies, int iBegin, int iEnd, const btContactSolverInfo& infoGlobal);
void internalWriteBackContacts(int iBegin, int iEnd, const btContactSolverInfo& infoGlobal);
void internalWriteBackJoints(int iBegin, int iEnd, const btContactSolverInfo& infoGlobal);
void internalWriteBackBodies(int iBegin, int iEnd, const btContactSolverInfo& infoGlobal);
};
#endif //BT_SEQUENTIAL_IMPULSE_CONSTRAINT_SOLVER_MT_H

View File

@@ -325,3 +325,14 @@ void btDiscreteDynamicsWorldMt::integrateTransforms( btScalar timeStep )
} }
} }
int btDiscreteDynamicsWorldMt::stepSimulation( btScalar timeStep, int maxSubSteps, btScalar fixedTimeStep )
{
int numSubSteps = btDiscreteDynamicsWorld::stepSimulation(timeStep, maxSubSteps, fixedTimeStep);
if (btITaskScheduler* scheduler = btGetTaskScheduler())
{
// tell Bullet's threads to sleep, so other threads can run
scheduler->sleepWorkerThreadsHint();
}
return numSubSteps;
}

View File

@@ -129,6 +129,8 @@ public:
btCollisionConfiguration* collisionConfiguration btCollisionConfiguration* collisionConfiguration
); );
virtual ~btDiscreteDynamicsWorldMt(); virtual ~btDiscreteDynamicsWorldMt();
virtual int stepSimulation( btScalar timeStep, int maxSubSteps, btScalar fixedTimeStep ) BT_OVERRIDE;
}; };
#endif //BT_DISCRETE_DYNAMICS_WORLD_H #endif //BT_DISCRETE_DYNAMICS_WORLD_H

View File

@@ -22,6 +22,7 @@ subject to the following restrictions:
#include "BulletCollision/CollisionDispatch/btCollisionObject.h" #include "BulletCollision/CollisionDispatch/btCollisionObject.h"
#include "BulletCollision/CollisionDispatch/btCollisionWorld.h" #include "BulletCollision/CollisionDispatch/btCollisionWorld.h"
#include "BulletDynamics/ConstraintSolver/btTypedConstraint.h" #include "BulletDynamics/ConstraintSolver/btTypedConstraint.h"
#include "BulletDynamics/ConstraintSolver/btSequentialImpulseConstraintSolverMt.h" // for s_minimumContactManifoldsForBatching
//#include <stdio.h> //#include <stdio.h>
#include "LinearMath/btQuickprof.h" #include "LinearMath/btQuickprof.h"
@@ -589,14 +590,52 @@ struct UpdateIslandDispatcher : public btIParallelForBody
} }
}; };
void btSimulationIslandManagerMt::parallelIslandDispatch( btAlignedObjectArray<Island*>* islandsPtr, IslandCallback* callback ) void btSimulationIslandManagerMt::parallelIslandDispatch( btAlignedObjectArray<Island*>* islandsPtr, IslandCallback* callback )
{ {
BT_PROFILE( "parallelIslandDispatch" ); BT_PROFILE( "parallelIslandDispatch" );
int grainSize = 1; // iterations per task //
// if there are islands with many contacts, it may be faster to submit these
// large islands *serially* to a single parallel constraint solver, and then later
// submit the remaining smaller islands in parallel to multiple sequential solvers.
//
// Some task schedulers do not deal well with nested parallelFor loops. One implementation
// of OpenMP was actually slower than doing everything single-threaded. Intel TBB
// on the other hand, seems to do a pretty respectable job with it.
//
// When solving islands in parallel, the worst case performance happens when there
// is one very large island and then perhaps a smattering of very small
// islands -- one worker thread takes the large island and the remaining workers
// tear through the smaller islands and then sit idle waiting for the first worker
// to finish. Solving islands in parallel works best when there are numerous small
// islands, roughly equal in size.
//
// By contrast, the other approach -- the parallel constraint solver -- is only
// able to deliver a worthwhile speedup when the island is large. For smaller islands,
// it is difficult to extract a useful amount of parallelism -- the overhead of grouping
// the constraints into batches and sending the batches to worker threads can nullify
// any gains from parallelism.
//
UpdateIslandDispatcher dispatcher; UpdateIslandDispatcher dispatcher;
dispatcher.islandsPtr = islandsPtr; dispatcher.islandsPtr = islandsPtr;
dispatcher.callback = callback; dispatcher.callback = callback;
btParallelFor( 0, islandsPtr->size(), grainSize, dispatcher ); // We take advantage of the fact the islands are sorted in order of decreasing size
int iBegin = 0;
while (iBegin < islandsPtr->size())
{
btSimulationIslandManagerMt::Island* island = (*islandsPtr)[ iBegin ];
if (island->manifoldArray.size() < btSequentialImpulseConstraintSolverMt::s_minimumContactManifoldsForBatching)
{
// OK to submit the rest of the array in parallel
break;
}
++iBegin;
}
// serial dispatch for large islands (if any)
dispatcher.forLoop(0, iBegin);
// parallel dispatch for rest
btParallelFor( iBegin, islandsPtr->size(), 1, dispatcher );
} }

View File

@@ -106,5 +106,7 @@ public:
} }
}; };
extern int gLargeIslandManifoldCount;
#endif //BT_SIMULATION_ISLAND_MANAGER_H #endif //BT_SIMULATION_ISLAND_MANAGER_H

View File

@@ -14,6 +14,9 @@ SET(LinearMath_SRCS
btSerializer64.cpp btSerializer64.cpp
btThreads.cpp btThreads.cpp
btVector3.cpp btVector3.cpp
TaskScheduler/btTaskScheduler.cpp
TaskScheduler/btThreadSupportPosix.cpp
TaskScheduler/btThreadSupportWin32.cpp
) )
SET(LinearMath_HDRS SET(LinearMath_HDRS
@@ -44,6 +47,7 @@ SET(LinearMath_HDRS
btTransform.h btTransform.h
btTransformUtil.h btTransformUtil.h
btVector3.h btVector3.h
TaskScheduler/btThreadSupportInterface.h
) )
ADD_LIBRARY(LinearMath ${LinearMath_SRCS} ${LinearMath_HDRS}) ADD_LIBRARY(LinearMath ${LinearMath_SRCS} ${LinearMath_HDRS})

View File

@@ -0,0 +1,619 @@
#include "LinearMath/btMinMax.h"
#include "LinearMath/btAlignedObjectArray.h"
#include "LinearMath/btThreads.h"
#include "LinearMath/btQuickprof.h"
#include <stdio.h>
#include <algorithm>
typedef void( *btThreadFunc )( void* userPtr, void* lsMemory );
typedef void* ( *btThreadLocalStorageFunc )();
#if BT_THREADSAFE
#include "btThreadSupportInterface.h"
///
/// getNumHardwareThreads()
///
///
/// https://stackoverflow.com/questions/150355/programmatically-find-the-number-of-cores-on-a-machine
///
#if __cplusplus >= 201103L
#include <thread>
int getNumHardwareThreads()
{
return std::thread::hardware_concurrency();
}
#elif defined( _WIN32 )
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
int getNumHardwareThreads()
{
// caps out at 32
SYSTEM_INFO info;
GetSystemInfo( &info );
return info.dwNumberOfProcessors;
}
#else
int getNumHardwareThreads()
{
return 0; // don't know
}
#endif
void btSpinPause()
{
#if defined( _WIN32 )
YieldProcessor();
#endif
}
struct WorkerThreadStatus
{
enum Type
{
kInvalid,
kWaitingForWork,
kWorking,
kSleeping,
};
};
struct IJob
{
virtual void executeJob(int threadId) = 0;
};
class ParallelForJob : public IJob
{
const btIParallelForBody* mBody;
int mBegin;
int mEnd;
public:
ParallelForJob( int iBegin, int iEnd, const btIParallelForBody& body )
{
mBody = &body;
mBegin = iBegin;
mEnd = iEnd;
}
virtual void executeJob(int threadId) BT_OVERRIDE
{
BT_PROFILE( "executeJob" );
// call the functor body to do the work
mBody->forLoop( mBegin, mEnd );
}
};
static const int kCacheLineSize = 64;
struct ThreadLocalSum
{
btScalar mSum;
char mCachePadding[ kCacheLineSize - sizeof( btScalar ) ];
};
class ParallelSumJob : public IJob
{
const btIParallelSumBody* mBody;
ThreadLocalSum* mSumArray;
int mBegin;
int mEnd;
public:
ParallelSumJob( int iBegin, int iEnd, const btIParallelSumBody& body, ThreadLocalSum* sums )
{
mBody = &body;
mSumArray = sums;
mBegin = iBegin;
mEnd = iEnd;
}
virtual void executeJob( int threadId ) BT_OVERRIDE
{
BT_PROFILE( "executeJob" );
// call the functor body to do the work
btScalar val = mBody->sumLoop( mBegin, mEnd );
// by truncating bits of the result, we can make the parallelSum deterministic (at the expense of precision)
const float TRUNC_SCALE = float(1<<19);
val = floor(val*TRUNC_SCALE+0.5f)/TRUNC_SCALE; // truncate some bits
mSumArray[threadId].mSum += val;
}
};
struct JobContext
{
JobContext()
{
m_queueLock = NULL;
m_headIndex = 0;
m_tailIndex = 0;
m_workersShouldCheckQueue = false;
m_workersShouldSleep = false;
m_useSpinMutex = false;
m_coolDownTime = 1000; // 1000 microseconds
}
btCriticalSection* m_queueLock;
btSpinMutex m_mutex;
volatile bool m_workersShouldCheckQueue;
volatile bool m_workersShouldSleep;
btAlignedObjectArray<IJob*> m_jobQueue;
bool m_queueIsEmpty;
int m_tailIndex;
int m_headIndex;
bool m_useSpinMutex;
unsigned int m_coolDownTime;
btClock m_clock;
void lockQueue()
{
if ( m_useSpinMutex )
{
m_mutex.lock();
}
else
{
m_queueLock->lock();
}
}
void unlockQueue()
{
if ( m_useSpinMutex )
{
m_mutex.unlock();
}
else
{
m_queueLock->unlock();
}
}
void clearQueue()
{
lockQueue();
m_headIndex = 0;
m_tailIndex = 0;
m_queueIsEmpty = true;
unlockQueue();
m_jobQueue.resizeNoInitialize( 0 );
}
void submitJob( IJob* job )
{
m_jobQueue.push_back( job );
lockQueue();
m_tailIndex++;
m_queueIsEmpty = false;
unlockQueue();
}
IJob* consumeJob()
{
if ( m_queueIsEmpty )
{
// lock free path. even if this is taken erroneously it isn't harmful
return NULL;
}
IJob* job = NULL;
lockQueue();
if ( !m_queueIsEmpty )
{
job = m_jobQueue[ m_headIndex++ ];
if ( m_headIndex == m_tailIndex )
{
m_queueIsEmpty = true;
}
}
unlockQueue();
return job;
}
};
struct WorkerThreadLocalStorage
{
int threadId;
WorkerThreadStatus::Type status;
int numJobsFinished;
btSpinMutex m_mutex;
};
static void WorkerThreadFunc( void* userPtr, void* lsMemory )
{
BT_PROFILE( "WorkerThreadFunc" );
WorkerThreadLocalStorage* localStorage = (WorkerThreadLocalStorage*) lsMemory;
JobContext* jobContext = (JobContext*) userPtr;
bool shouldSleep = false;
while (! shouldSleep)
{
// do work
localStorage->m_mutex.lock();
while ( IJob* job = jobContext->consumeJob() )
{
localStorage->status = WorkerThreadStatus::kWorking;
job->executeJob( localStorage->threadId );
localStorage->numJobsFinished++;
}
localStorage->status = WorkerThreadStatus::kWaitingForWork;
localStorage->m_mutex.unlock();
unsigned long long int clockStart = jobContext->m_clock.getTimeMicroseconds();
// while queue is empty,
while (jobContext->m_queueIsEmpty)
{
// todo: spin wait a bit to avoid hammering the empty queue
btSpinPause();
if ( jobContext->m_workersShouldSleep )
{
shouldSleep = true;
break;
}
// if jobs are incoming,
if (jobContext->m_workersShouldCheckQueue)
{
clockStart = jobContext->m_clock.getTimeMicroseconds(); // reset clock
}
else
{
// if no jobs incoming and queue has been empty for the cooldown time, sleep
unsigned long long int timeElapsed = jobContext->m_clock.getTimeMicroseconds() - clockStart;
if (timeElapsed > jobContext->m_coolDownTime)
{
shouldSleep = true;
break;
}
}
}
}
// go idle
localStorage->m_mutex.lock();
localStorage->status = WorkerThreadStatus::kSleeping;
localStorage->m_mutex.unlock();
}
static void* WorkerThreadAllocFunc()
{
return new WorkerThreadLocalStorage;
}
class btTaskSchedulerDefault : public btITaskScheduler
{
JobContext m_jobContext;
btThreadSupportInterface* m_threadSupport;
btAlignedObjectArray<char> m_jobMem;
btAlignedObjectArray<char> m_threadLocalMem;
btSpinMutex m_antiNestingLock; // prevent nested parallel-for
int m_numThreads;
int m_numWorkerThreads;
int m_maxNumThreads;
int m_numJobs;
public:
btTaskSchedulerDefault() : btITaskScheduler("ThreadSupport")
{
m_threadSupport = NULL;
}
virtual ~btTaskSchedulerDefault()
{
shutdown();
}
void init()
{
btThreadSupportInterface::ConstructionInfo constructionInfo( "TaskScheduler", WorkerThreadFunc, WorkerThreadAllocFunc );
m_threadSupport = btThreadSupportInterface::create( constructionInfo );
m_numWorkerThreads = m_threadSupport->getNumWorkerThreads();
m_maxNumThreads = m_threadSupport->getNumWorkerThreads() + 1;
m_numThreads = m_maxNumThreads;
m_jobContext.m_queueLock = m_threadSupport->createCriticalSection();
for ( int i = 0; i < m_numWorkerThreads; i++ )
{
WorkerThreadLocalStorage* storage = (WorkerThreadLocalStorage*) m_threadSupport->getThreadLocalMemory( i );
btAssert( storage );
storage->threadId = i + 1; // workers start at 1
storage->status = WorkerThreadStatus::kSleeping;
}
setWorkersActive( false ); // no work for them yet
setNumThreads( m_threadSupport->getCacheFriendlyNumThreads() );
}
virtual void shutdown()
{
setWorkersActive( false );
waitForWorkersToSleep();
m_threadSupport->deleteCriticalSection( m_jobContext.m_queueLock );
m_jobContext.m_queueLock = NULL;
delete m_threadSupport;
m_threadSupport = NULL;
}
void setWorkersActive( bool active )
{
m_jobContext.m_workersShouldCheckQueue = active;
}
virtual int getMaxNumThreads() const BT_OVERRIDE
{
return m_maxNumThreads;
}
virtual int getNumThreads() const BT_OVERRIDE
{
return m_numThreads;
}
virtual void setNumThreads( int numThreads ) BT_OVERRIDE
{
m_numThreads = btMax( btMin(numThreads, int(m_maxNumThreads)), 1 );
m_numWorkerThreads = m_numThreads - 1;
}
void waitJobs()
{
BT_PROFILE( "waitJobs" );
// have the main thread work until the job queue is empty
int numMainThreadJobsFinished = 0;
while ( IJob* job = m_jobContext.consumeJob() )
{
job->executeJob( 0 );
numMainThreadJobsFinished++;
}
// done with jobs for now, tell workers to rest
setWorkersActive( false );
unsigned long long int clockStart = m_jobContext.m_clock.getTimeMicroseconds();
// wait for workers to finish any jobs in progress
while ( true )
{
int numWorkerJobsFinished = 0;
for ( int iWorker = 0; iWorker < m_numWorkerThreads; ++iWorker )
{
WorkerThreadLocalStorage* storage = static_cast<WorkerThreadLocalStorage*>( m_threadSupport->getThreadLocalMemory( iWorker ) );
storage->m_mutex.lock();
numWorkerJobsFinished += storage->numJobsFinished;
storage->m_mutex.unlock();
}
if (numWorkerJobsFinished + numMainThreadJobsFinished == m_numJobs)
{
break;
}
unsigned long long int timeElapsed = m_jobContext.m_clock.getTimeMicroseconds() - clockStart;
btAssert(timeElapsed < 1000);
if (timeElapsed > 100000)
{
break;
}
btSpinPause();
}
}
void wakeWorkers(int numWorkersToWake)
{
BT_PROFILE( "wakeWorkers" );
btAssert( m_jobContext.m_workersShouldCheckQueue );
int numDesiredWorkers = btMin(numWorkersToWake, m_numWorkerThreads);
int numActiveWorkers = 0;
for ( int iWorker = 0; iWorker < m_numWorkerThreads; ++iWorker )
{
// note this count of active workers is not necessarily totally reliable, because a worker thread could be
// just about to put itself to sleep. So we may on occasion fail to wake up all the workers. It should be rare.
WorkerThreadLocalStorage* storage = static_cast<WorkerThreadLocalStorage*>( m_threadSupport->getThreadLocalMemory( iWorker ) );
if (storage->status != WorkerThreadStatus::kSleeping)
{
numActiveWorkers++;
}
}
for ( int iWorker = 0; iWorker < m_numWorkerThreads && numActiveWorkers < numDesiredWorkers; ++iWorker )
{
WorkerThreadLocalStorage* storage = static_cast<WorkerThreadLocalStorage*>( m_threadSupport->getThreadLocalMemory( iWorker ) );
if (storage->status == WorkerThreadStatus::kSleeping)
{
m_threadSupport->runTask( iWorker, &m_jobContext );
numActiveWorkers++;
}
}
}
void waitForWorkersToSleep()
{
BT_PROFILE( "waitForWorkersToSleep" );
m_jobContext.m_workersShouldSleep = true;
m_threadSupport->waitForAllTasks();
for ( int i = 0; i < m_numWorkerThreads; i++ )
{
WorkerThreadLocalStorage* storage = static_cast<WorkerThreadLocalStorage*>( m_threadSupport->getThreadLocalMemory(i) );
btAssert( storage );
btAssert( storage->status == WorkerThreadStatus::kSleeping );
}
}
virtual void sleepWorkerThreadsHint() BT_OVERRIDE
{
BT_PROFILE( "sleepWorkerThreadsHint" );
// hint the task scheduler that we may not be using these threads for a little while
m_jobContext.m_workersShouldSleep = true;
}
void prepareWorkerThreads()
{
for ( int iWorker = 0; iWorker < m_numWorkerThreads; ++iWorker )
{
WorkerThreadLocalStorage* storage = static_cast<WorkerThreadLocalStorage*>( m_threadSupport->getThreadLocalMemory( iWorker ) );
storage->m_mutex.lock();
storage->numJobsFinished = 0;
storage->m_mutex.unlock();
}
m_jobContext.m_workersShouldSleep = false;
setWorkersActive( true );
}
virtual void parallelFor( int iBegin, int iEnd, int grainSize, const btIParallelForBody& body ) BT_OVERRIDE
{
BT_PROFILE( "parallelFor_ThreadSupport" );
btAssert( iEnd >= iBegin );
btAssert( grainSize >= 1 );
int iterationCount = iEnd - iBegin;
if ( iterationCount > grainSize && m_numWorkerThreads > 0 && m_antiNestingLock.tryLock() )
{
typedef ParallelForJob JobType;
int jobCount = ( iterationCount + grainSize - 1 ) / grainSize;
m_numJobs = jobCount;
btAssert( jobCount >= 2 ); // need more than one job for multithreading
int jobSize = sizeof( JobType );
int jobBufSize = jobSize * jobCount;
// make sure we have enough memory allocated to store jobs
if ( jobBufSize > m_jobMem.size() )
{
m_jobMem.resize( jobBufSize );
}
// make sure job queue is big enough
if ( jobCount > m_jobContext.m_jobQueue.capacity() )
{
m_jobContext.m_jobQueue.reserve( jobCount );
}
m_jobContext.clearQueue();
// prepare worker threads for incoming work
prepareWorkerThreads();
// submit all of the jobs
int iJob = 0;
JobType* jobs = reinterpret_cast<JobType*>( &m_jobMem[ 0 ] );
for ( int i = iBegin; i < iEnd; i += grainSize )
{
btAssert( iJob < jobCount );
int iE = btMin( i + grainSize, iEnd );
JobType& job = jobs[ iJob ];
new ( (void*) &job ) ParallelForJob( i, iE, body ); // placement new
m_jobContext.submitJob( &job );
iJob++;
}
wakeWorkers( jobCount - 1 );
// put the main thread to work on emptying the job queue and then wait for all workers to finish
waitJobs();
m_antiNestingLock.unlock();
}
else
{
BT_PROFILE( "parallelFor_mainThread" );
// just run on main thread
body.forLoop( iBegin, iEnd );
}
}
virtual btScalar parallelSum( int iBegin, int iEnd, int grainSize, const btIParallelSumBody& body ) BT_OVERRIDE
{
BT_PROFILE( "parallelSum_ThreadSupport" );
btAssert( iEnd >= iBegin );
btAssert( grainSize >= 1 );
int iterationCount = iEnd - iBegin;
if ( iterationCount > grainSize && m_numWorkerThreads > 0 && m_antiNestingLock.tryLock() )
{
typedef ParallelSumJob JobType;
int jobCount = ( iterationCount + grainSize - 1 ) / grainSize;
m_numJobs = jobCount;
btAssert( jobCount >= 2 ); // need more than one job for multithreading
int jobSize = sizeof( JobType );
int jobBufSize = jobSize * jobCount;
// make sure we have enough memory allocated to store jobs
if ( jobBufSize > m_jobMem.size() )
{
m_jobMem.resize( jobBufSize );
}
// make sure job queue is big enough
if ( jobCount > m_jobContext.m_jobQueue.capacity() )
{
m_jobContext.m_jobQueue.reserve( jobCount );
}
// make sure thread local area is big enough
int threadLocalSize = m_numThreads * sizeof( ThreadLocalSum );
if ( threadLocalSize > m_threadLocalMem.size() )
{
m_threadLocalMem.resize( threadLocalSize );
}
// initialize summation
ThreadLocalSum* threadLocalSum = reinterpret_cast<ThreadLocalSum*>( &m_threadLocalMem[ 0 ] );
for ( int iThread = 0; iThread < m_numThreads; ++iThread )
{
threadLocalSum[ iThread ].mSum = btScalar( 0 );
}
m_jobContext.clearQueue();
// prepare worker threads for incoming work
prepareWorkerThreads();
// submit all of the jobs
int iJob = 0;
JobType* jobs = reinterpret_cast<JobType*>( &m_jobMem[ 0 ] );
for ( int i = iBegin; i < iEnd; i += grainSize )
{
btAssert( iJob < jobCount );
int iE = btMin( i + grainSize, iEnd );
JobType& job = jobs[ iJob ];
new ( (void*) &job ) ParallelSumJob( i, iE, body, threadLocalSum ); // placement new
m_jobContext.submitJob( &job );
iJob++;
}
wakeWorkers( jobCount - 1 );
// put the main thread to work on emptying the job queue and then wait for all workers to finish
waitJobs();
m_antiNestingLock.unlock();
// add up all the thread sums
btScalar sum = btScalar(0);
for ( int iThread = 0; iThread < m_numThreads; ++iThread )
{
sum += threadLocalSum[ iThread ].mSum;
}
return sum;
}
else
{
BT_PROFILE( "parallelSum_mainThread" );
// just run on main thread
return body.sumLoop( iBegin, iEnd );
}
}
};
btITaskScheduler* btCreateDefaultTaskScheduler()
{
btTaskSchedulerDefault* ts = new btTaskSchedulerDefault();
ts->init();
return ts;
}
#else // #if BT_THREADSAFE
btITaskScheduler* btCreateDefaultTaskScheduler()
{
return NULL;
}
#endif // #else // #if BT_THREADSAFE

View File

@@ -0,0 +1,75 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2018 Erwin Coumans http://bulletphysics.com
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef BT_THREAD_SUPPORT_INTERFACE_H
#define BT_THREAD_SUPPORT_INTERFACE_H
class btCriticalSection
{
public:
btCriticalSection() {}
virtual ~btCriticalSection() {}
virtual void lock() = 0;
virtual void unlock() = 0;
};
class btThreadSupportInterface
{
public:
virtual ~btThreadSupportInterface() {}
virtual int getNumWorkerThreads() const = 0; // number of worker threads (total number of logical processors - 1)
virtual int getCacheFriendlyNumThreads() const = 0; // the number of logical processors sharing a single L3 cache
virtual void runTask( int threadIndex, void* userData ) = 0;
virtual void waitForAllTasks() = 0;
virtual btCriticalSection* createCriticalSection() = 0;
virtual void deleteCriticalSection( btCriticalSection* criticalSection ) = 0;
virtual void* getThreadLocalMemory( int taskId ) { return NULL; }
typedef void( *ThreadFunc )( void* userPtr, void* lsMemory );
typedef void* ( *MemorySetupFunc )( );
struct ConstructionInfo
{
ConstructionInfo( const char* uniqueName,
ThreadFunc userThreadFunc,
MemorySetupFunc lsMemoryFunc,
int threadStackSize = 65535
)
:m_uniqueName( uniqueName ),
m_userThreadFunc( userThreadFunc ),
m_lsMemoryFunc( lsMemoryFunc ),
m_threadStackSize( threadStackSize )
{
}
const char* m_uniqueName;
ThreadFunc m_userThreadFunc;
MemorySetupFunc m_lsMemoryFunc;
int m_threadStackSize;
};
static btThreadSupportInterface* create( const ConstructionInfo& info );
};
#endif //BT_THREAD_SUPPORT_INTERFACE_H

View File

@@ -0,0 +1,369 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2018 Erwin Coumans http://bulletphysics.com
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#if BT_THREADSAFE && !defined( _WIN32 )
#include "LinearMath/btScalar.h"
#include "LinearMath/btAlignedObjectArray.h"
#include "LinearMath/btThreads.h"
#include "LinearMath/btMinMax.h"
#include "btThreadSupportInterface.h"
#include <stdio.h>
#include <errno.h>
#include <unistd.h>
#ifndef _XOPEN_SOURCE
#define _XOPEN_SOURCE 600 //for definition of pthread_barrier_t, see http://pages.cs.wisc.edu/~travitch/pthreads_primer.html
#endif //_XOPEN_SOURCE
#include <pthread.h>
#include <semaphore.h>
#include <unistd.h> //for sysconf
///
/// getNumHardwareThreads()
///
///
/// https://stackoverflow.com/questions/150355/programmatically-find-the-number-of-cores-on-a-machine
///
#if __cplusplus >= 201103L
#include <thread>
int btGetNumHardwareThreads()
{
return std::thread::hardware_concurrency();
}
#else
int btGetNumHardwareThreads()
{
return sysconf( _SC_NPROCESSORS_ONLN );
}
#endif
// btThreadSupportPosix helps to initialize/shutdown libspe2, start/stop SPU tasks and communication
class btThreadSupportPosix : public btThreadSupportInterface
{
public:
struct btThreadStatus
{
int m_taskId;
int m_commandId;
int m_status;
ThreadFunc m_userThreadFunc;
void* m_userPtr; //for taskDesc etc
void* m_lsMemory; //initialized using PosixLocalStoreMemorySetupFunc
pthread_t thread;
//each tread will wait until this signal to start its work
sem_t* startSemaphore;
// this is a copy of m_mainSemaphore,
//each tread will signal once it is finished with its work
sem_t* m_mainSemaphore;
unsigned long threadUsed;
};
private:
typedef unsigned long long UINT64;
btAlignedObjectArray<btThreadStatus> m_activeThreadStatus;
// m_mainSemaphoresemaphore will signal, if and how many threads are finished with their work
sem_t* m_mainSemaphore;
int m_numThreads;
UINT64 m_startedThreadsMask;
void startThreads( const ConstructionInfo& threadInfo );
void stopThreads();
int waitForResponse();
public:
btThreadSupportPosix( const ConstructionInfo& threadConstructionInfo );
virtual ~btThreadSupportPosix();
virtual int getNumWorkerThreads() const BT_OVERRIDE { return m_numThreads; }
// TODO: return the number of logical processors sharing the first L3 cache
virtual int getCacheFriendlyNumThreads() const BT_OVERRIDE { return m_numThreads + 1; }
virtual void runTask( int threadIndex, void* userData ) BT_OVERRIDE;
virtual void waitForAllTasks() BT_OVERRIDE;
virtual btCriticalSection* createCriticalSection() BT_OVERRIDE;
virtual void deleteCriticalSection( btCriticalSection* criticalSection ) BT_OVERRIDE;
virtual void* getThreadLocalMemory( int taskId ) BT_OVERRIDE
{
return m_activeThreadStatus[ taskId ].m_lsMemory;
}
};
#define checkPThreadFunction(returnValue) \
if(0 != returnValue) { \
printf("PThread problem at line %i in file %s: %i %d\n", __LINE__, __FILE__, returnValue, errno); \
}
// The number of threads should be equal to the number of available cores
// Todo: each worker should be linked to a single core, using SetThreadIdealProcessor.
btThreadSupportPosix::btThreadSupportPosix( const ConstructionInfo& threadConstructionInfo )
{
startThreads( threadConstructionInfo );
}
// cleanup/shutdown Libspe2
btThreadSupportPosix::~btThreadSupportPosix()
{
stopThreads();
}
#if (defined (__APPLE__))
#define NAMED_SEMAPHORES
#endif
static sem_t* createSem( const char* baseName )
{
static int semCount = 0;
#ifdef NAMED_SEMAPHORES
/// Named semaphore begin
char name[ 32 ];
snprintf( name, 32, "/%8.s-%4.d-%4.4d", baseName, getpid(), semCount++ );
sem_t* tempSem = sem_open( name, O_CREAT, 0600, 0 );
if ( tempSem != reinterpret_cast<sem_t *>( SEM_FAILED ) )
{
// printf("Created \"%s\" Semaphore %p\n", name, tempSem);
}
else
{
//printf("Error creating Semaphore %d\n", errno);
exit( -1 );
}
/// Named semaphore end
#else
sem_t* tempSem = new sem_t;
checkPThreadFunction( sem_init( tempSem, 0, 0 ) );
#endif
return tempSem;
}
static void destroySem( sem_t* semaphore )
{
#ifdef NAMED_SEMAPHORES
checkPThreadFunction( sem_close( semaphore ) );
#else
checkPThreadFunction( sem_destroy( semaphore ) );
delete semaphore;
#endif
}
static void *threadFunction( void *argument )
{
btThreadSupportPosix::btThreadStatus* status = ( btThreadSupportPosix::btThreadStatus* )argument;
while ( 1 )
{
checkPThreadFunction( sem_wait( status->startSemaphore ) );
void* userPtr = status->m_userPtr;
if ( userPtr )
{
btAssert( status->m_status );
status->m_userThreadFunc( userPtr, status->m_lsMemory );
status->m_status = 2;
checkPThreadFunction( sem_post( status->m_mainSemaphore ) );
status->threadUsed++;
}
else
{
//exit Thread
status->m_status = 3;
checkPThreadFunction( sem_post( status->m_mainSemaphore ) );
printf( "Thread with taskId %i exiting\n", status->m_taskId );
break;
}
}
printf( "Thread TERMINATED\n" );
}
///send messages to SPUs
void btThreadSupportPosix::runTask( int threadIndex, void* userData )
{
///we should spawn an SPU task here, and in 'waitForResponse' it should wait for response of the (one of) the first tasks that finished
btThreadStatus& threadStatus = m_activeThreadStatus[ threadIndex ];
btAssert( threadIndex >= 0 );
btAssert( threadIndex < m_activeThreadStatus.size() );
threadStatus.m_commandId = 1;
threadStatus.m_status = 1;
threadStatus.m_userPtr = userData;
m_startedThreadsMask |= UINT64( 1 ) << threadIndex;
// fire event to start new task
checkPThreadFunction( sem_post( threadStatus.startSemaphore ) );
}
///check for messages from SPUs
int btThreadSupportPosix::waitForResponse()
{
///We should wait for (one of) the first tasks to finish (or other SPU messages), and report its response
///A possible response can be 'yes, SPU handled it', or 'no, please do a PPU fallback'
btAssert( m_activeThreadStatus.size() );
// wait for any of the threads to finish
checkPThreadFunction( sem_wait( m_mainSemaphore ) );
// get at least one thread which has finished
size_t last = -1;
for ( size_t t = 0; t < size_t( m_activeThreadStatus.size() ); ++t )
{
if ( 2 == m_activeThreadStatus[ t ].m_status )
{
last = t;
break;
}
}
btThreadStatus& threadStatus = m_activeThreadStatus[ last ];
btAssert( threadStatus.m_status > 1 );
threadStatus.m_status = 0;
// need to find an active spu
btAssert( last >= 0 );
m_startedThreadsMask &= ~( UINT64( 1 ) << last );
return last;
}
void btThreadSupportPosix::waitForAllTasks()
{
while ( m_startedThreadsMask )
{
waitForResponse();
}
}
void btThreadSupportPosix::startThreads( const ConstructionInfo& threadConstructionInfo )
{
m_numThreads = btGetNumHardwareThreads() - 1; // main thread exists already
printf( "%s creating %i threads.\n", __FUNCTION__, m_numThreads );
m_activeThreadStatus.resize( m_numThreads );
m_startedThreadsMask = 0;
m_mainSemaphore = createSem( "main" );
//checkPThreadFunction(sem_wait(mainSemaphore));
for ( int i = 0; i < m_numThreads; i++ )
{
printf( "starting thread %d\n", i );
btThreadStatus& threadStatus = m_activeThreadStatus[ i ];
threadStatus.startSemaphore = createSem( "threadLocal" );
checkPThreadFunction( pthread_create( &threadStatus.thread, NULL, &threadFunction, (void*) &threadStatus ) );
threadStatus.m_userPtr = 0;
threadStatus.m_taskId = i;
threadStatus.m_commandId = 0;
threadStatus.m_status = 0;
threadStatus.m_mainSemaphore = m_mainSemaphore;
threadStatus.m_lsMemory = threadConstructionInfo.m_lsMemoryFunc();
threadStatus.m_userThreadFunc = threadConstructionInfo.m_userThreadFunc;
threadStatus.threadUsed = 0;
printf( "started thread %d \n", i );
}
}
///tell the task scheduler we are done with the SPU tasks
void btThreadSupportPosix::stopThreads()
{
for ( size_t t = 0; t < size_t( m_activeThreadStatus.size() ); ++t )
{
btThreadStatus& threadStatus = m_activeThreadStatus[ t ];
printf( "%s: Thread %i used: %ld\n", __FUNCTION__, int( t ), threadStatus.threadUsed );
threadStatus.m_userPtr = 0;
checkPThreadFunction( sem_post( threadStatus.startSemaphore ) );
checkPThreadFunction( sem_wait( m_mainSemaphore ) );
printf( "destroy semaphore\n" );
destroySem( threadStatus.startSemaphore );
printf( "semaphore destroyed\n" );
checkPThreadFunction( pthread_join( threadStatus.thread, 0 ) );
}
printf( "destroy main semaphore\n" );
destroySem( m_mainSemaphore );
printf( "main semaphore destroyed\n" );
m_activeThreadStatus.clear();
}
class btCriticalSectionPosix : public btCriticalSection
{
pthread_mutex_t m_mutex;
public:
btCriticalSectionPosix()
{
pthread_mutex_init( &m_mutex, NULL );
}
virtual ~btCriticalSectionPosix()
{
pthread_mutex_destroy( &m_mutex );
}
virtual void lock()
{
pthread_mutex_lock( &m_mutex );
}
virtual void unlock()
{
pthread_mutex_unlock( &m_mutex );
}
};
btCriticalSection* btThreadSupportPosix::createCriticalSection()
{
return new btCriticalSectionPosix();
}
void btThreadSupportPosix::deleteCriticalSection( btCriticalSection* cs )
{
delete cs;
}
btThreadSupportInterface* btThreadSupportInterface::create( const ConstructionInfo& info )
{
return new btThreadSupportPosix( info );
}
#endif // BT_THREADSAFE && !defined( _WIN32 )

View File

@@ -0,0 +1,480 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2018 Erwin Coumans http://bulletphysics.com
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#if defined( _WIN32 ) && BT_THREADSAFE
#include "LinearMath/btScalar.h"
#include "LinearMath/btMinMax.h"
#include "LinearMath/btAlignedObjectArray.h"
#include "LinearMath/btThreads.h"
#include "btThreadSupportInterface.h"
#include <windows.h>
#include <stdio.h>
struct btProcessorInfo
{
int numLogicalProcessors;
int numCores;
int numNumaNodes;
int numL1Cache;
int numL2Cache;
int numL3Cache;
int numPhysicalPackages;
static const int maxNumTeamMasks = 32;
int numTeamMasks;
UINT64 processorTeamMasks[ maxNumTeamMasks ];
};
UINT64 getProcessorTeamMask( const btProcessorInfo& procInfo, int procId )
{
UINT64 procMask = UINT64( 1 ) << procId;
for ( int i = 0; i < procInfo.numTeamMasks; ++i )
{
if ( procMask & procInfo.processorTeamMasks[ i ] )
{
return procInfo.processorTeamMasks[ i ];
}
}
return 0;
}
int getProcessorTeamIndex( const btProcessorInfo& procInfo, int procId )
{
UINT64 procMask = UINT64( 1 ) << procId;
for ( int i = 0; i < procInfo.numTeamMasks; ++i )
{
if ( procMask & procInfo.processorTeamMasks[ i ] )
{
return i;
}
}
return -1;
}
int countSetBits( ULONG64 bits )
{
int count = 0;
while ( bits )
{
if ( bits & 1 )
{
count++;
}
bits >>= 1;
}
return count;
}
typedef BOOL( WINAPI *Pfn_GetLogicalProcessorInformation )( PSYSTEM_LOGICAL_PROCESSOR_INFORMATION, PDWORD );
void getProcessorInformation( btProcessorInfo* procInfo )
{
memset( procInfo, 0, sizeof( *procInfo ) );
Pfn_GetLogicalProcessorInformation getLogicalProcInfo =
(Pfn_GetLogicalProcessorInformation) GetProcAddress( GetModuleHandle( TEXT( "kernel32" ) ), "GetLogicalProcessorInformation" );
if ( getLogicalProcInfo == NULL )
{
// no info
return;
}
PSYSTEM_LOGICAL_PROCESSOR_INFORMATION buf = NULL;
DWORD bufSize = 0;
while ( true )
{
if ( getLogicalProcInfo( buf, &bufSize ) )
{
break;
}
else
{
if ( GetLastError() == ERROR_INSUFFICIENT_BUFFER )
{
if ( buf )
{
free( buf );
}
buf = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION) malloc( bufSize );
}
}
}
int len = bufSize / sizeof( *buf );
for ( int i = 0; i < len; ++i )
{
PSYSTEM_LOGICAL_PROCESSOR_INFORMATION info = buf + i;
switch ( info->Relationship )
{
case RelationNumaNode:
procInfo->numNumaNodes++;
break;
case RelationProcessorCore:
procInfo->numCores++;
procInfo->numLogicalProcessors += countSetBits( info->ProcessorMask );
break;
case RelationCache:
if ( info->Cache.Level == 1 )
{
procInfo->numL1Cache++;
}
else if ( info->Cache.Level == 2 )
{
procInfo->numL2Cache++;
}
else if ( info->Cache.Level == 3 )
{
procInfo->numL3Cache++;
// processors that share L3 cache are considered to be on the same team
// because they can more easily work together on the same data.
// Large performance penalties will occur if 2 or more threads from different
// teams attempt to frequently read and modify the same cache lines.
//
// On the AMD Ryzen 7 CPU for example, the 8 cores on the CPU are split into
// 2 CCX units of 4 cores each. Each CCX has a separate L3 cache, so if both
// CCXs are operating on the same data, many cycles will be spent keeping the
// two caches coherent.
if ( procInfo->numTeamMasks < btProcessorInfo::maxNumTeamMasks )
{
procInfo->processorTeamMasks[ procInfo->numTeamMasks ] = info->ProcessorMask;
procInfo->numTeamMasks++;
}
}
break;
case RelationProcessorPackage:
procInfo->numPhysicalPackages++;
break;
}
}
free( buf );
}
///btThreadSupportWin32 helps to initialize/shutdown libspe2, start/stop SPU tasks and communication
class btThreadSupportWin32 : public btThreadSupportInterface
{
public:
struct btThreadStatus
{
int m_taskId;
int m_commandId;
int m_status;
ThreadFunc m_userThreadFunc;
void* m_userPtr; //for taskDesc etc
void* m_lsMemory; //initialized using Win32LocalStoreMemorySetupFunc
void* m_threadHandle; //this one is calling 'Win32ThreadFunc'
void* m_eventStartHandle;
char m_eventStartHandleName[ 32 ];
void* m_eventCompleteHandle;
char m_eventCompleteHandleName[ 32 ];
};
private:
btAlignedObjectArray<btThreadStatus> m_activeThreadStatus;
btAlignedObjectArray<void*> m_completeHandles;
int m_numThreads;
DWORD_PTR m_startedThreadMask;
btProcessorInfo m_processorInfo;
void startThreads( const ConstructionInfo& threadInfo );
void stopThreads();
int waitForResponse();
public:
btThreadSupportWin32( const ConstructionInfo& threadConstructionInfo );
virtual ~btThreadSupportWin32();
virtual int getNumWorkerThreads() const BT_OVERRIDE { return m_numThreads; }
virtual int getCacheFriendlyNumThreads() const BT_OVERRIDE { return countSetBits(m_processorInfo.processorTeamMasks[0]); }
virtual void runTask( int threadIndex, void* userData ) BT_OVERRIDE;
virtual void waitForAllTasks() BT_OVERRIDE;
virtual void* getThreadLocalMemory( int taskId ) BT_OVERRIDE
{
return m_activeThreadStatus[ taskId ].m_lsMemory;
}
virtual btCriticalSection* createCriticalSection() BT_OVERRIDE;
virtual void deleteCriticalSection( btCriticalSection* criticalSection ) BT_OVERRIDE;
};
btThreadSupportWin32::btThreadSupportWin32( const ConstructionInfo & threadConstructionInfo )
{
startThreads( threadConstructionInfo );
}
btThreadSupportWin32::~btThreadSupportWin32()
{
stopThreads();
}
DWORD WINAPI win32threadStartFunc( LPVOID lpParam )
{
btThreadSupportWin32::btThreadStatus* status = ( btThreadSupportWin32::btThreadStatus* )lpParam;
while ( 1 )
{
WaitForSingleObject( status->m_eventStartHandle, INFINITE );
void* userPtr = status->m_userPtr;
if ( userPtr )
{
btAssert( status->m_status );
status->m_userThreadFunc( userPtr, status->m_lsMemory );
status->m_status = 2;
SetEvent( status->m_eventCompleteHandle );
}
else
{
//exit Thread
status->m_status = 3;
printf( "Thread with taskId %i with handle %p exiting\n", status->m_taskId, status->m_threadHandle );
SetEvent( status->m_eventCompleteHandle );
break;
}
}
printf( "Thread TERMINATED\n" );
return 0;
}
void btThreadSupportWin32::runTask( int threadIndex, void* userData )
{
btThreadStatus& threadStatus = m_activeThreadStatus[ threadIndex ];
btAssert( taskId >= 0 );
btAssert( int( taskId ) < m_activeThreadStatus.size() );
threadStatus.m_commandId = 1;
threadStatus.m_status = 1;
threadStatus.m_userPtr = userData;
m_startedThreadMask |= DWORD_PTR( 1 ) << threadIndex;
///fire event to start new task
SetEvent( threadStatus.m_eventStartHandle );
}
int btThreadSupportWin32::waitForResponse()
{
btAssert( m_activeThreadStatus.size() );
int last = -1;
DWORD res = WaitForMultipleObjects( m_completeHandles.size(), &m_completeHandles[ 0 ], FALSE, INFINITE );
btAssert( res != WAIT_FAILED );
last = res - WAIT_OBJECT_0;
btThreadStatus& threadStatus = m_activeThreadStatus[ last ];
btAssert( threadStatus.m_threadHandle );
btAssert( threadStatus.m_eventCompleteHandle );
//WaitForSingleObject(threadStatus.m_eventCompleteHandle, INFINITE);
btAssert( threadStatus.m_status > 1 );
threadStatus.m_status = 0;
///need to find an active spu
btAssert( last >= 0 );
m_startedThreadMask &= ~( DWORD_PTR( 1 ) << last );
return last;
}
void btThreadSupportWin32::waitForAllTasks()
{
while ( m_startedThreadMask )
{
waitForResponse();
}
}
void btThreadSupportWin32::startThreads( const ConstructionInfo& threadConstructionInfo )
{
static int uniqueId = 0;
uniqueId++;
btProcessorInfo& procInfo = m_processorInfo;
getProcessorInformation( &procInfo );
DWORD_PTR dwProcessAffinityMask = 0;
DWORD_PTR dwSystemAffinityMask = 0;
if ( !GetProcessAffinityMask( GetCurrentProcess(), &dwProcessAffinityMask, &dwSystemAffinityMask ) )
{
dwProcessAffinityMask = 0;
}
///The number of threads should be equal to the number of available cores - 1
m_numThreads = btMin(procInfo.numLogicalProcessors, int(BT_MAX_THREAD_COUNT)) - 1; // cap to max thread count (-1 because main thread already exists)
m_activeThreadStatus.resize( m_numThreads );
m_completeHandles.resize( m_numThreads );
m_startedThreadMask = 0;
// set main thread affinity
if ( DWORD_PTR mask = dwProcessAffinityMask & getProcessorTeamMask( procInfo, 0 ))
{
SetThreadAffinityMask( GetCurrentThread(), mask );
SetThreadIdealProcessor( GetCurrentThread(), 0 );
}
for ( int i = 0; i < m_numThreads; i++ )
{
printf( "starting thread %d\n", i );
btThreadStatus& threadStatus = m_activeThreadStatus[ i ];
LPSECURITY_ATTRIBUTES lpThreadAttributes = NULL;
SIZE_T dwStackSize = threadConstructionInfo.m_threadStackSize;
LPTHREAD_START_ROUTINE lpStartAddress = &win32threadStartFunc;
LPVOID lpParameter = &threadStatus;
DWORD dwCreationFlags = 0;
LPDWORD lpThreadId = 0;
threadStatus.m_userPtr = 0;
sprintf( threadStatus.m_eventStartHandleName, "es%.8s%d%d", threadConstructionInfo.m_uniqueName, uniqueId, i );
threadStatus.m_eventStartHandle = CreateEventA( 0, false, false, threadStatus.m_eventStartHandleName );
sprintf( threadStatus.m_eventCompleteHandleName, "ec%.8s%d%d", threadConstructionInfo.m_uniqueName, uniqueId, i );
threadStatus.m_eventCompleteHandle = CreateEventA( 0, false, false, threadStatus.m_eventCompleteHandleName );
m_completeHandles[ i ] = threadStatus.m_eventCompleteHandle;
HANDLE handle = CreateThread( lpThreadAttributes, dwStackSize, lpStartAddress, lpParameter, dwCreationFlags, lpThreadId );
//SetThreadPriority( handle, THREAD_PRIORITY_HIGHEST );
// highest priority -- can cause erratic performance when numThreads > numCores
// we don't want worker threads to be higher priority than the main thread or the main thread could get
// totally shut out and unable to tell the workers to stop
//SetThreadPriority( handle, THREAD_PRIORITY_BELOW_NORMAL );
{
int processorId = i + 1; // leave processor 0 for main thread
DWORD_PTR teamMask = getProcessorTeamMask( procInfo, processorId );
if ( teamMask )
{
// bind each thread to only execute on processors of it's assigned team
// - for single-socket Intel x86 CPUs this has no effect (only a single, shared L3 cache so there is only 1 team)
// - for multi-socket Intel this will keep threads from migrating from one socket to another
// - for AMD Ryzen this will keep threads from migrating from one CCX to another
DWORD_PTR mask = teamMask & dwProcessAffinityMask;
if ( mask )
{
SetThreadAffinityMask( handle, mask );
}
}
SetThreadIdealProcessor( handle, processorId );
}
threadStatus.m_taskId = i;
threadStatus.m_commandId = 0;
threadStatus.m_status = 0;
threadStatus.m_threadHandle = handle;
threadStatus.m_lsMemory = threadConstructionInfo.m_lsMemoryFunc();
threadStatus.m_userThreadFunc = threadConstructionInfo.m_userThreadFunc;
printf( "started %s thread %d with threadHandle %p\n", threadConstructionInfo.m_uniqueName, i, handle );
}
}
///tell the task scheduler we are done with the SPU tasks
void btThreadSupportWin32::stopThreads()
{
for ( int i = 0; i < m_activeThreadStatus.size(); i++ )
{
btThreadStatus& threadStatus = m_activeThreadStatus[ i ];
if ( threadStatus.m_status > 0 )
{
WaitForSingleObject( threadStatus.m_eventCompleteHandle, INFINITE );
}
delete threadStatus.m_lsMemory;
threadStatus.m_userPtr = 0;
SetEvent( threadStatus.m_eventStartHandle );
WaitForSingleObject( threadStatus.m_eventCompleteHandle, INFINITE );
CloseHandle( threadStatus.m_eventCompleteHandle );
CloseHandle( threadStatus.m_eventStartHandle );
CloseHandle( threadStatus.m_threadHandle );
}
m_activeThreadStatus.clear();
m_completeHandles.clear();
}
class btWin32CriticalSection : public btCriticalSection
{
private:
CRITICAL_SECTION mCriticalSection;
public:
btWin32CriticalSection()
{
InitializeCriticalSection( &mCriticalSection );
}
~btWin32CriticalSection()
{
DeleteCriticalSection( &mCriticalSection );
}
void lock()
{
EnterCriticalSection( &mCriticalSection );
}
void unlock()
{
LeaveCriticalSection( &mCriticalSection );
}
};
btCriticalSection* btThreadSupportWin32::createCriticalSection()
{
unsigned char* mem = (unsigned char*) btAlignedAlloc( sizeof( btWin32CriticalSection ), 16 );
btWin32CriticalSection* cs = new( mem ) btWin32CriticalSection();
return cs;
}
void btThreadSupportWin32::deleteCriticalSection( btCriticalSection* criticalSection )
{
criticalSection->~btCriticalSection();
btAlignedFree( criticalSection );
}
btThreadSupportInterface* btThreadSupportInterface::create( const ConstructionInfo& info )
{
return new btThreadSupportWin32( info );
}
#endif //defined(_WIN32) && BT_THREADSAFE

View File

@@ -453,6 +453,33 @@ void btParallelFor( int iBegin, int iEnd, int grainSize, const btIParallelForBod
#endif// #if BT_THREADSAFE #endif// #if BT_THREADSAFE
} }
btScalar btParallelSum( int iBegin, int iEnd, int grainSize, const btIParallelSumBody& body )
{
#if BT_THREADSAFE
#if BT_DETECT_BAD_THREAD_INDEX
if ( !btThreadsAreRunning() )
{
// clear out thread ids
for ( int i = 0; i < BT_MAX_THREAD_COUNT; ++i )
{
gDebugThreadIds[ i ] = kInvalidThreadId;
}
}
#endif // #if BT_DETECT_BAD_THREAD_INDEX
btAssert( gBtTaskScheduler != NULL ); // call btSetTaskScheduler() with a valid task scheduler first!
return gBtTaskScheduler->parallelSum( iBegin, iEnd, grainSize, body );
#else // #if BT_THREADSAFE
// non-parallel version of btParallelSum
btAssert( !"called btParallelFor in non-threadsafe build. enable BT_THREADSAFE" );
return body.sumLoop( iBegin, iEnd );
#endif //#else // #if BT_THREADSAFE
}
/// ///
/// btTaskSchedulerSequential -- non-threaded implementation of task scheduler /// btTaskSchedulerSequential -- non-threaded implementation of task scheduler
@@ -470,6 +497,11 @@ public:
BT_PROFILE( "parallelFor_sequential" ); BT_PROFILE( "parallelFor_sequential" );
body.forLoop( iBegin, iEnd ); body.forLoop( iBegin, iEnd );
} }
virtual btScalar parallelSum( int iBegin, int iEnd, int grainSize, const btIParallelSumBody& body ) BT_OVERRIDE
{
BT_PROFILE( "parallelSum_sequential" );
return body.sumLoop( iBegin, iEnd );
}
}; };
@@ -514,11 +546,25 @@ public:
#pragma omp parallel for schedule( static, 1 ) #pragma omp parallel for schedule( static, 1 )
for ( int i = iBegin; i < iEnd; i += grainSize ) for ( int i = iBegin; i < iEnd; i += grainSize )
{ {
BT_PROFILE( "OpenMP_job" ); BT_PROFILE( "OpenMP_forJob" );
body.forLoop( i, ( std::min )( i + grainSize, iEnd ) ); body.forLoop( i, ( std::min )( i + grainSize, iEnd ) );
} }
btPopThreadsAreRunning(); btPopThreadsAreRunning();
} }
virtual btScalar parallelSum( int iBegin, int iEnd, int grainSize, const btIParallelSumBody& body ) BT_OVERRIDE
{
BT_PROFILE( "parallelFor_OpenMP" );
btPushThreadsAreRunning();
btScalar sum = btScalar( 0 );
#pragma omp parallel for schedule( static, 1 ) reduction(+:sum)
for ( int i = iBegin; i < iEnd; i += grainSize )
{
BT_PROFILE( "OpenMP_sumJob" );
sum += body.sumLoop( i, ( std::min )( i + grainSize, iEnd ) );
}
btPopThreadsAreRunning();
return sum;
}
}; };
#endif // #if BT_USE_OPENMP && BT_THREADSAFE #endif // #if BT_USE_OPENMP && BT_THREADSAFE
@@ -571,22 +617,21 @@ public:
btResetThreadIndexCounter(); btResetThreadIndexCounter();
} }
} }
struct BodyAdapter struct ForBodyAdapter
{ {
const btIParallelForBody* mBody; const btIParallelForBody* mBody;
ForBodyAdapter( const btIParallelForBody* body ) : mBody( body ) {}
void operator()( const tbb::blocked_range<int>& range ) const void operator()( const tbb::blocked_range<int>& range ) const
{ {
BT_PROFILE( "TBB_job" ); BT_PROFILE( "TBB_forJob" );
mBody->forLoop( range.begin(), range.end() ); mBody->forLoop( range.begin(), range.end() );
} }
}; };
virtual void parallelFor( int iBegin, int iEnd, int grainSize, const btIParallelForBody& body ) BT_OVERRIDE virtual void parallelFor( int iBegin, int iEnd, int grainSize, const btIParallelForBody& body ) BT_OVERRIDE
{ {
BT_PROFILE( "parallelFor_TBB" ); BT_PROFILE( "parallelFor_TBB" );
// TBB dispatch ForBodyAdapter tbbBody( &body );
BodyAdapter tbbBody;
tbbBody.mBody = &body;
btPushThreadsAreRunning(); btPushThreadsAreRunning();
tbb::parallel_for( tbb::blocked_range<int>( iBegin, iEnd, grainSize ), tbb::parallel_for( tbb::blocked_range<int>( iBegin, iEnd, grainSize ),
tbbBody, tbbBody,
@@ -594,6 +639,29 @@ public:
); );
btPopThreadsAreRunning(); btPopThreadsAreRunning();
} }
struct SumBodyAdapter
{
const btIParallelSumBody* mBody;
btScalar mSum;
SumBodyAdapter( const btIParallelSumBody* body ) : mBody( body ), mSum( btScalar( 0 ) ) {}
SumBodyAdapter( const SumBodyAdapter& src, tbb::split ) : mBody( src.mBody ), mSum( btScalar( 0 ) ) {}
void join( const SumBodyAdapter& src ) { mSum += src.mSum; }
void operator()( const tbb::blocked_range<int>& range )
{
BT_PROFILE( "TBB_sumJob" );
mSum += mBody->sumLoop( range.begin(), range.end() );
}
};
virtual btScalar parallelSum( int iBegin, int iEnd, int grainSize, const btIParallelSumBody& body ) BT_OVERRIDE
{
BT_PROFILE( "parallelSum_TBB" );
SumBodyAdapter tbbBody( &body );
btPushThreadsAreRunning();
tbb::parallel_deterministic_reduce( tbb::blocked_range<int>( iBegin, iEnd, grainSize ), tbbBody );
btPopThreadsAreRunning();
return tbbBody.mSum;
}
}; };
#endif // #if BT_USE_TBB && BT_THREADSAFE #endif // #if BT_USE_TBB && BT_THREADSAFE
@@ -605,6 +673,7 @@ public:
class btTaskSchedulerPPL : public btITaskScheduler class btTaskSchedulerPPL : public btITaskScheduler
{ {
int m_numThreads; int m_numThreads;
concurrency::combinable<btScalar> m_sum; // for parallelSum
public: public:
btTaskSchedulerPPL() : btITaskScheduler( "PPL" ) btTaskSchedulerPPL() : btITaskScheduler( "PPL" )
{ {
@@ -644,15 +713,16 @@ public:
btResetThreadIndexCounter(); btResetThreadIndexCounter();
} }
} }
struct BodyAdapter struct ForBodyAdapter
{ {
const btIParallelForBody* mBody; const btIParallelForBody* mBody;
int mGrainSize; int mGrainSize;
int mIndexEnd; int mIndexEnd;
ForBodyAdapter( const btIParallelForBody* body, int grainSize, int end ) : mBody( body ), mGrainSize( grainSize ), mIndexEnd( end ) {}
void operator()( int i ) const void operator()( int i ) const
{ {
BT_PROFILE( "PPL_job" ); BT_PROFILE( "PPL_forJob" );
mBody->forLoop( i, ( std::min )( i + mGrainSize, mIndexEnd ) ); mBody->forLoop( i, ( std::min )( i + mGrainSize, mIndexEnd ) );
} }
}; };
@@ -660,10 +730,7 @@ public:
{ {
BT_PROFILE( "parallelFor_PPL" ); BT_PROFILE( "parallelFor_PPL" );
// PPL dispatch // PPL dispatch
BodyAdapter pplBody; ForBodyAdapter pplBody( &body, grainSize, iEnd );
pplBody.mBody = &body;
pplBody.mGrainSize = grainSize;
pplBody.mIndexEnd = iEnd;
btPushThreadsAreRunning(); btPushThreadsAreRunning();
// note: MSVC 2010 doesn't support partitioner args, so avoid them // note: MSVC 2010 doesn't support partitioner args, so avoid them
concurrency::parallel_for( iBegin, concurrency::parallel_for( iBegin,
@@ -673,6 +740,36 @@ public:
); );
btPopThreadsAreRunning(); btPopThreadsAreRunning();
} }
struct SumBodyAdapter
{
const btIParallelSumBody* mBody;
concurrency::combinable<btScalar>* mSum;
int mGrainSize;
int mIndexEnd;
SumBodyAdapter( const btIParallelSumBody* body, concurrency::combinable<btScalar>* sum, int grainSize, int end ) : mBody( body ), mSum(sum), mGrainSize( grainSize ), mIndexEnd( end ) {}
void operator()( int i ) const
{
BT_PROFILE( "PPL_sumJob" );
mSum->local() += mBody->sumLoop( i, ( std::min )( i + mGrainSize, mIndexEnd ) );
}
};
static btScalar sumFunc( btScalar a, btScalar b ) { return a + b; }
virtual btScalar parallelSum( int iBegin, int iEnd, int grainSize, const btIParallelSumBody& body ) BT_OVERRIDE
{
BT_PROFILE( "parallelSum_PPL" );
m_sum.clear();
SumBodyAdapter pplBody( &body, &m_sum, grainSize, iEnd );
btPushThreadsAreRunning();
// note: MSVC 2010 doesn't support partitioner args, so avoid them
concurrency::parallel_for( iBegin,
iEnd,
grainSize,
pplBody
);
btPopThreadsAreRunning();
return m_sum.combine( sumFunc );
}
}; };
#endif // #if BT_USE_PPL && BT_THREADSAFE #endif // #if BT_USE_PPL && BT_THREADSAFE

View File

@@ -107,6 +107,17 @@ public:
virtual void forLoop( int iBegin, int iEnd ) const = 0; virtual void forLoop( int iBegin, int iEnd ) const = 0;
}; };
//
// btIParallelSumBody -- subclass this to express work that can be done in parallel
// and produces a sum over all loop elements
//
class btIParallelSumBody
{
public:
virtual ~btIParallelSumBody() {}
virtual btScalar sumLoop( int iBegin, int iEnd ) const = 0;
};
// //
// btITaskScheduler -- subclass this to implement a task scheduler that can dispatch work to // btITaskScheduler -- subclass this to implement a task scheduler that can dispatch work to
// worker threads // worker threads
@@ -122,6 +133,8 @@ public:
virtual int getNumThreads() const = 0; virtual int getNumThreads() const = 0;
virtual void setNumThreads( int numThreads ) = 0; virtual void setNumThreads( int numThreads ) = 0;
virtual void parallelFor( int iBegin, int iEnd, int grainSize, const btIParallelForBody& body ) = 0; virtual void parallelFor( int iBegin, int iEnd, int grainSize, const btIParallelForBody& body ) = 0;
virtual btScalar parallelSum( int iBegin, int iEnd, int grainSize, const btIParallelSumBody& body ) = 0;
virtual void sleepWorkerThreadsHint() {} // hint the task scheduler that we may not be using these threads for a little while
// internal use only // internal use only
virtual void activate(); virtual void activate();
@@ -143,6 +156,9 @@ btITaskScheduler* btGetTaskScheduler();
// get non-threaded task scheduler (always available) // get non-threaded task scheduler (always available)
btITaskScheduler* btGetSequentialTaskScheduler(); btITaskScheduler* btGetSequentialTaskScheduler();
// create a default task scheduler (Win32 or pthreads based)
btITaskScheduler* btCreateDefaultTaskScheduler();
// get OpenMP task scheduler (if available, otherwise returns null) // get OpenMP task scheduler (if available, otherwise returns null)
btITaskScheduler* btGetOpenMPTaskScheduler(); btITaskScheduler* btGetOpenMPTaskScheduler();
@@ -156,5 +172,9 @@ btITaskScheduler* btGetPPLTaskScheduler();
// (iterations may be done out of order, so no dependencies are allowed) // (iterations may be done out of order, so no dependencies are allowed)
void btParallelFor( int iBegin, int iEnd, int grainSize, const btIParallelForBody& body ); void btParallelFor( int iBegin, int iEnd, int grainSize, const btIParallelForBody& body );
// btParallelSum -- call this to dispatch work like a for-loop, returns the sum of all iterations
// (iterations may be done out of order, so no dependencies are allowed)
btScalar btParallelSum( int iBegin, int iEnd, int grainSize, const btIParallelSumBody& body );
#endif #endif