Merge pull request #1579 from lunkhound/parallel-solver-wip3

Multithreaded constraint solver
erwincoumans authored this merge commit; committed via GitHub on 2018-03-26 17:11:05 -07:00.
28 changed files with 5520 additions and 908 deletions


@@ -28,14 +28,14 @@ OPTION(USE_GRAPHICAL_BENCHMARK "Use Graphical Benchmark" ON)
OPTION(BUILD_SHARED_LIBS "Use shared libraries" OFF)
OPTION(USE_SOFT_BODY_MULTI_BODY_DYNAMICS_WORLD "Use btSoftMultiBodyDynamicsWorld" ON)
OPTION(BULLET2_USE_THREAD_LOCKS "Build Bullet 2 libraries with mutex locking around certain operations (required for multi-threading)" OFF)
IF (BULLET2_USE_THREAD_LOCKS)
OPTION(BULLET2_MULTITHREADING "Build Bullet 2 libraries with mutex locking around certain operations (required for multi-threading)" OFF)
IF (BULLET2_MULTITHREADING)
OPTION(BULLET2_USE_OPEN_MP_MULTITHREADING "Build Bullet 2 with support for multi-threading with OpenMP (requires a compiler with OpenMP support)" OFF)
OPTION(BULLET2_USE_TBB_MULTITHREADING "Build Bullet 2 with support for multi-threading with Intel Threading Building Blocks (requires the TBB library to be already installed)" OFF)
IF (MSVC)
OPTION(BULLET2_USE_PPL_MULTITHREADING "Build Bullet 2 with support for multi-threading with Microsoft Parallel Patterns Library (requires MSVC compiler)" OFF)
ENDIF (MSVC)
ENDIF (BULLET2_USE_THREAD_LOCKS)
ENDIF (BULLET2_MULTITHREADING)
IF(NOT WIN32)
@@ -225,12 +225,15 @@ IF(USE_GRAPHICAL_BENCHMARK)
ADD_DEFINITIONS( -DUSE_GRAPHICAL_BENCHMARK)
ENDIF (USE_GRAPHICAL_BENCHMARK)
IF(BULLET2_USE_THREAD_LOCKS)
IF(BULLET2_MULTITHREADING)
ADD_DEFINITIONS( -DBT_THREADSAFE=1 )
IF (NOT MSVC)
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
ENDIF (NOT MSVC)
ENDIF (BULLET2_USE_THREAD_LOCKS)
IF (NOT WIN32)
LINK_LIBRARIES( pthread )
ENDIF (NOT WIN32)
ENDIF (BULLET2_MULTITHREADING)
IF (BULLET2_USE_OPEN_MP_MULTITHREADING)
ADD_DEFINITIONS("-DBT_USE_OPENMP=1")
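For orientation (not part of the diff): enabling BULLET2_MULTITHREADING and one of the threading back-ends only compiles the libraries with BT_THREADSAFE=1; the application still has to install a task scheduler at startup. A minimal sketch, assuming the LinearMath/btThreads.h entry points this PR exposes (btCreateDefaultTaskScheduler, btSetTaskScheduler, btGetSequentialTaskScheduler):

#include "LinearMath/btThreads.h"

// Hedged sketch, not part of this commit: install a task scheduler before creating any world.
void initTaskScheduler()
{
    // btCreateDefaultTaskScheduler() returns the built-in thread-pool scheduler,
    // or NULL when the library was built without BT_THREADSAFE.
    if (btITaskScheduler* scheduler = btCreateDefaultTaskScheduler())
    {
        scheduler->setNumThreads(scheduler->getMaxNumThreads());
        btSetTaskScheduler(scheduler);
    }
    else
    {
        // always-available single-threaded fallback
        btSetTaskScheduler(btGetSequentialTaskScheduler());
    }
}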


@@ -182,6 +182,14 @@ end
trigger = "audio",
description = "Enable audio"
}
newoption
{
trigger = "enable_multithreading",
description = "enable CPU multithreading for bullet2 libs"
}
if _OPTIONS["enable_multithreading"] then
defines {"BT_THREADSAFE=1"}
end
if _OPTIONS["double"] then
defines {"BT_USE_DOUBLE_PRECISION"}
end


@@ -226,7 +226,6 @@ SET(BulletExampleBrowser_SRCS
../MultiThreading/b3PosixThreadSupport.cpp
../MultiThreading/b3Win32ThreadSupport.cpp
../MultiThreading/b3ThreadSupportInterface.cpp
../MultiThreading/btTaskScheduler.cpp
../RenderingExamples/TinyRendererSetup.cpp
../RenderingExamples/TimeSeriesCanvas.cpp
../RenderingExamples/TimeSeriesCanvas.h


@@ -29,17 +29,17 @@ class btCollisionShape;
#include "BulletCollision/CollisionDispatch/btCollisionDispatcherMt.h"
#include "BulletDynamics/Dynamics/btSimulationIslandManagerMt.h" // for setSplitIslands()
#include "BulletDynamics/Dynamics/btDiscreteDynamicsWorldMt.h"
#include "BulletDynamics/ConstraintSolver/btSequentialImpulseConstraintSolverMt.h"
#include "BulletDynamics/ConstraintSolver/btSequentialImpulseConstraintSolver.h"
#include "BulletDynamics/ConstraintSolver/btNNCGConstraintSolver.h"
#include "BulletDynamics/MLCPSolvers/btMLCPSolver.h"
#include "BulletDynamics/MLCPSolvers/btSolveProjectedGaussSeidel.h"
#include "BulletDynamics/MLCPSolvers/btDantzigSolver.h"
#include "BulletDynamics/MLCPSolvers/btLemkeSolver.h"
#include "../MultiThreading/btTaskScheduler.h"
static int gNumIslands = 0;
bool gAllowNestedParallelForLoops = false;
class Profiler
{
@@ -52,6 +52,10 @@ public:
kRecordPredictUnconstrainedMotion,
kRecordCreatePredictiveContacts,
kRecordIntegrateTransforms,
kRecordSolverTotal,
kRecordSolverSetup,
kRecordSolverIterations,
kRecordSolverFinish,
kRecordCount
};
@@ -139,6 +143,41 @@ static void profileEndCallback( btDynamicsWorld *world, btScalar timeStep )
}
class MySequentialImpulseConstraintSolverMt : public btSequentialImpulseConstraintSolverMt
{
typedef btSequentialImpulseConstraintSolverMt ParentClass;
public:
BT_DECLARE_ALIGNED_ALLOCATOR();
MySequentialImpulseConstraintSolverMt() {}
// for profiling
virtual btScalar solveGroupCacheFriendlySetup(btCollisionObject** bodies,int numBodies,btPersistentManifold** manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal,btIDebugDraw* debugDrawer) BT_OVERRIDE
{
ProfileHelper prof(Profiler::kRecordSolverSetup);
btScalar ret = ParentClass::solveGroupCacheFriendlySetup(bodies, numBodies, manifoldPtr, numManifolds, constraints, numConstraints, infoGlobal, debugDrawer );
return ret;
}
virtual btScalar solveGroupCacheFriendlyIterations( btCollisionObject** bodies, int numBodies, btPersistentManifold** manifoldPtr, int numManifolds, btTypedConstraint** constraints, int numConstraints, const btContactSolverInfo& infoGlobal, btIDebugDraw* debugDrawer ) BT_OVERRIDE
{
ProfileHelper prof(Profiler::kRecordSolverIterations);
btScalar ret = ParentClass::solveGroupCacheFriendlyIterations(bodies, numBodies, manifoldPtr, numManifolds, constraints, numConstraints, infoGlobal, debugDrawer );
return ret;
}
virtual btScalar solveGroupCacheFriendlyFinish(btCollisionObject** bodies,int numBodies,const btContactSolverInfo& infoGlobal) BT_OVERRIDE
{
ProfileHelper prof(Profiler::kRecordSolverFinish);
btScalar ret = ParentClass::solveGroupCacheFriendlyFinish(bodies, numBodies, infoGlobal);
return ret;
}
virtual btScalar solveGroup(btCollisionObject** bodies, int numBodies, btPersistentManifold** manifold, int numManifolds, btTypedConstraint** constraints, int numConstraints, const btContactSolverInfo& info, btIDebugDraw* debugDrawer, btDispatcher* dispatcher) BT_OVERRIDE
{
ProfileHelper prof(Profiler::kRecordSolverTotal);
btScalar ret = ParentClass::solveGroup(bodies, numBodies, manifold, numManifolds, constraints, numConstraints, info, debugDrawer, dispatcher);
return ret;
}
};
///
/// MyCollisionDispatcher -- subclassed for profiling purposes
///
@@ -161,11 +200,11 @@ public:
///
/// myParallelIslandDispatch -- wrap default parallel dispatch for profiling and to get the number of simulation islands
//
void myParallelIslandDispatch( btAlignedObjectArray<btSimulationIslandManagerMt::Island*>* islandsPtr, btSimulationIslandManagerMt::IslandCallback* callback )
void myParallelIslandDispatch( btAlignedObjectArray<btSimulationIslandManagerMt::Island*>* islandsPtr, const btSimulationIslandManagerMt::SolverParams& solverParams)
{
ProfileHelper prof( Profiler::kRecordDispatchIslands );
gNumIslands = islandsPtr->size();
btSimulationIslandManagerMt::parallelIslandDispatch( islandsPtr, callback );
btSimulationIslandManagerMt::parallelIslandDispatch( islandsPtr, solverParams );
}
@@ -200,9 +239,10 @@ public:
MyDiscreteDynamicsWorld( btDispatcher* dispatcher,
btBroadphaseInterface* pairCache,
btConstraintSolverPoolMt* constraintSolver,
btSequentialImpulseConstraintSolverMt* constraintSolverMt,
btCollisionConfiguration* collisionConfiguration
) :
btDiscreteDynamicsWorldMt( dispatcher, pairCache, constraintSolver, collisionConfiguration )
btDiscreteDynamicsWorldMt( dispatcher, pairCache, constraintSolver, constraintSolverMt, collisionConfiguration )
{
btSimulationIslandManagerMt* islandMgr = static_cast<btSimulationIslandManagerMt*>( m_islandManager );
islandMgr->setIslandDispatchFunction( myParallelIslandDispatch );
@@ -218,6 +258,8 @@ btConstraintSolver* createSolverByType( SolverType t )
{
case SOLVER_TYPE_SEQUENTIAL_IMPULSE:
return new btSequentialImpulseConstraintSolver();
case SOLVER_TYPE_SEQUENTIAL_IMPULSE_MT:
return new MySequentialImpulseConstraintSolverMt();
case SOLVER_TYPE_NNCG:
return new btNNCGConstraintSolver();
case SOLVER_TYPE_MLCP_PGS:
@@ -253,7 +295,7 @@ public:
{
addTaskScheduler( btGetSequentialTaskScheduler() );
#if BT_THREADSAFE
if ( btITaskScheduler* ts = createDefaultTaskScheduler() )
if ( btITaskScheduler* ts = btCreateDefaultTaskScheduler() )
{
m_allocatedTaskSchedulers.push_back( ts );
addTaskScheduler( ts );
@@ -306,11 +348,12 @@ static btTaskSchedulerManager gTaskSchedulerMgr;
#if BT_THREADSAFE
static bool gMultithreadedWorld = true;
static bool gDisplayProfileInfo = true;
static SolverType gSolverType = SOLVER_TYPE_SEQUENTIAL_IMPULSE_MT;
#else
static bool gMultithreadedWorld = false;
static bool gDisplayProfileInfo = false;
#endif
static SolverType gSolverType = SOLVER_TYPE_SEQUENTIAL_IMPULSE;
#endif
static int gSolverMode = SOLVER_SIMD |
SOLVER_USE_WARMSTARTING |
// SOLVER_RANDMIZE_ORDER |
@@ -318,9 +361,11 @@ static int gSolverMode = SOLVER_SIMD |
// SOLVER_USE_2_FRICTION_DIRECTIONS |
0;
static btScalar gSliderSolverIterations = 10.0f; // should be int
static btScalar gSliderNumThreads = 1.0f; // should be int
static btScalar gSliderIslandBatchingThreshold = 0.0f; // should be int
static btScalar gSliderMinBatchSize = btScalar(btSequentialImpulseConstraintSolverMt::s_minBatchSize); // should be int
static btScalar gSliderMaxBatchSize = btScalar(btSequentialImpulseConstraintSolverMt::s_maxBatchSize); // should be int
static btScalar gSliderLeastSquaresResidualThreshold = 0.0f;
////////////////////////////////////
CommonRigidBodyMTBase::CommonRigidBodyMTBase( struct GUIHelperInterface* helper )
@@ -419,6 +464,23 @@ void setTaskSchedulerComboBoxCallback(int combobox, const char* item, void* user
}
void setBatchingMethodComboBoxCallback(int combobox, const char* item, void* userPointer)
{
#if BT_THREADSAFE
const char** items = static_cast<const char**>( userPointer );
for ( int i = 0; i < btBatchedConstraints::BATCHING_METHOD_COUNT; ++i )
{
if ( strcmp( item, items[ i ] ) == 0 )
{
// change the task scheduler
btSequentialImpulseConstraintSolverMt::s_contactBatchingMethod = static_cast<btBatchedConstraints::BatchingMethod>( i );
break;
}
}
#endif // #if BT_THREADSAFE
}
static void setThreadCountCallback(float val, void* userPtr)
{
#if BT_THREADSAFE
@@ -435,13 +497,43 @@ static void setSolverIterationCountCallback(float val, void* userPtr)
}
}
static void setLargeIslandManifoldCountCallback( float val, void* userPtr )
{
btSequentialImpulseConstraintSolverMt::s_minimumContactManifoldsForBatching = int( gSliderIslandBatchingThreshold );
}
static void setMinBatchSizeCallback( float val, void* userPtr )
{
gSliderMaxBatchSize = (std::max)(gSliderMinBatchSize, gSliderMaxBatchSize);
btSequentialImpulseConstraintSolverMt::s_minBatchSize = int(gSliderMinBatchSize);
btSequentialImpulseConstraintSolverMt::s_maxBatchSize = int(gSliderMaxBatchSize);
}
static void setMaxBatchSizeCallback( float val, void* userPtr )
{
gSliderMinBatchSize = (std::min)(gSliderMinBatchSize, gSliderMaxBatchSize);
btSequentialImpulseConstraintSolverMt::s_minBatchSize = int(gSliderMinBatchSize);
btSequentialImpulseConstraintSolverMt::s_maxBatchSize = int(gSliderMaxBatchSize);
}
static void setLeastSquaresResidualThresholdCallback( float val, void* userPtr )
{
if (btDiscreteDynamicsWorld* world = reinterpret_cast<btDiscreteDynamicsWorld*>(userPtr))
{
world->getSolverInfo().m_leastSquaresResidualThreshold = gSliderLeastSquaresResidualThreshold;
}
}
void CommonRigidBodyMTBase::createEmptyDynamicsWorld()
{
gNumIslands = 0;
m_solverType = gSolverType;
#if BT_THREADSAFE && (BT_USE_OPENMP || BT_USE_PPL || BT_USE_TBB)
#if BT_THREADSAFE
btAssert( btGetTaskScheduler() != NULL );
m_multithreadCapable = true;
if (NULL != btGetTaskScheduler() && gTaskSchedulerMgr.getNumTaskSchedulers() > 1)
{
m_multithreadCapable = true;
}
#endif
if ( gMultithreadedWorld )
{
@@ -457,16 +549,28 @@ void CommonRigidBodyMTBase::createEmptyDynamicsWorld()
btConstraintSolverPoolMt* solverPool;
{
SolverType poolSolverType = m_solverType;
if (poolSolverType == SOLVER_TYPE_SEQUENTIAL_IMPULSE_MT)
{
// pool solvers shouldn't themselves be parallel solvers; we don't allow that kind of
// nested parallelism because of performance issues
poolSolverType = SOLVER_TYPE_SEQUENTIAL_IMPULSE;
}
btConstraintSolver* solvers[ BT_MAX_THREAD_COUNT ];
int maxThreadCount = BT_MAX_THREAD_COUNT;
for ( int i = 0; i < maxThreadCount; ++i )
{
solvers[ i ] = createSolverByType( m_solverType );
solvers[ i ] = createSolverByType( poolSolverType );
}
solverPool = new btConstraintSolverPoolMt( solvers, maxThreadCount );
m_solver = solverPool;
}
btDiscreteDynamicsWorld* world = new MyDiscreteDynamicsWorld( m_dispatcher, m_broadphase, solverPool, m_collisionConfiguration );
btSequentialImpulseConstraintSolverMt* solverMt = NULL;
if ( m_solverType == SOLVER_TYPE_SEQUENTIAL_IMPULSE_MT )
{
solverMt = new MySequentialImpulseConstraintSolverMt();
}
btDiscreteDynamicsWorld* world = new MyDiscreteDynamicsWorld( m_dispatcher, m_broadphase, solverPool, solverMt, m_collisionConfiguration );
m_dynamicsWorld = world;
m_multithreadedWorld = true;
btAssert( btGetTaskScheduler() != NULL );
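For reference, the essential wiring that createEmptyDynamicsWorld() performs on the multithreaded path, stripped of the demo's profiling wrappers. This is a hedged sketch, assuming the constructor signatures shown in this diff; the btCollisionDispatcherMt grain size of 40 is an example value, and BT_THREADSAFE=1 plus an installed task scheduler are required.

#include "BulletCollision/CollisionDispatch/btDefaultCollisionConfiguration.h"
#include "BulletCollision/CollisionDispatch/btCollisionDispatcherMt.h"
#include "BulletCollision/BroadphaseCollision/btDbvtBroadphase.h"
#include "BulletDynamics/Dynamics/btDiscreteDynamicsWorldMt.h"  // also declares btConstraintSolverPoolMt
#include "BulletDynamics/ConstraintSolver/btSequentialImpulseConstraintSolver.h"
#include "BulletDynamics/ConstraintSolver/btSequentialImpulseConstraintSolverMt.h"
#include "LinearMath/btThreads.h"

// Hedged sketch, not part of this commit.
btDiscreteDynamicsWorldMt* createMultithreadedWorld(btDefaultCollisionConfiguration* config)
{
    btDispatcher* dispatcher = new btCollisionDispatcherMt(config, 40);
    btBroadphaseInterface* broadphase = new btDbvtBroadphase();

    // one plain sequential solver per worker thread; small islands are solved in parallel across the pool
    btConstraintSolver* solvers[BT_MAX_THREAD_COUNT];
    for (int i = 0; i < BT_MAX_THREAD_COUNT; ++i)
    {
        solvers[i] = new btSequentialImpulseConstraintSolver();
    }
    btConstraintSolverPoolMt* solverPool = new btConstraintSolverPoolMt(solvers, BT_MAX_THREAD_COUNT);

    // the new parallel solver handles islands large enough to be worth batching
    btSequentialImpulseConstraintSolverMt* solverMt = new btSequentialImpulseConstraintSolverMt();

    return new btDiscreteDynamicsWorldMt(dispatcher, broadphase, solverPool, solverMt, config);
}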
@@ -486,7 +590,14 @@ void CommonRigidBodyMTBase::createEmptyDynamicsWorld()
m_broadphase = new btDbvtBroadphase();
m_solver = createSolverByType( m_solverType );
SolverType solverType = m_solverType;
if ( solverType == SOLVER_TYPE_SEQUENTIAL_IMPULSE_MT )
{
// using the parallel solver with the single-threaded world works, but is
// disabled here to avoid confusion
solverType = SOLVER_TYPE_SEQUENTIAL_IMPULSE;
}
m_solver = createSolverByType( solverType );
m_dynamicsWorld = new btDiscreteDynamicsWorld( m_dispatcher, m_broadphase, m_solver, m_collisionConfiguration );
}
@@ -494,6 +605,7 @@ void CommonRigidBodyMTBase::createEmptyDynamicsWorld()
m_dynamicsWorld->setInternalTickCallback( profileEndCallback, NULL, false );
m_dynamicsWorld->setGravity( btVector3( 0, -10, 0 ) );
m_dynamicsWorld->getSolverInfo().m_solverMode = gSolverMode;
m_dynamicsWorld->getSolverInfo().m_numIterations = btMax(1, int(gSliderSolverIterations));
createDefaultParameters();
}
@@ -504,16 +616,18 @@ void CommonRigidBodyMTBase::createDefaultParameters()
{
// create a button to toggle multithreaded world
ButtonParams button( "Multithreaded world enable", 0, true );
button.m_initialState = gMultithreadedWorld;
button.m_userPointer = &gMultithreadedWorld;
bool* ptr = &gMultithreadedWorld;
button.m_initialState = *ptr;
button.m_userPointer = ptr;
button.m_callback = boolPtrButtonCallback;
m_guiHelper->getParameterInterface()->registerButtonParameter( button );
}
{
// create a button to toggle profile printing
ButtonParams button( "Display solver info", 0, true );
button.m_initialState = gDisplayProfileInfo;
button.m_userPointer = &gDisplayProfileInfo;
bool* ptr = &gDisplayProfileInfo;
button.m_initialState = *ptr;
button.m_userPointer = ptr;
button.m_callback = boolPtrButtonCallback;
m_guiHelper->getParameterInterface()->registerButtonParameter( button );
}
@@ -544,6 +658,16 @@ void CommonRigidBodyMTBase::createDefaultParameters()
slider.m_clampToIntegers = true;
m_guiHelper->getParameterInterface()->registerSliderFloatParameter( slider );
}
{
// a slider for the solver leastSquaresResidualThreshold (used to run fewer solver iterations when convergence is good)
SliderParams slider( "Solver residual thresh", &gSliderLeastSquaresResidualThreshold );
slider.m_minVal = 0.0f;
slider.m_maxVal = 0.25f;
slider.m_callback = setLeastSquaresResidualThresholdCallback;
slider.m_userPointer = m_dynamicsWorld;
slider.m_clampToIntegers = false;
m_guiHelper->getParameterInterface()->registerSliderFloatParameter( slider );
}
{
ButtonParams button( "Solver use SIMD", 0, true );
button.m_buttonId = SOLVER_SIMD;
@@ -618,20 +742,86 @@ void CommonRigidBodyMTBase::createDefaultParameters()
m_guiHelper->getParameterInterface()->registerComboBox( comboParams );
}
{
// create a slider to set the number of threads to use
int numThreads = btGetTaskScheduler()->getNumThreads();
// if slider has not been set yet (by another demo),
if ( gSliderNumThreads <= 1.0f )
{
// create a slider to set the number of threads to use
int numThreads = btGetTaskScheduler()->getNumThreads();
gSliderNumThreads = float( numThreads );
}
int maxNumThreads = btGetTaskScheduler()->getMaxNumThreads();
SliderParams slider("Thread count", &gSliderNumThreads);
slider.m_minVal = 1.0f;
slider.m_maxVal = float( BT_MAX_THREAD_COUNT );
slider.m_maxVal = float( maxNumThreads );
slider.m_callback = setThreadCountCallback;
slider.m_clampToIntegers = true;
m_guiHelper->getParameterInterface()->registerSliderFloatParameter( slider );
}
{
// a slider for the minimum number of contact manifolds an island needs before its constraints are split into batches for the parallel solver
if (gSliderIslandBatchingThreshold < 1.0)
{
gSliderIslandBatchingThreshold = float(btSequentialImpulseConstraintSolverMt::s_minimumContactManifoldsForBatching);
}
SliderParams slider( "IslandBatchThresh", &gSliderIslandBatchingThreshold );
slider.m_minVal = 1.0f;
slider.m_maxVal = 2000.0f;
slider.m_callback = setLargeIslandManifoldCountCallback;
slider.m_userPointer = NULL;
slider.m_clampToIntegers = true;
m_guiHelper->getParameterInterface()->registerSliderFloatParameter( slider );
}
{
// create a combo box for selecting the batching method
static const char* sBatchingMethodComboBoxItems[ btBatchedConstraints::BATCHING_METHOD_COUNT ];
{
sBatchingMethodComboBoxItems[ btBatchedConstraints::BATCHING_METHOD_SPATIAL_GRID_2D ] = "Batching: 2D Grid";
sBatchingMethodComboBoxItems[ btBatchedConstraints::BATCHING_METHOD_SPATIAL_GRID_3D ] = "Batching: 3D Grid";
};
ComboBoxParams comboParams;
comboParams.m_userPointer = sBatchingMethodComboBoxItems;
comboParams.m_numItems = btBatchedConstraints::BATCHING_METHOD_COUNT;
comboParams.m_startItem = static_cast<int>(btSequentialImpulseConstraintSolverMt::s_contactBatchingMethod);
comboParams.m_items = sBatchingMethodComboBoxItems;
comboParams.m_callback = setBatchingMethodComboBoxCallback;
m_guiHelper->getParameterInterface()->registerComboBox( comboParams );
}
{
// a slider for the sequentialImpulseConstraintSolverMt min batch size (when batching)
SliderParams slider( "Min batch size", &gSliderMinBatchSize );
slider.m_minVal = 1.0f;
slider.m_maxVal = 1000.0f;
slider.m_callback = setMinBatchSizeCallback;
slider.m_userPointer = NULL;
slider.m_clampToIntegers = true;
m_guiHelper->getParameterInterface()->registerSliderFloatParameter( slider );
}
{
// a slider for the sequentialImpulseConstraintSolverMt max batch size (when batching)
SliderParams slider( "Max batch size", &gSliderMaxBatchSize );
slider.m_minVal = 1.0f;
slider.m_maxVal = 1000.0f;
slider.m_callback = setMaxBatchSizeCallback;
slider.m_userPointer = NULL;
slider.m_clampToIntegers = true;
m_guiHelper->getParameterInterface()->registerSliderFloatParameter( slider );
}
{
// create a button to toggle debug drawing of batching visualization
ButtonParams button( "Visualize batching", 0, true );
bool* ptr = &btBatchedConstraints::s_debugDrawBatches;
button.m_initialState = *ptr;
button.m_userPointer = ptr;
button.m_callback = boolPtrButtonCallback;
m_guiHelper->getParameterInterface()->registerButtonParameter( button );
}
{
ButtonParams button( "Allow Nested ParallelFor", 0, true );
button.m_initialState = btSequentialImpulseConstraintSolverMt::s_allowNestedParallelForLoops;
button.m_userPointer = &btSequentialImpulseConstraintSolverMt::s_allowNestedParallelForLoops;
button.m_callback = boolPtrButtonCallback;
m_guiHelper->getParameterInterface()->registerButtonParameter( button );
}
#endif // #if BT_THREADSAFE
}
}
@@ -643,6 +833,7 @@ void CommonRigidBodyMTBase::drawScreenText()
int xCoord = 400;
int yCoord = 30;
int yStep = 30;
int indent = 30;
if (m_solverType != gSolverType)
{
sprintf( msg, "restart example to change solver type" );
@@ -721,6 +912,34 @@ void CommonRigidBodyMTBase::drawScreenText()
m_guiHelper->getAppInterface()->drawText( msg, xCoord, yCoord, 0.4f );
yCoord += yStep;
sprintf( msg,
"SolverTotal %5.3f ms",
gProfiler.getAverageTime( Profiler::kRecordSolverTotal )*0.001f
);
m_guiHelper->getAppInterface()->drawText( msg, xCoord, yCoord, 0.4f );
yCoord += yStep;
sprintf( msg,
"SolverSetup %5.3f ms",
gProfiler.getAverageTime( Profiler::kRecordSolverSetup )*0.001f
);
m_guiHelper->getAppInterface()->drawText( msg, xCoord + indent, yCoord, 0.4f );
yCoord += yStep;
sprintf( msg,
"SolverIterations %5.3f ms",
gProfiler.getAverageTime( Profiler::kRecordSolverIterations )*0.001f
);
m_guiHelper->getAppInterface()->drawText( msg, xCoord + indent, yCoord, 0.4f );
yCoord += yStep;
sprintf( msg,
"SolverFinish %5.3f ms",
gProfiler.getAverageTime( Profiler::kRecordSolverFinish )*0.001f
);
m_guiHelper->getAppInterface()->drawText( msg, xCoord + indent, yCoord, 0.4f );
yCoord += yStep;
sprintf( msg,
"PredictUnconstrainedMotion %5.3f ms",
gProfiler.getAverageTime( Profiler::kRecordPredictUnconstrainedMotion )*0.001f


@@ -14,6 +14,7 @@
enum SolverType
{
SOLVER_TYPE_SEQUENTIAL_IMPULSE,
SOLVER_TYPE_SEQUENTIAL_IMPULSE_MT,
SOLVER_TYPE_NNCG,
SOLVER_TYPE_MLCP_PGS,
SOLVER_TYPE_MLCP_DANTZIG,
@@ -27,6 +28,7 @@ inline const char* getSolverTypeName( SolverType t )
switch (t)
{
case SOLVER_TYPE_SEQUENTIAL_IMPULSE: return "SequentialImpulse";
case SOLVER_TYPE_SEQUENTIAL_IMPULSE_MT: return "SequentialImpulseMt";
case SOLVER_TYPE_NNCG: return "NNCG";
case SOLVER_TYPE_MLCP_PGS: return "MLCP ProjectedGaussSeidel";
case SOLVER_TYPE_MLCP_DANTZIG: return "MLCP Dantzig";


@@ -25,10 +25,10 @@ subject to the following restrictions:
static btScalar gSliderStackRows = 8.0f;
static btScalar gSliderStackColumns = 6.0f;
static btScalar gSliderStackHeight = 10.0f;
static btScalar gSliderStackWidth = 1.0f;
static btScalar gSliderStackRows = 1.0f;
static btScalar gSliderStackColumns = 1.0f;
static btScalar gSliderStackHeight = 15.0f;
static btScalar gSliderStackWidth = 8.0f;
static btScalar gSliderGroundHorizontalAmplitude = 0.0f;
static btScalar gSliderGroundVerticalAmplitude = 0.0f;
static btScalar gSliderGroundTilt = 0.0f;
@@ -75,6 +75,21 @@ public:
btScalar tilt = gSliderGroundTilt * SIMD_2_PI / 360.0f;
return btQuaternion( btVector3( 1.0f, 0.0f, 0.0f ), tilt );
}
struct TestSumBody : public btIParallelSumBody
{
virtual btScalar sumLoop( int iBegin, int iEnd ) const BT_OVERRIDE
{
btScalar sum = 0.0f;
for (int i = iBegin; i < iEnd; ++i)
{
if (i > 0)
{
sum += 1.0f / btScalar(i);
}
}
return sum;
}
};
virtual void stepSimulation( float deltaTime ) BT_OVERRIDE
{
if ( m_dynamicsWorld )
@@ -115,6 +130,14 @@ public:
// always step by 1/60 for benchmarking
m_dynamicsWorld->stepSimulation( 1.0f / 60.0f, 0 );
}
#if 0
{
// test parallelSum
TestSumBody testSumBody;
float testSum = btParallelSum( 1, 10000000, 10000, testSumBody );
printf( "sum = %f\n", testSum );
}
#endif
}
virtual void initPhysics() BT_OVERRIDE;
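The TestSumBody above exercises btParallelSum; for completeness, here is the companion btParallelFor/btIParallelForBody pattern that the parallel dispatcher and solver rely on. A hedged sketch, not part of this commit: ScaleBody and its payload are illustrative.

#include "LinearMath/btThreads.h"
#include "LinearMath/btAlignedObjectArray.h"

// Hedged sketch: forLoop() receives a half-open index range and is invoked from
// worker threads by the currently installed task scheduler.
struct ScaleBody : public btIParallelForBody
{
    btAlignedObjectArray<btScalar>* m_values;
    btScalar m_factor;
    virtual void forLoop(int iBegin, int iEnd) const BT_OVERRIDE
    {
        for (int i = iBegin; i < iEnd; ++i)
        {
            (*m_values)[i] *= m_factor;
        }
    }
};

void scaleAllValues(btAlignedObjectArray<btScalar>& values, btScalar factor)
{
    ScaleBody body;
    body.m_values = &values;
    body.m_factor = factor;
    btParallelFor(0, values.size(), 64, body);  // grain size of 64 is an arbitrary example
}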


@@ -1,448 +0,0 @@
#include "LinearMath/btTransform.h"
#include "../Utils/b3Clock.h"
#include "LinearMath/btAlignedObjectArray.h"
#include "LinearMath/btThreads.h"
#include "LinearMath/btQuickprof.h"
#include <stdio.h>
#include <algorithm>
typedef void( *btThreadFunc )( void* userPtr, void* lsMemory );
typedef void* ( *btThreadLocalStorageFunc )();
#if BT_THREADSAFE
#if defined( _WIN32 )
#include "b3Win32ThreadSupport.h"
b3ThreadSupportInterface* createThreadSupport( int numThreads, btThreadFunc threadFunc, btThreadLocalStorageFunc localStoreFunc, const char* uniqueName )
{
b3Win32ThreadSupport::Win32ThreadConstructionInfo constructionInfo( uniqueName, threadFunc, localStoreFunc, numThreads );
//constructionInfo.m_priority = 0; // highest priority (the default) -- can cause erratic performance when numThreads > numCores
// we don't want worker threads to be higher priority than the main thread or the main thread could get
// totally shut out and unable to tell the workers to stop
constructionInfo.m_priority = -1; // normal priority
b3Win32ThreadSupport* threadSupport = new b3Win32ThreadSupport( constructionInfo );
return threadSupport;
}
#else // #if defined( _WIN32 )
#include "b3PosixThreadSupport.h"
b3ThreadSupportInterface* createThreadSupport( int numThreads, btThreadFunc threadFunc, btThreadLocalStorageFunc localStoreFunc, const char* uniqueName)
{
b3PosixThreadSupport::ThreadConstructionInfo constructionInfo( uniqueName, threadFunc, localStoreFunc, numThreads );
b3ThreadSupportInterface* threadSupport = new b3PosixThreadSupport( constructionInfo );
return threadSupport;
}
#endif // #else // #if defined( _WIN32 )
///
/// getNumHardwareThreads()
///
///
/// https://stackoverflow.com/questions/150355/programmatically-find-the-number-of-cores-on-a-machine
///
#if __cplusplus >= 201103L
#include <thread>
int getNumHardwareThreads()
{
return std::thread::hardware_concurrency();
}
#elif defined( _WIN32 )
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
int getNumHardwareThreads()
{
// caps out at 32
SYSTEM_INFO info;
GetSystemInfo( &info );
return info.dwNumberOfProcessors;
}
#else
int getNumHardwareThreads()
{
return 0; // don't know
}
#endif
struct WorkerThreadStatus
{
enum Type
{
kInvalid,
kWaitingForWork,
kWorking,
kSleeping,
};
};
struct IJob
{
virtual void executeJob() = 0;
};
class ParallelForJob : public IJob
{
const btIParallelForBody* mBody;
int mBegin;
int mEnd;
public:
ParallelForJob()
{
mBody = NULL;
mBegin = 0;
mEnd = 0;
}
void init( int iBegin, int iEnd, const btIParallelForBody& body )
{
mBody = &body;
mBegin = iBegin;
mEnd = iEnd;
}
virtual void executeJob() BT_OVERRIDE
{
BT_PROFILE( "executeJob" );
// call the functor body to do the work
mBody->forLoop( mBegin, mEnd );
}
};
struct JobContext
{
JobContext()
{
m_queueLock = NULL;
m_headIndex = 0;
m_tailIndex = 0;
m_workersShouldCheckQueue = false;
m_useSpinMutex = false;
}
b3CriticalSection* m_queueLock;
btSpinMutex m_mutex;
volatile bool m_workersShouldCheckQueue;
btAlignedObjectArray<IJob*> m_jobQueue;
bool m_queueIsEmpty;
int m_tailIndex;
int m_headIndex;
bool m_useSpinMutex;
void lockQueue()
{
if ( m_useSpinMutex )
{
m_mutex.lock();
}
else
{
m_queueLock->lock();
}
}
void unlockQueue()
{
if ( m_useSpinMutex )
{
m_mutex.unlock();
}
else
{
m_queueLock->unlock();
}
}
void clearQueue()
{
lockQueue();
m_headIndex = 0;
m_tailIndex = 0;
m_queueIsEmpty = true;
unlockQueue();
m_jobQueue.resizeNoInitialize( 0 );
}
void submitJob( IJob* job )
{
m_jobQueue.push_back( job );
lockQueue();
m_tailIndex++;
m_queueIsEmpty = false;
unlockQueue();
}
IJob* consumeJob()
{
if ( m_queueIsEmpty )
{
// lock free path. even if this is taken erroneously it isn't harmful
return NULL;
}
IJob* job = NULL;
lockQueue();
if ( !m_queueIsEmpty )
{
job = m_jobQueue[ m_headIndex++ ];
if ( m_headIndex == m_tailIndex )
{
m_queueIsEmpty = true;
}
}
unlockQueue();
return job;
}
};
struct WorkerThreadLocalStorage
{
int threadId;
WorkerThreadStatus::Type status;
};
static void WorkerThreadFunc( void* userPtr, void* lsMemory )
{
BT_PROFILE( "WorkerThreadFunc" );
WorkerThreadLocalStorage* localStorage = (WorkerThreadLocalStorage*) lsMemory;
localStorage->status = WorkerThreadStatus::kWaitingForWork;
//printf( "WorkerThreadFunc: worker %d start working\n", localStorage->threadId );
JobContext* jobContext = (JobContext*) userPtr;
while ( jobContext->m_workersShouldCheckQueue )
{
if ( IJob* job = jobContext->consumeJob() )
{
localStorage->status = WorkerThreadStatus::kWorking;
job->executeJob();
localStorage->status = WorkerThreadStatus::kWaitingForWork;
}
else
{
// todo: spin wait a bit to avoid hammering the empty queue
}
}
//printf( "WorkerThreadFunc stop working\n" );
localStorage->status = WorkerThreadStatus::kSleeping;
// go idle
}
static void* WorkerThreadAllocFunc()
{
return new WorkerThreadLocalStorage;
}
class btTaskSchedulerDefault : public btITaskScheduler
{
JobContext m_jobContext;
b3ThreadSupportInterface* m_threadSupport;
btAlignedObjectArray<ParallelForJob> m_jobs;
btSpinMutex m_antiNestingLock; // prevent nested parallel-for
int m_numThreads;
int m_numWorkerThreads;
int m_numWorkersRunning;
public:
btTaskSchedulerDefault() : btITaskScheduler("ThreadSupport")
{
m_threadSupport = NULL;
m_numThreads = getNumHardwareThreads();
// if can't detect number of cores,
if ( m_numThreads == 0 )
{
// take a guess
m_numThreads = 4;
}
m_numWorkerThreads = m_numThreads - 1;
m_numWorkersRunning = 0;
}
virtual ~btTaskSchedulerDefault()
{
shutdown();
}
void init()
{
int maxNumWorkerThreads = BT_MAX_THREAD_COUNT - 1;
m_threadSupport = createThreadSupport( maxNumWorkerThreads, WorkerThreadFunc, WorkerThreadAllocFunc, "TaskScheduler" );
m_jobContext.m_queueLock = m_threadSupport->createCriticalSection();
for ( int i = 0; i < maxNumWorkerThreads; i++ )
{
WorkerThreadLocalStorage* storage = (WorkerThreadLocalStorage*) m_threadSupport->getThreadLocalMemory( i );
btAssert( storage );
storage->threadId = i;
storage->status = WorkerThreadStatus::kSleeping;
}
setWorkersActive( false ); // no work for them yet
}
virtual void shutdown()
{
setWorkersActive( false );
waitForWorkersToSleep();
m_threadSupport->deleteCriticalSection( m_jobContext.m_queueLock );
m_jobContext.m_queueLock = NULL;
delete m_threadSupport;
m_threadSupport = NULL;
}
void setWorkersActive( bool active )
{
m_jobContext.m_workersShouldCheckQueue = active;
}
virtual int getMaxNumThreads() const BT_OVERRIDE
{
return BT_MAX_THREAD_COUNT;
}
virtual int getNumThreads() const BT_OVERRIDE
{
return m_numThreads;
}
virtual void setNumThreads( int numThreads ) BT_OVERRIDE
{
m_numThreads = btMax( btMin(numThreads, int(BT_MAX_THREAD_COUNT)), 1 );
m_numWorkerThreads = m_numThreads - 1;
}
void waitJobs()
{
BT_PROFILE( "waitJobs" );
// have the main thread work until the job queue is empty
for ( ;; )
{
if ( IJob* job = m_jobContext.consumeJob() )
{
job->executeJob();
}
else
{
break;
}
}
// done with jobs for now, tell workers to rest
setWorkersActive( false );
waitForWorkersToSleep();
}
void wakeWorkers()
{
BT_PROFILE( "wakeWorkers" );
btAssert( m_jobContext.m_workersShouldCheckQueue );
// tell each worker thread to start working
for ( int i = 0; i < m_numWorkerThreads; i++ )
{
m_threadSupport->runTask( B3_THREAD_SCHEDULE_TASK, &m_jobContext, i );
m_numWorkersRunning++;
}
}
void waitForWorkersToSleep()
{
BT_PROFILE( "waitForWorkersToSleep" );
while ( m_numWorkersRunning > 0 )
{
int iThread;
int threadStatus;
m_threadSupport->waitForResponse( &iThread, &threadStatus ); // wait for worker threads to finish working
m_numWorkersRunning--;
}
//m_threadSupport->waitForAllTasksToComplete();
for ( int i = 0; i < m_numWorkerThreads; i++ )
{
//m_threadSupport->waitForTaskCompleted( i );
WorkerThreadLocalStorage* storage = (WorkerThreadLocalStorage*) m_threadSupport->getThreadLocalMemory( i );
btAssert( storage );
btAssert( storage->status == WorkerThreadStatus::kSleeping );
}
}
virtual void parallelFor( int iBegin, int iEnd, int grainSize, const btIParallelForBody& body ) BT_OVERRIDE
{
BT_PROFILE( "parallelFor_ThreadSupport" );
btAssert( iEnd >= iBegin );
btAssert( grainSize >= 1 );
int iterationCount = iEnd - iBegin;
if ( iterationCount > grainSize && m_numWorkerThreads > 0 && m_antiNestingLock.tryLock() )
{
int jobCount = ( iterationCount + grainSize - 1 ) / grainSize;
btAssert( jobCount >= 2 ); // need more than one job for multithreading
if ( jobCount > m_jobs.size() )
{
m_jobs.resize( jobCount );
}
if ( jobCount > m_jobContext.m_jobQueue.capacity() )
{
m_jobContext.m_jobQueue.reserve( jobCount );
}
m_jobContext.clearQueue();
// prepare worker threads for incoming work
setWorkersActive( true );
wakeWorkers();
// submit all of the jobs
int iJob = 0;
for ( int i = iBegin; i < iEnd; i += grainSize )
{
btAssert( iJob < jobCount );
int iE = btMin( i + grainSize, iEnd );
ParallelForJob& job = m_jobs[ iJob ];
job.init( i, iE, body );
m_jobContext.submitJob( &job );
iJob++;
}
// put the main thread to work on emptying the job queue and then wait for all workers to finish
waitJobs();
m_antiNestingLock.unlock();
}
else
{
BT_PROFILE( "parallelFor_mainThread" );
// just run on main thread
body.forLoop( iBegin, iEnd );
}
}
};
btITaskScheduler* createDefaultTaskScheduler()
{
btTaskSchedulerDefault* ts = new btTaskSchedulerDefault();
ts->init();
return ts;
}
#else // #if BT_THREADSAFE
btITaskScheduler* createDefaultTaskScheduler()
{
return NULL;
}
#endif // #else // #if BT_THREADSAFE


@@ -1,26 +0,0 @@
/*
Copyright (c) 2003-2014 Erwin Coumans http://bullet.googlecode.com
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef BT_TASK_SCHEDULER_H
#define BT_TASK_SCHEDULER_H
class btITaskScheduler;
btITaskScheduler* createDefaultTaskScheduler();
#endif // BT_TASK_SCHEDULER_H


@@ -134,6 +134,9 @@ sources = ["examples/pybullet/pybullet.c"]\
+["src/LinearMath/btConvexHullComputer.cpp"]\
+["src/LinearMath/btQuickprof.cpp"]\
+["src/LinearMath/btThreads.cpp"]\
+["src/LinearMath/TaskScheduler/btTaskScheduler.cpp"]\
+["src/LinearMath/TaskScheduler/btThreadSupportPosix.cpp"]\
+["src/LinearMath/TaskScheduler/btThreadSupportWin32.cpp"]\
+["src/BulletCollision/BroadphaseCollision/btAxisSweep3.cpp"]\
+["src/BulletCollision/BroadphaseCollision/btDbvt.cpp"]\
+["src/BulletCollision/BroadphaseCollision/btOverlappingPairCache.cpp"]\
@@ -233,6 +236,7 @@ sources = ["examples/pybullet/pybullet.c"]\
+["src/BulletDynamics/Dynamics/btSimulationIslandManagerMt.cpp"]\
+["src/BulletDynamics/Dynamics/btDiscreteDynamicsWorldMt.cpp"]\
+["src/BulletDynamics/Dynamics/btSimpleDynamicsWorld.cpp"]\
+["src/BulletDynamics/ConstraintSolver/btBatchedConstraints.cpp"]\
+["src/BulletDynamics/ConstraintSolver/btConeTwistConstraint.cpp"]\
+["src/BulletDynamics/ConstraintSolver/btGeneric6DofSpringConstraint.cpp"]\
+["src/BulletDynamics/ConstraintSolver/btSliderConstraint.cpp"]\
@@ -249,6 +253,7 @@ sources = ["examples/pybullet/pybullet.c"]\
+["src/BulletDynamics/ConstraintSolver/btPoint2PointConstraint.cpp"]\
+["src/BulletDynamics/ConstraintSolver/btGeneric6DofSpring2Constraint.cpp"]\
+["src/BulletDynamics/ConstraintSolver/btSequentialImpulseConstraintSolver.cpp"]\
+["src/BulletDynamics/ConstraintSolver/btSequentialImpulseConstraintSolverMt.cpp"]\
+["src/BulletDynamics/MLCPSolvers/btDantzigLCP.cpp"]\
+["src/BulletDynamics/MLCPSolvers/btLemkeAlgorithm.cpp"]\
+["src/BulletDynamics/MLCPSolvers/btMLCPSolver.cpp"]\
@@ -479,4 +484,3 @@ setup(
packages=[x for x in find_packages('examples/pybullet/gym')],
package_data = { 'pybullet_data': need_files }
)


@@ -15,6 +15,8 @@ SET(BulletDynamics_SRCS
ConstraintSolver/btHingeConstraint.cpp
ConstraintSolver/btPoint2PointConstraint.cpp
ConstraintSolver/btSequentialImpulseConstraintSolver.cpp
ConstraintSolver/btSequentialImpulseConstraintSolverMt.cpp
ConstraintSolver/btBatchedConstraints.cpp
ConstraintSolver/btNNCGConstraintSolver.cpp
ConstraintSolver/btSliderConstraint.cpp
ConstraintSolver/btSolve2LinearConstraint.cpp
@@ -62,6 +64,7 @@ SET(ConstraintSolver_HDRS
ConstraintSolver/btJacobianEntry.h
ConstraintSolver/btPoint2PointConstraint.h
ConstraintSolver/btSequentialImpulseConstraintSolver.h
ConstraintSolver/btSequentialImpulseConstraintSolverMt.h
ConstraintSolver/btNNCGConstraintSolver.h
ConstraintSolver/btSliderConstraint.h
ConstraintSolver/btSolve2LinearConstraint.h

File diff suppressed because it is too large


@@ -0,0 +1,66 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef BT_BATCHED_CONSTRAINTS_H
#define BT_BATCHED_CONSTRAINTS_H
#include "LinearMath/btThreads.h"
#include "LinearMath/btAlignedObjectArray.h"
#include "BulletDynamics/ConstraintSolver/btSolverBody.h"
#include "BulletDynamics/ConstraintSolver/btSolverConstraint.h"
class btIDebugDraw;
struct btBatchedConstraints
{
enum BatchingMethod
{
BATCHING_METHOD_SPATIAL_GRID_2D,
BATCHING_METHOD_SPATIAL_GRID_3D,
BATCHING_METHOD_COUNT
};
struct Range
{
int begin;
int end;
Range() : begin( 0 ), end( 0 ) {}
Range( int _beg, int _end ) : begin( _beg ), end( _end ) {}
};
btAlignedObjectArray<int> m_constraintIndices;
btAlignedObjectArray<Range> m_batches; // each batch is a range of indices in the m_constraintIndices array
btAlignedObjectArray<Range> m_phases; // each phase is range of indices in the m_batches array
btAlignedObjectArray<char> m_phaseGrainSize; // max grain size for each phase
btAlignedObjectArray<int> m_phaseOrder; // phases can be done in any order, so we can randomize the order here
btIDebugDraw* m_debugDrawer;
static bool s_debugDrawBatches;
btBatchedConstraints() {m_debugDrawer=NULL;}
void setup( btConstraintArray* constraints,
const btAlignedObjectArray<btSolverBody>& bodies,
BatchingMethod batchingMethod,
int minBatchSize,
int maxBatchSize,
btAlignedObjectArray<char>* scratchMemory
);
bool validate( btConstraintArray* constraints, const btAlignedObjectArray<btSolverBody>& bodies ) const;
};
#endif // BT_BATCHED_CONSTRAINTS_H
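To make the role of these arrays concrete: the batches inside a single phase touch disjoint sets of bodies, so they can be solved concurrently, while phases run one after another. Below is a hedged sketch of a consumer, not the actual btSequentialImpulseConstraintSolverMt code; solveConstraint is a hypothetical callback standing in for the real per-constraint solver loop.

#include "LinearMath/btThreads.h"
#include "BulletDynamics/ConstraintSolver/btBatchedConstraints.h"

// Hedged sketch, not part of this commit.
struct SolvePhaseBody : public btIParallelForBody
{
    const btBatchedConstraints* m_bc;
    void (*m_solveConstraint)(int constraintIndex);  // hypothetical callback

    virtual void forLoop(int iBatchBegin, int iBatchEnd) const BT_OVERRIDE
    {
        for (int iBatch = iBatchBegin; iBatch < iBatchEnd; ++iBatch)
        {
            const btBatchedConstraints::Range& batch = m_bc->m_batches[iBatch];
            for (int i = batch.begin; i < batch.end; ++i)
            {
                m_solveConstraint(m_bc->m_constraintIndices[i]);
            }
        }
    }
};

void solveOneIteration(const btBatchedConstraints& bc, void (*solveConstraint)(int))
{
    for (int ii = 0; ii < bc.m_phases.size(); ++ii)
    {
        int iPhase = bc.m_phaseOrder[ii];  // phases may be visited in randomized order
        const btBatchedConstraints::Range& phase = bc.m_phases[iPhase];
        SolvePhaseBody body;
        body.m_bc = &bc;
        body.m_solveConstraint = solveConstraint;
        // batches within a phase are independent, so distribute them across threads
        btParallelFor(phase.begin, phase.end, bc.m_phaseGrainSize[iPhase], body);
    }
}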


@@ -1258,6 +1258,256 @@ void btSequentialImpulseConstraintSolver::convertContacts(btPersistentManifold**
}
}
void btSequentialImpulseConstraintSolver::convertJoint(btSolverConstraint* currentConstraintRow,
btTypedConstraint* constraint,
const btTypedConstraint::btConstraintInfo1& info1,
int solverBodyIdA,
int solverBodyIdB,
const btContactSolverInfo& infoGlobal
)
{
const btRigidBody& rbA = constraint->getRigidBodyA();
const btRigidBody& rbB = constraint->getRigidBodyB();
const btSolverBody* bodyAPtr = &m_tmpSolverBodyPool[solverBodyIdA];
const btSolverBody* bodyBPtr = &m_tmpSolverBodyPool[solverBodyIdB];
int overrideNumSolverIterations = constraint->getOverrideNumSolverIterations() > 0 ? constraint->getOverrideNumSolverIterations() : infoGlobal.m_numIterations;
if (overrideNumSolverIterations>m_maxOverrideNumSolverIterations)
m_maxOverrideNumSolverIterations = overrideNumSolverIterations;
for (int j=0;j<info1.m_numConstraintRows;j++)
{
memset(&currentConstraintRow[j],0,sizeof(btSolverConstraint));
currentConstraintRow[j].m_lowerLimit = -SIMD_INFINITY;
currentConstraintRow[j].m_upperLimit = SIMD_INFINITY;
currentConstraintRow[j].m_appliedImpulse = 0.f;
currentConstraintRow[j].m_appliedPushImpulse = 0.f;
currentConstraintRow[j].m_solverBodyIdA = solverBodyIdA;
currentConstraintRow[j].m_solverBodyIdB = solverBodyIdB;
currentConstraintRow[j].m_overrideNumSolverIterations = overrideNumSolverIterations;
}
// these vectors are already cleared in initSolverBody, no need to redundantly clear again
btAssert(bodyAPtr->getDeltaLinearVelocity().isZero());
btAssert(bodyAPtr->getDeltaAngularVelocity().isZero());
btAssert(bodyAPtr->getPushVelocity().isZero());
btAssert(bodyAPtr->getTurnVelocity().isZero());
btAssert(bodyBPtr->getDeltaLinearVelocity().isZero());
btAssert(bodyBPtr->getDeltaAngularVelocity().isZero());
btAssert(bodyBPtr->getPushVelocity().isZero());
btAssert(bodyBPtr->getTurnVelocity().isZero());
//bodyAPtr->internalGetDeltaLinearVelocity().setValue(0.f,0.f,0.f);
//bodyAPtr->internalGetDeltaAngularVelocity().setValue(0.f,0.f,0.f);
//bodyAPtr->internalGetPushVelocity().setValue(0.f,0.f,0.f);
//bodyAPtr->internalGetTurnVelocity().setValue(0.f,0.f,0.f);
//bodyBPtr->internalGetDeltaLinearVelocity().setValue(0.f,0.f,0.f);
//bodyBPtr->internalGetDeltaAngularVelocity().setValue(0.f,0.f,0.f);
//bodyBPtr->internalGetPushVelocity().setValue(0.f,0.f,0.f);
//bodyBPtr->internalGetTurnVelocity().setValue(0.f,0.f,0.f);
btTypedConstraint::btConstraintInfo2 info2;
info2.fps = 1.f/infoGlobal.m_timeStep;
info2.erp = infoGlobal.m_erp;
info2.m_J1linearAxis = currentConstraintRow->m_contactNormal1;
info2.m_J1angularAxis = currentConstraintRow->m_relpos1CrossNormal;
info2.m_J2linearAxis = currentConstraintRow->m_contactNormal2;
info2.m_J2angularAxis = currentConstraintRow->m_relpos2CrossNormal;
info2.rowskip = sizeof(btSolverConstraint)/sizeof(btScalar);//check this
///the size of btSolverConstraint needs to be a multiple of btScalar
btAssert(info2.rowskip*sizeof(btScalar)== sizeof(btSolverConstraint));
info2.m_constraintError = &currentConstraintRow->m_rhs;
currentConstraintRow->m_cfm = infoGlobal.m_globalCfm;
info2.m_damping = infoGlobal.m_damping;
info2.cfm = &currentConstraintRow->m_cfm;
info2.m_lowerLimit = &currentConstraintRow->m_lowerLimit;
info2.m_upperLimit = &currentConstraintRow->m_upperLimit;
info2.m_numIterations = infoGlobal.m_numIterations;
constraint->getInfo2(&info2);
///finalize the constraint setup
for (int j=0;j<info1.m_numConstraintRows;j++)
{
btSolverConstraint& solverConstraint = currentConstraintRow[j];
if (solverConstraint.m_upperLimit>=constraint->getBreakingImpulseThreshold())
{
solverConstraint.m_upperLimit = constraint->getBreakingImpulseThreshold();
}
if (solverConstraint.m_lowerLimit<=-constraint->getBreakingImpulseThreshold())
{
solverConstraint.m_lowerLimit = -constraint->getBreakingImpulseThreshold();
}
solverConstraint.m_originalContactPoint = constraint;
{
const btVector3& ftorqueAxis1 = solverConstraint.m_relpos1CrossNormal;
solverConstraint.m_angularComponentA = constraint->getRigidBodyA().getInvInertiaTensorWorld()*ftorqueAxis1*constraint->getRigidBodyA().getAngularFactor();
}
{
const btVector3& ftorqueAxis2 = solverConstraint.m_relpos2CrossNormal;
solverConstraint.m_angularComponentB = constraint->getRigidBodyB().getInvInertiaTensorWorld()*ftorqueAxis2*constraint->getRigidBodyB().getAngularFactor();
}
{
btVector3 iMJlA = solverConstraint.m_contactNormal1*rbA.getInvMass();
btVector3 iMJaA = rbA.getInvInertiaTensorWorld()*solverConstraint.m_relpos1CrossNormal;
btVector3 iMJlB = solverConstraint.m_contactNormal2*rbB.getInvMass();//sign of normal?
btVector3 iMJaB = rbB.getInvInertiaTensorWorld()*solverConstraint.m_relpos2CrossNormal;
btScalar sum = iMJlA.dot(solverConstraint.m_contactNormal1);
sum += iMJaA.dot(solverConstraint.m_relpos1CrossNormal);
sum += iMJlB.dot(solverConstraint.m_contactNormal2);
sum += iMJaB.dot(solverConstraint.m_relpos2CrossNormal);
btScalar fsum = btFabs(sum);
btAssert(fsum > SIMD_EPSILON);
btScalar sorRelaxation = 1.f;//todo: get from globalInfo?
solverConstraint.m_jacDiagABInv = fsum>SIMD_EPSILON?sorRelaxation/sum : 0.f;
}
{
btScalar rel_vel;
btVector3 externalForceImpulseA = bodyAPtr->m_originalBody ? bodyAPtr->m_externalForceImpulse : btVector3(0,0,0);
btVector3 externalTorqueImpulseA = bodyAPtr->m_originalBody ? bodyAPtr->m_externalTorqueImpulse : btVector3(0,0,0);
btVector3 externalForceImpulseB = bodyBPtr->m_originalBody ? bodyBPtr->m_externalForceImpulse : btVector3(0,0,0);
btVector3 externalTorqueImpulseB = bodyBPtr->m_originalBody ?bodyBPtr->m_externalTorqueImpulse : btVector3(0,0,0);
btScalar vel1Dotn = solverConstraint.m_contactNormal1.dot(rbA.getLinearVelocity()+externalForceImpulseA)
+ solverConstraint.m_relpos1CrossNormal.dot(rbA.getAngularVelocity()+externalTorqueImpulseA);
btScalar vel2Dotn = solverConstraint.m_contactNormal2.dot(rbB.getLinearVelocity()+externalForceImpulseB)
+ solverConstraint.m_relpos2CrossNormal.dot(rbB.getAngularVelocity()+externalTorqueImpulseB);
rel_vel = vel1Dotn+vel2Dotn;
btScalar restitution = 0.f;
btScalar positionalError = solverConstraint.m_rhs;//already filled in by getConstraintInfo2
btScalar velocityError = restitution - rel_vel * info2.m_damping;
btScalar penetrationImpulse = positionalError*solverConstraint.m_jacDiagABInv;
btScalar velocityImpulse = velocityError *solverConstraint.m_jacDiagABInv;
solverConstraint.m_rhs = penetrationImpulse+velocityImpulse;
solverConstraint.m_appliedImpulse = 0.f;
}
}
}
void btSequentialImpulseConstraintSolver::convertJoints(btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal)
{
BT_PROFILE("convertJoints");
for (int j=0;j<numConstraints;j++)
{
btTypedConstraint* constraint = constraints[j];
constraint->buildJacobian();
constraint->internalSetAppliedImpulse(0.0f);
}
int totalNumRows = 0;
m_tmpConstraintSizesPool.resizeNoInitialize(numConstraints);
//calculate the total number of constraint rows
for (int i=0;i<numConstraints;i++)
{
btTypedConstraint::btConstraintInfo1& info1 = m_tmpConstraintSizesPool[i];
btJointFeedback* fb = constraints[i]->getJointFeedback();
if (fb)
{
fb->m_appliedForceBodyA.setZero();
fb->m_appliedTorqueBodyA.setZero();
fb->m_appliedForceBodyB.setZero();
fb->m_appliedTorqueBodyB.setZero();
}
if (constraints[i]->isEnabled())
{
constraints[i]->getInfo1(&info1);
} else
{
info1.m_numConstraintRows = 0;
info1.nub = 0;
}
totalNumRows += info1.m_numConstraintRows;
}
m_tmpSolverNonContactConstraintPool.resizeNoInitialize(totalNumRows);
///setup the btSolverConstraints
int currentRow = 0;
for (int i=0;i<numConstraints;i++)
{
const btTypedConstraint::btConstraintInfo1& info1 = m_tmpConstraintSizesPool[i];
if (info1.m_numConstraintRows)
{
btAssert(currentRow<totalNumRows);
btSolverConstraint* currentConstraintRow = &m_tmpSolverNonContactConstraintPool[currentRow];
btTypedConstraint* constraint = constraints[i];
btRigidBody& rbA = constraint->getRigidBodyA();
btRigidBody& rbB = constraint->getRigidBodyB();
int solverBodyIdA = getOrInitSolverBody(rbA,infoGlobal.m_timeStep);
int solverBodyIdB = getOrInitSolverBody(rbB,infoGlobal.m_timeStep);
convertJoint(currentConstraintRow, constraint, info1, solverBodyIdA, solverBodyIdB, infoGlobal);
}
currentRow+=info1.m_numConstraintRows;
}
}
void btSequentialImpulseConstraintSolver::convertBodies(btCollisionObject** bodies, int numBodies, const btContactSolverInfo& infoGlobal)
{
BT_PROFILE("convertBodies");
for (int i = 0; i < numBodies; i++)
{
bodies[i]->setCompanionId(-1);
}
#if BT_THREADSAFE
m_kinematicBodyUniqueIdToSolverBodyTable.resize( 0 );
#endif // BT_THREADSAFE
m_tmpSolverBodyPool.reserve(numBodies+1);
m_tmpSolverBodyPool.resize(0);
//btSolverBody& fixedBody = m_tmpSolverBodyPool.expand();
//initSolverBody(&fixedBody,0);
for (int i=0;i<numBodies;i++)
{
int bodyId = getOrInitSolverBody(*bodies[i],infoGlobal.m_timeStep);
btRigidBody* body = btRigidBody::upcast(bodies[i]);
if (body && body->getInvMass())
{
btSolverBody& solverBody = m_tmpSolverBodyPool[bodyId];
btVector3 gyroForce (0,0,0);
if (body->getFlags()&BT_ENABLE_GYROSCOPIC_FORCE_EXPLICIT)
{
gyroForce = body->computeGyroscopicForceExplicit(infoGlobal.m_maxGyroscopicForce);
solverBody.m_externalTorqueImpulse -= gyroForce*body->getInvInertiaTensorWorld()*infoGlobal.m_timeStep;
}
if (body->getFlags()&BT_ENABLE_GYROSCOPIC_FORCE_IMPLICIT_WORLD)
{
gyroForce = body->computeGyroscopicImpulseImplicit_World(infoGlobal.m_timeStep);
solverBody.m_externalTorqueImpulse += gyroForce;
}
if (body->getFlags()&BT_ENABLE_GYROSCOPIC_FORCE_IMPLICIT_BODY)
{
gyroForce = body->computeGyroscopicImpulseImplicit_Body(infoGlobal.m_timeStep);
solverBody.m_externalTorqueImpulse += gyroForce;
}
}
}
}
btScalar btSequentialImpulseConstraintSolver::solveGroupCacheFriendlySetup(btCollisionObject** bodies, int numBodies, btPersistentManifold** manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal,btIDebugDraw* debugDrawer)
{
m_fixedBodyId = -1;
@@ -1344,250 +1594,13 @@ btScalar btSequentialImpulseConstraintSolver::solveGroupCacheFriendlySetup(btCol
#endif //BT_ADDITIONAL_DEBUG
for (int i = 0; i < numBodies; i++)
{
bodies[i]->setCompanionId(-1);
}
#if BT_THREADSAFE
m_kinematicBodyUniqueIdToSolverBodyTable.resize( 0 );
#endif // BT_THREADSAFE
m_tmpSolverBodyPool.reserve(numBodies+1);
m_tmpSolverBodyPool.resize(0);
//btSolverBody& fixedBody = m_tmpSolverBodyPool.expand();
//initSolverBody(&fixedBody,0);
//convert all bodies
convertBodies(bodies, numBodies, infoGlobal);
convertJoints(constraints, numConstraints, infoGlobal);
for (int i=0;i<numBodies;i++)
{
int bodyId = getOrInitSolverBody(*bodies[i],infoGlobal.m_timeStep);
convertContacts(manifoldPtr,numManifolds,infoGlobal);
btRigidBody* body = btRigidBody::upcast(bodies[i]);
if (body && body->getInvMass())
{
btSolverBody& solverBody = m_tmpSolverBodyPool[bodyId];
btVector3 gyroForce (0,0,0);
if (body->getFlags()&BT_ENABLE_GYROSCOPIC_FORCE_EXPLICIT)
{
gyroForce = body->computeGyroscopicForceExplicit(infoGlobal.m_maxGyroscopicForce);
solverBody.m_externalTorqueImpulse -= gyroForce*body->getInvInertiaTensorWorld()*infoGlobal.m_timeStep;
}
if (body->getFlags()&BT_ENABLE_GYROSCOPIC_FORCE_IMPLICIT_WORLD)
{
gyroForce = body->computeGyroscopicImpulseImplicit_World(infoGlobal.m_timeStep);
solverBody.m_externalTorqueImpulse += gyroForce;
}
if (body->getFlags()&BT_ENABLE_GYROSCOPIC_FORCE_IMPLICIT_BODY)
{
gyroForce = body->computeGyroscopicImpulseImplicit_Body(infoGlobal.m_timeStep);
solverBody.m_externalTorqueImpulse += gyroForce;
}
}
}
if (1)
{
int j;
for (j=0;j<numConstraints;j++)
{
btTypedConstraint* constraint = constraints[j];
constraint->buildJacobian();
constraint->internalSetAppliedImpulse(0.0f);
}
}
//btRigidBody* rb0=0,*rb1=0;
//if (1)
{
{
int totalNumRows = 0;
int i;
m_tmpConstraintSizesPool.resizeNoInitialize(numConstraints);
//calculate the total number of contraint rows
for (i=0;i<numConstraints;i++)
{
btTypedConstraint::btConstraintInfo1& info1 = m_tmpConstraintSizesPool[i];
btJointFeedback* fb = constraints[i]->getJointFeedback();
if (fb)
{
fb->m_appliedForceBodyA.setZero();
fb->m_appliedTorqueBodyA.setZero();
fb->m_appliedForceBodyB.setZero();
fb->m_appliedTorqueBodyB.setZero();
}
if (constraints[i]->isEnabled())
{
constraints[i]->getInfo1(&info1);
} else
{
info1.m_numConstraintRows = 0;
info1.nub = 0;
}
totalNumRows += info1.m_numConstraintRows;
}
m_tmpSolverNonContactConstraintPool.resizeNoInitialize(totalNumRows);
///setup the btSolverConstraints
int currentRow = 0;
for (i=0;i<numConstraints;i++)
{
const btTypedConstraint::btConstraintInfo1& info1 = m_tmpConstraintSizesPool[i];
if (info1.m_numConstraintRows)
{
btAssert(currentRow<totalNumRows);
btSolverConstraint* currentConstraintRow = &m_tmpSolverNonContactConstraintPool[currentRow];
btTypedConstraint* constraint = constraints[i];
btRigidBody& rbA = constraint->getRigidBodyA();
btRigidBody& rbB = constraint->getRigidBodyB();
int solverBodyIdA = getOrInitSolverBody(rbA,infoGlobal.m_timeStep);
int solverBodyIdB = getOrInitSolverBody(rbB,infoGlobal.m_timeStep);
btSolverBody* bodyAPtr = &m_tmpSolverBodyPool[solverBodyIdA];
btSolverBody* bodyBPtr = &m_tmpSolverBodyPool[solverBodyIdB];
int overrideNumSolverIterations = constraint->getOverrideNumSolverIterations() > 0 ? constraint->getOverrideNumSolverIterations() : infoGlobal.m_numIterations;
if (overrideNumSolverIterations>m_maxOverrideNumSolverIterations)
m_maxOverrideNumSolverIterations = overrideNumSolverIterations;
int j;
for ( j=0;j<info1.m_numConstraintRows;j++)
{
memset(&currentConstraintRow[j],0,sizeof(btSolverConstraint));
currentConstraintRow[j].m_lowerLimit = -SIMD_INFINITY;
currentConstraintRow[j].m_upperLimit = SIMD_INFINITY;
currentConstraintRow[j].m_appliedImpulse = 0.f;
currentConstraintRow[j].m_appliedPushImpulse = 0.f;
currentConstraintRow[j].m_solverBodyIdA = solverBodyIdA;
currentConstraintRow[j].m_solverBodyIdB = solverBodyIdB;
currentConstraintRow[j].m_overrideNumSolverIterations = overrideNumSolverIterations;
}
bodyAPtr->internalGetDeltaLinearVelocity().setValue(0.f,0.f,0.f);
bodyAPtr->internalGetDeltaAngularVelocity().setValue(0.f,0.f,0.f);
bodyAPtr->internalGetPushVelocity().setValue(0.f,0.f,0.f);
bodyAPtr->internalGetTurnVelocity().setValue(0.f,0.f,0.f);
bodyBPtr->internalGetDeltaLinearVelocity().setValue(0.f,0.f,0.f);
bodyBPtr->internalGetDeltaAngularVelocity().setValue(0.f,0.f,0.f);
bodyBPtr->internalGetPushVelocity().setValue(0.f,0.f,0.f);
bodyBPtr->internalGetTurnVelocity().setValue(0.f,0.f,0.f);
btTypedConstraint::btConstraintInfo2 info2;
info2.fps = 1.f/infoGlobal.m_timeStep;
info2.erp = infoGlobal.m_erp;
info2.m_J1linearAxis = currentConstraintRow->m_contactNormal1;
info2.m_J1angularAxis = currentConstraintRow->m_relpos1CrossNormal;
info2.m_J2linearAxis = currentConstraintRow->m_contactNormal2;
info2.m_J2angularAxis = currentConstraintRow->m_relpos2CrossNormal;
info2.rowskip = sizeof(btSolverConstraint)/sizeof(btScalar);//check this
///the size of btSolverConstraint needs be a multiple of btScalar
btAssert(info2.rowskip*sizeof(btScalar)== sizeof(btSolverConstraint));
info2.m_constraintError = &currentConstraintRow->m_rhs;
currentConstraintRow->m_cfm = infoGlobal.m_globalCfm;
info2.m_damping = infoGlobal.m_damping;
info2.cfm = &currentConstraintRow->m_cfm;
info2.m_lowerLimit = &currentConstraintRow->m_lowerLimit;
info2.m_upperLimit = &currentConstraintRow->m_upperLimit;
info2.m_numIterations = infoGlobal.m_numIterations;
constraints[i]->getInfo2(&info2);
///finalize the constraint setup
for ( j=0;j<info1.m_numConstraintRows;j++)
{
btSolverConstraint& solverConstraint = currentConstraintRow[j];
if (solverConstraint.m_upperLimit>=constraints[i]->getBreakingImpulseThreshold())
{
solverConstraint.m_upperLimit = constraints[i]->getBreakingImpulseThreshold();
}
if (solverConstraint.m_lowerLimit<=-constraints[i]->getBreakingImpulseThreshold())
{
solverConstraint.m_lowerLimit = -constraints[i]->getBreakingImpulseThreshold();
}
solverConstraint.m_originalContactPoint = constraint;
{
const btVector3& ftorqueAxis1 = solverConstraint.m_relpos1CrossNormal;
solverConstraint.m_angularComponentA = constraint->getRigidBodyA().getInvInertiaTensorWorld()*ftorqueAxis1*constraint->getRigidBodyA().getAngularFactor();
}
{
const btVector3& ftorqueAxis2 = solverConstraint.m_relpos2CrossNormal;
solverConstraint.m_angularComponentB = constraint->getRigidBodyB().getInvInertiaTensorWorld()*ftorqueAxis2*constraint->getRigidBodyB().getAngularFactor();
}
{
btVector3 iMJlA = solverConstraint.m_contactNormal1*rbA.getInvMass();
btVector3 iMJaA = rbA.getInvInertiaTensorWorld()*solverConstraint.m_relpos1CrossNormal;
btVector3 iMJlB = solverConstraint.m_contactNormal2*rbB.getInvMass();//sign of normal?
btVector3 iMJaB = rbB.getInvInertiaTensorWorld()*solverConstraint.m_relpos2CrossNormal;
btScalar sum = iMJlA.dot(solverConstraint.m_contactNormal1);
sum += iMJaA.dot(solverConstraint.m_relpos1CrossNormal);
sum += iMJlB.dot(solverConstraint.m_contactNormal2);
sum += iMJaB.dot(solverConstraint.m_relpos2CrossNormal);
btScalar fsum = btFabs(sum);
btAssert(fsum > SIMD_EPSILON);
btScalar sorRelaxation = 1.f;//todo: get from globalInfo?
solverConstraint.m_jacDiagABInv = fsum>SIMD_EPSILON?sorRelaxation/sum : 0.f;
}
{
btScalar rel_vel;
btVector3 externalForceImpulseA = bodyAPtr->m_originalBody ? bodyAPtr->m_externalForceImpulse : btVector3(0,0,0);
btVector3 externalTorqueImpulseA = bodyAPtr->m_originalBody ? bodyAPtr->m_externalTorqueImpulse : btVector3(0,0,0);
btVector3 externalForceImpulseB = bodyBPtr->m_originalBody ? bodyBPtr->m_externalForceImpulse : btVector3(0,0,0);
btVector3 externalTorqueImpulseB = bodyBPtr->m_originalBody ?bodyBPtr->m_externalTorqueImpulse : btVector3(0,0,0);
btScalar vel1Dotn = solverConstraint.m_contactNormal1.dot(rbA.getLinearVelocity()+externalForceImpulseA)
+ solverConstraint.m_relpos1CrossNormal.dot(rbA.getAngularVelocity()+externalTorqueImpulseA);
btScalar vel2Dotn = solverConstraint.m_contactNormal2.dot(rbB.getLinearVelocity()+externalForceImpulseB)
+ solverConstraint.m_relpos2CrossNormal.dot(rbB.getAngularVelocity()+externalTorqueImpulseB);
rel_vel = vel1Dotn+vel2Dotn;
btScalar restitution = 0.f;
btScalar positionalError = solverConstraint.m_rhs;//already filled in by getConstraintInfo2
btScalar velocityError = restitution - rel_vel * info2.m_damping;
btScalar penetrationImpulse = positionalError*solverConstraint.m_jacDiagABInv;
btScalar velocityImpulse = velocityError *solverConstraint.m_jacDiagABInv;
solverConstraint.m_rhs = penetrationImpulse+velocityImpulse;
solverConstraint.m_appliedImpulse = 0.f;
}
}
}
currentRow+=m_tmpConstraintSizesPool[i].m_numConstraintRows;
}
}
convertContacts(manifoldPtr,numManifolds,infoGlobal);
}
// btContactSolverInfo info = infoGlobal;
@@ -1627,6 +1640,7 @@ btScalar btSequentialImpulseConstraintSolver::solveGroupCacheFriendlySetup(btCol
btScalar btSequentialImpulseConstraintSolver::solveSingleIteration(int iteration, btCollisionObject** /*bodies */,int /*numBodies*/,btPersistentManifold** /*manifoldPtr*/, int /*numManifolds*/,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal,btIDebugDraw* /*debugDrawer*/)
{
BT_PROFILE("solveSingleIteration");
btScalar leastSquaresResidual = 0.f;
int numNonContactPool = m_tmpSolverNonContactConstraintPool.size();
@@ -1805,6 +1819,7 @@ btScalar btSequentialImpulseConstraintSolver::solveSingleIteration(int iteration
void btSequentialImpulseConstraintSolver::solveGroupCacheFriendlySplitImpulseIterations(btCollisionObject** bodies,int numBodies,btPersistentManifold** manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal,btIDebugDraw* debugDrawer)
{
BT_PROFILE("solveGroupCacheFriendlySplitImpulseIterations");
int iteration;
if (infoGlobal.m_splitImpulse)
{
@@ -1863,14 +1878,9 @@ btScalar btSequentialImpulseConstraintSolver::solveGroupCacheFriendlyIterations(
return 0.f;
}
btScalar btSequentialImpulseConstraintSolver::solveGroupCacheFriendlyFinish(btCollisionObject** bodies,int numBodies,const btContactSolverInfo& infoGlobal)
void btSequentialImpulseConstraintSolver::writeBackContacts(int iBegin, int iEnd, const btContactSolverInfo& infoGlobal)
{
int numPoolConstraints = m_tmpSolverContactConstraintPool.size();
int i,j;
if (infoGlobal.m_solverMode & SOLVER_USE_WARMSTARTING)
{
for (j=0;j<numPoolConstraints;j++)
for (int j=iBegin; j<iEnd; j++)
{
const btSolverConstraint& solveManifold = m_tmpSolverContactConstraintPool[j];
btManifoldPoint* pt = (btManifoldPoint*) solveManifold.m_originalContactPoint;
@@ -1886,10 +1896,11 @@ btScalar btSequentialImpulseConstraintSolver::solveGroupCacheFriendlyFinish(btCo
}
//do a callback here?
}
}
}
numPoolConstraints = m_tmpSolverNonContactConstraintPool.size();
for (j=0;j<numPoolConstraints;j++)
void btSequentialImpulseConstraintSolver::writeBackJoints(int iBegin, int iEnd, const btContactSolverInfo& infoGlobal)
{
for (int j=iBegin; j<iEnd; j++)
{
const btSolverConstraint& solverConstr = m_tmpSolverNonContactConstraintPool[j];
btTypedConstraint* constr = (btTypedConstraint*)solverConstr.m_originalContactPoint;
@@ -1909,10 +1920,12 @@ btScalar btSequentialImpulseConstraintSolver::solveGroupCacheFriendlyFinish(btCo
constr->setEnabled(false);
}
}
}
for ( i=0;i<m_tmpSolverBodyPool.size();i++)
void btSequentialImpulseConstraintSolver::writeBackBodies(int iBegin, int iEnd, const btContactSolverInfo& infoGlobal)
{
for (int i=iBegin; i<iEnd; i++)
{
btRigidBody* body = m_tmpSolverBodyPool[i].m_originalBody;
if (body)
@@ -1936,6 +1949,19 @@ btScalar btSequentialImpulseConstraintSolver::solveGroupCacheFriendlyFinish(btCo
m_tmpSolverBodyPool[i].m_originalBody->setCompanionId(-1);
}
}
}
btScalar btSequentialImpulseConstraintSolver::solveGroupCacheFriendlyFinish(btCollisionObject** bodies,int numBodies,const btContactSolverInfo& infoGlobal)
{
BT_PROFILE("solveGroupCacheFriendlyFinish");
if (infoGlobal.m_solverMode & SOLVER_USE_WARMSTARTING)
{
writeBackContacts(0, m_tmpSolverContactConstraintPool.size(), infoGlobal);
}
writeBackJoints(0, m_tmpSolverNonContactConstraintPool.size(), infoGlobal);
writeBackBodies(0, m_tmpSolverBodyPool.size(), infoGlobal);
m_tmpSolverContactConstraintPool.resizeNoInitialize(0);
m_tmpSolverNonContactConstraintPool.resizeNoInitialize(0);


@@ -95,6 +95,10 @@ protected:
void convertContact(btPersistentManifold* manifold,const btContactSolverInfo& infoGlobal);
virtual void convertJoints(btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal);
void convertJoint(btSolverConstraint* destConstraintRow, btTypedConstraint* srcConstraint, const btTypedConstraint::btConstraintInfo1& info1, int solverBodyIdA, int solverBodyIdB, const btContactSolverInfo& infoGlobal);
virtual void convertBodies(btCollisionObject** bodies, int numBodies, const btContactSolverInfo& infoGlobal);
btSimdScalar resolveSplitPenetrationSIMD(btSolverBody& bodyA,btSolverBody& bodyB, const btSolverConstraint& contactConstraint)
{
@@ -121,7 +125,9 @@ protected:
protected:
void writeBackContacts(int iBegin, int iEnd, const btContactSolverInfo& infoGlobal);
void writeBackJoints(int iBegin, int iEnd, const btContactSolverInfo& infoGlobal);
void writeBackBodies(int iBegin, int iEnd, const btContactSolverInfo& infoGlobal);
virtual void solveGroupCacheFriendlySplitImpulseIterations(btCollisionObject** bodies,int numBodies,btPersistentManifold** manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal,btIDebugDraw* debugDrawer);
virtual btScalar solveGroupCacheFriendlyFinish(btCollisionObject** bodies,int numBodies,const btContactSolverInfo& infoGlobal);
virtual btScalar solveSingleIteration(int iteration, btCollisionObject** bodies ,int numBodies,btPersistentManifold** manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal,btIDebugDraw* debugDrawer);

File diff suppressed because it is too large.


@@ -0,0 +1,154 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef BT_SEQUENTIAL_IMPULSE_CONSTRAINT_SOLVER_MT_H
#define BT_SEQUENTIAL_IMPULSE_CONSTRAINT_SOLVER_MT_H
#include "btSequentialImpulseConstraintSolver.h"
#include "btBatchedConstraints.h"
#include "LinearMath/btThreads.h"
///
/// btSequentialImpulseConstraintSolverMt
///
/// A multithreaded variant of the sequential impulse constraint solver. The constraints to be solved are grouped into
/// batches and phases where each batch of constraints within a given phase can be solved in parallel with the rest.
/// Ideally we want as few phases as possible, and each phase should have many batches, and all of the batches should
/// have about the same number of constraints.
/// This method works best on a large island of many constraints.
///
/// Supports all of the features of the normal sequential impulse solver such as:
/// - split penetration impulse
/// - rolling friction
/// - interleaving constraints
/// - warmstarting
/// - 2 friction directions
/// - randomized constraint ordering
/// - early termination when leastSquaresResidualThreshold is satisfied
///
/// When the SOLVER_INTERLEAVE_CONTACT_AND_FRICTION_CONSTRAINTS flag is enabled, unlike the normal SequentialImpulse solver,
/// the rolling friction is interleaved as well.
/// Interleaving the contact penetration constraints with friction reduces the number of parallel loops that need to be done,
/// which reduces threading overhead so it can be a performance win, however, it does seem to produce a less stable simulation,
/// at least on stacks of blocks.
///
/// When the SOLVER_RANDMIZE_ORDER flag is enabled, the ordering of phases, and the ordering of constraints within each batch
/// is randomized, however it does not swap constraints between batches.
/// This is to avoid regenerating the batches for each solver iteration which would be quite costly in performance.
///
/// Note that a non-zero leastSquaresResidualThreshold could possibly affect the determinism of the simulation
/// if the task scheduler's parallelSum operation is non-deterministic. The parallelSum operation can be non-deterministic
/// because floating point addition is not associative due to rounding errors.
/// The task scheduler can and should ensure that the result of any parallelSum operation is deterministic.
///
ATTRIBUTE_ALIGNED16(class) btSequentialImpulseConstraintSolverMt : public btSequentialImpulseConstraintSolver
{
public:
virtual void solveGroupCacheFriendlySplitImpulseIterations(btCollisionObject** bodies,int numBodies,btPersistentManifold** manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal,btIDebugDraw* debugDrawer) BT_OVERRIDE;
virtual btScalar solveSingleIteration(int iteration, btCollisionObject** bodies ,int numBodies,btPersistentManifold** manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal,btIDebugDraw* debugDrawer) BT_OVERRIDE;
virtual btScalar solveGroupCacheFriendlySetup(btCollisionObject** bodies,int numBodies,btPersistentManifold** manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal,btIDebugDraw* debugDrawer) BT_OVERRIDE;
virtual btScalar solveGroupCacheFriendlyFinish(btCollisionObject** bodies, int numBodies, const btContactSolverInfo& infoGlobal) BT_OVERRIDE;
// temp struct used to collect info from persistent manifolds into a cache-friendly struct using multiple threads
struct btContactManifoldCachedInfo
{
static const int MAX_NUM_CONTACT_POINTS = 4;
int numTouchingContacts;
int solverBodyIds[ 2 ];
int contactIndex;
int rollingFrictionIndex;
bool contactHasRollingFriction[ MAX_NUM_CONTACT_POINTS ];
btManifoldPoint* contactPoints[ MAX_NUM_CONTACT_POINTS ];
};
// temp struct used for setting up joint constraints in parallel
struct JointParams
{
int m_solverConstraint;
int m_solverBodyA;
int m_solverBodyB;
};
void internalInitMultipleJoints(btTypedConstraint** constraints, int iBegin, int iEnd);
void internalConvertMultipleJoints( const btAlignedObjectArray<JointParams>& jointParamsArray, btTypedConstraint** constraints, int iBegin, int iEnd, const btContactSolverInfo& infoGlobal );
// parameters to control batching
static bool s_allowNestedParallelForLoops; // whether to allow nested parallel operations
static int s_minimumContactManifoldsForBatching; // don't even try to batch if fewer manifolds than this
static btBatchedConstraints::BatchingMethod s_contactBatchingMethod;
static btBatchedConstraints::BatchingMethod s_jointBatchingMethod;
static int s_minBatchSize; // desired number of constraints per batch
static int s_maxBatchSize;
protected:
static const int CACHE_LINE_SIZE = 64;
btBatchedConstraints m_batchedContactConstraints;
btBatchedConstraints m_batchedJointConstraints;
int m_numFrictionDirections;
bool m_useBatching;
bool m_useObsoleteJointConstraints;
btAlignedObjectArray<btContactManifoldCachedInfo> m_manifoldCachedInfoArray;
btAlignedObjectArray<int> m_rollingFrictionIndexTable; // lookup table mapping contact index to rolling friction index
btSpinMutex m_bodySolverArrayMutex;
char m_antiFalseSharingPadding[CACHE_LINE_SIZE]; // padding to keep mutexes in separate cachelines
btSpinMutex m_kinematicBodyUniqueIdToSolverBodyTableMutex;
btAlignedObjectArray<char> m_scratchMemory;
virtual void randomizeConstraintOrdering( int iteration, int numIterations );
virtual btScalar resolveAllJointConstraints( int iteration );
virtual btScalar resolveAllContactConstraints();
virtual btScalar resolveAllContactFrictionConstraints();
virtual btScalar resolveAllContactConstraintsInterleaved();
virtual btScalar resolveAllRollingFrictionConstraints();
virtual void setupBatchedContactConstraints();
virtual void setupBatchedJointConstraints();
virtual void convertJoints(btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal) BT_OVERRIDE;
virtual void convertContacts(btPersistentManifold** manifoldPtr, int numManifolds, const btContactSolverInfo& infoGlobal) BT_OVERRIDE;
virtual void convertBodies(btCollisionObject** bodies, int numBodies, const btContactSolverInfo& infoGlobal) BT_OVERRIDE;
int getOrInitSolverBodyThreadsafe(btCollisionObject& body, btScalar timeStep);
void allocAllContactConstraints(btPersistentManifold** manifoldPtr, int numManifolds, const btContactSolverInfo& infoGlobal);
void setupAllContactConstraints(const btContactSolverInfo& infoGlobal);
void randomizeBatchedConstraintOrdering( btBatchedConstraints* batchedConstraints );
public:
BT_DECLARE_ALIGNED_ALLOCATOR();
btSequentialImpulseConstraintSolverMt();
virtual ~btSequentialImpulseConstraintSolverMt();
btScalar resolveMultipleJointConstraints( const btAlignedObjectArray<int>& consIndices, int batchBegin, int batchEnd, int iteration );
btScalar resolveMultipleContactConstraints( const btAlignedObjectArray<int>& consIndices, int batchBegin, int batchEnd );
btScalar resolveMultipleContactSplitPenetrationImpulseConstraints( const btAlignedObjectArray<int>& consIndices, int batchBegin, int batchEnd );
btScalar resolveMultipleContactFrictionConstraints( const btAlignedObjectArray<int>& consIndices, int batchBegin, int batchEnd );
btScalar resolveMultipleContactRollingFrictionConstraints( const btAlignedObjectArray<int>& consIndices, int batchBegin, int batchEnd );
btScalar resolveMultipleContactConstraintsInterleaved( const btAlignedObjectArray<int>& consIndices, int batchBegin, int batchEnd );
void internalCollectContactManifoldCachedInfo(btContactManifoldCachedInfo* cachedInfoArray, btPersistentManifold** manifold, int numManifolds, const btContactSolverInfo& infoGlobal);
void internalAllocContactConstraints(const btContactManifoldCachedInfo* cachedInfoArray, int numManifolds);
void internalSetupContactConstraints(int iContact, const btContactSolverInfo& infoGlobal);
void internalConvertBodies(btCollisionObject** bodies, int iBegin, int iEnd, const btContactSolverInfo& infoGlobal);
void internalWriteBackContacts(int iBegin, int iEnd, const btContactSolverInfo& infoGlobal);
void internalWriteBackJoints(int iBegin, int iEnd, const btContactSolverInfo& infoGlobal);
void internalWriteBackBodies(int iBegin, int iEnd, const btContactSolverInfo& infoGlobal);
};
#endif //BT_SEQUENTIAL_IMPULSE_CONSTRAINT_SOLVER_MT_H
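For context, a minimal world-setup sketch showing how this solver is meant to be wired in next to a solver pool, matching the btDiscreteDynamicsWorldMt constructor introduced later in this change. This is illustrative only and not part of the patch; the helper name and the exact pool/dispatcher constructor arguments are assumptions.
#include "btBulletDynamicsCommon.h"
#include "BulletCollision/CollisionDispatch/btCollisionDispatcherMt.h"
#include "BulletDynamics/Dynamics/btDiscreteDynamicsWorldMt.h"
#include "BulletDynamics/ConstraintSolver/btSequentialImpulseConstraintSolverMt.h"
#include "LinearMath/btThreads.h"
btDiscreteDynamicsWorldMt* createMtWorld() // illustrative helper, not Bullet API
{
    if (btITaskScheduler* scheduler = btCreateDefaultTaskScheduler())
        btSetTaskScheduler(scheduler);
    btDefaultCollisionConfiguration* config = new btDefaultCollisionConfiguration();
    btCollisionDispatcherMt* dispatcher = new btCollisionDispatcherMt(config); // ctor args assumed
    btBroadphaseInterface* broadphase = new btDbvtBroadphase();
    btConstraintSolverPoolMt* solverPool = new btConstraintSolverPoolMt(BT_MAX_THREAD_COUNT); // ctor args assumed
    // the parallel solver handles large islands; pass NULL to always use the pool instead
    btSequentialImpulseConstraintSolverMt* solverMt = new btSequentialImpulseConstraintSolverMt();
    return new btDiscreteDynamicsWorldMt(dispatcher, broadphase, solverPool, solverMt, config);
}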


@@ -50,63 +50,6 @@ subject to the following restrictions:
#include "LinearMath/btSerializer.h"
struct InplaceSolverIslandCallbackMt : public btSimulationIslandManagerMt::IslandCallback
{
btContactSolverInfo* m_solverInfo;
btConstraintSolver* m_solver;
btIDebugDraw* m_debugDrawer;
btDispatcher* m_dispatcher;
InplaceSolverIslandCallbackMt(
btConstraintSolver* solver,
btStackAlloc* stackAlloc,
btDispatcher* dispatcher)
:m_solverInfo(NULL),
m_solver(solver),
m_debugDrawer(NULL),
m_dispatcher(dispatcher)
{
}
InplaceSolverIslandCallbackMt& operator=(InplaceSolverIslandCallbackMt& other)
{
btAssert(0);
(void)other;
return *this;
}
SIMD_FORCE_INLINE void setup ( btContactSolverInfo* solverInfo, btIDebugDraw* debugDrawer)
{
btAssert(solverInfo);
m_solverInfo = solverInfo;
m_debugDrawer = debugDrawer;
}
virtual void processIsland( btCollisionObject** bodies,
int numBodies,
btPersistentManifold** manifolds,
int numManifolds,
btTypedConstraint** constraints,
int numConstraints,
int islandId
)
{
m_solver->solveGroup( bodies,
numBodies,
manifolds,
numManifolds,
constraints,
numConstraints,
*m_solverInfo,
m_debugDrawer,
m_dispatcher
);
}
};
///
/// btConstraintSolverPoolMt
@@ -209,7 +152,12 @@ void btConstraintSolverPoolMt::reset()
/// btDiscreteDynamicsWorldMt
///
btDiscreteDynamicsWorldMt::btDiscreteDynamicsWorldMt(btDispatcher* dispatcher, btBroadphaseInterface* pairCache, btConstraintSolverPoolMt* constraintSolver, btCollisionConfiguration* collisionConfiguration)
btDiscreteDynamicsWorldMt::btDiscreteDynamicsWorldMt(btDispatcher* dispatcher,
btBroadphaseInterface* pairCache,
btConstraintSolverPoolMt* constraintSolver,
btConstraintSolver* constraintSolverMt,
btCollisionConfiguration* collisionConfiguration
)
: btDiscreteDynamicsWorld(dispatcher,pairCache,constraintSolver,collisionConfiguration)
{
if (m_ownsIslandManager)
@@ -217,31 +165,18 @@ btDiscreteDynamicsWorldMt::btDiscreteDynamicsWorldMt(btDispatcher* dispatcher, b
m_islandManager->~btSimulationIslandManager();
btAlignedFree( m_islandManager);
}
{
void* mem = btAlignedAlloc(sizeof(InplaceSolverIslandCallbackMt),16);
m_solverIslandCallbackMt = new (mem) InplaceSolverIslandCallbackMt (m_constraintSolver, 0, dispatcher);
}
{
void* mem = btAlignedAlloc(sizeof(btSimulationIslandManagerMt),16);
btSimulationIslandManagerMt* im = new (mem) btSimulationIslandManagerMt();
im->setMinimumSolverBatchSize( m_solverInfo.m_minimumSolverBatchSize );
m_islandManager = im;
}
m_constraintSolverMt = constraintSolverMt;
}
btDiscreteDynamicsWorldMt::~btDiscreteDynamicsWorldMt()
{
if (m_solverIslandCallbackMt)
{
m_solverIslandCallbackMt->~InplaceSolverIslandCallbackMt();
btAlignedFree(m_solverIslandCallbackMt);
}
if (m_ownsConstraintSolver)
{
m_constraintSolver->~btConstraintSolver();
btAlignedFree(m_constraintSolver);
}
}
@@ -249,12 +184,17 @@ void btDiscreteDynamicsWorldMt::solveConstraints(btContactSolverInfo& solverInfo
{
BT_PROFILE("solveConstraints");
m_solverIslandCallbackMt->setup(&solverInfo, getDebugDrawer());
m_constraintSolver->prepareSolve(getCollisionWorld()->getNumCollisionObjects(), getCollisionWorld()->getDispatcher()->getNumManifolds());
/// solve all the constraints for this island
btSimulationIslandManagerMt* im = static_cast<btSimulationIslandManagerMt*>(m_islandManager);
im->buildAndProcessIslands( getCollisionWorld()->getDispatcher(), getCollisionWorld(), m_constraints, m_solverIslandCallbackMt );
btSimulationIslandManagerMt::SolverParams solverParams;
solverParams.m_solverPool = m_constraintSolver;
solverParams.m_solverMt = m_constraintSolverMt;
solverParams.m_solverInfo = &solverInfo;
solverParams.m_debugDrawer = m_debugDrawer;
solverParams.m_dispatcher = getCollisionWorld()->getDispatcher();
im->buildAndProcessIslands( getCollisionWorld()->getDispatcher(), getCollisionWorld(), m_constraints, solverParams );
m_constraintSolver->allSolved(solverInfo, m_debugDrawer);
}
@@ -325,3 +265,14 @@ void btDiscreteDynamicsWorldMt::integrateTransforms( btScalar timeStep )
}
}
int btDiscreteDynamicsWorldMt::stepSimulation( btScalar timeStep, int maxSubSteps, btScalar fixedTimeStep )
{
int numSubSteps = btDiscreteDynamicsWorld::stepSimulation(timeStep, maxSubSteps, fixedTimeStep);
if (btITaskScheduler* scheduler = btGetTaskScheduler())
{
// tell Bullet's threads to sleep, so other threads can run
scheduler->sleepWorkerThreadsHint();
}
return numSubSteps;
}


@@ -21,7 +21,6 @@ subject to the following restrictions:
#include "btSimulationIslandManagerMt.h"
#include "BulletDynamics/ConstraintSolver/btConstraintSolver.h"
struct InplaceSolverIslandCallbackMt;
///
/// btConstraintSolverPoolMt - masquerades as a constraint solver, but really it is a threadsafe pool of them.
@@ -88,7 +87,7 @@ private:
ATTRIBUTE_ALIGNED16(class) btDiscreteDynamicsWorldMt : public btDiscreteDynamicsWorld
{
protected:
InplaceSolverIslandCallbackMt* m_solverIslandCallbackMt;
btConstraintSolver* m_constraintSolverMt;
virtual void solveConstraints(btContactSolverInfo& solverInfo) BT_OVERRIDE;
@@ -126,9 +125,12 @@ public:
btDiscreteDynamicsWorldMt(btDispatcher* dispatcher,
btBroadphaseInterface* pairCache,
btConstraintSolverPoolMt* constraintSolver, // Note this should be a solver-pool for multi-threading
btConstraintSolver* constraintSolverMt, // single multi-threaded solver for large islands (or NULL)
btCollisionConfiguration* collisionConfiguration
);
virtual ~btDiscreteDynamicsWorldMt();
virtual int stepSimulation( btScalar timeStep, int maxSubSteps, btScalar fixedTimeStep ) BT_OVERRIDE;
};
#endif //BT_DISCRETE_DYNAMICS_WORLD_H


@@ -22,6 +22,7 @@ subject to the following restrictions:
#include "BulletCollision/CollisionDispatch/btCollisionObject.h"
#include "BulletCollision/CollisionDispatch/btCollisionWorld.h"
#include "BulletDynamics/ConstraintSolver/btTypedConstraint.h"
#include "BulletDynamics/ConstraintSolver/btSequentialImpulseConstraintSolverMt.h" // for s_minimumContactManifoldsForBatching
//#include <stdio.h>
#include "LinearMath/btQuickprof.h"
@@ -275,7 +276,7 @@ btSimulationIslandManagerMt::Island* btSimulationIslandManagerMt::allocateIsland
void btSimulationIslandManagerMt::buildIslands( btDispatcher* dispatcher, btCollisionWorld* collisionWorld )
{
BT_PROFILE("islandUnionFindAndQuickSort");
BT_PROFILE("buildIslands");
btCollisionObjectArray& collisionObjects = collisionWorld->getCollisionObjectArray();
@@ -544,59 +545,103 @@ void btSimulationIslandManagerMt::mergeIslands()
}
void btSimulationIslandManagerMt::serialIslandDispatch( btAlignedObjectArray<Island*>* islandsPtr, IslandCallback* callback )
void btSimulationIslandManagerMt::solveIsland(btConstraintSolver* solver, Island& island, const SolverParams& solverParams)
{
btPersistentManifold** manifolds = island.manifoldArray.size() ? &island.manifoldArray[ 0 ] : NULL;
btTypedConstraint** constraintsPtr = island.constraintArray.size() ? &island.constraintArray[ 0 ] : NULL;
solver->solveGroup( &island.bodyArray[ 0 ],
island.bodyArray.size(),
manifolds,
island.manifoldArray.size(),
constraintsPtr,
island.constraintArray.size(),
*solverParams.m_solverInfo,
solverParams.m_debugDrawer,
solverParams.m_dispatcher
);
}
void btSimulationIslandManagerMt::serialIslandDispatch( btAlignedObjectArray<Island*>* islandsPtr, const SolverParams& solverParams )
{
BT_PROFILE( "serialIslandDispatch" );
// serial dispatch
btAlignedObjectArray<Island*>& islands = *islandsPtr;
btConstraintSolver* solver = solverParams.m_solverMt ? solverParams.m_solverMt : solverParams.m_solverPool;
for ( int i = 0; i < islands.size(); ++i )
{
Island* island = islands[ i ];
btPersistentManifold** manifolds = island->manifoldArray.size() ? &island->manifoldArray[ 0 ] : NULL;
btTypedConstraint** constraintsPtr = island->constraintArray.size() ? &island->constraintArray[ 0 ] : NULL;
callback->processIsland( &island->bodyArray[ 0 ],
island->bodyArray.size(),
manifolds,
island->manifoldArray.size(),
constraintsPtr,
island->constraintArray.size(),
island->id
);
solveIsland(solver, *islands[ i ], solverParams);
}
}
struct UpdateIslandDispatcher : public btIParallelForBody
{
btAlignedObjectArray<btSimulationIslandManagerMt::Island*>* islandsPtr;
btSimulationIslandManagerMt::IslandCallback* callback;
btAlignedObjectArray<btSimulationIslandManagerMt::Island*>& m_islandsPtr;
const btSimulationIslandManagerMt::SolverParams& m_solverParams;
UpdateIslandDispatcher(btAlignedObjectArray<btSimulationIslandManagerMt::Island*>& islandsPtr, const btSimulationIslandManagerMt::SolverParams& solverParams)
: m_islandsPtr(islandsPtr), m_solverParams(solverParams)
{}
void forLoop( int iBegin, int iEnd ) const BT_OVERRIDE
{
btConstraintSolver* solver = m_solverParams.m_solverPool;
for ( int i = iBegin; i < iEnd; ++i )
{
btSimulationIslandManagerMt::Island* island = ( *islandsPtr )[ i ];
btPersistentManifold** manifolds = island->manifoldArray.size() ? &island->manifoldArray[ 0 ] : NULL;
btTypedConstraint** constraintsPtr = island->constraintArray.size() ? &island->constraintArray[ 0 ] : NULL;
callback->processIsland( &island->bodyArray[ 0 ],
island->bodyArray.size(),
manifolds,
island->manifoldArray.size(),
constraintsPtr,
island->constraintArray.size(),
island->id
);
btSimulationIslandManagerMt::Island* island = m_islandsPtr[ i ];
btSimulationIslandManagerMt::solveIsland( solver, *island, m_solverParams );
}
}
};
void btSimulationIslandManagerMt::parallelIslandDispatch( btAlignedObjectArray<Island*>* islandsPtr, IslandCallback* callback )
void btSimulationIslandManagerMt::parallelIslandDispatch( btAlignedObjectArray<Island*>* islandsPtr, const SolverParams& solverParams )
{
BT_PROFILE( "parallelIslandDispatch" );
int grainSize = 1; // iterations per task
UpdateIslandDispatcher dispatcher;
dispatcher.islandsPtr = islandsPtr;
dispatcher.callback = callback;
btParallelFor( 0, islandsPtr->size(), grainSize, dispatcher );
//
// if there are islands with many contacts, it may be faster to submit these
// large islands *serially* to a single parallel constraint solver, and then later
// submit the remaining smaller islands in parallel to multiple sequential solvers.
//
// Some task schedulers do not deal well with nested parallelFor loops. One implementation
// of OpenMP was actually slower than doing everything single-threaded. Intel TBB
// on the other hand, seems to do a pretty respectable job with it.
//
// When solving islands in parallel, the worst case performance happens when there
// is one very large island and then perhaps a smattering of very small
// islands -- one worker thread takes the large island and the remaining workers
// tear through the smaller islands and then sit idle waiting for the first worker
// to finish. Solving islands in parallel works best when there are numerous small
// islands, roughly equal in size.
//
// By contrast, the other approach -- the parallel constraint solver -- is only
// able to deliver a worthwhile speedup when the island is large. For smaller islands,
// it is difficult to extract a useful amount of parallelism -- the overhead of grouping
// the constraints into batches and sending the batches to worker threads can nullify
// any gains from parallelism.
//
UpdateIslandDispatcher dispatcher(*islandsPtr, solverParams);
// We take advantage of the fact the islands are sorted in order of decreasing size
int iBegin = 0;
if (solverParams.m_solverMt)
{
while ( iBegin < islandsPtr->size() )
{
btSimulationIslandManagerMt::Island* island = ( *islandsPtr )[ iBegin ];
if ( island->manifoldArray.size() < btSequentialImpulseConstraintSolverMt::s_minimumContactManifoldsForBatching )
{
// OK to submit the rest of the array in parallel
break;
}
// serial dispatch to parallel solver for large islands (if any)
solveIsland(solverParams.m_solverMt, *island, solverParams);
++iBegin;
}
}
// parallel dispatch to sequential solvers for rest
btParallelFor( iBegin, islandsPtr->size(), 1, dispatcher );
}
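The statics declared on btSequentialImpulseConstraintSolverMt control where this split happens. A hedged tuning sketch (the defaults are not shown in this hunk, so the numbers below are purely illustrative):
#include "BulletDynamics/ConstraintSolver/btSequentialImpulseConstraintSolverMt.h"
void tuneParallelSolver() // illustrative helper
{
    // islands with at least this many contact manifolds are handed (serially) to the
    // single parallel solver; smaller islands go in parallel to the sequential solvers
    btSequentialImpulseConstraintSolverMt::s_minimumContactManifoldsForBatching = 250; // illustrative value
    btSequentialImpulseConstraintSolverMt::s_minBatchSize = 50;                        // illustrative value
    btSequentialImpulseConstraintSolverMt::s_allowNestedParallelForLoops = false;
}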
@@ -604,15 +649,14 @@ void btSimulationIslandManagerMt::parallelIslandDispatch( btAlignedObjectArray<I
void btSimulationIslandManagerMt::buildAndProcessIslands( btDispatcher* dispatcher,
btCollisionWorld* collisionWorld,
btAlignedObjectArray<btTypedConstraint*>& constraints,
IslandCallback* callback
const SolverParams& solverParams
)
{
BT_PROFILE("buildAndProcessIslands");
btCollisionObjectArray& collisionObjects = collisionWorld->getCollisionObjectArray();
buildIslands(dispatcher,collisionWorld);
BT_PROFILE("processIslands");
if(!getSplitIslands())
{
btPersistentManifold** manifolds = dispatcher->getInternalManifoldPointer();
@@ -644,14 +688,17 @@ void btSimulationIslandManagerMt::buildAndProcessIslands( btDispatcher* dispatch
}
}
btTypedConstraint** constraintsPtr = constraints.size() ? &constraints[ 0 ] : NULL;
callback->processIsland(&collisionObjects[0],
collisionObjects.size(),
manifolds,
maxNumManifolds,
constraintsPtr,
constraints.size(),
-1
);
btConstraintSolver* solver = solverParams.m_solverMt ? solverParams.m_solverMt : solverParams.m_solverPool;
solver->solveGroup(&collisionObjects[0],
collisionObjects.size(),
manifolds,
maxNumManifolds,
constraintsPtr,
constraints.size(),
*solverParams.m_solverInfo,
solverParams.m_debugDrawer,
solverParams.m_dispatcher
);
}
else
{
@@ -671,6 +718,6 @@ void btSimulationIslandManagerMt::buildAndProcessIslands( btDispatcher* dispatch
mergeIslands();
}
// dispatch islands to solver
m_islandDispatch( &m_activeIslands, callback );
m_islandDispatch( &m_activeIslands, solverParams );
}
}


@@ -19,7 +19,9 @@ subject to the following restrictions:
#include "BulletCollision/CollisionDispatch/btSimulationIslandManager.h"
class btTypedConstraint;
class btConstraintSolver;
struct btContactSolverInfo;
class btIDebugDraw;
///
/// SimulationIslandManagerMt -- Multithread capable version of SimulationIslandManager
@@ -45,22 +47,19 @@ public:
void append( const Island& other ); // add bodies, manifolds, constraints to my own
};
struct IslandCallback
struct SolverParams
{
virtual ~IslandCallback() {};
virtual void processIsland( btCollisionObject** bodies,
int numBodies,
btPersistentManifold** manifolds,
int numManifolds,
btTypedConstraint** constraints,
int numConstraints,
int islandId
) = 0;
btConstraintSolver* m_solverPool;
btConstraintSolver* m_solverMt;
btContactSolverInfo* m_solverInfo;
btIDebugDraw* m_debugDrawer;
btDispatcher* m_dispatcher;
};
typedef void( *IslandDispatchFunc ) ( btAlignedObjectArray<Island*>* islands, IslandCallback* callback );
static void serialIslandDispatch( btAlignedObjectArray<Island*>* islandsPtr, IslandCallback* callback );
static void parallelIslandDispatch( btAlignedObjectArray<Island*>* islandsPtr, IslandCallback* callback );
static void solveIsland(btConstraintSolver* solver, Island& island, const SolverParams& solverParams);
typedef void( *IslandDispatchFunc ) ( btAlignedObjectArray<Island*>* islands, const SolverParams& solverParams );
static void serialIslandDispatch( btAlignedObjectArray<Island*>* islandsPtr, const SolverParams& solverParams );
static void parallelIslandDispatch( btAlignedObjectArray<Island*>* islandsPtr, const SolverParams& solverParams );
protected:
btAlignedObjectArray<Island*> m_allocatedIslands; // owner of all Islands
btAlignedObjectArray<Island*> m_activeIslands; // islands actively in use
@@ -83,7 +82,11 @@ public:
btSimulationIslandManagerMt();
virtual ~btSimulationIslandManagerMt();
virtual void buildAndProcessIslands( btDispatcher* dispatcher, btCollisionWorld* collisionWorld, btAlignedObjectArray<btTypedConstraint*>& constraints, IslandCallback* callback );
virtual void buildAndProcessIslands( btDispatcher* dispatcher,
btCollisionWorld* collisionWorld,
btAlignedObjectArray<btTypedConstraint*>& constraints,
const SolverParams& solverParams
);
virtual void buildIslands(btDispatcher* dispatcher,btCollisionWorld* colWorld);
@@ -106,5 +109,6 @@ public:
}
};
#endif //BT_SIMULATION_ISLAND_MANAGER_H


@@ -14,6 +14,9 @@ SET(LinearMath_SRCS
btSerializer64.cpp
btThreads.cpp
btVector3.cpp
TaskScheduler/btTaskScheduler.cpp
TaskScheduler/btThreadSupportPosix.cpp
TaskScheduler/btThreadSupportWin32.cpp
)
SET(LinearMath_HDRS
@@ -44,6 +47,7 @@ SET(LinearMath_HDRS
btTransform.h
btTransformUtil.h
btVector3.h
TaskScheduler/btThreadSupportInterface.h
)
ADD_LIBRARY(LinearMath ${LinearMath_SRCS} ${LinearMath_HDRS})


@@ -0,0 +1,788 @@
#include "LinearMath/btMinMax.h"
#include "LinearMath/btAlignedObjectArray.h"
#include "LinearMath/btThreads.h"
#include "LinearMath/btQuickprof.h"
#include <stdio.h>
#include <algorithm>
#if BT_THREADSAFE
#include "btThreadSupportInterface.h"
#if defined( _WIN32 )
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#endif
typedef unsigned long long btU64;
static const int kCacheLineSize = 64;
void btSpinPause()
{
#if defined( _WIN32 )
YieldProcessor();
#endif
}
struct WorkerThreadStatus
{
enum Type
{
kInvalid,
kWaitingForWork,
kWorking,
kSleeping,
};
};
ATTRIBUTE_ALIGNED64(class) WorkerThreadDirectives
{
static const int kMaxThreadCount = BT_MAX_THREAD_COUNT;
// directives for all worker threads packed into a single cacheline
char m_threadDirs[kMaxThreadCount];
public:
enum Type
{
kInvalid,
kGoToSleep, // go to sleep
kStayAwakeButIdle, // stay awake, but do not scan the job queue for work
kScanForJobs, // actively scan job queue for jobs
};
WorkerThreadDirectives()
{
for ( int i = 0; i < kMaxThreadCount; ++i )
{
m_threadDirs[ i ] = 0;
}
}
Type getDirective(int threadId)
{
btAssert(threadId < kMaxThreadCount);
return static_cast<Type>(m_threadDirs[threadId]);
}
void setDirectiveByRange(int threadBegin, int threadEnd, Type dir)
{
btAssert( threadBegin < threadEnd );
btAssert( threadEnd <= kMaxThreadCount );
char dirChar = static_cast<char>(dir);
for ( int i = threadBegin; i < threadEnd; ++i )
{
m_threadDirs[ i ] = dirChar;
}
}
};
class JobQueue;
ATTRIBUTE_ALIGNED64(struct) ThreadLocalStorage
{
int m_threadId;
WorkerThreadStatus::Type m_status;
int m_numJobsFinished;
btSpinMutex m_mutex;
btScalar m_sumResult;
WorkerThreadDirectives * m_directive;
JobQueue* m_queue;
btClock* m_clock;
unsigned int m_cooldownTime;
};
struct IJob
{
virtual void executeJob(int threadId) = 0;
};
class ParallelForJob : public IJob
{
const btIParallelForBody* m_body;
int m_begin;
int m_end;
public:
ParallelForJob( int iBegin, int iEnd, const btIParallelForBody& body )
{
m_body = &body;
m_begin = iBegin;
m_end = iEnd;
}
virtual void executeJob(int threadId) BT_OVERRIDE
{
BT_PROFILE( "executeJob" );
// call the functor body to do the work
m_body->forLoop( m_begin, m_end );
}
};
class ParallelSumJob : public IJob
{
const btIParallelSumBody* m_body;
ThreadLocalStorage* m_threadLocalStoreArray;
int m_begin;
int m_end;
public:
ParallelSumJob( int iBegin, int iEnd, const btIParallelSumBody& body, ThreadLocalStorage* tls )
{
m_body = &body;
m_threadLocalStoreArray = tls;
m_begin = iBegin;
m_end = iEnd;
}
virtual void executeJob( int threadId ) BT_OVERRIDE
{
BT_PROFILE( "executeJob" );
// call the functor body to do the work
btScalar val = m_body->sumLoop( m_begin, m_end );
#if BT_PARALLEL_SUM_DETERMINISTISM
// by truncating bits of the result, we can make the parallelSum deterministic (at the expense of precision)
const float TRUNC_SCALE = float(1<<19);
val = floor(val*TRUNC_SCALE+0.5f)/TRUNC_SCALE; // truncate some bits
#endif
m_threadLocalStoreArray[threadId].m_sumResult += val;
}
};
ATTRIBUTE_ALIGNED64(class) JobQueue
{
btThreadSupportInterface* m_threadSupport;
btCriticalSection* m_queueLock;
btSpinMutex m_mutex;
btAlignedObjectArray<IJob*> m_jobQueue;
char* m_jobMem;
int m_jobMemSize;
bool m_queueIsEmpty;
int m_tailIndex;
int m_headIndex;
int m_allocSize;
bool m_useSpinMutex;
btAlignedObjectArray<JobQueue*> m_neighborContexts;
char m_cachePadding[kCacheLineSize]; // prevent false sharing
void freeJobMem()
{
if ( m_jobMem )
{
// free old
btAlignedFree(m_jobMem);
m_jobMem = NULL;
}
}
void resizeJobMem(int newSize)
{
if (newSize > m_jobMemSize)
{
freeJobMem();
m_jobMem = static_cast<char*>(btAlignedAlloc(newSize, kCacheLineSize));
m_jobMemSize = newSize;
}
}
public:
JobQueue()
{
m_jobMem = NULL;
m_jobMemSize = 0;
m_threadSupport = NULL;
m_queueLock = NULL;
m_headIndex = 0;
m_tailIndex = 0;
m_useSpinMutex = false;
}
~JobQueue()
{
freeJobMem();
if (m_queueLock && m_threadSupport)
{
m_threadSupport->deleteCriticalSection(m_queueLock);
m_queueLock = NULL;
}
}
void init(btThreadSupportInterface* threadSup, btAlignedObjectArray<JobQueue>* contextArray)
{
m_threadSupport = threadSup;
if (threadSup)
{
m_queueLock = m_threadSupport->createCriticalSection();
}
setupJobStealing(contextArray, contextArray->size());
}
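// work stealing: each queue records up to two neighboring queues; when consumeJob()
// finds its own queue empty it tries to take a job from one of those neighbors instead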
void setupJobStealing(btAlignedObjectArray<JobQueue>* contextArray, int numActiveContexts)
{
btAlignedObjectArray<JobQueue>& contexts = *contextArray;
int selfIndex = 0;
for (int i = 0; i < contexts.size(); ++i)
{
if ( this == &contexts[ i ] )
{
selfIndex = i;
break;
}
}
int numNeighbors = btMin(2, contexts.size() - 1);
int neighborOffsets[ ] = {-1, 1, -2, 2, -3, 3};
int numOffsets = sizeof(neighborOffsets)/sizeof(neighborOffsets[0]);
m_neighborContexts.reserve( numNeighbors );
m_neighborContexts.resizeNoInitialize(0);
for (int i = 0; i < numOffsets && m_neighborContexts.size() < numNeighbors; i++)
{
int neighborIndex = selfIndex + neighborOffsets[i];
if ( neighborIndex >= 0 && neighborIndex < numActiveContexts)
{
m_neighborContexts.push_back( &contexts[ neighborIndex ] );
}
}
}
bool isQueueEmpty() const {return m_queueIsEmpty;}
void lockQueue()
{
if ( m_useSpinMutex )
{
m_mutex.lock();
}
else
{
m_queueLock->lock();
}
}
void unlockQueue()
{
if ( m_useSpinMutex )
{
m_mutex.unlock();
}
else
{
m_queueLock->unlock();
}
}
void clearQueue(int jobCount, int jobSize)
{
lockQueue();
m_headIndex = 0;
m_tailIndex = 0;
m_allocSize = 0;
m_queueIsEmpty = true;
int jobBufSize = jobSize * jobCount;
// make sure we have enough memory allocated to store jobs
if ( jobBufSize > m_jobMemSize )
{
resizeJobMem( jobBufSize );
}
// make sure job queue is big enough
if ( jobCount > m_jobQueue.capacity() )
{
m_jobQueue.reserve( jobCount );
}
unlockQueue();
m_jobQueue.resizeNoInitialize( 0 );
}
void* allocJobMem(int jobSize)
{
btAssert(m_jobMemSize >= (m_allocSize + jobSize));
void* jobMem = &m_jobMem[m_allocSize];
m_allocSize += jobSize;
return jobMem;
}
void submitJob( IJob* job )
{
btAssert( reinterpret_cast<char*>( job ) >= &m_jobMem[ 0 ] && reinterpret_cast<char*>( job ) < &m_jobMem[ 0 ] + m_allocSize );
m_jobQueue.push_back( job );
lockQueue();
m_tailIndex++;
m_queueIsEmpty = false;
unlockQueue();
}
IJob* consumeJobFromOwnQueue()
{
if ( m_queueIsEmpty )
{
// lock free path. even if this is taken erroneously it isn't harmful
return NULL;
}
IJob* job = NULL;
lockQueue();
if ( !m_queueIsEmpty )
{
job = m_jobQueue[ m_headIndex++ ];
btAssert( reinterpret_cast<char*>( job ) >= &m_jobMem[ 0 ] && reinterpret_cast<char*>( job ) < &m_jobMem[ 0 ] + m_allocSize );
if ( m_headIndex == m_tailIndex )
{
m_queueIsEmpty = true;
}
}
unlockQueue();
return job;
}
IJob* consumeJob()
{
if (IJob* job = consumeJobFromOwnQueue())
{
return job;
}
// own queue is empty, try to steal from neighbor
for (int i = 0; i < m_neighborContexts.size(); ++i)
{
JobQueue* otherContext = m_neighborContexts[ i ];
if ( IJob* job = otherContext->consumeJobFromOwnQueue() )
{
return job;
}
}
return NULL;
}
};
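// Worker thread main loop: drain the job queue, then spin-wait on it. While spinning the
// thread obeys its directive -- kScanForJobs keeps resetting the cooldown clock, kGoToSleep
// exits the loop, and otherwise the thread puts itself to sleep once the queue has stayed
// empty for m_cooldownTime microseconds.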
static void WorkerThreadFunc( void* userPtr )
{
BT_PROFILE( "WorkerThreadFunc" );
ThreadLocalStorage* localStorage = (ThreadLocalStorage*) userPtr;
JobQueue* jobQueue = localStorage->m_queue;
bool shouldSleep = false;
int threadId = localStorage->m_threadId;
while (! shouldSleep)
{
// do work
localStorage->m_mutex.lock();
while ( IJob* job = jobQueue->consumeJob() )
{
localStorage->m_status = WorkerThreadStatus::kWorking;
job->executeJob( threadId );
localStorage->m_numJobsFinished++;
}
localStorage->m_status = WorkerThreadStatus::kWaitingForWork;
localStorage->m_mutex.unlock();
btU64 clockStart = localStorage->m_clock->getTimeMicroseconds();
// while queue is empty,
while (jobQueue->isQueueEmpty())
{
// todo: spin wait a bit to avoid hammering the empty queue
btSpinPause();
if ( localStorage->m_directive->getDirective(threadId) == WorkerThreadDirectives::kGoToSleep )
{
shouldSleep = true;
break;
}
// if the main thread is still submitting jobs, keep resetting the cooldown clock
if ( localStorage->m_directive->getDirective( threadId ) == WorkerThreadDirectives::kScanForJobs )
{
clockStart = localStorage->m_clock->getTimeMicroseconds(); // reset clock
}
else
{
for ( int i = 0; i < 50; ++i )
{
btSpinPause();
btSpinPause();
btSpinPause();
btSpinPause();
if (localStorage->m_directive->getDirective( threadId ) == WorkerThreadDirectives::kScanForJobs || !jobQueue->isQueueEmpty())
{
break;
}
}
// if no jobs incoming and queue has been empty for the cooldown time, sleep
btU64 timeElapsed = localStorage->m_clock->getTimeMicroseconds() - clockStart;
if (timeElapsed > localStorage->m_cooldownTime)
{
shouldSleep = true;
break;
}
}
}
}
// go to sleep
localStorage->m_mutex.lock();
localStorage->m_status = WorkerThreadStatus::kSleeping;
localStorage->m_mutex.unlock();
}
class btTaskSchedulerDefault : public btITaskScheduler
{
btThreadSupportInterface* m_threadSupport;
WorkerThreadDirectives* m_workerDirective;
btAlignedObjectArray<JobQueue> m_jobQueues;
btAlignedObjectArray<JobQueue*> m_perThreadJobQueues;
btAlignedObjectArray<ThreadLocalStorage> m_threadLocalStorage;
btSpinMutex m_antiNestingLock; // prevent nested parallel-for
btClock m_clock;
int m_numThreads;
int m_numWorkerThreads;
int m_numActiveJobQueues;
int m_maxNumThreads;
int m_numJobs;
static const int kFirstWorkerThreadId = 1;
public:
btTaskSchedulerDefault() : btITaskScheduler("ThreadSupport")
{
m_threadSupport = NULL;
m_workerDirective = NULL;
}
virtual ~btTaskSchedulerDefault()
{
waitForWorkersToSleep();
if (m_threadSupport)
{
delete m_threadSupport;
m_threadSupport = NULL;
}
if (m_workerDirective)
{
btAlignedFree(m_workerDirective);
m_workerDirective = NULL;
}
}
void init()
{
btThreadSupportInterface::ConstructionInfo constructionInfo( "TaskScheduler", WorkerThreadFunc );
m_threadSupport = btThreadSupportInterface::create( constructionInfo );
m_workerDirective = static_cast<WorkerThreadDirectives*>(btAlignedAlloc(sizeof(*m_workerDirective), 64));
m_numWorkerThreads = m_threadSupport->getNumWorkerThreads();
m_maxNumThreads = m_threadSupport->getNumWorkerThreads() + 1;
m_numThreads = m_maxNumThreads;
// ideal to have one job queue for each physical processor (except for the main thread which needs no queue)
int numThreadsPerQueue = m_threadSupport->getLogicalToPhysicalCoreRatio();
int numJobQueues = (numThreadsPerQueue == 1) ? (m_maxNumThreads-1) : (m_maxNumThreads / numThreadsPerQueue);
m_jobQueues.resize(numJobQueues);
m_numActiveJobQueues = numJobQueues;
for ( int i = 0; i < m_jobQueues.size(); ++i )
{
m_jobQueues[i].init( m_threadSupport, &m_jobQueues );
}
m_perThreadJobQueues.resize(m_numThreads);
for ( int i = 0; i < m_numThreads; i++ )
{
JobQueue* jq = NULL;
// only worker threads get a job queue
if (i > 0)
{
if (numThreadsPerQueue == 1)
{
// one queue per worker thread
jq = &m_jobQueues[ i - kFirstWorkerThreadId ];
}
else
{
// 2 threads share each queue
jq = &m_jobQueues[ i / numThreadsPerQueue ];
}
}
m_perThreadJobQueues[i] = jq;
}
m_threadLocalStorage.resize(m_numThreads);
for ( int i = 0; i < m_numThreads; i++ )
{
ThreadLocalStorage& storage = m_threadLocalStorage[i];
storage.m_threadId = i;
storage.m_directive = m_workerDirective;
storage.m_status = WorkerThreadStatus::kSleeping;
storage.m_cooldownTime = 1000; // 1000 microseconds, threads go to sleep after this long if they have nothing to do
storage.m_clock = &m_clock;
storage.m_queue = m_perThreadJobQueues[i];
}
setWorkerDirectives( WorkerThreadDirectives::kGoToSleep ); // no work for them yet
setNumThreads( m_threadSupport->getCacheFriendlyNumThreads() );
}
void setWorkerDirectives(WorkerThreadDirectives::Type dir)
{
m_workerDirective->setDirectiveByRange(kFirstWorkerThreadId, m_numThreads, dir);
}
virtual int getMaxNumThreads() const BT_OVERRIDE
{
return m_maxNumThreads;
}
virtual int getNumThreads() const BT_OVERRIDE
{
return m_numThreads;
}
virtual void setNumThreads( int numThreads ) BT_OVERRIDE
{
m_numThreads = btMax( btMin(numThreads, int(m_maxNumThreads)), 1 );
m_numWorkerThreads = m_numThreads - 1;
m_numActiveJobQueues = 0;
// if there is at least 1 worker,
if ( m_numWorkerThreads > 0 )
{
// re-setup job stealing between queues to avoid attempting to steal from an inactive job queue
JobQueue* lastActiveContext = m_perThreadJobQueues[ m_numThreads - 1 ];
int iLastActiveContext = lastActiveContext - &m_jobQueues[0];
m_numActiveJobQueues = iLastActiveContext + 1;
for ( int i = 0; i < m_jobQueues.size(); ++i )
{
m_jobQueues[ i ].setupJobStealing( &m_jobQueues, m_numActiveJobQueues );
}
}
m_workerDirective->setDirectiveByRange(m_numThreads, BT_MAX_THREAD_COUNT, WorkerThreadDirectives::kGoToSleep);
}
void waitJobs()
{
BT_PROFILE( "waitJobs" );
// have the main thread work until the job queues are empty
int numMainThreadJobsFinished = 0;
for ( int i = 0; i < m_numActiveJobQueues; ++i )
{
while ( IJob* job = m_jobQueues[i].consumeJob() )
{
job->executeJob( 0 );
numMainThreadJobsFinished++;
}
}
// done with jobs for now, tell workers to rest (but not sleep)
setWorkerDirectives( WorkerThreadDirectives::kStayAwakeButIdle );
btU64 clockStart = m_clock.getTimeMicroseconds();
// wait for workers to finish any jobs in progress
while ( true )
{
int numWorkerJobsFinished = 0;
for ( int iThread = kFirstWorkerThreadId; iThread < m_numThreads; ++iThread )
{
ThreadLocalStorage* storage = &m_threadLocalStorage[iThread];
storage->m_mutex.lock();
numWorkerJobsFinished += storage->m_numJobsFinished;
storage->m_mutex.unlock();
}
if (numWorkerJobsFinished + numMainThreadJobsFinished == m_numJobs)
{
break;
}
btU64 timeElapsed = m_clock.getTimeMicroseconds() - clockStart;
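// workers are expected to finish their in-flight jobs in well under a millisecond;
// give up waiting entirely after 100 ms rather than hang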
btAssert(timeElapsed < 1000);
if (timeElapsed > 100000)
{
break;
}
btSpinPause();
}
}
void wakeWorkers(int numWorkersToWake)
{
BT_PROFILE( "wakeWorkers" );
btAssert( m_workerDirective->getDirective(1) == WorkerThreadDirectives::kScanForJobs );
int numDesiredWorkers = btMin(numWorkersToWake, m_numWorkerThreads);
int numActiveWorkers = 0;
for ( int iWorker = 0; iWorker < m_numWorkerThreads; ++iWorker )
{
// note this count of active workers is not necessarily totally reliable, because a worker thread could be
// just about to put itself to sleep. So we may on occasion fail to wake up all the workers. It should be rare.
ThreadLocalStorage& storage = m_threadLocalStorage[ kFirstWorkerThreadId + iWorker ];
if (storage.m_status != WorkerThreadStatus::kSleeping)
{
numActiveWorkers++;
}
}
for ( int iWorker = 0; iWorker < m_numWorkerThreads && numActiveWorkers < numDesiredWorkers; ++iWorker )
{
ThreadLocalStorage& storage = m_threadLocalStorage[ kFirstWorkerThreadId + iWorker ];
if (storage.m_status == WorkerThreadStatus::kSleeping)
{
m_threadSupport->runTask( iWorker, &storage );
numActiveWorkers++;
}
}
}
void waitForWorkersToSleep()
{
BT_PROFILE( "waitForWorkersToSleep" );
setWorkerDirectives( WorkerThreadDirectives::kGoToSleep );
m_threadSupport->waitForAllTasks();
for ( int i = kFirstWorkerThreadId; i < m_numThreads; i++ )
{
ThreadLocalStorage& storage = m_threadLocalStorage[i];
btAssert( storage.m_status == WorkerThreadStatus::kSleeping );
}
}
virtual void sleepWorkerThreadsHint() BT_OVERRIDE
{
BT_PROFILE( "sleepWorkerThreadsHint" );
// hint the task scheduler that we may not be using these threads for a little while
setWorkerDirectives( WorkerThreadDirectives::kGoToSleep );
}
void prepareWorkerThreads()
{
for ( int i = kFirstWorkerThreadId; i < m_numThreads; ++i )
{
ThreadLocalStorage& storage = m_threadLocalStorage[i];
storage.m_mutex.lock();
storage.m_numJobsFinished = 0;
storage.m_mutex.unlock();
}
setWorkerDirectives( WorkerThreadDirectives::kScanForJobs );
}
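// parallelFor/parallelSum split [iBegin, iEnd) into jobs of roughly grainSize iterations,
// placement-new them into the per-thread job queues in round-robin order, wake enough
// workers to cover them, and then let the main thread consume jobs too until all are done.
// If the range is small, there are no workers, or a parallel loop is already in flight
// (m_antiNestingLock), the body simply runs on the calling thread.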
virtual void parallelFor( int iBegin, int iEnd, int grainSize, const btIParallelForBody& body ) BT_OVERRIDE
{
BT_PROFILE( "parallelFor_ThreadSupport" );
btAssert( iEnd >= iBegin );
btAssert( grainSize >= 1 );
int iterationCount = iEnd - iBegin;
if ( iterationCount > grainSize && m_numWorkerThreads > 0 && m_antiNestingLock.tryLock() )
{
typedef ParallelForJob JobType;
int jobCount = ( iterationCount + grainSize - 1 ) / grainSize;
m_numJobs = jobCount;
btAssert( jobCount >= 2 ); // need more than one job for multithreading
int jobSize = sizeof( JobType );
for (int i = 0; i < m_numActiveJobQueues; ++i)
{
m_jobQueues[i].clearQueue( jobCount, jobSize );
}
// prepare worker threads for incoming work
prepareWorkerThreads();
// submit all of the jobs
int iJob = 0;
int iThread = kFirstWorkerThreadId; // first worker thread
for ( int i = iBegin; i < iEnd; i += grainSize )
{
btAssert( iJob < jobCount );
int iE = btMin( i + grainSize, iEnd );
JobQueue* jq = m_perThreadJobQueues[ iThread ];
btAssert(jq);
btAssert((jq - &m_jobQueues[0]) < m_numActiveJobQueues);
void* jobMem = jq->allocJobMem(jobSize);
JobType* job = new ( jobMem ) ParallelForJob( i, iE, body ); // placement new
jq->submitJob( job );
iJob++;
iThread++;
if ( iThread >= m_numThreads )
{
iThread = kFirstWorkerThreadId; // first worker thread
}
}
wakeWorkers( jobCount - 1 );
// put the main thread to work on emptying the job queue and then wait for all workers to finish
waitJobs();
m_antiNestingLock.unlock();
}
else
{
BT_PROFILE( "parallelFor_mainThread" );
// just run on main thread
body.forLoop( iBegin, iEnd );
}
}
virtual btScalar parallelSum( int iBegin, int iEnd, int grainSize, const btIParallelSumBody& body ) BT_OVERRIDE
{
BT_PROFILE( "parallelSum_ThreadSupport" );
btAssert( iEnd >= iBegin );
btAssert( grainSize >= 1 );
int iterationCount = iEnd - iBegin;
if ( iterationCount > grainSize && m_numWorkerThreads > 0 && m_antiNestingLock.tryLock() )
{
typedef ParallelSumJob JobType;
int jobCount = ( iterationCount + grainSize - 1 ) / grainSize;
m_numJobs = jobCount;
btAssert( jobCount >= 2 ); // need more than one job for multithreading
int jobSize = sizeof( JobType );
for (int i = 0; i < m_numActiveJobQueues; ++i)
{
m_jobQueues[i].clearQueue( jobCount, jobSize );
}
// initialize summation
for ( int iThread = 0; iThread < m_numThreads; ++iThread )
{
m_threadLocalStorage[iThread].m_sumResult = btScalar(0);
}
// prepare worker threads for incoming work
prepareWorkerThreads();
// submit all of the jobs
int iJob = 0;
int iThread = kFirstWorkerThreadId; // first worker thread
for ( int i = iBegin; i < iEnd; i += grainSize )
{
btAssert( iJob < jobCount );
int iE = btMin( i + grainSize, iEnd );
JobQueue* jq = m_perThreadJobQueues[ iThread ];
btAssert(jq);
btAssert((jq - &m_jobQueues[0]) < m_numActiveJobQueues);
void* jobMem = jq->allocJobMem(jobSize);
JobType* job = new ( jobMem ) ParallelSumJob( i, iE, body, &m_threadLocalStorage[0] ); // placement new
jq->submitJob( job );
iJob++;
iThread++;
if ( iThread >= m_numThreads )
{
iThread = kFirstWorkerThreadId; // first worker thread
}
}
wakeWorkers( jobCount - 1 );
// put the main thread to work on emptying the job queue and then wait for all workers to finish
waitJobs();
// add up all the thread sums
btScalar sum = btScalar(0);
for ( int iThread = 0; iThread < m_numThreads; ++iThread )
{
sum += m_threadLocalStorage[ iThread ].m_sumResult;
}
m_antiNestingLock.unlock();
return sum;
}
else
{
BT_PROFILE( "parallelSum_mainThread" );
// just run on main thread
return body.sumLoop( iBegin, iEnd );
}
}
};
btITaskScheduler* btCreateDefaultTaskScheduler()
{
btTaskSchedulerDefault* ts = new btTaskSchedulerDefault();
ts->init();
return ts;
}
#else // #if BT_THREADSAFE
btITaskScheduler* btCreateDefaultTaskScheduler()
{
return NULL;
}
#endif // #else // #if BT_THREADSAFE
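A minimal usage sketch (illustrative, not part of this file): install the default scheduler and run a loop through btParallelFor. The body class, helper function, and grain size are assumptions.
#include "LinearMath/btThreads.h"
#include "LinearMath/btAlignedObjectArray.h"
// doubles each element of an array in parallel (illustrative example body)
struct DoubleElementsBody : public btIParallelForBody
{
    btAlignedObjectArray<btScalar>* m_values;
    virtual void forLoop(int iBegin, int iEnd) const BT_OVERRIDE
    {
        for (int i = iBegin; i < iEnd; ++i)
            (*m_values)[i] *= btScalar(2);
    }
};
void runParallelForExample(btAlignedObjectArray<btScalar>& values) // illustrative helper
{
    // non-threadsafe builds return NULL, in which case btParallelFor runs sequentially
    if (btITaskScheduler* scheduler = btCreateDefaultTaskScheduler())
        btSetTaskScheduler(scheduler);
    DoubleElementsBody body;
    body.m_values = &values;
    btParallelFor(0, values.size(), 64, body); // grainSize of 64 is illustrative
}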


@@ -0,0 +1,70 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2018 Erwin Coumans http://bulletphysics.com
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef BT_THREAD_SUPPORT_INTERFACE_H
#define BT_THREAD_SUPPORT_INTERFACE_H
class btCriticalSection
{
public:
btCriticalSection() {}
virtual ~btCriticalSection() {}
virtual void lock() = 0;
virtual void unlock() = 0;
};
class btThreadSupportInterface
{
public:
virtual ~btThreadSupportInterface() {}
virtual int getNumWorkerThreads() const = 0; // number of worker threads (total number of logical processors - 1)
virtual int getCacheFriendlyNumThreads() const = 0; // the number of logical processors sharing a single L3 cache
virtual int getLogicalToPhysicalCoreRatio() const = 0; // the number of logical processors per physical processor (usually 1 or 2)
virtual void runTask( int threadIndex, void* userData ) = 0;
virtual void waitForAllTasks() = 0;
virtual btCriticalSection* createCriticalSection() = 0;
virtual void deleteCriticalSection( btCriticalSection* criticalSection ) = 0;
typedef void( *ThreadFunc )( void* userPtr );
struct ConstructionInfo
{
ConstructionInfo( const char* uniqueName,
ThreadFunc userThreadFunc,
int threadStackSize = 65535
)
:m_uniqueName( uniqueName ),
m_userThreadFunc( userThreadFunc ),
m_threadStackSize( threadStackSize )
{
}
const char* m_uniqueName;
ThreadFunc m_userThreadFunc;
int m_threadStackSize;
};
static btThreadSupportInterface* create( const ConstructionInfo& info );
};
#endif //BT_THREAD_SUPPORT_INTERFACE_H
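A brief sketch of driving this interface directly (illustrative; within this patch its only client is the default task scheduler, and the worker entry point and payload below are assumptions):
#include "btThreadSupportInterface.h"
#include <stdio.h>
static void exampleThreadFunc(void* userPtr) // illustrative worker entry point
{
    printf("worker received payload %p\n", userPtr);
}
void threadSupportExample() // illustrative helper
{
    btThreadSupportInterface::ConstructionInfo info("ExamplePool", exampleThreadFunc);
    btThreadSupportInterface* threads = btThreadSupportInterface::create(info);
    int payload = 42;
    if (threads->getNumWorkerThreads() > 0)
    {
        threads->runTask(0, &payload); // hand one task to worker 0
        threads->waitForAllTasks();    // block until every started task has finished
    }
    delete threads;
}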


@@ -0,0 +1,364 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2018 Erwin Coumans http://bulletphysics.com
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#if BT_THREADSAFE && !defined( _WIN32 )
#include "LinearMath/btScalar.h"
#include "LinearMath/btAlignedObjectArray.h"
#include "LinearMath/btThreads.h"
#include "LinearMath/btMinMax.h"
#include "btThreadSupportInterface.h"
#include <stdio.h>
#include <errno.h>
#include <unistd.h>
#ifndef _XOPEN_SOURCE
#define _XOPEN_SOURCE 600 //for definition of pthread_barrier_t, see http://pages.cs.wisc.edu/~travitch/pthreads_primer.html
#endif //_XOPEN_SOURCE
#include <pthread.h>
#include <semaphore.h>
#include <unistd.h> //for sysconf
///
/// getNumHardwareThreads()
///
///
/// https://stackoverflow.com/questions/150355/programmatically-find-the-number-of-cores-on-a-machine
///
#if __cplusplus >= 201103L
#include <thread>
int btGetNumHardwareThreads()
{
return std::thread::hardware_concurrency();
}
#else
int btGetNumHardwareThreads()
{
return sysconf( _SC_NPROCESSORS_ONLN );
}
#endif
// btThreadSupportPosix manages a pool of POSIX worker threads: it starts and stops the threads and handles the start/finish signalling between them and the main thread
class btThreadSupportPosix : public btThreadSupportInterface
{
public:
struct btThreadStatus
{
int m_taskId;
int m_commandId;
int m_status;
ThreadFunc m_userThreadFunc;
void* m_userPtr; //for taskDesc etc
pthread_t thread;
//each thread will wait for this signal before starting its work
sem_t* startSemaphore;
// this is a copy of m_mainSemaphore;
//each thread signals it once it is finished with its work
sem_t* m_mainSemaphore;
unsigned long threadUsed;
};
private:
typedef unsigned long long UINT64;
btAlignedObjectArray<btThreadStatus> m_activeThreadStatus;
// m_mainSemaphore is signalled once for each worker thread that finishes its work
sem_t* m_mainSemaphore;
int m_numThreads;
UINT64 m_startedThreadsMask;
void startThreads( const ConstructionInfo& threadInfo );
void stopThreads();
int waitForResponse();
public:
btThreadSupportPosix( const ConstructionInfo& threadConstructionInfo );
virtual ~btThreadSupportPosix();
virtual int getNumWorkerThreads() const BT_OVERRIDE { return m_numThreads; }
// TODO: return the number of logical processors sharing the first L3 cache
virtual int getCacheFriendlyNumThreads() const BT_OVERRIDE { return m_numThreads + 1; }
// TODO: detect if CPU has hyperthreading enabled
virtual int getLogicalToPhysicalCoreRatio() const BT_OVERRIDE { return 1; }
virtual void runTask( int threadIndex, void* userData ) BT_OVERRIDE;
virtual void waitForAllTasks() BT_OVERRIDE;
virtual btCriticalSection* createCriticalSection() BT_OVERRIDE;
virtual void deleteCriticalSection( btCriticalSection* criticalSection ) BT_OVERRIDE;
};
#define checkPThreadFunction(returnValue) \
if(0 != returnValue) { \
printf("PThread problem at line %i in file %s: %i %d\n", __LINE__, __FILE__, returnValue, errno); \
}
// The number of threads should be equal to the number of available cores
// Todo: each worker should be linked to a single core, using SetThreadIdealProcessor.
btThreadSupportPosix::btThreadSupportPosix( const ConstructionInfo& threadConstructionInfo )
{
startThreads( threadConstructionInfo );
}
// shut down the worker threads
btThreadSupportPosix::~btThreadSupportPosix()
{
stopThreads();
}
#if (defined (__APPLE__))
#define NAMED_SEMAPHORES
#endif
static sem_t* createSem( const char* baseName )
{
static int semCount = 0;
#ifdef NAMED_SEMAPHORES
/// Named semaphore begin
char name[ 32 ];
snprintf( name, 32, "/%8.s-%4.d-%4.4d", baseName, getpid(), semCount++ );
sem_t* tempSem = sem_open( name, O_CREAT, 0600, 0 );
if ( tempSem != reinterpret_cast<sem_t *>( SEM_FAILED ) )
{
// printf("Created \"%s\" Semaphore %p\n", name, tempSem);
}
else
{
//printf("Error creating Semaphore %d\n", errno);
exit( -1 );
}
/// Named semaphore end
#else
sem_t* tempSem = new sem_t;
checkPThreadFunction( sem_init( tempSem, 0, 0 ) );
#endif
return tempSem;
}
static void destroySem( sem_t* semaphore )
{
#ifdef NAMED_SEMAPHORES
checkPThreadFunction( sem_close( semaphore ) );
#else
checkPThreadFunction( sem_destroy( semaphore ) );
delete semaphore;
#endif
}
static void *threadFunction( void *argument )
{
btThreadSupportPosix::btThreadStatus* status = ( btThreadSupportPosix::btThreadStatus* )argument;
while ( 1 )
{
checkPThreadFunction( sem_wait( status->startSemaphore ) );
void* userPtr = status->m_userPtr;
if ( userPtr )
{
btAssert( status->m_status );
status->m_userThreadFunc( userPtr );
status->m_status = 2;
checkPThreadFunction( sem_post( status->m_mainSemaphore ) );
status->threadUsed++;
}
else
{
//exit Thread
status->m_status = 3;
checkPThreadFunction( sem_post( status->m_mainSemaphore ) );
printf( "Thread with taskId %i exiting\n", status->m_taskId );
break;
}
}
printf( "Thread TERMINATED\n" );
}
/// dispatch a task to the given worker thread; 'waitForResponse' waits for any worker to finish
void btThreadSupportPosix::runTask( int threadIndex, void* userData )
{
btAssert( threadIndex >= 0 );
btAssert( threadIndex < m_activeThreadStatus.size() );
btThreadStatus& threadStatus = m_activeThreadStatus[ threadIndex ];
threadStatus.m_commandId = 1;
threadStatus.m_status = 1;
threadStatus.m_userPtr = userData;
m_startedThreadsMask |= UINT64( 1 ) << threadIndex;
// fire event to start new task
checkPThreadFunction( sem_post( threadStatus.startSemaphore ) );
}
/// wait for any worker thread to finish its task and return that thread's index
int btThreadSupportPosix::waitForResponse()
{
btAssert( m_activeThreadStatus.size() );
// wait for any of the threads to finish
checkPThreadFunction( sem_wait( m_mainSemaphore ) );
// get at least one thread which has finished
int last = -1;
for ( int t = 0; t < m_activeThreadStatus.size(); ++t )
{
if ( 2 == m_activeThreadStatus[ t ].m_status )
{
last = t;
break;
}
}
btAssert( last >= 0 ); // at least one worker must have finished, since m_mainSemaphore was posted
btThreadStatus& threadStatus = m_activeThreadStatus[ last ];
btAssert( threadStatus.m_status > 1 );
threadStatus.m_status = 0;
m_startedThreadsMask &= ~( UINT64( 1 ) << last );
return last;
}
void btThreadSupportPosix::waitForAllTasks()
{
while ( m_startedThreadsMask )
{
waitForResponse();
}
}
void btThreadSupportPosix::startThreads( const ConstructionInfo& threadConstructionInfo )
{
m_numThreads = btGetNumHardwareThreads() - 1; // main thread exists already
if ( m_numThreads < 0 ) { m_numThreads = 0; } // hardware-thread detection can report 0
printf( "%s creating %i threads.\n", __FUNCTION__, m_numThreads );
m_activeThreadStatus.resize( m_numThreads );
m_startedThreadsMask = 0;
m_mainSemaphore = createSem( "main" );
//checkPThreadFunction(sem_wait(mainSemaphore));
for ( int i = 0; i < m_numThreads; i++ )
{
printf( "starting thread %d\n", i );
btThreadStatus& threadStatus = m_activeThreadStatus[ i ];
threadStatus.startSemaphore = createSem( "threadLocal" );
checkPThreadFunction( pthread_create( &threadStatus.thread, NULL, &threadFunction, (void*) &threadStatus ) );
threadStatus.m_userPtr = 0;
threadStatus.m_taskId = i;
threadStatus.m_commandId = 0;
threadStatus.m_status = 0;
threadStatus.m_mainSemaphore = m_mainSemaphore;
threadStatus.m_userThreadFunc = threadConstructionInfo.m_userThreadFunc;
threadStatus.threadUsed = 0;
printf( "started thread %d \n", i );
}
}
/// signal all worker threads to exit, then join and clean them up
void btThreadSupportPosix::stopThreads()
{
for ( size_t t = 0; t < size_t( m_activeThreadStatus.size() ); ++t )
{
btThreadStatus& threadStatus = m_activeThreadStatus[ t ];
printf( "%s: Thread %i used: %ld\n", __FUNCTION__, int( t ), threadStatus.threadUsed );
threadStatus.m_userPtr = 0;
checkPThreadFunction( sem_post( threadStatus.startSemaphore ) );
checkPThreadFunction( sem_wait( m_mainSemaphore ) );
printf( "destroy semaphore\n" );
destroySem( threadStatus.startSemaphore );
printf( "semaphore destroyed\n" );
checkPThreadFunction( pthread_join( threadStatus.thread, 0 ) );
}
printf( "destroy main semaphore\n" );
destroySem( m_mainSemaphore );
printf( "main semaphore destroyed\n" );
m_activeThreadStatus.clear();
}
class btCriticalSectionPosix : public btCriticalSection
{
pthread_mutex_t m_mutex;
public:
btCriticalSectionPosix()
{
pthread_mutex_init( &m_mutex, NULL );
}
virtual ~btCriticalSectionPosix()
{
pthread_mutex_destroy( &m_mutex );
}
virtual void lock()
{
pthread_mutex_lock( &m_mutex );
}
virtual void unlock()
{
pthread_mutex_unlock( &m_mutex );
}
};
btCriticalSection* btThreadSupportPosix::createCriticalSection()
{
return new btCriticalSectionPosix();
}
void btThreadSupportPosix::deleteCriticalSection( btCriticalSection* cs )
{
delete cs;
}
btThreadSupportInterface* btThreadSupportInterface::create( const ConstructionInfo& info )
{
return new btThreadSupportPosix( info );
}
#endif // BT_THREADSAFE && !defined( _WIN32 )
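#if BT_THREADSAFE && !defined( _WIN32 )
// Editor's usage sketch (illustrative only, not part of the file above): drives the
// runTask()/waitForAllTasks() protocol implemented by btThreadSupportPosix. The
// ConstructionInfo constructor arguments are assumed from the fields this file reads
// (m_uniqueName, m_userThreadFunc, m_threadStackSize); see btThreadSupportInterface.h
// for the actual declaration.
#include "btThreadSupportInterface.h"
#include "LinearMath/btAlignedObjectArray.h"
static void exampleWorkerFunc( void* userPtr )
{
int* sliceIndex = static_cast<int*>( userPtr );
// ... process work slice *sliceIndex here ...
(void) sliceIndex;
}
static void exampleDispatch()
{
btThreadSupportInterface::ConstructionInfo info( "example", exampleWorkerFunc ); // assumed ctor argument order
btThreadSupportInterface* threads = btThreadSupportInterface::create( info );
const int numWorkers = threads->getNumWorkerThreads();
btAlignedObjectArray<int> slices;
slices.resize( numWorkers );
for ( int i = 0; i < numWorkers; ++i )
{
slices[ i ] = i;
threads->runTask( i, &slices[ i ] ); // posts worker i's start semaphore with this payload
}
threads->waitForAllTasks(); // blocks on m_mainSemaphore until every started worker reports done
delete threads; // stops and joins all worker threads
}
#endif // BT_THREADSAFE && !defined( _WIN32 )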

View File

@@ -0,0 +1,472 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2018 Erwin Coumans http://bulletphysics.com
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#if defined( _WIN32 ) && BT_THREADSAFE
#include "LinearMath/btScalar.h"
#include "LinearMath/btMinMax.h"
#include "LinearMath/btAlignedObjectArray.h"
#include "LinearMath/btThreads.h"
#include "btThreadSupportInterface.h"
#include <windows.h>
#include <stdio.h>
struct btProcessorInfo
{
int numLogicalProcessors;
int numCores;
int numNumaNodes;
int numL1Cache;
int numL2Cache;
int numL3Cache;
int numPhysicalPackages;
static const int maxNumTeamMasks = 32;
int numTeamMasks;
UINT64 processorTeamMasks[ maxNumTeamMasks ];
};
UINT64 getProcessorTeamMask( const btProcessorInfo& procInfo, int procId )
{
UINT64 procMask = UINT64( 1 ) << procId;
for ( int i = 0; i < procInfo.numTeamMasks; ++i )
{
if ( procMask & procInfo.processorTeamMasks[ i ] )
{
return procInfo.processorTeamMasks[ i ];
}
}
return 0;
}
int getProcessorTeamIndex( const btProcessorInfo& procInfo, int procId )
{
UINT64 procMask = UINT64( 1 ) << procId;
for ( int i = 0; i < procInfo.numTeamMasks; ++i )
{
if ( procMask & procInfo.processorTeamMasks[ i ] )
{
return i;
}
}
return -1;
}
int countSetBits( ULONG64 bits )
{
int count = 0;
while ( bits )
{
if ( bits & 1 )
{
count++;
}
bits >>= 1;
}
return count;
}
typedef BOOL( WINAPI *Pfn_GetLogicalProcessorInformation )( PSYSTEM_LOGICAL_PROCESSOR_INFORMATION, PDWORD );
void getProcessorInformation( btProcessorInfo* procInfo )
{
memset( procInfo, 0, sizeof( *procInfo ) );
Pfn_GetLogicalProcessorInformation getLogicalProcInfo =
(Pfn_GetLogicalProcessorInformation) GetProcAddress( GetModuleHandle( TEXT( "kernel32" ) ), "GetLogicalProcessorInformation" );
if ( getLogicalProcInfo == NULL )
{
// no info
return;
}
PSYSTEM_LOGICAL_PROCESSOR_INFORMATION buf = NULL;
DWORD bufSize = 0;
while ( true )
{
if ( getLogicalProcInfo( buf, &bufSize ) )
{
break;
}
else
{
if ( GetLastError() == ERROR_INSUFFICIENT_BUFFER )
{
if ( buf )
{
free( buf );
}
buf = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION) malloc( bufSize );
}
else
{
// unexpected failure: free the buffer and bail out rather than looping forever
if ( buf )
{
free( buf );
}
return;
}
}
}
int len = bufSize / sizeof( *buf );
for ( int i = 0; i < len; ++i )
{
PSYSTEM_LOGICAL_PROCESSOR_INFORMATION info = buf + i;
switch ( info->Relationship )
{
case RelationNumaNode:
procInfo->numNumaNodes++;
break;
case RelationProcessorCore:
procInfo->numCores++;
procInfo->numLogicalProcessors += countSetBits( info->ProcessorMask );
break;
case RelationCache:
if ( info->Cache.Level == 1 )
{
procInfo->numL1Cache++;
}
else if ( info->Cache.Level == 2 )
{
procInfo->numL2Cache++;
}
else if ( info->Cache.Level == 3 )
{
procInfo->numL3Cache++;
// processors that share L3 cache are considered to be on the same team
// because they can more easily work together on the same data.
// Large performance penalties will occur if 2 or more threads from different
// teams attempt to frequently read and modify the same cache lines.
//
// On the AMD Ryzen 7 CPU for example, the 8 cores on the CPU are split into
// 2 CCX units of 4 cores each. Each CCX has a separate L3 cache, so if both
// CCXs are operating on the same data, many cycles will be spent keeping the
// two caches coherent.
if ( procInfo->numTeamMasks < btProcessorInfo::maxNumTeamMasks )
{
procInfo->processorTeamMasks[ procInfo->numTeamMasks ] = info->ProcessorMask;
procInfo->numTeamMasks++;
}
}
break;
case RelationProcessorPackage:
procInfo->numPhysicalPackages++;
break;
}
}
free( buf );
}
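// Editor's illustrative sketch (hypothetical helper, not called by the library): shows how
// the team masks gathered above group logical processors. The masks 0x0F and 0xF0 are
// assumed values for an 8-processor CPU split into two 4-wide L3 groups (e.g. two Ryzen CCXs).
static void demoProcessorTeamMasks()
{
btProcessorInfo procInfo;
memset( &procInfo, 0, sizeof( procInfo ) );
procInfo.numTeamMasks = 2;
procInfo.processorTeamMasks[ 0 ] = 0x0F; // processors 0-3 share the first L3 cache
procInfo.processorTeamMasks[ 1 ] = 0xF0; // processors 4-7 share the second L3 cache
btAssert( getProcessorTeamMask( procInfo, 2 ) == 0x0F ); // processor 2 is on the first team
btAssert( getProcessorTeamIndex( procInfo, 6 ) == 1 ); // processor 6 is on the second team
// a worker whose affinity mask is (teamMask & processAffinity) never migrates across teams,
// so cache-line ownership never has to bounce between the two L3 caches
}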
///btThreadSupportWin32 manages a pool of Win32 worker threads: thread creation/shutdown, task dispatch and completion events
class btThreadSupportWin32 : public btThreadSupportInterface
{
public:
struct btThreadStatus
{
int m_taskId;
int m_commandId;
int m_status;
ThreadFunc m_userThreadFunc;
void* m_userPtr; //for taskDesc etc
void* m_threadHandle; //this one is calling 'Win32ThreadFunc'
void* m_eventStartHandle;
char m_eventStartHandleName[ 32 ];
void* m_eventCompleteHandle;
char m_eventCompleteHandleName[ 32 ];
};
private:
btAlignedObjectArray<btThreadStatus> m_activeThreadStatus;
btAlignedObjectArray<void*> m_completeHandles;
int m_numThreads;
DWORD_PTR m_startedThreadMask;
btProcessorInfo m_processorInfo;
void startThreads( const ConstructionInfo& threadInfo );
void stopThreads();
int waitForResponse();
public:
btThreadSupportWin32( const ConstructionInfo& threadConstructionInfo );
virtual ~btThreadSupportWin32();
virtual int getNumWorkerThreads() const BT_OVERRIDE { return m_numThreads; }
virtual int getCacheFriendlyNumThreads() const BT_OVERRIDE { return countSetBits(m_processorInfo.processorTeamMasks[0]); }
virtual int getLogicalToPhysicalCoreRatio() const BT_OVERRIDE { return m_processorInfo.numLogicalProcessors / m_processorInfo.numCores; }
virtual void runTask( int threadIndex, void* userData ) BT_OVERRIDE;
virtual void waitForAllTasks() BT_OVERRIDE;
virtual btCriticalSection* createCriticalSection() BT_OVERRIDE;
virtual void deleteCriticalSection( btCriticalSection* criticalSection ) BT_OVERRIDE;
};
btThreadSupportWin32::btThreadSupportWin32( const ConstructionInfo & threadConstructionInfo )
{
startThreads( threadConstructionInfo );
}
btThreadSupportWin32::~btThreadSupportWin32()
{
stopThreads();
}
DWORD WINAPI win32threadStartFunc( LPVOID lpParam )
{
btThreadSupportWin32::btThreadStatus* status = ( btThreadSupportWin32::btThreadStatus* )lpParam;
while ( 1 )
{
WaitForSingleObject( status->m_eventStartHandle, INFINITE );
void* userPtr = status->m_userPtr;
if ( userPtr )
{
btAssert( status->m_status );
status->m_userThreadFunc( userPtr );
status->m_status = 2;
SetEvent( status->m_eventCompleteHandle );
}
else
{
//exit Thread
status->m_status = 3;
printf( "Thread with taskId %i with handle %p exiting\n", status->m_taskId, status->m_threadHandle );
SetEvent( status->m_eventCompleteHandle );
break;
}
}
printf( "Thread TERMINATED\n" );
return 0;
}
void btThreadSupportWin32::runTask( int threadIndex, void* userData )
{
btThreadStatus& threadStatus = m_activeThreadStatus[ threadIndex ];
btAssert( threadIndex >= 0 );
btAssert( int( threadIndex ) < m_activeThreadStatus.size() );
threadStatus.m_commandId = 1;
threadStatus.m_status = 1;
threadStatus.m_userPtr = userData;
m_startedThreadMask |= DWORD_PTR( 1 ) << threadIndex;
///fire event to start new task
SetEvent( threadStatus.m_eventStartHandle );
}
int btThreadSupportWin32::waitForResponse()
{
btAssert( m_activeThreadStatus.size() );
int last = -1;
DWORD res = WaitForMultipleObjects( m_completeHandles.size(), &m_completeHandles[ 0 ], FALSE, INFINITE );
btAssert( res != WAIT_FAILED );
last = res - WAIT_OBJECT_0;
btThreadStatus& threadStatus = m_activeThreadStatus[ last ];
btAssert( threadStatus.m_threadHandle );
btAssert( threadStatus.m_eventCompleteHandle );
//WaitForSingleObject(threadStatus.m_eventCompleteHandle, INFINITE);
btAssert( threadStatus.m_status > 1 );
threadStatus.m_status = 0;
// this worker is idle again; clear its bit from the started-threads mask
btAssert( last >= 0 );
m_startedThreadMask &= ~( DWORD_PTR( 1 ) << last );
return last;
}
void btThreadSupportWin32::waitForAllTasks()
{
while ( m_startedThreadMask )
{
waitForResponse();
}
}
void btThreadSupportWin32::startThreads( const ConstructionInfo& threadConstructionInfo )
{
static int uniqueId = 0;
uniqueId++;
btProcessorInfo& procInfo = m_processorInfo;
getProcessorInformation( &procInfo );
DWORD_PTR dwProcessAffinityMask = 0;
DWORD_PTR dwSystemAffinityMask = 0;
if ( !GetProcessAffinityMask( GetCurrentProcess(), &dwProcessAffinityMask, &dwSystemAffinityMask ) )
{
dwProcessAffinityMask = 0;
}
///The number of worker threads should equal the number of available logical processors minus one (the main thread)
m_numThreads = btMin(procInfo.numLogicalProcessors, int(BT_MAX_THREAD_COUNT)) - 1; // cap to max thread count (-1 because main thread already exists)
m_activeThreadStatus.resize( m_numThreads );
m_completeHandles.resize( m_numThreads );
m_startedThreadMask = 0;
// set main thread affinity
if ( DWORD_PTR mask = dwProcessAffinityMask & getProcessorTeamMask( procInfo, 0 ))
{
SetThreadAffinityMask( GetCurrentThread(), mask );
SetThreadIdealProcessor( GetCurrentThread(), 0 );
}
for ( int i = 0; i < m_numThreads; i++ )
{
printf( "starting thread %d\n", i );
btThreadStatus& threadStatus = m_activeThreadStatus[ i ];
LPSECURITY_ATTRIBUTES lpThreadAttributes = NULL;
SIZE_T dwStackSize = threadConstructionInfo.m_threadStackSize;
LPTHREAD_START_ROUTINE lpStartAddress = &win32threadStartFunc;
LPVOID lpParameter = &threadStatus;
DWORD dwCreationFlags = 0;
LPDWORD lpThreadId = 0;
threadStatus.m_userPtr = 0;
sprintf( threadStatus.m_eventStartHandleName, "es%.8s%d%d", threadConstructionInfo.m_uniqueName, uniqueId, i );
threadStatus.m_eventStartHandle = CreateEventA( 0, false, false, threadStatus.m_eventStartHandleName );
sprintf( threadStatus.m_eventCompleteHandleName, "ec%.8s%d%d", threadConstructionInfo.m_uniqueName, uniqueId, i );
threadStatus.m_eventCompleteHandle = CreateEventA( 0, false, false, threadStatus.m_eventCompleteHandleName );
m_completeHandles[ i ] = threadStatus.m_eventCompleteHandle;
HANDLE handle = CreateThread( lpThreadAttributes, dwStackSize, lpStartAddress, lpParameter, dwCreationFlags, lpThreadId );
//SetThreadPriority( handle, THREAD_PRIORITY_HIGHEST );
// highest priority -- can cause erratic performance when numThreads > numCores
// we don't want worker threads to be higher priority than the main thread or the main thread could get
// totally shut out and unable to tell the workers to stop
//SetThreadPriority( handle, THREAD_PRIORITY_BELOW_NORMAL );
{
int processorId = i + 1; // leave processor 0 for main thread
DWORD_PTR teamMask = getProcessorTeamMask( procInfo, processorId );
if ( teamMask )
{
// bind each thread to only execute on processors of its assigned team
// - for single-socket Intel x86 CPUs this has no effect (only a single, shared L3 cache so there is only 1 team)
// - for multi-socket Intel this will keep threads from migrating from one socket to another
// - for AMD Ryzen this will keep threads from migrating from one CCX to another
DWORD_PTR mask = teamMask & dwProcessAffinityMask;
if ( mask )
{
SetThreadAffinityMask( handle, mask );
}
}
SetThreadIdealProcessor( handle, processorId );
}
threadStatus.m_taskId = i;
threadStatus.m_commandId = 0;
threadStatus.m_status = 0;
threadStatus.m_threadHandle = handle;
threadStatus.m_userThreadFunc = threadConstructionInfo.m_userThreadFunc;
printf( "started %s thread %d with threadHandle %p\n", threadConstructionInfo.m_uniqueName, i, handle );
}
}
/// signal all worker threads to exit, wait for them, and release their handles
void btThreadSupportWin32::stopThreads()
{
for ( int i = 0; i < m_activeThreadStatus.size(); i++ )
{
btThreadStatus& threadStatus = m_activeThreadStatus[ i ];
if ( threadStatus.m_status > 0 )
{
WaitForSingleObject( threadStatus.m_eventCompleteHandle, INFINITE );
}
threadStatus.m_userPtr = NULL;
SetEvent( threadStatus.m_eventStartHandle );
WaitForSingleObject( threadStatus.m_eventCompleteHandle, INFINITE );
CloseHandle( threadStatus.m_eventCompleteHandle );
CloseHandle( threadStatus.m_eventStartHandle );
CloseHandle( threadStatus.m_threadHandle );
}
m_activeThreadStatus.clear();
m_completeHandles.clear();
}
class btWin32CriticalSection : public btCriticalSection
{
private:
CRITICAL_SECTION mCriticalSection;
public:
btWin32CriticalSection()
{
InitializeCriticalSection( &mCriticalSection );
}
~btWin32CriticalSection()
{
DeleteCriticalSection( &mCriticalSection );
}
void lock()
{
EnterCriticalSection( &mCriticalSection );
}
void unlock()
{
LeaveCriticalSection( &mCriticalSection );
}
};
btCriticalSection* btThreadSupportWin32::createCriticalSection()
{
unsigned char* mem = (unsigned char*) btAlignedAlloc( sizeof( btWin32CriticalSection ), 16 );
btWin32CriticalSection* cs = new( mem ) btWin32CriticalSection();
return cs;
}
void btThreadSupportWin32::deleteCriticalSection( btCriticalSection* criticalSection )
{
criticalSection->~btCriticalSection();
btAlignedFree( criticalSection );
}
btThreadSupportInterface* btThreadSupportInterface::create( const ConstructionInfo& info )
{
return new btThreadSupportWin32( info );
}
#endif //defined(_WIN32) && BT_THREADSAFE

View File

@@ -453,6 +453,33 @@ void btParallelFor( int iBegin, int iEnd, int grainSize, const btIParallelForBod
#endif// #if BT_THREADSAFE
}
btScalar btParallelSum( int iBegin, int iEnd, int grainSize, const btIParallelSumBody& body )
{
#if BT_THREADSAFE
#if BT_DETECT_BAD_THREAD_INDEX
if ( !btThreadsAreRunning() )
{
// clear out thread ids
for ( int i = 0; i < BT_MAX_THREAD_COUNT; ++i )
{
gDebugThreadIds[ i ] = kInvalidThreadId;
}
}
#endif // #if BT_DETECT_BAD_THREAD_INDEX
btAssert( gBtTaskScheduler != NULL ); // call btSetTaskScheduler() with a valid task scheduler first!
return gBtTaskScheduler->parallelSum( iBegin, iEnd, grainSize, body );
#else // #if BT_THREADSAFE
// non-parallel version of btParallelSum
btAssert( !"called btParallelFor in non-threadsafe build. enable BT_THREADSAFE" );
return body.sumLoop( iBegin, iEnd );
#endif // #if BT_THREADSAFE
}
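// Editor's setup sketch (illustrative only): typical one-time initialization so that
// btParallelFor / btParallelSum dispatch to worker threads instead of tripping the assert
// above. btCreateDefaultTaskScheduler, btGetSequentialTaskScheduler and btSetTaskScheduler
// are the entry points referenced by this file and btThreads.h.
static void exampleInstallTaskScheduler()
{
btITaskScheduler* scheduler = btCreateDefaultTaskScheduler(); // Win32- or pthreads-based
if ( !scheduler )
{
scheduler = btGetSequentialTaskScheduler(); // always available; runs jobs on the calling thread
}
btSetTaskScheduler( scheduler );
}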
///
/// btTaskSchedulerSequential -- non-threaded implementation of task scheduler
@@ -470,6 +497,11 @@ public:
BT_PROFILE( "parallelFor_sequential" );
body.forLoop( iBegin, iEnd );
}
virtual btScalar parallelSum( int iBegin, int iEnd, int grainSize, const btIParallelSumBody& body ) BT_OVERRIDE
{
BT_PROFILE( "parallelSum_sequential" );
return body.sumLoop( iBegin, iEnd );
}
};
@@ -514,11 +546,25 @@ public:
#pragma omp parallel for schedule( static, 1 )
for ( int i = iBegin; i < iEnd; i += grainSize )
{
BT_PROFILE( "OpenMP_job" );
BT_PROFILE( "OpenMP_forJob" );
body.forLoop( i, ( std::min )( i + grainSize, iEnd ) );
}
btPopThreadsAreRunning();
}
virtual btScalar parallelSum( int iBegin, int iEnd, int grainSize, const btIParallelSumBody& body ) BT_OVERRIDE
{
BT_PROFILE( "parallelFor_OpenMP" );
btPushThreadsAreRunning();
btScalar sum = btScalar( 0 );
#pragma omp parallel for schedule( static, 1 ) reduction(+:sum)
for ( int i = iBegin; i < iEnd; i += grainSize )
{
BT_PROFILE( "OpenMP_sumJob" );
sum += body.sumLoop( i, ( std::min )( i + grainSize, iEnd ) );
}
btPopThreadsAreRunning();
return sum;
}
};
#endif // #if BT_USE_OPENMP && BT_THREADSAFE
@@ -571,22 +617,21 @@ public:
btResetThreadIndexCounter();
}
}
struct BodyAdapter
struct ForBodyAdapter
{
const btIParallelForBody* mBody;
ForBodyAdapter( const btIParallelForBody* body ) : mBody( body ) {}
void operator()( const tbb::blocked_range<int>& range ) const
{
BT_PROFILE( "TBB_job" );
BT_PROFILE( "TBB_forJob" );
mBody->forLoop( range.begin(), range.end() );
}
};
virtual void parallelFor( int iBegin, int iEnd, int grainSize, const btIParallelForBody& body ) BT_OVERRIDE
{
BT_PROFILE( "parallelFor_TBB" );
// TBB dispatch
BodyAdapter tbbBody;
tbbBody.mBody = &body;
ForBodyAdapter tbbBody( &body );
btPushThreadsAreRunning();
tbb::parallel_for( tbb::blocked_range<int>( iBegin, iEnd, grainSize ),
tbbBody,
@@ -594,6 +639,29 @@ public:
);
btPopThreadsAreRunning();
}
struct SumBodyAdapter
{
const btIParallelSumBody* mBody;
btScalar mSum;
SumBodyAdapter( const btIParallelSumBody* body ) : mBody( body ), mSum( btScalar( 0 ) ) {}
SumBodyAdapter( const SumBodyAdapter& src, tbb::split ) : mBody( src.mBody ), mSum( btScalar( 0 ) ) {}
void join( const SumBodyAdapter& src ) { mSum += src.mSum; }
void operator()( const tbb::blocked_range<int>& range )
{
BT_PROFILE( "TBB_sumJob" );
mSum += mBody->sumLoop( range.begin(), range.end() );
}
};
virtual btScalar parallelSum( int iBegin, int iEnd, int grainSize, const btIParallelSumBody& body ) BT_OVERRIDE
{
BT_PROFILE( "parallelSum_TBB" );
SumBodyAdapter tbbBody( &body );
btPushThreadsAreRunning();
tbb::parallel_deterministic_reduce( tbb::blocked_range<int>( iBegin, iEnd, grainSize ), tbbBody );
btPopThreadsAreRunning();
return tbbBody.mSum;
}
};
#endif // #if BT_USE_TBB && BT_THREADSAFE
@@ -605,6 +673,7 @@ public:
class btTaskSchedulerPPL : public btITaskScheduler
{
int m_numThreads;
concurrency::combinable<btScalar> m_sum; // for parallelSum
public:
btTaskSchedulerPPL() : btITaskScheduler( "PPL" )
{
@@ -644,15 +713,16 @@ public:
btResetThreadIndexCounter();
}
}
struct BodyAdapter
struct ForBodyAdapter
{
const btIParallelForBody* mBody;
int mGrainSize;
int mIndexEnd;
ForBodyAdapter( const btIParallelForBody* body, int grainSize, int end ) : mBody( body ), mGrainSize( grainSize ), mIndexEnd( end ) {}
void operator()( int i ) const
{
BT_PROFILE( "PPL_job" );
BT_PROFILE( "PPL_forJob" );
mBody->forLoop( i, ( std::min )( i + mGrainSize, mIndexEnd ) );
}
};
@@ -660,10 +730,7 @@ public:
{
BT_PROFILE( "parallelFor_PPL" );
// PPL dispatch
BodyAdapter pplBody;
pplBody.mBody = &body;
pplBody.mGrainSize = grainSize;
pplBody.mIndexEnd = iEnd;
ForBodyAdapter pplBody( &body, grainSize, iEnd );
btPushThreadsAreRunning();
// note: MSVC 2010 doesn't support partitioner args, so avoid them
concurrency::parallel_for( iBegin,
@@ -673,6 +740,36 @@ public:
);
btPopThreadsAreRunning();
}
struct SumBodyAdapter
{
const btIParallelSumBody* mBody;
concurrency::combinable<btScalar>* mSum;
int mGrainSize;
int mIndexEnd;
SumBodyAdapter( const btIParallelSumBody* body, concurrency::combinable<btScalar>* sum, int grainSize, int end ) : mBody( body ), mSum(sum), mGrainSize( grainSize ), mIndexEnd( end ) {}
void operator()( int i ) const
{
BT_PROFILE( "PPL_sumJob" );
mSum->local() += mBody->sumLoop( i, ( std::min )( i + mGrainSize, mIndexEnd ) );
}
};
static btScalar sumFunc( btScalar a, btScalar b ) { return a + b; }
virtual btScalar parallelSum( int iBegin, int iEnd, int grainSize, const btIParallelSumBody& body ) BT_OVERRIDE
{
BT_PROFILE( "parallelSum_PPL" );
m_sum.clear();
SumBodyAdapter pplBody( &body, &m_sum, grainSize, iEnd );
btPushThreadsAreRunning();
// note: MSVC 2010 doesn't support partitioner args, so avoid them
concurrency::parallel_for( iBegin,
iEnd,
grainSize,
pplBody
);
btPopThreadsAreRunning();
return m_sum.combine( sumFunc );
}
};
#endif // #if BT_USE_PPL && BT_THREADSAFE

View File

@@ -107,6 +107,17 @@ public:
virtual void forLoop( int iBegin, int iEnd ) const = 0;
};
//
// btIParallelSumBody -- subclass this to express work that can be done in parallel
// and produces a sum over all loop elements
//
class btIParallelSumBody
{
public:
virtual ~btIParallelSumBody() {}
virtual btScalar sumLoop( int iBegin, int iEnd ) const = 0;
};
//
// btITaskScheduler -- subclass this to implement a task scheduler that can dispatch work to
// worker threads
@@ -122,6 +133,8 @@ public:
virtual int getNumThreads() const = 0;
virtual void setNumThreads( int numThreads ) = 0;
virtual void parallelFor( int iBegin, int iEnd, int grainSize, const btIParallelForBody& body ) = 0;
virtual btScalar parallelSum( int iBegin, int iEnd, int grainSize, const btIParallelSumBody& body ) = 0;
virtual void sleepWorkerThreadsHint() {} // hint the task scheduler that we may not be using these threads for a little while
// internal use only
virtual void activate();
@@ -143,6 +156,9 @@ btITaskScheduler* btGetTaskScheduler();
// get non-threaded task scheduler (always available)
btITaskScheduler* btGetSequentialTaskScheduler();
// create a default task scheduler (Win32 or pthreads based)
btITaskScheduler* btCreateDefaultTaskScheduler();
// get OpenMP task scheduler (if available, otherwise returns null)
btITaskScheduler* btGetOpenMPTaskScheduler();
@@ -156,5 +172,9 @@ btITaskScheduler* btGetPPLTaskScheduler();
// (iterations may be done out of order, so no dependencies are allowed)
void btParallelFor( int iBegin, int iEnd, int grainSize, const btIParallelForBody& body );
// btParallelSum -- call this to dispatch work like a for-loop, returns the sum of all iterations
// (iterations may be done out of order, so no dependencies are allowed)
btScalar btParallelSum( int iBegin, int iEnd, int grainSize, const btIParallelSumBody& body );
#endif
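// Editor's usage sketch (illustrative, not part of the header above): a minimal
// btIParallelSumBody subclass plus a btParallelSum call. Assumes a task scheduler has
// already been installed via btSetTaskScheduler(); with grainSize 64 the range is split
// into chunks of at least 64 iterations, each chunk summed by sumLoop() on some worker
// thread, and the partial sums reduced into the return value.
#include "LinearMath/btThreads.h"
struct ExampleSumBody : public btIParallelSumBody
{
const btScalar* m_values;
ExampleSumBody( const btScalar* values ) : m_values( values ) {}
virtual btScalar sumLoop( int iBegin, int iEnd ) const BT_OVERRIDE
{
btScalar partialSum = btScalar( 0 );
for ( int i = iBegin; i < iEnd; ++i )
{
partialSum += m_values[ i ]; // chunks are independent: no cross-iteration dependencies allowed
}
return partialSum;
}
};
btScalar exampleSumAll( const btScalar* values, int numValues )
{
return btParallelSum( 0, numValues, 64, ExampleSumBody( values ) ); // grainSize = 64
}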

View File

@@ -9,5 +9,7 @@
}
files {
"*.cpp",
"*.h"
"*.h",
"TaskScheduler/*.cpp",
"TaskScheduler/*.h"
}