Merge pull request #1579 from lunkhound/parallel-solver-wip3
Multithreaded constraint solver
This commit is contained in:
@@ -28,14 +28,14 @@ OPTION(USE_GRAPHICAL_BENCHMARK "Use Graphical Benchmark" ON)
|
||||
OPTION(BUILD_SHARED_LIBS "Use shared libraries" OFF)
|
||||
OPTION(USE_SOFT_BODY_MULTI_BODY_DYNAMICS_WORLD "Use btSoftMultiBodyDynamicsWorld" ON)
|
||||
|
||||
OPTION(BULLET2_USE_THREAD_LOCKS "Build Bullet 2 libraries with mutex locking around certain operations (required for multi-threading)" OFF)
|
||||
IF (BULLET2_USE_THREAD_LOCKS)
|
||||
OPTION(BULLET2_MULTITHREADING "Build Bullet 2 libraries with mutex locking around certain operations (required for multi-threading)" OFF)
|
||||
IF (BULLET2_MULTITHREADING)
|
||||
OPTION(BULLET2_USE_OPEN_MP_MULTITHREADING "Build Bullet 2 with support for multi-threading with OpenMP (requires a compiler with OpenMP support)" OFF)
|
||||
OPTION(BULLET2_USE_TBB_MULTITHREADING "Build Bullet 2 with support for multi-threading with Intel Threading Building Blocks (requires the TBB library to be already installed)" OFF)
|
||||
IF (MSVC)
|
||||
OPTION(BULLET2_USE_PPL_MULTITHREADING "Build Bullet 2 with support for multi-threading with Microsoft Parallel Patterns Library (requires MSVC compiler)" OFF)
|
||||
ENDIF (MSVC)
|
||||
ENDIF (BULLET2_USE_THREAD_LOCKS)
|
||||
ENDIF (BULLET2_MULTITHREADING)
|
||||
|
||||
|
||||
IF(NOT WIN32)
|
||||
@@ -225,12 +225,15 @@ IF(USE_GRAPHICAL_BENCHMARK)
|
||||
ADD_DEFINITIONS( -DUSE_GRAPHICAL_BENCHMARK)
|
||||
ENDIF (USE_GRAPHICAL_BENCHMARK)
|
||||
|
||||
IF(BULLET2_USE_THREAD_LOCKS)
|
||||
IF(BULLET2_MULTITHREADING)
|
||||
ADD_DEFINITIONS( -DBT_THREADSAFE=1 )
|
||||
IF (NOT MSVC)
|
||||
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
|
||||
ENDIF (NOT MSVC)
|
||||
ENDIF (BULLET2_USE_THREAD_LOCKS)
|
||||
IF (NOT WIN32)
|
||||
LINK_LIBRARIES( pthread )
|
||||
ENDIF (NOT WIN32)
|
||||
ENDIF (BULLET2_MULTITHREADING)
|
||||
|
||||
IF (BULLET2_USE_OPEN_MP_MULTITHREADING)
|
||||
ADD_DEFINITIONS("-DBT_USE_OPENMP=1")
|
||||
|
||||
@@ -182,6 +182,14 @@ end
|
||||
trigger = "audio",
|
||||
description = "Enable audio"
|
||||
}
|
||||
newoption
|
||||
{
|
||||
trigger = "enable_multithreading",
|
||||
description = "enable CPU multithreading for bullet2 libs"
|
||||
}
|
||||
if _OPTIONS["enable_multithreading"] then
|
||||
defines {"BT_THREADSAFE=1"}
|
||||
end
|
||||
if _OPTIONS["double"] then
|
||||
defines {"BT_USE_DOUBLE_PRECISION"}
|
||||
end
|
||||
|
||||
@@ -226,7 +226,6 @@ SET(BulletExampleBrowser_SRCS
|
||||
../MultiThreading/b3PosixThreadSupport.cpp
|
||||
../MultiThreading/b3Win32ThreadSupport.cpp
|
||||
../MultiThreading/b3ThreadSupportInterface.cpp
|
||||
../MultiThreading/btTaskScheduler.cpp
|
||||
../RenderingExamples/TinyRendererSetup.cpp
|
||||
../RenderingExamples/TimeSeriesCanvas.cpp
|
||||
../RenderingExamples/TimeSeriesCanvas.h
|
||||
|
||||
@@ -29,17 +29,17 @@ class btCollisionShape;
|
||||
#include "BulletCollision/CollisionDispatch/btCollisionDispatcherMt.h"
|
||||
#include "BulletDynamics/Dynamics/btSimulationIslandManagerMt.h" // for setSplitIslands()
|
||||
#include "BulletDynamics/Dynamics/btDiscreteDynamicsWorldMt.h"
|
||||
#include "BulletDynamics/ConstraintSolver/btSequentialImpulseConstraintSolverMt.h"
|
||||
#include "BulletDynamics/ConstraintSolver/btSequentialImpulseConstraintSolver.h"
|
||||
#include "BulletDynamics/ConstraintSolver/btNNCGConstraintSolver.h"
|
||||
#include "BulletDynamics/MLCPSolvers/btMLCPSolver.h"
|
||||
#include "BulletDynamics/MLCPSolvers/btSolveProjectedGaussSeidel.h"
|
||||
#include "BulletDynamics/MLCPSolvers/btDantzigSolver.h"
|
||||
#include "BulletDynamics/MLCPSolvers/btLemkeSolver.h"
|
||||
#include "../MultiThreading/btTaskScheduler.h"
|
||||
|
||||
|
||||
static int gNumIslands = 0;
|
||||
|
||||
bool gAllowNestedParallelForLoops = false;
|
||||
|
||||
class Profiler
|
||||
{
|
||||
@@ -52,6 +52,10 @@ public:
|
||||
kRecordPredictUnconstrainedMotion,
|
||||
kRecordCreatePredictiveContacts,
|
||||
kRecordIntegrateTransforms,
|
||||
kRecordSolverTotal,
|
||||
kRecordSolverSetup,
|
||||
kRecordSolverIterations,
|
||||
kRecordSolverFinish,
|
||||
kRecordCount
|
||||
};
|
||||
|
||||
@@ -139,6 +143,41 @@ static void profileEndCallback( btDynamicsWorld *world, btScalar timeStep )
|
||||
}
|
||||
|
||||
|
||||
class MySequentialImpulseConstraintSolverMt : public btSequentialImpulseConstraintSolverMt
|
||||
{
|
||||
typedef btSequentialImpulseConstraintSolverMt ParentClass;
|
||||
public:
|
||||
BT_DECLARE_ALIGNED_ALLOCATOR();
|
||||
|
||||
MySequentialImpulseConstraintSolverMt() {}
|
||||
|
||||
// for profiling
|
||||
virtual btScalar solveGroupCacheFriendlySetup(btCollisionObject** bodies,int numBodies,btPersistentManifold** manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal,btIDebugDraw* debugDrawer) BT_OVERRIDE
|
||||
{
|
||||
ProfileHelper prof(Profiler::kRecordSolverSetup);
|
||||
btScalar ret = ParentClass::solveGroupCacheFriendlySetup(bodies, numBodies, manifoldPtr, numManifolds, constraints, numConstraints, infoGlobal, debugDrawer );
|
||||
return ret;
|
||||
}
|
||||
virtual btScalar solveGroupCacheFriendlyIterations( btCollisionObject** bodies, int numBodies, btPersistentManifold** manifoldPtr, int numManifolds, btTypedConstraint** constraints, int numConstraints, const btContactSolverInfo& infoGlobal, btIDebugDraw* debugDrawer ) BT_OVERRIDE
|
||||
{
|
||||
ProfileHelper prof(Profiler::kRecordSolverIterations);
|
||||
btScalar ret = ParentClass::solveGroupCacheFriendlyIterations(bodies, numBodies, manifoldPtr, numManifolds, constraints, numConstraints, infoGlobal, debugDrawer );
|
||||
return ret;
|
||||
}
|
||||
virtual btScalar solveGroupCacheFriendlyFinish(btCollisionObject** bodies,int numBodies,const btContactSolverInfo& infoGlobal) BT_OVERRIDE
|
||||
{
|
||||
ProfileHelper prof(Profiler::kRecordSolverFinish);
|
||||
btScalar ret = ParentClass::solveGroupCacheFriendlyFinish(bodies, numBodies, infoGlobal);
|
||||
return ret;
|
||||
}
|
||||
virtual btScalar solveGroup(btCollisionObject** bodies, int numBodies, btPersistentManifold** manifold, int numManifolds, btTypedConstraint** constraints, int numConstraints, const btContactSolverInfo& info, btIDebugDraw* debugDrawer, btDispatcher* dispatcher) BT_OVERRIDE
|
||||
{
|
||||
ProfileHelper prof(Profiler::kRecordSolverTotal);
|
||||
btScalar ret = ParentClass::solveGroup(bodies, numBodies, manifold, numManifolds, constraints, numConstraints, info, debugDrawer, dispatcher);
|
||||
return ret;
|
||||
}
|
||||
};
|
||||
|
||||
///
|
||||
/// MyCollisionDispatcher -- subclassed for profiling purposes
|
||||
///
|
||||
@@ -161,11 +200,11 @@ public:
|
||||
///
|
||||
/// myParallelIslandDispatch -- wrap default parallel dispatch for profiling and to get the number of simulation islands
|
||||
//
|
||||
void myParallelIslandDispatch( btAlignedObjectArray<btSimulationIslandManagerMt::Island*>* islandsPtr, btSimulationIslandManagerMt::IslandCallback* callback )
|
||||
void myParallelIslandDispatch( btAlignedObjectArray<btSimulationIslandManagerMt::Island*>* islandsPtr, const btSimulationIslandManagerMt::SolverParams& solverParams)
|
||||
{
|
||||
ProfileHelper prof( Profiler::kRecordDispatchIslands );
|
||||
gNumIslands = islandsPtr->size();
|
||||
btSimulationIslandManagerMt::parallelIslandDispatch( islandsPtr, callback );
|
||||
btSimulationIslandManagerMt::parallelIslandDispatch( islandsPtr, solverParams );
|
||||
}
|
||||
|
||||
|
||||
@@ -200,9 +239,10 @@ public:
|
||||
MyDiscreteDynamicsWorld( btDispatcher* dispatcher,
|
||||
btBroadphaseInterface* pairCache,
|
||||
btConstraintSolverPoolMt* constraintSolver,
|
||||
btSequentialImpulseConstraintSolverMt* constraintSolverMt,
|
||||
btCollisionConfiguration* collisionConfiguration
|
||||
) :
|
||||
btDiscreteDynamicsWorldMt( dispatcher, pairCache, constraintSolver, collisionConfiguration )
|
||||
btDiscreteDynamicsWorldMt( dispatcher, pairCache, constraintSolver, constraintSolverMt, collisionConfiguration )
|
||||
{
|
||||
btSimulationIslandManagerMt* islandMgr = static_cast<btSimulationIslandManagerMt*>( m_islandManager );
|
||||
islandMgr->setIslandDispatchFunction( myParallelIslandDispatch );
|
||||
@@ -218,6 +258,8 @@ btConstraintSolver* createSolverByType( SolverType t )
|
||||
{
|
||||
case SOLVER_TYPE_SEQUENTIAL_IMPULSE:
|
||||
return new btSequentialImpulseConstraintSolver();
|
||||
case SOLVER_TYPE_SEQUENTIAL_IMPULSE_MT:
|
||||
return new MySequentialImpulseConstraintSolverMt();
|
||||
case SOLVER_TYPE_NNCG:
|
||||
return new btNNCGConstraintSolver();
|
||||
case SOLVER_TYPE_MLCP_PGS:
|
||||
@@ -253,7 +295,7 @@ public:
|
||||
{
|
||||
addTaskScheduler( btGetSequentialTaskScheduler() );
|
||||
#if BT_THREADSAFE
|
||||
if ( btITaskScheduler* ts = createDefaultTaskScheduler() )
|
||||
if ( btITaskScheduler* ts = btCreateDefaultTaskScheduler() )
|
||||
{
|
||||
m_allocatedTaskSchedulers.push_back( ts );
|
||||
addTaskScheduler( ts );
|
||||
@@ -306,11 +348,12 @@ static btTaskSchedulerManager gTaskSchedulerMgr;
|
||||
#if BT_THREADSAFE
|
||||
static bool gMultithreadedWorld = true;
|
||||
static bool gDisplayProfileInfo = true;
|
||||
static SolverType gSolverType = SOLVER_TYPE_SEQUENTIAL_IMPULSE_MT;
|
||||
#else
|
||||
static bool gMultithreadedWorld = false;
|
||||
static bool gDisplayProfileInfo = false;
|
||||
#endif
|
||||
static SolverType gSolverType = SOLVER_TYPE_SEQUENTIAL_IMPULSE;
|
||||
#endif
|
||||
static int gSolverMode = SOLVER_SIMD |
|
||||
SOLVER_USE_WARMSTARTING |
|
||||
// SOLVER_RANDMIZE_ORDER |
|
||||
@@ -318,9 +361,11 @@ static int gSolverMode = SOLVER_SIMD |
|
||||
// SOLVER_USE_2_FRICTION_DIRECTIONS |
|
||||
0;
|
||||
static btScalar gSliderSolverIterations = 10.0f; // should be int
|
||||
|
||||
static btScalar gSliderNumThreads = 1.0f; // should be int
|
||||
|
||||
static btScalar gSliderIslandBatchingThreshold = 0.0f; // should be int
|
||||
static btScalar gSliderMinBatchSize = btScalar(btSequentialImpulseConstraintSolverMt::s_minBatchSize); // should be int
|
||||
static btScalar gSliderMaxBatchSize = btScalar(btSequentialImpulseConstraintSolverMt::s_maxBatchSize); // should be int
|
||||
static btScalar gSliderLeastSquaresResidualThreshold = 0.0f;
|
||||
|
||||
////////////////////////////////////
|
||||
CommonRigidBodyMTBase::CommonRigidBodyMTBase( struct GUIHelperInterface* helper )
|
||||
@@ -419,6 +464,23 @@ void setTaskSchedulerComboBoxCallback(int combobox, const char* item, void* user
|
||||
}
|
||||
|
||||
|
||||
void setBatchingMethodComboBoxCallback(int combobox, const char* item, void* userPointer)
|
||||
{
|
||||
#if BT_THREADSAFE
|
||||
const char** items = static_cast<const char**>( userPointer );
|
||||
for ( int i = 0; i < btBatchedConstraints::BATCHING_METHOD_COUNT; ++i )
|
||||
{
|
||||
if ( strcmp( item, items[ i ] ) == 0 )
|
||||
{
|
||||
// change the task scheduler
|
||||
btSequentialImpulseConstraintSolverMt::s_contactBatchingMethod = static_cast<btBatchedConstraints::BatchingMethod>( i );
|
||||
break;
|
||||
}
|
||||
}
|
||||
#endif // #if BT_THREADSAFE
|
||||
}
|
||||
|
||||
|
||||
static void setThreadCountCallback(float val, void* userPtr)
|
||||
{
|
||||
#if BT_THREADSAFE
|
||||
@@ -435,13 +497,43 @@ static void setSolverIterationCountCallback(float val, void* userPtr)
|
||||
}
|
||||
}
|
||||
|
||||
static void setLargeIslandManifoldCountCallback( float val, void* userPtr )
|
||||
{
|
||||
btSequentialImpulseConstraintSolverMt::s_minimumContactManifoldsForBatching = int( gSliderIslandBatchingThreshold );
|
||||
}
|
||||
|
||||
static void setMinBatchSizeCallback( float val, void* userPtr )
|
||||
{
|
||||
gSliderMaxBatchSize = (std::max)(gSliderMinBatchSize, gSliderMaxBatchSize);
|
||||
btSequentialImpulseConstraintSolverMt::s_minBatchSize = int(gSliderMinBatchSize);
|
||||
btSequentialImpulseConstraintSolverMt::s_maxBatchSize = int(gSliderMaxBatchSize);
|
||||
}
|
||||
|
||||
static void setMaxBatchSizeCallback( float val, void* userPtr )
|
||||
{
|
||||
gSliderMinBatchSize = (std::min)(gSliderMinBatchSize, gSliderMaxBatchSize);
|
||||
btSequentialImpulseConstraintSolverMt::s_minBatchSize = int(gSliderMinBatchSize);
|
||||
btSequentialImpulseConstraintSolverMt::s_maxBatchSize = int(gSliderMaxBatchSize);
|
||||
}
|
||||
|
||||
static void setLeastSquaresResidualThresholdCallback( float val, void* userPtr )
|
||||
{
|
||||
if (btDiscreteDynamicsWorld* world = reinterpret_cast<btDiscreteDynamicsWorld*>(userPtr))
|
||||
{
|
||||
world->getSolverInfo().m_leastSquaresResidualThreshold = gSliderLeastSquaresResidualThreshold;
|
||||
}
|
||||
}
|
||||
|
||||
void CommonRigidBodyMTBase::createEmptyDynamicsWorld()
|
||||
{
|
||||
gNumIslands = 0;
|
||||
m_solverType = gSolverType;
|
||||
#if BT_THREADSAFE && (BT_USE_OPENMP || BT_USE_PPL || BT_USE_TBB)
|
||||
#if BT_THREADSAFE
|
||||
btAssert( btGetTaskScheduler() != NULL );
|
||||
if (NULL != btGetTaskScheduler() && gTaskSchedulerMgr.getNumTaskSchedulers() > 1)
|
||||
{
|
||||
m_multithreadCapable = true;
|
||||
}
|
||||
#endif
|
||||
if ( gMultithreadedWorld )
|
||||
{
|
||||
@@ -457,16 +549,28 @@ void CommonRigidBodyMTBase::createEmptyDynamicsWorld()
|
||||
|
||||
btConstraintSolverPoolMt* solverPool;
|
||||
{
|
||||
SolverType poolSolverType = m_solverType;
|
||||
if (poolSolverType == SOLVER_TYPE_SEQUENTIAL_IMPULSE_MT)
|
||||
{
|
||||
// pool solvers shouldn't be parallel solvers, we don't allow that kind of
|
||||
// nested parallelism because of performance issues
|
||||
poolSolverType = SOLVER_TYPE_SEQUENTIAL_IMPULSE;
|
||||
}
|
||||
btConstraintSolver* solvers[ BT_MAX_THREAD_COUNT ];
|
||||
int maxThreadCount = BT_MAX_THREAD_COUNT;
|
||||
for ( int i = 0; i < maxThreadCount; ++i )
|
||||
{
|
||||
solvers[ i ] = createSolverByType( m_solverType );
|
||||
solvers[ i ] = createSolverByType( poolSolverType );
|
||||
}
|
||||
solverPool = new btConstraintSolverPoolMt( solvers, maxThreadCount );
|
||||
m_solver = solverPool;
|
||||
}
|
||||
btDiscreteDynamicsWorld* world = new MyDiscreteDynamicsWorld( m_dispatcher, m_broadphase, solverPool, m_collisionConfiguration );
|
||||
btSequentialImpulseConstraintSolverMt* solverMt = NULL;
|
||||
if ( m_solverType == SOLVER_TYPE_SEQUENTIAL_IMPULSE_MT )
|
||||
{
|
||||
solverMt = new MySequentialImpulseConstraintSolverMt();
|
||||
}
|
||||
btDiscreteDynamicsWorld* world = new MyDiscreteDynamicsWorld( m_dispatcher, m_broadphase, solverPool, solverMt, m_collisionConfiguration );
|
||||
m_dynamicsWorld = world;
|
||||
m_multithreadedWorld = true;
|
||||
btAssert( btGetTaskScheduler() != NULL );
|
||||
@@ -486,7 +590,14 @@ void CommonRigidBodyMTBase::createEmptyDynamicsWorld()
|
||||
|
||||
m_broadphase = new btDbvtBroadphase();
|
||||
|
||||
m_solver = createSolverByType( m_solverType );
|
||||
SolverType solverType = m_solverType;
|
||||
if ( solverType == SOLVER_TYPE_SEQUENTIAL_IMPULSE_MT )
|
||||
{
|
||||
// using the parallel solver with the single-threaded world works, but is
|
||||
// disabled here to avoid confusion
|
||||
solverType = SOLVER_TYPE_SEQUENTIAL_IMPULSE;
|
||||
}
|
||||
m_solver = createSolverByType( solverType );
|
||||
|
||||
m_dynamicsWorld = new btDiscreteDynamicsWorld( m_dispatcher, m_broadphase, m_solver, m_collisionConfiguration );
|
||||
}
|
||||
@@ -494,6 +605,7 @@ void CommonRigidBodyMTBase::createEmptyDynamicsWorld()
|
||||
m_dynamicsWorld->setInternalTickCallback( profileEndCallback, NULL, false );
|
||||
m_dynamicsWorld->setGravity( btVector3( 0, -10, 0 ) );
|
||||
m_dynamicsWorld->getSolverInfo().m_solverMode = gSolverMode;
|
||||
m_dynamicsWorld->getSolverInfo().m_numIterations = btMax(1, int(gSliderSolverIterations));
|
||||
createDefaultParameters();
|
||||
}
|
||||
|
||||
@@ -504,16 +616,18 @@ void CommonRigidBodyMTBase::createDefaultParameters()
|
||||
{
|
||||
// create a button to toggle multithreaded world
|
||||
ButtonParams button( "Multithreaded world enable", 0, true );
|
||||
button.m_initialState = gMultithreadedWorld;
|
||||
button.m_userPointer = &gMultithreadedWorld;
|
||||
bool* ptr = &gMultithreadedWorld;
|
||||
button.m_initialState = *ptr;
|
||||
button.m_userPointer = ptr;
|
||||
button.m_callback = boolPtrButtonCallback;
|
||||
m_guiHelper->getParameterInterface()->registerButtonParameter( button );
|
||||
}
|
||||
{
|
||||
// create a button to toggle profile printing
|
||||
ButtonParams button( "Display solver info", 0, true );
|
||||
button.m_initialState = gDisplayProfileInfo;
|
||||
button.m_userPointer = &gDisplayProfileInfo;
|
||||
bool* ptr = &gDisplayProfileInfo;
|
||||
button.m_initialState = *ptr;
|
||||
button.m_userPointer = ptr;
|
||||
button.m_callback = boolPtrButtonCallback;
|
||||
m_guiHelper->getParameterInterface()->registerButtonParameter( button );
|
||||
}
|
||||
@@ -544,6 +658,16 @@ void CommonRigidBodyMTBase::createDefaultParameters()
|
||||
slider.m_clampToIntegers = true;
|
||||
m_guiHelper->getParameterInterface()->registerSliderFloatParameter( slider );
|
||||
}
|
||||
{
|
||||
// a slider for the solver leastSquaresResidualThreshold (used to run fewer solver iterations when convergence is good)
|
||||
SliderParams slider( "Solver residual thresh", &gSliderLeastSquaresResidualThreshold );
|
||||
slider.m_minVal = 0.0f;
|
||||
slider.m_maxVal = 0.25f;
|
||||
slider.m_callback = setLeastSquaresResidualThresholdCallback;
|
||||
slider.m_userPointer = m_dynamicsWorld;
|
||||
slider.m_clampToIntegers = false;
|
||||
m_guiHelper->getParameterInterface()->registerSliderFloatParameter( slider );
|
||||
}
|
||||
{
|
||||
ButtonParams button( "Solver use SIMD", 0, true );
|
||||
button.m_buttonId = SOLVER_SIMD;
|
||||
@@ -618,20 +742,86 @@ void CommonRigidBodyMTBase::createDefaultParameters()
|
||||
m_guiHelper->getParameterInterface()->registerComboBox( comboParams );
|
||||
}
|
||||
{
|
||||
// create a slider to set the number of threads to use
|
||||
int numThreads = btGetTaskScheduler()->getNumThreads();
|
||||
// if slider has not been set yet (by another demo),
|
||||
if ( gSliderNumThreads <= 1.0f )
|
||||
{
|
||||
// create a slider to set the number of threads to use
|
||||
int numThreads = btGetTaskScheduler()->getNumThreads();
|
||||
gSliderNumThreads = float( numThreads );
|
||||
}
|
||||
int maxNumThreads = btGetTaskScheduler()->getMaxNumThreads();
|
||||
SliderParams slider("Thread count", &gSliderNumThreads);
|
||||
slider.m_minVal = 1.0f;
|
||||
slider.m_maxVal = float( BT_MAX_THREAD_COUNT );
|
||||
slider.m_maxVal = float( maxNumThreads );
|
||||
slider.m_callback = setThreadCountCallback;
|
||||
slider.m_clampToIntegers = true;
|
||||
m_guiHelper->getParameterInterface()->registerSliderFloatParameter( slider );
|
||||
}
|
||||
{
|
||||
// a slider for the number of manifolds an island needs to be too large for parallel dispatch
|
||||
if (gSliderIslandBatchingThreshold < 1.0)
|
||||
{
|
||||
gSliderIslandBatchingThreshold = float(btSequentialImpulseConstraintSolverMt::s_minimumContactManifoldsForBatching);
|
||||
}
|
||||
SliderParams slider( "IslandBatchThresh", &gSliderIslandBatchingThreshold );
|
||||
slider.m_minVal = 1.0f;
|
||||
slider.m_maxVal = 2000.0f;
|
||||
slider.m_callback = setLargeIslandManifoldCountCallback;
|
||||
slider.m_userPointer = NULL;
|
||||
slider.m_clampToIntegers = true;
|
||||
m_guiHelper->getParameterInterface()->registerSliderFloatParameter( slider );
|
||||
}
|
||||
{
|
||||
// create a combo box for selecting the batching method
|
||||
static const char* sBatchingMethodComboBoxItems[ btBatchedConstraints::BATCHING_METHOD_COUNT ];
|
||||
{
|
||||
sBatchingMethodComboBoxItems[ btBatchedConstraints::BATCHING_METHOD_SPATIAL_GRID_2D ] = "Batching: 2D Grid";
|
||||
sBatchingMethodComboBoxItems[ btBatchedConstraints::BATCHING_METHOD_SPATIAL_GRID_3D ] = "Batching: 3D Grid";
|
||||
};
|
||||
ComboBoxParams comboParams;
|
||||
comboParams.m_userPointer = sBatchingMethodComboBoxItems;
|
||||
comboParams.m_numItems = btBatchedConstraints::BATCHING_METHOD_COUNT;
|
||||
comboParams.m_startItem = static_cast<int>(btSequentialImpulseConstraintSolverMt::s_contactBatchingMethod);
|
||||
comboParams.m_items = sBatchingMethodComboBoxItems;
|
||||
comboParams.m_callback = setBatchingMethodComboBoxCallback;
|
||||
m_guiHelper->getParameterInterface()->registerComboBox( comboParams );
|
||||
}
|
||||
{
|
||||
// a slider for the sequentialImpulseConstraintSolverMt min batch size (when batching)
|
||||
SliderParams slider( "Min batch size", &gSliderMinBatchSize );
|
||||
slider.m_minVal = 1.0f;
|
||||
slider.m_maxVal = 1000.0f;
|
||||
slider.m_callback = setMinBatchSizeCallback;
|
||||
slider.m_userPointer = NULL;
|
||||
slider.m_clampToIntegers = true;
|
||||
m_guiHelper->getParameterInterface()->registerSliderFloatParameter( slider );
|
||||
}
|
||||
{
|
||||
// a slider for the sequentialImpulseConstraintSolverMt max batch size (when batching)
|
||||
SliderParams slider( "Max batch size", &gSliderMaxBatchSize );
|
||||
slider.m_minVal = 1.0f;
|
||||
slider.m_maxVal = 1000.0f;
|
||||
slider.m_callback = setMaxBatchSizeCallback;
|
||||
slider.m_userPointer = NULL;
|
||||
slider.m_clampToIntegers = true;
|
||||
m_guiHelper->getParameterInterface()->registerSliderFloatParameter( slider );
|
||||
}
|
||||
{
|
||||
// create a button to toggle debug drawing of batching visualization
|
||||
ButtonParams button( "Visualize batching", 0, true );
|
||||
bool* ptr = &btBatchedConstraints::s_debugDrawBatches;
|
||||
button.m_initialState = *ptr;
|
||||
button.m_userPointer = ptr;
|
||||
button.m_callback = boolPtrButtonCallback;
|
||||
m_guiHelper->getParameterInterface()->registerButtonParameter( button );
|
||||
}
|
||||
{
|
||||
ButtonParams button( "Allow Nested ParallelFor", 0, true );
|
||||
button.m_initialState = btSequentialImpulseConstraintSolverMt::s_allowNestedParallelForLoops;
|
||||
button.m_userPointer = &btSequentialImpulseConstraintSolverMt::s_allowNestedParallelForLoops;
|
||||
button.m_callback = boolPtrButtonCallback;
|
||||
m_guiHelper->getParameterInterface()->registerButtonParameter( button );
|
||||
}
|
||||
#endif // #if BT_THREADSAFE
|
||||
}
|
||||
}
|
||||
@@ -643,6 +833,7 @@ void CommonRigidBodyMTBase::drawScreenText()
|
||||
int xCoord = 400;
|
||||
int yCoord = 30;
|
||||
int yStep = 30;
|
||||
int indent = 30;
|
||||
if (m_solverType != gSolverType)
|
||||
{
|
||||
sprintf( msg, "restart example to change solver type" );
|
||||
@@ -721,6 +912,34 @@ void CommonRigidBodyMTBase::drawScreenText()
|
||||
m_guiHelper->getAppInterface()->drawText( msg, xCoord, yCoord, 0.4f );
|
||||
yCoord += yStep;
|
||||
|
||||
sprintf( msg,
|
||||
"SolverTotal %5.3f ms",
|
||||
gProfiler.getAverageTime( Profiler::kRecordSolverTotal )*0.001f
|
||||
);
|
||||
m_guiHelper->getAppInterface()->drawText( msg, xCoord, yCoord, 0.4f );
|
||||
yCoord += yStep;
|
||||
|
||||
sprintf( msg,
|
||||
"SolverSetup %5.3f ms",
|
||||
gProfiler.getAverageTime( Profiler::kRecordSolverSetup )*0.001f
|
||||
);
|
||||
m_guiHelper->getAppInterface()->drawText( msg, xCoord + indent, yCoord, 0.4f );
|
||||
yCoord += yStep;
|
||||
|
||||
sprintf( msg,
|
||||
"SolverIterations %5.3f ms",
|
||||
gProfiler.getAverageTime( Profiler::kRecordSolverIterations )*0.001f
|
||||
);
|
||||
m_guiHelper->getAppInterface()->drawText( msg, xCoord + indent, yCoord, 0.4f );
|
||||
yCoord += yStep;
|
||||
|
||||
sprintf( msg,
|
||||
"SolverFinish %5.3f ms",
|
||||
gProfiler.getAverageTime( Profiler::kRecordSolverFinish )*0.001f
|
||||
);
|
||||
m_guiHelper->getAppInterface()->drawText( msg, xCoord + indent, yCoord, 0.4f );
|
||||
yCoord += yStep;
|
||||
|
||||
sprintf( msg,
|
||||
"PredictUnconstrainedMotion %5.3f ms",
|
||||
gProfiler.getAverageTime( Profiler::kRecordPredictUnconstrainedMotion )*0.001f
|
||||
|
||||
@@ -14,6 +14,7 @@
|
||||
enum SolverType
|
||||
{
|
||||
SOLVER_TYPE_SEQUENTIAL_IMPULSE,
|
||||
SOLVER_TYPE_SEQUENTIAL_IMPULSE_MT,
|
||||
SOLVER_TYPE_NNCG,
|
||||
SOLVER_TYPE_MLCP_PGS,
|
||||
SOLVER_TYPE_MLCP_DANTZIG,
|
||||
@@ -27,6 +28,7 @@ inline const char* getSolverTypeName( SolverType t )
|
||||
switch (t)
|
||||
{
|
||||
case SOLVER_TYPE_SEQUENTIAL_IMPULSE: return "SequentialImpulse";
|
||||
case SOLVER_TYPE_SEQUENTIAL_IMPULSE_MT: return "SequentialImpulseMt";
|
||||
case SOLVER_TYPE_NNCG: return "NNCG";
|
||||
case SOLVER_TYPE_MLCP_PGS: return "MLCP ProjectedGaussSeidel";
|
||||
case SOLVER_TYPE_MLCP_DANTZIG: return "MLCP Dantzig";
|
||||
|
||||
@@ -25,10 +25,10 @@ subject to the following restrictions:
|
||||
|
||||
|
||||
|
||||
static btScalar gSliderStackRows = 8.0f;
|
||||
static btScalar gSliderStackColumns = 6.0f;
|
||||
static btScalar gSliderStackHeight = 10.0f;
|
||||
static btScalar gSliderStackWidth = 1.0f;
|
||||
static btScalar gSliderStackRows = 1.0f;
|
||||
static btScalar gSliderStackColumns = 1.0f;
|
||||
static btScalar gSliderStackHeight = 15.0f;
|
||||
static btScalar gSliderStackWidth = 8.0f;
|
||||
static btScalar gSliderGroundHorizontalAmplitude = 0.0f;
|
||||
static btScalar gSliderGroundVerticalAmplitude = 0.0f;
|
||||
static btScalar gSliderGroundTilt = 0.0f;
|
||||
@@ -75,6 +75,21 @@ public:
|
||||
btScalar tilt = gSliderGroundTilt * SIMD_2_PI / 360.0f;
|
||||
return btQuaternion( btVector3( 1.0f, 0.0f, 0.0f ), tilt );
|
||||
}
|
||||
struct TestSumBody : public btIParallelSumBody
|
||||
{
|
||||
virtual btScalar sumLoop( int iBegin, int iEnd ) const BT_OVERRIDE
|
||||
{
|
||||
btScalar sum = 0.0f;
|
||||
for (int i = iBegin; i < iEnd; ++i)
|
||||
{
|
||||
if (i > 0)
|
||||
{
|
||||
sum += 1.0f / btScalar(i);
|
||||
}
|
||||
}
|
||||
return sum;
|
||||
}
|
||||
};
|
||||
virtual void stepSimulation( float deltaTime ) BT_OVERRIDE
|
||||
{
|
||||
if ( m_dynamicsWorld )
|
||||
@@ -115,6 +130,14 @@ public:
|
||||
// always step by 1/60 for benchmarking
|
||||
m_dynamicsWorld->stepSimulation( 1.0f / 60.0f, 0 );
|
||||
}
|
||||
#if 0
|
||||
{
|
||||
// test parallelSum
|
||||
TestSumBody testSumBody;
|
||||
float testSum = btParallelSum( 1, 10000000, 10000, testSumBody );
|
||||
printf( "sum = %f\n", testSum );
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
virtual void initPhysics() BT_OVERRIDE;
|
||||
|
||||
@@ -1,448 +0,0 @@
|
||||
|
||||
#include "LinearMath/btTransform.h"
|
||||
#include "../Utils/b3Clock.h"
|
||||
#include "LinearMath/btAlignedObjectArray.h"
|
||||
#include "LinearMath/btThreads.h"
|
||||
#include "LinearMath/btQuickprof.h"
|
||||
#include <stdio.h>
|
||||
#include <algorithm>
|
||||
|
||||
|
||||
typedef void( *btThreadFunc )( void* userPtr, void* lsMemory );
|
||||
typedef void* ( *btThreadLocalStorageFunc )();
|
||||
|
||||
#if BT_THREADSAFE
|
||||
|
||||
#if defined( _WIN32 )
|
||||
|
||||
#include "b3Win32ThreadSupport.h"
|
||||
|
||||
b3ThreadSupportInterface* createThreadSupport( int numThreads, btThreadFunc threadFunc, btThreadLocalStorageFunc localStoreFunc, const char* uniqueName )
|
||||
{
|
||||
b3Win32ThreadSupport::Win32ThreadConstructionInfo constructionInfo( uniqueName, threadFunc, localStoreFunc, numThreads );
|
||||
//constructionInfo.m_priority = 0; // highest priority (the default) -- can cause erratic performance when numThreads > numCores
|
||||
// we don't want worker threads to be higher priority than the main thread or the main thread could get
|
||||
// totally shut out and unable to tell the workers to stop
|
||||
constructionInfo.m_priority = -1; // normal priority
|
||||
b3Win32ThreadSupport* threadSupport = new b3Win32ThreadSupport( constructionInfo );
|
||||
return threadSupport;
|
||||
}
|
||||
|
||||
#else // #if defined( _WIN32 )
|
||||
|
||||
#include "b3PosixThreadSupport.h"
|
||||
|
||||
b3ThreadSupportInterface* createThreadSupport( int numThreads, btThreadFunc threadFunc, btThreadLocalStorageFunc localStoreFunc, const char* uniqueName)
|
||||
{
|
||||
b3PosixThreadSupport::ThreadConstructionInfo constructionInfo( uniqueName, threadFunc, localStoreFunc, numThreads );
|
||||
b3ThreadSupportInterface* threadSupport = new b3PosixThreadSupport( constructionInfo );
|
||||
return threadSupport;
|
||||
}
|
||||
|
||||
#endif // #else // #if defined( _WIN32 )
|
||||
|
||||
|
||||
///
|
||||
/// getNumHardwareThreads()
|
||||
///
|
||||
///
|
||||
/// https://stackoverflow.com/questions/150355/programmatically-find-the-number-of-cores-on-a-machine
|
||||
///
|
||||
#if __cplusplus >= 201103L
|
||||
|
||||
#include <thread>
|
||||
|
||||
int getNumHardwareThreads()
|
||||
{
|
||||
return std::thread::hardware_concurrency();
|
||||
}
|
||||
|
||||
#elif defined( _WIN32 )
|
||||
|
||||
#define WIN32_LEAN_AND_MEAN
|
||||
|
||||
#include <windows.h>
|
||||
|
||||
int getNumHardwareThreads()
|
||||
{
|
||||
// caps out at 32
|
||||
SYSTEM_INFO info;
|
||||
GetSystemInfo( &info );
|
||||
return info.dwNumberOfProcessors;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
int getNumHardwareThreads()
|
||||
{
|
||||
return 0; // don't know
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
struct WorkerThreadStatus
|
||||
{
|
||||
enum Type
|
||||
{
|
||||
kInvalid,
|
||||
kWaitingForWork,
|
||||
kWorking,
|
||||
kSleeping,
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
struct IJob
|
||||
{
|
||||
virtual void executeJob() = 0;
|
||||
};
|
||||
|
||||
class ParallelForJob : public IJob
|
||||
{
|
||||
const btIParallelForBody* mBody;
|
||||
int mBegin;
|
||||
int mEnd;
|
||||
|
||||
public:
|
||||
ParallelForJob()
|
||||
{
|
||||
mBody = NULL;
|
||||
mBegin = 0;
|
||||
mEnd = 0;
|
||||
}
|
||||
void init( int iBegin, int iEnd, const btIParallelForBody& body )
|
||||
{
|
||||
mBody = &body;
|
||||
mBegin = iBegin;
|
||||
mEnd = iEnd;
|
||||
}
|
||||
virtual void executeJob() BT_OVERRIDE
|
||||
{
|
||||
BT_PROFILE( "executeJob" );
|
||||
|
||||
// call the functor body to do the work
|
||||
mBody->forLoop( mBegin, mEnd );
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
struct JobContext
|
||||
{
|
||||
JobContext()
|
||||
{
|
||||
m_queueLock = NULL;
|
||||
m_headIndex = 0;
|
||||
m_tailIndex = 0;
|
||||
m_workersShouldCheckQueue = false;
|
||||
m_useSpinMutex = false;
|
||||
}
|
||||
b3CriticalSection* m_queueLock;
|
||||
btSpinMutex m_mutex;
|
||||
volatile bool m_workersShouldCheckQueue;
|
||||
|
||||
btAlignedObjectArray<IJob*> m_jobQueue;
|
||||
bool m_queueIsEmpty;
|
||||
int m_tailIndex;
|
||||
int m_headIndex;
|
||||
bool m_useSpinMutex;
|
||||
|
||||
void lockQueue()
|
||||
{
|
||||
if ( m_useSpinMutex )
|
||||
{
|
||||
m_mutex.lock();
|
||||
}
|
||||
else
|
||||
{
|
||||
m_queueLock->lock();
|
||||
}
|
||||
}
|
||||
void unlockQueue()
|
||||
{
|
||||
if ( m_useSpinMutex )
|
||||
{
|
||||
m_mutex.unlock();
|
||||
}
|
||||
else
|
||||
{
|
||||
m_queueLock->unlock();
|
||||
}
|
||||
}
|
||||
void clearQueue()
|
||||
{
|
||||
lockQueue();
|
||||
m_headIndex = 0;
|
||||
m_tailIndex = 0;
|
||||
m_queueIsEmpty = true;
|
||||
unlockQueue();
|
||||
m_jobQueue.resizeNoInitialize( 0 );
|
||||
}
|
||||
void submitJob( IJob* job )
|
||||
{
|
||||
m_jobQueue.push_back( job );
|
||||
lockQueue();
|
||||
m_tailIndex++;
|
||||
m_queueIsEmpty = false;
|
||||
unlockQueue();
|
||||
}
|
||||
IJob* consumeJob()
|
||||
{
|
||||
if ( m_queueIsEmpty )
|
||||
{
|
||||
// lock free path. even if this is taken erroneously it isn't harmful
|
||||
return NULL;
|
||||
}
|
||||
IJob* job = NULL;
|
||||
lockQueue();
|
||||
if ( !m_queueIsEmpty )
|
||||
{
|
||||
job = m_jobQueue[ m_headIndex++ ];
|
||||
if ( m_headIndex == m_tailIndex )
|
||||
{
|
||||
m_queueIsEmpty = true;
|
||||
}
|
||||
}
|
||||
unlockQueue();
|
||||
return job;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
struct WorkerThreadLocalStorage
|
||||
{
|
||||
int threadId;
|
||||
WorkerThreadStatus::Type status;
|
||||
};
|
||||
|
||||
|
||||
static void WorkerThreadFunc( void* userPtr, void* lsMemory )
|
||||
{
|
||||
BT_PROFILE( "WorkerThreadFunc" );
|
||||
WorkerThreadLocalStorage* localStorage = (WorkerThreadLocalStorage*) lsMemory;
|
||||
localStorage->status = WorkerThreadStatus::kWaitingForWork;
|
||||
//printf( "WorkerThreadFunc: worker %d start working\n", localStorage->threadId );
|
||||
|
||||
JobContext* jobContext = (JobContext*) userPtr;
|
||||
|
||||
while ( jobContext->m_workersShouldCheckQueue )
|
||||
{
|
||||
if ( IJob* job = jobContext->consumeJob() )
|
||||
{
|
||||
localStorage->status = WorkerThreadStatus::kWorking;
|
||||
job->executeJob();
|
||||
localStorage->status = WorkerThreadStatus::kWaitingForWork;
|
||||
}
|
||||
else
|
||||
{
|
||||
// todo: spin wait a bit to avoid hammering the empty queue
|
||||
}
|
||||
}
|
||||
|
||||
//printf( "WorkerThreadFunc stop working\n" );
|
||||
localStorage->status = WorkerThreadStatus::kSleeping;
|
||||
// go idle
|
||||
}
|
||||
|
||||
|
||||
static void* WorkerThreadAllocFunc()
|
||||
{
|
||||
return new WorkerThreadLocalStorage;
|
||||
}
|
||||
|
||||
|
||||
|
||||
class btTaskSchedulerDefault : public btITaskScheduler
|
||||
{
|
||||
JobContext m_jobContext;
|
||||
b3ThreadSupportInterface* m_threadSupport;
|
||||
btAlignedObjectArray<ParallelForJob> m_jobs;
|
||||
btSpinMutex m_antiNestingLock; // prevent nested parallel-for
|
||||
int m_numThreads;
|
||||
int m_numWorkerThreads;
|
||||
int m_numWorkersRunning;
|
||||
public:
|
||||
|
||||
btTaskSchedulerDefault() : btITaskScheduler("ThreadSupport")
|
||||
{
|
||||
m_threadSupport = NULL;
|
||||
m_numThreads = getNumHardwareThreads();
|
||||
// if can't detect number of cores,
|
||||
if ( m_numThreads == 0 )
|
||||
{
|
||||
// take a guess
|
||||
m_numThreads = 4;
|
||||
}
|
||||
m_numWorkerThreads = m_numThreads - 1;
|
||||
m_numWorkersRunning = 0;
|
||||
}
|
||||
|
||||
virtual ~btTaskSchedulerDefault()
|
||||
{
|
||||
shutdown();
|
||||
}
|
||||
|
||||
void init()
|
||||
{
|
||||
int maxNumWorkerThreads = BT_MAX_THREAD_COUNT - 1;
|
||||
m_threadSupport = createThreadSupport( maxNumWorkerThreads, WorkerThreadFunc, WorkerThreadAllocFunc, "TaskScheduler" );
|
||||
m_jobContext.m_queueLock = m_threadSupport->createCriticalSection();
|
||||
for ( int i = 0; i < maxNumWorkerThreads; i++ )
|
||||
{
|
||||
WorkerThreadLocalStorage* storage = (WorkerThreadLocalStorage*) m_threadSupport->getThreadLocalMemory( i );
|
||||
btAssert( storage );
|
||||
storage->threadId = i;
|
||||
storage->status = WorkerThreadStatus::kSleeping;
|
||||
}
|
||||
setWorkersActive( false ); // no work for them yet
|
||||
}
|
||||
|
||||
virtual void shutdown()
|
||||
{
|
||||
setWorkersActive( false );
|
||||
waitForWorkersToSleep();
|
||||
m_threadSupport->deleteCriticalSection( m_jobContext.m_queueLock );
|
||||
m_jobContext.m_queueLock = NULL;
|
||||
|
||||
delete m_threadSupport;
|
||||
m_threadSupport = NULL;
|
||||
}
|
||||
|
||||
void setWorkersActive( bool active )
|
||||
{
|
||||
m_jobContext.m_workersShouldCheckQueue = active;
|
||||
}
|
||||
|
||||
virtual int getMaxNumThreads() const BT_OVERRIDE
|
||||
{
|
||||
return BT_MAX_THREAD_COUNT;
|
||||
}
|
||||
|
||||
virtual int getNumThreads() const BT_OVERRIDE
|
||||
{
|
||||
return m_numThreads;
|
||||
}
|
||||
|
||||
virtual void setNumThreads( int numThreads ) BT_OVERRIDE
|
||||
{
|
||||
m_numThreads = btMax( btMin(numThreads, int(BT_MAX_THREAD_COUNT)), 1 );
|
||||
m_numWorkerThreads = m_numThreads - 1;
|
||||
}
|
||||
|
||||
void waitJobs()
|
||||
{
|
||||
BT_PROFILE( "waitJobs" );
|
||||
// have the main thread work until the job queue is empty
|
||||
for ( ;; )
|
||||
{
|
||||
if ( IJob* job = m_jobContext.consumeJob() )
|
||||
{
|
||||
job->executeJob();
|
||||
}
|
||||
else
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
// done with jobs for now, tell workers to rest
|
||||
setWorkersActive( false );
|
||||
waitForWorkersToSleep();
|
||||
}
|
||||
|
||||
void wakeWorkers()
|
||||
{
|
||||
BT_PROFILE( "wakeWorkers" );
|
||||
btAssert( m_jobContext.m_workersShouldCheckQueue );
|
||||
// tell each worker thread to start working
|
||||
for ( int i = 0; i < m_numWorkerThreads; i++ )
|
||||
{
|
||||
m_threadSupport->runTask( B3_THREAD_SCHEDULE_TASK, &m_jobContext, i );
|
||||
m_numWorkersRunning++;
|
||||
}
|
||||
}
|
||||
|
||||
void waitForWorkersToSleep()
|
||||
{
|
||||
BT_PROFILE( "waitForWorkersToSleep" );
|
||||
while ( m_numWorkersRunning > 0 )
|
||||
{
|
||||
int iThread;
|
||||
int threadStatus;
|
||||
m_threadSupport->waitForResponse( &iThread, &threadStatus ); // wait for worker threads to finish working
|
||||
m_numWorkersRunning--;
|
||||
}
|
||||
//m_threadSupport->waitForAllTasksToComplete();
|
||||
for ( int i = 0; i < m_numWorkerThreads; i++ )
|
||||
{
|
||||
//m_threadSupport->waitForTaskCompleted( i );
|
||||
WorkerThreadLocalStorage* storage = (WorkerThreadLocalStorage*) m_threadSupport->getThreadLocalMemory( i );
|
||||
btAssert( storage );
|
||||
btAssert( storage->status == WorkerThreadStatus::kSleeping );
|
||||
}
|
||||
}
|
||||
|
||||
virtual void parallelFor( int iBegin, int iEnd, int grainSize, const btIParallelForBody& body ) BT_OVERRIDE
|
||||
{
|
||||
BT_PROFILE( "parallelFor_ThreadSupport" );
|
||||
btAssert( iEnd >= iBegin );
|
||||
btAssert( grainSize >= 1 );
|
||||
int iterationCount = iEnd - iBegin;
|
||||
if ( iterationCount > grainSize && m_numWorkerThreads > 0 && m_antiNestingLock.tryLock() )
|
||||
{
|
||||
int jobCount = ( iterationCount + grainSize - 1 ) / grainSize;
|
||||
btAssert( jobCount >= 2 ); // need more than one job for multithreading
|
||||
if ( jobCount > m_jobs.size() )
|
||||
{
|
||||
m_jobs.resize( jobCount );
|
||||
}
|
||||
if ( jobCount > m_jobContext.m_jobQueue.capacity() )
|
||||
{
|
||||
m_jobContext.m_jobQueue.reserve( jobCount );
|
||||
}
|
||||
|
||||
m_jobContext.clearQueue();
|
||||
// prepare worker threads for incoming work
|
||||
setWorkersActive( true );
|
||||
wakeWorkers();
|
||||
// submit all of the jobs
|
||||
int iJob = 0;
|
||||
for ( int i = iBegin; i < iEnd; i += grainSize )
|
||||
{
|
||||
btAssert( iJob < jobCount );
|
||||
int iE = btMin( i + grainSize, iEnd );
|
||||
ParallelForJob& job = m_jobs[ iJob ];
|
||||
job.init( i, iE, body );
|
||||
m_jobContext.submitJob( &job );
|
||||
iJob++;
|
||||
}
|
||||
|
||||
// put the main thread to work on emptying the job queue and then wait for all workers to finish
|
||||
waitJobs();
|
||||
m_antiNestingLock.unlock();
|
||||
}
|
||||
else
|
||||
{
|
||||
BT_PROFILE( "parallelFor_mainThread" );
|
||||
// just run on main thread
|
||||
body.forLoop( iBegin, iEnd );
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
|
||||
btITaskScheduler* createDefaultTaskScheduler()
|
||||
{
|
||||
btTaskSchedulerDefault* ts = new btTaskSchedulerDefault();
|
||||
ts->init();
|
||||
return ts;
|
||||
}
|
||||
|
||||
#else // #if BT_THREADSAFE
|
||||
|
||||
btITaskScheduler* createDefaultTaskScheduler()
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
#endif // #else // #if BT_THREADSAFE
|
||||
@@ -1,26 +0,0 @@
|
||||
/*
|
||||
Copyright (c) 2003-2014 Erwin Coumans http://bullet.googlecode.com
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
|
||||
|
||||
|
||||
#ifndef BT_TASK_SCHEDULER_H
|
||||
#define BT_TASK_SCHEDULER_H
|
||||
|
||||
|
||||
class btITaskScheduler;
|
||||
|
||||
btITaskScheduler* createDefaultTaskScheduler();
|
||||
|
||||
|
||||
#endif // BT_TASK_SCHEDULER_H
|
||||
6
setup.py
6
setup.py
@@ -134,6 +134,9 @@ sources = ["examples/pybullet/pybullet.c"]\
|
||||
+["src/LinearMath/btConvexHullComputer.cpp"]\
|
||||
+["src/LinearMath/btQuickprof.cpp"]\
|
||||
+["src/LinearMath/btThreads.cpp"]\
|
||||
+["src/LinearMath/TaskScheduler/btTaskScheduler.cpp"]\
|
||||
+["src/LinearMath/TaskScheduler/btThreadSupportPosix.cpp"]\
|
||||
+["src/LinearMath/TaskScheduler/btThreadSupportWin32.cpp"]\
|
||||
+["src/BulletCollision/BroadphaseCollision/btAxisSweep3.cpp"]\
|
||||
+["src/BulletCollision/BroadphaseCollision/btDbvt.cpp"]\
|
||||
+["src/BulletCollision/BroadphaseCollision/btOverlappingPairCache.cpp"]\
|
||||
@@ -233,6 +236,7 @@ sources = ["examples/pybullet/pybullet.c"]\
|
||||
+["src/BulletDynamics/Dynamics/btSimulationIslandManagerMt.cpp"]\
|
||||
+["src/BulletDynamics/Dynamics/btDiscreteDynamicsWorldMt.cpp"]\
|
||||
+["src/BulletDynamics/Dynamics/btSimpleDynamicsWorld.cpp"]\
|
||||
+["src/BulletDynamics/ConstraintSolver/btBatchedConstraints.cpp"]\
|
||||
+["src/BulletDynamics/ConstraintSolver/btConeTwistConstraint.cpp"]\
|
||||
+["src/BulletDynamics/ConstraintSolver/btGeneric6DofSpringConstraint.cpp"]\
|
||||
+["src/BulletDynamics/ConstraintSolver/btSliderConstraint.cpp"]\
|
||||
@@ -249,6 +253,7 @@ sources = ["examples/pybullet/pybullet.c"]\
|
||||
+["src/BulletDynamics/ConstraintSolver/btPoint2PointConstraint.cpp"]\
|
||||
+["src/BulletDynamics/ConstraintSolver/btGeneric6DofSpring2Constraint.cpp"]\
|
||||
+["src/BulletDynamics/ConstraintSolver/btSequentialImpulseConstraintSolver.cpp"]\
|
||||
+["src/BulletDynamics/ConstraintSolver/btSequentialImpulseConstraintSolverMt.cpp"]\
|
||||
+["src/BulletDynamics/MLCPSolvers/btDantzigLCP.cpp"]\
|
||||
+["src/BulletDynamics/MLCPSolvers/btLemkeAlgorithm.cpp"]\
|
||||
+["src/BulletDynamics/MLCPSolvers/btMLCPSolver.cpp"]\
|
||||
@@ -479,4 +484,3 @@ setup(
|
||||
packages=[x for x in find_packages('examples/pybullet/gym')],
|
||||
package_data = { 'pybullet_data': need_files }
|
||||
)
|
||||
|
||||
|
||||
@@ -15,6 +15,8 @@ SET(BulletDynamics_SRCS
|
||||
ConstraintSolver/btHingeConstraint.cpp
|
||||
ConstraintSolver/btPoint2PointConstraint.cpp
|
||||
ConstraintSolver/btSequentialImpulseConstraintSolver.cpp
|
||||
ConstraintSolver/btSequentialImpulseConstraintSolverMt.cpp
|
||||
ConstraintSolver/btBatchedConstraints.cpp
|
||||
ConstraintSolver/btNNCGConstraintSolver.cpp
|
||||
ConstraintSolver/btSliderConstraint.cpp
|
||||
ConstraintSolver/btSolve2LinearConstraint.cpp
|
||||
@@ -62,6 +64,7 @@ SET(ConstraintSolver_HDRS
|
||||
ConstraintSolver/btJacobianEntry.h
|
||||
ConstraintSolver/btPoint2PointConstraint.h
|
||||
ConstraintSolver/btSequentialImpulseConstraintSolver.h
|
||||
ConstraintSolver/btSequentialImpulseConstraintSolverMt.h
|
||||
ConstraintSolver/btNNCGConstraintSolver.h
|
||||
ConstraintSolver/btSliderConstraint.h
|
||||
ConstraintSolver/btSolve2LinearConstraint.h
|
||||
|
||||
1129
src/BulletDynamics/ConstraintSolver/btBatchedConstraints.cpp
Normal file
1129
src/BulletDynamics/ConstraintSolver/btBatchedConstraints.cpp
Normal file
File diff suppressed because it is too large
Load Diff
66
src/BulletDynamics/ConstraintSolver/btBatchedConstraints.h
Normal file
66
src/BulletDynamics/ConstraintSolver/btBatchedConstraints.h
Normal file
@@ -0,0 +1,66 @@
|
||||
/*
|
||||
Bullet Continuous Collision Detection and Physics Library
|
||||
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
|
||||
#ifndef BT_BATCHED_CONSTRAINTS_H
|
||||
#define BT_BATCHED_CONSTRAINTS_H
|
||||
|
||||
#include "LinearMath/btThreads.h"
|
||||
#include "LinearMath/btAlignedObjectArray.h"
|
||||
#include "BulletDynamics/ConstraintSolver/btSolverBody.h"
|
||||
#include "BulletDynamics/ConstraintSolver/btSolverConstraint.h"
|
||||
|
||||
|
||||
class btIDebugDraw;
|
||||
|
||||
struct btBatchedConstraints
|
||||
{
|
||||
enum BatchingMethod
|
||||
{
|
||||
BATCHING_METHOD_SPATIAL_GRID_2D,
|
||||
BATCHING_METHOD_SPATIAL_GRID_3D,
|
||||
BATCHING_METHOD_COUNT
|
||||
};
|
||||
struct Range
|
||||
{
|
||||
int begin;
|
||||
int end;
|
||||
|
||||
Range() : begin( 0 ), end( 0 ) {}
|
||||
Range( int _beg, int _end ) : begin( _beg ), end( _end ) {}
|
||||
};
|
||||
|
||||
btAlignedObjectArray<int> m_constraintIndices;
|
||||
btAlignedObjectArray<Range> m_batches; // each batch is a range of indices in the m_constraintIndices array
|
||||
btAlignedObjectArray<Range> m_phases; // each phase is range of indices in the m_batches array
|
||||
btAlignedObjectArray<char> m_phaseGrainSize; // max grain size for each phase
|
||||
btAlignedObjectArray<int> m_phaseOrder; // phases can be done in any order, so we can randomize the order here
|
||||
btIDebugDraw* m_debugDrawer;
|
||||
|
||||
static bool s_debugDrawBatches;
|
||||
|
||||
btBatchedConstraints() {m_debugDrawer=NULL;}
|
||||
void setup( btConstraintArray* constraints,
|
||||
const btAlignedObjectArray<btSolverBody>& bodies,
|
||||
BatchingMethod batchingMethod,
|
||||
int minBatchSize,
|
||||
int maxBatchSize,
|
||||
btAlignedObjectArray<char>* scratchMemory
|
||||
);
|
||||
bool validate( btConstraintArray* constraints, const btAlignedObjectArray<btSolverBody>& bodies ) const;
|
||||
};
|
||||
|
||||
|
||||
#endif // BT_BATCHED_CONSTRAINTS_H
|
||||
|
||||
@@ -1258,6 +1258,256 @@ void btSequentialImpulseConstraintSolver::convertContacts(btPersistentManifold**
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void btSequentialImpulseConstraintSolver::convertJoint(btSolverConstraint* currentConstraintRow,
|
||||
btTypedConstraint* constraint,
|
||||
const btTypedConstraint::btConstraintInfo1& info1,
|
||||
int solverBodyIdA,
|
||||
int solverBodyIdB,
|
||||
const btContactSolverInfo& infoGlobal
|
||||
)
|
||||
{
|
||||
const btRigidBody& rbA = constraint->getRigidBodyA();
|
||||
const btRigidBody& rbB = constraint->getRigidBodyB();
|
||||
|
||||
const btSolverBody* bodyAPtr = &m_tmpSolverBodyPool[solverBodyIdA];
|
||||
const btSolverBody* bodyBPtr = &m_tmpSolverBodyPool[solverBodyIdB];
|
||||
|
||||
int overrideNumSolverIterations = constraint->getOverrideNumSolverIterations() > 0 ? constraint->getOverrideNumSolverIterations() : infoGlobal.m_numIterations;
|
||||
if (overrideNumSolverIterations>m_maxOverrideNumSolverIterations)
|
||||
m_maxOverrideNumSolverIterations = overrideNumSolverIterations;
|
||||
|
||||
for (int j=0;j<info1.m_numConstraintRows;j++)
|
||||
{
|
||||
memset(¤tConstraintRow[j],0,sizeof(btSolverConstraint));
|
||||
currentConstraintRow[j].m_lowerLimit = -SIMD_INFINITY;
|
||||
currentConstraintRow[j].m_upperLimit = SIMD_INFINITY;
|
||||
currentConstraintRow[j].m_appliedImpulse = 0.f;
|
||||
currentConstraintRow[j].m_appliedPushImpulse = 0.f;
|
||||
currentConstraintRow[j].m_solverBodyIdA = solverBodyIdA;
|
||||
currentConstraintRow[j].m_solverBodyIdB = solverBodyIdB;
|
||||
currentConstraintRow[j].m_overrideNumSolverIterations = overrideNumSolverIterations;
|
||||
}
|
||||
|
||||
// these vectors are already cleared in initSolverBody, no need to redundantly clear again
|
||||
btAssert(bodyAPtr->getDeltaLinearVelocity().isZero());
|
||||
btAssert(bodyAPtr->getDeltaAngularVelocity().isZero());
|
||||
btAssert(bodyAPtr->getPushVelocity().isZero());
|
||||
btAssert(bodyAPtr->getTurnVelocity().isZero());
|
||||
btAssert(bodyBPtr->getDeltaLinearVelocity().isZero());
|
||||
btAssert(bodyBPtr->getDeltaAngularVelocity().isZero());
|
||||
btAssert(bodyBPtr->getPushVelocity().isZero());
|
||||
btAssert(bodyBPtr->getTurnVelocity().isZero());
|
||||
//bodyAPtr->internalGetDeltaLinearVelocity().setValue(0.f,0.f,0.f);
|
||||
//bodyAPtr->internalGetDeltaAngularVelocity().setValue(0.f,0.f,0.f);
|
||||
//bodyAPtr->internalGetPushVelocity().setValue(0.f,0.f,0.f);
|
||||
//bodyAPtr->internalGetTurnVelocity().setValue(0.f,0.f,0.f);
|
||||
//bodyBPtr->internalGetDeltaLinearVelocity().setValue(0.f,0.f,0.f);
|
||||
//bodyBPtr->internalGetDeltaAngularVelocity().setValue(0.f,0.f,0.f);
|
||||
//bodyBPtr->internalGetPushVelocity().setValue(0.f,0.f,0.f);
|
||||
//bodyBPtr->internalGetTurnVelocity().setValue(0.f,0.f,0.f);
|
||||
|
||||
|
||||
btTypedConstraint::btConstraintInfo2 info2;
|
||||
info2.fps = 1.f/infoGlobal.m_timeStep;
|
||||
info2.erp = infoGlobal.m_erp;
|
||||
info2.m_J1linearAxis = currentConstraintRow->m_contactNormal1;
|
||||
info2.m_J1angularAxis = currentConstraintRow->m_relpos1CrossNormal;
|
||||
info2.m_J2linearAxis = currentConstraintRow->m_contactNormal2;
|
||||
info2.m_J2angularAxis = currentConstraintRow->m_relpos2CrossNormal;
|
||||
info2.rowskip = sizeof(btSolverConstraint)/sizeof(btScalar);//check this
|
||||
///the size of btSolverConstraint needs be a multiple of btScalar
|
||||
btAssert(info2.rowskip*sizeof(btScalar)== sizeof(btSolverConstraint));
|
||||
info2.m_constraintError = ¤tConstraintRow->m_rhs;
|
||||
currentConstraintRow->m_cfm = infoGlobal.m_globalCfm;
|
||||
info2.m_damping = infoGlobal.m_damping;
|
||||
info2.cfm = ¤tConstraintRow->m_cfm;
|
||||
info2.m_lowerLimit = ¤tConstraintRow->m_lowerLimit;
|
||||
info2.m_upperLimit = ¤tConstraintRow->m_upperLimit;
|
||||
info2.m_numIterations = infoGlobal.m_numIterations;
|
||||
constraint->getInfo2(&info2);
|
||||
|
||||
///finalize the constraint setup
|
||||
for (int j=0;j<info1.m_numConstraintRows;j++)
|
||||
{
|
||||
btSolverConstraint& solverConstraint = currentConstraintRow[j];
|
||||
|
||||
if (solverConstraint.m_upperLimit>=constraint->getBreakingImpulseThreshold())
|
||||
{
|
||||
solverConstraint.m_upperLimit = constraint->getBreakingImpulseThreshold();
|
||||
}
|
||||
|
||||
if (solverConstraint.m_lowerLimit<=-constraint->getBreakingImpulseThreshold())
|
||||
{
|
||||
solverConstraint.m_lowerLimit = -constraint->getBreakingImpulseThreshold();
|
||||
}
|
||||
|
||||
solverConstraint.m_originalContactPoint = constraint;
|
||||
|
||||
{
|
||||
const btVector3& ftorqueAxis1 = solverConstraint.m_relpos1CrossNormal;
|
||||
solverConstraint.m_angularComponentA = constraint->getRigidBodyA().getInvInertiaTensorWorld()*ftorqueAxis1*constraint->getRigidBodyA().getAngularFactor();
|
||||
}
|
||||
{
|
||||
const btVector3& ftorqueAxis2 = solverConstraint.m_relpos2CrossNormal;
|
||||
solverConstraint.m_angularComponentB = constraint->getRigidBodyB().getInvInertiaTensorWorld()*ftorqueAxis2*constraint->getRigidBodyB().getAngularFactor();
|
||||
}
|
||||
|
||||
{
|
||||
btVector3 iMJlA = solverConstraint.m_contactNormal1*rbA.getInvMass();
|
||||
btVector3 iMJaA = rbA.getInvInertiaTensorWorld()*solverConstraint.m_relpos1CrossNormal;
|
||||
btVector3 iMJlB = solverConstraint.m_contactNormal2*rbB.getInvMass();//sign of normal?
|
||||
btVector3 iMJaB = rbB.getInvInertiaTensorWorld()*solverConstraint.m_relpos2CrossNormal;
|
||||
|
||||
btScalar sum = iMJlA.dot(solverConstraint.m_contactNormal1);
|
||||
sum += iMJaA.dot(solverConstraint.m_relpos1CrossNormal);
|
||||
sum += iMJlB.dot(solverConstraint.m_contactNormal2);
|
||||
sum += iMJaB.dot(solverConstraint.m_relpos2CrossNormal);
|
||||
btScalar fsum = btFabs(sum);
|
||||
btAssert(fsum > SIMD_EPSILON);
|
||||
btScalar sorRelaxation = 1.f;//todo: get from globalInfo?
|
||||
solverConstraint.m_jacDiagABInv = fsum>SIMD_EPSILON?sorRelaxation/sum : 0.f;
|
||||
}
|
||||
|
||||
{
|
||||
btScalar rel_vel;
|
||||
btVector3 externalForceImpulseA = bodyAPtr->m_originalBody ? bodyAPtr->m_externalForceImpulse : btVector3(0,0,0);
|
||||
btVector3 externalTorqueImpulseA = bodyAPtr->m_originalBody ? bodyAPtr->m_externalTorqueImpulse : btVector3(0,0,0);
|
||||
|
||||
btVector3 externalForceImpulseB = bodyBPtr->m_originalBody ? bodyBPtr->m_externalForceImpulse : btVector3(0,0,0);
|
||||
btVector3 externalTorqueImpulseB = bodyBPtr->m_originalBody ?bodyBPtr->m_externalTorqueImpulse : btVector3(0,0,0);
|
||||
|
||||
btScalar vel1Dotn = solverConstraint.m_contactNormal1.dot(rbA.getLinearVelocity()+externalForceImpulseA)
|
||||
+ solverConstraint.m_relpos1CrossNormal.dot(rbA.getAngularVelocity()+externalTorqueImpulseA);
|
||||
|
||||
btScalar vel2Dotn = solverConstraint.m_contactNormal2.dot(rbB.getLinearVelocity()+externalForceImpulseB)
|
||||
+ solverConstraint.m_relpos2CrossNormal.dot(rbB.getAngularVelocity()+externalTorqueImpulseB);
|
||||
|
||||
rel_vel = vel1Dotn+vel2Dotn;
|
||||
btScalar restitution = 0.f;
|
||||
btScalar positionalError = solverConstraint.m_rhs;//already filled in by getConstraintInfo2
|
||||
btScalar velocityError = restitution - rel_vel * info2.m_damping;
|
||||
btScalar penetrationImpulse = positionalError*solverConstraint.m_jacDiagABInv;
|
||||
btScalar velocityImpulse = velocityError *solverConstraint.m_jacDiagABInv;
|
||||
solverConstraint.m_rhs = penetrationImpulse+velocityImpulse;
|
||||
solverConstraint.m_appliedImpulse = 0.f;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void btSequentialImpulseConstraintSolver::convertJoints(btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal)
|
||||
{
|
||||
BT_PROFILE("convertJoints");
|
||||
for (int j=0;j<numConstraints;j++)
|
||||
{
|
||||
btTypedConstraint* constraint = constraints[j];
|
||||
constraint->buildJacobian();
|
||||
constraint->internalSetAppliedImpulse(0.0f);
|
||||
}
|
||||
|
||||
int totalNumRows = 0;
|
||||
|
||||
m_tmpConstraintSizesPool.resizeNoInitialize(numConstraints);
|
||||
//calculate the total number of contraint rows
|
||||
for (int i=0;i<numConstraints;i++)
|
||||
{
|
||||
btTypedConstraint::btConstraintInfo1& info1 = m_tmpConstraintSizesPool[i];
|
||||
btJointFeedback* fb = constraints[i]->getJointFeedback();
|
||||
if (fb)
|
||||
{
|
||||
fb->m_appliedForceBodyA.setZero();
|
||||
fb->m_appliedTorqueBodyA.setZero();
|
||||
fb->m_appliedForceBodyB.setZero();
|
||||
fb->m_appliedTorqueBodyB.setZero();
|
||||
}
|
||||
|
||||
if (constraints[i]->isEnabled())
|
||||
{
|
||||
constraints[i]->getInfo1(&info1);
|
||||
} else
|
||||
{
|
||||
info1.m_numConstraintRows = 0;
|
||||
info1.nub = 0;
|
||||
}
|
||||
totalNumRows += info1.m_numConstraintRows;
|
||||
}
|
||||
m_tmpSolverNonContactConstraintPool.resizeNoInitialize(totalNumRows);
|
||||
|
||||
|
||||
///setup the btSolverConstraints
|
||||
int currentRow = 0;
|
||||
|
||||
for (int i=0;i<numConstraints;i++)
|
||||
{
|
||||
const btTypedConstraint::btConstraintInfo1& info1 = m_tmpConstraintSizesPool[i];
|
||||
|
||||
if (info1.m_numConstraintRows)
|
||||
{
|
||||
btAssert(currentRow<totalNumRows);
|
||||
|
||||
btSolverConstraint* currentConstraintRow = &m_tmpSolverNonContactConstraintPool[currentRow];
|
||||
btTypedConstraint* constraint = constraints[i];
|
||||
btRigidBody& rbA = constraint->getRigidBodyA();
|
||||
btRigidBody& rbB = constraint->getRigidBodyB();
|
||||
|
||||
int solverBodyIdA = getOrInitSolverBody(rbA,infoGlobal.m_timeStep);
|
||||
int solverBodyIdB = getOrInitSolverBody(rbB,infoGlobal.m_timeStep);
|
||||
|
||||
convertJoint(currentConstraintRow, constraint, info1, solverBodyIdA, solverBodyIdB, infoGlobal);
|
||||
}
|
||||
currentRow+=info1.m_numConstraintRows;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void btSequentialImpulseConstraintSolver::convertBodies(btCollisionObject** bodies, int numBodies, const btContactSolverInfo& infoGlobal)
|
||||
{
|
||||
BT_PROFILE("convertBodies");
|
||||
for (int i = 0; i < numBodies; i++)
|
||||
{
|
||||
bodies[i]->setCompanionId(-1);
|
||||
}
|
||||
#if BT_THREADSAFE
|
||||
m_kinematicBodyUniqueIdToSolverBodyTable.resize( 0 );
|
||||
#endif // BT_THREADSAFE
|
||||
|
||||
m_tmpSolverBodyPool.reserve(numBodies+1);
|
||||
m_tmpSolverBodyPool.resize(0);
|
||||
|
||||
//btSolverBody& fixedBody = m_tmpSolverBodyPool.expand();
|
||||
//initSolverBody(&fixedBody,0);
|
||||
|
||||
for (int i=0;i<numBodies;i++)
|
||||
{
|
||||
int bodyId = getOrInitSolverBody(*bodies[i],infoGlobal.m_timeStep);
|
||||
|
||||
btRigidBody* body = btRigidBody::upcast(bodies[i]);
|
||||
if (body && body->getInvMass())
|
||||
{
|
||||
btSolverBody& solverBody = m_tmpSolverBodyPool[bodyId];
|
||||
btVector3 gyroForce (0,0,0);
|
||||
if (body->getFlags()&BT_ENABLE_GYROSCOPIC_FORCE_EXPLICIT)
|
||||
{
|
||||
gyroForce = body->computeGyroscopicForceExplicit(infoGlobal.m_maxGyroscopicForce);
|
||||
solverBody.m_externalTorqueImpulse -= gyroForce*body->getInvInertiaTensorWorld()*infoGlobal.m_timeStep;
|
||||
}
|
||||
if (body->getFlags()&BT_ENABLE_GYROSCOPIC_FORCE_IMPLICIT_WORLD)
|
||||
{
|
||||
gyroForce = body->computeGyroscopicImpulseImplicit_World(infoGlobal.m_timeStep);
|
||||
solverBody.m_externalTorqueImpulse += gyroForce;
|
||||
}
|
||||
if (body->getFlags()&BT_ENABLE_GYROSCOPIC_FORCE_IMPLICIT_BODY)
|
||||
{
|
||||
gyroForce = body->computeGyroscopicImpulseImplicit_Body(infoGlobal.m_timeStep);
|
||||
solverBody.m_externalTorqueImpulse += gyroForce;
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
btScalar btSequentialImpulseConstraintSolver::solveGroupCacheFriendlySetup(btCollisionObject** bodies, int numBodies, btPersistentManifold** manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal,btIDebugDraw* debugDrawer)
|
||||
{
|
||||
m_fixedBodyId = -1;
|
||||
@@ -1344,250 +1594,13 @@ btScalar btSequentialImpulseConstraintSolver::solveGroupCacheFriendlySetup(btCol
|
||||
#endif //BT_ADDITIONAL_DEBUG
|
||||
|
||||
|
||||
for (int i = 0; i < numBodies; i++)
|
||||
{
|
||||
bodies[i]->setCompanionId(-1);
|
||||
}
|
||||
#if BT_THREADSAFE
|
||||
m_kinematicBodyUniqueIdToSolverBodyTable.resize( 0 );
|
||||
#endif // BT_THREADSAFE
|
||||
|
||||
m_tmpSolverBodyPool.reserve(numBodies+1);
|
||||
m_tmpSolverBodyPool.resize(0);
|
||||
|
||||
//btSolverBody& fixedBody = m_tmpSolverBodyPool.expand();
|
||||
//initSolverBody(&fixedBody,0);
|
||||
|
||||
//convert all bodies
|
||||
convertBodies(bodies, numBodies, infoGlobal);
|
||||
|
||||
|
||||
for (int i=0;i<numBodies;i++)
|
||||
{
|
||||
int bodyId = getOrInitSolverBody(*bodies[i],infoGlobal.m_timeStep);
|
||||
|
||||
btRigidBody* body = btRigidBody::upcast(bodies[i]);
|
||||
if (body && body->getInvMass())
|
||||
{
|
||||
btSolverBody& solverBody = m_tmpSolverBodyPool[bodyId];
|
||||
btVector3 gyroForce (0,0,0);
|
||||
if (body->getFlags()&BT_ENABLE_GYROSCOPIC_FORCE_EXPLICIT)
|
||||
{
|
||||
gyroForce = body->computeGyroscopicForceExplicit(infoGlobal.m_maxGyroscopicForce);
|
||||
solverBody.m_externalTorqueImpulse -= gyroForce*body->getInvInertiaTensorWorld()*infoGlobal.m_timeStep;
|
||||
}
|
||||
if (body->getFlags()&BT_ENABLE_GYROSCOPIC_FORCE_IMPLICIT_WORLD)
|
||||
{
|
||||
gyroForce = body->computeGyroscopicImpulseImplicit_World(infoGlobal.m_timeStep);
|
||||
solverBody.m_externalTorqueImpulse += gyroForce;
|
||||
}
|
||||
if (body->getFlags()&BT_ENABLE_GYROSCOPIC_FORCE_IMPLICIT_BODY)
|
||||
{
|
||||
gyroForce = body->computeGyroscopicImpulseImplicit_Body(infoGlobal.m_timeStep);
|
||||
solverBody.m_externalTorqueImpulse += gyroForce;
|
||||
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
if (1)
|
||||
{
|
||||
int j;
|
||||
for (j=0;j<numConstraints;j++)
|
||||
{
|
||||
btTypedConstraint* constraint = constraints[j];
|
||||
constraint->buildJacobian();
|
||||
constraint->internalSetAppliedImpulse(0.0f);
|
||||
}
|
||||
}
|
||||
|
||||
//btRigidBody* rb0=0,*rb1=0;
|
||||
|
||||
//if (1)
|
||||
{
|
||||
{
|
||||
|
||||
int totalNumRows = 0;
|
||||
int i;
|
||||
|
||||
m_tmpConstraintSizesPool.resizeNoInitialize(numConstraints);
|
||||
//calculate the total number of contraint rows
|
||||
for (i=0;i<numConstraints;i++)
|
||||
{
|
||||
btTypedConstraint::btConstraintInfo1& info1 = m_tmpConstraintSizesPool[i];
|
||||
btJointFeedback* fb = constraints[i]->getJointFeedback();
|
||||
if (fb)
|
||||
{
|
||||
fb->m_appliedForceBodyA.setZero();
|
||||
fb->m_appliedTorqueBodyA.setZero();
|
||||
fb->m_appliedForceBodyB.setZero();
|
||||
fb->m_appliedTorqueBodyB.setZero();
|
||||
}
|
||||
|
||||
if (constraints[i]->isEnabled())
|
||||
{
|
||||
constraints[i]->getInfo1(&info1);
|
||||
} else
|
||||
{
|
||||
info1.m_numConstraintRows = 0;
|
||||
info1.nub = 0;
|
||||
}
|
||||
totalNumRows += info1.m_numConstraintRows;
|
||||
}
|
||||
m_tmpSolverNonContactConstraintPool.resizeNoInitialize(totalNumRows);
|
||||
|
||||
|
||||
///setup the btSolverConstraints
|
||||
int currentRow = 0;
|
||||
|
||||
for (i=0;i<numConstraints;i++)
|
||||
{
|
||||
const btTypedConstraint::btConstraintInfo1& info1 = m_tmpConstraintSizesPool[i];
|
||||
|
||||
if (info1.m_numConstraintRows)
|
||||
{
|
||||
btAssert(currentRow<totalNumRows);
|
||||
|
||||
btSolverConstraint* currentConstraintRow = &m_tmpSolverNonContactConstraintPool[currentRow];
|
||||
btTypedConstraint* constraint = constraints[i];
|
||||
btRigidBody& rbA = constraint->getRigidBodyA();
|
||||
btRigidBody& rbB = constraint->getRigidBodyB();
|
||||
|
||||
int solverBodyIdA = getOrInitSolverBody(rbA,infoGlobal.m_timeStep);
|
||||
int solverBodyIdB = getOrInitSolverBody(rbB,infoGlobal.m_timeStep);
|
||||
|
||||
btSolverBody* bodyAPtr = &m_tmpSolverBodyPool[solverBodyIdA];
|
||||
btSolverBody* bodyBPtr = &m_tmpSolverBodyPool[solverBodyIdB];
|
||||
|
||||
|
||||
|
||||
|
||||
int overrideNumSolverIterations = constraint->getOverrideNumSolverIterations() > 0 ? constraint->getOverrideNumSolverIterations() : infoGlobal.m_numIterations;
|
||||
if (overrideNumSolverIterations>m_maxOverrideNumSolverIterations)
|
||||
m_maxOverrideNumSolverIterations = overrideNumSolverIterations;
|
||||
|
||||
|
||||
int j;
|
||||
for ( j=0;j<info1.m_numConstraintRows;j++)
|
||||
{
|
||||
memset(¤tConstraintRow[j],0,sizeof(btSolverConstraint));
|
||||
currentConstraintRow[j].m_lowerLimit = -SIMD_INFINITY;
|
||||
currentConstraintRow[j].m_upperLimit = SIMD_INFINITY;
|
||||
currentConstraintRow[j].m_appliedImpulse = 0.f;
|
||||
currentConstraintRow[j].m_appliedPushImpulse = 0.f;
|
||||
currentConstraintRow[j].m_solverBodyIdA = solverBodyIdA;
|
||||
currentConstraintRow[j].m_solverBodyIdB = solverBodyIdB;
|
||||
currentConstraintRow[j].m_overrideNumSolverIterations = overrideNumSolverIterations;
|
||||
}
|
||||
|
||||
bodyAPtr->internalGetDeltaLinearVelocity().setValue(0.f,0.f,0.f);
|
||||
bodyAPtr->internalGetDeltaAngularVelocity().setValue(0.f,0.f,0.f);
|
||||
bodyAPtr->internalGetPushVelocity().setValue(0.f,0.f,0.f);
|
||||
bodyAPtr->internalGetTurnVelocity().setValue(0.f,0.f,0.f);
|
||||
bodyBPtr->internalGetDeltaLinearVelocity().setValue(0.f,0.f,0.f);
|
||||
bodyBPtr->internalGetDeltaAngularVelocity().setValue(0.f,0.f,0.f);
|
||||
bodyBPtr->internalGetPushVelocity().setValue(0.f,0.f,0.f);
|
||||
bodyBPtr->internalGetTurnVelocity().setValue(0.f,0.f,0.f);
|
||||
|
||||
|
||||
btTypedConstraint::btConstraintInfo2 info2;
|
||||
info2.fps = 1.f/infoGlobal.m_timeStep;
|
||||
info2.erp = infoGlobal.m_erp;
|
||||
info2.m_J1linearAxis = currentConstraintRow->m_contactNormal1;
|
||||
info2.m_J1angularAxis = currentConstraintRow->m_relpos1CrossNormal;
|
||||
info2.m_J2linearAxis = currentConstraintRow->m_contactNormal2;
|
||||
info2.m_J2angularAxis = currentConstraintRow->m_relpos2CrossNormal;
|
||||
info2.rowskip = sizeof(btSolverConstraint)/sizeof(btScalar);//check this
|
||||
///the size of btSolverConstraint needs be a multiple of btScalar
|
||||
btAssert(info2.rowskip*sizeof(btScalar)== sizeof(btSolverConstraint));
|
||||
info2.m_constraintError = ¤tConstraintRow->m_rhs;
|
||||
currentConstraintRow->m_cfm = infoGlobal.m_globalCfm;
|
||||
info2.m_damping = infoGlobal.m_damping;
|
||||
info2.cfm = ¤tConstraintRow->m_cfm;
|
||||
info2.m_lowerLimit = ¤tConstraintRow->m_lowerLimit;
|
||||
info2.m_upperLimit = ¤tConstraintRow->m_upperLimit;
|
||||
info2.m_numIterations = infoGlobal.m_numIterations;
|
||||
constraints[i]->getInfo2(&info2);
|
||||
|
||||
///finalize the constraint setup
|
||||
for ( j=0;j<info1.m_numConstraintRows;j++)
|
||||
{
|
||||
btSolverConstraint& solverConstraint = currentConstraintRow[j];
|
||||
|
||||
if (solverConstraint.m_upperLimit>=constraints[i]->getBreakingImpulseThreshold())
|
||||
{
|
||||
solverConstraint.m_upperLimit = constraints[i]->getBreakingImpulseThreshold();
|
||||
}
|
||||
|
||||
if (solverConstraint.m_lowerLimit<=-constraints[i]->getBreakingImpulseThreshold())
|
||||
{
|
||||
solverConstraint.m_lowerLimit = -constraints[i]->getBreakingImpulseThreshold();
|
||||
}
|
||||
|
||||
solverConstraint.m_originalContactPoint = constraint;
|
||||
|
||||
{
|
||||
const btVector3& ftorqueAxis1 = solverConstraint.m_relpos1CrossNormal;
|
||||
solverConstraint.m_angularComponentA = constraint->getRigidBodyA().getInvInertiaTensorWorld()*ftorqueAxis1*constraint->getRigidBodyA().getAngularFactor();
|
||||
}
|
||||
{
|
||||
const btVector3& ftorqueAxis2 = solverConstraint.m_relpos2CrossNormal;
|
||||
solverConstraint.m_angularComponentB = constraint->getRigidBodyB().getInvInertiaTensorWorld()*ftorqueAxis2*constraint->getRigidBodyB().getAngularFactor();
|
||||
}
|
||||
|
||||
{
|
||||
btVector3 iMJlA = solverConstraint.m_contactNormal1*rbA.getInvMass();
|
||||
btVector3 iMJaA = rbA.getInvInertiaTensorWorld()*solverConstraint.m_relpos1CrossNormal;
|
||||
btVector3 iMJlB = solverConstraint.m_contactNormal2*rbB.getInvMass();//sign of normal?
|
||||
btVector3 iMJaB = rbB.getInvInertiaTensorWorld()*solverConstraint.m_relpos2CrossNormal;
|
||||
|
||||
btScalar sum = iMJlA.dot(solverConstraint.m_contactNormal1);
|
||||
sum += iMJaA.dot(solverConstraint.m_relpos1CrossNormal);
|
||||
sum += iMJlB.dot(solverConstraint.m_contactNormal2);
|
||||
sum += iMJaB.dot(solverConstraint.m_relpos2CrossNormal);
|
||||
btScalar fsum = btFabs(sum);
|
||||
btAssert(fsum > SIMD_EPSILON);
|
||||
btScalar sorRelaxation = 1.f;//todo: get from globalInfo?
|
||||
solverConstraint.m_jacDiagABInv = fsum>SIMD_EPSILON?sorRelaxation/sum : 0.f;
|
||||
}
|
||||
|
||||
|
||||
|
||||
{
|
||||
btScalar rel_vel;
|
||||
btVector3 externalForceImpulseA = bodyAPtr->m_originalBody ? bodyAPtr->m_externalForceImpulse : btVector3(0,0,0);
|
||||
btVector3 externalTorqueImpulseA = bodyAPtr->m_originalBody ? bodyAPtr->m_externalTorqueImpulse : btVector3(0,0,0);
|
||||
|
||||
btVector3 externalForceImpulseB = bodyBPtr->m_originalBody ? bodyBPtr->m_externalForceImpulse : btVector3(0,0,0);
|
||||
btVector3 externalTorqueImpulseB = bodyBPtr->m_originalBody ?bodyBPtr->m_externalTorqueImpulse : btVector3(0,0,0);
|
||||
|
||||
btScalar vel1Dotn = solverConstraint.m_contactNormal1.dot(rbA.getLinearVelocity()+externalForceImpulseA)
|
||||
+ solverConstraint.m_relpos1CrossNormal.dot(rbA.getAngularVelocity()+externalTorqueImpulseA);
|
||||
|
||||
btScalar vel2Dotn = solverConstraint.m_contactNormal2.dot(rbB.getLinearVelocity()+externalForceImpulseB)
|
||||
+ solverConstraint.m_relpos2CrossNormal.dot(rbB.getAngularVelocity()+externalTorqueImpulseB);
|
||||
|
||||
rel_vel = vel1Dotn+vel2Dotn;
|
||||
btScalar restitution = 0.f;
|
||||
btScalar positionalError = solverConstraint.m_rhs;//already filled in by getConstraintInfo2
|
||||
btScalar velocityError = restitution - rel_vel * info2.m_damping;
|
||||
btScalar penetrationImpulse = positionalError*solverConstraint.m_jacDiagABInv;
|
||||
btScalar velocityImpulse = velocityError *solverConstraint.m_jacDiagABInv;
|
||||
solverConstraint.m_rhs = penetrationImpulse+velocityImpulse;
|
||||
solverConstraint.m_appliedImpulse = 0.f;
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
currentRow+=m_tmpConstraintSizesPool[i].m_numConstraintRows;
|
||||
}
|
||||
}
|
||||
convertJoints(constraints, numConstraints, infoGlobal);
|
||||
|
||||
convertContacts(manifoldPtr,numManifolds,infoGlobal);
|
||||
|
||||
}
|
||||
|
||||
// btContactSolverInfo info = infoGlobal;
|
||||
|
||||
@@ -1627,6 +1640,7 @@ btScalar btSequentialImpulseConstraintSolver::solveGroupCacheFriendlySetup(btCol
|
||||
|
||||
btScalar btSequentialImpulseConstraintSolver::solveSingleIteration(int iteration, btCollisionObject** /*bodies */,int /*numBodies*/,btPersistentManifold** /*manifoldPtr*/, int /*numManifolds*/,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal,btIDebugDraw* /*debugDrawer*/)
|
||||
{
|
||||
BT_PROFILE("solveSingleIteration");
|
||||
btScalar leastSquaresResidual = 0.f;
|
||||
|
||||
int numNonContactPool = m_tmpSolverNonContactConstraintPool.size();
|
||||
@@ -1805,6 +1819,7 @@ btScalar btSequentialImpulseConstraintSolver::solveSingleIteration(int iteration
|
||||
|
||||
void btSequentialImpulseConstraintSolver::solveGroupCacheFriendlySplitImpulseIterations(btCollisionObject** bodies,int numBodies,btPersistentManifold** manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal,btIDebugDraw* debugDrawer)
|
||||
{
|
||||
BT_PROFILE("solveGroupCacheFriendlySplitImpulseIterations");
|
||||
int iteration;
|
||||
if (infoGlobal.m_splitImpulse)
|
||||
{
|
||||
@@ -1863,14 +1878,9 @@ btScalar btSequentialImpulseConstraintSolver::solveGroupCacheFriendlyIterations(
|
||||
return 0.f;
|
||||
}
|
||||
|
||||
btScalar btSequentialImpulseConstraintSolver::solveGroupCacheFriendlyFinish(btCollisionObject** bodies,int numBodies,const btContactSolverInfo& infoGlobal)
|
||||
void btSequentialImpulseConstraintSolver::writeBackContacts(int iBegin, int iEnd, const btContactSolverInfo& infoGlobal)
|
||||
{
|
||||
int numPoolConstraints = m_tmpSolverContactConstraintPool.size();
|
||||
int i,j;
|
||||
|
||||
if (infoGlobal.m_solverMode & SOLVER_USE_WARMSTARTING)
|
||||
{
|
||||
for (j=0;j<numPoolConstraints;j++)
|
||||
for (int j=iBegin; j<iEnd; j++)
|
||||
{
|
||||
const btSolverConstraint& solveManifold = m_tmpSolverContactConstraintPool[j];
|
||||
btManifoldPoint* pt = (btManifoldPoint*) solveManifold.m_originalContactPoint;
|
||||
@@ -1888,8 +1898,9 @@ btScalar btSequentialImpulseConstraintSolver::solveGroupCacheFriendlyFinish(btCo
|
||||
}
|
||||
}
|
||||
|
||||
numPoolConstraints = m_tmpSolverNonContactConstraintPool.size();
|
||||
for (j=0;j<numPoolConstraints;j++)
|
||||
void btSequentialImpulseConstraintSolver::writeBackJoints(int iBegin, int iEnd, const btContactSolverInfo& infoGlobal)
|
||||
{
|
||||
for (int j=iBegin; j<iEnd; j++)
|
||||
{
|
||||
const btSolverConstraint& solverConstr = m_tmpSolverNonContactConstraintPool[j];
|
||||
btTypedConstraint* constr = (btTypedConstraint*)solverConstr.m_originalContactPoint;
|
||||
@@ -1909,10 +1920,12 @@ btScalar btSequentialImpulseConstraintSolver::solveGroupCacheFriendlyFinish(btCo
|
||||
constr->setEnabled(false);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
for ( i=0;i<m_tmpSolverBodyPool.size();i++)
|
||||
void btSequentialImpulseConstraintSolver::writeBackBodies(int iBegin, int iEnd, const btContactSolverInfo& infoGlobal)
|
||||
{
|
||||
for (int i=iBegin; i<iEnd; i++)
|
||||
{
|
||||
btRigidBody* body = m_tmpSolverBodyPool[i].m_originalBody;
|
||||
if (body)
|
||||
@@ -1936,6 +1949,19 @@ btScalar btSequentialImpulseConstraintSolver::solveGroupCacheFriendlyFinish(btCo
|
||||
m_tmpSolverBodyPool[i].m_originalBody->setCompanionId(-1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
btScalar btSequentialImpulseConstraintSolver::solveGroupCacheFriendlyFinish(btCollisionObject** bodies,int numBodies,const btContactSolverInfo& infoGlobal)
|
||||
{
|
||||
BT_PROFILE("solveGroupCacheFriendlyFinish");
|
||||
|
||||
if (infoGlobal.m_solverMode & SOLVER_USE_WARMSTARTING)
|
||||
{
|
||||
writeBackContacts(0, m_tmpSolverContactConstraintPool.size(), infoGlobal);
|
||||
}
|
||||
|
||||
writeBackJoints(0, m_tmpSolverNonContactConstraintPool.size(), infoGlobal);
|
||||
writeBackBodies(0, m_tmpSolverBodyPool.size(), infoGlobal);
|
||||
|
||||
m_tmpSolverContactConstraintPool.resizeNoInitialize(0);
|
||||
m_tmpSolverNonContactConstraintPool.resizeNoInitialize(0);
|
||||
|
||||
@@ -95,6 +95,10 @@ protected:
|
||||
|
||||
void convertContact(btPersistentManifold* manifold,const btContactSolverInfo& infoGlobal);
|
||||
|
||||
virtual void convertJoints(btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal);
|
||||
void convertJoint(btSolverConstraint* destConstraintRow, btTypedConstraint* srcConstraint, const btTypedConstraint::btConstraintInfo1& info1, int solverBodyIdA, int solverBodyIdB, const btContactSolverInfo& infoGlobal);
|
||||
|
||||
virtual void convertBodies(btCollisionObject** bodies, int numBodies, const btContactSolverInfo& infoGlobal);
|
||||
|
||||
btSimdScalar resolveSplitPenetrationSIMD(btSolverBody& bodyA,btSolverBody& bodyB, const btSolverConstraint& contactConstraint)
|
||||
{
|
||||
@@ -121,7 +125,9 @@ protected:
|
||||
|
||||
protected:
|
||||
|
||||
|
||||
void writeBackContacts(int iBegin, int iEnd, const btContactSolverInfo& infoGlobal);
|
||||
void writeBackJoints(int iBegin, int iEnd, const btContactSolverInfo& infoGlobal);
|
||||
void writeBackBodies(int iBegin, int iEnd, const btContactSolverInfo& infoGlobal);
|
||||
virtual void solveGroupCacheFriendlySplitImpulseIterations(btCollisionObject** bodies,int numBodies,btPersistentManifold** manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal,btIDebugDraw* debugDrawer);
|
||||
virtual btScalar solveGroupCacheFriendlyFinish(btCollisionObject** bodies,int numBodies,const btContactSolverInfo& infoGlobal);
|
||||
virtual btScalar solveSingleIteration(int iteration, btCollisionObject** bodies ,int numBodies,btPersistentManifold** manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal,btIDebugDraw* debugDrawer);
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,154 @@
|
||||
/*
|
||||
Bullet Continuous Collision Detection and Physics Library
|
||||
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
|
||||
#ifndef BT_SEQUENTIAL_IMPULSE_CONSTRAINT_SOLVER_MT_H
|
||||
#define BT_SEQUENTIAL_IMPULSE_CONSTRAINT_SOLVER_MT_H
|
||||
|
||||
#include "btSequentialImpulseConstraintSolver.h"
|
||||
#include "btBatchedConstraints.h"
|
||||
#include "LinearMath/btThreads.h"
|
||||
|
||||
///
|
||||
/// btSequentialImpulseConstraintSolverMt
|
||||
///
|
||||
/// A multithreaded variant of the sequential impulse constraint solver. The constraints to be solved are grouped into
|
||||
/// batches and phases where each batch of constraints within a given phase can be solved in parallel with the rest.
|
||||
/// Ideally we want as few phases as possible, and each phase should have many batches, and all of the batches should
|
||||
/// have about the same number of constraints.
|
||||
/// This method works best on a large island of many constraints.
|
||||
///
|
||||
/// Supports all of the features of the normal sequential impulse solver such as:
|
||||
/// - split penetration impulse
|
||||
/// - rolling friction
|
||||
/// - interleaving constraints
|
||||
/// - warmstarting
|
||||
/// - 2 friction directions
|
||||
/// - randomized constraint ordering
|
||||
/// - early termination when leastSquaresResidualThreshold is satisfied
|
||||
///
|
||||
/// When the SOLVER_INTERLEAVE_CONTACT_AND_FRICTION_CONSTRAINTS flag is enabled, unlike the normal SequentialImpulse solver,
|
||||
/// the rolling friction is interleaved as well.
|
||||
/// Interleaving the contact penetration constraints with friction reduces the number of parallel loops that need to be done,
|
||||
/// which reduces threading overhead so it can be a performance win, however, it does seem to produce a less stable simulation,
|
||||
/// at least on stacks of blocks.
|
||||
///
|
||||
/// When the SOLVER_RANDMIZE_ORDER flag is enabled, the ordering of phases, and the ordering of constraints within each batch
|
||||
/// is randomized, however it does not swap constraints between batches.
|
||||
/// This is to avoid regenerating the batches for each solver iteration which would be quite costly in performance.
|
||||
///
|
||||
/// Note that a non-zero leastSquaresResidualThreshold could possibly affect the determinism of the simulation
|
||||
/// if the task scheduler's parallelSum operation is non-deterministic. The parallelSum operation can be non-deterministic
|
||||
/// because floating point addition is not associative due to rounding errors.
|
||||
/// The task scheduler can and should ensure that the result of any parallelSum operation is deterministic.
|
||||
///
|
||||
ATTRIBUTE_ALIGNED16(class) btSequentialImpulseConstraintSolverMt : public btSequentialImpulseConstraintSolver
|
||||
{
|
||||
public:
|
||||
virtual void solveGroupCacheFriendlySplitImpulseIterations(btCollisionObject** bodies,int numBodies,btPersistentManifold** manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal,btIDebugDraw* debugDrawer) BT_OVERRIDE;
|
||||
virtual btScalar solveSingleIteration(int iteration, btCollisionObject** bodies ,int numBodies,btPersistentManifold** manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal,btIDebugDraw* debugDrawer) BT_OVERRIDE;
|
||||
virtual btScalar solveGroupCacheFriendlySetup(btCollisionObject** bodies,int numBodies,btPersistentManifold** manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal,btIDebugDraw* debugDrawer) BT_OVERRIDE;
|
||||
virtual btScalar solveGroupCacheFriendlyFinish(btCollisionObject** bodies, int numBodies, const btContactSolverInfo& infoGlobal) BT_OVERRIDE;
|
||||
|
||||
// temp struct used to collect info from persistent manifolds into a cache-friendly struct using multiple threads
|
||||
struct btContactManifoldCachedInfo
|
||||
{
|
||||
static const int MAX_NUM_CONTACT_POINTS = 4;
|
||||
|
||||
int numTouchingContacts;
|
||||
int solverBodyIds[ 2 ];
|
||||
int contactIndex;
|
||||
int rollingFrictionIndex;
|
||||
bool contactHasRollingFriction[ MAX_NUM_CONTACT_POINTS ];
|
||||
btManifoldPoint* contactPoints[ MAX_NUM_CONTACT_POINTS ];
|
||||
};
|
||||
// temp struct used for setting up joint constraints in parallel
|
||||
struct JointParams
|
||||
{
|
||||
int m_solverConstraint;
|
||||
int m_solverBodyA;
|
||||
int m_solverBodyB;
|
||||
};
|
||||
void internalInitMultipleJoints(btTypedConstraint** constraints, int iBegin, int iEnd);
|
||||
void internalConvertMultipleJoints( const btAlignedObjectArray<JointParams>& jointParamsArray, btTypedConstraint** constraints, int iBegin, int iEnd, const btContactSolverInfo& infoGlobal );
|
||||
|
||||
// parameters to control batching
|
||||
static bool s_allowNestedParallelForLoops; // whether to allow nested parallel operations
|
||||
static int s_minimumContactManifoldsForBatching; // don't even try to batch if fewer manifolds than this
|
||||
static btBatchedConstraints::BatchingMethod s_contactBatchingMethod;
|
||||
static btBatchedConstraints::BatchingMethod s_jointBatchingMethod;
|
||||
static int s_minBatchSize; // desired number of constraints per batch
|
||||
static int s_maxBatchSize;
|
||||
|
||||
protected:
|
||||
static const int CACHE_LINE_SIZE = 64;
|
||||
|
||||
btBatchedConstraints m_batchedContactConstraints;
|
||||
btBatchedConstraints m_batchedJointConstraints;
|
||||
int m_numFrictionDirections;
|
||||
bool m_useBatching;
|
||||
bool m_useObsoleteJointConstraints;
|
||||
btAlignedObjectArray<btContactManifoldCachedInfo> m_manifoldCachedInfoArray;
|
||||
btAlignedObjectArray<int> m_rollingFrictionIndexTable; // lookup table mapping contact index to rolling friction index
|
||||
btSpinMutex m_bodySolverArrayMutex;
|
||||
char m_antiFalseSharingPadding[CACHE_LINE_SIZE]; // padding to keep mutexes in separate cachelines
|
||||
btSpinMutex m_kinematicBodyUniqueIdToSolverBodyTableMutex;
|
||||
btAlignedObjectArray<char> m_scratchMemory;
|
||||
|
||||
virtual void randomizeConstraintOrdering( int iteration, int numIterations );
|
||||
virtual btScalar resolveAllJointConstraints( int iteration );
|
||||
virtual btScalar resolveAllContactConstraints();
|
||||
virtual btScalar resolveAllContactFrictionConstraints();
|
||||
virtual btScalar resolveAllContactConstraintsInterleaved();
|
||||
virtual btScalar resolveAllRollingFrictionConstraints();
|
||||
|
||||
virtual void setupBatchedContactConstraints();
|
||||
virtual void setupBatchedJointConstraints();
|
||||
virtual void convertJoints(btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal) BT_OVERRIDE;
|
||||
virtual void convertContacts(btPersistentManifold** manifoldPtr, int numManifolds, const btContactSolverInfo& infoGlobal) BT_OVERRIDE;
|
||||
virtual void convertBodies(btCollisionObject** bodies, int numBodies, const btContactSolverInfo& infoGlobal) BT_OVERRIDE;
|
||||
|
||||
int getOrInitSolverBodyThreadsafe(btCollisionObject& body, btScalar timeStep);
|
||||
void allocAllContactConstraints(btPersistentManifold** manifoldPtr, int numManifolds, const btContactSolverInfo& infoGlobal);
|
||||
void setupAllContactConstraints(const btContactSolverInfo& infoGlobal);
|
||||
void randomizeBatchedConstraintOrdering( btBatchedConstraints* batchedConstraints );
|
||||
|
||||
public:
|
||||
|
||||
BT_DECLARE_ALIGNED_ALLOCATOR();
|
||||
|
||||
btSequentialImpulseConstraintSolverMt();
|
||||
virtual ~btSequentialImpulseConstraintSolverMt();
|
||||
|
||||
btScalar resolveMultipleJointConstraints( const btAlignedObjectArray<int>& consIndices, int batchBegin, int batchEnd, int iteration );
|
||||
btScalar resolveMultipleContactConstraints( const btAlignedObjectArray<int>& consIndices, int batchBegin, int batchEnd );
|
||||
btScalar resolveMultipleContactSplitPenetrationImpulseConstraints( const btAlignedObjectArray<int>& consIndices, int batchBegin, int batchEnd );
|
||||
btScalar resolveMultipleContactFrictionConstraints( const btAlignedObjectArray<int>& consIndices, int batchBegin, int batchEnd );
|
||||
btScalar resolveMultipleContactRollingFrictionConstraints( const btAlignedObjectArray<int>& consIndices, int batchBegin, int batchEnd );
|
||||
btScalar resolveMultipleContactConstraintsInterleaved( const btAlignedObjectArray<int>& consIndices, int batchBegin, int batchEnd );
|
||||
|
||||
void internalCollectContactManifoldCachedInfo(btContactManifoldCachedInfo* cachedInfoArray, btPersistentManifold** manifold, int numManifolds, const btContactSolverInfo& infoGlobal);
|
||||
void internalAllocContactConstraints(const btContactManifoldCachedInfo* cachedInfoArray, int numManifolds);
|
||||
void internalSetupContactConstraints(int iContact, const btContactSolverInfo& infoGlobal);
|
||||
void internalConvertBodies(btCollisionObject** bodies, int iBegin, int iEnd, const btContactSolverInfo& infoGlobal);
|
||||
void internalWriteBackContacts(int iBegin, int iEnd, const btContactSolverInfo& infoGlobal);
|
||||
void internalWriteBackJoints(int iBegin, int iEnd, const btContactSolverInfo& infoGlobal);
|
||||
void internalWriteBackBodies(int iBegin, int iEnd, const btContactSolverInfo& infoGlobal);
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
#endif //BT_SEQUENTIAL_IMPULSE_CONSTRAINT_SOLVER_MT_H
|
||||
|
||||
@@ -50,63 +50,6 @@ subject to the following restrictions:
|
||||
#include "LinearMath/btSerializer.h"
|
||||
|
||||
|
||||
struct InplaceSolverIslandCallbackMt : public btSimulationIslandManagerMt::IslandCallback
|
||||
{
|
||||
btContactSolverInfo* m_solverInfo;
|
||||
btConstraintSolver* m_solver;
|
||||
btIDebugDraw* m_debugDrawer;
|
||||
btDispatcher* m_dispatcher;
|
||||
|
||||
InplaceSolverIslandCallbackMt(
|
||||
btConstraintSolver* solver,
|
||||
btStackAlloc* stackAlloc,
|
||||
btDispatcher* dispatcher)
|
||||
:m_solverInfo(NULL),
|
||||
m_solver(solver),
|
||||
m_debugDrawer(NULL),
|
||||
m_dispatcher(dispatcher)
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
InplaceSolverIslandCallbackMt& operator=(InplaceSolverIslandCallbackMt& other)
|
||||
{
|
||||
btAssert(0);
|
||||
(void)other;
|
||||
return *this;
|
||||
}
|
||||
|
||||
SIMD_FORCE_INLINE void setup ( btContactSolverInfo* solverInfo, btIDebugDraw* debugDrawer)
|
||||
{
|
||||
btAssert(solverInfo);
|
||||
m_solverInfo = solverInfo;
|
||||
m_debugDrawer = debugDrawer;
|
||||
}
|
||||
|
||||
|
||||
virtual void processIsland( btCollisionObject** bodies,
|
||||
int numBodies,
|
||||
btPersistentManifold** manifolds,
|
||||
int numManifolds,
|
||||
btTypedConstraint** constraints,
|
||||
int numConstraints,
|
||||
int islandId
|
||||
)
|
||||
{
|
||||
m_solver->solveGroup( bodies,
|
||||
numBodies,
|
||||
manifolds,
|
||||
numManifolds,
|
||||
constraints,
|
||||
numConstraints,
|
||||
*m_solverInfo,
|
||||
m_debugDrawer,
|
||||
m_dispatcher
|
||||
);
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
|
||||
///
|
||||
/// btConstraintSolverPoolMt
|
||||
@@ -209,7 +152,12 @@ void btConstraintSolverPoolMt::reset()
|
||||
/// btDiscreteDynamicsWorldMt
|
||||
///
|
||||
|
||||
btDiscreteDynamicsWorldMt::btDiscreteDynamicsWorldMt(btDispatcher* dispatcher, btBroadphaseInterface* pairCache, btConstraintSolverPoolMt* constraintSolver, btCollisionConfiguration* collisionConfiguration)
|
||||
btDiscreteDynamicsWorldMt::btDiscreteDynamicsWorldMt(btDispatcher* dispatcher,
|
||||
btBroadphaseInterface* pairCache,
|
||||
btConstraintSolverPoolMt* constraintSolver,
|
||||
btConstraintSolver* constraintSolverMt,
|
||||
btCollisionConfiguration* collisionConfiguration
|
||||
)
|
||||
: btDiscreteDynamicsWorld(dispatcher,pairCache,constraintSolver,collisionConfiguration)
|
||||
{
|
||||
if (m_ownsIslandManager)
|
||||
@@ -217,31 +165,18 @@ btDiscreteDynamicsWorldMt::btDiscreteDynamicsWorldMt(btDispatcher* dispatcher, b
|
||||
m_islandManager->~btSimulationIslandManager();
|
||||
btAlignedFree( m_islandManager);
|
||||
}
|
||||
{
|
||||
void* mem = btAlignedAlloc(sizeof(InplaceSolverIslandCallbackMt),16);
|
||||
m_solverIslandCallbackMt = new (mem) InplaceSolverIslandCallbackMt (m_constraintSolver, 0, dispatcher);
|
||||
}
|
||||
{
|
||||
void* mem = btAlignedAlloc(sizeof(btSimulationIslandManagerMt),16);
|
||||
btSimulationIslandManagerMt* im = new (mem) btSimulationIslandManagerMt();
|
||||
im->setMinimumSolverBatchSize( m_solverInfo.m_minimumSolverBatchSize );
|
||||
m_islandManager = im;
|
||||
}
|
||||
m_constraintSolverMt = constraintSolverMt;
|
||||
}
|
||||
|
||||
|
||||
btDiscreteDynamicsWorldMt::~btDiscreteDynamicsWorldMt()
|
||||
{
|
||||
if (m_solverIslandCallbackMt)
|
||||
{
|
||||
m_solverIslandCallbackMt->~InplaceSolverIslandCallbackMt();
|
||||
btAlignedFree(m_solverIslandCallbackMt);
|
||||
}
|
||||
if (m_ownsConstraintSolver)
|
||||
{
|
||||
m_constraintSolver->~btConstraintSolver();
|
||||
btAlignedFree(m_constraintSolver);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -249,12 +184,17 @@ void btDiscreteDynamicsWorldMt::solveConstraints(btContactSolverInfo& solverInfo
|
||||
{
|
||||
BT_PROFILE("solveConstraints");
|
||||
|
||||
m_solverIslandCallbackMt->setup(&solverInfo, getDebugDrawer());
|
||||
m_constraintSolver->prepareSolve(getCollisionWorld()->getNumCollisionObjects(), getCollisionWorld()->getDispatcher()->getNumManifolds());
|
||||
|
||||
/// solve all the constraints for this island
|
||||
btSimulationIslandManagerMt* im = static_cast<btSimulationIslandManagerMt*>(m_islandManager);
|
||||
im->buildAndProcessIslands( getCollisionWorld()->getDispatcher(), getCollisionWorld(), m_constraints, m_solverIslandCallbackMt );
|
||||
btSimulationIslandManagerMt::SolverParams solverParams;
|
||||
solverParams.m_solverPool = m_constraintSolver;
|
||||
solverParams.m_solverMt = m_constraintSolverMt;
|
||||
solverParams.m_solverInfo = &solverInfo;
|
||||
solverParams.m_debugDrawer = m_debugDrawer;
|
||||
solverParams.m_dispatcher = getCollisionWorld()->getDispatcher();
|
||||
im->buildAndProcessIslands( getCollisionWorld()->getDispatcher(), getCollisionWorld(), m_constraints, solverParams );
|
||||
|
||||
m_constraintSolver->allSolved(solverInfo, m_debugDrawer);
|
||||
}
|
||||
@@ -325,3 +265,14 @@ void btDiscreteDynamicsWorldMt::integrateTransforms( btScalar timeStep )
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
int btDiscreteDynamicsWorldMt::stepSimulation( btScalar timeStep, int maxSubSteps, btScalar fixedTimeStep )
|
||||
{
|
||||
int numSubSteps = btDiscreteDynamicsWorld::stepSimulation(timeStep, maxSubSteps, fixedTimeStep);
|
||||
if (btITaskScheduler* scheduler = btGetTaskScheduler())
|
||||
{
|
||||
// tell Bullet's threads to sleep, so other threads can run
|
||||
scheduler->sleepWorkerThreadsHint();
|
||||
}
|
||||
return numSubSteps;
|
||||
}
|
||||
|
||||
@@ -21,7 +21,6 @@ subject to the following restrictions:
|
||||
#include "btSimulationIslandManagerMt.h"
|
||||
#include "BulletDynamics/ConstraintSolver/btConstraintSolver.h"
|
||||
|
||||
struct InplaceSolverIslandCallbackMt;
|
||||
|
||||
///
|
||||
/// btConstraintSolverPoolMt - masquerades as a constraint solver, but really it is a threadsafe pool of them.
|
||||
@@ -88,7 +87,7 @@ private:
|
||||
ATTRIBUTE_ALIGNED16(class) btDiscreteDynamicsWorldMt : public btDiscreteDynamicsWorld
|
||||
{
|
||||
protected:
|
||||
InplaceSolverIslandCallbackMt* m_solverIslandCallbackMt;
|
||||
btConstraintSolver* m_constraintSolverMt;
|
||||
|
||||
virtual void solveConstraints(btContactSolverInfo& solverInfo) BT_OVERRIDE;
|
||||
|
||||
@@ -126,9 +125,12 @@ public:
|
||||
btDiscreteDynamicsWorldMt(btDispatcher* dispatcher,
|
||||
btBroadphaseInterface* pairCache,
|
||||
btConstraintSolverPoolMt* constraintSolver, // Note this should be a solver-pool for multi-threading
|
||||
btConstraintSolver* constraintSolverMt, // single multi-threaded solver for large islands (or NULL)
|
||||
btCollisionConfiguration* collisionConfiguration
|
||||
);
|
||||
virtual ~btDiscreteDynamicsWorldMt();
|
||||
|
||||
virtual int stepSimulation( btScalar timeStep, int maxSubSteps, btScalar fixedTimeStep ) BT_OVERRIDE;
|
||||
};
|
||||
|
||||
#endif //BT_DISCRETE_DYNAMICS_WORLD_H
|
||||
|
||||
@@ -22,6 +22,7 @@ subject to the following restrictions:
|
||||
#include "BulletCollision/CollisionDispatch/btCollisionObject.h"
|
||||
#include "BulletCollision/CollisionDispatch/btCollisionWorld.h"
|
||||
#include "BulletDynamics/ConstraintSolver/btTypedConstraint.h"
|
||||
#include "BulletDynamics/ConstraintSolver/btSequentialImpulseConstraintSolverMt.h" // for s_minimumContactManifoldsForBatching
|
||||
|
||||
//#include <stdio.h>
|
||||
#include "LinearMath/btQuickprof.h"
|
||||
@@ -275,7 +276,7 @@ btSimulationIslandManagerMt::Island* btSimulationIslandManagerMt::allocateIsland
|
||||
void btSimulationIslandManagerMt::buildIslands( btDispatcher* dispatcher, btCollisionWorld* collisionWorld )
|
||||
{
|
||||
|
||||
BT_PROFILE("islandUnionFindAndQuickSort");
|
||||
BT_PROFILE("buildIslands");
|
||||
|
||||
btCollisionObjectArray& collisionObjects = collisionWorld->getCollisionObjectArray();
|
||||
|
||||
@@ -544,59 +545,103 @@ void btSimulationIslandManagerMt::mergeIslands()
|
||||
}
|
||||
|
||||
|
||||
void btSimulationIslandManagerMt::serialIslandDispatch( btAlignedObjectArray<Island*>* islandsPtr, IslandCallback* callback )
|
||||
void btSimulationIslandManagerMt::solveIsland(btConstraintSolver* solver, Island& island, const SolverParams& solverParams)
|
||||
{
|
||||
btPersistentManifold** manifolds = island.manifoldArray.size() ? &island.manifoldArray[ 0 ] : NULL;
|
||||
btTypedConstraint** constraintsPtr = island.constraintArray.size() ? &island.constraintArray[ 0 ] : NULL;
|
||||
solver->solveGroup( &island.bodyArray[ 0 ],
|
||||
island.bodyArray.size(),
|
||||
manifolds,
|
||||
island.manifoldArray.size(),
|
||||
constraintsPtr,
|
||||
island.constraintArray.size(),
|
||||
*solverParams.m_solverInfo,
|
||||
solverParams.m_debugDrawer,
|
||||
solverParams.m_dispatcher
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
void btSimulationIslandManagerMt::serialIslandDispatch( btAlignedObjectArray<Island*>* islandsPtr, const SolverParams& solverParams )
|
||||
{
|
||||
BT_PROFILE( "serialIslandDispatch" );
|
||||
// serial dispatch
|
||||
btAlignedObjectArray<Island*>& islands = *islandsPtr;
|
||||
btConstraintSolver* solver = solverParams.m_solverMt ? solverParams.m_solverMt : solverParams.m_solverPool;
|
||||
for ( int i = 0; i < islands.size(); ++i )
|
||||
{
|
||||
Island* island = islands[ i ];
|
||||
btPersistentManifold** manifolds = island->manifoldArray.size() ? &island->manifoldArray[ 0 ] : NULL;
|
||||
btTypedConstraint** constraintsPtr = island->constraintArray.size() ? &island->constraintArray[ 0 ] : NULL;
|
||||
callback->processIsland( &island->bodyArray[ 0 ],
|
||||
island->bodyArray.size(),
|
||||
manifolds,
|
||||
island->manifoldArray.size(),
|
||||
constraintsPtr,
|
||||
island->constraintArray.size(),
|
||||
island->id
|
||||
);
|
||||
solveIsland(solver, *islands[ i ], solverParams);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
struct UpdateIslandDispatcher : public btIParallelForBody
|
||||
{
|
||||
btAlignedObjectArray<btSimulationIslandManagerMt::Island*>* islandsPtr;
|
||||
btSimulationIslandManagerMt::IslandCallback* callback;
|
||||
btAlignedObjectArray<btSimulationIslandManagerMt::Island*>& m_islandsPtr;
|
||||
const btSimulationIslandManagerMt::SolverParams& m_solverParams;
|
||||
|
||||
UpdateIslandDispatcher(btAlignedObjectArray<btSimulationIslandManagerMt::Island*>& islandsPtr, const btSimulationIslandManagerMt::SolverParams& solverParams)
|
||||
: m_islandsPtr(islandsPtr), m_solverParams(solverParams)
|
||||
{}
|
||||
|
||||
void forLoop( int iBegin, int iEnd ) const BT_OVERRIDE
|
||||
{
|
||||
btConstraintSolver* solver = m_solverParams.m_solverPool;
|
||||
for ( int i = iBegin; i < iEnd; ++i )
|
||||
{
|
||||
btSimulationIslandManagerMt::Island* island = ( *islandsPtr )[ i ];
|
||||
btPersistentManifold** manifolds = island->manifoldArray.size() ? &island->manifoldArray[ 0 ] : NULL;
|
||||
btTypedConstraint** constraintsPtr = island->constraintArray.size() ? &island->constraintArray[ 0 ] : NULL;
|
||||
callback->processIsland( &island->bodyArray[ 0 ],
|
||||
island->bodyArray.size(),
|
||||
manifolds,
|
||||
island->manifoldArray.size(),
|
||||
constraintsPtr,
|
||||
island->constraintArray.size(),
|
||||
island->id
|
||||
);
|
||||
btSimulationIslandManagerMt::Island* island = m_islandsPtr[ i ];
|
||||
btSimulationIslandManagerMt::solveIsland( solver, *island, m_solverParams );
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
void btSimulationIslandManagerMt::parallelIslandDispatch( btAlignedObjectArray<Island*>* islandsPtr, IslandCallback* callback )
|
||||
|
||||
void btSimulationIslandManagerMt::parallelIslandDispatch( btAlignedObjectArray<Island*>* islandsPtr, const SolverParams& solverParams )
|
||||
{
|
||||
BT_PROFILE( "parallelIslandDispatch" );
|
||||
int grainSize = 1; // iterations per task
|
||||
UpdateIslandDispatcher dispatcher;
|
||||
dispatcher.islandsPtr = islandsPtr;
|
||||
dispatcher.callback = callback;
|
||||
btParallelFor( 0, islandsPtr->size(), grainSize, dispatcher );
|
||||
//
|
||||
// if there are islands with many contacts, it may be faster to submit these
|
||||
// large islands *serially* to a single parallel constraint solver, and then later
|
||||
// submit the remaining smaller islands in parallel to multiple sequential solvers.
|
||||
//
|
||||
// Some task schedulers do not deal well with nested parallelFor loops. One implementation
|
||||
// of OpenMP was actually slower than doing everything single-threaded. Intel TBB
|
||||
// on the other hand, seems to do a pretty respectable job with it.
|
||||
//
|
||||
// When solving islands in parallel, the worst case performance happens when there
|
||||
// is one very large island and then perhaps a smattering of very small
|
||||
// islands -- one worker thread takes the large island and the remaining workers
|
||||
// tear through the smaller islands and then sit idle waiting for the first worker
|
||||
// to finish. Solving islands in parallel works best when there are numerous small
|
||||
// islands, roughly equal in size.
|
||||
//
|
||||
// By contrast, the other approach -- the parallel constraint solver -- is only
|
||||
// able to deliver a worthwhile speedup when the island is large. For smaller islands,
|
||||
// it is difficult to extract a useful amount of parallelism -- the overhead of grouping
|
||||
// the constraints into batches and sending the batches to worker threads can nullify
|
||||
// any gains from parallelism.
|
||||
//
|
||||
|
||||
UpdateIslandDispatcher dispatcher(*islandsPtr, solverParams);
|
||||
// We take advantage of the fact the islands are sorted in order of decreasing size
|
||||
int iBegin = 0;
|
||||
if (solverParams.m_solverMt)
|
||||
{
|
||||
while ( iBegin < islandsPtr->size() )
|
||||
{
|
||||
btSimulationIslandManagerMt::Island* island = ( *islandsPtr )[ iBegin ];
|
||||
if ( island->manifoldArray.size() < btSequentialImpulseConstraintSolverMt::s_minimumContactManifoldsForBatching )
|
||||
{
|
||||
// OK to submit the rest of the array in parallel
|
||||
break;
|
||||
}
|
||||
// serial dispatch to parallel solver for large islands (if any)
|
||||
solveIsland(solverParams.m_solverMt, *island, solverParams);
|
||||
++iBegin;
|
||||
}
|
||||
}
|
||||
// parallel dispatch to sequential solvers for rest
|
||||
btParallelFor( iBegin, islandsPtr->size(), 1, dispatcher );
|
||||
}
|
||||
|
||||
|
||||
@@ -604,15 +649,14 @@ void btSimulationIslandManagerMt::parallelIslandDispatch( btAlignedObjectArray<I
|
||||
void btSimulationIslandManagerMt::buildAndProcessIslands( btDispatcher* dispatcher,
|
||||
btCollisionWorld* collisionWorld,
|
||||
btAlignedObjectArray<btTypedConstraint*>& constraints,
|
||||
IslandCallback* callback
|
||||
const SolverParams& solverParams
|
||||
)
|
||||
{
|
||||
BT_PROFILE("buildAndProcessIslands");
|
||||
btCollisionObjectArray& collisionObjects = collisionWorld->getCollisionObjectArray();
|
||||
|
||||
buildIslands(dispatcher,collisionWorld);
|
||||
|
||||
BT_PROFILE("processIslands");
|
||||
|
||||
if(!getSplitIslands())
|
||||
{
|
||||
btPersistentManifold** manifolds = dispatcher->getInternalManifoldPointer();
|
||||
@@ -644,13 +688,16 @@ void btSimulationIslandManagerMt::buildAndProcessIslands( btDispatcher* dispatch
|
||||
}
|
||||
}
|
||||
btTypedConstraint** constraintsPtr = constraints.size() ? &constraints[ 0 ] : NULL;
|
||||
callback->processIsland(&collisionObjects[0],
|
||||
btConstraintSolver* solver = solverParams.m_solverMt ? solverParams.m_solverMt : solverParams.m_solverPool;
|
||||
solver->solveGroup(&collisionObjects[0],
|
||||
collisionObjects.size(),
|
||||
manifolds,
|
||||
maxNumManifolds,
|
||||
constraintsPtr,
|
||||
constraints.size(),
|
||||
-1
|
||||
*solverParams.m_solverInfo,
|
||||
solverParams.m_debugDrawer,
|
||||
solverParams.m_dispatcher
|
||||
);
|
||||
}
|
||||
else
|
||||
@@ -671,6 +718,6 @@ void btSimulationIslandManagerMt::buildAndProcessIslands( btDispatcher* dispatch
|
||||
mergeIslands();
|
||||
}
|
||||
// dispatch islands to solver
|
||||
m_islandDispatch( &m_activeIslands, callback );
|
||||
m_islandDispatch( &m_activeIslands, solverParams );
|
||||
}
|
||||
}
|
||||
|
||||
@@ -19,7 +19,9 @@ subject to the following restrictions:
|
||||
#include "BulletCollision/CollisionDispatch/btSimulationIslandManager.h"
|
||||
|
||||
class btTypedConstraint;
|
||||
|
||||
class btConstraintSolver;
|
||||
struct btContactSolverInfo;
|
||||
class btIDebugDraw;
|
||||
|
||||
///
|
||||
/// SimulationIslandManagerMt -- Multithread capable version of SimulationIslandManager
|
||||
@@ -45,22 +47,19 @@ public:
|
||||
|
||||
void append( const Island& other ); // add bodies, manifolds, constraints to my own
|
||||
};
|
||||
struct IslandCallback
|
||||
struct SolverParams
|
||||
{
|
||||
virtual ~IslandCallback() {};
|
||||
|
||||
virtual void processIsland( btCollisionObject** bodies,
|
||||
int numBodies,
|
||||
btPersistentManifold** manifolds,
|
||||
int numManifolds,
|
||||
btTypedConstraint** constraints,
|
||||
int numConstraints,
|
||||
int islandId
|
||||
) = 0;
|
||||
btConstraintSolver* m_solverPool;
|
||||
btConstraintSolver* m_solverMt;
|
||||
btContactSolverInfo* m_solverInfo;
|
||||
btIDebugDraw* m_debugDrawer;
|
||||
btDispatcher* m_dispatcher;
|
||||
};
|
||||
typedef void( *IslandDispatchFunc ) ( btAlignedObjectArray<Island*>* islands, IslandCallback* callback );
|
||||
static void serialIslandDispatch( btAlignedObjectArray<Island*>* islandsPtr, IslandCallback* callback );
|
||||
static void parallelIslandDispatch( btAlignedObjectArray<Island*>* islandsPtr, IslandCallback* callback );
|
||||
static void solveIsland(btConstraintSolver* solver, Island& island, const SolverParams& solverParams);
|
||||
|
||||
typedef void( *IslandDispatchFunc ) ( btAlignedObjectArray<Island*>* islands, const SolverParams& solverParams );
|
||||
static void serialIslandDispatch( btAlignedObjectArray<Island*>* islandsPtr, const SolverParams& solverParams );
|
||||
static void parallelIslandDispatch( btAlignedObjectArray<Island*>* islandsPtr, const SolverParams& solverParams );
|
||||
protected:
|
||||
btAlignedObjectArray<Island*> m_allocatedIslands; // owner of all Islands
|
||||
btAlignedObjectArray<Island*> m_activeIslands; // islands actively in use
|
||||
@@ -83,7 +82,11 @@ public:
|
||||
btSimulationIslandManagerMt();
|
||||
virtual ~btSimulationIslandManagerMt();
|
||||
|
||||
virtual void buildAndProcessIslands( btDispatcher* dispatcher, btCollisionWorld* collisionWorld, btAlignedObjectArray<btTypedConstraint*>& constraints, IslandCallback* callback );
|
||||
virtual void buildAndProcessIslands( btDispatcher* dispatcher,
|
||||
btCollisionWorld* collisionWorld,
|
||||
btAlignedObjectArray<btTypedConstraint*>& constraints,
|
||||
const SolverParams& solverParams
|
||||
);
|
||||
|
||||
virtual void buildIslands(btDispatcher* dispatcher,btCollisionWorld* colWorld);
|
||||
|
||||
@@ -106,5 +109,6 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
#endif //BT_SIMULATION_ISLAND_MANAGER_H
|
||||
|
||||
|
||||
@@ -14,6 +14,9 @@ SET(LinearMath_SRCS
|
||||
btSerializer64.cpp
|
||||
btThreads.cpp
|
||||
btVector3.cpp
|
||||
TaskScheduler/btTaskScheduler.cpp
|
||||
TaskScheduler/btThreadSupportPosix.cpp
|
||||
TaskScheduler/btThreadSupportWin32.cpp
|
||||
)
|
||||
|
||||
SET(LinearMath_HDRS
|
||||
@@ -44,6 +47,7 @@ SET(LinearMath_HDRS
|
||||
btTransform.h
|
||||
btTransformUtil.h
|
||||
btVector3.h
|
||||
TaskScheduler/btThreadSupportInterface.h
|
||||
)
|
||||
|
||||
ADD_LIBRARY(LinearMath ${LinearMath_SRCS} ${LinearMath_HDRS})
|
||||
|
||||
788
src/LinearMath/TaskScheduler/btTaskScheduler.cpp
Normal file
788
src/LinearMath/TaskScheduler/btTaskScheduler.cpp
Normal file
@@ -0,0 +1,788 @@
|
||||
|
||||
#include "LinearMath/btMinMax.h"
|
||||
#include "LinearMath/btAlignedObjectArray.h"
|
||||
#include "LinearMath/btThreads.h"
|
||||
#include "LinearMath/btQuickprof.h"
|
||||
#include <stdio.h>
|
||||
#include <algorithm>
|
||||
|
||||
|
||||
|
||||
#if BT_THREADSAFE
|
||||
|
||||
#include "btThreadSupportInterface.h"
|
||||
|
||||
#if defined( _WIN32 )
|
||||
|
||||
#define WIN32_LEAN_AND_MEAN
|
||||
|
||||
#include <windows.h>
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
typedef unsigned long long btU64;
|
||||
static const int kCacheLineSize = 64;
|
||||
|
||||
void btSpinPause()
|
||||
{
|
||||
#if defined( _WIN32 )
|
||||
YieldProcessor();
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
struct WorkerThreadStatus
|
||||
{
|
||||
enum Type
|
||||
{
|
||||
kInvalid,
|
||||
kWaitingForWork,
|
||||
kWorking,
|
||||
kSleeping,
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
ATTRIBUTE_ALIGNED64(class) WorkerThreadDirectives
|
||||
{
|
||||
static const int kMaxThreadCount = BT_MAX_THREAD_COUNT;
|
||||
// directives for all worker threads packed into a single cacheline
|
||||
char m_threadDirs[kMaxThreadCount];
|
||||
|
||||
public:
|
||||
enum Type
|
||||
{
|
||||
kInvalid,
|
||||
kGoToSleep, // go to sleep
|
||||
kStayAwakeButIdle, // wait for not checking job queue
|
||||
kScanForJobs, // actively scan job queue for jobs
|
||||
};
|
||||
WorkerThreadDirectives()
|
||||
{
|
||||
for ( int i = 0; i < kMaxThreadCount; ++i )
|
||||
{
|
||||
m_threadDirs[ i ] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
Type getDirective(int threadId)
|
||||
{
|
||||
btAssert(threadId < kMaxThreadCount);
|
||||
return static_cast<Type>(m_threadDirs[threadId]);
|
||||
}
|
||||
|
||||
void setDirectiveByRange(int threadBegin, int threadEnd, Type dir)
|
||||
{
|
||||
btAssert( threadBegin < threadEnd );
|
||||
btAssert( threadEnd <= kMaxThreadCount );
|
||||
char dirChar = static_cast<char>(dir);
|
||||
for ( int i = threadBegin; i < threadEnd; ++i )
|
||||
{
|
||||
m_threadDirs[ i ] = dirChar;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
class JobQueue;
|
||||
|
||||
ATTRIBUTE_ALIGNED64(struct) ThreadLocalStorage
|
||||
{
|
||||
int m_threadId;
|
||||
WorkerThreadStatus::Type m_status;
|
||||
int m_numJobsFinished;
|
||||
btSpinMutex m_mutex;
|
||||
btScalar m_sumResult;
|
||||
WorkerThreadDirectives * m_directive;
|
||||
JobQueue* m_queue;
|
||||
btClock* m_clock;
|
||||
unsigned int m_cooldownTime;
|
||||
};
|
||||
|
||||
|
||||
struct IJob
|
||||
{
|
||||
virtual void executeJob(int threadId) = 0;
|
||||
};
|
||||
|
||||
class ParallelForJob : public IJob
|
||||
{
|
||||
const btIParallelForBody* m_body;
|
||||
int m_begin;
|
||||
int m_end;
|
||||
|
||||
public:
|
||||
ParallelForJob( int iBegin, int iEnd, const btIParallelForBody& body )
|
||||
{
|
||||
m_body = &body;
|
||||
m_begin = iBegin;
|
||||
m_end = iEnd;
|
||||
}
|
||||
virtual void executeJob(int threadId) BT_OVERRIDE
|
||||
{
|
||||
BT_PROFILE( "executeJob" );
|
||||
|
||||
// call the functor body to do the work
|
||||
m_body->forLoop( m_begin, m_end );
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
class ParallelSumJob : public IJob
|
||||
{
|
||||
const btIParallelSumBody* m_body;
|
||||
ThreadLocalStorage* m_threadLocalStoreArray;
|
||||
int m_begin;
|
||||
int m_end;
|
||||
|
||||
public:
|
||||
ParallelSumJob( int iBegin, int iEnd, const btIParallelSumBody& body, ThreadLocalStorage* tls )
|
||||
{
|
||||
m_body = &body;
|
||||
m_threadLocalStoreArray = tls;
|
||||
m_begin = iBegin;
|
||||
m_end = iEnd;
|
||||
}
|
||||
virtual void executeJob( int threadId ) BT_OVERRIDE
|
||||
{
|
||||
BT_PROFILE( "executeJob" );
|
||||
|
||||
// call the functor body to do the work
|
||||
btScalar val = m_body->sumLoop( m_begin, m_end );
|
||||
#if BT_PARALLEL_SUM_DETERMINISTISM
|
||||
// by truncating bits of the result, we can make the parallelSum deterministic (at the expense of precision)
|
||||
const float TRUNC_SCALE = float(1<<19);
|
||||
val = floor(val*TRUNC_SCALE+0.5f)/TRUNC_SCALE; // truncate some bits
|
||||
#endif
|
||||
m_threadLocalStoreArray[threadId].m_sumResult += val;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
ATTRIBUTE_ALIGNED64(class) JobQueue
|
||||
{
|
||||
btThreadSupportInterface* m_threadSupport;
|
||||
btCriticalSection* m_queueLock;
|
||||
btSpinMutex m_mutex;
|
||||
|
||||
btAlignedObjectArray<IJob*> m_jobQueue;
|
||||
char* m_jobMem;
|
||||
int m_jobMemSize;
|
||||
bool m_queueIsEmpty;
|
||||
int m_tailIndex;
|
||||
int m_headIndex;
|
||||
int m_allocSize;
|
||||
bool m_useSpinMutex;
|
||||
btAlignedObjectArray<JobQueue*> m_neighborContexts;
|
||||
char m_cachePadding[kCacheLineSize]; // prevent false sharing
|
||||
|
||||
void freeJobMem()
|
||||
{
|
||||
if ( m_jobMem )
|
||||
{
|
||||
// free old
|
||||
btAlignedFree(m_jobMem);
|
||||
m_jobMem = NULL;
|
||||
}
|
||||
}
|
||||
void resizeJobMem(int newSize)
|
||||
{
|
||||
if (newSize > m_jobMemSize)
|
||||
{
|
||||
freeJobMem();
|
||||
m_jobMem = static_cast<char*>(btAlignedAlloc(newSize, kCacheLineSize));
|
||||
m_jobMemSize = newSize;
|
||||
}
|
||||
}
|
||||
|
||||
public:
|
||||
|
||||
JobQueue()
|
||||
{
|
||||
m_jobMem = NULL;
|
||||
m_jobMemSize = 0;
|
||||
m_threadSupport = NULL;
|
||||
m_queueLock = NULL;
|
||||
m_headIndex = 0;
|
||||
m_tailIndex = 0;
|
||||
m_useSpinMutex = false;
|
||||
}
|
||||
~JobQueue()
|
||||
{
|
||||
freeJobMem();
|
||||
if (m_queueLock && m_threadSupport)
|
||||
{
|
||||
m_threadSupport->deleteCriticalSection(m_queueLock);
|
||||
m_queueLock = NULL;
|
||||
}
|
||||
}
|
||||
void init(btThreadSupportInterface* threadSup, btAlignedObjectArray<JobQueue>* contextArray)
|
||||
{
|
||||
m_threadSupport = threadSup;
|
||||
if (threadSup)
|
||||
{
|
||||
m_queueLock = m_threadSupport->createCriticalSection();
|
||||
}
|
||||
setupJobStealing(contextArray, contextArray->size());
|
||||
}
|
||||
void setupJobStealing(btAlignedObjectArray<JobQueue>* contextArray, int numActiveContexts)
|
||||
{
|
||||
btAlignedObjectArray<JobQueue>& contexts = *contextArray;
|
||||
int selfIndex = 0;
|
||||
for (int i = 0; i < contexts.size(); ++i)
|
||||
{
|
||||
if ( this == &contexts[ i ] )
|
||||
{
|
||||
selfIndex = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
int numNeighbors = btMin(2, contexts.size() - 1);
|
||||
int neighborOffsets[ ] = {-1, 1, -2, 2, -3, 3};
|
||||
int numOffsets = sizeof(neighborOffsets)/sizeof(neighborOffsets[0]);
|
||||
m_neighborContexts.reserve( numNeighbors );
|
||||
m_neighborContexts.resizeNoInitialize(0);
|
||||
for (int i = 0; i < numOffsets && m_neighborContexts.size() < numNeighbors; i++)
|
||||
{
|
||||
int neighborIndex = selfIndex + neighborOffsets[i];
|
||||
if ( neighborIndex >= 0 && neighborIndex < numActiveContexts)
|
||||
{
|
||||
m_neighborContexts.push_back( &contexts[ neighborIndex ] );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool isQueueEmpty() const {return m_queueIsEmpty;}
|
||||
void lockQueue()
|
||||
{
|
||||
if ( m_useSpinMutex )
|
||||
{
|
||||
m_mutex.lock();
|
||||
}
|
||||
else
|
||||
{
|
||||
m_queueLock->lock();
|
||||
}
|
||||
}
|
||||
void unlockQueue()
|
||||
{
|
||||
if ( m_useSpinMutex )
|
||||
{
|
||||
m_mutex.unlock();
|
||||
}
|
||||
else
|
||||
{
|
||||
m_queueLock->unlock();
|
||||
}
|
||||
}
|
||||
void clearQueue(int jobCount, int jobSize)
|
||||
{
|
||||
lockQueue();
|
||||
m_headIndex = 0;
|
||||
m_tailIndex = 0;
|
||||
m_allocSize = 0;
|
||||
m_queueIsEmpty = true;
|
||||
int jobBufSize = jobSize * jobCount;
|
||||
// make sure we have enough memory allocated to store jobs
|
||||
if ( jobBufSize > m_jobMemSize )
|
||||
{
|
||||
resizeJobMem( jobBufSize );
|
||||
}
|
||||
// make sure job queue is big enough
|
||||
if ( jobCount > m_jobQueue.capacity() )
|
||||
{
|
||||
m_jobQueue.reserve( jobCount );
|
||||
}
|
||||
unlockQueue();
|
||||
m_jobQueue.resizeNoInitialize( 0 );
|
||||
}
|
||||
void* allocJobMem(int jobSize)
|
||||
{
|
||||
btAssert(m_jobMemSize >= (m_allocSize + jobSize));
|
||||
void* jobMem = &m_jobMem[m_allocSize];
|
||||
m_allocSize += jobSize;
|
||||
return jobMem;
|
||||
}
|
||||
void submitJob( IJob* job )
|
||||
{
|
||||
btAssert( reinterpret_cast<char*>( job ) >= &m_jobMem[ 0 ] && reinterpret_cast<char*>( job ) < &m_jobMem[ 0 ] + m_allocSize );
|
||||
m_jobQueue.push_back( job );
|
||||
lockQueue();
|
||||
m_tailIndex++;
|
||||
m_queueIsEmpty = false;
|
||||
unlockQueue();
|
||||
}
|
||||
IJob* consumeJobFromOwnQueue()
|
||||
{
|
||||
if ( m_queueIsEmpty )
|
||||
{
|
||||
// lock free path. even if this is taken erroneously it isn't harmful
|
||||
return NULL;
|
||||
}
|
||||
IJob* job = NULL;
|
||||
lockQueue();
|
||||
if ( !m_queueIsEmpty )
|
||||
{
|
||||
job = m_jobQueue[ m_headIndex++ ];
|
||||
btAssert( reinterpret_cast<char*>( job ) >= &m_jobMem[ 0 ] && reinterpret_cast<char*>( job ) < &m_jobMem[ 0 ] + m_allocSize );
|
||||
if ( m_headIndex == m_tailIndex )
|
||||
{
|
||||
m_queueIsEmpty = true;
|
||||
}
|
||||
}
|
||||
unlockQueue();
|
||||
return job;
|
||||
}
|
||||
IJob* consumeJob()
|
||||
{
|
||||
if (IJob* job = consumeJobFromOwnQueue())
|
||||
{
|
||||
return job;
|
||||
}
|
||||
// own queue is empty, try to steal from neighbor
|
||||
for (int i = 0; i < m_neighborContexts.size(); ++i)
|
||||
{
|
||||
JobQueue* otherContext = m_neighborContexts[ i ];
|
||||
if ( IJob* job = otherContext->consumeJobFromOwnQueue() )
|
||||
{
|
||||
return job;
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
static void WorkerThreadFunc( void* userPtr )
|
||||
{
|
||||
BT_PROFILE( "WorkerThreadFunc" );
|
||||
ThreadLocalStorage* localStorage = (ThreadLocalStorage*) userPtr;
|
||||
JobQueue* jobQueue = localStorage->m_queue;
|
||||
|
||||
bool shouldSleep = false;
|
||||
int threadId = localStorage->m_threadId;
|
||||
while (! shouldSleep)
|
||||
{
|
||||
// do work
|
||||
localStorage->m_mutex.lock();
|
||||
while ( IJob* job = jobQueue->consumeJob() )
|
||||
{
|
||||
localStorage->m_status = WorkerThreadStatus::kWorking;
|
||||
job->executeJob( threadId );
|
||||
localStorage->m_numJobsFinished++;
|
||||
}
|
||||
localStorage->m_status = WorkerThreadStatus::kWaitingForWork;
|
||||
localStorage->m_mutex.unlock();
|
||||
btU64 clockStart = localStorage->m_clock->getTimeMicroseconds();
|
||||
// while queue is empty,
|
||||
while (jobQueue->isQueueEmpty())
|
||||
{
|
||||
// todo: spin wait a bit to avoid hammering the empty queue
|
||||
btSpinPause();
|
||||
if ( localStorage->m_directive->getDirective(threadId) == WorkerThreadDirectives::kGoToSleep )
|
||||
{
|
||||
shouldSleep = true;
|
||||
break;
|
||||
}
|
||||
// if jobs are incoming,
|
||||
if ( localStorage->m_directive->getDirective( threadId ) == WorkerThreadDirectives::kScanForJobs )
|
||||
{
|
||||
clockStart = localStorage->m_clock->getTimeMicroseconds(); // reset clock
|
||||
}
|
||||
else
|
||||
{
|
||||
for ( int i = 0; i < 50; ++i )
|
||||
{
|
||||
btSpinPause();
|
||||
btSpinPause();
|
||||
btSpinPause();
|
||||
btSpinPause();
|
||||
if (localStorage->m_directive->getDirective( threadId ) == WorkerThreadDirectives::kScanForJobs || !jobQueue->isQueueEmpty())
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
// if no jobs incoming and queue has been empty for the cooldown time, sleep
|
||||
btU64 timeElapsed = localStorage->m_clock->getTimeMicroseconds() - clockStart;
|
||||
if (timeElapsed > localStorage->m_cooldownTime)
|
||||
{
|
||||
shouldSleep = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// go sleep
|
||||
localStorage->m_mutex.lock();
|
||||
localStorage->m_status = WorkerThreadStatus::kSleeping;
|
||||
localStorage->m_mutex.unlock();
|
||||
}
|
||||
|
||||
|
||||
class btTaskSchedulerDefault : public btITaskScheduler
|
||||
{
|
||||
btThreadSupportInterface* m_threadSupport;
|
||||
WorkerThreadDirectives* m_workerDirective;
|
||||
btAlignedObjectArray<JobQueue> m_jobQueues;
|
||||
btAlignedObjectArray<JobQueue*> m_perThreadJobQueues;
|
||||
btAlignedObjectArray<ThreadLocalStorage> m_threadLocalStorage;
|
||||
btSpinMutex m_antiNestingLock; // prevent nested parallel-for
|
||||
btClock m_clock;
|
||||
int m_numThreads;
|
||||
int m_numWorkerThreads;
|
||||
int m_numActiveJobQueues;
|
||||
int m_maxNumThreads;
|
||||
int m_numJobs;
|
||||
static const int kFirstWorkerThreadId = 1;
|
||||
public:
|
||||
|
||||
btTaskSchedulerDefault() : btITaskScheduler("ThreadSupport")
|
||||
{
|
||||
m_threadSupport = NULL;
|
||||
m_workerDirective = NULL;
|
||||
}
|
||||
|
||||
virtual ~btTaskSchedulerDefault()
|
||||
{
|
||||
waitForWorkersToSleep();
|
||||
if (m_threadSupport)
|
||||
{
|
||||
delete m_threadSupport;
|
||||
m_threadSupport = NULL;
|
||||
}
|
||||
if (m_workerDirective)
|
||||
{
|
||||
btAlignedFree(m_workerDirective);
|
||||
m_workerDirective = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
void init()
|
||||
{
|
||||
btThreadSupportInterface::ConstructionInfo constructionInfo( "TaskScheduler", WorkerThreadFunc );
|
||||
m_threadSupport = btThreadSupportInterface::create( constructionInfo );
|
||||
m_workerDirective = static_cast<WorkerThreadDirectives*>(btAlignedAlloc(sizeof(*m_workerDirective), 64));
|
||||
|
||||
m_numWorkerThreads = m_threadSupport->getNumWorkerThreads();
|
||||
m_maxNumThreads = m_threadSupport->getNumWorkerThreads() + 1;
|
||||
m_numThreads = m_maxNumThreads;
|
||||
// ideal to have one job queue for each physical processor (except for the main thread which needs no queue)
|
||||
int numThreadsPerQueue = m_threadSupport->getLogicalToPhysicalCoreRatio();
|
||||
int numJobQueues = (numThreadsPerQueue == 1) ? (m_maxNumThreads-1) : (m_maxNumThreads / numThreadsPerQueue);
|
||||
m_jobQueues.resize(numJobQueues);
|
||||
m_numActiveJobQueues = numJobQueues;
|
||||
for ( int i = 0; i < m_jobQueues.size(); ++i )
|
||||
{
|
||||
m_jobQueues[i].init( m_threadSupport, &m_jobQueues );
|
||||
}
|
||||
m_perThreadJobQueues.resize(m_numThreads);
|
||||
for ( int i = 0; i < m_numThreads; i++ )
|
||||
{
|
||||
JobQueue* jq = NULL;
|
||||
// only worker threads get a job queue
|
||||
if (i > 0)
|
||||
{
|
||||
if (numThreadsPerQueue == 1)
|
||||
{
|
||||
// one queue per worker thread
|
||||
jq = &m_jobQueues[ i - kFirstWorkerThreadId ];
|
||||
}
|
||||
else
|
||||
{
|
||||
// 2 threads share each queue
|
||||
jq = &m_jobQueues[ i / numThreadsPerQueue ];
|
||||
}
|
||||
}
|
||||
m_perThreadJobQueues[i] = jq;
|
||||
}
|
||||
m_threadLocalStorage.resize(m_numThreads);
|
||||
for ( int i = 0; i < m_numThreads; i++ )
|
||||
{
|
||||
ThreadLocalStorage& storage = m_threadLocalStorage[i];
|
||||
storage.m_threadId = i;
|
||||
storage.m_directive = m_workerDirective;
|
||||
storage.m_status = WorkerThreadStatus::kSleeping;
|
||||
storage.m_cooldownTime = 1000; // 1000 microseconds, threads go to sleep after this long if they have nothing to do
|
||||
storage.m_clock = &m_clock;
|
||||
storage.m_queue = m_perThreadJobQueues[i];
|
||||
}
|
||||
setWorkerDirectives( WorkerThreadDirectives::kGoToSleep ); // no work for them yet
|
||||
setNumThreads( m_threadSupport->getCacheFriendlyNumThreads() );
|
||||
}
|
||||
|
||||
void setWorkerDirectives(WorkerThreadDirectives::Type dir)
|
||||
{
|
||||
m_workerDirective->setDirectiveByRange(kFirstWorkerThreadId, m_numThreads, dir);
|
||||
}
|
||||
|
||||
virtual int getMaxNumThreads() const BT_OVERRIDE
|
||||
{
|
||||
return m_maxNumThreads;
|
||||
}
|
||||
|
||||
virtual int getNumThreads() const BT_OVERRIDE
|
||||
{
|
||||
return m_numThreads;
|
||||
}
|
||||
|
||||
virtual void setNumThreads( int numThreads ) BT_OVERRIDE
|
||||
{
|
||||
m_numThreads = btMax( btMin(numThreads, int(m_maxNumThreads)), 1 );
|
||||
m_numWorkerThreads = m_numThreads - 1;
|
||||
m_numActiveJobQueues = 0;
|
||||
// if there is at least 1 worker,
|
||||
if ( m_numWorkerThreads > 0 )
|
||||
{
|
||||
// re-setup job stealing between queues to avoid attempting to steal from an inactive job queue
|
||||
JobQueue* lastActiveContext = m_perThreadJobQueues[ m_numThreads - 1 ];
|
||||
int iLastActiveContext = lastActiveContext - &m_jobQueues[0];
|
||||
m_numActiveJobQueues = iLastActiveContext + 1;
|
||||
for ( int i = 0; i < m_jobQueues.size(); ++i )
|
||||
{
|
||||
m_jobQueues[ i ].setupJobStealing( &m_jobQueues, m_numActiveJobQueues );
|
||||
}
|
||||
}
|
||||
m_workerDirective->setDirectiveByRange(m_numThreads, BT_MAX_THREAD_COUNT, WorkerThreadDirectives::kGoToSleep);
|
||||
}
|
||||
|
||||
void waitJobs()
|
||||
{
|
||||
BT_PROFILE( "waitJobs" );
|
||||
// have the main thread work until the job queues are empty
|
||||
int numMainThreadJobsFinished = 0;
|
||||
for ( int i = 0; i < m_numActiveJobQueues; ++i )
|
||||
{
|
||||
while ( IJob* job = m_jobQueues[i].consumeJob() )
|
||||
{
|
||||
job->executeJob( 0 );
|
||||
numMainThreadJobsFinished++;
|
||||
}
|
||||
}
|
||||
|
||||
// done with jobs for now, tell workers to rest (but not sleep)
|
||||
setWorkerDirectives( WorkerThreadDirectives::kStayAwakeButIdle );
|
||||
|
||||
btU64 clockStart = m_clock.getTimeMicroseconds();
|
||||
// wait for workers to finish any jobs in progress
|
||||
while ( true )
|
||||
{
|
||||
int numWorkerJobsFinished = 0;
|
||||
for ( int iThread = kFirstWorkerThreadId; iThread < m_numThreads; ++iThread )
|
||||
{
|
||||
ThreadLocalStorage* storage = &m_threadLocalStorage[iThread];
|
||||
storage->m_mutex.lock();
|
||||
numWorkerJobsFinished += storage->m_numJobsFinished;
|
||||
storage->m_mutex.unlock();
|
||||
}
|
||||
if (numWorkerJobsFinished + numMainThreadJobsFinished == m_numJobs)
|
||||
{
|
||||
break;
|
||||
}
|
||||
btU64 timeElapsed = m_clock.getTimeMicroseconds() - clockStart;
|
||||
btAssert(timeElapsed < 1000);
|
||||
if (timeElapsed > 100000)
|
||||
{
|
||||
break;
|
||||
}
|
||||
btSpinPause();
|
||||
}
|
||||
}
|
||||
|
||||
void wakeWorkers(int numWorkersToWake)
|
||||
{
|
||||
BT_PROFILE( "wakeWorkers" );
|
||||
btAssert( m_workerDirective->getDirective(1) == WorkerThreadDirectives::kScanForJobs );
|
||||
int numDesiredWorkers = btMin(numWorkersToWake, m_numWorkerThreads);
|
||||
int numActiveWorkers = 0;
|
||||
for ( int iWorker = 0; iWorker < m_numWorkerThreads; ++iWorker )
|
||||
{
|
||||
// note this count of active workers is not necessarily totally reliable, because a worker thread could be
|
||||
// just about to put itself to sleep. So we may on occasion fail to wake up all the workers. It should be rare.
|
||||
ThreadLocalStorage& storage = m_threadLocalStorage[ kFirstWorkerThreadId + iWorker ];
|
||||
if (storage.m_status != WorkerThreadStatus::kSleeping)
|
||||
{
|
||||
numActiveWorkers++;
|
||||
}
|
||||
}
|
||||
for ( int iWorker = 0; iWorker < m_numWorkerThreads && numActiveWorkers < numDesiredWorkers; ++iWorker )
|
||||
{
|
||||
ThreadLocalStorage& storage = m_threadLocalStorage[ kFirstWorkerThreadId + iWorker ];
|
||||
if (storage.m_status == WorkerThreadStatus::kSleeping)
|
||||
{
|
||||
m_threadSupport->runTask( iWorker, &storage );
|
||||
numActiveWorkers++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void waitForWorkersToSleep()
|
||||
{
|
||||
BT_PROFILE( "waitForWorkersToSleep" );
|
||||
setWorkerDirectives( WorkerThreadDirectives::kGoToSleep );
|
||||
m_threadSupport->waitForAllTasks();
|
||||
for ( int i = kFirstWorkerThreadId; i < m_numThreads; i++ )
|
||||
{
|
||||
ThreadLocalStorage& storage = m_threadLocalStorage[i];
|
||||
btAssert( storage.m_status == WorkerThreadStatus::kSleeping );
|
||||
}
|
||||
}
|
||||
|
||||
virtual void sleepWorkerThreadsHint() BT_OVERRIDE
|
||||
{
|
||||
BT_PROFILE( "sleepWorkerThreadsHint" );
|
||||
// hint the task scheduler that we may not be using these threads for a little while
|
||||
setWorkerDirectives( WorkerThreadDirectives::kGoToSleep );
|
||||
}
|
||||
|
||||
void prepareWorkerThreads()
|
||||
{
|
||||
for ( int i = kFirstWorkerThreadId; i < m_numThreads; ++i )
|
||||
{
|
||||
ThreadLocalStorage& storage = m_threadLocalStorage[i];
|
||||
storage.m_mutex.lock();
|
||||
storage.m_numJobsFinished = 0;
|
||||
storage.m_mutex.unlock();
|
||||
}
|
||||
setWorkerDirectives( WorkerThreadDirectives::kScanForJobs );
|
||||
}
|
||||
|
||||
virtual void parallelFor( int iBegin, int iEnd, int grainSize, const btIParallelForBody& body ) BT_OVERRIDE
|
||||
{
|
||||
BT_PROFILE( "parallelFor_ThreadSupport" );
|
||||
btAssert( iEnd >= iBegin );
|
||||
btAssert( grainSize >= 1 );
|
||||
int iterationCount = iEnd - iBegin;
|
||||
if ( iterationCount > grainSize && m_numWorkerThreads > 0 && m_antiNestingLock.tryLock() )
|
||||
{
|
||||
typedef ParallelForJob JobType;
|
||||
int jobCount = ( iterationCount + grainSize - 1 ) / grainSize;
|
||||
m_numJobs = jobCount;
|
||||
btAssert( jobCount >= 2 ); // need more than one job for multithreading
|
||||
int jobSize = sizeof( JobType );
|
||||
|
||||
for (int i = 0; i < m_numActiveJobQueues; ++i)
|
||||
{
|
||||
m_jobQueues[i].clearQueue( jobCount, jobSize );
|
||||
}
|
||||
// prepare worker threads for incoming work
|
||||
prepareWorkerThreads();
|
||||
// submit all of the jobs
|
||||
int iJob = 0;
|
||||
int iThread = kFirstWorkerThreadId; // first worker thread
|
||||
for ( int i = iBegin; i < iEnd; i += grainSize )
|
||||
{
|
||||
btAssert( iJob < jobCount );
|
||||
int iE = btMin( i + grainSize, iEnd );
|
||||
JobQueue* jq = m_perThreadJobQueues[ iThread ];
|
||||
btAssert(jq);
|
||||
btAssert((jq - &m_jobQueues[0]) < m_numActiveJobQueues);
|
||||
void* jobMem = jq->allocJobMem(jobSize);
|
||||
JobType* job = new ( jobMem ) ParallelForJob( i, iE, body ); // placement new
|
||||
jq->submitJob( job );
|
||||
iJob++;
|
||||
iThread++;
|
||||
if ( iThread >= m_numThreads )
|
||||
{
|
||||
iThread = kFirstWorkerThreadId; // first worker thread
|
||||
}
|
||||
}
|
||||
wakeWorkers( jobCount - 1 );
|
||||
|
||||
// put the main thread to work on emptying the job queue and then wait for all workers to finish
|
||||
waitJobs();
|
||||
m_antiNestingLock.unlock();
|
||||
}
|
||||
else
|
||||
{
|
||||
BT_PROFILE( "parallelFor_mainThread" );
|
||||
// just run on main thread
|
||||
body.forLoop( iBegin, iEnd );
|
||||
}
|
||||
}
|
||||
virtual btScalar parallelSum( int iBegin, int iEnd, int grainSize, const btIParallelSumBody& body ) BT_OVERRIDE
|
||||
{
|
||||
BT_PROFILE( "parallelSum_ThreadSupport" );
|
||||
btAssert( iEnd >= iBegin );
|
||||
btAssert( grainSize >= 1 );
|
||||
int iterationCount = iEnd - iBegin;
|
||||
if ( iterationCount > grainSize && m_numWorkerThreads > 0 && m_antiNestingLock.tryLock() )
|
||||
{
|
||||
typedef ParallelSumJob JobType;
|
||||
int jobCount = ( iterationCount + grainSize - 1 ) / grainSize;
|
||||
m_numJobs = jobCount;
|
||||
btAssert( jobCount >= 2 ); // need more than one job for multithreading
|
||||
int jobSize = sizeof( JobType );
|
||||
for (int i = 0; i < m_numActiveJobQueues; ++i)
|
||||
{
|
||||
m_jobQueues[i].clearQueue( jobCount, jobSize );
|
||||
}
|
||||
|
||||
// initialize summation
|
||||
for ( int iThread = 0; iThread < m_numThreads; ++iThread )
|
||||
{
|
||||
m_threadLocalStorage[iThread].m_sumResult = btScalar(0);
|
||||
}
|
||||
|
||||
// prepare worker threads for incoming work
|
||||
prepareWorkerThreads();
|
||||
// submit all of the jobs
|
||||
int iJob = 0;
|
||||
int iThread = kFirstWorkerThreadId; // first worker thread
|
||||
for ( int i = iBegin; i < iEnd; i += grainSize )
|
||||
{
|
||||
btAssert( iJob < jobCount );
|
||||
int iE = btMin( i + grainSize, iEnd );
|
||||
JobQueue* jq = m_perThreadJobQueues[ iThread ];
|
||||
btAssert(jq);
|
||||
btAssert((jq - &m_jobQueues[0]) < m_numActiveJobQueues);
|
||||
void* jobMem = jq->allocJobMem(jobSize);
|
||||
JobType* job = new ( jobMem ) ParallelSumJob( i, iE, body, &m_threadLocalStorage[0] ); // placement new
|
||||
jq->submitJob( job );
|
||||
iJob++;
|
||||
iThread++;
|
||||
if ( iThread >= m_numThreads )
|
||||
{
|
||||
iThread = kFirstWorkerThreadId; // first worker thread
|
||||
}
|
||||
}
|
||||
wakeWorkers( jobCount - 1 );
|
||||
|
||||
// put the main thread to work on emptying the job queue and then wait for all workers to finish
|
||||
waitJobs();
|
||||
|
||||
// add up all the thread sums
|
||||
btScalar sum = btScalar(0);
|
||||
for ( int iThread = 0; iThread < m_numThreads; ++iThread )
|
||||
{
|
||||
sum += m_threadLocalStorage[ iThread ].m_sumResult;
|
||||
}
|
||||
m_antiNestingLock.unlock();
|
||||
return sum;
|
||||
}
|
||||
else
|
||||
{
|
||||
BT_PROFILE( "parallelSum_mainThread" );
|
||||
// just run on main thread
|
||||
return body.sumLoop( iBegin, iEnd );
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
|
||||
btITaskScheduler* btCreateDefaultTaskScheduler()
|
||||
{
|
||||
btTaskSchedulerDefault* ts = new btTaskSchedulerDefault();
|
||||
ts->init();
|
||||
return ts;
|
||||
}
|
||||
|
||||
#else // #if BT_THREADSAFE
|
||||
|
||||
btITaskScheduler* btCreateDefaultTaskScheduler()
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
#endif // #else // #if BT_THREADSAFE
|
||||
70
src/LinearMath/TaskScheduler/btThreadSupportInterface.h
Normal file
70
src/LinearMath/TaskScheduler/btThreadSupportInterface.h
Normal file
@@ -0,0 +1,70 @@
|
||||
/*
|
||||
Bullet Continuous Collision Detection and Physics Library
|
||||
Copyright (c) 2003-2018 Erwin Coumans http://bulletphysics.com
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
|
||||
#ifndef BT_THREAD_SUPPORT_INTERFACE_H
|
||||
#define BT_THREAD_SUPPORT_INTERFACE_H
|
||||
|
||||
|
||||
|
||||
class btCriticalSection
|
||||
{
|
||||
public:
|
||||
btCriticalSection() {}
|
||||
virtual ~btCriticalSection() {}
|
||||
|
||||
virtual void lock() = 0;
|
||||
virtual void unlock() = 0;
|
||||
};
|
||||
|
||||
|
||||
class btThreadSupportInterface
|
||||
{
|
||||
public:
|
||||
|
||||
virtual ~btThreadSupportInterface() {}
|
||||
|
||||
virtual int getNumWorkerThreads() const = 0; // number of worker threads (total number of logical processors - 1)
|
||||
virtual int getCacheFriendlyNumThreads() const = 0; // the number of logical processors sharing a single L3 cache
|
||||
virtual int getLogicalToPhysicalCoreRatio() const = 0; // the number of logical processors per physical processor (usually 1 or 2)
|
||||
virtual void runTask( int threadIndex, void* userData ) = 0;
|
||||
virtual void waitForAllTasks() = 0;
|
||||
|
||||
virtual btCriticalSection* createCriticalSection() = 0;
|
||||
virtual void deleteCriticalSection( btCriticalSection* criticalSection ) = 0;
|
||||
|
||||
typedef void( *ThreadFunc )( void* userPtr );
|
||||
|
||||
struct ConstructionInfo
|
||||
{
|
||||
ConstructionInfo( const char* uniqueName,
|
||||
ThreadFunc userThreadFunc,
|
||||
int threadStackSize = 65535
|
||||
)
|
||||
:m_uniqueName( uniqueName ),
|
||||
m_userThreadFunc( userThreadFunc ),
|
||||
m_threadStackSize( threadStackSize )
|
||||
{
|
||||
}
|
||||
|
||||
const char* m_uniqueName;
|
||||
ThreadFunc m_userThreadFunc;
|
||||
int m_threadStackSize;
|
||||
};
|
||||
|
||||
static btThreadSupportInterface* create( const ConstructionInfo& info );
|
||||
};
|
||||
|
||||
#endif //BT_THREAD_SUPPORT_INTERFACE_H
|
||||
|
||||
364
src/LinearMath/TaskScheduler/btThreadSupportPosix.cpp
Normal file
364
src/LinearMath/TaskScheduler/btThreadSupportPosix.cpp
Normal file
@@ -0,0 +1,364 @@
|
||||
/*
|
||||
Bullet Continuous Collision Detection and Physics Library
|
||||
Copyright (c) 2003-2018 Erwin Coumans http://bulletphysics.com
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
|
||||
|
||||
#if BT_THREADSAFE && !defined( _WIN32 )
|
||||
|
||||
|
||||
#include "LinearMath/btScalar.h"
|
||||
#include "LinearMath/btAlignedObjectArray.h"
|
||||
#include "LinearMath/btThreads.h"
|
||||
#include "LinearMath/btMinMax.h"
|
||||
#include "btThreadSupportInterface.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <errno.h>
|
||||
#include <unistd.h>
|
||||
|
||||
|
||||
#ifndef _XOPEN_SOURCE
|
||||
#define _XOPEN_SOURCE 600 //for definition of pthread_barrier_t, see http://pages.cs.wisc.edu/~travitch/pthreads_primer.html
|
||||
#endif //_XOPEN_SOURCE
|
||||
#include <pthread.h>
|
||||
#include <semaphore.h>
|
||||
#include <unistd.h> //for sysconf
|
||||
|
||||
|
||||
///
|
||||
/// getNumHardwareThreads()
|
||||
///
|
||||
///
|
||||
/// https://stackoverflow.com/questions/150355/programmatically-find-the-number-of-cores-on-a-machine
|
||||
///
|
||||
#if __cplusplus >= 201103L
|
||||
|
||||
#include <thread>
|
||||
|
||||
int btGetNumHardwareThreads()
|
||||
{
|
||||
return std::thread::hardware_concurrency();
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
int btGetNumHardwareThreads()
|
||||
{
|
||||
return sysconf( _SC_NPROCESSORS_ONLN );
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
// btThreadSupportPosix helps to initialize/shutdown libspe2, start/stop SPU tasks and communication
|
||||
class btThreadSupportPosix : public btThreadSupportInterface
|
||||
{
|
||||
public:
|
||||
struct btThreadStatus
|
||||
{
|
||||
int m_taskId;
|
||||
int m_commandId;
|
||||
int m_status;
|
||||
|
||||
ThreadFunc m_userThreadFunc;
|
||||
void* m_userPtr; //for taskDesc etc
|
||||
|
||||
pthread_t thread;
|
||||
//each tread will wait until this signal to start its work
|
||||
sem_t* startSemaphore;
|
||||
|
||||
// this is a copy of m_mainSemaphore,
|
||||
//each tread will signal once it is finished with its work
|
||||
sem_t* m_mainSemaphore;
|
||||
unsigned long threadUsed;
|
||||
};
|
||||
private:
|
||||
typedef unsigned long long UINT64;
|
||||
|
||||
btAlignedObjectArray<btThreadStatus> m_activeThreadStatus;
|
||||
// m_mainSemaphoresemaphore will signal, if and how many threads are finished with their work
|
||||
sem_t* m_mainSemaphore;
|
||||
int m_numThreads;
|
||||
UINT64 m_startedThreadsMask;
|
||||
void startThreads( const ConstructionInfo& threadInfo );
|
||||
void stopThreads();
|
||||
int waitForResponse();
|
||||
|
||||
public:
|
||||
btThreadSupportPosix( const ConstructionInfo& threadConstructionInfo );
|
||||
virtual ~btThreadSupportPosix();
|
||||
|
||||
virtual int getNumWorkerThreads() const BT_OVERRIDE { return m_numThreads; }
|
||||
// TODO: return the number of logical processors sharing the first L3 cache
|
||||
virtual int getCacheFriendlyNumThreads() const BT_OVERRIDE { return m_numThreads + 1; }
|
||||
// TODO: detect if CPU has hyperthreading enabled
|
||||
virtual int getLogicalToPhysicalCoreRatio() const BT_OVERRIDE { return 1; }
|
||||
|
||||
virtual void runTask( int threadIndex, void* userData ) BT_OVERRIDE;
|
||||
virtual void waitForAllTasks() BT_OVERRIDE;
|
||||
|
||||
virtual btCriticalSection* createCriticalSection() BT_OVERRIDE;
|
||||
virtual void deleteCriticalSection( btCriticalSection* criticalSection ) BT_OVERRIDE;
|
||||
};
|
||||
|
||||
|
||||
#define checkPThreadFunction(returnValue) \
|
||||
if(0 != returnValue) { \
|
||||
printf("PThread problem at line %i in file %s: %i %d\n", __LINE__, __FILE__, returnValue, errno); \
|
||||
}
|
||||
|
||||
// The number of threads should be equal to the number of available cores
|
||||
// Todo: each worker should be linked to a single core, using SetThreadIdealProcessor.
|
||||
|
||||
|
||||
btThreadSupportPosix::btThreadSupportPosix( const ConstructionInfo& threadConstructionInfo )
|
||||
{
|
||||
startThreads( threadConstructionInfo );
|
||||
}
|
||||
|
||||
// cleanup/shutdown Libspe2
|
||||
btThreadSupportPosix::~btThreadSupportPosix()
|
||||
{
|
||||
stopThreads();
|
||||
}
|
||||
|
||||
#if (defined (__APPLE__))
|
||||
#define NAMED_SEMAPHORES
|
||||
#endif
|
||||
|
||||
|
||||
static sem_t* createSem( const char* baseName )
|
||||
{
|
||||
static int semCount = 0;
|
||||
#ifdef NAMED_SEMAPHORES
|
||||
/// Named semaphore begin
|
||||
char name[ 32 ];
|
||||
snprintf( name, 32, "/%8.s-%4.d-%4.4d", baseName, getpid(), semCount++ );
|
||||
sem_t* tempSem = sem_open( name, O_CREAT, 0600, 0 );
|
||||
|
||||
if ( tempSem != reinterpret_cast<sem_t *>( SEM_FAILED ) )
|
||||
{
|
||||
// printf("Created \"%s\" Semaphore %p\n", name, tempSem);
|
||||
}
|
||||
else
|
||||
{
|
||||
//printf("Error creating Semaphore %d\n", errno);
|
||||
exit( -1 );
|
||||
}
|
||||
/// Named semaphore end
|
||||
#else
|
||||
sem_t* tempSem = new sem_t;
|
||||
checkPThreadFunction( sem_init( tempSem, 0, 0 ) );
|
||||
#endif
|
||||
return tempSem;
|
||||
}
|
||||
|
||||
static void destroySem( sem_t* semaphore )
|
||||
{
|
||||
#ifdef NAMED_SEMAPHORES
|
||||
checkPThreadFunction( sem_close( semaphore ) );
|
||||
#else
|
||||
checkPThreadFunction( sem_destroy( semaphore ) );
|
||||
delete semaphore;
|
||||
#endif
|
||||
}
|
||||
|
||||
static void *threadFunction( void *argument )
|
||||
{
|
||||
btThreadSupportPosix::btThreadStatus* status = ( btThreadSupportPosix::btThreadStatus* )argument;
|
||||
|
||||
while ( 1 )
|
||||
{
|
||||
checkPThreadFunction( sem_wait( status->startSemaphore ) );
|
||||
void* userPtr = status->m_userPtr;
|
||||
|
||||
if ( userPtr )
|
||||
{
|
||||
btAssert( status->m_status );
|
||||
status->m_userThreadFunc( userPtr );
|
||||
status->m_status = 2;
|
||||
checkPThreadFunction( sem_post( status->m_mainSemaphore ) );
|
||||
status->threadUsed++;
|
||||
}
|
||||
else
|
||||
{
|
||||
//exit Thread
|
||||
status->m_status = 3;
|
||||
checkPThreadFunction( sem_post( status->m_mainSemaphore ) );
|
||||
printf( "Thread with taskId %i exiting\n", status->m_taskId );
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
printf( "Thread TERMINATED\n" );
|
||||
}
|
||||
|
||||
///send messages to SPUs
|
||||
void btThreadSupportPosix::runTask( int threadIndex, void* userData )
|
||||
{
|
||||
///we should spawn an SPU task here, and in 'waitForResponse' it should wait for response of the (one of) the first tasks that finished
|
||||
btThreadStatus& threadStatus = m_activeThreadStatus[ threadIndex ];
|
||||
btAssert( threadIndex >= 0 );
|
||||
btAssert( threadIndex < m_activeThreadStatus.size() );
|
||||
|
||||
threadStatus.m_commandId = 1;
|
||||
threadStatus.m_status = 1;
|
||||
threadStatus.m_userPtr = userData;
|
||||
m_startedThreadsMask |= UINT64( 1 ) << threadIndex;
|
||||
|
||||
// fire event to start new task
|
||||
checkPThreadFunction( sem_post( threadStatus.startSemaphore ) );
|
||||
}
|
||||
|
||||
|
||||
///check for messages from SPUs
|
||||
int btThreadSupportPosix::waitForResponse()
|
||||
{
|
||||
///We should wait for (one of) the first tasks to finish (or other SPU messages), and report its response
|
||||
///A possible response can be 'yes, SPU handled it', or 'no, please do a PPU fallback'
|
||||
|
||||
btAssert( m_activeThreadStatus.size() );
|
||||
|
||||
// wait for any of the threads to finish
|
||||
checkPThreadFunction( sem_wait( m_mainSemaphore ) );
|
||||
// get at least one thread which has finished
|
||||
size_t last = -1;
|
||||
|
||||
for ( size_t t = 0; t < size_t( m_activeThreadStatus.size() ); ++t )
|
||||
{
|
||||
if ( 2 == m_activeThreadStatus[ t ].m_status )
|
||||
{
|
||||
last = t;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
btThreadStatus& threadStatus = m_activeThreadStatus[ last ];
|
||||
|
||||
btAssert( threadStatus.m_status > 1 );
|
||||
threadStatus.m_status = 0;
|
||||
|
||||
// need to find an active spu
|
||||
btAssert( last >= 0 );
|
||||
m_startedThreadsMask &= ~( UINT64( 1 ) << last );
|
||||
|
||||
return last;
|
||||
}
|
||||
|
||||
|
||||
void btThreadSupportPosix::waitForAllTasks()
|
||||
{
|
||||
while ( m_startedThreadsMask )
|
||||
{
|
||||
waitForResponse();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void btThreadSupportPosix::startThreads( const ConstructionInfo& threadConstructionInfo )
|
||||
{
|
||||
m_numThreads = btGetNumHardwareThreads() - 1; // main thread exists already
|
||||
printf( "%s creating %i threads.\n", __FUNCTION__, m_numThreads );
|
||||
m_activeThreadStatus.resize( m_numThreads );
|
||||
m_startedThreadsMask = 0;
|
||||
|
||||
m_mainSemaphore = createSem( "main" );
|
||||
//checkPThreadFunction(sem_wait(mainSemaphore));
|
||||
|
||||
for ( int i = 0; i < m_numThreads; i++ )
|
||||
{
|
||||
printf( "starting thread %d\n", i );
|
||||
btThreadStatus& threadStatus = m_activeThreadStatus[ i ];
|
||||
threadStatus.startSemaphore = createSem( "threadLocal" );
|
||||
checkPThreadFunction( pthread_create( &threadStatus.thread, NULL, &threadFunction, (void*) &threadStatus ) );
|
||||
|
||||
threadStatus.m_userPtr = 0;
|
||||
threadStatus.m_taskId = i;
|
||||
threadStatus.m_commandId = 0;
|
||||
threadStatus.m_status = 0;
|
||||
threadStatus.m_mainSemaphore = m_mainSemaphore;
|
||||
threadStatus.m_userThreadFunc = threadConstructionInfo.m_userThreadFunc;
|
||||
threadStatus.threadUsed = 0;
|
||||
|
||||
printf( "started thread %d \n", i );
|
||||
}
|
||||
}
|
||||
|
||||
///tell the task scheduler we are done with the SPU tasks
|
||||
void btThreadSupportPosix::stopThreads()
|
||||
{
|
||||
for ( size_t t = 0; t < size_t( m_activeThreadStatus.size() ); ++t )
|
||||
{
|
||||
btThreadStatus& threadStatus = m_activeThreadStatus[ t ];
|
||||
printf( "%s: Thread %i used: %ld\n", __FUNCTION__, int( t ), threadStatus.threadUsed );
|
||||
|
||||
threadStatus.m_userPtr = 0;
|
||||
checkPThreadFunction( sem_post( threadStatus.startSemaphore ) );
|
||||
checkPThreadFunction( sem_wait( m_mainSemaphore ) );
|
||||
|
||||
printf( "destroy semaphore\n" );
|
||||
destroySem( threadStatus.startSemaphore );
|
||||
printf( "semaphore destroyed\n" );
|
||||
checkPThreadFunction( pthread_join( threadStatus.thread, 0 ) );
|
||||
|
||||
}
|
||||
printf( "destroy main semaphore\n" );
|
||||
destroySem( m_mainSemaphore );
|
||||
printf( "main semaphore destroyed\n" );
|
||||
m_activeThreadStatus.clear();
|
||||
}
|
||||
|
||||
class btCriticalSectionPosix : public btCriticalSection
|
||||
{
|
||||
pthread_mutex_t m_mutex;
|
||||
|
||||
public:
|
||||
btCriticalSectionPosix()
|
||||
{
|
||||
pthread_mutex_init( &m_mutex, NULL );
|
||||
}
|
||||
virtual ~btCriticalSectionPosix()
|
||||
{
|
||||
pthread_mutex_destroy( &m_mutex );
|
||||
}
|
||||
|
||||
virtual void lock()
|
||||
{
|
||||
pthread_mutex_lock( &m_mutex );
|
||||
}
|
||||
virtual void unlock()
|
||||
{
|
||||
pthread_mutex_unlock( &m_mutex );
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
btCriticalSection* btThreadSupportPosix::createCriticalSection()
|
||||
{
|
||||
return new btCriticalSectionPosix();
|
||||
}
|
||||
|
||||
void btThreadSupportPosix::deleteCriticalSection( btCriticalSection* cs )
|
||||
{
|
||||
delete cs;
|
||||
}
|
||||
|
||||
|
||||
btThreadSupportInterface* btThreadSupportInterface::create( const ConstructionInfo& info )
|
||||
{
|
||||
return new btThreadSupportPosix( info );
|
||||
}
|
||||
|
||||
#endif // BT_THREADSAFE && !defined( _WIN32 )
|
||||
|
||||
472
src/LinearMath/TaskScheduler/btThreadSupportWin32.cpp
Normal file
472
src/LinearMath/TaskScheduler/btThreadSupportWin32.cpp
Normal file
@@ -0,0 +1,472 @@
|
||||
/*
|
||||
Bullet Continuous Collision Detection and Physics Library
|
||||
Copyright (c) 2003-2018 Erwin Coumans http://bulletphysics.com
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
|
||||
#if defined( _WIN32 ) && BT_THREADSAFE
|
||||
|
||||
#include "LinearMath/btScalar.h"
|
||||
#include "LinearMath/btMinMax.h"
|
||||
#include "LinearMath/btAlignedObjectArray.h"
|
||||
#include "LinearMath/btThreads.h"
|
||||
#include "btThreadSupportInterface.h"
|
||||
#include <windows.h>
|
||||
#include <stdio.h>
|
||||
|
||||
|
||||
struct btProcessorInfo
|
||||
{
|
||||
int numLogicalProcessors;
|
||||
int numCores;
|
||||
int numNumaNodes;
|
||||
int numL1Cache;
|
||||
int numL2Cache;
|
||||
int numL3Cache;
|
||||
int numPhysicalPackages;
|
||||
static const int maxNumTeamMasks = 32;
|
||||
int numTeamMasks;
|
||||
UINT64 processorTeamMasks[ maxNumTeamMasks ];
|
||||
};
|
||||
|
||||
UINT64 getProcessorTeamMask( const btProcessorInfo& procInfo, int procId )
|
||||
{
|
||||
UINT64 procMask = UINT64( 1 ) << procId;
|
||||
for ( int i = 0; i < procInfo.numTeamMasks; ++i )
|
||||
{
|
||||
if ( procMask & procInfo.processorTeamMasks[ i ] )
|
||||
{
|
||||
return procInfo.processorTeamMasks[ i ];
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int getProcessorTeamIndex( const btProcessorInfo& procInfo, int procId )
|
||||
{
|
||||
UINT64 procMask = UINT64( 1 ) << procId;
|
||||
for ( int i = 0; i < procInfo.numTeamMasks; ++i )
|
||||
{
|
||||
if ( procMask & procInfo.processorTeamMasks[ i ] )
|
||||
{
|
||||
return i;
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
int countSetBits( ULONG64 bits )
|
||||
{
|
||||
int count = 0;
|
||||
while ( bits )
|
||||
{
|
||||
if ( bits & 1 )
|
||||
{
|
||||
count++;
|
||||
}
|
||||
bits >>= 1;
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
|
||||
typedef BOOL( WINAPI *Pfn_GetLogicalProcessorInformation )( PSYSTEM_LOGICAL_PROCESSOR_INFORMATION, PDWORD );
|
||||
|
||||
|
||||
void getProcessorInformation( btProcessorInfo* procInfo )
|
||||
{
|
||||
memset( procInfo, 0, sizeof( *procInfo ) );
|
||||
Pfn_GetLogicalProcessorInformation getLogicalProcInfo =
|
||||
(Pfn_GetLogicalProcessorInformation) GetProcAddress( GetModuleHandle( TEXT( "kernel32" ) ), "GetLogicalProcessorInformation" );
|
||||
if ( getLogicalProcInfo == NULL )
|
||||
{
|
||||
// no info
|
||||
return;
|
||||
}
|
||||
PSYSTEM_LOGICAL_PROCESSOR_INFORMATION buf = NULL;
|
||||
DWORD bufSize = 0;
|
||||
while ( true )
|
||||
{
|
||||
if ( getLogicalProcInfo( buf, &bufSize ) )
|
||||
{
|
||||
break;
|
||||
}
|
||||
else
|
||||
{
|
||||
if ( GetLastError() == ERROR_INSUFFICIENT_BUFFER )
|
||||
{
|
||||
if ( buf )
|
||||
{
|
||||
free( buf );
|
||||
}
|
||||
buf = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION) malloc( bufSize );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int len = bufSize / sizeof( *buf );
|
||||
for ( int i = 0; i < len; ++i )
|
||||
{
|
||||
PSYSTEM_LOGICAL_PROCESSOR_INFORMATION info = buf + i;
|
||||
switch ( info->Relationship )
|
||||
{
|
||||
case RelationNumaNode:
|
||||
procInfo->numNumaNodes++;
|
||||
break;
|
||||
|
||||
case RelationProcessorCore:
|
||||
procInfo->numCores++;
|
||||
procInfo->numLogicalProcessors += countSetBits( info->ProcessorMask );
|
||||
break;
|
||||
|
||||
case RelationCache:
|
||||
if ( info->Cache.Level == 1 )
|
||||
{
|
||||
procInfo->numL1Cache++;
|
||||
}
|
||||
else if ( info->Cache.Level == 2 )
|
||||
{
|
||||
procInfo->numL2Cache++;
|
||||
}
|
||||
else if ( info->Cache.Level == 3 )
|
||||
{
|
||||
procInfo->numL3Cache++;
|
||||
// processors that share L3 cache are considered to be on the same team
|
||||
// because they can more easily work together on the same data.
|
||||
// Large performance penalties will occur if 2 or more threads from different
|
||||
// teams attempt to frequently read and modify the same cache lines.
|
||||
//
|
||||
// On the AMD Ryzen 7 CPU for example, the 8 cores on the CPU are split into
|
||||
// 2 CCX units of 4 cores each. Each CCX has a separate L3 cache, so if both
|
||||
// CCXs are operating on the same data, many cycles will be spent keeping the
|
||||
// two caches coherent.
|
||||
if ( procInfo->numTeamMasks < btProcessorInfo::maxNumTeamMasks )
|
||||
{
|
||||
procInfo->processorTeamMasks[ procInfo->numTeamMasks ] = info->ProcessorMask;
|
||||
procInfo->numTeamMasks++;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case RelationProcessorPackage:
|
||||
procInfo->numPhysicalPackages++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
free( buf );
|
||||
}
|
||||
|
||||
|
||||
|
||||
///btThreadSupportWin32 helps to initialize/shutdown libspe2, start/stop SPU tasks and communication
|
||||
class btThreadSupportWin32 : public btThreadSupportInterface
|
||||
{
|
||||
public:
|
||||
struct btThreadStatus
|
||||
{
|
||||
int m_taskId;
|
||||
int m_commandId;
|
||||
int m_status;
|
||||
|
||||
ThreadFunc m_userThreadFunc;
|
||||
void* m_userPtr; //for taskDesc etc
|
||||
|
||||
void* m_threadHandle; //this one is calling 'Win32ThreadFunc'
|
||||
|
||||
void* m_eventStartHandle;
|
||||
char m_eventStartHandleName[ 32 ];
|
||||
|
||||
void* m_eventCompleteHandle;
|
||||
char m_eventCompleteHandleName[ 32 ];
|
||||
};
|
||||
|
||||
private:
|
||||
btAlignedObjectArray<btThreadStatus> m_activeThreadStatus;
|
||||
btAlignedObjectArray<void*> m_completeHandles;
|
||||
int m_numThreads;
|
||||
DWORD_PTR m_startedThreadMask;
|
||||
btProcessorInfo m_processorInfo;
|
||||
|
||||
void startThreads( const ConstructionInfo& threadInfo );
|
||||
void stopThreads();
|
||||
int waitForResponse();
|
||||
|
||||
public:
|
||||
|
||||
btThreadSupportWin32( const ConstructionInfo& threadConstructionInfo );
|
||||
virtual ~btThreadSupportWin32();
|
||||
|
||||
virtual int getNumWorkerThreads() const BT_OVERRIDE { return m_numThreads; }
|
||||
virtual int getCacheFriendlyNumThreads() const BT_OVERRIDE { return countSetBits(m_processorInfo.processorTeamMasks[0]); }
|
||||
virtual int getLogicalToPhysicalCoreRatio() const BT_OVERRIDE { return m_processorInfo.numLogicalProcessors / m_processorInfo.numCores; }
|
||||
|
||||
virtual void runTask( int threadIndex, void* userData ) BT_OVERRIDE;
|
||||
virtual void waitForAllTasks() BT_OVERRIDE;
|
||||
|
||||
virtual btCriticalSection* createCriticalSection() BT_OVERRIDE;
|
||||
virtual void deleteCriticalSection( btCriticalSection* criticalSection ) BT_OVERRIDE;
|
||||
};
|
||||
|
||||
|
||||
btThreadSupportWin32::btThreadSupportWin32( const ConstructionInfo & threadConstructionInfo )
|
||||
{
|
||||
startThreads( threadConstructionInfo );
|
||||
}
|
||||
|
||||
|
||||
btThreadSupportWin32::~btThreadSupportWin32()
|
||||
{
|
||||
stopThreads();
|
||||
}
|
||||
|
||||
|
||||
DWORD WINAPI win32threadStartFunc( LPVOID lpParam )
|
||||
{
|
||||
btThreadSupportWin32::btThreadStatus* status = ( btThreadSupportWin32::btThreadStatus* )lpParam;
|
||||
|
||||
while ( 1 )
|
||||
{
|
||||
WaitForSingleObject( status->m_eventStartHandle, INFINITE );
|
||||
void* userPtr = status->m_userPtr;
|
||||
|
||||
if ( userPtr )
|
||||
{
|
||||
btAssert( status->m_status );
|
||||
status->m_userThreadFunc( userPtr );
|
||||
status->m_status = 2;
|
||||
SetEvent( status->m_eventCompleteHandle );
|
||||
}
|
||||
else
|
||||
{
|
||||
//exit Thread
|
||||
status->m_status = 3;
|
||||
printf( "Thread with taskId %i with handle %p exiting\n", status->m_taskId, status->m_threadHandle );
|
||||
SetEvent( status->m_eventCompleteHandle );
|
||||
break;
|
||||
}
|
||||
}
|
||||
printf( "Thread TERMINATED\n" );
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
void btThreadSupportWin32::runTask( int threadIndex, void* userData )
|
||||
{
|
||||
btThreadStatus& threadStatus = m_activeThreadStatus[ threadIndex ];
|
||||
btAssert( threadIndex >= 0 );
|
||||
btAssert( int( threadIndex ) < m_activeThreadStatus.size() );
|
||||
|
||||
threadStatus.m_commandId = 1;
|
||||
threadStatus.m_status = 1;
|
||||
threadStatus.m_userPtr = userData;
|
||||
m_startedThreadMask |= DWORD_PTR( 1 ) << threadIndex;
|
||||
|
||||
///fire event to start new task
|
||||
SetEvent( threadStatus.m_eventStartHandle );
|
||||
}
|
||||
|
||||
|
||||
int btThreadSupportWin32::waitForResponse()
|
||||
{
|
||||
btAssert( m_activeThreadStatus.size() );
|
||||
|
||||
int last = -1;
|
||||
DWORD res = WaitForMultipleObjects( m_completeHandles.size(), &m_completeHandles[ 0 ], FALSE, INFINITE );
|
||||
btAssert( res != WAIT_FAILED );
|
||||
last = res - WAIT_OBJECT_0;
|
||||
|
||||
btThreadStatus& threadStatus = m_activeThreadStatus[ last ];
|
||||
btAssert( threadStatus.m_threadHandle );
|
||||
btAssert( threadStatus.m_eventCompleteHandle );
|
||||
|
||||
//WaitForSingleObject(threadStatus.m_eventCompleteHandle, INFINITE);
|
||||
btAssert( threadStatus.m_status > 1 );
|
||||
threadStatus.m_status = 0;
|
||||
|
||||
///need to find an active spu
|
||||
btAssert( last >= 0 );
|
||||
m_startedThreadMask &= ~( DWORD_PTR( 1 ) << last );
|
||||
|
||||
return last;
|
||||
}
|
||||
|
||||
|
||||
void btThreadSupportWin32::waitForAllTasks()
|
||||
{
|
||||
while ( m_startedThreadMask )
|
||||
{
|
||||
waitForResponse();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void btThreadSupportWin32::startThreads( const ConstructionInfo& threadConstructionInfo )
|
||||
{
|
||||
static int uniqueId = 0;
|
||||
uniqueId++;
|
||||
btProcessorInfo& procInfo = m_processorInfo;
|
||||
getProcessorInformation( &procInfo );
|
||||
DWORD_PTR dwProcessAffinityMask = 0;
|
||||
DWORD_PTR dwSystemAffinityMask = 0;
|
||||
if ( !GetProcessAffinityMask( GetCurrentProcess(), &dwProcessAffinityMask, &dwSystemAffinityMask ) )
|
||||
{
|
||||
dwProcessAffinityMask = 0;
|
||||
}
|
||||
///The number of threads should be equal to the number of available cores - 1
|
||||
m_numThreads = btMin(procInfo.numLogicalProcessors, int(BT_MAX_THREAD_COUNT)) - 1; // cap to max thread count (-1 because main thread already exists)
|
||||
|
||||
m_activeThreadStatus.resize( m_numThreads );
|
||||
m_completeHandles.resize( m_numThreads );
|
||||
m_startedThreadMask = 0;
|
||||
|
||||
// set main thread affinity
|
||||
if ( DWORD_PTR mask = dwProcessAffinityMask & getProcessorTeamMask( procInfo, 0 ))
|
||||
{
|
||||
SetThreadAffinityMask( GetCurrentThread(), mask );
|
||||
SetThreadIdealProcessor( GetCurrentThread(), 0 );
|
||||
}
|
||||
|
||||
for ( int i = 0; i < m_numThreads; i++ )
|
||||
{
|
||||
printf( "starting thread %d\n", i );
|
||||
|
||||
btThreadStatus& threadStatus = m_activeThreadStatus[ i ];
|
||||
|
||||
LPSECURITY_ATTRIBUTES lpThreadAttributes = NULL;
|
||||
SIZE_T dwStackSize = threadConstructionInfo.m_threadStackSize;
|
||||
LPTHREAD_START_ROUTINE lpStartAddress = &win32threadStartFunc;
|
||||
LPVOID lpParameter = &threadStatus;
|
||||
DWORD dwCreationFlags = 0;
|
||||
LPDWORD lpThreadId = 0;
|
||||
|
||||
threadStatus.m_userPtr = 0;
|
||||
|
||||
sprintf( threadStatus.m_eventStartHandleName, "es%.8s%d%d", threadConstructionInfo.m_uniqueName, uniqueId, i );
|
||||
threadStatus.m_eventStartHandle = CreateEventA( 0, false, false, threadStatus.m_eventStartHandleName );
|
||||
|
||||
sprintf( threadStatus.m_eventCompleteHandleName, "ec%.8s%d%d", threadConstructionInfo.m_uniqueName, uniqueId, i );
|
||||
threadStatus.m_eventCompleteHandle = CreateEventA( 0, false, false, threadStatus.m_eventCompleteHandleName );
|
||||
|
||||
m_completeHandles[ i ] = threadStatus.m_eventCompleteHandle;
|
||||
|
||||
HANDLE handle = CreateThread( lpThreadAttributes, dwStackSize, lpStartAddress, lpParameter, dwCreationFlags, lpThreadId );
|
||||
//SetThreadPriority( handle, THREAD_PRIORITY_HIGHEST );
|
||||
// highest priority -- can cause erratic performance when numThreads > numCores
|
||||
// we don't want worker threads to be higher priority than the main thread or the main thread could get
|
||||
// totally shut out and unable to tell the workers to stop
|
||||
//SetThreadPriority( handle, THREAD_PRIORITY_BELOW_NORMAL );
|
||||
|
||||
{
|
||||
int processorId = i + 1; // leave processor 0 for main thread
|
||||
DWORD_PTR teamMask = getProcessorTeamMask( procInfo, processorId );
|
||||
if ( teamMask )
|
||||
{
|
||||
// bind each thread to only execute on processors of it's assigned team
|
||||
// - for single-socket Intel x86 CPUs this has no effect (only a single, shared L3 cache so there is only 1 team)
|
||||
// - for multi-socket Intel this will keep threads from migrating from one socket to another
|
||||
// - for AMD Ryzen this will keep threads from migrating from one CCX to another
|
||||
DWORD_PTR mask = teamMask & dwProcessAffinityMask;
|
||||
if ( mask )
|
||||
{
|
||||
SetThreadAffinityMask( handle, mask );
|
||||
}
|
||||
}
|
||||
SetThreadIdealProcessor( handle, processorId );
|
||||
}
|
||||
|
||||
threadStatus.m_taskId = i;
|
||||
threadStatus.m_commandId = 0;
|
||||
threadStatus.m_status = 0;
|
||||
threadStatus.m_threadHandle = handle;
|
||||
threadStatus.m_userThreadFunc = threadConstructionInfo.m_userThreadFunc;
|
||||
|
||||
printf( "started %s thread %d with threadHandle %p\n", threadConstructionInfo.m_uniqueName, i, handle );
|
||||
}
|
||||
}
|
||||
|
||||
///tell the task scheduler we are done with the SPU tasks
|
||||
void btThreadSupportWin32::stopThreads()
|
||||
{
|
||||
for ( int i = 0; i < m_activeThreadStatus.size(); i++ )
|
||||
{
|
||||
btThreadStatus& threadStatus = m_activeThreadStatus[ i ];
|
||||
if ( threadStatus.m_status > 0 )
|
||||
{
|
||||
WaitForSingleObject( threadStatus.m_eventCompleteHandle, INFINITE );
|
||||
}
|
||||
|
||||
threadStatus.m_userPtr = NULL;
|
||||
SetEvent( threadStatus.m_eventStartHandle );
|
||||
WaitForSingleObject( threadStatus.m_eventCompleteHandle, INFINITE );
|
||||
|
||||
CloseHandle( threadStatus.m_eventCompleteHandle );
|
||||
CloseHandle( threadStatus.m_eventStartHandle );
|
||||
CloseHandle( threadStatus.m_threadHandle );
|
||||
|
||||
}
|
||||
|
||||
m_activeThreadStatus.clear();
|
||||
m_completeHandles.clear();
|
||||
}
|
||||
|
||||
|
||||
class btWin32CriticalSection : public btCriticalSection
|
||||
{
|
||||
private:
|
||||
CRITICAL_SECTION mCriticalSection;
|
||||
|
||||
public:
|
||||
btWin32CriticalSection()
|
||||
{
|
||||
InitializeCriticalSection( &mCriticalSection );
|
||||
}
|
||||
|
||||
~btWin32CriticalSection()
|
||||
{
|
||||
DeleteCriticalSection( &mCriticalSection );
|
||||
}
|
||||
|
||||
void lock()
|
||||
{
|
||||
EnterCriticalSection( &mCriticalSection );
|
||||
}
|
||||
|
||||
void unlock()
|
||||
{
|
||||
LeaveCriticalSection( &mCriticalSection );
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
btCriticalSection* btThreadSupportWin32::createCriticalSection()
|
||||
{
|
||||
unsigned char* mem = (unsigned char*) btAlignedAlloc( sizeof( btWin32CriticalSection ), 16 );
|
||||
btWin32CriticalSection* cs = new( mem ) btWin32CriticalSection();
|
||||
return cs;
|
||||
}
|
||||
|
||||
void btThreadSupportWin32::deleteCriticalSection( btCriticalSection* criticalSection )
|
||||
{
|
||||
criticalSection->~btCriticalSection();
|
||||
btAlignedFree( criticalSection );
|
||||
}
|
||||
|
||||
|
||||
btThreadSupportInterface* btThreadSupportInterface::create( const ConstructionInfo& info )
|
||||
{
|
||||
return new btThreadSupportWin32( info );
|
||||
}
|
||||
|
||||
|
||||
|
||||
#endif //defined(_WIN32) && BT_THREADSAFE
|
||||
|
||||
@@ -453,6 +453,33 @@ void btParallelFor( int iBegin, int iEnd, int grainSize, const btIParallelForBod
|
||||
#endif// #if BT_THREADSAFE
|
||||
}
|
||||
|
||||
btScalar btParallelSum( int iBegin, int iEnd, int grainSize, const btIParallelSumBody& body )
|
||||
{
|
||||
#if BT_THREADSAFE
|
||||
|
||||
#if BT_DETECT_BAD_THREAD_INDEX
|
||||
if ( !btThreadsAreRunning() )
|
||||
{
|
||||
// clear out thread ids
|
||||
for ( int i = 0; i < BT_MAX_THREAD_COUNT; ++i )
|
||||
{
|
||||
gDebugThreadIds[ i ] = kInvalidThreadId;
|
||||
}
|
||||
}
|
||||
#endif // #if BT_DETECT_BAD_THREAD_INDEX
|
||||
|
||||
btAssert( gBtTaskScheduler != NULL ); // call btSetTaskScheduler() with a valid task scheduler first!
|
||||
return gBtTaskScheduler->parallelSum( iBegin, iEnd, grainSize, body );
|
||||
|
||||
#else // #if BT_THREADSAFE
|
||||
|
||||
// non-parallel version of btParallelSum
|
||||
btAssert( !"called btParallelFor in non-threadsafe build. enable BT_THREADSAFE" );
|
||||
return body.sumLoop( iBegin, iEnd );
|
||||
|
||||
#endif //#else // #if BT_THREADSAFE
|
||||
}
|
||||
|
||||
|
||||
///
|
||||
/// btTaskSchedulerSequential -- non-threaded implementation of task scheduler
|
||||
@@ -470,6 +497,11 @@ public:
|
||||
BT_PROFILE( "parallelFor_sequential" );
|
||||
body.forLoop( iBegin, iEnd );
|
||||
}
|
||||
virtual btScalar parallelSum( int iBegin, int iEnd, int grainSize, const btIParallelSumBody& body ) BT_OVERRIDE
|
||||
{
|
||||
BT_PROFILE( "parallelSum_sequential" );
|
||||
return body.sumLoop( iBegin, iEnd );
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -514,11 +546,25 @@ public:
|
||||
#pragma omp parallel for schedule( static, 1 )
|
||||
for ( int i = iBegin; i < iEnd; i += grainSize )
|
||||
{
|
||||
BT_PROFILE( "OpenMP_job" );
|
||||
BT_PROFILE( "OpenMP_forJob" );
|
||||
body.forLoop( i, ( std::min )( i + grainSize, iEnd ) );
|
||||
}
|
||||
btPopThreadsAreRunning();
|
||||
}
|
||||
virtual btScalar parallelSum( int iBegin, int iEnd, int grainSize, const btIParallelSumBody& body ) BT_OVERRIDE
|
||||
{
|
||||
BT_PROFILE( "parallelFor_OpenMP" );
|
||||
btPushThreadsAreRunning();
|
||||
btScalar sum = btScalar( 0 );
|
||||
#pragma omp parallel for schedule( static, 1 ) reduction(+:sum)
|
||||
for ( int i = iBegin; i < iEnd; i += grainSize )
|
||||
{
|
||||
BT_PROFILE( "OpenMP_sumJob" );
|
||||
sum += body.sumLoop( i, ( std::min )( i + grainSize, iEnd ) );
|
||||
}
|
||||
btPopThreadsAreRunning();
|
||||
return sum;
|
||||
}
|
||||
};
|
||||
#endif // #if BT_USE_OPENMP && BT_THREADSAFE
|
||||
|
||||
@@ -571,22 +617,21 @@ public:
|
||||
btResetThreadIndexCounter();
|
||||
}
|
||||
}
|
||||
struct BodyAdapter
|
||||
struct ForBodyAdapter
|
||||
{
|
||||
const btIParallelForBody* mBody;
|
||||
|
||||
ForBodyAdapter( const btIParallelForBody* body ) : mBody( body ) {}
|
||||
void operator()( const tbb::blocked_range<int>& range ) const
|
||||
{
|
||||
BT_PROFILE( "TBB_job" );
|
||||
BT_PROFILE( "TBB_forJob" );
|
||||
mBody->forLoop( range.begin(), range.end() );
|
||||
}
|
||||
};
|
||||
virtual void parallelFor( int iBegin, int iEnd, int grainSize, const btIParallelForBody& body ) BT_OVERRIDE
|
||||
{
|
||||
BT_PROFILE( "parallelFor_TBB" );
|
||||
// TBB dispatch
|
||||
BodyAdapter tbbBody;
|
||||
tbbBody.mBody = &body;
|
||||
ForBodyAdapter tbbBody( &body );
|
||||
btPushThreadsAreRunning();
|
||||
tbb::parallel_for( tbb::blocked_range<int>( iBegin, iEnd, grainSize ),
|
||||
tbbBody,
|
||||
@@ -594,6 +639,29 @@ public:
|
||||
);
|
||||
btPopThreadsAreRunning();
|
||||
}
|
||||
struct SumBodyAdapter
|
||||
{
|
||||
const btIParallelSumBody* mBody;
|
||||
btScalar mSum;
|
||||
|
||||
SumBodyAdapter( const btIParallelSumBody* body ) : mBody( body ), mSum( btScalar( 0 ) ) {}
|
||||
SumBodyAdapter( const SumBodyAdapter& src, tbb::split ) : mBody( src.mBody ), mSum( btScalar( 0 ) ) {}
|
||||
void join( const SumBodyAdapter& src ) { mSum += src.mSum; }
|
||||
void operator()( const tbb::blocked_range<int>& range )
|
||||
{
|
||||
BT_PROFILE( "TBB_sumJob" );
|
||||
mSum += mBody->sumLoop( range.begin(), range.end() );
|
||||
}
|
||||
};
|
||||
virtual btScalar parallelSum( int iBegin, int iEnd, int grainSize, const btIParallelSumBody& body ) BT_OVERRIDE
|
||||
{
|
||||
BT_PROFILE( "parallelSum_TBB" );
|
||||
SumBodyAdapter tbbBody( &body );
|
||||
btPushThreadsAreRunning();
|
||||
tbb::parallel_deterministic_reduce( tbb::blocked_range<int>( iBegin, iEnd, grainSize ), tbbBody );
|
||||
btPopThreadsAreRunning();
|
||||
return tbbBody.mSum;
|
||||
}
|
||||
};
|
||||
#endif // #if BT_USE_TBB && BT_THREADSAFE
|
||||
|
||||
@@ -605,6 +673,7 @@ public:
|
||||
class btTaskSchedulerPPL : public btITaskScheduler
|
||||
{
|
||||
int m_numThreads;
|
||||
concurrency::combinable<btScalar> m_sum; // for parallelSum
|
||||
public:
|
||||
btTaskSchedulerPPL() : btITaskScheduler( "PPL" )
|
||||
{
|
||||
@@ -644,15 +713,16 @@ public:
|
||||
btResetThreadIndexCounter();
|
||||
}
|
||||
}
|
||||
struct BodyAdapter
|
||||
struct ForBodyAdapter
|
||||
{
|
||||
const btIParallelForBody* mBody;
|
||||
int mGrainSize;
|
||||
int mIndexEnd;
|
||||
|
||||
ForBodyAdapter( const btIParallelForBody* body, int grainSize, int end ) : mBody( body ), mGrainSize( grainSize ), mIndexEnd( end ) {}
|
||||
void operator()( int i ) const
|
||||
{
|
||||
BT_PROFILE( "PPL_job" );
|
||||
BT_PROFILE( "PPL_forJob" );
|
||||
mBody->forLoop( i, ( std::min )( i + mGrainSize, mIndexEnd ) );
|
||||
}
|
||||
};
|
||||
@@ -660,10 +730,7 @@ public:
|
||||
{
|
||||
BT_PROFILE( "parallelFor_PPL" );
|
||||
// PPL dispatch
|
||||
BodyAdapter pplBody;
|
||||
pplBody.mBody = &body;
|
||||
pplBody.mGrainSize = grainSize;
|
||||
pplBody.mIndexEnd = iEnd;
|
||||
ForBodyAdapter pplBody( &body, grainSize, iEnd );
|
||||
btPushThreadsAreRunning();
|
||||
// note: MSVC 2010 doesn't support partitioner args, so avoid them
|
||||
concurrency::parallel_for( iBegin,
|
||||
@@ -673,6 +740,36 @@ public:
|
||||
);
|
||||
btPopThreadsAreRunning();
|
||||
}
|
||||
struct SumBodyAdapter
|
||||
{
|
||||
const btIParallelSumBody* mBody;
|
||||
concurrency::combinable<btScalar>* mSum;
|
||||
int mGrainSize;
|
||||
int mIndexEnd;
|
||||
|
||||
SumBodyAdapter( const btIParallelSumBody* body, concurrency::combinable<btScalar>* sum, int grainSize, int end ) : mBody( body ), mSum(sum), mGrainSize( grainSize ), mIndexEnd( end ) {}
|
||||
void operator()( int i ) const
|
||||
{
|
||||
BT_PROFILE( "PPL_sumJob" );
|
||||
mSum->local() += mBody->sumLoop( i, ( std::min )( i + mGrainSize, mIndexEnd ) );
|
||||
}
|
||||
};
|
||||
static btScalar sumFunc( btScalar a, btScalar b ) { return a + b; }
|
||||
virtual btScalar parallelSum( int iBegin, int iEnd, int grainSize, const btIParallelSumBody& body ) BT_OVERRIDE
|
||||
{
|
||||
BT_PROFILE( "parallelSum_PPL" );
|
||||
m_sum.clear();
|
||||
SumBodyAdapter pplBody( &body, &m_sum, grainSize, iEnd );
|
||||
btPushThreadsAreRunning();
|
||||
// note: MSVC 2010 doesn't support partitioner args, so avoid them
|
||||
concurrency::parallel_for( iBegin,
|
||||
iEnd,
|
||||
grainSize,
|
||||
pplBody
|
||||
);
|
||||
btPopThreadsAreRunning();
|
||||
return m_sum.combine( sumFunc );
|
||||
}
|
||||
};
|
||||
#endif // #if BT_USE_PPL && BT_THREADSAFE
|
||||
|
||||
|
||||
@@ -107,6 +107,17 @@ public:
|
||||
virtual void forLoop( int iBegin, int iEnd ) const = 0;
|
||||
};
|
||||
|
||||
//
|
||||
// btIParallelSumBody -- subclass this to express work that can be done in parallel
|
||||
// and produces a sum over all loop elements
|
||||
//
|
||||
class btIParallelSumBody
|
||||
{
|
||||
public:
|
||||
virtual ~btIParallelSumBody() {}
|
||||
virtual btScalar sumLoop( int iBegin, int iEnd ) const = 0;
|
||||
};
|
||||
|
||||
//
|
||||
// btITaskScheduler -- subclass this to implement a task scheduler that can dispatch work to
|
||||
// worker threads
|
||||
@@ -122,6 +133,8 @@ public:
|
||||
virtual int getNumThreads() const = 0;
|
||||
virtual void setNumThreads( int numThreads ) = 0;
|
||||
virtual void parallelFor( int iBegin, int iEnd, int grainSize, const btIParallelForBody& body ) = 0;
|
||||
virtual btScalar parallelSum( int iBegin, int iEnd, int grainSize, const btIParallelSumBody& body ) = 0;
|
||||
virtual void sleepWorkerThreadsHint() {} // hint the task scheduler that we may not be using these threads for a little while
|
||||
|
||||
// internal use only
|
||||
virtual void activate();
|
||||
@@ -143,6 +156,9 @@ btITaskScheduler* btGetTaskScheduler();
|
||||
// get non-threaded task scheduler (always available)
|
||||
btITaskScheduler* btGetSequentialTaskScheduler();
|
||||
|
||||
// create a default task scheduler (Win32 or pthreads based)
|
||||
btITaskScheduler* btCreateDefaultTaskScheduler();
|
||||
|
||||
// get OpenMP task scheduler (if available, otherwise returns null)
|
||||
btITaskScheduler* btGetOpenMPTaskScheduler();
|
||||
|
||||
@@ -156,5 +172,9 @@ btITaskScheduler* btGetPPLTaskScheduler();
|
||||
// (iterations may be done out of order, so no dependencies are allowed)
|
||||
void btParallelFor( int iBegin, int iEnd, int grainSize, const btIParallelForBody& body );
|
||||
|
||||
// btParallelSum -- call this to dispatch work like a for-loop, returns the sum of all iterations
|
||||
// (iterations may be done out of order, so no dependencies are allowed)
|
||||
btScalar btParallelSum( int iBegin, int iEnd, int grainSize, const btIParallelSumBody& body );
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
@@ -9,5 +9,7 @@
|
||||
}
|
||||
files {
|
||||
"*.cpp",
|
||||
"*.h"
|
||||
"*.h",
|
||||
"TaskScheduler/*.cpp",
|
||||
"TaskScheduler/*.h"
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user