parallel solver: various changes

- threading: adding btSequentialImpulseConstraintSolverMt
 - task scheduler: added parallelSum so that parallel solver can compute residuals
 - CommonRigidBodyMTBase: add slider for solver least squares residual and allow multithreading without needing OpenMP, TBB, or PPL
 - task scheduler: don't wait for workers to sleep/signal at the end of each parallel block
 - parallel solver: convertContacts split into an allocContactConstraints and setupContactConstraints stage, the latter of which is done in parallel
 - parallel solver: rolling friction is now interleaved along with normal friction
 - parallel solver: batchified split impulse solving + some cleanup
 - parallel solver: sorting batches from largest to smallest
 - parallel solver: added parallel batch creation
 - parallel solver: added warmstartingWriteBackContacts func + other cleanup
 - task scheduler: truncate low bits to preserve determinism with parallelSum
 - parallel solver: reducing dynamic mem allocs and trying to parallelize more of the batch setup
 - parallel solver: parallelize updating constraint batch ids for merging
 - parallel solver: adding debug visualization
 - task scheduler: make TBB task scheduler parallelSum deterministic
 - parallel solver: split batch gen code into separate file; allow selection of batch gen method
 - task scheduler: add sleepWorkerThreadsHint() at end of simulation
 - parallel solver: added grain size per phase
 - task scheduler: fix for strange threading issue; also no need for main thread to wait for workers to sleep
 - base constraint solver: break out joint setup into separate function for profiling/overriding
 - parallel solver: allow different batching method for contacts vs joints
 - base constraint solver: add convertJoint and convertBodies to make it possible to parallelize joint and body conversion
 - parallel solver: convert joints and bodies in parallel now
 - parallel solver: speed up batch creation with run-length encoding
 - parallel solver: batch gen: run-length expansion in parallel; collect constraint info in parallel
 - parallel solver: adding spatial grid batching method
 - parallel solver: enhancements to spatial grid batching
 - sequential solver: moving code for writing back into functions that derived classes can call
 - parallel solver: do write back of bodies and joints in parallel
 - parallel solver: removed all batching methods except for spatial grid (others were ineffective)
 - parallel solver: added 2D or 3D grid batching options; and a bit of cleanup
 - move btDefaultTaskScheduler into LinearMath project
This commit is contained in:
Lunkhound
2017-06-04 17:57:25 -07:00
parent 94bc897067
commit b8720f2161
25 changed files with 5236 additions and 767 deletions

View File

@@ -325,3 +325,14 @@ void btDiscreteDynamicsWorldMt::integrateTransforms( btScalar timeStep )
}
}
//
// Steps the simulation by delegating to btDiscreteDynamicsWorld::stepSimulation,
// then hints the active task scheduler (if one is installed) that its worker
// threads may go to sleep until the next step.
// Returns whatever the base-class stepSimulation call returned.
//
int btDiscreteDynamicsWorldMt::stepSimulation( btScalar timeStep, int maxSubSteps, btScalar fixedTimeStep )
{
    const int subStepsTaken = btDiscreteDynamicsWorld::stepSimulation( timeStep, maxSubSteps, fixedTimeStep );

    btITaskScheduler* const taskScheduler = btGetTaskScheduler();
    if ( taskScheduler )
    {
        // the step is finished: let Bullet's worker threads sleep so that
        // other threads get a chance to run
        taskScheduler->sleepWorkerThreadsHint();
    }
    return subStepsTaken;
}

View File

@@ -129,6 +129,8 @@ public:
btCollisionConfiguration* collisionConfiguration
);
virtual ~btDiscreteDynamicsWorldMt();
// Overrides stepSimulation: the implementation forwards to
// btDiscreteDynamicsWorld::stepSimulation and afterwards calls
// sleepWorkerThreadsHint() on the current task scheduler, if any.
// NOTE(review): no default arguments are declared on this override -- confirm
// this matches how callers invoke it through a btDiscreteDynamicsWorldMt pointer.
virtual int stepSimulation( btScalar timeStep, int maxSubSteps, btScalar fixedTimeStep ) BT_OVERRIDE;
};
#endif //BT_DISCRETE_DYNAMICS_WORLD_H

View File

@@ -22,6 +22,7 @@ subject to the following restrictions:
#include "BulletCollision/CollisionDispatch/btCollisionObject.h"
#include "BulletCollision/CollisionDispatch/btCollisionWorld.h"
#include "BulletDynamics/ConstraintSolver/btTypedConstraint.h"
#include "BulletDynamics/ConstraintSolver/btSequentialImpulseConstraintSolverMt.h" // for s_minimumContactManifoldsForBatching
//#include <stdio.h>
#include "LinearMath/btQuickprof.h"
@@ -589,14 +590,52 @@ struct UpdateIslandDispatcher : public btIParallelForBody
}
};
//
// Hands every island in *islandsPtr to `callback` through an UpdateIslandDispatcher.
//
// The array is split into two consecutive ranges and each island is dispatched
// exactly once:
//   [0, iBegin)      -- leading islands whose manifold count is at least
//                       btSequentialImpulseConstraintSolverMt::s_minimumContactManifoldsForBatching;
//                       dispatched one after another from the calling thread
//   [iBegin, size)   -- the remaining islands; dispatched with btParallelFor
//
// islandsPtr : islands to dispatch (relied upon to be sorted by decreasing size)
// callback   : island callback stored on the dispatcher
//
void btSimulationIslandManagerMt::parallelIslandDispatch( btAlignedObjectArray<Island*>* islandsPtr, IslandCallback* callback )
{
    BT_PROFILE( "parallelIslandDispatch" );
    const int grainSize = 1;  // iterations per task
    //
    // if there are islands with many contacts, it may be faster to submit these
    // large islands *serially* to a single parallel constraint solver, and then later
    // submit the remaining smaller islands in parallel to multiple sequential solvers.
    //
    // Some task schedulers do not deal well with nested parallelFor loops. One implementation
    // of OpenMP was actually slower than doing everything single-threaded. Intel TBB
    // on the other hand, seems to do a pretty respectable job with it.
    //
    // When solving islands in parallel, the worst case performance happens when there
    // is one very large island and then perhaps a smattering of very small
    // islands -- one worker thread takes the large island and the remaining workers
    // tear through the smaller islands and then sit idle waiting for the first worker
    // to finish. Solving islands in parallel works best when there are numerous small
    // islands, roughly equal in size.
    //
    // By contrast, the other approach -- the parallel constraint solver -- is only
    // able to deliver a worthwhile speedup when the island is large. For smaller islands,
    // it is difficult to extract a useful amount of parallelism -- the overhead of grouping
    // the constraints into batches and sending the batches to worker threads can nullify
    // any gains from parallelism.
    //
    UpdateIslandDispatcher dispatcher;
    dispatcher.islandsPtr = islandsPtr;
    dispatcher.callback = callback;

    // NOTE: there must be no btParallelFor over the full [0, size) range here --
    // the two dispatches below already cover every island, so an extra full-range
    // pass would hand each island to the callback a second time.

    // We take advantage of the fact the islands are sorted in order of decreasing size
    int iBegin = 0;
    while ( iBegin < islandsPtr->size() )
    {
        btSimulationIslandManagerMt::Island* island = ( *islandsPtr )[ iBegin ];
        if ( island->manifoldArray.size() < btSequentialImpulseConstraintSolverMt::s_minimumContactManifoldsForBatching )
        {
            // OK to submit the rest of the array in parallel
            break;
        }
        ++iBegin;
    }
    // serial dispatch for large islands (if any)
    dispatcher.forLoop( 0, iBegin );
    // parallel dispatch for rest
    btParallelFor( iBegin, islandsPtr->size(), grainSize, dispatcher );
}

View File

@@ -106,5 +106,7 @@ public:
}
};
// Declaration only; the definition is not visible from this header.
// NOTE(review): presumably a manifold-count threshold for treating an island as
// "large" -- but the visible parallelIslandDispatch compares against
// btSequentialImpulseConstraintSolverMt::s_minimumContactManifoldsForBatching
// instead, so confirm where (and whether) this variable is actually used.
extern int gLargeIslandManifoldCount;
#endif //BT_SIMULATION_ISLAND_MANAGER_H