parallel solver: various changes
- threading: adding btSequentialImpulseConstraintSolverMt
- task scheduler: added parallelSum so that parallel solver can compute residuals
- CommonRigidBodyMTBase: add slider for solver least squares residual and allow multithreading without needing OpenMP, TBB, or PPL
- task scheduler: don't wait for workers to sleep/signal at the end of each parallel block
- parallel solver: convertContacts split into an allocContactConstraints and setupContactConstraints stage, the latter of which is done in parallel
- parallel solver: rolling friction is now interleaved along with normal friction
- parallel solver: batchified split impulse solving + some cleanup
- parallel solver: sorting batches from largest to smallest
- parallel solver: added parallel batch creation
- parallel solver: added warmstartingWriteBackContacts func + other cleanup
- task scheduler: truncate low bits to preserve determinism with parallelSum
- parallel solver: reducing dynamic mem allocs and trying to parallelize more of the batch setup
- parallel solver: parallelize updating constraint batch ids for merging
- parallel solver: adding debug visualization
- task scheduler: make TBB task scheduler parallelSum deterministic
- parallel solver: split batch gen code into separate file; allow selection of batch gen method
- task scheduler: add sleepWorkerThreadsHint() at end of simulation
- parallel solver: added grain size per phase
- task scheduler: fix for strange threading issue; also no need for main thread to wait for workers to sleep
- base constraint solver: break out joint setup into separate function for profiling/overriding
- parallel solver: allow different batching method for contacts vs joints
- base constraint solver: add convertJoint and convertBodies to make it possible to parallelize joint and body conversion
- parallel solver: convert joints and bodies in parallel now
- parallel solver: speed up batch creation with run-length encoding
- parallel solver: batch gen: run-length expansion in parallel; collect constraint info in parallel
- parallel solver: adding spatial grid batching method
- parallel solver: enhancements to spatial grid batching
- sequential solver: moving code for writing back into functions that derived classes can call
- parallel solver: do write back of bodies and joints in parallel
- parallel solver: removed all batching methods except for spatial grid (others were ineffective)
- parallel solver: added 2D or 3D grid batching options; and a bit of cleanup
- move btDefaultTaskScheduler into LinearMath project
This commit is contained in:
@@ -325,3 +325,14 @@ void btDiscreteDynamicsWorldMt::integrateTransforms( btScalar timeStep )
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/// Advances the simulation by delegating to btDiscreteDynamicsWorld::stepSimulation
/// with the arguments unchanged, then hints the current task scheduler (when
/// btGetTaskScheduler() returns one) that its worker threads may sleep.
/// Returns whatever the base-class stepSimulation call returned.
int btDiscreteDynamicsWorldMt::stepSimulation( btScalar timeStep, int maxSubSteps, btScalar fixedTimeStep )
{
    const int stepsTaken = btDiscreteDynamicsWorld::stepSimulation( timeStep, maxSubSteps, fixedTimeStep );

    btITaskScheduler* const activeScheduler = btGetTaskScheduler();
    if ( activeScheduler )
    {
        // the step is done: let Bullet's worker threads sleep so other threads can run
        activeScheduler->sleepWorkerThreadsHint();
    }

    return stepsTaken;
}
|
||||
|
||||
@@ -129,6 +129,8 @@ public:
|
||||
btCollisionConfiguration* collisionConfiguration
|
||||
);
|
||||
virtual ~btDiscreteDynamicsWorldMt();
|
||||
|
||||
// Calls btDiscreteDynamicsWorld::stepSimulation with the same arguments, then (if a task
// scheduler is available) hints it to let its worker threads sleep; returns the base call's result.
virtual int stepSimulation( btScalar timeStep, int maxSubSteps, btScalar fixedTimeStep ) BT_OVERRIDE;
|
||||
};
|
||||
|
||||
#endif //BT_DISCRETE_DYNAMICS_WORLD_H
|
||||
|
||||
@@ -22,6 +22,7 @@ subject to the following restrictions:
|
||||
#include "BulletCollision/CollisionDispatch/btCollisionObject.h"
|
||||
#include "BulletCollision/CollisionDispatch/btCollisionWorld.h"
|
||||
#include "BulletDynamics/ConstraintSolver/btTypedConstraint.h"
|
||||
#include "BulletDynamics/ConstraintSolver/btSequentialImpulseConstraintSolverMt.h" // for s_minimumContactManifoldsForBatching
|
||||
|
||||
//#include <stdio.h>
|
||||
#include "LinearMath/btQuickprof.h"
|
||||
@@ -589,14 +590,52 @@ struct UpdateIslandDispatcher : public btIParallelForBody
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
/// Hands every island in *islandsPtr to an UpdateIslandDispatcher exactly once.
///
/// The leading run of islands whose manifold count is at least
/// btSequentialImpulseConstraintSolverMt::s_minimumContactManifoldsForBatching is
/// dispatched serially on the calling thread; all remaining islands are dispatched
/// through btParallelFor.
///
/// @param islandsPtr  islands to dispatch; must be non-null and is expected to be
///                    sorted in order of decreasing size (see comment below)
/// @param callback    island callback stored on the dispatcher
void btSimulationIslandManagerMt::parallelIslandDispatch( btAlignedObjectArray<Island*>* islandsPtr, IslandCallback* callback )
{
    BT_PROFILE( "parallelIslandDispatch" );
    const int grainSize = 1;  // iterations per task
    //
    // if there are islands with many contacts, it may be faster to submit these
    // large islands *serially* to a single parallel constraint solver, and then later
    // submit the remaining smaller islands in parallel to multiple sequential solvers.
    //
    // Some task schedulers do not deal well with nested parallelFor loops. One implementation
    // of OpenMP was actually slower than doing everything single-threaded. Intel TBB
    // on the other hand, seems to do a pretty respectable job with it.
    //
    // When solving islands in parallel, the worst case performance happens when there
    // is one very large island and then perhaps a smattering of very small
    // islands -- one worker thread takes the large island and the remaining workers
    // tear through the smaller islands and then sit idle waiting for the first worker
    // to finish. Solving islands in parallel works best when there are numerous small
    // islands, roughly equal in size.
    //
    // By contrast, the other approach -- the parallel constraint solver -- is only
    // able to deliver a worthwhile speedup when the island is large. For smaller islands,
    // it is difficult to extract a useful amount of parallelism -- the overhead of grouping
    // the constraints into batches and sending the batches to worker threads can nullify
    // any gains from parallelism.
    //

    UpdateIslandDispatcher dispatcher;
    dispatcher.islandsPtr = islandsPtr;
    dispatcher.callback = callback;

    // NOTE: there must be no btParallelFor over the full range [0, size) here --
    // the serial + parallel dispatches below already cover every island, so an extra
    // full-range dispatch would process each island twice.

    // We take advantage of the fact the islands are sorted in order of decreasing size
    int iBegin = 0;
    while ( iBegin < islandsPtr->size() )
    {
        btSimulationIslandManagerMt::Island* island = ( *islandsPtr )[ iBegin ];
        if ( island->manifoldArray.size() < btSequentialImpulseConstraintSolverMt::s_minimumContactManifoldsForBatching )
        {
            // OK to submit the rest of the array in parallel
            break;
        }
        ++iBegin;
    }

    // serial dispatch for large islands (if any)
    dispatcher.forLoop( 0, iBegin );

    // parallel dispatch for rest
    btParallelFor( iBegin, islandsPtr->size(), grainSize, dispatcher );
}
|
||||
|
||||
|
||||
|
||||
@@ -106,5 +106,7 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
// NOTE(review): only declared here; neither its definition nor any use is visible in this change.
// From the name, presumably a manifold-count threshold for treating an island as "large" -- confirm
// against the definition before relying on it.
extern int gLargeIslandManifoldCount;
|
||||
|
||||
#endif //BT_SIMULATION_ISLAND_MANAGER_H
|
||||
|
||||
|
||||
Reference in New Issue
Block a user