parallel solver: small tweaks and fixes

Lunkhound
2018-03-06 02:28:23 -08:00
parent 45fd4acf6e
commit eec478709a
4 changed files with 14 additions and 50 deletions

View File

@@ -38,11 +38,10 @@ struct btBatchedConstraintInfo
struct btBatchInfo
{
int phaseId;
int numConstraints;
int mergeIndex;
btBatchInfo(int _phaseId = -1) : numConstraints(0), mergeIndex(-1), phaseId(_phaseId) {}
btBatchInfo() : numConstraints(0), mergeIndex(kNoMerge) {}
};
@@ -728,7 +727,6 @@ struct AssignConstraintsToGridBatchesParams
btIntVec3* bodyGridCoords;
int numBodies;
btBatchedConstraintInfo* conInfos;
char* constraintPhaseIds;
int* constraintBatchIds;
btIntVec3 gridChunkDim;
int maxNumBatchesPerPhase;
@@ -807,7 +805,6 @@ static void assignConstraintsToGridBatches(const AssignConstraintsToGridBatchesP
}
int iBatch = iPhase * params.maxNumBatchesPerPhase + chunkCoord[ 0 ] + chunkCoord[ 1 ] * gridChunkDim[ 0 ] + chunkCoord[ 2 ] * gridChunkDim[ 0 ] * gridChunkDim[ 1 ];
btAssert(iBatch >= 0 && iBatch < params.maxNumBatchesPerPhase*params.numPhases);
params.constraintPhaseIds[ iCon ] = iPhase;
params.constraintBatchIds[ iCon ] = iBatch;
}
}
@@ -834,8 +831,7 @@ struct AssignConstraintsToGridBatchesLoop : public btIParallelForBody
/*
Bodies are treated as 3D points at their center of mass. We only consider dynamic bodies at this stage,
kinematic and static bodies are dealt with at a later stage. Also we only consider constraints that
are between 2 dynamic bodies ("dynamic" constraints) -- constraints that involve a static or kinematic body are handled later
because only dynamic bodies are mutated when a constraint is solved, and are thus subject to race conditions.
1. Compute a bounding box around all dynamic bodies
2. Compute the maximum extent of all dynamic constraints. Each dynamic constraint is treated as a line segment, and we need the size of
@@ -845,15 +841,16 @@ are between 2 dynamic bodies ("dynamic" constraints) -- constraints that involve
so that no dynamic constraint can span more than 2 cells of our grid on any axis of the grid. The cell size should be adjusted
larger in order to keep the total number of cells from being excessively high
Key idea: Given that each constraint spans 1 or 2 grid cells in each dimension, we can handle all dynamic constraints by processing
Key idea: Given that each constraint spans 1 or 2 grid cells in each dimension, we can handle all constraints by processing
in chunks of 2x2x2 cells with 8 different 1-cell offsets ((0,0,0),(0,0,1),(0,1,0),(0,1,1),(1,0,0)...).
For each of the 8 offsets, we create a phase, and each 2x2x2 chunk that contains dynamic constraints becomes a batch in that phase.
Once all of the phases have been populated, if any of the phases end up with too few batches, they could possibly be merged with other phases.
4. Once the grid is established, we can calculate for each constraint which phase and batch it belongs in.
Finally, we handle all of the remaining (non-dynamic) constraints; these can be added to whichever phase is least populated to help
even things out.
5. Do a merge-small-batches pass on the batches of each phase separately, to try to even out the sizes of the batches
Optionally, we can "collapse" one dimension of our 3D grid to turn it into a 2D grid, which reduces the number of phases
to 4. With fewer phases, there are more constraints per phase and this makes it easier to create batches of a useful size.
*/
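To make the indexing above concrete, here is a minimal standalone sketch (not code from this file) of how a body's integer grid coordinate could be mapped to a phase and batch under the 2x2x2-chunk scheme. The offset-to-chunk mapping and the helper's name are illustrative assumptions; only the final iBatch linearization mirrors the formula used in assignConstraintsToGridBatches above.

// Illustrative helper (not part of this commit): map a grid coordinate to a
// (phase, batch) pair under the 2x2x2-chunks-with-8-offsets scheme.
struct GridBatchId { int phase; int batch; };

static GridBatchId gridCoordToPhaseAndBatch( const int coord[ 3 ],         // body grid coordinate
                                             const int offset[ 3 ],        // one of the 8 one-cell offsets
                                             const int gridChunkDim[ 3 ],  // number of chunks along each axis
                                             int iPhase,                   // phase index of this offset (0..7)
                                             int maxNumBatchesPerPhase )
{
    // shift by this phase's offset, then collapse each pair of cells into one 2x2x2 chunk
    int chunkCoord[ 3 ];
    for ( int i = 0; i < 3; ++i )
    {
        chunkCoord[ i ] = ( coord[ i ] + offset[ i ] ) >> 1;
    }
    // same linearization as assignConstraintsToGridBatches: each phase owns a
    // contiguous block of maxNumBatchesPerPhase batch slots, so the phase can be
    // recovered from a batch index as iBatch / maxNumBatchesPerPhase
    int iBatch = iPhase * maxNumBatchesPerPhase
        + chunkCoord[ 0 ]
        + chunkCoord[ 1 ] * gridChunkDim[ 0 ]
        + chunkCoord[ 2 ] * gridChunkDim[ 0 ] * gridChunkDim[ 1 ];
    GridBatchId id;
    id.phase = iPhase;
    id.batch = iBatch;
    return id;
}

Because the phase is implicit in the batch index range, there is presumably no need for btBatchInfo to keep a separate phaseId, which is what the first hunk removes.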
//
static void setupSpatialGridBatchesMt(
@@ -882,7 +879,6 @@ static void setupSpatialGridBatchesMt(
btBatchInfo* batches = NULL;
int* batchWork = NULL;
btBatchedConstraintInfo* conInfos = NULL;
char* constraintPhaseIds = NULL;
int* constraintBatchIds = NULL;
int* constraintRowBatchIds = NULL;
{
@@ -893,7 +889,6 @@ static void setupSpatialGridBatchesMt(
memHelper.addChunk( (void**) &batches, sizeof( btBatchInfo )* allocNumBatches );
memHelper.addChunk( (void**) &batchWork, sizeof( int )* allocNumBatches );
memHelper.addChunk( (void**) &conInfos, sizeof( btBatchedConstraintInfo ) * numConstraints );
memHelper.addChunk( (void**) &constraintPhaseIds, sizeof( char ) * numConstraints );
memHelper.addChunk( (void**) &constraintBatchIds, sizeof( int ) * numConstraints );
memHelper.addChunk( (void**) &constraintRowBatchIds, sizeof( int ) * numConstraintRows );
size_t scratchSize = memHelper.getSizeToAllocate();
@@ -1010,7 +1005,7 @@ static void setupSpatialGridBatchesMt(
for ( int iBatch = batchBegin; iBatch < batchEnd; ++iBatch )
{
btBatchInfo& batch = batches[ iBatch ];
batch = btBatchInfo( iPhase );
batch = btBatchInfo();
}
}
@@ -1020,7 +1015,6 @@ static void setupSpatialGridBatchesMt(
params.bodyGridCoords = bodyGridCoords;
params.numBodies = bodies.size();
params.conInfos = conInfos;
params.constraintPhaseIds = constraintPhaseIds;
params.constraintBatchIds = constraintBatchIds;
params.gridChunkDim = gridChunkDim;
params.maxNumBatchesPerPhase = maxNumBatchesPerPhase;
@@ -1030,7 +1024,7 @@ static void setupSpatialGridBatchesMt(
if (inParallel)
{
AssignConstraintsToGridBatchesLoop loop(params);
int grainSize = 500;
int grainSize = 250;
btParallelFor(0, numConstraints, grainSize, loop);
}
else

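For context, AssignConstraintsToGridBatchesLoop is a btIParallelForBody, and the grain size controls how finely btParallelFor may split the index range across workers; halving it from 500 to 250 presumably lets the assignment loop be distributed more evenly. A minimal sketch of the pattern, assuming the btIParallelForBody / btParallelFor declarations in LinearMath/btThreads.h (the loop body below is a placeholder, not the real assignment code):

#include "LinearMath/btThreads.h"

struct AssignLoopSketch : public btIParallelForBody
{
    int* m_constraintBatchIds;  // stand-in output array
    AssignLoopSketch( int* constraintBatchIds ) : m_constraintBatchIds( constraintBatchIds ) {}
    virtual void forLoop( int iBegin, int iEnd ) const
    {
        for ( int iCon = iBegin; iCon < iEnd; ++iCon )
        {
            m_constraintBatchIds[ iCon ] = -1;  // placeholder for the real per-constraint work
        }
    }
};

// usage sketch:
//   AssignLoopSketch loop( constraintBatchIds );
//   btParallelFor( 0, numConstraints, 250, loop );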
View File

@@ -940,7 +940,7 @@ void btSequentialImpulseConstraintSolverMt::solveGroupCacheFriendlySplitImpulseI
{
int iPhase = batchedCons.m_phaseOrder[ iiPhase ];
const btBatchedConstraints::Range& phase = batchedCons.m_phases[ iPhase ];
int grainSize = 8;
int grainSize = batchedCons.m_phaseGrainSize[iPhase];
leastSquaresResidual += btParallelSum( phase.begin, phase.end, grainSize, loop );
}
}

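This hunk replaces the fixed grain size of 8 with a per-phase value stored in m_phaseGrainSize, so phases with few batches are not split into needlessly small ranges. How that value is computed is not shown here; the snippet below is only a hypothetical heuristic illustrating the idea (the function name, tuning factor, and inputs are all assumptions):

// Hypothetical heuristic, for illustration only: hand each worker a few
// sub-ranges per phase instead of always splitting down to 8 batches.
static int pickPhaseGrainSize( int numBatchesInPhase, int numWorkerThreads )
{
    const int kSubRangesPerThread = 4;  // assumed tuning factor
    int grain = numBatchesInPhase / ( numWorkerThreads * kSubRangesPerThread );
    return grain > 1 ? grain : 1;       // never return a zero-sized grain
}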
View File

@@ -17,42 +17,12 @@ typedef void* ( *btThreadLocalStorageFunc )();
///
/// getNumHardwareThreads()
///
///
/// https://stackoverflow.com/questions/150355/programmatically-find-the-number-of-cores-on-a-machine
///
#if __cplusplus >= 201103L
#include <thread>
int getNumHardwareThreads()
{
return std::thread::hardware_concurrency();
}
#elif defined( _WIN32 )
#if defined( _WIN32 )
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
int getNumHardwareThreads()
{
// caps out at 32
SYSTEM_INFO info;
GetSystemInfo( &info );
return info.dwNumberOfProcessors;
}
#else
int getNumHardwareThreads()
{
return 0; // don't know
}
#endif
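Note that std::thread::hardware_concurrency() may return 0, and the generic branch above returns 0 outright when the count is unknown, so callers need a fallback. A small illustrative wrapper (the function name and the default of 4 are assumptions, not part of this commit):

int pickThreadCount( int maxAllowedThreads )
{
    int n = getNumHardwareThreads();
    if ( n <= 0 )
    {
        n = 4;  // assumed default when the hardware thread count is unknown
    }
    return n < maxAllowedThreads ? n : maxAllowedThreads;
}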
@@ -581,7 +551,6 @@ public:
// put the main thread to work on emptying the job queue and then wait for all workers to finish
waitJobs();
m_antiNestingLock.unlock();
// add up all the thread sums
btScalar sum = btScalar(0);
@@ -589,6 +558,7 @@ public:
{
sum += threadLocalSum[ iThread ].mSum;
}
m_antiNestingLock.unlock();
return sum;
}
else

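The second hunk above moves m_antiNestingLock.unlock() so it runs after the per-thread partial sums have been accumulated, rather than right after waitJobs(). The sketch below shows that ordering with stand-in types (ThreadLocalSumSketch and the free function are illustrative; btSpinMutex is assumed to be the lock type in use):

#include "LinearMath/btScalar.h"
#include "LinearMath/btThreads.h"

struct ThreadLocalSumSketch { btScalar mSum; };  // stand-in for the real per-thread storage

static btScalar reduceThreadSums( const ThreadLocalSumSketch* threadLocalSum, int numThreads, btSpinMutex& antiNestingLock )
{
    btScalar sum = btScalar( 0 );
    for ( int iThread = 0; iThread < numThreads; ++iThread )
    {
        sum += threadLocalSum[ iThread ].mSum;
    }
    // release only after every partial sum has been read, so another parallel
    // region cannot start reusing the thread-local storage mid-reduction
    antiNestingLock.unlock();
    return sum;
}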
View File

@@ -267,8 +267,8 @@ DWORD WINAPI win32threadStartFunc( LPVOID lpParam )
void btThreadSupportWin32::runTask( int threadIndex, void* userData )
{
btThreadStatus& threadStatus = m_activeThreadStatus[ threadIndex ];
btAssert( taskId >= 0 );
btAssert( int( taskId ) < m_activeThreadStatus.size() );
btAssert( threadIndex >= 0 );
btAssert( int( threadIndex ) < m_activeThreadStatus.size() );
threadStatus.m_commandId = 1;
threadStatus.m_status = 1;