Files
bullet3/examples/CommonInterfaces/ParallelFor.h
Lunkhound 1c3686ca51 MultiThreaded Demo:
- fixing various race conditions throughout (usage of static vars, etc)
 - addition of a few lightweight mutexes (which are compiled out by default)
 - slight code rearrangement in discreteDynamicsWorld to facilitate multithreading
 - PoolAllocator::allocate() can now be called when pool is full without
     crashing (null pointer returned)
 - PoolAllocator allocate and freeMemory, are OPTIONALLY threadsafe
     (default is un-threadsafe)
 - CollisionDispatcher no longer checks if the pool allocator is full
     before calling allocate(), instead it just calls allocate() and
     checks if the return is null -- this avoids a race condition
 - SequentialImpulseConstraintSolver OPTIONALLY uses different logic in
     getOrInitSolverBody() to avoid a race condition with kinematic bodies
 - addition of 2 classes which together allow simulation islands to be run
   in parallel:
    - btSimulationIslandManagerMt
    - btDiscreteDynamicsWorldMt
 - MultiThreadedDemo example in the example browser demonstrating use of
   OpenMP, Microsoft PPL, and Intel TBB
 - use multithreading for other demos
 - benchmark demo: add parallel raycasting
2016-10-30 12:47:27 -07:00

337 lines
7.5 KiB
C++

/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#include <stdio.h> //printf debugging
#include <algorithm>
// choose threading providers:
#if BT_USE_TBB
#define USE_TBB 1 // use Intel Threading Building Blocks for thread management
#endif
#if BT_USE_PPL
#define USE_PPL 1 // use Microsoft Parallel Patterns Library (installed with Visual Studio 2010 and later)
#endif // BT_USE_PPL
#if BT_USE_OPENMP
#define USE_OPENMP 1 // use OpenMP (also need to change compiler options for OpenMP support)
#endif
#if USE_OPENMP
#include <omp.h>
#endif // #if USE_OPENMP
#if USE_PPL
#include <ppl.h> // if you get a compile error here, check whether your version of Visual Studio includes PPL
// Visual Studio 2010 and later should come with it
#include <concrtrm.h> // for GetProcessorCount()
#endif // #if USE_PPL
#if USE_TBB
#define __TBB_NO_IMPLICIT_LINKAGE 1
#include <tbb/tbb.h>
#include <tbb/task_scheduler_init.h>
#include <tbb/parallel_for.h>
#include <tbb/blocked_range.h>
#endif // #if USE_TBB
///
/// TaskManager -- remembers which threading API should service parallelFor
///                and how many threads that API has been asked to use.
///                Only the APIs enabled through USE_OPENMP / USE_TBB / USE_PPL
///                at compile time can actually be selected.
///
class TaskManager
{
public:
    // Identifiers for the threading back ends; apiCount is the number of named entries.
    enum Api
    {
        apiNone,
        apiOpenMP,
        apiTbb,
        apiPpl,
        apiCount
    };

    // Returns a display name for the given API, or "unknown" for any other value.
    static const char* getApiName( Api api )
    {
        if ( api == apiNone )
        {
            return "None";
        }
        if ( api == apiOpenMP )
        {
            return "OpenMP";
        }
        if ( api == apiTbb )
        {
            return "Intel TBB";
        }
        if ( api == apiPpl )
        {
            return "MS PPL";
        }
        return "unknown";
    }

    // Starts with no API selected and a thread count of 0, which init() treats as "not configured yet".
    TaskManager()
        : m_api( apiNone )
        , m_numThreads( 0 )
#if USE_TBB
        , m_tbbSchedulerInit( NULL )
#endif  // #if USE_TBB
    {
    }

    // Currently selected API.
    Api getApi() const
    {
        return m_api;
    }

    // True when support for the given API was compiled in; apiNone is always "supported".
    bool isSupported( Api api ) const
    {
        switch ( api )
        {
            case apiNone:
                return true;
#if USE_OPENMP
            case apiOpenMP:
                return true;
#endif
#if USE_TBB
            case apiTbb:
                return true;
#endif
#if USE_PPL
            case apiPpl:
                return true;
#endif
            default:
                return false;
        }
    }

    // Selects the given API; without compile time support for it, falls back to apiNone.
    void setApi( Api api )
    {
        m_api = isSupported( api ) ? api : apiNone;
    }

    // Thread count suggested by the first compiled-in provider (OpenMP, then PPL, then TBB);
    // 1 when no provider is compiled in.
    static int getMaxNumThreads()
    {
#if USE_OPENMP
        return omp_get_max_threads();
#elif USE_PPL
        return concurrency::GetProcessorCount();
#elif USE_TBB
        return tbb::task_scheduler_init::default_num_threads();
#else
        return 1;
#endif
    }

    // Thread count most recently stored by setNumThreads (0 before the first call).
    int getNumThreads() const
    {
        return m_numThreads;
    }

    // Stores the requested thread count (values below 1 become 1), pushes it to every
    // compiled-in provider, and returns the count that was stored.
    int setNumThreads( int numThreads )
    {
        m_numThreads = ( numThreads < 1 ) ? 1 : numThreads;
#if USE_OPENMP
        omp_set_num_threads( m_numThreads );
#endif
#if USE_PPL
        {
            // replace whatever scheduler is attached with one limited to exactly m_numThreads
            if ( concurrency::CurrentScheduler::Id() != -1 )
            {
                concurrency::CurrentScheduler::Detach();
            }
            concurrency::SchedulerPolicy limits;
            limits.SetConcurrencyLimits( m_numThreads, m_numThreads );
            concurrency::CurrentScheduler::Create( limits );
        }
#endif
#if USE_TBB
        // drop the previous scheduler object (deleting NULL is a no-op) before making a new one
        delete m_tbbSchedulerInit;
        m_tbbSchedulerInit = NULL;
        m_tbbSchedulerInit = new tbb::task_scheduler_init( m_numThreads );
#endif
        return m_numThreads;
    }

    // First-time setup picks an API and the maximum thread count;
    // later calls just re-apply the thread count that is already stored.
    void init()
    {
        if ( m_numThreads != 0 )
        {
            setNumThreads( m_numThreads );
            return;
        }
        // each call overwrites the previous choice, so the last compiled-in API below wins
#if USE_PPL
        setApi( apiPpl );
#endif
#if USE_TBB
        setApi( apiTbb );
#endif
#if USE_OPENMP
        setApi( apiOpenMP );
#endif
        setNumThreads( getMaxNumThreads() );
    }

    // Destroys the TBB scheduler object, if one was created; does nothing for the other providers.
    void shutdown()
    {
#if USE_TBB
        delete m_tbbSchedulerInit;
        m_tbbSchedulerInit = NULL;
#endif
    }

private:
    Api m_api;
    int m_numThreads;
#if USE_TBB
    tbb::task_scheduler_init* m_tbbSchedulerInit;
#endif  // #if USE_TBB
};
// Shared TaskManager instance; it is only declared here, so a definition must be provided elsewhere.
extern TaskManager gTaskMgr;

///
/// initTaskScheduler -- forwards to gTaskMgr.init()
///
/// Declared "static inline" rather than plain "static": the function lives in a header,
/// and a non-inline static function that a translation unit never calls is reported
/// by -Wunused-function.
///
static inline void initTaskScheduler()
{
    gTaskMgr.init();
}
///
/// cleanupTaskScheduler -- forwards to gTaskMgr.shutdown()
///
/// Declared "static inline" rather than plain "static": the function lives in a header,
/// and a non-inline static function that a translation unit never calls is reported
/// by -Wunused-function.
///
static inline void cleanupTaskScheduler()
{
    gTaskMgr.shutdown();
}
#if USE_TBB
///
/// TbbBodyAdapter -- wraps a body object that provides
///                   "forLoop(int iBegin, int iEnd) const"
///                   so that it can be called with a tbb::blocked_range<int>,
///                   which is the form tbb::parallel_for passes to its body.
///
template <class TBody>
struct TbbBodyAdapter
{
    const TBody* mBody;  // wrapped body; must be assigned before the adapter is invoked

    // Hands the bounds of the given range on to the wrapped body's forLoop.
    void operator()( const tbb::blocked_range<int>& chunk ) const
    {
        const int first = chunk.begin();
        const int last = chunk.end();
        mBody->forLoop( first, last );
    }
};
#endif // #if USE_TBB
#if USE_PPL
///
/// PplBodyAdapter -- wraps a body object that provides
///                   "forLoop(int iBegin, int iEnd) const"
///                   in an object offering "void operator()( int ) const",
///                   which is the form expected by PPL.
///
template <class TBody>
struct PplBodyAdapter
{
    const TBody* mBody;  // wrapped body; must be assigned before the adapter is invoked
    int mGrainSize;      // number of indices covered by one call
    int mIndexEnd;       // upper bound that no chunk may run past

    // Runs the wrapped body on the chunk starting at i; the chunk is mGrainSize long
    // unless that would pass mIndexEnd, in which case it stops at mIndexEnd.
    void operator()( int i ) const
    {
        int chunkEnd = i + mGrainSize;
        if ( mIndexEnd < chunkEnd )
        {
            chunkEnd = mIndexEnd;
        }
        mBody->forLoop( i, chunkEnd );
    }
};
#endif // #if USE_PPL
///
/// parallelFor -- interface for submitting work expressed as a for loop to the worker threads
///
/// The index range [iBegin, iEnd) is split into chunks whose size is governed by grainSize,
/// and body.forLoop( chunkBegin, chunkEnd ) is called for each chunk through the API that
/// gTaskMgr currently reports. If that API is not one of the compiled-in providers, the
/// whole range is passed to body.forLoop in a single call on the calling thread.
///
/// iBegin    -- first index of the range
/// iEnd      -- one past the last index of the range
/// grainSize -- requested chunk size; values below 1 are treated as 1
/// body      -- object providing "forLoop(int iBegin, int iEnd) const"
///
template <class TBody>
void parallelFor( int iBegin, int iEnd, int grainSize, const TBody& body )
{
    // grainSize is used as the loop increment / step / range grain size below;
    // zero or a negative value would give a loop that never advances or an
    // argument the provider rejects, so force it to be at least 1
    if ( grainSize < 1 )
    {
        grainSize = 1;
    }
#if USE_OPENMP
    if ( gTaskMgr.getApi() == TaskManager::apiOpenMP )
    {
        // each loop iteration handles one chunk of up to grainSize indices
#pragma omp parallel for schedule(static, 1)
        for ( int i = iBegin; i < iEnd; i += grainSize )
        {
            body.forLoop( i, (std::min)( i + grainSize, iEnd ) );
        }
        return;
    }
#endif // #if USE_OPENMP
#if USE_PPL
    if ( gTaskMgr.getApi() == TaskManager::apiPpl )
    {
        // PPL dispatch
        PplBodyAdapter<TBody> pplBody;
        pplBody.mBody = &body;
        pplBody.mGrainSize = grainSize;
        pplBody.mIndexEnd = iEnd;
        // note: MSVC 2010 doesn't support partitioner args, so avoid them
        concurrency::parallel_for( iBegin,
                                   iEnd,
                                   grainSize,
                                   pplBody
                                   );
        return;
    }
#endif //#if USE_PPL
#if USE_TBB
    if ( gTaskMgr.getApi() == TaskManager::apiTbb )
    {
        // TBB dispatch
        TbbBodyAdapter<TBody> tbbBody;
        tbbBody.mBody = &body;
        tbb::parallel_for( tbb::blocked_range<int>( iBegin, iEnd, grainSize ),
                           tbbBody,
                           tbb::simple_partitioner()
                           );
        return;
    }
#endif // #if USE_TBB
    {
        // no parallel provider selected: run the whole range in one call on this thread
        body.forLoop( iBegin, iEnd );
    }
}