Code-style consistency improvement:

Apply clang-format-all.sh, using the _clang-format file, to all the .cpp/.h files.
Make sure not to apply it to certain serialization structures, since some parsers expect the * to be part of the name instead of the type.
This commit contains no other changes aside from adding and applying clang-format-all.sh.
This commit is contained in:
erwincoumans
2018-09-23 14:17:31 -07:00
parent b73b05e9fb
commit ab8f16961e
1773 changed files with 1081087 additions and 474249 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -16,55 +16,49 @@ subject to the following restrictions:
#ifndef BT_THREAD_SUPPORT_INTERFACE_H
#define BT_THREAD_SUPPORT_INTERFACE_H
/// Abstract mutual-exclusion primitive.
/// Concrete implementations are obtained from
/// btThreadSupportInterface::createCriticalSection() and released with
/// btThreadSupportInterface::deleteCriticalSection().
class btCriticalSection
{
public:
	btCriticalSection() {}
	virtual ~btCriticalSection() {}

	/// Acquire the critical section.
	virtual void lock() = 0;
	/// Release the critical section.
	virtual void unlock() = 0;
};
class btCriticalSection;  // only used through pointers below

/// Platform-independent interface to a pool of worker threads.
/// A platform-specific implementation is obtained through create().
class btThreadSupportInterface
{
public:
	virtual ~btThreadSupportInterface() {}

	virtual int getNumWorkerThreads() const = 0;            // number of worker threads (total number of logical processors - 1)
	virtual int getCacheFriendlyNumThreads() const = 0;     // the number of logical processors sharing a single L3 cache
	virtual int getLogicalToPhysicalCoreRatio() const = 0;  // the number of logical processors per physical processor (usually 1 or 2)

	/// Hand userData to the worker with index threadIndex and start it.
	virtual void runTask(int threadIndex, void* userData) = 0;
	/// Block until every task started with runTask() has finished.
	virtual void waitForAllTasks() = 0;

	virtual btCriticalSection* createCriticalSection() = 0;
	virtual void deleteCriticalSection(btCriticalSection* criticalSection) = 0;

	/// Signature of the function each worker runs for a task.
	typedef void (*ThreadFunc)(void* userPtr);

	/// Parameters for create().
	struct ConstructionInfo
	{
		ConstructionInfo(const char* uniqueName,
						 ThreadFunc userThreadFunc,
						 int threadStackSize = 65535)
			: m_uniqueName(uniqueName),
			  m_userThreadFunc(userThreadFunc),
			  m_threadStackSize(threadStackSize)
		{
		}

		const char* m_uniqueName;     // stored as given; the string is not copied
		ThreadFunc m_userThreadFunc;  // function executed by the workers
		int m_threadStackSize;        // defaults to 65535
	};

	/// Factory implemented once per platform.
	static btThreadSupportInterface* create(const ConstructionInfo& info);
};
#endif //BT_THREAD_SUPPORT_INTERFACE_H
#endif //BT_THREAD_SUPPORT_INTERFACE_H

View File

@@ -13,9 +13,7 @@ subject to the following restrictions:
3. This notice may not be removed or altered from any source distribution.
*/
#if BT_THREADSAFE && !defined( _WIN32 )
#if BT_THREADSAFE && !defined(_WIN32)
#include "LinearMath/btScalar.h"
#include "LinearMath/btAlignedObjectArray.h"
@@ -27,14 +25,12 @@ subject to the following restrictions:
#include <errno.h>
#include <unistd.h>
#ifndef _XOPEN_SOURCE
#define _XOPEN_SOURCE 600 //for definition of pthread_barrier_t, see http://pages.cs.wisc.edu/~travitch/pthreads_primer.html
#endif //_XOPEN_SOURCE
#define _XOPEN_SOURCE 600 //for definition of pthread_barrier_t, see http://pages.cs.wisc.edu/~travitch/pthreads_primer.html
#endif //_XOPEN_SOURCE
#include <pthread.h>
#include <semaphore.h>
#include <unistd.h> //for sysconf
#include <unistd.h> //for sysconf
///
/// getNumHardwareThreads()
@@ -48,318 +44,309 @@ subject to the following restrictions:
/// Number of hardware threads, capped at BT_MAX_THREAD_COUNT and never less than 1.
int btGetNumHardwareThreads()
{
	// hardware_concurrency() returns 0 when the value is not computable;
	// treat that as a single hardware thread so callers that subtract one
	// for the main thread never end up with a negative worker count.
	const int detected = static_cast<int>(std::thread::hardware_concurrency());
	return btMin<int>(BT_MAX_THREAD_COUNT, detected > 0 ? detected : 1);
}
#else
/// Number of online processors, capped at BT_MAX_THREAD_COUNT and never less than 1.
int btGetNumHardwareThreads()
{
	// sysconf() returns -1 on error; fall back to a single processor so callers
	// that subtract one for the main thread never get a negative worker count.
	const long online = sysconf(_SC_NPROCESSORS_ONLN);
	return btMin<int>(BT_MAX_THREAD_COUNT, online > 0 ? static_cast<int>(online) : 1);
}
#endif
// btThreadSupportPosix helps to initialize/shutdown libspe2, start/stop SPU tasks and communication
class btThreadSupportPosix : public btThreadSupportInterface
{
public:
struct btThreadStatus
{
int m_taskId;
int m_commandId;
int m_status;
struct btThreadStatus
{
int m_taskId;
int m_commandId;
int m_status;
ThreadFunc m_userThreadFunc;
void* m_userPtr; //for taskDesc etc
ThreadFunc m_userThreadFunc;
void* m_userPtr; //for taskDesc etc
pthread_t thread;
//each tread will wait until this signal to start its work
sem_t* startSemaphore;
pthread_t thread;
//each tread will wait until this signal to start its work
sem_t* startSemaphore;
// this is a copy of m_mainSemaphore,
//each tread will signal once it is finished with its work
sem_t* m_mainSemaphore;
unsigned long threadUsed;
};
// this is a copy of m_mainSemaphore,
//each tread will signal once it is finished with its work
sem_t* m_mainSemaphore;
unsigned long threadUsed;
};
private:
typedef unsigned long long UINT64;
typedef unsigned long long UINT64;
btAlignedObjectArray<btThreadStatus> m_activeThreadStatus;
// m_mainSemaphoresemaphore will signal, if and how many threads are finished with their work
sem_t* m_mainSemaphore;
int m_numThreads;
UINT64 m_startedThreadsMask;
void startThreads( const ConstructionInfo& threadInfo );
void stopThreads();
int waitForResponse();
btAlignedObjectArray<btThreadStatus> m_activeThreadStatus;
// m_mainSemaphoresemaphore will signal, if and how many threads are finished with their work
sem_t* m_mainSemaphore;
int m_numThreads;
UINT64 m_startedThreadsMask;
void startThreads(const ConstructionInfo& threadInfo);
void stopThreads();
int waitForResponse();
public:
btThreadSupportPosix( const ConstructionInfo& threadConstructionInfo );
virtual ~btThreadSupportPosix();
btThreadSupportPosix(const ConstructionInfo& threadConstructionInfo);
virtual ~btThreadSupportPosix();
virtual int getNumWorkerThreads() const BT_OVERRIDE { return m_numThreads; }
// TODO: return the number of logical processors sharing the first L3 cache
virtual int getCacheFriendlyNumThreads() const BT_OVERRIDE { return m_numThreads + 1; }
// TODO: detect if CPU has hyperthreading enabled
virtual int getLogicalToPhysicalCoreRatio() const BT_OVERRIDE { return 1; }
virtual int getNumWorkerThreads() const BT_OVERRIDE { return m_numThreads; }
// TODO: return the number of logical processors sharing the first L3 cache
virtual int getCacheFriendlyNumThreads() const BT_OVERRIDE { return m_numThreads + 1; }
// TODO: detect if CPU has hyperthreading enabled
virtual int getLogicalToPhysicalCoreRatio() const BT_OVERRIDE { return 1; }
virtual void runTask( int threadIndex, void* userData ) BT_OVERRIDE;
virtual void waitForAllTasks() BT_OVERRIDE;
virtual void runTask(int threadIndex, void* userData) BT_OVERRIDE;
virtual void waitForAllTasks() BT_OVERRIDE;
virtual btCriticalSection* createCriticalSection() BT_OVERRIDE;
virtual void deleteCriticalSection( btCriticalSection* criticalSection ) BT_OVERRIDE;
virtual btCriticalSection* createCriticalSection() BT_OVERRIDE;
virtual void deleteCriticalSection(btCriticalSection* criticalSection) BT_OVERRIDE;
};
/// Logs a diagnostic when a pthread/semaphore call reports failure (non-zero result).
/// The argument is evaluated exactly once, and the do/while(0) wrapper makes the
/// macro behave as a single statement, so it is safe inside an unbraced if/else.
#define checkPThreadFunction(returnValue)                                                                       \
	do                                                                                                          \
	{                                                                                                           \
		const int pthreadResult_ = (returnValue);                                                               \
		if (0 != pthreadResult_)                                                                                \
		{                                                                                                       \
			printf("PThread problem at line %i in file %s: %i %d\n", __LINE__, __FILE__, pthreadResult_, errno); \
		}                                                                                                       \
	} while (0)
// The number of threads should be equal to the number of available cores
// Todo: each worker should be linked to a single core (thread affinity).
/// Creates the worker threads; all set-up is done by startThreads().
btThreadSupportPosix::btThreadSupportPosix(const ConstructionInfo& threadConstructionInfo)
{
	startThreads(threadConstructionInfo);
}
/// Shuts the workers down and releases their semaphores via stopThreads().
btThreadSupportPosix::~btThreadSupportPosix()
{
	stopThreads();
}
// On Apple platforms the semaphore helpers use named semaphores (sem_open/sem_close)
// instead of sem_init/sem_destroy.
#if defined(__APPLE__)
#define NAMED_SEMAPHORES
#endif
static sem_t* createSem( const char* baseName )
static sem_t* createSem(const char* baseName)
{
static int semCount = 0;
static int semCount = 0;
#ifdef NAMED_SEMAPHORES
/// Named semaphore begin
char name[ 32 ];
snprintf( name, 32, "/%8.s-%4.d-%4.4d", baseName, getpid(), semCount++ );
sem_t* tempSem = sem_open( name, O_CREAT, 0600, 0 );
/// Named semaphore begin
char name[32];
snprintf(name, 32, "/%8.s-%4.d-%4.4d", baseName, getpid(), semCount++);
sem_t* tempSem = sem_open(name, O_CREAT, 0600, 0);
if ( tempSem != reinterpret_cast<sem_t *>( SEM_FAILED ) )
{
// printf("Created \"%s\" Semaphore %p\n", name, tempSem);
}
else
{
//printf("Error creating Semaphore %d\n", errno);
exit( -1 );
}
/// Named semaphore end
if (tempSem != reinterpret_cast<sem_t*>(SEM_FAILED))
{
// printf("Created \"%s\" Semaphore %p\n", name, tempSem);
}
else
{
//printf("Error creating Semaphore %d\n", errno);
exit(-1);
}
/// Named semaphore end
#else
sem_t* tempSem = new sem_t;
checkPThreadFunction( sem_init( tempSem, 0, 0 ) );
sem_t* tempSem = new sem_t;
checkPThreadFunction(sem_init(tempSem, 0, 0));
#endif
return tempSem;
return tempSem;
}
static void destroySem( sem_t* semaphore )
static void destroySem(sem_t* semaphore)
{
#ifdef NAMED_SEMAPHORES
checkPThreadFunction( sem_close( semaphore ) );
checkPThreadFunction(sem_close(semaphore));
#else
checkPThreadFunction( sem_destroy( semaphore ) );
delete semaphore;
checkPThreadFunction(sem_destroy(semaphore));
delete semaphore;
#endif
}
static void *threadFunction( void *argument )
static void* threadFunction(void* argument)
{
btThreadSupportPosix::btThreadStatus* status = ( btThreadSupportPosix::btThreadStatus* )argument;
btThreadSupportPosix::btThreadStatus* status = (btThreadSupportPosix::btThreadStatus*)argument;
while ( 1 )
{
checkPThreadFunction( sem_wait( status->startSemaphore ) );
void* userPtr = status->m_userPtr;
while (1)
{
checkPThreadFunction(sem_wait(status->startSemaphore));
void* userPtr = status->m_userPtr;
if ( userPtr )
{
btAssert( status->m_status );
status->m_userThreadFunc( userPtr );
status->m_status = 2;
checkPThreadFunction( sem_post( status->m_mainSemaphore ) );
status->threadUsed++;
}
else
{
//exit Thread
status->m_status = 3;
checkPThreadFunction( sem_post( status->m_mainSemaphore ) );
printf( "Thread with taskId %i exiting\n", status->m_taskId );
break;
}
}
if (userPtr)
{
btAssert(status->m_status);
status->m_userThreadFunc(userPtr);
status->m_status = 2;
checkPThreadFunction(sem_post(status->m_mainSemaphore));
status->threadUsed++;
}
else
{
//exit Thread
status->m_status = 3;
checkPThreadFunction(sem_post(status->m_mainSemaphore));
printf("Thread with taskId %i exiting\n", status->m_taskId);
break;
}
}
printf( "Thread TERMINATED\n" );
return 0;
printf("Thread TERMINATED\n");
return 0;
}
///send messages to SPUs
void btThreadSupportPosix::runTask( int threadIndex, void* userData )
void btThreadSupportPosix::runTask(int threadIndex, void* userData)
{
///we should spawn an SPU task here, and in 'waitForResponse' it should wait for response of the (one of) the first tasks that finished
btThreadStatus& threadStatus = m_activeThreadStatus[ threadIndex ];
btAssert( threadIndex >= 0 );
btAssert( threadIndex < m_activeThreadStatus.size() );
///we should spawn an SPU task here, and in 'waitForResponse' it should wait for response of the (one of) the first tasks that finished
btThreadStatus& threadStatus = m_activeThreadStatus[threadIndex];
btAssert(threadIndex >= 0);
btAssert(threadIndex < m_activeThreadStatus.size());
threadStatus.m_commandId = 1;
threadStatus.m_status = 1;
threadStatus.m_userPtr = userData;
m_startedThreadsMask |= UINT64( 1 ) << threadIndex;
threadStatus.m_commandId = 1;
threadStatus.m_status = 1;
threadStatus.m_userPtr = userData;
m_startedThreadsMask |= UINT64(1) << threadIndex;
// fire event to start new task
checkPThreadFunction( sem_post( threadStatus.startSemaphore ) );
// fire event to start new task
checkPThreadFunction(sem_post(threadStatus.startSemaphore));
}
/// Blocks until one worker signals the main semaphore, then marks the first
/// worker found with status 2 (finished) as idle and clears its started bit.
///
/// @return index of that worker, or -1 if no worker was in the finished state
int btThreadSupportPosix::waitForResponse()
{
	btAssert(m_activeThreadStatus.size());

	// wait for any of the threads to finish
	checkPThreadFunction(sem_wait(m_mainSemaphore));

	// get at least one thread which has finished
	// (a signed index keeps the "not found" value -1 distinguishable)
	int last = -1;
	for (int t = 0; t < m_activeThreadStatus.size(); ++t)
	{
		if (2 == m_activeThreadStatus[t].m_status)
		{
			last = t;
			break;
		}
	}

	btAssert(last >= 0);
	if (last < 0)
	{
		// nothing to acknowledge; never index the array with -1
		return -1;
	}

	btThreadStatus& threadStatus = m_activeThreadStatus[last];
	btAssert(threadStatus.m_status > 1);
	threadStatus.m_status = 0;

	m_startedThreadsMask &= ~(UINT64(1) << last);

	return last;
}
/// Blocks until every task started with runTask() has been acknowledged,
/// i.e. until no bit is left in m_startedThreadsMask.
void btThreadSupportPosix::waitForAllTasks()
{
	while (m_startedThreadsMask)
	{
		waitForResponse();
	}
}
void btThreadSupportPosix::startThreads( const ConstructionInfo& threadConstructionInfo )
void btThreadSupportPosix::startThreads(const ConstructionInfo& threadConstructionInfo)
{
m_numThreads = btGetNumHardwareThreads() - 1; // main thread exists already
printf( "%s creating %i threads.\n", __FUNCTION__, m_numThreads );
m_activeThreadStatus.resize( m_numThreads );
m_startedThreadsMask = 0;
m_numThreads = btGetNumHardwareThreads() - 1; // main thread exists already
printf("%s creating %i threads.\n", __FUNCTION__, m_numThreads);
m_activeThreadStatus.resize(m_numThreads);
m_startedThreadsMask = 0;
m_mainSemaphore = createSem( "main" );
//checkPThreadFunction(sem_wait(mainSemaphore));
m_mainSemaphore = createSem("main");
//checkPThreadFunction(sem_wait(mainSemaphore));
for ( int i = 0; i < m_numThreads; i++ )
{
printf( "starting thread %d\n", i );
btThreadStatus& threadStatus = m_activeThreadStatus[ i ];
threadStatus.startSemaphore = createSem( "threadLocal" );
checkPThreadFunction( pthread_create( &threadStatus.thread, NULL, &threadFunction, (void*) &threadStatus ) );
for (int i = 0; i < m_numThreads; i++)
{
printf("starting thread %d\n", i);
btThreadStatus& threadStatus = m_activeThreadStatus[i];
threadStatus.startSemaphore = createSem("threadLocal");
checkPThreadFunction(pthread_create(&threadStatus.thread, NULL, &threadFunction, (void*)&threadStatus));
threadStatus.m_userPtr = 0;
threadStatus.m_taskId = i;
threadStatus.m_commandId = 0;
threadStatus.m_status = 0;
threadStatus.m_mainSemaphore = m_mainSemaphore;
threadStatus.m_userThreadFunc = threadConstructionInfo.m_userThreadFunc;
threadStatus.threadUsed = 0;
threadStatus.m_userPtr = 0;
threadStatus.m_taskId = i;
threadStatus.m_commandId = 0;
threadStatus.m_status = 0;
threadStatus.m_mainSemaphore = m_mainSemaphore;
threadStatus.m_userThreadFunc = threadConstructionInfo.m_userThreadFunc;
threadStatus.threadUsed = 0;
printf( "started thread %d \n", i );
}
printf("started thread %d \n", i);
}
}
/// Asks every worker to exit, waits for it, and releases all semaphores.
void btThreadSupportPosix::stopThreads()
{
	for (size_t t = 0; t < size_t(m_activeThreadStatus.size()); ++t)
	{
		btThreadStatus& threadStatus = m_activeThreadStatus[t];

		// threadUsed is unsigned long, hence %lu
		printf("%s: Thread %i used: %lu\n", __FUNCTION__, int(t), threadStatus.threadUsed);

		// a null user pointer is the exit request understood by threadFunction()
		threadStatus.m_userPtr = 0;
		checkPThreadFunction(sem_post(threadStatus.startSemaphore));
		checkPThreadFunction(sem_wait(m_mainSemaphore));

		printf("destroy semaphore\n");
		destroySem(threadStatus.startSemaphore);
		printf("semaphore destroyed\n");
		checkPThreadFunction(pthread_join(threadStatus.thread, 0));
	}
	printf("destroy main semaphore\n");
	destroySem(m_mainSemaphore);
	printf("main semaphore destroyed\n");
	m_activeThreadStatus.clear();
}
/// btCriticalSection backed by a pthread mutex created with default attributes.
class btCriticalSectionPosix : public btCriticalSection
{
	pthread_mutex_t m_mutex;

public:
	btCriticalSectionPosix()
	{
		pthread_mutex_init(&m_mutex, NULL);
	}
	virtual ~btCriticalSectionPosix()
	{
		pthread_mutex_destroy(&m_mutex);
	}

	virtual void lock()
	{
		pthread_mutex_lock(&m_mutex);
	}
	virtual void unlock()
	{
		pthread_mutex_unlock(&m_mutex);
	}
};
/// Allocates a new critical section; release it with deleteCriticalSection().
btCriticalSection* btThreadSupportPosix::createCriticalSection()
{
	return new btCriticalSectionPosix();
}
/// Destroys a critical section obtained from createCriticalSection().
void btThreadSupportPosix::deleteCriticalSection(btCriticalSection* cs)
{
	delete cs;
}
/// POSIX implementation of the factory: returns a new btThreadSupportPosix.
/// The caller owns the returned object.
btThreadSupportInterface* btThreadSupportInterface::create(const ConstructionInfo& info)
{
	return new btThreadSupportPosix(info);
}
#endif // BT_THREADSAFE && !defined( _WIN32 )
#endif // BT_THREADSAFE && !defined( _WIN32 )

View File

@@ -13,7 +13,7 @@ subject to the following restrictions:
3. This notice may not be removed or altered from any source distribution.
*/
#if defined( _WIN32 ) && BT_THREADSAFE
#if defined(_WIN32) && BT_THREADSAFE
#include "LinearMath/btScalar.h"
#include "LinearMath/btMinMax.h"
@@ -23,450 +23,430 @@ subject to the following restrictions:
#include <windows.h>
#include <stdio.h>
/// Processor topology summary filled in by getProcessorInformation().
struct btProcessorInfo
{
	int numLogicalProcessors;
	int numCores;
	int numNumaNodes;
	int numL1Cache;
	int numL2Cache;
	int numL3Cache;
	int numPhysicalPackages;
	static const int maxNumTeamMasks = 32;
	int numTeamMasks;                            // number of valid entries in processorTeamMasks
	UINT64 processorTeamMasks[maxNumTeamMasks];  // one processor mask per L3 cache ("team")
};
/// Returns the mask of the first team that contains logical processor procId,
/// or 0 if procId belongs to no team or is outside the 64-bit mask range.
UINT64 getProcessorTeamMask(const btProcessorInfo& procInfo, int procId)
{
	if (procId < 0 || procId >= 64)
	{
		// shifting a 64-bit value by such a count would be undefined
		return 0;
	}
	UINT64 procMask = UINT64(1) << procId;
	for (int i = 0; i < procInfo.numTeamMasks; ++i)
	{
		if (procMask & procInfo.processorTeamMasks[i])
		{
			return procInfo.processorTeamMasks[i];
		}
	}
	return 0;
}
int getProcessorTeamIndex( const btProcessorInfo& procInfo, int procId )
int getProcessorTeamIndex(const btProcessorInfo& procInfo, int procId)
{
UINT64 procMask = UINT64( 1 ) << procId;
for ( int i = 0; i < procInfo.numTeamMasks; ++i )
{
if ( procMask & procInfo.processorTeamMasks[ i ] )
{
return i;
}
}
return -1;
UINT64 procMask = UINT64(1) << procId;
for (int i = 0; i < procInfo.numTeamMasks; ++i)
{
if (procMask & procInfo.processorTeamMasks[i])
{
return i;
}
}
return -1;
}
/// Returns the number of 1 bits in bits.
int countSetBits(ULONG64 bits)
{
	int count = 0;
	while (bits)
	{
		if (bits & 1)
		{
			count++;
		}
		bits >>= 1;
	}
	return count;
}
typedef BOOL(WINAPI* Pfn_GetLogicalProcessorInformation)(PSYSTEM_LOGICAL_PROCESSOR_INFORMATION, PDWORD);

/// Fills *procInfo with counts of cores, logical processors, NUMA nodes, caches
/// and packages, plus one processor mask per L3 cache.
///
/// *procInfo is zeroed first and stays zeroed when GetLogicalProcessorInformation
/// is not exported by kernel32, when the query fails, or when memory for the
/// result cannot be allocated.
void getProcessorInformation(btProcessorInfo* procInfo)
{
	memset(procInfo, 0, sizeof(*procInfo));
	// looked up at run time, so the code still runs where kernel32 lacks the export
	Pfn_GetLogicalProcessorInformation getLogicalProcInfo =
		(Pfn_GetLogicalProcessorInformation)GetProcAddress(GetModuleHandle(TEXT("kernel32")), "GetLogicalProcessorInformation");
	if (getLogicalProcInfo == NULL)
	{
		// no info
		return;
	}

	PSYSTEM_LOGICAL_PROCESSOR_INFORMATION buf = NULL;
	DWORD bufSize = 0;
	// The first call (null buffer, size 0) is expected to fail with
	// ERROR_INSUFFICIENT_BUFFER and report the required size in bufSize.
	while (!getLogicalProcInfo(buf, &bufSize))
	{
		const DWORD lastError = GetLastError();
		free(buf);  // free(NULL) is a no-op
		buf = NULL;
		if (lastError != ERROR_INSUFFICIENT_BUFFER)
		{
			// any other error will not go away by retrying; give up instead of looping forever
			return;
		}
		buf = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION)malloc(bufSize);
		if (buf == NULL)
		{
			return;
		}
	}

	int len = bufSize / sizeof(*buf);
	for (int i = 0; i < len; ++i)
	{
		PSYSTEM_LOGICAL_PROCESSOR_INFORMATION info = buf + i;
		switch (info->Relationship)
		{
			case RelationNumaNode:
				procInfo->numNumaNodes++;
				break;

			case RelationProcessorCore:
				procInfo->numCores++;
				procInfo->numLogicalProcessors += countSetBits(info->ProcessorMask);
				break;

			case RelationCache:
				if (info->Cache.Level == 1)
				{
					procInfo->numL1Cache++;
				}
				else if (info->Cache.Level == 2)
				{
					procInfo->numL2Cache++;
				}
				else if (info->Cache.Level == 3)
				{
					procInfo->numL3Cache++;
					// processors that share L3 cache are considered to be on the same team
					// because they can more easily work together on the same data.
					// Large performance penalties will occur if 2 or more threads from different
					// teams attempt to frequently read and modify the same cache lines.
					//
					// On the AMD Ryzen 7 CPU for example, the 8 cores on the CPU are split into
					// 2 CCX units of 4 cores each. Each CCX has a separate L3 cache, so if both
					// CCXs are operating on the same data, many cycles will be spent keeping the
					// two caches coherent.
					if (procInfo->numTeamMasks < btProcessorInfo::maxNumTeamMasks)
					{
						procInfo->processorTeamMasks[procInfo->numTeamMasks] = info->ProcessorMask;
						procInfo->numTeamMasks++;
					}
				}
				break;

			case RelationProcessorPackage:
				procInfo->numPhysicalPackages++;
				break;
		}
	}
	free(buf);
}
///btThreadSupportWin32 helps to initialize/shutdown libspe2, start/stop SPU tasks and communication
class btThreadSupportWin32 : public btThreadSupportInterface
{
public:
struct btThreadStatus
{
int m_taskId;
int m_commandId;
int m_status;
struct btThreadStatus
{
int m_taskId;
int m_commandId;
int m_status;
ThreadFunc m_userThreadFunc;
void* m_userPtr; //for taskDesc etc
ThreadFunc m_userThreadFunc;
void* m_userPtr; //for taskDesc etc
void* m_threadHandle; //this one is calling 'Win32ThreadFunc'
void* m_threadHandle; //this one is calling 'Win32ThreadFunc'
void* m_eventStartHandle;
char m_eventStartHandleName[ 32 ];
void* m_eventStartHandle;
char m_eventStartHandleName[32];
void* m_eventCompleteHandle;
char m_eventCompleteHandleName[ 32 ];
};
void* m_eventCompleteHandle;
char m_eventCompleteHandleName[32];
};
private:
btAlignedObjectArray<btThreadStatus> m_activeThreadStatus;
btAlignedObjectArray<void*> m_completeHandles;
int m_numThreads;
DWORD_PTR m_startedThreadMask;
btProcessorInfo m_processorInfo;
btAlignedObjectArray<btThreadStatus> m_activeThreadStatus;
btAlignedObjectArray<void*> m_completeHandles;
int m_numThreads;
DWORD_PTR m_startedThreadMask;
btProcessorInfo m_processorInfo;
void startThreads( const ConstructionInfo& threadInfo );
void stopThreads();
int waitForResponse();
void startThreads(const ConstructionInfo& threadInfo);
void stopThreads();
int waitForResponse();
public:
btThreadSupportWin32(const ConstructionInfo& threadConstructionInfo);
virtual ~btThreadSupportWin32();
btThreadSupportWin32( const ConstructionInfo& threadConstructionInfo );
virtual ~btThreadSupportWin32();
virtual int getNumWorkerThreads() const BT_OVERRIDE { return m_numThreads; }
virtual int getCacheFriendlyNumThreads() const BT_OVERRIDE { return countSetBits(m_processorInfo.processorTeamMasks[0]); }
virtual int getLogicalToPhysicalCoreRatio() const BT_OVERRIDE { return m_processorInfo.numLogicalProcessors / m_processorInfo.numCores; }
virtual int getNumWorkerThreads() const BT_OVERRIDE { return m_numThreads; }
virtual int getCacheFriendlyNumThreads() const BT_OVERRIDE { return countSetBits(m_processorInfo.processorTeamMasks[0]); }
virtual int getLogicalToPhysicalCoreRatio() const BT_OVERRIDE { return m_processorInfo.numLogicalProcessors / m_processorInfo.numCores; }
virtual void runTask(int threadIndex, void* userData) BT_OVERRIDE;
virtual void waitForAllTasks() BT_OVERRIDE;
virtual void runTask( int threadIndex, void* userData ) BT_OVERRIDE;
virtual void waitForAllTasks() BT_OVERRIDE;
virtual btCriticalSection* createCriticalSection() BT_OVERRIDE;
virtual void deleteCriticalSection( btCriticalSection* criticalSection ) BT_OVERRIDE;
virtual btCriticalSection* createCriticalSection() BT_OVERRIDE;
virtual void deleteCriticalSection(btCriticalSection* criticalSection) BT_OVERRIDE;
};
/// Creates the worker threads; all set-up is done by startThreads().
btThreadSupportWin32::btThreadSupportWin32(const ConstructionInfo& threadConstructionInfo)
{
	startThreads(threadConstructionInfo);
}
/// Shuts the workers down via stopThreads().
btThreadSupportWin32::~btThreadSupportWin32()
{
	stopThreads();
}
/// Entry point of every worker thread.
///
/// @param lpParam  pointer to the worker's btThreadSupportWin32::btThreadStatus
/// @return always 0
///
/// The worker waits on its start event. When woken with a non-null m_userPtr it
/// runs the user function and signals the complete event with status 2; when
/// woken with a null m_userPtr it sets status 3, signals the complete event and exits.
DWORD WINAPI win32threadStartFunc(LPVOID lpParam)
{
	btThreadSupportWin32::btThreadStatus* status = static_cast<btThreadSupportWin32::btThreadStatus*>(lpParam);

	while (1)
	{
		WaitForSingleObject(status->m_eventStartHandle, INFINITE);
		void* userPtr = status->m_userPtr;

		if (userPtr)
		{
			btAssert(status->m_status);
			status->m_userThreadFunc(userPtr);
			status->m_status = 2;
			SetEvent(status->m_eventCompleteHandle);
		}
		else
		{
			//exit Thread
			status->m_status = 3;
			printf("Thread with taskId %i with handle %p exiting\n", status->m_taskId, status->m_threadHandle);
			SetEvent(status->m_eventCompleteHandle);
			break;
		}
	}
	printf("Thread TERMINATED\n");
	return 0;
}
void btThreadSupportWin32::runTask( int threadIndex, void* userData )
void btThreadSupportWin32::runTask(int threadIndex, void* userData)
{
btThreadStatus& threadStatus = m_activeThreadStatus[ threadIndex ];
btAssert( threadIndex >= 0 );
btAssert( int( threadIndex ) < m_activeThreadStatus.size() );
btThreadStatus& threadStatus = m_activeThreadStatus[threadIndex];
btAssert(threadIndex >= 0);
btAssert(int(threadIndex) < m_activeThreadStatus.size());
threadStatus.m_commandId = 1;
threadStatus.m_status = 1;
threadStatus.m_userPtr = userData;
m_startedThreadMask |= DWORD_PTR( 1 ) << threadIndex;
threadStatus.m_commandId = 1;
threadStatus.m_status = 1;
threadStatus.m_userPtr = userData;
m_startedThreadMask |= DWORD_PTR(1) << threadIndex;
///fire event to start new task
SetEvent( threadStatus.m_eventStartHandle );
///fire event to start new task
SetEvent(threadStatus.m_eventStartHandle);
}
/// Blocks until one worker signals its complete event, then marks that worker
/// idle and clears its started bit.
///
/// @return index of the worker that finished, or -1 if the wait itself failed
int btThreadSupportWin32::waitForResponse()
{
	btAssert(m_activeThreadStatus.size());

	DWORD res = WaitForMultipleObjects(m_completeHandles.size(), &m_completeHandles[0], FALSE, INFINITE);
	btAssert(res != WAIT_FAILED);
	if (res == WAIT_FAILED)
	{
		// res - WAIT_OBJECT_0 would be -1 here; never use it as an array index
		return -1;
	}

	// with bWaitAll == FALSE the result identifies the signalled handle
	int last = res - WAIT_OBJECT_0;
	btAssert(last >= 0);
	btAssert(last < m_activeThreadStatus.size());

	btThreadStatus& threadStatus = m_activeThreadStatus[last];
	btAssert(threadStatus.m_threadHandle);
	btAssert(threadStatus.m_eventCompleteHandle);

	//WaitForSingleObject(threadStatus.m_eventCompleteHandle, INFINITE);
	btAssert(threadStatus.m_status > 1);
	threadStatus.m_status = 0;

	m_startedThreadMask &= ~(DWORD_PTR(1) << last);

	return last;
}
///Keep collecting worker responses until no bit remains set in m_startedThreadMask.
void btThreadSupportWin32::waitForAllTasks()
{
    // every waitForResponse() call clears the bit of the worker that just finished
    while (m_startedThreadMask != 0)
    {
        waitForResponse();
    }
}
void btThreadSupportWin32::startThreads( const ConstructionInfo& threadConstructionInfo )
void btThreadSupportWin32::startThreads(const ConstructionInfo& threadConstructionInfo)
{
static int uniqueId = 0;
uniqueId++;
btProcessorInfo& procInfo = m_processorInfo;
getProcessorInformation( &procInfo );
DWORD_PTR dwProcessAffinityMask = 0;
DWORD_PTR dwSystemAffinityMask = 0;
if ( !GetProcessAffinityMask( GetCurrentProcess(), &dwProcessAffinityMask, &dwSystemAffinityMask ) )
{
dwProcessAffinityMask = 0;
}
///The number of threads should be equal to the number of available cores - 1
m_numThreads = btMin(procInfo.numLogicalProcessors, int(BT_MAX_THREAD_COUNT)) - 1; // cap to max thread count (-1 because main thread already exists)
static int uniqueId = 0;
uniqueId++;
btProcessorInfo& procInfo = m_processorInfo;
getProcessorInformation(&procInfo);
DWORD_PTR dwProcessAffinityMask = 0;
DWORD_PTR dwSystemAffinityMask = 0;
if (!GetProcessAffinityMask(GetCurrentProcess(), &dwProcessAffinityMask, &dwSystemAffinityMask))
{
dwProcessAffinityMask = 0;
}
///The number of threads should be equal to the number of available cores - 1
m_numThreads = btMin(procInfo.numLogicalProcessors, int(BT_MAX_THREAD_COUNT)) - 1; // cap to max thread count (-1 because main thread already exists)
m_activeThreadStatus.resize( m_numThreads );
m_completeHandles.resize( m_numThreads );
m_startedThreadMask = 0;
m_activeThreadStatus.resize(m_numThreads);
m_completeHandles.resize(m_numThreads);
m_startedThreadMask = 0;
// set main thread affinity
if ( DWORD_PTR mask = dwProcessAffinityMask & getProcessorTeamMask( procInfo, 0 ))
{
SetThreadAffinityMask( GetCurrentThread(), mask );
SetThreadIdealProcessor( GetCurrentThread(), 0 );
}
// set main thread affinity
if (DWORD_PTR mask = dwProcessAffinityMask & getProcessorTeamMask(procInfo, 0))
{
SetThreadAffinityMask(GetCurrentThread(), mask);
SetThreadIdealProcessor(GetCurrentThread(), 0);
}
for ( int i = 0; i < m_numThreads; i++ )
{
printf( "starting thread %d\n", i );
for (int i = 0; i < m_numThreads; i++)
{
printf("starting thread %d\n", i);
btThreadStatus& threadStatus = m_activeThreadStatus[ i ];
btThreadStatus& threadStatus = m_activeThreadStatus[i];
LPSECURITY_ATTRIBUTES lpThreadAttributes = NULL;
SIZE_T dwStackSize = threadConstructionInfo.m_threadStackSize;
LPTHREAD_START_ROUTINE lpStartAddress = &win32threadStartFunc;
LPVOID lpParameter = &threadStatus;
DWORD dwCreationFlags = 0;
LPDWORD lpThreadId = 0;
LPSECURITY_ATTRIBUTES lpThreadAttributes = NULL;
SIZE_T dwStackSize = threadConstructionInfo.m_threadStackSize;
LPTHREAD_START_ROUTINE lpStartAddress = &win32threadStartFunc;
LPVOID lpParameter = &threadStatus;
DWORD dwCreationFlags = 0;
LPDWORD lpThreadId = 0;
threadStatus.m_userPtr = 0;
threadStatus.m_userPtr = 0;
sprintf( threadStatus.m_eventStartHandleName, "es%.8s%d%d", threadConstructionInfo.m_uniqueName, uniqueId, i );
threadStatus.m_eventStartHandle = CreateEventA( 0, false, false, threadStatus.m_eventStartHandleName );
sprintf(threadStatus.m_eventStartHandleName, "es%.8s%d%d", threadConstructionInfo.m_uniqueName, uniqueId, i);
threadStatus.m_eventStartHandle = CreateEventA(0, false, false, threadStatus.m_eventStartHandleName);
sprintf( threadStatus.m_eventCompleteHandleName, "ec%.8s%d%d", threadConstructionInfo.m_uniqueName, uniqueId, i );
threadStatus.m_eventCompleteHandle = CreateEventA( 0, false, false, threadStatus.m_eventCompleteHandleName );
sprintf(threadStatus.m_eventCompleteHandleName, "ec%.8s%d%d", threadConstructionInfo.m_uniqueName, uniqueId, i);
threadStatus.m_eventCompleteHandle = CreateEventA(0, false, false, threadStatus.m_eventCompleteHandleName);
m_completeHandles[ i ] = threadStatus.m_eventCompleteHandle;
m_completeHandles[i] = threadStatus.m_eventCompleteHandle;
HANDLE handle = CreateThread( lpThreadAttributes, dwStackSize, lpStartAddress, lpParameter, dwCreationFlags, lpThreadId );
//SetThreadPriority( handle, THREAD_PRIORITY_HIGHEST );
// highest priority -- can cause erratic performance when numThreads > numCores
// we don't want worker threads to be higher priority than the main thread or the main thread could get
// totally shut out and unable to tell the workers to stop
//SetThreadPriority( handle, THREAD_PRIORITY_BELOW_NORMAL );
HANDLE handle = CreateThread(lpThreadAttributes, dwStackSize, lpStartAddress, lpParameter, dwCreationFlags, lpThreadId);
//SetThreadPriority( handle, THREAD_PRIORITY_HIGHEST );
// highest priority -- can cause erratic performance when numThreads > numCores
// we don't want worker threads to be higher priority than the main thread or the main thread could get
// totally shut out and unable to tell the workers to stop
//SetThreadPriority( handle, THREAD_PRIORITY_BELOW_NORMAL );
{
int processorId = i + 1; // leave processor 0 for main thread
DWORD_PTR teamMask = getProcessorTeamMask( procInfo, processorId );
if ( teamMask )
{
// bind each thread to only execute on processors of it's assigned team
// - for single-socket Intel x86 CPUs this has no effect (only a single, shared L3 cache so there is only 1 team)
// - for multi-socket Intel this will keep threads from migrating from one socket to another
// - for AMD Ryzen this will keep threads from migrating from one CCX to another
DWORD_PTR mask = teamMask & dwProcessAffinityMask;
if ( mask )
{
SetThreadAffinityMask( handle, mask );
}
}
SetThreadIdealProcessor( handle, processorId );
}
{
int processorId = i + 1; // leave processor 0 for main thread
DWORD_PTR teamMask = getProcessorTeamMask(procInfo, processorId);
if (teamMask)
{
// bind each thread to only execute on processors of it's assigned team
// - for single-socket Intel x86 CPUs this has no effect (only a single, shared L3 cache so there is only 1 team)
// - for multi-socket Intel this will keep threads from migrating from one socket to another
// - for AMD Ryzen this will keep threads from migrating from one CCX to another
DWORD_PTR mask = teamMask & dwProcessAffinityMask;
if (mask)
{
SetThreadAffinityMask(handle, mask);
}
}
SetThreadIdealProcessor(handle, processorId);
}
threadStatus.m_taskId = i;
threadStatus.m_commandId = 0;
threadStatus.m_status = 0;
threadStatus.m_threadHandle = handle;
threadStatus.m_userThreadFunc = threadConstructionInfo.m_userThreadFunc;
threadStatus.m_taskId = i;
threadStatus.m_commandId = 0;
threadStatus.m_status = 0;
threadStatus.m_threadHandle = handle;
threadStatus.m_userThreadFunc = threadConstructionInfo.m_userThreadFunc;
printf( "started %s thread %d with threadHandle %p\n", threadConstructionInfo.m_uniqueName, i, handle );
}
printf("started %s thread %d with threadHandle %p\n", threadConstructionInfo.m_uniqueName, i, handle);
}
}
///tell the task scheduler we are done with the SPU tasks
void btThreadSupportWin32::stopThreads()
{
for ( int i = 0; i < m_activeThreadStatus.size(); i++ )
{
btThreadStatus& threadStatus = m_activeThreadStatus[ i ];
if ( threadStatus.m_status > 0 )
{
WaitForSingleObject( threadStatus.m_eventCompleteHandle, INFINITE );
}
for (int i = 0; i < m_activeThreadStatus.size(); i++)
{
btThreadStatus& threadStatus = m_activeThreadStatus[i];
if (threadStatus.m_status > 0)
{
WaitForSingleObject(threadStatus.m_eventCompleteHandle, INFINITE);
}
threadStatus.m_userPtr = NULL;
SetEvent( threadStatus.m_eventStartHandle );
WaitForSingleObject( threadStatus.m_eventCompleteHandle, INFINITE );
threadStatus.m_userPtr = NULL;
SetEvent(threadStatus.m_eventStartHandle);
WaitForSingleObject(threadStatus.m_eventCompleteHandle, INFINITE);
CloseHandle( threadStatus.m_eventCompleteHandle );
CloseHandle( threadStatus.m_eventStartHandle );
CloseHandle( threadStatus.m_threadHandle );
CloseHandle(threadStatus.m_eventCompleteHandle);
CloseHandle(threadStatus.m_eventStartHandle);
CloseHandle(threadStatus.m_threadHandle);
}
}
m_activeThreadStatus.clear();
m_completeHandles.clear();
m_activeThreadStatus.clear();
m_completeHandles.clear();
}
///btCriticalSection implementation backed by a Win32 CRITICAL_SECTION.
///The section is initialized in the constructor and deleted in the destructor.
class btWin32CriticalSection : public btCriticalSection
{
private:
    CRITICAL_SECTION mCriticalSection;

    // An initialized CRITICAL_SECTION must not be copied or moved, so copying is disabled:
    // both members are declared private and intentionally left undefined.
    btWin32CriticalSection(const btWin32CriticalSection&);
    btWin32CriticalSection& operator=(const btWin32CriticalSection&);

public:
    btWin32CriticalSection()
    {
        InitializeCriticalSection(&mCriticalSection);
    }

    ~btWin32CriticalSection()
    {
        DeleteCriticalSection(&mCriticalSection);
    }

    ///Enter the critical section.
    void lock()
    {
        EnterCriticalSection(&mCriticalSection);
    }

    ///Leave the critical section.
    void unlock()
    {
        LeaveCriticalSection(&mCriticalSection);
    }
};
///Construct a btWin32CriticalSection inside 16-byte aligned storage obtained from btAlignedAlloc.
///Counterpart of deleteCriticalSection(), which runs the destructor and hands the storage to btAlignedFree.
btCriticalSection* btThreadSupportWin32::createCriticalSection()
{
    void* storage = btAlignedAlloc(sizeof(btWin32CriticalSection), 16);
    return new (storage) btWin32CriticalSection();
}
///Destroy a critical section and release its storage with btAlignedFree.
///Passing NULL is a no-op.
void btThreadSupportWin32::deleteCriticalSection(btCriticalSection* criticalSection)
{
    // invoking the (virtual) destructor through a null pointer would crash, so bail out early
    if (!criticalSection)
    {
        return;
    }

    // explicit destructor call followed by btAlignedFree, instead of operator delete
    criticalSection->~btCriticalSection();
    btAlignedFree(criticalSection);
}
///Factory used when the Win32 backend is compiled in (_WIN32 && BT_THREADSAFE):
///returns a heap-allocated btThreadSupportWin32 built from the given construction info.
btThreadSupportInterface* btThreadSupportInterface::create(const ConstructionInfo& info)
{
    btThreadSupportInterface* threadSupport = new btThreadSupportWin32(info);
    return threadSupport;
}
#endif //defined(_WIN32) && BT_THREADSAFE
#endif //defined(_WIN32) && BT_THREADSAFE