Expose pushProfileTiming / popProfileTiming in the PhysicsClient API to benchmark the Python parts of PyBullet.

Reduce 'm_cooldownTime' from 1000 microseconds to 100 microseconds (the overhead in raycast was too large).
If needed, we can expose this cooldown time.
Replace malloc by btAlignedObjectArray (going through Bullet's memory allocator)
This commit is contained in:
erwincoumans
2018-06-16 06:19:49 -07:00
parent bb8cbcdaae
commit f517b03534
13 changed files with 118 additions and 16 deletions

View File

@@ -18,7 +18,7 @@ del tmp1234.txt
cd build3 cd build3
premake4 --double --midi --enable_static_vr_plugin --enable_openvr --enable_pybullet --python_include_dir="%myvar%/include" --python_lib_dir="%myvar%/libs" --targetdir="../bin" vs2010 premake4 --double --enable_multithreading --midi --enable_static_vr_plugin --enable_openvr --enable_pybullet --python_include_dir="%myvar%/include" --python_lib_dir="%myvar%/libs" --targetdir="../bin" vs2010
#premake4 --serial --audio --double --midi --enable_openvr --enable_pybullet --python_include_dir="%myvar%/include" --python_lib_dir="%myvar%/libs" --targetdir="../bin" vs2010 #premake4 --serial --audio --double --midi --enable_openvr --enable_pybullet --python_include_dir="%myvar%/include" --python_lib_dir="%myvar%/libs" --targetdir="../bin" vs2010

View File

@@ -78,6 +78,10 @@ public:
virtual int getCachedUserDataId(int bodyUniqueId, int linkIndex, const char *key) const = 0; virtual int getCachedUserDataId(int bodyUniqueId, int linkIndex, const char *key) const = 0;
virtual int getNumUserData(int bodyUniqueId, int linkIndex) const = 0; virtual int getNumUserData(int bodyUniqueId, int linkIndex) const = 0;
virtual void getUserDataInfo(int bodyUniqueId, int linkIndex, int userDataIndex, const char **keyOut, int *userDataIdOut) const = 0; virtual void getUserDataInfo(int bodyUniqueId, int linkIndex, int userDataIndex, const char **keyOut, int *userDataIdOut) const = 0;
virtual void pushProfileTiming(const char* timingName)=0;
virtual void popProfileTiming()=0;
}; };

View File

@@ -4438,6 +4438,21 @@ B3_SHARED_API b3SharedMemoryCommandHandle b3ProfileTimingCommandInit(b3PhysicsCl
return (b3SharedMemoryCommandHandle)command; return (b3SharedMemoryCommandHandle)command;
} }
// C-API entry point: opens a named profile timing on the given physics client.
// The opaque handle is converted back to the PhysicsClient it wraps and the
// request is forwarded to PhysicsClient::pushProfileTiming(timingName).
// A matching b3PopProfileTiming call is expected to close the timing.
B3_SHARED_API void b3PushProfileTiming(b3PhysicsClientHandle physClient, const char* timingName)
{
	PhysicsClient* client = reinterpret_cast<PhysicsClient*>(physClient);
	b3Assert(client);  // a null handle is a caller error
	client->pushProfileTiming(timingName);
}
// C-API entry point: closes the most recently opened profile timing on the
// given physics client by forwarding to PhysicsClient::popProfileTiming().
B3_SHARED_API void b3PopProfileTiming(b3PhysicsClientHandle physClient)
{
	PhysicsClient* client = reinterpret_cast<PhysicsClient*>(physClient);
	b3Assert(client);  // a null handle is a caller error
	client->popProfileTiming();
}
B3_SHARED_API void b3SetProfileTimingDuractionInMicroSeconds(b3SharedMemoryCommandHandle commandHandle, int duration) B3_SHARED_API void b3SetProfileTimingDuractionInMicroSeconds(b3SharedMemoryCommandHandle commandHandle, int duration)
{ {
struct SharedMemoryCommand* command = (struct SharedMemoryCommand*) commandHandle; struct SharedMemoryCommand* command = (struct SharedMemoryCommand*) commandHandle;

View File

@@ -598,6 +598,9 @@ B3_SHARED_API int b3StateLoggingStop(b3SharedMemoryCommandHandle commandHandle,
B3_SHARED_API b3SharedMemoryCommandHandle b3ProfileTimingCommandInit(b3PhysicsClientHandle physClient, const char* name); B3_SHARED_API b3SharedMemoryCommandHandle b3ProfileTimingCommandInit(b3PhysicsClientHandle physClient, const char* name);
B3_SHARED_API void b3SetProfileTimingDuractionInMicroSeconds(b3SharedMemoryCommandHandle commandHandle, int duration); B3_SHARED_API void b3SetProfileTimingDuractionInMicroSeconds(b3SharedMemoryCommandHandle commandHandle, int duration);
B3_SHARED_API void b3PushProfileTiming(b3PhysicsClientHandle physClient, const char* timingName);
B3_SHARED_API void b3PopProfileTiming(b3PhysicsClientHandle physClient);
B3_SHARED_API void b3SetTimeOut(b3PhysicsClientHandle physClient, double timeOutInSeconds); B3_SHARED_API void b3SetTimeOut(b3PhysicsClientHandle physClient, double timeOutInSeconds);
B3_SHARED_API double b3GetTimeOut(b3PhysicsClientHandle physClient); B3_SHARED_API double b3GetTimeOut(b3PhysicsClientHandle physClient);

View File

@@ -12,7 +12,7 @@
#include "SharedMemoryBlock.h" #include "SharedMemoryBlock.h"
#include "BodyJointInfoUtility.h" #include "BodyJointInfoUtility.h"
#include "SharedMemoryUserData.h" #include "SharedMemoryUserData.h"
#include "LinearMath/btQuickprof.h"
struct UserDataCache struct UserDataCache
@@ -41,11 +41,16 @@ struct BodyJointInfoCache
} }
}; };
struct PhysicsClientSharedMemoryInternalData { struct PhysicsClientSharedMemoryInternalData {
SharedMemoryInterface* m_sharedMemory; SharedMemoryInterface* m_sharedMemory;
bool m_ownsSharedMemory; bool m_ownsSharedMemory;
SharedMemoryBlock* m_testBlock1; SharedMemoryBlock* m_testBlock1;
btAlignedObjectArray<CProfileSample* > m_profileTimings;
btAlignedObjectArray<std::string* > m_profileTimingStrings;
btHashMap<btHashInt,BodyJointInfoCache*> m_bodyJointMap; btHashMap<btHashInt,BodyJointInfoCache*> m_bodyJointMap;
btHashMap<btHashInt,b3UserConstraint> m_userConstraintInfoMap; btHashMap<btHashInt,b3UserConstraint> m_userConstraintInfoMap;
@@ -950,6 +955,7 @@ const SharedMemoryStatus* PhysicsClientSharedMemory::processServerStatus() {
case CMD_REQUEST_RAY_CAST_INTERSECTIONS_COMPLETED: case CMD_REQUEST_RAY_CAST_INTERSECTIONS_COMPLETED:
{ {
B3_PROFILE("m_raycastHits");
if (m_data->m_verboseOutput) if (m_data->m_verboseOutput)
{ {
b3Printf("Raycast completed"); b3Printf("Raycast completed");
@@ -1882,3 +1888,27 @@ void PhysicsClientSharedMemory::getUserDataInfo(int bodyUniqueId, int linkIndex,
SharedMemoryUserData *userDataPtr = (userDataCachePtr)->m_userDataMap.getAtIndex(userDataIndex); SharedMemoryUserData *userDataPtr = (userDataCachePtr)->m_userDataMap.getAtIndex(userDataIndex);
*keyOut = (userDataPtr)->m_key.c_str(); *keyOut = (userDataPtr)->m_key.c_str();
} }
// Opens a named profile timing by creating a CProfileSample and pushing it on
// the m_profileTimings stack; popProfileTiming() deletes the sample again.
//
// timingName: label for the timing. A null pointer is treated as an empty
//             name, because constructing std::string from null is undefined.
//
// The label is copied into a heap-allocated std::string kept in
// m_profileTimingStrings, and the sample receives that string's c_str().
// Presumably the profiler keeps the raw pointer beyond this call, so the
// string must outlive the sample -- TODO confirm against btQuickprof.
// Strings are reused when the same label is pushed again; previously every
// call allocated a new string that was never released while the client lived,
// so repeated timings (e.g. one per rayTestBatch) grew memory without bound.
void PhysicsClientSharedMemory::pushProfileTiming(const char* timingName)
{
	const char* name = timingName ? timingName : "";

	// Look for an already stored copy of this label (the set of distinct
	// labels is expected to be small, so a linear scan is sufficient).
	std::string* str = 0;
	for (int i = 0; i < m_data->m_profileTimingStrings.size(); i++)
	{
		if (*m_data->m_profileTimingStrings[i] == name)
		{
			str = m_data->m_profileTimingStrings[i];
			break;
		}
	}

	if (!str)
	{
		str = new std::string(name);
		m_data->m_profileTimingStrings.push_back(str);
	}

	m_data->m_profileTimings.push_back(new CProfileSample(str->c_str()));
}
void PhysicsClientSharedMemory::popProfileTiming()
{
if (m_data->m_profileTimings.size())
{
CProfileSample* sample = m_data->m_profileTimings[m_data->m_profileTimings.size()-1];
m_data->m_profileTimings.pop_back();
delete sample;
}
}

View File

@@ -89,6 +89,8 @@ public:
virtual int getNumUserData(int bodyUniqueId, int linkIndex) const; virtual int getNumUserData(int bodyUniqueId, int linkIndex) const;
virtual void getUserDataInfo(int bodyUniqueId, int linkIndex, int userDataIndex, const char **keyOut, int *userDataIdOut) const; virtual void getUserDataInfo(int bodyUniqueId, int linkIndex, int userDataIndex, const char **keyOut, int *userDataIdOut) const;
virtual void pushProfileTiming(const char* timingName);
virtual void popProfileTiming();
}; };
#endif // BT_PHYSICS_CLIENT_API_H #endif // BT_PHYSICS_CLIENT_API_H

View File

@@ -14,12 +14,15 @@
#include <string> #include <string>
#include "SharedMemoryUserData.h" #include "SharedMemoryUserData.h"
#include "LinearMath/btQuickprof.h"
struct UserDataCache { struct UserDataCache {
btHashMap<btHashInt, SharedMemoryUserData> m_userDataMap; btHashMap<btHashInt, SharedMemoryUserData> m_userDataMap;
btHashMap<btHashString, int> m_keyToUserDataIdMap; btHashMap<btHashString, int> m_keyToUserDataIdMap;
~UserDataCache() { ~UserDataCache()
{
} }
}; };
@@ -58,6 +61,9 @@ struct PhysicsDirectInternalData
btHashMap<btHashInt,BodyJointInfoCache2*> m_bodyJointMap; btHashMap<btHashInt,BodyJointInfoCache2*> m_bodyJointMap;
btHashMap<btHashInt,b3UserConstraint> m_userConstraintInfoMap; btHashMap<btHashInt,b3UserConstraint> m_userConstraintInfoMap;
btAlignedObjectArray<CProfileSample* > m_profileTimings;
btAlignedObjectArray<std::string* > m_profileTimingStrings;
char m_bulletStreamDataServerToClient[SHARED_MEMORY_MAX_STREAM_CHUNK_SIZE]; char m_bulletStreamDataServerToClient[SHARED_MEMORY_MAX_STREAM_CHUNK_SIZE];
btAlignedObjectArray<double> m_cachedMassMatrix; btAlignedObjectArray<double> m_cachedMassMatrix;
int m_cachedCameraPixelsWidth; int m_cachedCameraPixelsWidth;
@@ -111,6 +117,12 @@ PhysicsDirect::PhysicsDirect(PhysicsCommandProcessorInterface* physSdk, bool pas
PhysicsDirect::~PhysicsDirect() PhysicsDirect::~PhysicsDirect()
{ {
for (int i=0;i<m_data->m_profileTimingStrings.size();i++)
{
delete m_data->m_profileTimingStrings[i];
}
m_data->m_profileTimingStrings.clear();
if (m_data->m_commandProcessor->isConnected()) if (m_data->m_commandProcessor->isConnected())
{ {
m_data->m_commandProcessor->disconnect(); m_data->m_commandProcessor->disconnect();
@@ -1530,3 +1542,23 @@ void PhysicsDirect::getUserDataInfo(int bodyUniqueId, int linkIndex, int userDat
SharedMemoryUserData* userDataPtr = (userDataCachePtr)->m_userDataMap.getAtIndex(userDataIndex); SharedMemoryUserData* userDataPtr = (userDataCachePtr)->m_userDataMap.getAtIndex(userDataIndex);
*keyOut = (userDataPtr)->m_key.c_str(); *keyOut = (userDataPtr)->m_key.c_str();
} }
// Opens a named profile timing by creating a CProfileSample and pushing it on
// the m_profileTimings stack; popProfileTiming() deletes the sample again.
//
// timingName: label for the timing. A null pointer is treated as an empty
//             name, because constructing std::string from null is undefined.
//
// The label is copied into a heap-allocated std::string kept in
// m_profileTimingStrings (released in ~PhysicsDirect), and the sample receives
// that string's c_str(). Presumably the profiler keeps the raw pointer beyond
// this call, so the string must outlive the sample -- TODO confirm against
// btQuickprof.
// Strings are reused when the same label is pushed again; previously every
// call allocated a new string that was only released in the destructor, so
// repeated timings (e.g. one per rayTestBatch) grew memory without bound.
void PhysicsDirect::pushProfileTiming(const char* timingName)
{
	const char* name = timingName ? timingName : "";

	// Look for an already stored copy of this label (the set of distinct
	// labels is expected to be small, so a linear scan is sufficient).
	std::string* str = 0;
	for (int i = 0; i < m_data->m_profileTimingStrings.size(); i++)
	{
		if (*m_data->m_profileTimingStrings[i] == name)
		{
			str = m_data->m_profileTimingStrings[i];
			break;
		}
	}

	if (!str)
	{
		str = new std::string(name);
		m_data->m_profileTimingStrings.push_back(str);
	}

	m_data->m_profileTimings.push_back(new CProfileSample(str->c_str()));
}
void PhysicsDirect::popProfileTiming()
{
if (m_data->m_profileTimings.size())
{
CProfileSample* sample = m_data->m_profileTimings[m_data->m_profileTimings.size()-1];
m_data->m_profileTimings.pop_back();
delete sample;
}
}

View File

@@ -118,6 +118,9 @@ public:
virtual int getCachedUserDataId(int bodyUniqueId, int linkIndex, const char *key) const; virtual int getCachedUserDataId(int bodyUniqueId, int linkIndex, const char *key) const;
virtual int getNumUserData(int bodyUniqueId, int linkIndex) const; virtual int getNumUserData(int bodyUniqueId, int linkIndex) const;
virtual void getUserDataInfo(int bodyUniqueId, int linkIndex, int userDataIndex, const char **keyOut, int *userDataIdOut) const; virtual void getUserDataInfo(int bodyUniqueId, int linkIndex, int userDataIndex, const char **keyOut, int *userDataIdOut) const;
virtual void pushProfileTiming(const char* timingName);
virtual void popProfileTiming();
}; };
#endif //PHYSICS_DIRECT_H #endif //PHYSICS_DIRECT_H

View File

@@ -1780,6 +1780,7 @@ PhysicsServerCommandProcessor::PhysicsServerCommandProcessor()
createEmptyDynamicsWorld(); createEmptyDynamicsWorld();
#ifdef BT_THREADSAFE
if (btGetTaskScheduler() == 0) { if (btGetTaskScheduler() == 0) {
btITaskScheduler *scheduler = btCreateDefaultTaskScheduler(); btITaskScheduler *scheduler = btCreateDefaultTaskScheduler();
if (scheduler == 0) { if (scheduler == 0) {
@@ -1787,6 +1788,7 @@ PhysicsServerCommandProcessor::PhysicsServerCommandProcessor()
} }
btSetTaskScheduler(scheduler); btSetTaskScheduler(scheduler);
} }
#endif //BT_THREADSAFE
} }
PhysicsServerCommandProcessor::~PhysicsServerCommandProcessor() PhysicsServerCommandProcessor::~PhysicsServerCommandProcessor()
@@ -4821,10 +4823,11 @@ bool PhysicsServerCommandProcessor::processRequestRaycastIntersectionsCommand(co
const int numRays = clientCmd.m_requestRaycastIntersections.m_numRays; const int numRays = clientCmd.m_requestRaycastIntersections.m_numRays;
const int numThreads = clientCmd.m_requestRaycastIntersections.m_numThreads; const int numThreads = clientCmd.m_requestRaycastIntersections.m_numThreads;
b3RayData *rayInputBuffer = (b3RayData *)malloc(sizeof(b3RayData) * numRays); btAlignedObjectArray<b3RayData> rays;
memcpy(rayInputBuffer, bufferServerToClient, sizeof(b3RayData) * numRays); rays.resize(numRays);
memcpy(&rays[0],bufferServerToClient,numRays*sizeof(b3RayData));
BatchRayCaster batchRayCaster(m_data->m_dynamicsWorld, rayInputBuffer, (b3RayHitInfo *)bufferServerToClient, numRays); BatchRayCaster batchRayCaster(m_data->m_dynamicsWorld, &rays[0], (b3RayHitInfo *)bufferServerToClient, numRays);
if (numThreads == 0) { if (numThreads == 0) {
// When 0 is specified, Bullet can decide how many threads to use. // When 0 is specified, Bullet can decide how many threads to use.
// About 16 rays per thread seems to work reasonably well. // About 16 rays per thread seems to work reasonably well.
@@ -4840,7 +4843,6 @@ bool PhysicsServerCommandProcessor::processRequestRaycastIntersectionsCommand(co
batchRayCaster.castRays(numThreads); batchRayCaster.castRays(numThreads);
} }
free(rayInputBuffer);
serverStatusOut.m_raycastHits.m_numRaycastHits = numRays; serverStatusOut.m_raycastHits.m_numRaycastHits = numRays;
serverStatusOut.m_type = CMD_REQUEST_RAY_CAST_INTERSECTIONS_COMPLETED; serverStatusOut.m_type = CMD_REQUEST_RAY_CAST_INTERSECTIONS_COMPLETED;
return hasStatus; return hasStatus;

View File

@@ -548,6 +548,7 @@ enum b3StateLoggingType
STATE_LOGGING_PROFILE_TIMINGS = 6, STATE_LOGGING_PROFILE_TIMINGS = 6,
STATE_LOGGING_ALL_COMMANDS=7, STATE_LOGGING_ALL_COMMANDS=7,
STATE_REPLAY_ALL_COMMANDS=8, STATE_REPLAY_ALL_COMMANDS=8,
STATE_LOGGING_CUSTOM_TIMER=9,
}; };

View File

@@ -4704,6 +4704,7 @@ static PyObject* pybullet_rayTestBatch(PyObject* self, PyObject* args, PyObject*
} }
commandHandle = b3CreateRaycastBatchCommandInit(sm); commandHandle = b3CreateRaycastBatchCommandInit(sm);
b3RaycastBatchSetNumThreads(commandHandle, numThreads); b3RaycastBatchSetNumThreads(commandHandle, numThreads);
@@ -4734,6 +4735,7 @@ static PyObject* pybullet_rayTestBatch(PyObject* self, PyObject* args, PyObject*
Py_DECREF(seqRayToObj); Py_DECREF(seqRayToObj);
return NULL; return NULL;
} }
b3PushProfileTiming(sm, "extractPythonFromToSequenceToC");
for (i = 0; i < lenFrom; i++) for (i = 0; i < lenFrom; i++)
{ {
PyObject* rayFromObj = PySequence_GetItem(rayFromObjList,i); PyObject* rayFromObj = PySequence_GetItem(rayFromObjList,i);
@@ -4752,11 +4754,13 @@ static PyObject* pybullet_rayTestBatch(PyObject* self, PyObject* args, PyObject*
Py_DECREF(seqRayToObj); Py_DECREF(seqRayToObj);
Py_DECREF(rayFromObj); Py_DECREF(rayFromObj);
Py_DECREF(rayToObj); Py_DECREF(rayToObj);
b3PopProfileTiming(sm);
return NULL; return NULL;
} }
Py_DECREF(rayFromObj); Py_DECREF(rayFromObj);
Py_DECREF(rayToObj); Py_DECREF(rayToObj);
} }
b3PopProfileTiming(sm);
} }
} else } else
{ {
@@ -4779,6 +4783,7 @@ static PyObject* pybullet_rayTestBatch(PyObject* self, PyObject* args, PyObject*
struct b3RaycastInformation raycastInfo; struct b3RaycastInformation raycastInfo;
PyObject* rayHitsObj = 0; PyObject* rayHitsObj = 0;
int i; int i;
b3PushProfileTiming(sm, "convertRaycastInformationToPython");
b3GetRaycastInformation(sm, &raycastInfo); b3GetRaycastInformation(sm, &raycastInfo);
rayHitsObj = PyTuple_New(raycastInfo.m_numRayHits); rayHitsObj = PyTuple_New(raycastInfo.m_numRayHits);
@@ -4819,6 +4824,7 @@ static PyObject* pybullet_rayTestBatch(PyObject* self, PyObject* args, PyObject*
} }
PyTuple_SetItem(rayHitsObj, i, singleHitObj); PyTuple_SetItem(rayHitsObj, i, singleHitObj);
} }
b3PopProfileTiming(sm);
return rayHitsObj; return rayHitsObj;
} }
@@ -9587,6 +9593,7 @@ initpybullet(void)
PyModule_AddIntConstant(m, "STATE_LOGGING_PROFILE_TIMINGS", STATE_LOGGING_PROFILE_TIMINGS); PyModule_AddIntConstant(m, "STATE_LOGGING_PROFILE_TIMINGS", STATE_LOGGING_PROFILE_TIMINGS);
PyModule_AddIntConstant(m, "STATE_LOGGING_ALL_COMMANDS", STATE_LOGGING_ALL_COMMANDS); PyModule_AddIntConstant(m, "STATE_LOGGING_ALL_COMMANDS", STATE_LOGGING_ALL_COMMANDS);
PyModule_AddIntConstant(m, "STATE_REPLAY_ALL_COMMANDS", STATE_REPLAY_ALL_COMMANDS); PyModule_AddIntConstant(m, "STATE_REPLAY_ALL_COMMANDS", STATE_REPLAY_ALL_COMMANDS);
PyModule_AddIntConstant(m, "STATE_LOGGING_CUSTOM_TIMER", STATE_LOGGING_CUSTOM_TIMER);
PyModule_AddIntConstant(m, "COV_ENABLE_GUI", COV_ENABLE_GUI); PyModule_AddIntConstant(m, "COV_ENABLE_GUI", COV_ENABLE_GUI);
PyModule_AddIntConstant(m, "COV_ENABLE_SHADOWS", COV_ENABLE_SHADOWS); PyModule_AddIntConstant(m, "COV_ENABLE_SHADOWS", COV_ENABLE_SHADOWS);

View File

@@ -781,7 +781,8 @@ int btSequentialImpulseConstraintSolver::getOrInitSolverBody(btCollisionObject&
else else
{ {
// Incorrectly set collision object flags can degrade performance in various ways. // Incorrectly set collision object flags can degrade performance in various ways.
btAssert( body.isStaticOrKinematicObject() ); //btAssert( body.isStaticOrKinematicObject() );
//it could be a multibody link collider
// all fixed bodies (inf mass) get mapped to a single solver id // all fixed bodies (inf mass) get mapped to a single solver id
if ( m_fixedBodyId < 0 ) if ( m_fixedBodyId < 0 )
{ {

View File

@@ -412,11 +412,13 @@ static void WorkerThreadFunc( void* userPtr )
} }
} }
} }
{
BT_PROFILE("sleep");
// go sleep // go sleep
localStorage->m_mutex.lock(); localStorage->m_mutex.lock();
localStorage->m_status = WorkerThreadStatus::kSleeping; localStorage->m_status = WorkerThreadStatus::kSleeping;
localStorage->m_mutex.unlock(); localStorage->m_mutex.unlock();
}
} }
@@ -503,7 +505,7 @@ public:
storage.m_threadId = i; storage.m_threadId = i;
storage.m_directive = m_workerDirective; storage.m_directive = m_workerDirective;
storage.m_status = WorkerThreadStatus::kSleeping; storage.m_status = WorkerThreadStatus::kSleeping;
storage.m_cooldownTime = 1000; // 1000 microseconds, threads go to sleep after this long if they have nothing to do storage.m_cooldownTime = 100; // 100 microseconds, threads go to sleep after this long if they have nothing to do
storage.m_clock = &m_clock; storage.m_clock = &m_clock;
storage.m_queue = m_perThreadJobQueues[i]; storage.m_queue = m_perThreadJobQueues[i];
} }