Expose pushProfileTimer / pop ProfileTimer in PhysicsClient API to benchmark Python parts of PyBullet.

reduce 'm_cooldownTime' from 1000 microseconds to 100 microseconds (overhead in raycast is too large)
If needed, we can expose this cooldown time.
Replace malloc by btAlignedObjectArray (going through Bullet's memory allocator)
This commit is contained in:
erwincoumans
2018-06-16 06:19:49 -07:00
parent bb8cbcdaae
commit f517b03534
13 changed files with 118 additions and 16 deletions

View File

@@ -18,7 +18,7 @@ del tmp1234.txt
cd build3
premake4 --double --midi --enable_static_vr_plugin --enable_openvr --enable_pybullet --python_include_dir="%myvar%/include" --python_lib_dir="%myvar%/libs" --targetdir="../bin" vs2010
premake4 --double --enable_multithreading --midi --enable_static_vr_plugin --enable_openvr --enable_pybullet --python_include_dir="%myvar%/include" --python_lib_dir="%myvar%/libs" --targetdir="../bin" vs2010
#premake4 --serial --audio --double --midi --enable_openvr --enable_pybullet --python_include_dir="%myvar%/include" --python_lib_dir="%myvar%/libs" --targetdir="../bin" vs2010

View File

@@ -78,6 +78,10 @@ public:
virtual int getCachedUserDataId(int bodyUniqueId, int linkIndex, const char *key) const = 0;
virtual int getNumUserData(int bodyUniqueId, int linkIndex) const = 0;
virtual void getUserDataInfo(int bodyUniqueId, int linkIndex, int userDataIndex, const char **keyOut, int *userDataIdOut) const = 0;
virtual void pushProfileTiming(const char* timingName)=0;
virtual void popProfileTiming()=0;
};

View File

@@ -4438,6 +4438,21 @@ B3_SHARED_API b3SharedMemoryCommandHandle b3ProfileTimingCommandInit(b3PhysicsCl
return (b3SharedMemoryCommandHandle)command;
}
B3_SHARED_API void b3PushProfileTiming(b3PhysicsClientHandle physClient, const char* timingName)
{
PhysicsClient* cl = (PhysicsClient*)physClient;
b3Assert(cl);
cl->pushProfileTiming(timingName);
}
B3_SHARED_API void b3PopProfileTiming(b3PhysicsClientHandle physClient)
{
PhysicsClient* cl = (PhysicsClient*)physClient;
b3Assert(cl);
cl->popProfileTiming();
}
B3_SHARED_API void b3SetProfileTimingDuractionInMicroSeconds(b3SharedMemoryCommandHandle commandHandle, int duration)
{
struct SharedMemoryCommand* command = (struct SharedMemoryCommand*) commandHandle;

View File

@@ -598,6 +598,9 @@ B3_SHARED_API int b3StateLoggingStop(b3SharedMemoryCommandHandle commandHandle,
B3_SHARED_API b3SharedMemoryCommandHandle b3ProfileTimingCommandInit(b3PhysicsClientHandle physClient, const char* name);
B3_SHARED_API void b3SetProfileTimingDuractionInMicroSeconds(b3SharedMemoryCommandHandle commandHandle, int duration);
B3_SHARED_API void b3PushProfileTiming(b3PhysicsClientHandle physClient, const char* timingName);
B3_SHARED_API void b3PopProfileTiming(b3PhysicsClientHandle physClient);
B3_SHARED_API void b3SetTimeOut(b3PhysicsClientHandle physClient, double timeOutInSeconds);
B3_SHARED_API double b3GetTimeOut(b3PhysicsClientHandle physClient);

View File

@@ -12,7 +12,7 @@
#include "SharedMemoryBlock.h"
#include "BodyJointInfoUtility.h"
#include "SharedMemoryUserData.h"
#include "LinearMath/btQuickprof.h"
struct UserDataCache
@@ -41,11 +41,16 @@ struct BodyJointInfoCache
}
};
struct PhysicsClientSharedMemoryInternalData {
SharedMemoryInterface* m_sharedMemory;
bool m_ownsSharedMemory;
SharedMemoryBlock* m_testBlock1;
btAlignedObjectArray<CProfileSample* > m_profileTimings;
btAlignedObjectArray<std::string* > m_profileTimingStrings;
btHashMap<btHashInt,BodyJointInfoCache*> m_bodyJointMap;
btHashMap<btHashInt,b3UserConstraint> m_userConstraintInfoMap;
@@ -950,6 +955,7 @@ const SharedMemoryStatus* PhysicsClientSharedMemory::processServerStatus() {
case CMD_REQUEST_RAY_CAST_INTERSECTIONS_COMPLETED:
{
B3_PROFILE("m_raycastHits");
if (m_data->m_verboseOutput)
{
b3Printf("Raycast completed");
@@ -1882,3 +1888,27 @@ void PhysicsClientSharedMemory::getUserDataInfo(int bodyUniqueId, int linkIndex,
SharedMemoryUserData *userDataPtr = (userDataCachePtr)->m_userDataMap.getAtIndex(userDataIndex);
*keyOut = (userDataPtr)->m_key.c_str();
}
void PhysicsClientSharedMemory::pushProfileTiming(const char* timingName)
{
std::string* str = new std::string(timingName);
m_data->m_profileTimingStrings.push_back(str);
m_data->m_profileTimings.push_back(new CProfileSample(str->c_str()));
}
void PhysicsClientSharedMemory::popProfileTiming()
{
if (m_data->m_profileTimings.size())
{
CProfileSample* sample = m_data->m_profileTimings[m_data->m_profileTimings.size()-1];
m_data->m_profileTimings.pop_back();
delete sample;
}
}

View File

@@ -89,6 +89,8 @@ public:
virtual int getNumUserData(int bodyUniqueId, int linkIndex) const;
virtual void getUserDataInfo(int bodyUniqueId, int linkIndex, int userDataIndex, const char **keyOut, int *userDataIdOut) const;
virtual void pushProfileTiming(const char* timingName);
virtual void popProfileTiming();
};
#endif // BT_PHYSICS_CLIENT_API_H

View File

@@ -14,12 +14,15 @@
#include <string>
#include "SharedMemoryUserData.h"
#include "LinearMath/btQuickprof.h"
struct UserDataCache {
btHashMap<btHashInt, SharedMemoryUserData> m_userDataMap;
btHashMap<btHashString, int> m_keyToUserDataIdMap;
~UserDataCache() {
~UserDataCache()
{
}
};
@@ -58,6 +61,9 @@ struct PhysicsDirectInternalData
btHashMap<btHashInt,BodyJointInfoCache2*> m_bodyJointMap;
btHashMap<btHashInt,b3UserConstraint> m_userConstraintInfoMap;
btAlignedObjectArray<CProfileSample* > m_profileTimings;
btAlignedObjectArray<std::string* > m_profileTimingStrings;
char m_bulletStreamDataServerToClient[SHARED_MEMORY_MAX_STREAM_CHUNK_SIZE];
btAlignedObjectArray<double> m_cachedMassMatrix;
int m_cachedCameraPixelsWidth;
@@ -111,6 +117,12 @@ PhysicsDirect::PhysicsDirect(PhysicsCommandProcessorInterface* physSdk, bool pas
PhysicsDirect::~PhysicsDirect()
{
for (int i=0;i<m_data->m_profileTimingStrings.size();i++)
{
delete m_data->m_profileTimingStrings[i];
}
m_data->m_profileTimingStrings.clear();
if (m_data->m_commandProcessor->isConnected())
{
m_data->m_commandProcessor->disconnect();
@@ -1530,3 +1542,23 @@ void PhysicsDirect::getUserDataInfo(int bodyUniqueId, int linkIndex, int userDat
SharedMemoryUserData* userDataPtr = (userDataCachePtr)->m_userDataMap.getAtIndex(userDataIndex);
*keyOut = (userDataPtr)->m_key.c_str();
}
void PhysicsDirect::pushProfileTiming(const char* timingName)
{
std::string* str = new std::string(timingName);
m_data->m_profileTimingStrings.push_back(str);
m_data->m_profileTimings.push_back(new CProfileSample(str->c_str()));
}
void PhysicsDirect::popProfileTiming()
{
if (m_data->m_profileTimings.size())
{
CProfileSample* sample = m_data->m_profileTimings[m_data->m_profileTimings.size()-1];
m_data->m_profileTimings.pop_back();
delete sample;
}
}

View File

@@ -118,6 +118,9 @@ public:
virtual int getCachedUserDataId(int bodyUniqueId, int linkIndex, const char *key) const;
virtual int getNumUserData(int bodyUniqueId, int linkIndex) const;
virtual void getUserDataInfo(int bodyUniqueId, int linkIndex, int userDataIndex, const char **keyOut, int *userDataIdOut) const;
virtual void pushProfileTiming(const char* timingName);
virtual void popProfileTiming();
};
#endif //PHYSICS_DIRECT_H

View File

@@ -1780,6 +1780,7 @@ PhysicsServerCommandProcessor::PhysicsServerCommandProcessor()
createEmptyDynamicsWorld();
#ifdef BT_THREADSAFE
if (btGetTaskScheduler() == 0) {
btITaskScheduler *scheduler = btCreateDefaultTaskScheduler();
if (scheduler == 0) {
@@ -1787,6 +1788,7 @@ PhysicsServerCommandProcessor::PhysicsServerCommandProcessor()
}
btSetTaskScheduler(scheduler);
}
#endif //BT_THREADSAFE
}
PhysicsServerCommandProcessor::~PhysicsServerCommandProcessor()
@@ -4821,10 +4823,11 @@ bool PhysicsServerCommandProcessor::processRequestRaycastIntersectionsCommand(co
const int numRays = clientCmd.m_requestRaycastIntersections.m_numRays;
const int numThreads = clientCmd.m_requestRaycastIntersections.m_numThreads;
b3RayData *rayInputBuffer = (b3RayData *)malloc(sizeof(b3RayData) * numRays);
memcpy(rayInputBuffer, bufferServerToClient, sizeof(b3RayData) * numRays);
BatchRayCaster batchRayCaster(m_data->m_dynamicsWorld, rayInputBuffer, (b3RayHitInfo *)bufferServerToClient, numRays);
btAlignedObjectArray<b3RayData> rays;
rays.resize(numRays);
memcpy(&rays[0],bufferServerToClient,numRays*sizeof(b3RayData));
BatchRayCaster batchRayCaster(m_data->m_dynamicsWorld, &rays[0], (b3RayHitInfo *)bufferServerToClient, numRays);
if (numThreads == 0) {
// When 0 is specified, Bullet can decide how many threads to use.
// About 16 rays per thread seems to work reasonably well.
@@ -4840,7 +4843,6 @@ bool PhysicsServerCommandProcessor::processRequestRaycastIntersectionsCommand(co
batchRayCaster.castRays(numThreads);
}
free(rayInputBuffer);
serverStatusOut.m_raycastHits.m_numRaycastHits = numRays;
serverStatusOut.m_type = CMD_REQUEST_RAY_CAST_INTERSECTIONS_COMPLETED;
return hasStatus;

View File

@@ -548,6 +548,7 @@ enum b3StateLoggingType
STATE_LOGGING_PROFILE_TIMINGS = 6,
STATE_LOGGING_ALL_COMMANDS=7,
STATE_REPLAY_ALL_COMMANDS=8,
STATE_LOGGING_CUSTOM_TIMER=9,
};

View File

@@ -4704,6 +4704,7 @@ static PyObject* pybullet_rayTestBatch(PyObject* self, PyObject* args, PyObject*
}
commandHandle = b3CreateRaycastBatchCommandInit(sm);
b3RaycastBatchSetNumThreads(commandHandle, numThreads);
@@ -4734,6 +4735,7 @@ static PyObject* pybullet_rayTestBatch(PyObject* self, PyObject* args, PyObject*
Py_DECREF(seqRayToObj);
return NULL;
}
b3PushProfileTiming(sm, "extractPythonFromToSequenceToC");
for (i = 0; i < lenFrom; i++)
{
PyObject* rayFromObj = PySequence_GetItem(rayFromObjList,i);
@@ -4752,11 +4754,13 @@ static PyObject* pybullet_rayTestBatch(PyObject* self, PyObject* args, PyObject*
Py_DECREF(seqRayToObj);
Py_DECREF(rayFromObj);
Py_DECREF(rayToObj);
b3PopProfileTiming(sm);
return NULL;
}
Py_DECREF(rayFromObj);
Py_DECREF(rayToObj);
}
b3PopProfileTiming(sm);
}
} else
{
@@ -4779,8 +4783,9 @@ static PyObject* pybullet_rayTestBatch(PyObject* self, PyObject* args, PyObject*
struct b3RaycastInformation raycastInfo;
PyObject* rayHitsObj = 0;
int i;
b3PushProfileTiming(sm, "convertRaycastInformationToPython");
b3GetRaycastInformation(sm, &raycastInfo);
rayHitsObj = PyTuple_New(raycastInfo.m_numRayHits);
for (i = 0; i < raycastInfo.m_numRayHits; i++)
{
@@ -4819,6 +4824,7 @@ static PyObject* pybullet_rayTestBatch(PyObject* self, PyObject* args, PyObject*
}
PyTuple_SetItem(rayHitsObj, i, singleHitObj);
}
b3PopProfileTiming(sm);
return rayHitsObj;
}
@@ -9587,6 +9593,7 @@ initpybullet(void)
PyModule_AddIntConstant(m, "STATE_LOGGING_PROFILE_TIMINGS", STATE_LOGGING_PROFILE_TIMINGS);
PyModule_AddIntConstant(m, "STATE_LOGGING_ALL_COMMANDS", STATE_LOGGING_ALL_COMMANDS);
PyModule_AddIntConstant(m, "STATE_REPLAY_ALL_COMMANDS", STATE_REPLAY_ALL_COMMANDS);
PyModule_AddIntConstant(m, "STATE_LOGGING_CUSTOM_TIMER", STATE_LOGGING_CUSTOM_TIMER);
PyModule_AddIntConstant(m, "COV_ENABLE_GUI", COV_ENABLE_GUI);
PyModule_AddIntConstant(m, "COV_ENABLE_SHADOWS", COV_ENABLE_SHADOWS);

View File

@@ -781,7 +781,8 @@ int btSequentialImpulseConstraintSolver::getOrInitSolverBody(btCollisionObject&
else
{
// Incorrectly set collision object flags can degrade performance in various ways.
btAssert( body.isStaticOrKinematicObject() );
//btAssert( body.isStaticOrKinematicObject() );
//it could be a multibody link collider
// all fixed bodies (inf mass) get mapped to a single solver id
if ( m_fixedBodyId < 0 )
{

View File

@@ -412,11 +412,13 @@ static void WorkerThreadFunc( void* userPtr )
}
}
}
// go sleep
localStorage->m_mutex.lock();
localStorage->m_status = WorkerThreadStatus::kSleeping;
localStorage->m_mutex.unlock();
{
BT_PROFILE("sleep");
// go sleep
localStorage->m_mutex.lock();
localStorage->m_status = WorkerThreadStatus::kSleeping;
localStorage->m_mutex.unlock();
}
}
@@ -503,7 +505,7 @@ public:
storage.m_threadId = i;
storage.m_directive = m_workerDirective;
storage.m_status = WorkerThreadStatus::kSleeping;
storage.m_cooldownTime = 1000; // 1000 microseconds, threads go to sleep after this long if they have nothing to do
storage.m_cooldownTime = 100; // 100 microseconds, threads go to sleep after this long if they have nothing to do
storage.m_clock = &m_clock;
storage.m_queue = m_perThreadJobQueues[i];
}