From f517b03534ae57fa17b94c86712e43537df78d4f Mon Sep 17 00:00:00 2001 From: erwincoumans Date: Sat, 16 Jun 2018 06:19:49 -0700 Subject: [PATCH] Expose pushProfileTimer / pop ProfileTimer in PhysicsClient API to benchmark Python parts of PyBullet. reduce 'm_cooldownTime' from 1000 microseconds to 100 microseconds (overhead in raycast is too large) If needed, we can expose this cooldown time. Replace malloc by btAlignedObjectArray (going through Bullet's memory allocator) --- build_visual_studio_vr_pybullet_double.bat | 2 +- examples/SharedMemory/PhysicsClient.h | 4 +++ examples/SharedMemory/PhysicsClientC_API.cpp | 15 ++++++++ examples/SharedMemory/PhysicsClientC_API.h | 3 ++ .../PhysicsClientSharedMemory.cpp | 32 ++++++++++++++++- .../SharedMemory/PhysicsClientSharedMemory.h | 2 ++ examples/SharedMemory/PhysicsDirect.cpp | 34 ++++++++++++++++++- examples/SharedMemory/PhysicsDirect.h | 3 ++ .../PhysicsServerCommandProcessor.cpp | 12 ++++--- examples/SharedMemory/SharedMemoryPublic.h | 1 + examples/pybullet/pybullet.c | 9 ++++- .../btSequentialImpulseConstraintSolver.cpp | 3 +- .../TaskScheduler/btTaskScheduler.cpp | 14 ++++---- 13 files changed, 118 insertions(+), 16 deletions(-) diff --git a/build_visual_studio_vr_pybullet_double.bat b/build_visual_studio_vr_pybullet_double.bat index c98d916b2..0211b7489 100644 --- a/build_visual_studio_vr_pybullet_double.bat +++ b/build_visual_studio_vr_pybullet_double.bat @@ -18,7 +18,7 @@ del tmp1234.txt cd build3 -premake4 --double --midi --enable_static_vr_plugin --enable_openvr --enable_pybullet --python_include_dir="%myvar%/include" --python_lib_dir="%myvar%/libs" --targetdir="../bin" vs2010 +premake4 --double --enable_multithreading --midi --enable_static_vr_plugin --enable_openvr --enable_pybullet --python_include_dir="%myvar%/include" --python_lib_dir="%myvar%/libs" --targetdir="../bin" vs2010 #premake4 --serial --audio --double --midi --enable_openvr --enable_pybullet --python_include_dir="%myvar%/include" --python_lib_dir="%myvar%/libs" --targetdir="../bin" vs2010 diff --git a/examples/SharedMemory/PhysicsClient.h b/examples/SharedMemory/PhysicsClient.h index f73715f6d..6d34f8f53 100644 --- a/examples/SharedMemory/PhysicsClient.h +++ b/examples/SharedMemory/PhysicsClient.h @@ -78,6 +78,10 @@ public: virtual int getCachedUserDataId(int bodyUniqueId, int linkIndex, const char *key) const = 0; virtual int getNumUserData(int bodyUniqueId, int linkIndex) const = 0; virtual void getUserDataInfo(int bodyUniqueId, int linkIndex, int userDataIndex, const char **keyOut, int *userDataIdOut) const = 0; + + virtual void pushProfileTiming(const char* timingName)=0; + virtual void popProfileTiming()=0; + }; diff --git a/examples/SharedMemory/PhysicsClientC_API.cpp b/examples/SharedMemory/PhysicsClientC_API.cpp index b391ab5f2..5e9707094 100644 --- a/examples/SharedMemory/PhysicsClientC_API.cpp +++ b/examples/SharedMemory/PhysicsClientC_API.cpp @@ -4438,6 +4438,21 @@ B3_SHARED_API b3SharedMemoryCommandHandle b3ProfileTimingCommandInit(b3PhysicsCl return (b3SharedMemoryCommandHandle)command; } +B3_SHARED_API void b3PushProfileTiming(b3PhysicsClientHandle physClient, const char* timingName) +{ + PhysicsClient* cl = (PhysicsClient*)physClient; + b3Assert(cl); + cl->pushProfileTiming(timingName); +} + +B3_SHARED_API void b3PopProfileTiming(b3PhysicsClientHandle physClient) +{ + PhysicsClient* cl = (PhysicsClient*)physClient; + b3Assert(cl); + cl->popProfileTiming(); +} + + B3_SHARED_API void b3SetProfileTimingDuractionInMicroSeconds(b3SharedMemoryCommandHandle commandHandle, int duration) { struct SharedMemoryCommand* command = (struct SharedMemoryCommand*) commandHandle; diff --git a/examples/SharedMemory/PhysicsClientC_API.h b/examples/SharedMemory/PhysicsClientC_API.h index 9e9a9aa49..ee71d8b97 100644 --- a/examples/SharedMemory/PhysicsClientC_API.h +++ b/examples/SharedMemory/PhysicsClientC_API.h @@ -598,6 +598,9 @@ B3_SHARED_API int b3StateLoggingStop(b3SharedMemoryCommandHandle commandHandle, B3_SHARED_API b3SharedMemoryCommandHandle b3ProfileTimingCommandInit(b3PhysicsClientHandle physClient, const char* name); B3_SHARED_API void b3SetProfileTimingDuractionInMicroSeconds(b3SharedMemoryCommandHandle commandHandle, int duration); +B3_SHARED_API void b3PushProfileTiming(b3PhysicsClientHandle physClient, const char* timingName); +B3_SHARED_API void b3PopProfileTiming(b3PhysicsClientHandle physClient); + B3_SHARED_API void b3SetTimeOut(b3PhysicsClientHandle physClient, double timeOutInSeconds); B3_SHARED_API double b3GetTimeOut(b3PhysicsClientHandle physClient); diff --git a/examples/SharedMemory/PhysicsClientSharedMemory.cpp b/examples/SharedMemory/PhysicsClientSharedMemory.cpp index 6913d1b15..50abe7905 100644 --- a/examples/SharedMemory/PhysicsClientSharedMemory.cpp +++ b/examples/SharedMemory/PhysicsClientSharedMemory.cpp @@ -12,7 +12,7 @@ #include "SharedMemoryBlock.h" #include "BodyJointInfoUtility.h" #include "SharedMemoryUserData.h" - +#include "LinearMath/btQuickprof.h" struct UserDataCache @@ -41,11 +41,16 @@ struct BodyJointInfoCache } }; + + struct PhysicsClientSharedMemoryInternalData { SharedMemoryInterface* m_sharedMemory; bool m_ownsSharedMemory; SharedMemoryBlock* m_testBlock1; + btAlignedObjectArray m_profileTimings; + btAlignedObjectArray m_profileTimingStrings; + btHashMap m_bodyJointMap; btHashMap m_userConstraintInfoMap; @@ -950,6 +955,7 @@ const SharedMemoryStatus* PhysicsClientSharedMemory::processServerStatus() { case CMD_REQUEST_RAY_CAST_INTERSECTIONS_COMPLETED: { + B3_PROFILE("m_raycastHits"); if (m_data->m_verboseOutput) { b3Printf("Raycast completed"); @@ -1882,3 +1888,27 @@ void PhysicsClientSharedMemory::getUserDataInfo(int bodyUniqueId, int linkIndex, SharedMemoryUserData *userDataPtr = (userDataCachePtr)->m_userDataMap.getAtIndex(userDataIndex); *keyOut = (userDataPtr)->m_key.c_str(); } + + + + + +void PhysicsClientSharedMemory::pushProfileTiming(const char* timingName) +{ + std::string* str = new std::string(timingName); + m_data->m_profileTimingStrings.push_back(str); + m_data->m_profileTimings.push_back(new CProfileSample(str->c_str())); + +} + + +void PhysicsClientSharedMemory::popProfileTiming() +{ + if (m_data->m_profileTimings.size()) + { + CProfileSample* sample = m_data->m_profileTimings[m_data->m_profileTimings.size()-1]; + m_data->m_profileTimings.pop_back(); + delete sample; + } +} + diff --git a/examples/SharedMemory/PhysicsClientSharedMemory.h b/examples/SharedMemory/PhysicsClientSharedMemory.h index dadb8acf6..a611e9eb9 100644 --- a/examples/SharedMemory/PhysicsClientSharedMemory.h +++ b/examples/SharedMemory/PhysicsClientSharedMemory.h @@ -89,6 +89,8 @@ public: virtual int getNumUserData(int bodyUniqueId, int linkIndex) const; virtual void getUserDataInfo(int bodyUniqueId, int linkIndex, int userDataIndex, const char **keyOut, int *userDataIdOut) const; + virtual void pushProfileTiming(const char* timingName); + virtual void popProfileTiming(); }; #endif // BT_PHYSICS_CLIENT_API_H diff --git a/examples/SharedMemory/PhysicsDirect.cpp b/examples/SharedMemory/PhysicsDirect.cpp index 5c8070687..bebfb158d 100644 --- a/examples/SharedMemory/PhysicsDirect.cpp +++ b/examples/SharedMemory/PhysicsDirect.cpp @@ -14,12 +14,15 @@ #include #include "SharedMemoryUserData.h" +#include "LinearMath/btQuickprof.h" struct UserDataCache { btHashMap m_userDataMap; btHashMap m_keyToUserDataIdMap; - ~UserDataCache() { + ~UserDataCache() + { + } }; @@ -58,6 +61,9 @@ struct PhysicsDirectInternalData btHashMap m_bodyJointMap; btHashMap m_userConstraintInfoMap; + btAlignedObjectArray m_profileTimings; + btAlignedObjectArray m_profileTimingStrings; + char m_bulletStreamDataServerToClient[SHARED_MEMORY_MAX_STREAM_CHUNK_SIZE]; btAlignedObjectArray m_cachedMassMatrix; int m_cachedCameraPixelsWidth; @@ -111,6 +117,12 @@ PhysicsDirect::PhysicsDirect(PhysicsCommandProcessorInterface* physSdk, bool pas PhysicsDirect::~PhysicsDirect() { + for (int i=0;im_profileTimingStrings.size();i++) + { + delete m_data->m_profileTimingStrings[i]; + } + m_data->m_profileTimingStrings.clear(); + if (m_data->m_commandProcessor->isConnected()) { m_data->m_commandProcessor->disconnect(); @@ -1530,3 +1542,23 @@ void PhysicsDirect::getUserDataInfo(int bodyUniqueId, int linkIndex, int userDat SharedMemoryUserData* userDataPtr = (userDataCachePtr)->m_userDataMap.getAtIndex(userDataIndex); *keyOut = (userDataPtr)->m_key.c_str(); } + + + +void PhysicsDirect::pushProfileTiming(const char* timingName) +{ + std::string* str = new std::string(timingName); + m_data->m_profileTimingStrings.push_back(str); + m_data->m_profileTimings.push_back(new CProfileSample(str->c_str())); +} + + +void PhysicsDirect::popProfileTiming() +{ + if (m_data->m_profileTimings.size()) + { + CProfileSample* sample = m_data->m_profileTimings[m_data->m_profileTimings.size()-1]; + m_data->m_profileTimings.pop_back(); + delete sample; + } +} diff --git a/examples/SharedMemory/PhysicsDirect.h b/examples/SharedMemory/PhysicsDirect.h index 299312199..f12f917a6 100644 --- a/examples/SharedMemory/PhysicsDirect.h +++ b/examples/SharedMemory/PhysicsDirect.h @@ -118,6 +118,9 @@ public: virtual int getCachedUserDataId(int bodyUniqueId, int linkIndex, const char *key) const; virtual int getNumUserData(int bodyUniqueId, int linkIndex) const; virtual void getUserDataInfo(int bodyUniqueId, int linkIndex, int userDataIndex, const char **keyOut, int *userDataIdOut) const; + + virtual void pushProfileTiming(const char* timingName); + virtual void popProfileTiming(); }; #endif //PHYSICS_DIRECT_H diff --git a/examples/SharedMemory/PhysicsServerCommandProcessor.cpp b/examples/SharedMemory/PhysicsServerCommandProcessor.cpp index 0b64cbffc..b39baeed1 100644 --- a/examples/SharedMemory/PhysicsServerCommandProcessor.cpp +++ b/examples/SharedMemory/PhysicsServerCommandProcessor.cpp @@ -1780,6 +1780,7 @@ PhysicsServerCommandProcessor::PhysicsServerCommandProcessor() createEmptyDynamicsWorld(); +#ifdef BT_THREADSAFE if (btGetTaskScheduler() == 0) { btITaskScheduler *scheduler = btCreateDefaultTaskScheduler(); if (scheduler == 0) { @@ -1787,6 +1788,7 @@ PhysicsServerCommandProcessor::PhysicsServerCommandProcessor() } btSetTaskScheduler(scheduler); } +#endif //BT_THREADSAFE } PhysicsServerCommandProcessor::~PhysicsServerCommandProcessor() @@ -4821,10 +4823,11 @@ bool PhysicsServerCommandProcessor::processRequestRaycastIntersectionsCommand(co const int numRays = clientCmd.m_requestRaycastIntersections.m_numRays; const int numThreads = clientCmd.m_requestRaycastIntersections.m_numThreads; - b3RayData *rayInputBuffer = (b3RayData *)malloc(sizeof(b3RayData) * numRays); - memcpy(rayInputBuffer, bufferServerToClient, sizeof(b3RayData) * numRays); - - BatchRayCaster batchRayCaster(m_data->m_dynamicsWorld, rayInputBuffer, (b3RayHitInfo *)bufferServerToClient, numRays); + btAlignedObjectArray rays; + rays.resize(numRays); + memcpy(&rays[0],bufferServerToClient,numRays*sizeof(b3RayData)); + + BatchRayCaster batchRayCaster(m_data->m_dynamicsWorld, &rays[0], (b3RayHitInfo *)bufferServerToClient, numRays); if (numThreads == 0) { // When 0 is specified, Bullet can decide how many threads to use. // About 16 rays per thread seems to work reasonably well. @@ -4840,7 +4843,6 @@ bool PhysicsServerCommandProcessor::processRequestRaycastIntersectionsCommand(co batchRayCaster.castRays(numThreads); } - free(rayInputBuffer); serverStatusOut.m_raycastHits.m_numRaycastHits = numRays; serverStatusOut.m_type = CMD_REQUEST_RAY_CAST_INTERSECTIONS_COMPLETED; return hasStatus; diff --git a/examples/SharedMemory/SharedMemoryPublic.h b/examples/SharedMemory/SharedMemoryPublic.h index 1ee9a9d52..7a434ec48 100644 --- a/examples/SharedMemory/SharedMemoryPublic.h +++ b/examples/SharedMemory/SharedMemoryPublic.h @@ -548,6 +548,7 @@ enum b3StateLoggingType STATE_LOGGING_PROFILE_TIMINGS = 6, STATE_LOGGING_ALL_COMMANDS=7, STATE_REPLAY_ALL_COMMANDS=8, + STATE_LOGGING_CUSTOM_TIMER=9, }; diff --git a/examples/pybullet/pybullet.c b/examples/pybullet/pybullet.c index a29201492..b74140751 100644 --- a/examples/pybullet/pybullet.c +++ b/examples/pybullet/pybullet.c @@ -4704,6 +4704,7 @@ static PyObject* pybullet_rayTestBatch(PyObject* self, PyObject* args, PyObject* } + commandHandle = b3CreateRaycastBatchCommandInit(sm); b3RaycastBatchSetNumThreads(commandHandle, numThreads); @@ -4734,6 +4735,7 @@ static PyObject* pybullet_rayTestBatch(PyObject* self, PyObject* args, PyObject* Py_DECREF(seqRayToObj); return NULL; } + b3PushProfileTiming(sm, "extractPythonFromToSequenceToC"); for (i = 0; i < lenFrom; i++) { PyObject* rayFromObj = PySequence_GetItem(rayFromObjList,i); @@ -4752,11 +4754,13 @@ static PyObject* pybullet_rayTestBatch(PyObject* self, PyObject* args, PyObject* Py_DECREF(seqRayToObj); Py_DECREF(rayFromObj); Py_DECREF(rayToObj); + b3PopProfileTiming(sm); return NULL; } Py_DECREF(rayFromObj); Py_DECREF(rayToObj); } + b3PopProfileTiming(sm); } } else { @@ -4779,8 +4783,9 @@ static PyObject* pybullet_rayTestBatch(PyObject* self, PyObject* args, PyObject* struct b3RaycastInformation raycastInfo; PyObject* rayHitsObj = 0; int i; + b3PushProfileTiming(sm, "convertRaycastInformationToPython"); b3GetRaycastInformation(sm, &raycastInfo); - + rayHitsObj = PyTuple_New(raycastInfo.m_numRayHits); for (i = 0; i < raycastInfo.m_numRayHits; i++) { @@ -4819,6 +4824,7 @@ static PyObject* pybullet_rayTestBatch(PyObject* self, PyObject* args, PyObject* } PyTuple_SetItem(rayHitsObj, i, singleHitObj); } + b3PopProfileTiming(sm); return rayHitsObj; } @@ -9587,6 +9593,7 @@ initpybullet(void) PyModule_AddIntConstant(m, "STATE_LOGGING_PROFILE_TIMINGS", STATE_LOGGING_PROFILE_TIMINGS); PyModule_AddIntConstant(m, "STATE_LOGGING_ALL_COMMANDS", STATE_LOGGING_ALL_COMMANDS); PyModule_AddIntConstant(m, "STATE_REPLAY_ALL_COMMANDS", STATE_REPLAY_ALL_COMMANDS); + PyModule_AddIntConstant(m, "STATE_LOGGING_CUSTOM_TIMER", STATE_LOGGING_CUSTOM_TIMER); PyModule_AddIntConstant(m, "COV_ENABLE_GUI", COV_ENABLE_GUI); PyModule_AddIntConstant(m, "COV_ENABLE_SHADOWS", COV_ENABLE_SHADOWS); diff --git a/src/BulletDynamics/ConstraintSolver/btSequentialImpulseConstraintSolver.cpp b/src/BulletDynamics/ConstraintSolver/btSequentialImpulseConstraintSolver.cpp index 9ef7e89d8..692fb8b4d 100644 --- a/src/BulletDynamics/ConstraintSolver/btSequentialImpulseConstraintSolver.cpp +++ b/src/BulletDynamics/ConstraintSolver/btSequentialImpulseConstraintSolver.cpp @@ -781,7 +781,8 @@ int btSequentialImpulseConstraintSolver::getOrInitSolverBody(btCollisionObject& else { // Incorrectly set collision object flags can degrade performance in various ways. - btAssert( body.isStaticOrKinematicObject() ); + //btAssert( body.isStaticOrKinematicObject() ); + //it could be a multibody link collider // all fixed bodies (inf mass) get mapped to a single solver id if ( m_fixedBodyId < 0 ) { diff --git a/src/LinearMath/TaskScheduler/btTaskScheduler.cpp b/src/LinearMath/TaskScheduler/btTaskScheduler.cpp index 02fe07ab1..4643bbee8 100644 --- a/src/LinearMath/TaskScheduler/btTaskScheduler.cpp +++ b/src/LinearMath/TaskScheduler/btTaskScheduler.cpp @@ -412,11 +412,13 @@ static void WorkerThreadFunc( void* userPtr ) } } } - - // go sleep - localStorage->m_mutex.lock(); - localStorage->m_status = WorkerThreadStatus::kSleeping; - localStorage->m_mutex.unlock(); + { + BT_PROFILE("sleep"); + // go sleep + localStorage->m_mutex.lock(); + localStorage->m_status = WorkerThreadStatus::kSleeping; + localStorage->m_mutex.unlock(); + } } @@ -503,7 +505,7 @@ public: storage.m_threadId = i; storage.m_directive = m_workerDirective; storage.m_status = WorkerThreadStatus::kSleeping; - storage.m_cooldownTime = 1000; // 1000 microseconds, threads go to sleep after this long if they have nothing to do + storage.m_cooldownTime = 100; // 100 microseconds, threads go to sleep after this long if they have nothing to do storage.m_clock = &m_clock; storage.m_queue = m_perThreadJobQueues[i]; }