Expose pushProfileTimer / pop ProfileTimer in PhysicsClient API to benchmark Python parts of PyBullet.
reduce 'm_cooldownTime' from 1000 microseconds to 100 microseconds (overhead in raycast is too large) If needed, we can expose this cooldown time. Replace malloc by btAlignedObjectArray (going through Bullet's memory allocator)
This commit is contained in:
@@ -18,7 +18,7 @@ del tmp1234.txt
|
||||
cd build3
|
||||
|
||||
|
||||
premake4 --double --midi --enable_static_vr_plugin --enable_openvr --enable_pybullet --python_include_dir="%myvar%/include" --python_lib_dir="%myvar%/libs" --targetdir="../bin" vs2010
|
||||
premake4 --double --enable_multithreading --midi --enable_static_vr_plugin --enable_openvr --enable_pybullet --python_include_dir="%myvar%/include" --python_lib_dir="%myvar%/libs" --targetdir="../bin" vs2010
|
||||
|
||||
#premake4 --serial --audio --double --midi --enable_openvr --enable_pybullet --python_include_dir="%myvar%/include" --python_lib_dir="%myvar%/libs" --targetdir="../bin" vs2010
|
||||
|
||||
|
||||
@@ -78,6 +78,10 @@ public:
|
||||
virtual int getCachedUserDataId(int bodyUniqueId, int linkIndex, const char *key) const = 0;
|
||||
virtual int getNumUserData(int bodyUniqueId, int linkIndex) const = 0;
|
||||
virtual void getUserDataInfo(int bodyUniqueId, int linkIndex, int userDataIndex, const char **keyOut, int *userDataIdOut) const = 0;
|
||||
|
||||
virtual void pushProfileTiming(const char* timingName)=0;
|
||||
virtual void popProfileTiming()=0;
|
||||
|
||||
};
|
||||
|
||||
|
||||
|
||||
@@ -4438,6 +4438,21 @@ B3_SHARED_API b3SharedMemoryCommandHandle b3ProfileTimingCommandInit(b3PhysicsCl
|
||||
return (b3SharedMemoryCommandHandle)command;
|
||||
}
|
||||
|
||||
B3_SHARED_API void b3PushProfileTiming(b3PhysicsClientHandle physClient, const char* timingName)
|
||||
{
|
||||
PhysicsClient* cl = (PhysicsClient*)physClient;
|
||||
b3Assert(cl);
|
||||
cl->pushProfileTiming(timingName);
|
||||
}
|
||||
|
||||
B3_SHARED_API void b3PopProfileTiming(b3PhysicsClientHandle physClient)
|
||||
{
|
||||
PhysicsClient* cl = (PhysicsClient*)physClient;
|
||||
b3Assert(cl);
|
||||
cl->popProfileTiming();
|
||||
}
|
||||
|
||||
|
||||
B3_SHARED_API void b3SetProfileTimingDuractionInMicroSeconds(b3SharedMemoryCommandHandle commandHandle, int duration)
|
||||
{
|
||||
struct SharedMemoryCommand* command = (struct SharedMemoryCommand*) commandHandle;
|
||||
|
||||
@@ -598,6 +598,9 @@ B3_SHARED_API int b3StateLoggingStop(b3SharedMemoryCommandHandle commandHandle,
|
||||
B3_SHARED_API b3SharedMemoryCommandHandle b3ProfileTimingCommandInit(b3PhysicsClientHandle physClient, const char* name);
|
||||
B3_SHARED_API void b3SetProfileTimingDuractionInMicroSeconds(b3SharedMemoryCommandHandle commandHandle, int duration);
|
||||
|
||||
B3_SHARED_API void b3PushProfileTiming(b3PhysicsClientHandle physClient, const char* timingName);
|
||||
B3_SHARED_API void b3PopProfileTiming(b3PhysicsClientHandle physClient);
|
||||
|
||||
B3_SHARED_API void b3SetTimeOut(b3PhysicsClientHandle physClient, double timeOutInSeconds);
|
||||
B3_SHARED_API double b3GetTimeOut(b3PhysicsClientHandle physClient);
|
||||
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
#include "SharedMemoryBlock.h"
|
||||
#include "BodyJointInfoUtility.h"
|
||||
#include "SharedMemoryUserData.h"
|
||||
|
||||
#include "LinearMath/btQuickprof.h"
|
||||
|
||||
|
||||
struct UserDataCache
|
||||
@@ -41,11 +41,16 @@ struct BodyJointInfoCache
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
|
||||
struct PhysicsClientSharedMemoryInternalData {
|
||||
SharedMemoryInterface* m_sharedMemory;
|
||||
bool m_ownsSharedMemory;
|
||||
SharedMemoryBlock* m_testBlock1;
|
||||
|
||||
btAlignedObjectArray<CProfileSample* > m_profileTimings;
|
||||
btAlignedObjectArray<std::string* > m_profileTimingStrings;
|
||||
|
||||
btHashMap<btHashInt,BodyJointInfoCache*> m_bodyJointMap;
|
||||
btHashMap<btHashInt,b3UserConstraint> m_userConstraintInfoMap;
|
||||
|
||||
@@ -950,6 +955,7 @@ const SharedMemoryStatus* PhysicsClientSharedMemory::processServerStatus() {
|
||||
|
||||
case CMD_REQUEST_RAY_CAST_INTERSECTIONS_COMPLETED:
|
||||
{
|
||||
B3_PROFILE("m_raycastHits");
|
||||
if (m_data->m_verboseOutput)
|
||||
{
|
||||
b3Printf("Raycast completed");
|
||||
@@ -1882,3 +1888,27 @@ void PhysicsClientSharedMemory::getUserDataInfo(int bodyUniqueId, int linkIndex,
|
||||
SharedMemoryUserData *userDataPtr = (userDataCachePtr)->m_userDataMap.getAtIndex(userDataIndex);
|
||||
*keyOut = (userDataPtr)->m_key.c_str();
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
void PhysicsClientSharedMemory::pushProfileTiming(const char* timingName)
|
||||
{
|
||||
std::string* str = new std::string(timingName);
|
||||
m_data->m_profileTimingStrings.push_back(str);
|
||||
m_data->m_profileTimings.push_back(new CProfileSample(str->c_str()));
|
||||
|
||||
}
|
||||
|
||||
|
||||
void PhysicsClientSharedMemory::popProfileTiming()
|
||||
{
|
||||
if (m_data->m_profileTimings.size())
|
||||
{
|
||||
CProfileSample* sample = m_data->m_profileTimings[m_data->m_profileTimings.size()-1];
|
||||
m_data->m_profileTimings.pop_back();
|
||||
delete sample;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -89,6 +89,8 @@ public:
|
||||
virtual int getNumUserData(int bodyUniqueId, int linkIndex) const;
|
||||
virtual void getUserDataInfo(int bodyUniqueId, int linkIndex, int userDataIndex, const char **keyOut, int *userDataIdOut) const;
|
||||
|
||||
virtual void pushProfileTiming(const char* timingName);
|
||||
virtual void popProfileTiming();
|
||||
};
|
||||
|
||||
#endif // BT_PHYSICS_CLIENT_API_H
|
||||
|
||||
@@ -14,12 +14,15 @@
|
||||
#include <string>
|
||||
|
||||
#include "SharedMemoryUserData.h"
|
||||
#include "LinearMath/btQuickprof.h"
|
||||
|
||||
struct UserDataCache {
|
||||
btHashMap<btHashInt, SharedMemoryUserData> m_userDataMap;
|
||||
btHashMap<btHashString, int> m_keyToUserDataIdMap;
|
||||
|
||||
~UserDataCache() {
|
||||
~UserDataCache()
|
||||
{
|
||||
|
||||
}
|
||||
};
|
||||
|
||||
@@ -58,6 +61,9 @@ struct PhysicsDirectInternalData
|
||||
btHashMap<btHashInt,BodyJointInfoCache2*> m_bodyJointMap;
|
||||
btHashMap<btHashInt,b3UserConstraint> m_userConstraintInfoMap;
|
||||
|
||||
btAlignedObjectArray<CProfileSample* > m_profileTimings;
|
||||
btAlignedObjectArray<std::string* > m_profileTimingStrings;
|
||||
|
||||
char m_bulletStreamDataServerToClient[SHARED_MEMORY_MAX_STREAM_CHUNK_SIZE];
|
||||
btAlignedObjectArray<double> m_cachedMassMatrix;
|
||||
int m_cachedCameraPixelsWidth;
|
||||
@@ -111,6 +117,12 @@ PhysicsDirect::PhysicsDirect(PhysicsCommandProcessorInterface* physSdk, bool pas
|
||||
|
||||
PhysicsDirect::~PhysicsDirect()
|
||||
{
|
||||
for (int i=0;i<m_data->m_profileTimingStrings.size();i++)
|
||||
{
|
||||
delete m_data->m_profileTimingStrings[i];
|
||||
}
|
||||
m_data->m_profileTimingStrings.clear();
|
||||
|
||||
if (m_data->m_commandProcessor->isConnected())
|
||||
{
|
||||
m_data->m_commandProcessor->disconnect();
|
||||
@@ -1530,3 +1542,23 @@ void PhysicsDirect::getUserDataInfo(int bodyUniqueId, int linkIndex, int userDat
|
||||
SharedMemoryUserData* userDataPtr = (userDataCachePtr)->m_userDataMap.getAtIndex(userDataIndex);
|
||||
*keyOut = (userDataPtr)->m_key.c_str();
|
||||
}
|
||||
|
||||
|
||||
|
||||
void PhysicsDirect::pushProfileTiming(const char* timingName)
|
||||
{
|
||||
std::string* str = new std::string(timingName);
|
||||
m_data->m_profileTimingStrings.push_back(str);
|
||||
m_data->m_profileTimings.push_back(new CProfileSample(str->c_str()));
|
||||
}
|
||||
|
||||
|
||||
void PhysicsDirect::popProfileTiming()
|
||||
{
|
||||
if (m_data->m_profileTimings.size())
|
||||
{
|
||||
CProfileSample* sample = m_data->m_profileTimings[m_data->m_profileTimings.size()-1];
|
||||
m_data->m_profileTimings.pop_back();
|
||||
delete sample;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -118,6 +118,9 @@ public:
|
||||
virtual int getCachedUserDataId(int bodyUniqueId, int linkIndex, const char *key) const;
|
||||
virtual int getNumUserData(int bodyUniqueId, int linkIndex) const;
|
||||
virtual void getUserDataInfo(int bodyUniqueId, int linkIndex, int userDataIndex, const char **keyOut, int *userDataIdOut) const;
|
||||
|
||||
virtual void pushProfileTiming(const char* timingName);
|
||||
virtual void popProfileTiming();
|
||||
};
|
||||
|
||||
#endif //PHYSICS_DIRECT_H
|
||||
|
||||
@@ -1780,6 +1780,7 @@ PhysicsServerCommandProcessor::PhysicsServerCommandProcessor()
|
||||
|
||||
createEmptyDynamicsWorld();
|
||||
|
||||
#ifdef BT_THREADSAFE
|
||||
if (btGetTaskScheduler() == 0) {
|
||||
btITaskScheduler *scheduler = btCreateDefaultTaskScheduler();
|
||||
if (scheduler == 0) {
|
||||
@@ -1787,6 +1788,7 @@ PhysicsServerCommandProcessor::PhysicsServerCommandProcessor()
|
||||
}
|
||||
btSetTaskScheduler(scheduler);
|
||||
}
|
||||
#endif //BT_THREADSAFE
|
||||
}
|
||||
|
||||
PhysicsServerCommandProcessor::~PhysicsServerCommandProcessor()
|
||||
@@ -4821,10 +4823,11 @@ bool PhysicsServerCommandProcessor::processRequestRaycastIntersectionsCommand(co
|
||||
const int numRays = clientCmd.m_requestRaycastIntersections.m_numRays;
|
||||
const int numThreads = clientCmd.m_requestRaycastIntersections.m_numThreads;
|
||||
|
||||
b3RayData *rayInputBuffer = (b3RayData *)malloc(sizeof(b3RayData) * numRays);
|
||||
memcpy(rayInputBuffer, bufferServerToClient, sizeof(b3RayData) * numRays);
|
||||
btAlignedObjectArray<b3RayData> rays;
|
||||
rays.resize(numRays);
|
||||
memcpy(&rays[0],bufferServerToClient,numRays*sizeof(b3RayData));
|
||||
|
||||
BatchRayCaster batchRayCaster(m_data->m_dynamicsWorld, rayInputBuffer, (b3RayHitInfo *)bufferServerToClient, numRays);
|
||||
BatchRayCaster batchRayCaster(m_data->m_dynamicsWorld, &rays[0], (b3RayHitInfo *)bufferServerToClient, numRays);
|
||||
if (numThreads == 0) {
|
||||
// When 0 is specified, Bullet can decide how many threads to use.
|
||||
// About 16 rays per thread seems to work reasonably well.
|
||||
@@ -4840,7 +4843,6 @@ bool PhysicsServerCommandProcessor::processRequestRaycastIntersectionsCommand(co
|
||||
batchRayCaster.castRays(numThreads);
|
||||
}
|
||||
|
||||
free(rayInputBuffer);
|
||||
serverStatusOut.m_raycastHits.m_numRaycastHits = numRays;
|
||||
serverStatusOut.m_type = CMD_REQUEST_RAY_CAST_INTERSECTIONS_COMPLETED;
|
||||
return hasStatus;
|
||||
|
||||
@@ -548,6 +548,7 @@ enum b3StateLoggingType
|
||||
STATE_LOGGING_PROFILE_TIMINGS = 6,
|
||||
STATE_LOGGING_ALL_COMMANDS=7,
|
||||
STATE_REPLAY_ALL_COMMANDS=8,
|
||||
STATE_LOGGING_CUSTOM_TIMER=9,
|
||||
};
|
||||
|
||||
|
||||
|
||||
@@ -4704,6 +4704,7 @@ static PyObject* pybullet_rayTestBatch(PyObject* self, PyObject* args, PyObject*
|
||||
}
|
||||
|
||||
|
||||
|
||||
commandHandle = b3CreateRaycastBatchCommandInit(sm);
|
||||
b3RaycastBatchSetNumThreads(commandHandle, numThreads);
|
||||
|
||||
@@ -4734,6 +4735,7 @@ static PyObject* pybullet_rayTestBatch(PyObject* self, PyObject* args, PyObject*
|
||||
Py_DECREF(seqRayToObj);
|
||||
return NULL;
|
||||
}
|
||||
b3PushProfileTiming(sm, "extractPythonFromToSequenceToC");
|
||||
for (i = 0; i < lenFrom; i++)
|
||||
{
|
||||
PyObject* rayFromObj = PySequence_GetItem(rayFromObjList,i);
|
||||
@@ -4752,11 +4754,13 @@ static PyObject* pybullet_rayTestBatch(PyObject* self, PyObject* args, PyObject*
|
||||
Py_DECREF(seqRayToObj);
|
||||
Py_DECREF(rayFromObj);
|
||||
Py_DECREF(rayToObj);
|
||||
b3PopProfileTiming(sm);
|
||||
return NULL;
|
||||
}
|
||||
Py_DECREF(rayFromObj);
|
||||
Py_DECREF(rayToObj);
|
||||
}
|
||||
b3PopProfileTiming(sm);
|
||||
}
|
||||
} else
|
||||
{
|
||||
@@ -4779,6 +4783,7 @@ static PyObject* pybullet_rayTestBatch(PyObject* self, PyObject* args, PyObject*
|
||||
struct b3RaycastInformation raycastInfo;
|
||||
PyObject* rayHitsObj = 0;
|
||||
int i;
|
||||
b3PushProfileTiming(sm, "convertRaycastInformationToPython");
|
||||
b3GetRaycastInformation(sm, &raycastInfo);
|
||||
|
||||
rayHitsObj = PyTuple_New(raycastInfo.m_numRayHits);
|
||||
@@ -4819,6 +4824,7 @@ static PyObject* pybullet_rayTestBatch(PyObject* self, PyObject* args, PyObject*
|
||||
}
|
||||
PyTuple_SetItem(rayHitsObj, i, singleHitObj);
|
||||
}
|
||||
b3PopProfileTiming(sm);
|
||||
return rayHitsObj;
|
||||
}
|
||||
|
||||
@@ -9587,6 +9593,7 @@ initpybullet(void)
|
||||
PyModule_AddIntConstant(m, "STATE_LOGGING_PROFILE_TIMINGS", STATE_LOGGING_PROFILE_TIMINGS);
|
||||
PyModule_AddIntConstant(m, "STATE_LOGGING_ALL_COMMANDS", STATE_LOGGING_ALL_COMMANDS);
|
||||
PyModule_AddIntConstant(m, "STATE_REPLAY_ALL_COMMANDS", STATE_REPLAY_ALL_COMMANDS);
|
||||
PyModule_AddIntConstant(m, "STATE_LOGGING_CUSTOM_TIMER", STATE_LOGGING_CUSTOM_TIMER);
|
||||
|
||||
PyModule_AddIntConstant(m, "COV_ENABLE_GUI", COV_ENABLE_GUI);
|
||||
PyModule_AddIntConstant(m, "COV_ENABLE_SHADOWS", COV_ENABLE_SHADOWS);
|
||||
|
||||
@@ -781,7 +781,8 @@ int btSequentialImpulseConstraintSolver::getOrInitSolverBody(btCollisionObject&
|
||||
else
|
||||
{
|
||||
// Incorrectly set collision object flags can degrade performance in various ways.
|
||||
btAssert( body.isStaticOrKinematicObject() );
|
||||
//btAssert( body.isStaticOrKinematicObject() );
|
||||
//it could be a multibody link collider
|
||||
// all fixed bodies (inf mass) get mapped to a single solver id
|
||||
if ( m_fixedBodyId < 0 )
|
||||
{
|
||||
|
||||
@@ -412,11 +412,13 @@ static void WorkerThreadFunc( void* userPtr )
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
BT_PROFILE("sleep");
|
||||
// go sleep
|
||||
localStorage->m_mutex.lock();
|
||||
localStorage->m_status = WorkerThreadStatus::kSleeping;
|
||||
localStorage->m_mutex.unlock();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -503,7 +505,7 @@ public:
|
||||
storage.m_threadId = i;
|
||||
storage.m_directive = m_workerDirective;
|
||||
storage.m_status = WorkerThreadStatus::kSleeping;
|
||||
storage.m_cooldownTime = 1000; // 1000 microseconds, threads go to sleep after this long if they have nothing to do
|
||||
storage.m_cooldownTime = 100; // 100 microseconds, threads go to sleep after this long if they have nothing to do
|
||||
storage.m_clock = &m_clock;
|
||||
storage.m_queue = m_perThreadJobQueues[i];
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user