From 08409cae9f8bf52f73ee26b196e6fa2f25c98883 Mon Sep 17 00:00:00 2001 From: Tigran Gasparian Date: Fri, 15 Jun 2018 16:47:04 +0200 Subject: [PATCH] Adds multithreading support for batch ray casts. To enable the feature, enable the BULLET2_MULTITHREADING option. Increases the number of rays that can go in a batch request by storing them in the shared memory stream instead of the shared memory command. Adds the API b3RaycastBatchSetNumThreads to specify the number of threads to use for the raycast batch, also adds the argument numThreads to the pybullet function rayTestBatch. Rays are distributed among the threads in a greedy fashion there's a shared queue of work, once a thread finishes its task, it picks the next available ray from the task. This works better than pre-distributing the rays among threads, since there's a large variance in computation time per ray. Some controversial changes: - Added a pointer to PhysicsClient to the SharedMemoryCommand struct, this was necessary to keep the C-API the same for b3RaycastBatchAddRay, while adding the ray to the shared memory stream instead of the command struct. I think this may be useful to simplify other APIs as well, that take both a client handle and a command handle. - Moved #define SHARED_MEMORY_MAX_STREAM_CHUNK_SIZE to SharedMemoryPublic. This was necessary for the definition of MAX_RAY_INTERSECTION_BATCH_SIZE. --- examples/SharedMemory/PhysicsClient.h | 1 + examples/SharedMemory/PhysicsClientC_API.cpp | 38 ++-- examples/SharedMemory/PhysicsClientC_API.h | 2 + .../PhysicsClientSharedMemory.cpp | 8 +- .../SharedMemory/PhysicsClientSharedMemory.h | 1 + examples/SharedMemory/PhysicsDirect.cpp | 8 +- examples/SharedMemory/PhysicsDirect.h | 1 + examples/SharedMemory/PhysicsLoopBack.cpp | 4 + examples/SharedMemory/PhysicsLoopBack.h | 1 + .../PhysicsServerCommandProcessor.cpp | 173 +++++++++++++----- examples/SharedMemory/SharedMemoryCommands.h | 18 +- examples/SharedMemory/SharedMemoryPublic.h | 23 ++- examples/pybullet/pybullet.c | 10 +- 13 files changed, 213 insertions(+), 75 deletions(-) diff --git a/examples/SharedMemory/PhysicsClient.h b/examples/SharedMemory/PhysicsClient.h index 7c97996df..f73715f6d 100644 --- a/examples/SharedMemory/PhysicsClient.h +++ b/examples/SharedMemory/PhysicsClient.h @@ -43,6 +43,7 @@ public: virtual void setSharedMemoryKey(int key) = 0; virtual void uploadBulletFileToSharedMemory(const char* data, int len) = 0; + virtual char* getSharedMemoryStreamBuffer() = 0; virtual int getNumDebugLines() const = 0; diff --git a/examples/SharedMemory/PhysicsClientC_API.cpp b/examples/SharedMemory/PhysicsClientC_API.cpp index 63a66f110..f92817396 100644 --- a/examples/SharedMemory/PhysicsClientC_API.cpp +++ b/examples/SharedMemory/PhysicsClientC_API.cpp @@ -2729,12 +2729,14 @@ B3_SHARED_API b3SharedMemoryCommandHandle b3CreateRaycastCommandInit(b3PhysicsCl b3Assert(command); command->m_type = CMD_REQUEST_RAY_CAST_INTERSECTIONS; command->m_requestRaycastIntersections.m_numRays = 1; - command->m_requestRaycastIntersections.m_rayFromPositions[0][0] = rayFromWorldX; - command->m_requestRaycastIntersections.m_rayFromPositions[0][1] = rayFromWorldY; - command->m_requestRaycastIntersections.m_rayFromPositions[0][2] = rayFromWorldZ; - command->m_requestRaycastIntersections.m_rayToPositions[0][0] = rayToWorldX; - command->m_requestRaycastIntersections.m_rayToPositions[0][1] = rayToWorldY; - command->m_requestRaycastIntersections.m_rayToPositions[0][2] = rayToWorldZ; + command->m_requestRaycastIntersections.m_numThreads = 1; + b3RayData* rayDataStream = (b3RayData *)cl->getSharedMemoryStreamBuffer(); + rayDataStream[0].m_rayFromPosition[0] = rayFromWorldX; + rayDataStream[0].m_rayFromPosition[1] = rayFromWorldY; + rayDataStream[0].m_rayFromPosition[2] = rayFromWorldZ; + rayDataStream[0].m_rayToPosition[0] = rayToWorldX; + rayDataStream[0].m_rayToPosition[1] = rayToWorldY; + rayDataStream[0].m_rayToPosition[2] = rayToWorldZ; return (b3SharedMemoryCommandHandle)command; } @@ -2749,25 +2751,37 @@ B3_SHARED_API b3SharedMemoryCommandHandle b3CreateRaycastBatchCommandInit(b3Phys command->m_type = CMD_REQUEST_RAY_CAST_INTERSECTIONS; command->m_updateFlags = 0; command->m_requestRaycastIntersections.m_numRays = 0; + command->m_requestRaycastIntersections.m_numThreads = 1; return (b3SharedMemoryCommandHandle)command; } +B3_SHARED_API void b3RaycastBatchSetNumThreads(b3SharedMemoryCommandHandle commandHandle, int numThreads) { + struct SharedMemoryCommand* command = (struct SharedMemoryCommand*) commandHandle; + b3Assert(command); + b3Assert(command->m_type == CMD_REQUEST_RAY_CAST_INTERSECTIONS); + command->m_requestRaycastIntersections.m_numThreads = numThreads; +} + B3_SHARED_API void b3RaycastBatchAddRay(b3SharedMemoryCommandHandle commandHandle, const double rayFromWorld[3], const double rayToWorld[3]) { struct SharedMemoryCommand* command = (struct SharedMemoryCommand*) commandHandle; b3Assert(command); b3Assert(command->m_type == CMD_REQUEST_RAY_CAST_INTERSECTIONS); + b3Assert(command->m_client) + PhysicsClient *cl = command->m_client; + b3Assert(cl); if (command->m_type == CMD_REQUEST_RAY_CAST_INTERSECTIONS) { int numRays = command->m_requestRaycastIntersections.m_numRays; if (numRaysm_requestRaycastIntersections.m_rayFromPositions[numRays][0] = rayFromWorld[0]; - command->m_requestRaycastIntersections.m_rayFromPositions[numRays][1] = rayFromWorld[1]; - command->m_requestRaycastIntersections.m_rayFromPositions[numRays][2] = rayFromWorld[2]; - command->m_requestRaycastIntersections.m_rayToPositions[numRays][0] = rayToWorld[0]; - command->m_requestRaycastIntersections.m_rayToPositions[numRays][1] = rayToWorld[1]; - command->m_requestRaycastIntersections.m_rayToPositions[numRays][2] = rayToWorld[2]; + b3RayData* rayDataStream = (b3RayData *)cl->getSharedMemoryStreamBuffer(); + rayDataStream[numRays].m_rayFromPosition[0] = rayFromWorld[0]; + rayDataStream[numRays].m_rayFromPosition[1] = rayFromWorld[1]; + rayDataStream[numRays].m_rayFromPosition[2] = rayFromWorld[2]; + rayDataStream[numRays].m_rayToPosition[0] = rayToWorld[0]; + rayDataStream[numRays].m_rayToPosition[1] = rayToWorld[1]; + rayDataStream[numRays].m_rayToPosition[2] = rayToWorld[2]; command->m_requestRaycastIntersections.m_numRays++; } } diff --git a/examples/SharedMemory/PhysicsClientC_API.h b/examples/SharedMemory/PhysicsClientC_API.h index b48258ce6..380269833 100644 --- a/examples/SharedMemory/PhysicsClientC_API.h +++ b/examples/SharedMemory/PhysicsClientC_API.h @@ -541,6 +541,8 @@ B3_SHARED_API b3SharedMemoryCommandHandle b3CreateRaycastCommandInit(b3PhysicsCl double rayToWorldX, double rayToWorldY, double rayToWorldZ); B3_SHARED_API b3SharedMemoryCommandHandle b3CreateRaycastBatchCommandInit(b3PhysicsClientHandle physClient); +// Sets the number of threads to use to compute the ray intersections for the batch. Specify 0 to let Bullet decide, 1 (default) for single core execution, 2 or more to select the number of threads to use. +B3_SHARED_API void b3RaycastBatchSetNumThreads(b3SharedMemoryCommandHandle commandHandle, int numThreads); B3_SHARED_API void b3RaycastBatchAddRay(b3SharedMemoryCommandHandle commandHandle, const double rayFromWorld[/*3*/], const double rayToWorld[/*3*/]); B3_SHARED_API void b3GetRaycastInformation(b3PhysicsClientHandle physClient, struct b3RaycastInformation* raycastInfo); diff --git a/examples/SharedMemory/PhysicsClientSharedMemory.cpp b/examples/SharedMemory/PhysicsClientSharedMemory.cpp index 9bf922d7a..6913d1b15 100644 --- a/examples/SharedMemory/PhysicsClientSharedMemory.cpp +++ b/examples/SharedMemory/PhysicsClientSharedMemory.cpp @@ -955,9 +955,10 @@ const SharedMemoryStatus* PhysicsClientSharedMemory::processServerStatus() { b3Printf("Raycast completed"); } m_data->m_raycastHits.clear(); + b3RayHitInfo *rayHits = (b3RayHitInfo *)m_data->m_testBlock1->m_bulletStreamDataServerToClientRefactor; for (int i=0;im_raycastHits.push_back(serverCmd.m_raycastHits.m_rayHits[i]); + m_data->m_raycastHits.push_back(rayHits[i]); } break; } @@ -1664,6 +1665,7 @@ bool PhysicsClientSharedMemory::canSubmitCommand() const { struct SharedMemoryCommand* PhysicsClientSharedMemory::getAvailableSharedMemoryCommand() { static int sequence = 0; m_data->m_testBlock1->m_clientCommands[0].m_sequenceNumber = sequence++; + m_data->m_testBlock1->m_clientCommands[0].m_client = this; return &m_data->m_testBlock1->m_clientCommands[0]; } @@ -1683,6 +1685,10 @@ bool PhysicsClientSharedMemory::submitClientCommand(const SharedMemoryCommand& c return false; } +char* PhysicsClientSharedMemory::getSharedMemoryStreamBuffer() { + return m_data->m_testBlock1->m_bulletStreamDataServerToClientRefactor; +} + void PhysicsClientSharedMemory::uploadBulletFileToSharedMemory(const char* data, int len) { btAssert(len < SHARED_MEMORY_MAX_STREAM_CHUNK_SIZE); if (len >= SHARED_MEMORY_MAX_STREAM_CHUNK_SIZE) { diff --git a/examples/SharedMemory/PhysicsClientSharedMemory.h b/examples/SharedMemory/PhysicsClientSharedMemory.h index 441fceb8d..dadb8acf6 100644 --- a/examples/SharedMemory/PhysicsClientSharedMemory.h +++ b/examples/SharedMemory/PhysicsClientSharedMemory.h @@ -54,6 +54,7 @@ public: virtual void setSharedMemoryKey(int key); virtual void uploadBulletFileToSharedMemory(const char* data, int len); + virtual char* getSharedMemoryStreamBuffer(); virtual int getNumDebugLines() const; diff --git a/examples/SharedMemory/PhysicsDirect.cpp b/examples/SharedMemory/PhysicsDirect.cpp index 77c53dec7..5c8070687 100644 --- a/examples/SharedMemory/PhysicsDirect.cpp +++ b/examples/SharedMemory/PhysicsDirect.cpp @@ -106,6 +106,7 @@ PhysicsDirect::PhysicsDirect(PhysicsCommandProcessorInterface* physSdk, bool pas m_data = new PhysicsDirectInternalData; m_data->m_commandProcessor = physSdk; m_data->m_ownsCommandProcessor = passSdkOwnership; + m_data->m_command.m_client = this; } PhysicsDirect::~PhysicsDirect() @@ -712,9 +713,10 @@ void PhysicsDirect::postProcessStatus(const struct SharedMemoryStatus& serverCmd b3Printf("Raycast completed"); } m_data->m_raycastHits.clear(); + b3RayHitInfo *rayHits = (b3RayHitInfo *)m_data->m_bulletStreamDataServerToClient; for (int i=0;im_raycastHits.push_back(serverCmd.m_raycastHits.m_rayHits[i]); + m_data->m_raycastHits.push_back(rayHits[i]); } break; } @@ -1323,6 +1325,10 @@ void PhysicsDirect::setSharedMemoryKey(int key) //m_data->m_physicsClient->setSharedMemoryKey(key); } +char* PhysicsDirect::getSharedMemoryStreamBuffer() { + return m_data->m_bulletStreamDataServerToClient; +} + void PhysicsDirect::uploadBulletFileToSharedMemory(const char* data, int len) { if (len>SHARED_MEMORY_MAX_STREAM_CHUNK_SIZE) diff --git a/examples/SharedMemory/PhysicsDirect.h b/examples/SharedMemory/PhysicsDirect.h index b02c10da8..299312199 100644 --- a/examples/SharedMemory/PhysicsDirect.h +++ b/examples/SharedMemory/PhysicsDirect.h @@ -78,6 +78,7 @@ public: virtual void setSharedMemoryKey(int key); void uploadBulletFileToSharedMemory(const char* data, int len); + virtual char* getSharedMemoryStreamBuffer(); virtual int getNumDebugLines() const; diff --git a/examples/SharedMemory/PhysicsLoopBack.cpp b/examples/SharedMemory/PhysicsLoopBack.cpp index b7a2dd13c..cff52a534 100644 --- a/examples/SharedMemory/PhysicsLoopBack.cpp +++ b/examples/SharedMemory/PhysicsLoopBack.cpp @@ -140,6 +140,10 @@ void PhysicsLoopBack::setSharedMemoryKey(int key) m_data->m_physicsClient->setSharedMemoryKey(key); } +char* PhysicsLoopBack::getSharedMemoryStreamBuffer() { + return m_data->m_physicsClient->getSharedMemoryStreamBuffer(); +} + void PhysicsLoopBack::uploadBulletFileToSharedMemory(const char* data, int len) { m_data->m_physicsClient->uploadBulletFileToSharedMemory(data,len); diff --git a/examples/SharedMemory/PhysicsLoopBack.h b/examples/SharedMemory/PhysicsLoopBack.h index afa42817b..512ddf7ba 100644 --- a/examples/SharedMemory/PhysicsLoopBack.h +++ b/examples/SharedMemory/PhysicsLoopBack.h @@ -58,6 +58,7 @@ public: virtual void setSharedMemoryKey(int key); void uploadBulletFileToSharedMemory(const char* data, int len); + virtual char* getSharedMemoryStreamBuffer(); virtual int getNumDebugLines() const; diff --git a/examples/SharedMemory/PhysicsServerCommandProcessor.cpp b/examples/SharedMemory/PhysicsServerCommandProcessor.cpp index d10df0cdc..f84f3fd54 100644 --- a/examples/SharedMemory/PhysicsServerCommandProcessor.cpp +++ b/examples/SharedMemory/PhysicsServerCommandProcessor.cpp @@ -1565,7 +1565,7 @@ struct PhysicsServerCommandProcessorInternalData b3ResizablePool< InternalBodyHandle > m_bodyHandles; b3ResizablePool m_userCollisionShapeHandles; b3ResizablePool m_userVisualShapeHandles; - + b3PluginManager m_pluginManager; @@ -1780,6 +1780,13 @@ PhysicsServerCommandProcessor::PhysicsServerCommandProcessor() createEmptyDynamicsWorld(); + if (btGetTaskScheduler() == 0) { + btITaskScheduler *scheduler = btCreateDefaultTaskScheduler(); + if (scheduler == 0) { + scheduler = btGetSequentialTaskScheduler(); + } + btSetTaskScheduler(scheduler); + } } PhysicsServerCommandProcessor::~PhysicsServerCommandProcessor() @@ -4681,31 +4688,87 @@ bool PhysicsServerCommandProcessor::processRequestKeyboardEventsCommand(const st return hasStatus; } -bool PhysicsServerCommandProcessor::processRequestRaycastIntersectionsCommand(const struct SharedMemoryCommand& clientCmd, struct SharedMemoryStatus& serverStatusOut, char* bufferServerToClient, int bufferSizeInBytes) -{ - bool hasStatus = true; - BT_PROFILE("CMD_REQUEST_RAY_CAST_INTERSECTIONS"); - serverStatusOut.m_raycastHits.m_numRaycastHits = 0; - for (int ray=0;ray= 201103L +#include + +struct CastSyncInfo { + std::atomic m_nextTaskNumber; + + CastSyncInfo() : m_nextTaskNumber(0) {} + + inline int getNextTask() { + return m_nextTaskNumber++; + } +}; +#else // __cplusplus >= 201103L +struct CastSyncInfo { + volatile int m_nextTaskNumber; + btSpinMutex m_taskLock; + + CastSyncInfo() : m_nextTaskNumber(0) {} + + inline int getNextTask() { + m_taskLock.lock(); + const int taskNr = m_nextTaskNumber++; + m_taskLock.unlock(); + return taskNr; + } +}; +#endif // __cplusplus >= 201103L + +struct BatchRayCaster : public btIParallelForBody +{ + CastSyncInfo *m_syncInfo; + const btCollisionWorld *m_world; + const b3RayData *m_rayInputBuffer; + b3RayHitInfo *m_hitInfoOutputBuffer; + int m_numRays; + + BatchRayCaster(const btCollisionWorld* world, const b3RayData *rayInputBuffer, b3RayHitInfo *hitInfoOutputBuffer, int numRays) + : m_world(world), m_rayInputBuffer(rayInputBuffer), m_hitInfoOutputBuffer(hitInfoOutputBuffer), m_numRays(numRays) { + m_syncInfo = new CastSyncInfo; + } + + ~BatchRayCaster() { + delete m_syncInfo; + } + + void castRays(int numWorkers) { +#if BT_THREADSAFE + btParallelFor(0, numWorkers, 1, *this); +#else // BT_THREADSAFE + for (int i = 0; i < m_numRays; i++) { + processRay(i); + } +#endif // BT_THREADSAFE + } + + void forLoop( int iBegin, int iEnd ) const { - btVector3 rayFromWorld(clientCmd.m_requestRaycastIntersections.m_rayFromPositions[ray][0], - clientCmd.m_requestRaycastIntersections.m_rayFromPositions[ray][1], - clientCmd.m_requestRaycastIntersections.m_rayFromPositions[ray][2]); - btVector3 rayToWorld(clientCmd.m_requestRaycastIntersections.m_rayToPositions[ray][0], - clientCmd.m_requestRaycastIntersections.m_rayToPositions[ray][1], - clientCmd.m_requestRaycastIntersections.m_rayToPositions[ray][2]); + while(true) { + const int taskNr = m_syncInfo->getNextTask(); + if (taskNr >= m_numRays) + return; + processRay(taskNr); + } + } + + void processRay(int ray) const { + const double *from = m_rayInputBuffer[ray].m_rayFromPosition; + const double *to = m_rayInputBuffer[ray].m_rayToPosition; + btVector3 rayFromWorld(from[0], from[1], from[2]); + btVector3 rayToWorld(to[0], to[1], to[2]); btCollisionWorld::ClosestRayResultCallback rayResultCallback(rayFromWorld,rayToWorld); rayResultCallback.m_flags |= btTriangleRaycastCallback::kF_UseGjkConvexCastRaytest; - m_data->m_dynamicsWorld->rayTest(rayFromWorld,rayToWorld,rayResultCallback); - int rayHits = serverStatusOut.m_raycastHits.m_numRaycastHits; + m_world->rayTest(rayFromWorld,rayToWorld,rayResultCallback); + b3RayHitInfo& hit = m_hitInfoOutputBuffer[ray]; if (rayResultCallback.hasHit()) { - serverStatusOut.m_raycastHits.m_rayHits[rayHits].m_hitFraction - = rayResultCallback.m_closestHitFraction; + hit.m_hitFraction = rayResultCallback.m_closestHitFraction; int objectUniqueId = -1; int linkIndex = -1; @@ -4724,39 +4787,61 @@ bool PhysicsServerCommandProcessor::processRequestRaycastIntersectionsCommand(co } } - serverStatusOut.m_raycastHits.m_rayHits[rayHits].m_hitObjectUniqueId - = objectUniqueId; - serverStatusOut.m_raycastHits.m_rayHits[rayHits].m_hitObjectLinkIndex - = linkIndex; + hit.m_hitObjectUniqueId = objectUniqueId; + hit.m_hitObjectLinkIndex = linkIndex; - serverStatusOut.m_raycastHits.m_rayHits[rayHits].m_hitPositionWorld[0] - = rayResultCallback.m_hitPointWorld[0]; - serverStatusOut.m_raycastHits.m_rayHits[rayHits].m_hitPositionWorld[1] - = rayResultCallback.m_hitPointWorld[1]; - serverStatusOut.m_raycastHits.m_rayHits[rayHits].m_hitPositionWorld[2] - = rayResultCallback.m_hitPointWorld[2]; - - serverStatusOut.m_raycastHits.m_rayHits[rayHits].m_hitNormalWorld[0] - = rayResultCallback.m_hitNormalWorld[0]; - serverStatusOut.m_raycastHits.m_rayHits[rayHits].m_hitNormalWorld[1] - = rayResultCallback.m_hitNormalWorld[1]; - serverStatusOut.m_raycastHits.m_rayHits[rayHits].m_hitNormalWorld[2] - = rayResultCallback.m_hitNormalWorld[2]; + hit.m_hitPositionWorld[0] = rayResultCallback.m_hitPointWorld[0]; + hit.m_hitPositionWorld[1] = rayResultCallback.m_hitPointWorld[1]; + hit.m_hitPositionWorld[2] = rayResultCallback.m_hitPointWorld[2]; + hit.m_hitNormalWorld[0] = rayResultCallback.m_hitNormalWorld[0]; + hit.m_hitNormalWorld[1] = rayResultCallback.m_hitNormalWorld[1]; + hit.m_hitNormalWorld[2] = rayResultCallback.m_hitNormalWorld[2]; } else { - serverStatusOut.m_raycastHits.m_rayHits[rayHits].m_hitFraction = 1; - serverStatusOut.m_raycastHits.m_rayHits[serverStatusOut.m_raycastHits.m_numRaycastHits].m_hitObjectUniqueId = -1; - serverStatusOut.m_raycastHits.m_rayHits[serverStatusOut.m_raycastHits.m_numRaycastHits].m_hitObjectLinkIndex = -1; - serverStatusOut.m_raycastHits.m_rayHits[rayHits].m_hitPositionWorld[0] = 0; - serverStatusOut.m_raycastHits.m_rayHits[rayHits].m_hitPositionWorld[1] = 0; - serverStatusOut.m_raycastHits.m_rayHits[rayHits].m_hitPositionWorld[2] = 0; - serverStatusOut.m_raycastHits.m_rayHits[rayHits].m_hitNormalWorld[0] = 0; - serverStatusOut.m_raycastHits.m_rayHits[rayHits].m_hitNormalWorld[1] = 0; - serverStatusOut.m_raycastHits.m_rayHits[rayHits].m_hitNormalWorld[2] = 0; + hit.m_hitFraction = 1; + hit.m_hitObjectUniqueId = -1; + hit.m_hitObjectLinkIndex = -1; + hit.m_hitPositionWorld[0] = 0; + hit.m_hitPositionWorld[1] = 0; + hit.m_hitPositionWorld[2] = 0; + hit.m_hitNormalWorld[0] = 0; + hit.m_hitNormalWorld[1] = 0; + hit.m_hitNormalWorld[2] = 0; } - serverStatusOut.m_raycastHits.m_numRaycastHits++; } +}; + +bool PhysicsServerCommandProcessor::processRequestRaycastIntersectionsCommand(const struct SharedMemoryCommand& clientCmd, struct SharedMemoryStatus& serverStatusOut, char* bufferServerToClient, int bufferSizeInBytes) +{ + bool hasStatus = true; + BT_PROFILE("CMD_REQUEST_RAY_CAST_INTERSECTIONS"); + serverStatusOut.m_raycastHits.m_numRaycastHits = 0; + + const int numRays = clientCmd.m_requestRaycastIntersections.m_numRays; + const int numThreads = clientCmd.m_requestRaycastIntersections.m_numThreads; + + b3RayData *rayInputBuffer = (b3RayData *)malloc(sizeof(b3RayData) * numRays); + memcpy(rayInputBuffer, bufferServerToClient, sizeof(b3RayData) * numRays); + + BatchRayCaster batchRayCaster(m_data->m_dynamicsWorld, rayInputBuffer, (b3RayHitInfo *)bufferServerToClient, numRays); + if (numThreads == 0) { + // When 0 is specified, Bullet can decide how many threads to use. + // About 16 rays per thread seems to work reasonably well. + batchRayCaster.castRays(numRays / 32); + } else if (numThreads == 1) { + // Sequentially trace all rays: + for (int i = 0; i < numRays; i++) { + batchRayCaster.processRay(i); + } + } else { + // Otherwise, just use the user-specified number of threads. This is + // still limited by the number of virtual cores on the machine. + batchRayCaster.castRays(numThreads); + } + + free(rayInputBuffer); + serverStatusOut.m_raycastHits.m_numRaycastHits = numRays; serverStatusOut.m_type = CMD_REQUEST_RAY_CAST_INTERSECTIONS_COMPLETED; return hasStatus; } diff --git a/examples/SharedMemory/SharedMemoryCommands.h b/examples/SharedMemory/SharedMemoryCommands.h index 248f726d4..56d79a396 100644 --- a/examples/SharedMemory/SharedMemoryCommands.h +++ b/examples/SharedMemory/SharedMemoryCommands.h @@ -23,12 +23,6 @@ typedef unsigned long long int smUint64_t; #endif -#ifdef __APPLE__ - #define SHARED_MEMORY_MAX_STREAM_CHUNK_SIZE (512*1024) -#else - #define SHARED_MEMORY_MAX_STREAM_CHUNK_SIZE (8*1024*1024) -#endif - #define SHARED_MEMORY_SERVER_TEST_C #define MAX_DEGREE_OF_FREEDOM 128 #define MAX_NUM_SENSORS 256 @@ -38,6 +32,7 @@ #define MAX_NUM_LINKS MAX_DEGREE_OF_FREEDOM #define MAX_USER_DATA_KEY_LENGTH MAX_URDF_FILENAME_LENGTH + struct TmpFloat3 { float m_x; @@ -279,15 +274,19 @@ enum EnumRequestContactDataUpdateFlags struct RequestRaycastIntersections { + // The number of threads that Bullet may use to perform the ray casts. + // 0: Let Bullet decide + // 1: Use a single thread (i.e. no multi-threading) + // 2 or more: Number of threads to use. + int m_numThreads; int m_numRays; - double m_rayFromPositions[MAX_RAY_INTERSECTION_BATCH_SIZE][3]; - double m_rayToPositions[MAX_RAY_INTERSECTION_BATCH_SIZE][3]; + // Actual ray data stored in m_bulletStreamDataServerToClientRefactor. }; struct SendRaycastHits { int m_numRaycastHits; - b3RayHitInfo m_rayHits[MAX_RAY_INTERSECTION_BATCH_SIZE]; + // Actual ray data stored in m_bulletStreamDataServerToClientRefactor. }; struct RequestContactDataArgs @@ -1010,6 +1009,7 @@ struct SharedMemoryCommand int m_type; smUint64_t m_timeStamp; int m_sequenceNumber; + struct PhysicsClient *m_client; //m_updateFlags is a bit fields to tell which parameters need updating //for example m_updateFlags = SIM_PARAM_UPDATE_DELTA_TIME | SIM_PARAM_UPDATE_NUM_SOLVER_ITERATIONS; diff --git a/examples/SharedMemory/SharedMemoryPublic.h b/examples/SharedMemory/SharedMemoryPublic.h index f5682b1a2..cce5fcb35 100644 --- a/examples/SharedMemory/SharedMemoryPublic.h +++ b/examples/SharedMemory/SharedMemoryPublic.h @@ -5,7 +5,7 @@ ///increase the SHARED_MEMORY_MAGIC_NUMBER whenever incompatible changes are made in the structures ///my convention is year/month/day/rev -#define SHARED_MEMORY_MAGIC_NUMBER 201806020 +#define SHARED_MEMORY_MAGIC_NUMBER 201806150 //#define SHARED_MEMORY_MAGIC_NUMBER 201801170 //#define SHARED_MEMORY_MAGIC_NUMBER 201801080 //#define SHARED_MEMORY_MAGIC_NUMBER 201801010 @@ -17,6 +17,11 @@ //#define SHARED_MEMORY_MAGIC_NUMBER 201706001 //#define SHARED_MEMORY_MAGIC_NUMBER 201703024 +#ifdef __APPLE__ + #define SHARED_MEMORY_MAX_STREAM_CHUNK_SIZE (512*1024) +#else + #define SHARED_MEMORY_MAX_STREAM_CHUNK_SIZE (8*1024*1024) +#endif enum EnumSharedMemoryClientCommand { @@ -408,8 +413,7 @@ enum b3VREventType #define MAX_VR_BUTTONS 64 #define MAX_VR_CONTROLLERS 8 -#define MAX_RAY_INTERSECTION_BATCH_SIZE 256 -#define MAX_RAY_HITS MAX_RAY_INTERSECTION_BATCH_SIZE + #define MAX_KEYBOARD_EVENTS 256 #define MAX_MOUSE_EVENTS 256 @@ -544,6 +548,12 @@ struct b3ContactInformation struct b3ContactPointData* m_contactPointData; }; +struct b3RayData +{ + double m_rayFromPosition[3]; + double m_rayToPosition[3]; +}; + struct b3RayHitInfo { double m_hitFraction; @@ -559,7 +569,12 @@ struct b3RaycastInformation struct b3RayHitInfo* m_rayHits; }; - +typedef union { + struct b3RayData a; + struct b3RayHitInfo b; +} RAY_DATA_UNION; +#define MAX_RAY_INTERSECTION_BATCH_SIZE SHARED_MEMORY_MAX_STREAM_CHUNK_SIZE / sizeof( RAY_DATA_UNION ) +#define MAX_RAY_HITS MAX_RAY_INTERSECTION_BATCH_SIZE #define VISUAL_SHAPE_MAX_PATH_LEN 1024 struct b3VisualShapeData diff --git a/examples/pybullet/pybullet.c b/examples/pybullet/pybullet.c index 09bd9cef8..e127b758c 100644 --- a/examples/pybullet/pybullet.c +++ b/examples/pybullet/pybullet.c @@ -4678,16 +4678,17 @@ static PyObject* pybullet_rayTestBatch(PyObject* self, PyObject* args, PyObject* int statusType; PyObject* rayFromObjList = 0; PyObject* rayToObjList = 0; + int numThreads = 1; b3PhysicsClientHandle sm = 0; int sizeFrom = 0; int sizeTo = 0; - static char* kwlist[] = {"rayFromPositions", "rayToPositions", "physicsClientId", NULL}; + static char* kwlist[] = {"rayFromPositions", "rayToPositions", "numThreads", "physicsClientId", NULL}; int physicsClientId = 0; - if (!PyArg_ParseTupleAndKeywords(args, keywds, "OO|i", kwlist, - &rayFromObjList, &rayToObjList, &physicsClientId)) + if (!PyArg_ParseTupleAndKeywords(args, keywds, "OO|ii", kwlist, + &rayFromObjList, &rayToObjList, &numThreads, &physicsClientId)) return NULL; sm = getPhysicsClient(physicsClientId); @@ -4699,6 +4700,7 @@ static PyObject* pybullet_rayTestBatch(PyObject* self, PyObject* args, PyObject* commandHandle = b3CreateRaycastBatchCommandInit(sm); + b3RaycastBatchSetNumThreads(commandHandle, numThreads); if (rayFromObjList) @@ -4720,7 +4722,7 @@ static PyObject* pybullet_rayTestBatch(PyObject* self, PyObject* args, PyObject* { int i; - if (lenFrom >= MAX_RAY_INTERSECTION_BATCH_SIZE) + if (lenFrom > MAX_RAY_INTERSECTION_BATCH_SIZE) { PyErr_SetString(SpamError, "Number of rays exceed the maximum batch size."); Py_DECREF(seqRayFromObj);