From a48b6b9ca0dbbcaab4774258015ce8e1638c9310 Mon Sep 17 00:00:00 2001 From: Erwin Coumans Date: Wed, 31 Oct 2018 17:00:34 -0700 Subject: [PATCH 1/8] fix some thread sanitizer (read/write integer, should be a harmless warning) --- .../TaskScheduler/btThreadSupportPosix.cpp | 23 ++++++++++++++----- 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/src/LinearMath/TaskScheduler/btThreadSupportPosix.cpp b/src/LinearMath/TaskScheduler/btThreadSupportPosix.cpp index 47f57943b..25bfda0bf 100644 --- a/src/LinearMath/TaskScheduler/btThreadSupportPosix.cpp +++ b/src/LinearMath/TaskScheduler/btThreadSupportPosix.cpp @@ -1,3 +1,4 @@ + /* Bullet Continuous Collision Detection and Physics Library Copyright (c) 2003-2018 Erwin Coumans http://bulletphysics.com @@ -72,7 +73,7 @@ public: pthread_t thread; //each tread will wait until this signal to start its work sem_t* startSemaphore; - + btCriticalSection* m_cs; // this is a copy of m_mainSemaphore, //each tread will signal once it is finished with its work sem_t* m_mainSemaphore; @@ -90,7 +91,7 @@ private: void startThreads(const ConstructionInfo& threadInfo); void stopThreads(); int waitForResponse(); - + btCriticalSection* m_cs; public: btThreadSupportPosix(const ConstructionInfo& threadConstructionInfo); virtual ~btThreadSupportPosix(); @@ -119,6 +120,7 @@ public: btThreadSupportPosix::btThreadSupportPosix(const ConstructionInfo& threadConstructionInfo) { + m_cs = createCriticalSection(); startThreads(threadConstructionInfo); } @@ -126,6 +128,8 @@ btThreadSupportPosix::btThreadSupportPosix(const ConstructionInfo& threadConstru btThreadSupportPosix::~btThreadSupportPosix() { stopThreads(); + deleteCriticalSection(m_cs); + m_cs=0; } #if (defined(__APPLE__)) @@ -181,14 +185,18 @@ static void* threadFunction(void* argument) { btAssert(status->m_status); status->m_userThreadFunc(userPtr); + status->m_cs->lock(); status->m_status = 2; + status->m_cs->unlock(); checkPThreadFunction(sem_post(status->m_mainSemaphore)); status->threadUsed++; } else { //exit Thread + status->m_cs->lock(); status->m_status = 3; + status->m_cs->unlock(); checkPThreadFunction(sem_post(status->m_mainSemaphore)); printf("Thread with taskId %i exiting\n", status->m_taskId); break; @@ -206,7 +214,7 @@ void btThreadSupportPosix::runTask(int threadIndex, void* userData) btThreadStatus& threadStatus = m_activeThreadStatus[threadIndex]; btAssert(threadIndex >= 0); btAssert(threadIndex < m_activeThreadStatus.size()); - + threadStatus.m_cs = m_cs; threadStatus.m_commandId = 1; threadStatus.m_status = 1; threadStatus.m_userPtr = userData; @@ -231,7 +239,10 @@ int btThreadSupportPosix::waitForResponse() for (size_t t = 0; t < size_t(m_activeThreadStatus.size()); ++t) { - if (2 == m_activeThreadStatus[t].m_status) + m_cs->lock(); + bool hasFinished = (2 == m_activeThreadStatus[t].m_status); + m_cs->unlock(); + if (hasFinished) { last = t; break; @@ -273,15 +284,15 @@ void btThreadSupportPosix::startThreads(const ConstructionInfo& threadConstructi printf("starting thread %d\n", i); btThreadStatus& threadStatus = m_activeThreadStatus[i]; threadStatus.startSemaphore = createSem("threadLocal"); - checkPThreadFunction(pthread_create(&threadStatus.thread, NULL, &threadFunction, (void*)&threadStatus)); - threadStatus.m_userPtr = 0; + threadStatus.m_cs = m_cs; threadStatus.m_taskId = i; threadStatus.m_commandId = 0; threadStatus.m_status = 0; threadStatus.m_mainSemaphore = m_mainSemaphore; threadStatus.m_userThreadFunc = threadConstructionInfo.m_userThreadFunc; threadStatus.threadUsed = 0; + checkPThreadFunction(pthread_create(&threadStatus.thread, NULL, &threadFunction, (void*)&threadStatus)); printf("started thread %d \n", i); } From 131535a99ffb362b89c0f52064f6e05fcf136cc4 Mon Sep 17 00:00:00 2001 From: Erwin Coumans Date: Wed, 31 Oct 2018 21:24:44 -0700 Subject: [PATCH 2/8] remove debug stats --- examples/pybullet/gym/pybullet_utils/examples/mjcf2urdf.py | 2 +- .../NarrowPhaseCollision/btGjkPairDetector.cpp | 5 ----- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/examples/pybullet/gym/pybullet_utils/examples/mjcf2urdf.py b/examples/pybullet/gym/pybullet_utils/examples/mjcf2urdf.py index a4fc7ca0e..c1e39745d 100644 --- a/examples/pybullet/gym/pybullet_utils/examples/mjcf2urdf.py +++ b/examples/pybullet/gym/pybullet_utils/examples/mjcf2urdf.py @@ -14,7 +14,7 @@ p.setAdditionalSearchPath(pd.getDataPath()) objs = p.loadMJCF(args.mjcf, flags=p.URDF_USE_IMPLICIT_CYLINDER) for o in objs: - print("o=",o, p.getBodyInfo(o), p.getNumJoints(o)) + #print("o=",o, p.getBodyInfo(o), p.getNumJoints(o)) humanoid = objs[o] ed0 = ed.UrdfEditor() ed0.initializeFromBulletBody(humanoid, p._client) diff --git a/src/BulletCollision/NarrowPhaseCollision/btGjkPairDetector.cpp b/src/BulletCollision/NarrowPhaseCollision/btGjkPairDetector.cpp index 936130753..803f6e067 100644 --- a/src/BulletCollision/NarrowPhaseCollision/btGjkPairDetector.cpp +++ b/src/BulletCollision/NarrowPhaseCollision/btGjkPairDetector.cpp @@ -36,9 +36,6 @@ btScalar gGjkEpaPenetrationTolerance = 1.0e-12; btScalar gGjkEpaPenetrationTolerance = 0.001; #endif -//temp globals, to improve GJK/EPA/penetration calculations -int gNumDeepPenetrationChecks = 0; -int gNumGjkChecks = 0; btGjkPairDetector::btGjkPairDetector(const btConvexShape *objectA, const btConvexShape *objectB, btSimplexSolverInterface *simplexSolver, btConvexPenetrationDepthSolver *penetrationDepthSolver) : m_cachedSeparatingAxis(btScalar(0.), btScalar(1.), btScalar(0.)), @@ -708,7 +705,6 @@ void btGjkPairDetector::getClosestPointsNonVirtual(const ClosestPointInput &inpu btScalar marginA = m_marginA; btScalar marginB = m_marginB; - gNumGjkChecks++; //for CCD we don't use margins if (m_ignoreMargin) @@ -1021,7 +1017,6 @@ void btGjkPairDetector::getClosestPointsNonVirtual(const ClosestPointInput &inpu // Penetration depth case. btVector3 tmpPointOnA, tmpPointOnB; - gNumDeepPenetrationChecks++; m_cachedSeparatingAxis.setZero(); bool isValid2 = m_penetrationDepthSolver->calcPenDepth( From 9e58a217324d555bb11eb5194825906f977c82e6 Mon Sep 17 00:00:00 2001 From: Erwin Coumans Date: Wed, 31 Oct 2018 21:50:49 -0700 Subject: [PATCH 3/8] more aesthetic wireframe, pure visual improvement (no impact on simulation/performance) fixes https://github.com/bulletphysics/bullet3/issues/1970 Issue 1970 --- data/multibody.bullet | Bin 16836 -> 16836 bytes examples/Benchmarks/BenchmarkDemo.cpp | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/data/multibody.bullet b/data/multibody.bullet index 0283009946cd6302966b345ef4522b929709667e..32b9e7f37411e082f0138d19b5176450d62d6f50 100644 GIT binary patch delta 466 zcmX@o%y^`kaRVs+=(e(}t3=IE)07!uWh|L0KHP}Oi7#M*JK^O-}N3$Y0 zj6gO>j~7tUsd|L?l7hYaK)eI<-mLh;&YVftxfEovOdk@eGko&#^>0e0JfsB8^%)r6GAi@B1A4ng_oezQZV<6rP z#Fv?X1Oz0&XnUCRzycBsmjz<&6DIlZ3FKo`00$fs-=4kN3#;t;oUH6XY8oH>-N$Da zurF3IV)A?D)X7uW#U}4!Z7Zx-cVZw3H~A#~XQ delta 419 zcmX@o%y^`kaRV6m=!q9ZCJV5bOg_ORz_O%Z@4m@~Od^(oj0|9~Aj{y3 zNEib{MD(_Ln|cO@2|!UsFmwRRD}YG^F=66e$;o<59g`iHgn%ZROg3Q>n{3C*F{; zpP>OL2Q`bqh5@7*Ss{?UnStSe9Y_oWQYSlu6x+WDI=WYJ_jEg;;r6p;&IHOgOn%Rt z>N!DQ791J{o}WL1LInmIp$vg=Td;h>YaK8>VT%fwW-x~611bZX9|#O%{mExoI5><= X^$ZMj6e>60VXinitializePolyhedralFeatures(); + convexHullShape->initializePolyhedralFeatures(); btTransform trans; trans.setIdentity(); From 438e082b330f7fa84c6d3f9de7236ddaaa3a2234 Mon Sep 17 00:00:00 2001 From: Erwin Coumans Date: Thu, 1 Nov 2018 07:27:37 -0700 Subject: [PATCH 4/8] PyBullet: remove a potential race condition --- examples/SharedMemory/PhysicsServerExample.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/examples/SharedMemory/PhysicsServerExample.cpp b/examples/SharedMemory/PhysicsServerExample.cpp index 93292ee89..2dc7145be 100644 --- a/examples/SharedMemory/PhysicsServerExample.cpp +++ b/examples/SharedMemory/PhysicsServerExample.cpp @@ -1737,6 +1737,11 @@ void PhysicsServerExample::initPhysics() m_args[w].m_cs2 = m_threadSupport->createCriticalSection(); m_args[w].m_cs3 = m_threadSupport->createCriticalSection(); m_args[w].m_csGUI = m_threadSupport->createCriticalSection(); + m_multiThreadedHelper->setCriticalSection(m_args[w].m_cs); + m_multiThreadedHelper->setCriticalSection2(m_args[w].m_cs2); + m_multiThreadedHelper->setCriticalSection3(m_args[w].m_cs3); + m_multiThreadedHelper->setCriticalSectionGUI(m_args[w].m_csGUI); + m_args[w].m_cs->lock(); m_args[w].m_cs->setSharedParam(0, eMotionIsUnInitialized); m_args[w].m_cs->unlock(); @@ -1760,10 +1765,6 @@ void PhysicsServerExample::initPhysics() } m_args[0].m_cs->setSharedParam(1, eGUIHelperIdle); - m_multiThreadedHelper->setCriticalSection(m_args[0].m_cs); - m_multiThreadedHelper->setCriticalSection2(m_args[0].m_cs2); - m_multiThreadedHelper->setCriticalSection3(m_args[0].m_cs3); - m_multiThreadedHelper->setCriticalSectionGUI(m_args[0].m_csGUI); m_args[0].m_cs2->lock(); From 336a4b65feb8a8003dcabf602230a8f11848244b Mon Sep 17 00:00:00 2001 From: Erwin Coumans Date: Thu, 1 Nov 2018 07:41:35 -0700 Subject: [PATCH 5/8] disable some internal statistics, reporting a race condition. --- src/Bullet3Common/b3AlignedAllocator.cpp | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/src/Bullet3Common/b3AlignedAllocator.cpp b/src/Bullet3Common/b3AlignedAllocator.cpp index 6e54c5fb7..d546d5e06 100644 --- a/src/Bullet3Common/b3AlignedAllocator.cpp +++ b/src/Bullet3Common/b3AlignedAllocator.cpp @@ -15,9 +15,11 @@ subject to the following restrictions: #include "b3AlignedAllocator.h" +#ifdef B3_ALLOCATOR_STATISTICS int b3g_numAlignedAllocs = 0; int b3g_numAlignedFree = 0; int b3g_totalBytesAlignedAllocs = 0; //detect memory leaks +#endif static void *b3AllocDefault(size_t size) { @@ -109,10 +111,10 @@ void *b3AlignedAllocInternal(size_t size, int alignment, int line, char *filenam { void *ret; char *real; - +#ifdef B3_ALLOCATOR_STATISTICS b3g_totalBytesAlignedAllocs += size; b3g_numAlignedAllocs++; - +#endif real = (char *)b3s_allocFunc(size + 2 * sizeof(void *) + (alignment - 1)); if (real) { @@ -135,14 +137,16 @@ void *b3AlignedAllocInternal(size_t size, int alignment, int line, char *filenam void b3AlignedFreeInternal(void *ptr, int line, char *filename) { void *real; +#ifdef B3_ALLOCATOR_STATISTICS b3g_numAlignedFree++; - +#endif if (ptr) { real = *((void **)(ptr)-1); int size = *((int *)(ptr)-2); +#ifdef B3_ALLOCATOR_STATISTICS b3g_totalBytesAlignedAllocs -= size; - +#endif b3Printf("free #%d at address %x, from %s,line %d, size %d\n", b3g_numAlignedFree, real, filename, line, size); b3s_freeFunc(real); @@ -157,7 +161,9 @@ void b3AlignedFreeInternal(void *ptr, int line, char *filename) void *b3AlignedAllocInternal(size_t size, int alignment) { +#ifdef B3_ALLOCATOR_STATISTICS b3g_numAlignedAllocs++; +#endif void *ptr; ptr = b3s_alignedAllocFunc(size, alignment); // b3Printf("b3AlignedAllocInternal %d, %x\n",size,ptr); @@ -170,8 +176,9 @@ void b3AlignedFreeInternal(void *ptr) { return; } - +#ifdef B3_ALLOCATOR_STATISTICS b3g_numAlignedFree++; +#endif // b3Printf("b3AlignedFreeInternal %x\n",ptr); b3s_alignedFreeFunc(ptr); } From 750133694c83ac33bd33b7baee5b2e6d3a7cc8fc Mon Sep 17 00:00:00 2001 From: erwincoumans Date: Thu, 1 Nov 2018 08:19:50 -0700 Subject: [PATCH 6/8] Disable btQuickprof.h profiling by default. We use custom profiling functions, see b3ChromeUtilsStartTimings. --- .../ExampleBrowser/OpenGLExampleBrowser.cpp | 3 +- examples/SharedMemory/SharedMemoryPublic.h | 4 +-- examples/Utils/ChromeTraceUtil.cpp | 8 ++--- src/LinearMath/btQuickprof.cpp | 34 ++++++++++--------- src/LinearMath/btQuickprof.h | 9 ++--- 5 files changed, 30 insertions(+), 28 deletions(-) diff --git a/examples/ExampleBrowser/OpenGLExampleBrowser.cpp b/examples/ExampleBrowser/OpenGLExampleBrowser.cpp index 1ed8c7a37..d5c9ccac5 100644 --- a/examples/ExampleBrowser/OpenGLExampleBrowser.cpp +++ b/examples/ExampleBrowser/OpenGLExampleBrowser.cpp @@ -251,7 +251,6 @@ void MyKeyboardCallback(int key, int state) if (key == 'p') { -#ifndef BT_NO_PROFILE if (state) { b3ChromeUtilsStartTimings(); @@ -260,7 +259,6 @@ void MyKeyboardCallback(int key, int state) { b3ChromeUtilsStopTimingsAndWriteJsonFile("timings"); } -#endif //BT_NO_PROFILE } #ifndef NO_OPENGL3 @@ -1129,6 +1127,7 @@ bool OpenGLExampleBrowser::init(int argc, char* argv[]) gui2->registerFileOpenCallback(fileOpenCallback); gui2->registerQuitCallback(quitCallback); + } return true; diff --git a/examples/SharedMemory/SharedMemoryPublic.h b/examples/SharedMemory/SharedMemoryPublic.h index 166de101c..eb74d3f3c 100644 --- a/examples/SharedMemory/SharedMemoryPublic.h +++ b/examples/SharedMemory/SharedMemoryPublic.h @@ -928,7 +928,7 @@ enum eFileIOTypes }; //limits for vertices/indices in PyBullet::createCollisionShape -#define B3_MAX_NUM_VERTICES 1024 -#define B3_MAX_NUM_INDICES 1024 +#define B3_MAX_NUM_VERTICES 16 +#define B3_MAX_NUM_INDICES 16 #endif //SHARED_MEMORY_PUBLIC_H diff --git a/examples/Utils/ChromeTraceUtil.cpp b/examples/Utils/ChromeTraceUtil.cpp index 2a25678ed..78d478162 100644 --- a/examples/Utils/ChromeTraceUtil.cpp +++ b/examples/Utils/ChromeTraceUtil.cpp @@ -174,7 +174,7 @@ void MyEnterProfileZoneFunc(const char* msg) { if (gProfileDisabled) return; -#ifndef BT_NO_PROFILE + int threadId = btQuickprofGetCurrentThreadIndex2(); if (threadId < 0 || threadId >= BT_QUICKPROF_MAX_THREAD_COUNT) return; @@ -191,13 +191,13 @@ void MyEnterProfileZoneFunc(const char* msg) gStartTimes[threadId][gStackDepths[threadId]] = 1 + gStartTimes[threadId][gStackDepths[threadId] - 1]; } gStackDepths[threadId]++; -#endif + } void MyLeaveProfileZoneFunc() { if (gProfileDisabled) return; -#ifndef BT_NO_PROFILE + int threadId = btQuickprofGetCurrentThreadIndex2(); if (threadId < 0 || threadId >= BT_QUICKPROF_MAX_THREAD_COUNT) return; @@ -214,7 +214,7 @@ void MyLeaveProfileZoneFunc() unsigned long long int endTime = clk.getTimeNanoseconds(); gTimings[threadId].addTiming(name, threadId, startTime, endTime); -#endif //BT_NO_PROFILE + } void b3ChromeUtilsStartTimings() diff --git a/src/LinearMath/btQuickprof.cpp b/src/LinearMath/btQuickprof.cpp index 8d3310981..86fd1d781 100644 --- a/src/LinearMath/btQuickprof.cpp +++ b/src/LinearMath/btQuickprof.cpp @@ -694,6 +694,24 @@ void CProfileManager::dumpAll() CProfileManager::Release_Iterator(profileIterator); } + +void btEnterProfileZoneDefault(const char* name) +{ +} +void btLeaveProfileZoneDefault() +{ +} + +#else +void btEnterProfileZoneDefault(const char* name) +{ +} +void btLeaveProfileZoneDefault() +{ +} +#endif //BT_NO_PROFILE + + // clang-format off #if defined(_WIN32) && (defined(__MINGW32__) || defined(__MINGW64__)) #define BT_HAVE_TLS 1 @@ -743,22 +761,6 @@ unsigned int btQuickprofGetCurrentThreadIndex2() #endif //BT_THREADSAFE } -void btEnterProfileZoneDefault(const char* name) -{ -} -void btLeaveProfileZoneDefault() -{ -} - -#else -void btEnterProfileZoneDefault(const char* name) -{ -} -void btLeaveProfileZoneDefault() -{ -} -#endif //BT_NO_PROFILE - static btEnterProfileZoneFunc* bts_enterFunc = btEnterProfileZoneDefault; static btLeaveProfileZoneFunc* bts_leaveFunc = btLeaveProfileZoneDefault; diff --git a/src/LinearMath/btQuickprof.h b/src/LinearMath/btQuickprof.h index 18ba4d5fb..990d401d5 100644 --- a/src/LinearMath/btQuickprof.h +++ b/src/LinearMath/btQuickprof.h @@ -61,18 +61,19 @@ btLeaveProfileZoneFunc* btGetCurrentLeaveProfileZoneFunc(); void btSetCustomEnterProfileZoneFunc(btEnterProfileZoneFunc* enterFunc); void btSetCustomLeaveProfileZoneFunc(btLeaveProfileZoneFunc* leaveFunc); -#ifndef BT_NO_PROFILE // FIX redefinition -//To disable built-in profiling, please comment out next line -//#define BT_NO_PROFILE 1 +#ifndef BT_ENABLE_PROFILE +#define BT_NO_PROFILE 1 #endif //BT_NO_PROFILE const unsigned int BT_QUICKPROF_MAX_THREAD_COUNT = 64; -#ifndef BT_NO_PROFILE //btQuickprofGetCurrentThreadIndex will return -1 if thread index cannot be determined, //otherwise returns thread index in range [0..maxThreads] unsigned int btQuickprofGetCurrentThreadIndex2(); +#ifndef BT_NO_PROFILE + + #include //@todo remove this, backwards compatibility #include "btAlignedAllocator.h" From f6ea2a73796438734be1a0cb5f8124da8e976be8 Mon Sep 17 00:00:00 2001 From: Erwin Coumans Date: Thu, 1 Nov 2018 10:20:54 -0700 Subject: [PATCH 7/8] fix a potential data race condition. remove createObstacleCourse programmatic creation part until we can steam vertices/indices. --- .../SharedMemory/PhysicsServerExample.cpp | 3 ++- .../pybullet/examples/createObstacleCourse.py | 22 ------------------- 2 files changed, 2 insertions(+), 23 deletions(-) diff --git a/examples/SharedMemory/PhysicsServerExample.cpp b/examples/SharedMemory/PhysicsServerExample.cpp index 2dc7145be..36ec1f18b 100644 --- a/examples/SharedMemory/PhysicsServerExample.cpp +++ b/examples/SharedMemory/PhysicsServerExample.cpp @@ -1764,8 +1764,9 @@ void PhysicsServerExample::initPhysics() } } + m_args[0].m_cs->lock(); m_args[0].m_cs->setSharedParam(1, eGUIHelperIdle); - + m_args[0].m_cs->unlock(); m_args[0].m_cs2->lock(); { diff --git a/examples/pybullet/examples/createObstacleCourse.py b/examples/pybullet/examples/createObstacleCourse.py index 1194a213a..ef145c72c 100644 --- a/examples/pybullet/examples/createObstacleCourse.py +++ b/examples/pybullet/examples/createObstacleCourse.py @@ -21,28 +21,6 @@ colSphereId = p.createCollisionShape(p.GEOM_SPHERE,radius=sphereRadius) #convex mesh from obj stoneId = p.createCollisionShape(p.GEOM_MESH,fileName="stone.obj") -#concave mesh from obj -stoneId = p.createCollisionShape(p.GEOM_MESH,fileName="stone.obj", flags=p.GEOM_FORCE_CONCAVE_TRIMESH) - - -verts=[[-0.246350, -0.246483, -0.000624], - [ -0.151407, -0.176325, 0.172867], - [ -0.246350, 0.249205, -0.000624], - [ -0.151407, 0.129477, 0.172867], - [ 0.249338, -0.246483, -0.000624], - [ 0.154395, -0.176325, 0.172867], - [ 0.249338, 0.249205, -0.000624], - [ 0.154395, 0.129477, 0.172867]] -#convex mesh from vertices -stoneConvexId = p.createCollisionShape(p.GEOM_MESH,vertices=verts) - -indices=[0,3,2,3,6,2,7,4,6,5,0,4,6,0,2,3,5,7,0,1,3,3,7,6,7,5,4,5,1,0,6,4,0,3,1,5] - -#concave mesh from vertices+indices -stoneConcaveId = p.createCollisionShape(p.GEOM_MESH,vertices=verts, indices=indices) - -stoneId = stoneConvexId -#stoneId = stoneConcaveId boxHalfLength = 0.5 From 8b53e47fe8531710eb1ae0533035ef4e51f855d4 Mon Sep 17 00:00:00 2001 From: Erwin Coumans Date: Fri, 2 Nov 2018 11:19:46 -0700 Subject: [PATCH 8/8] add simpler ARS implementation, thanks to Alexis Jacq and Hadelin de Ponteves (will add save/restore of policy and rendering movies through command-line arguments soon) --- .../pybullet/gym/pybullet_envs/ARS/ars.py | 142 ++++++++++++++++++ 1 file changed, 142 insertions(+) create mode 100644 examples/pybullet/gym/pybullet_envs/ARS/ars.py diff --git a/examples/pybullet/gym/pybullet_envs/ARS/ars.py b/examples/pybullet/gym/pybullet_envs/ARS/ars.py new file mode 100644 index 000000000..b4c0c3127 --- /dev/null +++ b/examples/pybullet/gym/pybullet_envs/ARS/ars.py @@ -0,0 +1,142 @@ +# AI 2018 + +# Importing the libraries +import os +import numpy as np +import gym +from gym import wrappers +import pybullet_envs + +# Setting the Hyper Parameters + +class Hp(): + + def __init__(self): + self.nb_steps = 1000 + self.episode_length = 1000 + self.learning_rate = 0.02 + self.nb_directions = 16 + self.nb_best_directions = 16 + assert self.nb_best_directions <= self.nb_directions + self.noise = 0.03 + self.seed = 1 + self.env_name = 'HalfCheetahBulletEnv-v0' + +# Normalizing the states + +class Normalizer(): + + def __init__(self, nb_inputs): + self.n = np.zeros(nb_inputs) + self.mean = np.zeros(nb_inputs) + self.mean_diff = np.zeros(nb_inputs) + self.var = np.zeros(nb_inputs) + + def observe(self, x): + self.n += 1. + last_mean = self.mean.copy() + self.mean += (x - self.mean) / self.n + self.mean_diff += (x - last_mean) * (x - self.mean) + self.var = (self.mean_diff / self.n).clip(min = 1e-2) + + def normalize(self, inputs): + obs_mean = self.mean + obs_std = np.sqrt(self.var) + return (inputs - obs_mean) / obs_std + +# Building the AI + +class Policy(): + + def __init__(self, input_size, output_size): + self.theta = np.zeros((output_size, input_size)) + print("self.theta=",self.theta) + def evaluate(self, input, delta = None, direction = None): + if direction is None: + return np.clip(self.theta.dot(input), -1.0, 1.0) + elif direction == "positive": + return np.clip((self.theta + hp.noise*delta).dot(input), -1.0, 1.0) + else: + return np.clip((self.theta - hp.noise*delta).dot(input), -1.0, 1.0) + + def sample_deltas(self): + return [np.random.randn(*self.theta.shape) for _ in range(hp.nb_directions)] + + def update(self, rollouts, sigma_r): + step = np.zeros(self.theta.shape) + for r_pos, r_neg, d in rollouts: + step += (r_pos - r_neg) * d + self.theta += hp.learning_rate / (hp.nb_best_directions * sigma_r) * step + +# Exploring the policy on one specific direction and over one episode + +def explore(env, normalizer, policy, direction = None, delta = None): + state = env.reset() + done = False + num_plays = 0. + sum_rewards = 0 + while not done and num_plays < hp.episode_length: + normalizer.observe(state) + state = normalizer.normalize(state) + action = policy.evaluate(state, delta, direction) + state, reward, done, _ = env.step(action) + reward = max(min(reward, 1), -1) + sum_rewards += reward + num_plays += 1 + return sum_rewards + +# Training the AI + +def train(env, policy, normalizer, hp): + + for step in range(hp.nb_steps): + + # Initializing the perturbations deltas and the positive/negative rewards + deltas = policy.sample_deltas() + positive_rewards = [0] * hp.nb_directions + negative_rewards = [0] * hp.nb_directions + + # Getting the positive rewards in the positive directions + for k in range(hp.nb_directions): + positive_rewards[k] = explore(env, normalizer, policy, direction = "positive", delta = deltas[k]) + + # Getting the negative rewards in the negative/opposite directions + for k in range(hp.nb_directions): + negative_rewards[k] = explore(env, normalizer, policy, direction = "negative", delta = deltas[k]) + + # Gathering all the positive/negative rewards to compute the standard deviation of these rewards + all_rewards = np.array(positive_rewards + negative_rewards) + sigma_r = all_rewards.std() + + # Sorting the rollouts by the max(r_pos, r_neg) and selecting the best directions + scores = {k:max(r_pos, r_neg) for k,(r_pos,r_neg) in enumerate(zip(positive_rewards, negative_rewards))} + order = sorted(scores.keys(), key = lambda x:scores[x])[:hp.nb_best_directions] + rollouts = [(positive_rewards[k], negative_rewards[k], deltas[k]) for k in order] + + # Updating our policy + policy.update(rollouts, sigma_r) + + # Printing the final reward of the policy after the update + reward_evaluation = explore(env, normalizer, policy) + print('Step:', step, 'Reward:', reward_evaluation) + +# Running the main code + +def mkdir(base, name): + path = os.path.join(base, name) + if not os.path.exists(path): + os.makedirs(path) + return path +work_dir = mkdir('exp', 'brs') +monitor_dir = mkdir(work_dir, 'monitor') + +hp = Hp() +np.random.seed(hp.seed) +env = gym.make(hp.env_name) +# env.render(mode = "human") +#env = wrappers.Monitor(env, monitor_dir, force = True) +nb_inputs = env.observation_space.shape[0] +nb_outputs = env.action_space.shape[0] +policy = Policy(nb_inputs, nb_outputs) +normalizer = Normalizer(nb_inputs) +train(env, policy, normalizer, hp)