Merge branch 'master' into merge_upstream

2020-01-15 13:24:59 +01:00
parent ee9f25405e 6428ed5e5f
commit ef20f9e62c
370 changed files with 496645 additions and 22916 deletions
--- a/examples/pybullet/gym/pybullet_envs/init.py
+++ b/examples/pybullet/gym/pybullet_envs/init.py
@@ -25,6 +25,14 @@ register(
    reward_threshold=190.0,
 )

+register(
+    id='CartPoleContinuousBulletEnv-v0',
+    entry_point='pybullet_envs.bullet:CartPoleContinuousBulletEnv',
+    max_episode_steps=200,
+    reward_threshold=190.0,
+)
+
+
 register(
    id='MinitaurBulletEnv-v0',
    entry_point='pybullet_envs.bullet:MinitaurBulletEnv',
--- a/examples/pybullet/gym/pybullet_envs/agents/visualize_ppo.py
+++ b/examples/pybullet/gym/pybullet_envs/agents/visualize_ppo.py
@@ -26,6 +26,7 @@ from __future__ import print_function
 import os

 import gym
+from gym import wrappers
 import tensorflow as tf

 from . import tools
--- a/examples/pybullet/gym/pybullet_envs/bullet/init.py
+++ b/examples/pybullet/gym/pybullet_envs/bullet/init.py
@@ -1,4 +1,5 @@
 from pybullet_envs.bullet.cartpole_bullet import CartPoleBulletEnv
+from pybullet_envs.bullet.cartpole_bullet import CartPoleContinuousBulletEnv
 from pybullet_envs.bullet.minitaur_gym_env import MinitaurBulletEnv
 from pybullet_envs.bullet.minitaur_duck_gym_env import MinitaurBulletDuckEnv
 from pybullet_envs.bullet.racecarGymEnv import RacecarGymEnv
--- a/examples/pybullet/gym/pybullet_envs/bullet/bullet_client.py
+++ b/examples/pybullet/gym/pybullet_envs/bullet/bullet_client.py
@@ -1,29 +0,0 @@
-import functools
-import inspect
-import pybullet
-
-
-class BulletClient(object):
-  """A wrapper for pybullet to manage different clients."""
-
-  def __init__(self, connection_mode=pybullet.DIRECT, options=""):
-    """Create a simulation and connect to it."""
-    self._client = pybullet.connect(pybullet.SHARED_MEMORY)
-    if (self._client < 0):
-      print("options=", options)
-      self._client = pybullet.connect(connection_mode, options=options)
-    self._shapes = {}
-
-  def __del__(self):
-    """Clean up connection if not already done."""
-    try:
-      pybullet.disconnect(physicsClientId=self._client)
-    except pybullet.error:
-      pass
-
-  def __getattr__(self, name):
-    """Inject the client id into Bullet functions."""
-    attribute = getattr(pybullet, name)
-    if inspect.isbuiltin(attribute):
-      attribute = functools.partial(attribute, physicsClientId=self._client)
-    return attribute
--- a/examples/pybullet/gym/pybullet_envs/bullet/cartpole_bullet.py
+++ b/examples/pybullet/gym/pybullet_envs/bullet/cartpole_bullet.py
@@ -15,8 +15,9 @@ from gym.utils import seeding
 import numpy as np
 import time
 import subprocess
-import pybullet as p
+import pybullet as p2
 import pybullet_data
+import pybullet_utils.bullet_client as bc
 from pkg_resources import parse_version

 logger = logging.getLogger(__name__)
@@ -25,13 +26,13 @@ logger = logging.getLogger(__name__)
 class CartPoleBulletEnv(gym.Env):
  metadata = {'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': 50}

-  def __init__(self, renders=True):
+  def __init__(self, renders=False, discrete_actions=True):
    # start the bullet physics server
    self._renders = renders
-    if (renders):
-      p.connect(p.GUI)
-    else:
-      p.connect(p.DIRECT)
+    self._discrete_actions = discrete_actions
+    self._render_height = 200
+    self._render_width = 320
+    self._physics_client_id = -1
    self.theta_threshold_radians = 12 * 2 * math.pi / 360
    self.x_threshold = 0.4  #2.4
    high = np.array([
@@ -42,7 +43,13 @@ class CartPoleBulletEnv(gym.Env):

    self.force_mag = 10

-    self.action_space = spaces.Discrete(2)
+    if self._discrete_actions:
+      self.action_space = spaces.Discrete(2)
+    else:
+      action_dim = 1
+      action_high = np.array([self.force_mag] * action_dim)
+      self.action_space = spaces.Box(-action_high, action_high)
+    
    self.observation_space = spaces.Box(-high, high, dtype=np.float32)

    self.seed()
@@ -58,7 +65,11 @@ class CartPoleBulletEnv(gym.Env):
    return [seed]

  def step(self, action):
-    force = self.force_mag if action == 1 else -self.force_mag
+    p = self._p
+    if self._discrete_actions:
+      force = self.force_mag if action == 1 else -self.force_mag
+    else:
+      force = action[0]

    p.setJointMotorControl2(self.cartpole, 0, p.TORQUE_CONTROL, force=force)
    p.stepSimulation()
@@ -77,19 +88,27 @@ class CartPoleBulletEnv(gym.Env):

  def reset(self):
    #    print("-----------reset simulation---------------")
-    p.resetSimulation()
-    self.cartpole = p.loadURDF(os.path.join(pybullet_data.getDataPath(), "cartpole.urdf"),
-                               [0, 0, 0])
-    p.changeDynamics(self.cartpole, -1, linearDamping=0, angularDamping=0)
-    p.changeDynamics(self.cartpole, 0, linearDamping=0, angularDamping=0)
-    p.changeDynamics(self.cartpole, 1, linearDamping=0, angularDamping=0)
-    self.timeStep = 0.02
-    p.setJointMotorControl2(self.cartpole, 1, p.VELOCITY_CONTROL, force=0)
-    p.setJointMotorControl2(self.cartpole, 0, p.VELOCITY_CONTROL, force=0)
-    p.setGravity(0, 0, -9.8)
-    p.setTimeStep(self.timeStep)
-    p.setRealTimeSimulation(0)
-
+    if self._physics_client_id < 0:
+      if self._renders:
+        self._p = bc.BulletClient(connection_mode=p2.GUI)
+      else:
+        self._p = bc.BulletClient()
+      self._physics_client_id = self._p._client
+ 
+      p = self._p
+      p.resetSimulation()
+      self.cartpole = p.loadURDF(os.path.join(pybullet_data.getDataPath(), "cartpole.urdf"),
+                                 [0, 0, 0])
+      p.changeDynamics(self.cartpole, -1, linearDamping=0, angularDamping=0)
+      p.changeDynamics(self.cartpole, 0, linearDamping=0, angularDamping=0)
+      p.changeDynamics(self.cartpole, 1, linearDamping=0, angularDamping=0)
+      self.timeStep = 0.02
+      p.setJointMotorControl2(self.cartpole, 1, p.VELOCITY_CONTROL, force=0)
+      p.setJointMotorControl2(self.cartpole, 0, p.VELOCITY_CONTROL, force=0)
+      p.setGravity(0, 0, -9.8)
+      p.setTimeStep(self.timeStep)
+      p.setRealTimeSimulation(0)
+    p = self._p
    randstate = self.np_random.uniform(low=-0.05, high=0.05, size=(4,))
    p.resetJointState(self.cartpole, 1, randstate[0], randstate[1])
    p.resetJointState(self.cartpole, 0, randstate[2], randstate[3])
@@ -99,4 +118,51 @@ class CartPoleBulletEnv(gym.Env):
    return np.array(self.state)

  def render(self, mode='human', close=False):
-    return
+    if mode == "human":
+      self._renders = True
+    if mode != "rgb_array":
+      return np.array([])
+    base_pos=[0,0,0]
+    self._cam_dist = 2
+    self._cam_pitch = 0.3
+    self._cam_yaw = 0
+    if (self._physics_client_id>=0):
+      view_matrix = self._p.computeViewMatrixFromYawPitchRoll(
+        cameraTargetPosition=base_pos,
+        distance=self._cam_dist,
+        yaw=self._cam_yaw,
+        pitch=self._cam_pitch,
+        roll=0,
+        upAxisIndex=2)
+      proj_matrix = self._p.computeProjectionMatrixFOV(fov=60,
+             aspect=float(self._render_width) /
+             self._render_height,
+             nearVal=0.1,
+             farVal=100.0)
+      (_, _, px, _, _) = self._p.getCameraImage(
+          width=self._render_width,
+          height=self._render_height,
+          renderer=self._p.ER_BULLET_HARDWARE_OPENGL,
+          viewMatrix=view_matrix,
+          projectionMatrix=proj_matrix)
+    else:
+      px = np.array([[[255,255,255,255]]*self._render_width]*self._render_height, dtype=np.uint8)
+    rgb_array = np.array(px, dtype=np.uint8)
+    rgb_array = np.reshape(np.array(px), (self._render_height, self._render_width, -1))
+    rgb_array = rgb_array[:, :, :3]
+    return rgb_array
+
+  def configure(self, args):
+    pass
+    
+  def close(self):
+    if self._physics_client_id >= 0:
+      self._p.disconnect()
+    self._physics_client_id = -1
+
+class CartPoleContinuousBulletEnv(CartPoleBulletEnv):
+  metadata = {'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': 50}
+
+  def __init__(self, renders=False):
+    # start the bullet physics server
+    CartPoleBulletEnv.__init__(self, renders, discrete_actions=False)
--- a/examples/pybullet/gym/pybullet_envs/bullet/minitaur_duck_gym_env.py
+++ b/examples/pybullet/gym/pybullet_envs/bullet/minitaur_duck_gym_env.py
@@ -17,7 +17,7 @@ from gym import spaces
 from gym.utils import seeding
 import numpy as np
 import pybullet
-from . import bullet_client
+import pybullet_utils.bullet_client as bc
 from . import minitaur
 import pybullet_data
 from . import minitaur_env_randomizer
@@ -150,9 +150,9 @@ class MinitaurBulletDuckEnv(gym.Env):
      self._action_repeat *= NUM_SUBSTEPS

    if self._is_render:
-      self._pybullet_client = bullet_client.BulletClient(connection_mode=pybullet.GUI)
+      self._pybullet_client = bc.BulletClient(connection_mode=pybullet.GUI)
    else:
-      self._pybullet_client = bullet_client.BulletClient()
+      self._pybullet_client = bc.BulletClient()

    self.seed()
    self.reset()
--- a/examples/pybullet/gym/pybullet_envs/bullet/minitaur_gym_env.py
+++ b/examples/pybullet/gym/pybullet_envs/bullet/minitaur_gym_env.py
@@ -14,7 +14,7 @@ from gym import spaces
 from gym.utils import seeding
 import numpy as np
 import pybullet
-from . import bullet_client
+import pybullet_utils.bullet_client as bc
 from . import minitaur
 import os
 import pybullet_data
@@ -145,9 +145,9 @@ class MinitaurBulletEnv(gym.Env):
      self._action_repeat *= NUM_SUBSTEPS

    if self._is_render:
-      self._pybullet_client = bullet_client.BulletClient(connection_mode=pybullet.GUI)
+      self._pybullet_client = bc.BulletClient(connection_mode=pybullet.GUI)
    else:
-      self._pybullet_client = bullet_client.BulletClient()
+      self._pybullet_client = bc.BulletClient()

    self.seed()
    self.reset()
--- a/examples/pybullet/gym/pybullet_envs/bullet/racecarGymEnv.py
+++ b/examples/pybullet/gym/pybullet_envs/bullet/racecarGymEnv.py
@@ -12,7 +12,7 @@ import numpy as np
 import pybullet
 from . import racecar
 import random
-from . import bullet_client
+import pybullet_utils.bullet_client as bc
 import pybullet_data
 from pkg_resources import parse_version

@@ -40,9 +40,9 @@ class RacecarGymEnv(gym.Env):
    self._renders = renders
    self._isDiscrete = isDiscrete
    if self._renders:
-      self._p = bullet_client.BulletClient(connection_mode=pybullet.GUI)
+      self._p = bc.BulletClient(connection_mode=pybullet.GUI)
    else:
-      self._p = bullet_client.BulletClient()
+      self._p = bc.BulletClient()

    self.seed()
    #self.reset()
--- a/examples/pybullet/gym/pybullet_envs/bullet/racecarZEDGymEnv.py
+++ b/examples/pybullet/gym/pybullet_envs/bullet/racecarZEDGymEnv.py
@@ -10,7 +10,7 @@ from gym.utils import seeding
 import numpy as np
 import time
 import pybullet
-from . import bullet_client
+import pybullet_utils.bullet_client as bc
 from . import racecar
 import random
 import pybullet_data
@@ -42,9 +42,9 @@ class RacecarZEDGymEnv(gym.Env):

    self._isDiscrete = isDiscrete
    if self._renders:
-      self._p = bullet_client.BulletClient(connection_mode=pybullet.GUI)
+      self._p = bc.BulletClient(connection_mode=pybullet.GUI)
    else:
-      self._p = bullet_client.BulletClient()
+      self._p = bc.BulletClient()

    self.seed()
    self.reset()
--- a/examples/pybullet/gym/pybullet_envs/deep_mimic/env/humanoid_stable_pd.py
+++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/env/humanoid_stable_pd.py
@@ -25,12 +25,12 @@ class HumanoidStablePD(object):
    self._mocap_data = mocap_data
    self._arg_parser = arg_parser
    print("LOADING humanoid!")
-
+    flags=self._pybullet_client.URDF_MAINTAIN_LINK_ORDER+self._pybullet_client.URDF_USE_SELF_COLLISION+self._pybullet_client.URDF_USE_SELF_COLLISION_EXCLUDE_ALL_PARENTS
    self._sim_model = self._pybullet_client.loadURDF(
        "humanoid/humanoid.urdf", [0, 0.889540259, 0],
        globalScaling=0.25,
        useFixedBase=useFixedBase,
-        flags=self._pybullet_client.URDF_MAINTAIN_LINK_ORDER)
+        flags=flags)

    #self._pybullet_client.setCollisionFilterGroupMask(self._sim_model,-1,collisionFilterGroup=0,collisionFilterMask=0)
    #for j in range (self._pybullet_client.getNumJoints(self._sim_model)):
@@ -347,7 +347,6 @@ class HumanoidStablePD(object):
    #static char* kwlist[] = { "bodyUniqueId", 
    #"jointIndices", 
    #"controlMode", "targetPositions", "targetVelocities", "forces", "positionGains", "velocityGains", "maxVelocities", "physicsClientId", NULL };
-  
    self._pybullet_client.setJointMotorControlMultiDofArray(self._sim_model,
                                                           indices,
                                                           self._pybullet_client.STABLE_PD_CONTROL,
@@ -811,7 +810,10 @@ class HumanoidStablePD(object):
    rootPosSim, rootOrnSim = self._pybullet_client.getBasePositionAndOrientation(self._sim_model)
    rootPosKin, rootOrnKin = self._pybullet_client.getBasePositionAndOrientation(self._kin_model)
    linVelSim, angVelSim = self._pybullet_client.getBaseVelocity(self._sim_model)
-    linVelKin, angVelKin = self._pybullet_client.getBaseVelocity(self._kin_model)
+    #don't read the velocities from the kinematic model (they are zero), use the pose interpolator velocity
+    #see also issue https://github.com/bulletphysics/bullet3/issues/2401 
+    linVelKin = self._poseInterpolator._baseLinVel
+    angVelKin = self._poseInterpolator._baseAngVel

    root_rot_err = self.calcRootRotDiff(rootOrnSim, rootOrnKin)
    pose_err += root_rot_w * root_rot_err
--- a/examples/pybullet/gym/pybullet_envs/deep_mimic/testrl.py
+++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/testrl.py
@@ -37,6 +37,8 @@ def build_arg_parser(args):
  arg_parser.load_args(args)

  arg_file = arg_parser.parse_string('arg_file', '')
+  if arg_file == '':
+    arg_file = "run_humanoid3d_backflip_args.txt"
  if (arg_file != ''):
    path = pybullet_data.getDataPath() + "/args/" + arg_file
    succ = arg_parser.load_file(path)
--- a/examples/pybullet/gym/pybullet_envs/env_bases.py
+++ b/examples/pybullet/gym/pybullet_envs/env_bases.py
@@ -1,10 +1,17 @@
 import gym, gym.spaces, gym.utils, gym.utils.seeding
 import numpy as np
 import pybullet
+import os
+
 from pybullet_utils import bullet_client

 from pkg_resources import parse_version

+try:
+  if os.environ["PYBULLET_EGL"]:
+    import pkgutil
+except:
+  pass

 class MJCFBaseBulletEnv(gym.Env):
  """
@@ -31,6 +38,7 @@ class MJCFBaseBulletEnv(gym.Env):

    self.action_space = robot.action_space
    self.observation_space = robot.observation_space
+    #self.reset()

  def configure(self, args):
    self.robot.args = args
@@ -48,7 +56,19 @@ class MJCFBaseBulletEnv(gym.Env):
        self._p = bullet_client.BulletClient(connection_mode=pybullet.GUI)
      else:
        self._p = bullet_client.BulletClient()
-
+      self._p.resetSimulation()
+      #optionally enable EGL for faster headless rendering
+      try:
+        if os.environ["PYBULLET_EGL"]:
+          con_mode = self._p.getConnectionInfo()['connectionMethod']
+          if con_mode==self._p.DIRECT:
+            egl = pkgutil.get_loader('eglRenderer')
+            if (egl):
+              self._p.loadPlugin(egl.get_filename(), "_eglRendererPlugin")
+            else:
+              self._p.loadPlugin("eglRendererPlugin")
+      except:
+        pass
      self.physicsClientId = self._p._client
      self._p.configureDebugVisualizer(pybullet.COV_ENABLE_GUI, 0)

@@ -77,24 +97,35 @@ class MJCFBaseBulletEnv(gym.Env):
    if (hasattr(self, 'robot')):
      if (hasattr(self.robot, 'body_xyz')):
        base_pos = self.robot.body_xyz
-
-    view_matrix = self._p.computeViewMatrixFromYawPitchRoll(cameraTargetPosition=base_pos,
+    if (self.physicsClientId>=0):
+      view_matrix = self._p.computeViewMatrixFromYawPitchRoll(cameraTargetPosition=base_pos,
                                                            distance=self._cam_dist,
                                                            yaw=self._cam_yaw,
                                                            pitch=self._cam_pitch,
                                                            roll=0,
                                                            upAxisIndex=2)
-    proj_matrix = self._p.computeProjectionMatrixFOV(fov=60,
+      proj_matrix = self._p.computeProjectionMatrixFOV(fov=60,
                                                     aspect=float(self._render_width) /
                                                     self._render_height,
                                                     nearVal=0.1,
                                                     farVal=100.0)
-    (_, _, px, _, _) = self._p.getCameraImage(width=self._render_width,
+      (_, _, px, _, _) = self._p.getCameraImage(width=self._render_width,
                                              height=self._render_height,
                                              viewMatrix=view_matrix,
                                              projectionMatrix=proj_matrix,
                                              renderer=pybullet.ER_BULLET_HARDWARE_OPENGL)
-    rgb_array = np.array(px)
+      try:
+        # Keep the previous orientation of the camera set by the user.
+        con_mode = self._p.getConnectionInfo()['connectionMethod']
+        if con_mode==self._p.SHARED_MEMORY or con_mode == self._p.GUI:
+          [yaw, pitch, dist] = self._p.getDebugVisualizerCamera()[8:11]
+          self._p.resetDebugVisualizerCamera(dist, yaw, pitch, base_pos)
+      except:
+        pass
+
+    else:
+      px = np.array([[[255,255,255,255]]*self._render_width]*self._render_height, dtype=np.uint8)
+    rgb_array = np.array(px, dtype=np.uint8)
    rgb_array = np.reshape(np.array(px), (self._render_height, self._render_width, -1))
    rgb_array = rgb_array[:, :, :3]
    return rgb_array
--- a/examples/pybullet/gym/pybullet_envs/examples/batchsim3.py
+++ b/examples/pybullet/gym/pybullet_envs/examples/batchsim3.py
@@ -0,0 +1,189 @@
+import os
+import inspect
+currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
+parentdir = os.path.dirname(os.path.dirname(currentdir))
+os.sys.path.insert(0, parentdir)
+
+from pybullet_utils import bullet_client
+from pybullet_envs.examples import panda_sim
+
+
+import time
+
+useGUI = False
+timeStep = 1./60.
+
+# Importing the libraries
+import os
+import time
+import multiprocessing as mp
+from multiprocessing import Process, Pipe
+
+pandaEndEffectorIndex = 11 #8
+pandaNumDofs = 7
+
+
+_RESET = 1
+_CLOSE = 2
+_EXPLORE = 3
+
+
+def ExploreWorker(rank, num_processes, childPipe, args):
+  print("hi:",rank, " out of ", num_processes)  
+  import pybullet as op1
+  import pybullet_data as pd
+  logName=""
+  p1=0
+  n = 0
+  space = 2 
+  simulations=[]
+  sims_per_worker = 10
+  
+  offsetY = rank*space
+  while True:
+    n += 1
+    try:
+      # Only block for short times to have keyboard exceptions be raised.
+      if not childPipe.poll(0.0001):
+        continue
+      message, payload = childPipe.recv()
+    except (EOFError, KeyboardInterrupt):
+      break
+    if message == _RESET:
+      if (useGUI):
+        p1 = bullet_client.BulletClient(op1.GUI)
+      else:
+        p1 = bullet_client.BulletClient(op1.DIRECT)
+      p1.setTimeStep(timeStep)
+      
+      p1.setPhysicsEngineParameter(numSolverIterations=8)
+      p1.setPhysicsEngineParameter(minimumSolverIslandSize=100)
+      p1.configureDebugVisualizer(p1.COV_ENABLE_Y_AXIS_UP,1)
+      p1.configureDebugVisualizer(p1.COV_ENABLE_RENDERING,0)
+      p1.setAdditionalSearchPath(pd.getDataPath())
+      p1.setGravity(0,-9.8,0)
+      logName = str("batchsim")+str(rank)
+      for j in range (3):
+        offsetX = 0#-sims_per_worker/2.0*space
+        for i in range (sims_per_worker):
+          offset=[offsetX,0, offsetY]
+          sim = panda_sim.PandaSim(p1, offset)
+          simulations.append(sim)
+          offsetX += space 
+        offsetY += space 
+      childPipe.send(["reset ok"])
+      p1.configureDebugVisualizer(p1.COV_ENABLE_RENDERING,1)
+      for i in range (100):
+        p1.stepSimulation()
+      
+      logId = p1.startStateLogging(op1.STATE_LOGGING_PROFILE_TIMINGS,logName)
+      continue
+    if message == _EXPLORE:
+      sum_rewards=rank
+      
+      if useGUI:
+        numSteps = int(20000)
+      else:
+        numSteps = int(5)
+      for i in range (numSteps):
+        for s in simulations:
+          s.step()
+        p1.stepSimulation()
+      #print("logId=",logId)
+      #print("numSteps=",numSteps)
+
+      childPipe.send([sum_rewards])
+      continue
+    if message == _CLOSE:
+      p1.stopStateLogging(logId)
+      childPipe.send(["close ok"])
+      break
+  childPipe.close()
+  
+
+if __name__ == "__main__":
+  mp.freeze_support()
+  if useGUI:
+    num_processes = 1
+  else:
+    num_processes = 12
+  processes = []
+  args=[0]*num_processes
+  
+  childPipes = []
+  parentPipes = []
+
+  for pr in range(num_processes):
+    parentPipe, childPipe = Pipe()
+    parentPipes.append(parentPipe)
+    childPipes.append(childPipe)
+
+  for rank in range(num_processes):
+    p = mp.Process(target=ExploreWorker, args=(rank, num_processes, childPipes[rank],  args))
+    p.start()
+    processes.append(p)
+      
+  
+  for parentPipe in parentPipes:
+    parentPipe.send([_RESET, "blaat"])
+  
+  positive_rewards = [0]*num_processes
+  for k in range(num_processes):
+    #print("reset msg=",parentPipes[k].recv()[0])
+    msg = parentPipes[k].recv()[0]
+  
+  for parentPipe in parentPipes:
+    parentPipe.send([_EXPLORE, "blaat"])
+  
+  positive_rewards = [0]*num_processes
+  for k in range(num_processes):
+    positive_rewards[k] = parentPipes[k].recv()[0]
+    #print("positive_rewards=",positive_rewards[k])
+
+  
+  for parentPipe in parentPipes:
+    parentPipe.send([_EXPLORE, "blaat"])
+  
+  positive_rewards = [0]*num_processes
+  for k in range(num_processes):
+    positive_rewards[k] = parentPipes[k].recv()[0]
+    #print("positive_rewards=",positive_rewards[k])
+    msg = positive_rewards[k]
+
+  for parentPipe in parentPipes:
+    parentPipe.send([_EXPLORE, "blaat"])
+  
+  positive_rewards = [0]*num_processes
+  for k in range(num_processes):
+    positive_rewards[k] = parentPipes[k].recv()[0]
+    #print("positive_rewards=",positive_rewards[k])
+
+  
+  for parentPipe in parentPipes:
+    parentPipe.send([_CLOSE, "pay2"])
+  
+  for p in processes:
+    p.join()
+  
+  #now we merge the separate json files into a single one
+  fnameout = 'batchsim.json'
+  count = 0
+  outfile = open(fnameout, "w+")
+  outfile.writelines(["{\"traceEvents\":[\n"])
+  numFiles = num_processes
+  for num in range(numFiles):
+    print("num=",num)
+    fname = 'batchsim%d_0.json' % (num)
+    with open(fname) as infile:
+        for line in infile:
+          if "pid" in line:
+            line = line.replace('\"pid\":1', '\"pid\":'+str(num))
+            if num < (numFiles-1) and not "{}}," in line:
+              line = line.replace('{}}', '{}},')
+              print("line[",count,"]=",line)
+            outfile.write(line)
+          count += 1
+  print ("count=",count)
+  outfile.writelines(["],\n"])
+  outfile.writelines(["\"displayTimeUnit\": \"ns\"}\n"])
+  outfile.close()
--- a/examples/pybullet/gym/pybullet_envs/examples/laikago.py
+++ b/examples/pybullet/gym/pybullet_envs/examples/laikago.py
@@ -1,114 +0,0 @@
-import pybullet as p
-import pybullet_data as pd
-
-import time
-
-p.connect(p.GUI)
-p.setAdditionalSearchPath(pd.getDataPath())
-
-plane = p.loadURDF("plane.urdf")
-p.setGravity(0, 0, -9.8)
-p.setTimeStep(1. / 500)
-#p.setDefaultContactERP(0)
-#urdfFlags = p.URDF_USE_SELF_COLLISION+p.URDF_USE_SELF_COLLISION_EXCLUDE_ALL_PARENTS
-urdfFlags = p.URDF_USE_SELF_COLLISION
-quadruped = p.loadURDF("laikago/laikago.urdf", [0, 0, .5], [0, 0.5, 0.5, 0],
-                       flags=urdfFlags,
-                       useFixedBase=False)
-
-#enable collision between lower legs
-
-for j in range(p.getNumJoints(quadruped)):
-  print(p.getJointInfo(quadruped, j))
-
-#2,5,8 and 11 are the lower legs
-lower_legs = [2, 5, 8, 11]
-for l0 in lower_legs:
-  for l1 in lower_legs:
-    if (l1 > l0):
-      enableCollision = 1
-      print("collision for pair", l0, l1,
-            p.getJointInfo(quadruped, l0)[12],
-            p.getJointInfo(quadruped, l1)[12], "enabled=", enableCollision)
-      p.setCollisionFilterPair(quadruped, quadruped, 2, 5, enableCollision)
-
-jointIds = []
-paramIds = []
-jointOffsets = []
-jointDirections = [-1, 1, 1, 1, 1, 1, -1, 1, 1, 1, 1, 1]
-jointAngles = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
-
-for i in range(4):
-  jointOffsets.append(0)
-  jointOffsets.append(-0.7)
-  jointOffsets.append(0.7)
-
-maxForceId = p.addUserDebugParameter("maxForce", 0, 100, 20)
-
-for j in range(p.getNumJoints(quadruped)):
-  p.changeDynamics(quadruped, j, linearDamping=0, angularDamping=0)
-  info = p.getJointInfo(quadruped, j)
-  #print(info)
-  jointName = info[1]
-  jointType = info[2]
-  if (jointType == p.JOINT_PRISMATIC or jointType == p.JOINT_REVOLUTE):
-    jointIds.append(j)
-
-p.getCameraImage(480, 320)
-p.setRealTimeSimulation(0)
-
-joints = []
-
-with open(pd.getDataPath() + "/laikago/data1.txt", "r") as filestream:
-  for line in filestream:
-    print("line=", line)
-    maxForce = p.readUserDebugParameter(maxForceId)
-    currentline = line.split(",")
-    #print (currentline)
-    #print("-----")
-    frame = currentline[0]
-    t = currentline[1]
-    #print("frame[",frame,"]")
-    joints = currentline[2:14]
-    #print("joints=",joints)
-    for j in range(12):
-      targetPos = float(joints[j])
-      p.setJointMotorControl2(quadruped,
-                              jointIds[j],
-                              p.POSITION_CONTROL,
-                              jointDirections[j] * targetPos + jointOffsets[j],
-                              force=maxForce)
-    p.stepSimulation()
-    for lower_leg in lower_legs:
-      #print("points for ", quadruped, " link: ", lower_leg)
-      pts = p.getContactPoints(quadruped, -1, lower_leg)
-      #print("num points=",len(pts))
-      #for pt in pts:
-      # print(pt[9])
-    time.sleep(1. / 500.)
-
-for j in range(p.getNumJoints(quadruped)):
-  p.changeDynamics(quadruped, j, linearDamping=0, angularDamping=0)
-  info = p.getJointInfo(quadruped, j)
-  js = p.getJointState(quadruped, j)
-  #print(info)
-  jointName = info[1]
-  jointType = info[2]
-  if (jointType == p.JOINT_PRISMATIC or jointType == p.JOINT_REVOLUTE):
-    paramIds.append(
-        p.addUserDebugParameter(jointName.decode("utf-8"), -4, 4,
-                                (js[0] - jointOffsets[j]) / jointDirections[j]))
-
-p.setRealTimeSimulation(1)
-
-while (1):
-
-  for i in range(len(paramIds)):
-    c = paramIds[i]
-    targetPos = p.readUserDebugParameter(c)
-    maxForce = p.readUserDebugParameter(maxForceId)
-    p.setJointMotorControl2(quadruped,
-                            jointIds[i],
-                            p.POSITION_CONTROL,
-                            jointDirections[i] * targetPos + jointOffsets[i],
-                            force=maxForce)
--- a/examples/pybullet/gym/pybullet_envs/examples/loadpanda.py
+++ b/examples/pybullet/gym/pybullet_envs/examples/loadpanda.py
@@ -0,0 +1,21 @@
+import pybullet as p
+import pybullet_data as pd
+import math
+import time
+import numpy as np
+from pybullet_envs.examples import panda_sim
+
+p.connect(p.GUI)
+p.configureDebugVisualizer(p.COV_ENABLE_Y_AXIS_UP,1)
+p.setAdditionalSearchPath(pd.getDataPath())
+
+timeStep=1./60.
+p.setTimeStep(timeStep)
+p.setGravity(0,-9.8,0)
+ 
+panda = panda_sim.PandaSim(p,[0,0,0])
+while (1):
+	panda.step()
+	p.stepSimulation()
+	time.sleep(timeStep)
+	
--- a/examples/pybullet/gym/pybullet_envs/examples/loadxarm.py
+++ b/examples/pybullet/gym/pybullet_envs/examples/loadxarm.py
@@ -0,0 +1,20 @@
+import pybullet as p
+import pybullet_data as pd
+import math
+import time
+import numpy as np
+import xarm_sim
+
+p.connect(p.GUI)
+p.setAdditionalSearchPath(pd.getDataPath())
+
+timeStep=1./60.
+p.setTimeStep(timeStep)
+p.setGravity(0,0,-9.8)
+ 
+xarm = xarm_sim.XArm6Sim(p,[0,0,0])
+while (1):
+	xarm.step()
+	p.stepSimulation()
+	time.sleep(timeStep)
+	
--- a/examples/pybullet/gym/pybullet_envs/examples/panda_sim.py
+++ b/examples/pybullet/gym/pybullet_envs/examples/panda_sim.py
@@ -0,0 +1,65 @@
+import time
+import numpy as np
+import math
+
+useNullSpace = 1
+ikSolver = 0
+pandaEndEffectorIndex = 11 #8
+pandaNumDofs = 7
+
+ll = [-7]*pandaNumDofs
+#upper limits for null space (todo: set them to proper range)
+ul = [7]*pandaNumDofs
+#joint ranges for null space (todo: set them to proper range)
+jr = [7]*pandaNumDofs
+#restposes for null space
+jointPositions=[0.98, 0.458, 0.31, -2.24, -0.30, 2.66, 2.32, 0.02, 0.02]
+rp = jointPositions
+
+class PandaSim(object):
+  def __init__(self, bullet_client, offset):
+    self.bullet_client = bullet_client
+    self.offset = np.array(offset)
+    
+    #print("offset=",offset)
+    flags = self.bullet_client.URDF_ENABLE_CACHED_GRAPHICS_SHAPES
+    legos=[]
+    self.bullet_client.loadURDF("tray/traybox.urdf", [0+offset[0], 0+offset[1], -0.6+offset[2]], [-0.5, -0.5, -0.5, 0.5], flags=flags)
+    legos.append(self.bullet_client.loadURDF("lego/lego.urdf",np.array([0.1, 0.3, -0.5])+self.offset, flags=flags))
+    legos.append(self.bullet_client.loadURDF("lego/lego.urdf",np.array([-0.1, 0.3, -0.5])+self.offset, flags=flags))
+    legos.append(self.bullet_client.loadURDF("lego/lego.urdf",np.array([0.1, 0.3, -0.7])+self.offset, flags=flags))
+    sphereId = self.bullet_client.loadURDF("sphere_small.urdf",np.array( [0, 0.3, -0.6])+self.offset, flags=flags)
+    self.bullet_client.loadURDF("sphere_small.urdf",np.array( [0, 0.3, -0.5])+self.offset, flags=flags)
+    self.bullet_client.loadURDF("sphere_small.urdf",np.array( [0, 0.3, -0.7])+self.offset, flags=flags)
+    orn=[-0.707107, 0.0, 0.0, 0.707107]#p.getQuaternionFromEuler([-math.pi/2,math.pi/2,0])
+    eul = self.bullet_client.getEulerFromQuaternion([-0.5, -0.5, -0.5, 0.5])
+    self.panda = self.bullet_client.loadURDF("franka_panda/panda.urdf", np.array([0,0,0])+self.offset, orn, useFixedBase=True, flags=flags)
+    index = 0
+    for j in range(self.bullet_client.getNumJoints(self.panda)):
+      self.bullet_client.changeDynamics(self.panda, j, linearDamping=0, angularDamping=0)
+      info = self.bullet_client.getJointInfo(self.panda, j)
+  
+      jointName = info[1]
+      jointType = info[2]
+      if (jointType == self.bullet_client.JOINT_PRISMATIC):
+        
+        self.bullet_client.resetJointState(self.panda, j, jointPositions[index]) 
+        index=index+1
+      if (jointType == self.bullet_client.JOINT_REVOLUTE):
+        self.bullet_client.resetJointState(self.panda, j, jointPositions[index]) 
+        index=index+1
+    self.t = 0.
+  def reset(self):
+    pass
+
+  def step(self):
+    t = self.t
+    self.t += 1./60.
+    pos = [self.offset[0]+0.2 * math.sin(1.5 * t), self.offset[1]+0.044, self.offset[2]+-0.6 + 0.1 * math.cos(1.5 * t)]
+    orn = self.bullet_client.getQuaternionFromEuler([math.pi/2.,0.,0.])
+    jointPoses = self.bullet_client.calculateInverseKinematics(self.panda,pandaEndEffectorIndex, pos, orn, ll, ul,
+      jr, rp, maxNumIterations=5)
+    for i in range(pandaNumDofs):
+        self.bullet_client.setJointMotorControl2(self.panda, i, self.bullet_client.POSITION_CONTROL, jointPoses[i],force=5 * 240.)
+    pass
+  
--- a/examples/pybullet/gym/pybullet_envs/examples/testBike.py
+++ b/examples/pybullet/gym/pybullet_envs/examples/testBike.py
@@ -1,139 +0,0 @@
-import os
-import inspect
-currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
-parentdir = os.path.dirname(os.path.dirname(currentdir))
-os.sys.path.insert(0, parentdir)
-
-import pybullet as p
-import math
-import time
-import pybullet_data
-
-p.connect(p.GUI)
-#p.loadURDF("wheel.urdf",[0,0,3])
-p.setAdditionalSearchPath(pybullet_data.getDataPath())
-plane = p.loadURDF("plane100.urdf", [0, 0, 0])
-timestep = 1. / 240.
-
-bike = -1
-for i in range(1):
-
-  bike = p.loadURDF("bicycle/bike.urdf", [0, 0 + 3 * i, 1.5], [0, 0, 0, 1], useFixedBase=False)
-  p.setJointMotorControl2(bike, 0, p.VELOCITY_CONTROL, targetVelocity=0, force=0.05)
-  #p.setJointMotorControl2(bike,1,p.VELOCITY_CONTROL,targetVelocity=5, force=1000)
-  p.setJointMotorControl2(bike, 1, p.VELOCITY_CONTROL, targetVelocity=5, force=0)
-  p.setJointMotorControl2(bike, 2, p.VELOCITY_CONTROL, targetVelocity=15, force=20)
-
-  p.changeDynamics(plane, -1, mass=0, lateralFriction=1, linearDamping=0, angularDamping=0)
-  p.changeDynamics(bike, 1, lateralFriction=1, linearDamping=0, angularDamping=0)
-  p.changeDynamics(bike, 2, lateralFriction=1, linearDamping=0, angularDamping=0)
-  #p.resetJointState(bike,1,0,100)
-  #p.resetJointState(bike,2,0,100)
-  #p.resetBaseVelocity(bike,[0,0,0],[0,0,0])
-#p.setPhysicsEngineParameter(numSubSteps=0)
-#bike=p.loadURDF("frame.urdf",useFixedBase=True)
-#bike = p.loadURDF("handlebar.urdf", useFixedBase=True)
-#p.loadURDF("handlebar.urdf",[0,2,1])
-#coord	=	p.loadURDF("handlebar.urdf", [0.7700000000000005,	0, 0.22000000000000006],useFixedBase=True)#	p.loadURDF("coordinateframe.urdf",[-2,0,1],useFixedBase=True)
-#coord	=	p.loadURDF("coordinateframe.urdf",[-2,0,1],useFixedBase=True)
-p.setGravity(0, 0, -10)
-p.setRealTimeSimulation(0)
-#coordPos	=	[0,0,0]
-#coordOrnEuler = [0,0,0]
-
-#coordPos= [0.7000000000000004, 0, 0.22000000000000006]
-#coordOrnEuler= [0, -0.2617993877991496, 0]
-
-coordPos = [0.07, 0, -0.6900000000000004]
-coordOrnEuler = [0, 0, 0]
-
-coordPos2 = [0, 0, 0]
-coordOrnEuler2 = [0, 0, 0]
-
-invPos, invOrn = p.invertTransform(coordPos, p.getQuaternionFromEuler(coordOrnEuler))
-mPos, mOrn = p.multiplyTransforms(invPos, invOrn, coordPos2,
-                                  p.getQuaternionFromEuler(coordOrnEuler2))
-eul = p.getEulerFromQuaternion(mOrn)
-print("rpy=\"", eul[0], eul[1], eul[2], "\" xyz=\"", mPos[0], mPos[1], mPos[2])
-
-shift = 0
-gui = 1
-
-frame = 0
-states = []
-states.append(p.saveState())
-#observations=[]
-#observations.append(obs)
-
-running = True
-reverting = False
-p.getCameraImage(320, 200)  #,renderer=p.ER_BULLET_HARDWARE_OPENGL )
-
-while (1):
-
-  updateCam = 0
-  keys = p.getKeyboardEvents()
-  for k, v in keys.items():
-    if (reverting or (k == p.B3G_LEFT_ARROW and (v & p.KEY_WAS_TRIGGERED))):
-      reverting = True
-      stateIndex = len(states) - 1
-      #print("prestateIndex=",stateIndex)
-      time.sleep(timestep)
-      updateCam = 1
-      if (stateIndex > 0):
-        stateIndex -= 1
-        states = states[:stateIndex + 1]
-        #observations=observations[:stateIndex+1]
-
-        #print("states=",states)
-      #print("stateIndex =",stateIndex )
-      p.restoreState(states[stateIndex])
-      #obs=observations[stateIndex]
-
-      #obs, r, done, _ = env.step(a)
-    if (k == p.B3G_LEFT_ARROW and (v & p.KEY_WAS_RELEASED)):
-      reverting = False
-
-    if (k == ord('1') and (v & p.KEY_WAS_TRIGGERED)):
-      gui = not gui
-
-    if (k == p.B3G_RIGHT_ARROW and (v & p.KEY_WAS_RELEASED)):
-      running = False
-
-    if (running or (k == p.B3G_RIGHT_ARROW and (v & p.KEY_WAS_TRIGGERED))):
-      running = True
-
-  if (running):
-
-    p.stepSimulation()
-
-    updateCam = 1
-    time.sleep(timestep)
-    pts = p.getContactPoints()
-    #print("numPoints=",len(pts))
-    #for point in pts:
-    #	print("Point:bodyA=", point[1],"bodyB=",point[2],"linkA=",point[3],"linkB=",point[4],"dist=",point[8],"force=",point[9])
-
-    states.append(p.saveState())
-    #observations.append(obs)
-    stateIndex = len(states)
-    #print("stateIndex =",stateIndex )
-    frame += 1
-  if (updateCam):
-    distance = 5
-    yaw = 0
-    humanPos, humanOrn = p.getBasePositionAndOrientation(bike)
-    humanBaseVel = p.getBaseVelocity(bike)
-    #print("frame",frame, "humanPos=",humanPos, "humanVel=",humanBaseVel)
-    if (gui):
-
-      camInfo = p.getDebugVisualizerCamera()
-      curTargetPos = camInfo[11]
-      distance = camInfo[10]
-      yaw = camInfo[8]
-      pitch = camInfo[9]
-      targetPos = [
-          0.95 * curTargetPos[0] + 0.05 * humanPos[0], 0.95 * curTargetPos[1] + 0.05 * humanPos[1],
-          curTargetPos[2]
-      ]
-      p.resetDebugVisualizerCamera(distance, yaw, pitch, targetPos)
--- a/examples/pybullet/gym/pybullet_envs/gym_locomotion_envs.py
+++ b/examples/pybullet/gym/pybullet_envs/gym_locomotion_envs.py
@@ -9,12 +9,12 @@ class WalkerBaseBulletEnv(MJCFBaseBulletEnv):

  def __init__(self, robot, render=False):
    # print("WalkerBase::__init__ start")
-    MJCFBaseBulletEnv.__init__(self, robot, render)
-
    self.camera_x = 0
    self.walk_target_x = 1e3  # kilometer away
    self.walk_target_y = 0
    self.stateId = -1
+    MJCFBaseBulletEnv.__init__(self, robot, render)
+

  def create_single_player_scene(self, bullet_client):
    self.stadium_scene = SinglePlayerStadiumScene(bullet_client,
--- a/examples/pybullet/gym/pybullet_envs/minitaur/envs/bullet_client.py
+++ b/examples/pybullet/gym/pybullet_envs/minitaur/envs/bullet_client.py
@@ -1,54 +0,0 @@
-"""A wrapper for pybullet to manage different clients."""
-from __future__ import absolute_import
-from __future__ import division
-import functools
-import inspect
-import pybullet
-
-
-class BulletClient(object):
-  """A wrapper for pybullet to manage different clients."""
-
-  def __init__(self, connection_mode=None):
-    """Creates a Bullet client and connects to a simulation.
-
-    Args:
-      connection_mode:
-        `None` connects to an existing simulation or, if fails, creates a
-          new headless simulation,
-        `pybullet.GUI` creates a new simulation with a GUI,
-        `pybullet.DIRECT` creates a headless simulation,
-        `pybullet.SHARED_MEMORY` connects to an existing simulation.
-    """
-    self._shapes = {}
-
-    if connection_mode is None:
-      self._client = pybullet.connect(pybullet.SHARED_MEMORY)
-      if self._client >= 0:
-        return
-      else:
-        connection_mode = pybullet.DIRECT
-    self._client = pybullet.connect(connection_mode)
-
-  def __del__(self):
-    """Clean up connection if not already done."""
-    try:
-      pybullet.disconnect(physicsClientId=self._client)
-    except pybullet.error:
-      pass
-
-  def __getattr__(self, name):
-    """Inject the client id into Bullet functions."""
-    attribute = getattr(pybullet, name)
-    if inspect.isbuiltin(attribute):
-      if name not in [
-          "invertTransform",
-          "multiplyTransforms",
-          "getMatrixFromQuaternion",
-          "getEulerFromQuaternion",
-          "computeViewMatrixFromYawPitchRoll",
-          "computeProjectionMatrixFOV",
-          "getQuaternionFromEuler",
-      ]:  # A temporary hack for now.
-        attribute = functools.partial(attribute, physicsClientId=self._client)
-    return attribute
--- a/examples/pybullet/gym/pybullet_envs/minitaur/envs/minitaur_extended_env.py
+++ b/examples/pybullet/gym/pybullet_envs/minitaur/envs/minitaur_extended_env.py
@@ -122,7 +122,7 @@ class MinitaurExtendedEnv(MinitaurReactiveEnv):
    leg_model = []
    if self._include_leg_model:
      raw_motor_angles = self.minitaur.GetMotorAngles()
-      leg_model = self._convert_to_leg_model(raw_motor_angles)
+      leg_model = self.convert_to_leg_model(raw_motor_angles)

    observation_list = (
        [parent_observation] + history_states + history_actions +
@@ -185,7 +185,7 @@ class MinitaurExtendedEnv(MinitaurReactiveEnv):
    if self._never_terminate:
      return False

-    leg_model = self._convert_to_leg_model(self.minitaur.GetMotorAngles())
+    leg_model = self.convert_to_leg_model(self.minitaur.GetMotorAngles())
    swing0 = leg_model[0]
    swing1 = leg_model[2]
    maximum_swing_angle = 0.8
--- a/examples/pybullet/gym/pybullet_envs/minitaur/envs/minitaur_gym_env.py
+++ b/examples/pybullet/gym/pybullet_envs/minitaur/envs/minitaur_gym_env.py
@@ -14,7 +14,7 @@ from gym import spaces
 from gym.utils import seeding
 import numpy as np
 import pybullet
-from pybullet_envs.minitaur.envs import bullet_client
+import pybullet_utils.bullet_client as bc
 import pybullet_data
 from pybullet_envs.minitaur.envs import minitaur
 from pybullet_envs.minitaur.envs import minitaur_derpy
@@ -223,9 +223,9 @@ class MinitaurGymEnv(gym.Env):
    self._env_randomizers = convert_to_list(env_randomizer) if env_randomizer else []
    self._episode_proto = minitaur_logging_pb2.MinitaurEpisode()
    if self._is_render:
-      self._pybullet_client = bullet_client.BulletClient(connection_mode=pybullet.GUI)
+      self._pybullet_client = bc.BulletClient(connection_mode=pybullet.GUI)
    else:
-      self._pybullet_client = bullet_client.BulletClient()
+      self._pybullet_client = bc.BulletClient()
    if self._urdf_version is None:
      self._urdf_version = DEFAULT_URDF_VERSION
    self._pybullet_client.setPhysicsEngineParameter(enableConeFriction=0)
--- a/examples/pybullet/gym/pybullet_envs/prediction/pybullet_sim_gym_env.py
+++ b/examples/pybullet/gym/pybullet_envs/prediction/pybullet_sim_gym_env.py
@@ -14,7 +14,7 @@ from gym import spaces
 from gym.utils import seeding
 import numpy as np
 import pybullet
-from pybullet_envs.bullet import bullet_client
+import pybullet_utils.bullet_client as bc

 from pybullet_envs.prediction import boxstack_pybullet_sim

@@ -71,10 +71,10 @@ class PyBulletSimGymEnv(gym.Env):
    print("urdf_root=" + self._urdf_root)

    if self._is_render:
-      self._pybullet_client = bullet_client.BulletClient(connection_mode=pybullet.GUI,
+      self._pybullet_client = bc.BulletClient(connection_mode=pybullet.GUI,
                                                         options=optionstring)
    else:
-      self._pybullet_client = bullet_client.BulletClient()
+      self._pybullet_client = bc.BulletClient()

    if (debug_visualization == False):
      self._pybullet_client.configureDebugVisualizer(flag=self._pybullet_client.COV_ENABLE_GUI,
--- a/examples/pybullet/gym/pybullet_envs/scene_stadium.py
+++ b/examples/pybullet/gym/pybullet_envs/scene_stadium.py
@@ -32,7 +32,7 @@ class StadiumScene(Scene):
      for i in self.ground_plane_mjcf:
        self._p.changeDynamics(i, -1, lateralFriction=0.8, restitution=0.5)
        self._p.changeVisualShape(i, -1, rgbaColor=[1, 1, 1, 0.8])
-        self._p.configureDebugVisualizer(pybullet.COV_ENABLE_PLANAR_REFLECTION, 1)
+        self._p.configureDebugVisualizer(pybullet.COV_ENABLE_PLANAR_REFLECTION,i)

      #	for j in range(p.getNumJoints(i)):
      #		self._p.changeDynamics(i,j,lateralFriction=0)
--- a/examples/pybullet/gym/pybullet_envs/stable_baselines/init.py
+++ b/examples/pybullet/gym/pybullet_envs/stable_baselines/init.py
--- a/examples/pybullet/gym/pybullet_envs/stable_baselines/train.py
+++ b/examples/pybullet/gym/pybullet_envs/stable_baselines/train.py
@@ -0,0 +1,60 @@
+# Code adapted from https://github.com/araffin/rl-baselines-zoo
+# it requires stable-baselines to be installed
+# Colab Notebook: https://colab.research.google.com/drive/1nZkHO4QTYfAksm9ZTaZ5vXyC7szZxC3F
+# Author: Antonin RAFFIN
+# MIT License
+import argparse
+
+import pybullet_envs
+
+import gym
+import numpy as np
+from stable_baselines import SAC, TD3
+from stable_baselines.common.noise import NormalActionNoise
+
+from utils import TimeFeatureWrapper, EvalCallback
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--algo', help='RL Algorithm (Soft Actor-Critic by default)', default='sac',
+                        type=str, required=False, choices=['sac', 'td3'])
+    parser.add_argument('--env', type=str, default='HalfCheetahBulletEnv-v0', help='environment ID')
+    parser.add_argument('-n', '--n-timesteps', help='Number of training timesteps', default=int(1e6),
+                        type=int)
+    args = parser.parse_args()
+
+    env_id = args.env
+    n_timesteps = args.n_timesteps
+    save_path = '{}_{}'.format(args.algo, env_id)
+
+    # Instantiate and wrap the environment
+    env = TimeFeatureWrapper(gym.make(env_id))
+
+    # Create the evaluation environment and callback
+    eval_env = TimeFeatureWrapper(gym.make(env_id))
+    callback = EvalCallback(eval_env, best_model_save_path=save_path + '_best')
+
+    algo = {
+        'sac': SAC,
+        'td3': TD3
+    }[args.algo]
+
+    n_actions = env.action_space.shape[0]
+
+    # Tuned hyperparameters from https://github.com/araffin/rl-baselines-zoo
+    hyperparams = {
+        'sac': dict(batch_size=256, gamma=0.98, policy_kwargs=dict(layers=[256, 256]),
+                    learning_starts=10000, buffer_size=int(2e5), tau=0.01),
+
+        'td3': dict(batch_size=100, policy_kwargs=dict(layers=[400, 300]),
+                    learning_rate=1e-3, learning_starts=10000, buffer_size=int(1e6),
+                    train_freq=1000, gradient_steps=1000,
+                    action_noise=NormalActionNoise(mean=np.zeros(n_actions), sigma=0.1 * np.ones(n_actions)))
+    }[args.algo]
+
+    model = algo('MlpPolicy', env, verbose=1, **hyperparams)
+    model.learn(n_timesteps, callback=callback)
+
+    print("Saving to {}.zip".format(save_path))
+    model.save(save_path)
--- a/examples/pybullet/gym/pybullet_envs/stable_baselines/utils.py
+++ b/examples/pybullet/gym/pybullet_envs/stable_baselines/utils.py
@@ -0,0 +1,114 @@
+# Code adapted from https://github.com/araffin/rl-baselines-zoo
+# it requires stable-baselines to be installed
+# Author: Antonin RAFFIN
+# MIT License
+import gym
+import numpy as np
+from gym.wrappers import TimeLimit
+
+from stable_baselines.common.evaluation import evaluate_policy
+
+
+class TimeFeatureWrapper(gym.Wrapper):
+    """
+    Add remaining time to observation space for fixed length episodes.
+    See https://arxiv.org/abs/1712.00378 and https://github.com/aravindr93/mjrl/issues/13.
+
+    :param env: (gym.Env)
+    :param max_steps: (int) Max number of steps of an episode
+        if it is not wrapped in a TimeLimit object.
+    :param test_mode: (bool) In test mode, the time feature is constant,
+        equal to zero. This allow to check that the agent did not overfit this feature,
+        learning a deterministic pre-defined sequence of actions.
+    """
+    def __init__(self, env, max_steps=1000, test_mode=False):
+        assert isinstance(env.observation_space, gym.spaces.Box)
+        # Add a time feature to the observation
+        low, high = env.observation_space.low, env.observation_space.high
+        low, high= np.concatenate((low, [0])), np.concatenate((high, [1.]))
+        env.observation_space = gym.spaces.Box(low=low, high=high, dtype=np.float32)
+
+        super(TimeFeatureWrapper, self).__init__(env)
+
+        if isinstance(env, TimeLimit):
+            self._max_steps = env._max_episode_steps
+        else:
+            self._max_steps = max_steps
+        self._current_step = 0
+        self._test_mode = test_mode
+
+    def reset(self):
+        self._current_step = 0
+        return self._get_obs(self.env.reset())
+
+    def step(self, action):
+        self._current_step += 1
+        obs, reward, done, info = self.env.step(action)
+        return self._get_obs(obs), reward, done, info
+
+    def _get_obs(self, obs):
+        """
+        Concatenate the time feature to the current observation.
+
+        :param obs: (np.ndarray)
+        :return: (np.ndarray)
+        """
+        # Remaining time is more general
+        time_feature = 1 - (self._current_step / self._max_steps)
+        if self._test_mode:
+            time_feature = 1.0
+        # Optionnaly: concatenate [time_feature, time_feature ** 2]
+        return np.concatenate((obs, [time_feature]))
+
+
+class EvalCallback(object):
+    """
+    Callback for evaluating an agent.
+
+    :param eval_env: (gym.Env) The environment used for initialization
+    :param n_eval_episodes: (int) The number of episodes to test the agent
+    :param eval_freq: (int) Evaluate the agent every eval_freq call of the callback.
+    :param deterministic: (bool)
+    :param best_model_save_path: (str)
+    :param verbose: (int)
+    """
+    def __init__(self, eval_env, n_eval_episodes=5, eval_freq=10000,
+                 deterministic=True, best_model_save_path=None, verbose=1):
+        super(EvalCallback, self).__init__()
+        self.n_eval_episodes = n_eval_episodes
+        self.eval_freq = eval_freq
+        self.best_mean_reward = -np.inf
+        self.deterministic = deterministic
+        self.eval_env = eval_env
+        self.verbose = verbose
+        self.model, self.num_timesteps = None, 0
+        self.best_model_save_path = best_model_save_path
+        self.n_calls = 0
+
+    def __call__(self, locals_, globals_):
+        """
+        :param locals_: (dict)
+        :param globals_: (dict)
+        :return: (bool)
+        """
+        self.n_calls += 1
+        self.model = locals_['self']
+        self.num_timesteps = self.model.num_timesteps
+
+        if self.n_calls % self.eval_freq == 0:
+            episode_rewards, _ = evaluate_policy(self.model, self.eval_env, n_eval_episodes=self.n_eval_episodes,
+                                                 deterministic=self.deterministic, return_episode_rewards=True)
+
+
+            mean_reward, std_reward = np.mean(episode_rewards), np.std(episode_rewards)
+            if self.verbose > 0:
+                print("Eval num_timesteps={}, "
+                      "episode_reward={:.2f} +/- {:.2f}".format(self.num_timesteps, mean_reward, std_reward))
+
+            if mean_reward > self.best_mean_reward:
+                if self.best_model_save_path is not None:
+                    print("Saving best model")
+                    self.model.save(self.best_model_save_path)
+                self.best_mean_reward = mean_reward
+
+        return True