add yapf style and apply yapf to format all Python files

This recreates pull request #2192
2019-04-27 07:31:15 -07:00
parent c591735042
commit ef9570c315
347 changed files with 70304 additions and 22752 deletions
--- a/examples/pybullet/gym/pybullet_envs/deep_mimic/DeepMimic_Optimizer.py
+++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/DeepMimic_Optimizer.py
@@ -4,11 +4,10 @@ import os
 import inspect
 currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
 parentdir = os.path.dirname(os.path.dirname(currentdir))
-os.sys.path.insert(0,parentdir)
-print("parentdir=",parentdir)
+os.sys.path.insert(0, parentdir)
+print("parentdir=", parentdir)

-
-from pybullet_envs.deep_mimic.env.pybullet_deep_mimic_env  import PyBulletDeepMimicEnv
+from pybullet_envs.deep_mimic.env.pybullet_deep_mimic_env import PyBulletDeepMimicEnv
 from pybullet_envs.deep_mimic.learning.rl_world import RLWorld
 from pybullet_utils.logger import Logger
 from pybullet_envs.deep_mimic.testrl import update_world, update_timestep, build_world
@@ -17,36 +16,40 @@ import pybullet_utils.mpi_util as MPIUtil
 args = []
 world = None

+
 def run():
-    global update_timestep
-    global world
+  global update_timestep
+  global world

-    done = False
-    while not (done):
-        update_world(world, update_timestep)
+  done = False
+  while not (done):
+    update_world(world, update_timestep)
+
+  return

-    return

 def shutdown():
-    global world
+  global world
+
+  Logger.print2('Shutting down...')
+  world.shutdown()
+  return

-    Logger.print2('Shutting down...')
-    world.shutdown()
-    return

 def main():
-    global args
-    global world
+  global args
+  global world

-    # Command line arguments
-    args = sys.argv[1:]
-    enable_draw = False
-    world = build_world(args, enable_draw)
+  # Command line arguments
+  args = sys.argv[1:]
+  enable_draw = False
+  world = build_world(args, enable_draw)

-    run()
-    shutdown()
+  run()
+  shutdown()
+
+  return

-    return

 if __name__ == '__main__':
-    main()
+  main()
--- a/examples/pybullet/gym/pybullet_envs/deep_mimic/env/action_space.py
+++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/env/action_space.py
@@ -1,6 +1,7 @@
 from enum import Enum

+
 class ActionSpace(Enum):
-    Null = 0
-    Continuous = 1
-    Discrete = 2
+  Null = 0
+  Continuous = 1
+  Discrete = 2
--- a/examples/pybullet/gym/pybullet_envs/deep_mimic/env/env.py
+++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/env/env.py
@@ -1,19 +1,21 @@
-from abc import  abstractmethod
+from abc import abstractmethod
 import sys, abc
 if sys.version_info >= (3, 4):
-    ABC = abc.ABC
+  ABC = abc.ABC
 else:
-    ABC = abc.ABCMeta('ABC', (), {})
+  ABC = abc.ABCMeta('ABC', (), {})

 import numpy as np
 from enum import Enum

+
 class Env(ABC):
-    class Terminate(Enum):
-        Null = 0
-        Fail = 1
-        Succ = 2
-        
-    def __init__(self, args, enable_draw):
-        self.enable_draw = enable_draw
-        return
+
+  class Terminate(Enum):
+    Null = 0
+    Fail = 1
+    Succ = 2
+
+  def __init__(self, args, enable_draw):
+    self.enable_draw = enable_draw
+    return
--- a/examples/pybullet/gym/pybullet_envs/deep_mimic/env/humanoid_pose_interpolator.py
+++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/env/humanoid_pose_interpolator.py
@@ -1,36 +1,61 @@
 from pybullet_utils import bullet_client
 import math
-	
+
+
 class HumanoidPoseInterpolator(object):
+
  def __init__(self):
    pass
-  
-  def Reset(self,basePos=[0,0,0], baseOrn=[0,0,0,1],chestRot=[0,0,0,1], neckRot=[0,0,0,1],rightHipRot= [0,0,0,1], rightKneeRot=[0],rightAnkleRot = [0,0,0,1],
-            rightShoulderRot = [0,0,0,1],rightElbowRot = [0], leftHipRot = [0,0,0,1], leftKneeRot = [0],leftAnkleRot = [0,0,0,1],
-           leftShoulderRot = [0,0,0,1] ,leftElbowRot = [0],
-           baseLinVel = [0,0,0],baseAngVel = [0,0,0], chestVel = [0,0,0],neckVel = [0,0,0],rightHipVel = [0,0,0],rightKneeVel = [0],
-           rightAnkleVel = [0,0,0],rightShoulderVel = [0,0,0],rightElbowVel = [0],leftHipVel = [0,0,0],leftKneeVel = [0],leftAnkleVel = [0,0,0],leftShoulderVel = [0,0,0],leftElbowVel = [0]
-            ):
-    
+
+  def Reset(self,
+            basePos=[0, 0, 0],
+            baseOrn=[0, 0, 0, 1],
+            chestRot=[0, 0, 0, 1],
+            neckRot=[0, 0, 0, 1],
+            rightHipRot=[0, 0, 0, 1],
+            rightKneeRot=[0],
+            rightAnkleRot=[0, 0, 0, 1],
+            rightShoulderRot=[0, 0, 0, 1],
+            rightElbowRot=[0],
+            leftHipRot=[0, 0, 0, 1],
+            leftKneeRot=[0],
+            leftAnkleRot=[0, 0, 0, 1],
+            leftShoulderRot=[0, 0, 0, 1],
+            leftElbowRot=[0],
+            baseLinVel=[0, 0, 0],
+            baseAngVel=[0, 0, 0],
+            chestVel=[0, 0, 0],
+            neckVel=[0, 0, 0],
+            rightHipVel=[0, 0, 0],
+            rightKneeVel=[0],
+            rightAnkleVel=[0, 0, 0],
+            rightShoulderVel=[0, 0, 0],
+            rightElbowVel=[0],
+            leftHipVel=[0, 0, 0],
+            leftKneeVel=[0],
+            leftAnkleVel=[0, 0, 0],
+            leftShoulderVel=[0, 0, 0],
+            leftElbowVel=[0]):
+
    self._basePos = basePos
    self._baseLinVel = baseLinVel
    #print("HumanoidPoseInterpolator.Reset: baseLinVel = ", baseLinVel)
-    self._baseOrn =baseOrn
+    self._baseOrn = baseOrn
    self._baseAngVel = baseAngVel
-    
+
    self._chestRot = chestRot
-    self._chestVel =chestVel
+    self._chestVel = chestVel
    self._neckRot = neckRot
    self._neckVel = neckVel
-    
+
    self._rightHipRot = rightHipRot
    self._rightHipVel = rightHipVel
-    self._rightKneeRot =rightKneeRot
+    self._rightKneeRot = rightKneeRot
    self._rightKneeVel = rightKneeVel
    self._rightAnkleRot = rightAnkleRot
    self._rightAnkleVel = rightAnkleVel
-    
-    self._rightShoulderRot =rightShoulderRot
+
+    self._rightShoulderRot = rightShoulderRot
    self._rightShoulderVel = rightShoulderVel
    self._rightElbowRot = rightElbowRot
    self._rightElbowVel = rightElbowVel
@@ -39,225 +64,253 @@ class HumanoidPoseInterpolator(object):
    self._leftHipVel = leftHipVel
    self._leftKneeRot = leftKneeRot
    self._leftKneeVel = leftKneeVel
-    self._leftAnkleRot =leftAnkleRot
+    self._leftAnkleRot = leftAnkleRot
    self._leftAnkleVel = leftAnkleVel
-    
+
    self._leftShoulderRot = leftShoulderRot
    self._leftShoulderVel = leftShoulderVel
-    self._leftElbowRot =leftElbowRot
+    self._leftElbowRot = leftElbowRot
    self._leftElbowVel = leftElbowVel

-  def ComputeLinVel(self,posStart, posEnd, deltaTime):
-    vel = [(posEnd[0]-posStart[0])/deltaTime,(posEnd[1]-posStart[1])/deltaTime,(posEnd[2]-posStart[2])/deltaTime]
+  def ComputeLinVel(self, posStart, posEnd, deltaTime):
+    vel = [(posEnd[0] - posStart[0]) / deltaTime, (posEnd[1] - posStart[1]) / deltaTime,
+           (posEnd[2] - posStart[2]) / deltaTime]
    return vel
-  
-  def ComputeAngVel(self,ornStart, ornEnd, deltaTime, bullet_client):
-    dorn = bullet_client.getDifferenceQuaternion(ornStart,ornEnd)
-    axis,angle = bullet_client.getAxisAngleFromQuaternion(dorn)
-    angVel = [(axis[0]*angle)/deltaTime,(axis[1]*angle)/deltaTime,(axis[2]*angle)/deltaTime]
+
+  def ComputeAngVel(self, ornStart, ornEnd, deltaTime, bullet_client):
+    dorn = bullet_client.getDifferenceQuaternion(ornStart, ornEnd)
+    axis, angle = bullet_client.getAxisAngleFromQuaternion(dorn)
+    angVel = [(axis[0] * angle) / deltaTime, (axis[1] * angle) / deltaTime,
+              (axis[2] * angle) / deltaTime]
    return angVel
-  
-  def ComputeAngVelRel(self,ornStart, ornEnd, deltaTime, bullet_client):
-    ornStartConjugate = [-ornStart[0],-ornStart[1],-ornStart[2],ornStart[3]]
-    pos_diff, q_diff =bullet_client.multiplyTransforms([0,0,0], ornStartConjugate, [0,0,0], ornEnd)
-    axis,angle = bullet_client.getAxisAngleFromQuaternion(q_diff)
-    angVel = [(axis[0]*angle)/deltaTime,(axis[1]*angle)/deltaTime,(axis[2]*angle)/deltaTime]
+
+  def ComputeAngVelRel(self, ornStart, ornEnd, deltaTime, bullet_client):
+    ornStartConjugate = [-ornStart[0], -ornStart[1], -ornStart[2], ornStart[3]]
+    pos_diff, q_diff = bullet_client.multiplyTransforms([0, 0, 0], ornStartConjugate, [0, 0, 0],
+                                                        ornEnd)
+    axis, angle = bullet_client.getAxisAngleFromQuaternion(q_diff)
+    angVel = [(axis[0] * angle) / deltaTime, (axis[1] * angle) / deltaTime,
+              (axis[2] * angle) / deltaTime]
    return angVel
-  
-    
+
  def NormalizeVector(self, vec):
-    length2 = orn[0]*orn[0]+orn[1]*orn[1]+orn[2]*orn[2]
-    if (length2>0):
+    length2 = orn[0] * orn[0] + orn[1] * orn[1] + orn[2] * orn[2]
+    if (length2 > 0):
      length = math.sqrt(length2)
-      
+
  def NormalizeQuaternion(self, orn):
-    length2 = orn[0]*orn[0]+orn[1]*orn[1]+orn[2]*orn[2]+orn[3]*orn[3]
-    if (length2>0):
+    length2 = orn[0] * orn[0] + orn[1] * orn[1] + orn[2] * orn[2] + orn[3] * orn[3]
+    if (length2 > 0):
      length = math.sqrt(length2)
-      orn[0]/=length
-      orn[1]/=length
-      orn[2]/=length
-      orn[3]/=length
+      orn[0] /= length
+      orn[1] /= length
+      orn[2] /= length
+      orn[3] /= length
      return orn
-      
+
    #print("Normalize? length=",length)

-    
  def PostProcessMotionData(self, frameData):
-    baseOrn1Start = [frameData[5],frameData[6], frameData[7],frameData[4]]
-    
-    chestRotStart = [frameData[9],frameData[10],frameData[11],frameData[8]]
-    
-    neckRotStart = [frameData[13],frameData[14],frameData[15],frameData[12]]
-    rightHipRotStart = [frameData[17],frameData[18],frameData[19],frameData[16]]
-    rightAnkleRotStart = [frameData[22],frameData[23],frameData[24],frameData[21]]
-    rightShoulderRotStart = [frameData[26],frameData[27],frameData[28],frameData[25]]
-    leftHipRotStart = [frameData[31],frameData[32],frameData[33],frameData[30]]
-    leftAnkleRotStart = [frameData[36],frameData[37],frameData[38],frameData[35]]
-    leftShoulderRotStart = [frameData[40],frameData[41],frameData[42],frameData[39]]
-    
-    
-  def GetPose(self):
-     pose = [ self._basePos[0],self._basePos[1],self._basePos[2],
-       self._baseOrn[0],self._baseOrn[1],self._baseOrn[2],self._baseOrn[3],
-       self._chestRot[0],self._chestRot[1],self._chestRot[2],self._chestRot[3],
-       self._neckRot[0],self._neckRot[1],self._neckRot[2],self._neckRot[3],
-       self._rightHipRot[0],self._rightHipRot[1],self._rightHipRot[2],self._rightHipRot[3],
-       self._rightKneeRot[0],
-       self._rightAnkleRot[0],self._rightAnkleRot[1],self._rightAnkleRot[2],self._rightAnkleRot[3],
-       self._rightShoulderRot[0],self._rightShoulderRot[1],self._rightShoulderRot[2],self._rightShoulderRot[3],
-       self._rightElbowRot[0],
-       self._leftHipRot[0],self._leftHipRot[1],self._leftHipRot[2],self._leftHipRot[3],
-       self._leftKneeRot[0],
-       self._leftAnkleRot[0],self._leftAnkleRot[1],self._leftAnkleRot[2],self._leftAnkleRot[3],
-       self._leftShoulderRot[0],self._leftShoulderRot[1],self._leftShoulderRot[2],self._leftShoulderRot[3],
-       self._leftElbowRot[0]	]
-     return pose
+    baseOrn1Start = [frameData[5], frameData[6], frameData[7], frameData[4]]

-  def Slerp(self, frameFraction, frameData, frameDataNext,bullet_client ):
+    chestRotStart = [frameData[9], frameData[10], frameData[11], frameData[8]]
+
+    neckRotStart = [frameData[13], frameData[14], frameData[15], frameData[12]]
+    rightHipRotStart = [frameData[17], frameData[18], frameData[19], frameData[16]]
+    rightAnkleRotStart = [frameData[22], frameData[23], frameData[24], frameData[21]]
+    rightShoulderRotStart = [frameData[26], frameData[27], frameData[28], frameData[25]]
+    leftHipRotStart = [frameData[31], frameData[32], frameData[33], frameData[30]]
+    leftAnkleRotStart = [frameData[36], frameData[37], frameData[38], frameData[35]]
+    leftShoulderRotStart = [frameData[40], frameData[41], frameData[42], frameData[39]]
+
+  def GetPose(self):
+    pose = [
+        self._basePos[0], self._basePos[1], self._basePos[2], self._baseOrn[0], self._baseOrn[1],
+        self._baseOrn[2], self._baseOrn[3], self._chestRot[0], self._chestRot[1],
+        self._chestRot[2], self._chestRot[3], self._neckRot[0], self._neckRot[1], self._neckRot[2],
+        self._neckRot[3], self._rightHipRot[0], self._rightHipRot[1], self._rightHipRot[2],
+        self._rightHipRot[3], self._rightKneeRot[0], self._rightAnkleRot[0],
+        self._rightAnkleRot[1], self._rightAnkleRot[2], self._rightAnkleRot[3],
+        self._rightShoulderRot[0], self._rightShoulderRot[1], self._rightShoulderRot[2],
+        self._rightShoulderRot[3], self._rightElbowRot[0], self._leftHipRot[0],
+        self._leftHipRot[1], self._leftHipRot[2], self._leftHipRot[3], self._leftKneeRot[0],
+        self._leftAnkleRot[0], self._leftAnkleRot[1], self._leftAnkleRot[2], self._leftAnkleRot[3],
+        self._leftShoulderRot[0], self._leftShoulderRot[1], self._leftShoulderRot[2],
+        self._leftShoulderRot[3], self._leftElbowRot[0]
+    ]
+    return pose
+
+  def Slerp(self, frameFraction, frameData, frameDataNext, bullet_client):
    keyFrameDuration = frameData[0]
-    basePos1Start = [frameData[1],frameData[2],frameData[3]]
-    basePos1End = [frameDataNext[1],frameDataNext[2],frameDataNext[3]]
-    self._basePos = [basePos1Start[0]+frameFraction*(basePos1End[0]-basePos1Start[0]), 
-      basePos1Start[1]+frameFraction*(basePos1End[1]-basePos1Start[1]), 
-      basePos1Start[2]+frameFraction*(basePos1End[2]-basePos1Start[2])]
-    self._baseLinVel = self.ComputeLinVel(basePos1Start,basePos1End, keyFrameDuration)
-    baseOrn1Start = [frameData[5],frameData[6], frameData[7],frameData[4]]
-    baseOrn1Next = [frameDataNext[5],frameDataNext[6], frameDataNext[7],frameDataNext[4]]
-    self._baseOrn = bullet_client.getQuaternionSlerp(baseOrn1Start,baseOrn1Next,frameFraction)
-    self._baseAngVel = self.ComputeAngVel(baseOrn1Start,baseOrn1Next, keyFrameDuration, bullet_client)
-    
+    basePos1Start = [frameData[1], frameData[2], frameData[3]]
+    basePos1End = [frameDataNext[1], frameDataNext[2], frameDataNext[3]]
+    self._basePos = [
+        basePos1Start[0] + frameFraction * (basePos1End[0] - basePos1Start[0]),
+        basePos1Start[1] + frameFraction * (basePos1End[1] - basePos1Start[1]),
+        basePos1Start[2] + frameFraction * (basePos1End[2] - basePos1Start[2])
+    ]
+    self._baseLinVel = self.ComputeLinVel(basePos1Start, basePos1End, keyFrameDuration)
+    baseOrn1Start = [frameData[5], frameData[6], frameData[7], frameData[4]]
+    baseOrn1Next = [frameDataNext[5], frameDataNext[6], frameDataNext[7], frameDataNext[4]]
+    self._baseOrn = bullet_client.getQuaternionSlerp(baseOrn1Start, baseOrn1Next, frameFraction)
+    self._baseAngVel = self.ComputeAngVel(baseOrn1Start, baseOrn1Next, keyFrameDuration,
+                                          bullet_client)
+
    ##pre-rotate to make z-up
    #y2zPos=[0,0,0.0]
    #y2zOrn = p.getQuaternionFromEuler([1.57,0,0])
    #basePos,baseOrn = p.multiplyTransforms(y2zPos, y2zOrn,basePos1,baseOrn1)

-    chestRotStart = [frameData[9],frameData[10],frameData[11],frameData[8]]
-    chestRotEnd = [frameDataNext[9],frameDataNext[10],frameDataNext[11],frameDataNext[8]]
-    self._chestRot = bullet_client.getQuaternionSlerp(chestRotStart,chestRotEnd,frameFraction)
-    self._chestVel = self.ComputeAngVelRel(chestRotStart,chestRotEnd,keyFrameDuration,bullet_client)
-    
-    neckRotStart = [frameData[13],frameData[14],frameData[15],frameData[12]]
-    neckRotEnd= [frameDataNext[13],frameDataNext[14],frameDataNext[15],frameDataNext[12]]
-    self._neckRot =  bullet_client.getQuaternionSlerp(neckRotStart,neckRotEnd,frameFraction)
-    self._neckVel = self.ComputeAngVelRel(neckRotStart,neckRotEnd,keyFrameDuration,bullet_client)
-    
-    rightHipRotStart = [frameData[17],frameData[18],frameData[19],frameData[16]]
-    rightHipRotEnd = [frameDataNext[17],frameDataNext[18],frameDataNext[19],frameDataNext[16]]
-    self._rightHipRot = bullet_client.getQuaternionSlerp(rightHipRotStart,rightHipRotEnd,frameFraction)
-    self._rightHipVel = self.ComputeAngVelRel(rightHipRotStart,rightHipRotEnd,keyFrameDuration,bullet_client)
-    
+    chestRotStart = [frameData[9], frameData[10], frameData[11], frameData[8]]
+    chestRotEnd = [frameDataNext[9], frameDataNext[10], frameDataNext[11], frameDataNext[8]]
+    self._chestRot = bullet_client.getQuaternionSlerp(chestRotStart, chestRotEnd, frameFraction)
+    self._chestVel = self.ComputeAngVelRel(chestRotStart, chestRotEnd, keyFrameDuration,
+                                           bullet_client)
+
+    neckRotStart = [frameData[13], frameData[14], frameData[15], frameData[12]]
+    neckRotEnd = [frameDataNext[13], frameDataNext[14], frameDataNext[15], frameDataNext[12]]
+    self._neckRot = bullet_client.getQuaternionSlerp(neckRotStart, neckRotEnd, frameFraction)
+    self._neckVel = self.ComputeAngVelRel(neckRotStart, neckRotEnd, keyFrameDuration,
+                                          bullet_client)
+
+    rightHipRotStart = [frameData[17], frameData[18], frameData[19], frameData[16]]
+    rightHipRotEnd = [frameDataNext[17], frameDataNext[18], frameDataNext[19], frameDataNext[16]]
+    self._rightHipRot = bullet_client.getQuaternionSlerp(rightHipRotStart, rightHipRotEnd,
+                                                         frameFraction)
+    self._rightHipVel = self.ComputeAngVelRel(rightHipRotStart, rightHipRotEnd, keyFrameDuration,
+                                              bullet_client)
+
    rightKneeRotStart = [frameData[20]]
    rightKneeRotEnd = [frameDataNext[20]]
-    self._rightKneeRot = [rightKneeRotStart[0]+frameFraction*(rightKneeRotEnd[0]-rightKneeRotStart[0])]
-    self._rightKneeVel = [(rightKneeRotEnd[0]-rightKneeRotStart[0])/keyFrameDuration]
-    
-    rightAnkleRotStart = [frameData[22],frameData[23],frameData[24],frameData[21]]
-    rightAnkleRotEnd = [frameDataNext[22],frameDataNext[23],frameDataNext[24],frameDataNext[21]]
-    self._rightAnkleRot =  bullet_client.getQuaternionSlerp(rightAnkleRotStart,rightAnkleRotEnd,frameFraction)
-    self._rightAnkleVel = self.ComputeAngVelRel(rightAnkleRotStart,rightAnkleRotEnd,keyFrameDuration,bullet_client)
-      
-    rightShoulderRotStart = [frameData[26],frameData[27],frameData[28],frameData[25]]
-    rightShoulderRotEnd = [frameDataNext[26],frameDataNext[27],frameDataNext[28],frameDataNext[25]]
-    self._rightShoulderRot = bullet_client.getQuaternionSlerp(rightShoulderRotStart,rightShoulderRotEnd,frameFraction)
-    self._rightShoulderVel = self.ComputeAngVelRel(rightShoulderRotStart,rightShoulderRotEnd, keyFrameDuration,bullet_client)
-    
+    self._rightKneeRot = [
+        rightKneeRotStart[0] + frameFraction * (rightKneeRotEnd[0] - rightKneeRotStart[0])
+    ]
+    self._rightKneeVel = [(rightKneeRotEnd[0] - rightKneeRotStart[0]) / keyFrameDuration]
+
+    rightAnkleRotStart = [frameData[22], frameData[23], frameData[24], frameData[21]]
+    rightAnkleRotEnd = [frameDataNext[22], frameDataNext[23], frameDataNext[24], frameDataNext[21]]
+    self._rightAnkleRot = bullet_client.getQuaternionSlerp(rightAnkleRotStart, rightAnkleRotEnd,
+                                                           frameFraction)
+    self._rightAnkleVel = self.ComputeAngVelRel(rightAnkleRotStart, rightAnkleRotEnd,
+                                                keyFrameDuration, bullet_client)
+
+    rightShoulderRotStart = [frameData[26], frameData[27], frameData[28], frameData[25]]
+    rightShoulderRotEnd = [
+        frameDataNext[26], frameDataNext[27], frameDataNext[28], frameDataNext[25]
+    ]
+    self._rightShoulderRot = bullet_client.getQuaternionSlerp(rightShoulderRotStart,
+                                                              rightShoulderRotEnd, frameFraction)
+    self._rightShoulderVel = self.ComputeAngVelRel(rightShoulderRotStart, rightShoulderRotEnd,
+                                                   keyFrameDuration, bullet_client)
+
    rightElbowRotStart = [frameData[29]]
    rightElbowRotEnd = [frameDataNext[29]]
-    self._rightElbowRot = [rightElbowRotStart[0]+frameFraction*(rightElbowRotEnd[0]-rightElbowRotStart[0])]
-    self._rightElbowVel = [(rightElbowRotEnd[0]-rightElbowRotStart[0])/keyFrameDuration]
-    
-    leftHipRotStart = [frameData[31],frameData[32],frameData[33],frameData[30]]
-    leftHipRotEnd = [frameDataNext[31],frameDataNext[32],frameDataNext[33],frameDataNext[30]]
-    self._leftHipRot = bullet_client.getQuaternionSlerp(leftHipRotStart,leftHipRotEnd,frameFraction)
-    self._leftHipVel = self.ComputeAngVelRel(leftHipRotStart, leftHipRotEnd,keyFrameDuration,bullet_client)
-    
+    self._rightElbowRot = [
+        rightElbowRotStart[0] + frameFraction * (rightElbowRotEnd[0] - rightElbowRotStart[0])
+    ]
+    self._rightElbowVel = [(rightElbowRotEnd[0] - rightElbowRotStart[0]) / keyFrameDuration]
+
+    leftHipRotStart = [frameData[31], frameData[32], frameData[33], frameData[30]]
+    leftHipRotEnd = [frameDataNext[31], frameDataNext[32], frameDataNext[33], frameDataNext[30]]
+    self._leftHipRot = bullet_client.getQuaternionSlerp(leftHipRotStart, leftHipRotEnd,
+                                                        frameFraction)
+    self._leftHipVel = self.ComputeAngVelRel(leftHipRotStart, leftHipRotEnd, keyFrameDuration,
+                                             bullet_client)
+
    leftKneeRotStart = [frameData[34]]
    leftKneeRotEnd = [frameDataNext[34]]
-    self._leftKneeRot = [leftKneeRotStart[0] +frameFraction*(leftKneeRotEnd[0]-leftKneeRotStart[0]) ]
-    self._leftKneeVel = [(leftKneeRotEnd[0]-leftKneeRotStart[0])/keyFrameDuration]
-    
-    leftAnkleRotStart = [frameData[36],frameData[37],frameData[38],frameData[35]]
-    leftAnkleRotEnd = [frameDataNext[36],frameDataNext[37],frameDataNext[38],frameDataNext[35]]
-    self._leftAnkleRot = bullet_client.getQuaternionSlerp(leftAnkleRotStart,leftAnkleRotEnd,frameFraction)
-    self._leftAnkleVel = self.ComputeAngVelRel(leftAnkleRotStart,leftAnkleRotEnd,keyFrameDuration,bullet_client)
+    self._leftKneeRot = [
+        leftKneeRotStart[0] + frameFraction * (leftKneeRotEnd[0] - leftKneeRotStart[0])
+    ]
+    self._leftKneeVel = [(leftKneeRotEnd[0] - leftKneeRotStart[0]) / keyFrameDuration]

-    leftShoulderRotStart = [frameData[40],frameData[41],frameData[42],frameData[39]]
-    leftShoulderRotEnd = [frameDataNext[40],frameDataNext[41],frameDataNext[42],frameDataNext[39]]
-    self._leftShoulderRot = bullet_client.getQuaternionSlerp(leftShoulderRotStart,leftShoulderRotEnd,frameFraction)
-    self._leftShoulderVel = self.ComputeAngVelRel(leftShoulderRotStart,leftShoulderRotEnd,keyFrameDuration,bullet_client)
+    leftAnkleRotStart = [frameData[36], frameData[37], frameData[38], frameData[35]]
+    leftAnkleRotEnd = [frameDataNext[36], frameDataNext[37], frameDataNext[38], frameDataNext[35]]
+    self._leftAnkleRot = bullet_client.getQuaternionSlerp(leftAnkleRotStart, leftAnkleRotEnd,
+                                                          frameFraction)
+    self._leftAnkleVel = self.ComputeAngVelRel(leftAnkleRotStart, leftAnkleRotEnd,
+                                               keyFrameDuration, bullet_client)
+
+    leftShoulderRotStart = [frameData[40], frameData[41], frameData[42], frameData[39]]
+    leftShoulderRotEnd = [
+        frameDataNext[40], frameDataNext[41], frameDataNext[42], frameDataNext[39]
+    ]
+    self._leftShoulderRot = bullet_client.getQuaternionSlerp(leftShoulderRotStart,
+                                                             leftShoulderRotEnd, frameFraction)
+    self._leftShoulderVel = self.ComputeAngVelRel(leftShoulderRotStart, leftShoulderRotEnd,
+                                                  keyFrameDuration, bullet_client)

    leftElbowRotStart = [frameData[43]]
    leftElbowRotEnd = [frameDataNext[43]]
-    self._leftElbowRot = [leftElbowRotStart[0]+frameFraction*(leftElbowRotEnd[0]-leftElbowRotStart[0])]
-    self._leftElbowVel = [(leftElbowRotEnd[0]-leftElbowRotStart[0])/keyFrameDuration]
+    self._leftElbowRot = [
+        leftElbowRotStart[0] + frameFraction * (leftElbowRotEnd[0] - leftElbowRotStart[0])
+    ]
+    self._leftElbowVel = [(leftElbowRotEnd[0] - leftElbowRotStart[0]) / keyFrameDuration]

    pose = self.GetPose()
    return pose
-    
+
  def ConvertFromAction(self, pybullet_client, action):
    #turn action into pose
-    
-    self.Reset()#?? needed?
-    index=0
+
+    self.Reset()  #?? needed?
+    index = 0
    angle = action[index]
-    axis = [action[index+1],action[index+2],action[index+3]]
-    index+=4
-    self._chestRot = pybullet_client.getQuaternionFromAxisAngle(axis,angle)
+    axis = [action[index + 1], action[index + 2], action[index + 3]]
+    index += 4
+    self._chestRot = pybullet_client.getQuaternionFromAxisAngle(axis, angle)
    #print("pose._chestRot=",pose._chestRot)

    angle = action[index]
-    axis = [action[index+1],action[index+2],action[index+3]]
-    index+=4
-    self._neckRot = pybullet_client.getQuaternionFromAxisAngle(axis,angle)
-    
+    axis = [action[index + 1], action[index + 2], action[index + 3]]
+    index += 4
+    self._neckRot = pybullet_client.getQuaternionFromAxisAngle(axis, angle)
+
    angle = action[index]
-    axis = [action[index+1],action[index+2],action[index+3]]
-    index+=4
-    self._rightHipRot = pybullet_client.getQuaternionFromAxisAngle(axis,angle)
-    
+    axis = [action[index + 1], action[index + 2], action[index + 3]]
+    index += 4
+    self._rightHipRot = pybullet_client.getQuaternionFromAxisAngle(axis, angle)
+
    angle = action[index]
-    index+=1
+    index += 1
    self._rightKneeRot = [angle]
-        
+
    angle = action[index]
-    axis = [action[index+1],action[index+2],action[index+3]]
-    index+=4
-    self._rightAnkleRot = pybullet_client.getQuaternionFromAxisAngle(axis,angle)
-    
+    axis = [action[index + 1], action[index + 2], action[index + 3]]
+    index += 4
+    self._rightAnkleRot = pybullet_client.getQuaternionFromAxisAngle(axis, angle)
+
    angle = action[index]
-    axis = [action[index+1],action[index+2],action[index+3]]
-    index+=4
-    self._rightShoulderRot = pybullet_client.getQuaternionFromAxisAngle(axis,angle)
-    
+    axis = [action[index + 1], action[index + 2], action[index + 3]]
+    index += 4
+    self._rightShoulderRot = pybullet_client.getQuaternionFromAxisAngle(axis, angle)
+
    angle = action[index]
-    index+=1
+    index += 1
    self._rightElbowRot = [angle]
-    
+
    angle = action[index]
-    axis = [action[index+1],action[index+2],action[index+3]]
-    index+=4
-    self._leftHipRot = pybullet_client.getQuaternionFromAxisAngle(axis,angle)
-    
+    axis = [action[index + 1], action[index + 2], action[index + 3]]
+    index += 4
+    self._leftHipRot = pybullet_client.getQuaternionFromAxisAngle(axis, angle)
+
    angle = action[index]
-    index+=1
+    index += 1
    self._leftKneeRot = [angle]
-    
-    
+
    angle = action[index]
-    axis = [action[index+1],action[index+2],action[index+3]]
-    index+=4
-    self._leftAnkleRot = pybullet_client.getQuaternionFromAxisAngle(axis,angle)
-    
+    axis = [action[index + 1], action[index + 2], action[index + 3]]
+    index += 4
+    self._leftAnkleRot = pybullet_client.getQuaternionFromAxisAngle(axis, angle)
+
    angle = action[index]
-    axis = [action[index+1],action[index+2],action[index+3]]
-    index+=4
-    self._leftShoulderRot = pybullet_client.getQuaternionFromAxisAngle(axis,angle)
-        
+    axis = [action[index + 1], action[index + 2], action[index + 3]]
+    index += 4
+    self._leftShoulderRot = pybullet_client.getQuaternionFromAxisAngle(axis, angle)
+
    angle = action[index]
-    index+=1
+    index += 1
    self._leftElbowRot = [angle]
-    
+
    pose = self.GetPose()
    return pose
-  
--- a/examples/pybullet/gym/pybullet_envs/deep_mimic/env/humanoid_stable_pd.py
+++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/env/humanoid_stable_pd.py
--- a/examples/pybullet/gym/pybullet_envs/deep_mimic/env/motion_capture_data.py
+++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/env/motion_capture_data.py
@@ -1,7 +1,9 @@
 import json
 import math

+
 class MotionCaptureData(object):
+
  def __init__(self):
    self.Reset()

@@ -13,30 +15,33 @@ class MotionCaptureData(object):
      self._motion_data = json.load(f)

  def NumFrames(self):
-    return  len(self._motion_data['Frames'])
+    return len(self._motion_data['Frames'])

  def KeyFrameDuraction(self):
-  	return self._motion_data['Frames'][0][0]
- 
+    return self._motion_data['Frames'][0][0]
+
  def getCycleTime(self):
    keyFrameDuration = self.KeyFrameDuraction()
-    cycleTime = keyFrameDuration*(self.NumFrames()-1)
+    cycleTime = keyFrameDuration * (self.NumFrames() - 1)
    return cycleTime
-    
+
  def calcCycleCount(self, simTime, cycleTime):
-    phases = simTime / cycleTime;
+    phases = simTime / cycleTime
    count = math.floor(phases)
    loop = True
    #count = (loop) ? count : cMathUtil::Clamp(count, 0, 1);
    return count
-    
+
  def computeCycleOffset(self):
-    firstFrame=0
-    lastFrame = self.NumFrames()-1
+    firstFrame = 0
+    lastFrame = self.NumFrames() - 1
    frameData = self._motion_data['Frames'][0]
    frameDataNext = self._motion_data['Frames'][lastFrame]
-    
-    basePosStart = [frameData[1],frameData[2],frameData[3]]
-    basePosEnd = [frameDataNext[1],frameDataNext[2],frameDataNext[3]]
-    self._cycleOffset = [basePosEnd[0]-basePosStart[0],basePosEnd[1]-basePosStart[1],basePosEnd[2]-basePosStart[2]]
-    return self._cycleOffset
+
+    basePosStart = [frameData[1], frameData[2], frameData[3]]
+    basePosEnd = [frameDataNext[1], frameDataNext[2], frameDataNext[3]]
+    self._cycleOffset = [
+        basePosEnd[0] - basePosStart[0], basePosEnd[1] - basePosStart[1],
+        basePosEnd[2] - basePosStart[2]
+    ]
+    return self._cycleOffset
--- a/examples/pybullet/gym/pybullet_envs/deep_mimic/env/pybullet_deep_mimic_env.py
+++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/env/pybullet_deep_mimic_env.py
@@ -9,301 +9,313 @@ from pybullet_envs.deep_mimic.env import humanoid_stable_pd
 import pybullet_data
 import pybullet as p1
 import random
-      
-      
+
+
 class PyBulletDeepMimicEnv(Env):
-    def __init__(self, arg_parser=None, enable_draw=False, pybullet_client=None):
-      super().__init__(arg_parser, enable_draw)
-      self._num_agents = 1
-      self._pybullet_client = pybullet_client
-      self._isInitialized = False
-      self._useStablePD = True
-      self._arg_parser = arg_parser
-      self.reset()
-    
-    def reset(self):
-      
-      
-      if not self._isInitialized:
-        if self.enable_draw:
-          self._pybullet_client =  bullet_client.BulletClient(connection_mode=p1.GUI)
-          #disable 'GUI' since it slows down a lot on Mac OSX and some other platforms
-          self._pybullet_client.configureDebugVisualizer(self._pybullet_client.COV_ENABLE_GUI,0)
-        else:
-          self._pybullet_client  =  bullet_client.BulletClient()
-        
-        self._pybullet_client.setAdditionalSearchPath(pybullet_data.getDataPath())
-        z2y = self._pybullet_client.getQuaternionFromEuler([-math.pi*0.5,0,0])
-        self._planeId = self._pybullet_client.loadURDF("plane_implicit.urdf",[0,0,0],z2y, useMaximalCoordinates=True)
-        #print("planeId=",self._planeId)
-        self._pybullet_client.configureDebugVisualizer(self._pybullet_client.COV_ENABLE_Y_AXIS_UP,1)
-        self._pybullet_client.setGravity(0,-9.8,0)
-        
-        self._pybullet_client.setPhysicsEngineParameter(numSolverIterations=10)
-        self._pybullet_client.changeDynamics(self._planeId, linkIndex=-1, lateralFriction=0.9)
-        
-        self._mocapData = motion_capture_data.MotionCaptureData()
-        
-        motion_file = self._arg_parser.parse_strings('motion_file')
-        print("motion_file=",motion_file[0])
-                
-        motionPath = pybullet_data.getDataPath()+"/"+motion_file[0]
-        #motionPath = pybullet_data.getDataPath()+"/motions/humanoid3d_backflip.txt"
-        self._mocapData.Load(motionPath)
-        timeStep = 1./600.
-        useFixedBase=False
-        self._humanoid = humanoid_stable_pd.HumanoidStablePD(self._pybullet_client, self._mocapData, timeStep, useFixedBase)
-        self._isInitialized = True
-        
-        self._pybullet_client.setTimeStep(timeStep)
-        self._pybullet_client.setPhysicsEngineParameter(numSubSteps=1)
-       
-        
-        selfCheck = False
-        if (selfCheck):
-          curTime = 0
-          while self._pybullet_client.isConnected():
-            self._humanoid.setSimTime(curTime)
-            state = self._humanoid.getState()
-            #print("state=",state)
-            pose = self._humanoid.computePose(self._humanoid._frameFraction)
-            for i in range (10):
-              curTime+=timeStep
-              #taus = self._humanoid.computePDForces(pose)
-              #self._humanoid.applyPDForces(taus)
-              #self._pybullet_client.stepSimulation()
-            time.sleep(timeStep)
-      #print("numframes = ", self._humanoid._mocap_data.NumFrames())
-      #startTime = random.randint(0,self._humanoid._mocap_data.NumFrames()-2)
-      rnrange = 1000
-      rn = random.randint(0,rnrange)
-      startTime = float(rn)/rnrange * self._humanoid.getCycleTime()
-      self.t = startTime
-      
-      self._humanoid.setSimTime(startTime)
-      
-      self._humanoid.resetPose()
-      #this clears the contact points. Todo: add API to explicitly clear all contact points?
-      #self._pybullet_client.stepSimulation()
-      self._humanoid.resetPose()
-      self.needs_update_time = self.t-1#force update
-       	
-    def get_num_agents(self):
-    	return self._num_agents
-    	
-    def get_action_space(self, agent_id):
-    	return ActionSpace(ActionSpace.Continuous)
-    	
-    def get_reward_min(self, agent_id):
-    	return 0
-    
-    def get_reward_max(self, agent_id):
-    	return 1
-    	
-    def get_reward_fail(self, agent_id):
-    	return self.get_reward_min(agent_id)
-    
-    def get_reward_succ(self, agent_id):
-    	return self.get_reward_max(agent_id)
-    	
-    #scene_name == "imitate" -> cDrawSceneImitate
-    def get_state_size(self, agent_id):
-        #cCtController::GetStateSize()
-        #int state_size = cDeepMimicCharController::GetStateSize();
-        #                     state_size += GetStatePoseSize();#106
-	      #                     state_size += GetStateVelSize(); #(3+3)*numBodyParts=90
-        #state_size += GetStatePhaseSize();#1
-        #197
-        return 197

-    def build_state_norm_groups(self, agent_id):
-        #if (mEnablePhaseInput)
-	      #{
-		    #int phase_group = gNormGroupNone;
-		    #int phase_offset = GetStatePhaseOffset();
-		    #int phase_size = GetStatePhaseSize();
-		    #out_groups.segment(phase_offset, phase_size) = phase_group * Eigen::VectorXi::Ones(phase_size);
-        groups = [0]*self.get_state_size(agent_id)
-        groups[0] = -1
-        return groups	
-        
-    def build_state_offset(self, agent_id):
-        out_offset = [0]*self.get_state_size(agent_id)
-        phase_offset = -0.5
-        out_offset[0] = phase_offset
-        return np.array(out_offset)
-    
-    def build_state_scale(self, agent_id):
-        out_scale = [1]*self.get_state_size(agent_id)
-        phase_scale = 2
-        out_scale[0] = phase_scale
-        return np.array(out_scale)
+  def __init__(self, arg_parser=None, enable_draw=False, pybullet_client=None):
+    super().__init__(arg_parser, enable_draw)
+    self._num_agents = 1
+    self._pybullet_client = pybullet_client
+    self._isInitialized = False
+    self._useStablePD = True
+    self._arg_parser = arg_parser
+    self.reset()

-    def get_goal_size(self, agent_id):
-        return 0
+  def reset(self):

-    def get_action_size(self, agent_id):
-     	  ctrl_size = 43 #numDof
-     	  root_size = 7
-     	  return ctrl_size - root_size
-     	  
-    def build_goal_norm_groups(self, agent_id):
-        return np.array([])
-        
-    def build_goal_offset(self, agent_id):
-        return np.array([])
-    
-    def build_goal_scale(self, agent_id):
-        return np.array([])
-        
-    def build_action_offset(self, agent_id):
-    	  out_offset = [0] * self.get_action_size(agent_id)
-    	  out_offset = [0.0000000000,0.0000000000,0.0000000000,-0.200000000,0.0000000000,0.0000000000,0.0000000000,
-    	    -0.200000000,0.0000000000,0.0000000000,	0.00000000,	-0.2000000,	1.57000000,	0.00000000,	0.00000000,
-    	    0.00000000,	-0.2000000,	0.00000000,	0.00000000,	0.00000000,	-0.2000000,	-1.5700000,	0.00000000,	0.00000000,
-    	    0.00000000,	-0.2000000,	1.57000000,	0.00000000,	0.00000000,	0.00000000,	-0.2000000,	0.00000000,	0.00000000,
-    	    0.00000000,	-0.2000000,	-1.5700000]
-    	  #see cCtCtrlUtil::BuildOffsetScalePDPrismatic and
-    	  #see cCtCtrlUtil::BuildOffsetScalePDSpherical
-    	  return np.array(out_offset)
-    	  
-    def build_action_scale(self, agent_id):
-    	  out_scale = [1] * self.get_action_size(agent_id)
-    	  #see cCtCtrlUtil::BuildOffsetScalePDPrismatic and
-    	  #see cCtCtrlUtil::BuildOffsetScalePDSpherical
-    	  out_scale=[ 0.20833333333333,1.00000000000000,1.00000000000000,1.00000000000000,0.25000000000000,
-    	    1.00000000000000,1.00000000000000,1.00000000000000,0.12077294685990,1.00000000000000,
-    	    1.000000000000,	1.000000000000,	0.159235668789,	0.159235668789,	1.000000000000,
-    	    1.000000000000,	1.000000000000,	0.079617834394,	1.000000000000,	1.000000000000,
-    	    1.000000000000,	0.159235668789,	0.120772946859,	1.000000000000,	1.000000000000,
-    	    1.000000000000,	0.159235668789,	0.159235668789,	1.000000000000,	1.000000000000,
-    	    1.000000000000,	0.107758620689,	1.000000000000,	1.000000000000,	1.000000000000,
-    	    0.159235668789]
-    	  return np.array(out_scale)
-    
-    def build_action_bound_min(self, agent_id):
-    	  #see cCtCtrlUtil::BuildBoundsPDSpherical
-    	  out_scale = [-1] * self.get_action_size(agent_id)
-    	  out_scale = [-4.79999999999,-1.00000000000,-1.00000000000,-1.00000000000,-4.00000000000,
-    	    -1.00000000000,-1.00000000000,-1.00000000000,-7.77999999999,-1.00000000000,	-1.000000000,
-	    	  -1.000000000,	-7.850000000,	-6.280000000,	-1.000000000,	-1.000000000,	-1.000000000,
-	    	  -12.56000000,	-1.000000000,	-1.000000000,	-1.000000000,	-4.710000000,
-	    	  -7.779999999,	-1.000000000,	-1.000000000,	-1.000000000,	-7.850000000,
-	    	  -6.280000000,	-1.000000000,	-1.000000000,	-1.000000000,	-8.460000000,
-	    	  -1.000000000,	-1.000000000,	-1.000000000,	-4.710000000]
+    if not self._isInitialized:
+      if self.enable_draw:
+        self._pybullet_client = bullet_client.BulletClient(connection_mode=p1.GUI)
+        #disable 'GUI' since it slows down a lot on Mac OSX and some other platforms
+        self._pybullet_client.configureDebugVisualizer(self._pybullet_client.COV_ENABLE_GUI, 0)
+      else:
+        self._pybullet_client = bullet_client.BulletClient()
+
+      self._pybullet_client.setAdditionalSearchPath(pybullet_data.getDataPath())
+      z2y = self._pybullet_client.getQuaternionFromEuler([-math.pi * 0.5, 0, 0])
+      self._planeId = self._pybullet_client.loadURDF("plane_implicit.urdf", [0, 0, 0],
+                                                     z2y,
+                                                     useMaximalCoordinates=True)
+      #print("planeId=",self._planeId)
+      self._pybullet_client.configureDebugVisualizer(self._pybullet_client.COV_ENABLE_Y_AXIS_UP, 1)
+      self._pybullet_client.setGravity(0, -9.8, 0)
+
+      self._pybullet_client.setPhysicsEngineParameter(numSolverIterations=10)
+      self._pybullet_client.changeDynamics(self._planeId, linkIndex=-1, lateralFriction=0.9)
+
+      self._mocapData = motion_capture_data.MotionCaptureData()
+
+      motion_file = self._arg_parser.parse_strings('motion_file')
+      print("motion_file=", motion_file[0])
+
+      motionPath = pybullet_data.getDataPath() + "/" + motion_file[0]
+      #motionPath = pybullet_data.getDataPath()+"/motions/humanoid3d_backflip.txt"
+      self._mocapData.Load(motionPath)
+      timeStep = 1. / 600.
+      useFixedBase = False
+      self._humanoid = humanoid_stable_pd.HumanoidStablePD(self._pybullet_client, self._mocapData,
+                                                           timeStep, useFixedBase)
+      self._isInitialized = True

-    	  return out_scale
-    
-    def build_action_bound_max(self, agent_id):
-    	  out_scale = [1] * self.get_action_size(agent_id)
-    	  out_scale=[
-    	    4.799999999,1.000000000,1.000000000,1.000000000,4.000000000,1.000000000,
-    	    1.000000000,1.000000000,8.779999999,1.000000000,	1.0000000,	1.0000000,
-    	  	4.7100000,	6.2800000,	1.0000000,	1.0000000,	1.0000000,
-	    	  12.560000,	1.0000000,	1.0000000,	1.0000000,	7.8500000,
-	    	  8.7799999,	1.0000000,	1.0000000,	1.0000000,	4.7100000,
-	    	  6.2800000,	1.0000000,	1.0000000,	1.0000000,	10.100000,
-	    	  1.0000000,	1.0000000,	1.0000000,	7.8500000]
-    	  return out_scale
-    	  
-    def set_mode(self, mode):
-    	  self._mode = mode
-    	  
-    def need_new_action(self, agent_id):
-        if self.t>=self.needs_update_time:
-          self.needs_update_time = self.t + 1./30.
-          return True
-        return False
-        
-    def record_state(self, agent_id):
-        state = self._humanoid.getState()
-        
-        return np.array(state)
-        
-        
-    def record_goal(self, agent_id):
-        return np.array([])
-        
-    def calc_reward(self, agent_id):
-        kinPose = self._humanoid.computePose(self._humanoid._frameFraction)
-        reward = self._humanoid.getReward(kinPose)
-        return reward
-        
-    def set_action(self, agent_id, action):
-        #print("action=",)
-        #for a in action:
-        #  print(a)
-        np.savetxt("pb_action.csv", action, delimiter=",")
-        self.desiredPose = self._humanoid.convertActionToPose(action)
-        #we need the target root positon and orientation to be zero, to be compatible with deep mimic
-        self.desiredPose[0] = 0
-        self.desiredPose[1] = 0
-        self.desiredPose[2] = 0
-        self.desiredPose[3] = 0
-        self.desiredPose[4] = 0
-        self.desiredPose[5] = 0
-        self.desiredPose[6] = 0
-        target_pose = np.array(self.desiredPose)
-        
-        
-        np.savetxt("pb_target_pose.csv", target_pose, delimiter=",")
-        
-        #print("set_action: desiredPose=", self.desiredPose)
-        
-    def log_val(self, agent_id, val):
-        pass
-        
-        
-    def update(self, timeStep):
-      #print("pybullet_deep_mimic_env:update timeStep=",timeStep," t=",self.t)
      self._pybullet_client.setTimeStep(timeStep)
-      self._humanoid._timeStep = timeStep
-        
-      for i in range(1):
-          self.t += timeStep
-          self._humanoid.setSimTime(self.t)
-          
-          if self.desiredPose:
-            kinPose = self._humanoid.computePose(self._humanoid._frameFraction)
-            self._humanoid.initializePose(self._humanoid._poseInterpolator, self._humanoid._kin_model, initBase=True)
-            #pos,orn=self._pybullet_client.getBasePositionAndOrientation(self._humanoid._sim_model)
-            #self._pybullet_client.resetBasePositionAndOrientation(self._humanoid._kin_model, [pos[0]+3,pos[1],pos[2]],orn)
-            #print("desiredPositions=",self.desiredPose)
-            maxForces = [0,0,0,0,0,0,0,200,200,200,200, 50,50,50,50, 200,200,200,200, 150, 90,90,90,90, 100,100,100,100, 60, 200,200,200,200,  150, 90, 90, 90, 90, 100,100,100,100, 60]
-            
-            if self._useStablePD:
-              taus = self._humanoid.computePDForces(self.desiredPose, desiredVelocities=None, maxForces=maxForces)
-              self._humanoid.applyPDForces(taus)
-            else:
-              self._humanoid.setJointMotors(self.desiredPose, maxForces=maxForces)
-            
-            self._pybullet_client.stepSimulation()
-            
+      self._pybullet_client.setPhysicsEngineParameter(numSubSteps=1)

-    def set_sample_count(self, count):
-        return
-        
-    def check_terminate(self, agent_id):
-      return Env.Terminate(self.is_episode_end())
-      
-    def is_episode_end(self):
-      isEnded = self._humanoid.terminates()
-      #also check maximum time, 20 seconds (todo get from file)
-      #print("self.t=",self.t)
-      if (self.t>20):
-        isEnded = True
-      return isEnded
-      
-    def check_valid_episode(self):
-      #could check if limbs exceed velocity threshold
-      return true
-    
-    def getKeyboardEvents(self):
-      return self._pybullet_client.getKeyboardEvents()
-    
-    def isKeyTriggered(self, keys, key):
-      o = ord(key)
-      #print("ord=",o)
-      if o in keys:
-        return keys[ord(key)] & self._pybullet_client.KEY_WAS_TRIGGERED
-      return False
+      selfCheck = False
+      if (selfCheck):
+        curTime = 0
+        while self._pybullet_client.isConnected():
+          self._humanoid.setSimTime(curTime)
+          state = self._humanoid.getState()
+          #print("state=",state)
+          pose = self._humanoid.computePose(self._humanoid._frameFraction)
+          for i in range(10):
+            curTime += timeStep
+            #taus = self._humanoid.computePDForces(pose)
+            #self._humanoid.applyPDForces(taus)
+            #self._pybullet_client.stepSimulation()
+          time.sleep(timeStep)
+    #print("numframes = ", self._humanoid._mocap_data.NumFrames())
+    #startTime = random.randint(0,self._humanoid._mocap_data.NumFrames()-2)
+    rnrange = 1000
+    rn = random.randint(0, rnrange)
+    startTime = float(rn) / rnrange * self._humanoid.getCycleTime()
+    self.t = startTime
+
+    self._humanoid.setSimTime(startTime)
+
+    self._humanoid.resetPose()
+    #this clears the contact points. Todo: add API to explicitly clear all contact points?
+    #self._pybullet_client.stepSimulation()
+    self._humanoid.resetPose()
+    self.needs_update_time = self.t - 1  #force update
+
+  def get_num_agents(self):
+    return self._num_agents
+
+  def get_action_space(self, agent_id):
+    return ActionSpace(ActionSpace.Continuous)
+
+  def get_reward_min(self, agent_id):
+    return 0
+
+  def get_reward_max(self, agent_id):
+    return 1
+
+  def get_reward_fail(self, agent_id):
+    return self.get_reward_min(agent_id)
+
+  def get_reward_succ(self, agent_id):
+    return self.get_reward_max(agent_id)
+
+  #scene_name == "imitate" -> cDrawSceneImitate
+  def get_state_size(self, agent_id):
+    #cCtController::GetStateSize()
+    #int state_size = cDeepMimicCharController::GetStateSize();
+    #                     state_size += GetStatePoseSize();#106
+    #                     state_size += GetStateVelSize(); #(3+3)*numBodyParts=90
+    #state_size += GetStatePhaseSize();#1
+    #197
+    return 197
+
+  def build_state_norm_groups(self, agent_id):
+    #if (mEnablePhaseInput)
+    #{
+    #int phase_group = gNormGroupNone;
+    #int phase_offset = GetStatePhaseOffset();
+    #int phase_size = GetStatePhaseSize();
+    #out_groups.segment(phase_offset, phase_size) = phase_group * Eigen::VectorXi::Ones(phase_size);
+    groups = [0] * self.get_state_size(agent_id)
+    groups[0] = -1
+    return groups
+
+  def build_state_offset(self, agent_id):
+    out_offset = [0] * self.get_state_size(agent_id)
+    phase_offset = -0.5
+    out_offset[0] = phase_offset
+    return np.array(out_offset)
+
+  def build_state_scale(self, agent_id):
+    out_scale = [1] * self.get_state_size(agent_id)
+    phase_scale = 2
+    out_scale[0] = phase_scale
+    return np.array(out_scale)
+
+  def get_goal_size(self, agent_id):
+    return 0
+
+  def get_action_size(self, agent_id):
+    ctrl_size = 43  #numDof
+    root_size = 7
+    return ctrl_size - root_size
+
+  def build_goal_norm_groups(self, agent_id):
+    return np.array([])
+
+  def build_goal_offset(self, agent_id):
+    return np.array([])
+
+  def build_goal_scale(self, agent_id):
+    return np.array([])
+
+  def build_action_offset(self, agent_id):
+    out_offset = [0] * self.get_action_size(agent_id)
+    out_offset = [
+        0.0000000000, 0.0000000000, 0.0000000000, -0.200000000, 0.0000000000, 0.0000000000,
+        0.0000000000, -0.200000000, 0.0000000000, 0.0000000000, 0.00000000, -0.2000000, 1.57000000,
+        0.00000000, 0.00000000, 0.00000000, -0.2000000, 0.00000000, 0.00000000, 0.00000000,
+        -0.2000000, -1.5700000, 0.00000000, 0.00000000, 0.00000000, -0.2000000, 1.57000000,
+        0.00000000, 0.00000000, 0.00000000, -0.2000000, 0.00000000, 0.00000000, 0.00000000,
+        -0.2000000, -1.5700000
+    ]
+    #see cCtCtrlUtil::BuildOffsetScalePDPrismatic and
+    #see cCtCtrlUtil::BuildOffsetScalePDSpherical
+    return np.array(out_offset)
+
+  def build_action_scale(self, agent_id):
+    out_scale = [1] * self.get_action_size(agent_id)
+    #see cCtCtrlUtil::BuildOffsetScalePDPrismatic and
+    #see cCtCtrlUtil::BuildOffsetScalePDSpherical
+    out_scale = [
+        0.20833333333333, 1.00000000000000, 1.00000000000000, 1.00000000000000, 0.25000000000000,
+        1.00000000000000, 1.00000000000000, 1.00000000000000, 0.12077294685990, 1.00000000000000,
+        1.000000000000, 1.000000000000, 0.159235668789, 0.159235668789, 1.000000000000,
+        1.000000000000, 1.000000000000, 0.079617834394, 1.000000000000, 1.000000000000,
+        1.000000000000, 0.159235668789, 0.120772946859, 1.000000000000, 1.000000000000,
+        1.000000000000, 0.159235668789, 0.159235668789, 1.000000000000, 1.000000000000,
+        1.000000000000, 0.107758620689, 1.000000000000, 1.000000000000, 1.000000000000,
+        0.159235668789
+    ]
+    return np.array(out_scale)
+
+  def build_action_bound_min(self, agent_id):
+    #see cCtCtrlUtil::BuildBoundsPDSpherical
+    out_scale = [-1] * self.get_action_size(agent_id)
+    out_scale = [
+        -4.79999999999, -1.00000000000, -1.00000000000, -1.00000000000, -4.00000000000,
+        -1.00000000000, -1.00000000000, -1.00000000000, -7.77999999999, -1.00000000000,
+        -1.000000000, -1.000000000, -7.850000000, -6.280000000, -1.000000000, -1.000000000,
+        -1.000000000, -12.56000000, -1.000000000, -1.000000000, -1.000000000, -4.710000000,
+        -7.779999999, -1.000000000, -1.000000000, -1.000000000, -7.850000000, -6.280000000,
+        -1.000000000, -1.000000000, -1.000000000, -8.460000000, -1.000000000, -1.000000000,
+        -1.000000000, -4.710000000
+    ]
+
+    return out_scale
+
+  def build_action_bound_max(self, agent_id):
+    out_scale = [1] * self.get_action_size(agent_id)
+    out_scale = [
+        4.799999999, 1.000000000, 1.000000000, 1.000000000, 4.000000000, 1.000000000, 1.000000000,
+        1.000000000, 8.779999999, 1.000000000, 1.0000000, 1.0000000, 4.7100000, 6.2800000,
+        1.0000000, 1.0000000, 1.0000000, 12.560000, 1.0000000, 1.0000000, 1.0000000, 7.8500000,
+        8.7799999, 1.0000000, 1.0000000, 1.0000000, 4.7100000, 6.2800000, 1.0000000, 1.0000000,
+        1.0000000, 10.100000, 1.0000000, 1.0000000, 1.0000000, 7.8500000
+    ]
+    return out_scale
+
+  def set_mode(self, mode):
+    self._mode = mode
+
+  def need_new_action(self, agent_id):
+    if self.t >= self.needs_update_time:
+      self.needs_update_time = self.t + 1. / 30.
+      return True
+    return False
+
+  def record_state(self, agent_id):
+    state = self._humanoid.getState()
+
+    return np.array(state)
+
+  def record_goal(self, agent_id):
+    return np.array([])
+
+  def calc_reward(self, agent_id):
+    kinPose = self._humanoid.computePose(self._humanoid._frameFraction)
+    reward = self._humanoid.getReward(kinPose)
+    return reward
+
+  def set_action(self, agent_id, action):
+    #print("action=",)
+    #for a in action:
+    #  print(a)
+    np.savetxt("pb_action.csv", action, delimiter=",")
+    self.desiredPose = self._humanoid.convertActionToPose(action)
+    #we need the target root positon and orientation to be zero, to be compatible with deep mimic
+    self.desiredPose[0] = 0
+    self.desiredPose[1] = 0
+    self.desiredPose[2] = 0
+    self.desiredPose[3] = 0
+    self.desiredPose[4] = 0
+    self.desiredPose[5] = 0
+    self.desiredPose[6] = 0
+    target_pose = np.array(self.desiredPose)
+
+    np.savetxt("pb_target_pose.csv", target_pose, delimiter=",")
+
+    #print("set_action: desiredPose=", self.desiredPose)
+
+  def log_val(self, agent_id, val):
+    pass
+
+  def update(self, timeStep):
+    #print("pybullet_deep_mimic_env:update timeStep=",timeStep," t=",self.t)
+    self._pybullet_client.setTimeStep(timeStep)
+    self._humanoid._timeStep = timeStep
+
+    for i in range(1):
+      self.t += timeStep
+      self._humanoid.setSimTime(self.t)
+
+      if self.desiredPose:
+        kinPose = self._humanoid.computePose(self._humanoid._frameFraction)
+        self._humanoid.initializePose(self._humanoid._poseInterpolator,
+                                      self._humanoid._kin_model,
+                                      initBase=True)
+        #pos,orn=self._pybullet_client.getBasePositionAndOrientation(self._humanoid._sim_model)
+        #self._pybullet_client.resetBasePositionAndOrientation(self._humanoid._kin_model, [pos[0]+3,pos[1],pos[2]],orn)
+        #print("desiredPositions=",self.desiredPose)
+        maxForces = [
+            0, 0, 0, 0, 0, 0, 0, 200, 200, 200, 200, 50, 50, 50, 50, 200, 200, 200, 200, 150, 90,
+            90, 90, 90, 100, 100, 100, 100, 60, 200, 200, 200, 200, 150, 90, 90, 90, 90, 100, 100,
+            100, 100, 60
+        ]
+
+        if self._useStablePD:
+          taus = self._humanoid.computePDForces(self.desiredPose,
+                                                desiredVelocities=None,
+                                                maxForces=maxForces)
+          self._humanoid.applyPDForces(taus)
+        else:
+          self._humanoid.setJointMotors(self.desiredPose, maxForces=maxForces)
+
+        self._pybullet_client.stepSimulation()
+
+  def set_sample_count(self, count):
+    return
+
+  def check_terminate(self, agent_id):
+    return Env.Terminate(self.is_episode_end())
+
+  def is_episode_end(self):
+    isEnded = self._humanoid.terminates()
+    #also check maximum time, 20 seconds (todo get from file)
+    #print("self.t=",self.t)
+    if (self.t > 20):
+      isEnded = True
+    return isEnded
+
+  def check_valid_episode(self):
+    #could check if limbs exceed velocity threshold
+    return true
+
+  def getKeyboardEvents(self):
+    return self._pybullet_client.getKeyboardEvents()
+
+  def isKeyTriggered(self, keys, key):
+    o = ord(key)
+    #print("ord=",o)
+    if o in keys:
+      return keys[ord(key)] & self._pybullet_client.KEY_WAS_TRIGGERED
+    return False
--- a/examples/pybullet/gym/pybullet_envs/deep_mimic/env/quadrupedPoseInterpolator.py
+++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/env/quadrupedPoseInterpolator.py
@@ -1,54 +1,66 @@
 from pybullet_utils import bullet_client
 import math
-	
+
+
 class QuadrupedPoseInterpolator(object):
+
  def __init__(self):
    pass
-    
-    
-  def ComputeLinVel(self,posStart, posEnd, deltaTime):
-    vel = [(posEnd[0]-posStart[0])/deltaTime,(posEnd[1]-posStart[1])/deltaTime,(posEnd[2]-posStart[2])/deltaTime]
+
+  def ComputeLinVel(self, posStart, posEnd, deltaTime):
+    vel = [(posEnd[0] - posStart[0]) / deltaTime, (posEnd[1] - posStart[1]) / deltaTime,
+           (posEnd[2] - posStart[2]) / deltaTime]
    return vel
-  
-  def ComputeAngVel(self,ornStart, ornEnd, deltaTime, bullet_client):
-    dorn = bullet_client.getDifferenceQuaternion(ornStart,ornEnd)
-    axis,angle = bullet_client.getAxisAngleFromQuaternion(dorn)
-    angVel = [(axis[0]*angle)/deltaTime,(axis[1]*angle)/deltaTime,(axis[2]*angle)/deltaTime]
+
+  def ComputeAngVel(self, ornStart, ornEnd, deltaTime, bullet_client):
+    dorn = bullet_client.getDifferenceQuaternion(ornStart, ornEnd)
+    axis, angle = bullet_client.getAxisAngleFromQuaternion(dorn)
+    angVel = [(axis[0] * angle) / deltaTime, (axis[1] * angle) / deltaTime,
+              (axis[2] * angle) / deltaTime]
    return angVel
-  
-  def ComputeAngVelRel(self,ornStart, ornEnd, deltaTime, bullet_client):
-    ornStartConjugate = [-ornStart[0],-ornStart[1],-ornStart[2],ornStart[3]]
-    pos_diff, q_diff =bullet_client.multiplyTransforms([0,0,0], ornStartConjugate, [0,0,0], ornEnd)
-    axis,angle = bullet_client.getAxisAngleFromQuaternion(q_diff)
-    angVel = [(axis[0]*angle)/deltaTime,(axis[1]*angle)/deltaTime,(axis[2]*angle)/deltaTime]
+
+  def ComputeAngVelRel(self, ornStart, ornEnd, deltaTime, bullet_client):
+    ornStartConjugate = [-ornStart[0], -ornStart[1], -ornStart[2], ornStart[3]]
+    pos_diff, q_diff = bullet_client.multiplyTransforms([0, 0, 0], ornStartConjugate, [0, 0, 0],
+                                                        ornEnd)
+    axis, angle = bullet_client.getAxisAngleFromQuaternion(q_diff)
+    angVel = [(axis[0] * angle) / deltaTime, (axis[1] * angle) / deltaTime,
+              (axis[2] * angle) / deltaTime]
    return angVel
-  
-  def Slerp(self, frameFraction, frameData, frameDataNext,bullet_client ):
+
+  def Slerp(self, frameFraction, frameData, frameDataNext, bullet_client):
    keyFrameDuration = frameData[0]
-    basePos1Start = [frameData[1],frameData[2],frameData[3]]
-    basePos1End = [frameDataNext[1],frameDataNext[2],frameDataNext[3]]
-    self._basePos = [basePos1Start[0]+frameFraction*(basePos1End[0]-basePos1Start[0]), 
-      basePos1Start[1]+frameFraction*(basePos1End[1]-basePos1Start[1]), 
-      basePos1Start[2]+frameFraction*(basePos1End[2]-basePos1Start[2])]
-    self._baseLinVel = self.ComputeLinVel(basePos1Start,basePos1End, keyFrameDuration)
-    baseOrn1Start = [frameData[5],frameData[6], frameData[7],frameData[4]]
-    baseOrn1Next = [frameDataNext[5],frameDataNext[6], frameDataNext[7],frameDataNext[4]]
-    self._baseOrn = bullet_client.getQuaternionSlerp(baseOrn1Start,baseOrn1Next,frameFraction)
-    self._baseAngVel = self.ComputeAngVel(baseOrn1Start,baseOrn1Next, keyFrameDuration, bullet_client)
-    
-    jointPositions=[self._basePos[0],self._basePos[1],self._basePos[2],
-       self._baseOrn[0],self._baseOrn[1],self._baseOrn[2],self._baseOrn[3]]
-    jointVelocities=[self._baseLinVel[0],self._baseLinVel[1],self._baseLinVel[2],
-      self._baseAngVel[0],self._baseAngVel[1],self._baseAngVel[2]]
-    
-    for j in range (12):
-      index=j+8
-      jointPosStart=frameData[index]
-      jointPosEnd=frameDataNext[index]
-      jointPos=jointPosStart+frameFraction*(jointPosEnd-jointPosStart)
-      jointVel=(jointPosEnd-jointPosStart)/keyFrameDuration
+    basePos1Start = [frameData[1], frameData[2], frameData[3]]
+    basePos1End = [frameDataNext[1], frameDataNext[2], frameDataNext[3]]
+    self._basePos = [
+        basePos1Start[0] + frameFraction * (basePos1End[0] - basePos1Start[0]),
+        basePos1Start[1] + frameFraction * (basePos1End[1] - basePos1Start[1]),
+        basePos1Start[2] + frameFraction * (basePos1End[2] - basePos1Start[2])
+    ]
+    self._baseLinVel = self.ComputeLinVel(basePos1Start, basePos1End, keyFrameDuration)
+    baseOrn1Start = [frameData[5], frameData[6], frameData[7], frameData[4]]
+    baseOrn1Next = [frameDataNext[5], frameDataNext[6], frameDataNext[7], frameDataNext[4]]
+    self._baseOrn = bullet_client.getQuaternionSlerp(baseOrn1Start, baseOrn1Next, frameFraction)
+    self._baseAngVel = self.ComputeAngVel(baseOrn1Start, baseOrn1Next, keyFrameDuration,
+                                          bullet_client)
+
+    jointPositions = [
+        self._basePos[0], self._basePos[1], self._basePos[2], self._baseOrn[0], self._baseOrn[1],
+        self._baseOrn[2], self._baseOrn[3]
+    ]
+    jointVelocities = [
+        self._baseLinVel[0], self._baseLinVel[1], self._baseLinVel[2], self._baseAngVel[0],
+        self._baseAngVel[1], self._baseAngVel[2]
+    ]
+
+    for j in range(12):
+      index = j + 8
+      jointPosStart = frameData[index]
+      jointPosEnd = frameDataNext[index]
+      jointPos = jointPosStart + frameFraction * (jointPosEnd - jointPosStart)
+      jointVel = (jointPosEnd - jointPosStart) / keyFrameDuration
      jointPositions.append(jointPos)
      jointVelocities.append(jointVel)
    self._jointPositions = jointPositions
    self._jointVelocities = jointVelocities
-    return jointPositions,jointVelocities
+    return jointPositions, jointVelocities
--- a/examples/pybullet/gym/pybullet_envs/deep_mimic/env/quadruped_stable_pd.py
+++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/env/quadruped_stable_pd.py
--- a/examples/pybullet/gym/pybullet_envs/deep_mimic/env/testHumanoid.py
+++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/env/testHumanoid.py
@@ -8,87 +8,89 @@ import pybullet as p1
 import humanoid_pose_interpolator
 import numpy as np

-pybullet_client =  bullet_client.BulletClient(connection_mode=p1.GUI)
+pybullet_client = bullet_client.BulletClient(connection_mode=p1.GUI)

 pybullet_client.setAdditionalSearchPath(pybullet_data.getDataPath())
-z2y = pybullet_client.getQuaternionFromEuler([-math.pi*0.5,0,0])
+z2y = pybullet_client.getQuaternionFromEuler([-math.pi * 0.5, 0, 0])
 #planeId = pybullet_client.loadURDF("plane.urdf",[0,0,0],z2y)
-planeId= pybullet_client.loadURDF("plane_implicit.urdf",[0,0,0],z2y, useMaximalCoordinates=True)
-pybullet_client.changeDynamics(planeId, linkIndex=-1, lateralFriction=0.9)        
+planeId = pybullet_client.loadURDF("plane_implicit.urdf", [0, 0, 0],
+                                   z2y,
+                                   useMaximalCoordinates=True)
+pybullet_client.changeDynamics(planeId, linkIndex=-1, lateralFriction=0.9)
 #print("planeId=",planeId)

-pybullet_client.configureDebugVisualizer(pybullet_client.COV_ENABLE_Y_AXIS_UP,1)
-pybullet_client.setGravity(0,-9.8,0)
+pybullet_client.configureDebugVisualizer(pybullet_client.COV_ENABLE_Y_AXIS_UP, 1)
+pybullet_client.setGravity(0, -9.8, 0)

 pybullet_client.setPhysicsEngineParameter(numSolverIterations=10)

-
 mocapData = motion_capture_data.MotionCaptureData()
 #motionPath = pybullet_data.getDataPath()+"/data/motions/humanoid3d_walk.txt"
-motionPath = pybullet_data.getDataPath()+"/data/motions/humanoid3d_backflip.txt"
+motionPath = pybullet_data.getDataPath() + "/data/motions/humanoid3d_backflip.txt"
 mocapData.Load(motionPath)
-timeStep = 1./600
-useFixedBase=False
+timeStep = 1. / 600
+useFixedBase = False
 humanoid = humanoid_stable_pd.HumanoidStablePD(pybullet_client, mocapData, timeStep, useFixedBase)
 isInitialized = True

 pybullet_client.setTimeStep(timeStep)
 pybullet_client.setPhysicsEngineParameter(numSubSteps=2)
-timeId = pybullet_client.addUserDebugParameter("time",0,10,0)
+timeId = pybullet_client.addUserDebugParameter("time", 0, 10, 0)
+

-  
 def isKeyTriggered(keys, key):
  o = ord(key)
  if o in keys:
    return keys[ord(key)] & pybullet_client.KEY_WAS_TRIGGERED
  return False
-  
+
+
 animating = False
 singleStep = False

-
-t=0
+t = 0
 while (1):

  keys = pybullet_client.getKeyboardEvents()
  #print(keys)
  if isKeyTriggered(keys, ' '):
-      animating = not animating
+    animating = not animating

  if isKeyTriggered(keys, 'b'):
-      singleStep = True
-  
+    singleStep = True
+
  if animating or singleStep:
-    
-    
+
    singleStep = False
    #t = pybullet_client.readUserDebugParameter(timeId)
    #print("t=",t)
-    for i in range (1):
+    for i in range(1):

-      print("t=",t)
+      print("t=", t)
      humanoid.setSimTime(t)
-        
+
      humanoid.computePose(humanoid._frameFraction)
      pose = humanoid._poseInterpolator
      #humanoid.initializePose(pose=pose, phys_model = humanoid._sim_model, initBase=True, initializeVelocity=True)
      #humanoid.resetPose()
-      
-      
+
      desiredPose = humanoid.computePose(humanoid._frameFraction)
-      #desiredPose = desiredPose.GetPose() 
+      #desiredPose = desiredPose.GetPose()
      #curPose = HumanoidPoseInterpolator()
      #curPose.reset()
      s = humanoid.getState()
      #np.savetxt("pb_record_state_s.csv", s, delimiter=",")
-      maxForces = [0,0,0,0,0,0,0,200,200,200,200, 50,50,50,50, 200,200,200,200, 150, 90,90,90,90, 100,100,100,100, 60, 200,200,200,200,  150, 90, 90, 90, 90, 100,100,100,100, 60]
+      maxForces = [
+          0, 0, 0, 0, 0, 0, 0, 200, 200, 200, 200, 50, 50, 50, 50, 200, 200, 200, 200, 150, 90, 90,
+          90, 90, 100, 100, 100, 100, 60, 200, 200, 200, 200, 150, 90, 90, 90, 90, 100, 100, 100,
+          100, 60
+      ]
      taus = humanoid.computePDForces(desiredPose, desiredVelocities=None, maxForces=maxForces)
-      
+
      #print("taus=",taus)
      humanoid.applyPDForces(taus)
-      
+
      pybullet_client.stepSimulation()
-      t+=1./600.
-      
-    
-  time.sleep(1./600.)
+      t += 1. / 600.
+
+  time.sleep(1. / 600.)
--- a/examples/pybullet/gym/pybullet_envs/deep_mimic/env/testLaikago.py
+++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/env/testLaikago.py
@@ -8,240 +8,258 @@ import motion_capture_data
 import quadrupedPoseInterpolator

 useConstraints = False
-  
-p =  bullet_client.BulletClient(connection_mode=p1.GUI)
+
+p = bullet_client.BulletClient(connection_mode=p1.GUI)
 p.setAdditionalSearchPath(pybullet_data.getDataPath())

 plane = p.loadURDF("plane.urdf")
-p.setGravity(0,0,-10)
-timeStep=1./500
+p.setGravity(0, 0, -10)
+timeStep = 1. / 500
 p.setTimeStep(timeStep)
 #p.setDefaultContactERP(0)
-#urdfFlags = p.URDF_USE_SELF_COLLISION+p.URDF_USE_SELF_COLLISION_EXCLUDE_ALL_PARENTS 
+#urdfFlags = p.URDF_USE_SELF_COLLISION+p.URDF_USE_SELF_COLLISION_EXCLUDE_ALL_PARENTS
 urdfFlags = p.URDF_USE_SELF_COLLISION

-
-startPos=[0.007058990464444105, 0.03149299192130908, 0.4918981912395484]
-startOrn=[0.005934649695708604, 0.7065453990917289, 0.7076373820553712, -0.0027774940359030264]
-quadruped = p.loadURDF("laikago/laikago.urdf",startPos,startOrn, flags = urdfFlags,useFixedBase=False)
-p.resetBasePositionAndOrientation(quadruped,startPos,startOrn)
+startPos = [0.007058990464444105, 0.03149299192130908, 0.4918981912395484]
+startOrn = [0.005934649695708604, 0.7065453990917289, 0.7076373820553712, -0.0027774940359030264]
+quadruped = p.loadURDF("laikago/laikago.urdf",
+                       startPos,
+                       startOrn,
+                       flags=urdfFlags,
+                       useFixedBase=False)
+p.resetBasePositionAndOrientation(quadruped, startPos, startOrn)
 if not useConstraints:
  for j in range(p.getNumJoints(quadruped)):
-    p.setJointMotorControl2(quadruped,j,p.POSITION_CONTROL,force=0)
-  
-#This cube is added as a soft constraint to keep the laikago from falling 
+    p.setJointMotorControl2(quadruped, j, p.POSITION_CONTROL, force=0)
+
+#This cube is added as a soft constraint to keep the laikago from falling
 #since we didn't train it yet, it doesn't balance
-cube = p.loadURDF("cube_no_rotation.urdf",[0,0,-0.5],[0,0.5,0.5,0])
-p.setCollisionFilterGroupMask(cube,-1,0,0)
+cube = p.loadURDF("cube_no_rotation.urdf", [0, 0, -0.5], [0, 0.5, 0.5, 0])
+p.setCollisionFilterGroupMask(cube, -1, 0, 0)
 for j in range(p.getNumJoints(cube)):
-  p.setJointMotorControl2(cube,j,p.POSITION_CONTROL,force=0)
-  p.setCollisionFilterGroupMask(cube,j,0,0)
-  p.changeVisualShape(cube,j,rgbaColor=[1,0,0,0])
-cid = p.createConstraint(cube,p.getNumJoints(cube)-1,quadruped,-1,p.JOINT_FIXED,[0,0,0],[0,1,0],[0,0,0])
+  p.setJointMotorControl2(cube, j, p.POSITION_CONTROL, force=0)
+  p.setCollisionFilterGroupMask(cube, j, 0, 0)
+  p.changeVisualShape(cube, j, rgbaColor=[1, 0, 0, 0])
+cid = p.createConstraint(cube,
+                         p.getNumJoints(cube) - 1, quadruped, -1, p.JOINT_FIXED, [0, 0, 0],
+                         [0, 1, 0], [0, 0, 0])
 p.changeConstraint(cid, maxForce=10)

+jointIds = []
+paramIds = []
+jointOffsets = []
+jointDirections = [-1, 1, 1, 1, 1, 1, -1, 1, 1, 1, 1, 1]
+jointAngles = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]

-jointIds=[]
-paramIds=[]
-jointOffsets=[]
-jointDirections=[-1,1,1,1,1,1,-1,1,1,1,1,1]
-jointAngles=[0,0,0,0,0,0,0,0,0,0,0,0]
-
-for i in range (4):
+for i in range(4):
  jointOffsets.append(0)
  jointOffsets.append(-0.7)
  jointOffsets.append(0.7)

-maxForceId = p.addUserDebugParameter("maxForce",0,100,20)
+maxForceId = p.addUserDebugParameter("maxForce", 0, 100, 20)

-for j in range (p.getNumJoints(quadruped)):
-        p.changeDynamics(quadruped,j,linearDamping=0, angularDamping=0)
-        info = p.getJointInfo(quadruped,j)
-        #print(info)
-        jointName = info[1]
-        jointType = info[2]
-        if (jointType==p.JOINT_PRISMATIC or jointType==p.JOINT_REVOLUTE):
-                jointIds.append(j)
-
-    
-startQ=[0.08389, 0.8482, -1.547832, -0.068933, 0.625726, -1.272086, 0.074398, 0.61135, -1.255892, -0.068262, 0.836745, -1.534517]
 for j in range(p.getNumJoints(quadruped)):
-  p.resetJointState(quadruped,jointIds[j],jointDirections[j]*startQ[j]+jointOffsets[j])
+  p.changeDynamics(quadruped, j, linearDamping=0, angularDamping=0)
+  info = p.getJointInfo(quadruped, j)
+  #print(info)
+  jointName = info[1]
+  jointType = info[2]
+  if (jointType == p.JOINT_PRISMATIC or jointType == p.JOINT_REVOLUTE):
+    jointIds.append(j)

+startQ = [
+    0.08389, 0.8482, -1.547832, -0.068933, 0.625726, -1.272086, 0.074398, 0.61135, -1.255892,
+    -0.068262, 0.836745, -1.534517
+]
+for j in range(p.getNumJoints(quadruped)):
+  p.resetJointState(quadruped, jointIds[j], jointDirections[j] * startQ[j] + jointOffsets[j])

 qpi = quadrupedPoseInterpolator.QuadrupedPoseInterpolator()

 #enable collision between lower legs

-for j in range (p.getNumJoints(quadruped)):
-		print(p.getJointInfo(quadruped,j))
+for j in range(p.getNumJoints(quadruped)):
+  print(p.getJointInfo(quadruped, j))

 #2,5,8 and 11 are the lower legs
-lower_legs = [2,5,8,11]
+lower_legs = [2, 5, 8, 11]
 for l0 in lower_legs:
-	for l1 in lower_legs:
-		if (l1>l0):
-			enableCollision = 1
-			print("collision for pair",l0,l1, p.getJointInfo(quadruped,l0)[12],p.getJointInfo(quadruped,l1)[12], "enabled=",enableCollision)
-			p.setCollisionFilterPair(quadruped, quadruped, 2,5,enableCollision)
+  for l1 in lower_legs:
+    if (l1 > l0):
+      enableCollision = 1
+      print("collision for pair", l0, l1,
+            p.getJointInfo(quadruped, l0)[12],
+            p.getJointInfo(quadruped, l1)[12], "enabled=", enableCollision)
+      p.setCollisionFilterPair(quadruped, quadruped, 2, 5, enableCollision)

-jointIds=[]
-paramIds=[]
-jointOffsets=[]
-jointDirections=[-1,1,1,1,1,1,-1,1,1,1,1,1]
-jointAngles=[0,0,0,0,0,0,0,0,0,0,0,0]
+jointIds = []
+paramIds = []
+jointOffsets = []
+jointDirections = [-1, 1, 1, 1, 1, 1, -1, 1, 1, 1, 1, 1]
+jointAngles = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]

-for i in range (4):
-	jointOffsets.append(0)
-	jointOffsets.append(-0.7)
-	jointOffsets.append(0.7)
+for i in range(4):
+  jointOffsets.append(0)
+  jointOffsets.append(-0.7)
+  jointOffsets.append(0.7)

-maxForceId = p.addUserDebugParameter("maxForce",0,100,20)
+maxForceId = p.addUserDebugParameter("maxForce", 0, 100, 20)

-for j in range (p.getNumJoints(quadruped)):
-        p.changeDynamics(quadruped,j,linearDamping=0, angularDamping=0)
-        info = p.getJointInfo(quadruped,j)
-        #print(info)
-        jointName = info[1]
-        jointType = info[2]
-        if (jointType==p.JOINT_PRISMATIC or jointType==p.JOINT_REVOLUTE):
-                jointIds.append(j)
+for j in range(p.getNumJoints(quadruped)):
+  p.changeDynamics(quadruped, j, linearDamping=0, angularDamping=0)
+  info = p.getJointInfo(quadruped, j)
+  #print(info)
+  jointName = info[1]
+  jointType = info[2]
+  if (jointType == p.JOINT_PRISMATIC or jointType == p.JOINT_REVOLUTE):
+    jointIds.append(j)

-		
-p.getCameraImage(480,320)
+p.getCameraImage(480, 320)
 p.setRealTimeSimulation(0)

-joints=[]
-
+joints = []

 mocapData = motion_capture_data.MotionCaptureData()

-motionPath = pybullet_data.getDataPath()+"/data/motions/laikago_walk.txt"
+motionPath = pybullet_data.getDataPath() + "/data/motions/laikago_walk.txt"

 mocapData.Load(motionPath)
-print("mocapData.NumFrames=",mocapData.NumFrames())
-print("mocapData.KeyFrameDuraction=",mocapData.KeyFrameDuraction())
-print("mocapData.getCycleTime=",mocapData.getCycleTime())
-print("mocapData.computeCycleOffset=",mocapData.computeCycleOffset())
+print("mocapData.NumFrames=", mocapData.NumFrames())
+print("mocapData.KeyFrameDuraction=", mocapData.KeyFrameDuraction())
+print("mocapData.getCycleTime=", mocapData.getCycleTime())
+print("mocapData.computeCycleOffset=", mocapData.computeCycleOffset())

 stablePD = pd_controller_stable.PDControllerStable(p)

 cycleTime = mocapData.getCycleTime()
-t=0
+t = 0

-while t<10.*cycleTime:
+while t < 10. * cycleTime:
  #get interpolated joint
  keyFrameDuration = mocapData.KeyFrameDuraction()
  cycleTime = mocapData.getCycleTime()
  cycleCount = mocapData.calcCycleCount(t, cycleTime)
-  
+
  #print("cycleTime=",cycleTime)
  #print("cycleCount=",cycleCount)
-  
+
  #print("cycles=",cycles)
-  frameTime = t - cycleCount*cycleTime
+  frameTime = t - cycleCount * cycleTime
  #print("frameTime=",frameTime)
-  if (frameTime<0):
+  if (frameTime < 0):
    frameTime += cycleTime

-  frame = int(frameTime/keyFrameDuration)
-  frameNext = frame+1
-  if (frameNext >=  mocapData.NumFrames()):
+  frame = int(frameTime / keyFrameDuration)
+  frameNext = frame + 1
+  if (frameNext >= mocapData.NumFrames()):
    frameNext = frame
-  frameFraction = (frameTime - frame*keyFrameDuration)/(keyFrameDuration)
+  frameFraction = (frameTime - frame * keyFrameDuration) / (keyFrameDuration)
  #print("frame=",frame)
  #print("frameFraction=",frameFraction)
  frameData = mocapData._motion_data['Frames'][frame]
  frameDataNext = mocapData._motion_data['Frames'][frameNext]
-    
-  jointsStr,qdot=qpi.Slerp(frameFraction, frameData, frameDataNext, p)
-  
+
+  jointsStr, qdot = qpi.Slerp(frameFraction, frameData, frameDataNext, p)
+
  maxForce = p.readUserDebugParameter(maxForceId)
-  print("jointIds=",jointIds)
+  print("jointIds=", jointIds)

  if useConstraints:
-    for j in range (12):
+    for j in range(12):
      #skip the base positional dofs
-      targetPos = float(jointsStr[j+7])
-      p.setJointMotorControl2(quadruped,jointIds[j],p.POSITION_CONTROL,jointDirections[j]*targetPos+jointOffsets[j], force=maxForce)
-    
+      targetPos = float(jointsStr[j + 7])
+      p.setJointMotorControl2(quadruped,
+                              jointIds[j],
+                              p.POSITION_CONTROL,
+                              jointDirections[j] * targetPos + jointOffsets[j],
+                              force=maxForce)
+
  else:
-    desiredPositions=[]
-    for j in range (7):
+    desiredPositions = []
+    for j in range(7):
      targetPosUnmodified = float(jointsStr[j])
      desiredPositions.append(targetPosUnmodified)
-    for j in range (12):
-      targetPosUnmodified = float(jointsStr[j+7])
-      targetPos=jointDirections[j]*targetPosUnmodified+jointOffsets[j]
+    for j in range(12):
+      targetPosUnmodified = float(jointsStr[j + 7])
+      targetPos = jointDirections[j] * targetPosUnmodified + jointOffsets[j]
      desiredPositions.append(targetPos)
-    numBaseDofs=6
-    totalDofs=12+numBaseDofs
-    desiredVelocities=None
-    if desiredVelocities==None:
-      desiredVelocities = [0]*totalDofs 
-    taus = stablePD.computePD(bodyUniqueId=quadruped, 
-      jointIndices = jointIds,
-      desiredPositions = desiredPositions,
-      desiredVelocities = desiredVelocities,
-      kps = [4000]*totalDofs,
-      kds = [40]*totalDofs,
-      maxForces = [500]*totalDofs, 
-      timeStep=timeStep)
-    
-    dofIndex=6
+    numBaseDofs = 6
+    totalDofs = 12 + numBaseDofs
+    desiredVelocities = None
+    if desiredVelocities == None:
+      desiredVelocities = [0] * totalDofs
+    taus = stablePD.computePD(bodyUniqueId=quadruped,
+                              jointIndices=jointIds,
+                              desiredPositions=desiredPositions,
+                              desiredVelocities=desiredVelocities,
+                              kps=[4000] * totalDofs,
+                              kds=[40] * totalDofs,
+                              maxForces=[500] * totalDofs,
+                              timeStep=timeStep)
+
+    dofIndex = 6
    scaling = 1
-    for index in range (len(jointIds)):
+    for index in range(len(jointIds)):
      jointIndex = jointIds[index]
-      force=[scaling*taus[dofIndex]]
-      print("force[", jointIndex,"]=",force)
-      p.setJointMotorControlMultiDof(quadruped, jointIndex, controlMode=p.TORQUE_CONTROL, force=force)
-      dofIndex+=1
-   
+      force = [scaling * taus[dofIndex]]
+      print("force[", jointIndex, "]=", force)
+      p.setJointMotorControlMultiDof(quadruped,
+                                     jointIndex,
+                                     controlMode=p.TORQUE_CONTROL,
+                                     force=force)
+      dofIndex += 1

  p.stepSimulation()
-  t+=timeStep
+  t += timeStep
  time.sleep(timeStep)

-useOrgData=False
+useOrgData = False
 if useOrgData:
-  with open("data1.txt","r") as filestream:
-  	for line in filestream:
-  		maxForce = p.readUserDebugParameter(maxForceId)
-  		currentline = line.split(",")
-  		frame = currentline[0]
-  		t = currentline[1]
-  		joints=currentline[2:14]
-  		for j in range (12):
-  			targetPos = float(joints[j])
-  			p.setJointMotorControl2(quadruped,jointIds[j],p.POSITION_CONTROL,jointDirections[j]*targetPos+jointOffsets[j], force=maxForce)
-  		p.stepSimulation()
-  		for lower_leg in lower_legs:
-  			#print("points for ", quadruped, " link: ", lower_leg)
-  			pts = p.getContactPoints(quadruped,-1, lower_leg)
-  			#print("num points=",len(pts))
-  			#for pt in pts:
-  			#	print(pt[9])
-  		time.sleep(1./500.)
-
-
-for j in range (p.getNumJoints(quadruped)):
-        p.changeDynamics(quadruped,j,linearDamping=0, angularDamping=0)
-        info = p.getJointInfo(quadruped,j)
-        js = p.getJointState(quadruped,j)
-        #print(info)
-        jointName = info[1]
-        jointType = info[2]
-        if (jointType==p.JOINT_PRISMATIC or jointType==p.JOINT_REVOLUTE):
-                paramIds.append(p.addUserDebugParameter(jointName.decode("utf-8"),-4,4,(js[0]-jointOffsets[j])/jointDirections[j]))
+  with open("data1.txt", "r") as filestream:
+    for line in filestream:
+      maxForce = p.readUserDebugParameter(maxForceId)
+      currentline = line.split(",")
+      frame = currentline[0]
+      t = currentline[1]
+      joints = currentline[2:14]
+      for j in range(12):
+        targetPos = float(joints[j])
+        p.setJointMotorControl2(quadruped,
+                                jointIds[j],
+                                p.POSITION_CONTROL,
+                                jointDirections[j] * targetPos + jointOffsets[j],
+                                force=maxForce)
+      p.stepSimulation()
+      for lower_leg in lower_legs:
+        #print("points for ", quadruped, " link: ", lower_leg)
+        pts = p.getContactPoints(quadruped, -1, lower_leg)
+        #print("num points=",len(pts))
+        #for pt in pts:
+        #	print(pt[9])
+      time.sleep(1. / 500.)

+for j in range(p.getNumJoints(quadruped)):
+  p.changeDynamics(quadruped, j, linearDamping=0, angularDamping=0)
+  info = p.getJointInfo(quadruped, j)
+  js = p.getJointState(quadruped, j)
+  #print(info)
+  jointName = info[1]
+  jointType = info[2]
+  if (jointType == p.JOINT_PRISMATIC or jointType == p.JOINT_REVOLUTE):
+    paramIds.append(
+        p.addUserDebugParameter(jointName.decode("utf-8"), -4, 4,
+                                (js[0] - jointOffsets[j]) / jointDirections[j]))

 p.setRealTimeSimulation(1)

 while (1):
-	
-	for i in range(len(paramIds)):
-		c = paramIds[i]
-		targetPos = p.readUserDebugParameter(c)
-		maxForce = p.readUserDebugParameter(maxForceId)
-		p.setJointMotorControl2(quadruped,jointIds[i],p.POSITION_CONTROL,jointDirections[i]*targetPos+jointOffsets[i], force=maxForce)
-	
+
+  for i in range(len(paramIds)):
+    c = paramIds[i]
+    targetPos = p.readUserDebugParameter(c)
+    maxForce = p.readUserDebugParameter(maxForceId)
+    p.setJointMotorControl2(quadruped,
+                            jointIds[i],
+                            p.POSITION_CONTROL,
+                            jointDirections[i] * targetPos + jointOffsets[i],
+                            force=maxForce)
--- a/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/agent_builder.py
+++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/agent_builder.py
@@ -5,17 +5,18 @@ import pybullet_data

 AGENT_TYPE_KEY = "AgentType"

-def build_agent(world, id, file):
-    agent = None
-    with open(pybullet_data.getDataPath()+"/"+file) as data_file:    
-        json_data = json.load(data_file)
-        
-        assert AGENT_TYPE_KEY in json_data
-        agent_type = json_data[AGENT_TYPE_KEY]
-        
-        if (agent_type == PPOAgent.NAME):
-            agent = PPOAgent(world, id, json_data)
-        else:
-            assert False, 'Unsupported agent type: ' + agent_type

-    return agent
+def build_agent(world, id, file):
+  agent = None
+  with open(pybullet_data.getDataPath() + "/" + file) as data_file:
+    json_data = json.load(data_file)
+
+    assert AGENT_TYPE_KEY in json_data
+    agent_type = json_data[AGENT_TYPE_KEY]
+
+    if (agent_type == PPOAgent.NAME):
+      agent = PPOAgent(world, id, json_data)
+    else:
+      assert False, 'Unsupported agent type: ' + agent_type
+
+  return agent
--- a/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/exp_params.py
+++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/exp_params.py
@@ -2,53 +2,54 @@ import json
 import numpy as np
 import pybullet_utils.math_util as MathUtil

+
 class ExpParams(object):
-    RATE_KEY = 'Rate'
-    INIT_ACTION_RATE_KEY = 'InitActionRate'
-    NOISE_KEY = 'Noise'
-    NOISE_INTERNAL_KEY = 'NoiseInternal'
-    TEMP_KEY = 'Temp'
+  RATE_KEY = 'Rate'
+  INIT_ACTION_RATE_KEY = 'InitActionRate'
+  NOISE_KEY = 'Noise'
+  NOISE_INTERNAL_KEY = 'NoiseInternal'
+  TEMP_KEY = 'Temp'

-    def __init__(self):
-        self.rate = 0.2
-        self.init_action_rate = 0
-        self.noise = 0.1
-        self.noise_internal = 0
-        self.temp = 0.1
-        return
+  def __init__(self):
+    self.rate = 0.2
+    self.init_action_rate = 0
+    self.noise = 0.1
+    self.noise_internal = 0
+    self.temp = 0.1
+    return

-    def __str__(self):
-        str = ''
-        str += '{}: {:.2f}\n'.format(self.RATE_KEY, self.rate)
-        str += '{}: {:.2f}\n'.format(self.INIT_ACTION_RATE_KEY, self.init_action_rate)
-        str += '{}: {:.2f}\n'.format(self.NOISE_KEY, self.noise)
-        str += '{}: {:.2f}\n'.format(self.NOISE_INTERNAL_KEY, self.noise_internal)
-        str += '{}: {:.2f}\n'.format(self.TEMP_KEY, self.temp)
-        return str
+  def __str__(self):
+    str = ''
+    str += '{}: {:.2f}\n'.format(self.RATE_KEY, self.rate)
+    str += '{}: {:.2f}\n'.format(self.INIT_ACTION_RATE_KEY, self.init_action_rate)
+    str += '{}: {:.2f}\n'.format(self.NOISE_KEY, self.noise)
+    str += '{}: {:.2f}\n'.format(self.NOISE_INTERNAL_KEY, self.noise_internal)
+    str += '{}: {:.2f}\n'.format(self.TEMP_KEY, self.temp)
+    return str

-    def load(self, json_data):
-        if (self.RATE_KEY in json_data):
-            self.rate = json_data[self.RATE_KEY]
+  def load(self, json_data):
+    if (self.RATE_KEY in json_data):
+      self.rate = json_data[self.RATE_KEY]

-        if (self.INIT_ACTION_RATE_KEY in json_data):
-            self.init_action_rate = json_data[self.INIT_ACTION_RATE_KEY]
+    if (self.INIT_ACTION_RATE_KEY in json_data):
+      self.init_action_rate = json_data[self.INIT_ACTION_RATE_KEY]

-        if (self.NOISE_KEY in json_data):
-            self.noise = json_data[self.NOISE_KEY]
+    if (self.NOISE_KEY in json_data):
+      self.noise = json_data[self.NOISE_KEY]

-        if (self.NOISE_INTERNAL_KEY in json_data):
-            self.noise_internal = json_data[self.NOISE_INTERNAL_KEY]
+    if (self.NOISE_INTERNAL_KEY in json_data):
+      self.noise_internal = json_data[self.NOISE_INTERNAL_KEY]

-        if (self.TEMP_KEY in json_data):
-            self.temp = json_data[self.TEMP_KEY]
+    if (self.TEMP_KEY in json_data):
+      self.temp = json_data[self.TEMP_KEY]

-        return
+    return

-    def lerp(self, other, t):
-        lerp_params = ExpParams()
-        lerp_params.rate = MathUtil.lerp(self.rate, other.rate, t)
-        lerp_params.init_action_rate = MathUtil.lerp(self.init_action_rate, other.init_action_rate, t)
-        lerp_params.noise = MathUtil.lerp(self.noise, other.noise, t)
-        lerp_params.noise_internal = MathUtil.lerp(self.noise_internal, other.noise_internal, t)
-        lerp_params.temp = MathUtil.log_lerp(self.temp, other.temp, t)
-        return lerp_params
+  def lerp(self, other, t):
+    lerp_params = ExpParams()
+    lerp_params.rate = MathUtil.lerp(self.rate, other.rate, t)
+    lerp_params.init_action_rate = MathUtil.lerp(self.init_action_rate, other.init_action_rate, t)
+    lerp_params.noise = MathUtil.lerp(self.noise, other.noise, t)
+    lerp_params.noise_internal = MathUtil.lerp(self.noise_internal, other.noise_internal, t)
+    lerp_params.temp = MathUtil.log_lerp(self.temp, other.temp, t)
+    return lerp_params
--- a/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/nets/init.py
+++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/nets/init.py
@@ -1 +1 @@
-from . import *
+from . import *
--- a/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/nets/fc_2layers_1024units.py
+++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/nets/fc_2layers_1024units.py
@@ -3,11 +3,12 @@ import pybullet_envs.deep_mimic.learning.tf_util as TFUtil

 NAME = "fc_2layers_1024units"

-def build_net(input_tfs, reuse=False):
-    layers = [1024, 512]
-    activation = tf.nn.relu

-    input_tf = tf.concat(axis=-1, values=input_tfs)          
-    h = TFUtil.fc_net(input_tf, layers, activation=activation, reuse=reuse)
-    h = activation(h)
-    return h
+def build_net(input_tfs, reuse=False):
+  layers = [1024, 512]
+  activation = tf.nn.relu
+
+  input_tf = tf.concat(axis=-1, values=input_tfs)
+  h = TFUtil.fc_net(input_tf, layers, activation=activation, reuse=reuse)
+  h = activation(h)
+  return h
--- a/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/nets/net_builder.py
+++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/nets/net_builder.py
@@ -1,11 +1,12 @@
 import pybullet_envs.deep_mimic.learning.nets.fc_2layers_1024units as fc_2layers_1024units

-def build_net(net_name, input_tfs, reuse=False):
-    net = None

-    if (net_name == fc_2layers_1024units.NAME):
-        net = fc_2layers_1024units.build_net(input_tfs, reuse)
-    else:
-        assert False, 'Unsupported net: ' + net_name
-    
-    return net
+def build_net(net_name, input_tfs, reuse=False):
+  net = None
+
+  if (net_name == fc_2layers_1024units.NAME):
+    net = fc_2layers_1024units.build_net(input_tfs, reuse)
+  else:
+    assert False, 'Unsupported net: ' + net_name
+
+  return net
--- a/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/normalizer.py
+++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/normalizer.py
@@ -3,147 +3,149 @@ import copy
 import pybullet_utils.mpi_util as MPIUtil
 from pybullet_utils.logger import Logger

+
 class Normalizer(object):
-    CHECK_SYNC_COUNT = 50000 # check synchronization after a certain number of entries
+  CHECK_SYNC_COUNT = 50000  # check synchronization after a certain number of entries

-    # these group IDs must be the same as those in CharController.h
-    NORM_GROUP_SINGLE = 0
-    NORM_GROUP_NONE = -1
+  # these group IDs must be the same as those in CharController.h
+  NORM_GROUP_SINGLE = 0
+  NORM_GROUP_NONE = -1

-    class Group(object):
-        def __init__(self, id, indices):
-            self.id = id
-            self.indices = indices
-            return
+  class Group(object):

-    def __init__(self, size, groups_ids=None, eps=0.02, clip=np.inf):
-        self.eps = eps
-        self.clip = clip
-        self.mean = np.zeros(size)
-        self.mean_sq = np.zeros(size)
-        self.std = np.ones(size)
-        self.count = 0
-        self.groups = self._build_groups(groups_ids)
+    def __init__(self, id, indices):
+      self.id = id
+      self.indices = indices
+      return

-        self.new_count = 0
-        self.new_sum = np.zeros_like(self.mean)
-        self.new_sum_sq = np.zeros_like(self.mean_sq)
-        return
+  def __init__(self, size, groups_ids=None, eps=0.02, clip=np.inf):
+    self.eps = eps
+    self.clip = clip
+    self.mean = np.zeros(size)
+    self.mean_sq = np.zeros(size)
+    self.std = np.ones(size)
+    self.count = 0
+    self.groups = self._build_groups(groups_ids)

-    def record(self, x):
-        size = self.get_size()
-        is_array = isinstance(x, np.ndarray)
-        if not is_array:
-            assert(size == 1)
-            x = np.array([[x]])
+    self.new_count = 0
+    self.new_sum = np.zeros_like(self.mean)
+    self.new_sum_sq = np.zeros_like(self.mean_sq)
+    return

-        assert x.shape[-1] == size, \
-            Logger.print2('Normalizer shape mismatch, expecting size {:d}, but got {:d}'.format(size, x.shape[-1]))
-        x = np.reshape(x, [-1, size])
+  def record(self, x):
+    size = self.get_size()
+    is_array = isinstance(x, np.ndarray)
+    if not is_array:
+      assert (size == 1)
+      x = np.array([[x]])

-        self.new_count += x.shape[0]
-        self.new_sum += np.sum(x, axis=0)
-        self.new_sum_sq += np.sum(np.square(x), axis=0)
-        return
+    assert x.shape[-1] == size, \
+        Logger.print2('Normalizer shape mismatch, expecting size {:d}, but got {:d}'.format(size, x.shape[-1]))
+    x = np.reshape(x, [-1, size])

-    def update(self):
-        new_count = MPIUtil.reduce_sum(self.new_count)
-        new_sum = MPIUtil.reduce_sum(self.new_sum)
-        new_sum_sq = MPIUtil.reduce_sum(self.new_sum_sq)
+    self.new_count += x.shape[0]
+    self.new_sum += np.sum(x, axis=0)
+    self.new_sum_sq += np.sum(np.square(x), axis=0)
+    return

-        new_total = self.count + new_count
-        if (self.count // self.CHECK_SYNC_COUNT != new_total // self.CHECK_SYNC_COUNT):
-            assert self.check_synced(), Logger.print2('Normalizer parameters desynchronized')
+  def update(self):
+    new_count = MPIUtil.reduce_sum(self.new_count)
+    new_sum = MPIUtil.reduce_sum(self.new_sum)
+    new_sum_sq = MPIUtil.reduce_sum(self.new_sum_sq)

-        if new_count > 0:
-            new_mean = self._process_group_data(new_sum / new_count, self.mean)
-            new_mean_sq = self._process_group_data(new_sum_sq / new_count, self.mean_sq)
-            w_old = float(self.count) / new_total
-            w_new = float(new_count) / new_total
+    new_total = self.count + new_count
+    if (self.count // self.CHECK_SYNC_COUNT != new_total // self.CHECK_SYNC_COUNT):
+      assert self.check_synced(), Logger.print2('Normalizer parameters desynchronized')

-            self.mean = w_old * self.mean + w_new * new_mean
-            self.mean_sq = w_old * self.mean_sq + w_new * new_mean_sq
-            self.count = new_total
-            self.std = self.calc_std(self.mean, self.mean_sq)
+    if new_count > 0:
+      new_mean = self._process_group_data(new_sum / new_count, self.mean)
+      new_mean_sq = self._process_group_data(new_sum_sq / new_count, self.mean_sq)
+      w_old = float(self.count) / new_total
+      w_new = float(new_count) / new_total

-            self.new_count = 0
-            self.new_sum.fill(0)
-            self.new_sum_sq.fill(0)
+      self.mean = w_old * self.mean + w_new * new_mean
+      self.mean_sq = w_old * self.mean_sq + w_new * new_mean_sq
+      self.count = new_total
+      self.std = self.calc_std(self.mean, self.mean_sq)

-        return
+      self.new_count = 0
+      self.new_sum.fill(0)
+      self.new_sum_sq.fill(0)

-    def get_size(self):
-        return self.mean.size
+    return

-    def set_mean_std(self, mean, std):
-        size = self.get_size()
-        is_array = isinstance(mean, np.ndarray) and isinstance(std, np.ndarray)
-        
-        if not is_array:
-            assert(size == 1)
-            mean = np.array([mean])
-            std = np.array([std])
+  def get_size(self):
+    return self.mean.size

-        assert len(mean) == size and len(std) == size, \
-            Logger.print2('Normalizer shape mismatch, expecting size {:d}, but got {:d} and {:d}'.format(size, len(mean), len(std)))
-        
-        self.mean = mean
-        self.std = std
-        self.mean_sq = self.calc_mean_sq(self.mean, self.std)
-        return
+  def set_mean_std(self, mean, std):
+    size = self.get_size()
+    is_array = isinstance(mean, np.ndarray) and isinstance(std, np.ndarray)

-    def normalize(self, x):
-        norm_x = (x - self.mean) / self.std
-        norm_x = np.clip(norm_x, -self.clip, self.clip)
-        return norm_x
+    if not is_array:
+      assert (size == 1)
+      mean = np.array([mean])
+      std = np.array([std])

-    def unnormalize(self, norm_x):
-        x = norm_x * self.std + self.mean
-        return x
+    assert len(mean) == size and len(std) == size, \
+        Logger.print2('Normalizer shape mismatch, expecting size {:d}, but got {:d} and {:d}'.format(size, len(mean), len(std)))

-    def calc_std(self, mean, mean_sq):
-        var = mean_sq - np.square(mean)
-        # some time floating point errors can lead to small negative numbers
-        var = np.maximum(var, 0)
-        std = np.sqrt(var)
-        std = np.maximum(std, self.eps)
-        return std
+    self.mean = mean
+    self.std = std
+    self.mean_sq = self.calc_mean_sq(self.mean, self.std)
+    return

-    def calc_mean_sq(self, mean, std):
-        return np.square(std) + np.square(self.mean)
+  def normalize(self, x):
+    norm_x = (x - self.mean) / self.std
+    norm_x = np.clip(norm_x, -self.clip, self.clip)
+    return norm_x

-    def check_synced(self):
-        synced = True
-        if MPIUtil.is_root_proc():
-            vars = np.concatenate([self.mean, self.mean_sq])
-            MPIUtil.bcast(vars)
-        else:
-            vars_local = np.concatenate([self.mean, self.mean_sq])
-            vars_root = np.empty_like(vars_local)
-            MPIUtil.bcast(vars_root)
-            synced = (vars_local == vars_root).all()
-        return synced
+  def unnormalize(self, norm_x):
+    x = norm_x * self.std + self.mean
+    return x

-    def _build_groups(self, groups_ids):
-        groups = []
-        if groups_ids is None:
-            curr_id = self.NORM_GROUP_SINGLE
-            curr_list = np.arange(self.get_size()).astype(np.int32)
-            groups.append(self.Group(curr_id, curr_list))
-        else:
-            ids = np.unique(groups_ids)
-            for id in ids:
-                curr_list = np.nonzero(groups_ids == id)[0].astype(np.int32)
-                groups.append(self.Group(id, curr_list))
+  def calc_std(self, mean, mean_sq):
+    var = mean_sq - np.square(mean)
+    # some time floating point errors can lead to small negative numbers
+    var = np.maximum(var, 0)
+    std = np.sqrt(var)
+    std = np.maximum(std, self.eps)
+    return std

-        return groups
+  def calc_mean_sq(self, mean, std):
+    return np.square(std) + np.square(self.mean)

-    def _process_group_data(self, new_data, old_data):
-        proc_data = new_data.copy()
-        for group in self.groups:
-            if group.id == self.NORM_GROUP_NONE:
-                proc_data[group.indices] = old_data[group.indices]
-            elif group.id != self.NORM_GROUP_SINGLE:
-                avg = np.mean(new_data[group.indices])
-                proc_data[group.indices] = avg
-        return proc_data
+  def check_synced(self):
+    synced = True
+    if MPIUtil.is_root_proc():
+      vars = np.concatenate([self.mean, self.mean_sq])
+      MPIUtil.bcast(vars)
+    else:
+      vars_local = np.concatenate([self.mean, self.mean_sq])
+      vars_root = np.empty_like(vars_local)
+      MPIUtil.bcast(vars_root)
+      synced = (vars_local == vars_root).all()
+    return synced
+
+  def _build_groups(self, groups_ids):
+    groups = []
+    if groups_ids is None:
+      curr_id = self.NORM_GROUP_SINGLE
+      curr_list = np.arange(self.get_size()).astype(np.int32)
+      groups.append(self.Group(curr_id, curr_list))
+    else:
+      ids = np.unique(groups_ids)
+      for id in ids:
+        curr_list = np.nonzero(groups_ids == id)[0].astype(np.int32)
+        groups.append(self.Group(id, curr_list))
+
+    return groups
+
+  def _process_group_data(self, new_data, old_data):
+    proc_data = new_data.copy()
+    for group in self.groups:
+      if group.id == self.NORM_GROUP_NONE:
+        proc_data[group.indices] = old_data[group.indices]
+      elif group.id != self.NORM_GROUP_SINGLE:
+        avg = np.mean(new_data[group.indices])
+        proc_data[group.indices] = avg
+    return proc_data
--- a/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/path.py
+++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/path.py
@@ -1,46 +1,47 @@
 import numpy as np
 from pybullet_envs.deep_mimic.env.env import Env

+
 class Path(object):
-    def __init__(self):
-        self.clear()
-        return

-    def pathlength(self):
-        return len(self.actions)
+  def __init__(self):
+    self.clear()
+    return

-    def is_valid(self):
-        valid = True
-        l = self.pathlength()
-        valid &= len(self.states) == l + 1
-        valid &= len(self.goals) == l + 1
-        valid &= len(self.actions) == l
-        valid &= len(self.logps) == l
-        valid &= len(self.rewards) == l
-        valid &= len(self.flags) == l
+  def pathlength(self):
+    return len(self.actions)

-        return valid
+  def is_valid(self):
+    valid = True
+    l = self.pathlength()
+    valid &= len(self.states) == l + 1
+    valid &= len(self.goals) == l + 1
+    valid &= len(self.actions) == l
+    valid &= len(self.logps) == l
+    valid &= len(self.rewards) == l
+    valid &= len(self.flags) == l

-    def check_vals(self):
-        for vals in [self.states, self.goals, self.actions, self.logps,
-                  self.rewards]:
-            for v in vals:
-                if not np.isfinite(v).all():
-                    return False
-        return True
+    return valid

-    def clear(self):
-        self.states = []
-        self.goals = []
-        self.actions = []
-        self.logps = []
-        self.rewards = []
-        self.flags = []
-        self.terminate = Env.Terminate.Null
-        return
+  def check_vals(self):
+    for vals in [self.states, self.goals, self.actions, self.logps, self.rewards]:
+      for v in vals:
+        if not np.isfinite(v).all():
+          return False
+    return True

-    def get_pathlen(self):
-        return len(self.rewards)
+  def clear(self):
+    self.states = []
+    self.goals = []
+    self.actions = []
+    self.logps = []
+    self.rewards = []
+    self.flags = []
+    self.terminate = Env.Terminate.Null
+    return

-    def calc_return(self):
-        return sum(self.rewards)
+  def get_pathlen(self):
+    return len(self.rewards)
+
+  def calc_return(self):
+    return sum(self.rewards)
--- a/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/pg_agent.py
+++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/pg_agent.py
@@ -13,341 +13,343 @@ import pybullet_utils.mpi_util as MPIUtil
 import pybullet_utils.math_util as MathUtil
 from pybullet_envs.deep_mimic.env.action_space import ActionSpace
 from pybullet_envs.deep_mimic.env.env import Env
-
 '''
 Policy Gradient Agent
 '''

+
 class PGAgent(TFAgent):
-    NAME = 'PG'
+  NAME = 'PG'

-    ACTOR_NET_KEY = 'ActorNet'
-    ACTOR_STEPSIZE_KEY = 'ActorStepsize'
-    ACTOR_MOMENTUM_KEY = 'ActorMomentum'
-    ACTOR_WEIGHT_DECAY_KEY = 'ActorWeightDecay'
-    ACTOR_INIT_OUTPUT_SCALE_KEY = 'ActorInitOutputScale'
+  ACTOR_NET_KEY = 'ActorNet'
+  ACTOR_STEPSIZE_KEY = 'ActorStepsize'
+  ACTOR_MOMENTUM_KEY = 'ActorMomentum'
+  ACTOR_WEIGHT_DECAY_KEY = 'ActorWeightDecay'
+  ACTOR_INIT_OUTPUT_SCALE_KEY = 'ActorInitOutputScale'

-    CRITIC_NET_KEY = 'CriticNet'
-    CRITIC_STEPSIZE_KEY = 'CriticStepsize'
-    CRITIC_MOMENTUM_KEY = 'CriticMomentum'
-    CRITIC_WEIGHT_DECAY_KEY = 'CriticWeightDecay'
-    
-    EXP_ACTION_FLAG = 1 << 0
+  CRITIC_NET_KEY = 'CriticNet'
+  CRITIC_STEPSIZE_KEY = 'CriticStepsize'
+  CRITIC_MOMENTUM_KEY = 'CriticMomentum'
+  CRITIC_WEIGHT_DECAY_KEY = 'CriticWeightDecay'

-    def __init__(self, world, id, json_data): 
-        self._exp_action = False
-        super().__init__(world, id, json_data)
-        return
+  EXP_ACTION_FLAG = 1 << 0

-    def reset(self):
-        super().reset()
-        self._exp_action = False
-        return
+  def __init__(self, world, id, json_data):
+    self._exp_action = False
+    super().__init__(world, id, json_data)
+    return

-    def _check_action_space(self):
-        action_space = self.get_action_space()
-        return action_space == ActionSpace.Continuous
+  def reset(self):
+    super().reset()
+    self._exp_action = False
+    return

-    def _load_params(self, json_data):
-        super()._load_params(json_data)
-        self.val_min, self.val_max = self._calc_val_bounds(self.discount)
-        self.val_fail, self.val_succ = self._calc_term_vals(self.discount)
-        return
+  def _check_action_space(self):
+    action_space = self.get_action_space()
+    return action_space == ActionSpace.Continuous

-    def _build_nets(self, json_data):
-        assert self.ACTOR_NET_KEY in json_data
-        assert self.CRITIC_NET_KEY in json_data
+  def _load_params(self, json_data):
+    super()._load_params(json_data)
+    self.val_min, self.val_max = self._calc_val_bounds(self.discount)
+    self.val_fail, self.val_succ = self._calc_term_vals(self.discount)
+    return

-        actor_net_name = json_data[self.ACTOR_NET_KEY]
-        critic_net_name = json_data[self.CRITIC_NET_KEY]
-        actor_init_output_scale = 1 if (self.ACTOR_INIT_OUTPUT_SCALE_KEY not in json_data) else json_data[self.ACTOR_INIT_OUTPUT_SCALE_KEY]
-        
-        s_size = self.get_state_size()
-        g_size = self.get_goal_size()
-        a_size = self.get_action_size()
+  def _build_nets(self, json_data):
+    assert self.ACTOR_NET_KEY in json_data
+    assert self.CRITIC_NET_KEY in json_data

-        # setup input tensors
-        self.s_tf = tf.placeholder(tf.float32, shape=[None, s_size], name="s") # observations
-        self.tar_val_tf = tf.placeholder(tf.float32, shape=[None], name="tar_val") # target value s
-        self.adv_tf = tf.placeholder(tf.float32, shape=[None], name="adv") # advantage
-        self.a_tf = tf.placeholder(tf.float32, shape=[None, a_size], name="a") # target actions
-        self.g_tf = tf.placeholder(tf.float32, shape=([None, g_size] if self.has_goal() else None), name="g") # goals
+    actor_net_name = json_data[self.ACTOR_NET_KEY]
+    critic_net_name = json_data[self.CRITIC_NET_KEY]
+    actor_init_output_scale = 1 if (self.ACTOR_INIT_OUTPUT_SCALE_KEY not in json_data
+                                   ) else json_data[self.ACTOR_INIT_OUTPUT_SCALE_KEY]

-        with tf.variable_scope('main'):
-            with tf.variable_scope('actor'):
-                self.actor_tf = self._build_net_actor(actor_net_name, actor_init_output_scale)
-            with tf.variable_scope('critic'):
-                self.critic_tf = self._build_net_critic(critic_net_name)
+    s_size = self.get_state_size()
+    g_size = self.get_goal_size()
+    a_size = self.get_action_size()

-        if (self.actor_tf != None):
-            Logger.print2('Built actor net: ' + actor_net_name)
+    # setup input tensors
+    self.s_tf = tf.placeholder(tf.float32, shape=[None, s_size], name="s")  # observations
+    self.tar_val_tf = tf.placeholder(tf.float32, shape=[None], name="tar_val")  # target value s
+    self.adv_tf = tf.placeholder(tf.float32, shape=[None], name="adv")  # advantage
+    self.a_tf = tf.placeholder(tf.float32, shape=[None, a_size], name="a")  # target actions
+    self.g_tf = tf.placeholder(tf.float32,
+                               shape=([None, g_size] if self.has_goal() else None),
+                               name="g")  # goals

-        if (self.critic_tf != None):
-            Logger.print2('Built critic net: ' + critic_net_name)
+    with tf.variable_scope('main'):
+      with tf.variable_scope('actor'):
+        self.actor_tf = self._build_net_actor(actor_net_name, actor_init_output_scale)
+      with tf.variable_scope('critic'):
+        self.critic_tf = self._build_net_critic(critic_net_name)

-        return
+    if (self.actor_tf != None):
+      Logger.print2('Built actor net: ' + actor_net_name)

-    def _build_normalizers(self):
-        super()._build_normalizers()
-        with self.sess.as_default(), self.graph.as_default(), tf.variable_scope(self.tf_scope):
-            with tf.variable_scope(self.RESOURCE_SCOPE):
-                val_offset, val_scale = self._calc_val_offset_scale(self.discount)
-                self.val_norm = TFNormalizer(self.sess, 'val_norm', 1)
-                self.val_norm.set_mean_std(-val_offset, 1.0 / val_scale)
-        return
+    if (self.critic_tf != None):
+      Logger.print2('Built critic net: ' + critic_net_name)

-    def _init_normalizers(self):
-        super()._init_normalizers()
-        with self.sess.as_default(), self.graph.as_default():
-            self.val_norm.update()
-        return
+    return

-    def _load_normalizers(self):
-        super()._load_normalizers()
-        self.val_norm.load()
-        return
+  def _build_normalizers(self):
+    super()._build_normalizers()
+    with self.sess.as_default(), self.graph.as_default(), tf.variable_scope(self.tf_scope):
+      with tf.variable_scope(self.RESOURCE_SCOPE):
+        val_offset, val_scale = self._calc_val_offset_scale(self.discount)
+        self.val_norm = TFNormalizer(self.sess, 'val_norm', 1)
+        self.val_norm.set_mean_std(-val_offset, 1.0 / val_scale)
+    return

-    def _build_losses(self, json_data):
-        actor_weight_decay = 0 if (self.ACTOR_WEIGHT_DECAY_KEY not in json_data) else json_data[self.ACTOR_WEIGHT_DECAY_KEY]
-        critic_weight_decay = 0 if (self.CRITIC_WEIGHT_DECAY_KEY not in json_data) else json_data[self.CRITIC_WEIGHT_DECAY_KEY]
+  def _init_normalizers(self):
+    super()._init_normalizers()
+    with self.sess.as_default(), self.graph.as_default():
+      self.val_norm.update()
+    return

-        norm_val_diff = self.val_norm.normalize_tf(self.tar_val_tf) - self.val_norm.normalize_tf(self.critic_tf)
-        self.critic_loss_tf = 0.5 * tf.reduce_mean(tf.square(norm_val_diff))
+  def _load_normalizers(self):
+    super()._load_normalizers()
+    self.val_norm.load()
+    return

-        if (critic_weight_decay != 0):
-            self.critic_loss_tf += critic_weight_decay * self._weight_decay_loss('main/critic')
-        
-        norm_a_mean_tf = self.a_norm.normalize_tf(self.actor_tf)
-        norm_a_diff = self.a_norm.normalize_tf(self.a_tf) - norm_a_mean_tf
+  def _build_losses(self, json_data):
+    actor_weight_decay = 0 if (
+        self.ACTOR_WEIGHT_DECAY_KEY not in json_data) else json_data[self.ACTOR_WEIGHT_DECAY_KEY]
+    critic_weight_decay = 0 if (
+        self.CRITIC_WEIGHT_DECAY_KEY not in json_data) else json_data[self.CRITIC_WEIGHT_DECAY_KEY]

-        self.actor_loss_tf = tf.reduce_sum(tf.square(norm_a_diff), axis=-1)
-        self.actor_loss_tf *= self.adv_tf
-        self.actor_loss_tf = 0.5 * tf.reduce_mean(self.actor_loss_tf)
+    norm_val_diff = self.val_norm.normalize_tf(self.tar_val_tf) - self.val_norm.normalize_tf(
+        self.critic_tf)
+    self.critic_loss_tf = 0.5 * tf.reduce_mean(tf.square(norm_val_diff))

-        norm_a_bound_min = self.a_norm.normalize(self.a_bound_min)
-        norm_a_bound_max = self.a_norm.normalize(self.a_bound_max)
-        a_bound_loss = TFUtil.calc_bound_loss(norm_a_mean_tf, norm_a_bound_min, norm_a_bound_max)
-        a_bound_loss /= self.exp_params_curr.noise
-        self.actor_loss_tf += a_bound_loss
+    if (critic_weight_decay != 0):
+      self.critic_loss_tf += critic_weight_decay * self._weight_decay_loss('main/critic')

-        if (actor_weight_decay != 0):
-            self.actor_loss_tf += actor_weight_decay * self._weight_decay_loss('main/actor')
-        
-        return
+    norm_a_mean_tf = self.a_norm.normalize_tf(self.actor_tf)
+    norm_a_diff = self.a_norm.normalize_tf(self.a_tf) - norm_a_mean_tf

-    def _build_solvers(self, json_data):
-        actor_stepsize = 0.001 if (self.ACTOR_STEPSIZE_KEY not in json_data) else json_data[self.ACTOR_STEPSIZE_KEY]
-        actor_momentum = 0.9 if (self.ACTOR_MOMENTUM_KEY not in json_data) else json_data[self.ACTOR_MOMENTUM_KEY]
-        critic_stepsize = 0.01 if (self.CRITIC_STEPSIZE_KEY not in json_data) else json_data[self.CRITIC_STEPSIZE_KEY]
-        critic_momentum = 0.9 if (self.CRITIC_MOMENTUM_KEY not in json_data) else json_data[self.CRITIC_MOMENTUM_KEY]
-        
-        critic_vars = self._tf_vars('main/critic')
-        critic_opt = tf.train.MomentumOptimizer(learning_rate=critic_stepsize, momentum=critic_momentum)
-        self.critic_grad_tf = tf.gradients(self.critic_loss_tf, critic_vars)
-        self.critic_solver = MPISolver(self.sess, critic_opt, critic_vars)
+    self.actor_loss_tf = tf.reduce_sum(tf.square(norm_a_diff), axis=-1)
+    self.actor_loss_tf *= self.adv_tf
+    self.actor_loss_tf = 0.5 * tf.reduce_mean(self.actor_loss_tf)

-        actor_vars = self._tf_vars('main/actor')
-        actor_opt = tf.train.MomentumOptimizer(learning_rate=actor_stepsize, momentum=actor_momentum)
-        self.actor_grad_tf = tf.gradients(self.actor_loss_tf, actor_vars)
-        self.actor_solver = MPISolver(self.sess, actor_opt, actor_vars)
+    norm_a_bound_min = self.a_norm.normalize(self.a_bound_min)
+    norm_a_bound_max = self.a_norm.normalize(self.a_bound_max)
+    a_bound_loss = TFUtil.calc_bound_loss(norm_a_mean_tf, norm_a_bound_min, norm_a_bound_max)
+    a_bound_loss /= self.exp_params_curr.noise
+    self.actor_loss_tf += a_bound_loss

-        return
+    if (actor_weight_decay != 0):
+      self.actor_loss_tf += actor_weight_decay * self._weight_decay_loss('main/actor')

-    def _build_net_actor(self, net_name, init_output_scale):
-        norm_s_tf = self.s_norm.normalize_tf(self.s_tf)
-        input_tfs = [norm_s_tf]
-        if (self.has_goal()):
-            norm_g_tf = self.g_norm.normalize_tf(self.g_tf)
-            input_tfs += [norm_g_tf]
-        
-        h = NetBuilder.build_net(net_name, input_tfs)
-        norm_a_tf = tf.layers.dense(inputs=h, units=self.get_action_size(), activation=None,
-                                kernel_initializer=tf.random_uniform_initializer(minval=-init_output_scale, maxval=init_output_scale))
-        
-        a_tf = self.a_norm.unnormalize_tf(norm_a_tf)
-        return a_tf
-    
-    def _build_net_critic(self, net_name):
-        norm_s_tf = self.s_norm.normalize_tf(self.s_tf)
-        input_tfs = [norm_s_tf]
-        if (self.has_goal()):
-            norm_g_tf = self.g_norm.normalize_tf(self.g_tf)
-            input_tfs += [norm_g_tf]
-        
-        h = NetBuilder.build_net(net_name, input_tfs)
-        norm_val_tf = tf.layers.dense(inputs=h, units=1, activation=None,
-                                kernel_initializer=TFUtil.xavier_initializer);
+    return

-        norm_val_tf = tf.reshape(norm_val_tf, [-1])
-        val_tf = self.val_norm.unnormalize_tf(norm_val_tf)
-        return val_tf
+  def _build_solvers(self, json_data):
+    actor_stepsize = 0.001 if (
+        self.ACTOR_STEPSIZE_KEY not in json_data) else json_data[self.ACTOR_STEPSIZE_KEY]
+    actor_momentum = 0.9 if (
+        self.ACTOR_MOMENTUM_KEY not in json_data) else json_data[self.ACTOR_MOMENTUM_KEY]
+    critic_stepsize = 0.01 if (
+        self.CRITIC_STEPSIZE_KEY not in json_data) else json_data[self.CRITIC_STEPSIZE_KEY]
+    critic_momentum = 0.9 if (
+        self.CRITIC_MOMENTUM_KEY not in json_data) else json_data[self.CRITIC_MOMENTUM_KEY]

-    def _initialize_vars(self):
-        super()._initialize_vars()
-        self._sync_solvers()
-        return
+    critic_vars = self._tf_vars('main/critic')
+    critic_opt = tf.train.MomentumOptimizer(learning_rate=critic_stepsize,
+                                            momentum=critic_momentum)
+    self.critic_grad_tf = tf.gradients(self.critic_loss_tf, critic_vars)
+    self.critic_solver = MPISolver(self.sess, critic_opt, critic_vars)

-    def _sync_solvers(self):
-        self.actor_solver.sync()
-        self.critic_solver.sync()
-        return
+    actor_vars = self._tf_vars('main/actor')
+    actor_opt = tf.train.MomentumOptimizer(learning_rate=actor_stepsize, momentum=actor_momentum)
+    self.actor_grad_tf = tf.gradients(self.actor_loss_tf, actor_vars)
+    self.actor_solver = MPISolver(self.sess, actor_opt, actor_vars)

-    def _decide_action(self, s, g):
-        with self.sess.as_default(), self.graph.as_default():
-            self._exp_action = False
-            a = self._eval_actor(s, g)[0]
-            logp = 0
+    return

-            if self._enable_stoch_policy():
-                # epsilon-greedy
-                rand_action = MathUtil.flip_coin(self.exp_params_curr.rate)
-                if rand_action:
-                    norm_exp_noise = np.random.randn(*a.shape)
-                    norm_exp_noise *= self.exp_params_curr.noise
-                    exp_noise = norm_exp_noise * self.a_norm.std
-                    a += exp_noise
+  def _build_net_actor(self, net_name, init_output_scale):
+    norm_s_tf = self.s_norm.normalize_tf(self.s_tf)
+    input_tfs = [norm_s_tf]
+    if (self.has_goal()):
+      norm_g_tf = self.g_norm.normalize_tf(self.g_tf)
+      input_tfs += [norm_g_tf]

-                    logp = self._calc_action_logp(norm_exp_noise)
-                    self._exp_action = True
+    h = NetBuilder.build_net(net_name, input_tfs)
+    norm_a_tf = tf.layers.dense(inputs=h,
+                                units=self.get_action_size(),
+                                activation=None,
+                                kernel_initializer=tf.random_uniform_initializer(
+                                    minval=-init_output_scale, maxval=init_output_scale))

-        return a, logp
+    a_tf = self.a_norm.unnormalize_tf(norm_a_tf)
+    return a_tf

-    def _enable_stoch_policy(self):
-        return self.enable_training and (self._mode == self.Mode.TRAIN or self._mode == self.Mode.TRAIN_END)
+  def _build_net_critic(self, net_name):
+    norm_s_tf = self.s_norm.normalize_tf(self.s_tf)
+    input_tfs = [norm_s_tf]
+    if (self.has_goal()):
+      norm_g_tf = self.g_norm.normalize_tf(self.g_tf)
+      input_tfs += [norm_g_tf]

-    def _eval_actor(self, s, g):
-        s = np.reshape(s, [-1, self.get_state_size()])
-        g = np.reshape(g, [-1, self.get_goal_size()]) if self.has_goal() else None
-          
-        feed = {
-            self.s_tf : s,
-            self.g_tf : g
-        }
+    h = NetBuilder.build_net(net_name, input_tfs)
+    norm_val_tf = tf.layers.dense(inputs=h,
+                                  units=1,
+                                  activation=None,
+                                  kernel_initializer=TFUtil.xavier_initializer)

-        a = self.actor_tf.eval(feed)
-        return a
-    
-    def _eval_critic(self, s, g):
-        with self.sess.as_default(), self.graph.as_default():
-            s = np.reshape(s, [-1, self.get_state_size()])
-            g = np.reshape(g, [-1, self.get_goal_size()]) if self.has_goal() else None
+    norm_val_tf = tf.reshape(norm_val_tf, [-1])
+    val_tf = self.val_norm.unnormalize_tf(norm_val_tf)
+    return val_tf

-            feed = {
-                self.s_tf : s,
-                self.g_tf : g
-            }
+  def _initialize_vars(self):
+    super()._initialize_vars()
+    self._sync_solvers()
+    return

-            val = self.critic_tf.eval(feed)    
-        return val
+  def _sync_solvers(self):
+    self.actor_solver.sync()
+    self.critic_solver.sync()
+    return

-    def _record_flags(self):
-        flags = int(0)
-        if (self._exp_action):
-            flags = flags | self.EXP_ACTION_FLAG
-        return flags
+  def _decide_action(self, s, g):
+    with self.sess.as_default(), self.graph.as_default():
+      self._exp_action = False
+      a = self._eval_actor(s, g)[0]
+      logp = 0

-    def _train_step(self):
-        super()._train_step()
+      if self._enable_stoch_policy():
+        # epsilon-greedy
+        rand_action = MathUtil.flip_coin(self.exp_params_curr.rate)
+        if rand_action:
+          norm_exp_noise = np.random.randn(*a.shape)
+          norm_exp_noise *= self.exp_params_curr.noise
+          exp_noise = norm_exp_noise * self.a_norm.std
+          a += exp_noise

-        critic_loss = self._update_critic()
-        actor_loss = self._update_actor()
-        critic_loss = MPIUtil.reduce_avg(critic_loss)
-        actor_loss = MPIUtil.reduce_avg(actor_loss)
+          logp = self._calc_action_logp(norm_exp_noise)
+          self._exp_action = True

-        critic_stepsize = self.critic_solver.get_stepsize()
-        actor_stepsize = self.actor_solver.get_stepsize()
-        
-        self.logger.log_tabular('Critic_Loss', critic_loss)
-        self.logger.log_tabular('Critic_Stepsize', critic_stepsize)
-        self.logger.log_tabular('Actor_Loss', actor_loss) 
-        self.logger.log_tabular('Actor_Stepsize', actor_stepsize)
+    return a, logp

-        return
+  def _enable_stoch_policy(self):
+    return self.enable_training and (self._mode == self.Mode.TRAIN or
+                                     self._mode == self.Mode.TRAIN_END)

-    def _update_critic(self):
-        idx = self.replay_buffer.sample(self._local_mini_batch_size)
-        s = self.replay_buffer.get('states', idx)
-        g = self.replay_buffer.get('goals', idx) if self.has_goal() else None
-        
-        tar_V = self._calc_updated_vals(idx)
-        tar_V = np.clip(tar_V, self.val_min, self.val_max)
+  def _eval_actor(self, s, g):
+    s = np.reshape(s, [-1, self.get_state_size()])
+    g = np.reshape(g, [-1, self.get_goal_size()]) if self.has_goal() else None

-        feed = {
-            self.s_tf: s,
-            self.g_tf: g,
-            self.tar_val_tf: tar_V
-        }
+    feed = {self.s_tf: s, self.g_tf: g}

-        loss, grads = self.sess.run([self.critic_loss_tf, self.critic_grad_tf], feed)
-        self.critic_solver.update(grads)
-        return loss
-    
-    def _update_actor(self):
-        key = self.EXP_ACTION_FLAG
-        idx = self.replay_buffer.sample_filtered(self._local_mini_batch_size, key)
-        has_goal = self.has_goal()
+    a = self.actor_tf.eval(feed)
+    return a

-        s = self.replay_buffer.get('states', idx)
-        g = self.replay_buffer.get('goals', idx) if has_goal else None
-        a = self.replay_buffer.get('actions', idx)
+  def _eval_critic(self, s, g):
+    with self.sess.as_default(), self.graph.as_default():
+      s = np.reshape(s, [-1, self.get_state_size()])
+      g = np.reshape(g, [-1, self.get_goal_size()]) if self.has_goal() else None

-        V_new = self._calc_updated_vals(idx)
-        V_old = self._eval_critic(s, g)
-        adv = V_new - V_old
+      feed = {self.s_tf: s, self.g_tf: g}

-        feed = {
-            self.s_tf: s,
-            self.g_tf: g,
-            self.a_tf: a,
-            self.adv_tf: adv
-        }
+      val = self.critic_tf.eval(feed)
+    return val

-        loss, grads = self.sess.run([self.actor_loss_tf, self.actor_grad_tf], feed)
-        self.actor_solver.update(grads)
+  def _record_flags(self):
+    flags = int(0)
+    if (self._exp_action):
+      flags = flags | self.EXP_ACTION_FLAG
+    return flags

-        return loss
+  def _train_step(self):
+    super()._train_step()

-    def _calc_updated_vals(self, idx):
-        r = self.replay_buffer.get('rewards', idx)
+    critic_loss = self._update_critic()
+    actor_loss = self._update_actor()
+    critic_loss = MPIUtil.reduce_avg(critic_loss)
+    actor_loss = MPIUtil.reduce_avg(actor_loss)

-        if self.discount == 0:
-            new_V = r
-        else:
-            next_idx = self.replay_buffer.get_next_idx(idx)
-            s_next = self.replay_buffer.get('states', next_idx)
-            g_next = self.replay_buffer.get('goals', next_idx) if self.has_goal() else None
+    critic_stepsize = self.critic_solver.get_stepsize()
+    actor_stepsize = self.actor_solver.get_stepsize()

-            is_end = self.replay_buffer.is_path_end(idx)
-            is_fail = self.replay_buffer.check_terminal_flag(idx, Env.Terminate.Fail)
-            is_succ = self.replay_buffer.check_terminal_flag(idx, Env.Terminate.Succ)
-            is_fail = np.logical_and(is_end, is_fail) 
-            is_succ = np.logical_and(is_end, is_succ) 
+    self.logger.log_tabular('Critic_Loss', critic_loss)
+    self.logger.log_tabular('Critic_Stepsize', critic_stepsize)
+    self.logger.log_tabular('Actor_Loss', actor_loss)
+    self.logger.log_tabular('Actor_Stepsize', actor_stepsize)

-            V_next = self._eval_critic(s_next, g_next)
-            V_next[is_fail] = self.val_fail
-            V_next[is_succ] = self.val_succ
+    return

-            new_V = r + self.discount * V_next
-        return new_V
+  def _update_critic(self):
+    idx = self.replay_buffer.sample(self._local_mini_batch_size)
+    s = self.replay_buffer.get('states', idx)
+    g = self.replay_buffer.get('goals', idx) if self.has_goal() else None

-    def _calc_action_logp(self, norm_action_deltas):
-        # norm action delta are for the normalized actions (scaled by self.a_norm.std) 
-        stdev = self.exp_params_curr.noise
-        assert stdev > 0
+    tar_V = self._calc_updated_vals(idx)
+    tar_V = np.clip(tar_V, self.val_min, self.val_max)

-        a_size = self.get_action_size()
-        logp = -0.5 / (stdev * stdev) * np.sum(np.square(norm_action_deltas), axis=-1)
-        logp += -0.5 * a_size * np.log(2 * np.pi)
-        logp += -a_size * np.log(stdev)
-        return logp
+    feed = {self.s_tf: s, self.g_tf: g, self.tar_val_tf: tar_V}

-    def _log_val(self, s, g):
-        val = self._eval_critic(s, g)
-        norm_val = self.val_norm.normalize(val)
-        self.world.env.log_val(self.id, norm_val[0])
-        return
+    loss, grads = self.sess.run([self.critic_loss_tf, self.critic_grad_tf], feed)
+    self.critic_solver.update(grads)
+    return loss

-    def _build_replay_buffer(self, buffer_size):
-        super()._build_replay_buffer(buffer_size)
-        self.replay_buffer.add_filter_key(self.EXP_ACTION_FLAG)
-        return
+  def _update_actor(self):
+    key = self.EXP_ACTION_FLAG
+    idx = self.replay_buffer.sample_filtered(self._local_mini_batch_size, key)
+    has_goal = self.has_goal()
+
+    s = self.replay_buffer.get('states', idx)
+    g = self.replay_buffer.get('goals', idx) if has_goal else None
+    a = self.replay_buffer.get('actions', idx)
+
+    V_new = self._calc_updated_vals(idx)
+    V_old = self._eval_critic(s, g)
+    adv = V_new - V_old
+
+    feed = {self.s_tf: s, self.g_tf: g, self.a_tf: a, self.adv_tf: adv}
+
+    loss, grads = self.sess.run([self.actor_loss_tf, self.actor_grad_tf], feed)
+    self.actor_solver.update(grads)
+
+    return loss
+
+  def _calc_updated_vals(self, idx):
+    r = self.replay_buffer.get('rewards', idx)
+
+    if self.discount == 0:
+      new_V = r
+    else:
+      next_idx = self.replay_buffer.get_next_idx(idx)
+      s_next = self.replay_buffer.get('states', next_idx)
+      g_next = self.replay_buffer.get('goals', next_idx) if self.has_goal() else None
+
+      is_end = self.replay_buffer.is_path_end(idx)
+      is_fail = self.replay_buffer.check_terminal_flag(idx, Env.Terminate.Fail)
+      is_succ = self.replay_buffer.check_terminal_flag(idx, Env.Terminate.Succ)
+      is_fail = np.logical_and(is_end, is_fail)
+      is_succ = np.logical_and(is_end, is_succ)
+
+      V_next = self._eval_critic(s_next, g_next)
+      V_next[is_fail] = self.val_fail
+      V_next[is_succ] = self.val_succ
+
+      new_V = r + self.discount * V_next
+    return new_V
+
+  def _calc_action_logp(self, norm_action_deltas):
+    # norm action delta are for the normalized actions (scaled by self.a_norm.std)
+    stdev = self.exp_params_curr.noise
+    assert stdev > 0
+
+    a_size = self.get_action_size()
+    logp = -0.5 / (stdev * stdev) * np.sum(np.square(norm_action_deltas), axis=-1)
+    logp += -0.5 * a_size * np.log(2 * np.pi)
+    logp += -a_size * np.log(stdev)
+    return logp
+
+  def _log_val(self, s, g):
+    val = self._eval_critic(s, g)
+    norm_val = self.val_norm.normalize(val)
+    self.world.env.log_val(self.id, norm_val[0])
+    return
+
+  def _build_replay_buffer(self, buffer_size):
+    super()._build_replay_buffer(buffer_size)
+    self.replay_buffer.add_filter_key(self.EXP_ACTION_FLAG)
+    return
--- a/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/ppo_agent.py
+++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/ppo_agent.py
@@ -10,359 +10,374 @@ from pybullet_utils.logger import Logger
 import pybullet_utils.mpi_util as MPIUtil
 import pybullet_utils.math_util as MathUtil
 from pybullet_envs.deep_mimic.env.env import Env
-
 '''
 Proximal Policy Optimization Agent
 '''

+
 class PPOAgent(PGAgent):
-    NAME = "PPO"
-    EPOCHS_KEY = "Epochs"
-    BATCH_SIZE_KEY = "BatchSize"
-    RATIO_CLIP_KEY = "RatioClip"
-    NORM_ADV_CLIP_KEY = "NormAdvClip"
-    TD_LAMBDA_KEY = "TDLambda"
-    TAR_CLIP_FRAC = "TarClipFrac"
-    ACTOR_STEPSIZE_DECAY = "ActorStepsizeDecay"
+  NAME = "PPO"
+  EPOCHS_KEY = "Epochs"
+  BATCH_SIZE_KEY = "BatchSize"
+  RATIO_CLIP_KEY = "RatioClip"
+  NORM_ADV_CLIP_KEY = "NormAdvClip"
+  TD_LAMBDA_KEY = "TDLambda"
+  TAR_CLIP_FRAC = "TarClipFrac"
+  ACTOR_STEPSIZE_DECAY = "ActorStepsizeDecay"

-    def __init__(self, world, id, json_data): 
-        super().__init__(world, id, json_data)
-        return
+  def __init__(self, world, id, json_data):
+    super().__init__(world, id, json_data)
+    return

-    def _load_params(self, json_data):
-        super()._load_params(json_data)
+  def _load_params(self, json_data):
+    super()._load_params(json_data)

-        self.epochs = 1 if (self.EPOCHS_KEY not in json_data) else json_data[self.EPOCHS_KEY]
-        self.batch_size = 1024 if (self.BATCH_SIZE_KEY not in json_data) else json_data[self.BATCH_SIZE_KEY]
-        self.ratio_clip = 0.2 if (self.RATIO_CLIP_KEY not in json_data) else json_data[self.RATIO_CLIP_KEY]
-        self.norm_adv_clip = 5 if (self.NORM_ADV_CLIP_KEY not in json_data) else json_data[self.NORM_ADV_CLIP_KEY]
-        self.td_lambda = 0.95 if (self.TD_LAMBDA_KEY not in json_data) else json_data[self.TD_LAMBDA_KEY]
-        self.tar_clip_frac = -1 if (self.TAR_CLIP_FRAC not in json_data) else json_data[self.TAR_CLIP_FRAC]
-        self.actor_stepsize_decay = 0.5 if (self.ACTOR_STEPSIZE_DECAY not in json_data) else json_data[self.ACTOR_STEPSIZE_DECAY]
+    self.epochs = 1 if (self.EPOCHS_KEY not in json_data) else json_data[self.EPOCHS_KEY]
+    self.batch_size = 1024 if (
+        self.BATCH_SIZE_KEY not in json_data) else json_data[self.BATCH_SIZE_KEY]
+    self.ratio_clip = 0.2 if (
+        self.RATIO_CLIP_KEY not in json_data) else json_data[self.RATIO_CLIP_KEY]
+    self.norm_adv_clip = 5 if (
+        self.NORM_ADV_CLIP_KEY not in json_data) else json_data[self.NORM_ADV_CLIP_KEY]
+    self.td_lambda = 0.95 if (
+        self.TD_LAMBDA_KEY not in json_data) else json_data[self.TD_LAMBDA_KEY]
+    self.tar_clip_frac = -1 if (
+        self.TAR_CLIP_FRAC not in json_data) else json_data[self.TAR_CLIP_FRAC]
+    self.actor_stepsize_decay = 0.5 if (
+        self.ACTOR_STEPSIZE_DECAY not in json_data) else json_data[self.ACTOR_STEPSIZE_DECAY]

-        num_procs = MPIUtil.get_num_procs()
-        local_batch_size = int(self.batch_size / num_procs)
-        min_replay_size = 2 * local_batch_size # needed to prevent buffer overflow
-        assert(self.replay_buffer_size > min_replay_size)
+    num_procs = MPIUtil.get_num_procs()
+    local_batch_size = int(self.batch_size / num_procs)
+    min_replay_size = 2 * local_batch_size  # needed to prevent buffer overflow
+    assert (self.replay_buffer_size > min_replay_size)

-        self.replay_buffer_size = np.maximum(min_replay_size, self.replay_buffer_size)
+    self.replay_buffer_size = np.maximum(min_replay_size, self.replay_buffer_size)

-        return
+    return

-    def _build_nets(self, json_data):
-        assert self.ACTOR_NET_KEY in json_data
-        assert self.CRITIC_NET_KEY in json_data
+  def _build_nets(self, json_data):
+    assert self.ACTOR_NET_KEY in json_data
+    assert self.CRITIC_NET_KEY in json_data

-        actor_net_name = json_data[self.ACTOR_NET_KEY]
-        critic_net_name = json_data[self.CRITIC_NET_KEY]
-        actor_init_output_scale = 1 if (self.ACTOR_INIT_OUTPUT_SCALE_KEY not in json_data) else json_data[self.ACTOR_INIT_OUTPUT_SCALE_KEY]
+    actor_net_name = json_data[self.ACTOR_NET_KEY]
+    critic_net_name = json_data[self.CRITIC_NET_KEY]
+    actor_init_output_scale = 1 if (self.ACTOR_INIT_OUTPUT_SCALE_KEY not in json_data
+                                   ) else json_data[self.ACTOR_INIT_OUTPUT_SCALE_KEY]

-        s_size = self.get_state_size()
-        g_size = self.get_goal_size()
-        a_size = self.get_action_size()
+    s_size = self.get_state_size()
+    g_size = self.get_goal_size()
+    a_size = self.get_action_size()

-        # setup input tensors
-        self.s_tf = tf.placeholder(tf.float32, shape=[None, s_size], name="s")
-        self.a_tf = tf.placeholder(tf.float32, shape=[None, a_size], name="a")
-        self.tar_val_tf = tf.placeholder(tf.float32, shape=[None], name="tar_val")
-        self.adv_tf = tf.placeholder(tf.float32, shape=[None], name="adv")
-        self.g_tf = tf.placeholder(tf.float32, shape=([None, g_size] if self.has_goal() else None), name="g")
-        self.old_logp_tf = tf.placeholder(tf.float32, shape=[None], name="old_logp")
-        self.exp_mask_tf = tf.placeholder(tf.float32, shape=[None], name="exp_mask")
+    # setup input tensors
+    self.s_tf = tf.placeholder(tf.float32, shape=[None, s_size], name="s")
+    self.a_tf = tf.placeholder(tf.float32, shape=[None, a_size], name="a")
+    self.tar_val_tf = tf.placeholder(tf.float32, shape=[None], name="tar_val")
+    self.adv_tf = tf.placeholder(tf.float32, shape=[None], name="adv")
+    self.g_tf = tf.placeholder(tf.float32,
+                               shape=([None, g_size] if self.has_goal() else None),
+                               name="g")
+    self.old_logp_tf = tf.placeholder(tf.float32, shape=[None], name="old_logp")
+    self.exp_mask_tf = tf.placeholder(tf.float32, shape=[None], name="exp_mask")

-        with tf.variable_scope('main'):
-            with tf.variable_scope('actor'):
-                self.a_mean_tf = self._build_net_actor(actor_net_name, actor_init_output_scale)
-            with tf.variable_scope('critic'):
-                self.critic_tf = self._build_net_critic(critic_net_name)
-                
-        if (self.a_mean_tf != None):
-            Logger.print2('Built actor net: ' + actor_net_name)
+    with tf.variable_scope('main'):
+      with tf.variable_scope('actor'):
+        self.a_mean_tf = self._build_net_actor(actor_net_name, actor_init_output_scale)
+      with tf.variable_scope('critic'):
+        self.critic_tf = self._build_net_critic(critic_net_name)

-        if (self.critic_tf != None):
-            Logger.print2('Built critic net: ' + critic_net_name)
-        
-        self.norm_a_std_tf = self.exp_params_curr.noise * tf.ones(a_size)
-        norm_a_noise_tf = self.norm_a_std_tf * tf.random_normal(shape=tf.shape(self.a_mean_tf))
-        norm_a_noise_tf *= tf.expand_dims(self.exp_mask_tf, axis=-1)
-        self.sample_a_tf = self.a_mean_tf + norm_a_noise_tf * self.a_norm.std_tf
-        self.sample_a_logp_tf = TFUtil.calc_logp_gaussian(x_tf=norm_a_noise_tf, mean_tf=None, std_tf=self.norm_a_std_tf)
+    if (self.a_mean_tf != None):
+      Logger.print2('Built actor net: ' + actor_net_name)

-        return
+    if (self.critic_tf != None):
+      Logger.print2('Built critic net: ' + critic_net_name)

-    def _build_losses(self, json_data):
-        actor_weight_decay = 0 if (self.ACTOR_WEIGHT_DECAY_KEY not in json_data) else json_data[self.ACTOR_WEIGHT_DECAY_KEY]
-        critic_weight_decay = 0 if (self.CRITIC_WEIGHT_DECAY_KEY not in json_data) else json_data[self.CRITIC_WEIGHT_DECAY_KEY]
-        
-        norm_val_diff = self.val_norm.normalize_tf(self.tar_val_tf) - self.val_norm.normalize_tf(self.critic_tf)
-        self.critic_loss_tf = 0.5 * tf.reduce_mean(tf.square(norm_val_diff))
+    self.norm_a_std_tf = self.exp_params_curr.noise * tf.ones(a_size)
+    norm_a_noise_tf = self.norm_a_std_tf * tf.random_normal(shape=tf.shape(self.a_mean_tf))
+    norm_a_noise_tf *= tf.expand_dims(self.exp_mask_tf, axis=-1)
+    self.sample_a_tf = self.a_mean_tf + norm_a_noise_tf * self.a_norm.std_tf
+    self.sample_a_logp_tf = TFUtil.calc_logp_gaussian(x_tf=norm_a_noise_tf,
+                                                      mean_tf=None,
+                                                      std_tf=self.norm_a_std_tf)

-        if (critic_weight_decay != 0):
-            self.critic_loss_tf += critic_weight_decay * self._weight_decay_loss('main/critic')
-        
-        norm_tar_a_tf = self.a_norm.normalize_tf(self.a_tf)
-        self._norm_a_mean_tf = self.a_norm.normalize_tf(self.a_mean_tf)
+    return

-        self.logp_tf = TFUtil.calc_logp_gaussian(norm_tar_a_tf, self._norm_a_mean_tf, self.norm_a_std_tf)
-        ratio_tf = tf.exp(self.logp_tf - self.old_logp_tf)
-        actor_loss0 = self.adv_tf * ratio_tf
-        actor_loss1 = self.adv_tf * tf.clip_by_value(ratio_tf, 1.0 - self.ratio_clip, 1 + self.ratio_clip)
-        self.actor_loss_tf = -tf.reduce_mean(tf.minimum(actor_loss0, actor_loss1))
+  def _build_losses(self, json_data):
+    actor_weight_decay = 0 if (
+        self.ACTOR_WEIGHT_DECAY_KEY not in json_data) else json_data[self.ACTOR_WEIGHT_DECAY_KEY]
+    critic_weight_decay = 0 if (
+        self.CRITIC_WEIGHT_DECAY_KEY not in json_data) else json_data[self.CRITIC_WEIGHT_DECAY_KEY]

-        norm_a_bound_min = self.a_norm.normalize(self.a_bound_min)
-        norm_a_bound_max = self.a_norm.normalize(self.a_bound_max)
-        a_bound_loss = TFUtil.calc_bound_loss(self._norm_a_mean_tf, norm_a_bound_min, norm_a_bound_max)
-        self.actor_loss_tf += a_bound_loss
+    norm_val_diff = self.val_norm.normalize_tf(self.tar_val_tf) - self.val_norm.normalize_tf(
+        self.critic_tf)
+    self.critic_loss_tf = 0.5 * tf.reduce_mean(tf.square(norm_val_diff))

-        if (actor_weight_decay != 0):
-            self.actor_loss_tf += actor_weight_decay * self._weight_decay_loss('main/actor')
-        
-        # for debugging
-        self.clip_frac_tf = tf.reduce_mean(tf.to_float(tf.greater(tf.abs(ratio_tf - 1.0), self.ratio_clip)))
+    if (critic_weight_decay != 0):
+      self.critic_loss_tf += critic_weight_decay * self._weight_decay_loss('main/critic')

-        return
+    norm_tar_a_tf = self.a_norm.normalize_tf(self.a_tf)
+    self._norm_a_mean_tf = self.a_norm.normalize_tf(self.a_mean_tf)

-    def _build_solvers(self, json_data):
-        actor_stepsize = 0.001 if (self.ACTOR_STEPSIZE_KEY not in json_data) else json_data[self.ACTOR_STEPSIZE_KEY]
-        actor_momentum = 0.9 if (self.ACTOR_MOMENTUM_KEY not in json_data) else json_data[self.ACTOR_MOMENTUM_KEY]
-        critic_stepsize = 0.01 if (self.CRITIC_STEPSIZE_KEY not in json_data) else json_data[self.CRITIC_STEPSIZE_KEY]
-        critic_momentum = 0.9 if (self.CRITIC_MOMENTUM_KEY not in json_data) else json_data[self.CRITIC_MOMENTUM_KEY]
-        
-        critic_vars = self._tf_vars('main/critic')
-        critic_opt = tf.train.MomentumOptimizer(learning_rate=critic_stepsize, momentum=critic_momentum)
-        self.critic_grad_tf = tf.gradients(self.critic_loss_tf, critic_vars)
-        self.critic_solver = MPISolver(self.sess, critic_opt, critic_vars)
+    self.logp_tf = TFUtil.calc_logp_gaussian(norm_tar_a_tf, self._norm_a_mean_tf,
+                                             self.norm_a_std_tf)
+    ratio_tf = tf.exp(self.logp_tf - self.old_logp_tf)
+    actor_loss0 = self.adv_tf * ratio_tf
+    actor_loss1 = self.adv_tf * tf.clip_by_value(ratio_tf, 1.0 - self.ratio_clip,
+                                                 1 + self.ratio_clip)
+    self.actor_loss_tf = -tf.reduce_mean(tf.minimum(actor_loss0, actor_loss1))

-        self._actor_stepsize_tf = tf.get_variable(dtype=tf.float32, name='actor_stepsize', initializer=actor_stepsize, trainable=False)
-        self._actor_stepsize_ph = tf.get_variable(dtype=tf.float32, name='actor_stepsize_ph', shape=[])
-        self._actor_stepsize_update_op = self._actor_stepsize_tf.assign(self._actor_stepsize_ph)
+    norm_a_bound_min = self.a_norm.normalize(self.a_bound_min)
+    norm_a_bound_max = self.a_norm.normalize(self.a_bound_max)
+    a_bound_loss = TFUtil.calc_bound_loss(self._norm_a_mean_tf, norm_a_bound_min, norm_a_bound_max)
+    self.actor_loss_tf += a_bound_loss

-        actor_vars = self._tf_vars('main/actor')
-        actor_opt = tf.train.MomentumOptimizer(learning_rate=self._actor_stepsize_tf, momentum=actor_momentum)
-        self.actor_grad_tf = tf.gradients(self.actor_loss_tf, actor_vars)
-        self.actor_solver = MPISolver(self.sess, actor_opt, actor_vars)
-        
-        return
+    if (actor_weight_decay != 0):
+      self.actor_loss_tf += actor_weight_decay * self._weight_decay_loss('main/actor')

-    def _decide_action(self, s, g):
-        with self.sess.as_default(), self.graph.as_default():
-            self._exp_action = self._enable_stoch_policy() and MathUtil.flip_coin(self.exp_params_curr.rate)
-            #print("_decide_action._exp_action=",self._exp_action)
-            a, logp = self._eval_actor(s, g, self._exp_action)
-        return a[0], logp[0]
+    # for debugging
+    self.clip_frac_tf = tf.reduce_mean(
+        tf.to_float(tf.greater(tf.abs(ratio_tf - 1.0), self.ratio_clip)))

-    def _eval_actor(self, s, g, enable_exp):
-        s = np.reshape(s, [-1, self.get_state_size()])
-        g = np.reshape(g, [-1, self.get_goal_size()]) if self.has_goal() else None
-          
-        feed = {
-            self.s_tf : s,
-            self.g_tf : g,
-            self.exp_mask_tf: np.array([1 if enable_exp else 0])
-        }
+    return

-        a, logp = self.sess.run([self.sample_a_tf, self.sample_a_logp_tf], feed_dict=feed)
-        return a, logp
+  def _build_solvers(self, json_data):
+    actor_stepsize = 0.001 if (
+        self.ACTOR_STEPSIZE_KEY not in json_data) else json_data[self.ACTOR_STEPSIZE_KEY]
+    actor_momentum = 0.9 if (
+        self.ACTOR_MOMENTUM_KEY not in json_data) else json_data[self.ACTOR_MOMENTUM_KEY]
+    critic_stepsize = 0.01 if (
+        self.CRITIC_STEPSIZE_KEY not in json_data) else json_data[self.CRITIC_STEPSIZE_KEY]
+    critic_momentum = 0.9 if (
+        self.CRITIC_MOMENTUM_KEY not in json_data) else json_data[self.CRITIC_MOMENTUM_KEY]

-    def _train_step(self):
-        adv_eps = 1e-5
+    critic_vars = self._tf_vars('main/critic')
+    critic_opt = tf.train.MomentumOptimizer(learning_rate=critic_stepsize,
+                                            momentum=critic_momentum)
+    self.critic_grad_tf = tf.gradients(self.critic_loss_tf, critic_vars)
+    self.critic_solver = MPISolver(self.sess, critic_opt, critic_vars)

-        start_idx = self.replay_buffer.buffer_tail
-        end_idx = self.replay_buffer.buffer_head
-        assert(start_idx == 0)
-        assert(self.replay_buffer.get_current_size() <= self.replay_buffer.buffer_size) # must avoid overflow
-        assert(start_idx < end_idx)
+    self._actor_stepsize_tf = tf.get_variable(dtype=tf.float32,
+                                              name='actor_stepsize',
+                                              initializer=actor_stepsize,
+                                              trainable=False)
+    self._actor_stepsize_ph = tf.get_variable(dtype=tf.float32, name='actor_stepsize_ph', shape=[])
+    self._actor_stepsize_update_op = self._actor_stepsize_tf.assign(self._actor_stepsize_ph)

-        idx = np.array(list(range(start_idx, end_idx)))        
-        end_mask = self.replay_buffer.is_path_end(idx)
-        end_mask = np.logical_not(end_mask) 
-        
-        vals = self._compute_batch_vals(start_idx, end_idx)
-        new_vals = self._compute_batch_new_vals(start_idx, end_idx, vals)
+    actor_vars = self._tf_vars('main/actor')
+    actor_opt = tf.train.MomentumOptimizer(learning_rate=self._actor_stepsize_tf,
+                                           momentum=actor_momentum)
+    self.actor_grad_tf = tf.gradients(self.actor_loss_tf, actor_vars)
+    self.actor_solver = MPISolver(self.sess, actor_opt, actor_vars)

-        valid_idx = idx[end_mask]
-        exp_idx = self.replay_buffer.get_idx_filtered(self.EXP_ACTION_FLAG).copy()
-        num_valid_idx = valid_idx.shape[0]
-        num_exp_idx = exp_idx.shape[0]
-        exp_idx = np.column_stack([exp_idx, np.array(list(range(0, num_exp_idx)), dtype=np.int32)])
-        
-        local_sample_count = valid_idx.size
-        global_sample_count = int(MPIUtil.reduce_sum(local_sample_count))
-        mini_batches = int(np.ceil(global_sample_count / self.mini_batch_size))
-        
-        adv = new_vals[exp_idx[:,0]] - vals[exp_idx[:,0]]
-        new_vals = np.clip(new_vals, self.val_min, self.val_max)
+    return

-        adv_mean = np.mean(adv)
-        adv_std = np.std(adv)
-        adv = (adv - adv_mean) / (adv_std + adv_eps)
-        adv = np.clip(adv, -self.norm_adv_clip, self.norm_adv_clip)
+  def _decide_action(self, s, g):
+    with self.sess.as_default(), self.graph.as_default():
+      self._exp_action = self._enable_stoch_policy() and MathUtil.flip_coin(
+          self.exp_params_curr.rate)
+      #print("_decide_action._exp_action=",self._exp_action)
+      a, logp = self._eval_actor(s, g, self._exp_action)
+    return a[0], logp[0]

-        critic_loss = 0
-        actor_loss = 0
-        actor_clip_frac = 0
+  def _eval_actor(self, s, g, enable_exp):
+    s = np.reshape(s, [-1, self.get_state_size()])
+    g = np.reshape(g, [-1, self.get_goal_size()]) if self.has_goal() else None

-        for e in range(self.epochs):
-            np.random.shuffle(valid_idx)
-            np.random.shuffle(exp_idx)
+    feed = {self.s_tf: s, self.g_tf: g, self.exp_mask_tf: np.array([1 if enable_exp else 0])}

-            for b in range(mini_batches):
-                batch_idx_beg = b * self._local_mini_batch_size
-                batch_idx_end = batch_idx_beg + self._local_mini_batch_size
+    a, logp = self.sess.run([self.sample_a_tf, self.sample_a_logp_tf], feed_dict=feed)
+    return a, logp

-                critic_batch = np.array(range(batch_idx_beg, batch_idx_end), dtype=np.int32)
-                actor_batch = critic_batch.copy()
-                critic_batch = np.mod(critic_batch, num_valid_idx)
-                actor_batch = np.mod(actor_batch, num_exp_idx)
-                shuffle_actor = (actor_batch[-1] < actor_batch[0]) or (actor_batch[-1] == num_exp_idx - 1)
+  def _train_step(self):
+    adv_eps = 1e-5

-                critic_batch = valid_idx[critic_batch]
-                actor_batch = exp_idx[actor_batch]
-                critic_batch_vals = new_vals[critic_batch]
-                actor_batch_adv = adv[actor_batch[:,1]]
+    start_idx = self.replay_buffer.buffer_tail
+    end_idx = self.replay_buffer.buffer_head
+    assert (start_idx == 0)
+    assert (self.replay_buffer.get_current_size() <= self.replay_buffer.buffer_size
+           )  # must avoid overflow
+    assert (start_idx < end_idx)

-                critic_s = self.replay_buffer.get('states', critic_batch)
-                critic_g = self.replay_buffer.get('goals', critic_batch) if self.has_goal() else None
-                curr_critic_loss = self._update_critic(critic_s, critic_g, critic_batch_vals)
+    idx = np.array(list(range(start_idx, end_idx)))
+    end_mask = self.replay_buffer.is_path_end(idx)
+    end_mask = np.logical_not(end_mask)

-                actor_s = self.replay_buffer.get("states", actor_batch[:,0])
-                actor_g = self.replay_buffer.get("goals", actor_batch[:,0]) if self.has_goal() else None
-                actor_a = self.replay_buffer.get("actions", actor_batch[:,0])
-                actor_logp = self.replay_buffer.get("logps", actor_batch[:,0])
-                curr_actor_loss, curr_actor_clip_frac = self._update_actor(actor_s, actor_g, actor_a, actor_logp, actor_batch_adv)
-                
-                critic_loss += curr_critic_loss
-                actor_loss += np.abs(curr_actor_loss)
-                actor_clip_frac += curr_actor_clip_frac
+    vals = self._compute_batch_vals(start_idx, end_idx)
+    new_vals = self._compute_batch_new_vals(start_idx, end_idx, vals)

-                if (shuffle_actor):
-                    np.random.shuffle(exp_idx)
+    valid_idx = idx[end_mask]
+    exp_idx = self.replay_buffer.get_idx_filtered(self.EXP_ACTION_FLAG).copy()
+    num_valid_idx = valid_idx.shape[0]
+    num_exp_idx = exp_idx.shape[0]
+    exp_idx = np.column_stack([exp_idx, np.array(list(range(0, num_exp_idx)), dtype=np.int32)])

-        total_batches = mini_batches * self.epochs
-        critic_loss /= total_batches
-        actor_loss /= total_batches
-        actor_clip_frac /= total_batches
+    local_sample_count = valid_idx.size
+    global_sample_count = int(MPIUtil.reduce_sum(local_sample_count))
+    mini_batches = int(np.ceil(global_sample_count / self.mini_batch_size))

-        critic_loss = MPIUtil.reduce_avg(critic_loss)
-        actor_loss = MPIUtil.reduce_avg(actor_loss)
-        actor_clip_frac = MPIUtil.reduce_avg(actor_clip_frac)
+    adv = new_vals[exp_idx[:, 0]] - vals[exp_idx[:, 0]]
+    new_vals = np.clip(new_vals, self.val_min, self.val_max)

-        critic_stepsize = self.critic_solver.get_stepsize()
-        actor_stepsize = self.update_actor_stepsize(actor_clip_frac)
+    adv_mean = np.mean(adv)
+    adv_std = np.std(adv)
+    adv = (adv - adv_mean) / (adv_std + adv_eps)
+    adv = np.clip(adv, -self.norm_adv_clip, self.norm_adv_clip)

-        self.logger.log_tabular('Critic_Loss', critic_loss)
-        self.logger.log_tabular('Critic_Stepsize', critic_stepsize)
-        self.logger.log_tabular('Actor_Loss', actor_loss) 
-        self.logger.log_tabular('Actor_Stepsize', actor_stepsize)
-        self.logger.log_tabular('Clip_Frac', actor_clip_frac)
-        self.logger.log_tabular('Adv_Mean', adv_mean)
-        self.logger.log_tabular('Adv_Std', adv_std)
+    critic_loss = 0
+    actor_loss = 0
+    actor_clip_frac = 0

-        self.replay_buffer.clear()
+    for e in range(self.epochs):
+      np.random.shuffle(valid_idx)
+      np.random.shuffle(exp_idx)

-        return
+      for b in range(mini_batches):
+        batch_idx_beg = b * self._local_mini_batch_size
+        batch_idx_end = batch_idx_beg + self._local_mini_batch_size

-    def _get_iters_per_update(self):
-        return 1
+        critic_batch = np.array(range(batch_idx_beg, batch_idx_end), dtype=np.int32)
+        actor_batch = critic_batch.copy()
+        critic_batch = np.mod(critic_batch, num_valid_idx)
+        actor_batch = np.mod(actor_batch, num_exp_idx)
+        shuffle_actor = (actor_batch[-1] < actor_batch[0]) or (actor_batch[-1] == num_exp_idx - 1)

-    def _valid_train_step(self):
-        samples = self.replay_buffer.get_current_size()
-        exp_samples = self.replay_buffer.count_filtered(self.EXP_ACTION_FLAG)
-        global_sample_count = int(MPIUtil.reduce_sum(samples))
-        global_exp_min = int(MPIUtil.reduce_min(exp_samples))
-        return (global_sample_count > self.batch_size) and (global_exp_min > 0)
+        critic_batch = valid_idx[critic_batch]
+        actor_batch = exp_idx[actor_batch]
+        critic_batch_vals = new_vals[critic_batch]
+        actor_batch_adv = adv[actor_batch[:, 1]]

-    def _compute_batch_vals(self, start_idx, end_idx):
-        states = self.replay_buffer.get_all("states")[start_idx:end_idx]
-        goals = self.replay_buffer.get_all("goals")[start_idx:end_idx] if self.has_goal() else None
-        
-        idx = np.array(list(range(start_idx, end_idx)))        
-        is_end = self.replay_buffer.is_path_end(idx)
-        is_fail = self.replay_buffer.check_terminal_flag(idx, Env.Terminate.Fail)
-        is_succ = self.replay_buffer.check_terminal_flag(idx, Env.Terminate.Succ)
-        is_fail = np.logical_and(is_end, is_fail) 
-        is_succ = np.logical_and(is_end, is_succ) 
+        critic_s = self.replay_buffer.get('states', critic_batch)
+        critic_g = self.replay_buffer.get('goals', critic_batch) if self.has_goal() else None
+        curr_critic_loss = self._update_critic(critic_s, critic_g, critic_batch_vals)

-        vals = self._eval_critic(states, goals)
-        vals[is_fail] = self.val_fail
-        vals[is_succ] = self.val_succ
+        actor_s = self.replay_buffer.get("states", actor_batch[:, 0])
+        actor_g = self.replay_buffer.get("goals", actor_batch[:, 0]) if self.has_goal() else None
+        actor_a = self.replay_buffer.get("actions", actor_batch[:, 0])
+        actor_logp = self.replay_buffer.get("logps", actor_batch[:, 0])
+        curr_actor_loss, curr_actor_clip_frac = self._update_actor(actor_s, actor_g, actor_a,
+                                                                   actor_logp, actor_batch_adv)

-        return vals
+        critic_loss += curr_critic_loss
+        actor_loss += np.abs(curr_actor_loss)
+        actor_clip_frac += curr_actor_clip_frac

-    def _compute_batch_new_vals(self, start_idx, end_idx, val_buffer):
-        rewards = self.replay_buffer.get_all("rewards")[start_idx:end_idx]
+        if (shuffle_actor):
+          np.random.shuffle(exp_idx)

-        if self.discount == 0:
-            new_vals = rewards.copy()
+    total_batches = mini_batches * self.epochs
+    critic_loss /= total_batches
+    actor_loss /= total_batches
+    actor_clip_frac /= total_batches
+
+    critic_loss = MPIUtil.reduce_avg(critic_loss)
+    actor_loss = MPIUtil.reduce_avg(actor_loss)
+    actor_clip_frac = MPIUtil.reduce_avg(actor_clip_frac)
+
+    critic_stepsize = self.critic_solver.get_stepsize()
+    actor_stepsize = self.update_actor_stepsize(actor_clip_frac)
+
+    self.logger.log_tabular('Critic_Loss', critic_loss)
+    self.logger.log_tabular('Critic_Stepsize', critic_stepsize)
+    self.logger.log_tabular('Actor_Loss', actor_loss)
+    self.logger.log_tabular('Actor_Stepsize', actor_stepsize)
+    self.logger.log_tabular('Clip_Frac', actor_clip_frac)
+    self.logger.log_tabular('Adv_Mean', adv_mean)
+    self.logger.log_tabular('Adv_Std', adv_std)
+
+    self.replay_buffer.clear()
+
+    return
+
+  def _get_iters_per_update(self):
+    return 1
+
+  def _valid_train_step(self):
+    samples = self.replay_buffer.get_current_size()
+    exp_samples = self.replay_buffer.count_filtered(self.EXP_ACTION_FLAG)
+    global_sample_count = int(MPIUtil.reduce_sum(samples))
+    global_exp_min = int(MPIUtil.reduce_min(exp_samples))
+    return (global_sample_count > self.batch_size) and (global_exp_min > 0)
+
+  def _compute_batch_vals(self, start_idx, end_idx):
+    states = self.replay_buffer.get_all("states")[start_idx:end_idx]
+    goals = self.replay_buffer.get_all("goals")[start_idx:end_idx] if self.has_goal() else None
+
+    idx = np.array(list(range(start_idx, end_idx)))
+    is_end = self.replay_buffer.is_path_end(idx)
+    is_fail = self.replay_buffer.check_terminal_flag(idx, Env.Terminate.Fail)
+    is_succ = self.replay_buffer.check_terminal_flag(idx, Env.Terminate.Succ)
+    is_fail = np.logical_and(is_end, is_fail)
+    is_succ = np.logical_and(is_end, is_succ)
+
+    vals = self._eval_critic(states, goals)
+    vals[is_fail] = self.val_fail
+    vals[is_succ] = self.val_succ
+
+    return vals
+
+  def _compute_batch_new_vals(self, start_idx, end_idx, val_buffer):
+    rewards = self.replay_buffer.get_all("rewards")[start_idx:end_idx]
+
+    if self.discount == 0:
+      new_vals = rewards.copy()
+    else:
+      new_vals = np.zeros_like(val_buffer)
+
+      curr_idx = start_idx
+      while curr_idx < end_idx:
+        idx0 = curr_idx - start_idx
+        idx1 = self.replay_buffer.get_path_end(curr_idx) - start_idx
+        r = rewards[idx0:idx1]
+        v = val_buffer[idx0:(idx1 + 1)]
+
+        new_vals[idx0:idx1] = RLUtil.compute_return(r, self.discount, self.td_lambda, v)
+        curr_idx = idx1 + start_idx + 1
+
+    return new_vals
+
+  def _update_critic(self, s, g, tar_vals):
+    feed = {self.s_tf: s, self.g_tf: g, self.tar_val_tf: tar_vals}
+
+    loss, grads = self.sess.run([self.critic_loss_tf, self.critic_grad_tf], feed)
+    self.critic_solver.update(grads)
+    return loss
+
+  def _update_actor(self, s, g, a, logp, adv):
+    feed = {self.s_tf: s, self.g_tf: g, self.a_tf: a, self.adv_tf: adv, self.old_logp_tf: logp}
+
+    loss, grads, clip_frac = self.sess.run(
+        [self.actor_loss_tf, self.actor_grad_tf, self.clip_frac_tf], feed)
+    self.actor_solver.update(grads)
+
+    return loss, clip_frac
+
+  def update_actor_stepsize(self, clip_frac):
+    clip_tol = 1.5
+    step_scale = 2
+    max_stepsize = 1e-2
+    min_stepsize = 1e-8
+    warmup_iters = 5
+
+    actor_stepsize = self.actor_solver.get_stepsize()
+    if (self.tar_clip_frac >= 0 and self.iter > warmup_iters):
+      min_clip = self.tar_clip_frac / clip_tol
+      max_clip = self.tar_clip_frac * clip_tol
+      under_tol = clip_frac < min_clip
+      over_tol = clip_frac > max_clip
+
+      if (over_tol or under_tol):
+        if (over_tol):
+          actor_stepsize *= self.actor_stepsize_decay
        else:
-            new_vals = np.zeros_like(val_buffer)
+          actor_stepsize /= self.actor_stepsize_decay

-            curr_idx = start_idx
-            while curr_idx < end_idx:
-                idx0 = curr_idx - start_idx
-                idx1 = self.replay_buffer.get_path_end(curr_idx) - start_idx
-                r = rewards[idx0:idx1]
-                v = val_buffer[idx0:(idx1 + 1)]
+        actor_stepsize = np.clip(actor_stepsize, min_stepsize, max_stepsize)
+        self.set_actor_stepsize(actor_stepsize)

-                new_vals[idx0:idx1] = RLUtil.compute_return(r, self.discount, self.td_lambda, v)
-                curr_idx = idx1 + start_idx + 1
-        
-        return new_vals
+    return actor_stepsize

-    def _update_critic(self, s, g, tar_vals):
-        feed = {
-            self.s_tf: s,
-            self.g_tf: g,
-            self.tar_val_tf: tar_vals
-        }
-
-        loss, grads = self.sess.run([self.critic_loss_tf, self.critic_grad_tf], feed)
-        self.critic_solver.update(grads)
-        return loss
-    
-    def _update_actor(self, s, g, a, logp, adv):
-        feed = {
-            self.s_tf: s,
-            self.g_tf: g,
-            self.a_tf: a,
-            self.adv_tf: adv,
-            self.old_logp_tf: logp
-        }
-
-        loss, grads, clip_frac = self.sess.run([self.actor_loss_tf, self.actor_grad_tf,
-                                                        self.clip_frac_tf], feed)
-        self.actor_solver.update(grads)
-
-        return loss, clip_frac
-
-    def update_actor_stepsize(self, clip_frac):
-        clip_tol = 1.5
-        step_scale = 2
-        max_stepsize = 1e-2
-        min_stepsize = 1e-8
-        warmup_iters = 5
-
-        actor_stepsize = self.actor_solver.get_stepsize()
-        if (self.tar_clip_frac >= 0 and self.iter > warmup_iters):
-            min_clip = self.tar_clip_frac / clip_tol
-            max_clip = self.tar_clip_frac * clip_tol
-            under_tol = clip_frac < min_clip
-            over_tol = clip_frac > max_clip
-
-            if (over_tol or under_tol):
-                if (over_tol):
-                    actor_stepsize *= self.actor_stepsize_decay
-                else:
-                    actor_stepsize /= self.actor_stepsize_decay
-
-                actor_stepsize = np.clip(actor_stepsize, min_stepsize, max_stepsize)
-                self.set_actor_stepsize(actor_stepsize)
-
-        return actor_stepsize
-
-    def set_actor_stepsize(self, stepsize):
-        feed = {
-            self._actor_stepsize_ph: stepsize,
-        }
-        self.sess.run(self._actor_stepsize_update_op, feed)
-        return
+  def set_actor_stepsize(self, stepsize):
+    feed = {
+        self._actor_stepsize_ph: stepsize,
+    }
+    self.sess.run(self._actor_stepsize_update_op, feed)
+    return
--- a/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/replay_buffer.py
+++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/replay_buffer.py
@@ -5,347 +5,353 @@ import inspect as inspect
 from pybullet_envs.deep_mimic.env.env import Env
 import pybullet_utils.math_util as MathUtil

+
 class ReplayBuffer(object):
-    TERMINATE_KEY = 'terminate'
-    PATH_START_KEY = 'path_start'
-    PATH_END_KEY = 'path_end'
+  TERMINATE_KEY = 'terminate'
+  PATH_START_KEY = 'path_start'
+  PATH_END_KEY = 'path_end'

-    def __init__(self, buffer_size):
-        assert buffer_size > 0
+  def __init__(self, buffer_size):
+    assert buffer_size > 0

-        self.buffer_size = buffer_size
-        self.total_count = 0
-        self.buffer_head = 0
-        self.buffer_tail = MathUtil.INVALID_IDX
-        self.num_paths = 0
-        self._sample_buffers = dict()
-        self.buffers = None
+    self.buffer_size = buffer_size
+    self.total_count = 0
+    self.buffer_head = 0
+    self.buffer_tail = MathUtil.INVALID_IDX
+    self.num_paths = 0
+    self._sample_buffers = dict()
+    self.buffers = None

-        self.clear()
-        return
+    self.clear()
+    return

-    def sample(self, n):
-        curr_size = self.get_current_size()
-        assert curr_size > 0
+  def sample(self, n):
+    curr_size = self.get_current_size()
+    assert curr_size > 0

-        idx = np.empty(n, dtype=int)
-        # makes sure that the end states are not sampled
-        for i in range(n):
-            while True:
-                curr_idx = np.random.randint(0, curr_size, size=1)[0]
-                curr_idx += self.buffer_tail
-                curr_idx = np.mod(curr_idx, self.buffer_size)
+    idx = np.empty(n, dtype=int)
+    # makes sure that the end states are not sampled
+    for i in range(n):
+      while True:
+        curr_idx = np.random.randint(0, curr_size, size=1)[0]
+        curr_idx += self.buffer_tail
+        curr_idx = np.mod(curr_idx, self.buffer_size)

-                if not self.is_path_end(curr_idx):
-                    break
-            idx[i] = curr_idx
+        if not self.is_path_end(curr_idx):
+          break
+      idx[i] = curr_idx

-        return idx
+    return idx

-    def sample_filtered(self, n, key):
-        assert key in self._sample_buffers
-        curr_buffer = self._sample_buffers[key]
-        idx = curr_buffer.sample(n)
-        return idx
+  def sample_filtered(self, n, key):
+    assert key in self._sample_buffers
+    curr_buffer = self._sample_buffers[key]
+    idx = curr_buffer.sample(n)
+    return idx

-    def count_filtered(self, key):
-        curr_buffer = self._sample_buffers[key]
-        return curr_buffer.count
+  def count_filtered(self, key):
+    curr_buffer = self._sample_buffers[key]
+    return curr_buffer.count

-    def get(self, key, idx):
-        return self.buffers[key][idx]
+  def get(self, key, idx):
+    return self.buffers[key][idx]

-    def get_all(self, key):
-        return self.buffers[key]
+  def get_all(self, key):
+    return self.buffers[key]

-    def get_idx_filtered(self, key):
-        assert key in self._sample_buffers
-        curr_buffer = self._sample_buffers[key]
-        idx = curr_buffer.slot_to_idx[:curr_buffer.count]
-        return idx
-    
-    def get_path_start(self, idx):
-        return self.buffers[self.PATH_START_KEY][idx]
+  def get_idx_filtered(self, key):
+    assert key in self._sample_buffers
+    curr_buffer = self._sample_buffers[key]
+    idx = curr_buffer.slot_to_idx[:curr_buffer.count]
+    return idx

-    def get_path_end(self, idx):
-        return self.buffers[self.PATH_END_KEY][idx]
+  def get_path_start(self, idx):
+    return self.buffers[self.PATH_START_KEY][idx]

-    def get_pathlen(self, idx):
-        is_array = isinstance(idx, np.ndarray) or isinstance(idx, list)
-        if not is_array:
-            idx = [idx]
+  def get_path_end(self, idx):
+    return self.buffers[self.PATH_END_KEY][idx]

-        n = len(idx)
-        start_idx = self.get_path_start(idx)
-        end_idx = self.get_path_end(idx)
-        pathlen = np.empty(n, dtype=int)
+  def get_pathlen(self, idx):
+    is_array = isinstance(idx, np.ndarray) or isinstance(idx, list)
+    if not is_array:
+      idx = [idx]

-        for i in range(n):
-            curr_start = start_idx[i]
-            curr_end = end_idx[i]
-            if curr_start < curr_end:
-                curr_len = curr_end - curr_start
-            else:
-                curr_len = self.buffer_size - curr_start + curr_end
-            pathlen[i] = curr_len
+    n = len(idx)
+    start_idx = self.get_path_start(idx)
+    end_idx = self.get_path_end(idx)
+    pathlen = np.empty(n, dtype=int)

-        if not is_array:
-            pathlen = pathlen[0]
+    for i in range(n):
+      curr_start = start_idx[i]
+      curr_end = end_idx[i]
+      if curr_start < curr_end:
+        curr_len = curr_end - curr_start
+      else:
+        curr_len = self.buffer_size - curr_start + curr_end
+      pathlen[i] = curr_len

-        return pathlen
+    if not is_array:
+      pathlen = pathlen[0]

-    def is_valid_path(self, idx):
-        start_idx = self.get_path_start(idx)
-        valid = start_idx != MathUtil.INVALID_IDX
-        return valid
+    return pathlen

-    def store(self, path):
-        start_idx = MathUtil.INVALID_IDX
-        n = path.pathlength()
-        
-        if (n > 0):
-            assert path.is_valid()
+  def is_valid_path(self, idx):
+    start_idx = self.get_path_start(idx)
+    valid = start_idx != MathUtil.INVALID_IDX
+    return valid

-            if path.check_vals():
-                if self.buffers is None:
-                    self._init_buffers(path)
+  def store(self, path):
+    start_idx = MathUtil.INVALID_IDX
+    n = path.pathlength()

-                idx = self._request_idx(n + 1)
-                self._store_path(path, idx)
-                self._add_sample_buffers(idx)
+    if (n > 0):
+      assert path.is_valid()

-                self.num_paths += 1
-                self.total_count += n + 1
-                start_idx = idx[0]
-            else:
-                Logger.print2('Invalid path data value detected')
-        
-        return start_idx
+      if path.check_vals():
+        if self.buffers is None:
+          self._init_buffers(path)

-    def clear(self):
-        self.buffer_head = 0
-        self.buffer_tail = MathUtil.INVALID_IDX
-        self.num_paths = 0
+        idx = self._request_idx(n + 1)
+        self._store_path(path, idx)
+        self._add_sample_buffers(idx)

-        for key in self._sample_buffers:
-            self._sample_buffers[key].clear()
-        return
+        self.num_paths += 1
+        self.total_count += n + 1
+        start_idx = idx[0]
+      else:
+        Logger.print2('Invalid path data value detected')

-    def get_next_idx(self, idx):
-        next_idx = np.mod(idx + 1, self.buffer_size)
-        return next_idx
+    return start_idx

-    def is_terminal_state(self, idx):
-        terminate_flags = self.buffers[self.TERMINATE_KEY][idx]
-        terminate = terminate_flags != Env.Terminate.Null.value
-        is_end = self.is_path_end(idx)
-        terminal_state = np.logical_and(terminate, is_end)
-        return terminal_state
+  def clear(self):
+    self.buffer_head = 0
+    self.buffer_tail = MathUtil.INVALID_IDX
+    self.num_paths = 0

-    def check_terminal_flag(self, idx, flag):
-        terminate_flags = self.buffers[self.TERMINATE_KEY][idx]
-        terminate = terminate_flags == flag.value
-        return terminate
+    for key in self._sample_buffers:
+      self._sample_buffers[key].clear()
+    return

-    def is_path_end(self, idx):
-        is_end = self.buffers[self.PATH_END_KEY][idx] == idx
-        return is_end
+  def get_next_idx(self, idx):
+    next_idx = np.mod(idx + 1, self.buffer_size)
+    return next_idx

-    def add_filter_key(self, key):
-        assert self.get_current_size() == 0
-        if key not in self._sample_buffers:
-            self._sample_buffers[key] = SampleBuffer(self.buffer_size)
-        return
+  def is_terminal_state(self, idx):
+    terminate_flags = self.buffers[self.TERMINATE_KEY][idx]
+    terminate = terminate_flags != Env.Terminate.Null.value
+    is_end = self.is_path_end(idx)
+    terminal_state = np.logical_and(terminate, is_end)
+    return terminal_state

-    def get_current_size(self):
-        if self.buffer_tail == MathUtil.INVALID_IDX:
-            return 0
-        elif self.buffer_tail < self.buffer_head:
-            return self.buffer_head - self.buffer_tail
+  def check_terminal_flag(self, idx, flag):
+    terminate_flags = self.buffers[self.TERMINATE_KEY][idx]
+    terminate = terminate_flags == flag.value
+    return terminate
+
+  def is_path_end(self, idx):
+    is_end = self.buffers[self.PATH_END_KEY][idx] == idx
+    return is_end
+
+  def add_filter_key(self, key):
+    assert self.get_current_size() == 0
+    if key not in self._sample_buffers:
+      self._sample_buffers[key] = SampleBuffer(self.buffer_size)
+    return
+
+  def get_current_size(self):
+    if self.buffer_tail == MathUtil.INVALID_IDX:
+      return 0
+    elif self.buffer_tail < self.buffer_head:
+      return self.buffer_head - self.buffer_tail
+    else:
+      return self.buffer_size - self.buffer_tail + self.buffer_head
+
+  def _check_flags(self, key, flags):
+    return (flags & key) == key
+
+  def _add_sample_buffers(self, idx):
+    flags = self.buffers['flags']
+    for key in self._sample_buffers:
+      curr_buffer = self._sample_buffers[key]
+      filter_idx = [
+          i for i in idx if (self._check_flags(key, flags[i]) and not self.is_path_end(i))
+      ]
+      curr_buffer.add(filter_idx)
+    return
+
+  def _free_sample_buffers(self, idx):
+    for key in self._sample_buffers:
+      curr_buffer = self._sample_buffers[key]
+      curr_buffer.free(idx)
+    return
+
+  def _init_buffers(self, path):
+    self.buffers = dict()
+    self.buffers[self.PATH_START_KEY] = MathUtil.INVALID_IDX * np.ones(self.buffer_size, dtype=int)
+    self.buffers[self.PATH_END_KEY] = MathUtil.INVALID_IDX * np.ones(self.buffer_size, dtype=int)
+
+    for key in dir(path):
+      val = getattr(path, key)
+      if not key.startswith('__') and not inspect.ismethod(val):
+        if key == self.TERMINATE_KEY:
+          self.buffers[self.TERMINATE_KEY] = np.zeros(shape=[self.buffer_size], dtype=int)
        else:
-            return self.buffer_size - self.buffer_tail + self.buffer_head
+          val_type = type(val[0])
+          is_array = val_type == np.ndarray
+          if is_array:
+            shape = [self.buffer_size, val[0].shape[0]]
+            dtype = val[0].dtype
+          else:
+            shape = [self.buffer_size]
+            dtype = val_type

-    def _check_flags(self, key, flags):
-        return (flags & key) == key
+          self.buffers[key] = np.zeros(shape, dtype=dtype)
+    return

-    def _add_sample_buffers(self, idx):
-        flags = self.buffers['flags']
-        for key in self._sample_buffers:
-            curr_buffer = self._sample_buffers[key]
-            filter_idx = [i for i in idx if (self._check_flags(key, flags[i]) and not self.is_path_end(i))]
-            curr_buffer.add(filter_idx)
-        return
+  def _request_idx(self, n):
+    assert n + 1 < self.buffer_size  # bad things can happen if path is too long

-    def _free_sample_buffers(self, idx):
-        for key in self._sample_buffers:
-            curr_buffer = self._sample_buffers[key]
-            curr_buffer.free(idx)    
-        return
+    remainder = n
+    idx = []

-    def _init_buffers(self, path):
-        self.buffers = dict()
-        self.buffers[self.PATH_START_KEY] = MathUtil.INVALID_IDX * np.ones(self.buffer_size, dtype=int);
-        self.buffers[self.PATH_END_KEY] = MathUtil.INVALID_IDX * np.ones(self.buffer_size, dtype=int);
+    start_idx = self.buffer_head
+    while remainder > 0:
+      end_idx = np.minimum(start_idx + remainder, self.buffer_size)
+      remainder -= (end_idx - start_idx)

-        for key in dir(path):
-            val = getattr(path, key)
-            if not key.startswith('__') and not inspect.ismethod(val):
-                if key == self.TERMINATE_KEY:
-                    self.buffers[self.TERMINATE_KEY] = np.zeros(shape=[self.buffer_size], dtype=int)
-                else:
-                    val_type = type(val[0])
-                    is_array = val_type == np.ndarray
-                    if is_array:
-                        shape = [self.buffer_size, val[0].shape[0]]
-                        dtype = val[0].dtype
-                    else:
-                        shape = [self.buffer_size]
-                        dtype = val_type
-                    
-                    self.buffers[key] = np.zeros(shape, dtype=dtype)
-        return
+      free_idx = list(range(start_idx, end_idx))
+      self._free_idx(free_idx)
+      idx += free_idx
+      start_idx = 0

-    def _request_idx(self, n):
-        assert n + 1 < self.buffer_size # bad things can happen if path is too long
+    self.buffer_head = (self.buffer_head + n) % self.buffer_size
+    return idx

-        remainder = n
-        idx = []
+  def _free_idx(self, idx):
+    assert (idx[0] <= idx[-1])
+    n = len(idx)
+    if self.buffer_tail != MathUtil.INVALID_IDX:
+      update_tail = idx[0] <= idx[-1] and idx[0] <= self.buffer_tail and idx[-1] >= self.buffer_tail
+      update_tail |= idx[0] > idx[-1] and (idx[0] <= self.buffer_tail or
+                                           idx[-1] >= self.buffer_tail)

-        start_idx = self.buffer_head
-        while remainder > 0:
-            end_idx = np.minimum(start_idx + remainder, self.buffer_size)
-            remainder -= (end_idx - start_idx)
+      if update_tail:
+        i = 0
+        while i < n:
+          curr_idx = idx[i]
+          if self.is_valid_path(curr_idx):
+            start_idx = self.get_path_start(curr_idx)
+            end_idx = self.get_path_end(curr_idx)
+            pathlen = self.get_pathlen(curr_idx)

-            free_idx = list(range(start_idx, end_idx))
-            self._free_idx(free_idx)
-            idx += free_idx
-            start_idx = 0
+            if start_idx < end_idx:
+              self.buffers[self.PATH_START_KEY][start_idx:end_idx + 1] = MathUtil.INVALID_IDX
+              self._free_sample_buffers(list(range(start_idx, end_idx + 1)))
+            else:
+              self.buffers[self.PATH_START_KEY][start_idx:self.buffer_size] = MathUtil.INVALID_IDX
+              self.buffers[self.PATH_START_KEY][0:end_idx + 1] = MathUtil.INVALID_IDX
+              self._free_sample_buffers(list(range(start_idx, self.buffer_size)))
+              self._free_sample_buffers(list(range(0, end_idx + 1)))

-        self.buffer_head = (self.buffer_head + n) % self.buffer_size
-        return idx
+            self.num_paths -= 1
+            i += pathlen + 1
+            self.buffer_tail = (end_idx + 1) % self.buffer_size
+          else:
+            i += 1
+    else:
+      self.buffer_tail = idx[0]
+    return

-    def _free_idx(self, idx):
-        assert(idx[0] <= idx[-1])
-        n = len(idx)
-        if self.buffer_tail != MathUtil.INVALID_IDX:
-            update_tail = idx[0] <= idx[-1] and idx[0] <= self.buffer_tail and idx[-1] >= self.buffer_tail
-            update_tail |= idx[0] > idx[-1] and (idx[0] <= self.buffer_tail or idx[-1] >= self.buffer_tail)
-            
-            if update_tail:
-                i = 0
-                while i < n:
-                    curr_idx = idx[i]
-                    if self.is_valid_path(curr_idx):
-                        start_idx = self.get_path_start(curr_idx)
-                        end_idx = self.get_path_end(curr_idx)
-                        pathlen = self.get_pathlen(curr_idx)
+  def _store_path(self, path, idx):
+    n = path.pathlength()
+    for key, data in self.buffers.items():
+      if key != self.PATH_START_KEY and key != self.PATH_END_KEY and key != self.TERMINATE_KEY:
+        val = getattr(path, key)
+        val_len = len(val)
+        assert val_len == n or val_len == n + 1
+        data[idx[:val_len]] = val

-                        if start_idx < end_idx:
-                            self.buffers[self.PATH_START_KEY][start_idx:end_idx + 1] = MathUtil.INVALID_IDX
-                            self._free_sample_buffers(list(range(start_idx, end_idx + 1)))
-                        else:
-                            self.buffers[self.PATH_START_KEY][start_idx:self.buffer_size] = MathUtil.INVALID_IDX
-                            self.buffers[self.PATH_START_KEY][0:end_idx + 1] = MathUtil.INVALID_IDX
-                            self._free_sample_buffers(list(range(start_idx, self.buffer_size)))
-                            self._free_sample_buffers(list(range(0, end_idx + 1)))
-                        
-                        self.num_paths -= 1
-                        i += pathlen + 1
-                        self.buffer_tail = (end_idx + 1) % self.buffer_size;
-                    else:
-                        i += 1
-        else:
-            self.buffer_tail = idx[0]
-        return
+    self.buffers[self.TERMINATE_KEY][idx] = path.terminate.value
+    self.buffers[self.PATH_START_KEY][idx] = idx[0]
+    self.buffers[self.PATH_END_KEY][idx] = idx[-1]
+    return

-    def _store_path(self, path, idx):
-        n = path.pathlength()
-        for key, data in self.buffers.items():
-            if key != self.PATH_START_KEY and key != self.PATH_END_KEY and key != self.TERMINATE_KEY:
-                val = getattr(path, key)
-                val_len = len(val)
-                assert val_len == n or val_len == n + 1
-                data[idx[:val_len]] = val
-
-        self.buffers[self.TERMINATE_KEY][idx] = path.terminate.value
-        self.buffers[self.PATH_START_KEY][idx] = idx[0]
-        self.buffers[self.PATH_END_KEY][idx] = idx[-1]
-        return

 class SampleBuffer(object):
-    def __init__(self, size):
-        self.idx_to_slot = np.empty(shape=[size], dtype=int)
-        self.slot_to_idx = np.empty(shape=[size], dtype=int)
-        self.count = 0
-        self.clear()
-        return
-    
-    def clear(self):
-        self.idx_to_slot.fill(MathUtil.INVALID_IDX)
-        self.slot_to_idx.fill(MathUtil.INVALID_IDX)
-        self.count = 0
-        return

-    def is_valid(self, idx):
-        return self.idx_to_slot[idx] != MathUtil.INVALID_IDX
+  def __init__(self, size):
+    self.idx_to_slot = np.empty(shape=[size], dtype=int)
+    self.slot_to_idx = np.empty(shape=[size], dtype=int)
+    self.count = 0
+    self.clear()
+    return

-    def get_size(self):
-        return self.idx_to_slot.shape[0]
+  def clear(self):
+    self.idx_to_slot.fill(MathUtil.INVALID_IDX)
+    self.slot_to_idx.fill(MathUtil.INVALID_IDX)
+    self.count = 0
+    return

-    def add(self, idx):
-        for i in idx:
-            if not self.is_valid(i):
-                new_slot = self.count
-                assert new_slot >= 0
+  def is_valid(self, idx):
+    return self.idx_to_slot[idx] != MathUtil.INVALID_IDX

-                self.idx_to_slot[i] = new_slot
-                self.slot_to_idx[new_slot] = i
-                self.count += 1
-        return
+  def get_size(self):
+    return self.idx_to_slot.shape[0]

-    def free(self, idx):
-        for i in idx:
-            if self.is_valid(i):
-                slot = self.idx_to_slot[i]
-                last_slot = self.count - 1
-                last_idx = self.slot_to_idx[last_slot]
+  def add(self, idx):
+    for i in idx:
+      if not self.is_valid(i):
+        new_slot = self.count
+        assert new_slot >= 0

-                self.idx_to_slot[last_idx] = slot
-                self.slot_to_idx[slot] = last_idx
-                self.idx_to_slot[i] = MathUtil.INVALID_IDX
-                self.slot_to_idx[last_slot] = MathUtil.INVALID_IDX
-                self.count -= 1
-        return
+        self.idx_to_slot[i] = new_slot
+        self.slot_to_idx[new_slot] = i
+        self.count += 1
+    return

-    def sample(self, n):
-        if self.count > 0:
-            slots = np.random.randint(0, self.count, size=n)
-            idx = self.slot_to_idx[slots]
-        else:
-            idx = np.empty(shape=[0], dtype=int)
-        return idx
+  def free(self, idx):
+    for i in idx:
+      if self.is_valid(i):
+        slot = self.idx_to_slot[i]
+        last_slot = self.count - 1
+        last_idx = self.slot_to_idx[last_slot]

-    def check_consistency(self):
-        valid = True
-        if self.count < 0:
+        self.idx_to_slot[last_idx] = slot
+        self.slot_to_idx[slot] = last_idx
+        self.idx_to_slot[i] = MathUtil.INVALID_IDX
+        self.slot_to_idx[last_slot] = MathUtil.INVALID_IDX
+        self.count -= 1
+    return
+
+  def sample(self, n):
+    if self.count > 0:
+      slots = np.random.randint(0, self.count, size=n)
+      idx = self.slot_to_idx[slots]
+    else:
+      idx = np.empty(shape=[0], dtype=int)
+    return idx
+
+  def check_consistency(self):
+    valid = True
+    if self.count < 0:
+      valid = False
+
+    if valid:
+      for i in range(self.get_size()):
+        if self.is_valid(i):
+          s = self.idx_to_slot[i]
+          if self.slot_to_idx[s] != i:
            valid = False
+            break

-        if valid:
-            for i in range(self.get_size()):
-                if self.is_valid(i):
-                    s = self.idx_to_slot[i]
-                    if self.slot_to_idx[s] != i:
-                        valid = False
-                        break
+        s2i = self.slot_to_idx[i]
+        if s2i != MathUtil.INVALID_IDX:
+          i2s = self.idx_to_slot[s2i]
+          if i2s != i:
+            valid = False
+            break

-                s2i = self.slot_to_idx[i] 
-                if s2i != MathUtil.INVALID_IDX:
-                    i2s = self.idx_to_slot[s2i]
-                    if i2s != i:
-                        valid = False
-                        break
-
-        count0 = np.sum(self.idx_to_slot == MathUtil.INVALID_IDX)
-        count1 = np.sum(self.slot_to_idx == MathUtil.INVALID_IDX)
-        valid &= count0 == count1
-        return valid
+    count0 = np.sum(self.idx_to_slot == MathUtil.INVALID_IDX)
+    count1 = np.sum(self.slot_to_idx == MathUtil.INVALID_IDX)
+    valid &= count0 == count1
+    return valid
--- a/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/rl_agent.py
+++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/rl_agent.py
--- a/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/rl_util.py
+++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/rl_util.py
@@ -1,18 +1,19 @@
 import numpy as np

+
 def compute_return(rewards, gamma, td_lambda, val_t):
-    # computes td-lambda return of path
-    path_len = len(rewards)
-    assert len(val_t) == path_len + 1
+  # computes td-lambda return of path
+  path_len = len(rewards)
+  assert len(val_t) == path_len + 1

-    return_t = np.zeros(path_len)
-    last_val = rewards[-1] + gamma * val_t[-1]
-    return_t[-1] = last_val
+  return_t = np.zeros(path_len)
+  last_val = rewards[-1] + gamma * val_t[-1]
+  return_t[-1] = last_val

-    for i in reversed(range(0, path_len - 1)):
-        curr_r = rewards[i]
-        next_ret = return_t[i + 1]
-        curr_val = curr_r + gamma * ((1.0 - td_lambda) * val_t[i + 1] + td_lambda * next_ret)
-        return_t[i] = curr_val
-    
-    return return_t
+  for i in reversed(range(0, path_len - 1)):
+    curr_r = rewards[i]
+    next_ret = return_t[i + 1]
+    curr_val = curr_r + gamma * ((1.0 - td_lambda) * val_t[i + 1] + td_lambda * next_ret)
+    return_t[i] = curr_val
+
+  return return_t
--- a/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/rl_world.py
+++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/rl_world.py
@@ -5,139 +5,140 @@ from pybullet_envs.deep_mimic.learning.rl_agent import RLAgent
 from pybullet_utils.logger import Logger
 import pybullet_data

+
 class RLWorld(object):
-    def __init__(self, env, arg_parser):
-        TFUtil.disable_gpu()

-        self.env = env
-        self.arg_parser = arg_parser
-        self._enable_training = True
-        self.train_agents = []
-        self.parse_args(arg_parser)
+  def __init__(self, env, arg_parser):
+    TFUtil.disable_gpu()

-        self.build_agents()
-        
-        return
+    self.env = env
+    self.arg_parser = arg_parser
+    self._enable_training = True
+    self.train_agents = []
+    self.parse_args(arg_parser)

-    def get_enable_training(self):
-        return self._enable_training
-    
-    def set_enable_training(self, enable):
-        self._enable_training = enable
-        for i in range(len(self.agents)):
-            curr_agent = self.agents[i]
-            if curr_agent is not None:
-                enable_curr_train = self.train_agents[i] if (len(self.train_agents) > 0) else True
-                curr_agent.enable_training = self.enable_training and enable_curr_train
+    self.build_agents()

-        if (self._enable_training):
-            self.env.set_mode(RLAgent.Mode.TRAIN)
-        else:
-            self.env.set_mode(RLAgent.Mode.TEST)
+    return

-        return
+  def get_enable_training(self):
+    return self._enable_training

-    enable_training = property(get_enable_training, set_enable_training)
-    
-    def parse_args(self, arg_parser):
-        self.train_agents = self.arg_parser.parse_bools('train_agents')
-        num_agents = self.env.get_num_agents()
-        assert(len(self.train_agents) == num_agents or len(self.train_agents) == 0)
+  def set_enable_training(self, enable):
+    self._enable_training = enable
+    for i in range(len(self.agents)):
+      curr_agent = self.agents[i]
+      if curr_agent is not None:
+        enable_curr_train = self.train_agents[i] if (len(self.train_agents) > 0) else True
+        curr_agent.enable_training = self.enable_training and enable_curr_train

-        return
+    if (self._enable_training):
+      self.env.set_mode(RLAgent.Mode.TRAIN)
+    else:
+      self.env.set_mode(RLAgent.Mode.TEST)

-    def shutdown(self):
-        self.env.shutdown()
-        return
+    return

-    def build_agents(self):
-        num_agents = self.env.get_num_agents()
-        print("num_agents=",num_agents)
-        self.agents = []
+  enable_training = property(get_enable_training, set_enable_training)

-        Logger.print2('')
-        Logger.print2('Num Agents: {:d}'.format(num_agents))
+  def parse_args(self, arg_parser):
+    self.train_agents = self.arg_parser.parse_bools('train_agents')
+    num_agents = self.env.get_num_agents()
+    assert (len(self.train_agents) == num_agents or len(self.train_agents) == 0)

-        agent_files = self.arg_parser.parse_strings('agent_files')
-        print("len(agent_files)=",len(agent_files))
-        assert(len(agent_files) == num_agents or len(agent_files) == 0)
+    return

-        model_files = self.arg_parser.parse_strings('model_files')
-        assert(len(model_files) == num_agents or len(model_files) == 0)
+  def shutdown(self):
+    self.env.shutdown()
+    return

-        output_path = self.arg_parser.parse_string('output_path')
-        int_output_path = self.arg_parser.parse_string('int_output_path')
+  def build_agents(self):
+    num_agents = self.env.get_num_agents()
+    print("num_agents=", num_agents)
+    self.agents = []

-        for i in range(num_agents):
-            curr_file = agent_files[i]
-            curr_agent = self._build_agent(i, curr_file)
+    Logger.print2('')
+    Logger.print2('Num Agents: {:d}'.format(num_agents))

-            if curr_agent is not None:
-                curr_agent.output_dir = output_path
-                curr_agent.int_output_dir = int_output_path
-                Logger.print2(str(curr_agent))
+    agent_files = self.arg_parser.parse_strings('agent_files')
+    print("len(agent_files)=", len(agent_files))
+    assert (len(agent_files) == num_agents or len(agent_files) == 0)

-                if (len(model_files) > 0):
-                    curr_model_file = model_files[i]
-                    if curr_model_file != 'none':
-                        curr_agent.load_model(pybullet_data.getDataPath()+"/"+curr_model_file)
+    model_files = self.arg_parser.parse_strings('model_files')
+    assert (len(model_files) == num_agents or len(model_files) == 0)

-            self.agents.append(curr_agent)
-            Logger.print2('')
+    output_path = self.arg_parser.parse_string('output_path')
+    int_output_path = self.arg_parser.parse_string('int_output_path')

-        self.set_enable_training(self.enable_training)
+    for i in range(num_agents):
+      curr_file = agent_files[i]
+      curr_agent = self._build_agent(i, curr_file)

-        return
+      if curr_agent is not None:
+        curr_agent.output_dir = output_path
+        curr_agent.int_output_dir = int_output_path
+        Logger.print2(str(curr_agent))

-    def update(self, timestep):
-        #print("world update!\n")
-        self._update_agents(timestep)
-        self._update_env(timestep)
-        return
+        if (len(model_files) > 0):
+          curr_model_file = model_files[i]
+          if curr_model_file != 'none':
+            curr_agent.load_model(pybullet_data.getDataPath() + "/" + curr_model_file)

-    def reset(self):
-        self._reset_agents()
-        self._reset_env()
-        return
+      self.agents.append(curr_agent)
+      Logger.print2('')

-    def end_episode(self):
-        self._end_episode_agents();
-        return
+    self.set_enable_training(self.enable_training)

-    def _update_env(self, timestep):
-        self.env.update(timestep)
-        return
+    return

-    def _update_agents(self, timestep):
-        #print("len(agents)=",len(self.agents))
-        for agent in self.agents:
-            if (agent is not None):
-                agent.update(timestep)
-        return
+  def update(self, timestep):
+    #print("world update!\n")
+    self._update_agents(timestep)
+    self._update_env(timestep)
+    return

-    def _reset_env(self):
-        self.env.reset()
-        return
+  def reset(self):
+    self._reset_agents()
+    self._reset_env()
+    return

-    def _reset_agents(self):
-        for agent in self.agents:
-            if (agent != None):
-                agent.reset()
-        return
+  def end_episode(self):
+    self._end_episode_agents()
+    return

-    def _end_episode_agents(self):
-        for agent in self.agents:
-            if (agent != None):
-                agent.end_episode()
-        return
+  def _update_env(self, timestep):
+    self.env.update(timestep)
+    return

-    def _build_agent(self, id, agent_file):
-        Logger.print2('Agent {:d}: {}'.format(id, agent_file))
-        if (agent_file == 'none'):
-            agent = None
-        else:
-            agent = AgentBuilder.build_agent(self, id, agent_file)
-            assert (agent != None), 'Failed to build agent {:d} from: {}'.format(id, agent_file)
-        
-        return agent
-        
+  def _update_agents(self, timestep):
+    #print("len(agents)=",len(self.agents))
+    for agent in self.agents:
+      if (agent is not None):
+        agent.update(timestep)
+    return
+
+  def _reset_env(self):
+    self.env.reset()
+    return
+
+  def _reset_agents(self):
+    for agent in self.agents:
+      if (agent != None):
+        agent.reset()
+    return
+
+  def _end_episode_agents(self):
+    for agent in self.agents:
+      if (agent != None):
+        agent.end_episode()
+    return
+
+  def _build_agent(self, id, agent_file):
+    Logger.print2('Agent {:d}: {}'.format(id, agent_file))
+    if (agent_file == 'none'):
+      agent = None
+    else:
+      agent = AgentBuilder.build_agent(self, id, agent_file)
+      assert (agent != None), 'Failed to build agent {:d} from: {}'.format(id, agent_file)
+
+    return agent
--- a/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/solvers/mpi_solver.py
+++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/solvers/mpi_solver.py
@@ -8,96 +8,97 @@ from pybullet_utils.logger import Logger

 from pybullet_envs.deep_mimic.learning.solvers.solver import Solver

+
 class MPISolver(Solver):
-    CHECK_SYNC_ITERS = 1000
+  CHECK_SYNC_ITERS = 1000

-    def __init__(self, sess, optimizer, vars):
-        super().__init__(vars)
-        self.sess = sess
-        self.optimizer = optimizer
-        self._build_grad_feed(vars)
-        self._update = optimizer.apply_gradients(zip(self._grad_tf_list, self.vars))
-        self._set_flat_vars = TFUtil.SetFromFlat(sess, self.vars)
-        self._get_flat_vars = TFUtil.GetFlat(sess, self.vars)
+  def __init__(self, sess, optimizer, vars):
+    super().__init__(vars)
+    self.sess = sess
+    self.optimizer = optimizer
+    self._build_grad_feed(vars)
+    self._update = optimizer.apply_gradients(zip(self._grad_tf_list, self.vars))
+    self._set_flat_vars = TFUtil.SetFromFlat(sess, self.vars)
+    self._get_flat_vars = TFUtil.GetFlat(sess, self.vars)

-        self.iter = 0
-        grad_dim = self._calc_grad_dim()
-        self._flat_grad = np.zeros(grad_dim, dtype=np.float32)
-        self._global_flat_grad = np.zeros(grad_dim, dtype=np.float32)
-        
-        return
+    self.iter = 0
+    grad_dim = self._calc_grad_dim()
+    self._flat_grad = np.zeros(grad_dim, dtype=np.float32)
+    self._global_flat_grad = np.zeros(grad_dim, dtype=np.float32)

-    def get_stepsize(self):
-        return self.optimizer._learning_rate_tensor.eval()
+    return

-    def update(self, grads=None, grad_scale=1.0):
-        if grads is not None:
-            self._flat_grad = MathUtil.flatten(grads)
-        else:
-            self._flat_grad.fill(0)
-        return self.update_flatgrad(self._flat_grad, grad_scale)
+  def get_stepsize(self):
+    return self.optimizer._learning_rate_tensor.eval()

-    def update_flatgrad(self, flat_grad, grad_scale=1.0):
-        if self.iter % self.CHECK_SYNC_ITERS == 0:
-            assert self.check_synced(), Logger.print2('Network parameters desynchronized')
-        
-        if grad_scale != 1.0:
-            flat_grad *= grad_scale
+  def update(self, grads=None, grad_scale=1.0):
+    if grads is not None:
+      self._flat_grad = MathUtil.flatten(grads)
+    else:
+      self._flat_grad.fill(0)
+    return self.update_flatgrad(self._flat_grad, grad_scale)

-        MPI.COMM_WORLD.Allreduce(flat_grad, self._global_flat_grad, op=MPI.SUM)
-        self._global_flat_grad /= MPIUtil.get_num_procs()
+  def update_flatgrad(self, flat_grad, grad_scale=1.0):
+    if self.iter % self.CHECK_SYNC_ITERS == 0:
+      assert self.check_synced(), Logger.print2('Network parameters desynchronized')

-        self._load_flat_grad(self._global_flat_grad)
-        self.sess.run([self._update], self._grad_feed)
-        self.iter += 1
+    if grad_scale != 1.0:
+      flat_grad *= grad_scale

-        return
+    MPI.COMM_WORLD.Allreduce(flat_grad, self._global_flat_grad, op=MPI.SUM)
+    self._global_flat_grad /= MPIUtil.get_num_procs()

-    def sync(self):
-        vars = self._get_flat_vars()
-        MPIUtil.bcast(vars)
-        self._set_flat_vars(vars)
-        return
+    self._load_flat_grad(self._global_flat_grad)
+    self.sess.run([self._update], self._grad_feed)
+    self.iter += 1

-    def check_synced(self):
-        synced = True
-        if self._is_root():
-            vars = self._get_flat_vars()
-            MPIUtil.bcast(vars)
-        else:
-            vars_local = self._get_flat_vars()
-            vars_root = np.empty_like(vars_local)
-            MPIUtil.bcast(vars_root)
-            synced = (vars_local == vars_root).all()
-        return synced
+    return

-    def _is_root(self):
-        return MPIUtil.is_root_proc()
-    
-    def _build_grad_feed(self, vars):
-        self._grad_tf_list = []
-        self._grad_buffers = []
-        for v in self.vars:
-            shape = v.get_shape()
-            grad = np.zeros(shape)
-            grad_tf = tf.placeholder(tf.float32, shape=shape)
-            self._grad_buffers.append(grad)
-            self._grad_tf_list.append(grad_tf)
+  def sync(self):
+    vars = self._get_flat_vars()
+    MPIUtil.bcast(vars)
+    self._set_flat_vars(vars)
+    return

-        self._grad_feed = dict({g_tf: g for g_tf, g in zip(self._grad_tf_list, self._grad_buffers)})
-        
-        return
+  def check_synced(self):
+    synced = True
+    if self._is_root():
+      vars = self._get_flat_vars()
+      MPIUtil.bcast(vars)
+    else:
+      vars_local = self._get_flat_vars()
+      vars_root = np.empty_like(vars_local)
+      MPIUtil.bcast(vars_root)
+      synced = (vars_local == vars_root).all()
+    return synced

-    def _calc_grad_dim(self):
-        grad_dim = 0
-        for grad in self._grad_buffers:
-            grad_dim += grad.size
-        return grad_dim
+  def _is_root(self):
+    return MPIUtil.is_root_proc()

-    def _load_flat_grad(self, flat_grad):
-        start = 0
-        for g in self._grad_buffers:
-            size = g.size
-            np.copyto(g, np.reshape(flat_grad[start:start + size], g.shape))
-            start += size
-        return
+  def _build_grad_feed(self, vars):
+    self._grad_tf_list = []
+    self._grad_buffers = []
+    for v in self.vars:
+      shape = v.get_shape()
+      grad = np.zeros(shape)
+      grad_tf = tf.placeholder(tf.float32, shape=shape)
+      self._grad_buffers.append(grad)
+      self._grad_tf_list.append(grad_tf)
+
+    self._grad_feed = dict({g_tf: g for g_tf, g in zip(self._grad_tf_list, self._grad_buffers)})
+
+    return
+
+  def _calc_grad_dim(self):
+    grad_dim = 0
+    for grad in self._grad_buffers:
+      grad_dim += grad.size
+    return grad_dim
+
+  def _load_flat_grad(self, flat_grad):
+    start = 0
+    for g in self._grad_buffers:
+      size = g.size
+      np.copyto(g, np.reshape(flat_grad[start:start + size], g.shape))
+      start += size
+    return
--- a/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/solvers/solver.py
+++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/solvers/solver.py
@@ -1,15 +1,17 @@
 from abc import abstractmethod
 import sys, abc
 if sys.version_info >= (3, 4):
-    ABC = abc.ABC
+  ABC = abc.ABC
 else:
-    ABC = abc.ABCMeta('ABC', (), {})
+  ABC = abc.ABCMeta('ABC', (), {})
+

 class Solver(ABC):
-    def __init__(self, vars):
-        self.vars = vars
-        return

-    @abstractmethod
-    def update(self, grads):
-        pass
+  def __init__(self, vars):
+    self.vars = vars
+    return
+
+  @abstractmethod
+  def update(self, grads):
+    pass
--- a/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/tf_agent.py
+++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/tf_agent.py
@@ -6,144 +6,148 @@ from pybullet_envs.deep_mimic.learning.rl_agent import RLAgent
 from pybullet_utils.logger import Logger
 from pybullet_envs.deep_mimic.learning.tf_normalizer import TFNormalizer

+
 class TFAgent(RLAgent):
-    RESOURCE_SCOPE = 'resource'
-    SOLVER_SCOPE = 'solvers'
+  RESOURCE_SCOPE = 'resource'
+  SOLVER_SCOPE = 'solvers'

-    def __init__(self, world, id, json_data):
-        self.tf_scope = 'agent'
-        self.graph = tf.Graph()
-        self.sess = tf.Session(graph=self.graph)
+  def __init__(self, world, id, json_data):
+    self.tf_scope = 'agent'
+    self.graph = tf.Graph()
+    self.sess = tf.Session(graph=self.graph)

-        super().__init__(world, id, json_data)
-        self._build_graph(json_data)
-        self._init_normalizers()
-        return
+    super().__init__(world, id, json_data)
+    self._build_graph(json_data)
+    self._init_normalizers()
+    return

-    def __del__(self):
-        self.sess.close()
-        return
+  def __del__(self):
+    self.sess.close()
+    return

-    def save_model(self, out_path):
-        with self.sess.as_default(), self.graph.as_default():
-            try:
-                save_path = self.saver.save(self.sess, out_path, write_meta_graph=False, write_state=False)
-                Logger.print2('Model saved to: ' + save_path)
-            except:
-                Logger.print2("Failed to save model to: " + save_path)
-        return
+  def save_model(self, out_path):
+    with self.sess.as_default(), self.graph.as_default():
+      try:
+        save_path = self.saver.save(self.sess, out_path, write_meta_graph=False, write_state=False)
+        Logger.print2('Model saved to: ' + save_path)
+      except:
+        Logger.print2("Failed to save model to: " + save_path)
+    return

-    def load_model(self, in_path):
-        with self.sess.as_default(), self.graph.as_default():
-            self.saver.restore(self.sess, in_path)
-            self._load_normalizers()
-            Logger.print2('Model loaded from: ' + in_path)
-        return
+  def load_model(self, in_path):
+    with self.sess.as_default(), self.graph.as_default():
+      self.saver.restore(self.sess, in_path)
+      self._load_normalizers()
+      Logger.print2('Model loaded from: ' + in_path)
+    return

-    def _get_output_path(self):
-        assert(self.output_dir != '')
-        file_path = self.output_dir + '/agent' + str(self.id) + '_model.ckpt'
-        return file_path
+  def _get_output_path(self):
+    assert (self.output_dir != '')
+    file_path = self.output_dir + '/agent' + str(self.id) + '_model.ckpt'
+    return file_path

-    def _get_int_output_path(self):
-        assert(self.int_output_dir != '')
-        file_path = self.int_output_dir + ('/agent{:d}_models/agent{:d}_int_model_{:010d}.ckpt').format(self.id, self.id, self.iter)
-        return file_path
+  def _get_int_output_path(self):
+    assert (self.int_output_dir != '')
+    file_path = self.int_output_dir + (
+        '/agent{:d}_models/agent{:d}_int_model_{:010d}.ckpt').format(self.id, self.id, self.iter)
+    return file_path

-    def _build_graph(self, json_data):
-        with self.sess.as_default(), self.graph.as_default():
-            with tf.variable_scope(self.tf_scope):
-                self._build_nets(json_data)
-                
-                with tf.variable_scope(self.SOLVER_SCOPE):
-                    self._build_losses(json_data)
-                    self._build_solvers(json_data)
+  def _build_graph(self, json_data):
+    with self.sess.as_default(), self.graph.as_default():
+      with tf.variable_scope(self.tf_scope):
+        self._build_nets(json_data)

-                self._initialize_vars()
-                self._build_saver()
-        return
+        with tf.variable_scope(self.SOLVER_SCOPE):
+          self._build_losses(json_data)
+          self._build_solvers(json_data)

-    def _init_normalizers(self):
-        with self.sess.as_default(), self.graph.as_default():
-            # update normalizers to sync the tensorflow tensors
-            self.s_norm.update()
-            self.g_norm.update()
-            self.a_norm.update()
-        return
+        self._initialize_vars()
+        self._build_saver()
+    return

-    @abstractmethod
-    def _build_nets(self, json_data):
-        pass
+  def _init_normalizers(self):
+    with self.sess.as_default(), self.graph.as_default():
+      # update normalizers to sync the tensorflow tensors
+      self.s_norm.update()
+      self.g_norm.update()
+      self.a_norm.update()
+    return

-    @abstractmethod
-    def _build_losses(self, json_data):
-        pass
+  @abstractmethod
+  def _build_nets(self, json_data):
+    pass

-    @abstractmethod
-    def _build_solvers(self, json_data):
-        pass
+  @abstractmethod
+  def _build_losses(self, json_data):
+    pass

-    def _tf_vars(self, scope=''):
-        with self.sess.as_default(), self.graph.as_default():
-            res = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=self.tf_scope + '/' + scope)
-            assert len(res) > 0
-        return res
+  @abstractmethod
+  def _build_solvers(self, json_data):
+    pass

-    def _build_normalizers(self):
-        with self.sess.as_default(), self.graph.as_default(), tf.variable_scope(self.tf_scope):
-            with tf.variable_scope(self.RESOURCE_SCOPE):
-                self.s_norm = TFNormalizer(self.sess, 's_norm', self.get_state_size(), self.world.env.build_state_norm_groups(self.id))
-                state_offset = -self.world.env.build_state_offset(self.id)
-                print("state_offset=",state_offset)
-                state_scale = 1 / self.world.env.build_state_scale(self.id)
-                print("state_scale=",state_scale)
-                self.s_norm.set_mean_std(-self.world.env.build_state_offset(self.id), 
-                                         1 / self.world.env.build_state_scale(self.id))
-                
-                self.g_norm = TFNormalizer(self.sess, 'g_norm', self.get_goal_size(), self.world.env.build_goal_norm_groups(self.id))
-                self.g_norm.set_mean_std(-self.world.env.build_goal_offset(self.id), 
-                                         1 / self.world.env.build_goal_scale(self.id))
+  def _tf_vars(self, scope=''):
+    with self.sess.as_default(), self.graph.as_default():
+      res = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=self.tf_scope + '/' + scope)
+      assert len(res) > 0
+    return res

-                self.a_norm = TFNormalizer(self.sess, 'a_norm', self.get_action_size())
-                self.a_norm.set_mean_std(-self.world.env.build_action_offset(self.id), 
-                                         1 / self.world.env.build_action_scale(self.id))
-        return
+  def _build_normalizers(self):
+    with self.sess.as_default(), self.graph.as_default(), tf.variable_scope(self.tf_scope):
+      with tf.variable_scope(self.RESOURCE_SCOPE):
+        self.s_norm = TFNormalizer(self.sess, 's_norm', self.get_state_size(),
+                                   self.world.env.build_state_norm_groups(self.id))
+        state_offset = -self.world.env.build_state_offset(self.id)
+        print("state_offset=", state_offset)
+        state_scale = 1 / self.world.env.build_state_scale(self.id)
+        print("state_scale=", state_scale)
+        self.s_norm.set_mean_std(-self.world.env.build_state_offset(self.id),
+                                 1 / self.world.env.build_state_scale(self.id))

-    def _load_normalizers(self):
-        self.s_norm.load()
-        self.g_norm.load()
-        self.a_norm.load()
-        return
+        self.g_norm = TFNormalizer(self.sess, 'g_norm', self.get_goal_size(),
+                                   self.world.env.build_goal_norm_groups(self.id))
+        self.g_norm.set_mean_std(-self.world.env.build_goal_offset(self.id),
+                                 1 / self.world.env.build_goal_scale(self.id))

-    def _update_normalizers(self):
-        with self.sess.as_default(), self.graph.as_default():
-            super()._update_normalizers()
-        return
+        self.a_norm = TFNormalizer(self.sess, 'a_norm', self.get_action_size())
+        self.a_norm.set_mean_std(-self.world.env.build_action_offset(self.id),
+                                 1 / self.world.env.build_action_scale(self.id))
+    return

-    def _initialize_vars(self):
-        self.sess.run(tf.global_variables_initializer())
-        return
+  def _load_normalizers(self):
+    self.s_norm.load()
+    self.g_norm.load()
+    self.a_norm.load()
+    return

-    def _build_saver(self):
-        vars = self._get_saver_vars()
-        self.saver = tf.train.Saver(vars, max_to_keep=0)
-        return
+  def _update_normalizers(self):
+    with self.sess.as_default(), self.graph.as_default():
+      super()._update_normalizers()
+    return

-    def _get_saver_vars(self):
-        with self.sess.as_default(), self.graph.as_default():
-            vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=self.tf_scope)
-            vars = [v for v in vars if '/' + self.SOLVER_SCOPE + '/' not in v.name]
-            #vars = [v for v in vars if '/target/' not in v.name]
-            assert len(vars) > 0
-        return vars
-    
-    def _weight_decay_loss(self, scope):
-        vars = self._tf_vars(scope)
-        vars_no_bias = [v for v in vars if 'bias' not in v.name]
-        loss = tf.add_n([tf.nn.l2_loss(v) for v in vars_no_bias])
-        return loss
+  def _initialize_vars(self):
+    self.sess.run(tf.global_variables_initializer())
+    return

-    def _train(self):
-        with self.sess.as_default(), self.graph.as_default():
-            super()._train()
-        return
+  def _build_saver(self):
+    vars = self._get_saver_vars()
+    self.saver = tf.train.Saver(vars, max_to_keep=0)
+    return
+
+  def _get_saver_vars(self):
+    with self.sess.as_default(), self.graph.as_default():
+      vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=self.tf_scope)
+      vars = [v for v in vars if '/' + self.SOLVER_SCOPE + '/' not in v.name]
+      #vars = [v for v in vars if '/target/' not in v.name]
+      assert len(vars) > 0
+    return vars
+
+  def _weight_decay_loss(self, scope):
+    vars = self._tf_vars(scope)
+    vars_no_bias = [v for v in vars if 'bias' not in v.name]
+    loss = tf.add_n([tf.nn.l2_loss(v) for v in vars_no_bias])
+    return loss
+
+  def _train(self):
+    with self.sess.as_default(), self.graph.as_default():
+      super()._train()
+    return
--- a/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/tf_normalizer.py
+++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/tf_normalizer.py
@@ -3,65 +3,72 @@ import copy
 import tensorflow as tf
 from pybullet_envs.deep_mimic.learning.normalizer import Normalizer

+
 class TFNormalizer(Normalizer):

-    def __init__(self, sess, scope, size, groups_ids=None, eps=0.02, clip=np.inf):
-        self.sess = sess
-        self.scope = scope
-        super().__init__(size, groups_ids, eps, clip)
+  def __init__(self, sess, scope, size, groups_ids=None, eps=0.02, clip=np.inf):
+    self.sess = sess
+    self.scope = scope
+    super().__init__(size, groups_ids, eps, clip)

-        with tf.variable_scope(self.scope):
-            self._build_resource_tf()
-        return
+    with tf.variable_scope(self.scope):
+      self._build_resource_tf()
+    return

-    # initialze count when loading saved values so that things don't change to quickly during updates
-    def load(self):
-        self.count = self.count_tf.eval()[0]
-        self.mean = self.mean_tf.eval()
-        self.std = self.std_tf.eval()
-        self.mean_sq = self.calc_mean_sq(self.mean, self.std)
-        return
+  # initialze count when loading saved values so that things don't change to quickly during updates
+  def load(self):
+    self.count = self.count_tf.eval()[0]
+    self.mean = self.mean_tf.eval()
+    self.std = self.std_tf.eval()
+    self.mean_sq = self.calc_mean_sq(self.mean, self.std)
+    return

-    def update(self):
-        super().update()
-        self._update_resource_tf()
-        return
+  def update(self):
+    super().update()
+    self._update_resource_tf()
+    return

-    def set_mean_std(self, mean, std):
-        super().set_mean_std(mean, std)
-        self._update_resource_tf()
-        return
+  def set_mean_std(self, mean, std):
+    super().set_mean_std(mean, std)
+    self._update_resource_tf()
+    return

-    def normalize_tf(self, x):
-        norm_x = (x - self.mean_tf) / self.std_tf
-        norm_x = tf.clip_by_value(norm_x, -self.clip, self.clip)
-        return norm_x
+  def normalize_tf(self, x):
+    norm_x = (x - self.mean_tf) / self.std_tf
+    norm_x = tf.clip_by_value(norm_x, -self.clip, self.clip)
+    return norm_x

-    def unnormalize_tf(self, norm_x):
-        x = norm_x * self.std_tf + self.mean_tf
-        return x
-    
-    def _build_resource_tf(self):
-        self.count_tf = tf.get_variable(dtype=tf.int32, name='count', initializer=np.array([self.count], dtype=np.int32), trainable=False)
-        self.mean_tf = tf.get_variable(dtype=tf.float32, name='mean', initializer=self.mean.astype(np.float32), trainable=False)
-        self.std_tf = tf.get_variable(dtype=tf.float32, name='std', initializer=self.std.astype(np.float32), trainable=False)
-        
-        self.count_ph = tf.get_variable(dtype=tf.int32, name='count_ph', shape=[1])
-        self.mean_ph = tf.get_variable(dtype=tf.float32, name='mean_ph', shape=self.mean.shape)
-        self.std_ph = tf.get_variable(dtype=tf.float32, name='std_ph', shape=self.std.shape)
-        
-        self._update_op = tf.group(
-            self.count_tf.assign(self.count_ph),
-            self.mean_tf.assign(self.mean_ph),
-            self.std_tf.assign(self.std_ph)
-        )
-        return
+  def unnormalize_tf(self, norm_x):
+    x = norm_x * self.std_tf + self.mean_tf
+    return x

-    def _update_resource_tf(self):
-        feed = {
-            self.count_ph: np.array([self.count], dtype=np.int32),
-            self.mean_ph: self.mean,
-            self.std_ph: self.std
-        }
-        self.sess.run(self._update_op, feed_dict=feed)
-        return
+  def _build_resource_tf(self):
+    self.count_tf = tf.get_variable(dtype=tf.int32,
+                                    name='count',
+                                    initializer=np.array([self.count], dtype=np.int32),
+                                    trainable=False)
+    self.mean_tf = tf.get_variable(dtype=tf.float32,
+                                   name='mean',
+                                   initializer=self.mean.astype(np.float32),
+                                   trainable=False)
+    self.std_tf = tf.get_variable(dtype=tf.float32,
+                                  name='std',
+                                  initializer=self.std.astype(np.float32),
+                                  trainable=False)
+
+    self.count_ph = tf.get_variable(dtype=tf.int32, name='count_ph', shape=[1])
+    self.mean_ph = tf.get_variable(dtype=tf.float32, name='mean_ph', shape=self.mean.shape)
+    self.std_ph = tf.get_variable(dtype=tf.float32, name='std_ph', shape=self.std.shape)
+
+    self._update_op = tf.group(self.count_tf.assign(self.count_ph),
+                               self.mean_tf.assign(self.mean_ph), self.std_tf.assign(self.std_ph))
+    return
+
+  def _update_resource_tf(self):
+    feed = {
+        self.count_ph: np.array([self.count], dtype=np.int32),
+        self.mean_ph: self.mean,
+        self.std_ph: self.std
+    }
+    self.sess.run(self._update_op, feed_dict=feed)
+    return
--- a/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/tf_util.py
+++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/tf_util.py
@@ -4,101 +4,116 @@ import os

 xavier_initializer = tf.contrib.layers.xavier_initializer()

+
 def disable_gpu():
-    os.environ["CUDA_VISIBLE_DEVICES"] = '-1'
-    return
+  os.environ["CUDA_VISIBLE_DEVICES"] = '-1'
+  return
+

 def var_shape(x):
-    out = [k.value for k in x.get_shape()]
-    assert all(isinstance(a, int) for a in out), "shape function assumes that shape is fully known"
-    return out
+  out = [k.value for k in x.get_shape()]
+  assert all(isinstance(a, int) for a in out), "shape function assumes that shape is fully known"
+  return out
+

 def intprod(x):
-    return int(np.prod(x))
+  return int(np.prod(x))
+

 def numel(x):
-    n = intprod(var_shape(x))
-    return n
+  n = intprod(var_shape(x))
+  return n
+

 def flat_grad(loss, var_list, grad_ys=None):
-    grads = tf.gradients(loss, var_list, grad_ys)
-    return tf.concat([tf.reshape(grad, [numel(v)]) for (v, grad) in zip(var_list, grads)], axis=0)
+  grads = tf.gradients(loss, var_list, grad_ys)
+  return tf.concat([tf.reshape(grad, [numel(v)]) for (v, grad) in zip(var_list, grads)], axis=0)

-def fc_net(input, layers_sizes, activation, reuse=None, flatten=False): # build fully connected network
-    curr_tf = input
-    for i, size in enumerate(layers_sizes):
-        with tf.variable_scope(str(i), reuse=reuse):
-            curr_tf = tf.layers.dense(inputs=curr_tf,
-                                    units=size,
-                                    kernel_initializer=xavier_initializer,
-                                    activation = activation if i < len(layers_sizes)-1 else None)
-    if flatten:
-        assert layers_sizes[-1] == 1
-        curr_tf = tf.reshape(curr_tf, [-1])

-    return curr_tf
+def fc_net(input, layers_sizes, activation, reuse=None,
+           flatten=False):  # build fully connected network
+  curr_tf = input
+  for i, size in enumerate(layers_sizes):
+    with tf.variable_scope(str(i), reuse=reuse):
+      curr_tf = tf.layers.dense(inputs=curr_tf,
+                                units=size,
+                                kernel_initializer=xavier_initializer,
+                                activation=activation if i < len(layers_sizes) - 1 else None)
+  if flatten:
+    assert layers_sizes[-1] == 1
+    curr_tf = tf.reshape(curr_tf, [-1])
+
+  return curr_tf
+

 def copy(sess, src, dst):
-    assert len(src) == len(dst)
-    sess.run(list(map(lambda v: v[1].assign(v[0]), zip(src, dst))))
-    return
+  assert len(src) == len(dst)
+  sess.run(list(map(lambda v: v[1].assign(v[0]), zip(src, dst))))
+  return
+

 def flat_grad(loss, var_list):
-    grads = tf.gradients(loss, var_list)
-    return tf.concat(axis=0, values=[tf.reshape(grad, [numel(v)])
-        for (v, grad) in zip(var_list, grads)])
+  grads = tf.gradients(loss, var_list)
+  return tf.concat(axis=0,
+                   values=[tf.reshape(grad, [numel(v)]) for (v, grad) in zip(var_list, grads)])


 def calc_logp_gaussian(x_tf, mean_tf, std_tf):
-    dim = tf.to_float(tf.shape(x_tf)[-1])
+  dim = tf.to_float(tf.shape(x_tf)[-1])

-    if mean_tf is None:
-        diff_tf = x_tf
-    else:
-        diff_tf = x_tf - mean_tf
+  if mean_tf is None:
+    diff_tf = x_tf
+  else:
+    diff_tf = x_tf - mean_tf
+
+  logp_tf = -0.5 * tf.reduce_sum(tf.square(diff_tf / std_tf), axis=-1)
+  logp_tf += -0.5 * dim * np.log(2 * np.pi) - tf.reduce_sum(tf.log(std_tf), axis=-1)
+
+  return logp_tf

-    logp_tf = -0.5 * tf.reduce_sum(tf.square(diff_tf / std_tf), axis=-1)
-    logp_tf += -0.5 * dim * np.log(2 * np.pi) - tf.reduce_sum(tf.log(std_tf), axis=-1)
-    
-    return logp_tf

 def calc_bound_loss(x_tf, bound_min, bound_max):
-    # penalty for violating bounds
-    violation_min = tf.minimum(x_tf - bound_min, 0)
-    violation_max = tf.maximum(x_tf - bound_max, 0)
-    violation = tf.reduce_sum(tf.square(violation_min), axis=-1) + tf.reduce_sum(tf.square(violation_max), axis=-1)
-    loss = 0.5 * tf.reduce_mean(violation)
-    return loss
+  # penalty for violating bounds
+  violation_min = tf.minimum(x_tf - bound_min, 0)
+  violation_max = tf.maximum(x_tf - bound_max, 0)
+  violation = tf.reduce_sum(tf.square(violation_min), axis=-1) + tf.reduce_sum(
+      tf.square(violation_max), axis=-1)
+  loss = 0.5 * tf.reduce_mean(violation)
+  return loss
+

 class SetFromFlat(object):
-    def __init__(self, sess, var_list, dtype=tf.float32):
-        assigns = []
-        shapes = list(map(var_shape, var_list))
-        total_size = np.sum([intprod(shape) for shape in shapes])

-        self.sess = sess
-        self.theta = tf.placeholder(dtype,[total_size])
-        start=0
-        assigns = []
+  def __init__(self, sess, var_list, dtype=tf.float32):
+    assigns = []
+    shapes = list(map(var_shape, var_list))
+    total_size = np.sum([intprod(shape) for shape in shapes])

-        for (shape,v) in zip(shapes,var_list):
-            size = intprod(shape)
-            assigns.append(tf.assign(v, tf.reshape(self.theta[start:start+size],shape)))
-            start += size
+    self.sess = sess
+    self.theta = tf.placeholder(dtype, [total_size])
+    start = 0
+    assigns = []

-        self.op = tf.group(*assigns)
+    for (shape, v) in zip(shapes, var_list):
+      size = intprod(shape)
+      assigns.append(tf.assign(v, tf.reshape(self.theta[start:start + size], shape)))
+      start += size

-        return
+    self.op = tf.group(*assigns)
+
+    return
+
+  def __call__(self, theta):
+    self.sess.run(self.op, feed_dict={self.theta: theta})
+    return

-    def __call__(self, theta):
-        self.sess.run(self.op, feed_dict={self.theta:theta})
-        return

 class GetFlat(object):
-    def __init__(self, sess, var_list):
-        self.sess = sess
-        self.op = tf.concat(axis=0, values=[tf.reshape(v, [numel(v)]) for v in var_list])
-        return

-    def __call__(self):
-        return self.sess.run(self.op)
+  def __init__(self, sess, var_list):
+    self.sess = sess
+    self.op = tf.concat(axis=0, values=[tf.reshape(v, [numel(v)]) for v in var_list])
+    return
+
+  def __call__(self):
+    return self.sess.run(self.op)
--- a/examples/pybullet/gym/pybullet_envs/deep_mimic/mocap/camera.py
+++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/mocap/camera.py
@@ -1,26 +1,28 @@
-import numpy as np 
+import numpy as np

 from quaternion import qrot, qinverse


-def normalize_screen_coordinates(X, w, h): 
-    assert X.shape[-1] == 2
-    # Normalize so that [0, w] is mapped to [-1, 1], while preserving the aspect ratio
-    return X/w*2 - [1, h/w]
+def normalize_screen_coordinates(X, w, h):
+  assert X.shape[-1] == 2
+  # Normalize so that [0, w] is mapped to [-1, 1], while preserving the aspect ratio
+  return X / w * 2 - [1, h / w]
+

 def image_coordinates(X, w, h):
-    assert X.shape[-1] == 2
-    # Reverse camera frame normalization
-    return (X + [1, h/w])*w/2
+  assert X.shape[-1] == 2
+  # Reverse camera frame normalization
+  return (X + [1, h / w]) * w / 2
+

 def world_to_camera(X, R, t):
-    Rt = qinverse(R)
-    Q = np.tile(Rt, (*X.shape[:-1], 1))
-    V = X - t
-    return qrot(Q, V)
-    
-def camera_to_world(X, R, t):
-    Q = np.tile(R, (*X.shape[:-1], 1))
-    V = X
-    return qrot(Q, V) + t
+  Rt = qinverse(R)
+  Q = np.tile(Rt, (*X.shape[:-1], 1))
+  V = X - t
+  return qrot(Q, V)

+
+def camera_to_world(X, R, t):
+  Q = np.tile(R, (*X.shape[:-1], 1))
+  V = X
+  return qrot(Q, V) + t
--- a/examples/pybullet/gym/pybullet_envs/deep_mimic/mocap/deepmimic_json_generator.py
+++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/mocap/deepmimic_json_generator.py
@@ -3,54 +3,63 @@ from h36m_dataset import Human36mDataset
 from camera import *

 import numpy as np
-# In[2]: 
+# In[2]:

 joint_info = {
-	'joint_name':['root', 'right_hip', 'right_knee', 'right_ankle', 'left_hip', 'left_knee', 'left_ankle', 'chest', 'neck', 'nose', 'eye', 'left_shoulder', 'left_elbow', 'left_wrist', 'right_shoulder', 'right_elbow', 'right_wrist'],
-    'father':[0, 0, 1, 2, 0, 4, 5, 0, 7, 8, 9, 8, 11, 12, 8, 14, 15],
-    'side':['middle', 'right', 'right', 'right', 'left', 'left', 'left', 'middle', 'middle', 'middle', 'middle', 'left', 'left', 'left', 'right', 'right', 'right'] 
+    'joint_name': [
+        'root', 'right_hip', 'right_knee', 'right_ankle', 'left_hip', 'left_knee', 'left_ankle',
+        'chest', 'neck', 'nose', 'eye', 'left_shoulder', 'left_elbow', 'left_wrist',
+        'right_shoulder', 'right_elbow', 'right_wrist'
+    ],
+    'father': [0, 0, 1, 2, 0, 4, 5, 0, 7, 8, 9, 8, 11, 12, 8, 14, 15],
+    'side': [
+        'middle', 'right', 'right', 'right', 'left', 'left', 'left', 'middle', 'middle', 'middle',
+        'middle', 'left', 'left', 'left', 'right', 'right', 'right'
+    ]
 }

 # In[3]:

+
 def init_fb_h36m_dataset(dataset_path):
-    dataset = Human36mDataset(dataset_path)
-    print('Preparing Facebook Human3.6M Dataset...')
-    for subject in dataset.subjects():
-        for action in dataset[subject].keys():
-            anim = dataset[subject][action]
-            positions_3d = []
-            for cam in anim['cameras']:
-                pos_3d = world_to_camera(anim['positions'], R=cam['orientation'], t=cam['translation'])
-                pos_3d[:, 1:] -= pos_3d[:, :1] # Remove global offset, but keep trajectory in first position
-                positions_3d.append(pos_3d)
-            anim['positions_3d'] = positions_3d
-    return dataset
+  dataset = Human36mDataset(dataset_path)
+  print('Preparing Facebook Human3.6M Dataset...')
+  for subject in dataset.subjects():
+    for action in dataset[subject].keys():
+      anim = dataset[subject][action]
+      positions_3d = []
+      for cam in anim['cameras']:
+        pos_3d = world_to_camera(anim['positions'], R=cam['orientation'], t=cam['translation'])
+        pos_3d[:, 1:] -= pos_3d[:, :
+                                1]  # Remove global offset, but keep trajectory in first position
+        positions_3d.append(pos_3d)
+      anim['positions_3d'] = positions_3d
+  return dataset
+

 def pose3D_from_fb_h36m(dataset, subject, action, shift):
-    pose_seq = dataset[subject][action]['positions_3d'][0].copy()
-    trajectory = pose_seq[:, :1]
-    pose_seq[:, 1:] += trajectory
-    # Invert camera transformation
-    cam = dataset.cameras()[subject][0]
-    pose_seq = camera_to_world(pose_seq, 
-                                   R=cam['orientation'], 
-                                   t=cam['translation'])
-    x = pose_seq[:,:,0:1]
-    y = pose_seq[:,:,1:2] * -1
-    z = pose_seq[:,:,2:3] 
-    pose_seq = np.concatenate((x,z,y),axis=2)
-    # plus shift
-    pose_seq += np.array([[shift for i in range(pose_seq.shape[1])] for j in range(pose_seq.shape[0])])
-    return pose_seq
+  pose_seq = dataset[subject][action]['positions_3d'][0].copy()
+  trajectory = pose_seq[:, :1]
+  pose_seq[:, 1:] += trajectory
+  # Invert camera transformation
+  cam = dataset.cameras()[subject][0]
+  pose_seq = camera_to_world(pose_seq, R=cam['orientation'], t=cam['translation'])
+  x = pose_seq[:, :, 0:1]
+  y = pose_seq[:, :, 1:2] * -1
+  z = pose_seq[:, :, 2:3]
+  pose_seq = np.concatenate((x, z, y), axis=2)
+  # plus shift
+  pose_seq += np.array([[shift for i in range(pose_seq.shape[1])] for j in range(pose_seq.shape[0])
+                       ])
+  return pose_seq
+

 def rot_seq_to_deepmimic_json(rot_seq, loop, json_path):
-    to_json = {"Loop": loop, "Frames":[]}
-    rot_seq = np.around(rot_seq, decimals=6)
-    to_json["Frames"] = rot_seq.tolist()
-    # In[14]:
-    to_file = json.dumps(to_json)
-    file = open(json_path,"w")
-    file.write(to_file)
-    file.close()
-
+  to_json = {"Loop": loop, "Frames": []}
+  rot_seq = np.around(rot_seq, decimals=6)
+  to_json["Frames"] = rot_seq.tolist()
+  # In[14]:
+  to_file = json.dumps(to_json)
+  file = open(json_path, "w")
+  file.write(to_file)
+  file.close()
--- a/examples/pybullet/gym/pybullet_envs/deep_mimic/mocap/h36m_dataset.py
+++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/mocap/h36m_dataset.py
@@ -3,11 +3,13 @@ import copy
 from skeleton import Skeleton
 from mocap_dataset import MocapDataset
 from camera import normalize_screen_coordinates, image_coordinates
-       
-h36m_skeleton = Skeleton(parents=[-1,  0,  1,  2,  3,  4,  0,  6,  7,  8,  9,  0, 11, 12, 13, 14, 12,
-       16, 17, 18, 19, 20, 19, 22, 12, 24, 25, 26, 27, 28, 27, 30],
-       joints_left=[6, 7, 8, 9, 10, 16, 17, 18, 19, 20, 21, 22, 23],
-       joints_right=[1, 2, 3, 4, 5, 24, 25, 26, 27, 28, 29, 30, 31])
+
+h36m_skeleton = Skeleton(parents=[
+    -1, 0, 1, 2, 3, 4, 0, 6, 7, 8, 9, 0, 11, 12, 13, 14, 12, 16, 17, 18, 19, 20, 19, 22, 12, 24,
+    25, 26, 27, 28, 27, 30
+],
+                         joints_left=[6, 7, 8, 9, 10, 16, 17, 18, 19, 20, 21, 22, 23],
+                         joints_right=[1, 2, 3, 4, 5, 24, 25, 26, 27, 28, 29, 30, 31])

 h36m_cameras_intrinsic_params = [
    {
@@ -18,7 +20,7 @@ h36m_cameras_intrinsic_params = [
        'tangential_distortion': [-0.0009756988729350269, -0.00142447161488235],
        'res_w': 1000,
        'res_h': 1002,
-        'azimuth': 70, # Only used for visualization
+        'azimuth': 70,  # Only used for visualization
    },
    {
        'id': '55011271',
@@ -28,7 +30,7 @@ h36m_cameras_intrinsic_params = [
        'tangential_distortion': [-0.0016190266469493508, -0.0027408944442868233],
        'res_w': 1000,
        'res_h': 1000,
-        'azimuth': -70, # Only used for visualization
+        'azimuth': -70,  # Only used for visualization
    },
    {
        'id': '58860488',
@@ -38,7 +40,7 @@ h36m_cameras_intrinsic_params = [
        'tangential_distortion': [0.0014843869721516967, -0.0007599993259645998],
        'res_w': 1000,
        'res_h': 1000,
-        'azimuth': 110, # Only used for visualization
+        'azimuth': 110,  # Only used for visualization
    },
    {
        'id': '60457274',
@@ -48,26 +50,34 @@ h36m_cameras_intrinsic_params = [
        'tangential_distortion': [-0.0005872055771760643, -0.0018133620033040643],
        'res_w': 1000,
        'res_h': 1002,
-        'azimuth': -110, # Only used for visualization
+        'azimuth': -110,  # Only used for visualization
    },
 ]

 h36m_cameras_extrinsic_params = {
    'S1': [
        {
-            'orientation': [0.1407056450843811, -0.1500701755285263, -0.755240797996521, 0.6223280429840088],
+            'orientation': [
+                0.1407056450843811, -0.1500701755285263, -0.755240797996521, 0.6223280429840088
+            ],
            'translation': [1841.1070556640625, 4955.28466796875, 1563.4454345703125],
        },
        {
-            'orientation': [0.6157187819480896, -0.764836311340332, -0.14833825826644897, 0.11794740706682205],
+            'orientation': [
+                0.6157187819480896, -0.764836311340332, -0.14833825826644897, 0.11794740706682205
+            ],
            'translation': [1761.278564453125, -5078.0068359375, 1606.2650146484375],
        },
        {
-            'orientation': [0.14651472866535187, -0.14647851884365082, 0.7653023600578308, -0.6094175577163696],
+            'orientation': [
+                0.14651472866535187, -0.14647851884365082, 0.7653023600578308, -0.6094175577163696
+            ],
            'translation': [-1846.7777099609375, 5215.04638671875, 1491.972412109375],
        },
        {
-            'orientation': [0.5834008455276489, -0.7853162288665771, 0.14548823237419128, -0.14749594032764435],
+            'orientation': [
+                0.5834008455276489, -0.7853162288665771, 0.14548823237419128, -0.14749594032764435
+            ],
            'translation': [-1794.7896728515625, -3722.698974609375, 1574.8927001953125],
        },
    ],
@@ -91,158 +101,206 @@ h36m_cameras_extrinsic_params = {
    ],
    'S5': [
        {
-            'orientation': [0.1467377245426178, -0.162370964884758, -0.7551892995834351, 0.6178938746452332],
+            'orientation': [
+                0.1467377245426178, -0.162370964884758, -0.7551892995834351, 0.6178938746452332
+            ],
            'translation': [2097.3916015625, 4880.94482421875, 1605.732421875],
        },
        {
-            'orientation': [0.6159758567810059, -0.7626792192459106, -0.15728192031383514, 0.1189815029501915],
+            'orientation': [
+                0.6159758567810059, -0.7626792192459106, -0.15728192031383514, 0.1189815029501915
+            ],
            'translation': [2031.7008056640625, -5167.93310546875, 1612.923095703125],
        },
        {
-            'orientation': [0.14291371405124664, -0.12907841801643372, 0.7678384780883789, -0.6110143065452576],
+            'orientation': [
+                0.14291371405124664, -0.12907841801643372, 0.7678384780883789, -0.6110143065452576
+            ],
            'translation': [-1620.5948486328125, 5171.65869140625, 1496.43701171875],
        },
        {
-            'orientation': [0.5920479893684387, -0.7814217805862427, 0.1274748593568802, -0.15036417543888092],
+            'orientation': [
+                0.5920479893684387, -0.7814217805862427, 0.1274748593568802, -0.15036417543888092
+            ],
            'translation': [-1637.1737060546875, -3867.3173828125, 1547.033203125],
        },
    ],
    'S6': [
        {
-            'orientation': [0.1337897777557373, -0.15692396461963654, -0.7571090459823608, 0.6198879480361938],
+            'orientation': [
+                0.1337897777557373, -0.15692396461963654, -0.7571090459823608, 0.6198879480361938
+            ],
            'translation': [1935.4517822265625, 4950.24560546875, 1618.0838623046875],
        },
        {
-            'orientation': [0.6147197484970093, -0.7628812789916992, -0.16174767911434174, 0.11819244921207428],
+            'orientation': [
+                0.6147197484970093, -0.7628812789916992, -0.16174767911434174, 0.11819244921207428
+            ],
            'translation': [1969.803955078125, -5128.73876953125, 1632.77880859375],
        },
        {
-            'orientation': [0.1529948115348816, -0.13529130816459656, 0.7646096348762512, -0.6112781167030334],
+            'orientation': [
+                0.1529948115348816, -0.13529130816459656, 0.7646096348762512, -0.6112781167030334
+            ],
            'translation': [-1769.596435546875, 5185.361328125, 1476.993408203125],
        },
        {
-            'orientation': [0.5916101336479187, -0.7804774045944214, 0.12832270562648773, -0.1561593860387802],
+            'orientation': [
+                0.5916101336479187, -0.7804774045944214, 0.12832270562648773, -0.1561593860387802
+            ],
            'translation': [-1721.668701171875, -3884.13134765625, 1540.4879150390625],
        },
    ],
    'S7': [
        {
-            'orientation': [0.1435241848230362, -0.1631336808204651, -0.7548328638076782, 0.6188824772834778],
+            'orientation': [
+                0.1435241848230362, -0.1631336808204651, -0.7548328638076782, 0.6188824772834778
+            ],
            'translation': [1974.512939453125, 4926.3544921875, 1597.8326416015625],
        },
        {
-            'orientation': [0.6141672730445862, -0.7638262510299683, -0.1596645563840866, 0.1177929937839508],
+            'orientation': [
+                0.6141672730445862, -0.7638262510299683, -0.1596645563840866, 0.1177929937839508
+            ],
            'translation': [1937.0584716796875, -5119.7900390625, 1631.5665283203125],
        },
        {
-            'orientation': [0.14550060033798218, -0.12874816358089447, 0.7660516500473022, -0.6127139329910278],
+            'orientation': [
+                0.14550060033798218, -0.12874816358089447, 0.7660516500473022, -0.6127139329910278
+            ],
            'translation': [-1741.8111572265625, 5208.24951171875, 1464.8245849609375],
        },
        {
-            'orientation': [0.5912848114967346, -0.7821764349937439, 0.12445473670959473, -0.15196487307548523],
+            'orientation': [
+                0.5912848114967346, -0.7821764349937439, 0.12445473670959473, -0.15196487307548523
+            ],
            'translation': [-1734.7105712890625, -3832.42138671875, 1548.5830078125],
        },
    ],
    'S8': [
        {
-            'orientation': [0.14110587537288666, -0.15589867532253265, -0.7561917304992676, 0.619644045829773],
+            'orientation': [
+                0.14110587537288666, -0.15589867532253265, -0.7561917304992676, 0.619644045829773
+            ],
            'translation': [2150.65185546875, 4896.1611328125, 1611.9046630859375],
        },
        {
-            'orientation': [0.6169601678848267, -0.7647668123245239, -0.14846350252628326, 0.11158157885074615],
+            'orientation': [
+                0.6169601678848267, -0.7647668123245239, -0.14846350252628326, 0.11158157885074615
+            ],
            'translation': [2219.965576171875, -5148.453125, 1613.0440673828125],
        },
        {
-            'orientation': [0.1471444070339203, -0.13377119600772858, 0.7670128345489502, -0.6100369691848755],
+            'orientation': [
+                0.1471444070339203, -0.13377119600772858, 0.7670128345489502, -0.6100369691848755
+            ],
            'translation': [-1571.2215576171875, 5137.0185546875, 1498.1761474609375],
        },
        {
-            'orientation': [0.5927824378013611, -0.7825870513916016, 0.12147816270589828, -0.14631995558738708],
+            'orientation': [
+                0.5927824378013611, -0.7825870513916016, 0.12147816270589828, -0.14631995558738708
+            ],
            'translation': [-1476.913330078125, -3896.7412109375, 1547.97216796875],
        },
    ],
    'S9': [
        {
-            'orientation': [0.15540587902069092, -0.15548215806484222, -0.7532095313072205, 0.6199594736099243],
+            'orientation': [
+                0.15540587902069092, -0.15548215806484222, -0.7532095313072205, 0.6199594736099243
+            ],
            'translation': [2044.45849609375, 4935.1171875, 1481.2275390625],
        },
        {
-            'orientation': [0.618784487247467, -0.7634735107421875, -0.14132238924503326, 0.11933968216180801],
+            'orientation': [
+                0.618784487247467, -0.7634735107421875, -0.14132238924503326, 0.11933968216180801
+            ],
            'translation': [1990.959716796875, -5123.810546875, 1568.8048095703125],
        },
        {
-            'orientation': [0.13357827067375183, -0.1367100477218628, 0.7689454555511475, -0.6100738644599915],
+            'orientation': [
+                0.13357827067375183, -0.1367100477218628, 0.7689454555511475, -0.6100738644599915
+            ],
            'translation': [-1670.9921875, 5211.98583984375, 1528.387939453125],
        },
        {
-            'orientation': [0.5879399180412292, -0.7823407053947449, 0.1427614390850067, -0.14794869720935822],
+            'orientation': [
+                0.5879399180412292, -0.7823407053947449, 0.1427614390850067, -0.14794869720935822
+            ],
            'translation': [-1696.04345703125, -3827.099853515625, 1591.4127197265625],
        },
    ],
    'S11': [
        {
-            'orientation': [0.15232472121715546, -0.15442320704460144, -0.7547563314437866, 0.6191070079803467],
+            'orientation': [
+                0.15232472121715546, -0.15442320704460144, -0.7547563314437866, 0.6191070079803467
+            ],
            'translation': [2098.440185546875, 4926.5546875, 1500.278564453125],
        },
        {
-            'orientation': [0.6189449429512024, -0.7600917220115662, -0.15300633013248444, 0.1255258321762085],
+            'orientation': [
+                0.6189449429512024, -0.7600917220115662, -0.15300633013248444, 0.1255258321762085
+            ],
            'translation': [2083.182373046875, -4912.1728515625, 1561.07861328125],
        },
        {
-            'orientation': [0.14943228662014008, -0.15650227665901184, 0.7681233882904053, -0.6026304364204407],
+            'orientation': [
+                0.14943228662014008, -0.15650227665901184, 0.7681233882904053, -0.6026304364204407
+            ],
            'translation': [-1609.8153076171875, 5177.3359375, 1537.896728515625],
        },
        {
-            'orientation': [0.5894251465797424, -0.7818877100944519, 0.13991211354732513, -0.14715361595153809],
+            'orientation': [
+                0.5894251465797424, -0.7818877100944519, 0.13991211354732513, -0.14715361595153809
+            ],
            'translation': [-1590.738037109375, -3854.1689453125, 1578.017578125],
        },
    ],
 }

+
 class Human36mDataset(MocapDataset):
-    def __init__(self, path, remove_static_joints=True):
-        super().__init__(fps=50, skeleton=h36m_skeleton)
-        
-        self._cameras = copy.deepcopy(h36m_cameras_extrinsic_params)
-        for cameras in self._cameras.values():
-            for i, cam in enumerate(cameras):
-                cam.update(h36m_cameras_intrinsic_params[i])
-                for k, v in cam.items():
-                    if k not in ['id', 'res_w', 'res_h']:
-                        cam[k] = np.array(v, dtype='float32')
-                
-                # Normalize camera frame
-                cam['center'] = normalize_screen_coordinates(cam['center'], w=cam['res_w'], h=cam['res_h']).astype('float32')
-                cam['focal_length'] = cam['focal_length']/cam['res_w']*2
-                if 'translation' in cam:
-                    cam['translation'] = cam['translation']/1000 # mm to meters
-                
-                # Add intrinsic parameters vector
-                cam['intrinsic'] = np.concatenate((cam['focal_length'],
-                                                   cam['center'],
-                                                   cam['radial_distortion'],
-                                                   cam['tangential_distortion']))
-        
-        # Load serialized dataset
-        data = np.load(path)['positions_3d'].item()
-        
-        self._data = {}
-        for subject, actions in data.items():
-            self._data[subject] = {}
-            for action_name, positions in actions.items():
-                self._data[subject][action_name] = {
-                    'positions': positions,
-                    'cameras': self._cameras[subject],
-                }
-                
-        if remove_static_joints:
-            # Bring the skeleton to 17 joints instead of the original 32
-            self.remove_joints([4, 5, 9, 10, 11, 16, 20, 21, 22, 23, 24, 28, 29, 30, 31])
-            
-            # Rewire shoulders to the correct parents
-            self._skeleton._parents[11] = 8
-            self._skeleton._parents[14] = 8
-            
-    def supports_semi_supervised(self):
-        return True
-    
+
+  def __init__(self, path, remove_static_joints=True):
+    super().__init__(fps=50, skeleton=h36m_skeleton)
+
+    self._cameras = copy.deepcopy(h36m_cameras_extrinsic_params)
+    for cameras in self._cameras.values():
+      for i, cam in enumerate(cameras):
+        cam.update(h36m_cameras_intrinsic_params[i])
+        for k, v in cam.items():
+          if k not in ['id', 'res_w', 'res_h']:
+            cam[k] = np.array(v, dtype='float32')
+
+        # Normalize camera frame
+        cam['center'] = normalize_screen_coordinates(cam['center'], w=cam['res_w'],
+                                                     h=cam['res_h']).astype('float32')
+        cam['focal_length'] = cam['focal_length'] / cam['res_w'] * 2
+        if 'translation' in cam:
+          cam['translation'] = cam['translation'] / 1000  # mm to meters
+
+        # Add intrinsic parameters vector
+        cam['intrinsic'] = np.concatenate((cam['focal_length'], cam['center'],
+                                           cam['radial_distortion'], cam['tangential_distortion']))
+
+    # Load serialized dataset
+    data = np.load(path)['positions_3d'].item()
+
+    self._data = {}
+    for subject, actions in data.items():
+      self._data[subject] = {}
+      for action_name, positions in actions.items():
+        self._data[subject][action_name] = {
+            'positions': positions,
+            'cameras': self._cameras[subject],
+        }
+
+    if remove_static_joints:
+      # Bring the skeleton to 17 joints instead of the original 32
+      self.remove_joints([4, 5, 9, 10, 11, 16, 20, 21, 22, 23, 24, 28, 29, 30, 31])
+
+      # Rewire shoulders to the correct parents
+      self._skeleton._parents[11] = 8
+      self._skeleton._parents[14] = 8
+
+  def supports_semi_supervised(self):
+    return True
--- a/examples/pybullet/gym/pybullet_envs/deep_mimic/mocap/humanoid.py
+++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/mocap/humanoid.py
--- a/examples/pybullet/gym/pybullet_envs/deep_mimic/mocap/inverse_kinematics.py
+++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/mocap/inverse_kinematics.py
@@ -9,142 +9,138 @@ from transformation import *

 from pyquaternion import Quaternion

+
 def get_angle(vec1, vec2):
-    cos_theta = np.dot(vec1, vec2)/(np.linalg.norm(vec1) * np.linalg.norm(vec2))
-    return acos(cos_theta) 
+  cos_theta = np.dot(vec1, vec2) / (np.linalg.norm(vec1) * np.linalg.norm(vec2))
+  return acos(cos_theta)


 def get_quaternion(ox, oy, oz, x, y, z):
-    # given transformed axis in x-y-z order return a quaternion
-    ox /= np.linalg.norm(ox)
-    oy /= np.linalg.norm(oy)
-    oz /= np.linalg.norm(oz)
+  # given transformed axis in x-y-z order return a quaternion
+  ox /= np.linalg.norm(ox)
+  oy /= np.linalg.norm(oy)
+  oz /= np.linalg.norm(oz)

-    set1 = np.vstack((ox,oy,oz))
+  set1 = np.vstack((ox, oy, oz))

-    x /= np.linalg.norm(x)
-    y /= np.linalg.norm(y)
-    z /= np.linalg.norm(z)
+  x /= np.linalg.norm(x)
+  y /= np.linalg.norm(y)
+  z /= np.linalg.norm(z)

-    set2 = np.vstack((x,y,z))
-    rot_mat = superimposition_matrix(set1, set2, scale=False, usesvd=True)
-    rot_qua = quaternion_from_matrix(rot_mat)
+  set2 = np.vstack((x, y, z))
+  rot_mat = superimposition_matrix(set1, set2, scale=False, usesvd=True)
+  rot_qua = quaternion_from_matrix(rot_mat)

-    return rot_qua
+  return rot_qua


 # 3D coord to deepmimic rotations
 def coord_to_rot(frameNum, frame, frame_duration):
-    eps = 0.001
-    axis_rotate_rate = 0.3
+  eps = 0.001
+  axis_rotate_rate = 0.3

-    frame = np.array(frame)
-    tmp = [[] for i in range(15)]
-    # duration of frame in seconds (1D),
-    tmp[0] = [frame_duration]
-    # root position (3D),
-    tmp[1] = frame[0]
-    # root rotation (4D),
-    root_y = (frame[7] - frame[0])
-    root_z = (frame[1] - frame[0]) 
-    root_x = np.cross(root_y, root_z)
+  frame = np.array(frame)
+  tmp = [[] for i in range(15)]
+  # duration of frame in seconds (1D),
+  tmp[0] = [frame_duration]
+  # root position (3D),
+  tmp[1] = frame[0]
+  # root rotation (4D),
+  root_y = (frame[7] - frame[0])
+  root_z = (frame[1] - frame[0])
+  root_x = np.cross(root_y, root_z)

-    x = np.array([1.0,0,0])
-    y = np.array([0,1.0,0])
-    z = np.array([0,0,1.0])
-    
-    rot_qua = get_quaternion(root_x, root_y, root_z, x, y, z)
-    tmp[2] = list(rot_qua)
+  x = np.array([1.0, 0, 0])
+  y = np.array([0, 1.0, 0])
+  z = np.array([0, 0, 1.0])

-    # chest rotation (4D),
-    chest_y = (frame[8] - frame[7])
-    chest_z = (frame[14] - frame[8]) 
-    chest_x = np.cross(chest_y, chest_z)
-    rot_qua = get_quaternion(chest_x, chest_y, chest_z, root_x, root_y, root_z)
-    tmp[3] = list(rot_qua)
+  rot_qua = get_quaternion(root_x, root_y, root_z, x, y, z)
+  tmp[2] = list(rot_qua)

-    # neck rotation (4D),
-    neck_y = (frame[10] - frame[8])
-    neck_z = np.cross(frame[10]-frame[9], frame[8]-frame[9]) 
-    neck_x = np.cross(neck_y, neck_z)
-    rot_qua = get_quaternion(neck_x, neck_y, neck_z, chest_x, chest_y, chest_z)
-    tmp[4] = list(rot_qua)
+  # chest rotation (4D),
+  chest_y = (frame[8] - frame[7])
+  chest_z = (frame[14] - frame[8])
+  chest_x = np.cross(chest_y, chest_z)
+  rot_qua = get_quaternion(chest_x, chest_y, chest_z, root_x, root_y, root_z)
+  tmp[3] = list(rot_qua)

-    # right hip rotation (4D),
-    r_hip_y = (frame[1] - frame[2])
-    r_hip_z = np.cross(frame[1]-frame[2], frame[3]-frame[2]) 
-    r_hip_x = np.cross(r_hip_y, r_hip_z)
-    rot_qua = get_quaternion(r_hip_x, r_hip_y, r_hip_z, root_x, root_y, root_z)
-    tmp[5] = list(rot_qua)
+  # neck rotation (4D),
+  neck_y = (frame[10] - frame[8])
+  neck_z = np.cross(frame[10] - frame[9], frame[8] - frame[9])
+  neck_x = np.cross(neck_y, neck_z)
+  rot_qua = get_quaternion(neck_x, neck_y, neck_z, chest_x, chest_y, chest_z)
+  tmp[4] = list(rot_qua)

-    # right knee rotation (1D),
-    vec1 = frame[1] - frame[2]
-    vec2 = frame[3] - frame[2]
-    angle1 = get_angle(vec1, vec2)
-    tmp[6] = [angle1-pi]
+  # right hip rotation (4D),
+  r_hip_y = (frame[1] - frame[2])
+  r_hip_z = np.cross(frame[1] - frame[2], frame[3] - frame[2])
+  r_hip_x = np.cross(r_hip_y, r_hip_z)
+  rot_qua = get_quaternion(r_hip_x, r_hip_y, r_hip_z, root_x, root_y, root_z)
+  tmp[5] = list(rot_qua)

-    # right ankle rotation (4D),
-    tmp[7] = [1,0,0,0]
+  # right knee rotation (1D),
+  vec1 = frame[1] - frame[2]
+  vec2 = frame[3] - frame[2]
+  angle1 = get_angle(vec1, vec2)
+  tmp[6] = [angle1 - pi]

-    #  right shoulder rotation (4D),
-    r_shou_y = (frame[14] - frame[15])
-    r_shou_z = np.cross(frame[16]-frame[15], frame[14]-frame[15]) 
-    r_shou_x = np.cross(r_shou_y, r_shou_z)
-    rot_qua = get_quaternion(r_shou_x, r_shou_y, r_shou_z, chest_x, chest_y, chest_z)
-    tmp[8] = list(rot_qua)
+  # right ankle rotation (4D),
+  tmp[7] = [1, 0, 0, 0]

-    # right elbow rotation (1D),
-    vec1 = frame[14] - frame[15]
-    vec2 = frame[16] - frame[15]
-    angle1 = get_angle(vec1, vec2)
-    tmp[9] = [pi-angle1]
+  #  right shoulder rotation (4D),
+  r_shou_y = (frame[14] - frame[15])
+  r_shou_z = np.cross(frame[16] - frame[15], frame[14] - frame[15])
+  r_shou_x = np.cross(r_shou_y, r_shou_z)
+  rot_qua = get_quaternion(r_shou_x, r_shou_y, r_shou_z, chest_x, chest_y, chest_z)
+  tmp[8] = list(rot_qua)

-    # left hip rotation (4D),
-    l_hip_y = (frame[4] - frame[5])
-    l_hip_z = np.cross(frame[4]-frame[5], frame[6]-frame[5]) 
-    l_hip_x = np.cross(l_hip_y, l_hip_z)
-    rot_qua = get_quaternion(l_hip_x, l_hip_y, l_hip_z, root_x, root_y, root_z)    
-    tmp[10] = list(rot_qua)
-    
-    # left knee rotation (1D),
-    vec1 = frame[4] - frame[5]
-    vec2 = frame[6] - frame[5]
-    angle1 = get_angle(vec1, vec2)
-    tmp[11] = [angle1-pi]
-    
-    # left ankle rotation (4D),
-    tmp[12] = [1,0,0,0]
+  # right elbow rotation (1D),
+  vec1 = frame[14] - frame[15]
+  vec2 = frame[16] - frame[15]
+  angle1 = get_angle(vec1, vec2)
+  tmp[9] = [pi - angle1]

-    # left shoulder rotation (4D),
-    l_shou_y = (frame[11] - frame[12])
-    l_shou_z = np.cross(frame[13]-frame[12], frame[11]-frame[12]) 
-    l_shou_x = np.cross(l_shou_y, l_shou_z)
-    rot_qua = get_quaternion(l_shou_x, l_shou_y, l_shou_z, chest_x, chest_y, chest_z)
-    tmp[13] = list(rot_qua)
+  # left hip rotation (4D),
+  l_hip_y = (frame[4] - frame[5])
+  l_hip_z = np.cross(frame[4] - frame[5], frame[6] - frame[5])
+  l_hip_x = np.cross(l_hip_y, l_hip_z)
+  rot_qua = get_quaternion(l_hip_x, l_hip_y, l_hip_z, root_x, root_y, root_z)
+  tmp[10] = list(rot_qua)

-    # left elbow rotation (1D)
-    vec1 = frame[11] - frame[12]
-    vec2 = frame[13] - frame[12]
-    angle1 = get_angle(vec1, vec2)
-    tmp[14] = [pi-angle1]
+  # left knee rotation (1D),
+  vec1 = frame[4] - frame[5]
+  vec2 = frame[6] - frame[5]
+  angle1 = get_angle(vec1, vec2)
+  tmp[11] = [angle1 - pi]
+
+  # left ankle rotation (4D),
+  tmp[12] = [1, 0, 0, 0]
+
+  # left shoulder rotation (4D),
+  l_shou_y = (frame[11] - frame[12])
+  l_shou_z = np.cross(frame[13] - frame[12], frame[11] - frame[12])
+  l_shou_x = np.cross(l_shou_y, l_shou_z)
+  rot_qua = get_quaternion(l_shou_x, l_shou_y, l_shou_z, chest_x, chest_y, chest_z)
+  tmp[13] = list(rot_qua)
+
+  # left elbow rotation (1D)
+  vec1 = frame[11] - frame[12]
+  vec2 = frame[13] - frame[12]
+  angle1 = get_angle(vec1, vec2)
+  tmp[14] = [pi - angle1]
+
+  ret = []
+  for i in tmp:
+    ret += list(i)
+  return np.array(ret)

-    ret = []
-    for i in tmp:
-        ret += list(i)
-    return np.array(ret)

 # In[6]:


 def coord_seq_to_rot_seq(coord_seq, frame_duration):
-    ret = []
-    for i in range(len(coord_seq)):
-        tmp = coord_to_rot( i, coord_seq[i], frame_duration)
-        ret.append(list(tmp))
-    return ret
-
-
-
-
-
-
+  ret = []
+  for i in range(len(coord_seq)):
+    tmp = coord_to_rot(i, coord_seq[i], frame_duration)
+    ret.append(list(tmp))
+  return ret
--- a/examples/pybullet/gym/pybullet_envs/deep_mimic/mocap/mocap_dataset.py
+++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/mocap/mocap_dataset.py
@@ -1,36 +1,37 @@
 import numpy as np
 from skeleton import Skeleton

+
 class MocapDataset:
-    def __init__(self, fps, skeleton):
-        self._skeleton = skeleton
-        self._fps = fps
-        self._data = None # Must be filled by subclass
-        self._cameras = None # Must be filled by subclass
-    
-    def remove_joints(self, joints_to_remove):
-        kept_joints = self._skeleton.remove_joints(joints_to_remove)
-        for subject in self._data.keys():
-            for action in self._data[subject].keys():
-                s = self._data[subject][action]
-                s['positions'] = s['positions'][:, kept_joints]
-                
-        
-    def __getitem__(self, key):
-        return self._data[key]
-        
-    def subjects(self):
-        return self._data.keys()
-    
-    def fps(self):
-        return self._fps
-    
-    def skeleton(self):
-        return self._skeleton
-        
-    def cameras(self):
-        return self._cameras
-    
-    def supports_semi_supervised(self):
-        # This method can be overridden
-        return False 
+
+  def __init__(self, fps, skeleton):
+    self._skeleton = skeleton
+    self._fps = fps
+    self._data = None  # Must be filled by subclass
+    self._cameras = None  # Must be filled by subclass
+
+  def remove_joints(self, joints_to_remove):
+    kept_joints = self._skeleton.remove_joints(joints_to_remove)
+    for subject in self._data.keys():
+      for action in self._data[subject].keys():
+        s = self._data[subject][action]
+        s['positions'] = s['positions'][:, kept_joints]
+
+  def __getitem__(self, key):
+    return self._data[key]
+
+  def subjects(self):
+    return self._data.keys()
+
+  def fps(self):
+    return self._fps
+
+  def skeleton(self):
+    return self._skeleton
+
+  def cameras(self):
+    return self._cameras
+
+  def supports_semi_supervised(self):
+    # This method can be overridden
+    return False
--- a/examples/pybullet/gym/pybullet_envs/deep_mimic/mocap/quaternion.py
+++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/mocap/quaternion.py
@@ -1,30 +1,31 @@
 import numpy as np

+
 def qrot(q, v):
-    """
+  """
    Rotate vector(s) v about the rotation described by quaternion(s) q.
    Expects a tensor of shape (*, 4) for q and a tensor of shape (*, 3) for v,
    where * denotes any number of dimensions.
    Returns a tensor of shape (*, 3).
    """
-    assert q.shape[-1] == 4
-    assert v.shape[-1] == 3
-    assert q.shape[:-1] == v.shape[:-1]
+  assert q.shape[-1] == 4
+  assert v.shape[-1] == 3
+  assert q.shape[:-1] == v.shape[:-1]
+
+  qvec = q[..., 1:]
+
+  uv = np.cross(qvec, v)
+  uuv = np.cross(qvec, uv)
+
+  return (v + 2 * (q[..., :1] * uv + uuv))

-    qvec = q[..., 1:]

-    uv = np.cross(qvec, v)
-    uuv = np.cross(qvec, uv)
-    
-    return (v + 2 * (q[..., :1] * uv + uuv))
-    
 def qinverse(q, inplace=False):
-    # We assume the quaternion to be normalized
-    if inplace:
-        q[..., 1:] *= -1
-        return q
-    else:
-        w = q[..., :1]
-        xyz = q[..., 1:]
-        return np.hstack((w, -xyz))
- 
+  # We assume the quaternion to be normalized
+  if inplace:
+    q[..., 1:] *= -1
+    return q
+  else:
+    w = q[..., :1]
+    xyz = q[..., 1:]
+    return np.hstack((w, -xyz))
--- a/examples/pybullet/gym/pybullet_envs/deep_mimic/mocap/render_reference.py
+++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/mocap/render_reference.py
@@ -1,7 +1,7 @@
-import os,  inspect
+import os, inspect
 currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
 parentdir = os.path.dirname(os.path.dirname(currentdir))
-os.sys.path.insert(0,parentdir)
+os.sys.path.insert(0, parentdir)

 print('parent:', parentdir)

@@ -10,7 +10,6 @@ import pybullet
 import time
 import random

-
 from pybullet_utils.bullet_client import BulletClient
 from deep_mimic.env.motion_capture_data import MotionCaptureData

@@ -26,21 +25,28 @@ import pybullet as p
 import numpy as np
 import argparse

-
-
 parser = argparse.ArgumentParser(description='Arguments for loading reference for learning.')

 # General arguments
-parser.add_argument('--dataset_path', default='data/data_3d_h36m.npz', type=str, help='target dataset') # h36m or humaneva
-parser.add_argument('--json_path', default='data/Walking.json', type=str, help='json file path for storing the deepmimic-format json created by inverse-kinect.')
+parser.add_argument('--dataset_path',
+                    default='data/data_3d_h36m.npz',
+                    type=str,
+                    help='target dataset')  # h36m or humaneva
+parser.add_argument(
+    '--json_path',
+    default='data/Walking.json',
+    type=str,
+    help='json file path for storing the deepmimic-format json created by inverse-kinect.')
 parser.add_argument('--fps', default=24, type=int, help='frame per second')
 parser.add_argument('--subject', default='S11', type=str, help='camera subject.')
-parser.add_argument('--action', default='Walking', type=str,  help='name of the action.')
-parser.add_argument('--loop', default='wrap', type=str, help='loop information in deepmimic, wrap or none.')
+parser.add_argument('--action', default='Walking', type=str, help='name of the action.')
+parser.add_argument('--loop',
+                    default='wrap',
+                    type=str,
+                    help='loop information in deepmimic, wrap or none.')
 parser.add_argument('--draw_gt', action='store_true', help='draw ground truth or not.')

-args = parser.parse_args() 
-
+args = parser.parse_args()

 dataset_path = args.dataset_path
 json_path = args.json_path
@@ -52,93 +58,75 @@ draw_gt = args.draw_gt


 def draw_ground_truth(coord_seq, frame, duration, shift):
-    global joint_info
-    joint = coord_seq[frame]
-    shift = np.array(shift)
-    for i in range(1, 17):
-        # print(x[11], x[14])
-        joint_fa = joint_info['father'][i]
-        if joint_info['side'][i] == 'right':
-            p.addUserDebugLine(lineFromXYZ=joint[i]+shift,
-                               lineToXYZ=joint[joint_fa]+shift,
-                               lineColorRGB=(255,0,0),
-                               lineWidth=1,
-                               lifeTime=duration)
-        else:
-            p.addUserDebugLine(lineFromXYZ=joint[i]+shift,
-                               lineToXYZ=joint[joint_fa]+shift,
-                               lineColorRGB=(0,0,0),
-                               lineWidth=1,
-                               lifeTime=duration)
+  global joint_info
+  joint = coord_seq[frame]
+  shift = np.array(shift)
+  for i in range(1, 17):
+    # print(x[11], x[14])
+    joint_fa = joint_info['father'][i]
+    if joint_info['side'][i] == 'right':
+      p.addUserDebugLine(lineFromXYZ=joint[i] + shift,
+                         lineToXYZ=joint[joint_fa] + shift,
+                         lineColorRGB=(255, 0, 0),
+                         lineWidth=1,
+                         lifeTime=duration)
+    else:
+      p.addUserDebugLine(lineFromXYZ=joint[i] + shift,
+                         lineToXYZ=joint[joint_fa] + shift,
+                         lineColorRGB=(0, 0, 0),
+                         lineWidth=1,
+                         lifeTime=duration)


 dataset = init_fb_h36m_dataset(dataset_path)
-ground_truth = pose3D_from_fb_h36m(dataset, 
-                                   subject = subject, 
-                                   action = action,
-                                   shift = [1.0,0.0,0.0])
-
-rot_seq =  coord_seq_to_rot_seq(coord_seq = ground_truth, 
-                                frame_duration = 1/fps)
-
-
-rot_seq_to_deepmimic_json(  rot_seq = rot_seq, 
-                            loop = loop,
-                            json_path = json_path)
+ground_truth = pose3D_from_fb_h36m(dataset, subject=subject, action=action, shift=[1.0, 0.0, 0.0])

+rot_seq = coord_seq_to_rot_seq(coord_seq=ground_truth, frame_duration=1 / fps)

+rot_seq_to_deepmimic_json(rot_seq=rot_seq, loop=loop, json_path=json_path)

 bc = BulletClient(connection_mode=pybullet.GUI)
 bc.setAdditionalSearchPath(pybullet_data.getDataPath())
-bc.configureDebugVisualizer(bc.COV_ENABLE_Y_AXIS_UP,1)
-bc.setGravity(0,-9.8,0)
-motion=MotionCaptureData()
+bc.configureDebugVisualizer(bc.COV_ENABLE_Y_AXIS_UP, 1)
+bc.setGravity(0, -9.8, 0)
+motion = MotionCaptureData()

 motionPath = json_path
 motion.Load(motionPath)
 print("numFrames = ", motion.NumFrames())

+simTimeId = bc.addUserDebugParameter("simTime", 0, motion.NumFrames() - 1.1, 0)

+y2zOrn = bc.getQuaternionFromEuler([-1.57, 0, 0])
+bc.loadURDF("plane.urdf", [0, -0.04, 0], y2zOrn)

-simTimeId= bc.addUserDebugParameter("simTime",0,motion.NumFrames()-1.1,0)
-
-y2zOrn = bc.getQuaternionFromEuler([-1.57,0,0])
-bc.loadURDF("plane.urdf",[0,-0.04,0], y2zOrn)
-
-humanoid = Humanoid(bc, motion, [0,0,0]) #这是初始位置的坐标
-
-
+humanoid = Humanoid(bc, motion, [0, 0, 0])  #这是初始位置的坐标

 print(p.getBasePositionAndOrientation(humanoid._humanoid))

-
-
 simTime = 0
 keyFrameDuration = motion.KeyFrameDuraction()
-print("keyFrameDuration=",keyFrameDuration)
+print("keyFrameDuration=", keyFrameDuration)
 for utNum in range(motion.NumFrames()):
-    bc.stepSimulation()
-    humanoid.RenderReference(utNum * keyFrameDuration)
-    if draw_gt:
-        draw_ground_truth(coord_seq = ground_truth, 
-                        frame = utNum, 
-                        duration = keyFrameDuration,
-                        shift = [-1.0, 0.0, 1.0])
-    time.sleep(0.001)
+  bc.stepSimulation()
+  humanoid.RenderReference(utNum * keyFrameDuration)
+  if draw_gt:
+    draw_ground_truth(coord_seq=ground_truth,
+                      frame=utNum,
+                      duration=keyFrameDuration,
+                      shift=[-1.0, 0.0, 1.0])
+  time.sleep(0.001)
 stage = 0


-
 def Reset(humanoid):
-	global simTime
-	humanoid.Reset()
-	simTime = 0 
-	humanoid.SetSimTime(simTime)
-	pose = humanoid.InitializePoseFromMotionData()
-	humanoid.ApplyPose(pose, True, True, humanoid._humanoid,bc)
+  global simTime
+  humanoid.Reset()
+  simTime = 0
+  humanoid.SetSimTime(simTime)
+  pose = humanoid.InitializePoseFromMotionData()
+  humanoid.ApplyPose(pose, True, True, humanoid._humanoid, bc)


 Reset(humanoid)
 p.disconnect()
-
-
--- a/examples/pybullet/gym/pybullet_envs/deep_mimic/mocap/skeleton.py
+++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/mocap/skeleton.py
@@ -1,81 +1,82 @@
 import numpy as np

+
 class Skeleton:
-    def __init__(self, parents, joints_left, joints_right):
-        assert len(joints_left) == len(joints_right)
-        
-        self._parents = np.array(parents)
-        self._joints_left = joints_left
-        self._joints_right = joints_right
-        self._compute_metadata()
-    
-    def num_joints(self):
-        return len(self._parents)
-    
-    def parents(self):
-        return self._parents
-    
-    def has_children(self):
-        return self._has_children
-    
-    def children(self):
-        return self._children
-    
-    def remove_joints(self, joints_to_remove):
-        """
+
+  def __init__(self, parents, joints_left, joints_right):
+    assert len(joints_left) == len(joints_right)
+
+    self._parents = np.array(parents)
+    self._joints_left = joints_left
+    self._joints_right = joints_right
+    self._compute_metadata()
+
+  def num_joints(self):
+    return len(self._parents)
+
+  def parents(self):
+    return self._parents
+
+  def has_children(self):
+    return self._has_children
+
+  def children(self):
+    return self._children
+
+  def remove_joints(self, joints_to_remove):
+    """
        Remove the joints specified in 'joints_to_remove'.
        """
-        valid_joints = []
-        for joint in range(len(self._parents)):
-            if joint not in joints_to_remove:
-                valid_joints.append(joint)
+    valid_joints = []
+    for joint in range(len(self._parents)):
+      if joint not in joints_to_remove:
+        valid_joints.append(joint)

-        for i in range(len(self._parents)):
-            while self._parents[i] in joints_to_remove:
-                self._parents[i] = self._parents[self._parents[i]]
-                
-        index_offsets = np.zeros(len(self._parents), dtype=int)
-        new_parents = []
-        for i, parent in enumerate(self._parents):
-            if i not in joints_to_remove:
-                new_parents.append(parent - index_offsets[parent])
-            else:
-                index_offsets[i:] += 1
-        self._parents = np.array(new_parents)
-        
-        
-        if self._joints_left is not None:
-            new_joints_left = []
-            for joint in self._joints_left:
-                if joint in valid_joints:
-                    new_joints_left.append(joint - index_offsets[joint])
-            self._joints_left = new_joints_left
-        if self._joints_right is not None:
-            new_joints_right = []
-            for joint in self._joints_right:
-                if joint in valid_joints:
-                    new_joints_right.append(joint - index_offsets[joint])
-            self._joints_right = new_joints_right
+    for i in range(len(self._parents)):
+      while self._parents[i] in joints_to_remove:
+        self._parents[i] = self._parents[self._parents[i]]

-        self._compute_metadata()
-        
-        return valid_joints
-    
-    def joints_left(self):
-        return self._joints_left
-    
-    def joints_right(self):
-        return self._joints_right
-        
-    def _compute_metadata(self):
-        self._has_children = np.zeros(len(self._parents)).astype(bool)
-        for i, parent in enumerate(self._parents):
-            if parent != -1:
-                self._has_children[parent] = True
+    index_offsets = np.zeros(len(self._parents), dtype=int)
+    new_parents = []
+    for i, parent in enumerate(self._parents):
+      if i not in joints_to_remove:
+        new_parents.append(parent - index_offsets[parent])
+      else:
+        index_offsets[i:] += 1
+    self._parents = np.array(new_parents)

-        self._children = []
-        for i, parent in enumerate(self._parents):
-            self._children.append([])
-        for i, parent in enumerate(self._parents):
-            if parent != -1:
-                self._children[parent].append(i) 
+    if self._joints_left is not None:
+      new_joints_left = []
+      for joint in self._joints_left:
+        if joint in valid_joints:
+          new_joints_left.append(joint - index_offsets[joint])
+      self._joints_left = new_joints_left
+    if self._joints_right is not None:
+      new_joints_right = []
+      for joint in self._joints_right:
+        if joint in valid_joints:
+          new_joints_right.append(joint - index_offsets[joint])
+      self._joints_right = new_joints_right
+
+    self._compute_metadata()
+
+    return valid_joints
+
+  def joints_left(self):
+    return self._joints_left
+
+  def joints_right(self):
+    return self._joints_right
+
+  def _compute_metadata(self):
+    self._has_children = np.zeros(len(self._parents)).astype(bool)
+    for i, parent in enumerate(self._parents):
+      if parent != -1:
+        self._has_children[parent] = True
+
+    self._children = []
+    for i, parent in enumerate(self._parents):
+      self._children.append([])
+    for i, parent in enumerate(self._parents):
+      if parent != -1:
+        self._children[parent].append(i)
--- a/examples/pybullet/gym/pybullet_envs/deep_mimic/mocap/transformation.py
+++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/mocap/transformation.py
--- a/examples/pybullet/gym/pybullet_envs/deep_mimic/mpi_run.py
+++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/mpi_run.py
@@ -3,21 +3,23 @@ import subprocess
 from pybullet_utils.arg_parser import ArgParser
 from pybullet_utils.logger import Logger

+
 def main():
-    # Command line arguments
-    args = sys.argv[1:]
-    arg_parser = ArgParser()
-    arg_parser.load_args(args)
+  # Command line arguments
+  args = sys.argv[1:]
+  arg_parser = ArgParser()
+  arg_parser.load_args(args)

-    num_workers = arg_parser.parse_int('num_workers', 1)
-    assert(num_workers > 0)
+  num_workers = arg_parser.parse_int('num_workers', 1)
+  assert (num_workers > 0)
+
+  Logger.print2('Running with {:d} workers'.format(num_workers))
+  cmd = 'mpiexec -n {:d} python3 DeepMimic_Optimizer.py '.format(num_workers)
+  cmd += ' '.join(args)
+  Logger.print2('cmd: ' + cmd)
+  subprocess.call(cmd, shell=True)
+  return

-    Logger.print2('Running with {:d} workers'.format(num_workers))
-    cmd = 'mpiexec -n {:d} python3 DeepMimic_Optimizer.py '.format(num_workers)
-    cmd += ' '.join(args)
-    Logger.print2('cmd: ' + cmd)
-    subprocess.call(cmd, shell=True)
-    return

 if __name__ == '__main__':
-    main()
+  main()
--- a/examples/pybullet/gym/pybullet_envs/deep_mimic/testrl.py
+++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/testrl.py
@@ -2,8 +2,8 @@ import os
 import inspect
 currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
 parentdir = os.path.dirname(os.path.dirname(currentdir))
-os.sys.path.insert(0,parentdir)
-print("parentdir=",parentdir)
+os.sys.path.insert(0, parentdir)
+print("parentdir=", parentdir)
 import json
 from pybullet_envs.deep_mimic.learning.rl_world import RLWorld
 from pybullet_envs.deep_mimic.learning.ppo_agent import PPOAgent
@@ -15,81 +15,78 @@ from pybullet_envs.deep_mimic.env.pybullet_deep_mimic_env import PyBulletDeepMim
 import sys
 import random

-update_timestep = 1./240.
+update_timestep = 1. / 240.
 animating = True

+
 def update_world(world, time_elapsed):
-    timeStep = update_timestep
-    world.update(timeStep)
-    reward = world.env.calc_reward(agent_id=0)
-    #print("reward=",reward)
-    end_episode = world.env.is_episode_end()
-    if (end_episode):
-      world.end_episode()
-      world.reset()
-    return
+  timeStep = update_timestep
+  world.update(timeStep)
+  reward = world.env.calc_reward(agent_id=0)
+  #print("reward=",reward)
+  end_episode = world.env.is_episode_end()
+  if (end_episode):
+    world.end_episode()
+    world.reset()
+  return
+

 def build_arg_parser(args):
-    arg_parser = ArgParser()
-    arg_parser.load_args(args)
+  arg_parser = ArgParser()
+  arg_parser.load_args(args)
+
+  arg_file = arg_parser.parse_string('arg_file', '')
+  if (arg_file != ''):
+    path = pybullet_data.getDataPath() + "/args/" + arg_file
+    succ = arg_parser.load_file(path)
+    Logger.print2(arg_file)
+    assert succ, Logger.print2('Failed to load args from: ' + arg_file)
+  return arg_parser

-    arg_file = arg_parser.parse_string('arg_file', '')
-    if (arg_file != ''):
-        path = pybullet_data.getDataPath()+"/args/"+arg_file
-        succ = arg_parser.load_file(path)
-        Logger.print2(arg_file)
-        assert succ, Logger.print2('Failed to load args from: ' + arg_file)
-    return arg_parser

 args = sys.argv[1:]


-
 def build_world(args, enable_draw):
-    arg_parser = build_arg_parser(args)
-    print("enable_draw=",enable_draw)
-    env = PyBulletDeepMimicEnv(arg_parser, enable_draw)
-    world = RLWorld(env, arg_parser)
-    #world.env.set_playback_speed(playback_speed)
+  arg_parser = build_arg_parser(args)
+  print("enable_draw=", enable_draw)
+  env = PyBulletDeepMimicEnv(arg_parser, enable_draw)
+  world = RLWorld(env, arg_parser)
+  #world.env.set_playback_speed(playback_speed)

-    motion_file = arg_parser.parse_string("motion_file")
-    print("motion_file=",motion_file)
-    bodies = arg_parser.parse_ints("fall_contact_bodies")
-    print("bodies=",bodies)
-    int_output_path = arg_parser.parse_string("int_output_path")
-    print("int_output_path=",int_output_path)
-    agent_files = pybullet_data.getDataPath()+"/"+arg_parser.parse_string("agent_files")
+  motion_file = arg_parser.parse_string("motion_file")
+  print("motion_file=", motion_file)
+  bodies = arg_parser.parse_ints("fall_contact_bodies")
+  print("bodies=", bodies)
+  int_output_path = arg_parser.parse_string("int_output_path")
+  print("int_output_path=", int_output_path)
+  agent_files = pybullet_data.getDataPath() + "/" + arg_parser.parse_string("agent_files")

-    AGENT_TYPE_KEY = "AgentType"
+  AGENT_TYPE_KEY = "AgentType"

-    print("agent_file=",agent_files)
-    with open(agent_files) as data_file:
-        json_data = json.load(data_file)
-        print("json_data=",json_data)
-        assert AGENT_TYPE_KEY in json_data
-        agent_type = json_data[AGENT_TYPE_KEY]
-        print("agent_type=",agent_type)
-        agent = PPOAgent(world, id, json_data)
-    
-        agent.set_enable_training(False)
-        world.reset()
-    return world    
+  print("agent_file=", agent_files)
+  with open(agent_files) as data_file:
+    json_data = json.load(data_file)
+    print("json_data=", json_data)
+    assert AGENT_TYPE_KEY in json_data
+    agent_type = json_data[AGENT_TYPE_KEY]
+    print("agent_type=", agent_type)
+    agent = PPOAgent(world, id, json_data)
+
+    agent.set_enable_training(False)
+    world.reset()
+  return world


-
-  
-    
 if __name__ == '__main__':
-    
-    world = build_world(args, True)
-    while (world.env._pybullet_client.isConnected()):
-      timeStep = update_timestep
-      keys = world.env.getKeyboardEvents()
-      
-        
-      if world.env.isKeyTriggered(keys, ' '):
-        animating = not animating
-      if (animating):
-        update_world(world, timeStep)
-        #animating=False

+  world = build_world(args, True)
+  while (world.env._pybullet_client.isConnected()):
+    timeStep = update_timestep
+    keys = world.env.getKeyboardEvents()
+
+    if world.env.isKeyTriggered(keys, ' '):
+      animating = not animating
+    if (animating):
+      update_world(world, timeStep)
+      #animating=False