add yapf style and apply yapf to format all Python files

This recreates pull request #2192

Author: Erwin Coumans
Date:   2019-04-27 07:31:15 -07:00
Parent: c591735042
Commit: ef9570c315

347 changed files with 70304 additions and 22752 deletions
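
The .style.yapf file added by this commit is not reproduced in the hunks below. As a hedged sketch only, a configuration producing the two-space indentation and roughly 100-column wrapping seen in the reformatted code would be along the lines of "[style] based_on_style = google, indent_width = 2, column_limit = 100"; the exact values are assumptions, not read from this diff. The same style can also be applied programmatically through yapf's Python API:

from yapf.yapflib.yapf_api import FormatCode

source = "def f( a,b ):\n    return a+b\n"
# Recent yapf releases return (formatted_code, changed); very old releases return only the string.
formatted, changed = FormatCode(
    source, style_config='{based_on_style: google, indent_width: 2, column_limit: 100}')
print(formatted)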

View File

@@ -4,7 +4,7 @@ import os
import inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
os.sys.path.insert(0,parentdir)
os.sys.path.insert(0, parentdir)
# Importing the libraries
import os
@@ -20,17 +20,17 @@ import argparse
# Setting the Hyper Parameters
class Hp():
def __init__(self):
self.nb_steps = 10000
self.episode_length = 1000
self.learning_rate = 0.02
self.nb_directions = 16
self.nb_best_directions = 16
assert self.nb_best_directions <= self.nb_directions
self.noise = 0.03
self.seed = 1
self.env_name = 'HalfCheetahBulletEnv-v0'
def __init__(self):
self.nb_steps = 10000
self.episode_length = 1000
self.learning_rate = 0.02
self.nb_directions = 16
self.nb_best_directions = 16
assert self.nb_best_directions <= self.nb_directions
self.noise = 0.03
self.seed = 1
self.env_name = 'HalfCheetahBulletEnv-v0'
# Multiprocess Exploring the policy on one specific direction and over one episode
@@ -39,111 +39,37 @@ _RESET = 1
_CLOSE = 2
_EXPLORE = 3
def ExploreWorker(rank,childPipe, envname, args):
env = gym.make(envname)
nb_inputs = env.observation_space.shape[0]
normalizer = Normalizer(nb_inputs)
observation_n = env.reset()
n=0
while True:
n+=1
try:
# Only block for short times to have keyboard exceptions be raised.
if not childPipe.poll(0.001):
continue
message, payload = childPipe.recv()
except (EOFError, KeyboardInterrupt):
break
if message == _RESET:
observation_n = env.reset()
childPipe.send(["reset ok"])
def ExploreWorker(rank, childPipe, envname, args):
env = gym.make(envname)
nb_inputs = env.observation_space.shape[0]
normalizer = Normalizer(nb_inputs)
observation_n = env.reset()
n = 0
while True:
n += 1
try:
# Only block for short times to have keyboard exceptions be raised.
if not childPipe.poll(0.001):
continue
if message == _EXPLORE:
#normalizer = payload[0] #use our local normalizer
policy = payload[1]
hp = payload[2]
direction = payload[3]
delta = payload[4]
state = env.reset()
done = False
num_plays = 0.
sum_rewards = 0
while not done and num_plays < hp.episode_length:
normalizer.observe(state)
state = normalizer.normalize(state)
action = policy.evaluate(state, delta, direction,hp)
state, reward, done, _ = env.step(action)
reward = max(min(reward, 1), -1)
sum_rewards += reward
num_plays += 1
childPipe.send([sum_rewards])
continue
if message == _CLOSE:
childPipe.send(["close ok"])
break
childPipe.close()
# Normalizing the states
class Normalizer():
def __init__(self, nb_inputs):
self.n = np.zeros(nb_inputs)
self.mean = np.zeros(nb_inputs)
self.mean_diff = np.zeros(nb_inputs)
self.var = np.zeros(nb_inputs)
def observe(self, x):
self.n += 1.
last_mean = self.mean.copy()
self.mean += (x - self.mean) / self.n
self.mean_diff += (x - last_mean) * (x - self.mean)
self.var = (self.mean_diff / self.n).clip(min = 1e-2)
def normalize(self, inputs):
obs_mean = self.mean
obs_std = np.sqrt(self.var)
return (inputs - obs_mean) / obs_std
# Building the AI
class Policy():
def __init__(self, input_size, output_size, env_name, args):
try:
self.theta = np.load(args.policy)
except:
self.theta = np.zeros((output_size, input_size))
self.env_name = env_name
print("Starting policy theta=",self.theta)
def evaluate(self, input, delta, direction, hp):
if direction is None:
return np.clip(self.theta.dot(input), -1.0, 1.0)
elif direction == "positive":
return np.clip((self.theta + hp.noise*delta).dot(input), -1.0, 1.0)
else:
return np.clip((self.theta - hp.noise*delta).dot(input), -1.0, 1.0)
def sample_deltas(self):
return [np.random.randn(*self.theta.shape) for _ in range(hp.nb_directions)]
def update(self, rollouts, sigma_r, args):
step = np.zeros(self.theta.shape)
for r_pos, r_neg, d in rollouts:
step += (r_pos - r_neg) * d
self.theta += hp.learning_rate / (hp.nb_best_directions * sigma_r) * step
timestr = time.strftime("%Y%m%d-%H%M%S")
np.save(args.logdir+"/policy_"+self.env_name+"_"+timestr+".npy", self.theta)
# Exploring the policy on one specific direction and over one episode
def explore(env, normalizer, policy, direction, delta, hp):
state = env.reset()
done = False
num_plays = 0.
sum_rewards = 0
while not done and num_plays < hp.episode_length:
message, payload = childPipe.recv()
except (EOFError, KeyboardInterrupt):
break
if message == _RESET:
observation_n = env.reset()
childPipe.send(["reset ok"])
continue
if message == _EXPLORE:
#normalizer = payload[0] #use our local normalizer
policy = payload[1]
hp = payload[2]
direction = payload[3]
delta = payload[4]
state = env.reset()
done = False
num_plays = 0.
sum_rewards = 0
while not done and num_plays < hp.episode_length:
normalizer.observe(state)
state = normalizer.normalize(state)
action = policy.evaluate(state, delta, direction, hp)
@@ -151,127 +77,217 @@ def explore(env, normalizer, policy, direction, delta, hp):
reward = max(min(reward, 1), -1)
sum_rewards += reward
num_plays += 1
return sum_rewards
childPipe.send([sum_rewards])
continue
if message == _CLOSE:
childPipe.send(["close ok"])
break
childPipe.close()
# Normalizing the states
class Normalizer():
def __init__(self, nb_inputs):
self.n = np.zeros(nb_inputs)
self.mean = np.zeros(nb_inputs)
self.mean_diff = np.zeros(nb_inputs)
self.var = np.zeros(nb_inputs)
def observe(self, x):
self.n += 1.
last_mean = self.mean.copy()
self.mean += (x - self.mean) / self.n
self.mean_diff += (x - last_mean) * (x - self.mean)
self.var = (self.mean_diff / self.n).clip(min=1e-2)
def normalize(self, inputs):
obs_mean = self.mean
obs_std = np.sqrt(self.var)
return (inputs - obs_mean) / obs_std
# Building the AI
class Policy():
def __init__(self, input_size, output_size, env_name, args):
try:
self.theta = np.load(args.policy)
except:
self.theta = np.zeros((output_size, input_size))
self.env_name = env_name
print("Starting policy theta=", self.theta)
def evaluate(self, input, delta, direction, hp):
if direction is None:
return np.clip(self.theta.dot(input), -1.0, 1.0)
elif direction == "positive":
return np.clip((self.theta + hp.noise * delta).dot(input), -1.0, 1.0)
else:
return np.clip((self.theta - hp.noise * delta).dot(input), -1.0, 1.0)
def sample_deltas(self):
return [np.random.randn(*self.theta.shape) for _ in range(hp.nb_directions)]
def update(self, rollouts, sigma_r, args):
step = np.zeros(self.theta.shape)
for r_pos, r_neg, d in rollouts:
step += (r_pos - r_neg) * d
self.theta += hp.learning_rate / (hp.nb_best_directions * sigma_r) * step
timestr = time.strftime("%Y%m%d-%H%M%S")
np.save(args.logdir + "/policy_" + self.env_name + "_" + timestr + ".npy", self.theta)
# Exploring the policy on one specific direction and over one episode
def explore(env, normalizer, policy, direction, delta, hp):
state = env.reset()
done = False
num_plays = 0.
sum_rewards = 0
while not done and num_plays < hp.episode_length:
normalizer.observe(state)
state = normalizer.normalize(state)
action = policy.evaluate(state, delta, direction, hp)
state, reward, done, _ = env.step(action)
reward = max(min(reward, 1), -1)
sum_rewards += reward
num_plays += 1
return sum_rewards
# Training the AI
def train(env, policy, normalizer, hp, parentPipes, args):
for step in range(hp.nb_steps):
# Initializing the perturbations deltas and the positive/negative rewards
deltas = policy.sample_deltas()
positive_rewards = [0] * hp.nb_directions
negative_rewards = [0] * hp.nb_directions
if parentPipes:
for k in range(hp.nb_directions):
parentPipe = parentPipes[k]
parentPipe.send([_EXPLORE,[normalizer, policy, hp, "positive", deltas[k]]])
for k in range(hp.nb_directions):
positive_rewards[k] = parentPipes[k].recv()[0]
for k in range(hp.nb_directions):
parentPipe = parentPipes[k]
parentPipe.send([_EXPLORE,[normalizer, policy, hp, "negative", deltas[k]]])
for k in range(hp.nb_directions):
negative_rewards[k] = parentPipes[k].recv()[0]
else:
# Getting the positive rewards in the positive directions
for k in range(hp.nb_directions):
positive_rewards[k] = explore(env, normalizer, policy, "positive", deltas[k], hp)
# Getting the negative rewards in the negative/opposite directions
for k in range(hp.nb_directions):
negative_rewards[k] = explore(env, normalizer, policy, "negative", deltas[k], hp)
# Gathering all the positive/negative rewards to compute the standard deviation of these rewards
all_rewards = np.array(positive_rewards + negative_rewards)
sigma_r = all_rewards.std()
# Sorting the rollouts by the max(r_pos, r_neg) and selecting the best directions
scores = {k:max(r_pos, r_neg) for k,(r_pos,r_neg) in enumerate(zip(positive_rewards, negative_rewards))}
order = sorted(scores.keys(), key = lambda x:scores[x])[:hp.nb_best_directions]
rollouts = [(positive_rewards[k], negative_rewards[k], deltas[k]) for k in order]
# Updating our policy
policy.update(rollouts, sigma_r, args)
# Printing the final reward of the policy after the update
reward_evaluation = explore(env, normalizer, policy, None, None, hp)
print('Step:', step, 'Reward:', reward_evaluation)
for step in range(hp.nb_steps):
# Initializing the perturbations deltas and the positive/negative rewards
deltas = policy.sample_deltas()
positive_rewards = [0] * hp.nb_directions
negative_rewards = [0] * hp.nb_directions
if parentPipes:
for k in range(hp.nb_directions):
parentPipe = parentPipes[k]
parentPipe.send([_EXPLORE, [normalizer, policy, hp, "positive", deltas[k]]])
for k in range(hp.nb_directions):
positive_rewards[k] = parentPipes[k].recv()[0]
for k in range(hp.nb_directions):
parentPipe = parentPipes[k]
parentPipe.send([_EXPLORE, [normalizer, policy, hp, "negative", deltas[k]]])
for k in range(hp.nb_directions):
negative_rewards[k] = parentPipes[k].recv()[0]
else:
# Getting the positive rewards in the positive directions
for k in range(hp.nb_directions):
positive_rewards[k] = explore(env, normalizer, policy, "positive", deltas[k], hp)
# Getting the negative rewards in the negative/opposite directions
for k in range(hp.nb_directions):
negative_rewards[k] = explore(env, normalizer, policy, "negative", deltas[k], hp)
# Gathering all the positive/negative rewards to compute the standard deviation of these rewards
all_rewards = np.array(positive_rewards + negative_rewards)
sigma_r = all_rewards.std()
# Sorting the rollouts by the max(r_pos, r_neg) and selecting the best directions
scores = {
k: max(r_pos, r_neg)
for k, (r_pos, r_neg) in enumerate(zip(positive_rewards, negative_rewards))
}
order = sorted(scores.keys(), key=lambda x: scores[x])[:hp.nb_best_directions]
rollouts = [(positive_rewards[k], negative_rewards[k], deltas[k]) for k in order]
# Updating our policy
policy.update(rollouts, sigma_r, args)
# Printing the final reward of the policy after the update
reward_evaluation = explore(env, normalizer, policy, None, None, hp)
print('Step:', step, 'Reward:', reward_evaluation)
# Running the main code
def mkdir(base, name):
path = os.path.join(base, name)
if not os.path.exists(path):
os.makedirs(path)
return path
path = os.path.join(base, name)
if not os.path.exists(path):
os.makedirs(path)
return path
if __name__ == "__main__":
mp.freeze_support()
mp.freeze_support()
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('--env', help='Gym environment name', type=str, default='HalfCheetahBulletEnv-v0')
parser.add_argument('--seed', help='RNG seed', type=int, default=1)
parser.add_argument('--render', help='OpenGL Visualizer', type=int, default=0)
parser.add_argument('--movie',help='rgb_array gym movie',type=int, default=0)
parser.add_argument('--steps', help='Number of steps', type=int, default=10000)
parser.add_argument('--policy', help='Starting policy file (npy)', type=str, default='')
parser.add_argument('--logdir', help='Directory root to log policy files (npy)', type=str, default='.')
parser.add_argument('--mp', help='Enable multiprocessing', type=int, default=1)
args = parser.parse_args()
hp = Hp()
hp.env_name = args.env
hp.seed = args.seed
hp.nb_steps = args.steps
print("seed = ", hp.seed)
np.random.seed(hp.seed)
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('--env',
help='Gym environment name',
type=str,
default='HalfCheetahBulletEnv-v0')
parser.add_argument('--seed', help='RNG seed', type=int, default=1)
parser.add_argument('--render', help='OpenGL Visualizer', type=int, default=0)
parser.add_argument('--movie', help='rgb_array gym movie', type=int, default=0)
parser.add_argument('--steps', help='Number of steps', type=int, default=10000)
parser.add_argument('--policy', help='Starting policy file (npy)', type=str, default='')
parser.add_argument('--logdir',
help='Directory root to log policy files (npy)',
type=str,
default='.')
parser.add_argument('--mp', help='Enable multiprocessing', type=int, default=1)
parentPipes = None
if args.mp:
num_processes = hp.nb_directions
processes = []
childPipes = []
parentPipes = []
for pr in range (num_processes):
parentPipe, childPipe = Pipe()
parentPipes.append(parentPipe)
childPipes.append(childPipe)
for rank in range(num_processes):
p = mp.Process(target=ExploreWorker, args=(rank,childPipes[rank], hp.env_name, args))
p.start()
processes.append(p)
work_dir = mkdir('exp', 'brs')
monitor_dir = mkdir(work_dir, 'monitor')
env = gym.make(hp.env_name)
if args.render:
env.render(mode = "human")
if args.movie:
env = wrappers.Monitor(env, monitor_dir, force = True)
nb_inputs = env.observation_space.shape[0]
nb_outputs = env.action_space.shape[0]
policy = Policy(nb_inputs, nb_outputs,hp.env_name, args)
normalizer = Normalizer(nb_inputs)
print("start training")
train(env, policy, normalizer, hp, parentPipes, args)
args = parser.parse_args()
if args.mp:
for parentPipe in parentPipes:
parentPipe.send([_CLOSE,"pay2"])
for p in processes:
p.join()
hp = Hp()
hp.env_name = args.env
hp.seed = args.seed
hp.nb_steps = args.steps
print("seed = ", hp.seed)
np.random.seed(hp.seed)
parentPipes = None
if args.mp:
num_processes = hp.nb_directions
processes = []
childPipes = []
parentPipes = []
for pr in range(num_processes):
parentPipe, childPipe = Pipe()
parentPipes.append(parentPipe)
childPipes.append(childPipe)
for rank in range(num_processes):
p = mp.Process(target=ExploreWorker, args=(rank, childPipes[rank], hp.env_name, args))
p.start()
processes.append(p)
work_dir = mkdir('exp', 'brs')
monitor_dir = mkdir(work_dir, 'monitor')
env = gym.make(hp.env_name)
if args.render:
env.render(mode="human")
if args.movie:
env = wrappers.Monitor(env, monitor_dir, force=True)
nb_inputs = env.observation_space.shape[0]
nb_outputs = env.action_space.shape[0]
policy = Policy(nb_inputs, nb_outputs, hp.env_name, args)
normalizer = Normalizer(nb_inputs)
print("start training")
train(env, policy, normalizer, hp, parentPipes, args)
if args.mp:
for parentPipe in parentPipes:
parentPipe.send([_CLOSE, "pay2"])
for p in processes:
p.join()
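
For reference, and not part of this commit: the Normalizer class above keeps streaming estimates of the state mean and variance with a Welford-style update (observe) and uses them to whiten observations (normalize). A minimal standalone sketch checking the streaming estimates against batch NumPy statistics:

import numpy as np

class StreamingStats:
  # Same update rule as Normalizer.observe above, trimmed to the essentials.
  def __init__(self, nb_inputs):
    self.n = np.zeros(nb_inputs)
    self.mean = np.zeros(nb_inputs)
    self.mean_diff = np.zeros(nb_inputs)
    self.var = np.zeros(nb_inputs)

  def observe(self, x):
    self.n += 1.
    last_mean = self.mean.copy()
    self.mean += (x - self.mean) / self.n
    self.mean_diff += (x - last_mean) * (x - self.mean)
    self.var = (self.mean_diff / self.n).clip(min=1e-2)

data = np.random.randn(1000, 3)
stats = StreamingStats(3)
for row in data:
  stats.observe(row)
# The running mean equals the batch mean; var is the population variance (ddof=0), floored at 1e-2.
assert np.allclose(stats.mean, data.mean(axis=0))
assert np.allclose(stats.var, data.var(axis=0).clip(min=1e-2))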

View File

@@ -1,25 +1,28 @@
import gym
from gym.envs.registration import registry, make, spec
def register(id,*args,**kvargs):
if id in registry.env_specs:
return
else:
return gym.envs.registration.register(id,*args,**kvargs)
def register(id, *args, **kvargs):
if id in registry.env_specs:
return
else:
return gym.envs.registration.register(id, *args, **kvargs)
# ------------bullet-------------
register(
id='HumanoidDeepMimicBulletEnv-v1',
entry_point='pybullet_envs.deep_mimic:HumanoidDeepMimicGymEnv',
max_episode_steps=1000,
reward_threshold=20000.0,
id='HumanoidDeepMimicBulletEnv-v1',
entry_point='pybullet_envs.deep_mimic:HumanoidDeepMimicGymEnv',
max_episode_steps=1000,
reward_threshold=20000.0,
)
register(
id='CartPoleBulletEnv-v1',
entry_point='pybullet_envs.bullet:CartPoleBulletEnv',
max_episode_steps=200,
reward_threshold=190.0,
id='CartPoleBulletEnv-v1',
entry_point='pybullet_envs.bullet:CartPoleBulletEnv',
max_episode_steps=200,
reward_threshold=190.0,
)
register(
@@ -36,7 +39,6 @@ register(
reward_threshold=5.0,
)
register(
id='MinitaurReactiveEnv-v0',
entry_point='pybullet_envs.minitaur.envs:MinitaurReactiveEnv',
@@ -44,7 +46,6 @@ register(
reward_threshold=5.0,
)
register(
id='MinitaurBallGymEnv-v0',
entry_point='pybullet_envs.minitaur.envs:MinitaurBallGymEnv',
@@ -52,7 +53,6 @@ register(
reward_threshold=5.0,
)
register(
id='MinitaurTrottingEnv-v0',
entry_point='pybullet_envs.minitaur.envs:MinitaurTrottingEnv',
@@ -81,8 +81,6 @@ register(
reward_threshold=5.0,
)
register(
id='RacecarBulletEnv-v0',
entry_point='pybullet_envs.bullet:RacecarGymEnv',
@@ -91,128 +89,113 @@ register(
)
register(
id='RacecarZedBulletEnv-v0',
entry_point='pybullet_envs.bullet:RacecarZEDGymEnv',
max_episode_steps=1000,
reward_threshold=5.0,
)
register(
id='KukaBulletEnv-v0',
entry_point='pybullet_envs.bullet:KukaGymEnv',
max_episode_steps=1000,
reward_threshold=5.0,
id='RacecarZedBulletEnv-v0',
entry_point='pybullet_envs.bullet:RacecarZEDGymEnv',
max_episode_steps=1000,
reward_threshold=5.0,
)
register(
id='KukaCamBulletEnv-v0',
entry_point='pybullet_envs.bullet:KukaCamGymEnv',
max_episode_steps=1000,
reward_threshold=5.0,
id='KukaBulletEnv-v0',
entry_point='pybullet_envs.bullet:KukaGymEnv',
max_episode_steps=1000,
reward_threshold=5.0,
)
register(
id='KukaDiverseObjectGrasping-v0',
entry_point='pybullet_envs.bullet:KukaDiverseObjectEnv',
max_episode_steps=1000,
reward_threshold=5.0,
id='KukaCamBulletEnv-v0',
entry_point='pybullet_envs.bullet:KukaCamGymEnv',
max_episode_steps=1000,
reward_threshold=5.0,
)
register(
id='InvertedPendulumBulletEnv-v0',
entry_point='pybullet_envs.gym_pendulum_envs:InvertedPendulumBulletEnv',
max_episode_steps=1000,
reward_threshold=950.0,
)
register(
id='InvertedDoublePendulumBulletEnv-v0',
entry_point='pybullet_envs.gym_pendulum_envs:InvertedDoublePendulumBulletEnv',
max_episode_steps=1000,
reward_threshold=9100.0,
)
register(
id='InvertedPendulumSwingupBulletEnv-v0',
entry_point='pybullet_envs.gym_pendulum_envs:InvertedPendulumSwingupBulletEnv',
max_episode_steps=1000,
reward_threshold=800.0,
)
register(
id='ReacherBulletEnv-v0',
entry_point='pybullet_envs.gym_manipulator_envs:ReacherBulletEnv',
max_episode_steps=150,
reward_threshold=18.0,
)
register(
id='PusherBulletEnv-v0',
entry_point='pybullet_envs.gym_manipulator_envs:PusherBulletEnv',
max_episode_steps=150,
reward_threshold=18.0,
id='KukaDiverseObjectGrasping-v0',
entry_point='pybullet_envs.bullet:KukaDiverseObjectEnv',
max_episode_steps=1000,
reward_threshold=5.0,
)
register(
id='ThrowerBulletEnv-v0',
entry_point='pybullet_envs.gym_manipulator_envs:ThrowerBulletEnv',
max_episode_steps=100,
reward_threshold=18.0,
id='InvertedPendulumBulletEnv-v0',
entry_point='pybullet_envs.gym_pendulum_envs:InvertedPendulumBulletEnv',
max_episode_steps=1000,
reward_threshold=950.0,
)
register(
id='StrikerBulletEnv-v0',
entry_point='pybullet_envs.gym_manipulator_envs:StrikerBulletEnv',
max_episode_steps=100,
reward_threshold=18.0,
id='InvertedDoublePendulumBulletEnv-v0',
entry_point='pybullet_envs.gym_pendulum_envs:InvertedDoublePendulumBulletEnv',
max_episode_steps=1000,
reward_threshold=9100.0,
)
register(
id='Walker2DBulletEnv-v0',
entry_point='pybullet_envs.gym_locomotion_envs:Walker2DBulletEnv',
max_episode_steps=1000,
reward_threshold=2500.0
)
register(
id='HalfCheetahBulletEnv-v0',
entry_point='pybullet_envs.gym_locomotion_envs:HalfCheetahBulletEnv',
max_episode_steps=1000,
reward_threshold=3000.0
)
id='InvertedPendulumSwingupBulletEnv-v0',
entry_point='pybullet_envs.gym_pendulum_envs:InvertedPendulumSwingupBulletEnv',
max_episode_steps=1000,
reward_threshold=800.0,
)
register(
id='AntBulletEnv-v0',
entry_point='pybullet_envs.gym_locomotion_envs:AntBulletEnv',
max_episode_steps=1000,
reward_threshold=2500.0
)
id='ReacherBulletEnv-v0',
entry_point='pybullet_envs.gym_manipulator_envs:ReacherBulletEnv',
max_episode_steps=150,
reward_threshold=18.0,
)
register(
id='HopperBulletEnv-v0',
entry_point='pybullet_envs.gym_locomotion_envs:HopperBulletEnv',
max_episode_steps=1000,
reward_threshold=2500.0
)
id='PusherBulletEnv-v0',
entry_point='pybullet_envs.gym_manipulator_envs:PusherBulletEnv',
max_episode_steps=150,
reward_threshold=18.0,
)
register(
id='HumanoidBulletEnv-v0',
entry_point='pybullet_envs.gym_locomotion_envs:HumanoidBulletEnv',
max_episode_steps=1000
)
id='ThrowerBulletEnv-v0',
entry_point='pybullet_envs.gym_manipulator_envs:ThrowerBulletEnv',
max_episode_steps=100,
reward_threshold=18.0,
)
register(
id='HumanoidFlagrunBulletEnv-v0',
entry_point='pybullet_envs.gym_locomotion_envs:HumanoidFlagrunBulletEnv',
max_episode_steps=1000,
reward_threshold=2000.0
)
id='StrikerBulletEnv-v0',
entry_point='pybullet_envs.gym_manipulator_envs:StrikerBulletEnv',
max_episode_steps=100,
reward_threshold=18.0,
)
register(
id='HumanoidFlagrunHarderBulletEnv-v0',
entry_point='pybullet_envs.gym_locomotion_envs:HumanoidFlagrunHarderBulletEnv',
max_episode_steps=1000
)
register(id='Walker2DBulletEnv-v0',
entry_point='pybullet_envs.gym_locomotion_envs:Walker2DBulletEnv',
max_episode_steps=1000,
reward_threshold=2500.0)
register(id='HalfCheetahBulletEnv-v0',
entry_point='pybullet_envs.gym_locomotion_envs:HalfCheetahBulletEnv',
max_episode_steps=1000,
reward_threshold=3000.0)
register(id='AntBulletEnv-v0',
entry_point='pybullet_envs.gym_locomotion_envs:AntBulletEnv',
max_episode_steps=1000,
reward_threshold=2500.0)
register(id='HopperBulletEnv-v0',
entry_point='pybullet_envs.gym_locomotion_envs:HopperBulletEnv',
max_episode_steps=1000,
reward_threshold=2500.0)
register(id='HumanoidBulletEnv-v0',
entry_point='pybullet_envs.gym_locomotion_envs:HumanoidBulletEnv',
max_episode_steps=1000)
register(id='HumanoidFlagrunBulletEnv-v0',
entry_point='pybullet_envs.gym_locomotion_envs:HumanoidFlagrunBulletEnv',
max_episode_steps=1000,
reward_threshold=2000.0)
register(id='HumanoidFlagrunHarderBulletEnv-v0',
entry_point='pybullet_envs.gym_locomotion_envs:HumanoidFlagrunHarderBulletEnv',
max_episode_steps=1000)
#register(
# id='AtlasBulletEnv-v0',
@@ -220,6 +203,7 @@ register(
# max_episode_steps=1000
# )
def getList():
btenvs = ['- ' + spec.id for spec in gym.envs.registry.all() if spec.id.find('Bullet')>=0]
return btenvs
btenvs = ['- ' + spec.id for spec in gym.envs.registry.all() if spec.id.find('Bullet') >= 0]
return btenvs
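
Usage note (a sketch, not code from this commit): once the package containing these register() calls has been imported, the environment IDs above can be created through the ordinary gym API of that era, for example:

import gym
import pybullet_envs  # importing the package executes register() calls like the ones above

env = gym.make('AntBulletEnv-v0')
obs = env.reset()
done = False
while not done:
  obs, reward, done, info = env.step(env.action_space.sample())  # pre-0.26 gym step signature
env.close()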

View File

@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Executable scripts for reinforcement learning."""
from __future__ import absolute_import

View File

@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Example configurations using the PPO algorithm."""
from __future__ import absolute_import
@@ -29,6 +28,7 @@ import pybullet_envs.bullet.minitaur_gym_env as minitaur_gym_env
import pybullet_envs
import tensorflow as tf
def default():
"""Default configuration for PPO."""
# General
@@ -38,10 +38,7 @@ def default():
use_gpu = False
# Network
network = networks.feed_forward_gaussian
weight_summaries = dict(
all=r'.*',
policy=r'.*/policy/.*',
value=r'.*/value/.*')
weight_summaries = dict(all=r'.*', policy=r'.*/policy/.*', value=r'.*/value/.*')
policy_layers = 200, 100
value_layers = 200, 100
init_mean_factor = 0.1
@@ -52,7 +49,7 @@ def default():
optimizer = tf.train.AdamOptimizer
update_epochs_policy = 64
update_epochs_value = 64
learning_rate = 1e-4
learning_rate = 1e-4
# Losses
discount = 0.995
kl_target = 1e-2
@@ -69,6 +66,7 @@ def pybullet_pendulum():
steps = 5e7 # 50M
return locals()
def pybullet_doublependulum():
locals().update(default())
env = 'InvertedDoublePendulumBulletEnv-v0'
@@ -76,6 +74,7 @@ def pybullet_doublependulum():
steps = 5e7 # 50M
return locals()
def pybullet_pendulumswingup():
locals().update(default())
env = 'InvertedPendulumSwingupBulletEnv-v0'
@@ -83,6 +82,7 @@ def pybullet_pendulumswingup():
steps = 5e7 # 50M
return locals()
def pybullet_cheetah():
"""Configuration for MuJoCo's half cheetah task."""
locals().update(default())
@@ -92,6 +92,7 @@ def pybullet_cheetah():
steps = 1e8 # 100M
return locals()
def pybullet_ant():
locals().update(default())
env = 'AntBulletEnv-v0'
@@ -99,6 +100,7 @@ def pybullet_ant():
steps = 5e7 # 50M
return locals()
def pybullet_kuka_grasping():
"""Configuration for Bullet Kuka grasping task."""
locals().update(default())
@@ -113,7 +115,7 @@ def pybullet_racecar():
"""Configuration for Bullet MIT Racecar task."""
locals().update(default())
# Environment
env = 'RacecarBulletEnv-v0' #functools.partial(racecarGymEnv.RacecarGymEnv, isDiscrete=False, renders=True)
env = 'RacecarBulletEnv-v0' #functools.partial(racecarGymEnv.RacecarGymEnv, isDiscrete=False, renders=True)
max_length = 10
steps = 1e7 # 10M
return locals()
@@ -132,29 +134,27 @@ def pybullet_minitaur():
"""Configuration specific to minitaur_gym_env.MinitaurBulletEnv class."""
locals().update(default())
randomizer = (minitaur_env_randomizer.MinitaurEnvRandomizer())
env = functools.partial(
minitaur_gym_env.MinitaurBulletEnv,
accurate_motor_model_enabled=True,
motor_overheat_protection=True,
pd_control_enabled=True,
env_randomizer=randomizer,
render=False)
env = functools.partial(minitaur_gym_env.MinitaurBulletEnv,
accurate_motor_model_enabled=True,
motor_overheat_protection=True,
pd_control_enabled=True,
env_randomizer=randomizer,
render=False)
max_length = 1000
steps = 3e7 # 30M
return locals()
def pybullet_duck_minitaur():
"""Configuration specific to minitaur_gym_env.MinitaurBulletDuckEnv class."""
locals().update(default())
randomizer = (minitaur_env_randomizer.MinitaurEnvRandomizer())
env = functools.partial(
minitaur_gym_env.MinitaurBulletDuckEnv,
accurate_motor_model_enabled=True,
motor_overheat_protection=True,
pd_control_enabled=True,
env_randomizer=randomizer,
render=False)
env = functools.partial(minitaur_gym_env.MinitaurBulletDuckEnv,
accurate_motor_model_enabled=True,
motor_overheat_protection=True,
pd_control_enabled=True,
env_randomizer=randomizer,
render=False)
max_length = 1000
steps = 3e7 # 30M
return locals()
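
Each configuration above is a plain function that builds on default() and returns locals(), so the whole hyperparameter set comes back as a dictionary. A hedged sketch of reading one of them directly (the module path pybullet_envs.agents.configs is an assumption, and importing it needs the TF1-era dependencies listed at the top of this file):

import pybullet_envs.agents.configs as configs  # assumed module path for this file

config = configs.pybullet_ant()  # dict built from default() plus the ant-specific overrides
print(config['env'], config['max_length'], config['steps'])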

View File

@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Network definitions for the PPO algorithm."""
from __future__ import absolute_import
@@ -24,13 +23,10 @@ import operator
import tensorflow as tf
NetworkOutput = collections.namedtuple(
'NetworkOutput', 'policy, mean, logstd, value, state')
NetworkOutput = collections.namedtuple('NetworkOutput', 'policy, mean, logstd, value, state')
def feed_forward_gaussian(
config, action_size, observations, unused_length, state=None):
def feed_forward_gaussian(config, action_size, observations, unused_length, state=None):
"""Independent feed forward networks for policy and value.
The policy network outputs the mean action and the log standard deviation
@@ -50,20 +46,22 @@ def feed_forward_gaussian(
factor=config.init_mean_factor)
logstd_initializer = tf.random_normal_initializer(config.init_logstd, 1e-10)
flat_observations = tf.reshape(observations, [
tf.shape(observations)[0], tf.shape(observations)[1],
functools.reduce(operator.mul, observations.shape.as_list()[2:], 1)])
tf.shape(observations)[0],
tf.shape(observations)[1],
functools.reduce(operator.mul,
observations.shape.as_list()[2:], 1)
])
with tf.variable_scope('policy'):
x = flat_observations
for size in config.policy_layers:
x = tf.contrib.layers.fully_connected(x, size, tf.nn.relu)
mean = tf.contrib.layers.fully_connected(
x, action_size, tf.tanh,
weights_initializer=mean_weights_initializer)
logstd = tf.get_variable(
'logstd', mean.shape[2:], tf.float32, logstd_initializer)
logstd = tf.tile(
logstd[None, None],
[tf.shape(mean)[0], tf.shape(mean)[1]] + [1] * (mean.shape.ndims - 2))
mean = tf.contrib.layers.fully_connected(x,
action_size,
tf.tanh,
weights_initializer=mean_weights_initializer)
logstd = tf.get_variable('logstd', mean.shape[2:], tf.float32, logstd_initializer)
logstd = tf.tile(logstd[None, None],
[tf.shape(mean)[0], tf.shape(mean)[1]] + [1] * (mean.shape.ndims - 2))
with tf.variable_scope('value'):
x = flat_observations
for size in config.value_layers:
@@ -72,13 +70,11 @@ def feed_forward_gaussian(
mean = tf.check_numerics(mean, 'mean')
logstd = tf.check_numerics(logstd, 'logstd')
value = tf.check_numerics(value, 'value')
policy = tf.contrib.distributions.MultivariateNormalDiag(
mean, tf.exp(logstd))
policy = tf.contrib.distributions.MultivariateNormalDiag(mean, tf.exp(logstd))
return NetworkOutput(policy, mean, logstd, value, state)
def recurrent_gaussian(
config, action_size, observations, length, state=None):
def recurrent_gaussian(config, action_size, observations, length, state=None):
"""Independent recurrent policy and feed forward value networks.
The policy network outputs the mean action and the log standard deviation
@@ -100,21 +96,23 @@ def recurrent_gaussian(
logstd_initializer = tf.random_normal_initializer(config.init_logstd, 1e-10)
cell = tf.contrib.rnn.GRUBlockCell(config.policy_layers[-1])
flat_observations = tf.reshape(observations, [
tf.shape(observations)[0], tf.shape(observations)[1],
functools.reduce(operator.mul, observations.shape.as_list()[2:], 1)])
tf.shape(observations)[0],
tf.shape(observations)[1],
functools.reduce(operator.mul,
observations.shape.as_list()[2:], 1)
])
with tf.variable_scope('policy'):
x = flat_observations
for size in config.policy_layers[:-1]:
x = tf.contrib.layers.fully_connected(x, size, tf.nn.relu)
x, state = tf.nn.dynamic_rnn(cell, x, length, state, tf.float32)
mean = tf.contrib.layers.fully_connected(
x, action_size, tf.tanh,
weights_initializer=mean_weights_initializer)
logstd = tf.get_variable(
'logstd', mean.shape[2:], tf.float32, logstd_initializer)
logstd = tf.tile(
logstd[None, None],
[tf.shape(mean)[0], tf.shape(mean)[1]] + [1] * (mean.shape.ndims - 2))
mean = tf.contrib.layers.fully_connected(x,
action_size,
tf.tanh,
weights_initializer=mean_weights_initializer)
logstd = tf.get_variable('logstd', mean.shape[2:], tf.float32, logstd_initializer)
logstd = tf.tile(logstd[None, None],
[tf.shape(mean)[0], tf.shape(mean)[1]] + [1] * (mean.shape.ndims - 2))
with tf.variable_scope('value'):
x = flat_observations
for size in config.value_layers:
@@ -123,7 +121,6 @@ def recurrent_gaussian(
mean = tf.check_numerics(mean, 'mean')
logstd = tf.check_numerics(logstd, 'logstd')
value = tf.check_numerics(value, 'value')
policy = tf.contrib.distributions.MultivariateNormalDiag(
mean, tf.exp(logstd))
policy = tf.contrib.distributions.MultivariateNormalDiag(mean, tf.exp(logstd))
# assert state.shape.as_list()[0] is not None
return NetworkOutput(policy, mean, logstd, value, state)

View File

@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Proximal Policy Optimization algorithm."""
from __future__ import absolute_import

View File

@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Proximal Policy Optimization algorithm.
Based on John Schulman's implementation in Python and Theano:
@@ -49,51 +48,51 @@ class PPOAlgorithm(object):
self._is_training = is_training
self._should_log = should_log
self._config = config
self._observ_filter = normalize.StreamingNormalize(
self._batch_env.observ[0], center=True, scale=True, clip=5,
name='normalize_observ')
self._reward_filter = normalize.StreamingNormalize(
self._batch_env.reward[0], center=False, scale=True, clip=10,
name='normalize_reward')
self._observ_filter = normalize.StreamingNormalize(self._batch_env.observ[0],
center=True,
scale=True,
clip=5,
name='normalize_observ')
self._reward_filter = normalize.StreamingNormalize(self._batch_env.reward[0],
center=False,
scale=True,
clip=10,
name='normalize_reward')
# Memory stores tuple of observ, action, mean, logstd, reward.
template = (
self._batch_env.observ[0], self._batch_env.action[0],
self._batch_env.action[0], self._batch_env.action[0],
self._batch_env.reward[0])
self._memory = memory.EpisodeMemory(
template, config.update_every, config.max_length, 'memory')
template = (self._batch_env.observ[0], self._batch_env.action[0], self._batch_env.action[0],
self._batch_env.action[0], self._batch_env.reward[0])
self._memory = memory.EpisodeMemory(template, config.update_every, config.max_length, 'memory')
self._memory_index = tf.Variable(0, False)
use_gpu = self._config.use_gpu and utility.available_gpus()
with tf.device('/gpu:0' if use_gpu else '/cpu:0'):
# Create network variables for later calls to reuse.
action_size = self._batch_env.action.shape[1].value
self._network = tf.make_template(
'network', functools.partial(config.network, config, action_size))
self._network = tf.make_template('network',
functools.partial(config.network, config, action_size))
output = self._network(
tf.zeros_like(self._batch_env.observ)[:, None],
tf.ones(len(self._batch_env)))
tf.zeros_like(self._batch_env.observ)[:, None], tf.ones(len(self._batch_env)))
with tf.variable_scope('ppo_temporary'):
self._episodes = memory.EpisodeMemory(
template, len(batch_env), config.max_length, 'episodes')
self._episodes = memory.EpisodeMemory(template, len(batch_env), config.max_length,
'episodes')
if output.state is None:
self._last_state = None
else:
# Ensure the batch dimension is set.
tf.contrib.framework.nest.map_structure(
lambda x: x.set_shape([len(batch_env)] + x.shape.as_list()[1:]),
output.state)
lambda x: x.set_shape([len(batch_env)] + x.shape.as_list()[1:]), output.state)
# pylint: disable=undefined-variable
self._last_state = tf.contrib.framework.nest.map_structure(
lambda x: tf.Variable(lambda: tf.zeros_like(x), False),
output.state)
self._last_action = tf.Variable(
tf.zeros_like(self._batch_env.action), False, name='last_action')
self._last_mean = tf.Variable(
tf.zeros_like(self._batch_env.action), False, name='last_mean')
self._last_logstd = tf.Variable(
tf.zeros_like(self._batch_env.action), False, name='last_logstd')
self._penalty = tf.Variable(
self._config.kl_init_penalty, False, dtype=tf.float32)
lambda x: tf.Variable(lambda: tf.zeros_like(x), False), output.state)
self._last_action = tf.Variable(tf.zeros_like(self._batch_env.action),
False,
name='last_action')
self._last_mean = tf.Variable(tf.zeros_like(self._batch_env.action),
False,
name='last_mean')
self._last_logstd = tf.Variable(tf.zeros_like(self._batch_env.action),
False,
name='last_logstd')
self._penalty = tf.Variable(self._config.kl_init_penalty, False, dtype=tf.float32)
self._optimizer = self._config.optimizer(self._config.learning_rate)
def begin_episode(self, agent_indices):
@@ -109,8 +108,7 @@ class PPOAlgorithm(object):
if self._last_state is None:
reset_state = tf.no_op()
else:
reset_state = utility.reinit_nested_vars(
self._last_state, agent_indices)
reset_state = utility.reinit_nested_vars(self._last_state, agent_indices)
reset_buffer = self._episodes.clear(agent_indices)
with tf.control_dependencies([reset_state, reset_buffer]):
return tf.constant('')
@@ -130,36 +128,33 @@ class PPOAlgorithm(object):
if self._last_state is None:
state = None
else:
state = tf.contrib.framework.nest.map_structure(
lambda x: tf.gather(x, agent_indices), self._last_state)
state = tf.contrib.framework.nest.map_structure(lambda x: tf.gather(x, agent_indices),
self._last_state)
output = self._network(observ[:, None], tf.ones(observ.shape[0]), state)
action = tf.cond(
self._is_training, output.policy.sample, lambda: output.mean)
action = tf.cond(self._is_training, output.policy.sample, lambda: output.mean)
logprob = output.policy.log_prob(action)[:, 0]
# pylint: disable=g-long-lambda
summary = tf.cond(self._should_log, lambda: tf.summary.merge([
tf.summary.histogram('mean', output.mean[:, 0]),
tf.summary.histogram('std', tf.exp(output.logstd[:, 0])),
tf.summary.histogram('action', action[:, 0]),
tf.summary.histogram('logprob', logprob)]), str)
summary = tf.cond(
self._should_log, lambda: tf.summary.merge([
tf.summary.histogram('mean', output.mean[:, 0]),
tf.summary.histogram('std', tf.exp(output.logstd[:, 0])),
tf.summary.histogram('action', action[:, 0]),
tf.summary.histogram('logprob', logprob)
]), str)
# Remember current policy to append to memory in the experience callback.
if self._last_state is None:
assign_state = tf.no_op()
else:
assign_state = utility.assign_nested_vars(
self._last_state, output.state, agent_indices)
assign_state = utility.assign_nested_vars(self._last_state, output.state, agent_indices)
with tf.control_dependencies([
assign_state,
tf.scatter_update(
self._last_action, agent_indices, action[:, 0]),
tf.scatter_update(
self._last_mean, agent_indices, output.mean[:, 0]),
tf.scatter_update(
self._last_logstd, agent_indices, output.logstd[:, 0])]):
tf.scatter_update(self._last_action, agent_indices, action[:, 0]),
tf.scatter_update(self._last_mean, agent_indices, output.mean[:, 0]),
tf.scatter_update(self._last_logstd, agent_indices, output.logstd[:, 0])
]):
return tf.check_numerics(action[:, 0], 'action'), tf.identity(summary)
def experience(
self, agent_indices, observ, action, reward, unused_done, unused_nextob):
def experience(self, agent_indices, observ, action, reward, unused_done, unused_nextob):
"""Process the transition tuple of the current step.
When training, add the current transition tuple to the memory and update
@@ -181,34 +176,36 @@ class PPOAlgorithm(object):
return tf.cond(
self._is_training,
# pylint: disable=g-long-lambda
lambda: self._define_experience(
agent_indices, observ, action, reward), str)
lambda: self._define_experience(agent_indices, observ, action, reward),
str)
def _define_experience(self, agent_indices, observ, action, reward):
"""Implement the branch of experience() entered during training."""
update_filters = tf.summary.merge([
self._observ_filter.update(observ),
self._reward_filter.update(reward)])
update_filters = tf.summary.merge(
[self._observ_filter.update(observ),
self._reward_filter.update(reward)])
with tf.control_dependencies([update_filters]):
if self._config.train_on_agent_action:
# NOTE: Doesn't seem to change much.
action = self._last_action
batch = (
observ, action, tf.gather(self._last_mean, agent_indices),
tf.gather(self._last_logstd, agent_indices), reward)
batch = (observ, action, tf.gather(self._last_mean,
agent_indices), tf.gather(self._last_logstd,
agent_indices), reward)
append = self._episodes.append(batch, agent_indices)
with tf.control_dependencies([append]):
norm_observ = self._observ_filter.transform(observ)
norm_reward = tf.reduce_mean(self._reward_filter.transform(reward))
# pylint: disable=g-long-lambda
summary = tf.cond(self._should_log, lambda: tf.summary.merge([
update_filters,
self._observ_filter.summary(),
self._reward_filter.summary(),
tf.summary.scalar('memory_size', self._memory_index),
tf.summary.histogram('normalized_observ', norm_observ),
tf.summary.histogram('action', self._last_action),
tf.summary.scalar('normalized_reward', norm_reward)]), str)
summary = tf.cond(
self._should_log, lambda: tf.summary.merge([
update_filters,
self._observ_filter.summary(),
self._reward_filter.summary(),
tf.summary.scalar('memory_size', self._memory_index),
tf.summary.histogram('normalized_observ', norm_observ),
tf.summary.histogram('action', self._last_action),
tf.summary.scalar('normalized_reward', norm_reward)
]), str)
return summary
def end_episode(self, agent_indices):
@@ -226,20 +223,16 @@ class PPOAlgorithm(object):
Summary tensor.
"""
with tf.name_scope('end_episode/'):
return tf.cond(
self._is_training,
lambda: self._define_end_episode(agent_indices), str)
return tf.cond(self._is_training, lambda: self._define_end_episode(agent_indices), str)
def _define_end_episode(self, agent_indices):
"""Implement the branch of end_episode() entered during training."""
episodes, length = self._episodes.data(agent_indices)
space_left = self._config.update_every - self._memory_index
use_episodes = tf.range(tf.minimum(
tf.shape(agent_indices)[0], space_left))
use_episodes = tf.range(tf.minimum(tf.shape(agent_indices)[0], space_left))
episodes = [tf.gather(elem, use_episodes) for elem in episodes]
append = self._memory.replace(
episodes, tf.gather(length, use_episodes),
use_episodes + self._memory_index)
append = self._memory.replace(episodes, tf.gather(length, use_episodes),
use_episodes + self._memory_index)
with tf.control_dependencies([append]):
inc_index = self._memory_index.assign_add(tf.shape(use_episodes)[0])
with tf.control_dependencies([inc_index]):
@@ -256,8 +249,7 @@ class PPOAlgorithm(object):
Summary tensor.
"""
with tf.name_scope('training'):
assert_full = tf.assert_equal(
self._memory_index, self._config.update_every)
assert_full = tf.assert_equal(self._memory_index, self._config.update_every)
with tf.control_dependencies([assert_full]):
data = self._memory.data()
(observ, action, old_mean, old_logstd, reward), length = data
@@ -265,22 +257,18 @@ class PPOAlgorithm(object):
length = tf.identity(length)
observ = self._observ_filter.transform(observ)
reward = self._reward_filter.transform(reward)
update_summary = self._perform_update_steps(
observ, action, old_mean, old_logstd, reward, length)
update_summary = self._perform_update_steps(observ, action, old_mean, old_logstd, reward,
length)
with tf.control_dependencies([update_summary]):
penalty_summary = self._adjust_penalty(
observ, old_mean, old_logstd, length)
penalty_summary = self._adjust_penalty(observ, old_mean, old_logstd, length)
with tf.control_dependencies([penalty_summary]):
clear_memory = tf.group(
self._memory.clear(), self._memory_index.assign(0))
clear_memory = tf.group(self._memory.clear(), self._memory_index.assign(0))
with tf.control_dependencies([clear_memory]):
weight_summary = utility.variable_summaries(
tf.trainable_variables(), self._config.weight_summaries)
return tf.summary.merge([
update_summary, penalty_summary, weight_summary])
weight_summary = utility.variable_summaries(tf.trainable_variables(),
self._config.weight_summaries)
return tf.summary.merge([update_summary, penalty_summary, weight_summary])
def _perform_update_steps(
self, observ, action, old_mean, old_logstd, reward, length):
def _perform_update_steps(self, observ, action, old_mean, old_logstd, reward, length):
"""Perform multiple update steps of value function and policy.
The advantage is computed once at the beginning and shared across
@@ -298,37 +286,29 @@ class PPOAlgorithm(object):
Returns:
Summary tensor.
"""
return_ = utility.discounted_return(
reward, length, self._config.discount)
return_ = utility.discounted_return(reward, length, self._config.discount)
value = self._network(observ, length).value
if self._config.gae_lambda:
advantage = utility.lambda_return(
reward, value, length, self._config.discount,
self._config.gae_lambda)
advantage = utility.lambda_return(reward, value, length, self._config.discount,
self._config.gae_lambda)
else:
advantage = return_ - value
mean, variance = tf.nn.moments(advantage, axes=[0, 1], keep_dims=True)
advantage = (advantage - mean) / (tf.sqrt(variance) + 1e-8)
advantage = tf.Print(
advantage, [tf.reduce_mean(return_), tf.reduce_mean(value)],
'return and value: ')
advantage = tf.Print(
advantage, [tf.reduce_mean(advantage)],
'normalized advantage: ')
advantage = tf.Print(advantage,
[tf.reduce_mean(return_), tf.reduce_mean(value)], 'return and value: ')
advantage = tf.Print(advantage, [tf.reduce_mean(advantage)], 'normalized advantage: ')
# pylint: disable=g-long-lambda
value_loss, policy_loss, summary = tf.scan(
lambda _1, _2: self._update_step(
observ, action, old_mean, old_logstd, reward, advantage, length),
tf.range(self._config.update_epochs),
[0., 0., ''], parallel_iterations=1)
print_losses = tf.group(
tf.Print(0, [tf.reduce_mean(value_loss)], 'value loss: '),
tf.Print(0, [tf.reduce_mean(policy_loss)], 'policy loss: '))
value_loss, policy_loss, summary = tf.scan(lambda _1, _2: self._update_step(
observ, action, old_mean, old_logstd, reward, advantage, length),
tf.range(self._config.update_epochs), [0., 0., ''],
parallel_iterations=1)
print_losses = tf.group(tf.Print(0, [tf.reduce_mean(value_loss)], 'value loss: '),
tf.Print(0, [tf.reduce_mean(policy_loss)], 'policy loss: '))
with tf.control_dependencies([value_loss, policy_loss, print_losses]):
return summary[self._config.update_epochs // 2]
def _update_step(
self, observ, action, old_mean, old_logstd, reward, advantage, length):
def _update_step(self, observ, action, old_mean, old_logstd, reward, advantage, length):
"""Compute the current combined loss and perform a gradient update step.
Args:
@@ -345,27 +325,20 @@ class PPOAlgorithm(object):
"""
value_loss, value_summary = self._value_loss(observ, reward, length)
network = self._network(observ, length)
policy_loss, policy_summary = self._policy_loss(
network.mean, network.logstd, old_mean, old_logstd, action,
advantage, length)
value_gradients, value_variables = (
zip(*self._optimizer.compute_gradients(value_loss)))
policy_gradients, policy_variables = (
zip(*self._optimizer.compute_gradients(policy_loss)))
policy_loss, policy_summary = self._policy_loss(network.mean, network.logstd, old_mean,
old_logstd, action, advantage, length)
value_gradients, value_variables = (zip(*self._optimizer.compute_gradients(value_loss)))
policy_gradients, policy_variables = (zip(*self._optimizer.compute_gradients(policy_loss)))
all_gradients = value_gradients + policy_gradients
all_variables = value_variables + policy_variables
optimize = self._optimizer.apply_gradients(
zip(all_gradients, all_variables))
optimize = self._optimizer.apply_gradients(zip(all_gradients, all_variables))
summary = tf.summary.merge([
value_summary, policy_summary,
tf.summary.scalar(
'value_gradient_norm', tf.global_norm(value_gradients)),
tf.summary.scalar(
'policy_gradient_norm', tf.global_norm(policy_gradients)),
utility.gradient_summaries(
zip(value_gradients, value_variables), dict(value=r'.*')),
utility.gradient_summaries(
zip(policy_gradients, policy_variables), dict(policy=r'.*'))])
tf.summary.scalar('value_gradient_norm', tf.global_norm(value_gradients)),
tf.summary.scalar('policy_gradient_norm', tf.global_norm(policy_gradients)),
utility.gradient_summaries(zip(value_gradients, value_variables), dict(value=r'.*')),
utility.gradient_summaries(zip(policy_gradients, policy_variables), dict(policy=r'.*'))
])
with tf.control_dependencies([optimize]):
return [tf.identity(x) for x in (value_loss, policy_loss, summary)]
@@ -385,18 +358,17 @@ class PPOAlgorithm(object):
"""
with tf.name_scope('value_loss'):
value = self._network(observ, length).value
return_ = utility.discounted_return(
reward, length, self._config.discount)
return_ = utility.discounted_return(reward, length, self._config.discount)
advantage = return_ - value
value_loss = 0.5 * self._mask(advantage ** 2, length)
value_loss = 0.5 * self._mask(advantage**2, length)
summary = tf.summary.merge([
tf.summary.histogram('value_loss', value_loss),
tf.summary.scalar('avg_value_loss', tf.reduce_mean(value_loss))])
tf.summary.scalar('avg_value_loss', tf.reduce_mean(value_loss))
])
value_loss = tf.reduce_mean(value_loss)
return tf.check_numerics(value_loss, 'value_loss'), summary
def _policy_loss(
self, mean, logstd, old_mean, old_logstd, action, advantage, length):
def _policy_loss(self, mean, logstd, old_mean, old_logstd, action, advantage, length):
"""Compute the policy loss composed of multiple components.
1. The policy gradient loss is importance sampled from the data-collecting
@@ -420,24 +392,20 @@ class PPOAlgorithm(object):
"""
with tf.name_scope('policy_loss'):
entropy = utility.diag_normal_entropy(mean, logstd)
kl = tf.reduce_mean(self._mask(utility.diag_normal_kl(
old_mean, old_logstd, mean, logstd), length), 1)
kl = tf.reduce_mean(
self._mask(utility.diag_normal_kl(old_mean, old_logstd, mean, logstd), length), 1)
policy_gradient = tf.exp(
utility.diag_normal_logpdf(mean, logstd, action) -
utility.diag_normal_logpdf(old_mean, old_logstd, action))
surrogate_loss = -tf.reduce_mean(self._mask(
policy_gradient * tf.stop_gradient(advantage), length), 1)
surrogate_loss = -tf.reduce_mean(
self._mask(policy_gradient * tf.stop_gradient(advantage), length), 1)
kl_penalty = self._penalty * kl
cutoff_threshold = self._config.kl_target * self._config.kl_cutoff_factor
cutoff_count = tf.reduce_sum(
tf.cast(kl > cutoff_threshold, tf.int32))
with tf.control_dependencies([tf.cond(
cutoff_count > 0,
lambda: tf.Print(0, [cutoff_count], 'kl cutoff! '), int)]):
kl_cutoff = (
self._config.kl_cutoff_coef *
tf.cast(kl > cutoff_threshold, tf.float32) *
(kl - cutoff_threshold) ** 2)
cutoff_count = tf.reduce_sum(tf.cast(kl > cutoff_threshold, tf.int32))
with tf.control_dependencies(
[tf.cond(cutoff_count > 0, lambda: tf.Print(0, [cutoff_count], 'kl cutoff! '), int)]):
kl_cutoff = (self._config.kl_cutoff_coef * tf.cast(kl > cutoff_threshold, tf.float32) *
(kl - cutoff_threshold)**2)
policy_loss = surrogate_loss + kl_penalty + kl_cutoff
summary = tf.summary.merge([
tf.summary.histogram('entropy', entropy),
@@ -449,7 +417,8 @@ class PPOAlgorithm(object):
tf.summary.histogram('policy_loss', policy_loss),
tf.summary.scalar('avg_surr_loss', tf.reduce_mean(surrogate_loss)),
tf.summary.scalar('avg_kl_penalty', tf.reduce_mean(kl_penalty)),
tf.summary.scalar('avg_policy_loss', tf.reduce_mean(policy_loss))])
tf.summary.scalar('avg_policy_loss', tf.reduce_mean(policy_loss))
])
policy_loss = tf.reduce_mean(policy_loss, 0)
return tf.check_numerics(policy_loss, 'policy_loss'), summary
@@ -471,30 +440,30 @@ class PPOAlgorithm(object):
"""
with tf.name_scope('adjust_penalty'):
network = self._network(observ, length)
assert_change = tf.assert_equal(
tf.reduce_all(tf.equal(network.mean, old_mean)), False,
message='policy should change')
assert_change = tf.assert_equal(tf.reduce_all(tf.equal(network.mean, old_mean)),
False,
message='policy should change')
print_penalty = tf.Print(0, [self._penalty], 'current penalty: ')
with tf.control_dependencies([assert_change, print_penalty]):
kl_change = tf.reduce_mean(self._mask(utility.diag_normal_kl(
old_mean, old_logstd, network.mean, network.logstd), length))
kl_change = tf.reduce_mean(
self._mask(utility.diag_normal_kl(old_mean, old_logstd, network.mean, network.logstd),
length))
kl_change = tf.Print(kl_change, [kl_change], 'kl change: ')
maybe_increase = tf.cond(
kl_change > 1.3 * self._config.kl_target,
# pylint: disable=g-long-lambda
lambda: tf.Print(self._penalty.assign(
self._penalty * 1.5), [0], 'increase penalty '),
lambda: tf.Print(self._penalty.assign(self._penalty * 1.5), [0], 'increase penalty '),
float)
maybe_decrease = tf.cond(
kl_change < 0.7 * self._config.kl_target,
# pylint: disable=g-long-lambda
lambda: tf.Print(self._penalty.assign(
self._penalty / 1.5), [0], 'decrease penalty '),
lambda: tf.Print(self._penalty.assign(self._penalty / 1.5), [0], 'decrease penalty '),
float)
with tf.control_dependencies([maybe_increase, maybe_decrease]):
return tf.summary.merge([
tf.summary.scalar('kl_change', kl_change),
tf.summary.scalar('penalty', self._penalty)])
tf.summary.scalar('penalty', self._penalty)
])
def _mask(self, tensor, length):
"""Set padding elements of a batch of sequences to zero.

View File

@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Memory that stores episodes."""
from __future__ import absolute_import
@@ -43,10 +42,9 @@ class EpisodeMemory(object):
self._scope = var_scope
self._length = tf.Variable(tf.zeros(capacity, tf.int32), False)
self._buffers = [
tf.Variable(tf.zeros(
[capacity, max_length] + elem.shape.as_list(),
elem.dtype), False)
for elem in template]
tf.Variable(tf.zeros([capacity, max_length] + elem.shape.as_list(), elem.dtype), False)
for elem in template
]
def length(self, rows=None):
"""Tensor holding the current length of episodes.
@@ -72,13 +70,11 @@ class EpisodeMemory(object):
"""
rows = tf.range(self._capacity) if rows is None else rows
assert rows.shape.ndims == 1
assert_capacity = tf.assert_less(
rows, self._capacity,
message='capacity exceeded')
assert_capacity = tf.assert_less(rows, self._capacity, message='capacity exceeded')
with tf.control_dependencies([assert_capacity]):
assert_max_length = tf.assert_less(
tf.gather(self._length, rows), self._max_length,
message='max length exceeded')
assert_max_length = tf.assert_less(tf.gather(self._length, rows),
self._max_length,
message='max length exceeded')
append_ops = []
with tf.control_dependencies([assert_max_length]):
for buffer_, elements in zip(self._buffers, transitions):
@@ -86,8 +82,7 @@ class EpisodeMemory(object):
indices = tf.stack([rows, timestep], 1)
append_ops.append(tf.scatter_nd_update(buffer_, indices, elements))
with tf.control_dependencies(append_ops):
episode_mask = tf.reduce_sum(tf.one_hot(
rows, self._capacity, dtype=tf.int32), 0)
episode_mask = tf.reduce_sum(tf.one_hot(rows, self._capacity, dtype=tf.int32), 0)
return self._length.assign_add(episode_mask)
def replace(self, episodes, length, rows=None):
@@ -103,11 +98,11 @@ class EpisodeMemory(object):
"""
rows = tf.range(self._capacity) if rows is None else rows
assert rows.shape.ndims == 1
assert_capacity = tf.assert_less(
rows, self._capacity, message='capacity exceeded')
assert_capacity = tf.assert_less(rows, self._capacity, message='capacity exceeded')
with tf.control_dependencies([assert_capacity]):
assert_max_length = tf.assert_less_equal(
length, self._max_length, message='max length exceeded')
assert_max_length = tf.assert_less_equal(length,
self._max_length,
message='max length exceeded')
replace_ops = []
with tf.control_dependencies([assert_max_length]):
for buffer_, elements in zip(self._buffers, episodes):

View File

@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Normalize tensors based on streaming estimates of mean and variance."""
from __future__ import absolute_import
@@ -24,8 +23,7 @@ import tensorflow as tf
class StreamingNormalize(object):
"""Normalize tensors based on streaming estimates of mean and variance."""
def __init__(
self, template, center=True, scale=True, clip=10, name='normalize'):
def __init__(self, template, center=True, scale=True, clip=10, name='normalize'):
"""Normalize tensors based on streaming estimates of mean and variance.
Centering the value, scaling it by the standard deviation, and clipping
@@ -69,8 +67,7 @@ class StreamingNormalize(object):
if self._scale:
# We cannot scale before seeing at least two samples.
value /= tf.cond(
self._count > 1, lambda: self._std() + 1e-8,
lambda: tf.ones_like(self._var_sum))[None]
self._count > 1, lambda: self._std() + 1e-8, lambda: tf.ones_like(self._var_sum))[None]
if self._clip:
value = tf.clip_by_value(value, -self._clip, self._clip)
# Remove batch dimension if necessary.
@@ -97,8 +94,7 @@ class StreamingNormalize(object):
mean_delta = tf.reduce_sum(value - self._mean[None, ...], 0)
new_mean = self._mean + mean_delta / step
new_mean = tf.cond(self._count > 1, lambda: new_mean, lambda: value[0])
var_delta = (
value - self._mean[None, ...]) * (value - new_mean[None, ...])
var_delta = (value - self._mean[None, ...]) * (value - new_mean[None, ...])
new_var_sum = self._var_sum + tf.reduce_sum(var_delta, 0)
with tf.control_dependencies([new_mean, new_var_sum]):
update = self._mean.assign(new_mean), self._var_sum.assign(new_var_sum)
@@ -116,10 +112,8 @@ class StreamingNormalize(object):
Operation.
"""
with tf.name_scope(self._name + '/reset'):
return tf.group(
self._count.assign(0),
self._mean.assign(tf.zeros_like(self._mean)),
self._var_sum.assign(tf.zeros_like(self._var_sum)))
return tf.group(self._count.assign(0), self._mean.assign(tf.zeros_like(self._mean)),
self._var_sum.assign(tf.zeros_like(self._var_sum)))
def summary(self):
"""Summary string of mean and standard deviation.
@@ -128,10 +122,8 @@ class StreamingNormalize(object):
Summary tensor.
"""
with tf.name_scope(self._name + '/summary'):
mean_summary = tf.cond(
self._count > 0, lambda: self._summary('mean', self._mean), str)
std_summary = tf.cond(
self._count > 1, lambda: self._summary('stddev', self._std()), str)
mean_summary = tf.cond(self._count > 0, lambda: self._summary('mean', self._mean), str)
std_summary = tf.cond(self._count > 1, lambda: self._summary('stddev', self._std()), str)
return tf.summary.merge([mean_summary, std_summary])
def _std(self):
@@ -143,10 +135,8 @@ class StreamingNormalize(object):
Returns:
Tensor of current variance.
"""
variance = tf.cond(
self._count > 1,
lambda: self._var_sum / tf.cast(self._count - 1, tf.float32),
lambda: tf.ones_like(self._var_sum) * float('nan'))
variance = tf.cond(self._count > 1, lambda: self._var_sum / tf.cast(
self._count - 1, tf.float32), lambda: tf.ones_like(self._var_sum) * float('nan'))
# The epsilon corrects for small negative variance values caused by
# the algorithm. It was empirically chosen to work with all environments
# tested.
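
For reference, a minimal NumPy sketch of the streaming statistics this class maintains: the running mean shifts by the batch delta over the count, and the variance accumulator sums products of deviations. All names here are illustrative and not part of the module.

import numpy as np

class StreamingStats(object):
  """Plain-Python analogue of the mean/variance updates above (sketch only)."""

  def __init__(self, shape):
    self.count = 0
    self.mean = np.zeros(shape)
    self.var_sum = np.zeros(shape)

  def update(self, value):
    # Mirrors update(): shift the mean, then accumulate squared deviations.
    self.count += 1
    new_mean = self.mean + (value - self.mean) / self.count
    if self.count == 1:
      new_mean = value.astype(float)  # first sample defines the mean outright
    self.var_sum += (value - self.mean) * (value - new_mean)
    self.mean = new_mean

  def std(self):
    # Sample standard deviation, defined once at least two values were seen.
    return np.sqrt(self.var_sum / (self.count - 1)) if self.count > 1 else None

stats = StreamingStats(1)
for x in [1.0, 2.0, 4.0]:
  stats.update(np.array([x]))
print(stats.mean, stats.std())  # approx. [2.333] [1.528]
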

View File

@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Utilities for the PPO algorithm."""
from __future__ import absolute_import
@@ -37,8 +36,7 @@ def reinit_nested_vars(variables, indices=None):
Operation.
"""
if isinstance(variables, (tuple, list)):
return tf.group(*[
reinit_nested_vars(variable, indices) for variable in variables])
return tf.group(*[reinit_nested_vars(variable, indices) for variable in variables])
if indices is None:
return variables.assign(tf.zeros_like(variables))
else:
@@ -58,9 +56,8 @@ def assign_nested_vars(variables, tensors, indices=None):
Operation.
"""
if isinstance(variables, (tuple, list)):
return tf.group(*[
assign_nested_vars(variable, tensor)
for variable, tensor in zip(variables, tensors)])
return tf.group(
*[assign_nested_vars(variable, tensor) for variable, tensor in zip(variables, tensors)])
if indices is None:
return variables.assign(tensors)
else:
@@ -71,10 +68,11 @@ def discounted_return(reward, length, discount):
"""Discounted Monte-Carlo returns."""
timestep = tf.range(reward.shape[1].value)
mask = tf.cast(timestep[None, :] < length[:, None], tf.float32)
return_ = tf.reverse(tf.transpose(tf.scan(
lambda agg, cur: cur + discount * agg,
tf.transpose(tf.reverse(mask * reward, [1]), [1, 0]),
tf.zeros_like(reward[:, -1]), 1, False), [1, 0]), [1])
return_ = tf.reverse(
tf.transpose(
tf.scan(lambda agg, cur: cur + discount * agg,
tf.transpose(tf.reverse(mask * reward, [1]), [1, 0]),
tf.zeros_like(reward[:, -1]), 1, False), [1, 0]), [1])
return tf.check_numerics(tf.stop_gradient(return_), 'return')
@@ -85,9 +83,8 @@ def fixed_step_return(reward, value, length, discount, window):
return_ = tf.zeros_like(reward)
for _ in range(window):
return_ += reward
reward = discount * tf.concat(
[reward[:, 1:], tf.zeros_like(reward[:, -1:])], 1)
return_ += discount ** window * tf.concat(
reward = discount * tf.concat([reward[:, 1:], tf.zeros_like(reward[:, -1:])], 1)
return_ += discount**window * tf.concat(
      [value[:, window:], tf.zeros_like(value[:, -window:])], 1)
return tf.check_numerics(tf.stop_gradient(mask * return_), 'return')
@@ -99,10 +96,11 @@ def lambda_return(reward, value, length, discount, lambda_):
sequence = mask * reward + discount * value * (1 - lambda_)
discount = mask * discount * lambda_
sequence = tf.stack([sequence, discount], 2)
return_ = tf.reverse(tf.transpose(tf.scan(
lambda agg, cur: cur[0] + cur[1] * agg,
tf.transpose(tf.reverse(sequence, [1]), [1, 2, 0]),
tf.zeros_like(value[:, -1]), 1, False), [1, 0]), [1])
return_ = tf.reverse(
tf.transpose(
tf.scan(lambda agg, cur: cur[0] + cur[1] * agg,
tf.transpose(tf.reverse(sequence, [1]), [1, 2, 0]), tf.zeros_like(value[:, -1]),
1, False), [1, 0]), [1])
return tf.check_numerics(tf.stop_gradient(return_), 'return')
@@ -112,27 +110,26 @@ def lambda_advantage(reward, value, length, discount):
mask = tf.cast(timestep[None, :] < length[:, None], tf.float32)
next_value = tf.concat([value[:, 1:], tf.zeros_like(value[:, -1:])], 1)
delta = reward + discount * next_value - value
advantage = tf.reverse(tf.transpose(tf.scan(
lambda agg, cur: cur + discount * agg,
tf.transpose(tf.reverse(mask * delta, [1]), [1, 0]),
tf.zeros_like(delta[:, -1]), 1, False), [1, 0]), [1])
advantage = tf.reverse(
tf.transpose(
tf.scan(lambda agg, cur: cur + discount * agg,
tf.transpose(tf.reverse(mask * delta, [1]), [1, 0]), tf.zeros_like(delta[:, -1]),
1, False), [1, 0]), [1])
return tf.check_numerics(tf.stop_gradient(advantage), 'advantage')
def diag_normal_kl(mean0, logstd0, mean1, logstd1):
"""Epirical KL divergence of two normals with diagonal covariance."""
logstd0_2, logstd1_2 = 2 * logstd0, 2 * logstd1
return 0.5 * (
tf.reduce_sum(tf.exp(logstd0_2 - logstd1_2), -1) +
tf.reduce_sum((mean1 - mean0) ** 2 / tf.exp(logstd1_2), -1) +
tf.reduce_sum(logstd1_2, -1) - tf.reduce_sum(logstd0_2, -1) -
mean0.shape[-1].value)
return 0.5 * (tf.reduce_sum(tf.exp(logstd0_2 - logstd1_2), -1) + tf.reduce_sum(
(mean1 - mean0)**2 / tf.exp(logstd1_2), -1) + tf.reduce_sum(logstd1_2, -1) -
tf.reduce_sum(logstd0_2, -1) - mean0.shape[-1].value)
def diag_normal_logpdf(mean, logstd, loc):
"""Log density of a normal with diagonal covariance."""
constant = -0.5 * math.log(2 * math.pi) - logstd
value = -0.5 * ((loc - mean) / tf.exp(logstd)) ** 2
value = -0.5 * ((loc - mean) / tf.exp(logstd))**2
return tf.reduce_sum(constant + value, -1)
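
As a sanity check on the return math above, here is a hedged NumPy sketch of the discounted Monte-Carlo return for a single episode; the reversed tf.scan in the code computes the same backward recursion batch-wise.

import numpy as np

def discounted_return_np(reward, discount):
  # Backward recursion: return_[t] = reward[t] + discount * return_[t + 1].
  return_ = np.zeros(len(reward))
  running = 0.0
  for t in reversed(range(len(reward))):
    running = reward[t] + discount * running
    return_[t] = running
  return return_

print(discounted_return_np(np.array([1.0, 1.0, 1.0]), 0.9))  # [2.71, 1.9, 1.0]
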

View File

@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tools for reinforcement learning."""
from __future__ import absolute_import

View File

@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Wrap a dictionary to access keys as attributes."""
from __future__ import absolute_import

View File

@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Combine multiple environments to step them in batch."""
from __future__ import absolute_import
@@ -83,13 +82,9 @@ class BatchEnv(object):
message = 'Invalid action at index {}: {}'
raise ValueError(message.format(index, action))
if self._blocking:
transitions = [
env.step(action)
for env, action in zip(self._envs, actions)]
transitions = [env.step(action) for env, action in zip(self._envs, actions)]
else:
transitions = [
env.step(action, blocking=False)
for env, action in zip(self._envs, actions)]
transitions = [env.step(action, blocking=False) for env, action in zip(self._envs, actions)]
transitions = [transition() for transition in transitions]
observs, rewards, dones, infos = zip(*transitions)
observ = np.stack(observs)
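
The blocking branch above boils down to stepping each environment in turn and stacking the results; a toy sketch with a stand-in environment (purely illustrative, not part of the module):

import numpy as np

class DummyEnv(object):
  """Stand-in environment with the (observ, reward, done, info) step contract."""

  def step(self, action):
    return np.array([float(action)]), 1.0, False, {}

envs = [DummyEnv() for _ in range(3)]
actions = [0, 1, 2]
transitions = [env.step(action) for env, action in zip(envs, actions)]
observs, rewards, dones, infos = zip(*transitions)
observ = np.stack(observs)
print(observ.shape, rewards)  # (3, 1) (1.0, 1.0, 1.0)
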

View File

@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Count learnable parameters."""
from __future__ import absolute_import

View File

@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Batch of environments inside the TensorFlow graph."""
from __future__ import absolute_import
@@ -42,18 +41,18 @@ class InGraphBatchEnv(object):
action_shape = self._parse_shape(self._batch_env.action_space)
action_dtype = self._parse_dtype(self._batch_env.action_space)
with tf.variable_scope('env_temporary'):
self._observ = tf.Variable(
tf.zeros((len(self._batch_env),) + observ_shape, observ_dtype),
name='observ', trainable=False)
self._action = tf.Variable(
tf.zeros((len(self._batch_env),) + action_shape, action_dtype),
name='action', trainable=False)
self._reward = tf.Variable(
tf.zeros((len(self._batch_env),), tf.float32),
name='reward', trainable=False)
self._done = tf.Variable(
tf.cast(tf.ones((len(self._batch_env),)), tf.bool),
name='done', trainable=False)
self._observ = tf.Variable(tf.zeros((len(self._batch_env),) + observ_shape, observ_dtype),
name='observ',
trainable=False)
self._action = tf.Variable(tf.zeros((len(self._batch_env),) + action_shape, action_dtype),
name='action',
trainable=False)
self._reward = tf.Variable(tf.zeros((len(self._batch_env),), tf.float32),
name='reward',
trainable=False)
self._done = tf.Variable(tf.cast(tf.ones((len(self._batch_env),)), tf.bool),
name='done',
trainable=False)
def __getattr__(self, name):
"""Forward unimplemented attributes to one of the original environments.
@@ -89,16 +88,13 @@ class InGraphBatchEnv(object):
if action.dtype in (tf.float16, tf.float32, tf.float64):
action = tf.check_numerics(action, 'action')
observ_dtype = self._parse_dtype(self._batch_env.observation_space)
observ, reward, done = tf.py_func(
lambda a: self._batch_env.step(a)[:3], [action],
[observ_dtype, tf.float32, tf.bool], name='step')
observ, reward, done = tf.py_func(lambda a: self._batch_env.step(a)[:3], [action],
[observ_dtype, tf.float32, tf.bool],
name='step')
observ = tf.check_numerics(observ, 'observ')
reward = tf.check_numerics(reward, 'reward')
return tf.group(
self._observ.assign(observ),
self._action.assign(action),
self._reward.assign(reward),
self._done.assign(done))
return tf.group(self._observ.assign(observ), self._action.assign(action),
self._reward.assign(reward), self._done.assign(done))
def reset(self, indices=None):
"""Reset the batch of environments.
@@ -112,15 +108,15 @@ class InGraphBatchEnv(object):
if indices is None:
indices = tf.range(len(self._batch_env))
observ_dtype = self._parse_dtype(self._batch_env.observation_space)
observ = tf.py_func(
self._batch_env.reset, [indices], observ_dtype, name='reset')
observ = tf.py_func(self._batch_env.reset, [indices], observ_dtype, name='reset')
observ = tf.check_numerics(observ, 'observ')
reward = tf.zeros_like(indices, tf.float32)
done = tf.zeros_like(indices, tf.bool)
with tf.control_dependencies([
tf.scatter_update(self._observ, indices, observ),
tf.scatter_update(self._reward, indices, reward),
tf.scatter_update(self._done, indices, done)]):
tf.scatter_update(self._done, indices, done)
]):
return tf.identity(observ)
@property
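
The step above relies on tf.py_func to call back into Python from inside the graph. A minimal sketch of that pattern, assuming TensorFlow 1.x graph mode (tf.py_func is not part of the 2.x API) and a made-up step function standing in for batch_env.step:

import numpy as np
import tensorflow as tf

def _py_step(action):
  # Stand-in for batch_env.step(action)[:3]: observation, reward, done.
  return action.astype(np.float32), np.float32(1.0), np.bool_(False)

action = tf.placeholder(tf.float32, [2], name='action')
observ, reward, done = tf.py_func(_py_step, [action], [tf.float32, tf.float32, tf.bool],
                                  name='step')

with tf.Session() as sess:
  print(sess.run([observ, reward, done], {action: [0.1, -0.2]}))  # stand-in transition
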

View File

@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Put an OpenAI Gym environment into the TensorFlow graph."""
from __future__ import absolute_import
@@ -42,16 +41,15 @@ class InGraphEnv(object):
action_shape = self._parse_shape(self._env.action_space)
action_dtype = self._parse_dtype(self._env.action_space)
with tf.name_scope('environment'):
self._observ = tf.Variable(
tf.zeros(observ_shape, observ_dtype), name='observ', trainable=False)
self._action = tf.Variable(
tf.zeros(action_shape, action_dtype), name='action', trainable=False)
self._reward = tf.Variable(
0.0, dtype=tf.float32, name='reward', trainable=False)
self._done = tf.Variable(
True, dtype=tf.bool, name='done', trainable=False)
self._step = tf.Variable(
0, dtype=tf.int32, name='step', trainable=False)
self._observ = tf.Variable(tf.zeros(observ_shape, observ_dtype),
name='observ',
trainable=False)
self._action = tf.Variable(tf.zeros(action_shape, action_dtype),
name='action',
trainable=False)
self._reward = tf.Variable(0.0, dtype=tf.float32, name='reward', trainable=False)
self._done = tf.Variable(True, dtype=tf.bool, name='done', trainable=False)
self._step = tf.Variable(0, dtype=tf.int32, name='step', trainable=False)
def __getattr__(self, name):
"""Forward unimplemented attributes to the original environment.
@@ -79,17 +77,14 @@ class InGraphEnv(object):
if action.dtype in (tf.float16, tf.float32, tf.float64):
action = tf.check_numerics(action, 'action')
observ_dtype = self._parse_dtype(self._env.observation_space)
observ, reward, done = tf.py_func(
lambda a: self._env.step(a)[:3], [action],
[observ_dtype, tf.float32, tf.bool], name='step')
observ, reward, done = tf.py_func(lambda a: self._env.step(a)[:3], [action],
[observ_dtype, tf.float32, tf.bool],
name='step')
observ = tf.check_numerics(observ, 'observ')
reward = tf.check_numerics(reward, 'reward')
return tf.group(
self._observ.assign(observ),
self._action.assign(action),
self._reward.assign(reward),
self._done.assign(done),
self._step.assign_add(1))
return tf.group(self._observ.assign(observ), self._action.assign(action),
self._reward.assign(reward), self._done.assign(done),
self._step.assign_add(1))
def reset(self):
"""Reset the environment.
@@ -100,10 +95,10 @@ class InGraphEnv(object):
observ_dtype = self._parse_dtype(self._env.observation_space)
observ = tf.py_func(self._env.reset, [], observ_dtype, name='reset')
observ = tf.check_numerics(observ, 'observ')
with tf.control_dependencies([
self._observ.assign(observ),
self._reward.assign(0),
self._done.assign(False)]):
with tf.control_dependencies(
[self._observ.assign(observ),
self._reward.assign(0),
self._done.assign(False)]):
return tf.identity(observ)
@property

View File

@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Execute operations in a loop and coordinate logging and checkpoints."""
from __future__ import absolute_import
@@ -25,10 +24,8 @@ import tensorflow as tf
from . import streaming_mean
_Phase = collections.namedtuple(
'Phase',
'name, writer, op, batch, steps, feed, report_every, log_every,'
'Phase', 'name, writer, op, batch, steps, feed, report_every, log_every,'
'checkpoint_every')
@@ -56,16 +53,22 @@ class Loop(object):
reset: Tensor indicating to the model to start a new computation.
"""
self._logdir = logdir
self._step = (
tf.Variable(0, False, name='global_step') if step is None else step)
self._step = (tf.Variable(0, False, name='global_step') if step is None else step)
self._log = tf.placeholder(tf.bool) if log is None else log
self._report = tf.placeholder(tf.bool) if report is None else report
self._reset = tf.placeholder(tf.bool) if reset is None else reset
self._phases = []
def add_phase(
self, name, done, score, summary, steps,
report_every=None, log_every=None, checkpoint_every=None, feed=None):
def add_phase(self,
name,
done,
score,
summary,
steps,
report_every=None,
log_every=None,
checkpoint_every=None,
feed=None):
"""Add a phase to the loop protocol.
If the model breaks long computation into multiple steps, the done tensor
@@ -97,13 +100,12 @@ class Loop(object):
if done.shape.ndims is None or score.shape.ndims is None:
raise ValueError("Rank of 'done' and 'score' tensors must be known.")
writer = self._logdir and tf.summary.FileWriter(
os.path.join(self._logdir, name), tf.get_default_graph(),
flush_secs=60)
os.path.join(self._logdir, name), tf.get_default_graph(), flush_secs=60)
op = self._define_step(done, score, summary)
batch = 1 if score.shape.ndims == 0 else score.shape[0].value
self._phases.append(_Phase(
name, writer, op, batch, int(steps), feed, report_every,
log_every, checkpoint_every))
self._phases.append(
_Phase(name, writer, op, batch, int(steps), feed, report_every, log_every,
checkpoint_every))
def run(self, sess, saver, max_step=None):
"""Run the loop schedule for a specified number of steps.
@@ -133,13 +135,11 @@ class Loop(object):
tf.logging.info(message.format(phase.name, phase_step, global_step))
# Populate book keeping tensors.
phase.feed[self._reset] = (steps_in < steps_made)
phase.feed[self._log] = (
phase.writer and
self._is_every_steps(phase_step, phase.batch, phase.log_every))
phase.feed[self._report] = (
self._is_every_steps(phase_step, phase.batch, phase.report_every))
summary, mean_score, global_step, steps_made = sess.run(
phase.op, phase.feed)
phase.feed[self._log] = (phase.writer and
self._is_every_steps(phase_step, phase.batch, phase.log_every))
phase.feed[self._report] = (self._is_every_steps(phase_step, phase.batch,
phase.report_every))
summary, mean_score, global_step, steps_made = sess.run(phase.op, phase.feed)
if self._is_every_steps(phase_step, phase.batch, phase.checkpoint_every):
self._store_checkpoint(sess, saver, global_step)
if self._is_every_steps(phase_step, phase.batch, phase.report_every):
@@ -207,8 +207,7 @@ class Loop(object):
score_mean = streaming_mean.StreamingMean((), tf.float32)
with tf.control_dependencies([done, score, summary]):
done_score = tf.gather(score, tf.where(done)[:, 0])
submit_score = tf.cond(
tf.reduce_any(done), lambda: score_mean.submit(done_score), tf.no_op)
submit_score = tf.cond(tf.reduce_any(done), lambda: score_mean.submit(done_score), tf.no_op)
with tf.control_dependencies([submit_score]):
mean_score = tf.cond(self._report, score_mean.clear, float)
steps_made = tf.shape(score)[0]

View File

@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Mock algorithm for testing reinforcement learning code."""
from __future__ import absolute_import

View File

@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Mock environment for testing reinforcement learning code."""
from __future__ import absolute_import

View File

@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""In-graph simulation step of a vectorized algorithm with environments."""
from __future__ import absolute_import
@@ -55,7 +54,8 @@ def simulate(batch_env, algo, log=True, reset=False):
reset_ops = [
batch_env.reset(agent_indices),
tf.scatter_update(score, agent_indices, zero_scores),
tf.scatter_update(length, agent_indices, zero_durations)]
tf.scatter_update(length, agent_indices, zero_durations)
]
with tf.control_dependencies(reset_ops):
return algo.begin_episode(agent_indices)
@@ -78,9 +78,8 @@ def simulate(batch_env, algo, log=True, reset=False):
inc_length = length.assign_add(tf.ones(len(batch_env), tf.int32))
with tf.control_dependencies([add_score, inc_length]):
agent_indices = tf.range(len(batch_env))
experience_summary = algo.experience(
agent_indices, prevob, batch_env.action, batch_env.reward,
batch_env.done, batch_env.observ)
experience_summary = algo.experience(agent_indices, prevob, batch_env.action,
batch_env.reward, batch_env.done, batch_env.observ)
return tf.summary.merge([step_summary, experience_summary])
def _define_end_episode(agent_indices):
@@ -96,8 +95,7 @@ def simulate(batch_env, algo, log=True, reset=False):
"""
assert agent_indices.shape.ndims == 1
submit_score = mean_score.submit(tf.gather(score, agent_indices))
submit_length = mean_length.submit(
tf.cast(tf.gather(length, agent_indices), tf.float32))
submit_length = mean_length.submit(tf.cast(tf.gather(length, agent_indices), tf.float32))
with tf.control_dependencies([submit_score, submit_length]):
return algo.end_episode(agent_indices)
@@ -107,41 +105,34 @@ def simulate(batch_env, algo, log=True, reset=False):
Returns:
Summary string.
"""
score_summary = tf.cond(
tf.logical_and(log, tf.cast(mean_score.count, tf.bool)),
lambda: tf.summary.scalar('mean_score', mean_score.clear()), str)
length_summary = tf.cond(
tf.logical_and(log, tf.cast(mean_length.count, tf.bool)),
lambda: tf.summary.scalar('mean_length', mean_length.clear()), str)
score_summary = tf.cond(tf.logical_and(log, tf.cast(
mean_score.count, tf.bool)), lambda: tf.summary.scalar('mean_score', mean_score.clear()),
str)
length_summary = tf.cond(tf.logical_and(
log, tf.cast(mean_length.count,
tf.bool)), lambda: tf.summary.scalar('mean_length', mean_length.clear()), str)
return tf.summary.merge([score_summary, length_summary])
with tf.name_scope('simulate'):
log = tf.convert_to_tensor(log)
reset = tf.convert_to_tensor(reset)
with tf.variable_scope('simulate_temporary'):
score = tf.Variable(
tf.zeros(len(batch_env), dtype=tf.float32), False, name='score')
length = tf.Variable(
tf.zeros(len(batch_env), dtype=tf.int32), False, name='length')
score = tf.Variable(tf.zeros(len(batch_env), dtype=tf.float32), False, name='score')
length = tf.Variable(tf.zeros(len(batch_env), dtype=tf.int32), False, name='length')
mean_score = streaming_mean.StreamingMean((), tf.float32)
mean_length = streaming_mean.StreamingMean((), tf.float32)
agent_indices = tf.cond(
reset,
lambda: tf.range(len(batch_env)),
lambda: tf.cast(tf.where(batch_env.done)[:, 0], tf.int32))
begin_episode = tf.cond(
tf.cast(tf.shape(agent_indices)[0], tf.bool),
lambda: _define_begin_episode(agent_indices), str)
agent_indices = tf.cond(reset, lambda: tf.range(len(batch_env)), lambda: tf.cast(
tf.where(batch_env.done)[:, 0], tf.int32))
begin_episode = tf.cond(tf.cast(tf.shape(agent_indices)[0],
tf.bool), lambda: _define_begin_episode(agent_indices), str)
with tf.control_dependencies([begin_episode]):
step = _define_step()
with tf.control_dependencies([step]):
agent_indices = tf.cast(tf.where(batch_env.done)[:, 0], tf.int32)
end_episode = tf.cond(
tf.cast(tf.shape(agent_indices)[0], tf.bool),
lambda: _define_end_episode(agent_indices), str)
end_episode = tf.cond(tf.cast(tf.shape(agent_indices)[0],
tf.bool), lambda: _define_end_episode(agent_indices), str)
with tf.control_dependencies([end_episode]):
summary = tf.summary.merge([
_define_summaries(), begin_episode, step, end_episode])
summary = tf.summary.merge([_define_summaries(), begin_episode, step, end_episode])
with tf.control_dependencies([summary]):
done, score = tf.identity(batch_env.done), tf.identity(score)
return done, score, summary

View File

@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Compute a streaming estimation of the mean of submitted tensors."""
from __future__ import absolute_import
@@ -53,9 +52,8 @@ class StreamingMean(object):
# Add a batch dimension if necessary.
if value.shape.ndims == self._sum.shape.ndims:
value = value[None, ...]
return tf.group(
self._sum.assign_add(tf.reduce_sum(value, 0)),
self._count.assign_add(tf.shape(value)[0]))
return tf.group(self._sum.assign_add(tf.reduce_sum(value, 0)),
self._count.assign_add(tf.shape(value)[0]))
def clear(self):
"""Return the mean estimate and reset the streaming statistics."""

View File

@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Wrappers for OpenAI Gym environments."""
from __future__ import absolute_import
@@ -149,8 +148,7 @@ class FrameHistory(object):
return self._select_frames()
def _select_frames(self):
indices = [
(self._step - index) % self._capacity for index in self._past_indices]
indices = [(self._step - index) % self._capacity for index in self._past_indices]
observ = self._buffer[indices]
if self._flatten:
observ = np.reshape(observ, (-1,) + observ.shape[2:])
@@ -191,14 +189,14 @@ class RangeNormalize(object):
def __init__(self, env, observ=None, action=None):
self._env = env
self._should_normalize_observ = (
observ is not False and self._is_finite(self._env.observation_space))
self._should_normalize_observ = (observ is not False and
self._is_finite(self._env.observation_space))
if observ is True and not self._should_normalize_observ:
raise ValueError('Cannot normalize infinite observation range.')
if observ is None and not self._should_normalize_observ:
tf.logging.info('Not normalizing infinite observation range.')
self._should_normalize_action = (
action is not False and self._is_finite(self._env.action_space))
self._should_normalize_action = (action is not False and
self._is_finite(self._env.action_space))
if action is True and not self._should_normalize_action:
raise ValueError('Cannot normalize infinite action range.')
if action is None and not self._should_normalize_action:
@@ -323,8 +321,7 @@ class ExternalProcess(object):
action_space: The cached action space of the environment.
"""
self._conn, conn = multiprocessing.Pipe()
self._process = multiprocessing.Process(
target=self._worker, args=(constructor, conn))
self._process = multiprocessing.Process(target=self._worker, args=(constructor, conn))
atexit.register(self.close)
self._process.start()
self._observ_space = None
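
The _select_frames() change above is pure reformatting; the underlying index math picks frames out of a circular buffer. A small worked example with illustrative numbers:

capacity = 4              # size of the circular frame buffer
past_indices = [0, 1, 2]  # how many steps back each requested frame is
step = 9                  # current step counter
indices = [(step - index) % capacity for index in past_indices]
print(indices)  # [1, 0, 3]: slots of the current frame and the two before it
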

View File

@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
r"""Script to train a batch reinforcement learning algorithm.
Command line:
@@ -67,21 +66,25 @@ def _define_loop(graph, logdir, train_steps, eval_steps):
Returns:
Loop object.
"""
loop = tools.Loop(
logdir, graph.step, graph.should_log, graph.do_report,
graph.force_reset)
loop.add_phase(
'train', graph.done, graph.score, graph.summary, train_steps,
report_every=train_steps,
log_every=train_steps // 2,
checkpoint_every=None,
feed={graph.is_training: True})
loop.add_phase(
'eval', graph.done, graph.score, graph.summary, eval_steps,
report_every=eval_steps,
log_every=eval_steps // 2,
checkpoint_every=10 * eval_steps,
feed={graph.is_training: False})
loop = tools.Loop(logdir, graph.step, graph.should_log, graph.do_report, graph.force_reset)
loop.add_phase('train',
graph.done,
graph.score,
graph.summary,
train_steps,
report_every=train_steps,
log_every=train_steps // 2,
checkpoint_every=None,
feed={graph.is_training: True})
loop.add_phase('eval',
graph.done,
graph.score,
graph.summary,
eval_steps,
report_every=eval_steps,
log_every=eval_steps // 2,
checkpoint_every=10 * eval_steps,
feed={graph.is_training: False})
return loop
@@ -102,18 +105,13 @@ def train(config, env_processes):
if config.update_every % config.num_agents:
tf.logging.warn('Number of agents should divide episodes per update.')
with tf.device('/cpu:0'):
batch_env = utility.define_batch_env(
lambda: _create_environment(config),
config.num_agents, env_processes)
graph = utility.define_simulation_graph(
batch_env, config.algorithm, config)
loop = _define_loop(
graph, config.logdir,
config.update_every * config.max_length,
config.eval_episodes * config.max_length)
total_steps = int(
config.steps / config.update_every *
(config.update_every + config.eval_episodes))
batch_env = utility.define_batch_env(lambda: _create_environment(config), config.num_agents,
env_processes)
graph = utility.define_simulation_graph(batch_env, config.algorithm, config)
loop = _define_loop(graph, config.logdir, config.update_every * config.max_length,
config.eval_episodes * config.max_length)
total_steps = int(config.steps / config.update_every *
(config.update_every + config.eval_episodes))
# Exclude episode related variables since the Python state of environments is
# not checkpointed and thus new episodes start after resuming.
saver = utility.define_saver(exclude=(r'.*_temporary/.*',))
@@ -131,8 +129,8 @@ def main(_):
utility.set_up_logging()
if not FLAGS.config:
raise KeyError('You must specify a configuration.')
logdir = FLAGS.logdir and os.path.expanduser(os.path.join(
FLAGS.logdir, '{}-{}'.format(FLAGS.timestamp, FLAGS.config)))
logdir = FLAGS.logdir and os.path.expanduser(
os.path.join(FLAGS.logdir, '{}-{}'.format(FLAGS.timestamp, FLAGS.config)))
try:
config = utility.load_config(logdir)
except IOError:
@@ -144,16 +142,11 @@ def main(_):
if __name__ == '__main__':
FLAGS = tf.app.flags.FLAGS
tf.app.flags.DEFINE_string(
'logdir', None,
'Base directory to store logs.')
tf.app.flags.DEFINE_string(
'timestamp', datetime.datetime.now().strftime('%Y%m%dT%H%M%S'),
'Sub directory to store logs.')
tf.app.flags.DEFINE_string(
'config', None,
'Configuration to execute.')
tf.app.flags.DEFINE_boolean(
'env_processes', True,
'Step environments in separate processes to circumvent the GIL.')
tf.app.flags.DEFINE_string('logdir', None, 'Base directory to store logs.')
tf.app.flags.DEFINE_string('timestamp',
datetime.datetime.now().strftime('%Y%m%dT%H%M%S'),
'Sub directory to store logs.')
tf.app.flags.DEFINE_string('config', None, 'Configuration to execute.')
tf.app.flags.DEFINE_boolean('env_processes', True,
'Step environments in separate processes to circumvent the GIL.')
tf.app.run()
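
One detail worth spelling out from the train() changes above: total_steps interleaves training and evaluation phases. A worked example with illustrative numbers (not the shipped defaults):

steps = 1000000     # requested training steps
update_every = 30   # training episodes per update phase
eval_episodes = 25  # episodes per evaluation phase
total_steps = int(steps / update_every * (update_every + eval_episodes))
print(total_steps)  # 1833333: training steps plus the interleaved evaluation steps
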

View File

@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Utilities for using reinforcement learning algorithms."""
from __future__ import absolute_import
@@ -46,8 +45,7 @@ def define_simulation_graph(batch_env, algo_cls, config):
do_report = tf.placeholder(tf.bool, name='do_report')
force_reset = tf.placeholder(tf.bool, name='force_reset')
algo = algo_cls(batch_env, step, is_training, should_log, config)
done, score, summary = tools.simulate(
batch_env, algo, should_log, force_reset)
done, score, summary = tools.simulate(batch_env, algo, should_log, force_reset)
message = 'Graph contains {} trainable variables.'
tf.logging.info(message.format(tools.count_weights()))
# pylint: enable=unused-variable
@@ -67,9 +65,7 @@ def define_batch_env(constructor, num_agents, env_processes):
"""
with tf.variable_scope('environments'):
if env_processes:
envs = [
tools.wrappers.ExternalProcess(constructor)
for _ in range(num_agents)]
envs = [tools.wrappers.ExternalProcess(constructor) for _ in range(num_agents)]
else:
envs = [constructor() for _ in range(num_agents)]
batch_env = tools.BatchEnv(envs, blocking=not env_processes)
@@ -111,9 +107,7 @@ def initialize_variables(sess, saver, logdir, checkpoint=None, resume=None):
ValueError: If resume expected but no log directory specified.
RuntimeError: If no resume expected but a checkpoint was found.
"""
sess.run(tf.group(
tf.local_variables_initializer(),
tf.global_variables_initializer()))
sess.run(tf.group(tf.local_variables_initializer(), tf.global_variables_initializer()))
if resume and not (logdir or checkpoint):
raise ValueError('Need to specify logdir to resume a checkpoint.')
if logdir:
@@ -152,9 +146,8 @@ def save_config(config, logdir=None):
with tf.gfile.GFile(config_path, 'w') as file_:
yaml.dump(config, file_, default_flow_style=False)
else:
message = (
'Start a new run without storing summaries and checkpoints since no '
'logging directory was specified.')
message = ('Start a new run without storing summaries and checkpoints since no '
'logging directory was specified.')
tf.logging.info(message)
return config
@@ -173,9 +166,8 @@ def load_config(logdir):
"""
config_path = logdir and os.path.join(logdir, 'config.yaml')
if not config_path or not tf.gfile.Exists(config_path):
message = (
'Cannot resume an existing run since the logging directory does not '
'contain a configuration file.')
message = ('Cannot resume an existing run since the logging directory does not '
'contain a configuration file.')
raise IOError(message)
with tf.gfile.FastGFile(config_path, 'r') as file_:
config = yaml.load(file_, Loader=yaml.Loader)
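
save_config/load_config above round-trip the run configuration through YAML. A minimal sketch of that round trip with a plain dict (the real config is an attribute dictionary, so this is illustrative only):

import yaml

config = {'algorithm': 'ppo', 'num_agents': 30, 'max_length': 1000}
text = yaml.dump(config, default_flow_style=False)
restored = yaml.load(text, Loader=yaml.Loader)
print(restored == config)  # True
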

View File

@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
r"""Script to render videos of the Proximal Policy Gradient algorithm.
Command line:
@@ -53,8 +52,7 @@ def _create_environment(config, outdir):
setattr(env, 'spec', getattr(env, 'spec', None))
if config.max_length:
env = tools.wrappers.LimitDuration(env, config.max_length)
env = gym.wrappers.Monitor(
env, outdir, lambda unused_episode_number: True)
env = gym.wrappers.Monitor(env, outdir, lambda unused_episode_number: True)
env = tools.wrappers.RangeNormalize(env)
env = tools.wrappers.ClipAction(env)
env = tools.wrappers.ConvertTo32Bit(env)
@@ -71,20 +69,20 @@ def _define_loop(graph, eval_steps):
Returns:
Loop object.
"""
loop = tools.Loop(
None, graph.step, graph.should_log, graph.do_report, graph.force_reset)
loop.add_phase(
'eval', graph.done, graph.score, graph.summary, eval_steps,
report_every=eval_steps,
log_every=None,
checkpoint_every=None,
feed={graph.is_training: False})
loop = tools.Loop(None, graph.step, graph.should_log, graph.do_report, graph.force_reset)
loop.add_phase('eval',
graph.done,
graph.score,
graph.summary,
eval_steps,
report_every=eval_steps,
log_every=None,
checkpoint_every=None,
feed={graph.is_training: False})
return loop
def visualize(
logdir, outdir, num_agents, num_episodes, checkpoint=None,
env_processes=True):
def visualize(logdir, outdir, num_agents, num_episodes, checkpoint=None, env_processes=True):
"""Recover checkpoint and render videos from it.
Args:
@@ -97,20 +95,16 @@ def visualize(
"""
config = utility.load_config(logdir)
with tf.device('/cpu:0'):
batch_env = utility.define_batch_env(
lambda: _create_environment(config, outdir),
num_agents, env_processes)
graph = utility.define_simulation_graph(
batch_env, config.algorithm, config)
batch_env = utility.define_batch_env(lambda: _create_environment(config, outdir), num_agents,
env_processes)
graph = utility.define_simulation_graph(batch_env, config.algorithm, config)
total_steps = num_episodes * config.max_length
loop = _define_loop(graph, total_steps)
saver = utility.define_saver(
exclude=(r'.*_temporary/.*', r'global_step'))
saver = utility.define_saver(exclude=(r'.*_temporary/.*', r'global_step'))
sess_config = tf.ConfigProto(allow_soft_placement=True)
sess_config.gpu_options.allow_growth = True
with tf.Session(config=sess_config) as sess:
utility.initialize_variables(
sess, saver, config.logdir, checkpoint, resume=True)
utility.initialize_variables(sess, saver, config.logdir, checkpoint, resume=True)
for unused_score in loop.run(sess, saver, total_steps):
pass
batch_env.close()
@@ -123,29 +117,18 @@ def main(_):
raise KeyError('You must specify logging and outdirs directories.')
FLAGS.logdir = os.path.expanduser(FLAGS.logdir)
FLAGS.outdir = os.path.expanduser(FLAGS.outdir)
visualize(
FLAGS.logdir, FLAGS.outdir, FLAGS.num_agents, FLAGS.num_episodes,
FLAGS.checkpoint, FLAGS.env_processes)
visualize(FLAGS.logdir, FLAGS.outdir, FLAGS.num_agents, FLAGS.num_episodes, FLAGS.checkpoint,
FLAGS.env_processes)
if __name__ == '__main__':
FLAGS = tf.app.flags.FLAGS
tf.app.flags.DEFINE_string(
'logdir', None,
'Directory to the checkpoint of a training run.')
tf.app.flags.DEFINE_string(
'outdir', None,
'Local directory for storing the monitoring outdir.')
tf.app.flags.DEFINE_string(
'checkpoint', None,
'Checkpoint name to load; defaults to most recent.')
tf.app.flags.DEFINE_integer(
'num_agents', 1,
'How many environments to step in parallel.')
tf.app.flags.DEFINE_integer(
'num_episodes', 5,
'Minimum number of episodes to render.')
tf.app.flags.DEFINE_boolean(
'env_processes', True,
'Step environments in separate processes to circumvent the GIL.')
tf.app.flags.DEFINE_string('logdir', None, 'Directory to the checkpoint of a training run.')
tf.app.flags.DEFINE_string('outdir', None, 'Local directory for storing the monitoring outdir.')
tf.app.flags.DEFINE_string('checkpoint', None,
'Checkpoint name to load; defaults to most recent.')
tf.app.flags.DEFINE_integer('num_agents', 1, 'How many environments to step in parallel.')
tf.app.flags.DEFINE_integer('num_episodes', 5, 'Minimum number of episodes to render.')
tf.app.flags.DEFINE_boolean('env_processes', True,
'Step environments in separate processes to circumvent the GIL.')
tf.app.run()

View File

@@ -7,4 +7,3 @@ from . import train_kuka_grasping
from . import train_pybullet_cartpole
from . import train_pybullet_racecar
from . import train_pybullet_zed_racecar

View File

@@ -6,17 +6,17 @@ import numpy as np
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
os.sys.path.insert(0,parentdir)
os.sys.path.insert(0, parentdir)
import gym
from pybullet_envs.bullet.kuka_diverse_object_gym_env import KukaDiverseObjectEnv
from gym import spaces
class ContinuousDownwardBiasPolicy(object):
"""Policy which takes continuous actions, and is biased to move down.
"""
def __init__(self, height_hack_prob=0.9):
"""Initializes the DownwardBiasPolicy.
@@ -36,25 +36,25 @@ class ContinuousDownwardBiasPolicy(object):
def main():
env = KukaDiverseObjectEnv(renders=True, isDiscrete=False)
policy = ContinuousDownwardBiasPolicy()
while True:
obs, done = env.reset(), False
print("===================================")
print("obs")
print(obs)
episode_rew = 0
while not done:
env.render(mode='human')
act = policy.sample_action(obs, .1)
print("Action")
print(act)
obs, rew, done, _ = env.step([0, 0, 0, 0, 0])
episode_rew += rew
print("Episode reward", episode_rew)
env = KukaDiverseObjectEnv(renders=True, isDiscrete=False)
policy = ContinuousDownwardBiasPolicy()
while True:
obs, done = env.reset(), False
print("===================================")
print("obs")
print(obs)
episode_rew = 0
while not done:
env.render(mode='human')
act = policy.sample_action(obs, .1)
print("Action")
print(act)
obs, rew, done, _ = env.step([0, 0, 0, 0, 0])
episode_rew += rew
print("Episode reward", episode_rew)
if __name__ == '__main__':
main()
main()

View File

@@ -2,7 +2,7 @@
import os, inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
os.sys.path.insert(0,parentdir)
os.sys.path.insert(0, parentdir)
import gym
from pybullet_envs.bullet.kukaGymEnv import KukaGymEnv
@@ -11,22 +11,22 @@ from baselines import deepq
def main():
env = KukaGymEnv(renders=True, isDiscrete=True)
act = deepq.load("kuka_model.pkl")
print(act)
while True:
obs, done = env.reset(), False
print("===================================")
print("obs")
print(obs)
episode_rew = 0
while not done:
env.render()
obs, rew, done, _ = env.step(act(obs[None])[0])
episode_rew += rew
print("Episode reward", episode_rew)
env = KukaGymEnv(renders=True, isDiscrete=True)
act = deepq.load("kuka_model.pkl")
print(act)
while True:
obs, done = env.reset(), False
print("===================================")
print("obs")
print(obs)
episode_rew = 0
while not done:
env.render()
obs, rew, done, _ = env.step(act(obs[None])[0])
episode_rew += rew
print("Episode reward", episode_rew)
if __name__ == '__main__':
main()
main()

View File

@@ -2,7 +2,7 @@
import os, inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
os.sys.path.insert(0,parentdir)
os.sys.path.insert(0, parentdir)
import gym
import time
@@ -10,28 +10,29 @@ import time
from baselines import deepq
from pybullet_envs.bullet.cartpole_bullet import CartPoleBulletEnv
def main():
env = gym.make('CartPoleBulletEnv-v1')
act = deepq.load("cartpole_model.pkl")
while True:
obs, done = env.reset(), False
print("obs")
print(obs)
print("type(obs)")
print(type(obs))
episode_rew = 0
while not done:
env.render()
o = obs[None]
aa = act(o)
a = aa[0]
obs, rew, done, _ = env.step(a)
episode_rew += rew
time.sleep(1./240.)
print("Episode reward", episode_rew)
def main():
env = gym.make('CartPoleBulletEnv-v1')
act = deepq.load("cartpole_model.pkl")
while True:
obs, done = env.reset(), False
print("obs")
print(obs)
print("type(obs)")
print(type(obs))
episode_rew = 0
while not done:
env.render()
o = obs[None]
aa = act(o)
a = aa[0]
obs, rew, done, _ = env.step(a)
episode_rew += rew
time.sleep(1. / 240.)
print("Episode reward", episode_rew)
if __name__ == '__main__':
main()
main()

View File

@@ -2,7 +2,7 @@
import os, inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
os.sys.path.insert(0,parentdir)
os.sys.path.insert(0, parentdir)
import gym
from pybullet_envs.bullet.racecarGymEnv import RacecarGymEnv
@@ -11,22 +11,22 @@ from baselines import deepq
def main():
env = RacecarGymEnv(renders=True,isDiscrete=True)
act = deepq.load("racecar_model.pkl")
print(act)
while True:
obs, done = env.reset(), False
print("===================================")
print("obs")
print(obs)
episode_rew = 0
while not done:
env.render()
obs, rew, done, _ = env.step(act(obs[None])[0])
episode_rew += rew
print("Episode reward", episode_rew)
env = RacecarGymEnv(renders=True, isDiscrete=True)
act = deepq.load("racecar_model.pkl")
print(act)
while True:
obs, done = env.reset(), False
print("===================================")
print("obs")
print(obs)
episode_rew = 0
while not done:
env.render()
obs, rew, done, _ = env.step(act(obs[None])[0])
episode_rew += rew
print("Episode reward", episode_rew)
if __name__ == '__main__':
main()
main()

View File

@@ -2,7 +2,7 @@
import os, inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
os.sys.path.insert(0,parentdir)
os.sys.path.insert(0, parentdir)
import gym
from pybullet_envs.bullet.racecarZEDGymEnv import RacecarZEDGymEnv
@@ -11,22 +11,22 @@ from baselines import deepq
def main():
env = RacecarZEDGymEnv(renders=True)
act = deepq.load("racecar_zed_model.pkl")
print(act)
while True:
obs, done = env.reset(), False
print("===================================")
print("obs")
print(obs)
episode_rew = 0
while not done:
env.render()
obs, rew, done, _ = env.step(act(obs[None])[0])
episode_rew += rew
print("Episode reward", episode_rew)
env = RacecarZEDGymEnv(renders=True)
act = deepq.load("racecar_zed_model.pkl")
print(act)
while True:
obs, done = env.reset(), False
print("===================================")
print("obs")
print(obs)
episode_rew = 0
while not done:
env.render()
obs, rew, done, _ = env.step(act(obs[None])[0])
episode_rew += rew
print("Episode reward", episode_rew)
if __name__ == '__main__':
main()
main()

View File

@@ -2,7 +2,7 @@
import os, inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
os.sys.path.insert(0,parentdir)
os.sys.path.insert(0, parentdir)
import gym
from pybullet_envs.bullet.kukaCamGymEnv import KukaCamGymEnv
@@ -12,39 +12,34 @@ from baselines import deepq
import datetime
def callback(lcl, glb):
# stop training if reward exceeds 199
total = sum(lcl['episode_rewards'][-101:-1]) / 100
totalt = lcl['t']
#print("totalt")
#print(totalt)
is_solved = totalt > 2000 and total >= 10
return is_solved
# stop training if reward exceeds 199
total = sum(lcl['episode_rewards'][-101:-1]) / 100
totalt = lcl['t']
#print("totalt")
#print(totalt)
is_solved = totalt > 2000 and total >= 10
return is_solved
def main():
env = KukaCamGymEnv(renders=False, isDiscrete=True)
model = deepq.models.cnn_to_mlp(
convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)],
hiddens=[256],
dueling=False
)
act = deepq.learn(
env,
q_func=model,
lr=1e-3,
max_timesteps=10000000,
buffer_size=50000,
exploration_fraction=0.1,
exploration_final_eps=0.02,
print_freq=10,
callback=callback
)
print("Saving model to kuka_cam_model.pkl")
act.save("kuka_cam_model.pkl")
env = KukaCamGymEnv(renders=False, isDiscrete=True)
model = deepq.models.cnn_to_mlp(convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)],
hiddens=[256],
dueling=False)
act = deepq.learn(env,
q_func=model,
lr=1e-3,
max_timesteps=10000000,
buffer_size=50000,
exploration_fraction=0.1,
exploration_final_eps=0.02,
print_freq=10,
callback=callback)
print("Saving model to kuka_cam_model.pkl")
act.save("kuka_cam_model.pkl")
if __name__ == '__main__':
main()
main()
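
The callback above stops training once the average reward of the last 100 completed episodes clears a threshold. A worked example of that rolling-average check, with a reward history invented for illustration:

episode_rewards = [9.0] * 105 + [12.0] * 100  # illustrative history, newest last
total = sum(episode_rewards[-101:-1]) / 100   # mean of the last 100 finished episodes
totalt = 4000                                 # pretend timestep counter
is_solved = totalt > 2000 and total >= 10
print(total, is_solved)  # 11.97 True
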

View File

@@ -2,7 +2,7 @@
import os, inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
os.sys.path.insert(0,parentdir)
os.sys.path.insert(0, parentdir)
import gym
from pybullet_envs.bullet.kukaGymEnv import KukaGymEnv
@@ -12,35 +12,32 @@ from baselines import deepq
import datetime
def callback(lcl, glb):
# stop training if reward exceeds 199
total = sum(lcl['episode_rewards'][-101:-1]) / 100
totalt = lcl['t']
#print("totalt")
#print(totalt)
is_solved = totalt > 2000 and total >= 10
return is_solved
# stop training if reward exceeds 199
total = sum(lcl['episode_rewards'][-101:-1]) / 100
totalt = lcl['t']
#print("totalt")
#print(totalt)
is_solved = totalt > 2000 and total >= 10
return is_solved
def main():
env = KukaGymEnv(renders=False, isDiscrete=True)
model = deepq.models.mlp([64])
act = deepq.learn(
env,
q_func=model,
lr=1e-3,
max_timesteps=10000000,
buffer_size=50000,
exploration_fraction=0.1,
exploration_final_eps=0.02,
print_freq=10,
callback=callback
)
print("Saving model to kuka_model.pkl")
act.save("kuka_model.pkl")
env = KukaGymEnv(renders=False, isDiscrete=True)
model = deepq.models.mlp([64])
act = deepq.learn(env,
q_func=model,
lr=1e-3,
max_timesteps=10000000,
buffer_size=50000,
exploration_fraction=0.1,
exploration_final_eps=0.02,
print_freq=10,
callback=callback)
print("Saving model to kuka_model.pkl")
act.save("kuka_model.pkl")
if __name__ == '__main__':
main()
main()

View File

@@ -2,7 +2,7 @@
import os, inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
os.sys.path.insert(0,parentdir)
os.sys.path.insert(0, parentdir)
import gym
from pybullet_envs.bullet.cartpole_bullet import CartPoleBulletEnv
@@ -11,29 +11,27 @@ from baselines import deepq
def callback(lcl, glb):
# stop training if reward exceeds 199
is_solved = lcl['t'] > 100 and sum(lcl['episode_rewards'][-101:-1]) / 100 >= 199
return is_solved
# stop training if reward exceeds 199
is_solved = lcl['t'] > 100 and sum(lcl['episode_rewards'][-101:-1]) / 100 >= 199
return is_solved
def main():
env = CartPoleBulletEnv(renders=False)
model = deepq.models.mlp([64])
act = deepq.learn(
env,
q_func=model,
lr=1e-3,
max_timesteps=100000,
buffer_size=50000,
exploration_fraction=0.1,
exploration_final_eps=0.02,
print_freq=10,
callback=callback
)
print("Saving model to cartpole_model.pkl")
act.save("cartpole_model.pkl")
env = CartPoleBulletEnv(renders=False)
model = deepq.models.mlp([64])
act = deepq.learn(env,
q_func=model,
lr=1e-3,
max_timesteps=100000,
buffer_size=50000,
exploration_fraction=0.1,
exploration_final_eps=0.02,
print_freq=10,
callback=callback)
print("Saving model to cartpole_model.pkl")
act.save("cartpole_model.pkl")
if __name__ == '__main__':
main()
main()

View File

@@ -2,7 +2,7 @@
import os, inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
os.sys.path.insert(0,parentdir)
os.sys.path.insert(0, parentdir)
import gym
from pybullet_envs.bullet.racecarGymEnv import RacecarGymEnv
@@ -12,33 +12,30 @@ from baselines import deepq
import datetime
def callback(lcl, glb):
# stop training if reward exceeds 199
total = sum(lcl['episode_rewards'][-101:-1]) / 100
totalt = lcl['t']
is_solved = totalt > 2000 and total >= -50
return is_solved
# stop training if reward exceeds 199
total = sum(lcl['episode_rewards'][-101:-1]) / 100
totalt = lcl['t']
is_solved = totalt > 2000 and total >= -50
return is_solved
def main():
env = RacecarGymEnv(renders=False,isDiscrete=True)
model = deepq.models.mlp([64])
act = deepq.learn(
env,
q_func=model,
lr=1e-3,
max_timesteps=10000,
buffer_size=50000,
exploration_fraction=0.1,
exploration_final_eps=0.02,
print_freq=10,
callback=callback
)
print("Saving model to racecar_model.pkl")
act.save("racecar_model.pkl")
env = RacecarGymEnv(renders=False, isDiscrete=True)
model = deepq.models.mlp([64])
act = deepq.learn(env,
q_func=model,
lr=1e-3,
max_timesteps=10000,
buffer_size=50000,
exploration_fraction=0.1,
exploration_final_eps=0.02,
print_freq=10,
callback=callback)
print("Saving model to racecar_model.pkl")
act.save("racecar_model.pkl")
if __name__ == '__main__':
main()
main()

View File

@@ -2,7 +2,7 @@
import os, inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
os.sys.path.insert(0,parentdir)
os.sys.path.insert(0, parentdir)
import gym
from pybullet_envs.bullet.racecarZEDGymEnv import RacecarZEDGymEnv
@@ -12,36 +12,32 @@ from baselines import deepq
import datetime
def callback(lcl, glb):
# stop training if reward exceeds 199
total = sum(lcl['episode_rewards'][-101:-1]) / 100
totalt = lcl['t']
is_solved = totalt > 2000 and total >= -50
return is_solved
# stop training if reward exceeds 199
total = sum(lcl['episode_rewards'][-101:-1]) / 100
totalt = lcl['t']
is_solved = totalt > 2000 and total >= -50
return is_solved
def main():
env = RacecarZEDGymEnv(renders=False, isDiscrete=True)
model = deepq.models.cnn_to_mlp(
convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)],
hiddens=[256],
dueling=False
)
act = deepq.learn(
env,
q_func=model,
lr=1e-3,
max_timesteps=10000,
buffer_size=50000,
exploration_fraction=0.1,
exploration_final_eps=0.02,
print_freq=10,
callback=callback
)
print("Saving model to racecar_zed_model.pkl")
act.save("racecar_zed_model.pkl")
env = RacecarZEDGymEnv(renders=False, isDiscrete=True)
model = deepq.models.cnn_to_mlp(convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)],
hiddens=[256],
dueling=False)
act = deepq.learn(env,
q_func=model,
lr=1e-3,
max_timesteps=10000,
buffer_size=50000,
exploration_fraction=0.1,
exploration_final_eps=0.02,
print_freq=10,
callback=callback)
print("Saving model to racecar_zed_model.pkl")
act.save("racecar_zed_model.pkl")
if __name__ == '__main__':
main()
main()

View File

@@ -9,9 +9,9 @@ class BulletClient(object):
def __init__(self, connection_mode=pybullet.DIRECT, options=""):
"""Create a simulation and connect to it."""
self._client = pybullet.connect(pybullet.SHARED_MEMORY)
if(self._client<0):
print("options=",options)
self._client = pybullet.connect(connection_mode,options=options)
if (self._client < 0):
print("options=", options)
self._client = pybullet.connect(connection_mode, options=options)
self._shapes = {}
def __del__(self):
@@ -25,5 +25,5 @@ class BulletClient(object):
"""Inject the client id into Bullet functions."""
attribute = getattr(pybullet, name)
if inspect.isbuiltin(attribute):
attribute = functools.partial(attribute, physicsClientId=self._client)
attribute = functools.partial(attribute, physicsClientId=self._client)
return attribute
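
A hedged usage sketch for the BulletClient wrapper above: each instance owns its own physics server, and plain pybullet calls are routed to it with the client id injected. The import path is an assumption based on this file's location; pybullet and pybullet_data are assumed to be installed.

import pybullet
import pybullet_data
from pybullet_envs.bullet.bullet_client import BulletClient  # assumed module path

client = BulletClient(connection_mode=pybullet.DIRECT)
client.setAdditionalSearchPath(pybullet_data.getDataPath())
client.setGravity(0, 0, -9.8)
plane = client.loadURDF("plane.urdf")  # physicsClientId is injected by __getattr__
client.stepSimulation()
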

View File

@@ -2,10 +2,10 @@
Classic cart-pole system implemented by Rich Sutton et al.
Copied from https://webdocs.cs.ualberta.ca/~sutton/book/code/pole.c
"""
import os, inspect
import os, inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
os.sys.path.insert(0,parentdir)
os.sys.path.insert(0, parentdir)
import logging
import math
@@ -21,26 +21,24 @@ from pkg_resources import parse_version
logger = logging.getLogger(__name__)
class CartPoleBulletEnv(gym.Env):
metadata = {
'render.modes': ['human', 'rgb_array'],
'video.frames_per_second' : 50
}
metadata = {'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': 50}
def __init__(self, renders=True):
# start the bullet physics server
self._renders = renders
if (renders):
p.connect(p.GUI)
p.connect(p.GUI)
else:
p.connect(p.DIRECT)
p.connect(p.DIRECT)
self.theta_threshold_radians = 12 * 2 * math.pi / 360
self.x_threshold = 0.4 #2.4
self.x_threshold = 0.4 #2.4
high = np.array([
self.x_threshold * 2,
np.finfo(np.float32).max,
self.theta_threshold_radians * 2,
np.finfo(np.float32).max])
self.x_threshold * 2,
np.finfo(np.float32).max, self.theta_threshold_radians * 2,
np.finfo(np.float32).max
])
self.force_mag = 10
@@ -60,7 +58,7 @@ class CartPoleBulletEnv(gym.Env):
return [seed]
def step(self, action):
force = self.force_mag if action==1 else -self.force_mag
force = self.force_mag if action == 1 else -self.force_mag
p.setJointMotorControl2(self.cartpole, 0, p.TORQUE_CONTROL, force=force)
p.stepSimulation()
@@ -78,16 +76,17 @@ class CartPoleBulletEnv(gym.Env):
return np.array(self.state), reward, done, {}
def reset(self):
# print("-----------reset simulation---------------")
# print("-----------reset simulation---------------")
p.resetSimulation()
self.cartpole = p.loadURDF(os.path.join(pybullet_data.getDataPath(),"cartpole.urdf"),[0,0,0])
self.cartpole = p.loadURDF(os.path.join(pybullet_data.getDataPath(), "cartpole.urdf"),
[0, 0, 0])
p.changeDynamics(self.cartpole, -1, linearDamping=0, angularDamping=0)
p.changeDynamics(self.cartpole, 0, linearDamping=0, angularDamping=0)
p.changeDynamics(self.cartpole, 1, linearDamping=0, angularDamping=0)
self.timeStep = 0.02
p.setJointMotorControl2(self.cartpole, 1, p.VELOCITY_CONTROL, force=0)
p.setJointMotorControl2(self.cartpole, 0, p.VELOCITY_CONTROL, force=0)
p.setGravity(0,0, -9.8)
p.setGravity(0, 0, -9.8)
p.setTimeStep(self.timeStep)
p.setRealTimeSimulation(0)
@@ -100,4 +99,4 @@ class CartPoleBulletEnv(gym.Env):
return np.array(self.state)
def render(self, mode='human', close=False):
return
return
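
A minimal rollout sketch for the Bullet cart-pole environment above, assuming a random policy just to exercise reset()/step(); the import path matches the one used by the training scripts earlier in this commit.

import numpy as np
from pybullet_envs.bullet.cartpole_bullet import CartPoleBulletEnv

env = CartPoleBulletEnv(renders=False)
obs, done, episode_rew = env.reset(), False, 0.0
for _ in range(1000):  # cap the rollout length
  obs, rew, done, _ = env.step(np.random.randint(2))  # 0/1: push left or right
  episode_rew += rew
  if done:
    break
print("Episode reward", episode_rew)
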

View File

@@ -22,4 +22,3 @@ class EnvRandomizerBase(object):
env: The environment to be randomized.
"""
pass

View File

@@ -1,7 +1,7 @@
import os, inspect
import os, inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
os.sys.path.insert(0,parentdir)
os.sys.path.insert(0, parentdir)
import pybullet as p
import numpy as np
@@ -17,49 +17,60 @@ class Kuka:
self.timeStep = timeStep
self.maxVelocity = .35
self.maxForce = 200.
self.fingerAForce = 2
self.fingerAForce = 2
self.fingerBForce = 2.5
self.fingerTipForce = 2
self.useInverseKinematics = 1
self.useSimulation = 1
self.useNullSpace =21
self.useNullSpace = 21
self.useOrientation = 1
self.kukaEndEffectorIndex = 6
self.kukaGripperIndex = 7
#lower limits for null space
self.ll=[-.967,-2 ,-2.96,0.19,-2.96,-2.09,-3.05]
self.ll = [-.967, -2, -2.96, 0.19, -2.96, -2.09, -3.05]
#upper limits for null space
self.ul=[.967,2 ,2.96,2.29,2.96,2.09,3.05]
self.ul = [.967, 2, 2.96, 2.29, 2.96, 2.09, 3.05]
#joint ranges for null space
self.jr=[5.8,4,5.8,4,5.8,4,6]
self.jr = [5.8, 4, 5.8, 4, 5.8, 4, 6]
#restposes for null space
self.rp=[0,0,0,0.5*math.pi,0,-math.pi*0.5*0.66,0]
self.rp = [0, 0, 0, 0.5 * math.pi, 0, -math.pi * 0.5 * 0.66, 0]
#joint damping coefficents
self.jd=[0.00001,0.00001,0.00001,0.00001,0.00001,0.00001,0.00001,0.00001,0.00001,0.00001,0.00001,0.00001,0.00001,0.00001]
self.jd = [
0.00001, 0.00001, 0.00001, 0.00001, 0.00001, 0.00001, 0.00001, 0.00001, 0.00001, 0.00001,
0.00001, 0.00001, 0.00001, 0.00001
]
self.reset()
def reset(self):
objects = p.loadSDF(os.path.join(self.urdfRootPath,"kuka_iiwa/kuka_with_gripper2.sdf"))
objects = p.loadSDF(os.path.join(self.urdfRootPath, "kuka_iiwa/kuka_with_gripper2.sdf"))
self.kukaUid = objects[0]
#for i in range (p.getNumJoints(self.kukaUid)):
# print(p.getJointInfo(self.kukaUid,i))
p.resetBasePositionAndOrientation(self.kukaUid,[-0.100000,0.000000,0.070000],[0.000000,0.000000,0.000000,1.000000])
self.jointPositions=[ 0.006418, 0.413184, -0.011401, -1.589317, 0.005379, 1.137684, -0.006539, 0.000048, -0.299912, 0.000000, -0.000043, 0.299960, 0.000000, -0.000200 ]
p.resetBasePositionAndOrientation(self.kukaUid, [-0.100000, 0.000000, 0.070000],
[0.000000, 0.000000, 0.000000, 1.000000])
self.jointPositions = [
0.006418, 0.413184, -0.011401, -1.589317, 0.005379, 1.137684, -0.006539, 0.000048,
-0.299912, 0.000000, -0.000043, 0.299960, 0.000000, -0.000200
]
self.numJoints = p.getNumJoints(self.kukaUid)
for jointIndex in range (self.numJoints):
p.resetJointState(self.kukaUid,jointIndex,self.jointPositions[jointIndex])
p.setJointMotorControl2(self.kukaUid,jointIndex,p.POSITION_CONTROL,targetPosition=self.jointPositions[jointIndex],force=self.maxForce)
self.trayUid = p.loadURDF(os.path.join(self.urdfRootPath,"tray/tray.urdf"), 0.640000,0.075000,-0.190000,0.000000,0.000000,1.000000,0.000000)
self.endEffectorPos = [0.537,0.0,0.5]
for jointIndex in range(self.numJoints):
p.resetJointState(self.kukaUid, jointIndex, self.jointPositions[jointIndex])
p.setJointMotorControl2(self.kukaUid,
jointIndex,
p.POSITION_CONTROL,
targetPosition=self.jointPositions[jointIndex],
force=self.maxForce)
self.trayUid = p.loadURDF(os.path.join(self.urdfRootPath, "tray/tray.urdf"), 0.640000,
0.075000, -0.190000, 0.000000, 0.000000, 1.000000, 0.000000)
self.endEffectorPos = [0.537, 0.0, 0.5]
self.endEffectorAngle = 0
self.motorNames = []
self.motorIndices = []
for i in range (self.numJoints):
jointInfo = p.getJointInfo(self.kukaUid,i)
for i in range(self.numJoints):
jointInfo = p.getJointInfo(self.kukaUid, i)
qIndex = jointInfo[3]
if qIndex > -1:
#print("motorname")
@@ -70,98 +81,136 @@ class Kuka:
def getActionDimension(self):
if (self.useInverseKinematics):
return len(self.motorIndices)
return 6 #position x,y,z and roll/pitch/yaw euler angles of end effector
return 6 #position x,y,z and roll/pitch/yaw euler angles of end effector
def getObservationDimension(self):
return len(self.getObservation())
def getObservation(self):
observation = []
state = p.getLinkState(self.kukaUid,self.kukaGripperIndex)
state = p.getLinkState(self.kukaUid, self.kukaGripperIndex)
pos = state[0]
orn = state[1]
euler = p.getEulerFromQuaternion(orn)
observation.extend(list(pos))
observation.extend(list(euler))
return observation
def applyAction(self, motorCommands):
#print ("self.numJoints")
#print (self.numJoints)
if (self.useInverseKinematics):
dx = motorCommands[0]
dy = motorCommands[1]
dz = motorCommands[2]
da = motorCommands[3]
fingerAngle = motorCommands[4]
state = p.getLinkState(self.kukaUid,self.kukaEndEffectorIndex)
state = p.getLinkState(self.kukaUid, self.kukaEndEffectorIndex)
actualEndEffectorPos = state[0]
#print("pos[2] (getLinkState(kukaEndEffectorIndex)")
#print(actualEndEffectorPos[2])
self.endEffectorPos[0] = self.endEffectorPos[0]+dx
if (self.endEffectorPos[0]>0.65):
self.endEffectorPos[0]=0.65
if (self.endEffectorPos[0]<0.50):
self.endEffectorPos[0]=0.50
self.endEffectorPos[1] = self.endEffectorPos[1]+dy
if (self.endEffectorPos[1]<-0.17):
self.endEffectorPos[1]=-0.17
if (self.endEffectorPos[1]>0.22):
self.endEffectorPos[1]=0.22
self.endEffectorPos[0] = self.endEffectorPos[0] + dx
if (self.endEffectorPos[0] > 0.65):
self.endEffectorPos[0] = 0.65
if (self.endEffectorPos[0] < 0.50):
self.endEffectorPos[0] = 0.50
self.endEffectorPos[1] = self.endEffectorPos[1] + dy
if (self.endEffectorPos[1] < -0.17):
self.endEffectorPos[1] = -0.17
if (self.endEffectorPos[1] > 0.22):
self.endEffectorPos[1] = 0.22
#print ("self.endEffectorPos[2]")
#print (self.endEffectorPos[2])
#print("actualEndEffectorPos[2]")
#print(actualEndEffectorPos[2])
#if (dz<0 or actualEndEffectorPos[2]<0.5):
self.endEffectorPos[2] = self.endEffectorPos[2]+dz
self.endEffectorPos[2] = self.endEffectorPos[2] + dz
self.endEffectorAngle = self.endEffectorAngle + da
pos = self.endEffectorPos
orn = p.getQuaternionFromEuler([0,-math.pi,0]) # -math.pi,yaw])
if (self.useNullSpace==1):
if (self.useOrientation==1):
jointPoses = p.calculateInverseKinematics(self.kukaUid,self.kukaEndEffectorIndex,pos,orn,self.ll,self.ul,self.jr,self.rp)
orn = p.getQuaternionFromEuler([0, -math.pi, 0]) # -math.pi,yaw])
if (self.useNullSpace == 1):
if (self.useOrientation == 1):
jointPoses = p.calculateInverseKinematics(self.kukaUid, self.kukaEndEffectorIndex, pos,
orn, self.ll, self.ul, self.jr, self.rp)
else:
jointPoses = p.calculateInverseKinematics(self.kukaUid,self.kukaEndEffectorIndex,pos,lowerLimits=self.ll, upperLimits=self.ul, jointRanges=self.jr, restPoses=self.rp)
jointPoses = p.calculateInverseKinematics(self.kukaUid,
self.kukaEndEffectorIndex,
pos,
lowerLimits=self.ll,
upperLimits=self.ul,
jointRanges=self.jr,
restPoses=self.rp)
else:
if (self.useOrientation==1):
jointPoses = p.calculateInverseKinematics(self.kukaUid,self.kukaEndEffectorIndex,pos,orn,jointDamping=self.jd)
if (self.useOrientation == 1):
jointPoses = p.calculateInverseKinematics(self.kukaUid,
self.kukaEndEffectorIndex,
pos,
orn,
jointDamping=self.jd)
else:
jointPoses = p.calculateInverseKinematics(self.kukaUid,self.kukaEndEffectorIndex,pos)
jointPoses = p.calculateInverseKinematics(self.kukaUid, self.kukaEndEffectorIndex, pos)
#print("jointPoses")
#print(jointPoses)
#print("self.kukaEndEffectorIndex")
#print(self.kukaEndEffectorIndex)
if (self.useSimulation):
for i in range (self.kukaEndEffectorIndex+1):
for i in range(self.kukaEndEffectorIndex + 1):
#print(i)
p.setJointMotorControl2(bodyUniqueId=self.kukaUid,jointIndex=i,controlMode=p.POSITION_CONTROL,targetPosition=jointPoses[i],targetVelocity=0,force=self.maxForce,maxVelocity=self.maxVelocity, positionGain=0.3,velocityGain=1)
p.setJointMotorControl2(bodyUniqueId=self.kukaUid,
jointIndex=i,
controlMode=p.POSITION_CONTROL,
targetPosition=jointPoses[i],
targetVelocity=0,
force=self.maxForce,
maxVelocity=self.maxVelocity,
positionGain=0.3,
velocityGain=1)
else:
#reset the joint state (ignoring all dynamics, not recommended to use during simulation)
for i in range (self.numJoints):
p.resetJointState(self.kukaUid,i,jointPoses[i])
for i in range(self.numJoints):
p.resetJointState(self.kukaUid, i, jointPoses[i])
#fingers
p.setJointMotorControl2(self.kukaUid,7,p.POSITION_CONTROL,targetPosition=self.endEffectorAngle,force=self.maxForce)
p.setJointMotorControl2(self.kukaUid,8,p.POSITION_CONTROL,targetPosition=-fingerAngle,force=self.fingerAForce)
p.setJointMotorControl2(self.kukaUid,11,p.POSITION_CONTROL,targetPosition=fingerAngle,force=self.fingerBForce)
p.setJointMotorControl2(self.kukaUid,10,p.POSITION_CONTROL,targetPosition=0,force=self.fingerTipForce)
p.setJointMotorControl2(self.kukaUid,13,p.POSITION_CONTROL,targetPosition=0,force=self.fingerTipForce)
p.setJointMotorControl2(self.kukaUid,
7,
p.POSITION_CONTROL,
targetPosition=self.endEffectorAngle,
force=self.maxForce)
p.setJointMotorControl2(self.kukaUid,
8,
p.POSITION_CONTROL,
targetPosition=-fingerAngle,
force=self.fingerAForce)
p.setJointMotorControl2(self.kukaUid,
11,
p.POSITION_CONTROL,
targetPosition=fingerAngle,
force=self.fingerBForce)
p.setJointMotorControl2(self.kukaUid,
10,
p.POSITION_CONTROL,
targetPosition=0,
force=self.fingerTipForce)
p.setJointMotorControl2(self.kukaUid,
13,
p.POSITION_CONTROL,
targetPosition=0,
force=self.fingerTipForce)
else:
for action in range (len(motorCommands)):
for action in range(len(motorCommands)):
motor = self.motorIndices[action]
p.setJointMotorControl2(self.kukaUid,motor,p.POSITION_CONTROL,targetPosition=motorCommands[action],force=self.maxForce)
p.setJointMotorControl2(self.kukaUid,
motor,
p.POSITION_CONTROL,
targetPosition=motorCommands[action],
force=self.maxForce)
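For reference, the reformatted Kuka helper above is driven directly through pybullet. The short usage sketch below is not part of this commit; the import path pybullet_envs.bullet.kuka and the asset locations under pybullet_data are assumptions, and the constructor keywords simply mirror how the gym environments later in this diff instantiate the arm.

import pybullet as p
import pybullet_data
from pybullet_envs.bullet import kuka  # import path is an assumption

p.connect(p.DIRECT)
p.setTimeStep(1. / 240.)
p.setGravity(0, 0, -10)

# Same keyword arguments the environments below pass to the helper class.
arm = kuka.Kuka(urdfRootPath=pybullet_data.getDataPath(), timeStep=1. / 240.)

# With useInverseKinematics enabled (the default), applyAction expects
# [dx, dy, dz, da, fingerAngle] in end-effector space.
for _ in range(100):
  arm.applyAction([0.001, 0.0, -0.002, 0.0, 0.3])
  p.stepSimulation()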
View File
@@ -1,8 +1,7 @@
import os, inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
os.sys.path.insert(0,parentdir)
os.sys.path.insert(0, parentdir)
import math
import gym
@@ -21,11 +20,9 @@ maxSteps = 1000
RENDER_HEIGHT = 720
RENDER_WIDTH = 960
class KukaCamGymEnv(gym.Env):
metadata = {
'render.modes': ['human', 'rgb_array'],
'video.frames_per_second' : 50
}
metadata = {'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': 50}
def __init__(self,
urdfRoot=pybullet_data.getDataPath(),
@@ -33,7 +30,7 @@ class KukaCamGymEnv(gym.Env):
isEnableSelfCollision=True,
renders=False,
isDiscrete=False):
self._timeStep = 1./240.
self._timeStep = 1. / 240.
self._urdfRoot = urdfRoot
self._actionRepeat = actionRepeat
self._isEnableSelfCollision = isEnableSelfCollision
@@ -42,14 +39,14 @@ class KukaCamGymEnv(gym.Env):
self._renders = renders
self._width = 341
self._height = 256
self._isDiscrete=isDiscrete
self._isDiscrete = isDiscrete
self.terminated = 0
self._p = p
if self._renders:
cid = p.connect(p.SHARED_MEMORY)
if (cid<0):
p.connect(p.GUI)
p.resetDebugVisualizerCamera(1.3,180,-41,[0.52,-0.2,-0.33])
if (cid < 0):
p.connect(p.GUI)
p.resetDebugVisualizerCamera(1.3, 180, -41, [0.52, -0.2, -0.33])
else:
p.connect(p.DIRECT)
#timinglog = p.startStateLogging(p.STATE_LOGGING_PROFILE_TIMINGS, "kukaTimings.json")
@@ -67,7 +64,10 @@ class KukaCamGymEnv(gym.Env):
self._action_bound = 1
action_high = np.array([self._action_bound] * action_dim)
self.action_space = spaces.Box(-action_high, action_high, dtype=np.float32)
self.observation_space = spaces.Box(low=0, high=255, shape=(self._height, self._width, 4), dtype=np.uint8)
self.observation_space = spaces.Box(low=0,
high=255,
shape=(self._height, self._width, 4),
dtype=np.uint8)
self.viewer = None
def reset(self):
@@ -75,17 +75,19 @@ class KukaCamGymEnv(gym.Env):
p.resetSimulation()
p.setPhysicsEngineParameter(numSolverIterations=150)
p.setTimeStep(self._timeStep)
p.loadURDF(os.path.join(self._urdfRoot,"plane.urdf"),[0,0,-1])
p.loadURDF(os.path.join(self._urdfRoot, "plane.urdf"), [0, 0, -1])
p.loadURDF(os.path.join(self._urdfRoot,"table/table.urdf"), 0.5000000,0.00000,-.820000,0.000000,0.000000,0.0,1.0)
p.loadURDF(os.path.join(self._urdfRoot, "table/table.urdf"), 0.5000000, 0.00000, -.820000,
0.000000, 0.000000, 0.0, 1.0)
xpos = 0.5 +0.2*random.random()
ypos = 0 +0.25*random.random()
ang = 3.1415925438*random.random()
orn = p.getQuaternionFromEuler([0,0,ang])
self.blockUid =p.loadURDF(os.path.join(self._urdfRoot,"block.urdf"), xpos,ypos,-0.1,orn[0],orn[1],orn[2],orn[3])
xpos = 0.5 + 0.2 * random.random()
ypos = 0 + 0.25 * random.random()
ang = 3.1415925438 * random.random()
orn = p.getQuaternionFromEuler([0, 0, ang])
self.blockUid = p.loadURDF(os.path.join(self._urdfRoot, "block.urdf"), xpos, ypos, -0.1,
orn[0], orn[1], orn[2], orn[3])
p.setGravity(0,0,-10)
p.setGravity(0, 0, -10)
self._kuka = kuka.Kuka(urdfRootPath=self._urdfRoot, timeStep=self._timeStep)
self._envStepCounter = 0
p.stepSimulation()
@@ -101,49 +103,59 @@ class KukaCamGymEnv(gym.Env):
def getExtendedObservation(self):
#camEyePos = [0.03,0.236,0.54]
#distance = 1.06
#pitch=-56
#yaw = 258
#roll=0
#upAxisIndex = 2
#camInfo = p.getDebugVisualizerCamera()
#print("width,height")
#print(camInfo[0])
#print(camInfo[1])
#print("viewMatrix")
#print(camInfo[2])
#print("projectionMatrix")
#print(camInfo[3])
#viewMat = camInfo[2]
#viewMat = p.computeViewMatrixFromYawPitchRoll(camEyePos,distance,yaw, pitch,roll,upAxisIndex)
viewMat = [-0.5120397806167603, 0.7171027660369873, -0.47284144163131714, 0.0, -0.8589617609977722, -0.42747554183006287, 0.28186774253845215, 0.0, 0.0, 0.5504802465438843, 0.8348482847213745, 0.0, 0.1925382763147354, -0.24935829639434814, -0.4401884973049164, 1.0]
#projMatrix = camInfo[3]#[0.7499999403953552, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, -1.0000200271606445, -1.0, 0.0, 0.0, -0.02000020071864128, 0.0]
projMatrix = [0.75, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, -1.0000200271606445, -1.0, 0.0, 0.0, -0.02000020071864128, 0.0]
#camEyePos = [0.03,0.236,0.54]
#distance = 1.06
#pitch=-56
#yaw = 258
#roll=0
#upAxisIndex = 2
#camInfo = p.getDebugVisualizerCamera()
#print("width,height")
#print(camInfo[0])
#print(camInfo[1])
#print("viewMatrix")
#print(camInfo[2])
#print("projectionMatrix")
#print(camInfo[3])
#viewMat = camInfo[2]
#viewMat = p.computeViewMatrixFromYawPitchRoll(camEyePos,distance,yaw, pitch,roll,upAxisIndex)
viewMat = [
-0.5120397806167603, 0.7171027660369873, -0.47284144163131714, 0.0, -0.8589617609977722,
-0.42747554183006287, 0.28186774253845215, 0.0, 0.0, 0.5504802465438843,
0.8348482847213745, 0.0, 0.1925382763147354, -0.24935829639434814, -0.4401884973049164, 1.0
]
#projMatrix = camInfo[3]#[0.7499999403953552, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, -1.0000200271606445, -1.0, 0.0, 0.0, -0.02000020071864128, 0.0]
projMatrix = [
0.75, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, -1.0000200271606445, -1.0, 0.0, 0.0,
-0.02000020071864128, 0.0
]
img_arr = p.getCameraImage(width=self._width,height=self._height,viewMatrix=viewMat,projectionMatrix=projMatrix)
rgb=img_arr[2]
np_img_arr = np.reshape(rgb, (self._height, self._width, 4))
self._observation = np_img_arr
return self._observation
img_arr = p.getCameraImage(width=self._width,
height=self._height,
viewMatrix=viewMat,
projectionMatrix=projMatrix)
rgb = img_arr[2]
np_img_arr = np.reshape(rgb, (self._height, self._width, 4))
self._observation = np_img_arr
return self._observation
def step(self, action):
if (self._isDiscrete):
dv = 0.01
dx = [0,-dv,dv,0,0,0,0][action]
dy = [0,0,0,-dv,dv,0,0][action]
da = [0,0,0,0,0,-0.1,0.1][action]
dx = [0, -dv, dv, 0, 0, 0, 0][action]
dy = [0, 0, 0, -dv, dv, 0, 0][action]
da = [0, 0, 0, 0, 0, -0.1, 0.1][action]
f = 0.3
realAction = [dx,dy,-0.002,da,f]
realAction = [dx, dy, -0.002, da, f]
else:
dv = 0.01
dx = action[0] * dv
dy = action[1] * dv
da = action[2] * 0.1
f = 0.3
realAction = [dx,dy,-0.002,da,f]
realAction = [dx, dy, -0.002, da, f]
return self.step2( realAction)
return self.step2(realAction)
def step2(self, action):
for i in range(self._actionRepeat):
@@ -156,7 +168,7 @@ class KukaCamGymEnv(gym.Env):
self._observation = self.getExtendedObservation()
if self._renders:
time.sleep(self._timeStep)
#print("self._envStepCounter")
#print(self._envStepCounter)
@@ -170,66 +182,67 @@ class KukaCamGymEnv(gym.Env):
def render(self, mode='human', close=False):
if mode != "rgb_array":
return np.array([])
base_pos,orn = self._p.getBasePositionAndOrientation(self._racecar.racecarUniqueId)
view_matrix = self._p.computeViewMatrixFromYawPitchRoll(
cameraTargetPosition=base_pos,
distance=self._cam_dist,
yaw=self._cam_yaw,
pitch=self._cam_pitch,
roll=0,
upAxisIndex=2)
proj_matrix = self._p.computeProjectionMatrixFOV(
fov=60, aspect=float(RENDER_WIDTH)/RENDER_HEIGHT,
nearVal=0.1, farVal=100.0)
(_, _, px, _, _) = self._p.getCameraImage(
width=RENDER_WIDTH, height=RENDER_HEIGHT, viewMatrix=view_matrix,
projectionMatrix=proj_matrix, renderer=pybullet.ER_BULLET_HARDWARE_OPENGL)
base_pos, orn = self._p.getBasePositionAndOrientation(self._racecar.racecarUniqueId)
view_matrix = self._p.computeViewMatrixFromYawPitchRoll(cameraTargetPosition=base_pos,
distance=self._cam_dist,
yaw=self._cam_yaw,
pitch=self._cam_pitch,
roll=0,
upAxisIndex=2)
proj_matrix = self._p.computeProjectionMatrixFOV(fov=60,
aspect=float(RENDER_WIDTH) / RENDER_HEIGHT,
nearVal=0.1,
farVal=100.0)
(_, _, px, _, _) = self._p.getCameraImage(width=RENDER_WIDTH,
height=RENDER_HEIGHT,
viewMatrix=view_matrix,
projectionMatrix=proj_matrix,
renderer=pybullet.ER_BULLET_HARDWARE_OPENGL)
rgb_array = np.array(px)
rgb_array = rgb_array[:, :, :3]
return rgb_array
def _termination(self):
#print (self._kuka.endEffectorPos[2])
state = p.getLinkState(self._kuka.kukaUid,self._kuka.kukaEndEffectorIndex)
state = p.getLinkState(self._kuka.kukaUid, self._kuka.kukaEndEffectorIndex)
actualEndEffectorPos = state[0]
#print("self._envStepCounter")
#print(self._envStepCounter)
if (self.terminated or self._envStepCounter>maxSteps):
if (self.terminated or self._envStepCounter > maxSteps):
self._observation = self.getExtendedObservation()
return True
maxDist = 0.005
closestPoints = p.getClosestPoints(self._kuka.trayUid, self._kuka.kukaUid,maxDist)
closestPoints = p.getClosestPoints(self._kuka.trayUid, self._kuka.kukaUid, maxDist)
if (len(closestPoints)):#(actualEndEffectorPos[2] <= -0.43):
if (len(closestPoints)): #(actualEndEffectorPos[2] <= -0.43):
self.terminated = 1
#print("closing gripper, attempting grasp")
#start grasp and terminate
fingerAngle = 0.3
for i in range (100):
graspAction = [0,0,0.0001,0,fingerAngle]
for i in range(100):
graspAction = [0, 0, 0.0001, 0, fingerAngle]
self._kuka.applyAction(graspAction)
p.stepSimulation()
fingerAngle = fingerAngle-(0.3/100.)
if (fingerAngle<0):
fingerAngle=0
fingerAngle = fingerAngle - (0.3 / 100.)
if (fingerAngle < 0):
fingerAngle = 0
for i in range (1000):
graspAction = [0,0,0.001,0,fingerAngle]
for i in range(1000):
graspAction = [0, 0, 0.001, 0, fingerAngle]
self._kuka.applyAction(graspAction)
p.stepSimulation()
blockPos,blockOrn=p.getBasePositionAndOrientation(self.blockUid)
blockPos, blockOrn = p.getBasePositionAndOrientation(self.blockUid)
if (blockPos[2] > 0.23):
#print("BLOCKPOS!")
#print(blockPos[2])
break
state = p.getLinkState(self._kuka.kukaUid,self._kuka.kukaEndEffectorIndex)
state = p.getLinkState(self._kuka.kukaUid, self._kuka.kukaEndEffectorIndex)
actualEndEffectorPos = state[0]
if (actualEndEffectorPos[2]>0.5):
if (actualEndEffectorPos[2] > 0.5):
break
self._observation = self.getExtendedObservation()
return True
return False
@@ -237,20 +250,21 @@ class KukaCamGymEnv(gym.Env):
def _reward(self):
#rewards is height of target object
blockPos,blockOrn=p.getBasePositionAndOrientation(self.blockUid)
closestPoints = p.getClosestPoints(self.blockUid,self._kuka.kukaUid,1000, -1, self._kuka.kukaEndEffectorIndex)
blockPos, blockOrn = p.getBasePositionAndOrientation(self.blockUid)
closestPoints = p.getClosestPoints(self.blockUid, self._kuka.kukaUid, 1000, -1,
self._kuka.kukaEndEffectorIndex)
reward = -1000
numPt = len(closestPoints)
#print(numPt)
if (numPt>0):
if (numPt > 0):
#print("reward:")
reward = -closestPoints[0][8]*10
if (blockPos[2] >0.2):
reward = -closestPoints[0][8] * 10
if (blockPos[2] > 0.2):
#print("grasped a block!!!")
#print("self._envStepCounter")
#print(self._envStepCounter)
reward = reward+1000
reward = reward + 1000
#print("reward")
#print(reward)
View File
@@ -1,7 +1,7 @@
import os, inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
print ("current_dir=" + currentdir)
os.sys.path.insert(0,currentdir)
print("current_dir=" + currentdir)
os.sys.path.insert(0, currentdir)
import math
import gym
@@ -20,11 +20,9 @@ largeValObservation = 100
RENDER_HEIGHT = 720
RENDER_WIDTH = 960
class KukaGymEnv(gym.Env):
metadata = {
'render.modes': ['human', 'rgb_array'],
'video.frames_per_second' : 50
}
metadata = {'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': 50}
def __init__(self,
urdfRoot=pybullet_data.getDataPath(),
@@ -32,10 +30,10 @@ class KukaGymEnv(gym.Env):
isEnableSelfCollision=True,
renders=False,
isDiscrete=False,
maxSteps = 1000):
maxSteps=1000):
#print("KukaGymEnv __init__")
self._isDiscrete = isDiscrete
self._timeStep = 1./240.
self._timeStep = 1. / 240.
self._urdfRoot = urdfRoot
self._actionRepeat = actionRepeat
self._isEnableSelfCollision = isEnableSelfCollision
@@ -51,9 +49,9 @@ class KukaGymEnv(gym.Env):
self._p = p
if self._renders:
cid = p.connect(p.SHARED_MEMORY)
if (cid<0):
cid = p.connect(p.GUI)
p.resetDebugVisualizerCamera(1.3,180,-41,[0.52,-0.2,-0.33])
if (cid < 0):
cid = p.connect(p.GUI)
p.resetDebugVisualizerCamera(1.3, 180, -41, [0.52, -0.2, -0.33])
else:
p.connect(p.DIRECT)
#timinglog = p.startStateLogging(p.STATE_LOGGING_PROFILE_TIMINGS, "kukaTimings.json")
@@ -67,10 +65,10 @@ class KukaGymEnv(gym.Env):
if (self._isDiscrete):
self.action_space = spaces.Discrete(7)
else:
action_dim = 3
self._action_bound = 1
action_high = np.array([self._action_bound] * action_dim)
self.action_space = spaces.Box(-action_high, action_high)
action_dim = 3
self._action_bound = 1
action_high = np.array([self._action_bound] * action_dim)
self.action_space = spaces.Box(-action_high, action_high)
self.observation_space = spaces.Box(-observation_high, observation_high)
self.viewer = None
@@ -80,17 +78,19 @@ class KukaGymEnv(gym.Env):
p.resetSimulation()
p.setPhysicsEngineParameter(numSolverIterations=150)
p.setTimeStep(self._timeStep)
p.loadURDF(os.path.join(self._urdfRoot,"plane.urdf"),[0,0,-1])
p.loadURDF(os.path.join(self._urdfRoot, "plane.urdf"), [0, 0, -1])
p.loadURDF(os.path.join(self._urdfRoot,"table/table.urdf"), 0.5000000,0.00000,-.820000,0.000000,0.000000,0.0,1.0)
p.loadURDF(os.path.join(self._urdfRoot, "table/table.urdf"), 0.5000000, 0.00000, -.820000,
0.000000, 0.000000, 0.0, 1.0)
xpos = 0.55 +0.12*random.random()
ypos = 0 +0.2*random.random()
ang = 3.14*0.5+3.1415925438*random.random()
orn = p.getQuaternionFromEuler([0,0,ang])
self.blockUid =p.loadURDF(os.path.join(self._urdfRoot,"block.urdf"), xpos,ypos,-0.15,orn[0],orn[1],orn[2],orn[3])
xpos = 0.55 + 0.12 * random.random()
ypos = 0 + 0.2 * random.random()
ang = 3.14 * 0.5 + 3.1415925438 * random.random()
orn = p.getQuaternionFromEuler([0, 0, ang])
self.blockUid = p.loadURDF(os.path.join(self._urdfRoot, "block.urdf"), xpos, ypos, -0.15,
orn[0], orn[1], orn[2], orn[3])
p.setGravity(0,0,-10)
p.setGravity(0, 0, -10)
self._kuka = kuka.Kuka(urdfRootPath=self._urdfRoot, timeStep=self._timeStep)
self._envStepCounter = 0
p.stepSimulation()
@@ -105,47 +105,48 @@ class KukaGymEnv(gym.Env):
return [seed]
def getExtendedObservation(self):
self._observation = self._kuka.getObservation()
gripperState = p.getLinkState(self._kuka.kukaUid,self._kuka.kukaGripperIndex)
gripperPos = gripperState[0]
gripperOrn = gripperState[1]
blockPos,blockOrn = p.getBasePositionAndOrientation(self.blockUid)
self._observation = self._kuka.getObservation()
gripperState = p.getLinkState(self._kuka.kukaUid, self._kuka.kukaGripperIndex)
gripperPos = gripperState[0]
gripperOrn = gripperState[1]
blockPos, blockOrn = p.getBasePositionAndOrientation(self.blockUid)
invGripperPos,invGripperOrn = p.invertTransform(gripperPos,gripperOrn)
gripperMat = p.getMatrixFromQuaternion(gripperOrn)
dir0 = [gripperMat[0],gripperMat[3],gripperMat[6]]
dir1 = [gripperMat[1],gripperMat[4],gripperMat[7]]
dir2 = [gripperMat[2],gripperMat[5],gripperMat[8]]
invGripperPos, invGripperOrn = p.invertTransform(gripperPos, gripperOrn)
gripperMat = p.getMatrixFromQuaternion(gripperOrn)
dir0 = [gripperMat[0], gripperMat[3], gripperMat[6]]
dir1 = [gripperMat[1], gripperMat[4], gripperMat[7]]
dir2 = [gripperMat[2], gripperMat[5], gripperMat[8]]
gripperEul = p.getEulerFromQuaternion(gripperOrn)
#print("gripperEul")
#print(gripperEul)
blockPosInGripper,blockOrnInGripper = p.multiplyTransforms(invGripperPos,invGripperOrn,blockPos,blockOrn)
projectedBlockPos2D =[blockPosInGripper[0],blockPosInGripper[1]]
blockEulerInGripper = p.getEulerFromQuaternion(blockOrnInGripper)
#print("projectedBlockPos2D")
#print(projectedBlockPos2D)
#print("blockEulerInGripper")
#print(blockEulerInGripper)
gripperEul = p.getEulerFromQuaternion(gripperOrn)
#print("gripperEul")
#print(gripperEul)
blockPosInGripper, blockOrnInGripper = p.multiplyTransforms(invGripperPos, invGripperOrn,
blockPos, blockOrn)
projectedBlockPos2D = [blockPosInGripper[0], blockPosInGripper[1]]
blockEulerInGripper = p.getEulerFromQuaternion(blockOrnInGripper)
#print("projectedBlockPos2D")
#print(projectedBlockPos2D)
#print("blockEulerInGripper")
#print(blockEulerInGripper)
#we return the relative x,y position and euler angle of block in gripper space
blockInGripperPosXYEulZ =[blockPosInGripper[0],blockPosInGripper[1],blockEulerInGripper[2]]
#we return the relative x,y position and euler angle of block in gripper space
blockInGripperPosXYEulZ = [blockPosInGripper[0], blockPosInGripper[1], blockEulerInGripper[2]]
#p.addUserDebugLine(gripperPos,[gripperPos[0]+dir0[0],gripperPos[1]+dir0[1],gripperPos[2]+dir0[2]],[1,0,0],lifeTime=1)
#p.addUserDebugLine(gripperPos,[gripperPos[0]+dir1[0],gripperPos[1]+dir1[1],gripperPos[2]+dir1[2]],[0,1,0],lifeTime=1)
#p.addUserDebugLine(gripperPos,[gripperPos[0]+dir2[0],gripperPos[1]+dir2[1],gripperPos[2]+dir2[2]],[0,0,1],lifeTime=1)
#p.addUserDebugLine(gripperPos,[gripperPos[0]+dir0[0],gripperPos[1]+dir0[1],gripperPos[2]+dir0[2]],[1,0,0],lifeTime=1)
#p.addUserDebugLine(gripperPos,[gripperPos[0]+dir1[0],gripperPos[1]+dir1[1],gripperPos[2]+dir1[2]],[0,1,0],lifeTime=1)
#p.addUserDebugLine(gripperPos,[gripperPos[0]+dir2[0],gripperPos[1]+dir2[1],gripperPos[2]+dir2[2]],[0,0,1],lifeTime=1)
self._observation.extend(list(blockInGripperPosXYEulZ))
return self._observation
self._observation.extend(list(blockInGripperPosXYEulZ))
return self._observation
def step(self, action):
if (self._isDiscrete):
dv = 0.005
dx = [0,-dv,dv,0,0,0,0][action]
dy = [0,0,0,-dv,dv,0,0][action]
da = [0,0,0,0,0,-0.05,0.05][action]
dx = [0, -dv, dv, 0, 0, 0, 0][action]
dy = [0, 0, 0, -dv, dv, 0, 0][action]
da = [0, 0, 0, 0, 0, -0.05, 0.05][action]
f = 0.3
realAction = [dx,dy,-0.002,da,f]
realAction = [dx, dy, -0.002, da, f]
else:
#print("action[0]=", str(action[0]))
dv = 0.005
@@ -153,8 +154,8 @@ class KukaGymEnv(gym.Env):
dy = action[1] * dv
da = action[2] * 0.05
f = 0.3
realAction = [dx,dy,-0.002,da,f]
return self.step2( realAction)
realAction = [dx, dy, -0.002, da, f]
return self.step2(realAction)
def step2(self, action):
for i in range(self._actionRepeat):
@@ -171,11 +172,13 @@ class KukaGymEnv(gym.Env):
#print(self._envStepCounter)
done = self._termination()
npaction = np.array([action[3]]) #only penalize rotation until learning works well [action[0],action[1],action[3]])
actionCost = np.linalg.norm(npaction)*10.
npaction = np.array([
action[3]
]) #only penalize rotation until learning works well [action[0],action[1],action[3]])
actionCost = np.linalg.norm(npaction) * 10.
#print("actionCost")
#print(actionCost)
reward = self._reward()-actionCost
reward = self._reward() - actionCost
#print("reward")
#print(reward)
@@ -187,22 +190,23 @@ class KukaGymEnv(gym.Env):
if mode != "rgb_array":
return np.array([])
base_pos,orn = self._p.getBasePositionAndOrientation(self._kuka.kukaUid)
view_matrix = self._p.computeViewMatrixFromYawPitchRoll(
cameraTargetPosition=base_pos,
distance=self._cam_dist,
yaw=self._cam_yaw,
pitch=self._cam_pitch,
roll=0,
upAxisIndex=2)
proj_matrix = self._p.computeProjectionMatrixFOV(
fov=60, aspect=float(RENDER_WIDTH)/RENDER_HEIGHT,
nearVal=0.1, farVal=100.0)
(_, _, px, _, _) = self._p.getCameraImage(
width=RENDER_WIDTH, height=RENDER_HEIGHT, viewMatrix=view_matrix,
projectionMatrix=proj_matrix, renderer=self._p.ER_BULLET_HARDWARE_OPENGL)
#renderer=self._p.ER_TINY_RENDERER)
base_pos, orn = self._p.getBasePositionAndOrientation(self._kuka.kukaUid)
view_matrix = self._p.computeViewMatrixFromYawPitchRoll(cameraTargetPosition=base_pos,
distance=self._cam_dist,
yaw=self._cam_yaw,
pitch=self._cam_pitch,
roll=0,
upAxisIndex=2)
proj_matrix = self._p.computeProjectionMatrixFOV(fov=60,
aspect=float(RENDER_WIDTH) / RENDER_HEIGHT,
nearVal=0.1,
farVal=100.0)
(_, _, px, _, _) = self._p.getCameraImage(width=RENDER_WIDTH,
height=RENDER_HEIGHT,
viewMatrix=view_matrix,
projectionMatrix=proj_matrix,
renderer=self._p.ER_BULLET_HARDWARE_OPENGL)
#renderer=self._p.ER_TINY_RENDERER)
rgb_array = np.array(px, dtype=np.uint8)
rgb_array = np.reshape(rgb_array, (RENDER_HEIGHT, RENDER_WIDTH, 4))
@@ -210,49 +214,47 @@ class KukaGymEnv(gym.Env):
rgb_array = rgb_array[:, :, :3]
return rgb_array
def _termination(self):
#print (self._kuka.endEffectorPos[2])
state = p.getLinkState(self._kuka.kukaUid,self._kuka.kukaEndEffectorIndex)
state = p.getLinkState(self._kuka.kukaUid, self._kuka.kukaEndEffectorIndex)
actualEndEffectorPos = state[0]
#print("self._envStepCounter")
#print(self._envStepCounter)
if (self.terminated or self._envStepCounter>self._maxSteps):
if (self.terminated or self._envStepCounter > self._maxSteps):
self._observation = self.getExtendedObservation()
return True
maxDist = 0.005
closestPoints = p.getClosestPoints(self._kuka.trayUid, self._kuka.kukaUid,maxDist)
closestPoints = p.getClosestPoints(self._kuka.trayUid, self._kuka.kukaUid, maxDist)
if (len(closestPoints)):#(actualEndEffectorPos[2] <= -0.43):
if (len(closestPoints)): #(actualEndEffectorPos[2] <= -0.43):
self.terminated = 1
#print("terminating, closing gripper, attempting grasp")
#start grasp and terminate
fingerAngle = 0.3
for i in range (100):
graspAction = [0,0,0.0001,0,fingerAngle]
for i in range(100):
graspAction = [0, 0, 0.0001, 0, fingerAngle]
self._kuka.applyAction(graspAction)
p.stepSimulation()
fingerAngle = fingerAngle-(0.3/100.)
if (fingerAngle<0):
fingerAngle=0
fingerAngle = fingerAngle - (0.3 / 100.)
if (fingerAngle < 0):
fingerAngle = 0
for i in range (1000):
graspAction = [0,0,0.001,0,fingerAngle]
for i in range(1000):
graspAction = [0, 0, 0.001, 0, fingerAngle]
self._kuka.applyAction(graspAction)
p.stepSimulation()
blockPos,blockOrn=p.getBasePositionAndOrientation(self.blockUid)
blockPos, blockOrn = p.getBasePositionAndOrientation(self.blockUid)
if (blockPos[2] > 0.23):
#print("BLOCKPOS!")
#print(blockPos[2])
break
state = p.getLinkState(self._kuka.kukaUid,self._kuka.kukaEndEffectorIndex)
state = p.getLinkState(self._kuka.kukaUid, self._kuka.kukaEndEffectorIndex)
actualEndEffectorPos = state[0]
if (actualEndEffectorPos[2]>0.5):
if (actualEndEffectorPos[2] > 0.5):
break
self._observation = self.getExtendedObservation()
return True
return False
@@ -260,18 +262,19 @@ class KukaGymEnv(gym.Env):
def _reward(self):
#rewards is height of target object
blockPos,blockOrn=p.getBasePositionAndOrientation(self.blockUid)
closestPoints = p.getClosestPoints(self.blockUid,self._kuka.kukaUid,1000, -1, self._kuka.kukaEndEffectorIndex)
blockPos, blockOrn = p.getBasePositionAndOrientation(self.blockUid)
closestPoints = p.getClosestPoints(self.blockUid, self._kuka.kukaUid, 1000, -1,
self._kuka.kukaEndEffectorIndex)
reward = -1000
numPt = len(closestPoints)
#print(numPt)
if (numPt>0):
if (numPt > 0):
#print("reward:")
reward = -closestPoints[0][8]*10
if (blockPos[2] >0.2):
reward = reward+10000
reward = -closestPoints[0][8] * 10
if (blockPos[2] > 0.2):
reward = reward + 10000
print("successfully grasped a block!!!")
#print("self._envStepCounter")
#print(self._envStepCounter)
@@ -279,7 +282,7 @@ class KukaGymEnv(gym.Env):
#print(self._envStepCounter)
#print("reward")
#print(reward)
#print("reward")
#print("reward")
#print(reward)
return reward
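A quick way to confirm the reformatted environment still behaves as a standard gym env is a short random rollout. This sketch is illustrative only and assumes the module path pybullet_envs.bullet.kukaGymEnv; the constructor keywords and the 3-dimensional continuous action space match the code shown above.

from pybullet_envs.bullet.kukaGymEnv import KukaGymEnv  # import path is an assumption

env = KukaGymEnv(renders=False, isDiscrete=False, maxSteps=1000)
obs = env.reset()
done = False
episode_return = 0.0
while not done:
  # Continuous mode: a 3-vector in [-1, 1], mapped above to dx, dy, da.
  action = env.action_space.sample()
  obs, reward, done, info = env.step(action)
  episode_return += reward
print("episode return:", episode_return)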
View File
@@ -13,6 +13,7 @@ import glob
from pkg_resources import parse_version
import gym
class KukaDiverseObjectEnv(KukaGymEnv):
"""Class for Kuka environment with diverse objects.
@@ -61,7 +62,7 @@ class KukaDiverseObjectEnv(KukaGymEnv):
"""
self._isDiscrete = isDiscrete
self._timeStep = 1./240.
self._timeStep = 1. / 240.
self._urdfRoot = urdfRoot
self._actionRepeat = actionRepeat
self._isEnableSelfCollision = isEnableSelfCollision
@@ -85,9 +86,9 @@ class KukaDiverseObjectEnv(KukaGymEnv):
if self._renders:
self.cid = p.connect(p.SHARED_MEMORY)
if (self.cid<0):
self.cid = p.connect(p.GUI)
p.resetDebugVisualizerCamera(1.3,180,-41,[0.52,-0.2,-0.33])
if (self.cid < 0):
self.cid = p.connect(p.GUI)
p.resetDebugVisualizerCamera(1.3, 180, -41, [0.52, -0.2, -0.33])
else:
self.cid = p.connect(p.DIRECT)
self.seed()
@@ -100,9 +101,7 @@ class KukaDiverseObjectEnv(KukaGymEnv):
else:
self.action_space = spaces.Box(low=-1, high=1, shape=(3,)) # dx, dy, da
if self._removeHeightHack:
self.action_space = spaces.Box(low=-1,
high=1,
shape=(4,)) # dx, dy, dz, da
self.action_space = spaces.Box(low=-1, high=1, shape=(4,)) # dx, dy, dz, da
self.viewer = None
def reset(self):
@@ -111,17 +110,15 @@ class KukaDiverseObjectEnv(KukaGymEnv):
# Set the camera settings.
look = [0.23, 0.2, 0.54]
distance = 1.
pitch = -56 + self._cameraRandom*np.random.uniform(-3, 3)
yaw = 245 + self._cameraRandom*np.random.uniform(-3, 3)
pitch = -56 + self._cameraRandom * np.random.uniform(-3, 3)
yaw = 245 + self._cameraRandom * np.random.uniform(-3, 3)
roll = 0
self._view_matrix = p.computeViewMatrixFromYawPitchRoll(
look, distance, yaw, pitch, roll, 2)
fov = 20. + self._cameraRandom*np.random.uniform(-2, 2)
self._view_matrix = p.computeViewMatrixFromYawPitchRoll(look, distance, yaw, pitch, roll, 2)
fov = 20. + self._cameraRandom * np.random.uniform(-2, 2)
aspect = self._width / self._height
near = 0.01
far = 10
self._proj_matrix = p.computeProjectionMatrixFOV(
fov, aspect, near, far)
self._proj_matrix = p.computeProjectionMatrixFOV(fov, aspect, near, far)
self._attempted_grasp = False
self._env_step = 0
@@ -130,18 +127,18 @@ class KukaDiverseObjectEnv(KukaGymEnv):
p.resetSimulation()
p.setPhysicsEngineParameter(numSolverIterations=150)
p.setTimeStep(self._timeStep)
p.loadURDF(os.path.join(self._urdfRoot,"plane.urdf"),[0,0,-1])
p.loadURDF(os.path.join(self._urdfRoot, "plane.urdf"), [0, 0, -1])
p.loadURDF(os.path.join(self._urdfRoot,"table/table.urdf"), 0.5000000,0.00000,-.820000,0.000000,0.000000,0.0,1.0)
p.loadURDF(os.path.join(self._urdfRoot, "table/table.urdf"), 0.5000000, 0.00000, -.820000,
0.000000, 0.000000, 0.0, 1.0)
p.setGravity(0,0,-10)
p.setGravity(0, 0, -10)
self._kuka = kuka.Kuka(urdfRootPath=self._urdfRoot, timeStep=self._timeStep)
self._envStepCounter = 0
p.stepSimulation()
# Choose the objects in the bin.
urdfList = self._get_random_object(
self._numObjects, self._isTest)
urdfList = self._get_random_object(self._numObjects, self._isTest)
self._objectUids = self._randomly_place_objects(urdfList)
self._observation = self._get_observation()
return np.array(self._observation)
@@ -156,17 +153,15 @@ class KukaDiverseObjectEnv(KukaGymEnv):
The list of object unique ID's.
"""
# Randomize positions of each object urdf.
objectUids = []
for urdf_name in urdfList:
xpos = 0.4 +self._blockRandom*random.random()
ypos = self._blockRandom*(random.random()-.5)
angle = np.pi/2 + self._blockRandom * np.pi * random.random()
xpos = 0.4 + self._blockRandom * random.random()
ypos = self._blockRandom * (random.random() - .5)
angle = np.pi / 2 + self._blockRandom * np.pi * random.random()
orn = p.getQuaternionFromEuler([0, 0, angle])
urdf_path = os.path.join(self._urdfRoot, urdf_name)
uid = p.loadURDF(urdf_path, [xpos, ypos, .15],
[orn[0], orn[1], orn[2], orn[3]])
uid = p.loadURDF(urdf_path, [xpos, ypos, .15], [orn[0], orn[1], orn[2], orn[3]])
objectUids.append(uid)
# Let each object fall to the tray individual, to prevent object
# intersection.
@@ -178,9 +173,9 @@ class KukaDiverseObjectEnv(KukaGymEnv):
"""Return the observation as an image.
"""
img_arr = p.getCameraImage(width=self._width,
height=self._height,
viewMatrix=self._view_matrix,
projectionMatrix=self._proj_matrix)
height=self._height,
viewMatrix=self._view_matrix,
projectionMatrix=self._proj_matrix)
rgb = img_arr[2]
np_img_arr = np.reshape(rgb, (self._height, self._width, 4))
return np_img_arr[:, :, :3]
@@ -246,8 +241,7 @@ class KukaDiverseObjectEnv(KukaGymEnv):
break
# If we are close to the bin, attempt grasp.
state = p.getLinkState(self._kuka.kukaUid,
self._kuka.kukaEndEffectorIndex)
state = p.getLinkState(self._kuka.kukaUid, self._kuka.kukaEndEffectorIndex)
end_effector_pos = state[0]
if end_effector_pos[2] <= 0.1:
finger_angle = 0.3
@@ -257,7 +251,7 @@ class KukaDiverseObjectEnv(KukaGymEnv):
p.stepSimulation()
#if self._renders:
# time.sleep(self._timeStep)
finger_angle -= 0.3/100.
finger_angle -= 0.3 / 100.
if finger_angle < 0:
finger_angle = 0
for _ in range(500):
@@ -266,7 +260,7 @@ class KukaDiverseObjectEnv(KukaGymEnv):
p.stepSimulation()
if self._renders:
time.sleep(self._timeStep)
finger_angle -= 0.3/100.
finger_angle -= 0.3 / 100.
if finger_angle < 0:
finger_angle = 0
self._attempted_grasp = True
@@ -274,9 +268,7 @@ class KukaDiverseObjectEnv(KukaGymEnv):
done = self._termination()
reward = self._reward()
debug = {
'grasp_success': self._graspSuccess
}
debug = {'grasp_success': self._graspSuccess}
return observation, reward, done, debug
def _reward(self):
@@ -288,8 +280,7 @@ class KukaDiverseObjectEnv(KukaGymEnv):
reward = 0
self._graspSuccess = 0
for uid in self._objectUids:
pos, _ = p.getBasePositionAndOrientation(
uid)
pos, _ = p.getBasePositionAndOrientation(uid)
# If any block is above height, provide reward.
if pos[2] > 0.2:
self._graspSuccess += 1
@@ -319,8 +310,7 @@ class KukaDiverseObjectEnv(KukaGymEnv):
urdf_pattern = os.path.join(self._urdfRoot, 'random_urdfs/*[1-9]/*.urdf')
found_object_directories = glob.glob(urdf_pattern)
total_num_objects = len(found_object_directories)
selected_objects = np.random.choice(np.arange(total_num_objects),
num_objects)
selected_objects = np.random.choice(np.arange(total_num_objects), num_objects)
selected_objects_filenames = []
for object_index in selected_objects:
selected_objects_filenames += [found_object_directories[object_index]]
View File
@@ -15,9 +15,8 @@ OVERHEAT_SHUTDOWN_TORQUE = 2.45
OVERHEAT_SHUTDOWN_TIME = 1.0
LEG_POSITION = ["front_left", "back_left", "front_right", "back_right"]
MOTOR_NAMES = [
"motor_front_leftL_joint", "motor_front_leftR_joint",
"motor_back_leftL_joint", "motor_back_leftR_joint",
"motor_front_rightL_joint", "motor_front_rightR_joint",
"motor_front_leftL_joint", "motor_front_leftR_joint", "motor_back_leftL_joint",
"motor_back_leftR_joint", "motor_front_rightL_joint", "motor_front_rightR_joint",
"motor_back_rightL_joint", "motor_back_rightR_joint"
]
LEG_LINK_ID = [2, 3, 5, 6, 8, 9, 11, 12, 15, 16, 18, 19, 21, 22, 24, 25]
@@ -33,7 +32,7 @@ class Minitaur(object):
def __init__(self,
pybullet_client,
urdf_root= os.path.join(os.path.dirname(__file__),"../data"),
urdf_root=os.path.join(os.path.dirname(__file__), "../data"),
time_step=0.01,
self_collision_enabled=False,
motor_velocity_limit=np.inf,
@@ -87,10 +86,9 @@ class Minitaur(object):
if self._accurate_motor_model_enabled:
self._kp = motor_kp
self._kd = motor_kd
self._motor_model = motor.MotorModel(
torque_control_enabled=self._torque_control_enabled,
kp=self._kp,
kd=self._kd)
self._motor_model = motor.MotorModel(torque_control_enabled=self._torque_control_enabled,
kp=self._kp,
kd=self._kd)
elif self._pd_control_enabled:
self._kp = 8
self._kd = kd_for_pd_controllers
@@ -101,15 +99,12 @@ class Minitaur(object):
self.Reset()
def _RecordMassInfoFromURDF(self):
self._base_mass_urdf = self._pybullet_client.getDynamicsInfo(
self.quadruped, BASE_LINK_ID)[0]
self._base_mass_urdf = self._pybullet_client.getDynamicsInfo(self.quadruped, BASE_LINK_ID)[0]
self._leg_masses_urdf = []
self._leg_masses_urdf.append(
self._pybullet_client.getDynamicsInfo(self.quadruped, LEG_LINK_ID[0])[
0])
self._pybullet_client.getDynamicsInfo(self.quadruped, LEG_LINK_ID[0])[0])
self._leg_masses_urdf.append(
self._pybullet_client.getDynamicsInfo(self.quadruped, MOTOR_LINK_ID[0])[
0])
self._pybullet_client.getDynamicsInfo(self.quadruped, MOTOR_LINK_ID[0])[0])
def _BuildJointNameToIdDict(self):
num_joints = self._pybullet_client.getNumJoints(self.quadruped)
@@ -119,9 +114,7 @@ class Minitaur(object):
self._joint_name_to_id[joint_info[1].decode("UTF-8")] = joint_info[0]
def _BuildMotorIdList(self):
self._motor_id_list = [
self._joint_name_to_id[motor_name] for motor_name in MOTOR_NAMES
]
self._motor_id_list = [self._joint_name_to_id[motor_name] for motor_name in MOTOR_NAMES]
def Reset(self, reload_urdf=True):
"""Reset the minitaur to its initial states.
@@ -144,39 +137,35 @@ class Minitaur(object):
self._RecordMassInfoFromURDF()
self.ResetPose(add_constraint=True)
if self._on_rack:
self._pybullet_client.createConstraint(
self.quadruped, -1, -1, -1, self._pybullet_client.JOINT_FIXED,
[0, 0, 0], [0, 0, 0], [0, 0, 1])
self._pybullet_client.createConstraint(self.quadruped, -1, -1, -1,
self._pybullet_client.JOINT_FIXED, [0, 0, 0],
[0, 0, 0], [0, 0, 1])
else:
self._pybullet_client.resetBasePositionAndOrientation(
self.quadruped, INIT_POSITION, INIT_ORIENTATION)
self._pybullet_client.resetBaseVelocity(self.quadruped, [0, 0, 0],
[0, 0, 0])
self._pybullet_client.resetBasePositionAndOrientation(self.quadruped, INIT_POSITION,
INIT_ORIENTATION)
self._pybullet_client.resetBaseVelocity(self.quadruped, [0, 0, 0], [0, 0, 0])
self.ResetPose(add_constraint=False)
self._overheat_counter = np.zeros(self.num_motors)
self._motor_enabled_list = [True] * self.num_motors
def _SetMotorTorqueById(self, motor_id, torque):
self._pybullet_client.setJointMotorControl2(
bodyIndex=self.quadruped,
jointIndex=motor_id,
controlMode=self._pybullet_client.TORQUE_CONTROL,
force=torque)
self._pybullet_client.setJointMotorControl2(bodyIndex=self.quadruped,
jointIndex=motor_id,
controlMode=self._pybullet_client.TORQUE_CONTROL,
force=torque)
def _SetDesiredMotorAngleById(self, motor_id, desired_angle):
self._pybullet_client.setJointMotorControl2(
bodyIndex=self.quadruped,
jointIndex=motor_id,
controlMode=self._pybullet_client.POSITION_CONTROL,
targetPosition=desired_angle,
positionGain=self._kp,
velocityGain=self._kd,
force=self._max_force)
self._pybullet_client.setJointMotorControl2(bodyIndex=self.quadruped,
jointIndex=motor_id,
controlMode=self._pybullet_client.POSITION_CONTROL,
targetPosition=desired_angle,
positionGain=self._kp,
velocityGain=self._kd,
force=self._max_force)
def _SetDesiredMotorAngleByName(self, motor_name, desired_angle):
self._SetDesiredMotorAngleById(self._joint_name_to_id[motor_name],
desired_angle)
self._SetDesiredMotorAngleById(self._joint_name_to_id[motor_name], desired_angle)
def ResetPose(self, add_constraint):
"""Reset the pose of the minitaur.
@@ -200,59 +189,53 @@ class Minitaur(object):
knee_angle = -2.1834
leg_position = LEG_POSITION[leg_id]
self._pybullet_client.resetJointState(
self.quadruped,
self._joint_name_to_id["motor_" + leg_position + "L_joint"],
self._motor_direction[2 * leg_id] * half_pi,
targetVelocity=0)
self._pybullet_client.resetJointState(
self.quadruped,
self._joint_name_to_id["knee_" + leg_position + "L_link"],
self._motor_direction[2 * leg_id] * knee_angle,
targetVelocity=0)
self._pybullet_client.resetJointState(
self.quadruped,
self._joint_name_to_id["motor_" + leg_position + "R_joint"],
self._motor_direction[2 * leg_id + 1] * half_pi,
targetVelocity=0)
self._pybullet_client.resetJointState(
self.quadruped,
self._joint_name_to_id["knee_" + leg_position + "R_link"],
self._motor_direction[2 * leg_id + 1] * knee_angle,
targetVelocity=0)
self._pybullet_client.resetJointState(self.quadruped,
self._joint_name_to_id["motor_" + leg_position +
"L_joint"],
self._motor_direction[2 * leg_id] * half_pi,
targetVelocity=0)
self._pybullet_client.resetJointState(self.quadruped,
self._joint_name_to_id["knee_" + leg_position +
"L_link"],
self._motor_direction[2 * leg_id] * knee_angle,
targetVelocity=0)
self._pybullet_client.resetJointState(self.quadruped,
self._joint_name_to_id["motor_" + leg_position +
"R_joint"],
self._motor_direction[2 * leg_id + 1] * half_pi,
targetVelocity=0)
self._pybullet_client.resetJointState(self.quadruped,
self._joint_name_to_id["knee_" + leg_position +
"R_link"],
self._motor_direction[2 * leg_id + 1] * knee_angle,
targetVelocity=0)
if add_constraint:
self._pybullet_client.createConstraint(
self.quadruped, self._joint_name_to_id["knee_"
+ leg_position + "R_link"],
self.quadruped, self._joint_name_to_id["knee_"
+ leg_position + "L_link"],
self._pybullet_client.JOINT_POINT2POINT, [0, 0, 0],
KNEE_CONSTRAINT_POINT_RIGHT, KNEE_CONSTRAINT_POINT_LEFT)
self.quadruped, self._joint_name_to_id["knee_" + leg_position + "R_link"],
self.quadruped, self._joint_name_to_id["knee_" + leg_position + "L_link"],
self._pybullet_client.JOINT_POINT2POINT, [0, 0, 0], KNEE_CONSTRAINT_POINT_RIGHT,
KNEE_CONSTRAINT_POINT_LEFT)
if self._accurate_motor_model_enabled or self._pd_control_enabled:
# Disable the default motor in pybullet.
self._pybullet_client.setJointMotorControl2(
bodyIndex=self.quadruped,
jointIndex=(self._joint_name_to_id["motor_"
+ leg_position + "L_joint"]),
jointIndex=(self._joint_name_to_id["motor_" + leg_position + "L_joint"]),
controlMode=self._pybullet_client.VELOCITY_CONTROL,
targetVelocity=0,
force=knee_friction_force)
self._pybullet_client.setJointMotorControl2(
bodyIndex=self.quadruped,
jointIndex=(self._joint_name_to_id["motor_"
+ leg_position + "R_joint"]),
jointIndex=(self._joint_name_to_id["motor_" + leg_position + "R_joint"]),
controlMode=self._pybullet_client.VELOCITY_CONTROL,
targetVelocity=0,
force=knee_friction_force)
else:
self._SetDesiredMotorAngleByName(
"motor_" + leg_position + "L_joint",
self._motor_direction[2 * leg_id] * half_pi)
self._SetDesiredMotorAngleByName("motor_" + leg_position + "L_joint",
self._motor_direction[2 * leg_id] * half_pi)
self._SetDesiredMotorAngleByName("motor_" + leg_position + "R_joint",
self._motor_direction[2 * leg_id
+ 1] * half_pi)
self._motor_direction[2 * leg_id + 1] * half_pi)
self._pybullet_client.setJointMotorControl2(
bodyIndex=self.quadruped,
@@ -273,8 +256,7 @@ class Minitaur(object):
Returns:
The position of minitaur's base.
"""
position, _ = (
self._pybullet_client.getBasePositionAndOrientation(self.quadruped))
position, _ = (self._pybullet_client.getBasePositionAndOrientation(self.quadruped))
return position
def GetBaseOrientation(self):
@@ -283,8 +265,7 @@ class Minitaur(object):
Returns:
The orientation of minitaur's base.
"""
_, orientation = (
self._pybullet_client.getBasePositionAndOrientation(self.quadruped))
_, orientation = (self._pybullet_client.getBasePositionAndOrientation(self.quadruped))
return orientation
def GetActionDimension(self):
@@ -304,10 +285,9 @@ class Minitaur(object):
"""
upper_bound = np.array([0.0] * self.GetObservationDimension())
upper_bound[0:self.num_motors] = math.pi # Joint angle.
upper_bound[self.num_motors:2 * self.num_motors] = (
motor.MOTOR_SPEED_LIMIT) # Joint velocity.
upper_bound[2 * self.num_motors:3 * self.num_motors] = (
motor.OBSERVED_TORQUE_LIMIT) # Joint torque.
upper_bound[self.num_motors:2 * self.num_motors] = (motor.MOTOR_SPEED_LIMIT) # Joint velocity.
upper_bound[2 * self.num_motors:3 * self.num_motors] = (motor.OBSERVED_TORQUE_LIMIT
) # Joint torque.
upper_bound[3 * self.num_motors:] = 1.0 # Quaternion of base orientation.
return upper_bound
@@ -354,12 +334,9 @@ class Minitaur(object):
"""
if self._motor_velocity_limit < np.inf:
current_motor_angle = self.GetMotorAngles()
motor_commands_max = (
current_motor_angle + self.time_step * self._motor_velocity_limit)
motor_commands_min = (
current_motor_angle - self.time_step * self._motor_velocity_limit)
motor_commands = np.clip(motor_commands, motor_commands_min,
motor_commands_max)
motor_commands_max = (current_motor_angle + self.time_step * self._motor_velocity_limit)
motor_commands_min = (current_motor_angle - self.time_step * self._motor_velocity_limit)
motor_commands = np.clip(motor_commands, motor_commands_min, motor_commands_max)
if self._accurate_motor_model_enabled or self._pd_control_enabled:
q = self.GetMotorAngles()
@@ -373,8 +350,7 @@ class Minitaur(object):
self._overheat_counter[i] += 1
else:
self._overheat_counter[i] = 0
if (self._overheat_counter[i] >
OVERHEAT_SHUTDOWN_TIME / self.time_step):
if (self._overheat_counter[i] > OVERHEAT_SHUTDOWN_TIME / self.time_step):
self._motor_enabled_list[i] = False
# The torque is already in the observation space because we use
@@ -382,12 +358,11 @@ class Minitaur(object):
self._observed_motor_torques = observed_torque
# Transform into the motor space when applying the torque.
self._applied_motor_torque = np.multiply(actual_torque,
self._motor_direction)
self._applied_motor_torque = np.multiply(actual_torque, self._motor_direction)
for motor_id, motor_torque, motor_enabled in zip(
self._motor_id_list, self._applied_motor_torque,
self._motor_enabled_list):
for motor_id, motor_torque, motor_enabled in zip(self._motor_id_list,
self._applied_motor_torque,
self._motor_enabled_list):
if motor_enabled:
self._SetMotorTorqueById(motor_id, motor_torque)
else:
@@ -403,14 +378,12 @@ class Minitaur(object):
self._applied_motor_torques = np.multiply(self._observed_motor_torques,
self._motor_direction)
for motor_id, motor_torque in zip(self._motor_id_list,
self._applied_motor_torques):
for motor_id, motor_torque in zip(self._motor_id_list, self._applied_motor_torques):
self._SetMotorTorqueById(motor_id, motor_torque)
else:
motor_commands_with_direction = np.multiply(motor_commands,
self._motor_direction)
for motor_id, motor_command_with_direction in zip(
self._motor_id_list, motor_commands_with_direction):
motor_commands_with_direction = np.multiply(motor_commands, self._motor_direction)
for motor_id, motor_command_with_direction in zip(self._motor_id_list,
motor_commands_with_direction):
self._SetDesiredMotorAngleById(motor_id, motor_command_with_direction)
def GetMotorAngles(self):
@@ -471,13 +444,13 @@ class Minitaur(object):
quater_pi = math.pi / 4
for i in range(self.num_motors):
action_idx = i // 2
forward_backward_component = (-scale_for_singularity * quater_pi * (
actions[action_idx + half_num_motors] + offset_for_singularity))
forward_backward_component = (
-scale_for_singularity * quater_pi *
(actions[action_idx + half_num_motors] + offset_for_singularity))
extension_component = (-1)**i * quater_pi * actions[action_idx]
if i >= half_num_motors:
extension_component = -extension_component
motor_angle[i] = (
math.pi + forward_backward_component + extension_component)
motor_angle[i] = (math.pi + forward_backward_component + extension_component)
return motor_angle
def GetBaseMassFromURDF(self):
@@ -489,8 +462,7 @@ class Minitaur(object):
return self._leg_masses_urdf
def SetBaseMass(self, base_mass):
self._pybullet_client.changeDynamics(
self.quadruped, BASE_LINK_ID, mass=base_mass)
self._pybullet_client.changeDynamics(self.quadruped, BASE_LINK_ID, mass=base_mass)
def SetLegMasses(self, leg_masses):
"""Set the mass of the legs.
@@ -504,11 +476,9 @@ class Minitaur(object):
leg_masses[1] is the mass of the motor.
"""
for link_id in LEG_LINK_ID:
self._pybullet_client.changeDynamics(
self.quadruped, link_id, mass=leg_masses[0])
self._pybullet_client.changeDynamics(self.quadruped, link_id, mass=leg_masses[0])
for link_id in MOTOR_LINK_ID:
self._pybullet_client.changeDynamics(
self.quadruped, link_id, mass=leg_masses[1])
self._pybullet_client.changeDynamics(self.quadruped, link_id, mass=leg_masses[1])
def SetFootFriction(self, foot_friction):
"""Set the lateral friction of the feet.
@@ -518,8 +488,7 @@ class Minitaur(object):
shared by all four feet.
"""
for link_id in FOOT_LINK_ID:
self._pybullet_client.changeDynamics(
self.quadruped, link_id, lateralFriction=foot_friction)
self._pybullet_client.changeDynamics(self.quadruped, link_id, lateralFriction=foot_friction)
def SetBatteryVoltage(self, voltage):
if self._accurate_motor_model_enabled:
View File
@@ -6,8 +6,7 @@ import os
import inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
os.sys.path.insert(0,parentdir)
os.sys.path.insert(0, parentdir)
import math
import time
@@ -34,8 +33,9 @@ OBSERVATION_EPS = 0.01
RENDER_HEIGHT = 720
RENDER_WIDTH = 960
duckStartPos = [0,0,0.25]
duckStartOrn = [0.5,0.5,0.5,0.5]
duckStartPos = [0, 0, 0.25]
duckStartOrn = [0.5, 0.5, 0.5, 0.5]
class MinitaurBulletDuckEnv(gym.Env):
"""The gym environment for the minitaur.
@@ -47,34 +47,32 @@ class MinitaurBulletDuckEnv(gym.Env):
expenditure.
"""
metadata = {
"render.modes": ["human", "rgb_array"],
"video.frames_per_second": 50
}
metadata = {"render.modes": ["human", "rgb_array"], "video.frames_per_second": 50}
def __init__(self,
urdf_root=pybullet_data.getDataPath(),
action_repeat=1,
distance_weight=1.0,
energy_weight=0.005,
shake_weight=0.0,
drift_weight=0.0,
distance_limit=float("inf"),
observation_noise_stdev=0.0,
self_collision_enabled=True,
motor_velocity_limit=np.inf,
pd_control_enabled=False,#not needed to be true if accurate motor model is enabled (has its own better PD)
leg_model_enabled=True,
accurate_motor_model_enabled=True,
motor_kp=1.0,
motor_kd=0.02,
torque_control_enabled=False,
motor_overheat_protection=True,
hard_reset=True,
on_rack=False,
render=False,
kd_for_pd_controllers=0.3,
env_randomizer=minitaur_env_randomizer.MinitaurEnvRandomizer()):
def __init__(
self,
urdf_root=pybullet_data.getDataPath(),
action_repeat=1,
distance_weight=1.0,
energy_weight=0.005,
shake_weight=0.0,
drift_weight=0.0,
distance_limit=float("inf"),
observation_noise_stdev=0.0,
self_collision_enabled=True,
motor_velocity_limit=np.inf,
pd_control_enabled=False, #not needed to be true if accurate motor model is enabled (has its own better PD)
leg_model_enabled=True,
accurate_motor_model_enabled=True,
motor_kp=1.0,
motor_kd=0.02,
torque_control_enabled=False,
motor_overheat_protection=True,
hard_reset=True,
on_rack=False,
render=False,
kd_for_pd_controllers=0.3,
env_randomizer=minitaur_env_randomizer.MinitaurEnvRandomizer()):
"""Initialize the minitaur gym environment.
Args:
@@ -152,17 +150,14 @@ class MinitaurBulletDuckEnv(gym.Env):
self._action_repeat *= NUM_SUBSTEPS
if self._is_render:
self._pybullet_client = bullet_client.BulletClient(
connection_mode=pybullet.GUI)
self._pybullet_client = bullet_client.BulletClient(connection_mode=pybullet.GUI)
else:
self._pybullet_client = bullet_client.BulletClient()
self.seed()
self.reset()
observation_high = (
self.minitaur.GetObservationUpperBound() + OBSERVATION_EPS)
observation_low = (
self.minitaur.GetObservationLowerBound() - OBSERVATION_EPS)
observation_high = (self.minitaur.GetObservationUpperBound() + OBSERVATION_EPS)
observation_low = (self.minitaur.GetObservationLowerBound() - OBSERVATION_EPS)
action_dim = 8
action_high = np.array([self._action_bound] * action_dim)
self.action_space = spaces.Box(-action_high, action_high, dtype=np.float32)
@@ -183,35 +178,36 @@ class MinitaurBulletDuckEnv(gym.Env):
numSolverIterations=int(self._num_bullet_solver_iterations))
self._pybullet_client.setTimeStep(self._time_step)
self._groundId = self._pybullet_client.loadURDF("%s/plane.urdf" % self._urdf_root)
self._duckId = self._pybullet_client.loadURDF("%s/duck_vhacd.urdf" % self._urdf_root,duckStartPos,duckStartOrn)
self._duckId = self._pybullet_client.loadURDF("%s/duck_vhacd.urdf" % self._urdf_root,
duckStartPos, duckStartOrn)
self._pybullet_client.setGravity(0, 0, -10)
acc_motor = self._accurate_motor_model_enabled
motor_protect = self._motor_overheat_protection
self.minitaur = (minitaur.Minitaur(
pybullet_client=self._pybullet_client,
urdf_root=self._urdf_root,
time_step=self._time_step,
self_collision_enabled=self._self_collision_enabled,
motor_velocity_limit=self._motor_velocity_limit,
pd_control_enabled=self._pd_control_enabled,
accurate_motor_model_enabled=acc_motor,
motor_kp=self._motor_kp,
motor_kd=self._motor_kd,
torque_control_enabled=self._torque_control_enabled,
motor_overheat_protection=motor_protect,
on_rack=self._on_rack,
kd_for_pd_controllers=self._kd_for_pd_controllers))
self.minitaur = (minitaur.Minitaur(pybullet_client=self._pybullet_client,
urdf_root=self._urdf_root,
time_step=self._time_step,
self_collision_enabled=self._self_collision_enabled,
motor_velocity_limit=self._motor_velocity_limit,
pd_control_enabled=self._pd_control_enabled,
accurate_motor_model_enabled=acc_motor,
motor_kp=self._motor_kp,
motor_kd=self._motor_kd,
torque_control_enabled=self._torque_control_enabled,
motor_overheat_protection=motor_protect,
on_rack=self._on_rack,
kd_for_pd_controllers=self._kd_for_pd_controllers))
else:
self.minitaur.Reset(reload_urdf=False)
self._pybullet_client.resetBasePositionAndOrientation(self._duckId,duckStartPos,duckStartOrn)
self._pybullet_client.resetBasePositionAndOrientation(self._duckId, duckStartPos,
duckStartOrn)
if self._env_randomizer is not None:
self._env_randomizer.randomize_env(self)
self._env_step_counter = 0
self._last_base_position = [0, 0, 0]
self._objectives = []
self._pybullet_client.resetDebugVisualizerCamera(
self._cam_dist, self._cam_yaw, self._cam_pitch, [0, 0, 0])
self._pybullet_client.resetDebugVisualizerCamera(self._cam_dist, self._cam_yaw,
self._cam_pitch, [0, 0, 0])
if not self._torque_control_enabled:
for _ in range(100):
if self._pd_control_enabled or self._accurate_motor_model_enabled:
@@ -228,8 +224,7 @@ class MinitaurBulletDuckEnv(gym.Env):
for i, action_component in enumerate(action):
if not (-self._action_bound - ACTION_EPS <= action_component <=
self._action_bound + ACTION_EPS):
raise ValueError(
"{}th action {} out of bounds.".format(i, action_component))
raise ValueError("{}th action {} out of bounds.".format(i, action_component))
action = self.minitaur.ConvertFromLegModel(action)
return action
@@ -258,8 +253,8 @@ class MinitaurBulletDuckEnv(gym.Env):
if time_to_sleep > 0:
time.sleep(time_to_sleep)
base_pos = self.minitaur.GetBasePosition()
self._pybullet_client.resetDebugVisualizerCamera(
self._cam_dist, self._cam_yaw, self._cam_pitch, base_pos)
self._pybullet_client.resetDebugVisualizerCamera(self._cam_dist, self._cam_yaw,
self._cam_pitch, base_pos)
action = self._transform_action_to_motor_command(action)
for _ in range(self._action_repeat):
self.minitaur.ApplyAction(action)
@@ -281,12 +276,17 @@ class MinitaurBulletDuckEnv(gym.Env):
pitch=self._cam_pitch,
roll=0,
upAxisIndex=2)
proj_matrix = self._pybullet_client.computeProjectionMatrixFOV(
fov=60, aspect=float(RENDER_WIDTH)/RENDER_HEIGHT,
nearVal=0.1, farVal=100.0)
(_, _, px, _, _) = self._pybullet_client.getCameraImage(
width=RENDER_WIDTH, height=RENDER_HEIGHT, viewMatrix=view_matrix,
projectionMatrix=proj_matrix, renderer=pybullet.ER_BULLET_HARDWARE_OPENGL)
proj_matrix = self._pybullet_client.computeProjectionMatrixFOV(fov=60,
aspect=float(RENDER_WIDTH) /
RENDER_HEIGHT,
nearVal=0.1,
farVal=100.0)
(_, _, px, _,
_) = self._pybullet_client.getCameraImage(width=RENDER_WIDTH,
height=RENDER_HEIGHT,
viewMatrix=view_matrix,
projectionMatrix=proj_matrix,
renderer=pybullet.ER_BULLET_HARDWARE_OPENGL)
rgb_array = np.array(px)
rgb_array = rgb_array[:, :, :3]
return rgb_array
@@ -297,9 +297,8 @@ class MinitaurBulletDuckEnv(gym.Env):
Returns:
A numpy array of motor angles.
"""
return np.array(
self._observation[MOTOR_ANGLE_OBSERVATION_INDEX:
MOTOR_ANGLE_OBSERVATION_INDEX + NUM_MOTORS])
return np.array(self._observation[MOTOR_ANGLE_OBSERVATION_INDEX:MOTOR_ANGLE_OBSERVATION_INDEX +
NUM_MOTORS])
def get_minitaur_motor_velocities(self):
"""Get the minitaur's motor velocities.
@@ -308,8 +307,8 @@ class MinitaurBulletDuckEnv(gym.Env):
A numpy array of motor velocities.
"""
return np.array(
self._observation[MOTOR_VELOCITY_OBSERVATION_INDEX:
MOTOR_VELOCITY_OBSERVATION_INDEX + NUM_MOTORS])
self._observation[MOTOR_VELOCITY_OBSERVATION_INDEX:MOTOR_VELOCITY_OBSERVATION_INDEX +
NUM_MOTORS])
def get_minitaur_motor_torques(self):
"""Get the minitaur's motor torques.
@@ -318,8 +317,8 @@ class MinitaurBulletDuckEnv(gym.Env):
A numpy array of motor torques.
"""
return np.array(
self._observation[MOTOR_TORQUE_OBSERVATION_INDEX:
MOTOR_TORQUE_OBSERVATION_INDEX + NUM_MOTORS])
self._observation[MOTOR_TORQUE_OBSERVATION_INDEX:MOTOR_TORQUE_OBSERVATION_INDEX +
NUM_MOTORS])
def get_minitaur_base_orientation(self):
"""Get the minitaur's base orientation, represented by a quaternion.
@@ -330,8 +329,8 @@ class MinitaurBulletDuckEnv(gym.Env):
return np.array(self._observation[BASE_ORIENTATION_OBSERVATION_INDEX:])
def lost_duck(self):
points = self._pybullet_client.getContactPoints(self._duckId, self._groundId);
return len(points)>0
points = self._pybullet_client.getContactPoints(self._duckId, self._groundId)
return len(points) > 0
def is_fallen(self):
"""Decide whether the minitaur has fallen.
@@ -347,8 +346,7 @@ class MinitaurBulletDuckEnv(gym.Env):
rot_mat = self._pybullet_client.getMatrixFromQuaternion(orientation)
local_up = rot_mat[6:]
pos = self.minitaur.GetBasePosition()
return (np.dot(np.asarray([0, 0, 1]), np.asarray(local_up)) < 0.85 or
pos[2] < 0.13)
return (np.dot(np.asarray([0, 0, 1]), np.asarray(local_up)) < 0.85 or pos[2] < 0.13)
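The fall test above boils down to one rotation-matrix entry: dotting world z with the last row of getMatrixFromQuaternion picks out R[2][2], the cosine of the tilt between the base's up axis and world z. A minimal numpy-only sketch of the same predicate, assuming pybullet's (x, y, z, w) quaternion order and the thresholds used above:

import numpy as np

def is_fallen(orientation_quat, base_height, tilt_cos_threshold=0.85, height_threshold=0.13):
  # R[2][2] of the rotation matrix is 1 - 2*(x^2 + y^2): the cosine of the
  # angle between the robot's local up axis and the world z axis.
  x, y, z, w = orientation_quat
  tilt_cos = 1.0 - 2.0 * (x * x + y * y)
  return tilt_cos < tilt_cos_threshold or base_height < height_threshold

print(is_fallen((0.0, 0.0, 0.0, 1.0), 0.2))        # upright at nominal height -> False
print(is_fallen((0.7071, 0.0, 0.0, 0.7071), 0.2))  # rolled ~90 degrees -> True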
def _termination(self):
position = self.minitaur.GetBasePosition()
@@ -364,12 +362,9 @@ class MinitaurBulletDuckEnv(gym.Env):
energy_reward = np.abs(
np.dot(self.minitaur.GetMotorTorques(),
self.minitaur.GetMotorVelocities())) * self._time_step
reward = (
self._distance_weight * forward_reward -
self._energy_weight * energy_reward + self._drift_weight * drift_reward
+ self._shake_weight * shake_reward)
self._objectives.append(
[forward_reward, energy_reward, drift_reward, shake_reward])
reward = (self._distance_weight * forward_reward - self._energy_weight * energy_reward +
self._drift_weight * drift_reward + self._shake_weight * shake_reward)
self._objectives.append([forward_reward, energy_reward, drift_reward, shake_reward])
return reward
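The reward is a fixed linear combination of four per-step terms: forward progress minus weighted energy, drift and shake penalties. A tiny worked example, using weights shaped like the MinitaurBulletEnv defaults further below and invented term values:

# Illustrative numbers only; weights shaped like the constructor defaults.
distance_weight, energy_weight, drift_weight, shake_weight = 1.0, 0.005, 0.0, 0.0

forward_reward = 0.012  # base displacement along x during this step
energy_reward = 0.8     # |torques . velocities| * time_step
drift_reward = -0.001   # sideways drift penalty term
shake_reward = -0.002   # vertical shake penalty term

reward = (distance_weight * forward_reward - energy_weight * energy_reward +
          drift_weight * drift_reward + shake_weight * shake_reward)
print(reward)  # 0.012 - 0.004 = 0.008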
def get_objectives(self):
@@ -383,9 +378,9 @@ class MinitaurBulletDuckEnv(gym.Env):
self._get_observation()
observation = np.array(self._observation)
if self._observation_noise_stdev > 0:
observation += (np.random.normal(
scale=self._observation_noise_stdev, size=observation.shape) *
self.minitaur.GetObservationUpperBound())
observation += (
np.random.normal(scale=self._observation_noise_stdev, size=observation.shape) *
self.minitaur.GetObservationUpperBound())
return observation
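When observation noise is turned on, the Gaussian sample is scaled per component by the observation upper bound, so each sensor channel receives noise proportional to its own range. A short sketch of that scaling with a hypothetical three-channel bound vector:

import numpy as np

observation = np.array([0.1, -0.4, 2.0])
observation_noise_stdev = 0.05
upper_bound = np.array([3.14, 10.0, 50.0])  # hypothetical per-channel upper bounds

noisy = observation + (np.random.normal(scale=observation_noise_stdev,
                                        size=observation.shape) * upper_bound)
print(noisy)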
if parse_version(gym.__version__) < parse_version('0.9.6'):

View File

@@ -45,24 +45,20 @@ class MinitaurEnvRandomizer(env_randomizer_base.EnvRandomizerBase):
minitaur.SetBaseMass(randomized_base_mass)
leg_masses = minitaur.GetLegMassesFromURDF()
leg_masses_lower_bound = np.array(leg_masses) * (
1.0 + self._minitaur_leg_mass_err_range[0])
leg_masses_upper_bound = np.array(leg_masses) * (
1.0 + self._minitaur_leg_mass_err_range[1])
leg_masses_lower_bound = np.array(leg_masses) * (1.0 + self._minitaur_leg_mass_err_range[0])
leg_masses_upper_bound = np.array(leg_masses) * (1.0 + self._minitaur_leg_mass_err_range[1])
randomized_leg_masses = [
np.random.uniform(leg_masses_lower_bound[i], leg_masses_upper_bound[i])
for i in range(len(leg_masses))
]
minitaur.SetLegMasses(randomized_leg_masses)
randomized_battery_voltage = random.uniform(BATTERY_VOLTAGE_RANGE[0],
BATTERY_VOLTAGE_RANGE[1])
randomized_battery_voltage = random.uniform(BATTERY_VOLTAGE_RANGE[0], BATTERY_VOLTAGE_RANGE[1])
minitaur.SetBatteryVoltage(randomized_battery_voltage)
randomized_motor_damping = random.uniform(MOTOR_VISCOUS_DAMPING_RANGE[0],
MOTOR_VISCOUS_DAMPING_RANGE[1])
minitaur.SetMotorViscousDamping(randomized_motor_damping)
randomized_foot_friction = random.uniform(MINITAUR_LEG_FRICTION[0],
MINITAUR_LEG_FRICTION[1])
randomized_foot_friction = random.uniform(MINITAUR_LEG_FRICTION[0], MINITAUR_LEG_FRICTION[1])
minitaur.SetFootFriction(randomized_foot_friction)
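The randomizer applies domain randomization at reset time: base and leg masses are perturbed within a relative error band, while battery voltage, motor viscous damping and foot friction are drawn from absolute ranges. A standalone sketch of the same sampling pattern with assumed ranges; the minitaur setters are replaced by a plain dict so the snippet runs without pybullet:

import random
import numpy as np

# Assumed ranges, shaped like the module constants used above.
BATTERY_VOLTAGE_RANGE = (14.8, 16.8)
MOTOR_VISCOUS_DAMPING_RANGE = (0.0, 0.01)
LEG_FRICTION_RANGE = (0.8, 1.5)
leg_mass_err_range = (-0.2, 0.2)

nominal_leg_masses = np.array([0.15, 0.15, 0.15, 0.15])
lower = nominal_leg_masses * (1.0 + leg_mass_err_range[0])
upper = nominal_leg_masses * (1.0 + leg_mass_err_range[1])

randomized = {
    "leg_masses": [np.random.uniform(lower[i], upper[i]) for i in range(len(nominal_leg_masses))],
    "battery_voltage": random.uniform(*BATTERY_VOLTAGE_RANGE),
    "motor_damping": random.uniform(*MOTOR_VISCOUS_DAMPING_RANGE),
    "foot_friction": random.uniform(*LEG_FRICTION_RANGE),
}
print(randomized)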

View File

@@ -5,8 +5,7 @@
import os, inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
os.sys.path.insert(0,parentdir)
os.sys.path.insert(0, parentdir)
import math
import time
@@ -33,6 +32,7 @@ OBSERVATION_EPS = 0.01
RENDER_HEIGHT = 720
RENDER_WIDTH = 960
class MinitaurBulletEnv(gym.Env):
"""The gym environment for the minitaur.
@@ -43,34 +43,32 @@ class MinitaurBulletEnv(gym.Env):
expenditure.
"""
metadata = {
"render.modes": ["human", "rgb_array"],
"video.frames_per_second": 50
}
metadata = {"render.modes": ["human", "rgb_array"], "video.frames_per_second": 50}
def __init__(self,
urdf_root=pybullet_data.getDataPath(),
action_repeat=1,
distance_weight=1.0,
energy_weight=0.005,
shake_weight=0.0,
drift_weight=0.0,
distance_limit=float("inf"),
observation_noise_stdev=0.0,
self_collision_enabled=True,
motor_velocity_limit=np.inf,
pd_control_enabled=False,#not needed to be true if accurate motor model is enabled (has its own better PD)
leg_model_enabled=True,
accurate_motor_model_enabled=True,
motor_kp=1.0,
motor_kd=0.02,
torque_control_enabled=False,
motor_overheat_protection=True,
hard_reset=True,
on_rack=False,
render=False,
kd_for_pd_controllers=0.3,
env_randomizer=minitaur_env_randomizer.MinitaurEnvRandomizer()):
def __init__(
self,
urdf_root=pybullet_data.getDataPath(),
action_repeat=1,
distance_weight=1.0,
energy_weight=0.005,
shake_weight=0.0,
drift_weight=0.0,
distance_limit=float("inf"),
observation_noise_stdev=0.0,
self_collision_enabled=True,
motor_velocity_limit=np.inf,
pd_control_enabled=False, #not needed to be true if accurate motor model is enabled (has its own better PD)
pd_control_enabled=False,  # not needed when the accurate motor model is enabled (it has its own, better PD control)
leg_model_enabled=True,
accurate_motor_model_enabled=True,
motor_kp=1.0,
motor_kd=0.02,
torque_control_enabled=False,
motor_overheat_protection=True,
hard_reset=True,
on_rack=False,
render=False,
kd_for_pd_controllers=0.3,
env_randomizer=minitaur_env_randomizer.MinitaurEnvRandomizer()):
"""Initialize the minitaur gym environment.
Args:
@@ -147,17 +145,14 @@ class MinitaurBulletEnv(gym.Env):
self._action_repeat *= NUM_SUBSTEPS
if self._is_render:
self._pybullet_client = bullet_client.BulletClient(
connection_mode=pybullet.GUI)
self._pybullet_client = bullet_client.BulletClient(connection_mode=pybullet.GUI)
else:
self._pybullet_client = bullet_client.BulletClient()
self.seed()
self.reset()
observation_high = (
self.minitaur.GetObservationUpperBound() + OBSERVATION_EPS)
observation_low = (
self.minitaur.GetObservationLowerBound() - OBSERVATION_EPS)
observation_high = (self.minitaur.GetObservationUpperBound() + OBSERVATION_EPS)
observation_low = (self.minitaur.GetObservationLowerBound() - OBSERVATION_EPS)
action_dim = 8
action_high = np.array([self._action_bound] * action_dim)
self.action_space = spaces.Box(-action_high, action_high, dtype=np.float32)
@@ -178,25 +173,25 @@ class MinitaurBulletEnv(gym.Env):
numSolverIterations=int(self._num_bullet_solver_iterations))
self._pybullet_client.setTimeStep(self._time_step)
plane = self._pybullet_client.loadURDF("%s/plane.urdf" % self._urdf_root)
self._pybullet_client.changeVisualShape(plane,-1,rgbaColor=[1,1,1,0.9])
self._pybullet_client.configureDebugVisualizer(self._pybullet_client.COV_ENABLE_PLANAR_REFLECTION,0)
self._pybullet_client.changeVisualShape(plane, -1, rgbaColor=[1, 1, 1, 0.9])
self._pybullet_client.configureDebugVisualizer(
self._pybullet_client.COV_ENABLE_PLANAR_REFLECTION, 0)
self._pybullet_client.setGravity(0, 0, -10)
acc_motor = self._accurate_motor_model_enabled
motor_protect = self._motor_overheat_protection
self.minitaur = (minitaur.Minitaur(
pybullet_client=self._pybullet_client,
urdf_root=self._urdf_root,
time_step=self._time_step,
self_collision_enabled=self._self_collision_enabled,
motor_velocity_limit=self._motor_velocity_limit,
pd_control_enabled=self._pd_control_enabled,
accurate_motor_model_enabled=acc_motor,
motor_kp=self._motor_kp,
motor_kd=self._motor_kd,
torque_control_enabled=self._torque_control_enabled,
motor_overheat_protection=motor_protect,
on_rack=self._on_rack,
kd_for_pd_controllers=self._kd_for_pd_controllers))
self.minitaur = (minitaur.Minitaur(pybullet_client=self._pybullet_client,
urdf_root=self._urdf_root,
time_step=self._time_step,
self_collision_enabled=self._self_collision_enabled,
motor_velocity_limit=self._motor_velocity_limit,
pd_control_enabled=self._pd_control_enabled,
accurate_motor_model_enabled=acc_motor,
motor_kp=self._motor_kp,
motor_kd=self._motor_kd,
torque_control_enabled=self._torque_control_enabled,
motor_overheat_protection=motor_protect,
on_rack=self._on_rack,
kd_for_pd_controllers=self._kd_for_pd_controllers))
else:
self.minitaur.Reset(reload_urdf=False)
@@ -206,8 +201,8 @@ class MinitaurBulletEnv(gym.Env):
self._env_step_counter = 0
self._last_base_position = [0, 0, 0]
self._objectives = []
self._pybullet_client.resetDebugVisualizerCamera(
self._cam_dist, self._cam_yaw, self._cam_pitch, [0, 0, 0])
self._pybullet_client.resetDebugVisualizerCamera(self._cam_dist, self._cam_yaw,
self._cam_pitch, [0, 0, 0])
if not self._torque_control_enabled:
for _ in range(100):
if self._pd_control_enabled or self._accurate_motor_model_enabled:
@@ -224,8 +219,7 @@ class MinitaurBulletEnv(gym.Env):
for i, action_component in enumerate(action):
if not (-self._action_bound - ACTION_EPS <= action_component <=
self._action_bound + ACTION_EPS):
raise ValueError(
"{}th action {} out of bounds.".format(i, action_component))
raise ValueError("{}th action {} out of bounds.".format(i, action_component))
action = self.minitaur.ConvertFromLegModel(action)
return action
@@ -256,14 +250,15 @@ class MinitaurBulletEnv(gym.Env):
base_pos = self.minitaur.GetBasePosition()
camInfo = self._pybullet_client.getDebugVisualizerCamera()
curTargetPos = camInfo[11]
distance=camInfo[10]
distance = camInfo[10]
yaw = camInfo[8]
pitch=camInfo[9]
targetPos = [0.95*curTargetPos[0]+0.05*base_pos[0],0.95*curTargetPos[1]+0.05*base_pos[1],curTargetPos[2]]
pitch = camInfo[9]
targetPos = [
0.95 * curTargetPos[0] + 0.05 * base_pos[0], 0.95 * curTargetPos[1] + 0.05 * base_pos[1],
curTargetPos[2]
]
self._pybullet_client.resetDebugVisualizerCamera(
distance, yaw, pitch, base_pos)
self._pybullet_client.resetDebugVisualizerCamera(distance, yaw, pitch, base_pos)
action = self._transform_action_to_motor_command(action)
for _ in range(self._action_repeat):
self.minitaur.ApplyAction(action)
@@ -285,12 +280,17 @@ class MinitaurBulletEnv(gym.Env):
pitch=self._cam_pitch,
roll=0,
upAxisIndex=2)
proj_matrix = self._pybullet_client.computeProjectionMatrixFOV(
fov=60, aspect=float(RENDER_WIDTH)/RENDER_HEIGHT,
nearVal=0.1, farVal=100.0)
(_, _, px, _, _) = self._pybullet_client.getCameraImage(
width=RENDER_WIDTH, height=RENDER_HEIGHT, viewMatrix=view_matrix,
projectionMatrix=proj_matrix, renderer=pybullet.ER_BULLET_HARDWARE_OPENGL)
proj_matrix = self._pybullet_client.computeProjectionMatrixFOV(fov=60,
aspect=float(RENDER_WIDTH) /
RENDER_HEIGHT,
nearVal=0.1,
farVal=100.0)
(_, _, px, _,
_) = self._pybullet_client.getCameraImage(width=RENDER_WIDTH,
height=RENDER_HEIGHT,
viewMatrix=view_matrix,
projectionMatrix=proj_matrix,
renderer=pybullet.ER_BULLET_HARDWARE_OPENGL)
rgb_array = np.array(px)
rgb_array = rgb_array[:, :, :3]
return rgb_array
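render() builds a view matrix orbiting the base, a 60-degree FOV projection for the 960x720 target, grabs a frame with getCameraImage and keeps only the RGB channels. A minimal sketch of the same call sequence against a DIRECT-mode client; it uses the default software renderer instead of ER_BULLET_HARDWARE_OPENGL (which needs a GUI connection) and reshapes the pixel buffer explicitly so it also works on builds without numpy support:

import numpy as np
import pybullet
import pybullet_data
from pybullet_utils import bullet_client

RENDER_WIDTH, RENDER_HEIGHT = 960, 720

client = bullet_client.BulletClient(connection_mode=pybullet.DIRECT)
client.setAdditionalSearchPath(pybullet_data.getDataPath())
client.loadURDF("plane.urdf")  # something to look at

view_matrix = client.computeViewMatrixFromYawPitchRoll(cameraTargetPosition=[0, 0, 0],
                                                       distance=1.0,
                                                       yaw=0,
                                                       pitch=-30,
                                                       roll=0,
                                                       upAxisIndex=2)
proj_matrix = client.computeProjectionMatrixFOV(fov=60,
                                                aspect=float(RENDER_WIDTH) / RENDER_HEIGHT,
                                                nearVal=0.1,
                                                farVal=100.0)
(_, _, px, _, _) = client.getCameraImage(width=RENDER_WIDTH,
                                         height=RENDER_HEIGHT,
                                         viewMatrix=view_matrix,
                                         projectionMatrix=proj_matrix)
rgb_array = np.reshape(np.array(px, dtype=np.uint8), (RENDER_HEIGHT, RENDER_WIDTH, 4))[:, :, :3]
print(rgb_array.shape)  # (720, 960, 3)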
@@ -301,9 +301,8 @@ class MinitaurBulletEnv(gym.Env):
Returns:
A numpy array of motor angles.
"""
return np.array(
self._observation[MOTOR_ANGLE_OBSERVATION_INDEX:
MOTOR_ANGLE_OBSERVATION_INDEX + NUM_MOTORS])
return np.array(self._observation[MOTOR_ANGLE_OBSERVATION_INDEX:MOTOR_ANGLE_OBSERVATION_INDEX +
NUM_MOTORS])
def get_minitaur_motor_velocities(self):
"""Get the minitaur's motor velocities.
@@ -312,8 +311,8 @@ class MinitaurBulletEnv(gym.Env):
A numpy array of motor velocities.
"""
return np.array(
self._observation[MOTOR_VELOCITY_OBSERVATION_INDEX:
MOTOR_VELOCITY_OBSERVATION_INDEX + NUM_MOTORS])
self._observation[MOTOR_VELOCITY_OBSERVATION_INDEX:MOTOR_VELOCITY_OBSERVATION_INDEX +
NUM_MOTORS])
def get_minitaur_motor_torques(self):
"""Get the minitaur's motor torques.
@@ -322,8 +321,8 @@ class MinitaurBulletEnv(gym.Env):
A numpy array of motor torques.
"""
return np.array(
self._observation[MOTOR_TORQUE_OBSERVATION_INDEX:
MOTOR_TORQUE_OBSERVATION_INDEX + NUM_MOTORS])
self._observation[MOTOR_TORQUE_OBSERVATION_INDEX:MOTOR_TORQUE_OBSERVATION_INDEX +
NUM_MOTORS])
def get_minitaur_base_orientation(self):
"""Get the minitaur's base orientation, represented by a quaternion.
@@ -347,8 +346,7 @@ class MinitaurBulletEnv(gym.Env):
rot_mat = self._pybullet_client.getMatrixFromQuaternion(orientation)
local_up = rot_mat[6:]
pos = self.minitaur.GetBasePosition()
return (np.dot(np.asarray([0, 0, 1]), np.asarray(local_up)) < 0.85 or
pos[2] < 0.13)
return (np.dot(np.asarray([0, 0, 1]), np.asarray(local_up)) < 0.85 or pos[2] < 0.13)
def _termination(self):
position = self.minitaur.GetBasePosition()
@@ -364,12 +362,9 @@ class MinitaurBulletEnv(gym.Env):
energy_reward = np.abs(
np.dot(self.minitaur.GetMotorTorques(),
self.minitaur.GetMotorVelocities())) * self._time_step
reward = (
self._distance_weight * forward_reward -
self._energy_weight * energy_reward + self._drift_weight * drift_reward
+ self._shake_weight * shake_reward)
self._objectives.append(
[forward_reward, energy_reward, drift_reward, shake_reward])
reward = (self._distance_weight * forward_reward - self._energy_weight * energy_reward +
self._drift_weight * drift_reward + self._shake_weight * shake_reward)
self._objectives.append([forward_reward, energy_reward, drift_reward, shake_reward])
return reward
def get_objectives(self):
@@ -383,9 +378,9 @@ class MinitaurBulletEnv(gym.Env):
self._get_observation()
observation = np.array(self._observation)
if self._observation_noise_stdev > 0:
observation += (np.random.normal(
scale=self._observation_noise_stdev, size=observation.shape) *
self.minitaur.GetObservationUpperBound())
observation += (
np.random.normal(scale=self._observation_noise_stdev, size=observation.shape) *
self.minitaur.GetObservationUpperBound())
return observation
if parse_version(gym.__version__) < parse_version('0.9.6'):

View File

@@ -7,8 +7,7 @@ MOTOR_VOLTAGE = 16.0
MOTOR_RESISTANCE = 0.186
MOTOR_TORQUE_CONSTANT = 0.0954
MOTOR_VISCOUS_DAMPING = 0
MOTOR_SPEED_LIMIT = MOTOR_VOLTAGE / (MOTOR_VISCOUS_DAMPING
+ MOTOR_TORQUE_CONSTANT)
MOTOR_SPEED_LIMIT = MOTOR_VOLTAGE / (MOTOR_VISCOUS_DAMPING + MOTOR_TORQUE_CONSTANT)
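With the viscous damping set to zero, MOTOR_SPEED_LIMIT reduces to the classic no-load speed of a DC motor: supply voltage divided by the back-EMF constant, which is numerically equal to the torque constant in SI units. A quick check of the arithmetic:

MOTOR_VOLTAGE = 16.0
MOTOR_VISCOUS_DAMPING = 0
MOTOR_TORQUE_CONSTANT = 0.0954

MOTOR_SPEED_LIMIT = MOTOR_VOLTAGE / (MOTOR_VISCOUS_DAMPING + MOTOR_TORQUE_CONSTANT)
print(round(MOTOR_SPEED_LIMIT, 1))  # 167.7 rad/s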
class MotorModel(object):
@@ -24,10 +23,7 @@ class MotorModel(object):
pd gains, viscous friction, back-EMF voltage and current-torque profile.
"""
def __init__(self,
torque_control_enabled=False,
kp=1.2,
kd=0):
def __init__(self, torque_control_enabled=False, kp=1.2, kd=0):
self._torque_control_enabled = torque_control_enabled
self._kp = kp
self._kd = kd
@@ -50,8 +46,7 @@ class MotorModel(object):
def get_viscous_dampling(self):
return self._viscous_damping
def convert_to_torque(self, motor_commands, current_motor_angle,
current_motor_velocity):
def convert_to_torque(self, motor_commands, current_motor_angle, current_motor_velocity):
"""Convert the commands (position control or torque control) to torque.
Args:
@@ -66,8 +61,8 @@ class MotorModel(object):
if self._torque_control_enabled:
pwm = motor_commands
else:
pwm = (-self._kp * (current_motor_angle - motor_commands)
- self._kd * current_motor_velocity)
pwm = (-self._kp * (current_motor_angle - motor_commands) -
self._kd * current_motor_velocity)
pwm = np.clip(pwm, -1.0, 1.0)
return self._convert_to_torque_from_pwm(pwm, current_motor_velocity)
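In position mode the command is first turned into a normalized PWM signal by a PD law on angle error and velocity, then clipped to [-1, 1] before the electrical model runs. A numpy sketch of that step using the default gains kp=1.2, kd=0 and invented joint states:

import numpy as np

kp, kd = 1.2, 0.0  # defaults from the constructor above

motor_commands = np.array([0.5, -0.3])         # desired angles (rad)
current_motor_angle = np.array([0.2, -0.1])    # measured angles (rad)
current_motor_velocity = np.array([1.0, 0.0])  # measured velocities (rad/s)

pwm = -kp * (current_motor_angle - motor_commands) - kd * current_motor_velocity
pwm = np.clip(pwm, -1.0, 1.0)
print(pwm)  # [ 0.36 -0.24]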
@@ -81,21 +76,19 @@ class MotorModel(object):
actual_torque: The torque that needs to be applied to the motor.
observed_torque: The torque observed by the sensor.
"""
observed_torque = np.clip(
self._torque_constant * (pwm * self._voltage / self._resistance),
-OBSERVED_TORQUE_LIMIT, OBSERVED_TORQUE_LIMIT)
observed_torque = np.clip(self._torque_constant * (pwm * self._voltage / self._resistance),
-OBSERVED_TORQUE_LIMIT, OBSERVED_TORQUE_LIMIT)
# Net voltage is clipped at 50V by diodes on the motor controller.
voltage_net = np.clip(pwm * self._voltage -
(self._torque_constant + self._viscous_damping)
* current_motor_velocity,
-VOLTAGE_CLIPPING, VOLTAGE_CLIPPING)
voltage_net = np.clip(
pwm * self._voltage -
(self._torque_constant + self._viscous_damping) * current_motor_velocity,
-VOLTAGE_CLIPPING, VOLTAGE_CLIPPING)
current = voltage_net / self._resistance
current_sign = np.sign(current)
current_magnitude = np.absolute(current)
# Saturate torque based on empirical current relation.
actual_torque = np.interp(current_magnitude, self._current_table,
self._torque_table)
actual_torque = np.interp(current_magnitude, self._current_table, self._torque_table)
actual_torque = np.multiply(current_sign, actual_torque)
return actual_torque, observed_torque
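The PWM-to-torque step models the drive electronics: the observed torque is the ideal Kt * V / R value (clipped to the sensor limit), while the actual torque clips the net voltage after back-EMF and damping, converts it to current, and saturates it through an empirical current-torque table. A self-contained sketch under assumed constants and a made-up saturation table:

import numpy as np

# Assumed values, shaped like the module-level constants in this file.
VOLTAGE, RESISTANCE, TORQUE_CONSTANT, VISCOUS_DAMPING = 16.0, 0.186, 0.0954, 0.0
VOLTAGE_CLIPPING, OBSERVED_TORQUE_LIMIT = 50.0, 5.7
current_table = [0.0, 10.0, 20.0, 40.0]  # hypothetical empirical current->torque map
torque_table = [0.0, 1.0, 1.8, 2.5]

def convert_to_torque_from_pwm(pwm, motor_velocity):
  observed_torque = np.clip(TORQUE_CONSTANT * (pwm * VOLTAGE / RESISTANCE),
                            -OBSERVED_TORQUE_LIMIT, OBSERVED_TORQUE_LIMIT)
  voltage_net = np.clip(pwm * VOLTAGE -
                        (TORQUE_CONSTANT + VISCOUS_DAMPING) * motor_velocity,
                        -VOLTAGE_CLIPPING, VOLTAGE_CLIPPING)
  current = voltage_net / RESISTANCE
  actual_torque = np.sign(current) * np.interp(np.absolute(current),
                                               current_table, torque_table)
  return actual_torque, observed_torque

print(convert_to_torque_from_pwm(np.array([0.2]), np.array([5.0])))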

View File

@@ -4,80 +4,150 @@ import math
import numpy as np
class Racecar:
def __init__(self, bullet_client, urdfRootPath='', timeStep=0.01):
self.urdfRootPath = urdfRootPath
self.timeStep = timeStep
self._p = bullet_client
self.reset()
def __init__(self, bullet_client, urdfRootPath='', timeStep=0.01):
self.urdfRootPath = urdfRootPath
self.timeStep = timeStep
self._p = bullet_client
self.reset()
def reset(self):
car = self._p.loadURDF(os.path.join(self.urdfRootPath,"racecar/racecar_differential.urdf"), [0,0,.2],useFixedBase=False)
self.racecarUniqueId = car
#for i in range (self._p.getNumJoints(car)):
# print (self._p.getJointInfo(car,i))
for wheel in range(self._p.getNumJoints(car)):
self._p.setJointMotorControl2(car,wheel,self._p.VELOCITY_CONTROL,targetVelocity=0,force=0)
self._p.getJointInfo(car,wheel)
def reset(self):
car = self._p.loadURDF(os.path.join(self.urdfRootPath, "racecar/racecar_differential.urdf"),
[0, 0, .2],
useFixedBase=False)
self.racecarUniqueId = car
#for i in range (self._p.getNumJoints(car)):
# print (self._p.getJointInfo(car,i))
for wheel in range(self._p.getNumJoints(car)):
self._p.setJointMotorControl2(car,
wheel,
self._p.VELOCITY_CONTROL,
targetVelocity=0,
force=0)
self._p.getJointInfo(car, wheel)
#self._p.setJointMotorControl2(car,10,self._p.VELOCITY_CONTROL,targetVelocity=1,force=10)
c = self._p.createConstraint(car,9,car,11,jointType=self._p.JOINT_GEAR,jointAxis =[0,1,0],parentFramePosition=[0,0,0],childFramePosition=[0,0,0])
self._p.changeConstraint(c,gearRatio=1, maxForce=10000)
#self._p.setJointMotorControl2(car,10,self._p.VELOCITY_CONTROL,targetVelocity=1,force=10)
c = self._p.createConstraint(car,
9,
car,
11,
jointType=self._p.JOINT_GEAR,
jointAxis=[0, 1, 0],
parentFramePosition=[0, 0, 0],
childFramePosition=[0, 0, 0])
self._p.changeConstraint(c, gearRatio=1, maxForce=10000)
c = self._p.createConstraint(car,10,car,13,jointType=self._p.JOINT_GEAR,jointAxis =[0,1,0],parentFramePosition=[0,0,0],childFramePosition=[0,0,0])
self._p.changeConstraint(c,gearRatio=-1, maxForce=10000)
c = self._p.createConstraint(car,
10,
car,
13,
jointType=self._p.JOINT_GEAR,
jointAxis=[0, 1, 0],
parentFramePosition=[0, 0, 0],
childFramePosition=[0, 0, 0])
self._p.changeConstraint(c, gearRatio=-1, maxForce=10000)
c = self._p.createConstraint(car,9,car,13,jointType=self._p.JOINT_GEAR,jointAxis =[0,1,0],parentFramePosition=[0,0,0],childFramePosition=[0,0,0])
self._p.changeConstraint(c,gearRatio=-1, maxForce=10000)
c = self._p.createConstraint(car,
9,
car,
13,
jointType=self._p.JOINT_GEAR,
jointAxis=[0, 1, 0],
parentFramePosition=[0, 0, 0],
childFramePosition=[0, 0, 0])
self._p.changeConstraint(c, gearRatio=-1, maxForce=10000)
c = self._p.createConstraint(car,16,car,18,jointType=self._p.JOINT_GEAR,jointAxis =[0,1,0],parentFramePosition=[0,0,0],childFramePosition=[0,0,0])
self._p.changeConstraint(c,gearRatio=1, maxForce=10000)
c = self._p.createConstraint(car,
16,
car,
18,
jointType=self._p.JOINT_GEAR,
jointAxis=[0, 1, 0],
parentFramePosition=[0, 0, 0],
childFramePosition=[0, 0, 0])
self._p.changeConstraint(c, gearRatio=1, maxForce=10000)
c = self._p.createConstraint(car,16,car,19,jointType=self._p.JOINT_GEAR,jointAxis =[0,1,0],parentFramePosition=[0,0,0],childFramePosition=[0,0,0])
self._p.changeConstraint(c,gearRatio=-1, maxForce=10000)
c = self._p.createConstraint(car,
16,
car,
19,
jointType=self._p.JOINT_GEAR,
jointAxis=[0, 1, 0],
parentFramePosition=[0, 0, 0],
childFramePosition=[0, 0, 0])
self._p.changeConstraint(c, gearRatio=-1, maxForce=10000)
c = self._p.createConstraint(car,17,car,19,jointType=self._p.JOINT_GEAR,jointAxis =[0,1,0],parentFramePosition=[0,0,0],childFramePosition=[0,0,0])
self._p.changeConstraint(c,gearRatio=-1, maxForce=10000)
c = self._p.createConstraint(car,
17,
car,
19,
jointType=self._p.JOINT_GEAR,
jointAxis=[0, 1, 0],
parentFramePosition=[0, 0, 0],
childFramePosition=[0, 0, 0])
self._p.changeConstraint(c, gearRatio=-1, maxForce=10000)
c = self._p.createConstraint(car,1,car,18,jointType=self._p.JOINT_GEAR,jointAxis =[0,1,0],parentFramePosition=[0,0,0],childFramePosition=[0,0,0])
self._p.changeConstraint(c,gearRatio=-1, gearAuxLink = 15, maxForce=10000)
c = self._p.createConstraint(car,3,car,19,jointType=self._p.JOINT_GEAR,jointAxis =[0,1,0],parentFramePosition=[0,0,0],childFramePosition=[0,0,0])
self._p.changeConstraint(c,gearRatio=-1, gearAuxLink = 15,maxForce=10000)
c = self._p.createConstraint(car,
1,
car,
18,
jointType=self._p.JOINT_GEAR,
jointAxis=[0, 1, 0],
parentFramePosition=[0, 0, 0],
childFramePosition=[0, 0, 0])
self._p.changeConstraint(c, gearRatio=-1, gearAuxLink=15, maxForce=10000)
c = self._p.createConstraint(car,
3,
car,
19,
jointType=self._p.JOINT_GEAR,
jointAxis=[0, 1, 0],
parentFramePosition=[0, 0, 0],
childFramePosition=[0, 0, 0])
self._p.changeConstraint(c, gearRatio=-1, gearAuxLink=15, maxForce=10000)
self.steeringLinks = [0,2]
self.maxForce = 20
self.nMotors = 2
self.motorizedwheels=[8,15]
self.speedMultiplier = 20.
self.steeringMultiplier = 0.5
self.steeringLinks = [0, 2]
self.maxForce = 20
self.nMotors = 2
self.motorizedwheels = [8, 15]
self.speedMultiplier = 20.
self.steeringMultiplier = 0.5
def getActionDimension(self):
return self.nMotors
def getActionDimension(self):
return self.nMotors
def getObservationDimension(self):
return len(self.getObservation())
def getObservationDimension(self):
return len(self.getObservation())
def getObservation(self):
observation = []
pos,orn=self._p.getBasePositionAndOrientation(self.racecarUniqueId)
def getObservation(self):
observation = []
pos, orn = self._p.getBasePositionAndOrientation(self.racecarUniqueId)
observation.extend(list(pos))
observation.extend(list(orn))
observation.extend(list(pos))
observation.extend(list(orn))
return observation
return observation
def applyAction(self, motorCommands):
targetVelocity=motorCommands[0]*self.speedMultiplier
#print("targetVelocity")
#print(targetVelocity)
steeringAngle = motorCommands[1]*self.steeringMultiplier
#print("steeringAngle")
#print(steeringAngle)
#print("maxForce")
#print(self.maxForce)
def applyAction(self, motorCommands):
targetVelocity = motorCommands[0] * self.speedMultiplier
#print("targetVelocity")
#print(targetVelocity)
steeringAngle = motorCommands[1] * self.steeringMultiplier
#print("steeringAngle")
#print(steeringAngle)
#print("maxForce")
#print(self.maxForce)
for motor in self.motorizedwheels:
self._p.setJointMotorControl2(self.racecarUniqueId,motor,self._p.VELOCITY_CONTROL,targetVelocity=targetVelocity,force=self.maxForce)
for steer in self.steeringLinks:
self._p.setJointMotorControl2(self.racecarUniqueId,steer,self._p.POSITION_CONTROL,targetPosition=steeringAngle)
for motor in self.motorizedwheels:
self._p.setJointMotorControl2(self.racecarUniqueId,
motor,
self._p.VELOCITY_CONTROL,
targetVelocity=targetVelocity,
force=self.maxForce)
for steer in self.steeringLinks:
self._p.setJointMotorControl2(self.racecarUniqueId,
steer,
self._p.POSITION_CONTROL,
targetPosition=steeringAngle)
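applyAction drives the two motorized rear wheels with VELOCITY_CONTROL and the two steering links with POSITION_CONTROL, after scaling the normalized [throttle, steering] command. A minimal runnable sketch of the same pattern on a DIRECT client; the joint indices and multipliers are the ones hard-coded above:

import pybullet
import pybullet_data
from pybullet_utils import bullet_client

p = bullet_client.BulletClient(connection_mode=pybullet.DIRECT)
p.setAdditionalSearchPath(pybullet_data.getDataPath())
car = p.loadURDF("racecar/racecar_differential.urdf", [0, 0, .2], useFixedBase=False)

motorized_wheels, steering_links = [8, 15], [0, 2]
speed_multiplier, steering_multiplier, max_force = 20., 0.5, 20

motor_commands = [0.4, -0.1]  # normalized [throttle, steering]
target_velocity = motor_commands[0] * speed_multiplier
steering_angle = motor_commands[1] * steering_multiplier

for wheel in motorized_wheels:
  p.setJointMotorControl2(car, wheel, p.VELOCITY_CONTROL,
                          targetVelocity=target_velocity, force=max_force)
for steer in steering_links:
  p.setJointMotorControl2(car, steer, p.POSITION_CONTROL,
                          targetPosition=steering_angle)
p.stepSimulation()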

View File

@@ -1,7 +1,7 @@
import os, inspect
import os, inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
os.sys.path.insert(0,parentdir)
os.sys.path.insert(0, parentdir)
import math
import gym
@@ -19,11 +19,9 @@ from pkg_resources import parse_version
RENDER_HEIGHT = 720
RENDER_WIDTH = 960
class RacecarGymEnv(gym.Env):
metadata = {
'render.modes': ['human', 'rgb_array'],
'video.frames_per_second' : 50
}
metadata = {'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': 50}
def __init__(self,
urdfRoot=pybullet_data.getDataPath(),
@@ -42,25 +40,24 @@ class RacecarGymEnv(gym.Env):
self._renders = renders
self._isDiscrete = isDiscrete
if self._renders:
self._p = bullet_client.BulletClient(
connection_mode=pybullet.GUI)
self._p = bullet_client.BulletClient(connection_mode=pybullet.GUI)
else:
self._p = bullet_client.BulletClient()
self.seed()
#self.reset()
observationDim = 2 #len(self.getExtendedObservation())
observationDim = 2 #len(self.getExtendedObservation())
#print("observationDim")
#print(observationDim)
# observation_high = np.array([np.finfo(np.float32).max] * observationDim)
observation_high = np.ones(observationDim) * 1000 #np.inf
observation_high = np.ones(observationDim) * 1000 #np.inf
if (isDiscrete):
self.action_space = spaces.Discrete(9)
else:
action_dim = 2
self._action_bound = 1
action_high = np.array([self._action_bound] * action_dim)
self.action_space = spaces.Box(-action_high, action_high, dtype=np.float32)
action_dim = 2
self._action_bound = 1
action_high = np.array([self._action_bound] * action_dim)
self.action_space = spaces.Box(-action_high, action_high, dtype=np.float32)
self.observation_space = spaces.Box(-observation_high, observation_high, dtype=np.float32)
self.viewer = None
@@ -69,23 +66,24 @@ class RacecarGymEnv(gym.Env):
#p.setPhysicsEngineParameter(numSolverIterations=300)
self._p.setTimeStep(self._timeStep)
#self._p.loadURDF(os.path.join(self._urdfRoot,"plane.urdf"))
stadiumobjects = self._p.loadSDF(os.path.join(self._urdfRoot,"stadium.sdf"))
stadiumobjects = self._p.loadSDF(os.path.join(self._urdfRoot, "stadium.sdf"))
#move the stadium objects slightly above 0
#for i in stadiumobjects:
# pos,orn = self._p.getBasePositionAndOrientation(i)
# newpos = [pos[0],pos[1],pos[2]-0.1]
# self._p.resetBasePositionAndOrientation(i,newpos,orn)
dist = 5 +2.*random.random()
ang = 2.*3.1415925438*random.random()
dist = 5 + 2. * random.random()
ang = 2. * 3.1415925438 * random.random()
ballx = dist * math.sin(ang)
bally = dist * math.cos(ang)
ballz = 1
self._ballUniqueId = self._p.loadURDF(os.path.join(self._urdfRoot,"sphere2.urdf"),[ballx,bally,ballz])
self._p.setGravity(0,0,-10)
self._racecar = racecar.Racecar(self._p,urdfRootPath=self._urdfRoot, timeStep=self._timeStep)
self._ballUniqueId = self._p.loadURDF(os.path.join(self._urdfRoot, "sphere2.urdf"),
[ballx, bally, ballz])
self._p.setGravity(0, 0, -10)
self._racecar = racecar.Racecar(self._p, urdfRootPath=self._urdfRoot, timeStep=self._timeStep)
self._envStepCounter = 0
for i in range(100):
self._p.stepSimulation()
@@ -100,26 +98,26 @@ class RacecarGymEnv(gym.Env):
return [seed]
def getExtendedObservation(self):
self._observation = [] #self._racecar.getObservation()
carpos,carorn = self._p.getBasePositionAndOrientation(self._racecar.racecarUniqueId)
ballpos,ballorn = self._p.getBasePositionAndOrientation(self._ballUniqueId)
invCarPos,invCarOrn = self._p.invertTransform(carpos,carorn)
ballPosInCar,ballOrnInCar = self._p.multiplyTransforms(invCarPos,invCarOrn,ballpos,ballorn)
self._observation = [] #self._racecar.getObservation()
carpos, carorn = self._p.getBasePositionAndOrientation(self._racecar.racecarUniqueId)
ballpos, ballorn = self._p.getBasePositionAndOrientation(self._ballUniqueId)
invCarPos, invCarOrn = self._p.invertTransform(carpos, carorn)
ballPosInCar, ballOrnInCar = self._p.multiplyTransforms(invCarPos, invCarOrn, ballpos, ballorn)
self._observation.extend([ballPosInCar[0],ballPosInCar[1]])
return self._observation
self._observation.extend([ballPosInCar[0], ballPosInCar[1]])
return self._observation
def step(self, action):
if (self._renders):
basePos,orn = self._p.getBasePositionAndOrientation(self._racecar.racecarUniqueId)
basePos, orn = self._p.getBasePositionAndOrientation(self._racecar.racecarUniqueId)
#self._p.resetDebugVisualizerCamera(1, 30, -40, basePos)
if (self._isDiscrete):
fwd = [-1,-1,-1,0,0,0,1,1,1]
steerings = [-0.6,0,0.6,-0.6,0,0.6,-0.6,0,0.6]
forward = fwd[action]
steer = steerings[action]
realaction = [forward,steer]
fwd = [-1, -1, -1, 0, 0, 0, 1, 1, 1]
steerings = [-0.6, 0, 0.6, -0.6, 0, 0.6, -0.6, 0, 0.6]
forward = fwd[action]
steer = steerings[action]
realaction = [forward, steer]
else:
realaction = action
@@ -142,35 +140,37 @@ class RacecarGymEnv(gym.Env):
def render(self, mode='human', close=False):
if mode != "rgb_array":
return np.array([])
base_pos,orn = self._p.getBasePositionAndOrientation(self._racecar.racecarUniqueId)
view_matrix = self._p.computeViewMatrixFromYawPitchRoll(
cameraTargetPosition=base_pos,
distance=self._cam_dist,
yaw=self._cam_yaw,
pitch=self._cam_pitch,
roll=0,
upAxisIndex=2)
proj_matrix = self._p.computeProjectionMatrixFOV(
fov=60, aspect=float(RENDER_WIDTH)/RENDER_HEIGHT,
nearVal=0.1, farVal=100.0)
(_, _, px, _, _) = self._p.getCameraImage(
width=RENDER_WIDTH, height=RENDER_HEIGHT, viewMatrix=view_matrix,
projectionMatrix=proj_matrix, renderer=pybullet.ER_BULLET_HARDWARE_OPENGL)
base_pos, orn = self._p.getBasePositionAndOrientation(self._racecar.racecarUniqueId)
view_matrix = self._p.computeViewMatrixFromYawPitchRoll(cameraTargetPosition=base_pos,
distance=self._cam_dist,
yaw=self._cam_yaw,
pitch=self._cam_pitch,
roll=0,
upAxisIndex=2)
proj_matrix = self._p.computeProjectionMatrixFOV(fov=60,
aspect=float(RENDER_WIDTH) / RENDER_HEIGHT,
nearVal=0.1,
farVal=100.0)
(_, _, px, _, _) = self._p.getCameraImage(width=RENDER_WIDTH,
height=RENDER_HEIGHT,
viewMatrix=view_matrix,
projectionMatrix=proj_matrix,
renderer=pybullet.ER_BULLET_HARDWARE_OPENGL)
rgb_array = np.array(px)
rgb_array = rgb_array[:, :, :3]
return rgb_array
def _termination(self):
return self._envStepCounter>1000
return self._envStepCounter > 1000
def _reward(self):
closestPoints = self._p.getClosestPoints(self._racecar.racecarUniqueId,self._ballUniqueId,10000)
closestPoints = self._p.getClosestPoints(self._racecar.racecarUniqueId, self._ballUniqueId,
10000)
numPt = len(closestPoints)
reward=-1000
reward = -1000
#print(numPt)
if (numPt>0):
if (numPt > 0):
#print("reward:")
reward = -closestPoints[0][8]
#print(reward)
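The reward is simply the negative car-to-ball distance: element 8 of each tuple returned by getClosestPoints is the closest (surface-to-surface) distance, and -1000 is kept when nothing is found within the 10000 query radius. A runnable sketch with two stand-in spheres instead of the car and ball:

import pybullet
import pybullet_data
from pybullet_utils import bullet_client

p = bullet_client.BulletClient(connection_mode=pybullet.DIRECT)
p.setAdditionalSearchPath(pybullet_data.getDataPath())
body_a = p.loadURDF("sphere2.urdf", [0, 0, 1])  # stand-in for the car
body_b = p.loadURDF("sphere2.urdf", [3, 0, 1])  # stand-in for the ball

closest_points = p.getClosestPoints(body_a, body_b, 10000)
# Index 8 of a closest-point tuple is the contact distance between the bodies.
reward = -closest_points[0][8] if len(closest_points) > 0 else -1000
print(reward)  # negative surface-to-surface distance between the spheres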

View File

@@ -1,7 +1,7 @@
import os, inspect
import os, inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
os.sys.path.insert(0,parentdir)
os.sys.path.insert(0, parentdir)
import math
import gym
@@ -19,11 +19,9 @@ from pkg_resources import parse_version
RENDER_HEIGHT = 720
RENDER_WIDTH = 960
class RacecarZEDGymEnv(gym.Env):
metadata = {
'render.modes': ['human', 'rgb_array'],
'video.frames_per_second' : 50
}
metadata = {'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': 50}
def __init__(self,
urdfRoot=pybullet_data.getDataPath(),
@@ -44,8 +42,7 @@ class RacecarZEDGymEnv(gym.Env):
self._isDiscrete = isDiscrete
if self._renders:
self._p = bullet_client.BulletClient(
connection_mode=pybullet.GUI)
self._p = bullet_client.BulletClient(connection_mode=pybullet.GUI)
else:
self._p = bullet_client.BulletClient()
@@ -59,11 +56,14 @@ class RacecarZEDGymEnv(gym.Env):
if (isDiscrete):
self.action_space = spaces.Discrete(9)
else:
action_dim = 2
self._action_bound = 1
action_high = np.array([self._action_bound] * action_dim)
self.action_space = spaces.Box(-action_high, action_high, dtype=np.float32)
self.observation_space = spaces.Box(low=0, high=255, shape=(self._height, self._width, 4), dtype=np.uint8)
action_dim = 2
self._action_bound = 1
action_high = np.array([self._action_bound] * action_dim)
self.action_space = spaces.Box(-action_high, action_high, dtype=np.float32)
self.observation_space = spaces.Box(low=0,
high=255,
shape=(self._height, self._width, 4),
dtype=np.uint8)
self.viewer = None
@@ -72,23 +72,24 @@ class RacecarZEDGymEnv(gym.Env):
#p.setPhysicsEngineParameter(numSolverIterations=300)
self._p.setTimeStep(self._timeStep)
#self._p.loadURDF(os.path.join(os.path.dirname(__file__),"../data","plane.urdf"))
stadiumobjects = self._p.loadSDF(os.path.join(self._urdfRoot,"stadium.sdf"))
stadiumobjects = self._p.loadSDF(os.path.join(self._urdfRoot, "stadium.sdf"))
#move the stadium objects slightly above 0
for i in stadiumobjects:
pos,orn = self._p.getBasePositionAndOrientation(i)
newpos = [pos[0],pos[1],pos[2]+0.1]
self._p.resetBasePositionAndOrientation(i,newpos,orn)
pos, orn = self._p.getBasePositionAndOrientation(i)
newpos = [pos[0], pos[1], pos[2] + 0.1]
self._p.resetBasePositionAndOrientation(i, newpos, orn)
dist = 5 +2.*random.random()
ang = 2.*3.1415925438*random.random()
dist = 5 + 2. * random.random()
ang = 2. * 3.1415925438 * random.random()
ballx = dist * math.sin(ang)
bally = dist * math.cos(ang)
ballz = 1
self._ballUniqueId = self._p.loadURDF(os.path.join(self._urdfRoot,"sphere2red.urdf"),[ballx,bally,ballz])
self._p.setGravity(0,0,-10)
self._racecar = racecar.Racecar(self._p,urdfRootPath=self._urdfRoot, timeStep=self._timeStep)
self._ballUniqueId = self._p.loadURDF(os.path.join(self._urdfRoot, "sphere2red.urdf"),
[ballx, bally, ballz])
self._p.setGravity(0, 0, -10)
self._racecar = racecar.Racecar(self._p, urdfRootPath=self._urdfRoot, timeStep=self._timeStep)
self._envStepCounter = 0
for i in range(100):
self._p.stepSimulation()
@@ -103,38 +104,50 @@ class RacecarZEDGymEnv(gym.Env):
return [seed]
def getExtendedObservation(self):
carpos,carorn = self._p.getBasePositionAndOrientation(self._racecar.racecarUniqueId)
carmat = self._p.getMatrixFromQuaternion(carorn)
ballpos,ballorn = self._p.getBasePositionAndOrientation(self._ballUniqueId)
invCarPos,invCarOrn = self._p.invertTransform(carpos,carorn)
ballPosInCar,ballOrnInCar = self._p.multiplyTransforms(invCarPos,invCarOrn,ballpos,ballorn)
dist0 = 0.3
dist1 = 1.
eyePos = [carpos[0]+dist0*carmat[0],carpos[1]+dist0*carmat[3],carpos[2]+dist0*carmat[6]+0.3]
targetPos = [carpos[0]+dist1*carmat[0],carpos[1]+dist1*carmat[3],carpos[2]+dist1*carmat[6]+0.3]
up = [carmat[2],carmat[5],carmat[8]]
viewMat = self._p.computeViewMatrix(eyePos,targetPos,up)
#viewMat = self._p.computeViewMatrixFromYawPitchRoll(carpos,1,0,0,0,2)
#print("projectionMatrix:")
#print(self._p.getDebugVisualizerCamera()[3])
projMatrix = [0.7499999403953552, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, -1.0000200271606445, -1.0, 0.0, 0.0, -0.02000020071864128, 0.0]
img_arr = self._p.getCameraImage(width=self._width,height=self._height,viewMatrix=viewMat,projectionMatrix=projMatrix)
rgb=img_arr[2]
np_img_arr = np.reshape(rgb, (self._height, self._width, 4))
self._observation = np_img_arr
return self._observation
carpos, carorn = self._p.getBasePositionAndOrientation(self._racecar.racecarUniqueId)
carmat = self._p.getMatrixFromQuaternion(carorn)
ballpos, ballorn = self._p.getBasePositionAndOrientation(self._ballUniqueId)
invCarPos, invCarOrn = self._p.invertTransform(carpos, carorn)
ballPosInCar, ballOrnInCar = self._p.multiplyTransforms(invCarPos, invCarOrn, ballpos, ballorn)
dist0 = 0.3
dist1 = 1.
eyePos = [
carpos[0] + dist0 * carmat[0], carpos[1] + dist0 * carmat[3],
carpos[2] + dist0 * carmat[6] + 0.3
]
targetPos = [
carpos[0] + dist1 * carmat[0], carpos[1] + dist1 * carmat[3],
carpos[2] + dist1 * carmat[6] + 0.3
]
up = [carmat[2], carmat[5], carmat[8]]
viewMat = self._p.computeViewMatrix(eyePos, targetPos, up)
#viewMat = self._p.computeViewMatrixFromYawPitchRoll(carpos,1,0,0,0,2)
#print("projectionMatrix:")
#print(self._p.getDebugVisualizerCamera()[3])
projMatrix = [
0.7499999403953552, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, -1.0000200271606445, -1.0,
0.0, 0.0, -0.02000020071864128, 0.0
]
img_arr = self._p.getCameraImage(width=self._width,
height=self._height,
viewMatrix=viewMat,
projectionMatrix=projMatrix)
rgb = img_arr[2]
np_img_arr = np.reshape(rgb, (self._height, self._width, 4))
self._observation = np_img_arr
return self._observation
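getExtendedObservation mounts a virtual camera on the car: columns of the base rotation matrix give the forward and up directions, eye and target points are offset along forward, and getCameraImage returns an RGBA frame reshaped to (height, width, 4). A trimmed sketch of the camera math for an arbitrary pose; unlike the code above, which hard-codes its projection matrix, this one builds it with computeProjectionMatrixFOV:

import numpy as np
import pybullet
from pybullet_utils import bullet_client

p = bullet_client.BulletClient(connection_mode=pybullet.DIRECT)

width, height = 100, 50  # same role as the env's _width/_height
carpos = [0.0, 0.0, 0.1]
carorn = p.getQuaternionFromEuler([0, 0, 0.3])
carmat = p.getMatrixFromQuaternion(carorn)  # row-major 3x3

dist0, dist1 = 0.3, 1.0
eye_pos = [carpos[0] + dist0 * carmat[0], carpos[1] + dist0 * carmat[3],
           carpos[2] + dist0 * carmat[6] + 0.3]
target_pos = [carpos[0] + dist1 * carmat[0], carpos[1] + dist1 * carmat[3],
              carpos[2] + dist1 * carmat[6] + 0.3]
up = [carmat[2], carmat[5], carmat[8]]

view_mat = p.computeViewMatrix(eye_pos, target_pos, up)
proj_mat = p.computeProjectionMatrixFOV(fov=60, aspect=float(width) / height,
                                        nearVal=0.01, farVal=10.0)
img_arr = p.getCameraImage(width=width, height=height,
                           viewMatrix=view_mat, projectionMatrix=proj_mat)
rgba = np.reshape(img_arr[2], (height, width, 4))
print(rgba.shape)  # (50, 100, 4)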
def step(self, action):
if (self._renders):
basePos,orn = self._p.getBasePositionAndOrientation(self._racecar.racecarUniqueId)
basePos, orn = self._p.getBasePositionAndOrientation(self._racecar.racecarUniqueId)
#self._p.resetDebugVisualizerCamera(1, 30, -40, basePos)
if (self._isDiscrete):
fwd = [-1,-1,-1,0,0,0,1,1,1]
steerings = [-0.6,0,0.6,-0.6,0,0.6,-0.6,0,0.6]
forward = fwd[action]
steer = steerings[action]
realaction = [forward,steer]
fwd = [-1, -1, -1, 0, 0, 0, 1, 1, 1]
steerings = [-0.6, 0, 0.6, -0.6, 0, 0.6, -0.6, 0, 0.6]
forward = fwd[action]
steer = steerings[action]
realaction = [forward, steer]
else:
realaction = action
@@ -157,35 +170,37 @@ class RacecarZEDGymEnv(gym.Env):
def render(self, mode='human', close=False):
if mode != "rgb_array":
return np.array([])
base_pos,orn = self._p.getBasePositionAndOrientation(self._racecar.racecarUniqueId)
view_matrix = self._p.computeViewMatrixFromYawPitchRoll(
cameraTargetPosition=base_pos,
distance=self._cam_dist,
yaw=self._cam_yaw,
pitch=self._cam_pitch,
roll=0,
upAxisIndex=2)
proj_matrix = self._p.computeProjectionMatrixFOV(
fov=60, aspect=float(RENDER_WIDTH)/RENDER_HEIGHT,
nearVal=0.1, farVal=100.0)
(_, _, px, _, _) = self._p.getCameraImage(
width=RENDER_WIDTH, height=RENDER_HEIGHT, viewMatrix=view_matrix,
projectionMatrix=proj_matrix, renderer=pybullet.ER_BULLET_HARDWARE_OPENGL)
base_pos, orn = self._p.getBasePositionAndOrientation(self._racecar.racecarUniqueId)
view_matrix = self._p.computeViewMatrixFromYawPitchRoll(cameraTargetPosition=base_pos,
distance=self._cam_dist,
yaw=self._cam_yaw,
pitch=self._cam_pitch,
roll=0,
upAxisIndex=2)
proj_matrix = self._p.computeProjectionMatrixFOV(fov=60,
aspect=float(RENDER_WIDTH) / RENDER_HEIGHT,
nearVal=0.1,
farVal=100.0)
(_, _, px, _, _) = self._p.getCameraImage(width=RENDER_WIDTH,
height=RENDER_HEIGHT,
viewMatrix=view_matrix,
projectionMatrix=proj_matrix,
renderer=pybullet.ER_BULLET_HARDWARE_OPENGL)
rgb_array = np.array(px)
rgb_array = rgb_array[:, :, :3]
return rgb_array
def _termination(self):
return self._envStepCounter>1000
return self._envStepCounter > 1000
def _reward(self):
closestPoints = self._p.getClosestPoints(self._racecar.racecarUniqueId,self._ballUniqueId,10000)
closestPoints = self._p.getClosestPoints(self._racecar.racecarUniqueId, self._ballUniqueId,
10000)
numPt = len(closestPoints)
reward=-1000
reward = -1000
#print(numPt)
if (numPt>0):
if (numPt > 0):
#print("reward:")
reward = -closestPoints[0][8]
#print(reward)

View File

@@ -4,11 +4,10 @@ import os
import inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
os.sys.path.insert(0,parentdir)
print("parentdir=",parentdir)
os.sys.path.insert(0, parentdir)
print("parentdir=", parentdir)
from pybullet_envs.deep_mimic.env.pybullet_deep_mimic_env import PyBulletDeepMimicEnv
from pybullet_envs.deep_mimic.env.pybullet_deep_mimic_env import PyBulletDeepMimicEnv
from pybullet_envs.deep_mimic.learning.rl_world import RLWorld
from pybullet_utils.logger import Logger
from pybullet_envs.deep_mimic.testrl import update_world, update_timestep, build_world
@@ -17,36 +16,40 @@ import pybullet_utils.mpi_util as MPIUtil
args = []
world = None
def run():
global update_timestep
global world
global update_timestep
global world
done = False
while not (done):
update_world(world, update_timestep)
done = False
while not (done):
update_world(world, update_timestep)
return
return
def shutdown():
global world
global world
Logger.print2('Shutting down...')
world.shutdown()
return
Logger.print2('Shutting down...')
world.shutdown()
return
def main():
global args
global world
global args
global world
# Command line arguments
args = sys.argv[1:]
enable_draw = False
world = build_world(args, enable_draw)
# Command line arguments
args = sys.argv[1:]
enable_draw = False
world = build_world(args, enable_draw)
run()
shutdown()
run()
shutdown()
return
return
if __name__ == '__main__':
main()
main()

View File

@@ -1,6 +1,7 @@
from enum import Enum
class ActionSpace(Enum):
Null = 0
Continuous = 1
Discrete = 2
Null = 0
Continuous = 1
Discrete = 2

View File

@@ -1,19 +1,21 @@
from abc import abstractmethod
from abc import abstractmethod
import sys, abc
if sys.version_info >= (3, 4):
ABC = abc.ABC
ABC = abc.ABC
else:
ABC = abc.ABCMeta('ABC', (), {})
ABC = abc.ABCMeta('ABC', (), {})
import numpy as np
from enum import Enum
class Env(ABC):
class Terminate(Enum):
Null = 0
Fail = 1
Succ = 2
def __init__(self, args, enable_draw):
self.enable_draw = enable_draw
return
class Terminate(Enum):
Null = 0
Fail = 1
Succ = 2
def __init__(self, args, enable_draw):
self.enable_draw = enable_draw
return

View File

@@ -1,36 +1,61 @@
from pybullet_utils import bullet_client
import math
class HumanoidPoseInterpolator(object):
def __init__(self):
pass
def Reset(self,basePos=[0,0,0], baseOrn=[0,0,0,1],chestRot=[0,0,0,1], neckRot=[0,0,0,1],rightHipRot= [0,0,0,1], rightKneeRot=[0],rightAnkleRot = [0,0,0,1],
rightShoulderRot = [0,0,0,1],rightElbowRot = [0], leftHipRot = [0,0,0,1], leftKneeRot = [0],leftAnkleRot = [0,0,0,1],
leftShoulderRot = [0,0,0,1] ,leftElbowRot = [0],
baseLinVel = [0,0,0],baseAngVel = [0,0,0], chestVel = [0,0,0],neckVel = [0,0,0],rightHipVel = [0,0,0],rightKneeVel = [0],
rightAnkleVel = [0,0,0],rightShoulderVel = [0,0,0],rightElbowVel = [0],leftHipVel = [0,0,0],leftKneeVel = [0],leftAnkleVel = [0,0,0],leftShoulderVel = [0,0,0],leftElbowVel = [0]
):
def Reset(self,
basePos=[0, 0, 0],
baseOrn=[0, 0, 0, 1],
chestRot=[0, 0, 0, 1],
neckRot=[0, 0, 0, 1],
rightHipRot=[0, 0, 0, 1],
rightKneeRot=[0],
rightAnkleRot=[0, 0, 0, 1],
rightShoulderRot=[0, 0, 0, 1],
rightElbowRot=[0],
leftHipRot=[0, 0, 0, 1],
leftKneeRot=[0],
leftAnkleRot=[0, 0, 0, 1],
leftShoulderRot=[0, 0, 0, 1],
leftElbowRot=[0],
baseLinVel=[0, 0, 0],
baseAngVel=[0, 0, 0],
chestVel=[0, 0, 0],
neckVel=[0, 0, 0],
rightHipVel=[0, 0, 0],
rightKneeVel=[0],
rightAnkleVel=[0, 0, 0],
rightShoulderVel=[0, 0, 0],
rightElbowVel=[0],
leftHipVel=[0, 0, 0],
leftKneeVel=[0],
leftAnkleVel=[0, 0, 0],
leftShoulderVel=[0, 0, 0],
leftElbowVel=[0]):
self._basePos = basePos
self._baseLinVel = baseLinVel
#print("HumanoidPoseInterpolator.Reset: baseLinVel = ", baseLinVel)
self._baseOrn =baseOrn
self._baseOrn = baseOrn
self._baseAngVel = baseAngVel
self._chestRot = chestRot
self._chestVel =chestVel
self._chestVel = chestVel
self._neckRot = neckRot
self._neckVel = neckVel
self._rightHipRot = rightHipRot
self._rightHipVel = rightHipVel
self._rightKneeRot =rightKneeRot
self._rightKneeRot = rightKneeRot
self._rightKneeVel = rightKneeVel
self._rightAnkleRot = rightAnkleRot
self._rightAnkleVel = rightAnkleVel
self._rightShoulderRot =rightShoulderRot
self._rightShoulderRot = rightShoulderRot
self._rightShoulderVel = rightShoulderVel
self._rightElbowRot = rightElbowRot
self._rightElbowVel = rightElbowVel
@@ -39,225 +64,253 @@ class HumanoidPoseInterpolator(object):
self._leftHipVel = leftHipVel
self._leftKneeRot = leftKneeRot
self._leftKneeVel = leftKneeVel
self._leftAnkleRot =leftAnkleRot
self._leftAnkleRot = leftAnkleRot
self._leftAnkleVel = leftAnkleVel
self._leftShoulderRot = leftShoulderRot
self._leftShoulderVel = leftShoulderVel
self._leftElbowRot =leftElbowRot
self._leftElbowRot = leftElbowRot
self._leftElbowVel = leftElbowVel
def ComputeLinVel(self,posStart, posEnd, deltaTime):
vel = [(posEnd[0]-posStart[0])/deltaTime,(posEnd[1]-posStart[1])/deltaTime,(posEnd[2]-posStart[2])/deltaTime]
def ComputeLinVel(self, posStart, posEnd, deltaTime):
vel = [(posEnd[0] - posStart[0]) / deltaTime, (posEnd[1] - posStart[1]) / deltaTime,
(posEnd[2] - posStart[2]) / deltaTime]
return vel
def ComputeAngVel(self,ornStart, ornEnd, deltaTime, bullet_client):
dorn = bullet_client.getDifferenceQuaternion(ornStart,ornEnd)
axis,angle = bullet_client.getAxisAngleFromQuaternion(dorn)
angVel = [(axis[0]*angle)/deltaTime,(axis[1]*angle)/deltaTime,(axis[2]*angle)/deltaTime]
def ComputeAngVel(self, ornStart, ornEnd, deltaTime, bullet_client):
dorn = bullet_client.getDifferenceQuaternion(ornStart, ornEnd)
axis, angle = bullet_client.getAxisAngleFromQuaternion(dorn)
angVel = [(axis[0] * angle) / deltaTime, (axis[1] * angle) / deltaTime,
(axis[2] * angle) / deltaTime]
return angVel
def ComputeAngVelRel(self,ornStart, ornEnd, deltaTime, bullet_client):
ornStartConjugate = [-ornStart[0],-ornStart[1],-ornStart[2],ornStart[3]]
pos_diff, q_diff =bullet_client.multiplyTransforms([0,0,0], ornStartConjugate, [0,0,0], ornEnd)
axis,angle = bullet_client.getAxisAngleFromQuaternion(q_diff)
angVel = [(axis[0]*angle)/deltaTime,(axis[1]*angle)/deltaTime,(axis[2]*angle)/deltaTime]
def ComputeAngVelRel(self, ornStart, ornEnd, deltaTime, bullet_client):
ornStartConjugate = [-ornStart[0], -ornStart[1], -ornStart[2], ornStart[3]]
pos_diff, q_diff = bullet_client.multiplyTransforms([0, 0, 0], ornStartConjugate, [0, 0, 0],
ornEnd)
axis, angle = bullet_client.getAxisAngleFromQuaternion(q_diff)
angVel = [(axis[0] * angle) / deltaTime, (axis[1] * angle) / deltaTime,
(axis[2] * angle) / deltaTime]
return angVel
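Both angular-velocity helpers use the same finite difference: take the quaternion that rotates the start orientation into the end orientation, convert it to axis-angle, and divide by the keyframe duration (ComputeAngVelRel builds the difference in the start frame via the conjugate). A small sketch of that computation, with quaternions in pybullet's (x, y, z, w) order:

import pybullet
from pybullet_utils import bullet_client

p = bullet_client.BulletClient(connection_mode=pybullet.DIRECT)

orn_start = p.getQuaternionFromEuler([0, 0, 0.0])
orn_end = p.getQuaternionFromEuler([0, 0, 0.5])  # 0.5 rad of yaw over one keyframe
delta_time = 0.1

dorn = p.getDifferenceQuaternion(orn_start, orn_end)
axis, angle = p.getAxisAngleFromQuaternion(dorn)
ang_vel = [(axis[i] * angle) / delta_time for i in range(3)]
print(ang_vel)  # approximately [0, 0, 5.0] rad/s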
def NormalizeVector(self, vec):
length2 = orn[0]*orn[0]+orn[1]*orn[1]+orn[2]*orn[2]
if (length2>0):
length2 = vec[0] * vec[0] + vec[1] * vec[1] + vec[2] * vec[2]
if (length2 > 0):
length = math.sqrt(length2)
def NormalizeQuaternion(self, orn):
length2 = orn[0]*orn[0]+orn[1]*orn[1]+orn[2]*orn[2]+orn[3]*orn[3]
if (length2>0):
length2 = orn[0] * orn[0] + orn[1] * orn[1] + orn[2] * orn[2] + orn[3] * orn[3]
if (length2 > 0):
length = math.sqrt(length2)
orn[0]/=length
orn[1]/=length
orn[2]/=length
orn[3]/=length
orn[0] /= length
orn[1] /= length
orn[2] /= length
orn[3] /= length
return orn
#print("Normalize? length=",length)
def PostProcessMotionData(self, frameData):
baseOrn1Start = [frameData[5],frameData[6], frameData[7],frameData[4]]
chestRotStart = [frameData[9],frameData[10],frameData[11],frameData[8]]
neckRotStart = [frameData[13],frameData[14],frameData[15],frameData[12]]
rightHipRotStart = [frameData[17],frameData[18],frameData[19],frameData[16]]
rightAnkleRotStart = [frameData[22],frameData[23],frameData[24],frameData[21]]
rightShoulderRotStart = [frameData[26],frameData[27],frameData[28],frameData[25]]
leftHipRotStart = [frameData[31],frameData[32],frameData[33],frameData[30]]
leftAnkleRotStart = [frameData[36],frameData[37],frameData[38],frameData[35]]
leftShoulderRotStart = [frameData[40],frameData[41],frameData[42],frameData[39]]
def GetPose(self):
pose = [ self._basePos[0],self._basePos[1],self._basePos[2],
self._baseOrn[0],self._baseOrn[1],self._baseOrn[2],self._baseOrn[3],
self._chestRot[0],self._chestRot[1],self._chestRot[2],self._chestRot[3],
self._neckRot[0],self._neckRot[1],self._neckRot[2],self._neckRot[3],
self._rightHipRot[0],self._rightHipRot[1],self._rightHipRot[2],self._rightHipRot[3],
self._rightKneeRot[0],
self._rightAnkleRot[0],self._rightAnkleRot[1],self._rightAnkleRot[2],self._rightAnkleRot[3],
self._rightShoulderRot[0],self._rightShoulderRot[1],self._rightShoulderRot[2],self._rightShoulderRot[3],
self._rightElbowRot[0],
self._leftHipRot[0],self._leftHipRot[1],self._leftHipRot[2],self._leftHipRot[3],
self._leftKneeRot[0],
self._leftAnkleRot[0],self._leftAnkleRot[1],self._leftAnkleRot[2],self._leftAnkleRot[3],
self._leftShoulderRot[0],self._leftShoulderRot[1],self._leftShoulderRot[2],self._leftShoulderRot[3],
self._leftElbowRot[0] ]
return pose
baseOrn1Start = [frameData[5], frameData[6], frameData[7], frameData[4]]
def Slerp(self, frameFraction, frameData, frameDataNext,bullet_client ):
chestRotStart = [frameData[9], frameData[10], frameData[11], frameData[8]]
neckRotStart = [frameData[13], frameData[14], frameData[15], frameData[12]]
rightHipRotStart = [frameData[17], frameData[18], frameData[19], frameData[16]]
rightAnkleRotStart = [frameData[22], frameData[23], frameData[24], frameData[21]]
rightShoulderRotStart = [frameData[26], frameData[27], frameData[28], frameData[25]]
leftHipRotStart = [frameData[31], frameData[32], frameData[33], frameData[30]]
leftAnkleRotStart = [frameData[36], frameData[37], frameData[38], frameData[35]]
leftShoulderRotStart = [frameData[40], frameData[41], frameData[42], frameData[39]]
def GetPose(self):
pose = [
self._basePos[0], self._basePos[1], self._basePos[2], self._baseOrn[0], self._baseOrn[1],
self._baseOrn[2], self._baseOrn[3], self._chestRot[0], self._chestRot[1],
self._chestRot[2], self._chestRot[3], self._neckRot[0], self._neckRot[1], self._neckRot[2],
self._neckRot[3], self._rightHipRot[0], self._rightHipRot[1], self._rightHipRot[2],
self._rightHipRot[3], self._rightKneeRot[0], self._rightAnkleRot[0],
self._rightAnkleRot[1], self._rightAnkleRot[2], self._rightAnkleRot[3],
self._rightShoulderRot[0], self._rightShoulderRot[1], self._rightShoulderRot[2],
self._rightShoulderRot[3], self._rightElbowRot[0], self._leftHipRot[0],
self._leftHipRot[1], self._leftHipRot[2], self._leftHipRot[3], self._leftKneeRot[0],
self._leftAnkleRot[0], self._leftAnkleRot[1], self._leftAnkleRot[2], self._leftAnkleRot[3],
self._leftShoulderRot[0], self._leftShoulderRot[1], self._leftShoulderRot[2],
self._leftShoulderRot[3], self._leftElbowRot[0]
]
return pose
def Slerp(self, frameFraction, frameData, frameDataNext, bullet_client):
keyFrameDuration = frameData[0]
basePos1Start = [frameData[1],frameData[2],frameData[3]]
basePos1End = [frameDataNext[1],frameDataNext[2],frameDataNext[3]]
self._basePos = [basePos1Start[0]+frameFraction*(basePos1End[0]-basePos1Start[0]),
basePos1Start[1]+frameFraction*(basePos1End[1]-basePos1Start[1]),
basePos1Start[2]+frameFraction*(basePos1End[2]-basePos1Start[2])]
self._baseLinVel = self.ComputeLinVel(basePos1Start,basePos1End, keyFrameDuration)
baseOrn1Start = [frameData[5],frameData[6], frameData[7],frameData[4]]
baseOrn1Next = [frameDataNext[5],frameDataNext[6], frameDataNext[7],frameDataNext[4]]
self._baseOrn = bullet_client.getQuaternionSlerp(baseOrn1Start,baseOrn1Next,frameFraction)
self._baseAngVel = self.ComputeAngVel(baseOrn1Start,baseOrn1Next, keyFrameDuration, bullet_client)
basePos1Start = [frameData[1], frameData[2], frameData[3]]
basePos1End = [frameDataNext[1], frameDataNext[2], frameDataNext[3]]
self._basePos = [
basePos1Start[0] + frameFraction * (basePos1End[0] - basePos1Start[0]),
basePos1Start[1] + frameFraction * (basePos1End[1] - basePos1Start[1]),
basePos1Start[2] + frameFraction * (basePos1End[2] - basePos1Start[2])
]
self._baseLinVel = self.ComputeLinVel(basePos1Start, basePos1End, keyFrameDuration)
baseOrn1Start = [frameData[5], frameData[6], frameData[7], frameData[4]]
baseOrn1Next = [frameDataNext[5], frameDataNext[6], frameDataNext[7], frameDataNext[4]]
self._baseOrn = bullet_client.getQuaternionSlerp(baseOrn1Start, baseOrn1Next, frameFraction)
self._baseAngVel = self.ComputeAngVel(baseOrn1Start, baseOrn1Next, keyFrameDuration,
bullet_client)
##pre-rotate to make z-up
#y2zPos=[0,0,0.0]
#y2zOrn = p.getQuaternionFromEuler([1.57,0,0])
#basePos,baseOrn = p.multiplyTransforms(y2zPos, y2zOrn,basePos1,baseOrn1)
chestRotStart = [frameData[9],frameData[10],frameData[11],frameData[8]]
chestRotEnd = [frameDataNext[9],frameDataNext[10],frameDataNext[11],frameDataNext[8]]
self._chestRot = bullet_client.getQuaternionSlerp(chestRotStart,chestRotEnd,frameFraction)
self._chestVel = self.ComputeAngVelRel(chestRotStart,chestRotEnd,keyFrameDuration,bullet_client)
neckRotStart = [frameData[13],frameData[14],frameData[15],frameData[12]]
neckRotEnd= [frameDataNext[13],frameDataNext[14],frameDataNext[15],frameDataNext[12]]
self._neckRot = bullet_client.getQuaternionSlerp(neckRotStart,neckRotEnd,frameFraction)
self._neckVel = self.ComputeAngVelRel(neckRotStart,neckRotEnd,keyFrameDuration,bullet_client)
rightHipRotStart = [frameData[17],frameData[18],frameData[19],frameData[16]]
rightHipRotEnd = [frameDataNext[17],frameDataNext[18],frameDataNext[19],frameDataNext[16]]
self._rightHipRot = bullet_client.getQuaternionSlerp(rightHipRotStart,rightHipRotEnd,frameFraction)
self._rightHipVel = self.ComputeAngVelRel(rightHipRotStart,rightHipRotEnd,keyFrameDuration,bullet_client)
chestRotStart = [frameData[9], frameData[10], frameData[11], frameData[8]]
chestRotEnd = [frameDataNext[9], frameDataNext[10], frameDataNext[11], frameDataNext[8]]
self._chestRot = bullet_client.getQuaternionSlerp(chestRotStart, chestRotEnd, frameFraction)
self._chestVel = self.ComputeAngVelRel(chestRotStart, chestRotEnd, keyFrameDuration,
bullet_client)
neckRotStart = [frameData[13], frameData[14], frameData[15], frameData[12]]
neckRotEnd = [frameDataNext[13], frameDataNext[14], frameDataNext[15], frameDataNext[12]]
self._neckRot = bullet_client.getQuaternionSlerp(neckRotStart, neckRotEnd, frameFraction)
self._neckVel = self.ComputeAngVelRel(neckRotStart, neckRotEnd, keyFrameDuration,
bullet_client)
rightHipRotStart = [frameData[17], frameData[18], frameData[19], frameData[16]]
rightHipRotEnd = [frameDataNext[17], frameDataNext[18], frameDataNext[19], frameDataNext[16]]
self._rightHipRot = bullet_client.getQuaternionSlerp(rightHipRotStart, rightHipRotEnd,
frameFraction)
self._rightHipVel = self.ComputeAngVelRel(rightHipRotStart, rightHipRotEnd, keyFrameDuration,
bullet_client)
rightKneeRotStart = [frameData[20]]
rightKneeRotEnd = [frameDataNext[20]]
self._rightKneeRot = [rightKneeRotStart[0]+frameFraction*(rightKneeRotEnd[0]-rightKneeRotStart[0])]
self._rightKneeVel = [(rightKneeRotEnd[0]-rightKneeRotStart[0])/keyFrameDuration]
rightAnkleRotStart = [frameData[22],frameData[23],frameData[24],frameData[21]]
rightAnkleRotEnd = [frameDataNext[22],frameDataNext[23],frameDataNext[24],frameDataNext[21]]
self._rightAnkleRot = bullet_client.getQuaternionSlerp(rightAnkleRotStart,rightAnkleRotEnd,frameFraction)
self._rightAnkleVel = self.ComputeAngVelRel(rightAnkleRotStart,rightAnkleRotEnd,keyFrameDuration,bullet_client)
rightShoulderRotStart = [frameData[26],frameData[27],frameData[28],frameData[25]]
rightShoulderRotEnd = [frameDataNext[26],frameDataNext[27],frameDataNext[28],frameDataNext[25]]
self._rightShoulderRot = bullet_client.getQuaternionSlerp(rightShoulderRotStart,rightShoulderRotEnd,frameFraction)
self._rightShoulderVel = self.ComputeAngVelRel(rightShoulderRotStart,rightShoulderRotEnd, keyFrameDuration,bullet_client)
self._rightKneeRot = [
rightKneeRotStart[0] + frameFraction * (rightKneeRotEnd[0] - rightKneeRotStart[0])
]
self._rightKneeVel = [(rightKneeRotEnd[0] - rightKneeRotStart[0]) / keyFrameDuration]
rightAnkleRotStart = [frameData[22], frameData[23], frameData[24], frameData[21]]
rightAnkleRotEnd = [frameDataNext[22], frameDataNext[23], frameDataNext[24], frameDataNext[21]]
self._rightAnkleRot = bullet_client.getQuaternionSlerp(rightAnkleRotStart, rightAnkleRotEnd,
frameFraction)
self._rightAnkleVel = self.ComputeAngVelRel(rightAnkleRotStart, rightAnkleRotEnd,
keyFrameDuration, bullet_client)
rightShoulderRotStart = [frameData[26], frameData[27], frameData[28], frameData[25]]
rightShoulderRotEnd = [
frameDataNext[26], frameDataNext[27], frameDataNext[28], frameDataNext[25]
]
self._rightShoulderRot = bullet_client.getQuaternionSlerp(rightShoulderRotStart,
rightShoulderRotEnd, frameFraction)
self._rightShoulderVel = self.ComputeAngVelRel(rightShoulderRotStart, rightShoulderRotEnd,
keyFrameDuration, bullet_client)
rightElbowRotStart = [frameData[29]]
rightElbowRotEnd = [frameDataNext[29]]
self._rightElbowRot = [rightElbowRotStart[0]+frameFraction*(rightElbowRotEnd[0]-rightElbowRotStart[0])]
self._rightElbowVel = [(rightElbowRotEnd[0]-rightElbowRotStart[0])/keyFrameDuration]
leftHipRotStart = [frameData[31],frameData[32],frameData[33],frameData[30]]
leftHipRotEnd = [frameDataNext[31],frameDataNext[32],frameDataNext[33],frameDataNext[30]]
self._leftHipRot = bullet_client.getQuaternionSlerp(leftHipRotStart,leftHipRotEnd,frameFraction)
self._leftHipVel = self.ComputeAngVelRel(leftHipRotStart, leftHipRotEnd,keyFrameDuration,bullet_client)
self._rightElbowRot = [
rightElbowRotStart[0] + frameFraction * (rightElbowRotEnd[0] - rightElbowRotStart[0])
]
self._rightElbowVel = [(rightElbowRotEnd[0] - rightElbowRotStart[0]) / keyFrameDuration]
leftHipRotStart = [frameData[31], frameData[32], frameData[33], frameData[30]]
leftHipRotEnd = [frameDataNext[31], frameDataNext[32], frameDataNext[33], frameDataNext[30]]
self._leftHipRot = bullet_client.getQuaternionSlerp(leftHipRotStart, leftHipRotEnd,
frameFraction)
self._leftHipVel = self.ComputeAngVelRel(leftHipRotStart, leftHipRotEnd, keyFrameDuration,
bullet_client)
leftKneeRotStart = [frameData[34]]
leftKneeRotEnd = [frameDataNext[34]]
self._leftKneeRot = [leftKneeRotStart[0] +frameFraction*(leftKneeRotEnd[0]-leftKneeRotStart[0]) ]
self._leftKneeVel = [(leftKneeRotEnd[0]-leftKneeRotStart[0])/keyFrameDuration]
leftAnkleRotStart = [frameData[36],frameData[37],frameData[38],frameData[35]]
leftAnkleRotEnd = [frameDataNext[36],frameDataNext[37],frameDataNext[38],frameDataNext[35]]
self._leftAnkleRot = bullet_client.getQuaternionSlerp(leftAnkleRotStart,leftAnkleRotEnd,frameFraction)
self._leftAnkleVel = self.ComputeAngVelRel(leftAnkleRotStart,leftAnkleRotEnd,keyFrameDuration,bullet_client)
self._leftKneeRot = [
leftKneeRotStart[0] + frameFraction * (leftKneeRotEnd[0] - leftKneeRotStart[0])
]
self._leftKneeVel = [(leftKneeRotEnd[0] - leftKneeRotStart[0]) / keyFrameDuration]
leftShoulderRotStart = [frameData[40],frameData[41],frameData[42],frameData[39]]
leftShoulderRotEnd = [frameDataNext[40],frameDataNext[41],frameDataNext[42],frameDataNext[39]]
self._leftShoulderRot = bullet_client.getQuaternionSlerp(leftShoulderRotStart,leftShoulderRotEnd,frameFraction)
self._leftShoulderVel = self.ComputeAngVelRel(leftShoulderRotStart,leftShoulderRotEnd,keyFrameDuration,bullet_client)
leftAnkleRotStart = [frameData[36], frameData[37], frameData[38], frameData[35]]
leftAnkleRotEnd = [frameDataNext[36], frameDataNext[37], frameDataNext[38], frameDataNext[35]]
self._leftAnkleRot = bullet_client.getQuaternionSlerp(leftAnkleRotStart, leftAnkleRotEnd,
frameFraction)
self._leftAnkleVel = self.ComputeAngVelRel(leftAnkleRotStart, leftAnkleRotEnd,
keyFrameDuration, bullet_client)
leftShoulderRotStart = [frameData[40], frameData[41], frameData[42], frameData[39]]
leftShoulderRotEnd = [
frameDataNext[40], frameDataNext[41], frameDataNext[42], frameDataNext[39]
]
self._leftShoulderRot = bullet_client.getQuaternionSlerp(leftShoulderRotStart,
leftShoulderRotEnd, frameFraction)
self._leftShoulderVel = self.ComputeAngVelRel(leftShoulderRotStart, leftShoulderRotEnd,
keyFrameDuration, bullet_client)
leftElbowRotStart = [frameData[43]]
leftElbowRotEnd = [frameDataNext[43]]
self._leftElbowRot = [leftElbowRotStart[0]+frameFraction*(leftElbowRotEnd[0]-leftElbowRotStart[0])]
self._leftElbowVel = [(leftElbowRotEnd[0]-leftElbowRotStart[0])/keyFrameDuration]
self._leftElbowRot = [
leftElbowRotStart[0] + frameFraction * (leftElbowRotEnd[0] - leftElbowRotStart[0])
]
self._leftElbowVel = [(leftElbowRotEnd[0] - leftElbowRotStart[0]) / keyFrameDuration]
pose = self.GetPose()
return pose
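# Note: the action vector packs each spherical joint as four numbers (an angle followed
# by a rotation axis) and each revolute joint (knees, elbows) as a single angle,
# giving 8 * 4 + 4 * 1 = 36 values, which matches get_action_size() (43 dofs - 7 root dofs).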
def ConvertFromAction(self, pybullet_client, action):
#turn action into pose
self.Reset()#?? needed?
index=0
self.Reset() #?? needed?
index = 0
angle = action[index]
axis = [action[index+1],action[index+2],action[index+3]]
index+=4
self._chestRot = pybullet_client.getQuaternionFromAxisAngle(axis,angle)
axis = [action[index + 1], action[index + 2], action[index + 3]]
index += 4
self._chestRot = pybullet_client.getQuaternionFromAxisAngle(axis, angle)
#print("pose._chestRot=",pose._chestRot)
angle = action[index]
axis = [action[index+1],action[index+2],action[index+3]]
index+=4
self._neckRot = pybullet_client.getQuaternionFromAxisAngle(axis,angle)
axis = [action[index + 1], action[index + 2], action[index + 3]]
index += 4
self._neckRot = pybullet_client.getQuaternionFromAxisAngle(axis, angle)
angle = action[index]
axis = [action[index+1],action[index+2],action[index+3]]
index+=4
self._rightHipRot = pybullet_client.getQuaternionFromAxisAngle(axis,angle)
axis = [action[index + 1], action[index + 2], action[index + 3]]
index += 4
self._rightHipRot = pybullet_client.getQuaternionFromAxisAngle(axis, angle)
angle = action[index]
index+=1
index += 1
self._rightKneeRot = [angle]
angle = action[index]
axis = [action[index+1],action[index+2],action[index+3]]
index+=4
self._rightAnkleRot = pybullet_client.getQuaternionFromAxisAngle(axis,angle)
axis = [action[index + 1], action[index + 2], action[index + 3]]
index += 4
self._rightAnkleRot = pybullet_client.getQuaternionFromAxisAngle(axis, angle)
angle = action[index]
axis = [action[index+1],action[index+2],action[index+3]]
index+=4
self._rightShoulderRot = pybullet_client.getQuaternionFromAxisAngle(axis,angle)
axis = [action[index + 1], action[index + 2], action[index + 3]]
index += 4
self._rightShoulderRot = pybullet_client.getQuaternionFromAxisAngle(axis, angle)
angle = action[index]
index+=1
index += 1
self._rightElbowRot = [angle]
angle = action[index]
axis = [action[index+1],action[index+2],action[index+3]]
index+=4
self._leftHipRot = pybullet_client.getQuaternionFromAxisAngle(axis,angle)
axis = [action[index + 1], action[index + 2], action[index + 3]]
index += 4
self._leftHipRot = pybullet_client.getQuaternionFromAxisAngle(axis, angle)
angle = action[index]
index+=1
index += 1
self._leftKneeRot = [angle]
angle = action[index]
axis = [action[index+1],action[index+2],action[index+3]]
index+=4
self._leftAnkleRot = pybullet_client.getQuaternionFromAxisAngle(axis,angle)
axis = [action[index + 1], action[index + 2], action[index + 3]]
index += 4
self._leftAnkleRot = pybullet_client.getQuaternionFromAxisAngle(axis, angle)
angle = action[index]
axis = [action[index+1],action[index+2],action[index+3]]
index+=4
self._leftShoulderRot = pybullet_client.getQuaternionFromAxisAngle(axis,angle)
axis = [action[index + 1], action[index + 2], action[index + 3]]
index += 4
self._leftShoulderRot = pybullet_client.getQuaternionFromAxisAngle(axis, angle)
angle = action[index]
index+=1
index += 1
self._leftElbowRot = [angle]
pose = self.GetPose()
return pose

File diff suppressed because it is too large

View File

@@ -1,7 +1,9 @@
import json
import math
class MotionCaptureData(object):
def __init__(self):
self.Reset()
@@ -13,30 +15,33 @@ class MotionCaptureData(object):
self._motion_data = json.load(f)
def NumFrames(self):
return len(self._motion_data['Frames'])
return len(self._motion_data['Frames'])
def KeyFrameDuraction(self):
return self._motion_data['Frames'][0][0]
return self._motion_data['Frames'][0][0]
def getCycleTime(self):
keyFrameDuration = self.KeyFrameDuraction()
cycleTime = keyFrameDuration*(self.NumFrames()-1)
cycleTime = keyFrameDuration * (self.NumFrames() - 1)
return cycleTime
def calcCycleCount(self, simTime, cycleTime):
phases = simTime / cycleTime;
phases = simTime / cycleTime
count = math.floor(phases)
loop = True
#count = (loop) ? count : cMathUtil::Clamp(count, 0, 1);
return count
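# The cycle offset is the root translation accumulated over one full motion cycle
# (base position of the last keyframe minus the first); it can be used to shift the
# character each time the clip wraps around.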
def computeCycleOffset(self):
firstFrame=0
lastFrame = self.NumFrames()-1
firstFrame = 0
lastFrame = self.NumFrames() - 1
frameData = self._motion_data['Frames'][0]
frameDataNext = self._motion_data['Frames'][lastFrame]
basePosStart = [frameData[1],frameData[2],frameData[3]]
basePosEnd = [frameDataNext[1],frameDataNext[2],frameDataNext[3]]
self._cycleOffset = [basePosEnd[0]-basePosStart[0],basePosEnd[1]-basePosStart[1],basePosEnd[2]-basePosStart[2]]
return self._cycleOffset
basePosStart = [frameData[1], frameData[2], frameData[3]]
basePosEnd = [frameDataNext[1], frameDataNext[2], frameDataNext[3]]
self._cycleOffset = [
basePosEnd[0] - basePosStart[0], basePosEnd[1] - basePosStart[1],
basePosEnd[2] - basePosStart[2]
]
return self._cycleOffset

View File

@@ -9,301 +9,313 @@ from pybullet_envs.deep_mimic.env import humanoid_stable_pd
import pybullet_data
import pybullet as p1
import random
class PyBulletDeepMimicEnv(Env):
def __init__(self, arg_parser=None, enable_draw=False, pybullet_client=None):
super().__init__(arg_parser, enable_draw)
self._num_agents = 1
self._pybullet_client = pybullet_client
self._isInitialized = False
self._useStablePD = True
self._arg_parser = arg_parser
self.reset()
def reset(self):
if not self._isInitialized:
if self.enable_draw:
self._pybullet_client = bullet_client.BulletClient(connection_mode=p1.GUI)
#disable 'GUI' since it slows down a lot on Mac OSX and some other platforms
self._pybullet_client.configureDebugVisualizer(self._pybullet_client.COV_ENABLE_GUI,0)
else:
self._pybullet_client = bullet_client.BulletClient()
self._pybullet_client.setAdditionalSearchPath(pybullet_data.getDataPath())
z2y = self._pybullet_client.getQuaternionFromEuler([-math.pi*0.5,0,0])
self._planeId = self._pybullet_client.loadURDF("plane_implicit.urdf",[0,0,0],z2y, useMaximalCoordinates=True)
#print("planeId=",self._planeId)
self._pybullet_client.configureDebugVisualizer(self._pybullet_client.COV_ENABLE_Y_AXIS_UP,1)
self._pybullet_client.setGravity(0,-9.8,0)
self._pybullet_client.setPhysicsEngineParameter(numSolverIterations=10)
self._pybullet_client.changeDynamics(self._planeId, linkIndex=-1, lateralFriction=0.9)
self._mocapData = motion_capture_data.MotionCaptureData()
motion_file = self._arg_parser.parse_strings('motion_file')
print("motion_file=",motion_file[0])
motionPath = pybullet_data.getDataPath()+"/"+motion_file[0]
#motionPath = pybullet_data.getDataPath()+"/motions/humanoid3d_backflip.txt"
self._mocapData.Load(motionPath)
timeStep = 1./600.
useFixedBase=False
self._humanoid = humanoid_stable_pd.HumanoidStablePD(self._pybullet_client, self._mocapData, timeStep, useFixedBase)
self._isInitialized = True
self._pybullet_client.setTimeStep(timeStep)
self._pybullet_client.setPhysicsEngineParameter(numSubSteps=1)
selfCheck = False
if (selfCheck):
curTime = 0
while self._pybullet_client.isConnected():
self._humanoid.setSimTime(curTime)
state = self._humanoid.getState()
#print("state=",state)
pose = self._humanoid.computePose(self._humanoid._frameFraction)
for i in range (10):
curTime+=timeStep
#taus = self._humanoid.computePDForces(pose)
#self._humanoid.applyPDForces(taus)
#self._pybullet_client.stepSimulation()
time.sleep(timeStep)
#print("numframes = ", self._humanoid._mocap_data.NumFrames())
#startTime = random.randint(0,self._humanoid._mocap_data.NumFrames()-2)
rnrange = 1000
rn = random.randint(0,rnrange)
startTime = float(rn)/rnrange * self._humanoid.getCycleTime()
self.t = startTime
self._humanoid.setSimTime(startTime)
self._humanoid.resetPose()
#this clears the contact points. Todo: add API to explicitly clear all contact points?
#self._pybullet_client.stepSimulation()
self._humanoid.resetPose()
self.needs_update_time = self.t-1#force update
def get_num_agents(self):
return self._num_agents
def get_action_space(self, agent_id):
return ActionSpace(ActionSpace.Continuous)
def get_reward_min(self, agent_id):
return 0
def get_reward_max(self, agent_id):
return 1
def get_reward_fail(self, agent_id):
return self.get_reward_min(agent_id)
def get_reward_succ(self, agent_id):
return self.get_reward_max(agent_id)
#scene_name == "imitate" -> cDrawSceneImitate
def get_state_size(self, agent_id):
#cCtController::GetStateSize()
#int state_size = cDeepMimicCharController::GetStateSize();
# state_size += GetStatePoseSize();#106
# state_size += GetStateVelSize(); #(3+3)*numBodyParts=90
#state_size += GetStatePhaseSize();#1
#197
return 197
def build_state_norm_groups(self, agent_id):
#if (mEnablePhaseInput)
#{
#int phase_group = gNormGroupNone;
#int phase_offset = GetStatePhaseOffset();
#int phase_size = GetStatePhaseSize();
#out_groups.segment(phase_offset, phase_size) = phase_group * Eigen::VectorXi::Ones(phase_size);
groups = [0]*self.get_state_size(agent_id)
groups[0] = -1
return groups
def build_state_offset(self, agent_id):
out_offset = [0]*self.get_state_size(agent_id)
phase_offset = -0.5
out_offset[0] = phase_offset
return np.array(out_offset)
def build_state_scale(self, agent_id):
out_scale = [1]*self.get_state_size(agent_id)
phase_scale = 2
out_scale[0] = phase_scale
return np.array(out_scale)
def __init__(self, arg_parser=None, enable_draw=False, pybullet_client=None):
super().__init__(arg_parser, enable_draw)
self._num_agents = 1
self._pybullet_client = pybullet_client
self._isInitialized = False
self._useStablePD = True
self._arg_parser = arg_parser
self.reset()
def get_goal_size(self, agent_id):
return 0
def reset(self):
def get_action_size(self, agent_id):
ctrl_size = 43 #numDof
root_size = 7
return ctrl_size - root_size
def build_goal_norm_groups(self, agent_id):
return np.array([])
def build_goal_offset(self, agent_id):
return np.array([])
def build_goal_scale(self, agent_id):
return np.array([])
def build_action_offset(self, agent_id):
out_offset = [0] * self.get_action_size(agent_id)
out_offset = [0.0000000000,0.0000000000,0.0000000000,-0.200000000,0.0000000000,0.0000000000,0.0000000000,
-0.200000000,0.0000000000,0.0000000000, 0.00000000, -0.2000000, 1.57000000, 0.00000000, 0.00000000,
0.00000000, -0.2000000, 0.00000000, 0.00000000, 0.00000000, -0.2000000, -1.5700000, 0.00000000, 0.00000000,
0.00000000, -0.2000000, 1.57000000, 0.00000000, 0.00000000, 0.00000000, -0.2000000, 0.00000000, 0.00000000,
0.00000000, -0.2000000, -1.5700000]
#see cCtCtrlUtil::BuildOffsetScalePDPrismatic and
#see cCtCtrlUtil::BuildOffsetScalePDSpherical
return np.array(out_offset)
def build_action_scale(self, agent_id):
out_scale = [1] * self.get_action_size(agent_id)
#see cCtCtrlUtil::BuildOffsetScalePDPrismatic and
#see cCtCtrlUtil::BuildOffsetScalePDSpherical
out_scale=[ 0.20833333333333,1.00000000000000,1.00000000000000,1.00000000000000,0.25000000000000,
1.00000000000000,1.00000000000000,1.00000000000000,0.12077294685990,1.00000000000000,
1.000000000000, 1.000000000000, 0.159235668789, 0.159235668789, 1.000000000000,
1.000000000000, 1.000000000000, 0.079617834394, 1.000000000000, 1.000000000000,
1.000000000000, 0.159235668789, 0.120772946859, 1.000000000000, 1.000000000000,
1.000000000000, 0.159235668789, 0.159235668789, 1.000000000000, 1.000000000000,
1.000000000000, 0.107758620689, 1.000000000000, 1.000000000000, 1.000000000000,
0.159235668789]
return np.array(out_scale)
def build_action_bound_min(self, agent_id):
#see cCtCtrlUtil::BuildBoundsPDSpherical
out_scale = [-1] * self.get_action_size(agent_id)
out_scale = [-4.79999999999,-1.00000000000,-1.00000000000,-1.00000000000,-4.00000000000,
-1.00000000000,-1.00000000000,-1.00000000000,-7.77999999999,-1.00000000000, -1.000000000,
-1.000000000, -7.850000000, -6.280000000, -1.000000000, -1.000000000, -1.000000000,
-12.56000000, -1.000000000, -1.000000000, -1.000000000, -4.710000000,
-7.779999999, -1.000000000, -1.000000000, -1.000000000, -7.850000000,
-6.280000000, -1.000000000, -1.000000000, -1.000000000, -8.460000000,
-1.000000000, -1.000000000, -1.000000000, -4.710000000]
if not self._isInitialized:
if self.enable_draw:
self._pybullet_client = bullet_client.BulletClient(connection_mode=p1.GUI)
#disable 'GUI' since it slows down a lot on Mac OSX and some other platforms
self._pybullet_client.configureDebugVisualizer(self._pybullet_client.COV_ENABLE_GUI, 0)
else:
self._pybullet_client = bullet_client.BulletClient()
self._pybullet_client.setAdditionalSearchPath(pybullet_data.getDataPath())
z2y = self._pybullet_client.getQuaternionFromEuler([-math.pi * 0.5, 0, 0])
self._planeId = self._pybullet_client.loadURDF("plane_implicit.urdf", [0, 0, 0],
z2y,
useMaximalCoordinates=True)
#print("planeId=",self._planeId)
self._pybullet_client.configureDebugVisualizer(self._pybullet_client.COV_ENABLE_Y_AXIS_UP, 1)
self._pybullet_client.setGravity(0, -9.8, 0)
self._pybullet_client.setPhysicsEngineParameter(numSolverIterations=10)
self._pybullet_client.changeDynamics(self._planeId, linkIndex=-1, lateralFriction=0.9)
self._mocapData = motion_capture_data.MotionCaptureData()
motion_file = self._arg_parser.parse_strings('motion_file')
print("motion_file=", motion_file[0])
motionPath = pybullet_data.getDataPath() + "/" + motion_file[0]
#motionPath = pybullet_data.getDataPath()+"/motions/humanoid3d_backflip.txt"
self._mocapData.Load(motionPath)
timeStep = 1. / 600.
useFixedBase = False
self._humanoid = humanoid_stable_pd.HumanoidStablePD(self._pybullet_client, self._mocapData,
timeStep, useFixedBase)
self._isInitialized = True
return out_scale
def build_action_bound_max(self, agent_id):
out_scale = [1] * self.get_action_size(agent_id)
out_scale=[
4.799999999,1.000000000,1.000000000,1.000000000,4.000000000,1.000000000,
1.000000000,1.000000000,8.779999999,1.000000000, 1.0000000, 1.0000000,
4.7100000, 6.2800000, 1.0000000, 1.0000000, 1.0000000,
12.560000, 1.0000000, 1.0000000, 1.0000000, 7.8500000,
8.7799999, 1.0000000, 1.0000000, 1.0000000, 4.7100000,
6.2800000, 1.0000000, 1.0000000, 1.0000000, 10.100000,
1.0000000, 1.0000000, 1.0000000, 7.8500000]
return out_scale
def set_mode(self, mode):
self._mode = mode
def need_new_action(self, agent_id):
if self.t>=self.needs_update_time:
self.needs_update_time = self.t + 1./30.
return True
return False
def record_state(self, agent_id):
state = self._humanoid.getState()
return np.array(state)
def record_goal(self, agent_id):
return np.array([])
def calc_reward(self, agent_id):
kinPose = self._humanoid.computePose(self._humanoid._frameFraction)
reward = self._humanoid.getReward(kinPose)
return reward
def set_action(self, agent_id, action):
#print("action=",)
#for a in action:
# print(a)
np.savetxt("pb_action.csv", action, delimiter=",")
self.desiredPose = self._humanoid.convertActionToPose(action)
#we need the target root position and orientation to be zero, to be compatible with deep mimic
self.desiredPose[0] = 0
self.desiredPose[1] = 0
self.desiredPose[2] = 0
self.desiredPose[3] = 0
self.desiredPose[4] = 0
self.desiredPose[5] = 0
self.desiredPose[6] = 0
target_pose = np.array(self.desiredPose)
np.savetxt("pb_target_pose.csv", target_pose, delimiter=",")
#print("set_action: desiredPose=", self.desiredPose)
def log_val(self, agent_id, val):
pass
def update(self, timeStep):
#print("pybullet_deep_mimic_env:update timeStep=",timeStep," t=",self.t)
self._pybullet_client.setTimeStep(timeStep)
self._humanoid._timeStep = timeStep
for i in range(1):
self.t += timeStep
self._humanoid.setSimTime(self.t)
if self.desiredPose:
kinPose = self._humanoid.computePose(self._humanoid._frameFraction)
self._humanoid.initializePose(self._humanoid._poseInterpolator, self._humanoid._kin_model, initBase=True)
#pos,orn=self._pybullet_client.getBasePositionAndOrientation(self._humanoid._sim_model)
#self._pybullet_client.resetBasePositionAndOrientation(self._humanoid._kin_model, [pos[0]+3,pos[1],pos[2]],orn)
#print("desiredPositions=",self.desiredPose)
maxForces = [0,0,0,0,0,0,0,200,200,200,200, 50,50,50,50, 200,200,200,200, 150, 90,90,90,90, 100,100,100,100, 60, 200,200,200,200, 150, 90, 90, 90, 90, 100,100,100,100, 60]
if self._useStablePD:
taus = self._humanoid.computePDForces(self.desiredPose, desiredVelocities=None, maxForces=maxForces)
self._humanoid.applyPDForces(taus)
else:
self._humanoid.setJointMotors(self.desiredPose, maxForces=maxForces)
self._pybullet_client.stepSimulation()
self._pybullet_client.setPhysicsEngineParameter(numSubSteps=1)
def set_sample_count(self, count):
return
def check_terminate(self, agent_id):
return Env.Terminate(self.is_episode_end())
def is_episode_end(self):
isEnded = self._humanoid.terminates()
#also check maximum time, 20 seconds (todo get from file)
#print("self.t=",self.t)
if (self.t>20):
isEnded = True
return isEnded
def check_valid_episode(self):
#could check if limbs exceed velocity threshold
return True
def getKeyboardEvents(self):
return self._pybullet_client.getKeyboardEvents()
def isKeyTriggered(self, keys, key):
o = ord(key)
#print("ord=",o)
if o in keys:
return keys[ord(key)] & self._pybullet_client.KEY_WAS_TRIGGERED
return False
selfCheck = False
if (selfCheck):
curTime = 0
while self._pybullet_client.isConnected():
self._humanoid.setSimTime(curTime)
state = self._humanoid.getState()
#print("state=",state)
pose = self._humanoid.computePose(self._humanoid._frameFraction)
for i in range(10):
curTime += timeStep
#taus = self._humanoid.computePDForces(pose)
#self._humanoid.applyPDForces(taus)
#self._pybullet_client.stepSimulation()
time.sleep(timeStep)
#print("numframes = ", self._humanoid._mocap_data.NumFrames())
#startTime = random.randint(0,self._humanoid._mocap_data.NumFrames()-2)
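# Reference-state initialization: each episode starts at a random phase of the
# mocap cycle rather than always at frame 0.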
rnrange = 1000
rn = random.randint(0, rnrange)
startTime = float(rn) / rnrange * self._humanoid.getCycleTime()
self.t = startTime
self._humanoid.setSimTime(startTime)
self._humanoid.resetPose()
#this clears the contact points. Todo: add API to explicitly clear all contact points?
#self._pybullet_client.stepSimulation()
self._humanoid.resetPose()
self.needs_update_time = self.t - 1 #force update
def get_num_agents(self):
return self._num_agents
def get_action_space(self, agent_id):
return ActionSpace(ActionSpace.Continuous)
def get_reward_min(self, agent_id):
return 0
def get_reward_max(self, agent_id):
return 1
def get_reward_fail(self, agent_id):
return self.get_reward_min(agent_id)
def get_reward_succ(self, agent_id):
return self.get_reward_max(agent_id)
#scene_name == "imitate" -> cDrawSceneImitate
def get_state_size(self, agent_id):
#cCtController::GetStateSize()
#int state_size = cDeepMimicCharController::GetStateSize();
# state_size += GetStatePoseSize();#106
# state_size += GetStateVelSize(); #(3+3)*numBodyParts=90
#state_size += GetStatePhaseSize();#1
#197
return 197
def build_state_norm_groups(self, agent_id):
#if (mEnablePhaseInput)
#{
#int phase_group = gNormGroupNone;
#int phase_offset = GetStatePhaseOffset();
#int phase_size = GetStatePhaseSize();
#out_groups.segment(phase_offset, phase_size) = phase_group * Eigen::VectorXi::Ones(phase_size);
groups = [0] * self.get_state_size(agent_id)
groups[0] = -1
return groups
def build_state_offset(self, agent_id):
out_offset = [0] * self.get_state_size(agent_id)
phase_offset = -0.5
out_offset[0] = phase_offset
return np.array(out_offset)
def build_state_scale(self, agent_id):
out_scale = [1] * self.get_state_size(agent_id)
phase_scale = 2
out_scale[0] = phase_scale
return np.array(out_scale)
def get_goal_size(self, agent_id):
return 0
def get_action_size(self, agent_id):
ctrl_size = 43 #numDof
root_size = 7
return ctrl_size - root_size
def build_goal_norm_groups(self, agent_id):
return np.array([])
def build_goal_offset(self, agent_id):
return np.array([])
def build_goal_scale(self, agent_id):
return np.array([])
def build_action_offset(self, agent_id):
out_offset = [0] * self.get_action_size(agent_id)
out_offset = [
0.0000000000, 0.0000000000, 0.0000000000, -0.200000000, 0.0000000000, 0.0000000000,
0.0000000000, -0.200000000, 0.0000000000, 0.0000000000, 0.00000000, -0.2000000, 1.57000000,
0.00000000, 0.00000000, 0.00000000, -0.2000000, 0.00000000, 0.00000000, 0.00000000,
-0.2000000, -1.5700000, 0.00000000, 0.00000000, 0.00000000, -0.2000000, 1.57000000,
0.00000000, 0.00000000, 0.00000000, -0.2000000, 0.00000000, 0.00000000, 0.00000000,
-0.2000000, -1.5700000
]
#see cCtCtrlUtil::BuildOffsetScalePDPrismatic and
#see cCtCtrlUtil::BuildOffsetScalePDSpherical
return np.array(out_offset)
def build_action_scale(self, agent_id):
out_scale = [1] * self.get_action_size(agent_id)
#see cCtCtrlUtil::BuildOffsetScalePDPrismatic and
#see cCtCtrlUtil::BuildOffsetScalePDSpherical
out_scale = [
0.20833333333333, 1.00000000000000, 1.00000000000000, 1.00000000000000, 0.25000000000000,
1.00000000000000, 1.00000000000000, 1.00000000000000, 0.12077294685990, 1.00000000000000,
1.000000000000, 1.000000000000, 0.159235668789, 0.159235668789, 1.000000000000,
1.000000000000, 1.000000000000, 0.079617834394, 1.000000000000, 1.000000000000,
1.000000000000, 0.159235668789, 0.120772946859, 1.000000000000, 1.000000000000,
1.000000000000, 0.159235668789, 0.159235668789, 1.000000000000, 1.000000000000,
1.000000000000, 0.107758620689, 1.000000000000, 1.000000000000, 1.000000000000,
0.159235668789
]
return np.array(out_scale)
def build_action_bound_min(self, agent_id):
#see cCtCtrlUtil::BuildBoundsPDSpherical
out_scale = [-1] * self.get_action_size(agent_id)
out_scale = [
-4.79999999999, -1.00000000000, -1.00000000000, -1.00000000000, -4.00000000000,
-1.00000000000, -1.00000000000, -1.00000000000, -7.77999999999, -1.00000000000,
-1.000000000, -1.000000000, -7.850000000, -6.280000000, -1.000000000, -1.000000000,
-1.000000000, -12.56000000, -1.000000000, -1.000000000, -1.000000000, -4.710000000,
-7.779999999, -1.000000000, -1.000000000, -1.000000000, -7.850000000, -6.280000000,
-1.000000000, -1.000000000, -1.000000000, -8.460000000, -1.000000000, -1.000000000,
-1.000000000, -4.710000000
]
return out_scale
def build_action_bound_max(self, agent_id):
out_scale = [1] * self.get_action_size(agent_id)
out_scale = [
4.799999999, 1.000000000, 1.000000000, 1.000000000, 4.000000000, 1.000000000, 1.000000000,
1.000000000, 8.779999999, 1.000000000, 1.0000000, 1.0000000, 4.7100000, 6.2800000,
1.0000000, 1.0000000, 1.0000000, 12.560000, 1.0000000, 1.0000000, 1.0000000, 7.8500000,
8.7799999, 1.0000000, 1.0000000, 1.0000000, 4.7100000, 6.2800000, 1.0000000, 1.0000000,
1.0000000, 10.100000, 1.0000000, 1.0000000, 1.0000000, 7.8500000
]
return out_scale
def set_mode(self, mode):
self._mode = mode
def need_new_action(self, agent_id):
if self.t >= self.needs_update_time:
self.needs_update_time = self.t + 1. / 30.
return True
return False
def record_state(self, agent_id):
state = self._humanoid.getState()
return np.array(state)
def record_goal(self, agent_id):
return np.array([])
def calc_reward(self, agent_id):
kinPose = self._humanoid.computePose(self._humanoid._frameFraction)
reward = self._humanoid.getReward(kinPose)
return reward
def set_action(self, agent_id, action):
#print("action=",)
#for a in action:
# print(a)
np.savetxt("pb_action.csv", action, delimiter=",")
self.desiredPose = self._humanoid.convertActionToPose(action)
#we need the target root position and orientation to be zero, to be compatible with deep mimic
self.desiredPose[0] = 0
self.desiredPose[1] = 0
self.desiredPose[2] = 0
self.desiredPose[3] = 0
self.desiredPose[4] = 0
self.desiredPose[5] = 0
self.desiredPose[6] = 0
target_pose = np.array(self.desiredPose)
np.savetxt("pb_target_pose.csv", target_pose, delimiter=",")
#print("set_action: desiredPose=", self.desiredPose)
def log_val(self, agent_id, val):
pass
def update(self, timeStep):
#print("pybullet_deep_mimic_env:update timeStep=",timeStep," t=",self.t)
self._pybullet_client.setTimeStep(timeStep)
self._humanoid._timeStep = timeStep
for i in range(1):
self.t += timeStep
self._humanoid.setSimTime(self.t)
if self.desiredPose:
kinPose = self._humanoid.computePose(self._humanoid._frameFraction)
self._humanoid.initializePose(self._humanoid._poseInterpolator,
self._humanoid._kin_model,
initBase=True)
#pos,orn=self._pybullet_client.getBasePositionAndOrientation(self._humanoid._sim_model)
#self._pybullet_client.resetBasePositionAndOrientation(self._humanoid._kin_model, [pos[0]+3,pos[1],pos[2]],orn)
#print("desiredPositions=",self.desiredPose)
maxForces = [
0, 0, 0, 0, 0, 0, 0, 200, 200, 200, 200, 50, 50, 50, 50, 200, 200, 200, 200, 150, 90,
90, 90, 90, 100, 100, 100, 100, 60, 200, 200, 200, 200, 150, 90, 90, 90, 90, 100, 100,
100, 100, 60
]
if self._useStablePD:
taus = self._humanoid.computePDForces(self.desiredPose,
desiredVelocities=None,
maxForces=maxForces)
self._humanoid.applyPDForces(taus)
else:
self._humanoid.setJointMotors(self.desiredPose, maxForces=maxForces)
self._pybullet_client.stepSimulation()
def set_sample_count(self, count):
return
def check_terminate(self, agent_id):
return Env.Terminate(self.is_episode_end())
def is_episode_end(self):
isEnded = self._humanoid.terminates()
#also check maximum time, 20 seconds (todo get from file)
#print("self.t=",self.t)
if (self.t > 20):
isEnded = True
return isEnded
def check_valid_episode(self):
#could check if limbs exceed velocity threshold
return True
def getKeyboardEvents(self):
return self._pybullet_client.getKeyboardEvents()
def isKeyTriggered(self, keys, key):
o = ord(key)
#print("ord=",o)
if o in keys:
return keys[ord(key)] & self._pybullet_client.KEY_WAS_TRIGGERED
return False

View File

@@ -1,54 +1,66 @@
from pybullet_utils import bullet_client
import math
class QuadrupedPoseInterpolator(object):
def __init__(self):
pass
def ComputeLinVel(self,posStart, posEnd, deltaTime):
vel = [(posEnd[0]-posStart[0])/deltaTime,(posEnd[1]-posStart[1])/deltaTime,(posEnd[2]-posStart[2])/deltaTime]
def ComputeLinVel(self, posStart, posEnd, deltaTime):
vel = [(posEnd[0] - posStart[0]) / deltaTime, (posEnd[1] - posStart[1]) / deltaTime,
(posEnd[2] - posStart[2]) / deltaTime]
return vel
def ComputeAngVel(self,ornStart, ornEnd, deltaTime, bullet_client):
dorn = bullet_client.getDifferenceQuaternion(ornStart,ornEnd)
axis,angle = bullet_client.getAxisAngleFromQuaternion(dorn)
angVel = [(axis[0]*angle)/deltaTime,(axis[1]*angle)/deltaTime,(axis[2]*angle)/deltaTime]
def ComputeAngVel(self, ornStart, ornEnd, deltaTime, bullet_client):
dorn = bullet_client.getDifferenceQuaternion(ornStart, ornEnd)
axis, angle = bullet_client.getAxisAngleFromQuaternion(dorn)
angVel = [(axis[0] * angle) / deltaTime, (axis[1] * angle) / deltaTime,
(axis[2] * angle) / deltaTime]
return angVel
def ComputeAngVelRel(self,ornStart, ornEnd, deltaTime, bullet_client):
ornStartConjugate = [-ornStart[0],-ornStart[1],-ornStart[2],ornStart[3]]
pos_diff, q_diff =bullet_client.multiplyTransforms([0,0,0], ornStartConjugate, [0,0,0], ornEnd)
axis,angle = bullet_client.getAxisAngleFromQuaternion(q_diff)
angVel = [(axis[0]*angle)/deltaTime,(axis[1]*angle)/deltaTime,(axis[2]*angle)/deltaTime]
def ComputeAngVelRel(self, ornStart, ornEnd, deltaTime, bullet_client):
ornStartConjugate = [-ornStart[0], -ornStart[1], -ornStart[2], ornStart[3]]
pos_diff, q_diff = bullet_client.multiplyTransforms([0, 0, 0], ornStartConjugate, [0, 0, 0],
ornEnd)
axis, angle = bullet_client.getAxisAngleFromQuaternion(q_diff)
angVel = [(axis[0] * angle) / deltaTime, (axis[1] * angle) / deltaTime,
(axis[2] * angle) / deltaTime]
return angVel
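# Slerp interpolates between two mocap keyframes: frameData[0] is the keyframe
# duration, entries 1-3 the base position, entries 4-7 the base orientation
# quaternion stored w-first, and the remaining 12 entries (from index 8) the leg
# joint angles.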
def Slerp(self, frameFraction, frameData, frameDataNext,bullet_client ):
def Slerp(self, frameFraction, frameData, frameDataNext, bullet_client):
keyFrameDuration = frameData[0]
basePos1Start = [frameData[1],frameData[2],frameData[3]]
basePos1End = [frameDataNext[1],frameDataNext[2],frameDataNext[3]]
self._basePos = [basePos1Start[0]+frameFraction*(basePos1End[0]-basePos1Start[0]),
basePos1Start[1]+frameFraction*(basePos1End[1]-basePos1Start[1]),
basePos1Start[2]+frameFraction*(basePos1End[2]-basePos1Start[2])]
self._baseLinVel = self.ComputeLinVel(basePos1Start,basePos1End, keyFrameDuration)
baseOrn1Start = [frameData[5],frameData[6], frameData[7],frameData[4]]
baseOrn1Next = [frameDataNext[5],frameDataNext[6], frameDataNext[7],frameDataNext[4]]
self._baseOrn = bullet_client.getQuaternionSlerp(baseOrn1Start,baseOrn1Next,frameFraction)
self._baseAngVel = self.ComputeAngVel(baseOrn1Start,baseOrn1Next, keyFrameDuration, bullet_client)
jointPositions=[self._basePos[0],self._basePos[1],self._basePos[2],
self._baseOrn[0],self._baseOrn[1],self._baseOrn[2],self._baseOrn[3]]
jointVelocities=[self._baseLinVel[0],self._baseLinVel[1],self._baseLinVel[2],
self._baseAngVel[0],self._baseAngVel[1],self._baseAngVel[2]]
for j in range (12):
index=j+8
jointPosStart=frameData[index]
jointPosEnd=frameDataNext[index]
jointPos=jointPosStart+frameFraction*(jointPosEnd-jointPosStart)
jointVel=(jointPosEnd-jointPosStart)/keyFrameDuration
basePos1Start = [frameData[1], frameData[2], frameData[3]]
basePos1End = [frameDataNext[1], frameDataNext[2], frameDataNext[3]]
self._basePos = [
basePos1Start[0] + frameFraction * (basePos1End[0] - basePos1Start[0]),
basePos1Start[1] + frameFraction * (basePos1End[1] - basePos1Start[1]),
basePos1Start[2] + frameFraction * (basePos1End[2] - basePos1Start[2])
]
self._baseLinVel = self.ComputeLinVel(basePos1Start, basePos1End, keyFrameDuration)
baseOrn1Start = [frameData[5], frameData[6], frameData[7], frameData[4]]
baseOrn1Next = [frameDataNext[5], frameDataNext[6], frameDataNext[7], frameDataNext[4]]
self._baseOrn = bullet_client.getQuaternionSlerp(baseOrn1Start, baseOrn1Next, frameFraction)
self._baseAngVel = self.ComputeAngVel(baseOrn1Start, baseOrn1Next, keyFrameDuration,
bullet_client)
jointPositions = [
self._basePos[0], self._basePos[1], self._basePos[2], self._baseOrn[0], self._baseOrn[1],
self._baseOrn[2], self._baseOrn[3]
]
jointVelocities = [
self._baseLinVel[0], self._baseLinVel[1], self._baseLinVel[2], self._baseAngVel[0],
self._baseAngVel[1], self._baseAngVel[2]
]
for j in range(12):
index = j + 8
jointPosStart = frameData[index]
jointPosEnd = frameDataNext[index]
jointPos = jointPosStart + frameFraction * (jointPosEnd - jointPosStart)
jointVel = (jointPosEnd - jointPosStart) / keyFrameDuration
jointPositions.append(jointPos)
jointVelocities.append(jointVel)
self._jointPositions = jointPositions
self._jointVelocities = jointVelocities
return jointPositions,jointVelocities
return jointPositions, jointVelocities

File diff suppressed because it is too large

View File

@@ -8,87 +8,89 @@ import pybullet as p1
import humanoid_pose_interpolator
import numpy as np
pybullet_client = bullet_client.BulletClient(connection_mode=p1.GUI)
pybullet_client = bullet_client.BulletClient(connection_mode=p1.GUI)
pybullet_client.setAdditionalSearchPath(pybullet_data.getDataPath())
z2y = pybullet_client.getQuaternionFromEuler([-math.pi*0.5,0,0])
z2y = pybullet_client.getQuaternionFromEuler([-math.pi * 0.5, 0, 0])
#planeId = pybullet_client.loadURDF("plane.urdf",[0,0,0],z2y)
planeId= pybullet_client.loadURDF("plane_implicit.urdf",[0,0,0],z2y, useMaximalCoordinates=True)
pybullet_client.changeDynamics(planeId, linkIndex=-1, lateralFriction=0.9)
planeId = pybullet_client.loadURDF("plane_implicit.urdf", [0, 0, 0],
z2y,
useMaximalCoordinates=True)
pybullet_client.changeDynamics(planeId, linkIndex=-1, lateralFriction=0.9)
#print("planeId=",planeId)
pybullet_client.configureDebugVisualizer(pybullet_client.COV_ENABLE_Y_AXIS_UP,1)
pybullet_client.setGravity(0,-9.8,0)
pybullet_client.configureDebugVisualizer(pybullet_client.COV_ENABLE_Y_AXIS_UP, 1)
pybullet_client.setGravity(0, -9.8, 0)
pybullet_client.setPhysicsEngineParameter(numSolverIterations=10)
mocapData = motion_capture_data.MotionCaptureData()
#motionPath = pybullet_data.getDataPath()+"/data/motions/humanoid3d_walk.txt"
motionPath = pybullet_data.getDataPath()+"/data/motions/humanoid3d_backflip.txt"
motionPath = pybullet_data.getDataPath() + "/data/motions/humanoid3d_backflip.txt"
mocapData.Load(motionPath)
timeStep = 1./600
useFixedBase=False
timeStep = 1. / 600
useFixedBase = False
humanoid = humanoid_stable_pd.HumanoidStablePD(pybullet_client, mocapData, timeStep, useFixedBase)
isInitialized = True
pybullet_client.setTimeStep(timeStep)
pybullet_client.setPhysicsEngineParameter(numSubSteps=2)
timeId = pybullet_client.addUserDebugParameter("time",0,10,0)
timeId = pybullet_client.addUserDebugParameter("time", 0, 10, 0)
def isKeyTriggered(keys, key):
o = ord(key)
if o in keys:
return keys[ord(key)] & pybullet_client.KEY_WAS_TRIGGERED
return False
animating = False
singleStep = False
t=0
t = 0
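# Main loop: space toggles continuous playback, 'b' advances a single step; each
# step tracks the current mocap pose with stable-PD torques at 600 Hz.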
while (1):
keys = pybullet_client.getKeyboardEvents()
#print(keys)
if isKeyTriggered(keys, ' '):
animating = not animating
animating = not animating
if isKeyTriggered(keys, 'b'):
singleStep = True
singleStep = True
if animating or singleStep:
singleStep = False
#t = pybullet_client.readUserDebugParameter(timeId)
#print("t=",t)
for i in range (1):
for i in range(1):
print("t=",t)
print("t=", t)
humanoid.setSimTime(t)
humanoid.computePose(humanoid._frameFraction)
pose = humanoid._poseInterpolator
#humanoid.initializePose(pose=pose, phys_model = humanoid._sim_model, initBase=True, initializeVelocity=True)
#humanoid.resetPose()
desiredPose = humanoid.computePose(humanoid._frameFraction)
#desiredPose = desiredPose.GetPose()
#desiredPose = desiredPose.GetPose()
#curPose = HumanoidPoseInterpolator()
#curPose.reset()
s = humanoid.getState()
#np.savetxt("pb_record_state_s.csv", s, delimiter=",")
maxForces = [0,0,0,0,0,0,0,200,200,200,200, 50,50,50,50, 200,200,200,200, 150, 90,90,90,90, 100,100,100,100, 60, 200,200,200,200, 150, 90, 90, 90, 90, 100,100,100,100, 60]
maxForces = [
0, 0, 0, 0, 0, 0, 0, 200, 200, 200, 200, 50, 50, 50, 50, 200, 200, 200, 200, 150, 90, 90,
90, 90, 100, 100, 100, 100, 60, 200, 200, 200, 200, 150, 90, 90, 90, 90, 100, 100, 100,
100, 60
]
taus = humanoid.computePDForces(desiredPose, desiredVelocities=None, maxForces=maxForces)
#print("taus=",taus)
humanoid.applyPDForces(taus)
pybullet_client.stepSimulation()
t+=1./600.
time.sleep(1./600.)
t += 1. / 600.
time.sleep(1. / 600.)

View File

@@ -8,240 +8,258 @@ import motion_capture_data
import quadrupedPoseInterpolator
useConstraints = False
p = bullet_client.BulletClient(connection_mode=p1.GUI)
p = bullet_client.BulletClient(connection_mode=p1.GUI)
p.setAdditionalSearchPath(pybullet_data.getDataPath())
plane = p.loadURDF("plane.urdf")
p.setGravity(0,0,-10)
timeStep=1./500
p.setGravity(0, 0, -10)
timeStep = 1. / 500
p.setTimeStep(timeStep)
#p.setDefaultContactERP(0)
#urdfFlags = p.URDF_USE_SELF_COLLISION+p.URDF_USE_SELF_COLLISION_EXCLUDE_ALL_PARENTS
#urdfFlags = p.URDF_USE_SELF_COLLISION+p.URDF_USE_SELF_COLLISION_EXCLUDE_ALL_PARENTS
urdfFlags = p.URDF_USE_SELF_COLLISION
startPos=[0.007058990464444105, 0.03149299192130908, 0.4918981912395484]
startOrn=[0.005934649695708604, 0.7065453990917289, 0.7076373820553712, -0.0027774940359030264]
quadruped = p.loadURDF("laikago/laikago.urdf",startPos,startOrn, flags = urdfFlags,useFixedBase=False)
p.resetBasePositionAndOrientation(quadruped,startPos,startOrn)
startPos = [0.007058990464444105, 0.03149299192130908, 0.4918981912395484]
startOrn = [0.005934649695708604, 0.7065453990917289, 0.7076373820553712, -0.0027774940359030264]
quadruped = p.loadURDF("laikago/laikago.urdf",
startPos,
startOrn,
flags=urdfFlags,
useFixedBase=False)
p.resetBasePositionAndOrientation(quadruped, startPos, startOrn)
if not useConstraints:
for j in range(p.getNumJoints(quadruped)):
p.setJointMotorControl2(quadruped,j,p.POSITION_CONTROL,force=0)
#This cube is added as a soft constraint to keep the laikago from falling
p.setJointMotorControl2(quadruped, j, p.POSITION_CONTROL, force=0)
#This cube is added as a soft constraint to keep the laikago from falling
#since we didn't train it yet, it doesn't balance
cube = p.loadURDF("cube_no_rotation.urdf",[0,0,-0.5],[0,0.5,0.5,0])
p.setCollisionFilterGroupMask(cube,-1,0,0)
cube = p.loadURDF("cube_no_rotation.urdf", [0, 0, -0.5], [0, 0.5, 0.5, 0])
p.setCollisionFilterGroupMask(cube, -1, 0, 0)
for j in range(p.getNumJoints(cube)):
p.setJointMotorControl2(cube,j,p.POSITION_CONTROL,force=0)
p.setCollisionFilterGroupMask(cube,j,0,0)
p.changeVisualShape(cube,j,rgbaColor=[1,0,0,0])
cid = p.createConstraint(cube,p.getNumJoints(cube)-1,quadruped,-1,p.JOINT_FIXED,[0,0,0],[0,1,0],[0,0,0])
p.setJointMotorControl2(cube, j, p.POSITION_CONTROL, force=0)
p.setCollisionFilterGroupMask(cube, j, 0, 0)
p.changeVisualShape(cube, j, rgbaColor=[1, 0, 0, 0])
cid = p.createConstraint(cube,
p.getNumJoints(cube) - 1, quadruped, -1, p.JOINT_FIXED, [0, 0, 0],
[0, 1, 0], [0, 0, 0])
p.changeConstraint(cid, maxForce=10)
jointIds = []
paramIds = []
jointOffsets = []
jointDirections = [-1, 1, 1, 1, 1, 1, -1, 1, 1, 1, 1, 1]
jointAngles = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
jointIds=[]
paramIds=[]
jointOffsets=[]
jointDirections=[-1,1,1,1,1,1,-1,1,1,1,1,1]
jointAngles=[0,0,0,0,0,0,0,0,0,0,0,0]
for i in range (4):
for i in range(4):
jointOffsets.append(0)
jointOffsets.append(-0.7)
jointOffsets.append(0.7)
maxForceId = p.addUserDebugParameter("maxForce",0,100,20)
maxForceId = p.addUserDebugParameter("maxForce", 0, 100, 20)
for j in range (p.getNumJoints(quadruped)):
p.changeDynamics(quadruped,j,linearDamping=0, angularDamping=0)
info = p.getJointInfo(quadruped,j)
#print(info)
jointName = info[1]
jointType = info[2]
if (jointType==p.JOINT_PRISMATIC or jointType==p.JOINT_REVOLUTE):
jointIds.append(j)
startQ=[0.08389, 0.8482, -1.547832, -0.068933, 0.625726, -1.272086, 0.074398, 0.61135, -1.255892, -0.068262, 0.836745, -1.534517]
for j in range(p.getNumJoints(quadruped)):
p.resetJointState(quadruped,jointIds[j],jointDirections[j]*startQ[j]+jointOffsets[j])
p.changeDynamics(quadruped, j, linearDamping=0, angularDamping=0)
info = p.getJointInfo(quadruped, j)
#print(info)
jointName = info[1]
jointType = info[2]
if (jointType == p.JOINT_PRISMATIC or jointType == p.JOINT_REVOLUTE):
jointIds.append(j)
startQ = [
0.08389, 0.8482, -1.547832, -0.068933, 0.625726, -1.272086, 0.074398, 0.61135, -1.255892,
-0.068262, 0.836745, -1.534517
]
for j in range(p.getNumJoints(quadruped)):
p.resetJointState(quadruped, jointIds[j], jointDirections[j] * startQ[j] + jointOffsets[j])
qpi = quadrupedPoseInterpolator.QuadrupedPoseInterpolator()
#enable collision between lower legs
for j in range (p.getNumJoints(quadruped)):
print(p.getJointInfo(quadruped,j))
for j in range(p.getNumJoints(quadruped)):
print(p.getJointInfo(quadruped, j))
#2,5,8 and 11 are the lower legs
lower_legs = [2,5,8,11]
lower_legs = [2, 5, 8, 11]
for l0 in lower_legs:
for l1 in lower_legs:
if (l1>l0):
enableCollision = 1
print("collision for pair",l0,l1, p.getJointInfo(quadruped,l0)[12],p.getJointInfo(quadruped,l1)[12], "enabled=",enableCollision)
p.setCollisionFilterPair(quadruped, quadruped, 2,5,enableCollision)
for l1 in lower_legs:
if (l1 > l0):
enableCollision = 1
print("collision for pair", l0, l1,
p.getJointInfo(quadruped, l0)[12],
p.getJointInfo(quadruped, l1)[12], "enabled=", enableCollision)
p.setCollisionFilterPair(quadruped, quadruped, 2, 5, enableCollision)
jointIds=[]
paramIds=[]
jointOffsets=[]
jointDirections=[-1,1,1,1,1,1,-1,1,1,1,1,1]
jointAngles=[0,0,0,0,0,0,0,0,0,0,0,0]
jointIds = []
paramIds = []
jointOffsets = []
jointDirections = [-1, 1, 1, 1, 1, 1, -1, 1, 1, 1, 1, 1]
jointAngles = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
for i in range (4):
jointOffsets.append(0)
jointOffsets.append(-0.7)
jointOffsets.append(0.7)
for i in range(4):
jointOffsets.append(0)
jointOffsets.append(-0.7)
jointOffsets.append(0.7)
maxForceId = p.addUserDebugParameter("maxForce",0,100,20)
maxForceId = p.addUserDebugParameter("maxForce", 0, 100, 20)
for j in range (p.getNumJoints(quadruped)):
p.changeDynamics(quadruped,j,linearDamping=0, angularDamping=0)
info = p.getJointInfo(quadruped,j)
#print(info)
jointName = info[1]
jointType = info[2]
if (jointType==p.JOINT_PRISMATIC or jointType==p.JOINT_REVOLUTE):
jointIds.append(j)
for j in range(p.getNumJoints(quadruped)):
p.changeDynamics(quadruped, j, linearDamping=0, angularDamping=0)
info = p.getJointInfo(quadruped, j)
#print(info)
jointName = info[1]
jointType = info[2]
if (jointType == p.JOINT_PRISMATIC or jointType == p.JOINT_REVOLUTE):
jointIds.append(j)
p.getCameraImage(480,320)
p.getCameraImage(480, 320)
p.setRealTimeSimulation(0)
joints=[]
joints = []
mocapData = motion_capture_data.MotionCaptureData()
motionPath = pybullet_data.getDataPath()+"/data/motions/laikago_walk.txt"
motionPath = pybullet_data.getDataPath() + "/data/motions/laikago_walk.txt"
mocapData.Load(motionPath)
print("mocapData.NumFrames=",mocapData.NumFrames())
print("mocapData.KeyFrameDuraction=",mocapData.KeyFrameDuraction())
print("mocapData.getCycleTime=",mocapData.getCycleTime())
print("mocapData.computeCycleOffset=",mocapData.computeCycleOffset())
print("mocapData.NumFrames=", mocapData.NumFrames())
print("mocapData.KeyFrameDuraction=", mocapData.KeyFrameDuraction())
print("mocapData.getCycleTime=", mocapData.getCycleTime())
print("mocapData.computeCycleOffset=", mocapData.computeCycleOffset())
stablePD = pd_controller_stable.PDControllerStable(p)
cycleTime = mocapData.getCycleTime()
t=0
t = 0
while t<10.*cycleTime:
while t < 10. * cycleTime:
#get interpolated joint
keyFrameDuration = mocapData.KeyFrameDuraction()
cycleTime = mocapData.getCycleTime()
cycleCount = mocapData.calcCycleCount(t, cycleTime)
#print("cycleTime=",cycleTime)
#print("cycleCount=",cycleCount)
#print("cycles=",cycles)
frameTime = t - cycleCount*cycleTime
frameTime = t - cycleCount * cycleTime
#print("frameTime=",frameTime)
if (frameTime<0):
if (frameTime < 0):
frameTime += cycleTime
frame = int(frameTime/keyFrameDuration)
frameNext = frame+1
if (frameNext >= mocapData.NumFrames()):
frame = int(frameTime / keyFrameDuration)
frameNext = frame + 1
if (frameNext >= mocapData.NumFrames()):
frameNext = frame
frameFraction = (frameTime - frame*keyFrameDuration)/(keyFrameDuration)
frameFraction = (frameTime - frame * keyFrameDuration) / (keyFrameDuration)
#print("frame=",frame)
#print("frameFraction=",frameFraction)
frameData = mocapData._motion_data['Frames'][frame]
frameDataNext = mocapData._motion_data['Frames'][frameNext]
jointsStr,qdot=qpi.Slerp(frameFraction, frameData, frameDataNext, p)
jointsStr, qdot = qpi.Slerp(frameFraction, frameData, frameDataNext, p)
maxForce = p.readUserDebugParameter(maxForceId)
print("jointIds=",jointIds)
print("jointIds=", jointIds)
if useConstraints:
for j in range (12):
for j in range(12):
#skip the base positional dofs
targetPos = float(jointsStr[j+7])
p.setJointMotorControl2(quadruped,jointIds[j],p.POSITION_CONTROL,jointDirections[j]*targetPos+jointOffsets[j], force=maxForce)
targetPos = float(jointsStr[j + 7])
p.setJointMotorControl2(quadruped,
jointIds[j],
p.POSITION_CONTROL,
jointDirections[j] * targetPos + jointOffsets[j],
force=maxForce)
else:
desiredPositions=[]
for j in range (7):
desiredPositions = []
for j in range(7):
targetPosUnmodified = float(jointsStr[j])
desiredPositions.append(targetPosUnmodified)
for j in range (12):
targetPosUnmodified = float(jointsStr[j+7])
targetPos=jointDirections[j]*targetPosUnmodified+jointOffsets[j]
for j in range(12):
targetPosUnmodified = float(jointsStr[j + 7])
targetPos = jointDirections[j] * targetPosUnmodified + jointOffsets[j]
desiredPositions.append(targetPos)
numBaseDofs=6
totalDofs=12+numBaseDofs
desiredVelocities=None
if desiredVelocities==None:
desiredVelocities = [0]*totalDofs
taus = stablePD.computePD(bodyUniqueId=quadruped,
jointIndices = jointIds,
desiredPositions = desiredPositions,
desiredVelocities = desiredVelocities,
kps = [4000]*totalDofs,
kds = [40]*totalDofs,
maxForces = [500]*totalDofs,
timeStep=timeStep)
dofIndex=6
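# Stable PD solves for all 18 dofs (6 floating-base + 12 joints); the first 6
# outputs belong to the base and are skipped, so dofIndex starts at 6 when the
# joint torques are applied below.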
numBaseDofs = 6
totalDofs = 12 + numBaseDofs
desiredVelocities = None
if desiredVelocities is None:
desiredVelocities = [0] * totalDofs
taus = stablePD.computePD(bodyUniqueId=quadruped,
jointIndices=jointIds,
desiredPositions=desiredPositions,
desiredVelocities=desiredVelocities,
kps=[4000] * totalDofs,
kds=[40] * totalDofs,
maxForces=[500] * totalDofs,
timeStep=timeStep)
dofIndex = 6
scaling = 1
for index in range (len(jointIds)):
for index in range(len(jointIds)):
jointIndex = jointIds[index]
force=[scaling*taus[dofIndex]]
print("force[", jointIndex,"]=",force)
p.setJointMotorControlMultiDof(quadruped, jointIndex, controlMode=p.TORQUE_CONTROL, force=force)
dofIndex+=1
force = [scaling * taus[dofIndex]]
print("force[", jointIndex, "]=", force)
p.setJointMotorControlMultiDof(quadruped,
jointIndex,
controlMode=p.TORQUE_CONTROL,
force=force)
dofIndex += 1
p.stepSimulation()
t+=timeStep
t += timeStep
time.sleep(timeStep)
useOrgData=False
useOrgData = False
if useOrgData:
with open("data1.txt","r") as filestream:
for line in filestream:
maxForce = p.readUserDebugParameter(maxForceId)
currentline = line.split(",")
frame = currentline[0]
t = currentline[1]
joints=currentline[2:14]
for j in range (12):
targetPos = float(joints[j])
p.setJointMotorControl2(quadruped,jointIds[j],p.POSITION_CONTROL,jointDirections[j]*targetPos+jointOffsets[j], force=maxForce)
p.stepSimulation()
for lower_leg in lower_legs:
#print("points for ", quadruped, " link: ", lower_leg)
pts = p.getContactPoints(quadruped,-1, lower_leg)
#print("num points=",len(pts))
#for pt in pts:
# print(pt[9])
time.sleep(1./500.)
for j in range (p.getNumJoints(quadruped)):
p.changeDynamics(quadruped,j,linearDamping=0, angularDamping=0)
info = p.getJointInfo(quadruped,j)
js = p.getJointState(quadruped,j)
#print(info)
jointName = info[1]
jointType = info[2]
if (jointType==p.JOINT_PRISMATIC or jointType==p.JOINT_REVOLUTE):
paramIds.append(p.addUserDebugParameter(jointName.decode("utf-8"),-4,4,(js[0]-jointOffsets[j])/jointDirections[j]))
with open("data1.txt", "r") as filestream:
for line in filestream:
maxForce = p.readUserDebugParameter(maxForceId)
currentline = line.split(",")
frame = currentline[0]
t = currentline[1]
joints = currentline[2:14]
for j in range(12):
targetPos = float(joints[j])
p.setJointMotorControl2(quadruped,
jointIds[j],
p.POSITION_CONTROL,
jointDirections[j] * targetPos + jointOffsets[j],
force=maxForce)
p.stepSimulation()
for lower_leg in lower_legs:
#print("points for ", quadruped, " link: ", lower_leg)
pts = p.getContactPoints(quadruped, -1, lower_leg)
#print("num points=",len(pts))
#for pt in pts:
# print(pt[9])
time.sleep(1. / 500.)
for j in range(p.getNumJoints(quadruped)):
p.changeDynamics(quadruped, j, linearDamping=0, angularDamping=0)
info = p.getJointInfo(quadruped, j)
js = p.getJointState(quadruped, j)
#print(info)
jointName = info[1]
jointType = info[2]
if (jointType == p.JOINT_PRISMATIC or jointType == p.JOINT_REVOLUTE):
paramIds.append(
p.addUserDebugParameter(jointName.decode("utf-8"), -4, 4,
(js[0] - jointOffsets[j]) / jointDirections[j]))
p.setRealTimeSimulation(1)
while (1):
for i in range(len(paramIds)):
c = paramIds[i]
targetPos = p.readUserDebugParameter(c)
maxForce = p.readUserDebugParameter(maxForceId)
p.setJointMotorControl2(quadruped,jointIds[i],p.POSITION_CONTROL,jointDirections[i]*targetPos+jointOffsets[i], force=maxForce)
for i in range(len(paramIds)):
c = paramIds[i]
targetPos = p.readUserDebugParameter(c)
maxForce = p.readUserDebugParameter(maxForceId)
p.setJointMotorControl2(quadruped,
jointIds[i],
p.POSITION_CONTROL,
jointDirections[i] * targetPos + jointOffsets[i],
force=maxForce)

View File

@@ -5,17 +5,18 @@ import pybullet_data
AGENT_TYPE_KEY = "AgentType"
def build_agent(world, id, file):
agent = None
with open(pybullet_data.getDataPath()+"/"+file) as data_file:
json_data = json.load(data_file)
assert AGENT_TYPE_KEY in json_data
agent_type = json_data[AGENT_TYPE_KEY]
if (agent_type == PPOAgent.NAME):
agent = PPOAgent(world, id, json_data)
else:
assert False, 'Unsupported agent type: ' + agent_type
return agent
def build_agent(world, id, file):
agent = None
with open(pybullet_data.getDataPath() + "/" + file) as data_file:
json_data = json.load(data_file)
assert AGENT_TYPE_KEY in json_data
agent_type = json_data[AGENT_TYPE_KEY]
if (agent_type == PPOAgent.NAME):
agent = PPOAgent(world, id, json_data)
else:
assert False, 'Unsupported agent type: ' + agent_type
return agent

View File

@@ -2,53 +2,54 @@ import json
import numpy as np
import pybullet_utils.math_util as MathUtil
class ExpParams(object):
RATE_KEY = 'Rate'
INIT_ACTION_RATE_KEY = 'InitActionRate'
NOISE_KEY = 'Noise'
NOISE_INTERNAL_KEY = 'NoiseInternal'
TEMP_KEY = 'Temp'
RATE_KEY = 'Rate'
INIT_ACTION_RATE_KEY = 'InitActionRate'
NOISE_KEY = 'Noise'
NOISE_INTERNAL_KEY = 'NoiseInternal'
TEMP_KEY = 'Temp'
def __init__(self):
self.rate = 0.2
self.init_action_rate = 0
self.noise = 0.1
self.noise_internal = 0
self.temp = 0.1
return
def __init__(self):
self.rate = 0.2
self.init_action_rate = 0
self.noise = 0.1
self.noise_internal = 0
self.temp = 0.1
return
def __str__(self):
str = ''
str += '{}: {:.2f}\n'.format(self.RATE_KEY, self.rate)
str += '{}: {:.2f}\n'.format(self.INIT_ACTION_RATE_KEY, self.init_action_rate)
str += '{}: {:.2f}\n'.format(self.NOISE_KEY, self.noise)
str += '{}: {:.2f}\n'.format(self.NOISE_INTERNAL_KEY, self.noise_internal)
str += '{}: {:.2f}\n'.format(self.TEMP_KEY, self.temp)
return str
def __str__(self):
str = ''
str += '{}: {:.2f}\n'.format(self.RATE_KEY, self.rate)
str += '{}: {:.2f}\n'.format(self.INIT_ACTION_RATE_KEY, self.init_action_rate)
str += '{}: {:.2f}\n'.format(self.NOISE_KEY, self.noise)
str += '{}: {:.2f}\n'.format(self.NOISE_INTERNAL_KEY, self.noise_internal)
str += '{}: {:.2f}\n'.format(self.TEMP_KEY, self.temp)
return str
def load(self, json_data):
if (self.RATE_KEY in json_data):
self.rate = json_data[self.RATE_KEY]
def load(self, json_data):
if (self.RATE_KEY in json_data):
self.rate = json_data[self.RATE_KEY]
if (self.INIT_ACTION_RATE_KEY in json_data):
self.init_action_rate = json_data[self.INIT_ACTION_RATE_KEY]
if (self.INIT_ACTION_RATE_KEY in json_data):
self.init_action_rate = json_data[self.INIT_ACTION_RATE_KEY]
if (self.NOISE_KEY in json_data):
self.noise = json_data[self.NOISE_KEY]
if (self.NOISE_KEY in json_data):
self.noise = json_data[self.NOISE_KEY]
if (self.NOISE_INTERNAL_KEY in json_data):
self.noise_internal = json_data[self.NOISE_INTERNAL_KEY]
if (self.NOISE_INTERNAL_KEY in json_data):
self.noise_internal = json_data[self.NOISE_INTERNAL_KEY]
if (self.TEMP_KEY in json_data):
self.temp = json_data[self.TEMP_KEY]
if (self.TEMP_KEY in json_data):
self.temp = json_data[self.TEMP_KEY]
return
return
def lerp(self, other, t):
lerp_params = ExpParams()
lerp_params.rate = MathUtil.lerp(self.rate, other.rate, t)
lerp_params.init_action_rate = MathUtil.lerp(self.init_action_rate, other.init_action_rate, t)
lerp_params.noise = MathUtil.lerp(self.noise, other.noise, t)
lerp_params.noise_internal = MathUtil.lerp(self.noise_internal, other.noise_internal, t)
lerp_params.temp = MathUtil.log_lerp(self.temp, other.temp, t)
return lerp_params
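# lerp blends two exploration-parameter sets; note that temp is interpolated in
# log space (log_lerp) while the other fields are interpolated linearly.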
def lerp(self, other, t):
lerp_params = ExpParams()
lerp_params.rate = MathUtil.lerp(self.rate, other.rate, t)
lerp_params.init_action_rate = MathUtil.lerp(self.init_action_rate, other.init_action_rate, t)
lerp_params.noise = MathUtil.lerp(self.noise, other.noise, t)
lerp_params.noise_internal = MathUtil.lerp(self.noise_internal, other.noise_internal, t)
lerp_params.temp = MathUtil.log_lerp(self.temp, other.temp, t)
return lerp_params

View File

@@ -1 +1 @@
from . import *
from . import *

View File

@@ -3,11 +3,12 @@ import pybullet_envs.deep_mimic.learning.tf_util as TFUtil
NAME = "fc_2layers_1024units"
def build_net(input_tfs, reuse=False):
layers = [1024, 512]
activation = tf.nn.relu
input_tf = tf.concat(axis=-1, values=input_tfs)
h = TFUtil.fc_net(input_tf, layers, activation=activation, reuse=reuse)
h = activation(h)
return h
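# build_net concatenates all input tensors along the last axis and feeds them
# through two fully connected layers (1024 and 512 units) with ReLU activations.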
def build_net(input_tfs, reuse=False):
layers = [1024, 512]
activation = tf.nn.relu
input_tf = tf.concat(axis=-1, values=input_tfs)
h = TFUtil.fc_net(input_tf, layers, activation=activation, reuse=reuse)
h = activation(h)
return h

View File

@@ -1,11 +1,12 @@
import pybullet_envs.deep_mimic.learning.nets.fc_2layers_1024units as fc_2layers_1024units
def build_net(net_name, input_tfs, reuse=False):
net = None
if (net_name == fc_2layers_1024units.NAME):
net = fc_2layers_1024units.build_net(input_tfs, reuse)
else:
assert False, 'Unsupported net: ' + net_name
return net
def build_net(net_name, input_tfs, reuse=False):
net = None
if (net_name == fc_2layers_1024units.NAME):
net = fc_2layers_1024units.build_net(input_tfs, reuse)
else:
assert False, 'Unsupported net: ' + net_name
return net

View File

@@ -3,147 +3,149 @@ import copy
import pybullet_utils.mpi_util as MPIUtil
from pybullet_utils.logger import Logger
class Normalizer(object):
CHECK_SYNC_COUNT = 50000 # check synchronization after a certain number of entries
CHECK_SYNC_COUNT = 50000 # check synchronization after a certain number of entries
# these group IDs must be the same as those in CharController.h
NORM_GROUP_SINGLE = 0
NORM_GROUP_NONE = -1
# these group IDs must be the same as those in CharController.h
NORM_GROUP_SINGLE = 0
NORM_GROUP_NONE = -1
class Group(object):
def __init__(self, id, indices):
self.id = id
self.indices = indices
return
class Group(object):
def __init__(self, size, groups_ids=None, eps=0.02, clip=np.inf):
self.eps = eps
self.clip = clip
self.mean = np.zeros(size)
self.mean_sq = np.zeros(size)
self.std = np.ones(size)
self.count = 0
self.groups = self._build_groups(groups_ids)
def __init__(self, id, indices):
self.id = id
self.indices = indices
return
self.new_count = 0
self.new_sum = np.zeros_like(self.mean)
self.new_sum_sq = np.zeros_like(self.mean_sq)
return
def __init__(self, size, groups_ids=None, eps=0.02, clip=np.inf):
self.eps = eps
self.clip = clip
self.mean = np.zeros(size)
self.mean_sq = np.zeros(size)
self.std = np.ones(size)
self.count = 0
self.groups = self._build_groups(groups_ids)
def record(self, x):
size = self.get_size()
is_array = isinstance(x, np.ndarray)
if not is_array:
assert(size == 1)
x = np.array([[x]])
self.new_count = 0
self.new_sum = np.zeros_like(self.mean)
self.new_sum_sq = np.zeros_like(self.mean_sq)
return
assert x.shape[-1] == size, \
Logger.print2('Normalizer shape mismatch, expecting size {:d}, but got {:d}'.format(size, x.shape[-1]))
x = np.reshape(x, [-1, size])
def record(self, x):
size = self.get_size()
is_array = isinstance(x, np.ndarray)
if not is_array:
assert (size == 1)
x = np.array([[x]])
self.new_count += x.shape[0]
self.new_sum += np.sum(x, axis=0)
self.new_sum_sq += np.sum(np.square(x), axis=0)
return
assert x.shape[-1] == size, \
Logger.print2('Normalizer shape mismatch, expecting size {:d}, but got {:d}'.format(size, x.shape[-1]))
x = np.reshape(x, [-1, size])
def update(self):
new_count = MPIUtil.reduce_sum(self.new_count)
new_sum = MPIUtil.reduce_sum(self.new_sum)
new_sum_sq = MPIUtil.reduce_sum(self.new_sum_sq)
self.new_count += x.shape[0]
self.new_sum += np.sum(x, axis=0)
self.new_sum_sq += np.sum(np.square(x), axis=0)
return
new_total = self.count + new_count
if (self.count // self.CHECK_SYNC_COUNT != new_total // self.CHECK_SYNC_COUNT):
assert self.check_synced(), Logger.print2('Normalizer parameters desynchronized')
def update(self):
new_count = MPIUtil.reduce_sum(self.new_count)
new_sum = MPIUtil.reduce_sum(self.new_sum)
new_sum_sq = MPIUtil.reduce_sum(self.new_sum_sq)
if new_count > 0:
new_mean = self._process_group_data(new_sum / new_count, self.mean)
new_mean_sq = self._process_group_data(new_sum_sq / new_count, self.mean_sq)
w_old = float(self.count) / new_total
w_new = float(new_count) / new_total
new_total = self.count + new_count
if (self.count // self.CHECK_SYNC_COUNT != new_total // self.CHECK_SYNC_COUNT):
assert self.check_synced(), Logger.print2('Normalizer parameters desynchronized')
self.mean = w_old * self.mean + w_new * new_mean
self.mean_sq = w_old * self.mean_sq + w_new * new_mean_sq
self.count = new_total
self.std = self.calc_std(self.mean, self.mean_sq)
if new_count > 0:
new_mean = self._process_group_data(new_sum / new_count, self.mean)
new_mean_sq = self._process_group_data(new_sum_sq / new_count, self.mean_sq)
w_old = float(self.count) / new_total
w_new = float(new_count) / new_total
self.new_count = 0
self.new_sum.fill(0)
self.new_sum_sq.fill(0)
self.mean = w_old * self.mean + w_new * new_mean
self.mean_sq = w_old * self.mean_sq + w_new * new_mean_sq
self.count = new_total
self.std = self.calc_std(self.mean, self.mean_sq)
return
self.new_count = 0
self.new_sum.fill(0)
self.new_sum_sq.fill(0)
def get_size(self):
return self.mean.size
return
def set_mean_std(self, mean, std):
size = self.get_size()
is_array = isinstance(mean, np.ndarray) and isinstance(std, np.ndarray)
if not is_array:
assert(size == 1)
mean = np.array([mean])
std = np.array([std])
def get_size(self):
return self.mean.size
assert len(mean) == size and len(std) == size, \
Logger.print2('Normalizer shape mismatch, expecting size {:d}, but got {:d} and {:d}'.format(size, len(mean), len(std)))
self.mean = mean
self.std = std
self.mean_sq = self.calc_mean_sq(self.mean, self.std)
return
def set_mean_std(self, mean, std):
size = self.get_size()
is_array = isinstance(mean, np.ndarray) and isinstance(std, np.ndarray)
def normalize(self, x):
norm_x = (x - self.mean) / self.std
norm_x = np.clip(norm_x, -self.clip, self.clip)
return norm_x
if not is_array:
assert (size == 1)
mean = np.array([mean])
std = np.array([std])
def unnormalize(self, norm_x):
x = norm_x * self.std + self.mean
return x
assert len(mean) == size and len(std) == size, \
Logger.print2('Normalizer shape mismatch, expecting size {:d}, but got {:d} and {:d}'.format(size, len(mean), len(std)))
def calc_std(self, mean, mean_sq):
var = mean_sq - np.square(mean)
    # sometimes floating point errors can lead to small negative numbers
var = np.maximum(var, 0)
std = np.sqrt(var)
std = np.maximum(std, self.eps)
return std
self.mean = mean
self.std = std
self.mean_sq = self.calc_mean_sq(self.mean, self.std)
return
def calc_mean_sq(self, mean, std):
    return np.square(std) + np.square(mean)
def normalize(self, x):
norm_x = (x - self.mean) / self.std
norm_x = np.clip(norm_x, -self.clip, self.clip)
return norm_x
def check_synced(self):
synced = True
if MPIUtil.is_root_proc():
vars = np.concatenate([self.mean, self.mean_sq])
MPIUtil.bcast(vars)
else:
vars_local = np.concatenate([self.mean, self.mean_sq])
vars_root = np.empty_like(vars_local)
MPIUtil.bcast(vars_root)
synced = (vars_local == vars_root).all()
return synced
def unnormalize(self, norm_x):
x = norm_x * self.std + self.mean
return x
def _build_groups(self, groups_ids):
groups = []
if groups_ids is None:
curr_id = self.NORM_GROUP_SINGLE
curr_list = np.arange(self.get_size()).astype(np.int32)
groups.append(self.Group(curr_id, curr_list))
else:
ids = np.unique(groups_ids)
for id in ids:
curr_list = np.nonzero(groups_ids == id)[0].astype(np.int32)
groups.append(self.Group(id, curr_list))
def calc_std(self, mean, mean_sq):
var = mean_sq - np.square(mean)
    # sometimes floating point errors can lead to small negative numbers
var = np.maximum(var, 0)
std = np.sqrt(var)
std = np.maximum(std, self.eps)
return std
return groups
def calc_mean_sq(self, mean, std):
    return np.square(std) + np.square(mean)
def _process_group_data(self, new_data, old_data):
proc_data = new_data.copy()
for group in self.groups:
if group.id == self.NORM_GROUP_NONE:
proc_data[group.indices] = old_data[group.indices]
elif group.id != self.NORM_GROUP_SINGLE:
avg = np.mean(new_data[group.indices])
proc_data[group.indices] = avg
return proc_data
def check_synced(self):
synced = True
if MPIUtil.is_root_proc():
vars = np.concatenate([self.mean, self.mean_sq])
MPIUtil.bcast(vars)
else:
vars_local = np.concatenate([self.mean, self.mean_sq])
vars_root = np.empty_like(vars_local)
MPIUtil.bcast(vars_root)
synced = (vars_local == vars_root).all()
return synced
def _build_groups(self, groups_ids):
groups = []
if groups_ids is None:
curr_id = self.NORM_GROUP_SINGLE
curr_list = np.arange(self.get_size()).astype(np.int32)
groups.append(self.Group(curr_id, curr_list))
else:
ids = np.unique(groups_ids)
for id in ids:
curr_list = np.nonzero(groups_ids == id)[0].astype(np.int32)
groups.append(self.Group(id, curr_list))
return groups
def _process_group_data(self, new_data, old_data):
proc_data = new_data.copy()
for group in self.groups:
if group.id == self.NORM_GROUP_NONE:
proc_data[group.indices] = old_data[group.indices]
elif group.id != self.NORM_GROUP_SINGLE:
avg = np.mean(new_data[group.indices])
proc_data[group.indices] = avg
return proc_data
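
For reference, the record/update/normalize cycle above maintains running first and second moments and derives the standard deviation from them, with MPIUtil.reduce_sum aggregating the new samples across workers. Below is a minimal single-process numpy sketch of the same incremental update; the names and the toy data are illustrative only and the MPI reduction is intentionally omitted.

import numpy as np

# Single-process sketch of the running-statistics update used by Normalizer.
# eps plays the same role as Normalizer.eps: a lower bound on the std.
mean = np.zeros(3)
mean_sq = np.zeros(3)
count = 0
eps = 0.02

def record_and_update(batch):
  global mean, mean_sq, count
  new_count = batch.shape[0]
  new_mean = batch.mean(axis=0)
  new_mean_sq = np.square(batch).mean(axis=0)
  total = count + new_count
  w_old, w_new = count / total, new_count / total
  mean = w_old * mean + w_new * new_mean          # blended first moment
  mean_sq = w_old * mean_sq + w_new * new_mean_sq  # blended second moment
  count = total

def std():
  var = np.maximum(mean_sq - np.square(mean), 0)  # guard against tiny negatives
  return np.maximum(np.sqrt(var), eps)

record_and_update(np.random.randn(128, 3) * 2.0 + 5.0)
print(mean, std())  # mean near 5, std near 2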

View File

@@ -1,46 +1,47 @@
import numpy as np
from pybullet_envs.deep_mimic.env.env import Env
class Path(object):
def __init__(self):
self.clear()
return
def pathlength(self):
return len(self.actions)
def __init__(self):
self.clear()
return
def is_valid(self):
valid = True
l = self.pathlength()
valid &= len(self.states) == l + 1
valid &= len(self.goals) == l + 1
valid &= len(self.actions) == l
valid &= len(self.logps) == l
valid &= len(self.rewards) == l
valid &= len(self.flags) == l
def pathlength(self):
return len(self.actions)
return valid
def is_valid(self):
valid = True
l = self.pathlength()
valid &= len(self.states) == l + 1
valid &= len(self.goals) == l + 1
valid &= len(self.actions) == l
valid &= len(self.logps) == l
valid &= len(self.rewards) == l
valid &= len(self.flags) == l
def check_vals(self):
for vals in [self.states, self.goals, self.actions, self.logps,
self.rewards]:
for v in vals:
if not np.isfinite(v).all():
return False
return True
return valid
def clear(self):
self.states = []
self.goals = []
self.actions = []
self.logps = []
self.rewards = []
self.flags = []
self.terminate = Env.Terminate.Null
return
def check_vals(self):
for vals in [self.states, self.goals, self.actions, self.logps, self.rewards]:
for v in vals:
if not np.isfinite(v).all():
return False
return True
def get_pathlen(self):
return len(self.rewards)
def clear(self):
self.states = []
self.goals = []
self.actions = []
self.logps = []
self.rewards = []
self.flags = []
self.terminate = Env.Terminate.Null
return
def calc_return(self):
return sum(self.rewards)
def get_pathlen(self):
return len(self.rewards)
def calc_return(self):
return sum(self.rewards)
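
The validity check above encodes the usual rollout invariant: one more state and goal than actions, and matching lengths for actions, log-probabilities, rewards, and flags. A small hedged sketch of a rollout loop that satisfies it, using a dummy one-dimensional transition purely for illustration:

import numpy as np

# Toy rollout that respects the Path invariant:
# len(states) == len(actions) + 1, and actions/logps/rewards/flags share a length.
states, goals, actions, logps, rewards, flags = [], [], [], [], [], []

s = 0.0
states.append(s)
goals.append(None)
for t in range(5):
  a = np.random.randn()          # action
  s = s + a                      # toy transition to the next state
  states.append(s)
  goals.append(None)
  actions.append(a)
  logps.append(0.0)              # placeholder log-probability
  rewards.append(-abs(s))        # toy reward
  flags.append(0)

assert len(states) == len(actions) + 1
assert len(goals) == len(actions) + 1
assert len(actions) == len(logps) == len(rewards) == len(flags)
print('return =', sum(rewards))  # analogous to Path.calc_return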

View File

@@ -13,341 +13,343 @@ import pybullet_utils.mpi_util as MPIUtil
import pybullet_utils.math_util as MathUtil
from pybullet_envs.deep_mimic.env.action_space import ActionSpace
from pybullet_envs.deep_mimic.env.env import Env
'''
Policy Gradient Agent
'''
class PGAgent(TFAgent):
NAME = 'PG'
NAME = 'PG'
ACTOR_NET_KEY = 'ActorNet'
ACTOR_STEPSIZE_KEY = 'ActorStepsize'
ACTOR_MOMENTUM_KEY = 'ActorMomentum'
ACTOR_WEIGHT_DECAY_KEY = 'ActorWeightDecay'
ACTOR_INIT_OUTPUT_SCALE_KEY = 'ActorInitOutputScale'
ACTOR_NET_KEY = 'ActorNet'
ACTOR_STEPSIZE_KEY = 'ActorStepsize'
ACTOR_MOMENTUM_KEY = 'ActorMomentum'
ACTOR_WEIGHT_DECAY_KEY = 'ActorWeightDecay'
ACTOR_INIT_OUTPUT_SCALE_KEY = 'ActorInitOutputScale'
CRITIC_NET_KEY = 'CriticNet'
CRITIC_STEPSIZE_KEY = 'CriticStepsize'
CRITIC_MOMENTUM_KEY = 'CriticMomentum'
CRITIC_WEIGHT_DECAY_KEY = 'CriticWeightDecay'
EXP_ACTION_FLAG = 1 << 0
CRITIC_NET_KEY = 'CriticNet'
CRITIC_STEPSIZE_KEY = 'CriticStepsize'
CRITIC_MOMENTUM_KEY = 'CriticMomentum'
CRITIC_WEIGHT_DECAY_KEY = 'CriticWeightDecay'
def __init__(self, world, id, json_data):
self._exp_action = False
super().__init__(world, id, json_data)
return
EXP_ACTION_FLAG = 1 << 0
def reset(self):
super().reset()
self._exp_action = False
return
def __init__(self, world, id, json_data):
self._exp_action = False
super().__init__(world, id, json_data)
return
def _check_action_space(self):
action_space = self.get_action_space()
return action_space == ActionSpace.Continuous
def reset(self):
super().reset()
self._exp_action = False
return
def _load_params(self, json_data):
super()._load_params(json_data)
self.val_min, self.val_max = self._calc_val_bounds(self.discount)
self.val_fail, self.val_succ = self._calc_term_vals(self.discount)
return
def _check_action_space(self):
action_space = self.get_action_space()
return action_space == ActionSpace.Continuous
def _build_nets(self, json_data):
assert self.ACTOR_NET_KEY in json_data
assert self.CRITIC_NET_KEY in json_data
def _load_params(self, json_data):
super()._load_params(json_data)
self.val_min, self.val_max = self._calc_val_bounds(self.discount)
self.val_fail, self.val_succ = self._calc_term_vals(self.discount)
return
actor_net_name = json_data[self.ACTOR_NET_KEY]
critic_net_name = json_data[self.CRITIC_NET_KEY]
actor_init_output_scale = 1 if (self.ACTOR_INIT_OUTPUT_SCALE_KEY not in json_data) else json_data[self.ACTOR_INIT_OUTPUT_SCALE_KEY]
s_size = self.get_state_size()
g_size = self.get_goal_size()
a_size = self.get_action_size()
def _build_nets(self, json_data):
assert self.ACTOR_NET_KEY in json_data
assert self.CRITIC_NET_KEY in json_data
# setup input tensors
self.s_tf = tf.placeholder(tf.float32, shape=[None, s_size], name="s") # observations
    self.tar_val_tf = tf.placeholder(tf.float32, shape=[None], name="tar_val")  # target values
self.adv_tf = tf.placeholder(tf.float32, shape=[None], name="adv") # advantage
self.a_tf = tf.placeholder(tf.float32, shape=[None, a_size], name="a") # target actions
self.g_tf = tf.placeholder(tf.float32, shape=([None, g_size] if self.has_goal() else None), name="g") # goals
actor_net_name = json_data[self.ACTOR_NET_KEY]
critic_net_name = json_data[self.CRITIC_NET_KEY]
actor_init_output_scale = 1 if (self.ACTOR_INIT_OUTPUT_SCALE_KEY not in json_data
) else json_data[self.ACTOR_INIT_OUTPUT_SCALE_KEY]
with tf.variable_scope('main'):
with tf.variable_scope('actor'):
self.actor_tf = self._build_net_actor(actor_net_name, actor_init_output_scale)
with tf.variable_scope('critic'):
self.critic_tf = self._build_net_critic(critic_net_name)
s_size = self.get_state_size()
g_size = self.get_goal_size()
a_size = self.get_action_size()
if (self.actor_tf != None):
Logger.print2('Built actor net: ' + actor_net_name)
# setup input tensors
self.s_tf = tf.placeholder(tf.float32, shape=[None, s_size], name="s") # observations
    self.tar_val_tf = tf.placeholder(tf.float32, shape=[None], name="tar_val")  # target values
self.adv_tf = tf.placeholder(tf.float32, shape=[None], name="adv") # advantage
self.a_tf = tf.placeholder(tf.float32, shape=[None, a_size], name="a") # target actions
self.g_tf = tf.placeholder(tf.float32,
shape=([None, g_size] if self.has_goal() else None),
name="g") # goals
if (self.critic_tf != None):
Logger.print2('Built critic net: ' + critic_net_name)
with tf.variable_scope('main'):
with tf.variable_scope('actor'):
self.actor_tf = self._build_net_actor(actor_net_name, actor_init_output_scale)
with tf.variable_scope('critic'):
self.critic_tf = self._build_net_critic(critic_net_name)
return
if (self.actor_tf != None):
Logger.print2('Built actor net: ' + actor_net_name)
def _build_normalizers(self):
super()._build_normalizers()
with self.sess.as_default(), self.graph.as_default(), tf.variable_scope(self.tf_scope):
with tf.variable_scope(self.RESOURCE_SCOPE):
val_offset, val_scale = self._calc_val_offset_scale(self.discount)
self.val_norm = TFNormalizer(self.sess, 'val_norm', 1)
self.val_norm.set_mean_std(-val_offset, 1.0 / val_scale)
return
if (self.critic_tf != None):
Logger.print2('Built critic net: ' + critic_net_name)
def _init_normalizers(self):
super()._init_normalizers()
with self.sess.as_default(), self.graph.as_default():
self.val_norm.update()
return
return
def _load_normalizers(self):
super()._load_normalizers()
self.val_norm.load()
return
def _build_normalizers(self):
super()._build_normalizers()
with self.sess.as_default(), self.graph.as_default(), tf.variable_scope(self.tf_scope):
with tf.variable_scope(self.RESOURCE_SCOPE):
val_offset, val_scale = self._calc_val_offset_scale(self.discount)
self.val_norm = TFNormalizer(self.sess, 'val_norm', 1)
self.val_norm.set_mean_std(-val_offset, 1.0 / val_scale)
return
def _build_losses(self, json_data):
actor_weight_decay = 0 if (self.ACTOR_WEIGHT_DECAY_KEY not in json_data) else json_data[self.ACTOR_WEIGHT_DECAY_KEY]
critic_weight_decay = 0 if (self.CRITIC_WEIGHT_DECAY_KEY not in json_data) else json_data[self.CRITIC_WEIGHT_DECAY_KEY]
def _init_normalizers(self):
super()._init_normalizers()
with self.sess.as_default(), self.graph.as_default():
self.val_norm.update()
return
norm_val_diff = self.val_norm.normalize_tf(self.tar_val_tf) - self.val_norm.normalize_tf(self.critic_tf)
self.critic_loss_tf = 0.5 * tf.reduce_mean(tf.square(norm_val_diff))
def _load_normalizers(self):
super()._load_normalizers()
self.val_norm.load()
return
if (critic_weight_decay != 0):
self.critic_loss_tf += critic_weight_decay * self._weight_decay_loss('main/critic')
norm_a_mean_tf = self.a_norm.normalize_tf(self.actor_tf)
norm_a_diff = self.a_norm.normalize_tf(self.a_tf) - norm_a_mean_tf
def _build_losses(self, json_data):
actor_weight_decay = 0 if (
self.ACTOR_WEIGHT_DECAY_KEY not in json_data) else json_data[self.ACTOR_WEIGHT_DECAY_KEY]
critic_weight_decay = 0 if (
self.CRITIC_WEIGHT_DECAY_KEY not in json_data) else json_data[self.CRITIC_WEIGHT_DECAY_KEY]
self.actor_loss_tf = tf.reduce_sum(tf.square(norm_a_diff), axis=-1)
self.actor_loss_tf *= self.adv_tf
self.actor_loss_tf = 0.5 * tf.reduce_mean(self.actor_loss_tf)
norm_val_diff = self.val_norm.normalize_tf(self.tar_val_tf) - self.val_norm.normalize_tf(
self.critic_tf)
self.critic_loss_tf = 0.5 * tf.reduce_mean(tf.square(norm_val_diff))
norm_a_bound_min = self.a_norm.normalize(self.a_bound_min)
norm_a_bound_max = self.a_norm.normalize(self.a_bound_max)
a_bound_loss = TFUtil.calc_bound_loss(norm_a_mean_tf, norm_a_bound_min, norm_a_bound_max)
a_bound_loss /= self.exp_params_curr.noise
self.actor_loss_tf += a_bound_loss
if (critic_weight_decay != 0):
self.critic_loss_tf += critic_weight_decay * self._weight_decay_loss('main/critic')
if (actor_weight_decay != 0):
self.actor_loss_tf += actor_weight_decay * self._weight_decay_loss('main/actor')
return
norm_a_mean_tf = self.a_norm.normalize_tf(self.actor_tf)
norm_a_diff = self.a_norm.normalize_tf(self.a_tf) - norm_a_mean_tf
def _build_solvers(self, json_data):
actor_stepsize = 0.001 if (self.ACTOR_STEPSIZE_KEY not in json_data) else json_data[self.ACTOR_STEPSIZE_KEY]
actor_momentum = 0.9 if (self.ACTOR_MOMENTUM_KEY not in json_data) else json_data[self.ACTOR_MOMENTUM_KEY]
critic_stepsize = 0.01 if (self.CRITIC_STEPSIZE_KEY not in json_data) else json_data[self.CRITIC_STEPSIZE_KEY]
critic_momentum = 0.9 if (self.CRITIC_MOMENTUM_KEY not in json_data) else json_data[self.CRITIC_MOMENTUM_KEY]
critic_vars = self._tf_vars('main/critic')
critic_opt = tf.train.MomentumOptimizer(learning_rate=critic_stepsize, momentum=critic_momentum)
self.critic_grad_tf = tf.gradients(self.critic_loss_tf, critic_vars)
self.critic_solver = MPISolver(self.sess, critic_opt, critic_vars)
self.actor_loss_tf = tf.reduce_sum(tf.square(norm_a_diff), axis=-1)
self.actor_loss_tf *= self.adv_tf
self.actor_loss_tf = 0.5 * tf.reduce_mean(self.actor_loss_tf)
actor_vars = self._tf_vars('main/actor')
actor_opt = tf.train.MomentumOptimizer(learning_rate=actor_stepsize, momentum=actor_momentum)
self.actor_grad_tf = tf.gradients(self.actor_loss_tf, actor_vars)
self.actor_solver = MPISolver(self.sess, actor_opt, actor_vars)
norm_a_bound_min = self.a_norm.normalize(self.a_bound_min)
norm_a_bound_max = self.a_norm.normalize(self.a_bound_max)
a_bound_loss = TFUtil.calc_bound_loss(norm_a_mean_tf, norm_a_bound_min, norm_a_bound_max)
a_bound_loss /= self.exp_params_curr.noise
self.actor_loss_tf += a_bound_loss
return
if (actor_weight_decay != 0):
self.actor_loss_tf += actor_weight_decay * self._weight_decay_loss('main/actor')
def _build_net_actor(self, net_name, init_output_scale):
norm_s_tf = self.s_norm.normalize_tf(self.s_tf)
input_tfs = [norm_s_tf]
if (self.has_goal()):
norm_g_tf = self.g_norm.normalize_tf(self.g_tf)
input_tfs += [norm_g_tf]
h = NetBuilder.build_net(net_name, input_tfs)
norm_a_tf = tf.layers.dense(inputs=h, units=self.get_action_size(), activation=None,
kernel_initializer=tf.random_uniform_initializer(minval=-init_output_scale, maxval=init_output_scale))
a_tf = self.a_norm.unnormalize_tf(norm_a_tf)
return a_tf
def _build_net_critic(self, net_name):
norm_s_tf = self.s_norm.normalize_tf(self.s_tf)
input_tfs = [norm_s_tf]
if (self.has_goal()):
norm_g_tf = self.g_norm.normalize_tf(self.g_tf)
input_tfs += [norm_g_tf]
h = NetBuilder.build_net(net_name, input_tfs)
norm_val_tf = tf.layers.dense(inputs=h, units=1, activation=None,
                                  kernel_initializer=TFUtil.xavier_initializer)
return
norm_val_tf = tf.reshape(norm_val_tf, [-1])
val_tf = self.val_norm.unnormalize_tf(norm_val_tf)
return val_tf
def _build_solvers(self, json_data):
actor_stepsize = 0.001 if (
self.ACTOR_STEPSIZE_KEY not in json_data) else json_data[self.ACTOR_STEPSIZE_KEY]
actor_momentum = 0.9 if (
self.ACTOR_MOMENTUM_KEY not in json_data) else json_data[self.ACTOR_MOMENTUM_KEY]
critic_stepsize = 0.01 if (
self.CRITIC_STEPSIZE_KEY not in json_data) else json_data[self.CRITIC_STEPSIZE_KEY]
critic_momentum = 0.9 if (
self.CRITIC_MOMENTUM_KEY not in json_data) else json_data[self.CRITIC_MOMENTUM_KEY]
def _initialize_vars(self):
super()._initialize_vars()
self._sync_solvers()
return
critic_vars = self._tf_vars('main/critic')
critic_opt = tf.train.MomentumOptimizer(learning_rate=critic_stepsize,
momentum=critic_momentum)
self.critic_grad_tf = tf.gradients(self.critic_loss_tf, critic_vars)
self.critic_solver = MPISolver(self.sess, critic_opt, critic_vars)
def _sync_solvers(self):
self.actor_solver.sync()
self.critic_solver.sync()
return
actor_vars = self._tf_vars('main/actor')
actor_opt = tf.train.MomentumOptimizer(learning_rate=actor_stepsize, momentum=actor_momentum)
self.actor_grad_tf = tf.gradients(self.actor_loss_tf, actor_vars)
self.actor_solver = MPISolver(self.sess, actor_opt, actor_vars)
def _decide_action(self, s, g):
with self.sess.as_default(), self.graph.as_default():
self._exp_action = False
a = self._eval_actor(s, g)[0]
logp = 0
return
if self._enable_stoch_policy():
# epsilon-greedy
rand_action = MathUtil.flip_coin(self.exp_params_curr.rate)
if rand_action:
norm_exp_noise = np.random.randn(*a.shape)
norm_exp_noise *= self.exp_params_curr.noise
exp_noise = norm_exp_noise * self.a_norm.std
a += exp_noise
def _build_net_actor(self, net_name, init_output_scale):
norm_s_tf = self.s_norm.normalize_tf(self.s_tf)
input_tfs = [norm_s_tf]
if (self.has_goal()):
norm_g_tf = self.g_norm.normalize_tf(self.g_tf)
input_tfs += [norm_g_tf]
logp = self._calc_action_logp(norm_exp_noise)
self._exp_action = True
h = NetBuilder.build_net(net_name, input_tfs)
norm_a_tf = tf.layers.dense(inputs=h,
units=self.get_action_size(),
activation=None,
kernel_initializer=tf.random_uniform_initializer(
minval=-init_output_scale, maxval=init_output_scale))
return a, logp
a_tf = self.a_norm.unnormalize_tf(norm_a_tf)
return a_tf
def _enable_stoch_policy(self):
return self.enable_training and (self._mode == self.Mode.TRAIN or self._mode == self.Mode.TRAIN_END)
def _build_net_critic(self, net_name):
norm_s_tf = self.s_norm.normalize_tf(self.s_tf)
input_tfs = [norm_s_tf]
if (self.has_goal()):
norm_g_tf = self.g_norm.normalize_tf(self.g_tf)
input_tfs += [norm_g_tf]
def _eval_actor(self, s, g):
s = np.reshape(s, [-1, self.get_state_size()])
g = np.reshape(g, [-1, self.get_goal_size()]) if self.has_goal() else None
feed = {
self.s_tf : s,
self.g_tf : g
}
h = NetBuilder.build_net(net_name, input_tfs)
norm_val_tf = tf.layers.dense(inputs=h,
units=1,
activation=None,
kernel_initializer=TFUtil.xavier_initializer)
a = self.actor_tf.eval(feed)
return a
def _eval_critic(self, s, g):
with self.sess.as_default(), self.graph.as_default():
s = np.reshape(s, [-1, self.get_state_size()])
g = np.reshape(g, [-1, self.get_goal_size()]) if self.has_goal() else None
norm_val_tf = tf.reshape(norm_val_tf, [-1])
val_tf = self.val_norm.unnormalize_tf(norm_val_tf)
return val_tf
feed = {
self.s_tf : s,
self.g_tf : g
}
def _initialize_vars(self):
super()._initialize_vars()
self._sync_solvers()
return
val = self.critic_tf.eval(feed)
return val
def _sync_solvers(self):
self.actor_solver.sync()
self.critic_solver.sync()
return
def _record_flags(self):
flags = int(0)
if (self._exp_action):
flags = flags | self.EXP_ACTION_FLAG
return flags
def _decide_action(self, s, g):
with self.sess.as_default(), self.graph.as_default():
self._exp_action = False
a = self._eval_actor(s, g)[0]
logp = 0
def _train_step(self):
super()._train_step()
if self._enable_stoch_policy():
# epsilon-greedy
rand_action = MathUtil.flip_coin(self.exp_params_curr.rate)
if rand_action:
norm_exp_noise = np.random.randn(*a.shape)
norm_exp_noise *= self.exp_params_curr.noise
exp_noise = norm_exp_noise * self.a_norm.std
a += exp_noise
critic_loss = self._update_critic()
actor_loss = self._update_actor()
critic_loss = MPIUtil.reduce_avg(critic_loss)
actor_loss = MPIUtil.reduce_avg(actor_loss)
logp = self._calc_action_logp(norm_exp_noise)
self._exp_action = True
critic_stepsize = self.critic_solver.get_stepsize()
actor_stepsize = self.actor_solver.get_stepsize()
self.logger.log_tabular('Critic_Loss', critic_loss)
self.logger.log_tabular('Critic_Stepsize', critic_stepsize)
self.logger.log_tabular('Actor_Loss', actor_loss)
self.logger.log_tabular('Actor_Stepsize', actor_stepsize)
return a, logp
return
def _enable_stoch_policy(self):
return self.enable_training and (self._mode == self.Mode.TRAIN or
self._mode == self.Mode.TRAIN_END)
def _update_critic(self):
idx = self.replay_buffer.sample(self._local_mini_batch_size)
s = self.replay_buffer.get('states', idx)
g = self.replay_buffer.get('goals', idx) if self.has_goal() else None
tar_V = self._calc_updated_vals(idx)
tar_V = np.clip(tar_V, self.val_min, self.val_max)
def _eval_actor(self, s, g):
s = np.reshape(s, [-1, self.get_state_size()])
g = np.reshape(g, [-1, self.get_goal_size()]) if self.has_goal() else None
feed = {
self.s_tf: s,
self.g_tf: g,
self.tar_val_tf: tar_V
}
feed = {self.s_tf: s, self.g_tf: g}
loss, grads = self.sess.run([self.critic_loss_tf, self.critic_grad_tf], feed)
self.critic_solver.update(grads)
return loss
def _update_actor(self):
key = self.EXP_ACTION_FLAG
idx = self.replay_buffer.sample_filtered(self._local_mini_batch_size, key)
has_goal = self.has_goal()
a = self.actor_tf.eval(feed)
return a
s = self.replay_buffer.get('states', idx)
g = self.replay_buffer.get('goals', idx) if has_goal else None
a = self.replay_buffer.get('actions', idx)
def _eval_critic(self, s, g):
with self.sess.as_default(), self.graph.as_default():
s = np.reshape(s, [-1, self.get_state_size()])
g = np.reshape(g, [-1, self.get_goal_size()]) if self.has_goal() else None
V_new = self._calc_updated_vals(idx)
V_old = self._eval_critic(s, g)
adv = V_new - V_old
feed = {self.s_tf: s, self.g_tf: g}
feed = {
self.s_tf: s,
self.g_tf: g,
self.a_tf: a,
self.adv_tf: adv
}
val = self.critic_tf.eval(feed)
return val
loss, grads = self.sess.run([self.actor_loss_tf, self.actor_grad_tf], feed)
self.actor_solver.update(grads)
def _record_flags(self):
flags = int(0)
if (self._exp_action):
flags = flags | self.EXP_ACTION_FLAG
return flags
return loss
def _train_step(self):
super()._train_step()
def _calc_updated_vals(self, idx):
r = self.replay_buffer.get('rewards', idx)
critic_loss = self._update_critic()
actor_loss = self._update_actor()
critic_loss = MPIUtil.reduce_avg(critic_loss)
actor_loss = MPIUtil.reduce_avg(actor_loss)
if self.discount == 0:
new_V = r
else:
next_idx = self.replay_buffer.get_next_idx(idx)
s_next = self.replay_buffer.get('states', next_idx)
g_next = self.replay_buffer.get('goals', next_idx) if self.has_goal() else None
critic_stepsize = self.critic_solver.get_stepsize()
actor_stepsize = self.actor_solver.get_stepsize()
is_end = self.replay_buffer.is_path_end(idx)
is_fail = self.replay_buffer.check_terminal_flag(idx, Env.Terminate.Fail)
is_succ = self.replay_buffer.check_terminal_flag(idx, Env.Terminate.Succ)
is_fail = np.logical_and(is_end, is_fail)
is_succ = np.logical_and(is_end, is_succ)
self.logger.log_tabular('Critic_Loss', critic_loss)
self.logger.log_tabular('Critic_Stepsize', critic_stepsize)
self.logger.log_tabular('Actor_Loss', actor_loss)
self.logger.log_tabular('Actor_Stepsize', actor_stepsize)
V_next = self._eval_critic(s_next, g_next)
V_next[is_fail] = self.val_fail
V_next[is_succ] = self.val_succ
return
new_V = r + self.discount * V_next
return new_V
def _update_critic(self):
idx = self.replay_buffer.sample(self._local_mini_batch_size)
s = self.replay_buffer.get('states', idx)
g = self.replay_buffer.get('goals', idx) if self.has_goal() else None
def _calc_action_logp(self, norm_action_deltas):
    # norm action deltas are for the normalized actions (scaled by self.a_norm.std)
stdev = self.exp_params_curr.noise
assert stdev > 0
tar_V = self._calc_updated_vals(idx)
tar_V = np.clip(tar_V, self.val_min, self.val_max)
a_size = self.get_action_size()
logp = -0.5 / (stdev * stdev) * np.sum(np.square(norm_action_deltas), axis=-1)
logp += -0.5 * a_size * np.log(2 * np.pi)
logp += -a_size * np.log(stdev)
return logp
feed = {self.s_tf: s, self.g_tf: g, self.tar_val_tf: tar_V}
def _log_val(self, s, g):
val = self._eval_critic(s, g)
norm_val = self.val_norm.normalize(val)
self.world.env.log_val(self.id, norm_val[0])
return
loss, grads = self.sess.run([self.critic_loss_tf, self.critic_grad_tf], feed)
self.critic_solver.update(grads)
return loss
def _build_replay_buffer(self, buffer_size):
super()._build_replay_buffer(buffer_size)
self.replay_buffer.add_filter_key(self.EXP_ACTION_FLAG)
return
def _update_actor(self):
key = self.EXP_ACTION_FLAG
idx = self.replay_buffer.sample_filtered(self._local_mini_batch_size, key)
has_goal = self.has_goal()
s = self.replay_buffer.get('states', idx)
g = self.replay_buffer.get('goals', idx) if has_goal else None
a = self.replay_buffer.get('actions', idx)
V_new = self._calc_updated_vals(idx)
V_old = self._eval_critic(s, g)
adv = V_new - V_old
feed = {self.s_tf: s, self.g_tf: g, self.a_tf: a, self.adv_tf: adv}
loss, grads = self.sess.run([self.actor_loss_tf, self.actor_grad_tf], feed)
self.actor_solver.update(grads)
return loss
def _calc_updated_vals(self, idx):
r = self.replay_buffer.get('rewards', idx)
if self.discount == 0:
new_V = r
else:
next_idx = self.replay_buffer.get_next_idx(idx)
s_next = self.replay_buffer.get('states', next_idx)
g_next = self.replay_buffer.get('goals', next_idx) if self.has_goal() else None
is_end = self.replay_buffer.is_path_end(idx)
is_fail = self.replay_buffer.check_terminal_flag(idx, Env.Terminate.Fail)
is_succ = self.replay_buffer.check_terminal_flag(idx, Env.Terminate.Succ)
is_fail = np.logical_and(is_end, is_fail)
is_succ = np.logical_and(is_end, is_succ)
V_next = self._eval_critic(s_next, g_next)
V_next[is_fail] = self.val_fail
V_next[is_succ] = self.val_succ
new_V = r + self.discount * V_next
return new_V
def _calc_action_logp(self, norm_action_deltas):
    # norm action deltas are for the normalized actions (scaled by self.a_norm.std)
stdev = self.exp_params_curr.noise
assert stdev > 0
a_size = self.get_action_size()
logp = -0.5 / (stdev * stdev) * np.sum(np.square(norm_action_deltas), axis=-1)
logp += -0.5 * a_size * np.log(2 * np.pi)
logp += -a_size * np.log(stdev)
return logp
def _log_val(self, s, g):
val = self._eval_critic(s, g)
norm_val = self.val_norm.normalize(val)
self.world.env.log_val(self.id, norm_val[0])
return
def _build_replay_buffer(self, buffer_size):
super()._build_replay_buffer(buffer_size)
self.replay_buffer.add_filter_key(self.EXP_ACTION_FLAG)
return
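
The critic target and advantage computed in _calc_updated_vals and _update_actor reduce to a one-step TD target, tar_V = r + discount * V(s'), with the advantage taken as tar_V - V(s) and terminal states forced to val_fail / val_succ. A numpy sketch under those assumptions, with toy values and no replay buffer:

import numpy as np

# One-step TD targets and advantages, mirroring the PG agent above.
discount = 0.95
r = np.array([1.0, 0.5, -0.2])
v_s = np.array([3.0, 2.5, 1.0])       # critic estimate V(s)
v_next = np.array([2.8, 2.0, 0.0])    # critic estimate V(s')
is_fail = np.array([False, False, True])

val_fail = 0.0                        # analogous to self.val_fail
v_next = np.where(is_fail, val_fail, v_next)  # override terminal values

tar_v = r + discount * v_next         # critic regression target
adv = tar_v - v_s                     # advantage used by the actor update
print(tar_v, adv)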

View File

@@ -10,359 +10,374 @@ from pybullet_utils.logger import Logger
import pybullet_utils.mpi_util as MPIUtil
import pybullet_utils.math_util as MathUtil
from pybullet_envs.deep_mimic.env.env import Env
'''
Proximal Policy Optimization Agent
'''
class PPOAgent(PGAgent):
NAME = "PPO"
EPOCHS_KEY = "Epochs"
BATCH_SIZE_KEY = "BatchSize"
RATIO_CLIP_KEY = "RatioClip"
NORM_ADV_CLIP_KEY = "NormAdvClip"
TD_LAMBDA_KEY = "TDLambda"
TAR_CLIP_FRAC = "TarClipFrac"
ACTOR_STEPSIZE_DECAY = "ActorStepsizeDecay"
NAME = "PPO"
EPOCHS_KEY = "Epochs"
BATCH_SIZE_KEY = "BatchSize"
RATIO_CLIP_KEY = "RatioClip"
NORM_ADV_CLIP_KEY = "NormAdvClip"
TD_LAMBDA_KEY = "TDLambda"
TAR_CLIP_FRAC = "TarClipFrac"
ACTOR_STEPSIZE_DECAY = "ActorStepsizeDecay"
def __init__(self, world, id, json_data):
super().__init__(world, id, json_data)
return
def __init__(self, world, id, json_data):
super().__init__(world, id, json_data)
return
def _load_params(self, json_data):
super()._load_params(json_data)
def _load_params(self, json_data):
super()._load_params(json_data)
self.epochs = 1 if (self.EPOCHS_KEY not in json_data) else json_data[self.EPOCHS_KEY]
self.batch_size = 1024 if (self.BATCH_SIZE_KEY not in json_data) else json_data[self.BATCH_SIZE_KEY]
self.ratio_clip = 0.2 if (self.RATIO_CLIP_KEY not in json_data) else json_data[self.RATIO_CLIP_KEY]
self.norm_adv_clip = 5 if (self.NORM_ADV_CLIP_KEY not in json_data) else json_data[self.NORM_ADV_CLIP_KEY]
self.td_lambda = 0.95 if (self.TD_LAMBDA_KEY not in json_data) else json_data[self.TD_LAMBDA_KEY]
self.tar_clip_frac = -1 if (self.TAR_CLIP_FRAC not in json_data) else json_data[self.TAR_CLIP_FRAC]
self.actor_stepsize_decay = 0.5 if (self.ACTOR_STEPSIZE_DECAY not in json_data) else json_data[self.ACTOR_STEPSIZE_DECAY]
self.epochs = 1 if (self.EPOCHS_KEY not in json_data) else json_data[self.EPOCHS_KEY]
self.batch_size = 1024 if (
self.BATCH_SIZE_KEY not in json_data) else json_data[self.BATCH_SIZE_KEY]
self.ratio_clip = 0.2 if (
self.RATIO_CLIP_KEY not in json_data) else json_data[self.RATIO_CLIP_KEY]
self.norm_adv_clip = 5 if (
self.NORM_ADV_CLIP_KEY not in json_data) else json_data[self.NORM_ADV_CLIP_KEY]
self.td_lambda = 0.95 if (
self.TD_LAMBDA_KEY not in json_data) else json_data[self.TD_LAMBDA_KEY]
self.tar_clip_frac = -1 if (
self.TAR_CLIP_FRAC not in json_data) else json_data[self.TAR_CLIP_FRAC]
self.actor_stepsize_decay = 0.5 if (
self.ACTOR_STEPSIZE_DECAY not in json_data) else json_data[self.ACTOR_STEPSIZE_DECAY]
num_procs = MPIUtil.get_num_procs()
local_batch_size = int(self.batch_size / num_procs)
min_replay_size = 2 * local_batch_size # needed to prevent buffer overflow
assert(self.replay_buffer_size > min_replay_size)
num_procs = MPIUtil.get_num_procs()
local_batch_size = int(self.batch_size / num_procs)
min_replay_size = 2 * local_batch_size # needed to prevent buffer overflow
assert (self.replay_buffer_size > min_replay_size)
self.replay_buffer_size = np.maximum(min_replay_size, self.replay_buffer_size)
self.replay_buffer_size = np.maximum(min_replay_size, self.replay_buffer_size)
return
return
def _build_nets(self, json_data):
assert self.ACTOR_NET_KEY in json_data
assert self.CRITIC_NET_KEY in json_data
def _build_nets(self, json_data):
assert self.ACTOR_NET_KEY in json_data
assert self.CRITIC_NET_KEY in json_data
actor_net_name = json_data[self.ACTOR_NET_KEY]
critic_net_name = json_data[self.CRITIC_NET_KEY]
actor_init_output_scale = 1 if (self.ACTOR_INIT_OUTPUT_SCALE_KEY not in json_data) else json_data[self.ACTOR_INIT_OUTPUT_SCALE_KEY]
actor_net_name = json_data[self.ACTOR_NET_KEY]
critic_net_name = json_data[self.CRITIC_NET_KEY]
actor_init_output_scale = 1 if (self.ACTOR_INIT_OUTPUT_SCALE_KEY not in json_data
) else json_data[self.ACTOR_INIT_OUTPUT_SCALE_KEY]
s_size = self.get_state_size()
g_size = self.get_goal_size()
a_size = self.get_action_size()
s_size = self.get_state_size()
g_size = self.get_goal_size()
a_size = self.get_action_size()
# setup input tensors
self.s_tf = tf.placeholder(tf.float32, shape=[None, s_size], name="s")
self.a_tf = tf.placeholder(tf.float32, shape=[None, a_size], name="a")
self.tar_val_tf = tf.placeholder(tf.float32, shape=[None], name="tar_val")
self.adv_tf = tf.placeholder(tf.float32, shape=[None], name="adv")
self.g_tf = tf.placeholder(tf.float32, shape=([None, g_size] if self.has_goal() else None), name="g")
self.old_logp_tf = tf.placeholder(tf.float32, shape=[None], name="old_logp")
self.exp_mask_tf = tf.placeholder(tf.float32, shape=[None], name="exp_mask")
# setup input tensors
self.s_tf = tf.placeholder(tf.float32, shape=[None, s_size], name="s")
self.a_tf = tf.placeholder(tf.float32, shape=[None, a_size], name="a")
self.tar_val_tf = tf.placeholder(tf.float32, shape=[None], name="tar_val")
self.adv_tf = tf.placeholder(tf.float32, shape=[None], name="adv")
self.g_tf = tf.placeholder(tf.float32,
shape=([None, g_size] if self.has_goal() else None),
name="g")
self.old_logp_tf = tf.placeholder(tf.float32, shape=[None], name="old_logp")
self.exp_mask_tf = tf.placeholder(tf.float32, shape=[None], name="exp_mask")
with tf.variable_scope('main'):
with tf.variable_scope('actor'):
self.a_mean_tf = self._build_net_actor(actor_net_name, actor_init_output_scale)
with tf.variable_scope('critic'):
self.critic_tf = self._build_net_critic(critic_net_name)
if (self.a_mean_tf != None):
Logger.print2('Built actor net: ' + actor_net_name)
with tf.variable_scope('main'):
with tf.variable_scope('actor'):
self.a_mean_tf = self._build_net_actor(actor_net_name, actor_init_output_scale)
with tf.variable_scope('critic'):
self.critic_tf = self._build_net_critic(critic_net_name)
if (self.critic_tf != None):
Logger.print2('Built critic net: ' + critic_net_name)
self.norm_a_std_tf = self.exp_params_curr.noise * tf.ones(a_size)
norm_a_noise_tf = self.norm_a_std_tf * tf.random_normal(shape=tf.shape(self.a_mean_tf))
norm_a_noise_tf *= tf.expand_dims(self.exp_mask_tf, axis=-1)
self.sample_a_tf = self.a_mean_tf + norm_a_noise_tf * self.a_norm.std_tf
self.sample_a_logp_tf = TFUtil.calc_logp_gaussian(x_tf=norm_a_noise_tf, mean_tf=None, std_tf=self.norm_a_std_tf)
if (self.a_mean_tf != None):
Logger.print2('Built actor net: ' + actor_net_name)
return
if (self.critic_tf != None):
Logger.print2('Built critic net: ' + critic_net_name)
def _build_losses(self, json_data):
actor_weight_decay = 0 if (self.ACTOR_WEIGHT_DECAY_KEY not in json_data) else json_data[self.ACTOR_WEIGHT_DECAY_KEY]
critic_weight_decay = 0 if (self.CRITIC_WEIGHT_DECAY_KEY not in json_data) else json_data[self.CRITIC_WEIGHT_DECAY_KEY]
norm_val_diff = self.val_norm.normalize_tf(self.tar_val_tf) - self.val_norm.normalize_tf(self.critic_tf)
self.critic_loss_tf = 0.5 * tf.reduce_mean(tf.square(norm_val_diff))
self.norm_a_std_tf = self.exp_params_curr.noise * tf.ones(a_size)
norm_a_noise_tf = self.norm_a_std_tf * tf.random_normal(shape=tf.shape(self.a_mean_tf))
norm_a_noise_tf *= tf.expand_dims(self.exp_mask_tf, axis=-1)
self.sample_a_tf = self.a_mean_tf + norm_a_noise_tf * self.a_norm.std_tf
self.sample_a_logp_tf = TFUtil.calc_logp_gaussian(x_tf=norm_a_noise_tf,
mean_tf=None,
std_tf=self.norm_a_std_tf)
if (critic_weight_decay != 0):
self.critic_loss_tf += critic_weight_decay * self._weight_decay_loss('main/critic')
norm_tar_a_tf = self.a_norm.normalize_tf(self.a_tf)
self._norm_a_mean_tf = self.a_norm.normalize_tf(self.a_mean_tf)
return
self.logp_tf = TFUtil.calc_logp_gaussian(norm_tar_a_tf, self._norm_a_mean_tf, self.norm_a_std_tf)
ratio_tf = tf.exp(self.logp_tf - self.old_logp_tf)
actor_loss0 = self.adv_tf * ratio_tf
actor_loss1 = self.adv_tf * tf.clip_by_value(ratio_tf, 1.0 - self.ratio_clip, 1 + self.ratio_clip)
self.actor_loss_tf = -tf.reduce_mean(tf.minimum(actor_loss0, actor_loss1))
def _build_losses(self, json_data):
actor_weight_decay = 0 if (
self.ACTOR_WEIGHT_DECAY_KEY not in json_data) else json_data[self.ACTOR_WEIGHT_DECAY_KEY]
critic_weight_decay = 0 if (
self.CRITIC_WEIGHT_DECAY_KEY not in json_data) else json_data[self.CRITIC_WEIGHT_DECAY_KEY]
norm_a_bound_min = self.a_norm.normalize(self.a_bound_min)
norm_a_bound_max = self.a_norm.normalize(self.a_bound_max)
a_bound_loss = TFUtil.calc_bound_loss(self._norm_a_mean_tf, norm_a_bound_min, norm_a_bound_max)
self.actor_loss_tf += a_bound_loss
norm_val_diff = self.val_norm.normalize_tf(self.tar_val_tf) - self.val_norm.normalize_tf(
self.critic_tf)
self.critic_loss_tf = 0.5 * tf.reduce_mean(tf.square(norm_val_diff))
if (actor_weight_decay != 0):
self.actor_loss_tf += actor_weight_decay * self._weight_decay_loss('main/actor')
# for debugging
self.clip_frac_tf = tf.reduce_mean(tf.to_float(tf.greater(tf.abs(ratio_tf - 1.0), self.ratio_clip)))
if (critic_weight_decay != 0):
self.critic_loss_tf += critic_weight_decay * self._weight_decay_loss('main/critic')
return
norm_tar_a_tf = self.a_norm.normalize_tf(self.a_tf)
self._norm_a_mean_tf = self.a_norm.normalize_tf(self.a_mean_tf)
def _build_solvers(self, json_data):
actor_stepsize = 0.001 if (self.ACTOR_STEPSIZE_KEY not in json_data) else json_data[self.ACTOR_STEPSIZE_KEY]
actor_momentum = 0.9 if (self.ACTOR_MOMENTUM_KEY not in json_data) else json_data[self.ACTOR_MOMENTUM_KEY]
critic_stepsize = 0.01 if (self.CRITIC_STEPSIZE_KEY not in json_data) else json_data[self.CRITIC_STEPSIZE_KEY]
critic_momentum = 0.9 if (self.CRITIC_MOMENTUM_KEY not in json_data) else json_data[self.CRITIC_MOMENTUM_KEY]
critic_vars = self._tf_vars('main/critic')
critic_opt = tf.train.MomentumOptimizer(learning_rate=critic_stepsize, momentum=critic_momentum)
self.critic_grad_tf = tf.gradients(self.critic_loss_tf, critic_vars)
self.critic_solver = MPISolver(self.sess, critic_opt, critic_vars)
self.logp_tf = TFUtil.calc_logp_gaussian(norm_tar_a_tf, self._norm_a_mean_tf,
self.norm_a_std_tf)
ratio_tf = tf.exp(self.logp_tf - self.old_logp_tf)
actor_loss0 = self.adv_tf * ratio_tf
actor_loss1 = self.adv_tf * tf.clip_by_value(ratio_tf, 1.0 - self.ratio_clip,
1 + self.ratio_clip)
self.actor_loss_tf = -tf.reduce_mean(tf.minimum(actor_loss0, actor_loss1))
self._actor_stepsize_tf = tf.get_variable(dtype=tf.float32, name='actor_stepsize', initializer=actor_stepsize, trainable=False)
self._actor_stepsize_ph = tf.get_variable(dtype=tf.float32, name='actor_stepsize_ph', shape=[])
self._actor_stepsize_update_op = self._actor_stepsize_tf.assign(self._actor_stepsize_ph)
norm_a_bound_min = self.a_norm.normalize(self.a_bound_min)
norm_a_bound_max = self.a_norm.normalize(self.a_bound_max)
a_bound_loss = TFUtil.calc_bound_loss(self._norm_a_mean_tf, norm_a_bound_min, norm_a_bound_max)
self.actor_loss_tf += a_bound_loss
actor_vars = self._tf_vars('main/actor')
actor_opt = tf.train.MomentumOptimizer(learning_rate=self._actor_stepsize_tf, momentum=actor_momentum)
self.actor_grad_tf = tf.gradients(self.actor_loss_tf, actor_vars)
self.actor_solver = MPISolver(self.sess, actor_opt, actor_vars)
return
if (actor_weight_decay != 0):
self.actor_loss_tf += actor_weight_decay * self._weight_decay_loss('main/actor')
def _decide_action(self, s, g):
with self.sess.as_default(), self.graph.as_default():
self._exp_action = self._enable_stoch_policy() and MathUtil.flip_coin(self.exp_params_curr.rate)
#print("_decide_action._exp_action=",self._exp_action)
a, logp = self._eval_actor(s, g, self._exp_action)
return a[0], logp[0]
# for debugging
self.clip_frac_tf = tf.reduce_mean(
tf.to_float(tf.greater(tf.abs(ratio_tf - 1.0), self.ratio_clip)))
def _eval_actor(self, s, g, enable_exp):
s = np.reshape(s, [-1, self.get_state_size()])
g = np.reshape(g, [-1, self.get_goal_size()]) if self.has_goal() else None
feed = {
self.s_tf : s,
self.g_tf : g,
self.exp_mask_tf: np.array([1 if enable_exp else 0])
}
return
a, logp = self.sess.run([self.sample_a_tf, self.sample_a_logp_tf], feed_dict=feed)
return a, logp
def _build_solvers(self, json_data):
actor_stepsize = 0.001 if (
self.ACTOR_STEPSIZE_KEY not in json_data) else json_data[self.ACTOR_STEPSIZE_KEY]
actor_momentum = 0.9 if (
self.ACTOR_MOMENTUM_KEY not in json_data) else json_data[self.ACTOR_MOMENTUM_KEY]
critic_stepsize = 0.01 if (
self.CRITIC_STEPSIZE_KEY not in json_data) else json_data[self.CRITIC_STEPSIZE_KEY]
critic_momentum = 0.9 if (
self.CRITIC_MOMENTUM_KEY not in json_data) else json_data[self.CRITIC_MOMENTUM_KEY]
def _train_step(self):
adv_eps = 1e-5
critic_vars = self._tf_vars('main/critic')
critic_opt = tf.train.MomentumOptimizer(learning_rate=critic_stepsize,
momentum=critic_momentum)
self.critic_grad_tf = tf.gradients(self.critic_loss_tf, critic_vars)
self.critic_solver = MPISolver(self.sess, critic_opt, critic_vars)
start_idx = self.replay_buffer.buffer_tail
end_idx = self.replay_buffer.buffer_head
assert(start_idx == 0)
assert(self.replay_buffer.get_current_size() <= self.replay_buffer.buffer_size) # must avoid overflow
assert(start_idx < end_idx)
self._actor_stepsize_tf = tf.get_variable(dtype=tf.float32,
name='actor_stepsize',
initializer=actor_stepsize,
trainable=False)
self._actor_stepsize_ph = tf.get_variable(dtype=tf.float32, name='actor_stepsize_ph', shape=[])
self._actor_stepsize_update_op = self._actor_stepsize_tf.assign(self._actor_stepsize_ph)
idx = np.array(list(range(start_idx, end_idx)))
end_mask = self.replay_buffer.is_path_end(idx)
end_mask = np.logical_not(end_mask)
vals = self._compute_batch_vals(start_idx, end_idx)
new_vals = self._compute_batch_new_vals(start_idx, end_idx, vals)
actor_vars = self._tf_vars('main/actor')
actor_opt = tf.train.MomentumOptimizer(learning_rate=self._actor_stepsize_tf,
momentum=actor_momentum)
self.actor_grad_tf = tf.gradients(self.actor_loss_tf, actor_vars)
self.actor_solver = MPISolver(self.sess, actor_opt, actor_vars)
valid_idx = idx[end_mask]
exp_idx = self.replay_buffer.get_idx_filtered(self.EXP_ACTION_FLAG).copy()
num_valid_idx = valid_idx.shape[0]
num_exp_idx = exp_idx.shape[0]
exp_idx = np.column_stack([exp_idx, np.array(list(range(0, num_exp_idx)), dtype=np.int32)])
local_sample_count = valid_idx.size
global_sample_count = int(MPIUtil.reduce_sum(local_sample_count))
mini_batches = int(np.ceil(global_sample_count / self.mini_batch_size))
adv = new_vals[exp_idx[:,0]] - vals[exp_idx[:,0]]
new_vals = np.clip(new_vals, self.val_min, self.val_max)
return
adv_mean = np.mean(adv)
adv_std = np.std(adv)
adv = (adv - adv_mean) / (adv_std + adv_eps)
adv = np.clip(adv, -self.norm_adv_clip, self.norm_adv_clip)
def _decide_action(self, s, g):
with self.sess.as_default(), self.graph.as_default():
self._exp_action = self._enable_stoch_policy() and MathUtil.flip_coin(
self.exp_params_curr.rate)
#print("_decide_action._exp_action=",self._exp_action)
a, logp = self._eval_actor(s, g, self._exp_action)
return a[0], logp[0]
critic_loss = 0
actor_loss = 0
actor_clip_frac = 0
def _eval_actor(self, s, g, enable_exp):
s = np.reshape(s, [-1, self.get_state_size()])
g = np.reshape(g, [-1, self.get_goal_size()]) if self.has_goal() else None
for e in range(self.epochs):
np.random.shuffle(valid_idx)
np.random.shuffle(exp_idx)
feed = {self.s_tf: s, self.g_tf: g, self.exp_mask_tf: np.array([1 if enable_exp else 0])}
for b in range(mini_batches):
batch_idx_beg = b * self._local_mini_batch_size
batch_idx_end = batch_idx_beg + self._local_mini_batch_size
a, logp = self.sess.run([self.sample_a_tf, self.sample_a_logp_tf], feed_dict=feed)
return a, logp
critic_batch = np.array(range(batch_idx_beg, batch_idx_end), dtype=np.int32)
actor_batch = critic_batch.copy()
critic_batch = np.mod(critic_batch, num_valid_idx)
actor_batch = np.mod(actor_batch, num_exp_idx)
shuffle_actor = (actor_batch[-1] < actor_batch[0]) or (actor_batch[-1] == num_exp_idx - 1)
def _train_step(self):
adv_eps = 1e-5
critic_batch = valid_idx[critic_batch]
actor_batch = exp_idx[actor_batch]
critic_batch_vals = new_vals[critic_batch]
actor_batch_adv = adv[actor_batch[:,1]]
start_idx = self.replay_buffer.buffer_tail
end_idx = self.replay_buffer.buffer_head
assert (start_idx == 0)
assert (self.replay_buffer.get_current_size() <= self.replay_buffer.buffer_size
) # must avoid overflow
assert (start_idx < end_idx)
critic_s = self.replay_buffer.get('states', critic_batch)
critic_g = self.replay_buffer.get('goals', critic_batch) if self.has_goal() else None
curr_critic_loss = self._update_critic(critic_s, critic_g, critic_batch_vals)
idx = np.array(list(range(start_idx, end_idx)))
end_mask = self.replay_buffer.is_path_end(idx)
end_mask = np.logical_not(end_mask)
actor_s = self.replay_buffer.get("states", actor_batch[:,0])
actor_g = self.replay_buffer.get("goals", actor_batch[:,0]) if self.has_goal() else None
actor_a = self.replay_buffer.get("actions", actor_batch[:,0])
actor_logp = self.replay_buffer.get("logps", actor_batch[:,0])
curr_actor_loss, curr_actor_clip_frac = self._update_actor(actor_s, actor_g, actor_a, actor_logp, actor_batch_adv)
critic_loss += curr_critic_loss
actor_loss += np.abs(curr_actor_loss)
actor_clip_frac += curr_actor_clip_frac
vals = self._compute_batch_vals(start_idx, end_idx)
new_vals = self._compute_batch_new_vals(start_idx, end_idx, vals)
if (shuffle_actor):
np.random.shuffle(exp_idx)
valid_idx = idx[end_mask]
exp_idx = self.replay_buffer.get_idx_filtered(self.EXP_ACTION_FLAG).copy()
num_valid_idx = valid_idx.shape[0]
num_exp_idx = exp_idx.shape[0]
exp_idx = np.column_stack([exp_idx, np.array(list(range(0, num_exp_idx)), dtype=np.int32)])
total_batches = mini_batches * self.epochs
critic_loss /= total_batches
actor_loss /= total_batches
actor_clip_frac /= total_batches
local_sample_count = valid_idx.size
global_sample_count = int(MPIUtil.reduce_sum(local_sample_count))
mini_batches = int(np.ceil(global_sample_count / self.mini_batch_size))
critic_loss = MPIUtil.reduce_avg(critic_loss)
actor_loss = MPIUtil.reduce_avg(actor_loss)
actor_clip_frac = MPIUtil.reduce_avg(actor_clip_frac)
adv = new_vals[exp_idx[:, 0]] - vals[exp_idx[:, 0]]
new_vals = np.clip(new_vals, self.val_min, self.val_max)
critic_stepsize = self.critic_solver.get_stepsize()
actor_stepsize = self.update_actor_stepsize(actor_clip_frac)
adv_mean = np.mean(adv)
adv_std = np.std(adv)
adv = (adv - adv_mean) / (adv_std + adv_eps)
adv = np.clip(adv, -self.norm_adv_clip, self.norm_adv_clip)
self.logger.log_tabular('Critic_Loss', critic_loss)
self.logger.log_tabular('Critic_Stepsize', critic_stepsize)
self.logger.log_tabular('Actor_Loss', actor_loss)
self.logger.log_tabular('Actor_Stepsize', actor_stepsize)
self.logger.log_tabular('Clip_Frac', actor_clip_frac)
self.logger.log_tabular('Adv_Mean', adv_mean)
self.logger.log_tabular('Adv_Std', adv_std)
critic_loss = 0
actor_loss = 0
actor_clip_frac = 0
self.replay_buffer.clear()
for e in range(self.epochs):
np.random.shuffle(valid_idx)
np.random.shuffle(exp_idx)
return
for b in range(mini_batches):
batch_idx_beg = b * self._local_mini_batch_size
batch_idx_end = batch_idx_beg + self._local_mini_batch_size
def _get_iters_per_update(self):
return 1
critic_batch = np.array(range(batch_idx_beg, batch_idx_end), dtype=np.int32)
actor_batch = critic_batch.copy()
critic_batch = np.mod(critic_batch, num_valid_idx)
actor_batch = np.mod(actor_batch, num_exp_idx)
shuffle_actor = (actor_batch[-1] < actor_batch[0]) or (actor_batch[-1] == num_exp_idx - 1)
def _valid_train_step(self):
samples = self.replay_buffer.get_current_size()
exp_samples = self.replay_buffer.count_filtered(self.EXP_ACTION_FLAG)
global_sample_count = int(MPIUtil.reduce_sum(samples))
global_exp_min = int(MPIUtil.reduce_min(exp_samples))
return (global_sample_count > self.batch_size) and (global_exp_min > 0)
critic_batch = valid_idx[critic_batch]
actor_batch = exp_idx[actor_batch]
critic_batch_vals = new_vals[critic_batch]
actor_batch_adv = adv[actor_batch[:, 1]]
def _compute_batch_vals(self, start_idx, end_idx):
states = self.replay_buffer.get_all("states")[start_idx:end_idx]
goals = self.replay_buffer.get_all("goals")[start_idx:end_idx] if self.has_goal() else None
idx = np.array(list(range(start_idx, end_idx)))
is_end = self.replay_buffer.is_path_end(idx)
is_fail = self.replay_buffer.check_terminal_flag(idx, Env.Terminate.Fail)
is_succ = self.replay_buffer.check_terminal_flag(idx, Env.Terminate.Succ)
is_fail = np.logical_and(is_end, is_fail)
is_succ = np.logical_and(is_end, is_succ)
critic_s = self.replay_buffer.get('states', critic_batch)
critic_g = self.replay_buffer.get('goals', critic_batch) if self.has_goal() else None
curr_critic_loss = self._update_critic(critic_s, critic_g, critic_batch_vals)
vals = self._eval_critic(states, goals)
vals[is_fail] = self.val_fail
vals[is_succ] = self.val_succ
actor_s = self.replay_buffer.get("states", actor_batch[:, 0])
actor_g = self.replay_buffer.get("goals", actor_batch[:, 0]) if self.has_goal() else None
actor_a = self.replay_buffer.get("actions", actor_batch[:, 0])
actor_logp = self.replay_buffer.get("logps", actor_batch[:, 0])
curr_actor_loss, curr_actor_clip_frac = self._update_actor(actor_s, actor_g, actor_a,
actor_logp, actor_batch_adv)
return vals
critic_loss += curr_critic_loss
actor_loss += np.abs(curr_actor_loss)
actor_clip_frac += curr_actor_clip_frac
def _compute_batch_new_vals(self, start_idx, end_idx, val_buffer):
rewards = self.replay_buffer.get_all("rewards")[start_idx:end_idx]
if (shuffle_actor):
np.random.shuffle(exp_idx)
if self.discount == 0:
new_vals = rewards.copy()
total_batches = mini_batches * self.epochs
critic_loss /= total_batches
actor_loss /= total_batches
actor_clip_frac /= total_batches
critic_loss = MPIUtil.reduce_avg(critic_loss)
actor_loss = MPIUtil.reduce_avg(actor_loss)
actor_clip_frac = MPIUtil.reduce_avg(actor_clip_frac)
critic_stepsize = self.critic_solver.get_stepsize()
actor_stepsize = self.update_actor_stepsize(actor_clip_frac)
self.logger.log_tabular('Critic_Loss', critic_loss)
self.logger.log_tabular('Critic_Stepsize', critic_stepsize)
self.logger.log_tabular('Actor_Loss', actor_loss)
self.logger.log_tabular('Actor_Stepsize', actor_stepsize)
self.logger.log_tabular('Clip_Frac', actor_clip_frac)
self.logger.log_tabular('Adv_Mean', adv_mean)
self.logger.log_tabular('Adv_Std', adv_std)
self.replay_buffer.clear()
return
def _get_iters_per_update(self):
return 1
def _valid_train_step(self):
samples = self.replay_buffer.get_current_size()
exp_samples = self.replay_buffer.count_filtered(self.EXP_ACTION_FLAG)
global_sample_count = int(MPIUtil.reduce_sum(samples))
global_exp_min = int(MPIUtil.reduce_min(exp_samples))
return (global_sample_count > self.batch_size) and (global_exp_min > 0)
def _compute_batch_vals(self, start_idx, end_idx):
states = self.replay_buffer.get_all("states")[start_idx:end_idx]
goals = self.replay_buffer.get_all("goals")[start_idx:end_idx] if self.has_goal() else None
idx = np.array(list(range(start_idx, end_idx)))
is_end = self.replay_buffer.is_path_end(idx)
is_fail = self.replay_buffer.check_terminal_flag(idx, Env.Terminate.Fail)
is_succ = self.replay_buffer.check_terminal_flag(idx, Env.Terminate.Succ)
is_fail = np.logical_and(is_end, is_fail)
is_succ = np.logical_and(is_end, is_succ)
vals = self._eval_critic(states, goals)
vals[is_fail] = self.val_fail
vals[is_succ] = self.val_succ
return vals
def _compute_batch_new_vals(self, start_idx, end_idx, val_buffer):
rewards = self.replay_buffer.get_all("rewards")[start_idx:end_idx]
if self.discount == 0:
new_vals = rewards.copy()
else:
new_vals = np.zeros_like(val_buffer)
curr_idx = start_idx
while curr_idx < end_idx:
idx0 = curr_idx - start_idx
idx1 = self.replay_buffer.get_path_end(curr_idx) - start_idx
r = rewards[idx0:idx1]
v = val_buffer[idx0:(idx1 + 1)]
new_vals[idx0:idx1] = RLUtil.compute_return(r, self.discount, self.td_lambda, v)
curr_idx = idx1 + start_idx + 1
return new_vals
def _update_critic(self, s, g, tar_vals):
feed = {self.s_tf: s, self.g_tf: g, self.tar_val_tf: tar_vals}
loss, grads = self.sess.run([self.critic_loss_tf, self.critic_grad_tf], feed)
self.critic_solver.update(grads)
return loss
def _update_actor(self, s, g, a, logp, adv):
feed = {self.s_tf: s, self.g_tf: g, self.a_tf: a, self.adv_tf: adv, self.old_logp_tf: logp}
loss, grads, clip_frac = self.sess.run(
[self.actor_loss_tf, self.actor_grad_tf, self.clip_frac_tf], feed)
self.actor_solver.update(grads)
return loss, clip_frac
def update_actor_stepsize(self, clip_frac):
clip_tol = 1.5
step_scale = 2
max_stepsize = 1e-2
min_stepsize = 1e-8
warmup_iters = 5
actor_stepsize = self.actor_solver.get_stepsize()
if (self.tar_clip_frac >= 0 and self.iter > warmup_iters):
min_clip = self.tar_clip_frac / clip_tol
max_clip = self.tar_clip_frac * clip_tol
under_tol = clip_frac < min_clip
over_tol = clip_frac > max_clip
if (over_tol or under_tol):
if (over_tol):
actor_stepsize *= self.actor_stepsize_decay
else:
new_vals = np.zeros_like(val_buffer)
actor_stepsize /= self.actor_stepsize_decay
curr_idx = start_idx
while curr_idx < end_idx:
idx0 = curr_idx - start_idx
idx1 = self.replay_buffer.get_path_end(curr_idx) - start_idx
r = rewards[idx0:idx1]
v = val_buffer[idx0:(idx1 + 1)]
actor_stepsize = np.clip(actor_stepsize, min_stepsize, max_stepsize)
self.set_actor_stepsize(actor_stepsize)
new_vals[idx0:idx1] = RLUtil.compute_return(r, self.discount, self.td_lambda, v)
curr_idx = idx1 + start_idx + 1
return new_vals
return actor_stepsize
def _update_critic(self, s, g, tar_vals):
feed = {
self.s_tf: s,
self.g_tf: g,
self.tar_val_tf: tar_vals
}
loss, grads = self.sess.run([self.critic_loss_tf, self.critic_grad_tf], feed)
self.critic_solver.update(grads)
return loss
def _update_actor(self, s, g, a, logp, adv):
feed = {
self.s_tf: s,
self.g_tf: g,
self.a_tf: a,
self.adv_tf: adv,
self.old_logp_tf: logp
}
loss, grads, clip_frac = self.sess.run([self.actor_loss_tf, self.actor_grad_tf,
self.clip_frac_tf], feed)
self.actor_solver.update(grads)
return loss, clip_frac
def update_actor_stepsize(self, clip_frac):
clip_tol = 1.5
step_scale = 2
max_stepsize = 1e-2
min_stepsize = 1e-8
warmup_iters = 5
actor_stepsize = self.actor_solver.get_stepsize()
if (self.tar_clip_frac >= 0 and self.iter > warmup_iters):
min_clip = self.tar_clip_frac / clip_tol
max_clip = self.tar_clip_frac * clip_tol
under_tol = clip_frac < min_clip
over_tol = clip_frac > max_clip
if (over_tol or under_tol):
if (over_tol):
actor_stepsize *= self.actor_stepsize_decay
else:
actor_stepsize /= self.actor_stepsize_decay
actor_stepsize = np.clip(actor_stepsize, min_stepsize, max_stepsize)
self.set_actor_stepsize(actor_stepsize)
return actor_stepsize
def set_actor_stepsize(self, stepsize):
feed = {
self._actor_stepsize_ph: stepsize,
}
self.sess.run(self._actor_stepsize_update_op, feed)
return
def set_actor_stepsize(self, stepsize):
feed = {
self._actor_stepsize_ph: stepsize,
}
self.sess.run(self._actor_stepsize_update_op, feed)
return
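
The actor loss built in _build_losses above is the standard PPO clipped surrogate: with ratio = exp(logp - old_logp), the per-sample objective is min(ratio * adv, clip(ratio, 1 - eps, 1 + eps) * adv), averaged and negated, and the clip fraction is logged as a diagnostic. A small numpy sketch of that loss with illustrative values only:

import numpy as np

# PPO clipped surrogate loss, mirroring the TF graph constructed above.
ratio_clip = 0.2
adv = np.array([0.8, -0.3, 1.2, 0.1])
logp = np.array([-1.1, -0.9, -1.4, -1.0])      # log-prob under current policy
old_logp = np.array([-1.0, -1.0, -1.0, -1.0])  # log-prob recorded at sample time

ratio = np.exp(logp - old_logp)
unclipped = adv * ratio
clipped = adv * np.clip(ratio, 1.0 - ratio_clip, 1.0 + ratio_clip)
actor_loss = -np.mean(np.minimum(unclipped, clipped))

# fraction of samples whose ratio left the trust region; used to adapt the stepsize
clip_frac = np.mean(np.abs(ratio - 1.0) > ratio_clip)
print(actor_loss, clip_frac)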

View File

@@ -5,347 +5,353 @@ import inspect as inspect
from pybullet_envs.deep_mimic.env.env import Env
import pybullet_utils.math_util as MathUtil
class ReplayBuffer(object):
TERMINATE_KEY = 'terminate'
PATH_START_KEY = 'path_start'
PATH_END_KEY = 'path_end'
TERMINATE_KEY = 'terminate'
PATH_START_KEY = 'path_start'
PATH_END_KEY = 'path_end'
def __init__(self, buffer_size):
assert buffer_size > 0
def __init__(self, buffer_size):
assert buffer_size > 0
self.buffer_size = buffer_size
self.total_count = 0
self.buffer_head = 0
self.buffer_tail = MathUtil.INVALID_IDX
self.num_paths = 0
self._sample_buffers = dict()
self.buffers = None
self.buffer_size = buffer_size
self.total_count = 0
self.buffer_head = 0
self.buffer_tail = MathUtil.INVALID_IDX
self.num_paths = 0
self._sample_buffers = dict()
self.buffers = None
self.clear()
return
self.clear()
return
def sample(self, n):
curr_size = self.get_current_size()
assert curr_size > 0
def sample(self, n):
curr_size = self.get_current_size()
assert curr_size > 0
idx = np.empty(n, dtype=int)
# makes sure that the end states are not sampled
for i in range(n):
while True:
curr_idx = np.random.randint(0, curr_size, size=1)[0]
curr_idx += self.buffer_tail
curr_idx = np.mod(curr_idx, self.buffer_size)
idx = np.empty(n, dtype=int)
# makes sure that the end states are not sampled
for i in range(n):
while True:
curr_idx = np.random.randint(0, curr_size, size=1)[0]
curr_idx += self.buffer_tail
curr_idx = np.mod(curr_idx, self.buffer_size)
if not self.is_path_end(curr_idx):
break
idx[i] = curr_idx
if not self.is_path_end(curr_idx):
break
idx[i] = curr_idx
return idx
return idx
def sample_filtered(self, n, key):
assert key in self._sample_buffers
curr_buffer = self._sample_buffers[key]
idx = curr_buffer.sample(n)
return idx
def sample_filtered(self, n, key):
assert key in self._sample_buffers
curr_buffer = self._sample_buffers[key]
idx = curr_buffer.sample(n)
return idx
def count_filtered(self, key):
curr_buffer = self._sample_buffers[key]
return curr_buffer.count
def count_filtered(self, key):
curr_buffer = self._sample_buffers[key]
return curr_buffer.count
def get(self, key, idx):
return self.buffers[key][idx]
def get(self, key, idx):
return self.buffers[key][idx]
def get_all(self, key):
return self.buffers[key]
def get_all(self, key):
return self.buffers[key]
def get_idx_filtered(self, key):
assert key in self._sample_buffers
curr_buffer = self._sample_buffers[key]
idx = curr_buffer.slot_to_idx[:curr_buffer.count]
return idx
def get_path_start(self, idx):
return self.buffers[self.PATH_START_KEY][idx]
def get_idx_filtered(self, key):
assert key in self._sample_buffers
curr_buffer = self._sample_buffers[key]
idx = curr_buffer.slot_to_idx[:curr_buffer.count]
return idx
def get_path_end(self, idx):
return self.buffers[self.PATH_END_KEY][idx]
def get_path_start(self, idx):
return self.buffers[self.PATH_START_KEY][idx]
def get_pathlen(self, idx):
is_array = isinstance(idx, np.ndarray) or isinstance(idx, list)
if not is_array:
idx = [idx]
def get_path_end(self, idx):
return self.buffers[self.PATH_END_KEY][idx]
n = len(idx)
start_idx = self.get_path_start(idx)
end_idx = self.get_path_end(idx)
pathlen = np.empty(n, dtype=int)
def get_pathlen(self, idx):
is_array = isinstance(idx, np.ndarray) or isinstance(idx, list)
if not is_array:
idx = [idx]
for i in range(n):
curr_start = start_idx[i]
curr_end = end_idx[i]
if curr_start < curr_end:
curr_len = curr_end - curr_start
else:
curr_len = self.buffer_size - curr_start + curr_end
pathlen[i] = curr_len
n = len(idx)
start_idx = self.get_path_start(idx)
end_idx = self.get_path_end(idx)
pathlen = np.empty(n, dtype=int)
if not is_array:
pathlen = pathlen[0]
for i in range(n):
curr_start = start_idx[i]
curr_end = end_idx[i]
if curr_start < curr_end:
curr_len = curr_end - curr_start
else:
curr_len = self.buffer_size - curr_start + curr_end
pathlen[i] = curr_len
return pathlen
if not is_array:
pathlen = pathlen[0]
def is_valid_path(self, idx):
start_idx = self.get_path_start(idx)
valid = start_idx != MathUtil.INVALID_IDX
return valid
return pathlen
def store(self, path):
start_idx = MathUtil.INVALID_IDX
n = path.pathlength()
if (n > 0):
assert path.is_valid()
def is_valid_path(self, idx):
start_idx = self.get_path_start(idx)
valid = start_idx != MathUtil.INVALID_IDX
return valid
if path.check_vals():
if self.buffers is None:
self._init_buffers(path)
def store(self, path):
start_idx = MathUtil.INVALID_IDX
n = path.pathlength()
idx = self._request_idx(n + 1)
self._store_path(path, idx)
self._add_sample_buffers(idx)
if (n > 0):
assert path.is_valid()
self.num_paths += 1
self.total_count += n + 1
start_idx = idx[0]
else:
Logger.print2('Invalid path data value detected')
return start_idx
if path.check_vals():
if self.buffers is None:
self._init_buffers(path)
def clear(self):
self.buffer_head = 0
self.buffer_tail = MathUtil.INVALID_IDX
self.num_paths = 0
idx = self._request_idx(n + 1)
self._store_path(path, idx)
self._add_sample_buffers(idx)
for key in self._sample_buffers:
self._sample_buffers[key].clear()
return
self.num_paths += 1
self.total_count += n + 1
start_idx = idx[0]
else:
Logger.print2('Invalid path data value detected')
def get_next_idx(self, idx):
next_idx = np.mod(idx + 1, self.buffer_size)
return next_idx
return start_idx
def is_terminal_state(self, idx):
terminate_flags = self.buffers[self.TERMINATE_KEY][idx]
terminate = terminate_flags != Env.Terminate.Null.value
is_end = self.is_path_end(idx)
terminal_state = np.logical_and(terminate, is_end)
return terminal_state
def clear(self):
self.buffer_head = 0
self.buffer_tail = MathUtil.INVALID_IDX
self.num_paths = 0
def check_terminal_flag(self, idx, flag):
terminate_flags = self.buffers[self.TERMINATE_KEY][idx]
terminate = terminate_flags == flag.value
return terminate
for key in self._sample_buffers:
self._sample_buffers[key].clear()
return
def is_path_end(self, idx):
is_end = self.buffers[self.PATH_END_KEY][idx] == idx
return is_end
def get_next_idx(self, idx):
next_idx = np.mod(idx + 1, self.buffer_size)
return next_idx
def add_filter_key(self, key):
assert self.get_current_size() == 0
if key not in self._sample_buffers:
self._sample_buffers[key] = SampleBuffer(self.buffer_size)
return
def is_terminal_state(self, idx):
terminate_flags = self.buffers[self.TERMINATE_KEY][idx]
terminate = terminate_flags != Env.Terminate.Null.value
is_end = self.is_path_end(idx)
terminal_state = np.logical_and(terminate, is_end)
return terminal_state
def get_current_size(self):
if self.buffer_tail == MathUtil.INVALID_IDX:
return 0
elif self.buffer_tail < self.buffer_head:
return self.buffer_head - self.buffer_tail
def check_terminal_flag(self, idx, flag):
terminate_flags = self.buffers[self.TERMINATE_KEY][idx]
terminate = terminate_flags == flag.value
return terminate
def is_path_end(self, idx):
is_end = self.buffers[self.PATH_END_KEY][idx] == idx
return is_end
def add_filter_key(self, key):
assert self.get_current_size() == 0
if key not in self._sample_buffers:
self._sample_buffers[key] = SampleBuffer(self.buffer_size)
return
def get_current_size(self):
if self.buffer_tail == MathUtil.INVALID_IDX:
return 0
elif self.buffer_tail < self.buffer_head:
return self.buffer_head - self.buffer_tail
else:
return self.buffer_size - self.buffer_tail + self.buffer_head
def _check_flags(self, key, flags):
return (flags & key) == key
def _add_sample_buffers(self, idx):
flags = self.buffers['flags']
for key in self._sample_buffers:
curr_buffer = self._sample_buffers[key]
filter_idx = [
i for i in idx if (self._check_flags(key, flags[i]) and not self.is_path_end(i))
]
curr_buffer.add(filter_idx)
return
def _free_sample_buffers(self, idx):
for key in self._sample_buffers:
curr_buffer = self._sample_buffers[key]
curr_buffer.free(idx)
return
def _init_buffers(self, path):
self.buffers = dict()
self.buffers[self.PATH_START_KEY] = MathUtil.INVALID_IDX * np.ones(self.buffer_size, dtype=int)
self.buffers[self.PATH_END_KEY] = MathUtil.INVALID_IDX * np.ones(self.buffer_size, dtype=int)
for key in dir(path):
val = getattr(path, key)
if not key.startswith('__') and not inspect.ismethod(val):
if key == self.TERMINATE_KEY:
self.buffers[self.TERMINATE_KEY] = np.zeros(shape=[self.buffer_size], dtype=int)
else:
return self.buffer_size - self.buffer_tail + self.buffer_head
val_type = type(val[0])
is_array = val_type == np.ndarray
if is_array:
shape = [self.buffer_size, val[0].shape[0]]
dtype = val[0].dtype
else:
shape = [self.buffer_size]
dtype = val_type
def _check_flags(self, key, flags):
return (flags & key) == key
self.buffers[key] = np.zeros(shape, dtype=dtype)
return
def _add_sample_buffers(self, idx):
flags = self.buffers['flags']
for key in self._sample_buffers:
curr_buffer = self._sample_buffers[key]
filter_idx = [i for i in idx if (self._check_flags(key, flags[i]) and not self.is_path_end(i))]
curr_buffer.add(filter_idx)
return
def _request_idx(self, n):
assert n + 1 < self.buffer_size # bad things can happen if path is too long
def _free_sample_buffers(self, idx):
for key in self._sample_buffers:
curr_buffer = self._sample_buffers[key]
curr_buffer.free(idx)
return
remainder = n
idx = []
def _init_buffers(self, path):
self.buffers = dict()
self.buffers[self.PATH_START_KEY] = MathUtil.INVALID_IDX * np.ones(self.buffer_size, dtype=int)
self.buffers[self.PATH_END_KEY] = MathUtil.INVALID_IDX * np.ones(self.buffer_size, dtype=int)
start_idx = self.buffer_head
while remainder > 0:
end_idx = np.minimum(start_idx + remainder, self.buffer_size)
remainder -= (end_idx - start_idx)
for key in dir(path):
val = getattr(path, key)
if not key.startswith('__') and not inspect.ismethod(val):
if key == self.TERMINATE_KEY:
self.buffers[self.TERMINATE_KEY] = np.zeros(shape=[self.buffer_size], dtype=int)
else:
val_type = type(val[0])
is_array = val_type == np.ndarray
if is_array:
shape = [self.buffer_size, val[0].shape[0]]
dtype = val[0].dtype
else:
shape = [self.buffer_size]
dtype = val_type
self.buffers[key] = np.zeros(shape, dtype=dtype)
return
free_idx = list(range(start_idx, end_idx))
self._free_idx(free_idx)
idx += free_idx
start_idx = 0
def _request_idx(self, n):
assert n + 1 < self.buffer_size # bad things can happen if path is too long
self.buffer_head = (self.buffer_head + n) % self.buffer_size
return idx
remainder = n
idx = []
def _free_idx(self, idx):
assert (idx[0] <= idx[-1])
n = len(idx)
if self.buffer_tail != MathUtil.INVALID_IDX:
update_tail = idx[0] <= idx[-1] and idx[0] <= self.buffer_tail and idx[-1] >= self.buffer_tail
update_tail |= idx[0] > idx[-1] and (idx[0] <= self.buffer_tail or
idx[-1] >= self.buffer_tail)
start_idx = self.buffer_head
while remainder > 0:
end_idx = np.minimum(start_idx + remainder, self.buffer_size)
remainder -= (end_idx - start_idx)
if update_tail:
i = 0
while i < n:
curr_idx = idx[i]
if self.is_valid_path(curr_idx):
start_idx = self.get_path_start(curr_idx)
end_idx = self.get_path_end(curr_idx)
pathlen = self.get_pathlen(curr_idx)
free_idx = list(range(start_idx, end_idx))
self._free_idx(free_idx)
idx += free_idx
start_idx = 0
if start_idx < end_idx:
self.buffers[self.PATH_START_KEY][start_idx:end_idx + 1] = MathUtil.INVALID_IDX
self._free_sample_buffers(list(range(start_idx, end_idx + 1)))
else:
self.buffers[self.PATH_START_KEY][start_idx:self.buffer_size] = MathUtil.INVALID_IDX
self.buffers[self.PATH_START_KEY][0:end_idx + 1] = MathUtil.INVALID_IDX
self._free_sample_buffers(list(range(start_idx, self.buffer_size)))
self._free_sample_buffers(list(range(0, end_idx + 1)))
self.buffer_head = (self.buffer_head + n) % self.buffer_size
return idx
self.num_paths -= 1
i += pathlen + 1
self.buffer_tail = (end_idx + 1) % self.buffer_size
else:
i += 1
else:
self.buffer_tail = idx[0]
return
def _free_idx(self, idx):
assert(idx[0] <= idx[-1])
n = len(idx)
if self.buffer_tail != MathUtil.INVALID_IDX:
update_tail = idx[0] <= idx[-1] and idx[0] <= self.buffer_tail and idx[-1] >= self.buffer_tail
update_tail |= idx[0] > idx[-1] and (idx[0] <= self.buffer_tail or idx[-1] >= self.buffer_tail)
if update_tail:
i = 0
while i < n:
curr_idx = idx[i]
if self.is_valid_path(curr_idx):
start_idx = self.get_path_start(curr_idx)
end_idx = self.get_path_end(curr_idx)
pathlen = self.get_pathlen(curr_idx)
def _store_path(self, path, idx):
n = path.pathlength()
for key, data in self.buffers.items():
if key != self.PATH_START_KEY and key != self.PATH_END_KEY and key != self.TERMINATE_KEY:
val = getattr(path, key)
val_len = len(val)
assert val_len == n or val_len == n + 1
data[idx[:val_len]] = val
if start_idx < end_idx:
self.buffers[self.PATH_START_KEY][start_idx:end_idx + 1] = MathUtil.INVALID_IDX
self._free_sample_buffers(list(range(start_idx, end_idx + 1)))
else:
self.buffers[self.PATH_START_KEY][start_idx:self.buffer_size] = MathUtil.INVALID_IDX
self.buffers[self.PATH_START_KEY][0:end_idx + 1] = MathUtil.INVALID_IDX
self._free_sample_buffers(list(range(start_idx, self.buffer_size)))
self._free_sample_buffers(list(range(0, end_idx + 1)))
self.num_paths -= 1
i += pathlen + 1
self.buffer_tail = (end_idx + 1) % self.buffer_size
else:
i += 1
else:
self.buffer_tail = idx[0]
return
self.buffers[self.TERMINATE_KEY][idx] = path.terminate.value
self.buffers[self.PATH_START_KEY][idx] = idx[0]
self.buffers[self.PATH_END_KEY][idx] = idx[-1]
return
def _store_path(self, path, idx):
n = path.pathlength()
for key, data in self.buffers.items():
if key != self.PATH_START_KEY and key != self.PATH_END_KEY and key != self.TERMINATE_KEY:
val = getattr(path, key)
val_len = len(val)
assert val_len == n or val_len == n + 1
data[idx[:val_len]] = val
self.buffers[self.TERMINATE_KEY][idx] = path.terminate.value
self.buffers[self.PATH_START_KEY][idx] = idx[0]
self.buffers[self.PATH_END_KEY][idx] = idx[-1]
return
class SampleBuffer(object):
def __init__(self, size):
self.idx_to_slot = np.empty(shape=[size], dtype=int)
self.slot_to_idx = np.empty(shape=[size], dtype=int)
self.count = 0
self.clear()
return
def clear(self):
self.idx_to_slot.fill(MathUtil.INVALID_IDX)
self.slot_to_idx.fill(MathUtil.INVALID_IDX)
self.count = 0
return
def is_valid(self, idx):
return self.idx_to_slot[idx] != MathUtil.INVALID_IDX
def __init__(self, size):
self.idx_to_slot = np.empty(shape=[size], dtype=int)
self.slot_to_idx = np.empty(shape=[size], dtype=int)
self.count = 0
self.clear()
return
def get_size(self):
return self.idx_to_slot.shape[0]
def clear(self):
self.idx_to_slot.fill(MathUtil.INVALID_IDX)
self.slot_to_idx.fill(MathUtil.INVALID_IDX)
self.count = 0
return
def add(self, idx):
for i in idx:
if not self.is_valid(i):
new_slot = self.count
assert new_slot >= 0
def is_valid(self, idx):
return self.idx_to_slot[idx] != MathUtil.INVALID_IDX
self.idx_to_slot[i] = new_slot
self.slot_to_idx[new_slot] = i
self.count += 1
return
def get_size(self):
return self.idx_to_slot.shape[0]
def free(self, idx):
for i in idx:
if self.is_valid(i):
slot = self.idx_to_slot[i]
last_slot = self.count - 1
last_idx = self.slot_to_idx[last_slot]
def add(self, idx):
for i in idx:
if not self.is_valid(i):
new_slot = self.count
assert new_slot >= 0
self.idx_to_slot[last_idx] = slot
self.slot_to_idx[slot] = last_idx
self.idx_to_slot[i] = MathUtil.INVALID_IDX
self.slot_to_idx[last_slot] = MathUtil.INVALID_IDX
self.count -= 1
return
self.idx_to_slot[i] = new_slot
self.slot_to_idx[new_slot] = i
self.count += 1
return
def sample(self, n):
if self.count > 0:
slots = np.random.randint(0, self.count, size=n)
idx = self.slot_to_idx[slots]
else:
idx = np.empty(shape=[0], dtype=int)
return idx
def free(self, idx):
for i in idx:
if self.is_valid(i):
slot = self.idx_to_slot[i]
last_slot = self.count - 1
last_idx = self.slot_to_idx[last_slot]
def check_consistency(self):
valid = True
if self.count < 0:
self.idx_to_slot[last_idx] = slot
self.slot_to_idx[slot] = last_idx
self.idx_to_slot[i] = MathUtil.INVALID_IDX
self.slot_to_idx[last_slot] = MathUtil.INVALID_IDX
self.count -= 1
return
def sample(self, n):
if self.count > 0:
slots = np.random.randint(0, self.count, size=n)
idx = self.slot_to_idx[slots]
else:
idx = np.empty(shape=[0], dtype=int)
return idx
def check_consistency(self):
valid = True
if self.count < 0:
valid = False
if valid:
for i in range(self.get_size()):
if self.is_valid(i):
s = self.idx_to_slot[i]
if self.slot_to_idx[s] != i:
valid = False
break
if valid:
for i in range(self.get_size()):
if self.is_valid(i):
s = self.idx_to_slot[i]
if self.slot_to_idx[s] != i:
valid = False
break
s2i = self.slot_to_idx[i]
if s2i != MathUtil.INVALID_IDX:
i2s = self.idx_to_slot[s2i]
if i2s != i:
valid = False
break
s2i = self.slot_to_idx[i]
if s2i != MathUtil.INVALID_IDX:
i2s = self.idx_to_slot[s2i]
if i2s != i:
valid = False
break
count0 = np.sum(self.idx_to_slot == MathUtil.INVALID_IDX)
count1 = np.sum(self.slot_to_idx == MathUtil.INVALID_IDX)
valid &= count0 == count1
return valid
count0 = np.sum(self.idx_to_slot == MathUtil.INVALID_IDX)
count1 = np.sum(self.slot_to_idx == MathUtil.INVALID_IDX)
valid &= count0 == count1
return valid
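
# A short usage sketch of the SampleBuffer bookkeeping above; the buffer size and the
# indices are made up purely for illustration.
buf = SampleBuffer(8)            # room for up to 8 replay-buffer indices
buf.add([0, 1, 2, 5])            # register indices whose samples carry a given flag
assert buf.count == 4 and buf.check_consistency()
batch = buf.sample(3)            # uniform sample over the registered indices
buf.free([1])                    # unregister an index; the slot array stays densely packed
assert buf.count == 3 and buf.check_consistency()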

View File

@@ -1,18 +1,19 @@
import numpy as np
def compute_return(rewards, gamma, td_lambda, val_t):
# computes td-lambda return of path
path_len = len(rewards)
assert len(val_t) == path_len + 1
return_t = np.zeros(path_len)
last_val = rewards[-1] + gamma * val_t[-1]
return_t[-1] = last_val
for i in reversed(range(0, path_len - 1)):
curr_r = rewards[i]
next_ret = return_t[i + 1]
curr_val = curr_r + gamma * ((1.0 - td_lambda) * val_t[i + 1] + td_lambda * next_ret)
return_t[i] = curr_val
return return_t
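
# compute_return above implements the TD(lambda) return
#   R_t = r_t + gamma * ((1 - lambda) * V(s_{t+1}) + lambda * R_{t+1}),
# seeded with R_T = r_T + gamma * V(s_{T+1}). A toy check with invented numbers:
rewards = [1.0, 1.0, 1.0]
val_t = [0.5, 0.5, 0.5, 0.5]     # one bootstrap value per state, including the final one
ret = compute_return(rewards, gamma=0.9, td_lambda=0.95, val_t=val_t)
# ret[-1] == 1.0 + 0.9 * 0.5 == 1.45; earlier entries blend V(s_{t+1}) with the lambda-return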

View File

@@ -5,139 +5,140 @@ from pybullet_envs.deep_mimic.learning.rl_agent import RLAgent
from pybullet_utils.logger import Logger
import pybullet_data
class RLWorld(object):
def __init__(self, env, arg_parser):
TFUtil.disable_gpu()
self.env = env
self.arg_parser = arg_parser
self._enable_training = True
self.train_agents = []
self.parse_args(arg_parser)
def __init__(self, env, arg_parser):
TFUtil.disable_gpu()
self.build_agents()
return
self.env = env
self.arg_parser = arg_parser
self._enable_training = True
self.train_agents = []
self.parse_args(arg_parser)
def get_enable_training(self):
return self._enable_training
def set_enable_training(self, enable):
self._enable_training = enable
for i in range(len(self.agents)):
curr_agent = self.agents[i]
if curr_agent is not None:
enable_curr_train = self.train_agents[i] if (len(self.train_agents) > 0) else True
curr_agent.enable_training = self.enable_training and enable_curr_train
self.build_agents()
if (self._enable_training):
self.env.set_mode(RLAgent.Mode.TRAIN)
else:
self.env.set_mode(RLAgent.Mode.TEST)
return
return
def get_enable_training(self):
return self._enable_training
enable_training = property(get_enable_training, set_enable_training)
def parse_args(self, arg_parser):
self.train_agents = self.arg_parser.parse_bools('train_agents')
num_agents = self.env.get_num_agents()
assert(len(self.train_agents) == num_agents or len(self.train_agents) == 0)
def set_enable_training(self, enable):
self._enable_training = enable
for i in range(len(self.agents)):
curr_agent = self.agents[i]
if curr_agent is not None:
enable_curr_train = self.train_agents[i] if (len(self.train_agents) > 0) else True
curr_agent.enable_training = self.enable_training and enable_curr_train
return
if (self._enable_training):
self.env.set_mode(RLAgent.Mode.TRAIN)
else:
self.env.set_mode(RLAgent.Mode.TEST)
def shutdown(self):
self.env.shutdown()
return
return
def build_agents(self):
num_agents = self.env.get_num_agents()
print("num_agents=",num_agents)
self.agents = []
enable_training = property(get_enable_training, set_enable_training)
Logger.print2('')
Logger.print2('Num Agents: {:d}'.format(num_agents))
def parse_args(self, arg_parser):
self.train_agents = self.arg_parser.parse_bools('train_agents')
num_agents = self.env.get_num_agents()
assert (len(self.train_agents) == num_agents or len(self.train_agents) == 0)
agent_files = self.arg_parser.parse_strings('agent_files')
print("len(agent_files)=",len(agent_files))
assert(len(agent_files) == num_agents or len(agent_files) == 0)
return
model_files = self.arg_parser.parse_strings('model_files')
assert(len(model_files) == num_agents or len(model_files) == 0)
def shutdown(self):
self.env.shutdown()
return
output_path = self.arg_parser.parse_string('output_path')
int_output_path = self.arg_parser.parse_string('int_output_path')
def build_agents(self):
num_agents = self.env.get_num_agents()
print("num_agents=", num_agents)
self.agents = []
for i in range(num_agents):
curr_file = agent_files[i]
curr_agent = self._build_agent(i, curr_file)
Logger.print2('')
Logger.print2('Num Agents: {:d}'.format(num_agents))
if curr_agent is not None:
curr_agent.output_dir = output_path
curr_agent.int_output_dir = int_output_path
Logger.print2(str(curr_agent))
agent_files = self.arg_parser.parse_strings('agent_files')
print("len(agent_files)=", len(agent_files))
assert (len(agent_files) == num_agents or len(agent_files) == 0)
if (len(model_files) > 0):
curr_model_file = model_files[i]
if curr_model_file != 'none':
curr_agent.load_model(pybullet_data.getDataPath()+"/"+curr_model_file)
model_files = self.arg_parser.parse_strings('model_files')
assert (len(model_files) == num_agents or len(model_files) == 0)
self.agents.append(curr_agent)
Logger.print2('')
output_path = self.arg_parser.parse_string('output_path')
int_output_path = self.arg_parser.parse_string('int_output_path')
self.set_enable_training(self.enable_training)
for i in range(num_agents):
curr_file = agent_files[i]
curr_agent = self._build_agent(i, curr_file)
return
if curr_agent is not None:
curr_agent.output_dir = output_path
curr_agent.int_output_dir = int_output_path
Logger.print2(str(curr_agent))
def update(self, timestep):
#print("world update!\n")
self._update_agents(timestep)
self._update_env(timestep)
return
if (len(model_files) > 0):
curr_model_file = model_files[i]
if curr_model_file != 'none':
curr_agent.load_model(pybullet_data.getDataPath() + "/" + curr_model_file)
def reset(self):
self._reset_agents()
self._reset_env()
return
self.agents.append(curr_agent)
Logger.print2('')
def end_episode(self):
self._end_episode_agents()
return
self.set_enable_training(self.enable_training)
def _update_env(self, timestep):
self.env.update(timestep)
return
return
def _update_agents(self, timestep):
#print("len(agents)=",len(self.agents))
for agent in self.agents:
if (agent is not None):
agent.update(timestep)
return
def update(self, timestep):
#print("world update!\n")
self._update_agents(timestep)
self._update_env(timestep)
return
def _reset_env(self):
self.env.reset()
return
def reset(self):
self._reset_agents()
self._reset_env()
return
def _reset_agents(self):
for agent in self.agents:
if (agent != None):
agent.reset()
return
def end_episode(self):
self._end_episode_agents()
return
def _end_episode_agents(self):
for agent in self.agents:
if (agent != None):
agent.end_episode()
return
def _update_env(self, timestep):
self.env.update(timestep)
return
def _build_agent(self, id, agent_file):
Logger.print2('Agent {:d}: {}'.format(id, agent_file))
if (agent_file == 'none'):
agent = None
else:
agent = AgentBuilder.build_agent(self, id, agent_file)
assert (agent != None), 'Failed to build agent {:d} from: {}'.format(id, agent_file)
return agent
def _update_agents(self, timestep):
#print("len(agents)=",len(self.agents))
for agent in self.agents:
if (agent is not None):
agent.update(timestep)
return
def _reset_env(self):
self.env.reset()
return
def _reset_agents(self):
for agent in self.agents:
if (agent != None):
agent.reset()
return
def _end_episode_agents(self):
for agent in self.agents:
if (agent != None):
agent.end_episode()
return
def _build_agent(self, id, agent_file):
Logger.print2('Agent {:d}: {}'.format(id, agent_file))
if (agent_file == 'none'):
agent = None
else:
agent = AgentBuilder.build_agent(self, id, agent_file)
assert (agent != None), 'Failed to build agent {:d} from: {}'.format(id, agent_file)
return agent
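
# The enable_training property above routes plain attribute assignment through
# set_enable_training, which also switches the environment between TRAIN and TEST modes.
# A tiny invented example of the same property() pattern:
class _Toggle(object):

  def __init__(self):
    self._on = False

  def get_on(self):
    return self._on

  def set_on(self, value):
    self._on = value
    print('mode ->', 'TRAIN' if value else 'TEST')  # side effect, like env.set_mode above

  on = property(get_on, set_on)

# assigning _Toggle().on = True goes through set_on, just as world.enable_training = True does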

View File

@@ -8,96 +8,97 @@ from pybullet_utils.logger import Logger
from pybullet_envs.deep_mimic.learning.solvers.solver import Solver
class MPISolver(Solver):
CHECK_SYNC_ITERS = 1000
def __init__(self, sess, optimizer, vars):
super().__init__(vars)
self.sess = sess
self.optimizer = optimizer
self._build_grad_feed(vars)
self._update = optimizer.apply_gradients(zip(self._grad_tf_list, self.vars))
self._set_flat_vars = TFUtil.SetFromFlat(sess, self.vars)
self._get_flat_vars = TFUtil.GetFlat(sess, self.vars)
self.iter = 0
grad_dim = self._calc_grad_dim()
self._flat_grad = np.zeros(grad_dim, dtype=np.float32)
self._global_flat_grad = np.zeros(grad_dim, dtype=np.float32)
return
self.iter = 0
grad_dim = self._calc_grad_dim()
self._flat_grad = np.zeros(grad_dim, dtype=np.float32)
self._global_flat_grad = np.zeros(grad_dim, dtype=np.float32)
def get_stepsize(self):
return self.optimizer._learning_rate_tensor.eval()
return
def update(self, grads=None, grad_scale=1.0):
if grads is not None:
self._flat_grad = MathUtil.flatten(grads)
else:
self._flat_grad.fill(0)
return self.update_flatgrad(self._flat_grad, grad_scale)
def get_stepsize(self):
return self.optimizer._learning_rate_tensor.eval()
def update_flatgrad(self, flat_grad, grad_scale=1.0):
if self.iter % self.CHECK_SYNC_ITERS == 0:
assert self.check_synced(), Logger.print2('Network parameters desynchronized')
if grad_scale != 1.0:
flat_grad *= grad_scale
def update(self, grads=None, grad_scale=1.0):
if grads is not None:
self._flat_grad = MathUtil.flatten(grads)
else:
self._flat_grad.fill(0)
return self.update_flatgrad(self._flat_grad, grad_scale)
MPI.COMM_WORLD.Allreduce(flat_grad, self._global_flat_grad, op=MPI.SUM)
self._global_flat_grad /= MPIUtil.get_num_procs()
def update_flatgrad(self, flat_grad, grad_scale=1.0):
if self.iter % self.CHECK_SYNC_ITERS == 0:
assert self.check_synced(), Logger.print2('Network parameters desynchronized')
self._load_flat_grad(self._global_flat_grad)
self.sess.run([self._update], self._grad_feed)
self.iter += 1
if grad_scale != 1.0:
flat_grad *= grad_scale
return
MPI.COMM_WORLD.Allreduce(flat_grad, self._global_flat_grad, op=MPI.SUM)
self._global_flat_grad /= MPIUtil.get_num_procs()
def sync(self):
vars = self._get_flat_vars()
MPIUtil.bcast(vars)
self._set_flat_vars(vars)
return
self._load_flat_grad(self._global_flat_grad)
self.sess.run([self._update], self._grad_feed)
self.iter += 1
def check_synced(self):
synced = True
if self._is_root():
vars = self._get_flat_vars()
MPIUtil.bcast(vars)
else:
vars_local = self._get_flat_vars()
vars_root = np.empty_like(vars_local)
MPIUtil.bcast(vars_root)
synced = (vars_local == vars_root).all()
return synced
return
def _is_root(self):
return MPIUtil.is_root_proc()
def _build_grad_feed(self, vars):
self._grad_tf_list = []
self._grad_buffers = []
for v in self.vars:
shape = v.get_shape()
grad = np.zeros(shape)
grad_tf = tf.placeholder(tf.float32, shape=shape)
self._grad_buffers.append(grad)
self._grad_tf_list.append(grad_tf)
def sync(self):
vars = self._get_flat_vars()
MPIUtil.bcast(vars)
self._set_flat_vars(vars)
return
self._grad_feed = dict({g_tf: g for g_tf, g in zip(self._grad_tf_list, self._grad_buffers)})
return
def check_synced(self):
synced = True
if self._is_root():
vars = self._get_flat_vars()
MPIUtil.bcast(vars)
else:
vars_local = self._get_flat_vars()
vars_root = np.empty_like(vars_local)
MPIUtil.bcast(vars_root)
synced = (vars_local == vars_root).all()
return synced
def _calc_grad_dim(self):
grad_dim = 0
for grad in self._grad_buffers:
grad_dim += grad.size
return grad_dim
def _is_root(self):
return MPIUtil.is_root_proc()
def _load_flat_grad(self, flat_grad):
start = 0
for g in self._grad_buffers:
size = g.size
np.copyto(g, np.reshape(flat_grad[start:start + size], g.shape))
start += size
return
def _build_grad_feed(self, vars):
self._grad_tf_list = []
self._grad_buffers = []
for v in self.vars:
shape = v.get_shape()
grad = np.zeros(shape)
grad_tf = tf.placeholder(tf.float32, shape=shape)
self._grad_buffers.append(grad)
self._grad_tf_list.append(grad_tf)
self._grad_feed = dict({g_tf: g for g_tf, g in zip(self._grad_tf_list, self._grad_buffers)})
return
def _calc_grad_dim(self):
grad_dim = 0
for grad in self._grad_buffers:
grad_dim += grad.size
return grad_dim
def _load_flat_grad(self, flat_grad):
start = 0
for g in self._grad_buffers:
size = g.size
np.copyto(g, np.reshape(flat_grad[start:start + size], g.shape))
start += size
return
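
# update_flatgrad above averages flattened gradients across MPI workers with a single
# Allreduce. A minimal sketch of that step, assuming mpi4py is available (run under e.g.
# `mpirun -n 4 python ...`); the gradient values are invented.
local_grad = np.arange(4, dtype=np.float32) * (MPI.COMM_WORLD.Get_rank() + 1)
global_grad = np.zeros_like(local_grad)
MPI.COMM_WORLD.Allreduce(local_grad, global_grad, op=MPI.SUM)
global_grad /= MPI.COMM_WORLD.Get_size()   # every rank now holds the same averaged gradient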

View File

@@ -1,15 +1,17 @@
from abc import abstractmethod
import sys, abc
if sys.version_info >= (3, 4):
ABC = abc.ABC
else:
ABC = abc.ABCMeta('ABC', (), {})
class Solver(ABC):
def __init__(self, vars):
self.vars = vars
return
@abstractmethod
def update(self, grads):
pass
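
# Solver only fixes the interface: hold a list of variables and apply a gradient update to
# them. A hypothetical concrete subclass doing plain gradient descent on numpy arrays:
class NumpySGDSolver(Solver):

  def __init__(self, vars, stepsize=1e-3):
    super().__init__(vars)
    self.stepsize = stepsize

  def update(self, grads):
    for v, g in zip(self.vars, grads):
      v -= self.stepsize * g                 # in-place step on each numpy array
    return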

View File

@@ -6,144 +6,148 @@ from pybullet_envs.deep_mimic.learning.rl_agent import RLAgent
from pybullet_utils.logger import Logger
from pybullet_envs.deep_mimic.learning.tf_normalizer import TFNormalizer
class TFAgent(RLAgent):
RESOURCE_SCOPE = 'resource'
SOLVER_SCOPE = 'solvers'
def __init__(self, world, id, json_data):
self.tf_scope = 'agent'
self.graph = tf.Graph()
self.sess = tf.Session(graph=self.graph)
super().__init__(world, id, json_data)
self._build_graph(json_data)
self._init_normalizers()
return
def __del__(self):
self.sess.close()
return
def save_model(self, out_path):
with self.sess.as_default(), self.graph.as_default():
try:
save_path = self.saver.save(self.sess, out_path, write_meta_graph=False, write_state=False)
Logger.print2('Model saved to: ' + save_path)
except Exception:
Logger.print2("Failed to save model to: " + out_path)
return
def load_model(self, in_path):
with self.sess.as_default(), self.graph.as_default():
self.saver.restore(self.sess, in_path)
self._load_normalizers()
Logger.print2('Model loaded from: ' + in_path)
return
def _get_output_path(self):
assert(self.output_dir != '')
file_path = self.output_dir + '/agent' + str(self.id) + '_model.ckpt'
return file_path
def _get_output_path(self):
assert (self.output_dir != '')
file_path = self.output_dir + '/agent' + str(self.id) + '_model.ckpt'
return file_path
def _get_int_output_path(self):
assert(self.int_output_dir != '')
file_path = self.int_output_dir + ('/agent{:d}_models/agent{:d}_int_model_{:010d}.ckpt').format(self.id, self.id, self.iter)
return file_path
def _get_int_output_path(self):
assert (self.int_output_dir != '')
file_path = self.int_output_dir + (
'/agent{:d}_models/agent{:d}_int_model_{:010d}.ckpt').format(self.id, self.id, self.iter)
return file_path
def _build_graph(self, json_data):
with self.sess.as_default(), self.graph.as_default():
with tf.variable_scope(self.tf_scope):
self._build_nets(json_data)
with tf.variable_scope(self.SOLVER_SCOPE):
self._build_losses(json_data)
self._build_solvers(json_data)
def _build_graph(self, json_data):
with self.sess.as_default(), self.graph.as_default():
with tf.variable_scope(self.tf_scope):
self._build_nets(json_data)
self._initialize_vars()
self._build_saver()
return
with tf.variable_scope(self.SOLVER_SCOPE):
self._build_losses(json_data)
self._build_solvers(json_data)
def _init_normalizers(self):
with self.sess.as_default(), self.graph.as_default():
# update normalizers to sync the tensorflow tensors
self.s_norm.update()
self.g_norm.update()
self.a_norm.update()
return
self._initialize_vars()
self._build_saver()
return
@abstractmethod
def _build_nets(self, json_data):
pass
def _init_normalizers(self):
with self.sess.as_default(), self.graph.as_default():
# update normalizers to sync the tensorflow tensors
self.s_norm.update()
self.g_norm.update()
self.a_norm.update()
return
@abstractmethod
def _build_losses(self, json_data):
pass
@abstractmethod
def _build_nets(self, json_data):
pass
@abstractmethod
def _build_solvers(self, json_data):
pass
@abstractmethod
def _build_losses(self, json_data):
pass
def _tf_vars(self, scope=''):
with self.sess.as_default(), self.graph.as_default():
res = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=self.tf_scope + '/' + scope)
assert len(res) > 0
return res
@abstractmethod
def _build_solvers(self, json_data):
pass
def _build_normalizers(self):
with self.sess.as_default(), self.graph.as_default(), tf.variable_scope(self.tf_scope):
with tf.variable_scope(self.RESOURCE_SCOPE):
self.s_norm = TFNormalizer(self.sess, 's_norm', self.get_state_size(), self.world.env.build_state_norm_groups(self.id))
state_offset = -self.world.env.build_state_offset(self.id)
print("state_offset=",state_offset)
state_scale = 1 / self.world.env.build_state_scale(self.id)
print("state_scale=",state_scale)
self.s_norm.set_mean_std(-self.world.env.build_state_offset(self.id),
1 / self.world.env.build_state_scale(self.id))
self.g_norm = TFNormalizer(self.sess, 'g_norm', self.get_goal_size(), self.world.env.build_goal_norm_groups(self.id))
self.g_norm.set_mean_std(-self.world.env.build_goal_offset(self.id),
1 / self.world.env.build_goal_scale(self.id))
def _tf_vars(self, scope=''):
with self.sess.as_default(), self.graph.as_default():
res = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=self.tf_scope + '/' + scope)
assert len(res) > 0
return res
self.a_norm = TFNormalizer(self.sess, 'a_norm', self.get_action_size())
self.a_norm.set_mean_std(-self.world.env.build_action_offset(self.id),
1 / self.world.env.build_action_scale(self.id))
return
def _build_normalizers(self):
with self.sess.as_default(), self.graph.as_default(), tf.variable_scope(self.tf_scope):
with tf.variable_scope(self.RESOURCE_SCOPE):
self.s_norm = TFNormalizer(self.sess, 's_norm', self.get_state_size(),
self.world.env.build_state_norm_groups(self.id))
state_offset = -self.world.env.build_state_offset(self.id)
print("state_offset=", state_offset)
state_scale = 1 / self.world.env.build_state_scale(self.id)
print("state_scale=", state_scale)
self.s_norm.set_mean_std(-self.world.env.build_state_offset(self.id),
1 / self.world.env.build_state_scale(self.id))
def _load_normalizers(self):
self.s_norm.load()
self.g_norm.load()
self.a_norm.load()
return
self.g_norm = TFNormalizer(self.sess, 'g_norm', self.get_goal_size(),
self.world.env.build_goal_norm_groups(self.id))
self.g_norm.set_mean_std(-self.world.env.build_goal_offset(self.id),
1 / self.world.env.build_goal_scale(self.id))
def _update_normalizers(self):
with self.sess.as_default(), self.graph.as_default():
super()._update_normalizers()
return
self.a_norm = TFNormalizer(self.sess, 'a_norm', self.get_action_size())
self.a_norm.set_mean_std(-self.world.env.build_action_offset(self.id),
1 / self.world.env.build_action_scale(self.id))
return
def _initialize_vars(self):
self.sess.run(tf.global_variables_initializer())
return
def _load_normalizers(self):
self.s_norm.load()
self.g_norm.load()
self.a_norm.load()
return
def _build_saver(self):
vars = self._get_saver_vars()
self.saver = tf.train.Saver(vars, max_to_keep=0)
return
def _update_normalizers(self):
with self.sess.as_default(), self.graph.as_default():
super()._update_normalizers()
return
def _get_saver_vars(self):
with self.sess.as_default(), self.graph.as_default():
vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=self.tf_scope)
vars = [v for v in vars if '/' + self.SOLVER_SCOPE + '/' not in v.name]
#vars = [v for v in vars if '/target/' not in v.name]
assert len(vars) > 0
return vars
def _weight_decay_loss(self, scope):
vars = self._tf_vars(scope)
vars_no_bias = [v for v in vars if 'bias' not in v.name]
loss = tf.add_n([tf.nn.l2_loss(v) for v in vars_no_bias])
return loss
def _initialize_vars(self):
self.sess.run(tf.global_variables_initializer())
return
def _train(self):
with self.sess.as_default(), self.graph.as_default():
super()._train()
return
def _build_saver(self):
vars = self._get_saver_vars()
self.saver = tf.train.Saver(vars, max_to_keep=0)
return
def _get_saver_vars(self):
with self.sess.as_default(), self.graph.as_default():
vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=self.tf_scope)
vars = [v for v in vars if '/' + self.SOLVER_SCOPE + '/' not in v.name]
#vars = [v for v in vars if '/target/' not in v.name]
assert len(vars) > 0
return vars
def _weight_decay_loss(self, scope):
vars = self._tf_vars(scope)
vars_no_bias = [v for v in vars if 'bias' not in v.name]
loss = tf.add_n([tf.nn.l2_loss(v) for v in vars_no_bias])
return loss
def _train(self):
with self.sess.as_default(), self.graph.as_default():
super()._train()
return
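
# _build_normalizers above seeds each normalizer with mean = -offset and std = 1 / scale,
# so (x - mean) / std reproduces the environment's (x + offset) * scale mapping. A quick
# numeric check with invented offset/scale values (numpy assumed imported as np):
offset, scale = np.array([0.5, -1.0]), np.array([2.0, 4.0])
x = np.array([1.0, 3.0])
assert np.allclose((x - (-offset)) / (1.0 / scale), (x + offset) * scale)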

View File

@@ -3,65 +3,72 @@ import copy
import tensorflow as tf
from pybullet_envs.deep_mimic.learning.normalizer import Normalizer
class TFNormalizer(Normalizer):
def __init__(self, sess, scope, size, groups_ids=None, eps=0.02, clip=np.inf):
self.sess = sess
self.scope = scope
super().__init__(size, groups_ids, eps, clip)
with tf.variable_scope(self.scope):
self._build_resource_tf()
return
# initialize count when loading saved values so that things don't change too quickly during updates
def load(self):
self.count = self.count_tf.eval()[0]
self.mean = self.mean_tf.eval()
self.std = self.std_tf.eval()
self.mean_sq = self.calc_mean_sq(self.mean, self.std)
return
def update(self):
super().update()
self._update_resource_tf()
return
def set_mean_std(self, mean, std):
super().set_mean_std(mean, std)
self._update_resource_tf()
return
def normalize_tf(self, x):
norm_x = (x - self.mean_tf) / self.std_tf
norm_x = tf.clip_by_value(norm_x, -self.clip, self.clip)
return norm_x
def unnormalize_tf(self, norm_x):
x = norm_x * self.std_tf + self.mean_tf
return x
def _build_resource_tf(self):
self.count_tf = tf.get_variable(dtype=tf.int32, name='count', initializer=np.array([self.count], dtype=np.int32), trainable=False)
self.mean_tf = tf.get_variable(dtype=tf.float32, name='mean', initializer=self.mean.astype(np.float32), trainable=False)
self.std_tf = tf.get_variable(dtype=tf.float32, name='std', initializer=self.std.astype(np.float32), trainable=False)
self.count_ph = tf.get_variable(dtype=tf.int32, name='count_ph', shape=[1])
self.mean_ph = tf.get_variable(dtype=tf.float32, name='mean_ph', shape=self.mean.shape)
self.std_ph = tf.get_variable(dtype=tf.float32, name='std_ph', shape=self.std.shape)
self._update_op = tf.group(
self.count_tf.assign(self.count_ph),
self.mean_tf.assign(self.mean_ph),
self.std_tf.assign(self.std_ph)
)
return
def unnormalize_tf(self, norm_x):
x = norm_x * self.std_tf + self.mean_tf
return x
def _update_resource_tf(self):
feed = {
self.count_ph: np.array([self.count], dtype=np.int32),
self.mean_ph: self.mean,
self.std_ph: self.std
}
self.sess.run(self._update_op, feed_dict=feed)
return
def _build_resource_tf(self):
self.count_tf = tf.get_variable(dtype=tf.int32,
name='count',
initializer=np.array([self.count], dtype=np.int32),
trainable=False)
self.mean_tf = tf.get_variable(dtype=tf.float32,
name='mean',
initializer=self.mean.astype(np.float32),
trainable=False)
self.std_tf = tf.get_variable(dtype=tf.float32,
name='std',
initializer=self.std.astype(np.float32),
trainable=False)
self.count_ph = tf.get_variable(dtype=tf.int32, name='count_ph', shape=[1])
self.mean_ph = tf.get_variable(dtype=tf.float32, name='mean_ph', shape=self.mean.shape)
self.std_ph = tf.get_variable(dtype=tf.float32, name='std_ph', shape=self.std.shape)
self._update_op = tf.group(self.count_tf.assign(self.count_ph),
self.mean_tf.assign(self.mean_ph), self.std_tf.assign(self.std_ph))
return
def _update_resource_tf(self):
feed = {
self.count_ph: np.array([self.count], dtype=np.int32),
self.mean_ph: self.mean,
self.std_ph: self.std
}
self.sess.run(self._update_op, feed_dict=feed)
return
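
# normalize_tf / unnormalize_tf above form a clipped affine pair; in numpy terms, with
# invented mean, std and clip values:
mean, std, clip = 0.0, 2.0, 5.0
x = np.array([-20.0, 1.0, 20.0])
norm_x = np.clip((x - mean) / std, -clip, clip)    # mirrors normalize_tf
x_back = norm_x * std + mean                       # mirrors unnormalize_tf
# entries inside the clip range round-trip exactly; clipped ones saturate at +/- clip * std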

View File

@@ -4,101 +4,116 @@ import os
xavier_initializer = tf.contrib.layers.xavier_initializer()
def disable_gpu():
os.environ["CUDA_VISIBLE_DEVICES"] = '-1'
return
os.environ["CUDA_VISIBLE_DEVICES"] = '-1'
return
def var_shape(x):
out = [k.value for k in x.get_shape()]
assert all(isinstance(a, int) for a in out), "shape function assumes that shape is fully known"
return out
out = [k.value for k in x.get_shape()]
assert all(isinstance(a, int) for a in out), "shape function assumes that shape is fully known"
return out
def intprod(x):
return int(np.prod(x))
return int(np.prod(x))
def numel(x):
n = intprod(var_shape(x))
return n
n = intprod(var_shape(x))
return n
def flat_grad(loss, var_list, grad_ys=None):
grads = tf.gradients(loss, var_list, grad_ys)
return tf.concat([tf.reshape(grad, [numel(v)]) for (v, grad) in zip(var_list, grads)], axis=0)
grads = tf.gradients(loss, var_list, grad_ys)
return tf.concat([tf.reshape(grad, [numel(v)]) for (v, grad) in zip(var_list, grads)], axis=0)
def fc_net(input, layers_sizes, activation, reuse=None, flatten=False): # build fully connected network
curr_tf = input
for i, size in enumerate(layers_sizes):
with tf.variable_scope(str(i), reuse=reuse):
curr_tf = tf.layers.dense(inputs=curr_tf,
units=size,
kernel_initializer=xavier_initializer,
activation = activation if i < len(layers_sizes)-1 else None)
if flatten:
assert layers_sizes[-1] == 1
curr_tf = tf.reshape(curr_tf, [-1])
return curr_tf
def fc_net(input, layers_sizes, activation, reuse=None,
flatten=False): # build fully connected network
curr_tf = input
for i, size in enumerate(layers_sizes):
with tf.variable_scope(str(i), reuse=reuse):
curr_tf = tf.layers.dense(inputs=curr_tf,
units=size,
kernel_initializer=xavier_initializer,
activation=activation if i < len(layers_sizes) - 1 else None)
if flatten:
assert layers_sizes[-1] == 1
curr_tf = tf.reshape(curr_tf, [-1])
return curr_tf
def copy(sess, src, dst):
assert len(src) == len(dst)
sess.run(list(map(lambda v: v[1].assign(v[0]), zip(src, dst))))
return
assert len(src) == len(dst)
sess.run(list(map(lambda v: v[1].assign(v[0]), zip(src, dst))))
return
def flat_grad(loss, var_list):
grads = tf.gradients(loss, var_list)
return tf.concat(axis=0, values=[tf.reshape(grad, [numel(v)])
for (v, grad) in zip(var_list, grads)])
grads = tf.gradients(loss, var_list)
return tf.concat(axis=0,
values=[tf.reshape(grad, [numel(v)]) for (v, grad) in zip(var_list, grads)])
def calc_logp_gaussian(x_tf, mean_tf, std_tf):
dim = tf.to_float(tf.shape(x_tf)[-1])
dim = tf.to_float(tf.shape(x_tf)[-1])
if mean_tf is None:
diff_tf = x_tf
else:
diff_tf = x_tf - mean_tf
if mean_tf is None:
diff_tf = x_tf
else:
diff_tf = x_tf - mean_tf
logp_tf = -0.5 * tf.reduce_sum(tf.square(diff_tf / std_tf), axis=-1)
logp_tf += -0.5 * dim * np.log(2 * np.pi) - tf.reduce_sum(tf.log(std_tf), axis=-1)
return logp_tf
logp_tf = -0.5 * tf.reduce_sum(tf.square(diff_tf / std_tf), axis=-1)
logp_tf += -0.5 * dim * np.log(2 * np.pi) - tf.reduce_sum(tf.log(std_tf), axis=-1)
return logp_tf
def calc_bound_loss(x_tf, bound_min, bound_max):
# penalty for violating bounds
violation_min = tf.minimum(x_tf - bound_min, 0)
violation_max = tf.maximum(x_tf - bound_max, 0)
violation = tf.reduce_sum(tf.square(violation_min), axis=-1) + tf.reduce_sum(tf.square(violation_max), axis=-1)
loss = 0.5 * tf.reduce_mean(violation)
return loss
# penalty for violating bounds
violation_min = tf.minimum(x_tf - bound_min, 0)
violation_max = tf.maximum(x_tf - bound_max, 0)
violation = tf.reduce_sum(tf.square(violation_min), axis=-1) + tf.reduce_sum(
tf.square(violation_max), axis=-1)
loss = 0.5 * tf.reduce_mean(violation)
return loss
class SetFromFlat(object):
def __init__(self, sess, var_list, dtype=tf.float32):
assigns = []
shapes = list(map(var_shape, var_list))
total_size = np.sum([intprod(shape) for shape in shapes])
self.sess = sess
self.theta = tf.placeholder(dtype,[total_size])
start=0
assigns = []
def __init__(self, sess, var_list, dtype=tf.float32):
assigns = []
shapes = list(map(var_shape, var_list))
total_size = np.sum([intprod(shape) for shape in shapes])
for (shape,v) in zip(shapes,var_list):
size = intprod(shape)
assigns.append(tf.assign(v, tf.reshape(self.theta[start:start+size],shape)))
start += size
self.sess = sess
self.theta = tf.placeholder(dtype, [total_size])
start = 0
assigns = []
self.op = tf.group(*assigns)
for (shape, v) in zip(shapes, var_list):
size = intprod(shape)
assigns.append(tf.assign(v, tf.reshape(self.theta[start:start + size], shape)))
start += size
return
self.op = tf.group(*assigns)
return
def __call__(self, theta):
self.sess.run(self.op, feed_dict={self.theta: theta})
return
def __call__(self, theta):
self.sess.run(self.op, feed_dict={self.theta:theta})
return
class GetFlat(object):
def __init__(self, sess, var_list):
self.sess = sess
self.op = tf.concat(axis=0, values=[tf.reshape(v, [numel(v)]) for v in var_list])
return
def __call__(self):
return self.sess.run(self.op)
def __init__(self, sess, var_list):
self.sess = sess
self.op = tf.concat(axis=0, values=[tf.reshape(v, [numel(v)]) for v in var_list])
return
def __call__(self):
return self.sess.run(self.op)
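
# A numpy twin of calc_logp_gaussian above, useful as a sanity check for the diagonal
# Gaussian log-density; the helper name and example inputs are invented.
def _logp_gaussian_np(x, mean, std):
  d = x.shape[-1]
  z = (x - mean) / std
  return -0.5 * np.sum(z * z, axis=-1) - 0.5 * d * np.log(2 * np.pi) - np.sum(np.log(std), axis=-1)

# _logp_gaussian_np(np.array([0.3, -0.7]), np.zeros(2), np.ones(2)) is about -2.1279,
# i.e. -0.5 * ||x||**2 - log(2 * pi) for a 2-d standard normal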

View File

@@ -1,26 +1,28 @@
import numpy as np
from quaternion import qrot, qinverse
def normalize_screen_coordinates(X, w, h):
assert X.shape[-1] == 2
# Normalize so that [0, w] is mapped to [-1, 1], while preserving the aspect ratio
return X/w*2 - [1, h/w]
def normalize_screen_coordinates(X, w, h):
assert X.shape[-1] == 2
# Normalize so that [0, w] is mapped to [-1, 1], while preserving the aspect ratio
return X / w * 2 - [1, h / w]
def image_coordinates(X, w, h):
assert X.shape[-1] == 2
# Reverse camera frame normalization
return (X + [1, h/w])*w/2
assert X.shape[-1] == 2
# Reverse camera frame normalization
return (X + [1, h / w]) * w / 2
def world_to_camera(X, R, t):
Rt = qinverse(R)
Q = np.tile(Rt, (*X.shape[:-1], 1))
V = X - t
return qrot(Q, V)
def camera_to_world(X, R, t):
Q = np.tile(R, (*X.shape[:-1], 1))
V = X
return qrot(Q, V) + t
Rt = qinverse(R)
Q = np.tile(Rt, (*X.shape[:-1], 1))
V = X - t
return qrot(Q, V)
def camera_to_world(X, R, t):
Q = np.tile(R, (*X.shape[:-1], 1))
V = X
return qrot(Q, V) + t
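
# normalize_screen_coordinates and image_coordinates above are inverses of each other;
# a quick round-trip check with made-up pixel coordinates:
X = np.array([[320.0, 240.0], [0.0, 0.0]])
w, h = 640, 480
Xn = normalize_screen_coordinates(X, w, h)         # [[0., 0.], [-1., -0.75]]
assert np.allclose(image_coordinates(Xn, w, h), X)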

View File

@@ -3,54 +3,63 @@ from h36m_dataset import Human36mDataset
from camera import *
import numpy as np
# In[2]:
joint_info = {
'joint_name':['root', 'right_hip', 'right_knee', 'right_ankle', 'left_hip', 'left_knee', 'left_ankle', 'chest', 'neck', 'nose', 'eye', 'left_shoulder', 'left_elbow', 'left_wrist', 'right_shoulder', 'right_elbow', 'right_wrist'],
'father':[0, 0, 1, 2, 0, 4, 5, 0, 7, 8, 9, 8, 11, 12, 8, 14, 15],
'side':['middle', 'right', 'right', 'right', 'left', 'left', 'left', 'middle', 'middle', 'middle', 'middle', 'left', 'left', 'left', 'right', 'right', 'right']
'joint_name': [
'root', 'right_hip', 'right_knee', 'right_ankle', 'left_hip', 'left_knee', 'left_ankle',
'chest', 'neck', 'nose', 'eye', 'left_shoulder', 'left_elbow', 'left_wrist',
'right_shoulder', 'right_elbow', 'right_wrist'
],
'father': [0, 0, 1, 2, 0, 4, 5, 0, 7, 8, 9, 8, 11, 12, 8, 14, 15],
'side': [
'middle', 'right', 'right', 'right', 'left', 'left', 'left', 'middle', 'middle', 'middle',
'middle', 'left', 'left', 'left', 'right', 'right', 'right'
]
}
# In[3]:
def init_fb_h36m_dataset(dataset_path):
dataset = Human36mDataset(dataset_path)
print('Preparing Facebook Human3.6M Dataset...')
for subject in dataset.subjects():
for action in dataset[subject].keys():
anim = dataset[subject][action]
positions_3d = []
for cam in anim['cameras']:
pos_3d = world_to_camera(anim['positions'], R=cam['orientation'], t=cam['translation'])
pos_3d[:, 1:] -= pos_3d[:, :1] # Remove global offset, but keep trajectory in first position
positions_3d.append(pos_3d)
anim['positions_3d'] = positions_3d
return dataset
dataset = Human36mDataset(dataset_path)
print('Preparing Facebook Human3.6M Dataset...')
for subject in dataset.subjects():
for action in dataset[subject].keys():
anim = dataset[subject][action]
positions_3d = []
for cam in anim['cameras']:
pos_3d = world_to_camera(anim['positions'], R=cam['orientation'], t=cam['translation'])
pos_3d[:, 1:] -= pos_3d[:, :
1] # Remove global offset, but keep trajectory in first position
positions_3d.append(pos_3d)
anim['positions_3d'] = positions_3d
return dataset
def pose3D_from_fb_h36m(dataset, subject, action, shift):
pose_seq = dataset[subject][action]['positions_3d'][0].copy()
trajectory = pose_seq[:, :1]
pose_seq[:, 1:] += trajectory
# Invert camera transformation
cam = dataset.cameras()[subject][0]
pose_seq = camera_to_world(pose_seq,
R=cam['orientation'],
t=cam['translation'])
x = pose_seq[:,:,0:1]
y = pose_seq[:,:,1:2] * -1
z = pose_seq[:,:,2:3]
pose_seq = np.concatenate((x,z,y),axis=2)
# plus shift
pose_seq += np.array([[shift for i in range(pose_seq.shape[1])] for j in range(pose_seq.shape[0])])
return pose_seq
pose_seq = dataset[subject][action]['positions_3d'][0].copy()
trajectory = pose_seq[:, :1]
pose_seq[:, 1:] += trajectory
# Invert camera transformation
cam = dataset.cameras()[subject][0]
pose_seq = camera_to_world(pose_seq, R=cam['orientation'], t=cam['translation'])
x = pose_seq[:, :, 0:1]
y = pose_seq[:, :, 1:2] * -1
z = pose_seq[:, :, 2:3]
pose_seq = np.concatenate((x, z, y), axis=2)
# plus shift
pose_seq += np.array([[shift for i in range(pose_seq.shape[1])] for j in range(pose_seq.shape[0])
])
return pose_seq
def rot_seq_to_deepmimic_json(rot_seq, loop, json_path):
to_json = {"Loop": loop, "Frames":[]}
rot_seq = np.around(rot_seq, decimals=6)
to_json["Frames"] = rot_seq.tolist()
# In[14]:
to_file = json.dumps(to_json)
file = open(json_path,"w")
file.write(to_file)
file.close()
to_json = {"Loop": loop, "Frames": []}
rot_seq = np.around(rot_seq, decimals=6)
to_json["Frames"] = rot_seq.tolist()
# In[14]:
to_file = json.dumps(to_json)
file = open(json_path, "w")
file.write(to_file)
file.close()
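
# The x/y/z reshuffle inside pose3D_from_fb_h36m flips the y axis and swaps it with z
# before the global shift is added; a one-frame, one-joint illustration with made-up
# coordinates:
pose = np.array([[[1.0, 2.0, 3.0]]])               # (frames, joints, xyz)
x, y, z = pose[:, :, 0:1], pose[:, :, 1:2] * -1, pose[:, :, 2:3]
print(np.concatenate((x, z, y), axis=2))           # [[[ 1.  3. -2.]]]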

View File

@@ -3,11 +3,13 @@ import copy
from skeleton import Skeleton
from mocap_dataset import MocapDataset
from camera import normalize_screen_coordinates, image_coordinates
h36m_skeleton = Skeleton(parents=[-1, 0, 1, 2, 3, 4, 0, 6, 7, 8, 9, 0, 11, 12, 13, 14, 12,
16, 17, 18, 19, 20, 19, 22, 12, 24, 25, 26, 27, 28, 27, 30],
joints_left=[6, 7, 8, 9, 10, 16, 17, 18, 19, 20, 21, 22, 23],
joints_right=[1, 2, 3, 4, 5, 24, 25, 26, 27, 28, 29, 30, 31])
h36m_skeleton = Skeleton(parents=[
-1, 0, 1, 2, 3, 4, 0, 6, 7, 8, 9, 0, 11, 12, 13, 14, 12, 16, 17, 18, 19, 20, 19, 22, 12, 24,
25, 26, 27, 28, 27, 30
],
joints_left=[6, 7, 8, 9, 10, 16, 17, 18, 19, 20, 21, 22, 23],
joints_right=[1, 2, 3, 4, 5, 24, 25, 26, 27, 28, 29, 30, 31])
h36m_cameras_intrinsic_params = [
{
@@ -18,7 +20,7 @@ h36m_cameras_intrinsic_params = [
'tangential_distortion': [-0.0009756988729350269, -0.00142447161488235],
'res_w': 1000,
'res_h': 1002,
'azimuth': 70, # Only used for visualization
'azimuth': 70, # Only used for visualization
},
{
'id': '55011271',
@@ -28,7 +30,7 @@ h36m_cameras_intrinsic_params = [
'tangential_distortion': [-0.0016190266469493508, -0.0027408944442868233],
'res_w': 1000,
'res_h': 1000,
'azimuth': -70, # Only used for visualization
'azimuth': -70, # Only used for visualization
},
{
'id': '58860488',
@@ -38,7 +40,7 @@ h36m_cameras_intrinsic_params = [
'tangential_distortion': [0.0014843869721516967, -0.0007599993259645998],
'res_w': 1000,
'res_h': 1000,
'azimuth': 110, # Only used for visualization
'azimuth': 110, # Only used for visualization
},
{
'id': '60457274',
@@ -48,26 +50,34 @@ h36m_cameras_intrinsic_params = [
'tangential_distortion': [-0.0005872055771760643, -0.0018133620033040643],
'res_w': 1000,
'res_h': 1002,
'azimuth': -110, # Only used for visualization
'azimuth': -110, # Only used for visualization
},
]
h36m_cameras_extrinsic_params = {
'S1': [
{
'orientation': [0.1407056450843811, -0.1500701755285263, -0.755240797996521, 0.6223280429840088],
'orientation': [
0.1407056450843811, -0.1500701755285263, -0.755240797996521, 0.6223280429840088
],
'translation': [1841.1070556640625, 4955.28466796875, 1563.4454345703125],
},
{
'orientation': [0.6157187819480896, -0.764836311340332, -0.14833825826644897, 0.11794740706682205],
'orientation': [
0.6157187819480896, -0.764836311340332, -0.14833825826644897, 0.11794740706682205
],
'translation': [1761.278564453125, -5078.0068359375, 1606.2650146484375],
},
{
'orientation': [0.14651472866535187, -0.14647851884365082, 0.7653023600578308, -0.6094175577163696],
'orientation': [
0.14651472866535187, -0.14647851884365082, 0.7653023600578308, -0.6094175577163696
],
'translation': [-1846.7777099609375, 5215.04638671875, 1491.972412109375],
},
{
'orientation': [0.5834008455276489, -0.7853162288665771, 0.14548823237419128, -0.14749594032764435],
'orientation': [
0.5834008455276489, -0.7853162288665771, 0.14548823237419128, -0.14749594032764435
],
'translation': [-1794.7896728515625, -3722.698974609375, 1574.8927001953125],
},
],
@@ -91,158 +101,206 @@ h36m_cameras_extrinsic_params = {
],
'S5': [
{
'orientation': [0.1467377245426178, -0.162370964884758, -0.7551892995834351, 0.6178938746452332],
'orientation': [
0.1467377245426178, -0.162370964884758, -0.7551892995834351, 0.6178938746452332
],
'translation': [2097.3916015625, 4880.94482421875, 1605.732421875],
},
{
'orientation': [0.6159758567810059, -0.7626792192459106, -0.15728192031383514, 0.1189815029501915],
'orientation': [
0.6159758567810059, -0.7626792192459106, -0.15728192031383514, 0.1189815029501915
],
'translation': [2031.7008056640625, -5167.93310546875, 1612.923095703125],
},
{
'orientation': [0.14291371405124664, -0.12907841801643372, 0.7678384780883789, -0.6110143065452576],
'orientation': [
0.14291371405124664, -0.12907841801643372, 0.7678384780883789, -0.6110143065452576
],
'translation': [-1620.5948486328125, 5171.65869140625, 1496.43701171875],
},
{
'orientation': [0.5920479893684387, -0.7814217805862427, 0.1274748593568802, -0.15036417543888092],
'orientation': [
0.5920479893684387, -0.7814217805862427, 0.1274748593568802, -0.15036417543888092
],
'translation': [-1637.1737060546875, -3867.3173828125, 1547.033203125],
},
],
'S6': [
{
'orientation': [0.1337897777557373, -0.15692396461963654, -0.7571090459823608, 0.6198879480361938],
'orientation': [
0.1337897777557373, -0.15692396461963654, -0.7571090459823608, 0.6198879480361938
],
'translation': [1935.4517822265625, 4950.24560546875, 1618.0838623046875],
},
{
'orientation': [0.6147197484970093, -0.7628812789916992, -0.16174767911434174, 0.11819244921207428],
'orientation': [
0.6147197484970093, -0.7628812789916992, -0.16174767911434174, 0.11819244921207428
],
'translation': [1969.803955078125, -5128.73876953125, 1632.77880859375],
},
{
'orientation': [0.1529948115348816, -0.13529130816459656, 0.7646096348762512, -0.6112781167030334],
'orientation': [
0.1529948115348816, -0.13529130816459656, 0.7646096348762512, -0.6112781167030334
],
'translation': [-1769.596435546875, 5185.361328125, 1476.993408203125],
},
{
'orientation': [0.5916101336479187, -0.7804774045944214, 0.12832270562648773, -0.1561593860387802],
'orientation': [
0.5916101336479187, -0.7804774045944214, 0.12832270562648773, -0.1561593860387802
],
'translation': [-1721.668701171875, -3884.13134765625, 1540.4879150390625],
},
],
'S7': [
{
'orientation': [0.1435241848230362, -0.1631336808204651, -0.7548328638076782, 0.6188824772834778],
'orientation': [
0.1435241848230362, -0.1631336808204651, -0.7548328638076782, 0.6188824772834778
],
'translation': [1974.512939453125, 4926.3544921875, 1597.8326416015625],
},
{
'orientation': [0.6141672730445862, -0.7638262510299683, -0.1596645563840866, 0.1177929937839508],
'orientation': [
0.6141672730445862, -0.7638262510299683, -0.1596645563840866, 0.1177929937839508
],
'translation': [1937.0584716796875, -5119.7900390625, 1631.5665283203125],
},
{
'orientation': [0.14550060033798218, -0.12874816358089447, 0.7660516500473022, -0.6127139329910278],
'orientation': [
0.14550060033798218, -0.12874816358089447, 0.7660516500473022, -0.6127139329910278
],
'translation': [-1741.8111572265625, 5208.24951171875, 1464.8245849609375],
},
{
'orientation': [0.5912848114967346, -0.7821764349937439, 0.12445473670959473, -0.15196487307548523],
'orientation': [
0.5912848114967346, -0.7821764349937439, 0.12445473670959473, -0.15196487307548523
],
'translation': [-1734.7105712890625, -3832.42138671875, 1548.5830078125],
},
],
'S8': [
{
'orientation': [0.14110587537288666, -0.15589867532253265, -0.7561917304992676, 0.619644045829773],
'orientation': [
0.14110587537288666, -0.15589867532253265, -0.7561917304992676, 0.619644045829773
],
'translation': [2150.65185546875, 4896.1611328125, 1611.9046630859375],
},
{
'orientation': [0.6169601678848267, -0.7647668123245239, -0.14846350252628326, 0.11158157885074615],
'orientation': [
0.6169601678848267, -0.7647668123245239, -0.14846350252628326, 0.11158157885074615
],
'translation': [2219.965576171875, -5148.453125, 1613.0440673828125],
},
{
'orientation': [0.1471444070339203, -0.13377119600772858, 0.7670128345489502, -0.6100369691848755],
'orientation': [
0.1471444070339203, -0.13377119600772858, 0.7670128345489502, -0.6100369691848755
],
'translation': [-1571.2215576171875, 5137.0185546875, 1498.1761474609375],
},
{
'orientation': [0.5927824378013611, -0.7825870513916016, 0.12147816270589828, -0.14631995558738708],
'orientation': [
0.5927824378013611, -0.7825870513916016, 0.12147816270589828, -0.14631995558738708
],
'translation': [-1476.913330078125, -3896.7412109375, 1547.97216796875],
},
],
'S9': [
{
'orientation': [0.15540587902069092, -0.15548215806484222, -0.7532095313072205, 0.6199594736099243],
'orientation': [
0.15540587902069092, -0.15548215806484222, -0.7532095313072205, 0.6199594736099243
],
'translation': [2044.45849609375, 4935.1171875, 1481.2275390625],
},
{
'orientation': [0.618784487247467, -0.7634735107421875, -0.14132238924503326, 0.11933968216180801],
'orientation': [
0.618784487247467, -0.7634735107421875, -0.14132238924503326, 0.11933968216180801
],
'translation': [1990.959716796875, -5123.810546875, 1568.8048095703125],
},
{
'orientation': [0.13357827067375183, -0.1367100477218628, 0.7689454555511475, -0.6100738644599915],
'orientation': [
0.13357827067375183, -0.1367100477218628, 0.7689454555511475, -0.6100738644599915
],
'translation': [-1670.9921875, 5211.98583984375, 1528.387939453125],
},
{
'orientation': [0.5879399180412292, -0.7823407053947449, 0.1427614390850067, -0.14794869720935822],
'orientation': [
0.5879399180412292, -0.7823407053947449, 0.1427614390850067, -0.14794869720935822
],
'translation': [-1696.04345703125, -3827.099853515625, 1591.4127197265625],
},
],
'S11': [
{
'orientation': [0.15232472121715546, -0.15442320704460144, -0.7547563314437866, 0.6191070079803467],
'orientation': [
0.15232472121715546, -0.15442320704460144, -0.7547563314437866, 0.6191070079803467
],
'translation': [2098.440185546875, 4926.5546875, 1500.278564453125],
},
{
'orientation': [0.6189449429512024, -0.7600917220115662, -0.15300633013248444, 0.1255258321762085],
'orientation': [
0.6189449429512024, -0.7600917220115662, -0.15300633013248444, 0.1255258321762085
],
'translation': [2083.182373046875, -4912.1728515625, 1561.07861328125],
},
{
'orientation': [0.14943228662014008, -0.15650227665901184, 0.7681233882904053, -0.6026304364204407],
'orientation': [
0.14943228662014008, -0.15650227665901184, 0.7681233882904053, -0.6026304364204407
],
'translation': [-1609.8153076171875, 5177.3359375, 1537.896728515625],
},
{
'orientation': [0.5894251465797424, -0.7818877100944519, 0.13991211354732513, -0.14715361595153809],
'orientation': [
0.5894251465797424, -0.7818877100944519, 0.13991211354732513, -0.14715361595153809
],
'translation': [-1590.738037109375, -3854.1689453125, 1578.017578125],
},
],
}
class Human36mDataset(MocapDataset):
def __init__(self, path, remove_static_joints=True):
super().__init__(fps=50, skeleton=h36m_skeleton)
self._cameras = copy.deepcopy(h36m_cameras_extrinsic_params)
for cameras in self._cameras.values():
for i, cam in enumerate(cameras):
cam.update(h36m_cameras_intrinsic_params[i])
for k, v in cam.items():
if k not in ['id', 'res_w', 'res_h']:
cam[k] = np.array(v, dtype='float32')
# Normalize camera frame
cam['center'] = normalize_screen_coordinates(cam['center'], w=cam['res_w'], h=cam['res_h']).astype('float32')
cam['focal_length'] = cam['focal_length']/cam['res_w']*2
if 'translation' in cam:
cam['translation'] = cam['translation']/1000 # mm to meters
# Add intrinsic parameters vector
cam['intrinsic'] = np.concatenate((cam['focal_length'],
cam['center'],
cam['radial_distortion'],
cam['tangential_distortion']))
# Load serialized dataset
data = np.load(path)['positions_3d'].item()
self._data = {}
for subject, actions in data.items():
self._data[subject] = {}
for action_name, positions in actions.items():
self._data[subject][action_name] = {
'positions': positions,
'cameras': self._cameras[subject],
}
if remove_static_joints:
# Bring the skeleton to 17 joints instead of the original 32
self.remove_joints([4, 5, 9, 10, 11, 16, 20, 21, 22, 23, 24, 28, 29, 30, 31])
# Rewire shoulders to the correct parents
self._skeleton._parents[11] = 8
self._skeleton._parents[14] = 8
def supports_semi_supervised(self):
return True
def __init__(self, path, remove_static_joints=True):
super().__init__(fps=50, skeleton=h36m_skeleton)
self._cameras = copy.deepcopy(h36m_cameras_extrinsic_params)
for cameras in self._cameras.values():
for i, cam in enumerate(cameras):
cam.update(h36m_cameras_intrinsic_params[i])
for k, v in cam.items():
if k not in ['id', 'res_w', 'res_h']:
cam[k] = np.array(v, dtype='float32')
# Normalize camera frame
cam['center'] = normalize_screen_coordinates(cam['center'], w=cam['res_w'],
h=cam['res_h']).astype('float32')
cam['focal_length'] = cam['focal_length'] / cam['res_w'] * 2
if 'translation' in cam:
cam['translation'] = cam['translation'] / 1000 # mm to meters
# Add intrinsic parameters vector
cam['intrinsic'] = np.concatenate((cam['focal_length'], cam['center'],
cam['radial_distortion'], cam['tangential_distortion']))
# Load serialized dataset
data = np.load(path)['positions_3d'].item()
self._data = {}
for subject, actions in data.items():
self._data[subject] = {}
for action_name, positions in actions.items():
self._data[subject][action_name] = {
'positions': positions,
'cameras': self._cameras[subject],
}
if remove_static_joints:
# Bring the skeleton to 17 joints instead of the original 32
self.remove_joints([4, 5, 9, 10, 11, 16, 20, 21, 22, 23, 24, 28, 29, 30, 31])
# Rewire shoulders to the correct parents
self._skeleton._parents[11] = 8
self._skeleton._parents[14] = 8
def supports_semi_supervised(self):
return True
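A hedged sketch (editor addition, not part of the commit) of the camera-normalization step in the constructor above, assuming normalize_screen_coordinates follows the usual VideoPose3D convention, which is not defined in this diff: pixel coordinates are mapped so that x spans [-1, 1] and y is scaled by the same factor. The resolution and principal-point values below are only example numbers.

import numpy as np

def normalize_screen_coordinates_sketch(X, w, h):
  # Map pixel coordinates into [-1, 1] along x; y keeps the image aspect ratio.
  assert X.shape[-1] == 2
  return X / w * 2 - np.array([1, h / w])

center = np.array([512.5, 515.4], dtype='float32')  # example principal point
print(normalize_screen_coordinates_sketch(center, w=1000, h=1002))  # ~ [0.025, 0.029]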


@@ -9,142 +9,138 @@ from transformation import *
from pyquaternion import Quaternion
def get_angle(vec1, vec2):
cos_theta = np.dot(vec1, vec2)/(np.linalg.norm(vec1) * np.linalg.norm(vec2))
return acos(cos_theta)
cos_theta = np.dot(vec1, vec2) / (np.linalg.norm(vec1) * np.linalg.norm(vec2))
return acos(cos_theta)
def get_quaternion(ox, oy, oz, x, y, z):
# given transformed axis in x-y-z order return a quaternion
ox /= np.linalg.norm(ox)
oy /= np.linalg.norm(oy)
oz /= np.linalg.norm(oz)
# given transformed axis in x-y-z order return a quaternion
ox /= np.linalg.norm(ox)
oy /= np.linalg.norm(oy)
oz /= np.linalg.norm(oz)
set1 = np.vstack((ox,oy,oz))
set1 = np.vstack((ox, oy, oz))
x /= np.linalg.norm(x)
y /= np.linalg.norm(y)
z /= np.linalg.norm(z)
x /= np.linalg.norm(x)
y /= np.linalg.norm(y)
z /= np.linalg.norm(z)
set2 = np.vstack((x,y,z))
rot_mat = superimposition_matrix(set1, set2, scale=False, usesvd=True)
rot_qua = quaternion_from_matrix(rot_mat)
set2 = np.vstack((x, y, z))
rot_mat = superimposition_matrix(set1, set2, scale=False, usesvd=True)
rot_qua = quaternion_from_matrix(rot_mat)
return rot_qua
return rot_qua
# 3D coord to deepmimic rotations
def coord_to_rot(frameNum, frame, frame_duration):
eps = 0.001
axis_rotate_rate = 0.3
eps = 0.001
axis_rotate_rate = 0.3
frame = np.array(frame)
tmp = [[] for i in range(15)]
# duration of frame in seconds (1D),
tmp[0] = [frame_duration]
# root position (3D),
tmp[1] = frame[0]
# root rotation (4D),
root_y = (frame[7] - frame[0])
root_z = (frame[1] - frame[0])
root_x = np.cross(root_y, root_z)
frame = np.array(frame)
tmp = [[] for i in range(15)]
# duration of frame in seconds (1D),
tmp[0] = [frame_duration]
# root position (3D),
tmp[1] = frame[0]
# root rotation (4D),
root_y = (frame[7] - frame[0])
root_z = (frame[1] - frame[0])
root_x = np.cross(root_y, root_z)
x = np.array([1.0,0,0])
y = np.array([0,1.0,0])
z = np.array([0,0,1.0])
rot_qua = get_quaternion(root_x, root_y, root_z, x, y, z)
tmp[2] = list(rot_qua)
x = np.array([1.0, 0, 0])
y = np.array([0, 1.0, 0])
z = np.array([0, 0, 1.0])
# chest rotation (4D),
chest_y = (frame[8] - frame[7])
chest_z = (frame[14] - frame[8])
chest_x = np.cross(chest_y, chest_z)
rot_qua = get_quaternion(chest_x, chest_y, chest_z, root_x, root_y, root_z)
tmp[3] = list(rot_qua)
rot_qua = get_quaternion(root_x, root_y, root_z, x, y, z)
tmp[2] = list(rot_qua)
# neck rotation (4D),
neck_y = (frame[10] - frame[8])
neck_z = np.cross(frame[10]-frame[9], frame[8]-frame[9])
neck_x = np.cross(neck_y, neck_z)
rot_qua = get_quaternion(neck_x, neck_y, neck_z, chest_x, chest_y, chest_z)
tmp[4] = list(rot_qua)
# chest rotation (4D),
chest_y = (frame[8] - frame[7])
chest_z = (frame[14] - frame[8])
chest_x = np.cross(chest_y, chest_z)
rot_qua = get_quaternion(chest_x, chest_y, chest_z, root_x, root_y, root_z)
tmp[3] = list(rot_qua)
# right hip rotation (4D),
r_hip_y = (frame[1] - frame[2])
r_hip_z = np.cross(frame[1]-frame[2], frame[3]-frame[2])
r_hip_x = np.cross(r_hip_y, r_hip_z)
rot_qua = get_quaternion(r_hip_x, r_hip_y, r_hip_z, root_x, root_y, root_z)
tmp[5] = list(rot_qua)
# neck rotation (4D),
neck_y = (frame[10] - frame[8])
neck_z = np.cross(frame[10] - frame[9], frame[8] - frame[9])
neck_x = np.cross(neck_y, neck_z)
rot_qua = get_quaternion(neck_x, neck_y, neck_z, chest_x, chest_y, chest_z)
tmp[4] = list(rot_qua)
# right knee rotation (1D),
vec1 = frame[1] - frame[2]
vec2 = frame[3] - frame[2]
angle1 = get_angle(vec1, vec2)
tmp[6] = [angle1-pi]
# right hip rotation (4D),
r_hip_y = (frame[1] - frame[2])
r_hip_z = np.cross(frame[1] - frame[2], frame[3] - frame[2])
r_hip_x = np.cross(r_hip_y, r_hip_z)
rot_qua = get_quaternion(r_hip_x, r_hip_y, r_hip_z, root_x, root_y, root_z)
tmp[5] = list(rot_qua)
# right ankle rotation (4D),
tmp[7] = [1,0,0,0]
# right knee rotation (1D),
vec1 = frame[1] - frame[2]
vec2 = frame[3] - frame[2]
angle1 = get_angle(vec1, vec2)
tmp[6] = [angle1 - pi]
# right shoulder rotation (4D),
r_shou_y = (frame[14] - frame[15])
r_shou_z = np.cross(frame[16]-frame[15], frame[14]-frame[15])
r_shou_x = np.cross(r_shou_y, r_shou_z)
rot_qua = get_quaternion(r_shou_x, r_shou_y, r_shou_z, chest_x, chest_y, chest_z)
tmp[8] = list(rot_qua)
# right ankle rotation (4D),
tmp[7] = [1, 0, 0, 0]
# right elbow rotation (1D),
vec1 = frame[14] - frame[15]
vec2 = frame[16] - frame[15]
angle1 = get_angle(vec1, vec2)
tmp[9] = [pi-angle1]
# right shoulder rotation (4D),
r_shou_y = (frame[14] - frame[15])
r_shou_z = np.cross(frame[16] - frame[15], frame[14] - frame[15])
r_shou_x = np.cross(r_shou_y, r_shou_z)
rot_qua = get_quaternion(r_shou_x, r_shou_y, r_shou_z, chest_x, chest_y, chest_z)
tmp[8] = list(rot_qua)
# left hip rotation (4D),
l_hip_y = (frame[4] - frame[5])
l_hip_z = np.cross(frame[4]-frame[5], frame[6]-frame[5])
l_hip_x = np.cross(l_hip_y, l_hip_z)
rot_qua = get_quaternion(l_hip_x, l_hip_y, l_hip_z, root_x, root_y, root_z)
tmp[10] = list(rot_qua)
# left knee rotation (1D),
vec1 = frame[4] - frame[5]
vec2 = frame[6] - frame[5]
angle1 = get_angle(vec1, vec2)
tmp[11] = [angle1-pi]
# left ankle rotation (4D),
tmp[12] = [1,0,0,0]
# right elbow rotation (1D),
vec1 = frame[14] - frame[15]
vec2 = frame[16] - frame[15]
angle1 = get_angle(vec1, vec2)
tmp[9] = [pi - angle1]
# left shoulder rotation (4D),
l_shou_y = (frame[11] - frame[12])
l_shou_z = np.cross(frame[13]-frame[12], frame[11]-frame[12])
l_shou_x = np.cross(l_shou_y, l_shou_z)
rot_qua = get_quaternion(l_shou_x, l_shou_y, l_shou_z, chest_x, chest_y, chest_z)
tmp[13] = list(rot_qua)
# left hip rotation (4D),
l_hip_y = (frame[4] - frame[5])
l_hip_z = np.cross(frame[4] - frame[5], frame[6] - frame[5])
l_hip_x = np.cross(l_hip_y, l_hip_z)
rot_qua = get_quaternion(l_hip_x, l_hip_y, l_hip_z, root_x, root_y, root_z)
tmp[10] = list(rot_qua)
# left elbow rotation (1D)
vec1 = frame[11] - frame[12]
vec2 = frame[13] - frame[12]
angle1 = get_angle(vec1, vec2)
tmp[14] = [pi-angle1]
# left knee rotation (1D),
vec1 = frame[4] - frame[5]
vec2 = frame[6] - frame[5]
angle1 = get_angle(vec1, vec2)
tmp[11] = [angle1 - pi]
# left ankle rotation (4D),
tmp[12] = [1, 0, 0, 0]
# left shoulder rotation (4D),
l_shou_y = (frame[11] - frame[12])
l_shou_z = np.cross(frame[13] - frame[12], frame[11] - frame[12])
l_shou_x = np.cross(l_shou_y, l_shou_z)
rot_qua = get_quaternion(l_shou_x, l_shou_y, l_shou_z, chest_x, chest_y, chest_z)
tmp[13] = list(rot_qua)
# left elbow rotation (1D)
vec1 = frame[11] - frame[12]
vec2 = frame[13] - frame[12]
angle1 = get_angle(vec1, vec2)
tmp[14] = [pi - angle1]
ret = []
for i in tmp:
ret += list(i)
return np.array(ret)
ret = []
for i in tmp:
ret += list(i)
return np.array(ret)
# In[6]:
def coord_seq_to_rot_seq(coord_seq, frame_duration):
ret = []
for i in range(len(coord_seq)):
tmp = coord_to_rot( i, coord_seq[i], frame_duration)
ret.append(list(tmp))
return ret
ret = []
for i in range(len(coord_seq)):
tmp = coord_to_rot(i, coord_seq[i], frame_duration)
ret.append(list(tmp))
return ret
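A quick check of the 1-D joint terms above (editor addition): the knee and elbow entries are just the inner angle between two limb segments, shifted by pi, so a leg bent at a right angle stores -pi/2.

import numpy as np
from math import acos, pi

thigh = np.array([0.0, 1.0, 0.0])  # plays the role of frame[1] - frame[2]
shin = np.array([1.0, 0.0, 0.0])  # plays the role of frame[3] - frame[2]
cos_theta = np.dot(thigh, shin) / (np.linalg.norm(thigh) * np.linalg.norm(shin))
print(acos(cos_theta) - pi)  # -> -1.5707..., the value coord_to_rot stores in tmp[6]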


@@ -1,36 +1,37 @@
import numpy as np
from skeleton import Skeleton
class MocapDataset:
def __init__(self, fps, skeleton):
self._skeleton = skeleton
self._fps = fps
self._data = None # Must be filled by subclass
self._cameras = None # Must be filled by subclass
def remove_joints(self, joints_to_remove):
kept_joints = self._skeleton.remove_joints(joints_to_remove)
for subject in self._data.keys():
for action in self._data[subject].keys():
s = self._data[subject][action]
s['positions'] = s['positions'][:, kept_joints]
def __getitem__(self, key):
return self._data[key]
def subjects(self):
return self._data.keys()
def fps(self):
return self._fps
def skeleton(self):
return self._skeleton
def cameras(self):
return self._cameras
def supports_semi_supervised(self):
# This method can be overridden
return False
def __init__(self, fps, skeleton):
self._skeleton = skeleton
self._fps = fps
self._data = None # Must be filled by subclass
self._cameras = None # Must be filled by subclass
def remove_joints(self, joints_to_remove):
kept_joints = self._skeleton.remove_joints(joints_to_remove)
for subject in self._data.keys():
for action in self._data[subject].keys():
s = self._data[subject][action]
s['positions'] = s['positions'][:, kept_joints]
def __getitem__(self, key):
return self._data[key]
def subjects(self):
return self._data.keys()
def fps(self):
return self._fps
def skeleton(self):
return self._skeleton
def cameras(self):
return self._cameras
def supports_semi_supervised(self):
# This method can be overridden
return False
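A minimal sketch of what remove_joints does to the position arrays a subclass stores in _data (editor addition; the array shapes are illustrative): slicing the joint axis down to the kept joints, which for the Human3.6M list above turns 32 joints into 17.

import numpy as np

removed = (4, 5, 9, 10, 11, 16, 20, 21, 22, 23, 24, 28, 29, 30, 31)
positions = np.zeros((100, 32, 3), dtype='float32')  # frames x joints x xyz
kept_joints = [j for j in range(32) if j not in removed]
print(positions[:, kept_joints].shape)  # -> (100, 17, 3), the 17-joint skeleton used above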


@@ -1,30 +1,31 @@
import numpy as np
def qrot(q, v):
"""
"""
Rotate vector(s) v about the rotation described by quaternion(s) q.
Expects a tensor of shape (*, 4) for q and a tensor of shape (*, 3) for v,
where * denotes any number of dimensions.
Returns a tensor of shape (*, 3).
"""
assert q.shape[-1] == 4
assert v.shape[-1] == 3
assert q.shape[:-1] == v.shape[:-1]
assert q.shape[-1] == 4
assert v.shape[-1] == 3
assert q.shape[:-1] == v.shape[:-1]
qvec = q[..., 1:]
uv = np.cross(qvec, v)
uuv = np.cross(qvec, uv)
return (v + 2 * (q[..., :1] * uv + uuv))
qvec = q[..., 1:]
uv = np.cross(qvec, v)
uuv = np.cross(qvec, uv)
return (v + 2 * (q[..., :1] * uv + uuv))
def qinverse(q, inplace=False):
# We assume the quaternion to be normalized
if inplace:
q[..., 1:] *= -1
return q
else:
w = q[..., :1]
xyz = q[..., 1:]
return np.hstack((w, -xyz))
# We assume the quaternion to be normalized
if inplace:
q[..., 1:] *= -1
return q
else:
w = q[..., :1]
xyz = q[..., 1:]
return np.hstack((w, -xyz))
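A small numerical check of qrot (editor addition): a unit quaternion for a 90-degree rotation about the z axis should send the x axis to the y axis. The arithmetic below repeats qrot inline so the snippet stands alone.

import numpy as np

q = np.array([np.cos(np.pi / 4), 0.0, 0.0, np.sin(np.pi / 4)])  # (w, x, y, z)
v = np.array([1.0, 0.0, 0.0])
qvec = q[..., 1:]
uv = np.cross(qvec, v)
uuv = np.cross(qvec, uv)
print(v + 2 * (q[..., :1] * uv + uuv))  # ~ [0., 1., 0.]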


@@ -1,7 +1,7 @@
import os, inspect
import os, inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
os.sys.path.insert(0,parentdir)
os.sys.path.insert(0, parentdir)
print('parent:', parentdir)
@@ -10,7 +10,6 @@ import pybullet
import time
import random
from pybullet_utils.bullet_client import BulletClient
from deep_mimic.env.motion_capture_data import MotionCaptureData
@@ -26,21 +25,28 @@ import pybullet as p
import numpy as np
import argparse
parser = argparse.ArgumentParser(description='Arguments for loading reference for learning.')
# General arguments
parser.add_argument('--dataset_path', default='data/data_3d_h36m.npz', type=str, help='target dataset') # h36m or humaneva
parser.add_argument('--json_path', default='data/Walking.json', type=str, help='json file path for storing the deepmimic-format json created by inverse kinematics.')
parser.add_argument('--dataset_path',
default='data/data_3d_h36m.npz',
type=str,
help='target dataset') # h36m or humaneva
parser.add_argument(
'--json_path',
default='data/Walking.json',
type=str,
                    help='json file path for storing the deepmimic-format json created by inverse kinematics.')
parser.add_argument('--fps', default=24, type=int, help='frames per second')
parser.add_argument('--subject', default='S11', type=str, help='camera subject.')
parser.add_argument('--action', default='Walking', type=str, help='name of the action.')
parser.add_argument('--loop', default='wrap', type=str, help='loop information in deepmimic, wrap or none.')
parser.add_argument('--action', default='Walking', type=str, help='name of the action.')
parser.add_argument('--loop',
default='wrap',
type=str,
help='loop information in deepmimic, wrap or none.')
parser.add_argument('--draw_gt', action='store_true', help='draw ground truth or not.')
args = parser.parse_args()
args = parser.parse_args()
dataset_path = args.dataset_path
json_path = args.json_path
@@ -52,93 +58,75 @@ draw_gt = args.draw_gt
def draw_ground_truth(coord_seq, frame, duration, shift):
global joint_info
joint = coord_seq[frame]
shift = np.array(shift)
for i in range(1, 17):
# print(x[11], x[14])
joint_fa = joint_info['father'][i]
if joint_info['side'][i] == 'right':
p.addUserDebugLine(lineFromXYZ=joint[i]+shift,
lineToXYZ=joint[joint_fa]+shift,
lineColorRGB=(255,0,0),
lineWidth=1,
lifeTime=duration)
else:
p.addUserDebugLine(lineFromXYZ=joint[i]+shift,
lineToXYZ=joint[joint_fa]+shift,
lineColorRGB=(0,0,0),
lineWidth=1,
lifeTime=duration)
global joint_info
joint = coord_seq[frame]
shift = np.array(shift)
for i in range(1, 17):
# print(x[11], x[14])
joint_fa = joint_info['father'][i]
if joint_info['side'][i] == 'right':
p.addUserDebugLine(lineFromXYZ=joint[i] + shift,
lineToXYZ=joint[joint_fa] + shift,
lineColorRGB=(255, 0, 0),
lineWidth=1,
lifeTime=duration)
else:
p.addUserDebugLine(lineFromXYZ=joint[i] + shift,
lineToXYZ=joint[joint_fa] + shift,
lineColorRGB=(0, 0, 0),
lineWidth=1,
lifeTime=duration)
dataset = init_fb_h36m_dataset(dataset_path)
ground_truth = pose3D_from_fb_h36m(dataset,
subject = subject,
action = action,
shift = [1.0,0.0,0.0])
rot_seq = coord_seq_to_rot_seq(coord_seq = ground_truth,
frame_duration = 1/fps)
rot_seq_to_deepmimic_json( rot_seq = rot_seq,
loop = loop,
json_path = json_path)
ground_truth = pose3D_from_fb_h36m(dataset, subject=subject, action=action, shift=[1.0, 0.0, 0.0])
rot_seq = coord_seq_to_rot_seq(coord_seq=ground_truth, frame_duration=1 / fps)
rot_seq_to_deepmimic_json(rot_seq=rot_seq, loop=loop, json_path=json_path)
bc = BulletClient(connection_mode=pybullet.GUI)
bc.setAdditionalSearchPath(pybullet_data.getDataPath())
bc.configureDebugVisualizer(bc.COV_ENABLE_Y_AXIS_UP,1)
bc.setGravity(0,-9.8,0)
motion=MotionCaptureData()
bc.configureDebugVisualizer(bc.COV_ENABLE_Y_AXIS_UP, 1)
bc.setGravity(0, -9.8, 0)
motion = MotionCaptureData()
motionPath = json_path
motion.Load(motionPath)
print("numFrames = ", motion.NumFrames())
simTimeId = bc.addUserDebugParameter("simTime", 0, motion.NumFrames() - 1.1, 0)
y2zOrn = bc.getQuaternionFromEuler([-1.57, 0, 0])
bc.loadURDF("plane.urdf", [0, -0.04, 0], y2zOrn)
simTimeId= bc.addUserDebugParameter("simTime",0,motion.NumFrames()-1.1,0)
y2zOrn = bc.getQuaternionFromEuler([-1.57,0,0])
bc.loadURDF("plane.urdf",[0,-0.04,0], y2zOrn)
humanoid = Humanoid(bc, motion, [0,0,0]) # coordinates of the initial position
humanoid = Humanoid(bc, motion, [0, 0, 0])  # coordinates of the initial position
print(p.getBasePositionAndOrientation(humanoid._humanoid))
simTime = 0
keyFrameDuration = motion.KeyFrameDuraction()
print("keyFrameDuration=",keyFrameDuration)
print("keyFrameDuration=", keyFrameDuration)
for utNum in range(motion.NumFrames()):
bc.stepSimulation()
humanoid.RenderReference(utNum * keyFrameDuration)
if draw_gt:
draw_ground_truth(coord_seq = ground_truth,
frame = utNum,
duration = keyFrameDuration,
shift = [-1.0, 0.0, 1.0])
time.sleep(0.001)
bc.stepSimulation()
humanoid.RenderReference(utNum * keyFrameDuration)
if draw_gt:
draw_ground_truth(coord_seq=ground_truth,
frame=utNum,
duration=keyFrameDuration,
shift=[-1.0, 0.0, 1.0])
time.sleep(0.001)
stage = 0
def Reset(humanoid):
global simTime
humanoid.Reset()
simTime = 0
humanoid.SetSimTime(simTime)
pose = humanoid.InitializePoseFromMotionData()
humanoid.ApplyPose(pose, True, True, humanoid._humanoid,bc)
global simTime
humanoid.Reset()
simTime = 0
humanoid.SetSimTime(simTime)
pose = humanoid.InitializePoseFromMotionData()
humanoid.ApplyPose(pose, True, True, humanoid._humanoid, bc)
Reset(humanoid)
p.disconnect()


@@ -1,81 +1,82 @@
import numpy as np
class Skeleton:
def __init__(self, parents, joints_left, joints_right):
assert len(joints_left) == len(joints_right)
self._parents = np.array(parents)
self._joints_left = joints_left
self._joints_right = joints_right
self._compute_metadata()
def num_joints(self):
return len(self._parents)
def parents(self):
return self._parents
def has_children(self):
return self._has_children
def children(self):
return self._children
def remove_joints(self, joints_to_remove):
"""
def __init__(self, parents, joints_left, joints_right):
assert len(joints_left) == len(joints_right)
self._parents = np.array(parents)
self._joints_left = joints_left
self._joints_right = joints_right
self._compute_metadata()
def num_joints(self):
return len(self._parents)
def parents(self):
return self._parents
def has_children(self):
return self._has_children
def children(self):
return self._children
def remove_joints(self, joints_to_remove):
"""
Remove the joints specified in 'joints_to_remove'.
"""
valid_joints = []
for joint in range(len(self._parents)):
if joint not in joints_to_remove:
valid_joints.append(joint)
valid_joints = []
for joint in range(len(self._parents)):
if joint not in joints_to_remove:
valid_joints.append(joint)
for i in range(len(self._parents)):
while self._parents[i] in joints_to_remove:
self._parents[i] = self._parents[self._parents[i]]
index_offsets = np.zeros(len(self._parents), dtype=int)
new_parents = []
for i, parent in enumerate(self._parents):
if i not in joints_to_remove:
new_parents.append(parent - index_offsets[parent])
else:
index_offsets[i:] += 1
self._parents = np.array(new_parents)
if self._joints_left is not None:
new_joints_left = []
for joint in self._joints_left:
if joint in valid_joints:
new_joints_left.append(joint - index_offsets[joint])
self._joints_left = new_joints_left
if self._joints_right is not None:
new_joints_right = []
for joint in self._joints_right:
if joint in valid_joints:
new_joints_right.append(joint - index_offsets[joint])
self._joints_right = new_joints_right
for i in range(len(self._parents)):
while self._parents[i] in joints_to_remove:
self._parents[i] = self._parents[self._parents[i]]
self._compute_metadata()
return valid_joints
def joints_left(self):
return self._joints_left
def joints_right(self):
return self._joints_right
def _compute_metadata(self):
self._has_children = np.zeros(len(self._parents)).astype(bool)
for i, parent in enumerate(self._parents):
if parent != -1:
self._has_children[parent] = True
index_offsets = np.zeros(len(self._parents), dtype=int)
new_parents = []
for i, parent in enumerate(self._parents):
if i not in joints_to_remove:
new_parents.append(parent - index_offsets[parent])
else:
index_offsets[i:] += 1
self._parents = np.array(new_parents)
self._children = []
for i, parent in enumerate(self._parents):
self._children.append([])
for i, parent in enumerate(self._parents):
if parent != -1:
self._children[parent].append(i)
if self._joints_left is not None:
new_joints_left = []
for joint in self._joints_left:
if joint in valid_joints:
new_joints_left.append(joint - index_offsets[joint])
self._joints_left = new_joints_left
if self._joints_right is not None:
new_joints_right = []
for joint in self._joints_right:
if joint in valid_joints:
new_joints_right.append(joint - index_offsets[joint])
self._joints_right = new_joints_right
self._compute_metadata()
return valid_joints
def joints_left(self):
return self._joints_left
def joints_right(self):
return self._joints_right
def _compute_metadata(self):
self._has_children = np.zeros(len(self._parents)).astype(bool)
for i, parent in enumerate(self._parents):
if parent != -1:
self._has_children[parent] = True
self._children = []
for i, parent in enumerate(self._parents):
self._children.append([])
for i, parent in enumerate(self._parents):
if parent != -1:
self._children[parent].append(i)
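A worked example of the index bookkeeping in remove_joints (editor addition): on a 5-joint chain 0-1-2-3-4, dropping joint 2 re-parents its child to joint 1 and shifts every later index down by one, exactly as the two loops above do.

import numpy as np

parents = np.array([-1, 0, 1, 2, 3])
joints_to_remove = [2]
for i in range(len(parents)):
  while parents[i] in joints_to_remove:
    parents[i] = parents[parents[i]]
index_offsets = np.zeros(len(parents), dtype=int)
new_parents = []
for i, parent in enumerate(parents):
  if i not in joints_to_remove:
    new_parents.append(parent - index_offsets[parent])
  else:
    index_offsets[i:] += 1
print(new_parents)  # -> [-1, 0, 1, 2]: the chain 0-1-3-4 renumbered as 0-1-2-3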


@@ -3,21 +3,23 @@ import subprocess
from pybullet_utils.arg_parser import ArgParser
from pybullet_utils.logger import Logger
def main():
# Command line arguments
args = sys.argv[1:]
arg_parser = ArgParser()
arg_parser.load_args(args)
# Command line arguments
args = sys.argv[1:]
arg_parser = ArgParser()
arg_parser.load_args(args)
num_workers = arg_parser.parse_int('num_workers', 1)
assert(num_workers > 0)
num_workers = arg_parser.parse_int('num_workers', 1)
assert (num_workers > 0)
Logger.print2('Running with {:d} workers'.format(num_workers))
cmd = 'mpiexec -n {:d} python3 DeepMimic_Optimizer.py '.format(num_workers)
cmd += ' '.join(args)
Logger.print2('cmd: ' + cmd)
subprocess.call(cmd, shell=True)
return
Logger.print2('Running with {:d} workers'.format(num_workers))
cmd = 'mpiexec -n {:d} python3 DeepMimic_Optimizer.py '.format(num_workers)
cmd += ' '.join(args)
Logger.print2('cmd: ' + cmd)
subprocess.call(cmd, shell=True)
return
if __name__ == '__main__':
main()
main()


@@ -2,8 +2,8 @@ import os
import inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
os.sys.path.insert(0,parentdir)
print("parentdir=",parentdir)
os.sys.path.insert(0, parentdir)
print("parentdir=", parentdir)
import json
from pybullet_envs.deep_mimic.learning.rl_world import RLWorld
from pybullet_envs.deep_mimic.learning.ppo_agent import PPOAgent
@@ -15,81 +15,78 @@ from pybullet_envs.deep_mimic.env.pybullet_deep_mimic_env import PyBulletDeepMim
import sys
import random
update_timestep = 1./240.
update_timestep = 1. / 240.
animating = True
def update_world(world, time_elapsed):
timeStep = update_timestep
world.update(timeStep)
reward = world.env.calc_reward(agent_id=0)
#print("reward=",reward)
end_episode = world.env.is_episode_end()
if (end_episode):
world.end_episode()
world.reset()
return
timeStep = update_timestep
world.update(timeStep)
reward = world.env.calc_reward(agent_id=0)
#print("reward=",reward)
end_episode = world.env.is_episode_end()
if (end_episode):
world.end_episode()
world.reset()
return
def build_arg_parser(args):
arg_parser = ArgParser()
arg_parser.load_args(args)
arg_parser = ArgParser()
arg_parser.load_args(args)
arg_file = arg_parser.parse_string('arg_file', '')
if (arg_file != ''):
path = pybullet_data.getDataPath() + "/args/" + arg_file
succ = arg_parser.load_file(path)
Logger.print2(arg_file)
assert succ, Logger.print2('Failed to load args from: ' + arg_file)
return arg_parser
arg_file = arg_parser.parse_string('arg_file', '')
if (arg_file != ''):
path = pybullet_data.getDataPath()+"/args/"+arg_file
succ = arg_parser.load_file(path)
Logger.print2(arg_file)
assert succ, Logger.print2('Failed to load args from: ' + arg_file)
return arg_parser
args = sys.argv[1:]
def build_world(args, enable_draw):
arg_parser = build_arg_parser(args)
print("enable_draw=",enable_draw)
env = PyBulletDeepMimicEnv(arg_parser, enable_draw)
world = RLWorld(env, arg_parser)
#world.env.set_playback_speed(playback_speed)
arg_parser = build_arg_parser(args)
print("enable_draw=", enable_draw)
env = PyBulletDeepMimicEnv(arg_parser, enable_draw)
world = RLWorld(env, arg_parser)
#world.env.set_playback_speed(playback_speed)
motion_file = arg_parser.parse_string("motion_file")
print("motion_file=",motion_file)
bodies = arg_parser.parse_ints("fall_contact_bodies")
print("bodies=",bodies)
int_output_path = arg_parser.parse_string("int_output_path")
print("int_output_path=",int_output_path)
agent_files = pybullet_data.getDataPath()+"/"+arg_parser.parse_string("agent_files")
motion_file = arg_parser.parse_string("motion_file")
print("motion_file=", motion_file)
bodies = arg_parser.parse_ints("fall_contact_bodies")
print("bodies=", bodies)
int_output_path = arg_parser.parse_string("int_output_path")
print("int_output_path=", int_output_path)
agent_files = pybullet_data.getDataPath() + "/" + arg_parser.parse_string("agent_files")
AGENT_TYPE_KEY = "AgentType"
AGENT_TYPE_KEY = "AgentType"
print("agent_file=",agent_files)
with open(agent_files) as data_file:
json_data = json.load(data_file)
print("json_data=",json_data)
assert AGENT_TYPE_KEY in json_data
agent_type = json_data[AGENT_TYPE_KEY]
print("agent_type=",agent_type)
agent = PPOAgent(world, id, json_data)
agent.set_enable_training(False)
world.reset()
return world
print("agent_file=", agent_files)
with open(agent_files) as data_file:
json_data = json.load(data_file)
print("json_data=", json_data)
assert AGENT_TYPE_KEY in json_data
agent_type = json_data[AGENT_TYPE_KEY]
print("agent_type=", agent_type)
agent = PPOAgent(world, id, json_data)
agent.set_enable_training(False)
world.reset()
return world
if __name__ == '__main__':
world = build_world(args, True)
while (world.env._pybullet_client.isConnected()):
timeStep = update_timestep
keys = world.env.getKeyboardEvents()
if world.env.isKeyTriggered(keys, ' '):
animating = not animating
if (animating):
update_world(world, timeStep)
#animating=False
world = build_world(args, True)
while (world.env._pybullet_client.isConnected()):
timeStep = update_timestep
keys = world.env.getKeyboardEvents()
if world.env.isKeyTriggered(keys, ' '):
animating = not animating
if (animating):
update_world(world, timeStep)
#animating=False


@@ -5,134 +5,134 @@ from pybullet_utils import bullet_client
from pkg_resources import parse_version
class MJCFBaseBulletEnv(gym.Env):
"""
"""
Base class for Bullet physics simulation loading MJCF (MuJoCo .xml) environments in a Scene.
These environments create single-player scenes and behave like normal Gym environments, if
you don't use multiplayer.
"""
metadata = {
'render.modes': ['human', 'rgb_array'],
'video.frames_per_second': 60
}
metadata = {'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': 60}
def __init__(self, robot, render=False):
self.scene = None
self.physicsClientId = -1
self.ownsPhysicsClient = 0
self.camera = Camera()
self.isRender = render
self.robot = robot
self.seed()
self._cam_dist = 3
self._cam_yaw = 0
self._cam_pitch = -30
self._render_width =320
self._render_height = 240
def __init__(self, robot, render=False):
self.scene = None
self.physicsClientId = -1
self.ownsPhysicsClient = 0
self.camera = Camera()
self.isRender = render
self.robot = robot
self.seed()
self._cam_dist = 3
self._cam_yaw = 0
self._cam_pitch = -30
self._render_width = 320
self._render_height = 240
self.action_space = robot.action_space
self.observation_space = robot.observation_space
self.action_space = robot.action_space
self.observation_space = robot.observation_space
def configure(self, args):
self.robot.args = args
def configure(self, args):
self.robot.args = args
def seed(self, seed=None):
self.np_random, seed = gym.utils.seeding.np_random(seed)
self.robot.np_random = self.np_random # use the same np_randomizer for robot as for env
return [seed]
def seed(self, seed=None):
self.np_random, seed = gym.utils.seeding.np_random(seed)
self.robot.np_random = self.np_random # use the same np_randomizer for robot as for env
return [seed]
def reset(self):
if (self.physicsClientId<0):
self.ownsPhysicsClient = True
def reset(self):
if (self.physicsClientId < 0):
self.ownsPhysicsClient = True
if self.isRender:
self._p = bullet_client.BulletClient(connection_mode=pybullet.GUI)
else:
self._p = bullet_client.BulletClient()
if self.isRender:
self._p = bullet_client.BulletClient(connection_mode=pybullet.GUI)
else:
self._p = bullet_client.BulletClient()
self.physicsClientId = self._p._client
self._p.configureDebugVisualizer(pybullet.COV_ENABLE_GUI,0)
self.physicsClientId = self._p._client
self._p.configureDebugVisualizer(pybullet.COV_ENABLE_GUI, 0)
if self.scene is None:
self.scene = self.create_single_player_scene(self._p)
if not self.scene.multiplayer and self.ownsPhysicsClient:
self.scene.episode_restart(self._p)
if self.scene is None:
self.scene = self.create_single_player_scene(self._p)
if not self.scene.multiplayer and self.ownsPhysicsClient:
self.scene.episode_restart(self._p)
self.robot.scene = self.scene
self.robot.scene = self.scene
self.frame = 0
self.done = 0
self.reward = 0
dump = 0
s = self.robot.reset(self._p)
self.potential = self.robot.calc_potential()
return s
self.frame = 0
self.done = 0
self.reward = 0
dump = 0
s = self.robot.reset(self._p)
self.potential = self.robot.calc_potential()
return s
def render(self, mode='human', close=False):
if mode == "human":
self.isRender = True
if mode != "rgb_array":
return np.array([])
def render(self, mode='human', close=False):
if mode == "human":
self.isRender = True
if mode != "rgb_array":
return np.array([])
base_pos=[0,0,0]
if (hasattr(self,'robot')):
if (hasattr(self.robot,'body_xyz')):
base_pos = self.robot.body_xyz
base_pos = [0, 0, 0]
if (hasattr(self, 'robot')):
if (hasattr(self.robot, 'body_xyz')):
base_pos = self.robot.body_xyz
view_matrix = self._p.computeViewMatrixFromYawPitchRoll(
cameraTargetPosition=base_pos,
distance=self._cam_dist,
yaw=self._cam_yaw,
pitch=self._cam_pitch,
roll=0,
upAxisIndex=2)
proj_matrix = self._p.computeProjectionMatrixFOV(
fov=60, aspect=float(self._render_width)/self._render_height,
nearVal=0.1, farVal=100.0)
(_, _, px, _, _) = self._p.getCameraImage(
width=self._render_width, height=self._render_height, viewMatrix=view_matrix,
projectionMatrix=proj_matrix,
renderer=pybullet.ER_BULLET_HARDWARE_OPENGL
)
rgb_array = np.array(px)
rgb_array = np.reshape(np.array(px), (self._render_height, self._render_width, -1))
rgb_array = rgb_array[:, :, :3]
return rgb_array
view_matrix = self._p.computeViewMatrixFromYawPitchRoll(cameraTargetPosition=base_pos,
distance=self._cam_dist,
yaw=self._cam_yaw,
pitch=self._cam_pitch,
roll=0,
upAxisIndex=2)
proj_matrix = self._p.computeProjectionMatrixFOV(fov=60,
aspect=float(self._render_width) /
self._render_height,
nearVal=0.1,
farVal=100.0)
(_, _, px, _, _) = self._p.getCameraImage(width=self._render_width,
height=self._render_height,
viewMatrix=view_matrix,
projectionMatrix=proj_matrix,
renderer=pybullet.ER_BULLET_HARDWARE_OPENGL)
rgb_array = np.array(px)
rgb_array = np.reshape(np.array(px), (self._render_height, self._render_width, -1))
rgb_array = rgb_array[:, :, :3]
return rgb_array
def close(self):
if (self.ownsPhysicsClient):
if (self.physicsClientId >= 0):
self._p.disconnect()
self.physicsClientId = -1
def close(self):
if (self.ownsPhysicsClient):
if (self.physicsClientId>=0):
self._p.disconnect()
self.physicsClientId = -1
def HUD(self, state, a, done):
pass
def HUD(self, state, a, done):
pass
# def step(self, *args, **kwargs):
# if self.isRender:
# base_pos=[0,0,0]
# if (hasattr(self,'robot')):
# if (hasattr(self.robot,'body_xyz')):
# base_pos = self.robot.body_xyz
# # Keep the previous orientation of the camera set by the user.
# #[yaw, pitch, dist] = self._p.getDebugVisualizerCamera()[8:11]
# self._p.resetDebugVisualizerCamera(3,0,0, base_pos)
#
#
# return self.step(*args, **kwargs)
if parse_version(gym.__version__) < parse_version('0.9.6'):
_render = render
_reset = reset
_seed = seed
# def step(self, *args, **kwargs):
# if self.isRender:
# base_pos=[0,0,0]
# if (hasattr(self,'robot')):
# if (hasattr(self.robot,'body_xyz')):
# base_pos = self.robot.body_xyz
# # Keep the previous orientation of the camera set by the user.
# #[yaw, pitch, dist] = self._p.getDebugVisualizerCamera()[8:11]
# self._p.resetDebugVisualizerCamera(3,0,0, base_pos)
#
#
# return self.step(*args, **kwargs)
if parse_version(gym.__version__) < parse_version('0.9.6'):
_render = render
_reset = reset
_seed = seed
class Camera:
def __init__(self):
pass
def move_and_look_at(self,i,j,k,x,y,z):
lookat = [x,y,z]
distance = 10
yaw = 10
self._p.resetDebugVisualizerCamera(distance, yaw, -20, lookat)
def __init__(self):
pass
def move_and_look_at(self, i, j, k, x, y, z):
lookat = [x, y, z]
distance = 10
yaw = 10
self._p.resetDebugVisualizerCamera(distance, yaw, -20, lookat)
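A hedged usage sketch of an MJCFBaseBulletEnv subclass driven through the standard Gym loop (editor addition; the env id is just one of the environments registered by pybullet_envs, and the episode length is arbitrary).

import gym
import pybullet_envs  # registers the Bullet envs with gym

env = gym.make("HopperBulletEnv-v0")
obs = env.reset()
for _ in range(100):
  obs, reward, done, _ = env.step(env.action_space.sample())  # random actions, just to exercise step()
  if done:
    obs = env.reset()
env.close()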


@@ -7,31 +7,38 @@ import time
p = bc.BulletClient(connection_mode=pybullet.GUI)
p.setAdditionalSearchPath(pd.getDataPath())
p.loadURDF("plane_transparent.urdf", useMaximalCoordinates=True)
p#.setPhysicsEngineParameter(numSolverIterations=10, fixedTimeStep=0.01)
p #.setPhysicsEngineParameter(numSolverIterations=10, fixedTimeStep=0.01)
p.configureDebugVisualizer(p.COV_ENABLE_PLANAR_REFLECTION, 1)
p.configureDebugVisualizer(p.COV_ENABLE_RENDERING, 0)
p.configureDebugVisualizer(p.COV_ENABLE_PLANAR_REFLECTION,1)
p.configureDebugVisualizer(p.COV_ENABLE_RENDERING,0)
y2z = p.getQuaternionFromEuler([0, 0, 1.57])
meshScale = [1, 1, 1]
visualShapeId = p.createVisualShape(shapeType=p.GEOM_MESH,
fileName="domino/domino.obj",
rgbaColor=[1, 1, 1, 1],
specularColor=[0.4, .4, 0],
visualFrameOrientation=y2z,
meshScale=meshScale)
y2z = p.getQuaternionFromEuler([0,0,1.57])
meshScale = [1,1,1]
visualShapeId = p.createVisualShape(shapeType=p.GEOM_MESH,fileName="domino/domino.obj", rgbaColor=[1,1,1,1], specularColor=[0.4,.4,0], visualFrameOrientation=y2z, meshScale=meshScale)
boxDimensions = [0.5 * 0.00635, 0.5 * 0.0254, 0.5 * 0.0508]
collisionShapeId = p.createCollisionShape(p.GEOM_BOX, halfExtents=boxDimensions)
boxDimensions = [0.5*0.00635, 0.5*0.0254, 0.5*0.0508]
collisionShapeId = p.createCollisionShape(p.GEOM_BOX,halfExtents=boxDimensions)
for j in range (12):
print("j=",j)
for i in range (35):
for j in range(12):
print("j=", j)
for i in range(35):
#p.loadURDF("domino/domino.urdf",[i*0.04,0, 0.06])
p.createMultiBody(baseMass=0.025,baseCollisionShapeIndex = collisionShapeId,baseVisualShapeIndex = visualShapeId, basePosition = [i*0.04,j*0.05, 0.06], useMaximalCoordinates=True)
p.configureDebugVisualizer(p.COV_ENABLE_RENDERING,1)
p.createMultiBody(baseMass=0.025,
baseCollisionShapeIndex=collisionShapeId,
baseVisualShapeIndex=visualShapeId,
basePosition=[i * 0.04, j * 0.05, 0.06],
useMaximalCoordinates=True)
p.setGravity(0,0,-9.8)
p.configureDebugVisualizer(p.COV_ENABLE_RENDERING, 1)
p.setGravity(0, 0, -9.8)
p.setRealTimeSimulation(1)
while (1):
p.setGravity(0,0,-9.8)
p.setGravity(0, 0, -9.8)
#p.stepSimulation(1./100.)
time.sleep(1./240.)
time.sleep(1. / 240.)


@@ -2,179 +2,582 @@
import os, inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
os.sys.path.insert(0,parentdir)
os.sys.path.insert(0, parentdir)
import gym
import numpy as np
import pybullet_envs
import time
def relu(x):
return np.maximum(x, 0)
return np.maximum(x, 0)
class SmallReactivePolicy:
"Simple multi-layer perceptron policy, no internal state"
def __init__(self, observation_space, action_space):
assert weights_dense1_w.shape == (observation_space.shape[0], 64.0)
assert weights_dense2_w.shape == (64.0, 32.0)
assert weights_final_w.shape == (32.0, action_space.shape[0])
"Simple multi-layer perceptron policy, no internal state"
def __init__(self, observation_space, action_space):
assert weights_dense1_w.shape == (observation_space.shape[0], 64.0)
assert weights_dense2_w.shape == (64.0, 32.0)
assert weights_final_w.shape == (32.0, action_space.shape[0])
def act(self, ob):
x = ob
x = relu(np.dot(x, weights_dense1_w) + weights_dense1_b)
x = relu(np.dot(x, weights_dense2_w) + weights_dense2_b)
x = np.dot(x, weights_final_w) + weights_final_b
return x
def act(self, ob):
x = ob
x = relu(np.dot(x, weights_dense1_w) + weights_dense1_b)
x = relu(np.dot(x, weights_dense2_w) + weights_dense2_b)
x = np.dot(x, weights_final_w) + weights_final_b
return x
def main():
env = gym.make("InvertedDoublePendulumBulletEnv-v0")
env.render(mode="human")
pi = SmallReactivePolicy(env.observation_space, env.action_space)
env = gym.make("InvertedDoublePendulumBulletEnv-v0")
env.render(mode="human")
pi = SmallReactivePolicy(env.observation_space, env.action_space)
while 1:
frame = 0
score = 0
restart_delay = 0
obs = env.reset()
while 1:
frame = 0
score = 0
restart_delay = 0
obs = env.reset()
time.sleep(1. / 60.)
a = pi.act(obs)
obs, r, done, _ = env.step(a)
score += r
frame += 1
still_open = env.render("human")
if still_open == False:
return
if not done: continue
if restart_delay == 0:
print("score=%0.2f in %i frames" % (score, frame))
restart_delay = 60 * 2 # 2 sec at 60 fps
else:
restart_delay -= 1
if restart_delay > 0: continue
break
while 1:
time.sleep(1./60.)
a = pi.act(obs)
obs, r, done, _ = env.step(a)
score += r
frame += 1
still_open = env.render("human")
if still_open==False:
return
if not done: continue
if restart_delay==0:
print("score=%0.2f in %i frames" % (score, frame))
restart_delay = 60*2 # 2 sec at 60 fps
else:
restart_delay -= 1
if restart_delay > 0: continue
break
weights_dense1_w = np.array([
[ -0.5857, +0.1810, +0.2839, +0.1278, -0.4302, -0.3152, +0.5916, -0.0635, +0.6259, +0.2873, -0.0572, -0.3538, -0.8121, +0.2707, +0.1656, -0.2103, -0.1614, -0.2789, -0.5856, -0.4733, +0.1838, +0.1063, +0.7629, +0.0873, +0.1480, +0.1768, +0.6522, +0.1158, -0.0816, +0.6542, -0.8870, +0.1775, +0.1532, +0.2268, -0.0313, -0.0470, +0.5328, -0.0570, +0.4820, -0.3772, -0.7581, +0.2835, -0.3566, +0.9371, -0.0441, -0.1797, -0.2859, -0.0238, +0.0261, -0.0296, -0.1406, +0.2869, +0.1279, +0.6653, +0.5643, -0.3136, +0.7751, +0.2341, +0.1903, +0.8283, -0.0697, +0.1276, -0.0250, -0.0053],
[ +0.3741, +0.4844, -0.0638, -0.3205, +0.3137, +0.9636, +0.5329, +0.6882, +0.2983, -0.6675, -0.6372, +0.2065, -0.2645, -0.4789, +0.2326, -0.0691, -0.5905, -0.3354, +0.3428, +0.4253, +0.9111, -0.4751, -0.2124, +0.3920, +0.2897, -1.1101, +0.1894, -0.4025, -0.1125, -0.0627, +0.2347, -0.8787, +0.1019, +0.9128, +0.2544, -0.3933, +0.6485, -0.1936, -0.2402, +0.5012, -0.0918, +0.3160, -0.7860, +0.3439, -0.4268, -0.1788, -0.3930, +0.5128, +0.2338, +0.2571, +0.1343, +0.9850, -0.7074, +0.3532, +0.3048, -0.4542, +0.5539, -0.4409, -0.2003, -0.4837, -0.3554, -0.4447, -0.0817, -0.8497],
[ +0.0825, +0.5847, +0.4837, +0.5144, +0.4770, +0.0199, +0.4275, -0.4530, +0.8499, -0.2840, +0.3817, -0.5098, -0.2155, -0.1475, +0.1145, -0.1871, -0.0526, +0.3583, -0.3537, -0.7111, -0.6116, +0.3406, -0.6360, +0.7786, +0.6628, -0.0493, +0.3505, -0.0376, -0.6556, +1.0748, -0.5329, +0.6477, -0.7117, +0.3723, +0.6006, +0.0171, +0.0012, +0.4910, -0.5651, -0.6868, +0.2403, +0.0254, -0.4416, +0.7534, -0.0138, -1.1298, +0.5447, +0.0974, +0.1988, -0.2161, -0.3126, -0.5731, -0.1278, +0.2995, -0.1200, -0.7917, +0.5326, +0.4562, -0.0144, +0.5717, -0.4065, +0.1494, +0.7100, +0.2461],
[ -0.2861, +0.4314, -0.2982, -0.1401, -0.1033, +0.5287, -0.6620, -0.3975, +0.0038, +0.1991, -0.7079, -0.9000, +0.1659, +0.3623, -0.0752, -0.1907, -0.2335, -0.5143, +0.2324, -0.0487, +0.1583, -0.5989, +0.5957, +0.2150, -0.0335, +0.2154, +0.3279, -0.7976, +0.5320, -0.4438, +0.2170, -0.3841, -0.0039, -0.0847, -0.0028, -0.4278, -0.2393, -0.9239, +0.2880, -0.1437, -0.0941, -0.0796, -0.3906, -0.3224, +0.1038, -0.1929, -0.2713, -0.4157, -0.2178, +0.5729, -0.2065, +0.0059, +0.3879, +0.0590, +0.1759, +0.0677, -0.0170, -0.2252, +0.3301, -0.0599, +0.3791, -0.1455, +0.2200, -0.0499],
[ -0.4403, +0.7182, +0.7941, +0.1469, +1.5777, +0.3426, +0.0923, +0.2160, +1.1492, -0.5206, -0.2659, -0.1504, +0.2739, -1.3939, +0.8992, -1.1433, -0.3828, -0.2497, -0.2172, +0.2318, -0.3605, +0.6413, -1.9095, +1.4785, -0.1274, -0.7208, -0.0802, -0.8779, -1.6260, +0.9151, +0.8289, -0.0902, -0.3551, +0.6198, +1.7488, +0.0739, -1.2022, -0.3536, -1.5187, +0.1839, +1.4258, +0.4217, +0.1503, -0.0460, +0.2327, -0.4139, -0.3668, +0.2997, +0.6856, +0.6917, -0.3856, -0.3620, +0.1578, -0.8349, -1.0796, -0.0319, -1.1966, -0.8122, +0.5053, -0.5033, -0.9207, -0.1193, -0.7625, +0.1379],
[ -0.0321, -0.3206, -0.4516, +0.3420, +1.0964, +0.0311, +0.4654, -0.2367, +0.3347, -0.2798, -0.8169, -0.1555, +0.9397, -0.5597, +0.7113, -0.3642, -0.2840, -0.1323, -0.1000, +0.2283, +0.3612, -0.4784, +0.0504, +0.5310, -0.0887, +0.2926, +0.5069, -0.5645, -0.0976, -0.2594, +0.4425, +0.9223, -0.5637, -0.2336, -0.1316, -0.6564, -0.2780, -0.2409, -0.1637, +0.4506, +0.7018, -0.1299, +0.7172, +0.1207, +0.4375, +0.3836, +0.2781, -0.7792, -0.5317, +0.4510, +0.2423, -0.0588, -0.4254, -0.6381, -0.8205, +0.6417, +0.1904, -0.2618, +0.5900, -0.3899, -0.7851, -0.4769, -0.3688, -0.3510],
[ -0.8366, -0.3157, -0.1130, +0.2005, +0.3713, -0.4351, -0.1278, -0.5689, +0.3229, -0.5981, -0.4917, -0.4160, -0.5504, +0.2225, -0.1581, -0.6457, +0.1001, -1.0635, +0.2368, +0.2494, -0.4054, -0.1699, -0.1316, +0.2614, +0.3016, +0.4222, -0.1548, -0.0766, -0.5226, -0.3576, -0.2433, -0.5495, +0.0056, +0.0193, +0.2353, +0.3986, +0.3580, -0.7886, +0.3928, +0.1831, +0.4319, +0.2276, -0.3062, +0.0413, -0.4288, +0.1365, +0.3176, +0.3564, +0.5668, -0.4401, -0.9576, -0.1435, +0.0304, -0.5575, +0.0412, -0.1096, +0.2207, +0.1227, -0.0051, +0.5808, -0.1331, +0.1368, +0.4170, -0.8095],
[ -0.6368, -1.3221, -0.4492, -1.5414, +0.4004, -2.8780, -0.1748, -0.8166, +1.7066, +1.0714, -0.4755, +0.3020, +0.0422, +0.3466, +0.4472, -0.6209, -3.3768, -0.0806, +1.3624, -2.4155, +1.0886, +0.3412, +0.0891, +1.6821, -0.5361, +0.3952, +1.5120, +0.3910, +1.9500, -0.9065, -1.3452, +0.0904, -0.0389, +0.2817, -1.8375, +0.8131, -1.5287, +0.3115, +1.4069, -0.3424, +1.6101, +2.6775, +0.5516, +1.6500, -0.4138, -0.0170, +1.0008, -0.7865, +0.0551, +2.2068, -0.0108, +0.3207, -1.1884, +0.3792, -0.6435, +0.2858, -0.6881, +0.1554, -1.6926, -0.0975, -1.4120, -0.0827, -1.5186, +0.2526],
[ -0.2900, -0.2805, +0.9182, -0.8893, +0.7345, -0.9015, -0.2696, +0.2344, +0.3889, +0.6790, +0.3657, -0.1995, -0.6738, -0.4166, +0.1690, -0.3798, -0.9872, -0.2558, -0.4205, -0.6190, -0.0092, -0.2261, -0.2738, +0.2977, -0.7348, +0.4872, +0.4776, -0.1364, +0.5836, -0.2688, -0.4261, -0.3612, -0.3533, +0.4665, +0.0155, +1.0116, -0.7139, -0.3707, -0.4429, -0.0383, +0.6716, +0.5972, +0.3506, +0.3294, -1.3734, -0.5905, -0.1168, -0.2609, +0.3436, +0.8277, +0.4965, +0.3005, -0.2929, +0.1501, -0.2655, +0.3860, -0.3946, +0.8764, +0.7927, +0.0541, -1.0912, -0.2006, -0.6928, +0.4653]
# yapf: disable
weights_dense1_w = np.array(
[[
-0.5857, +0.1810, +0.2839, +0.1278, -0.4302, -0.3152, +0.5916, -0.0635,
+0.6259, +0.2873, -0.0572, -0.3538, -0.8121, +0.2707, +0.1656, -0.2103,
-0.1614, -0.2789, -0.5856, -0.4733, +0.1838, +0.1063, +0.7629, +0.0873,
+0.1480, +0.1768, +0.6522, +0.1158, -0.0816, +0.6542, -0.8870, +0.1775,
+0.1532, +0.2268, -0.0313, -0.0470, +0.5328, -0.0570, +0.4820, -0.3772,
-0.7581, +0.2835, -0.3566, +0.9371, -0.0441, -0.1797, -0.2859, -0.0238,
+0.0261, -0.0296, -0.1406, +0.2869, +0.1279, +0.6653, +0.5643, -0.3136,
+0.7751, +0.2341, +0.1903, +0.8283, -0.0697, +0.1276, -0.0250, -0.0053
],
[
+0.3741, +0.4844, -0.0638, -0.3205, +0.3137, +0.9636, +0.5329,
+0.6882, +0.2983, -0.6675, -0.6372, +0.2065, -0.2645, -0.4789,
+0.2326, -0.0691, -0.5905, -0.3354, +0.3428, +0.4253, +0.9111,
-0.4751, -0.2124, +0.3920, +0.2897, -1.1101, +0.1894, -0.4025,
-0.1125, -0.0627, +0.2347, -0.8787, +0.1019, +0.9128, +0.2544,
-0.3933, +0.6485, -0.1936, -0.2402, +0.5012, -0.0918, +0.3160,
-0.7860, +0.3439, -0.4268, -0.1788, -0.3930, +0.5128, +0.2338,
+0.2571, +0.1343, +0.9850, -0.7074, +0.3532, +0.3048, -0.4542,
+0.5539, -0.4409, -0.2003, -0.4837, -0.3554, -0.4447, -0.0817, -0.8497
],
[
+0.0825, +0.5847, +0.4837, +0.5144, +0.4770, +0.0199, +0.4275,
-0.4530, +0.8499, -0.2840, +0.3817, -0.5098, -0.2155, -0.1475,
+0.1145, -0.1871, -0.0526, +0.3583, -0.3537, -0.7111, -0.6116,
+0.3406, -0.6360, +0.7786, +0.6628, -0.0493, +0.3505, -0.0376,
-0.6556, +1.0748, -0.5329, +0.6477, -0.7117, +0.3723, +0.6006,
+0.0171, +0.0012, +0.4910, -0.5651, -0.6868, +0.2403, +0.0254,
-0.4416, +0.7534, -0.0138, -1.1298, +0.5447, +0.0974, +0.1988,
-0.2161, -0.3126, -0.5731, -0.1278, +0.2995, -0.1200, -0.7917,
+0.5326, +0.4562, -0.0144, +0.5717, -0.4065, +0.1494, +0.7100, +0.2461
],
[
-0.2861, +0.4314, -0.2982, -0.1401, -0.1033, +0.5287, -0.6620,
-0.3975, +0.0038, +0.1991, -0.7079, -0.9000, +0.1659, +0.3623,
-0.0752, -0.1907, -0.2335, -0.5143, +0.2324, -0.0487, +0.1583,
-0.5989, +0.5957, +0.2150, -0.0335, +0.2154, +0.3279, -0.7976,
+0.5320, -0.4438, +0.2170, -0.3841, -0.0039, -0.0847, -0.0028,
-0.4278, -0.2393, -0.9239, +0.2880, -0.1437, -0.0941, -0.0796,
-0.3906, -0.3224, +0.1038, -0.1929, -0.2713, -0.4157, -0.2178,
+0.5729, -0.2065, +0.0059, +0.3879, +0.0590, +0.1759, +0.0677,
-0.0170, -0.2252, +0.3301, -0.0599, +0.3791, -0.1455, +0.2200, -0.0499
],
[
-0.4403, +0.7182, +0.7941, +0.1469, +1.5777, +0.3426, +0.0923,
+0.2160, +1.1492, -0.5206, -0.2659, -0.1504, +0.2739, -1.3939,
+0.8992, -1.1433, -0.3828, -0.2497, -0.2172, +0.2318, -0.3605,
+0.6413, -1.9095, +1.4785, -0.1274, -0.7208, -0.0802, -0.8779,
-1.6260, +0.9151, +0.8289, -0.0902, -0.3551, +0.6198, +1.7488,
+0.0739, -1.2022, -0.3536, -1.5187, +0.1839, +1.4258, +0.4217,
+0.1503, -0.0460, +0.2327, -0.4139, -0.3668, +0.2997, +0.6856,
+0.6917, -0.3856, -0.3620, +0.1578, -0.8349, -1.0796, -0.0319,
-1.1966, -0.8122, +0.5053, -0.5033, -0.9207, -0.1193, -0.7625, +0.1379
],
[
-0.0321, -0.3206, -0.4516, +0.3420, +1.0964, +0.0311, +0.4654,
-0.2367, +0.3347, -0.2798, -0.8169, -0.1555, +0.9397, -0.5597,
+0.7113, -0.3642, -0.2840, -0.1323, -0.1000, +0.2283, +0.3612,
-0.4784, +0.0504, +0.5310, -0.0887, +0.2926, +0.5069, -0.5645,
-0.0976, -0.2594, +0.4425, +0.9223, -0.5637, -0.2336, -0.1316,
-0.6564, -0.2780, -0.2409, -0.1637, +0.4506, +0.7018, -0.1299,
+0.7172, +0.1207, +0.4375, +0.3836, +0.2781, -0.7792, -0.5317,
+0.4510, +0.2423, -0.0588, -0.4254, -0.6381, -0.8205, +0.6417,
+0.1904, -0.2618, +0.5900, -0.3899, -0.7851, -0.4769, -0.3688, -0.3510
],
[
-0.8366, -0.3157, -0.1130, +0.2005, +0.3713, -0.4351, -0.1278,
-0.5689, +0.3229, -0.5981, -0.4917, -0.4160, -0.5504, +0.2225,
-0.1581, -0.6457, +0.1001, -1.0635, +0.2368, +0.2494, -0.4054,
-0.1699, -0.1316, +0.2614, +0.3016, +0.4222, -0.1548, -0.0766,
-0.5226, -0.3576, -0.2433, -0.5495, +0.0056, +0.0193, +0.2353,
+0.3986, +0.3580, -0.7886, +0.3928, +0.1831, +0.4319, +0.2276,
-0.3062, +0.0413, -0.4288, +0.1365, +0.3176, +0.3564, +0.5668,
-0.4401, -0.9576, -0.1435, +0.0304, -0.5575, +0.0412, -0.1096,
+0.2207, +0.1227, -0.0051, +0.5808, -0.1331, +0.1368, +0.4170, -0.8095
],
[
-0.6368, -1.3221, -0.4492, -1.5414, +0.4004, -2.8780, -0.1748,
-0.8166, +1.7066, +1.0714, -0.4755, +0.3020, +0.0422, +0.3466,
+0.4472, -0.6209, -3.3768, -0.0806, +1.3624, -2.4155, +1.0886,
+0.3412, +0.0891, +1.6821, -0.5361, +0.3952, +1.5120, +0.3910,
+1.9500, -0.9065, -1.3452, +0.0904, -0.0389, +0.2817, -1.8375,
+0.8131, -1.5287, +0.3115, +1.4069, -0.3424, +1.6101, +2.6775,
+0.5516, +1.6500, -0.4138, -0.0170, +1.0008, -0.7865, +0.0551,
+2.2068, -0.0108, +0.3207, -1.1884, +0.3792, -0.6435, +0.2858,
-0.6881, +0.1554, -1.6926, -0.0975, -1.4120, -0.0827, -1.5186, +0.2526
],
[
-0.2900, -0.2805, +0.9182, -0.8893, +0.7345, -0.9015, -0.2696,
+0.2344, +0.3889, +0.6790, +0.3657, -0.1995, -0.6738, -0.4166,
+0.1690, -0.3798, -0.9872, -0.2558, -0.4205, -0.6190, -0.0092,
-0.2261, -0.2738, +0.2977, -0.7348, +0.4872, +0.4776, -0.1364,
+0.5836, -0.2688, -0.4261, -0.3612, -0.3533, +0.4665, +0.0155,
+1.0116, -0.7139, -0.3707, -0.4429, -0.0383, +0.6716, +0.5972,
+0.3506, +0.3294, -1.3734, -0.5905, -0.1168, -0.2609, +0.3436,
+0.8277, +0.4965, +0.3005, -0.2929, +0.1501, -0.2655, +0.3860,
-0.3946, +0.8764, +0.7927, +0.0541, -1.0912, -0.2006, -0.6928, +0.4653
]])
weights_dense1_b = np.array([
-0.1146, +0.2897, +0.0137, +0.0822, +0.0367, +0.0951, -0.0657, +0.0653,
-0.0729, -0.0501, -0.6380, -0.4403, +0.0660, +0.0693, -0.4353, -0.2766,
-0.1258, -0.6947, -0.1616, -0.0453, +0.1498, -0.2340, -0.0764, +0.2020,
+0.4812, +0.0908, -0.1883, -0.0753, -0.0373, -0.4172, -0.1071, +0.0861,
-0.1550, -0.0648, -0.1473, +0.1507, -0.0121, -0.5468, -0.1529, -0.3341,
+0.0239, -0.0463, -0.0044, -0.0541, +0.0384, +0.3028, +0.3378, +0.0965,
+0.0740, +0.1948, -0.1655, +0.1558, +0.1367, -0.1514, +0.0540, -0.0015,
-0.1256, +0.3402, -0.0273, -0.2239, -0.0073, -0.6246, +0.0755, -0.2002
])
weights_dense1_b = np.array([ -0.1146, +0.2897, +0.0137, +0.0822, +0.0367, +0.0951, -0.0657, +0.0653, -0.0729, -0.0501, -0.6380, -0.4403, +0.0660, +0.0693, -0.4353, -0.2766, -0.1258, -0.6947, -0.1616, -0.0453, +0.1498, -0.2340, -0.0764, +0.2020, +0.4812, +0.0908, -0.1883, -0.0753, -0.0373, -0.4172, -0.1071, +0.0861, -0.1550, -0.0648, -0.1473, +0.1507, -0.0121, -0.5468, -0.1529, -0.3341, +0.0239, -0.0463, -0.0044, -0.0541, +0.0384, +0.3028, +0.3378, +0.0965, +0.0740, +0.1948, -0.1655, +0.1558, +0.1367, -0.1514, +0.0540, -0.0015, -0.1256, +0.3402, -0.0273, -0.2239, -0.0073, -0.6246, +0.0755, -0.2002])
weights_dense2_w = np.array([
[ +0.5019, +0.3831, +0.6726, +0.3767, +0.2021, -0.1615, +0.3882, -0.0487, -0.2713, +0.1173, -0.2218, +0.0598, +0.0819, -0.1157, +0.5879, -0.3587, +0.1376, -0.2595, +0.0257, -0.1182, +0.0723, +0.5612, -0.4087, -0.4651, +0.0631, +0.1786, +0.1206, +0.4791, +0.5922, -0.4444, +0.3446, -0.0464],
[ -0.0485, +0.0739, -0.6915, +0.5446, -0.2461, +0.1557, +0.8993, -0.7537, +0.1149, +0.0575, -0.1714, -0.3796, +0.3508, -0.2315, +0.4389, -1.4456, -1.3490, -0.1598, -1.0354, -0.2320, -0.3765, +0.1070, -0.7107, +0.4150, +0.2711, -0.2915, -0.7957, +0.7753, -0.0425, -0.1352, +0.3018, -0.0069],
[ -0.4047, +1.0040, -0.4570, +0.3017, +0.1477, -0.0163, +0.4087, -0.6368, -0.0764, -0.0695, +0.0208, -0.2411, +0.1936, +0.0047, +0.0107, -0.8538, -0.5887, -0.0524, -1.4902, +0.2858, +0.4396, -0.3433, -0.6778, -0.7137, +0.4587, +0.3359, -0.7350, -1.0813, -0.1296, +0.1748, -0.3830, -0.2103],
[ +0.0503, -0.3342, -0.6057, +0.2217, +0.3164, -0.1881, -0.5867, -0.2471, -0.2527, -0.0444, +0.1874, -0.0960, +0.2039, -0.0488, +0.1741, -0.1623, -0.0758, -0.2354, -0.5986, -0.2129, -0.2470, +0.3317, -0.4795, -0.6380, +0.1494, +0.0115, -0.2746, -0.8428, -0.0118, -0.0604, +0.0886, -0.0408],
[ -0.1932, -1.3896, +0.3919, -0.4700, -0.9806, -0.1554, +0.3132, +0.4138, -0.4943, -0.1408, -0.0976, +0.1551, -0.0180, +0.0864, -0.0053, -0.2430, +0.4948, +0.2709, -0.3488, +0.2085, -0.2124, -0.3025, -0.0692, +0.3884, +0.5764, +0.5783, +0.4351, -0.2633, -0.9288, +0.2218, -0.9049, -0.2970],
[ -0.2841, -0.3393, -0.1062, -0.1415, +0.0257, +0.0816, -0.4833, -0.2775, +0.0308, -0.0344, +0.5451, +0.1588, -0.7454, -0.1444, +0.4189, -0.2001, -2.0586, -0.0616, -1.4463, +0.0076, -0.7703, +0.3279, -0.7009, +0.6046, -0.1615, -0.5188, -0.7503, +0.0615, +0.1815, -0.2512, +0.0321, -0.1834],
[ +0.3751, +0.2932, -0.6815, +0.3771, +0.0603, -0.2035, -0.2644, -1.0120, -0.0661, -0.0846, +0.1209, +0.0367, +0.0493, -0.2603, -0.1608, -0.7580, -0.8609, +0.1415, -0.7626, -1.0209, -0.7498, -0.0732, -0.8138, -0.2292, +0.5803, -0.2718, -1.4684, -0.1584, +0.2096, +0.1336, +0.3632, +0.0216],
[ -0.0625, -0.1233, -0.2715, +0.5541, +0.3121, +0.0265, +0.4501, -1.1024, -0.1160, -0.1005, -0.0844, -0.0516, +0.0916, +0.0901, +0.3710, -0.5753, -0.3728, -0.1103, -0.6285, -0.2179, +0.1570, +0.1168, -0.9312, +0.0135, -0.0376, -0.1693, -0.5358, -0.0028, +0.2105, -0.7373, +0.2776, +0.2326],
[ -0.5378, -0.3201, +0.3606, +0.1331, +0.0120, -0.2421, -0.0230, +0.4622, -0.3140, +0.0803, -0.6897, -0.4704, +0.2685, +0.0803, -0.7654, -0.1433, +0.0242, +0.0917, +0.2458, +0.0457, -0.2503, -0.1197, +0.1454, -0.1523, -0.4095, +0.1856, +0.0678, -1.0011, +0.0117, +0.1789, -0.4063, -0.0888],
[ -0.6352, -0.6358, -0.2066, +0.0758, -0.2973, -0.3014, -0.0556, -0.0912, -0.2729, -0.1492, -0.1928, -1.8768, +0.2183, +0.0801, +0.1288, -1.2493, +0.1115, +0.2797, -0.1458, +0.0062, -0.0402, -0.8945, -0.2231, -0.1154, +0.3635, -0.3021, +0.1402, -0.7347, +0.2772, +0.3182, -0.9708, +0.0376],
[ +0.6899, +0.3471, -0.5863, +0.1497, +0.1616, -0.0497, +0.3579, -0.6421, +0.4529, -0.1588, +0.9250, +0.2357, -0.0712, -0.1793, -0.0231, -0.4814, -0.7654, +0.0591, -0.6866, -0.1705, +0.2255, -0.0007, -0.3890, +0.6114, +0.0443, -0.6929, -0.7734, +0.2314, -0.0231, -0.6386, +0.1237, +0.0472],
[ -0.2496, -0.1687, +0.1234, +0.4152, +0.4207, -0.1398, +0.1287, +0.5903, +0.0530, -0.1181, +0.0803, -0.0641, -0.1198, -0.4702, -0.3669, +0.2340, -0.3778, +0.4341, +0.2411, -0.2171, -0.3051, -0.2397, +0.1756, +0.4040, +0.0682, +0.1575, +0.4137, +0.0887, -0.1998, +0.2221, -0.2474, -0.0559],
[ -2.2466, -1.2725, +0.5947, -0.3192, -0.2665, -0.0129, -0.7615, +0.1148, +0.2745, -0.0556, -1.3313, -0.7143, -0.5119, -0.0572, -0.1892, -0.3294, -0.0187, -0.7696, +1.0413, +0.4226, +0.1378, -1.3668, +0.9806, -0.1810, -0.2307, -0.4924, +0.7163, -1.2529, -0.3216, +0.1683, -0.6657, -0.1121],
[ +0.1227, +0.2433, -0.1292, -0.7152, -0.1342, -0.1116, -0.2695, +0.0243, -0.0770, -0.1713, +0.2837, +0.2076, -0.7322, -0.1657, -0.3407, -0.4378, +0.0679, -0.3777, +0.3025, -0.6780, -0.2326, +0.1463, +0.0535, -0.6373, -0.2027, -0.5404, -0.1598, +0.1511, -0.1776, +0.0854, +0.1753, -0.0342],
[ -0.1772, -0.2654, -0.4170, -0.3301, +0.2956, -0.4234, +0.0835, +0.2869, -0.2804, -0.2073, -0.3291, -0.5897, -0.4116, -0.0447, +0.1601, +0.1602, +0.1691, -0.2014, -0.0502, +0.1167, -1.0103, -0.4297, -0.2039, -0.0859, +0.2756, -0.1768, -0.2726, -0.0256, -0.0834, +0.0852, +0.0930, -0.0606],
[ -0.5390, -0.5441, +0.3202, -0.1018, +0.0059, +0.1799, -0.1917, +0.3674, +0.2576, -0.0707, -0.4401, -0.3990, +0.0565, +0.0751, -0.5959, +0.3866, +0.2763, -0.2564, +0.4937, +0.5076, +0.3941, -0.3593, +0.4346, +0.2561, -0.0762, -0.2873, +0.6820, -0.3032, -0.3268, +0.1319, -0.3643, +0.0292],
[ +0.1816, -0.0451, -0.9370, +0.1335, -0.1030, -0.0400, +0.0311, -1.3751, -0.1860, +0.1559, +0.5395, +0.3994, -0.1703, -0.1157, +0.6342, -0.4726, -0.6213, -0.2096, -0.7549, -0.9815, -0.3798, +0.5286, -0.8413, +0.2577, +0.2223, -1.2260, -1.3571, -0.0970, +0.3334, -0.2096, +0.3566, -0.1703],
[ +0.0635, +0.1541, -0.2206, +0.0924, +0.1302, +0.1947, -0.3868, -0.6834, -0.0603, -0.3752, +0.3103, -0.1699, -0.0833, -0.1190, -0.0310, -0.5480, -1.1421, -0.0020, -0.3611, -0.3800, -0.0638, +0.0811, -0.5886, +0.0690, +0.1925, +0.0710, -0.3142, +0.1837, +0.2125, -0.1217, +0.2185, +0.0458],
[ -0.3973, +0.0486, +0.2518, -0.3208, +0.1218, -0.5324, -0.3417, +0.0322, -0.0088, +0.0214, +0.2725, +0.0960, -0.2949, -0.1770, -0.1511, +0.0259, +0.1161, -0.8829, +0.2415, +0.0939, -0.7213, +0.2220, +0.1687, -0.1802, -0.0539, +0.1786, +0.6638, +0.3559, +0.2343, +0.3212, +0.4396, -0.1385],
[ -0.2384, -0.5346, -0.2323, -0.2277, +0.3503, -0.0308, -0.2004, -0.1096, -0.2587, -0.1143, +0.2579, +0.2382, -0.5883, -0.1277, +0.2257, -0.0244, -0.9605, -0.4244, -0.7321, +0.3017, -1.6256, -0.2074, -0.8327, +0.0607, -0.0751, -0.0153, -0.4485, +0.1758, +0.1821, +0.2625, +0.0108, -0.2395],
[ -0.5639, -0.3613, +0.1291, -0.2132, +0.4927, -0.0604, -0.8067, +0.0933, -0.1483, -0.0321, -0.6843, -0.3064, -0.5646, -0.2040, -0.0414, +0.6092, +0.4903, -0.9346, +0.3389, +0.2040, -0.0295, -0.2196, +0.4264, +0.0312, -1.1801, +0.3008, +0.7719, +0.2140, -0.0257, +0.5275, -0.0553, +0.0362],
[ -0.6039, -1.2848, +0.6301, -0.1285, +0.2338, -0.2585, -0.3217, +0.4326, +0.0441, -0.0356, -0.5720, -0.8739, -0.3924, +0.2499, -0.2620, +0.1396, -0.0701, -0.2239, +0.2612, +0.1646, +0.7769, -0.6382, +0.8720, -0.1956, -0.1779, -0.1608, -0.0358, -0.4453, -0.1154, +0.5532, -0.9282, +0.0031],
[ -0.1990, +0.3708, -0.0049, -0.3260, -0.0465, +0.0415, +0.1601, +0.0019, +0.0114, +0.0438, +0.0893, +0.3056, -0.6166, +0.1145, -0.6742, +0.0483, +0.0739, -0.1139, +0.5772, -1.5569, +0.4253, -0.0769, +0.4014, -0.6817, +0.0228, -0.0383, -0.0844, -0.1560, +0.1414, -0.3420, +0.3664, -0.2293],
[ -0.0917, -0.8692, +0.4704, +0.1796, -0.1346, -0.5246, +0.0622, +0.3420, -0.5879, -0.0445, -0.3444, -0.0490, +0.0956, -0.0753, -0.8856, +0.1275, +0.1592, +0.3569, +0.1774, +0.2723, +0.1125, -0.1718, +0.2451, -0.0132, +0.1584, -0.0197, +0.0700, -0.2156, +0.0094, +0.4639, -0.6721, -0.2180],
[ +0.0578, -0.1570, -0.1623, -0.1359, +0.1346, +0.1821, -0.0696, -0.0570, +0.0011, +0.1216, +0.1069, -0.0841, +0.1017, -0.1663, -0.6005, -0.4583, -0.2606, -0.0292, +0.0321, -0.5614, -0.4416, +0.0355, +0.2081, +0.3517, +0.0619, -1.0007, -0.0765, +0.1769, -0.1286, +0.5833, -0.1758, -0.1957],
[ -0.0013, +0.3157, +0.0395, -1.0792, -0.1198, -0.2945, -0.0090, +0.3281, -0.0618, -0.0806, +0.0768, +0.2802, -0.2311, -0.2302, +0.0506, +0.0552, +0.3727, +0.3610, +0.2029, -0.1743, +0.4557, -0.1761, -0.5039, -0.9115, +0.2842, +0.1317, -0.5961, -0.4214, -1.0727, +0.3308, +0.2380, -0.3348],
[ +0.2455, -0.1299, +0.3117, -1.0169, -0.3417, +0.0310, -0.4793, +0.5334, -0.4799, -0.3291, -0.1344, +0.3732, -0.1514, +0.1574, -0.1819, -0.0206, +0.5675, -0.6992, +0.4815, -0.1497, -0.3804, +0.1389, +0.5850, -0.2920, +0.2569, -0.3527, +0.3641, -0.2014, -0.1457, +0.2365, -0.2335, -0.2610],
[ -0.2252, +0.1225, +0.0953, -0.0193, +0.3955, -0.0800, +0.0090, -0.4155, +0.1851, +0.3392, -0.3260, -0.3907, +0.1320, +0.1266, +0.0579, +0.1819, -0.5793, -0.2230, +0.1351, -0.1519, -0.0527, -0.0036, +0.1243, +0.1387, -0.2874, -0.4997, -0.3251, +0.0435, -0.5244, +0.1051, -0.2081, +0.2126],
[ -0.6574, +0.6789, +0.1026, -0.5191, +0.1058, -0.6812, +0.1798, -0.1029, +0.0757, -0.0089, +0.1539, +0.4688, -0.1306, +0.0595, -0.8136, -0.4843, +0.3675, +0.1800, +0.2641, -0.0589, +0.0613, +0.2019, -0.0765, -0.1216, -0.4588, +0.0629, +0.1133, +0.7055, -2.8075, +0.3867, +0.4791, -0.1118],
[ +0.2771, +0.3461, -0.8556, -0.0316, +0.3640, -0.1380, -0.3765, -0.9258, -0.0693, -0.1283, +0.0576, -0.0792, +0.4468, -0.5001, +0.5939, -1.2226, -0.9252, -0.3980, -1.3444, -0.9488, -0.7199, +0.4289, -1.8749, -0.0867, +0.3905, -0.4535, -0.5607, -0.2247, -0.0359, -0.4125, +0.7124, -0.1963],
[ -0.2584, -0.5358, -0.0916, +0.0765, +0.0615, -0.1887, -0.2253, -0.7855, -0.0061, -0.1887, +0.5511, +0.3207, -0.2055, -0.1694, +0.4772, -1.0356, -0.9884, -0.2620, -0.1214, +0.9733, -0.9700, -0.3205, -0.7005, -0.2960, +0.1132, -0.0352, +0.3491, -0.2440, +0.1108, +0.1083, +0.3029, -0.0031],
[ -0.6217, +0.1238, +0.0245, -0.1769, -0.2487, +0.0526, -0.0090, +0.1370, +0.2666, -0.0743, -0.8230, -0.7723, -0.0929, -0.1532, +0.6103, -0.4931, -1.3329, -0.3735, +0.0217, -0.1539, -0.4946, -1.0838, -0.5840, +0.1618, +0.2584, +0.4200, +0.1171, -0.5601, +0.1604, +0.0864, +0.2287, -0.0057],
[ -0.2220, +0.4837, -0.0825, +0.0143, +0.2734, -0.0853, +0.1578, -0.0112, +0.1829, +0.0390, +0.2151, -0.1538, -0.1111, -0.0773, +0.3439, -0.2134, -0.2884, -0.3831, +0.2767, -0.3149, +0.1463, +0.3230, +0.2187, -0.2309, -0.1096, +0.3709, -0.0105, +0.3709, +0.3034, -0.7602, +0.5988, -0.0595],
[ -0.6073, +0.1780, +0.1682, +0.1604, +0.3662, -0.0385, -0.1495, +0.3012, -0.2065, -0.0163, -1.0465, -0.8268, -0.0190, +0.0964, -0.2755, +0.0965, -0.3466, -0.3758, -0.1113, +0.1462, +0.3280, -0.1600, +0.1023, +0.1998, -0.3642, +0.2736, +0.3782, -0.2681, +0.2334, +0.1721, +0.0385, +0.0348],
[ -0.0582, -0.5750, +0.1279, +0.3630, -0.2404, -0.1511, +0.2650, -0.0324, -0.2258, +0.0007, +0.3051, -0.1875, -0.5106, +0.0104, +0.1335, -0.5282, -0.2210, +0.2648, -0.7506, +0.4975, -1.7048, +0.2378, -0.1771, +0.2981, +0.1252, +0.1384, -0.3384, -0.0830, +0.0966, +0.3728, -0.1980, -0.1953],
[ -1.0735, -0.2780, +0.1428, -0.0624, -0.0311, -0.2687, -0.1623, +0.2996, +0.1782, -0.1403, -0.3761, -1.3413, -0.2020, -0.0492, -0.6636, -0.2737, +0.2228, +0.3109, +0.1596, +0.0172, +0.1325, -1.4936, -0.0615, -0.1547, -0.2285, +0.2648, -0.1008, -1.6756, -0.2352, +0.0998, -0.4550, +0.2028],
[ -0.3866, -0.0107, +0.1052, +0.1423, +0.1160, +0.1712, -0.6206, -0.3505, -0.3298, -0.0362, +0.6768, +0.2086, -0.4348, -0.3577, +0.0131, -0.1640, +0.0160, -0.3891, -0.0180, -0.1064, -0.2494, +0.0340, +0.2225, -0.1320, -0.3550, -0.3005, +0.0118, +0.2782, +0.4691, -1.3792, +0.1971, -0.0598],
[ +0.0215, +0.1885, -0.5360, -0.1283, +0.4689, +0.1426, -0.2809, -0.8197, +0.1951, -0.1620, +0.0627, +0.2864, -0.3069, -0.1170, +0.0545, -0.4527, -0.6646, -0.1546, -0.1794, -0.5350, -0.1060, -0.0198, -0.5782, -0.2201, +0.0361, -0.2497, -0.1527, -0.1489, +0.1034, +0.0925, +0.0368, -0.0352],
[ +0.2459, +0.3230, -0.0494, -0.5631, +0.0600, -0.3036, -0.5443, +0.1081, -0.2231, +0.0734, +0.0289, +0.4205, -0.6415, -0.1305, -0.0717, +0.2971, +0.0476, -1.3001, +0.5122, -0.0005, -0.3572, +0.0727, +0.1713, -0.4751, -0.3614, -0.0957, -0.0942, +0.0580, +0.2393, +0.0038, +0.1938, -0.1704],
[ +0.3352, -0.0882, -0.0349, -0.6093, +0.4262, -0.1350, -0.0687, -0.2459, -0.5564, -0.2956, +0.1619, -0.0813, -0.5128, -0.2209, +0.3870, -0.0804, +0.7676, -0.1745, -0.3860, -0.5517, -0.6899, -0.6400, +0.6095, -0.5337, +0.3452, -0.6608, +0.0662, +0.1741, +0.1653, -0.4191, +0.1051, -0.3116],
[ -0.0527, -1.3119, +0.3441, -0.0041, -0.5938, -0.4224, +0.3973, +0.4673, -0.0613, -0.0191, +0.1297, -0.2211, -0.0880, +0.0319, +0.0661, -0.2075, +0.4380, +0.3197, +0.0989, +0.2346, -0.0142, -1.2137, +0.1618, -0.3300, +0.4591, +0.4910, +0.3537, -0.5902, -0.0616, +0.2882, -0.0900, -0.0208],
[ -0.7068, -0.7952, +0.4496, +0.1237, -0.2000, -0.5966, +0.3920, +0.3458, +0.0036, -0.0666, -0.3061, -0.1172, +0.0446, +0.1768, -0.5318, +0.2083, +0.3371, +0.1497, +0.4244, +0.3980, +0.2023, -0.8931, +0.1860, -0.6889, -0.3250, +0.1250, +0.1510, -0.3405, -0.4040, +0.1598, -0.9933, +0.0233],
[ -1.2305, -0.3178, +0.0536, -0.0585, -0.7097, +0.3196, +0.2899, +0.8200, +0.0384, +0.1733, -1.1839, -2.2545, +0.0653, +0.1376, -0.1359, -0.1202, -0.0831, -0.5397, +0.1100, +0.1386, -0.1271, -0.6298, +0.1038, -0.1213, -0.1461, -0.4508, +0.5106, -0.8266, -0.6204, +0.3753, -0.4897, -0.0751],
[ -0.3676, -0.5547, +0.0897, -0.0230, -0.3045, -0.1885, -0.5990, +0.3622, -0.2240, -0.1205, -0.3056, +0.7085, +0.0053, -0.1213, -0.3023, +0.1433, -0.2090, -0.0412, +0.2561, +0.1313, -0.2803, +0.2543, +0.0571, -0.9791, -0.0167, -0.2928, -0.3020, -0.2271, +0.0507, -0.1310, -0.6347, -0.0889],
[ -0.2794, +0.0675, -1.0020, -0.2234, +0.3937, -0.2857, +0.1058, -1.0755, -0.0377, -0.2753, -0.0501, -0.0493, -0.2987, -0.2214, +0.2869, -1.0882, -1.2635, -1.2235, -0.5762, -0.4528, -0.1372, -0.0192, -1.3768, +0.2337, +0.2008, -0.2517, -0.3918, -0.6362, -0.1762, -0.9261, +0.1711, -0.0094],
[ -0.1099, -0.2142, -0.0006, -0.4617, -0.0286, +0.3482, -0.7728, -0.4384, +0.0050, -0.0151, +0.1974, +0.2815, -0.5295, -0.2581, +0.3404, -1.6254, -1.3208, -0.1648, -0.5207, +0.4104, -0.2795, +0.0613, -1.5642, -0.1178, -0.1354, +0.0375, +0.3323, +0.0540, +0.2038, -0.3223, +0.4603, -0.3780],
[ -0.3999, -0.3719, +0.1918, -0.4738, -0.0009, +0.0419, +0.1046, +0.2675, +0.1359, -0.2536, -0.3485, -0.3118, -0.3613, +0.0914, -0.4486, +0.2719, +0.2876, -0.0685, +0.4309, +0.1856, +0.4678, -0.3314, +0.0211, +0.2575, +0.5077, -0.1494, +0.5110, -0.6869, -1.4053, +0.3093, -0.2914, -0.1501],
[ +0.3543, +0.3915, +0.0536, +0.3995, +0.2165, -0.1133, -0.1209, +0.0824, -0.0723, -0.0774, -0.4248, -0.0243, -0.1089, -0.1408, +0.2072, -0.1309, -1.5186, -0.4079, -0.0530, -0.3525, +0.6782, +0.1991, -0.0292, +0.1339, -0.1074, +0.2312, +0.1969, +0.4662, +0.5312, -0.3306, +0.0622, +0.1057],
[ -1.1778, +0.2978, +0.0443, +0.1657, +0.1317, -0.1250, -0.0459, +0.0777, +0.1359, -0.0055, +0.2364, -2.3659, +0.2214, -0.1489, -0.3051, -0.5094, +0.1495, +0.3328, +0.1264, -0.0217, +0.2321, -0.6466, -0.1813, +0.5276, +0.1975, +0.3752, +0.1469, -0.8019, +0.2427, +0.1543, +0.2140, -0.1592],
[ -0.7753, -1.3502, +0.3157, +0.1847, +0.0661, -0.5501, +0.3482, +0.6112, +0.0207, +0.0534, -0.2106, -1.0144, -0.0836, -0.0275, -1.0761, +0.2131, +0.3135, +0.3134, +0.1974, +0.0182, +0.1975, -1.1221, +0.2958, -0.2610, +0.0865, +0.3592, +0.4317, -0.3505, -0.4557, +0.3033, -0.5797, -0.2988],
[ +0.4103, -0.0643, +0.0803, +0.2177, +0.1028, -0.2668, +0.0084, -0.2340, -0.2571, +0.0334, +0.3451, -0.0055, +0.0216, -0.1460, +0.5293, -0.2615, -0.3035, +0.1736, -0.4206, -0.2186, +0.1343, +0.6001, -0.0499, -0.2777, -0.0160, -0.4303, -0.2795, +0.1932, +0.4219, -0.0800, +0.1819, -0.1007],
[ -0.7074, -0.0546, +0.4495, +0.1427, +0.3306, +0.0811, -0.5433, -0.0609, -0.2128, -0.1059, -1.0477, -0.4679, -0.1780, -0.1373, -0.3672, +0.0724, -0.0554, -0.5400, +0.0457, -0.0469, -0.0367, -0.4609, +0.1668, -0.0266, -0.9007, +0.2975, +0.5204, -0.0453, -0.1314, -0.0980, +0.1424, -0.1877],
[ +0.0657, +0.1230, -0.2558, +0.3103, -0.0795, -0.1243, +0.1956, +0.0262, -0.2626, -0.0554, +0.3760, +0.3076, -0.4633, +0.0790, +0.2363, -0.3311, +0.1235, -0.1727, -0.2468, +0.0188, -0.1121, -0.2807, -0.5865, -0.4197, +0.1949, -0.4970, -1.0413, -0.1698, +0.1798, +0.2004, -0.0514, +0.0254],
[ -0.1566, -1.1156, +0.4431, -0.1503, -0.5682, +0.1822, -0.1201, +0.5151, -0.1386, -0.1764, +0.2063, -0.8582, +0.3750, -0.1405, +0.0852, +0.2641, -0.1951, -0.0575, -0.4181, +0.2273, +0.1332, -0.2797, +0.5406, -0.0869, +0.2453, +0.0648, +0.2252, -0.0628, -0.6882, -0.0514, -0.4663, -0.0954],
[ -0.4780, +0.5844, +0.1782, -0.0831, +0.1547, -0.0595, -0.5646, -0.0488, -0.1774, -0.0098, +0.1833, +0.3520, -0.3359, -0.1492, +0.1139, -0.1223, -0.5312, -0.5361, +0.1689, -0.2020, +0.1069, +0.2327, +0.2887, +0.0526, -0.5916, -0.2435, -0.2342, +0.3422, +0.4399, -1.1880, +0.1293, -0.1021],
[ -1.2784, -1.8266, +0.0630, -0.3332, -0.5833, -0.3733, +0.3265, +0.1977, +0.0716, -0.2575, +0.0403, -0.1961, +0.1541, -0.2311, -0.1734, -0.1785, +0.0168, +0.1134, +0.0407, -0.1661, +0.5985, -1.9681, +0.1342, +0.3432, +0.3934, +0.0663, +0.3141, -2.0177, -1.7071, +0.2942, -1.0684, -0.0737],
[ +0.1763, +0.2191, +0.2609, +0.0723, +0.1038, -0.2516, -0.9086, +0.1536, +0.0153, +0.1061, +0.1675, +0.3839, -0.5326, +0.2007, -0.4943, -0.1048, +0.1614, -0.4703, +0.3453, -0.7441, -0.6187, +0.4247, +0.1721, -0.1776, -0.0919, -0.8387, +0.0798, -0.0598, +0.2711, -0.0508, +0.1761, +0.0029],
[ -0.2003, +0.2194, -0.6280, +0.1593, +0.1648, -0.1007, +0.3162, -0.3881, -0.1584, -0.0148, +0.7057, +0.0085, +0.3488, +0.0977, +0.4018, -0.8195, -0.1944, +0.4359, -0.6605, -0.1929, +0.2237, +0.1087, -0.4213, -0.7149, +0.3972, -0.1313, -0.2815, -0.7234, -0.0561, -0.5364, +0.0178, +0.0349],
[ +0.0567, +0.1687, +0.0007, +0.2939, -1.3854, +0.0168, +0.1909, +0.4919, -0.4547, +0.0562, -0.1188, +0.1653, -0.0265, -0.0541, -0.1117, -0.3240, +0.2545, +0.6516, +0.0124, -0.1258, -0.0656, -0.3524, +0.0174, +0.3926, +0.1125, +0.2834, -0.1961, -0.3603, +0.1783, -0.0224, -0.6900, -0.1688],
[ +0.0672, +0.6339, -0.3839, +0.0077, +0.8224, -0.3197, -0.0589, -0.1318, +0.0222, -0.1530, +0.1237, +0.4014, -0.1952, -0.1130, +0.4214, -0.2741, +0.2291, +0.0757, +0.0563, -0.0967, +0.4210, +0.5133, +0.0412, -0.9212, +0.1377, -0.4068, -0.3652, +0.4283, +0.6182, -0.6187, +0.1997, +0.1240],
[ -0.0067, +0.3307, -0.7751, -0.2084, +0.4740, -0.0264, -0.0768, -0.9519, -0.0632, -0.0753, +0.3293, +0.5260, -0.6023, +0.0060, +0.2799, -0.2904, -0.8262, -0.6644, -0.3900, -0.1461, +0.4965, +0.3996, -0.7569, +0.0612, +0.5168, -0.5160, -0.4875, +0.3759, +0.0295, +0.1027, +0.6096, -0.0115],
[ -0.0110, +0.4652, -0.1486, -0.6029, +0.2581, -0.3184, -0.3759, +0.3213, -0.2748, -0.0630, +0.0953, +0.2101, -1.2738, -0.1353, +0.2710, -0.2276, +0.2586, -0.2347, -0.3320, +0.0487, -0.2318, -0.1002, +0.1236, +0.2660, -0.1172, +0.1437, -0.0850, +0.1659, -0.2152, -0.0764, +0.2838, -0.1325],
[ +0.0152, -0.0906, -0.1897, -0.3521, -0.1836, -0.1694, -0.4150, -0.1695, +0.0509, -0.0716, +0.3118, +0.2422, -0.5058, -0.0637, -0.1038, -0.2828, -0.0528, -0.2051, +0.2062, -0.2105, -0.7317, +0.1881, -0.2992, -0.0883, +0.0115, -1.5295, -0.1671, +0.0411, +0.0648, -0.0119, -0.2941, +0.0273],
[ +0.5028, +0.1780, -0.4643, -0.0373, +0.3067, -0.1974, +0.2643, -0.2365, -0.2083, +0.0472, +0.4830, +0.0630, +0.2155, -0.0916, +0.6290, -0.4427, -0.6266, +0.3576, -0.3541, -0.2034, +0.3733, +0.8247, -0.5837, -0.4372, +0.2696, -0.4042, -0.3436, +0.0355, -0.2288, -0.6382, +0.7358, -0.1229]
[
+0.5019, +0.3831, +0.6726, +0.3767, +0.2021, -0.1615, +0.3882, -0.0487,
-0.2713, +0.1173, -0.2218, +0.0598, +0.0819, -0.1157, +0.5879, -0.3587,
+0.1376, -0.2595, +0.0257, -0.1182, +0.0723, +0.5612, -0.4087, -0.4651,
+0.0631, +0.1786, +0.1206, +0.4791, +0.5922, -0.4444, +0.3446, -0.0464
],
[
-0.0485, +0.0739, -0.6915, +0.5446, -0.2461, +0.1557, +0.8993, -0.7537,
+0.1149, +0.0575, -0.1714, -0.3796, +0.3508, -0.2315, +0.4389, -1.4456,
-1.3490, -0.1598, -1.0354, -0.2320, -0.3765, +0.1070, -0.7107, +0.4150,
+0.2711, -0.2915, -0.7957, +0.7753, -0.0425, -0.1352, +0.3018, -0.0069
],
[
-0.4047, +1.0040, -0.4570, +0.3017, +0.1477, -0.0163, +0.4087, -0.6368,
-0.0764, -0.0695, +0.0208, -0.2411, +0.1936, +0.0047, +0.0107, -0.8538,
-0.5887, -0.0524, -1.4902, +0.2858, +0.4396, -0.3433, -0.6778, -0.7137,
+0.4587, +0.3359, -0.7350, -1.0813, -0.1296, +0.1748, -0.3830, -0.2103
],
[
+0.0503, -0.3342, -0.6057, +0.2217, +0.3164, -0.1881, -0.5867, -0.2471,
-0.2527, -0.0444, +0.1874, -0.0960, +0.2039, -0.0488, +0.1741, -0.1623,
-0.0758, -0.2354, -0.5986, -0.2129, -0.2470, +0.3317, -0.4795, -0.6380,
+0.1494, +0.0115, -0.2746, -0.8428, -0.0118, -0.0604, +0.0886, -0.0408
],
[
-0.1932, -1.3896, +0.3919, -0.4700, -0.9806, -0.1554, +0.3132, +0.4138,
-0.4943, -0.1408, -0.0976, +0.1551, -0.0180, +0.0864, -0.0053, -0.2430,
+0.4948, +0.2709, -0.3488, +0.2085, -0.2124, -0.3025, -0.0692, +0.3884,
+0.5764, +0.5783, +0.4351, -0.2633, -0.9288, +0.2218, -0.9049, -0.2970
],
[
-0.2841, -0.3393, -0.1062, -0.1415, +0.0257, +0.0816, -0.4833, -0.2775,
+0.0308, -0.0344, +0.5451, +0.1588, -0.7454, -0.1444, +0.4189, -0.2001,
-2.0586, -0.0616, -1.4463, +0.0076, -0.7703, +0.3279, -0.7009, +0.6046,
-0.1615, -0.5188, -0.7503, +0.0615, +0.1815, -0.2512, +0.0321, -0.1834
],
[
+0.3751, +0.2932, -0.6815, +0.3771, +0.0603, -0.2035, -0.2644, -1.0120,
-0.0661, -0.0846, +0.1209, +0.0367, +0.0493, -0.2603, -0.1608, -0.7580,
-0.8609, +0.1415, -0.7626, -1.0209, -0.7498, -0.0732, -0.8138, -0.2292,
+0.5803, -0.2718, -1.4684, -0.1584, +0.2096, +0.1336, +0.3632, +0.0216
],
[
-0.0625, -0.1233, -0.2715, +0.5541, +0.3121, +0.0265, +0.4501, -1.1024,
-0.1160, -0.1005, -0.0844, -0.0516, +0.0916, +0.0901, +0.3710, -0.5753,
-0.3728, -0.1103, -0.6285, -0.2179, +0.1570, +0.1168, -0.9312, +0.0135,
-0.0376, -0.1693, -0.5358, -0.0028, +0.2105, -0.7373, +0.2776, +0.2326
],
[
-0.5378, -0.3201, +0.3606, +0.1331, +0.0120, -0.2421, -0.0230, +0.4622,
-0.3140, +0.0803, -0.6897, -0.4704, +0.2685, +0.0803, -0.7654, -0.1433,
+0.0242, +0.0917, +0.2458, +0.0457, -0.2503, -0.1197, +0.1454, -0.1523,
-0.4095, +0.1856, +0.0678, -1.0011, +0.0117, +0.1789, -0.4063, -0.0888
],
[
-0.6352, -0.6358, -0.2066, +0.0758, -0.2973, -0.3014, -0.0556, -0.0912,
-0.2729, -0.1492, -0.1928, -1.8768, +0.2183, +0.0801, +0.1288, -1.2493,
+0.1115, +0.2797, -0.1458, +0.0062, -0.0402, -0.8945, -0.2231, -0.1154,
+0.3635, -0.3021, +0.1402, -0.7347, +0.2772, +0.3182, -0.9708, +0.0376
],
[
+0.6899, +0.3471, -0.5863, +0.1497, +0.1616, -0.0497, +0.3579, -0.6421,
+0.4529, -0.1588, +0.9250, +0.2357, -0.0712, -0.1793, -0.0231, -0.4814,
-0.7654, +0.0591, -0.6866, -0.1705, +0.2255, -0.0007, -0.3890, +0.6114,
+0.0443, -0.6929, -0.7734, +0.2314, -0.0231, -0.6386, +0.1237, +0.0472
],
[
-0.2496, -0.1687, +0.1234, +0.4152, +0.4207, -0.1398, +0.1287, +0.5903,
+0.0530, -0.1181, +0.0803, -0.0641, -0.1198, -0.4702, -0.3669, +0.2340,
-0.3778, +0.4341, +0.2411, -0.2171, -0.3051, -0.2397, +0.1756, +0.4040,
+0.0682, +0.1575, +0.4137, +0.0887, -0.1998, +0.2221, -0.2474, -0.0559
],
[
-2.2466, -1.2725, +0.5947, -0.3192, -0.2665, -0.0129, -0.7615, +0.1148,
+0.2745, -0.0556, -1.3313, -0.7143, -0.5119, -0.0572, -0.1892, -0.3294,
-0.0187, -0.7696, +1.0413, +0.4226, +0.1378, -1.3668, +0.9806, -0.1810,
-0.2307, -0.4924, +0.7163, -1.2529, -0.3216, +0.1683, -0.6657, -0.1121
],
[
+0.1227, +0.2433, -0.1292, -0.7152, -0.1342, -0.1116, -0.2695, +0.0243,
-0.0770, -0.1713, +0.2837, +0.2076, -0.7322, -0.1657, -0.3407, -0.4378,
+0.0679, -0.3777, +0.3025, -0.6780, -0.2326, +0.1463, +0.0535, -0.6373,
-0.2027, -0.5404, -0.1598, +0.1511, -0.1776, +0.0854, +0.1753, -0.0342
],
[
-0.1772, -0.2654, -0.4170, -0.3301, +0.2956, -0.4234, +0.0835, +0.2869,
-0.2804, -0.2073, -0.3291, -0.5897, -0.4116, -0.0447, +0.1601, +0.1602,
+0.1691, -0.2014, -0.0502, +0.1167, -1.0103, -0.4297, -0.2039, -0.0859,
+0.2756, -0.1768, -0.2726, -0.0256, -0.0834, +0.0852, +0.0930, -0.0606
],
[
-0.5390, -0.5441, +0.3202, -0.1018, +0.0059, +0.1799, -0.1917, +0.3674,
+0.2576, -0.0707, -0.4401, -0.3990, +0.0565, +0.0751, -0.5959, +0.3866,
+0.2763, -0.2564, +0.4937, +0.5076, +0.3941, -0.3593, +0.4346, +0.2561,
-0.0762, -0.2873, +0.6820, -0.3032, -0.3268, +0.1319, -0.3643, +0.0292
],
[
+0.1816, -0.0451, -0.9370, +0.1335, -0.1030, -0.0400, +0.0311, -1.3751,
-0.1860, +0.1559, +0.5395, +0.3994, -0.1703, -0.1157, +0.6342, -0.4726,
-0.6213, -0.2096, -0.7549, -0.9815, -0.3798, +0.5286, -0.8413, +0.2577,
+0.2223, -1.2260, -1.3571, -0.0970, +0.3334, -0.2096, +0.3566, -0.1703
],
[
+0.0635, +0.1541, -0.2206, +0.0924, +0.1302, +0.1947, -0.3868, -0.6834,
-0.0603, -0.3752, +0.3103, -0.1699, -0.0833, -0.1190, -0.0310, -0.5480,
-1.1421, -0.0020, -0.3611, -0.3800, -0.0638, +0.0811, -0.5886, +0.0690,
+0.1925, +0.0710, -0.3142, +0.1837, +0.2125, -0.1217, +0.2185, +0.0458
],
[
-0.3973, +0.0486, +0.2518, -0.3208, +0.1218, -0.5324, -0.3417, +0.0322,
-0.0088, +0.0214, +0.2725, +0.0960, -0.2949, -0.1770, -0.1511, +0.0259,
+0.1161, -0.8829, +0.2415, +0.0939, -0.7213, +0.2220, +0.1687, -0.1802,
-0.0539, +0.1786, +0.6638, +0.3559, +0.2343, +0.3212, +0.4396, -0.1385
],
[
-0.2384, -0.5346, -0.2323, -0.2277, +0.3503, -0.0308, -0.2004, -0.1096,
-0.2587, -0.1143, +0.2579, +0.2382, -0.5883, -0.1277, +0.2257, -0.0244,
-0.9605, -0.4244, -0.7321, +0.3017, -1.6256, -0.2074, -0.8327, +0.0607,
-0.0751, -0.0153, -0.4485, +0.1758, +0.1821, +0.2625, +0.0108, -0.2395
],
[
-0.5639, -0.3613, +0.1291, -0.2132, +0.4927, -0.0604, -0.8067, +0.0933,
-0.1483, -0.0321, -0.6843, -0.3064, -0.5646, -0.2040, -0.0414, +0.6092,
+0.4903, -0.9346, +0.3389, +0.2040, -0.0295, -0.2196, +0.4264, +0.0312,
-1.1801, +0.3008, +0.7719, +0.2140, -0.0257, +0.5275, -0.0553, +0.0362
],
[
-0.6039, -1.2848, +0.6301, -0.1285, +0.2338, -0.2585, -0.3217, +0.4326,
+0.0441, -0.0356, -0.5720, -0.8739, -0.3924, +0.2499, -0.2620, +0.1396,
-0.0701, -0.2239, +0.2612, +0.1646, +0.7769, -0.6382, +0.8720, -0.1956,
-0.1779, -0.1608, -0.0358, -0.4453, -0.1154, +0.5532, -0.9282, +0.0031
],
[
-0.1990, +0.3708, -0.0049, -0.3260, -0.0465, +0.0415, +0.1601, +0.0019,
+0.0114, +0.0438, +0.0893, +0.3056, -0.6166, +0.1145, -0.6742, +0.0483,
+0.0739, -0.1139, +0.5772, -1.5569, +0.4253, -0.0769, +0.4014, -0.6817,
+0.0228, -0.0383, -0.0844, -0.1560, +0.1414, -0.3420, +0.3664, -0.2293
],
[
-0.0917, -0.8692, +0.4704, +0.1796, -0.1346, -0.5246, +0.0622, +0.3420,
-0.5879, -0.0445, -0.3444, -0.0490, +0.0956, -0.0753, -0.8856, +0.1275,
+0.1592, +0.3569, +0.1774, +0.2723, +0.1125, -0.1718, +0.2451, -0.0132,
+0.1584, -0.0197, +0.0700, -0.2156, +0.0094, +0.4639, -0.6721, -0.2180
],
[
+0.0578, -0.1570, -0.1623, -0.1359, +0.1346, +0.1821, -0.0696, -0.0570,
+0.0011, +0.1216, +0.1069, -0.0841, +0.1017, -0.1663, -0.6005, -0.4583,
-0.2606, -0.0292, +0.0321, -0.5614, -0.4416, +0.0355, +0.2081, +0.3517,
+0.0619, -1.0007, -0.0765, +0.1769, -0.1286, +0.5833, -0.1758, -0.1957
],
[
-0.0013, +0.3157, +0.0395, -1.0792, -0.1198, -0.2945, -0.0090, +0.3281,
-0.0618, -0.0806, +0.0768, +0.2802, -0.2311, -0.2302, +0.0506, +0.0552,
+0.3727, +0.3610, +0.2029, -0.1743, +0.4557, -0.1761, -0.5039, -0.9115,
+0.2842, +0.1317, -0.5961, -0.4214, -1.0727, +0.3308, +0.2380, -0.3348
],
[
+0.2455, -0.1299, +0.3117, -1.0169, -0.3417, +0.0310, -0.4793, +0.5334,
-0.4799, -0.3291, -0.1344, +0.3732, -0.1514, +0.1574, -0.1819, -0.0206,
+0.5675, -0.6992, +0.4815, -0.1497, -0.3804, +0.1389, +0.5850, -0.2920,
+0.2569, -0.3527, +0.3641, -0.2014, -0.1457, +0.2365, -0.2335, -0.2610
],
[
-0.2252, +0.1225, +0.0953, -0.0193, +0.3955, -0.0800, +0.0090, -0.4155,
+0.1851, +0.3392, -0.3260, -0.3907, +0.1320, +0.1266, +0.0579, +0.1819,
-0.5793, -0.2230, +0.1351, -0.1519, -0.0527, -0.0036, +0.1243, +0.1387,
-0.2874, -0.4997, -0.3251, +0.0435, -0.5244, +0.1051, -0.2081, +0.2126
],
[
-0.6574, +0.6789, +0.1026, -0.5191, +0.1058, -0.6812, +0.1798, -0.1029,
+0.0757, -0.0089, +0.1539, +0.4688, -0.1306, +0.0595, -0.8136, -0.4843,
+0.3675, +0.1800, +0.2641, -0.0589, +0.0613, +0.2019, -0.0765, -0.1216,
-0.4588, +0.0629, +0.1133, +0.7055, -2.8075, +0.3867, +0.4791, -0.1118
],
[
+0.2771, +0.3461, -0.8556, -0.0316, +0.3640, -0.1380, -0.3765, -0.9258,
-0.0693, -0.1283, +0.0576, -0.0792, +0.4468, -0.5001, +0.5939, -1.2226,
-0.9252, -0.3980, -1.3444, -0.9488, -0.7199, +0.4289, -1.8749, -0.0867,
+0.3905, -0.4535, -0.5607, -0.2247, -0.0359, -0.4125, +0.7124, -0.1963
],
[
-0.2584, -0.5358, -0.0916, +0.0765, +0.0615, -0.1887, -0.2253, -0.7855,
-0.0061, -0.1887, +0.5511, +0.3207, -0.2055, -0.1694, +0.4772, -1.0356,
-0.9884, -0.2620, -0.1214, +0.9733, -0.9700, -0.3205, -0.7005, -0.2960,
+0.1132, -0.0352, +0.3491, -0.2440, +0.1108, +0.1083, +0.3029, -0.0031
],
[
-0.6217, +0.1238, +0.0245, -0.1769, -0.2487, +0.0526, -0.0090, +0.1370,
+0.2666, -0.0743, -0.8230, -0.7723, -0.0929, -0.1532, +0.6103, -0.4931,
-1.3329, -0.3735, +0.0217, -0.1539, -0.4946, -1.0838, -0.5840, +0.1618,
+0.2584, +0.4200, +0.1171, -0.5601, +0.1604, +0.0864, +0.2287, -0.0057
],
[
-0.2220, +0.4837, -0.0825, +0.0143, +0.2734, -0.0853, +0.1578, -0.0112,
+0.1829, +0.0390, +0.2151, -0.1538, -0.1111, -0.0773, +0.3439, -0.2134,
-0.2884, -0.3831, +0.2767, -0.3149, +0.1463, +0.3230, +0.2187, -0.2309,
-0.1096, +0.3709, -0.0105, +0.3709, +0.3034, -0.7602, +0.5988, -0.0595
],
[
-0.6073, +0.1780, +0.1682, +0.1604, +0.3662, -0.0385, -0.1495, +0.3012,
-0.2065, -0.0163, -1.0465, -0.8268, -0.0190, +0.0964, -0.2755, +0.0965,
-0.3466, -0.3758, -0.1113, +0.1462, +0.3280, -0.1600, +0.1023, +0.1998,
-0.3642, +0.2736, +0.3782, -0.2681, +0.2334, +0.1721, +0.0385, +0.0348
],
[
-0.0582, -0.5750, +0.1279, +0.3630, -0.2404, -0.1511, +0.2650, -0.0324,
-0.2258, +0.0007, +0.3051, -0.1875, -0.5106, +0.0104, +0.1335, -0.5282,
-0.2210, +0.2648, -0.7506, +0.4975, -1.7048, +0.2378, -0.1771, +0.2981,
+0.1252, +0.1384, -0.3384, -0.0830, +0.0966, +0.3728, -0.1980, -0.1953
],
[
-1.0735, -0.2780, +0.1428, -0.0624, -0.0311, -0.2687, -0.1623, +0.2996,
+0.1782, -0.1403, -0.3761, -1.3413, -0.2020, -0.0492, -0.6636, -0.2737,
+0.2228, +0.3109, +0.1596, +0.0172, +0.1325, -1.4936, -0.0615, -0.1547,
-0.2285, +0.2648, -0.1008, -1.6756, -0.2352, +0.0998, -0.4550, +0.2028
],
[
-0.3866, -0.0107, +0.1052, +0.1423, +0.1160, +0.1712, -0.6206, -0.3505,
-0.3298, -0.0362, +0.6768, +0.2086, -0.4348, -0.3577, +0.0131, -0.1640,
+0.0160, -0.3891, -0.0180, -0.1064, -0.2494, +0.0340, +0.2225, -0.1320,
-0.3550, -0.3005, +0.0118, +0.2782, +0.4691, -1.3792, +0.1971, -0.0598
],
[
+0.0215, +0.1885, -0.5360, -0.1283, +0.4689, +0.1426, -0.2809, -0.8197,
+0.1951, -0.1620, +0.0627, +0.2864, -0.3069, -0.1170, +0.0545, -0.4527,
-0.6646, -0.1546, -0.1794, -0.5350, -0.1060, -0.0198, -0.5782, -0.2201,
+0.0361, -0.2497, -0.1527, -0.1489, +0.1034, +0.0925, +0.0368, -0.0352
],
[
+0.2459, +0.3230, -0.0494, -0.5631, +0.0600, -0.3036, -0.5443, +0.1081,
-0.2231, +0.0734, +0.0289, +0.4205, -0.6415, -0.1305, -0.0717, +0.2971,
+0.0476, -1.3001, +0.5122, -0.0005, -0.3572, +0.0727, +0.1713, -0.4751,
-0.3614, -0.0957, -0.0942, +0.0580, +0.2393, +0.0038, +0.1938, -0.1704
],
[
+0.3352, -0.0882, -0.0349, -0.6093, +0.4262, -0.1350, -0.0687, -0.2459,
-0.5564, -0.2956, +0.1619, -0.0813, -0.5128, -0.2209, +0.3870, -0.0804,
+0.7676, -0.1745, -0.3860, -0.5517, -0.6899, -0.6400, +0.6095, -0.5337,
+0.3452, -0.6608, +0.0662, +0.1741, +0.1653, -0.4191, +0.1051, -0.3116
],
[
-0.0527, -1.3119, +0.3441, -0.0041, -0.5938, -0.4224, +0.3973, +0.4673,
-0.0613, -0.0191, +0.1297, -0.2211, -0.0880, +0.0319, +0.0661, -0.2075,
+0.4380, +0.3197, +0.0989, +0.2346, -0.0142, -1.2137, +0.1618, -0.3300,
+0.4591, +0.4910, +0.3537, -0.5902, -0.0616, +0.2882, -0.0900, -0.0208
],
[
-0.7068, -0.7952, +0.4496, +0.1237, -0.2000, -0.5966, +0.3920, +0.3458,
+0.0036, -0.0666, -0.3061, -0.1172, +0.0446, +0.1768, -0.5318, +0.2083,
+0.3371, +0.1497, +0.4244, +0.3980, +0.2023, -0.8931, +0.1860, -0.6889,
-0.3250, +0.1250, +0.1510, -0.3405, -0.4040, +0.1598, -0.9933, +0.0233
],
[
-1.2305, -0.3178, +0.0536, -0.0585, -0.7097, +0.3196, +0.2899, +0.8200,
+0.0384, +0.1733, -1.1839, -2.2545, +0.0653, +0.1376, -0.1359, -0.1202,
-0.0831, -0.5397, +0.1100, +0.1386, -0.1271, -0.6298, +0.1038, -0.1213,
-0.1461, -0.4508, +0.5106, -0.8266, -0.6204, +0.3753, -0.4897, -0.0751
],
[
-0.3676, -0.5547, +0.0897, -0.0230, -0.3045, -0.1885, -0.5990, +0.3622,
-0.2240, -0.1205, -0.3056, +0.7085, +0.0053, -0.1213, -0.3023, +0.1433,
-0.2090, -0.0412, +0.2561, +0.1313, -0.2803, +0.2543, +0.0571, -0.9791,
-0.0167, -0.2928, -0.3020, -0.2271, +0.0507, -0.1310, -0.6347, -0.0889
],
[
-0.2794, +0.0675, -1.0020, -0.2234, +0.3937, -0.2857, +0.1058, -1.0755,
-0.0377, -0.2753, -0.0501, -0.0493, -0.2987, -0.2214, +0.2869, -1.0882,
-1.2635, -1.2235, -0.5762, -0.4528, -0.1372, -0.0192, -1.3768, +0.2337,
+0.2008, -0.2517, -0.3918, -0.6362, -0.1762, -0.9261, +0.1711, -0.0094
],
[
-0.1099, -0.2142, -0.0006, -0.4617, -0.0286, +0.3482, -0.7728, -0.4384,
+0.0050, -0.0151, +0.1974, +0.2815, -0.5295, -0.2581, +0.3404, -1.6254,
-1.3208, -0.1648, -0.5207, +0.4104, -0.2795, +0.0613, -1.5642, -0.1178,
-0.1354, +0.0375, +0.3323, +0.0540, +0.2038, -0.3223, +0.4603, -0.3780
],
[
-0.3999, -0.3719, +0.1918, -0.4738, -0.0009, +0.0419, +0.1046, +0.2675,
+0.1359, -0.2536, -0.3485, -0.3118, -0.3613, +0.0914, -0.4486, +0.2719,
+0.2876, -0.0685, +0.4309, +0.1856, +0.4678, -0.3314, +0.0211, +0.2575,
+0.5077, -0.1494, +0.5110, -0.6869, -1.4053, +0.3093, -0.2914, -0.1501
],
[
+0.3543, +0.3915, +0.0536, +0.3995, +0.2165, -0.1133, -0.1209, +0.0824,
-0.0723, -0.0774, -0.4248, -0.0243, -0.1089, -0.1408, +0.2072, -0.1309,
-1.5186, -0.4079, -0.0530, -0.3525, +0.6782, +0.1991, -0.0292, +0.1339,
-0.1074, +0.2312, +0.1969, +0.4662, +0.5312, -0.3306, +0.0622, +0.1057
],
[
-1.1778, +0.2978, +0.0443, +0.1657, +0.1317, -0.1250, -0.0459, +0.0777,
+0.1359, -0.0055, +0.2364, -2.3659, +0.2214, -0.1489, -0.3051, -0.5094,
+0.1495, +0.3328, +0.1264, -0.0217, +0.2321, -0.6466, -0.1813, +0.5276,
+0.1975, +0.3752, +0.1469, -0.8019, +0.2427, +0.1543, +0.2140, -0.1592
],
[
-0.7753, -1.3502, +0.3157, +0.1847, +0.0661, -0.5501, +0.3482, +0.6112,
+0.0207, +0.0534, -0.2106, -1.0144, -0.0836, -0.0275, -1.0761, +0.2131,
+0.3135, +0.3134, +0.1974, +0.0182, +0.1975, -1.1221, +0.2958, -0.2610,
+0.0865, +0.3592, +0.4317, -0.3505, -0.4557, +0.3033, -0.5797, -0.2988
],
[
+0.4103, -0.0643, +0.0803, +0.2177, +0.1028, -0.2668, +0.0084, -0.2340,
-0.2571, +0.0334, +0.3451, -0.0055, +0.0216, -0.1460, +0.5293, -0.2615,
-0.3035, +0.1736, -0.4206, -0.2186, +0.1343, +0.6001, -0.0499, -0.2777,
-0.0160, -0.4303, -0.2795, +0.1932, +0.4219, -0.0800, +0.1819, -0.1007
],
[
-0.7074, -0.0546, +0.4495, +0.1427, +0.3306, +0.0811, -0.5433, -0.0609,
-0.2128, -0.1059, -1.0477, -0.4679, -0.1780, -0.1373, -0.3672, +0.0724,
-0.0554, -0.5400, +0.0457, -0.0469, -0.0367, -0.4609, +0.1668, -0.0266,
-0.9007, +0.2975, +0.5204, -0.0453, -0.1314, -0.0980, +0.1424, -0.1877
],
[
+0.0657, +0.1230, -0.2558, +0.3103, -0.0795, -0.1243, +0.1956, +0.0262,
-0.2626, -0.0554, +0.3760, +0.3076, -0.4633, +0.0790, +0.2363, -0.3311,
+0.1235, -0.1727, -0.2468, +0.0188, -0.1121, -0.2807, -0.5865, -0.4197,
+0.1949, -0.4970, -1.0413, -0.1698, +0.1798, +0.2004, -0.0514, +0.0254
],
[
-0.1566, -1.1156, +0.4431, -0.1503, -0.5682, +0.1822, -0.1201, +0.5151,
-0.1386, -0.1764, +0.2063, -0.8582, +0.3750, -0.1405, +0.0852, +0.2641,
-0.1951, -0.0575, -0.4181, +0.2273, +0.1332, -0.2797, +0.5406, -0.0869,
+0.2453, +0.0648, +0.2252, -0.0628, -0.6882, -0.0514, -0.4663, -0.0954
],
[
-0.4780, +0.5844, +0.1782, -0.0831, +0.1547, -0.0595, -0.5646, -0.0488,
-0.1774, -0.0098, +0.1833, +0.3520, -0.3359, -0.1492, +0.1139, -0.1223,
-0.5312, -0.5361, +0.1689, -0.2020, +0.1069, +0.2327, +0.2887, +0.0526,
-0.5916, -0.2435, -0.2342, +0.3422, +0.4399, -1.1880, +0.1293, -0.1021
],
[
-1.2784, -1.8266, +0.0630, -0.3332, -0.5833, -0.3733, +0.3265, +0.1977,
+0.0716, -0.2575, +0.0403, -0.1961, +0.1541, -0.2311, -0.1734, -0.1785,
+0.0168, +0.1134, +0.0407, -0.1661, +0.5985, -1.9681, +0.1342, +0.3432,
+0.3934, +0.0663, +0.3141, -2.0177, -1.7071, +0.2942, -1.0684, -0.0737
],
[
+0.1763, +0.2191, +0.2609, +0.0723, +0.1038, -0.2516, -0.9086, +0.1536,
+0.0153, +0.1061, +0.1675, +0.3839, -0.5326, +0.2007, -0.4943, -0.1048,
+0.1614, -0.4703, +0.3453, -0.7441, -0.6187, +0.4247, +0.1721, -0.1776,
-0.0919, -0.8387, +0.0798, -0.0598, +0.2711, -0.0508, +0.1761, +0.0029
],
[
-0.2003, +0.2194, -0.6280, +0.1593, +0.1648, -0.1007, +0.3162, -0.3881,
-0.1584, -0.0148, +0.7057, +0.0085, +0.3488, +0.0977, +0.4018, -0.8195,
-0.1944, +0.4359, -0.6605, -0.1929, +0.2237, +0.1087, -0.4213, -0.7149,
+0.3972, -0.1313, -0.2815, -0.7234, -0.0561, -0.5364, +0.0178, +0.0349
],
[
+0.0567, +0.1687, +0.0007, +0.2939, -1.3854, +0.0168, +0.1909, +0.4919,
-0.4547, +0.0562, -0.1188, +0.1653, -0.0265, -0.0541, -0.1117, -0.3240,
+0.2545, +0.6516, +0.0124, -0.1258, -0.0656, -0.3524, +0.0174, +0.3926,
+0.1125, +0.2834, -0.1961, -0.3603, +0.1783, -0.0224, -0.6900, -0.1688
],
[
+0.0672, +0.6339, -0.3839, +0.0077, +0.8224, -0.3197, -0.0589, -0.1318,
+0.0222, -0.1530, +0.1237, +0.4014, -0.1952, -0.1130, +0.4214, -0.2741,
+0.2291, +0.0757, +0.0563, -0.0967, +0.4210, +0.5133, +0.0412, -0.9212,
+0.1377, -0.4068, -0.3652, +0.4283, +0.6182, -0.6187, +0.1997, +0.1240
],
[
-0.0067, +0.3307, -0.7751, -0.2084, +0.4740, -0.0264, -0.0768, -0.9519,
-0.0632, -0.0753, +0.3293, +0.5260, -0.6023, +0.0060, +0.2799, -0.2904,
-0.8262, -0.6644, -0.3900, -0.1461, +0.4965, +0.3996, -0.7569, +0.0612,
+0.5168, -0.5160, -0.4875, +0.3759, +0.0295, +0.1027, +0.6096, -0.0115
],
[
-0.0110, +0.4652, -0.1486, -0.6029, +0.2581, -0.3184, -0.3759, +0.3213,
-0.2748, -0.0630, +0.0953, +0.2101, -1.2738, -0.1353, +0.2710, -0.2276,
+0.2586, -0.2347, -0.3320, +0.0487, -0.2318, -0.1002, +0.1236, +0.2660,
-0.1172, +0.1437, -0.0850, +0.1659, -0.2152, -0.0764, +0.2838, -0.1325
],
[
+0.0152, -0.0906, -0.1897, -0.3521, -0.1836, -0.1694, -0.4150, -0.1695,
+0.0509, -0.0716, +0.3118, +0.2422, -0.5058, -0.0637, -0.1038, -0.2828,
-0.0528, -0.2051, +0.2062, -0.2105, -0.7317, +0.1881, -0.2992, -0.0883,
+0.0115, -1.5295, -0.1671, +0.0411, +0.0648, -0.0119, -0.2941, +0.0273
],
[
+0.5028, +0.1780, -0.4643, -0.0373, +0.3067, -0.1974, +0.2643, -0.2365,
-0.2083, +0.0472, +0.4830, +0.0630, +0.2155, -0.0916, +0.6290, -0.4427,
-0.6266, +0.3576, -0.3541, -0.2034, +0.3733, +0.8247, -0.5837, -0.4372,
+0.2696, -0.4042, -0.3436, +0.0355, -0.2288, -0.6382, +0.7358, -0.1229
]
])
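For orientation, the matrix above (together with the bias and output vectors that follow) amounts to one dense hidden layer of 32 units feeding a single scalar output. Below is a minimal, self-contained sketch of how such exported weights could be evaluated; the input width, the tanh activation, and every name in it are illustrative assumptions, since the script's actual evaluation code is not part of this hunk.

import numpy as np


def evaluate_policy(features, dense2_w, dense2_b, final_w, final_b):
  # Hypothetical evaluation of hard-coded weights like the ones in this diff:
  # one dense hidden layer (tanh assumed) followed by a scalar output.
  hidden = np.tanh(np.dot(features, dense2_w) + dense2_b)
  return np.dot(hidden, final_w) + final_b


# Stand-in arrays with shapes matching the literals above
# (the hidden layer has 32 units, the output is a single value).
rng = np.random.default_rng(0)
features = rng.standard_normal(64)  # input width is an assumption
dense2_w = rng.standard_normal((64, 32))
dense2_b = rng.standard_normal(32)
final_w = rng.standard_normal((32, 1))
final_b = np.array([0.0190])
print(evaluate_policy(features, dense2_w, dense2_b, final_w, final_b))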
weights_dense2_b = np.array([ -0.0730, +0.0456, +0.0877, -0.2607, +0.0029, -0.2705, -0.1420, +0.2403, -0.2135, -0.0646, +0.1378, +0.1105, -0.4639, -0.0583, -0.0872, -0.1473, +0.1460, -0.0234, +0.0740, -0.0745, -0.1283, +0.0316, +0.0361, -0.0726, -0.0304, +0.0417, -0.0313, +0.0935, +0.0815, +0.0814, +0.0818, -0.1111])
weights_final_w = np.array([
[ +1.0397],
[ +0.7049],
[ -0.2128],
[ +0.2172],
[ +0.3027],
[ -0.1991],
[ +0.3398],
[ -0.5932],
[ -0.1439],
[ -0.0236],
[ +0.5679],
[ +0.8571],
[ +0.1934],
[ -0.1652],
[ +0.6933],
[ -0.5510],
[ -1.0587],
[ +0.6996],
[ -0.5009],
[ -0.4000],
[ -0.6958],
[ +0.7716],
[ -0.5342],
[ -0.5095],
[ +0.3040],
[ -1.1986],
[ -0.4900],
[ +0.7726],
[ +0.5871],
[ -0.2533],
[ +0.2633],
[ -0.0004]
])
weights_dense2_b = np.array([
-0.0730, +0.0456, +0.0877, -0.2607, +0.0029, -0.2705, -0.1420, +0.2403,
-0.2135, -0.0646, +0.1378, +0.1105, -0.4639, -0.0583, -0.0872, -0.1473,
+0.1460, -0.0234, +0.0740, -0.0745, -0.1283, +0.0316, +0.0361, -0.0726,
-0.0304, +0.0417, -0.0313, +0.0935, +0.0815, +0.0814, +0.0818, -0.1111
])
weights_final_b = np.array([ +0.0190])
weights_final_w = np.array([[+1.0397], [+0.7049], [-0.2128], [+0.2172],
[+0.3027], [-0.1991], [+0.3398], [-0.5932],
[-0.1439], [-0.0236], [+0.5679], [+0.8571],
[+0.1934], [-0.1652], [+0.6933], [-0.5510],
[-1.0587], [+0.6996], [-0.5009], [-0.4000],
[-0.6958], [+0.7716], [-0.5342], [-0.5095],
[+0.3040], [-1.1986], [-0.4900], [+0.7726],
[+0.5871], [-0.2533], [+0.2633], [-0.0004]])
if __name__=="__main__":
main()
weights_final_b = np.array([+0.0190])
# yapf: enable
if __name__ == "__main__":
main()
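The `# yapf: enable` marker above is yapf's standard way of switching formatting back on after a region that should be left untouched; it is presumably paired with a `# yapf: disable` comment earlier in this file (not visible in this hunk) so that yapf does not rewrap the hand-formatted weight literals. A small sketch of the pattern, with a hypothetical array name:

# yapf: disable
identity = np.array([
    [1, 0],
    [0, 1],
])
# yapf: enable

Everything between the two comments keeps its manual layout, while the rest of the file is formatted according to the project's yapf style.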

Some files were not shown because too many files have changed in this diff.