Merge remote-tracking branch 'upstream/master'
2
examples/pybullet/gym/pybullet_envs/agents/__init__.py
Normal file
@@ -0,0 +1,2 @@
110
examples/pybullet/gym/pybullet_envs/agents/config_ppo.py
Normal file
@@ -0,0 +1,110 @@
"""The PPO training configuration file for minitaur environments."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import functools

from agents import ppo
from agents.scripts import networks
from pybullet_envs.bullet import minitaur_gym_env
from pybullet_envs.bullet import minitaur_env_randomizer
import pybullet_envs  # Importing pybullet_envs registers the Bullet gym environments.


# pylint: disable=unused-variable
def default():
  """The default configuration."""
  # General
  algorithm = ppo.PPOAlgorithm
  num_agents = 25
  eval_episodes = 25
  use_gpu = False
  # Network
  network = networks.ForwardGaussianPolicy
  weight_summaries = dict(
      all=r'.*', policy=r'.*/policy/.*', value=r'.*/value/.*')
  policy_layers = 200, 100
  value_layers = 200, 100
  init_mean_factor = 0.2
  init_logstd = -1
  network_config = dict()
  # Optimization
  update_every = 25
  policy_optimizer = 'AdamOptimizer'
  value_optimizer = 'AdamOptimizer'
  update_epochs_policy = 25
  update_epochs_value = 25
  value_lr = 1e-3
  policy_lr = 1e-4
  # Losses
  discount = 0.99
  kl_target = 1e-2
  kl_cutoff_factor = 2
  kl_cutoff_coef = 1000
  kl_init_penalty = 1
  return locals()


def pybullet_pendulum():
  locals().update(default())
  env = 'InvertedPendulumBulletEnv-v0'
  max_length = 200
  steps = 5e7  # 50M
  return locals()


def pybullet_doublependulum():
  locals().update(default())
  env = 'InvertedDoublePendulumBulletEnv-v0'
  max_length = 1000
  steps = 5e7  # 50M
  return locals()


def pybullet_pendulumswingup():
  locals().update(default())
  env = 'InvertedPendulumSwingupBulletEnv-v0'
  max_length = 1000
  steps = 5e7  # 50M
  return locals()


def pybullet_cheetah():
  """Configuration for the Bullet half cheetah task."""
  locals().update(default())
  # Environment
  env = 'HalfCheetahBulletEnv-v0'
  max_length = 1000
  steps = 1e8  # 100M
  return locals()


def pybullet_ant():
  locals().update(default())
  env = 'AntBulletEnv-v0'
  max_length = 1000
  steps = 5e7  # 50M
  return locals()


def pybullet_racecar():
  """Configuration for the Bullet MIT racecar task."""
  locals().update(default())
  # Environment
  env = 'RacecarBulletEnv-v0'  # Or: functools.partial(racecarGymEnv.RacecarGymEnv, isDiscrete=False, renders=True)
  max_length = 10
  steps = 1e7  # 10M
  return locals()


def pybullet_minitaur():
  """Configuration specific to the minitaur_gym_env.MinitaurBulletEnv class."""
  locals().update(default())
  randomizer = minitaur_env_randomizer.MinitaurEnvRandomizer()
  env = functools.partial(
      minitaur_gym_env.MinitaurBulletEnv,
      accurate_motor_model_enabled=True,
      motor_overheat_protection=True,
      pd_control_enabled=True,
      env_randomizer=randomizer,
      render=False)
  max_length = 1000
  steps = 3e7  # 30M
  return locals()
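A note on the pattern above: `locals().update(default())` relies on a CPython implementation detail. The dict returned by locals() is cached on the frame, so the manually added keys survive until the final `return locals()`, which then yields the defaults plus the per-task overrides. A minimal sketch of how a trainer consumes one of these config functions, assuming the open-source `agents` package this directory builds on (train_ppo.py below does the same thing via getattr):

from agents import tools
from pybullet_envs.agents import config_ppo

# Each config function returns a plain dict of its local names.
config = tools.AttrDict(config_ppo.pybullet_minitaur())
print(config.max_length)  # 1000
print(config.steps)       # 3e7; config.env is a functools.partial over MinitaurBulletEnv.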
48
examples/pybullet/gym/pybullet_envs/agents/train_ppo.py
Normal file
@@ -0,0 +1,48 @@
r"""Script to train the minitaur environments with Proximal Policy Optimization (PPO).

Run:
python -m pybullet_envs.agents.train_ppo --logdir=/tmp/train --config=pybullet_minitaur
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import datetime
import os
import tensorflow as tf

from agents import tools
from agents.scripts import train
from agents.scripts import utility
from . import config_ppo


flags = tf.app.flags
FLAGS = tf.app.flags.FLAGS

flags.DEFINE_string(
    'logdir', None,
    'Base directory to store logs.')
flags.DEFINE_string(
    'config', None,
    'Configuration to execute.')
flags.DEFINE_string(
    'timestamp', datetime.datetime.now().strftime('%Y%m%dT%H%M%S'),
    'Sub directory to store logs.')


def main(_):
  """Create or load configuration and launch the trainer."""
  config = tools.AttrDict(getattr(config_ppo, FLAGS.config)())
  logdir = FLAGS.logdir and os.path.join(
      FLAGS.logdir, '{}-{}'.format(FLAGS.timestamp, FLAGS.config))
  utility.save_config(config, logdir)
  for score in train.train(config, env_processes=True):
    tf.logging.info(str(score))


if __name__ == '__main__':
  tf.app.run()
42
examples/pybullet/gym/pybullet_envs/agents/visualize_ppo.py
Normal file
@@ -0,0 +1,42 @@
r"""Script to visualize a trained PPO agent.

python -m pybullet_envs.agents.visualize_ppo \
  --logdir=ppo \
  --outdir=/tmp/video/
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

from agents.scripts import visualize


flags = tf.app.flags
FLAGS = tf.app.flags.FLAGS
flags.DEFINE_string("logdir", None,
                    "Directory to the checkpoint of a training run.")
flags.DEFINE_string("outdir", None,
                    "Local directory for storing the monitoring outdir.")
flags.DEFINE_string("checkpoint", None,
                    "Checkpoint name to load; defaults to most recent.")
flags.DEFINE_integer("num_agents", 1,
                     "How many environments to step in parallel.")
flags.DEFINE_integer("num_episodes", 1, "Minimum number of episodes to render.")
flags.DEFINE_boolean(
    "env_processes", False,
    "Step environments in separate processes to circumvent the GIL.")


def main(_):
  visualize.visualize(FLAGS.logdir, FLAGS.outdir, FLAGS.num_agents,
                      FLAGS.num_episodes, FLAGS.checkpoint,
                      FLAGS.env_processes)


if __name__ == "__main__":
  tf.app.run()
25
examples/pybullet/gym/pybullet_envs/bullet/env_randomizer_base.py
Normal file
@@ -0,0 +1,25 @@
"""Abstract base class for environment randomizer."""

import abc


class EnvRandomizerBase(object):
  """Abstract base class for environment randomizer.

  An EnvRandomizer is called in environment.reset(). It will
  randomize physical parameters of the objects in the simulation.
  The physical parameters will be fixed for that episode and be
  randomized again in the next environment.reset().
  """

  __metaclass__ = abc.ABCMeta

  @abc.abstractmethod
  def randomize_env(self, env):
    """Randomize the simulated_objects in the environment.

    Args:
      env: The environment to be randomized.
    """
    pass
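For illustration, a minimal concrete subclass might look like the sketch below; GravityRandomizer is a hypothetical name, and reaching into env._pybullet_client is an assumption borrowed from MinitaurBulletEnv:

import random

from pybullet_envs.bullet import env_randomizer_base


class GravityRandomizer(env_randomizer_base.EnvRandomizerBase):
  """Hypothetical randomizer that perturbs gravity at every reset."""

  def randomize_env(self, env):
    # Re-sample gravity once per reset(); per the contract above, the value
    # then stays fixed for the rest of the episode.
    env._pybullet_client.setGravity(0, 0, -random.uniform(9.6, 10.0))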
68
examples/pybullet/gym/pybullet_envs/bullet/minitaur_env_randomizer.py
Normal file
@@ -0,0 +1,68 @@
"""Randomize the minitaur_gym_env when reset() is called."""
import random
import numpy as np
from . import env_randomizer_base

# Relative ranges.
MINITAUR_BASE_MASS_ERROR_RANGE = (-0.2, 0.2)  # 0.2 means 20%
MINITAUR_LEG_MASS_ERROR_RANGE = (-0.2, 0.2)  # 0.2 means 20%
# Absolute ranges.
BATTERY_VOLTAGE_RANGE = (14.8, 16.8)  # Unit: volt
MOTOR_VISCOUS_DAMPING_RANGE = (0, 0.01)  # Unit: N*m*s/rad (torque / angular velocity)
MINITAUR_LEG_FRICTION = (0.8, 1.5)  # Unit: dimensionless


class MinitaurEnvRandomizer(env_randomizer_base.EnvRandomizerBase):
  """A randomizer that changes the minitaur_gym_env during every reset."""

  def __init__(self,
               minitaur_base_mass_err_range=MINITAUR_BASE_MASS_ERROR_RANGE,
               minitaur_leg_mass_err_range=MINITAUR_LEG_MASS_ERROR_RANGE,
               battery_voltage_range=BATTERY_VOLTAGE_RANGE,
               motor_viscous_damping_range=MOTOR_VISCOUS_DAMPING_RANGE):
    self._minitaur_base_mass_err_range = minitaur_base_mass_err_range
    self._minitaur_leg_mass_err_range = minitaur_leg_mass_err_range
    self._battery_voltage_range = battery_voltage_range
    self._motor_viscous_damping_range = motor_viscous_damping_range

  def randomize_env(self, env):
    self._randomize_minitaur(env.minitaur)

  def _randomize_minitaur(self, minitaur):
    """Randomize various physical properties of the minitaur.

    It randomizes the mass/inertia of the base, mass/inertia of the legs,
    friction coefficient of the feet, the battery voltage and the motor damping
    at each reset() of the environment.

    Args:
      minitaur: the Minitaur instance in the minitaur_gym_env environment.
    """
    base_mass = minitaur.GetBaseMassFromURDF()
    randomized_base_mass = random.uniform(
        base_mass * (1.0 + self._minitaur_base_mass_err_range[0]),
        base_mass * (1.0 + self._minitaur_base_mass_err_range[1]))
    minitaur.SetBaseMass(randomized_base_mass)

    leg_masses = minitaur.GetLegMassesFromURDF()
    leg_masses_lower_bound = np.array(leg_masses) * (
        1.0 + self._minitaur_leg_mass_err_range[0])
    leg_masses_upper_bound = np.array(leg_masses) * (
        1.0 + self._minitaur_leg_mass_err_range[1])
    randomized_leg_masses = [
        np.random.uniform(leg_masses_lower_bound[i], leg_masses_upper_bound[i])
        for i in range(len(leg_masses))
    ]
    minitaur.SetLegMasses(randomized_leg_masses)

    # Use the ranges passed to the constructor rather than the module-level
    # constants, so that custom ranges are honored.
    randomized_battery_voltage = random.uniform(self._battery_voltage_range[0],
                                                self._battery_voltage_range[1])
    minitaur.SetBatteryVoltage(randomized_battery_voltage)

    randomized_motor_damping = random.uniform(
        self._motor_viscous_damping_range[0],
        self._motor_viscous_damping_range[1])
    minitaur.SetMotorViscousDamping(randomized_motor_damping)

    randomized_foot_friction = random.uniform(MINITAUR_LEG_FRICTION[0],
                                              MINITAUR_LEG_FRICTION[1])
    minitaur.SetFootFriction(randomized_foot_friction)
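A quick sketch of the randomizer in use. Per the EnvRandomizerBase contract, randomize_env() runs inside environment.reset(), so every reset re-samples the physical parameters:

from pybullet_envs.bullet import minitaur_env_randomizer
from pybullet_envs.bullet import minitaur_gym_env

env = minitaur_gym_env.MinitaurBulletEnv(
    env_randomizer=minitaur_env_randomizer.MinitaurEnvRandomizer())
# Base and leg masses, battery voltage, motor damping and foot friction
# are all re-sampled on each call below.
env.reset()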
examples/pybullet/gym/pybullet_envs/bullet/minitaur_gym_env.py
@@ -19,6 +19,7 @@ from . import bullet_client
 from . import minitaur
 import os
 import pybullet_data
+from . import minitaur_env_randomizer
 
 NUM_SUBSTEPS = 5
 NUM_MOTORS = 8
@@ -68,7 +69,7 @@ class MinitaurBulletEnv(gym.Env):
                on_rack=False,
                render=False,
                kd_for_pd_controllers=0.3,
-               env_randomizer=None):
+               env_randomizer=minitaur_env_randomizer.MinitaurEnvRandomizer()):
     """Initialize the minitaur gym environment.
 
     Args:
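Since this change makes the randomizer the default, a deterministic simulation now has to opt out explicitly. A sketch, assuming a None randomizer is simply skipped at reset():

# Disable domain randomization (the pre-change default behavior).
env = minitaur_gym_env.MinitaurBulletEnv(env_randomizer=None)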
@@ -0,0 +1,164 @@
r"""An example of running the minitaur gym environment with sine gaits."""

import argparse
import math
import numpy as np
from pybullet_envs.bullet import minitaur_gym_env
from pybullet_envs.bullet import minitaur_env_randomizer


def ResetPoseExample():
  """An example where the minitaur stands still using the reset pose."""
  steps = 1000
  randomizer = minitaur_env_randomizer.MinitaurEnvRandomizer()
  environment = minitaur_gym_env.MinitaurBulletEnv(
      render=True,
      leg_model_enabled=False,
      motor_velocity_limit=np.inf,
      pd_control_enabled=True,
      accurate_motor_model_enabled=True,
      motor_overheat_protection=True,
      env_randomizer=randomizer,
      hard_reset=False)
  action = [math.pi / 2] * 8
  for _ in range(steps):
    _, _, done, _ = environment.step(action)
    if done:
      break
  environment.reset()


def MotorOverheatExample():
  """An example where the minitaur motor overheat protection is triggered.

  The minitaur is leaning forward and the motors are getting above-threshold
  torques. The overheat protection will be triggered in ~1 sec.
  """
  environment = minitaur_gym_env.MinitaurBulletEnv(
      render=True,
      leg_model_enabled=False,
      motor_velocity_limit=np.inf,
      motor_overheat_protection=True,
      accurate_motor_model_enabled=True,
      motor_kp=1.20,
      motor_kd=0.00,
      on_rack=False)

  action = [2.0] * 8
  for i in range(8):
    # Apply asymmetric offsets so the minitaur leans and overloads its motors.
    action[i] = 2.0 - 0.5 * (-1 if i % 2 == 0 else 1) * (-1 if i < 4 else 1)

  steps = 500
  actions_and_observations = []
  for step_counter in range(steps):
    # Matches the internal timestep.
    time_step = 0.01
    t = step_counter * time_step
    current_row = [t]
    current_row.extend(action)

    observation, _, _, _ = environment.step(action)
    current_row.extend(observation.tolist())
    actions_and_observations.append(current_row)
  environment.reset()


def SineStandExample():
  """An example of the minitaur standing and squatting on the floor.

  To validate the accurate motor model, we command the robot to sit and stand
  up periodically in both simulation and experiment. We compare the measured
  motor trajectories, torques and gains.
  """
  environment = minitaur_gym_env.MinitaurBulletEnv(
      render=True,
      leg_model_enabled=False,
      motor_velocity_limit=np.inf,
      motor_overheat_protection=True,
      accurate_motor_model_enabled=True,
      motor_kp=1.20,
      motor_kd=0.02,
      on_rack=False)
  steps = 1000
  amplitude = 0.5
  speed = 3

  actions_and_observations = []

  for step_counter in range(steps):
    # Matches the internal timestep.
    time_step = 0.01
    t = step_counter * time_step
    current_row = [t]

    action = [math.sin(speed * t) * amplitude + math.pi / 2] * 8
    current_row.extend(action)

    observation, _, _, _ = environment.step(action)
    current_row.extend(observation.tolist())
    actions_and_observations.append(current_row)

  environment.reset()


def SinePolicyExample():
  """An example of the minitaur walking with a sine gait."""
  randomizer = minitaur_env_randomizer.MinitaurEnvRandomizer()
  environment = minitaur_gym_env.MinitaurBulletEnv(
      render=True,
      motor_velocity_limit=np.inf,
      pd_control_enabled=True,
      hard_reset=False,
      env_randomizer=randomizer,
      on_rack=False)
  sum_reward = 0
  steps = 20000
  amplitude_1_bound = 0.5
  amplitude_2_bound = 0.5
  speed = 40

  for step_counter in range(steps):
    time_step = 0.01
    t = step_counter * time_step

    amplitude1 = amplitude_1_bound
    amplitude2 = amplitude_2_bound
    if t < 10:
      steering_amplitude = 0.5
    elif t < 20:
      steering_amplitude = -0.5
    else:
      steering_amplitude = 0

    # Applying asymmetrical sine gaits to different legs can steer the
    # minitaur.
    a1 = math.sin(t * speed) * (amplitude1 + steering_amplitude)
    a2 = math.sin(t * speed + math.pi) * (amplitude1 - steering_amplitude)
    a3 = math.sin(t * speed) * amplitude2
    a4 = math.sin(t * speed + math.pi) * amplitude2
    action = [a1, a2, a2, a1, a3, a4, a4, a3]
    _, reward, done, _ = environment.step(action)
    sum_reward += reward
    if done:
      break
  environment.reset()


def main():
  parser = argparse.ArgumentParser(
      formatter_class=argparse.ArgumentDefaultsHelpFormatter)
  parser.add_argument(
      '--env',
      help='environment ID (0=sine, 1=stand, 2=reset, 3=overheat)',
      type=int,
      default=0)
  args = parser.parse_args()
  print("--env=" + str(args.env))

  if args.env == 0:
    SinePolicyExample()
  if args.env == 1:
    SineStandExample()
  if args.env == 2:
    ResetPoseExample()
  if args.env == 3:
    MotorOverheatExample()


if __name__ == '__main__':
  main()