add minitaur environment randomizer, enable it by default

this improves PPO training ( more stable)
2017-09-27 09:07:21 -07:00
parent 12f28c5f76
commit c37919604f
4 changed files with 262 additions and 1 deletions
--- a/examples/pybullet/gym/pybullet_envs/bullet/env_randomizer_base.py
+++ b/examples/pybullet/gym/pybullet_envs/bullet/env_randomizer_base.py
@@ -0,0 +1,25 @@
 """Abstract base class for environment randomizer."""
 import abc
 class EnvRandomizerBase(object):
  """Abstract base class for environment randomizer.
  An EnvRandomizer is called in environment.reset(). It will
  randomize physical parameters of the objects in the simulation.
  The physical parameters will be fixed for that episode and be
  randomized again in the next environment.reset().
  """
  __metaclass__ = abc.ABCMeta
  @abc.abstractmethod
  def randomize_env(self, env):
    """Randomize the simulated_objects in the environment.
    Args:
      env: The environment to be randomized.
    """
    pass
--- a/examples/pybullet/gym/pybullet_envs/bullet/minitaur_env_randomizer.py
+++ b/examples/pybullet/gym/pybullet_envs/bullet/minitaur_env_randomizer.py
@@ -0,0 +1,68 @@
 """Randomize the minitaur_gym_env when reset() is called."""
 import random
 import numpy as np
 from . import env_randomizer_base
 # Relative range.
 MINITAUR_BASE_MASS_ERROR_RANGE = (-0.2, 0.2)  # 0.2 means 20%
 MINITAUR_LEG_MASS_ERROR_RANGE = (-0.2, 0.2)  # 0.2 means 20%
 # Absolute range.
 BATTERY_VOLTAGE_RANGE = (14.8, 16.8)  # Unit: Volt
 MOTOR_VISCOUS_DAMPING_RANGE = (0, 0.01)  # Unit: N*m*s/rad (torque/angular vel)
 MINITAUR_LEG_FRICTION = (0.8, 1.5)  # Unit: dimensionless
 class MinitaurEnvRandomizer(env_randomizer_base.EnvRandomizerBase):
  """A randomizer that change the minitaur_gym_env during every reset."""
  def __init__(self,
               minitaur_base_mass_err_range=MINITAUR_BASE_MASS_ERROR_RANGE,
               minitaur_leg_mass_err_range=MINITAUR_LEG_MASS_ERROR_RANGE,
               battery_voltage_range=BATTERY_VOLTAGE_RANGE,
               motor_viscous_damping_range=MOTOR_VISCOUS_DAMPING_RANGE):
    self._minitaur_base_mass_err_range = minitaur_base_mass_err_range
    self._minitaur_leg_mass_err_range = minitaur_leg_mass_err_range
    self._battery_voltage_range = battery_voltage_range
    self._motor_viscous_damping_range = motor_viscous_damping_range
  def randomize_env(self, env):
    self._randomize_minitaur(env.minitaur)
  def _randomize_minitaur(self, minitaur):
    """Randomize various physical properties of minitaur.
    It randomizes the mass/inertia of the base, mass/inertia of the legs,
    friction coefficient of the feet, the battery voltage and the motor damping
    at each reset() of the environment.
    Args:
      minitaur: the Minitaur instance in minitaur_gym_env environment.
    """
    base_mass = minitaur.GetBaseMassFromURDF()
    randomized_base_mass = random.uniform(
        base_mass * (1.0 + self._minitaur_base_mass_err_range[0]),
        base_mass * (1.0 + self._minitaur_base_mass_err_range[1]))
    minitaur.SetBaseMass(randomized_base_mass)
    leg_masses = minitaur.GetLegMassesFromURDF()
    leg_masses_lower_bound = np.array(leg_masses) * (
        1.0 + self._minitaur_leg_mass_err_range[0])
    leg_masses_upper_bound = np.array(leg_masses) * (
        1.0 + self._minitaur_leg_mass_err_range[1])
    randomized_leg_masses = [
        np.random.uniform(leg_masses_lower_bound[i], leg_masses_upper_bound[i])
        for i in range(len(leg_masses))
    ]
    minitaur.SetLegMasses(randomized_leg_masses)
    randomized_battery_voltage = random.uniform(BATTERY_VOLTAGE_RANGE[0],
                                                BATTERY_VOLTAGE_RANGE[1])
    minitaur.SetBatteryVoltage(randomized_battery_voltage)
    randomized_motor_damping = random.uniform(MOTOR_VISCOUS_DAMPING_RANGE[0],
                                              MOTOR_VISCOUS_DAMPING_RANGE[1])
    minitaur.SetMotorViscousDamping(randomized_motor_damping)
    randomized_foot_friction = random.uniform(MINITAUR_LEG_FRICTION[0],
                                              MINITAUR_LEG_FRICTION[1])
    minitaur.SetFootFriction(randomized_foot_friction)
--- a/examples/pybullet/gym/pybullet_envs/bullet/minitaur_gym_env.py
+++ b/examples/pybullet/gym/pybullet_envs/bullet/minitaur_gym_env.py
@@ -19,6 +19,7 @@ from . import bullet_client
 from . import minitaur
 import os
 import pybullet_data
 from . import minitaur_env_randomizer
 NUM_SUBSTEPS = 5
 NUM_MOTORS = 8
@@ -68,7 +69,7 @@ class MinitaurBulletEnv(gym.Env):
               on_rack=False,
               render=False,
               kd_for_pd_controllers=0.3,
-               env_randomizer=None):
+               env_randomizer=minitaur_env_randomizer.MinitaurEnvRandomizer()):
    """Initialize the minitaur gym environment.
    Args:
--- a/examples/pybullet/gym/pybullet_envs/examples/minitaur_gym_env_example.py
+++ b/examples/pybullet/gym/pybullet_envs/examples/minitaur_gym_env_example.py
@@ -0,0 +1,167 @@
 r"""An example to run of the minitaur gym environment with sine gaits.
 blaze run -c opt //robotics/reinforcement_learning/minitaur/\
 envs:minitaur_gym_env_example
 """
 import math
 import numpy as np
 from pybullet_envs.bullet import minitaur_gym_env
 import argparse
 from pybullet_envs.bullet import minitaur_env_randomizer
 def ResetPoseExample():
  """An example that the minitaur stands still using the reset pose."""
  steps = 1000
  randomizer = (minitaur_env_randomizer.MinitaurEnvRandomizer())
  environment = minitaur_gym_env.MinitaurBulletEnv(
      render=True,
      leg_model_enabled=False,
      motor_velocity_limit=np.inf,
      pd_control_enabled=True,
      accurate_motor_model_enabled=True,
      motor_overheat_protection=True,
      env_randomizer =  randomizer,
      hard_reset=False)
  action = [math.pi / 2] * 8
  for _ in range(steps):
    _, _, done, _ = environment.step(action)
    if done:
      break
  environment.reset()
 def MotorOverheatExample():
  """An example of minitaur motor overheat protection is triggered.
  The minitaur is leaning forward and the motors are getting obove threshold
  torques. The overheat protection will be triggered in ~1 sec.
  """
  environment = minitaur_gym_env.MinitaurBulletEnv(
      render=True,
      leg_model_enabled=False,
      motor_velocity_limit=np.inf,
      motor_overheat_protection=True,
      accurate_motor_model_enabled=True,
      motor_kp=1.20,
      motor_kd=0.00,
      on_rack=False)
  action = [2.0] * 8
  for i in range(8):
    action[i] = 2.0 - 0.5 * (-1 if i % 2 == 0 else 1) * (-1 if i < 4 else 1)
  steps = 500
  actions_and_observations = []
  for step_counter in range(steps):
    # Matches the internal timestep.
    time_step = 0.01
    t = step_counter * time_step
    current_row = [t]
    current_row.extend(action)
    observation, _, _, _ = environment.step(action)
    current_row.extend(observation.tolist())
    actions_and_observations.append(current_row)
  environment.reset()
 def SineStandExample():
  """An example of minitaur standing and squatting on the floor.
  To validate the accurate motor model we command the robot and sit and stand up
  periodically in both simulation and experiment. We compare the measured motor
  trajectories, torques and gains.
  """
  environment = minitaur_gym_env.MinitaurBulletEnv(
      render=True,
      leg_model_enabled=False,
      motor_velocity_limit=np.inf,
      motor_overheat_protection=True,
      accurate_motor_model_enabled=True,
      motor_kp=1.20,
      motor_kd=0.02,
      on_rack=False)
  steps = 1000
  amplitude = 0.5
  speed = 3
  actions_and_observations = []
  for step_counter in range(steps):
    # Matches the internal timestep.
    time_step = 0.01
    t = step_counter * time_step
    current_row = [t]
    action = [math.sin(speed * t) * amplitude + math.pi / 2] * 8
    current_row.extend(action)
    observation, _, _, _ = environment.step(action)
    current_row.extend(observation.tolist())
    actions_and_observations.append(current_row)
  environment.reset()
 def SinePolicyExample():
  """An example of minitaur walking with a sine gait."""
  randomizer = (minitaur_env_randomizer.MinitaurEnvRandomizer())
  environment = minitaur_gym_env.MinitaurBulletEnv(
      render=True,
      motor_velocity_limit=np.inf,
      pd_control_enabled=True,
      hard_reset=False,
      env_randomizer =  randomizer,
      on_rack=False)
  sum_reward = 0
  steps = 20000
  amplitude_1_bound = 0.5
  amplitude_2_bound = 0.5
  speed = 40
  for step_counter in range(steps):
    time_step = 0.01
    t = step_counter * time_step
    amplitude1 = amplitude_1_bound
    amplitude2 = amplitude_2_bound
    steering_amplitude = 0
    if t < 10:
      steering_amplitude = 0.5
    elif t < 20:
      steering_amplitude = -0.5
    else:
      steering_amplitude = 0
    # Applying asymmetrical sine gaits to different legs can steer the minitaur.
    a1 = math.sin(t * speed) * (amplitude1 + steering_amplitude)
    a2 = math.sin(t * speed + math.pi) * (amplitude1 - steering_amplitude)
    a3 = math.sin(t * speed) * amplitude2
    a4 = math.sin(t * speed + math.pi) * amplitude2
    action = [a1, a2, a2, a1, a3, a4, a4, a3]
    _, reward, done, _ = environment.step(action)
    sum_reward += reward
    if done:
      break
  environment.reset()
 def main():
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--env', help='environment ID (0==sine, 1==stand, 2=reset, 3=overheat)',type=int,  default=0)
    args = parser.parse_args()
    print("--env=" + str(args.env))
    if (args.env == 0):
      SinePolicyExample()
    if (args.env == 1):
      SineStandExample()
    if (args.env == 2):
      ResetPoseExample()
    if (args.env == 3):
      MotorOverheatExample()
 if __name__ == '__main__':
    main()