add minitaur environment randomizer, enable it by default

this improves PPO training ( more stable)
2017-09-27 09:07:21 -07:00
parent 12f28c5f76
commit c37919604f
4 changed files with 262 additions and 1 deletions
--- a/examples/pybullet/gym/pybullet_envs/examples/minitaur_gym_env_example.py
+++ b/examples/pybullet/gym/pybullet_envs/examples/minitaur_gym_env_example.py
@@ -0,0 +1,167 @@
+r"""An example to run of the minitaur gym environment with sine gaits.
+
+blaze run -c opt //robotics/reinforcement_learning/minitaur/\
+envs:minitaur_gym_env_example
+"""
+
+import math
+import numpy as np
+from pybullet_envs.bullet import minitaur_gym_env
+import argparse
+from pybullet_envs.bullet import minitaur_env_randomizer
+
+def ResetPoseExample():
+  """An example that the minitaur stands still using the reset pose."""
+  steps = 1000
+  randomizer = (minitaur_env_randomizer.MinitaurEnvRandomizer())
+  environment = minitaur_gym_env.MinitaurBulletEnv(
+      render=True,
+      leg_model_enabled=False,
+      motor_velocity_limit=np.inf,
+      pd_control_enabled=True,
+      accurate_motor_model_enabled=True,
+      motor_overheat_protection=True,
+      env_randomizer =  randomizer,
+      hard_reset=False)
+  action = [math.pi / 2] * 8
+  for _ in range(steps):
+    _, _, done, _ = environment.step(action)
+    if done:
+      break
+  environment.reset()
+
+def MotorOverheatExample():
+  """An example of minitaur motor overheat protection is triggered.
+
+  The minitaur is leaning forward and the motors are getting obove threshold
+  torques. The overheat protection will be triggered in ~1 sec.
+  """
+
+  environment = minitaur_gym_env.MinitaurBulletEnv(
+      render=True,
+      leg_model_enabled=False,
+      motor_velocity_limit=np.inf,
+      motor_overheat_protection=True,
+      accurate_motor_model_enabled=True,
+      motor_kp=1.20,
+      motor_kd=0.00,
+      on_rack=False)
+
+  action = [2.0] * 8
+  for i in range(8):
+    action[i] = 2.0 - 0.5 * (-1 if i % 2 == 0 else 1) * (-1 if i < 4 else 1)
+
+  steps = 500
+  actions_and_observations = []
+  for step_counter in range(steps):
+    # Matches the internal timestep.
+    time_step = 0.01
+    t = step_counter * time_step
+    current_row = [t]
+    current_row.extend(action)
+
+    observation, _, _, _ = environment.step(action)
+    current_row.extend(observation.tolist())
+    actions_and_observations.append(current_row)
+  environment.reset()
+
+def SineStandExample():
+  """An example of minitaur standing and squatting on the floor.
+
+  To validate the accurate motor model we command the robot and sit and stand up
+  periodically in both simulation and experiment. We compare the measured motor
+  trajectories, torques and gains.
+  """
+  environment = minitaur_gym_env.MinitaurBulletEnv(
+      render=True,
+      leg_model_enabled=False,
+      motor_velocity_limit=np.inf,
+      motor_overheat_protection=True,
+      accurate_motor_model_enabled=True,
+      motor_kp=1.20,
+      motor_kd=0.02,
+      on_rack=False)
+  steps = 1000
+  amplitude = 0.5
+  speed = 3
+
+  actions_and_observations = []
+
+  for step_counter in range(steps):
+    # Matches the internal timestep.
+    time_step = 0.01
+    t = step_counter * time_step
+    current_row = [t]
+
+    action = [math.sin(speed * t) * amplitude + math.pi / 2] * 8
+    current_row.extend(action)
+
+    observation, _, _, _ = environment.step(action)
+    current_row.extend(observation.tolist())
+    actions_and_observations.append(current_row)
+  
+  environment.reset()
+
+
+def SinePolicyExample():
+  """An example of minitaur walking with a sine gait."""
+  randomizer = (minitaur_env_randomizer.MinitaurEnvRandomizer())
+  environment = minitaur_gym_env.MinitaurBulletEnv(
+      render=True,
+      motor_velocity_limit=np.inf,
+      pd_control_enabled=True,
+      hard_reset=False,
+      env_randomizer =  randomizer,
+      on_rack=False)
+  sum_reward = 0
+  steps = 20000
+  amplitude_1_bound = 0.5
+  amplitude_2_bound = 0.5
+  speed = 40
+
+  for step_counter in range(steps):
+    time_step = 0.01
+    t = step_counter * time_step
+
+    amplitude1 = amplitude_1_bound
+    amplitude2 = amplitude_2_bound
+    steering_amplitude = 0
+    if t < 10:
+      steering_amplitude = 0.5
+    elif t < 20:
+      steering_amplitude = -0.5
+    else:
+      steering_amplitude = 0
+
+    # Applying asymmetrical sine gaits to different legs can steer the minitaur.
+    a1 = math.sin(t * speed) * (amplitude1 + steering_amplitude)
+    a2 = math.sin(t * speed + math.pi) * (amplitude1 - steering_amplitude)
+    a3 = math.sin(t * speed) * amplitude2
+    a4 = math.sin(t * speed + math.pi) * amplitude2
+    action = [a1, a2, a2, a1, a3, a4, a4, a3]
+    _, reward, done, _ = environment.step(action)
+    sum_reward += reward
+    if done:
+      break
+  environment.reset()
+
+
+def main():
+    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+    parser.add_argument('--env', help='environment ID (0==sine, 1==stand, 2=reset, 3=overheat)',type=int,  default=0)
+    args = parser.parse_args()
+    print("--env=" + str(args.env))
+      
+    if (args.env == 0):
+      SinePolicyExample()
+    if (args.env == 1):
+      SineStandExample()
+    if (args.env == 2):
+      ResetPoseExample()
+    if (args.env == 3):
+      MotorOverheatExample()
+
+if __name__ == '__main__':
+    main()
+
+