Make sure that the pre-trained galloping and trotting policies work with the minitaur_reactive_env and minitaur_trotting_env environments.

This commit is contained in:
Jie
2018-04-24 21:48:27 -07:00
parent 982453abc6
commit a375a349ce
43 changed files with 4447 additions and 42 deletions

View File

@@ -0,0 +1,11 @@
This folder contains a number of simulated Minitaur environments implemented using pybullet.
The following two environments are used in the RSS paper "Sim-to-Real: Learning Agile Locomotion For Quadruped Robots":
1) Galloping example: minitaur_reactive_env.py
python minitaur_reactive_env_example.py runs a pre-trained PPO agent that performs a galloping gait.
2) Trotting example: minitaur_trotting_env.py
python minitaur_trotting_env_example.py runs a pre-trained PPO agent that performs a trotting gait.
The rest are experimental environments.

View File

@@ -216,7 +216,7 @@ class MinitaurFourLegStandEnv(minitaur_gym_env.MinitaurGymEnv):
self._env_step_counter += 1
done = self._termination()
obs = self._get_true_observation()
reward = self._reward(action, obs)
reward = self._reward()
if self._log_path is not None:
minitaur_logging.update_episode_proto(self._episode_proto, self.minitaur,
action, self._env_step_counter)
@@ -272,7 +272,7 @@ class MinitaurFourLegStandEnv(minitaur_gym_env.MinitaurGymEnv):
np.asarray([0, 0, 1]), np.asarray(local_up))
return local_global_up_dot_product < 0.85 or height < 0.15
def _reward(self, action, obs):
def _reward(self):
roll, pitch, _ = self.minitaur.GetBaseRollPitchYaw()
return 1.0 / (0.001 + math.fabs(roll) + math.fabs(pitch))

View File

@@ -1,8 +1,4 @@
r"""An example to use simple_ppo_agent.
blaze run -c opt \
//robotics/reinforcement_learning/minitaur/envs:minitaur_reactive_env_example
"""
r"""Running a pre-trained ppo agent on minitaur_reactive_env."""
from __future__ import absolute_import
from __future__ import division
@@ -11,13 +7,13 @@ from __future__ import print_function
import os
import time
import tensorflow as tf
from agents.scripts import utility
from pybullet_envs.minitaur.agents.scripts import utility
import pybullet_data
import simple_ppo_agent
flags = tf.app.flags
FLAGS = tf.app.flags.FLAGS
LOG_DIR = (
"testdata/minitaur_reactive_env_test")
LOG_DIR = os.path.join(pybullet_data.getDataPath(), "policies/ppo/minitaur_reactive_env")
CHECKPOINT = "model.ckpt-14000000"
@@ -43,7 +39,6 @@ def main(argv):
while True:
action = agent.get_action([observation])
observation, reward, done, _ = env.step(action[0])
# This sleep is to prevent serial communication error on the real robot.
time.sleep(0.002)
sum_reward += reward
if done:

View File

@@ -1,41 +1,51 @@
"""An example to run the minitaur environment of trotting gait.
r"""Running a pre-trained ppo agent on minitaur_trotting_env."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
"""
import time
import os
import numpy as np
import time
import tensorflow as tf
from pybullet_envs.minitaur.envs import minitaur_gym_env
from pybullet_envs.minitaur.envs import minitaur_trotting_env
from pybullet_envs.minitaur.agents.scripts import utility
import pybullet_data
import simple_ppo_agent
#FLAGS = tf.flags.FLAGS
#tf.flags.DEFINE_string("log_path", None, "The directory to write the log file.")
flags = tf.app.flags
FLAGS = tf.app.flags.FLAGS
LOG_DIR = os.path.join(pybullet_data.getDataPath(), "policies/ppo/minitaur_trotting_env")
CHECKPOINT = "model.ckpt-14000000"
def main(unused_argv):
environment = minitaur_trotting_env.MinitaurTrottingEnv(
urdf_version=minitaur_gym_env.RAINBOW_DASH_V0_URDF_VERSION,
use_signal_in_observation=False,
use_angle_in_observation=False,
render=True,
log_path=os.getcwd())
def main(argv):
del argv # Unused.
config = utility.load_config(LOG_DIR)
policy_layers = config.policy_layers
value_layers = config.value_layers
env = config.env(render=True)
network = config.network
np.random.seed(100)
sum_reward = 0
environment.reset()
with tf.Session() as sess:
agent = simple_ppo_agent.SimplePPOPolicy(
sess,
env,
network,
policy_layers=policy_layers,
value_layers=value_layers,
checkpoint=os.path.join(LOG_DIR, CHECKPOINT))
steps = 5000
for _ in range(steps):
# Sleep to prevent serial buffer overflow on microcontroller.
time.sleep(0.002)
action = [0] * 8
_, reward, done, _ = environment.step(action)
sum_reward += reward
if done:
break
tf.logging.info("reward: {}".format(sum_reward))
sum_reward = 0
observation = env.reset()
while True:
action = agent.get_action([observation])
observation, reward, done, _ = env.step(action[0])
time.sleep(0.002)
sum_reward += reward
if done:
break
tf.logging.info("reward: %s", sum_reward)
if __name__ == "__main__":
tf.logging.set_verbosity(tf.logging.INFO)
tf.app.run()
tf.app.run(main)