make sure that the pre-trained galloping and trotting policies work for the minitaur_reactive_env and minitaur_trotting_env environments.
This commit is contained in:
11
examples/pybullet/gym/pybullet_envs/minitaur/envs/README
Normal file
11
examples/pybullet/gym/pybullet_envs/minitaur/envs/README
Normal file
@@ -0,0 +1,11 @@
|
||||
This folder contains a number of simulated Minitaur environments implemented using pybullet.
|
||||
The following two environments are used in the RSS paper "Sim-to-Real: Learning Agile Locomotion For Quadruped Robots":
|
||||
1) Galloping example: minitaur_reactive_env.py
|
||||
python minitaur_reactive_env_example.py runs a pre-trained PPO agent that performs a galloping gait.
|
||||
|
||||
2) Trotting example: minitaur_trotting_env.py
|
||||
python minitaur_trotting_env_example.py runs a pre-trained PPO agent that performs a trotting gait.
|
||||
|
||||
The rest are experimental environments.
|
||||
|
||||
|
||||
@@ -216,7 +216,7 @@ class MinitaurFourLegStandEnv(minitaur_gym_env.MinitaurGymEnv):
|
||||
self._env_step_counter += 1
|
||||
done = self._termination()
|
||||
obs = self._get_true_observation()
|
||||
reward = self._reward(action, obs)
|
||||
reward = self._reward()
|
||||
if self._log_path is not None:
|
||||
minitaur_logging.update_episode_proto(self._episode_proto, self.minitaur,
|
||||
action, self._env_step_counter)
|
||||
@@ -272,7 +272,7 @@ class MinitaurFourLegStandEnv(minitaur_gym_env.MinitaurGymEnv):
|
||||
np.asarray([0, 0, 1]), np.asarray(local_up))
|
||||
return local_global_up_dot_product < 0.85 or height < 0.15
|
||||
|
||||
def _reward(self, action, obs):
|
||||
def _reward(self):
|
||||
roll, pitch, _ = self.minitaur.GetBaseRollPitchYaw()
|
||||
return 1.0 / (0.001 + math.fabs(roll) + math.fabs(pitch))
|
||||
|
||||
|
||||
@@ -1,8 +1,4 @@
|
||||
r"""An example to use simple_ppo_agent.
|
||||
|
||||
blaze run -c opt \
|
||||
//robotics/reinforcement_learning/minitaur/envs:minitaur_reactive_env_example
|
||||
"""
|
||||
r"""Running a pre-trained ppo agent on minitaur_reactive_env."""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
@@ -11,13 +7,13 @@ from __future__ import print_function
|
||||
import os
|
||||
import time
|
||||
import tensorflow as tf
|
||||
from agents.scripts import utility
|
||||
from pybullet_envs.minitaur.agents.scripts import utility
|
||||
import pybullet_data
|
||||
import simple_ppo_agent
|
||||
|
||||
flags = tf.app.flags
|
||||
FLAGS = tf.app.flags.FLAGS
|
||||
LOG_DIR = (
|
||||
"testdata/minitaur_reactive_env_test")
|
||||
LOG_DIR = os.path.join(pybullet_data.getDataPath(), "policies/ppo/minitaur_reactive_env")
|
||||
CHECKPOINT = "model.ckpt-14000000"
|
||||
|
||||
|
||||
@@ -43,7 +39,6 @@ def main(argv):
|
||||
while True:
|
||||
action = agent.get_action([observation])
|
||||
observation, reward, done, _ = env.step(action[0])
|
||||
# This sleep is to prevent serial communication error on the real robot.
|
||||
time.sleep(0.002)
|
||||
sum_reward += reward
|
||||
if done:
|
||||
|
||||
@@ -1,41 +1,51 @@
|
||||
"""An example to run the minitaur environment of trotting gait.
|
||||
r"""Running a pre-trained ppo agent on minitaur_trotting_env."""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
"""
|
||||
import time
|
||||
import os
|
||||
import numpy as np
|
||||
import time
|
||||
import tensorflow as tf
|
||||
from pybullet_envs.minitaur.envs import minitaur_gym_env
|
||||
from pybullet_envs.minitaur.envs import minitaur_trotting_env
|
||||
from pybullet_envs.minitaur.agents.scripts import utility
|
||||
import pybullet_data
|
||||
import simple_ppo_agent
|
||||
|
||||
#FLAGS = tf.flags.FLAGS
|
||||
#tf.flags.DEFINE_string("log_path", None, "The directory to write the log file.")
|
||||
flags = tf.app.flags
|
||||
FLAGS = tf.app.flags.FLAGS
|
||||
LOG_DIR = os.path.join(pybullet_data.getDataPath(), "policies/ppo/minitaur_trotting_env")
|
||||
CHECKPOINT = "model.ckpt-14000000"
|
||||
|
||||
|
||||
def main(unused_argv):
|
||||
environment = minitaur_trotting_env.MinitaurTrottingEnv(
|
||||
urdf_version=minitaur_gym_env.RAINBOW_DASH_V0_URDF_VERSION,
|
||||
use_signal_in_observation=False,
|
||||
use_angle_in_observation=False,
|
||||
render=True,
|
||||
log_path=os.getcwd())
|
||||
def main(argv):
|
||||
del argv # Unused.
|
||||
config = utility.load_config(LOG_DIR)
|
||||
policy_layers = config.policy_layers
|
||||
value_layers = config.value_layers
|
||||
env = config.env(render=True)
|
||||
network = config.network
|
||||
|
||||
np.random.seed(100)
|
||||
sum_reward = 0
|
||||
environment.reset()
|
||||
with tf.Session() as sess:
|
||||
agent = simple_ppo_agent.SimplePPOPolicy(
|
||||
sess,
|
||||
env,
|
||||
network,
|
||||
policy_layers=policy_layers,
|
||||
value_layers=value_layers,
|
||||
checkpoint=os.path.join(LOG_DIR, CHECKPOINT))
|
||||
|
||||
steps = 5000
|
||||
for _ in range(steps):
|
||||
# Sleep to prevent serial buffer overflow on microcontroller.
|
||||
time.sleep(0.002)
|
||||
action = [0] * 8
|
||||
_, reward, done, _ = environment.step(action)
|
||||
sum_reward += reward
|
||||
if done:
|
||||
break
|
||||
tf.logging.info("reward: {}".format(sum_reward))
|
||||
sum_reward = 0
|
||||
observation = env.reset()
|
||||
while True:
|
||||
action = agent.get_action([observation])
|
||||
observation, reward, done, _ = env.step(action[0])
|
||||
time.sleep(0.002)
|
||||
sum_reward += reward
|
||||
if done:
|
||||
break
|
||||
tf.logging.info("reward: %s", sum_reward)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
tf.logging.set_verbosity(tf.logging.INFO)
|
||||
tf.app.run()
|
||||
tf.app.run(main)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user