Merge remote-tracking branch 'upstream/master'
2
examples/pybullet/gym/pybullet_envs/agents/__init__.py
Normal file
@@ -0,0 +1,2 @@
110
examples/pybullet/gym/pybullet_envs/agents/config_ppo.py
Normal file
@@ -0,0 +1,110 @@
"""The PPO training configuration file for minitaur environments."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import functools

from agents import ppo
from agents.scripts import networks
from pybullet_envs.bullet import minitaur_gym_env
from pybullet_envs.bullet import minitaur_env_randomizer
import pybullet_envs  # Importing pybullet_envs registers the Bullet gym environments.


# pylint: disable=unused-variable
def default():
  """The default configuration."""
  # General
  algorithm = ppo.PPOAlgorithm
  num_agents = 25
  eval_episodes = 25
  use_gpu = False
  # Network
  network = networks.ForwardGaussianPolicy
  weight_summaries = dict(
      all=r'.*', policy=r'.*/policy/.*', value=r'.*/value/.*')
  policy_layers = 200, 100
  value_layers = 200, 100
  init_mean_factor = 0.2
  init_logstd = -1
  network_config = dict()
  # Optimization
  update_every = 25
  policy_optimizer = 'AdamOptimizer'
  value_optimizer = 'AdamOptimizer'
  update_epochs_policy = 25
  update_epochs_value = 25
  value_lr = 1e-3
  policy_lr = 1e-4
  # Losses
  discount = 0.99
  kl_target = 1e-2
  kl_cutoff_factor = 2
  kl_cutoff_coef = 1000
  kl_init_penalty = 1
  return locals()


def pybullet_pendulum():
  locals().update(default())
  env = 'InvertedPendulumBulletEnv-v0'
  max_length = 200
  steps = 5e7  # 50M
  return locals()


def pybullet_doublependulum():
  locals().update(default())
  env = 'InvertedDoublePendulumBulletEnv-v0'
  max_length = 1000
  steps = 5e7  # 50M
  return locals()


def pybullet_pendulumswingup():
  locals().update(default())
  env = 'InvertedPendulumSwingupBulletEnv-v0'
  max_length = 1000
  steps = 5e7  # 50M
  return locals()


def pybullet_cheetah():
  """Configuration for the Bullet half cheetah task."""
  locals().update(default())
  # Environment
  env = 'HalfCheetahBulletEnv-v0'
  max_length = 1000
  steps = 1e8  # 100M
  return locals()


def pybullet_ant():
  locals().update(default())
  env = 'AntBulletEnv-v0'
  max_length = 1000
  steps = 5e7  # 50M
  return locals()


def pybullet_racecar():
  """Configuration for the Bullet MIT racecar task."""
  locals().update(default())
  # Environment
  env = 'RacecarBulletEnv-v0'  # Or: functools.partial(racecarGymEnv.RacecarGymEnv, isDiscrete=False, renders=True)
  max_length = 10
  steps = 1e7  # 10M
  return locals()


def pybullet_minitaur():
  """Configuration specific to the minitaur_gym_env.MinitaurBulletEnv class."""
  locals().update(default())
  randomizer = minitaur_env_randomizer.MinitaurEnvRandomizer()
  env = functools.partial(
      minitaur_gym_env.MinitaurBulletEnv,
      accurate_motor_model_enabled=True,
      motor_overheat_protection=True,
      pd_control_enabled=True,
      env_randomizer=randomizer,
      render=False)
  max_length = 1000
  steps = 3e7  # 30M
  return locals()
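A note on the pattern above: `locals().update(default())` relies on a CPython implementation detail. The dict returned by locals() is cached on the frame, so the manually added keys survive until the final `return locals()`, which then yields the defaults plus the per-task overrides. A minimal sketch of how a trainer consumes one of these config functions, assuming the open-source `agents` package this directory builds on (train_ppo.py below does the same thing via getattr):

from agents import tools
from pybullet_envs.agents import config_ppo

# Each config function returns a plain dict of its local names.
config = tools.AttrDict(config_ppo.pybullet_minitaur())
print(config.max_length)  # 1000
print(config.steps)       # 3e7; config.env is a functools.partial over MinitaurBulletEnv.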
48
examples/pybullet/gym/pybullet_envs/agents/train_ppo.py
Normal file
@@ -0,0 +1,48 @@
r"""Script to train the minitaur environments with Proximal Policy Optimization (PPO).

Run:
python -m pybullet_envs.agents.train_ppo --logdir=/tmp/train --config=pybullet_minitaur
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import datetime
import os
import tensorflow as tf

from agents import tools
from agents.scripts import train
from agents.scripts import utility
from . import config_ppo


flags = tf.app.flags
FLAGS = tf.app.flags.FLAGS

flags.DEFINE_string(
    'logdir', None,
    'Base directory to store logs.')
flags.DEFINE_string(
    'config', None,
    'Configuration to execute.')
flags.DEFINE_string(
    'timestamp', datetime.datetime.now().strftime('%Y%m%dT%H%M%S'),
    'Sub directory to store logs.')


def main(_):
  """Create or load configuration and launch the trainer."""
  config = tools.AttrDict(getattr(config_ppo, FLAGS.config)())
  logdir = FLAGS.logdir and os.path.join(
      FLAGS.logdir, '{}-{}'.format(FLAGS.timestamp, FLAGS.config))
  utility.save_config(config, logdir)
  for score in train.train(config, env_processes=True):
    tf.logging.info(str(score))


if __name__ == '__main__':
  tf.app.run()
42
examples/pybullet/gym/pybullet_envs/agents/visualize_ppo.py
Normal file
@@ -0,0 +1,42 @@
r"""Script to visualize a trained PPO agent.

python -m pybullet_envs.agents.visualize_ppo \
  --logdir=ppo \
  --outdir=/tmp/video/
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

from agents.scripts import visualize


flags = tf.app.flags
FLAGS = tf.app.flags.FLAGS
flags.DEFINE_string("logdir", None,
                    "Directory to the checkpoint of a training run.")
flags.DEFINE_string("outdir", None,
                    "Local directory for storing the monitoring outdir.")
flags.DEFINE_string("checkpoint", None,
                    "Checkpoint name to load; defaults to most recent.")
flags.DEFINE_integer("num_agents", 1,
                     "How many environments to step in parallel.")
flags.DEFINE_integer("num_episodes", 1, "Minimum number of episodes to render.")
flags.DEFINE_boolean(
    "env_processes", False,
    "Step environments in separate processes to circumvent the GIL.")


def main(_):
  visualize.visualize(FLAGS.logdir, FLAGS.outdir, FLAGS.num_agents,
                      FLAGS.num_episodes, FLAGS.checkpoint,
                      FLAGS.env_processes)


if __name__ == "__main__":
  tf.app.run()
25
examples/pybullet/gym/pybullet_envs/bullet/env_randomizer_base.py
Normal file
@@ -0,0 +1,25 @@
"""Abstract base class for environment randomizer."""

import abc


class EnvRandomizerBase(object):
  """Abstract base class for environment randomizer.

  An EnvRandomizer is called in environment.reset(). It will
  randomize physical parameters of the objects in the simulation.
  The physical parameters will be fixed for that episode and be
  randomized again in the next environment.reset().
  """

  __metaclass__ = abc.ABCMeta

  @abc.abstractmethod
  def randomize_env(self, env):
    """Randomize the simulated_objects in the environment.

    Args:
      env: The environment to be randomized.
    """
    pass
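For illustration, a minimal concrete subclass might look like the sketch below; GravityRandomizer is a hypothetical name, and reaching into env._pybullet_client is an assumption borrowed from MinitaurBulletEnv:

import random

from pybullet_envs.bullet import env_randomizer_base


class GravityRandomizer(env_randomizer_base.EnvRandomizerBase):
  """Hypothetical randomizer that perturbs gravity at every reset."""

  def randomize_env(self, env):
    # Re-sample gravity once per reset(); per the contract above, the value
    # then stays fixed for the rest of the episode.
    env._pybullet_client.setGravity(0, 0, -random.uniform(9.6, 10.0))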
68
examples/pybullet/gym/pybullet_envs/bullet/minitaur_env_randomizer.py
Normal file
@@ -0,0 +1,68 @@
"""Randomize the minitaur_gym_env when reset() is called."""
import random
import numpy as np
from . import env_randomizer_base

# Relative ranges.
MINITAUR_BASE_MASS_ERROR_RANGE = (-0.2, 0.2)  # 0.2 means 20%
MINITAUR_LEG_MASS_ERROR_RANGE = (-0.2, 0.2)  # 0.2 means 20%
# Absolute ranges.
BATTERY_VOLTAGE_RANGE = (14.8, 16.8)  # Unit: volt
MOTOR_VISCOUS_DAMPING_RANGE = (0, 0.01)  # Unit: N*m*s/rad (torque / angular velocity)
MINITAUR_LEG_FRICTION = (0.8, 1.5)  # Unit: dimensionless


class MinitaurEnvRandomizer(env_randomizer_base.EnvRandomizerBase):
  """A randomizer that changes the minitaur_gym_env during every reset."""

  def __init__(self,
               minitaur_base_mass_err_range=MINITAUR_BASE_MASS_ERROR_RANGE,
               minitaur_leg_mass_err_range=MINITAUR_LEG_MASS_ERROR_RANGE,
               battery_voltage_range=BATTERY_VOLTAGE_RANGE,
               motor_viscous_damping_range=MOTOR_VISCOUS_DAMPING_RANGE):
    self._minitaur_base_mass_err_range = minitaur_base_mass_err_range
    self._minitaur_leg_mass_err_range = minitaur_leg_mass_err_range
    self._battery_voltage_range = battery_voltage_range
    self._motor_viscous_damping_range = motor_viscous_damping_range

  def randomize_env(self, env):
    self._randomize_minitaur(env.minitaur)

  def _randomize_minitaur(self, minitaur):
    """Randomize various physical properties of the minitaur.

    It randomizes the mass/inertia of the base, mass/inertia of the legs,
    friction coefficient of the feet, the battery voltage and the motor damping
    at each reset() of the environment.

    Args:
      minitaur: the Minitaur instance in the minitaur_gym_env environment.
    """
    base_mass = minitaur.GetBaseMassFromURDF()
    randomized_base_mass = random.uniform(
        base_mass * (1.0 + self._minitaur_base_mass_err_range[0]),
        base_mass * (1.0 + self._minitaur_base_mass_err_range[1]))
    minitaur.SetBaseMass(randomized_base_mass)

    leg_masses = minitaur.GetLegMassesFromURDF()
    leg_masses_lower_bound = np.array(leg_masses) * (
        1.0 + self._minitaur_leg_mass_err_range[0])
    leg_masses_upper_bound = np.array(leg_masses) * (
        1.0 + self._minitaur_leg_mass_err_range[1])
    randomized_leg_masses = [
        np.random.uniform(leg_masses_lower_bound[i], leg_masses_upper_bound[i])
        for i in range(len(leg_masses))
    ]
    minitaur.SetLegMasses(randomized_leg_masses)

    # Use the ranges passed to the constructor rather than the module-level
    # constants, so that custom ranges are honored.
    randomized_battery_voltage = random.uniform(self._battery_voltage_range[0],
                                                self._battery_voltage_range[1])
    minitaur.SetBatteryVoltage(randomized_battery_voltage)

    randomized_motor_damping = random.uniform(
        self._motor_viscous_damping_range[0],
        self._motor_viscous_damping_range[1])
    minitaur.SetMotorViscousDamping(randomized_motor_damping)

    randomized_foot_friction = random.uniform(MINITAUR_LEG_FRICTION[0],
                                              MINITAUR_LEG_FRICTION[1])
    minitaur.SetFootFriction(randomized_foot_friction)
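A quick sketch of the randomizer in use. Per the EnvRandomizerBase contract, randomize_env() runs inside environment.reset(), so every reset re-samples the physical parameters:

from pybullet_envs.bullet import minitaur_env_randomizer
from pybullet_envs.bullet import minitaur_gym_env

env = minitaur_gym_env.MinitaurBulletEnv(
    env_randomizer=minitaur_env_randomizer.MinitaurEnvRandomizer())
# Base and leg masses, battery voltage, motor damping and foot friction
# are all re-sampled on each call below.
env.reset()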
examples/pybullet/gym/pybullet_envs/bullet/minitaur_gym_env.py
@@ -19,6 +19,7 @@ from . import bullet_client
 from . import minitaur
 import os
 import pybullet_data
+from . import minitaur_env_randomizer
 
 NUM_SUBSTEPS = 5
 NUM_MOTORS = 8
@@ -68,7 +69,7 @@ class MinitaurBulletEnv(gym.Env):
                on_rack=False,
                render=False,
                kd_for_pd_controllers=0.3,
-               env_randomizer=None):
+               env_randomizer=minitaur_env_randomizer.MinitaurEnvRandomizer()):
     """Initialize the minitaur gym environment.
 
     Args:
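Since this change makes the randomizer the default, a deterministic simulation now has to opt out explicitly. A sketch, assuming a None randomizer is simply skipped at reset():

# Disable domain randomization (the pre-change default behavior).
env = minitaur_gym_env.MinitaurBulletEnv(env_randomizer=None)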
@@ -0,0 +1,164 @@
r"""An example of running the minitaur gym environment with sine gaits."""

import argparse
import math
import numpy as np
from pybullet_envs.bullet import minitaur_gym_env
from pybullet_envs.bullet import minitaur_env_randomizer


def ResetPoseExample():
  """An example where the minitaur stands still using the reset pose."""
  steps = 1000
  randomizer = minitaur_env_randomizer.MinitaurEnvRandomizer()
  environment = minitaur_gym_env.MinitaurBulletEnv(
      render=True,
      leg_model_enabled=False,
      motor_velocity_limit=np.inf,
      pd_control_enabled=True,
      accurate_motor_model_enabled=True,
      motor_overheat_protection=True,
      env_randomizer=randomizer,
      hard_reset=False)
  action = [math.pi / 2] * 8
  for _ in range(steps):
    _, _, done, _ = environment.step(action)
    if done:
      break
  environment.reset()


def MotorOverheatExample():
  """An example where the minitaur motor overheat protection is triggered.

  The minitaur is leaning forward and the motors are getting above-threshold
  torques. The overheat protection will be triggered in ~1 sec.
  """
  environment = minitaur_gym_env.MinitaurBulletEnv(
      render=True,
      leg_model_enabled=False,
      motor_velocity_limit=np.inf,
      motor_overheat_protection=True,
      accurate_motor_model_enabled=True,
      motor_kp=1.20,
      motor_kd=0.00,
      on_rack=False)

  action = [2.0] * 8
  for i in range(8):
    # Apply asymmetric offsets so the minitaur leans and overloads its motors.
    action[i] = 2.0 - 0.5 * (-1 if i % 2 == 0 else 1) * (-1 if i < 4 else 1)

  steps = 500
  actions_and_observations = []
  for step_counter in range(steps):
    # Matches the internal timestep.
    time_step = 0.01
    t = step_counter * time_step
    current_row = [t]
    current_row.extend(action)

    observation, _, _, _ = environment.step(action)
    current_row.extend(observation.tolist())
    actions_and_observations.append(current_row)
  environment.reset()


def SineStandExample():
  """An example of the minitaur standing and squatting on the floor.

  To validate the accurate motor model, we command the robot to sit and stand
  up periodically in both simulation and experiment. We compare the measured
  motor trajectories, torques and gains.
  """
  environment = minitaur_gym_env.MinitaurBulletEnv(
      render=True,
      leg_model_enabled=False,
      motor_velocity_limit=np.inf,
      motor_overheat_protection=True,
      accurate_motor_model_enabled=True,
      motor_kp=1.20,
      motor_kd=0.02,
      on_rack=False)
  steps = 1000
  amplitude = 0.5
  speed = 3

  actions_and_observations = []

  for step_counter in range(steps):
    # Matches the internal timestep.
    time_step = 0.01
    t = step_counter * time_step
    current_row = [t]

    action = [math.sin(speed * t) * amplitude + math.pi / 2] * 8
    current_row.extend(action)

    observation, _, _, _ = environment.step(action)
    current_row.extend(observation.tolist())
    actions_and_observations.append(current_row)

  environment.reset()


def SinePolicyExample():
  """An example of the minitaur walking with a sine gait."""
  randomizer = minitaur_env_randomizer.MinitaurEnvRandomizer()
  environment = minitaur_gym_env.MinitaurBulletEnv(
      render=True,
      motor_velocity_limit=np.inf,
      pd_control_enabled=True,
      hard_reset=False,
      env_randomizer=randomizer,
      on_rack=False)
  sum_reward = 0
  steps = 20000
  amplitude_1_bound = 0.5
  amplitude_2_bound = 0.5
  speed = 40

  for step_counter in range(steps):
    time_step = 0.01
    t = step_counter * time_step

    amplitude1 = amplitude_1_bound
    amplitude2 = amplitude_2_bound
    if t < 10:
      steering_amplitude = 0.5
    elif t < 20:
      steering_amplitude = -0.5
    else:
      steering_amplitude = 0

    # Applying asymmetrical sine gaits to different legs can steer the
    # minitaur.
    a1 = math.sin(t * speed) * (amplitude1 + steering_amplitude)
    a2 = math.sin(t * speed + math.pi) * (amplitude1 - steering_amplitude)
    a3 = math.sin(t * speed) * amplitude2
    a4 = math.sin(t * speed + math.pi) * amplitude2
    action = [a1, a2, a2, a1, a3, a4, a4, a3]
    _, reward, done, _ = environment.step(action)
    sum_reward += reward
    if done:
      break
  environment.reset()


def main():
  parser = argparse.ArgumentParser(
      formatter_class=argparse.ArgumentDefaultsHelpFormatter)
  parser.add_argument(
      '--env',
      help='environment ID (0=sine, 1=stand, 2=reset, 3=overheat)',
      type=int,
      default=0)
  args = parser.parse_args()
  print("--env=" + str(args.env))

  if args.env == 0:
    SinePolicyExample()
  if args.env == 1:
    SineStandExample()
  if args.env == 2:
    ResetPoseExample()
  if args.env == 3:
    MotorOverheatExample()


if __name__ == '__main__':
  main()