add TensorFlow Agents PPO training script for various pybullet environments:
example: python -m pybullet_envs.agents.train_ppo --config=pybullet_pendulum --logdir=pendulum
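The valid --config values are the configuration functions defined in config_ppo.py below: pybullet_pendulum, pybullet_doublependulum, pybullet_pendulumswingup, pybullet_cheetah, pybullet_ant, pybullet_racecar, and pybullet_minitaur.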
examples/pybullet/gym/pybullet_envs/agents/config_ppo.py (new file, 110 lines)
@@ -0,0 +1,110 @@
"""PPO training configurations for the PyBullet gym environments."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import functools

from agents import ppo
from agents.scripts import networks
from pybullet_envs.bullet import minitaur_env_randomizer
from pybullet_envs.bullet import minitaur_gym_env
import pybullet_envs  # Registers the Bullet environments with gym.


# pylint: disable=unused-variable


def default():
  """The default configuration shared by all tasks."""
  # General
  algorithm = ppo.PPOAlgorithm
  num_agents = 25
  eval_episodes = 25
  use_gpu = False
  # Network
  network = networks.ForwardGaussianPolicy
  weight_summaries = dict(
      all=r'.*', policy=r'.*/policy/.*', value=r'.*/value/.*')
  policy_layers = 200, 100
  value_layers = 200, 100
  init_mean_factor = 0.2
  init_logstd = -1
  network_config = dict()
  # Optimization
  update_every = 25
  policy_optimizer = 'AdamOptimizer'
  value_optimizer = 'AdamOptimizer'
  update_epochs_policy = 25
  update_epochs_value = 25
  value_lr = 1e-3
  policy_lr = 1e-4
  # Losses
  discount = 0.99
  kl_target = 1e-2
  kl_cutoff_factor = 2
  kl_cutoff_coef = 1000
  kl_init_penalty = 1
  return locals()


def pybullet_pendulum():
  """Configuration for the Bullet inverted pendulum task."""
  locals().update(default())
  env = 'InvertedPendulumBulletEnv-v0'
  max_length = 200
  steps = 5e7  # 50M
  return locals()


def pybullet_doublependulum():
  """Configuration for the Bullet inverted double pendulum task."""
  locals().update(default())
  env = 'InvertedDoublePendulumBulletEnv-v0'
  max_length = 1000
  steps = 5e7  # 50M
  return locals()


def pybullet_pendulumswingup():
  """Configuration for the Bullet pendulum swing-up task."""
  locals().update(default())
  env = 'InvertedPendulumSwingupBulletEnv-v0'
  max_length = 1000
  steps = 5e7  # 50M
  return locals()


def pybullet_cheetah():
  """Configuration for the Bullet port of MuJoCo's half cheetah task."""
  locals().update(default())
  # Environment
  env = 'HalfCheetahBulletEnv-v0'
  max_length = 1000
  steps = 1e8  # 100M
  return locals()


def pybullet_ant():
  """Configuration for the Bullet ant task."""
  locals().update(default())
  env = 'AntBulletEnv-v0'
  max_length = 1000
  steps = 5e7  # 50M
  return locals()


def pybullet_racecar():
  """Configuration for the Bullet MIT racecar task."""
  locals().update(default())
  # Environment
  env = 'RacecarBulletEnv-v0'  # functools.partial(racecarGymEnv.RacecarGymEnv, isDiscrete=False, renders=True)
  max_length = 10
  steps = 1e7  # 10M
  return locals()


def pybullet_minitaur():
  """Configuration specific to the minitaur_gym_env.MinitaurBulletEnv class."""
  locals().update(default())
  randomizer = minitaur_env_randomizer.MinitaurEnvRandomizer()
  env = functools.partial(
      minitaur_gym_env.MinitaurBulletEnv,
      accurate_motor_model_enabled=True,
      motor_overheat_protection=True,
      pd_control_enabled=True,
      env_randomizer=randomizer,
      render=False)
  max_length = 1000
  steps = 3e7  # 30M
  return locals()
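Each task function above reuses default() through the locals().update(default()) pattern and then overrides only the environment-specific fields before returning locals() as the config dict. A minimal sketch of adding a new task in the same style; the function name pybullet_hopper is hypothetical and assumes 'HopperBulletEnv-v0' is among the environments registered by pybullet_envs:

def pybullet_hopper():
  """Hypothetical configuration for the Bullet hopper task."""
  locals().update(default())  # Inherit every default into this scope.
  env = 'HopperBulletEnv-v0'  # Assumed env id; any registered Bullet env works.
  max_length = 1000           # Cap on episode length.
  steps = 5e7  # 50M total training steps, mirroring the other tasks.
  return locals()             # Hand the merged settings back as a dict.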
examples/pybullet/gym/pybullet_envs/agents/train_ppo.py (new file, 48 lines)
@@ -0,0 +1,48 @@
r"""Script to train a PPO (Proximal Policy Optimization) agent on the PyBullet environments.

Run:
python -m pybullet_envs.agents.train_ppo --logdir=/tmp/train --config=pybullet_minitaur
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import datetime
import os

import tensorflow as tf

from agents import tools
from agents.scripts import train
from agents.scripts import utility
from . import config_ppo


flags = tf.app.flags
FLAGS = tf.app.flags.FLAGS

flags.DEFINE_string(
    'logdir', None,
    'Base directory to store logs.')
flags.DEFINE_string(
    'config', None,
    'Configuration to execute.')
flags.DEFINE_string(
    'timestamp', datetime.datetime.now().strftime('%Y%m%dT%H%M%S'),
    'Sub directory to store logs.')


def main(_):
  """Create or load configuration and launch the trainer."""
  config = tools.AttrDict(getattr(config_ppo, FLAGS.config)())
  logdir = FLAGS.logdir and os.path.join(
      FLAGS.logdir, '{}-{}'.format(FLAGS.timestamp, FLAGS.config))
  utility.save_config(config, logdir)
  for score in train.train(config, env_processes=True):
    tf.logging.info(str(score))


if __name__ == '__main__':
  tf.app.run()
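main() resolves --config to the same-named function in config_ppo and wraps the returned dict in tools.AttrDict so fields can be read as attributes. A minimal sketch of that lookup done by hand, assuming the agents package is importable; the printed values follow from pybullet_pendulum() above:

from agents import tools
from pybullet_envs.agents import config_ppo

config = tools.AttrDict(config_ppo.pybullet_pendulum())
print(config.env)         # 'InvertedPendulumBulletEnv-v0'
print(config.max_length)  # 200
print(int(config.steps))  # 50000000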
examples/pybullet/gym/pybullet_envs/agents/visualize_ppo.py (new file, 42 lines)
@@ -0,0 +1,42 @@
r"""Script to visualize a trained PPO agent.

Run:
python -m pybullet_envs.agents.visualize_ppo \
    --logdir=ppo \
    --outdir=/tmp/video/
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

from agents.scripts import visualize


flags = tf.app.flags
FLAGS = tf.app.flags.FLAGS

flags.DEFINE_string("logdir", None,
                    "Directory to the checkpoint of a training run.")
flags.DEFINE_string("outdir", None,
                    "Local directory for storing the monitoring outdir.")
flags.DEFINE_string("checkpoint", None,
                    "Checkpoint name to load; defaults to most recent.")
flags.DEFINE_integer("num_agents", 1,
                     "How many environments to step in parallel.")
flags.DEFINE_integer("num_episodes", 1,
                     "Minimum number of episodes to render.")
flags.DEFINE_boolean(
    "env_processes", False,
    "Step environments in separate processes to circumvent the GIL.")


def main(_):
  """Load the checkpoint and render episodes to the output directory."""
  visualize.visualize(FLAGS.logdir, FLAGS.outdir, FLAGS.num_agents,
                      FLAGS.num_episodes, FLAGS.checkpoint,
                      FLAGS.env_processes)


if __name__ == "__main__":
  tf.app.run()
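The flags map one-to-one onto the positional arguments of visualize.visualize, so the renderer can also be driven from Python directly. A minimal sketch; the logdir below is a hypothetical run directory of the form created by train_ppo.py:

from agents.scripts import visualize

visualize.visualize(
    'pendulum/20170101T000000-pybullet_pendulum',  # Hypothetical training logdir.
    '/tmp/video',  # Output directory for the rendered episodes.
    1,             # num_agents: environments stepped in parallel.
    1,             # num_episodes: minimum episodes to render.
    None,          # checkpoint: None loads the most recent one.
    False)         # env_processes: keep everything in one process.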