add a temp copy of TF agents (until the API stops changing or configs.py is included)

Erwin Coumans
2017-11-16 16:47:14 +00:00
parent 7f654bdd87
commit 7b030426c1
24 changed files with 3294 additions and 27 deletions
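
The modified file below is the PPO configs module (its path is not shown in this view). The import change swaps the pip-installed agents package for the vendored copy, so ppo and networks now resolve relative to the configs module's own package. A minimal usage sketch, assuming the temp copy is importable as pybullet_envs.agents (an assumption; the relative imports only show that configs.py sits inside the vendored package):

# Sketch, not part of this commit; the pybullet_envs.agents path is assumed.
# Before: the configs required `pip install agents` and tracked its API.
# After: callers import the vendored copy shipped alongside pybullet_envs.
from pybullet_envs.agents import configs  # assumed location of the temp copy

config = configs.default()  # in the TF Agents style, default() returns locals()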


@@ -20,21 +20,21 @@ from __future__ import print_function
 import functools
-from agents import ppo
-from agents.scripts import networks
+from . import ppo
+from . import networks
-from pybullet_envs.bullet import minitaur_gym_env
-from pybullet_envs.bullet import minitaur_duck_gym_env
-from pybullet_envs.bullet import minitaur_env_randomizer
+import pybullet_envs.bullet.minitaur_gym_env as minitaur_gym_env
+import pybullet_envs
 import tensorflow as tf


 def default():
   """Default configuration for PPO."""
   # General
   algorithm = ppo.PPOAlgorithm
-  num_agents = 10
-  eval_episodes = 20
+  num_agents = 30
+  eval_episodes = 30
   use_gpu = False
   # Network
   network = networks.feed_forward_gaussian
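
In the TF Agents config style, each local variable assigned inside default() is one hyperparameter; the function ends with return locals(), and the training scripts wrap that dict in an attribute-style container. A minimal sketch of the pattern (simplified from the library; derived configs layer on top by calling locals().update(default())):

def default():
  """Default configuration for PPO."""
  # General
  num_agents = 30    # number of parallel environments
  eval_episodes = 30
  use_gpu = False
  return locals()    # every local above becomes one config entry

config = default()
assert config['num_agents'] == 30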
@@ -44,18 +44,17 @@ def default():
       value=r'.*/value/.*')
   policy_layers = 200, 100
   value_layers = 200, 100
-  init_mean_factor = 0.05
+  init_mean_factor = 0.1
   init_logstd = -1
   # Optimization
-  update_every = 20
-  policy_optimizer = 'AdamOptimizer'
-  value_optimizer = 'AdamOptimizer'
-  update_epochs_policy = 50
-  update_epochs_value = 50
-  policy_lr = 1e-4
-  value_lr = 3e-4
+  update_every = 30
+  update_epochs = 25
+  optimizer = tf.train.AdamOptimizer
+  update_epochs_policy = 64
+  update_epochs_value = 64
+  learning_rate = 1e-4
   # Losses
-  discount = 0.985
+  discount = 0.995
   kl_target = 1e-2
   kl_cutoff_factor = 2
   kl_cutoff_coef = 1000
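
The optimization block tracks the upstream API change: the old config named separate policy and value optimizers as strings (presumably resolved against tf.train by name) with two learning rates, while the new one stores a single optimizer class and one learning rate. A sketch of how a training loop would consume the new fields, assuming the TF 1.x API of the time (consumer code is not part of this diff):

import tensorflow as tf

# Fields as they appear in the new config:
optimizer = tf.train.AdamOptimizer  # the class itself, not an instance
learning_rate = 1e-4

# The trainer builds the optimizer from the config fields, then calls
# opt.minimize(loss) on its loss tensor:
opt = optimizer(learning_rate)      # tf.train.AdamOptimizer(1e-4)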