From f483ccd8ef841e6b074b6d2822293a867e8f2e35 Mon Sep 17 00:00:00 2001
From: Erwin Coumans
Date: Thu, 16 Nov 2017 16:49:25 +0000
Subject: [PATCH] remote org configs

---
 .../gym/pybullet_envs/agents/configs2.py     | 130 ------------------
 1 file changed, 130 deletions(-)
 delete mode 100644 examples/pybullet/gym/pybullet_envs/agents/configs2.py

diff --git a/examples/pybullet/gym/pybullet_envs/agents/configs2.py b/examples/pybullet/gym/pybullet_envs/agents/configs2.py
deleted file mode 100644
index 27bd70d1f..000000000
--- a/examples/pybullet/gym/pybullet_envs/agents/configs2.py
+++ /dev/null
@@ -1,130 +0,0 @@
-# Copyright 2017 The TensorFlow Agents Authors.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Example configurations using the PPO algorithm."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-# pylint: disable=unused-variable
-
-import tensorflow as tf
-
-from . import ppo
-from . import networks
-
-
-def default():
-  """Default configuration for PPO."""
-  # General
-  algorithm = ppo.PPOAlgorithm
-  num_agents = 30
-  eval_episodes = 30
-  use_gpu = False
-  # Network
-  network = networks.feed_forward_gaussian
-  weight_summaries = dict(
-      all=r'.*', policy=r'.*/policy/.*', value=r'.*/value/.*')
-  policy_layers = 200, 100
-  value_layers = 200, 100
-  init_mean_factor = 0.1
-  init_logstd = -1
-  # Optimization
-  update_every = 30
-  update_epochs = 25
-  optimizer = tf.train.AdamOptimizer
-  learning_rate = 1e-4
-  # Losses
-  discount = 0.995
-  kl_target = 1e-2
-  kl_cutoff_factor = 2
-  kl_cutoff_coef = 1000
-  kl_init_penalty = 1
-  return locals()
-
-
-def pendulum():
-  """Configuration for the pendulum classic control task."""
-  locals().update(default())
-  # Environment
-  env = 'Pendulum-v0'
-  max_length = 200
-  steps = 2e6  # 2M
-  return locals()
-
-
-def reacher():
-  """Configuration for MuJoCo's reacher task."""
-  locals().update(default())
-  # Environment
-  env = 'Reacher-v1'
-  max_length = 1000
-  steps = 5e6  # 5M
-  discount = 0.985
-  update_every = 60
-  return locals()
-
-
-def cheetah():
-  """Configuration for MuJoCo's half cheetah task."""
-  locals().update(default())
-  # Environment
-  env = 'HalfCheetah-v1'
-  max_length = 1000
-  steps = 1e7  # 10M
-  discount = 0.99
-  return locals()
-
-
-def walker():
-  """Configuration for MuJoCo's walker task."""
-  locals().update(default())
-  # Environment
-  env = 'Walker2d-v1'
-  max_length = 1000
-  steps = 1e7  # 10M
-  return locals()
-
-
-def hopper():
-  """Configuration for MuJoCo's hopper task."""
-  locals().update(default())
-  # Environment
-  env = 'Hopper-v1'
-  max_length = 1000
-  steps = 1e7  # 10M
-  update_every = 60
-  return locals()
-
-
-def ant():
-  """Configuration for MuJoCo's ant task."""
-  locals().update(default())
-  # Environment
-  env = 'Ant-v1'
-  max_length = 1000
-  steps = 2e7  # 20M
-  return locals()
-
-
-def humanoid():
-  """Configuration for MuJoCo's humanoid task."""
-  locals().update(default())
-  # Environment
-  env = 'Humanoid-v1'
-  max_length = 1000
-  steps = 5e7  # 50M
-  update_every = 60
-  return locals()
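
Note on the pattern in the deleted file: every config function assigns plain
local variables and returns `locals()`, and each task config inherits the
shared hyperparameters via `locals().update(default())` before overriding a
few names. The sketch below is a minimal, dependency-free illustration of
that idiom; it copies a few values from the patch above but omits the
TensorFlow-specific entries (`algorithm`, `network`, `optimizer`) and is not
itself part of the patch.

# A minimal sketch of the locals()-based config pattern, illustration only.

def default():
  """Default configuration shared by every task."""
  # Each local assignment becomes one key of the returned dict.
  num_agents = 30
  discount = 0.995
  update_every = 30
  return locals()


def pendulum():
  """Pendulum task: inherit the defaults, then override a few names."""
  # CPython detail: locals() returns the frame's live locals mapping, so the
  # keys injected here survive until the final `return locals()`. PEP 667
  # (Python 3.13) makes locals() a per-call snapshot, which breaks this idiom.
  locals().update(default())
  env = 'Pendulum-v0'
  max_length = 200
  steps = 2e6  # 2M
  return locals()


if __name__ == '__main__':
  config = pendulum()
  # Inherited defaults and task-specific overrides end up in one flat dict.
  print(config['num_agents'], config['discount'], config['env'])

Because the result is a flat dict, a caller can consume a config with plain
dict access (as in the driver above) or wrap it in an attribute-style dict;
the tensorflow/agents training tools do the latter with their own utility.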