add yapf style and apply yapf to format all Python files

This recreates pull request #2192
Author: Erwin Coumans
Date: 2019-04-27 07:31:15 -07:00
Parent: c591735042
Commit: ef9570c315
347 changed files with 70304 additions and 22752 deletions
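For reference, the reflow seen throughout this diff (call arguments aligned under the opening parenthesis, lines kept under roughly 100 characters, 2-space indents) is what yapf produces from a small style file plus a recursive in-place run. The sketch below is an assumption inferred from the formatted output, not a copy of the repository's actual .style.yapf, so the option values may differ; the yapf flags themselves (--in-place, --recursive, --diff, --style) are standard.

# .style.yapf -- hypothetical reconstruction; values inferred from the diff
[style]
based_on_style = google
indent_width = 2
column_limit = 100

# Reformat every Python file under the tree in place:
yapf --in-place --recursive --style=.style.yapf .

# Or check formatting without rewriting anything:
yapf --diff --recursive --style=.style.yapf .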


@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Executable scripts for reinforcement learning."""
from __future__ import absolute_import


@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Example configurations using the PPO algorithm."""
from __future__ import absolute_import
@@ -33,10 +32,7 @@ def default():
use_gpu = False
# Network
network = networks.ForwardGaussianPolicy
weight_summaries = dict(
all=r'.*',
policy=r'.*/policy/.*',
value=r'.*/value/.*')
weight_summaries = dict(all=r'.*', policy=r'.*/policy/.*', value=r'.*/value/.*')
policy_layers = 200, 100
value_layers = 200, 100
init_mean_factor = 0.05


@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Networks for the PPO algorithm defined as recurrent cells."""
from __future__ import absolute_import
@@ -20,11 +19,10 @@ from __future__ import print_function
import tensorflow as tf
_MEAN_WEIGHTS_INITIALIZER = tf.contrib.layers.variance_scaling_initializer(
factor=0.1)
_MEAN_WEIGHTS_INITIALIZER = tf.contrib.layers.variance_scaling_initializer(factor=0.1)
_LOGSTD_INITIALIZER = tf.random_normal_initializer(-1, 1e-10)
class LinearGaussianPolicy(tf.contrib.rnn.RNNCell):
"""Indepent linear network with a tanh at the end for policy and feedforward network for the value.
@@ -56,15 +54,12 @@ class LinearGaussianPolicy(tf.contrib.rnn.RNNCell):
def __call__(self, observation, state):
with tf.variable_scope('policy'):
x = tf.contrib.layers.flatten(observation)
mean = tf.contrib.layers.fully_connected(
x,
self._action_size,
tf.tanh,
weights_initializer=self._mean_weights_initializer)
logstd = tf.get_variable('logstd', mean.shape[1:], tf.float32,
self._logstd_initializer)
logstd = tf.tile(logstd[None, ...],
[tf.shape(mean)[0]] + [1] * logstd.shape.ndims)
mean = tf.contrib.layers.fully_connected(x,
self._action_size,
tf.tanh,
weights_initializer=self._mean_weights_initializer)
logstd = tf.get_variable('logstd', mean.shape[1:], tf.float32, self._logstd_initializer)
logstd = tf.tile(logstd[None, ...], [tf.shape(mean)[0]] + [1] * logstd.shape.ndims)
with tf.variable_scope('value'):
x = tf.contrib.layers.flatten(observation)
for size in self._value_layers:
@@ -80,10 +75,12 @@ class ForwardGaussianPolicy(tf.contrib.rnn.RNNCell):
is learned as independent parameter vector.
"""
def __init__(
self, policy_layers, value_layers, action_size,
mean_weights_initializer=_MEAN_WEIGHTS_INITIALIZER,
logstd_initializer=_LOGSTD_INITIALIZER):
def __init__(self,
policy_layers,
value_layers,
action_size,
mean_weights_initializer=_MEAN_WEIGHTS_INITIALIZER,
logstd_initializer=_LOGSTD_INITIALIZER):
self._policy_layers = policy_layers
self._value_layers = value_layers
self._action_size = action_size
@@ -104,13 +101,12 @@ class ForwardGaussianPolicy(tf.contrib.rnn.RNNCell):
x = tf.contrib.layers.flatten(observation)
for size in self._policy_layers:
x = tf.contrib.layers.fully_connected(x, size, tf.nn.relu)
mean = tf.contrib.layers.fully_connected(
x, self._action_size, tf.tanh,
weights_initializer=self._mean_weights_initializer)
logstd = tf.get_variable(
'logstd', mean.shape[1:], tf.float32, self._logstd_initializer)
logstd = tf.tile(
logstd[None, ...], [tf.shape(mean)[0]] + [1] * logstd.shape.ndims)
mean = tf.contrib.layers.fully_connected(x,
self._action_size,
tf.tanh,
weights_initializer=self._mean_weights_initializer)
logstd = tf.get_variable('logstd', mean.shape[1:], tf.float32, self._logstd_initializer)
logstd = tf.tile(logstd[None, ...], [tf.shape(mean)[0]] + [1] * logstd.shape.ndims)
with tf.variable_scope('value'):
x = tf.contrib.layers.flatten(observation)
for size in self._value_layers:
@@ -127,10 +123,12 @@ class RecurrentGaussianPolicy(tf.contrib.rnn.RNNCell):
and uses a GRU cell.
"""
def __init__(
self, policy_layers, value_layers, action_size,
mean_weights_initializer=_MEAN_WEIGHTS_INITIALIZER,
logstd_initializer=_LOGSTD_INITIALIZER):
def __init__(self,
policy_layers,
value_layers,
action_size,
mean_weights_initializer=_MEAN_WEIGHTS_INITIALIZER,
logstd_initializer=_LOGSTD_INITIALIZER):
self._policy_layers = policy_layers
self._value_layers = value_layers
self._action_size = action_size
@@ -152,13 +150,12 @@ class RecurrentGaussianPolicy(tf.contrib.rnn.RNNCell):
for size in self._policy_layers[:-1]:
x = tf.contrib.layers.fully_connected(x, size, tf.nn.relu)
x, state = self._cell(x, state)
mean = tf.contrib.layers.fully_connected(
x, self._action_size, tf.tanh,
weights_initializer=self._mean_weights_initializer)
logstd = tf.get_variable(
'logstd', mean.shape[1:], tf.float32, self._logstd_initializer)
logstd = tf.tile(
logstd[None, ...], [tf.shape(mean)[0]] + [1] * logstd.shape.ndims)
mean = tf.contrib.layers.fully_connected(x,
self._action_size,
tf.tanh,
weights_initializer=self._mean_weights_initializer)
logstd = tf.get_variable('logstd', mean.shape[1:], tf.float32, self._logstd_initializer)
logstd = tf.tile(logstd[None, ...], [tf.shape(mean)[0]] + [1] * logstd.shape.ndims)
with tf.variable_scope('value'):
x = tf.contrib.layers.flatten(observation)
for size in self._value_layers:


@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
r"""Script to train a batch reinforcement learning algorithm.
Command line:
@@ -68,21 +67,25 @@ def _define_loop(graph, logdir, train_steps, eval_steps):
Returns:
Loop object.
"""
loop = tools.Loop(
logdir, graph.step, graph.should_log, graph.do_report,
graph.force_reset)
loop.add_phase(
'train', graph.done, graph.score, graph.summary, train_steps,
report_every=None,
log_every=train_steps // 2,
checkpoint_every=None,
feed={graph.is_training: True})
loop.add_phase(
'eval', graph.done, graph.score, graph.summary, eval_steps,
report_every=eval_steps,
log_every=eval_steps // 2,
checkpoint_every=10 * eval_steps,
feed={graph.is_training: False})
loop = tools.Loop(logdir, graph.step, graph.should_log, graph.do_report, graph.force_reset)
loop.add_phase('train',
graph.done,
graph.score,
graph.summary,
train_steps,
report_every=None,
log_every=train_steps // 2,
checkpoint_every=None,
feed={graph.is_training: True})
loop.add_phase('eval',
graph.done,
graph.score,
graph.summary,
eval_steps,
report_every=eval_steps,
log_every=eval_steps // 2,
checkpoint_every=10 * eval_steps,
feed={graph.is_training: False})
return loop
@@ -101,25 +104,19 @@ def train(config, env_processes):
"""
tf.reset_default_graph()
with config.unlocked:
config.network = functools.partial(
utility.define_network, config.network, config)
config.network = functools.partial(utility.define_network, config.network, config)
config.policy_optimizer = getattr(tf.train, config.policy_optimizer)
config.value_optimizer = getattr(tf.train, config.value_optimizer)
if config.update_every % config.num_agents:
tf.logging.warn('Number of agents should divide episodes per update.')
with tf.device('/cpu:0'):
batch_env = utility.define_batch_env(
lambda: _create_environment(config),
config.num_agents, env_processes)
graph = utility.define_simulation_graph(
batch_env, config.algorithm, config)
loop = _define_loop(
graph, config.logdir,
config.update_every * config.max_length,
config.eval_episodes * config.max_length)
total_steps = int(
config.steps / config.update_every *
(config.update_every + config.eval_episodes))
batch_env = utility.define_batch_env(lambda: _create_environment(config), config.num_agents,
env_processes)
graph = utility.define_simulation_graph(batch_env, config.algorithm, config)
loop = _define_loop(graph, config.logdir, config.update_every * config.max_length,
config.eval_episodes * config.max_length)
total_steps = int(config.steps / config.update_every *
(config.update_every + config.eval_episodes))
# Exclude episode related variables since the Python state of environments is
# not checkpointed and thus new episodes start after resuming.
saver = utility.define_saver(exclude=(r'.*_temporary/.*',))
@@ -137,8 +134,8 @@ def main(_):
utility.set_up_logging()
if not FLAGS.config:
raise KeyError('You must specify a configuration.')
logdir = FLAGS.logdir and os.path.expanduser(os.path.join(
FLAGS.logdir, '{}-{}'.format(FLAGS.timestamp, FLAGS.config)))
logdir = FLAGS.logdir and os.path.expanduser(
os.path.join(FLAGS.logdir, '{}-{}'.format(FLAGS.timestamp, FLAGS.config)))
try:
config = utility.load_config(logdir)
except IOError:
@@ -150,16 +147,11 @@ def main(_):
if __name__ == '__main__':
FLAGS = tf.app.flags.FLAGS
tf.app.flags.DEFINE_string(
'logdir', None,
'Base directory to store logs.')
tf.app.flags.DEFINE_string(
'timestamp', datetime.datetime.now().strftime('%Y%m%dT%H%M%S'),
'Sub directory to store logs.')
tf.app.flags.DEFINE_string(
'config', None,
'Configuration to execute.')
tf.app.flags.DEFINE_boolean(
'env_processes', True,
'Step environments in separate processes to circumvent the GIL.')
tf.app.flags.DEFINE_string('logdir', None, 'Base directory to store logs.')
tf.app.flags.DEFINE_string('timestamp',
datetime.datetime.now().strftime('%Y%m%dT%H%M%S'),
'Sub directory to store logs.')
tf.app.flags.DEFINE_string('config', None, 'Configuration to execute.')
tf.app.flags.DEFINE_boolean('env_processes', True,
'Step environments in separate processes to circumvent the GIL.')
tf.app.run()


@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for the PPO algorithm usage example."""
from __future__ import absolute_import
@@ -29,7 +28,6 @@ from google3.robotics.reinforcement_learning.agents.scripts import configs
from google3.robotics.reinforcement_learning.agents.scripts import networks
from google3.robotics.reinforcement_learning.agents.scripts import train
FLAGS = tf.app.flags.FLAGS
@@ -65,9 +63,11 @@ class PPOTest(tf.test.TestCase):
for network, observ_shape in itertools.product(nets, observ_shapes):
config = self._define_config()
with config.unlocked:
config.env = functools.partial(
tools.MockEnvironment, observ_shape, action_shape=(3,),
min_duration=15, max_duration=15)
config.env = functools.partial(tools.MockEnvironment,
observ_shape,
action_shape=(3,),
min_duration=15,
max_duration=15)
config.max_length = 20
config.steps = 100
config.network = network
@@ -77,9 +77,11 @@ class PPOTest(tf.test.TestCase):
def test_no_crash_variable_duration(self):
config = self._define_config()
with config.unlocked:
config.env = functools.partial(
tools.MockEnvironment, observ_shape=(2, 3), action_shape=(3,),
min_duration=5, max_duration=25)
config.env = functools.partial(tools.MockEnvironment,
observ_shape=(2, 3),
action_shape=(3,),
min_duration=5,
max_duration=25)
config.max_length = 25
config.steps = 200
config.network = networks.RecurrentGaussianPolicy


@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Utilities for using reinforcement learning algorithms."""
from __future__ import absolute_import
@@ -46,8 +45,7 @@ def define_simulation_graph(batch_env, algo_cls, config):
do_report = tf.placeholder(tf.bool, name='do_report')
force_reset = tf.placeholder(tf.bool, name='force_reset')
algo = algo_cls(batch_env, step, is_training, should_log, config)
done, score, summary = tools.simulate(
batch_env, algo, should_log, force_reset)
done, score, summary = tools.simulate(batch_env, algo, should_log, force_reset)
message = 'Graph contains {} trainable variables.'
tf.logging.info(message.format(tools.count_weights()))
# pylint: enable=unused-variable
@@ -67,9 +65,7 @@ def define_batch_env(constructor, num_agents, env_processes):
"""
with tf.variable_scope('environments'):
if env_processes:
envs = [
tools.wrappers.ExternalProcess(constructor)
for _ in range(num_agents)]
envs = [tools.wrappers.ExternalProcess(constructor) for _ in range(num_agents)]
else:
envs = [constructor() for _ in range(num_agents)]
batch_env = tools.BatchEnv(envs, blocking=not env_processes)
@@ -108,15 +104,14 @@ def define_network(constructor, config, action_size):
Returns:
Created recurrent cell object.
"""
mean_weights_initializer = (
tf.contrib.layers.variance_scaling_initializer(
factor=config.init_mean_factor))
logstd_initializer = tf.random_normal_initializer(
config.init_logstd, 1e-10)
network = constructor(
config.policy_layers, config.value_layers, action_size,
mean_weights_initializer=mean_weights_initializer,
logstd_initializer=logstd_initializer)
mean_weights_initializer = (tf.contrib.layers.variance_scaling_initializer(
factor=config.init_mean_factor))
logstd_initializer = tf.random_normal_initializer(config.init_logstd, 1e-10)
network = constructor(config.policy_layers,
config.value_layers,
action_size,
mean_weights_initializer=mean_weights_initializer,
logstd_initializer=logstd_initializer)
return network
@@ -134,9 +129,7 @@ def initialize_variables(sess, saver, logdir, checkpoint=None, resume=None):
ValueError: If resume expected but no log directory specified.
RuntimeError: If no resume expected but a checkpoint was found.
"""
sess.run(tf.group(
tf.local_variables_initializer(),
tf.global_variables_initializer()))
sess.run(tf.group(tf.local_variables_initializer(), tf.global_variables_initializer()))
if resume and not (logdir or checkpoint):
raise ValueError('Need to specify logdir to resume a checkpoint.')
if logdir:
@@ -175,9 +168,8 @@ def save_config(config, logdir=None):
with tf.gfile.GFile(config_path, 'w') as file_:
yaml.dump(config, file_, default_flow_style=False)
else:
message = (
'Start a new run without storing summaries and checkpoints since no '
'logging directory was specified.')
message = ('Start a new run without storing summaries and checkpoints since no '
'logging directory was specified.')
tf.logging.info(message)
return config
@@ -196,9 +188,8 @@ def load_config(logdir):
"""
config_path = logdir and os.path.join(logdir, 'config.yaml')
if not config_path or not tf.gfile.Exists(config_path):
message = (
'Cannot resume an existing run since the logging directory does not '
'contain a configuration file.')
message = ('Cannot resume an existing run since the logging directory does not '
'contain a configuration file.')
raise IOError(message)
with tf.gfile.FastGFile(config_path, 'r') as file_:
config = yaml.load(file_)


@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
r"""Script to render videos of the Proximal Policy Gradient algorithm.
Command line:
@@ -54,6 +53,8 @@ def _create_environment(config, outdir):
setattr(env, 'spec', getattr(env, 'spec', None))
if config.max_length:
env = tools.wrappers.LimitDuration(env, config.max_length)
# env = gym.wrappers.Monitor(
# env, outdir, lambda unused_episode_number: True)
env = tools.wrappers.RangeNormalize(env)
@@ -72,20 +73,20 @@ def _define_loop(graph, eval_steps):
Returns:
Loop object.
"""
loop = tools.Loop(
None, graph.step, graph.should_log, graph.do_report, graph.force_reset)
loop.add_phase(
'eval', graph.done, graph.score, graph.summary, eval_steps,
report_every=eval_steps,
log_every=None,
checkpoint_every=None,
feed={graph.is_training: False})
loop = tools.Loop(None, graph.step, graph.should_log, graph.do_report, graph.force_reset)
loop.add_phase('eval',
graph.done,
graph.score,
graph.summary,
eval_steps,
report_every=eval_steps,
log_every=None,
checkpoint_every=None,
feed={graph.is_training: False})
return loop
def visualize(
logdir, outdir, num_agents, num_episodes, checkpoint=None,
env_processes=True):
def visualize(logdir, outdir, num_agents, num_episodes, checkpoint=None, env_processes=True):
"""Recover checkpoint and render videos from it.
Args:
@@ -98,25 +99,20 @@ def visualize(
"""
config = utility.load_config(logdir)
with config.unlocked:
config.network = functools.partial(
utility.define_network, config.network, config)
config.network = functools.partial(utility.define_network, config.network, config)
config.policy_optimizer = getattr(tf.train, config.policy_optimizer)
config.value_optimizer = getattr(tf.train, config.value_optimizer)
with tf.device('/cpu:0'):
batch_env = utility.define_batch_env(
lambda: _create_environment(config, outdir),
num_agents, env_processes)
graph = utility.define_simulation_graph(
batch_env, config.algorithm, config)
batch_env = utility.define_batch_env(lambda: _create_environment(config, outdir), num_agents,
env_processes)
graph = utility.define_simulation_graph(batch_env, config.algorithm, config)
total_steps = num_episodes * config.max_length
loop = _define_loop(graph, total_steps)
saver = utility.define_saver(
exclude=(r'.*_temporary/.*', r'global_step'))
saver = utility.define_saver(exclude=(r'.*_temporary/.*', r'global_step'))
sess_config = tf.ConfigProto(allow_soft_placement=True)
sess_config.gpu_options.allow_growth = True
with tf.Session(config=sess_config) as sess:
utility.initialize_variables(
sess, saver, config.logdir, checkpoint, resume=True)
utility.initialize_variables(sess, saver, config.logdir, checkpoint, resume=True)
for unused_score in loop.run(sess, saver, total_steps):
pass
batch_env.close()
@@ -129,29 +125,18 @@ def main(_):
raise KeyError('You must specify logging and outdirs directories.')
FLAGS.logdir = os.path.expanduser(FLAGS.logdir)
FLAGS.outdir = os.path.expanduser(FLAGS.outdir)
visualize(
FLAGS.logdir, FLAGS.outdir, FLAGS.num_agents, FLAGS.num_episodes,
FLAGS.checkpoint, FLAGS.env_processes)
visualize(FLAGS.logdir, FLAGS.outdir, FLAGS.num_agents, FLAGS.num_episodes, FLAGS.checkpoint,
FLAGS.env_processes)
if __name__ == '__main__':
FLAGS = tf.app.flags.FLAGS
tf.app.flags.DEFINE_string(
'logdir', None,
'Directory to the checkpoint of a training run.')
tf.app.flags.DEFINE_string(
'outdir', None,
'Local directory for storing the monitoring outdir.')
tf.app.flags.DEFINE_string(
'checkpoint', None,
'Checkpoint name to load; defaults to most recent.')
tf.app.flags.DEFINE_integer(
'num_agents', 1,
'How many environments to step in parallel.')
tf.app.flags.DEFINE_integer(
'num_episodes', 5,
'Minimum number of episodes to render.')
tf.app.flags.DEFINE_boolean(
'env_processes', True,
'Step environments in separate processes to circumvent the GIL.')
tf.app.flags.DEFINE_string('logdir', None, 'Directory to the checkpoint of a training run.')
tf.app.flags.DEFINE_string('outdir', None, 'Local directory for storing the monitoring outdir.')
tf.app.flags.DEFINE_string('checkpoint', None,
'Checkpoint name to load; defaults to most recent.')
tf.app.flags.DEFINE_integer('num_agents', 1, 'How many environments to step in parallel.')
tf.app.flags.DEFINE_integer('num_episodes', 5, 'Minimum number of episodes to render.')
tf.app.flags.DEFINE_boolean('env_processes', True,
'Step environments in separate processes to circumvent the GIL.')
tf.app.run()