add yapf style and apply yapf to format all Python files
This recreates pull request #2192
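For reference, a minimal sketch of the setup this commit describes. The style file itself is not part of the excerpt below, so the values here are assumptions inferred from the reformatted lines (roughly a 100-character column limit, two-space indents, Google-style argument wrapping):

    # .style.yapf -- hypothetical reconstruction, not the committed file.
    # Values inferred from the formatting in the hunks below.
    [style]
    based_on_style = google
    column_limit = 100
    indent_width = 2

    # Reformat every Python file in place, recursively:
    #   yapf --in-place --recursive .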
@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 """Tools for reinforcement learning."""
 
 from __future__ import absolute_import

@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 """Wrap a dictionary to access keys as attributes."""
 
 from __future__ import absolute_import

@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 """Tests for the attribute dictionary."""
 
 from __future__ import absolute_import

@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 """Combine multiple environments to step them in batch."""
 
 from __future__ import absolute_import

@@ -84,13 +83,9 @@ class BatchEnv(object):
         message = 'Invalid action at index {}: {}'
         raise ValueError(message.format(index, action))
     if self._blocking:
-      transitions = [
-          env.step(action)
-          for env, action in zip(self._envs, actions)]
+      transitions = [env.step(action) for env, action in zip(self._envs, actions)]
     else:
-      transitions = [
-          env.step(action, blocking=False)
-          for env, action in zip(self._envs, actions)]
+      transitions = [env.step(action, blocking=False) for env, action in zip(self._envs, actions)]
       transitions = [transition() for transition in transitions]
     observs, rewards, dones, infos = zip(*transitions)
     observ = np.stack(observs)

@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 """Count learnable parameters."""
 
 from __future__ import absolute_import

@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 """Tests for the weight counting utility."""
 
 from __future__ import absolute_import

@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 """Batch of environments inside the TensorFlow graph."""
 
 from __future__ import absolute_import

@@ -42,18 +41,18 @@ class InGraphBatchEnv(object):
     action_shape = self._parse_shape(self._batch_env.action_space)
     action_dtype = self._parse_dtype(self._batch_env.action_space)
     with tf.variable_scope('env_temporary'):
-      self._observ = tf.Variable(
-          tf.zeros((len(self._batch_env),) + observ_shape, observ_dtype),
-          name='observ', trainable=False)
-      self._action = tf.Variable(
-          tf.zeros((len(self._batch_env),) + action_shape, action_dtype),
-          name='action', trainable=False)
-      self._reward = tf.Variable(
-          tf.zeros((len(self._batch_env),), tf.float32),
-          name='reward', trainable=False)
-      self._done = tf.Variable(
-          tf.cast(tf.ones((len(self._batch_env),)), tf.bool),
-          name='done', trainable=False)
+      self._observ = tf.Variable(tf.zeros((len(self._batch_env),) + observ_shape, observ_dtype),
+                                 name='observ',
+                                 trainable=False)
+      self._action = tf.Variable(tf.zeros((len(self._batch_env),) + action_shape, action_dtype),
+                                 name='action',
+                                 trainable=False)
+      self._reward = tf.Variable(tf.zeros((len(self._batch_env),), tf.float32),
+                                 name='reward',
+                                 trainable=False)
+      self._done = tf.Variable(tf.cast(tf.ones((len(self._batch_env),)), tf.bool),
+                               name='done',
+                               trainable=False)
 
   def __getattr__(self, name):
     """Forward unimplemented attributes to one of the original environments.

@@ -89,16 +88,13 @@ class InGraphBatchEnv(object):
     if action.dtype in (tf.float16, tf.float32, tf.float64):
       action = tf.check_numerics(action, 'action')
     observ_dtype = self._parse_dtype(self._batch_env.observation_space)
-    observ, reward, done = tf.py_func(
-        lambda a: self._batch_env.step(a)[:3], [action],
-        [observ_dtype, tf.float32, tf.bool], name='step')
+    observ, reward, done = tf.py_func(lambda a: self._batch_env.step(a)[:3], [action],
+                                      [observ_dtype, tf.float32, tf.bool],
+                                      name='step')
     observ = tf.check_numerics(observ, 'observ')
     reward = tf.check_numerics(reward, 'reward')
-    return tf.group(
-        self._observ.assign(observ),
-        self._action.assign(action),
-        self._reward.assign(reward),
-        self._done.assign(done))
+    return tf.group(self._observ.assign(observ), self._action.assign(action),
+                    self._reward.assign(reward), self._done.assign(done))
 
   def reset(self, indices=None):
     """Reset the batch of environments.

@@ -112,15 +108,15 @@ class InGraphBatchEnv(object):
     if indices is None:
       indices = tf.range(len(self._batch_env))
     observ_dtype = self._parse_dtype(self._batch_env.observation_space)
-    observ = tf.py_func(
-        self._batch_env.reset, [indices], observ_dtype, name='reset')
+    observ = tf.py_func(self._batch_env.reset, [indices], observ_dtype, name='reset')
     observ = tf.check_numerics(observ, 'observ')
     reward = tf.zeros_like(indices, tf.float32)
     done = tf.zeros_like(indices, tf.bool)
     with tf.control_dependencies([
         tf.scatter_update(self._observ, indices, observ),
         tf.scatter_update(self._reward, indices, reward),
-        tf.scatter_update(self._done, indices, done)]):
+        tf.scatter_update(self._done, indices, done)
+    ]):
       return tf.identity(observ)
 
   @property

@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 """Put an OpenAI Gym environment into the TensorFlow graph."""
 
 from __future__ import absolute_import

@@ -42,16 +41,15 @@ class InGraphEnv(object):
     action_shape = self._parse_shape(self._env.action_space)
     action_dtype = self._parse_dtype(self._env.action_space)
     with tf.name_scope('environment'):
-      self._observ = tf.Variable(
-          tf.zeros(observ_shape, observ_dtype), name='observ', trainable=False)
-      self._action = tf.Variable(
-          tf.zeros(action_shape, action_dtype), name='action', trainable=False)
-      self._reward = tf.Variable(
-          0.0, dtype=tf.float32, name='reward', trainable=False)
-      self._done = tf.Variable(
-          True, dtype=tf.bool, name='done', trainable=False)
-      self._step = tf.Variable(
-          0, dtype=tf.int32, name='step', trainable=False)
+      self._observ = tf.Variable(tf.zeros(observ_shape, observ_dtype),
+                                 name='observ',
+                                 trainable=False)
+      self._action = tf.Variable(tf.zeros(action_shape, action_dtype),
+                                 name='action',
+                                 trainable=False)
+      self._reward = tf.Variable(0.0, dtype=tf.float32, name='reward', trainable=False)
+      self._done = tf.Variable(True, dtype=tf.bool, name='done', trainable=False)
+      self._step = tf.Variable(0, dtype=tf.int32, name='step', trainable=False)
 
   def __getattr__(self, name):
     """Forward unimplemented attributes to the original environment.

@@ -79,17 +77,14 @@ class InGraphEnv(object):
     if action.dtype in (tf.float16, tf.float32, tf.float64):
       action = tf.check_numerics(action, 'action')
     observ_dtype = self._parse_dtype(self._env.observation_space)
-    observ, reward, done = tf.py_func(
-        lambda a: self._env.step(a)[:3], [action],
-        [observ_dtype, tf.float32, tf.bool], name='step')
+    observ, reward, done = tf.py_func(lambda a: self._env.step(a)[:3], [action],
+                                      [observ_dtype, tf.float32, tf.bool],
+                                      name='step')
     observ = tf.check_numerics(observ, 'observ')
     reward = tf.check_numerics(reward, 'reward')
-    return tf.group(
-        self._observ.assign(observ),
-        self._action.assign(action),
-        self._reward.assign(reward),
-        self._done.assign(done),
-        self._step.assign_add(1))
+    return tf.group(self._observ.assign(observ), self._action.assign(action),
+                    self._reward.assign(reward), self._done.assign(done),
+                    self._step.assign_add(1))
 
   def reset(self):
     """Reset the environment.

@@ -100,10 +95,10 @@ class InGraphEnv(object):
     observ_dtype = self._parse_dtype(self._env.observation_space)
     observ = tf.py_func(self._env.reset, [], observ_dtype, name='reset')
     observ = tf.check_numerics(observ, 'observ')
-    with tf.control_dependencies([
-        self._observ.assign(observ),
-        self._reward.assign(0),
-        self._done.assign(False)]):
+    with tf.control_dependencies(
+        [self._observ.assign(observ),
+         self._reward.assign(0),
+         self._done.assign(False)]):
       return tf.identity(observ)
 
   @property

@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 """Execute operations in a loop and coordinate logging and checkpoints."""
 
 from __future__ import absolute_import

@@ -25,10 +24,8 @@ import tensorflow as tf
 
 from pybullet_envs.minitaur.agents.tools import streaming_mean
 
 
 _Phase = collections.namedtuple(
-    'Phase',
-    'name, writer, op, batch, steps, feed, report_every, log_every,'
+    'Phase', 'name, writer, op, batch, steps, feed, report_every, log_every,'
     'checkpoint_every')

@@ -56,16 +53,22 @@ class Loop(object):
       reset: Tensor indicating to the model to start a new computation.
     """
     self._logdir = logdir
-    self._step = (
-        tf.Variable(0, False, name='global_step') if step is None else step)
+    self._step = (tf.Variable(0, False, name='global_step') if step is None else step)
     self._log = tf.placeholder(tf.bool) if log is None else log
     self._report = tf.placeholder(tf.bool) if report is None else report
     self._reset = tf.placeholder(tf.bool) if reset is None else reset
     self._phases = []
 
-  def add_phase(
-      self, name, done, score, summary, steps,
-      report_every=None, log_every=None, checkpoint_every=None, feed=None):
+  def add_phase(self,
+                name,
+                done,
+                score,
+                summary,
+                steps,
+                report_every=None,
+                log_every=None,
+                checkpoint_every=None,
+                feed=None):
     """Add a phase to the loop protocol.
 
     If the model breaks long computation into multiple steps, the done tensor

@@ -97,13 +100,12 @@ class Loop(object):
     if done.shape.ndims is None or score.shape.ndims is None:
       raise ValueError("Rank of 'done' and 'score' tensors must be known.")
     writer = self._logdir and tf.summary.FileWriter(
-        os.path.join(self._logdir, name), tf.get_default_graph(),
-        flush_secs=60)
+        os.path.join(self._logdir, name), tf.get_default_graph(), flush_secs=60)
     op = self._define_step(done, score, summary)
     batch = 1 if score.shape.ndims == 0 else score.shape[0].value
-    self._phases.append(_Phase(
-        name, writer, op, batch, int(steps), feed, report_every,
-        log_every, checkpoint_every))
+    self._phases.append(
+        _Phase(name, writer, op, batch, int(steps), feed, report_every, log_every,
+               checkpoint_every))
 
   def run(self, sess, saver, max_step=None):
     """Run the loop schedule for a specified number of steps.

@@ -133,13 +135,11 @@ class Loop(object):
         tf.logging.info(message.format(phase.name, phase_step, global_step))
       # Populate book keeping tensors.
       phase.feed[self._reset] = (steps_in < steps_made)
-      phase.feed[self._log] = (
-          phase.writer and
-          self._is_every_steps(phase_step, phase.batch, phase.log_every))
-      phase.feed[self._report] = (
-          self._is_every_steps(phase_step, phase.batch, phase.report_every))
-      summary, mean_score, global_step, steps_made = sess.run(
-          phase.op, phase.feed)
+      phase.feed[self._log] = (phase.writer and
+                               self._is_every_steps(phase_step, phase.batch, phase.log_every))
+      phase.feed[self._report] = (self._is_every_steps(phase_step, phase.batch,
+                                                       phase.report_every))
+      summary, mean_score, global_step, steps_made = sess.run(phase.op, phase.feed)
       if self._is_every_steps(phase_step, phase.batch, phase.checkpoint_every):
         self._store_checkpoint(sess, saver, global_step)
       if self._is_every_steps(phase_step, phase.batch, phase.report_every):

@@ -207,8 +207,7 @@ class Loop(object):
     score_mean = streaming_mean.StreamingMean((), tf.float32)
     with tf.control_dependencies([done, score, summary]):
       done_score = tf.gather(score, tf.where(done)[:, 0])
-      submit_score = tf.cond(
-          tf.reduce_any(done), lambda: score_mean.submit(done_score), tf.no_op)
+      submit_score = tf.cond(tf.reduce_any(done), lambda: score_mean.submit(done_score), tf.no_op)
     with tf.control_dependencies([submit_score]):
       mean_score = tf.cond(self._report, score_mean.clear, float)
       steps_made = tf.shape(score)[0]

@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 """Tests for the training loop."""
 
 from __future__ import absolute_import

@@ -28,8 +27,7 @@ class LoopTest(tf.test.TestCase):
   def test_report_every_step(self):
     step = tf.Variable(0, False, dtype=tf.int32, name='step')
     loop = tools.Loop(None, step)
-    loop.add_phase(
-        'phase_1', done=True, score=0, summary='', steps=1, report_every=3)
+    loop.add_phase('phase_1', done=True, score=0, summary='', steps=1, report_every=3)
     # Step: 0 1 2 3 4 5 6 7 8
     # Report: x x x
     with self.test_session() as sess:

@@ -45,15 +43,33 @@ class LoopTest(tf.test.TestCase):
   def test_phases_feed(self):
     score = tf.placeholder(tf.float32, [])
     loop = tools.Loop(None)
-    loop.add_phase(
-        'phase_1', done=True, score=score, summary='', steps=1, report_every=1,
-        log_every=None, checkpoint_every=None, feed={score: 1})
-    loop.add_phase(
-        'phase_2', done=True, score=score, summary='', steps=3, report_every=1,
-        log_every=None, checkpoint_every=None, feed={score: 2})
-    loop.add_phase(
-        'phase_3', done=True, score=score, summary='', steps=2, report_every=1,
-        log_every=None, checkpoint_every=None, feed={score: 3})
+    loop.add_phase('phase_1',
+                   done=True,
+                   score=score,
+                   summary='',
+                   steps=1,
+                   report_every=1,
+                   log_every=None,
+                   checkpoint_every=None,
+                   feed={score: 1})
+    loop.add_phase('phase_2',
+                   done=True,
+                   score=score,
+                   summary='',
+                   steps=3,
+                   report_every=1,
+                   log_every=None,
+                   checkpoint_every=None,
+                   feed={score: 2})
+    loop.add_phase('phase_3',
+                   done=True,
+                   score=score,
+                   summary='',
+                   steps=2,
+                   report_every=1,
+                   log_every=None,
+                   checkpoint_every=None,
+                   feed={score: 3})
     with self.test_session() as sess:
       sess.run(tf.global_variables_initializer())
       scores = list(loop.run(sess, saver=None, max_step=15))

@@ -61,10 +77,8 @@ class LoopTest(tf.test.TestCase):
 
   def test_average_score_over_phases(self):
     loop = tools.Loop(None)
-    loop.add_phase(
-        'phase_1', done=True, score=1, summary='', steps=1, report_every=2)
-    loop.add_phase(
-        'phase_2', done=True, score=2, summary='', steps=2, report_every=5)
+    loop.add_phase('phase_1', done=True, score=1, summary='', steps=1, report_every=2)
+    loop.add_phase('phase_2', done=True, score=2, summary='', steps=2, report_every=5)
     # Score: 1 2 2 1 2 2 1 2 2 1 2 2 1 2 2 1 2
     # Report 1: x x x
     # Report 2: x x

@@ -78,8 +92,7 @@ class LoopTest(tf.test.TestCase):
     done = tf.equal((step + 1) % 2, 0)
     score = tf.cast(step, tf.float32)
     loop = tools.Loop(None, step)
-    loop.add_phase(
-        'phase_1', done, score, summary='', steps=1, report_every=3)
+    loop.add_phase('phase_1', done, score, summary='', steps=1, report_every=3)
     # Score: 0 1 2 3 4 5 6 7 8
     # Done: x x x x
     # Report: x x x

@@ -91,10 +104,9 @@ class LoopTest(tf.test.TestCase):
   def test_not_done_batch(self):
     step = tf.Variable(0, False, dtype=tf.int32, name='step')
     done = tf.equal([step % 3, step % 4], 0)
-    score = tf.cast([step, step ** 2], tf.float32)
+    score = tf.cast([step, step**2], tf.float32)
     loop = tools.Loop(None, step)
-    loop.add_phase(
-        'phase_1', done, score, summary='', steps=1, report_every=8)
+    loop.add_phase('phase_1', done, score, summary='', steps=1, report_every=8)
     # Step: 0 2 4 6
     # Score 1: 0 2 4 6
     # Done 1: x x

@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 """Mock algorithm for testing reinforcement learning code."""
 
 from __future__ import absolute_import

@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 """Mock environment for testing reinforcement learning code."""
 
 from __future__ import absolute_import

@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 """In-graph simulation step of a vectorized algorithm with environments."""
 
 from __future__ import absolute_import

@@ -55,7 +54,8 @@ def simulate(batch_env, algo, log=True, reset=False):
     reset_ops = [
         batch_env.reset(agent_indices),
         tf.scatter_update(score, agent_indices, zero_scores),
-        tf.scatter_update(length, agent_indices, zero_durations)]
+        tf.scatter_update(length, agent_indices, zero_durations)
+    ]
     with tf.control_dependencies(reset_ops):
       return algo.begin_episode(agent_indices)
 
@@ -76,9 +76,8 @@ def simulate(batch_env, algo, log=True, reset=False):
     add_score = score.assign_add(batch_env.reward)
     inc_length = length.assign_add(tf.ones(len(batch_env), tf.int32))
     with tf.control_dependencies([add_score, inc_length]):
-      experience_summary = algo.experience(
-          prevob, batch_env.action, batch_env.reward, batch_env.done,
-          batch_env.observ)
+      experience_summary = algo.experience(prevob, batch_env.action, batch_env.reward,
+                                           batch_env.done, batch_env.observ)
     return tf.summary.merge([step_summary, experience_summary])
 
   def _define_end_episode(agent_indices):

@@ -94,8 +93,7 @@ def simulate(batch_env, algo, log=True, reset=False):
     """
     assert agent_indices.shape.ndims == 1
     submit_score = mean_score.submit(tf.gather(score, agent_indices))
-    submit_length = mean_length.submit(
-        tf.cast(tf.gather(length, agent_indices), tf.float32))
+    submit_length = mean_length.submit(tf.cast(tf.gather(length, agent_indices), tf.float32))
     with tf.control_dependencies([submit_score, submit_length]):
       return algo.end_episode(agent_indices)
 
@@ -105,41 +103,34 @@ def simulate(batch_env, algo, log=True, reset=False):
     Returns:
       Summary string.
     """
-    score_summary = tf.cond(
-        tf.logical_and(log, tf.cast(mean_score.count, tf.bool)),
-        lambda: tf.summary.scalar('mean_score', mean_score.clear()), str)
-    length_summary = tf.cond(
-        tf.logical_and(log, tf.cast(mean_length.count, tf.bool)),
-        lambda: tf.summary.scalar('mean_length', mean_length.clear()), str)
+    score_summary = tf.cond(tf.logical_and(log, tf.cast(
+        mean_score.count, tf.bool)), lambda: tf.summary.scalar('mean_score', mean_score.clear()),
+                            str)
+    length_summary = tf.cond(tf.logical_and(
+        log, tf.cast(mean_length.count,
+                     tf.bool)), lambda: tf.summary.scalar('mean_length', mean_length.clear()), str)
     return tf.summary.merge([score_summary, length_summary])
 
   with tf.name_scope('simulate'):
     log = tf.convert_to_tensor(log)
     reset = tf.convert_to_tensor(reset)
     with tf.variable_scope('simulate_temporary'):
-      score = tf.Variable(
-          tf.zeros(len(batch_env), dtype=tf.float32), False, name='score')
-      length = tf.Variable(
-          tf.zeros(len(batch_env), dtype=tf.int32), False, name='length')
+      score = tf.Variable(tf.zeros(len(batch_env), dtype=tf.float32), False, name='score')
+      length = tf.Variable(tf.zeros(len(batch_env), dtype=tf.int32), False, name='length')
     mean_score = streaming_mean.StreamingMean((), tf.float32)
    mean_length = streaming_mean.StreamingMean((), tf.float32)
-    agent_indices = tf.cond(
-        reset,
-        lambda: tf.range(len(batch_env)),
-        lambda: tf.cast(tf.where(batch_env.done)[:, 0], tf.int32))
-    begin_episode = tf.cond(
-        tf.cast(tf.shape(agent_indices)[0], tf.bool),
-        lambda: _define_begin_episode(agent_indices), str)
+    agent_indices = tf.cond(reset, lambda: tf.range(len(batch_env)), lambda: tf.cast(
+        tf.where(batch_env.done)[:, 0], tf.int32))
+    begin_episode = tf.cond(tf.cast(tf.shape(agent_indices)[0],
+                                    tf.bool), lambda: _define_begin_episode(agent_indices), str)
     with tf.control_dependencies([begin_episode]):
       step = _define_step()
     with tf.control_dependencies([step]):
       agent_indices = tf.cast(tf.where(batch_env.done)[:, 0], tf.int32)
-      end_episode = tf.cond(
-          tf.cast(tf.shape(agent_indices)[0], tf.bool),
-          lambda: _define_end_episode(agent_indices), str)
+      end_episode = tf.cond(tf.cast(tf.shape(agent_indices)[0],
+                                    tf.bool), lambda: _define_end_episode(agent_indices), str)
     with tf.control_dependencies([end_episode]):
-      summary = tf.summary.merge([
-          _define_summaries(), begin_episode, step, end_episode])
+      summary = tf.summary.merge([_define_summaries(), begin_episode, step, end_episode])
     with tf.control_dependencies([summary]):
       done, score = tf.identity(batch_env.done), tf.identity(score)
     return done, score, summary

@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 """Tests for the simulation operation."""
 
 from __future__ import absolute_import

@@ -84,9 +83,10 @@ class SimulateTest(tf.test.TestCase):
   def _create_test_batch_env(self, durations):
     envs = []
     for duration in durations:
-      env = tools.MockEnvironment(
-          observ_shape=(2, 3), action_shape=(3,),
-          min_duration=duration, max_duration=duration)
+      env = tools.MockEnvironment(observ_shape=(2, 3),
+                                  action_shape=(3,),
+                                  min_duration=duration,
+                                  max_duration=duration)
       env = tools.wrappers.ConvertTo32Bit(env)
       envs.append(env)
     batch_env = tools.BatchEnv(envs, blocking=True)

@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 """Compute a streaming estimation of the mean of submitted tensors."""
 
 from __future__ import absolute_import

@@ -53,9 +52,8 @@ class StreamingMean(object):
     # Add a batch dimension if necessary.
     if value.shape.ndims == self._sum.shape.ndims:
       value = value[None, ...]
-    return tf.group(
-        self._sum.assign_add(tf.reduce_sum(value, 0)),
-        self._count.assign_add(tf.shape(value)[0]))
+    return tf.group(self._sum.assign_add(tf.reduce_sum(value, 0)),
+                    self._count.assign_add(tf.shape(value)[0]))
 
   def clear(self):
     """Return the mean estimate and reset the streaming statistics."""

@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 """Wrappers for OpenAI Gym environments."""
 
 from __future__ import absolute_import

@@ -150,8 +149,7 @@ class FrameHistory(object):
     return self._select_frames()
 
   def _select_frames(self):
-    indices = [
-        (self._step - index) % self._capacity for index in self._past_indices]
+    indices = [(self._step - index) % self._capacity for index in self._past_indices]
     observ = self._buffer[indices]
     if self._flatten:
       observ = np.reshape(observ, (-1,) + observ.shape[2:])

@@ -192,14 +190,14 @@ class RangeNormalize(object):
 
   def __init__(self, env, observ=None, action=None):
     self._env = env
-    self._should_normalize_observ = (
-        observ is not False and self._is_finite(self._env.observation_space))
+    self._should_normalize_observ = (observ is not False and
+                                     self._is_finite(self._env.observation_space))
     if observ is True and not self._should_normalize_observ:
       raise ValueError('Cannot normalize infinite observation range.')
     if observ is None and not self._should_normalize_observ:
       tf.logging.info('Not normalizing infinite observation range.')
-    self._should_normalize_action = (
-        action is not False and self._is_finite(self._env.action_space))
+    self._should_normalize_action = (action is not False and
+                                     self._is_finite(self._env.action_space))
     if action is True and not self._should_normalize_action:
       raise ValueError('Cannot normalize infinite action range.')
     if action is None and not self._should_normalize_action:

@@ -327,8 +325,7 @@ class ExternalProcess(object):
       action_space: The cached action space of the environment.
     """
     self._conn, conn = multiprocessing.Pipe()
-    self._process = multiprocessing.Process(
-        target=self._worker, args=(constructor, conn))
+    self._process = multiprocessing.Process(target=self._worker, args=(constructor, conn))
     atexit.register(self.close)
     self._process.start()
     self._observ_space = None

@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 """Tests for environment wrappers."""
 
 from __future__ import absolute_import

@@ -28,18 +27,20 @@ from agents import tools
 class ExternalProcessTest(tf.test.TestCase):
 
   def test_close_no_hang_after_init(self):
-    constructor = functools.partial(
-        tools.MockEnvironment,
-        observ_shape=(2, 3), action_shape=(2,),
-        min_duration=2, max_duration=2)
+    constructor = functools.partial(tools.MockEnvironment,
+                                    observ_shape=(2, 3),
+                                    action_shape=(2,),
+                                    min_duration=2,
+                                    max_duration=2)
     env = tools.wrappers.ExternalProcess(constructor)
     env.close()
 
   def test_close_no_hang_after_step(self):
-    constructor = functools.partial(
-        tools.MockEnvironment,
-        observ_shape=(2, 3), action_shape=(2,),
-        min_duration=5, max_duration=5)
+    constructor = functools.partial(tools.MockEnvironment,
+                                    observ_shape=(2, 3),
+                                    action_shape=(2,),
+                                    min_duration=5,
+                                    max_duration=5)
     env = tools.wrappers.ExternalProcess(constructor)
     env.reset()
     env.step(env.action_space.sample())

@@ -53,8 +54,7 @@ class ExternalProcessTest(tf.test.TestCase):
       env.step(env.action_space.sample())
 
   def test_reraise_exception_in_step(self):
-    constructor = functools.partial(
-        MockEnvironmentCrashInStep, crash_at_step=3)
+    constructor = functools.partial(MockEnvironmentCrashInStep, crash_at_step=3)
     env = tools.wrappers.ExternalProcess(constructor)
     env.reset()
     env.step(env.action_space.sample())

@@ -74,9 +74,10 @@ class MockEnvironmentCrashInStep(tools.MockEnvironment):
   """Raise an error after specified number of steps in an episode."""
 
   def __init__(self, crash_at_step):
-    super(MockEnvironmentCrashInStep, self).__init__(
-        observ_shape=(2, 3), action_shape=(2,),
-        min_duration=crash_at_step + 1, max_duration=crash_at_step + 1)
+    super(MockEnvironmentCrashInStep, self).__init__(observ_shape=(2, 3),
+                                                     action_shape=(2,),
+                                                     min_duration=crash_at_step + 1,
+                                                     max_duration=crash_at_step + 1)
     self._crash_at_step = crash_at_step
 
   def step(self, *args, **kwargs):