add yapf style and apply yapf to format all Python files
This recreates pull request #2192
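For reference, a minimal sketch of the setup this commit describes. The style file itself is not part of the excerpt below, so the values here are assumptions inferred from the reformatted lines (roughly a 100-character column limit, two-space indents, Google-style argument wrapping):

    # .style.yapf -- hypothetical reconstruction, not the committed file.
    # Values inferred from the formatting in the hunks below.
    [style]
    based_on_style = google
    column_limit = 100
    indent_width = 2

    # Reformat every Python file in place, recursively:
    #   yapf --in-place --recursive .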
@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 """Tools for reinforcement learning."""
 
 from __future__ import absolute_import

@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 """Wrap a dictionary to access keys as attributes."""
 
 from __future__ import absolute_import

@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 """Tests for the attribute dictionary."""
 
 from __future__ import absolute_import

@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 """Combine multiple environments to step them in batch."""
 
 from __future__ import absolute_import

@@ -84,13 +83,9 @@ class BatchEnv(object):
         message = 'Invalid action at index {}: {}'
         raise ValueError(message.format(index, action))
     if self._blocking:
-      transitions = [
-          env.step(action)
-          for env, action in zip(self._envs, actions)]
+      transitions = [env.step(action) for env, action in zip(self._envs, actions)]
     else:
-      transitions = [
-          env.step(action, blocking=False)
-          for env, action in zip(self._envs, actions)]
+      transitions = [env.step(action, blocking=False) for env, action in zip(self._envs, actions)]
       transitions = [transition() for transition in transitions]
     observs, rewards, dones, infos = zip(*transitions)
     observ = np.stack(observs)

@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 """Count learnable parameters."""
 
 from __future__ import absolute_import

@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 """Tests for the weight counting utility."""
 
 from __future__ import absolute_import

@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 """Batch of environments inside the TensorFlow graph."""
 
 from __future__ import absolute_import

@@ -42,18 +41,18 @@ class InGraphBatchEnv(object):
     action_shape = self._parse_shape(self._batch_env.action_space)
     action_dtype = self._parse_dtype(self._batch_env.action_space)
     with tf.variable_scope('env_temporary'):
-      self._observ = tf.Variable(
-          tf.zeros((len(self._batch_env),) + observ_shape, observ_dtype),
-          name='observ', trainable=False)
-      self._action = tf.Variable(
-          tf.zeros((len(self._batch_env),) + action_shape, action_dtype),
-          name='action', trainable=False)
-      self._reward = tf.Variable(
-          tf.zeros((len(self._batch_env),), tf.float32),
-          name='reward', trainable=False)
-      self._done = tf.Variable(
-          tf.cast(tf.ones((len(self._batch_env),)), tf.bool),
-          name='done', trainable=False)
+      self._observ = tf.Variable(tf.zeros((len(self._batch_env),) + observ_shape, observ_dtype),
+                                 name='observ',
+                                 trainable=False)
+      self._action = tf.Variable(tf.zeros((len(self._batch_env),) + action_shape, action_dtype),
+                                 name='action',
+                                 trainable=False)
+      self._reward = tf.Variable(tf.zeros((len(self._batch_env),), tf.float32),
+                                 name='reward',
+                                 trainable=False)
+      self._done = tf.Variable(tf.cast(tf.ones((len(self._batch_env),)), tf.bool),
+                               name='done',
+                               trainable=False)
 
   def __getattr__(self, name):
     """Forward unimplemented attributes to one of the original environments.

@@ -89,16 +88,13 @@ class InGraphBatchEnv(object):
     if action.dtype in (tf.float16, tf.float32, tf.float64):
       action = tf.check_numerics(action, 'action')
     observ_dtype = self._parse_dtype(self._batch_env.observation_space)
-    observ, reward, done = tf.py_func(
-        lambda a: self._batch_env.step(a)[:3], [action],
-        [observ_dtype, tf.float32, tf.bool], name='step')
+    observ, reward, done = tf.py_func(lambda a: self._batch_env.step(a)[:3], [action],
+                                      [observ_dtype, tf.float32, tf.bool],
+                                      name='step')
     observ = tf.check_numerics(observ, 'observ')
     reward = tf.check_numerics(reward, 'reward')
-    return tf.group(
-        self._observ.assign(observ),
-        self._action.assign(action),
-        self._reward.assign(reward),
-        self._done.assign(done))
+    return tf.group(self._observ.assign(observ), self._action.assign(action),
+                    self._reward.assign(reward), self._done.assign(done))
 
   def reset(self, indices=None):
     """Reset the batch of environments.

@@ -112,15 +108,15 @@ class InGraphBatchEnv(object):
     if indices is None:
       indices = tf.range(len(self._batch_env))
     observ_dtype = self._parse_dtype(self._batch_env.observation_space)
-    observ = tf.py_func(
-        self._batch_env.reset, [indices], observ_dtype, name='reset')
+    observ = tf.py_func(self._batch_env.reset, [indices], observ_dtype, name='reset')
     observ = tf.check_numerics(observ, 'observ')
     reward = tf.zeros_like(indices, tf.float32)
     done = tf.zeros_like(indices, tf.bool)
     with tf.control_dependencies([
         tf.scatter_update(self._observ, indices, observ),
         tf.scatter_update(self._reward, indices, reward),
-        tf.scatter_update(self._done, indices, done)]):
+        tf.scatter_update(self._done, indices, done)
+    ]):
       return tf.identity(observ)
 
   @property

@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 """Put an OpenAI Gym environment into the TensorFlow graph."""
 
 from __future__ import absolute_import

@@ -42,16 +41,15 @@ class InGraphEnv(object):
     action_shape = self._parse_shape(self._env.action_space)
     action_dtype = self._parse_dtype(self._env.action_space)
     with tf.name_scope('environment'):
-      self._observ = tf.Variable(
-          tf.zeros(observ_shape, observ_dtype), name='observ', trainable=False)
-      self._action = tf.Variable(
-          tf.zeros(action_shape, action_dtype), name='action', trainable=False)
-      self._reward = tf.Variable(
-          0.0, dtype=tf.float32, name='reward', trainable=False)
-      self._done = tf.Variable(
-          True, dtype=tf.bool, name='done', trainable=False)
-      self._step = tf.Variable(
-          0, dtype=tf.int32, name='step', trainable=False)
+      self._observ = tf.Variable(tf.zeros(observ_shape, observ_dtype),
+                                 name='observ',
+                                 trainable=False)
+      self._action = tf.Variable(tf.zeros(action_shape, action_dtype),
+                                 name='action',
+                                 trainable=False)
+      self._reward = tf.Variable(0.0, dtype=tf.float32, name='reward', trainable=False)
+      self._done = tf.Variable(True, dtype=tf.bool, name='done', trainable=False)
+      self._step = tf.Variable(0, dtype=tf.int32, name='step', trainable=False)
 
   def __getattr__(self, name):
     """Forward unimplemented attributes to the original environment.

@@ -79,17 +77,14 @@ class InGraphEnv(object):
     if action.dtype in (tf.float16, tf.float32, tf.float64):
       action = tf.check_numerics(action, 'action')
     observ_dtype = self._parse_dtype(self._env.observation_space)
-    observ, reward, done = tf.py_func(
-        lambda a: self._env.step(a)[:3], [action],
-        [observ_dtype, tf.float32, tf.bool], name='step')
+    observ, reward, done = tf.py_func(lambda a: self._env.step(a)[:3], [action],
+                                      [observ_dtype, tf.float32, tf.bool],
+                                      name='step')
     observ = tf.check_numerics(observ, 'observ')
     reward = tf.check_numerics(reward, 'reward')
-    return tf.group(
-        self._observ.assign(observ),
-        self._action.assign(action),
-        self._reward.assign(reward),
-        self._done.assign(done),
-        self._step.assign_add(1))
+    return tf.group(self._observ.assign(observ), self._action.assign(action),
+                    self._reward.assign(reward), self._done.assign(done),
+                    self._step.assign_add(1))
 
   def reset(self):
     """Reset the environment.

@@ -100,10 +95,10 @@ class InGraphEnv(object):
     observ_dtype = self._parse_dtype(self._env.observation_space)
     observ = tf.py_func(self._env.reset, [], observ_dtype, name='reset')
     observ = tf.check_numerics(observ, 'observ')
-    with tf.control_dependencies([
-        self._observ.assign(observ),
-        self._reward.assign(0),
-        self._done.assign(False)]):
+    with tf.control_dependencies(
+        [self._observ.assign(observ),
+         self._reward.assign(0),
+         self._done.assign(False)]):
       return tf.identity(observ)
 
   @property

@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 """Execute operations in a loop and coordinate logging and checkpoints."""
 
 from __future__ import absolute_import

@@ -25,10 +24,8 @@ import tensorflow as tf
 
 from pybullet_envs.minitaur.agents.tools import streaming_mean
 
 
 _Phase = collections.namedtuple(
-    'Phase',
-    'name, writer, op, batch, steps, feed, report_every, log_every,'
+    'Phase', 'name, writer, op, batch, steps, feed, report_every, log_every,'
     'checkpoint_every')

@@ -56,16 +53,22 @@ class Loop(object):
       reset: Tensor indicating to the model to start a new computation.
     """
     self._logdir = logdir
-    self._step = (
-        tf.Variable(0, False, name='global_step') if step is None else step)
+    self._step = (tf.Variable(0, False, name='global_step') if step is None else step)
     self._log = tf.placeholder(tf.bool) if log is None else log
     self._report = tf.placeholder(tf.bool) if report is None else report
     self._reset = tf.placeholder(tf.bool) if reset is None else reset
     self._phases = []
 
-  def add_phase(
-      self, name, done, score, summary, steps,
-      report_every=None, log_every=None, checkpoint_every=None, feed=None):
+  def add_phase(self,
+                name,
+                done,
+                score,
+                summary,
+                steps,
+                report_every=None,
+                log_every=None,
+                checkpoint_every=None,
+                feed=None):
     """Add a phase to the loop protocol.
 
     If the model breaks long computation into multiple steps, the done tensor

@@ -97,13 +100,12 @@ class Loop(object):
     if done.shape.ndims is None or score.shape.ndims is None:
       raise ValueError("Rank of 'done' and 'score' tensors must be known.")
     writer = self._logdir and tf.summary.FileWriter(
-        os.path.join(self._logdir, name), tf.get_default_graph(),
-        flush_secs=60)
+        os.path.join(self._logdir, name), tf.get_default_graph(), flush_secs=60)
     op = self._define_step(done, score, summary)
     batch = 1 if score.shape.ndims == 0 else score.shape[0].value
-    self._phases.append(_Phase(
-        name, writer, op, batch, int(steps), feed, report_every,
-        log_every, checkpoint_every))
+    self._phases.append(
+        _Phase(name, writer, op, batch, int(steps), feed, report_every, log_every,
+               checkpoint_every))
 
   def run(self, sess, saver, max_step=None):
     """Run the loop schedule for a specified number of steps.

@@ -133,13 +135,11 @@ class Loop(object):
         tf.logging.info(message.format(phase.name, phase_step, global_step))
       # Populate book keeping tensors.
       phase.feed[self._reset] = (steps_in < steps_made)
-      phase.feed[self._log] = (
-          phase.writer and
-          self._is_every_steps(phase_step, phase.batch, phase.log_every))
-      phase.feed[self._report] = (
-          self._is_every_steps(phase_step, phase.batch, phase.report_every))
-      summary, mean_score, global_step, steps_made = sess.run(
-          phase.op, phase.feed)
+      phase.feed[self._log] = (phase.writer and
+                               self._is_every_steps(phase_step, phase.batch, phase.log_every))
+      phase.feed[self._report] = (self._is_every_steps(phase_step, phase.batch,
+                                                       phase.report_every))
+      summary, mean_score, global_step, steps_made = sess.run(phase.op, phase.feed)
       if self._is_every_steps(phase_step, phase.batch, phase.checkpoint_every):
         self._store_checkpoint(sess, saver, global_step)
       if self._is_every_steps(phase_step, phase.batch, phase.report_every):

@@ -207,8 +207,7 @@ class Loop(object):
     score_mean = streaming_mean.StreamingMean((), tf.float32)
     with tf.control_dependencies([done, score, summary]):
       done_score = tf.gather(score, tf.where(done)[:, 0])
-      submit_score = tf.cond(
-          tf.reduce_any(done), lambda: score_mean.submit(done_score), tf.no_op)
+      submit_score = tf.cond(tf.reduce_any(done), lambda: score_mean.submit(done_score), tf.no_op)
     with tf.control_dependencies([submit_score]):
       mean_score = tf.cond(self._report, score_mean.clear, float)
       steps_made = tf.shape(score)[0]

@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 """Tests for the training loop."""
 
 from __future__ import absolute_import

@@ -28,8 +27,7 @@ class LoopTest(tf.test.TestCase):
   def test_report_every_step(self):
     step = tf.Variable(0, False, dtype=tf.int32, name='step')
     loop = tools.Loop(None, step)
-    loop.add_phase(
-        'phase_1', done=True, score=0, summary='', steps=1, report_every=3)
+    loop.add_phase('phase_1', done=True, score=0, summary='', steps=1, report_every=3)
     # Step: 0 1 2 3 4 5 6 7 8
     # Report: x x x
     with self.test_session() as sess:

@@ -45,15 +43,33 @@ class LoopTest(tf.test.TestCase):
   def test_phases_feed(self):
     score = tf.placeholder(tf.float32, [])
     loop = tools.Loop(None)
-    loop.add_phase(
-        'phase_1', done=True, score=score, summary='', steps=1, report_every=1,
-        log_every=None, checkpoint_every=None, feed={score: 1})
-    loop.add_phase(
-        'phase_2', done=True, score=score, summary='', steps=3, report_every=1,
-        log_every=None, checkpoint_every=None, feed={score: 2})
-    loop.add_phase(
-        'phase_3', done=True, score=score, summary='', steps=2, report_every=1,
-        log_every=None, checkpoint_every=None, feed={score: 3})
+    loop.add_phase('phase_1',
+                   done=True,
+                   score=score,
+                   summary='',
+                   steps=1,
+                   report_every=1,
+                   log_every=None,
+                   checkpoint_every=None,
+                   feed={score: 1})
+    loop.add_phase('phase_2',
+                   done=True,
+                   score=score,
+                   summary='',
+                   steps=3,
+                   report_every=1,
+                   log_every=None,
+                   checkpoint_every=None,
+                   feed={score: 2})
+    loop.add_phase('phase_3',
+                   done=True,
+                   score=score,
+                   summary='',
+                   steps=2,
+                   report_every=1,
+                   log_every=None,
+                   checkpoint_every=None,
+                   feed={score: 3})
     with self.test_session() as sess:
       sess.run(tf.global_variables_initializer())
       scores = list(loop.run(sess, saver=None, max_step=15))

@@ -61,10 +77,8 @@ class LoopTest(tf.test.TestCase):
 
   def test_average_score_over_phases(self):
     loop = tools.Loop(None)
-    loop.add_phase(
-        'phase_1', done=True, score=1, summary='', steps=1, report_every=2)
-    loop.add_phase(
-        'phase_2', done=True, score=2, summary='', steps=2, report_every=5)
+    loop.add_phase('phase_1', done=True, score=1, summary='', steps=1, report_every=2)
+    loop.add_phase('phase_2', done=True, score=2, summary='', steps=2, report_every=5)
     # Score: 1 2 2 1 2 2 1 2 2 1 2 2 1 2 2 1 2
     # Report 1: x x x
     # Report 2: x x

@@ -78,8 +92,7 @@ class LoopTest(tf.test.TestCase):
     done = tf.equal((step + 1) % 2, 0)
     score = tf.cast(step, tf.float32)
     loop = tools.Loop(None, step)
-    loop.add_phase(
-        'phase_1', done, score, summary='', steps=1, report_every=3)
+    loop.add_phase('phase_1', done, score, summary='', steps=1, report_every=3)
     # Score: 0 1 2 3 4 5 6 7 8
     # Done: x x x x
     # Report: x x x

@@ -91,10 +104,9 @@ class LoopTest(tf.test.TestCase):
   def test_not_done_batch(self):
     step = tf.Variable(0, False, dtype=tf.int32, name='step')
     done = tf.equal([step % 3, step % 4], 0)
-    score = tf.cast([step, step ** 2], tf.float32)
+    score = tf.cast([step, step**2], tf.float32)
     loop = tools.Loop(None, step)
-    loop.add_phase(
-        'phase_1', done, score, summary='', steps=1, report_every=8)
+    loop.add_phase('phase_1', done, score, summary='', steps=1, report_every=8)
     # Step: 0 2 4 6
     # Score 1: 0 2 4 6
     # Done 1: x x

@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 """Mock algorithm for testing reinforcement learning code."""
 
 from __future__ import absolute_import

@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 """Mock environment for testing reinforcement learning code."""
 
 from __future__ import absolute_import

@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 """In-graph simulation step of a vectorized algorithm with environments."""
 
 from __future__ import absolute_import

@@ -55,7 +54,8 @@ def simulate(batch_env, algo, log=True, reset=False):
     reset_ops = [
         batch_env.reset(agent_indices),
         tf.scatter_update(score, agent_indices, zero_scores),
-        tf.scatter_update(length, agent_indices, zero_durations)]
+        tf.scatter_update(length, agent_indices, zero_durations)
+    ]
     with tf.control_dependencies(reset_ops):
       return algo.begin_episode(agent_indices)
 
@@ -76,9 +76,8 @@ def simulate(batch_env, algo, log=True, reset=False):
     add_score = score.assign_add(batch_env.reward)
     inc_length = length.assign_add(tf.ones(len(batch_env), tf.int32))
     with tf.control_dependencies([add_score, inc_length]):
-      experience_summary = algo.experience(
-          prevob, batch_env.action, batch_env.reward, batch_env.done,
-          batch_env.observ)
+      experience_summary = algo.experience(prevob, batch_env.action, batch_env.reward,
+                                           batch_env.done, batch_env.observ)
     return tf.summary.merge([step_summary, experience_summary])
 
   def _define_end_episode(agent_indices):

@@ -94,8 +93,7 @@ def simulate(batch_env, algo, log=True, reset=False):
     """
     assert agent_indices.shape.ndims == 1
     submit_score = mean_score.submit(tf.gather(score, agent_indices))
-    submit_length = mean_length.submit(
-        tf.cast(tf.gather(length, agent_indices), tf.float32))
+    submit_length = mean_length.submit(tf.cast(tf.gather(length, agent_indices), tf.float32))
     with tf.control_dependencies([submit_score, submit_length]):
       return algo.end_episode(agent_indices)
 
@@ -105,41 +103,34 @@ def simulate(batch_env, algo, log=True, reset=False):
     Returns:
       Summary string.
     """
-    score_summary = tf.cond(
-        tf.logical_and(log, tf.cast(mean_score.count, tf.bool)),
-        lambda: tf.summary.scalar('mean_score', mean_score.clear()), str)
-    length_summary = tf.cond(
-        tf.logical_and(log, tf.cast(mean_length.count, tf.bool)),
-        lambda: tf.summary.scalar('mean_length', mean_length.clear()), str)
+    score_summary = tf.cond(tf.logical_and(log, tf.cast(
+        mean_score.count, tf.bool)), lambda: tf.summary.scalar('mean_score', mean_score.clear()),
+                            str)
+    length_summary = tf.cond(tf.logical_and(
+        log, tf.cast(mean_length.count,
+                     tf.bool)), lambda: tf.summary.scalar('mean_length', mean_length.clear()), str)
     return tf.summary.merge([score_summary, length_summary])
 
   with tf.name_scope('simulate'):
     log = tf.convert_to_tensor(log)
     reset = tf.convert_to_tensor(reset)
     with tf.variable_scope('simulate_temporary'):
-      score = tf.Variable(
-          tf.zeros(len(batch_env), dtype=tf.float32), False, name='score')
-      length = tf.Variable(
-          tf.zeros(len(batch_env), dtype=tf.int32), False, name='length')
+      score = tf.Variable(tf.zeros(len(batch_env), dtype=tf.float32), False, name='score')
+      length = tf.Variable(tf.zeros(len(batch_env), dtype=tf.int32), False, name='length')
     mean_score = streaming_mean.StreamingMean((), tf.float32)
    mean_length = streaming_mean.StreamingMean((), tf.float32)
-    agent_indices = tf.cond(
-        reset,
-        lambda: tf.range(len(batch_env)),
-        lambda: tf.cast(tf.where(batch_env.done)[:, 0], tf.int32))
-    begin_episode = tf.cond(
-        tf.cast(tf.shape(agent_indices)[0], tf.bool),
-        lambda: _define_begin_episode(agent_indices), str)
+    agent_indices = tf.cond(reset, lambda: tf.range(len(batch_env)), lambda: tf.cast(
+        tf.where(batch_env.done)[:, 0], tf.int32))
+    begin_episode = tf.cond(tf.cast(tf.shape(agent_indices)[0],
+                                    tf.bool), lambda: _define_begin_episode(agent_indices), str)
     with tf.control_dependencies([begin_episode]):
       step = _define_step()
     with tf.control_dependencies([step]):
       agent_indices = tf.cast(tf.where(batch_env.done)[:, 0], tf.int32)
-      end_episode = tf.cond(
-          tf.cast(tf.shape(agent_indices)[0], tf.bool),
-          lambda: _define_end_episode(agent_indices), str)
+      end_episode = tf.cond(tf.cast(tf.shape(agent_indices)[0],
+                                    tf.bool), lambda: _define_end_episode(agent_indices), str)
     with tf.control_dependencies([end_episode]):
-      summary = tf.summary.merge([
-          _define_summaries(), begin_episode, step, end_episode])
+      summary = tf.summary.merge([_define_summaries(), begin_episode, step, end_episode])
     with tf.control_dependencies([summary]):
       done, score = tf.identity(batch_env.done), tf.identity(score)
     return done, score, summary

@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 """Tests for the simulation operation."""
 
 from __future__ import absolute_import

@@ -84,9 +83,10 @@ class SimulateTest(tf.test.TestCase):
   def _create_test_batch_env(self, durations):
     envs = []
     for duration in durations:
-      env = tools.MockEnvironment(
-          observ_shape=(2, 3), action_shape=(3,),
-          min_duration=duration, max_duration=duration)
+      env = tools.MockEnvironment(observ_shape=(2, 3),
+                                  action_shape=(3,),
+                                  min_duration=duration,
+                                  max_duration=duration)
       env = tools.wrappers.ConvertTo32Bit(env)
       envs.append(env)
     batch_env = tools.BatchEnv(envs, blocking=True)

@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 """Compute a streaming estimation of the mean of submitted tensors."""
 
 from __future__ import absolute_import

@@ -53,9 +52,8 @@ class StreamingMean(object):
     # Add a batch dimension if necessary.
     if value.shape.ndims == self._sum.shape.ndims:
       value = value[None, ...]
-    return tf.group(
-        self._sum.assign_add(tf.reduce_sum(value, 0)),
-        self._count.assign_add(tf.shape(value)[0]))
+    return tf.group(self._sum.assign_add(tf.reduce_sum(value, 0)),
+                    self._count.assign_add(tf.shape(value)[0]))
 
   def clear(self):
     """Return the mean estimate and reset the streaming statistics."""

@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 """Wrappers for OpenAI Gym environments."""
 
 from __future__ import absolute_import

@@ -150,8 +149,7 @@ class FrameHistory(object):
     return self._select_frames()
 
   def _select_frames(self):
-    indices = [
-        (self._step - index) % self._capacity for index in self._past_indices]
+    indices = [(self._step - index) % self._capacity for index in self._past_indices]
     observ = self._buffer[indices]
     if self._flatten:
       observ = np.reshape(observ, (-1,) + observ.shape[2:])

@@ -192,14 +190,14 @@ class RangeNormalize(object):
 
   def __init__(self, env, observ=None, action=None):
     self._env = env
-    self._should_normalize_observ = (
-        observ is not False and self._is_finite(self._env.observation_space))
+    self._should_normalize_observ = (observ is not False and
+                                     self._is_finite(self._env.observation_space))
     if observ is True and not self._should_normalize_observ:
       raise ValueError('Cannot normalize infinite observation range.')
     if observ is None and not self._should_normalize_observ:
       tf.logging.info('Not normalizing infinite observation range.')
-    self._should_normalize_action = (
-        action is not False and self._is_finite(self._env.action_space))
+    self._should_normalize_action = (action is not False and
+                                     self._is_finite(self._env.action_space))
     if action is True and not self._should_normalize_action:
       raise ValueError('Cannot normalize infinite action range.')
     if action is None and not self._should_normalize_action:

@@ -327,8 +325,7 @@ class ExternalProcess(object):
       action_space: The cached action space of the environment.
     """
     self._conn, conn = multiprocessing.Pipe()
-    self._process = multiprocessing.Process(
-        target=self._worker, args=(constructor, conn))
+    self._process = multiprocessing.Process(target=self._worker, args=(constructor, conn))
     atexit.register(self.close)
     self._process.start()
     self._observ_space = None

@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 """Tests for environment wrappers."""
 
 from __future__ import absolute_import

@@ -28,18 +27,20 @@ from agents import tools
 class ExternalProcessTest(tf.test.TestCase):
 
   def test_close_no_hang_after_init(self):
-    constructor = functools.partial(
-        tools.MockEnvironment,
-        observ_shape=(2, 3), action_shape=(2,),
-        min_duration=2, max_duration=2)
+    constructor = functools.partial(tools.MockEnvironment,
+                                    observ_shape=(2, 3),
+                                    action_shape=(2,),
+                                    min_duration=2,
+                                    max_duration=2)
     env = tools.wrappers.ExternalProcess(constructor)
     env.close()
 
   def test_close_no_hang_after_step(self):
-    constructor = functools.partial(
-        tools.MockEnvironment,
-        observ_shape=(2, 3), action_shape=(2,),
-        min_duration=5, max_duration=5)
+    constructor = functools.partial(tools.MockEnvironment,
+                                    observ_shape=(2, 3),
+                                    action_shape=(2,),
+                                    min_duration=5,
+                                    max_duration=5)
     env = tools.wrappers.ExternalProcess(constructor)
     env.reset()
     env.step(env.action_space.sample())

@@ -53,8 +54,7 @@ class ExternalProcessTest(tf.test.TestCase):
       env.step(env.action_space.sample())
 
   def test_reraise_exception_in_step(self):
-    constructor = functools.partial(
-        MockEnvironmentCrashInStep, crash_at_step=3)
+    constructor = functools.partial(MockEnvironmentCrashInStep, crash_at_step=3)
     env = tools.wrappers.ExternalProcess(constructor)
     env.reset()
     env.step(env.action_space.sample())

@@ -74,9 +74,10 @@ class MockEnvironmentCrashInStep(tools.MockEnvironment):
   """Raise an error after specified number of steps in an episode."""
 
   def __init__(self, crash_at_step):
-    super(MockEnvironmentCrashInStep, self).__init__(
-        observ_shape=(2, 3), action_shape=(2,),
-        min_duration=crash_at_step + 1, max_duration=crash_at_step + 1)
+    super(MockEnvironmentCrashInStep, self).__init__(observ_shape=(2, 3),
+                                                     action_shape=(2,),
+                                                     min_duration=crash_at_step + 1,
+                                                     max_duration=crash_at_step + 1)
     self._crash_at_step = crash_at_step
 
   def step(self, *args, **kwargs):