Add a temporary copy of TF Agents (until the API stops changing or configs.py is included)
This commit is contained in:
168
examples/pybullet/gym/pybullet_envs/agents/ppo/normalize.py
Normal file
168
examples/pybullet/gym/pybullet_envs/agents/ppo/normalize.py
Normal file
@@ -0,0 +1,168 @@
|
||||
# Copyright 2017 The TensorFlow Agents Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""Normalize tensors based on streaming estimates of mean and variance."""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import tensorflow as tf
|
||||
|
||||
|
||||
class StreamingNormalize(object):
  """Normalize tensors based on streaming estimates of mean and variance.

  The running statistics live in three non-trainable variables: the number of
  samples seen so far, the running mean, and a running sum of squared
  deviations from which the standard deviation is derived.
  """

  def __init__(
      self, template, center=True, scale=True, clip=10, name='normalize'):
    """Normalize tensors based on streaming estimates of mean and variance.

    Centering the value, scaling it by the standard deviation, and clipping
    outlier values are optional.

    Args:
      template: Example tensor providing shape and dtype of the value to track.
      center: Python boolean indicating whether to subtract mean from values.
      scale: Python boolean indicating whether to scale values by stddev.
      clip: If and when to clip normalized values. A falsy value disables
        clipping; otherwise values are clipped to `[-clip, clip]`.
      name: Parent scope of operations provided by this class.
    """
    self._center = center
    self._scale = scale
    self._clip = clip
    self._name = name
    with tf.name_scope(name):
      # The statistics are updated explicitly via `update()` and must never
      # be touched by an optimizer, hence `trainable=False`.
      self._count = tf.Variable(0, trainable=False)
      self._mean = tf.Variable(tf.zeros_like(template), trainable=False)
      self._var_sum = tf.Variable(tf.zeros_like(template), trainable=False)

  def transform(self, value):
    """Normalize a single or batch tensor.

    Applies the activated transformations in the constructor using current
    estimates of mean and variance.

    Args:
      value: Batch or single value tensor.

    Returns:
      Normalized batch or single value tensor.
    """
    with tf.name_scope(self._name + '/transform'):
      # A value whose rank equals that of the tracked mean carries no batch
      # dimension; one is added temporarily so both cases share one code path.
      no_batch_dim = value.shape.ndims == self._mean.shape.ndims
      if no_batch_dim:
        # Add a batch dimension if necessary.
        value = value[None, ...]
      if self._center:
        value -= self._mean[None, ...]
      if self._scale:
        # We cannot scale before seeing at least two samples, so the value is
        # divided by one until then.
        value /= tf.cond(
            self._count > 1, lambda: self._std() + 1e-8,
            lambda: tf.ones_like(self._var_sum))[None]
      if self._clip:
        value = tf.clip_by_value(value, -self._clip, self._clip)
      # Remove batch dimension if necessary.
      if no_batch_dim:
        value = value[0]
      return tf.check_numerics(value, 'value')

  def update(self, value):
    """Update the mean and variance estimates.

    Args:
      value: Batch or single value tensor.

    Returns:
      Summary tensor.
    """
    with tf.name_scope(self._name + '/update'):
      if value.shape.ndims == self._mean.shape.ndims:
        # Add a batch dimension if necessary.
        value = value[None, ...]
      count = tf.shape(value)[0]
      # Everything computed in this context sees the already incremented
      # sample count but still the previous mean and sum of deviations.
      with tf.control_dependencies([self._count.assign_add(count)]):
        step = self._as_float(self._count)
        mean_delta = tf.reduce_sum(value - self._mean[None, ...], 0)
        new_mean = self._mean + mean_delta / step
        # When the count is exactly one, the mean is the single sample itself.
        new_mean = tf.cond(self._count > 1, lambda: new_mean, lambda: value[0])
        # Product of the deviations from the previous and from the new mean.
        var_delta = (
            value - self._mean[None, ...]) * (value - new_mean[None, ...])
        new_var_sum = self._var_sum + tf.reduce_sum(var_delta, 0)
      # Both new statistics must be computed from the old variable values
      # before either variable is overwritten.
      with tf.control_dependencies([new_mean, new_var_sum]):
        update = self._mean.assign(new_mean), self._var_sum.assign(new_var_sum)
      # Creating the summary inside this context makes evaluating it trigger
      # the variable assignments.
      with tf.control_dependencies(update):
        # Reducing over all axes yields a scalar for any rank, so no special
        # case for rank-one input is needed.
        return self._summary('value', tf.reduce_mean(value))

  def reset(self):
    """Reset the estimates of mean and variance.

    Resets the full state of this class.

    Returns:
      Operation.
    """
    with tf.name_scope(self._name + '/reset'):
      return tf.group(
          self._count.assign(0),
          self._mean.assign(tf.zeros_like(self._mean)),
          self._var_sum.assign(tf.zeros_like(self._var_sum)))

  def summary(self):
    """Summary string of mean and standard deviation.

    The mean is only summarized once at least one sample was seen and the
    standard deviation once at least two samples were seen.

    Returns:
      Summary tensor.
    """
    with tf.name_scope(self._name + '/summary'):
      # `str` serves as a no-argument callable returning the empty string, so
      # the false branch contributes an empty string instead of a summary.
      mean_summary = tf.cond(
          self._count > 0, lambda: self._summary('mean', self._mean), str)
      std_summary = tf.cond(
          self._count > 1, lambda: self._summary('stddev', self._std()), str)
      return tf.summary.merge([mean_summary, std_summary])

  def _as_float(self, count):
    """Cast a sample count to the dtype of the tracked statistics.

    Using the dtype of the mean rather than a fixed `tf.float32` keeps the
    arithmetic valid for templates of other floating point types.

    Args:
      count: Integer tensor holding a number of samples.

    Returns:
      Tensor with the same value in the dtype of the running mean.
    """
    return tf.cast(count, self._mean.dtype.base_dtype)

  def _std(self):
    """Computes the current estimate of the standard deviation.

    Note that the standard deviation is not defined until at least two samples
    were seen.

    Returns:
      Tensor of current standard deviation.
    """
    variance = tf.cond(
        self._count > 1,
        lambda: self._var_sum / self._as_float(self._count - 1),
        lambda: tf.ones_like(self._var_sum) * float('nan'))
    # The epsilon corrects for small negative variance values caused by
    # the algorithm. It was empirically chosen to work with all environments
    # tested.
    return tf.sqrt(variance + 1e-4)

  def _summary(self, name, tensor):
    """Create a scalar or histogram summary matching the rank of the tensor.

    Args:
      name: Name for the summary.
      tensor: Tensor to summarize.

    Returns:
      Summary tensor.
    """
    if tensor.shape.ndims == 0:
      return tf.summary.scalar(name, tensor)
    else:
      return tf.summary.histogram(name, tensor)
|
||||
Reference in New Issue
Block a user