add yapf style and apply yapf to format all Python files

This recreates pull request #2192
This commit is contained in:
Erwin Coumans
2019-04-27 07:31:15 -07:00
parent c591735042
commit ef9570c315
347 changed files with 70304 additions and 22752 deletions

View File

@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Network definitions for the PPO algorithm."""
from __future__ import absolute_import
@@ -24,13 +23,10 @@ import operator
import tensorflow as tf
NetworkOutput = collections.namedtuple(
'NetworkOutput', 'policy, mean, logstd, value, state')
NetworkOutput = collections.namedtuple('NetworkOutput', 'policy, mean, logstd, value, state')
def feed_forward_gaussian(
config, action_size, observations, unused_length, state=None):
def feed_forward_gaussian(config, action_size, observations, unused_length, state=None):
"""Independent feed forward networks for policy and value.
The policy network outputs the mean action and the log standard deviation
@@ -50,20 +46,22 @@ def feed_forward_gaussian(
factor=config.init_mean_factor)
logstd_initializer = tf.random_normal_initializer(config.init_logstd, 1e-10)
flat_observations = tf.reshape(observations, [
tf.shape(observations)[0], tf.shape(observations)[1],
functools.reduce(operator.mul, observations.shape.as_list()[2:], 1)])
tf.shape(observations)[0],
tf.shape(observations)[1],
functools.reduce(operator.mul,
observations.shape.as_list()[2:], 1)
])
with tf.variable_scope('policy'):
x = flat_observations
for size in config.policy_layers:
x = tf.contrib.layers.fully_connected(x, size, tf.nn.relu)
mean = tf.contrib.layers.fully_connected(
x, action_size, tf.tanh,
weights_initializer=mean_weights_initializer)
logstd = tf.get_variable(
'logstd', mean.shape[2:], tf.float32, logstd_initializer)
logstd = tf.tile(
logstd[None, None],
[tf.shape(mean)[0], tf.shape(mean)[1]] + [1] * (mean.shape.ndims - 2))
mean = tf.contrib.layers.fully_connected(x,
action_size,
tf.tanh,
weights_initializer=mean_weights_initializer)
logstd = tf.get_variable('logstd', mean.shape[2:], tf.float32, logstd_initializer)
logstd = tf.tile(logstd[None, None],
[tf.shape(mean)[0], tf.shape(mean)[1]] + [1] * (mean.shape.ndims - 2))
with tf.variable_scope('value'):
x = flat_observations
for size in config.value_layers:
@@ -72,13 +70,11 @@ def feed_forward_gaussian(
mean = tf.check_numerics(mean, 'mean')
logstd = tf.check_numerics(logstd, 'logstd')
value = tf.check_numerics(value, 'value')
policy = tf.contrib.distributions.MultivariateNormalDiag(
mean, tf.exp(logstd))
policy = tf.contrib.distributions.MultivariateNormalDiag(mean, tf.exp(logstd))
return NetworkOutput(policy, mean, logstd, value, state)
def recurrent_gaussian(
config, action_size, observations, length, state=None):
def recurrent_gaussian(config, action_size, observations, length, state=None):
"""Independent recurrent policy and feed forward value networks.
The policy network outputs the mean action and the log standard deviation
@@ -100,21 +96,23 @@ def recurrent_gaussian(
logstd_initializer = tf.random_normal_initializer(config.init_logstd, 1e-10)
cell = tf.contrib.rnn.GRUBlockCell(config.policy_layers[-1])
flat_observations = tf.reshape(observations, [
tf.shape(observations)[0], tf.shape(observations)[1],
functools.reduce(operator.mul, observations.shape.as_list()[2:], 1)])
tf.shape(observations)[0],
tf.shape(observations)[1],
functools.reduce(operator.mul,
observations.shape.as_list()[2:], 1)
])
with tf.variable_scope('policy'):
x = flat_observations
for size in config.policy_layers[:-1]:
x = tf.contrib.layers.fully_connected(x, size, tf.nn.relu)
x, state = tf.nn.dynamic_rnn(cell, x, length, state, tf.float32)
mean = tf.contrib.layers.fully_connected(
x, action_size, tf.tanh,
weights_initializer=mean_weights_initializer)
logstd = tf.get_variable(
'logstd', mean.shape[2:], tf.float32, logstd_initializer)
logstd = tf.tile(
logstd[None, None],
[tf.shape(mean)[0], tf.shape(mean)[1]] + [1] * (mean.shape.ndims - 2))
mean = tf.contrib.layers.fully_connected(x,
action_size,
tf.tanh,
weights_initializer=mean_weights_initializer)
logstd = tf.get_variable('logstd', mean.shape[2:], tf.float32, logstd_initializer)
logstd = tf.tile(logstd[None, None],
[tf.shape(mean)[0], tf.shape(mean)[1]] + [1] * (mean.shape.ndims - 2))
with tf.variable_scope('value'):
x = flat_observations
for size in config.value_layers:
@@ -123,7 +121,6 @@ def recurrent_gaussian(
mean = tf.check_numerics(mean, 'mean')
logstd = tf.check_numerics(logstd, 'logstd')
value = tf.check_numerics(value, 'value')
policy = tf.contrib.distributions.MultivariateNormalDiag(
mean, tf.exp(logstd))
policy = tf.contrib.distributions.MultivariateNormalDiag(mean, tf.exp(logstd))
# assert state.shape.as_list()[0] is not None
return NetworkOutput(policy, mean, logstd, value, state)