add yapf style and apply yapf to format all Python files
This recreates pull request #2192
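yapf reads its configuration from a [style] section in a .style.yapf or setup.cfg file at the repository root. The exact style settings added by this commit are not visible in this excerpt; the sketch below is an assumed configuration that would reproduce the formatting seen in the diff (two-space indents, a column limit wide enough for the single-line rewrites):

    [style]
    # Hypothetical settings; the commit's actual .style.yapf is not shown here.
    based_on_style = chromium
    indent_width = 2
    column_limit = 100

The reformat itself is then a single invocation such as `yapf --in-place --recursive .` run from the repository root.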
@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 """Network definitions for the PPO algorithm."""
 
 from __future__ import absolute_import
@@ -24,13 +23,10 @@ import operator
 
 import tensorflow as tf
 
 
-NetworkOutput = collections.namedtuple(
-    'NetworkOutput', 'policy, mean, logstd, value, state')
+NetworkOutput = collections.namedtuple('NetworkOutput', 'policy, mean, logstd, value, state')
 
 
-def feed_forward_gaussian(
-    config, action_size, observations, unused_length, state=None):
+def feed_forward_gaussian(config, action_size, observations, unused_length, state=None):
   """Independent feed forward networks for policy and value.
 
   The policy network outputs the mean action and the log standard deviation
@@ -50,20 +46,22 @@ def feed_forward_gaussian(
       factor=config.init_mean_factor)
   logstd_initializer = tf.random_normal_initializer(config.init_logstd, 1e-10)
   flat_observations = tf.reshape(observations, [
-      tf.shape(observations)[0], tf.shape(observations)[1],
-      functools.reduce(operator.mul, observations.shape.as_list()[2:], 1)])
+      tf.shape(observations)[0],
+      tf.shape(observations)[1],
+      functools.reduce(operator.mul,
+                       observations.shape.as_list()[2:], 1)
+  ])
   with tf.variable_scope('policy'):
     x = flat_observations
     for size in config.policy_layers:
       x = tf.contrib.layers.fully_connected(x, size, tf.nn.relu)
-    mean = tf.contrib.layers.fully_connected(
-        x, action_size, tf.tanh,
-        weights_initializer=mean_weights_initializer)
-    logstd = tf.get_variable(
-        'logstd', mean.shape[2:], tf.float32, logstd_initializer)
-    logstd = tf.tile(
-        logstd[None, None],
-        [tf.shape(mean)[0], tf.shape(mean)[1]] + [1] * (mean.shape.ndims - 2))
+    mean = tf.contrib.layers.fully_connected(x,
+                                             action_size,
+                                             tf.tanh,
+                                             weights_initializer=mean_weights_initializer)
+    logstd = tf.get_variable('logstd', mean.shape[2:], tf.float32, logstd_initializer)
+    logstd = tf.tile(logstd[None, None],
+                     [tf.shape(mean)[0], tf.shape(mean)[1]] + [1] * (mean.shape.ndims - 2))
   with tf.variable_scope('value'):
     x = flat_observations
     for size in config.value_layers:
@@ -72,13 +70,11 @@ def feed_forward_gaussian(
   mean = tf.check_numerics(mean, 'mean')
   logstd = tf.check_numerics(logstd, 'logstd')
   value = tf.check_numerics(value, 'value')
-  policy = tf.contrib.distributions.MultivariateNormalDiag(
-      mean, tf.exp(logstd))
+  policy = tf.contrib.distributions.MultivariateNormalDiag(mean, tf.exp(logstd))
   return NetworkOutput(policy, mean, logstd, value, state)
 
 
-def recurrent_gaussian(
-    config, action_size, observations, length, state=None):
+def recurrent_gaussian(config, action_size, observations, length, state=None):
   """Independent recurrent policy and feed forward value networks.
 
   The policy network outputs the mean action and the log standard deviation
@@ -100,21 +96,23 @@ def recurrent_gaussian(
   logstd_initializer = tf.random_normal_initializer(config.init_logstd, 1e-10)
   cell = tf.contrib.rnn.GRUBlockCell(config.policy_layers[-1])
   flat_observations = tf.reshape(observations, [
-      tf.shape(observations)[0], tf.shape(observations)[1],
-      functools.reduce(operator.mul, observations.shape.as_list()[2:], 1)])
+      tf.shape(observations)[0],
+      tf.shape(observations)[1],
+      functools.reduce(operator.mul,
+                       observations.shape.as_list()[2:], 1)
+  ])
   with tf.variable_scope('policy'):
     x = flat_observations
     for size in config.policy_layers[:-1]:
       x = tf.contrib.layers.fully_connected(x, size, tf.nn.relu)
     x, state = tf.nn.dynamic_rnn(cell, x, length, state, tf.float32)
-    mean = tf.contrib.layers.fully_connected(
-        x, action_size, tf.tanh,
-        weights_initializer=mean_weights_initializer)
-    logstd = tf.get_variable(
-        'logstd', mean.shape[2:], tf.float32, logstd_initializer)
-    logstd = tf.tile(
-        logstd[None, None],
-        [tf.shape(mean)[0], tf.shape(mean)[1]] + [1] * (mean.shape.ndims - 2))
+    mean = tf.contrib.layers.fully_connected(x,
+                                             action_size,
+                                             tf.tanh,
+                                             weights_initializer=mean_weights_initializer)
+    logstd = tf.get_variable('logstd', mean.shape[2:], tf.float32, logstd_initializer)
+    logstd = tf.tile(logstd[None, None],
+                     [tf.shape(mean)[0], tf.shape(mean)[1]] + [1] * (mean.shape.ndims - 2))
   with tf.variable_scope('value'):
     x = flat_observations
     for size in config.value_layers:
@@ -123,7 +121,6 @@ def recurrent_gaussian(
   mean = tf.check_numerics(mean, 'mean')
   logstd = tf.check_numerics(logstd, 'logstd')
   value = tf.check_numerics(value, 'value')
-  policy = tf.contrib.distributions.MultivariateNormalDiag(
-      mean, tf.exp(logstd))
+  policy = tf.contrib.distributions.MultivariateNormalDiag(mean, tf.exp(logstd))
   # assert state.shape.as_list()[0] is not None
   return NetworkOutput(policy, mean, logstd, value, state)
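The reformat is behavior-preserving, so the networks are still used the same way as before. As a rough usage sketch (TF1-era APIs; the module name, config values, and shapes below are illustrative assumptions, not taken from this commit):

    import tensorflow as tf

    import networks  # the module shown in the diff above; name/path assumed

    class Config(object):
      # Hypothetical hyperparameters matching the fields referenced in the diff.
      policy_layers = (200, 100)
      value_layers = (200, 100)
      init_mean_factor = 0.1
      init_logstd = -1.0

    # Observations are [batch, time, ...]; the network flattens trailing dims.
    observations = tf.placeholder(tf.float32, [None, None, 24])
    length = tf.placeholder(tf.int32, [None])
    output = networks.feed_forward_gaussian(
        Config(), action_size=6, observations=observations, unused_length=length)
    # output.policy is a MultivariateNormalDiag built from output.mean and
    # exp(output.logstd); output.value is the state-value estimate.
    action = output.policy.sample()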