add yapf style and apply yapf to format all Python files

This recreates pull request #2192
Erwin Coumans
2019-04-27 07:31:15 -07:00
parent c591735042
commit ef9570c315
347 changed files with 70304 additions and 22752 deletions
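
The hunks below show the before and after of the yapf pass: multi-line calls that previously used hanging 4-space continuations are rewrapped so that arguments align under the opening parenthesis, within a wide column limit (the reflowed lines fit in roughly 100 columns). The commit's actual .style.yapf is not reproduced in this excerpt, so the snippet below is only a sketch of how such a tree-wide reformat can be applied with the public yapf API; the style values (pep8 base, column_limit 100, indent_width 2) are assumptions inferred from the output, not the commit's verified settings.

import pathlib
from yapf.yapflib.yapf_api import FormatFile

# Assumed style values; the real .style.yapf added by this commit may differ.
STYLE = '{based_on_style: pep8, column_limit: 100, indent_width: 2}'

for path in sorted(pathlib.Path('.').rglob('*.py')):
  # in_place=True writes the reformatted source back to each file.
  FormatFile(str(path), style_config=STYLE, in_place=True)

The same effect is commonly achieved from the command line with yapf --in-place --recursive ., which picks up the nearest .style.yapf automatically.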


@@ -1,4 +1,3 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
@@ -22,9 +21,7 @@ def main(unused_argv):
# fix random seed for reproducibility
numpy.random.seed(7)
# load pima indians dataset
dataset = numpy.loadtxt(
FLAGS.input_filename,
delimiter=",")
dataset = numpy.loadtxt(FLAGS.input_filename, delimiter=",")
# split into input (X) and output (Y) variables
x = dataset[:, 0:3]
y = dataset[:, 3]
@@ -38,28 +35,27 @@ def main(unused_argv):
model.add(Dense(1, activation="linear"))
# Compile model (use adam or sgd)
model.compile(
loss="mean_squared_error",
optimizer="adam",
metrics=["mean_squared_error"])
model.compile(loss="mean_squared_error", optimizer="adam", metrics=["mean_squared_error"])
# checkpoint
filepath = "/tmp/keras/weights-improvement-{epoch:02d}-{val_loss:.2f}.hdf5"
checkpoint = ModelCheckpoint(
filepath, monitor="val_loss", verbose=1, save_best_only=True, mode="min")
checkpoint = ModelCheckpoint(filepath,
monitor="val_loss",
verbose=1,
save_best_only=True,
mode="min")
callbacks_list = [checkpoint]
# Fit the model
# model.fit(X, Y, epochs=150, batch_size=10)
# model.fit(X, Y, epochs=150, batch_size=10, callbacks=callbacks_list)
model.fit(
x,
y,
validation_split=0.34,
epochs=4500,
batch_size=1024,
callbacks=callbacks_list,
verbose=0)
model.fit(x,
y,
validation_split=0.34,
epochs=4500,
batch_size=1024,
callbacks=callbacks_list,
verbose=0)
# evaluate the model
scores = model.evaluate(x, y)


@@ -75,8 +75,8 @@ reframed = series_to_supervised(scaled, lag_steps, 1)
print("reframed before drop=", reframed)
# drop columns we don't want to predict
reframed.drop(reframed.columns[[3,7,11,15,19]], axis=1, inplace=True)
print("after drop=",reframed.head())
reframed.drop(reframed.columns[[3, 7, 11, 15, 19]], axis=1, inplace=True)
print("after drop=", reframed.head())
#dummy = scaler.inverse_transform(reframed)
#print(dummy)
@@ -104,17 +104,17 @@ test = values[n_train_hours:, :]
train_X, train_y = train[:, :-1], train[:, -1]
test_X, test_y = test[:, :-1], test[:, -1]
print("train_X.shape[1]=",train_X.shape[1])
print("train_X.shape[1]=", train_X.shape[1])
# design network
useLSTM=True
useLSTM = True
if useLSTM:
# reshape input to be 3D [samples, timesteps, features]
train_X = train_X.reshape((train_X.shape[0], lag_steps+1, int(train_X.shape[1]/(lag_steps+1))))
test_X = test_X.reshape((test_X.shape[0], lag_steps+1, int(test_X.shape[1]/(lag_steps+1))))
train_X = train_X.reshape(
(train_X.shape[0], lag_steps + 1, int(train_X.shape[1] / (lag_steps + 1))))
test_X = test_X.reshape((test_X.shape[0], lag_steps + 1, int(test_X.shape[1] / (lag_steps + 1))))
model = Sequential()
model.add(LSTM(40, input_shape=(train_X.shape[1], train_X.shape[2])))
model.add(LSTM(40, input_shape=(train_X.shape[1], train_X.shape[2])))
model.add(Dropout(0.05))
model.add(Dense(8, activation='sigmoid'))
model.add(Dense(8, activation='sigmoid'))
@@ -128,39 +128,37 @@ else:
model.add(Dense(1, activation="linear"))
#model.compile(loss='mae', optimizer='adam')
model.compile(
loss='mean_squared_error', optimizer='adam', metrics=['mean_squared_error'])
model.compile(loss='mean_squared_error', optimizer='adam', metrics=['mean_squared_error'])
# checkpoint
filepath = '/tmp/keras/weights-improvement-{epoch:02d}-{val_loss:.2f}.hdf5'
checkpoint = ModelCheckpoint(
filepath, monitor='val_loss', verbose=1, save_best_only=True, mode='min')
checkpoint = ModelCheckpoint(filepath,
monitor='val_loss',
verbose=1,
save_best_only=True,
mode='min')
callbacks_list = [checkpoint]
# fit network
history = model.fit(
train_X,
train_y,
epochs=1500,
batch_size=32,
callbacks=callbacks_list,
validation_data=(test_X, test_y),
verbose=2,
shuffle=False)
history = model.fit(train_X,
train_y,
epochs=1500,
batch_size=32,
callbacks=callbacks_list,
validation_data=(test_X, test_y),
verbose=2,
shuffle=False)
# plot history
data = np.array([[[1.513535008329887299,3.234624992847829894e-01,1.731481043119239782,1.741165415165205399,
1.534267104753672228e+00,1.071354965017878635e+00,1.712386127673626302e+00]]])
data = np.array([[[
1.513535008329887299, 3.234624992847829894e-01, 1.731481043119239782, 1.741165415165205399,
1.534267104753672228e+00, 1.071354965017878635e+00, 1.712386127673626302e+00
]]])
#prediction = model.predict(data)
#print("prediction=",prediction)
pyplot.plot(history.history['loss'], label='train')
pyplot.plot(history.history['val_loss'], label='test')
pyplot.legend()
pyplot.show()


@@ -15,9 +15,8 @@ FLAGS = tf.app.flags.FLAGS
flags.DEFINE_float("motor_kp", 1.0, "The position gain of the motor.")
flags.DEFINE_float("motor_kd", 0.015, "The speed gain of the motor.")
flags.DEFINE_float(
"control_latency", 0.006, "The latency between sensor measurement and action"
" execution the robot.")
flags.DEFINE_float("control_latency", 0.006, "The latency between sensor measurement and action"
" execution the robot.")
flags.DEFINE_string("log_path", ".", "The directory to write the log file.")
@@ -31,37 +30,34 @@ def speed(t):
def main(argv):
del argv
env = minitaur_gym_env.MinitaurGymEnv(
urdf_version=minitaur_gym_env.RAINBOW_DASH_V0_URDF_VERSION,
control_time_step=0.006,
action_repeat=6,
pd_latency=0.0,
control_latency=FLAGS.control_latency,
motor_kp=FLAGS.motor_kp,
motor_kd=FLAGS.motor_kd,
remove_default_joint_damping=True,
leg_model_enabled=False,
render=True,
on_rack=False,
accurate_motor_model_enabled=True,
log_path=FLAGS.log_path)
env.reset()
del argv
env = minitaur_gym_env.MinitaurGymEnv(urdf_version=minitaur_gym_env.RAINBOW_DASH_V0_URDF_VERSION,
control_time_step=0.006,
action_repeat=6,
pd_latency=0.0,
control_latency=FLAGS.control_latency,
motor_kp=FLAGS.motor_kp,
motor_kd=FLAGS.motor_kd,
remove_default_joint_damping=True,
leg_model_enabled=False,
render=True,
on_rack=False,
accurate_motor_model_enabled=True,
log_path=FLAGS.log_path)
env.reset()
controller = minitaur_raibert_controller.MinitaurRaibertTrottingController(
env.minitaur)
controller = minitaur_raibert_controller.MinitaurRaibertTrottingController(env.minitaur)
tstart = env.minitaur.GetTimeSinceReset()
for _ in range(1000):
t = env.minitaur.GetTimeSinceReset() - tstart
controller.behavior_parameters = (
minitaur_raibert_controller.BehaviorParameters(
desired_forward_speed=speed(t)))
controller.update(t)
env.step(controller.get_action())
tstart = env.minitaur.GetTimeSinceReset()
for _ in range(1000):
t = env.minitaur.GetTimeSinceReset() - tstart
controller.behavior_parameters = (minitaur_raibert_controller.BehaviorParameters(
desired_forward_speed=speed(t)))
controller.update(t)
env.step(controller.get_action())
#env.close()
#env.close()
if __name__ == "__main__":
tf.app.run(main)


@@ -35,12 +35,11 @@ def main(argv):
#print("motorState.velocity=",motorState.velocity)
#print("motorState.action=",motorState.action)
#print("motorState.torque=",motorState.torque)
recs.append([motorState.angle,motorState.velocity,motorState.action,motorState.torque])
recs.append([motorState.angle, motorState.velocity, motorState.action, motorState.torque])
a = numpy.array(recs)
numpy.savetxt(FLAGS.csv_file, a, delimiter=",")
if __name__ == "__main__":
tf.app.run(main)


@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Proximal Policy Optimization algorithm."""
from __future__ import absolute_import


@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Proximal Policy Optimization algorithm.
Based on John Schulman's implementation in Python and Theano:
@@ -30,9 +29,7 @@ from . import memory
from . import normalize
from . import utility
_NetworkOutput = collections.namedtuple(
'NetworkOutput', 'policy, mean, logstd, value, state')
_NetworkOutput = collections.namedtuple('NetworkOutput', 'policy, mean, logstd, value, state')
class PPOAlgorithm(object):
@@ -53,44 +50,46 @@ class PPOAlgorithm(object):
self._is_training = is_training
self._should_log = should_log
self._config = config
self._observ_filter = normalize.StreamingNormalize(
self._batch_env.observ[0], center=True, scale=True, clip=5,
name='normalize_observ')
self._reward_filter = normalize.StreamingNormalize(
self._batch_env.reward[0], center=False, scale=True, clip=10,
name='normalize_reward')
self._observ_filter = normalize.StreamingNormalize(self._batch_env.observ[0],
center=True,
scale=True,
clip=5,
name='normalize_observ')
self._reward_filter = normalize.StreamingNormalize(self._batch_env.reward[0],
center=False,
scale=True,
clip=10,
name='normalize_reward')
# Memory stores tuple of observ, action, mean, logstd, reward.
template = (
self._batch_env.observ[0], self._batch_env.action[0],
self._batch_env.action[0], self._batch_env.action[0],
self._batch_env.reward[0])
self._memory = memory.EpisodeMemory(
template, config.update_every, config.max_length, 'memory')
template = (self._batch_env.observ[0], self._batch_env.action[0], self._batch_env.action[0],
self._batch_env.action[0], self._batch_env.reward[0])
self._memory = memory.EpisodeMemory(template, config.update_every, config.max_length, 'memory')
self._memory_index = tf.Variable(0, False)
use_gpu = self._config.use_gpu and utility.available_gpus()
with tf.device('/gpu:0' if use_gpu else '/cpu:0'):
# Create network variables for later calls to reuse.
self._network(
tf.zeros_like(self._batch_env.observ)[:, None],
tf.ones(len(self._batch_env)), reuse=None)
self._network(tf.zeros_like(self._batch_env.observ)[:, None],
tf.ones(len(self._batch_env)),
reuse=None)
cell = self._config.network(self._batch_env.action.shape[1].value)
with tf.variable_scope('ppo_temporary'):
self._episodes = memory.EpisodeMemory(
template, len(batch_env), config.max_length, 'episodes')
self._last_state = utility.create_nested_vars(
cell.zero_state(len(batch_env), tf.float32))
self._last_action = tf.Variable(
tf.zeros_like(self._batch_env.action), False, name='last_action')
self._last_mean = tf.Variable(
tf.zeros_like(self._batch_env.action), False, name='last_mean')
self._last_logstd = tf.Variable(
tf.zeros_like(self._batch_env.action), False, name='last_logstd')
self._penalty = tf.Variable(
self._config.kl_init_penalty, False, dtype=tf.float32)
self._policy_optimizer = self._config.policy_optimizer(
self._config.policy_lr, name='policy_optimizer')
self._value_optimizer = self._config.value_optimizer(
self._config.value_lr, name='value_optimizer')
self._episodes = memory.EpisodeMemory(template, len(batch_env), config.max_length,
'episodes')
self._last_state = utility.create_nested_vars(cell.zero_state(len(batch_env), tf.float32))
self._last_action = tf.Variable(tf.zeros_like(self._batch_env.action),
False,
name='last_action')
self._last_mean = tf.Variable(tf.zeros_like(self._batch_env.action),
False,
name='last_mean')
self._last_logstd = tf.Variable(tf.zeros_like(self._batch_env.action),
False,
name='last_logstd')
self._penalty = tf.Variable(self._config.kl_init_penalty, False, dtype=tf.float32)
self._policy_optimizer = self._config.policy_optimizer(self._config.policy_lr,
name='policy_optimizer')
self._value_optimizer = self._config.value_optimizer(self._config.value_lr,
name='value_optimizer')
def begin_episode(self, agent_indices):
"""Reset the recurrent states and stored episode.
@@ -118,23 +117,24 @@ class PPOAlgorithm(object):
"""
with tf.name_scope('perform/'):
observ = self._observ_filter.transform(observ)
network = self._network(
observ[:, None], tf.ones(observ.shape[0]), self._last_state)
action = tf.cond(
self._is_training, network.policy.sample, lambda: network.mean)
network = self._network(observ[:, None], tf.ones(observ.shape[0]), self._last_state)
action = tf.cond(self._is_training, network.policy.sample, lambda: network.mean)
logprob = network.policy.log_prob(action)[:, 0]
# pylint: disable=g-long-lambda
summary = tf.cond(self._should_log, lambda: tf.summary.merge([
tf.summary.histogram('mean', network.mean[:, 0]),
tf.summary.histogram('std', tf.exp(network.logstd[:, 0])),
tf.summary.histogram('action', action[:, 0]),
tf.summary.histogram('logprob', logprob)]), str)
summary = tf.cond(
self._should_log, lambda: tf.summary.merge([
tf.summary.histogram('mean', network.mean[:, 0]),
tf.summary.histogram('std', tf.exp(network.logstd[:, 0])),
tf.summary.histogram('action', action[:, 0]),
tf.summary.histogram('logprob', logprob)
]), str)
# Remember current policy to append to memory in the experience callback.
with tf.control_dependencies([
utility.assign_nested_vars(self._last_state, network.state),
self._last_action.assign(action[:, 0]),
self._last_mean.assign(network.mean[:, 0]),
self._last_logstd.assign(network.logstd[:, 0])]):
self._last_logstd.assign(network.logstd[:, 0])
]):
return tf.check_numerics(action[:, 0], 'action'), tf.identity(summary)
def experience(self, observ, action, reward, unused_done, unused_nextob):
@@ -155,15 +155,14 @@ class PPOAlgorithm(object):
Summary tensor.
"""
with tf.name_scope('experience/'):
return tf.cond(
self._is_training,
lambda: self._define_experience(observ, action, reward), str)
return tf.cond(self._is_training, lambda: self._define_experience(observ, action, reward),
str)
def _define_experience(self, observ, action, reward):
"""Implement the branch of experience() entered during training."""
update_filters = tf.summary.merge([
self._observ_filter.update(observ),
self._reward_filter.update(reward)])
update_filters = tf.summary.merge(
[self._observ_filter.update(observ),
self._reward_filter.update(reward)])
with tf.control_dependencies([update_filters]):
if self._config.train_on_agent_action:
# NOTE: Doesn't seem to change much.
@@ -174,14 +173,16 @@ class PPOAlgorithm(object):
norm_observ = self._observ_filter.transform(observ)
norm_reward = tf.reduce_mean(self._reward_filter.transform(reward))
# pylint: disable=g-long-lambda
summary = tf.cond(self._should_log, lambda: tf.summary.merge([
update_filters,
self._observ_filter.summary(),
self._reward_filter.summary(),
tf.summary.scalar('memory_size', self._memory_index),
tf.summary.histogram('normalized_observ', norm_observ),
tf.summary.histogram('action', self._last_action),
tf.summary.scalar('normalized_reward', norm_reward)]), str)
summary = tf.cond(
self._should_log, lambda: tf.summary.merge([
update_filters,
self._observ_filter.summary(),
self._reward_filter.summary(),
tf.summary.scalar('memory_size', self._memory_index),
tf.summary.histogram('normalized_observ', norm_observ),
tf.summary.histogram('action', self._last_action),
tf.summary.scalar('normalized_reward', norm_reward)
]), str)
return summary
def end_episode(self, agent_indices):
@@ -199,20 +200,16 @@ class PPOAlgorithm(object):
Summary tensor.
"""
with tf.name_scope('end_episode/'):
return tf.cond(
self._is_training,
lambda: self._define_end_episode(agent_indices), str)
return tf.cond(self._is_training, lambda: self._define_end_episode(agent_indices), str)
def _define_end_episode(self, agent_indices):
"""Implement the branch of end_episode() entered during training."""
episodes, length = self._episodes.data(agent_indices)
space_left = self._config.update_every - self._memory_index
use_episodes = tf.range(tf.minimum(
tf.shape(agent_indices)[0], space_left))
use_episodes = tf.range(tf.minimum(tf.shape(agent_indices)[0], space_left))
episodes = [tf.gather(elem, use_episodes) for elem in episodes]
append = self._memory.replace(
episodes, tf.gather(length, use_episodes),
use_episodes + self._memory_index)
append = self._memory.replace(episodes, tf.gather(length, use_episodes),
use_episodes + self._memory_index)
with tf.control_dependencies([append]):
inc_index = self._memory_index.assign_add(tf.shape(use_episodes)[0])
with tf.control_dependencies([inc_index]):
@@ -229,8 +226,7 @@ class PPOAlgorithm(object):
Summary tensor.
"""
with tf.name_scope('training'):
assert_full = tf.assert_equal(
self._memory_index, self._config.update_every)
assert_full = tf.assert_equal(self._memory_index, self._config.update_every)
with tf.control_dependencies([assert_full]):
data = self._memory.data()
(observ, action, old_mean, old_logstd, reward), length = data
@@ -238,21 +234,17 @@ class PPOAlgorithm(object):
length = tf.identity(length)
observ = self._observ_filter.transform(observ)
reward = self._reward_filter.transform(reward)
policy_summary = self._update_policy(
observ, action, old_mean, old_logstd, reward, length)
policy_summary = self._update_policy(observ, action, old_mean, old_logstd, reward, length)
with tf.control_dependencies([policy_summary]):
value_summary = self._update_value(observ, reward, length)
with tf.control_dependencies([value_summary]):
penalty_summary = self._adjust_penalty(
observ, old_mean, old_logstd, length)
penalty_summary = self._adjust_penalty(observ, old_mean, old_logstd, length)
with tf.control_dependencies([penalty_summary]):
clear_memory = tf.group(
self._memory.clear(), self._memory_index.assign(0))
clear_memory = tf.group(self._memory.clear(), self._memory_index.assign(0))
with tf.control_dependencies([clear_memory]):
weight_summary = utility.variable_summaries(
tf.trainable_variables(), self._config.weight_summaries)
return tf.summary.merge([
policy_summary, value_summary, penalty_summary, weight_summary])
weight_summary = utility.variable_summaries(tf.trainable_variables(),
self._config.weight_summaries)
return tf.summary.merge([policy_summary, value_summary, penalty_summary, weight_summary])
def _update_value(self, observ, reward, length):
"""Perform multiple update steps of the value baseline.
@@ -269,10 +261,9 @@ class PPOAlgorithm(object):
Summary tensor.
"""
with tf.name_scope('update_value'):
loss, summary = tf.scan(
lambda _1, _2: self._update_value_step(observ, reward, length),
tf.range(self._config.update_epochs_value),
[0., ''], parallel_iterations=1)
loss, summary = tf.scan(lambda _1, _2: self._update_value_step(observ, reward, length),
tf.range(self._config.update_epochs_value), [0., ''],
parallel_iterations=1)
print_loss = tf.Print(0, [tf.reduce_mean(loss)], 'value loss: ')
with tf.control_dependencies([loss, print_loss]):
return summary[self._config.update_epochs_value // 2]
@@ -289,15 +280,13 @@ class PPOAlgorithm(object):
Tuple of loss tensor and summary tensor.
"""
loss, summary = self._value_loss(observ, reward, length)
gradients, variables = (
zip(*self._value_optimizer.compute_gradients(loss)))
optimize = self._value_optimizer.apply_gradients(
zip(gradients, variables))
gradients, variables = (zip(*self._value_optimizer.compute_gradients(loss)))
optimize = self._value_optimizer.apply_gradients(zip(gradients, variables))
summary = tf.summary.merge([
summary,
tf.summary.scalar('gradient_norm', tf.global_norm(gradients)),
utility.gradient_summaries(
zip(gradients, variables), dict(value=r'.*'))])
utility.gradient_summaries(zip(gradients, variables), dict(value=r'.*'))
])
with tf.control_dependencies([optimize]):
return [tf.identity(loss), tf.identity(summary)]
@@ -317,18 +306,17 @@ class PPOAlgorithm(object):
"""
with tf.name_scope('value_loss'):
value = self._network(observ, length).value
return_ = utility.discounted_return(
reward, length, self._config.discount)
return_ = utility.discounted_return(reward, length, self._config.discount)
advantage = return_ - value
value_loss = 0.5 * self._mask(advantage ** 2, length)
value_loss = 0.5 * self._mask(advantage**2, length)
summary = tf.summary.merge([
tf.summary.histogram('value_loss', value_loss),
tf.summary.scalar('avg_value_loss', tf.reduce_mean(value_loss))])
tf.summary.scalar('avg_value_loss', tf.reduce_mean(value_loss))
])
value_loss = tf.reduce_mean(value_loss)
return tf.check_numerics(value_loss, 'value_loss'), summary
def _update_policy(
self, observ, action, old_mean, old_logstd, reward, length):
def _update_policy(self, observ, action, old_mean, old_logstd, reward, length):
"""Perform multiple update steps of the policy.
The advantage is computed once at the beginning and shared across
@@ -347,35 +335,28 @@ class PPOAlgorithm(object):
Summary tensor.
"""
with tf.name_scope('update_policy'):
return_ = utility.discounted_return(
reward, length, self._config.discount)
return_ = utility.discounted_return(reward, length, self._config.discount)
value = self._network(observ, length).value
if self._config.gae_lambda:
advantage = utility.lambda_return(
reward, value, length, self._config.discount,
self._config.gae_lambda)
advantage = utility.lambda_return(reward, value, length, self._config.discount,
self._config.gae_lambda)
else:
advantage = return_ - value
mean, variance = tf.nn.moments(advantage, axes=[0, 1], keep_dims=True)
advantage = (advantage - mean) / (tf.sqrt(variance) + 1e-8)
advantage = tf.Print(
advantage, [tf.reduce_mean(return_), tf.reduce_mean(value)],
'return and value: ')
advantage = tf.Print(
advantage, [tf.reduce_mean(advantage)],
'normalized advantage: ')
advantage, [tf.reduce_mean(return_), tf.reduce_mean(value)], 'return and value: ')
advantage = tf.Print(advantage, [tf.reduce_mean(advantage)], 'normalized advantage: ')
# pylint: disable=g-long-lambda
loss, summary = tf.scan(
lambda _1, _2: self._update_policy_step(
observ, action, old_mean, old_logstd, advantage, length),
tf.range(self._config.update_epochs_policy),
[0., ''], parallel_iterations=1)
loss, summary = tf.scan(lambda _1, _2: self._update_policy_step(
observ, action, old_mean, old_logstd, advantage, length),
tf.range(self._config.update_epochs_policy), [0., ''],
parallel_iterations=1)
print_loss = tf.Print(0, [tf.reduce_mean(loss)], 'policy loss: ')
with tf.control_dependencies([loss, print_loss]):
return summary[self._config.update_epochs_policy // 2]
def _update_policy_step(
self, observ, action, old_mean, old_logstd, advantage, length):
def _update_policy_step(self, observ, action, old_mean, old_logstd, advantage, length):
"""Compute the current policy loss and perform a gradient update step.
Args:
@@ -390,23 +371,19 @@ class PPOAlgorithm(object):
Tuple of loss tensor and summary tensor.
"""
network = self._network(observ, length)
loss, summary = self._policy_loss(
network.mean, network.logstd, old_mean, old_logstd, action,
advantage, length)
gradients, variables = (
zip(*self._policy_optimizer.compute_gradients(loss)))
optimize = self._policy_optimizer.apply_gradients(
zip(gradients, variables))
loss, summary = self._policy_loss(network.mean, network.logstd, old_mean, old_logstd, action,
advantage, length)
gradients, variables = (zip(*self._policy_optimizer.compute_gradients(loss)))
optimize = self._policy_optimizer.apply_gradients(zip(gradients, variables))
summary = tf.summary.merge([
summary,
tf.summary.scalar('gradient_norm', tf.global_norm(gradients)),
utility.gradient_summaries(
zip(gradients, variables), dict(policy=r'.*'))])
utility.gradient_summaries(zip(gradients, variables), dict(policy=r'.*'))
])
with tf.control_dependencies([optimize]):
return [tf.identity(loss), tf.identity(summary)]
def _policy_loss(
self, mean, logstd, old_mean, old_logstd, action, advantage, length):
def _policy_loss(self, mean, logstd, old_mean, old_logstd, action, advantage, length):
"""Compute the policy loss composed of multiple components.
1. The policy gradient loss is importance sampled from the data-collecting
@@ -430,24 +407,20 @@ class PPOAlgorithm(object):
"""
with tf.name_scope('policy_loss'):
entropy = utility.diag_normal_entropy(mean, logstd)
kl = tf.reduce_mean(self._mask(utility.diag_normal_kl(
old_mean, old_logstd, mean, logstd), length), 1)
kl = tf.reduce_mean(
self._mask(utility.diag_normal_kl(old_mean, old_logstd, mean, logstd), length), 1)
policy_gradient = tf.exp(
utility.diag_normal_logpdf(mean, logstd, action) -
utility.diag_normal_logpdf(old_mean, old_logstd, action))
surrogate_loss = -tf.reduce_mean(self._mask(
policy_gradient * tf.stop_gradient(advantage), length), 1)
surrogate_loss = -tf.reduce_mean(
self._mask(policy_gradient * tf.stop_gradient(advantage), length), 1)
kl_penalty = self._penalty * kl
cutoff_threshold = self._config.kl_target * self._config.kl_cutoff_factor
cutoff_count = tf.reduce_sum(
tf.cast(kl > cutoff_threshold, tf.int32))
with tf.control_dependencies([tf.cond(
cutoff_count > 0,
lambda: tf.Print(0, [cutoff_count], 'kl cutoff! '), int)]):
kl_cutoff = (
self._config.kl_cutoff_coef *
tf.cast(kl > cutoff_threshold, tf.float32) *
(kl - cutoff_threshold) ** 2)
cutoff_count = tf.reduce_sum(tf.cast(kl > cutoff_threshold, tf.int32))
with tf.control_dependencies(
[tf.cond(cutoff_count > 0, lambda: tf.Print(0, [cutoff_count], 'kl cutoff! '), int)]):
kl_cutoff = (self._config.kl_cutoff_coef * tf.cast(kl > cutoff_threshold, tf.float32) *
(kl - cutoff_threshold)**2)
policy_loss = surrogate_loss + kl_penalty + kl_cutoff
summary = tf.summary.merge([
tf.summary.histogram('entropy', entropy),
@@ -459,7 +432,8 @@ class PPOAlgorithm(object):
tf.summary.histogram('policy_loss', policy_loss),
tf.summary.scalar('avg_surr_loss', tf.reduce_mean(surrogate_loss)),
tf.summary.scalar('avg_kl_penalty', tf.reduce_mean(kl_penalty)),
tf.summary.scalar('avg_policy_loss', tf.reduce_mean(policy_loss))])
tf.summary.scalar('avg_policy_loss', tf.reduce_mean(policy_loss))
])
policy_loss = tf.reduce_mean(policy_loss, 0)
return tf.check_numerics(policy_loss, 'policy_loss'), summary
@@ -481,30 +455,30 @@ class PPOAlgorithm(object):
"""
with tf.name_scope('adjust_penalty'):
network = self._network(observ, length)
assert_change = tf.assert_equal(
tf.reduce_all(tf.equal(network.mean, old_mean)), False,
message='policy should change')
assert_change = tf.assert_equal(tf.reduce_all(tf.equal(network.mean, old_mean)),
False,
message='policy should change')
print_penalty = tf.Print(0, [self._penalty], 'current penalty: ')
with tf.control_dependencies([assert_change, print_penalty]):
kl_change = tf.reduce_mean(self._mask(utility.diag_normal_kl(
old_mean, old_logstd, network.mean, network.logstd), length))
kl_change = tf.reduce_mean(
self._mask(utility.diag_normal_kl(old_mean, old_logstd, network.mean, network.logstd),
length))
kl_change = tf.Print(kl_change, [kl_change], 'kl change: ')
maybe_increase = tf.cond(
kl_change > 1.3 * self._config.kl_target,
# pylint: disable=g-long-lambda
lambda: tf.Print(self._penalty.assign(
self._penalty * 1.5), [0], 'increase penalty '),
lambda: tf.Print(self._penalty.assign(self._penalty * 1.5), [0], 'increase penalty '),
float)
maybe_decrease = tf.cond(
kl_change < 0.7 * self._config.kl_target,
# pylint: disable=g-long-lambda
lambda: tf.Print(self._penalty.assign(
self._penalty / 1.5), [0], 'decrease penalty '),
lambda: tf.Print(self._penalty.assign(self._penalty / 1.5), [0], 'decrease penalty '),
float)
with tf.control_dependencies([maybe_increase, maybe_decrease]):
return tf.summary.merge([
tf.summary.scalar('kl_change', kl_change),
tf.summary.scalar('penalty', self._penalty)])
tf.summary.scalar('penalty', self._penalty)
])
def _mask(self, tensor, length):
"""Set padding elements of a batch of sequences to zero.
@@ -548,11 +522,14 @@ class PPOAlgorithm(object):
with tf.device('/gpu:0' if use_gpu else '/cpu:0'):
observ = tf.check_numerics(observ, 'observ')
cell = self._config.network(self._batch_env.action.shape[1].value)
(mean, logstd, value), state = tf.nn.dynamic_rnn(
cell, observ, length, state, tf.float32, swap_memory=True)
(mean, logstd, value), state = tf.nn.dynamic_rnn(cell,
observ,
length,
state,
tf.float32,
swap_memory=True)
mean = tf.check_numerics(mean, 'mean')
logstd = tf.check_numerics(logstd, 'logstd')
value = tf.check_numerics(value, 'value')
policy = tf.contrib.distributions.MultivariateNormalDiag(
mean, tf.exp(logstd))
policy = tf.contrib.distributions.MultivariateNormalDiag(mean, tf.exp(logstd))
return _NetworkOutput(policy, mean, logstd, value, state)


@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Memory that stores episodes."""
from __future__ import absolute_import
@@ -43,10 +42,9 @@ class EpisodeMemory(object):
self._scope = scope
self._length = tf.Variable(tf.zeros(capacity, tf.int32), False)
self._buffers = [
tf.Variable(tf.zeros(
[capacity, max_length] + elem.shape.as_list(),
elem.dtype), False)
for elem in template]
tf.Variable(tf.zeros([capacity, max_length] + elem.shape.as_list(), elem.dtype), False)
for elem in template
]
def length(self, rows=None):
"""Tensor holding the current length of episodes.
@@ -72,13 +70,11 @@ class EpisodeMemory(object):
"""
rows = tf.range(self._capacity) if rows is None else rows
assert rows.shape.ndims == 1
assert_capacity = tf.assert_less(
rows, self._capacity,
message='capacity exceeded')
assert_capacity = tf.assert_less(rows, self._capacity, message='capacity exceeded')
with tf.control_dependencies([assert_capacity]):
assert_max_length = tf.assert_less(
tf.gather(self._length, rows), self._max_length,
message='max length exceeded')
assert_max_length = tf.assert_less(tf.gather(self._length, rows),
self._max_length,
message='max length exceeded')
append_ops = []
with tf.control_dependencies([assert_max_length]):
for buffer_, elements in zip(self._buffers, transitions):
@@ -86,8 +82,7 @@ class EpisodeMemory(object):
indices = tf.stack([rows, timestep], 1)
append_ops.append(tf.scatter_nd_update(buffer_, indices, elements))
with tf.control_dependencies(append_ops):
episode_mask = tf.reduce_sum(tf.one_hot(
rows, self._capacity, dtype=tf.int32), 0)
episode_mask = tf.reduce_sum(tf.one_hot(rows, self._capacity, dtype=tf.int32), 0)
return self._length.assign_add(episode_mask)
def replace(self, episodes, length, rows=None):
@@ -103,11 +98,11 @@ class EpisodeMemory(object):
"""
rows = tf.range(self._capacity) if rows is None else rows
assert rows.shape.ndims == 1
assert_capacity = tf.assert_less(
rows, self._capacity, message='capacity exceeded')
assert_capacity = tf.assert_less(rows, self._capacity, message='capacity exceeded')
with tf.control_dependencies([assert_capacity]):
assert_max_length = tf.assert_less_equal(
length, self._max_length, message='max length exceeded')
assert_max_length = tf.assert_less_equal(length,
self._max_length,
message='max length exceeded')
replace_ops = []
with tf.control_dependencies([assert_max_length]):
for buffer_, elements in zip(self._buffers, episodes):


@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Normalize tensors based on streaming estimates of mean and variance."""
from __future__ import absolute_import
@@ -24,8 +23,7 @@ import tensorflow as tf
class StreamingNormalize(object):
"""Normalize tensors based on streaming estimates of mean and variance."""
def __init__(
self, template, center=True, scale=True, clip=10, name='normalize'):
def __init__(self, template, center=True, scale=True, clip=10, name='normalize'):
"""Normalize tensors based on streaming estimates of mean and variance.
Centering the value, scaling it by the standard deviation, and clipping
@@ -69,8 +67,7 @@ class StreamingNormalize(object):
if self._scale:
# We cannot scale before seeing at least two samples.
value /= tf.cond(
self._count > 1, lambda: self._std() + 1e-8,
lambda: tf.ones_like(self._var_sum))[None]
self._count > 1, lambda: self._std() + 1e-8, lambda: tf.ones_like(self._var_sum))[None]
if self._clip:
value = tf.clip_by_value(value, -self._clip, self._clip)
# Remove batch dimension if necessary.
@@ -97,8 +94,7 @@ class StreamingNormalize(object):
mean_delta = tf.reduce_sum(value - self._mean[None, ...], 0)
new_mean = self._mean + mean_delta / step
new_mean = tf.cond(self._count > 1, lambda: new_mean, lambda: value[0])
var_delta = (
value - self._mean[None, ...]) * (value - new_mean[None, ...])
var_delta = (value - self._mean[None, ...]) * (value - new_mean[None, ...])
new_var_sum = self._var_sum + tf.reduce_sum(var_delta, 0)
with tf.control_dependencies([new_mean, new_var_sum]):
update = self._mean.assign(new_mean), self._var_sum.assign(new_var_sum)
@@ -116,10 +112,8 @@ class StreamingNormalize(object):
Operation.
"""
with tf.name_scope(self._name + '/reset'):
return tf.group(
self._count.assign(0),
self._mean.assign(tf.zeros_like(self._mean)),
self._var_sum.assign(tf.zeros_like(self._var_sum)))
return tf.group(self._count.assign(0), self._mean.assign(tf.zeros_like(self._mean)),
self._var_sum.assign(tf.zeros_like(self._var_sum)))
def summary(self):
"""Summary string of mean and standard deviation.
@@ -128,10 +122,8 @@ class StreamingNormalize(object):
Summary tensor.
"""
with tf.name_scope(self._name + '/summary'):
mean_summary = tf.cond(
self._count > 0, lambda: self._summary('mean', self._mean), str)
std_summary = tf.cond(
self._count > 1, lambda: self._summary('stddev', self._std()), str)
mean_summary = tf.cond(self._count > 0, lambda: self._summary('mean', self._mean), str)
std_summary = tf.cond(self._count > 1, lambda: self._summary('stddev', self._std()), str)
return tf.summary.merge([mean_summary, std_summary])
def _std(self):
@@ -143,10 +135,8 @@ class StreamingNormalize(object):
Returns:
Tensor of current variance.
"""
variance = tf.cond(
self._count > 1,
lambda: self._var_sum / tf.cast(self._count - 1, tf.float32),
lambda: tf.ones_like(self._var_sum) * float('nan'))
variance = tf.cond(self._count > 1, lambda: self._var_sum / tf.cast(
self._count - 1, tf.float32), lambda: tf.ones_like(self._var_sum) * float('nan'))
# The epsilon corrects for small negative variance values caused by
# the algorithm. It was empirically chosen to work with all environments
# tested.


@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Utilities for the PPO algorithm."""
from __future__ import absolute_import
@@ -51,8 +50,7 @@ def reinit_nested_vars(variables, indices=None):
Operation.
"""
if isinstance(variables, (tuple, list)):
return tf.group(*[
reinit_nested_vars(variable, indices) for variable in variables])
return tf.group(*[reinit_nested_vars(variable, indices) for variable in variables])
if indices is None:
return variables.assign(tf.zeros_like(variables))
else:
@@ -71,9 +69,8 @@ def assign_nested_vars(variables, tensors):
Operation.
"""
if isinstance(variables, (tuple, list)):
return tf.group(*[
assign_nested_vars(variable, tensor)
for variable, tensor in zip(variables, tensors)])
return tf.group(
*[assign_nested_vars(variable, tensor) for variable, tensor in zip(variables, tensors)])
return variables.assign(tensors)
@@ -81,10 +78,11 @@ def discounted_return(reward, length, discount):
"""Discounted Monte-Carlo returns."""
timestep = tf.range(reward.shape[1].value)
mask = tf.cast(timestep[None, :] < length[:, None], tf.float32)
return_ = tf.reverse(tf.transpose(tf.scan(
lambda agg, cur: cur + discount * agg,
tf.transpose(tf.reverse(mask * reward, [1]), [1, 0]),
tf.zeros_like(reward[:, -1]), 1, False), [1, 0]), [1])
return_ = tf.reverse(
tf.transpose(
tf.scan(lambda agg, cur: cur + discount * agg,
tf.transpose(tf.reverse(mask * reward, [1]), [1, 0]),
tf.zeros_like(reward[:, -1]), 1, False), [1, 0]), [1])
return tf.check_numerics(tf.stop_gradient(return_), 'return')
@@ -95,9 +93,8 @@ def fixed_step_return(reward, value, length, discount, window):
return_ = tf.zeros_like(reward)
for _ in range(window):
return_ += reward
reward = discount * tf.concat(
[reward[:, 1:], tf.zeros_like(reward[:, -1:])], 1)
return_ += discount ** window * tf.concat(
reward = discount * tf.concat([reward[:, 1:], tf.zeros_like(reward[:, -1:])], 1)
return_ += discount**window * tf.concat(
[value[:, window:], tf.zeros_like(value[:, -window:]), 1])
return tf.check_numerics(tf.stop_gradient(mask * return_), 'return')
@@ -109,10 +106,11 @@ def lambda_return(reward, value, length, discount, lambda_):
sequence = mask * reward + discount * value * (1 - lambda_)
discount = mask * discount * lambda_
sequence = tf.stack([sequence, discount], 2)
return_ = tf.reverse(tf.transpose(tf.scan(
lambda agg, cur: cur[0] + cur[1] * agg,
tf.transpose(tf.reverse(sequence, [1]), [1, 2, 0]),
tf.zeros_like(value[:, -1]), 1, False), [1, 0]), [1])
return_ = tf.reverse(
tf.transpose(
tf.scan(lambda agg, cur: cur[0] + cur[1] * agg,
tf.transpose(tf.reverse(sequence, [1]), [1, 2, 0]), tf.zeros_like(value[:, -1]),
1, False), [1, 0]), [1])
return tf.check_numerics(tf.stop_gradient(return_), 'return')
@@ -122,27 +120,26 @@ def lambda_advantage(reward, value, length, discount):
mask = tf.cast(timestep[None, :] < length[:, None], tf.float32)
next_value = tf.concat([value[:, 1:], tf.zeros_like(value[:, -1:])], 1)
delta = reward + discount * next_value - value
advantage = tf.reverse(tf.transpose(tf.scan(
lambda agg, cur: cur + discount * agg,
tf.transpose(tf.reverse(mask * delta, [1]), [1, 0]),
tf.zeros_like(delta[:, -1]), 1, False), [1, 0]), [1])
advantage = tf.reverse(
tf.transpose(
tf.scan(lambda agg, cur: cur + discount * agg,
tf.transpose(tf.reverse(mask * delta, [1]), [1, 0]), tf.zeros_like(delta[:, -1]),
1, False), [1, 0]), [1])
return tf.check_numerics(tf.stop_gradient(advantage), 'advantage')
def diag_normal_kl(mean0, logstd0, mean1, logstd1):
"""Epirical KL divergence of two normals with diagonal covariance."""
logstd0_2, logstd1_2 = 2 * logstd0, 2 * logstd1
return 0.5 * (
tf.reduce_sum(tf.exp(logstd0_2 - logstd1_2), -1) +
tf.reduce_sum((mean1 - mean0) ** 2 / tf.exp(logstd1_2), -1) +
tf.reduce_sum(logstd1_2, -1) - tf.reduce_sum(logstd0_2, -1) -
mean0.shape[-1].value)
return 0.5 * (tf.reduce_sum(tf.exp(logstd0_2 - logstd1_2), -1) + tf.reduce_sum(
(mean1 - mean0)**2 / tf.exp(logstd1_2), -1) + tf.reduce_sum(logstd1_2, -1) -
tf.reduce_sum(logstd0_2, -1) - mean0.shape[-1].value)
def diag_normal_logpdf(mean, logstd, loc):
"""Log density of a normal with diagonal covariance."""
constant = -0.5 * (math.log(2 * math.pi) + logstd)
value = -0.5 * ((loc - mean) / tf.exp(logstd)) ** 2
value = -0.5 * ((loc - mean) / tf.exp(logstd))**2
return tf.reduce_sum(constant + value, -1)


@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Executable scripts for reinforcement learning."""
from __future__ import absolute_import


@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Example configurations using the PPO algorithm."""
from __future__ import absolute_import
@@ -33,10 +32,7 @@ def default():
use_gpu = False
# Network
network = networks.ForwardGaussianPolicy
weight_summaries = dict(
all=r'.*',
policy=r'.*/policy/.*',
value=r'.*/value/.*')
weight_summaries = dict(all=r'.*', policy=r'.*/policy/.*', value=r'.*/value/.*')
policy_layers = 200, 100
value_layers = 200, 100
init_mean_factor = 0.05


@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Networks for the PPO algorithm defined as recurrent cells."""
from __future__ import absolute_import
@@ -20,11 +19,10 @@ from __future__ import print_function
import tensorflow as tf
_MEAN_WEIGHTS_INITIALIZER = tf.contrib.layers.variance_scaling_initializer(
factor=0.1)
_MEAN_WEIGHTS_INITIALIZER = tf.contrib.layers.variance_scaling_initializer(factor=0.1)
_LOGSTD_INITIALIZER = tf.random_normal_initializer(-1, 1e-10)
class LinearGaussianPolicy(tf.contrib.rnn.RNNCell):
"""Indepent linear network with a tanh at the end for policy and feedforward network for the value.
@@ -56,15 +54,12 @@ class LinearGaussianPolicy(tf.contrib.rnn.RNNCell):
def __call__(self, observation, state):
with tf.variable_scope('policy'):
x = tf.contrib.layers.flatten(observation)
mean = tf.contrib.layers.fully_connected(
x,
self._action_size,
tf.tanh,
weights_initializer=self._mean_weights_initializer)
logstd = tf.get_variable('logstd', mean.shape[1:], tf.float32,
self._logstd_initializer)
logstd = tf.tile(logstd[None, ...],
[tf.shape(mean)[0]] + [1] * logstd.shape.ndims)
mean = tf.contrib.layers.fully_connected(x,
self._action_size,
tf.tanh,
weights_initializer=self._mean_weights_initializer)
logstd = tf.get_variable('logstd', mean.shape[1:], tf.float32, self._logstd_initializer)
logstd = tf.tile(logstd[None, ...], [tf.shape(mean)[0]] + [1] * logstd.shape.ndims)
with tf.variable_scope('value'):
x = tf.contrib.layers.flatten(observation)
for size in self._value_layers:
@@ -80,10 +75,12 @@ class ForwardGaussianPolicy(tf.contrib.rnn.RNNCell):
is learned as an independent parameter vector.
"""
def __init__(
self, policy_layers, value_layers, action_size,
mean_weights_initializer=_MEAN_WEIGHTS_INITIALIZER,
logstd_initializer=_LOGSTD_INITIALIZER):
def __init__(self,
policy_layers,
value_layers,
action_size,
mean_weights_initializer=_MEAN_WEIGHTS_INITIALIZER,
logstd_initializer=_LOGSTD_INITIALIZER):
self._policy_layers = policy_layers
self._value_layers = value_layers
self._action_size = action_size
@@ -104,13 +101,12 @@ class ForwardGaussianPolicy(tf.contrib.rnn.RNNCell):
x = tf.contrib.layers.flatten(observation)
for size in self._policy_layers:
x = tf.contrib.layers.fully_connected(x, size, tf.nn.relu)
mean = tf.contrib.layers.fully_connected(
x, self._action_size, tf.tanh,
weights_initializer=self._mean_weights_initializer)
logstd = tf.get_variable(
'logstd', mean.shape[1:], tf.float32, self._logstd_initializer)
logstd = tf.tile(
logstd[None, ...], [tf.shape(mean)[0]] + [1] * logstd.shape.ndims)
mean = tf.contrib.layers.fully_connected(x,
self._action_size,
tf.tanh,
weights_initializer=self._mean_weights_initializer)
logstd = tf.get_variable('logstd', mean.shape[1:], tf.float32, self._logstd_initializer)
logstd = tf.tile(logstd[None, ...], [tf.shape(mean)[0]] + [1] * logstd.shape.ndims)
with tf.variable_scope('value'):
x = tf.contrib.layers.flatten(observation)
for size in self._value_layers:
@@ -127,10 +123,12 @@ class RecurrentGaussianPolicy(tf.contrib.rnn.RNNCell):
and uses a GRU cell.
"""
def __init__(
self, policy_layers, value_layers, action_size,
mean_weights_initializer=_MEAN_WEIGHTS_INITIALIZER,
logstd_initializer=_LOGSTD_INITIALIZER):
def __init__(self,
policy_layers,
value_layers,
action_size,
mean_weights_initializer=_MEAN_WEIGHTS_INITIALIZER,
logstd_initializer=_LOGSTD_INITIALIZER):
self._policy_layers = policy_layers
self._value_layers = value_layers
self._action_size = action_size
@@ -152,13 +150,12 @@ class RecurrentGaussianPolicy(tf.contrib.rnn.RNNCell):
for size in self._policy_layers[:-1]:
x = tf.contrib.layers.fully_connected(x, size, tf.nn.relu)
x, state = self._cell(x, state)
mean = tf.contrib.layers.fully_connected(
x, self._action_size, tf.tanh,
weights_initializer=self._mean_weights_initializer)
logstd = tf.get_variable(
'logstd', mean.shape[1:], tf.float32, self._logstd_initializer)
logstd = tf.tile(
logstd[None, ...], [tf.shape(mean)[0]] + [1] * logstd.shape.ndims)
mean = tf.contrib.layers.fully_connected(x,
self._action_size,
tf.tanh,
weights_initializer=self._mean_weights_initializer)
logstd = tf.get_variable('logstd', mean.shape[1:], tf.float32, self._logstd_initializer)
logstd = tf.tile(logstd[None, ...], [tf.shape(mean)[0]] + [1] * logstd.shape.ndims)
with tf.variable_scope('value'):
x = tf.contrib.layers.flatten(observation)
for size in self._value_layers:


@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
r"""Script to train a batch reinforcement learning algorithm.
Command line:
@@ -68,21 +67,25 @@ def _define_loop(graph, logdir, train_steps, eval_steps):
Returns:
Loop object.
"""
loop = tools.Loop(
logdir, graph.step, graph.should_log, graph.do_report,
graph.force_reset)
loop.add_phase(
'train', graph.done, graph.score, graph.summary, train_steps,
report_every=None,
log_every=train_steps // 2,
checkpoint_every=None,
feed={graph.is_training: True})
loop.add_phase(
'eval', graph.done, graph.score, graph.summary, eval_steps,
report_every=eval_steps,
log_every=eval_steps // 2,
checkpoint_every=10 * eval_steps,
feed={graph.is_training: False})
loop = tools.Loop(logdir, graph.step, graph.should_log, graph.do_report, graph.force_reset)
loop.add_phase('train',
graph.done,
graph.score,
graph.summary,
train_steps,
report_every=None,
log_every=train_steps // 2,
checkpoint_every=None,
feed={graph.is_training: True})
loop.add_phase('eval',
graph.done,
graph.score,
graph.summary,
eval_steps,
report_every=eval_steps,
log_every=eval_steps // 2,
checkpoint_every=10 * eval_steps,
feed={graph.is_training: False})
return loop
@@ -101,25 +104,19 @@ def train(config, env_processes):
"""
tf.reset_default_graph()
with config.unlocked:
config.network = functools.partial(
utility.define_network, config.network, config)
config.network = functools.partial(utility.define_network, config.network, config)
config.policy_optimizer = getattr(tf.train, config.policy_optimizer)
config.value_optimizer = getattr(tf.train, config.value_optimizer)
if config.update_every % config.num_agents:
tf.logging.warn('Number of agents should divide episodes per update.')
with tf.device('/cpu:0'):
batch_env = utility.define_batch_env(
lambda: _create_environment(config),
config.num_agents, env_processes)
graph = utility.define_simulation_graph(
batch_env, config.algorithm, config)
loop = _define_loop(
graph, config.logdir,
config.update_every * config.max_length,
config.eval_episodes * config.max_length)
total_steps = int(
config.steps / config.update_every *
(config.update_every + config.eval_episodes))
batch_env = utility.define_batch_env(lambda: _create_environment(config), config.num_agents,
env_processes)
graph = utility.define_simulation_graph(batch_env, config.algorithm, config)
loop = _define_loop(graph, config.logdir, config.update_every * config.max_length,
config.eval_episodes * config.max_length)
total_steps = int(config.steps / config.update_every *
(config.update_every + config.eval_episodes))
# Exclude episode related variables since the Python state of environments is
# not checkpointed and thus new episodes start after resuming.
saver = utility.define_saver(exclude=(r'.*_temporary/.*',))
@@ -137,8 +134,8 @@ def main(_):
utility.set_up_logging()
if not FLAGS.config:
raise KeyError('You must specify a configuration.')
logdir = FLAGS.logdir and os.path.expanduser(os.path.join(
FLAGS.logdir, '{}-{}'.format(FLAGS.timestamp, FLAGS.config)))
logdir = FLAGS.logdir and os.path.expanduser(
os.path.join(FLAGS.logdir, '{}-{}'.format(FLAGS.timestamp, FLAGS.config)))
try:
config = utility.load_config(logdir)
except IOError:
@@ -150,16 +147,11 @@ def main(_):
if __name__ == '__main__':
FLAGS = tf.app.flags.FLAGS
tf.app.flags.DEFINE_string(
'logdir', None,
'Base directory to store logs.')
tf.app.flags.DEFINE_string(
'timestamp', datetime.datetime.now().strftime('%Y%m%dT%H%M%S'),
'Sub directory to store logs.')
tf.app.flags.DEFINE_string(
'config', None,
'Configuration to execute.')
tf.app.flags.DEFINE_boolean(
'env_processes', True,
'Step environments in separate processes to circumvent the GIL.')
tf.app.flags.DEFINE_string('logdir', None, 'Base directory to store logs.')
tf.app.flags.DEFINE_string('timestamp',
datetime.datetime.now().strftime('%Y%m%dT%H%M%S'),
'Sub directory to store logs.')
tf.app.flags.DEFINE_string('config', None, 'Configuration to execute.')
tf.app.flags.DEFINE_boolean('env_processes', True,
'Step environments in separate processes to circumvent the GIL.')
tf.app.run()


@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for the PPO algorithm usage example."""
from __future__ import absolute_import
@@ -29,7 +28,6 @@ from google3.robotics.reinforcement_learning.agents.scripts import configs
from google3.robotics.reinforcement_learning.agents.scripts import networks
from google3.robotics.reinforcement_learning.agents.scripts import train
FLAGS = tf.app.flags.FLAGS
@@ -65,9 +63,11 @@ class PPOTest(tf.test.TestCase):
for network, observ_shape in itertools.product(nets, observ_shapes):
config = self._define_config()
with config.unlocked:
config.env = functools.partial(
tools.MockEnvironment, observ_shape, action_shape=(3,),
min_duration=15, max_duration=15)
config.env = functools.partial(tools.MockEnvironment,
observ_shape,
action_shape=(3,),
min_duration=15,
max_duration=15)
config.max_length = 20
config.steps = 100
config.network = network
@@ -77,9 +77,11 @@ class PPOTest(tf.test.TestCase):
def test_no_crash_variable_duration(self):
config = self._define_config()
with config.unlocked:
config.env = functools.partial(
tools.MockEnvironment, observ_shape=(2, 3), action_shape=(3,),
min_duration=5, max_duration=25)
config.env = functools.partial(tools.MockEnvironment,
observ_shape=(2, 3),
action_shape=(3,),
min_duration=5,
max_duration=25)
config.max_length = 25
config.steps = 200
config.network = networks.RecurrentGaussianPolicy


@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Utilities for using reinforcement learning algorithms."""
from __future__ import absolute_import
@@ -46,8 +45,7 @@ def define_simulation_graph(batch_env, algo_cls, config):
do_report = tf.placeholder(tf.bool, name='do_report')
force_reset = tf.placeholder(tf.bool, name='force_reset')
algo = algo_cls(batch_env, step, is_training, should_log, config)
done, score, summary = tools.simulate(
batch_env, algo, should_log, force_reset)
done, score, summary = tools.simulate(batch_env, algo, should_log, force_reset)
message = 'Graph contains {} trainable variables.'
tf.logging.info(message.format(tools.count_weights()))
# pylint: enable=unused-variable
@@ -67,9 +65,7 @@ def define_batch_env(constructor, num_agents, env_processes):
"""
with tf.variable_scope('environments'):
if env_processes:
envs = [
tools.wrappers.ExternalProcess(constructor)
for _ in range(num_agents)]
envs = [tools.wrappers.ExternalProcess(constructor) for _ in range(num_agents)]
else:
envs = [constructor() for _ in range(num_agents)]
batch_env = tools.BatchEnv(envs, blocking=not env_processes)
@@ -108,15 +104,14 @@ def define_network(constructor, config, action_size):
Returns:
Created recurrent cell object.
"""
mean_weights_initializer = (
tf.contrib.layers.variance_scaling_initializer(
factor=config.init_mean_factor))
logstd_initializer = tf.random_normal_initializer(
config.init_logstd, 1e-10)
network = constructor(
config.policy_layers, config.value_layers, action_size,
mean_weights_initializer=mean_weights_initializer,
logstd_initializer=logstd_initializer)
mean_weights_initializer = (tf.contrib.layers.variance_scaling_initializer(
factor=config.init_mean_factor))
logstd_initializer = tf.random_normal_initializer(config.init_logstd, 1e-10)
network = constructor(config.policy_layers,
config.value_layers,
action_size,
mean_weights_initializer=mean_weights_initializer,
logstd_initializer=logstd_initializer)
return network
@@ -134,9 +129,7 @@ def initialize_variables(sess, saver, logdir, checkpoint=None, resume=None):
ValueError: If resume expected but no log directory specified.
RuntimeError: If no resume expected but a checkpoint was found.
"""
sess.run(tf.group(
tf.local_variables_initializer(),
tf.global_variables_initializer()))
sess.run(tf.group(tf.local_variables_initializer(), tf.global_variables_initializer()))
if resume and not (logdir or checkpoint):
raise ValueError('Need to specify logdir to resume a checkpoint.')
if logdir:
@@ -175,9 +168,8 @@ def save_config(config, logdir=None):
with tf.gfile.GFile(config_path, 'w') as file_:
yaml.dump(config, file_, default_flow_style=False)
else:
message = (
'Start a new run without storing summaries and checkpoints since no '
'logging directory was specified.')
message = ('Start a new run without storing summaries and checkpoints since no '
'logging directory was specified.')
tf.logging.info(message)
return config
@@ -196,9 +188,8 @@ def load_config(logdir):
"""
config_path = logdir and os.path.join(logdir, 'config.yaml')
if not config_path or not tf.gfile.Exists(config_path):
message = (
'Cannot resume an existing run since the logging directory does not '
'contain a configuration file.')
message = ('Cannot resume an existing run since the logging directory does not '
'contain a configuration file.')
raise IOError(message)
with tf.gfile.FastGFile(config_path, 'r') as file_:
config = yaml.load(file_)


@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
r"""Script to render videos of the Proximal Policy Gradient algorithm.
Command line:
@@ -54,6 +53,8 @@ def _create_environment(config, outdir):
setattr(env, 'spec', getattr(env, 'spec', None))
if config.max_length:
env = tools.wrappers.LimitDuration(env, config.max_length)
# env = gym.wrappers.Monitor(
# env, outdir, lambda unused_episode_number: True)
env = tools.wrappers.RangeNormalize(env)
@@ -72,20 +73,20 @@ def _define_loop(graph, eval_steps):
Returns:
Loop object.
"""
loop = tools.Loop(
None, graph.step, graph.should_log, graph.do_report, graph.force_reset)
loop.add_phase(
'eval', graph.done, graph.score, graph.summary, eval_steps,
report_every=eval_steps,
log_every=None,
checkpoint_every=None,
feed={graph.is_training: False})
loop = tools.Loop(None, graph.step, graph.should_log, graph.do_report, graph.force_reset)
loop.add_phase('eval',
graph.done,
graph.score,
graph.summary,
eval_steps,
report_every=eval_steps,
log_every=None,
checkpoint_every=None,
feed={graph.is_training: False})
return loop
def visualize(
logdir, outdir, num_agents, num_episodes, checkpoint=None,
env_processes=True):
def visualize(logdir, outdir, num_agents, num_episodes, checkpoint=None, env_processes=True):
"""Recover checkpoint and render videos from it.
Args:
@@ -98,25 +99,20 @@ def visualize(
"""
config = utility.load_config(logdir)
with config.unlocked:
config.network = functools.partial(
utility.define_network, config.network, config)
config.network = functools.partial(utility.define_network, config.network, config)
config.policy_optimizer = getattr(tf.train, config.policy_optimizer)
config.value_optimizer = getattr(tf.train, config.value_optimizer)
with tf.device('/cpu:0'):
batch_env = utility.define_batch_env(
lambda: _create_environment(config, outdir),
num_agents, env_processes)
graph = utility.define_simulation_graph(
batch_env, config.algorithm, config)
batch_env = utility.define_batch_env(lambda: _create_environment(config, outdir), num_agents,
env_processes)
graph = utility.define_simulation_graph(batch_env, config.algorithm, config)
total_steps = num_episodes * config.max_length
loop = _define_loop(graph, total_steps)
saver = utility.define_saver(
exclude=(r'.*_temporary/.*', r'global_step'))
saver = utility.define_saver(exclude=(r'.*_temporary/.*', r'global_step'))
sess_config = tf.ConfigProto(allow_soft_placement=True)
sess_config.gpu_options.allow_growth = True
with tf.Session(config=sess_config) as sess:
utility.initialize_variables(
sess, saver, config.logdir, checkpoint, resume=True)
utility.initialize_variables(sess, saver, config.logdir, checkpoint, resume=True)
for unused_score in loop.run(sess, saver, total_steps):
pass
batch_env.close()
@@ -129,29 +125,18 @@ def main(_):
raise KeyError('You must specify logging and outdirs directories.')
FLAGS.logdir = os.path.expanduser(FLAGS.logdir)
FLAGS.outdir = os.path.expanduser(FLAGS.outdir)
visualize(
FLAGS.logdir, FLAGS.outdir, FLAGS.num_agents, FLAGS.num_episodes,
FLAGS.checkpoint, FLAGS.env_processes)
visualize(FLAGS.logdir, FLAGS.outdir, FLAGS.num_agents, FLAGS.num_episodes, FLAGS.checkpoint,
FLAGS.env_processes)
if __name__ == '__main__':
FLAGS = tf.app.flags.FLAGS
tf.app.flags.DEFINE_string(
'logdir', None,
'Directory to the checkpoint of a training run.')
tf.app.flags.DEFINE_string(
'outdir', None,
'Local directory for storing the monitoring outdir.')
tf.app.flags.DEFINE_string(
'checkpoint', None,
'Checkpoint name to load; defaults to most recent.')
tf.app.flags.DEFINE_integer(
'num_agents', 1,
'How many environments to step in parallel.')
tf.app.flags.DEFINE_integer(
'num_episodes', 5,
'Minimum number of episodes to render.')
tf.app.flags.DEFINE_boolean(
'env_processes', True,
'Step environments in separate processes to circumvent the GIL.')
tf.app.flags.DEFINE_string('logdir', None, 'Directory to the checkpoint of a training run.')
tf.app.flags.DEFINE_string('outdir', None, 'Local directory for storing the monitoring outdir.')
tf.app.flags.DEFINE_string('checkpoint', None,
'Checkpoint name to load; defaults to most recent.')
tf.app.flags.DEFINE_integer('num_agents', 1, 'How many environments to step in parallel.')
tf.app.flags.DEFINE_integer('num_episodes', 5, 'Minimum number of episodes to render.')
tf.app.flags.DEFINE_boolean('env_processes', True,
'Step environments in separate processes to circumvent the GIL.')
tf.app.run()

View File

@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tools for reinforcement learning."""
from __future__ import absolute_import

View File

@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Wrap a dictionary to access keys as attributes."""
from __future__ import absolute_import

View File

@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for the attribute dictionary."""
from __future__ import absolute_import

View File

@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Combine multiple environments to step them in batch."""
from __future__ import absolute_import
@@ -84,13 +83,9 @@ class BatchEnv(object):
message = 'Invalid action at index {}: {}'
raise ValueError(message.format(index, action))
if self._blocking:
transitions = [
env.step(action)
for env, action in zip(self._envs, actions)]
transitions = [env.step(action) for env, action in zip(self._envs, actions)]
else:
transitions = [
env.step(action, blocking=False)
for env, action in zip(self._envs, actions)]
transitions = [env.step(action, blocking=False) for env, action in zip(self._envs, actions)]
transitions = [transition() for transition in transitions]
observs, rewards, dones, infos = zip(*transitions)
observ = np.stack(observs)

View File

@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Count learnable parameters."""
from __future__ import absolute_import

View File

@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for the weight counting utility."""
from __future__ import absolute_import

View File

@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Batch of environments inside the TensorFlow graph."""
from __future__ import absolute_import
@@ -42,18 +41,18 @@ class InGraphBatchEnv(object):
action_shape = self._parse_shape(self._batch_env.action_space)
action_dtype = self._parse_dtype(self._batch_env.action_space)
with tf.variable_scope('env_temporary'):
self._observ = tf.Variable(
tf.zeros((len(self._batch_env),) + observ_shape, observ_dtype),
name='observ', trainable=False)
self._action = tf.Variable(
tf.zeros((len(self._batch_env),) + action_shape, action_dtype),
name='action', trainable=False)
self._reward = tf.Variable(
tf.zeros((len(self._batch_env),), tf.float32),
name='reward', trainable=False)
self._done = tf.Variable(
tf.cast(tf.ones((len(self._batch_env),)), tf.bool),
name='done', trainable=False)
self._observ = tf.Variable(tf.zeros((len(self._batch_env),) + observ_shape, observ_dtype),
name='observ',
trainable=False)
self._action = tf.Variable(tf.zeros((len(self._batch_env),) + action_shape, action_dtype),
name='action',
trainable=False)
self._reward = tf.Variable(tf.zeros((len(self._batch_env),), tf.float32),
name='reward',
trainable=False)
self._done = tf.Variable(tf.cast(tf.ones((len(self._batch_env),)), tf.bool),
name='done',
trainable=False)
def __getattr__(self, name):
"""Forward unimplemented attributes to one of the original environments.
@@ -89,16 +88,13 @@ class InGraphBatchEnv(object):
if action.dtype in (tf.float16, tf.float32, tf.float64):
action = tf.check_numerics(action, 'action')
observ_dtype = self._parse_dtype(self._batch_env.observation_space)
observ, reward, done = tf.py_func(
lambda a: self._batch_env.step(a)[:3], [action],
[observ_dtype, tf.float32, tf.bool], name='step')
observ, reward, done = tf.py_func(lambda a: self._batch_env.step(a)[:3], [action],
[observ_dtype, tf.float32, tf.bool],
name='step')
observ = tf.check_numerics(observ, 'observ')
reward = tf.check_numerics(reward, 'reward')
return tf.group(
self._observ.assign(observ),
self._action.assign(action),
self._reward.assign(reward),
self._done.assign(done))
return tf.group(self._observ.assign(observ), self._action.assign(action),
self._reward.assign(reward), self._done.assign(done))
def reset(self, indices=None):
"""Reset the batch of environments.
@@ -112,15 +108,15 @@ class InGraphBatchEnv(object):
if indices is None:
indices = tf.range(len(self._batch_env))
observ_dtype = self._parse_dtype(self._batch_env.observation_space)
observ = tf.py_func(
self._batch_env.reset, [indices], observ_dtype, name='reset')
observ = tf.py_func(self._batch_env.reset, [indices], observ_dtype, name='reset')
observ = tf.check_numerics(observ, 'observ')
reward = tf.zeros_like(indices, tf.float32)
done = tf.zeros_like(indices, tf.bool)
with tf.control_dependencies([
tf.scatter_update(self._observ, indices, observ),
tf.scatter_update(self._reward, indices, reward),
tf.scatter_update(self._done, indices, done)]):
tf.scatter_update(self._done, indices, done)
]):
return tf.identity(observ)
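Both step() and reset() above rely on tf.py_func to call back into the Python batch environment from inside the graph; a minimal standalone sketch of that pattern (TF1 API, toy function, not part of this diff):

import numpy as np
import tensorflow as tf

def _double(x):
  # Runs as ordinary Python/NumPy code when the op executes.
  return (x * 2.0).astype(np.float32)

inp = tf.constant([1.0, 2.0, 3.0])
out = tf.py_func(_double, [inp], tf.float32, name='double')
with tf.Session() as sess:
  print(sess.run(out))  # [2. 4. 6.]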
@property

View File

@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Put an OpenAI Gym environment into the TensorFlow graph."""
from __future__ import absolute_import
@@ -42,16 +41,15 @@ class InGraphEnv(object):
action_shape = self._parse_shape(self._env.action_space)
action_dtype = self._parse_dtype(self._env.action_space)
with tf.name_scope('environment'):
self._observ = tf.Variable(
tf.zeros(observ_shape, observ_dtype), name='observ', trainable=False)
self._action = tf.Variable(
tf.zeros(action_shape, action_dtype), name='action', trainable=False)
self._reward = tf.Variable(
0.0, dtype=tf.float32, name='reward', trainable=False)
self._done = tf.Variable(
True, dtype=tf.bool, name='done', trainable=False)
self._step = tf.Variable(
0, dtype=tf.int32, name='step', trainable=False)
self._observ = tf.Variable(tf.zeros(observ_shape, observ_dtype),
name='observ',
trainable=False)
self._action = tf.Variable(tf.zeros(action_shape, action_dtype),
name='action',
trainable=False)
self._reward = tf.Variable(0.0, dtype=tf.float32, name='reward', trainable=False)
self._done = tf.Variable(True, dtype=tf.bool, name='done', trainable=False)
self._step = tf.Variable(0, dtype=tf.int32, name='step', trainable=False)
def __getattr__(self, name):
"""Forward unimplemented attributes to the original environment.
@@ -79,17 +77,14 @@ class InGraphEnv(object):
if action.dtype in (tf.float16, tf.float32, tf.float64):
action = tf.check_numerics(action, 'action')
observ_dtype = self._parse_dtype(self._env.observation_space)
observ, reward, done = tf.py_func(
lambda a: self._env.step(a)[:3], [action],
[observ_dtype, tf.float32, tf.bool], name='step')
observ, reward, done = tf.py_func(lambda a: self._env.step(a)[:3], [action],
[observ_dtype, tf.float32, tf.bool],
name='step')
observ = tf.check_numerics(observ, 'observ')
reward = tf.check_numerics(reward, 'reward')
return tf.group(
self._observ.assign(observ),
self._action.assign(action),
self._reward.assign(reward),
self._done.assign(done),
self._step.assign_add(1))
return tf.group(self._observ.assign(observ), self._action.assign(action),
self._reward.assign(reward), self._done.assign(done),
self._step.assign_add(1))
def reset(self):
"""Reset the environment.
@@ -100,10 +95,10 @@ class InGraphEnv(object):
observ_dtype = self._parse_dtype(self._env.observation_space)
observ = tf.py_func(self._env.reset, [], observ_dtype, name='reset')
observ = tf.check_numerics(observ, 'observ')
with tf.control_dependencies([
self._observ.assign(observ),
self._reward.assign(0),
self._done.assign(False)]):
with tf.control_dependencies(
[self._observ.assign(observ),
self._reward.assign(0),
self._done.assign(False)]):
return tf.identity(observ)
@property

View File

@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Execute operations in a loop and coordinate logging and checkpoints."""
from __future__ import absolute_import
@@ -25,10 +24,8 @@ import tensorflow as tf
from pybullet_envs.minitaur.agents.tools import streaming_mean
_Phase = collections.namedtuple(
'Phase',
'name, writer, op, batch, steps, feed, report_every, log_every,'
'Phase', 'name, writer, op, batch, steps, feed, report_every, log_every,'
'checkpoint_every')
@@ -56,16 +53,22 @@ class Loop(object):
reset: Tensor indicating to the model to start a new computation.
"""
self._logdir = logdir
self._step = (
tf.Variable(0, False, name='global_step') if step is None else step)
self._step = (tf.Variable(0, False, name='global_step') if step is None else step)
self._log = tf.placeholder(tf.bool) if log is None else log
self._report = tf.placeholder(tf.bool) if report is None else report
self._reset = tf.placeholder(tf.bool) if reset is None else reset
self._phases = []
def add_phase(
self, name, done, score, summary, steps,
report_every=None, log_every=None, checkpoint_every=None, feed=None):
def add_phase(self,
name,
done,
score,
summary,
steps,
report_every=None,
log_every=None,
checkpoint_every=None,
feed=None):
"""Add a phase to the loop protocol.
If the model breaks long computation into multiple steps, the done tensor
@@ -97,13 +100,12 @@ class Loop(object):
if done.shape.ndims is None or score.shape.ndims is None:
raise ValueError("Rank of 'done' and 'score' tensors must be known.")
writer = self._logdir and tf.summary.FileWriter(
os.path.join(self._logdir, name), tf.get_default_graph(),
flush_secs=60)
os.path.join(self._logdir, name), tf.get_default_graph(), flush_secs=60)
op = self._define_step(done, score, summary)
batch = 1 if score.shape.ndims == 0 else score.shape[0].value
self._phases.append(_Phase(
name, writer, op, batch, int(steps), feed, report_every,
log_every, checkpoint_every))
self._phases.append(
_Phase(name, writer, op, batch, int(steps), feed, report_every, log_every,
checkpoint_every))
def run(self, sess, saver, max_step=None):
"""Run the loop schedule for a specified number of steps.
@@ -133,13 +135,11 @@ class Loop(object):
tf.logging.info(message.format(phase.name, phase_step, global_step))
# Populate bookkeeping tensors.
phase.feed[self._reset] = (steps_in < steps_made)
phase.feed[self._log] = (
phase.writer and
self._is_every_steps(phase_step, phase.batch, phase.log_every))
phase.feed[self._report] = (
self._is_every_steps(phase_step, phase.batch, phase.report_every))
summary, mean_score, global_step, steps_made = sess.run(
phase.op, phase.feed)
phase.feed[self._log] = (phase.writer and
self._is_every_steps(phase_step, phase.batch, phase.log_every))
phase.feed[self._report] = (self._is_every_steps(phase_step, phase.batch,
phase.report_every))
summary, mean_score, global_step, steps_made = sess.run(phase.op, phase.feed)
if self._is_every_steps(phase_step, phase.batch, phase.checkpoint_every):
self._store_checkpoint(sess, saver, global_step)
if self._is_every_steps(phase_step, phase.batch, phase.report_every):
@@ -207,8 +207,7 @@ class Loop(object):
score_mean = streaming_mean.StreamingMean((), tf.float32)
with tf.control_dependencies([done, score, summary]):
done_score = tf.gather(score, tf.where(done)[:, 0])
submit_score = tf.cond(
tf.reduce_any(done), lambda: score_mean.submit(done_score), tf.no_op)
submit_score = tf.cond(tf.reduce_any(done), lambda: score_mean.submit(done_score), tf.no_op)
with tf.control_dependencies([submit_score]):
mean_score = tf.cond(self._report, score_mean.clear, float)
steps_made = tf.shape(score)[0]

View File

@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for the training loop."""
from __future__ import absolute_import
@@ -28,8 +27,7 @@ class LoopTest(tf.test.TestCase):
def test_report_every_step(self):
step = tf.Variable(0, False, dtype=tf.int32, name='step')
loop = tools.Loop(None, step)
loop.add_phase(
'phase_1', done=True, score=0, summary='', steps=1, report_every=3)
loop.add_phase('phase_1', done=True, score=0, summary='', steps=1, report_every=3)
# Step: 0 1 2 3 4 5 6 7 8
# Report: x x x
with self.test_session() as sess:
@@ -45,15 +43,33 @@ class LoopTest(tf.test.TestCase):
def test_phases_feed(self):
score = tf.placeholder(tf.float32, [])
loop = tools.Loop(None)
loop.add_phase(
'phase_1', done=True, score=score, summary='', steps=1, report_every=1,
log_every=None, checkpoint_every=None, feed={score: 1})
loop.add_phase(
'phase_2', done=True, score=score, summary='', steps=3, report_every=1,
log_every=None, checkpoint_every=None, feed={score: 2})
loop.add_phase(
'phase_3', done=True, score=score, summary='', steps=2, report_every=1,
log_every=None, checkpoint_every=None, feed={score: 3})
loop.add_phase('phase_1',
done=True,
score=score,
summary='',
steps=1,
report_every=1,
log_every=None,
checkpoint_every=None,
feed={score: 1})
loop.add_phase('phase_2',
done=True,
score=score,
summary='',
steps=3,
report_every=1,
log_every=None,
checkpoint_every=None,
feed={score: 2})
loop.add_phase('phase_3',
done=True,
score=score,
summary='',
steps=2,
report_every=1,
log_every=None,
checkpoint_every=None,
feed={score: 3})
with self.test_session() as sess:
sess.run(tf.global_variables_initializer())
scores = list(loop.run(sess, saver=None, max_step=15))
@@ -61,10 +77,8 @@ class LoopTest(tf.test.TestCase):
def test_average_score_over_phases(self):
loop = tools.Loop(None)
loop.add_phase(
'phase_1', done=True, score=1, summary='', steps=1, report_every=2)
loop.add_phase(
'phase_2', done=True, score=2, summary='', steps=2, report_every=5)
loop.add_phase('phase_1', done=True, score=1, summary='', steps=1, report_every=2)
loop.add_phase('phase_2', done=True, score=2, summary='', steps=2, report_every=5)
# Score: 1 2 2 1 2 2 1 2 2 1 2 2 1 2 2 1 2
# Report 1: x x x
# Report 2: x x
@@ -78,8 +92,7 @@ class LoopTest(tf.test.TestCase):
done = tf.equal((step + 1) % 2, 0)
score = tf.cast(step, tf.float32)
loop = tools.Loop(None, step)
loop.add_phase(
'phase_1', done, score, summary='', steps=1, report_every=3)
loop.add_phase('phase_1', done, score, summary='', steps=1, report_every=3)
# Score: 0 1 2 3 4 5 6 7 8
# Done: x x x x
# Report: x x x
@@ -91,10 +104,9 @@ class LoopTest(tf.test.TestCase):
def test_not_done_batch(self):
step = tf.Variable(0, False, dtype=tf.int32, name='step')
done = tf.equal([step % 3, step % 4], 0)
score = tf.cast([step, step ** 2], tf.float32)
score = tf.cast([step, step**2], tf.float32)
loop = tools.Loop(None, step)
loop.add_phase(
'phase_1', done, score, summary='', steps=1, report_every=8)
loop.add_phase('phase_1', done, score, summary='', steps=1, report_every=8)
# Step: 0 2 4 6
# Score 1: 0 2 4 6
# Done 1: x x

View File

@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Mock algorithm for testing reinforcement learning code."""
from __future__ import absolute_import

View File

@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Mock environment for testing reinforcement learning code."""
from __future__ import absolute_import

View File

@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""In-graph simulation step of a vecrotized algorithm with environments."""
from __future__ import absolute_import
@@ -55,7 +54,8 @@ def simulate(batch_env, algo, log=True, reset=False):
reset_ops = [
batch_env.reset(agent_indices),
tf.scatter_update(score, agent_indices, zero_scores),
tf.scatter_update(length, agent_indices, zero_durations)]
tf.scatter_update(length, agent_indices, zero_durations)
]
with tf.control_dependencies(reset_ops):
return algo.begin_episode(agent_indices)
@@ -76,9 +76,8 @@ def simulate(batch_env, algo, log=True, reset=False):
add_score = score.assign_add(batch_env.reward)
inc_length = length.assign_add(tf.ones(len(batch_env), tf.int32))
with tf.control_dependencies([add_score, inc_length]):
experience_summary = algo.experience(
prevob, batch_env.action, batch_env.reward, batch_env.done,
batch_env.observ)
experience_summary = algo.experience(prevob, batch_env.action, batch_env.reward,
batch_env.done, batch_env.observ)
return tf.summary.merge([step_summary, experience_summary])
def _define_end_episode(agent_indices):
@@ -94,8 +93,7 @@ def simulate(batch_env, algo, log=True, reset=False):
"""
assert agent_indices.shape.ndims == 1
submit_score = mean_score.submit(tf.gather(score, agent_indices))
submit_length = mean_length.submit(
tf.cast(tf.gather(length, agent_indices), tf.float32))
submit_length = mean_length.submit(tf.cast(tf.gather(length, agent_indices), tf.float32))
with tf.control_dependencies([submit_score, submit_length]):
return algo.end_episode(agent_indices)
@@ -105,41 +103,34 @@ def simulate(batch_env, algo, log=True, reset=False):
Returns:
Summary string.
"""
score_summary = tf.cond(
tf.logical_and(log, tf.cast(mean_score.count, tf.bool)),
lambda: tf.summary.scalar('mean_score', mean_score.clear()), str)
length_summary = tf.cond(
tf.logical_and(log, tf.cast(mean_length.count, tf.bool)),
lambda: tf.summary.scalar('mean_length', mean_length.clear()), str)
score_summary = tf.cond(tf.logical_and(log, tf.cast(
mean_score.count, tf.bool)), lambda: tf.summary.scalar('mean_score', mean_score.clear()),
str)
length_summary = tf.cond(tf.logical_and(
log, tf.cast(mean_length.count,
tf.bool)), lambda: tf.summary.scalar('mean_length', mean_length.clear()), str)
return tf.summary.merge([score_summary, length_summary])
with tf.name_scope('simulate'):
log = tf.convert_to_tensor(log)
reset = tf.convert_to_tensor(reset)
with tf.variable_scope('simulate_temporary'):
score = tf.Variable(
tf.zeros(len(batch_env), dtype=tf.float32), False, name='score')
length = tf.Variable(
tf.zeros(len(batch_env), dtype=tf.int32), False, name='length')
score = tf.Variable(tf.zeros(len(batch_env), dtype=tf.float32), False, name='score')
length = tf.Variable(tf.zeros(len(batch_env), dtype=tf.int32), False, name='length')
mean_score = streaming_mean.StreamingMean((), tf.float32)
mean_length = streaming_mean.StreamingMean((), tf.float32)
agent_indices = tf.cond(
reset,
lambda: tf.range(len(batch_env)),
lambda: tf.cast(tf.where(batch_env.done)[:, 0], tf.int32))
begin_episode = tf.cond(
tf.cast(tf.shape(agent_indices)[0], tf.bool),
lambda: _define_begin_episode(agent_indices), str)
agent_indices = tf.cond(reset, lambda: tf.range(len(batch_env)), lambda: tf.cast(
tf.where(batch_env.done)[:, 0], tf.int32))
begin_episode = tf.cond(tf.cast(tf.shape(agent_indices)[0],
tf.bool), lambda: _define_begin_episode(agent_indices), str)
with tf.control_dependencies([begin_episode]):
step = _define_step()
with tf.control_dependencies([step]):
agent_indices = tf.cast(tf.where(batch_env.done)[:, 0], tf.int32)
end_episode = tf.cond(
tf.cast(tf.shape(agent_indices)[0], tf.bool),
lambda: _define_end_episode(agent_indices), str)
end_episode = tf.cond(tf.cast(tf.shape(agent_indices)[0],
tf.bool), lambda: _define_end_episode(agent_indices), str)
with tf.control_dependencies([end_episode]):
summary = tf.summary.merge([
_define_summaries(), begin_episode, step, end_episode])
summary = tf.summary.merge([_define_summaries(), begin_episode, step, end_episode])
with tf.control_dependencies([summary]):
done, score = tf.identity(batch_env.done), tf.identity(score)
return done, score, summary

View File

@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for the simulation operation."""
from __future__ import absolute_import
@@ -84,9 +83,10 @@ class SimulateTest(tf.test.TestCase):
def _create_test_batch_env(self, durations):
envs = []
for duration in durations:
env = tools.MockEnvironment(
observ_shape=(2, 3), action_shape=(3,),
min_duration=duration, max_duration=duration)
env = tools.MockEnvironment(observ_shape=(2, 3),
action_shape=(3,),
min_duration=duration,
max_duration=duration)
env = tools.wrappers.ConvertTo32Bit(env)
envs.append(env)
batch_env = tools.BatchEnv(envs, blocking=True)

View File

@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Compute a streaming estimation of the mean of submitted tensors."""
from __future__ import absolute_import
@@ -53,9 +52,8 @@ class StreamingMean(object):
# Add a batch dimension if necessary.
if value.shape.ndims == self._sum.shape.ndims:
value = value[None, ...]
return tf.group(
self._sum.assign_add(tf.reduce_sum(value, 0)),
self._count.assign_add(tf.shape(value)[0]))
return tf.group(self._sum.assign_add(tf.reduce_sum(value, 0)),
self._count.assign_add(tf.shape(value)[0]))
def clear(self):
"""Return the mean estimate and reset the streaming statistics."""

View File

@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Wrappers for OpenAI Gym environments."""
from __future__ import absolute_import
@@ -150,8 +149,7 @@ class FrameHistory(object):
return self._select_frames()
def _select_frames(self):
indices = [
(self._step - index) % self._capacity for index in self._past_indices]
indices = [(self._step - index) % self._capacity for index in self._past_indices]
observ = self._buffer[indices]
if self._flatten:
observ = np.reshape(observ, (-1,) + observ.shape[2:])
@@ -192,14 +190,14 @@ class RangeNormalize(object):
def __init__(self, env, observ=None, action=None):
self._env = env
self._should_normalize_observ = (
observ is not False and self._is_finite(self._env.observation_space))
self._should_normalize_observ = (observ is not False and
self._is_finite(self._env.observation_space))
if observ is True and not self._should_normalize_observ:
raise ValueError('Cannot normalize infinite observation range.')
if observ is None and not self._should_normalize_observ:
tf.logging.info('Not normalizing infinite observation range.')
self._should_normalize_action = (
action is not False and self._is_finite(self._env.action_space))
self._should_normalize_action = (action is not False and
self._is_finite(self._env.action_space))
if action is True and not self._should_normalize_action:
raise ValueError('Cannot normalize infinite action range.')
if action is None and not self._should_normalize_action:
@@ -327,8 +325,7 @@ class ExternalProcess(object):
action_space: The cached action space of the environment.
"""
self._conn, conn = multiprocessing.Pipe()
self._process = multiprocessing.Process(
target=self._worker, args=(constructor, conn))
self._process = multiprocessing.Process(target=self._worker, args=(constructor, conn))
atexit.register(self.close)
self._process.start()
self._observ_space = None

View File

@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for environment wrappers."""
from __future__ import absolute_import
@@ -28,18 +27,20 @@ from agents import tools
class ExternalProcessTest(tf.test.TestCase):
def test_close_no_hang_after_init(self):
constructor = functools.partial(
tools.MockEnvironment,
observ_shape=(2, 3), action_shape=(2,),
min_duration=2, max_duration=2)
constructor = functools.partial(tools.MockEnvironment,
observ_shape=(2, 3),
action_shape=(2,),
min_duration=2,
max_duration=2)
env = tools.wrappers.ExternalProcess(constructor)
env.close()
def test_close_no_hang_after_step(self):
constructor = functools.partial(
tools.MockEnvironment,
observ_shape=(2, 3), action_shape=(2,),
min_duration=5, max_duration=5)
constructor = functools.partial(tools.MockEnvironment,
observ_shape=(2, 3),
action_shape=(2,),
min_duration=5,
max_duration=5)
env = tools.wrappers.ExternalProcess(constructor)
env.reset()
env.step(env.action_space.sample())
@@ -53,8 +54,7 @@ class ExternalProcessTest(tf.test.TestCase):
env.step(env.action_space.sample())
def test_reraise_exception_in_step(self):
constructor = functools.partial(
MockEnvironmentCrashInStep, crash_at_step=3)
constructor = functools.partial(MockEnvironmentCrashInStep, crash_at_step=3)
env = tools.wrappers.ExternalProcess(constructor)
env.reset()
env.step(env.action_space.sample())
@@ -74,9 +74,10 @@ class MockEnvironmentCrashInStep(tools.MockEnvironment):
"""Raise an error after specified number of steps in an episode."""
def __init__(self, crash_at_step):
super(MockEnvironmentCrashInStep, self).__init__(
observ_shape=(2, 3), action_shape=(2,),
min_duration=crash_at_step + 1, max_duration=crash_at_step + 1)
super(MockEnvironmentCrashInStep, self).__init__(observ_shape=(2, 3),
action_shape=(2,),
min_duration=crash_at_step + 1,
max_duration=crash_at_step + 1)
self._crash_at_step = crash_at_step
def step(self, *args, **kwargs):

View File

@@ -5,6 +5,3 @@ from pybullet_envs.minitaur.envs.minitaur_reactive_env import MinitaurReactiveEn
from pybullet_envs.minitaur.envs.minitaur_stand_gym_env import MinitaurStandGymEnv
from pybullet_envs.minitaur.envs.minitaur_trotting_env import MinitaurTrottingEnv
from pybullet_envs.minitaur.envs.minitaur_four_leg_stand_env import MinitaurFourLegStandEnv

View File

@@ -42,9 +42,13 @@ class BulletClient(object):
attribute = getattr(pybullet, name)
if inspect.isbuiltin(attribute):
if name not in [
"invertTransform", "multiplyTransforms", "getMatrixFromQuaternion",
"getEulerFromQuaternion", "computeViewMatrixFromYawPitchRoll",
"computeProjectionMatrixFOV", "getQuaternionFromEuler",
"invertTransform",
"multiplyTransforms",
"getMatrixFromQuaternion",
"getEulerFromQuaternion",
"computeViewMatrixFromYawPitchRoll",
"computeProjectionMatrixFOV",
"getQuaternionFromEuler",
]: # A temporary hack for now.
attribute = functools.partial(attribute, physicsClientId=self._client)
return attribute
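For context, a minimal usage sketch of the per-client forwarding shown above (the import path and DIRECT connection mode are assumptions, not part of this diff):

import pybullet
from pybullet_utils import bullet_client  # assumed location of an equivalent helper

# Each BulletClient owns its own physics server; builtin calls are routed to it
# through the bound physicsClientId, except the pure math helpers listed above.
client_a = bullet_client.BulletClient(connection_mode=pybullet.DIRECT)
client_b = bullet_client.BulletClient(connection_mode=pybullet.DIRECT)
client_a.setGravity(0, 0, -10)                      # affects only client_a's server
quat = client_b.getQuaternionFromEuler([0, 0, 0])   # unbound math helper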

View File

@@ -4,11 +4,11 @@ The randomization includes swing_offset, extension_offset of all legs that mimic
bent legs, desired_pitch from user input, battery voltage and motor damping.
"""
import os, inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
parentdir = os.path.dirname(os.path.dirname(parentdir))
os.sys.path.insert(0,parentdir)
os.sys.path.insert(0, parentdir)
import numpy as np
import tensorflow as tf
@@ -20,8 +20,7 @@ BATTERY_VOLTAGE_RANGE = (14.8, 16.8)
MOTOR_VISCOUS_DAMPING_RANGE = (0, 0.01)
class MinitaurAlternatingLegsEnvRandomizer(
env_randomizer_base.EnvRandomizerBase):
class MinitaurAlternatingLegsEnvRandomizer(env_randomizer_base.EnvRandomizerBase):
"""A randomizer that changes the minitaur_gym_alternating_leg_env."""
def __init__(self,
@@ -34,23 +33,20 @@ class MinitaurAlternatingLegsEnvRandomizer(
self.perturb_desired_pitch_bound = perturb_desired_pitch_bound
def randomize_env(self, env):
perturb_magnitude = np.random.uniform(
low=-self.perturb_swing_bound,
high=self.perturb_swing_bound,
size=NUM_LEGS)
perturb_magnitude = np.random.uniform(low=-self.perturb_swing_bound,
high=self.perturb_swing_bound,
size=NUM_LEGS)
env.set_swing_offset(perturb_magnitude)
tf.logging.info("swing_offset: {}".format(perturb_magnitude))
perturb_magnitude = np.random.uniform(
low=-self.perturb_extension_bound,
high=self.perturb_extension_bound,
size=NUM_LEGS)
perturb_magnitude = np.random.uniform(low=-self.perturb_extension_bound,
high=self.perturb_extension_bound,
size=NUM_LEGS)
env.set_extension_offset(perturb_magnitude)
tf.logging.info("extension_offset: {}".format(perturb_magnitude))
perturb_magnitude = np.random.uniform(
low=-self.perturb_desired_pitch_bound,
high=self.perturb_desired_pitch_bound)
perturb_magnitude = np.random.uniform(low=-self.perturb_desired_pitch_bound,
high=self.perturb_desired_pitch_bound)
env.set_desired_pitch(perturb_magnitude)
tf.logging.info("desired_pitch: {}".format(perturb_magnitude))

View File

@@ -1,11 +1,11 @@
"""Randomize the minitaur_gym_env when reset() is called."""
import random
import os, inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
parentdir = os.path.dirname(os.path.dirname(parentdir))
os.sys.path.insert(0,parentdir)
os.sys.path.insert(0, parentdir)
import numpy as np
from pybullet_envs.minitaur.envs import env_randomizer_base
@@ -52,24 +52,20 @@ class MinitaurEnvRandomizer(env_randomizer_base.EnvRandomizerBase):
minitaur.SetBaseMasses(randomized_base_mass)
leg_masses = minitaur.GetLegMassesFromURDF()
leg_masses_lower_bound = np.array(leg_masses) * (
1.0 + self._minitaur_leg_mass_err_range[0])
leg_masses_upper_bound = np.array(leg_masses) * (
1.0 + self._minitaur_leg_mass_err_range[1])
leg_masses_lower_bound = np.array(leg_masses) * (1.0 + self._minitaur_leg_mass_err_range[0])
leg_masses_upper_bound = np.array(leg_masses) * (1.0 + self._minitaur_leg_mass_err_range[1])
randomized_leg_masses = [
np.random.uniform(leg_masses_lower_bound[i], leg_masses_upper_bound[i])
for i in xrange(len(leg_masses))
]
minitaur.SetLegMasses(randomized_leg_masses)
randomized_battery_voltage = random.uniform(BATTERY_VOLTAGE_RANGE[0],
BATTERY_VOLTAGE_RANGE[1])
randomized_battery_voltage = random.uniform(BATTERY_VOLTAGE_RANGE[0], BATTERY_VOLTAGE_RANGE[1])
minitaur.SetBatteryVoltage(randomized_battery_voltage)
randomized_motor_damping = random.uniform(MOTOR_VISCOUS_DAMPING_RANGE[0],
MOTOR_VISCOUS_DAMPING_RANGE[1])
minitaur.SetMotorViscousDamping(randomized_motor_damping)
randomized_foot_friction = random.uniform(MINITAUR_LEG_FRICTION[0],
MINITAUR_LEG_FRICTION[1])
randomized_foot_friction = random.uniform(MINITAUR_LEG_FRICTION[0], MINITAUR_LEG_FRICTION[1])
minitaur.SetFootFriction(randomized_foot_friction)
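As a usage note, a hedged sketch of how a randomizer like this one is attached to the gym environment (the env_randomizer keyword and import paths are assumptions, not shown in this diff):

from pybullet_envs.minitaur.envs import minitaur_gym_env          # assumed path
from pybullet_envs.minitaur.envs import minitaur_env_randomizer   # assumed path

randomizer = minitaur_env_randomizer.MinitaurEnvRandomizer()
env = minitaur_gym_env.MinitaurGymEnv(env_randomizer=randomizer)  # keyword assumed
env.reset()  # randomize_env(env) runs on reset, re-sampling masses, damping, friction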

View File

@@ -4,7 +4,6 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
PARAM_RANGE = {
# The following ranges are given as percentages, e.g. 0.8 means 80%.
"mass": [0.8, 1.2],

View File

@@ -7,11 +7,11 @@ from __future__ import print_function
import functools
import random
import os, inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
parentdir = os.path.dirname(os.path.dirname(parentdir))
os.sys.path.insert(0,parentdir)
os.sys.path.insert(0, parentdir)
import numpy as np
import tensorflow as tf
@@ -32,8 +32,7 @@ class MinitaurEnvRandomizerFromConfig(env_randomizer_base.EnvRandomizerBase):
except AttributeError:
raise ValueError("Config {} is not found.".format(config))
self._randomization_param_dict = config()
tf.logging.info("Randomization config is: {}".format(
self._randomization_param_dict))
tf.logging.info("Randomization config is: {}".format(self._randomization_param_dict))
def randomize_env(self, env):
"""Randomize various physical properties of the environment.
@@ -43,35 +42,29 @@ class MinitaurEnvRandomizerFromConfig(env_randomizer_base.EnvRandomizerBase):
Args:
env: A minitaur gym environment.
"""
self._randomization_function_dict = self._build_randomization_function_dict(
env)
self._randomization_function_dict = self._build_randomization_function_dict(env)
for param_name, random_range in self._randomization_param_dict.iteritems():
self._randomization_function_dict[param_name](
lower_bound=random_range[0], upper_bound=random_range[1])
self._randomization_function_dict[param_name](lower_bound=random_range[0],
upper_bound=random_range[1])
def _build_randomization_function_dict(self, env):
func_dict = {}
func_dict["mass"] = functools.partial(
self._randomize_masses, minitaur=env.minitaur)
func_dict["inertia"] = functools.partial(
self._randomize_inertia, minitaur=env.minitaur)
func_dict["latency"] = functools.partial(
self._randomize_latency, minitaur=env.minitaur)
func_dict["joint friction"] = functools.partial(
self._randomize_joint_friction, minitaur=env.minitaur)
func_dict["motor friction"] = functools.partial(
self._randomize_motor_friction, minitaur=env.minitaur)
func_dict["restitution"] = functools.partial(
self._randomize_contact_restitution, minitaur=env.minitaur)
func_dict["lateral friction"] = functools.partial(
self._randomize_contact_friction, minitaur=env.minitaur)
func_dict["battery"] = functools.partial(
self._randomize_battery_level, minitaur=env.minitaur)
func_dict["motor strength"] = functools.partial(
self._randomize_motor_strength, minitaur=env.minitaur)
func_dict["mass"] = functools.partial(self._randomize_masses, minitaur=env.minitaur)
func_dict["inertia"] = functools.partial(self._randomize_inertia, minitaur=env.minitaur)
func_dict["latency"] = functools.partial(self._randomize_latency, minitaur=env.minitaur)
func_dict["joint friction"] = functools.partial(self._randomize_joint_friction,
minitaur=env.minitaur)
func_dict["motor friction"] = functools.partial(self._randomize_motor_friction,
minitaur=env.minitaur)
func_dict["restitution"] = functools.partial(self._randomize_contact_restitution,
minitaur=env.minitaur)
func_dict["lateral friction"] = functools.partial(self._randomize_contact_friction,
minitaur=env.minitaur)
func_dict["battery"] = functools.partial(self._randomize_battery_level, minitaur=env.minitaur)
func_dict["motor strength"] = functools.partial(self._randomize_motor_strength,
minitaur=env.minitaur)
# Setting the control step needs access to the environment.
func_dict["control step"] = functools.partial(
self._randomize_control_step, env=env)
func_dict["control step"] = functools.partial(self._randomize_control_step, env=env)
return func_dict
def _randomize_control_step(self, env, lower_bound, upper_bound):
@@ -111,8 +104,8 @@ class MinitaurEnvRandomizerFromConfig(env_randomizer_base.EnvRandomizerBase):
def _randomize_joint_friction(self, minitaur, lower_bound, upper_bound):
num_knee_joints = minitaur.GetNumKneeJoints()
randomized_joint_frictions = np.random.uniform(
[lower_bound] * num_knee_joints, [upper_bound] * num_knee_joints)
randomized_joint_frictions = np.random.uniform([lower_bound] * num_knee_joints,
[upper_bound] * num_knee_joints)
minitaur.SetJointFriction(randomized_joint_frictions)
tf.logging.info("joint friction is: {}".format(randomized_joint_frictions))
@@ -137,9 +130,7 @@ class MinitaurEnvRandomizerFromConfig(env_randomizer_base.EnvRandomizerBase):
tf.logging.info("battery voltage is: {}".format(randomized_battery_voltage))
def _randomize_motor_strength(self, minitaur, lower_bound, upper_bound):
randomized_motor_strength_ratios = np.random.uniform(
[lower_bound] * minitaur.num_motors,
[upper_bound] * minitaur.num_motors)
randomized_motor_strength_ratios = np.random.uniform([lower_bound] * minitaur.num_motors,
[upper_bound] * minitaur.num_motors)
minitaur.SetMotorStrengthRatios(randomized_motor_strength_ratios)
tf.logging.info(
"motor strength is: {}".format(randomized_motor_strength_ratios))
tf.logging.info("motor strength is: {}".format(randomized_motor_strength_ratios))

View File

@@ -4,11 +4,11 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os, inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
parentdir = os.path.dirname(os.path.dirname(parentdir))
os.sys.path.insert(0,parentdir)
os.sys.path.insert(0, parentdir)
import math
import numpy as np
@@ -50,12 +50,10 @@ class MinitaurPushRandomizer(env_randomizer_base.EnvRandomizerBase):
self._perturbation_start_step = perturbation_start_step
self._perturbation_interval_steps = perturbation_interval_steps
self._perturbation_duration_steps = perturbation_duration_steps
self._horizontal_force_bound = (
horizontal_force_bound if horizontal_force_bound else
[_HORIZONTAL_FORCE_LOWER_BOUND, _HORIZONTAL_FORCE_UPPER_BOUND])
self._vertical_force_bound = (
vertical_force_bound if vertical_force_bound else
[_VERTICAL_FORCE_LOWER_BOUND, _VERTICAL_FORCE_UPPER_BOUND])
self._horizontal_force_bound = (horizontal_force_bound if horizontal_force_bound else
[_HORIZONTAL_FORCE_LOWER_BOUND, _HORIZONTAL_FORCE_UPPER_BOUND])
self._vertical_force_bound = (vertical_force_bound if vertical_force_bound else
[_VERTICAL_FORCE_LOWER_BOUND, _VERTICAL_FORCE_UPPER_BOUND])
def randomize_env(self, env):
"""Randomizes the simulation environment.
@@ -75,23 +73,20 @@ class MinitaurPushRandomizer(env_randomizer_base.EnvRandomizerBase):
"""
base_link_ids = env.minitaur.chassis_link_ids
if env.env_step_counter % self._perturbation_interval_steps == 0:
self._applied_link_id = base_link_ids[np.random.randint(
0, len(base_link_ids))]
horizontal_force_magnitude = np.random.uniform(
self._horizontal_force_bound[0], self._horizontal_force_bound[1])
self._applied_link_id = base_link_ids[np.random.randint(0, len(base_link_ids))]
horizontal_force_magnitude = np.random.uniform(self._horizontal_force_bound[0],
self._horizontal_force_bound[1])
theta = np.random.uniform(0, 2 * math.pi)
vertical_force_magnitude = np.random.uniform(
self._vertical_force_bound[0], self._vertical_force_bound[1])
vertical_force_magnitude = np.random.uniform(self._vertical_force_bound[0],
self._vertical_force_bound[1])
self._applied_force = horizontal_force_magnitude * np.array(
[math.cos(theta), math.sin(theta), 0]) + np.array(
[0, 0, -vertical_force_magnitude])
[math.cos(theta), math.sin(theta), 0]) + np.array([0, 0, -vertical_force_magnitude])
if (env.env_step_counter % self._perturbation_interval_steps <
self._perturbation_duration_steps) and (env.env_step_counter >=
self._perturbation_start_step):
env.pybullet_client.applyExternalForce(
objectUniqueId=env.minitaur.quadruped,
linkIndex=self._applied_link_id,
forceObj=self._applied_force,
posObj=[0.0, 0.0, 0.0],
flags=env.pybullet_client.LINK_FRAME)
env.pybullet_client.applyExternalForce(objectUniqueId=env.minitaur.quadruped,
linkIndex=self._applied_link_id,
forceObj=self._applied_force,
posObj=[0.0, 0.0, 0.0],
flags=env.pybullet_client.LINK_FRAME)

View File

@@ -4,11 +4,11 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os, inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
parentdir = os.path.dirname(os.path.dirname(parentdir))
os.sys.path.insert(0,parentdir)
os.sys.path.insert(0, parentdir)
import itertools
import math
@@ -62,8 +62,7 @@ class PoissonDisc2D(object):
self._grid = [None] * self._grid_size_x * self._grid_size_y
# Generate the first sample point and set it as an active site.
first_sample = np.array(
np.random.random_sample(2)) * [grid_length, grid_width]
first_sample = np.array(np.random.random_sample(2)) * [grid_length, grid_width]
self._active_list = [first_sample]
# Also store the sample point in the grid.
@@ -114,8 +113,7 @@ class PoissonDisc2D(object):
Returns:
Whether the point is inside the grid.
"""
return (0 <= point[0] < self._grid_length) and (0 <= point[1] <
self._grid_width)
return (0 <= point[0] < self._grid_length) and (0 <= point[1] < self._grid_width)
def _is_in_range(self, index2d):
"""Checks if the cell ID is within the grid.
@@ -127,8 +125,7 @@ class PoissonDisc2D(object):
Whether the cell (2D index) is inside the grid.
"""
return (0 <= index2d[0] < self._grid_size_x) and (0 <= index2d[1] <
self._grid_size_y)
return (0 <= index2d[0] < self._grid_size_x) and (0 <= index2d[1] < self._grid_size_y)
def _is_close_to_existing_points(self, point):
"""Checks if the point is close to any already sampled (and stored) points.
@@ -142,15 +139,13 @@ class PoissonDisc2D(object):
"""
px, py = self._point_to_index_2d(point)
# Now we can check nearby cells for existing points
for neighbor_cell in itertools.product(
xrange(px - 1, px + 2), xrange(py - 1, py + 2)):
for neighbor_cell in itertools.product(xrange(px - 1, px + 2), xrange(py - 1, py + 2)):
if not self._is_in_range(neighbor_cell):
continue
maybe_a_point = self._grid[self._index_2d_to_1d(neighbor_cell)]
if maybe_a_point is not None and np.linalg.norm(
maybe_a_point - point) < self._min_radius:
if maybe_a_point is not None and np.linalg.norm(maybe_a_point - point) < self._min_radius:
return True
return False
@@ -168,8 +163,8 @@ class PoissonDisc2D(object):
random_angle = np.random.uniform(0, 2 * math.pi)
# The sampled 2D points near the active point
sample = random_radius * np.array(
[np.cos(random_angle), np.sin(random_angle)]) + active_point
sample = random_radius * np.array([np.cos(random_angle),
np.sin(random_angle)]) + active_point
if not self._is_in_grid(sample):
continue
@@ -214,12 +209,11 @@ class TerrainType(enum.Enum):
class MinitaurTerrainRandomizer(env_randomizer_base.EnvRandomizerBase):
"""Generates an uneven terrain in the gym env."""
def __init__(
self,
terrain_type=TerrainType.TRIANGLE_MESH,
mesh_filename="robotics/reinforcement_learning/minitaur/envs/testdata/"
"triangle_mesh_terrain/terrain9735.obj",
mesh_scale=None):
def __init__(self,
terrain_type=TerrainType.TRIANGLE_MESH,
mesh_filename="robotics/reinforcement_learning/minitaur/envs/testdata/"
"triangle_mesh_terrain/terrain9735.obj",
mesh_scale=None):
"""Initializes the randomizer.
Args:
@@ -261,9 +255,7 @@ class MinitaurTerrainRandomizer(env_randomizer_base.EnvRandomizerBase):
flags=1,
meshScale=self._mesh_scale)
env.ground_id = env.pybullet_client.createMultiBody(
baseMass=0,
baseCollisionShapeIndex=terrain_collision_shape_id,
basePosition=[0, 0, 0])
baseMass=0, baseCollisionShapeIndex=terrain_collision_shape_id, basePosition=[0, 0, 0])
def _generate_convex_blocks(self, env):
"""Adds random convex blocks to the flat ground.
@@ -277,8 +269,7 @@ class MinitaurTerrainRandomizer(env_randomizer_base.EnvRandomizerBase):
"""
poisson_disc = PoissonDisc2D(_GRID_LENGTH, _GRID_WIDTH, _MIN_BLOCK_DISTANCE,
_MAX_SAMPLE_SIZE)
poisson_disc = PoissonDisc2D(_GRID_LENGTH, _GRID_WIDTH, _MIN_BLOCK_DISTANCE, _MAX_SAMPLE_SIZE)
block_centers = poisson_disc.generate()
@@ -289,12 +280,10 @@ class MinitaurTerrainRandomizer(env_randomizer_base.EnvRandomizerBase):
# Do not place blocks near the point [0, 0], where the robot will start.
if abs(shifted_center[0]) < 1.0 and abs(shifted_center[1]) < 1.0:
continue
half_length = np.random.uniform(_MIN_BLOCK_LENGTH, _MAX_BLOCK_LENGTH) / (
2 * math.sqrt(2))
half_length = np.random.uniform(_MIN_BLOCK_LENGTH, _MAX_BLOCK_LENGTH) / (2 * math.sqrt(2))
half_height = np.random.uniform(_MIN_BLOCK_HEIGHT, _MAX_BLOCK_HEIGHT) / 2
box_id = env.pybullet_client.createCollisionShape(
env.pybullet_client.GEOM_BOX,
halfExtents=[half_length, half_length, half_height])
env.pybullet_client.GEOM_BOX, halfExtents=[half_length, half_length, half_height])
env.pybullet_client.createMultiBody(
baseMass=0,
baseCollisionShapeIndex=box_id,

View File

@@ -2,10 +2,10 @@
"""
import os, inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
os.sys.path.insert(0,parentdir)
os.sys.path.insert(0, parentdir)
import collections
import copy
@@ -24,9 +24,8 @@ OVERHEAT_SHUTDOWN_TORQUE = 2.45
OVERHEAT_SHUTDOWN_TIME = 1.0
LEG_POSITION = ["front_left", "back_left", "front_right", "back_right"]
MOTOR_NAMES = [
"motor_front_leftL_joint", "motor_front_leftR_joint",
"motor_back_leftL_joint", "motor_back_leftR_joint",
"motor_front_rightL_joint", "motor_front_rightR_joint",
"motor_front_leftL_joint", "motor_front_leftR_joint", "motor_back_leftL_joint",
"motor_back_leftR_joint", "motor_front_rightL_joint", "motor_front_rightR_joint",
"motor_back_rightL_joint", "motor_back_rightR_joint"
]
_CHASSIS_NAME_PATTERN = re.compile(r"chassis\D*center")
@@ -141,10 +140,9 @@ class Minitaur(object):
if self._accurate_motor_model_enabled:
self._kp = motor_kp
self._kd = motor_kd
self._motor_model = motor.MotorModel(
torque_control_enabled=self._torque_control_enabled,
kp=self._kp,
kd=self._kd)
self._motor_model = motor.MotorModel(torque_control_enabled=self._torque_control_enabled,
kp=self._kp,
kd=self._kd)
elif self._pd_control_enabled:
self._kp = 8
self._kd = 0.3
@@ -188,17 +186,14 @@ class Minitaur(object):
self._link_urdf = []
num_bodies = self._pybullet_client.getNumJoints(self.quadruped)
for body_id in range(-1, num_bodies): # -1 is for the base link.
inertia = self._pybullet_client.getDynamicsInfo(self.quadruped,
body_id)[2]
inertia = self._pybullet_client.getDynamicsInfo(self.quadruped, body_id)[2]
self._link_urdf.append(inertia)
# We need to use id+1 to index self._link_urdf because it has the base
# (index = -1) at the first element.
self._base_inertia_urdf = [
self._link_urdf[chassis_id + 1] for chassis_id in self._chassis_link_ids
]
self._leg_inertia_urdf = [
self._link_urdf[leg_id + 1] for leg_id in self._leg_link_ids
]
self._leg_inertia_urdf = [self._link_urdf[leg_id + 1] for leg_id in self._leg_link_ids]
self._leg_inertia_urdf.extend(
[self._link_urdf[motor_id + 1] for motor_id in self._motor_link_ids])
@@ -239,13 +234,10 @@ class Minitaur(object):
num_joints = self._pybullet_client.getNumJoints(self.quadruped)
for i in range(num_joints):
joint_info = self._pybullet_client.getJointInfo(self.quadruped, i)
self._pybullet_client.changeDynamics(
joint_info[0], -1, linearDamping=0, angularDamping=0)
self._pybullet_client.changeDynamics(joint_info[0], -1, linearDamping=0, angularDamping=0)
def _BuildMotorIdList(self):
self._motor_id_list = [
self._joint_name_to_id[motor_name] for motor_name in MOTOR_NAMES
]
self._motor_id_list = [self._joint_name_to_id[motor_name] for motor_name in MOTOR_NAMES]
def IsObservationValid(self):
"""Whether the observation is valid for the current time step.
@@ -284,10 +276,10 @@ class Minitaur(object):
useFixedBase=self._on_rack,
flags=self._pybullet_client.URDF_USE_SELF_COLLISION)
else:
self.quadruped = self._pybullet_client.loadURDF(
"%s/quadruped/minitaur.urdf" % self._urdf_root,
init_position,
useFixedBase=self._on_rack)
self.quadruped = self._pybullet_client.loadURDF("%s/quadruped/minitaur.urdf" %
self._urdf_root,
init_position,
useFixedBase=self._on_rack)
self._BuildJointNameToIdDict()
self._BuildUrdfIds()
if self._remove_default_joint_damping:
@@ -297,10 +289,9 @@ class Minitaur(object):
self._RecordInertiaInfoFromURDF()
self.ResetPose(add_constraint=True)
else:
self._pybullet_client.resetBasePositionAndOrientation(
self.quadruped, init_position, INIT_ORIENTATION)
self._pybullet_client.resetBaseVelocity(self.quadruped, [0, 0, 0],
[0, 0, 0])
self._pybullet_client.resetBasePositionAndOrientation(self.quadruped, init_position,
INIT_ORIENTATION)
self._pybullet_client.resetBaseVelocity(self.quadruped, [0, 0, 0], [0, 0, 0])
self.ResetPose(add_constraint=False)
self._overheat_counter = np.zeros(self.num_motors)
self._motor_enabled_list = [True] * self.num_motors
@@ -325,25 +316,22 @@ class Minitaur(object):
self.ReceiveObservation()
def _SetMotorTorqueById(self, motor_id, torque):
self._pybullet_client.setJointMotorControl2(
bodyIndex=self.quadruped,
jointIndex=motor_id,
controlMode=self._pybullet_client.TORQUE_CONTROL,
force=torque)
self._pybullet_client.setJointMotorControl2(bodyIndex=self.quadruped,
jointIndex=motor_id,
controlMode=self._pybullet_client.TORQUE_CONTROL,
force=torque)
def _SetDesiredMotorAngleById(self, motor_id, desired_angle):
self._pybullet_client.setJointMotorControl2(
bodyIndex=self.quadruped,
jointIndex=motor_id,
controlMode=self._pybullet_client.POSITION_CONTROL,
targetPosition=desired_angle,
positionGain=self._kp,
velocityGain=self._kd,
force=self._max_force)
self._pybullet_client.setJointMotorControl2(bodyIndex=self.quadruped,
jointIndex=motor_id,
controlMode=self._pybullet_client.POSITION_CONTROL,
targetPosition=desired_angle,
positionGain=self._kp,
velocityGain=self._kd,
force=self._max_force)
def _SetDesiredMotorAngleByName(self, motor_name, desired_angle):
self._SetDesiredMotorAngleById(self._joint_name_to_id[motor_name],
desired_angle)
self._SetDesiredMotorAngleById(self._joint_name_to_id[motor_name], desired_angle)
def ResetPose(self, add_constraint):
"""Reset the pose of the minitaur.
@@ -367,59 +355,53 @@ class Minitaur(object):
knee_angle = -2.1834
leg_position = LEG_POSITION[leg_id]
self._pybullet_client.resetJointState(
self.quadruped,
self._joint_name_to_id["motor_" + leg_position + "L_joint"],
self._motor_direction[2 * leg_id] * half_pi,
targetVelocity=0)
self._pybullet_client.resetJointState(
self.quadruped,
self._joint_name_to_id["knee_" + leg_position + "L_link"],
self._motor_direction[2 * leg_id] * knee_angle,
targetVelocity=0)
self._pybullet_client.resetJointState(
self.quadruped,
self._joint_name_to_id["motor_" + leg_position + "R_joint"],
self._motor_direction[2 * leg_id + 1] * half_pi,
targetVelocity=0)
self._pybullet_client.resetJointState(
self.quadruped,
self._joint_name_to_id["knee_" + leg_position + "R_link"],
self._motor_direction[2 * leg_id + 1] * knee_angle,
targetVelocity=0)
self._pybullet_client.resetJointState(self.quadruped,
self._joint_name_to_id["motor_" + leg_position +
"L_joint"],
self._motor_direction[2 * leg_id] * half_pi,
targetVelocity=0)
self._pybullet_client.resetJointState(self.quadruped,
self._joint_name_to_id["knee_" + leg_position +
"L_link"],
self._motor_direction[2 * leg_id] * knee_angle,
targetVelocity=0)
self._pybullet_client.resetJointState(self.quadruped,
self._joint_name_to_id["motor_" + leg_position +
"R_joint"],
self._motor_direction[2 * leg_id + 1] * half_pi,
targetVelocity=0)
self._pybullet_client.resetJointState(self.quadruped,
self._joint_name_to_id["knee_" + leg_position +
"R_link"],
self._motor_direction[2 * leg_id + 1] * knee_angle,
targetVelocity=0)
if add_constraint:
self._pybullet_client.createConstraint(
self.quadruped,
self._joint_name_to_id["knee_" + leg_position + "R_link"],
self.quadruped,
self._joint_name_to_id["knee_" + leg_position + "L_link"],
self._pybullet_client.JOINT_POINT2POINT, [0, 0, 0],
KNEE_CONSTRAINT_POINT_RIGHT, KNEE_CONSTRAINT_POINT_LEFT)
self.quadruped, self._joint_name_to_id["knee_" + leg_position + "R_link"],
self.quadruped, self._joint_name_to_id["knee_" + leg_position + "L_link"],
self._pybullet_client.JOINT_POINT2POINT, [0, 0, 0], KNEE_CONSTRAINT_POINT_RIGHT,
KNEE_CONSTRAINT_POINT_LEFT)
if self._accurate_motor_model_enabled or self._pd_control_enabled:
# Disable the default motor in pybullet.
self._pybullet_client.setJointMotorControl2(
bodyIndex=self.quadruped,
jointIndex=(
self._joint_name_to_id["motor_" + leg_position + "L_joint"]),
jointIndex=(self._joint_name_to_id["motor_" + leg_position + "L_joint"]),
controlMode=self._pybullet_client.VELOCITY_CONTROL,
targetVelocity=0,
force=knee_friction_force)
self._pybullet_client.setJointMotorControl2(
bodyIndex=self.quadruped,
jointIndex=(
self._joint_name_to_id["motor_" + leg_position + "R_joint"]),
jointIndex=(self._joint_name_to_id["motor_" + leg_position + "R_joint"]),
controlMode=self._pybullet_client.VELOCITY_CONTROL,
targetVelocity=0,
force=knee_friction_force)
else:
self._SetDesiredMotorAngleByName(
"motor_" + leg_position + "L_joint",
self._motor_direction[2 * leg_id] * half_pi)
self._SetDesiredMotorAngleByName(
"motor_" + leg_position + "R_joint",
self._motor_direction[2 * leg_id + 1] * half_pi)
self._SetDesiredMotorAngleByName("motor_" + leg_position + "L_joint",
self._motor_direction[2 * leg_id] * half_pi)
self._SetDesiredMotorAngleByName("motor_" + leg_position + "R_joint",
self._motor_direction[2 * leg_id + 1] * half_pi)
self._pybullet_client.setJointMotorControl2(
bodyIndex=self.quadruped,
@@ -440,8 +422,7 @@ class Minitaur(object):
Returns:
The position of minitaur's base.
"""
position, _ = (
self._pybullet_client.getBasePositionAndOrientation(self.quadruped))
position, _ = (self._pybullet_client.getBasePositionAndOrientation(self.quadruped))
return position
def GetTrueBaseRollPitchYaw(self):
@@ -464,10 +445,9 @@ class Minitaur(object):
"""
delayed_orientation = np.array(
self._control_observation[3 * self.num_motors:3 * self.num_motors + 4])
delayed_roll_pitch_yaw = self._pybullet_client.getEulerFromQuaternion(
delayed_orientation)
roll_pitch_yaw = self._AddSensorNoise(
np.array(delayed_roll_pitch_yaw), self._observation_noise_stdev[3])
delayed_roll_pitch_yaw = self._pybullet_client.getEulerFromQuaternion(delayed_orientation)
roll_pitch_yaw = self._AddSensorNoise(np.array(delayed_roll_pitch_yaw),
self._observation_noise_stdev[3])
return roll_pitch_yaw
def GetTrueMotorAngles(self):
@@ -492,9 +472,8 @@ class Minitaur(object):
Returns:
Motor angles polluted by noise and latency, mapped to [-pi, pi].
"""
motor_angles = self._AddSensorNoise(
np.array(self._control_observation[0:self.num_motors]),
self._observation_noise_stdev[0])
motor_angles = self._AddSensorNoise(np.array(self._control_observation[0:self.num_motors]),
self._observation_noise_stdev[0])
return MapToMinusPiToPi(motor_angles)
def GetTrueMotorVelocities(self):
@@ -518,8 +497,7 @@ class Minitaur(object):
Velocities of all eight motors polluted by noise and latency.
"""
return self._AddSensorNoise(
np.array(
self._control_observation[self.num_motors:2 * self.num_motors]),
np.array(self._control_observation[self.num_motors:2 * self.num_motors]),
self._observation_noise_stdev[1])
def GetTrueMotorTorques(self):
@@ -546,8 +524,7 @@ class Minitaur(object):
Motor torques of all eight motors polluted by noise and latency.
"""
return self._AddSensorNoise(
np.array(
self._control_observation[2 * self.num_motors:3 * self.num_motors]),
np.array(self._control_observation[2 * self.num_motors:3 * self.num_motors]),
self._observation_noise_stdev[2])
def GetTrueBaseOrientation(self):
@@ -556,8 +533,7 @@ class Minitaur(object):
Returns:
The orientation of minitaur's base.
"""
_, orientation = (
self._pybullet_client.getBasePositionAndOrientation(self.quadruped))
_, orientation = (self._pybullet_client.getBasePositionAndOrientation(self.quadruped))
return orientation
def GetBaseOrientation(self):
@@ -567,8 +543,7 @@ class Minitaur(object):
Returns:
The orientation of minitaur's base polluted by noise and latency.
"""
return self._pybullet_client.getQuaternionFromEuler(
self.GetBaseRollPitchYaw())
return self._pybullet_client.getQuaternionFromEuler(self.GetBaseRollPitchYaw())
def GetTrueBaseRollPitchYawRate(self):
"""Get the rate of orientation change of the minitaur's base in euler angle.
@@ -588,8 +563,7 @@ class Minitaur(object):
and latency.
"""
return self._AddSensorNoise(
np.array(self._control_observation[3 * self.num_motors + 4:
3 * self.num_motors + 7]),
np.array(self._control_observation[3 * self.num_motors + 4:3 * self.num_motors + 7]),
self._observation_noise_stdev[4])
def GetActionDimension(self):
@@ -618,12 +592,9 @@ class Minitaur(object):
"""
if self._motor_velocity_limit < np.inf:
current_motor_angle = self.GetTrueMotorAngles()
motor_commands_max = (
current_motor_angle + self.time_step * self._motor_velocity_limit)
motor_commands_min = (
current_motor_angle - self.time_step * self._motor_velocity_limit)
motor_commands = np.clip(motor_commands, motor_commands_min,
motor_commands_max)
motor_commands_max = (current_motor_angle + self.time_step * self._motor_velocity_limit)
motor_commands_min = (current_motor_angle - self.time_step * self._motor_velocity_limit)
motor_commands = np.clip(motor_commands, motor_commands_min, motor_commands_max)
# Set the kp and kd for all the motors if not provided as an argument.
if motor_kps is None:
motor_kps = np.full(8, self._kp)
@@ -642,8 +613,7 @@ class Minitaur(object):
self._overheat_counter[i] += 1
else:
self._overheat_counter[i] = 0
if (self._overheat_counter[i] >
OVERHEAT_SHUTDOWN_TIME / self.time_step):
if (self._overheat_counter[i] > OVERHEAT_SHUTDOWN_TIME / self.time_step):
self._motor_enabled_list[i] = False
# The torque is already in the observation space because we use
@@ -651,19 +621,17 @@ class Minitaur(object):
self._observed_motor_torques = observed_torque
# Transform into the motor space when applying the torque.
self._applied_motor_torque = np.multiply(actual_torque,
self._motor_direction)
self._applied_motor_torque = np.multiply(actual_torque, self._motor_direction)
for motor_id, motor_torque, motor_enabled in zip(
self._motor_id_list, self._applied_motor_torque,
self._motor_enabled_list):
for motor_id, motor_torque, motor_enabled in zip(self._motor_id_list,
self._applied_motor_torque,
self._motor_enabled_list):
if motor_enabled:
self._SetMotorTorqueById(motor_id, motor_torque)
else:
self._SetMotorTorqueById(motor_id, 0)
else:
torque_commands = -1 * motor_kps * (
q - motor_commands) - motor_kds * qdot
torque_commands = -1 * motor_kps * (q - motor_commands) - motor_kds * qdot
# The torque is already in the observation space because we use
# GetMotorAngles and GetMotorVelocities.
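
The else-branch above is a plain PD law on the position error; a self-contained numeric sketch of the same expression (gain values are placeholders, not the env defaults):

import numpy as np

def pd_torque_commands(q, qdot, motor_commands, motor_kps, motor_kds):
  # torque = -kp * (q - q_desired) - kd * qdot, evaluated element-wise per motor.
  return (-1 * np.asarray(motor_kps) * (np.asarray(q) - np.asarray(motor_commands)) -
          np.asarray(motor_kds) * np.asarray(qdot))

q = np.full(8, 1.50)              # measured motor angles (rad)
qdot = np.zeros(8)                # measured motor velocities (rad/s)
motor_commands = np.full(8, 1.57) # desired motor angles (rad)
print(pd_torque_commands(q, qdot, motor_commands,
                         motor_kps=np.full(8, 1.2), motor_kds=np.full(8, 0.02)))
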
@@ -673,14 +641,12 @@ class Minitaur(object):
self._applied_motor_torques = np.multiply(self._observed_motor_torques,
self._motor_direction)
for motor_id, motor_torque in zip(self._motor_id_list,
self._applied_motor_torques):
for motor_id, motor_torque in zip(self._motor_id_list, self._applied_motor_torques):
self._SetMotorTorqueById(motor_id, motor_torque)
else:
motor_commands_with_direction = np.multiply(motor_commands,
self._motor_direction)
for motor_id, motor_command_with_direction in zip(
self._motor_id_list, motor_commands_with_direction):
motor_commands_with_direction = np.multiply(motor_commands, self._motor_direction)
for motor_id, motor_command_with_direction in zip(self._motor_id_list,
motor_commands_with_direction):
self._SetDesiredMotorAngleById(motor_id, motor_command_with_direction)
def ConvertFromLegModel(self, actions):
@@ -698,13 +664,13 @@ class Minitaur(object):
quater_pi = math.pi / 4
for i in range(self.num_motors):
action_idx = int(i // 2)
forward_backward_component = (-scale_for_singularity * quater_pi * (
actions[action_idx + half_num_motors] + offset_for_singularity))
forward_backward_component = (
-scale_for_singularity * quater_pi *
(actions[action_idx + half_num_motors] + offset_for_singularity))
extension_component = (-1)**i * quater_pi * actions[action_idx]
if i >= half_num_motors:
extension_component = -extension_component
motor_angle[i] = (
math.pi + forward_backward_component + extension_component)
motor_angle[i] = (math.pi + forward_backward_component + extension_component)
return motor_angle
def GetBaseMassesFromURDF(self):
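
ConvertFromLegModel above builds each motor angle as pi plus a shared forward/backward term and an alternating extension term; a small numeric sketch of that structure for a single leg (the scale and offset values are illustrative):

import math

quarter_pi = math.pi / 4
scale_for_singularity, offset_for_singularity = 1.0, 0.0  # illustrative parameters
leg_action, paired_action = 0.2, 0.1  # actions[action_idx] and actions[action_idx + half_num_motors]

forward_backward = -scale_for_singularity * quarter_pi * (paired_action + offset_for_singularity)
for i in (0, 1):  # the two motors driving this leg
  extension = (-1)**i * quarter_pi * leg_action
  print("motor", i, "angle =", math.pi + forward_backward + extension)
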
@@ -734,12 +700,10 @@ class Minitaur(object):
the length of self._chassis_link_ids.
"""
if len(base_mass) != len(self._chassis_link_ids):
raise ValueError(
"The length of base_mass {} and self._chassis_link_ids {} are not "
"the same.".format(len(base_mass), len(self._chassis_link_ids)))
raise ValueError("The length of base_mass {} and self._chassis_link_ids {} are not "
"the same.".format(len(base_mass), len(self._chassis_link_ids)))
for chassis_id, chassis_mass in zip(self._chassis_link_ids, base_mass):
self._pybullet_client.changeDynamics(
self.quadruped, chassis_id, mass=chassis_mass)
self._pybullet_client.changeDynamics(self.quadruped, chassis_id, mass=chassis_mass)
def SetLegMasses(self, leg_masses):
"""Set the mass of the legs.
@@ -759,12 +723,10 @@ class Minitaur(object):
raise ValueError("The number of values passed to SetLegMasses are "
"different than number of leg links and motors.")
for leg_id, leg_mass in zip(self._leg_link_ids, leg_masses):
self._pybullet_client.changeDynamics(
self.quadruped, leg_id, mass=leg_mass)
self._pybullet_client.changeDynamics(self.quadruped, leg_id, mass=leg_mass)
motor_masses = leg_masses[len(self._leg_link_ids):]
for link_id, motor_mass in zip(self._motor_link_ids, motor_masses):
self._pybullet_client.changeDynamics(
self.quadruped, link_id, mass=motor_mass)
self._pybullet_client.changeDynamics(self.quadruped, link_id, mass=motor_mass)
def SetBaseInertias(self, base_inertias):
"""Set the inertias of minitaur's base.
@@ -779,17 +741,15 @@ class Minitaur(object):
negative values.
"""
if len(base_inertias) != len(self._chassis_link_ids):
raise ValueError(
"The length of base_inertias {} and self._chassis_link_ids {} are "
"not the same.".format(
len(base_inertias), len(self._chassis_link_ids)))
for chassis_id, chassis_inertia in zip(self._chassis_link_ids,
base_inertias):
raise ValueError("The length of base_inertias {} and self._chassis_link_ids {} are "
"not the same.".format(len(base_inertias), len(self._chassis_link_ids)))
for chassis_id, chassis_inertia in zip(self._chassis_link_ids, base_inertias):
for inertia_value in chassis_inertia:
if (np.asarray(inertia_value) < 0).any():
raise ValueError("Values in inertia matrix should be non-negative.")
self._pybullet_client.changeDynamics(
self.quadruped, chassis_id, localInertiaDiagonal=chassis_inertia)
self._pybullet_client.changeDynamics(self.quadruped,
chassis_id,
localInertiaDiagonal=chassis_inertia)
def SetLegInertias(self, leg_inertias):
"""Set the inertias of the legs.
@@ -813,16 +773,18 @@ class Minitaur(object):
for inertia_value in leg_inertias:
if (np.asarray(inertia_value) < 0).any():
raise ValueError("Values in inertia matrix should be non-negative.")
self._pybullet_client.changeDynamics(
self.quadruped, leg_id, localInertiaDiagonal=leg_inertia)
self._pybullet_client.changeDynamics(self.quadruped,
leg_id,
localInertiaDiagonal=leg_inertia)
motor_inertias = leg_inertias[len(self._leg_link_ids):]
for link_id, motor_inertia in zip(self._motor_link_ids, motor_inertias):
for inertia_value in motor_inertias:
if (np.asarray(inertia_value) < 0).any():
raise ValueError("Values in inertia matrix should be non-negative.")
self._pybullet_client.changeDynamics(
self.quadruped, link_id, localInertiaDiagonal=motor_inertia)
self._pybullet_client.changeDynamics(self.quadruped,
link_id,
localInertiaDiagonal=motor_inertia)
def SetFootFriction(self, foot_friction):
"""Set the lateral friction of the feet.
@@ -832,8 +794,7 @@ class Minitaur(object):
shared by all four feet.
"""
for link_id in self._foot_link_ids:
self._pybullet_client.changeDynamics(
self.quadruped, link_id, lateralFriction=foot_friction)
self._pybullet_client.changeDynamics(self.quadruped, link_id, lateralFriction=foot_friction)
# TODO(b/73748980): Add more API's to set other contact parameters.
def SetFootRestitution(self, foot_restitution):
@@ -844,8 +805,7 @@ class Minitaur(object):
This value is shared by all four feet.
"""
for link_id in self._foot_link_ids:
self._pybullet_client.changeDynamics(
self.quadruped, link_id, restitution=foot_restitution)
self._pybullet_client.changeDynamics(self.quadruped, link_id, restitution=foot_restitution)
def SetJointFriction(self, joint_frictions):
for knee_joint_id, friction in zip(self._foot_link_ids, joint_frictions):
@@ -901,9 +861,8 @@ class Minitaur(object):
return self._observation_history[-1]
remaining_latency = latency - n_steps_ago * self.time_step
blend_alpha = remaining_latency / self.time_step
observation = (
(1.0 - blend_alpha) * np.array(self._observation_history[n_steps_ago])
+ blend_alpha * np.array(self._observation_history[n_steps_ago + 1]))
observation = ((1.0 - blend_alpha) * np.array(self._observation_history[n_steps_ago]) +
blend_alpha * np.array(self._observation_history[n_steps_ago + 1]))
return observation
def _GetPDObservation(self):
@@ -913,15 +872,13 @@ class Minitaur(object):
return (np.array(q), np.array(qdot))
def _GetControlObservation(self):
control_delayed_observation = self._GetDelayedObservation(
self._control_latency)
control_delayed_observation = self._GetDelayedObservation(self._control_latency)
return control_delayed_observation
def _AddSensorNoise(self, sensor_values, noise_stdev):
if noise_stdev <= 0:
return sensor_values
observation = sensor_values + np.random.normal(
scale=noise_stdev, size=sensor_values.shape)
observation = sensor_values + np.random.normal(scale=noise_stdev, size=sensor_values.shape)
return observation
def SetControlLatency(self, latency):
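
The two helpers above implement the latency and noise model: blend the two history entries that bracket the requested latency, then add zero-mean Gaussian noise. A self-contained sketch of that pipeline (history contents, latency and stdev are made up):

import numpy as np

def delayed_observation(history, latency, time_step):
  # history[0] is the most recent entry; older entries follow.
  n_steps_ago = int(latency / time_step)
  if n_steps_ago + 1 >= len(history):
    return np.asarray(history[-1])
  blend_alpha = (latency - n_steps_ago * time_step) / time_step
  return ((1.0 - blend_alpha) * np.asarray(history[n_steps_ago]) +
          blend_alpha * np.asarray(history[n_steps_ago + 1]))

history = [np.full(3, v) for v in (3.0, 2.0, 1.0)]        # newest first
obs = delayed_observation(history, latency=0.0015, time_step=0.001)
obs = obs + np.random.normal(scale=0.01, size=obs.shape)  # sensor noise
print(obs)                                                # roughly [1.5, 1.5, 1.5]
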

View File

@@ -3,10 +3,10 @@
"""
import math
import os, inspect
import os, inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
os.sys.path.insert(0,parentdir)
os.sys.path.insert(0, parentdir)
from gym import spaces
import numpy as np
@@ -31,10 +31,7 @@ class MinitaurAlternatingLegsEnv(minitaur_gym_env.MinitaurGymEnv):
expenditure.
"""
metadata = {
"render.modes": ["human", "rgb_array"],
"video.frames_per_second": 66
}
metadata = {"render.modes": ["human", "rgb_array"], "video.frames_per_second": 66}
def __init__(self,
urdf_version=None,
@@ -81,23 +78,23 @@ class MinitaurAlternatingLegsEnv(minitaur_gym_env.MinitaurGymEnv):
# _swing_offset and _extension_offset are used to mimic the bent legs.
self._swing_offset = np.zeros(NUM_LEGS)
self._extension_offset = np.zeros(NUM_LEGS)
super(MinitaurAlternatingLegsEnv, self).__init__(
urdf_version=urdf_version,
accurate_motor_model_enabled=True,
motor_overheat_protection=True,
hard_reset=False,
motor_kp=motor_kp,
motor_kd=motor_kd,
remove_default_joint_damping=remove_default_joint_damping,
control_latency=control_latency,
pd_latency=pd_latency,
on_rack=on_rack,
render=render,
num_steps_to_log=num_steps_to_log,
env_randomizer=env_randomizer,
log_path=log_path,
control_time_step=control_time_step,
action_repeat=action_repeat)
super(MinitaurAlternatingLegsEnv,
self).__init__(urdf_version=urdf_version,
accurate_motor_model_enabled=True,
motor_overheat_protection=True,
hard_reset=False,
motor_kp=motor_kp,
motor_kd=motor_kd,
remove_default_joint_damping=remove_default_joint_damping,
control_latency=control_latency,
pd_latency=pd_latency,
on_rack=on_rack,
render=render,
num_steps_to_log=num_steps_to_log,
env_randomizer=env_randomizer,
log_path=log_path,
control_time_step=control_time_step,
action_repeat=action_repeat)
action_dim = 8
action_high = np.array([0.1] * action_dim)
@@ -113,33 +110,29 @@ class MinitaurAlternatingLegsEnv(minitaur_gym_env.MinitaurGymEnv):
# [swing leg 1, swing leg 2, swing leg 3, swing leg 4,
# extension leg 1, extension leg 2, extension leg 3, extension leg 4]
init_pose = [
INIT_SWING_POS + self._swing_offset[0],
INIT_SWING_POS + self._swing_offset[1],
INIT_SWING_POS + self._swing_offset[2],
INIT_SWING_POS + self._swing_offset[3],
INIT_SWING_POS + self._swing_offset[0], INIT_SWING_POS + self._swing_offset[1],
INIT_SWING_POS + self._swing_offset[2], INIT_SWING_POS + self._swing_offset[3],
INIT_EXTENSION_POS + self._extension_offset[0],
INIT_EXTENSION_POS + self._extension_offset[1],
INIT_EXTENSION_POS + self._extension_offset[2],
INIT_EXTENSION_POS + self._extension_offset[3]
]
initial_motor_angles = self._convert_from_leg_model(init_pose)
super(MinitaurAlternatingLegsEnv, self).reset(
initial_motor_angles=initial_motor_angles, reset_duration=0.5)
super(MinitaurAlternatingLegsEnv, self).reset(initial_motor_angles=initial_motor_angles,
reset_duration=0.5)
return self._get_observation()
def _convert_from_leg_model(self, leg_pose):
motor_pose = np.zeros(NUM_MOTORS)
for i in range(NUM_LEGS):
motor_pose[2 * i] = leg_pose[NUM_LEGS + i] - (-1)**(i / 2) * leg_pose[i]
motor_pose[2 * i
+ 1] = leg_pose[NUM_LEGS + i] + (-1)**(i / 2) * leg_pose[i]
motor_pose[2 * i + 1] = leg_pose[NUM_LEGS + i] + (-1)**(i / 2) * leg_pose[i]
return motor_pose
def _signal(self, t):
initial_pose = np.array([
INIT_SWING_POS, INIT_SWING_POS, INIT_SWING_POS, INIT_SWING_POS,
INIT_EXTENSION_POS, INIT_EXTENSION_POS, INIT_EXTENSION_POS,
INIT_EXTENSION_POS
INIT_SWING_POS, INIT_SWING_POS, INIT_SWING_POS, INIT_SWING_POS, INIT_EXTENSION_POS,
INIT_EXTENSION_POS, INIT_EXTENSION_POS, INIT_EXTENSION_POS
])
amplitude = STEP_AMPLITUDE
period = STEP_PERIOD
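
The _signal above starts from the nominal pose and superimposes a periodic stepping pattern scaled by STEP_AMPLITUDE and STEP_PERIOD; one common way to write such an alternating-leg signal (the phase split and constants here are illustrative, not the env's exact gait):

import math
import numpy as np

def alternating_leg_signal(t, initial_pose, amplitude, period):
  # Diagonal leg pairs are half a period out of phase with each other.
  phase = 2.0 * math.pi * t / period
  swing = amplitude * np.sin(phase + np.array([0.0, math.pi, math.pi, 0.0]))
  pose = np.array(initial_pose, dtype=float)
  pose[:4] += swing  # only the four swing components oscillate in this sketch
  return pose

init_pose = [0.0] * 4 + [2.0] * 4  # [swing x 4, extension x 4], illustrative values
print(alternating_leg_signal(t=0.25, initial_pose=init_pose, amplitude=0.3, period=1.0))
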

View File

@@ -3,10 +3,10 @@
"""
import time
import os, inspect
import os, inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
os.sys.path.insert(0,parentdir)
os.sys.path.insert(0, parentdir)
import os
import numpy as np
@@ -50,13 +50,11 @@ def hand_tuned_agent(observation, timestamp):
pitch_compensation = pitch_gain * pitch + pitch_dot_gain * pitch_dot
first_leg = [
0, -pitch_compensation, -pitch_compensation, 0, 0,
-pitch_compensation - roll_compensation,
0, -pitch_compensation, -pitch_compensation, 0, 0, -pitch_compensation - roll_compensation,
pitch_compensation + roll_compensation, 0
]
second_leg = [
-pitch_compensation, 0, 0, -pitch_compensation,
pitch_compensation - roll_compensation, 0, 0,
-pitch_compensation, 0, 0, -pitch_compensation, pitch_compensation - roll_compensation, 0, 0,
-pitch_compensation + roll_compensation
]
if (timestamp // minitaur_alternating_legs_env.STEP_PERIOD) % 2:
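
The hand-tuned agent above is a PD-style balance rule: each compensation term is a gain times the tilt angle plus a gain times its rate, and the two leg patterns alternate every STEP_PERIOD. A compact sketch of the compensation terms (gains are placeholders):

def balance_compensation(roll, roll_dot, pitch, pitch_dot,
                         roll_gain=0.5, roll_dot_gain=0.05,
                         pitch_gain=0.5, pitch_dot_gain=0.05):
  # Corrections added to the nominal leg pose to counter body tilt.
  roll_compensation = roll_gain * roll + roll_dot_gain * roll_dot
  pitch_compensation = pitch_gain * pitch + pitch_dot_gain * pitch_dot
  return roll_compensation, pitch_compensation

print(balance_compensation(roll=0.05, roll_dot=-0.10, pitch=0.02, pitch_dot=0.0))
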
@@ -94,8 +92,7 @@ def hand_tuned_balance_example(log_path=None):
for _ in range(steps):
# Sleep to prevent serial buffer overflow on microcontroller.
time.sleep(0.002)
action = hand_tuned_agent(observation,
environment.minitaur.GetTimeSinceReset())
action = hand_tuned_agent(observation, environment.minitaur.GetTimeSinceReset())
observation, reward, done, _ = environment.step(action)
sum_reward += reward
if done:

View File

@@ -4,10 +4,10 @@
import math
import random
import os, inspect
import os, inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
os.sys.path.insert(0,parentdir)
os.sys.path.insert(0, parentdir)
from gym import spaces
import numpy as np
@@ -52,61 +52,52 @@ class MinitaurBallGymEnv(minitaur_gym_env.MinitaurGymEnv):
that its walking gait is clearer to visualize.
render: Whether to render the simulation.
"""
super(MinitaurBallGymEnv, self).__init__(
urdf_root=urdf_root,
self_collision_enabled=self_collision_enabled,
pd_control_enabled=pd_control_enabled,
leg_model_enabled=leg_model_enabled,
on_rack=on_rack,
render=render)
super(MinitaurBallGymEnv, self).__init__(urdf_root=urdf_root,
self_collision_enabled=self_collision_enabled,
pd_control_enabled=pd_control_enabled,
leg_model_enabled=leg_model_enabled,
on_rack=on_rack,
render=render)
self._cam_dist = 2.0
self._cam_yaw = -70
self._cam_pitch = -30
self.action_space = spaces.Box(np.array([-1]), np.array([1]))
self.observation_space = spaces.Box(np.array([-math.pi, 0]),
np.array([math.pi, 100]))
self.observation_space = spaces.Box(np.array([-math.pi, 0]), np.array([math.pi, 100]))
def reset(self):
self._ball_id = 0
super(MinitaurBallGymEnv, self).reset()
self._init_ball_theta = random.uniform(-INIT_BALL_ANGLE, INIT_BALL_ANGLE)
self._init_ball_distance = INIT_BALL_DISTANCE
self._ball_pos = [self._init_ball_distance *
math.cos(self._init_ball_theta),
self._init_ball_distance *
math.sin(self._init_ball_theta), 1]
self._ball_pos = [
self._init_ball_distance * math.cos(self._init_ball_theta),
self._init_ball_distance * math.sin(self._init_ball_theta), 1
]
self._ball_id = self._pybullet_client.loadURDF(
"%s/sphere_with_restitution.urdf" % self._urdf_root, self._ball_pos)
return self._get_observation()
def _get_observation(self):
world_translation_minitaur, world_rotation_minitaur = (
self._pybullet_client.getBasePositionAndOrientation(
self.minitaur.quadruped))
self._pybullet_client.getBasePositionAndOrientation(self.minitaur.quadruped))
world_translation_ball, world_rotation_ball = (
self._pybullet_client.getBasePositionAndOrientation(self._ball_id))
minitaur_translation_world, minitaur_rotation_world = (
self._pybullet_client.invertTransform(world_translation_minitaur,
world_rotation_minitaur))
minitaur_translation_ball, _ = (
self._pybullet_client.multiplyTransforms(minitaur_translation_world,
minitaur_rotation_world,
world_translation_ball,
world_rotation_ball))
distance = math.sqrt(minitaur_translation_ball[0]**2 +
minitaur_translation_ball[1]**2)
angle = math.atan2(minitaur_translation_ball[0],
minitaur_translation_ball[1])
minitaur_translation_world, minitaur_rotation_world = (self._pybullet_client.invertTransform(
world_translation_minitaur, world_rotation_minitaur))
minitaur_translation_ball, _ = (self._pybullet_client.multiplyTransforms(
minitaur_translation_world, minitaur_rotation_world, world_translation_ball,
world_rotation_ball))
distance = math.sqrt(minitaur_translation_ball[0]**2 + minitaur_translation_ball[1]**2)
angle = math.atan2(minitaur_translation_ball[0], minitaur_translation_ball[1])
self._observation = [angle - math.pi / 2, distance]
return self._observation
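
_get_observation above reduces to the ball's planar bearing and distance in the robot frame; a minimal planar version that skips the quaternion transforms pybullet performs (coordinates are assumed to already be expressed in the robot frame):

import math

def ball_observation(ball_x, ball_y):
  distance = math.hypot(ball_x, ball_y)
  angle = math.atan2(ball_x, ball_y) - math.pi / 2  # same atan2(x, y) convention as above
  return [angle, distance]

print(ball_observation(1.0, 1.0))  # -> [-pi/4, sqrt(2)]
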
def _transform_action_to_motor_command(self, action):
if self._leg_model_enabled:
for i, action_component in enumerate(action):
if not (-self._action_bound - ACTION_EPS <=
action_component <= self._action_bound + ACTION_EPS):
raise ValueError("{}th action {} out of bounds.".format
(i, action_component))
if not (-self._action_bound - ACTION_EPS <= action_component <=
self._action_bound + ACTION_EPS):
raise ValueError("{}th action {} out of bounds.".format(i, action_component))
action = self._apply_steering_to_locomotion(action)
action = self.minitaur.ConvertFromLegModel(action)
return action
@@ -126,15 +117,12 @@ class MinitaurBallGymEnv(minitaur_gym_env.MinitaurGymEnv):
return action
def _distance_to_ball(self):
world_translation_minitaur, _ = (
self._pybullet_client.getBasePositionAndOrientation(
self.minitaur.quadruped))
world_translation_ball, _ = (
self._pybullet_client.getBasePositionAndOrientation(
self._ball_id))
distance = math.sqrt(
(world_translation_ball[0] - world_translation_minitaur[0])**2 +
(world_translation_ball[1] - world_translation_minitaur[1])**2)
world_translation_minitaur, _ = (self._pybullet_client.getBasePositionAndOrientation(
self.minitaur.quadruped))
world_translation_ball, _ = (self._pybullet_client.getBasePositionAndOrientation(
self._ball_id))
distance = math.sqrt((world_translation_ball[0] - world_translation_minitaur[0])**2 +
(world_translation_ball[1] - world_translation_minitaur[1])**2)
return distance
def _goal_state(self):

View File

@@ -31,4 +31,4 @@ def main():
if __name__ == '__main__':
main()
main()

View File

@@ -6,13 +6,13 @@ It is the result of first pass system identification for the derpy robot. The
"""
import math
import os, inspect
import os, inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
os.sys.path.insert(0,parentdir)
os.sys.path.insert(0, parentdir)
import numpy as np
from pybullet_envs.minitaur.envs import minitaur
from pybullet_envs.minitaur.envs import minitaur
KNEE_CONSTRAINT_POINT_LONG = [0, 0.0055, 0.088]
KNEE_CONSTRAINT_POINT_SHORT = [0, 0.0055, 0.100]
@@ -46,13 +46,12 @@ class MinitaurDerpy(minitaur.Minitaur):
"%s/quadruped/minitaur_derpy.urdf" % self._urdf_root,
init_position,
useFixedBase=self._on_rack,
flags=(
self._pybullet_client.URDF_USE_SELF_COLLISION_EXCLUDE_PARENT))
flags=(self._pybullet_client.URDF_USE_SELF_COLLISION_EXCLUDE_PARENT))
else:
self.quadruped = self._pybullet_client.loadURDF(
"%s/quadruped/minitaur_derpy.urdf" % self._urdf_root,
init_position,
useFixedBase=self._on_rack)
self.quadruped = self._pybullet_client.loadURDF("%s/quadruped/minitaur_derpy.urdf" %
self._urdf_root,
init_position,
useFixedBase=self._on_rack)
self._BuildJointNameToIdDict()
self._BuildUrdfIds()
if self._remove_default_joint_damping:
@@ -62,10 +61,9 @@ class MinitaurDerpy(minitaur.Minitaur):
self._RecordInertiaInfoFromURDF()
self.ResetPose(add_constraint=True)
else:
self._pybullet_client.resetBasePositionAndOrientation(
self.quadruped, init_position, minitaur.INIT_ORIENTATION)
self._pybullet_client.resetBaseVelocity(self.quadruped, [0, 0, 0],
[0, 0, 0])
self._pybullet_client.resetBasePositionAndOrientation(self.quadruped, init_position,
minitaur.INIT_ORIENTATION)
self._pybullet_client.resetBaseVelocity(self.quadruped, [0, 0, 0], [0, 0, 0])
self.ResetPose(add_constraint=False)
self._overheat_counter = np.zeros(self.num_motors)
@@ -103,68 +101,60 @@ class MinitaurDerpy(minitaur.Minitaur):
knee_angle = -2.1834
leg_position = minitaur.LEG_POSITION[leg_id]
self._pybullet_client.resetJointState(
self.quadruped,
self._joint_name_to_id["motor_" + leg_position + "L_joint"],
self._motor_direction[2 * leg_id] * half_pi,
targetVelocity=0)
self._pybullet_client.resetJointState(
self.quadruped,
self._joint_name_to_id["knee_" + leg_position + "L_joint"],
self._motor_direction[2 * leg_id] * knee_angle,
targetVelocity=0)
self._pybullet_client.resetJointState(
self.quadruped,
self._joint_name_to_id["motor_" + leg_position + "R_joint"],
self._motor_direction[2 * leg_id + 1] * half_pi,
targetVelocity=0)
self._pybullet_client.resetJointState(
self.quadruped,
self._joint_name_to_id["knee_" + leg_position + "R_joint"],
self._motor_direction[2 * leg_id + 1] * knee_angle,
targetVelocity=0)
self._pybullet_client.resetJointState(self.quadruped,
self._joint_name_to_id["motor_" + leg_position +
"L_joint"],
self._motor_direction[2 * leg_id] * half_pi,
targetVelocity=0)
self._pybullet_client.resetJointState(self.quadruped,
self._joint_name_to_id["knee_" + leg_position +
"L_joint"],
self._motor_direction[2 * leg_id] * knee_angle,
targetVelocity=0)
self._pybullet_client.resetJointState(self.quadruped,
self._joint_name_to_id["motor_" + leg_position +
"R_joint"],
self._motor_direction[2 * leg_id + 1] * half_pi,
targetVelocity=0)
self._pybullet_client.resetJointState(self.quadruped,
self._joint_name_to_id["knee_" + leg_position +
"R_joint"],
self._motor_direction[2 * leg_id + 1] * knee_angle,
targetVelocity=0)
if add_constraint:
if leg_id < 2:
self._pybullet_client.createConstraint(
self.quadruped,
self._joint_name_to_id["knee_" + leg_position + "R_joint"],
self.quadruped,
self._joint_name_to_id["knee_" + leg_position + "L_joint"],
self._pybullet_client.JOINT_POINT2POINT, [0, 0, 0],
KNEE_CONSTRAINT_POINT_SHORT, KNEE_CONSTRAINT_POINT_LONG)
self.quadruped, self._joint_name_to_id["knee_" + leg_position + "R_joint"],
self.quadruped, self._joint_name_to_id["knee_" + leg_position + "L_joint"],
self._pybullet_client.JOINT_POINT2POINT, [0, 0, 0], KNEE_CONSTRAINT_POINT_SHORT,
KNEE_CONSTRAINT_POINT_LONG)
else:
self._pybullet_client.createConstraint(
self.quadruped,
self._joint_name_to_id["knee_" + leg_position + "R_joint"],
self.quadruped,
self._joint_name_to_id["knee_" + leg_position + "L_joint"],
self._pybullet_client.JOINT_POINT2POINT, [0, 0, 0],
KNEE_CONSTRAINT_POINT_LONG, KNEE_CONSTRAINT_POINT_SHORT)
self.quadruped, self._joint_name_to_id["knee_" + leg_position + "R_joint"],
self.quadruped, self._joint_name_to_id["knee_" + leg_position + "L_joint"],
self._pybullet_client.JOINT_POINT2POINT, [0, 0, 0], KNEE_CONSTRAINT_POINT_LONG,
KNEE_CONSTRAINT_POINT_SHORT)
if self._accurate_motor_model_enabled or self._pd_control_enabled:
# Disable the default motor in pybullet.
self._pybullet_client.setJointMotorControl2(
bodyIndex=self.quadruped,
jointIndex=(
self._joint_name_to_id["motor_" + leg_position + "L_joint"]),
jointIndex=(self._joint_name_to_id["motor_" + leg_position + "L_joint"]),
controlMode=self._pybullet_client.VELOCITY_CONTROL,
targetVelocity=0,
force=knee_friction_force)
self._pybullet_client.setJointMotorControl2(
bodyIndex=self.quadruped,
jointIndex=(
self._joint_name_to_id["motor_" + leg_position + "R_joint"]),
jointIndex=(self._joint_name_to_id["motor_" + leg_position + "R_joint"]),
controlMode=self._pybullet_client.VELOCITY_CONTROL,
targetVelocity=0,
force=knee_friction_force)
else:
self._SetDesiredMotorAngleByName(
"motor_" + leg_position + "L_joint",
self._motor_direction[2 * leg_id] * half_pi)
self._SetDesiredMotorAngleByName(
"motor_" + leg_position + "R_joint",
self._motor_direction[2 * leg_id + 1] * half_pi)
self._SetDesiredMotorAngleByName("motor_" + leg_position + "L_joint",
self._motor_direction[2 * leg_id] * half_pi)
self._SetDesiredMotorAngleByName("motor_" + leg_position + "R_joint",
self._motor_direction[2 * leg_id + 1] * half_pi)
self._pybullet_client.setJointMotorControl2(
bodyIndex=self.quadruped,

View File

@@ -32,10 +32,7 @@ class MinitaurFourLegStandEnv(minitaur_gym_env.MinitaurGymEnv):
action to zero and the height of the robot base. It prefers a similar pose to
the signal while keeping balance.
"""
metadata = {
"render.modes": ["human", "rgb_array"],
"video.frames_per_second": 166
}
metadata = {"render.modes": ["human", "rgb_array"], "video.frames_per_second": 166}
def __init__(self,
urdf_version=None,
@@ -101,23 +98,23 @@ class MinitaurFourLegStandEnv(minitaur_gym_env.MinitaurGymEnv):
self._extension_offset = np.zeros(NUM_LEGS)
self._use_angular_velocity_in_observation = use_motor_angle_in_observation
self._use_motor_angle_in_observation = use_motor_angle_in_observation
super(MinitaurFourLegStandEnv, self).__init__(
urdf_version=urdf_version,
control_time_step=control_time_step,
action_repeat=action_repeat,
remove_default_joint_damping=remove_default_joint_damping,
accurate_motor_model_enabled=True,
motor_overheat_protection=True,
hard_reset=hard_reset,
motor_kp=motor_kp,
motor_kd=motor_kd,
control_latency=control_latency,
pd_latency=pd_latency,
on_rack=on_rack,
render=render,
env_randomizer=env_randomizer,
reflection = False,
log_path=log_path)
super(MinitaurFourLegStandEnv,
self).__init__(urdf_version=urdf_version,
control_time_step=control_time_step,
action_repeat=action_repeat,
remove_default_joint_damping=remove_default_joint_damping,
accurate_motor_model_enabled=True,
motor_overheat_protection=True,
hard_reset=hard_reset,
motor_kp=motor_kp,
motor_kd=motor_kd,
control_latency=control_latency,
pd_latency=pd_latency,
on_rack=on_rack,
render=render,
env_randomizer=env_randomizer,
reflection=False,
log_path=log_path)
action_dim = 4
action_low = np.array([-1.0] * action_dim)
@@ -138,20 +135,17 @@ class MinitaurFourLegStandEnv(minitaur_gym_env.MinitaurGymEnv):
# [swing leg 1, swing leg 2, swing leg 3, swing leg 4,
# extension leg 1, extension leg 2, extension leg 3, extension leg 4]
init_pose = [
INIT_SWING_POS + self._swing_offset[0],
INIT_SWING_POS + self._swing_offset[1],
INIT_SWING_POS + self._swing_offset[2],
INIT_SWING_POS + self._swing_offset[3],
INIT_SWING_POS + self._swing_offset[0], INIT_SWING_POS + self._swing_offset[1],
INIT_SWING_POS + self._swing_offset[2], INIT_SWING_POS + self._swing_offset[3],
INIT_EXTENSION_POS + self._extension_offset[0],
INIT_EXTENSION_POS + self._extension_offset[1],
INIT_EXTENSION_POS + self._extension_offset[2],
INIT_EXTENSION_POS + self._extension_offset[3]
]
initial_motor_angles = self._convert_from_leg_model(init_pose)
self._pybullet_client.resetBasePositionAndOrientation(
0, [0, 0, 0], [0, 0, 0, 1])
super(MinitaurFourLegStandEnv, self).reset(
initial_motor_angles=initial_motor_angles, reset_duration=0.5)
self._pybullet_client.resetBasePositionAndOrientation(0, [0, 0, 0], [0, 0, 0, 1])
super(MinitaurFourLegStandEnv, self).reset(initial_motor_angles=initial_motor_angles,
reset_duration=0.5)
return self._get_observation()
def step(self, action):
@@ -180,10 +174,8 @@ class MinitaurFourLegStandEnv(minitaur_gym_env.MinitaurGymEnv):
time.sleep(time_to_sleep)
base_pos = self.minitaur.GetBasePosition()
# Keep the previous orientation of the camera set by the user.
[yaw, pitch,
dist] = self._pybullet_client.getDebugVisualizerCamera()[8:11]
self._pybullet_client.resetDebugVisualizerCamera(dist, yaw, pitch,
base_pos)
[yaw, pitch, dist] = self._pybullet_client.getDebugVisualizerCamera()[8:11]
self._pybullet_client.resetDebugVisualizerCamera(dist, yaw, pitch, base_pos)
action = self._transform_action_to_motor_command(action)
t = self._env_step_counter % MOVING_FLOOR_TOTAL_STEP
if t == 0:
@@ -197,8 +189,8 @@ class MinitaurFourLegStandEnv(minitaur_gym_env.MinitaurGymEnv):
t = float(float(t) / float(MOVING_FLOOR_TOTAL_STEP))
ori = map(operator.add, [x * (1.0 - t) for x in self._cur_ori],
[x * t for x in self._goal_ori])
ori=list(ori)
print("ori=",ori)
ori = list(ori)
print("ori=", ori)
self._pybullet_client.resetBasePositionAndOrientation(0, [0, 0, 0], ori)
if self._env_step_counter % PERTURBATION_TOTAL_STEP == 0:
self._perturbation_magnitude = random.uniform(0.0, 0.0)
@@ -218,8 +210,8 @@ class MinitaurFourLegStandEnv(minitaur_gym_env.MinitaurGymEnv):
obs = self._get_true_observation()
reward = self._reward()
if self._log_path is not None:
minitaur_logging.update_episode_proto(self._episode_proto, self.minitaur,
action, self._env_step_counter)
minitaur_logging.update_episode_proto(self._episode_proto, self.minitaur, action,
self._env_step_counter)
if done:
self.minitaur.Terminate()
return np.array(self._get_observation()), reward, done, {}
@@ -228,15 +220,13 @@ class MinitaurFourLegStandEnv(minitaur_gym_env.MinitaurGymEnv):
motor_pose = np.zeros(NUM_MOTORS)
for i in range(NUM_LEGS):
motor_pose[2 * i] = leg_pose[NUM_LEGS + i] - (-1)**(i / 2) * leg_pose[i]
motor_pose[2 * i
+ 1] = leg_pose[NUM_LEGS + i] + (-1)**(i / 2) * leg_pose[i]
motor_pose[2 * i + 1] = leg_pose[NUM_LEGS + i] + (-1)**(i / 2) * leg_pose[i]
return motor_pose
def _signal(self, t):
initial_pose = np.array([
INIT_SWING_POS, INIT_SWING_POS, INIT_SWING_POS, INIT_SWING_POS,
INIT_EXTENSION_POS, INIT_EXTENSION_POS, INIT_EXTENSION_POS,
INIT_EXTENSION_POS
INIT_SWING_POS, INIT_SWING_POS, INIT_SWING_POS, INIT_SWING_POS, INIT_EXTENSION_POS,
INIT_EXTENSION_POS, INIT_EXTENSION_POS, INIT_EXTENSION_POS
])
signal = initial_pose
return signal
@@ -268,8 +258,7 @@ class MinitaurFourLegStandEnv(minitaur_gym_env.MinitaurGymEnv):
rot_mat = self._pybullet_client.getMatrixFromQuaternion(orientation)
local_up = rot_mat[6:]
_, _, height = self.minitaur.GetBasePosition()
local_global_up_dot_product = np.dot(
np.asarray([0, 0, 1]), np.asarray(local_up))
local_global_up_dot_product = np.dot(np.asarray([0, 0, 1]), np.asarray(local_up))
return local_global_up_dot_product < 0.85 or height < 0.15
def _reward(self):
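
is_fallen above compares the projection of the body's up axis onto world z against 0.85 and also checks the base height; for a unit quaternion (x, y, z, w) that projection reduces to 1 - 2*(x^2 + y^2), so a standalone version of the test looks like:

def is_fallen(orientation, base_height, up_threshold=0.85, height_threshold=0.15):
  # orientation is a unit quaternion (x, y, z, w); thresholds copied from the code above.
  x, y, _, _ = orientation
  local_global_up_dot_product = 1.0 - 2.0 * (x * x + y * y)
  return local_global_up_dot_product < up_threshold or base_height < height_threshold

print(is_fallen((0.0, 0.0, 0.0, 1.0), base_height=0.20))        # upright -> False
print(is_fallen((0.7071, 0.0, 0.0, 0.7071), base_height=0.20))  # rolled 90 degrees -> True
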

View File

@@ -5,7 +5,6 @@ import numpy as np
import tensorflow as tf
from pybullet_envs.minitaur.envs import minitaur_four_leg_stand_env
FLAGS = tf.flags.FLAGS
tf.flags.DEFINE_string("log_path", None, "The directory to write the log file.")
NUM_LEGS = 4

View File

@@ -4,10 +4,10 @@
import math
import time
import os, inspect
import os, inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
os.sys.path.insert(0,parentdir)
os.sys.path.insert(0, parentdir)
import gym
from gym import spaces
@@ -64,10 +64,7 @@ class MinitaurGymEnv(gym.Env):
expenditure.
"""
metadata = {
"render.modes": ["human", "rgb_array"],
"video.frames_per_second": 100
}
metadata = {"render.modes": ["human", "rgb_array"], "video.frames_per_second": 100}
def __init__(self,
urdf_root=pybullet_data.getDataPath(),
@@ -184,17 +181,14 @@ class MinitaurGymEnv(gym.Env):
self._action_repeat = 1
self.control_time_step = self._time_step * self._action_repeat
# TODO(b/73829334): Fix the value of self._num_bullet_solver_iterations.
self._num_bullet_solver_iterations = int(
NUM_SIMULATION_ITERATION_STEPS / self._action_repeat)
self._num_bullet_solver_iterations = int(NUM_SIMULATION_ITERATION_STEPS / self._action_repeat)
self._urdf_root = urdf_root
self._self_collision_enabled = self_collision_enabled
self._motor_velocity_limit = motor_velocity_limit
self._observation = []
self._true_observation = []
self._objectives = []
self._objective_weights = [
distance_weight, energy_weight, drift_weight, shake_weight
]
self._objective_weights = [distance_weight, energy_weight, drift_weight, shake_weight]
self._env_step_counter = 0
self._num_steps_to_log = num_steps_to_log
self._is_render = render
@@ -226,12 +220,10 @@ class MinitaurGymEnv(gym.Env):
self._urdf_version = urdf_version
self._ground_id = None
self._reflection = reflection
self._env_randomizers = convert_to_list(
env_randomizer) if env_randomizer else []
self._env_randomizers = convert_to_list(env_randomizer) if env_randomizer else []
self._episode_proto = minitaur_logging_pb2.MinitaurEpisode()
if self._is_render:
self._pybullet_client = bullet_client.BulletClient(
connection_mode=pybullet.GUI)
self._pybullet_client = bullet_client.BulletClient(connection_mode=pybullet.GUI)
else:
self._pybullet_client = bullet_client.BulletClient()
if self._urdf_version is None:
@@ -249,7 +241,7 @@ class MinitaurGymEnv(gym.Env):
self._hard_reset = hard_reset # This assignment need to be after reset()
def close(self):
if self._env_step_counter>0:
if self._env_step_counter > 0:
self.logging.save_episode(self._episode_proto)
self.minitaur.Terminate()
@@ -257,53 +249,48 @@ class MinitaurGymEnv(gym.Env):
self._env_randomizers.append(env_randomizer)
def reset(self, initial_motor_angles=None, reset_duration=1.0):
self._pybullet_client.configureDebugVisualizer(
self._pybullet_client.COV_ENABLE_RENDERING, 0)
if self._env_step_counter>0:
self._pybullet_client.configureDebugVisualizer(self._pybullet_client.COV_ENABLE_RENDERING, 0)
if self._env_step_counter > 0:
self.logging.save_episode(self._episode_proto)
self._episode_proto = minitaur_logging_pb2.MinitaurEpisode()
minitaur_logging.preallocate_episode_proto(self._episode_proto,
self._num_steps_to_log)
minitaur_logging.preallocate_episode_proto(self._episode_proto, self._num_steps_to_log)
if self._hard_reset:
self._pybullet_client.resetSimulation()
self._pybullet_client.setPhysicsEngineParameter(
numSolverIterations=int(self._num_bullet_solver_iterations))
self._pybullet_client.setTimeStep(self._time_step)
self._ground_id = self._pybullet_client.loadURDF(
"%s/plane.urdf" % self._urdf_root)
self._ground_id = self._pybullet_client.loadURDF("%s/plane.urdf" % self._urdf_root)
if (self._reflection):
self._pybullet_client.changeVisualShape(self._ground_id,-1,rgbaColor=[1,1,1,0.8])
self._pybullet_client.configureDebugVisualizer(self._pybullet_client.COV_ENABLE_PLANAR_REFLECTION,self._ground_id)
self._pybullet_client.changeVisualShape(self._ground_id, -1, rgbaColor=[1, 1, 1, 0.8])
self._pybullet_client.configureDebugVisualizer(
self._pybullet_client.COV_ENABLE_PLANAR_REFLECTION, self._ground_id)
self._pybullet_client.setGravity(0, 0, -10)
acc_motor = self._accurate_motor_model_enabled
motor_protect = self._motor_overheat_protection
if self._urdf_version not in MINIATUR_URDF_VERSION_MAP:
raise ValueError(
"%s is not a supported urdf_version." % self._urdf_version)
raise ValueError("%s is not a supported urdf_version." % self._urdf_version)
else:
self.minitaur = (
MINIATUR_URDF_VERSION_MAP[self._urdf_version](
pybullet_client=self._pybullet_client,
action_repeat=self._action_repeat,
urdf_root=self._urdf_root,
time_step=self._time_step,
self_collision_enabled=self._self_collision_enabled,
motor_velocity_limit=self._motor_velocity_limit,
pd_control_enabled=self._pd_control_enabled,
accurate_motor_model_enabled=acc_motor,
remove_default_joint_damping=self._remove_default_joint_damping,
motor_kp=self._motor_kp,
motor_kd=self._motor_kd,
control_latency=self._control_latency,
pd_latency=self._pd_latency,
observation_noise_stdev=self._observation_noise_stdev,
torque_control_enabled=self._torque_control_enabled,
motor_overheat_protection=motor_protect,
on_rack=self._on_rack))
self.minitaur.Reset(
reload_urdf=False,
default_motor_angles=initial_motor_angles,
reset_time=reset_duration)
self.minitaur = (MINIATUR_URDF_VERSION_MAP[self._urdf_version](
pybullet_client=self._pybullet_client,
action_repeat=self._action_repeat,
urdf_root=self._urdf_root,
time_step=self._time_step,
self_collision_enabled=self._self_collision_enabled,
motor_velocity_limit=self._motor_velocity_limit,
pd_control_enabled=self._pd_control_enabled,
accurate_motor_model_enabled=acc_motor,
remove_default_joint_damping=self._remove_default_joint_damping,
motor_kp=self._motor_kp,
motor_kd=self._motor_kd,
control_latency=self._control_latency,
pd_latency=self._pd_latency,
observation_noise_stdev=self._observation_noise_stdev,
torque_control_enabled=self._torque_control_enabled,
motor_overheat_protection=motor_protect,
on_rack=self._on_rack))
self.minitaur.Reset(reload_urdf=False,
default_motor_angles=initial_motor_angles,
reset_time=reset_duration)
# Loop over all env randomizers.
for env_randomizer in self._env_randomizers:
@@ -313,10 +300,9 @@ class MinitaurGymEnv(gym.Env):
self._env_step_counter = 0
self._last_base_position = [0, 0, 0]
self._objectives = []
self._pybullet_client.resetDebugVisualizerCamera(
self._cam_dist, self._cam_yaw, self._cam_pitch, [0, 0, 0])
self._pybullet_client.configureDebugVisualizer(
self._pybullet_client.COV_ENABLE_RENDERING, 1)
self._pybullet_client.resetDebugVisualizerCamera(self._cam_dist, self._cam_yaw,
self._cam_pitch, [0, 0, 0])
self._pybullet_client.configureDebugVisualizer(self._pybullet_client.COV_ENABLE_RENDERING, 1)
return self._get_observation()
def seed(self, seed=None):
@@ -328,8 +314,7 @@ class MinitaurGymEnv(gym.Env):
for i, action_component in enumerate(action):
if not (-self._action_bound - ACTION_EPS <= action_component <=
self._action_bound + ACTION_EPS):
raise ValueError("{}th action {} out of bounds.".format(
i, action_component))
raise ValueError("{}th action {} out of bounds.".format(i, action_component))
action = self.minitaur.ConvertFromLegModel(action)
return action
@@ -361,10 +346,8 @@ class MinitaurGymEnv(gym.Env):
time.sleep(time_to_sleep)
base_pos = self.minitaur.GetBasePosition()
# Keep the previous orientation of the camera set by the user.
[yaw, pitch,
dist] = self._pybullet_client.getDebugVisualizerCamera()[8:11]
self._pybullet_client.resetDebugVisualizerCamera(dist, yaw, pitch,
base_pos)
[yaw, pitch, dist] = self._pybullet_client.getDebugVisualizerCamera()[8:11]
self._pybullet_client.resetDebugVisualizerCamera(dist, yaw, pitch, base_pos)
for env_randomizer in self._env_randomizers:
env_randomizer.randomize_step(self)
@@ -374,8 +357,8 @@ class MinitaurGymEnv(gym.Env):
reward = self._reward()
done = self._termination()
if self._log_path is not None:
minitaur_logging.update_episode_proto(self._episode_proto, self.minitaur,
action, self._env_step_counter)
minitaur_logging.update_episode_proto(self._episode_proto, self.minitaur, action,
self._env_step_counter)
self._env_step_counter += 1
if done:
self.minitaur.Terminate()
@@ -392,11 +375,11 @@ class MinitaurGymEnv(gym.Env):
pitch=self._cam_pitch,
roll=0,
upAxisIndex=2)
proj_matrix = self._pybullet_client.computeProjectionMatrixFOV(
fov=60,
aspect=float(RENDER_WIDTH) / RENDER_HEIGHT,
nearVal=0.1,
farVal=100.0)
proj_matrix = self._pybullet_client.computeProjectionMatrixFOV(fov=60,
aspect=float(RENDER_WIDTH) /
RENDER_HEIGHT,
nearVal=0.1,
farVal=100.0)
(_, _, px, _, _) = self._pybullet_client.getCameraImage(
width=RENDER_WIDTH,
height=RENDER_HEIGHT,
@@ -413,9 +396,8 @@ class MinitaurGymEnv(gym.Env):
Returns:
A numpy array of motor angles.
"""
return np.array(
self._observation[MOTOR_ANGLE_OBSERVATION_INDEX:
MOTOR_ANGLE_OBSERVATION_INDEX + NUM_MOTORS])
return np.array(self._observation[MOTOR_ANGLE_OBSERVATION_INDEX:MOTOR_ANGLE_OBSERVATION_INDEX +
NUM_MOTORS])
def get_minitaur_motor_velocities(self):
"""Get the minitaur's motor velocities.
@@ -424,8 +406,8 @@ class MinitaurGymEnv(gym.Env):
A numpy array of motor velocities.
"""
return np.array(
self._observation[MOTOR_VELOCITY_OBSERVATION_INDEX:
MOTOR_VELOCITY_OBSERVATION_INDEX + NUM_MOTORS])
self._observation[MOTOR_VELOCITY_OBSERVATION_INDEX:MOTOR_VELOCITY_OBSERVATION_INDEX +
NUM_MOTORS])
def get_minitaur_motor_torques(self):
"""Get the minitaur's motor torques.
@@ -434,8 +416,8 @@ class MinitaurGymEnv(gym.Env):
A numpy array of motor torques.
"""
return np.array(
self._observation[MOTOR_TORQUE_OBSERVATION_INDEX:
MOTOR_TORQUE_OBSERVATION_INDEX + NUM_MOTORS])
self._observation[MOTOR_TORQUE_OBSERVATION_INDEX:MOTOR_TORQUE_OBSERVATION_INDEX +
NUM_MOTORS])
def get_minitaur_base_orientation(self):
"""Get the minitaur's base orientation, represented by a quaternion.
@@ -459,8 +441,7 @@ class MinitaurGymEnv(gym.Env):
rot_mat = self._pybullet_client.getMatrixFromQuaternion(orientation)
local_up = rot_mat[6:]
pos = self.minitaur.GetBasePosition()
return (np.dot(np.asarray([0, 0, 1]), np.asarray(local_up)) < 0.85 or
pos[2] < 0.13)
return (np.dot(np.asarray([0, 0, 1]), np.asarray(local_up)) < 0.85 or pos[2] < 0.13)
def _termination(self):
position = self.minitaur.GetBasePosition()
@@ -483,9 +464,7 @@ class MinitaurGymEnv(gym.Env):
np.dot(self.minitaur.GetMotorTorques(),
self.minitaur.GetMotorVelocities())) * self._time_step
objectives = [forward_reward, energy_reward, drift_reward, shake_reward]
weighted_objectives = [
o * w for o, w in zip(objectives, self._objective_weights)
]
weighted_objectives = [o * w for o, w in zip(objectives, self._objective_weights)]
reward = sum(weighted_objectives)
self._objectives.append(objectives)
return reward
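
The reward above is just the weighted sum of the four objectives (forward progress, energy, drift, shake); a tiny worked example with placeholder objective values and weights:

objectives = [0.30, -0.02, -0.01, -0.005]   # forward, energy, drift, shake (made up)
objective_weights = [1.0, 0.005, 0.0, 0.0]  # illustrative weights
weighted_objectives = [o * w for o, w in zip(objectives, objective_weights)]
print(sum(weighted_objectives))             # 0.2999
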
@@ -552,10 +531,8 @@ class MinitaurGymEnv(gym.Env):
upper_bound = np.zeros(self._get_observation_dimension())
num_motors = self.minitaur.num_motors
upper_bound[0:num_motors] = math.pi # Joint angle.
upper_bound[num_motors:2 * num_motors] = (
motor.MOTOR_SPEED_LIMIT) # Joint velocity.
upper_bound[2 * num_motors:3 * num_motors] = (
motor.OBSERVED_TORQUE_LIMIT) # Joint torque.
upper_bound[num_motors:2 * num_motors] = (motor.MOTOR_SPEED_LIMIT) # Joint velocity.
upper_bound[2 * num_motors:3 * num_motors] = (motor.OBSERVED_TORQUE_LIMIT) # Joint torque.
upper_bound[3 * num_motors:] = 1.0 # Quaternion of base orientation.
return upper_bound
@@ -577,7 +554,6 @@ class MinitaurGymEnv(gym.Env):
_seed = seed
_step = step
def set_time_step(self, control_step, simulation_step=0.001):
"""Sets the time step of the environment.
@@ -591,18 +567,15 @@ class MinitaurGymEnv(gym.Env):
ValueError: If the control step is smaller than the simulation step.
"""
if control_step < simulation_step:
raise ValueError(
"Control step should be larger than or equal to simulation step.")
raise ValueError("Control step should be larger than or equal to simulation step.")
self.control_time_step = control_step
self._time_step = simulation_step
self._action_repeat = int(round(control_step / simulation_step))
self._num_bullet_solver_iterations = (
NUM_SIMULATION_ITERATION_STEPS / self._action_repeat)
self._num_bullet_solver_iterations = (NUM_SIMULATION_ITERATION_STEPS / self._action_repeat)
self._pybullet_client.setPhysicsEngineParameter(
numSolverIterations=self._num_bullet_solver_iterations)
self._pybullet_client.setTimeStep(self._time_step)
self.minitaur.SetTimeSteps(
action_repeat=self._action_repeat, simulation_step=self._time_step)
self.minitaur.SetTimeSteps(action_repeat=self._action_repeat, simulation_step=self._time_step)
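
set_time_step above derives the action repeat and the per-step solver iteration budget from the control and simulation step sizes; a quick numeric check of that arithmetic (the 300 used for NUM_SIMULATION_ITERATION_STEPS is an assumed value):

NUM_SIMULATION_ITERATION_STEPS = 300  # assumed value for illustration

def derive_stepping(control_step, simulation_step=0.001):
  if control_step < simulation_step:
    raise ValueError("Control step should be larger than or equal to simulation step.")
  action_repeat = int(round(control_step / simulation_step))
  num_bullet_solver_iterations = NUM_SIMULATION_ITERATION_STEPS / action_repeat
  return action_repeat, num_bullet_solver_iterations

print(derive_stepping(control_step=0.006))  # -> (6, 50.0)
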
@property
def pybullet_client(self):

View File

@@ -8,8 +8,8 @@ import os
import inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(os.path.dirname(currentdir)))
print("parentdir=",parentdir)
os.sys.path.insert(0,parentdir)
print("parentdir=", parentdir)
os.sys.path.insert(0, parentdir)
import argparse
import numpy as np
@@ -17,7 +17,6 @@ import tensorflow as tf
from pybullet_envs.minitaur.envs import minitaur_gym_env
import time
#FLAGS = flags.FLAGS
#flags.DEFINE_enum(
# "example_name", "sine", ["sine", "reset", "stand", "overheat"],
@@ -60,7 +59,7 @@ def ResetPoseExample(log_path=None):
action = [math.pi / 2] * 8
for _ in range(steps):
_, _, done, _ = environment.step(action)
time.sleep(1./100.)
time.sleep(1. / 100.)
if done:
break
@@ -104,7 +103,7 @@ def MotorOverheatExample(log_path=None):
observation, _, _, _ = environment.step(action)
current_row.extend(observation.tolist())
actions_and_observations.append(current_row)
time.sleep(1./100.)
time.sleep(1. / 100.)
if FLAGS.output_filename is not None:
WriteToCSV(FLAGS.output_filename, actions_and_observations)
@@ -151,7 +150,7 @@ def SineStandExample(log_path=None):
observation, _, _, _ = environment.step(action)
current_row.extend(observation.tolist())
actions_and_observations.append(current_row)
time.sleep(1./100.)
time.sleep(1. / 100.)
if FLAGS.output_filename is not None:
WriteToCSV(FLAGS.output_filename, actions_and_observations)
@@ -199,7 +198,7 @@ def SinePolicyExample(log_path=None):
a4 = math.sin(t * speed + math.pi) * amplitude2
action = [a1, a2, a2, a1, a3, a4, a4, a3]
_, reward, done, _ = environment.step(action)
time.sleep(1./100.)
time.sleep(1. / 100.)
sum_reward += reward
if done:
@@ -207,14 +206,15 @@ def SinePolicyExample(log_path=None):
environment.reset()
def main():
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('--env', help='environment ID (0==sine, 1==stand, 2=reset, 3=overheat)',type=int, default=0)
parser.add_argument('--env',
help='environment ID (0==sine, 1==stand, 2=reset, 3=overheat)',
type=int,
default=0)
args = parser.parse_args()
print("--env=" + str(args.env))
if (args.env == 0):
SinePolicyExample()
if (args.env == 1):
@@ -224,6 +224,6 @@ def main():
if (args.env == 3):
MotorOverheatExample()
if __name__ == '__main__':
main()
if __name__ == '__main__':
main()

View File

@@ -10,10 +10,10 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os, inspect
import os, inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
os.sys.path.insert(0,parentdir)
os.sys.path.insert(0, parentdir)
import datetime
import os
@@ -74,9 +74,8 @@ def update_episode_proto(episode_proto, minitaur, action, step):
"""
max_num_steps = len(episode_proto.state_action)
if step >= max_num_steps:
tf.logging.warning(
"{}th step is not recorded in the logging since only {} steps were "
"pre-allocated.".format(step, max_num_steps))
tf.logging.warning("{}th step is not recorded in the logging since only {} steps were "
"pre-allocated.".format(step, max_num_steps))
return
step_log = episode_proto.state_action[step]
step_log.info_valid = minitaur.IsObservationValid()
@@ -95,8 +94,7 @@ def update_episode_proto(episode_proto, minitaur, action, step):
_update_base_state(step_log.base_position, minitaur.GetBasePosition())
_update_base_state(step_log.base_orientation, minitaur.GetBaseRollPitchYaw())
_update_base_state(step_log.base_angular_vel,
minitaur.GetBaseRollPitchYawRate())
_update_base_state(step_log.base_angular_vel, minitaur.GetBaseRollPitchYawRate())
class MinitaurLogging(object):
@@ -124,10 +122,8 @@ class MinitaurLogging(object):
if not tf.gfile.Exists(self._log_path):
tf.gfile.MakeDirs(self._log_path)
ts = time.time()
time_stamp = datetime.datetime.fromtimestamp(ts).strftime(
"%Y-%m-%d-%H%M%S")
log_path = os.path.join(self._log_path,
"minitaur_log_{}".format(time_stamp))
time_stamp = datetime.datetime.fromtimestamp(ts).strftime("%Y-%m-%d-%H%M%S")
log_path = os.path.join(self._log_path, "minitaur_log_{}".format(time_stamp))
with tf.gfile.Open(log_path, "w") as f:
f.write(episode_proto.SerializeToString())
return log_path

View File

@@ -3,12 +3,12 @@
import sys
import os, inspect
import os, inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
os.sys.path.insert(0,parentdir)
os.sys.path.insert(0, parentdir)
_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
_b = sys.version_info[0] < 3 and (lambda x: x) or (lambda x: x.encode('latin1'))
from google.protobuf import descriptor as _descriptor
from google.protobuf import message as _message
from google.protobuf import reflection as _reflection
@@ -18,168 +18,271 @@ from google.protobuf import descriptor_pb2
_sym_db = _symbol_database.Default()
from pybullet_envs.minitaur.envs import timestamp_pb2 as timestamp__pb2
from pybullet_envs.minitaur.envs import vector_pb2 as vector__pb2
DESCRIPTOR = _descriptor.FileDescriptor(
name='minitaur_logging.proto',
package='robotics.reinforcement_learning.minitaur.envs',
syntax='proto3',
serialized_pb=_b('\n\x16minitaur_logging.proto\x12-robotics.reinforcement_learning.minitaur.envs\x1a\x0ftimestamp.proto\x1a\x0cvector.proto\"k\n\x0fMinitaurEpisode\x12X\n\x0cstate_action\x18\x01 \x03(\x0b\x32\x42.robotics.reinforcement_learning.minitaur.envs.MinitaurStateAction\"U\n\x12MinitaurMotorState\x12\r\n\x05\x61ngle\x18\x01 \x01(\x01\x12\x10\n\x08velocity\x18\x02 \x01(\x01\x12\x0e\n\x06torque\x18\x03 \x01(\x01\x12\x0e\n\x06\x61\x63tion\x18\x04 \x01(\x01\"\xce\x02\n\x13MinitaurStateAction\x12\x12\n\ninfo_valid\x18\x06 \x01(\x08\x12(\n\x04time\x18\x01 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12\x32\n\rbase_position\x18\x02 \x01(\x0b\x32\x1b.robotics.messages.Vector3d\x12\x35\n\x10\x62\x61se_orientation\x18\x03 \x01(\x0b\x32\x1b.robotics.messages.Vector3d\x12\x35\n\x10\x62\x61se_angular_vel\x18\x04 \x01(\x0b\x32\x1b.robotics.messages.Vector3d\x12W\n\x0cmotor_states\x18\x05 \x03(\x0b\x32\x41.robotics.reinforcement_learning.minitaur.envs.MinitaurMotorStateb\x06proto3')
,
dependencies=[timestamp__pb2.DESCRIPTOR,vector__pb2.DESCRIPTOR,])
name='minitaur_logging.proto',
package='robotics.reinforcement_learning.minitaur.envs',
syntax='proto3',
serialized_pb=_b(
'\n\x16minitaur_logging.proto\x12-robotics.reinforcement_learning.minitaur.envs\x1a\x0ftimestamp.proto\x1a\x0cvector.proto\"k\n\x0fMinitaurEpisode\x12X\n\x0cstate_action\x18\x01 \x03(\x0b\x32\x42.robotics.reinforcement_learning.minitaur.envs.MinitaurStateAction\"U\n\x12MinitaurMotorState\x12\r\n\x05\x61ngle\x18\x01 \x01(\x01\x12\x10\n\x08velocity\x18\x02 \x01(\x01\x12\x0e\n\x06torque\x18\x03 \x01(\x01\x12\x0e\n\x06\x61\x63tion\x18\x04 \x01(\x01\"\xce\x02\n\x13MinitaurStateAction\x12\x12\n\ninfo_valid\x18\x06 \x01(\x08\x12(\n\x04time\x18\x01 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12\x32\n\rbase_position\x18\x02 \x01(\x0b\x32\x1b.robotics.messages.Vector3d\x12\x35\n\x10\x62\x61se_orientation\x18\x03 \x01(\x0b\x32\x1b.robotics.messages.Vector3d\x12\x35\n\x10\x62\x61se_angular_vel\x18\x04 \x01(\x0b\x32\x1b.robotics.messages.Vector3d\x12W\n\x0cmotor_states\x18\x05 \x03(\x0b\x32\x41.robotics.reinforcement_learning.minitaur.envs.MinitaurMotorStateb\x06proto3'
),
dependencies=[
timestamp__pb2.DESCRIPTOR,
vector__pb2.DESCRIPTOR,
])
_MINITAUREPISODE = _descriptor.Descriptor(
name='MinitaurEpisode',
full_name='robotics.reinforcement_learning.minitaur.envs.MinitaurEpisode',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='state_action', full_name='robotics.reinforcement_learning.minitaur.envs.MinitaurEpisode.state_action', index=0,
number=1, type=11, cpp_type=10, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
],
extensions=[
],
nested_types=[],
enum_types=[
],
options=None,
is_extendable=False,
syntax='proto3',
extension_ranges=[],
oneofs=[
],
serialized_start=104,
serialized_end=211,
name='MinitaurEpisode',
full_name='robotics.reinforcement_learning.minitaur.envs.MinitaurEpisode',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='state_action',
full_name='robotics.reinforcement_learning.minitaur.envs.MinitaurEpisode.state_action',
index=0,
number=1,
type=11,
cpp_type=10,
label=3,
has_default_value=False,
default_value=[],
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
options=None,
file=DESCRIPTOR),
],
extensions=[],
nested_types=[],
enum_types=[],
options=None,
is_extendable=False,
syntax='proto3',
extension_ranges=[],
oneofs=[],
serialized_start=104,
serialized_end=211,
)
_MINITAURMOTORSTATE = _descriptor.Descriptor(
name='MinitaurMotorState',
full_name='robotics.reinforcement_learning.minitaur.envs.MinitaurMotorState',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='angle', full_name='robotics.reinforcement_learning.minitaur.envs.MinitaurMotorState.angle', index=0,
number=1, type=1, cpp_type=5, label=1,
has_default_value=False, default_value=float(0),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='velocity', full_name='robotics.reinforcement_learning.minitaur.envs.MinitaurMotorState.velocity', index=1,
number=2, type=1, cpp_type=5, label=1,
has_default_value=False, default_value=float(0),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='torque', full_name='robotics.reinforcement_learning.minitaur.envs.MinitaurMotorState.torque', index=2,
number=3, type=1, cpp_type=5, label=1,
has_default_value=False, default_value=float(0),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='action', full_name='robotics.reinforcement_learning.minitaur.envs.MinitaurMotorState.action', index=3,
number=4, type=1, cpp_type=5, label=1,
has_default_value=False, default_value=float(0),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
],
extensions=[
],
nested_types=[],
enum_types=[
],
options=None,
is_extendable=False,
syntax='proto3',
extension_ranges=[],
oneofs=[
],
serialized_start=213,
serialized_end=298,
name='MinitaurMotorState',
full_name='robotics.reinforcement_learning.minitaur.envs.MinitaurMotorState',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='angle',
full_name='robotics.reinforcement_learning.minitaur.envs.MinitaurMotorState.angle',
index=0,
number=1,
type=1,
cpp_type=5,
label=1,
has_default_value=False,
default_value=float(0),
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
options=None,
file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='velocity',
full_name='robotics.reinforcement_learning.minitaur.envs.MinitaurMotorState.velocity',
index=1,
number=2,
type=1,
cpp_type=5,
label=1,
has_default_value=False,
default_value=float(0),
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
options=None,
file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='torque',
full_name='robotics.reinforcement_learning.minitaur.envs.MinitaurMotorState.torque',
index=2,
number=3,
type=1,
cpp_type=5,
label=1,
has_default_value=False,
default_value=float(0),
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
options=None,
file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='action',
full_name='robotics.reinforcement_learning.minitaur.envs.MinitaurMotorState.action',
index=3,
number=4,
type=1,
cpp_type=5,
label=1,
has_default_value=False,
default_value=float(0),
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
options=None,
file=DESCRIPTOR),
],
extensions=[],
nested_types=[],
enum_types=[],
options=None,
is_extendable=False,
syntax='proto3',
extension_ranges=[],
oneofs=[],
serialized_start=213,
serialized_end=298,
)
_MINITAURSTATEACTION = _descriptor.Descriptor(
name='MinitaurStateAction',
full_name='robotics.reinforcement_learning.minitaur.envs.MinitaurStateAction',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='info_valid', full_name='robotics.reinforcement_learning.minitaur.envs.MinitaurStateAction.info_valid', index=0,
number=6, type=8, cpp_type=7, label=1,
has_default_value=False, default_value=False,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='time', full_name='robotics.reinforcement_learning.minitaur.envs.MinitaurStateAction.time', index=1,
number=1, type=11, cpp_type=10, label=1,
has_default_value=False, default_value=None,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='base_position', full_name='robotics.reinforcement_learning.minitaur.envs.MinitaurStateAction.base_position', index=2,
number=2, type=11, cpp_type=10, label=1,
has_default_value=False, default_value=None,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='base_orientation', full_name='robotics.reinforcement_learning.minitaur.envs.MinitaurStateAction.base_orientation', index=3,
number=3, type=11, cpp_type=10, label=1,
has_default_value=False, default_value=None,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='base_angular_vel', full_name='robotics.reinforcement_learning.minitaur.envs.MinitaurStateAction.base_angular_vel', index=4,
number=4, type=11, cpp_type=10, label=1,
has_default_value=False, default_value=None,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='motor_states', full_name='robotics.reinforcement_learning.minitaur.envs.MinitaurStateAction.motor_states', index=5,
number=5, type=11, cpp_type=10, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
],
extensions=[
],
nested_types=[],
enum_types=[
],
options=None,
is_extendable=False,
syntax='proto3',
extension_ranges=[],
oneofs=[
],
serialized_start=301,
serialized_end=635,
name='MinitaurStateAction',
full_name='robotics.reinforcement_learning.minitaur.envs.MinitaurStateAction',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='info_valid',
full_name=
'robotics.reinforcement_learning.minitaur.envs.MinitaurStateAction.info_valid',
index=0,
number=6,
type=8,
cpp_type=7,
label=1,
has_default_value=False,
default_value=False,
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
options=None,
file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='time',
full_name='robotics.reinforcement_learning.minitaur.envs.MinitaurStateAction.time',
index=1,
number=1,
type=11,
cpp_type=10,
label=1,
has_default_value=False,
default_value=None,
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
options=None,
file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='base_position',
full_name=
'robotics.reinforcement_learning.minitaur.envs.MinitaurStateAction.base_position',
index=2,
number=2,
type=11,
cpp_type=10,
label=1,
has_default_value=False,
default_value=None,
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
options=None,
file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='base_orientation',
full_name=
'robotics.reinforcement_learning.minitaur.envs.MinitaurStateAction.base_orientation',
index=3,
number=3,
type=11,
cpp_type=10,
label=1,
has_default_value=False,
default_value=None,
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
options=None,
file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='base_angular_vel',
full_name=
'robotics.reinforcement_learning.minitaur.envs.MinitaurStateAction.base_angular_vel',
index=4,
number=4,
type=11,
cpp_type=10,
label=1,
has_default_value=False,
default_value=None,
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
options=None,
file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='motor_states',
full_name=
'robotics.reinforcement_learning.minitaur.envs.MinitaurStateAction.motor_states',
index=5,
number=5,
type=11,
cpp_type=10,
label=3,
has_default_value=False,
default_value=[],
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
options=None,
file=DESCRIPTOR),
],
extensions=[],
nested_types=[],
enum_types=[],
options=None,
is_extendable=False,
syntax='proto3',
extension_ranges=[],
oneofs=[],
serialized_start=301,
serialized_end=635,
)
_MINITAUREPISODE.fields_by_name['state_action'].message_type = _MINITAURSTATEACTION
@@ -193,26 +296,34 @@ DESCRIPTOR.message_types_by_name['MinitaurMotorState'] = _MINITAURMOTORSTATE
DESCRIPTOR.message_types_by_name['MinitaurStateAction'] = _MINITAURSTATEACTION
_sym_db.RegisterFileDescriptor(DESCRIPTOR)
MinitaurEpisode = _reflection.GeneratedProtocolMessageType('MinitaurEpisode', (_message.Message,), dict(
DESCRIPTOR = _MINITAUREPISODE,
__module__ = 'minitaur_logging_pb2'
# @@protoc_insertion_point(class_scope:robotics.reinforcement_learning.minitaur.envs.MinitaurEpisode)
))
MinitaurEpisode = _reflection.GeneratedProtocolMessageType(
'MinitaurEpisode',
(_message.Message,),
dict(
DESCRIPTOR=_MINITAUREPISODE,
__module__='minitaur_logging_pb2'
# @@protoc_insertion_point(class_scope:robotics.reinforcement_learning.minitaur.envs.MinitaurEpisode)
))
_sym_db.RegisterMessage(MinitaurEpisode)
MinitaurMotorState = _reflection.GeneratedProtocolMessageType('MinitaurMotorState', (_message.Message,), dict(
DESCRIPTOR = _MINITAURMOTORSTATE,
__module__ = 'minitaur_logging_pb2'
# @@protoc_insertion_point(class_scope:robotics.reinforcement_learning.minitaur.envs.MinitaurMotorState)
))
MinitaurMotorState = _reflection.GeneratedProtocolMessageType(
'MinitaurMotorState',
(_message.Message,),
dict(
DESCRIPTOR=_MINITAURMOTORSTATE,
__module__='minitaur_logging_pb2'
# @@protoc_insertion_point(class_scope:robotics.reinforcement_learning.minitaur.envs.MinitaurMotorState)
))
_sym_db.RegisterMessage(MinitaurMotorState)
MinitaurStateAction = _reflection.GeneratedProtocolMessageType('MinitaurStateAction', (_message.Message,), dict(
DESCRIPTOR = _MINITAURSTATEACTION,
__module__ = 'minitaur_logging_pb2'
# @@protoc_insertion_point(class_scope:robotics.reinforcement_learning.minitaur.envs.MinitaurStateAction)
))
MinitaurStateAction = _reflection.GeneratedProtocolMessageType(
'MinitaurStateAction',
(_message.Message,),
dict(
DESCRIPTOR=_MINITAURSTATEACTION,
__module__='minitaur_logging_pb2'
# @@protoc_insertion_point(class_scope:robotics.reinforcement_learning.minitaur.envs.MinitaurStateAction)
))
_sym_db.RegisterMessage(MinitaurStateAction)
# @@protoc_insertion_point(module_scope)
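For reference, the rewrapping seen throughout this diff can be reproduced with yapf's Python API. The .style.yapf added by this commit is not visible in this excerpt, so the style string below (a pep8 base with a 100-column limit, inferred from how the lines above are wrapped) is an assumption; a minimal sketch:

# Minimal sketch: apply yapf programmatically to one of the files above.
# The style settings are an assumption inferred from this diff; the
# commit's actual .style.yapf may differ.
from yapf.yapflib.yapf_api import FormatCode

with open('minitaur_logging_pb2.py') as f:
  source = f.read()

formatted, changed = FormatCode(source,
                                style_config='{based_on_style: pep8, column_limit: 100}')

if changed:
  with open('minitaur_logging_pb2.py', 'w') as f:
    f.write(formatted)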

View File

@@ -15,13 +15,13 @@ fields = episode.ListFields()
recs = []
for rec in fields[0][1]:
#print(rec.time)
for motorState in rec.motor_states:
#print("motorState.angle=",motorState.angle)
#print("motorState.velocity=",motorState.velocity)
#print("motorState.action=",motorState.action)
#print("motorState.torque=",motorState.torque)
recs.append([motorState.angle,motorState.velocity,motorState.action,motorState.torque])
#print(rec.time)
for motorState in rec.motor_states:
#print("motorState.angle=",motorState.angle)
#print("motorState.velocity=",motorState.velocity)
#print("motorState.action=",motorState.action)
#print("motorState.torque=",motorState.torque)
recs.append([motorState.angle, motorState.velocity, motorState.action, motorState.torque])
a = numpy.array(recs)
numpy.savetxt("motorq_qdot_action_torque.csv", a, delimiter=",")
numpy.savetxt("motorq_qdot_action_torque.csv", a, delimiter=",")
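As a usage note for the descriptors above: the generated minitaur_logging_pb2 module exposes MinitaurEpisode, MinitaurStateAction and MinitaurMotorState as ordinary protobuf message classes, and the export script above simply walks episode.state_action and each motor_states entry. A minimal round-trip sketch using only field names visible in the descriptors (the numeric values are illustrative):

# Minimal sketch, assuming the generated minitaur_logging_pb2 module is
# importable; field names match the descriptors above, values are made up.
import minitaur_logging_pb2

episode = minitaur_logging_pb2.MinitaurEpisode()
state_action = episode.state_action.add()  # repeated MinitaurStateAction
state_action.info_valid = True
motor = state_action.motor_states.add()  # repeated MinitaurMotorState
motor.angle = 1.57
motor.velocity = 0.0
motor.torque = 0.2
motor.action = 1.6

data = episode.SerializeToString()

restored = minitaur_logging_pb2.MinitaurEpisode()
restored.ParseFromString(data)
for rec in restored.state_action:
  for motor_state in rec.motor_states:
    print(motor_state.angle, motor_state.velocity, motor_state.action, motor_state.torque)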

View File

@@ -17,8 +17,8 @@ DIAGONAL_LEG_PAIR_2 = (1, 2)
class BehaviorParameters(
collections.namedtuple("BehaviorParameters", [
"stance_duration", "desired_forward_speed", "turning_speed",
"standing_height", "desired_incline_angle"
"stance_duration", "desired_forward_speed", "turning_speed", "standing_height",
"desired_incline_angle"
])):
__slots__ = ()
@@ -28,18 +28,16 @@ class BehaviorParameters(
turning_speed=0,
standing_height=0.21,
desired_incline_angle=0):
return super(BehaviorParameters, cls).__new__(
cls, stance_duration, desired_forward_speed, turning_speed,
standing_height, desired_incline_angle)
return super(BehaviorParameters,
cls).__new__(cls, stance_duration, desired_forward_speed, turning_speed,
standing_height, desired_incline_angle)
def motor_angles_to_leg_pose(motor_angles):
leg_pose = np.zeros(_NUM_MOTORS)
for i in range(_NUM_LEGS):
leg_pose[i] = 0.5 * (-1)**(i // 2) * (
motor_angles[2 * i + 1] - motor_angles[2 * i])
leg_pose[_NUM_LEGS + i] = 0.5 * (
motor_angles[2 * i] + motor_angles[2 * i + 1])
leg_pose[i] = 0.5 * (-1)**(i // 2) * (motor_angles[2 * i + 1] - motor_angles[2 * i])
leg_pose[_NUM_LEGS + i] = 0.5 * (motor_angles[2 * i] + motor_angles[2 * i + 1])
return leg_pose
@@ -47,8 +45,7 @@ def leg_pose_to_motor_angles(leg_pose):
motor_pose = np.zeros(_NUM_MOTORS)
for i in range(_NUM_LEGS):
motor_pose[2 * i] = leg_pose[_NUM_LEGS + i] - (-1)**(i // 2) * leg_pose[i]
motor_pose[2 * i + 1] = (
leg_pose[_NUM_LEGS + i] + (-1)**(i // 2) * leg_pose[i])
motor_pose[2 * i + 1] = (leg_pose[_NUM_LEGS + i] + (-1)**(i // 2) * leg_pose[i])
return motor_pose
@@ -85,8 +82,7 @@ def foot_position_to_leg_pose(foot_position):
ext = math.acos(cos_ext)
hip_toe = math.sqrt(hip_toe_sqr)
cos_theta = (hip_toe_sqr + hip_ankle_sqr -
(l3 - l2)**2) / (2 * hip_ankle * hip_toe)
cos_theta = (hip_toe_sqr + hip_ankle_sqr - (l3 - l2)**2) / (2 * hip_ankle * hip_toe)
assert cos_theta > 0
theta = math.acos(cos_theta)
@@ -94,8 +90,7 @@ def foot_position_to_leg_pose(foot_position):
return (-sw, ext)
def foot_horizontal_position_to_leg_swing(foot_horizontal_position,
leg_extension):
def foot_horizontal_position_to_leg_swing(foot_horizontal_position, leg_extension):
"""Computes the target leg swing.
Sometimes it is more convenient to plan in the hybrid space.
@@ -119,8 +114,7 @@ def foot_horizontal_position_to_leg_swing(foot_horizontal_position,
# Cap the foot horizontal (projected) position so the target leg pose is
# always feasible.
foot_position = max(
min(toe_hip_len * 0.8, foot_horizontal_position), -toe_hip_len * 0.5)
foot_position = max(min(toe_hip_len * 0.8, foot_horizontal_position), -toe_hip_len * 0.5)
sw_and_theta = math.asin(foot_position / toe_hip_len)
@@ -171,9 +165,7 @@ def generate_swing_trajectory(phase, init_pose, end_pose):
b = (phi * phi * delta_2 - delta_1) / delta_p
delta = (
a * normalized_phase * normalized_phase + b * normalized_phase +
init_delta)
delta = (a * normalized_phase * normalized_phase + b * normalized_phase + init_delta)
l1 = _UPPER_LEG_LEN
l2 = _LOWER_SHORT_LEG_LEN
@@ -209,10 +201,9 @@ class RaibertSwingLegController(object):
leg_pose_set = []
for i in raibiert_controller.swing_set:
target_foot_horizontal_position = (
raibiert_controller.behavior_parameters.stance_duration / 2 *
current_speed + self._speed_gain *
(current_speed -
raibiert_controller.behavior_parameters.desired_forward_speed))
raibiert_controller.behavior_parameters.stance_duration / 2 * current_speed +
self._speed_gain *
(current_speed - raibiert_controller.behavior_parameters.desired_forward_speed))
# Use the swing phase [0, 1] to track the foot. The idea is
# straightforward:
@@ -221,19 +212,19 @@ class RaibertSwingLegController(object):
# 3) Find the next leg pose on the curve based on how much time is left.
# 1) Convert the target foot
target_leg_extension = (
raibiert_controller.nominal_leg_extension -
self._leg_extension_clearance)
target_leg_swing = foot_horizontal_position_to_leg_swing(
target_foot_horizontal_position, leg_extension=target_leg_extension)
target_leg_extension = (raibiert_controller.nominal_leg_extension -
self._leg_extension_clearance)
target_leg_swing = foot_horizontal_position_to_leg_swing(target_foot_horizontal_position,
leg_extension=target_leg_extension)
target_leg_pose = (target_leg_swing, target_leg_extension)
# 2) Generates the curve from the current leg pose to the target leg pose.
# and Find the next leg pose on the curve based on current swing phase.
desired_leg_pose = self._leg_trajectory_generator(
phase, raibiert_controller.swing_start_leg_pose, target_leg_pose)
desired_leg_pose = self._leg_trajectory_generator(phase,
raibiert_controller.swing_start_leg_pose,
target_leg_pose)
leg_pose_set.append(desired_leg_pose)
@@ -244,9 +235,7 @@ class RaibertSwingLegController(object):
class RaibertStanceLegController(object):
def __init__(self,
speed_gain=0.1,
leg_trajectory_generator=generate_stance_trajectory):
def __init__(self, speed_gain=0.1, leg_trajectory_generator=generate_stance_trajectory):
self._speed_gain = speed_gain
self._leg_trajectory_generator = leg_trajectory_generator
@@ -255,20 +244,18 @@ class RaibertStanceLegController(object):
current_speed = raibiert_controller.estimate_base_velocity()
leg_pose_set = []
for i in raibiert_controller.stance_set:
desired_forward_speed = (
raibiert_controller.behavior_parameters.desired_forward_speed)
target_foot_position = -(
raibiert_controller.behavior_parameters.stance_duration / 2 *
current_speed - self._speed_gain *
(current_speed - desired_forward_speed))
desired_forward_speed = (raibiert_controller.behavior_parameters.desired_forward_speed)
target_foot_position = -(raibiert_controller.behavior_parameters.stance_duration / 2 *
current_speed - self._speed_gain *
(current_speed - desired_forward_speed))
target_leg_pose = (foot_horizontal_position_to_leg_swing(
target_foot_position,
leg_extension=raibiert_controller.nominal_leg_extension),
target_foot_position, leg_extension=raibiert_controller.nominal_leg_extension),
raibiert_controller.nominal_leg_extension)
desired_leg_pose = self._leg_trajectory_generator(
phase, raibiert_controller.stance_start_leg_pose, target_leg_pose)
desired_leg_pose = self._leg_trajectory_generator(phase,
raibiert_controller.stance_start_leg_pose,
target_leg_pose)
leg_pose_set.append(desired_leg_pose)
@@ -288,8 +275,7 @@ class MinitaurRaibertTrottingController(object):
self._robot = robot
self._behavior_parameters = behavior_parameters
nominal_leg_pose = foot_position_to_leg_pose(
(0, -self._behavior_parameters.standing_height))
nominal_leg_pose = foot_position_to_leg_pose((0, -self._behavior_parameters.standing_height))
self._nominal_leg_extension = nominal_leg_pose[1]
self._swing_leg_controller = swing_leg_controller
@@ -337,8 +323,7 @@ class MinitaurRaibertTrottingController(object):
# extract the swing leg pose from the current_leg_pose
leg_pose = []
for index in leg_indices:
leg_pose.append(
[current_leg_pose[index], current_leg_pose[index + _NUM_LEGS]])
leg_pose.append([current_leg_pose[index], current_leg_pose[index + _NUM_LEGS]])
leg_pose = np.array(leg_pose)
return np.mean(leg_pose, axis=0)
@@ -353,13 +338,13 @@ class MinitaurRaibertTrottingController(object):
def get_phase(self):
"""Compute the current stance/swing phase."""
return math.fmod(self._time, self._behavior_parameters.stance_duration
) / self._behavior_parameters.stance_duration
return math.fmod(
self._time,
self._behavior_parameters.stance_duration) / self._behavior_parameters.stance_duration
def update_swing_stance_set(self):
"""Switch the set of swing/stance legs based on timing."""
swing_stance_phase = math.fmod(
self._time, 2 * self._behavior_parameters.stance_duration)
swing_stance_phase = math.fmod(self._time, 2 * self._behavior_parameters.stance_duration)
if swing_stance_phase < self._behavior_parameters.stance_duration:
return (DIAGONAL_LEG_PAIR_1, DIAGONAL_LEG_PAIR_2)
return (DIAGONAL_LEG_PAIR_2, DIAGONAL_LEG_PAIR_1)
@@ -387,8 +372,8 @@ class MinitaurRaibertTrottingController(object):
toe_hip_len = math.sqrt(x**2 + y**2)
horizontal_dist = toe_hip_len * delta_sw
phase = self.get_phase()
speed = 0 if phase < 0.1 else horizontal_dist / (
self._behavior_parameters.stance_duration * phase)
speed = 0 if phase < 0.1 else horizontal_dist / (self._behavior_parameters.stance_duration *
phase)
return speed
def get_swing_leg_action(self):
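The two conversions reformatted above, motor_angles_to_leg_pose and leg_pose_to_motor_angles, are inverses of each other: for each leg, the swing component is half the signed difference of its two motor angles and the extension component is half their sum. A standalone sketch with a round-trip check (the formulas are copied from the diff; the test values are arbitrary):

# Standalone sketch of the swing/extension <-> motor-angle conversion above.
import numpy as np

_NUM_LEGS = 4
_NUM_MOTORS = 8


def motor_angles_to_leg_pose(motor_angles):
  # leg_pose = [swing_1..swing_4, extension_1..extension_4]
  leg_pose = np.zeros(_NUM_MOTORS)
  for i in range(_NUM_LEGS):
    leg_pose[i] = 0.5 * (-1)**(i // 2) * (motor_angles[2 * i + 1] - motor_angles[2 * i])
    leg_pose[_NUM_LEGS + i] = 0.5 * (motor_angles[2 * i] + motor_angles[2 * i + 1])
  return leg_pose


def leg_pose_to_motor_angles(leg_pose):
  motor_pose = np.zeros(_NUM_MOTORS)
  for i in range(_NUM_LEGS):
    motor_pose[2 * i] = leg_pose[_NUM_LEGS + i] - (-1)**(i // 2) * leg_pose[i]
    motor_pose[2 * i + 1] = leg_pose[_NUM_LEGS + i] + (-1)**(i // 2) * leg_pose[i]
  return motor_pose


# Round trip: converting to leg space and back recovers the motor angles.
angles = np.linspace(0.5, 2.0, _NUM_MOTORS)
assert np.allclose(leg_pose_to_motor_angles(motor_angles_to_leg_pose(angles)), angles)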

View File

@@ -1,6 +1,5 @@
# The example to run the Raibert controller in a Minitaur gym env.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
@@ -9,8 +8,7 @@ import os
import inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(os.path.dirname(currentdir)))
os.sys.path.insert(0,parentdir)
os.sys.path.insert(0, parentdir)
import tensorflow as tf
from pybullet_envs.minitaur.envs import minitaur_raibert_controller
@@ -21,9 +19,8 @@ FLAGS = tf.app.flags.FLAGS
flags.DEFINE_float("motor_kp", 1.0, "The position gain of the motor.")
flags.DEFINE_float("motor_kd", 0.015, "The speed gain of the motor.")
flags.DEFINE_float(
"control_latency", 0.02, "The latency between sensor measurement and action"
" execution the robot.")
flags.DEFINE_float("control_latency", 0.02, "The latency between sensor measurement and action"
                   " execution on the robot.")
flags.DEFINE_string("log_path", None, "The directory to write the log file.")
@@ -55,20 +52,18 @@ def main(argv):
log_path=FLAGS.log_path)
env.reset()
controller = minitaur_raibert_controller.MinitaurRaibertTrottingController(
env.minitaur)
controller = minitaur_raibert_controller.MinitaurRaibertTrottingController(env.minitaur)
tstart = env.minitaur.GetTimeSinceReset()
for _ in range(1000):
t = env.minitaur.GetTimeSinceReset() - tstart
controller.behavior_parameters = (
minitaur_raibert_controller.BehaviorParameters(
desired_forward_speed=speed(t)))
controller.behavior_parameters = (minitaur_raibert_controller.BehaviorParameters(
desired_forward_speed=speed(t)))
controller.update(t)
env.step(controller.get_action())
finally:
env.close()
if __name__ == "__main__":
tf.app.run(main)

View File

@@ -4,10 +4,10 @@ It is the result of first pass system identification for the rainbow dash robot.
"""
import os, inspect
import os, inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
os.sys.path.insert(0,parentdir)
os.sys.path.insert(0, parentdir)
import math
@@ -46,13 +46,12 @@ class MinitaurRainbowDash(minitaur.Minitaur):
"%s/quadruped/minitaur_rainbow_dash.urdf" % self._urdf_root,
init_position,
useFixedBase=self._on_rack,
flags=(
self._pybullet_client.URDF_USE_SELF_COLLISION_EXCLUDE_PARENT))
flags=(self._pybullet_client.URDF_USE_SELF_COLLISION_EXCLUDE_PARENT))
else:
self.quadruped = self._pybullet_client.loadURDF(
"%s/quadruped/minitaur_rainbow_dash.urdf" % self._urdf_root,
init_position,
useFixedBase=self._on_rack)
self.quadruped = self._pybullet_client.loadURDF("%s/quadruped/minitaur_rainbow_dash.urdf" %
self._urdf_root,
init_position,
useFixedBase=self._on_rack)
self._BuildJointNameToIdDict()
self._BuildUrdfIds()
if self._remove_default_joint_damping:
@@ -62,10 +61,9 @@ class MinitaurRainbowDash(minitaur.Minitaur):
self._RecordInertiaInfoFromURDF()
self.ResetPose(add_constraint=True)
else:
self._pybullet_client.resetBasePositionAndOrientation(
self.quadruped, init_position, minitaur.INIT_ORIENTATION)
self._pybullet_client.resetBaseVelocity(self.quadruped, [0, 0, 0],
[0, 0, 0])
self._pybullet_client.resetBasePositionAndOrientation(self.quadruped, init_position,
minitaur.INIT_ORIENTATION)
self._pybullet_client.resetBaseVelocity(self.quadruped, [0, 0, 0], [0, 0, 0])
self.ResetPose(add_constraint=False)
self._overheat_counter = np.zeros(self.num_motors)
@@ -103,68 +101,60 @@ class MinitaurRainbowDash(minitaur.Minitaur):
knee_angle = -2.1834
leg_position = minitaur.LEG_POSITION[leg_id]
self._pybullet_client.resetJointState(
self.quadruped,
self._joint_name_to_id["motor_" + leg_position + "L_joint"],
self._motor_direction[2 * leg_id] * half_pi,
targetVelocity=0)
self._pybullet_client.resetJointState(
self.quadruped,
self._joint_name_to_id["knee_" + leg_position + "L_joint"],
self._motor_direction[2 * leg_id] * knee_angle,
targetVelocity=0)
self._pybullet_client.resetJointState(
self.quadruped,
self._joint_name_to_id["motor_" + leg_position + "R_joint"],
self._motor_direction[2 * leg_id + 1] * half_pi,
targetVelocity=0)
self._pybullet_client.resetJointState(
self.quadruped,
self._joint_name_to_id["knee_" + leg_position + "R_joint"],
self._motor_direction[2 * leg_id + 1] * knee_angle,
targetVelocity=0)
self._pybullet_client.resetJointState(self.quadruped,
self._joint_name_to_id["motor_" + leg_position +
"L_joint"],
self._motor_direction[2 * leg_id] * half_pi,
targetVelocity=0)
self._pybullet_client.resetJointState(self.quadruped,
self._joint_name_to_id["knee_" + leg_position +
"L_joint"],
self._motor_direction[2 * leg_id] * knee_angle,
targetVelocity=0)
self._pybullet_client.resetJointState(self.quadruped,
self._joint_name_to_id["motor_" + leg_position +
"R_joint"],
self._motor_direction[2 * leg_id + 1] * half_pi,
targetVelocity=0)
self._pybullet_client.resetJointState(self.quadruped,
self._joint_name_to_id["knee_" + leg_position +
"R_joint"],
self._motor_direction[2 * leg_id + 1] * knee_angle,
targetVelocity=0)
if add_constraint:
if leg_id < 2:
self._pybullet_client.createConstraint(
self.quadruped,
self._joint_name_to_id["knee_" + leg_position + "R_joint"],
self.quadruped,
self._joint_name_to_id["knee_" + leg_position + "L_joint"],
self._pybullet_client.JOINT_POINT2POINT, [0, 0, 0],
KNEE_CONSTRAINT_POINT_SHORT, KNEE_CONSTRAINT_POINT_LONG)
self.quadruped, self._joint_name_to_id["knee_" + leg_position + "R_joint"],
self.quadruped, self._joint_name_to_id["knee_" + leg_position + "L_joint"],
self._pybullet_client.JOINT_POINT2POINT, [0, 0, 0], KNEE_CONSTRAINT_POINT_SHORT,
KNEE_CONSTRAINT_POINT_LONG)
else:
self._pybullet_client.createConstraint(
self.quadruped,
self._joint_name_to_id["knee_" + leg_position + "R_joint"],
self.quadruped,
self._joint_name_to_id["knee_" + leg_position + "L_joint"],
self._pybullet_client.JOINT_POINT2POINT, [0, 0, 0],
KNEE_CONSTRAINT_POINT_LONG, KNEE_CONSTRAINT_POINT_SHORT)
self.quadruped, self._joint_name_to_id["knee_" + leg_position + "R_joint"],
self.quadruped, self._joint_name_to_id["knee_" + leg_position + "L_joint"],
self._pybullet_client.JOINT_POINT2POINT, [0, 0, 0], KNEE_CONSTRAINT_POINT_LONG,
KNEE_CONSTRAINT_POINT_SHORT)
if self._accurate_motor_model_enabled or self._pd_control_enabled:
# Disable the default motor in pybullet.
self._pybullet_client.setJointMotorControl2(
bodyIndex=self.quadruped,
jointIndex=(
self._joint_name_to_id["motor_" + leg_position + "L_joint"]),
jointIndex=(self._joint_name_to_id["motor_" + leg_position + "L_joint"]),
controlMode=self._pybullet_client.VELOCITY_CONTROL,
targetVelocity=0,
force=knee_friction_force)
self._pybullet_client.setJointMotorControl2(
bodyIndex=self.quadruped,
jointIndex=(
self._joint_name_to_id["motor_" + leg_position + "R_joint"]),
jointIndex=(self._joint_name_to_id["motor_" + leg_position + "R_joint"]),
controlMode=self._pybullet_client.VELOCITY_CONTROL,
targetVelocity=0,
force=knee_friction_force)
else:
self._SetDesiredMotorAngleByName(
"motor_" + leg_position + "L_joint",
self._motor_direction[2 * leg_id] * half_pi)
self._SetDesiredMotorAngleByName(
"motor_" + leg_position + "R_joint",
self._motor_direction[2 * leg_id + 1] * half_pi)
self._SetDesiredMotorAngleByName("motor_" + leg_position + "L_joint",
self._motor_direction[2 * leg_id] * half_pi)
self._SetDesiredMotorAngleByName("motor_" + leg_position + "R_joint",
self._motor_direction[2 * leg_id + 1] * half_pi)
self._pybullet_client.setJointMotorControl2(
bodyIndex=self.quadruped,

View File

@@ -42,20 +42,17 @@ class MinitaurRandomizeTerrainGymEnv(minitaur_gym_env.MinitaurGymEnv):
fileName=terrain_file_name,
flags=1,
meshScale=[0.5, 0.5, 0.5])
self._pybullet_client.createMultiBody(terrain_mass,
terrain_collision_shape_id,
terrain_visual_shape_id,
terrain_position,
self._pybullet_client.createMultiBody(terrain_mass, terrain_collision_shape_id,
terrain_visual_shape_id, terrain_position,
terrain_orientation)
self._pybullet_client.setGravity(0, 0, -10)
self.minitaur = (minitaur.Minitaur(
pybullet_client=self._pybullet_client,
urdf_root=self._urdf_root,
time_step=self._time_step,
self_collision_enabled=self._self_collision_enabled,
motor_velocity_limit=self._motor_velocity_limit,
pd_control_enabled=self._pd_control_enabled,
on_rack=self._on_rack))
self.minitaur = (minitaur.Minitaur(pybullet_client=self._pybullet_client,
urdf_root=self._urdf_root,
time_step=self._time_step,
self_collision_enabled=self._self_collision_enabled,
motor_velocity_limit=self._motor_velocity_limit,
pd_control_enabled=self._pd_control_enabled,
on_rack=self._on_rack))
self._last_base_position = [0, 0, 0]
for _ in xrange(100):
if self._pd_control_enabled:
@@ -78,6 +75,5 @@ class MinitaurRandomizeTerrainGymEnv(minitaur_gym_env.MinitaurGymEnv):
asset_source = os.path.join(terrain_dir, terrain_file_name)
asset_destination = os.path.join(FLAGS.storage_dir, terrain_file_name)
gfile.Copy(asset_source, asset_destination, overwrite=True)
terrain_file_name_complete = os.path.join(FLAGS.storage_dir,
terrain_file_name)
terrain_file_name_complete = os.path.join(FLAGS.storage_dir, terrain_file_name)
return terrain_file_name_complete

View File

@@ -21,10 +21,7 @@ def ResetTerrainExample():
num_reset = 10
steps = 100
env = minitaur_randomize_terrain_gym_env.MinitaurRandomizeTerrainGymEnv(
render=True,
leg_model_enabled=False,
motor_velocity_limit=np.inf,
pd_control_enabled=True)
render=True, leg_model_enabled=False, motor_velocity_limit=np.inf, pd_control_enabled=True)
action = [math.pi / 2] * 8
for _ in xrange(num_reset):
env.reset()
@@ -37,10 +34,7 @@ def ResetTerrainExample():
def SinePolicyExample():
"""An example of minitaur walking with a sine gait."""
env = minitaur_randomize_terrain_gym_env.MinitaurRandomizeTerrainGymEnv(
render=True,
motor_velocity_limit=np.inf,
pd_control_enabled=True,
on_rack=False)
render=True, motor_velocity_limit=np.inf, pd_control_enabled=True, on_rack=False)
sum_reward = 0
steps = 200
amplitude_1_bound = 0.5

View File

@@ -2,10 +2,10 @@
"""
import os, inspect
import os, inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
os.sys.path.insert(0,parentdir)
os.sys.path.insert(0, parentdir)
import collections
import math
@@ -19,8 +19,7 @@ NUM_LEGS = 4
NUM_MOTORS = 2 * NUM_LEGS
MinitaurPose = collections.namedtuple(
"MinitaurPose",
"swing_angle_1, swing_angle_2, swing_angle_3, swing_angle_4, "
"MinitaurPose", "swing_angle_1, swing_angle_2, swing_angle_3, swing_angle_4, "
"extension_angle_1, extension_angle_2, extension_angle_3, "
"extension_angle_4")
@@ -35,10 +34,7 @@ class MinitaurReactiveEnv(minitaur_gym_env.MinitaurGymEnv):
expenditure.
"""
metadata = {
"render.modes": ["human", "rgb_array"],
"video.frames_per_second": 166
}
metadata = {"render.modes": ["human", "rgb_array"], "video.frames_per_second": 166}
def __init__(self,
urdf_version=None,
@@ -96,24 +92,24 @@ class MinitaurReactiveEnv(minitaur_gym_env.MinitaurGymEnv):
"""
self._use_angle_in_observation = use_angle_in_observation
super(MinitaurReactiveEnv, self).__init__(
urdf_version=urdf_version,
energy_weight=energy_weight,
accurate_motor_model_enabled=accurate_motor_model_enabled,
motor_overheat_protection=True,
motor_kp=motor_kp,
motor_kd=motor_kd,
remove_default_joint_damping=remove_default_joint_damping,
control_latency=control_latency,
pd_latency=pd_latency,
on_rack=on_rack,
render=render,
hard_reset=hard_reset,
num_steps_to_log=num_steps_to_log,
env_randomizer=env_randomizer,
log_path=log_path,
control_time_step=control_time_step,
action_repeat=action_repeat)
super(MinitaurReactiveEnv,
self).__init__(urdf_version=urdf_version,
energy_weight=energy_weight,
accurate_motor_model_enabled=accurate_motor_model_enabled,
motor_overheat_protection=True,
motor_kp=motor_kp,
motor_kd=motor_kd,
remove_default_joint_damping=remove_default_joint_damping,
control_latency=control_latency,
pd_latency=pd_latency,
on_rack=on_rack,
render=render,
hard_reset=hard_reset,
num_steps_to_log=num_steps_to_log,
env_randomizer=env_randomizer,
log_path=log_path,
control_time_step=control_time_step,
action_repeat=action_repeat)
action_dim = 8
action_low = np.array([-0.5] * action_dim)
@@ -126,34 +122,31 @@ class MinitaurReactiveEnv(minitaur_gym_env.MinitaurGymEnv):
def reset(self):
# TODO(b/73666007): Use composition instead of inheritance.
# (http://go/design-for-testability-no-inheritance).
init_pose = MinitaurPose(
swing_angle_1=INIT_SWING_POS,
swing_angle_2=INIT_SWING_POS,
swing_angle_3=INIT_SWING_POS,
swing_angle_4=INIT_SWING_POS,
extension_angle_1=INIT_EXTENSION_POS,
extension_angle_2=INIT_EXTENSION_POS,
extension_angle_3=INIT_EXTENSION_POS,
extension_angle_4=INIT_EXTENSION_POS)
init_pose = MinitaurPose(swing_angle_1=INIT_SWING_POS,
swing_angle_2=INIT_SWING_POS,
swing_angle_3=INIT_SWING_POS,
swing_angle_4=INIT_SWING_POS,
extension_angle_1=INIT_EXTENSION_POS,
extension_angle_2=INIT_EXTENSION_POS,
extension_angle_3=INIT_EXTENSION_POS,
extension_angle_4=INIT_EXTENSION_POS)
# TODO(b/73734502): Refactor input of _convert_from_leg_model to namedtuple.
initial_motor_angles = self._convert_from_leg_model(list(init_pose))
super(MinitaurReactiveEnv, self).reset(
initial_motor_angles=initial_motor_angles, reset_duration=0.5)
super(MinitaurReactiveEnv, self).reset(initial_motor_angles=initial_motor_angles,
reset_duration=0.5)
return self._get_observation()
def _convert_from_leg_model(self, leg_pose):
motor_pose = np.zeros(NUM_MOTORS)
for i in range(NUM_LEGS):
motor_pose[int(2 * i)] = leg_pose[NUM_LEGS + i] - (-1)**int(i / 2) * leg_pose[i]
motor_pose[int(2 * i + 1)] = (
leg_pose[NUM_LEGS + i] + (-1)**int(i / 2) * leg_pose[i])
motor_pose[int(2 * i + 1)] = (leg_pose[NUM_LEGS + i] + (-1)**int(i / 2) * leg_pose[i])
return motor_pose
def _signal(self, t):
initial_pose = np.array([
INIT_SWING_POS, INIT_SWING_POS, INIT_SWING_POS, INIT_SWING_POS,
INIT_EXTENSION_POS, INIT_EXTENSION_POS, INIT_EXTENSION_POS,
INIT_EXTENSION_POS
INIT_SWING_POS, INIT_SWING_POS, INIT_SWING_POS, INIT_SWING_POS, INIT_EXTENSION_POS,
INIT_EXTENSION_POS, INIT_EXTENSION_POS, INIT_EXTENSION_POS
])
return initial_pose
@@ -214,8 +207,7 @@ class MinitaurReactiveEnv(minitaur_gym_env.MinitaurGymEnv):
upper_bound_pitch_dot = 2 * math.pi / self._time_step
upper_bound_motor_angle = 2 * math.pi
upper_bound = [
upper_bound_roll, upper_bound_pitch, upper_bound_roll_dot,
upper_bound_pitch_dot
upper_bound_roll, upper_bound_pitch, upper_bound_roll_dot, upper_bound_pitch_dot
]
if self._use_angle_in_observation:

View File

@@ -10,17 +10,14 @@ import time
import inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(os.path.dirname(currentdir)))
print("parentdir=",parentdir)
os.sys.path.insert(0,parentdir)
print("parentdir=", parentdir)
os.sys.path.insert(0, parentdir)
import tensorflow as tf
from pybullet_envs.minitaur.agents.scripts import utility
import pybullet_data
from pybullet_envs.minitaur.envs import simple_ppo_agent
flags = tf.app.flags
FLAGS = tf.app.flags.FLAGS
LOG_DIR = os.path.join(pybullet_data.getDataPath(), "policies/ppo/minitaur_reactive_env")
@@ -36,13 +33,12 @@ def main(argv):
network = config.network
with tf.Session() as sess:
agent = simple_ppo_agent.SimplePPOPolicy(
sess,
env,
network,
policy_layers=policy_layers,
value_layers=value_layers,
checkpoint=os.path.join(LOG_DIR, CHECKPOINT))
agent = simple_ppo_agent.SimplePPOPolicy(sess,
env,
network,
policy_layers=policy_layers,
value_layers=value_layers,
checkpoint=os.path.join(LOG_DIR, CHECKPOINT))
sum_reward = 0
observation = env.reset()

View File

@@ -29,10 +29,7 @@ class MinitaurStandGymEnv(minitaur_gym_env.MinitaurGymEnv):
function is based on how long the minitaur stays standing up.
"""
metadata = {
"render.modes": ["human", "rgb_array"],
"video.frames_per_second": 50
}
metadata = {"render.modes": ["human", "rgb_array"], "video.frames_per_second": 50}
def __init__(self,
urdf_root=pybullet_data.getDataPath(),
@@ -53,16 +50,15 @@ class MinitaurStandGymEnv(minitaur_gym_env.MinitaurGymEnv):
pd_control_enabled: Whether to use PD controller for each motor.
render: Whether to render the simulation.
"""
super(MinitaurStandGymEnv, self).__init__(
urdf_root=urdf_root,
action_repeat=action_repeat,
observation_noise_stdev=observation_noise_stdev,
self_collision_enabled=self_collision_enabled,
motor_velocity_limit=motor_velocity_limit,
pd_control_enabled=pd_control_enabled,
accurate_motor_model_enabled=True,
motor_overheat_protection=True,
render=render)
super(MinitaurStandGymEnv, self).__init__(urdf_root=urdf_root,
action_repeat=action_repeat,
observation_noise_stdev=observation_noise_stdev,
self_collision_enabled=self_collision_enabled,
motor_velocity_limit=motor_velocity_limit,
pd_control_enabled=pd_control_enabled,
accurate_motor_model_enabled=True,
motor_overheat_protection=True,
render=render)
# Set the action dimension to 1, and reset the action space.
action_dim = 1
action_high = np.array([self._action_bound] * action_dim)
@@ -85,8 +81,8 @@ class MinitaurStandGymEnv(minitaur_gym_env.MinitaurGymEnv):
for t in range(5000):
if self._is_render:
base_pos = self.minitaur.GetBasePosition()
self._pybullet_client.resetDebugVisualizerCamera(
self._cam_dist, self._cam_yaw, self._cam_pitch, base_pos)
self._pybullet_client.resetDebugVisualizerCamera(self._cam_dist, self._cam_yaw,
self._cam_pitch, base_pos)
state = self._get_true_observation()
action = self._policy_flip(t, state[24:28])
self.minitaur.ApplyAction(action)
@@ -226,8 +222,7 @@ class MinitaurStandGymEnv(minitaur_gym_env.MinitaurGymEnv):
# Lower the signal a little, so that it becomes positive only for a short
# amount of time.
lower_signal = -0.94
signal_unit = math.copysign(intensity,
math.sin(time_step * speed) + lower_signal)
signal_unit = math.copysign(intensity, math.sin(time_step * speed) + lower_signal)
# Only extend the leg, don't shorten.
if signal_unit < 0:
signal_unit = 0

View File

@@ -13,9 +13,8 @@ from pybullet_envs.minitaur.envs import minitaur_stand_gym_env
def StandUpExample():
"""An example that the minitaur stands up."""
steps = 1000
environment = minitaur_stand_gym_env.MinitaurStandGymEnv(
render=True,
motor_velocity_limit=np.inf)
environment = minitaur_stand_gym_env.MinitaurStandGymEnv(render=True,
motor_velocity_limit=np.inf)
action = [0.5]
_, _, done, _ = environment.step(action)
for t in range(steps):

View File

@@ -24,10 +24,7 @@ class MinitaurTrottingEnv(minitaur_gym_env.MinitaurGymEnv):
controller (e.g. a neural network).
"""
metadata = {
"render.modes": ["human", "rgb_array"],
"video.frames_per_second": 166
}
metadata = {"render.modes": ["human", "rgb_array"], "video.frames_per_second": 166}
def __init__(self,
urdf_version=None,
@@ -103,8 +100,8 @@ class MinitaurTrottingEnv(minitaur_gym_env.MinitaurGymEnv):
# The reset position.
self._init_pose = [
init_swing, init_swing, init_swing, init_swing, init_extension,
init_extension, init_extension, init_extension
init_swing, init_swing, init_swing, init_swing, init_extension, init_extension,
init_extension, init_extension
]
self._step_frequency = step_frequency
@@ -112,23 +109,23 @@ class MinitaurTrottingEnv(minitaur_gym_env.MinitaurGymEnv):
self._swing_amplitude = swing_amplitude
self._use_signal_in_observation = use_signal_in_observation
self._use_angle_in_observation = use_angle_in_observation
super(MinitaurTrottingEnv, self).__init__(
urdf_version=urdf_version,
accurate_motor_model_enabled=accurate_motor_model_enabled,
motor_overheat_protection=True,
motor_kp=motor_kp,
motor_kd=motor_kd,
remove_default_joint_damping=remove_default_joint_damping,
control_latency=control_latency,
pd_latency=pd_latency,
on_rack=on_rack,
render=render,
hard_reset=hard_reset,
num_steps_to_log=num_steps_to_log,
env_randomizer=env_randomizer,
log_path=log_path,
control_time_step=control_time_step,
action_repeat=action_repeat)
super(MinitaurTrottingEnv,
self).__init__(urdf_version=urdf_version,
accurate_motor_model_enabled=accurate_motor_model_enabled,
motor_overheat_protection=True,
motor_kp=motor_kp,
motor_kd=motor_kd,
remove_default_joint_damping=remove_default_joint_damping,
control_latency=control_latency,
pd_latency=pd_latency,
on_rack=on_rack,
render=render,
hard_reset=hard_reset,
num_steps_to_log=num_steps_to_log,
env_randomizer=env_randomizer,
log_path=log_path,
control_time_step=control_time_step,
action_repeat=action_repeat)
action_dim = NUM_LEGS * 2
action_high = np.array([0.25] * action_dim)
@@ -144,8 +141,8 @@ class MinitaurTrottingEnv(minitaur_gym_env.MinitaurGymEnv):
# [swing leg 1, swing leg 2, swing leg 3, swing leg 4,
# extension leg 1, extension leg 2, extension leg 3, extension leg 4]
initial_motor_angles = self._convert_from_leg_model(self._init_pose)
super(MinitaurTrottingEnv, self).reset(
initial_motor_angles=initial_motor_angles, reset_duration=0.5)
super(MinitaurTrottingEnv, self).reset(initial_motor_angles=initial_motor_angles,
reset_duration=0.5)
return self._get_observation()
def _convert_from_leg_model(self, leg_pose):
@@ -161,8 +158,7 @@ class MinitaurTrottingEnv(minitaur_gym_env.MinitaurGymEnv):
motor_pose = np.zeros(NUM_MOTORS)
for i in range(NUM_LEGS):
motor_pose[int(2 * i)] = leg_pose[NUM_LEGS + i] - (-1)**int(i / 2) * leg_pose[i]
motor_pose[int(2 * i
+ 1)] = leg_pose[NUM_LEGS + i] + (-1)**int(i / 2) * leg_pose[i]
motor_pose[int(2 * i + 1)] = leg_pose[NUM_LEGS + i] + (-1)**int(i / 2) * leg_pose[i]
return motor_pose
def _gen_signal(self, t, phase):
@@ -179,8 +175,7 @@ class MinitaurTrottingEnv(minitaur_gym_env.MinitaurGymEnv):
The desired leg extension and swing angle at the current time.
"""
period = 1 / self._step_frequency
extension = self._extension_amplitude * math.cos(
2 * math.pi / period * t + phase)
extension = self._extension_amplitude * math.cos(2 * math.pi / period * t + phase)
swing = self._swing_amplitude * math.sin(2 * math.pi / period * t + phase)
return extension, swing
@@ -198,8 +193,8 @@ class MinitaurTrottingEnv(minitaur_gym_env.MinitaurGymEnv):
ext_second_pair, sw_second_pair = self._gen_signal(t, math.pi)
trotting_signal = np.array([
sw_first_pair, sw_second_pair, sw_second_pair, sw_first_pair,
ext_first_pair, ext_second_pair, ext_second_pair, ext_first_pair
sw_first_pair, sw_second_pair, sw_second_pair, sw_first_pair, ext_first_pair,
ext_second_pair, ext_second_pair, ext_first_pair
])
signal = np.array(self._init_pose) + trotting_signal
return signal
@@ -286,8 +281,7 @@ class MinitaurTrottingEnv(minitaur_gym_env.MinitaurGymEnv):
"""
upper_bound = []
upper_bound.extend([2 * math.pi] * 2) # Roll, pitch, yaw of the base.
upper_bound.extend(
[2 * math.pi / self._time_step] * 2) # Roll, pitch, yaw rate.
upper_bound.extend([2 * math.pi / self._time_step] * 2) # Roll, pitch, yaw rate.
if self._use_signal_in_observation:
upper_bound.extend([2 * math.pi] * NUM_MOTORS) # Signal
if self._use_angle_in_observation:
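The open-loop gait reformatted above drives each diagonal leg pair with the same sine/cosine signal, with the second pair shifted by pi so the diagonals alternate, and adds the result to the reset pose. A standalone sketch of _gen_signal and the pairing; the amplitudes, step frequency and init pose are placeholders, not the env defaults (those come from the constructor, which is only partly shown here):

# Standalone sketch of the open-loop trotting signal above. The constants
# are illustrative placeholders, not the MinitaurTrottingEnv defaults.
import math

import numpy as np

STEP_FREQUENCY = 2.0  # gait cycles per second (assumed)
EXTENSION_AMPLITUDE = 0.1  # radians (assumed)
SWING_AMPLITUDE = 0.2  # radians (assumed)
INIT_POSE = np.zeros(8)  # [swing_1..4, extension_1..4]


def gen_signal(t, phase):
  """Desired (extension, swing) for one diagonal leg pair at time t."""
  period = 1 / STEP_FREQUENCY
  extension = EXTENSION_AMPLITUDE * math.cos(2 * math.pi / period * t + phase)
  swing = SWING_AMPLITUDE * math.sin(2 * math.pi / period * t + phase)
  return extension, swing


def trotting_signal(t):
  # The two diagonal pairs are half a gait cycle out of phase.
  ext_first, sw_first = gen_signal(t, 0)
  ext_second, sw_second = gen_signal(t, math.pi)
  offsets = np.array([
      sw_first, sw_second, sw_second, sw_first,
      ext_first, ext_second, ext_second, ext_first
  ])
  return INIT_POSE + offsets


print(trotting_signal(0.0))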

View File

@@ -26,13 +26,12 @@ def main(argv):
network = config.network
with tf.Session() as sess:
agent = simple_ppo_agent.SimplePPOPolicy(
sess,
env,
network,
policy_layers=policy_layers,
value_layers=value_layers,
checkpoint=os.path.join(LOG_DIR, CHECKPOINT))
agent = simple_ppo_agent.SimplePPOPolicy(sess,
env,
network,
policy_layers=policy_layers,
value_layers=value_layers,
checkpoint=os.path.join(LOG_DIR, CHECKPOINT))
sum_reward = 0
observation = env.reset()
@@ -48,4 +47,3 @@ def main(argv):
if __name__ == "__main__":
tf.app.run(main)

View File

@@ -8,8 +8,7 @@ MOTOR_VOLTAGE = 16.0
MOTOR_RESISTANCE = 0.186
MOTOR_TORQUE_CONSTANT = 0.0954
MOTOR_VISCOUS_DAMPING = 0
MOTOR_SPEED_LIMIT = MOTOR_VOLTAGE / (
MOTOR_VISCOUS_DAMPING + MOTOR_TORQUE_CONSTANT)
MOTOR_SPEED_LIMIT = MOTOR_VOLTAGE / (MOTOR_VISCOUS_DAMPING + MOTOR_TORQUE_CONSTANT)
NUM_MOTORS = 8
@@ -124,21 +123,19 @@ class MotorModel(object):
observed_torque: The torque observed by the sensor.
"""
observed_torque = np.clip(
self._torque_constant *
(np.asarray(pwm) * self._voltage / self._resistance),
self._torque_constant * (np.asarray(pwm) * self._voltage / self._resistance),
-OBSERVED_TORQUE_LIMIT, OBSERVED_TORQUE_LIMIT)
# Net voltage is clipped at 50V by diodes on the motor controller.
voltage_net = np.clip(
np.asarray(pwm) * self._voltage -
(self._torque_constant + self._viscous_damping) *
np.asarray(true_motor_velocity), -VOLTAGE_CLIPPING, VOLTAGE_CLIPPING)
(self._torque_constant + self._viscous_damping) * np.asarray(true_motor_velocity),
-VOLTAGE_CLIPPING, VOLTAGE_CLIPPING)
current = voltage_net / self._resistance
current_sign = np.sign(current)
current_magnitude = np.absolute(current)
# Saturate torque based on empirical current relation.
actual_torque = np.interp(current_magnitude, self._current_table,
self._torque_table)
actual_torque = np.interp(current_magnitude, self._current_table, self._torque_table)
actual_torque = np.multiply(current_sign, actual_torque)
actual_torque = np.multiply(self._strength_ratios, actual_torque)
return actual_torque, observed_torque
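The converted-torque path reformatted above goes PWM -> clipped observed torque, then PWM minus back-EMF -> clipped net voltage -> winding current -> an empirical current-to-torque lookup. A standalone sketch of that pipeline; only the four motor constants appear in this diff, so the clipping limits and the current/torque table below are placeholders:

# Standalone sketch of the motor torque conversion above. VOLTAGE_CLIPPING,
# OBSERVED_TORQUE_LIMIT and the current/torque table are placeholders.
import numpy as np

MOTOR_VOLTAGE = 16.0
MOTOR_RESISTANCE = 0.186
MOTOR_TORQUE_CONSTANT = 0.0954
MOTOR_VISCOUS_DAMPING = 0
VOLTAGE_CLIPPING = 50.0  # placeholder
OBSERVED_TORQUE_LIMIT = 5.7  # placeholder
CURRENT_TABLE = [0.0, 10.0, 20.0, 30.0]  # amps, placeholder
TORQUE_TABLE = [0.0, 1.0, 1.8, 2.2]  # Nm, placeholder


def convert_to_torque(pwm, true_motor_velocity):
  pwm = np.asarray(pwm)
  true_motor_velocity = np.asarray(true_motor_velocity)
  # Torque the sensor would report, from the commanded PWM alone.
  observed_torque = np.clip(MOTOR_TORQUE_CONSTANT * (pwm * MOTOR_VOLTAGE / MOTOR_RESISTANCE),
                            -OBSERVED_TORQUE_LIMIT, OBSERVED_TORQUE_LIMIT)
  # Net voltage after back-EMF, clipped by the diodes on the controller.
  voltage_net = np.clip(
      pwm * MOTOR_VOLTAGE -
      (MOTOR_TORQUE_CONSTANT + MOTOR_VISCOUS_DAMPING) * true_motor_velocity,
      -VOLTAGE_CLIPPING, VOLTAGE_CLIPPING)
  current = voltage_net / MOTOR_RESISTANCE
  # Saturate torque with the empirical current/torque relation.
  actual_torque = np.sign(current) * np.interp(np.absolute(current), CURRENT_TABLE, TORQUE_TABLE)
  return actual_torque, observed_torque


print(convert_to_torque([0.5], [20.0]))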

View File

@@ -4,7 +4,6 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from pybullet_envs.agents.ppo import normalize
from pybullet_envs.agents import utility
@@ -20,29 +19,25 @@ class SimplePPOPolicy(object):
https://cs.corp.google.com/piper///depot/google3/robotics/reinforcement_learning/agents/scripts/visualize.py.
"""
def __init__(self, sess, env, network, policy_layers, value_layers,
checkpoint):
def __init__(self, sess, env, network, policy_layers, value_layers, checkpoint):
self.env = env
self.sess = sess
observation_size = len(env.observation_space.low)
action_size = len(env.action_space.low)
self.observation_placeholder = tf.placeholder(
tf.float32, [None, observation_size], name="Input")
self._observ_filter = normalize.StreamingNormalize(
self.observation_placeholder[0],
center=True,
scale=True,
clip=5,
name="normalize_observ")
self._restore_policy(
network,
policy_layers=policy_layers,
value_layers=value_layers,
action_size=action_size,
checkpoint=checkpoint)
self.observation_placeholder = tf.placeholder(tf.float32, [None, observation_size],
name="Input")
self._observ_filter = normalize.StreamingNormalize(self.observation_placeholder[0],
center=True,
scale=True,
clip=5,
name="normalize_observ")
self._restore_policy(network,
policy_layers=policy_layers,
value_layers=value_layers,
action_size=action_size,
checkpoint=checkpoint)
def _restore_policy(self, network, policy_layers, value_layers, action_size,
checkpoint):
def _restore_policy(self, network, policy_layers, value_layers, action_size, checkpoint):
"""Restore the PPO policy from a TensorFlow checkpoint.
Args:
@@ -56,24 +51,21 @@ class SimplePPOPolicy(object):
"""
observ = self._observ_filter.transform(self.observation_placeholder)
with tf.variable_scope("network/rnn"):
self.network = network(
policy_layers=policy_layers,
value_layers=value_layers,
action_size=action_size)
self.network = network(policy_layers=policy_layers,
value_layers=value_layers,
action_size=action_size)
with tf.variable_scope("temporary"):
self.last_state = tf.Variable(
self.network.zero_state(1, tf.float32), False)
self.last_state = tf.Variable(self.network.zero_state(1, tf.float32), False)
self.sess.run(self.last_state.initializer)
with tf.variable_scope("network"):
(mean_action, _, _), new_state = tf.nn.dynamic_rnn(
self.network,
observ[:, None],
tf.ones(1),
self.last_state,
tf.float32,
swap_memory=True)
(mean_action, _, _), new_state = tf.nn.dynamic_rnn(self.network,
observ[:, None],
tf.ones(1),
self.last_state,
tf.float32,
swap_memory=True)
self.mean_action = mean_action
self.update_state = self.last_state.assign(new_state)

View File

@@ -29,8 +29,7 @@ from pybullet_envs.minitaur.agents import simple_ppo_agent
flags = tf.app.flags
FLAGS = tf.app.flags.FLAGS
flags.DEFINE_string("logdir", None,
"The directory that contains checkpoint and config.")
flags.DEFINE_string("logdir", None, "The directory that contains checkpoint and config.")
flags.DEFINE_string("checkpoint", None, "The checkpoint file path.")
flags.DEFINE_string("log_path", None, "The output path to write log.")
@@ -44,13 +43,13 @@ def main(argv):
network = config.network
with tf.Session() as sess:
agent = simple_ppo_agent.SimplePPOPolicy(
sess,
env,
network,
policy_layers=policy_layers,
value_layers=value_layers,
checkpoint=os.path.join(FLAGS.logdir, FLAGS.checkpoint))
agent = simple_ppo_agent.SimplePPOPolicy(sess,
env,
network,
policy_layers=policy_layers,
value_layers=value_layers,
checkpoint=os.path.join(FLAGS.logdir,
FLAGS.checkpoint))
sum_reward = 0
observation = env.reset()

View File

@@ -2,7 +2,7 @@
# source: timestamp.proto
import sys
_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
_b = sys.version_info[0] < 3 and (lambda x: x) or (lambda x: x.encode('latin1'))
from google.protobuf import descriptor as _descriptor
from google.protobuf import message as _message
from google.protobuf import reflection as _reflection
@@ -12,65 +12,76 @@ from google.protobuf import descriptor_pb2
_sym_db = _symbol_database.Default()
DESCRIPTOR = _descriptor.FileDescriptor(
name='timestamp.proto',
package='google.protobuf',
syntax='proto3',
serialized_pb=_b('\n\x0ftimestamp.proto\x12\x0fgoogle.protobuf\"+\n\tTimestamp\x12\x0f\n\x07seconds\x18\x01 \x01(\x03\x12\r\n\x05nanos\x18\x02 \x01(\x05\x62\x06proto3')
)
name='timestamp.proto',
package='google.protobuf',
syntax='proto3',
serialized_pb=_b(
'\n\x0ftimestamp.proto\x12\x0fgoogle.protobuf\"+\n\tTimestamp\x12\x0f\n\x07seconds\x18\x01 \x01(\x03\x12\r\n\x05nanos\x18\x02 \x01(\x05\x62\x06proto3'
))
_TIMESTAMP = _descriptor.Descriptor(
name='Timestamp',
full_name='google.protobuf.Timestamp',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='seconds', full_name='google.protobuf.Timestamp.seconds', index=0,
number=1, type=3, cpp_type=2, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='nanos', full_name='google.protobuf.Timestamp.nanos', index=1,
number=2, type=5, cpp_type=1, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
],
extensions=[
],
nested_types=[],
enum_types=[
],
options=None,
is_extendable=False,
syntax='proto3',
extension_ranges=[],
oneofs=[
],
serialized_start=36,
serialized_end=79,
name='Timestamp',
full_name='google.protobuf.Timestamp',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(name='seconds',
full_name='google.protobuf.Timestamp.seconds',
index=0,
number=1,
type=3,
cpp_type=2,
label=1,
has_default_value=False,
default_value=0,
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
options=None,
file=DESCRIPTOR),
_descriptor.FieldDescriptor(name='nanos',
full_name='google.protobuf.Timestamp.nanos',
index=1,
number=2,
type=5,
cpp_type=1,
label=1,
has_default_value=False,
default_value=0,
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
options=None,
file=DESCRIPTOR),
],
extensions=[],
nested_types=[],
enum_types=[],
options=None,
is_extendable=False,
syntax='proto3',
extension_ranges=[],
oneofs=[],
serialized_start=36,
serialized_end=79,
)
DESCRIPTOR.message_types_by_name['Timestamp'] = _TIMESTAMP
_sym_db.RegisterFileDescriptor(DESCRIPTOR)
Timestamp = _reflection.GeneratedProtocolMessageType('Timestamp', (_message.Message,), dict(
DESCRIPTOR = _TIMESTAMP,
__module__ = 'timestamp_pb2'
# @@protoc_insertion_point(class_scope:google.protobuf.Timestamp)
))
Timestamp = _reflection.GeneratedProtocolMessageType(
'Timestamp',
(_message.Message,),
dict(DESCRIPTOR=_TIMESTAMP,
__module__='timestamp_pb2'
# @@protoc_insertion_point(class_scope:google.protobuf.Timestamp)
))
_sym_db.RegisterMessage(Timestamp)
# @@protoc_insertion_point(module_scope)

View File

@@ -2,7 +2,7 @@
# source: vector.proto
import sys
_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
_b = sys.version_info[0] < 3 and (lambda x: x) or (lambda x: x.encode('latin1'))
from google.protobuf import descriptor as _descriptor
from google.protobuf import message as _message
from google.protobuf import reflection as _reflection
@@ -12,348 +12,494 @@ from google.protobuf import descriptor_pb2
_sym_db = _symbol_database.Default()
DESCRIPTOR = _descriptor.FileDescriptor(
name='vector.proto',
package='robotics.messages',
syntax='proto3',
serialized_pb=_b('\n\x0cvector.proto\x12\x11robotics.messages\"6\n\x08Vector4d\x12\t\n\x01x\x18\x01 \x01(\x01\x12\t\n\x01y\x18\x02 \x01(\x01\x12\t\n\x01z\x18\x03 \x01(\x01\x12\t\n\x01w\x18\x04 \x01(\x01\"6\n\x08Vector4f\x12\t\n\x01x\x18\x01 \x01(\x02\x12\t\n\x01y\x18\x02 \x01(\x02\x12\t\n\x01z\x18\x03 \x01(\x02\x12\t\n\x01w\x18\x04 \x01(\x02\"+\n\x08Vector3d\x12\t\n\x01x\x18\x01 \x01(\x01\x12\t\n\x01y\x18\x02 \x01(\x01\x12\t\n\x01z\x18\x03 \x01(\x01\"+\n\x08Vector3f\x12\t\n\x01x\x18\x01 \x01(\x02\x12\t\n\x01y\x18\x02 \x01(\x02\x12\t\n\x01z\x18\x03 \x01(\x02\" \n\x08Vector2d\x12\t\n\x01x\x18\x01 \x01(\x01\x12\t\n\x01y\x18\x02 \x01(\x01\" \n\x08Vector2f\x12\t\n\x01x\x18\x01 \x01(\x02\x12\t\n\x01y\x18\x02 \x01(\x02\"\x1b\n\x07Vectord\x12\x10\n\x04\x64\x61ta\x18\x01 \x03(\x01\x42\x02\x10\x01\"\x1b\n\x07Vectorf\x12\x10\n\x04\x64\x61ta\x18\x01 \x03(\x02\x42\x02\x10\x01\x62\x06proto3')
)
name='vector.proto',
package='robotics.messages',
syntax='proto3',
serialized_pb=_b(
'\n\x0cvector.proto\x12\x11robotics.messages\"6\n\x08Vector4d\x12\t\n\x01x\x18\x01 \x01(\x01\x12\t\n\x01y\x18\x02 \x01(\x01\x12\t\n\x01z\x18\x03 \x01(\x01\x12\t\n\x01w\x18\x04 \x01(\x01\"6\n\x08Vector4f\x12\t\n\x01x\x18\x01 \x01(\x02\x12\t\n\x01y\x18\x02 \x01(\x02\x12\t\n\x01z\x18\x03 \x01(\x02\x12\t\n\x01w\x18\x04 \x01(\x02\"+\n\x08Vector3d\x12\t\n\x01x\x18\x01 \x01(\x01\x12\t\n\x01y\x18\x02 \x01(\x01\x12\t\n\x01z\x18\x03 \x01(\x01\"+\n\x08Vector3f\x12\t\n\x01x\x18\x01 \x01(\x02\x12\t\n\x01y\x18\x02 \x01(\x02\x12\t\n\x01z\x18\x03 \x01(\x02\" \n\x08Vector2d\x12\t\n\x01x\x18\x01 \x01(\x01\x12\t\n\x01y\x18\x02 \x01(\x01\" \n\x08Vector2f\x12\t\n\x01x\x18\x01 \x01(\x02\x12\t\n\x01y\x18\x02 \x01(\x02\"\x1b\n\x07Vectord\x12\x10\n\x04\x64\x61ta\x18\x01 \x03(\x01\x42\x02\x10\x01\"\x1b\n\x07Vectorf\x12\x10\n\x04\x64\x61ta\x18\x01 \x03(\x02\x42\x02\x10\x01\x62\x06proto3'
))
_VECTOR4D = _descriptor.Descriptor(
name='Vector4d',
full_name='robotics.messages.Vector4d',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='x', full_name='robotics.messages.Vector4d.x', index=0,
number=1, type=1, cpp_type=5, label=1,
has_default_value=False, default_value=float(0),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='y', full_name='robotics.messages.Vector4d.y', index=1,
number=2, type=1, cpp_type=5, label=1,
has_default_value=False, default_value=float(0),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='z', full_name='robotics.messages.Vector4d.z', index=2,
number=3, type=1, cpp_type=5, label=1,
has_default_value=False, default_value=float(0),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='w', full_name='robotics.messages.Vector4d.w', index=3,
number=4, type=1, cpp_type=5, label=1,
has_default_value=False, default_value=float(0),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
],
extensions=[
],
nested_types=[],
enum_types=[
],
options=None,
is_extendable=False,
syntax='proto3',
extension_ranges=[],
oneofs=[
],
serialized_start=35,
serialized_end=89,
name='Vector4d',
full_name='robotics.messages.Vector4d',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(name='x',
full_name='robotics.messages.Vector4d.x',
index=0,
number=1,
type=1,
cpp_type=5,
label=1,
has_default_value=False,
default_value=float(0),
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
options=None,
file=DESCRIPTOR),
_descriptor.FieldDescriptor(name='y',
full_name='robotics.messages.Vector4d.y',
index=1,
number=2,
type=1,
cpp_type=5,
label=1,
has_default_value=False,
default_value=float(0),
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
options=None,
file=DESCRIPTOR),
_descriptor.FieldDescriptor(name='z',
full_name='robotics.messages.Vector4d.z',
index=2,
number=3,
type=1,
cpp_type=5,
label=1,
has_default_value=False,
default_value=float(0),
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
options=None,
file=DESCRIPTOR),
_descriptor.FieldDescriptor(name='w',
full_name='robotics.messages.Vector4d.w',
index=3,
number=4,
type=1,
cpp_type=5,
label=1,
has_default_value=False,
default_value=float(0),
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
options=None,
file=DESCRIPTOR),
],
extensions=[],
nested_types=[],
enum_types=[],
options=None,
is_extendable=False,
syntax='proto3',
extension_ranges=[],
oneofs=[],
serialized_start=35,
serialized_end=89,
)
_VECTOR4F = _descriptor.Descriptor(
name='Vector4f',
full_name='robotics.messages.Vector4f',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='x', full_name='robotics.messages.Vector4f.x', index=0,
number=1, type=2, cpp_type=6, label=1,
has_default_value=False, default_value=float(0),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='y', full_name='robotics.messages.Vector4f.y', index=1,
number=2, type=2, cpp_type=6, label=1,
has_default_value=False, default_value=float(0),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='z', full_name='robotics.messages.Vector4f.z', index=2,
number=3, type=2, cpp_type=6, label=1,
has_default_value=False, default_value=float(0),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='w', full_name='robotics.messages.Vector4f.w', index=3,
number=4, type=2, cpp_type=6, label=1,
has_default_value=False, default_value=float(0),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
],
extensions=[
],
nested_types=[],
enum_types=[
],
options=None,
is_extendable=False,
syntax='proto3',
extension_ranges=[],
oneofs=[
],
serialized_start=91,
serialized_end=145,
name='Vector4f',
full_name='robotics.messages.Vector4f',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(name='x',
full_name='robotics.messages.Vector4f.x',
index=0,
number=1,
type=2,
cpp_type=6,
label=1,
has_default_value=False,
default_value=float(0),
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
options=None,
file=DESCRIPTOR),
_descriptor.FieldDescriptor(name='y',
full_name='robotics.messages.Vector4f.y',
index=1,
number=2,
type=2,
cpp_type=6,
label=1,
has_default_value=False,
default_value=float(0),
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
options=None,
file=DESCRIPTOR),
_descriptor.FieldDescriptor(name='z',
full_name='robotics.messages.Vector4f.z',
index=2,
number=3,
type=2,
cpp_type=6,
label=1,
has_default_value=False,
default_value=float(0),
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
options=None,
file=DESCRIPTOR),
_descriptor.FieldDescriptor(name='w',
full_name='robotics.messages.Vector4f.w',
index=3,
number=4,
type=2,
cpp_type=6,
label=1,
has_default_value=False,
default_value=float(0),
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
options=None,
file=DESCRIPTOR),
],
extensions=[],
nested_types=[],
enum_types=[],
options=None,
is_extendable=False,
syntax='proto3',
extension_ranges=[],
oneofs=[],
serialized_start=91,
serialized_end=145,
)
_VECTOR3D = _descriptor.Descriptor(
name='Vector3d',
full_name='robotics.messages.Vector3d',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='x', full_name='robotics.messages.Vector3d.x', index=0,
number=1, type=1, cpp_type=5, label=1,
has_default_value=False, default_value=float(0),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='y', full_name='robotics.messages.Vector3d.y', index=1,
number=2, type=1, cpp_type=5, label=1,
has_default_value=False, default_value=float(0),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='z', full_name='robotics.messages.Vector3d.z', index=2,
number=3, type=1, cpp_type=5, label=1,
has_default_value=False, default_value=float(0),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
],
extensions=[
],
nested_types=[],
enum_types=[
],
options=None,
is_extendable=False,
syntax='proto3',
extension_ranges=[],
oneofs=[
],
serialized_start=147,
serialized_end=190,
name='Vector3d',
full_name='robotics.messages.Vector3d',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(name='x',
full_name='robotics.messages.Vector3d.x',
index=0,
number=1,
type=1,
cpp_type=5,
label=1,
has_default_value=False,
default_value=float(0),
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
options=None,
file=DESCRIPTOR),
_descriptor.FieldDescriptor(name='y',
full_name='robotics.messages.Vector3d.y',
index=1,
number=2,
type=1,
cpp_type=5,
label=1,
has_default_value=False,
default_value=float(0),
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
options=None,
file=DESCRIPTOR),
_descriptor.FieldDescriptor(name='z',
full_name='robotics.messages.Vector3d.z',
index=2,
number=3,
type=1,
cpp_type=5,
label=1,
has_default_value=False,
default_value=float(0),
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
options=None,
file=DESCRIPTOR),
],
extensions=[],
nested_types=[],
enum_types=[],
options=None,
is_extendable=False,
syntax='proto3',
extension_ranges=[],
oneofs=[],
serialized_start=147,
serialized_end=190,
)
_VECTOR3F = _descriptor.Descriptor(
name='Vector3f',
full_name='robotics.messages.Vector3f',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='x', full_name='robotics.messages.Vector3f.x', index=0,
number=1, type=2, cpp_type=6, label=1,
has_default_value=False, default_value=float(0),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='y', full_name='robotics.messages.Vector3f.y', index=1,
number=2, type=2, cpp_type=6, label=1,
has_default_value=False, default_value=float(0),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='z', full_name='robotics.messages.Vector3f.z', index=2,
number=3, type=2, cpp_type=6, label=1,
has_default_value=False, default_value=float(0),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
],
extensions=[
],
nested_types=[],
enum_types=[
],
options=None,
is_extendable=False,
syntax='proto3',
extension_ranges=[],
oneofs=[
],
serialized_start=192,
serialized_end=235,
name='Vector3f',
full_name='robotics.messages.Vector3f',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(name='x',
full_name='robotics.messages.Vector3f.x',
index=0,
number=1,
type=2,
cpp_type=6,
label=1,
has_default_value=False,
default_value=float(0),
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
options=None,
file=DESCRIPTOR),
_descriptor.FieldDescriptor(name='y',
full_name='robotics.messages.Vector3f.y',
index=1,
number=2,
type=2,
cpp_type=6,
label=1,
has_default_value=False,
default_value=float(0),
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
options=None,
file=DESCRIPTOR),
_descriptor.FieldDescriptor(name='z',
full_name='robotics.messages.Vector3f.z',
index=2,
number=3,
type=2,
cpp_type=6,
label=1,
has_default_value=False,
default_value=float(0),
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
options=None,
file=DESCRIPTOR),
],
extensions=[],
nested_types=[],
enum_types=[],
options=None,
is_extendable=False,
syntax='proto3',
extension_ranges=[],
oneofs=[],
serialized_start=192,
serialized_end=235,
)
_VECTOR2D = _descriptor.Descriptor(
name='Vector2d',
full_name='robotics.messages.Vector2d',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='x', full_name='robotics.messages.Vector2d.x', index=0,
number=1, type=1, cpp_type=5, label=1,
has_default_value=False, default_value=float(0),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='y', full_name='robotics.messages.Vector2d.y', index=1,
number=2, type=1, cpp_type=5, label=1,
has_default_value=False, default_value=float(0),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
],
extensions=[
],
nested_types=[],
enum_types=[
],
options=None,
is_extendable=False,
syntax='proto3',
extension_ranges=[],
oneofs=[
],
serialized_start=237,
serialized_end=269,
name='Vector2d',
full_name='robotics.messages.Vector2d',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(name='x',
full_name='robotics.messages.Vector2d.x',
index=0,
number=1,
type=1,
cpp_type=5,
label=1,
has_default_value=False,
default_value=float(0),
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
options=None,
file=DESCRIPTOR),
_descriptor.FieldDescriptor(name='y',
full_name='robotics.messages.Vector2d.y',
index=1,
number=2,
type=1,
cpp_type=5,
label=1,
has_default_value=False,
default_value=float(0),
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
options=None,
file=DESCRIPTOR),
],
extensions=[],
nested_types=[],
enum_types=[],
options=None,
is_extendable=False,
syntax='proto3',
extension_ranges=[],
oneofs=[],
serialized_start=237,
serialized_end=269,
)
_VECTOR2F = _descriptor.Descriptor(
name='Vector2f',
full_name='robotics.messages.Vector2f',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='x', full_name='robotics.messages.Vector2f.x', index=0,
number=1, type=2, cpp_type=6, label=1,
has_default_value=False, default_value=float(0),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='y', full_name='robotics.messages.Vector2f.y', index=1,
number=2, type=2, cpp_type=6, label=1,
has_default_value=False, default_value=float(0),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
],
extensions=[
],
nested_types=[],
enum_types=[
],
options=None,
is_extendable=False,
syntax='proto3',
extension_ranges=[],
oneofs=[
],
serialized_start=271,
serialized_end=303,
name='Vector2f',
full_name='robotics.messages.Vector2f',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(name='x',
full_name='robotics.messages.Vector2f.x',
index=0,
number=1,
type=2,
cpp_type=6,
label=1,
has_default_value=False,
default_value=float(0),
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
options=None,
file=DESCRIPTOR),
_descriptor.FieldDescriptor(name='y',
full_name='robotics.messages.Vector2f.y',
index=1,
number=2,
type=2,
cpp_type=6,
label=1,
has_default_value=False,
default_value=float(0),
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
options=None,
file=DESCRIPTOR),
],
extensions=[],
nested_types=[],
enum_types=[],
options=None,
is_extendable=False,
syntax='proto3',
extension_ranges=[],
oneofs=[],
serialized_start=271,
serialized_end=303,
)
_VECTORD = _descriptor.Descriptor(
name='Vectord',
full_name='robotics.messages.Vectord',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='data', full_name='robotics.messages.Vectord.data', index=0,
number=1, type=1, cpp_type=5, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=_descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b('\020\001')), file=DESCRIPTOR),
],
extensions=[
],
nested_types=[],
enum_types=[
],
options=None,
is_extendable=False,
syntax='proto3',
extension_ranges=[],
oneofs=[
],
serialized_start=305,
serialized_end=332,
name='Vectord',
full_name='robotics.messages.Vectord',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(name='data',
full_name='robotics.messages.Vectord.data',
index=0,
number=1,
type=1,
cpp_type=5,
label=3,
has_default_value=False,
default_value=[],
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
options=_descriptor._ParseOptions(
descriptor_pb2.FieldOptions(), _b('\020\001')),
file=DESCRIPTOR),
],
extensions=[],
nested_types=[],
enum_types=[],
options=None,
is_extendable=False,
syntax='proto3',
extension_ranges=[],
oneofs=[],
serialized_start=305,
serialized_end=332,
)
_VECTORF = _descriptor.Descriptor(
name='Vectorf',
full_name='robotics.messages.Vectorf',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='data', full_name='robotics.messages.Vectorf.data', index=0,
number=1, type=2, cpp_type=6, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=_descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b('\020\001')), file=DESCRIPTOR),
],
extensions=[
],
nested_types=[],
enum_types=[
],
options=None,
is_extendable=False,
syntax='proto3',
extension_ranges=[],
oneofs=[
],
serialized_start=334,
serialized_end=361,
name='Vectorf',
full_name='robotics.messages.Vectorf',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(name='data',
full_name='robotics.messages.Vectorf.data',
index=0,
number=1,
type=2,
cpp_type=6,
label=3,
has_default_value=False,
default_value=[],
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
options=_descriptor._ParseOptions(
descriptor_pb2.FieldOptions(), _b('\020\001')),
file=DESCRIPTOR),
],
extensions=[],
nested_types=[],
enum_types=[],
options=None,
is_extendable=False,
syntax='proto3',
extension_ranges=[],
oneofs=[],
serialized_start=334,
serialized_end=361,
)
DESCRIPTOR.message_types_by_name['Vector4d'] = _VECTOR4D
@@ -366,65 +512,82 @@ DESCRIPTOR.message_types_by_name['Vectord'] = _VECTORD
DESCRIPTOR.message_types_by_name['Vectorf'] = _VECTORF
_sym_db.RegisterFileDescriptor(DESCRIPTOR)
Vector4d = _reflection.GeneratedProtocolMessageType('Vector4d', (_message.Message,), dict(
DESCRIPTOR = _VECTOR4D,
__module__ = 'vector_pb2'
# @@protoc_insertion_point(class_scope:robotics.messages.Vector4d)
))
Vector4d = _reflection.GeneratedProtocolMessageType(
'Vector4d',
(_message.Message,),
dict(DESCRIPTOR=_VECTOR4D,
__module__='vector_pb2'
# @@protoc_insertion_point(class_scope:robotics.messages.Vector4d)
))
_sym_db.RegisterMessage(Vector4d)
Vector4f = _reflection.GeneratedProtocolMessageType('Vector4f', (_message.Message,), dict(
DESCRIPTOR = _VECTOR4F,
__module__ = 'vector_pb2'
# @@protoc_insertion_point(class_scope:robotics.messages.Vector4f)
))
Vector4f = _reflection.GeneratedProtocolMessageType(
'Vector4f',
(_message.Message,),
dict(DESCRIPTOR=_VECTOR4F,
__module__='vector_pb2'
# @@protoc_insertion_point(class_scope:robotics.messages.Vector4f)
))
_sym_db.RegisterMessage(Vector4f)
Vector3d = _reflection.GeneratedProtocolMessageType('Vector3d', (_message.Message,), dict(
DESCRIPTOR = _VECTOR3D,
__module__ = 'vector_pb2'
# @@protoc_insertion_point(class_scope:robotics.messages.Vector3d)
))
Vector3d = _reflection.GeneratedProtocolMessageType(
'Vector3d',
(_message.Message,),
dict(DESCRIPTOR=_VECTOR3D,
__module__='vector_pb2'
# @@protoc_insertion_point(class_scope:robotics.messages.Vector3d)
))
_sym_db.RegisterMessage(Vector3d)
Vector3f = _reflection.GeneratedProtocolMessageType('Vector3f', (_message.Message,), dict(
DESCRIPTOR = _VECTOR3F,
__module__ = 'vector_pb2'
# @@protoc_insertion_point(class_scope:robotics.messages.Vector3f)
))
Vector3f = _reflection.GeneratedProtocolMessageType(
'Vector3f',
(_message.Message,),
dict(DESCRIPTOR=_VECTOR3F,
__module__='vector_pb2'
# @@protoc_insertion_point(class_scope:robotics.messages.Vector3f)
))
_sym_db.RegisterMessage(Vector3f)
Vector2d = _reflection.GeneratedProtocolMessageType('Vector2d', (_message.Message,), dict(
DESCRIPTOR = _VECTOR2D,
__module__ = 'vector_pb2'
# @@protoc_insertion_point(class_scope:robotics.messages.Vector2d)
))
Vector2d = _reflection.GeneratedProtocolMessageType(
'Vector2d',
(_message.Message,),
dict(DESCRIPTOR=_VECTOR2D,
__module__='vector_pb2'
# @@protoc_insertion_point(class_scope:robotics.messages.Vector2d)
))
_sym_db.RegisterMessage(Vector2d)
Vector2f = _reflection.GeneratedProtocolMessageType('Vector2f', (_message.Message,), dict(
DESCRIPTOR = _VECTOR2F,
__module__ = 'vector_pb2'
# @@protoc_insertion_point(class_scope:robotics.messages.Vector2f)
))
Vector2f = _reflection.GeneratedProtocolMessageType(
'Vector2f',
(_message.Message,),
dict(DESCRIPTOR=_VECTOR2F,
__module__='vector_pb2'
# @@protoc_insertion_point(class_scope:robotics.messages.Vector2f)
))
_sym_db.RegisterMessage(Vector2f)
Vectord = _reflection.GeneratedProtocolMessageType('Vectord', (_message.Message,), dict(
DESCRIPTOR = _VECTORD,
__module__ = 'vector_pb2'
# @@protoc_insertion_point(class_scope:robotics.messages.Vectord)
))
Vectord = _reflection.GeneratedProtocolMessageType(
'Vectord',
(_message.Message,),
dict(DESCRIPTOR=_VECTORD,
__module__='vector_pb2'
# @@protoc_insertion_point(class_scope:robotics.messages.Vectord)
))
_sym_db.RegisterMessage(Vectord)
Vectorf = _reflection.GeneratedProtocolMessageType('Vectorf', (_message.Message,), dict(
DESCRIPTOR = _VECTORF,
__module__ = 'vector_pb2'
# @@protoc_insertion_point(class_scope:robotics.messages.Vectorf)
))
Vectorf = _reflection.GeneratedProtocolMessageType(
'Vectorf',
(_message.Message,),
dict(DESCRIPTOR=_VECTORF,
__module__='vector_pb2'
# @@protoc_insertion_point(class_scope:robotics.messages.Vectorf)
))
_sym_db.RegisterMessage(Vectorf)
_VECTORD.fields_by_name['data'].has_options = True
_VECTORD.fields_by_name['data']._options = _descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b('\020\001'))
_VECTORD.fields_by_name['data']._options = _descriptor._ParseOptions(descriptor_pb2.FieldOptions(),
_b('\020\001'))
_VECTORF.fields_by_name['data'].has_options = True
_VECTORF.fields_by_name['data']._options = _descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b('\020\001'))
_VECTORF.fields_by_name['data']._options = _descriptor._ParseOptions(descriptor_pb2.FieldOptions(),
_b('\020\001'))
# @@protoc_insertion_point(module_scope)
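For reference, a minimal usage sketch (not part of the diff above) of the message classes this module registers. It assumes the generated file is importable as `vector_pb2`, matching the `__module__='vector_pb2'` set on each class; the actual import path depends on where protoc placed the file in the tree.

# Hypothetical usage sketch of the generated messages; adjust the import
# path to wherever vector_pb2.py lives in your checkout.
import vector_pb2

# Fixed-size float vector: x, y, z, w are plain scalar fields.
v4 = vector_pb2.Vector4f(x=1.0, y=2.0, z=3.0, w=1.0)

# Variable-length double vector: 'data' is a repeated (packed) field.
vd = vector_pb2.Vectord()
vd.data.extend([0.1, 0.2, 0.3])

# Standard protobuf round trip: serialize to bytes and parse back.
payload = v4.SerializeToString()
v4_copy = vector_pb2.Vector4f()
v4_copy.ParseFromString(payload)
assert v4_copy.w == 1.0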