diff --git a/examples/pybullet/gym/pybullet_envs/ARS/ars.py b/examples/pybullet/gym/pybullet_envs/ARS/ars.py deleted file mode 100644 index cf59ede85..000000000 --- a/examples/pybullet/gym/pybullet_envs/ARS/ars.py +++ /dev/null @@ -1,397 +0,0 @@ -"""Internal implementation of the Augmented Random Search method.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os, inspect -currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) -os.sys.path.insert(0,currentdir) - -from concurrent import futures -import copy -import os -import time -import gym -import numpy as np -import logz -import utils -import optimizers -#from google3.pyglib import gfile -import policies -import shared_noise -import utility - -class Worker(object): - """Object class for parallel rollout generation.""" - - def __init__(self, - env_seed, - env_callback, - policy_params=None, - deltas=None, - rollout_length=1000, - delta_std=0.02): - - # initialize OpenAI environment for each worker - self.env = env_callback() - self.env.seed(env_seed) - - # each worker gets access to the shared noise table - # with independent random streams for sampling - # from the shared noise table. - self.deltas = shared_noise.SharedNoiseTable(deltas, env_seed + 7) - self.policy_params = policy_params - if policy_params['type'] == 'linear': - self.policy = policies.LinearPolicy(policy_params) - else: - raise NotImplementedError - - self.delta_std = delta_std - self.rollout_length = rollout_length - - def get_weights_plus_stats(self): - """ - Get current policy weights and current statistics of past states. - """ - assert self.policy_params['type'] == 'linear' - return self.policy.get_weights_plus_stats() - - def rollout(self, shift=0., rollout_length=None): - """Performs one rollout of maximum length rollout_length. - - At each time-step it substracts shift from the reward. - """ - - if rollout_length is None: - rollout_length = self.rollout_length - - total_reward = 0. - steps = 0 - - ob = self.env.reset() - for i in range(rollout_length): - action = self.policy.act(ob) - ob, reward, done, _ = self.env.step(action) - steps += 1 - total_reward += (reward - shift) - if done: - break - - return total_reward, steps - - def do_rollouts(self, w_policy, num_rollouts=1, shift=1, evaluate=False): - """ - Generate multiple rollouts with a policy parametrized by w_policy. 
- """ - print('Doing {} rollouts'.format(num_rollouts)) - rollout_rewards, deltas_idx = [], [] - steps = 0 - - for i in range(num_rollouts): - - if evaluate: - self.policy.update_weights(w_policy) - deltas_idx.append(-1) - - # set to false so that evaluation rollouts are not used for updating state statistics - self.policy.update_filter = False - - # for evaluation we do not shift the rewards (shift = 0) and we use the - # default rollout length (1000 for the MuJoCo locomotion tasks) - reward, r_steps = self.rollout( - shift=0., rollout_length=self.rollout_length) - rollout_rewards.append(reward) - - else: - idx, delta = self.deltas.get_delta(w_policy.size) - - delta = (self.delta_std * delta).reshape(w_policy.shape) - deltas_idx.append(idx) - - # set to true so that state statistics are updated - self.policy.update_filter = True - - # compute reward and number of timesteps used for positive perturbation rollout - self.policy.update_weights(w_policy + delta) - pos_reward, pos_steps = self.rollout(shift=shift) - - # compute reward and number of timesteps used for negative pertubation rollout - self.policy.update_weights(w_policy - delta) - neg_reward, neg_steps = self.rollout(shift=shift) - steps += pos_steps + neg_steps - - rollout_rewards.append([pos_reward, neg_reward]) - - return { - 'deltas_idx': deltas_idx, - 'rollout_rewards': rollout_rewards, - 'steps': steps - } - - def stats_increment(self): - self.policy.observation_filter.stats_increment() - return - - def get_weights(self): - return self.policy.get_weights() - - def get_filter(self): - return self.policy.observation_filter - - def sync_filter(self, other): - self.policy.observation_filter.sync(other) - return - - -class ARSLearner(object): - """ - Object class implementing the ARS algorithm. 
- """ - - def __init__(self, - env_callback, - policy_params=None, - num_workers=32, - num_deltas=320, - deltas_used=320, - delta_std=0.02, - logdir=None, - rollout_length=1000, - step_size=0.01, - shift='constant zero', - params=None, - seed=123): - - logz.configure_output_dir(logdir) - # params_to_save = copy.deepcopy(params) - # params_to_save['env'] = None - # logz.save_params(params_to_save) - utility.save_config(params, logdir) - env = env_callback() - - self.timesteps = 0 - self.action_size = env.action_space.shape[0] - self.ob_size = env.observation_space.shape[0] - self.num_deltas = num_deltas - self.deltas_used = deltas_used - self.rollout_length = rollout_length - self.step_size = step_size - self.delta_std = delta_std - self.logdir = logdir - self.shift = shift - self.params = params - self.max_past_avg_reward = float('-inf') - self.num_episodes_used = float('inf') - - # create shared table for storing noise - print('Creating deltas table.') - deltas = shared_noise.create_shared_noise() - self.deltas = shared_noise.SharedNoiseTable(deltas, seed=seed + 3) - print('Created deltas table.') - - # initialize workers with different random seeds - print('Initializing workers.') - self.num_workers = num_workers - self.workers = [ - Worker( - seed + 7 * i, - env_callback=env_callback, - policy_params=policy_params, - deltas=deltas, - rollout_length=rollout_length, - delta_std=delta_std) for i in range(num_workers) - ] - - # initialize policy - if policy_params['type'] == 'linear': - self.policy = policies.LinearPolicy(policy_params) - self.w_policy = self.policy.get_weights() - else: - raise NotImplementedError - - # initialize optimization algorithm - self.optimizer = optimizers.SGD(self.w_policy, self.step_size) - print('Initialization of ARS complete.') - - def aggregate_rollouts(self, num_rollouts=None, evaluate=False): - """ - Aggregate update step from rollouts generated in parallel. 
- """ - - if num_rollouts is None: - num_deltas = self.num_deltas - else: - num_deltas = num_rollouts - - results_one = [] #rollout_ids_one - results_two = [] #rollout_ids_two - - t1 = time.time() - num_rollouts = int(num_deltas / self.num_workers) -# if num_rollouts > 0: -# with futures.ThreadPoolExecutor( -# max_workers=self.num_workers) as executor: -# workers = [ -# executor.submit( -# worker.do_rollouts, -# self.w_policy, -# num_rollouts=num_rollouts, -# shift=self.shift, -# evaluate=evaluate) for worker in self.workers -# ] -# for worker in futures.as_completed(workers): -# results_one.append(worker.result()) -# -# workers = [ -# executor.submit( -# worker.do_rollouts, -# self.w_policy, -# num_rollouts=1, -# shift=self.shift, -# evaluate=evaluate) -# for worker in self.workers[:(num_deltas % self.num_workers)] -# ] -# for worker in futures.as_completed(workers): -# results_two.append(worker.result()) - - # parallel generation of rollouts - rollout_ids_one = [ - worker.do_rollouts( - self.w_policy, - num_rollouts=num_rollouts, - shift=self.shift, - evaluate=evaluate) for worker in self.workers - ] - - rollout_ids_two = [ - worker.do_rollouts( - self.w_policy, num_rollouts=1, shift=self.shift, evaluate=evaluate) - for worker in self.workers[:(num_deltas % self.num_workers)] - ] - results_one = rollout_ids_one - results_two = rollout_ids_two -# gather results - - rollout_rewards, deltas_idx = [], [] - - for result in results_one: - if not evaluate: - self.timesteps += result['steps'] - deltas_idx += result['deltas_idx'] - rollout_rewards += result['rollout_rewards'] - - for result in results_two: - if not evaluate: - self.timesteps += result['steps'] - deltas_idx += result['deltas_idx'] - rollout_rewards += result['rollout_rewards'] - - deltas_idx = np.array(deltas_idx) - rollout_rewards = np.array(rollout_rewards, dtype=np.float64) - - print('Maximum reward of collected rollouts:', rollout_rewards.max()) - info_dict = { - "max_reward": rollout_rewards.max() - } - t2 = time.time() - - print('Time to generate rollouts:', t2 - t1) - - if evaluate: - return rollout_rewards - - # select top performing directions if deltas_used < num_deltas - max_rewards = np.max(rollout_rewards, axis=1) - if self.deltas_used > self.num_deltas: - self.deltas_used = self.num_deltas - - idx = np.arange(max_rewards.size)[max_rewards >= np.percentile( - max_rewards, 100 * (1 - (self.deltas_used / self.num_deltas)))] - deltas_idx = deltas_idx[idx] - rollout_rewards = rollout_rewards[idx, :] - - # normalize rewards by their standard deviation - rollout_rewards /= np.std(rollout_rewards) - - t1 = time.time() - # aggregate rollouts to form g_hat, the gradient used to compute SGD step - g_hat, count = utils.batched_weighted_sum( - rollout_rewards[:, 0] - rollout_rewards[:, 1], - (self.deltas.get(idx, self.w_policy.size) for idx in deltas_idx), - batch_size=500) - g_hat /= deltas_idx.size - t2 = time.time() - print('time to aggregate rollouts', t2 - t1) - return g_hat, info_dict - - def train_step(self): - """ - Perform one update step of the policy weights. 
- """ - - g_hat, info_dict = self.aggregate_rollouts() - print('Euclidean norm of update step:', np.linalg.norm(g_hat)) - self.w_policy -= self.optimizer._compute_step(g_hat).reshape( - self.w_policy.shape) - return info_dict - - def train(self, num_iter): - - start = time.time() - for i in range(num_iter): - - t1 = time.time() - info_dict = self.train_step() - t2 = time.time() - print('total time of one step', t2 - t1) - print('iter ', i, ' done') - - # record statistics every 10 iterations - if ((i) % 10 == 0): - - rewards = self.aggregate_rollouts(num_rollouts=8, evaluate=True) - w = self.workers[0].get_weights_plus_stats() - - checkpoint_filename = os.path.join( - self.logdir, 'lin_policy_plus_{:03d}.npz'.format(i)) - print('Save checkpoints to {}...', checkpoint_filename) - checkpoint_file = open(checkpoint_filename, 'w') - np.savez(checkpoint_file, w) - print('End save checkpoints.') - print(sorted(self.params.items())) - logz.log_tabular('Time', time.time() - start) - logz.log_tabular('Iteration', i + 1) - logz.log_tabular('AverageReward', np.mean(rewards)) - logz.log_tabular('StdRewards', np.std(rewards)) - logz.log_tabular('MaxRewardRollout', np.max(rewards)) - logz.log_tabular('MinRewardRollout', np.min(rewards)) - logz.log_tabular('timesteps', self.timesteps) - logz.dump_tabular() - - t1 = time.time() - # get statistics from all workers - for j in range(self.num_workers): - self.policy.observation_filter.update(self.workers[j].get_filter()) - self.policy.observation_filter.stats_increment() - - # make sure master filter buffer is clear - self.policy.observation_filter.clear_buffer() - # sync all workers - #filter_id = ray.put(self.policy.observation_filter) - setting_filters_ids = [ - worker.sync_filter(self.policy.observation_filter) - for worker in self.workers - ] - # waiting for sync of all workers - #ray.get(setting_filters_ids) - - increment_filters_ids = [ - worker.stats_increment() for worker in self.workers - ] - # waiting for increment of all workers - #ray.get(increment_filters_ids) - t2 = time.time() - print('Time to sync statistics:', t2 - t1) - - return info_dict diff --git a/examples/pybullet/gym/pybullet_envs/ARS/ars_server.py b/examples/pybullet/gym/pybullet_envs/ARS/ars_server.py deleted file mode 100644 index f680dd632..000000000 --- a/examples/pybullet/gym/pybullet_envs/ARS/ars_server.py +++ /dev/null @@ -1,62 +0,0 @@ -""" -blaze build -c opt //experimental/users/jietan/ARS:ars_server - -blaze-bin/experimental/users/jietan/ARS/ars_server \ ---config_name=MINITAUR_GYM_CONFIG -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import time -from absl import app -from absl import flags -from concurrent import futures -import grpc -from grpc import loas2 -from google3.robotics.reinforcement_learning.minitaur.envs import minitaur_gym_env -from google3.robotics.reinforcement_learning.minitaur.envs import minitaur_reactive_env -from google3.robotics.reinforcement_learning.minitaur.envs.env_randomizers import minitaur_env_randomizer -from google3.robotics.reinforcement_learning.minitaur.envs.env_randomizers import minitaur_env_randomizer_from_config as randomizer_config_lib -from google3.experimental.users.jietan.ARS import ars_evaluation_service_pb2_grpc -from google3.experimental.users.jietan.ARS import ars_evaluation_service - -FLAGS = flags.FLAGS -flags.DEFINE_integer("server_id", 0, "number of servers") -flags.DEFINE_integer("port", 20000, "port number.") -flags.DEFINE_string("config_name", 
None, "The name of the config dictionary.") -flags.DEFINE_bool('run_on_borg', False, - 'Whether the servers are running on borg.') - -_ONE_DAY_IN_SECONDS = 60 * 60 * 24 - - -def main(unused_argv): - servers = [] - server_creds = loas2.loas2_server_credentials() - port = FLAGS.port - if not FLAGS.run_on_borg: - port = 20000 + FLAGS.server_id - server = grpc.server( - futures.ThreadPoolExecutor(max_workers=10), ports=(port,)) - servicer = ars_evaluation_service.ParameterEvaluationServicer( - FLAGS.config_name, worker_id=FLAGS.server_id) - ars_evaluation_service_pb2_grpc.add_EvaluationServicer_to_server( - servicer, server) - server.add_secure_port("[::]:{}".format(port), server_creds) - servers.append(server) - server.start() - print("Start server {}".format(FLAGS.server_id)) - - # prevent the main thread from exiting - try: - while True: - time.sleep(_ONE_DAY_IN_SECONDS) - except KeyboardInterrupt: - for server in servers: - server.stop(0) - - -if __name__ == "__main__": - app.run(main) diff --git a/examples/pybullet/gym/pybullet_envs/ARS/config_ars.py b/examples/pybullet/gym/pybullet_envs/ARS/config_ars.py deleted file mode 100644 index e333f88ab..000000000 --- a/examples/pybullet/gym/pybullet_envs/ARS/config_ars.py +++ /dev/null @@ -1,83 +0,0 @@ - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import functools -from pybullet_envs.minitaur.envs import minitaur_gym_env -from pybullet_envs.minitaur.envs import minitaur_reactive_env -from pybullet_envs.minitaur.envs.env_randomizers import minitaur_env_randomizer -from pybullet_envs.minitaur.envs.env_randomizers import minitaur_env_randomizer_from_config as randomizer_config_lib - -MAX_LENGTH = 1000 - - -def merge_two_dicts(x, y): - """Given two dicts, merge them into a new dict as a shallow copy.""" - z = dict(x) - z.update(y) - return z - - -# The default configurations. -DEFAULT_CONFIG = dict( - num_workers=8, - num_directions=8, - num_iterations=1000, - deltas_used=8, - step_size=0.02, - delta_std=0.03, - rollout_length=MAX_LENGTH, - shift=0, - seed=237, - policy_type="linear", - filter="MeanStdFilter", -) - -# Configuration specific to minitaur_gym_env.MinitaurGymEnv class. -MINITAUR_GYM_CONFIG_ADDITIONS = dict( - env=functools.partial( - minitaur_gym_env.MinitaurGymEnv, - urdf_version=minitaur_gym_env.DERPY_V0_URDF_VERSION, - accurate_motor_model_enabled=True, - motor_overheat_protection=True, - pd_control_enabled=True, - env_randomizer=None,#minitaur_env_randomizer.MinitaurEnvRandomizer(), - render=False, - num_steps_to_log=MAX_LENGTH)) -MINITAUR_GYM_CONFIG = merge_two_dicts(DEFAULT_CONFIG, - MINITAUR_GYM_CONFIG_ADDITIONS) - -# Configuration specific to MinitaurReactiveEnv class. -MINITAUR_REACTIVE_CONFIG_ADDITIONS = dict( - env=functools.partial( - minitaur_reactive_env.MinitaurReactiveEnv, - urdf_version=minitaur_gym_env.RAINBOW_DASH_V0_URDF_VERSION, - energy_weight=0.005, - accurate_motor_model_enabled=True, - pd_latency=0.003, - control_latency=0.02, - motor_kd=0.015, - remove_default_joint_damping=True, - env_randomizer=None, - render=False, - num_steps_to_log=MAX_LENGTH)) -MINITAUR_REACTIVE_CONFIG = merge_two_dicts(DEFAULT_CONFIG, - MINITAUR_REACTIVE_CONFIG_ADDITIONS) - -# Configuration specific to MinitaurReactiveEnv class with randomizer. 
-MINITAUR_REACTIVE_RANDOMIZER_CONFIG_ADDITIONS = dict( - env=functools.partial( - minitaur_reactive_env.MinitaurReactiveEnv, - urdf_version=minitaur_gym_env.RAINBOW_DASH_V0_URDF_VERSION, - energy_weight=0.005, - accurate_motor_model_enabled=True, - pd_latency=0.003, - control_latency=0.02, - motor_kd=0.015, - remove_default_joint_damping=True, - env_randomizer=randomizer_config_lib.MinitaurEnvRandomizerFromConfig(), - render=False, - num_steps_to_log=MAX_LENGTH)) -MINITAUR_REACTIVE_RANDOMIZER_CONFIG = merge_two_dicts( - DEFAULT_CONFIG, MINITAUR_REACTIVE_RANDOMIZER_CONFIG_ADDITIONS) diff --git a/examples/pybullet/gym/pybullet_envs/ARS/eval_ars.py b/examples/pybullet/gym/pybullet_envs/ARS/eval_ars.py deleted file mode 100644 index d0c144bfa..000000000 --- a/examples/pybullet/gym/pybullet_envs/ARS/eval_ars.py +++ /dev/null @@ -1,99 +0,0 @@ -""" -blaze run -c opt //experimental/users/jietan/ARS:eval_ars -- \ ---logdir=/cns/ij-d/home/jietan/experiment/ARS/ars_react_nr01.191950338.191950550/ \ ---checkpoint=lin_policy_plus_990.npz \ ---num_rollouts=10 -""" - - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os, inspect -import time - -currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) -os.sys.path.insert(0,currentdir) - -from absl import app -from absl import flags - -import pdb -import os -import numpy as np -import gym -import config_ars -import utility -import policies - -FLAGS = flags.FLAGS - -flags.DEFINE_string('logdir', None, 'The path of the checkpoint.') -flags.DEFINE_string('checkpoint', None, 'The file name of the checkpoint.') -flags.DEFINE_integer('num_rollouts', 1, 'The number of rollouts.') - - -def main(argv): - del argv # Unused. - - print('loading and building expert policy') - checkpoint_file = os.path.join(FLAGS.logdir, FLAGS.checkpoint) - lin_policy = np.load(checkpoint_file, encoding='bytes') - lin_policy = lin_policy.items()[0][1] - - M = lin_policy[0] - # mean and std of state vectors estimated online by ARS. - mean = lin_policy[1] - std = lin_policy[2] - - config = utility.load_config(FLAGS.logdir) - print("config=",config) - env = config['env'](hard_reset=True, render=True) - ob_dim = env.observation_space.shape[0] - ac_dim = env.action_space.shape[0] - - # set policy parameters. Possible filters: 'MeanStdFilter' for v2, 'NoFilter' for v1. - policy_params = { - 'type': 'linear', - 'ob_filter': config['filter'], - 'ob_dim': ob_dim, - 'ac_dim': ac_dim, - "weights": M, - "mean": mean, - "std": std, - } - policy = policies.LinearPolicy(policy_params, update_filter=False) - returns = [] - observations = [] - actions = [] - for i in range(FLAGS.num_rollouts): - print('iter', i) - obs = env.reset() - done = False - totalr = 0. - steps = 0 - while not done: - action = policy.act(obs) - observations.append(obs) - actions.append(action) - - obs, r, done, _ = env.step(action) - time.sleep(1./100.) 
- totalr += r - steps += 1 - if steps % 100 == 0: - print('%i/%i' % (steps, config['rollout_length'])) - if steps >= config['rollout_length']: - break - returns.append(totalr) - - print('returns', returns) - print('mean return', np.mean(returns)) - print('std of return', np.std(returns)) - - -if __name__ == '__main__': - flags.mark_flag_as_required('logdir') - flags.mark_flag_as_required('checkpoint') - app.run(main) diff --git a/examples/pybullet/gym/pybullet_envs/ARS/filter.py b/examples/pybullet/gym/pybullet_envs/ARS/filter.py deleted file mode 100644 index ac68a2341..000000000 --- a/examples/pybullet/gym/pybullet_envs/ARS/filter.py +++ /dev/null @@ -1,280 +0,0 @@ -# Code in this file is copied and adapted from -# https://github.com/ray-project/ray/blob/master/python/ray/rllib/utils/filter.py - - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np - - -class Filter(object): - """Processes input, possibly statefully.""" - - def update(self, other, *args, **kwargs): - """Updates self with "new state" from other filter.""" - raise NotImplementedError - - def copy(self): - """Creates a new object with same state as self. - - Returns: - copy (Filter): Copy of self""" - raise NotImplementedError - - def sync(self, other): - """Copies all state from other filter to self.""" - raise NotImplementedError - - -class NoFilter(Filter): - def __init__(self, *args): - pass - - def __call__(self, x, update=True): - return np.asarray(x, dtype = np.float64) - - def update(self, other, *args, **kwargs): - pass - - def copy(self): - return self - - def sync(self, other): - pass - - def stats_increment(self): - pass - - def clear_buffer(self): - pass - - def get_stats(self): - return 0, 1 - - @property - def mean(self): - return 0 - - @property - def var(self): - return 1 - - @property - def std(self): - return 1 - - - -# http://www.johndcook.com/blog/standard_deviation/ -class RunningStat(object): - - def __init__(self, shape=None): - self._n = 0 - self._M = np.zeros(shape, dtype = np.float64) - self._S = np.zeros(shape, dtype = np.float64) - self._M2 = np.zeros(shape, dtype = np.float64) - - def copy(self): - other = RunningStat() - other._n = self._n - other._M = np.copy(self._M) - other._S = np.copy(self._S) - return other - - def push(self, x): - x = np.asarray(x) - # Unvectorized update of the running statistics. - assert x.shape == self._M.shape, ("x.shape = {}, self.shape = {}" - .format(x.shape, self._M.shape)) - n1 = self._n - self._n += 1 - if self._n == 1: - self._M[...] = x - else: - delta = x - self._M - deltaM2 = np.square(x) - self._M2 - self._M[...] += delta / self._n - self._S[...] 
+= delta * delta * n1 / self._n - - - def update(self, other): - n1 = self._n - n2 = other._n - n = n1 + n2 - delta = self._M - other._M - delta2 = delta * delta - M = (n1 * self._M + n2 * other._M) / n - S = self._S + other._S + delta2 * n1 * n2 / n - self._n = n - self._M = M - self._S = S - - def __repr__(self): - return '(n={}, mean_mean={}, mean_std={})'.format( - self.n, np.mean(self.mean), np.mean(self.std)) - - @property - def n(self): - return self._n - - @property - def mean(self): - return self._M - - @property - def var(self): - return self._S / (self._n - 1) if self._n > 1 else np.square(self._M) - - @property - def std(self): - return np.sqrt(self.var) - - @property - def shape(self): - return self._M.shape - - -class MeanStdFilter(Filter): - """Keeps track of a running mean for seen states""" - - def __init__(self, shape, demean=True, destd=True): - self.shape = shape - self.demean = demean - self.destd = destd - self.rs = RunningStat(shape) - # In distributed rollouts, each worker sees different states. - # The buffer is used to keep track of deltas amongst all the - # observation filters. - - self.buffer = RunningStat(shape) - - self.mean = np.zeros(shape, dtype = np.float64) - self.std = np.ones(shape, dtype = np.float64) - - def clear_buffer(self): - self.buffer = RunningStat(self.shape) - return - - def update(self, other, copy_buffer=False): - """Takes another filter and only applies the information from the - buffer. - - Using notation `F(state, buffer)` - Given `Filter1(x1, y1)` and `Filter2(x2, yt)`, - `update` modifies `Filter1` to `Filter1(x1 + yt, y1)` - If `copy_buffer`, then `Filter1` is modified to - `Filter1(x1 + yt, yt)`. - """ - self.rs.update(other.buffer) - if copy_buffer: - self.buffer = other.buffer.copy() - return - - def copy(self): - """Returns a copy of Filter.""" - other = MeanStdFilter(self.shape) - other.demean = self.demean - other.destd = self.destd - other.rs = self.rs.copy() - other.buffer = self.buffer.copy() - return other - - def sync(self, other): - """Syncs all fields together from other filter. - - Using notation `F(state, buffer)` - Given `Filter1(x1, y1)` and `Filter2(x2, yt)`, - `sync` modifies `Filter1` to `Filter1(x2, yt)` - """ - assert other.shape == self.shape, "Shapes don't match!" - self.demean = other.demean - self.destd = other.destd - self.rs = other.rs.copy() - self.buffer = other.buffer.copy() - return - - def __call__(self, x, update=True): - x = np.asarray(x, dtype = np.float64) - if update: - if len(x.shape) == len(self.rs.shape) + 1: - # The vectorized case. - for i in range(x.shape[0]): - self.rs.push(x[i]) - self.buffer.push(x[i]) - else: - # The unvectorized case. - self.rs.push(x) - self.buffer.push(x) - if self.demean: - x = x - self.mean - if self.destd: - x = x / (self.std + 1e-8) - return x - - def stats_increment(self): - self.mean = self.rs.mean - self.std = self.rs.std - - # Set values for std less than 1e-7 to +inf to avoid - # dividing by zero. State elements with zero variance - # are set to zero as a result. 
- self.std[self.std < 1e-7] = float("inf") - return - - def get_stats(self): - return self.rs.mean, (self.rs.std + 1e-8) - - def __repr__(self): - return 'MeanStdFilter({}, {}, {}, {}, {}, {})'.format( - self.shape, self.demean, - self.rs, self.buffer) - - -def get_filter(filter_config, shape = None): - if filter_config == "MeanStdFilter": - return MeanStdFilter(shape) - elif filter_config == "NoFilter": - return NoFilter() - else: - raise Exception("Unknown observation_filter: " + - str(filter_config)) - - -def test_running_stat(): - for shp in ((), (3,), (3, 4)): - li = [] - rs = RunningStat(shp) - for _ in range(5): - val = np.random.randn(*shp) - rs.push(val) - li.append(val) - m = np.mean(li, axis=0) - assert np.allclose(rs.mean, m) - v = np.square(m) if (len(li) == 1) else np.var(li, ddof=1, axis=0) - assert np.allclose(rs.var, v) - - -def test_combining_stat(): - for shape in [(), (3,), (3, 4)]: - li = [] - rs1 = RunningStat(shape) - rs2 = RunningStat(shape) - rs = RunningStat(shape) - for _ in range(5): - val = np.random.randn(*shape) - rs1.push(val) - rs.push(val) - li.append(val) - for _ in range(9): - rs2.push(val) - rs.push(val) - li.append(val) - rs1.update(rs2) - assert np.allclose(rs.mean, rs1.mean) - assert np.allclose(rs.std, rs1.std) - - -test_running_stat() -test_combining_stat() diff --git a/examples/pybullet/gym/pybullet_envs/ARS/log/config.yaml b/examples/pybullet/gym/pybullet_envs/ARS/log/config.yaml deleted file mode 100644 index 36690d51d..000000000 --- a/examples/pybullet/gym/pybullet_envs/ARS/log/config.yaml +++ /dev/null @@ -1,29 +0,0 @@ -delta_std: 0.03 -deltas_used: 8 -env: !!python/object/apply:functools.partial - args: - - &id001 !!python/name:pybullet_envs.minitaur.envs.minitaur_reactive_env.MinitaurReactiveEnv '' - state: !!python/tuple - - *id001 - - !!python/tuple [] - - accurate_motor_model_enabled: true - control_latency: 0.02 - energy_weight: 0.005 - env_randomizer: null - motor_kd: 0.015 - num_steps_to_log: 1000 - pd_latency: 0.003 - remove_default_joint_damping: true - render: false - urdf_version: rainbow_dash_v0 - - null -filter: MeanStdFilter -num_directions: 8 -num_iterations: 1000 -num_workers: 8 -policy_type: linear -rollout_length: 1000 -seed: 237 -shift: 0 -step_size: 0.02 - diff --git a/examples/pybullet/gym/pybullet_envs/ARS/log/lin_policy_plus_990_bla.npz b/examples/pybullet/gym/pybullet_envs/ARS/log/lin_policy_plus_990_bla.npz deleted file mode 100644 index 54fd39ba7..000000000 Binary files a/examples/pybullet/gym/pybullet_envs/ARS/log/lin_policy_plus_990_bla.npz and /dev/null differ diff --git a/examples/pybullet/gym/pybullet_envs/ARS/logz.py b/examples/pybullet/gym/pybullet_envs/ARS/logz.py deleted file mode 100644 index 790d83c21..000000000 --- a/examples/pybullet/gym/pybullet_envs/ARS/logz.py +++ /dev/null @@ -1,104 +0,0 @@ -# Code in this file is copied and adapted from -# https://github.com/berkeleydeeprlcourse - -import json - -""" - -Some simple logging functionality, inspired by rllab's logging. 
-Assumes that each diagnostic gets logged each iteration - -Call logz.configure_output_dir() to start logging to a -tab-separated-values file (some_folder_name/log.txt) - -""" - -import os.path as osp, shutil, time, atexit, os, subprocess - -color2num = dict( - gray=30, - red=31, - green=32, - yellow=33, - blue=34, - magenta=35, - cyan=36, - white=37, - crimson=38 -) - -def colorize(string, color, bold=False, highlight=False): - attr = [] - num = color2num[color] - if highlight: num += 10 - attr.append(str(num)) - if bold: attr.append('1') - return '\x1b[%sm%s\x1b[0m' % (';'.join(attr), string) - -class G(object): - output_dir = None - output_file = None - first_row = True - log_headers = [] - log_current_row = {} - -def configure_output_dir(d=None): - """ - Set output directory to d, or to /tmp/somerandomnumber if d is None - """ - G.first_row = True - G.log_headers = [] - G.log_current_row = {} - - G.output_dir = d or "/tmp/experiments/%i"%int(time.time()) - if not osp.exists(G.output_dir): - os.makedirs(G.output_dir) - G.output_file = open(osp.join(G.output_dir, "log.txt"), 'w') - atexit.register(G.output_file.close) - print(colorize("Logging data to %s"%G.output_file.name, 'green', bold=True)) - -def log_tabular(key, val): - """ - Log a value of some diagnostic - Call this once for each diagnostic quantity, each iteration - """ - if G.first_row: - G.log_headers.append(key) - else: - assert key in G.log_headers, "Trying to introduce a new key %s that you didn't include in the first iteration"%key - assert key not in G.log_current_row, "You already set %s this iteration. Maybe you forgot to call dump_tabular()"%key - G.log_current_row[key] = val - - -def save_params(params): - with open(osp.join(G.output_dir, "params.json"), 'w') as out: - out.write(json.dumps(params, separators=(',\n','\t:\t'), sort_keys=True)) - - -def dump_tabular(): - """ - Write all of the diagnostics from the current iteration - """ - vals = [] - key_lens = [len(key) for key in G.log_headers] - max_key_len = max(15,max(key_lens)) - keystr = '%'+'%d'%max_key_len - fmt = "| " + keystr + "s | %15s |" - n_slashes = 22 + max_key_len - print("-"*n_slashes) - for key in G.log_headers: - val = G.log_current_row.get(key, "") - if hasattr(val, "__float__"): valstr = "%8.3g"%val - else: valstr = val - print(fmt%(key, valstr)) - vals.append(val) - print("-"*n_slashes) - if G.output_file is not None: - if G.first_row: - G.output_file.write("\t".join(G.log_headers)) - G.output_file.write("\n") - G.output_file.write("\t".join(map(str,vals))) - G.output_file.write("\n") - G.output_file.flush() - G.log_current_row.clear() - G.first_row=False diff --git a/examples/pybullet/gym/pybullet_envs/ARS/optimizers.py b/examples/pybullet/gym/pybullet_envs/ARS/optimizers.py deleted file mode 100644 index 063be9b87..000000000 --- a/examples/pybullet/gym/pybullet_envs/ARS/optimizers.py +++ /dev/null @@ -1,35 +0,0 @@ -# Code in this file is copied and adapted from -# https://github.com/openai/evolution-strategies-starter. 
- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np - -# OPTIMIZERS FOR MINIMIZING OBJECTIVES -class Optimizer(object): - def __init__(self, w_policy): - self.w_policy = w_policy.flatten() - self.dim = w_policy.size - self.t = 0 - - def update(self, globalg): - self.t += 1 - step = self._compute_step(globalg) - ratio = np.linalg.norm(step) / (np.linalg.norm(self.w_policy) + 1e-5) - return self.w_policy + step, ratio - - def _compute_step(self, globalg): - raise NotImplementedError - - -class SGD(Optimizer): - def __init__(self, pi, stepsize): - Optimizer.__init__(self, pi) - self.stepsize = stepsize - - def _compute_step(self, globalg): - step = -self.stepsize * globalg - return step - diff --git a/examples/pybullet/gym/pybullet_envs/ARS/policies.py b/examples/pybullet/gym/pybullet_envs/ARS/policies.py deleted file mode 100644 index 0adc27451..000000000 --- a/examples/pybullet/gym/pybullet_envs/ARS/policies.py +++ /dev/null @@ -1,72 +0,0 @@ -""" -Policy class for computing action from weights and observation vector. -Horia Mania --- hmania@berkeley.edu -Aurelia Guy -Benjamin Recht -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -import filter - - -class Policy(object): - - def __init__(self, policy_params): - - self.ob_dim = policy_params['ob_dim'] - self.ac_dim = policy_params['ac_dim'] - self.weights = np.empty(0) - - # a filter for updating statistics of the observations and normalizing - # inputs to the policies - self.observation_filter = filter.get_filter( - policy_params['ob_filter'], shape=(self.ob_dim,)) - self.update_filter = True - - def update_weights(self, new_weights): - self.weights[:] = new_weights[:] - return - - def get_weights(self): - return self.weights - - def get_observation_filter(self): - return self.observation_filter - - def act(self, ob): - raise NotImplementedError - - def copy(self): - raise NotImplementedError - - -class LinearPolicy(Policy): - """ - Linear policy class that computes action as . 
- """ - - def __init__(self, policy_params, update_filter=True): - Policy.__init__(self, policy_params) - self.weights = np.zeros(self.ac_dim * self.ob_dim, dtype=np.float64) - if "weights" in policy_params: - self.weights = policy_params["weights"] - if "mean" in policy_params: - self.observation_filter.mean = policy_params["mean"] - if "std" in policy_params: - self.observation_filter.std = policy_params["std"] - self.update_filter = update_filter - - def act(self, ob): - ob = self.observation_filter(ob, update=self.update_filter) - matrix_weights = np.reshape(self.weights, (self.ac_dim, self.ob_dim)) - return np.clip(np.dot(matrix_weights, ob), -1.0, 1.0) - - def get_weights_plus_stats(self): - - mu, std = self.observation_filter.get_stats() - aux = np.asarray([self.weights, mu, std]) - return aux diff --git a/examples/pybullet/gym/pybullet_envs/ARS/shared_noise.py b/examples/pybullet/gym/pybullet_envs/ARS/shared_noise.py deleted file mode 100644 index 036f6b26c..000000000 --- a/examples/pybullet/gym/pybullet_envs/ARS/shared_noise.py +++ /dev/null @@ -1,40 +0,0 @@ -""" -Code in this file is copied and adapted from -https://github.com/ray-project/ray/tree/master/python/ray/rllib/es -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -import numpy as np - - -def create_shared_noise(): - """ - Create a large array of noise to be shared by all workers. Used - for avoiding the communication of the random perturbations delta. - """ - - seed = 12345 - count = 250000000 - noise = np.random.RandomState(seed).randn(count).astype(np.float64) - return noise - - -class SharedNoiseTable(object): - def __init__(self, noise, seed = 11): - - self.rg = np.random.RandomState(seed) - self.noise = noise - assert self.noise.dtype == np.float64 - - def get(self, i, dim): - return self.noise[i:i + dim] - - def sample_index(self, dim): - return self.rg.randint(0, len(self.noise) - dim + 1) - - def get_delta(self, dim): - idx = self.sample_index(dim) - return idx, self.get(idx, dim) - diff --git a/examples/pybullet/gym/pybullet_envs/ARS/start_ars_servers.py b/examples/pybullet/gym/pybullet_envs/ARS/start_ars_servers.py deleted file mode 100644 index 7dc27547e..000000000 --- a/examples/pybullet/gym/pybullet_envs/ARS/start_ars_servers.py +++ /dev/null @@ -1,27 +0,0 @@ -""" - -blaze build -c opt //experimental/users/jietan/ARS:start_ars_servers -blaze-bin/experimental/users/jietan/ARS/start_ars_servers - -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -import time -import subprocess -from absl import app -from absl import flags - -FLAGS = flags.FLAGS -flags.DEFINE_integer("num_servers", 8, "number of servers") - -def main(argv): - del argv # Unused. - for server_id in xrange(FLAGS.num_servers): - args = ["blaze-bin/experimental/users/jietan/ARS/ars_server", "--config_name=MINITAUR_GYM_CONFIG", "--server_id={}".format(server_id), "--run_on_borg=False"] - subprocess.Popen(args) - - -if __name__ == '__main__': - app.run(main) diff --git a/examples/pybullet/gym/pybullet_envs/ARS/train_ars.borg b/examples/pybullet/gym/pybullet_envs/ARS/train_ars.borg deleted file mode 100644 index c09775b50..000000000 --- a/examples/pybullet/gym/pybullet_envs/ARS/train_ars.borg +++ /dev/null @@ -1,93 +0,0 @@ -// Example borg file to do a parameter sweep. 
-// -// To run: -// echo `srcfs get_readonly`-`g4 p | head -1 | awk '{print $2}'` -// blaze build -c opt experimental/users/jietan/ARS:ars_server.par -// blaze build -c opt experimental/users/jietan/ARS:ars_client.par -// borgcfg --skip_confirmation --vars 'base_cl=191950338,my_cl=191950550,label=ars_react_nr01,config=MINITAUR_REACTIVE_CONFIG' experimental/users/jietan/ARS/train_ars.borg reload -// borgcfg --skip_confirmation --vars 'base_cl=191950338,my_cl=191950550,label=ars_react_rd01,config=MINITAUR_REACTIVE_RANDOMIZER_CONFIG' experimental/users/jietan/ARS/train_ars.borg reload - - -import '//production/borg/templates/lambda/buildtool_support.borg' as build -import '//production/borg/templates/lambda/dnsname.borg' as dns - -vars = { - cell = 'atlanta' - charged_user = 'robotics' - base_cl = 0 - my_cl = 0 - label = external - user = real_username() - workers = 8 - config = external - cns_home = "/cns/ij-d/home/%user%" - logdir = "%cns_home%/experiment/ARS/%label%.%base_cl%.%my_cl%/" -} - -service augmented_random_search { - runtime { - cell = vars.cell - } - - scheduling = { - priority = 100 - batch_quota = { - strategy = 'RUN_SOON' - } - deadline = 3600 * 24 - } - accounting = { - charged_user = vars.charged_user - } - requirements { - autopilot = true - } - params = { - mygoogle3 = build.google3dir(myfilename()) - experiment_dir = 'experimental/users/jietan/ARS/' - } - - job ars_server = { - runtime { - cell = vars.cell - } - name = real_username() + '_server_' + vars.label - replicas = vars.workers - binary_path = build.binfile_v2(params.mygoogle3, - params.experiment_dir + 'ars_server') - runfiles = binary_path + '.runfiles/google3/' - packages = { - package third_party = { - directory = runfiles + 'third_party/' - } - } - binary = build.binfile(params.mygoogle3, - params.experiment_dir + 'ars_server.par') - args = { - server_id = '%task%' - config_name = vars.config - port = '%port%' - run_on_borg = true - } - } - job ars_client = { - name = real_username() + '_client_' + vars.label - binary_path = build.binfile_v2(params.mygoogle3, - params.experiment_dir + 'ars_client') - runfiles = binary_path + '.runfiles/google3/' - packages = { - package third_party = { - directory = runfiles + 'third_party/' - } - } - binary = build.binfile(params.mygoogle3, - params.experiment_dir + 'ars_client.par') - args = { - server_address = dns.borg_dns_name(ars_server) - num_servers = vars.workers - config_name = vars.config - logdir = vars.logdir - run_on_borg = true - } - } -} diff --git a/examples/pybullet/gym/pybullet_envs/ARS/train_ars.py b/examples/pybullet/gym/pybullet_envs/ARS/train_ars.py deleted file mode 100644 index 2b07f4aa0..000000000 --- a/examples/pybullet/gym/pybullet_envs/ARS/train_ars.py +++ /dev/null @@ -1,64 +0,0 @@ -""" - -blaze build -c opt //experimental/users/jietan/ARS:train_ars -blaze-bin/experimental/users/jietan/ARS/train_ars \ ---logdir=/cns/ij-d/home/jietan/experiment/ARS/test1 \ ---config_name=MINITAUR_GYM_CONFIG -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - - -import os -from absl import app -from absl import flags -import ars -import config_ars - -FLAGS = flags.FLAGS -flags.DEFINE_string('logdir', None, 'The directory to write the log file.') -flags.DEFINE_string('config_name', None, 'The name of the config dictionary') - - -def run_ars(config, logdir): - - env = config["env"]() - ob_dim = env.observation_space.shape[0] - ac_dim = env.action_space.shape[0] - - # set policy parameters. 
Possible filters: 'MeanStdFilter' for v2, 'NoFilter' for v1. - policy_params = { - 'type': 'linear', - 'ob_filter': config['filter'], - 'ob_dim': ob_dim, - 'ac_dim': ac_dim - } - - ARS = ars.ARSLearner( - env_callback=config['env'], - policy_params=policy_params, - num_deltas=config['num_directions'], - deltas_used=config['deltas_used'], - step_size=config['step_size'], - delta_std=config['delta_std'], - logdir=logdir, - rollout_length=config['rollout_length'], - shift=config['shift'], - params=config, - seed=config['seed']) - - return ARS.train(config['num_iterations']) - - -def main(argv): - del argv # Unused. - config = getattr(config_ars, FLAGS.config_name) - run_ars(config=config, logdir=FLAGS.logdir) - - -if __name__ == '__main__': - flags.mark_flag_as_required('logdir') - flags.mark_flag_as_required('config_name') - app.run(main) diff --git a/examples/pybullet/gym/pybullet_envs/ARS/train_ars_test.py b/examples/pybullet/gym/pybullet_envs/ARS/train_ars_test.py deleted file mode 100644 index 13f149fc5..000000000 --- a/examples/pybullet/gym/pybullet_envs/ARS/train_ars_test.py +++ /dev/null @@ -1,29 +0,0 @@ -"""Tests for google3.experimental.users.jietan.ARS.train_ars. -blaze build -c opt //experimental/users/jietan/ARS:train_ars_test -blaze-bin/experimental/users/jietan/ARS/train_ars_test -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from absl import flags -from google3.testing.pybase import googletest -from google3.experimental.users.jietan.ARS import train_ars -from google3.experimental.users.jietan.ARS import config_ars - -FLAGS = flags.FLAGS -MAX_RETURN_AFTER_TWO_ITEATIONS = 0.0890905394617 - -class TrainArsTest(googletest.TestCase): - - def testArsTwoStepResult(self): - config = getattr(config_ars, "MINITAUR_REACTIVE_CONFIG") - config['num_iterations'] = 2 - info = train_ars.run_ars(config=config, logdir=FLAGS.test_tmpdir) - print (info) - self.assertAlmostEqual(info["max_reward"], MAX_RETURN_AFTER_TWO_ITEATIONS) - - -if __name__ == '__main__': - googletest.main() diff --git a/examples/pybullet/gym/pybullet_envs/ARS/utility.py b/examples/pybullet/gym/pybullet_envs/ARS/utility.py deleted file mode 100644 index 591418be5..000000000 --- a/examples/pybullet/gym/pybullet_envs/ARS/utility.py +++ /dev/null @@ -1,52 +0,0 @@ - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -import os -import ruamel.yaml as yaml - -def save_config(config, logdir): - """Save a new configuration by name. - - If a logging directory is specified, is will be created and the configuration - will be stored there. Otherwise, a log message will be printed. - - Args: - config: Configuration object. - logdir: Location for writing summaries and checkpoints if specified. - - Returns: - Configuration object. - """ - message = 'Start a new run and write summaries and checkpoints to {}.' - print(message.format(logdir)) - config_path = os.path.join(logdir, 'config.yaml') - yaml.dump(config, config_path, default_flow_style=False) - return config - - -def load_config(logdir): - """Load a configuration from the log directory. - - Args: - logdir: The logging directory containing the configuration file. - - Raises: - IOError: The logging directory does not contain a configuration file. - - Returns: - Configuration object. 
- """ - config_path = logdir and os.path.join(logdir, 'config.yaml') - if not config_path: - message = ( - 'Cannot resume an existing run since the logging directory does not ' - 'contain a configuration file.') - raise IOError(message) - print("config_path=",config_path) - - stream = open(config_path, 'r') - config = yaml.load(stream) - message = 'Resume run and write summaries and checkpoints to {}.' - print(message.format(logdir)) - return config diff --git a/examples/pybullet/gym/pybullet_envs/ARS/utils.py b/examples/pybullet/gym/pybullet_envs/ARS/utils.py deleted file mode 100644 index 3b7e03afa..000000000 --- a/examples/pybullet/gym/pybullet_envs/ARS/utils.py +++ /dev/null @@ -1,28 +0,0 @@ -# Code in this file is copied and adapted from -# https://github.com/openai/evolution-strategies-starter. - -import numpy as np - -def itergroups(items, group_size): - assert group_size >= 1 - group = [] - for x in items: - group.append(x) - if len(group) == group_size: - yield tuple(group) - del group[:] - if group: - yield tuple(group) - - - -def batched_weighted_sum(weights, vecs, batch_size): - total = 0 - num_items_summed = 0 - for batch_weights, batch_vecs in zip(itergroups(weights, batch_size), - itergroups(vecs, batch_size)): - assert len(batch_weights) == len(batch_vecs) <= batch_size - total += np.dot(np.asarray(batch_weights, dtype=np.float64), - np.asarray(batch_vecs, dtype=np.float64)) - num_items_summed += len(batch_weights) - return total, num_items_summed