apply yapf
This commit is contained in:
@@ -190,18 +190,19 @@ def train(env, policy, normalizer, hp, parentPipes, args):
|
|||||||
for k in range(hp.nb_directions):
|
for k in range(hp.nb_directions):
|
||||||
positive_rewards[k] = explore(env, normalizer, policy, "positive", deltas[k], hp)
|
positive_rewards[k] = explore(env, normalizer, policy, "positive", deltas[k], hp)
|
||||||
|
|
||||||
|
|
||||||
# Getting the negative rewards in the negative/opposite directions
|
# Getting the negative rewards in the negative/opposite directions
|
||||||
for k in range(hp.nb_directions):
|
for k in range(hp.nb_directions):
|
||||||
negative_rewards[k] = explore(env, normalizer, policy, "negative", deltas[k], hp)
|
negative_rewards[k] = explore(env, normalizer, policy, "negative", deltas[k], hp)
|
||||||
|
|
||||||
|
|
||||||
# Gathering all the positive/negative rewards to compute the standard deviation of these rewards
|
# Gathering all the positive/negative rewards to compute the standard deviation of these rewards
|
||||||
all_rewards = np.array(positive_rewards + negative_rewards)
|
all_rewards = np.array(positive_rewards + negative_rewards)
|
||||||
sigma_r = all_rewards.std()
|
sigma_r = all_rewards.std()
|
||||||
|
|
||||||
# Sorting the rollouts by the max(r_pos, r_neg) and selecting the best directions
|
# Sorting the rollouts by the max(r_pos, r_neg) and selecting the best directions
|
||||||
scores = {k:max(r_pos, r_neg) for k,(r_pos,r_neg) in enumerate(zip(positive_rewards, negative_rewards))}
|
scores = {
|
||||||
|
k: max(r_pos, r_neg)
|
||||||
|
for k, (r_pos, r_neg) in enumerate(zip(positive_rewards, negative_rewards))
|
||||||
|
}
|
||||||
order = sorted(scores.keys(), key=lambda x: -scores[x])[:hp.nb_best_directions]
|
order = sorted(scores.keys(), key=lambda x: -scores[x])[:hp.nb_best_directions]
|
||||||
rollouts = [(positive_rewards[k], negative_rewards[k], deltas[k]) for k in order]
|
rollouts = [(positive_rewards[k], negative_rewards[k], deltas[k]) for k in order]
|
||||||
|
|
||||||
@@ -212,6 +213,7 @@ def train(env, policy, normalizer, hp, parentPipes, args):
|
|||||||
reward_evaluation = explore(env, normalizer, policy, None, None, hp)
|
reward_evaluation = explore(env, normalizer, policy, None, None, hp)
|
||||||
print('Step:', step, 'Reward:', reward_evaluation)
|
print('Step:', step, 'Reward:', reward_evaluation)
|
||||||
|
|
||||||
|
|
||||||
# Running the main code
|
# Running the main code
|
||||||
|
|
||||||
|
|
||||||
@@ -226,19 +228,15 @@ if __name__ == "__main__":
|
|||||||
mp.freeze_support()
|
mp.freeze_support()
|
||||||
|
|
||||||
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
|
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
|
||||||
parser.add_argument('--env',
|
parser.add_argument(
|
||||||
help='Gym environment name',
|
'--env', help='Gym environment name', type=str, default='HalfCheetahBulletEnv-v0')
|
||||||
type=str,
|
|
||||||
default='HalfCheetahBulletEnv-v0')
|
|
||||||
parser.add_argument('--seed', help='RNG seed', type=int, default=1)
|
parser.add_argument('--seed', help='RNG seed', type=int, default=1)
|
||||||
parser.add_argument('--render', help='OpenGL Visualizer', type=int, default=0)
|
parser.add_argument('--render', help='OpenGL Visualizer', type=int, default=0)
|
||||||
parser.add_argument('--movie', help='rgb_array gym movie', type=int, default=0)
|
parser.add_argument('--movie', help='rgb_array gym movie', type=int, default=0)
|
||||||
parser.add_argument('--steps', help='Number of steps', type=int, default=10000)
|
parser.add_argument('--steps', help='Number of steps', type=int, default=10000)
|
||||||
parser.add_argument('--policy', help='Starting policy file (npy)', type=str, default='')
|
parser.add_argument('--policy', help='Starting policy file (npy)', type=str, default='')
|
||||||
parser.add_argument('--logdir',
|
parser.add_argument(
|
||||||
help='Directory root to log policy files (npy)',
|
'--logdir', help='Directory root to log policy files (npy)', type=str, default='.')
|
||||||
type=str,
|
|
||||||
default='.')
|
|
||||||
parser.add_argument('--mp', help='Enable multiprocessing', type=int, default=1)
|
parser.add_argument('--mp', help='Enable multiprocessing', type=int, default=1)
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|||||||
Reference in New Issue
Block a user