From 61987dea857fb13f919e81259de1ae82baf0504c Mon Sep 17 00:00:00 2001 From: YungKC Date: Tue, 16 Apr 2019 12:20:08 -0700 Subject: [PATCH] Should sort best directions that give the highest rewards Found a bug where the ascending sort order selected the N lowest rewards instead of the N highest. With this fix, one can now use fewer best directions instead (the default nb_best_directions is lowered from 16 to 8). --- examples/pybullet/gym/pybullet_envs/ARS/ars.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/pybullet/gym/pybullet_envs/ARS/ars.py b/examples/pybullet/gym/pybullet_envs/ARS/ars.py index 760ebd732..6ebe28149 100644 --- a/examples/pybullet/gym/pybullet_envs/ARS/ars.py +++ b/examples/pybullet/gym/pybullet_envs/ARS/ars.py @@ -26,7 +26,7 @@ class Hp(): self.episode_length = 1000 self.learning_rate = 0.02 self.nb_directions = 16 - self.nb_best_directions = 16 + self.nb_best_directions = 8 assert self.nb_best_directions <= self.nb_directions self.noise = 0.03 self.seed = 1 @@ -194,7 +194,7 @@ def train(env, policy, normalizer, hp, parentPipes, args): # Sorting the rollouts by the max(r_pos, r_neg) and selecting the best directions scores = {k:max(r_pos, r_neg) for k,(r_pos,r_neg) in enumerate(zip(positive_rewards, negative_rewards))} - order = sorted(scores.keys(), key = lambda x:scores[x])[:hp.nb_best_directions] + order = sorted(scores.keys(), key = lambda x:-scores[x])[:hp.nb_best_directions] rollouts = [(positive_rewards[k], negative_rewards[k], deltas[k]) for k in order] # Updating our policy