From a57c480f28181e4156601161e7469da15694a347 Mon Sep 17 00:00:00 2001 From: Alexis David Jacq Date: Fri, 15 Jun 2018 17:38:35 +0200 Subject: [PATCH 1/4] Update gym_locomotion_envs.py As suggested in https://github.com/bulletphysics/bullet3/pull/1759. The default isDone lets done = alive<0, and a special case is made for halfcheetah, forcing done=False. I had to pass the 'alive' condition as an additive parameter of WalkerBaseBulletEnv. --- .../gym/pybullet_envs/gym_locomotion_envs.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/examples/pybullet/gym/pybullet_envs/gym_locomotion_envs.py b/examples/pybullet/gym/pybullet_envs/gym_locomotion_envs.py index 6220b28cf..6e042efdf 100644 --- a/examples/pybullet/gym/pybullet_envs/gym_locomotion_envs.py +++ b/examples/pybullet/gym/pybullet_envs/gym_locomotion_envs.py @@ -14,6 +14,7 @@ class WalkerBaseBulletEnv(MJCFBaseBulletEnv): self.walk_target_x = 1e3 # kilometer away self.walk_target_y = 0 self.stateId=-1 + self.alive = None def create_single_player_scene(self, bullet_client): @@ -39,6 +40,12 @@ class WalkerBaseBulletEnv(MJCFBaseBulletEnv): return r + + def _isDone(self): + if self._alive is not None: + return self._alive < 0 + else: + return False def move_robot(self, init_x, init_y, init_z): "Used by multiplayer stadium to move sideways, to another running lane." @@ -60,8 +67,8 @@ class WalkerBaseBulletEnv(MJCFBaseBulletEnv): state = self.robot.calc_state() # also calculates self.joints_at_limit - alive = float(self.robot.alive_bonus(state[0]+self.robot.initial_z, self.robot.body_rpy[1])) # state[0] is body height above ground, body_rpy[1] is pitch - done = alive < 0 + self.alive = float(self.robot.alive_bonus(state[0]+self.robot.initial_z, self.robot.body_rpy[1])) # state[0] is body height above ground, body_rpy[1] is pitch + done = self._isDone() if not np.isfinite(state).all(): print("~INF~", state) done = True @@ -135,6 +142,9 @@ class HalfCheetahBulletEnv(WalkerBaseBulletEnv): def __init__(self): self.robot = HalfCheetah() WalkerBaseBulletEnv.__init__(self, self.robot) + + def _isDone(self): + return False class AntBulletEnv(WalkerBaseBulletEnv): def __init__(self): @@ -172,4 +182,3 @@ class HumanoidFlagrunHarderBulletEnv(HumanoidBulletEnv): s = HumanoidBulletEnv.create_single_player_scene(self, bullet_client) s.zero_at_running_strip_start_line = False return s - From 292a3f1cf69a2e64179177928f1895430f2d61a1 Mon Sep 17 00:00:00 2001 From: Alexis David Jacq Date: Fri, 15 Jun 2018 17:55:20 +0200 Subject: [PATCH 2/4] methode isAlive Method def isAlive(self), which defaults to return self._alive < 0, and each environment can override this method (Half Cheetah would implement return False) (In response to https://github.com/alexis-jacq/bullet3/commit/bea468fb93041149fb7dd0373a19000b1ba1ab52) --- .../gym/pybullet_envs/gym_locomotion_envs.py | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/examples/pybullet/gym/pybullet_envs/gym_locomotion_envs.py b/examples/pybullet/gym/pybullet_envs/gym_locomotion_envs.py index 6e042efdf..453b0e781 100644 --- a/examples/pybullet/gym/pybullet_envs/gym_locomotion_envs.py +++ b/examples/pybullet/gym/pybullet_envs/gym_locomotion_envs.py @@ -14,7 +14,6 @@ class WalkerBaseBulletEnv(MJCFBaseBulletEnv): self.walk_target_x = 1e3 # kilometer away self.walk_target_y = 0 self.stateId=-1 - self.alive = None def create_single_player_scene(self, bullet_client): @@ -41,11 +40,8 @@ class WalkerBaseBulletEnv(MJCFBaseBulletEnv): return r - def _isDone(self): - if self._alive is not None: - return self._alive < 0 - else: - return False + def _isAlive(self): + return self._alive > 0 def move_robot(self, init_x, init_y, init_z): "Used by multiplayer stadium to move sideways, to another running lane." @@ -67,8 +63,8 @@ class WalkerBaseBulletEnv(MJCFBaseBulletEnv): state = self.robot.calc_state() # also calculates self.joints_at_limit - self.alive = float(self.robot.alive_bonus(state[0]+self.robot.initial_z, self.robot.body_rpy[1])) # state[0] is body height above ground, body_rpy[1] is pitch - done = self._isDone() + self._alive = float(self.robot.alive_bonus(state[0]+self.robot.initial_z, self.robot.body_rpy[1])) # state[0] is body height above ground, body_rpy[1] is pitch + done = not self._isAlive() if not np.isfinite(state).all(): print("~INF~", state) done = True @@ -96,7 +92,7 @@ class WalkerBaseBulletEnv(MJCFBaseBulletEnv): debugmode=0 if(debugmode): print("alive=") - print(alive) + print(self._alive) print("progress") print(progress) print("electricity_cost") @@ -143,8 +139,8 @@ class HalfCheetahBulletEnv(WalkerBaseBulletEnv): self.robot = HalfCheetah() WalkerBaseBulletEnv.__init__(self, self.robot) - def _isDone(self): - return False + def _isAlive(self): + return True class AntBulletEnv(WalkerBaseBulletEnv): def __init__(self): From 50b5edd7b5e2fad40e3752188ff0efb413b6f804 Mon Sep 17 00:00:00 2001 From: Alexis David Jacq Date: Sat, 16 Jun 2018 13:53:05 +0200 Subject: [PATCH 3/4] isDone instead of isAlive --- .../gym/pybullet_envs/gym_locomotion_envs.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/examples/pybullet/gym/pybullet_envs/gym_locomotion_envs.py b/examples/pybullet/gym/pybullet_envs/gym_locomotion_envs.py index 453b0e781..68b355a13 100644 --- a/examples/pybullet/gym/pybullet_envs/gym_locomotion_envs.py +++ b/examples/pybullet/gym/pybullet_envs/gym_locomotion_envs.py @@ -40,8 +40,8 @@ class WalkerBaseBulletEnv(MJCFBaseBulletEnv): return r - def _isAlive(self): - return self._alive > 0 + def _isDone(self): + return self._alive < 0 def move_robot(self, init_x, init_y, init_z): "Used by multiplayer stadium to move sideways, to another running lane." @@ -64,7 +64,7 @@ class WalkerBaseBulletEnv(MJCFBaseBulletEnv): state = self.robot.calc_state() # also calculates self.joints_at_limit self._alive = float(self.robot.alive_bonus(state[0]+self.robot.initial_z, self.robot.body_rpy[1])) # state[0] is body height above ground, body_rpy[1] is pitch - done = not self._isAlive() + done = self._isAlive() if not np.isfinite(state).all(): print("~INF~", state) done = True @@ -103,7 +103,7 @@ class WalkerBaseBulletEnv(MJCFBaseBulletEnv): print(feet_collision_cost) self.rewards = [ - alive, + self._alive, progress, electricity_cost, joints_at_limit_cost, @@ -139,8 +139,8 @@ class HalfCheetahBulletEnv(WalkerBaseBulletEnv): self.robot = HalfCheetah() WalkerBaseBulletEnv.__init__(self, self.robot) - def _isAlive(self): - return True + def _isDone(self): + return False class AntBulletEnv(WalkerBaseBulletEnv): def __init__(self): From 6adea4964f07562f6c2ebc0b23c1f1eff9b708c1 Mon Sep 17 00:00:00 2001 From: Alexis David Jacq Date: Sat, 16 Jun 2018 14:00:05 +0200 Subject: [PATCH 4/4] forgot one "isAlive" to change --- examples/pybullet/gym/pybullet_envs/gym_locomotion_envs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/pybullet/gym/pybullet_envs/gym_locomotion_envs.py b/examples/pybullet/gym/pybullet_envs/gym_locomotion_envs.py index 68b355a13..93cd8af33 100644 --- a/examples/pybullet/gym/pybullet_envs/gym_locomotion_envs.py +++ b/examples/pybullet/gym/pybullet_envs/gym_locomotion_envs.py @@ -64,7 +64,7 @@ class WalkerBaseBulletEnv(MJCFBaseBulletEnv): state = self.robot.calc_state() # also calculates self.joints_at_limit self._alive = float(self.robot.alive_bonus(state[0]+self.robot.initial_z, self.robot.body_rpy[1])) # state[0] is body height above ground, body_rpy[1] is pitch - done = self._isAlive() + done = self._isDone() if not np.isfinite(state).all(): print("~INF~", state) done = True