diff --git a/examples/pybullet/gym/pybullet_envs/env_bases.py b/examples/pybullet/gym/pybullet_envs/env_bases.py index 04340da07..35f725822 100644 --- a/examples/pybullet/gym/pybullet_envs/env_bases.py +++ b/examples/pybullet/gym/pybullet_envs/env_bases.py @@ -21,20 +21,18 @@ class MJCFBaseBulletEnv(gym.Env): self.camera = Camera() self.isRender = render self.robot = robot - self._seed() self.action_space = robot.action_space self.observation_space = robot.observation_space - + def configure(self, args): + self.robot.args = args def _seed(self, seed=None): self.np_random, seed = gym.utils.seeding.np_random(seed) self.robot.np_random = self.np_random # use the same np_randomizer for robot as for env return [seed] def _reset(self): - print("self.isRender=") - print(self.isRender) if (self.physicsClientId<0): if (self.isRender): self.physicsClientId = p.connect(p.GUI) diff --git a/examples/pybullet/gym/pybullet_envs/examples/enjoy_TF_AntBulletEnv_v0_2017may.py b/examples/pybullet/gym/pybullet_envs/examples/enjoy_TF_AntBulletEnv_v0_2017may.py index 482b78be3..1ec52c3de 100644 --- a/examples/pybullet/gym/pybullet_envs/examples/enjoy_TF_AntBulletEnv_v0_2017may.py +++ b/examples/pybullet/gym/pybullet_envs/examples/enjoy_TF_AntBulletEnv_v0_2017may.py @@ -51,6 +51,8 @@ def main(): time.sleep(0.01) a = pi.act(obs) obs, r, done, _ = env.step(a) + #print("reward") + #print(r) score += r frame += 1 distance=5 diff --git a/examples/pybullet/gym/pybullet_envs/gym_locomotion_envs.py b/examples/pybullet/gym/pybullet_envs/gym_locomotion_envs.py index bc75f7bfd..102a39cf4 100644 --- a/examples/pybullet/gym/pybullet_envs/gym_locomotion_envs.py +++ b/examples/pybullet/gym/pybullet_envs/gym_locomotion_envs.py @@ -15,7 +15,7 @@ class WalkerBaseBulletEnv(MJCFBaseBulletEnv): self.walk_target_y = 0 def create_single_player_scene(self): - self.stadium_scene = SinglePlayerStadiumScene(gravity=9.8, timestep=0.0165, frame_skip=4) + self.stadium_scene = SinglePlayerStadiumScene(gravity=9.8, timestep=0.0165/4, frame_skip=4) return self.stadium_scene def _reset(self): @@ -63,15 +63,32 @@ class WalkerBaseBulletEnv(MJCFBaseBulletEnv): feet_collision_cost = 0.0 for i,f in enumerate(self.robot.feet): # TODO: Maybe calculating feet contacts could be done within the robot code contact_ids = set((x[2], x[4]) for x in f.contact_list()) - #print("CONTACT OF '%s' WITH %s" % (f.name, ",".join(contact_names)) ) - self.robot.feet_contact[i] = 1.0 if (self.ground_ids & contact_ids) else 0.0 - if contact_ids - self.ground_ids: - feet_collision_cost += self.foot_collision_cost + #print("CONTACT OF '%d' WITH %d" % (contact_ids, ",".join(contact_names)) ) + if (self.ground_ids & contact_ids): + #disable feet_collision_cost update, for compatibility with Roboschool + #see Issue 63: https://github.com/openai/roboschool/issues/63 + #feet_collision_cost += self.foot_collision_cost + self.robot.feet_contact[i] = 1.0 + else: + self.robot.feet_contact[i] = 0.0 + electricity_cost = self.electricity_cost * float(np.abs(a*self.robot.joint_speeds).mean()) # let's assume we have DC motor with controller, and reverse current braking electricity_cost += self.stall_torque_cost * float(np.square(a).mean()) joints_at_limit_cost = float(self.joints_at_limit_cost * self.robot.joints_at_limit) + debugmode=0 + if(debugmode): + print("alive=") + print(alive) + print("progress") + print(progress) + print("electricity_cost") + print(electricity_cost) + print("joints_at_limit_cost") + print(joints_at_limit_cost) + print("feet_collision_cost") + print(feet_collision_cost) self.rewards = [ alive, @@ -80,8 +97,14 @@ class WalkerBaseBulletEnv(MJCFBaseBulletEnv): joints_at_limit_cost, feet_collision_cost ] - + if (debugmode): + print("rewards=") + print(self.rewards) + print("sum rewards") + print(sum(self.rewards)) self.HUD(state, a, done) + self.reward += sum(self.rewards) + return state, sum(self.rewards), bool(done), {} def camera_adjust(self): diff --git a/examples/pybullet/gym/pybullet_envs/robot_locomotors.py b/examples/pybullet/gym/pybullet_envs/robot_locomotors.py index 772dee5c6..5a0bf2cf5 100644 --- a/examples/pybullet/gym/pybullet_envs/robot_locomotors.py +++ b/examples/pybullet/gym/pybullet_envs/robot_locomotors.py @@ -62,6 +62,16 @@ class WalkerBase(MJCFBasedRobot): def calc_potential(self): # progress in potential field is speed*dt, typical speed is about 2-3 meter per second, this potential will change 2-3 per frame (not per second), # all rewards have rew/frame units and close to 1.0 + debugmode=0 + if (debugmode): + print("calc_potential: self.walk_target_dist") + print(self.walk_target_dist) + print("self.scene.dt") + print(self.scene.dt) + print("self.scene.frame_skip") + print(self.scene.frame_skip) + print("self.scene.timestep") + print(self.scene.timestep) return - self.walk_target_dist / self.scene.dt diff --git a/examples/pybullet/gym/pybullet_envs/scene_abstract.py b/examples/pybullet/gym/pybullet_envs/scene_abstract.py index 152427a25..11ae8d489 100644 --- a/examples/pybullet/gym/pybullet_envs/scene_abstract.py +++ b/examples/pybullet/gym/pybullet_envs/scene_abstract.py @@ -12,8 +12,9 @@ class Scene: self.np_random, seed = gym.utils.seeding.np_random(None) self.timestep = timestep self.frame_skip = frame_skip + self.dt = self.timestep * self.frame_skip - self.cpp_world = World(gravity, timestep) + self.cpp_world = World(gravity, timestep, frame_skip) #self.cpp_world.set_glsl_path(os.path.join(os.path.dirname(__file__), "cpp-household/glsl")) #self.big_caption = self.cpp_world.test_window_big_caption # that's a function you can call @@ -60,16 +61,17 @@ class SingleRobotEmptyScene(Scene): class World: - def __init__(self, gravity, timestep): + def __init__(self, gravity, timestep, frame_skip): self.gravity = gravity self.timestep = timestep + self.frame_skip = frame_skip self.clean_everything() def clean_everything(self): p.resetSimulation() p.setGravity(0, 0, -self.gravity) p.setDefaultContactERP(0.9) - p.setPhysicsEngineParameter(fixedTimeStep=self.timestep, numSolverIterations=5, numSubSteps=4) + p.setPhysicsEngineParameter(fixedTimeStep=self.timestep*self.frame_skip, numSolverIterations=5, numSubSteps=self.frame_skip) def step(self, frame_skip): p.stepSimulation()