more fixes in Gym Ant to make reward the same as Roboschool,

apparently feet_collision_cost is not properly updated in Roboschool,
for now, disable it in pybullet too: see https://github.com/openai/roboschool/issues/63
This commit is contained in:
Erwin Coumans
2017-09-09 12:36:53 -07:00
parent c144d9c045
commit 4f47a223ef
5 changed files with 48 additions and 13 deletions

View File

@@ -15,7 +15,7 @@ class WalkerBaseBulletEnv(MJCFBaseBulletEnv):
self.walk_target_y = 0
def create_single_player_scene(self):
self.stadium_scene = SinglePlayerStadiumScene(gravity=9.8, timestep=0.0165, frame_skip=4)
self.stadium_scene = SinglePlayerStadiumScene(gravity=9.8, timestep=0.0165/4, frame_skip=4)
return self.stadium_scene
def _reset(self):
@@ -63,15 +63,32 @@ class WalkerBaseBulletEnv(MJCFBaseBulletEnv):
feet_collision_cost = 0.0
for i,f in enumerate(self.robot.feet): # TODO: Maybe calculating feet contacts could be done within the robot code
contact_ids = set((x[2], x[4]) for x in f.contact_list())
#print("CONTACT OF '%s' WITH %s" % (f.name, ",".join(contact_names)) )
self.robot.feet_contact[i] = 1.0 if (self.ground_ids & contact_ids) else 0.0
if contact_ids - self.ground_ids:
feet_collision_cost += self.foot_collision_cost
#print("CONTACT OF '%d' WITH %d" % (contact_ids, ",".join(contact_names)) )
if (self.ground_ids & contact_ids):
#disable feet_collision_cost update, for compatibility with Roboschool
#see Issue 63: https://github.com/openai/roboschool/issues/63
#feet_collision_cost += self.foot_collision_cost
self.robot.feet_contact[i] = 1.0
else:
self.robot.feet_contact[i] = 0.0
electricity_cost = self.electricity_cost * float(np.abs(a*self.robot.joint_speeds).mean()) # let's assume we have DC motor with controller, and reverse current braking
electricity_cost += self.stall_torque_cost * float(np.square(a).mean())
joints_at_limit_cost = float(self.joints_at_limit_cost * self.robot.joints_at_limit)
debugmode=0
if(debugmode):
print("alive=")
print(alive)
print("progress")
print(progress)
print("electricity_cost")
print(electricity_cost)
print("joints_at_limit_cost")
print(joints_at_limit_cost)
print("feet_collision_cost")
print(feet_collision_cost)
self.rewards = [
alive,
@@ -80,8 +97,14 @@ class WalkerBaseBulletEnv(MJCFBaseBulletEnv):
joints_at_limit_cost,
feet_collision_cost
]
if (debugmode):
print("rewards=")
print(self.rewards)
print("sum rewards")
print(sum(self.rewards))
self.HUD(state, a, done)
self.reward += sum(self.rewards)
return state, sum(self.rewards), bool(done), {}
def camera_adjust(self):