more fixes in Gym Ant to make reward the same as Roboschool,
apparently feet_collision_cost is not properly updated in Roboschool, for now, disable it in pybullet too: see https://github.com/openai/roboschool/issues/63
This commit is contained in:
@@ -15,7 +15,7 @@ class WalkerBaseBulletEnv(MJCFBaseBulletEnv):
|
||||
self.walk_target_y = 0
|
||||
|
||||
def create_single_player_scene(self):
|
||||
self.stadium_scene = SinglePlayerStadiumScene(gravity=9.8, timestep=0.0165, frame_skip=4)
|
||||
self.stadium_scene = SinglePlayerStadiumScene(gravity=9.8, timestep=0.0165/4, frame_skip=4)
|
||||
return self.stadium_scene
|
||||
|
||||
def _reset(self):
|
||||
@@ -63,15 +63,32 @@ class WalkerBaseBulletEnv(MJCFBaseBulletEnv):
|
||||
feet_collision_cost = 0.0
|
||||
for i,f in enumerate(self.robot.feet): # TODO: Maybe calculating feet contacts could be done within the robot code
|
||||
contact_ids = set((x[2], x[4]) for x in f.contact_list())
|
||||
#print("CONTACT OF '%s' WITH %s" % (f.name, ",".join(contact_names)) )
|
||||
self.robot.feet_contact[i] = 1.0 if (self.ground_ids & contact_ids) else 0.0
|
||||
if contact_ids - self.ground_ids:
|
||||
feet_collision_cost += self.foot_collision_cost
|
||||
#print("CONTACT OF '%d' WITH %d" % (contact_ids, ",".join(contact_names)) )
|
||||
if (self.ground_ids & contact_ids):
|
||||
#disable feet_collision_cost update, for compatibility with Roboschool
|
||||
#see Issue 63: https://github.com/openai/roboschool/issues/63
|
||||
#feet_collision_cost += self.foot_collision_cost
|
||||
self.robot.feet_contact[i] = 1.0
|
||||
else:
|
||||
self.robot.feet_contact[i] = 0.0
|
||||
|
||||
|
||||
electricity_cost = self.electricity_cost * float(np.abs(a*self.robot.joint_speeds).mean()) # let's assume we have DC motor with controller, and reverse current braking
|
||||
electricity_cost += self.stall_torque_cost * float(np.square(a).mean())
|
||||
|
||||
joints_at_limit_cost = float(self.joints_at_limit_cost * self.robot.joints_at_limit)
|
||||
debugmode=0
|
||||
if(debugmode):
|
||||
print("alive=")
|
||||
print(alive)
|
||||
print("progress")
|
||||
print(progress)
|
||||
print("electricity_cost")
|
||||
print(electricity_cost)
|
||||
print("joints_at_limit_cost")
|
||||
print(joints_at_limit_cost)
|
||||
print("feet_collision_cost")
|
||||
print(feet_collision_cost)
|
||||
|
||||
self.rewards = [
|
||||
alive,
|
||||
@@ -80,8 +97,14 @@ class WalkerBaseBulletEnv(MJCFBaseBulletEnv):
|
||||
joints_at_limit_cost,
|
||||
feet_collision_cost
|
||||
]
|
||||
|
||||
if (debugmode):
|
||||
print("rewards=")
|
||||
print(self.rewards)
|
||||
print("sum rewards")
|
||||
print(sum(self.rewards))
|
||||
self.HUD(state, a, done)
|
||||
self.reward += sum(self.rewards)
|
||||
|
||||
return state, sum(self.rewards), bool(done), {}
|
||||
|
||||
def camera_adjust(self):
|
||||
|
||||
Reference in New Issue
Block a user