more fixes in Gym Ant to make reward the same as Roboschool,

apparently feet_collision_cost is not properly updated in Roboschool, for now, disable it in pybullet too: see https://github.com/openai/roboschool/issues/63
2017-09-09 12:36:53 -07:00
parent c144d9c045
commit 4f47a223ef
5 changed files with 48 additions and 13 deletions
--- a/examples/pybullet/gym/pybullet_envs/gym_locomotion_envs.py
+++ b/examples/pybullet/gym/pybullet_envs/gym_locomotion_envs.py
@@ -15,7 +15,7 @@ class WalkerBaseBulletEnv(MJCFBaseBulletEnv):
 		self.walk_target_y = 0

 	def create_single_player_scene(self):
-		self.stadium_scene = SinglePlayerStadiumScene(gravity=9.8, timestep=0.0165, frame_skip=4)
+		self.stadium_scene = SinglePlayerStadiumScene(gravity=9.8, timestep=0.0165/4, frame_skip=4)
 		return self.stadium_scene

 	def _reset(self):
@@ -63,15 +63,32 @@ class WalkerBaseBulletEnv(MJCFBaseBulletEnv):
 		feet_collision_cost = 0.0
 		for i,f in enumerate(self.robot.feet): # TODO: Maybe calculating feet contacts could be done within the robot code
 			contact_ids = set((x[2], x[4]) for x in f.contact_list())
-			#print("CONTACT OF '%s' WITH %s" % (f.name, ",".join(contact_names)) )
-			self.robot.feet_contact[i] = 1.0 if (self.ground_ids & contact_ids) else 0.0
-			if contact_ids - self.ground_ids:
-				feet_collision_cost += self.foot_collision_cost
+			#print("CONTACT OF '%d' WITH %d" % (contact_ids, ",".join(contact_names)) )
+			if (self.ground_ids & contact_ids):
+				#disable feet_collision_cost update, for compatibility with Roboschool
+                        	#see Issue 63: https://github.com/openai/roboschool/issues/63
+				#feet_collision_cost += self.foot_collision_cost
+				self.robot.feet_contact[i] = 1.0
+			else:
+				self.robot.feet_contact[i] = 0.0
+

 		electricity_cost  = self.electricity_cost  * float(np.abs(a*self.robot.joint_speeds).mean())  # let's assume we have DC motor with controller, and reverse current braking
 		electricity_cost += self.stall_torque_cost * float(np.square(a).mean())

 		joints_at_limit_cost = float(self.joints_at_limit_cost * self.robot.joints_at_limit)
+		debugmode=0
+		if(debugmode):
+			print("alive=")
+			print(alive)
+			print("progress")
+			print(progress)
+			print("electricity_cost")
+			print(electricity_cost)
+			print("joints_at_limit_cost")
+			print(joints_at_limit_cost)
+			print("feet_collision_cost")
+			print(feet_collision_cost)

 		self.rewards = [
 			alive,
@@ -80,8 +97,14 @@ class WalkerBaseBulletEnv(MJCFBaseBulletEnv):
 			joints_at_limit_cost,
 			feet_collision_cost
 			]
-
+		if (debugmode):
+			print("rewards=")
+			print(self.rewards)
+			print("sum rewards")
+			print(sum(self.rewards))
 		self.HUD(state, a, done)
+		self.reward += sum(self.rewards)
+
 		return state, sum(self.rewards), bool(done), {}

 	def camera_adjust(self):