more fixes in Gym Ant to make reward the same as Roboschool,
apparently feet_collision_cost is not properly updated in Roboschool, for now, disable it in pybullet too: see https://github.com/openai/roboschool/issues/63
This commit is contained in:
@@ -21,20 +21,18 @@ class MJCFBaseBulletEnv(gym.Env):
|
|||||||
self.camera = Camera()
|
self.camera = Camera()
|
||||||
self.isRender = render
|
self.isRender = render
|
||||||
self.robot = robot
|
self.robot = robot
|
||||||
|
|
||||||
self._seed()
|
self._seed()
|
||||||
|
|
||||||
self.action_space = robot.action_space
|
self.action_space = robot.action_space
|
||||||
self.observation_space = robot.observation_space
|
self.observation_space = robot.observation_space
|
||||||
|
def configure(self, args):
|
||||||
|
self.robot.args = args
|
||||||
def _seed(self, seed=None):
|
def _seed(self, seed=None):
|
||||||
self.np_random, seed = gym.utils.seeding.np_random(seed)
|
self.np_random, seed = gym.utils.seeding.np_random(seed)
|
||||||
self.robot.np_random = self.np_random # use the same np_randomizer for robot as for env
|
self.robot.np_random = self.np_random # use the same np_randomizer for robot as for env
|
||||||
return [seed]
|
return [seed]
|
||||||
|
|
||||||
def _reset(self):
|
def _reset(self):
|
||||||
print("self.isRender=")
|
|
||||||
print(self.isRender)
|
|
||||||
if (self.physicsClientId<0):
|
if (self.physicsClientId<0):
|
||||||
if (self.isRender):
|
if (self.isRender):
|
||||||
self.physicsClientId = p.connect(p.GUI)
|
self.physicsClientId = p.connect(p.GUI)
|
||||||
|
|||||||
@@ -51,6 +51,8 @@ def main():
|
|||||||
time.sleep(0.01)
|
time.sleep(0.01)
|
||||||
a = pi.act(obs)
|
a = pi.act(obs)
|
||||||
obs, r, done, _ = env.step(a)
|
obs, r, done, _ = env.step(a)
|
||||||
|
#print("reward")
|
||||||
|
#print(r)
|
||||||
score += r
|
score += r
|
||||||
frame += 1
|
frame += 1
|
||||||
distance=5
|
distance=5
|
||||||
|
|||||||
@@ -15,7 +15,7 @@ class WalkerBaseBulletEnv(MJCFBaseBulletEnv):
|
|||||||
self.walk_target_y = 0
|
self.walk_target_y = 0
|
||||||
|
|
||||||
def create_single_player_scene(self):
|
def create_single_player_scene(self):
|
||||||
self.stadium_scene = SinglePlayerStadiumScene(gravity=9.8, timestep=0.0165, frame_skip=4)
|
self.stadium_scene = SinglePlayerStadiumScene(gravity=9.8, timestep=0.0165/4, frame_skip=4)
|
||||||
return self.stadium_scene
|
return self.stadium_scene
|
||||||
|
|
||||||
def _reset(self):
|
def _reset(self):
|
||||||
@@ -63,15 +63,32 @@ class WalkerBaseBulletEnv(MJCFBaseBulletEnv):
|
|||||||
feet_collision_cost = 0.0
|
feet_collision_cost = 0.0
|
||||||
for i,f in enumerate(self.robot.feet): # TODO: Maybe calculating feet contacts could be done within the robot code
|
for i,f in enumerate(self.robot.feet): # TODO: Maybe calculating feet contacts could be done within the robot code
|
||||||
contact_ids = set((x[2], x[4]) for x in f.contact_list())
|
contact_ids = set((x[2], x[4]) for x in f.contact_list())
|
||||||
#print("CONTACT OF '%s' WITH %s" % (f.name, ",".join(contact_names)) )
|
#print("CONTACT OF '%d' WITH %d" % (contact_ids, ",".join(contact_names)) )
|
||||||
self.robot.feet_contact[i] = 1.0 if (self.ground_ids & contact_ids) else 0.0
|
if (self.ground_ids & contact_ids):
|
||||||
if contact_ids - self.ground_ids:
|
#disable feet_collision_cost update, for compatibility with Roboschool
|
||||||
feet_collision_cost += self.foot_collision_cost
|
#see Issue 63: https://github.com/openai/roboschool/issues/63
|
||||||
|
#feet_collision_cost += self.foot_collision_cost
|
||||||
|
self.robot.feet_contact[i] = 1.0
|
||||||
|
else:
|
||||||
|
self.robot.feet_contact[i] = 0.0
|
||||||
|
|
||||||
|
|
||||||
electricity_cost = self.electricity_cost * float(np.abs(a*self.robot.joint_speeds).mean()) # let's assume we have DC motor with controller, and reverse current braking
|
electricity_cost = self.electricity_cost * float(np.abs(a*self.robot.joint_speeds).mean()) # let's assume we have DC motor with controller, and reverse current braking
|
||||||
electricity_cost += self.stall_torque_cost * float(np.square(a).mean())
|
electricity_cost += self.stall_torque_cost * float(np.square(a).mean())
|
||||||
|
|
||||||
joints_at_limit_cost = float(self.joints_at_limit_cost * self.robot.joints_at_limit)
|
joints_at_limit_cost = float(self.joints_at_limit_cost * self.robot.joints_at_limit)
|
||||||
|
debugmode=0
|
||||||
|
if(debugmode):
|
||||||
|
print("alive=")
|
||||||
|
print(alive)
|
||||||
|
print("progress")
|
||||||
|
print(progress)
|
||||||
|
print("electricity_cost")
|
||||||
|
print(electricity_cost)
|
||||||
|
print("joints_at_limit_cost")
|
||||||
|
print(joints_at_limit_cost)
|
||||||
|
print("feet_collision_cost")
|
||||||
|
print(feet_collision_cost)
|
||||||
|
|
||||||
self.rewards = [
|
self.rewards = [
|
||||||
alive,
|
alive,
|
||||||
@@ -80,8 +97,14 @@ class WalkerBaseBulletEnv(MJCFBaseBulletEnv):
|
|||||||
joints_at_limit_cost,
|
joints_at_limit_cost,
|
||||||
feet_collision_cost
|
feet_collision_cost
|
||||||
]
|
]
|
||||||
|
if (debugmode):
|
||||||
|
print("rewards=")
|
||||||
|
print(self.rewards)
|
||||||
|
print("sum rewards")
|
||||||
|
print(sum(self.rewards))
|
||||||
self.HUD(state, a, done)
|
self.HUD(state, a, done)
|
||||||
|
self.reward += sum(self.rewards)
|
||||||
|
|
||||||
return state, sum(self.rewards), bool(done), {}
|
return state, sum(self.rewards), bool(done), {}
|
||||||
|
|
||||||
def camera_adjust(self):
|
def camera_adjust(self):
|
||||||
|
|||||||
@@ -62,6 +62,16 @@ class WalkerBase(MJCFBasedRobot):
|
|||||||
def calc_potential(self):
|
def calc_potential(self):
|
||||||
# progress in potential field is speed*dt, typical speed is about 2-3 meter per second, this potential will change 2-3 per frame (not per second),
|
# progress in potential field is speed*dt, typical speed is about 2-3 meter per second, this potential will change 2-3 per frame (not per second),
|
||||||
# all rewards have rew/frame units and close to 1.0
|
# all rewards have rew/frame units and close to 1.0
|
||||||
|
debugmode=0
|
||||||
|
if (debugmode):
|
||||||
|
print("calc_potential: self.walk_target_dist")
|
||||||
|
print(self.walk_target_dist)
|
||||||
|
print("self.scene.dt")
|
||||||
|
print(self.scene.dt)
|
||||||
|
print("self.scene.frame_skip")
|
||||||
|
print(self.scene.frame_skip)
|
||||||
|
print("self.scene.timestep")
|
||||||
|
print(self.scene.timestep)
|
||||||
return - self.walk_target_dist / self.scene.dt
|
return - self.walk_target_dist / self.scene.dt
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -12,8 +12,9 @@ class Scene:
|
|||||||
self.np_random, seed = gym.utils.seeding.np_random(None)
|
self.np_random, seed = gym.utils.seeding.np_random(None)
|
||||||
self.timestep = timestep
|
self.timestep = timestep
|
||||||
self.frame_skip = frame_skip
|
self.frame_skip = frame_skip
|
||||||
|
|
||||||
self.dt = self.timestep * self.frame_skip
|
self.dt = self.timestep * self.frame_skip
|
||||||
self.cpp_world = World(gravity, timestep)
|
self.cpp_world = World(gravity, timestep, frame_skip)
|
||||||
#self.cpp_world.set_glsl_path(os.path.join(os.path.dirname(__file__), "cpp-household/glsl"))
|
#self.cpp_world.set_glsl_path(os.path.join(os.path.dirname(__file__), "cpp-household/glsl"))
|
||||||
|
|
||||||
#self.big_caption = self.cpp_world.test_window_big_caption # that's a function you can call
|
#self.big_caption = self.cpp_world.test_window_big_caption # that's a function you can call
|
||||||
@@ -60,16 +61,17 @@ class SingleRobotEmptyScene(Scene):
|
|||||||
|
|
||||||
class World:
|
class World:
|
||||||
|
|
||||||
def __init__(self, gravity, timestep):
|
def __init__(self, gravity, timestep, frame_skip):
|
||||||
self.gravity = gravity
|
self.gravity = gravity
|
||||||
self.timestep = timestep
|
self.timestep = timestep
|
||||||
|
self.frame_skip = frame_skip
|
||||||
self.clean_everything()
|
self.clean_everything()
|
||||||
|
|
||||||
def clean_everything(self):
|
def clean_everything(self):
|
||||||
p.resetSimulation()
|
p.resetSimulation()
|
||||||
p.setGravity(0, 0, -self.gravity)
|
p.setGravity(0, 0, -self.gravity)
|
||||||
p.setDefaultContactERP(0.9)
|
p.setDefaultContactERP(0.9)
|
||||||
p.setPhysicsEngineParameter(fixedTimeStep=self.timestep, numSolverIterations=5, numSubSteps=4)
|
p.setPhysicsEngineParameter(fixedTimeStep=self.timestep*self.frame_skip, numSolverIterations=5, numSubSteps=self.frame_skip)
|
||||||
|
|
||||||
def step(self, frame_skip):
|
def step(self, frame_skip):
|
||||||
p.stepSimulation()
|
p.stepSimulation()
|
||||||
|
|||||||
Reference in New Issue
Block a user