More fixes in Gym Ant to make the reward match Roboschool.
Apparently feet_collision_cost is not properly updated in Roboschool; for now, disable it in pybullet too. See https://github.com/openai/roboschool/issues/63
@@ -21,20 +21,18 @@ class MJCFBaseBulletEnv(gym.Env):
 		self.camera = Camera()
 		self.isRender = render
 		self.robot = robot

 		self._seed()

 		self.action_space = robot.action_space
 		self.observation_space = robot.observation_space

 	def configure(self, args):
 		self.robot.args = args

 	def _seed(self, seed=None):
 		self.np_random, seed = gym.utils.seeding.np_random(seed)
 		self.robot.np_random = self.np_random # use the same np_randomizer for robot as for env
 		return [seed]

 	def _reset(self):
+		print("self.isRender=")
+		print(self.isRender)
 		if (self.physicsClientId<0):
 			if (self.isRender):
 				self.physicsClientId = p.connect(p.GUI)
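Note: gym.utils.seeding.np_random returns both a seeded numpy RandomState and the seed that was used; _seed above hands the very same object to the robot, so env and robot draw from one random stream. A minimal sketch of that gym API, independent of this repo:

    import gym.utils.seeding

    # Pass None to draw a fresh seed, or an int to fix the stream.
    np_random, seed = gym.utils.seeding.np_random(42)
    print(seed)
    print(np_random.uniform(-1, 1))  # deterministic for a fixed seed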
@@ -51,6 +51,8 @@ def main():
 		time.sleep(0.01)
 		a = pi.act(obs)
 		obs, r, done, _ = env.step(a)
+		#print("reward")
+		#print(r)
 		score += r
 		frame += 1
 	distance=5
@@ -15,7 +15,7 @@ class WalkerBaseBulletEnv(MJCFBaseBulletEnv):
 		self.walk_target_y = 0

 	def create_single_player_scene(self):
-		self.stadium_scene = SinglePlayerStadiumScene(gravity=9.8, timestep=0.0165, frame_skip=4)
+		self.stadium_scene = SinglePlayerStadiumScene(gravity=9.8, timestep=0.0165/4, frame_skip=4)
 		return self.stadium_scene

 	def _reset(self):
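Note: this small change is the heart of the reward mismatch. Scene computes dt = timestep * frame_skip, and calc_potential (see the hunk further down) divides by dt, so the old parameters made the progress term four times smaller than intended; the new value matches how Roboschool constructs the same scene. A quick check of the arithmetic:

    # scene.dt = timestep * frame_skip is the time one env.step() advances.
    old_dt = 0.0165 * 4        # 0.066  -> progress reward shrunk 4x
    new_dt = (0.0165 / 4) * 4  # 0.0165 -> matches Roboschool
    print(old_dt / new_dt)     # 4.0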
@@ -63,15 +63,32 @@ class WalkerBaseBulletEnv(MJCFBaseBulletEnv):
 		feet_collision_cost = 0.0
 		for i,f in enumerate(self.robot.feet): # TODO: Maybe calculating feet contacts could be done within the robot code
 			contact_ids = set((x[2], x[4]) for x in f.contact_list())
-			#print("CONTACT OF '%s' WITH %s" % (f.name, ",".join(contact_names)) )
-			self.robot.feet_contact[i] = 1.0 if (self.ground_ids & contact_ids) else 0.0
-			if contact_ids - self.ground_ids:
-				feet_collision_cost += self.foot_collision_cost
+			#print("CONTACT OF '%d' WITH %d" % (contact_ids, ",".join(contact_names)) )
+			if (self.ground_ids & contact_ids):
+				#disable feet_collision_cost update, for compatibility with Roboschool
+				#see Issue 63: https://github.com/openai/roboschool/issues/63
+				#feet_collision_cost += self.foot_collision_cost
+				self.robot.feet_contact[i] = 1.0
+			else:
+				self.robot.feet_contact[i] = 0.0

 		electricity_cost = self.electricity_cost * float(np.abs(a*self.robot.joint_speeds).mean()) # let's assume we have DC motor with controller, and reverse current braking
 		electricity_cost += self.stall_torque_cost * float(np.square(a).mean())

 		joints_at_limit_cost = float(self.joints_at_limit_cost * self.robot.joints_at_limit)
+		debugmode=0
+		if(debugmode):
+			print("alive=")
+			print(alive)
+			print("progress")
+			print(progress)
+			print("electricity_cost")
+			print(electricity_cost)
+			print("joints_at_limit_cost")
+			print(joints_at_limit_cost)
+			print("feet_collision_cost")
+			print(feet_collision_cost)

 		self.rewards = [
 			alive,
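Note: f.contact_list() wraps pybullet's contact query; the (x[2], x[4]) pairs are the (body unique id, link index) of the other object in each contact, and intersecting them with ground_ids tests for ground contact. A rough standalone sketch of the same test (robot_id, foot_link and ground_ids are hypothetical placeholders, not names from this diff):

    import pybullet as p

    def foot_touches_ground(robot_id, foot_link, ground_ids):
        # In a pybullet contact point tuple, index 2 is bodyUniqueIdB and
        # index 4 is linkIndexB -- the object the foot is touching.
        contacts = p.getContactPoints(bodyA=robot_id, linkIndexA=foot_link)
        contact_ids = set((c[2], c[4]) for c in contacts)
        return 1.0 if (ground_ids & contact_ids) else 0.0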
@@ -80,8 +97,14 @@ class WalkerBaseBulletEnv(MJCFBaseBulletEnv):
 			joints_at_limit_cost,
 			feet_collision_cost
 			]

+		if (debugmode):
+			print("rewards=")
+			print(self.rewards)
+			print("sum rewards")
+			print(sum(self.rewards))
 		self.HUD(state, a, done)
 		self.reward += sum(self.rewards)

 		return state, sum(self.rewards), bool(done), {}

 	def camera_adjust(self):
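Note: the step reward is simply the sum of the shaped terms listed above; the debug prints name them in order. A toy recomputation with made-up magnitudes (the coefficients -2.0 and -0.1 and all values here are illustrative, not the env's actual constants):

    import numpy as np

    a = np.array([0.5, -0.3, 0.2, -0.7])            # fake action
    joint_speeds = np.array([1.2, 0.7, -0.4, 0.9])  # fake joint speeds

    alive, progress = 1.0, 2.5
    electricity_cost = -2.0 * float(np.abs(a * joint_speeds).mean())  # DC-motor model
    electricity_cost += -0.1 * float(np.square(a).mean())             # stall torque
    joints_at_limit_cost = -0.2
    feet_collision_cost = 0.0                       # kept at 0.0 for Roboschool parity

    rewards = [alive, progress, electricity_cost, joints_at_limit_cost, feet_collision_cost]
    print(sum(rewards))                             # what _step() returns with the state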
@@ -62,6 +62,16 @@ class WalkerBase(MJCFBasedRobot):
 	def calc_potential(self):
 		# progress in potential field is speed*dt, typical speed is about 2-3 meter per second, this potential will change 2-3 per frame (not per second),
 		# all rewards have rew/frame units and close to 1.0
+		debugmode=0
+		if (debugmode):
+			print("calc_potential: self.walk_target_dist")
+			print(self.walk_target_dist)
+			print("self.scene.dt")
+			print(self.scene.dt)
+			print("self.scene.frame_skip")
+			print(self.scene.frame_skip)
+			print("self.scene.timestep")
+			print(self.scene.timestep)
 		return - self.walk_target_dist / self.scene.dt
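Note: with calc_potential = -walk_target_dist / dt, the progress term (potential after a step minus potential before) is the distance gained divided by dt, i.e. speed toward the target, which is why the dt fix above matters. A small numeric check (the before/after bookkeeping mirrors the comment's description; variable names are mine):

    def calc_potential(walk_target_dist, dt):
        # Less distance to the target = higher (less negative) potential.
        return -walk_target_dist / dt

    dt = 0.0165                         # per-frame interval after the fix
    pot_old = calc_potential(5.00, dt)
    pot_new = calc_potential(4.96, dt)  # moved 4 cm closer in one frame
    print(pot_new - pot_old)            # ~2.4, in the 2-3 rew/frame range the comment predicts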
@@ -12,8 +12,9 @@ class Scene:
 		self.np_random, seed = gym.utils.seeding.np_random(None)
 		self.timestep = timestep
+		self.frame_skip = frame_skip

 		self.dt = self.timestep * self.frame_skip
-		self.cpp_world = World(gravity, timestep)
+		self.cpp_world = World(gravity, timestep, frame_skip)
 		#self.cpp_world.set_glsl_path(os.path.join(os.path.dirname(__file__), "cpp-household/glsl"))

 		#self.big_caption = self.cpp_world.test_window_big_caption # that's a function you can call
@@ -60,16 +61,17 @@ class SingleRobotEmptyScene(Scene):

 class World:

-	def __init__(self, gravity, timestep):
+	def __init__(self, gravity, timestep, frame_skip):
 		self.gravity = gravity
 		self.timestep = timestep
+		self.frame_skip = frame_skip
 		self.clean_everything()

 	def clean_everything(self):
 		p.resetSimulation()
 		p.setGravity(0, 0, -self.gravity)
 		p.setDefaultContactERP(0.9)
-		p.setPhysicsEngineParameter(fixedTimeStep=self.timestep, numSolverIterations=5, numSubSteps=4)
+		p.setPhysicsEngineParameter(fixedTimeStep=self.timestep*self.frame_skip, numSolverIterations=5, numSubSteps=self.frame_skip)

 	def step(self, frame_skip):
 		p.stepSimulation()
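Note: with fixedTimeStep = timestep * frame_skip and numSubSteps = frame_skip, a single p.stepSimulation() call advances one full action interval while integrating frame_skip finer substeps internally, so World.step() needs no Python-side loop. A minimal headless sketch of those calls:

    import pybullet as p

    timestep, frame_skip = 0.0165 / 4, 4
    cid = p.connect(p.DIRECT)             # headless client, no GUI needed
    p.resetSimulation()
    p.setGravity(0, 0, -9.8)
    p.setPhysicsEngineParameter(fixedTimeStep=timestep * frame_skip,
                                numSolverIterations=5,
                                numSubSteps=frame_skip)
    p.stepSimulation()                    # advances 0.0165 s in 4 substeps
    p.disconnect(cid)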