Split pendula robot from the pendula envs due to changes in the underlying mujoco xml base env.

This commit is contained in:
Benelot
2017-08-17 00:25:16 +02:00
parent 9f20e40541
commit 3191291748
2 changed files with 104 additions and 86 deletions

View File

@@ -1,71 +1,29 @@
from scene_abstract import SingleRobotEmptyScene from scene_abstract import SingleRobotEmptyScene
from env_bases import MujocoXmlBaseBulletEnv from env_bases import MujocoXmlBaseBulletEnv
from robot_pendula import InvertedPendulum, InvertedPendulumSwingup, InvertedDoublePendulum
import gym, gym.spaces, gym.utils, gym.utils.seeding import gym, gym.spaces, gym.utils, gym.utils.seeding
import numpy as np import numpy as np
import os, sys import os, sys
class InvertedPendulumBulletEnv(MujocoXmlBaseBulletEnv): class InvertedPendulumBulletEnv(MujocoXmlBaseBulletEnv):
swingup = False
force_gain = 12 # TODO: Try to find out why we need to scale the force
def __init__(self): def __init__(self):
MujocoXmlBaseBulletEnv.__init__(self, 'inverted_pendulum.xml', 'cart', action_dim=1, obs_dim=5) self.robot = InvertedPendulum()
MujocoXmlBaseBulletEnv.__init__(self, self.robot)
def create_single_player_scene(self): def create_single_player_scene(self):
return SingleRobotEmptyScene(gravity=9.8, timestep=0.0165, frame_skip=1) return SingleRobotEmptyScene(gravity=9.8, timestep=0.0165, frame_skip=1)
def robot_specific_reset(self):
self.pole = self.parts["pole"]
self.slider = self.jdict["slider"]
self.j1 = self.jdict["hinge"]
u = self.np_random.uniform(low=-.1, high=.1)
self.j1.reset_current_position( u if not self.swingup else 3.1415+u , 0)
self.j1.set_motor_torque(0)
def apply_action(self, a):
#assert( np.isfinite(a).all() )
if not np.isfinite(a).all():
print("a is inf")
a[0] = 0
self.slider.set_motor_torque( self.force_gain * 100*float(np.clip(a[0], -1, +1)) )
def calc_state(self):
self.theta, theta_dot = self.j1.current_position()
x, vx = self.slider.current_position()
#assert( np.isfinite(x) )
if not np.isfinite(x):
print("x is inf")
x = 0
if not np.isfinite(vx):
print("vx is inf")
vx = 0
if not np.isfinite(self.theta):
print("theta is inf")
self.theta = 0
if not np.isfinite(theta_dot):
print("theta_dot is inf")
theta_dot = 0
return np.array([
x, vx,
np.cos(self.theta), np.sin(self.theta), theta_dot
])
def _step(self, a): def _step(self, a):
self.apply_action(a) self.robot.apply_action(a)
self.scene.global_step() self.scene.global_step()
state = self.calc_state() # sets self.pos_x self.pos_y state = self.robot.calc_state() # sets self.pos_x self.pos_y
vel_penalty = 0 vel_penalty = 0
if self.swingup: if self.robot.swingup:
reward = np.cos(self.theta) reward = np.cos(self.robot.theta)
done = False done = False
else: else:
reward = 1.0 reward = 1.0
done = np.abs(self.theta) > .2 done = np.abs(self.robot.theta) > .2
self.rewards = [float(reward)] self.rewards = [float(reward)]
self.HUD(state, a, done) self.HUD(state, a, done)
return state, sum(self.rewards), done, {} return state, sum(self.rewards), done, {}
@@ -74,57 +32,30 @@ class InvertedPendulumBulletEnv(MujocoXmlBaseBulletEnv):
self.camera.move_and_look_at(0,1.2,1.0, 0,0,0.5) self.camera.move_and_look_at(0,1.2,1.0, 0,0,0.5)
class InvertedPendulumSwingupBulletEnv(InvertedPendulumBulletEnv): class InvertedPendulumSwingupBulletEnv(InvertedPendulumBulletEnv):
swingup = True def __init__(self):
force_gain = 2.2 # TODO: Try to find out why we need to scale the force self.robot = InvertedPendulumSwingup()
MujocoXmlBaseBulletEnv.__init__(self, self.robot)
class InvertedDoublePendulumBulletEnv(MujocoXmlBaseBulletEnv): class InvertedDoublePendulumBulletEnv(MujocoXmlBaseBulletEnv):
def __init__(self): def __init__(self):
MujocoXmlBaseBulletEnv.__init__(self, 'inverted_double_pendulum.xml', 'cart', action_dim=1, obs_dim=9) self.robot = InvertedDoublePendulum()
MujocoXmlBaseBulletEnv.__init__(self, self.robot)
def create_single_player_scene(self): def create_single_player_scene(self):
return SingleRobotEmptyScene(gravity=9.8, timestep=0.0165, frame_skip=1) return SingleRobotEmptyScene(gravity=9.8, timestep=0.0165, frame_skip=1)
def robot_specific_reset(self):
self.pole2 = self.parts["pole2"]
self.slider = self.jdict["slider"]
self.j1 = self.jdict["hinge"]
self.j2 = self.jdict["hinge2"]
u = self.np_random.uniform(low=-.1, high=.1, size=[2])
self.j1.reset_current_position(float(u[0]), 0)
self.j2.reset_current_position(float(u[1]), 0)
self.j1.set_motor_torque(0)
self.j2.set_motor_torque(0)
def apply_action(self, a):
assert( np.isfinite(a).all() )
force_gain = 0.78 # TODO: Try to find out why we need to scale the force
self.slider.set_motor_torque( force_gain *200*float(np.clip(a[0], -1, +1)) )
def calc_state(self):
theta, theta_dot = self.j1.current_position()
gamma, gamma_dot = self.j2.current_position()
x, vx = self.slider.current_position()
self.pos_x, _, self.pos_y = self.pole2.pose().xyz()
assert( np.isfinite(x) )
return np.array([
x, vx,
self.pos_x,
np.cos(theta), np.sin(theta), theta_dot,
np.cos(gamma), np.sin(gamma), gamma_dot,
])
def _step(self, a): def _step(self, a):
self.apply_action(a) self.robot.apply_action(a)
self.scene.global_step() self.scene.global_step()
state = self.calc_state() # sets self.pos_x self.pos_y state = self.robot.calc_state() # sets self.pos_x self.pos_y
# upright position: 0.6 (one pole) + 0.6 (second pole) * 0.5 (middle of second pole) = 0.9 # upright position: 0.6 (one pole) + 0.6 (second pole) * 0.5 (middle of second pole) = 0.9
# using <site> tag in original xml, upright position is 0.6 + 0.6 = 1.2, difference +0.3 # using <site> tag in original xml, upright position is 0.6 + 0.6 = 1.2, difference +0.3
dist_penalty = 0.01 * self.pos_x ** 2 + (self.pos_y + 0.3 - 2) ** 2 dist_penalty = 0.01 * self.robot.pos_x ** 2 + (self.robot.pos_y + 0.3 - 2) ** 2
# v1, v2 = self.model.data.qvel[1:3] TODO when this fixed https://github.com/bulletphysics/bullet3/issues/1040 # v1, v2 = self.model.data.qvel[1:3] TODO when this fixed https://github.com/bulletphysics/bullet3/issues/1040
#vel_penalty = 1e-3 * v1**2 + 5e-3 * v2**2 #vel_penalty = 1e-3 * v1**2 + 5e-3 * v2**2
vel_penalty = 0 vel_penalty = 0
alive_bonus = 10 alive_bonus = 10
done = self.pos_y + 0.3 <= 1 done = self.robot.pos_y + 0.3 <= 1
self.rewards = [float(alive_bonus), float(-dist_penalty), float(-vel_penalty)] self.rewards = [float(alive_bonus), float(-dist_penalty), float(-vel_penalty)]
self.HUD(state, a, done) self.HUD(state, a, done)
return state, sum(self.rewards), done, {} return state, sum(self.rewards), done, {}

View File

@@ -0,0 +1,87 @@
from robot_bases import MujocoXmlBasedRobot
import numpy as np
class InvertedPendulum(MujocoXmlBasedRobot):
swingup = False
force_gain = 12 # TODO: Try to find out why we need to scale the force
def __init__(self):
MujocoXmlBasedRobot.__init__(self, 'inverted_pendulum.xml', 'cart', action_dim=1, obs_dim=5)
def robot_specific_reset(self):
self.pole = self.parts["pole"]
self.slider = self.jdict["slider"]
self.j1 = self.jdict["hinge"]
u = self.np_random.uniform(low=-.1, high=.1)
self.j1.reset_current_position( u if not self.swingup else 3.1415+u , 0)
self.j1.set_motor_torque(0)
def apply_action(self, a):
#assert( np.isfinite(a).all() )
if not np.isfinite(a).all():
print("a is inf")
a[0] = 0
self.slider.set_motor_torque( self.force_gain * 100*float(np.clip(a[0], -1, +1)) )
def calc_state(self):
self.theta, theta_dot = self.j1.current_position()
x, vx = self.slider.current_position()
#assert( np.isfinite(x) )
if not np.isfinite(x):
print("x is inf")
x = 0
if not np.isfinite(vx):
print("vx is inf")
vx = 0
if not np.isfinite(self.theta):
print("theta is inf")
self.theta = 0
if not np.isfinite(theta_dot):
print("theta_dot is inf")
theta_dot = 0
return np.array([
x, vx,
np.cos(self.theta), np.sin(self.theta), theta_dot
])
class InvertedPendulumSwingup(InvertedPendulum):
swingup = True
force_gain = 2.2 # TODO: Try to find out why we need to scale the force
class InvertedDoublePendulum(MujocoXmlBasedRobot):
def __init__(self):
MujocoXmlBasedRobot.__init__(self, 'inverted_double_pendulum.xml', 'cart', action_dim=1, obs_dim=9)
def robot_specific_reset(self):
self.pole2 = self.parts["pole2"]
self.slider = self.jdict["slider"]
self.j1 = self.jdict["hinge"]
self.j2 = self.jdict["hinge2"]
u = self.np_random.uniform(low=-.1, high=.1, size=[2])
self.j1.reset_current_position(float(u[0]), 0)
self.j2.reset_current_position(float(u[1]), 0)
self.j1.set_motor_torque(0)
self.j2.set_motor_torque(0)
def apply_action(self, a):
assert( np.isfinite(a).all() )
force_gain = 0.78 # TODO: Try to find out why we need to scale the force
self.slider.set_motor_torque( force_gain *200*float(np.clip(a[0], -1, +1)) )
def calc_state(self):
theta, theta_dot = self.j1.current_position()
gamma, gamma_dot = self.j2.current_position()
x, vx = self.slider.current_position()
self.pos_x, _, self.pos_y = self.pole2.pose().xyz()
assert( np.isfinite(x) )
return np.array([
x, vx,
self.pos_x,
np.cos(theta), np.sin(theta), theta_dot,
np.cos(gamma), np.sin(gamma), gamma_dot,
])