add a few more discrete actions to cartpole_bullet.py so it trains faster + add option to train without rendering, enjoy with rendering.
This commit is contained in:
@@ -22,10 +22,14 @@ class CartPoleBulletEnv(gym.Env):
|
|||||||
'video.frames_per_second' : 50
|
'video.frames_per_second' : 50
|
||||||
}
|
}
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self, renders=True):
|
||||||
# start the bullet physics server
|
# start the bullet physics server
|
||||||
p.connect(p.GUI)
|
self._renders = renders
|
||||||
#p.connect(p.DIRECT)
|
if (renders):
|
||||||
|
p.connect(p.GUI)
|
||||||
|
else:
|
||||||
|
p.connect(p.DIRECT)
|
||||||
|
|
||||||
observation_high = np.array([
|
observation_high = np.array([
|
||||||
np.finfo(np.float32).max,
|
np.finfo(np.float32).max,
|
||||||
np.finfo(np.float32).max,
|
np.finfo(np.float32).max,
|
||||||
@@ -33,7 +37,7 @@ class CartPoleBulletEnv(gym.Env):
|
|||||||
np.finfo(np.float32).max])
|
np.finfo(np.float32).max])
|
||||||
action_high = np.array([0.1])
|
action_high = np.array([0.1])
|
||||||
|
|
||||||
self.action_space = spaces.Discrete(5)
|
self.action_space = spaces.Discrete(9)
|
||||||
self.observation_space = spaces.Box(-observation_high, observation_high)
|
self.observation_space = spaces.Box(-observation_high, observation_high)
|
||||||
|
|
||||||
self.theta_threshold_radians = 1
|
self.theta_threshold_radians = 1
|
||||||
@@ -56,8 +60,8 @@ class CartPoleBulletEnv(gym.Env):
|
|||||||
self.state = p.getJointState(self.cartpole, 1)[0:2] + p.getJointState(self.cartpole, 0)[0:2]
|
self.state = p.getJointState(self.cartpole, 1)[0:2] + p.getJointState(self.cartpole, 0)[0:2]
|
||||||
theta, theta_dot, x, x_dot = self.state
|
theta, theta_dot, x, x_dot = self.state
|
||||||
|
|
||||||
dv = 0.4
|
dv = 0.1
|
||||||
deltav = [-2.*dv, -dv, 0, dv, 2.*dv][action]
|
deltav = [-10.*dv,-5.*dv, -2.*dv, -0.1*dv, 0, 0.1*dv, 2.*dv,5.*dv, 10.*dv][action]
|
||||||
|
|
||||||
p.setJointMotorControl2(self.cartpole, 0, p.VELOCITY_CONTROL, targetVelocity=(deltav + self.state[3]))
|
p.setJointMotorControl2(self.cartpole, 0, p.VELOCITY_CONTROL, targetVelocity=(deltav + self.state[3]))
|
||||||
|
|
||||||
|
|||||||
@@ -12,7 +12,7 @@ def callback(lcl, glb):
|
|||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
|
||||||
env = gym.make('CartPoleBulletEnv-v0')
|
env = CartPoleBulletEnv(renders=False)
|
||||||
model = deepq.models.mlp([64])
|
model = deepq.models.mlp([64])
|
||||||
act = deepq.learn(
|
act = deepq.learn(
|
||||||
env,
|
env,
|
||||||
|
|||||||
Reference in New Issue
Block a user