add a few more discrete actions to cartpole_bullet.py so it trains faster + add option to train without rendering, enjoy with rendering.

2017-06-22 16:49:14 -07:00
parent fbff4c57ca
commit a136098120
2 changed files with 11 additions and 7 deletions
--- a/examples/pybullet/gym/envs/bullet/cartpole_bullet.py
+++ b/examples/pybullet/gym/envs/bullet/cartpole_bullet.py
@@ -22,10 +22,14 @@ class CartPoleBulletEnv(gym.Env):
    'video.frames_per_second' : 50
  }
-  def __init__(self):
+  def __init__(self, renders=True):
    # start the bullet physics server
-    p.connect(p.GUI)
+    self._renders = renders
-    #p.connect(p.DIRECT)
+    if (renders):
 	    p.connect(p.GUI)
    else:
    	p.connect(p.DIRECT)
    observation_high = np.array([
          np.finfo(np.float32).max,
          np.finfo(np.float32).max,
@@ -33,7 +37,7 @@ class CartPoleBulletEnv(gym.Env):
          np.finfo(np.float32).max])
    action_high = np.array([0.1])
-    self.action_space = spaces.Discrete(5)
+    self.action_space = spaces.Discrete(9)
    self.observation_space = spaces.Box(-observation_high, observation_high)
    self.theta_threshold_radians = 1
@@ -56,8 +60,8 @@ class CartPoleBulletEnv(gym.Env):
    self.state = p.getJointState(self.cartpole, 1)[0:2] + p.getJointState(self.cartpole, 0)[0:2]
    theta, theta_dot, x, x_dot = self.state
-    dv = 0.4
+    dv = 0.1
-    deltav = [-2.*dv, -dv, 0, dv, 2.*dv][action]
+    deltav = [-10.*dv,-5.*dv, -2.*dv, -0.1*dv, 0, 0.1*dv, 2.*dv,5.*dv, 10.*dv][action]
    p.setJointMotorControl2(self.cartpole, 0, p.VELOCITY_CONTROL, targetVelocity=(deltav + self.state[3]))
--- a/examples/pybullet/gym/train_pybullet_cartpole.py
+++ b/examples/pybullet/gym/train_pybullet_cartpole.py
@@ -12,7 +12,7 @@ def callback(lcl, glb):
 def main():
-    env = gym.make('CartPoleBulletEnv-v0')
+    env = CartPoleBulletEnv(renders=False)
    model = deepq.models.mlp([64])
    act = deepq.learn(
        env,