fix gym/envs/bullet/cartpole_bullet.py (velocity hyperparameter still needs to be tuned)
add enjoy_pybullet_cartpole.py
examples/pybullet/gym/enjoy_pybullet_cartpole.py (new file, 29 lines)
@@ -0,0 +1,29 @@
import gym

from baselines import deepq
from envs.bullet.cartpole_bullet import CartPoleBulletEnv


def main():
    env = gym.make('CartPoleBulletEnv-v0')
    act = deepq.load("cartpole_model.pkl")

    while True:
        obs, done = env.reset(), False
        print("obs")
        print(obs)
        print("type(obs)")
        print(type(obs))
        episode_rew = 0
        while not done:
            env.render()

            o = obs[None]
            aa = act(o)
            a = aa[0]
            obs, rew, done, _ = env.step(a)
            episode_rew += rew
        print("Episode reward", episode_rew)


if __name__ == '__main__':
    main()
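The enjoy script above expects a pre-trained cartpole_model.pkl on disk, but this commit does not include the script that produces it. Below is a minimal training sketch, assuming the 2017-era baselines deepq API (deepq.models.mlp, deepq.learn, act.save); the script name, network size, and hyperparameters are illustrative assumptions, not part of this commit.

# train_pybullet_cartpole.py -- hypothetical companion script, not part of this commit
# Sketch only, assuming the 2017-era baselines deepq API.
import gym

from baselines import deepq
from envs.bullet.cartpole_bullet import CartPoleBulletEnv


def main():
    env = gym.make('CartPoleBulletEnv-v0')
    # Small MLP Q-network; the layer size is a guess, tune as needed.
    model = deepq.models.mlp([64])
    act = deepq.learn(
        env,
        q_func=model,
        lr=1e-3,
        max_timesteps=100000,
        buffer_size=50000,
        exploration_fraction=0.1,
        exploration_final_eps=0.02,
        print_freq=10,
    )
    # Save the learned policy so enjoy_pybullet_cartpole.py can deepq.load() it.
    act.save("cartpole_model.pkl")


if __name__ == '__main__':
    main()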
gym/envs/bullet/cartpole_bullet.py
@@ -25,7 +25,7 @@ class CartPoleBulletEnv(gym.Env):
     def __init__(self):
         # start the bullet physics server
         p.connect(p.GUI)
-        # p.connect(p.DIRECT)
+        #p.connect(p.DIRECT)
         observation_high = np.array([
             np.finfo(np.float32).max,
             np.finfo(np.float32).max,
@@ -33,7 +33,7 @@ class CartPoleBulletEnv(gym.Env):
             np.finfo(np.float32).max])
         action_high = np.array([0.1])

-        self.action_space = spaces.Box(-action_high, action_high)
+        self.action_space = spaces.Discrete(5)
         self.observation_space = spaces.Box(-observation_high, observation_high)

         self.theta_threshold_radians = 1
@@ -55,8 +55,11 @@ class CartPoleBulletEnv(gym.Env):
         # time.sleep(self.timeStep)
         self.state = p.getJointState(self.cartpole, 1)[0:2] + p.getJointState(self.cartpole, 0)[0:2]
         theta, theta_dot, x, x_dot = self.state
-        force = action
-        p.setJointMotorControl2(self.cartpole, 0, p.VELOCITY_CONTROL, targetVelocity=(action + self.state[3]))
+        dv = 0.4
+        deltav = [-2.*dv, -dv, 0, dv, 2.*dv][action]
+
+        p.setJointMotorControl2(self.cartpole, 0, p.VELOCITY_CONTROL, targetVelocity=(deltav + self.state[3]))
+
         done = x < -self.x_threshold \
             or x > self.x_threshold \
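As the commit message notes, the velocity hyperparameter (dv = 0.4 above) still needs tuning. The switch from a Box action space to Discrete(5) is what lets baselines deepq, a DQN implementation that requires discrete actions, drive this env: each of the five actions nudges the cart's target velocity by a multiple of dv. A small standalone sketch of that mapping, handy when trying out a different dv value (not part of the commit itself):

# Standalone illustration of the Discrete(5) -> velocity-delta mapping from the diff above.
# dv is the hyperparameter the commit message says still needs tuning.
dv = 0.4
for action in range(5):
    deltav = [-2.*dv, -dv, 0, dv, 2.*dv][action]
    print("action", action, "-> target velocity change", deltav)
# prints: action 0 -> -0.8, 1 -> -0.4, 2 -> 0, 3 -> 0.4, 4 -> 0.8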