From c2b9dc936118f6b2d9adfc0977f218686962f08a Mon Sep 17 00:00:00 2001
From: Erwin Coumans
Date: Mon, 29 Oct 2018 16:44:12 -0700
Subject: [PATCH] update to CartPoleBulletEnv-v1 and check with latest
 baselines v0.1.5, works fine. (make it more similar to classical control
 cartpole)

---
 .../pybullet/gym/pybullet_data/cartpole.urdf   |  8 ++-
 .../pybullet/gym/pybullet_envs/__init__.py     |  8 +--
 .../baselines/enjoy_pybullet_cartpole.py       |  4 +-
 .../pybullet_envs/bullet/cartpole_bullet.py    | 56 ++++++++++---------
 4 files changed, 44 insertions(+), 32 deletions(-)

diff --git a/examples/pybullet/gym/pybullet_data/cartpole.urdf b/examples/pybullet/gym/pybullet_data/cartpole.urdf
index c22ca920c..75530c483 100644
--- a/examples/pybullet/gym/pybullet_data/cartpole.urdf
+++ b/examples/pybullet/gym/pybullet_data/cartpole.urdf
@@ -59,9 +59,15 @@
-
+
+
+
+
+
+
+
diff --git a/examples/pybullet/gym/pybullet_envs/__init__.py b/examples/pybullet/gym/pybullet_envs/__init__.py
index 25b82e2ca..d2a5a95b4 100644
--- a/examples/pybullet/gym/pybullet_envs/__init__.py
+++ b/examples/pybullet/gym/pybullet_envs/__init__.py
@@ -9,17 +9,17 @@ def register(id,*args,**kvargs):
 
 # ------------bullet-------------
 register(
-    id='CartPoleBulletEnv-v0',
+    id='CartPoleBulletEnv-v1',
     entry_point='pybullet_envs.bullet:CartPoleBulletEnv',
-    timestep_limit=1000,
-    reward_threshold=950.0,
+    max_episode_steps=200,
+    reward_threshold=190.0,
 )
 
 register(
     id='MinitaurBulletEnv-v0',
     entry_point='pybullet_envs.bullet:MinitaurBulletEnv',
     timestep_limit=1000,
-    reward_threshold=5.0,
+    reward_threshold=15.0,
 )
 
 register(
diff --git a/examples/pybullet/gym/pybullet_envs/baselines/enjoy_pybullet_cartpole.py b/examples/pybullet/gym/pybullet_envs/baselines/enjoy_pybullet_cartpole.py
index 7ede98c6d..0de7dc074 100644
--- a/examples/pybullet/gym/pybullet_envs/baselines/enjoy_pybullet_cartpole.py
+++ b/examples/pybullet/gym/pybullet_envs/baselines/enjoy_pybullet_cartpole.py
@@ -5,12 +5,13 @@ parentdir = os.path.dirname(os.path.dirname(currentdir))
 os.sys.path.insert(0,parentdir)
 
 import gym
+import time
 from baselines import deepq
 from pybullet_envs.bullet.cartpole_bullet import CartPoleBulletEnv
 
 
 def main():
-  env = gym.make('CartPoleBulletEnv-v0')
+  env = gym.make('CartPoleBulletEnv-v1')
   act = deepq.load("cartpole_model.pkl")
 
   while True:
@@ -28,6 +29,7 @@ def main():
       a = aa[0]
       obs, rew, done, _ = env.step(a)
       episode_rew += rew
+      time.sleep(1./240.)
     print("Episode reward", episode_rew)
diff --git a/examples/pybullet/gym/pybullet_envs/bullet/cartpole_bullet.py b/examples/pybullet/gym/pybullet_envs/bullet/cartpole_bullet.py
index 38145c9bf..850ab9278 100644
--- a/examples/pybullet/gym/pybullet_envs/bullet/cartpole_bullet.py
+++ b/examples/pybullet/gym/pybullet_envs/bullet/cartpole_bullet.py
@@ -34,24 +34,26 @@ class CartPoleBulletEnv(gym.Env):
       p.connect(p.GUI)
     else:
       p.connect(p.DIRECT)
+    self.theta_threshold_radians = 12 * 2 * math.pi / 360
+    self.x_threshold = 0.4  #2.4
+    high = np.array([
+        self.x_threshold * 2,
+        np.finfo(np.float32).max,
+        self.theta_threshold_radians * 2,
+        np.finfo(np.float32).max])
-    observation_high = np.array([
-        np.finfo(np.float32).max,
-        np.finfo(np.float32).max,
-        np.finfo(np.float32).max,
-        np.finfo(np.float32).max])
-    action_high = np.array([0.1])
+    self.force_mag = 10
-    self.action_space = spaces.Discrete(9)
-    self.observation_space = spaces.Box(-observation_high, observation_high)
+    self.action_space = spaces.Discrete(2)
+    self.observation_space = spaces.Box(-high, high, dtype=np.float32)
-    self.theta_threshold_radians = 1
-    self.x_threshold = 2.4
     self._seed()
-#    self.reset()
+    # self.reset()
     self.viewer = None
     self._configure()
+
+
 
   def _configure(self, display=None):
     self.display = display
@@ -60,41 +62,43 @@ class CartPoleBulletEnv(gym.Env):
     return [seed]
 
   def _step(self, action):
+    force = self.force_mag if action==1 else -self.force_mag
+
+    p.setJointMotorControl2(self.cartpole, 0, p.TORQUE_CONTROL, force=force)
     p.stepSimulation()
-#    time.sleep(self.timeStep)
+
     self.state = p.getJointState(self.cartpole, 1)[0:2] + p.getJointState(self.cartpole, 0)[0:2]
     theta, theta_dot, x, x_dot = self.state
 
-    dv = 0.1
-    deltav = [-10.*dv,-5.*dv, -2.*dv, -0.1*dv, 0, 0.1*dv, 2.*dv,5.*dv, 10.*dv][action]
-
-    p.setJointMotorControl2(self.cartpole, 0, p.VELOCITY_CONTROL, targetVelocity=(deltav + self.state[3]))
-
     done = x < -self.x_threshold \
         or x > self.x_threshold \
        or theta < -self.theta_threshold_radians \
        or theta > self.theta_threshold_radians
+    done = bool(done)
     reward = 1.0
-
+    #print("state=",self.state)
     return np.array(self.state), reward, done, {}
 
   def _reset(self):
 #    print("-----------reset simulation---------------")
     p.resetSimulation()
     self.cartpole = p.loadURDF(os.path.join(pybullet_data.getDataPath(),"cartpole.urdf"),[0,0,0])
-    self.timeStep = 0.01
+    p.changeDynamics(self.cartpole, -1, linearDamping=0, angularDamping=0)
+    p.changeDynamics(self.cartpole, 0, linearDamping=0, angularDamping=0)
+    p.changeDynamics(self.cartpole, 1, linearDamping=0, angularDamping=0)
+    self.timeStep = 0.02
     p.setJointMotorControl2(self.cartpole, 1, p.VELOCITY_CONTROL, force=0)
-    p.setGravity(0,0, -10)
+    p.setJointMotorControl2(self.cartpole, 0, p.VELOCITY_CONTROL, force=0)
+    p.setGravity(0,0, -9.8)
     p.setTimeStep(self.timeStep)
     p.setRealTimeSimulation(0)
-    initialCartPos = self.np_random.uniform(low=-0.5, high=0.5, size=(1,))
-    initialAngle = self.np_random.uniform(low=-0.5, high=0.5, size=(1,))
-    p.resetJointState(self.cartpole, 1, initialAngle)
-    p.resetJointState(self.cartpole, 0, initialCartPos)
-
+    randstate = self.np_random.uniform(low=-0.05, high=0.05, size=(4,))
+    p.resetJointState(self.cartpole, 1, randstate[0], randstate[1])
+    p.resetJointState(self.cartpole, 0, randstate[2], randstate[3])
+    #print("randstate=",randstate)
     self.state = p.getJointState(self.cartpole, 1)[0:2] + p.getJointState(self.cartpole, 0)[0:2]
-
+    #print("self.state=", self.state)
     return np.array(self.state)
 
   def _render(self, mode='human', close=False):
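
Quick usage check for the re-registered environment. This is a sketch, not part of the patch: it relies only on the register() call above (max_episode_steps=200, Discrete(2) actions) and assumes gym, pybullet, and pybullet_envs are installed.

import gym
import pybullet_envs  # importing the package runs the register() calls shown above

env = gym.make('CartPoleBulletEnv-v1')  # TimeLimit wrapper caps episodes at 200 steps
obs = env.reset()
done = False
episode_rew = 0.0
while not done:
  # Discrete(2): per the patch, action 1 applies +force_mag, action 0 applies -force_mag
  obs, rew, done, _ = env.step(env.action_space.sample())
  episode_rew += rew
print("random-policy episode reward", episode_rew)

A random policy should terminate well before the 200-step limit once the pole exceeds the 12-degree theta_threshold_radians, while a trained deepq policy (see enjoy_pybullet_cartpole.py) is expected to approach the 190.0 reward_threshold.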