From c2b9dc936118f6b2d9adfc0977f218686962f08a Mon Sep 17 00:00:00 2001
From: Erwin Coumans
Date: Mon, 29 Oct 2018 16:44:12 -0700
Subject: [PATCH] update to CartPoleBulletEnv-v1 and check with latest
 baselines v0.1.5, works fine. (make it more similar to classical control
 cartpole)

---
 .../pybullet/gym/pybullet_data/cartpole.urdf   |  8 ++-
 .../pybullet/gym/pybullet_envs/__init__.py     |  8 +--
 .../baselines/enjoy_pybullet_cartpole.py       |  4 +-
 .../pybullet_envs/bullet/cartpole_bullet.py    | 56 ++++++++++---------
 4 files changed, 44 insertions(+), 32 deletions(-)

diff --git a/examples/pybullet/gym/pybullet_data/cartpole.urdf b/examples/pybullet/gym/pybullet_data/cartpole.urdf
index c22ca920c..75530c483 100644
--- a/examples/pybullet/gym/pybullet_data/cartpole.urdf
+++ b/examples/pybullet/gym/pybullet_data/cartpole.urdf
@@ -59,9 +59,15 @@
-
+
+
+
+
+
+
+
diff --git a/examples/pybullet/gym/pybullet_envs/__init__.py b/examples/pybullet/gym/pybullet_envs/__init__.py
index 25b82e2ca..d2a5a95b4 100644
--- a/examples/pybullet/gym/pybullet_envs/__init__.py
+++ b/examples/pybullet/gym/pybullet_envs/__init__.py
@@ -9,17 +9,17 @@ def register(id,*args,**kvargs):
 
 # ------------bullet-------------
 register(
-    id='CartPoleBulletEnv-v0',
+    id='CartPoleBulletEnv-v1',
     entry_point='pybullet_envs.bullet:CartPoleBulletEnv',
-    timestep_limit=1000,
-    reward_threshold=950.0,
+    max_episode_steps=200,
+    reward_threshold=190.0,
 )
 
 register(
     id='MinitaurBulletEnv-v0',
     entry_point='pybullet_envs.bullet:MinitaurBulletEnv',
     timestep_limit=1000,
-    reward_threshold=5.0,
+    reward_threshold=15.0,
 )
 
 register(
diff --git a/examples/pybullet/gym/pybullet_envs/baselines/enjoy_pybullet_cartpole.py b/examples/pybullet/gym/pybullet_envs/baselines/enjoy_pybullet_cartpole.py
index 7ede98c6d..0de7dc074 100644
--- a/examples/pybullet/gym/pybullet_envs/baselines/enjoy_pybullet_cartpole.py
+++ b/examples/pybullet/gym/pybullet_envs/baselines/enjoy_pybullet_cartpole.py
@@ -5,12 +5,13 @@ parentdir = os.path.dirname(os.path.dirname(currentdir))
 os.sys.path.insert(0,parentdir)
 
 import gym
+import time
 from baselines import deepq
 from pybullet_envs.bullet.cartpole_bullet import CartPoleBulletEnv
 
 
 def main():
-  env = gym.make('CartPoleBulletEnv-v0')
+  env = gym.make('CartPoleBulletEnv-v1')
   act = deepq.load("cartpole_model.pkl")
 
   while True:
@@ -28,6 +29,7 @@ def main():
       a = aa[0]
       obs, rew, done, _ = env.step(a)
       episode_rew += rew
+      time.sleep(1./240.)
     print("Episode reward", episode_rew)
diff --git a/examples/pybullet/gym/pybullet_envs/bullet/cartpole_bullet.py b/examples/pybullet/gym/pybullet_envs/bullet/cartpole_bullet.py
index 38145c9bf..850ab9278 100644
--- a/examples/pybullet/gym/pybullet_envs/bullet/cartpole_bullet.py
+++ b/examples/pybullet/gym/pybullet_envs/bullet/cartpole_bullet.py
@@ -34,24 +34,26 @@ class CartPoleBulletEnv(gym.Env):
       p.connect(p.GUI)
     else:
       p.connect(p.DIRECT)
+    self.theta_threshold_radians = 12 * 2 * math.pi / 360
+    self.x_threshold = 0.4  #2.4
+    high = np.array([
+        self.x_threshold * 2,
+        np.finfo(np.float32).max,
+        self.theta_threshold_radians * 2,
+        np.finfo(np.float32).max])
-    observation_high = np.array([
-        np.finfo(np.float32).max,
-        np.finfo(np.float32).max,
-        np.finfo(np.float32).max,
-        np.finfo(np.float32).max])
-    action_high = np.array([0.1])
+    self.force_mag = 10
-    self.action_space = spaces.Discrete(9)
-    self.observation_space = spaces.Box(-observation_high, observation_high)
+    self.action_space = spaces.Discrete(2)
+    self.observation_space = spaces.Box(-high, high, dtype=np.float32)
-    self.theta_threshold_radians = 1
-    self.x_threshold = 2.4
     self._seed()
-#    self.reset()
+    # self.reset()
     self.viewer = None
     self._configure()
+
+
 
   def _configure(self, display=None):
     self.display = display
@@ -60,41 +62,43 @@ class CartPoleBulletEnv(gym.Env):
     return [seed]
 
   def _step(self, action):
+    force = self.force_mag if action==1 else -self.force_mag
+
+    p.setJointMotorControl2(self.cartpole, 0, p.TORQUE_CONTROL, force=force)
     p.stepSimulation()
-#    time.sleep(self.timeStep)
+
     self.state = p.getJointState(self.cartpole, 1)[0:2] + p.getJointState(self.cartpole, 0)[0:2]
     theta, theta_dot, x, x_dot = self.state
 
-    dv = 0.1
-    deltav = [-10.*dv,-5.*dv, -2.*dv, -0.1*dv, 0, 0.1*dv, 2.*dv,5.*dv, 10.*dv][action]
-
-    p.setJointMotorControl2(self.cartpole, 0, p.VELOCITY_CONTROL, targetVelocity=(deltav + self.state[3]))
-
     done = x < -self.x_threshold \
         or x > self.x_threshold \
        or theta < -self.theta_threshold_radians \
        or theta > self.theta_threshold_radians
+    done = bool(done)
     reward = 1.0
-
+    #print("state=",self.state)
     return np.array(self.state), reward, done, {}
 
   def _reset(self):
 #    print("-----------reset simulation---------------")
     p.resetSimulation()
     self.cartpole = p.loadURDF(os.path.join(pybullet_data.getDataPath(),"cartpole.urdf"),[0,0,0])
-    self.timeStep = 0.01
+    p.changeDynamics(self.cartpole, -1, linearDamping=0, angularDamping=0)
+    p.changeDynamics(self.cartpole, 0, linearDamping=0, angularDamping=0)
+    p.changeDynamics(self.cartpole, 1, linearDamping=0, angularDamping=0)
+    self.timeStep = 0.02
     p.setJointMotorControl2(self.cartpole, 1, p.VELOCITY_CONTROL, force=0)
-    p.setGravity(0,0, -10)
+    p.setJointMotorControl2(self.cartpole, 0, p.VELOCITY_CONTROL, force=0)
+    p.setGravity(0,0, -9.8)
     p.setTimeStep(self.timeStep)
     p.setRealTimeSimulation(0)
-    initialCartPos = self.np_random.uniform(low=-0.5, high=0.5, size=(1,))
-    initialAngle = self.np_random.uniform(low=-0.5, high=0.5, size=(1,))
-    p.resetJointState(self.cartpole, 1, initialAngle)
-    p.resetJointState(self.cartpole, 0, initialCartPos)
-
+    randstate = self.np_random.uniform(low=-0.05, high=0.05, size=(4,))
+    p.resetJointState(self.cartpole, 1, randstate[0], randstate[1])
+    p.resetJointState(self.cartpole, 0, randstate[2], randstate[3])
+    #print("randstate=",randstate)
     self.state = p.getJointState(self.cartpole, 1)[0:2] + p.getJointState(self.cartpole, 0)[0:2]
-
+    #print("self.state=", self.state)
     return np.array(self.state)
 
   def _render(self, mode='human', close=False):
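
Quick usage check for the re-registered environment. This is a sketch, not part of the patch: it relies only on the register() call above (max_episode_steps=200, Discrete(2) actions) and assumes gym, pybullet, and pybullet_envs are installed.

import gym
import pybullet_envs  # importing the package runs the register() calls shown above

env = gym.make('CartPoleBulletEnv-v1')  # TimeLimit wrapper caps episodes at 200 steps
obs = env.reset()
done = False
episode_rew = 0.0
while not done:
  # Discrete(2): per the patch, action 1 applies +force_mag, action 0 applies -force_mag
  obs, rew, done, _ = env.step(env.action_space.sample())
  episode_rew += rew
print("random-policy episode reward", episode_rew)

A random policy should terminate well before the 200-step limit once the pole exceeds the 12-degree theta_threshold_radians, while a trained deepq policy (see enjoy_pybullet_cartpole.py) is expected to approach the 190.0 reward_threshold.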