From 1752aa55ca0dc41e2bb65a96900c67bc95345225 Mon Sep 17 00:00:00 2001 From: erwincoumans Date: Sat, 10 Jun 2017 18:46:36 -0700 Subject: [PATCH] train_pybullet_racecar.py works, self-driving car drives towards the ball using OpenAI baselines DQN :-) See https://www.youtube.com/watch?v=DZ5Px-ocelw for video and how-to-install. --- .../pybullet/gym/enjoy_pybullet_racecar.py | 25 ++----------- examples/pybullet/gym/envs/bullet/racecar.py | 4 +-- .../pybullet/gym/envs/bullet/racecarGymEnv.py | 35 ++++++++++++------- examples/pybullet/gym/racecarGymEnvTest.py | 20 +++++++++-- .../pybullet/gym/train_pybullet_racecar.py | 12 +++---- 5 files changed, 50 insertions(+), 46 deletions(-) diff --git a/examples/pybullet/gym/enjoy_pybullet_racecar.py b/examples/pybullet/gym/enjoy_pybullet_racecar.py index b2c46337a..bc965f32d 100644 --- a/examples/pybullet/gym/enjoy_pybullet_racecar.py +++ b/examples/pybullet/gym/enjoy_pybullet_racecar.py @@ -6,7 +6,7 @@ from baselines import deepq def main(): - env = RacecarGymEnv(render=True) + env = RacecarGymEnv(renders=True) act = deepq.load("racecar_model.pkl") print(act) while True: @@ -16,27 +16,8 @@ def main(): print(obs) episode_rew = 0 while not done: - #env.render() - - print("!!!!!!!!!!!!!!!!!!!!!!!!!!") - print("obs") - print(obs) - print("???????????????????????????") - print("obs[None]") - print(obs[None]) - o = obs[None] - print("o") - print(o) - aa = act(o) - print("aa") - print (aa) - a = aa[0] - print("a") - print(a) - obs, rew, done, _ = env.step(a) - print("===================================") - print("obs") - print(obs) + env.render() + obs, rew, done, _ = env.step(act(obs[None])[0]) episode_rew += rew print("Episode reward", episode_rew) diff --git a/examples/pybullet/gym/envs/bullet/racecar.py b/examples/pybullet/gym/envs/bullet/racecar.py index 9737ede90..96db73c78 100644 --- a/examples/pybullet/gym/envs/bullet/racecar.py +++ b/examples/pybullet/gym/envs/bullet/racecar.py @@ -12,7 +12,7 @@ class Racecar: def reset(self): self.racecarUniqueId = p.loadURDF("racecar/racecar.urdf", [0,0,.2]) - self.maxForce = 10 + self.maxForce = 20 self.nMotors = 2 self.motorizedwheels=[2] self.inactiveWheels = [3,5,7] @@ -21,7 +21,7 @@ class Racecar: self.motorizedWheels = [2] self.steeringLinks=[4,6] - self.speedMultiplier = 10. + self.speedMultiplier = 4. def getActionDimension(self): diff --git a/examples/pybullet/gym/envs/bullet/racecarGymEnv.py b/examples/pybullet/gym/envs/bullet/racecarGymEnv.py index 6e12bdbde..930f9bf88 100644 --- a/examples/pybullet/gym/envs/bullet/racecarGymEnv.py +++ b/examples/pybullet/gym/envs/bullet/racecarGymEnv.py @@ -16,9 +16,9 @@ class RacecarGymEnv(gym.Env): def __init__(self, urdfRoot="", - actionRepeat=1, + actionRepeat=50, isEnableSelfCollision=True, - render=True): + renders=True): print("init") self._timeStep = 0.01 self._urdfRoot = urdfRoot @@ -27,9 +27,9 @@ class RacecarGymEnv(gym.Env): self._observation = [] self._ballUniqueId = -1 self._envStepCounter = 0 - self._render = render + self._renders = renders self._p = p - if self._render: + if self._renders: p.connect(p.GUI) else: p.connect(p.DIRECT) @@ -49,9 +49,14 @@ class RacecarGymEnv(gym.Env): #p.setPhysicsEngineParameter(numSolverIterations=300) p.setTimeStep(self._timeStep) #p.loadURDF("%splane.urdf" % self._urdfRoot) - p.loadSDF("%sstadium.sdf" % self._urdfRoot) + stadiumobjects = p.loadSDF("%sstadium.sdf" % self._urdfRoot) + #move the stadium objects slightly above 0 + for i in stadiumobjects: + pos,orn = p.getBasePositionAndOrientation(i) + newpos = [pos[0],pos[1],pos[2]+0.1] + p.resetBasePositionAndOrientation(i,newpos,orn) - dist = 1.+10.*random.random() + dist = 5 +2.*random.random() ang = 2.*3.1415925438*random.random() ballx = dist * math.sin(ang) @@ -75,25 +80,29 @@ class RacecarGymEnv(gym.Env): return [seed] def getExtendedObservation(self): - self._observation = self._racecar.getObservation() - pos,orn = p.getBasePositionAndOrientation(self._ballUniqueId) - self._observation.extend(list(pos)) + self._observation = [] #self._racecar.getObservation() + carpos,carorn = p.getBasePositionAndOrientation(self._racecar.racecarUniqueId) + ballpos,ballorn = p.getBasePositionAndOrientation(self._ballUniqueId) + invCarPos,invCarOrn = p.invertTransform(carpos,carorn) + ballPosInCar,ballOrnInCar = p.multiplyTransforms(invCarPos,invCarOrn,ballpos,ballorn) + + self._observation.extend([ballPosInCar[0],ballPosInCar[1]]) return self._observation def _step(self, action): - if (self._render): + if (self._renders): basePos,orn = p.getBasePositionAndOrientation(self._racecar.racecarUniqueId) #p.resetDebugVisualizerCamera(1, 30, -40, basePos) - fwd = [-1,-1,-1,0,0,0,1,1,1] - steerings = [-0.5,0,0.5,-0.5,0,0.5,-0.5,0,0.5] + fwd = [-5,-5,-5,0,0,0,5,5,5] + steerings = [-0.3,0,0.3,-0.3,0,0.3,-0.3,0,0.3] forward = fwd[action] steer = steerings[action] realaction = [forward,steer] self._racecar.applyAction(realaction) for i in range(self._actionRepeat): p.stepSimulation() - if self._render: + if self._renders: time.sleep(self._timeStep) self._observation = self.getExtendedObservation() diff --git a/examples/pybullet/gym/racecarGymEnvTest.py b/examples/pybullet/gym/racecarGymEnvTest.py index 5e96cbcc2..6be102dc3 100644 --- a/examples/pybullet/gym/racecarGymEnvTest.py +++ b/examples/pybullet/gym/racecarGymEnvTest.py @@ -9,6 +9,22 @@ steeringSlider = environment._p.addUserDebugParameter("steering",-0.5,0.5,0) while (True): targetVelocity = environment._p.readUserDebugParameter(targetVelocitySlider) steeringAngle = environment._p.readUserDebugParameter(steeringSlider) - - action=[targetVelocity,steeringAngle] + discreteAction = 0 + if (targetVelocity<-0.33): + discreteAction=0 + else: + if (targetVelocity>0.33): + discreteAction=6 + else: + discreteAction=3 + if (steeringAngle>-0.17): + if (steeringAngle>0.17): + discreteAction=discreteAction+2 + else: + discreteAction=discreteAction+1 + + action=discreteAction state, reward, done, info = environment.step(action) + obs = environment.getExtendedObservation() + print("obs") + print(obs) diff --git a/examples/pybullet/gym/train_pybullet_racecar.py b/examples/pybullet/gym/train_pybullet_racecar.py index 3ce064d8d..face83e11 100644 --- a/examples/pybullet/gym/train_pybullet_racecar.py +++ b/examples/pybullet/gym/train_pybullet_racecar.py @@ -9,23 +9,21 @@ import datetime def callback(lcl, glb): # stop training if reward exceeds 199 - is_solved = lcl['t'] > 100 and sum(lcl['episode_rewards'][-101:-1]) / 100 >= 199 - #uniq_filename = "racecar_model" + str(datetime.datetime.now().date()) + '_' + str(datetime.datetime.now().time()).replace(':', '.') - #print("uniq_filename=") - #print(uniq_filename) - #act.save(uniq_filename) + total = sum(lcl['episode_rewards'][-101:-1]) / 100 + totalt = lcl['t'] + is_solved = totalt > 2000 and total >= -50 return is_solved def main(): - env = RacecarGymEnv(render=False) + env = RacecarGymEnv(renders=False) model = deepq.models.mlp([64]) act = deepq.learn( env, q_func=model, lr=1e-3, - max_timesteps=10000000, + max_timesteps=10000, buffer_size=50000, exploration_fraction=0.1, exploration_final_eps=0.02,