From 82e3c553b9bfdc44fbeb48c18aeac90ac7616d24 Mon Sep 17 00:00:00 2001
From: Erwin Coumans <erwin.coumans@gmail.com>
Date: Fri, 9 Jun 2017 14:03:47 -0700
Subject: [PATCH 1/4] bump up pybullet version (some memory leaks were fixed
 since 1.1.3)

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 235396713..bb86bdd4b 100644
--- a/setup.py
+++ b/setup.py
@@ -419,7 +419,7 @@ else:
 
 setup(
 	name = 'pybullet',
-	version='1.1.3',
+	version='1.1.4',
 	description='Official Python Interface for the Bullet Physics SDK Robotics Simulator',
 	long_description='pybullet is an easy to use Python module for physics simulation, robotics and machine learning based on the Bullet Physics SDK. With pybullet you can load articulated bodies from URDF, SDF and other file formats. pybullet provides forward dynamics simulation, inverse dynamics computation, forward and inverse kinematics and collision detection and ray intersection queries. Aside from physics simulation, pybullet supports to rendering, with a CPU renderer and OpenGL visualization and support for virtual reality headsets.',
 	url='https://github.com/bulletphysics/bullet3',

From b361722500d05a53eaacdc3e5ded402658713338 Mon Sep 17 00:00:00 2001
From: Erwin Coumans <erwin.coumans@gmail.com>
Date: Fri, 9 Jun 2017 19:26:07 -0700
Subject: [PATCH 2/4] Implement train_pybullet_racecar.py and
 enjoy_pybullet_racecar.py using OpenAI baselines DQN for the RacecarGymEnv.

---
 .../pybullet/gym/enjoy_pybullet_racecar.py    | 45 +++++++++++++
 examples/pybullet/gym/envs/bullet/racecar.py  | 21 +++---
 .../pybullet/gym/envs/bullet/racecarGymEnv.py | 65 ++++++++++++-------
 .../pybullet/gym/train_pybullet_racecar.py    | 40 ++++++++++++
 4 files changed, 138 insertions(+), 33 deletions(-)
 create mode 100644 examples/pybullet/gym/enjoy_pybullet_racecar.py
 create mode 100644 examples/pybullet/gym/train_pybullet_racecar.py

diff --git a/examples/pybullet/gym/enjoy_pybullet_racecar.py b/examples/pybullet/gym/enjoy_pybullet_racecar.py
new file mode 100644
index 000000000..b2c46337a
--- /dev/null
+++ b/examples/pybullet/gym/enjoy_pybullet_racecar.py
@@ -0,0 +1,45 @@
+import gym
+from envs.bullet.racecarGymEnv import RacecarGymEnv
+
+from baselines import deepq
+
+
+def main():
+    
+    env = RacecarGymEnv(render=True)
+    act = deepq.load("racecar_model.pkl")
+    print(act)
+    while True:
+        obs, done = env.reset(), False
+        print("===================================")        
+        print("obs")
+        print(obs)
+        episode_rew = 0
+        while not done:
+            #env.render()
+
+            print("!!!!!!!!!!!!!!!!!!!!!!!!!!")
+            print("obs")
+            print(obs)
+            print("???????????????????????????")
+            print("obs[None]")
+            print(obs[None])
+            o = obs[None]
+            print("o")
+            print(o)
+            aa = act(o)
+            print("aa")
+            print (aa)
+            a = aa[0]
+            print("a")
+            print(a)
+            obs, rew, done, _ = env.step(a)
+            print("===================================")
+            print("obs")
+            print(obs)
+            episode_rew += rew
+        print("Episode reward", episode_rew)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/examples/pybullet/gym/envs/bullet/racecar.py b/examples/pybullet/gym/envs/bullet/racecar.py
index 3ceb923f5..9737ede90 100644
--- a/examples/pybullet/gym/envs/bullet/racecar.py
+++ b/examples/pybullet/gym/envs/bullet/racecar.py
@@ -22,7 +22,7 @@ class Racecar:
     self.motorizedWheels = [2]
     self.steeringLinks=[4,6]
     self.speedMultiplier = 10.
-		
+    
 
   def getActionDimension(self):
     return self.nMotors
@@ -33,22 +33,25 @@ class Racecar:
   def getObservation(self):
     observation = []
     pos,orn=p.getBasePositionAndOrientation(self.racecarUniqueId)
+    
     observation.extend(list(pos))
+    observation.extend(list(orn))
+    
     return observation
 
   def applyAction(self, motorCommands):
     targetVelocity=motorCommands[0]*self.speedMultiplier
-    print("targetVelocity")
-    print(targetVelocity)
+    #print("targetVelocity")
+    #print(targetVelocity)
     steeringAngle = motorCommands[1]
-    print("steeringAngle")
-    print(steeringAngle)
-    print("maxForce")
-    print(self.maxForce)
+    #print("steeringAngle")
+    #print(steeringAngle)
+    #print("maxForce")
+    #print(self.maxForce)
     
     
     for motor in self.motorizedwheels:
-	    p.setJointMotorControl2(self.racecarUniqueId,motor,p.VELOCITY_CONTROL,targetVelocity=targetVelocity,force=self.maxForce)		
+      p.setJointMotorControl2(self.racecarUniqueId,motor,p.VELOCITY_CONTROL,targetVelocity=targetVelocity,force=self.maxForce)    
     for steer in self.steeringLinks:
       p.setJointMotorControl2(self.racecarUniqueId,steer,p.POSITION_CONTROL,targetPosition=steeringAngle)
-	
\ No newline at end of file
+  
\ No newline at end of file
diff --git a/examples/pybullet/gym/envs/bullet/racecarGymEnv.py b/examples/pybullet/gym/envs/bullet/racecarGymEnv.py
index 87c2d0be5..6e12bdbde 100644
--- a/examples/pybullet/gym/envs/bullet/racecarGymEnv.py
+++ b/examples/pybullet/gym/envs/bullet/racecarGymEnv.py
@@ -6,6 +6,7 @@ import numpy as np
 import time
 import pybullet as p
 from . import racecar
+import random
 
 class RacecarGymEnv(gym.Env):
   metadata = {
@@ -34,11 +35,12 @@ class RacecarGymEnv(gym.Env):
       p.connect(p.DIRECT)
     self._seed()
     self.reset()
-    observationDim = self._racecar.getObservationDimension()
-    observation_high = np.array([np.finfo(np.float32).max] * observationDim)
-    actionDim = 8
-    action_high = np.array([1] * actionDim)
-    self.action_space = spaces.Box(-action_high, action_high)
+    observationDim = len(self.getExtendedObservation())
+    #print("observationDim")
+    #print(observationDim)
+    
+    observation_high = np.array([np.finfo(np.float32).max] * observationDim)    
+    self.action_space = spaces.Discrete(9)
     self.observation_space = spaces.Box(-observation_high, observation_high)
     self.viewer = None
 
@@ -49,14 +51,21 @@ class RacecarGymEnv(gym.Env):
     #p.loadURDF("%splane.urdf" % self._urdfRoot)
     p.loadSDF("%sstadium.sdf" % self._urdfRoot)
     
-    self._ballUniqueId = p.loadURDF("sphere2.urdf",[20,20,1])
+    dist = 1.+10.*random.random()
+    ang = 2.*3.1415925438*random.random()
+    
+    ballx = dist * math.sin(ang)
+    bally = dist * math.cos(ang)
+    ballz = 1
+    
+    self._ballUniqueId = p.loadURDF("sphere2.urdf",[ballx,bally,ballz])
     p.setGravity(0,0,-10)
     self._racecar = racecar.Racecar(urdfRootPath=self._urdfRoot, timeStep=self._timeStep)
     self._envStepCounter = 0
     for i in range(100):
       p.stepSimulation()
-    self._observation = self._racecar.getObservation()
-    return self._observation
+    self._observation = self.getExtendedObservation()
+    return np.array(self._observation)
 
   def __del__(self):
     p.disconnect()
@@ -65,44 +74,52 @@ class RacecarGymEnv(gym.Env):
     self.np_random, seed = seeding.np_random(seed)
     return [seed]
 
+  def getExtendedObservation(self):
+     self._observation = self._racecar.getObservation()
+     pos,orn = p.getBasePositionAndOrientation(self._ballUniqueId)      
+     self._observation.extend(list(pos))
+     return self._observation
+     
   def _step(self, action):
     if (self._render):
       basePos,orn = p.getBasePositionAndOrientation(self._racecar.racecarUniqueId)
-      p.resetDebugVisualizerCamera(1, 30, -40, basePos)
-
-    if len(action) != self._racecar.getActionDimension():
-      raise ValueError("We expect {} continuous action not {}.".format(self._racecar.getActionDimension(), len(action)))
-
-    for i in range(len(action)):
-      if not -1.01 <= action[i] <= 1.01:
-        raise ValueError("{}th action should be between -1 and 1 not {}.".format(i, action[i]))
-
-    self._racecar.applyAction(action)
+      #p.resetDebugVisualizerCamera(1, 30, -40, basePos)
+    
+    fwd = [-1,-1,-1,0,0,0,1,1,1]
+    steerings = [-0.5,0,0.5,-0.5,0,0.5,-0.5,0,0.5]
+    forward = fwd[action]
+    steer = steerings[action]
+    realaction = [forward,steer]
+    self._racecar.applyAction(realaction)
     for i in range(self._actionRepeat):
       p.stepSimulation()
       if self._render:
         time.sleep(self._timeStep)
-      self._observation = self._racecar.getObservation()
+      self._observation = self.getExtendedObservation()
+      
       if self._termination():
         break
       self._envStepCounter += 1
     reward = self._reward()
     done = self._termination()
+    #print("len=%r" % len(self._observation))
+    
     return np.array(self._observation), reward, done, {}
 
   def _render(self, mode='human', close=False):
       return
 
   def _termination(self):
-    return False
+    return self._envStepCounter>1000
     
   def _reward(self):
     closestPoints = p.getClosestPoints(self._racecar.racecarUniqueId,self._ballUniqueId,10000) 
+    
     numPt = len(closestPoints)
     reward=-1000
-    print(numPt)
+    #print(numPt)
     if (numPt>0):
-      print("reward:")
-      reward = closestPoints[0][8]
-      print(reward)
+      #print("reward:")
+      reward = -closestPoints[0][8]
+      #print(reward)
     return reward
diff --git a/examples/pybullet/gym/train_pybullet_racecar.py b/examples/pybullet/gym/train_pybullet_racecar.py
new file mode 100644
index 000000000..3ce064d8d
--- /dev/null
+++ b/examples/pybullet/gym/train_pybullet_racecar.py
@@ -0,0 +1,40 @@
+import gym
+from envs.bullet.racecarGymEnv import RacecarGymEnv
+
+from baselines import deepq
+
+import datetime
+
+
+
+def callback(lcl, glb):
+    # stop training if reward exceeds 199
+    is_solved = lcl['t'] > 100 and sum(lcl['episode_rewards'][-101:-1]) / 100 >= 199
+    #uniq_filename = "racecar_model" + str(datetime.datetime.now().date()) + '_' + str(datetime.datetime.now().time()).replace(':', '.')
+    #print("uniq_filename=")
+    #print(uniq_filename)
+    #act.save(uniq_filename)
+    return is_solved
+
+
+def main():
+  
+    env = RacecarGymEnv(render=False)
+    model = deepq.models.mlp([64])
+    act = deepq.learn(
+        env,
+        q_func=model,
+        lr=1e-3,
+        max_timesteps=10000000,
+        buffer_size=50000,
+        exploration_fraction=0.1,
+        exploration_final_eps=0.02,
+        print_freq=10,
+        callback=callback
+    )
+    print("Saving model to racecar_model.pkl")
+    act.save("racecar_model.pkl")
+
+
+if __name__ == '__main__':
+    main()

From 4a7469a1ba2cca9be9fdbc20cd75e8e3bc0c580c Mon Sep 17 00:00:00 2001
From: Erwin Coumans <erwincoumans@google.com>
Date: Fri, 9 Jun 2017 21:19:02 -0700
Subject: [PATCH 3/4] fix uninitialized-variable issues (were unused
 initialized, but triggers some memory checking tools)

---
 examples/SharedMemory/PhysicsDirect.cpp                    | 6 ++++++
 examples/SharedMemory/TinyRendererVisualShapeConverter.cpp | 2 +-
 src/BulletCollision/CollisionShapes/btSphereShape.h        | 3 +++
 3 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/examples/SharedMemory/PhysicsDirect.cpp b/examples/SharedMemory/PhysicsDirect.cpp
index ed8376c9f..e49784d47 100644
--- a/examples/SharedMemory/PhysicsDirect.cpp
+++ b/examples/SharedMemory/PhysicsDirect.cpp
@@ -64,9 +64,15 @@ struct PhysicsDirectInternalData
 	PhysicsDirectInternalData()
 		:m_hasStatus(false),
 		m_verboseOutput(false),
+		m_cachedCameraPixelsWidth(0),
+		m_cachedCameraPixelsHeight(0),
+		m_commandProcessor(NULL),
 		m_ownsCommandProcessor(false),
 		m_timeOutInSeconds(1e30)
 	{
+		memset(&m_command, 0, sizeof(m_command));
+		memset(&m_serverStatus, 0, sizeof(m_serverStatus));
+		memset(m_bulletStreamDataServerToClient, 0, sizeof(m_bulletStreamDataServerToClient));
 	}
 };
 
diff --git a/examples/SharedMemory/TinyRendererVisualShapeConverter.cpp b/examples/SharedMemory/TinyRendererVisualShapeConverter.cpp
index f076d301e..143c931ce 100644
--- a/examples/SharedMemory/TinyRendererVisualShapeConverter.cpp
+++ b/examples/SharedMemory/TinyRendererVisualShapeConverter.cpp
@@ -177,7 +177,7 @@ void convertURDFToVisualShape(const UrdfShape* visual, const char* urdfPathPrefi
 	visualShapeOut.m_dimensions[0] = 0;
 	visualShapeOut.m_dimensions[1] = 0;
 	visualShapeOut.m_dimensions[2] = 0;
-	visualShapeOut.m_meshAssetFileName[0] = 0;
+	memset(visualShapeOut.m_meshAssetFileName, 0, sizeof(visualShapeOut.m_meshAssetFileName));
 	if (visual->m_geometry.m_hasLocalMaterial) {
 		visualShapeOut.m_rgbaColor[0] = visual->m_geometry.m_localMaterial.m_matColor.m_rgbaColor[0];
 		visualShapeOut.m_rgbaColor[1] = visual->m_geometry.m_localMaterial.m_matColor.m_rgbaColor[1];
diff --git a/src/BulletCollision/CollisionShapes/btSphereShape.h b/src/BulletCollision/CollisionShapes/btSphereShape.h
index b192efeeb..50561f7f5 100644
--- a/src/BulletCollision/CollisionShapes/btSphereShape.h
+++ b/src/BulletCollision/CollisionShapes/btSphereShape.h
@@ -29,8 +29,11 @@ public:
 	btSphereShape (btScalar radius) : btConvexInternalShape ()
 	{
 		m_shapeType = SPHERE_SHAPE_PROXYTYPE;
+		m_localScaling.setValue(1.0, 1.0, 1.0);
+		m_implicitShapeDimensions.setZero();
 		m_implicitShapeDimensions.setX(radius);
 		m_collisionMargin = radius;
+		m_padding = 0;
 	}
 	
 	virtual btVector3	localGetSupportingVertex(const btVector3& vec)const;

From 1752aa55ca0dc41e2bb65a96900c67bc95345225 Mon Sep 17 00:00:00 2001
From: erwincoumans <erwin.coumans@gmail.com>
Date: Sat, 10 Jun 2017 18:46:36 -0700
Subject: [PATCH 4/4] train_pybullet_racecar.py works, self-driving car drives
 towards the ball using OpenAI baselines DQN :-) See
 https://www.youtube.com/watch?v=DZ5Px-ocelw for video and how-to-install.

---
 .../pybullet/gym/enjoy_pybullet_racecar.py    | 25 ++-----------
 examples/pybullet/gym/envs/bullet/racecar.py  |  4 +--
 .../pybullet/gym/envs/bullet/racecarGymEnv.py | 35 ++++++++++++-------
 examples/pybullet/gym/racecarGymEnvTest.py    | 20 +++++++++--
 .../pybullet/gym/train_pybullet_racecar.py    | 12 +++----
 5 files changed, 50 insertions(+), 46 deletions(-)

diff --git a/examples/pybullet/gym/enjoy_pybullet_racecar.py b/examples/pybullet/gym/enjoy_pybullet_racecar.py
index b2c46337a..bc965f32d 100644
--- a/examples/pybullet/gym/enjoy_pybullet_racecar.py
+++ b/examples/pybullet/gym/enjoy_pybullet_racecar.py
@@ -6,7 +6,7 @@ from baselines import deepq
 
 def main():
     
-    env = RacecarGymEnv(render=True)
+    env = RacecarGymEnv(renders=True)
     act = deepq.load("racecar_model.pkl")
     print(act)
     while True:
@@ -16,27 +16,8 @@ def main():
         print(obs)
         episode_rew = 0
         while not done:
-            #env.render()
-
-            print("!!!!!!!!!!!!!!!!!!!!!!!!!!")
-            print("obs")
-            print(obs)
-            print("???????????????????????????")
-            print("obs[None]")
-            print(obs[None])
-            o = obs[None]
-            print("o")
-            print(o)
-            aa = act(o)
-            print("aa")
-            print (aa)
-            a = aa[0]
-            print("a")
-            print(a)
-            obs, rew, done, _ = env.step(a)
-            print("===================================")
-            print("obs")
-            print(obs)
+            env.render()
+            obs, rew, done, _ = env.step(act(obs[None])[0])
             episode_rew += rew
         print("Episode reward", episode_rew)
 
diff --git a/examples/pybullet/gym/envs/bullet/racecar.py b/examples/pybullet/gym/envs/bullet/racecar.py
index 9737ede90..96db73c78 100644
--- a/examples/pybullet/gym/envs/bullet/racecar.py
+++ b/examples/pybullet/gym/envs/bullet/racecar.py
@@ -12,7 +12,7 @@ class Racecar:
 
   def reset(self):
     self.racecarUniqueId = p.loadURDF("racecar/racecar.urdf", [0,0,.2])
-    self.maxForce = 10
+    self.maxForce = 20
     self.nMotors = 2
     self.motorizedwheels=[2]
     self.inactiveWheels = [3,5,7]
@@ -21,7 +21,7 @@ class Racecar:
 
     self.motorizedWheels = [2]
     self.steeringLinks=[4,6]
-    self.speedMultiplier = 10.
+    self.speedMultiplier = 4.
     
 
   def getActionDimension(self):
diff --git a/examples/pybullet/gym/envs/bullet/racecarGymEnv.py b/examples/pybullet/gym/envs/bullet/racecarGymEnv.py
index 6e12bdbde..930f9bf88 100644
--- a/examples/pybullet/gym/envs/bullet/racecarGymEnv.py
+++ b/examples/pybullet/gym/envs/bullet/racecarGymEnv.py
@@ -16,9 +16,9 @@ class RacecarGymEnv(gym.Env):
 
   def __init__(self,
                urdfRoot="",
-               actionRepeat=1,
+               actionRepeat=50,
                isEnableSelfCollision=True,
-               render=True):
+               renders=True):
     print("init")
     self._timeStep = 0.01
     self._urdfRoot = urdfRoot
@@ -27,9 +27,9 @@ class RacecarGymEnv(gym.Env):
     self._observation = []
     self._ballUniqueId = -1
     self._envStepCounter = 0
-    self._render = render
+    self._renders = renders
     self._p = p
-    if self._render:
+    if self._renders:
       p.connect(p.GUI)
     else:
       p.connect(p.DIRECT)
@@ -49,9 +49,14 @@ class RacecarGymEnv(gym.Env):
     #p.setPhysicsEngineParameter(numSolverIterations=300)
     p.setTimeStep(self._timeStep)
     #p.loadURDF("%splane.urdf" % self._urdfRoot)
-    p.loadSDF("%sstadium.sdf" % self._urdfRoot)
+    stadiumobjects = p.loadSDF("%sstadium.sdf" % self._urdfRoot)
+    #move the stadium objects slightly above 0
+    for i in stadiumobjects:
+    	pos,orn = p.getBasePositionAndOrientation(i)
+    	newpos = [pos[0],pos[1],pos[2]+0.1]
+    	p.resetBasePositionAndOrientation(i,newpos,orn)
     
-    dist = 1.+10.*random.random()
+    dist = 5 +2.*random.random()
     ang = 2.*3.1415925438*random.random()
     
     ballx = dist * math.sin(ang)
@@ -75,25 +80,29 @@ class RacecarGymEnv(gym.Env):
     return [seed]
 
   def getExtendedObservation(self):
-     self._observation = self._racecar.getObservation()
-     pos,orn = p.getBasePositionAndOrientation(self._ballUniqueId)      
-     self._observation.extend(list(pos))
+     self._observation = [] #self._racecar.getObservation()
+     carpos,carorn = p.getBasePositionAndOrientation(self._racecar.racecarUniqueId)
+     ballpos,ballorn = p.getBasePositionAndOrientation(self._ballUniqueId)      
+     invCarPos,invCarOrn = p.invertTransform(carpos,carorn)
+     ballPosInCar,ballOrnInCar = p.multiplyTransforms(invCarPos,invCarOrn,ballpos,ballorn)
+    
+     self._observation.extend([ballPosInCar[0],ballPosInCar[1]])
      return self._observation
      
   def _step(self, action):
-    if (self._render):
+    if (self._renders):
       basePos,orn = p.getBasePositionAndOrientation(self._racecar.racecarUniqueId)
       #p.resetDebugVisualizerCamera(1, 30, -40, basePos)
     
-    fwd = [-1,-1,-1,0,0,0,1,1,1]
-    steerings = [-0.5,0,0.5,-0.5,0,0.5,-0.5,0,0.5]
+    fwd = [-5,-5,-5,0,0,0,5,5,5]
+    steerings = [-0.3,0,0.3,-0.3,0,0.3,-0.3,0,0.3]
     forward = fwd[action]
     steer = steerings[action]
     realaction = [forward,steer]
     self._racecar.applyAction(realaction)
     for i in range(self._actionRepeat):
       p.stepSimulation()
-      if self._render:
+      if self._renders:
         time.sleep(self._timeStep)
       self._observation = self.getExtendedObservation()
       
diff --git a/examples/pybullet/gym/racecarGymEnvTest.py b/examples/pybullet/gym/racecarGymEnvTest.py
index 5e96cbcc2..6be102dc3 100644
--- a/examples/pybullet/gym/racecarGymEnvTest.py
+++ b/examples/pybullet/gym/racecarGymEnvTest.py
@@ -9,6 +9,22 @@ steeringSlider = environment._p.addUserDebugParameter("steering",-0.5,0.5,0)
 while (True):
   targetVelocity = environment._p.readUserDebugParameter(targetVelocitySlider)
   steeringAngle = environment._p.readUserDebugParameter(steeringSlider)
-	
-  action=[targetVelocity,steeringAngle]
+  discreteAction = 0
+  if (targetVelocity<-0.33):
+    discreteAction=0
+  else:
+    if (targetVelocity>0.33):
+      discreteAction=6
+    else:
+      discreteAction=3
+  if (steeringAngle>-0.17):
+    if (steeringAngle>0.17):
+      discreteAction=discreteAction+2
+    else:
+      discreteAction=discreteAction+1
+    
+  action=discreteAction
   state, reward, done, info = environment.step(action)
+  obs = environment.getExtendedObservation()
+  print("obs")
+  print(obs)
diff --git a/examples/pybullet/gym/train_pybullet_racecar.py b/examples/pybullet/gym/train_pybullet_racecar.py
index 3ce064d8d..face83e11 100644
--- a/examples/pybullet/gym/train_pybullet_racecar.py
+++ b/examples/pybullet/gym/train_pybullet_racecar.py
@@ -9,23 +9,21 @@ import datetime
 
 def callback(lcl, glb):
     # stop training if reward exceeds 199
-    is_solved = lcl['t'] > 100 and sum(lcl['episode_rewards'][-101:-1]) / 100 >= 199
-    #uniq_filename = "racecar_model" + str(datetime.datetime.now().date()) + '_' + str(datetime.datetime.now().time()).replace(':', '.')
-    #print("uniq_filename=")
-    #print(uniq_filename)
-    #act.save(uniq_filename)
+    total = sum(lcl['episode_rewards'][-101:-1]) / 100
+    totalt = lcl['t']
+    is_solved = totalt > 2000 and total >= -50
     return is_solved
 
 
 def main():
   
-    env = RacecarGymEnv(render=False)
+    env = RacecarGymEnv(renders=False)
     model = deepq.models.mlp([64])
     act = deepq.learn(
         env,
         q_func=model,
         lr=1e-3,
-        max_timesteps=10000000,
+        max_timesteps=10000,
         buffer_size=50000,
         exploration_fraction=0.1,
         exploration_final_eps=0.02,