allow continuous control for MIT racecar gym environment, use differential drive version

2017-08-23 23:12:26 -07:00
parent 8a4f51baa4
commit f0c32b84c0
26 changed files with 374 additions and 130 deletions
--- a/examples/pybullet/gym/pybullet_envs/bullet/racecarZEDGymEnv.py
+++ b/examples/pybullet/gym/pybullet_envs/bullet/racecarZEDGymEnv.py
@@ -5,7 +5,8 @@ from gym import spaces
 from gym.utils import seeding
 import numpy as np
 import time
-import pybullet as p
+import pybullet
+from . import bullet_client
 from . import racecar
 import random

@@ -17,8 +18,9 @@ class RacecarZEDGymEnv(gym.Env):

  def __init__(self,
               urdfRoot="",
-               actionRepeat=100,
+               actionRepeat=10,
               isEnableSelfCollision=True,
+               isDiscrete=True,
               renders=True):
    print("init")
    self._timeStep = 0.01
@@ -30,11 +32,14 @@ class RacecarZEDGymEnv(gym.Env):
    self._renders = renders
    self._width = 100
    self._height = 10
-    self._p = p
+    
+    self._isDiscrete = isDiscrete
    if self._renders:
-      p.connect(p.GUI)
+      self._p = bullet_client.BulletClient(
+          connection_mode=pybullet.GUI)
    else:
-      p.connect(p.DIRECT)
+      self._p = bullet_client.BulletClient()
+
    self._seed()
    self.reset()
    observationDim = len(self.getExtendedObservation())
@@ -42,22 +47,22 @@ class RacecarZEDGymEnv(gym.Env):
    #print(observationDim)
    
    observation_high = np.array([np.finfo(np.float32).max] * observationDim)    
-    self.action_space = spaces.Discrete(6)
+    self.action_space = spaces.Discrete(9)
    self.observation_space = spaces.Box(low=0, high=255, shape=(self._height, self._width, 4))

    self.viewer = None

  def _reset(self):
-    p.resetSimulation()
+    self._p.resetSimulation()
    #p.setPhysicsEngineParameter(numSolverIterations=300)
-    p.setTimeStep(self._timeStep)
-    #p.loadURDF(os.path.join(os.path.dirname(__file__),"../data","plane.urdf"))
-    stadiumobjects = p.loadSDF(os.path.join(os.path.dirname(__file__),"../data","stadium.sdf"))
+    self._p.setTimeStep(self._timeStep)
+    #self._p.loadURDF(os.path.join(os.path.dirname(__file__),"../data","plane.urdf"))
+    stadiumobjects = self._p.loadSDF(os.path.join(os.path.dirname(__file__),"../data","stadium.sdf"))
    #move the stadium objects slightly above 0
    for i in stadiumobjects:
-      pos,orn = p.getBasePositionAndOrientation(i)
+      pos,orn = self._p.getBasePositionAndOrientation(i)
      newpos = [pos[0],pos[1],pos[2]+0.1]
-      p.resetBasePositionAndOrientation(i,newpos,orn)
+      self._p.resetBasePositionAndOrientation(i,newpos,orn)
    
    dist = 5 +2.*random.random()
    ang = 2.*3.1415925438*random.random()
@@ -66,39 +71,39 @@ class RacecarZEDGymEnv(gym.Env):
    bally = dist * math.cos(ang)
    ballz = 1
    
-    self._ballUniqueId = p.loadURDF(os.path.join(os.path.dirname(__file__),"../data","sphere2red.urdf"),[ballx,bally,ballz])
-    p.setGravity(0,0,-10)
-    self._racecar = racecar.Racecar(urdfRootPath=self._urdfRoot, timeStep=self._timeStep)
+    self._ballUniqueId = self._p.loadURDF(os.path.join(os.path.dirname(__file__),"../data","sphere2red.urdf"),[ballx,bally,ballz])
+    self._p.setGravity(0,0,-10)
+    self._racecar = racecar.Racecar(self._p,urdfRootPath=self._urdfRoot, timeStep=self._timeStep)
    self._envStepCounter = 0
    for i in range(100):
-      p.stepSimulation()
+      self._p.stepSimulation()
    self._observation = self.getExtendedObservation()
    return np.array(self._observation)

  def __del__(self):
-    p.disconnect()
+    self._p = 0

  def _seed(self, seed=None):
    self.np_random, seed = seeding.np_random(seed)
    return [seed]

  def getExtendedObservation(self):
-     carpos,carorn = p.getBasePositionAndOrientation(self._racecar.racecarUniqueId)
-     carmat = p.getMatrixFromQuaternion(carorn)
-     ballpos,ballorn = p.getBasePositionAndOrientation(self._ballUniqueId)      
-     invCarPos,invCarOrn = p.invertTransform(carpos,carorn)
-     ballPosInCar,ballOrnInCar = p.multiplyTransforms(invCarPos,invCarOrn,ballpos,ballorn)
+     carpos,carorn = self._p.getBasePositionAndOrientation(self._racecar.racecarUniqueId)
+     carmat = self._p.getMatrixFromQuaternion(carorn)
+     ballpos,ballorn = self._p.getBasePositionAndOrientation(self._ballUniqueId)      
+     invCarPos,invCarOrn = self._p.invertTransform(carpos,carorn)
+     ballPosInCar,ballOrnInCar = self._p.multiplyTransforms(invCarPos,invCarOrn,ballpos,ballorn)
     dist0 = 0.3
     dist1 = 1.
     eyePos = [carpos[0]+dist0*carmat[0],carpos[1]+dist0*carmat[3],carpos[2]+dist0*carmat[6]+0.3]
     targetPos = [carpos[0]+dist1*carmat[0],carpos[1]+dist1*carmat[3],carpos[2]+dist1*carmat[6]+0.3]      
     up = [carmat[2],carmat[5],carmat[8]]
-     viewMat = p.computeViewMatrix(eyePos,targetPos,up)
-     #viewMat = p.computeViewMatrixFromYawPitchRoll(carpos,1,0,0,0,2)
+     viewMat = self._p.computeViewMatrix(eyePos,targetPos,up)
+     #viewMat = self._p.computeViewMatrixFromYawPitchRoll(carpos,1,0,0,0,2)
     #print("projectionMatrix:")
-     #print(p.getDebugVisualizerCamera()[3])
+     #print(self._p.getDebugVisualizerCamera()[3])
     projMatrix = [0.7499999403953552, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, -1.0000200271606445, -1.0, 0.0, 0.0, -0.02000020071864128, 0.0]
-     img_arr = p.getCameraImage(width=self._width,height=self._height,viewMatrix=viewMat,projectionMatrix=projMatrix)
+     img_arr = self._p.getCameraImage(width=self._width,height=self._height,viewMatrix=viewMat,projectionMatrix=projMatrix)
     rgb=img_arr[2]
     np_img_arr = np.reshape(rgb, (self._height, self._width, 4))
     self._observation = np_img_arr
@@ -106,17 +111,18 @@ class RacecarZEDGymEnv(gym.Env):
     
  def _step(self, action):
    if (self._renders):
-      basePos,orn = p.getBasePositionAndOrientation(self._racecar.racecarUniqueId)
-      #p.resetDebugVisualizerCamera(1, 30, -40, basePos)
+      basePos,orn = self._p.getBasePositionAndOrientation(self._racecar.racecarUniqueId)
+      #self._p.resetDebugVisualizerCamera(1, 30, -40, basePos)
    
-    fwd = [5,0,5,10,10,10]
-    steerings = [-0.5,0,0.5,-0.3,0,0.3]
+    fwd = [-1,-1,-1,0,0,0,1,1,1]
+    steerings = [-0.6,0,0.6,-0.6,0,0.6,-0.6,0,0.6]
    forward = fwd[action]
    steer = steerings[action]
    realaction = [forward,steer]
+
    self._racecar.applyAction(realaction)
    for i in range(self._actionRepeat):
-      p.stepSimulation()
+      self._p.stepSimulation()
      if self._renders:
        time.sleep(self._timeStep)
      self._observation = self.getExtendedObservation()
@@ -137,7 +143,7 @@ class RacecarZEDGymEnv(gym.Env):
    return self._envStepCounter>1000
    
  def _reward(self):
-    closestPoints = p.getClosestPoints(self._racecar.racecarUniqueId,self._ballUniqueId,10000) 
+    closestPoints = self._p.getClosestPoints(self._racecar.racecarUniqueId,self._ballUniqueId,10000) 
    
    numPt = len(closestPoints)
    reward=-1000