allow continuous control for MIT racecar gym environment, use differential drive version

2017-08-23 23:12:26 -07:00
parent 8a4f51baa4
commit f0c32b84c0
26 changed files with 374 additions and 130 deletions
--- a/examples/pybullet/gym/pybullet_envs/bullet/bullet_client.py
+++ b/examples/pybullet/gym/pybullet_envs/bullet/bullet_client.py
@@ -22,7 +22,7 @@ class BulletClient(object):
    """Inject the client id into Bullet functions."""
    attribute = getattr(pybullet, name)
    if inspect.isbuiltin(attribute):
-      if name not in ["invertTransform", "multiplyTransforms",
+      if name not in ["invertTransform", "computeViewMatrix","multiplyTransforms",
                      "getMatrixFromQuaternion"]:  # A temporary hack for now.
        attribute = functools.partial(attribute, physicsClientId=self._client)
    return attribute
--- a/examples/pybullet/gym/pybullet_envs/bullet/minitaur.py
+++ b/examples/pybullet/gym/pybullet_envs/bullet/minitaur.py
@@ -4,7 +4,7 @@
 import copy
 import math
 import numpy as np
-import motor
+from . import motor
 import os

 INIT_POSITION = [0, 0, .2]
--- a/examples/pybullet/gym/pybullet_envs/bullet/minitaur_gym_env.py
+++ b/examples/pybullet/gym/pybullet_envs/bullet/minitaur_gym_env.py
@@ -8,8 +8,8 @@ from gym import spaces
 from gym.utils import seeding
 import numpy as np
 import pybullet
-import bullet_client
-import minitaur
+from . import bullet_client
+from . import minitaur
 import os

 NUM_SUBSTEPS = 5
--- a/examples/pybullet/gym/pybullet_envs/bullet/racecar.py
+++ b/examples/pybullet/gym/pybullet_envs/bullet/racecar.py
@@ -5,54 +5,79 @@ import math

 class Racecar:

-  def __init__(self, bullet_client, urdfRootPath='', timeStep=0.01):
-    self.urdfRootPath = urdfRootPath
-    self.timeStep = timeStep
-    self._p = bullet_client
-    self.reset()
+	def __init__(self, bullet_client, urdfRootPath='', timeStep=0.01):
+		self.urdfRootPath = urdfRootPath
+		self.timeStep = timeStep
+		self._p = bullet_client
+		self.reset()

-  def reset(self):
-    self.racecarUniqueId = self._p.loadURDF(os.path.join(os.path.dirname(__file__),"../data","racecar/racecar.urdf"), [0,0,.2])
-    self.maxForce = 20
-    self.nMotors = 2
-    self.motorizedwheels=[2]
-    self.inactiveWheels = [3,5,7]
-    for wheel in self.inactiveWheels:
-      self._p.setJointMotorControl2(self.racecarUniqueId,wheel,self._p.VELOCITY_CONTROL,targetVelocity=0,force=0)
+	def reset(self):  	
+		car = self._p.loadURDF(os.path.join(os.path.dirname(__file__),"../data","racecar/racecar_differential.urdf"), [0,0,.2],useFixedBase=False)
+		self.racecarUniqueId = car
+		#for i in range (self._p.getNumJoints(car)):
+		#	print (self._p.getJointInfo(car,i))
+		for wheel in range(self._p.getNumJoints(car)):
+				self._p.setJointMotorControl2(car,wheel,self._p.VELOCITY_CONTROL,targetVelocity=0,force=0)
+				self._p.getJointInfo(car,wheel)	

-    self.motorizedWheels = [2]
-    self.steeringLinks=[4,6]
-    self.speedMultiplier = 4.
-    
+		#self._p.setJointMotorControl2(car,10,self._p.VELOCITY_CONTROL,targetVelocity=1,force=10)
+		c = self._p.createConstraint(car,9,car,11,jointType=self._p.JOINT_GEAR,jointAxis =[0,1,0],parentFramePosition=[0,0,0],childFramePosition=[0,0,0])
+		self._p.changeConstraint(c,gearRatio=1, maxForce=10000)

-  def getActionDimension(self):
-    return self.nMotors
+		c = self._p.createConstraint(car,10,car,13,jointType=self._p.JOINT_GEAR,jointAxis =[0,1,0],parentFramePosition=[0,0,0],childFramePosition=[0,0,0])
+		self._p.changeConstraint(c,gearRatio=-1, maxForce=10000)

-  def getObservationDimension(self):
-    return len(self.getObservation())
+		c = self._p.createConstraint(car,9,car,13,jointType=self._p.JOINT_GEAR,jointAxis =[0,1,0],parentFramePosition=[0,0,0],childFramePosition=[0,0,0])
+		self._p.changeConstraint(c,gearRatio=-1, maxForce=10000)

-  def getObservation(self):
-    observation = []
-    pos,orn=self._p.getBasePositionAndOrientation(self.racecarUniqueId)
-    
-    observation.extend(list(pos))
-    observation.extend(list(orn))
-    
-    return observation
+		c = self._p.createConstraint(car,16,car,18,jointType=self._p.JOINT_GEAR,jointAxis =[0,1,0],parentFramePosition=[0,0,0],childFramePosition=[0,0,0])
+		self._p.changeConstraint(c,gearRatio=1, maxForce=10000)
+
+		c = self._p.createConstraint(car,16,car,19,jointType=self._p.JOINT_GEAR,jointAxis =[0,1,0],parentFramePosition=[0,0,0],childFramePosition=[0,0,0])
+		self._p.changeConstraint(c,gearRatio=-1, maxForce=10000)
+
+		c = self._p.createConstraint(car,17,car,19,jointType=self._p.JOINT_GEAR,jointAxis =[0,1,0],parentFramePosition=[0,0,0],childFramePosition=[0,0,0])
+		self._p.changeConstraint(c,gearRatio=-1, maxForce=10000)
+
+		c = self._p.createConstraint(car,1,car,18,jointType=self._p.JOINT_GEAR,jointAxis =[0,1,0],parentFramePosition=[0,0,0],childFramePosition=[0,0,0])
+		self._p.changeConstraint(c,gearRatio=-1, gearAuxLink = 15, maxForce=10000)
+		c = self._p.createConstraint(car,3,car,19,jointType=self._p.JOINT_GEAR,jointAxis =[0,1,0],parentFramePosition=[0,0,0],childFramePosition=[0,0,0])
+		self._p.changeConstraint(c,gearRatio=-1, gearAuxLink = 15,maxForce=10000)
+
+		self.steeringLinks = [0,2]
+		self.maxForce = 20
+		self.nMotors = 2
+		self.motorizedwheels=[8,15]
+		self.speedMultiplier = 20.
+		self.steeringMultiplier = 0.5
+
+	def getActionDimension(self):
+		return self.nMotors
+
+	def getObservationDimension(self):
+		return len(self.getObservation())
+
+	def getObservation(self):
+		observation = []
+		pos,orn=self._p.getBasePositionAndOrientation(self.racecarUniqueId)
+
+		observation.extend(list(pos))
+		observation.extend(list(orn))
+
+		return observation
+
+	def applyAction(self, motorCommands):
+		targetVelocity=motorCommands[0]*self.speedMultiplier
+		#print("targetVelocity")
+		#print(targetVelocity)
+		steeringAngle = motorCommands[1]*self.steeringMultiplier
+		#print("steeringAngle")
+		#print(steeringAngle)
+		#print("maxForce")
+		#print(self.maxForce)
+
+		for motor in self.motorizedwheels:
+			self._p.setJointMotorControl2(self.racecarUniqueId,motor,self._p.VELOCITY_CONTROL,targetVelocity=targetVelocity,force=self.maxForce)
+		for steer in self.steeringLinks:
+			self._p.setJointMotorControl2(self.racecarUniqueId,steer,self._p.POSITION_CONTROL,targetPosition=steeringAngle)

-  def applyAction(self, motorCommands):
-    targetVelocity=motorCommands[0]*self.speedMultiplier
-    #print("targetVelocity")
-    #print(targetVelocity)
-    steeringAngle = motorCommands[1]
-    #print("steeringAngle")
-    #print(steeringAngle)
-    #print("maxForce")
-    #print(self.maxForce)
-    
-    
-    for motor in self.motorizedwheels:
-      self._p.setJointMotorControl2(self.racecarUniqueId,motor,self._p.VELOCITY_CONTROL,targetVelocity=targetVelocity,force=self.maxForce)    
-    for steer in self.steeringLinks:
-      self._p.setJointMotorControl2(self.racecarUniqueId,steer,self._p.POSITION_CONTROL,targetPosition=steeringAngle)
-  
--- a/examples/pybullet/gym/pybullet_envs/bullet/racecarGymEnv.py
+++ b/examples/pybullet/gym/pybullet_envs/bullet/racecarGymEnv.py
@@ -8,7 +8,7 @@ import time
 import pybullet
 from . import racecar
 import random
-import bullet_client
+from . import bullet_client

 class RacecarGymEnv(gym.Env):
  metadata = {
@@ -20,6 +20,7 @@ class RacecarGymEnv(gym.Env):
               urdfRoot="",
               actionRepeat=50,
               isEnableSelfCollision=True,
+               isDiscrete=False,
               renders=False):
    print("init")
    self._timeStep = 0.01
@@ -30,6 +31,7 @@ class RacecarGymEnv(gym.Env):
    self._ballUniqueId = -1
    self._envStepCounter = 0
    self._renders = renders
+    self._isDiscrete = isDiscrete
    if self._renders:
      self._p = bullet_client.BulletClient(
          connection_mode=pybullet.GUI)
@@ -42,7 +44,13 @@ class RacecarGymEnv(gym.Env):
    #print("observationDim")
    #print(observationDim) 
    observation_high = np.array([np.finfo(np.float32).max] * observationDim)    
-    self.action_space = spaces.Discrete(9)
+    if (isDiscrete):
+      self.action_space = spaces.Discrete(9)
+    else:
+       action_dim = 2
+       self._action_bound = 1
+       action_high = np.array([self._action_bound] * action_dim)
+       self.action_space = spaces.Box(-action_high, action_high) 
    self.observation_space = spaces.Box(-observation_high, observation_high)
    self.viewer = None

@@ -53,10 +61,10 @@ class RacecarGymEnv(gym.Env):
    #self._p.loadURDF(os.path.join(os.path.dirname(__file__),"../data","plane.urdf"))
    stadiumobjects = self._p.loadSDF(os.path.join(os.path.dirname(__file__),"../data","stadium.sdf"))
    #move the stadium objects slightly above 0
-    for i in stadiumobjects:
-    	pos,orn = self._p.getBasePositionAndOrientation(i)
-    	newpos = [pos[0],pos[1],pos[2]+0.1]
-    	self._p.resetBasePositionAndOrientation(i,newpos,orn)
+    #for i in stadiumobjects:
+    #	pos,orn = self._p.getBasePositionAndOrientation(i)
+    #	newpos = [pos[0],pos[1],pos[2]-0.1]
+    #	self._p.resetBasePositionAndOrientation(i,newpos,orn)
    
    dist = 5 +2.*random.random()
    ang = 2.*3.1415925438*random.random()
@@ -96,11 +104,15 @@ class RacecarGymEnv(gym.Env):
      basePos,orn = self._p.getBasePositionAndOrientation(self._racecar.racecarUniqueId)
      #self._p.resetDebugVisualizerCamera(1, 30, -40, basePos)
    
-    fwd = [-5,-5,-5,0,0,0,5,5,5]
-    steerings = [-0.3,0,0.3,-0.3,0,0.3,-0.3,0,0.3]
-    forward = fwd[action]
-    steer = steerings[action]
-    realaction = [forward,steer]
+    if (self._isDiscrete):
+	    fwd = [-1,-1,-1,0,0,0,1,1,1]
+	    steerings = [-0.6,0,0.6,-0.6,0,0.6,-0.6,0,0.6]
+	    forward = fwd[action]
+	    steer = steerings[action]
+	    realaction = [forward,steer]
+    else:
+      realaction = action
+
    self._racecar.applyAction(realaction)
    for i in range(self._actionRepeat):
      self._p.stepSimulation()
--- a/examples/pybullet/gym/pybullet_envs/bullet/racecarZEDGymEnv.py
+++ b/examples/pybullet/gym/pybullet_envs/bullet/racecarZEDGymEnv.py
@@ -5,7 +5,8 @@ from gym import spaces
 from gym.utils import seeding
 import numpy as np
 import time
-import pybullet as p
+import pybullet
+from . import bullet_client
 from . import racecar
 import random

@@ -17,8 +18,9 @@ class RacecarZEDGymEnv(gym.Env):

  def __init__(self,
               urdfRoot="",
-               actionRepeat=100,
+               actionRepeat=10,
               isEnableSelfCollision=True,
+               isDiscrete=True,
               renders=True):
    print("init")
    self._timeStep = 0.01
@@ -30,11 +32,14 @@ class RacecarZEDGymEnv(gym.Env):
    self._renders = renders
    self._width = 100
    self._height = 10
-    self._p = p
+    
+    self._isDiscrete = isDiscrete
    if self._renders:
-      p.connect(p.GUI)
+      self._p = bullet_client.BulletClient(
+          connection_mode=pybullet.GUI)
    else:
-      p.connect(p.DIRECT)
+      self._p = bullet_client.BulletClient()
+
    self._seed()
    self.reset()
    observationDim = len(self.getExtendedObservation())
@@ -42,22 +47,22 @@ class RacecarZEDGymEnv(gym.Env):
    #print(observationDim)
    
    observation_high = np.array([np.finfo(np.float32).max] * observationDim)    
-    self.action_space = spaces.Discrete(6)
+    self.action_space = spaces.Discrete(9)
    self.observation_space = spaces.Box(low=0, high=255, shape=(self._height, self._width, 4))

    self.viewer = None

  def _reset(self):
-    p.resetSimulation()
+    self._p.resetSimulation()
    #p.setPhysicsEngineParameter(numSolverIterations=300)
-    p.setTimeStep(self._timeStep)
-    #p.loadURDF(os.path.join(os.path.dirname(__file__),"../data","plane.urdf"))
-    stadiumobjects = p.loadSDF(os.path.join(os.path.dirname(__file__),"../data","stadium.sdf"))
+    self._p.setTimeStep(self._timeStep)
+    #self._p.loadURDF(os.path.join(os.path.dirname(__file__),"../data","plane.urdf"))
+    stadiumobjects = self._p.loadSDF(os.path.join(os.path.dirname(__file__),"../data","stadium.sdf"))
    #move the stadium objects slightly above 0
    for i in stadiumobjects:
-      pos,orn = p.getBasePositionAndOrientation(i)
+      pos,orn = self._p.getBasePositionAndOrientation(i)
      newpos = [pos[0],pos[1],pos[2]+0.1]
-      p.resetBasePositionAndOrientation(i,newpos,orn)
+      self._p.resetBasePositionAndOrientation(i,newpos,orn)
    
    dist = 5 +2.*random.random()
    ang = 2.*3.1415925438*random.random()
@@ -66,39 +71,39 @@ class RacecarZEDGymEnv(gym.Env):
    bally = dist * math.cos(ang)
    ballz = 1
    
-    self._ballUniqueId = p.loadURDF(os.path.join(os.path.dirname(__file__),"../data","sphere2red.urdf"),[ballx,bally,ballz])
-    p.setGravity(0,0,-10)
-    self._racecar = racecar.Racecar(urdfRootPath=self._urdfRoot, timeStep=self._timeStep)
+    self._ballUniqueId = self._p.loadURDF(os.path.join(os.path.dirname(__file__),"../data","sphere2red.urdf"),[ballx,bally,ballz])
+    self._p.setGravity(0,0,-10)
+    self._racecar = racecar.Racecar(self._p,urdfRootPath=self._urdfRoot, timeStep=self._timeStep)
    self._envStepCounter = 0
    for i in range(100):
-      p.stepSimulation()
+      self._p.stepSimulation()
    self._observation = self.getExtendedObservation()
    return np.array(self._observation)

  def __del__(self):
-    p.disconnect()
+    self._p = 0

  def _seed(self, seed=None):
    self.np_random, seed = seeding.np_random(seed)
    return [seed]

  def getExtendedObservation(self):
-     carpos,carorn = p.getBasePositionAndOrientation(self._racecar.racecarUniqueId)
-     carmat = p.getMatrixFromQuaternion(carorn)
-     ballpos,ballorn = p.getBasePositionAndOrientation(self._ballUniqueId)      
-     invCarPos,invCarOrn = p.invertTransform(carpos,carorn)
-     ballPosInCar,ballOrnInCar = p.multiplyTransforms(invCarPos,invCarOrn,ballpos,ballorn)
+     carpos,carorn = self._p.getBasePositionAndOrientation(self._racecar.racecarUniqueId)
+     carmat = self._p.getMatrixFromQuaternion(carorn)
+     ballpos,ballorn = self._p.getBasePositionAndOrientation(self._ballUniqueId)      
+     invCarPos,invCarOrn = self._p.invertTransform(carpos,carorn)
+     ballPosInCar,ballOrnInCar = self._p.multiplyTransforms(invCarPos,invCarOrn,ballpos,ballorn)
     dist0 = 0.3
     dist1 = 1.
     eyePos = [carpos[0]+dist0*carmat[0],carpos[1]+dist0*carmat[3],carpos[2]+dist0*carmat[6]+0.3]
     targetPos = [carpos[0]+dist1*carmat[0],carpos[1]+dist1*carmat[3],carpos[2]+dist1*carmat[6]+0.3]      
     up = [carmat[2],carmat[5],carmat[8]]
-     viewMat = p.computeViewMatrix(eyePos,targetPos,up)
-     #viewMat = p.computeViewMatrixFromYawPitchRoll(carpos,1,0,0,0,2)
+     viewMat = self._p.computeViewMatrix(eyePos,targetPos,up)
+     #viewMat = self._p.computeViewMatrixFromYawPitchRoll(carpos,1,0,0,0,2)
     #print("projectionMatrix:")
-     #print(p.getDebugVisualizerCamera()[3])
+     #print(self._p.getDebugVisualizerCamera()[3])
     projMatrix = [0.7499999403953552, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, -1.0000200271606445, -1.0, 0.0, 0.0, -0.02000020071864128, 0.0]
-     img_arr = p.getCameraImage(width=self._width,height=self._height,viewMatrix=viewMat,projectionMatrix=projMatrix)
+     img_arr = self._p.getCameraImage(width=self._width,height=self._height,viewMatrix=viewMat,projectionMatrix=projMatrix)
     rgb=img_arr[2]
     np_img_arr = np.reshape(rgb, (self._height, self._width, 4))
     self._observation = np_img_arr
@@ -106,17 +111,18 @@ class RacecarZEDGymEnv(gym.Env):
     
  def _step(self, action):
    if (self._renders):
-      basePos,orn = p.getBasePositionAndOrientation(self._racecar.racecarUniqueId)
-      #p.resetDebugVisualizerCamera(1, 30, -40, basePos)
+      basePos,orn = self._p.getBasePositionAndOrientation(self._racecar.racecarUniqueId)
+      #self._p.resetDebugVisualizerCamera(1, 30, -40, basePos)
    
-    fwd = [5,0,5,10,10,10]
-    steerings = [-0.5,0,0.5,-0.3,0,0.3]
+    fwd = [-1,-1,-1,0,0,0,1,1,1]
+    steerings = [-0.6,0,0.6,-0.6,0,0.6,-0.6,0,0.6]
    forward = fwd[action]
    steer = steerings[action]
    realaction = [forward,steer]
+
    self._racecar.applyAction(realaction)
    for i in range(self._actionRepeat):
-      p.stepSimulation()
+      self._p.stepSimulation()
      if self._renders:
        time.sleep(self._timeStep)
      self._observation = self.getExtendedObservation()
@@ -137,7 +143,7 @@ class RacecarZEDGymEnv(gym.Env):
    return self._envStepCounter>1000
    
  def _reward(self):
-    closestPoints = p.getClosestPoints(self._racecar.racecarUniqueId,self._ballUniqueId,10000) 
+    closestPoints = self._p.getClosestPoints(self._racecar.racecarUniqueId,self._ballUniqueId,10000) 
    
    numPt = len(closestPoints)
    reward=-1000