fixes in kuka/racecar environment. kuka still doesn't train well, work-in-progress

2017-11-01 18:06:47 -07:00
parent e4f58ddc33
commit 4798d66f19
8 changed files with 218 additions and 65 deletions
--- a/examples/pybullet/gym/pybullet_envs/agents/configs.py
+++ b/examples/pybullet/gym/pybullet_envs/agents/configs.py
@@ -34,7 +34,7 @@ def default():
  # General
  algorithm = ppo.PPOAlgorithm
  num_agents = 10
-  eval_episodes = 25
+  eval_episodes = 20
  use_gpu = False
  # Network
  network = networks.feed_forward_gaussian
@@ -47,7 +47,7 @@ def default():
  init_mean_factor = 0.05
  init_logstd = -1
  # Optimization
-  update_every = 25
+  update_every = 20
  policy_optimizer = 'AdamOptimizer'
  value_optimizer = 'AdamOptimizer'
  update_epochs_policy = 50
@@ -105,7 +105,7 @@ def pybullet_kuka_grasping():
  locals().update(default())
  # Environment
  env = 'KukaBulletEnv-v0'
-  max_length = 10
+  max_length = 1000
  steps = 1e7  # 10M
  return locals()
--- a/examples/pybullet/gym/pybullet_envs/bullet/kuka.py
+++ b/examples/pybullet/gym/pybullet_envs/bullet/kuka.py
@@ -25,6 +25,7 @@ class Kuka:
    self.useNullSpace =21
    self.useOrientation = 1
    self.kukaEndEffectorIndex = 6
    self.kukaGripperIndex = 7
    #lower limits for null space
    self.ll=[-.967,-2 ,-2.96,0.19,-2.96,-2.09,-3.05]
    #upper limits for null space
@@ -76,7 +77,7 @@ class Kuka:
  def getObservation(self):
    observation = []
-    state = p.getLinkState(self.kukaUid,self.kukaEndEffectorIndex)
+    state = p.getLinkState(self.kukaUid,self.kukaGripperIndex)
    pos = state[0]
    orn = state[1]
    euler = p.getEulerFromQuaternion(orn)
@@ -106,13 +107,13 @@ class Kuka:
      self.endEffectorPos[0] = self.endEffectorPos[0]+dx
-      if (self.endEffectorPos[0]>0.75):
+      if (self.endEffectorPos[0]>0.65):
-        self.endEffectorPos[0]=0.75
+        self.endEffectorPos[0]=0.65
-      if (self.endEffectorPos[0]<0.45):
+      if (self.endEffectorPos[0]<0.50):
-        self.endEffectorPos[0]=0.45
+        self.endEffectorPos[0]=0.50
      self.endEffectorPos[1] = self.endEffectorPos[1]+dy
-      if (self.endEffectorPos[1]<-0.22):
+      if (self.endEffectorPos[1]<-0.17):
-        self.endEffectorPos[1]=-0.22
+        self.endEffectorPos[1]=-0.17
      if (self.endEffectorPos[1]>0.22):
        self.endEffectorPos[1]=0.22
--- a/examples/pybullet/gym/pybullet_envs/bullet/kukaCamGymEnv.py
+++ b/examples/pybullet/gym/pybullet_envs/bullet/kukaCamGymEnv.py
@@ -16,6 +16,9 @@ import random
 import pybullet_data
 maxSteps = 1000
 RENDER_HEIGHT = 720
 RENDER_WIDTH = 960
 class KukaCamGymEnv(gym.Env):
  metadata = {
      'render.modes': ['human', 'rgb_array'],
@@ -24,9 +27,9 @@ class KukaCamGymEnv(gym.Env):
  def __init__(self,
               urdfRoot=pybullet_data.getDataPath(),
-               actionRepeat=25,
+               actionRepeat=1,
               isEnableSelfCollision=True,
-               renders=True,
+               renders=False,
               isDiscrete=False):
    self._timeStep = 1./240.
    self._urdfRoot = urdfRoot
@@ -163,7 +166,25 @@ class KukaCamGymEnv(gym.Env):
    return np.array(self._observation), reward, done, {}
  def _render(self, mode='human', close=False):
-      return
+    if mode != "rgb_array":
      return np.array([])
    base_pos,orn = self._p.getBasePositionAndOrientation(self._racecar.racecarUniqueId)
    view_matrix = self._p.computeViewMatrixFromYawPitchRoll(
        cameraTargetPosition=base_pos,
        distance=self._cam_dist,
        yaw=self._cam_yaw,
        pitch=self._cam_pitch,
        roll=0,
        upAxisIndex=2)
    proj_matrix = self._p.computeProjectionMatrixFOV(
        fov=60, aspect=float(RENDER_WIDTH)/RENDER_HEIGHT,
        nearVal=0.1, farVal=100.0)
    (_, _, px, _, _) = self._p.getCameraImage(
        width=RENDER_WIDTH, height=RENDER_HEIGHT, viewMatrix=view_matrix,
        projectionMatrix=proj_matrix, renderer=pybullet.ER_BULLET_HARDWARE_OPENGL)
    rgb_array = np.array(px)
    rgb_array = rgb_array[:, :, :3]
    return rgb_array
  def _termination(self):
    #print (self._kuka.endEffectorPos[2])
--- a/examples/pybullet/gym/pybullet_envs/bullet/kukaGymEnv.py
+++ b/examples/pybullet/gym/pybullet_envs/bullet/kukaGymEnv.py
@@ -14,8 +14,10 @@ from . import kuka
 import random
 import pybullet_data
-maxSteps = 1000
+largeValObservation = 100
 RENDER_HEIGHT = 720
 RENDER_WIDTH = 960
 class KukaGymEnv(gym.Env):
  metadata = {
@@ -27,8 +29,10 @@ class KukaGymEnv(gym.Env):
               urdfRoot=pybullet_data.getDataPath(),
               actionRepeat=1,
               isEnableSelfCollision=True,
-               renders=True,
+               renders=False,
-               isDiscrete=False):
+               isDiscrete=False,
               maxSteps = 1000):
    #print("KukaGymEnv __init__")
    self._isDiscrete = isDiscrete
    self._timeStep = 1./240.
    self._urdfRoot = urdfRoot
@@ -37,7 +41,12 @@ class KukaGymEnv(gym.Env):
    self._observation = []
    self._envStepCounter = 0
    self._renders = renders
    self._maxSteps = maxSteps
    self.terminated = 0
    self._cam_dist = 1.3
    self._cam_yaw = 180 
    self._cam_pitch = -40
    self._p = p
    if self._renders:
      cid = p.connect(p.SHARED_MEMORY)
@@ -53,7 +62,7 @@ class KukaGymEnv(gym.Env):
    #print("observationDim")
    #print(observationDim)
-    observation_high = np.array([np.finfo(np.float32).max] * observationDim)    
+    observation_high = np.array([largeValObservation] * observationDim)    
    if (self._isDiscrete):
      self.action_space = spaces.Discrete(7)
    else:
@@ -65,6 +74,7 @@ class KukaGymEnv(gym.Env):
    self.viewer = None
  def _reset(self):
    #print("KukaGymEnv _reset")
    self.terminated = 0
    p.resetSimulation()
    p.setPhysicsEngineParameter(numSolverIterations=150)
@@ -73,11 +83,11 @@ class KukaGymEnv(gym.Env):
    p.loadURDF(os.path.join(self._urdfRoot,"table/table.urdf"), 0.5000000,0.00000,-.820000,0.000000,0.000000,0.0,1.0)
-    xpos = 0.5 +0.2*random.random()
+    xpos = 0.55 +0.12*random.random()
-    ypos = 0 +0.25*random.random()
+    ypos = 0 +0.2*random.random()
-    ang = 3.1415925438*random.random()
+    ang = 3.14*0.5+3.1415925438*random.random()
    orn = p.getQuaternionFromEuler([0,0,ang])
-    self.blockUid =p.loadURDF(os.path.join(self._urdfRoot,"block.urdf"), xpos,ypos,-0.1,orn[0],orn[1],orn[2],orn[3])
+    self.blockUid =p.loadURDF(os.path.join(self._urdfRoot,"block.urdf"), xpos,ypos,-0.15,orn[0],orn[1],orn[2],orn[3])
    p.setGravity(0,0,-10)
    self._kuka = kuka.Kuka(urdfRootPath=self._urdfRoot, timeStep=self._timeStep)
@@ -95,57 +105,104 @@ class KukaGymEnv(gym.Env):
  def getExtendedObservation(self):
     self._observation = self._kuka.getObservation()
-     eeState  = p.getLinkState(self._kuka.kukaUid,self._kuka.kukaEndEffectorIndex)
+     gripperState  = p.getLinkState(self._kuka.kukaUid,self._kuka.kukaGripperIndex)
-     endEffectorPos = eeState[0]
+     gripperPos = gripperState[0]
-     endEffectorOrn = eeState[1]
+     gripperOrn = gripperState[1]
     blockPos,blockOrn = p.getBasePositionAndOrientation(self.blockUid)
-     invEEPos,invEEOrn = p.invertTransform(endEffectorPos,endEffectorOrn)
+     invGripperPos,invGripperOrn = p.invertTransform(gripperPos,gripperOrn)
-     blockPosInEE,blockOrnInEE = p.multiplyTransforms(invEEPos,invEEOrn,blockPos,blockOrn)
+     gripperMat = p.getMatrixFromQuaternion(gripperOrn)
-     blockEulerInEE = p.getEulerFromQuaternion(blockOrnInEE)
+     dir0 = [gripperMat[0],gripperMat[3],gripperMat[6]]
-     self._observation.extend(list(blockPosInEE))
+     dir1 = [gripperMat[1],gripperMat[4],gripperMat[7]]
-     self._observation.extend(list(blockEulerInEE))
+     dir2 = [gripperMat[2],gripperMat[5],gripperMat[8]]
-
+             
     gripperEul =  p.getEulerFromQuaternion(gripperOrn)
     #print("gripperEul")
     #print(gripperEul)
     blockPosInGripper,blockOrnInGripper = p.multiplyTransforms(invGripperPos,invGripperOrn,blockPos,blockOrn)
     projectedBlockPos2D =[blockPosInGripper[0],blockPosInGripper[1]]
     blockEulerInGripper = p.getEulerFromQuaternion(blockOrnInGripper)
     #print("projectedBlockPos2D")
     #print(projectedBlockPos2D)
     #print("blockEulerInGripper")
     #print(blockEulerInGripper)
     #we return the relative x,y position and euler angle of block in gripper space
     blockInGripperPosXYEulZ =[blockPosInGripper[0],blockPosInGripper[1],blockEulerInGripper[2]]
     #p.addUserDebugLine(gripperPos,[gripperPos[0]+dir0[0],gripperPos[1]+dir0[1],gripperPos[2]+dir0[2]],[1,0,0],lifeTime=1)
     #p.addUserDebugLine(gripperPos,[gripperPos[0]+dir1[0],gripperPos[1]+dir1[1],gripperPos[2]+dir1[2]],[0,1,0],lifeTime=1)
     #p.addUserDebugLine(gripperPos,[gripperPos[0]+dir2[0],gripperPos[1]+dir2[1],gripperPos[2]+dir2[2]],[0,0,1],lifeTime=1)
     self._observation.extend(list(blockInGripperPosXYEulZ))
     return self._observation
  def _step(self, action):
    if (self._isDiscrete):
-      dv = 0.01
+      dv = 0.005
      dx = [0,-dv,dv,0,0,0,0][action]
      dy = [0,0,0,-dv,dv,0,0][action]
-      da = [0,0,0,0,0,-0.1,0.1][action]
+      da = [0,0,0,0,0,-0.05,0.05][action]
      f = 0.3
      realAction = [dx,dy,-0.002,da,f]
    else:
-      dv = 0.01
+      #print("action[0]=", str(action[0]))
      dv = 0.005
      dx = action[0] * dv
      dy = action[1] * dv
-      da = action[2] * 0.1
+      da = action[2] * 0.05
      f = 0.3
      realAction = [dx,dy,-0.002,da,f]
    return self.step2( realAction)
  def step2(self, action):
    self._kuka.applyAction(action)
    for i in range(self._actionRepeat):
      self._kuka.applyAction(action)
      p.stepSimulation()
      if self._renders:
        time.sleep(self._timeStep)
      self._observation = self.getExtendedObservation()
      if self._termination():
        break
      self._envStepCounter += 1
    if self._renders:
      time.sleep(self._timeStep)
    self._observation = self.getExtendedObservation()
    #print("self._envStepCounter")
    #print(self._envStepCounter)
    done = self._termination()
-    reward = self._reward()
+    npaction = np.array([action[3]]) #only penalize rotation until learning works well [action[0],action[1],action[3]])
    actionCost = np.linalg.norm(npaction)*10.
    #print("actionCost")
    #print(actionCost)
    reward = self._reward()-actionCost
    #print("reward")
    #print(reward)
    #print("len=%r" % len(self._observation))
    return np.array(self._observation), reward, done, {}
-  def _render(self, mode='human', close=False):
+  def _render(self, mode="rgb_array", close=False):
-      return
+    if mode != "rgb_array":
      return np.array([])
    base_pos,orn = self._p.getBasePositionAndOrientation(self._kuka.kukaUid)
    view_matrix = self._p.computeViewMatrixFromYawPitchRoll(
        cameraTargetPosition=base_pos,
        distance=self._cam_dist,
        yaw=self._cam_yaw,
        pitch=self._cam_pitch,
        roll=0,
        upAxisIndex=2)
    proj_matrix = self._p.computeProjectionMatrixFOV(
        fov=60, aspect=float(RENDER_WIDTH)/RENDER_HEIGHT,
        nearVal=0.1, farVal=100.0)
    (_, _, px, _, _) = self._p.getCameraImage(
        width=RENDER_WIDTH, height=RENDER_HEIGHT, viewMatrix=view_matrix,
        projectionMatrix=proj_matrix, renderer=self._p.ER_BULLET_HARDWARE_OPENGL)
    rgb_array = np.array(px)
    rgb_array = rgb_array[:, :, :3]
    return rgb_array
  def _termination(self):
    #print (self._kuka.endEffectorPos[2])
@@ -154,7 +211,7 @@ class KukaGymEnv(gym.Env):
    #print("self._envStepCounter")
    #print(self._envStepCounter)
-    if (self.terminated or self._envStepCounter>maxSteps):
+    if (self.terminated or self._envStepCounter>self._maxSteps):
      self._observation = self.getExtendedObservation()
      return True
    maxDist = 0.005 
@@ -163,7 +220,7 @@ class KukaGymEnv(gym.Env):
    if (len(closestPoints)):#(actualEndEffectorPos[2] <= -0.43):
      self.terminated = 1
-      #print("closing gripper, attempting grasp")
+      #print("terminating, closing gripper, attempting grasp")
      #start grasp and terminate
      fingerAngle = 0.3
      for i in range (100):
@@ -199,18 +256,22 @@ class KukaGymEnv(gym.Env):
    blockPos,blockOrn=p.getBasePositionAndOrientation(self.blockUid)
    closestPoints = p.getClosestPoints(self.blockUid,self._kuka.kukaUid,1000, -1, self._kuka.kukaEndEffectorIndex) 
-    reward = -1000    
+    reward = -1000
    numPt = len(closestPoints)
    #print(numPt)
    if (numPt>0):
      #print("reward:")
      reward = -closestPoints[0][8]*10
    if (blockPos[2] >0.2):
-      #print("grasped a block!!!")
+      reward = reward+10000
      print("successfully grasped a block!!!")
      #print("self._envStepCounter")
      #print(self._envStepCounter)
-      reward = reward+1000
+      #print("self._envStepCounter")
-
+      #print(self._envStepCounter)
-    #print("reward")
+      #print("reward")
      #print(reward)
     #print("reward")
    #print(reward)
    return reward
--- a/examples/pybullet/gym/pybullet_envs/bullet/racecarGymEnv.py
+++ b/examples/pybullet/gym/pybullet_envs/bullet/racecarGymEnv.py
@@ -15,6 +15,9 @@ import random
 from . import bullet_client
 import pybullet_data
 RENDER_HEIGHT = 720
 RENDER_WIDTH = 960
 class RacecarGymEnv(gym.Env):
  metadata = {
      'render.modes': ['human', 'rgb_array'],
@@ -136,20 +139,27 @@ class RacecarGymEnv(gym.Env):
    return np.array(self._observation), reward, done, {}
  def _render(self, mode='human', close=False):
-      width=320
+    if mode != "rgb_array":
-      height=200
+      return np.array([])
-      img_arr = self._p.getCameraImage(width,height)
+    base_pos,orn = self._p.getBasePositionAndOrientation(self._racecar.racecarUniqueId)
-      w=img_arr[0]
+    view_matrix = self._p.computeViewMatrixFromYawPitchRoll(
-      h=img_arr[1]
+        cameraTargetPosition=base_pos,
-      rgb=img_arr[2]
+        distance=self._cam_dist,
-      dep=img_arr[3]
+        yaw=self._cam_yaw,
-      #print 'width = %d height = %d' % (w,h)
+        pitch=self._cam_pitch,
-      # reshape creates np array
+        roll=0,
-      np_img_arr = np.reshape(rgb, (h, w, 4))
+        upAxisIndex=2)
-      # remove alpha channel
+    proj_matrix = self._p.computeProjectionMatrixFOV(
-      np_img_arr = np_img_arr[:, :, :3]
+        fov=60, aspect=float(RENDER_WIDTH)/RENDER_HEIGHT,
-      return np_img_arr
+        nearVal=0.1, farVal=100.0)
    (_, _, px, _, _) = self._p.getCameraImage(
        width=RENDER_WIDTH, height=RENDER_HEIGHT, viewMatrix=view_matrix,
        projectionMatrix=proj_matrix, renderer=pybullet.ER_BULLET_HARDWARE_OPENGL)
    rgb_array = np.array(px)
    rgb_array = rgb_array[:, :, :3]
    return rgb_array
  def _termination(self):
    return self._envStepCounter>1000
--- a/examples/pybullet/gym/pybullet_envs/bullet/racecarZEDGymEnv.py
+++ b/examples/pybullet/gym/pybullet_envs/bullet/racecarZEDGymEnv.py
@@ -15,6 +15,9 @@ from . import racecar
 import random
 import pybullet_data
 RENDER_HEIGHT = 720
 RENDER_WIDTH = 960
 class RacecarZEDGymEnv(gym.Env):
  metadata = {
      'render.modes': ['human', 'rgb_array'],
@@ -151,7 +154,26 @@ class RacecarZEDGymEnv(gym.Env):
    return np.array(self._observation), reward, done, {}
  def _render(self, mode='human', close=False):
-      return
+    if mode != "rgb_array":
      return np.array([])
    base_pos,orn = self._p.getBasePositionAndOrientation(self._racecar.racecarUniqueId)
    view_matrix = self._p.computeViewMatrixFromYawPitchRoll(
        cameraTargetPosition=base_pos,
        distance=self._cam_dist,
        yaw=self._cam_yaw,
        pitch=self._cam_pitch,
        roll=0,
        upAxisIndex=2)
    proj_matrix = self._p.computeProjectionMatrixFOV(
        fov=60, aspect=float(RENDER_WIDTH)/RENDER_HEIGHT,
        nearVal=0.1, farVal=100.0)
    (_, _, px, _, _) = self._p.getCameraImage(
        width=RENDER_WIDTH, height=RENDER_HEIGHT, viewMatrix=view_matrix,
        projectionMatrix=proj_matrix, renderer=pybullet.ER_BULLET_HARDWARE_OPENGL)
    rgb_array = np.array(px)
    rgb_array = rgb_array[:, :, :3]
    return rgb_array
  def _termination(self):
    return self._envStepCounter>1000
--- a/examples/pybullet/gym/pybullet_envs/examples/kukaGymEnvTest.py
+++ b/examples/pybullet/gym/pybullet_envs/examples/kukaGymEnvTest.py
@@ -9,7 +9,7 @@ import time
 def main():
-	environment = KukaGymEnv(renders=True,isDiscrete=False)
+	environment = KukaGymEnv(renders=True,isDiscrete=False, maxSteps = 10000000)
 	motorsIds=[]
@@ -19,7 +19,7 @@ def main():
 	#motorsIds.append(environment._p.addUserDebugParameter("yaw",-3.14,3.14,0))
 	#motorsIds.append(environment._p.addUserDebugParameter("fingerAngle",0,0.3,.3))
-	dv = 1 
+	dv = 0.01 
 	motorsIds.append(environment._p.addUserDebugParameter("posX",-dv,dv,0))
 	motorsIds.append(environment._p.addUserDebugParameter("posY",-dv,dv,0))
 	motorsIds.append(environment._p.addUserDebugParameter("posZ",-dv,dv,0))
@@ -33,7 +33,7 @@ def main():
 	  for motorId in motorsIds:
 	    action.append(environment._p.readUserDebugParameter(motorId))
-	  state, reward, done, info = environment.step(action)
+	  state, reward, done, info = environment.step2(action)
 	  obs = environment.getExtendedObservation()
 if __name__=="__main__":
--- a/examples/pybullet/gym/pybullet_envs/examples/kukaGymEnvTest2.py
+++ b/examples/pybullet/gym/pybullet_envs/examples/kukaGymEnvTest2.py
@@ -0,0 +1,38 @@
 #add parent dir to find package. Only needed for source code build, pip install doesn't need it.
 import os, inspect
 currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
 parentdir = os.path.dirname(os.path.dirname(currentdir))
 os.sys.path.insert(0,parentdir)
 from pybullet_envs.bullet.kukaGymEnv import KukaGymEnv
 import time
 def main():
 	environment = KukaGymEnv(renders=True,isDiscrete=False, maxSteps = 10000000)
 	motorsIds=[]
 	#motorsIds.append(environment._p.addUserDebugParameter("posX",-1,1,0))
 	#motorsIds.append(environment._p.addUserDebugParameter("posY",-.22,.3,0.0))
 	#motorsIds.append(environment._p.addUserDebugParameter("posZ",0.1,1,0.2))
 	#motorsIds.append(environment._p.addUserDebugParameter("yaw",-3.14,3.14,0))
 	#motorsIds.append(environment._p.addUserDebugParameter("fingerAngle",0,0.3,.3))
 	dv = 1 
 	motorsIds.append(environment._p.addUserDebugParameter("posX",-dv,dv,0))
 	motorsIds.append(environment._p.addUserDebugParameter("posY",-dv,dv,0))
 	motorsIds.append(environment._p.addUserDebugParameter("yaw",-dv,dv,0))
 	done = False
 	while (not done):
 	  action=[]
 	  for motorId in motorsIds:
 	    action.append(environment._p.readUserDebugParameter(motorId))
 	  state, reward, done, info = environment.step(action)
 	  obs = environment.getExtendedObservation()
 if __name__=="__main__":
    main()