From 4798d66f19056e144516cce3d44b32bd68008f91 Mon Sep 17 00:00:00 2001 From: Erwin Coumans Date: Wed, 1 Nov 2017 18:06:47 -0700 Subject: [PATCH] fixes in kuka/racecar environment. kuka still doesn't train well, work-in-progress --- .../gym/pybullet_envs/agents/configs.py | 6 +- .../pybullet/gym/pybullet_envs/bullet/kuka.py | 15 +- .../gym/pybullet_envs/bullet/kukaCamGymEnv.py | 27 +++- .../gym/pybullet_envs/bullet/kukaGymEnv.py | 131 +++++++++++++----- .../gym/pybullet_envs/bullet/racecarGymEnv.py | 36 +++-- .../pybullet_envs/bullet/racecarZEDGymEnv.py | 24 +++- .../pybullet_envs/examples/kukaGymEnvTest.py | 6 +- .../pybullet_envs/examples/kukaGymEnvTest2.py | 38 +++++ 8 files changed, 218 insertions(+), 65 deletions(-) create mode 100644 examples/pybullet/gym/pybullet_envs/examples/kukaGymEnvTest2.py diff --git a/examples/pybullet/gym/pybullet_envs/agents/configs.py b/examples/pybullet/gym/pybullet_envs/agents/configs.py index 1444637c8..edf4cd23d 100644 --- a/examples/pybullet/gym/pybullet_envs/agents/configs.py +++ b/examples/pybullet/gym/pybullet_envs/agents/configs.py @@ -34,7 +34,7 @@ def default(): # General algorithm = ppo.PPOAlgorithm num_agents = 10 - eval_episodes = 25 + eval_episodes = 20 use_gpu = False # Network network = networks.feed_forward_gaussian @@ -47,7 +47,7 @@ def default(): init_mean_factor = 0.05 init_logstd = -1 # Optimization - update_every = 25 + update_every = 20 policy_optimizer = 'AdamOptimizer' value_optimizer = 'AdamOptimizer' update_epochs_policy = 50 @@ -105,7 +105,7 @@ def pybullet_kuka_grasping(): locals().update(default()) # Environment env = 'KukaBulletEnv-v0' - max_length = 10 + max_length = 1000 steps = 1e7 # 10M return locals() diff --git a/examples/pybullet/gym/pybullet_envs/bullet/kuka.py b/examples/pybullet/gym/pybullet_envs/bullet/kuka.py index b2b02f2e7..4c745cca1 100644 --- a/examples/pybullet/gym/pybullet_envs/bullet/kuka.py +++ b/examples/pybullet/gym/pybullet_envs/bullet/kuka.py @@ -25,6 +25,7 @@ class Kuka: self.useNullSpace =21 self.useOrientation = 1 self.kukaEndEffectorIndex = 6 + self.kukaGripperIndex = 7 #lower limits for null space self.ll=[-.967,-2 ,-2.96,0.19,-2.96,-2.09,-3.05] #upper limits for null space @@ -76,7 +77,7 @@ class Kuka: def getObservation(self): observation = [] - state = p.getLinkState(self.kukaUid,self.kukaEndEffectorIndex) + state = p.getLinkState(self.kukaUid,self.kukaGripperIndex) pos = state[0] orn = state[1] euler = p.getEulerFromQuaternion(orn) @@ -106,13 +107,13 @@ class Kuka: self.endEffectorPos[0] = self.endEffectorPos[0]+dx - if (self.endEffectorPos[0]>0.75): - self.endEffectorPos[0]=0.75 - if (self.endEffectorPos[0]<0.45): - self.endEffectorPos[0]=0.45 + if (self.endEffectorPos[0]>0.65): + self.endEffectorPos[0]=0.65 + if (self.endEffectorPos[0]<0.50): + self.endEffectorPos[0]=0.50 self.endEffectorPos[1] = self.endEffectorPos[1]+dy - if (self.endEffectorPos[1]<-0.22): - self.endEffectorPos[1]=-0.22 + if (self.endEffectorPos[1]<-0.17): + self.endEffectorPos[1]=-0.17 if (self.endEffectorPos[1]>0.22): self.endEffectorPos[1]=0.22 diff --git a/examples/pybullet/gym/pybullet_envs/bullet/kukaCamGymEnv.py b/examples/pybullet/gym/pybullet_envs/bullet/kukaCamGymEnv.py index a1cdc8242..4e7d7e70e 100644 --- a/examples/pybullet/gym/pybullet_envs/bullet/kukaCamGymEnv.py +++ b/examples/pybullet/gym/pybullet_envs/bullet/kukaCamGymEnv.py @@ -16,6 +16,9 @@ import random import pybullet_data maxSteps = 1000 +RENDER_HEIGHT = 720 +RENDER_WIDTH = 960 + class KukaCamGymEnv(gym.Env): metadata = { 'render.modes': ['human', 'rgb_array'], @@ -24,9 +27,9 @@ class KukaCamGymEnv(gym.Env): def __init__(self, urdfRoot=pybullet_data.getDataPath(), - actionRepeat=25, + actionRepeat=1, isEnableSelfCollision=True, - renders=True, + renders=False, isDiscrete=False): self._timeStep = 1./240. self._urdfRoot = urdfRoot @@ -163,7 +166,25 @@ class KukaCamGymEnv(gym.Env): return np.array(self._observation), reward, done, {} def _render(self, mode='human', close=False): - return + if mode != "rgb_array": + return np.array([]) + base_pos,orn = self._p.getBasePositionAndOrientation(self._racecar.racecarUniqueId) + view_matrix = self._p.computeViewMatrixFromYawPitchRoll( + cameraTargetPosition=base_pos, + distance=self._cam_dist, + yaw=self._cam_yaw, + pitch=self._cam_pitch, + roll=0, + upAxisIndex=2) + proj_matrix = self._p.computeProjectionMatrixFOV( + fov=60, aspect=float(RENDER_WIDTH)/RENDER_HEIGHT, + nearVal=0.1, farVal=100.0) + (_, _, px, _, _) = self._p.getCameraImage( + width=RENDER_WIDTH, height=RENDER_HEIGHT, viewMatrix=view_matrix, + projectionMatrix=proj_matrix, renderer=pybullet.ER_BULLET_HARDWARE_OPENGL) + rgb_array = np.array(px) + rgb_array = rgb_array[:, :, :3] + return rgb_array def _termination(self): #print (self._kuka.endEffectorPos[2]) diff --git a/examples/pybullet/gym/pybullet_envs/bullet/kukaGymEnv.py b/examples/pybullet/gym/pybullet_envs/bullet/kukaGymEnv.py index 0dfb62490..d2a47935d 100644 --- a/examples/pybullet/gym/pybullet_envs/bullet/kukaGymEnv.py +++ b/examples/pybullet/gym/pybullet_envs/bullet/kukaGymEnv.py @@ -14,8 +14,10 @@ from . import kuka import random import pybullet_data -maxSteps = 1000 +largeValObservation = 100 +RENDER_HEIGHT = 720 +RENDER_WIDTH = 960 class KukaGymEnv(gym.Env): metadata = { @@ -27,8 +29,10 @@ class KukaGymEnv(gym.Env): urdfRoot=pybullet_data.getDataPath(), actionRepeat=1, isEnableSelfCollision=True, - renders=True, - isDiscrete=False): + renders=False, + isDiscrete=False, + maxSteps = 1000): + #print("KukaGymEnv __init__") self._isDiscrete = isDiscrete self._timeStep = 1./240. self._urdfRoot = urdfRoot @@ -37,7 +41,12 @@ class KukaGymEnv(gym.Env): self._observation = [] self._envStepCounter = 0 self._renders = renders + self._maxSteps = maxSteps self.terminated = 0 + self._cam_dist = 1.3 + self._cam_yaw = 180 + self._cam_pitch = -40 + self._p = p if self._renders: cid = p.connect(p.SHARED_MEMORY) @@ -53,7 +62,7 @@ class KukaGymEnv(gym.Env): #print("observationDim") #print(observationDim) - observation_high = np.array([np.finfo(np.float32).max] * observationDim) + observation_high = np.array([largeValObservation] * observationDim) if (self._isDiscrete): self.action_space = spaces.Discrete(7) else: @@ -65,6 +74,7 @@ class KukaGymEnv(gym.Env): self.viewer = None def _reset(self): + #print("KukaGymEnv _reset") self.terminated = 0 p.resetSimulation() p.setPhysicsEngineParameter(numSolverIterations=150) @@ -73,11 +83,11 @@ class KukaGymEnv(gym.Env): p.loadURDF(os.path.join(self._urdfRoot,"table/table.urdf"), 0.5000000,0.00000,-.820000,0.000000,0.000000,0.0,1.0) - xpos = 0.5 +0.2*random.random() - ypos = 0 +0.25*random.random() - ang = 3.1415925438*random.random() + xpos = 0.55 +0.12*random.random() + ypos = 0 +0.2*random.random() + ang = 3.14*0.5+3.1415925438*random.random() orn = p.getQuaternionFromEuler([0,0,ang]) - self.blockUid =p.loadURDF(os.path.join(self._urdfRoot,"block.urdf"), xpos,ypos,-0.1,orn[0],orn[1],orn[2],orn[3]) + self.blockUid =p.loadURDF(os.path.join(self._urdfRoot,"block.urdf"), xpos,ypos,-0.15,orn[0],orn[1],orn[2],orn[3]) p.setGravity(0,0,-10) self._kuka = kuka.Kuka(urdfRootPath=self._urdfRoot, timeStep=self._timeStep) @@ -95,57 +105,104 @@ class KukaGymEnv(gym.Env): def getExtendedObservation(self): self._observation = self._kuka.getObservation() - eeState = p.getLinkState(self._kuka.kukaUid,self._kuka.kukaEndEffectorIndex) - endEffectorPos = eeState[0] - endEffectorOrn = eeState[1] + gripperState = p.getLinkState(self._kuka.kukaUid,self._kuka.kukaGripperIndex) + gripperPos = gripperState[0] + gripperOrn = gripperState[1] blockPos,blockOrn = p.getBasePositionAndOrientation(self.blockUid) - invEEPos,invEEOrn = p.invertTransform(endEffectorPos,endEffectorOrn) - blockPosInEE,blockOrnInEE = p.multiplyTransforms(invEEPos,invEEOrn,blockPos,blockOrn) - blockEulerInEE = p.getEulerFromQuaternion(blockOrnInEE) - self._observation.extend(list(blockPosInEE)) - self._observation.extend(list(blockEulerInEE)) - + invGripperPos,invGripperOrn = p.invertTransform(gripperPos,gripperOrn) + gripperMat = p.getMatrixFromQuaternion(gripperOrn) + dir0 = [gripperMat[0],gripperMat[3],gripperMat[6]] + dir1 = [gripperMat[1],gripperMat[4],gripperMat[7]] + dir2 = [gripperMat[2],gripperMat[5],gripperMat[8]] + + gripperEul = p.getEulerFromQuaternion(gripperOrn) + #print("gripperEul") + #print(gripperEul) + blockPosInGripper,blockOrnInGripper = p.multiplyTransforms(invGripperPos,invGripperOrn,blockPos,blockOrn) + projectedBlockPos2D =[blockPosInGripper[0],blockPosInGripper[1]] + blockEulerInGripper = p.getEulerFromQuaternion(blockOrnInGripper) + #print("projectedBlockPos2D") + #print(projectedBlockPos2D) + #print("blockEulerInGripper") + #print(blockEulerInGripper) + + #we return the relative x,y position and euler angle of block in gripper space + blockInGripperPosXYEulZ =[blockPosInGripper[0],blockPosInGripper[1],blockEulerInGripper[2]] + + #p.addUserDebugLine(gripperPos,[gripperPos[0]+dir0[0],gripperPos[1]+dir0[1],gripperPos[2]+dir0[2]],[1,0,0],lifeTime=1) + #p.addUserDebugLine(gripperPos,[gripperPos[0]+dir1[0],gripperPos[1]+dir1[1],gripperPos[2]+dir1[2]],[0,1,0],lifeTime=1) + #p.addUserDebugLine(gripperPos,[gripperPos[0]+dir2[0],gripperPos[1]+dir2[1],gripperPos[2]+dir2[2]],[0,0,1],lifeTime=1) + + self._observation.extend(list(blockInGripperPosXYEulZ)) return self._observation def _step(self, action): if (self._isDiscrete): - dv = 0.01 + dv = 0.005 dx = [0,-dv,dv,0,0,0,0][action] dy = [0,0,0,-dv,dv,0,0][action] - da = [0,0,0,0,0,-0.1,0.1][action] + da = [0,0,0,0,0,-0.05,0.05][action] f = 0.3 realAction = [dx,dy,-0.002,da,f] else: - dv = 0.01 + #print("action[0]=", str(action[0])) + dv = 0.005 dx = action[0] * dv dy = action[1] * dv - da = action[2] * 0.1 + da = action[2] * 0.05 f = 0.3 realAction = [dx,dy,-0.002,da,f] return self.step2( realAction) def step2(self, action): - self._kuka.applyAction(action) for i in range(self._actionRepeat): + self._kuka.applyAction(action) p.stepSimulation() - if self._renders: - time.sleep(self._timeStep) - self._observation = self.getExtendedObservation() if self._termination(): break self._envStepCounter += 1 + if self._renders: + time.sleep(self._timeStep) + self._observation = self.getExtendedObservation() + #print("self._envStepCounter") #print(self._envStepCounter) done = self._termination() - reward = self._reward() + npaction = np.array([action[3]]) #only penalize rotation until learning works well [action[0],action[1],action[3]]) + actionCost = np.linalg.norm(npaction)*10. + #print("actionCost") + #print(actionCost) + reward = self._reward()-actionCost + #print("reward") + #print(reward) + #print("len=%r" % len(self._observation)) return np.array(self._observation), reward, done, {} - def _render(self, mode='human', close=False): - return + def _render(self, mode="rgb_array", close=False): + if mode != "rgb_array": + return np.array([]) + base_pos,orn = self._p.getBasePositionAndOrientation(self._kuka.kukaUid) + view_matrix = self._p.computeViewMatrixFromYawPitchRoll( + cameraTargetPosition=base_pos, + distance=self._cam_dist, + yaw=self._cam_yaw, + pitch=self._cam_pitch, + roll=0, + upAxisIndex=2) + proj_matrix = self._p.computeProjectionMatrixFOV( + fov=60, aspect=float(RENDER_WIDTH)/RENDER_HEIGHT, + nearVal=0.1, farVal=100.0) + (_, _, px, _, _) = self._p.getCameraImage( + width=RENDER_WIDTH, height=RENDER_HEIGHT, viewMatrix=view_matrix, + projectionMatrix=proj_matrix, renderer=self._p.ER_BULLET_HARDWARE_OPENGL) + rgb_array = np.array(px) + rgb_array = rgb_array[:, :, :3] + return rgb_array + def _termination(self): #print (self._kuka.endEffectorPos[2]) @@ -154,7 +211,7 @@ class KukaGymEnv(gym.Env): #print("self._envStepCounter") #print(self._envStepCounter) - if (self.terminated or self._envStepCounter>maxSteps): + if (self.terminated or self._envStepCounter>self._maxSteps): self._observation = self.getExtendedObservation() return True maxDist = 0.005 @@ -163,7 +220,7 @@ class KukaGymEnv(gym.Env): if (len(closestPoints)):#(actualEndEffectorPos[2] <= -0.43): self.terminated = 1 - #print("closing gripper, attempting grasp") + #print("terminating, closing gripper, attempting grasp") #start grasp and terminate fingerAngle = 0.3 for i in range (100): @@ -199,18 +256,22 @@ class KukaGymEnv(gym.Env): blockPos,blockOrn=p.getBasePositionAndOrientation(self.blockUid) closestPoints = p.getClosestPoints(self.blockUid,self._kuka.kukaUid,1000, -1, self._kuka.kukaEndEffectorIndex) - reward = -1000 + reward = -1000 + numPt = len(closestPoints) #print(numPt) if (numPt>0): #print("reward:") reward = -closestPoints[0][8]*10 if (blockPos[2] >0.2): - #print("grasped a block!!!") + reward = reward+10000 + print("successfully grasped a block!!!") #print("self._envStepCounter") #print(self._envStepCounter) - reward = reward+1000 - - #print("reward") + #print("self._envStepCounter") + #print(self._envStepCounter) + #print("reward") + #print(reward) + #print("reward") #print(reward) return reward diff --git a/examples/pybullet/gym/pybullet_envs/bullet/racecarGymEnv.py b/examples/pybullet/gym/pybullet_envs/bullet/racecarGymEnv.py index bea79bd77..7537d54c1 100644 --- a/examples/pybullet/gym/pybullet_envs/bullet/racecarGymEnv.py +++ b/examples/pybullet/gym/pybullet_envs/bullet/racecarGymEnv.py @@ -15,6 +15,9 @@ import random from . import bullet_client import pybullet_data +RENDER_HEIGHT = 720 +RENDER_WIDTH = 960 + class RacecarGymEnv(gym.Env): metadata = { 'render.modes': ['human', 'rgb_array'], @@ -136,20 +139,27 @@ class RacecarGymEnv(gym.Env): return np.array(self._observation), reward, done, {} def _render(self, mode='human', close=False): - width=320 - height=200 - img_arr = self._p.getCameraImage(width,height) - w=img_arr[0] - h=img_arr[1] - rgb=img_arr[2] - dep=img_arr[3] - #print 'width = %d height = %d' % (w,h) - # reshape creates np array - np_img_arr = np.reshape(rgb, (h, w, 4)) - # remove alpha channel - np_img_arr = np_img_arr[:, :, :3] - return np_img_arr + if mode != "rgb_array": + return np.array([]) + base_pos,orn = self._p.getBasePositionAndOrientation(self._racecar.racecarUniqueId) + view_matrix = self._p.computeViewMatrixFromYawPitchRoll( + cameraTargetPosition=base_pos, + distance=self._cam_dist, + yaw=self._cam_yaw, + pitch=self._cam_pitch, + roll=0, + upAxisIndex=2) + proj_matrix = self._p.computeProjectionMatrixFOV( + fov=60, aspect=float(RENDER_WIDTH)/RENDER_HEIGHT, + nearVal=0.1, farVal=100.0) + (_, _, px, _, _) = self._p.getCameraImage( + width=RENDER_WIDTH, height=RENDER_HEIGHT, viewMatrix=view_matrix, + projectionMatrix=proj_matrix, renderer=pybullet.ER_BULLET_HARDWARE_OPENGL) + rgb_array = np.array(px) + rgb_array = rgb_array[:, :, :3] + return rgb_array + def _termination(self): return self._envStepCounter>1000 diff --git a/examples/pybullet/gym/pybullet_envs/bullet/racecarZEDGymEnv.py b/examples/pybullet/gym/pybullet_envs/bullet/racecarZEDGymEnv.py index 507f6cb1a..d9546de8c 100644 --- a/examples/pybullet/gym/pybullet_envs/bullet/racecarZEDGymEnv.py +++ b/examples/pybullet/gym/pybullet_envs/bullet/racecarZEDGymEnv.py @@ -15,6 +15,9 @@ from . import racecar import random import pybullet_data +RENDER_HEIGHT = 720 +RENDER_WIDTH = 960 + class RacecarZEDGymEnv(gym.Env): metadata = { 'render.modes': ['human', 'rgb_array'], @@ -151,7 +154,26 @@ class RacecarZEDGymEnv(gym.Env): return np.array(self._observation), reward, done, {} def _render(self, mode='human', close=False): - return + if mode != "rgb_array": + return np.array([]) + base_pos,orn = self._p.getBasePositionAndOrientation(self._racecar.racecarUniqueId) + view_matrix = self._p.computeViewMatrixFromYawPitchRoll( + cameraTargetPosition=base_pos, + distance=self._cam_dist, + yaw=self._cam_yaw, + pitch=self._cam_pitch, + roll=0, + upAxisIndex=2) + proj_matrix = self._p.computeProjectionMatrixFOV( + fov=60, aspect=float(RENDER_WIDTH)/RENDER_HEIGHT, + nearVal=0.1, farVal=100.0) + (_, _, px, _, _) = self._p.getCameraImage( + width=RENDER_WIDTH, height=RENDER_HEIGHT, viewMatrix=view_matrix, + projectionMatrix=proj_matrix, renderer=pybullet.ER_BULLET_HARDWARE_OPENGL) + rgb_array = np.array(px) + rgb_array = rgb_array[:, :, :3] + return rgb_array + def _termination(self): return self._envStepCounter>1000 diff --git a/examples/pybullet/gym/pybullet_envs/examples/kukaGymEnvTest.py b/examples/pybullet/gym/pybullet_envs/examples/kukaGymEnvTest.py index 5649063b2..3917f08cf 100644 --- a/examples/pybullet/gym/pybullet_envs/examples/kukaGymEnvTest.py +++ b/examples/pybullet/gym/pybullet_envs/examples/kukaGymEnvTest.py @@ -9,7 +9,7 @@ import time def main(): - environment = KukaGymEnv(renders=True,isDiscrete=False) + environment = KukaGymEnv(renders=True,isDiscrete=False, maxSteps = 10000000) motorsIds=[] @@ -19,7 +19,7 @@ def main(): #motorsIds.append(environment._p.addUserDebugParameter("yaw",-3.14,3.14,0)) #motorsIds.append(environment._p.addUserDebugParameter("fingerAngle",0,0.3,.3)) - dv = 1 + dv = 0.01 motorsIds.append(environment._p.addUserDebugParameter("posX",-dv,dv,0)) motorsIds.append(environment._p.addUserDebugParameter("posY",-dv,dv,0)) motorsIds.append(environment._p.addUserDebugParameter("posZ",-dv,dv,0)) @@ -33,7 +33,7 @@ def main(): for motorId in motorsIds: action.append(environment._p.readUserDebugParameter(motorId)) - state, reward, done, info = environment.step(action) + state, reward, done, info = environment.step2(action) obs = environment.getExtendedObservation() if __name__=="__main__": diff --git a/examples/pybullet/gym/pybullet_envs/examples/kukaGymEnvTest2.py b/examples/pybullet/gym/pybullet_envs/examples/kukaGymEnvTest2.py new file mode 100644 index 000000000..5c7f245c3 --- /dev/null +++ b/examples/pybullet/gym/pybullet_envs/examples/kukaGymEnvTest2.py @@ -0,0 +1,38 @@ +#add parent dir to find package. Only needed for source code build, pip install doesn't need it. +import os, inspect +currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) +parentdir = os.path.dirname(os.path.dirname(currentdir)) +os.sys.path.insert(0,parentdir) + +from pybullet_envs.bullet.kukaGymEnv import KukaGymEnv +import time + +def main(): + + environment = KukaGymEnv(renders=True,isDiscrete=False, maxSteps = 10000000) + + + motorsIds=[] + #motorsIds.append(environment._p.addUserDebugParameter("posX",-1,1,0)) + #motorsIds.append(environment._p.addUserDebugParameter("posY",-.22,.3,0.0)) + #motorsIds.append(environment._p.addUserDebugParameter("posZ",0.1,1,0.2)) + #motorsIds.append(environment._p.addUserDebugParameter("yaw",-3.14,3.14,0)) + #motorsIds.append(environment._p.addUserDebugParameter("fingerAngle",0,0.3,.3)) + + dv = 1 + motorsIds.append(environment._p.addUserDebugParameter("posX",-dv,dv,0)) + motorsIds.append(environment._p.addUserDebugParameter("posY",-dv,dv,0)) + motorsIds.append(environment._p.addUserDebugParameter("yaw",-dv,dv,0)) + + done = False + while (not done): + + action=[] + for motorId in motorsIds: + action.append(environment._p.readUserDebugParameter(motorId)) + + state, reward, done, info = environment.step(action) + obs = environment.getExtendedObservation() + +if __name__=="__main__": + main()