diff --git a/examples/pybullet/gym/pybullet_envs/bullet/cartpole_bullet.py b/examples/pybullet/gym/pybullet_envs/bullet/cartpole_bullet.py index e28a9e962..277e52ba2 100644 --- a/examples/pybullet/gym/pybullet_envs/bullet/cartpole_bullet.py +++ b/examples/pybullet/gym/pybullet_envs/bullet/cartpole_bullet.py @@ -64,10 +64,10 @@ class CartPoleBulletEnv(gym.Env): # time.sleep(self.timeStep) self.state = p.getJointState(self.cartpole, 1)[0:2] + p.getJointState(self.cartpole, 0)[0:2] theta, theta_dot, x, x_dot = self.state - + dv = 0.1 deltav = [-10.*dv,-5.*dv, -2.*dv, -0.1*dv, 0, 0.1*dv, 2.*dv,5.*dv, 10.*dv][action] - + p.setJointMotorControl2(self.cartpole, 0, p.VELOCITY_CONTROL, targetVelocity=(deltav + self.state[3])) done = x < -self.x_threshold \ @@ -99,3 +99,8 @@ class CartPoleBulletEnv(gym.Env): def _render(self, mode='human', close=False): return + + render = _render + reset = _reset + seed = _seed + step = _step diff --git a/examples/pybullet/gym/pybullet_envs/bullet/kukaCamGymEnv.py b/examples/pybullet/gym/pybullet_envs/bullet/kukaCamGymEnv.py index 4e7d7e70e..8ed171be0 100644 --- a/examples/pybullet/gym/pybullet_envs/bullet/kukaCamGymEnv.py +++ b/examples/pybullet/gym/pybullet_envs/bullet/kukaCamGymEnv.py @@ -56,8 +56,8 @@ class KukaCamGymEnv(gym.Env): observationDim = len(self.getExtendedObservation()) #print("observationDim") #print(observationDim) - - observation_high = np.array([np.finfo(np.float32).max] * observationDim) + + observation_high = np.array([np.finfo(np.float32).max] * observationDim) if (self._isDiscrete): self.action_space = spaces.Discrete(7) else: @@ -74,15 +74,15 @@ class KukaCamGymEnv(gym.Env): p.setPhysicsEngineParameter(numSolverIterations=150) p.setTimeStep(self._timeStep) p.loadURDF(os.path.join(self._urdfRoot,"plane.urdf"),[0,0,-1]) - + p.loadURDF(os.path.join(self._urdfRoot,"table/table.urdf"), 0.5000000,0.00000,-.820000,0.000000,0.000000,0.0,1.0) - + xpos = 0.5 +0.2*random.random() ypos = 0 +0.25*random.random() ang = 3.1415925438*random.random() orn = p.getQuaternionFromEuler([0,0,ang]) self.blockUid =p.loadURDF(os.path.join(self._urdfRoot,"block.urdf"), xpos,ypos,-0.1,orn[0],orn[1],orn[2],orn[3]) - + p.setGravity(0,0,-10) self._kuka = kuka.Kuka(urdfRootPath=self._urdfRoot, timeStep=self._timeStep) self._envStepCounter = 0 @@ -98,7 +98,7 @@ class KukaCamGymEnv(gym.Env): return [seed] def getExtendedObservation(self): - + #camEyePos = [0.03,0.236,0.54] #distance = 1.06 #pitch=-56 @@ -118,13 +118,13 @@ class KukaCamGymEnv(gym.Env): viewMat = [-0.5120397806167603, 0.7171027660369873, -0.47284144163131714, 0.0, -0.8589617609977722, -0.42747554183006287, 0.28186774253845215, 0.0, 0.0, 0.5504802465438843, 0.8348482847213745, 0.0, 0.1925382763147354, -0.24935829639434814, -0.4401884973049164, 1.0] #projMatrix = camInfo[3]#[0.7499999403953552, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, -1.0000200271606445, -1.0, 0.0, 0.0, -0.02000020071864128, 0.0] projMatrix = [0.75, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, -1.0000200271606445, -1.0, 0.0, 0.0, -0.02000020071864128, 0.0] - + img_arr = p.getCameraImage(width=self._width,height=self._height,viewMatrix=viewMat,projectionMatrix=projMatrix) rgb=img_arr[2] np_img_arr = np.reshape(rgb, (self._height, self._width, 4)) self._observation = np_img_arr return self._observation - + def _step(self, action): if (self._isDiscrete): dv = 0.01 @@ -142,7 +142,7 @@ class KukaCamGymEnv(gym.Env): realAction = [dx,dy,-0.002,da,f] return self.step2( realAction) - + def step2(self, action): for i in range(self._actionRepeat): self._kuka.applyAction(action) @@ -158,11 +158,11 @@ class KukaCamGymEnv(gym.Env): #print("self._envStepCounter") #print(self._envStepCounter) - + done = self._termination() reward = self._reward() #print("len=%r" % len(self._observation)) - + return np.array(self._observation), reward, done, {} def _render(self, mode='human', close=False): @@ -190,18 +190,18 @@ class KukaCamGymEnv(gym.Env): #print (self._kuka.endEffectorPos[2]) state = p.getLinkState(self._kuka.kukaUid,self._kuka.kukaEndEffectorIndex) actualEndEffectorPos = state[0] - + #print("self._envStepCounter") #print(self._envStepCounter) if (self.terminated or self._envStepCounter>maxSteps): self._observation = self.getExtendedObservation() return True - maxDist = 0.005 + maxDist = 0.005 closestPoints = p.getClosestPoints(self._kuka.trayUid, self._kuka.kukaUid,maxDist) - + if (len(closestPoints)):#(actualEndEffectorPos[2] <= -0.43): self.terminated = 1 - + #print("closing gripper, attempting grasp") #start grasp and terminate fingerAngle = 0.3 @@ -212,7 +212,7 @@ class KukaCamGymEnv(gym.Env): fingerAngle = fingerAngle-(0.3/100.) if (fingerAngle<0): fingerAngle=0 - + for i in range (1000): graspAction = [0,0,0.001,0,fingerAngle] self._kuka.applyAction(graspAction) @@ -227,18 +227,18 @@ class KukaCamGymEnv(gym.Env): if (actualEndEffectorPos[2]>0.5): break - + self._observation = self.getExtendedObservation() return True return False - + def _reward(self): - + #rewards is height of target object blockPos,blockOrn=p.getBasePositionAndOrientation(self.blockUid) - closestPoints = p.getClosestPoints(self.blockUid,self._kuka.kukaUid,1000, -1, self._kuka.kukaEndEffectorIndex) + closestPoints = p.getClosestPoints(self.blockUid,self._kuka.kukaUid,1000, -1, self._kuka.kukaEndEffectorIndex) - reward = -1000 + reward = -1000 numPt = len(closestPoints) #print(numPt) if (numPt>0): @@ -254,3 +254,7 @@ class KukaCamGymEnv(gym.Env): #print(reward) return reward + render = _render + reset = _reset + seed = _seed + step = _step diff --git a/examples/pybullet/gym/pybullet_envs/bullet/kukaGymEnv.py b/examples/pybullet/gym/pybullet_envs/bullet/kukaGymEnv.py index b9ef16017..6fa57e76a 100644 --- a/examples/pybullet/gym/pybullet_envs/bullet/kukaGymEnv.py +++ b/examples/pybullet/gym/pybullet_envs/bullet/kukaGymEnv.py @@ -44,7 +44,7 @@ class KukaGymEnv(gym.Env): self._maxSteps = maxSteps self.terminated = 0 self._cam_dist = 1.3 - self._cam_yaw = 180 + self._cam_yaw = 180 self._cam_pitch = -40 self._p = p @@ -61,8 +61,8 @@ class KukaGymEnv(gym.Env): observationDim = len(self.getExtendedObservation()) #print("observationDim") #print(observationDim) - - observation_high = np.array([largeValObservation] * observationDim) + + observation_high = np.array([largeValObservation] * observationDim) if (self._isDiscrete): self.action_space = spaces.Discrete(7) else: @@ -80,15 +80,15 @@ class KukaGymEnv(gym.Env): p.setPhysicsEngineParameter(numSolverIterations=150) p.setTimeStep(self._timeStep) p.loadURDF(os.path.join(self._urdfRoot,"plane.urdf"),[0,0,-1]) - + p.loadURDF(os.path.join(self._urdfRoot,"table/table.urdf"), 0.5000000,0.00000,-.820000,0.000000,0.000000,0.0,1.0) - + xpos = 0.55 +0.12*random.random() ypos = 0 +0.2*random.random() ang = 3.14*0.5+3.1415925438*random.random() orn = p.getQuaternionFromEuler([0,0,ang]) self.blockUid =p.loadURDF(os.path.join(self._urdfRoot,"block.urdf"), xpos,ypos,-0.15,orn[0],orn[1],orn[2],orn[3]) - + p.setGravity(0,0,-10) self._kuka = kuka.Kuka(urdfRootPath=self._urdfRoot, timeStep=self._timeStep) self._envStepCounter = 0 @@ -115,7 +115,7 @@ class KukaGymEnv(gym.Env): dir0 = [gripperMat[0],gripperMat[3],gripperMat[6]] dir1 = [gripperMat[1],gripperMat[4],gripperMat[7]] dir2 = [gripperMat[2],gripperMat[5],gripperMat[8]] - + gripperEul = p.getEulerFromQuaternion(gripperOrn) #print("gripperEul") #print(gripperEul) @@ -126,17 +126,17 @@ class KukaGymEnv(gym.Env): #print(projectedBlockPos2D) #print("blockEulerInGripper") #print(blockEulerInGripper) - + #we return the relative x,y position and euler angle of block in gripper space blockInGripperPosXYEulZ =[blockPosInGripper[0],blockPosInGripper[1],blockEulerInGripper[2]] - + #p.addUserDebugLine(gripperPos,[gripperPos[0]+dir0[0],gripperPos[1]+dir0[1],gripperPos[2]+dir0[2]],[1,0,0],lifeTime=1) #p.addUserDebugLine(gripperPos,[gripperPos[0]+dir1[0],gripperPos[1]+dir1[1],gripperPos[2]+dir1[2]],[0,1,0],lifeTime=1) #p.addUserDebugLine(gripperPos,[gripperPos[0]+dir2[0],gripperPos[1]+dir2[1],gripperPos[2]+dir2[2]],[0,0,1],lifeTime=1) - + self._observation.extend(list(blockInGripperPosXYEulZ)) return self._observation - + def _step(self, action): if (self._isDiscrete): dv = 0.005 @@ -154,7 +154,7 @@ class KukaGymEnv(gym.Env): f = 0.3 realAction = [dx,dy,-0.002,da,f] return self.step2( realAction) - + def step2(self, action): for i in range(self._actionRepeat): self._kuka.applyAction(action) @@ -168,7 +168,7 @@ class KukaGymEnv(gym.Env): #print("self._envStepCounter") #print(self._envStepCounter) - + done = self._termination() npaction = np.array([action[3]]) #only penalize rotation until learning works well [action[0],action[1],action[3]]) actionCost = np.linalg.norm(npaction)*10. @@ -177,9 +177,9 @@ class KukaGymEnv(gym.Env): reward = self._reward()-actionCost #print("reward") #print(reward) - + #print("len=%r" % len(self._observation)) - + return np.array(self._observation), reward, done, {} def _render(self, mode="rgb_array", close=False): @@ -208,18 +208,18 @@ class KukaGymEnv(gym.Env): #print (self._kuka.endEffectorPos[2]) state = p.getLinkState(self._kuka.kukaUid,self._kuka.kukaEndEffectorIndex) actualEndEffectorPos = state[0] - + #print("self._envStepCounter") #print(self._envStepCounter) if (self.terminated or self._envStepCounter>self._maxSteps): self._observation = self.getExtendedObservation() return True - maxDist = 0.005 + maxDist = 0.005 closestPoints = p.getClosestPoints(self._kuka.trayUid, self._kuka.kukaUid,maxDist) - + if (len(closestPoints)):#(actualEndEffectorPos[2] <= -0.43): self.terminated = 1 - + #print("terminating, closing gripper, attempting grasp") #start grasp and terminate fingerAngle = 0.3 @@ -230,7 +230,7 @@ class KukaGymEnv(gym.Env): fingerAngle = fingerAngle-(0.3/100.) if (fingerAngle<0): fingerAngle=0 - + for i in range (1000): graspAction = [0,0,0.001,0,fingerAngle] self._kuka.applyAction(graspAction) @@ -245,19 +245,19 @@ class KukaGymEnv(gym.Env): if (actualEndEffectorPos[2]>0.5): break - + self._observation = self.getExtendedObservation() return True return False - + def _reward(self): - + #rewards is height of target object blockPos,blockOrn=p.getBasePositionAndOrientation(self.blockUid) - closestPoints = p.getClosestPoints(self.blockUid,self._kuka.kukaUid,1000, -1, self._kuka.kukaEndEffectorIndex) + closestPoints = p.getClosestPoints(self.blockUid,self._kuka.kukaUid,1000, -1, self._kuka.kukaEndEffectorIndex) reward = -1000 - + numPt = len(closestPoints) #print(numPt) if (numPt>0): @@ -276,10 +276,7 @@ class KukaGymEnv(gym.Env): #print(reward) return reward - def reset(self): - """Resets the state of the environment and returns an initial observation. - - Returns: observation (object): the initial observation of the - space. - """ - return self._reset() \ No newline at end of file + render = _render + reset = _reset + seed = _seed + step = _step diff --git a/examples/pybullet/gym/pybullet_envs/bullet/minitaur_duck_gym_env.py b/examples/pybullet/gym/pybullet_envs/bullet/minitaur_duck_gym_env.py index 44b41dd7d..b5c3375b4 100644 --- a/examples/pybullet/gym/pybullet_envs/bullet/minitaur_duck_gym_env.py +++ b/examples/pybullet/gym/pybullet_envs/bullet/minitaur_duck_gym_env.py @@ -385,3 +385,8 @@ class MinitaurBulletDuckEnv(gym.Env): scale=self._observation_noise_stdev, size=observation.shape) * self.minitaur.GetObservationUpperBound()) return observation + + render = _render + reset = _reset + seed = _seed + step = _step diff --git a/examples/pybullet/gym/pybullet_envs/bullet/minitaur_gym_env.py b/examples/pybullet/gym/pybullet_envs/bullet/minitaur_gym_env.py index 38ec631a5..158128067 100644 --- a/examples/pybullet/gym/pybullet_envs/bullet/minitaur_gym_env.py +++ b/examples/pybullet/gym/pybullet_envs/bullet/minitaur_gym_env.py @@ -376,3 +376,8 @@ class MinitaurBulletEnv(gym.Env): scale=self._observation_noise_stdev, size=observation.shape) * self.minitaur.GetObservationUpperBound()) return observation + + render = _render + reset = _reset + seed = _seed + step = _step diff --git a/examples/pybullet/gym/pybullet_envs/bullet/racecarGymEnv.py b/examples/pybullet/gym/pybullet_envs/bullet/racecarGymEnv.py index 7537d54c1..bdde5d963 100644 --- a/examples/pybullet/gym/pybullet_envs/bullet/racecarGymEnv.py +++ b/examples/pybullet/gym/pybullet_envs/bullet/racecarGymEnv.py @@ -50,8 +50,8 @@ class RacecarGymEnv(gym.Env): #self.reset() observationDim = 2 #len(self.getExtendedObservation()) #print("observationDim") - #print(observationDim) - # observation_high = np.array([np.finfo(np.float32).max] * observationDim) + #print(observationDim) + # observation_high = np.array([np.finfo(np.float32).max] * observationDim) observation_high = np.ones(observationDim) * 1000 #np.inf if (isDiscrete): self.action_space = spaces.Discrete(9) @@ -59,7 +59,7 @@ class RacecarGymEnv(gym.Env): action_dim = 2 self._action_bound = 1 action_high = np.array([self._action_bound] * action_dim) - self.action_space = spaces.Box(-action_high, action_high) + self.action_space = spaces.Box(-action_high, action_high) self.observation_space = spaces.Box(-observation_high, observation_high) self.viewer = None @@ -74,14 +74,14 @@ class RacecarGymEnv(gym.Env): # pos,orn = self._p.getBasePositionAndOrientation(i) # newpos = [pos[0],pos[1],pos[2]-0.1] # self._p.resetBasePositionAndOrientation(i,newpos,orn) - + dist = 5 +2.*random.random() ang = 2.*3.1415925438*random.random() - + ballx = dist * math.sin(ang) bally = dist * math.cos(ang) ballz = 1 - + self._ballUniqueId = self._p.loadURDF(os.path.join(self._urdfRoot,"sphere2.urdf"),[ballx,bally,ballz]) self._p.setGravity(0,0,-10) self._racecar = racecar.Racecar(self._p,urdfRootPath=self._urdfRoot, timeStep=self._timeStep) @@ -101,18 +101,18 @@ class RacecarGymEnv(gym.Env): def getExtendedObservation(self): self._observation = [] #self._racecar.getObservation() carpos,carorn = self._p.getBasePositionAndOrientation(self._racecar.racecarUniqueId) - ballpos,ballorn = self._p.getBasePositionAndOrientation(self._ballUniqueId) + ballpos,ballorn = self._p.getBasePositionAndOrientation(self._ballUniqueId) invCarPos,invCarOrn = self._p.invertTransform(carpos,carorn) ballPosInCar,ballOrnInCar = self._p.multiplyTransforms(invCarPos,invCarOrn,ballpos,ballorn) - + self._observation.extend([ballPosInCar[0],ballPosInCar[1]]) return self._observation - + def _step(self, action): if (self._renders): basePos,orn = self._p.getBasePositionAndOrientation(self._racecar.racecarUniqueId) #self._p.resetDebugVisualizerCamera(1, 30, -40, basePos) - + if (self._isDiscrete): fwd = [-1,-1,-1,0,0,0,1,1,1] steerings = [-0.6,0,0.6,-0.6,0,0.6,-0.6,0,0.6] @@ -128,14 +128,14 @@ class RacecarGymEnv(gym.Env): if self._renders: time.sleep(self._timeStep) self._observation = self.getExtendedObservation() - + if self._termination(): break self._envStepCounter += 1 reward = self._reward() done = self._termination() #print("len=%r" % len(self._observation)) - + return np.array(self._observation), reward, done, {} def _render(self, mode='human', close=False): @@ -159,13 +159,13 @@ class RacecarGymEnv(gym.Env): rgb_array = rgb_array[:, :, :3] return rgb_array - + def _termination(self): return self._envStepCounter>1000 - + def _reward(self): - closestPoints = self._p.getClosestPoints(self._racecar.racecarUniqueId,self._ballUniqueId,10000) - + closestPoints = self._p.getClosestPoints(self._racecar.racecarUniqueId,self._ballUniqueId,10000) + numPt = len(closestPoints) reward=-1000 #print(numPt) @@ -174,3 +174,8 @@ class RacecarGymEnv(gym.Env): reward = -closestPoints[0][8] #print(reward) return reward + + render = _render + reset = _reset + seed = _seed + step = _step diff --git a/examples/pybullet/gym/pybullet_envs/bullet/racecarZEDGymEnv.py b/examples/pybullet/gym/pybullet_envs/bullet/racecarZEDGymEnv.py index d9546de8c..277453e14 100644 --- a/examples/pybullet/gym/pybullet_envs/bullet/racecarZEDGymEnv.py +++ b/examples/pybullet/gym/pybullet_envs/bullet/racecarZEDGymEnv.py @@ -40,7 +40,7 @@ class RacecarZEDGymEnv(gym.Env): self._renders = renders self._width = 100 self._height = 10 - + self._isDiscrete = isDiscrete if self._renders: self._p = bullet_client.BulletClient( @@ -53,8 +53,8 @@ class RacecarZEDGymEnv(gym.Env): observationDim = len(self.getExtendedObservation()) #print("observationDim") #print(observationDim) - - observation_high = np.array([np.finfo(np.float32).max] * observationDim) + + observation_high = np.array([np.finfo(np.float32).max] * observationDim) if (isDiscrete): self.action_space = spaces.Discrete(9) else: @@ -77,14 +77,14 @@ class RacecarZEDGymEnv(gym.Env): pos,orn = self._p.getBasePositionAndOrientation(i) newpos = [pos[0],pos[1],pos[2]+0.1] self._p.resetBasePositionAndOrientation(i,newpos,orn) - + dist = 5 +2.*random.random() ang = 2.*3.1415925438*random.random() - + ballx = dist * math.sin(ang) bally = dist * math.cos(ang) ballz = 1 - + self._ballUniqueId = self._p.loadURDF(os.path.join(self._urdfRoot,"sphere2red.urdf"),[ballx,bally,ballz]) self._p.setGravity(0,0,-10) self._racecar = racecar.Racecar(self._p,urdfRootPath=self._urdfRoot, timeStep=self._timeStep) @@ -104,13 +104,13 @@ class RacecarZEDGymEnv(gym.Env): def getExtendedObservation(self): carpos,carorn = self._p.getBasePositionAndOrientation(self._racecar.racecarUniqueId) carmat = self._p.getMatrixFromQuaternion(carorn) - ballpos,ballorn = self._p.getBasePositionAndOrientation(self._ballUniqueId) + ballpos,ballorn = self._p.getBasePositionAndOrientation(self._ballUniqueId) invCarPos,invCarOrn = self._p.invertTransform(carpos,carorn) ballPosInCar,ballOrnInCar = self._p.multiplyTransforms(invCarPos,invCarOrn,ballpos,ballorn) dist0 = 0.3 dist1 = 1. eyePos = [carpos[0]+dist0*carmat[0],carpos[1]+dist0*carmat[3],carpos[2]+dist0*carmat[6]+0.3] - targetPos = [carpos[0]+dist1*carmat[0],carpos[1]+dist1*carmat[3],carpos[2]+dist1*carmat[6]+0.3] + targetPos = [carpos[0]+dist1*carmat[0],carpos[1]+dist1*carmat[3],carpos[2]+dist1*carmat[6]+0.3] up = [carmat[2],carmat[5],carmat[8]] viewMat = self._p.computeViewMatrix(eyePos,targetPos,up) #viewMat = self._p.computeViewMatrixFromYawPitchRoll(carpos,1,0,0,0,2) @@ -122,12 +122,12 @@ class RacecarZEDGymEnv(gym.Env): np_img_arr = np.reshape(rgb, (self._height, self._width, 4)) self._observation = np_img_arr return self._observation - + def _step(self, action): if (self._renders): basePos,orn = self._p.getBasePositionAndOrientation(self._racecar.racecarUniqueId) #self._p.resetDebugVisualizerCamera(1, 30, -40, basePos) - + if (self._isDiscrete): fwd = [-1,-1,-1,0,0,0,1,1,1] steerings = [-0.6,0,0.6,-0.6,0,0.6,-0.6,0,0.6] @@ -143,14 +143,14 @@ class RacecarZEDGymEnv(gym.Env): if self._renders: time.sleep(self._timeStep) self._observation = self.getExtendedObservation() - + if self._termination(): break self._envStepCounter += 1 reward = self._reward() done = self._termination() #print("len=%r" % len(self._observation)) - + return np.array(self._observation), reward, done, {} def _render(self, mode='human', close=False): @@ -177,10 +177,10 @@ class RacecarZEDGymEnv(gym.Env): def _termination(self): return self._envStepCounter>1000 - + def _reward(self): - closestPoints = self._p.getClosestPoints(self._racecar.racecarUniqueId,self._ballUniqueId,10000) - + closestPoints = self._p.getClosestPoints(self._racecar.racecarUniqueId,self._ballUniqueId,10000) + numPt = len(closestPoints) reward=-1000 #print(numPt) @@ -189,3 +189,8 @@ class RacecarZEDGymEnv(gym.Env): reward = -closestPoints[0][8] #print(reward) return reward + + render = _render + reset = _reset + seed = _seed + step = _step diff --git a/examples/pybullet/gym/pybullet_envs/bullet/simpleHumanoidGymEnv.py b/examples/pybullet/gym/pybullet_envs/bullet/simpleHumanoidGymEnv.py index c632e7f31..53b19b7a8 100644 --- a/examples/pybullet/gym/pybullet_envs/bullet/simpleHumanoidGymEnv.py +++ b/examples/pybullet/gym/pybullet_envs/bullet/simpleHumanoidGymEnv.py @@ -46,8 +46,8 @@ class SimpleHumanoidGymEnv(gym.Env): observationDim = len(self.getExtendedObservation()) #print("observationDim") #print(observationDim) - - observation_high = np.array([np.finfo(np.float32).max] * observationDim) + + observation_high = np.array([np.finfo(np.float32).max] * observationDim) self.action_space = spaces.Discrete(9) self.observation_space = spaces.Box(-observation_high, observation_high) self.viewer = None @@ -57,14 +57,14 @@ class SimpleHumanoidGymEnv(gym.Env): #p.setPhysicsEngineParameter(numSolverIterations=300) p.setTimeStep(self._timeStep) p.loadURDF(os.path.join(self._urdfRoot,"plane.urdf")) - + dist = 5 +2.*random.random() ang = 2.*3.1415925438*random.random() - + ballx = dist * math.sin(ang) bally = dist * math.cos(ang) ballz = 1 - + p.setGravity(0,0,-10) self._humanoid = simpleHumanoid.SimpleHumanoid(urdfRootPath=self._urdfRoot, timeStep=self._timeStep) self._envStepCounter = 0 @@ -82,7 +82,7 @@ class SimpleHumanoidGymEnv(gym.Env): def getExtendedObservation(self): self._observation = self._humanoid.getObservation() return self._observation - + def _step(self, action): self._humanoid.applyAction(action) for i in range(self._actionRepeat): @@ -96,7 +96,7 @@ class SimpleHumanoidGymEnv(gym.Env): reward = self._reward() done = self._termination() #print("len=%r" % len(self._observation)) - + return np.array(self._observation), reward, done, {} def _render(self, mode='human', close=False): @@ -104,8 +104,13 @@ class SimpleHumanoidGymEnv(gym.Env): def _termination(self): return self._envStepCounter>1000 - + def _reward(self): reward=self._humanoid.distance print(reward) return reward + + render = _render + reset = _reset + seed = _seed + step = _step diff --git a/examples/pybullet/gym/pybullet_envs/env_bases.py b/examples/pybullet/gym/pybullet_envs/env_bases.py index 5d49c22c3..ceafb6404 100644 --- a/examples/pybullet/gym/pybullet_envs/env_bases.py +++ b/examples/pybullet/gym/pybullet_envs/env_bases.py @@ -43,7 +43,7 @@ class MJCFBaseBulletEnv(gym.Env): conInfo = p.getConnectionInfo() if (conInfo['isConnected']): self.ownsPhysicsClient = False - + self.physicsClientId = 0 else: self.ownsPhysicsClient = True @@ -75,12 +75,12 @@ class MJCFBaseBulletEnv(gym.Env): self.isRender = True if mode != "rgb_array": return np.array([]) - + base_pos=[0,0,0] if (hasattr(self,'robot')): if (hasattr(self.robot,'body_xyz')): base_pos = self.robot.body_xyz - + view_matrix = p.computeViewMatrixFromYawPitchRoll( cameraTargetPosition=base_pos, distance=self._cam_dist, @@ -100,6 +100,7 @@ class MJCFBaseBulletEnv(gym.Env): rgb_array = rgb_array[:, :, :3] return rgb_array + def _close(self): if (self.ownsPhysicsClient): if (self.physicsClientId>=0): @@ -109,6 +110,17 @@ class MJCFBaseBulletEnv(gym.Env): def HUD(self, state, a, done): pass + # backwards compatibility for gym >= v0.9.x + # for extension of this class. + def step(self, *args, **kwargs): + return self._step(*args, **kwargs) + + close = _close + render = _render + reset = _reset + seed = _seed + + class Camera: def __init__(self): pass