From cc34ebab253ae70731f7d1077ca5291d98b5f201 Mon Sep 17 00:00:00 2001 From: erwincoumans Date: Wed, 14 Jun 2017 00:54:41 -0700 Subject: [PATCH] add humanoid and kuka gym environments (experimental) --- examples/pybullet/gym/envs/__init__.py | 14 ++ examples/pybullet/gym/envs/bullet/__init__.py | 4 + examples/pybullet/gym/envs/bullet/humanoid.py | 124 ++++++++++++++++++ .../gym/envs/bullet/humanoidGymEnv.py | 102 ++++++++++++++ examples/pybullet/gym/envs/bullet/kuka.py | 58 ++++++++ .../pybullet/gym/envs/bullet/kukaGymEnv.py | 103 +++++++++++++++ examples/pybullet/gym/humanoidGymEnvTest.py | 21 +++ examples/pybullet/gym/kukaGymEnvTest.py | 21 +++ examples/pybullet/gym/racecarZEDGymEnvTest.py | 30 +++++ 9 files changed, 477 insertions(+) create mode 100644 examples/pybullet/gym/envs/bullet/humanoid.py create mode 100644 examples/pybullet/gym/envs/bullet/humanoidGymEnv.py create mode 100644 examples/pybullet/gym/envs/bullet/kuka.py create mode 100644 examples/pybullet/gym/envs/bullet/kukaGymEnv.py create mode 100644 examples/pybullet/gym/humanoidGymEnvTest.py create mode 100644 examples/pybullet/gym/kukaGymEnvTest.py create mode 100644 examples/pybullet/gym/racecarZEDGymEnvTest.py diff --git a/examples/pybullet/gym/envs/__init__.py b/examples/pybullet/gym/envs/__init__.py index 495c30e39..d8b096113 100644 --- a/examples/pybullet/gym/envs/__init__.py +++ b/examples/pybullet/gym/envs/__init__.py @@ -29,3 +29,17 @@ register( timestep_limit=1000, reward_threshold=5.0, ) + +register( + id='HumanoidBulletEnv-v0', + entry_point='envs.bullet:HumanoidGymEnv', + timestep_limit=1000, + reward_threshold=5.0, +) + +register( + id='KukaBulletEnv-v0', + entry_point='envs.bullet:KukaGymEnv', + timestep_limit=1000, + reward_threshold=5.0, +) \ No newline at end of file diff --git a/examples/pybullet/gym/envs/bullet/__init__.py b/examples/pybullet/gym/envs/bullet/__init__.py index 25da62f87..708ae4e53 100644 --- a/examples/pybullet/gym/envs/bullet/__init__.py +++ b/examples/pybullet/gym/envs/bullet/__init__.py @@ -1,2 +1,6 @@ from envs.bullet.cartpole_bullet import CartPoleBulletEnv from envs.bullet.minitaur_bullet import MinitaurBulletEnv +from envs.bullet.racecarGymEnv import RacecarGymEnv +from envs.bullet.racecarZEDGymEnv import RacecarZEDGymEnv +from envs.bullet.humanoidGymEnv import HumanoidGymEnv +from envs.bullet.kukaGymEnv import KukaGymEnv diff --git a/examples/pybullet/gym/envs/bullet/humanoid.py b/examples/pybullet/gym/envs/bullet/humanoid.py new file mode 100644 index 000000000..a883139b1 --- /dev/null +++ b/examples/pybullet/gym/envs/bullet/humanoid.py @@ -0,0 +1,124 @@ +import pybullet as p +import numpy as np +import copy +import math +import time + + + +class Humanoid: + + def __init__(self, urdfRootPath='', timeStep=0.01): + self.urdfRootPath = urdfRootPath + self.timeStep = timeStep + self.reset() + + + + def reset(self): + self.initial_z = None + + objs = p.loadMJCF("mjcf/humanoid_symmetric_no_ground.xml",flags = p.URDF_USE_SELF_COLLISION_EXCLUDE_ALL_PARENTS) + self.human = objs[0] + self.jdict = {} + self.ordered_joints = [] + self.ordered_joint_indices = [] + + for j in range( p.getNumJoints(self.human) ): + info = p.getJointInfo(self.human, j) + link_name = info[12].decode("ascii") + if link_name=="left_foot": self.left_foot = j + if link_name=="right_foot": self.right_foot = j + self.ordered_joint_indices.append(j) + if info[2] != p.JOINT_REVOLUTE: continue + jname = info[1].decode("ascii") + self.jdict[jname] = j + lower, upper = (info[8], info[9]) + self.ordered_joints.append( (j, lower, upper) ) + p.setJointMotorControl2(self.human, j, controlMode=p.VELOCITY_CONTROL, force=0) + + self.motor_names = ["abdomen_z", "abdomen_y", "abdomen_x"] + self.motor_power = [100, 100, 100] + self.motor_names += ["right_hip_x", "right_hip_z", "right_hip_y", "right_knee"] + self.motor_power += [100, 100, 300, 200] + self.motor_names += ["left_hip_x", "left_hip_z", "left_hip_y", "left_knee"] + self.motor_power += [100, 100, 300, 200] + self.motor_names += ["right_shoulder1", "right_shoulder2", "right_elbow"] + self.motor_power += [75, 75, 75] + self.motor_names += ["left_shoulder1", "left_shoulder2", "left_elbow"] + self.motor_power += [75, 75, 75] + self.motors = [self.jdict[n] for n in self.motor_names] + print("self.motors") + print(self.motors) + print("num motors") + print(len(self.motors)) + + + + def current_relative_position(self, jointStates, human, j, lower, upper): + #print("j") + #print(j) + #print (len(jointStates)) + #print(j) + temp = jointStates[j] + pos = temp[0] + vel = temp[1] + #print("pos") + #print(pos) + #print("vel") + #print(vel) + pos_mid = 0.5 * (lower + upper); + return ( + 2 * (pos - pos_mid) / (upper - lower), + 0.1 * vel + ) + + def collect_observations(self, human): + #print("ordered_joint_indices") + #print(ordered_joint_indices) + + + jointStates = p.getJointStates(human,self.ordered_joint_indices) + j = np.array([self.current_relative_position(jointStates, human, *jtuple) for jtuple in self.ordered_joints]).flatten() + #print("j") + #print(j) + body_xyz, (qx, qy, qz, qw) = p.getBasePositionAndOrientation(human) + #print("body_xyz") + #print(body_xyz, qx,qy,qz,qw) + z = body_xyz[2] + self.distance = body_xyz[0] + if self.initial_z==None: + self.initial_z = z + (vx, vy, vz), _ = p.getBaseVelocity(human) + more = np.array([z-self.initial_z, 0.1*vx, 0.1*vy, 0.1*vz, qx, qy, qz, qw]) + rcont = p.getContactPoints(human, -1, self.right_foot, -1) + #print("rcont") + #print(rcont) + lcont = p.getContactPoints(human, -1, self.left_foot, -1) + #print("lcont") + #print(lcont) + feet_contact = np.array([len(rcont)>0, len(lcont)>0]) + return np.clip( np.concatenate([more] + [j] + [feet_contact]), -5, +5) + + def getActionDimension(self): + return len(self.motors) + + def getObservationDimension(self): + return len(self.getObservation()) + + def getObservation(self): + observation = self.collect_observations(self.human) + return observation + + def applyAction(self, actions): + forces = [0.] * len(self.motors) + for m in range(len(self.motors)): + forces[m] = self.motor_power[m]*actions[m]*0.082 + p.setJointMotorControlArray(self.human, self.motors,controlMode=p.TORQUE_CONTROL, forces=forces) + + p.stepSimulation() + time.sleep(0.01) + distance=5 + yaw = 0 + #humanPos, humanOrn = p.getBasePositionAndOrientation(self.human) + #p.resetDebugVisualizerCamera(distance,yaw,-20,humanPos); diff --git a/examples/pybullet/gym/envs/bullet/humanoidGymEnv.py b/examples/pybullet/gym/envs/bullet/humanoidGymEnv.py new file mode 100644 index 000000000..f46a05ce0 --- /dev/null +++ b/examples/pybullet/gym/envs/bullet/humanoidGymEnv.py @@ -0,0 +1,102 @@ +import math +import gym +from gym import spaces +from gym.utils import seeding +import numpy as np +import time +import pybullet as p +from . import humanoid +import random + +class HumanoidGymEnv(gym.Env): + metadata = { + 'render.modes': ['human', 'rgb_array'], + 'video.frames_per_second' : 50 + } + + def __init__(self, + urdfRoot="", + actionRepeat=50, + isEnableSelfCollision=True, + renders=True): + print("init") + self._timeStep = 0.01 + self._urdfRoot = urdfRoot + self._actionRepeat = actionRepeat + self._isEnableSelfCollision = isEnableSelfCollision + self._observation = [] + self._envStepCounter = 0 + self._renders = renders + self._p = p + if self._renders: + p.connect(p.GUI) + else: + p.connect(p.DIRECT) + self._seed() + self.reset() + observationDim = len(self.getExtendedObservation()) + #print("observationDim") + #print(observationDim) + + observation_high = np.array([np.finfo(np.float32).max] * observationDim) + self.action_space = spaces.Discrete(9) + self.observation_space = spaces.Box(-observation_high, observation_high) + self.viewer = None + + def _reset(self): + p.resetSimulation() + #p.setPhysicsEngineParameter(numSolverIterations=300) + p.setTimeStep(self._timeStep) + p.loadURDF("%splane.urdf" % self._urdfRoot) + + dist = 5 +2.*random.random() + ang = 2.*3.1415925438*random.random() + + ballx = dist * math.sin(ang) + bally = dist * math.cos(ang) + ballz = 1 + + p.setGravity(0,0,-10) + self._humanoid = humanoid.Humanoid(urdfRootPath=self._urdfRoot, timeStep=self._timeStep) + self._envStepCounter = 0 + p.stepSimulation() + self._observation = self.getExtendedObservation() + return np.array(self._observation) + + def __del__(self): + p.disconnect() + + def _seed(self, seed=None): + self.np_random, seed = seeding.np_random(seed) + return [seed] + + def getExtendedObservation(self): + self._observation = self._humanoid.getObservation() + return self._observation + + def _step(self, action): + self._humanoid.applyAction(action) + for i in range(self._actionRepeat): + p.stepSimulation() + if self._renders: + time.sleep(self._timeStep) + self._observation = self.getExtendedObservation() + if self._termination(): + break + self._envStepCounter += 1 + reward = self._reward() + done = self._termination() + #print("len=%r" % len(self._observation)) + + return np.array(self._observation), reward, done, {} + + def _render(self, mode='human', close=False): + return + + def _termination(self): + return self._envStepCounter>1000 + + def _reward(self): + reward=self._humanoid.distance + print(reward) + return reward \ No newline at end of file diff --git a/examples/pybullet/gym/envs/bullet/kuka.py b/examples/pybullet/gym/envs/bullet/kuka.py new file mode 100644 index 000000000..3f74d87ff --- /dev/null +++ b/examples/pybullet/gym/envs/bullet/kuka.py @@ -0,0 +1,58 @@ +import pybullet as p +import numpy as np +import copy +import math + +class Kuka: + + def __init__(self, urdfRootPath='', timeStep=0.01): + self.urdfRootPath = urdfRootPath + self.timeStep = timeStep + self.reset() + self.maxForce = 100 + + def reset(self): + + objects = p.loadSDF("kuka_iiwa/kuka_with_gripper.sdf") + self.kukaUid = objects[0] + p.resetBasePositionAndOrientation(self.kukaUid,[-0.100000,0.000000,0.070000],[0.000000,0.000000,0.000000,1.000000]) + self.jointPositions=[ -0.196884, 0.857586, -0.023543, -1.664977, 0.030403, 0.624786, -0.232294, 0.000000, -0.296450, 0.000000, 0.100002, 0.284399, 0.000000, -0.099999 ] + for jointIndex in range (p.getNumJoints(self.kukaUid)): + p.resetJointState(self.kukaUid,jointIndex,self.jointPositions[jointIndex]) + + self.trayUid = p.loadURDF("tray/tray.urdf", 0.640000,0.075000,-0.190000,0.000000,0.000000,1.000000,0.000000) + self.blockUid =p.loadURDF("block.urdf", 0.604746,0.080626,-0.180069,0.000050,-0.000859,-0.824149,0.566372) + + self.motorNames = [] + self.motorIndices = [] + numJoints = p.getNumJoints(self.kukaUid) + for i in range (numJoints): + jointInfo = p.getJointInfo(self.kukaUid,i) + qIndex = jointInfo[3] + if qIndex > -1: + print("motorname") + print(jointInfo[1]) + self.motorNames.append(str(jointInfo[1])) + self.motorIndices.append(i) + + def getActionDimension(self): + return len(self.motorIndices) + + def getObservationDimension(self): + return len(self.getObservation()) + + def getObservation(self): + observation = [] + pos,orn=p.getBasePositionAndOrientation(self.blockUid) + + observation.extend(list(pos)) + observation.extend(list(orn)) + + return observation + + def applyAction(self, motorCommands): + + for action in range (len(motorCommands)): + motor = self.motorIndices[action] + p.setJointMotorControl2(self.kukaUid,motor,p.POSITION_CONTROL,targetPosition=motorCommands[action],force=self.maxForce) + \ No newline at end of file diff --git a/examples/pybullet/gym/envs/bullet/kukaGymEnv.py b/examples/pybullet/gym/envs/bullet/kukaGymEnv.py new file mode 100644 index 000000000..ee0479c6c --- /dev/null +++ b/examples/pybullet/gym/envs/bullet/kukaGymEnv.py @@ -0,0 +1,103 @@ +import math +import gym +from gym import spaces +from gym.utils import seeding +import numpy as np +import time +import pybullet as p +from . import kuka +import random + +class KukaGymEnv(gym.Env): + metadata = { + 'render.modes': ['human', 'rgb_array'], + 'video.frames_per_second' : 50 + } + + def __init__(self, + urdfRoot="", + actionRepeat=50, + isEnableSelfCollision=True, + renders=True): + print("init") + self._timeStep = 0.01 + self._urdfRoot = urdfRoot + self._actionRepeat = actionRepeat + self._isEnableSelfCollision = isEnableSelfCollision + self._observation = [] + self._envStepCounter = 0 + self._renders = renders + self._p = p + if self._renders: + p.connect(p.GUI) + else: + p.connect(p.DIRECT) + self._seed() + self.reset() + observationDim = len(self.getExtendedObservation()) + #print("observationDim") + #print(observationDim) + + observation_high = np.array([np.finfo(np.float32).max] * observationDim) + self.action_space = spaces.Discrete(9) + self.observation_space = spaces.Box(-observation_high, observation_high) + self.viewer = None + + def _reset(self): + p.resetSimulation() + #p.setPhysicsEngineParameter(numSolverIterations=300) + p.setTimeStep(self._timeStep) + p.loadURDF("%splane.urdf" % self._urdfRoot,[0,0,-1]) + + dist = 5 +2.*random.random() + ang = 2.*3.1415925438*random.random() + + ballx = dist * math.sin(ang) + bally = dist * math.cos(ang) + ballz = 1 + + p.setGravity(0,0,-10) + self._kuka = kuka.Kuka(urdfRootPath=self._urdfRoot, timeStep=self._timeStep) + self._envStepCounter = 0 + p.stepSimulation() + self._observation = self.getExtendedObservation() + return np.array(self._observation) + + def __del__(self): + p.disconnect() + + def _seed(self, seed=None): + self.np_random, seed = seeding.np_random(seed) + return [seed] + + def getExtendedObservation(self): + self._observation = self._kuka.getObservation() + return self._observation + + def _step(self, action): + self._kuka.applyAction(action) + for i in range(self._actionRepeat): + p.stepSimulation() + if self._renders: + time.sleep(self._timeStep) + self._observation = self.getExtendedObservation() + if self._termination(): + break + self._envStepCounter += 1 + reward = self._reward() + done = self._termination() + #print("len=%r" % len(self._observation)) + + return np.array(self._observation), reward, done, {} + + def _render(self, mode='human', close=False): + return + + def _termination(self): + return self._envStepCounter>1000 + + def _reward(self): + #todo + reward=0 + + return reward \ No newline at end of file diff --git a/examples/pybullet/gym/humanoidGymEnvTest.py b/examples/pybullet/gym/humanoidGymEnvTest.py new file mode 100644 index 000000000..72004b7b9 --- /dev/null +++ b/examples/pybullet/gym/humanoidGymEnvTest.py @@ -0,0 +1,21 @@ + +from envs.bullet.humanoidGymEnv import HumanoidGymEnv +print ("hello") +environment = HumanoidGymEnv(renders=True) + +environment._p.setGravity(0,0,0) + +motorsIds=[] +for motor in environment._humanoid.motor_names: + motorsIds.append(environment._p.addUserDebugParameter(motor,-1,1,0)) + +while (True): + + action=[] + for motorId in motorsIds: + action.append(environment._p.readUserDebugParameter(motorId)) + + state, reward, done, info = environment.step(action) + obs = environment.getExtendedObservation() + print("obs") + print(obs) diff --git a/examples/pybullet/gym/kukaGymEnvTest.py b/examples/pybullet/gym/kukaGymEnvTest.py new file mode 100644 index 000000000..46bde180a --- /dev/null +++ b/examples/pybullet/gym/kukaGymEnvTest.py @@ -0,0 +1,21 @@ + +from envs.bullet.kukaGymEnv import KukaGymEnv +print ("hello") +environment = KukaGymEnv(renders=True) + + +motorsIds=[] +for i in range (len(environment._kuka.motorNames)): + motor = environment._kuka.motorNames[i] + motorJointIndex = environment._kuka.motorIndices[i] + motorsIds.append(environment._p.addUserDebugParameter(motor,-3,3,environment._kuka.jointPositions[i])) + +while (True): + + action=[] + for motorId in motorsIds: + action.append(environment._p.readUserDebugParameter(motorId)) + + state, reward, done, info = environment.step(action) + obs = environment.getExtendedObservation() + diff --git a/examples/pybullet/gym/racecarZEDGymEnvTest.py b/examples/pybullet/gym/racecarZEDGymEnvTest.py new file mode 100644 index 000000000..68364bcb4 --- /dev/null +++ b/examples/pybullet/gym/racecarZEDGymEnvTest.py @@ -0,0 +1,30 @@ + +from envs.bullet.racecarZEDGymEnv import RacecarZEDGymEnv +print ("hello") +environment = RacecarZEDGymEnv(renders=True) + +targetVelocitySlider = environment._p.addUserDebugParameter("wheelVelocity",-1,1,0) +steeringSlider = environment._p.addUserDebugParameter("steering",-0.5,0.5,0) + +while (True): + targetVelocity = environment._p.readUserDebugParameter(targetVelocitySlider) + steeringAngle = environment._p.readUserDebugParameter(steeringSlider) + discreteAction = 0 + if (targetVelocity<-0.33): + discreteAction=0 + else: + if (targetVelocity>0.33): + discreteAction=6 + else: + discreteAction=3 + if (steeringAngle>-0.17): + if (steeringAngle>0.17): + discreteAction=discreteAction+2 + else: + discreteAction=discreteAction+1 + + action=discreteAction + state, reward, done, info = environment.step(action) + obs = environment.getExtendedObservation() + print("obs") + print(obs)