add humanoid and kuka gym environments (experimental)

This commit is contained in:
erwincoumans
2017-06-14 00:54:41 -07:00
parent a0ded43a69
commit cc34ebab25
9 changed files with 477 additions and 0 deletions

View File

@@ -29,3 +29,17 @@ register(
timestep_limit=1000,
reward_threshold=5.0,
)
register(
id='HumanoidBulletEnv-v0',
entry_point='envs.bullet:HumanoidGymEnv',
timestep_limit=1000,
reward_threshold=5.0,
)
register(
id='KukaBulletEnv-v0',
entry_point='envs.bullet:KukaGymEnv',
timestep_limit=1000,
reward_threshold=5.0,
)

View File

@@ -1,2 +1,6 @@
from envs.bullet.cartpole_bullet import CartPoleBulletEnv
from envs.bullet.minitaur_bullet import MinitaurBulletEnv
from envs.bullet.racecarGymEnv import RacecarGymEnv
from envs.bullet.racecarZEDGymEnv import RacecarZEDGymEnv
from envs.bullet.humanoidGymEnv import HumanoidGymEnv
from envs.bullet.kukaGymEnv import KukaGymEnv

View File

@@ -0,0 +1,124 @@
import pybullet as p
import numpy as np
import copy
import math
import time
class Humanoid:
def __init__(self, urdfRootPath='', timeStep=0.01):
self.urdfRootPath = urdfRootPath
self.timeStep = timeStep
self.reset()
def reset(self):
self.initial_z = None
objs = p.loadMJCF("mjcf/humanoid_symmetric_no_ground.xml",flags = p.URDF_USE_SELF_COLLISION_EXCLUDE_ALL_PARENTS)
self.human = objs[0]
self.jdict = {}
self.ordered_joints = []
self.ordered_joint_indices = []
for j in range( p.getNumJoints(self.human) ):
info = p.getJointInfo(self.human, j)
link_name = info[12].decode("ascii")
if link_name=="left_foot": self.left_foot = j
if link_name=="right_foot": self.right_foot = j
self.ordered_joint_indices.append(j)
if info[2] != p.JOINT_REVOLUTE: continue
jname = info[1].decode("ascii")
self.jdict[jname] = j
lower, upper = (info[8], info[9])
self.ordered_joints.append( (j, lower, upper) )
p.setJointMotorControl2(self.human, j, controlMode=p.VELOCITY_CONTROL, force=0)
self.motor_names = ["abdomen_z", "abdomen_y", "abdomen_x"]
self.motor_power = [100, 100, 100]
self.motor_names += ["right_hip_x", "right_hip_z", "right_hip_y", "right_knee"]
self.motor_power += [100, 100, 300, 200]
self.motor_names += ["left_hip_x", "left_hip_z", "left_hip_y", "left_knee"]
self.motor_power += [100, 100, 300, 200]
self.motor_names += ["right_shoulder1", "right_shoulder2", "right_elbow"]
self.motor_power += [75, 75, 75]
self.motor_names += ["left_shoulder1", "left_shoulder2", "left_elbow"]
self.motor_power += [75, 75, 75]
self.motors = [self.jdict[n] for n in self.motor_names]
print("self.motors")
print(self.motors)
print("num motors")
print(len(self.motors))
def current_relative_position(self, jointStates, human, j, lower, upper):
#print("j")
#print(j)
#print (len(jointStates))
#print(j)
temp = jointStates[j]
pos = temp[0]
vel = temp[1]
#print("pos")
#print(pos)
#print("vel")
#print(vel)
pos_mid = 0.5 * (lower + upper);
return (
2 * (pos - pos_mid) / (upper - lower),
0.1 * vel
)
def collect_observations(self, human):
#print("ordered_joint_indices")
#print(ordered_joint_indices)
jointStates = p.getJointStates(human,self.ordered_joint_indices)
j = np.array([self.current_relative_position(jointStates, human, *jtuple) for jtuple in self.ordered_joints]).flatten()
#print("j")
#print(j)
body_xyz, (qx, qy, qz, qw) = p.getBasePositionAndOrientation(human)
#print("body_xyz")
#print(body_xyz, qx,qy,qz,qw)
z = body_xyz[2]
self.distance = body_xyz[0]
if self.initial_z==None:
self.initial_z = z
(vx, vy, vz), _ = p.getBaseVelocity(human)
more = np.array([z-self.initial_z, 0.1*vx, 0.1*vy, 0.1*vz, qx, qy, qz, qw])
rcont = p.getContactPoints(human, -1, self.right_foot, -1)
#print("rcont")
#print(rcont)
lcont = p.getContactPoints(human, -1, self.left_foot, -1)
#print("lcont")
#print(lcont)
feet_contact = np.array([len(rcont)>0, len(lcont)>0])
return np.clip( np.concatenate([more] + [j] + [feet_contact]), -5, +5)
def getActionDimension(self):
return len(self.motors)
def getObservationDimension(self):
return len(self.getObservation())
def getObservation(self):
observation = self.collect_observations(self.human)
return observation
def applyAction(self, actions):
forces = [0.] * len(self.motors)
for m in range(len(self.motors)):
forces[m] = self.motor_power[m]*actions[m]*0.082
p.setJointMotorControlArray(self.human, self.motors,controlMode=p.TORQUE_CONTROL, forces=forces)
p.stepSimulation()
time.sleep(0.01)
distance=5
yaw = 0
#humanPos, humanOrn = p.getBasePositionAndOrientation(self.human)
#p.resetDebugVisualizerCamera(distance,yaw,-20,humanPos);

View File

@@ -0,0 +1,102 @@
import math
import gym
from gym import spaces
from gym.utils import seeding
import numpy as np
import time
import pybullet as p
from . import humanoid
import random
class HumanoidGymEnv(gym.Env):
metadata = {
'render.modes': ['human', 'rgb_array'],
'video.frames_per_second' : 50
}
def __init__(self,
urdfRoot="",
actionRepeat=50,
isEnableSelfCollision=True,
renders=True):
print("init")
self._timeStep = 0.01
self._urdfRoot = urdfRoot
self._actionRepeat = actionRepeat
self._isEnableSelfCollision = isEnableSelfCollision
self._observation = []
self._envStepCounter = 0
self._renders = renders
self._p = p
if self._renders:
p.connect(p.GUI)
else:
p.connect(p.DIRECT)
self._seed()
self.reset()
observationDim = len(self.getExtendedObservation())
#print("observationDim")
#print(observationDim)
observation_high = np.array([np.finfo(np.float32).max] * observationDim)
self.action_space = spaces.Discrete(9)
self.observation_space = spaces.Box(-observation_high, observation_high)
self.viewer = None
def _reset(self):
p.resetSimulation()
#p.setPhysicsEngineParameter(numSolverIterations=300)
p.setTimeStep(self._timeStep)
p.loadURDF("%splane.urdf" % self._urdfRoot)
dist = 5 +2.*random.random()
ang = 2.*3.1415925438*random.random()
ballx = dist * math.sin(ang)
bally = dist * math.cos(ang)
ballz = 1
p.setGravity(0,0,-10)
self._humanoid = humanoid.Humanoid(urdfRootPath=self._urdfRoot, timeStep=self._timeStep)
self._envStepCounter = 0
p.stepSimulation()
self._observation = self.getExtendedObservation()
return np.array(self._observation)
def __del__(self):
p.disconnect()
def _seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
return [seed]
def getExtendedObservation(self):
self._observation = self._humanoid.getObservation()
return self._observation
def _step(self, action):
self._humanoid.applyAction(action)
for i in range(self._actionRepeat):
p.stepSimulation()
if self._renders:
time.sleep(self._timeStep)
self._observation = self.getExtendedObservation()
if self._termination():
break
self._envStepCounter += 1
reward = self._reward()
done = self._termination()
#print("len=%r" % len(self._observation))
return np.array(self._observation), reward, done, {}
def _render(self, mode='human', close=False):
return
def _termination(self):
return self._envStepCounter>1000
def _reward(self):
reward=self._humanoid.distance
print(reward)
return reward

View File

@@ -0,0 +1,58 @@
import pybullet as p
import numpy as np
import copy
import math
class Kuka:
def __init__(self, urdfRootPath='', timeStep=0.01):
self.urdfRootPath = urdfRootPath
self.timeStep = timeStep
self.reset()
self.maxForce = 100
def reset(self):
objects = p.loadSDF("kuka_iiwa/kuka_with_gripper.sdf")
self.kukaUid = objects[0]
p.resetBasePositionAndOrientation(self.kukaUid,[-0.100000,0.000000,0.070000],[0.000000,0.000000,0.000000,1.000000])
self.jointPositions=[ -0.196884, 0.857586, -0.023543, -1.664977, 0.030403, 0.624786, -0.232294, 0.000000, -0.296450, 0.000000, 0.100002, 0.284399, 0.000000, -0.099999 ]
for jointIndex in range (p.getNumJoints(self.kukaUid)):
p.resetJointState(self.kukaUid,jointIndex,self.jointPositions[jointIndex])
self.trayUid = p.loadURDF("tray/tray.urdf", 0.640000,0.075000,-0.190000,0.000000,0.000000,1.000000,0.000000)
self.blockUid =p.loadURDF("block.urdf", 0.604746,0.080626,-0.180069,0.000050,-0.000859,-0.824149,0.566372)
self.motorNames = []
self.motorIndices = []
numJoints = p.getNumJoints(self.kukaUid)
for i in range (numJoints):
jointInfo = p.getJointInfo(self.kukaUid,i)
qIndex = jointInfo[3]
if qIndex > -1:
print("motorname")
print(jointInfo[1])
self.motorNames.append(str(jointInfo[1]))
self.motorIndices.append(i)
def getActionDimension(self):
return len(self.motorIndices)
def getObservationDimension(self):
return len(self.getObservation())
def getObservation(self):
observation = []
pos,orn=p.getBasePositionAndOrientation(self.blockUid)
observation.extend(list(pos))
observation.extend(list(orn))
return observation
def applyAction(self, motorCommands):
for action in range (len(motorCommands)):
motor = self.motorIndices[action]
p.setJointMotorControl2(self.kukaUid,motor,p.POSITION_CONTROL,targetPosition=motorCommands[action],force=self.maxForce)

View File

@@ -0,0 +1,103 @@
import math
import gym
from gym import spaces
from gym.utils import seeding
import numpy as np
import time
import pybullet as p
from . import kuka
import random
class KukaGymEnv(gym.Env):
metadata = {
'render.modes': ['human', 'rgb_array'],
'video.frames_per_second' : 50
}
def __init__(self,
urdfRoot="",
actionRepeat=50,
isEnableSelfCollision=True,
renders=True):
print("init")
self._timeStep = 0.01
self._urdfRoot = urdfRoot
self._actionRepeat = actionRepeat
self._isEnableSelfCollision = isEnableSelfCollision
self._observation = []
self._envStepCounter = 0
self._renders = renders
self._p = p
if self._renders:
p.connect(p.GUI)
else:
p.connect(p.DIRECT)
self._seed()
self.reset()
observationDim = len(self.getExtendedObservation())
#print("observationDim")
#print(observationDim)
observation_high = np.array([np.finfo(np.float32).max] * observationDim)
self.action_space = spaces.Discrete(9)
self.observation_space = spaces.Box(-observation_high, observation_high)
self.viewer = None
def _reset(self):
p.resetSimulation()
#p.setPhysicsEngineParameter(numSolverIterations=300)
p.setTimeStep(self._timeStep)
p.loadURDF("%splane.urdf" % self._urdfRoot,[0,0,-1])
dist = 5 +2.*random.random()
ang = 2.*3.1415925438*random.random()
ballx = dist * math.sin(ang)
bally = dist * math.cos(ang)
ballz = 1
p.setGravity(0,0,-10)
self._kuka = kuka.Kuka(urdfRootPath=self._urdfRoot, timeStep=self._timeStep)
self._envStepCounter = 0
p.stepSimulation()
self._observation = self.getExtendedObservation()
return np.array(self._observation)
def __del__(self):
p.disconnect()
def _seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
return [seed]
def getExtendedObservation(self):
self._observation = self._kuka.getObservation()
return self._observation
def _step(self, action):
self._kuka.applyAction(action)
for i in range(self._actionRepeat):
p.stepSimulation()
if self._renders:
time.sleep(self._timeStep)
self._observation = self.getExtendedObservation()
if self._termination():
break
self._envStepCounter += 1
reward = self._reward()
done = self._termination()
#print("len=%r" % len(self._observation))
return np.array(self._observation), reward, done, {}
def _render(self, mode='human', close=False):
return
def _termination(self):
return self._envStepCounter>1000
def _reward(self):
#todo
reward=0
return reward

View File

@@ -0,0 +1,21 @@
from envs.bullet.humanoidGymEnv import HumanoidGymEnv
print ("hello")
environment = HumanoidGymEnv(renders=True)
environment._p.setGravity(0,0,0)
motorsIds=[]
for motor in environment._humanoid.motor_names:
motorsIds.append(environment._p.addUserDebugParameter(motor,-1,1,0))
while (True):
action=[]
for motorId in motorsIds:
action.append(environment._p.readUserDebugParameter(motorId))
state, reward, done, info = environment.step(action)
obs = environment.getExtendedObservation()
print("obs")
print(obs)

View File

@@ -0,0 +1,21 @@
from envs.bullet.kukaGymEnv import KukaGymEnv
print ("hello")
environment = KukaGymEnv(renders=True)
motorsIds=[]
for i in range (len(environment._kuka.motorNames)):
motor = environment._kuka.motorNames[i]
motorJointIndex = environment._kuka.motorIndices[i]
motorsIds.append(environment._p.addUserDebugParameter(motor,-3,3,environment._kuka.jointPositions[i]))
while (True):
action=[]
for motorId in motorsIds:
action.append(environment._p.readUserDebugParameter(motorId))
state, reward, done, info = environment.step(action)
obs = environment.getExtendedObservation()

View File

@@ -0,0 +1,30 @@
from envs.bullet.racecarZEDGymEnv import RacecarZEDGymEnv
print ("hello")
environment = RacecarZEDGymEnv(renders=True)
targetVelocitySlider = environment._p.addUserDebugParameter("wheelVelocity",-1,1,0)
steeringSlider = environment._p.addUserDebugParameter("steering",-0.5,0.5,0)
while (True):
targetVelocity = environment._p.readUserDebugParameter(targetVelocitySlider)
steeringAngle = environment._p.readUserDebugParameter(steeringSlider)
discreteAction = 0
if (targetVelocity<-0.33):
discreteAction=0
else:
if (targetVelocity>0.33):
discreteAction=6
else:
discreteAction=3
if (steeringAngle>-0.17):
if (steeringAngle>0.17):
discreteAction=discreteAction+2
else:
discreteAction=discreteAction+1
action=discreteAction
state, reward, done, info = environment.step(action)
obs = environment.getExtendedObservation()
print("obs")
print(obs)