Merge pull request #1187 from erwincoumans/master
add humanoid and kuka gym environments (experimental)
This commit is contained in:
@@ -29,3 +29,17 @@ register(
|
||||
timestep_limit=1000,
|
||||
reward_threshold=5.0,
|
||||
)
|
||||
|
||||
register(
|
||||
id='HumanoidBulletEnv-v0',
|
||||
entry_point='envs.bullet:HumanoidGymEnv',
|
||||
timestep_limit=1000,
|
||||
reward_threshold=5.0,
|
||||
)
|
||||
|
||||
register(
|
||||
id='KukaBulletEnv-v0',
|
||||
entry_point='envs.bullet:KukaGymEnv',
|
||||
timestep_limit=1000,
|
||||
reward_threshold=5.0,
|
||||
)
|
||||
@@ -1,2 +1,6 @@
|
||||
from envs.bullet.cartpole_bullet import CartPoleBulletEnv
|
||||
from envs.bullet.minitaur_bullet import MinitaurBulletEnv
|
||||
from envs.bullet.racecarGymEnv import RacecarGymEnv
|
||||
from envs.bullet.racecarZEDGymEnv import RacecarZEDGymEnv
|
||||
from envs.bullet.humanoidGymEnv import HumanoidGymEnv
|
||||
from envs.bullet.kukaGymEnv import KukaGymEnv
|
||||
|
||||
124
examples/pybullet/gym/envs/bullet/humanoid.py
Normal file
124
examples/pybullet/gym/envs/bullet/humanoid.py
Normal file
@@ -0,0 +1,124 @@
|
||||
import pybullet as p
|
||||
import numpy as np
|
||||
import copy
|
||||
import math
|
||||
import time
|
||||
|
||||
|
||||
|
||||
class Humanoid:
|
||||
|
||||
def __init__(self, urdfRootPath='', timeStep=0.01):
|
||||
self.urdfRootPath = urdfRootPath
|
||||
self.timeStep = timeStep
|
||||
self.reset()
|
||||
|
||||
|
||||
|
||||
def reset(self):
|
||||
self.initial_z = None
|
||||
|
||||
objs = p.loadMJCF("mjcf/humanoid_symmetric_no_ground.xml",flags = p.URDF_USE_SELF_COLLISION_EXCLUDE_ALL_PARENTS)
|
||||
self.human = objs[0]
|
||||
self.jdict = {}
|
||||
self.ordered_joints = []
|
||||
self.ordered_joint_indices = []
|
||||
|
||||
for j in range( p.getNumJoints(self.human) ):
|
||||
info = p.getJointInfo(self.human, j)
|
||||
link_name = info[12].decode("ascii")
|
||||
if link_name=="left_foot": self.left_foot = j
|
||||
if link_name=="right_foot": self.right_foot = j
|
||||
self.ordered_joint_indices.append(j)
|
||||
if info[2] != p.JOINT_REVOLUTE: continue
|
||||
jname = info[1].decode("ascii")
|
||||
self.jdict[jname] = j
|
||||
lower, upper = (info[8], info[9])
|
||||
self.ordered_joints.append( (j, lower, upper) )
|
||||
p.setJointMotorControl2(self.human, j, controlMode=p.VELOCITY_CONTROL, force=0)
|
||||
|
||||
self.motor_names = ["abdomen_z", "abdomen_y", "abdomen_x"]
|
||||
self.motor_power = [100, 100, 100]
|
||||
self.motor_names += ["right_hip_x", "right_hip_z", "right_hip_y", "right_knee"]
|
||||
self.motor_power += [100, 100, 300, 200]
|
||||
self.motor_names += ["left_hip_x", "left_hip_z", "left_hip_y", "left_knee"]
|
||||
self.motor_power += [100, 100, 300, 200]
|
||||
self.motor_names += ["right_shoulder1", "right_shoulder2", "right_elbow"]
|
||||
self.motor_power += [75, 75, 75]
|
||||
self.motor_names += ["left_shoulder1", "left_shoulder2", "left_elbow"]
|
||||
self.motor_power += [75, 75, 75]
|
||||
self.motors = [self.jdict[n] for n in self.motor_names]
|
||||
print("self.motors")
|
||||
print(self.motors)
|
||||
print("num motors")
|
||||
print(len(self.motors))
|
||||
|
||||
|
||||
|
||||
def current_relative_position(self, jointStates, human, j, lower, upper):
|
||||
#print("j")
|
||||
#print(j)
|
||||
#print (len(jointStates))
|
||||
#print(j)
|
||||
temp = jointStates[j]
|
||||
pos = temp[0]
|
||||
vel = temp[1]
|
||||
#print("pos")
|
||||
#print(pos)
|
||||
#print("vel")
|
||||
#print(vel)
|
||||
pos_mid = 0.5 * (lower + upper);
|
||||
return (
|
||||
2 * (pos - pos_mid) / (upper - lower),
|
||||
0.1 * vel
|
||||
)
|
||||
|
||||
def collect_observations(self, human):
|
||||
#print("ordered_joint_indices")
|
||||
#print(ordered_joint_indices)
|
||||
|
||||
|
||||
jointStates = p.getJointStates(human,self.ordered_joint_indices)
|
||||
j = np.array([self.current_relative_position(jointStates, human, *jtuple) for jtuple in self.ordered_joints]).flatten()
|
||||
#print("j")
|
||||
#print(j)
|
||||
body_xyz, (qx, qy, qz, qw) = p.getBasePositionAndOrientation(human)
|
||||
#print("body_xyz")
|
||||
#print(body_xyz, qx,qy,qz,qw)
|
||||
z = body_xyz[2]
|
||||
self.distance = body_xyz[0]
|
||||
if self.initial_z==None:
|
||||
self.initial_z = z
|
||||
(vx, vy, vz), _ = p.getBaseVelocity(human)
|
||||
more = np.array([z-self.initial_z, 0.1*vx, 0.1*vy, 0.1*vz, qx, qy, qz, qw])
|
||||
rcont = p.getContactPoints(human, -1, self.right_foot, -1)
|
||||
#print("rcont")
|
||||
#print(rcont)
|
||||
lcont = p.getContactPoints(human, -1, self.left_foot, -1)
|
||||
#print("lcont")
|
||||
#print(lcont)
|
||||
feet_contact = np.array([len(rcont)>0, len(lcont)>0])
|
||||
return np.clip( np.concatenate([more] + [j] + [feet_contact]), -5, +5)
|
||||
|
||||
def getActionDimension(self):
|
||||
return len(self.motors)
|
||||
|
||||
def getObservationDimension(self):
|
||||
return len(self.getObservation())
|
||||
|
||||
def getObservation(self):
|
||||
observation = self.collect_observations(self.human)
|
||||
return observation
|
||||
|
||||
def applyAction(self, actions):
|
||||
forces = [0.] * len(self.motors)
|
||||
for m in range(len(self.motors)):
|
||||
forces[m] = self.motor_power[m]*actions[m]*0.082
|
||||
p.setJointMotorControlArray(self.human, self.motors,controlMode=p.TORQUE_CONTROL, forces=forces)
|
||||
|
||||
p.stepSimulation()
|
||||
time.sleep(0.01)
|
||||
distance=5
|
||||
yaw = 0
|
||||
#humanPos, humanOrn = p.getBasePositionAndOrientation(self.human)
|
||||
#p.resetDebugVisualizerCamera(distance,yaw,-20,humanPos);
|
||||
102
examples/pybullet/gym/envs/bullet/humanoidGymEnv.py
Normal file
102
examples/pybullet/gym/envs/bullet/humanoidGymEnv.py
Normal file
@@ -0,0 +1,102 @@
|
||||
import math
|
||||
import gym
|
||||
from gym import spaces
|
||||
from gym.utils import seeding
|
||||
import numpy as np
|
||||
import time
|
||||
import pybullet as p
|
||||
from . import humanoid
|
||||
import random
|
||||
|
||||
class HumanoidGymEnv(gym.Env):
|
||||
metadata = {
|
||||
'render.modes': ['human', 'rgb_array'],
|
||||
'video.frames_per_second' : 50
|
||||
}
|
||||
|
||||
def __init__(self,
|
||||
urdfRoot="",
|
||||
actionRepeat=50,
|
||||
isEnableSelfCollision=True,
|
||||
renders=True):
|
||||
print("init")
|
||||
self._timeStep = 0.01
|
||||
self._urdfRoot = urdfRoot
|
||||
self._actionRepeat = actionRepeat
|
||||
self._isEnableSelfCollision = isEnableSelfCollision
|
||||
self._observation = []
|
||||
self._envStepCounter = 0
|
||||
self._renders = renders
|
||||
self._p = p
|
||||
if self._renders:
|
||||
p.connect(p.GUI)
|
||||
else:
|
||||
p.connect(p.DIRECT)
|
||||
self._seed()
|
||||
self.reset()
|
||||
observationDim = len(self.getExtendedObservation())
|
||||
#print("observationDim")
|
||||
#print(observationDim)
|
||||
|
||||
observation_high = np.array([np.finfo(np.float32).max] * observationDim)
|
||||
self.action_space = spaces.Discrete(9)
|
||||
self.observation_space = spaces.Box(-observation_high, observation_high)
|
||||
self.viewer = None
|
||||
|
||||
def _reset(self):
|
||||
p.resetSimulation()
|
||||
#p.setPhysicsEngineParameter(numSolverIterations=300)
|
||||
p.setTimeStep(self._timeStep)
|
||||
p.loadURDF("%splane.urdf" % self._urdfRoot)
|
||||
|
||||
dist = 5 +2.*random.random()
|
||||
ang = 2.*3.1415925438*random.random()
|
||||
|
||||
ballx = dist * math.sin(ang)
|
||||
bally = dist * math.cos(ang)
|
||||
ballz = 1
|
||||
|
||||
p.setGravity(0,0,-10)
|
||||
self._humanoid = humanoid.Humanoid(urdfRootPath=self._urdfRoot, timeStep=self._timeStep)
|
||||
self._envStepCounter = 0
|
||||
p.stepSimulation()
|
||||
self._observation = self.getExtendedObservation()
|
||||
return np.array(self._observation)
|
||||
|
||||
def __del__(self):
|
||||
p.disconnect()
|
||||
|
||||
def _seed(self, seed=None):
|
||||
self.np_random, seed = seeding.np_random(seed)
|
||||
return [seed]
|
||||
|
||||
def getExtendedObservation(self):
|
||||
self._observation = self._humanoid.getObservation()
|
||||
return self._observation
|
||||
|
||||
def _step(self, action):
|
||||
self._humanoid.applyAction(action)
|
||||
for i in range(self._actionRepeat):
|
||||
p.stepSimulation()
|
||||
if self._renders:
|
||||
time.sleep(self._timeStep)
|
||||
self._observation = self.getExtendedObservation()
|
||||
if self._termination():
|
||||
break
|
||||
self._envStepCounter += 1
|
||||
reward = self._reward()
|
||||
done = self._termination()
|
||||
#print("len=%r" % len(self._observation))
|
||||
|
||||
return np.array(self._observation), reward, done, {}
|
||||
|
||||
def _render(self, mode='human', close=False):
|
||||
return
|
||||
|
||||
def _termination(self):
|
||||
return self._envStepCounter>1000
|
||||
|
||||
def _reward(self):
|
||||
reward=self._humanoid.distance
|
||||
print(reward)
|
||||
return reward
|
||||
58
examples/pybullet/gym/envs/bullet/kuka.py
Normal file
58
examples/pybullet/gym/envs/bullet/kuka.py
Normal file
@@ -0,0 +1,58 @@
|
||||
import pybullet as p
|
||||
import numpy as np
|
||||
import copy
|
||||
import math
|
||||
|
||||
class Kuka:
|
||||
|
||||
def __init__(self, urdfRootPath='', timeStep=0.01):
|
||||
self.urdfRootPath = urdfRootPath
|
||||
self.timeStep = timeStep
|
||||
self.reset()
|
||||
self.maxForce = 100
|
||||
|
||||
def reset(self):
|
||||
|
||||
objects = p.loadSDF("kuka_iiwa/kuka_with_gripper.sdf")
|
||||
self.kukaUid = objects[0]
|
||||
p.resetBasePositionAndOrientation(self.kukaUid,[-0.100000,0.000000,0.070000],[0.000000,0.000000,0.000000,1.000000])
|
||||
self.jointPositions=[ -0.196884, 0.857586, -0.023543, -1.664977, 0.030403, 0.624786, -0.232294, 0.000000, -0.296450, 0.000000, 0.100002, 0.284399, 0.000000, -0.099999 ]
|
||||
for jointIndex in range (p.getNumJoints(self.kukaUid)):
|
||||
p.resetJointState(self.kukaUid,jointIndex,self.jointPositions[jointIndex])
|
||||
|
||||
self.trayUid = p.loadURDF("tray/tray.urdf", 0.640000,0.075000,-0.190000,0.000000,0.000000,1.000000,0.000000)
|
||||
self.blockUid =p.loadURDF("block.urdf", 0.604746,0.080626,-0.180069,0.000050,-0.000859,-0.824149,0.566372)
|
||||
|
||||
self.motorNames = []
|
||||
self.motorIndices = []
|
||||
numJoints = p.getNumJoints(self.kukaUid)
|
||||
for i in range (numJoints):
|
||||
jointInfo = p.getJointInfo(self.kukaUid,i)
|
||||
qIndex = jointInfo[3]
|
||||
if qIndex > -1:
|
||||
print("motorname")
|
||||
print(jointInfo[1])
|
||||
self.motorNames.append(str(jointInfo[1]))
|
||||
self.motorIndices.append(i)
|
||||
|
||||
def getActionDimension(self):
|
||||
return len(self.motorIndices)
|
||||
|
||||
def getObservationDimension(self):
|
||||
return len(self.getObservation())
|
||||
|
||||
def getObservation(self):
|
||||
observation = []
|
||||
pos,orn=p.getBasePositionAndOrientation(self.blockUid)
|
||||
|
||||
observation.extend(list(pos))
|
||||
observation.extend(list(orn))
|
||||
|
||||
return observation
|
||||
|
||||
def applyAction(self, motorCommands):
|
||||
|
||||
for action in range (len(motorCommands)):
|
||||
motor = self.motorIndices[action]
|
||||
p.setJointMotorControl2(self.kukaUid,motor,p.POSITION_CONTROL,targetPosition=motorCommands[action],force=self.maxForce)
|
||||
|
||||
103
examples/pybullet/gym/envs/bullet/kukaGymEnv.py
Normal file
103
examples/pybullet/gym/envs/bullet/kukaGymEnv.py
Normal file
@@ -0,0 +1,103 @@
|
||||
import math
|
||||
import gym
|
||||
from gym import spaces
|
||||
from gym.utils import seeding
|
||||
import numpy as np
|
||||
import time
|
||||
import pybullet as p
|
||||
from . import kuka
|
||||
import random
|
||||
|
||||
class KukaGymEnv(gym.Env):
|
||||
metadata = {
|
||||
'render.modes': ['human', 'rgb_array'],
|
||||
'video.frames_per_second' : 50
|
||||
}
|
||||
|
||||
def __init__(self,
|
||||
urdfRoot="",
|
||||
actionRepeat=50,
|
||||
isEnableSelfCollision=True,
|
||||
renders=True):
|
||||
print("init")
|
||||
self._timeStep = 0.01
|
||||
self._urdfRoot = urdfRoot
|
||||
self._actionRepeat = actionRepeat
|
||||
self._isEnableSelfCollision = isEnableSelfCollision
|
||||
self._observation = []
|
||||
self._envStepCounter = 0
|
||||
self._renders = renders
|
||||
self._p = p
|
||||
if self._renders:
|
||||
p.connect(p.GUI)
|
||||
else:
|
||||
p.connect(p.DIRECT)
|
||||
self._seed()
|
||||
self.reset()
|
||||
observationDim = len(self.getExtendedObservation())
|
||||
#print("observationDim")
|
||||
#print(observationDim)
|
||||
|
||||
observation_high = np.array([np.finfo(np.float32).max] * observationDim)
|
||||
self.action_space = spaces.Discrete(9)
|
||||
self.observation_space = spaces.Box(-observation_high, observation_high)
|
||||
self.viewer = None
|
||||
|
||||
def _reset(self):
|
||||
p.resetSimulation()
|
||||
#p.setPhysicsEngineParameter(numSolverIterations=300)
|
||||
p.setTimeStep(self._timeStep)
|
||||
p.loadURDF("%splane.urdf" % self._urdfRoot,[0,0,-1])
|
||||
|
||||
dist = 5 +2.*random.random()
|
||||
ang = 2.*3.1415925438*random.random()
|
||||
|
||||
ballx = dist * math.sin(ang)
|
||||
bally = dist * math.cos(ang)
|
||||
ballz = 1
|
||||
|
||||
p.setGravity(0,0,-10)
|
||||
self._kuka = kuka.Kuka(urdfRootPath=self._urdfRoot, timeStep=self._timeStep)
|
||||
self._envStepCounter = 0
|
||||
p.stepSimulation()
|
||||
self._observation = self.getExtendedObservation()
|
||||
return np.array(self._observation)
|
||||
|
||||
def __del__(self):
|
||||
p.disconnect()
|
||||
|
||||
def _seed(self, seed=None):
|
||||
self.np_random, seed = seeding.np_random(seed)
|
||||
return [seed]
|
||||
|
||||
def getExtendedObservation(self):
|
||||
self._observation = self._kuka.getObservation()
|
||||
return self._observation
|
||||
|
||||
def _step(self, action):
|
||||
self._kuka.applyAction(action)
|
||||
for i in range(self._actionRepeat):
|
||||
p.stepSimulation()
|
||||
if self._renders:
|
||||
time.sleep(self._timeStep)
|
||||
self._observation = self.getExtendedObservation()
|
||||
if self._termination():
|
||||
break
|
||||
self._envStepCounter += 1
|
||||
reward = self._reward()
|
||||
done = self._termination()
|
||||
#print("len=%r" % len(self._observation))
|
||||
|
||||
return np.array(self._observation), reward, done, {}
|
||||
|
||||
def _render(self, mode='human', close=False):
|
||||
return
|
||||
|
||||
def _termination(self):
|
||||
return self._envStepCounter>1000
|
||||
|
||||
def _reward(self):
|
||||
#todo
|
||||
reward=0
|
||||
|
||||
return reward
|
||||
21
examples/pybullet/gym/humanoidGymEnvTest.py
Normal file
21
examples/pybullet/gym/humanoidGymEnvTest.py
Normal file
@@ -0,0 +1,21 @@
|
||||
|
||||
from envs.bullet.humanoidGymEnv import HumanoidGymEnv
|
||||
print ("hello")
|
||||
environment = HumanoidGymEnv(renders=True)
|
||||
|
||||
environment._p.setGravity(0,0,0)
|
||||
|
||||
motorsIds=[]
|
||||
for motor in environment._humanoid.motor_names:
|
||||
motorsIds.append(environment._p.addUserDebugParameter(motor,-1,1,0))
|
||||
|
||||
while (True):
|
||||
|
||||
action=[]
|
||||
for motorId in motorsIds:
|
||||
action.append(environment._p.readUserDebugParameter(motorId))
|
||||
|
||||
state, reward, done, info = environment.step(action)
|
||||
obs = environment.getExtendedObservation()
|
||||
print("obs")
|
||||
print(obs)
|
||||
21
examples/pybullet/gym/kukaGymEnvTest.py
Normal file
21
examples/pybullet/gym/kukaGymEnvTest.py
Normal file
@@ -0,0 +1,21 @@
|
||||
|
||||
from envs.bullet.kukaGymEnv import KukaGymEnv
|
||||
print ("hello")
|
||||
environment = KukaGymEnv(renders=True)
|
||||
|
||||
|
||||
motorsIds=[]
|
||||
for i in range (len(environment._kuka.motorNames)):
|
||||
motor = environment._kuka.motorNames[i]
|
||||
motorJointIndex = environment._kuka.motorIndices[i]
|
||||
motorsIds.append(environment._p.addUserDebugParameter(motor,-3,3,environment._kuka.jointPositions[i]))
|
||||
|
||||
while (True):
|
||||
|
||||
action=[]
|
||||
for motorId in motorsIds:
|
||||
action.append(environment._p.readUserDebugParameter(motorId))
|
||||
|
||||
state, reward, done, info = environment.step(action)
|
||||
obs = environment.getExtendedObservation()
|
||||
|
||||
30
examples/pybullet/gym/racecarZEDGymEnvTest.py
Normal file
30
examples/pybullet/gym/racecarZEDGymEnvTest.py
Normal file
@@ -0,0 +1,30 @@
|
||||
|
||||
from envs.bullet.racecarZEDGymEnv import RacecarZEDGymEnv
|
||||
print ("hello")
|
||||
environment = RacecarZEDGymEnv(renders=True)
|
||||
|
||||
targetVelocitySlider = environment._p.addUserDebugParameter("wheelVelocity",-1,1,0)
|
||||
steeringSlider = environment._p.addUserDebugParameter("steering",-0.5,0.5,0)
|
||||
|
||||
while (True):
|
||||
targetVelocity = environment._p.readUserDebugParameter(targetVelocitySlider)
|
||||
steeringAngle = environment._p.readUserDebugParameter(steeringSlider)
|
||||
discreteAction = 0
|
||||
if (targetVelocity<-0.33):
|
||||
discreteAction=0
|
||||
else:
|
||||
if (targetVelocity>0.33):
|
||||
discreteAction=6
|
||||
else:
|
||||
discreteAction=3
|
||||
if (steeringAngle>-0.17):
|
||||
if (steeringAngle>0.17):
|
||||
discreteAction=discreteAction+2
|
||||
else:
|
||||
discreteAction=discreteAction+1
|
||||
|
||||
action=discreteAction
|
||||
state, reward, done, info = environment.step(action)
|
||||
obs = environment.getExtendedObservation()
|
||||
print("obs")
|
||||
print(obs)
|
||||
Reference in New Issue
Block a user