Split locomotion environments.
This commit is contained in:
@@ -8,7 +8,9 @@ def relu(x):
|
||||
return np.maximum(x, 0)
|
||||
|
||||
class SmallReactivePolicy:
|
||||
"Simple multi-layer perceptron policy, no internal state"
|
||||
"""
|
||||
Simple multi-layer perceptron policy, no internal state
|
||||
"""
|
||||
def __init__(self, observation_space, action_space):
|
||||
assert weights_dense1_w.shape == (observation_space.shape[0], 128)
|
||||
assert weights_dense2_w.shape == (128, 64)
|
||||
|
||||
@@ -53,32 +53,25 @@ register(
|
||||
|
||||
register(
|
||||
id='InvertedPendulumBulletEnv-v0',
|
||||
entry_point='envs.gym_pendula_envs:InvertedPendulumBulletEnv',
|
||||
entry_point='envs.gym_pendulum_envs:InvertedPendulumBulletEnv',
|
||||
max_episode_steps=1000,
|
||||
reward_threshold=950.0,
|
||||
)
|
||||
|
||||
register(
|
||||
id='InvertedDoublePendulumBulletEnv-v0',
|
||||
entry_point='envs.gym_pendula_envs:InvertedDoublePendulumBulletEnv',
|
||||
entry_point='envs.gym_pendulum_envs:InvertedDoublePendulumBulletEnv',
|
||||
max_episode_steps=1000,
|
||||
reward_threshold=9100.0,
|
||||
)
|
||||
|
||||
register(
|
||||
id='InvertedPendulumSwingupBulletEnv-v0',
|
||||
entry_point='envs.gym_pendula_envs:InvertedPendulumSwingupBulletEnv',
|
||||
entry_point='envs.gym_pendulum_envs:InvertedPendulumSwingupBulletEnv',
|
||||
max_episode_steps=1000,
|
||||
reward_threshold=800.0,
|
||||
)
|
||||
|
||||
register(
|
||||
id='HopperBulletEnv-v0',
|
||||
entry_point='envs.gym_walker_envs:HopperBulletEnv',
|
||||
max_episode_steps=1000,
|
||||
reward_threshold=2500.0
|
||||
)
|
||||
|
||||
register(
|
||||
id='ReacherBulletEnv-v0',
|
||||
entry_point='envs.gym_manipulator_envs:ReacherBulletEnv',
|
||||
@@ -109,26 +102,33 @@ register(
|
||||
|
||||
register(
|
||||
id='Walker2DBulletEnv-v0',
|
||||
entry_point='envs.gym_walker_envs:Walker2DBulletEnv',
|
||||
entry_point='envs.gym_locomotion_envs:Walker2DBulletEnv',
|
||||
max_episode_steps=1000,
|
||||
reward_threshold=2500.0
|
||||
)
|
||||
register(
|
||||
id='HalfCheetahBulletEnv-v0',
|
||||
entry_point='envs.gym_walker_envs:HalfCheetahBulletEnv',
|
||||
entry_point='envs.gym_locomotion_envs:HalfCheetahBulletEnv',
|
||||
max_episode_steps=1000,
|
||||
reward_threshold=3000.0
|
||||
)
|
||||
|
||||
register(
|
||||
id='AntBulletEnv-v0',
|
||||
entry_point='envs.gym_walker_envs:AntBulletEnv',
|
||||
entry_point='envs.gym_locomotion_envs:AntBulletEnv',
|
||||
max_episode_steps=1000,
|
||||
reward_threshold=2500.0
|
||||
)
|
||||
|
||||
register(
|
||||
id='HumanoidBulletEnv-v0',
|
||||
entry_point='envs.gym_walker_envs:HumanoidBulletEnv',
|
||||
entry_point='envs.gym_locomotion_envs:HumanoidBulletEnv',
|
||||
max_episode_steps=1000
|
||||
)
|
||||
|
||||
register(
|
||||
id='HopperBulletEnv-v0',
|
||||
entry_point='envs.gym_locomotion_envs:HopperBulletEnv',
|
||||
max_episode_steps=1000,
|
||||
reward_threshold=2500.0
|
||||
)
|
||||
@@ -1,12 +1,11 @@
|
||||
import gym, gym.spaces, gym.utils, gym.utils.seeding
|
||||
import numpy as np
|
||||
import pybullet as p
|
||||
import os
|
||||
|
||||
|
||||
class MujocoXmlBaseBulletEnv(gym.Env):
|
||||
"""
|
||||
Base class for MuJoCo .xml actors in a Scene.
|
||||
Base class for MuJoCo .xml environments in a Scene.
|
||||
These environments create single-player scenes and behave like normal Gym environments, if
|
||||
you don't use multiplayer.
|
||||
"""
|
||||
@@ -16,112 +15,42 @@ class MujocoXmlBaseBulletEnv(gym.Env):
|
||||
'video.frames_per_second': 60
|
||||
}
|
||||
|
||||
self_collision = False
|
||||
|
||||
def __init__(self, model_xml, robot_name, action_dim, obs_dim):
|
||||
def __init__(self, robot):
|
||||
self.scene = None
|
||||
|
||||
self.parts = None
|
||||
self.jdict = None
|
||||
self.ordered_joints = None
|
||||
self.robot_body = None
|
||||
|
||||
high = np.ones([action_dim])
|
||||
self.action_space = gym.spaces.Box(-high, high)
|
||||
high = np.inf*np.ones([obs_dim])
|
||||
self.observation_space = gym.spaces.Box(-high, high)
|
||||
self._seed()
|
||||
|
||||
self.model_xml = model_xml
|
||||
self.robot_name = robot_name
|
||||
|
||||
self.camera = Camera()
|
||||
|
||||
self.robot = robot
|
||||
|
||||
self._seed()
|
||||
|
||||
self.action_space = robot.action_space
|
||||
self.observation_space = robot.observation_space
|
||||
|
||||
def _seed(self, seed=None):
|
||||
self.np_random, seed = gym.utils.seeding.np_random(seed)
|
||||
self.robot.np_random = self.np_random # use the same np_randomizer for robot as for env
|
||||
return [seed]
|
||||
|
||||
def addToScene(self, bodies):
|
||||
if self.parts is not None:
|
||||
parts = self.parts
|
||||
else:
|
||||
parts = {}
|
||||
|
||||
if self.jdict is not None:
|
||||
joints = self.jdict
|
||||
else:
|
||||
joints = {}
|
||||
|
||||
if self.ordered_joints is not None:
|
||||
ordered_joints = self.ordered_joints
|
||||
else:
|
||||
ordered_joints = []
|
||||
|
||||
dump = 0
|
||||
for i in range(len(bodies)):
|
||||
if p.getNumJoints(bodies[i]) == 0:
|
||||
part_name, robot_name = p.getBodyInfo(bodies[i], 0)
|
||||
robot_name = robot_name.decode("utf8")
|
||||
part_name = part_name.decode("utf8")
|
||||
parts[part_name] = BodyPart(part_name, bodies, i, -1)
|
||||
for j in range(p.getNumJoints(bodies[i])):
|
||||
_,joint_name,_,_,_,_,_,_,_,_,_,_,part_name = p.getJointInfo(bodies[i], j)
|
||||
|
||||
joint_name = joint_name.decode("utf8")
|
||||
part_name = part_name.decode("utf8")
|
||||
|
||||
if dump: print("ROBOT PART '%s'" % part_name)
|
||||
if dump: print("ROBOT JOINT '%s'" % joint_name) # limits = %+0.2f..%+0.2f effort=%0.3f speed=%0.3f" % ((joint_name,) + j.limits()) )
|
||||
|
||||
parts[part_name] = BodyPart(part_name, bodies, i, j)
|
||||
|
||||
if part_name == self.robot_name:
|
||||
self.robot_body = parts[part_name]
|
||||
|
||||
if i == 0 and j == 0 and self.robot_body is None: # if nothing else works, we take this as robot_body
|
||||
parts[self.robot_name] = BodyPart(self.robot_name, bodies, 0, -1)
|
||||
self.robot_body = parts[self.robot_name]
|
||||
|
||||
if joint_name[:6] == "ignore":
|
||||
Joint(joint_name, bodies, i, j).disable_motor()
|
||||
continue
|
||||
|
||||
if joint_name[:8] != "jointfix":
|
||||
joints[joint_name] = Joint(joint_name, bodies, i, j)
|
||||
ordered_joints.append(joints[joint_name])
|
||||
|
||||
joints[joint_name].power_coef = 100.0
|
||||
|
||||
return parts, joints, ordered_joints, self.robot_body
|
||||
|
||||
def _reset(self):
|
||||
if self.scene is None:
|
||||
self.scene = self.create_single_player_scene()
|
||||
if not self.scene.multiplayer:
|
||||
self.scene.episode_restart()
|
||||
|
||||
self.ordered_joints = []
|
||||
self.robot.scene = self.scene
|
||||
|
||||
self.frame = 0
|
||||
self.done = 0
|
||||
self.reward = 0
|
||||
dump = 0
|
||||
|
||||
if self.self_collision:
|
||||
self.parts, self.jdict, self.ordered_joints, self.robot_body = self.addToScene(p.loadMJCF(os.path.join("mjcf", self.model_xml), flags = p.URDF_USE_SELF_COLLISION_EXCLUDE_ALL_PARENTS))
|
||||
else:
|
||||
self.parts, self.jdict, self.ordered_joints, self.robot_body = self.addToScene(p.loadMJCF(os.path.join("mjcf", self.model_xml)))
|
||||
|
||||
self.robot_specific_reset()
|
||||
s = self.calc_state() # optimization: calc_state() can calculate something in self.* for calc_potential() to use
|
||||
self.potential = self.calc_potential()
|
||||
s = self.robot.reset()
|
||||
self.potential = self.robot.calc_potential()
|
||||
return s
|
||||
|
||||
def _render(self, mode, close):
|
||||
pass
|
||||
|
||||
def calc_potential(self):
|
||||
return 0
|
||||
|
||||
def HUD(self, state, a, done):
|
||||
pass
|
||||
|
||||
@@ -134,113 +63,3 @@ class Camera:
|
||||
distance = 10
|
||||
yaw = 10
|
||||
p.resetDebugVisualizerCamera(distance, yaw, -20, lookat)
|
||||
|
||||
class Pose_Helper: # dummy class to comply to original interface
|
||||
def __init__(self, body_part):
|
||||
self.body_part = body_part
|
||||
|
||||
def xyz(self):
|
||||
return self.body_part.current_position()
|
||||
|
||||
def rpy(self):
|
||||
return p.getEulerFromQuaternion(self.body_part.current_orientation())
|
||||
|
||||
def orientation(self):
|
||||
return self.body_part.current_orientation()
|
||||
|
||||
class BodyPart:
|
||||
def __init__(self, body_name, bodies, bodyIndex, bodyPartIndex):
|
||||
self.bodies = bodies
|
||||
self.bodyIndex = bodyIndex
|
||||
self.bodyPartIndex = bodyPartIndex
|
||||
self.initialPosition = self.current_position()
|
||||
self.initialOrientation = self.current_orientation()
|
||||
self.bp_pose = Pose_Helper(self)
|
||||
|
||||
def state_fields_of_pose_of(self, body_id, link_id=-1): # a method you will most probably need a lot to get pose and orientation
|
||||
if link_id == -1:
|
||||
(x, y, z), (a, b, c, d) = p.getBasePositionAndOrientation(body_id)
|
||||
else:
|
||||
(x, y, z), (a, b, c, d), _, _, _, _ = p.getLinkState(body_id, link_id)
|
||||
return np.array([x, y, z, a, b, c, d])
|
||||
|
||||
def get_pose(self):
|
||||
return self.state_fields_of_pose_of(self.bodies[self.bodyIndex], self.bodyPartIndex)
|
||||
|
||||
def speed(self):
|
||||
if self.bodyPartIndex == -1:
|
||||
(vx, vy, vz), _ = p.getBaseVelocity(self.bodies[self.bodyIndex])
|
||||
else:
|
||||
(x,y,z), (a,b,c,d), _,_,_,_, (vx, vy, vz), (vr,vp,vy) = p.getLinkState(self.bodies[self.bodyIndex], self.bodyPartIndex, computeLinkVelocity=1)
|
||||
return np.array([vx, vy, vz])
|
||||
|
||||
def current_position(self):
|
||||
return self.get_pose()[:3]
|
||||
|
||||
def current_orientation(self):
|
||||
return self.get_pose()[3:]
|
||||
|
||||
def reset_position(self, position):
|
||||
p.resetBasePositionAndOrientation(self.bodies[self.bodyIndex], position, self.get_orientation())
|
||||
|
||||
def reset_orientation(self, orientation):
|
||||
p.resetBasePositionAndOrientation(self.bodies[self.bodyIndex], self.get_position(), orientation)
|
||||
|
||||
def reset_pose(self, position, orientation):
|
||||
p.resetBasePositionAndOrientation(self.bodies[self.bodyIndex], position, orientation)
|
||||
|
||||
def pose(self):
|
||||
return self.bp_pose
|
||||
|
||||
def contact_list(self):
|
||||
return p.getContactPoints(self.bodies[self.bodyIndex], -1, self.bodyPartIndex, -1)
|
||||
|
||||
|
||||
class Joint:
|
||||
def __init__(self, joint_name, bodies, bodyIndex, jointIndex):
|
||||
self.bodies = bodies
|
||||
self.bodyIndex = bodyIndex
|
||||
self.jointIndex = jointIndex
|
||||
self.joint_name = joint_name
|
||||
_,_,_,_,_,_,_,_,self.lowerLimit, self.upperLimit,_,_,_ = p.getJointInfo(self.bodies[self.bodyIndex], self.jointIndex)
|
||||
self.power_coeff = 0
|
||||
|
||||
def set_state(self, x, vx):
|
||||
p.resetJointState(self.bodies[self.bodyIndex], self.jointIndex, x, vx)
|
||||
|
||||
def current_position(self): # just some synonyme method
|
||||
return self.get_state()
|
||||
|
||||
def current_relative_position(self):
|
||||
pos, vel = self.get_state()
|
||||
pos_mid = 0.5 * (self.lowerLimit + self.upperLimit);
|
||||
return (
|
||||
2 * (pos - pos_mid) / (self.upperLimit - self.lowerLimit),
|
||||
0.1 * vel
|
||||
)
|
||||
|
||||
def get_state(self):
|
||||
x, vx,_,_ = p.getJointState(self.bodies[self.bodyIndex],self.jointIndex)
|
||||
return x, vx
|
||||
|
||||
def set_position(self, position):
|
||||
p.setJointMotorControl2(self.bodies[self.bodyIndex],self.jointIndex,p.POSITION_CONTROL, targetPosition=position)
|
||||
|
||||
def set_velocity(self, velocity):
|
||||
p.setJointMotorControl2(self.bodies[self.bodyIndex],self.jointIndex,p.VELOCITY_CONTROL, targetVelocity=velocity)
|
||||
|
||||
def set_motor_torque(self, torque): # just some synonyme method
|
||||
self.set_torque(torque)
|
||||
|
||||
def set_torque(self, torque):
|
||||
p.setJointMotorControl2(bodyIndex=self.bodies[self.bodyIndex], jointIndex=self.jointIndex, controlMode=p.TORQUE_CONTROL, force=torque) #, positionGain=0.1, velocityGain=0.1)
|
||||
|
||||
def reset_current_position(self, position, velocity): # just some synonyme method
|
||||
self.reset_position(position, velocity)
|
||||
|
||||
def reset_position(self, position, velocity):
|
||||
p.resetJointState(self.bodies[self.bodyIndex],self.jointIndex,targetValue=position, targetVelocity=velocity)
|
||||
self.disable_motor()
|
||||
|
||||
def disable_motor(self):
|
||||
p.setJointMotorControl2(self.bodies[self.bodyIndex],self.jointIndex,controlMode=p.VELOCITY_CONTROL, force=0)
|
||||
|
||||
110
examples/pybullet/gym/envs/gym_locomotion_envs.py
Normal file
110
examples/pybullet/gym/envs/gym_locomotion_envs.py
Normal file
@@ -0,0 +1,110 @@
|
||||
from scene_stadium import SinglePlayerStadiumScene
|
||||
from env_bases import MujocoXmlBaseBulletEnv
|
||||
import numpy as np
|
||||
from robot_locomotors import Hopper, Walker2D, HalfCheetah, Ant, Humanoid
|
||||
|
||||
|
||||
class WalkerBaseBulletEnv(MujocoXmlBaseBulletEnv):
|
||||
def __init__(self, robot):
|
||||
MujocoXmlBaseBulletEnv.__init__(self, robot)
|
||||
self.camera_x = 0
|
||||
self.walk_target_x = 1e3 # kilometer away
|
||||
self.walk_target_y = 0
|
||||
|
||||
def create_single_player_scene(self):
|
||||
self.stadium_scene = SinglePlayerStadiumScene(gravity=9.8, timestep=0.0165/4, frame_skip=4)
|
||||
return self.stadium_scene
|
||||
|
||||
def _reset(self):
|
||||
r = MujocoXmlBaseBulletEnv._reset(self)
|
||||
self.parts, self.jdict, self.ordered_joints, self.robot_body = self.robot.addToScene(
|
||||
self.stadium_scene.ground_plane_mjcf)
|
||||
self.ground_ids = set([(self.parts[f].bodies[self.parts[f].bodyIndex], self.parts[f].bodyPartIndex) for f in
|
||||
self.foot_ground_object_names])
|
||||
return r
|
||||
|
||||
def move_robot(self, init_x, init_y, init_z):
|
||||
"Used by multiplayer stadium to move sideways, to another running lane."
|
||||
self.cpp_robot.query_position()
|
||||
pose = self.cpp_robot.root_part.pose()
|
||||
pose.move_xyz(init_x, init_y, init_z) # Works because robot loads around (0,0,0), and some robots have z != 0 that is left intact
|
||||
self.cpp_robot.set_pose(pose)
|
||||
|
||||
electricity_cost = -2.0 # cost for using motors -- this parameter should be carefully tuned against reward for making progress, other values less improtant
|
||||
stall_torque_cost = -0.1 # cost for running electric current through a motor even at zero rotational speed, small
|
||||
foot_collision_cost = -1.0 # touches another leg, or other objects, that cost makes robot avoid smashing feet into itself
|
||||
foot_ground_object_names = set(["floor"]) # to distinguish ground and other objects
|
||||
joints_at_limit_cost = -0.1 # discourage stuck joints
|
||||
|
||||
def _step(self, a):
|
||||
if not self.scene.multiplayer: # if multiplayer, action first applied to all robots, then global step() called, then _step() for all robots with the same actions
|
||||
self.robot.apply_action(a)
|
||||
self.scene.global_step()
|
||||
|
||||
state = self.robot.calc_state() # also calculates self.joints_at_limit
|
||||
|
||||
alive = float(self.robot.alive_bonus(state[0]+self.robot.initial_z, self.robot.body_rpy[1])) # state[0] is body height above ground, body_rpy[1] is pitch
|
||||
done = alive < 0
|
||||
if not np.isfinite(state).all():
|
||||
print("~INF~", state)
|
||||
done = True
|
||||
|
||||
potential_old = self.potential
|
||||
self.potential = self.robot.calc_potential()
|
||||
progress = float(self.potential - potential_old)
|
||||
|
||||
feet_collision_cost = 0.0
|
||||
for i,f in enumerate(self.robot.feet): # TODO: Maybe calculating feet contacts could be done within the robot code
|
||||
contact_ids = set((x[2], x[4]) for x in f.contact_list())
|
||||
#print("CONTACT OF '%s' WITH %s" % (f.name, ",".join(contact_names)) )
|
||||
self.robot.feet_contact[i] = 1.0 if (self.ground_ids & contact_ids) else 0.0
|
||||
if contact_ids - self.ground_ids:
|
||||
feet_collision_cost += self.foot_collision_cost
|
||||
|
||||
electricity_cost = self.electricity_cost * float(np.abs(a*self.robot.joint_speeds).mean()) # let's assume we have DC motor with controller, and reverse current braking
|
||||
electricity_cost += self.stall_torque_cost * float(np.square(a).mean())
|
||||
|
||||
joints_at_limit_cost = float(self.joints_at_limit_cost * self.robot.joints_at_limit)
|
||||
|
||||
self.rewards = [
|
||||
alive,
|
||||
progress,
|
||||
electricity_cost,
|
||||
joints_at_limit_cost,
|
||||
feet_collision_cost
|
||||
]
|
||||
|
||||
self.HUD(state, a, done)
|
||||
return state, sum(self.rewards), bool(done), {}
|
||||
|
||||
def camera_adjust(self):
|
||||
x, y, z = self.body_xyz
|
||||
self.camera_x = 0.98*self.camera_x + (1-0.98)*x
|
||||
self.camera.move_and_look_at(self.camera_x, y-2.0, 1.4, x, y, 1.0)
|
||||
|
||||
class HopperBulletEnv(WalkerBaseBulletEnv):
|
||||
def __init__(self):
|
||||
self.robot = Hopper()
|
||||
WalkerBaseBulletEnv.__init__(self, self.robot)
|
||||
|
||||
class Walker2DBulletEnv(WalkerBaseBulletEnv):
|
||||
def __init__(self):
|
||||
self.robot = Walker2D()
|
||||
WalkerBaseBulletEnv.__init__(self, self.robot)
|
||||
|
||||
class HalfCheetahBulletEnv(WalkerBaseBulletEnv):
|
||||
def __init__(self):
|
||||
self.robot = HalfCheetah()
|
||||
WalkerBaseBulletEnv.__init__(self, self.robot)
|
||||
|
||||
class AntBulletEnv(WalkerBaseBulletEnv):
|
||||
def __init__(self):
|
||||
self.robot = Ant()
|
||||
WalkerBaseBulletEnv.__init__(self, self.robot)
|
||||
|
||||
class HumanoidBulletEnv(WalkerBaseBulletEnv):
|
||||
def __init__(self):
|
||||
self.robot = Humanoid()
|
||||
WalkerBaseBulletEnv.__init__(self, self.robot)
|
||||
self.electricity_cost = 4.25*WalkerBaseBulletEnv.electricity_cost
|
||||
self.stall_torque_cost = 4.25*WalkerBaseBulletEnv.stall_torque_cost
|
||||
@@ -1,4 +1,4 @@
|
||||
from pybulletgym.envs.scene_abstract import SingleRobotEmptyScene
|
||||
from scene_abstract import SingleRobotEmptyScene
|
||||
from env_bases import MujocoXmlBaseBulletEnv
|
||||
import gym, gym.spaces, gym.utils, gym.utils.seeding
|
||||
import numpy as np
|
||||
|
||||
@@ -1,233 +0,0 @@
|
||||
from distutils.command.config import config
|
||||
|
||||
from scene_abstract import SingleRobotEmptyScene
|
||||
from scene_stadium import SinglePlayerStadiumScene
|
||||
from env_bases import MujocoXmlBaseBulletEnv
|
||||
import gym, gym.spaces, gym.utils, gym.utils.seeding
|
||||
import numpy as np
|
||||
import os, sys
|
||||
|
||||
class WalkerBaseBulletEnv(MujocoXmlBaseBulletEnv):
|
||||
def __init__(self, fn, robot_name, action_dim, obs_dim, power):
|
||||
MujocoXmlBaseBulletEnv.__init__(self, fn, robot_name, action_dim, obs_dim)
|
||||
self.power = power
|
||||
self.camera_x = 0
|
||||
self.walk_target_x = 1e3 # kilometer away
|
||||
self.walk_target_y = 0
|
||||
|
||||
def create_single_player_scene(self):
|
||||
self.stadium_scene = SinglePlayerStadiumScene(gravity=9.8, timestep=0.0165/4, frame_skip=4)
|
||||
return self.stadium_scene
|
||||
|
||||
def robot_specific_reset(self):
|
||||
for j in self.ordered_joints:
|
||||
j.reset_current_position(self.np_random.uniform( low=-0.1, high=0.1 ), 0)
|
||||
|
||||
self.parts, self.jdict, self.ordered_joints, self.robot_body = self.addToScene(self.stadium_scene.ground_plane_mjcf)
|
||||
self.feet = [self.parts[f] for f in self.foot_list]
|
||||
self.feet_contact = np.array([0.0 for f in self.foot_list], dtype=np.float32)
|
||||
self.ground_ids = set([(self.parts[f].bodies[self.parts[f].bodyIndex], self.parts[f].bodyPartIndex) for f in self.foot_ground_object_names])
|
||||
self.scene.actor_introduce(self)
|
||||
self.initial_z = None
|
||||
|
||||
def move_robot(self, init_x, init_y, init_z):
|
||||
"Used by multiplayer stadium to move sideways, to another running lane."
|
||||
self.cpp_robot.query_position()
|
||||
pose = self.cpp_robot.root_part.pose()
|
||||
pose.move_xyz(init_x, init_y, init_z) # Works because robot loads around (0,0,0), and some robots have z != 0 that is left intact
|
||||
self.cpp_robot.set_pose(pose)
|
||||
|
||||
def apply_action(self, a):
|
||||
assert( np.isfinite(a).all() )
|
||||
for n,j in enumerate(self.ordered_joints):
|
||||
j.set_motor_torque( self.power*j.power_coef*float(np.clip(a[n], -1, +1)) )
|
||||
|
||||
def calc_state(self):
|
||||
j = np.array([j.current_relative_position() for j in self.ordered_joints], dtype=np.float32).flatten()
|
||||
# even elements [0::2] position, scaled to -1..+1 between limits
|
||||
# odd elements [1::2] angular speed, scaled to show -1..+1
|
||||
self.joint_speeds = j[1::2]
|
||||
self.joints_at_limit = np.count_nonzero(np.abs(j[0::2]) > 0.99)
|
||||
|
||||
body_pose = self.robot_body.pose()
|
||||
parts_xyz = np.array( [p.pose().xyz() for p in self.parts.values()] ).flatten()
|
||||
self.body_xyz = (parts_xyz[0::3].mean(), parts_xyz[1::3].mean(), body_pose.xyz()[2]) # torso z is more informative than mean z
|
||||
self.body_rpy = body_pose.rpy()
|
||||
z = self.body_xyz[2]
|
||||
r, p, yaw = self.body_rpy
|
||||
if self.initial_z==None:
|
||||
self.initial_z = z
|
||||
self.walk_target_theta = np.arctan2( self.walk_target_y - self.body_xyz[1], self.walk_target_x - self.body_xyz[0] )
|
||||
self.walk_target_dist = np.linalg.norm( [self.walk_target_y - self.body_xyz[1], self.walk_target_x - self.body_xyz[0]] )
|
||||
angle_to_target = self.walk_target_theta - yaw
|
||||
|
||||
rot_speed = np.array(
|
||||
[[np.cos(-yaw), -np.sin(-yaw), 0],
|
||||
[np.sin(-yaw), np.cos(-yaw), 0],
|
||||
[ 0, 0, 1]]
|
||||
)
|
||||
vx, vy, vz = np.dot(rot_speed, self.robot_body.speed()) # rotate speed back to body point of view
|
||||
|
||||
more = np.array([
|
||||
z-self.initial_z,
|
||||
np.sin(angle_to_target), np.cos(angle_to_target),
|
||||
0.3*vx, 0.3*vy, 0.3*vz, # 0.3 is just scaling typical speed into -1..+1, no physical sense here
|
||||
r, p], dtype=np.float32)
|
||||
return np.clip( np.concatenate([more] + [j] + [self.feet_contact]), -5, +5)
|
||||
|
||||
def calc_potential(self):
|
||||
# progress in potential field is speed*dt, typical speed is about 2-3 meter per second, this potential will change 2-3 per frame (not per second),
|
||||
# all rewards have rew/frame units and close to 1.0
|
||||
return - self.walk_target_dist / self.scene.dt
|
||||
|
||||
electricity_cost = -2.0 # cost for using motors -- this parameter should be carefully tuned against reward for making progress, other values less improtant
|
||||
stall_torque_cost = -0.1 # cost for running electric current through a motor even at zero rotational speed, small
|
||||
foot_collision_cost = -1.0 # touches another leg, or other objects, that cost makes robot avoid smashing feet into itself
|
||||
foot_ground_object_names = set(["floor"]) # to distinguish ground and other objects
|
||||
joints_at_limit_cost = -0.1 # discourage stuck joints
|
||||
|
||||
def _step(self, a):
|
||||
if not self.scene.multiplayer: # if multiplayer, action first applied to all robots, then global step() called, then _step() for all robots with the same actions
|
||||
self.apply_action(a)
|
||||
self.scene.global_step()
|
||||
|
||||
state = self.calc_state() # also calculates self.joints_at_limit
|
||||
|
||||
alive = float(self.alive_bonus(state[0]+self.initial_z, self.body_rpy[1])) # state[0] is body height above ground, body_rpy[1] is pitch
|
||||
done = alive < 0
|
||||
if not np.isfinite(state).all():
|
||||
print("~INF~", state)
|
||||
done = True
|
||||
|
||||
potential_old = self.potential
|
||||
self.potential = self.calc_potential()
|
||||
progress = float(self.potential - potential_old)
|
||||
|
||||
feet_collision_cost = 0.0
|
||||
for i,f in enumerate(self.feet):
|
||||
contact_ids = set((x[2], x[4]) for x in f.contact_list())
|
||||
#print("CONTACT OF '%s' WITH %s" % (f.name, ",".join(contact_names)) )
|
||||
self.feet_contact[i] = 1.0 if (self.ground_ids & contact_ids) else 0.0
|
||||
if contact_ids - self.ground_ids:
|
||||
feet_collision_cost += self.foot_collision_cost
|
||||
|
||||
electricity_cost = self.electricity_cost * float(np.abs(a*self.joint_speeds).mean()) # let's assume we have DC motor with controller, and reverse current braking
|
||||
electricity_cost += self.stall_torque_cost * float(np.square(a).mean())
|
||||
|
||||
joints_at_limit_cost = float(self.joints_at_limit_cost * self.joints_at_limit)
|
||||
|
||||
self.rewards = [
|
||||
alive,
|
||||
progress,
|
||||
electricity_cost,
|
||||
joints_at_limit_cost,
|
||||
feet_collision_cost
|
||||
]
|
||||
|
||||
self.HUD(state, a, done)
|
||||
return state, sum(self.rewards), bool(done), {}
|
||||
|
||||
def camera_adjust(self):
|
||||
x, y, z = self.body_xyz
|
||||
self.camera_x = 0.98*self.camera_x + (1-0.98)*x
|
||||
self.camera.move_and_look_at(self.camera_x, y-2.0, 1.4, x, y, 1.0)
|
||||
|
||||
class HopperBulletEnv(WalkerBaseBulletEnv):
|
||||
foot_list = ["foot"]
|
||||
def __init__(self):
|
||||
WalkerBaseBulletEnv.__init__(self, "hopper.xml", "torso", action_dim=3, obs_dim=15, power=0.75)
|
||||
def alive_bonus(self, z, pitch):
|
||||
return +1 if z > 0.8 and abs(pitch) < 1.0 else -1
|
||||
|
||||
class Walker2DBulletEnv(WalkerBaseBulletEnv):
|
||||
foot_list = ["foot", "foot_left"]
|
||||
def __init__(self):
|
||||
WalkerBaseBulletEnv.__init__(self, "walker2d.xml", "torso", action_dim=6, obs_dim=22, power=0.40)
|
||||
def alive_bonus(self, z, pitch):
|
||||
return +1 if z > 0.8 and abs(pitch) < 1.0 else -1
|
||||
def robot_specific_reset(self):
|
||||
WalkerBaseBulletEnv.robot_specific_reset(self)
|
||||
for n in ["foot_joint", "foot_left_joint"]:
|
||||
self.jdict[n].power_coef = 30.0
|
||||
|
||||
class HalfCheetahBulletEnv(WalkerBaseBulletEnv):
|
||||
foot_list = ["ffoot", "fshin", "fthigh", "bfoot", "bshin", "bthigh"] # track these contacts with ground
|
||||
def __init__(self):
|
||||
WalkerBaseBulletEnv.__init__(self, "half_cheetah.xml", "torso", action_dim=6, obs_dim=26, power=0.90)
|
||||
def alive_bonus(self, z, pitch):
|
||||
# Use contact other than feet to terminate episode: due to a lot of strange walks using knees
|
||||
return +1 if np.abs(pitch) < 1.0 and not self.feet_contact[1] and not self.feet_contact[2] and not self.feet_contact[4] and not self.feet_contact[5] else -1
|
||||
def robot_specific_reset(self):
|
||||
WalkerBaseBulletEnv.robot_specific_reset(self)
|
||||
self.jdict["bthigh"].power_coef = 120.0
|
||||
self.jdict["bshin"].power_coef = 90.0
|
||||
self.jdict["bfoot"].power_coef = 60.0
|
||||
self.jdict["fthigh"].power_coef = 140.0
|
||||
self.jdict["fshin"].power_coef = 60.0
|
||||
self.jdict["ffoot"].power_coef = 30.0
|
||||
|
||||
class AntBulletEnv(WalkerBaseBulletEnv):
|
||||
foot_list = ['front_left_foot', 'front_right_foot', 'left_back_foot', 'right_back_foot']
|
||||
def __init__(self):
|
||||
WalkerBaseBulletEnv.__init__(self, "ant.xml", "torso", action_dim=8, obs_dim=28, power=10.5)
|
||||
def alive_bonus(self, z, pitch):
|
||||
return +1 if z > 0.26 else -1 # 0.25 is central sphere rad, die if it scrapes the ground
|
||||
|
||||
|
||||
## 3d Humanoid ##
|
||||
|
||||
class HumanoidBulletEnv(WalkerBaseBulletEnv):
|
||||
self_collision = True
|
||||
foot_list = ["right_foot", "left_foot"] # "left_hand", "right_hand"
|
||||
|
||||
def __init__(self):
|
||||
WalkerBaseBulletEnv.__init__(self, 'humanoid_symmetric.xml', 'torso', action_dim=17, obs_dim=44, power=0.41)
|
||||
# 17 joints, 4 of them important for walking (hip, knee), others may as well be turned off, 17/4 = 4.25
|
||||
self.electricity_cost = 4.25*WalkerBaseBulletEnv.electricity_cost
|
||||
self.stall_torque_cost = 4.25*WalkerBaseBulletEnv.stall_torque_cost
|
||||
|
||||
def robot_specific_reset(self):
|
||||
WalkerBaseBulletEnv.robot_specific_reset(self)
|
||||
self.motor_names = ["abdomen_z", "abdomen_y", "abdomen_x"]
|
||||
self.motor_power = [100, 100, 100]
|
||||
self.motor_names += ["right_hip_x", "right_hip_z", "right_hip_y", "right_knee"]
|
||||
self.motor_power += [100, 100, 300, 200]
|
||||
self.motor_names += ["left_hip_x", "left_hip_z", "left_hip_y", "left_knee"]
|
||||
self.motor_power += [100, 100, 300, 200]
|
||||
self.motor_names += ["right_shoulder1", "right_shoulder2", "right_elbow"]
|
||||
self.motor_power += [75, 75, 75]
|
||||
self.motor_names += ["left_shoulder1", "left_shoulder2", "left_elbow"]
|
||||
self.motor_power += [75, 75, 75]
|
||||
self.motors = [self.jdict[n] for n in self.motor_names]
|
||||
# if self.random_yaw: # TODO: Make leaning work as soon as the rest works
|
||||
# cpose = cpp_household.Pose()
|
||||
# yaw = self.np_random.uniform(low=-3.14, high=3.14)
|
||||
# if self.random_lean and self.np_random.randint(2)==0:
|
||||
# cpose.set_xyz(0, 0, 1.4)
|
||||
# if self.np_random.randint(2)==0:
|
||||
# pitch = np.pi/2
|
||||
# cpose.set_xyz(0, 0, 0.45)
|
||||
# else:
|
||||
# pitch = np.pi*3/2
|
||||
# cpose.set_xyz(0, 0, 0.25)
|
||||
# roll = 0
|
||||
# cpose.set_rpy(roll, pitch, yaw)
|
||||
# else:
|
||||
# cpose.set_xyz(0, 0, 1.4)
|
||||
# cpose.set_rpy(0, 0, yaw) # just face random direction, but stay straight otherwise
|
||||
# self.cpp_robot.set_pose_and_speed(cpose, 0,0,0)
|
||||
self.initial_z = 0.8
|
||||
|
||||
random_yaw = False
|
||||
random_lean = False
|
||||
|
||||
def apply_action(self, a):
|
||||
assert( np.isfinite(a).all() )
|
||||
force_gain = 1
|
||||
for i, m, power in zip(range(17), self.motors, self.motor_power):
|
||||
m.set_motor_torque( float(force_gain * power*self.power*a[i]) )
|
||||
#m.set_motor_torque(float(force_gain * power * self.power * np.clip(a[i], -1, +1)))
|
||||
|
||||
def alive_bonus(self, z, pitch):
|
||||
return +2 if z > 0.78 else -1 # 2 here because 17 joints produce a lot of electricity cost just from policy noise, living must be better than dying
|
||||
|
||||
208
examples/pybullet/gym/envs/robot_bases.py
Normal file
208
examples/pybullet/gym/envs/robot_bases.py
Normal file
@@ -0,0 +1,208 @@
|
||||
import pybullet as p
|
||||
import gym, gym.spaces, gym.utils
|
||||
import numpy as np
|
||||
import os
|
||||
|
||||
|
||||
class MujocoXmlBasedRobot:
|
||||
"""
|
||||
Base class for mujoco .xml based agents.
|
||||
"""
|
||||
|
||||
self_collision = False
|
||||
|
||||
def __init__(self, model_xml, robot_name, action_dim, obs_dim):
|
||||
self.parts = None
|
||||
self.jdict = None
|
||||
self.ordered_joints = None
|
||||
self.robot_body = None
|
||||
|
||||
high = np.ones([action_dim])
|
||||
self.action_space = gym.spaces.Box(-high, high)
|
||||
high = np.inf * np.ones([obs_dim])
|
||||
self.observation_space = gym.spaces.Box(-high, high)
|
||||
|
||||
self.model_xml = model_xml
|
||||
self.robot_name = robot_name
|
||||
|
||||
def addToScene(self, bodies):
|
||||
if self.parts is not None:
|
||||
parts = self.parts
|
||||
else:
|
||||
parts = {}
|
||||
|
||||
if self.jdict is not None:
|
||||
joints = self.jdict
|
||||
else:
|
||||
joints = {}
|
||||
|
||||
if self.ordered_joints is not None:
|
||||
ordered_joints = self.ordered_joints
|
||||
else:
|
||||
ordered_joints = []
|
||||
|
||||
dump = 0
|
||||
for i in range(len(bodies)):
|
||||
if p.getNumJoints(bodies[i]) == 0:
|
||||
part_name, robot_name = p.getBodyInfo(bodies[i], 0)
|
||||
robot_name = robot_name.decode("utf8")
|
||||
part_name = part_name.decode("utf8")
|
||||
parts[part_name] = BodyPart(part_name, bodies, i, -1)
|
||||
for j in range(p.getNumJoints(bodies[i])):
|
||||
_,joint_name,_,_,_,_,_,_,_,_,_,_,part_name = p.getJointInfo(bodies[i], j)
|
||||
|
||||
joint_name = joint_name.decode("utf8")
|
||||
part_name = part_name.decode("utf8")
|
||||
|
||||
if dump: print("ROBOT PART '%s'" % part_name)
|
||||
if dump: print("ROBOT JOINT '%s'" % joint_name) # limits = %+0.2f..%+0.2f effort=%0.3f speed=%0.3f" % ((joint_name,) + j.limits()) )
|
||||
|
||||
parts[part_name] = BodyPart(part_name, bodies, i, j)
|
||||
|
||||
if part_name == self.robot_name:
|
||||
self.robot_body = parts[part_name]
|
||||
|
||||
if i == 0 and j == 0 and self.robot_body is None: # if nothing else works, we take this as robot_body
|
||||
parts[self.robot_name] = BodyPart(self.robot_name, bodies, 0, -1)
|
||||
self.robot_body = parts[self.robot_name]
|
||||
|
||||
if joint_name[:6] == "ignore":
|
||||
Joint(joint_name, bodies, i, j).disable_motor()
|
||||
continue
|
||||
|
||||
if joint_name[:8] != "jointfix":
|
||||
joints[joint_name] = Joint(joint_name, bodies, i, j)
|
||||
ordered_joints.append(joints[joint_name])
|
||||
|
||||
joints[joint_name].power_coef = 100.0
|
||||
|
||||
return parts, joints, ordered_joints, self.robot_body
|
||||
|
||||
def reset(self):
|
||||
self.ordered_joints = []
|
||||
|
||||
if self.self_collision:
|
||||
self.parts, self.jdict, self.ordered_joints, self.robot_body = self.addToScene(
|
||||
p.loadMJCF(os.path.join("mjcf", self.model_xml), flags=p.URDF_USE_SELF_COLLISION_EXCLUDE_ALL_PARENTS))
|
||||
else:
|
||||
self.parts, self.jdict, self.ordered_joints, self.robot_body = self.addToScene(
|
||||
p.loadMJCF(os.path.join("mjcf", self.model_xml)))
|
||||
|
||||
self.robot_specific_reset()
|
||||
|
||||
s = self.calc_state() # optimization: calc_state() can calculate something in self.* for calc_potential() to use
|
||||
|
||||
return s
|
||||
|
||||
def calc_potential(self):
|
||||
return 0
|
||||
|
||||
class Pose_Helper: # dummy class to comply to original interface
|
||||
def __init__(self, body_part):
|
||||
self.body_part = body_part
|
||||
|
||||
def xyz(self):
|
||||
return self.body_part.current_position()
|
||||
|
||||
def rpy(self):
|
||||
return p.getEulerFromQuaternion(self.body_part.current_orientation())
|
||||
|
||||
def orientation(self):
|
||||
return self.body_part.current_orientation()
|
||||
|
||||
class BodyPart:
|
||||
def __init__(self, body_name, bodies, bodyIndex, bodyPartIndex):
|
||||
self.bodies = bodies
|
||||
self.bodyIndex = bodyIndex
|
||||
self.bodyPartIndex = bodyPartIndex
|
||||
self.initialPosition = self.current_position()
|
||||
self.initialOrientation = self.current_orientation()
|
||||
self.bp_pose = Pose_Helper(self)
|
||||
|
||||
def state_fields_of_pose_of(self, body_id, link_id=-1): # a method you will most probably need a lot to get pose and orientation
|
||||
if link_id == -1:
|
||||
(x, y, z), (a, b, c, d) = p.getBasePositionAndOrientation(body_id)
|
||||
else:
|
||||
(x, y, z), (a, b, c, d), _, _, _, _ = p.getLinkState(body_id, link_id)
|
||||
return np.array([x, y, z, a, b, c, d])
|
||||
|
||||
def get_pose(self):
|
||||
return self.state_fields_of_pose_of(self.bodies[self.bodyIndex], self.bodyPartIndex)
|
||||
|
||||
def speed(self):
|
||||
if self.bodyPartIndex == -1:
|
||||
(vx, vy, vz), _ = p.getBaseVelocity(self.bodies[self.bodyIndex])
|
||||
else:
|
||||
(x,y,z), (a,b,c,d), _,_,_,_, (vx, vy, vz), (vr,vp,vy) = p.getLinkState(self.bodies[self.bodyIndex], self.bodyPartIndex, computeLinkVelocity=1)
|
||||
return np.array([vx, vy, vz])
|
||||
|
||||
def current_position(self):
|
||||
return self.get_pose()[:3]
|
||||
|
||||
def current_orientation(self):
|
||||
return self.get_pose()[3:]
|
||||
|
||||
def reset_position(self, position):
|
||||
p.resetBasePositionAndOrientation(self.bodies[self.bodyIndex], position, self.get_orientation())
|
||||
|
||||
def reset_orientation(self, orientation):
|
||||
p.resetBasePositionAndOrientation(self.bodies[self.bodyIndex], self.get_position(), orientation)
|
||||
|
||||
def reset_pose(self, position, orientation):
|
||||
p.resetBasePositionAndOrientation(self.bodies[self.bodyIndex], position, orientation)
|
||||
|
||||
def pose(self):
|
||||
return self.bp_pose
|
||||
|
||||
def contact_list(self):
|
||||
return p.getContactPoints(self.bodies[self.bodyIndex], -1, self.bodyPartIndex, -1)
|
||||
|
||||
|
||||
class Joint:
|
||||
def __init__(self, joint_name, bodies, bodyIndex, jointIndex):
|
||||
self.bodies = bodies
|
||||
self.bodyIndex = bodyIndex
|
||||
self.jointIndex = jointIndex
|
||||
self.joint_name = joint_name
|
||||
_,_,_,_,_,_,_,_,self.lowerLimit, self.upperLimit,_,_,_ = p.getJointInfo(self.bodies[self.bodyIndex], self.jointIndex)
|
||||
self.power_coeff = 0
|
||||
|
||||
def set_state(self, x, vx):
|
||||
p.resetJointState(self.bodies[self.bodyIndex], self.jointIndex, x, vx)
|
||||
|
||||
def current_position(self): # just some synonyme method
|
||||
return self.get_state()
|
||||
|
||||
def current_relative_position(self):
|
||||
pos, vel = self.get_state()
|
||||
pos_mid = 0.5 * (self.lowerLimit + self.upperLimit);
|
||||
return (
|
||||
2 * (pos - pos_mid) / (self.upperLimit - self.lowerLimit),
|
||||
0.1 * vel
|
||||
)
|
||||
|
||||
def get_state(self):
|
||||
x, vx,_,_ = p.getJointState(self.bodies[self.bodyIndex],self.jointIndex)
|
||||
return x, vx
|
||||
|
||||
def set_position(self, position):
|
||||
p.setJointMotorControl2(self.bodies[self.bodyIndex],self.jointIndex,p.POSITION_CONTROL, targetPosition=position)
|
||||
|
||||
def set_velocity(self, velocity):
|
||||
p.setJointMotorControl2(self.bodies[self.bodyIndex],self.jointIndex,p.VELOCITY_CONTROL, targetVelocity=velocity)
|
||||
|
||||
def set_motor_torque(self, torque): # just some synonyme method
|
||||
self.set_torque(torque)
|
||||
|
||||
def set_torque(self, torque):
|
||||
p.setJointMotorControl2(bodyIndex=self.bodies[self.bodyIndex], jointIndex=self.jointIndex, controlMode=p.TORQUE_CONTROL, force=torque) #, positionGain=0.1, velocityGain=0.1)
|
||||
|
||||
def reset_current_position(self, position, velocity): # just some synonyme method
|
||||
self.reset_position(position, velocity)
|
||||
|
||||
def reset_position(self, position, velocity):
|
||||
p.resetJointState(self.bodies[self.bodyIndex],self.jointIndex,targetValue=position, targetVelocity=velocity)
|
||||
self.disable_motor()
|
||||
|
||||
def disable_motor(self):
|
||||
p.setJointMotorControl2(self.bodies[self.bodyIndex],self.jointIndex,controlMode=p.VELOCITY_CONTROL, force=0)
|
||||
175
examples/pybullet/gym/envs/robot_locomotors.py
Normal file
175
examples/pybullet/gym/envs/robot_locomotors.py
Normal file
@@ -0,0 +1,175 @@
|
||||
from robot_bases import MujocoXmlBasedRobot
|
||||
import numpy as np
|
||||
|
||||
|
||||
class WalkerBase(MujocoXmlBasedRobot):
|
||||
def __init__(self, fn, robot_name, action_dim, obs_dim, power):
|
||||
MujocoXmlBasedRobot.__init__(self, fn, robot_name, action_dim, obs_dim)
|
||||
self.power = power
|
||||
self.camera_x = 0
|
||||
self.walk_target_x = 1e3 # kilometer away
|
||||
self.walk_target_y = 0
|
||||
|
||||
def robot_specific_reset(self):
|
||||
for j in self.ordered_joints:
|
||||
j.reset_current_position(self.np_random.uniform(low=-0.1, high=0.1), 0)
|
||||
|
||||
self.feet = [self.parts[f] for f in self.foot_list]
|
||||
self.feet_contact = np.array([0.0 for f in self.foot_list], dtype=np.float32)
|
||||
self.scene.actor_introduce(self)
|
||||
self.initial_z = None
|
||||
|
||||
def apply_action(self, a):
|
||||
assert (np.isfinite(a).all())
|
||||
for n, j in enumerate(self.ordered_joints):
|
||||
j.set_motor_torque(self.power * j.power_coef * float(np.clip(a[n], -1, +1)))
|
||||
|
||||
def calc_state(self):
|
||||
j = np.array([j.current_relative_position() for j in self.ordered_joints], dtype=np.float32).flatten()
|
||||
# even elements [0::2] position, scaled to -1..+1 between limits
|
||||
# odd elements [1::2] angular speed, scaled to show -1..+1
|
||||
self.joint_speeds = j[1::2]
|
||||
self.joints_at_limit = np.count_nonzero(np.abs(j[0::2]) > 0.99)
|
||||
|
||||
body_pose = self.robot_body.pose()
|
||||
parts_xyz = np.array([p.pose().xyz() for p in self.parts.values()]).flatten()
|
||||
self.body_xyz = (
|
||||
parts_xyz[0::3].mean(), parts_xyz[1::3].mean(), body_pose.xyz()[2]) # torso z is more informative than mean z
|
||||
self.body_rpy = body_pose.rpy()
|
||||
z = self.body_xyz[2]
|
||||
if self.initial_z == None:
|
||||
self.initial_z = z
|
||||
r, p, yaw = self.body_rpy
|
||||
self.walk_target_theta = np.arctan2(self.walk_target_y - self.body_xyz[1],
|
||||
self.walk_target_x - self.body_xyz[0])
|
||||
self.walk_target_dist = np.linalg.norm(
|
||||
[self.walk_target_y - self.body_xyz[1], self.walk_target_x - self.body_xyz[0]])
|
||||
angle_to_target = self.walk_target_theta - yaw
|
||||
|
||||
rot_speed = np.array(
|
||||
[[np.cos(-yaw), -np.sin(-yaw), 0],
|
||||
[np.sin(-yaw), np.cos(-yaw), 0],
|
||||
[ 0, 0, 1]]
|
||||
)
|
||||
vx, vy, vz = np.dot(rot_speed, self.robot_body.speed()) # rotate speed back to body point of view
|
||||
|
||||
more = np.array([ z-self.initial_z,
|
||||
np.sin(angle_to_target), np.cos(angle_to_target),
|
||||
0.3* vx , 0.3* vy , 0.3* vz , # 0.3 is just scaling typical speed into -1..+1, no physical sense here
|
||||
r, p], dtype=np.float32)
|
||||
return np.clip( np.concatenate([more] + [j] + [self.feet_contact]), -5, +5)
|
||||
|
||||
def calc_potential(self):
|
||||
# progress in potential field is speed*dt, typical speed is about 2-3 meter per second, this potential will change 2-3 per frame (not per second),
|
||||
# all rewards have rew/frame units and close to 1.0
|
||||
return - self.walk_target_dist / self.scene.dt
|
||||
|
||||
|
||||
class Hopper(WalkerBase):
|
||||
foot_list = ["foot"]
|
||||
|
||||
def __init__(self):
|
||||
WalkerBase.__init__(self, "hopper.xml", "torso", action_dim=3, obs_dim=15, power=0.75)
|
||||
|
||||
def alive_bonus(self, z, pitch):
|
||||
return +1 if z > 0.8 and abs(pitch) < 1.0 else -1
|
||||
|
||||
|
||||
class Walker2D(WalkerBase):
|
||||
foot_list = ["foot", "foot_left"]
|
||||
|
||||
def __init__(self):
|
||||
WalkerBase.__init__(self, "walker2d.xml", "torso", action_dim=6, obs_dim=22, power=0.40)
|
||||
|
||||
def alive_bonus(self, z, pitch):
|
||||
return +1 if z > 0.8 and abs(pitch) < 1.0 else -1
|
||||
|
||||
def robot_specific_reset(self):
|
||||
WalkerBase.robot_specific_reset(self)
|
||||
for n in ["foot_joint", "foot_left_joint"]:
|
||||
self.jdict[n].power_coef = 30.0
|
||||
|
||||
|
||||
class HalfCheetah(WalkerBase):
|
||||
foot_list = ["ffoot", "fshin", "fthigh", "bfoot", "bshin", "bthigh"] # track these contacts with ground
|
||||
|
||||
def __init__(self):
|
||||
WalkerBase.__init__(self, "half_cheetah.xml", "torso", action_dim=6, obs_dim=26, power=0.90)
|
||||
|
||||
def alive_bonus(self, z, pitch):
|
||||
# Use contact other than feet to terminate episode: due to a lot of strange walks using knees
|
||||
return +1 if np.abs(pitch) < 1.0 and not self.feet_contact[1] and not self.feet_contact[2] and not self.feet_contact[4] and not self.feet_contact[5] else -1
|
||||
|
||||
def robot_specific_reset(self):
|
||||
WalkerBase.robot_specific_reset(self)
|
||||
self.jdict["bthigh"].power_coef = 120.0
|
||||
self.jdict["bshin"].power_coef = 90.0
|
||||
self.jdict["bfoot"].power_coef = 60.0
|
||||
self.jdict["fthigh"].power_coef = 140.0
|
||||
self.jdict["fshin"].power_coef = 60.0
|
||||
self.jdict["ffoot"].power_coef = 30.0
|
||||
|
||||
|
||||
class Ant(WalkerBase):
|
||||
foot_list = ['front_left_foot', 'front_right_foot', 'left_back_foot', 'right_back_foot']
|
||||
|
||||
def __init__(self):
|
||||
WalkerBase.__init__(self, "ant.xml", "torso", action_dim=8, obs_dim=28, power=10.5)
|
||||
|
||||
def alive_bonus(self, z, pitch):
|
||||
return +1 if z > 0.26 else -1 # 0.25 is central sphere rad, die if it scrapes the ground
|
||||
|
||||
|
||||
class Humanoid(WalkerBase):
|
||||
self_collision = True
|
||||
foot_list = ["right_foot", "left_foot"] # "left_hand", "right_hand"
|
||||
|
||||
def __init__(self):
|
||||
WalkerBase.__init__(self, 'humanoid_symmetric.xml', 'torso', action_dim=17, obs_dim=44, power=0.41)
|
||||
# 17 joints, 4 of them important for walking (hip, knee), others may as well be turned off, 17/4 = 4.25
|
||||
|
||||
def robot_specific_reset(self):
|
||||
WalkerBase.robot_specific_reset(self)
|
||||
self.motor_names = ["abdomen_z", "abdomen_y", "abdomen_x"]
|
||||
self.motor_power = [100, 100, 100]
|
||||
self.motor_names += ["right_hip_x", "right_hip_z", "right_hip_y", "right_knee"]
|
||||
self.motor_power += [100, 100, 300, 200]
|
||||
self.motor_names += ["left_hip_x", "left_hip_z", "left_hip_y", "left_knee"]
|
||||
self.motor_power += [100, 100, 300, 200]
|
||||
self.motor_names += ["right_shoulder1", "right_shoulder2", "right_elbow"]
|
||||
self.motor_power += [75, 75, 75]
|
||||
self.motor_names += ["left_shoulder1", "left_shoulder2", "left_elbow"]
|
||||
self.motor_power += [75, 75, 75]
|
||||
self.motors = [self.jdict[n] for n in self.motor_names]
|
||||
# if self.random_yaw: # TODO: Make leaning work as soon as the rest works
|
||||
# cpose = cpp_household.Pose()
|
||||
# yaw = self.np_random.uniform(low=-3.14, high=3.14)
|
||||
# if self.random_lean and self.np_random.randint(2)==0:
|
||||
# cpose.set_xyz(0, 0, 1.4)
|
||||
# if self.np_random.randint(2)==0:
|
||||
# pitch = np.pi/2
|
||||
# cpose.set_xyz(0, 0, 0.45)
|
||||
# else:
|
||||
# pitch = np.pi*3/2
|
||||
# cpose.set_xyz(0, 0, 0.25)
|
||||
# roll = 0
|
||||
# cpose.set_rpy(roll, pitch, yaw)
|
||||
# else:
|
||||
# cpose.set_xyz(0, 0, 1.4)
|
||||
# cpose.set_rpy(0, 0, yaw) # just face random direction, but stay straight otherwise
|
||||
# self.cpp_robot.set_pose_and_speed(cpose, 0,0,0)
|
||||
self.initial_z = 0.8
|
||||
|
||||
random_yaw = False
|
||||
random_lean = False
|
||||
|
||||
def apply_action(self, a):
|
||||
assert( np.isfinite(a).all() )
|
||||
force_gain = 1
|
||||
for i, m, power in zip(range(17), self.motors, self.motor_power):
|
||||
m.set_motor_torque( float(force_gain * power*self.power*a[i]) )
|
||||
#m.set_motor_torque(float(force_gain * power * self.power * np.clip(a[i], -1, +1)))
|
||||
|
||||
def alive_bonus(self, z, pitch):
|
||||
return +2 if z > 0.78 else -1 # 2 here because 17 joints produce a lot of electricity cost just from policy noise, living must be better than dying
|
||||
|
||||
0
examples/pybullet/gym/envs/robot_manipulators.py
Normal file
0
examples/pybullet/gym/envs/robot_manipulators.py
Normal file
Reference in New Issue
Block a user