diff --git a/examples/pybullet/gym/enjoy_TF_HopperBulletEnv_v0_2017may.py b/examples/pybullet/gym/enjoy_TF_HopperBulletEnv_v0_2017may.py index 722c01724..375a078c1 100644 --- a/examples/pybullet/gym/enjoy_TF_HopperBulletEnv_v0_2017may.py +++ b/examples/pybullet/gym/enjoy_TF_HopperBulletEnv_v0_2017may.py @@ -8,7 +8,9 @@ def relu(x): return np.maximum(x, 0) class SmallReactivePolicy: - "Simple multi-layer perceptron policy, no internal state" + """ + Simple multi-layer perceptron policy, no internal state + """ def __init__(self, observation_space, action_space): assert weights_dense1_w.shape == (observation_space.shape[0], 128) assert weights_dense2_w.shape == (128, 64) diff --git a/examples/pybullet/gym/envs/__init__.py b/examples/pybullet/gym/envs/__init__.py index 218e15daa..aed0c9a26 100644 --- a/examples/pybullet/gym/envs/__init__.py +++ b/examples/pybullet/gym/envs/__init__.py @@ -53,32 +53,25 @@ register( register( id='InvertedPendulumBulletEnv-v0', - entry_point='envs.gym_pendula_envs:InvertedPendulumBulletEnv', + entry_point='envs.gym_pendulum_envs:InvertedPendulumBulletEnv', max_episode_steps=1000, reward_threshold=950.0, ) register( id='InvertedDoublePendulumBulletEnv-v0', - entry_point='envs.gym_pendula_envs:InvertedDoublePendulumBulletEnv', + entry_point='envs.gym_pendulum_envs:InvertedDoublePendulumBulletEnv', max_episode_steps=1000, reward_threshold=9100.0, ) register( id='InvertedPendulumSwingupBulletEnv-v0', - entry_point='envs.gym_pendula_envs:InvertedPendulumSwingupBulletEnv', + entry_point='envs.gym_pendulum_envs:InvertedPendulumSwingupBulletEnv', max_episode_steps=1000, reward_threshold=800.0, ) -register( - id='HopperBulletEnv-v0', - entry_point='envs.gym_walker_envs:HopperBulletEnv', - max_episode_steps=1000, - reward_threshold=2500.0 - ) - register( id='ReacherBulletEnv-v0', entry_point='envs.gym_manipulator_envs:ReacherBulletEnv', @@ -109,26 +102,33 @@ register( register( id='Walker2DBulletEnv-v0', - entry_point='envs.gym_walker_envs:Walker2DBulletEnv', + entry_point='envs.gym_locomotion_envs:Walker2DBulletEnv', max_episode_steps=1000, reward_threshold=2500.0 ) register( id='HalfCheetahBulletEnv-v0', - entry_point='envs.gym_walker_envs:HalfCheetahBulletEnv', + entry_point='envs.gym_locomotion_envs:HalfCheetahBulletEnv', max_episode_steps=1000, reward_threshold=3000.0 ) register( id='AntBulletEnv-v0', - entry_point='envs.gym_walker_envs:AntBulletEnv', + entry_point='envs.gym_locomotion_envs:AntBulletEnv', max_episode_steps=1000, reward_threshold=2500.0 ) register( id='HumanoidBulletEnv-v0', - entry_point='envs.gym_walker_envs:HumanoidBulletEnv', + entry_point='envs.gym_locomotion_envs:HumanoidBulletEnv', max_episode_steps=1000 ) + +register( + id='HopperBulletEnv-v0', + entry_point='envs.gym_locomotion_envs:HopperBulletEnv', + max_episode_steps=1000, + reward_threshold=2500.0 + ) \ No newline at end of file diff --git a/examples/pybullet/gym/envs/env_bases.py b/examples/pybullet/gym/envs/env_bases.py index a2a9f0b0d..573f7b284 100644 --- a/examples/pybullet/gym/envs/env_bases.py +++ b/examples/pybullet/gym/envs/env_bases.py @@ -1,12 +1,11 @@ import gym, gym.spaces, gym.utils, gym.utils.seeding import numpy as np import pybullet as p -import os class MujocoXmlBaseBulletEnv(gym.Env): """ - Base class for MuJoCo .xml actors in a Scene. + Base class for MuJoCo .xml environments in a Scene. These environments create single-player scenes and behave like normal Gym environments, if you don't use multiplayer. """ @@ -16,112 +15,42 @@ class MujocoXmlBaseBulletEnv(gym.Env): 'video.frames_per_second': 60 } - self_collision = False - - def __init__(self, model_xml, robot_name, action_dim, obs_dim): + def __init__(self, robot): self.scene = None - self.parts = None - self.jdict = None - self.ordered_joints = None - self.robot_body = None - - high = np.ones([action_dim]) - self.action_space = gym.spaces.Box(-high, high) - high = np.inf*np.ones([obs_dim]) - self.observation_space = gym.spaces.Box(-high, high) - self._seed() - - self.model_xml = model_xml - self.robot_name = robot_name - self.camera = Camera() + self.robot = robot + + self._seed() + + self.action_space = robot.action_space + self.observation_space = robot.observation_space + def _seed(self, seed=None): self.np_random, seed = gym.utils.seeding.np_random(seed) + self.robot.np_random = self.np_random # use the same np_randomizer for robot as for env return [seed] - def addToScene(self, bodies): - if self.parts is not None: - parts = self.parts - else: - parts = {} - - if self.jdict is not None: - joints = self.jdict - else: - joints = {} - - if self.ordered_joints is not None: - ordered_joints = self.ordered_joints - else: - ordered_joints = [] - - dump = 0 - for i in range(len(bodies)): - if p.getNumJoints(bodies[i]) == 0: - part_name, robot_name = p.getBodyInfo(bodies[i], 0) - robot_name = robot_name.decode("utf8") - part_name = part_name.decode("utf8") - parts[part_name] = BodyPart(part_name, bodies, i, -1) - for j in range(p.getNumJoints(bodies[i])): - _,joint_name,_,_,_,_,_,_,_,_,_,_,part_name = p.getJointInfo(bodies[i], j) - - joint_name = joint_name.decode("utf8") - part_name = part_name.decode("utf8") - - if dump: print("ROBOT PART '%s'" % part_name) - if dump: print("ROBOT JOINT '%s'" % joint_name) # limits = %+0.2f..%+0.2f effort=%0.3f speed=%0.3f" % ((joint_name,) + j.limits()) ) - - parts[part_name] = BodyPart(part_name, bodies, i, j) - - if part_name == self.robot_name: - self.robot_body = parts[part_name] - - if i == 0 and j == 0 and self.robot_body is None: # if nothing else works, we take this as robot_body - parts[self.robot_name] = BodyPart(self.robot_name, bodies, 0, -1) - self.robot_body = parts[self.robot_name] - - if joint_name[:6] == "ignore": - Joint(joint_name, bodies, i, j).disable_motor() - continue - - if joint_name[:8] != "jointfix": - joints[joint_name] = Joint(joint_name, bodies, i, j) - ordered_joints.append(joints[joint_name]) - - joints[joint_name].power_coef = 100.0 - - return parts, joints, ordered_joints, self.robot_body - def _reset(self): if self.scene is None: self.scene = self.create_single_player_scene() if not self.scene.multiplayer: self.scene.episode_restart() - self.ordered_joints = [] + self.robot.scene = self.scene + self.frame = 0 self.done = 0 self.reward = 0 dump = 0 - - if self.self_collision: - self.parts, self.jdict, self.ordered_joints, self.robot_body = self.addToScene(p.loadMJCF(os.path.join("mjcf", self.model_xml), flags = p.URDF_USE_SELF_COLLISION_EXCLUDE_ALL_PARENTS)) - else: - self.parts, self.jdict, self.ordered_joints, self.robot_body = self.addToScene(p.loadMJCF(os.path.join("mjcf", self.model_xml))) - - self.robot_specific_reset() - s = self.calc_state() # optimization: calc_state() can calculate something in self.* for calc_potential() to use - self.potential = self.calc_potential() + s = self.robot.reset() + self.potential = self.robot.calc_potential() return s def _render(self, mode, close): pass - def calc_potential(self): - return 0 - def HUD(self, state, a, done): pass @@ -134,113 +63,3 @@ class Camera: distance = 10 yaw = 10 p.resetDebugVisualizerCamera(distance, yaw, -20, lookat) - -class Pose_Helper: # dummy class to comply to original interface - def __init__(self, body_part): - self.body_part = body_part - - def xyz(self): - return self.body_part.current_position() - - def rpy(self): - return p.getEulerFromQuaternion(self.body_part.current_orientation()) - - def orientation(self): - return self.body_part.current_orientation() - -class BodyPart: - def __init__(self, body_name, bodies, bodyIndex, bodyPartIndex): - self.bodies = bodies - self.bodyIndex = bodyIndex - self.bodyPartIndex = bodyPartIndex - self.initialPosition = self.current_position() - self.initialOrientation = self.current_orientation() - self.bp_pose = Pose_Helper(self) - - def state_fields_of_pose_of(self, body_id, link_id=-1): # a method you will most probably need a lot to get pose and orientation - if link_id == -1: - (x, y, z), (a, b, c, d) = p.getBasePositionAndOrientation(body_id) - else: - (x, y, z), (a, b, c, d), _, _, _, _ = p.getLinkState(body_id, link_id) - return np.array([x, y, z, a, b, c, d]) - - def get_pose(self): - return self.state_fields_of_pose_of(self.bodies[self.bodyIndex], self.bodyPartIndex) - - def speed(self): - if self.bodyPartIndex == -1: - (vx, vy, vz), _ = p.getBaseVelocity(self.bodies[self.bodyIndex]) - else: - (x,y,z), (a,b,c,d), _,_,_,_, (vx, vy, vz), (vr,vp,vy) = p.getLinkState(self.bodies[self.bodyIndex], self.bodyPartIndex, computeLinkVelocity=1) - return np.array([vx, vy, vz]) - - def current_position(self): - return self.get_pose()[:3] - - def current_orientation(self): - return self.get_pose()[3:] - - def reset_position(self, position): - p.resetBasePositionAndOrientation(self.bodies[self.bodyIndex], position, self.get_orientation()) - - def reset_orientation(self, orientation): - p.resetBasePositionAndOrientation(self.bodies[self.bodyIndex], self.get_position(), orientation) - - def reset_pose(self, position, orientation): - p.resetBasePositionAndOrientation(self.bodies[self.bodyIndex], position, orientation) - - def pose(self): - return self.bp_pose - - def contact_list(self): - return p.getContactPoints(self.bodies[self.bodyIndex], -1, self.bodyPartIndex, -1) - - -class Joint: - def __init__(self, joint_name, bodies, bodyIndex, jointIndex): - self.bodies = bodies - self.bodyIndex = bodyIndex - self.jointIndex = jointIndex - self.joint_name = joint_name - _,_,_,_,_,_,_,_,self.lowerLimit, self.upperLimit,_,_,_ = p.getJointInfo(self.bodies[self.bodyIndex], self.jointIndex) - self.power_coeff = 0 - - def set_state(self, x, vx): - p.resetJointState(self.bodies[self.bodyIndex], self.jointIndex, x, vx) - - def current_position(self): # just some synonyme method - return self.get_state() - - def current_relative_position(self): - pos, vel = self.get_state() - pos_mid = 0.5 * (self.lowerLimit + self.upperLimit); - return ( - 2 * (pos - pos_mid) / (self.upperLimit - self.lowerLimit), - 0.1 * vel - ) - - def get_state(self): - x, vx,_,_ = p.getJointState(self.bodies[self.bodyIndex],self.jointIndex) - return x, vx - - def set_position(self, position): - p.setJointMotorControl2(self.bodies[self.bodyIndex],self.jointIndex,p.POSITION_CONTROL, targetPosition=position) - - def set_velocity(self, velocity): - p.setJointMotorControl2(self.bodies[self.bodyIndex],self.jointIndex,p.VELOCITY_CONTROL, targetVelocity=velocity) - - def set_motor_torque(self, torque): # just some synonyme method - self.set_torque(torque) - - def set_torque(self, torque): - p.setJointMotorControl2(bodyIndex=self.bodies[self.bodyIndex], jointIndex=self.jointIndex, controlMode=p.TORQUE_CONTROL, force=torque) #, positionGain=0.1, velocityGain=0.1) - - def reset_current_position(self, position, velocity): # just some synonyme method - self.reset_position(position, velocity) - - def reset_position(self, position, velocity): - p.resetJointState(self.bodies[self.bodyIndex],self.jointIndex,targetValue=position, targetVelocity=velocity) - self.disable_motor() - - def disable_motor(self): - p.setJointMotorControl2(self.bodies[self.bodyIndex],self.jointIndex,controlMode=p.VELOCITY_CONTROL, force=0) diff --git a/examples/pybullet/gym/envs/gym_locomotion_envs.py b/examples/pybullet/gym/envs/gym_locomotion_envs.py new file mode 100644 index 000000000..9c7e6f843 --- /dev/null +++ b/examples/pybullet/gym/envs/gym_locomotion_envs.py @@ -0,0 +1,110 @@ +from scene_stadium import SinglePlayerStadiumScene +from env_bases import MujocoXmlBaseBulletEnv +import numpy as np +from robot_locomotors import Hopper, Walker2D, HalfCheetah, Ant, Humanoid + + +class WalkerBaseBulletEnv(MujocoXmlBaseBulletEnv): + def __init__(self, robot): + MujocoXmlBaseBulletEnv.__init__(self, robot) + self.camera_x = 0 + self.walk_target_x = 1e3 # kilometer away + self.walk_target_y = 0 + + def create_single_player_scene(self): + self.stadium_scene = SinglePlayerStadiumScene(gravity=9.8, timestep=0.0165/4, frame_skip=4) + return self.stadium_scene + + def _reset(self): + r = MujocoXmlBaseBulletEnv._reset(self) + self.parts, self.jdict, self.ordered_joints, self.robot_body = self.robot.addToScene( + self.stadium_scene.ground_plane_mjcf) + self.ground_ids = set([(self.parts[f].bodies[self.parts[f].bodyIndex], self.parts[f].bodyPartIndex) for f in + self.foot_ground_object_names]) + return r + + def move_robot(self, init_x, init_y, init_z): + "Used by multiplayer stadium to move sideways, to another running lane." + self.cpp_robot.query_position() + pose = self.cpp_robot.root_part.pose() + pose.move_xyz(init_x, init_y, init_z) # Works because robot loads around (0,0,0), and some robots have z != 0 that is left intact + self.cpp_robot.set_pose(pose) + + electricity_cost = -2.0 # cost for using motors -- this parameter should be carefully tuned against reward for making progress, other values less improtant + stall_torque_cost = -0.1 # cost for running electric current through a motor even at zero rotational speed, small + foot_collision_cost = -1.0 # touches another leg, or other objects, that cost makes robot avoid smashing feet into itself + foot_ground_object_names = set(["floor"]) # to distinguish ground and other objects + joints_at_limit_cost = -0.1 # discourage stuck joints + + def _step(self, a): + if not self.scene.multiplayer: # if multiplayer, action first applied to all robots, then global step() called, then _step() for all robots with the same actions + self.robot.apply_action(a) + self.scene.global_step() + + state = self.robot.calc_state() # also calculates self.joints_at_limit + + alive = float(self.robot.alive_bonus(state[0]+self.robot.initial_z, self.robot.body_rpy[1])) # state[0] is body height above ground, body_rpy[1] is pitch + done = alive < 0 + if not np.isfinite(state).all(): + print("~INF~", state) + done = True + + potential_old = self.potential + self.potential = self.robot.calc_potential() + progress = float(self.potential - potential_old) + + feet_collision_cost = 0.0 + for i,f in enumerate(self.robot.feet): # TODO: Maybe calculating feet contacts could be done within the robot code + contact_ids = set((x[2], x[4]) for x in f.contact_list()) + #print("CONTACT OF '%s' WITH %s" % (f.name, ",".join(contact_names)) ) + self.robot.feet_contact[i] = 1.0 if (self.ground_ids & contact_ids) else 0.0 + if contact_ids - self.ground_ids: + feet_collision_cost += self.foot_collision_cost + + electricity_cost = self.electricity_cost * float(np.abs(a*self.robot.joint_speeds).mean()) # let's assume we have DC motor with controller, and reverse current braking + electricity_cost += self.stall_torque_cost * float(np.square(a).mean()) + + joints_at_limit_cost = float(self.joints_at_limit_cost * self.robot.joints_at_limit) + + self.rewards = [ + alive, + progress, + electricity_cost, + joints_at_limit_cost, + feet_collision_cost + ] + + self.HUD(state, a, done) + return state, sum(self.rewards), bool(done), {} + + def camera_adjust(self): + x, y, z = self.body_xyz + self.camera_x = 0.98*self.camera_x + (1-0.98)*x + self.camera.move_and_look_at(self.camera_x, y-2.0, 1.4, x, y, 1.0) + +class HopperBulletEnv(WalkerBaseBulletEnv): + def __init__(self): + self.robot = Hopper() + WalkerBaseBulletEnv.__init__(self, self.robot) + +class Walker2DBulletEnv(WalkerBaseBulletEnv): + def __init__(self): + self.robot = Walker2D() + WalkerBaseBulletEnv.__init__(self, self.robot) + +class HalfCheetahBulletEnv(WalkerBaseBulletEnv): + def __init__(self): + self.robot = HalfCheetah() + WalkerBaseBulletEnv.__init__(self, self.robot) + +class AntBulletEnv(WalkerBaseBulletEnv): + def __init__(self): + self.robot = Ant() + WalkerBaseBulletEnv.__init__(self, self.robot) + +class HumanoidBulletEnv(WalkerBaseBulletEnv): + def __init__(self): + self.robot = Humanoid() + WalkerBaseBulletEnv.__init__(self, self.robot) + self.electricity_cost = 4.25*WalkerBaseBulletEnv.electricity_cost + self.stall_torque_cost = 4.25*WalkerBaseBulletEnv.stall_torque_cost diff --git a/examples/pybullet/gym/envs/gym_manipulator_envs.py b/examples/pybullet/gym/envs/gym_manipulator_envs.py index 111968498..07bd4ee4d 100644 --- a/examples/pybullet/gym/envs/gym_manipulator_envs.py +++ b/examples/pybullet/gym/envs/gym_manipulator_envs.py @@ -1,4 +1,4 @@ -from pybulletgym.envs.scene_abstract import SingleRobotEmptyScene +from scene_abstract import SingleRobotEmptyScene from env_bases import MujocoXmlBaseBulletEnv import gym, gym.spaces, gym.utils, gym.utils.seeding import numpy as np diff --git a/examples/pybullet/gym/envs/gym_pendula_envs.py b/examples/pybullet/gym/envs/gym_pendulum_envs.py similarity index 100% rename from examples/pybullet/gym/envs/gym_pendula_envs.py rename to examples/pybullet/gym/envs/gym_pendulum_envs.py diff --git a/examples/pybullet/gym/envs/gym_walker_envs.py b/examples/pybullet/gym/envs/gym_walker_envs.py deleted file mode 100644 index 5e0e0dd86..000000000 --- a/examples/pybullet/gym/envs/gym_walker_envs.py +++ /dev/null @@ -1,233 +0,0 @@ -from distutils.command.config import config - -from scene_abstract import SingleRobotEmptyScene -from scene_stadium import SinglePlayerStadiumScene -from env_bases import MujocoXmlBaseBulletEnv -import gym, gym.spaces, gym.utils, gym.utils.seeding -import numpy as np -import os, sys - -class WalkerBaseBulletEnv(MujocoXmlBaseBulletEnv): - def __init__(self, fn, robot_name, action_dim, obs_dim, power): - MujocoXmlBaseBulletEnv.__init__(self, fn, robot_name, action_dim, obs_dim) - self.power = power - self.camera_x = 0 - self.walk_target_x = 1e3 # kilometer away - self.walk_target_y = 0 - - def create_single_player_scene(self): - self.stadium_scene = SinglePlayerStadiumScene(gravity=9.8, timestep=0.0165/4, frame_skip=4) - return self.stadium_scene - - def robot_specific_reset(self): - for j in self.ordered_joints: - j.reset_current_position(self.np_random.uniform( low=-0.1, high=0.1 ), 0) - - self.parts, self.jdict, self.ordered_joints, self.robot_body = self.addToScene(self.stadium_scene.ground_plane_mjcf) - self.feet = [self.parts[f] for f in self.foot_list] - self.feet_contact = np.array([0.0 for f in self.foot_list], dtype=np.float32) - self.ground_ids = set([(self.parts[f].bodies[self.parts[f].bodyIndex], self.parts[f].bodyPartIndex) for f in self.foot_ground_object_names]) - self.scene.actor_introduce(self) - self.initial_z = None - - def move_robot(self, init_x, init_y, init_z): - "Used by multiplayer stadium to move sideways, to another running lane." - self.cpp_robot.query_position() - pose = self.cpp_robot.root_part.pose() - pose.move_xyz(init_x, init_y, init_z) # Works because robot loads around (0,0,0), and some robots have z != 0 that is left intact - self.cpp_robot.set_pose(pose) - - def apply_action(self, a): - assert( np.isfinite(a).all() ) - for n,j in enumerate(self.ordered_joints): - j.set_motor_torque( self.power*j.power_coef*float(np.clip(a[n], -1, +1)) ) - - def calc_state(self): - j = np.array([j.current_relative_position() for j in self.ordered_joints], dtype=np.float32).flatten() - # even elements [0::2] position, scaled to -1..+1 between limits - # odd elements [1::2] angular speed, scaled to show -1..+1 - self.joint_speeds = j[1::2] - self.joints_at_limit = np.count_nonzero(np.abs(j[0::2]) > 0.99) - - body_pose = self.robot_body.pose() - parts_xyz = np.array( [p.pose().xyz() for p in self.parts.values()] ).flatten() - self.body_xyz = (parts_xyz[0::3].mean(), parts_xyz[1::3].mean(), body_pose.xyz()[2]) # torso z is more informative than mean z - self.body_rpy = body_pose.rpy() - z = self.body_xyz[2] - r, p, yaw = self.body_rpy - if self.initial_z==None: - self.initial_z = z - self.walk_target_theta = np.arctan2( self.walk_target_y - self.body_xyz[1], self.walk_target_x - self.body_xyz[0] ) - self.walk_target_dist = np.linalg.norm( [self.walk_target_y - self.body_xyz[1], self.walk_target_x - self.body_xyz[0]] ) - angle_to_target = self.walk_target_theta - yaw - - rot_speed = np.array( - [[np.cos(-yaw), -np.sin(-yaw), 0], - [np.sin(-yaw), np.cos(-yaw), 0], - [ 0, 0, 1]] - ) - vx, vy, vz = np.dot(rot_speed, self.robot_body.speed()) # rotate speed back to body point of view - - more = np.array([ - z-self.initial_z, - np.sin(angle_to_target), np.cos(angle_to_target), - 0.3*vx, 0.3*vy, 0.3*vz, # 0.3 is just scaling typical speed into -1..+1, no physical sense here - r, p], dtype=np.float32) - return np.clip( np.concatenate([more] + [j] + [self.feet_contact]), -5, +5) - - def calc_potential(self): - # progress in potential field is speed*dt, typical speed is about 2-3 meter per second, this potential will change 2-3 per frame (not per second), - # all rewards have rew/frame units and close to 1.0 - return - self.walk_target_dist / self.scene.dt - - electricity_cost = -2.0 # cost for using motors -- this parameter should be carefully tuned against reward for making progress, other values less improtant - stall_torque_cost = -0.1 # cost for running electric current through a motor even at zero rotational speed, small - foot_collision_cost = -1.0 # touches another leg, or other objects, that cost makes robot avoid smashing feet into itself - foot_ground_object_names = set(["floor"]) # to distinguish ground and other objects - joints_at_limit_cost = -0.1 # discourage stuck joints - - def _step(self, a): - if not self.scene.multiplayer: # if multiplayer, action first applied to all robots, then global step() called, then _step() for all robots with the same actions - self.apply_action(a) - self.scene.global_step() - - state = self.calc_state() # also calculates self.joints_at_limit - - alive = float(self.alive_bonus(state[0]+self.initial_z, self.body_rpy[1])) # state[0] is body height above ground, body_rpy[1] is pitch - done = alive < 0 - if not np.isfinite(state).all(): - print("~INF~", state) - done = True - - potential_old = self.potential - self.potential = self.calc_potential() - progress = float(self.potential - potential_old) - - feet_collision_cost = 0.0 - for i,f in enumerate(self.feet): - contact_ids = set((x[2], x[4]) for x in f.contact_list()) - #print("CONTACT OF '%s' WITH %s" % (f.name, ",".join(contact_names)) ) - self.feet_contact[i] = 1.0 if (self.ground_ids & contact_ids) else 0.0 - if contact_ids - self.ground_ids: - feet_collision_cost += self.foot_collision_cost - - electricity_cost = self.electricity_cost * float(np.abs(a*self.joint_speeds).mean()) # let's assume we have DC motor with controller, and reverse current braking - electricity_cost += self.stall_torque_cost * float(np.square(a).mean()) - - joints_at_limit_cost = float(self.joints_at_limit_cost * self.joints_at_limit) - - self.rewards = [ - alive, - progress, - electricity_cost, - joints_at_limit_cost, - feet_collision_cost - ] - - self.HUD(state, a, done) - return state, sum(self.rewards), bool(done), {} - - def camera_adjust(self): - x, y, z = self.body_xyz - self.camera_x = 0.98*self.camera_x + (1-0.98)*x - self.camera.move_and_look_at(self.camera_x, y-2.0, 1.4, x, y, 1.0) - -class HopperBulletEnv(WalkerBaseBulletEnv): - foot_list = ["foot"] - def __init__(self): - WalkerBaseBulletEnv.__init__(self, "hopper.xml", "torso", action_dim=3, obs_dim=15, power=0.75) - def alive_bonus(self, z, pitch): - return +1 if z > 0.8 and abs(pitch) < 1.0 else -1 - -class Walker2DBulletEnv(WalkerBaseBulletEnv): - foot_list = ["foot", "foot_left"] - def __init__(self): - WalkerBaseBulletEnv.__init__(self, "walker2d.xml", "torso", action_dim=6, obs_dim=22, power=0.40) - def alive_bonus(self, z, pitch): - return +1 if z > 0.8 and abs(pitch) < 1.0 else -1 - def robot_specific_reset(self): - WalkerBaseBulletEnv.robot_specific_reset(self) - for n in ["foot_joint", "foot_left_joint"]: - self.jdict[n].power_coef = 30.0 - -class HalfCheetahBulletEnv(WalkerBaseBulletEnv): - foot_list = ["ffoot", "fshin", "fthigh", "bfoot", "bshin", "bthigh"] # track these contacts with ground - def __init__(self): - WalkerBaseBulletEnv.__init__(self, "half_cheetah.xml", "torso", action_dim=6, obs_dim=26, power=0.90) - def alive_bonus(self, z, pitch): - # Use contact other than feet to terminate episode: due to a lot of strange walks using knees - return +1 if np.abs(pitch) < 1.0 and not self.feet_contact[1] and not self.feet_contact[2] and not self.feet_contact[4] and not self.feet_contact[5] else -1 - def robot_specific_reset(self): - WalkerBaseBulletEnv.robot_specific_reset(self) - self.jdict["bthigh"].power_coef = 120.0 - self.jdict["bshin"].power_coef = 90.0 - self.jdict["bfoot"].power_coef = 60.0 - self.jdict["fthigh"].power_coef = 140.0 - self.jdict["fshin"].power_coef = 60.0 - self.jdict["ffoot"].power_coef = 30.0 - -class AntBulletEnv(WalkerBaseBulletEnv): - foot_list = ['front_left_foot', 'front_right_foot', 'left_back_foot', 'right_back_foot'] - def __init__(self): - WalkerBaseBulletEnv.__init__(self, "ant.xml", "torso", action_dim=8, obs_dim=28, power=10.5) - def alive_bonus(self, z, pitch): - return +1 if z > 0.26 else -1 # 0.25 is central sphere rad, die if it scrapes the ground - - -## 3d Humanoid ## - -class HumanoidBulletEnv(WalkerBaseBulletEnv): - self_collision = True - foot_list = ["right_foot", "left_foot"] # "left_hand", "right_hand" - - def __init__(self): - WalkerBaseBulletEnv.__init__(self, 'humanoid_symmetric.xml', 'torso', action_dim=17, obs_dim=44, power=0.41) - # 17 joints, 4 of them important for walking (hip, knee), others may as well be turned off, 17/4 = 4.25 - self.electricity_cost = 4.25*WalkerBaseBulletEnv.electricity_cost - self.stall_torque_cost = 4.25*WalkerBaseBulletEnv.stall_torque_cost - - def robot_specific_reset(self): - WalkerBaseBulletEnv.robot_specific_reset(self) - self.motor_names = ["abdomen_z", "abdomen_y", "abdomen_x"] - self.motor_power = [100, 100, 100] - self.motor_names += ["right_hip_x", "right_hip_z", "right_hip_y", "right_knee"] - self.motor_power += [100, 100, 300, 200] - self.motor_names += ["left_hip_x", "left_hip_z", "left_hip_y", "left_knee"] - self.motor_power += [100, 100, 300, 200] - self.motor_names += ["right_shoulder1", "right_shoulder2", "right_elbow"] - self.motor_power += [75, 75, 75] - self.motor_names += ["left_shoulder1", "left_shoulder2", "left_elbow"] - self.motor_power += [75, 75, 75] - self.motors = [self.jdict[n] for n in self.motor_names] - # if self.random_yaw: # TODO: Make leaning work as soon as the rest works - # cpose = cpp_household.Pose() - # yaw = self.np_random.uniform(low=-3.14, high=3.14) - # if self.random_lean and self.np_random.randint(2)==0: - # cpose.set_xyz(0, 0, 1.4) - # if self.np_random.randint(2)==0: - # pitch = np.pi/2 - # cpose.set_xyz(0, 0, 0.45) - # else: - # pitch = np.pi*3/2 - # cpose.set_xyz(0, 0, 0.25) - # roll = 0 - # cpose.set_rpy(roll, pitch, yaw) - # else: - # cpose.set_xyz(0, 0, 1.4) - # cpose.set_rpy(0, 0, yaw) # just face random direction, but stay straight otherwise - # self.cpp_robot.set_pose_and_speed(cpose, 0,0,0) - self.initial_z = 0.8 - - random_yaw = False - random_lean = False - - def apply_action(self, a): - assert( np.isfinite(a).all() ) - force_gain = 1 - for i, m, power in zip(range(17), self.motors, self.motor_power): - m.set_motor_torque( float(force_gain * power*self.power*a[i]) ) - #m.set_motor_torque(float(force_gain * power * self.power * np.clip(a[i], -1, +1))) - - def alive_bonus(self, z, pitch): - return +2 if z > 0.78 else -1 # 2 here because 17 joints produce a lot of electricity cost just from policy noise, living must be better than dying - diff --git a/examples/pybullet/gym/envs/robot_bases.py b/examples/pybullet/gym/envs/robot_bases.py new file mode 100644 index 000000000..626de31bf --- /dev/null +++ b/examples/pybullet/gym/envs/robot_bases.py @@ -0,0 +1,208 @@ +import pybullet as p +import gym, gym.spaces, gym.utils +import numpy as np +import os + + +class MujocoXmlBasedRobot: + """ + Base class for mujoco .xml based agents. + """ + + self_collision = False + + def __init__(self, model_xml, robot_name, action_dim, obs_dim): + self.parts = None + self.jdict = None + self.ordered_joints = None + self.robot_body = None + + high = np.ones([action_dim]) + self.action_space = gym.spaces.Box(-high, high) + high = np.inf * np.ones([obs_dim]) + self.observation_space = gym.spaces.Box(-high, high) + + self.model_xml = model_xml + self.robot_name = robot_name + + def addToScene(self, bodies): + if self.parts is not None: + parts = self.parts + else: + parts = {} + + if self.jdict is not None: + joints = self.jdict + else: + joints = {} + + if self.ordered_joints is not None: + ordered_joints = self.ordered_joints + else: + ordered_joints = [] + + dump = 0 + for i in range(len(bodies)): + if p.getNumJoints(bodies[i]) == 0: + part_name, robot_name = p.getBodyInfo(bodies[i], 0) + robot_name = robot_name.decode("utf8") + part_name = part_name.decode("utf8") + parts[part_name] = BodyPart(part_name, bodies, i, -1) + for j in range(p.getNumJoints(bodies[i])): + _,joint_name,_,_,_,_,_,_,_,_,_,_,part_name = p.getJointInfo(bodies[i], j) + + joint_name = joint_name.decode("utf8") + part_name = part_name.decode("utf8") + + if dump: print("ROBOT PART '%s'" % part_name) + if dump: print("ROBOT JOINT '%s'" % joint_name) # limits = %+0.2f..%+0.2f effort=%0.3f speed=%0.3f" % ((joint_name,) + j.limits()) ) + + parts[part_name] = BodyPart(part_name, bodies, i, j) + + if part_name == self.robot_name: + self.robot_body = parts[part_name] + + if i == 0 and j == 0 and self.robot_body is None: # if nothing else works, we take this as robot_body + parts[self.robot_name] = BodyPart(self.robot_name, bodies, 0, -1) + self.robot_body = parts[self.robot_name] + + if joint_name[:6] == "ignore": + Joint(joint_name, bodies, i, j).disable_motor() + continue + + if joint_name[:8] != "jointfix": + joints[joint_name] = Joint(joint_name, bodies, i, j) + ordered_joints.append(joints[joint_name]) + + joints[joint_name].power_coef = 100.0 + + return parts, joints, ordered_joints, self.robot_body + + def reset(self): + self.ordered_joints = [] + + if self.self_collision: + self.parts, self.jdict, self.ordered_joints, self.robot_body = self.addToScene( + p.loadMJCF(os.path.join("mjcf", self.model_xml), flags=p.URDF_USE_SELF_COLLISION_EXCLUDE_ALL_PARENTS)) + else: + self.parts, self.jdict, self.ordered_joints, self.robot_body = self.addToScene( + p.loadMJCF(os.path.join("mjcf", self.model_xml))) + + self.robot_specific_reset() + + s = self.calc_state() # optimization: calc_state() can calculate something in self.* for calc_potential() to use + + return s + + def calc_potential(self): + return 0 + +class Pose_Helper: # dummy class to comply to original interface + def __init__(self, body_part): + self.body_part = body_part + + def xyz(self): + return self.body_part.current_position() + + def rpy(self): + return p.getEulerFromQuaternion(self.body_part.current_orientation()) + + def orientation(self): + return self.body_part.current_orientation() + +class BodyPart: + def __init__(self, body_name, bodies, bodyIndex, bodyPartIndex): + self.bodies = bodies + self.bodyIndex = bodyIndex + self.bodyPartIndex = bodyPartIndex + self.initialPosition = self.current_position() + self.initialOrientation = self.current_orientation() + self.bp_pose = Pose_Helper(self) + + def state_fields_of_pose_of(self, body_id, link_id=-1): # a method you will most probably need a lot to get pose and orientation + if link_id == -1: + (x, y, z), (a, b, c, d) = p.getBasePositionAndOrientation(body_id) + else: + (x, y, z), (a, b, c, d), _, _, _, _ = p.getLinkState(body_id, link_id) + return np.array([x, y, z, a, b, c, d]) + + def get_pose(self): + return self.state_fields_of_pose_of(self.bodies[self.bodyIndex], self.bodyPartIndex) + + def speed(self): + if self.bodyPartIndex == -1: + (vx, vy, vz), _ = p.getBaseVelocity(self.bodies[self.bodyIndex]) + else: + (x,y,z), (a,b,c,d), _,_,_,_, (vx, vy, vz), (vr,vp,vy) = p.getLinkState(self.bodies[self.bodyIndex], self.bodyPartIndex, computeLinkVelocity=1) + return np.array([vx, vy, vz]) + + def current_position(self): + return self.get_pose()[:3] + + def current_orientation(self): + return self.get_pose()[3:] + + def reset_position(self, position): + p.resetBasePositionAndOrientation(self.bodies[self.bodyIndex], position, self.get_orientation()) + + def reset_orientation(self, orientation): + p.resetBasePositionAndOrientation(self.bodies[self.bodyIndex], self.get_position(), orientation) + + def reset_pose(self, position, orientation): + p.resetBasePositionAndOrientation(self.bodies[self.bodyIndex], position, orientation) + + def pose(self): + return self.bp_pose + + def contact_list(self): + return p.getContactPoints(self.bodies[self.bodyIndex], -1, self.bodyPartIndex, -1) + + +class Joint: + def __init__(self, joint_name, bodies, bodyIndex, jointIndex): + self.bodies = bodies + self.bodyIndex = bodyIndex + self.jointIndex = jointIndex + self.joint_name = joint_name + _,_,_,_,_,_,_,_,self.lowerLimit, self.upperLimit,_,_,_ = p.getJointInfo(self.bodies[self.bodyIndex], self.jointIndex) + self.power_coeff = 0 + + def set_state(self, x, vx): + p.resetJointState(self.bodies[self.bodyIndex], self.jointIndex, x, vx) + + def current_position(self): # just some synonyme method + return self.get_state() + + def current_relative_position(self): + pos, vel = self.get_state() + pos_mid = 0.5 * (self.lowerLimit + self.upperLimit); + return ( + 2 * (pos - pos_mid) / (self.upperLimit - self.lowerLimit), + 0.1 * vel + ) + + def get_state(self): + x, vx,_,_ = p.getJointState(self.bodies[self.bodyIndex],self.jointIndex) + return x, vx + + def set_position(self, position): + p.setJointMotorControl2(self.bodies[self.bodyIndex],self.jointIndex,p.POSITION_CONTROL, targetPosition=position) + + def set_velocity(self, velocity): + p.setJointMotorControl2(self.bodies[self.bodyIndex],self.jointIndex,p.VELOCITY_CONTROL, targetVelocity=velocity) + + def set_motor_torque(self, torque): # just some synonyme method + self.set_torque(torque) + + def set_torque(self, torque): + p.setJointMotorControl2(bodyIndex=self.bodies[self.bodyIndex], jointIndex=self.jointIndex, controlMode=p.TORQUE_CONTROL, force=torque) #, positionGain=0.1, velocityGain=0.1) + + def reset_current_position(self, position, velocity): # just some synonyme method + self.reset_position(position, velocity) + + def reset_position(self, position, velocity): + p.resetJointState(self.bodies[self.bodyIndex],self.jointIndex,targetValue=position, targetVelocity=velocity) + self.disable_motor() + + def disable_motor(self): + p.setJointMotorControl2(self.bodies[self.bodyIndex],self.jointIndex,controlMode=p.VELOCITY_CONTROL, force=0) \ No newline at end of file diff --git a/examples/pybullet/gym/envs/robot_locomotors.py b/examples/pybullet/gym/envs/robot_locomotors.py new file mode 100644 index 000000000..92cda05ac --- /dev/null +++ b/examples/pybullet/gym/envs/robot_locomotors.py @@ -0,0 +1,175 @@ +from robot_bases import MujocoXmlBasedRobot +import numpy as np + + +class WalkerBase(MujocoXmlBasedRobot): + def __init__(self, fn, robot_name, action_dim, obs_dim, power): + MujocoXmlBasedRobot.__init__(self, fn, robot_name, action_dim, obs_dim) + self.power = power + self.camera_x = 0 + self.walk_target_x = 1e3 # kilometer away + self.walk_target_y = 0 + + def robot_specific_reset(self): + for j in self.ordered_joints: + j.reset_current_position(self.np_random.uniform(low=-0.1, high=0.1), 0) + + self.feet = [self.parts[f] for f in self.foot_list] + self.feet_contact = np.array([0.0 for f in self.foot_list], dtype=np.float32) + self.scene.actor_introduce(self) + self.initial_z = None + + def apply_action(self, a): + assert (np.isfinite(a).all()) + for n, j in enumerate(self.ordered_joints): + j.set_motor_torque(self.power * j.power_coef * float(np.clip(a[n], -1, +1))) + + def calc_state(self): + j = np.array([j.current_relative_position() for j in self.ordered_joints], dtype=np.float32).flatten() + # even elements [0::2] position, scaled to -1..+1 between limits + # odd elements [1::2] angular speed, scaled to show -1..+1 + self.joint_speeds = j[1::2] + self.joints_at_limit = np.count_nonzero(np.abs(j[0::2]) > 0.99) + + body_pose = self.robot_body.pose() + parts_xyz = np.array([p.pose().xyz() for p in self.parts.values()]).flatten() + self.body_xyz = ( + parts_xyz[0::3].mean(), parts_xyz[1::3].mean(), body_pose.xyz()[2]) # torso z is more informative than mean z + self.body_rpy = body_pose.rpy() + z = self.body_xyz[2] + if self.initial_z == None: + self.initial_z = z + r, p, yaw = self.body_rpy + self.walk_target_theta = np.arctan2(self.walk_target_y - self.body_xyz[1], + self.walk_target_x - self.body_xyz[0]) + self.walk_target_dist = np.linalg.norm( + [self.walk_target_y - self.body_xyz[1], self.walk_target_x - self.body_xyz[0]]) + angle_to_target = self.walk_target_theta - yaw + + rot_speed = np.array( + [[np.cos(-yaw), -np.sin(-yaw), 0], + [np.sin(-yaw), np.cos(-yaw), 0], + [ 0, 0, 1]] + ) + vx, vy, vz = np.dot(rot_speed, self.robot_body.speed()) # rotate speed back to body point of view + + more = np.array([ z-self.initial_z, + np.sin(angle_to_target), np.cos(angle_to_target), + 0.3* vx , 0.3* vy , 0.3* vz , # 0.3 is just scaling typical speed into -1..+1, no physical sense here + r, p], dtype=np.float32) + return np.clip( np.concatenate([more] + [j] + [self.feet_contact]), -5, +5) + + def calc_potential(self): + # progress in potential field is speed*dt, typical speed is about 2-3 meter per second, this potential will change 2-3 per frame (not per second), + # all rewards have rew/frame units and close to 1.0 + return - self.walk_target_dist / self.scene.dt + + +class Hopper(WalkerBase): + foot_list = ["foot"] + + def __init__(self): + WalkerBase.__init__(self, "hopper.xml", "torso", action_dim=3, obs_dim=15, power=0.75) + + def alive_bonus(self, z, pitch): + return +1 if z > 0.8 and abs(pitch) < 1.0 else -1 + + +class Walker2D(WalkerBase): + foot_list = ["foot", "foot_left"] + + def __init__(self): + WalkerBase.__init__(self, "walker2d.xml", "torso", action_dim=6, obs_dim=22, power=0.40) + + def alive_bonus(self, z, pitch): + return +1 if z > 0.8 and abs(pitch) < 1.0 else -1 + + def robot_specific_reset(self): + WalkerBase.robot_specific_reset(self) + for n in ["foot_joint", "foot_left_joint"]: + self.jdict[n].power_coef = 30.0 + + +class HalfCheetah(WalkerBase): + foot_list = ["ffoot", "fshin", "fthigh", "bfoot", "bshin", "bthigh"] # track these contacts with ground + + def __init__(self): + WalkerBase.__init__(self, "half_cheetah.xml", "torso", action_dim=6, obs_dim=26, power=0.90) + + def alive_bonus(self, z, pitch): + # Use contact other than feet to terminate episode: due to a lot of strange walks using knees + return +1 if np.abs(pitch) < 1.0 and not self.feet_contact[1] and not self.feet_contact[2] and not self.feet_contact[4] and not self.feet_contact[5] else -1 + + def robot_specific_reset(self): + WalkerBase.robot_specific_reset(self) + self.jdict["bthigh"].power_coef = 120.0 + self.jdict["bshin"].power_coef = 90.0 + self.jdict["bfoot"].power_coef = 60.0 + self.jdict["fthigh"].power_coef = 140.0 + self.jdict["fshin"].power_coef = 60.0 + self.jdict["ffoot"].power_coef = 30.0 + + +class Ant(WalkerBase): + foot_list = ['front_left_foot', 'front_right_foot', 'left_back_foot', 'right_back_foot'] + + def __init__(self): + WalkerBase.__init__(self, "ant.xml", "torso", action_dim=8, obs_dim=28, power=10.5) + + def alive_bonus(self, z, pitch): + return +1 if z > 0.26 else -1 # 0.25 is central sphere rad, die if it scrapes the ground + + +class Humanoid(WalkerBase): + self_collision = True + foot_list = ["right_foot", "left_foot"] # "left_hand", "right_hand" + + def __init__(self): + WalkerBase.__init__(self, 'humanoid_symmetric.xml', 'torso', action_dim=17, obs_dim=44, power=0.41) + # 17 joints, 4 of them important for walking (hip, knee), others may as well be turned off, 17/4 = 4.25 + + def robot_specific_reset(self): + WalkerBase.robot_specific_reset(self) + self.motor_names = ["abdomen_z", "abdomen_y", "abdomen_x"] + self.motor_power = [100, 100, 100] + self.motor_names += ["right_hip_x", "right_hip_z", "right_hip_y", "right_knee"] + self.motor_power += [100, 100, 300, 200] + self.motor_names += ["left_hip_x", "left_hip_z", "left_hip_y", "left_knee"] + self.motor_power += [100, 100, 300, 200] + self.motor_names += ["right_shoulder1", "right_shoulder2", "right_elbow"] + self.motor_power += [75, 75, 75] + self.motor_names += ["left_shoulder1", "left_shoulder2", "left_elbow"] + self.motor_power += [75, 75, 75] + self.motors = [self.jdict[n] for n in self.motor_names] + # if self.random_yaw: # TODO: Make leaning work as soon as the rest works + # cpose = cpp_household.Pose() + # yaw = self.np_random.uniform(low=-3.14, high=3.14) + # if self.random_lean and self.np_random.randint(2)==0: + # cpose.set_xyz(0, 0, 1.4) + # if self.np_random.randint(2)==0: + # pitch = np.pi/2 + # cpose.set_xyz(0, 0, 0.45) + # else: + # pitch = np.pi*3/2 + # cpose.set_xyz(0, 0, 0.25) + # roll = 0 + # cpose.set_rpy(roll, pitch, yaw) + # else: + # cpose.set_xyz(0, 0, 1.4) + # cpose.set_rpy(0, 0, yaw) # just face random direction, but stay straight otherwise + # self.cpp_robot.set_pose_and_speed(cpose, 0,0,0) + self.initial_z = 0.8 + + random_yaw = False + random_lean = False + + def apply_action(self, a): + assert( np.isfinite(a).all() ) + force_gain = 1 + for i, m, power in zip(range(17), self.motors, self.motor_power): + m.set_motor_torque( float(force_gain * power*self.power*a[i]) ) + #m.set_motor_torque(float(force_gain * power * self.power * np.clip(a[i], -1, +1))) + + def alive_bonus(self, z, pitch): + return +2 if z > 0.78 else -1 # 2 here because 17 joints produce a lot of electricity cost just from policy noise, living must be better than dying + diff --git a/examples/pybullet/gym/envs/robot_manipulators.py b/examples/pybullet/gym/envs/robot_manipulators.py new file mode 100644 index 000000000..e69de29bb