add yapf style and apply yapf to format all Python files

This recreates pull request #2192
This commit is contained in:
Erwin Coumans
2019-04-27 07:31:15 -07:00
parent c591735042
commit ef9570c315
347 changed files with 70304 additions and 22752 deletions

View File

@@ -5,229 +5,250 @@ from robot_manipulators import Reacher, Pusher, Striker, Thrower
class ReacherBulletEnv(MJCFBaseBulletEnv):
def __init__(self, render=False):
self.robot = Reacher()
MJCFBaseBulletEnv.__init__(self, self.robot, render)
def create_single_player_scene(self, bullet_client):
return SingleRobotEmptyScene(bullet_client, gravity=0.0, timestep=0.0165, frame_skip=1)
def __init__(self, render=False):
self.robot = Reacher()
MJCFBaseBulletEnv.__init__(self, self.robot, render)
def step(self, a):
assert (not self.scene.multiplayer)
self.robot.apply_action(a)
self.scene.global_step()
def create_single_player_scene(self, bullet_client):
return SingleRobotEmptyScene(bullet_client, gravity=0.0, timestep=0.0165, frame_skip=1)
state = self.robot.calc_state() # sets self.to_target_vec
def step(self, a):
assert (not self.scene.multiplayer)
self.robot.apply_action(a)
self.scene.global_step()
potential_old = self.potential
self.potential = self.robot.calc_potential()
state = self.robot.calc_state() # sets self.to_target_vec
electricity_cost = (
-0.10 * (np.abs(a[0] * self.robot.theta_dot) + np.abs(a[1] * self.robot.gamma_dot)) # work torque*angular_velocity
- 0.01 * (np.abs(a[0]) + np.abs(a[1])) # stall torque require some energy
)
stuck_joint_cost = -0.1 if np.abs(np.abs(self.robot.gamma) - 1) < 0.01 else 0.0
self.rewards = [float(self.potential - potential_old), float(electricity_cost), float(stuck_joint_cost)]
self.HUD(state, a, False)
return state, sum(self.rewards), False, {}
potential_old = self.potential
self.potential = self.robot.calc_potential()
def camera_adjust(self):
x, y, z = self.robot.fingertip.pose().xyz()
x *= 0.5
y *= 0.5
self.camera.move_and_look_at(0.3, 0.3, 0.3, x, y, z)
electricity_cost = (
-0.10 * (np.abs(a[0] * self.robot.theta_dot) + np.abs(a[1] * self.robot.gamma_dot)
) # work torque*angular_velocity
- 0.01 * (np.abs(a[0]) + np.abs(a[1])) # stall torque require some energy
)
stuck_joint_cost = -0.1 if np.abs(np.abs(self.robot.gamma) - 1) < 0.01 else 0.0
self.rewards = [
float(self.potential - potential_old),
float(electricity_cost),
float(stuck_joint_cost)
]
self.HUD(state, a, False)
return state, sum(self.rewards), False, {}
def camera_adjust(self):
x, y, z = self.robot.fingertip.pose().xyz()
x *= 0.5
y *= 0.5
self.camera.move_and_look_at(0.3, 0.3, 0.3, x, y, z)
class PusherBulletEnv(MJCFBaseBulletEnv):
def __init__(self, render=False):
self.robot = Pusher()
MJCFBaseBulletEnv.__init__(self, self.robot, render)
def create_single_player_scene(self, bullet_client):
return SingleRobotEmptyScene(bullet_client, gravity=9.81, timestep=0.0020, frame_skip=5)
def __init__(self, render=False):
self.robot = Pusher()
MJCFBaseBulletEnv.__init__(self, self.robot, render)
def step(self, a):
self.robot.apply_action(a)
self.scene.global_step()
def create_single_player_scene(self, bullet_client):
return SingleRobotEmptyScene(bullet_client, gravity=9.81, timestep=0.0020, frame_skip=5)
state = self.robot.calc_state() # sets self.to_target_vec
def step(self, a):
self.robot.apply_action(a)
self.scene.global_step()
potential_old = self.potential
self.potential = self.robot.calc_potential()
state = self.robot.calc_state() # sets self.to_target_vec
joint_vel = np.array([
self.robot.shoulder_pan_joint.get_velocity(),
self.robot.shoulder_lift_joint.get_velocity(),
self.robot.upper_arm_roll_joint.get_velocity(),
self.robot.elbow_flex_joint.get_velocity(),
self.robot.upper_arm_roll_joint.get_velocity(),
self.robot.wrist_flex_joint.get_velocity(),
self.robot.wrist_roll_joint.get_velocity()
])
potential_old = self.potential
self.potential = self.robot.calc_potential()
action_product = np.matmul(np.abs(a), np.abs(joint_vel))
action_sum = np.sum(a)
joint_vel = np.array([
self.robot.shoulder_pan_joint.get_velocity(),
self.robot.shoulder_lift_joint.get_velocity(),
self.robot.upper_arm_roll_joint.get_velocity(),
self.robot.elbow_flex_joint.get_velocity(),
self.robot.upper_arm_roll_joint.get_velocity(),
self.robot.wrist_flex_joint.get_velocity(),
self.robot.wrist_roll_joint.get_velocity()
])
electricity_cost = (
-0.10 * action_product # work torque*angular_velocity
- 0.01 * action_sum # stall torque require some energy
)
action_product = np.matmul(np.abs(a), np.abs(joint_vel))
action_sum = np.sum(a)
stuck_joint_cost = 0
for j in self.robot.ordered_joints:
if np.abs(j.current_relative_position()[0]) - 1 < 0.01:
stuck_joint_cost += -0.1
electricity_cost = (
-0.10 * action_product # work torque*angular_velocity
- 0.01 * action_sum # stall torque require some energy
)
self.rewards = [float(self.potential - potential_old), float(electricity_cost), float(stuck_joint_cost)]
self.HUD(state, a, False)
return state, sum(self.rewards), False, {}
stuck_joint_cost = 0
for j in self.robot.ordered_joints:
if np.abs(j.current_relative_position()[0]) - 1 < 0.01:
stuck_joint_cost += -0.1
def calc_potential(self):
return -100 * np.linalg.norm(self.to_target_vec)
self.rewards = [
float(self.potential - potential_old),
float(electricity_cost),
float(stuck_joint_cost)
]
self.HUD(state, a, False)
return state, sum(self.rewards), False, {}
def camera_adjust(self):
x, y, z = self.robot.fingertip.pose().xyz()
x *= 0.5
y *= 0.5
self.camera.move_and_look_at(0.3, 0.3, 0.3, x, y, z)
def calc_potential(self):
return -100 * np.linalg.norm(self.to_target_vec)
def camera_adjust(self):
x, y, z = self.robot.fingertip.pose().xyz()
x *= 0.5
y *= 0.5
self.camera.move_and_look_at(0.3, 0.3, 0.3, x, y, z)
class StrikerBulletEnv(MJCFBaseBulletEnv):
def __init__(self, render=False):
self.robot = Striker()
MJCFBaseBulletEnv.__init__(self, self.robot, render)
self._striked = False
self._min_strike_dist = np.inf
self.strike_threshold = 0.1
def create_single_player_scene(self, bullet_client):
return SingleRobotEmptyScene(bullet_client, gravity=9.81, timestep=0.0020, frame_skip=5)
def __init__(self, render=False):
self.robot = Striker()
MJCFBaseBulletEnv.__init__(self, self.robot, render)
self._striked = False
self._min_strike_dist = np.inf
self.strike_threshold = 0.1
def step(self, a):
self.robot.apply_action(a)
self.scene.global_step()
def create_single_player_scene(self, bullet_client):
return SingleRobotEmptyScene(bullet_client, gravity=9.81, timestep=0.0020, frame_skip=5)
state = self.robot.calc_state() # sets self.to_target_vec
def step(self, a):
self.robot.apply_action(a)
self.scene.global_step()
potential_old = self.potential
self.potential = self.robot.calc_potential()
state = self.robot.calc_state() # sets self.to_target_vec
joint_vel = np.array([
self.robot.shoulder_pan_joint.get_velocity(),
self.robot.shoulder_lift_joint.get_velocity(),
self.robot.upper_arm_roll_joint.get_velocity(),
self.robot.elbow_flex_joint.get_velocity(),
self.robot.upper_arm_roll_joint.get_velocity(),
self.robot.wrist_flex_joint.get_velocity(),
self.robot.wrist_roll_joint.get_velocity()
])
potential_old = self.potential
self.potential = self.robot.calc_potential()
action_product = np.matmul(np.abs(a), np.abs(joint_vel))
action_sum = np.sum(a)
joint_vel = np.array([
self.robot.shoulder_pan_joint.get_velocity(),
self.robot.shoulder_lift_joint.get_velocity(),
self.robot.upper_arm_roll_joint.get_velocity(),
self.robot.elbow_flex_joint.get_velocity(),
self.robot.upper_arm_roll_joint.get_velocity(),
self.robot.wrist_flex_joint.get_velocity(),
self.robot.wrist_roll_joint.get_velocity()
])
electricity_cost = (
-0.10 * action_product # work torque*angular_velocity
- 0.01 * action_sum # stall torque require some energy
)
action_product = np.matmul(np.abs(a), np.abs(joint_vel))
action_sum = np.sum(a)
stuck_joint_cost = 0
for j in self.robot.ordered_joints:
if np.abs(j.current_relative_position()[0]) - 1 < 0.01:
stuck_joint_cost += -0.1
electricity_cost = (
-0.10 * action_product # work torque*angular_velocity
- 0.01 * action_sum # stall torque require some energy
)
dist_object_finger = self.robot.object.pose().xyz() - self.robot.fingertip.pose().xyz()
reward_dist_vec = self.robot.object.pose().xyz() - self.robot.target.pose().xyz() # TODO: Should the object and target really belong to the robot? Maybe split this off
stuck_joint_cost = 0
for j in self.robot.ordered_joints:
if np.abs(j.current_relative_position()[0]) - 1 < 0.01:
stuck_joint_cost += -0.1
self._min_strike_dist = min(self._min_strike_dist, np.linalg.norm(reward_dist_vec))
dist_object_finger = self.robot.object.pose().xyz() - self.robot.fingertip.pose().xyz()
reward_dist_vec = self.robot.object.pose().xyz() - self.robot.target.pose().xyz(
) # TODO: Should the object and target really belong to the robot? Maybe split this off
if np.linalg.norm(dist_object_finger) < self.strike_threshold:
self._striked = True
self._strike_pos = self.robot.fingertip.pose().xyz()
self._min_strike_dist = min(self._min_strike_dist, np.linalg.norm(reward_dist_vec))
if self._striked:
reward_near_vec = self.robot.object.pose().xyz() - self._strike_pos
else:
reward_near_vec = self.robot.object.pose().xyz() - self.robot.fingertip.pose().xyz()
if np.linalg.norm(dist_object_finger) < self.strike_threshold:
self._striked = True
self._strike_pos = self.robot.fingertip.pose().xyz()
reward_near = - np.linalg.norm(reward_near_vec)
if self._striked:
reward_near_vec = self.robot.object.pose().xyz() - self._strike_pos
else:
reward_near_vec = self.robot.object.pose().xyz() - self.robot.fingertip.pose().xyz()
reward_dist = - np.linalg.norm(self._min_strike_dist)
reward_ctrl = - np.square(a).sum()
self.rewards = [float(self.potential - potential_old), float(electricity_cost), float(stuck_joint_cost),
3 * reward_dist, 0.1 * reward_ctrl, 0.5 * reward_near]
self.HUD(state, a, False)
return state, sum(self.rewards), False, {}
reward_near = -np.linalg.norm(reward_near_vec)
def calc_potential(self):
return -100 * np.linalg.norm(self.to_target_vec)
reward_dist = -np.linalg.norm(self._min_strike_dist)
reward_ctrl = -np.square(a).sum()
self.rewards = [
float(self.potential - potential_old),
float(electricity_cost),
float(stuck_joint_cost), 3 * reward_dist, 0.1 * reward_ctrl, 0.5 * reward_near
]
self.HUD(state, a, False)
return state, sum(self.rewards), False, {}
def camera_adjust(self):
x, y, z = self.robot.fingertip.pose().xyz()
x *= 0.5
y *= 0.5
self.camera.move_and_look_at(0.3, 0.3, 0.3, x, y, z)
def calc_potential(self):
return -100 * np.linalg.norm(self.to_target_vec)
def camera_adjust(self):
x, y, z = self.robot.fingertip.pose().xyz()
x *= 0.5
y *= 0.5
self.camera.move_and_look_at(0.3, 0.3, 0.3, x, y, z)
class ThrowerBulletEnv(MJCFBaseBulletEnv):
def __init__(self, render=False):
self.robot = Thrower()
MJCFBaseBulletEnv.__init__(self, self.robot, render)
def create_single_player_scene(self, bullet_client):
return SingleRobotEmptyScene(bullet_client, gravity=0.0, timestep=0.0020, frame_skip=5)
def __init__(self, render=False):
self.robot = Thrower()
MJCFBaseBulletEnv.__init__(self, self.robot, render)
def step(self, a):
self.robot.apply_action(a)
self.scene.global_step()
state = self.robot.calc_state() # sets self.to_target_vec
def create_single_player_scene(self, bullet_client):
return SingleRobotEmptyScene(bullet_client, gravity=0.0, timestep=0.0020, frame_skip=5)
potential_old = self.potential
self.potential = self.robot.calc_potential()
def step(self, a):
self.robot.apply_action(a)
self.scene.global_step()
state = self.robot.calc_state() # sets self.to_target_vec
joint_vel = np.array([
self.robot.shoulder_pan_joint.get_velocity(),
self.robot.shoulder_lift_joint.get_velocity(),
self.robot.upper_arm_roll_joint.get_velocity(),
self.robot.elbow_flex_joint.get_velocity(),
self.robot.upper_arm_roll_joint.get_velocity(),
self.robot.wrist_flex_joint.get_velocity(),
self.robot.wrist_roll_joint.get_velocity()
])
potential_old = self.potential
self.potential = self.robot.calc_potential()
action_product = np.matmul(np.abs(a), np.abs(joint_vel))
action_sum = np.sum(a)
joint_vel = np.array([
self.robot.shoulder_pan_joint.get_velocity(),
self.robot.shoulder_lift_joint.get_velocity(),
self.robot.upper_arm_roll_joint.get_velocity(),
self.robot.elbow_flex_joint.get_velocity(),
self.robot.upper_arm_roll_joint.get_velocity(),
self.robot.wrist_flex_joint.get_velocity(),
self.robot.wrist_roll_joint.get_velocity()
])
electricity_cost = (
-0.10 * action_product # work torque*angular_velocity
- 0.01 * action_sum # stall torque require some energy
)
action_product = np.matmul(np.abs(a), np.abs(joint_vel))
action_sum = np.sum(a)
stuck_joint_cost = 0
for j in self.robot.ordered_joints:
if np.abs(j.current_relative_position()[0]) - 1 < 0.01:
stuck_joint_cost += -0.1
electricity_cost = (
-0.10 * action_product # work torque*angular_velocity
- 0.01 * action_sum # stall torque require some energy
)
object_xy = self.robot.object.pose().xyz()[:2]
target_xy = self.robot.target.pose().xyz()[:2]
stuck_joint_cost = 0
for j in self.robot.ordered_joints:
if np.abs(j.current_relative_position()[0]) - 1 < 0.01:
stuck_joint_cost += -0.1
if not self.robot._object_hit_ground and self.robot.object.pose().xyz()[2] < -0.25: # TODO: Should the object and target really belong to the robot? Maybe split this off
self.robot._object_hit_ground = True
self.robot._object_hit_location = self.robot.object.pose().xyz()
object_xy = self.robot.object.pose().xyz()[:2]
target_xy = self.robot.target.pose().xyz()[:2]
if self.robot._object_hit_ground:
object_hit_xy = self.robot._object_hit_location[:2]
reward_dist = -np.linalg.norm(object_hit_xy - target_xy)
else:
reward_dist = -np.linalg.norm(object_xy - target_xy)
reward_ctrl = - np.square(a).sum()
if not self.robot._object_hit_ground and self.robot.object.pose().xyz(
)[2] < -0.25: # TODO: Should the object and target really belong to the robot? Maybe split this off
self.robot._object_hit_ground = True
self.robot._object_hit_location = self.robot.object.pose().xyz()
self.rewards = [float(self.potential - potential_old), float(electricity_cost), float(stuck_joint_cost),
reward_dist, 0.002 * reward_ctrl]
self.HUD(state, a, False)
return state, sum(self.rewards), False, {}
if self.robot._object_hit_ground:
object_hit_xy = self.robot._object_hit_location[:2]
reward_dist = -np.linalg.norm(object_hit_xy - target_xy)
else:
reward_dist = -np.linalg.norm(object_xy - target_xy)
reward_ctrl = -np.square(a).sum()
def camera_adjust(self):
x, y, z = self.robot.fingertip.pose().xyz()
x *= 0.5
y *= 0.5
self.camera.move_and_look_at(0.3, 0.3, 0.3, x, y, z)
self.rewards = [
float(self.potential - potential_old),
float(electricity_cost),
float(stuck_joint_cost), reward_dist, 0.002 * reward_ctrl
]
self.HUD(state, a, False)
return state, sum(self.rewards), False, {}
def camera_adjust(self):
x, y, z = self.robot.fingertip.pose().xyz()
x *= 0.5
y *= 0.5
self.camera.move_and_look_at(0.3, 0.3, 0.3, x, y, z)