Improve PyBullet ports of Roboschool envs: fix reset (it kept adding stadium objects, causing slowdown), now reset uses saveState/restoreState and reset becomes a few orders of magnitude faster.

Use python -m pybullet_envs.examples.testEnv --env AntBulletEnv-v0 --render=1 --steps 1000 --resetbenchmark=1 Added environments: HumanoidFlagrunBulletEnv-v0, HumanoidFlagrunHarderBulletEnv-v0, StrikerBulletEnv-v0, ThrowerBulletEnv-v0, PusherBulletEnv-v0, ReacherBulletEnv-v0, CartPoleBulletEnv-v0 and register them to OpenAI Gym. Allow numpy/humanoid_running.py to use abtch or non-batch update (setJointMotorControl2/setJointMotorControlArray)
2018-01-15 12:48:32 -08:00
parent 9ffb05eb3b
commit 851ca5bfb3
22 changed files with 2173 additions and 102 deletions
--- a/examples/pybullet/gym/pybullet_envs/gym_manipulator_envs.py
+++ b/examples/pybullet/gym/pybullet_envs/gym_manipulator_envs.py
@@ -0,0 +1,234 @@
+from .scene_abstract import SingleRobotEmptyScene
+from .env_bases import MJCFBaseBulletEnv
+import numpy as np
+from robot_manipulators import Reacher, Pusher, Striker, Thrower
+
+
+class ReacherBulletEnv(MJCFBaseBulletEnv):
+	def __init__(self):
+		self.robot = Reacher()
+		MJCFBaseBulletEnv.__init__(self, self.robot)
+
+	def create_single_player_scene(self):
+		return SingleRobotEmptyScene(gravity=0.0, timestep=0.0165, frame_skip=1)
+
+	def _step(self, a):
+		assert (not self.scene.multiplayer)
+		self.robot.apply_action(a)
+		self.scene.global_step()
+
+		state = self.robot.calc_state()  # sets self.to_target_vec
+
+		potential_old = self.potential
+		self.potential = self.robot.calc_potential()
+
+		electricity_cost = (
+			-0.10 * (np.abs(a[0] * self.robot.theta_dot) + np.abs(a[1] * self.robot.gamma_dot))  # work torque*angular_velocity
+			- 0.01 * (np.abs(a[0]) + np.abs(a[1]))  # stall torque require some energy
+		)
+		stuck_joint_cost = -0.1 if np.abs(np.abs(self.robot.gamma) - 1) < 0.01 else 0.0
+		self.rewards = [float(self.potential - potential_old), float(electricity_cost), float(stuck_joint_cost)]
+		self.HUD(state, a, False)
+		return state, sum(self.rewards), False, {}
+
+	def camera_adjust(self):
+		x, y, z = self.robot.fingertip.pose().xyz()
+		x *= 0.5
+		y *= 0.5
+		self.camera.move_and_look_at(0.3, 0.3, 0.3, x, y, z)
+
+
+class PusherBulletEnv(MJCFBaseBulletEnv):
+	def __init__(self):
+		self.robot = Pusher()
+		BaseBulletEnv.__init__(self, self.robot)
+
+	def create_single_player_scene(self):
+		return SingleRobotEmptyScene(gravity=9.81, timestep=0.0020, frame_skip=5)
+
+	def _step(self, a):
+		self.robot.apply_action(a)
+		self.scene.global_step()
+
+		state = self.robot.calc_state()  # sets self.to_target_vec
+
+		potential_old = self.potential
+		self.potential = self.robot.calc_potential()
+
+		joint_vel = np.array([
+			self.robot.shoulder_pan_joint.get_velocity(),
+			self.robot.shoulder_lift_joint.get_velocity(),
+			self.robot.upper_arm_roll_joint.get_velocity(),
+			self.robot.elbow_flex_joint.get_velocity(),
+			self.robot.upper_arm_roll_joint.get_velocity(),
+			self.robot.wrist_flex_joint.get_velocity(),
+			self.robot.wrist_roll_joint.get_velocity()
+		])
+
+		action_product = np.matmul(np.abs(a), np.abs(joint_vel))
+		action_sum = np.sum(a)
+
+		electricity_cost = (
+			-0.10 * action_product  # work torque*angular_velocity
+			- 0.01 * action_sum  # stall torque require some energy
+		)
+
+		stuck_joint_cost = 0
+		for j in self.robot.ordered_joints:
+			if np.abs(j.current_relative_position()[0]) - 1 < 0.01:
+				stuck_joint_cost += -0.1
+
+		self.rewards = [float(self.potential - potential_old), float(electricity_cost), float(stuck_joint_cost)]
+		self.HUD(state, a, False)
+		return state, sum(self.rewards), False, {}
+
+	def calc_potential(self):
+		return -100 * np.linalg.norm(self.to_target_vec)
+
+	def camera_adjust(self):
+		x, y, z = self.robot.fingertip.pose().xyz()
+		x *= 0.5
+		y *= 0.5
+		self.camera.move_and_look_at(0.3, 0.3, 0.3, x, y, z)
+
+
+class StrikerBulletEnv(MJCFBaseBulletEnv):
+	def __init__(self):
+		self.robot = Striker()
+		BaseBulletEnv.__init__(self, self.robot)
+		self._striked = False
+		self._min_strike_dist = np.inf
+		self.strike_threshold = 0.1
+
+	def create_single_player_scene(self):
+		return SingleRobotEmptyScene(gravity=9.81, timestep=0.0020, frame_skip=5)
+
+	def _step(self, a):
+		self.robot.apply_action(a)
+		self.scene.global_step()
+
+		state = self.robot.calc_state()  # sets self.to_target_vec
+
+		potential_old = self.potential
+		self.potential = self.robot.calc_potential()
+
+		joint_vel = np.array([
+			self.robot.shoulder_pan_joint.get_velocity(),
+			self.robot.shoulder_lift_joint.get_velocity(),
+			self.robot.upper_arm_roll_joint.get_velocity(),
+			self.robot.elbow_flex_joint.get_velocity(),
+			self.robot.upper_arm_roll_joint.get_velocity(),
+			self.robot.wrist_flex_joint.get_velocity(),
+			self.robot.wrist_roll_joint.get_velocity()
+		])
+
+		action_product = np.matmul(np.abs(a), np.abs(joint_vel))
+		action_sum = np.sum(a)
+
+		electricity_cost = (
+			-0.10 * action_product  # work torque*angular_velocity
+			- 0.01 * action_sum  # stall torque require some energy
+		)
+
+		stuck_joint_cost = 0
+		for j in self.robot.ordered_joints:
+			if np.abs(j.current_relative_position()[0]) - 1 < 0.01:
+				stuck_joint_cost += -0.1
+
+		dist_object_finger = self.robot.object.pose().xyz() - self.robot.fingertip.pose().xyz()
+		reward_dist_vec = self.robot.object.pose().xyz() - self.robot.target.pose().xyz()		# TODO: Should the object and target really belong to the robot? Maybe split this off
+
+		self._min_strike_dist = min(self._min_strike_dist, np.linalg.norm(reward_dist_vec))
+
+		if np.linalg.norm(dist_object_finger) < self.strike_threshold:
+			self._striked = True
+			self._strike_pos = self.robot.fingertip.pose().xyz()
+
+		if self._striked:
+			reward_near_vec = self.robot.object.pose().xyz() - self._strike_pos
+		else:
+			reward_near_vec = self.robot.object.pose().xyz() - self.robot.fingertip.pose().xyz()
+
+		reward_near = - np.linalg.norm(reward_near_vec)
+
+		reward_dist = - np.linalg.norm(self._min_strike_dist)
+		reward_ctrl = - np.square(a).sum()
+		self.rewards = [float(self.potential - potential_old), float(electricity_cost), float(stuck_joint_cost),
+						3 * reward_dist, 0.1 * reward_ctrl, 0.5 * reward_near]
+		self.HUD(state, a, False)
+		return state, sum(self.rewards), False, {}
+
+	def calc_potential(self):
+		return -100 * np.linalg.norm(self.to_target_vec)
+
+	def camera_adjust(self):
+		x, y, z = self.robot.fingertip.pose().xyz()
+		x *= 0.5
+		y *= 0.5
+		self.camera.move_and_look_at(0.3, 0.3, 0.3, x, y, z)
+
+
+class ThrowerBulletEnv(MJCFBaseBulletEnv):
+	def __init__(self):
+		self.robot = Thrower()
+		BaseBulletEnv.__init__(self, self.robot)
+
+	def create_single_player_scene(self):
+		return SingleRobotEmptyScene(gravity=0.0, timestep=0.0020, frame_skip=5)
+
+	def _step(self, a):
+		self.robot.apply_action(a)
+		self.scene.global_step()
+		state = self.robot.calc_state()  # sets self.to_target_vec
+
+		potential_old = self.potential
+		self.potential = self.robot.calc_potential()
+
+		joint_vel = np.array([
+			self.robot.shoulder_pan_joint.get_velocity(),
+			self.robot.shoulder_lift_joint.get_velocity(),
+			self.robot.upper_arm_roll_joint.get_velocity(),
+			self.robot.elbow_flex_joint.get_velocity(),
+			self.robot.upper_arm_roll_joint.get_velocity(),
+			self.robot.wrist_flex_joint.get_velocity(),
+			self.robot.wrist_roll_joint.get_velocity()
+		])
+
+		action_product = np.matmul(np.abs(a), np.abs(joint_vel))
+		action_sum = np.sum(a)
+
+		electricity_cost = (
+			-0.10 * action_product  # work torque*angular_velocity
+			- 0.01 * action_sum  # stall torque require some energy
+		)
+
+		stuck_joint_cost = 0
+		for j in self.robot.ordered_joints:
+			if np.abs(j.current_relative_position()[0]) - 1 < 0.01:
+				stuck_joint_cost += -0.1
+
+		object_xy = self.robot.object.pose().xyz()[:2]
+		target_xy = self.robot.target.pose().xyz()[:2]
+
+		if not self.robot._object_hit_ground and self.robot.object.pose().xyz()[2] < -0.25:						# TODO: Should the object and target really belong to the robot? Maybe split this off
+			self.robot._object_hit_ground = True
+			self.robot._object_hit_location = self.robot.object.pose().xyz()
+
+		if self.robot._object_hit_ground:
+			object_hit_xy = self.robot._object_hit_location[:2]
+			reward_dist = -np.linalg.norm(object_hit_xy - target_xy)
+		else:
+			reward_dist = -np.linalg.norm(object_xy - target_xy)
+		reward_ctrl = - np.square(a).sum()
+
+		self.rewards = [float(self.potential - potential_old), float(electricity_cost), float(stuck_joint_cost),
+						reward_dist, 0.002 * reward_ctrl]
+		self.HUD(state, a, False)
+		return state, sum(self.rewards), False, {}
+
+	def camera_adjust(self):
+		x, y, z = self.robot.fingertip.pose().xyz()
+		x *= 0.5
+		y *= 0.5
+		self.camera.move_and_look_at(0.3, 0.3, 0.3, x, y, z)
+