Merge pull request #2244 from erwincoumans/master

add minitaur extended to pybullet_envs
Author: erwincoumans
Committed: 2019-05-08 10:36:51 -07:00 (committed by GitHub)
2 changed files with 264 additions and 0 deletions


@@ -39,6 +39,14 @@ register(
reward_threshold=5.0,
)
register(
id='MinitaurExtendedEnv-v0',
entry_point='pybullet_envs.minitaur.envs:MinitaurExtendedEnv',
max_episode_steps=1000,
reward_threshold=5.0,
)
register(
id='MinitaurReactiveEnv-v0',
entry_point='pybullet_envs.minitaur.envs:MinitaurReactiveEnv',
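
Once this registration is picked up (importing pybullet_envs runs the register() calls above), the new environment should be constructible through Gym's standard factory. A minimal sketch, assuming gym and pybullet are installed:

    import gym
    import pybullet_envs  # noqa: F401 -- imported for its side effect of registering the Minitaur envs

    env = gym.make('MinitaurExtendedEnv-v0')
    obs = env.reset()
    obs, reward, done, info = env.step(env.action_space.sample())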


@@ -0,0 +1,256 @@
"""Extends the environment by adding observation and action history.
The implementation is a bit dirty import of the implementation in
the experimental branch.
"""
from gym import spaces
import numpy as np
from pybullet_envs.minitaur.envs.minitaur_reactive_env import MinitaurReactiveEnv
class MinitaurExtendedEnv(MinitaurReactiveEnv):
"""The 'extended' environment for Markovian property.
This class implements to include prior actions and observations to the
observation vector, thus making the environment "more" Markovian. This is
especially useful for systems with latencies.
Args:
history_length: the length of the history buffer
history_include_actions: a flag for including past actions in the history
history_include_states: a flag for including past states in the history
include_state_difference: a flag for including the first-order state
differences in the observation
include_second_state_difference: a flag for including the second-order
state differences in the observation
include_base_position: a flag for including the base position in the
observation
include_leg_model: a flag for including the leg pose model (swing and
extension) in the observation
never_terminate: if this is on, the environment unconditionally never
terminates
action_scale: the scale applied to actions
"""
MAX_BUFFER_SIZE = 1001
ACTION_DIM = 8
PARENT_OBSERVATION_DIM = 12
INIT_EXTENSION_POS = 2.0
INIT_SWING_POS = 0.0
metadata = {
"render.modes": ["human", "rgb_array"],
"video.frames_per_second": 50,
}
def __init__(self,
history_length=1,
history_include_actions=True,
history_include_states=False,
include_state_difference=False,
include_second_state_difference=False,
include_base_position=False,
include_leg_model=False,
never_terminate=False,
action_scale=0.5,
**kwargs):
self._kwargs = kwargs
self._history_length = history_length
self._history_include_actions = history_include_actions
self._history_include_states = history_include_states
self._include_state_difference = include_state_difference
self._include_second_state_difference = include_second_state_difference
self._include_base_position = include_base_position
self._include_leg_model = include_leg_model
self._never_terminate = never_terminate
self._action_scale = action_scale
self._past_parent_observations = np.zeros((self.MAX_BUFFER_SIZE + 1,
self.PARENT_OBSERVATION_DIM))
self._past_motor_angles = np.zeros((self.MAX_BUFFER_SIZE + 1, 8))
self._past_actions = np.zeros((self.MAX_BUFFER_SIZE, self.ACTION_DIM))
self._counter = 0
super(MinitaurExtendedEnv, self).__init__(**kwargs)
self.action_space = spaces.Box(-1.0, 1.0, self.action_space.shape)
self.observation_space = spaces.Box(-np.inf, np.inf,
self._get_observation().shape)
# This is mainly for the TF-Agents compatibility
self.action_space.flat_dim = len(self.action_space.low)
self.observation_space.flat_dim = len(self.observation_space.low)
def _get_observation(self):
"""Maybe concatenate motor velocity and torque into observations."""
parent_observation = super(MinitaurExtendedEnv, self)._get_observation()
parent_observation = np.array(parent_observation)
# Base class might require this.
self._observation = parent_observation
self._past_parent_observations[self._counter] = parent_observation
num_motors = self.minitaur.num_motors
self._past_motor_angles[self._counter] = parent_observation[-num_motors:]
history_states = []
history_actions = []
for i in range(self._history_length):
t = max(self._counter - i - 1, 0)
if self._history_include_states:
history_states.append(self._past_parent_observations[t])
if self._history_include_actions:
history_actions.append(self._past_actions[t])
t = self._counter
tm, tmm = max(0, self._counter - 1), max(0, self._counter - 2)
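# t indexes the current step; tm and tmm index the two preceding steps
# (clamped at zero early in an episode). Used for the finite differences below.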
state_difference, second_state_difference = [], []
if self._include_state_difference:
state_difference = [
self._past_motor_angles[t] - self._past_motor_angles[tm]
]
if self._include_second_state_difference:
second_state_difference = [
self._past_motor_angles[t] - 2 * self._past_motor_angles[tm] +
self._past_motor_angles[tmm]
]
base_position = []
if self._include_base_position:
base_position = np.array(self.minitaur.GetBasePosition())
leg_model = []
if self._include_leg_model:
raw_motor_angles = self.minitaur.GetMotorAngles()
leg_model = self.convert_to_leg_model(raw_motor_angles)
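# The final observation is the parent observation followed by (when enabled):
# state history, action history, first- and second-order motor-angle
# differences, the base position, and the leg model.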
observation_list = (
[parent_observation] + history_states + history_actions +
state_difference + second_state_difference + [base_position] +
[leg_model])
full_observation = np.concatenate(observation_list)
return full_observation
def reset(self):
"""Resets the time and history buffer."""
self._counter = 0
self._signal(self._counter) # This sets the current phase
self._past_parent_observations = np.zeros((self.MAX_BUFFER_SIZE + 1,
self.PARENT_OBSERVATION_DIM))
self._past_motor_angles = np.zeros((self.MAX_BUFFER_SIZE + 1, 8))
self._past_actions = np.zeros((self.MAX_BUFFER_SIZE, self.ACTION_DIM))
self._counter = 0
return np.array(super(MinitaurExtendedEnv, self).reset())
def step(self, action):
"""Step function wrapper can be used to add shaping terms to the reward.
Args:
action: an array of the given action
Returns:
next_obs: the next observation
reward: the reward for this experience tuple
done: the terminal flag
info: an additional information
"""
action *= self._action_scale
self._past_actions[self._counter] = action
self._counter += 1
next_obs, _, done, info = super(MinitaurExtendedEnv, self).step(action)
reward = self.reward()
info.update(base_reward=reward)
return next_obs, reward, done, info
def terminate(self):
"""The helper function to terminate the environment."""
super(MinitaurExtendedEnv, self)._close()
def _termination(self):
"""Determines whether the env is terminated or not.
checks whether 1) the front leg is bent too much or 2) the time exceeds
the manually set weights.
Returns:
terminal: the terminal flag whether the env is terminated or not
"""
if self._never_terminate:
return False
leg_model = self.convert_to_leg_model(self.minitaur.GetMotorAngles())
swing0 = leg_model[0]
swing1 = leg_model[2]
maximum_swing_angle = 0.8
if swing0 > maximum_swing_angle or swing1 > maximum_swing_angle:
return True
if self._counter >= 500:
return True
return False
def reward(self):
"""Compute rewards for the given time step.
It considers two terms: 1) forward velocity reward and 2) action
acceleration penalty.
Returns:
reward: the computed reward.
"""
current_base_position = self.minitaur.GetBasePosition()
dt = self.control_time_step
velocity = (current_base_position[0] - self._last_base_position[0]) / dt
velocity_reward = np.clip(velocity, -0.5, 0.5)
action = self._past_actions[self._counter - 1]
prev_action = self._past_actions[max(self._counter - 2, 0)]
prev_prev_action = self._past_actions[max(self._counter - 3, 0)]
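# The second-order finite difference of the last three actions approximates
# the action acceleration; its mean absolute value is penalized below.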
acc = action - 2 * prev_action + prev_prev_action
action_acceleration_penalty = np.mean(np.abs(acc))
reward = 0.0
reward += 1.0 * velocity_reward
reward -= 0.1 * action_acceleration_penalty
return reward
@staticmethod
def convert_to_leg_model(motor_angles):
"""A helper function to convert motor angles to leg model.
Args:
motor_angles: the raw motor angles.
Returns:
leg_angles: the leg pose model represented in swing and extension.
"""
# TODO(sehoonha): clean up model conversion codes
num_legs = 4
# motor_angles = motor_angles / (np.pi / 4.)
leg_angles = np.zeros(num_legs * 2)
for i in range(num_legs):
motor1, motor2 = motor_angles[2 * i:2 * i + 2]
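# Swing is half the signed difference of the two motor angles (the sign
# alternates between the front and back leg pairs); extension is their mean.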
swing = (-1)**(i // 2) * 0.5 * (motor2 - motor1)
extension = 0.5 * (motor1 + motor2)
leg_angles[i] = swing
leg_angles[i + num_legs] = extension
return leg_angles
def __getstate__(self):
"""A helper get state function for pickling."""
return {"kwargs": self._kwargs}
def __setstate__(self, state):
"""A helper set state function for pickling."""
self.__init__(**state["kwargs"])
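
As a usage note, the extended environment can also be constructed directly rather than through gym.make. A minimal sketch with history augmentation enabled; the parameter values are illustrative, not taken from the PR:

    from pybullet_envs.minitaur.envs.minitaur_extended_env import MinitaurExtendedEnv

    # Illustrative settings: keep three past states/actions plus first-order
    # state differences in the observation.
    env = MinitaurExtendedEnv(
        history_length=3,
        history_include_actions=True,
        history_include_states=True,
        include_state_difference=True)
    obs = env.reset()
    for _ in range(100):
        obs, reward, done, info = env.step(env.action_space.sample())
        if done:
            obs = env.reset()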