From 4e2dfbe58efb1197f0d9b654ef1e9fcc3dd40f55 Mon Sep 17 00:00:00 2001
From: Erwin Coumans
 <erwincoumans@erwincoumans-macbookpro2.roam.corp.google.com>
Date: Tue, 31 Oct 2017 16:05:52 -0700
Subject: [PATCH] add minitaur duck environment see also 
 https://www.youtube.com/watch?v=SLwUCMZdKLo and evolution strategies
 (hardmaru): https://twitter.com/hardmaru/status/925074585984237568

---
 .../pybullet/gym/pybullet_envs/__init__.py    |   9 +
 .../gym/pybullet_envs/agents/configs.py       |  16 +
 .../gym/pybullet_envs/bullet/__init__.py      |   1 +
 .../bullet/minitaur_duck_gym_env.py           | 387 ++++++++++++++++++
 .../pybullet_envs/bullet/minitaur_gym_env.py  |  15 +-
 5 files changed, 416 insertions(+), 12 deletions(-)
 create mode 100644 examples/pybullet/gym/pybullet_envs/bullet/minitaur_duck_gym_env.py

diff --git a/examples/pybullet/gym/pybullet_envs/__init__.py b/examples/pybullet/gym/pybullet_envs/__init__.py
index 74b9b7c9b..8f8211845 100644
--- a/examples/pybullet/gym/pybullet_envs/__init__.py
+++ b/examples/pybullet/gym/pybullet_envs/__init__.py
@@ -9,6 +9,15 @@ register(
     reward_threshold=5.0,
 )
 
+register(
+    id='MinitaurBulletDuckEnv-v0',
+    entry_point='pybullet_envs.bullet:MinitaurBulletDuckEnv',
+    timestep_limit=1000,
+    reward_threshold=5.0,
+)
+
+MinitaurBulletDuckEnv
+
 register(
     id='RacecarBulletEnv-v0',
     entry_point='pybullet_envs.bullet:RacecarGymEnv',
diff --git a/examples/pybullet/gym/pybullet_envs/agents/configs.py b/examples/pybullet/gym/pybullet_envs/agents/configs.py
index 6e787d129..84d92ab1d 100644
--- a/examples/pybullet/gym/pybullet_envs/agents/configs.py
+++ b/examples/pybullet/gym/pybullet_envs/agents/configs.py
@@ -23,6 +23,7 @@ import functools
 from agents import ppo
 from agents.scripts import networks
 from pybullet_envs.bullet import minitaur_gym_env
+from pybullet_envs.bullet import minitaur_duck_gym_env
 from pybullet_envs.bullet import minitaur_env_randomizer
 import pybullet_envs.bullet.minitaur_gym_env as minitaur_gym_env
 import pybullet_envs
@@ -124,3 +125,18 @@ def pybullet_minitaur():
   steps = 3e7  # 30M
   return locals()
 
+def pybullet_duck_minitaur():
+  """Configuration specific to minitaur_gym_env.MinitaurBulletDuckEnv class."""
+  locals().update(default())
+  randomizer = (minitaur_env_randomizer.MinitaurEnvRandomizer())
+  env = functools.partial(
+      minitaur_gym_env.MinitaurBulletDuckEnv,
+      accurate_motor_model_enabled=True,
+      motor_overheat_protection=True,
+      pd_control_enabled=True,
+      env_randomizer=randomizer,
+      render=False)
+  max_length = 1000
+  steps = 3e7  # 30M
+  return locals()
+
diff --git a/examples/pybullet/gym/pybullet_envs/bullet/__init__.py b/examples/pybullet/gym/pybullet_envs/bullet/__init__.py
index 8b1735fb1..4c08abbba 100644
--- a/examples/pybullet/gym/pybullet_envs/bullet/__init__.py
+++ b/examples/pybullet/gym/pybullet_envs/bullet/__init__.py
@@ -1,5 +1,6 @@
 from pybullet_envs.bullet.cartpole_bullet import CartPoleBulletEnv
 from pybullet_envs.bullet.minitaur_gym_env import MinitaurBulletEnv
+from pybullet_envs.bullet.minitaur_duck_gym_env import MinitaurBulletDuckEnv
 from pybullet_envs.bullet.racecarGymEnv import RacecarGymEnv
 from pybullet_envs.bullet.racecarZEDGymEnv import RacecarZEDGymEnv
 from pybullet_envs.bullet.simpleHumanoidGymEnv import SimpleHumanoidGymEnv
diff --git a/examples/pybullet/gym/pybullet_envs/bullet/minitaur_duck_gym_env.py b/examples/pybullet/gym/pybullet_envs/bullet/minitaur_duck_gym_env.py
new file mode 100644
index 000000000..44b41dd7d
--- /dev/null
+++ b/examples/pybullet/gym/pybullet_envs/bullet/minitaur_duck_gym_env.py
@@ -0,0 +1,387 @@
+"""This file implements the gym environment of minitaur.
+
+"""
+
+import os, inspect
+currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
+parentdir = os.path.dirname(os.path.dirname(currentdir))
+os.sys.path.insert(0,parentdir)
+
+
+import math
+import time
+import gym
+from gym import spaces
+from gym.utils import seeding
+import numpy as np
+import pybullet
+from . import bullet_client
+from . import minitaur
+import os
+import pybullet_data
+from . import minitaur_env_randomizer
+
+NUM_SUBSTEPS = 5
+NUM_MOTORS = 8
+MOTOR_ANGLE_OBSERVATION_INDEX = 0
+MOTOR_VELOCITY_OBSERVATION_INDEX = MOTOR_ANGLE_OBSERVATION_INDEX + NUM_MOTORS
+MOTOR_TORQUE_OBSERVATION_INDEX = MOTOR_VELOCITY_OBSERVATION_INDEX + NUM_MOTORS
+BASE_ORIENTATION_OBSERVATION_INDEX = MOTOR_TORQUE_OBSERVATION_INDEX + NUM_MOTORS
+ACTION_EPS = 0.01
+OBSERVATION_EPS = 0.01
+RENDER_HEIGHT = 720
+RENDER_WIDTH = 960
+
+duckStartPos = [0,0,0.25]
+duckStartOrn = [0.5,0.5,0.5,0.5]
+
+class MinitaurBulletDuckEnv(gym.Env):
+  """The gym environment for the minitaur.
+
+  It simulates the locomotion of a minitaur, a quadruped robot. The state space
+  include the angles, velocities and torques for all the motors and the action
+  space is the desired motor angle for each motor. The reward function is based
+  on how far the minitaur walks in 1000 steps and penalizes the energy
+  expenditure.
+
+  """
+  metadata = {
+      "render.modes": ["human", "rgb_array"],
+      "video.frames_per_second": 50
+  }
+
+  def __init__(self,
+               urdf_root=pybullet_data.getDataPath(),
+               action_repeat=1,
+               distance_weight=1.0,
+               energy_weight=0.005,
+               shake_weight=0.0,
+               drift_weight=0.0,
+               distance_limit=float("inf"),
+               observation_noise_stdev=0.0,
+               self_collision_enabled=True,
+               motor_velocity_limit=np.inf,
+               pd_control_enabled=False,#not needed to be true if accurate motor model is enabled (has its own better PD)
+               leg_model_enabled=True,
+               accurate_motor_model_enabled=True,
+               motor_kp=1.0,
+               motor_kd=0.02,
+               torque_control_enabled=False,
+               motor_overheat_protection=True,
+               hard_reset=True,
+               on_rack=False,
+               render=False,
+               kd_for_pd_controllers=0.3,
+               env_randomizer=minitaur_env_randomizer.MinitaurEnvRandomizer()):
+    """Initialize the minitaur gym environment.
+
+    Args:
+      urdf_root: The path to the urdf data folder.
+      action_repeat: The number of simulation steps before actions are applied.
+      distance_weight: The weight of the distance term in the reward.
+      energy_weight: The weight of the energy term in the reward.
+      shake_weight: The weight of the vertical shakiness term in the reward.
+      drift_weight: The weight of the sideways drift term in the reward.
+      distance_limit: The maximum distance to terminate the episode.
+      observation_noise_stdev: The standard deviation of observation noise.
+      self_collision_enabled: Whether to enable self collision in the sim.
+      motor_velocity_limit: The velocity limit of each motor.
+      pd_control_enabled: Whether to use PD controller for each motor.
+      leg_model_enabled: Whether to use a leg motor to reparameterize the action
+        space.
+      accurate_motor_model_enabled: Whether to use the accurate DC motor model.
+      motor_kp: proportional gain for the accurate motor model.
+      motor_kd: derivative gain for the accurate motor model.
+      torque_control_enabled: Whether to use the torque control, if set to
+        False, pose control will be used.
+      motor_overheat_protection: Whether to shutdown the motor that has exerted
+        large torque (OVERHEAT_SHUTDOWN_TORQUE) for an extended amount of time
+        (OVERHEAT_SHUTDOWN_TIME). See ApplyAction() in minitaur.py for more
+        details.
+      hard_reset: Whether to wipe the simulation and load everything when reset
+        is called. If set to false, reset just place the minitaur back to start
+        position and set its pose to initial configuration.
+      on_rack: Whether to place the minitaur on rack. This is only used to debug
+        the walking gait. In this mode, the minitaur's base is hanged midair so
+        that its walking gait is clearer to visualize.
+      render: Whether to render the simulation.
+      kd_for_pd_controllers: kd value for the pd controllers of the motors
+      env_randomizer: An EnvRandomizer to randomize the physical properties
+        during reset().
+    """
+    self._time_step = 0.01
+    self._action_repeat = action_repeat
+    self._num_bullet_solver_iterations = 300
+    self._urdf_root = urdf_root
+    self._self_collision_enabled = self_collision_enabled
+    self._motor_velocity_limit = motor_velocity_limit
+    self._observation = []
+    self._env_step_counter = 0
+    self._is_render = render
+    self._last_base_position = [0, 0, 0]
+    self._distance_weight = distance_weight
+    self._energy_weight = energy_weight
+    self._drift_weight = drift_weight
+    self._shake_weight = shake_weight
+    self._distance_limit = distance_limit
+    self._observation_noise_stdev = observation_noise_stdev
+    self._action_bound = 1
+    self._pd_control_enabled = pd_control_enabled
+    self._leg_model_enabled = leg_model_enabled
+    self._accurate_motor_model_enabled = accurate_motor_model_enabled
+    self._motor_kp = motor_kp
+    self._motor_kd = motor_kd
+    self._torque_control_enabled = torque_control_enabled
+    self._motor_overheat_protection = motor_overheat_protection
+    self._on_rack = on_rack
+    self._cam_dist = 1.0
+    self._cam_yaw = 0
+    self._duckId = -1
+    self._cam_pitch = -30
+    self._hard_reset = True
+    self._kd_for_pd_controllers = kd_for_pd_controllers
+    self._last_frame_time = 0.0
+    print("urdf_root=" + self._urdf_root)
+    self._env_randomizer = env_randomizer
+    # PD control needs smaller time step for stability.
+    if pd_control_enabled or accurate_motor_model_enabled:
+      self._time_step /= NUM_SUBSTEPS
+      self._num_bullet_solver_iterations /= NUM_SUBSTEPS
+      self._action_repeat *= NUM_SUBSTEPS
+
+    if self._is_render:
+      self._pybullet_client = bullet_client.BulletClient(
+          connection_mode=pybullet.GUI)
+    else:
+      self._pybullet_client = bullet_client.BulletClient()
+
+    self._seed()
+    self.reset()
+    observation_high = (
+        self.minitaur.GetObservationUpperBound() + OBSERVATION_EPS)
+    observation_low = (
+        self.minitaur.GetObservationLowerBound() - OBSERVATION_EPS)
+    action_dim = 8
+    action_high = np.array([self._action_bound] * action_dim)
+    self.action_space = spaces.Box(-action_high, action_high)
+    self.observation_space = spaces.Box(observation_low, observation_high)
+    self.viewer = None
+    self._hard_reset = hard_reset  # This assignment need to be after reset()
+
+  def set_env_randomizer(self, env_randomizer):
+    self._env_randomizer = env_randomizer
+
+  def configure(self, args):
+    self._args = args
+
+  def _reset(self):
+    if self._hard_reset:
+      self._pybullet_client.resetSimulation()
+      self._pybullet_client.setPhysicsEngineParameter(
+          numSolverIterations=int(self._num_bullet_solver_iterations))
+      self._pybullet_client.setTimeStep(self._time_step)
+      self._groundId = self._pybullet_client.loadURDF("%s/plane.urdf" % self._urdf_root)
+      self._duckId = self._pybullet_client.loadURDF("%s/duck_vhacd.urdf" % self._urdf_root,duckStartPos,duckStartOrn)
+      self._pybullet_client.setGravity(0, 0, -10)
+      acc_motor = self._accurate_motor_model_enabled
+      motor_protect = self._motor_overheat_protection
+      self.minitaur = (minitaur.Minitaur(
+          pybullet_client=self._pybullet_client,
+          urdf_root=self._urdf_root,
+          time_step=self._time_step,
+          self_collision_enabled=self._self_collision_enabled,
+          motor_velocity_limit=self._motor_velocity_limit,
+          pd_control_enabled=self._pd_control_enabled,
+          accurate_motor_model_enabled=acc_motor,
+          motor_kp=self._motor_kp,
+          motor_kd=self._motor_kd,
+          torque_control_enabled=self._torque_control_enabled,
+          motor_overheat_protection=motor_protect,
+          on_rack=self._on_rack,
+          kd_for_pd_controllers=self._kd_for_pd_controllers))
+    else:
+      self.minitaur.Reset(reload_urdf=False)
+      self._pybullet_client.resetBasePositionAndOrientation(self._duckId,duckStartPos,duckStartOrn)
+    if self._env_randomizer is not None:
+      self._env_randomizer.randomize_env(self)
+
+    self._env_step_counter = 0
+    self._last_base_position = [0, 0, 0]
+    self._objectives = []
+    self._pybullet_client.resetDebugVisualizerCamera(
+        self._cam_dist, self._cam_yaw, self._cam_pitch, [0, 0, 0])
+    if not self._torque_control_enabled:
+      for _ in range(100):
+        if self._pd_control_enabled or self._accurate_motor_model_enabled:
+          self.minitaur.ApplyAction([math.pi / 2] * 8)
+        self._pybullet_client.stepSimulation()
+    return self._noisy_observation()
+
+  def _seed(self, seed=None):
+    self.np_random, seed = seeding.np_random(seed)
+    return [seed]
+
+  def _transform_action_to_motor_command(self, action):
+    if self._leg_model_enabled:
+      for i, action_component in enumerate(action):
+        if not (-self._action_bound - ACTION_EPS <= action_component <=
+                self._action_bound + ACTION_EPS):
+          raise ValueError(
+              "{}th action {} out of bounds.".format(i, action_component))
+      action = self.minitaur.ConvertFromLegModel(action)
+    return action
+
+  def _step(self, action):
+    """Step forward the simulation, given the action.
+
+    Args:
+      action: A list of desired motor angles for eight motors.
+
+    Returns:
+      observations: The angles, velocities and torques of all motors.
+      reward: The reward for the current state-action pair.
+      done: Whether the episode has ended.
+      info: A dictionary that stores diagnostic information.
+
+    Raises:
+      ValueError: The action dimension is not the same as the number of motors.
+      ValueError: The magnitude of actions is out of bounds.
+    """
+    if self._is_render:
+      # Sleep, otherwise the computation takes less time than real time,
+      # which will make the visualization like a fast-forward video.
+      time_spent = time.time() - self._last_frame_time
+      self._last_frame_time = time.time()
+      time_to_sleep = self._action_repeat * self._time_step - time_spent
+      if time_to_sleep > 0:
+        time.sleep(time_to_sleep)
+      base_pos = self.minitaur.GetBasePosition()
+      self._pybullet_client.resetDebugVisualizerCamera(
+          self._cam_dist, self._cam_yaw, self._cam_pitch, base_pos)
+    action = self._transform_action_to_motor_command(action)
+    for _ in range(self._action_repeat):
+      self.minitaur.ApplyAction(action)
+      self._pybullet_client.stepSimulation()
+
+    self._env_step_counter += 1
+    reward = self._reward()
+    done = self._termination()
+    return np.array(self._noisy_observation()), reward, done, {}
+
+  def _render(self, mode="rgb_array", close=False):
+    if mode != "rgb_array":
+      return np.array([])
+    base_pos = self.minitaur.GetBasePosition()
+    view_matrix = self._pybullet_client.computeViewMatrixFromYawPitchRoll(
+        cameraTargetPosition=base_pos,
+        distance=self._cam_dist,
+        yaw=self._cam_yaw,
+        pitch=self._cam_pitch,
+        roll=0,
+        upAxisIndex=2)
+    proj_matrix = self._pybullet_client.computeProjectionMatrixFOV(
+        fov=60, aspect=float(RENDER_WIDTH)/RENDER_HEIGHT,
+        nearVal=0.1, farVal=100.0)
+    (_, _, px, _, _) = self._pybullet_client.getCameraImage(
+        width=RENDER_WIDTH, height=RENDER_HEIGHT, viewMatrix=view_matrix,
+        projectionMatrix=proj_matrix, renderer=pybullet.ER_BULLET_HARDWARE_OPENGL)
+    rgb_array = np.array(px)
+    rgb_array = rgb_array[:, :, :3]
+    return rgb_array
+
+  def get_minitaur_motor_angles(self):
+    """Get the minitaur's motor angles.
+
+    Returns:
+      A numpy array of motor angles.
+    """
+    return np.array(
+        self._observation[MOTOR_ANGLE_OBSERVATION_INDEX:
+                          MOTOR_ANGLE_OBSERVATION_INDEX + NUM_MOTORS])
+
+  def get_minitaur_motor_velocities(self):
+    """Get the minitaur's motor velocities.
+
+    Returns:
+      A numpy array of motor velocities.
+    """
+    return np.array(
+        self._observation[MOTOR_VELOCITY_OBSERVATION_INDEX:
+                          MOTOR_VELOCITY_OBSERVATION_INDEX + NUM_MOTORS])
+
+  def get_minitaur_motor_torques(self):
+    """Get the minitaur's motor torques.
+
+    Returns:
+      A numpy array of motor torques.
+    """
+    return np.array(
+        self._observation[MOTOR_TORQUE_OBSERVATION_INDEX:
+                          MOTOR_TORQUE_OBSERVATION_INDEX + NUM_MOTORS])
+
+  def get_minitaur_base_orientation(self):
+    """Get the minitaur's base orientation, represented by a quaternion.
+
+    Returns:
+      A numpy array of minitaur's orientation.
+    """
+    return np.array(self._observation[BASE_ORIENTATION_OBSERVATION_INDEX:])
+
+  def lost_duck(self):
+    points = self._pybullet_client.getContactPoints(self._duckId, self._groundId);
+    return len(points)>0
+
+  def is_fallen(self):
+    """Decide whether the minitaur has fallen.
+
+    If the up directions between the base and the world is larger (the dot
+    product is smaller than 0.85) or the base is very low on the ground
+    (the height is smaller than 0.13 meter), the minitaur is considered fallen.
+
+    Returns:
+      Boolean value that indicates whether the minitaur has fallen.
+    """
+    orientation = self.minitaur.GetBaseOrientation()
+    rot_mat = self._pybullet_client.getMatrixFromQuaternion(orientation)
+    local_up = rot_mat[6:]
+    pos = self.minitaur.GetBasePosition()
+    return (np.dot(np.asarray([0, 0, 1]), np.asarray(local_up)) < 0.85 or
+            pos[2] < 0.13)
+
+  def _termination(self):
+    position = self.minitaur.GetBasePosition()
+    distance = math.sqrt(position[0]**2 + position[1]**2)
+    return self.lost_duck() or self.is_fallen() or distance > self._distance_limit
+
+  def _reward(self):
+    current_base_position = self.minitaur.GetBasePosition()
+    forward_reward = current_base_position[0] - self._last_base_position[0]
+    drift_reward = -abs(current_base_position[1] - self._last_base_position[1])
+    shake_reward = -abs(current_base_position[2] - self._last_base_position[2])
+    self._last_base_position = current_base_position
+    energy_reward = np.abs(
+        np.dot(self.minitaur.GetMotorTorques(),
+               self.minitaur.GetMotorVelocities())) * self._time_step
+    reward = (
+        self._distance_weight * forward_reward -
+        self._energy_weight * energy_reward + self._drift_weight * drift_reward
+        + self._shake_weight * shake_reward)
+    self._objectives.append(
+        [forward_reward, energy_reward, drift_reward, shake_reward])
+    return reward
+
+  def get_objectives(self):
+    return self._objectives
+
+  def _get_observation(self):
+    self._observation = self.minitaur.GetObservation()
+    return self._observation
+
+  def _noisy_observation(self):
+    self._get_observation()
+    observation = np.array(self._observation)
+    if self._observation_noise_stdev > 0:
+      observation += (np.random.normal(
+          scale=self._observation_noise_stdev, size=observation.shape) *
+                      self.minitaur.GetObservationUpperBound())
+    return observation
diff --git a/examples/pybullet/gym/pybullet_envs/bullet/minitaur_gym_env.py b/examples/pybullet/gym/pybullet_envs/bullet/minitaur_gym_env.py
index f553ac46e..38ec631a5 100644
--- a/examples/pybullet/gym/pybullet_envs/bullet/minitaur_gym_env.py
+++ b/examples/pybullet/gym/pybullet_envs/bullet/minitaur_gym_env.py
@@ -32,9 +32,6 @@ OBSERVATION_EPS = 0.01
 RENDER_HEIGHT = 720
 RENDER_WIDTH = 960
 
-duckStartPos = [0,0,0.25]
-duckStartOrn = [0.5,0.5,0.5,0.5]
-
 class MinitaurBulletEnv(gym.Env):
   """The gym environment for the minitaur.
 
@@ -136,7 +133,6 @@ class MinitaurBulletEnv(gym.Env):
     self._on_rack = on_rack
     self._cam_dist = 1.0
     self._cam_yaw = 0
-    self._duckId = -1
     self._cam_pitch = -30
     self._hard_reset = True
     self._kd_for_pd_controllers = kd_for_pd_controllers
@@ -180,8 +176,7 @@ class MinitaurBulletEnv(gym.Env):
       self._pybullet_client.setPhysicsEngineParameter(
           numSolverIterations=int(self._num_bullet_solver_iterations))
       self._pybullet_client.setTimeStep(self._time_step)
-      self._groundId = self._pybullet_client.loadURDF("%s/plane.urdf" % self._urdf_root)
-      self._duckId = self._pybullet_client.loadURDF("%s/duck_vhacd.urdf" % self._urdf_root,duckStartPos,duckStartOrn)
+      self._pybullet_client.loadURDF("%s/plane.urdf" % self._urdf_root)
       self._pybullet_client.setGravity(0, 0, -10)
       acc_motor = self._accurate_motor_model_enabled
       motor_protect = self._motor_overheat_protection
@@ -201,7 +196,7 @@ class MinitaurBulletEnv(gym.Env):
           kd_for_pd_controllers=self._kd_for_pd_controllers))
     else:
       self.minitaur.Reset(reload_urdf=False)
-      self._pybullet_client.resetBasePositionAndOrientation(self._duckId,duckStartPos,duckStartOrn)
+
     if self._env_randomizer is not None:
       self._env_randomizer.randomize_env(self)
 
@@ -327,10 +322,6 @@ class MinitaurBulletEnv(gym.Env):
     """
     return np.array(self._observation[BASE_ORIENTATION_OBSERVATION_INDEX:])
 
-  def lost_duck(self):
-    points = self._pybullet_client.getContactPoints(self._duckId, self._groundId);
-    return len(points)>0
-
   def is_fallen(self):
     """Decide whether the minitaur has fallen.
 
@@ -351,7 +342,7 @@ class MinitaurBulletEnv(gym.Env):
   def _termination(self):
     position = self.minitaur.GetBasePosition()
     distance = math.sqrt(position[0]**2 + position[1]**2)
-    return self.lost_duck() or self.is_fallen() or distance > self._distance_limit
+    return self.is_fallen() or distance > self._distance_limit
 
   def _reward(self):
     current_base_position = self.minitaur.GetBasePosition()