From 4e2dfbe58efb1197f0d9b654ef1e9fcc3dd40f55 Mon Sep 17 00:00:00 2001 From: Erwin Coumans Date: Tue, 31 Oct 2017 16:05:52 -0700 Subject: [PATCH] add minitaur duck environment see also https://www.youtube.com/watch?v=SLwUCMZdKLo and evolution strategies (hardmaru): https://twitter.com/hardmaru/status/925074585984237568 --- .../pybullet/gym/pybullet_envs/__init__.py | 9 + .../gym/pybullet_envs/agents/configs.py | 16 + .../gym/pybullet_envs/bullet/__init__.py | 1 + .../bullet/minitaur_duck_gym_env.py | 387 ++++++++++++++++++ .../pybullet_envs/bullet/minitaur_gym_env.py | 15 +- 5 files changed, 416 insertions(+), 12 deletions(-) create mode 100644 examples/pybullet/gym/pybullet_envs/bullet/minitaur_duck_gym_env.py diff --git a/examples/pybullet/gym/pybullet_envs/__init__.py b/examples/pybullet/gym/pybullet_envs/__init__.py index 74b9b7c9b..8f8211845 100644 --- a/examples/pybullet/gym/pybullet_envs/__init__.py +++ b/examples/pybullet/gym/pybullet_envs/__init__.py @@ -9,6 +9,15 @@ register( reward_threshold=5.0, ) +register( + id='MinitaurBulletDuckEnv-v0', + entry_point='pybullet_envs.bullet:MinitaurBulletDuckEnv', + timestep_limit=1000, + reward_threshold=5.0, +) + +MinitaurBulletDuckEnv + register( id='RacecarBulletEnv-v0', entry_point='pybullet_envs.bullet:RacecarGymEnv', diff --git a/examples/pybullet/gym/pybullet_envs/agents/configs.py b/examples/pybullet/gym/pybullet_envs/agents/configs.py index 6e787d129..84d92ab1d 100644 --- a/examples/pybullet/gym/pybullet_envs/agents/configs.py +++ b/examples/pybullet/gym/pybullet_envs/agents/configs.py @@ -23,6 +23,7 @@ import functools from agents import ppo from agents.scripts import networks from pybullet_envs.bullet import minitaur_gym_env +from pybullet_envs.bullet import minitaur_duck_gym_env from pybullet_envs.bullet import minitaur_env_randomizer import pybullet_envs.bullet.minitaur_gym_env as minitaur_gym_env import pybullet_envs @@ -124,3 +125,18 @@ def pybullet_minitaur(): steps = 3e7 # 30M return locals() +def pybullet_duck_minitaur(): + """Configuration specific to minitaur_gym_env.MinitaurBulletDuckEnv class.""" + locals().update(default()) + randomizer = (minitaur_env_randomizer.MinitaurEnvRandomizer()) + env = functools.partial( + minitaur_gym_env.MinitaurBulletDuckEnv, + accurate_motor_model_enabled=True, + motor_overheat_protection=True, + pd_control_enabled=True, + env_randomizer=randomizer, + render=False) + max_length = 1000 + steps = 3e7 # 30M + return locals() + diff --git a/examples/pybullet/gym/pybullet_envs/bullet/__init__.py b/examples/pybullet/gym/pybullet_envs/bullet/__init__.py index 8b1735fb1..4c08abbba 100644 --- a/examples/pybullet/gym/pybullet_envs/bullet/__init__.py +++ b/examples/pybullet/gym/pybullet_envs/bullet/__init__.py @@ -1,5 +1,6 @@ from pybullet_envs.bullet.cartpole_bullet import CartPoleBulletEnv from pybullet_envs.bullet.minitaur_gym_env import MinitaurBulletEnv +from pybullet_envs.bullet.minitaur_duck_gym_env import MinitaurBulletDuckEnv from pybullet_envs.bullet.racecarGymEnv import RacecarGymEnv from pybullet_envs.bullet.racecarZEDGymEnv import RacecarZEDGymEnv from pybullet_envs.bullet.simpleHumanoidGymEnv import SimpleHumanoidGymEnv diff --git a/examples/pybullet/gym/pybullet_envs/bullet/minitaur_duck_gym_env.py b/examples/pybullet/gym/pybullet_envs/bullet/minitaur_duck_gym_env.py new file mode 100644 index 000000000..44b41dd7d --- /dev/null +++ b/examples/pybullet/gym/pybullet_envs/bullet/minitaur_duck_gym_env.py @@ -0,0 +1,387 @@ +"""This file implements the gym environment of minitaur. + +""" + +import os, inspect +currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) +parentdir = os.path.dirname(os.path.dirname(currentdir)) +os.sys.path.insert(0,parentdir) + + +import math +import time +import gym +from gym import spaces +from gym.utils import seeding +import numpy as np +import pybullet +from . import bullet_client +from . import minitaur +import os +import pybullet_data +from . import minitaur_env_randomizer + +NUM_SUBSTEPS = 5 +NUM_MOTORS = 8 +MOTOR_ANGLE_OBSERVATION_INDEX = 0 +MOTOR_VELOCITY_OBSERVATION_INDEX = MOTOR_ANGLE_OBSERVATION_INDEX + NUM_MOTORS +MOTOR_TORQUE_OBSERVATION_INDEX = MOTOR_VELOCITY_OBSERVATION_INDEX + NUM_MOTORS +BASE_ORIENTATION_OBSERVATION_INDEX = MOTOR_TORQUE_OBSERVATION_INDEX + NUM_MOTORS +ACTION_EPS = 0.01 +OBSERVATION_EPS = 0.01 +RENDER_HEIGHT = 720 +RENDER_WIDTH = 960 + +duckStartPos = [0,0,0.25] +duckStartOrn = [0.5,0.5,0.5,0.5] + +class MinitaurBulletDuckEnv(gym.Env): + """The gym environment for the minitaur. + + It simulates the locomotion of a minitaur, a quadruped robot. The state space + include the angles, velocities and torques for all the motors and the action + space is the desired motor angle for each motor. The reward function is based + on how far the minitaur walks in 1000 steps and penalizes the energy + expenditure. + + """ + metadata = { + "render.modes": ["human", "rgb_array"], + "video.frames_per_second": 50 + } + + def __init__(self, + urdf_root=pybullet_data.getDataPath(), + action_repeat=1, + distance_weight=1.0, + energy_weight=0.005, + shake_weight=0.0, + drift_weight=0.0, + distance_limit=float("inf"), + observation_noise_stdev=0.0, + self_collision_enabled=True, + motor_velocity_limit=np.inf, + pd_control_enabled=False,#not needed to be true if accurate motor model is enabled (has its own better PD) + leg_model_enabled=True, + accurate_motor_model_enabled=True, + motor_kp=1.0, + motor_kd=0.02, + torque_control_enabled=False, + motor_overheat_protection=True, + hard_reset=True, + on_rack=False, + render=False, + kd_for_pd_controllers=0.3, + env_randomizer=minitaur_env_randomizer.MinitaurEnvRandomizer()): + """Initialize the minitaur gym environment. + + Args: + urdf_root: The path to the urdf data folder. + action_repeat: The number of simulation steps before actions are applied. + distance_weight: The weight of the distance term in the reward. + energy_weight: The weight of the energy term in the reward. + shake_weight: The weight of the vertical shakiness term in the reward. + drift_weight: The weight of the sideways drift term in the reward. + distance_limit: The maximum distance to terminate the episode. + observation_noise_stdev: The standard deviation of observation noise. + self_collision_enabled: Whether to enable self collision in the sim. + motor_velocity_limit: The velocity limit of each motor. + pd_control_enabled: Whether to use PD controller for each motor. + leg_model_enabled: Whether to use a leg motor to reparameterize the action + space. + accurate_motor_model_enabled: Whether to use the accurate DC motor model. + motor_kp: proportional gain for the accurate motor model. + motor_kd: derivative gain for the accurate motor model. + torque_control_enabled: Whether to use the torque control, if set to + False, pose control will be used. + motor_overheat_protection: Whether to shutdown the motor that has exerted + large torque (OVERHEAT_SHUTDOWN_TORQUE) for an extended amount of time + (OVERHEAT_SHUTDOWN_TIME). See ApplyAction() in minitaur.py for more + details. + hard_reset: Whether to wipe the simulation and load everything when reset + is called. If set to false, reset just place the minitaur back to start + position and set its pose to initial configuration. + on_rack: Whether to place the minitaur on rack. This is only used to debug + the walking gait. In this mode, the minitaur's base is hanged midair so + that its walking gait is clearer to visualize. + render: Whether to render the simulation. + kd_for_pd_controllers: kd value for the pd controllers of the motors + env_randomizer: An EnvRandomizer to randomize the physical properties + during reset(). + """ + self._time_step = 0.01 + self._action_repeat = action_repeat + self._num_bullet_solver_iterations = 300 + self._urdf_root = urdf_root + self._self_collision_enabled = self_collision_enabled + self._motor_velocity_limit = motor_velocity_limit + self._observation = [] + self._env_step_counter = 0 + self._is_render = render + self._last_base_position = [0, 0, 0] + self._distance_weight = distance_weight + self._energy_weight = energy_weight + self._drift_weight = drift_weight + self._shake_weight = shake_weight + self._distance_limit = distance_limit + self._observation_noise_stdev = observation_noise_stdev + self._action_bound = 1 + self._pd_control_enabled = pd_control_enabled + self._leg_model_enabled = leg_model_enabled + self._accurate_motor_model_enabled = accurate_motor_model_enabled + self._motor_kp = motor_kp + self._motor_kd = motor_kd + self._torque_control_enabled = torque_control_enabled + self._motor_overheat_protection = motor_overheat_protection + self._on_rack = on_rack + self._cam_dist = 1.0 + self._cam_yaw = 0 + self._duckId = -1 + self._cam_pitch = -30 + self._hard_reset = True + self._kd_for_pd_controllers = kd_for_pd_controllers + self._last_frame_time = 0.0 + print("urdf_root=" + self._urdf_root) + self._env_randomizer = env_randomizer + # PD control needs smaller time step for stability. + if pd_control_enabled or accurate_motor_model_enabled: + self._time_step /= NUM_SUBSTEPS + self._num_bullet_solver_iterations /= NUM_SUBSTEPS + self._action_repeat *= NUM_SUBSTEPS + + if self._is_render: + self._pybullet_client = bullet_client.BulletClient( + connection_mode=pybullet.GUI) + else: + self._pybullet_client = bullet_client.BulletClient() + + self._seed() + self.reset() + observation_high = ( + self.minitaur.GetObservationUpperBound() + OBSERVATION_EPS) + observation_low = ( + self.minitaur.GetObservationLowerBound() - OBSERVATION_EPS) + action_dim = 8 + action_high = np.array([self._action_bound] * action_dim) + self.action_space = spaces.Box(-action_high, action_high) + self.observation_space = spaces.Box(observation_low, observation_high) + self.viewer = None + self._hard_reset = hard_reset # This assignment need to be after reset() + + def set_env_randomizer(self, env_randomizer): + self._env_randomizer = env_randomizer + + def configure(self, args): + self._args = args + + def _reset(self): + if self._hard_reset: + self._pybullet_client.resetSimulation() + self._pybullet_client.setPhysicsEngineParameter( + numSolverIterations=int(self._num_bullet_solver_iterations)) + self._pybullet_client.setTimeStep(self._time_step) + self._groundId = self._pybullet_client.loadURDF("%s/plane.urdf" % self._urdf_root) + self._duckId = self._pybullet_client.loadURDF("%s/duck_vhacd.urdf" % self._urdf_root,duckStartPos,duckStartOrn) + self._pybullet_client.setGravity(0, 0, -10) + acc_motor = self._accurate_motor_model_enabled + motor_protect = self._motor_overheat_protection + self.minitaur = (minitaur.Minitaur( + pybullet_client=self._pybullet_client, + urdf_root=self._urdf_root, + time_step=self._time_step, + self_collision_enabled=self._self_collision_enabled, + motor_velocity_limit=self._motor_velocity_limit, + pd_control_enabled=self._pd_control_enabled, + accurate_motor_model_enabled=acc_motor, + motor_kp=self._motor_kp, + motor_kd=self._motor_kd, + torque_control_enabled=self._torque_control_enabled, + motor_overheat_protection=motor_protect, + on_rack=self._on_rack, + kd_for_pd_controllers=self._kd_for_pd_controllers)) + else: + self.minitaur.Reset(reload_urdf=False) + self._pybullet_client.resetBasePositionAndOrientation(self._duckId,duckStartPos,duckStartOrn) + if self._env_randomizer is not None: + self._env_randomizer.randomize_env(self) + + self._env_step_counter = 0 + self._last_base_position = [0, 0, 0] + self._objectives = [] + self._pybullet_client.resetDebugVisualizerCamera( + self._cam_dist, self._cam_yaw, self._cam_pitch, [0, 0, 0]) + if not self._torque_control_enabled: + for _ in range(100): + if self._pd_control_enabled or self._accurate_motor_model_enabled: + self.minitaur.ApplyAction([math.pi / 2] * 8) + self._pybullet_client.stepSimulation() + return self._noisy_observation() + + def _seed(self, seed=None): + self.np_random, seed = seeding.np_random(seed) + return [seed] + + def _transform_action_to_motor_command(self, action): + if self._leg_model_enabled: + for i, action_component in enumerate(action): + if not (-self._action_bound - ACTION_EPS <= action_component <= + self._action_bound + ACTION_EPS): + raise ValueError( + "{}th action {} out of bounds.".format(i, action_component)) + action = self.minitaur.ConvertFromLegModel(action) + return action + + def _step(self, action): + """Step forward the simulation, given the action. + + Args: + action: A list of desired motor angles for eight motors. + + Returns: + observations: The angles, velocities and torques of all motors. + reward: The reward for the current state-action pair. + done: Whether the episode has ended. + info: A dictionary that stores diagnostic information. + + Raises: + ValueError: The action dimension is not the same as the number of motors. + ValueError: The magnitude of actions is out of bounds. + """ + if self._is_render: + # Sleep, otherwise the computation takes less time than real time, + # which will make the visualization like a fast-forward video. + time_spent = time.time() - self._last_frame_time + self._last_frame_time = time.time() + time_to_sleep = self._action_repeat * self._time_step - time_spent + if time_to_sleep > 0: + time.sleep(time_to_sleep) + base_pos = self.minitaur.GetBasePosition() + self._pybullet_client.resetDebugVisualizerCamera( + self._cam_dist, self._cam_yaw, self._cam_pitch, base_pos) + action = self._transform_action_to_motor_command(action) + for _ in range(self._action_repeat): + self.minitaur.ApplyAction(action) + self._pybullet_client.stepSimulation() + + self._env_step_counter += 1 + reward = self._reward() + done = self._termination() + return np.array(self._noisy_observation()), reward, done, {} + + def _render(self, mode="rgb_array", close=False): + if mode != "rgb_array": + return np.array([]) + base_pos = self.minitaur.GetBasePosition() + view_matrix = self._pybullet_client.computeViewMatrixFromYawPitchRoll( + cameraTargetPosition=base_pos, + distance=self._cam_dist, + yaw=self._cam_yaw, + pitch=self._cam_pitch, + roll=0, + upAxisIndex=2) + proj_matrix = self._pybullet_client.computeProjectionMatrixFOV( + fov=60, aspect=float(RENDER_WIDTH)/RENDER_HEIGHT, + nearVal=0.1, farVal=100.0) + (_, _, px, _, _) = self._pybullet_client.getCameraImage( + width=RENDER_WIDTH, height=RENDER_HEIGHT, viewMatrix=view_matrix, + projectionMatrix=proj_matrix, renderer=pybullet.ER_BULLET_HARDWARE_OPENGL) + rgb_array = np.array(px) + rgb_array = rgb_array[:, :, :3] + return rgb_array + + def get_minitaur_motor_angles(self): + """Get the minitaur's motor angles. + + Returns: + A numpy array of motor angles. + """ + return np.array( + self._observation[MOTOR_ANGLE_OBSERVATION_INDEX: + MOTOR_ANGLE_OBSERVATION_INDEX + NUM_MOTORS]) + + def get_minitaur_motor_velocities(self): + """Get the minitaur's motor velocities. + + Returns: + A numpy array of motor velocities. + """ + return np.array( + self._observation[MOTOR_VELOCITY_OBSERVATION_INDEX: + MOTOR_VELOCITY_OBSERVATION_INDEX + NUM_MOTORS]) + + def get_minitaur_motor_torques(self): + """Get the minitaur's motor torques. + + Returns: + A numpy array of motor torques. + """ + return np.array( + self._observation[MOTOR_TORQUE_OBSERVATION_INDEX: + MOTOR_TORQUE_OBSERVATION_INDEX + NUM_MOTORS]) + + def get_minitaur_base_orientation(self): + """Get the minitaur's base orientation, represented by a quaternion. + + Returns: + A numpy array of minitaur's orientation. + """ + return np.array(self._observation[BASE_ORIENTATION_OBSERVATION_INDEX:]) + + def lost_duck(self): + points = self._pybullet_client.getContactPoints(self._duckId, self._groundId); + return len(points)>0 + + def is_fallen(self): + """Decide whether the minitaur has fallen. + + If the up directions between the base and the world is larger (the dot + product is smaller than 0.85) or the base is very low on the ground + (the height is smaller than 0.13 meter), the minitaur is considered fallen. + + Returns: + Boolean value that indicates whether the minitaur has fallen. + """ + orientation = self.minitaur.GetBaseOrientation() + rot_mat = self._pybullet_client.getMatrixFromQuaternion(orientation) + local_up = rot_mat[6:] + pos = self.minitaur.GetBasePosition() + return (np.dot(np.asarray([0, 0, 1]), np.asarray(local_up)) < 0.85 or + pos[2] < 0.13) + + def _termination(self): + position = self.minitaur.GetBasePosition() + distance = math.sqrt(position[0]**2 + position[1]**2) + return self.lost_duck() or self.is_fallen() or distance > self._distance_limit + + def _reward(self): + current_base_position = self.minitaur.GetBasePosition() + forward_reward = current_base_position[0] - self._last_base_position[0] + drift_reward = -abs(current_base_position[1] - self._last_base_position[1]) + shake_reward = -abs(current_base_position[2] - self._last_base_position[2]) + self._last_base_position = current_base_position + energy_reward = np.abs( + np.dot(self.minitaur.GetMotorTorques(), + self.minitaur.GetMotorVelocities())) * self._time_step + reward = ( + self._distance_weight * forward_reward - + self._energy_weight * energy_reward + self._drift_weight * drift_reward + + self._shake_weight * shake_reward) + self._objectives.append( + [forward_reward, energy_reward, drift_reward, shake_reward]) + return reward + + def get_objectives(self): + return self._objectives + + def _get_observation(self): + self._observation = self.minitaur.GetObservation() + return self._observation + + def _noisy_observation(self): + self._get_observation() + observation = np.array(self._observation) + if self._observation_noise_stdev > 0: + observation += (np.random.normal( + scale=self._observation_noise_stdev, size=observation.shape) * + self.minitaur.GetObservationUpperBound()) + return observation diff --git a/examples/pybullet/gym/pybullet_envs/bullet/minitaur_gym_env.py b/examples/pybullet/gym/pybullet_envs/bullet/minitaur_gym_env.py index f553ac46e..38ec631a5 100644 --- a/examples/pybullet/gym/pybullet_envs/bullet/minitaur_gym_env.py +++ b/examples/pybullet/gym/pybullet_envs/bullet/minitaur_gym_env.py @@ -32,9 +32,6 @@ OBSERVATION_EPS = 0.01 RENDER_HEIGHT = 720 RENDER_WIDTH = 960 -duckStartPos = [0,0,0.25] -duckStartOrn = [0.5,0.5,0.5,0.5] - class MinitaurBulletEnv(gym.Env): """The gym environment for the minitaur. @@ -136,7 +133,6 @@ class MinitaurBulletEnv(gym.Env): self._on_rack = on_rack self._cam_dist = 1.0 self._cam_yaw = 0 - self._duckId = -1 self._cam_pitch = -30 self._hard_reset = True self._kd_for_pd_controllers = kd_for_pd_controllers @@ -180,8 +176,7 @@ class MinitaurBulletEnv(gym.Env): self._pybullet_client.setPhysicsEngineParameter( numSolverIterations=int(self._num_bullet_solver_iterations)) self._pybullet_client.setTimeStep(self._time_step) - self._groundId = self._pybullet_client.loadURDF("%s/plane.urdf" % self._urdf_root) - self._duckId = self._pybullet_client.loadURDF("%s/duck_vhacd.urdf" % self._urdf_root,duckStartPos,duckStartOrn) + self._pybullet_client.loadURDF("%s/plane.urdf" % self._urdf_root) self._pybullet_client.setGravity(0, 0, -10) acc_motor = self._accurate_motor_model_enabled motor_protect = self._motor_overheat_protection @@ -201,7 +196,7 @@ class MinitaurBulletEnv(gym.Env): kd_for_pd_controllers=self._kd_for_pd_controllers)) else: self.minitaur.Reset(reload_urdf=False) - self._pybullet_client.resetBasePositionAndOrientation(self._duckId,duckStartPos,duckStartOrn) + if self._env_randomizer is not None: self._env_randomizer.randomize_env(self) @@ -327,10 +322,6 @@ class MinitaurBulletEnv(gym.Env): """ return np.array(self._observation[BASE_ORIENTATION_OBSERVATION_INDEX:]) - def lost_duck(self): - points = self._pybullet_client.getContactPoints(self._duckId, self._groundId); - return len(points)>0 - def is_fallen(self): """Decide whether the minitaur has fallen. @@ -351,7 +342,7 @@ class MinitaurBulletEnv(gym.Env): def _termination(self): position = self.minitaur.GetBasePosition() distance = math.sqrt(position[0]**2 + position[1]**2) - return self.lost_duck() or self.is_fallen() or distance > self._distance_limit + return self.is_fallen() or distance > self._distance_limit def _reward(self): current_base_position = self.minitaur.GetBasePosition()