add minitaur environment randomizer, enable it by default
this improves PPO training ( more stable)
This commit is contained in:
@@ -0,0 +1,25 @@
|
|||||||
|
"""Abstract base class for environment randomizer."""
|
||||||
|
|
||||||
|
import abc
|
||||||
|
|
||||||
|
|
||||||
|
class EnvRandomizerBase(object):
|
||||||
|
"""Abstract base class for environment randomizer.
|
||||||
|
|
||||||
|
An EnvRandomizer is called in environment.reset(). It will
|
||||||
|
randomize physical parameters of the objects in the simulation.
|
||||||
|
The physical parameters will be fixed for that episode and be
|
||||||
|
randomized again in the next environment.reset().
|
||||||
|
"""
|
||||||
|
|
||||||
|
__metaclass__ = abc.ABCMeta
|
||||||
|
|
||||||
|
@abc.abstractmethod
|
||||||
|
def randomize_env(self, env):
|
||||||
|
"""Randomize the simulated_objects in the environment.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
env: The environment to be randomized.
|
||||||
|
"""
|
||||||
|
pass
|
||||||
|
|
||||||
@@ -0,0 +1,68 @@
|
|||||||
|
"""Randomize the minitaur_gym_env when reset() is called."""
|
||||||
|
import random
|
||||||
|
import numpy as np
|
||||||
|
from . import env_randomizer_base
|
||||||
|
|
||||||
|
# Relative range.
|
||||||
|
MINITAUR_BASE_MASS_ERROR_RANGE = (-0.2, 0.2) # 0.2 means 20%
|
||||||
|
MINITAUR_LEG_MASS_ERROR_RANGE = (-0.2, 0.2) # 0.2 means 20%
|
||||||
|
# Absolute range.
|
||||||
|
BATTERY_VOLTAGE_RANGE = (14.8, 16.8) # Unit: Volt
|
||||||
|
MOTOR_VISCOUS_DAMPING_RANGE = (0, 0.01) # Unit: N*m*s/rad (torque/angular vel)
|
||||||
|
MINITAUR_LEG_FRICTION = (0.8, 1.5) # Unit: dimensionless
|
||||||
|
|
||||||
|
|
||||||
|
class MinitaurEnvRandomizer(env_randomizer_base.EnvRandomizerBase):
|
||||||
|
"""A randomizer that change the minitaur_gym_env during every reset."""
|
||||||
|
|
||||||
|
def __init__(self,
|
||||||
|
minitaur_base_mass_err_range=MINITAUR_BASE_MASS_ERROR_RANGE,
|
||||||
|
minitaur_leg_mass_err_range=MINITAUR_LEG_MASS_ERROR_RANGE,
|
||||||
|
battery_voltage_range=BATTERY_VOLTAGE_RANGE,
|
||||||
|
motor_viscous_damping_range=MOTOR_VISCOUS_DAMPING_RANGE):
|
||||||
|
self._minitaur_base_mass_err_range = minitaur_base_mass_err_range
|
||||||
|
self._minitaur_leg_mass_err_range = minitaur_leg_mass_err_range
|
||||||
|
self._battery_voltage_range = battery_voltage_range
|
||||||
|
self._motor_viscous_damping_range = motor_viscous_damping_range
|
||||||
|
|
||||||
|
def randomize_env(self, env):
|
||||||
|
self._randomize_minitaur(env.minitaur)
|
||||||
|
|
||||||
|
def _randomize_minitaur(self, minitaur):
|
||||||
|
"""Randomize various physical properties of minitaur.
|
||||||
|
|
||||||
|
It randomizes the mass/inertia of the base, mass/inertia of the legs,
|
||||||
|
friction coefficient of the feet, the battery voltage and the motor damping
|
||||||
|
at each reset() of the environment.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
minitaur: the Minitaur instance in minitaur_gym_env environment.
|
||||||
|
"""
|
||||||
|
base_mass = minitaur.GetBaseMassFromURDF()
|
||||||
|
randomized_base_mass = random.uniform(
|
||||||
|
base_mass * (1.0 + self._minitaur_base_mass_err_range[0]),
|
||||||
|
base_mass * (1.0 + self._minitaur_base_mass_err_range[1]))
|
||||||
|
minitaur.SetBaseMass(randomized_base_mass)
|
||||||
|
|
||||||
|
leg_masses = minitaur.GetLegMassesFromURDF()
|
||||||
|
leg_masses_lower_bound = np.array(leg_masses) * (
|
||||||
|
1.0 + self._minitaur_leg_mass_err_range[0])
|
||||||
|
leg_masses_upper_bound = np.array(leg_masses) * (
|
||||||
|
1.0 + self._minitaur_leg_mass_err_range[1])
|
||||||
|
randomized_leg_masses = [
|
||||||
|
np.random.uniform(leg_masses_lower_bound[i], leg_masses_upper_bound[i])
|
||||||
|
for i in range(len(leg_masses))
|
||||||
|
]
|
||||||
|
minitaur.SetLegMasses(randomized_leg_masses)
|
||||||
|
|
||||||
|
randomized_battery_voltage = random.uniform(BATTERY_VOLTAGE_RANGE[0],
|
||||||
|
BATTERY_VOLTAGE_RANGE[1])
|
||||||
|
minitaur.SetBatteryVoltage(randomized_battery_voltage)
|
||||||
|
|
||||||
|
randomized_motor_damping = random.uniform(MOTOR_VISCOUS_DAMPING_RANGE[0],
|
||||||
|
MOTOR_VISCOUS_DAMPING_RANGE[1])
|
||||||
|
minitaur.SetMotorViscousDamping(randomized_motor_damping)
|
||||||
|
|
||||||
|
randomized_foot_friction = random.uniform(MINITAUR_LEG_FRICTION[0],
|
||||||
|
MINITAUR_LEG_FRICTION[1])
|
||||||
|
minitaur.SetFootFriction(randomized_foot_friction)
|
||||||
@@ -19,6 +19,7 @@ from . import bullet_client
|
|||||||
from . import minitaur
|
from . import minitaur
|
||||||
import os
|
import os
|
||||||
import pybullet_data
|
import pybullet_data
|
||||||
|
from . import minitaur_env_randomizer
|
||||||
|
|
||||||
NUM_SUBSTEPS = 5
|
NUM_SUBSTEPS = 5
|
||||||
NUM_MOTORS = 8
|
NUM_MOTORS = 8
|
||||||
@@ -68,7 +69,7 @@ class MinitaurBulletEnv(gym.Env):
|
|||||||
on_rack=False,
|
on_rack=False,
|
||||||
render=False,
|
render=False,
|
||||||
kd_for_pd_controllers=0.3,
|
kd_for_pd_controllers=0.3,
|
||||||
env_randomizer=None):
|
env_randomizer=minitaur_env_randomizer.MinitaurEnvRandomizer()):
|
||||||
"""Initialize the minitaur gym environment.
|
"""Initialize the minitaur gym environment.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
|
|||||||
@@ -0,0 +1,167 @@
|
|||||||
|
r"""An example to run of the minitaur gym environment with sine gaits.
|
||||||
|
|
||||||
|
blaze run -c opt //robotics/reinforcement_learning/minitaur/\
|
||||||
|
envs:minitaur_gym_env_example
|
||||||
|
"""
|
||||||
|
|
||||||
|
import math
|
||||||
|
import numpy as np
|
||||||
|
from pybullet_envs.bullet import minitaur_gym_env
|
||||||
|
import argparse
|
||||||
|
from pybullet_envs.bullet import minitaur_env_randomizer
|
||||||
|
|
||||||
|
def ResetPoseExample():
|
||||||
|
"""An example that the minitaur stands still using the reset pose."""
|
||||||
|
steps = 1000
|
||||||
|
randomizer = (minitaur_env_randomizer.MinitaurEnvRandomizer())
|
||||||
|
environment = minitaur_gym_env.MinitaurBulletEnv(
|
||||||
|
render=True,
|
||||||
|
leg_model_enabled=False,
|
||||||
|
motor_velocity_limit=np.inf,
|
||||||
|
pd_control_enabled=True,
|
||||||
|
accurate_motor_model_enabled=True,
|
||||||
|
motor_overheat_protection=True,
|
||||||
|
env_randomizer = randomizer,
|
||||||
|
hard_reset=False)
|
||||||
|
action = [math.pi / 2] * 8
|
||||||
|
for _ in range(steps):
|
||||||
|
_, _, done, _ = environment.step(action)
|
||||||
|
if done:
|
||||||
|
break
|
||||||
|
environment.reset()
|
||||||
|
|
||||||
|
def MotorOverheatExample():
|
||||||
|
"""An example of minitaur motor overheat protection is triggered.
|
||||||
|
|
||||||
|
The minitaur is leaning forward and the motors are getting obove threshold
|
||||||
|
torques. The overheat protection will be triggered in ~1 sec.
|
||||||
|
"""
|
||||||
|
|
||||||
|
environment = minitaur_gym_env.MinitaurBulletEnv(
|
||||||
|
render=True,
|
||||||
|
leg_model_enabled=False,
|
||||||
|
motor_velocity_limit=np.inf,
|
||||||
|
motor_overheat_protection=True,
|
||||||
|
accurate_motor_model_enabled=True,
|
||||||
|
motor_kp=1.20,
|
||||||
|
motor_kd=0.00,
|
||||||
|
on_rack=False)
|
||||||
|
|
||||||
|
action = [2.0] * 8
|
||||||
|
for i in range(8):
|
||||||
|
action[i] = 2.0 - 0.5 * (-1 if i % 2 == 0 else 1) * (-1 if i < 4 else 1)
|
||||||
|
|
||||||
|
steps = 500
|
||||||
|
actions_and_observations = []
|
||||||
|
for step_counter in range(steps):
|
||||||
|
# Matches the internal timestep.
|
||||||
|
time_step = 0.01
|
||||||
|
t = step_counter * time_step
|
||||||
|
current_row = [t]
|
||||||
|
current_row.extend(action)
|
||||||
|
|
||||||
|
observation, _, _, _ = environment.step(action)
|
||||||
|
current_row.extend(observation.tolist())
|
||||||
|
actions_and_observations.append(current_row)
|
||||||
|
environment.reset()
|
||||||
|
|
||||||
|
def SineStandExample():
|
||||||
|
"""An example of minitaur standing and squatting on the floor.
|
||||||
|
|
||||||
|
To validate the accurate motor model we command the robot and sit and stand up
|
||||||
|
periodically in both simulation and experiment. We compare the measured motor
|
||||||
|
trajectories, torques and gains.
|
||||||
|
"""
|
||||||
|
environment = minitaur_gym_env.MinitaurBulletEnv(
|
||||||
|
render=True,
|
||||||
|
leg_model_enabled=False,
|
||||||
|
motor_velocity_limit=np.inf,
|
||||||
|
motor_overheat_protection=True,
|
||||||
|
accurate_motor_model_enabled=True,
|
||||||
|
motor_kp=1.20,
|
||||||
|
motor_kd=0.02,
|
||||||
|
on_rack=False)
|
||||||
|
steps = 1000
|
||||||
|
amplitude = 0.5
|
||||||
|
speed = 3
|
||||||
|
|
||||||
|
actions_and_observations = []
|
||||||
|
|
||||||
|
for step_counter in range(steps):
|
||||||
|
# Matches the internal timestep.
|
||||||
|
time_step = 0.01
|
||||||
|
t = step_counter * time_step
|
||||||
|
current_row = [t]
|
||||||
|
|
||||||
|
action = [math.sin(speed * t) * amplitude + math.pi / 2] * 8
|
||||||
|
current_row.extend(action)
|
||||||
|
|
||||||
|
observation, _, _, _ = environment.step(action)
|
||||||
|
current_row.extend(observation.tolist())
|
||||||
|
actions_and_observations.append(current_row)
|
||||||
|
|
||||||
|
environment.reset()
|
||||||
|
|
||||||
|
|
||||||
|
def SinePolicyExample():
|
||||||
|
"""An example of minitaur walking with a sine gait."""
|
||||||
|
randomizer = (minitaur_env_randomizer.MinitaurEnvRandomizer())
|
||||||
|
environment = minitaur_gym_env.MinitaurBulletEnv(
|
||||||
|
render=True,
|
||||||
|
motor_velocity_limit=np.inf,
|
||||||
|
pd_control_enabled=True,
|
||||||
|
hard_reset=False,
|
||||||
|
env_randomizer = randomizer,
|
||||||
|
on_rack=False)
|
||||||
|
sum_reward = 0
|
||||||
|
steps = 20000
|
||||||
|
amplitude_1_bound = 0.5
|
||||||
|
amplitude_2_bound = 0.5
|
||||||
|
speed = 40
|
||||||
|
|
||||||
|
for step_counter in range(steps):
|
||||||
|
time_step = 0.01
|
||||||
|
t = step_counter * time_step
|
||||||
|
|
||||||
|
amplitude1 = amplitude_1_bound
|
||||||
|
amplitude2 = amplitude_2_bound
|
||||||
|
steering_amplitude = 0
|
||||||
|
if t < 10:
|
||||||
|
steering_amplitude = 0.5
|
||||||
|
elif t < 20:
|
||||||
|
steering_amplitude = -0.5
|
||||||
|
else:
|
||||||
|
steering_amplitude = 0
|
||||||
|
|
||||||
|
# Applying asymmetrical sine gaits to different legs can steer the minitaur.
|
||||||
|
a1 = math.sin(t * speed) * (amplitude1 + steering_amplitude)
|
||||||
|
a2 = math.sin(t * speed + math.pi) * (amplitude1 - steering_amplitude)
|
||||||
|
a3 = math.sin(t * speed) * amplitude2
|
||||||
|
a4 = math.sin(t * speed + math.pi) * amplitude2
|
||||||
|
action = [a1, a2, a2, a1, a3, a4, a4, a3]
|
||||||
|
_, reward, done, _ = environment.step(action)
|
||||||
|
sum_reward += reward
|
||||||
|
if done:
|
||||||
|
break
|
||||||
|
environment.reset()
|
||||||
|
|
||||||
|
|
||||||
|
def main():
|
||||||
|
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
|
||||||
|
parser.add_argument('--env', help='environment ID (0==sine, 1==stand, 2=reset, 3=overheat)',type=int, default=0)
|
||||||
|
args = parser.parse_args()
|
||||||
|
print("--env=" + str(args.env))
|
||||||
|
|
||||||
|
if (args.env == 0):
|
||||||
|
SinePolicyExample()
|
||||||
|
if (args.env == 1):
|
||||||
|
SineStandExample()
|
||||||
|
if (args.env == 2):
|
||||||
|
ResetPoseExample()
|
||||||
|
if (args.env == 3):
|
||||||
|
MotorOverheatExample()
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
main()
|
||||||
|
|
||||||
|
|
||||||
Reference in New Issue
Block a user