add minitaur environment randomizer, enable it by default

this improves PPO training ( more stable)
This commit is contained in:
Erwin Coumans
2017-09-27 09:07:21 -07:00
parent 12f28c5f76
commit c37919604f
4 changed files with 262 additions and 1 deletions

View File

@@ -0,0 +1,25 @@
"""Abstract base class for environment randomizer."""
import abc
class EnvRandomizerBase(object):
"""Abstract base class for environment randomizer.
An EnvRandomizer is called in environment.reset(). It will
randomize physical parameters of the objects in the simulation.
The physical parameters will be fixed for that episode and be
randomized again in the next environment.reset().
"""
__metaclass__ = abc.ABCMeta
@abc.abstractmethod
def randomize_env(self, env):
"""Randomize the simulated_objects in the environment.
Args:
env: The environment to be randomized.
"""
pass

View File

@@ -0,0 +1,68 @@
"""Randomize the minitaur_gym_env when reset() is called."""
import random
import numpy as np
from . import env_randomizer_base
# Relative range.
MINITAUR_BASE_MASS_ERROR_RANGE = (-0.2, 0.2) # 0.2 means 20%
MINITAUR_LEG_MASS_ERROR_RANGE = (-0.2, 0.2) # 0.2 means 20%
# Absolute range.
BATTERY_VOLTAGE_RANGE = (14.8, 16.8) # Unit: Volt
MOTOR_VISCOUS_DAMPING_RANGE = (0, 0.01) # Unit: N*m*s/rad (torque/angular vel)
MINITAUR_LEG_FRICTION = (0.8, 1.5) # Unit: dimensionless
class MinitaurEnvRandomizer(env_randomizer_base.EnvRandomizerBase):
"""A randomizer that change the minitaur_gym_env during every reset."""
def __init__(self,
minitaur_base_mass_err_range=MINITAUR_BASE_MASS_ERROR_RANGE,
minitaur_leg_mass_err_range=MINITAUR_LEG_MASS_ERROR_RANGE,
battery_voltage_range=BATTERY_VOLTAGE_RANGE,
motor_viscous_damping_range=MOTOR_VISCOUS_DAMPING_RANGE):
self._minitaur_base_mass_err_range = minitaur_base_mass_err_range
self._minitaur_leg_mass_err_range = minitaur_leg_mass_err_range
self._battery_voltage_range = battery_voltage_range
self._motor_viscous_damping_range = motor_viscous_damping_range
def randomize_env(self, env):
self._randomize_minitaur(env.minitaur)
def _randomize_minitaur(self, minitaur):
"""Randomize various physical properties of minitaur.
It randomizes the mass/inertia of the base, mass/inertia of the legs,
friction coefficient of the feet, the battery voltage and the motor damping
at each reset() of the environment.
Args:
minitaur: the Minitaur instance in minitaur_gym_env environment.
"""
base_mass = minitaur.GetBaseMassFromURDF()
randomized_base_mass = random.uniform(
base_mass * (1.0 + self._minitaur_base_mass_err_range[0]),
base_mass * (1.0 + self._minitaur_base_mass_err_range[1]))
minitaur.SetBaseMass(randomized_base_mass)
leg_masses = minitaur.GetLegMassesFromURDF()
leg_masses_lower_bound = np.array(leg_masses) * (
1.0 + self._minitaur_leg_mass_err_range[0])
leg_masses_upper_bound = np.array(leg_masses) * (
1.0 + self._minitaur_leg_mass_err_range[1])
randomized_leg_masses = [
np.random.uniform(leg_masses_lower_bound[i], leg_masses_upper_bound[i])
for i in range(len(leg_masses))
]
minitaur.SetLegMasses(randomized_leg_masses)
randomized_battery_voltage = random.uniform(BATTERY_VOLTAGE_RANGE[0],
BATTERY_VOLTAGE_RANGE[1])
minitaur.SetBatteryVoltage(randomized_battery_voltage)
randomized_motor_damping = random.uniform(MOTOR_VISCOUS_DAMPING_RANGE[0],
MOTOR_VISCOUS_DAMPING_RANGE[1])
minitaur.SetMotorViscousDamping(randomized_motor_damping)
randomized_foot_friction = random.uniform(MINITAUR_LEG_FRICTION[0],
MINITAUR_LEG_FRICTION[1])
minitaur.SetFootFriction(randomized_foot_friction)

View File

@@ -19,6 +19,7 @@ from . import bullet_client
from . import minitaur
import os
import pybullet_data
from . import minitaur_env_randomizer
NUM_SUBSTEPS = 5
NUM_MOTORS = 8
@@ -68,7 +69,7 @@ class MinitaurBulletEnv(gym.Env):
on_rack=False,
render=False,
kd_for_pd_controllers=0.3,
env_randomizer=None):
env_randomizer=minitaur_env_randomizer.MinitaurEnvRandomizer()):
"""Initialize the minitaur gym environment.
Args: