fix a number of issues in a series of new Minitaur environments

add them to pybullet_envs through __init__.py
    id='MinitaurReactiveEnv-v0',
    id='MinitaurTrottingEnv-v0',
    id='MinitaurBallGymEnv-v0',
    id='MinitaurStandGymEnv-v0',
    id='MinitaurAlternatingLegsEnv-v0',
    id='MinitaurFourLegStandEnv-v0',

disable reflection in minitaur_four_leg_stand_env, since the floor changes orientation (the planar reflection assumes a fixed plane with a [0,0,1] normal)
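For context, pybullet's planar reflection is a debug-visualizer effect that mirrors bodies across a fixed z=0 plane, so it only looks right while the floor stays level. A minimal standalone sketch of the flag involved (a plain pybullet session, not code from this commit):

import pybullet as p
import pybullet_data

p.connect(p.GUI)
p.setAdditionalSearchPath(pybullet_data.getDataPath())
plane_id = p.loadURDF("plane.urdf")

# The mirrored image assumes a fixed plane with a [0, 0, 1] normal; turn it
# off whenever the ground plane is going to be re-oriented at runtime.
p.configureDebugVisualizer(p.COV_ENABLE_PLANAR_REFLECTION, 0)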

from pybullet_envs.minitaur.envs.minitaur_alternating_legs_env import MinitaurAlternatingLegsEnv
from pybullet_envs.minitaur.envs.minitaur_ball_gym_env import MinitaurBallGymEnv
from pybullet_envs.minitaur.envs.minitaur_randomize_terrain_gym_env import MinitaurRandomizeTerrainGymEnv
from pybullet_envs.minitaur.envs.minitaur_reactive_env import MinitaurReactiveEnv
from pybullet_envs.minitaur.envs.minitaur_stand_gym_env import MinitaurStandGymEnv
from pybullet_envs.minitaur.envs.minitaur_trotting_env import MinitaurTrottingEnv
from pybullet_envs.minitaur.envs.minitaur_four_leg_stand_env import MinitaurFourLegStandEnv
commit 698b20938f (parent c2869e0a3c)
Author: erwincoumans
Date:   2018-04-11 10:09:03 -07:00

14 changed files with 91 additions and 25 deletions

@@ -30,6 +30,53 @@ register(
 )
+register(
+    id='MinitaurReactiveEnv-v0',
+    entry_point='pybullet_envs.minitaur.envs:MinitaurReactiveEnv',
+    render=True,
+    timestep_limit=1000,
+    reward_threshold=5.0,
+)
+register(
+    id='MinitaurBallGymEnv-v0',
+    entry_point='pybullet_envs.minitaur.envs:MinitaurBallGymEnv',
+    timestep_limit=1000,
+    reward_threshold=5.0,
+)
+register(
+    id='MinitaurTrottingEnv-v0',
+    entry_point='pybullet_envs.minitaur.envs:MinitaurTrottingEnv',
+    timestep_limit=1000,
+    reward_threshold=5.0,
+)
+register(
+    id='MinitaurStandGymEnv-v0',
+    entry_point='pybullet_envs.minitaur.envs:MinitaurStandGymEnv',
+    timestep_limit=1000,
+    reward_threshold=5.0,
+)
+register(
+    id='MinitaurAlternatingLegsEnv-v0',
+    entry_point='pybullet_envs.minitaur.envs:MinitaurAlternatingLegsEnv',
+    timestep_limit=1000,
+    reward_threshold=5.0,
+)
+register(
+    id='MinitaurFourLegStandEnv-v0',
+    entry_point='pybullet_envs.minitaur.envs:MinitaurFourLegStandEnv',
+    timestep_limit=1000,
+    reward_threshold=5.0,
+)
 register(
     id='RacecarBulletEnv-v0',
     entry_point='pybullet_envs.bullet:RacecarGymEnv',
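Once these registrations run, the environments can be created by id through the gym registry; a minimal usage sketch (standard 2018-era gym API, id taken from the block above):

import gym
import pybullet_envs  # importing the package executes the register() calls

env = gym.make('MinitaurTrottingEnv-v0')
observation = env.reset()
for _ in range(100):
  observation, reward, done, info = env.step(env.action_space.sample())
  if done:
    observation = env.reset()
env.close()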

@@ -0,0 +1 @@
+import gym

@@ -0,0 +1,10 @@
+from pybullet_envs.minitaur.envs.minitaur_alternating_legs_env import MinitaurAlternatingLegsEnv
+from pybullet_envs.minitaur.envs.minitaur_ball_gym_env import MinitaurBallGymEnv
+#from pybullet_envs.minitaur.envs.minitaur_randomize_terrain_gym_env import MinitaurRandomizeTerrainGymEnv
+from pybullet_envs.minitaur.envs.minitaur_reactive_env import MinitaurReactiveEnv
+from pybullet_envs.minitaur.envs.minitaur_stand_gym_env import MinitaurStandGymEnv
+from pybullet_envs.minitaur.envs.minitaur_trotting_env import MinitaurTrottingEnv
+from pybullet_envs.minitaur.envs.minitaur_four_leg_stand_env import MinitaurFourLegStandEnv
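Because this package __init__.py re-exports the classes, the environments can also be constructed directly instead of through gym.make; a short sketch (the render keyword mirrors the constructor signature shown further down, all other arguments left at their defaults):

from pybullet_envs.minitaur.envs import MinitaurTrottingEnv

env = MinitaurTrottingEnv(render=True)
observation = env.reset()
observation, reward, done, info = env.step([0] * env.action_space.shape[0])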

@@ -6,7 +6,7 @@ bent legs, desired_pitch from user input, battery voltage and motor damping.
 import numpy as np
 import tensorflow as tf
-from pybullet_envs.minitaur.minitaur.envs import env_randomizer_base
+from pybullet_envs.minitaur.envs import env_randomizer_base
 # Absolute range.
 NUM_LEGS = 4
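The corrected path matters because env_randomizer_base is the hook the environments invoke at reset time. A hypothetical subclass sketch (the EnvRandomizerBase/randomize_env names follow the module this hunk imports, but the minitaur accessors are assumptions modeled on the randomizer this diff touches):

import numpy as np
from pybullet_envs.minitaur.envs import env_randomizer_base


class ExampleBaseMassRandomizer(env_randomizer_base.EnvRandomizerBase):
  """Hypothetical randomizer: perturb the base mass on each reset."""

  def randomize_env(self, env):
    # Assumed accessors; scale the base mass by up to +/-10%.
    base_mass = env.minitaur.GetBaseMassFromURDF()
    env.minitaur.SetBaseMass(base_mass * np.random.uniform(0.9, 1.1))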

@@ -3,7 +3,7 @@
"""
import time
import os
import numpy as np
import tensorflow as tf
import minitaur_alternating_legs_env
@@ -83,10 +83,10 @@ def hand_tuned_balance_example(log_path=None):
       log_path=log_path)
   np.random.seed(100)
   avg_reward = 0
-  for i in xrange(episodes):
+  for i in range(episodes):
     sum_reward = 0
     observation = environment.reset()
-    for _ in xrange(steps):
+    for _ in range(steps):
       # Sleep to prevent serial buffer overflow on microcontroller.
       time.sleep(0.002)
       action = hand_tuned_agent(observation,
@@ -101,7 +101,7 @@ def hand_tuned_balance_example(log_path=None):
 def main(unused_argv):
-  hand_tuned_balance_example(log_path=FLAGS.log_path)
+  hand_tuned_balance_example(log_path=os.getcwd())

 if __name__ == "__main__":
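The xrange-to-range swaps here (and in the example files below) are part of the commit's Python 3 migration: xrange does not exist in Python 3, and Python 3's range is already lazy, so the replacement is safe under both interpreters. A quick illustration:

# Python 2: range() builds a list eagerly, xrange() is lazy.
# Python 3: xrange is gone and range() is lazy by default.
try:
  indices = xrange(5)  # only defined on Python 2
except NameError:
  indices = range(5)   # Python 3 path

print(list(indices))  # [0, 1, 2, 3, 4] either way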

@@ -116,6 +116,7 @@ class MinitaurFourLegStandEnv(minitaur_gym_env.MinitaurGymEnv):
         on_rack=on_rack,
         render=render,
         env_randomizer=env_randomizer,
+        reflection=False,
         log_path=log_path)
     action_dim = 4
@@ -196,6 +197,8 @@ class MinitaurFourLegStandEnv(minitaur_gym_env.MinitaurGymEnv):
     t = float(float(t) / float(MOVING_FLOOR_TOTAL_STEP))
     ori = map(operator.add, [x * (1.0 - t) for x in self._cur_ori],
               [x * t for x in self._goal_ori])
+    ori = list(ori)
+    print("ori=", ori)
     self._pybullet_client.resetBasePositionAndOrientation(0, [0, 0, 0], ori)
     if self._env_step_counter % PERTURBATION_TOTAL_STEP == 0:
       self._perturbation_magnitude = random.uniform(0.0, 0.0)
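The added list(ori) is another Python 3 fix: map() returns a one-shot iterator there, while pybullet's resetBasePositionAndOrientation needs a concrete 4-element quaternion. A self-contained illustration of the same interpolation (quaternion values are made up):

import operator

cur_ori = [0.0, 0.0, 0.0, 1.0]     # current floor orientation (made-up)
goal_ori = [0.1, 0.0, 0.0, 0.995]  # goal floor orientation (made-up)
t = 0.25

# Materialize the map() iterator with list() before handing it to pybullet.
ori = list(map(operator.add,
               [x * (1.0 - t) for x in cur_ori],
               [x * t for x in goal_ori]))
print(ori)  # a plain list, usable as a pybullet orientation argument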

@@ -40,10 +40,10 @@ def feed_forward_only_control_example(log_path=None):
   np.random.seed(100)
   avg_reward = 0
-  for i in xrange(episodes):
+  for i in range(episodes):
     sum_reward = 0
     observation = environment.reset()
-    for _ in xrange(steps):
+    for _ in range(steps):
       action = [0] * 4
       uroll = kroll * observation[0]
       upitch = kpitch * observation[1]

@@ -92,6 +92,7 @@ class MinitaurGymEnv(gym.Env):
                control_time_step=None,
                env_randomizer=None,
                forward_reward_cap=float("inf"),
+               reflection=True,
                log_path=None):
     """Initialize the minitaur gym environment.
@@ -218,6 +219,7 @@ class MinitaurGymEnv(gym.Env):
     self._pd_latency = pd_latency
     self._urdf_version = urdf_version
     self._ground_id = None
+    self._reflection = reflection
     self._env_randomizers = convert_to_list(
         env_randomizer) if env_randomizer else []
     self._episode_proto = minitaur_logging_pb2.MinitaurEpisode()
@@ -261,6 +263,9 @@ class MinitaurGymEnv(gym.Env):
       self._pybullet_client.setTimeStep(self._time_step)
       self._ground_id = self._pybullet_client.loadURDF(
           "%s/plane.urdf" % self._urdf_root)
+      if (self._reflection):
+        self._pybullet_client.changeVisualShape(self._ground_id, -1, rgbaColor=[1, 1, 1, 0.8])
+        self._pybullet_client.configureDebugVisualizer(self._pybullet_client.COV_ENABLE_PLANAR_REFLECTION, 0)
       self._pybullet_client.setGravity(0, 0, -10)
       acc_motor = self._accurate_motor_model_enabled
       motor_protect = self._motor_overheat_protection
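The new flag defaults to True, so every existing environment keeps the translucent, reflective floor, while MinitaurFourLegStandEnv opts out through the reflection=False super-call above. A hedged construction sketch (all other arguments left at their defaults):

from pybullet_envs.minitaur.envs import minitaur_gym_env

# Skip the planar reflection, e.g. for an environment whose ground plane
# will be re-oriented while the episode runs.
env = minitaur_gym_env.MinitaurGymEnv(render=True, reflection=False)
env.reset()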

@@ -120,7 +120,7 @@ class MinitaurLogging(object):
       tf.gfile.MakeDirs(self._log_path)
     ts = time.time()
     time_stamp = datetime.datetime.fromtimestamp(ts).strftime(
-        "%Y-%m-%d-%H:%M:%S")
+        "%Y-%m-%d-%H%M%S")
     log_path = os.path.join(self._log_path,
                             "minitaur_log_{}".format(time_stamp))
     with tf.gfile.Open(log_path, "w") as f:
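Dropping the colons from the strftime pattern matters because ':' is illegal in Windows filenames; the new stamp is filesystem-safe on every platform. For illustration:

import datetime
import time

stamp = datetime.datetime.fromtimestamp(time.time()).strftime("%Y-%m-%d-%H%M%S")
# e.g. 'minitaur_log_2018-04-11-100903' -- no ':' characters, unlike the
# old '%Y-%m-%d-%H:%M:%S' pattern, so it works as a filename everywhere.
print("minitaur_log_{}".format(stamp))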

@@ -8,13 +8,12 @@ import numpy as np
 from pybullet_envs.minitaur.envs import minitaur
 from pybullet_envs.minitaur.envs import minitaur_gym_env
-flags.DEFINE_string(
-    'terrain_dir', '/cns/od-d/home/brain-minitaur/terrain_obj',
-    'The directory which contains terrain obj files to be used.')
-flags.DEFINE_string('storage_dir', '/tmp',
-                    'The full path to the temporary directory to be used.')
-FLAGS = flags.FLAGS
+#flags.DEFINE_string(
+#    'terrain_dir', '/cns/od-d/home/brain-minitaur/terrain_obj',
+#    'The directory which contains terrain obj files to be used.')
+#flags.DEFINE_string('storage_dir', '/tmp',
+#                    'The full path to the temporary directory to be used.')
+#FLAGS = flags.FLAGS
 class MinitaurRandomizeTerrainGymEnv(minitaur_gym_env.MinitaurGymEnv):

@@ -8,6 +8,7 @@ import time
 from gym import spaces
 import numpy as np
 from pybullet_envs.minitaur.envs import minitaur_gym_env
+import pybullet_data
 ACTION_EPS = 0.01
 # RANGE_OF_LEG_MOTION defines how far legs can rotate in both directions
@@ -34,7 +35,7 @@ class MinitaurStandGymEnv(minitaur_gym_env.MinitaurGymEnv):
   }
   def __init__(self,
-               urdf_root="third_party/bullet/data",
+               urdf_root=pybullet_data.getDataPath(),
                action_repeat=1,
                observation_noise_stdev=minitaur_gym_env.SENSOR_NOISE_STDDEV,
                self_collision_enabled=True,
@@ -81,7 +82,7 @@ class MinitaurStandGymEnv(minitaur_gym_env.MinitaurGymEnv):
       done: Whether the episode has ended.
       info: A dictionary that stores diagnostic information.
     """
-    for t in xrange(5000):
+    for t in range(5000):
       if self._is_render:
         base_pos = self.minitaur.GetBasePosition()
         self._pybullet_client.resetDebugVisualizerCamera(
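pybullet_data.getDataPath() points at the asset directory bundled with the pybullet wheel, which removes the dependency on a third_party/bullet/data checkout. A quick check of the pattern the diff switches to:

import pybullet as p
import pybullet_data

print(pybullet_data.getDataPath())  # directory containing plane.urdf etc.

p.connect(p.DIRECT)  # headless physics server
plane_id = p.loadURDF("%s/plane.urdf" % pybullet_data.getDataPath())
p.disconnect()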

@@ -18,7 +18,7 @@ def StandUpExample():
       motor_velocity_limit=np.inf)
   action = [0.5]
   _, _, done, _ = environment.step(action)
-  for t in xrange(steps):
+  for t in range(steps):
     # A policy that oscillates between -1 and 1
     action = [math.sin(t * math.pi * 0.01)]
     _, _, done, _ = environment.step(action)
@@ -32,4 +32,4 @@ def main(unused_argv):
 if __name__ == "__main__":
   tf.logging.set_verbosity(tf.logging.INFO)
-  app.run()
+  main("unused")

@@ -160,9 +160,9 @@ class MinitaurTrottingEnv(minitaur_gym_env.MinitaurGymEnv):
"""
motor_pose = np.zeros(NUM_MOTORS)
for i in range(NUM_LEGS):
motor_pose[2 * i] = leg_pose[NUM_LEGS + i] - (-1)**(i / 2) * leg_pose[i]
motor_pose[2 * i
+ 1] = leg_pose[NUM_LEGS + i] + (-1)**(i / 2) * leg_pose[i]
motor_pose[int(2 * i)] = leg_pose[NUM_LEGS + i] - (-1)**int(i / 2) * leg_pose[i]
motor_pose[int(2 * i
+ 1)] = leg_pose[NUM_LEGS + i] + (-1)**int(i / 2) * leg_pose[i]
return motor_pose
def _gen_signal(self, t, phase):
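The int(i / 2) wrappers are a Python 3 true-division fix: on Python 2, dividing two ints floors, but on Python 3 i / 2 yields a float, so (-1)**(i / 2) silently becomes a complex number for odd i and corrupts the motor pose. Floor division expresses the same intent on either interpreter:

NUM_LEGS = 4

for i in range(NUM_LEGS):
  # i // 2 floors on Python 2 and 3 alike; on Python 3, i / 2 would be
  # 0.0, 0.5, 1.0, 1.5, making (-1) ** (i / 2) complex-valued for odd i.
  assert (-1) ** (i // 2) == (-1) ** int(i / 2)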

@@ -2,7 +2,7 @@
"""
import time
import os
import numpy as np
import tensorflow as tf
from pybullet_envs.minitaur.envs import minitaur_gym_env
@@ -18,14 +18,14 @@ def main(unused_argv):
       use_signal_in_observation=False,
       use_angle_in_observation=False,
       render=True,
-      log_path=FLAGS.log_path)
+      log_path=os.getcwd())
   np.random.seed(100)
   sum_reward = 0
   environment.reset()
   steps = 5000
-  for _ in xrange(steps):
+  for _ in range(steps):
     # Sleep to prevent serial buffer overflow on microcontroller.
     time.sleep(0.002)
     action = [0] * 8