From 698b20938f93e1aa63befcd44ce7283c948835d0 Mon Sep 17 00:00:00 2001 From: erwincoumans Date: Wed, 11 Apr 2018 10:09:03 -0700 Subject: [PATCH] fix a number of issues in a series of new Minitaur environments add them to pybullet_envs through __init__.py id='MinitaurReactiveEnv-v0', id='MinitaurTrottingEnv-v0', id='MinitaurBallGymEnv-v0', id='MinitaurStandGymEnv-v0', id='MinitaurAlternatingLegsEnv-v0', id='MinitaurFourLegStandEnv-v0', disable reflection of minitaur_four_leg_stand_env, since the floor changes orientation (reflection is a fixed plane with [0,0,1] normal) from pybullet_envs.minitaur.envs.minitaur_alternating_legs_env import MinitaurAlternatingLegsEnv from pybullet_envs.minitaur.envs.minitaur_ball_gym_env import MinitaurBallGymEnv from pybullet_envs.minitaur.envs.minitaur_randomize_terrain_gym_env import MinitaurRandomizeTerrainGymEnv from pybullet_envs.minitaur.envs.minitaur_reactive_env import MinitaurReactiveEnv from pybullet_envs.minitaur.envs.minitaur_stand_gym_env import MinitaurStandGymEnv from pybullet_envs.minitaur.envs.minitaur_trotting_env import MinitaurTrottingEnv from pybullet_envs.minitaur.envs.minitaur_four_leg_stand_env import MinitaurFourLegStandEnv --- .../pybullet/gym/pybullet_envs/__init__.py | 47 +++++++++++++++++++ .../gym/pybullet_envs/minitaur/__init__.py | 1 + .../pybullet_envs/minitaur/envs/__init__.py | 10 ++++ ...initaur_alternating_legs_env_randomizer.py | 2 +- .../minitaur_alternating_legs_env_example.py | 8 ++-- .../envs/minitaur_four_leg_stand_env.py | 3 ++ .../minitaur_four_leg_stand_env_example.py | 4 +- .../minitaur/envs/minitaur_gym_env.py | 5 ++ .../minitaur/envs/minitaur_logging.py | 2 +- .../minitaur_randomize_terrain_gym_env.py | 13 +++-- .../minitaur/envs/minitaur_stand_gym_env.py | 5 +- .../envs/minitaur_stand_gym_env_example.py | 4 +- .../minitaur/envs/minitaur_trotting_env.py | 6 +-- .../envs/minitaur_trotting_env_example.py | 6 +-- 14 files changed, 91 insertions(+), 25 deletions(-) create mode 100644 examples/pybullet/gym/pybullet_envs/minitaur/__init__.py create mode 100644 examples/pybullet/gym/pybullet_envs/minitaur/envs/__init__.py diff --git a/examples/pybullet/gym/pybullet_envs/__init__.py b/examples/pybullet/gym/pybullet_envs/__init__.py index 5dddff7e9..97059fd8c 100644 --- a/examples/pybullet/gym/pybullet_envs/__init__.py +++ b/examples/pybullet/gym/pybullet_envs/__init__.py @@ -30,6 +30,53 @@ register( ) +register( + id='MinitaurReactiveEnv-v0', + entry_point='pybullet_envs.minitaur.envs:MinitaurReactiveEnv', + render=True, + timestep_limit=1000, + reward_threshold=5.0, +) + + +register( + id='MinitaurBallGymEnv-v0', + entry_point='pybullet_envs.minitaur.envs:MinitaurBallGymEnv', + timestep_limit=1000, + reward_threshold=5.0, +) + + +register( + id='MinitaurTrottingEnv-v0', + entry_point='pybullet_envs.minitaur.envs:MinitaurTrottingEnv', + timestep_limit=1000, + reward_threshold=5.0, +) + +register( + id='MinitaurStandGymEnv-v0', + entry_point='pybullet_envs.minitaur.envs:MinitaurStandGymEnv', + timestep_limit=1000, + reward_threshold=5.0, +) + +register( + id='MinitaurAlternatingLegsEnv-v0', + entry_point='pybullet_envs.minitaur.envs:MinitaurAlternatingLegsEnv', + timestep_limit=1000, + reward_threshold=5.0, +) + +register( + id='MinitaurFourLegStandEnv-v0', + entry_point='pybullet_envs.minitaur.envs:MinitaurFourLegStandEnv', + timestep_limit=1000, + reward_threshold=5.0, +) + + + register( id='RacecarBulletEnv-v0', entry_point='pybullet_envs.bullet:RacecarGymEnv', diff --git a/examples/pybullet/gym/pybullet_envs/minitaur/__init__.py b/examples/pybullet/gym/pybullet_envs/minitaur/__init__.py new file mode 100644 index 000000000..39e44ec8a --- /dev/null +++ b/examples/pybullet/gym/pybullet_envs/minitaur/__init__.py @@ -0,0 +1 @@ +import gym diff --git a/examples/pybullet/gym/pybullet_envs/minitaur/envs/__init__.py b/examples/pybullet/gym/pybullet_envs/minitaur/envs/__init__.py new file mode 100644 index 000000000..37def777d --- /dev/null +++ b/examples/pybullet/gym/pybullet_envs/minitaur/envs/__init__.py @@ -0,0 +1,10 @@ +from pybullet_envs.minitaur.envs.minitaur_alternating_legs_env import MinitaurAlternatingLegsEnv +from pybullet_envs.minitaur.envs.minitaur_ball_gym_env import MinitaurBallGymEnv +#from pybullet_envs.minitaur.envs.minitaur_randomize_terrain_gym_env import MinitaurRandomizeTerrainGymEnv +from pybullet_envs.minitaur.envs.minitaur_reactive_env import MinitaurReactiveEnv +from pybullet_envs.minitaur.envs.minitaur_stand_gym_env import MinitaurStandGymEnv +from pybullet_envs.minitaur.envs.minitaur_trotting_env import MinitaurTrottingEnv +from pybullet_envs.minitaur.envs.minitaur_four_leg_stand_env import MinitaurFourLegStandEnv + + + diff --git a/examples/pybullet/gym/pybullet_envs/minitaur/envs/env_randomizers/minitaur_alternating_legs_env_randomizer.py b/examples/pybullet/gym/pybullet_envs/minitaur/envs/env_randomizers/minitaur_alternating_legs_env_randomizer.py index 8dcb1f688..ec1a94fc0 100644 --- a/examples/pybullet/gym/pybullet_envs/minitaur/envs/env_randomizers/minitaur_alternating_legs_env_randomizer.py +++ b/examples/pybullet/gym/pybullet_envs/minitaur/envs/env_randomizers/minitaur_alternating_legs_env_randomizer.py @@ -6,7 +6,7 @@ bent legs, desired_pitch from user input, battery voltage and motor damping. import numpy as np import tensorflow as tf -from pybullet_envs.minitaur.minitaur.envs import env_randomizer_base +from pybullet_envs.minitaur.envs import env_randomizer_base # Absolute range. NUM_LEGS = 4 diff --git a/examples/pybullet/gym/pybullet_envs/minitaur/envs/minitaur_alternating_legs_env_example.py b/examples/pybullet/gym/pybullet_envs/minitaur/envs/minitaur_alternating_legs_env_example.py index aa467133c..1ac2198d0 100644 --- a/examples/pybullet/gym/pybullet_envs/minitaur/envs/minitaur_alternating_legs_env_example.py +++ b/examples/pybullet/gym/pybullet_envs/minitaur/envs/minitaur_alternating_legs_env_example.py @@ -3,7 +3,7 @@ """ import time - +import os import numpy as np import tensorflow as tf import minitaur_alternating_legs_env @@ -83,10 +83,10 @@ def hand_tuned_balance_example(log_path=None): log_path=log_path) np.random.seed(100) avg_reward = 0 - for i in xrange(episodes): + for i in range(episodes): sum_reward = 0 observation = environment.reset() - for _ in xrange(steps): + for _ in range(steps): # Sleep to prevent serial buffer overflow on microcontroller. time.sleep(0.002) action = hand_tuned_agent(observation, @@ -101,7 +101,7 @@ def hand_tuned_balance_example(log_path=None): def main(unused_argv): - hand_tuned_balance_example(log_path=FLAGS.log_path) + hand_tuned_balance_example(log_path=os.getcwd()) if __name__ == "__main__": diff --git a/examples/pybullet/gym/pybullet_envs/minitaur/envs/minitaur_four_leg_stand_env.py b/examples/pybullet/gym/pybullet_envs/minitaur/envs/minitaur_four_leg_stand_env.py index 12a792206..e7778d028 100644 --- a/examples/pybullet/gym/pybullet_envs/minitaur/envs/minitaur_four_leg_stand_env.py +++ b/examples/pybullet/gym/pybullet_envs/minitaur/envs/minitaur_four_leg_stand_env.py @@ -116,6 +116,7 @@ class MinitaurFourLegStandEnv(minitaur_gym_env.MinitaurGymEnv): on_rack=on_rack, render=render, env_randomizer=env_randomizer, + reflection = False, log_path=log_path) action_dim = 4 @@ -196,6 +197,8 @@ class MinitaurFourLegStandEnv(minitaur_gym_env.MinitaurGymEnv): t = float(float(t) / float(MOVING_FLOOR_TOTAL_STEP)) ori = map(operator.add, [x * (1.0 - t) for x in self._cur_ori], [x * t for x in self._goal_ori]) + ori=list(ori) + print("ori=",ori) self._pybullet_client.resetBasePositionAndOrientation(0, [0, 0, 0], ori) if self._env_step_counter % PERTURBATION_TOTAL_STEP == 0: self._perturbation_magnitude = random.uniform(0.0, 0.0) diff --git a/examples/pybullet/gym/pybullet_envs/minitaur/envs/minitaur_four_leg_stand_env_example.py b/examples/pybullet/gym/pybullet_envs/minitaur/envs/minitaur_four_leg_stand_env_example.py index 4893c30f7..c154e879a 100644 --- a/examples/pybullet/gym/pybullet_envs/minitaur/envs/minitaur_four_leg_stand_env_example.py +++ b/examples/pybullet/gym/pybullet_envs/minitaur/envs/minitaur_four_leg_stand_env_example.py @@ -40,10 +40,10 @@ def feed_forward_only_control_example(log_path=None): np.random.seed(100) avg_reward = 0 - for i in xrange(episodes): + for i in range(episodes): sum_reward = 0 observation = environment.reset() - for _ in xrange(steps): + for _ in range(steps): action = [0] * 4 uroll = kroll * observation[0] upitch = kpitch * observation[1] diff --git a/examples/pybullet/gym/pybullet_envs/minitaur/envs/minitaur_gym_env.py b/examples/pybullet/gym/pybullet_envs/minitaur/envs/minitaur_gym_env.py index 189accae2..ba7d53f5a 100644 --- a/examples/pybullet/gym/pybullet_envs/minitaur/envs/minitaur_gym_env.py +++ b/examples/pybullet/gym/pybullet_envs/minitaur/envs/minitaur_gym_env.py @@ -92,6 +92,7 @@ class MinitaurGymEnv(gym.Env): control_time_step=None, env_randomizer=None, forward_reward_cap=float("inf"), + reflection=True, log_path=None): """Initialize the minitaur gym environment. @@ -218,6 +219,7 @@ class MinitaurGymEnv(gym.Env): self._pd_latency = pd_latency self._urdf_version = urdf_version self._ground_id = None + self._reflection = reflection self._env_randomizers = convert_to_list( env_randomizer) if env_randomizer else [] self._episode_proto = minitaur_logging_pb2.MinitaurEpisode() @@ -261,6 +263,9 @@ class MinitaurGymEnv(gym.Env): self._pybullet_client.setTimeStep(self._time_step) self._ground_id = self._pybullet_client.loadURDF( "%s/plane.urdf" % self._urdf_root) + if (self._reflection): + self._pybullet_client.changeVisualShape(self._ground_id,-1,rgbaColor=[1,1,1,0.8]) + self._pybullet_client.configureDebugVisualizer(self._pybullet_client.COV_ENABLE_PLANAR_REFLECTION,0) self._pybullet_client.setGravity(0, 0, -10) acc_motor = self._accurate_motor_model_enabled motor_protect = self._motor_overheat_protection diff --git a/examples/pybullet/gym/pybullet_envs/minitaur/envs/minitaur_logging.py b/examples/pybullet/gym/pybullet_envs/minitaur/envs/minitaur_logging.py index f7022b728..0c426ff29 100644 --- a/examples/pybullet/gym/pybullet_envs/minitaur/envs/minitaur_logging.py +++ b/examples/pybullet/gym/pybullet_envs/minitaur/envs/minitaur_logging.py @@ -120,7 +120,7 @@ class MinitaurLogging(object): tf.gfile.MakeDirs(self._log_path) ts = time.time() time_stamp = datetime.datetime.fromtimestamp(ts).strftime( - "%Y-%m-%d-%H:%M:%S") + "%Y-%m-%d-%H%M%S") log_path = os.path.join(self._log_path, "minitaur_log_{}".format(time_stamp)) with tf.gfile.Open(log_path, "w") as f: diff --git a/examples/pybullet/gym/pybullet_envs/minitaur/envs/minitaur_randomize_terrain_gym_env.py b/examples/pybullet/gym/pybullet_envs/minitaur/envs/minitaur_randomize_terrain_gym_env.py index 4b8056175..1f30ae2f3 100644 --- a/examples/pybullet/gym/pybullet_envs/minitaur/envs/minitaur_randomize_terrain_gym_env.py +++ b/examples/pybullet/gym/pybullet_envs/minitaur/envs/minitaur_randomize_terrain_gym_env.py @@ -8,13 +8,12 @@ import numpy as np from pybullet_envs.minitaur.envs import minitaur from pybullet_envs.minitaur.envs import minitaur_gym_env -flags.DEFINE_string( - 'terrain_dir', '/cns/od-d/home/brain-minitaur/terrain_obj', - 'The directory which contains terrain obj files to be used.') -flags.DEFINE_string('storage_dir', '/tmp', - 'The full path to the temporary directory to be used.') - -FLAGS = flags.FLAGS +#flags.DEFINE_string( +# 'terrain_dir', '/cns/od-d/home/brain-minitaur/terrain_obj', +# 'The directory which contains terrain obj files to be used.') +#flags.DEFINE_string('storage_dir', '/tmp', +# 'The full path to the temporary directory to be used.') +#FLAGS = flags.FLAGS class MinitaurRandomizeTerrainGymEnv(minitaur_gym_env.MinitaurGymEnv): diff --git a/examples/pybullet/gym/pybullet_envs/minitaur/envs/minitaur_stand_gym_env.py b/examples/pybullet/gym/pybullet_envs/minitaur/envs/minitaur_stand_gym_env.py index 76fa9d2ba..f2a589d40 100644 --- a/examples/pybullet/gym/pybullet_envs/minitaur/envs/minitaur_stand_gym_env.py +++ b/examples/pybullet/gym/pybullet_envs/minitaur/envs/minitaur_stand_gym_env.py @@ -8,6 +8,7 @@ import time from gym import spaces import numpy as np from pybullet_envs.minitaur.envs import minitaur_gym_env +import pybullet_data ACTION_EPS = 0.01 # RANGE_OF_LEG_MOTION defines how far legs can rotate in both directions @@ -34,7 +35,7 @@ class MinitaurStandGymEnv(minitaur_gym_env.MinitaurGymEnv): } def __init__(self, - urdf_root="third_party/bullet/data", + urdf_root=pybullet_data.getDataPath(), action_repeat=1, observation_noise_stdev=minitaur_gym_env.SENSOR_NOISE_STDDEV, self_collision_enabled=True, @@ -81,7 +82,7 @@ class MinitaurStandGymEnv(minitaur_gym_env.MinitaurGymEnv): done: Whether the episode has ended. info: A dictionary that stores diagnostic information. """ - for t in xrange(5000): + for t in range(5000): if self._is_render: base_pos = self.minitaur.GetBasePosition() self._pybullet_client.resetDebugVisualizerCamera( diff --git a/examples/pybullet/gym/pybullet_envs/minitaur/envs/minitaur_stand_gym_env_example.py b/examples/pybullet/gym/pybullet_envs/minitaur/envs/minitaur_stand_gym_env_example.py index ff7ed2948..848c39d4a 100644 --- a/examples/pybullet/gym/pybullet_envs/minitaur/envs/minitaur_stand_gym_env_example.py +++ b/examples/pybullet/gym/pybullet_envs/minitaur/envs/minitaur_stand_gym_env_example.py @@ -18,7 +18,7 @@ def StandUpExample(): motor_velocity_limit=np.inf) action = [0.5] _, _, done, _ = environment.step(action) - for t in xrange(steps): + for t in range(steps): # A policy that oscillates between -1 and 1 action = [math.sin(t * math.pi * 0.01)] _, _, done, _ = environment.step(action) @@ -32,4 +32,4 @@ def main(unused_argv): if __name__ == "__main__": tf.logging.set_verbosity(tf.logging.INFO) - app.run() + main("unused") diff --git a/examples/pybullet/gym/pybullet_envs/minitaur/envs/minitaur_trotting_env.py b/examples/pybullet/gym/pybullet_envs/minitaur/envs/minitaur_trotting_env.py index 3b5ff9e83..bad90e36f 100644 --- a/examples/pybullet/gym/pybullet_envs/minitaur/envs/minitaur_trotting_env.py +++ b/examples/pybullet/gym/pybullet_envs/minitaur/envs/minitaur_trotting_env.py @@ -160,9 +160,9 @@ class MinitaurTrottingEnv(minitaur_gym_env.MinitaurGymEnv): """ motor_pose = np.zeros(NUM_MOTORS) for i in range(NUM_LEGS): - motor_pose[2 * i] = leg_pose[NUM_LEGS + i] - (-1)**(i / 2) * leg_pose[i] - motor_pose[2 * i - + 1] = leg_pose[NUM_LEGS + i] + (-1)**(i / 2) * leg_pose[i] + motor_pose[int(2 * i)] = leg_pose[NUM_LEGS + i] - (-1)**int(i / 2) * leg_pose[i] + motor_pose[int(2 * i + + 1)] = leg_pose[NUM_LEGS + i] + (-1)**int(i / 2) * leg_pose[i] return motor_pose def _gen_signal(self, t, phase): diff --git a/examples/pybullet/gym/pybullet_envs/minitaur/envs/minitaur_trotting_env_example.py b/examples/pybullet/gym/pybullet_envs/minitaur/envs/minitaur_trotting_env_example.py index 1edba8727..5dc57add6 100644 --- a/examples/pybullet/gym/pybullet_envs/minitaur/envs/minitaur_trotting_env_example.py +++ b/examples/pybullet/gym/pybullet_envs/minitaur/envs/minitaur_trotting_env_example.py @@ -2,7 +2,7 @@ """ import time - +import os import numpy as np import tensorflow as tf from pybullet_envs.minitaur.envs import minitaur_gym_env @@ -18,14 +18,14 @@ def main(unused_argv): use_signal_in_observation=False, use_angle_in_observation=False, render=True, - log_path=FLAGS.log_path) + log_path=os.getcwd()) np.random.seed(100) sum_reward = 0 environment.reset() steps = 5000 - for _ in xrange(steps): + for _ in range(steps): # Sleep to prevent serial buffer overflow on microcontroller. time.sleep(0.002) action = [0] * 8