fix a number of issues in a series of new Minitaur environments
add them to pybullet_envs through __init__.py
id='MinitaurReactiveEnv-v0',
id='MinitaurTrottingEnv-v0',
id='MinitaurBallGymEnv-v0',
id='MinitaurStandGymEnv-v0',
id='MinitaurAlternatingLegsEnv-v0',
id='MinitaurFourLegStandEnv-v0',
disable reflection of minitaur_four_leg_stand_env, since the floor changes orientation (reflection is a fixed plane with [0,0,1] normal)
from pybullet_envs.minitaur.envs.minitaur_alternating_legs_env import MinitaurAlternatingLegsEnv
from pybullet_envs.minitaur.envs.minitaur_ball_gym_env import MinitaurBallGymEnv
from pybullet_envs.minitaur.envs.minitaur_randomize_terrain_gym_env import MinitaurRandomizeTerrainGymEnv
from pybullet_envs.minitaur.envs.minitaur_reactive_env import MinitaurReactiveEnv
from pybullet_envs.minitaur.envs.minitaur_stand_gym_env import MinitaurStandGymEnv
from pybullet_envs.minitaur.envs.minitaur_trotting_env import MinitaurTrottingEnv
from pybullet_envs.minitaur.envs.minitaur_four_leg_stand_env import MinitaurFourLegStandEnv
This commit is contained in:
@@ -30,6 +30,53 @@ register(
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
register(
|
||||||
|
id='MinitaurReactiveEnv-v0',
|
||||||
|
entry_point='pybullet_envs.minitaur.envs:MinitaurReactiveEnv',
|
||||||
|
render=True,
|
||||||
|
timestep_limit=1000,
|
||||||
|
reward_threshold=5.0,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
register(
|
||||||
|
id='MinitaurBallGymEnv-v0',
|
||||||
|
entry_point='pybullet_envs.minitaur.envs:MinitaurBallGymEnv',
|
||||||
|
timestep_limit=1000,
|
||||||
|
reward_threshold=5.0,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
register(
|
||||||
|
id='MinitaurTrottingEnv-v0',
|
||||||
|
entry_point='pybullet_envs.minitaur.envs:MinitaurTrottingEnv',
|
||||||
|
timestep_limit=1000,
|
||||||
|
reward_threshold=5.0,
|
||||||
|
)
|
||||||
|
|
||||||
|
register(
|
||||||
|
id='MinitaurStandGymEnv-v0',
|
||||||
|
entry_point='pybullet_envs.minitaur.envs:MinitaurStandGymEnv',
|
||||||
|
timestep_limit=1000,
|
||||||
|
reward_threshold=5.0,
|
||||||
|
)
|
||||||
|
|
||||||
|
register(
|
||||||
|
id='MinitaurAlternatingLegsEnv-v0',
|
||||||
|
entry_point='pybullet_envs.minitaur.envs:MinitaurAlternatingLegsEnv',
|
||||||
|
timestep_limit=1000,
|
||||||
|
reward_threshold=5.0,
|
||||||
|
)
|
||||||
|
|
||||||
|
register(
|
||||||
|
id='MinitaurFourLegStandEnv-v0',
|
||||||
|
entry_point='pybullet_envs.minitaur.envs:MinitaurFourLegStandEnv',
|
||||||
|
timestep_limit=1000,
|
||||||
|
reward_threshold=5.0,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
register(
|
register(
|
||||||
id='RacecarBulletEnv-v0',
|
id='RacecarBulletEnv-v0',
|
||||||
entry_point='pybullet_envs.bullet:RacecarGymEnv',
|
entry_point='pybullet_envs.bullet:RacecarGymEnv',
|
||||||
|
|||||||
1
examples/pybullet/gym/pybullet_envs/minitaur/__init__.py
Normal file
1
examples/pybullet/gym/pybullet_envs/minitaur/__init__.py
Normal file
@@ -0,0 +1 @@
|
|||||||
|
import gym
|
||||||
@@ -0,0 +1,10 @@
|
|||||||
|
from pybullet_envs.minitaur.envs.minitaur_alternating_legs_env import MinitaurAlternatingLegsEnv
|
||||||
|
from pybullet_envs.minitaur.envs.minitaur_ball_gym_env import MinitaurBallGymEnv
|
||||||
|
#from pybullet_envs.minitaur.envs.minitaur_randomize_terrain_gym_env import MinitaurRandomizeTerrainGymEnv
|
||||||
|
from pybullet_envs.minitaur.envs.minitaur_reactive_env import MinitaurReactiveEnv
|
||||||
|
from pybullet_envs.minitaur.envs.minitaur_stand_gym_env import MinitaurStandGymEnv
|
||||||
|
from pybullet_envs.minitaur.envs.minitaur_trotting_env import MinitaurTrottingEnv
|
||||||
|
from pybullet_envs.minitaur.envs.minitaur_four_leg_stand_env import MinitaurFourLegStandEnv
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -6,7 +6,7 @@ bent legs, desired_pitch from user input, battery voltage and motor damping.
|
|||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import tensorflow as tf
|
import tensorflow as tf
|
||||||
from pybullet_envs.minitaur.minitaur.envs import env_randomizer_base
|
from pybullet_envs.minitaur.envs import env_randomizer_base
|
||||||
|
|
||||||
# Absolute range.
|
# Absolute range.
|
||||||
NUM_LEGS = 4
|
NUM_LEGS = 4
|
||||||
|
|||||||
@@ -3,7 +3,7 @@
|
|||||||
"""
|
"""
|
||||||
import time
|
import time
|
||||||
|
|
||||||
|
import os
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import tensorflow as tf
|
import tensorflow as tf
|
||||||
import minitaur_alternating_legs_env
|
import minitaur_alternating_legs_env
|
||||||
@@ -83,10 +83,10 @@ def hand_tuned_balance_example(log_path=None):
|
|||||||
log_path=log_path)
|
log_path=log_path)
|
||||||
np.random.seed(100)
|
np.random.seed(100)
|
||||||
avg_reward = 0
|
avg_reward = 0
|
||||||
for i in xrange(episodes):
|
for i in range(episodes):
|
||||||
sum_reward = 0
|
sum_reward = 0
|
||||||
observation = environment.reset()
|
observation = environment.reset()
|
||||||
for _ in xrange(steps):
|
for _ in range(steps):
|
||||||
# Sleep to prevent serial buffer overflow on microcontroller.
|
# Sleep to prevent serial buffer overflow on microcontroller.
|
||||||
time.sleep(0.002)
|
time.sleep(0.002)
|
||||||
action = hand_tuned_agent(observation,
|
action = hand_tuned_agent(observation,
|
||||||
@@ -101,7 +101,7 @@ def hand_tuned_balance_example(log_path=None):
|
|||||||
|
|
||||||
|
|
||||||
def main(unused_argv):
|
def main(unused_argv):
|
||||||
hand_tuned_balance_example(log_path=FLAGS.log_path)
|
hand_tuned_balance_example(log_path=os.getcwd())
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
@@ -116,6 +116,7 @@ class MinitaurFourLegStandEnv(minitaur_gym_env.MinitaurGymEnv):
|
|||||||
on_rack=on_rack,
|
on_rack=on_rack,
|
||||||
render=render,
|
render=render,
|
||||||
env_randomizer=env_randomizer,
|
env_randomizer=env_randomizer,
|
||||||
|
reflection = False,
|
||||||
log_path=log_path)
|
log_path=log_path)
|
||||||
|
|
||||||
action_dim = 4
|
action_dim = 4
|
||||||
@@ -196,6 +197,8 @@ class MinitaurFourLegStandEnv(minitaur_gym_env.MinitaurGymEnv):
|
|||||||
t = float(float(t) / float(MOVING_FLOOR_TOTAL_STEP))
|
t = float(float(t) / float(MOVING_FLOOR_TOTAL_STEP))
|
||||||
ori = map(operator.add, [x * (1.0 - t) for x in self._cur_ori],
|
ori = map(operator.add, [x * (1.0 - t) for x in self._cur_ori],
|
||||||
[x * t for x in self._goal_ori])
|
[x * t for x in self._goal_ori])
|
||||||
|
ori=list(ori)
|
||||||
|
print("ori=",ori)
|
||||||
self._pybullet_client.resetBasePositionAndOrientation(0, [0, 0, 0], ori)
|
self._pybullet_client.resetBasePositionAndOrientation(0, [0, 0, 0], ori)
|
||||||
if self._env_step_counter % PERTURBATION_TOTAL_STEP == 0:
|
if self._env_step_counter % PERTURBATION_TOTAL_STEP == 0:
|
||||||
self._perturbation_magnitude = random.uniform(0.0, 0.0)
|
self._perturbation_magnitude = random.uniform(0.0, 0.0)
|
||||||
|
|||||||
@@ -40,10 +40,10 @@ def feed_forward_only_control_example(log_path=None):
|
|||||||
|
|
||||||
np.random.seed(100)
|
np.random.seed(100)
|
||||||
avg_reward = 0
|
avg_reward = 0
|
||||||
for i in xrange(episodes):
|
for i in range(episodes):
|
||||||
sum_reward = 0
|
sum_reward = 0
|
||||||
observation = environment.reset()
|
observation = environment.reset()
|
||||||
for _ in xrange(steps):
|
for _ in range(steps):
|
||||||
action = [0] * 4
|
action = [0] * 4
|
||||||
uroll = kroll * observation[0]
|
uroll = kroll * observation[0]
|
||||||
upitch = kpitch * observation[1]
|
upitch = kpitch * observation[1]
|
||||||
|
|||||||
@@ -92,6 +92,7 @@ class MinitaurGymEnv(gym.Env):
|
|||||||
control_time_step=None,
|
control_time_step=None,
|
||||||
env_randomizer=None,
|
env_randomizer=None,
|
||||||
forward_reward_cap=float("inf"),
|
forward_reward_cap=float("inf"),
|
||||||
|
reflection=True,
|
||||||
log_path=None):
|
log_path=None):
|
||||||
"""Initialize the minitaur gym environment.
|
"""Initialize the minitaur gym environment.
|
||||||
|
|
||||||
@@ -218,6 +219,7 @@ class MinitaurGymEnv(gym.Env):
|
|||||||
self._pd_latency = pd_latency
|
self._pd_latency = pd_latency
|
||||||
self._urdf_version = urdf_version
|
self._urdf_version = urdf_version
|
||||||
self._ground_id = None
|
self._ground_id = None
|
||||||
|
self._reflection = reflection
|
||||||
self._env_randomizers = convert_to_list(
|
self._env_randomizers = convert_to_list(
|
||||||
env_randomizer) if env_randomizer else []
|
env_randomizer) if env_randomizer else []
|
||||||
self._episode_proto = minitaur_logging_pb2.MinitaurEpisode()
|
self._episode_proto = minitaur_logging_pb2.MinitaurEpisode()
|
||||||
@@ -261,6 +263,9 @@ class MinitaurGymEnv(gym.Env):
|
|||||||
self._pybullet_client.setTimeStep(self._time_step)
|
self._pybullet_client.setTimeStep(self._time_step)
|
||||||
self._ground_id = self._pybullet_client.loadURDF(
|
self._ground_id = self._pybullet_client.loadURDF(
|
||||||
"%s/plane.urdf" % self._urdf_root)
|
"%s/plane.urdf" % self._urdf_root)
|
||||||
|
if (self._reflection):
|
||||||
|
self._pybullet_client.changeVisualShape(self._ground_id,-1,rgbaColor=[1,1,1,0.8])
|
||||||
|
self._pybullet_client.configureDebugVisualizer(self._pybullet_client.COV_ENABLE_PLANAR_REFLECTION,0)
|
||||||
self._pybullet_client.setGravity(0, 0, -10)
|
self._pybullet_client.setGravity(0, 0, -10)
|
||||||
acc_motor = self._accurate_motor_model_enabled
|
acc_motor = self._accurate_motor_model_enabled
|
||||||
motor_protect = self._motor_overheat_protection
|
motor_protect = self._motor_overheat_protection
|
||||||
|
|||||||
@@ -120,7 +120,7 @@ class MinitaurLogging(object):
|
|||||||
tf.gfile.MakeDirs(self._log_path)
|
tf.gfile.MakeDirs(self._log_path)
|
||||||
ts = time.time()
|
ts = time.time()
|
||||||
time_stamp = datetime.datetime.fromtimestamp(ts).strftime(
|
time_stamp = datetime.datetime.fromtimestamp(ts).strftime(
|
||||||
"%Y-%m-%d-%H:%M:%S")
|
"%Y-%m-%d-%H%M%S")
|
||||||
log_path = os.path.join(self._log_path,
|
log_path = os.path.join(self._log_path,
|
||||||
"minitaur_log_{}".format(time_stamp))
|
"minitaur_log_{}".format(time_stamp))
|
||||||
with tf.gfile.Open(log_path, "w") as f:
|
with tf.gfile.Open(log_path, "w") as f:
|
||||||
|
|||||||
@@ -8,13 +8,12 @@ import numpy as np
|
|||||||
from pybullet_envs.minitaur.envs import minitaur
|
from pybullet_envs.minitaur.envs import minitaur
|
||||||
from pybullet_envs.minitaur.envs import minitaur_gym_env
|
from pybullet_envs.minitaur.envs import minitaur_gym_env
|
||||||
|
|
||||||
flags.DEFINE_string(
|
#flags.DEFINE_string(
|
||||||
'terrain_dir', '/cns/od-d/home/brain-minitaur/terrain_obj',
|
# 'terrain_dir', '/cns/od-d/home/brain-minitaur/terrain_obj',
|
||||||
'The directory which contains terrain obj files to be used.')
|
# 'The directory which contains terrain obj files to be used.')
|
||||||
flags.DEFINE_string('storage_dir', '/tmp',
|
#flags.DEFINE_string('storage_dir', '/tmp',
|
||||||
'The full path to the temporary directory to be used.')
|
# 'The full path to the temporary directory to be used.')
|
||||||
|
#FLAGS = flags.FLAGS
|
||||||
FLAGS = flags.FLAGS
|
|
||||||
|
|
||||||
|
|
||||||
class MinitaurRandomizeTerrainGymEnv(minitaur_gym_env.MinitaurGymEnv):
|
class MinitaurRandomizeTerrainGymEnv(minitaur_gym_env.MinitaurGymEnv):
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ import time
|
|||||||
from gym import spaces
|
from gym import spaces
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from pybullet_envs.minitaur.envs import minitaur_gym_env
|
from pybullet_envs.minitaur.envs import minitaur_gym_env
|
||||||
|
import pybullet_data
|
||||||
|
|
||||||
ACTION_EPS = 0.01
|
ACTION_EPS = 0.01
|
||||||
# RANGE_OF_LEG_MOTION defines how far legs can rotate in both directions
|
# RANGE_OF_LEG_MOTION defines how far legs can rotate in both directions
|
||||||
@@ -34,7 +35,7 @@ class MinitaurStandGymEnv(minitaur_gym_env.MinitaurGymEnv):
|
|||||||
}
|
}
|
||||||
|
|
||||||
def __init__(self,
|
def __init__(self,
|
||||||
urdf_root="third_party/bullet/data",
|
urdf_root=pybullet_data.getDataPath(),
|
||||||
action_repeat=1,
|
action_repeat=1,
|
||||||
observation_noise_stdev=minitaur_gym_env.SENSOR_NOISE_STDDEV,
|
observation_noise_stdev=minitaur_gym_env.SENSOR_NOISE_STDDEV,
|
||||||
self_collision_enabled=True,
|
self_collision_enabled=True,
|
||||||
@@ -81,7 +82,7 @@ class MinitaurStandGymEnv(minitaur_gym_env.MinitaurGymEnv):
|
|||||||
done: Whether the episode has ended.
|
done: Whether the episode has ended.
|
||||||
info: A dictionary that stores diagnostic information.
|
info: A dictionary that stores diagnostic information.
|
||||||
"""
|
"""
|
||||||
for t in xrange(5000):
|
for t in range(5000):
|
||||||
if self._is_render:
|
if self._is_render:
|
||||||
base_pos = self.minitaur.GetBasePosition()
|
base_pos = self.minitaur.GetBasePosition()
|
||||||
self._pybullet_client.resetDebugVisualizerCamera(
|
self._pybullet_client.resetDebugVisualizerCamera(
|
||||||
|
|||||||
@@ -18,7 +18,7 @@ def StandUpExample():
|
|||||||
motor_velocity_limit=np.inf)
|
motor_velocity_limit=np.inf)
|
||||||
action = [0.5]
|
action = [0.5]
|
||||||
_, _, done, _ = environment.step(action)
|
_, _, done, _ = environment.step(action)
|
||||||
for t in xrange(steps):
|
for t in range(steps):
|
||||||
# A policy that oscillates between -1 and 1
|
# A policy that oscillates between -1 and 1
|
||||||
action = [math.sin(t * math.pi * 0.01)]
|
action = [math.sin(t * math.pi * 0.01)]
|
||||||
_, _, done, _ = environment.step(action)
|
_, _, done, _ = environment.step(action)
|
||||||
@@ -32,4 +32,4 @@ def main(unused_argv):
|
|||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
tf.logging.set_verbosity(tf.logging.INFO)
|
tf.logging.set_verbosity(tf.logging.INFO)
|
||||||
app.run()
|
main("unused")
|
||||||
|
|||||||
@@ -160,9 +160,9 @@ class MinitaurTrottingEnv(minitaur_gym_env.MinitaurGymEnv):
|
|||||||
"""
|
"""
|
||||||
motor_pose = np.zeros(NUM_MOTORS)
|
motor_pose = np.zeros(NUM_MOTORS)
|
||||||
for i in range(NUM_LEGS):
|
for i in range(NUM_LEGS):
|
||||||
motor_pose[2 * i] = leg_pose[NUM_LEGS + i] - (-1)**(i / 2) * leg_pose[i]
|
motor_pose[int(2 * i)] = leg_pose[NUM_LEGS + i] - (-1)**int(i / 2) * leg_pose[i]
|
||||||
motor_pose[2 * i
|
motor_pose[int(2 * i
|
||||||
+ 1] = leg_pose[NUM_LEGS + i] + (-1)**(i / 2) * leg_pose[i]
|
+ 1)] = leg_pose[NUM_LEGS + i] + (-1)**int(i / 2) * leg_pose[i]
|
||||||
return motor_pose
|
return motor_pose
|
||||||
|
|
||||||
def _gen_signal(self, t, phase):
|
def _gen_signal(self, t, phase):
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
"""
|
"""
|
||||||
import time
|
import time
|
||||||
|
import os
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import tensorflow as tf
|
import tensorflow as tf
|
||||||
from pybullet_envs.minitaur.envs import minitaur_gym_env
|
from pybullet_envs.minitaur.envs import minitaur_gym_env
|
||||||
@@ -18,14 +18,14 @@ def main(unused_argv):
|
|||||||
use_signal_in_observation=False,
|
use_signal_in_observation=False,
|
||||||
use_angle_in_observation=False,
|
use_angle_in_observation=False,
|
||||||
render=True,
|
render=True,
|
||||||
log_path=FLAGS.log_path)
|
log_path=os.getcwd())
|
||||||
|
|
||||||
np.random.seed(100)
|
np.random.seed(100)
|
||||||
sum_reward = 0
|
sum_reward = 0
|
||||||
environment.reset()
|
environment.reset()
|
||||||
|
|
||||||
steps = 5000
|
steps = 5000
|
||||||
for _ in xrange(steps):
|
for _ in range(steps):
|
||||||
# Sleep to prevent serial buffer overflow on microcontroller.
|
# Sleep to prevent serial buffer overflow on microcontroller.
|
||||||
time.sleep(0.002)
|
time.sleep(0.002)
|
||||||
action = [0] * 8
|
action = [0] * 8
|
||||||
|
|||||||
Reference in New Issue
Block a user