diff --git a/examples/pybullet/gym/pybullet_envs/deep_mimic/DeepMimic_Optimizer.py b/examples/pybullet/gym/pybullet_envs/deep_mimic/DeepMimic_Optimizer.py new file mode 100644 index 000000000..bc078323e --- /dev/null +++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/DeepMimic_Optimizer.py @@ -0,0 +1,52 @@ +import numpy as np +import sys +import os +import inspect +currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) +parentdir = os.path.dirname(os.path.dirname(currentdir)) +os.sys.path.insert(0,parentdir) +print("parentdir=",parentdir) + + +from pybullet_envs.deep_mimic.env.pybullet_deep_mimic_env import PyBulletDeepMimicEnv +from pybullet_envs.deep_mimic.learning.rl_world import RLWorld +from pybullet_utils.logger import Logger +from testrl import update_world, update_timestep, build_world +import pybullet_utils.mpi_util as MPIUtil + +args = [] +world = None + +def run(): + global update_timestep + global world + + done = False + while not (done): + update_world(world, update_timestep) + + return + +def shutdown(): + global world + + Logger.print2('Shutting down...') + world.shutdown() + return + +def main(): + global args + global world + + # Command line arguments + args = sys.argv[1:] + + world = build_world(args, enable_draw=False) + + run() + shutdown() + + return + +if __name__ == '__main__': + main() diff --git a/examples/pybullet/gym/pybullet_envs/deep_mimic/env/pybullet_deep_mimic_env.py b/examples/pybullet/gym/pybullet_envs/deep_mimic/env/pybullet_deep_mimic_env.py index 65276cf5d..272c9df13 100644 --- a/examples/pybullet/gym/pybullet_envs/deep_mimic/env/pybullet_deep_mimic_env.py +++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/env/pybullet_deep_mimic_env.py @@ -248,7 +248,7 @@ class PyBulletDeepMimicEnv(Env): isEnded = self._humanoid.terminates() #also check maximum time, 20 seconds (todo get from file) #print("self.t=",self.t) - if (self.t>3): + if (self.t>20): isEnded = True return isEnded diff --git a/examples/pybullet/gym/pybullet_envs/deep_mimic/mpi_run.py b/examples/pybullet/gym/pybullet_envs/deep_mimic/mpi_run.py new file mode 100644 index 000000000..85d70abb1 --- /dev/null +++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/mpi_run.py @@ -0,0 +1,23 @@ +import sys +import subprocess +from pybullet_utils.arg_parser import ArgParser +from pybullet_utils.logger import Logger + +def main(): + # Command line arguments + args = sys.argv[1:] + arg_parser = ArgParser() + arg_parser.load_args(args) + + num_workers = arg_parser.parse_int('num_workers', 1) + assert(num_workers > 0) + + Logger.print2('Running with {:d} workers'.format(num_workers)) + cmd = 'mpiexec -n {:d} python3 DeepMimic_Optimizer.py '.format(num_workers) + cmd += ' '.join(args) + Logger.print2('cmd: ' + cmd) + subprocess.call(cmd, shell=True) + return + +if __name__ == '__main__': + main() diff --git a/examples/pybullet/gym/pybullet_envs/deep_mimic/testrl.py b/examples/pybullet/gym/pybullet_envs/deep_mimic/testrl.py index f30ec7ac6..d8b6f90b2 100644 --- a/examples/pybullet/gym/pybullet_envs/deep_mimic/testrl.py +++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/testrl.py @@ -15,6 +15,18 @@ from pybullet_envs.deep_mimic.env.pybullet_deep_mimic_env import PyBulletDeepMim import sys import random + +def update_world(world, time_elapsed): + timeStep = 1./600. + world.update(timeStep) + reward = world.env.calc_reward(agent_id=0) + #print("reward=",reward) + end_episode = world.env.is_episode_end() + if (end_episode): + world.end_episode() + world.reset() + return + def build_arg_parser(args): arg_parser = ArgParser() arg_parser.load_args(args) @@ -28,43 +40,40 @@ def build_arg_parser(args): return arg_parser args = sys.argv[1:] -arg_parser = build_arg_parser(args) -render=False#True -env = PyBulletDeepMimicEnv (args,render) - -world = RLWorld(env, arg_parser) -motion_file = arg_parser.parse_string("motion_file") -print("motion_file=",motion_file) -bodies = arg_parser.parse_ints("fall_contact_bodies") -print("bodies=",bodies) -int_output_path = arg_parser.parse_string("int_output_path") -print("int_output_path=",int_output_path) -agent_files = pybullet_data.getDataPath()+"/"+arg_parser.parse_string("agent_files") +def build_world(args, enable_draw, playback_speed=1): + arg_parser = build_arg_parser(args) + env = PyBulletDeepMimicEnv(args, enable_draw) + world = RLWorld(env, arg_parser) + #world.env.set_playback_speed(playback_speed) -AGENT_TYPE_KEY = "AgentType" + motion_file = arg_parser.parse_string("motion_file") + print("motion_file=",motion_file) + bodies = arg_parser.parse_ints("fall_contact_bodies") + print("bodies=",bodies) + int_output_path = arg_parser.parse_string("int_output_path") + print("int_output_path=",int_output_path) + agent_files = pybullet_data.getDataPath()+"/"+arg_parser.parse_string("agent_files") -print("agent_file=",agent_files) -with open(agent_files) as data_file: - json_data = json.load(data_file) - print("json_data=",json_data) - assert AGENT_TYPE_KEY in json_data - agent_type = json_data[AGENT_TYPE_KEY] - print("agent_type=",agent_type) - agent = PPOAgent(world, id, json_data) + AGENT_TYPE_KEY = "AgentType" + + print("agent_file=",agent_files) + with open(agent_files) as data_file: + json_data = json.load(data_file) + print("json_data=",json_data) + assert AGENT_TYPE_KEY in json_data + agent_type = json_data[AGENT_TYPE_KEY] + print("agent_type=",agent_type) + agent = PPOAgent(world, id, json_data) - agent.set_enable_training(True) - world.reset() - while (world.env._pybullet_client.isConnected()): - - timeStep = 1./600. - world.update(timeStep) - reward = world.env.calc_reward(agent_id=0) - #print("reward=",reward) - - end_episode = world.env.is_episode_end() - if (end_episode): - world.end_episode() + agent.set_enable_training(True) world.reset() + return world + +world = build_world(args, True) +while (world.env._pybullet_client.isConnected()): + timeStep = 1./600. + update_world(world, timeStep) +