From 1cef924973bde24aa56e955bf3c98d25ae105e8d Mon Sep 17 00:00:00 2001 From: Erwin Coumans Date: Fri, 1 Feb 2019 17:18:13 -0800 Subject: [PATCH] deepmimic fix: reference initialization wasn't spread over all frames --- .../data/agents/ct_agent_humanoid_ppo.txt | 52 +++++++++++++++++++ .../deep_mimic/env/humanoid_stable_pd.py | 3 +- .../deep_mimic/env/pybullet_deep_mimic_env.py | 6 ++- .../deep_mimic/learning/rl_world.py | 5 +- .../gym/pybullet_envs/deep_mimic/testrl.py | 3 +- 5 files changed, 62 insertions(+), 7 deletions(-) create mode 100644 examples/pybullet/gym/pybullet_data/data/agents/ct_agent_humanoid_ppo.txt diff --git a/examples/pybullet/gym/pybullet_data/data/agents/ct_agent_humanoid_ppo.txt b/examples/pybullet/gym/pybullet_data/data/agents/ct_agent_humanoid_ppo.txt new file mode 100644 index 000000000..ebc90c946 --- /dev/null +++ b/examples/pybullet/gym/pybullet_data/data/agents/ct_agent_humanoid_ppo.txt @@ -0,0 +1,52 @@ +{ + "AgentType": "PPO", + + "ActorNet": "fc_2layers_1024units", + "ActorStepsize": 0.0000025, + "ActorMomentum": 0.9, + "ActorWeightDecay": 0.0005, + "ActorInitOutputScale": 0.01, + + "CriticNet": "fc_2layers_1024units", + "CriticStepsize": 0.01, + "CriticMomentum": 0.9, + "CriticWeightDecay": 0, + + "UpdatePeriod": 1, + "ItersPerUpdate": 1, + "Discount": 0.95, + "BatchSize": 4096, + "MiniBatchSize": 256, + "Epochs": 1, + "ReplayBufferSize": 500000, + "InitSamples": 1, + "NormalizerSamples": 1000000, + + "RatioClip": 0.2, + "NormAdvClip": 4, + "TDLambda": 0.95, + + "OutputIters": 10, + "IntOutputIters": 400, + "TestEpisodes": 32, + + "ExpAnnealSamples": 64000000, + + "ExpParamsBeg": + { + "Rate": 1, + "InitActionRate": 1, + "Noise": 0.05, + "NoiseInternal": 0, + "Temp": 0.1 + }, + + "ExpParamsEnd": + { + "Rate": 0.2, + "InitActionRate": 0.01, + "Noise": 0.05, + "NoiseInternal": 0, + "Temp": 0.001 + } +} \ No newline at end of file diff --git a/examples/pybullet/gym/pybullet_envs/deep_mimic/env/humanoid_stable_pd.py b/examples/pybullet/gym/pybullet_envs/deep_mimic/env/humanoid_stable_pd.py index 464593650..3239a7e6c 100644 --- a/examples/pybullet/gym/pybullet_envs/deep_mimic/env/humanoid_stable_pd.py +++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/env/humanoid_stable_pd.py @@ -68,7 +68,7 @@ class HumanoidStablePD(object): self.resetPose() def resetPose(self): - print("resetPose with self._frameFraction=",self._frameFraction) + print("resetPose with self._frame=", self._frame, " and self._frameFraction=",self._frameFraction) pose = self.computePose(self._frameFraction) self.initializePose(self._poseInterpolator, self._sim_model, initBase=True) self.initializePose(self._poseInterpolator, self._kin_model, initBase=False) @@ -126,7 +126,6 @@ class HumanoidStablePD(object): keyFrameDuration = self._mocap_data.KeyFrameDuraction() cycleTime = self.getCycleTime() #print("self._motion_data.NumFrames()=",self._mocap_data.NumFrames()) - #print("cycleTime=",cycleTime) cycles = self.calcCycleCount(t, cycleTime) #print("cycles=",cycles) frameTime = t - cycles*cycleTime diff --git a/examples/pybullet/gym/pybullet_envs/deep_mimic/env/pybullet_deep_mimic_env.py b/examples/pybullet/gym/pybullet_envs/deep_mimic/env/pybullet_deep_mimic_env.py index 380c90cbc..65276cf5d 100644 --- a/examples/pybullet/gym/pybullet_envs/deep_mimic/env/pybullet_deep_mimic_env.py +++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/env/pybullet_deep_mimic_env.py @@ -64,8 +64,12 @@ class PyBulletDeepMimicEnv(Env): #self._humanoid.applyPDForces(taus) #self._pybullet_client.stepSimulation() time.sleep(timeStep) - + #print("numframes = ", self._humanoid._mocap_data.NumFrames()) startTime = random.randint(0,self._humanoid._mocap_data.NumFrames()-2) + rnrange = 1000 + rn = random.randint(0,rnrange) + startTime = float(rn)/rnrange * self._humanoid.getCycleTime() + self._humanoid.setSimTime(startTime) self._humanoid.resetPose() #this clears the contact points. Todo: add API to explicitly clear all contact points? diff --git a/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/rl_world.py b/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/rl_world.py index 2bd505abc..3fd7bee76 100644 --- a/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/rl_world.py +++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/rl_world.py @@ -3,6 +3,7 @@ import learning.agent_builder as AgentBuilder import learning.tf_util as TFUtil from learning.rl_agent import RLAgent from pybullet_utils.logger import Logger +import pybullet_data class RLWorld(object): def __init__(self, env, arg_parser): @@ -79,7 +80,7 @@ class RLWorld(object): if (len(model_files) > 0): curr_model_file = model_files[i] if curr_model_file != 'none': - curr_agent.load_model(curr_model_file) + curr_agent.load_model(pybullet_data.getDataPath()+"/"+curr_model_file) self.agents.append(curr_agent) Logger.print2('') @@ -139,4 +140,4 @@ class RLWorld(object): assert (agent != None), 'Failed to build agent {:d} from: {}'.format(id, agent_file) return agent - \ No newline at end of file + diff --git a/examples/pybullet/gym/pybullet_envs/deep_mimic/testrl.py b/examples/pybullet/gym/pybullet_envs/deep_mimic/testrl.py index 2d82e4521..f30ec7ac6 100644 --- a/examples/pybullet/gym/pybullet_envs/deep_mimic/testrl.py +++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/testrl.py @@ -30,7 +30,7 @@ def build_arg_parser(args): args = sys.argv[1:] arg_parser = build_arg_parser(args) -render=True +render=False#True env = PyBulletDeepMimicEnv (args,render) world = RLWorld(env, arg_parser) @@ -57,7 +57,6 @@ with open(agent_files) as data_file: agent.set_enable_training(True) world.reset() - while (world.env._pybullet_client.isConnected()): timeStep = 1./600.