deepmimic fix: reference initialization wasn't spread over all frames (the start time is now sampled as a random fraction of the motion cycle time)

2019-02-01 17:18:13 -08:00
parent 7cc2685faf
commit 1cef924973
5 changed files with 62 additions and 7 deletions
--- a/examples/pybullet/gym/pybullet_data/data/agents/ct_agent_humanoid_ppo.txt
+++ b/examples/pybullet/gym/pybullet_data/data/agents/ct_agent_humanoid_ppo.txt
@@ -0,0 +1,52 @@
+{
+	"AgentType": "PPO",
+
+	"ActorNet": "fc_2layers_1024units",
+	"ActorStepsize": 0.0000025,
+	"ActorMomentum": 0.9,
+	"ActorWeightDecay": 0.0005,
+	"ActorInitOutputScale": 0.01,
+
+	"CriticNet": "fc_2layers_1024units",
+	"CriticStepsize": 0.01,
+	"CriticMomentum": 0.9,
+	"CriticWeightDecay": 0,
+
+	"UpdatePeriod": 1,
+	"ItersPerUpdate": 1,
+	"Discount": 0.95,
+	"BatchSize": 4096,
+	"MiniBatchSize": 256,
+	"Epochs": 1,
+	"ReplayBufferSize": 500000,
+	"InitSamples": 1,
+	"NormalizerSamples": 1000000,
+
+	"RatioClip": 0.2,
+	"NormAdvClip": 4,
+	"TDLambda": 0.95,
+	
+	"OutputIters": 10,
+	"IntOutputIters": 400,
+	"TestEpisodes": 32,
+
+	"ExpAnnealSamples": 64000000,
+	
+	"ExpParamsBeg":
+	{
+		"Rate": 1,
+		"InitActionRate": 1,
+		"Noise": 0.05,
+		"NoiseInternal": 0,
+		"Temp": 0.1
+	},
+
+	"ExpParamsEnd":
+	{
+		"Rate": 0.2,
+		"InitActionRate": 0.01,
+		"Noise": 0.05,
+		"NoiseInternal": 0,
+		"Temp": 0.001
+	}
+}
--- a/examples/pybullet/gym/pybullet_envs/deep_mimic/env/humanoid_stable_pd.py
+++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/env/humanoid_stable_pd.py
@@ -68,7 +68,7 @@ class HumanoidStablePD(object):
    self.resetPose()
    
  def resetPose(self):
-    print("resetPose with self._frameFraction=",self._frameFraction)
+    print("resetPose with self._frame=", self._frame, " and self._frameFraction=",self._frameFraction)
    pose = self.computePose(self._frameFraction)
    self.initializePose(self._poseInterpolator, self._sim_model, initBase=True)
    self.initializePose(self._poseInterpolator, self._kin_model, initBase=False)
@@ -126,7 +126,6 @@ class HumanoidStablePD(object):
    keyFrameDuration = self._mocap_data.KeyFrameDuraction()
    cycleTime = self.getCycleTime()
    #print("self._motion_data.NumFrames()=",self._mocap_data.NumFrames())
-    #print("cycleTime=",cycleTime)
    cycles = self.calcCycleCount(t, cycleTime)
    #print("cycles=",cycles)
    frameTime = t - cycles*cycleTime
--- a/examples/pybullet/gym/pybullet_envs/deep_mimic/env/pybullet_deep_mimic_env.py
+++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/env/pybullet_deep_mimic_env.py
@@ -64,8 +64,12 @@ class PyBulletDeepMimicEnv(Env):
              #self._humanoid.applyPDForces(taus)
              #self._pybullet_client.stepSimulation()
            time.sleep(timeStep)
-
+      #print("numframes = ", self._humanoid._mocap_data.NumFrames())
      startTime = random.randint(0,self._humanoid._mocap_data.NumFrames()-2)
+      rnrange = 1000
+      rn = random.randint(0,rnrange)
+      startTime = float(rn)/rnrange * self._humanoid.getCycleTime()
+
      self._humanoid.setSimTime(startTime)
      self._humanoid.resetPose()
      #this clears the contact points. Todo: add API to explicitly clear all contact points?
--- a/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/rl_world.py
+++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/rl_world.py
@@ -3,6 +3,7 @@ import learning.agent_builder as AgentBuilder
 import learning.tf_util as TFUtil
 from learning.rl_agent import RLAgent
 from pybullet_utils.logger import Logger
+import pybullet_data

 class RLWorld(object):
    def __init__(self, env, arg_parser):
@@ -79,7 +80,7 @@ class RLWorld(object):
                if (len(model_files) > 0):
                    curr_model_file = model_files[i]
                    if curr_model_file != 'none':
-                        curr_agent.load_model(curr_model_file)
+                        curr_agent.load_model(pybullet_data.getDataPath()+"/"+curr_model_file)

            self.agents.append(curr_agent)
            Logger.print2('')
--- a/examples/pybullet/gym/pybullet_envs/deep_mimic/testrl.py
+++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/testrl.py
@@ -30,7 +30,7 @@ def build_arg_parser(args):
 args = sys.argv[1:]
 arg_parser = build_arg_parser(args)

-render=True
+render=False#True
 env = PyBulletDeepMimicEnv (args,render)
 
 world = RLWorld(env, arg_parser)
@@ -57,7 +57,6 @@ with open(agent_files) as data_file:
    
    agent.set_enable_training(True)
    world.reset()
-
    while (world.env._pybullet_client.isConnected()):

      timeStep = 1./600.