deepmimic fix: reference initialization wasn't spread over all frames

2019-02-01 17:18:13 -08:00
parent 7cc2685faf
commit 1cef924973
5 changed files with 62 additions and 7 deletions
--- a/examples/pybullet/gym/pybullet_data/data/agents/ct_agent_humanoid_ppo.txt
+++ b/examples/pybullet/gym/pybullet_data/data/agents/ct_agent_humanoid_ppo.txt
@@ -0,0 +1,52 @@
 {
 	"AgentType": "PPO",
 	"ActorNet": "fc_2layers_1024units",
 	"ActorStepsize": 0.0000025,
 	"ActorMomentum": 0.9,
 	"ActorWeightDecay": 0.0005,
 	"ActorInitOutputScale": 0.01,
 	"CriticNet": "fc_2layers_1024units",
 	"CriticStepsize": 0.01,
 	"CriticMomentum": 0.9,
 	"CriticWeightDecay": 0,
 	"UpdatePeriod": 1,
 	"ItersPerUpdate": 1,
 	"Discount": 0.95,
 	"BatchSize": 4096,
 	"MiniBatchSize": 256,
 	"Epochs": 1,
 	"ReplayBufferSize": 500000,
 	"InitSamples": 1,
 	"NormalizerSamples": 1000000,
 	"RatioClip": 0.2,
 	"NormAdvClip": 4,
 	"TDLambda": 0.95,
 	"OutputIters": 10,
 	"IntOutputIters": 400,
 	"TestEpisodes": 32,
 	"ExpAnnealSamples": 64000000,
 	"ExpParamsBeg":
 	{
 		"Rate": 1,
 		"InitActionRate": 1,
 		"Noise": 0.05,
 		"NoiseInternal": 0,
 		"Temp": 0.1
 	},
 	"ExpParamsEnd":
 	{
 		"Rate": 0.2,
 		"InitActionRate": 0.01,
 		"Noise": 0.05,
 		"NoiseInternal": 0,
 		"Temp": 0.001
 	}
 }
--- a/examples/pybullet/gym/pybullet_envs/deep_mimic/env/humanoid_stable_pd.py
+++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/env/humanoid_stable_pd.py
@@ -68,7 +68,7 @@ class HumanoidStablePD(object):
    self.resetPose()
  def resetPose(self):
-    print("resetPose with self._frameFraction=",self._frameFraction)
+    print("resetPose with self._frame=", self._frame, " and self._frameFraction=",self._frameFraction)
    pose = self.computePose(self._frameFraction)
    self.initializePose(self._poseInterpolator, self._sim_model, initBase=True)
    self.initializePose(self._poseInterpolator, self._kin_model, initBase=False)
@@ -126,7 +126,6 @@ class HumanoidStablePD(object):
    keyFrameDuration = self._mocap_data.KeyFrameDuraction()
    cycleTime = self.getCycleTime()
    #print("self._motion_data.NumFrames()=",self._mocap_data.NumFrames())
    #print("cycleTime=",cycleTime)
    cycles = self.calcCycleCount(t, cycleTime)
    #print("cycles=",cycles)
    frameTime = t - cycles*cycleTime
--- a/examples/pybullet/gym/pybullet_envs/deep_mimic/env/pybullet_deep_mimic_env.py
+++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/env/pybullet_deep_mimic_env.py
@@ -64,8 +64,12 @@ class PyBulletDeepMimicEnv(Env):
              #self._humanoid.applyPDForces(taus)
              #self._pybullet_client.stepSimulation()
            time.sleep(timeStep)
-
+      #print("numframes = ", self._humanoid._mocap_data.NumFrames())
      startTime = random.randint(0,self._humanoid._mocap_data.NumFrames()-2)
      rnrange = 1000
      rn = random.randint(0,rnrange)
      startTime = float(rn)/rnrange * self._humanoid.getCycleTime()
      self._humanoid.setSimTime(startTime)
      self._humanoid.resetPose()
      #this clears the contact points. Todo: add API to explicitly clear all contact points?
--- a/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/rl_world.py
+++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/rl_world.py
@@ -3,6 +3,7 @@ import learning.agent_builder as AgentBuilder
 import learning.tf_util as TFUtil
 from learning.rl_agent import RLAgent
 from pybullet_utils.logger import Logger
 import pybullet_data
 class RLWorld(object):
    def __init__(self, env, arg_parser):
@@ -79,7 +80,7 @@ class RLWorld(object):
                if (len(model_files) > 0):
                    curr_model_file = model_files[i]
                    if curr_model_file != 'none':
-                        curr_agent.load_model(curr_model_file)
+                        curr_agent.load_model(pybullet_data.getDataPath()+"/"+curr_model_file)
            self.agents.append(curr_agent)
            Logger.print2('')
@@ -139,4 +140,4 @@ class RLWorld(object):
            assert (agent != None), 'Failed to build agent {:d} from: {}'.format(id, agent_file)
        return agent
-        
+        
--- a/examples/pybullet/gym/pybullet_envs/deep_mimic/testrl.py
+++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/testrl.py
@@ -30,7 +30,7 @@ def build_arg_parser(args):
 args = sys.argv[1:]
 arg_parser = build_arg_parser(args)
-render=True
+render=False#True
 env = PyBulletDeepMimicEnv (args,render)
 world = RLWorld(env, arg_parser)
@@ -57,7 +57,6 @@ with open(agent_files) as data_file:
    agent.set_enable_training(True)
    world.reset()
    while (world.env._pybullet_client.isConnected()):
      timeStep = 1./600.