deepmimic fix: reference initialization wasn't spread over all frames
This commit is contained in:
@@ -0,0 +1,52 @@
|
|||||||
|
{
|
||||||
|
"AgentType": "PPO",
|
||||||
|
|
||||||
|
"ActorNet": "fc_2layers_1024units",
|
||||||
|
"ActorStepsize": 0.0000025,
|
||||||
|
"ActorMomentum": 0.9,
|
||||||
|
"ActorWeightDecay": 0.0005,
|
||||||
|
"ActorInitOutputScale": 0.01,
|
||||||
|
|
||||||
|
"CriticNet": "fc_2layers_1024units",
|
||||||
|
"CriticStepsize": 0.01,
|
||||||
|
"CriticMomentum": 0.9,
|
||||||
|
"CriticWeightDecay": 0,
|
||||||
|
|
||||||
|
"UpdatePeriod": 1,
|
||||||
|
"ItersPerUpdate": 1,
|
||||||
|
"Discount": 0.95,
|
||||||
|
"BatchSize": 4096,
|
||||||
|
"MiniBatchSize": 256,
|
||||||
|
"Epochs": 1,
|
||||||
|
"ReplayBufferSize": 500000,
|
||||||
|
"InitSamples": 1,
|
||||||
|
"NormalizerSamples": 1000000,
|
||||||
|
|
||||||
|
"RatioClip": 0.2,
|
||||||
|
"NormAdvClip": 4,
|
||||||
|
"TDLambda": 0.95,
|
||||||
|
|
||||||
|
"OutputIters": 10,
|
||||||
|
"IntOutputIters": 400,
|
||||||
|
"TestEpisodes": 32,
|
||||||
|
|
||||||
|
"ExpAnnealSamples": 64000000,
|
||||||
|
|
||||||
|
"ExpParamsBeg":
|
||||||
|
{
|
||||||
|
"Rate": 1,
|
||||||
|
"InitActionRate": 1,
|
||||||
|
"Noise": 0.05,
|
||||||
|
"NoiseInternal": 0,
|
||||||
|
"Temp": 0.1
|
||||||
|
},
|
||||||
|
|
||||||
|
"ExpParamsEnd":
|
||||||
|
{
|
||||||
|
"Rate": 0.2,
|
||||||
|
"InitActionRate": 0.01,
|
||||||
|
"Noise": 0.05,
|
||||||
|
"NoiseInternal": 0,
|
||||||
|
"Temp": 0.001
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -68,7 +68,7 @@ class HumanoidStablePD(object):
|
|||||||
self.resetPose()
|
self.resetPose()
|
||||||
|
|
||||||
def resetPose(self):
|
def resetPose(self):
|
||||||
print("resetPose with self._frameFraction=",self._frameFraction)
|
print("resetPose with self._frame=", self._frame, " and self._frameFraction=",self._frameFraction)
|
||||||
pose = self.computePose(self._frameFraction)
|
pose = self.computePose(self._frameFraction)
|
||||||
self.initializePose(self._poseInterpolator, self._sim_model, initBase=True)
|
self.initializePose(self._poseInterpolator, self._sim_model, initBase=True)
|
||||||
self.initializePose(self._poseInterpolator, self._kin_model, initBase=False)
|
self.initializePose(self._poseInterpolator, self._kin_model, initBase=False)
|
||||||
@@ -126,7 +126,6 @@ class HumanoidStablePD(object):
|
|||||||
keyFrameDuration = self._mocap_data.KeyFrameDuraction()
|
keyFrameDuration = self._mocap_data.KeyFrameDuraction()
|
||||||
cycleTime = self.getCycleTime()
|
cycleTime = self.getCycleTime()
|
||||||
#print("self._motion_data.NumFrames()=",self._mocap_data.NumFrames())
|
#print("self._motion_data.NumFrames()=",self._mocap_data.NumFrames())
|
||||||
#print("cycleTime=",cycleTime)
|
|
||||||
cycles = self.calcCycleCount(t, cycleTime)
|
cycles = self.calcCycleCount(t, cycleTime)
|
||||||
#print("cycles=",cycles)
|
#print("cycles=",cycles)
|
||||||
frameTime = t - cycles*cycleTime
|
frameTime = t - cycles*cycleTime
|
||||||
|
|||||||
@@ -64,8 +64,12 @@ class PyBulletDeepMimicEnv(Env):
|
|||||||
#self._humanoid.applyPDForces(taus)
|
#self._humanoid.applyPDForces(taus)
|
||||||
#self._pybullet_client.stepSimulation()
|
#self._pybullet_client.stepSimulation()
|
||||||
time.sleep(timeStep)
|
time.sleep(timeStep)
|
||||||
|
#print("numframes = ", self._humanoid._mocap_data.NumFrames())
|
||||||
startTime = random.randint(0,self._humanoid._mocap_data.NumFrames()-2)
|
startTime = random.randint(0,self._humanoid._mocap_data.NumFrames()-2)
|
||||||
|
rnrange = 1000
|
||||||
|
rn = random.randint(0,rnrange)
|
||||||
|
startTime = float(rn)/rnrange * self._humanoid.getCycleTime()
|
||||||
|
|
||||||
self._humanoid.setSimTime(startTime)
|
self._humanoid.setSimTime(startTime)
|
||||||
self._humanoid.resetPose()
|
self._humanoid.resetPose()
|
||||||
#this clears the contact points. Todo: add API to explicitly clear all contact points?
|
#this clears the contact points. Todo: add API to explicitly clear all contact points?
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ import learning.agent_builder as AgentBuilder
|
|||||||
import learning.tf_util as TFUtil
|
import learning.tf_util as TFUtil
|
||||||
from learning.rl_agent import RLAgent
|
from learning.rl_agent import RLAgent
|
||||||
from pybullet_utils.logger import Logger
|
from pybullet_utils.logger import Logger
|
||||||
|
import pybullet_data
|
||||||
|
|
||||||
class RLWorld(object):
|
class RLWorld(object):
|
||||||
def __init__(self, env, arg_parser):
|
def __init__(self, env, arg_parser):
|
||||||
@@ -79,7 +80,7 @@ class RLWorld(object):
|
|||||||
if (len(model_files) > 0):
|
if (len(model_files) > 0):
|
||||||
curr_model_file = model_files[i]
|
curr_model_file = model_files[i]
|
||||||
if curr_model_file != 'none':
|
if curr_model_file != 'none':
|
||||||
curr_agent.load_model(curr_model_file)
|
curr_agent.load_model(pybullet_data.getDataPath()+"/"+curr_model_file)
|
||||||
|
|
||||||
self.agents.append(curr_agent)
|
self.agents.append(curr_agent)
|
||||||
Logger.print2('')
|
Logger.print2('')
|
||||||
@@ -139,4 +140,4 @@ class RLWorld(object):
|
|||||||
assert (agent != None), 'Failed to build agent {:d} from: {}'.format(id, agent_file)
|
assert (agent != None), 'Failed to build agent {:d} from: {}'.format(id, agent_file)
|
||||||
|
|
||||||
return agent
|
return agent
|
||||||
|
|
||||||
|
|||||||
@@ -30,7 +30,7 @@ def build_arg_parser(args):
|
|||||||
args = sys.argv[1:]
|
args = sys.argv[1:]
|
||||||
arg_parser = build_arg_parser(args)
|
arg_parser = build_arg_parser(args)
|
||||||
|
|
||||||
render=True
|
render=False#True
|
||||||
env = PyBulletDeepMimicEnv (args,render)
|
env = PyBulletDeepMimicEnv (args,render)
|
||||||
|
|
||||||
world = RLWorld(env, arg_parser)
|
world = RLWorld(env, arg_parser)
|
||||||
@@ -57,7 +57,6 @@ with open(agent_files) as data_file:
|
|||||||
|
|
||||||
agent.set_enable_training(True)
|
agent.set_enable_training(True)
|
||||||
world.reset()
|
world.reset()
|
||||||
|
|
||||||
while (world.env._pybullet_client.isConnected()):
|
while (world.env._pybullet_client.isConnected()):
|
||||||
|
|
||||||
timeStep = 1./600.
|
timeStep = 1./600.
|
||||||
|
|||||||
Reference in New Issue
Block a user