add yapf style and apply yapf to format all Python files
This recreates pull request #2192
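The yapf settings themselves live in a style file added by this commit (typically a .style.yapf at the repository root). That file is not reproduced on this page, so the following is only a sketch inferred from the formatting visible in the hunks below (2-space indents, spaces around binary operators, lines wrapped near 100 columns); the committed file may differ:

    [style]
    based_on_style = google
    indent_width = 2
    column_limit = 100

With a style file in place, the whole tree can be reformatted in place with an invocation along the lines of "yapf --in-place --recursive ." (both flags are standard yapf options); the exact command used for this commit is not recorded here.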
@@ -4,11 +4,10 @@ import os
 import inspect
 currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
 parentdir = os.path.dirname(os.path.dirname(currentdir))
-os.sys.path.insert(0,parentdir)
-print("parentdir=",parentdir)
+os.sys.path.insert(0, parentdir)
+print("parentdir=", parentdir)

-from pybullet_envs.deep_mimic.env.pybullet_deep_mimic_env import PyBulletDeepMimicEnv
 from pybullet_envs.deep_mimic.env.pybullet_deep_mimic_env import PyBulletDeepMimicEnv
 from pybullet_envs.deep_mimic.learning.rl_world import RLWorld
 from pybullet_utils.logger import Logger
 from pybullet_envs.deep_mimic.testrl import update_world, update_timestep, build_world
@@ -17,36 +16,40 @@ import pybullet_utils.mpi_util as MPIUtil
 args = []
 world = None

+
 def run():
   global update_timestep
   global world

   done = False
   while not (done):
     update_world(world, update_timestep)

   return

+
 def shutdown():
   global world

   Logger.print2('Shutting down...')
   world.shutdown()
   return

+
 def main():
   global args
   global world

   # Command line arguments
   args = sys.argv[1:]
   enable_draw = False
   world = build_world(args, enable_draw)

   run()
   shutdown()

   return

+
 if __name__ == '__main__':
   main()
@@ -1,6 +1,7 @@
 from enum import Enum

+
 class ActionSpace(Enum):
   Null = 0
   Continuous = 1
   Discrete = 2
@@ -1,19 +1,21 @@
 from abc import abstractmethod
 import sys, abc
 if sys.version_info >= (3, 4):
   ABC = abc.ABC
 else:
   ABC = abc.ABCMeta('ABC', (), {})

 import numpy as np
 from enum import Enum

+
 class Env(ABC):
+
   class Terminate(Enum):
     Null = 0
     Fail = 1
     Succ = 2

   def __init__(self, args, enable_draw):
     self.enable_draw = enable_draw
     return
@@ -1,36 +1,61 @@
 from pybullet_utils import bullet_client
 import math

+
 class HumanoidPoseInterpolator(object):
+
   def __init__(self):
     pass

-  def Reset(self,basePos=[0,0,0], baseOrn=[0,0,0,1],chestRot=[0,0,0,1], neckRot=[0,0,0,1],rightHipRot= [0,0,0,1], rightKneeRot=[0],rightAnkleRot = [0,0,0,1],
-            rightShoulderRot = [0,0,0,1],rightElbowRot = [0], leftHipRot = [0,0,0,1], leftKneeRot = [0],leftAnkleRot = [0,0,0,1],
-            leftShoulderRot = [0,0,0,1] ,leftElbowRot = [0],
-            baseLinVel = [0,0,0],baseAngVel = [0,0,0], chestVel = [0,0,0],neckVel = [0,0,0],rightHipVel = [0,0,0],rightKneeVel = [0],
-            rightAnkleVel = [0,0,0],rightShoulderVel = [0,0,0],rightElbowVel = [0],leftHipVel = [0,0,0],leftKneeVel = [0],leftAnkleVel = [0,0,0],leftShoulderVel = [0,0,0],leftElbowVel = [0]
-            ):
+  def Reset(self,
+            basePos=[0, 0, 0],
+            baseOrn=[0, 0, 0, 1],
+            chestRot=[0, 0, 0, 1],
+            neckRot=[0, 0, 0, 1],
+            rightHipRot=[0, 0, 0, 1],
+            rightKneeRot=[0],
+            rightAnkleRot=[0, 0, 0, 1],
+            rightShoulderRot=[0, 0, 0, 1],
+            rightElbowRot=[0],
+            leftHipRot=[0, 0, 0, 1],
+            leftKneeRot=[0],
+            leftAnkleRot=[0, 0, 0, 1],
+            leftShoulderRot=[0, 0, 0, 1],
+            leftElbowRot=[0],
+            baseLinVel=[0, 0, 0],
+            baseAngVel=[0, 0, 0],
+            chestVel=[0, 0, 0],
+            neckVel=[0, 0, 0],
+            rightHipVel=[0, 0, 0],
+            rightKneeVel=[0],
+            rightAnkleVel=[0, 0, 0],
+            rightShoulderVel=[0, 0, 0],
+            rightElbowVel=[0],
+            leftHipVel=[0, 0, 0],
+            leftKneeVel=[0],
+            leftAnkleVel=[0, 0, 0],
+            leftShoulderVel=[0, 0, 0],
+            leftElbowVel=[0]):

     self._basePos = basePos
     self._baseLinVel = baseLinVel
     #print("HumanoidPoseInterpolator.Reset: baseLinVel = ", baseLinVel)
-    self._baseOrn =baseOrn
+    self._baseOrn = baseOrn
     self._baseAngVel = baseAngVel

     self._chestRot = chestRot
-    self._chestVel =chestVel
+    self._chestVel = chestVel
     self._neckRot = neckRot
     self._neckVel = neckVel

     self._rightHipRot = rightHipRot
     self._rightHipVel = rightHipVel
-    self._rightKneeRot =rightKneeRot
+    self._rightKneeRot = rightKneeRot
     self._rightKneeVel = rightKneeVel
     self._rightAnkleRot = rightAnkleRot
     self._rightAnkleVel = rightAnkleVel

-    self._rightShoulderRot =rightShoulderRot
+    self._rightShoulderRot = rightShoulderRot
     self._rightShoulderVel = rightShoulderVel
     self._rightElbowRot = rightElbowRot
     self._rightElbowVel = rightElbowVel
@@ -39,225 +64,253 @@ class HumanoidPoseInterpolator(object):
     self._leftHipVel = leftHipVel
     self._leftKneeRot = leftKneeRot
     self._leftKneeVel = leftKneeVel
-    self._leftAnkleRot =leftAnkleRot
+    self._leftAnkleRot = leftAnkleRot
     self._leftAnkleVel = leftAnkleVel

     self._leftShoulderRot = leftShoulderRot
     self._leftShoulderVel = leftShoulderVel
-    self._leftElbowRot =leftElbowRot
+    self._leftElbowRot = leftElbowRot
     self._leftElbowVel = leftElbowVel

-  def ComputeLinVel(self,posStart, posEnd, deltaTime):
-    vel = [(posEnd[0]-posStart[0])/deltaTime,(posEnd[1]-posStart[1])/deltaTime,(posEnd[2]-posStart[2])/deltaTime]
+  def ComputeLinVel(self, posStart, posEnd, deltaTime):
+    vel = [(posEnd[0] - posStart[0]) / deltaTime, (posEnd[1] - posStart[1]) / deltaTime,
+           (posEnd[2] - posStart[2]) / deltaTime]
     return vel

-  def ComputeAngVel(self,ornStart, ornEnd, deltaTime, bullet_client):
-    dorn = bullet_client.getDifferenceQuaternion(ornStart,ornEnd)
-    axis,angle = bullet_client.getAxisAngleFromQuaternion(dorn)
-    angVel = [(axis[0]*angle)/deltaTime,(axis[1]*angle)/deltaTime,(axis[2]*angle)/deltaTime]
+  def ComputeAngVel(self, ornStart, ornEnd, deltaTime, bullet_client):
+    dorn = bullet_client.getDifferenceQuaternion(ornStart, ornEnd)
+    axis, angle = bullet_client.getAxisAngleFromQuaternion(dorn)
+    angVel = [(axis[0] * angle) / deltaTime, (axis[1] * angle) / deltaTime,
+              (axis[2] * angle) / deltaTime]
     return angVel

-  def ComputeAngVelRel(self,ornStart, ornEnd, deltaTime, bullet_client):
-    ornStartConjugate = [-ornStart[0],-ornStart[1],-ornStart[2],ornStart[3]]
-    pos_diff, q_diff =bullet_client.multiplyTransforms([0,0,0], ornStartConjugate, [0,0,0], ornEnd)
-    axis,angle = bullet_client.getAxisAngleFromQuaternion(q_diff)
-    angVel = [(axis[0]*angle)/deltaTime,(axis[1]*angle)/deltaTime,(axis[2]*angle)/deltaTime]
+  def ComputeAngVelRel(self, ornStart, ornEnd, deltaTime, bullet_client):
+    ornStartConjugate = [-ornStart[0], -ornStart[1], -ornStart[2], ornStart[3]]
+    pos_diff, q_diff = bullet_client.multiplyTransforms([0, 0, 0], ornStartConjugate, [0, 0, 0],
+                                                        ornEnd)
+    axis, angle = bullet_client.getAxisAngleFromQuaternion(q_diff)
+    angVel = [(axis[0] * angle) / deltaTime, (axis[1] * angle) / deltaTime,
+              (axis[2] * angle) / deltaTime]
     return angVel

   def NormalizeVector(self, vec):
-    length2 = orn[0]*orn[0]+orn[1]*orn[1]+orn[2]*orn[2]
-    if (length2>0):
+    length2 = orn[0] * orn[0] + orn[1] * orn[1] + orn[2] * orn[2]
+    if (length2 > 0):
       length = math.sqrt(length2)

   def NormalizeQuaternion(self, orn):
-    length2 = orn[0]*orn[0]+orn[1]*orn[1]+orn[2]*orn[2]+orn[3]*orn[3]
-    if (length2>0):
+    length2 = orn[0] * orn[0] + orn[1] * orn[1] + orn[2] * orn[2] + orn[3] * orn[3]
+    if (length2 > 0):
       length = math.sqrt(length2)
-      orn[0]/=length
-      orn[1]/=length
-      orn[2]/=length
-      orn[3]/=length
+      orn[0] /= length
+      orn[1] /= length
+      orn[2] /= length
+      orn[3] /= length
       return orn

     #print("Normalize? length=",length)

   def PostProcessMotionData(self, frameData):
-    baseOrn1Start = [frameData[5],frameData[6], frameData[7],frameData[4]]
-
-    chestRotStart = [frameData[9],frameData[10],frameData[11],frameData[8]]
-
-    neckRotStart = [frameData[13],frameData[14],frameData[15],frameData[12]]
-    rightHipRotStart = [frameData[17],frameData[18],frameData[19],frameData[16]]
-    rightAnkleRotStart = [frameData[22],frameData[23],frameData[24],frameData[21]]
-    rightShoulderRotStart = [frameData[26],frameData[27],frameData[28],frameData[25]]
-    leftHipRotStart = [frameData[31],frameData[32],frameData[33],frameData[30]]
-    leftAnkleRotStart = [frameData[36],frameData[37],frameData[38],frameData[35]]
-    leftShoulderRotStart = [frameData[40],frameData[41],frameData[42],frameData[39]]
+    baseOrn1Start = [frameData[5], frameData[6], frameData[7], frameData[4]]
+
+    chestRotStart = [frameData[9], frameData[10], frameData[11], frameData[8]]
+
+    neckRotStart = [frameData[13], frameData[14], frameData[15], frameData[12]]
+    rightHipRotStart = [frameData[17], frameData[18], frameData[19], frameData[16]]
+    rightAnkleRotStart = [frameData[22], frameData[23], frameData[24], frameData[21]]
+    rightShoulderRotStart = [frameData[26], frameData[27], frameData[28], frameData[25]]
+    leftHipRotStart = [frameData[31], frameData[32], frameData[33], frameData[30]]
+    leftAnkleRotStart = [frameData[36], frameData[37], frameData[38], frameData[35]]
+    leftShoulderRotStart = [frameData[40], frameData[41], frameData[42], frameData[39]]

   def GetPose(self):
-    pose = [ self._basePos[0],self._basePos[1],self._basePos[2],
-        self._baseOrn[0],self._baseOrn[1],self._baseOrn[2],self._baseOrn[3],
-        self._chestRot[0],self._chestRot[1],self._chestRot[2],self._chestRot[3],
-        self._neckRot[0],self._neckRot[1],self._neckRot[2],self._neckRot[3],
-        self._rightHipRot[0],self._rightHipRot[1],self._rightHipRot[2],self._rightHipRot[3],
-        self._rightKneeRot[0],
-        self._rightAnkleRot[0],self._rightAnkleRot[1],self._rightAnkleRot[2],self._rightAnkleRot[3],
-        self._rightShoulderRot[0],self._rightShoulderRot[1],self._rightShoulderRot[2],self._rightShoulderRot[3],
-        self._rightElbowRot[0],
-        self._leftHipRot[0],self._leftHipRot[1],self._leftHipRot[2],self._leftHipRot[3],
-        self._leftKneeRot[0],
-        self._leftAnkleRot[0],self._leftAnkleRot[1],self._leftAnkleRot[2],self._leftAnkleRot[3],
-        self._leftShoulderRot[0],self._leftShoulderRot[1],self._leftShoulderRot[2],self._leftShoulderRot[3],
-        self._leftElbowRot[0] ]
-    return pose
+    pose = [
+        self._basePos[0], self._basePos[1], self._basePos[2], self._baseOrn[0], self._baseOrn[1],
+        self._baseOrn[2], self._baseOrn[3], self._chestRot[0], self._chestRot[1],
+        self._chestRot[2], self._chestRot[3], self._neckRot[0], self._neckRot[1], self._neckRot[2],
+        self._neckRot[3], self._rightHipRot[0], self._rightHipRot[1], self._rightHipRot[2],
+        self._rightHipRot[3], self._rightKneeRot[0], self._rightAnkleRot[0],
+        self._rightAnkleRot[1], self._rightAnkleRot[2], self._rightAnkleRot[3],
+        self._rightShoulderRot[0], self._rightShoulderRot[1], self._rightShoulderRot[2],
+        self._rightShoulderRot[3], self._rightElbowRot[0], self._leftHipRot[0],
+        self._leftHipRot[1], self._leftHipRot[2], self._leftHipRot[3], self._leftKneeRot[0],
+        self._leftAnkleRot[0], self._leftAnkleRot[1], self._leftAnkleRot[2], self._leftAnkleRot[3],
+        self._leftShoulderRot[0], self._leftShoulderRot[1], self._leftShoulderRot[2],
+        self._leftShoulderRot[3], self._leftElbowRot[0]
+    ]
+    return pose

-  def Slerp(self, frameFraction, frameData, frameDataNext,bullet_client ):
+  def Slerp(self, frameFraction, frameData, frameDataNext, bullet_client):
     keyFrameDuration = frameData[0]
-    basePos1Start = [frameData[1],frameData[2],frameData[3]]
-    basePos1End = [frameDataNext[1],frameDataNext[2],frameDataNext[3]]
-    self._basePos = [basePos1Start[0]+frameFraction*(basePos1End[0]-basePos1Start[0]),
-        basePos1Start[1]+frameFraction*(basePos1End[1]-basePos1Start[1]),
-        basePos1Start[2]+frameFraction*(basePos1End[2]-basePos1Start[2])]
-    self._baseLinVel = self.ComputeLinVel(basePos1Start,basePos1End, keyFrameDuration)
-    baseOrn1Start = [frameData[5],frameData[6], frameData[7],frameData[4]]
-    baseOrn1Next = [frameDataNext[5],frameDataNext[6], frameDataNext[7],frameDataNext[4]]
-    self._baseOrn = bullet_client.getQuaternionSlerp(baseOrn1Start,baseOrn1Next,frameFraction)
-    self._baseAngVel = self.ComputeAngVel(baseOrn1Start,baseOrn1Next, keyFrameDuration, bullet_client)
+    basePos1Start = [frameData[1], frameData[2], frameData[3]]
+    basePos1End = [frameDataNext[1], frameDataNext[2], frameDataNext[3]]
+    self._basePos = [
+        basePos1Start[0] + frameFraction * (basePos1End[0] - basePos1Start[0]),
+        basePos1Start[1] + frameFraction * (basePos1End[1] - basePos1Start[1]),
+        basePos1Start[2] + frameFraction * (basePos1End[2] - basePos1Start[2])
+    ]
+    self._baseLinVel = self.ComputeLinVel(basePos1Start, basePos1End, keyFrameDuration)
+    baseOrn1Start = [frameData[5], frameData[6], frameData[7], frameData[4]]
+    baseOrn1Next = [frameDataNext[5], frameDataNext[6], frameDataNext[7], frameDataNext[4]]
+    self._baseOrn = bullet_client.getQuaternionSlerp(baseOrn1Start, baseOrn1Next, frameFraction)
+    self._baseAngVel = self.ComputeAngVel(baseOrn1Start, baseOrn1Next, keyFrameDuration,
+                                          bullet_client)

     ##pre-rotate to make z-up
     #y2zPos=[0,0,0.0]
     #y2zOrn = p.getQuaternionFromEuler([1.57,0,0])
     #basePos,baseOrn = p.multiplyTransforms(y2zPos, y2zOrn,basePos1,baseOrn1)

-    chestRotStart = [frameData[9],frameData[10],frameData[11],frameData[8]]
-    chestRotEnd = [frameDataNext[9],frameDataNext[10],frameDataNext[11],frameDataNext[8]]
-    self._chestRot = bullet_client.getQuaternionSlerp(chestRotStart,chestRotEnd,frameFraction)
-    self._chestVel = self.ComputeAngVelRel(chestRotStart,chestRotEnd,keyFrameDuration,bullet_client)
-
-    neckRotStart = [frameData[13],frameData[14],frameData[15],frameData[12]]
-    neckRotEnd= [frameDataNext[13],frameDataNext[14],frameDataNext[15],frameDataNext[12]]
-    self._neckRot = bullet_client.getQuaternionSlerp(neckRotStart,neckRotEnd,frameFraction)
-    self._neckVel = self.ComputeAngVelRel(neckRotStart,neckRotEnd,keyFrameDuration,bullet_client)
-
-    rightHipRotStart = [frameData[17],frameData[18],frameData[19],frameData[16]]
-    rightHipRotEnd = [frameDataNext[17],frameDataNext[18],frameDataNext[19],frameDataNext[16]]
-    self._rightHipRot = bullet_client.getQuaternionSlerp(rightHipRotStart,rightHipRotEnd,frameFraction)
-    self._rightHipVel = self.ComputeAngVelRel(rightHipRotStart,rightHipRotEnd,keyFrameDuration,bullet_client)
+    chestRotStart = [frameData[9], frameData[10], frameData[11], frameData[8]]
+    chestRotEnd = [frameDataNext[9], frameDataNext[10], frameDataNext[11], frameDataNext[8]]
+    self._chestRot = bullet_client.getQuaternionSlerp(chestRotStart, chestRotEnd, frameFraction)
+    self._chestVel = self.ComputeAngVelRel(chestRotStart, chestRotEnd, keyFrameDuration,
+                                           bullet_client)
+
+    neckRotStart = [frameData[13], frameData[14], frameData[15], frameData[12]]
+    neckRotEnd = [frameDataNext[13], frameDataNext[14], frameDataNext[15], frameDataNext[12]]
+    self._neckRot = bullet_client.getQuaternionSlerp(neckRotStart, neckRotEnd, frameFraction)
+    self._neckVel = self.ComputeAngVelRel(neckRotStart, neckRotEnd, keyFrameDuration,
+                                          bullet_client)
+
+    rightHipRotStart = [frameData[17], frameData[18], frameData[19], frameData[16]]
+    rightHipRotEnd = [frameDataNext[17], frameDataNext[18], frameDataNext[19], frameDataNext[16]]
+    self._rightHipRot = bullet_client.getQuaternionSlerp(rightHipRotStart, rightHipRotEnd,
+                                                         frameFraction)
+    self._rightHipVel = self.ComputeAngVelRel(rightHipRotStart, rightHipRotEnd, keyFrameDuration,
+                                              bullet_client)

     rightKneeRotStart = [frameData[20]]
     rightKneeRotEnd = [frameDataNext[20]]
-    self._rightKneeRot = [rightKneeRotStart[0]+frameFraction*(rightKneeRotEnd[0]-rightKneeRotStart[0])]
-    self._rightKneeVel = [(rightKneeRotEnd[0]-rightKneeRotStart[0])/keyFrameDuration]
-
-    rightAnkleRotStart = [frameData[22],frameData[23],frameData[24],frameData[21]]
-    rightAnkleRotEnd = [frameDataNext[22],frameDataNext[23],frameDataNext[24],frameDataNext[21]]
-    self._rightAnkleRot = bullet_client.getQuaternionSlerp(rightAnkleRotStart,rightAnkleRotEnd,frameFraction)
-    self._rightAnkleVel = self.ComputeAngVelRel(rightAnkleRotStart,rightAnkleRotEnd,keyFrameDuration,bullet_client)
-
-    rightShoulderRotStart = [frameData[26],frameData[27],frameData[28],frameData[25]]
-    rightShoulderRotEnd = [frameDataNext[26],frameDataNext[27],frameDataNext[28],frameDataNext[25]]
-    self._rightShoulderRot = bullet_client.getQuaternionSlerp(rightShoulderRotStart,rightShoulderRotEnd,frameFraction)
-    self._rightShoulderVel = self.ComputeAngVelRel(rightShoulderRotStart,rightShoulderRotEnd, keyFrameDuration,bullet_client)
+    self._rightKneeRot = [
+        rightKneeRotStart[0] + frameFraction * (rightKneeRotEnd[0] - rightKneeRotStart[0])
+    ]
+    self._rightKneeVel = [(rightKneeRotEnd[0] - rightKneeRotStart[0]) / keyFrameDuration]
+
+    rightAnkleRotStart = [frameData[22], frameData[23], frameData[24], frameData[21]]
+    rightAnkleRotEnd = [frameDataNext[22], frameDataNext[23], frameDataNext[24], frameDataNext[21]]
+    self._rightAnkleRot = bullet_client.getQuaternionSlerp(rightAnkleRotStart, rightAnkleRotEnd,
+                                                           frameFraction)
+    self._rightAnkleVel = self.ComputeAngVelRel(rightAnkleRotStart, rightAnkleRotEnd,
+                                                keyFrameDuration, bullet_client)
+
+    rightShoulderRotStart = [frameData[26], frameData[27], frameData[28], frameData[25]]
+    rightShoulderRotEnd = [
+        frameDataNext[26], frameDataNext[27], frameDataNext[28], frameDataNext[25]
+    ]
+    self._rightShoulderRot = bullet_client.getQuaternionSlerp(rightShoulderRotStart,
+                                                              rightShoulderRotEnd, frameFraction)
+    self._rightShoulderVel = self.ComputeAngVelRel(rightShoulderRotStart, rightShoulderRotEnd,
+                                                   keyFrameDuration, bullet_client)

     rightElbowRotStart = [frameData[29]]
     rightElbowRotEnd = [frameDataNext[29]]
-    self._rightElbowRot = [rightElbowRotStart[0]+frameFraction*(rightElbowRotEnd[0]-rightElbowRotStart[0])]
-    self._rightElbowVel = [(rightElbowRotEnd[0]-rightElbowRotStart[0])/keyFrameDuration]
-
-    leftHipRotStart = [frameData[31],frameData[32],frameData[33],frameData[30]]
-    leftHipRotEnd = [frameDataNext[31],frameDataNext[32],frameDataNext[33],frameDataNext[30]]
-    self._leftHipRot = bullet_client.getQuaternionSlerp(leftHipRotStart,leftHipRotEnd,frameFraction)
-    self._leftHipVel = self.ComputeAngVelRel(leftHipRotStart, leftHipRotEnd,keyFrameDuration,bullet_client)
+    self._rightElbowRot = [
+        rightElbowRotStart[0] + frameFraction * (rightElbowRotEnd[0] - rightElbowRotStart[0])
+    ]
+    self._rightElbowVel = [(rightElbowRotEnd[0] - rightElbowRotStart[0]) / keyFrameDuration]
+
+    leftHipRotStart = [frameData[31], frameData[32], frameData[33], frameData[30]]
+    leftHipRotEnd = [frameDataNext[31], frameDataNext[32], frameDataNext[33], frameDataNext[30]]
+    self._leftHipRot = bullet_client.getQuaternionSlerp(leftHipRotStart, leftHipRotEnd,
+                                                        frameFraction)
+    self._leftHipVel = self.ComputeAngVelRel(leftHipRotStart, leftHipRotEnd, keyFrameDuration,
+                                             bullet_client)

     leftKneeRotStart = [frameData[34]]
     leftKneeRotEnd = [frameDataNext[34]]
-    self._leftKneeRot = [leftKneeRotStart[0] +frameFraction*(leftKneeRotEnd[0]-leftKneeRotStart[0]) ]
-    self._leftKneeVel = [(leftKneeRotEnd[0]-leftKneeRotStart[0])/keyFrameDuration]
-
-    leftAnkleRotStart = [frameData[36],frameData[37],frameData[38],frameData[35]]
-    leftAnkleRotEnd = [frameDataNext[36],frameDataNext[37],frameDataNext[38],frameDataNext[35]]
-    self._leftAnkleRot = bullet_client.getQuaternionSlerp(leftAnkleRotStart,leftAnkleRotEnd,frameFraction)
-    self._leftAnkleVel = self.ComputeAngVelRel(leftAnkleRotStart,leftAnkleRotEnd,keyFrameDuration,bullet_client)
-
-    leftShoulderRotStart = [frameData[40],frameData[41],frameData[42],frameData[39]]
-    leftShoulderRotEnd = [frameDataNext[40],frameDataNext[41],frameDataNext[42],frameDataNext[39]]
-    self._leftShoulderRot = bullet_client.getQuaternionSlerp(leftShoulderRotStart,leftShoulderRotEnd,frameFraction)
-    self._leftShoulderVel = self.ComputeAngVelRel(leftShoulderRotStart,leftShoulderRotEnd,keyFrameDuration,bullet_client)
+    self._leftKneeRot = [
+        leftKneeRotStart[0] + frameFraction * (leftKneeRotEnd[0] - leftKneeRotStart[0])
+    ]
+    self._leftKneeVel = [(leftKneeRotEnd[0] - leftKneeRotStart[0]) / keyFrameDuration]
+
+    leftAnkleRotStart = [frameData[36], frameData[37], frameData[38], frameData[35]]
+    leftAnkleRotEnd = [frameDataNext[36], frameDataNext[37], frameDataNext[38], frameDataNext[35]]
+    self._leftAnkleRot = bullet_client.getQuaternionSlerp(leftAnkleRotStart, leftAnkleRotEnd,
+                                                          frameFraction)
+    self._leftAnkleVel = self.ComputeAngVelRel(leftAnkleRotStart, leftAnkleRotEnd,
+                                               keyFrameDuration, bullet_client)
+
+    leftShoulderRotStart = [frameData[40], frameData[41], frameData[42], frameData[39]]
+    leftShoulderRotEnd = [
+        frameDataNext[40], frameDataNext[41], frameDataNext[42], frameDataNext[39]
+    ]
+    self._leftShoulderRot = bullet_client.getQuaternionSlerp(leftShoulderRotStart,
+                                                             leftShoulderRotEnd, frameFraction)
+    self._leftShoulderVel = self.ComputeAngVelRel(leftShoulderRotStart, leftShoulderRotEnd,
+                                                  keyFrameDuration, bullet_client)

     leftElbowRotStart = [frameData[43]]
     leftElbowRotEnd = [frameDataNext[43]]
-    self._leftElbowRot = [leftElbowRotStart[0]+frameFraction*(leftElbowRotEnd[0]-leftElbowRotStart[0])]
-    self._leftElbowVel = [(leftElbowRotEnd[0]-leftElbowRotStart[0])/keyFrameDuration]
+    self._leftElbowRot = [
+        leftElbowRotStart[0] + frameFraction * (leftElbowRotEnd[0] - leftElbowRotStart[0])
+    ]
+    self._leftElbowVel = [(leftElbowRotEnd[0] - leftElbowRotStart[0]) / keyFrameDuration]

     pose = self.GetPose()
     return pose

   def ConvertFromAction(self, pybullet_client, action):
     #turn action into pose

-    self.Reset()#?? needed?
-    index=0
+    self.Reset()  #?? needed?
+    index = 0
     angle = action[index]
-    axis = [action[index+1],action[index+2],action[index+3]]
-    index+=4
-    self._chestRot = pybullet_client.getQuaternionFromAxisAngle(axis,angle)
+    axis = [action[index + 1], action[index + 2], action[index + 3]]
+    index += 4
+    self._chestRot = pybullet_client.getQuaternionFromAxisAngle(axis, angle)
     #print("pose._chestRot=",pose._chestRot)

     angle = action[index]
-    axis = [action[index+1],action[index+2],action[index+3]]
-    index+=4
-    self._neckRot = pybullet_client.getQuaternionFromAxisAngle(axis,angle)
+    axis = [action[index + 1], action[index + 2], action[index + 3]]
+    index += 4
+    self._neckRot = pybullet_client.getQuaternionFromAxisAngle(axis, angle)

     angle = action[index]
-    axis = [action[index+1],action[index+2],action[index+3]]
-    index+=4
-    self._rightHipRot = pybullet_client.getQuaternionFromAxisAngle(axis,angle)
+    axis = [action[index + 1], action[index + 2], action[index + 3]]
+    index += 4
+    self._rightHipRot = pybullet_client.getQuaternionFromAxisAngle(axis, angle)

     angle = action[index]
-    index+=1
+    index += 1
     self._rightKneeRot = [angle]

     angle = action[index]
-    axis = [action[index+1],action[index+2],action[index+3]]
-    index+=4
-    self._rightAnkleRot = pybullet_client.getQuaternionFromAxisAngle(axis,angle)
+    axis = [action[index + 1], action[index + 2], action[index + 3]]
+    index += 4
+    self._rightAnkleRot = pybullet_client.getQuaternionFromAxisAngle(axis, angle)

     angle = action[index]
-    axis = [action[index+1],action[index+2],action[index+3]]
-    index+=4
-    self._rightShoulderRot = pybullet_client.getQuaternionFromAxisAngle(axis,angle)
+    axis = [action[index + 1], action[index + 2], action[index + 3]]
+    index += 4
+    self._rightShoulderRot = pybullet_client.getQuaternionFromAxisAngle(axis, angle)

     angle = action[index]
-    index+=1
+    index += 1
     self._rightElbowRot = [angle]

     angle = action[index]
-    axis = [action[index+1],action[index+2],action[index+3]]
-    index+=4
-    self._leftHipRot = pybullet_client.getQuaternionFromAxisAngle(axis,angle)
+    axis = [action[index + 1], action[index + 2], action[index + 3]]
+    index += 4
+    self._leftHipRot = pybullet_client.getQuaternionFromAxisAngle(axis, angle)

     angle = action[index]
-    index+=1
+    index += 1
     self._leftKneeRot = [angle]

     angle = action[index]
-    axis = [action[index+1],action[index+2],action[index+3]]
-    index+=4
-    self._leftAnkleRot = pybullet_client.getQuaternionFromAxisAngle(axis,angle)
+    axis = [action[index + 1], action[index + 2], action[index + 3]]
+    index += 4
+    self._leftAnkleRot = pybullet_client.getQuaternionFromAxisAngle(axis, angle)

     angle = action[index]
-    axis = [action[index+1],action[index+2],action[index+3]]
-    index+=4
-    self._leftShoulderRot = pybullet_client.getQuaternionFromAxisAngle(axis,angle)
+    axis = [action[index + 1], action[index + 2], action[index + 3]]
+    index += 4
+    self._leftShoulderRot = pybullet_client.getQuaternionFromAxisAngle(axis, angle)

     angle = action[index]
-    index+=1
+    index += 1
     self._leftElbowRot = [angle]

     pose = self.GetPose()
     return pose
File diff suppressed because it is too large
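The transformation applied throughout these hunks (spaces around binary operators, wrapped argument lists, exploded literal lists) can be reproduced programmatically with yapf's library API. A minimal sketch: FormatCode is yapf's documented entry point, while the inline style string is an assumption chosen to match the formatting in this diff:

    from yapf.yapflib.yapf_api import FormatCode

    # One of the pre-commit lines from humanoid_pose_interpolator.py above.
    src = ("vel = [(posEnd[0]-posStart[0])/deltaTime,"
           "(posEnd[1]-posStart[1])/deltaTime,(posEnd[2]-posStart[2])/deltaTime]\n")

    # FormatCode parses (it does not execute) the snippet and returns the
    # reformatted source plus a flag saying whether anything changed.
    formatted, changed = FormatCode(
        src, style_config='{based_on_style: google, indent_width: 2, column_limit: 100}')
    print(formatted)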
@@ -1,7 +1,9 @@
 import json
 import math

+
 class MotionCaptureData(object):
+
   def __init__(self):
     self.Reset()
@@ -13,30 +15,33 @@ class MotionCaptureData(object):
       self._motion_data = json.load(f)

   def NumFrames(self):
     return len(self._motion_data['Frames'])

   def KeyFrameDuraction(self):
     return self._motion_data['Frames'][0][0]

   def getCycleTime(self):
     keyFrameDuration = self.KeyFrameDuraction()
-    cycleTime = keyFrameDuration*(self.NumFrames()-1)
+    cycleTime = keyFrameDuration * (self.NumFrames() - 1)
     return cycleTime

   def calcCycleCount(self, simTime, cycleTime):
-    phases = simTime / cycleTime;
+    phases = simTime / cycleTime
     count = math.floor(phases)
     loop = True
     #count = (loop) ? count : cMathUtil::Clamp(count, 0, 1);
     return count

   def computeCycleOffset(self):
-    firstFrame=0
-    lastFrame = self.NumFrames()-1
+    firstFrame = 0
+    lastFrame = self.NumFrames() - 1
     frameData = self._motion_data['Frames'][0]
     frameDataNext = self._motion_data['Frames'][lastFrame]

-    basePosStart = [frameData[1],frameData[2],frameData[3]]
-    basePosEnd = [frameDataNext[1],frameDataNext[2],frameDataNext[3]]
-    self._cycleOffset = [basePosEnd[0]-basePosStart[0],basePosEnd[1]-basePosStart[1],basePosEnd[2]-basePosStart[2]]
+    basePosStart = [frameData[1], frameData[2], frameData[3]]
+    basePosEnd = [frameDataNext[1], frameDataNext[2], frameDataNext[3]]
+    self._cycleOffset = [
+        basePosEnd[0] - basePosStart[0], basePosEnd[1] - basePosStart[1],
+        basePosEnd[2] - basePosStart[2]
+    ]
    return self._cycleOffset
@@ -9,301 +9,313 @@ from pybullet_envs.deep_mimic.env import humanoid_stable_pd
 import pybullet_data
 import pybullet as p1
 import random


 class PyBulletDeepMimicEnv(Env):

   def __init__(self, arg_parser=None, enable_draw=False, pybullet_client=None):
     super().__init__(arg_parser, enable_draw)
     self._num_agents = 1
     self._pybullet_client = pybullet_client
     self._isInitialized = False
     self._useStablePD = True
     self._arg_parser = arg_parser
     self.reset()

   def reset(self):

     if not self._isInitialized:
       if self.enable_draw:
         self._pybullet_client = bullet_client.BulletClient(connection_mode=p1.GUI)
         #disable 'GUI' since it slows down a lot on Mac OSX and some other platforms
-        self._pybullet_client.configureDebugVisualizer(self._pybullet_client.COV_ENABLE_GUI,0)
+        self._pybullet_client.configureDebugVisualizer(self._pybullet_client.COV_ENABLE_GUI, 0)
       else:
         self._pybullet_client = bullet_client.BulletClient()

       self._pybullet_client.setAdditionalSearchPath(pybullet_data.getDataPath())
-      z2y = self._pybullet_client.getQuaternionFromEuler([-math.pi*0.5,0,0])
-      self._planeId = self._pybullet_client.loadURDF("plane_implicit.urdf",[0,0,0],z2y, useMaximalCoordinates=True)
+      z2y = self._pybullet_client.getQuaternionFromEuler([-math.pi * 0.5, 0, 0])
+      self._planeId = self._pybullet_client.loadURDF("plane_implicit.urdf", [0, 0, 0],
+                                                     z2y,
+                                                     useMaximalCoordinates=True)
       #print("planeId=",self._planeId)
-      self._pybullet_client.configureDebugVisualizer(self._pybullet_client.COV_ENABLE_Y_AXIS_UP,1)
-      self._pybullet_client.setGravity(0,-9.8,0)
+      self._pybullet_client.configureDebugVisualizer(self._pybullet_client.COV_ENABLE_Y_AXIS_UP, 1)
+      self._pybullet_client.setGravity(0, -9.8, 0)

       self._pybullet_client.setPhysicsEngineParameter(numSolverIterations=10)
       self._pybullet_client.changeDynamics(self._planeId, linkIndex=-1, lateralFriction=0.9)

       self._mocapData = motion_capture_data.MotionCaptureData()

       motion_file = self._arg_parser.parse_strings('motion_file')
-      print("motion_file=",motion_file[0])
+      print("motion_file=", motion_file[0])

-      motionPath = pybullet_data.getDataPath()+"/"+motion_file[0]
+      motionPath = pybullet_data.getDataPath() + "/" + motion_file[0]
       #motionPath = pybullet_data.getDataPath()+"/motions/humanoid3d_backflip.txt"
       self._mocapData.Load(motionPath)
-      timeStep = 1./600.
-      useFixedBase=False
-      self._humanoid = humanoid_stable_pd.HumanoidStablePD(self._pybullet_client, self._mocapData, timeStep, useFixedBase)
+      timeStep = 1. / 600.
+      useFixedBase = False
+      self._humanoid = humanoid_stable_pd.HumanoidStablePD(self._pybullet_client, self._mocapData,
+                                                           timeStep, useFixedBase)
       self._isInitialized = True

       self._pybullet_client.setTimeStep(timeStep)
       self._pybullet_client.setPhysicsEngineParameter(numSubSteps=1)

       selfCheck = False
       if (selfCheck):
         curTime = 0
         while self._pybullet_client.isConnected():
           self._humanoid.setSimTime(curTime)
           state = self._humanoid.getState()
           #print("state=",state)
           pose = self._humanoid.computePose(self._humanoid._frameFraction)
-          for i in range (10):
-            curTime+=timeStep
+          for i in range(10):
+            curTime += timeStep
             #taus = self._humanoid.computePDForces(pose)
             #self._humanoid.applyPDForces(taus)
             #self._pybullet_client.stepSimulation()
             time.sleep(timeStep)
     #print("numframes = ", self._humanoid._mocap_data.NumFrames())
     #startTime = random.randint(0,self._humanoid._mocap_data.NumFrames()-2)
     rnrange = 1000
-    rn = random.randint(0,rnrange)
-    startTime = float(rn)/rnrange * self._humanoid.getCycleTime()
+    rn = random.randint(0, rnrange)
+    startTime = float(rn) / rnrange * self._humanoid.getCycleTime()
     self.t = startTime

     self._humanoid.setSimTime(startTime)

     self._humanoid.resetPose()
     #this clears the contact points. Todo: add API to explicitly clear all contact points?
     #self._pybullet_client.stepSimulation()
     self._humanoid.resetPose()
-    self.needs_update_time = self.t-1#force update
+    self.needs_update_time = self.t - 1  #force update

   def get_num_agents(self):
     return self._num_agents

   def get_action_space(self, agent_id):
     return ActionSpace(ActionSpace.Continuous)

   def get_reward_min(self, agent_id):
     return 0

   def get_reward_max(self, agent_id):
     return 1

   def get_reward_fail(self, agent_id):
     return self.get_reward_min(agent_id)

   def get_reward_succ(self, agent_id):
     return self.get_reward_max(agent_id)

   #scene_name == "imitate" -> cDrawSceneImitate
   def get_state_size(self, agent_id):
     #cCtController::GetStateSize()
     #int state_size = cDeepMimicCharController::GetStateSize();
     # state_size += GetStatePoseSize();#106
     # state_size += GetStateVelSize(); #(3+3)*numBodyParts=90
     #state_size += GetStatePhaseSize();#1
     #197
     return 197

   def build_state_norm_groups(self, agent_id):
     #if (mEnablePhaseInput)
     #{
     #int phase_group = gNormGroupNone;
     #int phase_offset = GetStatePhaseOffset();
     #int phase_size = GetStatePhaseSize();
     #out_groups.segment(phase_offset, phase_size) = phase_group * Eigen::VectorXi::Ones(phase_size);
-    groups = [0]*self.get_state_size(agent_id)
+    groups = [0] * self.get_state_size(agent_id)
     groups[0] = -1
     return groups

   def build_state_offset(self, agent_id):
-    out_offset = [0]*self.get_state_size(agent_id)
+    out_offset = [0] * self.get_state_size(agent_id)
     phase_offset = -0.5
     out_offset[0] = phase_offset
     return np.array(out_offset)

   def build_state_scale(self, agent_id):
-    out_scale = [1]*self.get_state_size(agent_id)
+    out_scale = [1] * self.get_state_size(agent_id)
     phase_scale = 2
     out_scale[0] = phase_scale
     return np.array(out_scale)

   def get_goal_size(self, agent_id):
     return 0

   def get_action_size(self, agent_id):
     ctrl_size = 43  #numDof
     root_size = 7
     return ctrl_size - root_size

   def build_goal_norm_groups(self, agent_id):
     return np.array([])

   def build_goal_offset(self, agent_id):
     return np.array([])

   def build_goal_scale(self, agent_id):
     return np.array([])

   def build_action_offset(self, agent_id):
     out_offset = [0] * self.get_action_size(agent_id)
-    out_offset = [0.0000000000,0.0000000000,0.0000000000,-0.200000000,0.0000000000,0.0000000000,0.0000000000,
-      -0.200000000,0.0000000000,0.0000000000, 0.00000000, -0.2000000, 1.57000000, 0.00000000, 0.00000000,
-      0.00000000, -0.2000000, 0.00000000, 0.00000000, 0.00000000, -0.2000000, -1.5700000, 0.00000000, 0.00000000,
-      0.00000000, -0.2000000, 1.57000000, 0.00000000, 0.00000000, 0.00000000, -0.2000000, 0.00000000, 0.00000000,
-      0.00000000, -0.2000000, -1.5700000]
+    out_offset = [
+        0.0000000000, 0.0000000000, 0.0000000000, -0.200000000, 0.0000000000, 0.0000000000,
+        0.0000000000, -0.200000000, 0.0000000000, 0.0000000000, 0.00000000, -0.2000000, 1.57000000,
+        0.00000000, 0.00000000, 0.00000000, -0.2000000, 0.00000000, 0.00000000, 0.00000000,
+        -0.2000000, -1.5700000, 0.00000000, 0.00000000, 0.00000000, -0.2000000, 1.57000000,
+        0.00000000, 0.00000000, 0.00000000, -0.2000000, 0.00000000, 0.00000000, 0.00000000,
+        -0.2000000, -1.5700000
+    ]
     #see cCtCtrlUtil::BuildOffsetScalePDPrismatic and
     #see cCtCtrlUtil::BuildOffsetScalePDSpherical
     return np.array(out_offset)

   def build_action_scale(self, agent_id):
     out_scale = [1] * self.get_action_size(agent_id)
     #see cCtCtrlUtil::BuildOffsetScalePDPrismatic and
     #see cCtCtrlUtil::BuildOffsetScalePDSpherical
-    out_scale=[ 0.20833333333333,1.00000000000000,1.00000000000000,1.00000000000000,0.25000000000000,
-      1.00000000000000,1.00000000000000,1.00000000000000,0.12077294685990,1.00000000000000,
-      1.000000000000, 1.000000000000, 0.159235668789, 0.159235668789, 1.000000000000,
-      1.000000000000, 1.000000000000, 0.079617834394, 1.000000000000, 1.000000000000,
-      1.000000000000, 0.159235668789, 0.120772946859, 1.000000000000, 1.000000000000,
-      1.000000000000, 0.159235668789, 0.159235668789, 1.000000000000, 1.000000000000,
-      1.000000000000, 0.107758620689, 1.000000000000, 1.000000000000, 1.000000000000,
-      0.159235668789]
+    out_scale = [
+        0.20833333333333, 1.00000000000000, 1.00000000000000, 1.00000000000000, 0.25000000000000,
+        1.00000000000000, 1.00000000000000, 1.00000000000000, 0.12077294685990, 1.00000000000000,
+        1.000000000000, 1.000000000000, 0.159235668789, 0.159235668789, 1.000000000000,
+        1.000000000000, 1.000000000000, 0.079617834394, 1.000000000000, 1.000000000000,
+        1.000000000000, 0.159235668789, 0.120772946859, 1.000000000000, 1.000000000000,
+        1.000000000000, 0.159235668789, 0.159235668789, 1.000000000000, 1.000000000000,
+        1.000000000000, 0.107758620689, 1.000000000000, 1.000000000000, 1.000000000000,
+        0.159235668789
+    ]
     return np.array(out_scale)

   def build_action_bound_min(self, agent_id):
     #see cCtCtrlUtil::BuildBoundsPDSpherical
     out_scale = [-1] * self.get_action_size(agent_id)
-    out_scale = [-4.79999999999,-1.00000000000,-1.00000000000,-1.00000000000,-4.00000000000,
-      -1.00000000000,-1.00000000000,-1.00000000000,-7.77999999999,-1.00000000000, -1.000000000,
-      -1.000000000, -7.850000000, -6.280000000, -1.000000000, -1.000000000, -1.000000000,
-      -12.56000000, -1.000000000, -1.000000000, -1.000000000, -4.710000000,
-      -7.779999999, -1.000000000, -1.000000000, -1.000000000, -7.850000000,
-      -6.280000000, -1.000000000, -1.000000000, -1.000000000, -8.460000000,
-      -1.000000000, -1.000000000, -1.000000000, -4.710000000]
+    out_scale = [
+        -4.79999999999, -1.00000000000, -1.00000000000, -1.00000000000, -4.00000000000,
+        -1.00000000000, -1.00000000000, -1.00000000000, -7.77999999999, -1.00000000000,
+        -1.000000000, -1.000000000, -7.850000000, -6.280000000, -1.000000000, -1.000000000,
+        -1.000000000, -12.56000000, -1.000000000, -1.000000000, -1.000000000, -4.710000000,
+        -7.779999999, -1.000000000, -1.000000000, -1.000000000, -7.850000000, -6.280000000,
+        -1.000000000, -1.000000000, -1.000000000, -8.460000000, -1.000000000, -1.000000000,
+        -1.000000000, -4.710000000
+    ]

     return out_scale

   def build_action_bound_max(self, agent_id):
     out_scale = [1] * self.get_action_size(agent_id)
-    out_scale=[
-      4.799999999,1.000000000,1.000000000,1.000000000,4.000000000,1.000000000,
-      1.000000000,1.000000000,8.779999999,1.000000000, 1.0000000, 1.0000000,
-      4.7100000, 6.2800000, 1.0000000, 1.0000000, 1.0000000,
-      12.560000, 1.0000000, 1.0000000, 1.0000000, 7.8500000,
-      8.7799999, 1.0000000, 1.0000000, 1.0000000, 4.7100000,
-      6.2800000, 1.0000000, 1.0000000, 1.0000000, 10.100000,
-      1.0000000, 1.0000000, 1.0000000, 7.8500000]
+    out_scale = [
+        4.799999999, 1.000000000, 1.000000000, 1.000000000, 4.000000000, 1.000000000, 1.000000000,
+        1.000000000, 8.779999999, 1.000000000, 1.0000000, 1.0000000, 4.7100000, 6.2800000,
+        1.0000000, 1.0000000, 1.0000000, 12.560000, 1.0000000, 1.0000000, 1.0000000, 7.8500000,
+        8.7799999, 1.0000000, 1.0000000, 1.0000000, 4.7100000, 6.2800000, 1.0000000, 1.0000000,
+        1.0000000, 10.100000, 1.0000000, 1.0000000, 1.0000000, 7.8500000
+    ]
     return out_scale

   def set_mode(self, mode):
     self._mode = mode

   def need_new_action(self, agent_id):
-    if self.t>=self.needs_update_time:
-      self.needs_update_time = self.t + 1./30.
+    if self.t >= self.needs_update_time:
+      self.needs_update_time = self.t + 1. / 30.
       return True
     return False

   def record_state(self, agent_id):
     state = self._humanoid.getState()

     return np.array(state)

   def record_goal(self, agent_id):
     return np.array([])

   def calc_reward(self, agent_id):
     kinPose = self._humanoid.computePose(self._humanoid._frameFraction)
     reward = self._humanoid.getReward(kinPose)
     return reward

   def set_action(self, agent_id, action):
     #print("action=",)
     #for a in action:
     #  print(a)
     np.savetxt("pb_action.csv", action, delimiter=",")
     self.desiredPose = self._humanoid.convertActionToPose(action)
     #we need the target root position and orientation to be zero, to be compatible with deep mimic
     self.desiredPose[0] = 0
     self.desiredPose[1] = 0
     self.desiredPose[2] = 0
     self.desiredPose[3] = 0
     self.desiredPose[4] = 0
     self.desiredPose[5] = 0
     self.desiredPose[6] = 0
     target_pose = np.array(self.desiredPose)

     np.savetxt("pb_target_pose.csv", target_pose, delimiter=",")

     #print("set_action: desiredPose=", self.desiredPose)

   def log_val(self, agent_id, val):
     pass

   def update(self, timeStep):
     #print("pybullet_deep_mimic_env:update timeStep=",timeStep," t=",self.t)
     self._pybullet_client.setTimeStep(timeStep)
     self._humanoid._timeStep = timeStep

     for i in range(1):
       self.t += timeStep
       self._humanoid.setSimTime(self.t)

       if self.desiredPose:
         kinPose = self._humanoid.computePose(self._humanoid._frameFraction)
-        self._humanoid.initializePose(self._humanoid._poseInterpolator, self._humanoid._kin_model, initBase=True)
+        self._humanoid.initializePose(self._humanoid._poseInterpolator,
+                                      self._humanoid._kin_model,
+                                      initBase=True)
         #pos,orn=self._pybullet_client.getBasePositionAndOrientation(self._humanoid._sim_model)
         #self._pybullet_client.resetBasePositionAndOrientation(self._humanoid._kin_model, [pos[0]+3,pos[1],pos[2]],orn)
         #print("desiredPositions=",self.desiredPose)
-        maxForces = [0,0,0,0,0,0,0,200,200,200,200, 50,50,50,50, 200,200,200,200, 150, 90,90,90,90, 100,100,100,100, 60, 200,200,200,200, 150, 90, 90, 90, 90, 100,100,100,100, 60]
+        maxForces = [
+            0, 0, 0, 0, 0, 0, 0, 200, 200, 200, 200, 50, 50, 50, 50, 200, 200, 200, 200, 150, 90,
+            90, 90, 90, 100, 100, 100, 100, 60, 200, 200, 200, 200, 150, 90, 90, 90, 90, 100, 100,
+            100, 100, 60
+        ]

         if self._useStablePD:
-          taus = self._humanoid.computePDForces(self.desiredPose, desiredVelocities=None, maxForces=maxForces)
+          taus = self._humanoid.computePDForces(self.desiredPose,
+                                                desiredVelocities=None,
+                                                maxForces=maxForces)
           self._humanoid.applyPDForces(taus)
         else:
           self._humanoid.setJointMotors(self.desiredPose, maxForces=maxForces)

         self._pybullet_client.stepSimulation()

     self._pybullet_client.setPhysicsEngineParameter(numSubSteps=1)

   def set_sample_count(self, count):
     return

   def check_terminate(self, agent_id):
     return Env.Terminate(self.is_episode_end())

   def is_episode_end(self):
     isEnded = self._humanoid.terminates()
     #also check maximum time, 20 seconds (todo get from file)
     #print("self.t=",self.t)
-    if (self.t>20):
+    if (self.t > 20):
       isEnded = True
     return isEnded

   def check_valid_episode(self):
     #could check if limbs exceed velocity threshold
     return True

   def getKeyboardEvents(self):
     return self._pybullet_client.getKeyboardEvents()

   def isKeyTriggered(self, keys, key):
     o = ord(key)
     #print("ord=",o)
     if o in keys:
       return keys[ord(key)] & self._pybullet_client.KEY_WAS_TRIGGERED
     return False
@@ -1,54 +1,66 @@
from pybullet_utils import bullet_client
import math



class QuadrupedPoseInterpolator(object):

  def __init__(self):
    pass


  def ComputeLinVel(self,posStart, posEnd, deltaTime):
    vel = [(posEnd[0]-posStart[0])/deltaTime,(posEnd[1]-posStart[1])/deltaTime,(posEnd[2]-posStart[2])/deltaTime]

  def ComputeLinVel(self, posStart, posEnd, deltaTime):
    vel = [(posEnd[0] - posStart[0]) / deltaTime, (posEnd[1] - posStart[1]) / deltaTime,
           (posEnd[2] - posStart[2]) / deltaTime]
    return vel

  def ComputeAngVel(self,ornStart, ornEnd, deltaTime, bullet_client):
    dorn = bullet_client.getDifferenceQuaternion(ornStart,ornEnd)
    axis,angle = bullet_client.getAxisAngleFromQuaternion(dorn)
    angVel = [(axis[0]*angle)/deltaTime,(axis[1]*angle)/deltaTime,(axis[2]*angle)/deltaTime]

  def ComputeAngVel(self, ornStart, ornEnd, deltaTime, bullet_client):
    dorn = bullet_client.getDifferenceQuaternion(ornStart, ornEnd)
    axis, angle = bullet_client.getAxisAngleFromQuaternion(dorn)
    angVel = [(axis[0] * angle) / deltaTime, (axis[1] * angle) / deltaTime,
              (axis[2] * angle) / deltaTime]
    return angVel

  def ComputeAngVelRel(self,ornStart, ornEnd, deltaTime, bullet_client):
    ornStartConjugate = [-ornStart[0],-ornStart[1],-ornStart[2],ornStart[3]]
    pos_diff, q_diff =bullet_client.multiplyTransforms([0,0,0], ornStartConjugate, [0,0,0], ornEnd)
    axis,angle = bullet_client.getAxisAngleFromQuaternion(q_diff)
    angVel = [(axis[0]*angle)/deltaTime,(axis[1]*angle)/deltaTime,(axis[2]*angle)/deltaTime]

  def ComputeAngVelRel(self, ornStart, ornEnd, deltaTime, bullet_client):
    ornStartConjugate = [-ornStart[0], -ornStart[1], -ornStart[2], ornStart[3]]
    pos_diff, q_diff = bullet_client.multiplyTransforms([0, 0, 0], ornStartConjugate, [0, 0, 0],
                                                        ornEnd)
    axis, angle = bullet_client.getAxisAngleFromQuaternion(q_diff)
    angVel = [(axis[0] * angle) / deltaTime, (axis[1] * angle) / deltaTime,
              (axis[2] * angle) / deltaTime]
    return angVel

  def Slerp(self, frameFraction, frameData, frameDataNext,bullet_client ):

  def Slerp(self, frameFraction, frameData, frameDataNext, bullet_client):
    keyFrameDuration = frameData[0]
    basePos1Start = [frameData[1],frameData[2],frameData[3]]
    basePos1End = [frameDataNext[1],frameDataNext[2],frameDataNext[3]]
    self._basePos = [basePos1Start[0]+frameFraction*(basePos1End[0]-basePos1Start[0]),
                     basePos1Start[1]+frameFraction*(basePos1End[1]-basePos1Start[1]),
                     basePos1Start[2]+frameFraction*(basePos1End[2]-basePos1Start[2])]
    self._baseLinVel = self.ComputeLinVel(basePos1Start,basePos1End, keyFrameDuration)
    baseOrn1Start = [frameData[5],frameData[6], frameData[7],frameData[4]]
    baseOrn1Next = [frameDataNext[5],frameDataNext[6], frameDataNext[7],frameDataNext[4]]
    self._baseOrn = bullet_client.getQuaternionSlerp(baseOrn1Start,baseOrn1Next,frameFraction)
    self._baseAngVel = self.ComputeAngVel(baseOrn1Start,baseOrn1Next, keyFrameDuration, bullet_client)

    jointPositions=[self._basePos[0],self._basePos[1],self._basePos[2],
                    self._baseOrn[0],self._baseOrn[1],self._baseOrn[2],self._baseOrn[3]]
    jointVelocities=[self._baseLinVel[0],self._baseLinVel[1],self._baseLinVel[2],
                     self._baseAngVel[0],self._baseAngVel[1],self._baseAngVel[2]]

    for j in range (12):
      index=j+8
      jointPosStart=frameData[index]
      jointPosEnd=frameDataNext[index]
      jointPos=jointPosStart+frameFraction*(jointPosEnd-jointPosStart)
      jointVel=(jointPosEnd-jointPosStart)/keyFrameDuration
    basePos1Start = [frameData[1], frameData[2], frameData[3]]
    basePos1End = [frameDataNext[1], frameDataNext[2], frameDataNext[3]]
    self._basePos = [
        basePos1Start[0] + frameFraction * (basePos1End[0] - basePos1Start[0]),
        basePos1Start[1] + frameFraction * (basePos1End[1] - basePos1Start[1]),
        basePos1Start[2] + frameFraction * (basePos1End[2] - basePos1Start[2])
    ]
    self._baseLinVel = self.ComputeLinVel(basePos1Start, basePos1End, keyFrameDuration)
    baseOrn1Start = [frameData[5], frameData[6], frameData[7], frameData[4]]
    baseOrn1Next = [frameDataNext[5], frameDataNext[6], frameDataNext[7], frameDataNext[4]]
    self._baseOrn = bullet_client.getQuaternionSlerp(baseOrn1Start, baseOrn1Next, frameFraction)
    self._baseAngVel = self.ComputeAngVel(baseOrn1Start, baseOrn1Next, keyFrameDuration,
                                          bullet_client)

    jointPositions = [
        self._basePos[0], self._basePos[1], self._basePos[2], self._baseOrn[0], self._baseOrn[1],
        self._baseOrn[2], self._baseOrn[3]
    ]
    jointVelocities = [
        self._baseLinVel[0], self._baseLinVel[1], self._baseLinVel[2], self._baseAngVel[0],
        self._baseAngVel[1], self._baseAngVel[2]
    ]

    for j in range(12):
      index = j + 8
      jointPosStart = frameData[index]
      jointPosEnd = frameDataNext[index]
      jointPos = jointPosStart + frameFraction * (jointPosEnd - jointPosStart)
      jointVel = (jointPosEnd - jointPosStart) / keyFrameDuration
      jointPositions.append(jointPos)
      jointVelocities.append(jointVel)
    self._jointPositions = jointPositions
    self._jointVelocities = jointVelocities
    return jointPositions,jointVelocities
    return jointPositions, jointVelocities
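
The interpolation above is plain linear interpolation for the base position and quaternion slerp for the base orientation. A minimal standalone sketch of that core step, written against the same client pattern the class uses (pybullet's getQuaternionSlerp, quaternions in [x, y, z, w] order):

def lerp_frame(bullet_client, frac, pos0, pos1, orn0, orn1):
  # Linear interpolation of position between two key frames.
  pos = [pos0[i] + frac * (pos1[i] - pos0[i]) for i in range(3)]
  # Spherical interpolation of orientation between the same two frames.
  orn = bullet_client.getQuaternionSlerp(orn0, orn1, frac)
  return pos, orn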

File diff suppressed because it is too large
@@ -8,87 +8,89 @@ import pybullet as p1
import humanoid_pose_interpolator
import numpy as np

pybullet_client = bullet_client.BulletClient(connection_mode=p1.GUI)
pybullet_client = bullet_client.BulletClient(connection_mode=p1.GUI)

pybullet_client.setAdditionalSearchPath(pybullet_data.getDataPath())
z2y = pybullet_client.getQuaternionFromEuler([-math.pi*0.5,0,0])
z2y = pybullet_client.getQuaternionFromEuler([-math.pi * 0.5, 0, 0])
#planeId = pybullet_client.loadURDF("plane.urdf",[0,0,0],z2y)
planeId= pybullet_client.loadURDF("plane_implicit.urdf",[0,0,0],z2y, useMaximalCoordinates=True)
pybullet_client.changeDynamics(planeId, linkIndex=-1, lateralFriction=0.9)
planeId = pybullet_client.loadURDF("plane_implicit.urdf", [0, 0, 0],
                                   z2y,
                                   useMaximalCoordinates=True)
pybullet_client.changeDynamics(planeId, linkIndex=-1, lateralFriction=0.9)
#print("planeId=",planeId)

pybullet_client.configureDebugVisualizer(pybullet_client.COV_ENABLE_Y_AXIS_UP,1)
pybullet_client.setGravity(0,-9.8,0)
pybullet_client.configureDebugVisualizer(pybullet_client.COV_ENABLE_Y_AXIS_UP, 1)
pybullet_client.setGravity(0, -9.8, 0)

pybullet_client.setPhysicsEngineParameter(numSolverIterations=10)


mocapData = motion_capture_data.MotionCaptureData()
#motionPath = pybullet_data.getDataPath()+"/data/motions/humanoid3d_walk.txt"
motionPath = pybullet_data.getDataPath()+"/data/motions/humanoid3d_backflip.txt"
motionPath = pybullet_data.getDataPath() + "/data/motions/humanoid3d_backflip.txt"
mocapData.Load(motionPath)
timeStep = 1./600
useFixedBase=False
timeStep = 1. / 600
useFixedBase = False
humanoid = humanoid_stable_pd.HumanoidStablePD(pybullet_client, mocapData, timeStep, useFixedBase)
isInitialized = True

pybullet_client.setTimeStep(timeStep)
pybullet_client.setPhysicsEngineParameter(numSubSteps=2)
timeId = pybullet_client.addUserDebugParameter("time",0,10,0)
timeId = pybullet_client.addUserDebugParameter("time", 0, 10, 0)



def isKeyTriggered(keys, key):
  o = ord(key)
  if o in keys:
    return keys[ord(key)] & pybullet_client.KEY_WAS_TRIGGERED
  return False



animating = False
singleStep = False


t=0
t = 0
while (1):

  keys = pybullet_client.getKeyboardEvents()
  #print(keys)
  if isKeyTriggered(keys, ' '):
    animating = not animating
    animating = not animating

  if isKeyTriggered(keys, 'b'):
    singleStep = True

    singleStep = True

  if animating or singleStep:



    singleStep = False
    #t = pybullet_client.readUserDebugParameter(timeId)
    #print("t=",t)
    for i in range (1):
    for i in range(1):

      print("t=",t)
      print("t=", t)
      humanoid.setSimTime(t)


      humanoid.computePose(humanoid._frameFraction)
      pose = humanoid._poseInterpolator
      #humanoid.initializePose(pose=pose, phys_model = humanoid._sim_model, initBase=True, initializeVelocity=True)
      #humanoid.resetPose()



      desiredPose = humanoid.computePose(humanoid._frameFraction)
      #desiredPose = desiredPose.GetPose()
      #desiredPose = desiredPose.GetPose()
      #curPose = HumanoidPoseInterpolator()
      #curPose.reset()
      s = humanoid.getState()
      #np.savetxt("pb_record_state_s.csv", s, delimiter=",")
      maxForces = [0,0,0,0,0,0,0,200,200,200,200, 50,50,50,50, 200,200,200,200, 150, 90,90,90,90, 100,100,100,100, 60, 200,200,200,200, 150, 90, 90, 90, 90, 100,100,100,100, 60]
      maxForces = [
          0, 0, 0, 0, 0, 0, 0, 200, 200, 200, 200, 50, 50, 50, 50, 200, 200, 200, 200, 150, 90, 90,
          90, 90, 100, 100, 100, 100, 60, 200, 200, 200, 200, 150, 90, 90, 90, 90, 100, 100, 100,
          100, 60
      ]
      taus = humanoid.computePDForces(desiredPose, desiredVelocities=None, maxForces=maxForces)


      #print("taus=",taus)
      humanoid.applyPDForces(taus)


      pybullet_client.stepSimulation()
      t+=1./600.


      time.sleep(1./600.)
      t += 1. / 600.

      time.sleep(1. / 600.)

@@ -8,240 +8,258 @@ import motion_capture_data
import quadrupedPoseInterpolator

useConstraints = False

p = bullet_client.BulletClient(connection_mode=p1.GUI)

p = bullet_client.BulletClient(connection_mode=p1.GUI)
p.setAdditionalSearchPath(pybullet_data.getDataPath())

plane = p.loadURDF("plane.urdf")
p.setGravity(0,0,-10)
timeStep=1./500
p.setGravity(0, 0, -10)
timeStep = 1. / 500
p.setTimeStep(timeStep)
#p.setDefaultContactERP(0)
#urdfFlags = p.URDF_USE_SELF_COLLISION+p.URDF_USE_SELF_COLLISION_EXCLUDE_ALL_PARENTS
#urdfFlags = p.URDF_USE_SELF_COLLISION+p.URDF_USE_SELF_COLLISION_EXCLUDE_ALL_PARENTS
urdfFlags = p.URDF_USE_SELF_COLLISION


startPos=[0.007058990464444105, 0.03149299192130908, 0.4918981912395484]
startOrn=[0.005934649695708604, 0.7065453990917289, 0.7076373820553712, -0.0027774940359030264]
quadruped = p.loadURDF("laikago/laikago.urdf",startPos,startOrn, flags = urdfFlags,useFixedBase=False)
p.resetBasePositionAndOrientation(quadruped,startPos,startOrn)
startPos = [0.007058990464444105, 0.03149299192130908, 0.4918981912395484]
startOrn = [0.005934649695708604, 0.7065453990917289, 0.7076373820553712, -0.0027774940359030264]
quadruped = p.loadURDF("laikago/laikago.urdf",
                       startPos,
                       startOrn,
                       flags=urdfFlags,
                       useFixedBase=False)
p.resetBasePositionAndOrientation(quadruped, startPos, startOrn)
if not useConstraints:
  for j in range(p.getNumJoints(quadruped)):
    p.setJointMotorControl2(quadruped,j,p.POSITION_CONTROL,force=0)

#This cube is added as a soft constraint to keep the laikago from falling
    p.setJointMotorControl2(quadruped, j, p.POSITION_CONTROL, force=0)

#This cube is added as a soft constraint to keep the laikago from falling
#since we didn't train it yet, it doesn't balance
cube = p.loadURDF("cube_no_rotation.urdf",[0,0,-0.5],[0,0.5,0.5,0])
p.setCollisionFilterGroupMask(cube,-1,0,0)
cube = p.loadURDF("cube_no_rotation.urdf", [0, 0, -0.5], [0, 0.5, 0.5, 0])
p.setCollisionFilterGroupMask(cube, -1, 0, 0)
for j in range(p.getNumJoints(cube)):
  p.setJointMotorControl2(cube,j,p.POSITION_CONTROL,force=0)
  p.setCollisionFilterGroupMask(cube,j,0,0)
  p.changeVisualShape(cube,j,rgbaColor=[1,0,0,0])
cid = p.createConstraint(cube,p.getNumJoints(cube)-1,quadruped,-1,p.JOINT_FIXED,[0,0,0],[0,1,0],[0,0,0])
  p.setJointMotorControl2(cube, j, p.POSITION_CONTROL, force=0)
  p.setCollisionFilterGroupMask(cube, j, 0, 0)
  p.changeVisualShape(cube, j, rgbaColor=[1, 0, 0, 0])
cid = p.createConstraint(cube,
                         p.getNumJoints(cube) - 1, quadruped, -1, p.JOINT_FIXED, [0, 0, 0],
                         [0, 1, 0], [0, 0, 0])
p.changeConstraint(cid, maxForce=10)

jointIds = []
paramIds = []
jointOffsets = []
jointDirections = [-1, 1, 1, 1, 1, 1, -1, 1, 1, 1, 1, 1]
jointAngles = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]

jointIds=[]
paramIds=[]
jointOffsets=[]
jointDirections=[-1,1,1,1,1,1,-1,1,1,1,1,1]
jointAngles=[0,0,0,0,0,0,0,0,0,0,0,0]

for i in range (4):
for i in range(4):
  jointOffsets.append(0)
  jointOffsets.append(-0.7)
  jointOffsets.append(0.7)

maxForceId = p.addUserDebugParameter("maxForce",0,100,20)
maxForceId = p.addUserDebugParameter("maxForce", 0, 100, 20)

for j in range (p.getNumJoints(quadruped)):
  p.changeDynamics(quadruped,j,linearDamping=0, angularDamping=0)
  info = p.getJointInfo(quadruped,j)
  #print(info)
  jointName = info[1]
  jointType = info[2]
  if (jointType==p.JOINT_PRISMATIC or jointType==p.JOINT_REVOLUTE):
    jointIds.append(j)


startQ=[0.08389, 0.8482, -1.547832, -0.068933, 0.625726, -1.272086, 0.074398, 0.61135, -1.255892, -0.068262, 0.836745, -1.534517]
for j in range(p.getNumJoints(quadruped)):
  p.resetJointState(quadruped,jointIds[j],jointDirections[j]*startQ[j]+jointOffsets[j])
  p.changeDynamics(quadruped, j, linearDamping=0, angularDamping=0)
  info = p.getJointInfo(quadruped, j)
  #print(info)
  jointName = info[1]
  jointType = info[2]
  if (jointType == p.JOINT_PRISMATIC or jointType == p.JOINT_REVOLUTE):
    jointIds.append(j)

startQ = [
    0.08389, 0.8482, -1.547832, -0.068933, 0.625726, -1.272086, 0.074398, 0.61135, -1.255892,
    -0.068262, 0.836745, -1.534517
]
for j in range(p.getNumJoints(quadruped)):
  p.resetJointState(quadruped, jointIds[j], jointDirections[j] * startQ[j] + jointOffsets[j])

qpi = quadrupedPoseInterpolator.QuadrupedPoseInterpolator()

#enable collision between lower legs

for j in range (p.getNumJoints(quadruped)):
  print(p.getJointInfo(quadruped,j))
for j in range(p.getNumJoints(quadruped)):
  print(p.getJointInfo(quadruped, j))

#2,5,8 and 11 are the lower legs
lower_legs = [2,5,8,11]
lower_legs = [2, 5, 8, 11]
for l0 in lower_legs:
  for l1 in lower_legs:
    if (l1>l0):
      enableCollision = 1
      print("collision for pair",l0,l1, p.getJointInfo(quadruped,l0)[12],p.getJointInfo(quadruped,l1)[12], "enabled=",enableCollision)
      p.setCollisionFilterPair(quadruped, quadruped, 2,5,enableCollision)
  for l1 in lower_legs:
    if (l1 > l0):
      enableCollision = 1
      print("collision for pair", l0, l1,
            p.getJointInfo(quadruped, l0)[12],
            p.getJointInfo(quadruped, l1)[12], "enabled=", enableCollision)
      p.setCollisionFilterPair(quadruped, quadruped, 2, 5, enableCollision)

jointIds=[]
paramIds=[]
jointOffsets=[]
jointDirections=[-1,1,1,1,1,1,-1,1,1,1,1,1]
jointAngles=[0,0,0,0,0,0,0,0,0,0,0,0]
jointIds = []
paramIds = []
jointOffsets = []
jointDirections = [-1, 1, 1, 1, 1, 1, -1, 1, 1, 1, 1, 1]
jointAngles = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]

for i in range (4):
  jointOffsets.append(0)
  jointOffsets.append(-0.7)
  jointOffsets.append(0.7)
for i in range(4):
  jointOffsets.append(0)
  jointOffsets.append(-0.7)
  jointOffsets.append(0.7)

maxForceId = p.addUserDebugParameter("maxForce",0,100,20)
maxForceId = p.addUserDebugParameter("maxForce", 0, 100, 20)

for j in range (p.getNumJoints(quadruped)):
  p.changeDynamics(quadruped,j,linearDamping=0, angularDamping=0)
  info = p.getJointInfo(quadruped,j)
  #print(info)
  jointName = info[1]
  jointType = info[2]
  if (jointType==p.JOINT_PRISMATIC or jointType==p.JOINT_REVOLUTE):
    jointIds.append(j)
for j in range(p.getNumJoints(quadruped)):
  p.changeDynamics(quadruped, j, linearDamping=0, angularDamping=0)
  info = p.getJointInfo(quadruped, j)
  #print(info)
  jointName = info[1]
  jointType = info[2]
  if (jointType == p.JOINT_PRISMATIC or jointType == p.JOINT_REVOLUTE):
    jointIds.append(j)


p.getCameraImage(480,320)
p.getCameraImage(480, 320)
p.setRealTimeSimulation(0)

joints=[]

joints = []

mocapData = motion_capture_data.MotionCaptureData()

motionPath = pybullet_data.getDataPath()+"/data/motions/laikago_walk.txt"
motionPath = pybullet_data.getDataPath() + "/data/motions/laikago_walk.txt"

mocapData.Load(motionPath)
print("mocapData.NumFrames=",mocapData.NumFrames())
print("mocapData.KeyFrameDuraction=",mocapData.KeyFrameDuraction())
print("mocapData.getCycleTime=",mocapData.getCycleTime())
print("mocapData.computeCycleOffset=",mocapData.computeCycleOffset())
print("mocapData.NumFrames=", mocapData.NumFrames())
print("mocapData.KeyFrameDuraction=", mocapData.KeyFrameDuraction())
print("mocapData.getCycleTime=", mocapData.getCycleTime())
print("mocapData.computeCycleOffset=", mocapData.computeCycleOffset())

stablePD = pd_controller_stable.PDControllerStable(p)

cycleTime = mocapData.getCycleTime()
t=0
t = 0

while t<10.*cycleTime:
while t < 10. * cycleTime:
  #get interpolated joint
  keyFrameDuration = mocapData.KeyFrameDuraction()
  cycleTime = mocapData.getCycleTime()
  cycleCount = mocapData.calcCycleCount(t, cycleTime)


  #print("cycleTime=",cycleTime)
  #print("cycleCount=",cycleCount)


  #print("cycles=",cycles)
  frameTime = t - cycleCount*cycleTime
  frameTime = t - cycleCount * cycleTime
  #print("frameTime=",frameTime)
  if (frameTime<0):
  if (frameTime < 0):
    frameTime += cycleTime

  frame = int(frameTime/keyFrameDuration)
  frameNext = frame+1
  if (frameNext >= mocapData.NumFrames()):
  frame = int(frameTime / keyFrameDuration)
  frameNext = frame + 1
  if (frameNext >= mocapData.NumFrames()):
    frameNext = frame
  frameFraction = (frameTime - frame*keyFrameDuration)/(keyFrameDuration)
  frameFraction = (frameTime - frame * keyFrameDuration) / (keyFrameDuration)
  #print("frame=",frame)
  #print("frameFraction=",frameFraction)
  frameData = mocapData._motion_data['Frames'][frame]
  frameDataNext = mocapData._motion_data['Frames'][frameNext]

  jointsStr,qdot=qpi.Slerp(frameFraction, frameData, frameDataNext, p)


  jointsStr, qdot = qpi.Slerp(frameFraction, frameData, frameDataNext, p)

  maxForce = p.readUserDebugParameter(maxForceId)
  print("jointIds=",jointIds)
  print("jointIds=", jointIds)

  if useConstraints:
    for j in range (12):
    for j in range(12):
      #skip the base positional dofs
      targetPos = float(jointsStr[j+7])
      p.setJointMotorControl2(quadruped,jointIds[j],p.POSITION_CONTROL,jointDirections[j]*targetPos+jointOffsets[j], force=maxForce)

      targetPos = float(jointsStr[j + 7])
      p.setJointMotorControl2(quadruped,
                              jointIds[j],
                              p.POSITION_CONTROL,
                              jointDirections[j] * targetPos + jointOffsets[j],
                              force=maxForce)

  else:
    desiredPositions=[]
    for j in range (7):
    desiredPositions = []
    for j in range(7):
      targetPosUnmodified = float(jointsStr[j])
      desiredPositions.append(targetPosUnmodified)
    for j in range (12):
      targetPosUnmodified = float(jointsStr[j+7])
      targetPos=jointDirections[j]*targetPosUnmodified+jointOffsets[j]
    for j in range(12):
      targetPosUnmodified = float(jointsStr[j + 7])
      targetPos = jointDirections[j] * targetPosUnmodified + jointOffsets[j]
      desiredPositions.append(targetPos)
    numBaseDofs=6
    totalDofs=12+numBaseDofs
    desiredVelocities=None
    if desiredVelocities==None:
      desiredVelocities = [0]*totalDofs
    taus = stablePD.computePD(bodyUniqueId=quadruped,
                              jointIndices = jointIds,
                              desiredPositions = desiredPositions,
                              desiredVelocities = desiredVelocities,
                              kps = [4000]*totalDofs,
                              kds = [40]*totalDofs,
                              maxForces = [500]*totalDofs,
                              timeStep=timeStep)

    dofIndex=6
    numBaseDofs = 6
    totalDofs = 12 + numBaseDofs
    desiredVelocities = None
    if desiredVelocities == None:
      desiredVelocities = [0] * totalDofs
    taus = stablePD.computePD(bodyUniqueId=quadruped,
                              jointIndices=jointIds,
                              desiredPositions=desiredPositions,
                              desiredVelocities=desiredVelocities,
                              kps=[4000] * totalDofs,
                              kds=[40] * totalDofs,
                              maxForces=[500] * totalDofs,
                              timeStep=timeStep)

    dofIndex = 6
    scaling = 1
    for index in range (len(jointIds)):
    for index in range(len(jointIds)):
      jointIndex = jointIds[index]
      force=[scaling*taus[dofIndex]]
      print("force[", jointIndex,"]=",force)
      p.setJointMotorControlMultiDof(quadruped, jointIndex, controlMode=p.TORQUE_CONTROL, force=force)
      dofIndex+=1

      force = [scaling * taus[dofIndex]]
      print("force[", jointIndex, "]=", force)
      p.setJointMotorControlMultiDof(quadruped,
                                     jointIndex,
                                     controlMode=p.TORQUE_CONTROL,
                                     force=force)
      dofIndex += 1

  p.stepSimulation()
  t+=timeStep
  t += timeStep
  time.sleep(timeStep)

useOrgData=False
useOrgData = False
if useOrgData:
  with open("data1.txt","r") as filestream:
    for line in filestream:
      maxForce = p.readUserDebugParameter(maxForceId)
      currentline = line.split(",")
      frame = currentline[0]
      t = currentline[1]
      joints=currentline[2:14]
      for j in range (12):
        targetPos = float(joints[j])
        p.setJointMotorControl2(quadruped,jointIds[j],p.POSITION_CONTROL,jointDirections[j]*targetPos+jointOffsets[j], force=maxForce)
      p.stepSimulation()
      for lower_leg in lower_legs:
        #print("points for ", quadruped, " link: ", lower_leg)
        pts = p.getContactPoints(quadruped,-1, lower_leg)
        #print("num points=",len(pts))
        #for pt in pts:
        # print(pt[9])
        time.sleep(1./500.)


for j in range (p.getNumJoints(quadruped)):
  p.changeDynamics(quadruped,j,linearDamping=0, angularDamping=0)
  info = p.getJointInfo(quadruped,j)
  js = p.getJointState(quadruped,j)
  #print(info)
  jointName = info[1]
  jointType = info[2]
  if (jointType==p.JOINT_PRISMATIC or jointType==p.JOINT_REVOLUTE):
    paramIds.append(p.addUserDebugParameter(jointName.decode("utf-8"),-4,4,(js[0]-jointOffsets[j])/jointDirections[j]))
  with open("data1.txt", "r") as filestream:
    for line in filestream:
      maxForce = p.readUserDebugParameter(maxForceId)
      currentline = line.split(",")
      frame = currentline[0]
      t = currentline[1]
      joints = currentline[2:14]
      for j in range(12):
        targetPos = float(joints[j])
        p.setJointMotorControl2(quadruped,
                                jointIds[j],
                                p.POSITION_CONTROL,
                                jointDirections[j] * targetPos + jointOffsets[j],
                                force=maxForce)
      p.stepSimulation()
      for lower_leg in lower_legs:
        #print("points for ", quadruped, " link: ", lower_leg)
        pts = p.getContactPoints(quadruped, -1, lower_leg)
        #print("num points=",len(pts))
        #for pt in pts:
        # print(pt[9])
        time.sleep(1. / 500.)

for j in range(p.getNumJoints(quadruped)):
  p.changeDynamics(quadruped, j, linearDamping=0, angularDamping=0)
  info = p.getJointInfo(quadruped, j)
  js = p.getJointState(quadruped, j)
  #print(info)
  jointName = info[1]
  jointType = info[2]
  if (jointType == p.JOINT_PRISMATIC or jointType == p.JOINT_REVOLUTE):
    paramIds.append(
        p.addUserDebugParameter(jointName.decode("utf-8"), -4, 4,
                                (js[0] - jointOffsets[j]) / jointDirections[j]))

p.setRealTimeSimulation(1)

while (1):

  for i in range(len(paramIds)):
    c = paramIds[i]
    targetPos = p.readUserDebugParameter(c)
    maxForce = p.readUserDebugParameter(maxForceId)
    p.setJointMotorControl2(quadruped,jointIds[i],p.POSITION_CONTROL,jointDirections[i]*targetPos+jointOffsets[i], force=maxForce)


  for i in range(len(paramIds)):
    c = paramIds[i]
    targetPos = p.readUserDebugParameter(c)
    maxForce = p.readUserDebugParameter(maxForceId)
    p.setJointMotorControl2(quadruped,
                            jointIds[i],
                            p.POSITION_CONTROL,
                            jointDirections[i] * targetPos + jointOffsets[i],
                            force=maxForce)
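
Throughout this script, mocap joint angles are retargeted into URDF joint space as q_urdf = jointDirections[j] * q_mocap + jointOffsets[j], and the debug sliders are seeded with the inverse map. A minimal sketch of the pair (pure functions, no pybullet calls):

def mocap_to_urdf(q_mocap, direction, offset):
  # Forward map, as used when commanding the motors above.
  return direction * q_mocap + offset

def urdf_to_mocap(q_urdf, direction, offset):
  # Inverse map, as used when seeding the sliders from getJointState.
  return (q_urdf - offset) / direction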

@@ -5,17 +5,18 @@ import pybullet_data

AGENT_TYPE_KEY = "AgentType"

def build_agent(world, id, file):
  agent = None
  with open(pybullet_data.getDataPath()+"/"+file) as data_file:
    json_data = json.load(data_file)

  assert AGENT_TYPE_KEY in json_data
  agent_type = json_data[AGENT_TYPE_KEY]

  if (agent_type == PPOAgent.NAME):
    agent = PPOAgent(world, id, json_data)
  else:
    assert False, 'Unsupported agent type: ' + agent_type

  return agent
def build_agent(world, id, file):
  agent = None
  with open(pybullet_data.getDataPath() + "/" + file) as data_file:
    json_data = json.load(data_file)

  assert AGENT_TYPE_KEY in json_data
  agent_type = json_data[AGENT_TYPE_KEY]

  if (agent_type == PPOAgent.NAME):
    agent = PPOAgent(world, id, json_data)
  else:
    assert False, 'Unsupported agent type: ' + agent_type

  return agent

@@ -2,53 +2,54 @@ import json
import numpy as np
import pybullet_utils.math_util as MathUtil


class ExpParams(object):
  RATE_KEY = 'Rate'
  INIT_ACTION_RATE_KEY = 'InitActionRate'
  NOISE_KEY = 'Noise'
  NOISE_INTERNAL_KEY = 'NoiseInternal'
  TEMP_KEY = 'Temp'
  RATE_KEY = 'Rate'
  INIT_ACTION_RATE_KEY = 'InitActionRate'
  NOISE_KEY = 'Noise'
  NOISE_INTERNAL_KEY = 'NoiseInternal'
  TEMP_KEY = 'Temp'

  def __init__(self):
    self.rate = 0.2
    self.init_action_rate = 0
    self.noise = 0.1
    self.noise_internal = 0
    self.temp = 0.1
    return
  def __init__(self):
    self.rate = 0.2
    self.init_action_rate = 0
    self.noise = 0.1
    self.noise_internal = 0
    self.temp = 0.1
    return

  def __str__(self):
    str = ''
    str += '{}: {:.2f}\n'.format(self.RATE_KEY, self.rate)
    str += '{}: {:.2f}\n'.format(self.INIT_ACTION_RATE_KEY, self.init_action_rate)
    str += '{}: {:.2f}\n'.format(self.NOISE_KEY, self.noise)
    str += '{}: {:.2f}\n'.format(self.NOISE_INTERNAL_KEY, self.noise_internal)
    str += '{}: {:.2f}\n'.format(self.TEMP_KEY, self.temp)
    return str
  def __str__(self):
    str = ''
    str += '{}: {:.2f}\n'.format(self.RATE_KEY, self.rate)
    str += '{}: {:.2f}\n'.format(self.INIT_ACTION_RATE_KEY, self.init_action_rate)
    str += '{}: {:.2f}\n'.format(self.NOISE_KEY, self.noise)
    str += '{}: {:.2f}\n'.format(self.NOISE_INTERNAL_KEY, self.noise_internal)
    str += '{}: {:.2f}\n'.format(self.TEMP_KEY, self.temp)
    return str

  def load(self, json_data):
    if (self.RATE_KEY in json_data):
      self.rate = json_data[self.RATE_KEY]
  def load(self, json_data):
    if (self.RATE_KEY in json_data):
      self.rate = json_data[self.RATE_KEY]

    if (self.INIT_ACTION_RATE_KEY in json_data):
      self.init_action_rate = json_data[self.INIT_ACTION_RATE_KEY]
    if (self.INIT_ACTION_RATE_KEY in json_data):
      self.init_action_rate = json_data[self.INIT_ACTION_RATE_KEY]

    if (self.NOISE_KEY in json_data):
      self.noise = json_data[self.NOISE_KEY]
    if (self.NOISE_KEY in json_data):
      self.noise = json_data[self.NOISE_KEY]

    if (self.NOISE_INTERNAL_KEY in json_data):
      self.noise_internal = json_data[self.NOISE_INTERNAL_KEY]
    if (self.NOISE_INTERNAL_KEY in json_data):
      self.noise_internal = json_data[self.NOISE_INTERNAL_KEY]

    if (self.TEMP_KEY in json_data):
      self.temp = json_data[self.TEMP_KEY]
    if (self.TEMP_KEY in json_data):
      self.temp = json_data[self.TEMP_KEY]

    return
    return

  def lerp(self, other, t):
    lerp_params = ExpParams()
    lerp_params.rate = MathUtil.lerp(self.rate, other.rate, t)
    lerp_params.init_action_rate = MathUtil.lerp(self.init_action_rate, other.init_action_rate, t)
    lerp_params.noise = MathUtil.lerp(self.noise, other.noise, t)
    lerp_params.noise_internal = MathUtil.lerp(self.noise_internal, other.noise_internal, t)
    lerp_params.temp = MathUtil.log_lerp(self.temp, other.temp, t)
    return lerp_params
  def lerp(self, other, t):
    lerp_params = ExpParams()
    lerp_params.rate = MathUtil.lerp(self.rate, other.rate, t)
    lerp_params.init_action_rate = MathUtil.lerp(self.init_action_rate, other.init_action_rate, t)
    lerp_params.noise = MathUtil.lerp(self.noise, other.noise, t)
    lerp_params.noise_internal = MathUtil.lerp(self.noise_internal, other.noise_internal, t)
    lerp_params.temp = MathUtil.log_lerp(self.temp, other.temp, t)
    return lerp_params
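
Usage sketch for the annealing schedule above, with hypothetical start/end values, and assuming (as the name suggests) that MathUtil.log_lerp interpolates in log space while lerp is linear:

start, end = ExpParams(), ExpParams()
start.noise, end.noise = 0.2, 0.05
start.temp, end.temp = 1.0, 0.1
half = start.lerp(end, 0.5)
print(half.noise)  # 0.125, the linear midpoint
print(half.temp)   # ~0.316, the geometric midpoint sqrt(1.0 * 0.1)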

@@ -1 +1 @@
from . import *
from . import *

@@ -3,11 +3,12 @@ import pybullet_envs.deep_mimic.learning.tf_util as TFUtil

NAME = "fc_2layers_1024units"

def build_net(input_tfs, reuse=False):
  layers = [1024, 512]
  activation = tf.nn.relu

  input_tf = tf.concat(axis=-1, values=input_tfs)
  h = TFUtil.fc_net(input_tf, layers, activation=activation, reuse=reuse)
  h = activation(h)
  return h
def build_net(input_tfs, reuse=False):
  layers = [1024, 512]
  activation = tf.nn.relu

  input_tf = tf.concat(axis=-1, values=input_tfs)
  h = TFUtil.fc_net(input_tf, layers, activation=activation, reuse=reuse)
  h = activation(h)
  return h

@@ -1,11 +1,12 @@
import pybullet_envs.deep_mimic.learning.nets.fc_2layers_1024units as fc_2layers_1024units

def build_net(net_name, input_tfs, reuse=False):
  net = None

  if (net_name == fc_2layers_1024units.NAME):
    net = fc_2layers_1024units.build_net(input_tfs, reuse)
  else:
    assert False, 'Unsupported net: ' + net_name

  return net
def build_net(net_name, input_tfs, reuse=False):
  net = None

  if (net_name == fc_2layers_1024units.NAME):
    net = fc_2layers_1024units.build_net(input_tfs, reuse)
  else:
    assert False, 'Unsupported net: ' + net_name

  return net

@@ -3,147 +3,149 @@ import copy
import pybullet_utils.mpi_util as MPIUtil
from pybullet_utils.logger import Logger


class Normalizer(object):
  CHECK_SYNC_COUNT = 50000 # check synchronization after a certain number of entries
  CHECK_SYNC_COUNT = 50000  # check synchronization after a certain number of entries

  # these group IDs must be the same as those in CharController.h
  NORM_GROUP_SINGLE = 0
  NORM_GROUP_NONE = -1
  # these group IDs must be the same as those in CharController.h
  NORM_GROUP_SINGLE = 0
  NORM_GROUP_NONE = -1

  class Group(object):
    def __init__(self, id, indices):
      self.id = id
      self.indices = indices
      return
  class Group(object):

  def __init__(self, size, groups_ids=None, eps=0.02, clip=np.inf):
    self.eps = eps
    self.clip = clip
    self.mean = np.zeros(size)
    self.mean_sq = np.zeros(size)
    self.std = np.ones(size)
    self.count = 0
    self.groups = self._build_groups(groups_ids)
    def __init__(self, id, indices):
      self.id = id
      self.indices = indices
      return

    self.new_count = 0
    self.new_sum = np.zeros_like(self.mean)
    self.new_sum_sq = np.zeros_like(self.mean_sq)
    return
  def __init__(self, size, groups_ids=None, eps=0.02, clip=np.inf):
    self.eps = eps
    self.clip = clip
    self.mean = np.zeros(size)
    self.mean_sq = np.zeros(size)
    self.std = np.ones(size)
    self.count = 0
    self.groups = self._build_groups(groups_ids)

  def record(self, x):
    size = self.get_size()
    is_array = isinstance(x, np.ndarray)
    if not is_array:
      assert(size == 1)
      x = np.array([[x]])
    self.new_count = 0
    self.new_sum = np.zeros_like(self.mean)
    self.new_sum_sq = np.zeros_like(self.mean_sq)
    return

    assert x.shape[-1] == size, \
      Logger.print2('Normalizer shape mismatch, expecting size {:d}, but got {:d}'.format(size, x.shape[-1]))
    x = np.reshape(x, [-1, size])
  def record(self, x):
    size = self.get_size()
    is_array = isinstance(x, np.ndarray)
    if not is_array:
      assert (size == 1)
      x = np.array([[x]])

    self.new_count += x.shape[0]
    self.new_sum += np.sum(x, axis=0)
    self.new_sum_sq += np.sum(np.square(x), axis=0)
    return
    assert x.shape[-1] == size, \
      Logger.print2('Normalizer shape mismatch, expecting size {:d}, but got {:d}'.format(size, x.shape[-1]))
    x = np.reshape(x, [-1, size])

  def update(self):
    new_count = MPIUtil.reduce_sum(self.new_count)
    new_sum = MPIUtil.reduce_sum(self.new_sum)
    new_sum_sq = MPIUtil.reduce_sum(self.new_sum_sq)
    self.new_count += x.shape[0]
    self.new_sum += np.sum(x, axis=0)
    self.new_sum_sq += np.sum(np.square(x), axis=0)
    return

    new_total = self.count + new_count
    if (self.count // self.CHECK_SYNC_COUNT != new_total // self.CHECK_SYNC_COUNT):
      assert self.check_synced(), Logger.print2('Normalizer parameters desynchronized')
  def update(self):
    new_count = MPIUtil.reduce_sum(self.new_count)
    new_sum = MPIUtil.reduce_sum(self.new_sum)
    new_sum_sq = MPIUtil.reduce_sum(self.new_sum_sq)

    if new_count > 0:
      new_mean = self._process_group_data(new_sum / new_count, self.mean)
      new_mean_sq = self._process_group_data(new_sum_sq / new_count, self.mean_sq)
      w_old = float(self.count) / new_total
      w_new = float(new_count) / new_total
    new_total = self.count + new_count
    if (self.count // self.CHECK_SYNC_COUNT != new_total // self.CHECK_SYNC_COUNT):
      assert self.check_synced(), Logger.print2('Normalizer parameters desynchronized')

      self.mean = w_old * self.mean + w_new * new_mean
      self.mean_sq = w_old * self.mean_sq + w_new * new_mean_sq
      self.count = new_total
      self.std = self.calc_std(self.mean, self.mean_sq)
    if new_count > 0:
      new_mean = self._process_group_data(new_sum / new_count, self.mean)
      new_mean_sq = self._process_group_data(new_sum_sq / new_count, self.mean_sq)
      w_old = float(self.count) / new_total
      w_new = float(new_count) / new_total

      self.new_count = 0
      self.new_sum.fill(0)
      self.new_sum_sq.fill(0)
      self.mean = w_old * self.mean + w_new * new_mean
      self.mean_sq = w_old * self.mean_sq + w_new * new_mean_sq
      self.count = new_total
      self.std = self.calc_std(self.mean, self.mean_sq)

    return
      self.new_count = 0
      self.new_sum.fill(0)
      self.new_sum_sq.fill(0)

  def get_size(self):
    return self.mean.size
    return

  def set_mean_std(self, mean, std):
    size = self.get_size()
    is_array = isinstance(mean, np.ndarray) and isinstance(std, np.ndarray)

    if not is_array:
      assert(size == 1)
      mean = np.array([mean])
      std = np.array([std])
  def get_size(self):
    return self.mean.size

    assert len(mean) == size and len(std) == size, \
      Logger.print2('Normalizer shape mismatch, expecting size {:d}, but got {:d} and {:d}'.format(size, len(mean), len(std)))

    self.mean = mean
    self.std = std
    self.mean_sq = self.calc_mean_sq(self.mean, self.std)
    return
  def set_mean_std(self, mean, std):
    size = self.get_size()
    is_array = isinstance(mean, np.ndarray) and isinstance(std, np.ndarray)

  def normalize(self, x):
    norm_x = (x - self.mean) / self.std
    norm_x = np.clip(norm_x, -self.clip, self.clip)
    return norm_x
    if not is_array:
      assert (size == 1)
      mean = np.array([mean])
      std = np.array([std])

  def unnormalize(self, norm_x):
    x = norm_x * self.std + self.mean
    return x
    assert len(mean) == size and len(std) == size, \
      Logger.print2('Normalizer shape mismatch, expecting size {:d}, but got {:d} and {:d}'.format(size, len(mean), len(std)))

  def calc_std(self, mean, mean_sq):
    var = mean_sq - np.square(mean)
    # some time floating point errors can lead to small negative numbers
    var = np.maximum(var, 0)
    std = np.sqrt(var)
    std = np.maximum(std, self.eps)
    return std
    self.mean = mean
    self.std = std
    self.mean_sq = self.calc_mean_sq(self.mean, self.std)
    return

  def calc_mean_sq(self, mean, std):
    return np.square(std) + np.square(self.mean)
  def normalize(self, x):
    norm_x = (x - self.mean) / self.std
    norm_x = np.clip(norm_x, -self.clip, self.clip)
    return norm_x

  def check_synced(self):
    synced = True
    if MPIUtil.is_root_proc():
      vars = np.concatenate([self.mean, self.mean_sq])
      MPIUtil.bcast(vars)
    else:
      vars_local = np.concatenate([self.mean, self.mean_sq])
      vars_root = np.empty_like(vars_local)
      MPIUtil.bcast(vars_root)
      synced = (vars_local == vars_root).all()
    return synced
  def unnormalize(self, norm_x):
    x = norm_x * self.std + self.mean
    return x

  def _build_groups(self, groups_ids):
    groups = []
    if groups_ids is None:
      curr_id = self.NORM_GROUP_SINGLE
      curr_list = np.arange(self.get_size()).astype(np.int32)
      groups.append(self.Group(curr_id, curr_list))
    else:
      ids = np.unique(groups_ids)
      for id in ids:
        curr_list = np.nonzero(groups_ids == id)[0].astype(np.int32)
        groups.append(self.Group(id, curr_list))
  def calc_std(self, mean, mean_sq):
    var = mean_sq - np.square(mean)
    # some time floating point errors can lead to small negative numbers
    var = np.maximum(var, 0)
    std = np.sqrt(var)
    std = np.maximum(std, self.eps)
    return std

    return groups
  def calc_mean_sq(self, mean, std):
    return np.square(std) + np.square(self.mean)

  def _process_group_data(self, new_data, old_data):
    proc_data = new_data.copy()
    for group in self.groups:
      if group.id == self.NORM_GROUP_NONE:
        proc_data[group.indices] = old_data[group.indices]
      elif group.id != self.NORM_GROUP_SINGLE:
        avg = np.mean(new_data[group.indices])
        proc_data[group.indices] = avg
    return proc_data
  def check_synced(self):
    synced = True
    if MPIUtil.is_root_proc():
      vars = np.concatenate([self.mean, self.mean_sq])
      MPIUtil.bcast(vars)
    else:
      vars_local = np.concatenate([self.mean, self.mean_sq])
      vars_root = np.empty_like(vars_local)
      MPIUtil.bcast(vars_root)
      synced = (vars_local == vars_root).all()
    return synced

  def _build_groups(self, groups_ids):
    groups = []
    if groups_ids is None:
      curr_id = self.NORM_GROUP_SINGLE
      curr_list = np.arange(self.get_size()).astype(np.int32)
      groups.append(self.Group(curr_id, curr_list))
    else:
      ids = np.unique(groups_ids)
      for id in ids:
        curr_list = np.nonzero(groups_ids == id)[0].astype(np.int32)
        groups.append(self.Group(id, curr_list))

    return groups

  def _process_group_data(self, new_data, old_data):
    proc_data = new_data.copy()
    for group in self.groups:
      if group.id == self.NORM_GROUP_NONE:
        proc_data[group.indices] = old_data[group.indices]
      elif group.id != self.NORM_GROUP_SINGLE:
        avg = np.mean(new_data[group.indices])
        proc_data[group.indices] = avg
    return proc_data
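
Single-process usage sketch for the running Normalizer above (with one MPI rank, reduce_sum is effectively a local sum); the import path is assumed from this tree's layout:

import numpy as np
from pybullet_envs.deep_mimic.learning.normalizer import Normalizer

norm = Normalizer(3)
batch = np.random.randn(100, 3) * 5.0 + 2.0
norm.record(batch)  # accumulate new_sum / new_sum_sq locally
norm.update()       # fold the accumulators into mean, mean_sq and std
x = norm.normalize(batch[0])  # roughly zero mean and unit variance, clipped
assert np.allclose(norm.unnormalize(x), batch[0])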

@@ -1,46 +1,47 @@
import numpy as np
from pybullet_envs.deep_mimic.env.env import Env


class Path(object):
  def __init__(self):
    self.clear()
    return

  def pathlength(self):
    return len(self.actions)
  def __init__(self):
    self.clear()
    return

  def is_valid(self):
    valid = True
    l = self.pathlength()
    valid &= len(self.states) == l + 1
    valid &= len(self.goals) == l + 1
    valid &= len(self.actions) == l
    valid &= len(self.logps) == l
    valid &= len(self.rewards) == l
    valid &= len(self.flags) == l
  def pathlength(self):
    return len(self.actions)

    return valid
  def is_valid(self):
    valid = True
    l = self.pathlength()
    valid &= len(self.states) == l + 1
    valid &= len(self.goals) == l + 1
    valid &= len(self.actions) == l
    valid &= len(self.logps) == l
    valid &= len(self.rewards) == l
    valid &= len(self.flags) == l

  def check_vals(self):
    for vals in [self.states, self.goals, self.actions, self.logps,
                 self.rewards]:
      for v in vals:
        if not np.isfinite(v).all():
          return False
    return True
    return valid

  def clear(self):
    self.states = []
    self.goals = []
    self.actions = []
    self.logps = []
    self.rewards = []
    self.flags = []
    self.terminate = Env.Terminate.Null
    return
  def check_vals(self):
    for vals in [self.states, self.goals, self.actions, self.logps, self.rewards]:
      for v in vals:
        if not np.isfinite(v).all():
          return False
    return True

  def get_pathlen(self):
    return len(self.rewards)
  def clear(self):
    self.states = []
    self.goals = []
    self.actions = []
    self.logps = []
    self.rewards = []
    self.flags = []
    self.terminate = Env.Terminate.Null
    return

  def calc_return(self):
    return sum(self.rewards)
  def get_pathlen(self):
    return len(self.rewards)

  def calc_return(self):
    return sum(self.rewards)
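
A sketch of how a rollout fills a Path so that is_valid holds (one more state and goal than actions); the import path is assumed:

from pybullet_envs.deep_mimic.learning.path import Path

path = Path()
path.states.append([0.0])  # initial state
path.goals.append([0.0])   # initial goal
for t in range(3):
  path.actions.append([0.1 * t])
  path.logps.append(0.0)
  path.rewards.append(1.0)
  path.flags.append(0)
  path.states.append([0.1 * (t + 1)])  # one more state than actions
  path.goals.append([0.0])
assert path.is_valid() and path.check_vals()
print(path.calc_return())  # 3.0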
|
||||
|
||||
@@ -13,341 +13,343 @@ import pybullet_utils.mpi_util as MPIUtil
|
||||
import pybullet_utils.math_util as MathUtil
|
||||
from pybullet_envs.deep_mimic.env.action_space import ActionSpace
|
||||
from pybullet_envs.deep_mimic.env.env import Env
|
||||
|
||||
'''
|
||||
Policy Gradient Agent
|
||||
'''
|
||||
|
||||
|
||||
class PGAgent(TFAgent):
|
||||
NAME = 'PG'
|
||||
NAME = 'PG'
|
||||
|
||||
ACTOR_NET_KEY = 'ActorNet'
|
||||
ACTOR_STEPSIZE_KEY = 'ActorStepsize'
|
||||
ACTOR_MOMENTUM_KEY = 'ActorMomentum'
|
||||
ACTOR_WEIGHT_DECAY_KEY = 'ActorWeightDecay'
|
||||
ACTOR_INIT_OUTPUT_SCALE_KEY = 'ActorInitOutputScale'
|
||||
ACTOR_NET_KEY = 'ActorNet'
|
||||
ACTOR_STEPSIZE_KEY = 'ActorStepsize'
|
||||
ACTOR_MOMENTUM_KEY = 'ActorMomentum'
|
||||
ACTOR_WEIGHT_DECAY_KEY = 'ActorWeightDecay'
|
||||
ACTOR_INIT_OUTPUT_SCALE_KEY = 'ActorInitOutputScale'
|
||||
|
||||
CRITIC_NET_KEY = 'CriticNet'
|
||||
CRITIC_STEPSIZE_KEY = 'CriticStepsize'
|
||||
CRITIC_MOMENTUM_KEY = 'CriticMomentum'
|
||||
CRITIC_WEIGHT_DECAY_KEY = 'CriticWeightDecay'
|
||||
|
||||
EXP_ACTION_FLAG = 1 << 0
|
||||
CRITIC_NET_KEY = 'CriticNet'
|
||||
CRITIC_STEPSIZE_KEY = 'CriticStepsize'
|
||||
CRITIC_MOMENTUM_KEY = 'CriticMomentum'
|
||||
CRITIC_WEIGHT_DECAY_KEY = 'CriticWeightDecay'
|
||||
|
||||
def __init__(self, world, id, json_data):
|
||||
self._exp_action = False
|
||||
super().__init__(world, id, json_data)
|
||||
return
|
||||
EXP_ACTION_FLAG = 1 << 0
|
||||
|
||||
def reset(self):
|
||||
super().reset()
|
||||
self._exp_action = False
|
||||
return
|
||||
def __init__(self, world, id, json_data):
|
||||
self._exp_action = False
|
||||
super().__init__(world, id, json_data)
|
||||
return
|
||||
|
||||
def _check_action_space(self):
|
||||
action_space = self.get_action_space()
|
||||
return action_space == ActionSpace.Continuous
|
||||
def reset(self):
|
||||
super().reset()
|
||||
self._exp_action = False
|
||||
return
|
||||
|
||||
def _load_params(self, json_data):
|
||||
super()._load_params(json_data)
|
||||
self.val_min, self.val_max = self._calc_val_bounds(self.discount)
|
||||
self.val_fail, self.val_succ = self._calc_term_vals(self.discount)
|
||||
return
|
||||
def _check_action_space(self):
|
||||
action_space = self.get_action_space()
|
||||
return action_space == ActionSpace.Continuous
|
||||
|
||||
def _build_nets(self, json_data):
|
||||
assert self.ACTOR_NET_KEY in json_data
|
||||
assert self.CRITIC_NET_KEY in json_data
|
||||
def _load_params(self, json_data):
|
||||
super()._load_params(json_data)
|
||||
self.val_min, self.val_max = self._calc_val_bounds(self.discount)
|
||||
self.val_fail, self.val_succ = self._calc_term_vals(self.discount)
|
||||
return
|
||||
|
||||
actor_net_name = json_data[self.ACTOR_NET_KEY]
|
||||
critic_net_name = json_data[self.CRITIC_NET_KEY]
|
||||
actor_init_output_scale = 1 if (self.ACTOR_INIT_OUTPUT_SCALE_KEY not in json_data) else json_data[self.ACTOR_INIT_OUTPUT_SCALE_KEY]
|
||||
|
||||
s_size = self.get_state_size()
|
||||
g_size = self.get_goal_size()
|
||||
a_size = self.get_action_size()
|
||||
def _build_nets(self, json_data):
|
||||
assert self.ACTOR_NET_KEY in json_data
|
||||
assert self.CRITIC_NET_KEY in json_data
|
||||
|
||||
# setup input tensors
|
||||
self.s_tf = tf.placeholder(tf.float32, shape=[None, s_size], name="s") # observations
|
||||
self.tar_val_tf = tf.placeholder(tf.float32, shape=[None], name="tar_val") # target value s
|
||||
self.adv_tf = tf.placeholder(tf.float32, shape=[None], name="adv") # advantage
|
||||
self.a_tf = tf.placeholder(tf.float32, shape=[None, a_size], name="a") # target actions
|
||||
self.g_tf = tf.placeholder(tf.float32, shape=([None, g_size] if self.has_goal() else None), name="g") # goals
|
||||
actor_net_name = json_data[self.ACTOR_NET_KEY]
|
||||
critic_net_name = json_data[self.CRITIC_NET_KEY]
|
||||
actor_init_output_scale = 1 if (self.ACTOR_INIT_OUTPUT_SCALE_KEY not in json_data
|
||||
) else json_data[self.ACTOR_INIT_OUTPUT_SCALE_KEY]
|
||||
|
||||
with tf.variable_scope('main'):
|
||||
with tf.variable_scope('actor'):
|
||||
self.actor_tf = self._build_net_actor(actor_net_name, actor_init_output_scale)
|
||||
with tf.variable_scope('critic'):
|
||||
self.critic_tf = self._build_net_critic(critic_net_name)
|
||||
s_size = self.get_state_size()
|
||||
g_size = self.get_goal_size()
|
||||
a_size = self.get_action_size()
|
||||
|
||||
if (self.actor_tf != None):
|
||||
Logger.print2('Built actor net: ' + actor_net_name)
|
||||
# setup input tensors
|
||||
self.s_tf = tf.placeholder(tf.float32, shape=[None, s_size], name="s") # observations
|
||||
self.tar_val_tf = tf.placeholder(tf.float32, shape=[None], name="tar_val") # target value s
|
||||
self.adv_tf = tf.placeholder(tf.float32, shape=[None], name="adv") # advantage
|
||||
self.a_tf = tf.placeholder(tf.float32, shape=[None, a_size], name="a") # target actions
|
||||
self.g_tf = tf.placeholder(tf.float32,
|
||||
shape=([None, g_size] if self.has_goal() else None),
|
||||
name="g") # goals
|
||||
|
||||
if (self.critic_tf != None):
|
||||
Logger.print2('Built critic net: ' + critic_net_name)
|
||||
with tf.variable_scope('main'):
|
||||
with tf.variable_scope('actor'):
|
||||
self.actor_tf = self._build_net_actor(actor_net_name, actor_init_output_scale)
|
||||
with tf.variable_scope('critic'):
|
||||
self.critic_tf = self._build_net_critic(critic_net_name)
|
||||
|
||||
return
|
||||
if (self.actor_tf != None):
|
||||
Logger.print2('Built actor net: ' + actor_net_name)
|
||||
|
||||
def _build_normalizers(self):
|
||||
super()._build_normalizers()
|
||||
with self.sess.as_default(), self.graph.as_default(), tf.variable_scope(self.tf_scope):
|
||||
with tf.variable_scope(self.RESOURCE_SCOPE):
|
||||
val_offset, val_scale = self._calc_val_offset_scale(self.discount)
|
||||
self.val_norm = TFNormalizer(self.sess, 'val_norm', 1)
|
||||
self.val_norm.set_mean_std(-val_offset, 1.0 / val_scale)
|
||||
return
|
||||
if (self.critic_tf != None):
|
||||
Logger.print2('Built critic net: ' + critic_net_name)
|
||||
|
||||
def _init_normalizers(self):
|
||||
super()._init_normalizers()
|
||||
with self.sess.as_default(), self.graph.as_default():
|
||||
self.val_norm.update()
|
||||
return
|
||||
return
|
||||
|
||||
def _load_normalizers(self):
|
||||
super()._load_normalizers()
|
||||
self.val_norm.load()
|
||||
return
|
||||
def _build_normalizers(self):
|
||||
super()._build_normalizers()
|
||||
with self.sess.as_default(), self.graph.as_default(), tf.variable_scope(self.tf_scope):
|
||||
with tf.variable_scope(self.RESOURCE_SCOPE):
|
||||
val_offset, val_scale = self._calc_val_offset_scale(self.discount)
|
||||
self.val_norm = TFNormalizer(self.sess, 'val_norm', 1)
|
||||
self.val_norm.set_mean_std(-val_offset, 1.0 / val_scale)
|
||||
return
|
||||
|
||||
def _build_losses(self, json_data):
|
||||
actor_weight_decay = 0 if (self.ACTOR_WEIGHT_DECAY_KEY not in json_data) else json_data[self.ACTOR_WEIGHT_DECAY_KEY]
|
||||
critic_weight_decay = 0 if (self.CRITIC_WEIGHT_DECAY_KEY not in json_data) else json_data[self.CRITIC_WEIGHT_DECAY_KEY]
|
||||
def _init_normalizers(self):
|
||||
super()._init_normalizers()
|
||||
with self.sess.as_default(), self.graph.as_default():
|
||||
self.val_norm.update()
|
||||
return
|
||||
|
||||
norm_val_diff = self.val_norm.normalize_tf(self.tar_val_tf) - self.val_norm.normalize_tf(self.critic_tf)
|
||||
self.critic_loss_tf = 0.5 * tf.reduce_mean(tf.square(norm_val_diff))
|
||||
def _load_normalizers(self):
|
||||
super()._load_normalizers()
|
||||
self.val_norm.load()
|
||||
return
|
||||
|
||||
if (critic_weight_decay != 0):
|
||||
self.critic_loss_tf += critic_weight_decay * self._weight_decay_loss('main/critic')
|
||||
|
||||
norm_a_mean_tf = self.a_norm.normalize_tf(self.actor_tf)
|
||||
norm_a_diff = self.a_norm.normalize_tf(self.a_tf) - norm_a_mean_tf
|
||||
def _build_losses(self, json_data):
|
||||
actor_weight_decay = 0 if (
|
||||
self.ACTOR_WEIGHT_DECAY_KEY not in json_data) else json_data[self.ACTOR_WEIGHT_DECAY_KEY]
|
||||
critic_weight_decay = 0 if (
|
||||
self.CRITIC_WEIGHT_DECAY_KEY not in json_data) else json_data[self.CRITIC_WEIGHT_DECAY_KEY]
|
||||
|
||||
self.actor_loss_tf = tf.reduce_sum(tf.square(norm_a_diff), axis=-1)
|
||||
self.actor_loss_tf *= self.adv_tf
|
||||
self.actor_loss_tf = 0.5 * tf.reduce_mean(self.actor_loss_tf)
|
||||
norm_val_diff = self.val_norm.normalize_tf(self.tar_val_tf) - self.val_norm.normalize_tf(
|
||||
self.critic_tf)
|
||||
self.critic_loss_tf = 0.5 * tf.reduce_mean(tf.square(norm_val_diff))
|
||||
|
||||
norm_a_bound_min = self.a_norm.normalize(self.a_bound_min)
|
||||
norm_a_bound_max = self.a_norm.normalize(self.a_bound_max)
|
||||
a_bound_loss = TFUtil.calc_bound_loss(norm_a_mean_tf, norm_a_bound_min, norm_a_bound_max)
|
||||
a_bound_loss /= self.exp_params_curr.noise
|
||||
self.actor_loss_tf += a_bound_loss
|
||||
if (critic_weight_decay != 0):
|
||||
self.critic_loss_tf += critic_weight_decay * self._weight_decay_loss('main/critic')
|
||||
|
||||
if (actor_weight_decay != 0):
|
||||
self.actor_loss_tf += actor_weight_decay * self._weight_decay_loss('main/actor')
|
||||
|
||||
return
|
||||
norm_a_mean_tf = self.a_norm.normalize_tf(self.actor_tf)
|
||||
norm_a_diff = self.a_norm.normalize_tf(self.a_tf) - norm_a_mean_tf
|
||||
|
||||
def _build_solvers(self, json_data):
|
||||
actor_stepsize = 0.001 if (self.ACTOR_STEPSIZE_KEY not in json_data) else json_data[self.ACTOR_STEPSIZE_KEY]
|
||||
actor_momentum = 0.9 if (self.ACTOR_MOMENTUM_KEY not in json_data) else json_data[self.ACTOR_MOMENTUM_KEY]
|
||||
critic_stepsize = 0.01 if (self.CRITIC_STEPSIZE_KEY not in json_data) else json_data[self.CRITIC_STEPSIZE_KEY]
|
||||
critic_momentum = 0.9 if (self.CRITIC_MOMENTUM_KEY not in json_data) else json_data[self.CRITIC_MOMENTUM_KEY]
|
||||
|
||||
critic_vars = self._tf_vars('main/critic')
|
||||
critic_opt = tf.train.MomentumOptimizer(learning_rate=critic_stepsize, momentum=critic_momentum)
|
||||
self.critic_grad_tf = tf.gradients(self.critic_loss_tf, critic_vars)
|
||||
self.critic_solver = MPISolver(self.sess, critic_opt, critic_vars)
|
||||
self.actor_loss_tf = tf.reduce_sum(tf.square(norm_a_diff), axis=-1)
|
||||
self.actor_loss_tf *= self.adv_tf
|
||||
self.actor_loss_tf = 0.5 * tf.reduce_mean(self.actor_loss_tf)
|
||||
|
||||
actor_vars = self._tf_vars('main/actor')
|
||||
actor_opt = tf.train.MomentumOptimizer(learning_rate=actor_stepsize, momentum=actor_momentum)
|
||||
self.actor_grad_tf = tf.gradients(self.actor_loss_tf, actor_vars)
|
||||
self.actor_solver = MPISolver(self.sess, actor_opt, actor_vars)
|
||||
norm_a_bound_min = self.a_norm.normalize(self.a_bound_min)
|
||||
norm_a_bound_max = self.a_norm.normalize(self.a_bound_max)
|
||||
a_bound_loss = TFUtil.calc_bound_loss(norm_a_mean_tf, norm_a_bound_min, norm_a_bound_max)
|
||||
a_bound_loss /= self.exp_params_curr.noise
|
||||
self.actor_loss_tf += a_bound_loss
|
||||
|
||||
return
|
||||
if (actor_weight_decay != 0):
|
||||
self.actor_loss_tf += actor_weight_decay * self._weight_decay_loss('main/actor')
|
||||
|
||||
def _build_net_actor(self, net_name, init_output_scale):
|
||||
norm_s_tf = self.s_norm.normalize_tf(self.s_tf)
|
||||
input_tfs = [norm_s_tf]
|
||||
if (self.has_goal()):
|
||||
norm_g_tf = self.g_norm.normalize_tf(self.g_tf)
|
||||
input_tfs += [norm_g_tf]
|
||||
|
||||
h = NetBuilder.build_net(net_name, input_tfs)
|
||||
norm_a_tf = tf.layers.dense(inputs=h, units=self.get_action_size(), activation=None,
|
||||
kernel_initializer=tf.random_uniform_initializer(minval=-init_output_scale, maxval=init_output_scale))
|
||||
|
||||
a_tf = self.a_norm.unnormalize_tf(norm_a_tf)
|
||||
return a_tf
|
||||
|
||||
def _build_net_critic(self, net_name):
|
||||
norm_s_tf = self.s_norm.normalize_tf(self.s_tf)
|
||||
input_tfs = [norm_s_tf]
|
||||
if (self.has_goal()):
|
||||
norm_g_tf = self.g_norm.normalize_tf(self.g_tf)
|
||||
input_tfs += [norm_g_tf]
|
||||
|
||||
h = NetBuilder.build_net(net_name, input_tfs)
|
||||
norm_val_tf = tf.layers.dense(inputs=h, units=1, activation=None,
|
||||
kernel_initializer=TFUtil.xavier_initializer);
|
||||
return
|
||||
|
||||
norm_val_tf = tf.reshape(norm_val_tf, [-1])
|
||||
val_tf = self.val_norm.unnormalize_tf(norm_val_tf)
|
||||
return val_tf
|
||||
def _build_solvers(self, json_data):
|
||||
actor_stepsize = 0.001 if (
|
||||
self.ACTOR_STEPSIZE_KEY not in json_data) else json_data[self.ACTOR_STEPSIZE_KEY]
|
||||
actor_momentum = 0.9 if (
|
||||
self.ACTOR_MOMENTUM_KEY not in json_data) else json_data[self.ACTOR_MOMENTUM_KEY]
|
||||
critic_stepsize = 0.01 if (
|
||||
self.CRITIC_STEPSIZE_KEY not in json_data) else json_data[self.CRITIC_STEPSIZE_KEY]
|
||||
critic_momentum = 0.9 if (
|
||||
self.CRITIC_MOMENTUM_KEY not in json_data) else json_data[self.CRITIC_MOMENTUM_KEY]
|
||||
|
||||
def _initialize_vars(self):
|
||||
super()._initialize_vars()
|
||||
self._sync_solvers()
|
||||
return
|
||||
critic_vars = self._tf_vars('main/critic')
|
||||
critic_opt = tf.train.MomentumOptimizer(learning_rate=critic_stepsize,
|
||||
momentum=critic_momentum)
|
||||
self.critic_grad_tf = tf.gradients(self.critic_loss_tf, critic_vars)
|
||||
self.critic_solver = MPISolver(self.sess, critic_opt, critic_vars)
|
||||
|
||||
def _sync_solvers(self):
|
||||
self.actor_solver.sync()
|
||||
self.critic_solver.sync()
|
||||
return
|
||||
actor_vars = self._tf_vars('main/actor')
|
||||
actor_opt = tf.train.MomentumOptimizer(learning_rate=actor_stepsize, momentum=actor_momentum)
|
||||
self.actor_grad_tf = tf.gradients(self.actor_loss_tf, actor_vars)
|
||||
self.actor_solver = MPISolver(self.sess, actor_opt, actor_vars)
|
||||
|
||||
  def _decide_action(self, s, g):
    with self.sess.as_default(), self.graph.as_default():
      self._exp_action = False
      a = self._eval_actor(s, g)[0]
      logp = 0

      if self._enable_stoch_policy():
        # epsilon-greedy
        rand_action = MathUtil.flip_coin(self.exp_params_curr.rate)
        if rand_action:
          norm_exp_noise = np.random.randn(*a.shape)
          norm_exp_noise *= self.exp_params_curr.noise
          exp_noise = norm_exp_noise * self.a_norm.std
          a += exp_noise

          logp = self._calc_action_logp(norm_exp_noise)
          self._exp_action = True

    return a, logp

  def _enable_stoch_policy(self):
    return self.enable_training and (self._mode == self.Mode.TRAIN or
                                     self._mode == self.Mode.TRAIN_END)

  def _eval_actor(self, s, g):
    s = np.reshape(s, [-1, self.get_state_size()])
    g = np.reshape(g, [-1, self.get_goal_size()]) if self.has_goal() else None

    feed = {self.s_tf: s, self.g_tf: g}

    a = self.actor_tf.eval(feed)
    return a

  def _eval_critic(self, s, g):
    with self.sess.as_default(), self.graph.as_default():
      s = np.reshape(s, [-1, self.get_state_size()])
      g = np.reshape(g, [-1, self.get_goal_size()]) if self.has_goal() else None

      feed = {self.s_tf: s, self.g_tf: g}

      val = self.critic_tf.eval(feed)
    return val

  def _record_flags(self):
    flags = int(0)
    if (self._exp_action):
      flags = flags | self.EXP_ACTION_FLAG
    return flags

  def _train_step(self):
    super()._train_step()

    critic_loss = self._update_critic()
    actor_loss = self._update_actor()
    critic_loss = MPIUtil.reduce_avg(critic_loss)
    actor_loss = MPIUtil.reduce_avg(actor_loss)

    critic_stepsize = self.critic_solver.get_stepsize()
    actor_stepsize = self.actor_solver.get_stepsize()

    self.logger.log_tabular('Critic_Loss', critic_loss)
    self.logger.log_tabular('Critic_Stepsize', critic_stepsize)
    self.logger.log_tabular('Actor_Loss', actor_loss)
    self.logger.log_tabular('Actor_Stepsize', actor_stepsize)

    return

  def _update_critic(self):
    idx = self.replay_buffer.sample(self._local_mini_batch_size)
    s = self.replay_buffer.get('states', idx)
    g = self.replay_buffer.get('goals', idx) if self.has_goal() else None

    tar_V = self._calc_updated_vals(idx)
    tar_V = np.clip(tar_V, self.val_min, self.val_max)

    feed = {self.s_tf: s, self.g_tf: g, self.tar_val_tf: tar_V}

    loss, grads = self.sess.run([self.critic_loss_tf, self.critic_grad_tf], feed)
    self.critic_solver.update(grads)
    return loss

  def _update_actor(self):
    key = self.EXP_ACTION_FLAG
    idx = self.replay_buffer.sample_filtered(self._local_mini_batch_size, key)
    has_goal = self.has_goal()

    s = self.replay_buffer.get('states', idx)
    g = self.replay_buffer.get('goals', idx) if has_goal else None
    a = self.replay_buffer.get('actions', idx)

    V_new = self._calc_updated_vals(idx)
    V_old = self._eval_critic(s, g)
    adv = V_new - V_old

    feed = {self.s_tf: s, self.g_tf: g, self.a_tf: a, self.adv_tf: adv}

    loss, grads = self.sess.run([self.actor_loss_tf, self.actor_grad_tf], feed)
    self.actor_solver.update(grads)

    return loss
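
  # _calc_updated_vals below forms one-step TD targets: new_V = r when
  # discount == 0, otherwise new_V = r + discount * V(s_next), where V(s_next)
  # is overridden by val_fail / val_succ at failed or successful path ends.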
  def _calc_updated_vals(self, idx):
    r = self.replay_buffer.get('rewards', idx)

    if self.discount == 0:
      new_V = r
    else:
      next_idx = self.replay_buffer.get_next_idx(idx)
      s_next = self.replay_buffer.get('states', next_idx)
      g_next = self.replay_buffer.get('goals', next_idx) if self.has_goal() else None

      is_end = self.replay_buffer.is_path_end(idx)
      is_fail = self.replay_buffer.check_terminal_flag(idx, Env.Terminate.Fail)
      is_succ = self.replay_buffer.check_terminal_flag(idx, Env.Terminate.Succ)
      is_fail = np.logical_and(is_end, is_fail)
      is_succ = np.logical_and(is_end, is_succ)

      V_next = self._eval_critic(s_next, g_next)
      V_next[is_fail] = self.val_fail
      V_next[is_succ] = self.val_succ

      new_V = r + self.discount * V_next
    return new_V
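
  # The log-probability below is the isotropic Gaussian density evaluated in
  # normalized action space:
  #   logp = -||delta||^2 / (2 * stdev^2) - 0.5 * a_size * log(2 * pi)
  #          - a_size * log(stdev)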
  def _calc_action_logp(self, norm_action_deltas):
    # norm action delta are for the normalized actions (scaled by self.a_norm.std)
    stdev = self.exp_params_curr.noise
    assert stdev > 0

    a_size = self.get_action_size()
    logp = -0.5 / (stdev * stdev) * np.sum(np.square(norm_action_deltas), axis=-1)
    logp += -0.5 * a_size * np.log(2 * np.pi)
    logp += -a_size * np.log(stdev)
    return logp

  def _log_val(self, s, g):
    val = self._eval_critic(s, g)
    norm_val = self.val_norm.normalize(val)
    self.world.env.log_val(self.id, norm_val[0])
    return

  def _build_replay_buffer(self, buffer_size):
    super()._build_replay_buffer(buffer_size)
    self.replay_buffer.add_filter_key(self.EXP_ACTION_FLAG)
    return

@@ -10,359 +10,374 @@ from pybullet_utils.logger import Logger
import pybullet_utils.mpi_util as MPIUtil
import pybullet_utils.math_util as MathUtil
from pybullet_envs.deep_mimic.env.env import Env

'''
Proximal Policy Optimization Agent
'''


class PPOAgent(PGAgent):
  NAME = "PPO"
  EPOCHS_KEY = "Epochs"
  BATCH_SIZE_KEY = "BatchSize"
  RATIO_CLIP_KEY = "RatioClip"
  NORM_ADV_CLIP_KEY = "NormAdvClip"
  TD_LAMBDA_KEY = "TDLambda"
  TAR_CLIP_FRAC = "TarClipFrac"
  ACTOR_STEPSIZE_DECAY = "ActorStepsizeDecay"

  def __init__(self, world, id, json_data):
    super().__init__(world, id, json_data)
    return

  def _load_params(self, json_data):
    super()._load_params(json_data)

    self.epochs = 1 if (self.EPOCHS_KEY not in json_data) else json_data[self.EPOCHS_KEY]
    self.batch_size = 1024 if (
        self.BATCH_SIZE_KEY not in json_data) else json_data[self.BATCH_SIZE_KEY]
    self.ratio_clip = 0.2 if (
        self.RATIO_CLIP_KEY not in json_data) else json_data[self.RATIO_CLIP_KEY]
    self.norm_adv_clip = 5 if (
        self.NORM_ADV_CLIP_KEY not in json_data) else json_data[self.NORM_ADV_CLIP_KEY]
    self.td_lambda = 0.95 if (
        self.TD_LAMBDA_KEY not in json_data) else json_data[self.TD_LAMBDA_KEY]
    self.tar_clip_frac = -1 if (
        self.TAR_CLIP_FRAC not in json_data) else json_data[self.TAR_CLIP_FRAC]
    self.actor_stepsize_decay = 0.5 if (
        self.ACTOR_STEPSIZE_DECAY not in json_data) else json_data[self.ACTOR_STEPSIZE_DECAY]

    num_procs = MPIUtil.get_num_procs()
    local_batch_size = int(self.batch_size / num_procs)
    min_replay_size = 2 * local_batch_size  # needed to prevent buffer overflow
    assert (self.replay_buffer_size > min_replay_size)

    self.replay_buffer_size = np.maximum(min_replay_size, self.replay_buffer_size)

    return

  def _build_nets(self, json_data):
    assert self.ACTOR_NET_KEY in json_data
    assert self.CRITIC_NET_KEY in json_data

    actor_net_name = json_data[self.ACTOR_NET_KEY]
    critic_net_name = json_data[self.CRITIC_NET_KEY]
    actor_init_output_scale = 1 if (self.ACTOR_INIT_OUTPUT_SCALE_KEY not in json_data
                                   ) else json_data[self.ACTOR_INIT_OUTPUT_SCALE_KEY]

    s_size = self.get_state_size()
    g_size = self.get_goal_size()
    a_size = self.get_action_size()

    # setup input tensors
    self.s_tf = tf.placeholder(tf.float32, shape=[None, s_size], name="s")
    self.a_tf = tf.placeholder(tf.float32, shape=[None, a_size], name="a")
    self.tar_val_tf = tf.placeholder(tf.float32, shape=[None], name="tar_val")
    self.adv_tf = tf.placeholder(tf.float32, shape=[None], name="adv")
    self.g_tf = tf.placeholder(tf.float32,
                               shape=([None, g_size] if self.has_goal() else None),
                               name="g")
    self.old_logp_tf = tf.placeholder(tf.float32, shape=[None], name="old_logp")
    self.exp_mask_tf = tf.placeholder(tf.float32, shape=[None], name="exp_mask")

    with tf.variable_scope('main'):
      with tf.variable_scope('actor'):
        self.a_mean_tf = self._build_net_actor(actor_net_name, actor_init_output_scale)
      with tf.variable_scope('critic'):
        self.critic_tf = self._build_net_critic(critic_net_name)

    if (self.a_mean_tf != None):
      Logger.print2('Built actor net: ' + actor_net_name)

    if (self.critic_tf != None):
      Logger.print2('Built critic net: ' + critic_net_name)

    self.norm_a_std_tf = self.exp_params_curr.noise * tf.ones(a_size)
    norm_a_noise_tf = self.norm_a_std_tf * tf.random_normal(shape=tf.shape(self.a_mean_tf))
    norm_a_noise_tf *= tf.expand_dims(self.exp_mask_tf, axis=-1)
    self.sample_a_tf = self.a_mean_tf + norm_a_noise_tf * self.a_norm.std_tf
    self.sample_a_logp_tf = TFUtil.calc_logp_gaussian(x_tf=norm_a_noise_tf,
                                                      mean_tf=None,
                                                      std_tf=self.norm_a_std_tf)

    return
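
  # _build_losses implements the PPO clipped surrogate objective: with
  # ratio = exp(logp - old_logp), the actor loss is
  #   -E[min(ratio * adv, clip(ratio, 1 - ratio_clip, 1 + ratio_clip) * adv)]
  # plus an action-bound penalty and optional weight decay terms.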
  def _build_losses(self, json_data):
    actor_weight_decay = 0 if (
        self.ACTOR_WEIGHT_DECAY_KEY not in json_data) else json_data[self.ACTOR_WEIGHT_DECAY_KEY]
    critic_weight_decay = 0 if (
        self.CRITIC_WEIGHT_DECAY_KEY not in json_data) else json_data[self.CRITIC_WEIGHT_DECAY_KEY]

    norm_val_diff = self.val_norm.normalize_tf(self.tar_val_tf) - self.val_norm.normalize_tf(
        self.critic_tf)
    self.critic_loss_tf = 0.5 * tf.reduce_mean(tf.square(norm_val_diff))

    if (critic_weight_decay != 0):
      self.critic_loss_tf += critic_weight_decay * self._weight_decay_loss('main/critic')

    norm_tar_a_tf = self.a_norm.normalize_tf(self.a_tf)
    self._norm_a_mean_tf = self.a_norm.normalize_tf(self.a_mean_tf)

    self.logp_tf = TFUtil.calc_logp_gaussian(norm_tar_a_tf, self._norm_a_mean_tf,
                                             self.norm_a_std_tf)
    ratio_tf = tf.exp(self.logp_tf - self.old_logp_tf)
    actor_loss0 = self.adv_tf * ratio_tf
    actor_loss1 = self.adv_tf * tf.clip_by_value(ratio_tf, 1.0 - self.ratio_clip,
                                                 1 + self.ratio_clip)
    self.actor_loss_tf = -tf.reduce_mean(tf.minimum(actor_loss0, actor_loss1))

    norm_a_bound_min = self.a_norm.normalize(self.a_bound_min)
    norm_a_bound_max = self.a_norm.normalize(self.a_bound_max)
    a_bound_loss = TFUtil.calc_bound_loss(self._norm_a_mean_tf, norm_a_bound_min, norm_a_bound_max)
    self.actor_loss_tf += a_bound_loss

    if (actor_weight_decay != 0):
      self.actor_loss_tf += actor_weight_decay * self._weight_decay_loss('main/actor')

    # for debugging
    self.clip_frac_tf = tf.reduce_mean(
        tf.to_float(tf.greater(tf.abs(ratio_tf - 1.0), self.ratio_clip)))

    return

  def _build_solvers(self, json_data):
    actor_stepsize = 0.001 if (
        self.ACTOR_STEPSIZE_KEY not in json_data) else json_data[self.ACTOR_STEPSIZE_KEY]
    actor_momentum = 0.9 if (
        self.ACTOR_MOMENTUM_KEY not in json_data) else json_data[self.ACTOR_MOMENTUM_KEY]
    critic_stepsize = 0.01 if (
        self.CRITIC_STEPSIZE_KEY not in json_data) else json_data[self.CRITIC_STEPSIZE_KEY]
    critic_momentum = 0.9 if (
        self.CRITIC_MOMENTUM_KEY not in json_data) else json_data[self.CRITIC_MOMENTUM_KEY]

    critic_vars = self._tf_vars('main/critic')
    critic_opt = tf.train.MomentumOptimizer(learning_rate=critic_stepsize,
                                            momentum=critic_momentum)
    self.critic_grad_tf = tf.gradients(self.critic_loss_tf, critic_vars)
    self.critic_solver = MPISolver(self.sess, critic_opt, critic_vars)

    self._actor_stepsize_tf = tf.get_variable(dtype=tf.float32,
                                              name='actor_stepsize',
                                              initializer=actor_stepsize,
                                              trainable=False)
    self._actor_stepsize_ph = tf.get_variable(dtype=tf.float32, name='actor_stepsize_ph', shape=[])
    self._actor_stepsize_update_op = self._actor_stepsize_tf.assign(self._actor_stepsize_ph)

    actor_vars = self._tf_vars('main/actor')
    actor_opt = tf.train.MomentumOptimizer(learning_rate=self._actor_stepsize_tf,
                                           momentum=actor_momentum)
    self.actor_grad_tf = tf.gradients(self.actor_loss_tf, actor_vars)
    self.actor_solver = MPISolver(self.sess, actor_opt, actor_vars)

    return

  def _decide_action(self, s, g):
    with self.sess.as_default(), self.graph.as_default():
      self._exp_action = self._enable_stoch_policy() and MathUtil.flip_coin(
          self.exp_params_curr.rate)
      #print("_decide_action._exp_action=",self._exp_action)
      a, logp = self._eval_actor(s, g, self._exp_action)
    return a[0], logp[0]

  def _eval_actor(self, s, g, enable_exp):
    s = np.reshape(s, [-1, self.get_state_size()])
    g = np.reshape(g, [-1, self.get_goal_size()]) if self.has_goal() else None

    feed = {self.s_tf: s, self.g_tf: g, self.exp_mask_tf: np.array([1 if enable_exp else 0])}

    a, logp = self.sess.run([self.sample_a_tf, self.sample_a_logp_tf], feed_dict=feed)
    return a, logp
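
  # _train_step consumes the whole on-policy buffer at once: it computes values
  # and target returns for every recorded step, normalizes and clips the
  # advantages of exploratory samples, then runs self.epochs passes of shuffled
  # minibatch updates before clearing the buffer.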
  def _train_step(self):
    adv_eps = 1e-5

    start_idx = self.replay_buffer.buffer_tail
    end_idx = self.replay_buffer.buffer_head
    assert (start_idx == 0)
    assert (self.replay_buffer.get_current_size() <= self.replay_buffer.buffer_size
           )  # must avoid overflow
    assert (start_idx < end_idx)

    idx = np.array(list(range(start_idx, end_idx)))
    end_mask = self.replay_buffer.is_path_end(idx)
    end_mask = np.logical_not(end_mask)

    vals = self._compute_batch_vals(start_idx, end_idx)
    new_vals = self._compute_batch_new_vals(start_idx, end_idx, vals)

    valid_idx = idx[end_mask]
    exp_idx = self.replay_buffer.get_idx_filtered(self.EXP_ACTION_FLAG).copy()
    num_valid_idx = valid_idx.shape[0]
    num_exp_idx = exp_idx.shape[0]
    exp_idx = np.column_stack([exp_idx, np.array(list(range(0, num_exp_idx)), dtype=np.int32)])

    local_sample_count = valid_idx.size
    global_sample_count = int(MPIUtil.reduce_sum(local_sample_count))
    mini_batches = int(np.ceil(global_sample_count / self.mini_batch_size))

    adv = new_vals[exp_idx[:, 0]] - vals[exp_idx[:, 0]]
    new_vals = np.clip(new_vals, self.val_min, self.val_max)

    adv_mean = np.mean(adv)
    adv_std = np.std(adv)
    adv = (adv - adv_mean) / (adv_std + adv_eps)
    adv = np.clip(adv, -self.norm_adv_clip, self.norm_adv_clip)

    critic_loss = 0
    actor_loss = 0
    actor_clip_frac = 0

    for e in range(self.epochs):
      np.random.shuffle(valid_idx)
      np.random.shuffle(exp_idx)

      for b in range(mini_batches):
        batch_idx_beg = b * self._local_mini_batch_size
        batch_idx_end = batch_idx_beg + self._local_mini_batch_size

        critic_batch = np.array(range(batch_idx_beg, batch_idx_end), dtype=np.int32)
        actor_batch = critic_batch.copy()
        critic_batch = np.mod(critic_batch, num_valid_idx)
        actor_batch = np.mod(actor_batch, num_exp_idx)
        shuffle_actor = (actor_batch[-1] < actor_batch[0]) or (actor_batch[-1] == num_exp_idx - 1)

        critic_batch = valid_idx[critic_batch]
        actor_batch = exp_idx[actor_batch]
        critic_batch_vals = new_vals[critic_batch]
        actor_batch_adv = adv[actor_batch[:, 1]]

        critic_s = self.replay_buffer.get('states', critic_batch)
        critic_g = self.replay_buffer.get('goals', critic_batch) if self.has_goal() else None
        curr_critic_loss = self._update_critic(critic_s, critic_g, critic_batch_vals)

        actor_s = self.replay_buffer.get("states", actor_batch[:, 0])
        actor_g = self.replay_buffer.get("goals", actor_batch[:, 0]) if self.has_goal() else None
        actor_a = self.replay_buffer.get("actions", actor_batch[:, 0])
        actor_logp = self.replay_buffer.get("logps", actor_batch[:, 0])
        curr_actor_loss, curr_actor_clip_frac = self._update_actor(actor_s, actor_g, actor_a,
                                                                   actor_logp, actor_batch_adv)

        critic_loss += curr_critic_loss
        actor_loss += np.abs(curr_actor_loss)
        actor_clip_frac += curr_actor_clip_frac

        if (shuffle_actor):
          np.random.shuffle(exp_idx)

    total_batches = mini_batches * self.epochs
    critic_loss /= total_batches
    actor_loss /= total_batches
    actor_clip_frac /= total_batches

    critic_loss = MPIUtil.reduce_avg(critic_loss)
    actor_loss = MPIUtil.reduce_avg(actor_loss)
    actor_clip_frac = MPIUtil.reduce_avg(actor_clip_frac)

    critic_stepsize = self.critic_solver.get_stepsize()
    actor_stepsize = self.update_actor_stepsize(actor_clip_frac)

    self.logger.log_tabular('Critic_Loss', critic_loss)
    self.logger.log_tabular('Critic_Stepsize', critic_stepsize)
    self.logger.log_tabular('Actor_Loss', actor_loss)
    self.logger.log_tabular('Actor_Stepsize', actor_stepsize)
    self.logger.log_tabular('Clip_Frac', actor_clip_frac)
    self.logger.log_tabular('Adv_Mean', adv_mean)
    self.logger.log_tabular('Adv_Std', adv_std)

    self.replay_buffer.clear()

    return

  def _get_iters_per_update(self):
    return 1

  def _valid_train_step(self):
    samples = self.replay_buffer.get_current_size()
    exp_samples = self.replay_buffer.count_filtered(self.EXP_ACTION_FLAG)
    global_sample_count = int(MPIUtil.reduce_sum(samples))
    global_exp_min = int(MPIUtil.reduce_min(exp_samples))
    return (global_sample_count > self.batch_size) and (global_exp_min > 0)

  def _compute_batch_vals(self, start_idx, end_idx):
    states = self.replay_buffer.get_all("states")[start_idx:end_idx]
    goals = self.replay_buffer.get_all("goals")[start_idx:end_idx] if self.has_goal() else None

    idx = np.array(list(range(start_idx, end_idx)))
    is_end = self.replay_buffer.is_path_end(idx)
    is_fail = self.replay_buffer.check_terminal_flag(idx, Env.Terminate.Fail)
    is_succ = self.replay_buffer.check_terminal_flag(idx, Env.Terminate.Succ)
    is_fail = np.logical_and(is_end, is_fail)
    is_succ = np.logical_and(is_end, is_succ)

    vals = self._eval_critic(states, goals)
    vals[is_fail] = self.val_fail
    vals[is_succ] = self.val_succ

    return vals
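
  # A note on the returns below: per path, the targets are assumed to be
  # TD(lambda) returns produced by RLUtil.compute_return with weight td_lambda,
  # i.e. an exponentially weighted mix of n-step returns bootstrapped from
  # val_buffer.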
  def _compute_batch_new_vals(self, start_idx, end_idx, val_buffer):
    rewards = self.replay_buffer.get_all("rewards")[start_idx:end_idx]

    if self.discount == 0:
      new_vals = rewards.copy()
    else:
      new_vals = np.zeros_like(val_buffer)

      curr_idx = start_idx
      while curr_idx < end_idx:
        idx0 = curr_idx - start_idx
        idx1 = self.replay_buffer.get_path_end(curr_idx) - start_idx
        r = rewards[idx0:idx1]
        v = val_buffer[idx0:(idx1 + 1)]

        new_vals[idx0:idx1] = RLUtil.compute_return(r, self.discount, self.td_lambda, v)
        curr_idx = idx1 + start_idx + 1

    return new_vals

  def _update_critic(self, s, g, tar_vals):
    feed = {self.s_tf: s, self.g_tf: g, self.tar_val_tf: tar_vals}

    loss, grads = self.sess.run([self.critic_loss_tf, self.critic_grad_tf], feed)
    self.critic_solver.update(grads)
    return loss

  def _update_actor(self, s, g, a, logp, adv):
    feed = {self.s_tf: s, self.g_tf: g, self.a_tf: a, self.adv_tf: adv, self.old_logp_tf: logp}

    loss, grads, clip_frac = self.sess.run(
        [self.actor_loss_tf, self.actor_grad_tf, self.clip_frac_tf], feed)
    self.actor_solver.update(grads)

    return loss, clip_frac
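
  # The stepsize schedule below is feedback control on the observed clip
  # fraction: when it drifts outside [tar_clip_frac / clip_tol,
  # tar_clip_frac * clip_tol], the actor stepsize is scaled down or up by
  # actor_stepsize_decay and clamped to [min_stepsize, max_stepsize].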
  def update_actor_stepsize(self, clip_frac):
    clip_tol = 1.5
    step_scale = 2
    max_stepsize = 1e-2
    min_stepsize = 1e-8
    warmup_iters = 5

    actor_stepsize = self.actor_solver.get_stepsize()
    if (self.tar_clip_frac >= 0 and self.iter > warmup_iters):
      min_clip = self.tar_clip_frac / clip_tol
      max_clip = self.tar_clip_frac * clip_tol
      under_tol = clip_frac < min_clip
      over_tol = clip_frac > max_clip

      if (over_tol or under_tol):
        if (over_tol):
          actor_stepsize *= self.actor_stepsize_decay
        else:
          actor_stepsize /= self.actor_stepsize_decay

        actor_stepsize = np.clip(actor_stepsize, min_stepsize, max_stepsize)
        self.set_actor_stepsize(actor_stepsize)

    return actor_stepsize

  def set_actor_stepsize(self, stepsize):
    feed = {
        self._actor_stepsize_ph: stepsize,
    }
    self.sess.run(self._actor_stepsize_update_op, feed)
    return

@@ -5,347 +5,353 @@ import inspect as inspect
from pybullet_envs.deep_mimic.env.env import Env
import pybullet_utils.math_util as MathUtil


class ReplayBuffer(object):
  TERMINATE_KEY = 'terminate'
  PATH_START_KEY = 'path_start'
  PATH_END_KEY = 'path_end'

  def __init__(self, buffer_size):
    assert buffer_size > 0

    self.buffer_size = buffer_size
    self.total_count = 0
    self.buffer_head = 0
    self.buffer_tail = MathUtil.INVALID_IDX
    self.num_paths = 0
    self._sample_buffers = dict()
    self.buffers = None

    self.clear()
    return
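
  # Sampling note: indices are drawn uniformly from the occupied span of the
  # circular buffer, rejecting path-end slots since those hold the stored
  # terminal states.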
  def sample(self, n):
    curr_size = self.get_current_size()
    assert curr_size > 0

    idx = np.empty(n, dtype=int)
    # makes sure that the end states are not sampled
    for i in range(n):
      while True:
        curr_idx = np.random.randint(0, curr_size, size=1)[0]
        curr_idx += self.buffer_tail
        curr_idx = np.mod(curr_idx, self.buffer_size)

        if not self.is_path_end(curr_idx):
          break
      idx[i] = curr_idx

    return idx

  def sample_filtered(self, n, key):
    assert key in self._sample_buffers
    curr_buffer = self._sample_buffers[key]
    idx = curr_buffer.sample(n)
    return idx

  def count_filtered(self, key):
    curr_buffer = self._sample_buffers[key]
    return curr_buffer.count

  def get(self, key, idx):
    return self.buffers[key][idx]

  def get_all(self, key):
    return self.buffers[key]

  def get_idx_filtered(self, key):
    assert key in self._sample_buffers
    curr_buffer = self._sample_buffers[key]
    idx = curr_buffer.slot_to_idx[:curr_buffer.count]
    return idx

  def get_path_start(self, idx):
    return self.buffers[self.PATH_START_KEY][idx]

  def get_path_end(self, idx):
    return self.buffers[self.PATH_END_KEY][idx]

  def get_pathlen(self, idx):
    is_array = isinstance(idx, np.ndarray) or isinstance(idx, list)
    if not is_array:
      idx = [idx]

    n = len(idx)
    start_idx = self.get_path_start(idx)
    end_idx = self.get_path_end(idx)
    pathlen = np.empty(n, dtype=int)

    for i in range(n):
      curr_start = start_idx[i]
      curr_end = end_idx[i]
      if curr_start < curr_end:
        curr_len = curr_end - curr_start
      else:
        curr_len = self.buffer_size - curr_start + curr_end
      pathlen[i] = curr_len

    if not is_array:
      pathlen = pathlen[0]

    return pathlen

  def is_valid_path(self, idx):
    start_idx = self.get_path_start(idx)
    valid = start_idx != MathUtil.INVALID_IDX
    return valid

  def store(self, path):
    start_idx = MathUtil.INVALID_IDX
    n = path.pathlength()

    if (n > 0):
      assert path.is_valid()

      if path.check_vals():
        if self.buffers is None:
          self._init_buffers(path)

        idx = self._request_idx(n + 1)
        self._store_path(path, idx)
        self._add_sample_buffers(idx)

        self.num_paths += 1
        self.total_count += n + 1
        start_idx = idx[0]
      else:
        Logger.print2('Invalid path data value detected')

    return start_idx

  def clear(self):
    self.buffer_head = 0
    self.buffer_tail = MathUtil.INVALID_IDX
    self.num_paths = 0

    for key in self._sample_buffers:
      self._sample_buffers[key].clear()
    return

  def get_next_idx(self, idx):
    next_idx = np.mod(idx + 1, self.buffer_size)
    return next_idx

  def is_terminal_state(self, idx):
    terminate_flags = self.buffers[self.TERMINATE_KEY][idx]
    terminate = terminate_flags != Env.Terminate.Null.value
    is_end = self.is_path_end(idx)
    terminal_state = np.logical_and(terminate, is_end)
    return terminal_state

  def check_terminal_flag(self, idx, flag):
    terminate_flags = self.buffers[self.TERMINATE_KEY][idx]
    terminate = terminate_flags == flag.value
    return terminate

  def is_path_end(self, idx):
    is_end = self.buffers[self.PATH_END_KEY][idx] == idx
    return is_end

  def add_filter_key(self, key):
    assert self.get_current_size() == 0
    if key not in self._sample_buffers:
      self._sample_buffers[key] = SampleBuffer(self.buffer_size)
    return

  def get_current_size(self):
    if self.buffer_tail == MathUtil.INVALID_IDX:
      return 0
    elif self.buffer_tail < self.buffer_head:
      return self.buffer_head - self.buffer_tail
    else:
      return self.buffer_size - self.buffer_tail + self.buffer_head

  def _check_flags(self, key, flags):
    return (flags & key) == key

  def _add_sample_buffers(self, idx):
    flags = self.buffers['flags']
    for key in self._sample_buffers:
      curr_buffer = self._sample_buffers[key]
      filter_idx = [
          i for i in idx if (self._check_flags(key, flags[i]) and not self.is_path_end(i))
      ]
      curr_buffer.add(filter_idx)
    return

  def _free_sample_buffers(self, idx):
    for key in self._sample_buffers:
      curr_buffer = self._sample_buffers[key]
      curr_buffer.free(idx)
    return

  def _init_buffers(self, path):
    self.buffers = dict()
    self.buffers[self.PATH_START_KEY] = MathUtil.INVALID_IDX * np.ones(self.buffer_size, dtype=int)
    self.buffers[self.PATH_END_KEY] = MathUtil.INVALID_IDX * np.ones(self.buffer_size, dtype=int)

    for key in dir(path):
      val = getattr(path, key)
      if not key.startswith('__') and not inspect.ismethod(val):
        if key == self.TERMINATE_KEY:
          self.buffers[self.TERMINATE_KEY] = np.zeros(shape=[self.buffer_size], dtype=int)
        else:
          val_type = type(val[0])
          is_array = val_type == np.ndarray
          if is_array:
            shape = [self.buffer_size, val[0].shape[0]]
            dtype = val[0].dtype
          else:
            shape = [self.buffer_size]
            dtype = val_type

          self.buffers[key] = np.zeros(shape, dtype=dtype)
    return
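
  # Allocation sketch: _request_idx hands out the next n slots of the ring
  # buffer (wrapping at buffer_size), and _free_idx evicts any stored paths
  # overlapping the reclaimed range, advancing buffer_tail past them.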
  def _request_idx(self, n):
    assert n + 1 < self.buffer_size  # bad things can happen if path is too long

    remainder = n
    idx = []

    start_idx = self.buffer_head
    while remainder > 0:
      end_idx = np.minimum(start_idx + remainder, self.buffer_size)
      remainder -= (end_idx - start_idx)

      free_idx = list(range(start_idx, end_idx))
      self._free_idx(free_idx)
      idx += free_idx
      start_idx = 0

    self.buffer_head = (self.buffer_head + n) % self.buffer_size
    return idx

  def _free_idx(self, idx):
    assert (idx[0] <= idx[-1])
    n = len(idx)
    if self.buffer_tail != MathUtil.INVALID_IDX:
      update_tail = idx[0] <= idx[-1] and idx[0] <= self.buffer_tail and idx[-1] >= self.buffer_tail
      update_tail |= idx[0] > idx[-1] and (idx[0] <= self.buffer_tail or
                                           idx[-1] >= self.buffer_tail)

      if update_tail:
        i = 0
        while i < n:
          curr_idx = idx[i]
          if self.is_valid_path(curr_idx):
            start_idx = self.get_path_start(curr_idx)
            end_idx = self.get_path_end(curr_idx)
            pathlen = self.get_pathlen(curr_idx)

            if start_idx < end_idx:
              self.buffers[self.PATH_START_KEY][start_idx:end_idx + 1] = MathUtil.INVALID_IDX
              self._free_sample_buffers(list(range(start_idx, end_idx + 1)))
            else:
              self.buffers[self.PATH_START_KEY][start_idx:self.buffer_size] = MathUtil.INVALID_IDX
              self.buffers[self.PATH_START_KEY][0:end_idx + 1] = MathUtil.INVALID_IDX
              self._free_sample_buffers(list(range(start_idx, self.buffer_size)))
              self._free_sample_buffers(list(range(0, end_idx + 1)))

            self.num_paths -= 1
            i += pathlen + 1
            self.buffer_tail = (end_idx + 1) % self.buffer_size
          else:
            i += 1
      else:
        self.buffer_tail = idx[0]
    return

  def _store_path(self, path, idx):
    n = path.pathlength()
    for key, data in self.buffers.items():
      if key != self.PATH_START_KEY and key != self.PATH_END_KEY and key != self.TERMINATE_KEY:
        val = getattr(path, key)
        val_len = len(val)
        assert val_len == n or val_len == n + 1
        data[idx[:val_len]] = val

    self.buffers[self.TERMINATE_KEY][idx] = path.terminate.value
    self.buffers[self.PATH_START_KEY][idx] = idx[0]
    self.buffers[self.PATH_END_KEY][idx] = idx[-1]
    return


class SampleBuffer(object):

  def __init__(self, size):
    self.idx_to_slot = np.empty(shape=[size], dtype=int)
    self.slot_to_idx = np.empty(shape=[size], dtype=int)
    self.count = 0
    self.clear()
    return

  def clear(self):
    self.idx_to_slot.fill(MathUtil.INVALID_IDX)
    self.slot_to_idx.fill(MathUtil.INVALID_IDX)
    self.count = 0
    return

  def is_valid(self, idx):
    return self.idx_to_slot[idx] != MathUtil.INVALID_IDX

  def get_size(self):
    return self.idx_to_slot.shape[0]
self.idx_to_slot.fill(MathUtil.INVALID_IDX)
|
||||
self.slot_to_idx.fill(MathUtil.INVALID_IDX)
|
||||
self.count = 0
|
||||
return
|
||||
|
||||
def add(self, idx):
|
||||
for i in idx:
|
||||
if not self.is_valid(i):
|
||||
new_slot = self.count
|
||||
assert new_slot >= 0
|
||||
def is_valid(self, idx):
|
||||
return self.idx_to_slot[idx] != MathUtil.INVALID_IDX
|
||||
|
||||
self.idx_to_slot[i] = new_slot
|
||||
self.slot_to_idx[new_slot] = i
|
||||
self.count += 1
|
||||
return
|
||||
def get_size(self):
|
||||
return self.idx_to_slot.shape[0]
|
||||
|
||||
def free(self, idx):
|
||||
for i in idx:
|
||||
if self.is_valid(i):
|
||||
slot = self.idx_to_slot[i]
|
||||
last_slot = self.count - 1
|
||||
last_idx = self.slot_to_idx[last_slot]
|
||||
def add(self, idx):
|
||||
for i in idx:
|
||||
if not self.is_valid(i):
|
||||
new_slot = self.count
|
||||
assert new_slot >= 0
|
||||
|
||||
self.idx_to_slot[last_idx] = slot
|
||||
self.slot_to_idx[slot] = last_idx
|
||||
self.idx_to_slot[i] = MathUtil.INVALID_IDX
|
||||
self.slot_to_idx[last_slot] = MathUtil.INVALID_IDX
|
||||
self.count -= 1
|
||||
return
|
||||
self.idx_to_slot[i] = new_slot
|
||||
self.slot_to_idx[new_slot] = i
|
||||
self.count += 1
|
||||
return
|
||||
|
||||
def sample(self, n):
|
||||
if self.count > 0:
|
||||
slots = np.random.randint(0, self.count, size=n)
|
||||
idx = self.slot_to_idx[slots]
|
||||
else:
|
||||
idx = np.empty(shape=[0], dtype=int)
|
||||
return idx
|
||||
def free(self, idx):
|
||||
for i in idx:
|
||||
if self.is_valid(i):
|
||||
slot = self.idx_to_slot[i]
|
||||
last_slot = self.count - 1
|
||||
last_idx = self.slot_to_idx[last_slot]
|
||||
|
||||
def check_consistency(self):
|
||||
valid = True
|
||||
if self.count < 0:
|
||||
self.idx_to_slot[last_idx] = slot
|
||||
self.slot_to_idx[slot] = last_idx
|
||||
self.idx_to_slot[i] = MathUtil.INVALID_IDX
|
||||
self.slot_to_idx[last_slot] = MathUtil.INVALID_IDX
|
||||
self.count -= 1
|
||||
return
|
||||
|
||||
def sample(self, n):
|
||||
if self.count > 0:
|
||||
slots = np.random.randint(0, self.count, size=n)
|
||||
idx = self.slot_to_idx[slots]
|
||||
else:
|
||||
idx = np.empty(shape=[0], dtype=int)
|
||||
return idx
|
||||
|
||||
def check_consistency(self):
|
||||
valid = True
|
||||
if self.count < 0:
|
||||
valid = False
|
||||
|
||||
if valid:
|
||||
for i in range(self.get_size()):
|
||||
if self.is_valid(i):
|
||||
s = self.idx_to_slot[i]
|
||||
if self.slot_to_idx[s] != i:
|
||||
valid = False
|
||||
break
|
||||
|
||||
if valid:
|
||||
for i in range(self.get_size()):
|
||||
if self.is_valid(i):
|
||||
s = self.idx_to_slot[i]
|
||||
if self.slot_to_idx[s] != i:
|
||||
valid = False
|
||||
break
|
||||
s2i = self.slot_to_idx[i]
|
||||
if s2i != MathUtil.INVALID_IDX:
|
||||
i2s = self.idx_to_slot[s2i]
|
||||
if i2s != i:
|
||||
valid = False
|
||||
break
|
||||
|
||||
s2i = self.slot_to_idx[i]
|
||||
if s2i != MathUtil.INVALID_IDX:
|
||||
i2s = self.idx_to_slot[s2i]
|
||||
if i2s != i:
|
||||
valid = False
|
||||
break
|
||||
|
||||
count0 = np.sum(self.idx_to_slot == MathUtil.INVALID_IDX)
|
||||
count1 = np.sum(self.slot_to_idx == MathUtil.INVALID_IDX)
|
||||
valid &= count0 == count1
|
||||
return valid
|
||||
count0 = np.sum(self.idx_to_slot == MathUtil.INVALID_IDX)
|
||||
count1 = np.sum(self.slot_to_idx == MathUtil.INVALID_IDX)
|
||||
valid &= count0 == count1
|
||||
return valid
|
||||
|
||||
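For readers skimming this diff: SampleBuffer keeps a dense slot array so that add, free, and uniform sampling are all O(1). A minimal standalone restatement of that bookkeeping, with a local INVALID_IDX standing in for MathUtil.INVALID_IDX (which is defined elsewhere in pybullet_utils and not shown here):

import numpy as np

INVALID_IDX = -1  # stand-in for MathUtil.INVALID_IDX


class MiniSampleBuffer:

  def __init__(self, size):
    self.idx_to_slot = np.full(size, INVALID_IDX, dtype=int)
    self.slot_to_idx = np.full(size, INVALID_IDX, dtype=int)
    self.count = 0

  def add(self, i):
    if self.idx_to_slot[i] == INVALID_IDX:
      self.idx_to_slot[i] = self.count
      self.slot_to_idx[self.count] = i
      self.count += 1

  def free(self, i):
    if self.idx_to_slot[i] != INVALID_IDX:
      slot, last_slot = self.idx_to_slot[i], self.count - 1
      last_idx = self.slot_to_idx[last_slot]
      # swap the last occupied slot into the freed slot so slots stay dense
      self.idx_to_slot[last_idx] = slot
      self.slot_to_idx[slot] = last_idx
      self.idx_to_slot[i] = INVALID_IDX
      self.slot_to_idx[last_slot] = INVALID_IDX
      self.count -= 1


b = MiniSampleBuffer(8)
for i in [2, 5, 7]:
  b.add(i)
b.free(5)
assert sorted(b.slot_to_idx[:b.count]) == [2, 7]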
File diff suppressed because it is too large
@@ -1,18 +1,19 @@
import numpy as np


def compute_return(rewards, gamma, td_lambda, val_t):
  # computes td-lambda return of path
  path_len = len(rewards)
  assert len(val_t) == path_len + 1

  return_t = np.zeros(path_len)
  last_val = rewards[-1] + gamma * val_t[-1]
  return_t[-1] = last_val

  for i in reversed(range(0, path_len - 1)):
    curr_r = rewards[i]
    next_ret = return_t[i + 1]
    curr_val = curr_r + gamma * ((1.0 - td_lambda) * val_t[i + 1] + td_lambda * next_ret)
    return_t[i] = curr_val

  return return_t
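compute_return implements the TD(lambda) target, return_t[i] = r_i + gamma * ((1 - lambda) * V(s_{i+1}) + lambda * return_t[i+1]), with the last step bootstrapped directly from the final value estimate. A quick numeric sanity check with made-up inputs, assuming the function from the hunk above is in scope:

import numpy as np

rewards = np.array([1.0, 1.0, 1.0])
val_t = np.array([0.5, 0.5, 0.5, 0.5])  # one more value estimate than rewards
gamma, td_lambda = 0.9, 0.95

ret = compute_return(rewards, gamma, td_lambda, val_t)
# the last entry is a pure bootstrap off the final value estimate
assert np.isclose(ret[-1], rewards[-1] + gamma * val_t[-1])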
@@ -5,139 +5,140 @@ from pybullet_envs.deep_mimic.learning.rl_agent import RLAgent
from pybullet_utils.logger import Logger
import pybullet_data


class RLWorld(object):

  def __init__(self, env, arg_parser):
    TFUtil.disable_gpu()

    self.env = env
    self.arg_parser = arg_parser
    self._enable_training = True
    self.train_agents = []
    self.parse_args(arg_parser)

    self.build_agents()

    return

  def get_enable_training(self):
    return self._enable_training

  def set_enable_training(self, enable):
    self._enable_training = enable
    for i in range(len(self.agents)):
      curr_agent = self.agents[i]
      if curr_agent is not None:
        enable_curr_train = self.train_agents[i] if (len(self.train_agents) > 0) else True
        curr_agent.enable_training = self.enable_training and enable_curr_train

    if (self._enable_training):
      self.env.set_mode(RLAgent.Mode.TRAIN)
    else:
      self.env.set_mode(RLAgent.Mode.TEST)

    return

  enable_training = property(get_enable_training, set_enable_training)

  def parse_args(self, arg_parser):
    self.train_agents = self.arg_parser.parse_bools('train_agents')
    num_agents = self.env.get_num_agents()
    assert (len(self.train_agents) == num_agents or len(self.train_agents) == 0)

    return

  def shutdown(self):
    self.env.shutdown()
    return

  def build_agents(self):
    num_agents = self.env.get_num_agents()
    print("num_agents=", num_agents)
    self.agents = []

    Logger.print2('')
    Logger.print2('Num Agents: {:d}'.format(num_agents))

    agent_files = self.arg_parser.parse_strings('agent_files')
    print("len(agent_files)=", len(agent_files))
    assert (len(agent_files) == num_agents or len(agent_files) == 0)

    model_files = self.arg_parser.parse_strings('model_files')
    assert (len(model_files) == num_agents or len(model_files) == 0)

    output_path = self.arg_parser.parse_string('output_path')
    int_output_path = self.arg_parser.parse_string('int_output_path')

    for i in range(num_agents):
      curr_file = agent_files[i]
      curr_agent = self._build_agent(i, curr_file)

      if curr_agent is not None:
        curr_agent.output_dir = output_path
        curr_agent.int_output_dir = int_output_path
        Logger.print2(str(curr_agent))

        if (len(model_files) > 0):
          curr_model_file = model_files[i]
          if curr_model_file != 'none':
            curr_agent.load_model(pybullet_data.getDataPath() + "/" + curr_model_file)

      self.agents.append(curr_agent)
      Logger.print2('')

    self.set_enable_training(self.enable_training)

    return

  def update(self, timestep):
    #print("world update!\n")
    self._update_agents(timestep)
    self._update_env(timestep)
    return

  def reset(self):
    self._reset_agents()
    self._reset_env()
    return

  def end_episode(self):
    self._end_episode_agents()
    return

  def _update_env(self, timestep):
    self.env.update(timestep)
    return

  def _update_agents(self, timestep):
    #print("len(agents)=",len(self.agents))
    for agent in self.agents:
      if (agent is not None):
        agent.update(timestep)
    return

  def _reset_env(self):
    self.env.reset()
    return

  def _reset_agents(self):
    for agent in self.agents:
      if (agent != None):
        agent.reset()
    return

  def _end_episode_agents(self):
    for agent in self.agents:
      if (agent != None):
        agent.end_episode()
    return

  def _build_agent(self, id, agent_file):
    Logger.print2('Agent {:d}: {}'.format(id, agent_file))
    if (agent_file == 'none'):
      agent = None
    else:
      agent = AgentBuilder.build_agent(self, id, agent_file)
      assert (agent != None), 'Failed to build agent {:d} from: {}'.format(id, agent_file)

    return agent
@@ -8,96 +8,97 @@ from pybullet_utils.logger import Logger

from pybullet_envs.deep_mimic.learning.solvers.solver import Solver


class MPISolver(Solver):
  CHECK_SYNC_ITERS = 1000

  def __init__(self, sess, optimizer, vars):
    super().__init__(vars)
    self.sess = sess
    self.optimizer = optimizer
    self._build_grad_feed(vars)
    self._update = optimizer.apply_gradients(zip(self._grad_tf_list, self.vars))
    self._set_flat_vars = TFUtil.SetFromFlat(sess, self.vars)
    self._get_flat_vars = TFUtil.GetFlat(sess, self.vars)

    self.iter = 0
    grad_dim = self._calc_grad_dim()
    self._flat_grad = np.zeros(grad_dim, dtype=np.float32)
    self._global_flat_grad = np.zeros(grad_dim, dtype=np.float32)

    return

  def get_stepsize(self):
    return self.optimizer._learning_rate_tensor.eval()

  def update(self, grads=None, grad_scale=1.0):
    if grads is not None:
      self._flat_grad = MathUtil.flatten(grads)
    else:
      self._flat_grad.fill(0)
    return self.update_flatgrad(self._flat_grad, grad_scale)

  def update_flatgrad(self, flat_grad, grad_scale=1.0):
    if self.iter % self.CHECK_SYNC_ITERS == 0:
      assert self.check_synced(), Logger.print2('Network parameters desynchronized')

    if grad_scale != 1.0:
      flat_grad *= grad_scale

    MPI.COMM_WORLD.Allreduce(flat_grad, self._global_flat_grad, op=MPI.SUM)
    self._global_flat_grad /= MPIUtil.get_num_procs()

    self._load_flat_grad(self._global_flat_grad)
    self.sess.run([self._update], self._grad_feed)
    self.iter += 1

    return

  def sync(self):
    vars = self._get_flat_vars()
    MPIUtil.bcast(vars)
    self._set_flat_vars(vars)
    return

  def check_synced(self):
    synced = True
    if self._is_root():
      vars = self._get_flat_vars()
      MPIUtil.bcast(vars)
    else:
      vars_local = self._get_flat_vars()
      vars_root = np.empty_like(vars_local)
      MPIUtil.bcast(vars_root)
      synced = (vars_local == vars_root).all()
    return synced

  def _is_root(self):
    return MPIUtil.is_root_proc()

  def _build_grad_feed(self, vars):
    self._grad_tf_list = []
    self._grad_buffers = []
    for v in self.vars:
      shape = v.get_shape()
      grad = np.zeros(shape)
      grad_tf = tf.placeholder(tf.float32, shape=shape)
      self._grad_buffers.append(grad)
      self._grad_tf_list.append(grad_tf)

    self._grad_feed = dict({g_tf: g for g_tf, g in zip(self._grad_tf_list, self._grad_buffers)})

    return

  def _calc_grad_dim(self):
    grad_dim = 0
    for grad in self._grad_buffers:
      grad_dim += grad.size
    return grad_dim

  def _load_flat_grad(self, flat_grad):
    start = 0
    for g in self._grad_buffers:
      size = g.size
      np.copyto(g, np.reshape(flat_grad[start:start + size], g.shape))
      start += size
    return
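The core of update_flatgrad is plain data-parallel gradient averaging: every rank contributes its flattened gradient, the sum is all-reduced, and the result is divided by the process count. Stripped of the TensorFlow bookkeeping, the MPI pattern looks like this standalone mpi4py sketch (not code from this commit; MPIUtil.get_num_procs wraps the communicator size):

from mpi4py import MPI
import numpy as np

comm = MPI.COMM_WORLD
local_grad = np.random.randn(4).astype(np.float32)  # each rank's own gradient
global_grad = np.zeros_like(local_grad)

# sum gradients across all ranks, then divide to get the average
comm.Allreduce(local_grad, global_grad, op=MPI.SUM)
global_grad /= comm.Get_size()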
@@ -1,15 +1,17 @@
from abc import abstractmethod
import sys, abc
if sys.version_info >= (3, 4):
  ABC = abc.ABC
else:
  ABC = abc.ABCMeta('ABC', (), {})


class Solver(ABC):

  def __init__(self, vars):
    self.vars = vars
    return

  @abstractmethod
  def update(self, grads):
    pass
@@ -6,144 +6,148 @@ from pybullet_envs.deep_mimic.learning.rl_agent import RLAgent
from pybullet_utils.logger import Logger
from pybullet_envs.deep_mimic.learning.tf_normalizer import TFNormalizer


class TFAgent(RLAgent):
  RESOURCE_SCOPE = 'resource'
  SOLVER_SCOPE = 'solvers'

  def __init__(self, world, id, json_data):
    self.tf_scope = 'agent'
    self.graph = tf.Graph()
    self.sess = tf.Session(graph=self.graph)

    super().__init__(world, id, json_data)
    self._build_graph(json_data)
    self._init_normalizers()
    return

  def __del__(self):
    self.sess.close()
    return

  def save_model(self, out_path):
    with self.sess.as_default(), self.graph.as_default():
      try:
        save_path = self.saver.save(self.sess, out_path, write_meta_graph=False, write_state=False)
        Logger.print2('Model saved to: ' + save_path)
      except:
        # report out_path here: save_path is unbound if saver.save() raised
        Logger.print2("Failed to save model to: " + out_path)
    return

  def load_model(self, in_path):
    with self.sess.as_default(), self.graph.as_default():
      self.saver.restore(self.sess, in_path)
      self._load_normalizers()
      Logger.print2('Model loaded from: ' + in_path)
    return

  def _get_output_path(self):
    assert (self.output_dir != '')
    file_path = self.output_dir + '/agent' + str(self.id) + '_model.ckpt'
    return file_path

  def _get_int_output_path(self):
    assert (self.int_output_dir != '')
    file_path = self.int_output_dir + (
        '/agent{:d}_models/agent{:d}_int_model_{:010d}.ckpt').format(self.id, self.id, self.iter)
    return file_path

  def _build_graph(self, json_data):
    with self.sess.as_default(), self.graph.as_default():
      with tf.variable_scope(self.tf_scope):
        self._build_nets(json_data)

        with tf.variable_scope(self.SOLVER_SCOPE):
          self._build_losses(json_data)
          self._build_solvers(json_data)

        self._initialize_vars()
        self._build_saver()
    return

  def _init_normalizers(self):
    with self.sess.as_default(), self.graph.as_default():
      # update normalizers to sync the tensorflow tensors
      self.s_norm.update()
      self.g_norm.update()
      self.a_norm.update()
    return

  @abstractmethod
  def _build_nets(self, json_data):
    pass

  @abstractmethod
  def _build_losses(self, json_data):
    pass

  @abstractmethod
  def _build_solvers(self, json_data):
    pass

  def _tf_vars(self, scope=''):
    with self.sess.as_default(), self.graph.as_default():
      res = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=self.tf_scope + '/' + scope)
      assert len(res) > 0
    return res

  def _build_normalizers(self):
    with self.sess.as_default(), self.graph.as_default(), tf.variable_scope(self.tf_scope):
      with tf.variable_scope(self.RESOURCE_SCOPE):
        self.s_norm = TFNormalizer(self.sess, 's_norm', self.get_state_size(),
                                   self.world.env.build_state_norm_groups(self.id))
        state_offset = -self.world.env.build_state_offset(self.id)
        print("state_offset=", state_offset)
        state_scale = 1 / self.world.env.build_state_scale(self.id)
        print("state_scale=", state_scale)
        self.s_norm.set_mean_std(-self.world.env.build_state_offset(self.id),
                                 1 / self.world.env.build_state_scale(self.id))

        self.g_norm = TFNormalizer(self.sess, 'g_norm', self.get_goal_size(),
                                   self.world.env.build_goal_norm_groups(self.id))
        self.g_norm.set_mean_std(-self.world.env.build_goal_offset(self.id),
                                 1 / self.world.env.build_goal_scale(self.id))

        self.a_norm = TFNormalizer(self.sess, 'a_norm', self.get_action_size())
        self.a_norm.set_mean_std(-self.world.env.build_action_offset(self.id),
                                 1 / self.world.env.build_action_scale(self.id))
    return

  def _load_normalizers(self):
    self.s_norm.load()
    self.g_norm.load()
    self.a_norm.load()
    return

  def _update_normalizers(self):
    with self.sess.as_default(), self.graph.as_default():
      super()._update_normalizers()
    return

  def _initialize_vars(self):
    self.sess.run(tf.global_variables_initializer())
    return

  def _build_saver(self):
    vars = self._get_saver_vars()
    self.saver = tf.train.Saver(vars, max_to_keep=0)
    return

  def _get_saver_vars(self):
    with self.sess.as_default(), self.graph.as_default():
      vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=self.tf_scope)
      vars = [v for v in vars if '/' + self.SOLVER_SCOPE + '/' not in v.name]
      #vars = [v for v in vars if '/target/' not in v.name]
      assert len(vars) > 0
    return vars

  def _weight_decay_loss(self, scope):
    vars = self._tf_vars(scope)
    vars_no_bias = [v for v in vars if 'bias' not in v.name]
    loss = tf.add_n([tf.nn.l2_loss(v) for v in vars_no_bias])
    return loss

  def _train(self):
    with self.sess.as_default(), self.graph.as_default():
      super()._train()
    return
@@ -3,65 +3,72 @@ import copy
import tensorflow as tf
from pybullet_envs.deep_mimic.learning.normalizer import Normalizer


class TFNormalizer(Normalizer):

  def __init__(self, sess, scope, size, groups_ids=None, eps=0.02, clip=np.inf):
    self.sess = sess
    self.scope = scope
    super().__init__(size, groups_ids, eps, clip)

    with tf.variable_scope(self.scope):
      self._build_resource_tf()
    return

  # initialize count when loading saved values so that things don't change too quickly during updates
  def load(self):
    self.count = self.count_tf.eval()[0]
    self.mean = self.mean_tf.eval()
    self.std = self.std_tf.eval()
    self.mean_sq = self.calc_mean_sq(self.mean, self.std)
    return

  def update(self):
    super().update()
    self._update_resource_tf()
    return

  def set_mean_std(self, mean, std):
    super().set_mean_std(mean, std)
    self._update_resource_tf()
    return

  def normalize_tf(self, x):
    norm_x = (x - self.mean_tf) / self.std_tf
    norm_x = tf.clip_by_value(norm_x, -self.clip, self.clip)
    return norm_x

  def unnormalize_tf(self, norm_x):
    x = norm_x * self.std_tf + self.mean_tf
    return x

  def _build_resource_tf(self):
    self.count_tf = tf.get_variable(dtype=tf.int32,
                                    name='count',
                                    initializer=np.array([self.count], dtype=np.int32),
                                    trainable=False)
    self.mean_tf = tf.get_variable(dtype=tf.float32,
                                   name='mean',
                                   initializer=self.mean.astype(np.float32),
                                   trainable=False)
    self.std_tf = tf.get_variable(dtype=tf.float32,
                                  name='std',
                                  initializer=self.std.astype(np.float32),
                                  trainable=False)

    self.count_ph = tf.get_variable(dtype=tf.int32, name='count_ph', shape=[1])
    self.mean_ph = tf.get_variable(dtype=tf.float32, name='mean_ph', shape=self.mean.shape)
    self.std_ph = tf.get_variable(dtype=tf.float32, name='std_ph', shape=self.std.shape)

    self._update_op = tf.group(self.count_tf.assign(self.count_ph),
                               self.mean_tf.assign(self.mean_ph), self.std_tf.assign(self.std_ph))
    return

  def _update_resource_tf(self):
    feed = {
        self.count_ph: np.array([self.count], dtype=np.int32),
        self.mean_ph: self.mean,
        self.std_ph: self.std
    }
    self.sess.run(self._update_op, feed_dict=feed)
    return
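normalize_tf and unnormalize_tf are mutual inverses whenever the normalized value stays inside the clip range. The same relationship in NumPy, with made-up statistics:

import numpy as np

mean = np.array([0.0, 1.0])
std = np.array([1.0, 0.5])
clip = 5.0

x = np.array([0.3, 1.8])
norm_x = np.clip((x - mean) / std, -clip, clip)
assert np.allclose(norm_x * std + mean, x)  # holds because nothing was clipped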
@@ -4,101 +4,116 @@ import os

xavier_initializer = tf.contrib.layers.xavier_initializer()


def disable_gpu():
  os.environ["CUDA_VISIBLE_DEVICES"] = '-1'
  return


def var_shape(x):
  out = [k.value for k in x.get_shape()]
  assert all(isinstance(a, int) for a in out), "shape function assumes that shape is fully known"
  return out


def intprod(x):
  return int(np.prod(x))


def numel(x):
  n = intprod(var_shape(x))
  return n


def flat_grad(loss, var_list, grad_ys=None):
  grads = tf.gradients(loss, var_list, grad_ys)
  return tf.concat([tf.reshape(grad, [numel(v)]) for (v, grad) in zip(var_list, grads)], axis=0)


def fc_net(input, layers_sizes, activation, reuse=None,
           flatten=False):  # build fully connected network
  curr_tf = input
  for i, size in enumerate(layers_sizes):
    with tf.variable_scope(str(i), reuse=reuse):
      curr_tf = tf.layers.dense(inputs=curr_tf,
                                units=size,
                                kernel_initializer=xavier_initializer,
                                activation=activation if i < len(layers_sizes) - 1 else None)
  if flatten:
    assert layers_sizes[-1] == 1
    curr_tf = tf.reshape(curr_tf, [-1])

  return curr_tf


def copy(sess, src, dst):
  assert len(src) == len(dst)
  sess.run(list(map(lambda v: v[1].assign(v[0]), zip(src, dst))))
  return


def flat_grad(loss, var_list):
  grads = tf.gradients(loss, var_list)
  return tf.concat(axis=0,
                   values=[tf.reshape(grad, [numel(v)]) for (v, grad) in zip(var_list, grads)])


def calc_logp_gaussian(x_tf, mean_tf, std_tf):
  dim = tf.to_float(tf.shape(x_tf)[-1])

  if mean_tf is None:
    diff_tf = x_tf
  else:
    diff_tf = x_tf - mean_tf

  logp_tf = -0.5 * tf.reduce_sum(tf.square(diff_tf / std_tf), axis=-1)
  logp_tf += -0.5 * dim * np.log(2 * np.pi) - tf.reduce_sum(tf.log(std_tf), axis=-1)

  return logp_tf


def calc_bound_loss(x_tf, bound_min, bound_max):
  # penalty for violating bounds
  violation_min = tf.minimum(x_tf - bound_min, 0)
  violation_max = tf.maximum(x_tf - bound_max, 0)
  violation = tf.reduce_sum(tf.square(violation_min), axis=-1) + tf.reduce_sum(
      tf.square(violation_max), axis=-1)
  loss = 0.5 * tf.reduce_mean(violation)
  return loss


class SetFromFlat(object):

  def __init__(self, sess, var_list, dtype=tf.float32):
    assigns = []
    shapes = list(map(var_shape, var_list))
    total_size = np.sum([intprod(shape) for shape in shapes])

    self.sess = sess
    self.theta = tf.placeholder(dtype, [total_size])
    start = 0
    assigns = []

    for (shape, v) in zip(shapes, var_list):
      size = intprod(shape)
      assigns.append(tf.assign(v, tf.reshape(self.theta[start:start + size], shape)))
      start += size

    self.op = tf.group(*assigns)

    return

  def __call__(self, theta):
    self.sess.run(self.op, feed_dict={self.theta: theta})
    return


class GetFlat(object):

  def __init__(self, sess, var_list):
    self.sess = sess
    self.op = tf.concat(axis=0, values=[tf.reshape(v, [numel(v)]) for v in var_list])
    return

  def __call__(self):
    return self.sess.run(self.op)
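calc_logp_gaussian is the standard diagonal-Gaussian log-density. The same formula in NumPy, checked against SciPy if it is available (a verification sketch, not part of the commit):

import numpy as np
from scipy.stats import multivariate_normal

x = np.array([0.3, -1.2])
mean = np.array([0.0, 0.5])
std = np.array([1.0, 2.0])

# log N(x; mean, diag(std^2))
logp = -0.5 * np.sum(np.square((x - mean) / std))
logp += -0.5 * x.size * np.log(2 * np.pi) - np.sum(np.log(std))

ref = multivariate_normal(mean=mean, cov=np.diag(std**2)).logpdf(x)
assert np.isclose(logp, ref)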
@@ -1,26 +1,28 @@
import numpy as np

from quaternion import qrot, qinverse


def normalize_screen_coordinates(X, w, h):
  assert X.shape[-1] == 2
  # Normalize so that [0, w] is mapped to [-1, 1], while preserving the aspect ratio
  return X / w * 2 - [1, h / w]


def image_coordinates(X, w, h):
  assert X.shape[-1] == 2
  # Reverse camera frame normalization
  return (X + [1, h / w]) * w / 2


def world_to_camera(X, R, t):
  Rt = qinverse(R)
  Q = np.tile(Rt, (*X.shape[:-1], 1))
  V = X - t
  return qrot(Q, V)


def camera_to_world(X, R, t):
  Q = np.tile(R, (*X.shape[:-1], 1))
  V = X
  return qrot(Q, V) + t
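normalize_screen_coordinates and image_coordinates are exact inverses, which is easy to confirm with hypothetical pixel values:

import numpy as np

X = np.array([[320.0, 240.0], [0.0, 480.0]])  # pixel coordinates
w, h = 640, 480

X_norm = X / w * 2 - [1, h / w]         # normalize_screen_coordinates(X, w, h)
X_back = (X_norm + [1, h / w]) * w / 2  # image_coordinates(X_norm, w, h)
assert np.allclose(X, X_back)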
@@ -3,54 +3,63 @@ from h36m_dataset import Human36mDataset
from camera import *

import numpy as np

# In[2]:

joint_info = {
    'joint_name': [
        'root', 'right_hip', 'right_knee', 'right_ankle', 'left_hip', 'left_knee', 'left_ankle',
        'chest', 'neck', 'nose', 'eye', 'left_shoulder', 'left_elbow', 'left_wrist',
        'right_shoulder', 'right_elbow', 'right_wrist'
    ],
    'father': [0, 0, 1, 2, 0, 4, 5, 0, 7, 8, 9, 8, 11, 12, 8, 14, 15],
    'side': [
        'middle', 'right', 'right', 'right', 'left', 'left', 'left', 'middle', 'middle', 'middle',
        'middle', 'left', 'left', 'left', 'right', 'right', 'right'
    ]
}

# In[3]:


def init_fb_h36m_dataset(dataset_path):
  dataset = Human36mDataset(dataset_path)
  print('Preparing Facebook Human3.6M Dataset...')
  for subject in dataset.subjects():
    for action in dataset[subject].keys():
      anim = dataset[subject][action]
      positions_3d = []
      for cam in anim['cameras']:
        pos_3d = world_to_camera(anim['positions'], R=cam['orientation'], t=cam['translation'])
        pos_3d[:, 1:] -= pos_3d[:, :1]  # Remove global offset, but keep trajectory in first position
        positions_3d.append(pos_3d)
      anim['positions_3d'] = positions_3d
  return dataset


def pose3D_from_fb_h36m(dataset, subject, action, shift):
  pose_seq = dataset[subject][action]['positions_3d'][0].copy()
  trajectory = pose_seq[:, :1]
  pose_seq[:, 1:] += trajectory
  # Invert camera transformation
  cam = dataset.cameras()[subject][0]
  pose_seq = camera_to_world(pose_seq, R=cam['orientation'], t=cam['translation'])
  x = pose_seq[:, :, 0:1]
  y = pose_seq[:, :, 1:2] * -1
  z = pose_seq[:, :, 2:3]
  pose_seq = np.concatenate((x, z, y), axis=2)
  # plus shift
  pose_seq += np.array([[shift for i in range(pose_seq.shape[1])] for j in range(pose_seq.shape[0])])
  return pose_seq


def rot_seq_to_deepmimic_json(rot_seq, loop, json_path):
  to_json = {"Loop": loop, "Frames": []}
  rot_seq = np.around(rot_seq, decimals=6)
  to_json["Frames"] = rot_seq.tolist()
  # In[14]:
  to_file = json.dumps(to_json)
  file = open(json_path, "w")
  file.write(to_file)
  file.close()
||||
@@ -3,11 +3,13 @@ import copy
|
||||
from skeleton import Skeleton
|
||||
from mocap_dataset import MocapDataset
|
||||
from camera import normalize_screen_coordinates, image_coordinates
|
||||
|
||||
h36m_skeleton = Skeleton(parents=[-1, 0, 1, 2, 3, 4, 0, 6, 7, 8, 9, 0, 11, 12, 13, 14, 12,
|
||||
16, 17, 18, 19, 20, 19, 22, 12, 24, 25, 26, 27, 28, 27, 30],
|
||||
joints_left=[6, 7, 8, 9, 10, 16, 17, 18, 19, 20, 21, 22, 23],
|
||||
joints_right=[1, 2, 3, 4, 5, 24, 25, 26, 27, 28, 29, 30, 31])
|
||||
|
||||
h36m_skeleton = Skeleton(parents=[
|
||||
-1, 0, 1, 2, 3, 4, 0, 6, 7, 8, 9, 0, 11, 12, 13, 14, 12, 16, 17, 18, 19, 20, 19, 22, 12, 24,
|
||||
25, 26, 27, 28, 27, 30
|
||||
],
|
||||
joints_left=[6, 7, 8, 9, 10, 16, 17, 18, 19, 20, 21, 22, 23],
|
||||
joints_right=[1, 2, 3, 4, 5, 24, 25, 26, 27, 28, 29, 30, 31])
|
||||
|
||||
h36m_cameras_intrinsic_params = [
|
||||
{
|
||||
@@ -18,7 +20,7 @@ h36m_cameras_intrinsic_params = [
|
||||
'tangential_distortion': [-0.0009756988729350269, -0.00142447161488235],
|
||||
'res_w': 1000,
|
||||
'res_h': 1002,
|
||||
'azimuth': 70, # Only used for visualization
|
||||
'azimuth': 70, # Only used for visualization
|
||||
},
|
||||
{
|
||||
'id': '55011271',
|
||||
@@ -28,7 +30,7 @@ h36m_cameras_intrinsic_params = [
|
||||
'tangential_distortion': [-0.0016190266469493508, -0.0027408944442868233],
|
||||
'res_w': 1000,
|
||||
'res_h': 1000,
|
||||
'azimuth': -70, # Only used for visualization
|
||||
'azimuth': -70, # Only used for visualization
|
||||
},
|
||||
{
|
||||
'id': '58860488',
|
||||
@@ -38,7 +40,7 @@ h36m_cameras_intrinsic_params = [
|
||||
'tangential_distortion': [0.0014843869721516967, -0.0007599993259645998],
|
||||
'res_w': 1000,
|
||||
'res_h': 1000,
|
||||
'azimuth': 110, # Only used for visualization
|
||||
'azimuth': 110, # Only used for visualization
|
||||
},
|
||||
{
|
||||
'id': '60457274',
|
||||
@@ -48,26 +50,34 @@ h36m_cameras_intrinsic_params = [
|
||||
'tangential_distortion': [-0.0005872055771760643, -0.0018133620033040643],
|
||||
'res_w': 1000,
|
||||
'res_h': 1002,
|
||||
'azimuth': -110, # Only used for visualization
|
||||
'azimuth': -110, # Only used for visualization
|
||||
},
|
||||
]
|
||||
|
||||
h36m_cameras_extrinsic_params = {
|
||||
'S1': [
|
||||
{
|
||||
'orientation': [0.1407056450843811, -0.1500701755285263, -0.755240797996521, 0.6223280429840088],
|
||||
'orientation': [
|
||||
0.1407056450843811, -0.1500701755285263, -0.755240797996521, 0.6223280429840088
|
||||
],
|
||||
'translation': [1841.1070556640625, 4955.28466796875, 1563.4454345703125],
|
||||
},
|
||||
{
|
||||
'orientation': [0.6157187819480896, -0.764836311340332, -0.14833825826644897, 0.11794740706682205],
|
||||
'orientation': [
|
||||
0.6157187819480896, -0.764836311340332, -0.14833825826644897, 0.11794740706682205
|
||||
],
|
||||
'translation': [1761.278564453125, -5078.0068359375, 1606.2650146484375],
|
||||
},
|
||||
{
|
||||
'orientation': [0.14651472866535187, -0.14647851884365082, 0.7653023600578308, -0.6094175577163696],
|
||||
'orientation': [
|
||||
0.14651472866535187, -0.14647851884365082, 0.7653023600578308, -0.6094175577163696
|
||||
],
|
||||
'translation': [-1846.7777099609375, 5215.04638671875, 1491.972412109375],
|
||||
},
|
||||
{
|
||||
'orientation': [0.5834008455276489, -0.7853162288665771, 0.14548823237419128, -0.14749594032764435],
|
||||
'orientation': [
|
||||
0.5834008455276489, -0.7853162288665771, 0.14548823237419128, -0.14749594032764435
|
||||
],
|
||||
'translation': [-1794.7896728515625, -3722.698974609375, 1574.8927001953125],
|
||||
},
|
||||
],
|
||||
@@ -91,158 +101,206 @@ h36m_cameras_extrinsic_params = {
|
||||
    ],
    'S5': [
        {
            'orientation': [0.1467377245426178, -0.162370964884758, -0.7551892995834351, 0.6178938746452332],
            'orientation': [
                0.1467377245426178, -0.162370964884758, -0.7551892995834351, 0.6178938746452332
            ],
            'translation': [2097.3916015625, 4880.94482421875, 1605.732421875],
        },
        {
            'orientation': [0.6159758567810059, -0.7626792192459106, -0.15728192031383514, 0.1189815029501915],
            'orientation': [
                0.6159758567810059, -0.7626792192459106, -0.15728192031383514, 0.1189815029501915
            ],
            'translation': [2031.7008056640625, -5167.93310546875, 1612.923095703125],
        },
        {
            'orientation': [0.14291371405124664, -0.12907841801643372, 0.7678384780883789, -0.6110143065452576],
            'orientation': [
                0.14291371405124664, -0.12907841801643372, 0.7678384780883789, -0.6110143065452576
            ],
            'translation': [-1620.5948486328125, 5171.65869140625, 1496.43701171875],
        },
        {
            'orientation': [0.5920479893684387, -0.7814217805862427, 0.1274748593568802, -0.15036417543888092],
            'orientation': [
                0.5920479893684387, -0.7814217805862427, 0.1274748593568802, -0.15036417543888092
            ],
            'translation': [-1637.1737060546875, -3867.3173828125, 1547.033203125],
        },
    ],
    'S6': [
        {
            'orientation': [0.1337897777557373, -0.15692396461963654, -0.7571090459823608, 0.6198879480361938],
            'orientation': [
                0.1337897777557373, -0.15692396461963654, -0.7571090459823608, 0.6198879480361938
            ],
            'translation': [1935.4517822265625, 4950.24560546875, 1618.0838623046875],
        },
        {
            'orientation': [0.6147197484970093, -0.7628812789916992, -0.16174767911434174, 0.11819244921207428],
            'orientation': [
                0.6147197484970093, -0.7628812789916992, -0.16174767911434174, 0.11819244921207428
            ],
            'translation': [1969.803955078125, -5128.73876953125, 1632.77880859375],
        },
        {
            'orientation': [0.1529948115348816, -0.13529130816459656, 0.7646096348762512, -0.6112781167030334],
            'orientation': [
                0.1529948115348816, -0.13529130816459656, 0.7646096348762512, -0.6112781167030334
            ],
            'translation': [-1769.596435546875, 5185.361328125, 1476.993408203125],
        },
        {
            'orientation': [0.5916101336479187, -0.7804774045944214, 0.12832270562648773, -0.1561593860387802],
            'orientation': [
                0.5916101336479187, -0.7804774045944214, 0.12832270562648773, -0.1561593860387802
            ],
            'translation': [-1721.668701171875, -3884.13134765625, 1540.4879150390625],
        },
    ],
    'S7': [
        {
            'orientation': [0.1435241848230362, -0.1631336808204651, -0.7548328638076782, 0.6188824772834778],
            'orientation': [
                0.1435241848230362, -0.1631336808204651, -0.7548328638076782, 0.6188824772834778
            ],
            'translation': [1974.512939453125, 4926.3544921875, 1597.8326416015625],
        },
        {
            'orientation': [0.6141672730445862, -0.7638262510299683, -0.1596645563840866, 0.1177929937839508],
            'orientation': [
                0.6141672730445862, -0.7638262510299683, -0.1596645563840866, 0.1177929937839508
            ],
            'translation': [1937.0584716796875, -5119.7900390625, 1631.5665283203125],
        },
        {
            'orientation': [0.14550060033798218, -0.12874816358089447, 0.7660516500473022, -0.6127139329910278],
            'orientation': [
                0.14550060033798218, -0.12874816358089447, 0.7660516500473022, -0.6127139329910278
            ],
            'translation': [-1741.8111572265625, 5208.24951171875, 1464.8245849609375],
        },
        {
            'orientation': [0.5912848114967346, -0.7821764349937439, 0.12445473670959473, -0.15196487307548523],
            'orientation': [
                0.5912848114967346, -0.7821764349937439, 0.12445473670959473, -0.15196487307548523
            ],
            'translation': [-1734.7105712890625, -3832.42138671875, 1548.5830078125],
        },
    ],
    'S8': [
        {
            'orientation': [0.14110587537288666, -0.15589867532253265, -0.7561917304992676, 0.619644045829773],
            'orientation': [
                0.14110587537288666, -0.15589867532253265, -0.7561917304992676, 0.619644045829773
            ],
            'translation': [2150.65185546875, 4896.1611328125, 1611.9046630859375],
        },
        {
            'orientation': [0.6169601678848267, -0.7647668123245239, -0.14846350252628326, 0.11158157885074615],
            'orientation': [
                0.6169601678848267, -0.7647668123245239, -0.14846350252628326, 0.11158157885074615
            ],
            'translation': [2219.965576171875, -5148.453125, 1613.0440673828125],
        },
        {
            'orientation': [0.1471444070339203, -0.13377119600772858, 0.7670128345489502, -0.6100369691848755],
            'orientation': [
                0.1471444070339203, -0.13377119600772858, 0.7670128345489502, -0.6100369691848755
            ],
            'translation': [-1571.2215576171875, 5137.0185546875, 1498.1761474609375],
        },
        {
            'orientation': [0.5927824378013611, -0.7825870513916016, 0.12147816270589828, -0.14631995558738708],
            'orientation': [
                0.5927824378013611, -0.7825870513916016, 0.12147816270589828, -0.14631995558738708
            ],
            'translation': [-1476.913330078125, -3896.7412109375, 1547.97216796875],
        },
    ],
    'S9': [
        {
            'orientation': [0.15540587902069092, -0.15548215806484222, -0.7532095313072205, 0.6199594736099243],
            'orientation': [
                0.15540587902069092, -0.15548215806484222, -0.7532095313072205, 0.6199594736099243
            ],
            'translation': [2044.45849609375, 4935.1171875, 1481.2275390625],
        },
        {
            'orientation': [0.618784487247467, -0.7634735107421875, -0.14132238924503326, 0.11933968216180801],
            'orientation': [
                0.618784487247467, -0.7634735107421875, -0.14132238924503326, 0.11933968216180801
            ],
            'translation': [1990.959716796875, -5123.810546875, 1568.8048095703125],
        },
        {
            'orientation': [0.13357827067375183, -0.1367100477218628, 0.7689454555511475, -0.6100738644599915],
            'orientation': [
                0.13357827067375183, -0.1367100477218628, 0.7689454555511475, -0.6100738644599915
            ],
            'translation': [-1670.9921875, 5211.98583984375, 1528.387939453125],
        },
        {
            'orientation': [0.5879399180412292, -0.7823407053947449, 0.1427614390850067, -0.14794869720935822],
            'orientation': [
                0.5879399180412292, -0.7823407053947449, 0.1427614390850067, -0.14794869720935822
            ],
            'translation': [-1696.04345703125, -3827.099853515625, 1591.4127197265625],
        },
    ],
    'S11': [
        {
            'orientation': [0.15232472121715546, -0.15442320704460144, -0.7547563314437866, 0.6191070079803467],
            'orientation': [
                0.15232472121715546, -0.15442320704460144, -0.7547563314437866, 0.6191070079803467
            ],
            'translation': [2098.440185546875, 4926.5546875, 1500.278564453125],
        },
        {
            'orientation': [0.6189449429512024, -0.7600917220115662, -0.15300633013248444, 0.1255258321762085],
            'orientation': [
                0.6189449429512024, -0.7600917220115662, -0.15300633013248444, 0.1255258321762085
            ],
            'translation': [2083.182373046875, -4912.1728515625, 1561.07861328125],
        },
        {
            'orientation': [0.14943228662014008, -0.15650227665901184, 0.7681233882904053, -0.6026304364204407],
            'orientation': [
                0.14943228662014008, -0.15650227665901184, 0.7681233882904053, -0.6026304364204407
            ],
            'translation': [-1609.8153076171875, 5177.3359375, 1537.896728515625],
        },
        {
            'orientation': [0.5894251465797424, -0.7818877100944519, 0.13991211354732513, -0.14715361595153809],
            'orientation': [
                0.5894251465797424, -0.7818877100944519, 0.13991211354732513, -0.14715361595153809
            ],
            'translation': [-1590.738037109375, -3854.1689453125, 1578.017578125],
        },
    ],
}
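
Note the conventions in this table: each 'orientation' is a unit quaternion and each 'translation' is in millimeters (Human36mDataset below divides by 1000). A quick hedged check:

import numpy as np

cam = h36m_cameras_extrinsic_params['S11'][0]
assert abs(np.linalg.norm(cam['orientation']) - 1.0) < 1e-5  # unit quaternion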


class Human36mDataset(MocapDataset):
    def __init__(self, path, remove_static_joints=True):
        super().__init__(fps=50, skeleton=h36m_skeleton)

        self._cameras = copy.deepcopy(h36m_cameras_extrinsic_params)
        for cameras in self._cameras.values():
            for i, cam in enumerate(cameras):
                cam.update(h36m_cameras_intrinsic_params[i])
                for k, v in cam.items():
                    if k not in ['id', 'res_w', 'res_h']:
                        cam[k] = np.array(v, dtype='float32')

                # Normalize camera frame
                cam['center'] = normalize_screen_coordinates(cam['center'], w=cam['res_w'], h=cam['res_h']).astype('float32')
                cam['focal_length'] = cam['focal_length']/cam['res_w']*2
                if 'translation' in cam:
                    cam['translation'] = cam['translation']/1000  # mm to meters

                # Add intrinsic parameters vector
                cam['intrinsic'] = np.concatenate((cam['focal_length'],
                                                   cam['center'],
                                                   cam['radial_distortion'],
                                                   cam['tangential_distortion']))

        # Load serialized dataset
        data = np.load(path)['positions_3d'].item()

        self._data = {}
        for subject, actions in data.items():
            self._data[subject] = {}
            for action_name, positions in actions.items():
                self._data[subject][action_name] = {
                    'positions': positions,
                    'cameras': self._cameras[subject],
                }

        if remove_static_joints:
            # Bring the skeleton to 17 joints instead of the original 32
            self.remove_joints([4, 5, 9, 10, 11, 16, 20, 21, 22, 23, 24, 28, 29, 30, 31])

            # Rewire shoulders to the correct parents
            self._skeleton._parents[11] = 8
            self._skeleton._parents[14] = 8

    def supports_semi_supervised(self):
        return True

  def __init__(self, path, remove_static_joints=True):
    super().__init__(fps=50, skeleton=h36m_skeleton)

    self._cameras = copy.deepcopy(h36m_cameras_extrinsic_params)
    for cameras in self._cameras.values():
      for i, cam in enumerate(cameras):
        cam.update(h36m_cameras_intrinsic_params[i])
        for k, v in cam.items():
          if k not in ['id', 'res_w', 'res_h']:
            cam[k] = np.array(v, dtype='float32')

        # Normalize camera frame
        cam['center'] = normalize_screen_coordinates(cam['center'], w=cam['res_w'],
                                                     h=cam['res_h']).astype('float32')
        cam['focal_length'] = cam['focal_length'] / cam['res_w'] * 2
        if 'translation' in cam:
          cam['translation'] = cam['translation'] / 1000  # mm to meters

        # Add intrinsic parameters vector
        cam['intrinsic'] = np.concatenate((cam['focal_length'], cam['center'],
                                           cam['radial_distortion'], cam['tangential_distortion']))

    # Load serialized dataset
    data = np.load(path)['positions_3d'].item()

    self._data = {}
    for subject, actions in data.items():
      self._data[subject] = {}
      for action_name, positions in actions.items():
        self._data[subject][action_name] = {
            'positions': positions,
            'cameras': self._cameras[subject],
        }

    if remove_static_joints:
      # Bring the skeleton to 17 joints instead of the original 32
      self.remove_joints([4, 5, 9, 10, 11, 16, 20, 21, 22, 23, 24, 28, 29, 30, 31])

      # Rewire shoulders to the correct parents
      self._skeleton._parents[11] = 8
      self._skeleton._parents[14] = 8

  def supports_semi_supervised(self):
    return True
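
normalize_screen_coordinates itself is outside this hunk; for reference, a minimal sketch under the usual convention (x mapped to [-1, 1], y scaled by the same factor so the aspect ratio is preserved) would be:

import numpy as np

def normalize_screen_coordinates(X, w, h):
  # map pixel coordinates so x spans [-1, 1]; y uses the same scale
  assert X.shape[-1] == 2
  return X / w * 2 - [1, h / w]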
File diff suppressed because it is too large
@@ -9,142 +9,138 @@ from transformation import *

from pyquaternion import Quaternion


def get_angle(vec1, vec2):
    cos_theta = np.dot(vec1, vec2)/(np.linalg.norm(vec1) * np.linalg.norm(vec2))
    return acos(cos_theta)
  cos_theta = np.dot(vec1, vec2) / (np.linalg.norm(vec1) * np.linalg.norm(vec2))
  return acos(cos_theta)


def get_quaternion(ox, oy, oz, x, y, z):
    # given transformed axis in x-y-z order return a quaternion
    ox /= np.linalg.norm(ox)
    oy /= np.linalg.norm(oy)
    oz /= np.linalg.norm(oz)
  # given transformed axis in x-y-z order return a quaternion
  ox /= np.linalg.norm(ox)
  oy /= np.linalg.norm(oy)
  oz /= np.linalg.norm(oz)

    set1 = np.vstack((ox,oy,oz))
  set1 = np.vstack((ox, oy, oz))

    x /= np.linalg.norm(x)
    y /= np.linalg.norm(y)
    z /= np.linalg.norm(z)
  x /= np.linalg.norm(x)
  y /= np.linalg.norm(y)
  z /= np.linalg.norm(z)

    set2 = np.vstack((x,y,z))
    rot_mat = superimposition_matrix(set1, set2, scale=False, usesvd=True)
    rot_qua = quaternion_from_matrix(rot_mat)
  set2 = np.vstack((x, y, z))
  rot_mat = superimposition_matrix(set1, set2, scale=False, usesvd=True)
  rot_qua = quaternion_from_matrix(rot_mat)

    return rot_qua
  return rot_qua
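
A quick hedged sanity check for get_angle (pure numpy plus math.acos); get_quaternion additionally relies on superimposition_matrix and quaternion_from_matrix from the transformation module imported above:

import numpy as np
from math import pi

v1 = np.array([1.0, 0.0, 0.0])
v2 = np.array([0.0, 2.0, 0.0])
assert abs(get_angle(v1, v2) - pi / 2) < 1e-9  # orthogonal vectors -> 90 degrees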


# 3D coord to deepmimic rotations
def coord_to_rot(frameNum, frame, frame_duration):
    eps = 0.001
    axis_rotate_rate = 0.3
  eps = 0.001
  axis_rotate_rate = 0.3

    frame = np.array(frame)
    tmp = [[] for i in range(15)]
    # duration of frame in seconds (1D),
    tmp[0] = [frame_duration]
    # root position (3D),
    tmp[1] = frame[0]
    # root rotation (4D),
    root_y = (frame[7] - frame[0])
    root_z = (frame[1] - frame[0])
    root_x = np.cross(root_y, root_z)
  frame = np.array(frame)
  tmp = [[] for i in range(15)]
  # duration of frame in seconds (1D),
  tmp[0] = [frame_duration]
  # root position (3D),
  tmp[1] = frame[0]
  # root rotation (4D),
  root_y = (frame[7] - frame[0])
  root_z = (frame[1] - frame[0])
  root_x = np.cross(root_y, root_z)

    x = np.array([1.0,0,0])
    y = np.array([0,1.0,0])
    z = np.array([0,0,1.0])

    rot_qua = get_quaternion(root_x, root_y, root_z, x, y, z)
    tmp[2] = list(rot_qua)
  x = np.array([1.0, 0, 0])
  y = np.array([0, 1.0, 0])
  z = np.array([0, 0, 1.0])

    # chest rotation (4D),
    chest_y = (frame[8] - frame[7])
    chest_z = (frame[14] - frame[8])
    chest_x = np.cross(chest_y, chest_z)
    rot_qua = get_quaternion(chest_x, chest_y, chest_z, root_x, root_y, root_z)
    tmp[3] = list(rot_qua)
  rot_qua = get_quaternion(root_x, root_y, root_z, x, y, z)
  tmp[2] = list(rot_qua)

    # neck rotation (4D),
    neck_y = (frame[10] - frame[8])
    neck_z = np.cross(frame[10]-frame[9], frame[8]-frame[9])
    neck_x = np.cross(neck_y, neck_z)
    rot_qua = get_quaternion(neck_x, neck_y, neck_z, chest_x, chest_y, chest_z)
    tmp[4] = list(rot_qua)
  # chest rotation (4D),
  chest_y = (frame[8] - frame[7])
  chest_z = (frame[14] - frame[8])
  chest_x = np.cross(chest_y, chest_z)
  rot_qua = get_quaternion(chest_x, chest_y, chest_z, root_x, root_y, root_z)
  tmp[3] = list(rot_qua)

    # right hip rotation (4D),
    r_hip_y = (frame[1] - frame[2])
    r_hip_z = np.cross(frame[1]-frame[2], frame[3]-frame[2])
    r_hip_x = np.cross(r_hip_y, r_hip_z)
    rot_qua = get_quaternion(r_hip_x, r_hip_y, r_hip_z, root_x, root_y, root_z)
    tmp[5] = list(rot_qua)
  # neck rotation (4D),
  neck_y = (frame[10] - frame[8])
  neck_z = np.cross(frame[10] - frame[9], frame[8] - frame[9])
  neck_x = np.cross(neck_y, neck_z)
  rot_qua = get_quaternion(neck_x, neck_y, neck_z, chest_x, chest_y, chest_z)
  tmp[4] = list(rot_qua)

    # right knee rotation (1D),
    vec1 = frame[1] - frame[2]
    vec2 = frame[3] - frame[2]
    angle1 = get_angle(vec1, vec2)
    tmp[6] = [angle1-pi]
  # right hip rotation (4D),
  r_hip_y = (frame[1] - frame[2])
  r_hip_z = np.cross(frame[1] - frame[2], frame[3] - frame[2])
  r_hip_x = np.cross(r_hip_y, r_hip_z)
  rot_qua = get_quaternion(r_hip_x, r_hip_y, r_hip_z, root_x, root_y, root_z)
  tmp[5] = list(rot_qua)

    # right ankle rotation (4D),
    tmp[7] = [1,0,0,0]
  # right knee rotation (1D),
  vec1 = frame[1] - frame[2]
  vec2 = frame[3] - frame[2]
  angle1 = get_angle(vec1, vec2)
  tmp[6] = [angle1 - pi]

    # right shoulder rotation (4D),
    r_shou_y = (frame[14] - frame[15])
    r_shou_z = np.cross(frame[16]-frame[15], frame[14]-frame[15])
    r_shou_x = np.cross(r_shou_y, r_shou_z)
    rot_qua = get_quaternion(r_shou_x, r_shou_y, r_shou_z, chest_x, chest_y, chest_z)
    tmp[8] = list(rot_qua)
  # right ankle rotation (4D),
  tmp[7] = [1, 0, 0, 0]

    # right elbow rotation (1D),
    vec1 = frame[14] - frame[15]
    vec2 = frame[16] - frame[15]
    angle1 = get_angle(vec1, vec2)
    tmp[9] = [pi-angle1]
  # right shoulder rotation (4D),
  r_shou_y = (frame[14] - frame[15])
  r_shou_z = np.cross(frame[16] - frame[15], frame[14] - frame[15])
  r_shou_x = np.cross(r_shou_y, r_shou_z)
  rot_qua = get_quaternion(r_shou_x, r_shou_y, r_shou_z, chest_x, chest_y, chest_z)
  tmp[8] = list(rot_qua)

    # left hip rotation (4D),
    l_hip_y = (frame[4] - frame[5])
    l_hip_z = np.cross(frame[4]-frame[5], frame[6]-frame[5])
    l_hip_x = np.cross(l_hip_y, l_hip_z)
    rot_qua = get_quaternion(l_hip_x, l_hip_y, l_hip_z, root_x, root_y, root_z)
    tmp[10] = list(rot_qua)

    # left knee rotation (1D),
    vec1 = frame[4] - frame[5]
    vec2 = frame[6] - frame[5]
    angle1 = get_angle(vec1, vec2)
    tmp[11] = [angle1-pi]

    # left ankle rotation (4D),
    tmp[12] = [1,0,0,0]
  # right elbow rotation (1D),
  vec1 = frame[14] - frame[15]
  vec2 = frame[16] - frame[15]
  angle1 = get_angle(vec1, vec2)
  tmp[9] = [pi - angle1]

    # left shoulder rotation (4D),
    l_shou_y = (frame[11] - frame[12])
    l_shou_z = np.cross(frame[13]-frame[12], frame[11]-frame[12])
    l_shou_x = np.cross(l_shou_y, l_shou_z)
    rot_qua = get_quaternion(l_shou_x, l_shou_y, l_shou_z, chest_x, chest_y, chest_z)
    tmp[13] = list(rot_qua)
  # left hip rotation (4D),
  l_hip_y = (frame[4] - frame[5])
  l_hip_z = np.cross(frame[4] - frame[5], frame[6] - frame[5])
  l_hip_x = np.cross(l_hip_y, l_hip_z)
  rot_qua = get_quaternion(l_hip_x, l_hip_y, l_hip_z, root_x, root_y, root_z)
  tmp[10] = list(rot_qua)

    # left elbow rotation (1D)
    vec1 = frame[11] - frame[12]
    vec2 = frame[13] - frame[12]
    angle1 = get_angle(vec1, vec2)
    tmp[14] = [pi-angle1]
  # left knee rotation (1D),
  vec1 = frame[4] - frame[5]
  vec2 = frame[6] - frame[5]
  angle1 = get_angle(vec1, vec2)
  tmp[11] = [angle1 - pi]

  # left ankle rotation (4D),
  tmp[12] = [1, 0, 0, 0]

  # left shoulder rotation (4D),
  l_shou_y = (frame[11] - frame[12])
  l_shou_z = np.cross(frame[13] - frame[12], frame[11] - frame[12])
  l_shou_x = np.cross(l_shou_y, l_shou_z)
  rot_qua = get_quaternion(l_shou_x, l_shou_y, l_shou_z, chest_x, chest_y, chest_z)
  tmp[13] = list(rot_qua)

  # left elbow rotation (1D)
  vec1 = frame[11] - frame[12]
  vec2 = frame[13] - frame[12]
  angle1 = get_angle(vec1, vec2)
  tmp[14] = [pi - angle1]

    ret = []
    for i in tmp:
        ret += list(i)
    return np.array(ret)

  ret = []
  for i in tmp:
    ret += list(i)
  return np.array(ret)
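
For reference, the flattened frame assembled above is 44 floats: 1 (duration) + 3 (root position) + nine 4D quaternions + four 1D angles. A minimal sketch, assuming this file's imports (numpy, math, transformation) are available and using a hypothetical random joint array:

import numpy as np

frame = np.random.randn(17, 3)  # hypothetical 17 joints, (x, y, z) each
rot_frame = coord_to_rot(0, frame, 1.0 / 24)
assert rot_frame.shape == (44,)  # 1 + 3 + 9 * 4 + 4 * 1 = 44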


# In[6]:


def coord_seq_to_rot_seq(coord_seq, frame_duration):
    ret = []
    for i in range(len(coord_seq)):
        tmp = coord_to_rot( i, coord_seq[i], frame_duration)
        ret.append(list(tmp))
    return ret

  ret = []
  for i in range(len(coord_seq)):
    tmp = coord_to_rot(i, coord_seq[i], frame_duration)
    ret.append(list(tmp))
  return ret

@@ -1,36 +1,37 @@
import numpy as np
from skeleton import Skeleton


class MocapDataset:
    def __init__(self, fps, skeleton):
        self._skeleton = skeleton
        self._fps = fps
        self._data = None  # Must be filled by subclass
        self._cameras = None  # Must be filled by subclass

    def remove_joints(self, joints_to_remove):
        kept_joints = self._skeleton.remove_joints(joints_to_remove)
        for subject in self._data.keys():
            for action in self._data[subject].keys():
                s = self._data[subject][action]
                s['positions'] = s['positions'][:, kept_joints]

    def __getitem__(self, key):
        return self._data[key]

    def subjects(self):
        return self._data.keys()

    def fps(self):
        return self._fps

    def skeleton(self):
        return self._skeleton

    def cameras(self):
        return self._cameras

    def supports_semi_supervised(self):
        # This method can be overridden
        return False

  def __init__(self, fps, skeleton):
    self._skeleton = skeleton
    self._fps = fps
    self._data = None  # Must be filled by subclass
    self._cameras = None  # Must be filled by subclass

  def remove_joints(self, joints_to_remove):
    kept_joints = self._skeleton.remove_joints(joints_to_remove)
    for subject in self._data.keys():
      for action in self._data[subject].keys():
        s = self._data[subject][action]
        s['positions'] = s['positions'][:, kept_joints]

  def __getitem__(self, key):
    return self._data[key]

  def subjects(self):
    return self._data.keys()

  def fps(self):
    return self._fps

  def skeleton(self):
    return self._skeleton

  def cameras(self):
    return self._cameras

  def supports_semi_supervised(self):
    # This method can be overridden
    return False
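
The two 'Must be filled by subclass' attributes define the subclass contract. A minimal hedged sketch of a conforming dataset (all names and shapes here are made up for illustration):

import numpy as np

class ToyDataset(MocapDataset):

  def __init__(self):
    super().__init__(fps=30, skeleton=None)  # a real subclass passes a Skeleton
    self._data = {'S1': {'Walk': {'positions': np.zeros((100, 17, 3))}}}
    self._cameras = {'S1': []}

toy = ToyDataset()
print(list(toy.subjects()), toy.fps())  # ['S1'] 30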

@@ -1,30 +1,31 @@
import numpy as np


def qrot(q, v):
    """
  """
  Rotate vector(s) v about the rotation described by quaternion(s) q.
  Expects a tensor of shape (*, 4) for q and a tensor of shape (*, 3) for v,
  where * denotes any number of dimensions.
  Returns a tensor of shape (*, 3).
  """
    assert q.shape[-1] == 4
    assert v.shape[-1] == 3
    assert q.shape[:-1] == v.shape[:-1]
  assert q.shape[-1] == 4
  assert v.shape[-1] == 3
  assert q.shape[:-1] == v.shape[:-1]

    qvec = q[..., 1:]

    uv = np.cross(qvec, v)
    uuv = np.cross(qvec, uv)

    return (v + 2 * (q[..., :1] * uv + uuv))
  qvec = q[..., 1:]

  uv = np.cross(qvec, v)
  uuv = np.cross(qvec, uv)

  return (v + 2 * (q[..., :1] * uv + uuv))


def qinverse(q, inplace=False):
    # We assume the quaternion to be normalized
    if inplace:
        q[..., 1:] *= -1
        return q
    else:
        w = q[..., :1]
        xyz = q[..., 1:]
        return np.hstack((w, -xyz))
  # We assume the quaternion to be normalized
  if inplace:
    q[..., 1:] *= -1
    return q
  else:
    w = q[..., :1]
    xyz = q[..., 1:]
    return np.hstack((w, -xyz))
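
A hedged round-trip check for the two helpers (quaternions are (w, x, y, z), as the slicing above implies):

import numpy as np

q = np.array([np.cos(np.pi / 4), np.sin(np.pi / 4), 0.0, 0.0])  # 90 deg about x
v = np.array([0.0, 1.0, 0.0])
v_rot = qrot(q, v)                 # ~[0, 0, 1]
v_back = qrot(qinverse(q), v_rot)  # ~[0, 1, 0]
assert np.allclose(v_back, v, atol=1e-6)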

@@ -1,7 +1,7 @@
import os, inspect
import os, inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
os.sys.path.insert(0,parentdir)
os.sys.path.insert(0, parentdir)

print('parent:', parentdir)

@@ -10,7 +10,6 @@ import pybullet
import time
import random

from pybullet_utils.bullet_client import BulletClient
from deep_mimic.env.motion_capture_data import MotionCaptureData

@@ -26,21 +25,28 @@ import pybullet as p
import numpy as np
import argparse

parser = argparse.ArgumentParser(description='Arguments for loading reference for learning.')

# General arguments
parser.add_argument('--dataset_path', default='data/data_3d_h36m.npz', type=str, help='target dataset') # h36m or humaneva
parser.add_argument('--json_path', default='data/Walking.json', type=str, help='json file path for storing the deepmimic-format json created by inverse-kinect.')
parser.add_argument('--dataset_path',
                    default='data/data_3d_h36m.npz',
                    type=str,
                    help='target dataset')  # h36m or humaneva
parser.add_argument(
    '--json_path',
    default='data/Walking.json',
    type=str,
    help='json file path for storing the deepmimic-format json created by inverse-kinect.')
parser.add_argument('--fps', default=24, type=int, help='frame per second')
parser.add_argument('--subject', default='S11', type=str, help='camera subject.')
parser.add_argument('--action', default='Walking', type=str, help='name of the action.')
parser.add_argument('--loop', default='wrap', type=str, help='loop information in deepmimic, wrap or none.')
parser.add_argument('--action', default='Walking', type=str, help='name of the action.')
parser.add_argument('--loop',
                    default='wrap',
                    type=str,
                    help='loop information in deepmimic, wrap or none.')
parser.add_argument('--draw_gt', action='store_true', help='draw ground truth or not.')

args = parser.parse_args()

args = parser.parse_args()

dataset_path = args.dataset_path
json_path = args.json_path
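
For orientation, a typical invocation of this conversion script would pass the flags defined above; the script filename here is an assumption, not part of the diff:

# hypothetical usage (filename assumed):
#   python h36m_to_deepmimic.py --dataset_path data/data_3d_h36m.npz \
#       --json_path data/Walking.json --subject S11 --action Walking --fps 24 --draw_gt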

@@ -52,93 +58,75 @@ draw_gt = args.draw_gt


def draw_ground_truth(coord_seq, frame, duration, shift):
    global joint_info
    joint = coord_seq[frame]
    shift = np.array(shift)
    for i in range(1, 17):
        # print(x[11], x[14])
        joint_fa = joint_info['father'][i]
        if joint_info['side'][i] == 'right':
            p.addUserDebugLine(lineFromXYZ=joint[i]+shift,
                               lineToXYZ=joint[joint_fa]+shift,
                               lineColorRGB=(255,0,0),
                               lineWidth=1,
                               lifeTime=duration)
        else:
            p.addUserDebugLine(lineFromXYZ=joint[i]+shift,
                               lineToXYZ=joint[joint_fa]+shift,
                               lineColorRGB=(0,0,0),
                               lineWidth=1,
                               lifeTime=duration)
  global joint_info
  joint = coord_seq[frame]
  shift = np.array(shift)
  for i in range(1, 17):
    # print(x[11], x[14])
    joint_fa = joint_info['father'][i]
    if joint_info['side'][i] == 'right':
      p.addUserDebugLine(lineFromXYZ=joint[i] + shift,
                         lineToXYZ=joint[joint_fa] + shift,
                         lineColorRGB=(255, 0, 0),
                         lineWidth=1,
                         lifeTime=duration)
    else:
      p.addUserDebugLine(lineFromXYZ=joint[i] + shift,
                         lineToXYZ=joint[joint_fa] + shift,
                         lineColorRGB=(0, 0, 0),
                         lineWidth=1,
                         lifeTime=duration)


dataset = init_fb_h36m_dataset(dataset_path)
ground_truth = pose3D_from_fb_h36m(dataset,
                                   subject = subject,
                                   action = action,
                                   shift = [1.0,0.0,0.0])

rot_seq = coord_seq_to_rot_seq(coord_seq = ground_truth,
                               frame_duration = 1/fps)

rot_seq_to_deepmimic_json( rot_seq = rot_seq,
                           loop = loop,
                           json_path = json_path)
ground_truth = pose3D_from_fb_h36m(dataset, subject=subject, action=action, shift=[1.0, 0.0, 0.0])

rot_seq = coord_seq_to_rot_seq(coord_seq=ground_truth, frame_duration=1 / fps)

rot_seq_to_deepmimic_json(rot_seq=rot_seq, loop=loop, json_path=json_path)

bc = BulletClient(connection_mode=pybullet.GUI)
bc.setAdditionalSearchPath(pybullet_data.getDataPath())
bc.configureDebugVisualizer(bc.COV_ENABLE_Y_AXIS_UP,1)
bc.setGravity(0,-9.8,0)
motion=MotionCaptureData()
bc.configureDebugVisualizer(bc.COV_ENABLE_Y_AXIS_UP, 1)
bc.setGravity(0, -9.8, 0)
motion = MotionCaptureData()

motionPath = json_path
motion.Load(motionPath)
print("numFrames = ", motion.NumFrames())

simTimeId = bc.addUserDebugParameter("simTime", 0, motion.NumFrames() - 1.1, 0)

y2zOrn = bc.getQuaternionFromEuler([-1.57, 0, 0])
bc.loadURDF("plane.urdf", [0, -0.04, 0], y2zOrn)

simTimeId= bc.addUserDebugParameter("simTime",0,motion.NumFrames()-1.1,0)

y2zOrn = bc.getQuaternionFromEuler([-1.57,0,0])
bc.loadURDF("plane.urdf",[0,-0.04,0], y2zOrn)

humanoid = Humanoid(bc, motion, [0,0,0])  # this is the coordinate of the initial position
humanoid = Humanoid(bc, motion, [0, 0, 0])  # this is the coordinate of the initial position

print(p.getBasePositionAndOrientation(humanoid._humanoid))

simTime = 0
keyFrameDuration = motion.KeyFrameDuraction()
print("keyFrameDuration=",keyFrameDuration)
print("keyFrameDuration=", keyFrameDuration)
for utNum in range(motion.NumFrames()):
    bc.stepSimulation()
    humanoid.RenderReference(utNum * keyFrameDuration)
    if draw_gt:
        draw_ground_truth(coord_seq = ground_truth,
                          frame = utNum,
                          duration = keyFrameDuration,
                          shift = [-1.0, 0.0, 1.0])
    time.sleep(0.001)
  bc.stepSimulation()
  humanoid.RenderReference(utNum * keyFrameDuration)
  if draw_gt:
    draw_ground_truth(coord_seq=ground_truth,
                      frame=utNum,
                      duration=keyFrameDuration,
                      shift=[-1.0, 0.0, 1.0])
  time.sleep(0.001)
stage = 0


def Reset(humanoid):
    global simTime
    humanoid.Reset()
    simTime = 0
    humanoid.SetSimTime(simTime)
    pose = humanoid.InitializePoseFromMotionData()
    humanoid.ApplyPose(pose, True, True, humanoid._humanoid,bc)
  global simTime
  humanoid.Reset()
  simTime = 0
  humanoid.SetSimTime(simTime)
  pose = humanoid.InitializePoseFromMotionData()
  humanoid.ApplyPose(pose, True, True, humanoid._humanoid, bc)


Reset(humanoid)
p.disconnect()
@@ -1,81 +1,82 @@
import numpy as np


class Skeleton:
    def __init__(self, parents, joints_left, joints_right):
        assert len(joints_left) == len(joints_right)

        self._parents = np.array(parents)
        self._joints_left = joints_left
        self._joints_right = joints_right
        self._compute_metadata()

    def num_joints(self):
        return len(self._parents)

    def parents(self):
        return self._parents

    def has_children(self):
        return self._has_children

    def children(self):
        return self._children

    def remove_joints(self, joints_to_remove):
        """

  def __init__(self, parents, joints_left, joints_right):
    assert len(joints_left) == len(joints_right)

    self._parents = np.array(parents)
    self._joints_left = joints_left
    self._joints_right = joints_right
    self._compute_metadata()

  def num_joints(self):
    return len(self._parents)

  def parents(self):
    return self._parents

  def has_children(self):
    return self._has_children

  def children(self):
    return self._children

  def remove_joints(self, joints_to_remove):
    """
    Remove the joints specified in 'joints_to_remove'.
    """
        valid_joints = []
        for joint in range(len(self._parents)):
            if joint not in joints_to_remove:
                valid_joints.append(joint)
    valid_joints = []
    for joint in range(len(self._parents)):
      if joint not in joints_to_remove:
        valid_joints.append(joint)

        for i in range(len(self._parents)):
            while self._parents[i] in joints_to_remove:
                self._parents[i] = self._parents[self._parents[i]]

        index_offsets = np.zeros(len(self._parents), dtype=int)
        new_parents = []
        for i, parent in enumerate(self._parents):
            if i not in joints_to_remove:
                new_parents.append(parent - index_offsets[parent])
            else:
                index_offsets[i:] += 1
        self._parents = np.array(new_parents)

        if self._joints_left is not None:
            new_joints_left = []
            for joint in self._joints_left:
                if joint in valid_joints:
                    new_joints_left.append(joint - index_offsets[joint])
            self._joints_left = new_joints_left
        if self._joints_right is not None:
            new_joints_right = []
            for joint in self._joints_right:
                if joint in valid_joints:
                    new_joints_right.append(joint - index_offsets[joint])
            self._joints_right = new_joints_right
    for i in range(len(self._parents)):
      while self._parents[i] in joints_to_remove:
        self._parents[i] = self._parents[self._parents[i]]

        self._compute_metadata()

        return valid_joints

    def joints_left(self):
        return self._joints_left

    def joints_right(self):
        return self._joints_right

    def _compute_metadata(self):
        self._has_children = np.zeros(len(self._parents)).astype(bool)
        for i, parent in enumerate(self._parents):
            if parent != -1:
                self._has_children[parent] = True
    index_offsets = np.zeros(len(self._parents), dtype=int)
    new_parents = []
    for i, parent in enumerate(self._parents):
      if i not in joints_to_remove:
        new_parents.append(parent - index_offsets[parent])
      else:
        index_offsets[i:] += 1
    self._parents = np.array(new_parents)

        self._children = []
        for i, parent in enumerate(self._parents):
            self._children.append([])
        for i, parent in enumerate(self._parents):
            if parent != -1:
                self._children[parent].append(i)
    if self._joints_left is not None:
      new_joints_left = []
      for joint in self._joints_left:
        if joint in valid_joints:
          new_joints_left.append(joint - index_offsets[joint])
      self._joints_left = new_joints_left
    if self._joints_right is not None:
      new_joints_right = []
      for joint in self._joints_right:
        if joint in valid_joints:
          new_joints_right.append(joint - index_offsets[joint])
      self._joints_right = new_joints_right

    self._compute_metadata()

    return valid_joints

  def joints_left(self):
    return self._joints_left

  def joints_right(self):
    return self._joints_right

  def _compute_metadata(self):
    self._has_children = np.zeros(len(self._parents)).astype(bool)
    for i, parent in enumerate(self._parents):
      if parent != -1:
        self._has_children[parent] = True

    self._children = []
    for i, parent in enumerate(self._parents):
      self._children.append([])
    for i, parent in enumerate(self._parents):
      if parent != -1:
        self._children[parent].append(i)
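
A hedged sketch of remove_joints on a toy chain: removing joint 1 re-parents its children to joint 0 and re-bases the remaining indices:

sk = Skeleton(parents=[-1, 0, 1, 1], joints_left=[2], joints_right=[3])
sk.remove_joints([1])
print(sk.parents())                         # [-1  0  0]
print(sk.joints_left(), sk.joints_right())  # [1] [2]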
File diff suppressed because it is too large
@@ -3,21 +3,23 @@ import subprocess
from pybullet_utils.arg_parser import ArgParser
from pybullet_utils.logger import Logger


def main():
    # Command line arguments
    args = sys.argv[1:]
    arg_parser = ArgParser()
    arg_parser.load_args(args)
  # Command line arguments
  args = sys.argv[1:]
  arg_parser = ArgParser()
  arg_parser.load_args(args)

    num_workers = arg_parser.parse_int('num_workers', 1)
    assert(num_workers > 0)
  num_workers = arg_parser.parse_int('num_workers', 1)
  assert (num_workers > 0)

    Logger.print2('Running with {:d} workers'.format(num_workers))
    cmd = 'mpiexec -n {:d} python3 DeepMimic_Optimizer.py '.format(num_workers)
    cmd += ' '.join(args)
    Logger.print2('cmd: ' + cmd)
    subprocess.call(cmd, shell=True)
    return
  Logger.print2('Running with {:d} workers'.format(num_workers))
  cmd = 'mpiexec -n {:d} python3 DeepMimic_Optimizer.py '.format(num_workers)
  cmd += ' '.join(args)
  Logger.print2('cmd: ' + cmd)
  subprocess.call(cmd, shell=True)
  return


if __name__ == '__main__':
    main()
  main()
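
For illustration, with four workers the assembled command line would look like this; note that the full argv, including --num_workers itself, is forwarded to the child processes (the arg-file name below is hypothetical):

# python mpi_run.py --num_workers 4 --arg_file train_args.txt
# cmd == 'mpiexec -n 4 python3 DeepMimic_Optimizer.py --num_workers 4 --arg_file train_args.txt'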

@@ -2,8 +2,8 @@ import os
import inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
os.sys.path.insert(0,parentdir)
print("parentdir=",parentdir)
os.sys.path.insert(0, parentdir)
print("parentdir=", parentdir)
import json
from pybullet_envs.deep_mimic.learning.rl_world import RLWorld
from pybullet_envs.deep_mimic.learning.ppo_agent import PPOAgent

@@ -15,81 +15,78 @@ from pybullet_envs.deep_mimic.env.pybullet_deep_mimic_env import PyBulletDeepMim
import sys
import random

update_timestep = 1./240.
update_timestep = 1. / 240.
animating = True


def update_world(world, time_elapsed):
    timeStep = update_timestep
    world.update(timeStep)
    reward = world.env.calc_reward(agent_id=0)
    #print("reward=",reward)
    end_episode = world.env.is_episode_end()
    if (end_episode):
        world.end_episode()
        world.reset()
    return
  timeStep = update_timestep
  world.update(timeStep)
  reward = world.env.calc_reward(agent_id=0)
  #print("reward=",reward)
  end_episode = world.env.is_episode_end()
  if (end_episode):
    world.end_episode()
    world.reset()
  return


def build_arg_parser(args):
    arg_parser = ArgParser()
    arg_parser.load_args(args)
  arg_parser = ArgParser()
  arg_parser.load_args(args)

  arg_file = arg_parser.parse_string('arg_file', '')
  if (arg_file != ''):
    path = pybullet_data.getDataPath() + "/args/" + arg_file
    succ = arg_parser.load_file(path)
    Logger.print2(arg_file)
    assert succ, Logger.print2('Failed to load args from: ' + arg_file)
  return arg_parser

    arg_file = arg_parser.parse_string('arg_file', '')
    if (arg_file != ''):
        path = pybullet_data.getDataPath()+"/args/"+arg_file
        succ = arg_parser.load_file(path)
        Logger.print2(arg_file)
        assert succ, Logger.print2('Failed to load args from: ' + arg_file)
    return arg_parser


args = sys.argv[1:]


def build_world(args, enable_draw):
    arg_parser = build_arg_parser(args)
    print("enable_draw=",enable_draw)
    env = PyBulletDeepMimicEnv(arg_parser, enable_draw)
    world = RLWorld(env, arg_parser)
    #world.env.set_playback_speed(playback_speed)
  arg_parser = build_arg_parser(args)
  print("enable_draw=", enable_draw)
  env = PyBulletDeepMimicEnv(arg_parser, enable_draw)
  world = RLWorld(env, arg_parser)
  #world.env.set_playback_speed(playback_speed)

    motion_file = arg_parser.parse_string("motion_file")
    print("motion_file=",motion_file)
    bodies = arg_parser.parse_ints("fall_contact_bodies")
    print("bodies=",bodies)
    int_output_path = arg_parser.parse_string("int_output_path")
    print("int_output_path=",int_output_path)
    agent_files = pybullet_data.getDataPath()+"/"+arg_parser.parse_string("agent_files")
  motion_file = arg_parser.parse_string("motion_file")
  print("motion_file=", motion_file)
  bodies = arg_parser.parse_ints("fall_contact_bodies")
  print("bodies=", bodies)
  int_output_path = arg_parser.parse_string("int_output_path")
  print("int_output_path=", int_output_path)
  agent_files = pybullet_data.getDataPath() + "/" + arg_parser.parse_string("agent_files")

    AGENT_TYPE_KEY = "AgentType"
  AGENT_TYPE_KEY = "AgentType"

    print("agent_file=",agent_files)
    with open(agent_files) as data_file:
        json_data = json.load(data_file)
        print("json_data=",json_data)
        assert AGENT_TYPE_KEY in json_data
        agent_type = json_data[AGENT_TYPE_KEY]
        print("agent_type=",agent_type)
        agent = PPOAgent(world, id, json_data)

        agent.set_enable_training(False)
        world.reset()
    return world
  print("agent_file=", agent_files)
  with open(agent_files) as data_file:
    json_data = json.load(data_file)
    print("json_data=", json_data)
    assert AGENT_TYPE_KEY in json_data
    agent_type = json_data[AGENT_TYPE_KEY]
    print("agent_type=", agent_type)
    agent = PPOAgent(world, id, json_data)

    agent.set_enable_training(False)
    world.reset()
  return world


if __name__ == '__main__':

    world = build_world(args, True)
    while (world.env._pybullet_client.isConnected()):
        timeStep = update_timestep
        keys = world.env.getKeyboardEvents()

        if world.env.isKeyTriggered(keys, ' '):
            animating = not animating
        if (animating):
            update_world(world, timeStep)
            #animating=False

  world = build_world(args, True)
  while (world.env._pybullet_client.isConnected()):
    timeStep = update_timestep
    keys = world.env.getKeyboardEvents()

    if world.env.isKeyTriggered(keys, ' '):
      animating = not animating
    if (animating):
      update_world(world, timeStep)
      #animating=False