update to the latest agent and environment.
This commit is contained in:
@@ -16,7 +16,7 @@ class SimpleAgent():
|
|||||||
session,
|
session,
|
||||||
ckpt_path,
|
ckpt_path,
|
||||||
actor_layer_size,
|
actor_layer_size,
|
||||||
observation_dim=31
|
observation_dim=28
|
||||||
):
|
):
|
||||||
self._ckpt_path = ckpt_path
|
self._ckpt_path = ckpt_path
|
||||||
self._session = session
|
self._session = session
|
||||||
|
|||||||
@@ -18,7 +18,7 @@ class SimpleAgent():
|
|||||||
session,
|
session,
|
||||||
ckpt_path,
|
ckpt_path,
|
||||||
actor_layer_size,
|
actor_layer_size,
|
||||||
observation_size=(31,),
|
observation_size=(28,),
|
||||||
action_size=8,
|
action_size=8,
|
||||||
):
|
):
|
||||||
self._ckpt_path = ckpt_path
|
self._ckpt_path = ckpt_path
|
||||||
@@ -30,7 +30,7 @@ class SimpleAgent():
|
|||||||
|
|
||||||
def _build(self):
|
def _build(self):
|
||||||
self._agent_net = actor_net.ActorNetwork(self._actor_layer_size, self._action_size)
|
self._agent_net = actor_net.ActorNetwork(self._actor_layer_size, self._action_size)
|
||||||
self._obs = tf.placeholder(tf.float32, (31,))
|
self._obs = tf.placeholder(tf.float32, self._observation_size)
|
||||||
with tf.name_scope('Act'):
|
with tf.name_scope('Act'):
|
||||||
batch_obs = snt.nest.pack_iterable_as(self._obs,
|
batch_obs = snt.nest.pack_iterable_as(self._obs,
|
||||||
snt.nest.map(lambda x: tf.expand_dims(x, 0),
|
snt.nest.map(lambda x: tf.expand_dims(x, 0),
|
||||||
|
|||||||
@@ -1,2 +0,0 @@
|
|||||||
model_checkpoint_path: "tf_graph_data_converted.ckpt-0"
|
|
||||||
all_model_checkpoint_paths: "tf_graph_data_converted.ckpt-0"
|
|
||||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -16,10 +16,14 @@ class MinitaurGymEnv(gym.Env):
|
|||||||
def __init__(self,
|
def __init__(self,
|
||||||
urdfRoot="",
|
urdfRoot="",
|
||||||
actionRepeat=1,
|
actionRepeat=1,
|
||||||
|
isEnableSelfCollision=True,
|
||||||
|
motorVelocityLimit=10.0,
|
||||||
render=False):
|
render=False):
|
||||||
self._timeStep = 0.01
|
self._timeStep = 0.01
|
||||||
self._urdfRoot = urdfRoot
|
self._urdfRoot = urdfRoot
|
||||||
self._actionRepeat = actionRepeat
|
self._actionRepeat = actionRepeat
|
||||||
|
self._motorVelocityLimit = motorVelocityLimit
|
||||||
|
self._isEnableSelfCollision = isEnableSelfCollision
|
||||||
self._observation = []
|
self._observation = []
|
||||||
self._envStepCounter = 0
|
self._envStepCounter = 0
|
||||||
self._render = render
|
self._render = render
|
||||||
@@ -44,7 +48,7 @@ class MinitaurGymEnv(gym.Env):
|
|||||||
p.setTimeStep(self._timeStep)
|
p.setTimeStep(self._timeStep)
|
||||||
p.loadURDF("%splane.urdf" % self._urdfRoot)
|
p.loadURDF("%splane.urdf" % self._urdfRoot)
|
||||||
p.setGravity(0,0,-10)
|
p.setGravity(0,0,-10)
|
||||||
self._minitaur = minitaur_new.Minitaur(self._urdfRoot)
|
self._minitaur = minitaur_new.Minitaur(urdfRootPath=self._urdfRoot, timeStep=self._timeStep, isEnableSelfCollision=self._isEnableSelfCollision, motorVelocityLimit=self._motorVelocityLimit)
|
||||||
self._envStepCounter = 0
|
self._envStepCounter = 0
|
||||||
self._lastBasePosition = [0, 0, 0]
|
self._lastBasePosition = [0, 0, 0]
|
||||||
for i in range(100):
|
for i in range(100):
|
||||||
|
|||||||
@@ -4,8 +4,12 @@ import copy
|
|||||||
import math
|
import math
|
||||||
|
|
||||||
class Minitaur:
|
class Minitaur:
|
||||||
def __init__(self, urdfRootPath=''):
|
|
||||||
|
def __init__(self, urdfRootPath='', timeStep=0.01, isEnableSelfCollision=True, motorVelocityLimit=10.0):
|
||||||
self.urdfRootPath = urdfRootPath
|
self.urdfRootPath = urdfRootPath
|
||||||
|
self.isEnableSelfCollision = isEnableSelfCollision
|
||||||
|
self.motorVelocityLimit = motorVelocityLimit
|
||||||
|
self.timeStep = timeStep
|
||||||
self.reset()
|
self.reset()
|
||||||
|
|
||||||
def buildJointNameToIdDict(self):
|
def buildJointNameToIdDict(self):
|
||||||
@@ -27,8 +31,10 @@ class Minitaur:
|
|||||||
self.motorIdList.append(self.jointNameToId['motor_back_rightR_joint'])
|
self.motorIdList.append(self.jointNameToId['motor_back_rightR_joint'])
|
||||||
|
|
||||||
def reset(self):
|
def reset(self):
|
||||||
# self.quadruped = p.loadURDF("%squadruped/minitaur.urdf" % self.urdfRootPath, [0,0,.2], flags=p.URDF_USE_SELF_COLLISION)
|
if self.isEnableSelfCollision:
|
||||||
self.quadruped = p.loadURDF("%squadruped/minitaur.urdf" % self.urdfRootPath, [0,0,.2])
|
self.quadruped = p.loadURDF("%s/quadruped/minitaur.urdf" % self.urdfRootPath, [0,0,.2], flags=p.URDF_USE_SELF_COLLISION)
|
||||||
|
else:
|
||||||
|
self.quadruped = p.loadURDF("%s/quadruped/minitaur.urdf" % self.urdfRootPath, [0,0,.2])
|
||||||
self.kp = 1
|
self.kp = 1
|
||||||
self.kd = 1
|
self.kd = 1
|
||||||
self.maxForce = 3.5
|
self.maxForce = 3.5
|
||||||
@@ -116,11 +122,15 @@ class Minitaur:
|
|||||||
observation.extend(self.getMotorVelocities().tolist())
|
observation.extend(self.getMotorVelocities().tolist())
|
||||||
observation.extend(self.getMotorTorques().tolist())
|
observation.extend(self.getMotorTorques().tolist())
|
||||||
observation.extend(list(self.getBaseOrientation()))
|
observation.extend(list(self.getBaseOrientation()))
|
||||||
observation.extend(list(self.getBasePosition()))
|
|
||||||
return observation
|
return observation
|
||||||
|
|
||||||
|
|
||||||
def applyAction(self, motorCommands):
|
def applyAction(self, motorCommands):
|
||||||
|
if self.motorVelocityLimit < np.inf:
|
||||||
|
currentMotorAngle = self.getMotorAngles()
|
||||||
|
motorCommandsMax = currentMotorAngle + self.timeStep * self.motorVelocityLimit
|
||||||
|
motorCommandsMin = currentMotorAngle - self.timeStep * self.motorVelocityLimit
|
||||||
|
motorCommands = np.clip(motorCommands, motorCommandsMin, motorCommandsMax)
|
||||||
motorCommandsWithDir = np.multiply(motorCommands, self.motorDir)
|
motorCommandsWithDir = np.multiply(motorCommands, self.motorDir)
|
||||||
# print('action: {}'.format(motorCommands))
|
# print('action: {}'.format(motorCommands))
|
||||||
# print('motor: {}'.format(motorCommandsWithDir))
|
# print('motor: {}'.format(motorCommandsWithDir))
|
||||||
|
|||||||
@@ -60,9 +60,9 @@ def testDDPGPolicy():
|
|||||||
sum_reward = 0
|
sum_reward = 0
|
||||||
steps = 1000
|
steps = 1000
|
||||||
ckpt_path = 'data/agent/tf_graph_data/tf_graph_data_converted.ckpt-0'
|
ckpt_path = 'data/agent/tf_graph_data/tf_graph_data_converted.ckpt-0'
|
||||||
observation_shape = (31,)
|
observation_shape = (28,)
|
||||||
action_size = 8
|
action_size = 8
|
||||||
actor_layer_size = (100, 181)
|
actor_layer_size = (297, 158)
|
||||||
n_steps = 0
|
n_steps = 0
|
||||||
tf.reset_default_graph()
|
tf.reset_default_graph()
|
||||||
with tf.Session() as session:
|
with tf.Session() as session:
|
||||||
|
|||||||
Reference in New Issue
Block a user