update to the latest agent and environment.

2017-05-24 11:20:42 -07:00
parent 18fd1a003f
commit 1d0db4ec2e
9 changed files with 24 additions and 12 deletions
--- a/examples/pybullet/gym/agents/simpleAgent.py
+++ b/examples/pybullet/gym/agents/simpleAgent.py
@@ -16,7 +16,7 @@ class SimpleAgent():
      session,
      ckpt_path,
      actor_layer_size,
-      observation_dim=31
+      observation_dim=28
  ):
    self._ckpt_path = ckpt_path
    self._session = session
--- a/examples/pybullet/gym/agents/simpleAgentWithSonnet.py
+++ b/examples/pybullet/gym/agents/simpleAgentWithSonnet.py
@@ -18,7 +18,7 @@ class SimpleAgent():
      session,
      ckpt_path,
      actor_layer_size,
-      observation_size=(31,),
+      observation_size=(28,),
      action_size=8,
  ):
    self._ckpt_path = ckpt_path
@@ -30,7 +30,7 @@ class SimpleAgent():
  def _build(self):
    self._agent_net = actor_net.ActorNetwork(self._actor_layer_size, self._action_size)
-    self._obs = tf.placeholder(tf.float32, (31,))
+    self._obs = tf.placeholder(tf.float32, self._observation_size)
    with tf.name_scope('Act'):
      batch_obs = snt.nest.pack_iterable_as(self._obs,
                                            snt.nest.map(lambda x: tf.expand_dims(x, 0),
--- a/examples/pybullet/gym/data/agent/tf_graph_data/checkpoint
+++ b/examples/pybullet/gym/data/agent/tf_graph_data/checkpoint
@@ -1,2 +0,0 @@
 model_checkpoint_path: "tf_graph_data_converted.ckpt-0"
 all_model_checkpoint_paths: "tf_graph_data_converted.ckpt-0"
--- a/examples/pybullet/gym/data/agent/tf_graph_data/tf_graph_data_converted.ckpt-0.data-00000-of-00001
+++ b/examples/pybullet/gym/data/agent/tf_graph_data/tf_graph_data_converted.ckpt-0.data-00000-of-00001
--- a/examples/pybullet/gym/data/agent/tf_graph_data/tf_graph_data_converted.ckpt-0.index
+++ b/examples/pybullet/gym/data/agent/tf_graph_data/tf_graph_data_converted.ckpt-0.index
--- a/examples/pybullet/gym/data/agent/tf_graph_data/tf_graph_data_converted.ckpt-0.meta
+++ b/examples/pybullet/gym/data/agent/tf_graph_data/tf_graph_data_converted.ckpt-0.meta
--- a/examples/pybullet/gym/envs/bullet/minitaurGymEnv.py
+++ b/examples/pybullet/gym/envs/bullet/minitaurGymEnv.py
@@ -16,10 +16,14 @@ class MinitaurGymEnv(gym.Env):
  def __init__(self,
               urdfRoot="",
               actionRepeat=1,
               isEnableSelfCollision=True,
               motorVelocityLimit=10.0,
               render=False):
    self._timeStep = 0.01
    self._urdfRoot = urdfRoot
    self._actionRepeat = actionRepeat
    self._motorVelocityLimit = motorVelocityLimit
    self._isEnableSelfCollision = isEnableSelfCollision
    self._observation = []
    self._envStepCounter = 0
    self._render = render
@@ -44,7 +48,7 @@ class MinitaurGymEnv(gym.Env):
    p.setTimeStep(self._timeStep)
    p.loadURDF("%splane.urdf" % self._urdfRoot)
    p.setGravity(0,0,-10)
-    self._minitaur = minitaur_new.Minitaur(self._urdfRoot)
+    self._minitaur = minitaur_new.Minitaur(urdfRootPath=self._urdfRoot, timeStep=self._timeStep, isEnableSelfCollision=self._isEnableSelfCollision, motorVelocityLimit=self._motorVelocityLimit)
    self._envStepCounter = 0
    self._lastBasePosition = [0, 0, 0]
    for i in range(100):
--- a/examples/pybullet/gym/envs/bullet/minitaur_new.py
+++ b/examples/pybullet/gym/envs/bullet/minitaur_new.py
@@ -4,8 +4,12 @@ import copy
 import math
 class Minitaur:
-  def __init__(self, urdfRootPath=''):
+
  def __init__(self, urdfRootPath='', timeStep=0.01, isEnableSelfCollision=True, motorVelocityLimit=10.0):
    self.urdfRootPath = urdfRootPath
    self.isEnableSelfCollision = isEnableSelfCollision
    self.motorVelocityLimit = motorVelocityLimit
    self.timeStep = timeStep
    self.reset()
  def buildJointNameToIdDict(self):
@@ -27,8 +31,10 @@ class Minitaur:
    self.motorIdList.append(self.jointNameToId['motor_back_rightR_joint'])
  def reset(self):
-#    self.quadruped = p.loadURDF("%squadruped/minitaur.urdf" % self.urdfRootPath, [0,0,.2], flags=p.URDF_USE_SELF_COLLISION)
+    if self.isEnableSelfCollision:
-    self.quadruped = p.loadURDF("%squadruped/minitaur.urdf" % self.urdfRootPath, [0,0,.2])
+      self.quadruped = p.loadURDF("%s/quadruped/minitaur.urdf" % self.urdfRootPath, [0,0,.2], flags=p.URDF_USE_SELF_COLLISION)
    else:
      self.quadruped = p.loadURDF("%s/quadruped/minitaur.urdf" % self.urdfRootPath, [0,0,.2])
    self.kp = 1
    self.kd = 1
    self.maxForce = 3.5
@@ -116,11 +122,15 @@ class Minitaur:
    observation.extend(self.getMotorVelocities().tolist())
    observation.extend(self.getMotorTorques().tolist())
    observation.extend(list(self.getBaseOrientation()))
    observation.extend(list(self.getBasePosition()))
    return observation
  def applyAction(self, motorCommands):
    if self.motorVelocityLimit < np.inf:
      currentMotorAngle = self.getMotorAngles()
      motorCommandsMax = currentMotorAngle + self.timeStep * self.motorVelocityLimit
      motorCommandsMin = currentMotorAngle - self.timeStep * self.motorVelocityLimit
      motorCommands = np.clip(motorCommands, motorCommandsMin, motorCommandsMax)
    motorCommandsWithDir = np.multiply(motorCommands, self.motorDir)
 #    print('action: {}'.format(motorCommands))
 #    print('motor: {}'.format(motorCommandsWithDir))
--- a/examples/pybullet/gym/minitaurGymEnvTest.py
+++ b/examples/pybullet/gym/minitaurGymEnvTest.py
@@ -60,9 +60,9 @@ def testDDPGPolicy():
  sum_reward = 0
  steps = 1000
  ckpt_path = 'data/agent/tf_graph_data/tf_graph_data_converted.ckpt-0'
-  observation_shape = (31,)
+  observation_shape = (28,)
  action_size = 8
-  actor_layer_size = (100, 181)
+  actor_layer_size = (297, 158)
  n_steps = 0
  tf.reset_default_graph()
  with tf.Session() as session:
		`@@ -1,2 +0,0 @@`
			`model_checkpoint_path: "tf_graph_data_converted.ckpt-0"`
			`all_model_checkpoint_paths: "tf_graph_data_converted.ckpt-0"`