Merge remote-tracking branch 'bp/master'

Erwin Coumans
2017-05-23 22:06:07 -07:00
24 changed files with 1796 additions and 59 deletions

View File

@@ -0,0 +1,91 @@
import pybullet as p
import struct


def readLogFile(filename, verbose=True):
  f = open(filename, 'rb')
  print('Opened', filename)

  # The first two lines are headers: comma-separated field names,
  # then the struct format string for one record.
  keys = f.readline().decode('utf8').rstrip('\n').split(',')
  fmt = f.readline().decode('utf8').rstrip('\n')

  # The size in bytes of one record
  sz = struct.calcsize(fmt)
  # The number of columns in one record
  ncols = len(fmt)

  if verbose:
    print('Keys:', keys)
    print('Format:', fmt)
    print('Size:', sz)
    print('Columns:', ncols)

  # Read the data and split it into records by the alignment word
  wholeFile = f.read()
  chunks = wholeFile.split(b'\xaa\xbb')
  log = list()
  for chunk in chunks:
    if len(chunk) == sz:
      values = struct.unpack(fmt, chunk)
      record = list()
      for i in range(ncols):
        record.append(values[i])
      log.append(record)
  return log


#clid = p.connect(p.SHARED_MEMORY)
p.connect(p.GUI)
p.loadSDF("kuka_iiwa/kuka_with_gripper.sdf")
p.loadURDF("tray/tray.urdf", [0, 0, 0])
p.loadURDF("block.urdf", [0, 0, 2])

log = readLogFile("data/block_grasp_log.bin")
recordNum = len(log)
itemNum = len(log[0])
objectNum = p.getNumBodies()
print('record num:', recordNum)
print('item num:', itemNum)


def Step(stepIndex):
  # One log record per object per step: restore base pose and joint states.
  for objectId in range(objectNum):
    record = log[stepIndex * objectNum + objectId]
    Id = record[2]
    pos = [record[3], record[4], record[5]]
    orn = [record[6], record[7], record[8], record[9]]
    p.resetBasePositionAndOrientation(Id, pos, orn)
    numJoints = p.getNumJoints(Id)
    for i in range(numJoints):
      jointInfo = p.getJointInfo(Id, i)
      qIndex = jointInfo[3]
      if qIndex > -1:
        # Joint positions start at record field 17; qIndex starts at 7
        # for the first joint, hence the offset.
        p.resetJointState(Id, i, record[qIndex - 7 + 17])


stepIndexId = p.addUserDebugParameter("stepIndex", 0, recordNum / objectNum - 1, 0)
while True:
  stepIndex = int(p.readUserDebugParameter(stepIndexId))
  Step(stepIndex)
  p.stepSimulation()
  # Re-apply the logged pose so stepSimulation does not drift the bodies.
  Step(stepIndex)
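
For context, logs in this format come from pybullet's state logging. A minimal sketch of producing one, assuming the p.startStateLogging API with the STATE_LOGGING_GENERIC_ROBOT mode (the step count and logged body set here are illustrative, not part of this commit):

import pybullet as p

p.connect(p.DIRECT)
kukaId = p.loadSDF("kuka_iiwa/kuka_with_gripper.sdf")[0]

# One record per logged body per simulation step: step count, timestamp,
# object id, base pose, and joint positions.
logId = p.startStateLogging(p.STATE_LOGGING_GENERIC_ROBOT,
                            "data/block_grasp_log.bin",
                            objectUniqueIds=[kukaId])
for _ in range(1000):
  p.stepSimulation()
p.stopStateLogging(logId)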

View File

@@ -6,15 +6,15 @@ p.connect(p.GUI)
 planeId = p.loadURDF(fileName="plane.urdf", baseOrientation=[0.25882, 0, 0, 0.96593])
 p.loadURDF(fileName="cube.urdf", baseOrientation=[0.25882, 0, 0, 0.96593], basePosition=[0, 0, 2])
 cubeId = p.loadURDF(fileName="cube.urdf", baseOrientation=[0, 0, 0, 1], basePosition=[0, 0, 4])
-p.changeDynamicsInfo(bodyUniqueId=2, linkIndex=-1, mass=0.1)
-#p.changeDynamicsInfo(bodyUniqueId=2, linkIndex=-1, mass=100.0)
+#p.changeDynamics(bodyUniqueId=2, linkIndex=-1, mass=0.1)
+p.changeDynamics(bodyUniqueId=2, linkIndex=-1, mass=100.0)
 p.setGravity(0, 0, -10)
 p.setRealTimeSimulation(0)
 t = 0
 while 1:
   t = t + 1
   if t > 400:
-    p.changeDynamicsInfo(bodyUniqueId=0, linkIndex=-1, lateralFriction=0.01)
+    p.changeDynamics(bodyUniqueId=0, linkIndex=-1, lateralFriction=0.01)
   mass1, frictionCoeff1 = p.getDynamicsInfo(bodyUniqueId=planeId, linkIndex=-1)
   mass2, frictionCoeff2 = p.getDynamicsInfo(bodyUniqueId=cubeId, linkIndex=-1)
   print(mass1, frictionCoeff1)
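
Note that in later pybullet versions p.getDynamicsInfo returns a longer tuple (mass, lateral friction, local inertia, and more), so the two-value unpacking above breaks there. A defensive variant, indexing the fields by position:

import pybullet as p

p.connect(p.DIRECT)
planeId = p.loadURDF("plane.urdf")

info = p.getDynamicsInfo(bodyUniqueId=planeId, linkIndex=-1)
mass = info[0]             # base mass in kg
lateralFriction = info[1]  # lateral friction coefficient
print(mass, lateralFriction)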

View File

@@ -0,0 +1,21 @@
"""An actor network."""
import tensorflow as tf
import sonnet as snt
class ActorNetwork(snt.AbstractModule):
"""An actor network as a sonnet Module."""
def __init__(self, layer_sizes, action_size, name='target_actor'):
super(ActorNetwork, self).__init__(name=name)
self._layer_sizes = layer_sizes
self._action_size = action_size
def _build(self, inputs):
state = inputs
for output_size in self._layer_sizes:
state = snt.Linear(output_size)(state)
state = tf.nn.relu(state)
action = tf.tanh(
snt.Linear(self._action_size, name='action')(state))
return action
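
A minimal usage sketch for this module; the layer sizes and the 31-wide observation follow the DDPG test further down, the rest is illustrative:

import numpy as np
import tensorflow as tf
from agents import actor_net

obs = tf.placeholder(tf.float32, shape=(None, 31))
net = actor_net.ActorNetwork(layer_sizes=(100, 181), action_size=8)
action = net(obs)  # calling the module invokes _build; shape (None, 8), in [-1, 1]

with tf.Session() as session:
  session.run(tf.global_variables_initializer())
  print(session.run(action, feed_dict={obs: np.zeros((1, 31))}))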

View File

@@ -10,11 +10,12 @@ import numpy as np
 import tensorflow as tf
 import pdb

-class SimplerAgent():
+class SimpleAgent():

   def __init__(
       self,
       session,
       ckpt_path,
+      actor_layer_size,
       observation_dim=31
   ):
     self._ckpt_path = ckpt_path

View File

@@ -0,0 +1,46 @@
"""Loads a DDPG agent without too much external dependencies
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import collections
import numpy as np
import tensorflow as tf
import sonnet as snt
from agents import actor_net
class SimpleAgent():
def __init__(
self,
session,
ckpt_path,
actor_layer_size,
observation_size=(31,),
action_size=8,
):
self._ckpt_path = ckpt_path
self._actor_layer_size = actor_layer_size
self._observation_size = observation_size
self._action_size = action_size
self._session = session
self._build()
def _build(self):
self._agent_net = actor_net.ActorNetwork(self._actor_layer_size, self._action_size)
self._obs = tf.placeholder(tf.float32, (31,))
with tf.name_scope('Act'):
batch_obs = snt.nest.pack_iterable_as(self._obs,
snt.nest.map(lambda x: tf.expand_dims(x, 0),
snt.nest.flatten_iterable(self._obs)))
self._action = self._agent_net(batch_obs)
saver = tf.train.Saver()
saver.restore(
sess=self._session,
save_path=self._ckpt_path)
def __call__(self, observation):
out_action = self._session.run(self._action, feed_dict={self._obs: observation})
return out_action[0]
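
A minimal driver for this class, with the checkpoint path and layer sizes taken from the test file below (the zero observation is illustrative):

import numpy as np
import tensorflow as tf
from agents import simpleAgentWithSonnet

tf.reset_default_graph()
with tf.Session() as session:
  agent = simpleAgentWithSonnet.SimpleAgent(
      session=session,
      ckpt_path='data/agent/tf_graph_data/tf_graph_data_converted.ckpt-0',
      actor_layer_size=(100, 181))
  action = agent(np.zeros(31, dtype=np.float32))  # returns one 8-dim action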

View File

@@ -10,8 +10,15 @@ import numpy as np
 import tensorflow as tf

 from envs.bullet.minitaurGymEnv import MinitaurGymEnv
-from agents import simplerAgent
+try:
+  import sonnet
+  from agents import simpleAgentWithSonnet as agent_lib
+  ckpt_path = 'data/agent/tf_graph_data/tf_graph_data_converted.ckpt-0'
+except ImportError:
+  from agents import simpleAgent as agent_lib
+  ckpt_path = 'data/agent/tf_graph_data/tf_graph_data.ckpt'


 def testSinePolicy():
   """Tests sine policy
   """
@@ -53,14 +60,14 @@ def testDDPGPolicy():
   environment = MinitaurGymEnv(render=True)
   sum_reward = 0
   steps = 1000
-  ckpt_path = 'data/agent/tf_graph_data/tf_graph_data_converted.ckpt-0'
   observation_shape = (31,)
   action_size = 8
-  actor_layer_sizes = (100, 181)
+  actor_layer_size = (100, 181)
   n_steps = 0
   tf.reset_default_graph()
   with tf.Session() as session:
-    agent = simplerAgent.SimplerAgent(session, ckpt_path)
+    agent = agent_lib.SimpleAgent(session=session, ckpt_path=ckpt_path,
+                                  actor_layer_size=actor_layer_size)
     state = environment.reset()
     action = agent(state)
     for _ in range(steps):