diff --git a/examples/pybullet/gym/agents/actor_net.py b/examples/pybullet/gym/agents/actor_net.py
new file mode 100644
index 000000000..ac6aaff8a
--- /dev/null
+++ b/examples/pybullet/gym/agents/actor_net.py
@@ -0,0 +1,21 @@
+"""An actor network."""
+import tensorflow as tf
+import sonnet as snt
+
+class ActorNetwork(snt.AbstractModule):
+  """An actor network as a sonnet Module."""
+
+  def __init__(self, layer_sizes, action_size, name='target_actor'):
+    super(ActorNetwork, self).__init__(name=name)
+    self._layer_sizes = layer_sizes
+    self._action_size = action_size
+
+  def _build(self, inputs):
+    state = inputs
+    for output_size in self._layer_sizes:  # Fully connected layers with ReLU.
+      state = snt.Linear(output_size)(state)
+      state = tf.nn.relu(state)
+
+    action = tf.tanh(  # Squash the action into [-1, 1].
+        snt.Linear(self._action_size, name='action')(state))
+    return action
diff --git a/examples/pybullet/gym/agents/simplerAgent.py b/examples/pybullet/gym/agents/simpleAgent.py
similarity index 95%
rename from examples/pybullet/gym/agents/simplerAgent.py
rename to examples/pybullet/gym/agents/simpleAgent.py
index 4f12f04db..6588e73ba 100644
--- a/examples/pybullet/gym/agents/simplerAgent.py
+++ b/examples/pybullet/gym/agents/simpleAgent.py
@@ -10,11 +10,12 @@ import numpy as np
 import tensorflow as tf
 import pdb
 
-class SimplerAgent():
+class SimpleAgent():
   def __init__(
       self,
       session,
       ckpt_path,
+      actor_layer_size,
       observation_dim=31
   ):
     self._ckpt_path = ckpt_path
diff --git a/examples/pybullet/gym/agents/simpleAgentWithSonnet.py b/examples/pybullet/gym/agents/simpleAgentWithSonnet.py
new file mode 100644
index 000000000..08a4cf1fa
--- /dev/null
+++ b/examples/pybullet/gym/agents/simpleAgentWithSonnet.py
@@ -0,0 +1,46 @@
+"""Loads a DDPG agent without too many external dependencies.
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import collections
+import numpy as np
+import tensorflow as tf
+
+import sonnet as snt
+from agents import actor_net
+
+class SimpleAgent():
+  def __init__(
+      self,
+      session,
+      ckpt_path,
+      actor_layer_size,
+      observation_size=(31,),
+      action_size=8,
+  ):
+    self._ckpt_path = ckpt_path
+    self._actor_layer_size = actor_layer_size
+    self._observation_size = observation_size
+    self._action_size = action_size
+    self._session = session
+    self._build()
+
+  def _build(self):
+    self._agent_net = actor_net.ActorNetwork(self._actor_layer_size, self._action_size)
+    self._obs = tf.placeholder(tf.float32, self._observation_size)  # Unbatched observation.
+    with tf.name_scope('Act'):  # Add a batch dimension before querying the actor.
+      batch_obs = snt.nest.pack_iterable_as(self._obs,
+          snt.nest.map(lambda x: tf.expand_dims(x, 0),
+              snt.nest.flatten_iterable(self._obs)))
+      self._action = self._agent_net(batch_obs)
+    saver = tf.train.Saver()
+    saver.restore(
+        sess=self._session,
+        save_path=self._ckpt_path)
+
+  def __call__(self, observation):
+    out_action = self._session.run(self._action, feed_dict={self._obs: observation})
+    return out_action[0]
diff --git a/examples/pybullet/gym/data/agent/tf_graph_data/tf_graph_data.ckpt.data-00000-of-00001 b/examples/pybullet/gym/data/agent/tf_graph_data/tf_graph_data.ckpt.data-00000-of-00001
new file mode 100644
index 000000000..b25aa2872
Binary files /dev/null and b/examples/pybullet/gym/data/agent/tf_graph_data/tf_graph_data.ckpt.data-00000-of-00001 differ
diff --git a/examples/pybullet/gym/data/agent/tf_graph_data/tf_graph_data.ckpt.index b/examples/pybullet/gym/data/agent/tf_graph_data/tf_graph_data.ckpt.index
new file mode 100644
index 000000000..8abcb6ea5
Binary files /dev/null and b/examples/pybullet/gym/data/agent/tf_graph_data/tf_graph_data.ckpt.index differ
diff --git a/examples/pybullet/gym/data/agent/tf_graph_data/tf_graph_data.ckpt.meta b/examples/pybullet/gym/data/agent/tf_graph_data/tf_graph_data.ckpt.meta
new file mode 100644
index 000000000..e1369a3d1
Binary files /dev/null and b/examples/pybullet/gym/data/agent/tf_graph_data/tf_graph_data.ckpt.meta differ
diff --git a/examples/pybullet/gym/minitaurGymEnvTest.py b/examples/pybullet/gym/minitaurGymEnvTest.py
index 3bbabd9a4..484842102 100644
--- a/examples/pybullet/gym/minitaurGymEnvTest.py
+++ b/examples/pybullet/gym/minitaurGymEnvTest.py
@@ -10,8 +10,15 @@ import numpy as np
 import tensorflow as tf
 
 from envs.bullet.minitaurGymEnv import MinitaurGymEnv
-from agents import simplerAgent
+try:  # Prefer the Sonnet-based agent; fall back when Sonnet is missing.
+  import sonnet
+  from agents import simpleAgentWithSonnet as agent_lib
+  ckpt_path = 'data/agent/tf_graph_data/tf_graph_data_converted.ckpt-0'
+except ImportError:
+  from agents import simpleAgent as agent_lib
+  ckpt_path = 'data/agent/tf_graph_data/tf_graph_data.ckpt'
+
 
 def testSinePolicy():
   """Tests sine policy
   """
@@ -53,14 +60,14 @@ def testDDPGPolicy():
   environment = MinitaurGymEnv(render=True)
   sum_reward = 0
   steps = 1000
-  ckpt_path = 'data/agent/tf_graph_data/tf_graph_data_converted.ckpt-0'
+  observation_shape = (31,)
   action_size = 8
-  actor_layer_sizes = (100, 181)
+  actor_layer_size = (100, 181)
   n_steps = 0
 
   tf.reset_default_graph()
   with tf.Session() as session:
-    agent = simplerAgent.SimplerAgent(session, ckpt_path)
+    agent = agent_lib.SimpleAgent(session=session, ckpt_path=ckpt_path, actor_layer_size=actor_layer_size)
     state = environment.reset()
     action = agent(state)
     for _ in range(steps):
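Usage note (not part of the patch): the updated test drives the restored agent roughly as in the minimal sketch below. It assumes TensorFlow 1.x and a Gym-style environment.step() returning (observation, reward, done, info); the module names, actor_layer_size, and checkpoint paths come from the diff above, while the rollout loop itself is illustrative.

# Minimal usage sketch (assumptions noted above).
import tensorflow as tf

from envs.bullet.minitaurGymEnv import MinitaurGymEnv

try:
  import sonnet  # Prefer the Sonnet-based agent when Sonnet is available.
  from agents import simpleAgentWithSonnet as agent_lib
  ckpt_path = 'data/agent/tf_graph_data/tf_graph_data_converted.ckpt-0'
except ImportError:
  from agents import simpleAgent as agent_lib
  ckpt_path = 'data/agent/tf_graph_data/tf_graph_data.ckpt'

environment = MinitaurGymEnv(render=True)
tf.reset_default_graph()
with tf.Session() as session:
  # The constructor builds the actor graph and restores weights from the checkpoint.
  agent = agent_lib.SimpleAgent(
      session=session, ckpt_path=ckpt_path, actor_layer_size=(100, 181))
  observation = environment.reset()
  sum_reward = 0
  for _ in range(1000):
    action = agent(observation)  # tanh-squashed action, one value per motor.
    observation, reward, done, _ = environment.step(action)  # Assumed Gym-style return.
    sum_reward += reward
    if done:
      break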