diff --git a/examples/pybullet/gym/agents/simpleAgent.py b/examples/pybullet/gym/agents/simpleAgent.py index 6588e73ba..8f399566c 100644 --- a/examples/pybullet/gym/agents/simpleAgent.py +++ b/examples/pybullet/gym/agents/simpleAgent.py @@ -16,7 +16,7 @@ class SimpleAgent(): session, ckpt_path, actor_layer_size, - observation_dim=31 + observation_dim=28 ): self._ckpt_path = ckpt_path self._session = session diff --git a/examples/pybullet/gym/agents/simpleAgentWithSonnet.py b/examples/pybullet/gym/agents/simpleAgentWithSonnet.py index 08a4cf1fa..6af4be61d 100644 --- a/examples/pybullet/gym/agents/simpleAgentWithSonnet.py +++ b/examples/pybullet/gym/agents/simpleAgentWithSonnet.py @@ -18,7 +18,7 @@ class SimpleAgent(): session, ckpt_path, actor_layer_size, - observation_size=(31,), + observation_size=(28,), action_size=8, ): self._ckpt_path = ckpt_path @@ -30,7 +30,7 @@ class SimpleAgent(): def _build(self): self._agent_net = actor_net.ActorNetwork(self._actor_layer_size, self._action_size) - self._obs = tf.placeholder(tf.float32, (31,)) + self._obs = tf.placeholder(tf.float32, self._observation_size) with tf.name_scope('Act'): batch_obs = snt.nest.pack_iterable_as(self._obs, snt.nest.map(lambda x: tf.expand_dims(x, 0), diff --git a/examples/pybullet/gym/data/agent/tf_graph_data/checkpoint b/examples/pybullet/gym/data/agent/tf_graph_data/checkpoint deleted file mode 100644 index 72cc2c323..000000000 --- a/examples/pybullet/gym/data/agent/tf_graph_data/checkpoint +++ /dev/null @@ -1,2 +0,0 @@ -model_checkpoint_path: "tf_graph_data_converted.ckpt-0" -all_model_checkpoint_paths: "tf_graph_data_converted.ckpt-0" diff --git a/examples/pybullet/gym/data/agent/tf_graph_data/tf_graph_data_converted.ckpt-0.data-00000-of-00001 b/examples/pybullet/gym/data/agent/tf_graph_data/tf_graph_data_converted.ckpt-0.data-00000-of-00001 index 4d4eb02c3..d0d344013 100644 Binary files a/examples/pybullet/gym/data/agent/tf_graph_data/tf_graph_data_converted.ckpt-0.data-00000-of-00001 and b/examples/pybullet/gym/data/agent/tf_graph_data/tf_graph_data_converted.ckpt-0.data-00000-of-00001 differ diff --git a/examples/pybullet/gym/data/agent/tf_graph_data/tf_graph_data_converted.ckpt-0.index b/examples/pybullet/gym/data/agent/tf_graph_data/tf_graph_data_converted.ckpt-0.index index 9f923be82..85feb951c 100644 Binary files a/examples/pybullet/gym/data/agent/tf_graph_data/tf_graph_data_converted.ckpt-0.index and b/examples/pybullet/gym/data/agent/tf_graph_data/tf_graph_data_converted.ckpt-0.index differ diff --git a/examples/pybullet/gym/data/agent/tf_graph_data/tf_graph_data_converted.ckpt-0.meta b/examples/pybullet/gym/data/agent/tf_graph_data/tf_graph_data_converted.ckpt-0.meta index 2a70ea383..b9484d825 100644 Binary files a/examples/pybullet/gym/data/agent/tf_graph_data/tf_graph_data_converted.ckpt-0.meta and b/examples/pybullet/gym/data/agent/tf_graph_data/tf_graph_data_converted.ckpt-0.meta differ diff --git a/examples/pybullet/gym/envs/bullet/minitaurGymEnv.py b/examples/pybullet/gym/envs/bullet/minitaurGymEnv.py index c8ca5e536..83679f22a 100644 --- a/examples/pybullet/gym/envs/bullet/minitaurGymEnv.py +++ b/examples/pybullet/gym/envs/bullet/minitaurGymEnv.py @@ -16,10 +16,14 @@ class MinitaurGymEnv(gym.Env): def __init__(self, urdfRoot="", actionRepeat=1, + isEnableSelfCollision=True, + motorVelocityLimit=10.0, render=False): self._timeStep = 0.01 self._urdfRoot = urdfRoot self._actionRepeat = actionRepeat + self._motorVelocityLimit = motorVelocityLimit + self._isEnableSelfCollision = isEnableSelfCollision self._observation = [] self._envStepCounter = 0 self._render = render @@ -44,7 +48,7 @@ class MinitaurGymEnv(gym.Env): p.setTimeStep(self._timeStep) p.loadURDF("%splane.urdf" % self._urdfRoot) p.setGravity(0,0,-10) - self._minitaur = minitaur_new.Minitaur(self._urdfRoot) + self._minitaur = minitaur_new.Minitaur(urdfRootPath=self._urdfRoot, timeStep=self._timeStep, isEnableSelfCollision=self._isEnableSelfCollision, motorVelocityLimit=self._motorVelocityLimit) self._envStepCounter = 0 self._lastBasePosition = [0, 0, 0] for i in range(100): diff --git a/examples/pybullet/gym/envs/bullet/minitaur_new.py b/examples/pybullet/gym/envs/bullet/minitaur_new.py index 252c12ed3..e23fd7dca 100644 --- a/examples/pybullet/gym/envs/bullet/minitaur_new.py +++ b/examples/pybullet/gym/envs/bullet/minitaur_new.py @@ -4,8 +4,12 @@ import copy import math class Minitaur: - def __init__(self, urdfRootPath=''): + + def __init__(self, urdfRootPath='', timeStep=0.01, isEnableSelfCollision=True, motorVelocityLimit=10.0): self.urdfRootPath = urdfRootPath + self.isEnableSelfCollision = isEnableSelfCollision + self.motorVelocityLimit = motorVelocityLimit + self.timeStep = timeStep self.reset() def buildJointNameToIdDict(self): @@ -27,8 +31,10 @@ class Minitaur: self.motorIdList.append(self.jointNameToId['motor_back_rightR_joint']) def reset(self): -# self.quadruped = p.loadURDF("%squadruped/minitaur.urdf" % self.urdfRootPath, [0,0,.2], flags=p.URDF_USE_SELF_COLLISION) - self.quadruped = p.loadURDF("%squadruped/minitaur.urdf" % self.urdfRootPath, [0,0,.2]) + if self.isEnableSelfCollision: + self.quadruped = p.loadURDF("%s/quadruped/minitaur.urdf" % self.urdfRootPath, [0,0,.2], flags=p.URDF_USE_SELF_COLLISION) + else: + self.quadruped = p.loadURDF("%s/quadruped/minitaur.urdf" % self.urdfRootPath, [0,0,.2]) self.kp = 1 self.kd = 1 self.maxForce = 3.5 @@ -116,11 +122,15 @@ class Minitaur: observation.extend(self.getMotorVelocities().tolist()) observation.extend(self.getMotorTorques().tolist()) observation.extend(list(self.getBaseOrientation())) - observation.extend(list(self.getBasePosition())) return observation def applyAction(self, motorCommands): + if self.motorVelocityLimit < np.inf: + currentMotorAngle = self.getMotorAngles() + motorCommandsMax = currentMotorAngle + self.timeStep * self.motorVelocityLimit + motorCommandsMin = currentMotorAngle - self.timeStep * self.motorVelocityLimit + motorCommands = np.clip(motorCommands, motorCommandsMin, motorCommandsMax) motorCommandsWithDir = np.multiply(motorCommands, self.motorDir) # print('action: {}'.format(motorCommands)) # print('motor: {}'.format(motorCommandsWithDir)) diff --git a/examples/pybullet/gym/minitaurGymEnvTest.py b/examples/pybullet/gym/minitaurGymEnvTest.py index 3b1ce8aeb..7491453fe 100644 --- a/examples/pybullet/gym/minitaurGymEnvTest.py +++ b/examples/pybullet/gym/minitaurGymEnvTest.py @@ -60,9 +60,9 @@ def testDDPGPolicy(): sum_reward = 0 steps = 1000 ckpt_path = 'data/agent/tf_graph_data/tf_graph_data_converted.ckpt-0' - observation_shape = (31,) + observation_shape = (28,) action_size = 8 - actor_layer_size = (100, 181) + actor_layer_size = (297, 158) n_steps = 0 tf.reset_default_graph() with tf.Session() as session: