Added kuka environment with multiple diverse objects. (#1508)

* Added new diverse object environment
2018-01-18 08:49:00 -08:00
parent 3e873feac0
commit e30f2624f9
2 changed files with 383 additions and 0 deletions
--- a/examples/pybullet/gym/pybullet_envs/baselines/enjoy_kuka_diverse_object_grasping.py
+++ b/examples/pybullet/gym/pybullet_envs/baselines/enjoy_kuka_diverse_object_grasping.py
@@ -0,0 +1,61 @@
+"""Runs a random policy for the random object KukaDiverseObjectEnv.
+"""
+
+import os, inspect
+import numpy as np
+
+currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
+parentdir = os.path.dirname(os.path.dirname(currentdir))
+os.sys.path.insert(0,parentdir)
+
+import gym
+from pybullet_envs.bullet.kuka_diverse_object_gym_env import KukaDiverseObjectEnv
+from gym import spaces
+
+from baselines import deepq
+
+
+class ContinuousDownwardBiasPolicy(object):
+  """Policy which takes continuous actions, and is biased to move down.
+  """
+  def __init__(self, height_hack_prob=0.9):
+    """Initializes the DownwardBiasPolicy.
+
+    Args:
+        height_hack_prob: The probability of moving down at every move.
+    """
+    self._height_hack_prob = height_hack_prob
+    self._action_space = spaces.Box(low=-1, high=1, shape=(5,))
+
+  def sample_action(self, obs, explore_prob):
+    """Implements height hack and grasping threshold hack.
+    """
+    dx, dy, dz, da, close = self._action_space.sample()
+    if np.random.random() < self._height_hack_prob:
+      dz = -1
+    return [dx, dy, dz, da, 0]
+
+
+def main():
+    
+    env = KukaDiverseObjectEnv(renders=True, isDiscrete=False)
+    policy = ContinuousDownwardBiasPolicy()
+
+    while True:
+        obs, done = env.reset(), False
+        print("===================================")        
+        print("obs")
+        print(obs)
+        episode_rew = 0
+        while not done:
+            env.render()
+            act = policy.sample_action(obs, .1)
+            print("Action")
+            print(act)
+            obs, rew, done, _ = env.step([0, 0, 0, 0, 0])
+            episode_rew += rew
+        print("Episode reward", episode_rew)
+
+
+if __name__ == '__main__':
+    main()