From e30f2624f92287058d44ff19fa13424302b5fe2e Mon Sep 17 00:00:00 2001
From: deq2
Date: Thu, 18 Jan 2018 08:49:00 -0800
Subject: [PATCH] Added kuka environment with multiple diverse objects. (#1508)

* Added new diverse object environment
---
 .../enjoy_kuka_diverse_object_grasping.py     |  61 ++++
 .../bullet/kuka_diverse_object_gym_env.py     | 322 ++++++++++++++++++
 2 files changed, 383 insertions(+)
 create mode 100644 examples/pybullet/gym/pybullet_envs/baselines/enjoy_kuka_diverse_object_grasping.py
 create mode 100644 examples/pybullet/gym/pybullet_envs/bullet/kuka_diverse_object_gym_env.py

diff --git a/examples/pybullet/gym/pybullet_envs/baselines/enjoy_kuka_diverse_object_grasping.py b/examples/pybullet/gym/pybullet_envs/baselines/enjoy_kuka_diverse_object_grasping.py
new file mode 100644
index 000000000..b023a316a
--- /dev/null
+++ b/examples/pybullet/gym/pybullet_envs/baselines/enjoy_kuka_diverse_object_grasping.py
@@ -0,0 +1,61 @@
+"""Runs a random policy for the random object KukaDiverseObjectEnv.
+"""
+
+import os, inspect
+import numpy as np
+
+currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
+parentdir = os.path.dirname(os.path.dirname(currentdir))
+os.sys.path.insert(0, parentdir)
+
+import gym
+from pybullet_envs.bullet.kuka_diverse_object_gym_env import KukaDiverseObjectEnv
+from gym import spaces
+
+from baselines import deepq
+
+
+class ContinuousDownwardBiasPolicy(object):
+  """Policy which takes continuous actions and is biased to move down.
+  """
+
+  def __init__(self, height_hack_prob=0.9):
+    """Initializes the DownwardBiasPolicy.
+
+    Args:
+      height_hack_prob: The probability of moving down at every move.
+    """
+    self._height_hack_prob = height_hack_prob
+    self._action_space = spaces.Box(low=-1, high=1, shape=(5,))
+
+  def sample_action(self, obs, explore_prob):
+    """Implements the height hack and grasping threshold hack.
+    """
+    dx, dy, dz, da, close = self._action_space.sample()
+    if np.random.random() < self._height_hack_prob:
+      dz = -1
+    # Grasp hack: keep the fingers open; the env closes them near the bin.
+    return [dx, dy, dz, da, 0]
+
+
+def main():
+
+  env = KukaDiverseObjectEnv(renders=True, isDiscrete=False)
+  policy = ContinuousDownwardBiasPolicy()
+
+  while True:
+    obs, done = env.reset(), False
+    print("===================================")
+    print("obs")
+    print(obs)
+    episode_rew = 0
+    while not done:
+      env.render()
+      act = policy.sample_action(obs, .1)
+      print("Action")
+      print(act)
+      obs, rew, done, _ = env.step(act)
+      episode_rew += rew
+    print("Episode reward", episode_rew)
+
+
+if __name__ == '__main__':
+  main()
\ No newline at end of file
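Aside (illustrative, not part of the patch): the script above drives the continuous variant of the environment. For the discrete variant (isDiscrete=True with removeHeightHack=True), the same downward bias can be expressed by favoring the "move down" action index. DiscreteDownwardBiasPolicy below is a hypothetical name; the action indices follow the mapping defined in KukaDiverseObjectEnv._step in the second file.

import numpy as np

class DiscreteDownwardBiasPolicy(object):
  """Hypothetical discrete analogue of ContinuousDownwardBiasPolicy."""

  def __init__(self, height_hack_prob=0.9):
    self._height_hack_prob = height_hack_prob

  def sample_action(self, obs, explore_prob):
    # Action indices with removeHeightHack=True: 0 no-op, 1/2 = -dx/+dx,
    # 3/4 = -dy/+dy, 5/6 = -dz/+dz, 7/8 = -da/+da (see _step below).
    if np.random.random() < self._height_hack_prob:
      return 5  # bias toward moving down (-dz)
    return int(np.random.randint(9))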
+ """ + + def __init__(self, + urdfRoot=pybullet_data.getDataPath(), + actionRepeat=50, + isEnableSelfCollision=True, + renders=False, + isDiscrete=False, + maxSteps=8, + dv=0.06, + removeHeightHack=False, + blockRandom=0.3, + cameraRandom=0, + width=48, + height=48, + numObjects=5, + isTest=False): + """Initializes the KukaDiverseObjectEnv. + + Args: + urdfRoot: The diretory from which to load environment URDF's. + actionRepeat: The number of simulation steps to apply for each action. + isEnableSelfCollision: If true, enable self-collision. + renders: If true, render the bullet GUI. + isDiscrete: If true, the action space is discrete. If False, the + action space is continuous. + maxSteps: The maximum number of actions per episode. + dv: The velocity along each dimension for each action. + removeHeightHack: If false, there is a "height hack" where the gripper + automatically moves down for each action. If true, the environment is + harder and the policy chooses the height displacement. + blockRandom: A float between 0 and 1 indicated block randomness. 0 is + deterministic. + cameraRandom: A float between 0 and 1 indicating camera placement + randomness. 0 is deterministic. + width: The image width. + height: The observation image height. + numObjects: The number of objects in the bin. + isTest: If true, use the test set of objects. If false, use the train + set of objects. + """ + + self._isDiscrete = isDiscrete + self._timeStep = 1./240. + self._urdfRoot = urdfRoot + self._actionRepeat = actionRepeat + self._isEnableSelfCollision = isEnableSelfCollision + self._observation = [] + self._envStepCounter = 0 + self._renders = renders + self._maxSteps = maxSteps + self.terminated = 0 + self._cam_dist = 1.3 + self._cam_yaw = 180 + self._cam_pitch = -40 + self._dv = dv + self._p = p + self._removeHeightHack = removeHeightHack + self._blockRandom = blockRandom + self._cameraRandom = cameraRandom + self._width = width + self._height = height + self._numObjects = numObjects + self._isTest = isTest + + if self._renders: + self.cid = p.connect(p.SHARED_MEMORY) + if (self.cid<0): + self.cid = p.connect(p.GUI) + p.resetDebugVisualizerCamera(1.3,180,-41,[0.52,-0.2,-0.33]) + else: + self.cid = p.connect(p.DIRECT) + self._seed() + + if (self._isDiscrete): + if self._removeHeightHack: + self.action_space = spaces.Discrete(9) + else: + self.action_space = spaces.Discrete(7) + else: + self.action_space = spaces.Box(low=-1, high=1, shape=(3,)) # dx, dy, da + if self._removeHeightHack: + self.action_space = spaces.Box(low=-1, + high=1, + shape=(4,)) # dx, dy, dz, da + self.viewer = None + + def _reset(self): + """Environment reset called at the beginning of an episode. + """ + # Set the camera settings. + look = [0.23, 0.2, 0.54] + distance = 1. + pitch = -56 + self._cameraRandom*np.random.uniform(-3, 3) + yaw = 245 + self._cameraRandom*np.random.uniform(-3, 3) + roll = 0 + self._view_matrix = p.computeViewMatrixFromYawPitchRoll( + look, distance, yaw, pitch, roll, 2) + fov = 20. 
+  def _randomly_place_objects(self, urdfList):
+    """Randomly places the objects in the bin.
+
+    Args:
+      urdfList: The list of urdf files to place in the bin.
+
+    Returns:
+      The list of object unique ID's.
+    """
+
+    # Randomize positions of each object urdf.
+    objectUids = []
+    for urdf_name in urdfList:
+      xpos = 0.4 + self._blockRandom*random.random()
+      ypos = self._blockRandom*(random.random() - .5)
+      angle = np.pi/2 + self._blockRandom * np.pi * random.random()
+      orn = p.getQuaternionFromEuler([0, 0, angle])
+      urdf_path = os.path.join(self._urdfRoot, urdf_name)
+      uid = p.loadURDF(urdf_path, [xpos, ypos, .15],
+                       [orn[0], orn[1], orn[2], orn[3]])
+      objectUids.append(uid)
+      # Let each object fall into the tray individually, to prevent object
+      # intersection.
+      for _ in range(500):
+        p.stepSimulation()
+    return objectUids
+
+  def _get_observation(self):
+    """Return the observation as an image.
+    """
+    img_arr = p.getCameraImage(width=self._width,
+                               height=self._height,
+                               viewMatrix=self._view_matrix,
+                               projectionMatrix=self._proj_matrix)
+    rgb = img_arr[2]
+    np_img_arr = np.reshape(rgb, (self._height, self._width, 4))
+    return np_img_arr[:, :, :3]
+
+  def _step(self, action):
+    """Environment step.
+
+    Args:
+      action: A discrete action index if isDiscrete is true; otherwise a
+        3-vector (dx, dy, da) or, with removeHeightHack, a 4-vector
+        (dx, dy, dz, da). It is converted to the 5-vector expected by
+        _step_continuous.
+    Returns:
+      observation: Next observation.
+      reward: Float of the per-step reward as a result of taking the action.
+      done: Bool of whether or not the episode has ended.
+      debug: Dictionary of extra information provided by environment.
+    """
+    dv = self._dv  # velocity per physics step.
+    if self._isDiscrete:
+      # Static type assertion for integers.
+      assert isinstance(action, int)
+      if self._removeHeightHack:
+        dx = [0, -dv, dv, 0, 0, 0, 0, 0, 0][action]
+        dy = [0, 0, 0, -dv, dv, 0, 0, 0, 0][action]
+        dz = [0, 0, 0, 0, 0, -dv, dv, 0, 0][action]
+        da = [0, 0, 0, 0, 0, 0, 0, -0.25, 0.25][action]
+      else:
+        dx = [0, -dv, dv, 0, 0, 0, 0][action]
+        dy = [0, 0, 0, -dv, dv, 0, 0][action]
+        dz = -dv
+        da = [0, 0, 0, 0, 0, -0.25, 0.25][action]
+    else:
+      dx = dv * action[0]
+      dy = dv * action[1]
+      if self._removeHeightHack:
+        dz = dv * action[2]
+        da = 0.25 * action[3]
+      else:
+        dz = -dv
+        da = 0.25 * action[2]
+
+    return self._step_continuous([dx, dy, dz, da, 0.3])
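Aside (illustrative, not part of the patch): a worked example of the continuous branch of _step above, assuming the default dv=0.06 and the height hack left on (removeHeightHack=False):

dv = 0.06
action = [1.0, -0.5, 0.2]  # (dx, dy, da), each scaled to [-1, 1]
dx = dv * action[0]        # 0.06
dy = dv * action[1]        # -0.03
dz = -dv                   # -0.06, forced downward by the height hack
da = 0.25 * action[2]      # 0.05
# _step_continuous receives [0.06, -0.03, -0.06, 0.05, 0.3]; the trailing 0.3
# is the finger angle passed through to Kuka.applyAction during the approach.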
+  def _step_continuous(self, action):
+    """Applies a continuous velocity-control action.
+
+    Args:
+      action: 5-vector parameterizing XYZ offset, vertical angle offset
+        (radians), and grasp angle (radians).
+    Returns:
+      observation: Next observation.
+      reward: Float of the per-step reward as a result of taking the action.
+      done: Bool of whether or not the episode has ended.
+      debug: Dictionary of extra information provided by environment.
+    """
+    # Perform commanded action.
+    self._env_step += 1
+    self._kuka.applyAction(action)
+    for _ in range(self._actionRepeat):
+      p.stepSimulation()
+      if self._renders:
+        time.sleep(self._timeStep)
+      if self._termination():
+        break
+
+    # If we are close to the bin, attempt grasp.
+    state = p.getLinkState(self._kuka.kukaUid,
+                           self._kuka.kukaEndEffectorIndex)
+    end_effector_pos = state[0]
+    if end_effector_pos[2] <= 0.1:
+      finger_angle = 0.3
+      # Close the fingers.
+      for _ in range(500):
+        grasp_action = [0, 0, 0, 0, finger_angle]
+        self._kuka.applyAction(grasp_action)
+        p.stepSimulation()
+        finger_angle -= 0.3/100.
+        if finger_angle < 0:
+          finger_angle = 0
+      # Lift the gripper while keeping the fingers closed.
+      for _ in range(500):
+        grasp_action = [0, 0, 0.001, 0, finger_angle]
+        self._kuka.applyAction(grasp_action)
+        p.stepSimulation()
+        finger_angle -= 0.3/100.
+        if finger_angle < 0:
+          finger_angle = 0
+      self._attempted_grasp = True
+    observation = self._get_observation()
+    done = self._termination()
+    reward = self._reward()
+
+    debug = {
+        'grasp_success': self._graspSuccess
+    }
+    return observation, reward, done, debug
+
+  def _reward(self):
+    """Calculates the reward for the episode.
+
+    The reward is 1 if one of the objects is above height .2 at the end of the
+    episode.
+    """
+    reward = 0
+    self._graspSuccess = 0
+    for uid in self._objectUids:
+      pos, _ = p.getBasePositionAndOrientation(uid)
+      # If any block is above height, provide reward.
+      if pos[2] > 0.2:
+        self._graspSuccess += 1
+        reward = 1
+        break
+    return reward
+
+  def _termination(self):
+    """Terminates the episode if we have tried to grasp or if we have taken
+    maxSteps actions.
+    """
+    return self._attempted_grasp or self._env_step >= self._maxSteps
+
+  def _get_random_object(self, num_objects, test):
+    """Randomly choose an object urdf from the random_urdfs directory.
+
+    Args:
+      num_objects: Number of graspable objects.
+      test: If true, sample from the test set of objects (urdf directories
+        ending in 0); otherwise sample from the train set.
+
+    Returns:
+      A list of urdf filenames.
+    """
+    if test:
+      urdf_pattern = os.path.join(self._urdfRoot, 'random_urdfs/*0/*.urdf')
+    else:
+      urdf_pattern = os.path.join(self._urdfRoot, 'random_urdfs/*[^0]/*.urdf')
+    found_object_directories = glob.glob(urdf_pattern)
+    total_num_objects = len(found_object_directories)
+    selected_objects = np.random.choice(np.arange(total_num_objects),
+                                        num_objects)
+    selected_objects_filenames = []
+    for object_index in selected_objects:
+      selected_objects_filenames += [found_object_directories[object_index]]
+    return selected_objects_filenames
\ No newline at end of file
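Aside (illustrative, not part of the patch; assumes pybullet and the pybullet_data assets are installed): a minimal end-to-end sketch that runs one episode with random discrete actions and reads the grasp outcome from the debug dictionary returned by _step_continuous.

import numpy as np
from pybullet_envs.bullet.kuka_diverse_object_gym_env import KukaDiverseObjectEnv

env = KukaDiverseObjectEnv(renders=False, isDiscrete=True, maxSteps=8,
                           isTest=True)  # sample from the 10% test split
obs, done = env.reset(), False
while not done:
  action = int(np.random.randint(env.action_space.n))  # _step asserts int
  obs, reward, done, debug = env.step(action)
print("grasp_success:", debug['grasp_success'])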