Merge remote-tracking branch 'bp/master'
This commit is contained in:
@@ -0,0 +1,61 @@
|
|||||||
|
"""Runs a random policy for the random object KukaDiverseObjectEnv.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os, inspect
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
# Make the parent package importable when this script is run directly.
_this_file = inspect.getfile(inspect.currentframe())
currentdir = os.path.dirname(os.path.abspath(_this_file))
parentdir = os.path.dirname(os.path.dirname(currentdir))
os.sys.path.insert(0, parentdir)
|
||||||
|
|
||||||
|
import gym
|
||||||
|
from pybullet_envs.bullet.kuka_diverse_object_gym_env import KukaDiverseObjectEnv
|
||||||
|
from gym import spaces
|
||||||
|
|
||||||
|
from baselines import deepq
|
||||||
|
|
||||||
|
|
||||||
|
class ContinuousDownwardBiasPolicy(object):
    """Policy which takes continuous actions, and is biased to move down."""

    def __init__(self, height_hack_prob=0.9):
        """Initializes the DownwardBiasPolicy.

        Args:
            height_hack_prob: The probability of moving down at every move.
        """
        self._height_hack_prob = height_hack_prob
        # 5-vector action: dx, dy, dz, da, finger angle, each in [-1, 1].
        self._action_space = spaces.Box(low=-1, high=1, shape=(5,))

    def sample_action(self, obs, explore_prob):
        """Implements height hack and grasping threshold hack."""
        # Draw a random 5-vector, then bias dz downward with high probability.
        sampled = self._action_space.sample()
        dx, dy, dz, da = sampled[0], sampled[1], sampled[2], sampled[3]
        if np.random.random() < self._height_hack_prob:
            dz = -1
        # The gripper command is always 0, regardless of the sampled value.
        return [dx, dy, dz, da, 0]
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Runs the KukaDiverseObjectEnv with a downward-biased random policy.

    Renders the environment, prints the observation, the sampled action and
    the accumulated reward for each episode, and loops forever.
    """
    env = KukaDiverseObjectEnv(renders=True, isDiscrete=False)
    policy = ContinuousDownwardBiasPolicy()

    while True:
        obs, done = env.reset(), False
        print("===================================")
        print("obs")
        print(obs)
        episode_rew = 0
        while not done:
            env.render()
            act = policy.sample_action(obs, .1)
            print("Action")
            print(act)
            # Bug fix: step with the sampled policy action. The original
            # stepped with the all-zero action, leaving the arm motionless
            # while still printing `act` as if it were applied.
            obs, rew, done, _ = env.step(act)
            episode_rew += rew
        print("Episode reward", episode_rew)


if __name__ == '__main__':
    main()
|
||||||
@@ -0,0 +1,322 @@
|
|||||||
|
from pybullet_envs.bullet.kukaGymEnv import KukaGymEnv
|
||||||
|
import random
|
||||||
|
import os
|
||||||
|
from gym import spaces
|
||||||
|
import time
|
||||||
|
import pybullet as p
|
||||||
|
from . import kuka
|
||||||
|
import numpy as np
|
||||||
|
import pybullet_data
|
||||||
|
import pdb
|
||||||
|
import distutils.dir_util
|
||||||
|
import glob
|
||||||
|
|
||||||
|
|
||||||
|
class KukaDiverseObjectEnv(KukaGymEnv):
|
||||||
|
"""Class for Kuka environment with diverse objects.
|
||||||
|
|
||||||
|
In each episode some objects are chosen from a set of 1000 diverse objects.
|
||||||
|
These 1000 objects are split 90/10 into a train and test set.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self,
|
||||||
|
urdfRoot=pybullet_data.getDataPath(),
|
||||||
|
actionRepeat=50,
|
||||||
|
isEnableSelfCollision=True,
|
||||||
|
renders=False,
|
||||||
|
isDiscrete=False,
|
||||||
|
maxSteps=8,
|
||||||
|
dv=0.06,
|
||||||
|
removeHeightHack=False,
|
||||||
|
blockRandom=0.3,
|
||||||
|
cameraRandom=0,
|
||||||
|
width=48,
|
||||||
|
height=48,
|
||||||
|
numObjects=5,
|
||||||
|
isTest=False):
|
||||||
|
"""Initializes the KukaDiverseObjectEnv.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
urdfRoot: The diretory from which to load environment URDF's.
|
||||||
|
actionRepeat: The number of simulation steps to apply for each action.
|
||||||
|
isEnableSelfCollision: If true, enable self-collision.
|
||||||
|
renders: If true, render the bullet GUI.
|
||||||
|
isDiscrete: If true, the action space is discrete. If False, the
|
||||||
|
action space is continuous.
|
||||||
|
maxSteps: The maximum number of actions per episode.
|
||||||
|
dv: The velocity along each dimension for each action.
|
||||||
|
removeHeightHack: If false, there is a "height hack" where the gripper
|
||||||
|
automatically moves down for each action. If true, the environment is
|
||||||
|
harder and the policy chooses the height displacement.
|
||||||
|
blockRandom: A float between 0 and 1 indicated block randomness. 0 is
|
||||||
|
deterministic.
|
||||||
|
cameraRandom: A float between 0 and 1 indicating camera placement
|
||||||
|
randomness. 0 is deterministic.
|
||||||
|
width: The image width.
|
||||||
|
height: The observation image height.
|
||||||
|
numObjects: The number of objects in the bin.
|
||||||
|
isTest: If true, use the test set of objects. If false, use the train
|
||||||
|
set of objects.
|
||||||
|
"""
|
||||||
|
|
||||||
|
self._isDiscrete = isDiscrete
|
||||||
|
self._timeStep = 1./240.
|
||||||
|
self._urdfRoot = urdfRoot
|
||||||
|
self._actionRepeat = actionRepeat
|
||||||
|
self._isEnableSelfCollision = isEnableSelfCollision
|
||||||
|
self._observation = []
|
||||||
|
self._envStepCounter = 0
|
||||||
|
self._renders = renders
|
||||||
|
self._maxSteps = maxSteps
|
||||||
|
self.terminated = 0
|
||||||
|
self._cam_dist = 1.3
|
||||||
|
self._cam_yaw = 180
|
||||||
|
self._cam_pitch = -40
|
||||||
|
self._dv = dv
|
||||||
|
self._p = p
|
||||||
|
self._removeHeightHack = removeHeightHack
|
||||||
|
self._blockRandom = blockRandom
|
||||||
|
self._cameraRandom = cameraRandom
|
||||||
|
self._width = width
|
||||||
|
self._height = height
|
||||||
|
self._numObjects = numObjects
|
||||||
|
self._isTest = isTest
|
||||||
|
|
||||||
|
if self._renders:
|
||||||
|
self.cid = p.connect(p.SHARED_MEMORY)
|
||||||
|
if (self.cid<0):
|
||||||
|
self.cid = p.connect(p.GUI)
|
||||||
|
p.resetDebugVisualizerCamera(1.3,180,-41,[0.52,-0.2,-0.33])
|
||||||
|
else:
|
||||||
|
self.cid = p.connect(p.DIRECT)
|
||||||
|
self._seed()
|
||||||
|
|
||||||
|
if (self._isDiscrete):
|
||||||
|
if self._removeHeightHack:
|
||||||
|
self.action_space = spaces.Discrete(9)
|
||||||
|
else:
|
||||||
|
self.action_space = spaces.Discrete(7)
|
||||||
|
else:
|
||||||
|
self.action_space = spaces.Box(low=-1, high=1, shape=(3,)) # dx, dy, da
|
||||||
|
if self._removeHeightHack:
|
||||||
|
self.action_space = spaces.Box(low=-1,
|
||||||
|
high=1,
|
||||||
|
shape=(4,)) # dx, dy, dz, da
|
||||||
|
self.viewer = None
|
||||||
|
|
||||||
|
def _reset(self):
|
||||||
|
"""Environment reset called at the beginning of an episode.
|
||||||
|
"""
|
||||||
|
# Set the camera settings.
|
||||||
|
look = [0.23, 0.2, 0.54]
|
||||||
|
distance = 1.
|
||||||
|
pitch = -56 + self._cameraRandom*np.random.uniform(-3, 3)
|
||||||
|
yaw = 245 + self._cameraRandom*np.random.uniform(-3, 3)
|
||||||
|
roll = 0
|
||||||
|
self._view_matrix = p.computeViewMatrixFromYawPitchRoll(
|
||||||
|
look, distance, yaw, pitch, roll, 2)
|
||||||
|
fov = 20. + self._cameraRandom*np.random.uniform(-2, 2)
|
||||||
|
aspect = self._width / self._height
|
||||||
|
near = 0.1
|
||||||
|
far = 10
|
||||||
|
self._proj_matrix = p.computeProjectionMatrixFOV(
|
||||||
|
fov, aspect, near, far)
|
||||||
|
|
||||||
|
self._attempted_grasp = False
|
||||||
|
self._env_step = 0
|
||||||
|
self.terminated = 0
|
||||||
|
|
||||||
|
p.resetSimulation()
|
||||||
|
p.setPhysicsEngineParameter(numSolverIterations=150)
|
||||||
|
p.setTimeStep(self._timeStep)
|
||||||
|
p.loadURDF(os.path.join(self._urdfRoot,"plane.urdf"),[0,0,-1])
|
||||||
|
|
||||||
|
p.loadURDF(os.path.join(self._urdfRoot,"table/table.urdf"), 0.5000000,0.00000,-.820000,0.000000,0.000000,0.0,1.0)
|
||||||
|
|
||||||
|
p.setGravity(0,0,-10)
|
||||||
|
self._kuka = kuka.Kuka(urdfRootPath=self._urdfRoot, timeStep=self._timeStep)
|
||||||
|
self._envStepCounter = 0
|
||||||
|
p.stepSimulation()
|
||||||
|
|
||||||
|
# Choose the objects in the bin.
|
||||||
|
urdfList = self._get_random_object(
|
||||||
|
self._numObjects, self._isTest)
|
||||||
|
self._objectUids = self._randomly_place_objects(urdfList)
|
||||||
|
self._observation = self._get_observation()
|
||||||
|
return np.array(self._observation)
|
||||||
|
|
||||||
|
def _randomly_place_objects(self, urdfList):
|
||||||
|
"""Randomly places the objects in the bin.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
urdfList: The list of urdf files to place in the bin.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
The list of object unique ID's.
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
# Randomize positions of each object urdf.
|
||||||
|
objectUids = []
|
||||||
|
for urdf_name in urdfList:
|
||||||
|
xpos = 0.4 +self._blockRandom*random.random()
|
||||||
|
ypos = self._blockRandom*(random.random()-.5)
|
||||||
|
angle = np.pi/2 + self._blockRandom * np.pi * random.random()
|
||||||
|
orn = p.getQuaternionFromEuler([0, 0, angle])
|
||||||
|
urdf_path = os.path.join(self._urdfRoot, urdf_name)
|
||||||
|
uid = p.loadURDF(urdf_path, [xpos, ypos, .15],
|
||||||
|
[orn[0], orn[1], orn[2], orn[3]])
|
||||||
|
objectUids.append(uid)
|
||||||
|
# Let each object fall to the tray individual, to prevent object
|
||||||
|
# intersection.
|
||||||
|
for _ in range(500):
|
||||||
|
p.stepSimulation()
|
||||||
|
return objectUids
|
||||||
|
|
||||||
|
def _get_observation(self):
|
||||||
|
"""Return the observation as an image.
|
||||||
|
"""
|
||||||
|
img_arr = p.getCameraImage(width=self._width,
|
||||||
|
height=self._height,
|
||||||
|
viewMatrix=self._view_matrix,
|
||||||
|
projectionMatrix=self._proj_matrix)
|
||||||
|
rgb = img_arr[2]
|
||||||
|
np_img_arr = np.reshape(rgb, (self._height, self._width, 4))
|
||||||
|
return np_img_arr[:, :, :3]
|
||||||
|
|
||||||
|
def _step(self, action):
|
||||||
|
"""Environment step.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
action: 5-vector parameterizing XYZ offset, vertical angle offset
|
||||||
|
(radians), and grasp angle (radians).
|
||||||
|
Returns:
|
||||||
|
observation: Next observation.
|
||||||
|
reward: Float of the per-step reward as a result of taking the action.
|
||||||
|
done: Bool of whether or not the episode has ended.
|
||||||
|
debug: Dictionary of extra information provided by environment.
|
||||||
|
"""
|
||||||
|
dv = self._dv # velocity per physics step.
|
||||||
|
if self._isDiscrete:
|
||||||
|
# Static type assertion for integers.
|
||||||
|
assert isinstance(action, int)
|
||||||
|
if self._removeHeightHack:
|
||||||
|
dx = [0, -dv, dv, 0, 0, 0, 0, 0, 0][action]
|
||||||
|
dy = [0, 0, 0, -dv, dv, 0, 0, 0, 0][action]
|
||||||
|
dz = [0, 0, 0, 0, 0, -dv, dv, 0, 0][action]
|
||||||
|
da = [0, 0, 0, 0, 0, 0, 0, -0.25, 0.25][action]
|
||||||
|
else:
|
||||||
|
dx = [0, -dv, dv, 0, 0, 0, 0][action]
|
||||||
|
dy = [0, 0, 0, -dv, dv, 0, 0][action]
|
||||||
|
dz = -dv
|
||||||
|
da = [0, 0, 0, 0, 0, -0.25, 0.25][action]
|
||||||
|
else:
|
||||||
|
dx = dv * action[0]
|
||||||
|
dy = dv * action[1]
|
||||||
|
if self._removeHeightHack:
|
||||||
|
dz = dv * action[2]
|
||||||
|
da = 0.25 * action[3]
|
||||||
|
else:
|
||||||
|
dz = -dv
|
||||||
|
da = 0.25 * action[2]
|
||||||
|
|
||||||
|
return self._step_continuous([dx, dy, dz, da, 0.3])
|
||||||
|
|
||||||
|
def _step_continuous(self, action):
|
||||||
|
"""Applies a continuous velocity-control action.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
action: 5-vector parameterizing XYZ offset, vertical angle offset
|
||||||
|
(radians), and grasp angle (radians).
|
||||||
|
Returns:
|
||||||
|
observation: Next observation.
|
||||||
|
reward: Float of the per-step reward as a result of taking the action.
|
||||||
|
done: Bool of whether or not the episode has ended.
|
||||||
|
debug: Dictionary of extra information provided by environment.
|
||||||
|
"""
|
||||||
|
# Perform commanded action.
|
||||||
|
self._env_step += 1
|
||||||
|
self._kuka.applyAction(action)
|
||||||
|
for _ in range(self._actionRepeat):
|
||||||
|
p.stepSimulation()
|
||||||
|
if self._renders:
|
||||||
|
time.sleep(self._timeStep)
|
||||||
|
if self._termination():
|
||||||
|
break
|
||||||
|
|
||||||
|
# If we are close to the bin, attempt grasp.
|
||||||
|
state = p.getLinkState(self._kuka.kukaUid,
|
||||||
|
self._kuka.kukaEndEffectorIndex)
|
||||||
|
end_effector_pos = state[0]
|
||||||
|
if end_effector_pos[2] <= 0.1:
|
||||||
|
finger_angle = 0.3
|
||||||
|
for _ in range(500):
|
||||||
|
grasp_action = [0, 0, 0, 0, finger_angle]
|
||||||
|
self._kuka.applyAction(grasp_action)
|
||||||
|
p.stepSimulation()
|
||||||
|
finger_angle -= 0.3/100.
|
||||||
|
if finger_angle < 0:
|
||||||
|
finger_angle = 0
|
||||||
|
for _ in range(500):
|
||||||
|
grasp_action = [0, 0, 0.001, 0, finger_angle]
|
||||||
|
self._kuka.applyAction(grasp_action)
|
||||||
|
p.stepSimulation()
|
||||||
|
finger_angle -= 0.3/100.
|
||||||
|
if finger_angle < 0:
|
||||||
|
finger_angle = 0
|
||||||
|
self._attempted_grasp = True
|
||||||
|
observation = self._get_observation()
|
||||||
|
done = self._termination()
|
||||||
|
reward = self._reward()
|
||||||
|
|
||||||
|
debug = {
|
||||||
|
'grasp_success': self._graspSuccess
|
||||||
|
}
|
||||||
|
return observation, reward, done, debug
|
||||||
|
|
||||||
|
def _reward(self):
|
||||||
|
"""Calculates the reward for the episode.
|
||||||
|
|
||||||
|
The reward is 1 if one of the objects is above height .2 at the end of the
|
||||||
|
episode.
|
||||||
|
"""
|
||||||
|
reward = 0
|
||||||
|
self._graspSuccess = 0
|
||||||
|
for uid in self._objectUids:
|
||||||
|
pos, _ = p.getBasePositionAndOrientation(
|
||||||
|
uid)
|
||||||
|
# If any block is above height, provide reward.
|
||||||
|
if pos[2] > 0.2:
|
||||||
|
self._graspSuccess += 1
|
||||||
|
reward = 1
|
||||||
|
break
|
||||||
|
return reward
|
||||||
|
|
||||||
|
def _termination(self):
|
||||||
|
"""Terminates the episode if we have tried to grasp or if we are above
|
||||||
|
maxSteps steps.
|
||||||
|
"""
|
||||||
|
return self._attempted_grasp or self._env_step >= self._maxSteps
|
||||||
|
|
||||||
|
def _get_random_object(self, num_objects, test):
|
||||||
|
"""Randomly choose an object urdf from the random_urdfs directory.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
num_objects:
|
||||||
|
Number of graspable objects.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
A list of urdf filenames.
|
||||||
|
"""
|
||||||
|
if test:
|
||||||
|
urdf_pattern = os.path.join(self._urdfRoot, 'random_urdfs/*0/*.urdf')
|
||||||
|
else:
|
||||||
|
urdf_pattern = os.path.join(self._urdfRoot, 'random_urdfs/*[^0]/*.urdf')
|
||||||
|
found_object_directories = glob.glob(urdf_pattern)
|
||||||
|
total_num_objects = len(found_object_directories)
|
||||||
|
selected_objects = np.random.choice(np.arange(total_num_objects),
|
||||||
|
num_objects)
|
||||||
|
selected_objects_filenames = []
|
||||||
|
for object_index in selected_objects:
|
||||||
|
selected_objects_filenames += [found_object_directories[object_index]]
|
||||||
|
return selected_objects_filenames
|
||||||
Reference in New Issue
Block a user