add continuous (Box action space) versions of kukaGymEnv, kukaCamGymEnv, racecarZEDGymEnv etc.

These environments should now be trainable with PPO or evolution strategies (ES).
Erwin Coumans
2017-10-31 15:50:34 -07:00
parent 32312e60a8
commit 55f5e52ecd
8 changed files with 172 additions and 96 deletions
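
A quick usage sketch of the new continuous mode (not part of this commit; the import path is an assumption based on the pybullet_envs package layout): constructing KukaCamGymEnv with isDiscrete=False replaces the 7-way Discrete action space with a 3-dimensional Box in [-1, 1], so continuous-control trainers such as PPO or ES can sample actions directly.

# Hypothetical usage sketch, not part of this commit.
# Assumes the pybullet_envs package layout; adjust the import path if needed.
from pybullet_envs.bullet.kukaCamGymEnv import KukaCamGymEnv

env = KukaCamGymEnv(renders=False, isDiscrete=False)
print(env.action_space)              # Box(3,): [dx, dy, da], each in [-1, 1]

obs = env.reset()
action = env.action_space.sample()   # continuous action, as PPO/ES would produce
obs, reward, done, info = env.step(action)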

kukaCamGymEnv.py

@@ -14,7 +14,7 @@ import pybullet as p
 from . import kuka
 import random
 import pybullet_data
+maxSteps = 1000
 
 class KukaCamGymEnv(gym.Env):
   metadata = {
@@ -24,10 +24,10 @@ class KukaCamGymEnv(gym.Env):
   def __init__(self,
                urdfRoot=pybullet_data.getDataPath(),
-               actionRepeat=1,
+               actionRepeat=25,
                isEnableSelfCollision=True,
-               renders=True):
-    print("init")
+               renders=True,
+               isDiscrete=False):
     self._timeStep = 1./240.
     self._urdfRoot = urdfRoot
     self._actionRepeat = actionRepeat
@@ -37,6 +37,7 @@ class KukaCamGymEnv(gym.Env):
     self._renders = renders
     self._width = 341
     self._height = 256
+    self._isDiscrete=isDiscrete
     self.terminated = 0
     self._p = p
     if self._renders:
@@ -54,12 +55,17 @@ class KukaCamGymEnv(gym.Env):
     #print(observationDim)
     observation_high = np.array([np.finfo(np.float32).max] * observationDim)
-    self.action_space = spaces.Discrete(7)
+    if (self._isDiscrete):
+      self.action_space = spaces.Discrete(7)
+    else:
+      action_dim = 3
+      self._action_bound = 1
+      action_high = np.array([self._action_bound] * action_dim)
+      self.action_space = spaces.Box(-action_high, action_high)
     self.observation_space = spaces.Box(low=0, high=255, shape=(self._height, self._width, 4))
     self.viewer = None
 
   def _reset(self):
-    print("reset")
     self.terminated = 0
     p.resetSimulation()
     p.setPhysicsEngineParameter(numSolverIterations=150)
@@ -68,8 +74,8 @@
     p.loadURDF(os.path.join(self._urdfRoot,"table/table.urdf"), 0.5000000,0.00000,-.820000,0.000000,0.000000,0.0,1.0)
 
-    xpos = 0.5 +0.05*random.random()
-    ypos = 0 +0.05*random.random()
+    xpos = 0.5 +0.2*random.random()
+    ypos = 0 +0.25*random.random()
     ang = 3.1415925438*random.random()
     orn = p.getQuaternionFromEuler([0,0,ang])
     self.blockUid =p.loadURDF(os.path.join(self._urdfRoot,"block.urdf"), xpos,ypos,-0.1,orn[0],orn[1],orn[2],orn[3])
@@ -117,24 +123,36 @@ class KukaCamGymEnv(gym.Env):
     return self._observation
 
   def _step(self, action):
-    dv = 0.01
-    dx = [0,-dv,dv,0,0,0,0][action]
-    dy = [0,0,0,-dv,dv,0,0][action]
-    da = [0,0,0,0,0,-0.1,0.1][action]
-    f = 0.3
-    realAction = [dx,dy,-0.002,da,f]
+    if (self._isDiscrete):
+      dv = 0.01
+      dx = [0,-dv,dv,0,0,0,0][action]
+      dy = [0,0,0,-dv,dv,0,0][action]
+      da = [0,0,0,0,0,-0.1,0.1][action]
+      f = 0.3
+      realAction = [dx,dy,-0.002,da,f]
+    else:
+      dv = 0.01
+      dx = action[0] * dv
+      dy = action[1] * dv
+      da = action[2] * 0.1
+      f = 0.3
+      realAction = [dx,dy,-0.002,da,f]
     return self.step2( realAction)
 
   def step2(self, action):
-    self._kuka.applyAction(action)
     for i in range(self._actionRepeat):
+      self._kuka.applyAction(action)
       p.stepSimulation()
-      if self._renders:
-        time.sleep(self._timeStep)
-      self._observation = self.getExtendedObservation()
       if self._termination():
         break
+      #self._observation = self.getExtendedObservation()
       self._envStepCounter += 1
+    self._observation = self.getExtendedObservation()
+    if self._renders:
+      time.sleep(self._timeStep)
     #print("self._envStepCounter")
     #print(self._envStepCounter)
@@ -154,25 +172,41 @@ class KukaCamGymEnv(gym.Env):
     #print("self._envStepCounter")
     #print(self._envStepCounter)
-    if (self.terminated or self._envStepCounter>1000):
+    if (self.terminated or self._envStepCounter>maxSteps):
       self._observation = self.getExtendedObservation()
       return True
-    if (actualEndEffectorPos[2] <= 0.10):
+    maxDist = 0.005
+    closestPoints = p.getClosestPoints(self._kuka.trayUid, self._kuka.kukaUid,maxDist)
+    if (len(closestPoints)):#(actualEndEffectorPos[2] <= -0.43):
       self.terminated = 1
       #print("closing gripper, attempting grasp")
       #start grasp and terminate
       fingerAngle = 0.3
-      for i in range (1000):
-        graspAction = [0,0,0.001,0,fingerAngle]
+      for i in range (100):
+        graspAction = [0,0,0.0001,0,fingerAngle]
         self._kuka.applyAction(graspAction)
         p.stepSimulation()
         fingerAngle = fingerAngle-(0.3/100.)
         if (fingerAngle<0):
           fingerAngle=0
+      for i in range (1000):
+        graspAction = [0,0,0.001,0,fingerAngle]
+        self._kuka.applyAction(graspAction)
+        p.stepSimulation()
+        blockPos,blockOrn=p.getBasePositionAndOrientation(self.blockUid)
+        if (blockPos[2] > 0.23):
+          #print("BLOCKPOS!")
+          #print(blockPos[2])
+          break
+        state = p.getLinkState(self._kuka.kukaUid,self._kuka.kukaEndEffectorIndex)
+        actualEndEffectorPos = state[0]
+        if (actualEndEffectorPos[2]>0.5):
+          break
       self._observation = self.getExtendedObservation()
       return True
     return False
@@ -181,7 +215,7 @@ class KukaCamGymEnv(gym.Env):
     #rewards is height of target object
     blockPos,blockOrn=p.getBasePositionAndOrientation(self.blockUid)
-    closestPoints = p.getClosestPoints(self.blockUid,self._kuka.kukaUid,1000)
+    closestPoints = p.getClosestPoints(self.blockUid,self._kuka.kukaUid,1000, -1, self._kuka.kukaEndEffectorIndex)
     reward = -1000
     numPt = len(closestPoints)
@@ -189,13 +223,13 @@ class KukaCamGymEnv(gym.Env):
     if (numPt>0):
       #print("reward:")
       reward = -closestPoints[0][8]*10
     if (blockPos[2] >0.2):
-      print("grasped a block!!!")
-      print("self._envStepCounter")
-      print(self._envStepCounter)
+      #print("grasped a block!!!")
+      #print("self._envStepCounter")
+      #print(self._envStepCounter)
       reward = reward+1000
     #print("reward")
     #print(reward)
     return reward
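
For clarity, the continuous branch of _step above rescales the Box action into the same realAction format that step2 and the Kuka applyAction helper already consume. A standalone sketch of that mapping (illustrative only, not code from the commit):

# Illustrative helper, not part of the commit: the continuous-action mapping
# performed inline in _step, written out as a function.
def continuous_to_real_action(action):
  dv = 0.01                       # translation scale applied per step
  dx = action[0] * dv             # action components are expected in [-1, 1]
  dy = action[1] * dv
  da = action[2] * 0.1            # end-effector rotation increment
  f = 0.3                         # gripper finger angle, kept constant
  return [dx, dy, -0.002, da, f]  # dz is a small constant downward motion

Keeping dz fixed at -0.002 means the policy only controls lateral motion and wrist rotation while the arm steadily descends toward the tray.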