add continuous versions of kukaGymEnv, kukaCamGymEnv, racecarZEDGymEnv, etc.
These should now be trainable with PPO or evolution strategies (ES).
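
A minimal sketch of how the continuous variant added here might be driven (the import path and the gym reset/step wrappers around _reset/_step are assumptions based on the surrounding code, not part of this commit):

    # Hypothetical rollout against the new continuous action space.
    from pybullet_envs.bullet.kukaGymEnv import KukaGymEnv

    env = KukaGymEnv(renders=False, isDiscrete=False)
    obs = env.reset()
    done = False
    while not done:
      # Box action: 3 values in [-1, 1], scaled to dx, dy, da inside _step.
      action = env.action_space.sample()
      obs, reward, done, info = env.step(action)

A PPO or ES trainer would substitute its policy output for action_space.sample(); exposing a Box space is what makes those continuous-control methods applicable.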
@@ -14,6 +14,8 @@ from . import kuka
 import random
 import pybullet_data
 
+maxSteps = 1000
+
 
 class KukaGymEnv(gym.Env):
   metadata = {
@@ -25,8 +27,9 @@ class KukaGymEnv(gym.Env):
                urdfRoot=pybullet_data.getDataPath(),
                actionRepeat=1,
                isEnableSelfCollision=True,
-               renders=True):
-    print("init")
+               renders=True,
+               isDiscrete=False):
+    self._isDiscrete = isDiscrete
     self._timeStep = 1./240.
     self._urdfRoot = urdfRoot
     self._actionRepeat = actionRepeat
@@ -51,12 +54,17 @@ class KukaGymEnv(gym.Env):
     #print(observationDim)
 
     observation_high = np.array([np.finfo(np.float32).max] * observationDim)
-    self.action_space = spaces.Discrete(7)
+    if (self._isDiscrete):
+      self.action_space = spaces.Discrete(7)
+    else:
+      action_dim = 3
+      self._action_bound = 1
+      action_high = np.array([self._action_bound] * action_dim)
+      self.action_space = spaces.Box(-action_high, action_high)
     self.observation_space = spaces.Box(-observation_high, observation_high)
     self.viewer = None
 
   def _reset(self):
-    print("reset")
     self.terminated = 0
     p.resetSimulation()
     p.setPhysicsEngineParameter(numSolverIterations=150)
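
For reference, a standalone sketch of the two spaces this hunk chooses between (numpy/gym imports assumed; the meaning of the 7 discrete actions is inferred from the dx/dy/da tables in _step further down):

    import numpy as np
    from gym import spaces

    # Discrete: 7 motion primitives (no-op, -dx, +dx, -dy, +dy, -da, +da).
    discrete_space = spaces.Discrete(7)

    # Continuous: a 3-vector bounded by +/- _action_bound = 1.
    action_high = np.array([1] * 3)
    continuous_space = spaces.Box(-action_high, action_high)
    print(discrete_space.sample())    # e.g. 4
    print(continuous_space.sample())  # e.g. [ 0.13 -0.87  0.42]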
@@ -65,8 +73,8 @@ class KukaGymEnv(gym.Env):
 
     p.loadURDF(os.path.join(self._urdfRoot,"table/table.urdf"), 0.5000000,0.00000,-.820000,0.000000,0.000000,0.0,1.0)
 
-    xpos = 0.5 +0.05*random.random()
-    ypos = 0 +0.05*random.random()
+    xpos = 0.5 +0.2*random.random()
+    ypos = 0 +0.25*random.random()
     ang = 3.1415925438*random.random()
     orn = p.getQuaternionFromEuler([0,0,ang])
     self.blockUid =p.loadURDF(os.path.join(self._urdfRoot,"block.urdf"), xpos,ypos,-0.1,orn[0],orn[1],orn[2],orn[3])
@@ -101,12 +109,20 @@ class KukaGymEnv(gym.Env):
     return self._observation
 
   def _step(self, action):
-    dv = 0.01
-    dx = [0,-dv,dv,0,0,0,0][action]
-    dy = [0,0,0,-dv,dv,0,0][action]
-    da = [0,0,0,0,0,-0.1,0.1][action]
-    f = 0.3
-    realAction = [dx,dy,-0.002,da,f]
+    if (self._isDiscrete):
+      dv = 0.01
+      dx = [0,-dv,dv,0,0,0,0][action]
+      dy = [0,0,0,-dv,dv,0,0][action]
+      da = [0,0,0,0,0,-0.1,0.1][action]
+      f = 0.3
+      realAction = [dx,dy,-0.002,da,f]
+    else:
+      dv = 0.01
+      dx = action[0] * dv
+      dy = action[1] * dv
+      da = action[2] * 0.1
+      f = 0.3
+      realAction = [dx,dy,-0.002,da,f]
     return self.step2( realAction)
 
   def step2(self, action):
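
To make the continuous branch concrete: a hypothetical policy output of [0.5, -1.0, 0.2] (units assumed to be PyBullet's defaults, meters and radians, after scaling) becomes

    dv = 0.01
    action = [0.5, -1.0, 0.2]               # sampled from the Box space
    dx = action[0] * dv                     #  0.005: end-effector x delta
    dy = action[1] * dv                     # -0.010: end-effector y delta
    da = action[2] * 0.1                    #  0.020: wrist angle delta
    realAction = [dx, dy, -0.002, da, 0.3]  # constant downward z drift, fixed finger angle

so both branches hand step2() the same 5-element format: [dx, dy, dz, da, fingerAngle].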
@@ -138,25 +154,41 @@ class KukaGymEnv(gym.Env):
 
     #print("self._envStepCounter")
     #print(self._envStepCounter)
-    if (self.terminated or self._envStepCounter>1000):
+    if (self.terminated or self._envStepCounter>maxSteps):
       self._observation = self.getExtendedObservation()
       return True
 
-    if (actualEndEffectorPos[2] <= 0.10):
+    maxDist = 0.005
+    closestPoints = p.getClosestPoints(self._kuka.trayUid, self._kuka.kukaUid,maxDist)
+
+    if (len(closestPoints)):#(actualEndEffectorPos[2] <= -0.43):
       self.terminated = 1
 
       #print("closing gripper, attempting grasp")
       #start grasp and terminate
       fingerAngle = 0.3
 
-      for i in range (1000):
-        graspAction = [0,0,0.001,0,fingerAngle]
+      for i in range (100):
+        graspAction = [0,0,0.0001,0,fingerAngle]
         self._kuka.applyAction(graspAction)
         p.stepSimulation()
         fingerAngle = fingerAngle-(0.3/100.)
         if (fingerAngle<0):
           fingerAngle=0
 
+      for i in range (1000):
+        graspAction = [0,0,0.001,0,fingerAngle]
+        self._kuka.applyAction(graspAction)
+        p.stepSimulation()
+        blockPos,blockOrn=p.getBasePositionAndOrientation(self.blockUid)
+        if (blockPos[2] > 0.23):
+          #print("BLOCKPOS!")
+          #print(blockPos[2])
+          break
+        state = p.getLinkState(self._kuka.kukaUid,self._kuka.kukaEndEffectorIndex)
+        actualEndEffectorPos = state[0]
+        if (actualEndEffectorPos[2]>0.5):
+          break
 
       self._observation = self.getExtendedObservation()
       return True
     return False
@@ -165,7 +197,7 @@ class KukaGymEnv(gym.Env):
 
     #rewards is height of target object
     blockPos,blockOrn=p.getBasePositionAndOrientation(self.blockUid)
-    closestPoints = p.getClosestPoints(self.blockUid,self._kuka.kukaUid,1000)
+    closestPoints = p.getClosestPoints(self.blockUid,self._kuka.kukaUid,1000, -1, self._kuka.kukaEndEffectorIndex)
 
     reward = -1000
     numPt = len(closestPoints)
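
The reward hunk narrows the distance query to the gripper's end-effector link: getClosestPoints(bodyA, bodyB, distance, linkIndexA, linkIndexB) accepts optional link indices, and element 8 of each returned point is the contact distance. A sketch of the resulting shaping, with hypothetical body ids and an already-connected simulation assumed:

    import pybullet as p

    blockUid, kukaUid, kukaEndEffectorIndex = 2, 1, 6  # hypothetical ids / link index

    closestPoints = p.getClosestPoints(blockUid, kukaUid, 1000, -1, kukaEndEffectorIndex)
    reward = -1000
    if (len(closestPoints) > 0):
      # element 8 = contactDistance: the nearer the gripper, the larger the reward
      reward = -closestPoints[0][8] * 10
    # a +1000 bonus is added when the block is lifted above z = 0.2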
@@ -173,11 +205,10 @@ class KukaGymEnv(gym.Env):
     if (numPt>0):
       #print("reward:")
       reward = -closestPoints[0][8]*10
 
     if (blockPos[2] >0.2):
-      print("grasped a block!!!")
-      print("self._envStepCounter")
-      print(self._envStepCounter)
+      #print("grasped a block!!!")
+      #print("self._envStepCounter")
+      #print(self._envStepCounter)
       reward = reward+1000
-
     #print("reward")