add continuous versions of kukaGymEnv, kukaCamGymEnv, racecarZEDGymEnv etc.
should be trainable with PPO or evolution strategies (ES) now
This commit is contained in:
@@ -17,12 +17,12 @@ class Kuka:
|
|||||||
self.timeStep = timeStep
|
self.timeStep = timeStep
|
||||||
|
|
||||||
self.maxForce = 200.
|
self.maxForce = 200.
|
||||||
self.fingerAForce = 6
|
self.fingerAForce = 2
|
||||||
self.fingerBForce = 5.5
|
self.fingerBForce = 2.5
|
||||||
self.fingerTipForce = 6
|
self.fingerTipForce = 2
|
||||||
self.useInverseKinematics = 1
|
self.useInverseKinematics = 1
|
||||||
self.useSimulation = 1
|
self.useSimulation = 1
|
||||||
self.useNullSpace = 1
|
self.useNullSpace =21
|
||||||
self.useOrientation = 1
|
self.useOrientation = 1
|
||||||
self.kukaEndEffectorIndex = 6
|
self.kukaEndEffectorIndex = 6
|
||||||
#lower limits for null space
|
#lower limits for null space
|
||||||
@@ -120,10 +120,8 @@ class Kuka:
|
|||||||
#print (self.endEffectorPos[2])
|
#print (self.endEffectorPos[2])
|
||||||
#print("actualEndEffectorPos[2]")
|
#print("actualEndEffectorPos[2]")
|
||||||
#print(actualEndEffectorPos[2])
|
#print(actualEndEffectorPos[2])
|
||||||
if (dz>0 or actualEndEffectorPos[2]>0.10):
|
#if (dz<0 or actualEndEffectorPos[2]<0.5):
|
||||||
self.endEffectorPos[2] = self.endEffectorPos[2]+dz
|
self.endEffectorPos[2] = self.endEffectorPos[2]+dz
|
||||||
if (actualEndEffectorPos[2]<0.10):
|
|
||||||
self.endEffectorPos[2] = self.endEffectorPos[2]+0.0001
|
|
||||||
|
|
||||||
|
|
||||||
self.endEffectorAngle = self.endEffectorAngle + da
|
self.endEffectorAngle = self.endEffectorAngle + da
|
||||||
@@ -147,7 +145,7 @@ class Kuka:
|
|||||||
if (self.useSimulation):
|
if (self.useSimulation):
|
||||||
for i in range (self.kukaEndEffectorIndex+1):
|
for i in range (self.kukaEndEffectorIndex+1):
|
||||||
#print(i)
|
#print(i)
|
||||||
p.setJointMotorControl2(bodyIndex=self.kukaUid,jointIndex=i,controlMode=p.POSITION_CONTROL,targetPosition=jointPoses[i],targetVelocity=0,force=self.maxForce,positionGain=0.03,velocityGain=1)
|
p.setJointMotorControl2(bodyIndex=self.kukaUid,jointIndex=i,controlMode=p.POSITION_CONTROL,targetPosition=jointPoses[i],targetVelocity=0,force=self.maxForce,positionGain=0.3,velocityGain=1)
|
||||||
else:
|
else:
|
||||||
#reset the joint state (ignoring all dynamics, not recommended to use during simulation)
|
#reset the joint state (ignoring all dynamics, not recommended to use during simulation)
|
||||||
for i in range (self.numJoints):
|
for i in range (self.numJoints):
|
||||||
|
|||||||
@@ -14,7 +14,7 @@ import pybullet as p
|
|||||||
from . import kuka
|
from . import kuka
|
||||||
import random
|
import random
|
||||||
import pybullet_data
|
import pybullet_data
|
||||||
|
maxSteps = 1000
|
||||||
|
|
||||||
class KukaCamGymEnv(gym.Env):
|
class KukaCamGymEnv(gym.Env):
|
||||||
metadata = {
|
metadata = {
|
||||||
@@ -24,10 +24,10 @@ class KukaCamGymEnv(gym.Env):
|
|||||||
|
|
||||||
def __init__(self,
|
def __init__(self,
|
||||||
urdfRoot=pybullet_data.getDataPath(),
|
urdfRoot=pybullet_data.getDataPath(),
|
||||||
actionRepeat=1,
|
actionRepeat=25,
|
||||||
isEnableSelfCollision=True,
|
isEnableSelfCollision=True,
|
||||||
renders=True):
|
renders=True,
|
||||||
print("init")
|
isDiscrete=False):
|
||||||
self._timeStep = 1./240.
|
self._timeStep = 1./240.
|
||||||
self._urdfRoot = urdfRoot
|
self._urdfRoot = urdfRoot
|
||||||
self._actionRepeat = actionRepeat
|
self._actionRepeat = actionRepeat
|
||||||
@@ -37,6 +37,7 @@ class KukaCamGymEnv(gym.Env):
|
|||||||
self._renders = renders
|
self._renders = renders
|
||||||
self._width = 341
|
self._width = 341
|
||||||
self._height = 256
|
self._height = 256
|
||||||
|
self._isDiscrete=isDiscrete
|
||||||
self.terminated = 0
|
self.terminated = 0
|
||||||
self._p = p
|
self._p = p
|
||||||
if self._renders:
|
if self._renders:
|
||||||
@@ -54,12 +55,17 @@ class KukaCamGymEnv(gym.Env):
|
|||||||
#print(observationDim)
|
#print(observationDim)
|
||||||
|
|
||||||
observation_high = np.array([np.finfo(np.float32).max] * observationDim)
|
observation_high = np.array([np.finfo(np.float32).max] * observationDim)
|
||||||
self.action_space = spaces.Discrete(7)
|
if (self._isDiscrete):
|
||||||
|
self.action_space = spaces.Discrete(7)
|
||||||
|
else:
|
||||||
|
action_dim = 3
|
||||||
|
self._action_bound = 1
|
||||||
|
action_high = np.array([self._action_bound] * action_dim)
|
||||||
|
self.action_space = spaces.Box(-action_high, action_high)
|
||||||
self.observation_space = spaces.Box(low=0, high=255, shape=(self._height, self._width, 4))
|
self.observation_space = spaces.Box(low=0, high=255, shape=(self._height, self._width, 4))
|
||||||
self.viewer = None
|
self.viewer = None
|
||||||
|
|
||||||
def _reset(self):
|
def _reset(self):
|
||||||
print("reset")
|
|
||||||
self.terminated = 0
|
self.terminated = 0
|
||||||
p.resetSimulation()
|
p.resetSimulation()
|
||||||
p.setPhysicsEngineParameter(numSolverIterations=150)
|
p.setPhysicsEngineParameter(numSolverIterations=150)
|
||||||
@@ -68,8 +74,8 @@ class KukaCamGymEnv(gym.Env):
|
|||||||
|
|
||||||
p.loadURDF(os.path.join(self._urdfRoot,"table/table.urdf"), 0.5000000,0.00000,-.820000,0.000000,0.000000,0.0,1.0)
|
p.loadURDF(os.path.join(self._urdfRoot,"table/table.urdf"), 0.5000000,0.00000,-.820000,0.000000,0.000000,0.0,1.0)
|
||||||
|
|
||||||
xpos = 0.5 +0.05*random.random()
|
xpos = 0.5 +0.2*random.random()
|
||||||
ypos = 0 +0.05*random.random()
|
ypos = 0 +0.25*random.random()
|
||||||
ang = 3.1415925438*random.random()
|
ang = 3.1415925438*random.random()
|
||||||
orn = p.getQuaternionFromEuler([0,0,ang])
|
orn = p.getQuaternionFromEuler([0,0,ang])
|
||||||
self.blockUid =p.loadURDF(os.path.join(self._urdfRoot,"block.urdf"), xpos,ypos,-0.1,orn[0],orn[1],orn[2],orn[3])
|
self.blockUid =p.loadURDF(os.path.join(self._urdfRoot,"block.urdf"), xpos,ypos,-0.1,orn[0],orn[1],orn[2],orn[3])
|
||||||
@@ -117,24 +123,36 @@ class KukaCamGymEnv(gym.Env):
|
|||||||
return self._observation
|
return self._observation
|
||||||
|
|
||||||
def _step(self, action):
|
def _step(self, action):
|
||||||
dv = 0.01
|
if (self._isDiscrete):
|
||||||
dx = [0,-dv,dv,0,0,0,0][action]
|
dv = 0.01
|
||||||
dy = [0,0,0,-dv,dv,0,0][action]
|
dx = [0,-dv,dv,0,0,0,0][action]
|
||||||
da = [0,0,0,0,0,-0.1,0.1][action]
|
dy = [0,0,0,-dv,dv,0,0][action]
|
||||||
f = 0.3
|
da = [0,0,0,0,0,-0.1,0.1][action]
|
||||||
realAction = [dx,dy,-0.002,da,f]
|
f = 0.3
|
||||||
|
realAction = [dx,dy,-0.002,da,f]
|
||||||
|
else:
|
||||||
|
dv = 0.01
|
||||||
|
dx = action[0] * dv
|
||||||
|
dy = action[1] * dv
|
||||||
|
da = action[2] * 0.1
|
||||||
|
f = 0.3
|
||||||
|
realAction = [dx,dy,-0.002,da,f]
|
||||||
|
|
||||||
return self.step2( realAction)
|
return self.step2( realAction)
|
||||||
|
|
||||||
def step2(self, action):
|
def step2(self, action):
|
||||||
self._kuka.applyAction(action)
|
|
||||||
for i in range(self._actionRepeat):
|
for i in range(self._actionRepeat):
|
||||||
|
self._kuka.applyAction(action)
|
||||||
p.stepSimulation()
|
p.stepSimulation()
|
||||||
if self._renders:
|
|
||||||
time.sleep(self._timeStep)
|
|
||||||
self._observation = self.getExtendedObservation()
|
|
||||||
if self._termination():
|
if self._termination():
|
||||||
break
|
break
|
||||||
|
#self._observation = self.getExtendedObservation()
|
||||||
self._envStepCounter += 1
|
self._envStepCounter += 1
|
||||||
|
|
||||||
|
self._observation = self.getExtendedObservation()
|
||||||
|
if self._renders:
|
||||||
|
time.sleep(self._timeStep)
|
||||||
|
|
||||||
#print("self._envStepCounter")
|
#print("self._envStepCounter")
|
||||||
#print(self._envStepCounter)
|
#print(self._envStepCounter)
|
||||||
|
|
||||||
@@ -154,25 +172,41 @@ class KukaCamGymEnv(gym.Env):
|
|||||||
|
|
||||||
#print("self._envStepCounter")
|
#print("self._envStepCounter")
|
||||||
#print(self._envStepCounter)
|
#print(self._envStepCounter)
|
||||||
if (self.terminated or self._envStepCounter>1000):
|
if (self.terminated or self._envStepCounter>maxSteps):
|
||||||
self._observation = self.getExtendedObservation()
|
self._observation = self.getExtendedObservation()
|
||||||
return True
|
return True
|
||||||
|
maxDist = 0.005
|
||||||
if (actualEndEffectorPos[2] <= 0.10):
|
closestPoints = p.getClosestPoints(self._kuka.trayUid, self._kuka.kukaUid,maxDist)
|
||||||
|
|
||||||
|
if (len(closestPoints)):#(actualEndEffectorPos[2] <= -0.43):
|
||||||
self.terminated = 1
|
self.terminated = 1
|
||||||
|
|
||||||
#print("closing gripper, attempting grasp")
|
#print("closing gripper, attempting grasp")
|
||||||
#start grasp and terminate
|
#start grasp and terminate
|
||||||
fingerAngle = 0.3
|
fingerAngle = 0.3
|
||||||
|
for i in range (100):
|
||||||
for i in range (1000):
|
graspAction = [0,0,0.0001,0,fingerAngle]
|
||||||
graspAction = [0,0,0.001,0,fingerAngle]
|
|
||||||
self._kuka.applyAction(graspAction)
|
self._kuka.applyAction(graspAction)
|
||||||
p.stepSimulation()
|
p.stepSimulation()
|
||||||
fingerAngle = fingerAngle-(0.3/100.)
|
fingerAngle = fingerAngle-(0.3/100.)
|
||||||
if (fingerAngle<0):
|
if (fingerAngle<0):
|
||||||
fingerAngle=0
|
fingerAngle=0
|
||||||
|
|
||||||
|
for i in range (1000):
|
||||||
|
graspAction = [0,0,0.001,0,fingerAngle]
|
||||||
|
self._kuka.applyAction(graspAction)
|
||||||
|
p.stepSimulation()
|
||||||
|
blockPos,blockOrn=p.getBasePositionAndOrientation(self.blockUid)
|
||||||
|
if (blockPos[2] > 0.23):
|
||||||
|
#print("BLOCKPOS!")
|
||||||
|
#print(blockPos[2])
|
||||||
|
break
|
||||||
|
state = p.getLinkState(self._kuka.kukaUid,self._kuka.kukaEndEffectorIndex)
|
||||||
|
actualEndEffectorPos = state[0]
|
||||||
|
if (actualEndEffectorPos[2]>0.5):
|
||||||
|
break
|
||||||
|
|
||||||
|
|
||||||
self._observation = self.getExtendedObservation()
|
self._observation = self.getExtendedObservation()
|
||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
@@ -181,7 +215,7 @@ class KukaCamGymEnv(gym.Env):
|
|||||||
|
|
||||||
#rewards is height of target object
|
#rewards is height of target object
|
||||||
blockPos,blockOrn=p.getBasePositionAndOrientation(self.blockUid)
|
blockPos,blockOrn=p.getBasePositionAndOrientation(self.blockUid)
|
||||||
closestPoints = p.getClosestPoints(self.blockUid,self._kuka.kukaUid,1000)
|
closestPoints = p.getClosestPoints(self.blockUid,self._kuka.kukaUid,1000, -1, self._kuka.kukaEndEffectorIndex)
|
||||||
|
|
||||||
reward = -1000
|
reward = -1000
|
||||||
numPt = len(closestPoints)
|
numPt = len(closestPoints)
|
||||||
@@ -189,13 +223,13 @@ class KukaCamGymEnv(gym.Env):
|
|||||||
if (numPt>0):
|
if (numPt>0):
|
||||||
#print("reward:")
|
#print("reward:")
|
||||||
reward = -closestPoints[0][8]*10
|
reward = -closestPoints[0][8]*10
|
||||||
|
|
||||||
if (blockPos[2] >0.2):
|
if (blockPos[2] >0.2):
|
||||||
print("grasped a block!!!")
|
#print("grasped a block!!!")
|
||||||
print("self._envStepCounter")
|
#print("self._envStepCounter")
|
||||||
print(self._envStepCounter)
|
#print(self._envStepCounter)
|
||||||
reward = reward+1000
|
reward = reward+1000
|
||||||
|
|
||||||
#print("reward")
|
#print("reward")
|
||||||
#print(reward)
|
#print(reward)
|
||||||
return reward
|
return reward
|
||||||
|
|
||||||
|
|||||||
@@ -14,6 +14,8 @@ from . import kuka
|
|||||||
import random
|
import random
|
||||||
import pybullet_data
|
import pybullet_data
|
||||||
|
|
||||||
|
maxSteps = 1000
|
||||||
|
|
||||||
|
|
||||||
class KukaGymEnv(gym.Env):
|
class KukaGymEnv(gym.Env):
|
||||||
metadata = {
|
metadata = {
|
||||||
@@ -25,8 +27,9 @@ class KukaGymEnv(gym.Env):
|
|||||||
urdfRoot=pybullet_data.getDataPath(),
|
urdfRoot=pybullet_data.getDataPath(),
|
||||||
actionRepeat=1,
|
actionRepeat=1,
|
||||||
isEnableSelfCollision=True,
|
isEnableSelfCollision=True,
|
||||||
renders=True):
|
renders=True,
|
||||||
print("init")
|
isDiscrete=False):
|
||||||
|
self._isDiscrete = isDiscrete
|
||||||
self._timeStep = 1./240.
|
self._timeStep = 1./240.
|
||||||
self._urdfRoot = urdfRoot
|
self._urdfRoot = urdfRoot
|
||||||
self._actionRepeat = actionRepeat
|
self._actionRepeat = actionRepeat
|
||||||
@@ -51,12 +54,17 @@ class KukaGymEnv(gym.Env):
|
|||||||
#print(observationDim)
|
#print(observationDim)
|
||||||
|
|
||||||
observation_high = np.array([np.finfo(np.float32).max] * observationDim)
|
observation_high = np.array([np.finfo(np.float32).max] * observationDim)
|
||||||
self.action_space = spaces.Discrete(7)
|
if (self._isDiscrete):
|
||||||
|
self.action_space = spaces.Discrete(7)
|
||||||
|
else:
|
||||||
|
action_dim = 3
|
||||||
|
self._action_bound = 1
|
||||||
|
action_high = np.array([self._action_bound] * action_dim)
|
||||||
|
self.action_space = spaces.Box(-action_high, action_high)
|
||||||
self.observation_space = spaces.Box(-observation_high, observation_high)
|
self.observation_space = spaces.Box(-observation_high, observation_high)
|
||||||
self.viewer = None
|
self.viewer = None
|
||||||
|
|
||||||
def _reset(self):
|
def _reset(self):
|
||||||
print("reset")
|
|
||||||
self.terminated = 0
|
self.terminated = 0
|
||||||
p.resetSimulation()
|
p.resetSimulation()
|
||||||
p.setPhysicsEngineParameter(numSolverIterations=150)
|
p.setPhysicsEngineParameter(numSolverIterations=150)
|
||||||
@@ -65,8 +73,8 @@ class KukaGymEnv(gym.Env):
|
|||||||
|
|
||||||
p.loadURDF(os.path.join(self._urdfRoot,"table/table.urdf"), 0.5000000,0.00000,-.820000,0.000000,0.000000,0.0,1.0)
|
p.loadURDF(os.path.join(self._urdfRoot,"table/table.urdf"), 0.5000000,0.00000,-.820000,0.000000,0.000000,0.0,1.0)
|
||||||
|
|
||||||
xpos = 0.5 +0.05*random.random()
|
xpos = 0.5 +0.2*random.random()
|
||||||
ypos = 0 +0.05*random.random()
|
ypos = 0 +0.25*random.random()
|
||||||
ang = 3.1415925438*random.random()
|
ang = 3.1415925438*random.random()
|
||||||
orn = p.getQuaternionFromEuler([0,0,ang])
|
orn = p.getQuaternionFromEuler([0,0,ang])
|
||||||
self.blockUid =p.loadURDF(os.path.join(self._urdfRoot,"block.urdf"), xpos,ypos,-0.1,orn[0],orn[1],orn[2],orn[3])
|
self.blockUid =p.loadURDF(os.path.join(self._urdfRoot,"block.urdf"), xpos,ypos,-0.1,orn[0],orn[1],orn[2],orn[3])
|
||||||
@@ -101,12 +109,20 @@ class KukaGymEnv(gym.Env):
|
|||||||
return self._observation
|
return self._observation
|
||||||
|
|
||||||
def _step(self, action):
|
def _step(self, action):
|
||||||
dv = 0.01
|
if (self._isDiscrete):
|
||||||
dx = [0,-dv,dv,0,0,0,0][action]
|
dv = 0.01
|
||||||
dy = [0,0,0,-dv,dv,0,0][action]
|
dx = [0,-dv,dv,0,0,0,0][action]
|
||||||
da = [0,0,0,0,0,-0.1,0.1][action]
|
dy = [0,0,0,-dv,dv,0,0][action]
|
||||||
f = 0.3
|
da = [0,0,0,0,0,-0.1,0.1][action]
|
||||||
realAction = [dx,dy,-0.002,da,f]
|
f = 0.3
|
||||||
|
realAction = [dx,dy,-0.002,da,f]
|
||||||
|
else:
|
||||||
|
dv = 0.01
|
||||||
|
dx = action[0] * dv
|
||||||
|
dy = action[1] * dv
|
||||||
|
da = action[2] * 0.1
|
||||||
|
f = 0.3
|
||||||
|
realAction = [dx,dy,-0.002,da,f]
|
||||||
return self.step2( realAction)
|
return self.step2( realAction)
|
||||||
|
|
||||||
def step2(self, action):
|
def step2(self, action):
|
||||||
@@ -138,25 +154,41 @@ class KukaGymEnv(gym.Env):
|
|||||||
|
|
||||||
#print("self._envStepCounter")
|
#print("self._envStepCounter")
|
||||||
#print(self._envStepCounter)
|
#print(self._envStepCounter)
|
||||||
if (self.terminated or self._envStepCounter>1000):
|
if (self.terminated or self._envStepCounter>maxSteps):
|
||||||
self._observation = self.getExtendedObservation()
|
self._observation = self.getExtendedObservation()
|
||||||
return True
|
return True
|
||||||
|
maxDist = 0.005
|
||||||
if (actualEndEffectorPos[2] <= 0.10):
|
closestPoints = p.getClosestPoints(self._kuka.trayUid, self._kuka.kukaUid,maxDist)
|
||||||
|
|
||||||
|
if (len(closestPoints)):#(actualEndEffectorPos[2] <= -0.43):
|
||||||
self.terminated = 1
|
self.terminated = 1
|
||||||
|
|
||||||
#print("closing gripper, attempting grasp")
|
#print("closing gripper, attempting grasp")
|
||||||
#start grasp and terminate
|
#start grasp and terminate
|
||||||
fingerAngle = 0.3
|
fingerAngle = 0.3
|
||||||
|
for i in range (100):
|
||||||
for i in range (1000):
|
graspAction = [0,0,0.0001,0,fingerAngle]
|
||||||
graspAction = [0,0,0.001,0,fingerAngle]
|
|
||||||
self._kuka.applyAction(graspAction)
|
self._kuka.applyAction(graspAction)
|
||||||
p.stepSimulation()
|
p.stepSimulation()
|
||||||
fingerAngle = fingerAngle-(0.3/100.)
|
fingerAngle = fingerAngle-(0.3/100.)
|
||||||
if (fingerAngle<0):
|
if (fingerAngle<0):
|
||||||
fingerAngle=0
|
fingerAngle=0
|
||||||
|
|
||||||
|
for i in range (1000):
|
||||||
|
graspAction = [0,0,0.001,0,fingerAngle]
|
||||||
|
self._kuka.applyAction(graspAction)
|
||||||
|
p.stepSimulation()
|
||||||
|
blockPos,blockOrn=p.getBasePositionAndOrientation(self.blockUid)
|
||||||
|
if (blockPos[2] > 0.23):
|
||||||
|
#print("BLOCKPOS!")
|
||||||
|
#print(blockPos[2])
|
||||||
|
break
|
||||||
|
state = p.getLinkState(self._kuka.kukaUid,self._kuka.kukaEndEffectorIndex)
|
||||||
|
actualEndEffectorPos = state[0]
|
||||||
|
if (actualEndEffectorPos[2]>0.5):
|
||||||
|
break
|
||||||
|
|
||||||
|
|
||||||
self._observation = self.getExtendedObservation()
|
self._observation = self.getExtendedObservation()
|
||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
@@ -165,7 +197,7 @@ class KukaGymEnv(gym.Env):
|
|||||||
|
|
||||||
#rewards is height of target object
|
#rewards is height of target object
|
||||||
blockPos,blockOrn=p.getBasePositionAndOrientation(self.blockUid)
|
blockPos,blockOrn=p.getBasePositionAndOrientation(self.blockUid)
|
||||||
closestPoints = p.getClosestPoints(self.blockUid,self._kuka.kukaUid,1000)
|
closestPoints = p.getClosestPoints(self.blockUid,self._kuka.kukaUid,1000, -1, self._kuka.kukaEndEffectorIndex)
|
||||||
|
|
||||||
reward = -1000
|
reward = -1000
|
||||||
numPt = len(closestPoints)
|
numPt = len(closestPoints)
|
||||||
@@ -173,11 +205,10 @@ class KukaGymEnv(gym.Env):
|
|||||||
if (numPt>0):
|
if (numPt>0):
|
||||||
#print("reward:")
|
#print("reward:")
|
||||||
reward = -closestPoints[0][8]*10
|
reward = -closestPoints[0][8]*10
|
||||||
|
|
||||||
if (blockPos[2] >0.2):
|
if (blockPos[2] >0.2):
|
||||||
print("grasped a block!!!")
|
#print("grasped a block!!!")
|
||||||
print("self._envStepCounter")
|
#print("self._envStepCounter")
|
||||||
print(self._envStepCounter)
|
#print(self._envStepCounter)
|
||||||
reward = reward+1000
|
reward = reward+1000
|
||||||
|
|
||||||
#print("reward")
|
#print("reward")
|
||||||
|
|||||||
@@ -32,6 +32,9 @@ OBSERVATION_EPS = 0.01
|
|||||||
RENDER_HEIGHT = 720
|
RENDER_HEIGHT = 720
|
||||||
RENDER_WIDTH = 960
|
RENDER_WIDTH = 960
|
||||||
|
|
||||||
|
duckStartPos = [0,0,0.25]
|
||||||
|
duckStartOrn = [0.5,0.5,0.5,0.5]
|
||||||
|
|
||||||
class MinitaurBulletEnv(gym.Env):
|
class MinitaurBulletEnv(gym.Env):
|
||||||
"""The gym environment for the minitaur.
|
"""The gym environment for the minitaur.
|
||||||
|
|
||||||
@@ -133,6 +136,7 @@ class MinitaurBulletEnv(gym.Env):
|
|||||||
self._on_rack = on_rack
|
self._on_rack = on_rack
|
||||||
self._cam_dist = 1.0
|
self._cam_dist = 1.0
|
||||||
self._cam_yaw = 0
|
self._cam_yaw = 0
|
||||||
|
self._duckId = -1
|
||||||
self._cam_pitch = -30
|
self._cam_pitch = -30
|
||||||
self._hard_reset = True
|
self._hard_reset = True
|
||||||
self._kd_for_pd_controllers = kd_for_pd_controllers
|
self._kd_for_pd_controllers = kd_for_pd_controllers
|
||||||
@@ -176,7 +180,8 @@ class MinitaurBulletEnv(gym.Env):
|
|||||||
self._pybullet_client.setPhysicsEngineParameter(
|
self._pybullet_client.setPhysicsEngineParameter(
|
||||||
numSolverIterations=int(self._num_bullet_solver_iterations))
|
numSolverIterations=int(self._num_bullet_solver_iterations))
|
||||||
self._pybullet_client.setTimeStep(self._time_step)
|
self._pybullet_client.setTimeStep(self._time_step)
|
||||||
self._pybullet_client.loadURDF("%s/plane.urdf" % self._urdf_root)
|
self._groundId = self._pybullet_client.loadURDF("%s/plane.urdf" % self._urdf_root)
|
||||||
|
self._duckId = self._pybullet_client.loadURDF("%s/duck_vhacd.urdf" % self._urdf_root,duckStartPos,duckStartOrn)
|
||||||
self._pybullet_client.setGravity(0, 0, -10)
|
self._pybullet_client.setGravity(0, 0, -10)
|
||||||
acc_motor = self._accurate_motor_model_enabled
|
acc_motor = self._accurate_motor_model_enabled
|
||||||
motor_protect = self._motor_overheat_protection
|
motor_protect = self._motor_overheat_protection
|
||||||
@@ -196,7 +201,7 @@ class MinitaurBulletEnv(gym.Env):
|
|||||||
kd_for_pd_controllers=self._kd_for_pd_controllers))
|
kd_for_pd_controllers=self._kd_for_pd_controllers))
|
||||||
else:
|
else:
|
||||||
self.minitaur.Reset(reload_urdf=False)
|
self.minitaur.Reset(reload_urdf=False)
|
||||||
|
self._pybullet_client.resetBasePositionAndOrientation(self._duckId,duckStartPos,duckStartOrn)
|
||||||
if self._env_randomizer is not None:
|
if self._env_randomizer is not None:
|
||||||
self._env_randomizer.randomize_env(self)
|
self._env_randomizer.randomize_env(self)
|
||||||
|
|
||||||
@@ -322,6 +327,10 @@ class MinitaurBulletEnv(gym.Env):
|
|||||||
"""
|
"""
|
||||||
return np.array(self._observation[BASE_ORIENTATION_OBSERVATION_INDEX:])
|
return np.array(self._observation[BASE_ORIENTATION_OBSERVATION_INDEX:])
|
||||||
|
|
||||||
|
def lost_duck(self):
|
||||||
|
points = self._pybullet_client.getContactPoints(self._duckId, self._groundId);
|
||||||
|
return len(points)>0
|
||||||
|
|
||||||
def is_fallen(self):
|
def is_fallen(self):
|
||||||
"""Decide whether the minitaur has fallen.
|
"""Decide whether the minitaur has fallen.
|
||||||
|
|
||||||
@@ -342,7 +351,7 @@ class MinitaurBulletEnv(gym.Env):
|
|||||||
def _termination(self):
|
def _termination(self):
|
||||||
position = self.minitaur.GetBasePosition()
|
position = self.minitaur.GetBasePosition()
|
||||||
distance = math.sqrt(position[0]**2 + position[1]**2)
|
distance = math.sqrt(position[0]**2 + position[1]**2)
|
||||||
return self.is_fallen() or distance > self._distance_limit
|
return self.lost_duck() or self.is_fallen() or distance > self._distance_limit
|
||||||
|
|
||||||
def _reward(self):
|
def _reward(self):
|
||||||
current_base_position = self.minitaur.GetBasePosition()
|
current_base_position = self.minitaur.GetBasePosition()
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ import time
|
|||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
|
||||||
environment = KukaGymEnv(renders=True)
|
environment = KukaGymEnv(renders=True,isDiscrete=False)
|
||||||
|
|
||||||
|
|
||||||
motorsIds=[]
|
motorsIds=[]
|
||||||
@@ -19,10 +19,10 @@ def main():
|
|||||||
#motorsIds.append(environment._p.addUserDebugParameter("yaw",-3.14,3.14,0))
|
#motorsIds.append(environment._p.addUserDebugParameter("yaw",-3.14,3.14,0))
|
||||||
#motorsIds.append(environment._p.addUserDebugParameter("fingerAngle",0,0.3,.3))
|
#motorsIds.append(environment._p.addUserDebugParameter("fingerAngle",0,0.3,.3))
|
||||||
|
|
||||||
dv = 0.001
|
dv = 1
|
||||||
motorsIds.append(environment._p.addUserDebugParameter("posX",-dv,dv,0))
|
motorsIds.append(environment._p.addUserDebugParameter("posX",-dv,dv,0))
|
||||||
motorsIds.append(environment._p.addUserDebugParameter("posY",-dv,dv,0))
|
motorsIds.append(environment._p.addUserDebugParameter("posY",-dv,dv,0))
|
||||||
motorsIds.append(environment._p.addUserDebugParameter("posZ",-dv,dv,-dv))
|
motorsIds.append(environment._p.addUserDebugParameter("posZ",-dv,dv,0))
|
||||||
motorsIds.append(environment._p.addUserDebugParameter("yaw",-dv,dv,0))
|
motorsIds.append(environment._p.addUserDebugParameter("yaw",-dv,dv,0))
|
||||||
motorsIds.append(environment._p.addUserDebugParameter("fingerAngle",0,0.3,.3))
|
motorsIds.append(environment._p.addUserDebugParameter("fingerAngle",0,0.3,.3))
|
||||||
|
|
||||||
@@ -33,8 +33,8 @@ def main():
|
|||||||
for motorId in motorsIds:
|
for motorId in motorsIds:
|
||||||
action.append(environment._p.readUserDebugParameter(motorId))
|
action.append(environment._p.readUserDebugParameter(motorId))
|
||||||
|
|
||||||
state, reward, done, info = environment.step2(action)
|
state, reward, done, info = environment.step(action)
|
||||||
obs = environment.getExtendedObservation()
|
obs = environment.getExtendedObservation()
|
||||||
|
|
||||||
if __name__=="__main__":
|
if __name__=="__main__":
|
||||||
main()
|
main()
|
||||||
|
|||||||
@@ -44,9 +44,9 @@ def MotorOverheatExample():
|
|||||||
motor_kd=0.00,
|
motor_kd=0.00,
|
||||||
on_rack=False)
|
on_rack=False)
|
||||||
|
|
||||||
action = [2.0] * 8
|
action = [.0] * 8
|
||||||
for i in range(8):
|
for i in range(8):
|
||||||
action[i] = 2.0 - 0.5 * (-1 if i % 2 == 0 else 1) * (-1 if i < 4 else 1)
|
action[i] = .0 - 0.1 * (-1 if i % 2 == 0 else 1) * (-1 if i < 4 else 1)
|
||||||
|
|
||||||
steps = 500
|
steps = 500
|
||||||
actions_and_observations = []
|
actions_and_observations = []
|
||||||
@@ -112,9 +112,9 @@ def SinePolicyExample():
|
|||||||
on_rack=False)
|
on_rack=False)
|
||||||
sum_reward = 0
|
sum_reward = 0
|
||||||
steps = 20000
|
steps = 20000
|
||||||
amplitude_1_bound = 0.5
|
amplitude_1_bound = 0.1
|
||||||
amplitude_2_bound = 0.5
|
amplitude_2_bound = 0.1
|
||||||
speed = 40
|
speed = 1
|
||||||
|
|
||||||
for step_counter in range(steps):
|
for step_counter in range(steps):
|
||||||
time_step = 0.01
|
time_step = 0.01
|
||||||
@@ -124,9 +124,9 @@ def SinePolicyExample():
|
|||||||
amplitude2 = amplitude_2_bound
|
amplitude2 = amplitude_2_bound
|
||||||
steering_amplitude = 0
|
steering_amplitude = 0
|
||||||
if t < 10:
|
if t < 10:
|
||||||
steering_amplitude = 0.5
|
steering_amplitude = 0.1
|
||||||
elif t < 20:
|
elif t < 20:
|
||||||
steering_amplitude = -0.5
|
steering_amplitude = -0.1
|
||||||
else:
|
else:
|
||||||
steering_amplitude = 0
|
steering_amplitude = 0
|
||||||
|
|
||||||
|
|||||||
@@ -41,4 +41,4 @@ def main():
|
|||||||
print(obs)
|
print(obs)
|
||||||
|
|
||||||
if __name__=="__main__":
|
if __name__=="__main__":
|
||||||
main()
|
main()
|
||||||
|
|||||||
@@ -3,12 +3,13 @@ import os, inspect
|
|||||||
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
|
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
|
||||||
parentdir = os.path.dirname(os.path.dirname(currentdir))
|
parentdir = os.path.dirname(os.path.dirname(currentdir))
|
||||||
os.sys.path.insert(0,parentdir)
|
os.sys.path.insert(0,parentdir)
|
||||||
|
isDiscrete = False
|
||||||
|
|
||||||
from pybullet_envs.bullet.racecarZEDGymEnv import RacecarZEDGymEnv
|
from pybullet_envs.bullet.racecarZEDGymEnv import RacecarZEDGymEnv
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
|
||||||
environment = RacecarZEDGymEnv(renders=True, isDiscrete=True)
|
environment = RacecarZEDGymEnv(renders=True, isDiscrete=isDiscrete)
|
||||||
|
|
||||||
targetVelocitySlider = environment._p.addUserDebugParameter("wheelVelocity",-1,1,0)
|
targetVelocitySlider = environment._p.addUserDebugParameter("wheelVelocity",-1,1,0)
|
||||||
steeringSlider = environment._p.addUserDebugParameter("steering",-1,1,0)
|
steeringSlider = environment._p.addUserDebugParameter("steering",-1,1,0)
|
||||||
@@ -16,25 +17,28 @@ def main():
|
|||||||
while (True):
|
while (True):
|
||||||
targetVelocity = environment._p.readUserDebugParameter(targetVelocitySlider)
|
targetVelocity = environment._p.readUserDebugParameter(targetVelocitySlider)
|
||||||
steeringAngle = environment._p.readUserDebugParameter(steeringSlider)
|
steeringAngle = environment._p.readUserDebugParameter(steeringSlider)
|
||||||
discreteAction = 0
|
if (isDiscrete):
|
||||||
if (targetVelocity<-0.33):
|
discreteAction = 0
|
||||||
discreteAction=0
|
if (targetVelocity<-0.33):
|
||||||
|
discreteAction=0
|
||||||
|
else:
|
||||||
|
if (targetVelocity>0.33):
|
||||||
|
discreteAction=6
|
||||||
|
else:
|
||||||
|
discreteAction=3
|
||||||
|
if (steeringAngle>-0.17):
|
||||||
|
if (steeringAngle>0.17):
|
||||||
|
discreteAction=discreteAction+2
|
||||||
|
else:
|
||||||
|
discreteAction=discreteAction+1
|
||||||
|
action=discreteAction
|
||||||
else:
|
else:
|
||||||
if (targetVelocity>0.33):
|
action=[targetVelocity,steeringAngle]
|
||||||
discreteAction=6
|
|
||||||
else:
|
|
||||||
discreteAction=3
|
|
||||||
if (steeringAngle>-0.17):
|
|
||||||
if (steeringAngle>0.17):
|
|
||||||
discreteAction=discreteAction+2
|
|
||||||
else:
|
|
||||||
discreteAction=discreteAction+1
|
|
||||||
action=discreteAction
|
|
||||||
|
|
||||||
state, reward, done, info = environment.step(action)
|
state, reward, done, info = environment.step(action)
|
||||||
obs = environment.getExtendedObservation()
|
obs = environment.getExtendedObservation()
|
||||||
print("obs")
|
#print("obs")
|
||||||
print(obs)
|
#print(obs)
|
||||||
|
|
||||||
if __name__=="__main__":
|
if __name__=="__main__":
|
||||||
main()
|
main()
|
||||||
|
|||||||
Reference in New Issue
Block a user