From 4798d66f19056e144516cce3d44b32bd68008f91 Mon Sep 17 00:00:00 2001
From: Erwin Coumans
 <erwincoumans@erwincoumans-macbookpro2.roam.corp.google.com>
Date: Wed, 1 Nov 2017 18:06:47 -0700
Subject: [PATCH] fixes in kuka/racecar environment. kuka still doesn't train
 well, work-in-progress

---
 .../gym/pybullet_envs/agents/configs.py       |   6 +-
 .../pybullet/gym/pybullet_envs/bullet/kuka.py |  15 +-
 .../gym/pybullet_envs/bullet/kukaCamGymEnv.py |  27 +++-
 .../gym/pybullet_envs/bullet/kukaGymEnv.py    | 131 +++++++++++++-----
 .../gym/pybullet_envs/bullet/racecarGymEnv.py |  36 +++--
 .../pybullet_envs/bullet/racecarZEDGymEnv.py  |  24 +++-
 .../pybullet_envs/examples/kukaGymEnvTest.py  |   6 +-
 .../pybullet_envs/examples/kukaGymEnvTest2.py |  38 +++++
 8 files changed, 218 insertions(+), 65 deletions(-)
 create mode 100644 examples/pybullet/gym/pybullet_envs/examples/kukaGymEnvTest2.py

diff --git a/examples/pybullet/gym/pybullet_envs/agents/configs.py b/examples/pybullet/gym/pybullet_envs/agents/configs.py
index 1444637c8..edf4cd23d 100644
--- a/examples/pybullet/gym/pybullet_envs/agents/configs.py
+++ b/examples/pybullet/gym/pybullet_envs/agents/configs.py
@@ -34,7 +34,7 @@ def default():
   # General
   algorithm = ppo.PPOAlgorithm
   num_agents = 10
-  eval_episodes = 25
+  eval_episodes = 20
   use_gpu = False
   # Network
   network = networks.feed_forward_gaussian
@@ -47,7 +47,7 @@ def default():
   init_mean_factor = 0.05
   init_logstd = -1
   # Optimization
-  update_every = 25
+  update_every = 20
   policy_optimizer = 'AdamOptimizer'
   value_optimizer = 'AdamOptimizer'
   update_epochs_policy = 50
@@ -105,7 +105,7 @@ def pybullet_kuka_grasping():
   locals().update(default())
   # Environment
   env = 'KukaBulletEnv-v0'
-  max_length = 10
+  max_length = 1000
   steps = 1e7  # 10M
   return locals()
 
diff --git a/examples/pybullet/gym/pybullet_envs/bullet/kuka.py b/examples/pybullet/gym/pybullet_envs/bullet/kuka.py
index b2b02f2e7..4c745cca1 100644
--- a/examples/pybullet/gym/pybullet_envs/bullet/kuka.py
+++ b/examples/pybullet/gym/pybullet_envs/bullet/kuka.py
@@ -25,6 +25,7 @@ class Kuka:
     self.useNullSpace =21
     self.useOrientation = 1
     self.kukaEndEffectorIndex = 6
+    self.kukaGripperIndex = 7
     #lower limits for null space
     self.ll=[-.967,-2 ,-2.96,0.19,-2.96,-2.09,-3.05]
     #upper limits for null space
@@ -76,7 +77,7 @@ class Kuka:
 
   def getObservation(self):
     observation = []
-    state = p.getLinkState(self.kukaUid,self.kukaEndEffectorIndex)
+    state = p.getLinkState(self.kukaUid,self.kukaGripperIndex)
     pos = state[0]
     orn = state[1]
     euler = p.getEulerFromQuaternion(orn)
@@ -106,13 +107,13 @@ class Kuka:
     
       
       self.endEffectorPos[0] = self.endEffectorPos[0]+dx
-      if (self.endEffectorPos[0]>0.75):
-        self.endEffectorPos[0]=0.75
-      if (self.endEffectorPos[0]<0.45):
-        self.endEffectorPos[0]=0.45
+      if (self.endEffectorPos[0]>0.65):
+        self.endEffectorPos[0]=0.65
+      if (self.endEffectorPos[0]<0.50):
+        self.endEffectorPos[0]=0.50
       self.endEffectorPos[1] = self.endEffectorPos[1]+dy
-      if (self.endEffectorPos[1]<-0.22):
-        self.endEffectorPos[1]=-0.22
+      if (self.endEffectorPos[1]<-0.17):
+        self.endEffectorPos[1]=-0.17
       if (self.endEffectorPos[1]>0.22):
         self.endEffectorPos[1]=0.22
       
diff --git a/examples/pybullet/gym/pybullet_envs/bullet/kukaCamGymEnv.py b/examples/pybullet/gym/pybullet_envs/bullet/kukaCamGymEnv.py
index a1cdc8242..4e7d7e70e 100644
--- a/examples/pybullet/gym/pybullet_envs/bullet/kukaCamGymEnv.py
+++ b/examples/pybullet/gym/pybullet_envs/bullet/kukaCamGymEnv.py
@@ -16,6 +16,9 @@ import random
 import pybullet_data
 maxSteps = 1000
 
+RENDER_HEIGHT = 720
+RENDER_WIDTH = 960
+
 class KukaCamGymEnv(gym.Env):
   metadata = {
       'render.modes': ['human', 'rgb_array'],
@@ -24,9 +27,9 @@ class KukaCamGymEnv(gym.Env):
 
   def __init__(self,
                urdfRoot=pybullet_data.getDataPath(),
-               actionRepeat=25,
+               actionRepeat=1,
                isEnableSelfCollision=True,
-               renders=True,
+               renders=False,
                isDiscrete=False):
     self._timeStep = 1./240.
     self._urdfRoot = urdfRoot
@@ -163,7 +166,25 @@ class KukaCamGymEnv(gym.Env):
     return np.array(self._observation), reward, done, {}
 
   def _render(self, mode='human', close=False):
-      return
+    if mode != "rgb_array":
+      return np.array([])
+    base_pos,orn = self._p.getBasePositionAndOrientation(self._racecar.racecarUniqueId)
+    view_matrix = self._p.computeViewMatrixFromYawPitchRoll(
+        cameraTargetPosition=base_pos,
+        distance=self._cam_dist,
+        yaw=self._cam_yaw,
+        pitch=self._cam_pitch,
+        roll=0,
+        upAxisIndex=2)
+    proj_matrix = self._p.computeProjectionMatrixFOV(
+        fov=60, aspect=float(RENDER_WIDTH)/RENDER_HEIGHT,
+        nearVal=0.1, farVal=100.0)
+    (_, _, px, _, _) = self._p.getCameraImage(
+        width=RENDER_WIDTH, height=RENDER_HEIGHT, viewMatrix=view_matrix,
+        projectionMatrix=proj_matrix, renderer=pybullet.ER_BULLET_HARDWARE_OPENGL)
+    rgb_array = np.array(px)
+    rgb_array = rgb_array[:, :, :3]
+    return rgb_array
 
   def _termination(self):
     #print (self._kuka.endEffectorPos[2])
diff --git a/examples/pybullet/gym/pybullet_envs/bullet/kukaGymEnv.py b/examples/pybullet/gym/pybullet_envs/bullet/kukaGymEnv.py
index 0dfb62490..d2a47935d 100644
--- a/examples/pybullet/gym/pybullet_envs/bullet/kukaGymEnv.py
+++ b/examples/pybullet/gym/pybullet_envs/bullet/kukaGymEnv.py
@@ -14,8 +14,10 @@ from . import kuka
 import random
 import pybullet_data
 
-maxSteps = 1000
+largeValObservation = 100
 
+RENDER_HEIGHT = 720
+RENDER_WIDTH = 960
 
 class KukaGymEnv(gym.Env):
   metadata = {
@@ -27,8 +29,10 @@ class KukaGymEnv(gym.Env):
                urdfRoot=pybullet_data.getDataPath(),
                actionRepeat=1,
                isEnableSelfCollision=True,
-               renders=True,
-               isDiscrete=False):
+               renders=False,
+               isDiscrete=False,
+               maxSteps = 1000):
+    #print("KukaGymEnv __init__")
     self._isDiscrete = isDiscrete
     self._timeStep = 1./240.
     self._urdfRoot = urdfRoot
@@ -37,7 +41,12 @@ class KukaGymEnv(gym.Env):
     self._observation = []
     self._envStepCounter = 0
     self._renders = renders
+    self._maxSteps = maxSteps
     self.terminated = 0
+    self._cam_dist = 1.3
+    self._cam_yaw = 180 
+    self._cam_pitch = -40
+
     self._p = p
     if self._renders:
       cid = p.connect(p.SHARED_MEMORY)
@@ -53,7 +62,7 @@ class KukaGymEnv(gym.Env):
     #print("observationDim")
     #print(observationDim)
     
-    observation_high = np.array([np.finfo(np.float32).max] * observationDim)    
+    observation_high = np.array([largeValObservation] * observationDim)    
     if (self._isDiscrete):
       self.action_space = spaces.Discrete(7)
     else:
@@ -65,6 +74,7 @@ class KukaGymEnv(gym.Env):
     self.viewer = None
 
   def _reset(self):
+    #print("KukaGymEnv _reset")
     self.terminated = 0
     p.resetSimulation()
     p.setPhysicsEngineParameter(numSolverIterations=150)
@@ -73,11 +83,11 @@ class KukaGymEnv(gym.Env):
     
     p.loadURDF(os.path.join(self._urdfRoot,"table/table.urdf"), 0.5000000,0.00000,-.820000,0.000000,0.000000,0.0,1.0)
     
-    xpos = 0.5 +0.2*random.random()
-    ypos = 0 +0.25*random.random()
-    ang = 3.1415925438*random.random()
+    xpos = 0.55 +0.12*random.random()
+    ypos = 0 +0.2*random.random()
+    ang = 3.14*0.5+3.1415925438*random.random()
     orn = p.getQuaternionFromEuler([0,0,ang])
-    self.blockUid =p.loadURDF(os.path.join(self._urdfRoot,"block.urdf"), xpos,ypos,-0.1,orn[0],orn[1],orn[2],orn[3])
+    self.blockUid =p.loadURDF(os.path.join(self._urdfRoot,"block.urdf"), xpos,ypos,-0.15,orn[0],orn[1],orn[2],orn[3])
             
     p.setGravity(0,0,-10)
     self._kuka = kuka.Kuka(urdfRootPath=self._urdfRoot, timeStep=self._timeStep)
@@ -95,57 +105,104 @@ class KukaGymEnv(gym.Env):
 
   def getExtendedObservation(self):
      self._observation = self._kuka.getObservation()
-     eeState  = p.getLinkState(self._kuka.kukaUid,self._kuka.kukaEndEffectorIndex)
-     endEffectorPos = eeState[0]
-     endEffectorOrn = eeState[1]
+     gripperState  = p.getLinkState(self._kuka.kukaUid,self._kuka.kukaGripperIndex)
+     gripperPos = gripperState[0]
+     gripperOrn = gripperState[1]
      blockPos,blockOrn = p.getBasePositionAndOrientation(self.blockUid)
 
-     invEEPos,invEEOrn = p.invertTransform(endEffectorPos,endEffectorOrn)
-     blockPosInEE,blockOrnInEE = p.multiplyTransforms(invEEPos,invEEOrn,blockPos,blockOrn)
-     blockEulerInEE = p.getEulerFromQuaternion(blockOrnInEE)
-     self._observation.extend(list(blockPosInEE))
-     self._observation.extend(list(blockEulerInEE))
-
+     invGripperPos,invGripperOrn = p.invertTransform(gripperPos,gripperOrn)
+     gripperMat = p.getMatrixFromQuaternion(gripperOrn)
+     dir0 = [gripperMat[0],gripperMat[3],gripperMat[6]]
+     dir1 = [gripperMat[1],gripperMat[4],gripperMat[7]]
+     dir2 = [gripperMat[2],gripperMat[5],gripperMat[8]]
+             
+     gripperEul =  p.getEulerFromQuaternion(gripperOrn)
+     #print("gripperEul")
+     #print(gripperEul)
+     blockPosInGripper,blockOrnInGripper = p.multiplyTransforms(invGripperPos,invGripperOrn,blockPos,blockOrn)
+     projectedBlockPos2D =[blockPosInGripper[0],blockPosInGripper[1]]
+     blockEulerInGripper = p.getEulerFromQuaternion(blockOrnInGripper)
+     #print("projectedBlockPos2D")
+     #print(projectedBlockPos2D)
+     #print("blockEulerInGripper")
+     #print(blockEulerInGripper)
+     
+     #we return the relative x,y position and euler angle of block in gripper space
+     blockInGripperPosXYEulZ =[blockPosInGripper[0],blockPosInGripper[1],blockEulerInGripper[2]]
+     
+     #p.addUserDebugLine(gripperPos,[gripperPos[0]+dir0[0],gripperPos[1]+dir0[1],gripperPos[2]+dir0[2]],[1,0,0],lifeTime=1)
+     #p.addUserDebugLine(gripperPos,[gripperPos[0]+dir1[0],gripperPos[1]+dir1[1],gripperPos[2]+dir1[2]],[0,1,0],lifeTime=1)
+     #p.addUserDebugLine(gripperPos,[gripperPos[0]+dir2[0],gripperPos[1]+dir2[1],gripperPos[2]+dir2[2]],[0,0,1],lifeTime=1)
+     
+     self._observation.extend(list(blockInGripperPosXYEulZ))
      return self._observation
   
   def _step(self, action):
     if (self._isDiscrete):
-      dv = 0.01
+      dv = 0.005
       dx = [0,-dv,dv,0,0,0,0][action]
       dy = [0,0,0,-dv,dv,0,0][action]
-      da = [0,0,0,0,0,-0.1,0.1][action]
+      da = [0,0,0,0,0,-0.05,0.05][action]
       f = 0.3
       realAction = [dx,dy,-0.002,da,f]
     else:
-      dv = 0.01
+      #print("action[0]=", str(action[0]))
+      dv = 0.005
       dx = action[0] * dv
       dy = action[1] * dv
-      da = action[2] * 0.1
+      da = action[2] * 0.05
       f = 0.3
       realAction = [dx,dy,-0.002,da,f]
     return self.step2( realAction)
      
   def step2(self, action):
-    self._kuka.applyAction(action)
     for i in range(self._actionRepeat):
+      self._kuka.applyAction(action)
       p.stepSimulation()
-      if self._renders:
-        time.sleep(self._timeStep)
-      self._observation = self.getExtendedObservation()
       if self._termination():
         break
       self._envStepCounter += 1
+    if self._renders:
+      time.sleep(self._timeStep)
+    self._observation = self.getExtendedObservation()
+
     #print("self._envStepCounter")
     #print(self._envStepCounter)
     
     done = self._termination()
-    reward = self._reward()
+    npaction = np.array([action[3]]) #only penalize rotation until learning works well [action[0],action[1],action[3]])
+    actionCost = np.linalg.norm(npaction)*10.
+    #print("actionCost")
+    #print(actionCost)
+    reward = self._reward()-actionCost
+    #print("reward")
+    #print(reward)
+ 
     #print("len=%r" % len(self._observation))
     
     return np.array(self._observation), reward, done, {}
 
-  def _render(self, mode='human', close=False):
-      return
+  def _render(self, mode="rgb_array", close=False):
+    if mode != "rgb_array":
+      return np.array([])
+    base_pos,orn = self._p.getBasePositionAndOrientation(self._kuka.kukaUid)
+    view_matrix = self._p.computeViewMatrixFromYawPitchRoll(
+        cameraTargetPosition=base_pos,
+        distance=self._cam_dist,
+        yaw=self._cam_yaw,
+        pitch=self._cam_pitch,
+        roll=0,
+        upAxisIndex=2)
+    proj_matrix = self._p.computeProjectionMatrixFOV(
+        fov=60, aspect=float(RENDER_WIDTH)/RENDER_HEIGHT,
+        nearVal=0.1, farVal=100.0)
+    (_, _, px, _, _) = self._p.getCameraImage(
+        width=RENDER_WIDTH, height=RENDER_HEIGHT, viewMatrix=view_matrix,
+        projectionMatrix=proj_matrix, renderer=self._p.ER_BULLET_HARDWARE_OPENGL)
+    rgb_array = np.array(px)
+    rgb_array = rgb_array[:, :, :3]
+    return rgb_array
+
 
   def _termination(self):
     #print (self._kuka.endEffectorPos[2])
@@ -154,7 +211,7 @@ class KukaGymEnv(gym.Env):
       
     #print("self._envStepCounter")
     #print(self._envStepCounter)
-    if (self.terminated or self._envStepCounter>maxSteps):
+    if (self.terminated or self._envStepCounter>self._maxSteps):
       self._observation = self.getExtendedObservation()
       return True
     maxDist = 0.005 
@@ -163,7 +220,7 @@ class KukaGymEnv(gym.Env):
     if (len(closestPoints)):#(actualEndEffectorPos[2] <= -0.43):
       self.terminated = 1
       
-      #print("closing gripper, attempting grasp")
+      #print("terminating, closing gripper, attempting grasp")
       #start grasp and terminate
       fingerAngle = 0.3
       for i in range (100):
@@ -199,18 +256,22 @@ class KukaGymEnv(gym.Env):
     blockPos,blockOrn=p.getBasePositionAndOrientation(self.blockUid)
     closestPoints = p.getClosestPoints(self.blockUid,self._kuka.kukaUid,1000, -1, self._kuka.kukaEndEffectorIndex) 
 
-    reward = -1000    
+    reward = -1000
+    
     numPt = len(closestPoints)
     #print(numPt)
     if (numPt>0):
       #print("reward:")
       reward = -closestPoints[0][8]*10
     if (blockPos[2] >0.2):
-      #print("grasped a block!!!")
+      reward = reward+10000
+      print("successfully grasped a block!!!")
       #print("self._envStepCounter")
       #print(self._envStepCounter)
-      reward = reward+1000
-
-    #print("reward")
+      #print("self._envStepCounter")
+      #print(self._envStepCounter)
+      #print("reward")
+      #print(reward)
+     #print("reward")
     #print(reward)
     return reward
diff --git a/examples/pybullet/gym/pybullet_envs/bullet/racecarGymEnv.py b/examples/pybullet/gym/pybullet_envs/bullet/racecarGymEnv.py
index bea79bd77..7537d54c1 100644
--- a/examples/pybullet/gym/pybullet_envs/bullet/racecarGymEnv.py
+++ b/examples/pybullet/gym/pybullet_envs/bullet/racecarGymEnv.py
@@ -15,6 +15,9 @@ import random
 from . import bullet_client
 import pybullet_data
 
+RENDER_HEIGHT = 720
+RENDER_WIDTH = 960
+
 class RacecarGymEnv(gym.Env):
   metadata = {
       'render.modes': ['human', 'rgb_array'],
@@ -136,20 +139,27 @@ class RacecarGymEnv(gym.Env):
     return np.array(self._observation), reward, done, {}
 
   def _render(self, mode='human', close=False):
-      width=320
-      height=200
-      img_arr = self._p.getCameraImage(width,height)
-      w=img_arr[0]
-      h=img_arr[1]
-      rgb=img_arr[2]
-      dep=img_arr[3]
-      #print 'width = %d height = %d' % (w,h)
-      # reshape creates np array
-      np_img_arr = np.reshape(rgb, (h, w, 4))
-      # remove alpha channel
-      np_img_arr = np_img_arr[:, :, :3]
-      return np_img_arr
+    if mode != "rgb_array":
+      return np.array([])
+    base_pos,orn = self._p.getBasePositionAndOrientation(self._racecar.racecarUniqueId)
+    view_matrix = self._p.computeViewMatrixFromYawPitchRoll(
+        cameraTargetPosition=base_pos,
+        distance=self._cam_dist,
+        yaw=self._cam_yaw,
+        pitch=self._cam_pitch,
+        roll=0,
+        upAxisIndex=2)
+    proj_matrix = self._p.computeProjectionMatrixFOV(
+        fov=60, aspect=float(RENDER_WIDTH)/RENDER_HEIGHT,
+        nearVal=0.1, farVal=100.0)
+    (_, _, px, _, _) = self._p.getCameraImage(
+        width=RENDER_WIDTH, height=RENDER_HEIGHT, viewMatrix=view_matrix,
+        projectionMatrix=proj_matrix, renderer=pybullet.ER_BULLET_HARDWARE_OPENGL)
+    rgb_array = np.array(px)
+    rgb_array = rgb_array[:, :, :3]
+    return rgb_array
 
+    
   def _termination(self):
     return self._envStepCounter>1000
     
diff --git a/examples/pybullet/gym/pybullet_envs/bullet/racecarZEDGymEnv.py b/examples/pybullet/gym/pybullet_envs/bullet/racecarZEDGymEnv.py
index 507f6cb1a..d9546de8c 100644
--- a/examples/pybullet/gym/pybullet_envs/bullet/racecarZEDGymEnv.py
+++ b/examples/pybullet/gym/pybullet_envs/bullet/racecarZEDGymEnv.py
@@ -15,6 +15,9 @@ from . import racecar
 import random
 import pybullet_data
 
+RENDER_HEIGHT = 720
+RENDER_WIDTH = 960
+
 class RacecarZEDGymEnv(gym.Env):
   metadata = {
       'render.modes': ['human', 'rgb_array'],
@@ -151,7 +154,26 @@ class RacecarZEDGymEnv(gym.Env):
     return np.array(self._observation), reward, done, {}
 
   def _render(self, mode='human', close=False):
-      return
+    if mode != "rgb_array":
+      return np.array([])
+    base_pos,orn = self._p.getBasePositionAndOrientation(self._racecar.racecarUniqueId)
+    view_matrix = self._p.computeViewMatrixFromYawPitchRoll(
+        cameraTargetPosition=base_pos,
+        distance=self._cam_dist,
+        yaw=self._cam_yaw,
+        pitch=self._cam_pitch,
+        roll=0,
+        upAxisIndex=2)
+    proj_matrix = self._p.computeProjectionMatrixFOV(
+        fov=60, aspect=float(RENDER_WIDTH)/RENDER_HEIGHT,
+        nearVal=0.1, farVal=100.0)
+    (_, _, px, _, _) = self._p.getCameraImage(
+        width=RENDER_WIDTH, height=RENDER_HEIGHT, viewMatrix=view_matrix,
+        projectionMatrix=proj_matrix, renderer=pybullet.ER_BULLET_HARDWARE_OPENGL)
+    rgb_array = np.array(px)
+    rgb_array = rgb_array[:, :, :3]
+    return rgb_array
+
 
   def _termination(self):
     return self._envStepCounter>1000
diff --git a/examples/pybullet/gym/pybullet_envs/examples/kukaGymEnvTest.py b/examples/pybullet/gym/pybullet_envs/examples/kukaGymEnvTest.py
index 5649063b2..3917f08cf 100644
--- a/examples/pybullet/gym/pybullet_envs/examples/kukaGymEnvTest.py
+++ b/examples/pybullet/gym/pybullet_envs/examples/kukaGymEnvTest.py
@@ -9,7 +9,7 @@ import time
 
 def main():
 
-	environment = KukaGymEnv(renders=True,isDiscrete=False)
+	environment = KukaGymEnv(renders=True,isDiscrete=False, maxSteps = 10000000)
 	
 	  
 	motorsIds=[]
@@ -19,7 +19,7 @@ def main():
 	#motorsIds.append(environment._p.addUserDebugParameter("yaw",-3.14,3.14,0))
 	#motorsIds.append(environment._p.addUserDebugParameter("fingerAngle",0,0.3,.3))
 	
-	dv = 1 
+	dv = 0.01 
 	motorsIds.append(environment._p.addUserDebugParameter("posX",-dv,dv,0))
 	motorsIds.append(environment._p.addUserDebugParameter("posY",-dv,dv,0))
 	motorsIds.append(environment._p.addUserDebugParameter("posZ",-dv,dv,0))
@@ -33,7 +33,7 @@ def main():
 	  for motorId in motorsIds:
 	    action.append(environment._p.readUserDebugParameter(motorId))
 	  
-	  state, reward, done, info = environment.step(action)
+	  state, reward, done, info = environment.step2(action)
 	  obs = environment.getExtendedObservation()
 	  
 if __name__=="__main__":
diff --git a/examples/pybullet/gym/pybullet_envs/examples/kukaGymEnvTest2.py b/examples/pybullet/gym/pybullet_envs/examples/kukaGymEnvTest2.py
new file mode 100644
index 000000000..5c7f245c3
--- /dev/null
+++ b/examples/pybullet/gym/pybullet_envs/examples/kukaGymEnvTest2.py
@@ -0,0 +1,38 @@
+#add parent dir to find package. Only needed for source code build, pip install doesn't need it.
+import os, inspect
+currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
+parentdir = os.path.dirname(os.path.dirname(currentdir))
+os.sys.path.insert(0,parentdir)
+
+from pybullet_envs.bullet.kukaGymEnv import KukaGymEnv
+import time
+
+def main():
+
+	environment = KukaGymEnv(renders=True,isDiscrete=False, maxSteps = 10000000)
+	
+	  
+	motorsIds=[]
+	#motorsIds.append(environment._p.addUserDebugParameter("posX",-1,1,0))
+	#motorsIds.append(environment._p.addUserDebugParameter("posY",-.22,.3,0.0))
+	#motorsIds.append(environment._p.addUserDebugParameter("posZ",0.1,1,0.2))
+	#motorsIds.append(environment._p.addUserDebugParameter("yaw",-3.14,3.14,0))
+	#motorsIds.append(environment._p.addUserDebugParameter("fingerAngle",0,0.3,.3))
+	
+	dv = 1 
+	motorsIds.append(environment._p.addUserDebugParameter("posX",-dv,dv,0))
+	motorsIds.append(environment._p.addUserDebugParameter("posY",-dv,dv,0))
+	motorsIds.append(environment._p.addUserDebugParameter("yaw",-dv,dv,0))
+	
+	done = False
+	while (not done):
+	    
+	  action=[]
+	  for motorId in motorsIds:
+	    action.append(environment._p.readUserDebugParameter(motorId))
+	  
+	  state, reward, done, info = environment.step(action)
+	  obs = environment.getExtendedObservation()
+	  
+if __name__=="__main__":
+    main()