From a5a73ba0d82758665f139e94083a64106481d6da Mon Sep 17 00:00:00 2001
From: Erwin Coumans
 <erwincoumans@erwincoumans-macbookpro2.roam.corp.google.com>
Date: Thu, 26 Oct 2017 08:11:58 -0700
Subject: [PATCH 1/8] fix for issue 1400 use default output name for VHACD test
 binary

---
 Extras/VHACD/test/src/main.cpp                  | 17 +++++++++++++++++
 src/BulletDynamics/Vehicle/btRaycastVehicle.cpp |  2 +-
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/Extras/VHACD/test/src/main.cpp b/Extras/VHACD/test/src/main.cpp
index 6b1204726..1c6112339 100644
--- a/Extras/VHACD/test/src/main.cpp
+++ b/Extras/VHACD/test/src/main.cpp
@@ -38,6 +38,14 @@
 using namespace VHACD;
 using namespace std;
 
+bool replace(std::string& str, const std::string& from, const std::string& to) {
+    size_t start_pos = str.find(from);
+    if(start_pos == std::string::npos)
+        return false;
+    str.replace(start_pos, from.length(), to);
+    return true;
+}
+
 class MyCallback : public IVHACD::IUserCallback {
 public:
     MyCallback(void) {}
@@ -293,7 +301,16 @@ void ParseParameters(int argc, char* argv[], Parameters& params)
     for (int i = 1; i < argc; ++i) {
         if (!strcmp(argv[i], "--input")) {
             if (++i < argc)
+		{
                 params.m_fileNameIn = argv[i];
+		//come up with some default output name, if not provided
+		if (params.m_fileNameOut.length()==0)
+		{
+                	string tmp = argv[i];
+			replace(tmp,".obj",".vhacd.obj");
+			params.m_fileNameOut = tmp;
+		}
+		}
         }
         else if (!strcmp(argv[i], "--output")) {
             if (++i < argc)
diff --git a/src/BulletDynamics/Vehicle/btRaycastVehicle.cpp b/src/BulletDynamics/Vehicle/btRaycastVehicle.cpp
index a7b168846..f05a4376c 100644
--- a/src/BulletDynamics/Vehicle/btRaycastVehicle.cpp
+++ b/src/BulletDynamics/Vehicle/btRaycastVehicle.cpp
@@ -615,7 +615,7 @@ void	btRaycastVehicle::updateFriction(btScalar	timeStep)
 					btScalar defaultRollingFrictionImpulse = 0.f;
 					btScalar maxImpulse = wheelInfo.m_brake ? wheelInfo.m_brake : defaultRollingFrictionImpulse;
 					btWheelContactPoint contactPt(m_chassisBody,groundObject,wheelInfo.m_raycastInfo.m_contactPointWS,m_forwardWS[wheel],maxImpulse);
-					rollingFriction = calcRollingFriction(contactPt);
+					rollingFriction = calcRollingFriction(contactPt)/btScalar(getNumWheels());
 				}
 			}
 

From 48f0f298128d8eb864b8fb8c9952456fbac45372 Mon Sep 17 00:00:00 2001
From: Erwin Coumans
 <erwincoumans@erwincoumans-macbookpro2.roam.corp.google.com>
Date: Fri, 27 Oct 2017 18:26:32 -0700
Subject: [PATCH 2/8] add integrate.py example

---
 examples/pybullet/examples/integrate.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)
 create mode 100644 examples/pybullet/examples/integrate.py

diff --git a/examples/pybullet/examples/integrate.py b/examples/pybullet/examples/integrate.py
new file mode 100644
index 000000000..a4e4deaa2
--- /dev/null
+++ b/examples/pybullet/examples/integrate.py
@@ -0,0 +1,13 @@
+import pybullet as p
+p.connect(p.GUI)
+cube  = p.loadURDF("cube.urdf")
+frequency = 240
+timeStep = 1./frequency
+p.setGravity(0,0,-9.8)
+p.changeDynamics(cube,-1,linearDamping=0,angularDamping=0)
+p.setPhysicsEngineParameter(fixedTimeStep = timeStep)
+for i in range (frequency):
+	p.stepSimulation()
+pos,orn = p.getBasePositionAndOrientation(cube)
+print(pos)
+

From 55f5e52ecd4d088ec9124da20a082fd4e66a5619 Mon Sep 17 00:00:00 2001
From: Erwin Coumans
 <erwincoumans@erwincoumans-macbookpro2.roam.corp.google.com>
Date: Tue, 31 Oct 2017 15:50:34 -0700
Subject: [PATCH 3/8] add continuous versions of kukaGymEnv, kukaCamGymEnv,
 racecarZEDGymEnv etc. should be trainable with PPO or evolution strategies
 (ES) now

---
 .../pybullet/gym/pybullet_envs/bullet/kuka.py | 16 ++--
 .../gym/pybullet_envs/bullet/kukaCamGymEnv.py | 94 +++++++++++++------
 .../gym/pybullet_envs/bullet/kukaGymEnv.py    | 79 +++++++++++-----
 .../pybullet_envs/bullet/minitaur_gym_env.py  | 15 ++-
 .../pybullet_envs/examples/kukaGymEnvTest.py  | 10 +-
 .../examples/minitaur_gym_env_example.py      | 14 +--
 .../examples/racecarGymEnvTest.py             |  2 +-
 .../examples/racecarZEDGymEnvTest.py          | 38 ++++----
 8 files changed, 172 insertions(+), 96 deletions(-)

diff --git a/examples/pybullet/gym/pybullet_envs/bullet/kuka.py b/examples/pybullet/gym/pybullet_envs/bullet/kuka.py
index 3e7ee6c49..b2b02f2e7 100644
--- a/examples/pybullet/gym/pybullet_envs/bullet/kuka.py
+++ b/examples/pybullet/gym/pybullet_envs/bullet/kuka.py
@@ -17,12 +17,12 @@ class Kuka:
     self.timeStep = timeStep
     
     self.maxForce = 200.
-    self.fingerAForce = 6
-    self.fingerBForce = 5.5
-    self.fingerTipForce = 6
+    self.fingerAForce = 2 
+    self.fingerBForce = 2.5
+    self.fingerTipForce = 2
     self.useInverseKinematics = 1
     self.useSimulation = 1
-    self.useNullSpace = 1
+    self.useNullSpace =21
     self.useOrientation = 1
     self.kukaEndEffectorIndex = 6
     #lower limits for null space
@@ -120,10 +120,8 @@ class Kuka:
       #print (self.endEffectorPos[2])
       #print("actualEndEffectorPos[2]")
       #print(actualEndEffectorPos[2])
-      if (dz>0 or actualEndEffectorPos[2]>0.10):
-        self.endEffectorPos[2] = self.endEffectorPos[2]+dz
-      if (actualEndEffectorPos[2]<0.10):
-        self.endEffectorPos[2] = self.endEffectorPos[2]+0.0001
+      #if (dz<0 or actualEndEffectorPos[2]<0.5):
+      self.endEffectorPos[2] = self.endEffectorPos[2]+dz
     
      
       self.endEffectorAngle = self.endEffectorAngle + da
@@ -147,7 +145,7 @@ class Kuka:
       if (self.useSimulation):
         for i in range (self.kukaEndEffectorIndex+1):
           #print(i)
-          p.setJointMotorControl2(bodyIndex=self.kukaUid,jointIndex=i,controlMode=p.POSITION_CONTROL,targetPosition=jointPoses[i],targetVelocity=0,force=self.maxForce,positionGain=0.03,velocityGain=1)
+          p.setJointMotorControl2(bodyIndex=self.kukaUid,jointIndex=i,controlMode=p.POSITION_CONTROL,targetPosition=jointPoses[i],targetVelocity=0,force=self.maxForce,positionGain=0.3,velocityGain=1)
       else:
         #reset the joint state (ignoring all dynamics, not recommended to use during simulation)
         for i in range (self.numJoints):
diff --git a/examples/pybullet/gym/pybullet_envs/bullet/kukaCamGymEnv.py b/examples/pybullet/gym/pybullet_envs/bullet/kukaCamGymEnv.py
index ea2b07b82..a1cdc8242 100644
--- a/examples/pybullet/gym/pybullet_envs/bullet/kukaCamGymEnv.py
+++ b/examples/pybullet/gym/pybullet_envs/bullet/kukaCamGymEnv.py
@@ -14,7 +14,7 @@ import pybullet as p
 from . import kuka
 import random
 import pybullet_data
-
+maxSteps = 1000
 
 class KukaCamGymEnv(gym.Env):
   metadata = {
@@ -24,10 +24,10 @@ class KukaCamGymEnv(gym.Env):
 
   def __init__(self,
                urdfRoot=pybullet_data.getDataPath(),
-               actionRepeat=1,
+               actionRepeat=25,
                isEnableSelfCollision=True,
-               renders=True):
-    print("init")
+               renders=True,
+               isDiscrete=False):
     self._timeStep = 1./240.
     self._urdfRoot = urdfRoot
     self._actionRepeat = actionRepeat
@@ -37,6 +37,7 @@ class KukaCamGymEnv(gym.Env):
     self._renders = renders
     self._width = 341
     self._height = 256
+    self._isDiscrete=isDiscrete
     self.terminated = 0
     self._p = p
     if self._renders:
@@ -54,12 +55,17 @@ class KukaCamGymEnv(gym.Env):
     #print(observationDim)
     
     observation_high = np.array([np.finfo(np.float32).max] * observationDim)    
-    self.action_space = spaces.Discrete(7)
+    if (self._isDiscrete):
+      self.action_space = spaces.Discrete(7)
+    else:
+      action_dim = 3
+      self._action_bound = 1
+      action_high = np.array([self._action_bound] * action_dim)
+      self.action_space = spaces.Box(-action_high, action_high)
     self.observation_space = spaces.Box(low=0, high=255, shape=(self._height, self._width, 4))
     self.viewer = None
 
   def _reset(self):
-    print("reset")
     self.terminated = 0
     p.resetSimulation()
     p.setPhysicsEngineParameter(numSolverIterations=150)
@@ -68,8 +74,8 @@ class KukaCamGymEnv(gym.Env):
     
     p.loadURDF(os.path.join(self._urdfRoot,"table/table.urdf"), 0.5000000,0.00000,-.820000,0.000000,0.000000,0.0,1.0)
     
-    xpos = 0.5 +0.05*random.random()
-    ypos = 0 +0.05*random.random()
+    xpos = 0.5 +0.2*random.random()
+    ypos = 0 +0.25*random.random()
     ang = 3.1415925438*random.random()
     orn = p.getQuaternionFromEuler([0,0,ang])
     self.blockUid =p.loadURDF(os.path.join(self._urdfRoot,"block.urdf"), xpos,ypos,-0.1,orn[0],orn[1],orn[2],orn[3])
@@ -117,24 +123,36 @@ class KukaCamGymEnv(gym.Env):
      return self._observation
   
   def _step(self, action):
-    dv = 0.01
-    dx = [0,-dv,dv,0,0,0,0][action]
-    dy = [0,0,0,-dv,dv,0,0][action]
-    da = [0,0,0,0,0,-0.1,0.1][action]
-    f = 0.3
-    realAction = [dx,dy,-0.002,da,f]
+    if (self._isDiscrete):
+      dv = 0.01
+      dx = [0,-dv,dv,0,0,0,0][action]
+      dy = [0,0,0,-dv,dv,0,0][action]
+      da = [0,0,0,0,0,-0.1,0.1][action]
+      f = 0.3
+      realAction = [dx,dy,-0.002,da,f]
+    else:
+      dv = 0.01
+      dx = action[0] * dv
+      dy = action[1] * dv
+      da = action[2] * 0.1
+      f = 0.3
+      realAction = [dx,dy,-0.002,da,f]
+
     return self.step2( realAction)
      
   def step2(self, action):
-    self._kuka.applyAction(action)
     for i in range(self._actionRepeat):
+      self._kuka.applyAction(action)
       p.stepSimulation()
-      if self._renders:
-        time.sleep(self._timeStep)
-      self._observation = self.getExtendedObservation()
       if self._termination():
         break
+      #self._observation = self.getExtendedObservation()
       self._envStepCounter += 1
+
+    self._observation = self.getExtendedObservation()
+    if self._renders:
+        time.sleep(self._timeStep)
+
     #print("self._envStepCounter")
     #print(self._envStepCounter)
     
@@ -154,25 +172,41 @@ class KukaCamGymEnv(gym.Env):
       
     #print("self._envStepCounter")
     #print(self._envStepCounter)
-    if (self.terminated or self._envStepCounter>1000):
+    if (self.terminated or self._envStepCounter>maxSteps):
       self._observation = self.getExtendedObservation()
       return True
-    
-    if (actualEndEffectorPos[2] <= 0.10):
+    maxDist = 0.005 
+    closestPoints = p.getClosestPoints(self._kuka.trayUid, self._kuka.kukaUid,maxDist)
+     
+    if (len(closestPoints)):#(actualEndEffectorPos[2] <= -0.43):
       self.terminated = 1
       
       #print("closing gripper, attempting grasp")
       #start grasp and terminate
       fingerAngle = 0.3
-      
-      for i in range (1000):
-        graspAction = [0,0,0.001,0,fingerAngle]
+      for i in range (100):
+        graspAction = [0,0,0.0001,0,fingerAngle]
         self._kuka.applyAction(graspAction)
         p.stepSimulation()
         fingerAngle = fingerAngle-(0.3/100.)
         if (fingerAngle<0):
           fingerAngle=0
-        
+    
+      for i in range (1000):
+        graspAction = [0,0,0.001,0,fingerAngle]
+        self._kuka.applyAction(graspAction)
+        p.stepSimulation()
+        blockPos,blockOrn=p.getBasePositionAndOrientation(self.blockUid)
+        if (blockPos[2] > 0.23):
+          #print("BLOCKPOS!")
+          #print(blockPos[2])
+          break
+        state = p.getLinkState(self._kuka.kukaUid,self._kuka.kukaEndEffectorIndex)
+        actualEndEffectorPos = state[0]
+        if (actualEndEffectorPos[2]>0.5):
+          break
+
+    
       self._observation = self.getExtendedObservation()
       return True
     return False
@@ -181,7 +215,7 @@ class KukaCamGymEnv(gym.Env):
     
     #rewards is height of target object
     blockPos,blockOrn=p.getBasePositionAndOrientation(self.blockUid)
-    closestPoints = p.getClosestPoints(self.blockUid,self._kuka.kukaUid,1000) 
+    closestPoints = p.getClosestPoints(self.blockUid,self._kuka.kukaUid,1000, -1, self._kuka.kukaEndEffectorIndex) 
 
     reward = -1000    
     numPt = len(closestPoints)
@@ -189,13 +223,13 @@ class KukaCamGymEnv(gym.Env):
     if (numPt>0):
       #print("reward:")
       reward = -closestPoints[0][8]*10
-
     if (blockPos[2] >0.2):
-      print("grasped a block!!!")
-      print("self._envStepCounter")
-      print(self._envStepCounter)
+      #print("grasped a block!!!")
+      #print("self._envStepCounter")
+      #print(self._envStepCounter)
       reward = reward+1000
 
     #print("reward")
     #print(reward)
     return reward
+
diff --git a/examples/pybullet/gym/pybullet_envs/bullet/kukaGymEnv.py b/examples/pybullet/gym/pybullet_envs/bullet/kukaGymEnv.py
index 5928a7232..0dfb62490 100644
--- a/examples/pybullet/gym/pybullet_envs/bullet/kukaGymEnv.py
+++ b/examples/pybullet/gym/pybullet_envs/bullet/kukaGymEnv.py
@@ -14,6 +14,8 @@ from . import kuka
 import random
 import pybullet_data
 
+maxSteps = 1000
+
 
 class KukaGymEnv(gym.Env):
   metadata = {
@@ -25,8 +27,9 @@ class KukaGymEnv(gym.Env):
                urdfRoot=pybullet_data.getDataPath(),
                actionRepeat=1,
                isEnableSelfCollision=True,
-               renders=True):
-    print("init")
+               renders=True,
+               isDiscrete=False):
+    self._isDiscrete = isDiscrete
     self._timeStep = 1./240.
     self._urdfRoot = urdfRoot
     self._actionRepeat = actionRepeat
@@ -51,12 +54,17 @@ class KukaGymEnv(gym.Env):
     #print(observationDim)
     
     observation_high = np.array([np.finfo(np.float32).max] * observationDim)    
-    self.action_space = spaces.Discrete(7)
+    if (self._isDiscrete):
+      self.action_space = spaces.Discrete(7)
+    else:
+       action_dim = 3
+       self._action_bound = 1
+       action_high = np.array([self._action_bound] * action_dim)
+       self.action_space = spaces.Box(-action_high, action_high)
     self.observation_space = spaces.Box(-observation_high, observation_high)
     self.viewer = None
 
   def _reset(self):
-    print("reset")
     self.terminated = 0
     p.resetSimulation()
     p.setPhysicsEngineParameter(numSolverIterations=150)
@@ -65,8 +73,8 @@ class KukaGymEnv(gym.Env):
     
     p.loadURDF(os.path.join(self._urdfRoot,"table/table.urdf"), 0.5000000,0.00000,-.820000,0.000000,0.000000,0.0,1.0)
     
-    xpos = 0.5 +0.05*random.random()
-    ypos = 0 +0.05*random.random()
+    xpos = 0.5 +0.2*random.random()
+    ypos = 0 +0.25*random.random()
     ang = 3.1415925438*random.random()
     orn = p.getQuaternionFromEuler([0,0,ang])
     self.blockUid =p.loadURDF(os.path.join(self._urdfRoot,"block.urdf"), xpos,ypos,-0.1,orn[0],orn[1],orn[2],orn[3])
@@ -101,12 +109,20 @@ class KukaGymEnv(gym.Env):
      return self._observation
   
   def _step(self, action):
-    dv = 0.01
-    dx = [0,-dv,dv,0,0,0,0][action]
-    dy = [0,0,0,-dv,dv,0,0][action]
-    da = [0,0,0,0,0,-0.1,0.1][action]
-    f = 0.3
-    realAction = [dx,dy,-0.002,da,f]
+    if (self._isDiscrete):
+      dv = 0.01
+      dx = [0,-dv,dv,0,0,0,0][action]
+      dy = [0,0,0,-dv,dv,0,0][action]
+      da = [0,0,0,0,0,-0.1,0.1][action]
+      f = 0.3
+      realAction = [dx,dy,-0.002,da,f]
+    else:
+      dv = 0.01
+      dx = action[0] * dv
+      dy = action[1] * dv
+      da = action[2] * 0.1
+      f = 0.3
+      realAction = [dx,dy,-0.002,da,f]
     return self.step2( realAction)
      
   def step2(self, action):
@@ -138,25 +154,41 @@ class KukaGymEnv(gym.Env):
       
     #print("self._envStepCounter")
     #print(self._envStepCounter)
-    if (self.terminated or self._envStepCounter>1000):
+    if (self.terminated or self._envStepCounter>maxSteps):
       self._observation = self.getExtendedObservation()
       return True
-    
-    if (actualEndEffectorPos[2] <= 0.10):
+    maxDist = 0.005 
+    closestPoints = p.getClosestPoints(self._kuka.trayUid, self._kuka.kukaUid,maxDist)
+     
+    if (len(closestPoints)):#(actualEndEffectorPos[2] <= -0.43):
       self.terminated = 1
       
       #print("closing gripper, attempting grasp")
       #start grasp and terminate
       fingerAngle = 0.3
-      
-      for i in range (1000):
-        graspAction = [0,0,0.001,0,fingerAngle]
+      for i in range (100):
+        graspAction = [0,0,0.0001,0,fingerAngle]
         self._kuka.applyAction(graspAction)
         p.stepSimulation()
         fingerAngle = fingerAngle-(0.3/100.)
         if (fingerAngle<0):
           fingerAngle=0
-        
+    
+      for i in range (1000):
+        graspAction = [0,0,0.001,0,fingerAngle]
+        self._kuka.applyAction(graspAction)
+        p.stepSimulation()
+        blockPos,blockOrn=p.getBasePositionAndOrientation(self.blockUid)
+        if (blockPos[2] > 0.23):
+          #print("BLOCKPOS!")
+          #print(blockPos[2])
+          break
+        state = p.getLinkState(self._kuka.kukaUid,self._kuka.kukaEndEffectorIndex)
+        actualEndEffectorPos = state[0]
+        if (actualEndEffectorPos[2]>0.5):
+          break
+
+    
       self._observation = self.getExtendedObservation()
       return True
     return False
@@ -165,7 +197,7 @@ class KukaGymEnv(gym.Env):
     
     #rewards is height of target object
     blockPos,blockOrn=p.getBasePositionAndOrientation(self.blockUid)
-    closestPoints = p.getClosestPoints(self.blockUid,self._kuka.kukaUid,1000) 
+    closestPoints = p.getClosestPoints(self.blockUid,self._kuka.kukaUid,1000, -1, self._kuka.kukaEndEffectorIndex) 
 
     reward = -1000    
     numPt = len(closestPoints)
@@ -173,11 +205,10 @@ class KukaGymEnv(gym.Env):
     if (numPt>0):
       #print("reward:")
       reward = -closestPoints[0][8]*10
-
     if (blockPos[2] >0.2):
-      print("grasped a block!!!")
-      print("self._envStepCounter")
-      print(self._envStepCounter)
+      #print("grasped a block!!!")
+      #print("self._envStepCounter")
+      #print(self._envStepCounter)
       reward = reward+1000
 
     #print("reward")
diff --git a/examples/pybullet/gym/pybullet_envs/bullet/minitaur_gym_env.py b/examples/pybullet/gym/pybullet_envs/bullet/minitaur_gym_env.py
index 38ec631a5..f553ac46e 100644
--- a/examples/pybullet/gym/pybullet_envs/bullet/minitaur_gym_env.py
+++ b/examples/pybullet/gym/pybullet_envs/bullet/minitaur_gym_env.py
@@ -32,6 +32,9 @@ OBSERVATION_EPS = 0.01
 RENDER_HEIGHT = 720
 RENDER_WIDTH = 960
 
+duckStartPos = [0,0,0.25]
+duckStartOrn = [0.5,0.5,0.5,0.5]
+
 class MinitaurBulletEnv(gym.Env):
   """The gym environment for the minitaur.
 
@@ -133,6 +136,7 @@ class MinitaurBulletEnv(gym.Env):
     self._on_rack = on_rack
     self._cam_dist = 1.0
     self._cam_yaw = 0
+    self._duckId = -1
     self._cam_pitch = -30
     self._hard_reset = True
     self._kd_for_pd_controllers = kd_for_pd_controllers
@@ -176,7 +180,8 @@ class MinitaurBulletEnv(gym.Env):
       self._pybullet_client.setPhysicsEngineParameter(
           numSolverIterations=int(self._num_bullet_solver_iterations))
       self._pybullet_client.setTimeStep(self._time_step)
-      self._pybullet_client.loadURDF("%s/plane.urdf" % self._urdf_root)
+      self._groundId = self._pybullet_client.loadURDF("%s/plane.urdf" % self._urdf_root)
+      self._duckId = self._pybullet_client.loadURDF("%s/duck_vhacd.urdf" % self._urdf_root,duckStartPos,duckStartOrn)
       self._pybullet_client.setGravity(0, 0, -10)
       acc_motor = self._accurate_motor_model_enabled
       motor_protect = self._motor_overheat_protection
@@ -196,7 +201,7 @@ class MinitaurBulletEnv(gym.Env):
           kd_for_pd_controllers=self._kd_for_pd_controllers))
     else:
       self.minitaur.Reset(reload_urdf=False)
-
+      self._pybullet_client.resetBasePositionAndOrientation(self._duckId,duckStartPos,duckStartOrn)
     if self._env_randomizer is not None:
       self._env_randomizer.randomize_env(self)
 
@@ -322,6 +327,10 @@ class MinitaurBulletEnv(gym.Env):
     """
     return np.array(self._observation[BASE_ORIENTATION_OBSERVATION_INDEX:])
 
+  def lost_duck(self):
+    points = self._pybullet_client.getContactPoints(self._duckId, self._groundId);
+    return len(points)>0
+
   def is_fallen(self):
     """Decide whether the minitaur has fallen.
 
@@ -342,7 +351,7 @@ class MinitaurBulletEnv(gym.Env):
   def _termination(self):
     position = self.minitaur.GetBasePosition()
     distance = math.sqrt(position[0]**2 + position[1]**2)
-    return self.is_fallen() or distance > self._distance_limit
+    return self.lost_duck() or self.is_fallen() or distance > self._distance_limit
 
   def _reward(self):
     current_base_position = self.minitaur.GetBasePosition()
diff --git a/examples/pybullet/gym/pybullet_envs/examples/kukaGymEnvTest.py b/examples/pybullet/gym/pybullet_envs/examples/kukaGymEnvTest.py
index 65120c5f8..5649063b2 100644
--- a/examples/pybullet/gym/pybullet_envs/examples/kukaGymEnvTest.py
+++ b/examples/pybullet/gym/pybullet_envs/examples/kukaGymEnvTest.py
@@ -9,7 +9,7 @@ import time
 
 def main():
 
-	environment = KukaGymEnv(renders=True)
+	environment = KukaGymEnv(renders=True,isDiscrete=False)
 	
 	  
 	motorsIds=[]
@@ -19,10 +19,10 @@ def main():
 	#motorsIds.append(environment._p.addUserDebugParameter("yaw",-3.14,3.14,0))
 	#motorsIds.append(environment._p.addUserDebugParameter("fingerAngle",0,0.3,.3))
 	
-	dv = 0.001
+	dv = 1 
 	motorsIds.append(environment._p.addUserDebugParameter("posX",-dv,dv,0))
 	motorsIds.append(environment._p.addUserDebugParameter("posY",-dv,dv,0))
-	motorsIds.append(environment._p.addUserDebugParameter("posZ",-dv,dv,-dv))
+	motorsIds.append(environment._p.addUserDebugParameter("posZ",-dv,dv,0))
 	motorsIds.append(environment._p.addUserDebugParameter("yaw",-dv,dv,0))
 	motorsIds.append(environment._p.addUserDebugParameter("fingerAngle",0,0.3,.3))
 	
@@ -33,8 +33,8 @@ def main():
 	  for motorId in motorsIds:
 	    action.append(environment._p.readUserDebugParameter(motorId))
 	  
-	  state, reward, done, info = environment.step2(action)
+	  state, reward, done, info = environment.step(action)
 	  obs = environment.getExtendedObservation()
 	  
 if __name__=="__main__":
-    main()
\ No newline at end of file
+    main()
diff --git a/examples/pybullet/gym/pybullet_envs/examples/minitaur_gym_env_example.py b/examples/pybullet/gym/pybullet_envs/examples/minitaur_gym_env_example.py
index d75d2254c..2aebfd79a 100644
--- a/examples/pybullet/gym/pybullet_envs/examples/minitaur_gym_env_example.py
+++ b/examples/pybullet/gym/pybullet_envs/examples/minitaur_gym_env_example.py
@@ -44,9 +44,9 @@ def MotorOverheatExample():
       motor_kd=0.00,
       on_rack=False)
 
-  action = [2.0] * 8
+  action = [.0] * 8
   for i in range(8):
-    action[i] = 2.0 - 0.5 * (-1 if i % 2 == 0 else 1) * (-1 if i < 4 else 1)
+    action[i] = .0 - 0.1 * (-1 if i % 2 == 0 else 1) * (-1 if i < 4 else 1)
 
   steps = 500
   actions_and_observations = []
@@ -112,9 +112,9 @@ def SinePolicyExample():
       on_rack=False)
   sum_reward = 0
   steps = 20000
-  amplitude_1_bound = 0.5
-  amplitude_2_bound = 0.5
-  speed = 40
+  amplitude_1_bound = 0.1
+  amplitude_2_bound = 0.1
+  speed = 1
 
   for step_counter in range(steps):
     time_step = 0.01
@@ -124,9 +124,9 @@ def SinePolicyExample():
     amplitude2 = amplitude_2_bound
     steering_amplitude = 0
     if t < 10:
-      steering_amplitude = 0.5
+      steering_amplitude = 0.1
     elif t < 20:
-      steering_amplitude = -0.5
+      steering_amplitude = -0.1
     else:
       steering_amplitude = 0
 
diff --git a/examples/pybullet/gym/pybullet_envs/examples/racecarGymEnvTest.py b/examples/pybullet/gym/pybullet_envs/examples/racecarGymEnvTest.py
index aa78baa32..d640951b0 100644
--- a/examples/pybullet/gym/pybullet_envs/examples/racecarGymEnvTest.py
+++ b/examples/pybullet/gym/pybullet_envs/examples/racecarGymEnvTest.py
@@ -41,4 +41,4 @@ def main():
 	  print(obs)
 
 if __name__=="__main__":
-    main()
\ No newline at end of file
+    main()
diff --git a/examples/pybullet/gym/pybullet_envs/examples/racecarZEDGymEnvTest.py b/examples/pybullet/gym/pybullet_envs/examples/racecarZEDGymEnvTest.py
index 64521b121..cee459e70 100644
--- a/examples/pybullet/gym/pybullet_envs/examples/racecarZEDGymEnvTest.py
+++ b/examples/pybullet/gym/pybullet_envs/examples/racecarZEDGymEnvTest.py
@@ -3,12 +3,13 @@ import os, inspect
 currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
 parentdir = os.path.dirname(os.path.dirname(currentdir))
 os.sys.path.insert(0,parentdir)
+isDiscrete = False
 
 from pybullet_envs.bullet.racecarZEDGymEnv import RacecarZEDGymEnv
 
 def main():
 	
-	environment = RacecarZEDGymEnv(renders=True, isDiscrete=True)
+	environment = RacecarZEDGymEnv(renders=True, isDiscrete=isDiscrete)
 	
 	targetVelocitySlider = environment._p.addUserDebugParameter("wheelVelocity",-1,1,0)
 	steeringSlider = environment._p.addUserDebugParameter("steering",-1,1,0)
@@ -16,25 +17,28 @@ def main():
 	while (True):
 	  targetVelocity = environment._p.readUserDebugParameter(targetVelocitySlider)
 	  steeringAngle = environment._p.readUserDebugParameter(steeringSlider)
-	  discreteAction = 0
-	  if (targetVelocity<-0.33):
-	    discreteAction=0
+	  if (isDiscrete):
+	    discreteAction = 0
+	    if (targetVelocity<-0.33):
+	      discreteAction=0
+	    else:
+	      if (targetVelocity>0.33):
+	        discreteAction=6
+	      else:
+	        discreteAction=3
+	    if (steeringAngle>-0.17):
+	      if (steeringAngle>0.17):
+	        discreteAction=discreteAction+2
+	      else:
+	        discreteAction=discreteAction+1
+	    action=discreteAction
 	  else:
-	    if (targetVelocity>0.33):
-	      discreteAction=6
-	    else:
-	      discreteAction=3
-	  if (steeringAngle>-0.17):
-	    if (steeringAngle>0.17):
-	      discreteAction=discreteAction+2
-	    else:
-	      discreteAction=discreteAction+1
-	  action=discreteAction
-	    
+	    action=[targetVelocity,steeringAngle]
+
 	  state, reward, done, info = environment.step(action)
 	  obs = environment.getExtendedObservation()
-	  print("obs")
-	  print(obs)
+	  #print("obs")
+	  #print(obs)
 
 if __name__=="__main__":
     main()

From 4e2dfbe58efb1197f0d9b654ef1e9fcc3dd40f55 Mon Sep 17 00:00:00 2001
From: Erwin Coumans
 <erwincoumans@erwincoumans-macbookpro2.roam.corp.google.com>
Date: Tue, 31 Oct 2017 16:05:52 -0700
Subject: [PATCH 4/8] add minitaur duck environment see also 
 https://www.youtube.com/watch?v=SLwUCMZdKLo and evolution strategies
 (hardmaru): https://twitter.com/hardmaru/status/925074585984237568

---
 .../pybullet/gym/pybullet_envs/__init__.py    |   9 +
 .../gym/pybullet_envs/agents/configs.py       |  16 +
 .../gym/pybullet_envs/bullet/__init__.py      |   1 +
 .../bullet/minitaur_duck_gym_env.py           | 387 ++++++++++++++++++
 .../pybullet_envs/bullet/minitaur_gym_env.py  |  15 +-
 5 files changed, 416 insertions(+), 12 deletions(-)
 create mode 100644 examples/pybullet/gym/pybullet_envs/bullet/minitaur_duck_gym_env.py

diff --git a/examples/pybullet/gym/pybullet_envs/__init__.py b/examples/pybullet/gym/pybullet_envs/__init__.py
index 74b9b7c9b..8f8211845 100644
--- a/examples/pybullet/gym/pybullet_envs/__init__.py
+++ b/examples/pybullet/gym/pybullet_envs/__init__.py
@@ -9,6 +9,15 @@ register(
     reward_threshold=5.0,
 )
 
+register(
+    id='MinitaurBulletDuckEnv-v0',
+    entry_point='pybullet_envs.bullet:MinitaurBulletDuckEnv',
+    timestep_limit=1000,
+    reward_threshold=5.0,
+)
+
+MinitaurBulletDuckEnv
+
 register(
     id='RacecarBulletEnv-v0',
     entry_point='pybullet_envs.bullet:RacecarGymEnv',
diff --git a/examples/pybullet/gym/pybullet_envs/agents/configs.py b/examples/pybullet/gym/pybullet_envs/agents/configs.py
index 6e787d129..84d92ab1d 100644
--- a/examples/pybullet/gym/pybullet_envs/agents/configs.py
+++ b/examples/pybullet/gym/pybullet_envs/agents/configs.py
@@ -23,6 +23,7 @@ import functools
 from agents import ppo
 from agents.scripts import networks
 from pybullet_envs.bullet import minitaur_gym_env
+from pybullet_envs.bullet import minitaur_duck_gym_env
 from pybullet_envs.bullet import minitaur_env_randomizer
 import pybullet_envs.bullet.minitaur_gym_env as minitaur_gym_env
 import pybullet_envs
@@ -124,3 +125,18 @@ def pybullet_minitaur():
   steps = 3e7  # 30M
   return locals()
 
+def pybullet_duck_minitaur():
+  """Configuration specific to minitaur_gym_env.MinitaurBulletDuckEnv class."""
+  locals().update(default())
+  randomizer = (minitaur_env_randomizer.MinitaurEnvRandomizer())
+  env = functools.partial(
+      minitaur_gym_env.MinitaurBulletDuckEnv,
+      accurate_motor_model_enabled=True,
+      motor_overheat_protection=True,
+      pd_control_enabled=True,
+      env_randomizer=randomizer,
+      render=False)
+  max_length = 1000
+  steps = 3e7  # 30M
+  return locals()
+
diff --git a/examples/pybullet/gym/pybullet_envs/bullet/__init__.py b/examples/pybullet/gym/pybullet_envs/bullet/__init__.py
index 8b1735fb1..4c08abbba 100644
--- a/examples/pybullet/gym/pybullet_envs/bullet/__init__.py
+++ b/examples/pybullet/gym/pybullet_envs/bullet/__init__.py
@@ -1,5 +1,6 @@
 from pybullet_envs.bullet.cartpole_bullet import CartPoleBulletEnv
 from pybullet_envs.bullet.minitaur_gym_env import MinitaurBulletEnv
+from pybullet_envs.bullet.minitaur_duck_gym_env import MinitaurBulletDuckEnv
 from pybullet_envs.bullet.racecarGymEnv import RacecarGymEnv
 from pybullet_envs.bullet.racecarZEDGymEnv import RacecarZEDGymEnv
 from pybullet_envs.bullet.simpleHumanoidGymEnv import SimpleHumanoidGymEnv
diff --git a/examples/pybullet/gym/pybullet_envs/bullet/minitaur_duck_gym_env.py b/examples/pybullet/gym/pybullet_envs/bullet/minitaur_duck_gym_env.py
new file mode 100644
index 000000000..44b41dd7d
--- /dev/null
+++ b/examples/pybullet/gym/pybullet_envs/bullet/minitaur_duck_gym_env.py
@@ -0,0 +1,387 @@
+"""This file implements the gym environment of minitaur.
+
+"""
+
+import os, inspect
+currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
+parentdir = os.path.dirname(os.path.dirname(currentdir))
+os.sys.path.insert(0,parentdir)
+
+
+import math
+import time
+import gym
+from gym import spaces
+from gym.utils import seeding
+import numpy as np
+import pybullet
+from . import bullet_client
+from . import minitaur
+import os
+import pybullet_data
+from . import minitaur_env_randomizer
+
+NUM_SUBSTEPS = 5
+NUM_MOTORS = 8
+MOTOR_ANGLE_OBSERVATION_INDEX = 0
+MOTOR_VELOCITY_OBSERVATION_INDEX = MOTOR_ANGLE_OBSERVATION_INDEX + NUM_MOTORS
+MOTOR_TORQUE_OBSERVATION_INDEX = MOTOR_VELOCITY_OBSERVATION_INDEX + NUM_MOTORS
+BASE_ORIENTATION_OBSERVATION_INDEX = MOTOR_TORQUE_OBSERVATION_INDEX + NUM_MOTORS
+ACTION_EPS = 0.01
+OBSERVATION_EPS = 0.01
+RENDER_HEIGHT = 720
+RENDER_WIDTH = 960
+
+duckStartPos = [0,0,0.25]
+duckStartOrn = [0.5,0.5,0.5,0.5]
+
+class MinitaurBulletDuckEnv(gym.Env):
+  """The gym environment for the minitaur.
+
+  It simulates the locomotion of a minitaur, a quadruped robot. The state space
+  include the angles, velocities and torques for all the motors and the action
+  space is the desired motor angle for each motor. The reward function is based
+  on how far the minitaur walks in 1000 steps and penalizes the energy
+  expenditure.
+
+  """
+  metadata = {
+      "render.modes": ["human", "rgb_array"],
+      "video.frames_per_second": 50
+  }
+
+  def __init__(self,
+               urdf_root=pybullet_data.getDataPath(),
+               action_repeat=1,
+               distance_weight=1.0,
+               energy_weight=0.005,
+               shake_weight=0.0,
+               drift_weight=0.0,
+               distance_limit=float("inf"),
+               observation_noise_stdev=0.0,
+               self_collision_enabled=True,
+               motor_velocity_limit=np.inf,
+               pd_control_enabled=False,#not needed to be true if accurate motor model is enabled (has its own better PD)
+               leg_model_enabled=True,
+               accurate_motor_model_enabled=True,
+               motor_kp=1.0,
+               motor_kd=0.02,
+               torque_control_enabled=False,
+               motor_overheat_protection=True,
+               hard_reset=True,
+               on_rack=False,
+               render=False,
+               kd_for_pd_controllers=0.3,
+               env_randomizer=minitaur_env_randomizer.MinitaurEnvRandomizer()):
+    """Initialize the minitaur gym environment.
+
+    Args:
+      urdf_root: The path to the urdf data folder.
+      action_repeat: The number of simulation steps before actions are applied.
+      distance_weight: The weight of the distance term in the reward.
+      energy_weight: The weight of the energy term in the reward.
+      shake_weight: The weight of the vertical shakiness term in the reward.
+      drift_weight: The weight of the sideways drift term in the reward.
+      distance_limit: The maximum distance to terminate the episode.
+      observation_noise_stdev: The standard deviation of observation noise.
+      self_collision_enabled: Whether to enable self collision in the sim.
+      motor_velocity_limit: The velocity limit of each motor.
+      pd_control_enabled: Whether to use PD controller for each motor.
+      leg_model_enabled: Whether to use a leg motor to reparameterize the action
+        space.
+      accurate_motor_model_enabled: Whether to use the accurate DC motor model.
+      motor_kp: proportional gain for the accurate motor model.
+      motor_kd: derivative gain for the accurate motor model.
+      torque_control_enabled: Whether to use the torque control, if set to
+        False, pose control will be used.
+      motor_overheat_protection: Whether to shutdown the motor that has exerted
+        large torque (OVERHEAT_SHUTDOWN_TORQUE) for an extended amount of time
+        (OVERHEAT_SHUTDOWN_TIME). See ApplyAction() in minitaur.py for more
+        details.
+      hard_reset: Whether to wipe the simulation and load everything when reset
+        is called. If set to false, reset just place the minitaur back to start
+        position and set its pose to initial configuration.
+      on_rack: Whether to place the minitaur on rack. This is only used to debug
+        the walking gait. In this mode, the minitaur's base is hanged midair so
+        that its walking gait is clearer to visualize.
+      render: Whether to render the simulation.
+      kd_for_pd_controllers: kd value for the pd controllers of the motors
+      env_randomizer: An EnvRandomizer to randomize the physical properties
+        during reset().
+    """
+    self._time_step = 0.01
+    self._action_repeat = action_repeat
+    self._num_bullet_solver_iterations = 300
+    self._urdf_root = urdf_root
+    self._self_collision_enabled = self_collision_enabled
+    self._motor_velocity_limit = motor_velocity_limit
+    self._observation = []
+    self._env_step_counter = 0
+    self._is_render = render
+    self._last_base_position = [0, 0, 0]
+    self._distance_weight = distance_weight
+    self._energy_weight = energy_weight
+    self._drift_weight = drift_weight
+    self._shake_weight = shake_weight
+    self._distance_limit = distance_limit
+    self._observation_noise_stdev = observation_noise_stdev
+    self._action_bound = 1
+    self._pd_control_enabled = pd_control_enabled
+    self._leg_model_enabled = leg_model_enabled
+    self._accurate_motor_model_enabled = accurate_motor_model_enabled
+    self._motor_kp = motor_kp
+    self._motor_kd = motor_kd
+    self._torque_control_enabled = torque_control_enabled
+    self._motor_overheat_protection = motor_overheat_protection
+    self._on_rack = on_rack
+    self._cam_dist = 1.0
+    self._cam_yaw = 0
+    self._duckId = -1
+    self._cam_pitch = -30
+    self._hard_reset = True
+    self._kd_for_pd_controllers = kd_for_pd_controllers
+    self._last_frame_time = 0.0
+    print("urdf_root=" + self._urdf_root)
+    self._env_randomizer = env_randomizer
+    # PD control needs smaller time step for stability.
+    if pd_control_enabled or accurate_motor_model_enabled:
+      self._time_step /= NUM_SUBSTEPS
+      self._num_bullet_solver_iterations /= NUM_SUBSTEPS
+      self._action_repeat *= NUM_SUBSTEPS
+
+    if self._is_render:
+      self._pybullet_client = bullet_client.BulletClient(
+          connection_mode=pybullet.GUI)
+    else:
+      self._pybullet_client = bullet_client.BulletClient()
+
+    self._seed()
+    self.reset()
+    observation_high = (
+        self.minitaur.GetObservationUpperBound() + OBSERVATION_EPS)
+    observation_low = (
+        self.minitaur.GetObservationLowerBound() - OBSERVATION_EPS)
+    action_dim = 8
+    action_high = np.array([self._action_bound] * action_dim)
+    self.action_space = spaces.Box(-action_high, action_high)
+    self.observation_space = spaces.Box(observation_low, observation_high)
+    self.viewer = None
+    self._hard_reset = hard_reset  # This assignment need to be after reset()
+
+  def set_env_randomizer(self, env_randomizer):
+    self._env_randomizer = env_randomizer
+
+  def configure(self, args):
+    self._args = args
+
+  def _reset(self):
+    if self._hard_reset:
+      self._pybullet_client.resetSimulation()
+      self._pybullet_client.setPhysicsEngineParameter(
+          numSolverIterations=int(self._num_bullet_solver_iterations))
+      self._pybullet_client.setTimeStep(self._time_step)
+      self._groundId = self._pybullet_client.loadURDF("%s/plane.urdf" % self._urdf_root)
+      self._duckId = self._pybullet_client.loadURDF("%s/duck_vhacd.urdf" % self._urdf_root,duckStartPos,duckStartOrn)
+      self._pybullet_client.setGravity(0, 0, -10)
+      acc_motor = self._accurate_motor_model_enabled
+      motor_protect = self._motor_overheat_protection
+      self.minitaur = (minitaur.Minitaur(
+          pybullet_client=self._pybullet_client,
+          urdf_root=self._urdf_root,
+          time_step=self._time_step,
+          self_collision_enabled=self._self_collision_enabled,
+          motor_velocity_limit=self._motor_velocity_limit,
+          pd_control_enabled=self._pd_control_enabled,
+          accurate_motor_model_enabled=acc_motor,
+          motor_kp=self._motor_kp,
+          motor_kd=self._motor_kd,
+          torque_control_enabled=self._torque_control_enabled,
+          motor_overheat_protection=motor_protect,
+          on_rack=self._on_rack,
+          kd_for_pd_controllers=self._kd_for_pd_controllers))
+    else:
+      self.minitaur.Reset(reload_urdf=False)
+      self._pybullet_client.resetBasePositionAndOrientation(self._duckId,duckStartPos,duckStartOrn)
+    if self._env_randomizer is not None:
+      self._env_randomizer.randomize_env(self)
+
+    self._env_step_counter = 0
+    self._last_base_position = [0, 0, 0]
+    self._objectives = []
+    self._pybullet_client.resetDebugVisualizerCamera(
+        self._cam_dist, self._cam_yaw, self._cam_pitch, [0, 0, 0])
+    if not self._torque_control_enabled:
+      for _ in range(100):
+        if self._pd_control_enabled or self._accurate_motor_model_enabled:
+          self.minitaur.ApplyAction([math.pi / 2] * 8)
+        self._pybullet_client.stepSimulation()
+    return self._noisy_observation()
+
+  def _seed(self, seed=None):
+    self.np_random, seed = seeding.np_random(seed)
+    return [seed]
+
+  def _transform_action_to_motor_command(self, action):
+    if self._leg_model_enabled:
+      for i, action_component in enumerate(action):
+        if not (-self._action_bound - ACTION_EPS <= action_component <=
+                self._action_bound + ACTION_EPS):
+          raise ValueError(
+              "{}th action {} out of bounds.".format(i, action_component))
+      action = self.minitaur.ConvertFromLegModel(action)
+    return action
+
+  def _step(self, action):
+    """Step forward the simulation, given the action.
+
+    Args:
+      action: A list of desired motor angles for eight motors.
+
+    Returns:
+      observations: The angles, velocities and torques of all motors.
+      reward: The reward for the current state-action pair.
+      done: Whether the episode has ended.
+      info: A dictionary that stores diagnostic information.
+
+    Raises:
+      ValueError: The action dimension is not the same as the number of motors.
+      ValueError: The magnitude of actions is out of bounds.
+    """
+    if self._is_render:
+      # Sleep, otherwise the computation takes less time than real time,
+      # which will make the visualization like a fast-forward video.
+      time_spent = time.time() - self._last_frame_time
+      self._last_frame_time = time.time()
+      time_to_sleep = self._action_repeat * self._time_step - time_spent
+      if time_to_sleep > 0:
+        time.sleep(time_to_sleep)
+      base_pos = self.minitaur.GetBasePosition()
+      self._pybullet_client.resetDebugVisualizerCamera(
+          self._cam_dist, self._cam_yaw, self._cam_pitch, base_pos)
+    action = self._transform_action_to_motor_command(action)
+    for _ in range(self._action_repeat):
+      self.minitaur.ApplyAction(action)
+      self._pybullet_client.stepSimulation()
+
+    self._env_step_counter += 1
+    reward = self._reward()
+    done = self._termination()
+    return np.array(self._noisy_observation()), reward, done, {}
+
+  def _render(self, mode="rgb_array", close=False):
+    if mode != "rgb_array":
+      return np.array([])
+    base_pos = self.minitaur.GetBasePosition()
+    view_matrix = self._pybullet_client.computeViewMatrixFromYawPitchRoll(
+        cameraTargetPosition=base_pos,
+        distance=self._cam_dist,
+        yaw=self._cam_yaw,
+        pitch=self._cam_pitch,
+        roll=0,
+        upAxisIndex=2)
+    proj_matrix = self._pybullet_client.computeProjectionMatrixFOV(
+        fov=60, aspect=float(RENDER_WIDTH)/RENDER_HEIGHT,
+        nearVal=0.1, farVal=100.0)
+    (_, _, px, _, _) = self._pybullet_client.getCameraImage(
+        width=RENDER_WIDTH, height=RENDER_HEIGHT, viewMatrix=view_matrix,
+        projectionMatrix=proj_matrix, renderer=pybullet.ER_BULLET_HARDWARE_OPENGL)
+    rgb_array = np.array(px)
+    rgb_array = rgb_array[:, :, :3]
+    return rgb_array
+
+  def get_minitaur_motor_angles(self):
+    """Get the minitaur's motor angles.
+
+    Returns:
+      A numpy array of motor angles.
+    """
+    return np.array(
+        self._observation[MOTOR_ANGLE_OBSERVATION_INDEX:
+                          MOTOR_ANGLE_OBSERVATION_INDEX + NUM_MOTORS])
+
+  def get_minitaur_motor_velocities(self):
+    """Get the minitaur's motor velocities.
+
+    Returns:
+      A numpy array of motor velocities.
+    """
+    return np.array(
+        self._observation[MOTOR_VELOCITY_OBSERVATION_INDEX:
+                          MOTOR_VELOCITY_OBSERVATION_INDEX + NUM_MOTORS])
+
+  def get_minitaur_motor_torques(self):
+    """Get the minitaur's motor torques.
+
+    Returns:
+      A numpy array of motor torques.
+    """
+    return np.array(
+        self._observation[MOTOR_TORQUE_OBSERVATION_INDEX:
+                          MOTOR_TORQUE_OBSERVATION_INDEX + NUM_MOTORS])
+
+  def get_minitaur_base_orientation(self):
+    """Get the minitaur's base orientation, represented by a quaternion.
+
+    Returns:
+      A numpy array of minitaur's orientation.
+    """
+    return np.array(self._observation[BASE_ORIENTATION_OBSERVATION_INDEX:])
+
+  def lost_duck(self):
+    points = self._pybullet_client.getContactPoints(self._duckId, self._groundId);
+    return len(points)>0
+
+  def is_fallen(self):
+    """Decide whether the minitaur has fallen.
+
+    If the up directions between the base and the world is larger (the dot
+    product is smaller than 0.85) or the base is very low on the ground
+    (the height is smaller than 0.13 meter), the minitaur is considered fallen.
+
+    Returns:
+      Boolean value that indicates whether the minitaur has fallen.
+    """
+    orientation = self.minitaur.GetBaseOrientation()
+    rot_mat = self._pybullet_client.getMatrixFromQuaternion(orientation)
+    local_up = rot_mat[6:]
+    pos = self.minitaur.GetBasePosition()
+    return (np.dot(np.asarray([0, 0, 1]), np.asarray(local_up)) < 0.85 or
+            pos[2] < 0.13)
+
+  def _termination(self):
+    position = self.minitaur.GetBasePosition()
+    distance = math.sqrt(position[0]**2 + position[1]**2)
+    return self.lost_duck() or self.is_fallen() or distance > self._distance_limit
+
+  def _reward(self):
+    current_base_position = self.minitaur.GetBasePosition()
+    forward_reward = current_base_position[0] - self._last_base_position[0]
+    drift_reward = -abs(current_base_position[1] - self._last_base_position[1])
+    shake_reward = -abs(current_base_position[2] - self._last_base_position[2])
+    self._last_base_position = current_base_position
+    energy_reward = np.abs(
+        np.dot(self.minitaur.GetMotorTorques(),
+               self.minitaur.GetMotorVelocities())) * self._time_step
+    reward = (
+        self._distance_weight * forward_reward -
+        self._energy_weight * energy_reward + self._drift_weight * drift_reward
+        + self._shake_weight * shake_reward)
+    self._objectives.append(
+        [forward_reward, energy_reward, drift_reward, shake_reward])
+    return reward
+
+  def get_objectives(self):
+    return self._objectives
+
+  def _get_observation(self):
+    self._observation = self.minitaur.GetObservation()
+    return self._observation
+
+  def _noisy_observation(self):
+    self._get_observation()
+    observation = np.array(self._observation)
+    if self._observation_noise_stdev > 0:
+      observation += (np.random.normal(
+          scale=self._observation_noise_stdev, size=observation.shape) *
+                      self.minitaur.GetObservationUpperBound())
+    return observation
diff --git a/examples/pybullet/gym/pybullet_envs/bullet/minitaur_gym_env.py b/examples/pybullet/gym/pybullet_envs/bullet/minitaur_gym_env.py
index f553ac46e..38ec631a5 100644
--- a/examples/pybullet/gym/pybullet_envs/bullet/minitaur_gym_env.py
+++ b/examples/pybullet/gym/pybullet_envs/bullet/minitaur_gym_env.py
@@ -32,9 +32,6 @@ OBSERVATION_EPS = 0.01
 RENDER_HEIGHT = 720
 RENDER_WIDTH = 960
 
-duckStartPos = [0,0,0.25]
-duckStartOrn = [0.5,0.5,0.5,0.5]
-
 class MinitaurBulletEnv(gym.Env):
   """The gym environment for the minitaur.
 
@@ -136,7 +133,6 @@ class MinitaurBulletEnv(gym.Env):
     self._on_rack = on_rack
     self._cam_dist = 1.0
     self._cam_yaw = 0
-    self._duckId = -1
     self._cam_pitch = -30
     self._hard_reset = True
     self._kd_for_pd_controllers = kd_for_pd_controllers
@@ -180,8 +176,7 @@ class MinitaurBulletEnv(gym.Env):
       self._pybullet_client.setPhysicsEngineParameter(
           numSolverIterations=int(self._num_bullet_solver_iterations))
       self._pybullet_client.setTimeStep(self._time_step)
-      self._groundId = self._pybullet_client.loadURDF("%s/plane.urdf" % self._urdf_root)
-      self._duckId = self._pybullet_client.loadURDF("%s/duck_vhacd.urdf" % self._urdf_root,duckStartPos,duckStartOrn)
+      self._pybullet_client.loadURDF("%s/plane.urdf" % self._urdf_root)
       self._pybullet_client.setGravity(0, 0, -10)
       acc_motor = self._accurate_motor_model_enabled
       motor_protect = self._motor_overheat_protection
@@ -201,7 +196,7 @@ class MinitaurBulletEnv(gym.Env):
           kd_for_pd_controllers=self._kd_for_pd_controllers))
     else:
       self.minitaur.Reset(reload_urdf=False)
-      self._pybullet_client.resetBasePositionAndOrientation(self._duckId,duckStartPos,duckStartOrn)
+
     if self._env_randomizer is not None:
       self._env_randomizer.randomize_env(self)
 
@@ -327,10 +322,6 @@ class MinitaurBulletEnv(gym.Env):
     """
     return np.array(self._observation[BASE_ORIENTATION_OBSERVATION_INDEX:])
 
-  def lost_duck(self):
-    points = self._pybullet_client.getContactPoints(self._duckId, self._groundId);
-    return len(points)>0
-
   def is_fallen(self):
     """Decide whether the minitaur has fallen.
 
@@ -351,7 +342,7 @@ class MinitaurBulletEnv(gym.Env):
   def _termination(self):
     position = self.minitaur.GetBasePosition()
     distance = math.sqrt(position[0]**2 + position[1]**2)
-    return self.lost_duck() or self.is_fallen() or distance > self._distance_limit
+    return self.is_fallen() or distance > self._distance_limit
 
   def _reward(self):
     current_base_position = self.minitaur.GetBasePosition()

From 33d32d7ab4a88f9d6edfcc54e0ea158fac754964 Mon Sep 17 00:00:00 2001
From: Erwin Coumans
 <erwincoumans@erwincoumans-macbookpro2.roam.corp.google.com>
Date: Tue, 31 Oct 2017 16:10:02 -0700
Subject: [PATCH 5/8] add continuous version of kuka cam gym env test

---
 .../examples/kukaCamGymEnvTest.py             | 40 +++++++++++++++++++
 1 file changed, 40 insertions(+)
 create mode 100644 examples/pybullet/gym/pybullet_envs/examples/kukaCamGymEnvTest.py

diff --git a/examples/pybullet/gym/pybullet_envs/examples/kukaCamGymEnvTest.py b/examples/pybullet/gym/pybullet_envs/examples/kukaCamGymEnvTest.py
new file mode 100644
index 000000000..00fe0d6b3
--- /dev/null
+++ b/examples/pybullet/gym/pybullet_envs/examples/kukaCamGymEnvTest.py
@@ -0,0 +1,40 @@
+#add parent dir to find package. Only needed for source code build, pip install doesn't need it.
+import os, inspect
+currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
+parentdir = os.path.dirname(os.path.dirname(currentdir))
+os.sys.path.insert(0,parentdir)
+
+from pybullet_envs.bullet.kukaCamGymEnv import KukaCamGymEnv
+import time
+
+def main():
+
+	environment = KukaCamGymEnv(renders=True,isDiscrete=False)
+	
+	  
+	motorsIds=[]
+	#motorsIds.append(environment._p.addUserDebugParameter("posX",0.4,0.75,0.537))
+	#motorsIds.append(environment._p.addUserDebugParameter("posY",-.22,.3,0.0))
+	#motorsIds.append(environment._p.addUserDebugParameter("posZ",0.1,1,0.2))
+	#motorsIds.append(environment._p.addUserDebugParameter("yaw",-3.14,3.14,0))
+	#motorsIds.append(environment._p.addUserDebugParameter("fingerAngle",0,0.3,.3))
+	
+	dv = 1 
+	motorsIds.append(environment._p.addUserDebugParameter("posX",-dv,dv,0))
+	motorsIds.append(environment._p.addUserDebugParameter("posY",-dv,dv,0))
+	motorsIds.append(environment._p.addUserDebugParameter("posZ",-dv,dv,0))
+	motorsIds.append(environment._p.addUserDebugParameter("yaw",-dv,dv,0))
+	motorsIds.append(environment._p.addUserDebugParameter("fingerAngle",0,0.3,.3))
+	
+	done = False
+	while (not done):
+	    
+	  action=[]
+	  for motorId in motorsIds:
+	    action.append(environment._p.readUserDebugParameter(motorId))
+	  
+	  state, reward, done, info = environment.step(action)
+	  obs = environment.getExtendedObservation()
+	  
+if __name__=="__main__":
+    main()

From 3709e68645d24a26b074f8699f0b081cda228c99 Mon Sep 17 00:00:00 2001
From: Erwin Coumans
 <erwincoumans@erwincoumans-macbookpro2.roam.corp.google.com>
Date: Tue, 31 Oct 2017 16:14:20 -0700
Subject: [PATCH 6/8] fix discrete versions of kuka grasping environment should
 fix issue 1386

---
 .../pybullet/gym/pybullet_envs/baselines/enjoy_kuka_grasping.py | 2 +-
 .../gym/pybullet_envs/baselines/train_kuka_cam_grasping.py      | 2 +-
 .../pybullet/gym/pybullet_envs/baselines/train_kuka_grasping.py | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/examples/pybullet/gym/pybullet_envs/baselines/enjoy_kuka_grasping.py b/examples/pybullet/gym/pybullet_envs/baselines/enjoy_kuka_grasping.py
index 5950a593e..1a5b7c2a8 100644
--- a/examples/pybullet/gym/pybullet_envs/baselines/enjoy_kuka_grasping.py
+++ b/examples/pybullet/gym/pybullet_envs/baselines/enjoy_kuka_grasping.py
@@ -12,7 +12,7 @@ from baselines import deepq
 
 def main():
     
-    env = KukaGymEnv(renders=True)
+    env = KukaGymEnv(renders=True, isDiscrete=True)
     act = deepq.load("kuka_model.pkl")
     print(act)
     while True:
diff --git a/examples/pybullet/gym/pybullet_envs/baselines/train_kuka_cam_grasping.py b/examples/pybullet/gym/pybullet_envs/baselines/train_kuka_cam_grasping.py
index 053bc2ab2..4fd9f5a48 100644
--- a/examples/pybullet/gym/pybullet_envs/baselines/train_kuka_cam_grasping.py
+++ b/examples/pybullet/gym/pybullet_envs/baselines/train_kuka_cam_grasping.py
@@ -25,7 +25,7 @@ def callback(lcl, glb):
 
 def main():
   	
-    env = KukaCamGymEnv(renders=True)
+    env = KukaCamGymEnv(renders=False, isDiscrete=True)
     model = deepq.models.cnn_to_mlp(
         convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)],
         hiddens=[256],
diff --git a/examples/pybullet/gym/pybullet_envs/baselines/train_kuka_grasping.py b/examples/pybullet/gym/pybullet_envs/baselines/train_kuka_grasping.py
index 4de8130cb..932a08e77 100644
--- a/examples/pybullet/gym/pybullet_envs/baselines/train_kuka_grasping.py
+++ b/examples/pybullet/gym/pybullet_envs/baselines/train_kuka_grasping.py
@@ -25,7 +25,7 @@ def callback(lcl, glb):
 
 def main():
   	
-    env = KukaGymEnv(renders=False)
+    env = KukaGymEnv(renders=False, isDiscrete=True)
     model = deepq.models.mlp([64])
     act = deepq.learn(
         env,

From e4f58ddc332d269493e02dfcb88be3b436142cff Mon Sep 17 00:00:00 2001
From: Erwin Coumans
 <erwincoumans@erwincoumans-macbookpro2.roam.corp.google.com>
Date: Tue, 31 Oct 2017 16:33:13 -0700
Subject: [PATCH 7/8] fix typo, add train_ppo kuka task

---
 examples/pybullet/gym/pybullet_envs/__init__.py       |  1 -
 examples/pybullet/gym/pybullet_envs/agents/configs.py | 10 ++++++++++
 setup.py                                              |  2 +-
 3 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/examples/pybullet/gym/pybullet_envs/__init__.py b/examples/pybullet/gym/pybullet_envs/__init__.py
index 8f8211845..f263bc7cb 100644
--- a/examples/pybullet/gym/pybullet_envs/__init__.py
+++ b/examples/pybullet/gym/pybullet_envs/__init__.py
@@ -16,7 +16,6 @@ register(
     reward_threshold=5.0,
 )
 
-MinitaurBulletDuckEnv
 
 register(
     id='RacecarBulletEnv-v0',
diff --git a/examples/pybullet/gym/pybullet_envs/agents/configs.py b/examples/pybullet/gym/pybullet_envs/agents/configs.py
index 84d92ab1d..1444637c8 100644
--- a/examples/pybullet/gym/pybullet_envs/agents/configs.py
+++ b/examples/pybullet/gym/pybullet_envs/agents/configs.py
@@ -100,6 +100,16 @@ def pybullet_ant():
   steps = 5e7  # 50M
   return locals()
 
+def pybullet_kuka_grasping():
+  """Configuration for Bullet Kuka grasping task."""
+  locals().update(default())
+  # Environment
+  env = 'KukaBulletEnv-v0'
+  max_length = 10
+  steps = 1e7  # 10M
+  return locals()
+
+
 def pybullet_racecar():
   """Configuration for Bullet MIT Racecar task."""
   locals().update(default())
diff --git a/setup.py b/setup.py
index 04e3c4cae..c7109b85f 100644
--- a/setup.py
+++ b/setup.py
@@ -441,7 +441,7 @@ print("-----")
 
 setup(
 	name = 'pybullet',
-	version='1.5.8',
+	version='1.6.3',
 	description='Official Python Interface for the Bullet Physics SDK specialized for Robotics Simulation and Reinforcement Learning',
 	long_description='pybullet is an easy to use Python module for physics simulation, robotics and deep reinforcement learning based on the Bullet Physics SDK. With pybullet you can load articulated bodies from URDF, SDF and other file formats. pybullet provides forward dynamics simulation, inverse dynamics computation, forward and inverse kinematics and collision detection and ray intersection queries. Aside from physics simulation, pybullet supports to rendering, with a CPU renderer and OpenGL visualization and support for virtual reality headsets.',
 	url='https://github.com/bulletphysics/bullet3',

From 4798d66f19056e144516cce3d44b32bd68008f91 Mon Sep 17 00:00:00 2001
From: Erwin Coumans
 <erwincoumans@erwincoumans-macbookpro2.roam.corp.google.com>
Date: Wed, 1 Nov 2017 18:06:47 -0700
Subject: [PATCH 8/8] fixes in kuka/racecar environment. kuka still doesn't
 train well, work-in-progress

---
 .../gym/pybullet_envs/agents/configs.py       |   6 +-
 .../pybullet/gym/pybullet_envs/bullet/kuka.py |  15 +-
 .../gym/pybullet_envs/bullet/kukaCamGymEnv.py |  27 +++-
 .../gym/pybullet_envs/bullet/kukaGymEnv.py    | 131 +++++++++++++-----
 .../gym/pybullet_envs/bullet/racecarGymEnv.py |  36 +++--
 .../pybullet_envs/bullet/racecarZEDGymEnv.py  |  24 +++-
 .../pybullet_envs/examples/kukaGymEnvTest.py  |   6 +-
 .../pybullet_envs/examples/kukaGymEnvTest2.py |  38 +++++
 8 files changed, 218 insertions(+), 65 deletions(-)
 create mode 100644 examples/pybullet/gym/pybullet_envs/examples/kukaGymEnvTest2.py

diff --git a/examples/pybullet/gym/pybullet_envs/agents/configs.py b/examples/pybullet/gym/pybullet_envs/agents/configs.py
index 1444637c8..edf4cd23d 100644
--- a/examples/pybullet/gym/pybullet_envs/agents/configs.py
+++ b/examples/pybullet/gym/pybullet_envs/agents/configs.py
@@ -34,7 +34,7 @@ def default():
   # General
   algorithm = ppo.PPOAlgorithm
   num_agents = 10
-  eval_episodes = 25
+  eval_episodes = 20
   use_gpu = False
   # Network
   network = networks.feed_forward_gaussian
@@ -47,7 +47,7 @@ def default():
   init_mean_factor = 0.05
   init_logstd = -1
   # Optimization
-  update_every = 25
+  update_every = 20
   policy_optimizer = 'AdamOptimizer'
   value_optimizer = 'AdamOptimizer'
   update_epochs_policy = 50
@@ -105,7 +105,7 @@ def pybullet_kuka_grasping():
   locals().update(default())
   # Environment
   env = 'KukaBulletEnv-v0'
-  max_length = 10
+  max_length = 1000
   steps = 1e7  # 10M
   return locals()
 
diff --git a/examples/pybullet/gym/pybullet_envs/bullet/kuka.py b/examples/pybullet/gym/pybullet_envs/bullet/kuka.py
index b2b02f2e7..4c745cca1 100644
--- a/examples/pybullet/gym/pybullet_envs/bullet/kuka.py
+++ b/examples/pybullet/gym/pybullet_envs/bullet/kuka.py
@@ -25,6 +25,7 @@ class Kuka:
     self.useNullSpace =21
     self.useOrientation = 1
     self.kukaEndEffectorIndex = 6
+    self.kukaGripperIndex = 7
     #lower limits for null space
     self.ll=[-.967,-2 ,-2.96,0.19,-2.96,-2.09,-3.05]
     #upper limits for null space
@@ -76,7 +77,7 @@ class Kuka:
 
   def getObservation(self):
     observation = []
-    state = p.getLinkState(self.kukaUid,self.kukaEndEffectorIndex)
+    state = p.getLinkState(self.kukaUid,self.kukaGripperIndex)
     pos = state[0]
     orn = state[1]
     euler = p.getEulerFromQuaternion(orn)
@@ -106,13 +107,13 @@ class Kuka:
     
       
       self.endEffectorPos[0] = self.endEffectorPos[0]+dx
-      if (self.endEffectorPos[0]>0.75):
-        self.endEffectorPos[0]=0.75
-      if (self.endEffectorPos[0]<0.45):
-        self.endEffectorPos[0]=0.45
+      if (self.endEffectorPos[0]>0.65):
+        self.endEffectorPos[0]=0.65
+      if (self.endEffectorPos[0]<0.50):
+        self.endEffectorPos[0]=0.50
       self.endEffectorPos[1] = self.endEffectorPos[1]+dy
-      if (self.endEffectorPos[1]<-0.22):
-        self.endEffectorPos[1]=-0.22
+      if (self.endEffectorPos[1]<-0.17):
+        self.endEffectorPos[1]=-0.17
       if (self.endEffectorPos[1]>0.22):
         self.endEffectorPos[1]=0.22
       
diff --git a/examples/pybullet/gym/pybullet_envs/bullet/kukaCamGymEnv.py b/examples/pybullet/gym/pybullet_envs/bullet/kukaCamGymEnv.py
index a1cdc8242..4e7d7e70e 100644
--- a/examples/pybullet/gym/pybullet_envs/bullet/kukaCamGymEnv.py
+++ b/examples/pybullet/gym/pybullet_envs/bullet/kukaCamGymEnv.py
@@ -16,6 +16,9 @@ import random
 import pybullet_data
 maxSteps = 1000
 
+RENDER_HEIGHT = 720
+RENDER_WIDTH = 960
+
 class KukaCamGymEnv(gym.Env):
   metadata = {
       'render.modes': ['human', 'rgb_array'],
@@ -24,9 +27,9 @@ class KukaCamGymEnv(gym.Env):
 
   def __init__(self,
                urdfRoot=pybullet_data.getDataPath(),
-               actionRepeat=25,
+               actionRepeat=1,
                isEnableSelfCollision=True,
-               renders=True,
+               renders=False,
                isDiscrete=False):
     self._timeStep = 1./240.
     self._urdfRoot = urdfRoot
@@ -163,7 +166,25 @@ class KukaCamGymEnv(gym.Env):
     return np.array(self._observation), reward, done, {}
 
   def _render(self, mode='human', close=False):
-      return
+    if mode != "rgb_array":
+      return np.array([])
+    base_pos,orn = self._p.getBasePositionAndOrientation(self._racecar.racecarUniqueId)
+    view_matrix = self._p.computeViewMatrixFromYawPitchRoll(
+        cameraTargetPosition=base_pos,
+        distance=self._cam_dist,
+        yaw=self._cam_yaw,
+        pitch=self._cam_pitch,
+        roll=0,
+        upAxisIndex=2)
+    proj_matrix = self._p.computeProjectionMatrixFOV(
+        fov=60, aspect=float(RENDER_WIDTH)/RENDER_HEIGHT,
+        nearVal=0.1, farVal=100.0)
+    (_, _, px, _, _) = self._p.getCameraImage(
+        width=RENDER_WIDTH, height=RENDER_HEIGHT, viewMatrix=view_matrix,
+        projectionMatrix=proj_matrix, renderer=pybullet.ER_BULLET_HARDWARE_OPENGL)
+    rgb_array = np.array(px)
+    rgb_array = rgb_array[:, :, :3]
+    return rgb_array
 
   def _termination(self):
     #print (self._kuka.endEffectorPos[2])
diff --git a/examples/pybullet/gym/pybullet_envs/bullet/kukaGymEnv.py b/examples/pybullet/gym/pybullet_envs/bullet/kukaGymEnv.py
index 0dfb62490..d2a47935d 100644
--- a/examples/pybullet/gym/pybullet_envs/bullet/kukaGymEnv.py
+++ b/examples/pybullet/gym/pybullet_envs/bullet/kukaGymEnv.py
@@ -14,8 +14,10 @@ from . import kuka
 import random
 import pybullet_data
 
-maxSteps = 1000
+largeValObservation = 100
 
+RENDER_HEIGHT = 720
+RENDER_WIDTH = 960
 
 class KukaGymEnv(gym.Env):
   metadata = {
@@ -27,8 +29,10 @@ class KukaGymEnv(gym.Env):
                urdfRoot=pybullet_data.getDataPath(),
                actionRepeat=1,
                isEnableSelfCollision=True,
-               renders=True,
-               isDiscrete=False):
+               renders=False,
+               isDiscrete=False,
+               maxSteps = 1000):
+    #print("KukaGymEnv __init__")
     self._isDiscrete = isDiscrete
     self._timeStep = 1./240.
     self._urdfRoot = urdfRoot
@@ -37,7 +41,12 @@ class KukaGymEnv(gym.Env):
     self._observation = []
     self._envStepCounter = 0
     self._renders = renders
+    self._maxSteps = maxSteps
     self.terminated = 0
+    self._cam_dist = 1.3
+    self._cam_yaw = 180 
+    self._cam_pitch = -40
+
     self._p = p
     if self._renders:
       cid = p.connect(p.SHARED_MEMORY)
@@ -53,7 +62,7 @@ class KukaGymEnv(gym.Env):
     #print("observationDim")
     #print(observationDim)
     
-    observation_high = np.array([np.finfo(np.float32).max] * observationDim)    
+    observation_high = np.array([largeValObservation] * observationDim)    
     if (self._isDiscrete):
       self.action_space = spaces.Discrete(7)
     else:
@@ -65,6 +74,7 @@ class KukaGymEnv(gym.Env):
     self.viewer = None
 
   def _reset(self):
+    #print("KukaGymEnv _reset")
     self.terminated = 0
     p.resetSimulation()
     p.setPhysicsEngineParameter(numSolverIterations=150)
@@ -73,11 +83,11 @@ class KukaGymEnv(gym.Env):
     
     p.loadURDF(os.path.join(self._urdfRoot,"table/table.urdf"), 0.5000000,0.00000,-.820000,0.000000,0.000000,0.0,1.0)
     
-    xpos = 0.5 +0.2*random.random()
-    ypos = 0 +0.25*random.random()
-    ang = 3.1415925438*random.random()
+    xpos = 0.55 +0.12*random.random()
+    ypos = 0 +0.2*random.random()
+    ang = 3.14*0.5+3.1415925438*random.random()
     orn = p.getQuaternionFromEuler([0,0,ang])
-    self.blockUid =p.loadURDF(os.path.join(self._urdfRoot,"block.urdf"), xpos,ypos,-0.1,orn[0],orn[1],orn[2],orn[3])
+    self.blockUid =p.loadURDF(os.path.join(self._urdfRoot,"block.urdf"), xpos,ypos,-0.15,orn[0],orn[1],orn[2],orn[3])
             
     p.setGravity(0,0,-10)
     self._kuka = kuka.Kuka(urdfRootPath=self._urdfRoot, timeStep=self._timeStep)
@@ -95,57 +105,104 @@ class KukaGymEnv(gym.Env):
 
   def getExtendedObservation(self):
      self._observation = self._kuka.getObservation()
-     eeState  = p.getLinkState(self._kuka.kukaUid,self._kuka.kukaEndEffectorIndex)
-     endEffectorPos = eeState[0]
-     endEffectorOrn = eeState[1]
+     gripperState  = p.getLinkState(self._kuka.kukaUid,self._kuka.kukaGripperIndex)
+     gripperPos = gripperState[0]
+     gripperOrn = gripperState[1]
      blockPos,blockOrn = p.getBasePositionAndOrientation(self.blockUid)
 
-     invEEPos,invEEOrn = p.invertTransform(endEffectorPos,endEffectorOrn)
-     blockPosInEE,blockOrnInEE = p.multiplyTransforms(invEEPos,invEEOrn,blockPos,blockOrn)
-     blockEulerInEE = p.getEulerFromQuaternion(blockOrnInEE)
-     self._observation.extend(list(blockPosInEE))
-     self._observation.extend(list(blockEulerInEE))
-
+     invGripperPos,invGripperOrn = p.invertTransform(gripperPos,gripperOrn)
+     gripperMat = p.getMatrixFromQuaternion(gripperOrn)
+     dir0 = [gripperMat[0],gripperMat[3],gripperMat[6]]
+     dir1 = [gripperMat[1],gripperMat[4],gripperMat[7]]
+     dir2 = [gripperMat[2],gripperMat[5],gripperMat[8]]
+             
+     gripperEul =  p.getEulerFromQuaternion(gripperOrn)
+     #print("gripperEul")
+     #print(gripperEul)
+     blockPosInGripper,blockOrnInGripper = p.multiplyTransforms(invGripperPos,invGripperOrn,blockPos,blockOrn)
+     projectedBlockPos2D =[blockPosInGripper[0],blockPosInGripper[1]]
+     blockEulerInGripper = p.getEulerFromQuaternion(blockOrnInGripper)
+     #print("projectedBlockPos2D")
+     #print(projectedBlockPos2D)
+     #print("blockEulerInGripper")
+     #print(blockEulerInGripper)
+     
+     #we return the relative x,y position and euler angle of block in gripper space
+     blockInGripperPosXYEulZ =[blockPosInGripper[0],blockPosInGripper[1],blockEulerInGripper[2]]
+     
+     #p.addUserDebugLine(gripperPos,[gripperPos[0]+dir0[0],gripperPos[1]+dir0[1],gripperPos[2]+dir0[2]],[1,0,0],lifeTime=1)
+     #p.addUserDebugLine(gripperPos,[gripperPos[0]+dir1[0],gripperPos[1]+dir1[1],gripperPos[2]+dir1[2]],[0,1,0],lifeTime=1)
+     #p.addUserDebugLine(gripperPos,[gripperPos[0]+dir2[0],gripperPos[1]+dir2[1],gripperPos[2]+dir2[2]],[0,0,1],lifeTime=1)
+     
+     self._observation.extend(list(blockInGripperPosXYEulZ))
      return self._observation
   
   def _step(self, action):
     if (self._isDiscrete):
-      dv = 0.01
+      dv = 0.005
       dx = [0,-dv,dv,0,0,0,0][action]
       dy = [0,0,0,-dv,dv,0,0][action]
-      da = [0,0,0,0,0,-0.1,0.1][action]
+      da = [0,0,0,0,0,-0.05,0.05][action]
       f = 0.3
       realAction = [dx,dy,-0.002,da,f]
     else:
-      dv = 0.01
+      #print("action[0]=", str(action[0]))
+      dv = 0.005
       dx = action[0] * dv
       dy = action[1] * dv
-      da = action[2] * 0.1
+      da = action[2] * 0.05
       f = 0.3
       realAction = [dx,dy,-0.002,da,f]
     return self.step2( realAction)
      
   def step2(self, action):
-    self._kuka.applyAction(action)
     for i in range(self._actionRepeat):
+      self._kuka.applyAction(action)
       p.stepSimulation()
-      if self._renders:
-        time.sleep(self._timeStep)
-      self._observation = self.getExtendedObservation()
       if self._termination():
         break
       self._envStepCounter += 1
+    if self._renders:
+      time.sleep(self._timeStep)
+    self._observation = self.getExtendedObservation()
+
     #print("self._envStepCounter")
     #print(self._envStepCounter)
     
     done = self._termination()
-    reward = self._reward()
+    npaction = np.array([action[3]]) #only penalize rotation until learning works well [action[0],action[1],action[3]])
+    actionCost = np.linalg.norm(npaction)*10.
+    #print("actionCost")
+    #print(actionCost)
+    reward = self._reward()-actionCost
+    #print("reward")
+    #print(reward)
+ 
     #print("len=%r" % len(self._observation))
     
     return np.array(self._observation), reward, done, {}
 
-  def _render(self, mode='human', close=False):
-      return
+  def _render(self, mode="rgb_array", close=False):
+    if mode != "rgb_array":
+      return np.array([])
+    base_pos,orn = self._p.getBasePositionAndOrientation(self._kuka.kukaUid)
+    view_matrix = self._p.computeViewMatrixFromYawPitchRoll(
+        cameraTargetPosition=base_pos,
+        distance=self._cam_dist,
+        yaw=self._cam_yaw,
+        pitch=self._cam_pitch,
+        roll=0,
+        upAxisIndex=2)
+    proj_matrix = self._p.computeProjectionMatrixFOV(
+        fov=60, aspect=float(RENDER_WIDTH)/RENDER_HEIGHT,
+        nearVal=0.1, farVal=100.0)
+    (_, _, px, _, _) = self._p.getCameraImage(
+        width=RENDER_WIDTH, height=RENDER_HEIGHT, viewMatrix=view_matrix,
+        projectionMatrix=proj_matrix, renderer=self._p.ER_BULLET_HARDWARE_OPENGL)
+    rgb_array = np.array(px)
+    rgb_array = rgb_array[:, :, :3]
+    return rgb_array
+
 
   def _termination(self):
     #print (self._kuka.endEffectorPos[2])
@@ -154,7 +211,7 @@ class KukaGymEnv(gym.Env):
       
     #print("self._envStepCounter")
     #print(self._envStepCounter)
-    if (self.terminated or self._envStepCounter>maxSteps):
+    if (self.terminated or self._envStepCounter>self._maxSteps):
       self._observation = self.getExtendedObservation()
       return True
     maxDist = 0.005 
@@ -163,7 +220,7 @@ class KukaGymEnv(gym.Env):
     if (len(closestPoints)):#(actualEndEffectorPos[2] <= -0.43):
       self.terminated = 1
       
-      #print("closing gripper, attempting grasp")
+      #print("terminating, closing gripper, attempting grasp")
       #start grasp and terminate
       fingerAngle = 0.3
       for i in range (100):
@@ -199,18 +256,22 @@ class KukaGymEnv(gym.Env):
     blockPos,blockOrn=p.getBasePositionAndOrientation(self.blockUid)
     closestPoints = p.getClosestPoints(self.blockUid,self._kuka.kukaUid,1000, -1, self._kuka.kukaEndEffectorIndex) 
 
-    reward = -1000    
+    reward = -1000
+    
     numPt = len(closestPoints)
     #print(numPt)
     if (numPt>0):
       #print("reward:")
       reward = -closestPoints[0][8]*10
     if (blockPos[2] >0.2):
-      #print("grasped a block!!!")
+      reward = reward+10000
+      print("successfully grasped a block!!!")
       #print("self._envStepCounter")
       #print(self._envStepCounter)
-      reward = reward+1000
-
-    #print("reward")
+      #print("self._envStepCounter")
+      #print(self._envStepCounter)
+      #print("reward")
+      #print(reward)
+     #print("reward")
     #print(reward)
     return reward
diff --git a/examples/pybullet/gym/pybullet_envs/bullet/racecarGymEnv.py b/examples/pybullet/gym/pybullet_envs/bullet/racecarGymEnv.py
index bea79bd77..7537d54c1 100644
--- a/examples/pybullet/gym/pybullet_envs/bullet/racecarGymEnv.py
+++ b/examples/pybullet/gym/pybullet_envs/bullet/racecarGymEnv.py
@@ -15,6 +15,9 @@ import random
 from . import bullet_client
 import pybullet_data
 
+RENDER_HEIGHT = 720
+RENDER_WIDTH = 960
+
 class RacecarGymEnv(gym.Env):
   metadata = {
       'render.modes': ['human', 'rgb_array'],
@@ -136,20 +139,27 @@ class RacecarGymEnv(gym.Env):
     return np.array(self._observation), reward, done, {}
 
   def _render(self, mode='human', close=False):
-      width=320
-      height=200
-      img_arr = self._p.getCameraImage(width,height)
-      w=img_arr[0]
-      h=img_arr[1]
-      rgb=img_arr[2]
-      dep=img_arr[3]
-      #print 'width = %d height = %d' % (w,h)
-      # reshape creates np array
-      np_img_arr = np.reshape(rgb, (h, w, 4))
-      # remove alpha channel
-      np_img_arr = np_img_arr[:, :, :3]
-      return np_img_arr
+    if mode != "rgb_array":
+      return np.array([])
+    base_pos,orn = self._p.getBasePositionAndOrientation(self._racecar.racecarUniqueId)
+    view_matrix = self._p.computeViewMatrixFromYawPitchRoll(
+        cameraTargetPosition=base_pos,
+        distance=self._cam_dist,
+        yaw=self._cam_yaw,
+        pitch=self._cam_pitch,
+        roll=0,
+        upAxisIndex=2)
+    proj_matrix = self._p.computeProjectionMatrixFOV(
+        fov=60, aspect=float(RENDER_WIDTH)/RENDER_HEIGHT,
+        nearVal=0.1, farVal=100.0)
+    (_, _, px, _, _) = self._p.getCameraImage(
+        width=RENDER_WIDTH, height=RENDER_HEIGHT, viewMatrix=view_matrix,
+        projectionMatrix=proj_matrix, renderer=pybullet.ER_BULLET_HARDWARE_OPENGL)
+    rgb_array = np.array(px)
+    rgb_array = rgb_array[:, :, :3]
+    return rgb_array
 
+    
   def _termination(self):
     return self._envStepCounter>1000
     
diff --git a/examples/pybullet/gym/pybullet_envs/bullet/racecarZEDGymEnv.py b/examples/pybullet/gym/pybullet_envs/bullet/racecarZEDGymEnv.py
index 507f6cb1a..d9546de8c 100644
--- a/examples/pybullet/gym/pybullet_envs/bullet/racecarZEDGymEnv.py
+++ b/examples/pybullet/gym/pybullet_envs/bullet/racecarZEDGymEnv.py
@@ -15,6 +15,9 @@ from . import racecar
 import random
 import pybullet_data
 
+RENDER_HEIGHT = 720
+RENDER_WIDTH = 960
+
 class RacecarZEDGymEnv(gym.Env):
   metadata = {
       'render.modes': ['human', 'rgb_array'],
@@ -151,7 +154,26 @@ class RacecarZEDGymEnv(gym.Env):
     return np.array(self._observation), reward, done, {}
 
   def _render(self, mode='human', close=False):
-      return
+    if mode != "rgb_array":
+      return np.array([])
+    base_pos,orn = self._p.getBasePositionAndOrientation(self._racecar.racecarUniqueId)
+    view_matrix = self._p.computeViewMatrixFromYawPitchRoll(
+        cameraTargetPosition=base_pos,
+        distance=self._cam_dist,
+        yaw=self._cam_yaw,
+        pitch=self._cam_pitch,
+        roll=0,
+        upAxisIndex=2)
+    proj_matrix = self._p.computeProjectionMatrixFOV(
+        fov=60, aspect=float(RENDER_WIDTH)/RENDER_HEIGHT,
+        nearVal=0.1, farVal=100.0)
+    (_, _, px, _, _) = self._p.getCameraImage(
+        width=RENDER_WIDTH, height=RENDER_HEIGHT, viewMatrix=view_matrix,
+        projectionMatrix=proj_matrix, renderer=pybullet.ER_BULLET_HARDWARE_OPENGL)
+    rgb_array = np.array(px)
+    rgb_array = rgb_array[:, :, :3]
+    return rgb_array
+
 
   def _termination(self):
     return self._envStepCounter>1000
diff --git a/examples/pybullet/gym/pybullet_envs/examples/kukaGymEnvTest.py b/examples/pybullet/gym/pybullet_envs/examples/kukaGymEnvTest.py
index 5649063b2..3917f08cf 100644
--- a/examples/pybullet/gym/pybullet_envs/examples/kukaGymEnvTest.py
+++ b/examples/pybullet/gym/pybullet_envs/examples/kukaGymEnvTest.py
@@ -9,7 +9,7 @@ import time
 
 def main():
 
-	environment = KukaGymEnv(renders=True,isDiscrete=False)
+	environment = KukaGymEnv(renders=True,isDiscrete=False, maxSteps = 10000000)
 	
 	  
 	motorsIds=[]
@@ -19,7 +19,7 @@ def main():
 	#motorsIds.append(environment._p.addUserDebugParameter("yaw",-3.14,3.14,0))
 	#motorsIds.append(environment._p.addUserDebugParameter("fingerAngle",0,0.3,.3))
 	
-	dv = 1 
+	dv = 0.01 
 	motorsIds.append(environment._p.addUserDebugParameter("posX",-dv,dv,0))
 	motorsIds.append(environment._p.addUserDebugParameter("posY",-dv,dv,0))
 	motorsIds.append(environment._p.addUserDebugParameter("posZ",-dv,dv,0))
@@ -33,7 +33,7 @@ def main():
 	  for motorId in motorsIds:
 	    action.append(environment._p.readUserDebugParameter(motorId))
 	  
-	  state, reward, done, info = environment.step(action)
+	  state, reward, done, info = environment.step2(action)
 	  obs = environment.getExtendedObservation()
 	  
 if __name__=="__main__":
diff --git a/examples/pybullet/gym/pybullet_envs/examples/kukaGymEnvTest2.py b/examples/pybullet/gym/pybullet_envs/examples/kukaGymEnvTest2.py
new file mode 100644
index 000000000..5c7f245c3
--- /dev/null
+++ b/examples/pybullet/gym/pybullet_envs/examples/kukaGymEnvTest2.py
@@ -0,0 +1,38 @@
+#add parent dir to find package. Only needed for source code build, pip install doesn't need it.
+import os, inspect
+currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
+parentdir = os.path.dirname(os.path.dirname(currentdir))
+os.sys.path.insert(0,parentdir)
+
+from pybullet_envs.bullet.kukaGymEnv import KukaGymEnv
+import time
+
+def main():
+
+	environment = KukaGymEnv(renders=True,isDiscrete=False, maxSteps = 10000000)
+	
+	  
+	motorsIds=[]
+	#motorsIds.append(environment._p.addUserDebugParameter("posX",-1,1,0))
+	#motorsIds.append(environment._p.addUserDebugParameter("posY",-.22,.3,0.0))
+	#motorsIds.append(environment._p.addUserDebugParameter("posZ",0.1,1,0.2))
+	#motorsIds.append(environment._p.addUserDebugParameter("yaw",-3.14,3.14,0))
+	#motorsIds.append(environment._p.addUserDebugParameter("fingerAngle",0,0.3,.3))
+	
+	dv = 1 
+	motorsIds.append(environment._p.addUserDebugParameter("posX",-dv,dv,0))
+	motorsIds.append(environment._p.addUserDebugParameter("posY",-dv,dv,0))
+	motorsIds.append(environment._p.addUserDebugParameter("yaw",-dv,dv,0))
+	
+	done = False
+	while (not done):
+	    
+	  action=[]
+	  for motorId in motorsIds:
+	    action.append(environment._p.readUserDebugParameter(motorId))
+	  
+	  state, reward, done, info = environment.step(action)
+	  obs = environment.getExtendedObservation()
+	  
+if __name__=="__main__":
+    main()