Add continuous-action versions of kukaGymEnv, kukaCamGymEnv, racecarZEDGymEnv, etc.

These environments should now be trainable with PPO or evolution strategies (ES).
2017-10-31 15:50:34 -07:00
parent 32312e60a8
commit 55f5e52ecd
8 changed files with 172 additions and 96 deletions
--- a/examples/pybullet/gym/pybullet_envs/examples/kukaGymEnvTest.py
+++ b/examples/pybullet/gym/pybullet_envs/examples/kukaGymEnvTest.py
@@ -9,7 +9,7 @@ import time

 def main():

-	environment = KukaGymEnv(renders=True)
+	environment = KukaGymEnv(renders=True,isDiscrete=False)
 	
 	  
 	motorsIds=[]
@@ -19,10 +19,10 @@ def main():
 	#motorsIds.append(environment._p.addUserDebugParameter("yaw",-3.14,3.14,0))
 	#motorsIds.append(environment._p.addUserDebugParameter("fingerAngle",0,0.3,.3))
 	
-	dv = 0.001
+	dv = 1 
 	motorsIds.append(environment._p.addUserDebugParameter("posX",-dv,dv,0))
 	motorsIds.append(environment._p.addUserDebugParameter("posY",-dv,dv,0))
-	motorsIds.append(environment._p.addUserDebugParameter("posZ",-dv,dv,-dv))
+	motorsIds.append(environment._p.addUserDebugParameter("posZ",-dv,dv,0))
 	motorsIds.append(environment._p.addUserDebugParameter("yaw",-dv,dv,0))
 	motorsIds.append(environment._p.addUserDebugParameter("fingerAngle",0,0.3,.3))
 	
@@ -33,8 +33,8 @@ def main():
 	  for motorId in motorsIds:
 	    action.append(environment._p.readUserDebugParameter(motorId))
 	  
-	  state, reward, done, info = environment.step2(action)
+	  state, reward, done, info = environment.step(action)
 	  obs = environment.getExtendedObservation()
 	  
 if __name__=="__main__":
-    main()
+    main()
--- a/examples/pybullet/gym/pybullet_envs/examples/minitaur_gym_env_example.py
+++ b/examples/pybullet/gym/pybullet_envs/examples/minitaur_gym_env_example.py
@@ -44,9 +44,9 @@ def MotorOverheatExample():
      motor_kd=0.00,
      on_rack=False)

-  action = [2.0] * 8
+  action = [.0] * 8
  for i in range(8):
-    action[i] = 2.0 - 0.5 * (-1 if i % 2 == 0 else 1) * (-1 if i < 4 else 1)
+    action[i] = .0 - 0.1 * (-1 if i % 2 == 0 else 1) * (-1 if i < 4 else 1)

  steps = 500
  actions_and_observations = []
@@ -112,9 +112,9 @@ def SinePolicyExample():
      on_rack=False)
  sum_reward = 0
  steps = 20000
-  amplitude_1_bound = 0.5
-  amplitude_2_bound = 0.5
-  speed = 40
+  amplitude_1_bound = 0.1
+  amplitude_2_bound = 0.1
+  speed = 1

  for step_counter in range(steps):
    time_step = 0.01
@@ -124,9 +124,9 @@ def SinePolicyExample():
    amplitude2 = amplitude_2_bound
    steering_amplitude = 0
    if t < 10:
-      steering_amplitude = 0.5
+      steering_amplitude = 0.1
    elif t < 20:
-      steering_amplitude = -0.5
+      steering_amplitude = -0.1
    else:
      steering_amplitude = 0

--- a/examples/pybullet/gym/pybullet_envs/examples/racecarGymEnvTest.py
+++ b/examples/pybullet/gym/pybullet_envs/examples/racecarGymEnvTest.py
@@ -41,4 +41,4 @@ def main():
 	  print(obs)

 if __name__=="__main__":
-    main()
+    main()
--- a/examples/pybullet/gym/pybullet_envs/examples/racecarZEDGymEnvTest.py
+++ b/examples/pybullet/gym/pybullet_envs/examples/racecarZEDGymEnvTest.py
@@ -3,12 +3,13 @@ import os, inspect
 currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
 parentdir = os.path.dirname(os.path.dirname(currentdir))
 os.sys.path.insert(0,parentdir)
+isDiscrete = False

 from pybullet_envs.bullet.racecarZEDGymEnv import RacecarZEDGymEnv

 def main():
 	
-	environment = RacecarZEDGymEnv(renders=True, isDiscrete=True)
+	environment = RacecarZEDGymEnv(renders=True, isDiscrete=isDiscrete)
 	
 	targetVelocitySlider = environment._p.addUserDebugParameter("wheelVelocity",-1,1,0)
 	steeringSlider = environment._p.addUserDebugParameter("steering",-1,1,0)
@@ -16,25 +17,28 @@ def main():
 	while (True):
 	  targetVelocity = environment._p.readUserDebugParameter(targetVelocitySlider)
 	  steeringAngle = environment._p.readUserDebugParameter(steeringSlider)
-	  discreteAction = 0
-	  if (targetVelocity<-0.33):
-	    discreteAction=0
+	  if (isDiscrete):
+	    discreteAction = 0
+	    if (targetVelocity<-0.33):
+	      discreteAction=0
+	    else:
+	      if (targetVelocity>0.33):
+	        discreteAction=6
+	      else:
+	        discreteAction=3
+	    if (steeringAngle>-0.17):
+	      if (steeringAngle>0.17):
+	        discreteAction=discreteAction+2
+	      else:
+	        discreteAction=discreteAction+1
+	    action=discreteAction
 	  else:
-	    if (targetVelocity>0.33):
-	      discreteAction=6
-	    else:
-	      discreteAction=3
-	  if (steeringAngle>-0.17):
-	    if (steeringAngle>0.17):
-	      discreteAction=discreteAction+2
-	    else:
-	      discreteAction=discreteAction+1
-	  action=discreteAction
-	    
+	    action=[targetVelocity,steeringAngle]
+
 	  state, reward, done, info = environment.step(action)
 	  obs = environment.getExtendedObservation()
-	  print("obs")
-	  print(obs)
+	  #print("obs")
+	  #print(obs)

 if __name__=="__main__":
    main()