Merge pull request #1181 from erwincoumans/master

Implement OpenAI baselines dqn train/enjoy pybullet racecar
2017-06-10 20:34:42 -07:00
parent dc8c2e4d5e 1752aa55ca
commit 00e85554fb
9 changed files with 164 additions and 46 deletions
--- a/examples/SharedMemory/PhysicsDirect.cpp
+++ b/examples/SharedMemory/PhysicsDirect.cpp
@@ -64,9 +64,15 @@ struct PhysicsDirectInternalData
 	PhysicsDirectInternalData()
 		:m_hasStatus(false),
 		m_verboseOutput(false),
 		m_cachedCameraPixelsWidth(0),
 		m_cachedCameraPixelsHeight(0),
 		m_commandProcessor(NULL),
 		m_ownsCommandProcessor(false),
 		m_timeOutInSeconds(1e30)
 	{
 		memset(&m_command, 0, sizeof(m_command));
 		memset(&m_serverStatus, 0, sizeof(m_serverStatus));
 		memset(m_bulletStreamDataServerToClient, 0, sizeof(m_bulletStreamDataServerToClient));
 	}
 };
--- a/examples/SharedMemory/TinyRendererVisualShapeConverter.cpp
+++ b/examples/SharedMemory/TinyRendererVisualShapeConverter.cpp
@@ -177,7 +177,7 @@ void convertURDFToVisualShape(const UrdfShape* visual, const char* urdfPathPrefi
 	visualShapeOut.m_dimensions[0] = 0;
 	visualShapeOut.m_dimensions[1] = 0;
 	visualShapeOut.m_dimensions[2] = 0;
-	visualShapeOut.m_meshAssetFileName[0] = 0;
+	memset(visualShapeOut.m_meshAssetFileName, 0, sizeof(visualShapeOut.m_meshAssetFileName));
 	if (visual->m_geometry.m_hasLocalMaterial) {
 		visualShapeOut.m_rgbaColor[0] = visual->m_geometry.m_localMaterial.m_matColor.m_rgbaColor[0];
 		visualShapeOut.m_rgbaColor[1] = visual->m_geometry.m_localMaterial.m_matColor.m_rgbaColor[1];
--- a/examples/pybullet/gym/enjoy_pybullet_racecar.py
+++ b/examples/pybullet/gym/enjoy_pybullet_racecar.py
@@ -0,0 +1,26 @@
 import gym
 from envs.bullet.racecarGymEnv import RacecarGymEnv
 from baselines import deepq
 def main():
    env = RacecarGymEnv(renders=True)
    act = deepq.load("racecar_model.pkl")
    print(act)
    while True:
        obs, done = env.reset(), False
        print("===================================")        
        print("obs")
        print(obs)
        episode_rew = 0
        while not done:
            env.render()
            obs, rew, done, _ = env.step(act(obs[None])[0])
            episode_rew += rew
        print("Episode reward", episode_rew)
 if __name__ == '__main__':
    main()
--- a/examples/pybullet/gym/envs/bullet/racecar.py
+++ b/examples/pybullet/gym/envs/bullet/racecar.py
@@ -12,7 +12,7 @@ class Racecar:
  def reset(self):
    self.racecarUniqueId = p.loadURDF("racecar/racecar.urdf", [0,0,.2])
-    self.maxForce = 10
+    self.maxForce = 20
    self.nMotors = 2
    self.motorizedwheels=[2]
    self.inactiveWheels = [3,5,7]
@@ -21,7 +21,7 @@ class Racecar:
    self.motorizedWheels = [2]
    self.steeringLinks=[4,6]
-    self.speedMultiplier = 10.
+    self.speedMultiplier = 4.
  def getActionDimension(self):
@@ -33,18 +33,21 @@ class Racecar:
  def getObservation(self):
    observation = []
    pos,orn=p.getBasePositionAndOrientation(self.racecarUniqueId)
    observation.extend(list(pos))
    observation.extend(list(orn))
    return observation
  def applyAction(self, motorCommands):
    targetVelocity=motorCommands[0]*self.speedMultiplier
-    print("targetVelocity")
+    #print("targetVelocity")
-    print(targetVelocity)
+    #print(targetVelocity)
    steeringAngle = motorCommands[1]
-    print("steeringAngle")
+    #print("steeringAngle")
-    print(steeringAngle)
+    #print(steeringAngle)
-    print("maxForce")
+    #print("maxForce")
-    print(self.maxForce)
+    #print(self.maxForce)
    for motor in self.motorizedwheels:
--- a/examples/pybullet/gym/envs/bullet/racecarGymEnv.py
+++ b/examples/pybullet/gym/envs/bullet/racecarGymEnv.py
@@ -6,6 +6,7 @@ import numpy as np
 import time
 import pybullet as p
 from . import racecar
 import random
 class RacecarGymEnv(gym.Env):
  metadata = {
@@ -15,9 +16,9 @@ class RacecarGymEnv(gym.Env):
  def __init__(self,
               urdfRoot="",
-               actionRepeat=1,
+               actionRepeat=50,
               isEnableSelfCollision=True,
-               render=True):
+               renders=True):
    print("init")
    self._timeStep = 0.01
    self._urdfRoot = urdfRoot
@@ -26,19 +27,20 @@ class RacecarGymEnv(gym.Env):
    self._observation = []
    self._ballUniqueId = -1
    self._envStepCounter = 0
-    self._render = render
+    self._renders = renders
    self._p = p
-    if self._render:
+    if self._renders:
      p.connect(p.GUI)
    else:
      p.connect(p.DIRECT)
    self._seed()
    self.reset()
-    observationDim = self._racecar.getObservationDimension()
+    observationDim = len(self.getExtendedObservation())
    #print("observationDim")
    #print(observationDim)
    observation_high = np.array([np.finfo(np.float32).max] * observationDim)    
-    actionDim = 8
+    self.action_space = spaces.Discrete(9)
    action_high = np.array([1] * actionDim)
    self.action_space = spaces.Box(-action_high, action_high)
    self.observation_space = spaces.Box(-observation_high, observation_high)
    self.viewer = None
@@ -47,16 +49,28 @@ class RacecarGymEnv(gym.Env):
    #p.setPhysicsEngineParameter(numSolverIterations=300)
    p.setTimeStep(self._timeStep)
    #p.loadURDF("%splane.urdf" % self._urdfRoot)
-    p.loadSDF("%sstadium.sdf" % self._urdfRoot)
+    stadiumobjects = p.loadSDF("%sstadium.sdf" % self._urdfRoot)
    #move the stadium objects slightly above 0
    for i in stadiumobjects:
    	pos,orn = p.getBasePositionAndOrientation(i)
    	newpos = [pos[0],pos[1],pos[2]+0.1]
    	p.resetBasePositionAndOrientation(i,newpos,orn)
-    self._ballUniqueId = p.loadURDF("sphere2.urdf",[20,20,1])
+    dist = 5 +2.*random.random()
    ang = 2.*3.1415925438*random.random()
    ballx = dist * math.sin(ang)
    bally = dist * math.cos(ang)
    ballz = 1
    self._ballUniqueId = p.loadURDF("sphere2.urdf",[ballx,bally,ballz])
    p.setGravity(0,0,-10)
    self._racecar = racecar.Racecar(urdfRootPath=self._urdfRoot, timeStep=self._timeStep)
    self._envStepCounter = 0
    for i in range(100):
      p.stepSimulation()
-    self._observation = self._racecar.getObservation()
+    self._observation = self.getExtendedObservation()
-    return self._observation
+    return np.array(self._observation)
  def __del__(self):
    p.disconnect()
@@ -65,44 +79,56 @@ class RacecarGymEnv(gym.Env):
    self.np_random, seed = seeding.np_random(seed)
    return [seed]
  def getExtendedObservation(self):
     self._observation = [] #self._racecar.getObservation()
     carpos,carorn = p.getBasePositionAndOrientation(self._racecar.racecarUniqueId)
     ballpos,ballorn = p.getBasePositionAndOrientation(self._ballUniqueId)      
     invCarPos,invCarOrn = p.invertTransform(carpos,carorn)
     ballPosInCar,ballOrnInCar = p.multiplyTransforms(invCarPos,invCarOrn,ballpos,ballorn)
     self._observation.extend([ballPosInCar[0],ballPosInCar[1]])
     return self._observation
  def _step(self, action):
-    if (self._render):
+    if (self._renders):
      basePos,orn = p.getBasePositionAndOrientation(self._racecar.racecarUniqueId)
-      p.resetDebugVisualizerCamera(1, 30, -40, basePos)
+      #p.resetDebugVisualizerCamera(1, 30, -40, basePos)
-    if len(action) != self._racecar.getActionDimension():
+    fwd = [-5,-5,-5,0,0,0,5,5,5]
-      raise ValueError("We expect {} continuous action not {}.".format(self._racecar.getActionDimension(), len(action)))
+    steerings = [-0.3,0,0.3,-0.3,0,0.3,-0.3,0,0.3]
-
+    forward = fwd[action]
-    for i in range(len(action)):
+    steer = steerings[action]
-      if not -1.01 <= action[i] <= 1.01:
+    realaction = [forward,steer]
-        raise ValueError("{}th action should be between -1 and 1 not {}.".format(i, action[i]))
+    self._racecar.applyAction(realaction)
    self._racecar.applyAction(action)
    for i in range(self._actionRepeat):
      p.stepSimulation()
-      if self._render:
+      if self._renders:
        time.sleep(self._timeStep)
-      self._observation = self._racecar.getObservation()
+      self._observation = self.getExtendedObservation()
      if self._termination():
        break
      self._envStepCounter += 1
    reward = self._reward()
    done = self._termination()
    #print("len=%r" % len(self._observation))
    return np.array(self._observation), reward, done, {}
  def _render(self, mode='human', close=False):
      return
  def _termination(self):
-    return False
+    return self._envStepCounter>1000
  def _reward(self):
    closestPoints = p.getClosestPoints(self._racecar.racecarUniqueId,self._ballUniqueId,10000) 
    numPt = len(closestPoints)
    reward=-1000
-    print(numPt)
+    #print(numPt)
    if (numPt>0):
-      print("reward:")
+      #print("reward:")
-      reward = closestPoints[0][8]
+      reward = -closestPoints[0][8]
-      print(reward)
+      #print(reward)
    return reward
--- a/examples/pybullet/gym/racecarGymEnvTest.py
+++ b/examples/pybullet/gym/racecarGymEnvTest.py
@@ -9,6 +9,22 @@ steeringSlider = environment._p.addUserDebugParameter("steering",-0.5,0.5,0)
 while (True):
  targetVelocity = environment._p.readUserDebugParameter(targetVelocitySlider)
  steeringAngle = environment._p.readUserDebugParameter(steeringSlider)
  discreteAction = 0
  if (targetVelocity<-0.33):
    discreteAction=0
  else:
    if (targetVelocity>0.33):
      discreteAction=6
    else:
      discreteAction=3
  if (steeringAngle>-0.17):
    if (steeringAngle>0.17):
      discreteAction=discreteAction+2
    else:
      discreteAction=discreteAction+1
-  action=[targetVelocity,steeringAngle]
+  action=discreteAction
  state, reward, done, info = environment.step(action)
  obs = environment.getExtendedObservation()
  print("obs")
  print(obs)
--- a/examples/pybullet/gym/train_pybullet_racecar.py
+++ b/examples/pybullet/gym/train_pybullet_racecar.py
@@ -0,0 +1,38 @@
 import gym
 from envs.bullet.racecarGymEnv import RacecarGymEnv
 from baselines import deepq
 import datetime
 def callback(lcl, glb):
    # stop training if reward exceeds 199
    total = sum(lcl['episode_rewards'][-101:-1]) / 100
    totalt = lcl['t']
    is_solved = totalt > 2000 and total >= -50
    return is_solved
 def main():
    env = RacecarGymEnv(renders=False)
    model = deepq.models.mlp([64])
    act = deepq.learn(
        env,
        q_func=model,
        lr=1e-3,
        max_timesteps=10000,
        buffer_size=50000,
        exploration_fraction=0.1,
        exploration_final_eps=0.02,
        print_freq=10,
        callback=callback
    )
    print("Saving model to racecar_model.pkl")
    act.save("racecar_model.pkl")
 if __name__ == '__main__':
    main()
--- a/setup.py
+++ b/setup.py
@@ -419,7 +419,7 @@ else:
 setup(
 	name = 'pybullet',
-	version='1.1.3',
+	version='1.1.4',
 	description='Official Python Interface for the Bullet Physics SDK Robotics Simulator',
 	long_description='pybullet is an easy to use Python module for physics simulation, robotics and machine learning based on the Bullet Physics SDK. With pybullet you can load articulated bodies from URDF, SDF and other file formats. pybullet provides forward dynamics simulation, inverse dynamics computation, forward and inverse kinematics and collision detection and ray intersection queries. Aside from physics simulation, pybullet supports to rendering, with a CPU renderer and OpenGL visualization and support for virtual reality headsets.',
 	url='https://github.com/bulletphysics/bullet3',
--- a/src/BulletCollision/CollisionShapes/btSphereShape.h
+++ b/src/BulletCollision/CollisionShapes/btSphereShape.h
@@ -29,8 +29,11 @@ public:
 	btSphereShape (btScalar radius) : btConvexInternalShape ()
 	{
 		m_shapeType = SPHERE_SHAPE_PROXYTYPE;
 		m_localScaling.setValue(1.0, 1.0, 1.0);
 		m_implicitShapeDimensions.setZero();
 		m_implicitShapeDimensions.setX(radius);
 		m_collisionMargin = radius;
 		m_padding = 0;
 	}
 	virtual btVector3	localGetSupportingVertex(const btVector3& vec)const;