Merge pull request #1181 from erwincoumans/master

Implement OpenAI Baselines DQN train/enjoy scripts for the pybullet racecar
This commit is contained in:
erwincoumans
2017-06-10 20:34:42 -07:00
committed by GitHub
9 changed files with 164 additions and 46 deletions

View File

@@ -64,9 +64,15 @@ struct PhysicsDirectInternalData
PhysicsDirectInternalData() PhysicsDirectInternalData()
:m_hasStatus(false), :m_hasStatus(false),
m_verboseOutput(false), m_verboseOutput(false),
m_cachedCameraPixelsWidth(0),
m_cachedCameraPixelsHeight(0),
m_commandProcessor(NULL),
m_ownsCommandProcessor(false), m_ownsCommandProcessor(false),
m_timeOutInSeconds(1e30) m_timeOutInSeconds(1e30)
{ {
memset(&m_command, 0, sizeof(m_command));
memset(&m_serverStatus, 0, sizeof(m_serverStatus));
memset(m_bulletStreamDataServerToClient, 0, sizeof(m_bulletStreamDataServerToClient));
} }
}; };

View File

@@ -177,7 +177,7 @@ void convertURDFToVisualShape(const UrdfShape* visual, const char* urdfPathPrefi
visualShapeOut.m_dimensions[0] = 0; visualShapeOut.m_dimensions[0] = 0;
visualShapeOut.m_dimensions[1] = 0; visualShapeOut.m_dimensions[1] = 0;
visualShapeOut.m_dimensions[2] = 0; visualShapeOut.m_dimensions[2] = 0;
visualShapeOut.m_meshAssetFileName[0] = 0; memset(visualShapeOut.m_meshAssetFileName, 0, sizeof(visualShapeOut.m_meshAssetFileName));
if (visual->m_geometry.m_hasLocalMaterial) { if (visual->m_geometry.m_hasLocalMaterial) {
visualShapeOut.m_rgbaColor[0] = visual->m_geometry.m_localMaterial.m_matColor.m_rgbaColor[0]; visualShapeOut.m_rgbaColor[0] = visual->m_geometry.m_localMaterial.m_matColor.m_rgbaColor[0];
visualShapeOut.m_rgbaColor[1] = visual->m_geometry.m_localMaterial.m_matColor.m_rgbaColor[1]; visualShapeOut.m_rgbaColor[1] = visual->m_geometry.m_localMaterial.m_matColor.m_rgbaColor[1];

View File

@@ -0,0 +1,26 @@
import gym
from envs.bullet.racecarGymEnv import RacecarGymEnv
from baselines import deepq
def _play_episode(env, act):
    """Run one episode with the loaded policy and return its summed reward.

    The environment is reset, the initial observation is printed, and the
    policy is then stepped until the environment reports ``done``.
    """
    observation = env.reset()
    finished = False
    print("===================================")
    print("obs")
    print(observation)
    total_reward = 0
    while not finished:
        env.render()
        # The policy is called on obs[None] (a leading axis is added) and
        # the first element of its result is used as the action.
        chosen_action = act(observation[None])[0]
        observation, step_reward, finished, _ = env.step(chosen_action)
        total_reward += step_reward
    return total_reward


def main():
    """Load the saved racecar model and replay it forever with rendering on.

    The loop never exits on its own; stop the script by interrupting it.
    """
    env = RacecarGymEnv(renders=True)
    act = deepq.load("racecar_model.pkl")
    print(act)
    while True:
        episode_rew = _play_episode(env, act)
        print("Episode reward", episode_rew)


if __name__ == '__main__':
    main()

View File

@@ -12,7 +12,7 @@ class Racecar:
def reset(self): def reset(self):
self.racecarUniqueId = p.loadURDF("racecar/racecar.urdf", [0,0,.2]) self.racecarUniqueId = p.loadURDF("racecar/racecar.urdf", [0,0,.2])
self.maxForce = 10 self.maxForce = 20
self.nMotors = 2 self.nMotors = 2
self.motorizedwheels=[2] self.motorizedwheels=[2]
self.inactiveWheels = [3,5,7] self.inactiveWheels = [3,5,7]
@@ -21,7 +21,7 @@ class Racecar:
self.motorizedWheels = [2] self.motorizedWheels = [2]
self.steeringLinks=[4,6] self.steeringLinks=[4,6]
self.speedMultiplier = 10. self.speedMultiplier = 4.
def getActionDimension(self): def getActionDimension(self):
@@ -33,18 +33,21 @@ class Racecar:
def getObservation(self): def getObservation(self):
observation = [] observation = []
pos,orn=p.getBasePositionAndOrientation(self.racecarUniqueId) pos,orn=p.getBasePositionAndOrientation(self.racecarUniqueId)
observation.extend(list(pos)) observation.extend(list(pos))
observation.extend(list(orn))
return observation return observation
def applyAction(self, motorCommands): def applyAction(self, motorCommands):
targetVelocity=motorCommands[0]*self.speedMultiplier targetVelocity=motorCommands[0]*self.speedMultiplier
print("targetVelocity") #print("targetVelocity")
print(targetVelocity) #print(targetVelocity)
steeringAngle = motorCommands[1] steeringAngle = motorCommands[1]
print("steeringAngle") #print("steeringAngle")
print(steeringAngle) #print(steeringAngle)
print("maxForce") #print("maxForce")
print(self.maxForce) #print(self.maxForce)
for motor in self.motorizedwheels: for motor in self.motorizedwheels:

View File

@@ -6,6 +6,7 @@ import numpy as np
import time import time
import pybullet as p import pybullet as p
from . import racecar from . import racecar
import random
class RacecarGymEnv(gym.Env): class RacecarGymEnv(gym.Env):
metadata = { metadata = {
@@ -15,9 +16,9 @@ class RacecarGymEnv(gym.Env):
def __init__(self, def __init__(self,
urdfRoot="", urdfRoot="",
actionRepeat=1, actionRepeat=50,
isEnableSelfCollision=True, isEnableSelfCollision=True,
render=True): renders=True):
print("init") print("init")
self._timeStep = 0.01 self._timeStep = 0.01
self._urdfRoot = urdfRoot self._urdfRoot = urdfRoot
@@ -26,19 +27,20 @@ class RacecarGymEnv(gym.Env):
self._observation = [] self._observation = []
self._ballUniqueId = -1 self._ballUniqueId = -1
self._envStepCounter = 0 self._envStepCounter = 0
self._render = render self._renders = renders
self._p = p self._p = p
if self._render: if self._renders:
p.connect(p.GUI) p.connect(p.GUI)
else: else:
p.connect(p.DIRECT) p.connect(p.DIRECT)
self._seed() self._seed()
self.reset() self.reset()
observationDim = self._racecar.getObservationDimension() observationDim = len(self.getExtendedObservation())
#print("observationDim")
#print(observationDim)
observation_high = np.array([np.finfo(np.float32).max] * observationDim) observation_high = np.array([np.finfo(np.float32).max] * observationDim)
actionDim = 8 self.action_space = spaces.Discrete(9)
action_high = np.array([1] * actionDim)
self.action_space = spaces.Box(-action_high, action_high)
self.observation_space = spaces.Box(-observation_high, observation_high) self.observation_space = spaces.Box(-observation_high, observation_high)
self.viewer = None self.viewer = None
@@ -47,16 +49,28 @@ class RacecarGymEnv(gym.Env):
#p.setPhysicsEngineParameter(numSolverIterations=300) #p.setPhysicsEngineParameter(numSolverIterations=300)
p.setTimeStep(self._timeStep) p.setTimeStep(self._timeStep)
#p.loadURDF("%splane.urdf" % self._urdfRoot) #p.loadURDF("%splane.urdf" % self._urdfRoot)
p.loadSDF("%sstadium.sdf" % self._urdfRoot) stadiumobjects = p.loadSDF("%sstadium.sdf" % self._urdfRoot)
#move the stadium objects slightly above 0
for i in stadiumobjects:
pos,orn = p.getBasePositionAndOrientation(i)
newpos = [pos[0],pos[1],pos[2]+0.1]
p.resetBasePositionAndOrientation(i,newpos,orn)
self._ballUniqueId = p.loadURDF("sphere2.urdf",[20,20,1]) dist = 5 +2.*random.random()
ang = 2.*3.1415925438*random.random()
ballx = dist * math.sin(ang)
bally = dist * math.cos(ang)
ballz = 1
self._ballUniqueId = p.loadURDF("sphere2.urdf",[ballx,bally,ballz])
p.setGravity(0,0,-10) p.setGravity(0,0,-10)
self._racecar = racecar.Racecar(urdfRootPath=self._urdfRoot, timeStep=self._timeStep) self._racecar = racecar.Racecar(urdfRootPath=self._urdfRoot, timeStep=self._timeStep)
self._envStepCounter = 0 self._envStepCounter = 0
for i in range(100): for i in range(100):
p.stepSimulation() p.stepSimulation()
self._observation = self._racecar.getObservation() self._observation = self.getExtendedObservation()
return self._observation return np.array(self._observation)
def __del__(self): def __del__(self):
p.disconnect() p.disconnect()
@@ -65,44 +79,56 @@ class RacecarGymEnv(gym.Env):
self.np_random, seed = seeding.np_random(seed) self.np_random, seed = seeding.np_random(seed)
return [seed] return [seed]
def getExtendedObservation(self):
self._observation = [] #self._racecar.getObservation()
carpos,carorn = p.getBasePositionAndOrientation(self._racecar.racecarUniqueId)
ballpos,ballorn = p.getBasePositionAndOrientation(self._ballUniqueId)
invCarPos,invCarOrn = p.invertTransform(carpos,carorn)
ballPosInCar,ballOrnInCar = p.multiplyTransforms(invCarPos,invCarOrn,ballpos,ballorn)
self._observation.extend([ballPosInCar[0],ballPosInCar[1]])
return self._observation
def _step(self, action): def _step(self, action):
if (self._render): if (self._renders):
basePos,orn = p.getBasePositionAndOrientation(self._racecar.racecarUniqueId) basePos,orn = p.getBasePositionAndOrientation(self._racecar.racecarUniqueId)
p.resetDebugVisualizerCamera(1, 30, -40, basePos) #p.resetDebugVisualizerCamera(1, 30, -40, basePos)
if len(action) != self._racecar.getActionDimension(): fwd = [-5,-5,-5,0,0,0,5,5,5]
raise ValueError("We expect {} continuous action not {}.".format(self._racecar.getActionDimension(), len(action))) steerings = [-0.3,0,0.3,-0.3,0,0.3,-0.3,0,0.3]
forward = fwd[action]
for i in range(len(action)): steer = steerings[action]
if not -1.01 <= action[i] <= 1.01: realaction = [forward,steer]
raise ValueError("{}th action should be between -1 and 1 not {}.".format(i, action[i])) self._racecar.applyAction(realaction)
self._racecar.applyAction(action)
for i in range(self._actionRepeat): for i in range(self._actionRepeat):
p.stepSimulation() p.stepSimulation()
if self._render: if self._renders:
time.sleep(self._timeStep) time.sleep(self._timeStep)
self._observation = self._racecar.getObservation() self._observation = self.getExtendedObservation()
if self._termination(): if self._termination():
break break
self._envStepCounter += 1 self._envStepCounter += 1
reward = self._reward() reward = self._reward()
done = self._termination() done = self._termination()
#print("len=%r" % len(self._observation))
return np.array(self._observation), reward, done, {} return np.array(self._observation), reward, done, {}
def _render(self, mode='human', close=False): def _render(self, mode='human', close=False):
return return
def _termination(self): def _termination(self):
return False return self._envStepCounter>1000
def _reward(self): def _reward(self):
closestPoints = p.getClosestPoints(self._racecar.racecarUniqueId,self._ballUniqueId,10000) closestPoints = p.getClosestPoints(self._racecar.racecarUniqueId,self._ballUniqueId,10000)
numPt = len(closestPoints) numPt = len(closestPoints)
reward=-1000 reward=-1000
print(numPt) #print(numPt)
if (numPt>0): if (numPt>0):
print("reward:") #print("reward:")
reward = closestPoints[0][8] reward = -closestPoints[0][8]
print(reward) #print(reward)
return reward return reward

View File

@@ -9,6 +9,22 @@ steeringSlider = environment._p.addUserDebugParameter("steering",-0.5,0.5,0)
while (True): while (True):
targetVelocity = environment._p.readUserDebugParameter(targetVelocitySlider) targetVelocity = environment._p.readUserDebugParameter(targetVelocitySlider)
steeringAngle = environment._p.readUserDebugParameter(steeringSlider) steeringAngle = environment._p.readUserDebugParameter(steeringSlider)
discreteAction = 0
if (targetVelocity<-0.33):
discreteAction=0
else:
if (targetVelocity>0.33):
discreteAction=6
else:
discreteAction=3
if (steeringAngle>-0.17):
if (steeringAngle>0.17):
discreteAction=discreteAction+2
else:
discreteAction=discreteAction+1
action=[targetVelocity,steeringAngle] action=discreteAction
state, reward, done, info = environment.step(action) state, reward, done, info = environment.step(action)
obs = environment.getExtendedObservation()
print("obs")
print(obs)

View File

@@ -0,0 +1,38 @@
import gym
from envs.bullet.racecarGymEnv import RacecarGymEnv
from baselines import deepq
import datetime
def callback(lcl, glb):
    """Decide whether training can stop early.

    Args:
        lcl: Mapping of the learner's local variables. Only
            ``lcl['episode_rewards']`` (a sequence of per-episode rewards)
            and ``lcl['t']`` (the timestep counter) are read.
        glb: Mapping of the learner's globals; unused.

    Returns:
        True when more than 2000 timesteps have elapsed and the mean reward
        over the most recent episodes in the window (at most 100) is at
        least -50; False otherwise, including when the window is empty.
    """
    # The final entry is excluded, leaving up to 100 earlier episodes.
    # NOTE(review): presumably the last entry is the episode still in
    # progress -- confirm against the baselines learner.
    recent = lcl['episode_rewards'][-101:-1]
    if not recent:
        # Nothing has been recorded in the window yet, so there is no
        # evidence on which to call the task solved.
        return False
    # Average over the episodes actually present. Dividing by a fixed 100
    # would pull a short window toward zero, which for negative rewards
    # makes the agent look better than it is.
    mean_reward = sum(recent) / float(len(recent))
    return lcl['t'] > 2000 and mean_reward >= -50
def main():
    """Train a DQN policy on the racecar environment and save it to disk.

    The environment is created with rendering disabled, the Q-function is
    built with ``deepq.models.mlp([64])``, and the learned policy is written
    to ``racecar_model.pkl``.
    """
    racecar_env = RacecarGymEnv(renders=False)
    q_network = deepq.models.mlp([64])

    # Hyperparameters forwarded unchanged to deepq.learn.
    learn_settings = dict(
        lr=1e-3,
        max_timesteps=10000,
        buffer_size=50000,
        exploration_fraction=0.1,
        exploration_final_eps=0.02,
        print_freq=10,
        callback=callback,
    )
    trained_policy = deepq.learn(racecar_env, q_func=q_network, **learn_settings)

    print("Saving model to racecar_model.pkl")
    trained_policy.save("racecar_model.pkl")


if __name__ == '__main__':
    main()

View File

@@ -419,7 +419,7 @@ else:
setup( setup(
name = 'pybullet', name = 'pybullet',
version='1.1.3', version='1.1.4',
description='Official Python Interface for the Bullet Physics SDK Robotics Simulator', description='Official Python Interface for the Bullet Physics SDK Robotics Simulator',
long_description='pybullet is an easy to use Python module for physics simulation, robotics and machine learning based on the Bullet Physics SDK. With pybullet you can load articulated bodies from URDF, SDF and other file formats. pybullet provides forward dynamics simulation, inverse dynamics computation, forward and inverse kinematics and collision detection and ray intersection queries. Aside from physics simulation, pybullet supports to rendering, with a CPU renderer and OpenGL visualization and support for virtual reality headsets.', long_description='pybullet is an easy to use Python module for physics simulation, robotics and machine learning based on the Bullet Physics SDK. With pybullet you can load articulated bodies from URDF, SDF and other file formats. pybullet provides forward dynamics simulation, inverse dynamics computation, forward and inverse kinematics and collision detection and ray intersection queries. Aside from physics simulation, pybullet supports to rendering, with a CPU renderer and OpenGL visualization and support for virtual reality headsets.',
url='https://github.com/bulletphysics/bullet3', url='https://github.com/bulletphysics/bullet3',

View File

@@ -29,8 +29,11 @@ public:
btSphereShape (btScalar radius) : btConvexInternalShape () btSphereShape (btScalar radius) : btConvexInternalShape ()
{ {
m_shapeType = SPHERE_SHAPE_PROXYTYPE; m_shapeType = SPHERE_SHAPE_PROXYTYPE;
m_localScaling.setValue(1.0, 1.0, 1.0);
m_implicitShapeDimensions.setZero();
m_implicitShapeDimensions.setX(radius); m_implicitShapeDimensions.setX(radius);
m_collisionMargin = radius; m_collisionMargin = radius;
m_padding = 0;
} }
virtual btVector3 localGetSupportingVertex(const btVector3& vec)const; virtual btVector3 localGetSupportingVertex(const btVector3& vec)const;