Merge pull request #1181 from erwincoumans/master
Implement OpenAI baselines dqn train/enjoy pybullet racecar
This commit is contained in:
@@ -64,9 +64,15 @@ struct PhysicsDirectInternalData
|
|||||||
PhysicsDirectInternalData()
|
PhysicsDirectInternalData()
|
||||||
:m_hasStatus(false),
|
:m_hasStatus(false),
|
||||||
m_verboseOutput(false),
|
m_verboseOutput(false),
|
||||||
|
m_cachedCameraPixelsWidth(0),
|
||||||
|
m_cachedCameraPixelsHeight(0),
|
||||||
|
m_commandProcessor(NULL),
|
||||||
m_ownsCommandProcessor(false),
|
m_ownsCommandProcessor(false),
|
||||||
m_timeOutInSeconds(1e30)
|
m_timeOutInSeconds(1e30)
|
||||||
{
|
{
|
||||||
|
memset(&m_command, 0, sizeof(m_command));
|
||||||
|
memset(&m_serverStatus, 0, sizeof(m_serverStatus));
|
||||||
|
memset(m_bulletStreamDataServerToClient, 0, sizeof(m_bulletStreamDataServerToClient));
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@@ -177,7 +177,7 @@ void convertURDFToVisualShape(const UrdfShape* visual, const char* urdfPathPrefi
|
|||||||
visualShapeOut.m_dimensions[0] = 0;
|
visualShapeOut.m_dimensions[0] = 0;
|
||||||
visualShapeOut.m_dimensions[1] = 0;
|
visualShapeOut.m_dimensions[1] = 0;
|
||||||
visualShapeOut.m_dimensions[2] = 0;
|
visualShapeOut.m_dimensions[2] = 0;
|
||||||
visualShapeOut.m_meshAssetFileName[0] = 0;
|
memset(visualShapeOut.m_meshAssetFileName, 0, sizeof(visualShapeOut.m_meshAssetFileName));
|
||||||
if (visual->m_geometry.m_hasLocalMaterial) {
|
if (visual->m_geometry.m_hasLocalMaterial) {
|
||||||
visualShapeOut.m_rgbaColor[0] = visual->m_geometry.m_localMaterial.m_matColor.m_rgbaColor[0];
|
visualShapeOut.m_rgbaColor[0] = visual->m_geometry.m_localMaterial.m_matColor.m_rgbaColor[0];
|
||||||
visualShapeOut.m_rgbaColor[1] = visual->m_geometry.m_localMaterial.m_matColor.m_rgbaColor[1];
|
visualShapeOut.m_rgbaColor[1] = visual->m_geometry.m_localMaterial.m_matColor.m_rgbaColor[1];
|
||||||
|
|||||||
26
examples/pybullet/gym/enjoy_pybullet_racecar.py
Normal file
26
examples/pybullet/gym/enjoy_pybullet_racecar.py
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
import gym
|
||||||
|
from envs.bullet.racecarGymEnv import RacecarGymEnv
|
||||||
|
|
||||||
|
from baselines import deepq
|
||||||
|
|
||||||
|
|
||||||
|
def main():
|
||||||
|
|
||||||
|
env = RacecarGymEnv(renders=True)
|
||||||
|
act = deepq.load("racecar_model.pkl")
|
||||||
|
print(act)
|
||||||
|
while True:
|
||||||
|
obs, done = env.reset(), False
|
||||||
|
print("===================================")
|
||||||
|
print("obs")
|
||||||
|
print(obs)
|
||||||
|
episode_rew = 0
|
||||||
|
while not done:
|
||||||
|
env.render()
|
||||||
|
obs, rew, done, _ = env.step(act(obs[None])[0])
|
||||||
|
episode_rew += rew
|
||||||
|
print("Episode reward", episode_rew)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
main()
|
||||||
@@ -12,7 +12,7 @@ class Racecar:
|
|||||||
|
|
||||||
def reset(self):
|
def reset(self):
|
||||||
self.racecarUniqueId = p.loadURDF("racecar/racecar.urdf", [0,0,.2])
|
self.racecarUniqueId = p.loadURDF("racecar/racecar.urdf", [0,0,.2])
|
||||||
self.maxForce = 10
|
self.maxForce = 20
|
||||||
self.nMotors = 2
|
self.nMotors = 2
|
||||||
self.motorizedwheels=[2]
|
self.motorizedwheels=[2]
|
||||||
self.inactiveWheels = [3,5,7]
|
self.inactiveWheels = [3,5,7]
|
||||||
@@ -21,7 +21,7 @@ class Racecar:
|
|||||||
|
|
||||||
self.motorizedWheels = [2]
|
self.motorizedWheels = [2]
|
||||||
self.steeringLinks=[4,6]
|
self.steeringLinks=[4,6]
|
||||||
self.speedMultiplier = 10.
|
self.speedMultiplier = 4.
|
||||||
|
|
||||||
|
|
||||||
def getActionDimension(self):
|
def getActionDimension(self):
|
||||||
@@ -33,18 +33,21 @@ class Racecar:
|
|||||||
def getObservation(self):
|
def getObservation(self):
|
||||||
observation = []
|
observation = []
|
||||||
pos,orn=p.getBasePositionAndOrientation(self.racecarUniqueId)
|
pos,orn=p.getBasePositionAndOrientation(self.racecarUniqueId)
|
||||||
|
|
||||||
observation.extend(list(pos))
|
observation.extend(list(pos))
|
||||||
|
observation.extend(list(orn))
|
||||||
|
|
||||||
return observation
|
return observation
|
||||||
|
|
||||||
def applyAction(self, motorCommands):
|
def applyAction(self, motorCommands):
|
||||||
targetVelocity=motorCommands[0]*self.speedMultiplier
|
targetVelocity=motorCommands[0]*self.speedMultiplier
|
||||||
print("targetVelocity")
|
#print("targetVelocity")
|
||||||
print(targetVelocity)
|
#print(targetVelocity)
|
||||||
steeringAngle = motorCommands[1]
|
steeringAngle = motorCommands[1]
|
||||||
print("steeringAngle")
|
#print("steeringAngle")
|
||||||
print(steeringAngle)
|
#print(steeringAngle)
|
||||||
print("maxForce")
|
#print("maxForce")
|
||||||
print(self.maxForce)
|
#print(self.maxForce)
|
||||||
|
|
||||||
|
|
||||||
for motor in self.motorizedwheels:
|
for motor in self.motorizedwheels:
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ import numpy as np
|
|||||||
import time
|
import time
|
||||||
import pybullet as p
|
import pybullet as p
|
||||||
from . import racecar
|
from . import racecar
|
||||||
|
import random
|
||||||
|
|
||||||
class RacecarGymEnv(gym.Env):
|
class RacecarGymEnv(gym.Env):
|
||||||
metadata = {
|
metadata = {
|
||||||
@@ -15,9 +16,9 @@ class RacecarGymEnv(gym.Env):
|
|||||||
|
|
||||||
def __init__(self,
|
def __init__(self,
|
||||||
urdfRoot="",
|
urdfRoot="",
|
||||||
actionRepeat=1,
|
actionRepeat=50,
|
||||||
isEnableSelfCollision=True,
|
isEnableSelfCollision=True,
|
||||||
render=True):
|
renders=True):
|
||||||
print("init")
|
print("init")
|
||||||
self._timeStep = 0.01
|
self._timeStep = 0.01
|
||||||
self._urdfRoot = urdfRoot
|
self._urdfRoot = urdfRoot
|
||||||
@@ -26,19 +27,20 @@ class RacecarGymEnv(gym.Env):
|
|||||||
self._observation = []
|
self._observation = []
|
||||||
self._ballUniqueId = -1
|
self._ballUniqueId = -1
|
||||||
self._envStepCounter = 0
|
self._envStepCounter = 0
|
||||||
self._render = render
|
self._renders = renders
|
||||||
self._p = p
|
self._p = p
|
||||||
if self._render:
|
if self._renders:
|
||||||
p.connect(p.GUI)
|
p.connect(p.GUI)
|
||||||
else:
|
else:
|
||||||
p.connect(p.DIRECT)
|
p.connect(p.DIRECT)
|
||||||
self._seed()
|
self._seed()
|
||||||
self.reset()
|
self.reset()
|
||||||
observationDim = self._racecar.getObservationDimension()
|
observationDim = len(self.getExtendedObservation())
|
||||||
|
#print("observationDim")
|
||||||
|
#print(observationDim)
|
||||||
|
|
||||||
observation_high = np.array([np.finfo(np.float32).max] * observationDim)
|
observation_high = np.array([np.finfo(np.float32).max] * observationDim)
|
||||||
actionDim = 8
|
self.action_space = spaces.Discrete(9)
|
||||||
action_high = np.array([1] * actionDim)
|
|
||||||
self.action_space = spaces.Box(-action_high, action_high)
|
|
||||||
self.observation_space = spaces.Box(-observation_high, observation_high)
|
self.observation_space = spaces.Box(-observation_high, observation_high)
|
||||||
self.viewer = None
|
self.viewer = None
|
||||||
|
|
||||||
@@ -47,16 +49,28 @@ class RacecarGymEnv(gym.Env):
|
|||||||
#p.setPhysicsEngineParameter(numSolverIterations=300)
|
#p.setPhysicsEngineParameter(numSolverIterations=300)
|
||||||
p.setTimeStep(self._timeStep)
|
p.setTimeStep(self._timeStep)
|
||||||
#p.loadURDF("%splane.urdf" % self._urdfRoot)
|
#p.loadURDF("%splane.urdf" % self._urdfRoot)
|
||||||
p.loadSDF("%sstadium.sdf" % self._urdfRoot)
|
stadiumobjects = p.loadSDF("%sstadium.sdf" % self._urdfRoot)
|
||||||
|
#move the stadium objects slightly above 0
|
||||||
|
for i in stadiumobjects:
|
||||||
|
pos,orn = p.getBasePositionAndOrientation(i)
|
||||||
|
newpos = [pos[0],pos[1],pos[2]+0.1]
|
||||||
|
p.resetBasePositionAndOrientation(i,newpos,orn)
|
||||||
|
|
||||||
self._ballUniqueId = p.loadURDF("sphere2.urdf",[20,20,1])
|
dist = 5 +2.*random.random()
|
||||||
|
ang = 2.*3.1415925438*random.random()
|
||||||
|
|
||||||
|
ballx = dist * math.sin(ang)
|
||||||
|
bally = dist * math.cos(ang)
|
||||||
|
ballz = 1
|
||||||
|
|
||||||
|
self._ballUniqueId = p.loadURDF("sphere2.urdf",[ballx,bally,ballz])
|
||||||
p.setGravity(0,0,-10)
|
p.setGravity(0,0,-10)
|
||||||
self._racecar = racecar.Racecar(urdfRootPath=self._urdfRoot, timeStep=self._timeStep)
|
self._racecar = racecar.Racecar(urdfRootPath=self._urdfRoot, timeStep=self._timeStep)
|
||||||
self._envStepCounter = 0
|
self._envStepCounter = 0
|
||||||
for i in range(100):
|
for i in range(100):
|
||||||
p.stepSimulation()
|
p.stepSimulation()
|
||||||
self._observation = self._racecar.getObservation()
|
self._observation = self.getExtendedObservation()
|
||||||
return self._observation
|
return np.array(self._observation)
|
||||||
|
|
||||||
def __del__(self):
|
def __del__(self):
|
||||||
p.disconnect()
|
p.disconnect()
|
||||||
@@ -65,44 +79,56 @@ class RacecarGymEnv(gym.Env):
|
|||||||
self.np_random, seed = seeding.np_random(seed)
|
self.np_random, seed = seeding.np_random(seed)
|
||||||
return [seed]
|
return [seed]
|
||||||
|
|
||||||
|
def getExtendedObservation(self):
|
||||||
|
self._observation = [] #self._racecar.getObservation()
|
||||||
|
carpos,carorn = p.getBasePositionAndOrientation(self._racecar.racecarUniqueId)
|
||||||
|
ballpos,ballorn = p.getBasePositionAndOrientation(self._ballUniqueId)
|
||||||
|
invCarPos,invCarOrn = p.invertTransform(carpos,carorn)
|
||||||
|
ballPosInCar,ballOrnInCar = p.multiplyTransforms(invCarPos,invCarOrn,ballpos,ballorn)
|
||||||
|
|
||||||
|
self._observation.extend([ballPosInCar[0],ballPosInCar[1]])
|
||||||
|
return self._observation
|
||||||
|
|
||||||
def _step(self, action):
|
def _step(self, action):
|
||||||
if (self._render):
|
if (self._renders):
|
||||||
basePos,orn = p.getBasePositionAndOrientation(self._racecar.racecarUniqueId)
|
basePos,orn = p.getBasePositionAndOrientation(self._racecar.racecarUniqueId)
|
||||||
p.resetDebugVisualizerCamera(1, 30, -40, basePos)
|
#p.resetDebugVisualizerCamera(1, 30, -40, basePos)
|
||||||
|
|
||||||
if len(action) != self._racecar.getActionDimension():
|
fwd = [-5,-5,-5,0,0,0,5,5,5]
|
||||||
raise ValueError("We expect {} continuous action not {}.".format(self._racecar.getActionDimension(), len(action)))
|
steerings = [-0.3,0,0.3,-0.3,0,0.3,-0.3,0,0.3]
|
||||||
|
forward = fwd[action]
|
||||||
for i in range(len(action)):
|
steer = steerings[action]
|
||||||
if not -1.01 <= action[i] <= 1.01:
|
realaction = [forward,steer]
|
||||||
raise ValueError("{}th action should be between -1 and 1 not {}.".format(i, action[i]))
|
self._racecar.applyAction(realaction)
|
||||||
|
|
||||||
self._racecar.applyAction(action)
|
|
||||||
for i in range(self._actionRepeat):
|
for i in range(self._actionRepeat):
|
||||||
p.stepSimulation()
|
p.stepSimulation()
|
||||||
if self._render:
|
if self._renders:
|
||||||
time.sleep(self._timeStep)
|
time.sleep(self._timeStep)
|
||||||
self._observation = self._racecar.getObservation()
|
self._observation = self.getExtendedObservation()
|
||||||
|
|
||||||
if self._termination():
|
if self._termination():
|
||||||
break
|
break
|
||||||
self._envStepCounter += 1
|
self._envStepCounter += 1
|
||||||
reward = self._reward()
|
reward = self._reward()
|
||||||
done = self._termination()
|
done = self._termination()
|
||||||
|
#print("len=%r" % len(self._observation))
|
||||||
|
|
||||||
return np.array(self._observation), reward, done, {}
|
return np.array(self._observation), reward, done, {}
|
||||||
|
|
||||||
def _render(self, mode='human', close=False):
|
def _render(self, mode='human', close=False):
|
||||||
return
|
return
|
||||||
|
|
||||||
def _termination(self):
|
def _termination(self):
|
||||||
return False
|
return self._envStepCounter>1000
|
||||||
|
|
||||||
def _reward(self):
|
def _reward(self):
|
||||||
closestPoints = p.getClosestPoints(self._racecar.racecarUniqueId,self._ballUniqueId,10000)
|
closestPoints = p.getClosestPoints(self._racecar.racecarUniqueId,self._ballUniqueId,10000)
|
||||||
|
|
||||||
numPt = len(closestPoints)
|
numPt = len(closestPoints)
|
||||||
reward=-1000
|
reward=-1000
|
||||||
print(numPt)
|
#print(numPt)
|
||||||
if (numPt>0):
|
if (numPt>0):
|
||||||
print("reward:")
|
#print("reward:")
|
||||||
reward = closestPoints[0][8]
|
reward = -closestPoints[0][8]
|
||||||
print(reward)
|
#print(reward)
|
||||||
return reward
|
return reward
|
||||||
|
|||||||
@@ -9,6 +9,22 @@ steeringSlider = environment._p.addUserDebugParameter("steering",-0.5,0.5,0)
|
|||||||
while (True):
|
while (True):
|
||||||
targetVelocity = environment._p.readUserDebugParameter(targetVelocitySlider)
|
targetVelocity = environment._p.readUserDebugParameter(targetVelocitySlider)
|
||||||
steeringAngle = environment._p.readUserDebugParameter(steeringSlider)
|
steeringAngle = environment._p.readUserDebugParameter(steeringSlider)
|
||||||
|
discreteAction = 0
|
||||||
|
if (targetVelocity<-0.33):
|
||||||
|
discreteAction=0
|
||||||
|
else:
|
||||||
|
if (targetVelocity>0.33):
|
||||||
|
discreteAction=6
|
||||||
|
else:
|
||||||
|
discreteAction=3
|
||||||
|
if (steeringAngle>-0.17):
|
||||||
|
if (steeringAngle>0.17):
|
||||||
|
discreteAction=discreteAction+2
|
||||||
|
else:
|
||||||
|
discreteAction=discreteAction+1
|
||||||
|
|
||||||
action=[targetVelocity,steeringAngle]
|
action=discreteAction
|
||||||
state, reward, done, info = environment.step(action)
|
state, reward, done, info = environment.step(action)
|
||||||
|
obs = environment.getExtendedObservation()
|
||||||
|
print("obs")
|
||||||
|
print(obs)
|
||||||
|
|||||||
38
examples/pybullet/gym/train_pybullet_racecar.py
Normal file
38
examples/pybullet/gym/train_pybullet_racecar.py
Normal file
@@ -0,0 +1,38 @@
|
|||||||
|
import gym
|
||||||
|
from envs.bullet.racecarGymEnv import RacecarGymEnv
|
||||||
|
|
||||||
|
from baselines import deepq
|
||||||
|
|
||||||
|
import datetime
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def callback(lcl, glb):
|
||||||
|
# stop training if reward exceeds 199
|
||||||
|
total = sum(lcl['episode_rewards'][-101:-1]) / 100
|
||||||
|
totalt = lcl['t']
|
||||||
|
is_solved = totalt > 2000 and total >= -50
|
||||||
|
return is_solved
|
||||||
|
|
||||||
|
|
||||||
|
def main():
|
||||||
|
|
||||||
|
env = RacecarGymEnv(renders=False)
|
||||||
|
model = deepq.models.mlp([64])
|
||||||
|
act = deepq.learn(
|
||||||
|
env,
|
||||||
|
q_func=model,
|
||||||
|
lr=1e-3,
|
||||||
|
max_timesteps=10000,
|
||||||
|
buffer_size=50000,
|
||||||
|
exploration_fraction=0.1,
|
||||||
|
exploration_final_eps=0.02,
|
||||||
|
print_freq=10,
|
||||||
|
callback=callback
|
||||||
|
)
|
||||||
|
print("Saving model to racecar_model.pkl")
|
||||||
|
act.save("racecar_model.pkl")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
main()
|
||||||
2
setup.py
2
setup.py
@@ -419,7 +419,7 @@ else:
|
|||||||
|
|
||||||
setup(
|
setup(
|
||||||
name = 'pybullet',
|
name = 'pybullet',
|
||||||
version='1.1.3',
|
version='1.1.4',
|
||||||
description='Official Python Interface for the Bullet Physics SDK Robotics Simulator',
|
description='Official Python Interface for the Bullet Physics SDK Robotics Simulator',
|
||||||
long_description='pybullet is an easy to use Python module for physics simulation, robotics and machine learning based on the Bullet Physics SDK. With pybullet you can load articulated bodies from URDF, SDF and other file formats. pybullet provides forward dynamics simulation, inverse dynamics computation, forward and inverse kinematics and collision detection and ray intersection queries. Aside from physics simulation, pybullet supports to rendering, with a CPU renderer and OpenGL visualization and support for virtual reality headsets.',
|
long_description='pybullet is an easy to use Python module for physics simulation, robotics and machine learning based on the Bullet Physics SDK. With pybullet you can load articulated bodies from URDF, SDF and other file formats. pybullet provides forward dynamics simulation, inverse dynamics computation, forward and inverse kinematics and collision detection and ray intersection queries. Aside from physics simulation, pybullet supports to rendering, with a CPU renderer and OpenGL visualization and support for virtual reality headsets.',
|
||||||
url='https://github.com/bulletphysics/bullet3',
|
url='https://github.com/bulletphysics/bullet3',
|
||||||
|
|||||||
@@ -29,8 +29,11 @@ public:
|
|||||||
btSphereShape (btScalar radius) : btConvexInternalShape ()
|
btSphereShape (btScalar radius) : btConvexInternalShape ()
|
||||||
{
|
{
|
||||||
m_shapeType = SPHERE_SHAPE_PROXYTYPE;
|
m_shapeType = SPHERE_SHAPE_PROXYTYPE;
|
||||||
|
m_localScaling.setValue(1.0, 1.0, 1.0);
|
||||||
|
m_implicitShapeDimensions.setZero();
|
||||||
m_implicitShapeDimensions.setX(radius);
|
m_implicitShapeDimensions.setX(radius);
|
||||||
m_collisionMargin = radius;
|
m_collisionMargin = radius;
|
||||||
|
m_padding = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual btVector3 localGetSupportingVertex(const btVector3& vec)const;
|
virtual btVector3 localGetSupportingVertex(const btVector3& vec)const;
|
||||||
|
|||||||
Reference in New Issue
Block a user