From 82e3c553b9bfdc44fbeb48c18aeac90ac7616d24 Mon Sep 17 00:00:00 2001 From: Erwin Coumans Date: Fri, 9 Jun 2017 14:03:47 -0700 Subject: [PATCH 1/4] bump up pybullet version (some memory leaks were fixed since 1.1.3) --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 235396713..bb86bdd4b 100644 --- a/setup.py +++ b/setup.py @@ -419,7 +419,7 @@ else: setup( name = 'pybullet', - version='1.1.3', + version='1.1.4', description='Official Python Interface for the Bullet Physics SDK Robotics Simulator', long_description='pybullet is an easy to use Python module for physics simulation, robotics and machine learning based on the Bullet Physics SDK. With pybullet you can load articulated bodies from URDF, SDF and other file formats. pybullet provides forward dynamics simulation, inverse dynamics computation, forward and inverse kinematics and collision detection and ray intersection queries. Aside from physics simulation, pybullet supports to rendering, with a CPU renderer and OpenGL visualization and support for virtual reality headsets.', url='https://github.com/bulletphysics/bullet3', From b361722500d05a53eaacdc3e5ded402658713338 Mon Sep 17 00:00:00 2001 From: Erwin Coumans Date: Fri, 9 Jun 2017 19:26:07 -0700 Subject: [PATCH 2/4] Implement train_pybullet_racecar.py and enjoy_pybullet_racecar.py using OpenAI baselines DQN for the RacecarGymEnv. --- .../pybullet/gym/enjoy_pybullet_racecar.py | 45 +++++++++++++ examples/pybullet/gym/envs/bullet/racecar.py | 21 +++--- .../pybullet/gym/envs/bullet/racecarGymEnv.py | 65 ++++++++++++------- .../pybullet/gym/train_pybullet_racecar.py | 40 ++++++++++++ 4 files changed, 138 insertions(+), 33 deletions(-) create mode 100644 examples/pybullet/gym/enjoy_pybullet_racecar.py create mode 100644 examples/pybullet/gym/train_pybullet_racecar.py diff --git a/examples/pybullet/gym/enjoy_pybullet_racecar.py b/examples/pybullet/gym/enjoy_pybullet_racecar.py new file mode 100644 index 000000000..b2c46337a --- /dev/null +++ b/examples/pybullet/gym/enjoy_pybullet_racecar.py @@ -0,0 +1,45 @@ +import gym +from envs.bullet.racecarGymEnv import RacecarGymEnv + +from baselines import deepq + + +def main(): + + env = RacecarGymEnv(render=True) + act = deepq.load("racecar_model.pkl") + print(act) + while True: + obs, done = env.reset(), False + print("===================================") + print("obs") + print(obs) + episode_rew = 0 + while not done: + #env.render() + + print("!!!!!!!!!!!!!!!!!!!!!!!!!!") + print("obs") + print(obs) + print("???????????????????????????") + print("obs[None]") + print(obs[None]) + o = obs[None] + print("o") + print(o) + aa = act(o) + print("aa") + print (aa) + a = aa[0] + print("a") + print(a) + obs, rew, done, _ = env.step(a) + print("===================================") + print("obs") + print(obs) + episode_rew += rew + print("Episode reward", episode_rew) + + +if __name__ == '__main__': + main() diff --git a/examples/pybullet/gym/envs/bullet/racecar.py b/examples/pybullet/gym/envs/bullet/racecar.py index 3ceb923f5..9737ede90 100644 --- a/examples/pybullet/gym/envs/bullet/racecar.py +++ b/examples/pybullet/gym/envs/bullet/racecar.py @@ -22,7 +22,7 @@ class Racecar: self.motorizedWheels = [2] self.steeringLinks=[4,6] self.speedMultiplier = 10. - + def getActionDimension(self): return self.nMotors @@ -33,22 +33,25 @@ class Racecar: def getObservation(self): observation = [] pos,orn=p.getBasePositionAndOrientation(self.racecarUniqueId) + observation.extend(list(pos)) + observation.extend(list(orn)) + return observation def applyAction(self, motorCommands): targetVelocity=motorCommands[0]*self.speedMultiplier - print("targetVelocity") - print(targetVelocity) + #print("targetVelocity") + #print(targetVelocity) steeringAngle = motorCommands[1] - print("steeringAngle") - print(steeringAngle) - print("maxForce") - print(self.maxForce) + #print("steeringAngle") + #print(steeringAngle) + #print("maxForce") + #print(self.maxForce) for motor in self.motorizedwheels: - p.setJointMotorControl2(self.racecarUniqueId,motor,p.VELOCITY_CONTROL,targetVelocity=targetVelocity,force=self.maxForce) + p.setJointMotorControl2(self.racecarUniqueId,motor,p.VELOCITY_CONTROL,targetVelocity=targetVelocity,force=self.maxForce) for steer in self.steeringLinks: p.setJointMotorControl2(self.racecarUniqueId,steer,p.POSITION_CONTROL,targetPosition=steeringAngle) - \ No newline at end of file + \ No newline at end of file diff --git a/examples/pybullet/gym/envs/bullet/racecarGymEnv.py b/examples/pybullet/gym/envs/bullet/racecarGymEnv.py index 87c2d0be5..6e12bdbde 100644 --- a/examples/pybullet/gym/envs/bullet/racecarGymEnv.py +++ b/examples/pybullet/gym/envs/bullet/racecarGymEnv.py @@ -6,6 +6,7 @@ import numpy as np import time import pybullet as p from . import racecar +import random class RacecarGymEnv(gym.Env): metadata = { @@ -34,11 +35,12 @@ class RacecarGymEnv(gym.Env): p.connect(p.DIRECT) self._seed() self.reset() - observationDim = self._racecar.getObservationDimension() - observation_high = np.array([np.finfo(np.float32).max] * observationDim) - actionDim = 8 - action_high = np.array([1] * actionDim) - self.action_space = spaces.Box(-action_high, action_high) + observationDim = len(self.getExtendedObservation()) + #print("observationDim") + #print(observationDim) + + observation_high = np.array([np.finfo(np.float32).max] * observationDim) + self.action_space = spaces.Discrete(9) self.observation_space = spaces.Box(-observation_high, observation_high) self.viewer = None @@ -49,14 +51,21 @@ class RacecarGymEnv(gym.Env): #p.loadURDF("%splane.urdf" % self._urdfRoot) p.loadSDF("%sstadium.sdf" % self._urdfRoot) - self._ballUniqueId = p.loadURDF("sphere2.urdf",[20,20,1]) + dist = 1.+10.*random.random() + ang = 2.*3.1415925438*random.random() + + ballx = dist * math.sin(ang) + bally = dist * math.cos(ang) + ballz = 1 + + self._ballUniqueId = p.loadURDF("sphere2.urdf",[ballx,bally,ballz]) p.setGravity(0,0,-10) self._racecar = racecar.Racecar(urdfRootPath=self._urdfRoot, timeStep=self._timeStep) self._envStepCounter = 0 for i in range(100): p.stepSimulation() - self._observation = self._racecar.getObservation() - return self._observation + self._observation = self.getExtendedObservation() + return np.array(self._observation) def __del__(self): p.disconnect() @@ -65,44 +74,52 @@ class RacecarGymEnv(gym.Env): self.np_random, seed = seeding.np_random(seed) return [seed] + def getExtendedObservation(self): + self._observation = self._racecar.getObservation() + pos,orn = p.getBasePositionAndOrientation(self._ballUniqueId) + self._observation.extend(list(pos)) + return self._observation + def _step(self, action): if (self._render): basePos,orn = p.getBasePositionAndOrientation(self._racecar.racecarUniqueId) - p.resetDebugVisualizerCamera(1, 30, -40, basePos) - - if len(action) != self._racecar.getActionDimension(): - raise ValueError("We expect {} continuous action not {}.".format(self._racecar.getActionDimension(), len(action))) - - for i in range(len(action)): - if not -1.01 <= action[i] <= 1.01: - raise ValueError("{}th action should be between -1 and 1 not {}.".format(i, action[i])) - - self._racecar.applyAction(action) + #p.resetDebugVisualizerCamera(1, 30, -40, basePos) + + fwd = [-1,-1,-1,0,0,0,1,1,1] + steerings = [-0.5,0,0.5,-0.5,0,0.5,-0.5,0,0.5] + forward = fwd[action] + steer = steerings[action] + realaction = [forward,steer] + self._racecar.applyAction(realaction) for i in range(self._actionRepeat): p.stepSimulation() if self._render: time.sleep(self._timeStep) - self._observation = self._racecar.getObservation() + self._observation = self.getExtendedObservation() + if self._termination(): break self._envStepCounter += 1 reward = self._reward() done = self._termination() + #print("len=%r" % len(self._observation)) + return np.array(self._observation), reward, done, {} def _render(self, mode='human', close=False): return def _termination(self): - return False + return self._envStepCounter>1000 def _reward(self): closestPoints = p.getClosestPoints(self._racecar.racecarUniqueId,self._ballUniqueId,10000) + numPt = len(closestPoints) reward=-1000 - print(numPt) + #print(numPt) if (numPt>0): - print("reward:") - reward = closestPoints[0][8] - print(reward) + #print("reward:") + reward = -closestPoints[0][8] + #print(reward) return reward diff --git a/examples/pybullet/gym/train_pybullet_racecar.py b/examples/pybullet/gym/train_pybullet_racecar.py new file mode 100644 index 000000000..3ce064d8d --- /dev/null +++ b/examples/pybullet/gym/train_pybullet_racecar.py @@ -0,0 +1,40 @@ +import gym +from envs.bullet.racecarGymEnv import RacecarGymEnv + +from baselines import deepq + +import datetime + + + +def callback(lcl, glb): + # stop training if reward exceeds 199 + is_solved = lcl['t'] > 100 and sum(lcl['episode_rewards'][-101:-1]) / 100 >= 199 + #uniq_filename = "racecar_model" + str(datetime.datetime.now().date()) + '_' + str(datetime.datetime.now().time()).replace(':', '.') + #print("uniq_filename=") + #print(uniq_filename) + #act.save(uniq_filename) + return is_solved + + +def main(): + + env = RacecarGymEnv(render=False) + model = deepq.models.mlp([64]) + act = deepq.learn( + env, + q_func=model, + lr=1e-3, + max_timesteps=10000000, + buffer_size=50000, + exploration_fraction=0.1, + exploration_final_eps=0.02, + print_freq=10, + callback=callback + ) + print("Saving model to racecar_model.pkl") + act.save("racecar_model.pkl") + + +if __name__ == '__main__': + main() From 4a7469a1ba2cca9be9fdbc20cd75e8e3bc0c580c Mon Sep 17 00:00:00 2001 From: Erwin Coumans Date: Fri, 9 Jun 2017 21:19:02 -0700 Subject: [PATCH 3/4] fix uninitialized-variable issues (were unused initialized, but triggers some memory checking tools) --- examples/SharedMemory/PhysicsDirect.cpp | 6 ++++++ examples/SharedMemory/TinyRendererVisualShapeConverter.cpp | 2 +- src/BulletCollision/CollisionShapes/btSphereShape.h | 3 +++ 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/examples/SharedMemory/PhysicsDirect.cpp b/examples/SharedMemory/PhysicsDirect.cpp index ed8376c9f..e49784d47 100644 --- a/examples/SharedMemory/PhysicsDirect.cpp +++ b/examples/SharedMemory/PhysicsDirect.cpp @@ -64,9 +64,15 @@ struct PhysicsDirectInternalData PhysicsDirectInternalData() :m_hasStatus(false), m_verboseOutput(false), + m_cachedCameraPixelsWidth(0), + m_cachedCameraPixelsHeight(0), + m_commandProcessor(NULL), m_ownsCommandProcessor(false), m_timeOutInSeconds(1e30) { + memset(&m_command, 0, sizeof(m_command)); + memset(&m_serverStatus, 0, sizeof(m_serverStatus)); + memset(m_bulletStreamDataServerToClient, 0, sizeof(m_bulletStreamDataServerToClient)); } }; diff --git a/examples/SharedMemory/TinyRendererVisualShapeConverter.cpp b/examples/SharedMemory/TinyRendererVisualShapeConverter.cpp index f076d301e..143c931ce 100644 --- a/examples/SharedMemory/TinyRendererVisualShapeConverter.cpp +++ b/examples/SharedMemory/TinyRendererVisualShapeConverter.cpp @@ -177,7 +177,7 @@ void convertURDFToVisualShape(const UrdfShape* visual, const char* urdfPathPrefi visualShapeOut.m_dimensions[0] = 0; visualShapeOut.m_dimensions[1] = 0; visualShapeOut.m_dimensions[2] = 0; - visualShapeOut.m_meshAssetFileName[0] = 0; + memset(visualShapeOut.m_meshAssetFileName, 0, sizeof(visualShapeOut.m_meshAssetFileName)); if (visual->m_geometry.m_hasLocalMaterial) { visualShapeOut.m_rgbaColor[0] = visual->m_geometry.m_localMaterial.m_matColor.m_rgbaColor[0]; visualShapeOut.m_rgbaColor[1] = visual->m_geometry.m_localMaterial.m_matColor.m_rgbaColor[1]; diff --git a/src/BulletCollision/CollisionShapes/btSphereShape.h b/src/BulletCollision/CollisionShapes/btSphereShape.h index b192efeeb..50561f7f5 100644 --- a/src/BulletCollision/CollisionShapes/btSphereShape.h +++ b/src/BulletCollision/CollisionShapes/btSphereShape.h @@ -29,8 +29,11 @@ public: btSphereShape (btScalar radius) : btConvexInternalShape () { m_shapeType = SPHERE_SHAPE_PROXYTYPE; + m_localScaling.setValue(1.0, 1.0, 1.0); + m_implicitShapeDimensions.setZero(); m_implicitShapeDimensions.setX(radius); m_collisionMargin = radius; + m_padding = 0; } virtual btVector3 localGetSupportingVertex(const btVector3& vec)const; From 1752aa55ca0dc41e2bb65a96900c67bc95345225 Mon Sep 17 00:00:00 2001 From: erwincoumans Date: Sat, 10 Jun 2017 18:46:36 -0700 Subject: [PATCH 4/4] train_pybullet_racecar.py works, self-driving car drives towards the ball using OpenAI baselines DQN :-) See https://www.youtube.com/watch?v=DZ5Px-ocelw for video and how-to-install. --- .../pybullet/gym/enjoy_pybullet_racecar.py | 25 ++----------- examples/pybullet/gym/envs/bullet/racecar.py | 4 +-- .../pybullet/gym/envs/bullet/racecarGymEnv.py | 35 ++++++++++++------- examples/pybullet/gym/racecarGymEnvTest.py | 20 +++++++++-- .../pybullet/gym/train_pybullet_racecar.py | 12 +++---- 5 files changed, 50 insertions(+), 46 deletions(-) diff --git a/examples/pybullet/gym/enjoy_pybullet_racecar.py b/examples/pybullet/gym/enjoy_pybullet_racecar.py index b2c46337a..bc965f32d 100644 --- a/examples/pybullet/gym/enjoy_pybullet_racecar.py +++ b/examples/pybullet/gym/enjoy_pybullet_racecar.py @@ -6,7 +6,7 @@ from baselines import deepq def main(): - env = RacecarGymEnv(render=True) + env = RacecarGymEnv(renders=True) act = deepq.load("racecar_model.pkl") print(act) while True: @@ -16,27 +16,8 @@ def main(): print(obs) episode_rew = 0 while not done: - #env.render() - - print("!!!!!!!!!!!!!!!!!!!!!!!!!!") - print("obs") - print(obs) - print("???????????????????????????") - print("obs[None]") - print(obs[None]) - o = obs[None] - print("o") - print(o) - aa = act(o) - print("aa") - print (aa) - a = aa[0] - print("a") - print(a) - obs, rew, done, _ = env.step(a) - print("===================================") - print("obs") - print(obs) + env.render() + obs, rew, done, _ = env.step(act(obs[None])[0]) episode_rew += rew print("Episode reward", episode_rew) diff --git a/examples/pybullet/gym/envs/bullet/racecar.py b/examples/pybullet/gym/envs/bullet/racecar.py index 9737ede90..96db73c78 100644 --- a/examples/pybullet/gym/envs/bullet/racecar.py +++ b/examples/pybullet/gym/envs/bullet/racecar.py @@ -12,7 +12,7 @@ class Racecar: def reset(self): self.racecarUniqueId = p.loadURDF("racecar/racecar.urdf", [0,0,.2]) - self.maxForce = 10 + self.maxForce = 20 self.nMotors = 2 self.motorizedwheels=[2] self.inactiveWheels = [3,5,7] @@ -21,7 +21,7 @@ class Racecar: self.motorizedWheels = [2] self.steeringLinks=[4,6] - self.speedMultiplier = 10. + self.speedMultiplier = 4. def getActionDimension(self): diff --git a/examples/pybullet/gym/envs/bullet/racecarGymEnv.py b/examples/pybullet/gym/envs/bullet/racecarGymEnv.py index 6e12bdbde..930f9bf88 100644 --- a/examples/pybullet/gym/envs/bullet/racecarGymEnv.py +++ b/examples/pybullet/gym/envs/bullet/racecarGymEnv.py @@ -16,9 +16,9 @@ class RacecarGymEnv(gym.Env): def __init__(self, urdfRoot="", - actionRepeat=1, + actionRepeat=50, isEnableSelfCollision=True, - render=True): + renders=True): print("init") self._timeStep = 0.01 self._urdfRoot = urdfRoot @@ -27,9 +27,9 @@ class RacecarGymEnv(gym.Env): self._observation = [] self._ballUniqueId = -1 self._envStepCounter = 0 - self._render = render + self._renders = renders self._p = p - if self._render: + if self._renders: p.connect(p.GUI) else: p.connect(p.DIRECT) @@ -49,9 +49,14 @@ class RacecarGymEnv(gym.Env): #p.setPhysicsEngineParameter(numSolverIterations=300) p.setTimeStep(self._timeStep) #p.loadURDF("%splane.urdf" % self._urdfRoot) - p.loadSDF("%sstadium.sdf" % self._urdfRoot) + stadiumobjects = p.loadSDF("%sstadium.sdf" % self._urdfRoot) + #move the stadium objects slightly above 0 + for i in stadiumobjects: + pos,orn = p.getBasePositionAndOrientation(i) + newpos = [pos[0],pos[1],pos[2]+0.1] + p.resetBasePositionAndOrientation(i,newpos,orn) - dist = 1.+10.*random.random() + dist = 5 +2.*random.random() ang = 2.*3.1415925438*random.random() ballx = dist * math.sin(ang) @@ -75,25 +80,29 @@ class RacecarGymEnv(gym.Env): return [seed] def getExtendedObservation(self): - self._observation = self._racecar.getObservation() - pos,orn = p.getBasePositionAndOrientation(self._ballUniqueId) - self._observation.extend(list(pos)) + self._observation = [] #self._racecar.getObservation() + carpos,carorn = p.getBasePositionAndOrientation(self._racecar.racecarUniqueId) + ballpos,ballorn = p.getBasePositionAndOrientation(self._ballUniqueId) + invCarPos,invCarOrn = p.invertTransform(carpos,carorn) + ballPosInCar,ballOrnInCar = p.multiplyTransforms(invCarPos,invCarOrn,ballpos,ballorn) + + self._observation.extend([ballPosInCar[0],ballPosInCar[1]]) return self._observation def _step(self, action): - if (self._render): + if (self._renders): basePos,orn = p.getBasePositionAndOrientation(self._racecar.racecarUniqueId) #p.resetDebugVisualizerCamera(1, 30, -40, basePos) - fwd = [-1,-1,-1,0,0,0,1,1,1] - steerings = [-0.5,0,0.5,-0.5,0,0.5,-0.5,0,0.5] + fwd = [-5,-5,-5,0,0,0,5,5,5] + steerings = [-0.3,0,0.3,-0.3,0,0.3,-0.3,0,0.3] forward = fwd[action] steer = steerings[action] realaction = [forward,steer] self._racecar.applyAction(realaction) for i in range(self._actionRepeat): p.stepSimulation() - if self._render: + if self._renders: time.sleep(self._timeStep) self._observation = self.getExtendedObservation() diff --git a/examples/pybullet/gym/racecarGymEnvTest.py b/examples/pybullet/gym/racecarGymEnvTest.py index 5e96cbcc2..6be102dc3 100644 --- a/examples/pybullet/gym/racecarGymEnvTest.py +++ b/examples/pybullet/gym/racecarGymEnvTest.py @@ -9,6 +9,22 @@ steeringSlider = environment._p.addUserDebugParameter("steering",-0.5,0.5,0) while (True): targetVelocity = environment._p.readUserDebugParameter(targetVelocitySlider) steeringAngle = environment._p.readUserDebugParameter(steeringSlider) - - action=[targetVelocity,steeringAngle] + discreteAction = 0 + if (targetVelocity<-0.33): + discreteAction=0 + else: + if (targetVelocity>0.33): + discreteAction=6 + else: + discreteAction=3 + if (steeringAngle>-0.17): + if (steeringAngle>0.17): + discreteAction=discreteAction+2 + else: + discreteAction=discreteAction+1 + + action=discreteAction state, reward, done, info = environment.step(action) + obs = environment.getExtendedObservation() + print("obs") + print(obs) diff --git a/examples/pybullet/gym/train_pybullet_racecar.py b/examples/pybullet/gym/train_pybullet_racecar.py index 3ce064d8d..face83e11 100644 --- a/examples/pybullet/gym/train_pybullet_racecar.py +++ b/examples/pybullet/gym/train_pybullet_racecar.py @@ -9,23 +9,21 @@ import datetime def callback(lcl, glb): # stop training if reward exceeds 199 - is_solved = lcl['t'] > 100 and sum(lcl['episode_rewards'][-101:-1]) / 100 >= 199 - #uniq_filename = "racecar_model" + str(datetime.datetime.now().date()) + '_' + str(datetime.datetime.now().time()).replace(':', '.') - #print("uniq_filename=") - #print(uniq_filename) - #act.save(uniq_filename) + total = sum(lcl['episode_rewards'][-101:-1]) / 100 + totalt = lcl['t'] + is_solved = totalt > 2000 and total >= -50 return is_solved def main(): - env = RacecarGymEnv(render=False) + env = RacecarGymEnv(renders=False) model = deepq.models.mlp([64]) act = deepq.learn( env, q_func=model, lr=1e-3, - max_timesteps=10000000, + max_timesteps=10000, buffer_size=50000, exploration_fraction=0.1, exploration_final_eps=0.02,