diff --git a/examples/pybullet/examples/createSphereMultiBodies.py b/examples/pybullet/examples/createSphereMultiBodies.py index b38066518..fb24febb1 100644 --- a/examples/pybullet/examples/createSphereMultiBodies.py +++ b/examples/pybullet/examples/createSphereMultiBodies.py @@ -5,8 +5,6 @@ useMaximalCoordinates = 0 p.connect(p.GUI) p.loadSDF("stadium.sdf",useMaximalCoordinates=useMaximalCoordinates) -p.setPhysicsEngineParameter(numSolverIterations=10) -p.setPhysicsEngineParameter(fixedTimeStep=1./120.) sphereRadius = 0.05 colSphereId = p.createCollisionShape(p.GEOM_SPHERE,radius=sphereRadius) colBoxId = p.createCollisionShape(p.GEOM_BOX,halfExtents=[sphereRadius,sphereRadius,sphereRadius]) diff --git a/examples/pybullet/examples/racecar.py b/examples/pybullet/examples/racecar.py index 2bf744dff..d09f83a4b 100644 --- a/examples/pybullet/examples/racecar.py +++ b/examples/pybullet/examples/racecar.py @@ -20,7 +20,12 @@ car = p.loadURDF("racecar/racecar.urdf") for i in range (p.getNumJoints(car)): print (p.getJointInfo(car,i)) -wheels = [2,3,5,7] +inactive_wheels = [3,5,7] +wheels = [2] + +for wheel in inactive_wheels: + p.setJointMotorControl2(car,wheel,p.VELOCITY_CONTROL,targetVelocity=0,force=0) + steering = [4,6] targetVelocitySlider = p.addUserDebugParameter("wheelVelocity",-10,10,0) diff --git a/examples/pybullet/gym/envs/__init__.py b/examples/pybullet/gym/envs/__init__.py index 605d5e531..43b7a50ac 100644 --- a/examples/pybullet/gym/envs/__init__.py +++ b/examples/pybullet/gym/envs/__init__.py @@ -15,3 +15,10 @@ register( timestep_limit=1000, reward_threshold=5.0, ) + +register( + id='RacecarBulletEnv-v0', + entry_point='envs.bullet:RacecarBulletEnv', + timestep_limit=1000, + reward_threshold=5.0, +) diff --git a/examples/pybullet/gym/envs/bullet/racecar.py b/examples/pybullet/gym/envs/bullet/racecar.py new file mode 100644 index 000000000..3ceb923f5 --- /dev/null +++ b/examples/pybullet/gym/envs/bullet/racecar.py @@ -0,0 +1,54 @@ +import pybullet as p +import numpy as np +import copy +import math + +class Racecar: + + def __init__(self, urdfRootPath='', timeStep=0.01): + self.urdfRootPath = urdfRootPath + self.timeStep = timeStep + self.reset() + + def reset(self): + self.racecarUniqueId = p.loadURDF("racecar/racecar.urdf", [0,0,.2]) + self.maxForce = 10 + self.nMotors = 2 + self.motorizedwheels=[2] + self.inactiveWheels = [3,5,7] + for wheel in self.inactiveWheels: + p.setJointMotorControl2(self.racecarUniqueId,wheel,p.VELOCITY_CONTROL,targetVelocity=0,force=0) + + self.motorizedWheels = [2] + self.steeringLinks=[4,6] + self.speedMultiplier = 10. + + + def getActionDimension(self): + return self.nMotors + + def getObservationDimension(self): + return len(self.getObservation()) + + def getObservation(self): + observation = [] + pos,orn=p.getBasePositionAndOrientation(self.racecarUniqueId) + observation.extend(list(pos)) + return observation + + def applyAction(self, motorCommands): + targetVelocity=motorCommands[0]*self.speedMultiplier + print("targetVelocity") + print(targetVelocity) + steeringAngle = motorCommands[1] + print("steeringAngle") + print(steeringAngle) + print("maxForce") + print(self.maxForce) + + + for motor in self.motorizedwheels: + p.setJointMotorControl2(self.racecarUniqueId,motor,p.VELOCITY_CONTROL,targetVelocity=targetVelocity,force=self.maxForce) + for steer in self.steeringLinks: + p.setJointMotorControl2(self.racecarUniqueId,steer,p.POSITION_CONTROL,targetPosition=steeringAngle) + \ No newline at end of file diff --git a/examples/pybullet/gym/envs/bullet/racecarGymEnv.py b/examples/pybullet/gym/envs/bullet/racecarGymEnv.py new file mode 100644 index 000000000..87c2d0be5 --- /dev/null +++ b/examples/pybullet/gym/envs/bullet/racecarGymEnv.py @@ -0,0 +1,108 @@ +import math +import gym +from gym import spaces +from gym.utils import seeding +import numpy as np +import time +import pybullet as p +from . import racecar + +class RacecarGymEnv(gym.Env): + metadata = { + 'render.modes': ['human', 'rgb_array'], + 'video.frames_per_second' : 50 + } + + def __init__(self, + urdfRoot="", + actionRepeat=1, + isEnableSelfCollision=True, + render=True): + print("init") + self._timeStep = 0.01 + self._urdfRoot = urdfRoot + self._actionRepeat = actionRepeat + self._isEnableSelfCollision = isEnableSelfCollision + self._observation = [] + self._ballUniqueId = -1 + self._envStepCounter = 0 + self._render = render + self._p = p + if self._render: + p.connect(p.GUI) + else: + p.connect(p.DIRECT) + self._seed() + self.reset() + observationDim = self._racecar.getObservationDimension() + observation_high = np.array([np.finfo(np.float32).max] * observationDim) + actionDim = 8 + action_high = np.array([1] * actionDim) + self.action_space = spaces.Box(-action_high, action_high) + self.observation_space = spaces.Box(-observation_high, observation_high) + self.viewer = None + + def _reset(self): + p.resetSimulation() + #p.setPhysicsEngineParameter(numSolverIterations=300) + p.setTimeStep(self._timeStep) + #p.loadURDF("%splane.urdf" % self._urdfRoot) + p.loadSDF("%sstadium.sdf" % self._urdfRoot) + + self._ballUniqueId = p.loadURDF("sphere2.urdf",[20,20,1]) + p.setGravity(0,0,-10) + self._racecar = racecar.Racecar(urdfRootPath=self._urdfRoot, timeStep=self._timeStep) + self._envStepCounter = 0 + for i in range(100): + p.stepSimulation() + self._observation = self._racecar.getObservation() + return self._observation + + def __del__(self): + p.disconnect() + + def _seed(self, seed=None): + self.np_random, seed = seeding.np_random(seed) + return [seed] + + def _step(self, action): + if (self._render): + basePos,orn = p.getBasePositionAndOrientation(self._racecar.racecarUniqueId) + p.resetDebugVisualizerCamera(1, 30, -40, basePos) + + if len(action) != self._racecar.getActionDimension(): + raise ValueError("We expect {} continuous action not {}.".format(self._racecar.getActionDimension(), len(action))) + + for i in range(len(action)): + if not -1.01 <= action[i] <= 1.01: + raise ValueError("{}th action should be between -1 and 1 not {}.".format(i, action[i])) + + self._racecar.applyAction(action) + for i in range(self._actionRepeat): + p.stepSimulation() + if self._render: + time.sleep(self._timeStep) + self._observation = self._racecar.getObservation() + if self._termination(): + break + self._envStepCounter += 1 + reward = self._reward() + done = self._termination() + return np.array(self._observation), reward, done, {} + + def _render(self, mode='human', close=False): + return + + def _termination(self): + return False + + def _reward(self): + closestPoints = p.getClosestPoints(self._racecar.racecarUniqueId,self._ballUniqueId,10000) + numPt = len(closestPoints) + reward=-1000 + print(numPt) + if (numPt>0): + print("reward:") + reward = closestPoints[0][8] + print(reward) + return reward diff --git a/examples/pybullet/gym/racecarGymEnvTest.py b/examples/pybullet/gym/racecarGymEnvTest.py new file mode 100644 index 000000000..5e96cbcc2 --- /dev/null +++ b/examples/pybullet/gym/racecarGymEnvTest.py @@ -0,0 +1,14 @@ + +from envs.bullet.racecarGymEnv import RacecarGymEnv +print ("hello") +environment = RacecarGymEnv(render=True) + +targetVelocitySlider = environment._p.addUserDebugParameter("wheelVelocity",-1,1,0) +steeringSlider = environment._p.addUserDebugParameter("steering",-0.5,0.5,0) + +while (True): + targetVelocity = environment._p.readUserDebugParameter(targetVelocitySlider) + steeringAngle = environment._p.readUserDebugParameter(steeringSlider) + + action=[targetVelocity,steeringAngle] + state, reward, done, info = environment.step(action)