diff --git a/examples/pybullet/gym/cartpole_bullet_gym_example.py b/examples/pybullet/gym/cartpole_bullet_gym_example.py
new file mode 100644
index 000000000..52cdab258
--- /dev/null
+++ b/examples/pybullet/gym/cartpole_bullet_gym_example.py
@@ -0,0 +1,31 @@
+"""Balance the Bullet cart-pole with a hand-tuned linear policy.
+
+Runs one episode of CartPoleBulletEnv-v0, commanding the cart with the
+inner product of the observation and a fixed weight vector.
+"""
+
+import gym
+import numpy as np
+
+from envs.bullet.cartpole_bullet import CartPoleBulletEnv
+
+# One gain per observation component: (theta, theta_dot, x, x_dot).
+w = [0.3, 0.02, 0.02, 0.012]
+
+def main():
+    env = gym.make('CartPoleBulletEnv-v0')
+    for i_episode in range(1):
+        observation = env.reset()
+        done = False
+        t = 0
+        while not done:
+            print(observation)
+            action = np.array([np.inner(observation, w)])
+            print(action)
+            observation, reward, done, info = env.step(action)
+            t = t + 1
+        print("Episode finished after {} timesteps".format(t))
+
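+# A minimal random-search sketch for finding weights like w above; this
+# helper and its parameters are illustrative, not part of the original
+# example.
+def random_search(env, n_trials=20, max_steps=1000):
+    best_w, best_t = None, 0
+    for _ in range(n_trials):
+        candidate = np.random.uniform(-0.5, 0.5, size=4)
+        observation = env.reset()
+        done, t = False, 0
+        while not done and t < max_steps:
+            # Same linear policy as main(), with candidate weights.
+            observation, reward, done, info = env.step(
+                np.array([np.inner(observation, candidate)]))
+            t += 1
+        if t > best_t:
+            best_w, best_t = candidate, t
+    return best_w
+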
+if __name__ == "__main__":
+    main()
diff --git a/examples/pybullet/gym/envs/__init__.py b/examples/pybullet/gym/envs/__init__.py
new file mode 100644
index 000000000..605d5e531
--- /dev/null
+++ b/examples/pybullet/gym/envs/__init__.py
@@ -0,0 +1,17 @@
+from gym.envs.registration import registry, register, make, spec
+
+# ------------bullet-------------
+
+register(
+    id='CartPoleBulletEnv-v0',
+    entry_point='envs.bullet:CartPoleBulletEnv',
+    timestep_limit=1000,
+    reward_threshold=950.0,
+)
+
+register(
+    id='MinitaurBulletEnv-v0',
+    entry_point='envs.bullet:MinitaurBulletEnv',
+    timestep_limit=1000,
+    reward_threshold=5.0,
+)
diff --git a/examples/pybullet/gym/envs/bullet/__init__.py b/examples/pybullet/gym/envs/bullet/__init__.py
new file mode 100644
index 000000000..25da62f87
--- /dev/null
+++ b/examples/pybullet/gym/envs/bullet/__init__.py
@@ -0,0 +1,2 @@
+from envs.bullet.cartpole_bullet import CartPoleBulletEnv
+from envs.bullet.minitaur_bullet import MinitaurBulletEnv
diff --git a/examples/pybullet/gym/envs/bullet/cartpole_bullet.py b/examples/pybullet/gym/envs/bullet/cartpole_bullet.py
new file mode 100644
index 000000000..bf0d85ab9
--- /dev/null
+++ b/examples/pybullet/gym/envs/bullet/cartpole_bullet.py
@@ -0,0 +1,94 @@
+"""
+Classic cart-pole system implemented by Rich Sutton et al.
+Copied from https://webdocs.cs.ualberta.ca/~sutton/book/code/pole.c
+"""
+import logging
+
+import gym
+from gym import spaces
+from gym.utils import seeding
+import numpy as np
+import pybullet as p
+
+logger = logging.getLogger(__name__)
+
+class CartPoleBulletEnv(gym.Env):
+    metadata = {
+        'render.modes': ['human', 'rgb_array'],
+        'video.frames_per_second': 50
+    }
+
+    def __init__(self):
+        # Connect to the Bullet physics server; switch to
+        # p.connect(p.SHARED_MEMORY) to attach to an already running
+        # App_SharedMemoryPhysics_GUI server instead.
+        p.connect(p.GUI)
+        observation_high = np.array([
+            np.finfo(np.float32).max,
+            np.finfo(np.float32).max,
+            np.finfo(np.float32).max,
+            np.finfo(np.float32).max])
+        action_high = np.array([0.1])
+
+        self.action_space = spaces.Box(-action_high, action_high)
+        self.observation_space = spaces.Box(-observation_high, observation_high)
+
+        self.theta_threshold_radians = 1
+        self.x_threshold = 2.4
+        self._seed()
+        self.viewer = None
+        self._configure()
+
+    def _configure(self, display=None):
+        self.display = display
+
+    def _seed(self, seed=None):
+        self.np_random, seed = seeding.np_random(seed)
+        return [seed]
+
+    def _step(self, action):
+        p.stepSimulation()
+        # The state is (theta, theta_dot, x, x_dot): pole joint state
+        # followed by cart joint state.
+        self.state = p.getJointState(self.cartpole, 1)[0:2] + p.getJointState(self.cartpole, 0)[0:2]
+        theta, theta_dot, x, x_dot = self.state
+        # The action is a velocity delta added to the cart's current velocity.
+        p.setJointMotorControl2(self.cartpole, 0, p.VELOCITY_CONTROL,
+                                targetVelocity=action[0] + x_dot)
+
+        done = x < -self.x_threshold \
+            or x > self.x_threshold \
+            or theta < -self.theta_threshold_radians \
+            or theta > self.theta_threshold_radians
+        reward = 1.0
+
+        return np.array(self.state), reward, done, {}
+
+    def _reset(self):
+        p.resetSimulation()
+        self.cartpole = p.loadURDF("cartpole.urdf", [0, 0, 0])
+        self.timeStep = 0.01
+        # Let the pole joint swing freely.
+        p.setJointMotorControl2(self.cartpole, 1, p.VELOCITY_CONTROL, force=0)
+        p.setGravity(0, 0, -10)
+        p.setTimeStep(self.timeStep)
+        p.setRealTimeSimulation(0)
+
+        initialCartPos = self.np_random.uniform(low=-0.5, high=0.5, size=(1,))
+        initialAngle = self.np_random.uniform(low=-0.5, high=0.5, size=(1,))
+        p.resetJointState(self.cartpole, 1, initialAngle)
+        p.resetJointState(self.cartpole, 0, initialCartPos)
+
+        self.state = p.getJointState(self.cartpole, 1)[0:2] + p.getJointState(self.cartpole, 0)[0:2]
+
+        return np.array(self.state)
+
+    def _render(self, mode='human', close=False):
+        return
+
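+# Minimal smoke-test sketch (assumes cartpole.urdf can be found on the
+# pybullet data path); hypothetical, not part of the original change.
+if __name__ == "__main__":
+    env = CartPoleBulletEnv()
+    observation = env.reset()
+    for _ in range(100):
+        observation, reward, done, info = env.step(np.array([0.0]))
+        if done:
+            observation = env.reset()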
diff --git a/examples/pybullet/gym/envs/bullet/minitaur.py b/examples/pybullet/gym/envs/bullet/minitaur.py
new file mode 100644
index 000000000..74ddedf84
--- /dev/null
+++ b/examples/pybullet/gym/envs/bullet/minitaur.py
@@ -0,0 +1,142 @@
+import numpy as np
+import pybullet as p
+
+class Minitaur:
+    def __init__(self, urdfRootPath=''):
+        self.urdfRootPath = urdfRootPath
+        self.reset()
+
+    def applyAction(self, motorCommands):
+        motorCommandsWithDir = np.multiply(motorCommands, self.motorDir)
+        for i in range(self.nMotors):
+            self.setMotorAngleById(self.motorIdList[i], motorCommandsWithDir[i])
+
+    def getObservation(self):
+        # Observation is motor angles, velocities and torques, followed by
+        # the base orientation quaternion and the base position.
+        observation = []
+        observation.extend(self.getMotorAngles().tolist())
+        observation.extend(self.getMotorVelocities().tolist())
+        observation.extend(self.getMotorTorques().tolist())
+        observation.extend(list(self.getBaseOrientation()))
+        observation.extend(list(self.getBasePosition()))
+        return observation
+
+    def getActionDimension(self):
+        return self.nMotors
+
+    def getObservationDimension(self):
+        return len(self.getObservation())
+
+    def buildJointNameToIdDict(self):
+        nJoints = p.getNumJoints(self.quadruped)
+        self.jointNameToId = {}
+        for i in range(nJoints):
+            jointInfo = p.getJointInfo(self.quadruped, i)
+            self.jointNameToId[jointInfo[1].decode('UTF-8')] = jointInfo[0]
+        self.resetPose()
+        # Let the robot settle into the reset pose.
+        for i in range(100):
+            p.stepSimulation()
+
+    def buildMotorIdList(self):
+        self.motorIdList.append(self.jointNameToId['motor_front_leftR_joint'])
+        self.motorIdList.append(self.jointNameToId['motor_front_leftL_joint'])
+        self.motorIdList.append(self.jointNameToId['motor_back_leftR_joint'])
+        self.motorIdList.append(self.jointNameToId['motor_back_leftL_joint'])
+        self.motorIdList.append(self.jointNameToId['motor_front_rightL_joint'])
+        self.motorIdList.append(self.jointNameToId['motor_front_rightR_joint'])
+        self.motorIdList.append(self.jointNameToId['motor_back_rightL_joint'])
+        self.motorIdList.append(self.jointNameToId['motor_back_rightR_joint'])
+
+    def reset(self):
+        self.quadruped = p.loadURDF("%s/quadruped/quadruped.urdf" % self.urdfRootPath, 0, 0, .3)
+        self.kp = 1
+        self.kd = 0.1
+        self.maxForce = 3.5
+        self.nMotors = 8
+        self.motorIdList = []
+        # Per-motor sign convention so that positive commands mean the same
+        # thing for the left and right legs.
+        self.motorDir = [1, -1, 1, -1, -1, 1, -1, 1]
+        self.buildJointNameToIdDict()
+        self.buildMotorIdList()
+
+    def disableAllMotors(self):
+        nJoints = p.getNumJoints(self.quadruped)
+        for i in range(nJoints):
+            p.setJointMotorControl2(bodyIndex=self.quadruped, jointIndex=i,
+                                    controlMode=p.VELOCITY_CONTROL, force=0)
+
+    def setMotorAngleById(self, motorId, desiredAngle):
+        p.setJointMotorControl2(bodyIndex=self.quadruped, jointIndex=motorId,
+                                controlMode=p.POSITION_CONTROL,
+                                targetPosition=desiredAngle,
+                                positionGain=self.kp, velocityGain=self.kd,
+                                force=self.maxForce)
+
+    def setMotorAngleByName(self, motorName, desiredAngle):
+        self.setMotorAngleById(self.jointNameToId[motorName], desiredAngle)
+
+    def resetPose(self):
+        self.disableAllMotors()
+        # Each leg is a closed four-bar linkage: reset the two motor joints
+        # and the two knee joints, then close the loop with a point-to-point
+        # constraint between the knee links.
+        # right front leg
+        p.resetJointState(self.quadruped, self.jointNameToId['motor_front_rightR_joint'], 1.57)
+        p.resetJointState(self.quadruped, self.jointNameToId['knee_front_rightR_link'], -2.2)
+        p.resetJointState(self.quadruped, self.jointNameToId['motor_front_rightL_joint'], -1.57)
+        p.resetJointState(self.quadruped, self.jointNameToId['knee_front_rightL_link'], 2.2)
+        p.createConstraint(self.quadruped, self.jointNameToId['knee_front_rightR_link'], self.quadruped, self.jointNameToId['knee_front_rightL_link'], p.JOINT_POINT2POINT, [0, 0, 0], [0, 0.01, 0.2], [0, -0.015, 0.2])
+        self.setMotorAngleByName('motor_front_rightR_joint', 1.57)
+        self.setMotorAngleByName('motor_front_rightL_joint', -1.57)
+
+        # left front leg
+        p.resetJointState(self.quadruped, self.jointNameToId['motor_front_leftR_joint'], 1.57)
+        p.resetJointState(self.quadruped, self.jointNameToId['knee_front_leftR_link'], -2.2)
+        p.resetJointState(self.quadruped, self.jointNameToId['motor_front_leftL_joint'], -1.57)
+        p.resetJointState(self.quadruped, self.jointNameToId['knee_front_leftL_link'], 2.2)
+        p.createConstraint(self.quadruped, self.jointNameToId['knee_front_leftR_link'], self.quadruped, self.jointNameToId['knee_front_leftL_link'], p.JOINT_POINT2POINT, [0, 0, 0], [0, -0.01, 0.2], [0, 0.015, 0.2])
+        self.setMotorAngleByName('motor_front_leftR_joint', 1.57)
+        self.setMotorAngleByName('motor_front_leftL_joint', -1.57)
+
+        # right back leg
+        p.resetJointState(self.quadruped, self.jointNameToId['motor_back_rightR_joint'], 1.57)
+        p.resetJointState(self.quadruped, self.jointNameToId['knee_back_rightR_link'], -2.2)
+        p.resetJointState(self.quadruped, self.jointNameToId['motor_back_rightL_joint'], -1.57)
+        p.resetJointState(self.quadruped, self.jointNameToId['knee_back_rightL_link'], 2.2)
+        p.createConstraint(self.quadruped, self.jointNameToId['knee_back_rightR_link'], self.quadruped, self.jointNameToId['knee_back_rightL_link'], p.JOINT_POINT2POINT, [0, 0, 0], [0, 0.01, 0.2], [0, -0.015, 0.2])
+        self.setMotorAngleByName('motor_back_rightR_joint', 1.57)
+        self.setMotorAngleByName('motor_back_rightL_joint', -1.57)
+
+        # left back leg
+        p.resetJointState(self.quadruped, self.jointNameToId['motor_back_leftR_joint'], 1.57)
+        p.resetJointState(self.quadruped, self.jointNameToId['knee_back_leftR_link'], -2.2)
+        p.resetJointState(self.quadruped, self.jointNameToId['motor_back_leftL_joint'], -1.57)
+        p.resetJointState(self.quadruped, self.jointNameToId['knee_back_leftL_link'], 2.2)
+        p.createConstraint(self.quadruped, self.jointNameToId['knee_back_leftR_link'], self.quadruped, self.jointNameToId['knee_back_leftL_link'], p.JOINT_POINT2POINT, [0, 0, 0], [0, -0.01, 0.2], [0, 0.015, 0.2])
+        self.setMotorAngleByName('motor_back_leftR_joint', 1.57)
+        self.setMotorAngleByName('motor_back_leftL_joint', -1.57)
+
+    def getBasePosition(self):
+        position, orientation = p.getBasePositionAndOrientation(self.quadruped)
+        return position
+
+    def getBaseOrientation(self):
+        position, orientation = p.getBasePositionAndOrientation(self.quadruped)
+        return orientation
+
+    def getMotorAngles(self):
+        motorAngles = []
+        for i in range(self.nMotors):
+            jointState = p.getJointState(self.quadruped, self.motorIdList[i])
+            motorAngles.append(jointState[0])
+        motorAngles = np.multiply(motorAngles, self.motorDir)
+        return motorAngles
+
+    def getMotorVelocities(self):
+        motorVelocities = []
+        for i in range(self.nMotors):
+            jointState = p.getJointState(self.quadruped, self.motorIdList[i])
+            motorVelocities.append(jointState[1])
+        motorVelocities = np.multiply(motorVelocities, self.motorDir)
+        return motorVelocities
+
+    def getMotorTorques(self):
+        motorTorques = []
+        for i in range(self.nMotors):
+            jointState = p.getJointState(self.quadruped, self.motorIdList[i])
+            motorTorques.append(jointState[3])
+        motorTorques = np.multiply(motorTorques, self.motorDir)
+        return motorTorques
+
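+# Minimal sketch of driving the Minitaur class directly, without gym
+# (assumes the quadruped URDF is reachable via urdfRootPath and that
+# plane.urdf is on the pybullet data path); hypothetical usage, not part
+# of the original change.
+if __name__ == "__main__":
+    import math
+    p.connect(p.GUI)
+    p.resetSimulation()
+    p.setGravity(0, 0, -10)
+    p.setTimeStep(0.01)
+    p.loadURDF("plane.urdf")
+    minitaur = Minitaur()
+    for i in range(1000):
+        # Hold every motor near pi/2 (the reset pose) with a small wiggle.
+        minitaur.applyAction([math.pi / 2 + 0.2 * math.sin(0.05 * i)] * 8)
+        p.stepSimulation()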
diff --git a/examples/pybullet/gym/envs/bullet/minitaur_bullet.py b/examples/pybullet/gym/envs/bullet/minitaur_bullet.py
new file mode 100644
index 000000000..cd4d5f658
--- /dev/null
+++ b/examples/pybullet/gym/envs/bullet/minitaur_bullet.py
@@ -0,0 +1,90 @@
+import math
+
+import gym
+from gym import spaces
+from gym.utils import seeding
+import numpy as np
+import pybullet as p
+
+from envs.bullet.minitaur import Minitaur
+
+class MinitaurBulletEnv(gym.Env):
+    metadata = {
+        'render.modes': ['human', 'rgb_array'],
+        'video.frames_per_second': 50
+    }
+
+    def __init__(self):
+        self._timeStep = 0.01
+        self._observation = []
+        self._envStepCounter = 0
+        self._lastBasePosition = [0, 0, 0]
+
+        p.connect(p.GUI)
+
+        p.resetSimulation()
+        p.setTimeStep(self._timeStep)
+        p.loadURDF("plane.urdf")
+        p.setGravity(0, 0, -10)
+        self._minitaur = Minitaur()
+
+        observationDim = self._minitaur.getObservationDimension()
+        observation_high = np.array([np.finfo(np.float32).max] * observationDim)
+        actionDim = 8
+        action_high = np.array([math.pi / 2.0] * actionDim)
+        self.action_space = spaces.Box(-action_high, action_high)
+        self.observation_space = spaces.Box(-observation_high, observation_high)
+
+        self._seed()
+        self.reset()
+        self.viewer = None
+
+    def __del__(self):
+        p.disconnect()
+
+    def _seed(self, seed=None):
+        self.np_random, seed = seeding.np_random(seed)
+        return [seed]
+
+    def _step(self, action):
+        if len(action) != self._minitaur.getActionDimension():
+            raise ValueError("We expect {} continuous actions, got {}.".format(
+                self._minitaur.getActionDimension(), len(action)))
+
+        for i in range(len(action)):
+            if not -math.pi / 2 <= action[i] <= math.pi / 2:
+                raise ValueError(
+                    "action[{}] should be between -pi/2 and pi/2, got {}.".format(i, action[i]))
+            # Shift the command so that 0 maps to the standing pose (pi/2).
+            action[i] += math.pi / 2
+
+        self._minitaur.applyAction(action)
+        p.stepSimulation()
+        self._observation = self._minitaur.getObservation()
+        self._envStepCounter += 1
+        reward = self._reward()
+        done = self._termination()
+        return np.array(self._observation), reward, done, {}
+
+    def _reset(self):
+        p.resetSimulation()
+        p.setTimeStep(self._timeStep)
+        p.loadURDF("plane.urdf")
+        p.setGravity(0, 0, -10)
+        self._minitaur = Minitaur()
+        self._envStepCounter = 0
+        # Re-anchor the reward baseline at the spawn position.
+        self._lastBasePosition = self._minitaur.getBasePosition()
+        self._observation = self._minitaur.getObservation()
+        return np.array(self._observation)
+
+    def _render(self, mode='human', close=False):
+        return
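+
+    # Hypothetical helper, not part of the original change: grab an RGB
+    # frame from the Bullet camera, e.g. as a starting point for a real
+    # 'rgb_array' render mode.
+    def _get_camera_image(self, width=320, height=240):
+        rgbPixels = p.getCameraImage(width, height)[2]
+        return np.reshape(rgbPixels, (height, width, 4))[:, :, :3]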
+
+    def is_fallen(self):
+        orientation = self._minitaur.getBaseOrientation()
+        rotMat = p.getMatrixFromQuaternion(orientation)
+        localUp = rotMat[6:]
+        # The dot product is the cosine of the tilt between the body z-axis
+        # and world up; it goes negative once the robot flips over.
+        return np.dot(np.asarray([0, 0, 1]), np.asarray(localUp)) < 0
+
+    def _termination(self):
+        return self.is_fallen()
+
+    def _reward(self):
+        # Reward is the displacement along -x since the previous step, so
+        # walking in the -x direction accumulates positive reward.
+        currentBasePosition = self._minitaur.getBasePosition()
+        reward = np.dot(np.asarray([-1, 0, 0]),
+                        np.asarray(currentBasePosition) - np.asarray(self._lastBasePosition))
+        self._lastBasePosition = currentBasePosition
+        return reward
diff --git a/examples/pybullet/gym/minitaurWalk_bullet_gym_example.py b/examples/pybullet/gym/minitaurWalk_bullet_gym_example.py
new file mode 100644
index 000000000..4da575341
--- /dev/null
+++ b/examples/pybullet/gym/minitaurWalk_bullet_gym_example.py
@@ -0,0 +1,27 @@
+"""Step a minitaur walking environment with a fixed pose action.
+
+Runs one episode of MinitaurWalkEnv-v0, applying the same 16-dimensional
+pose action at every step.
+"""
+
+import gym
+import numpy as np
+
+# NOTE: MinitaurWalkEnv is not part of this change; it must be defined and
+# registered elsewhere for this example to run.
+from envs.bullet.minitaur import MinitaurWalkEnv
+
+def main():
+    env = gym.make('MinitaurWalkEnv-v0')
+    for i_episode in range(1):
+        observation = env.reset()
+        done = False
+        t = 0
+        while not done:
+            print(observation)
+            action = np.array([1.3, 0, 0, 0, 1.3, 0, 0, 0, 1.3, 3.14, 0, 0, 1.3, 3.14, 0, 0])
+            print(action)
+            observation, reward, done, info = env.step(action)
+            t = t + 1
+        print("Episode finished after {} timesteps".format(t))
+
+if __name__ == "__main__":
+    main()
+""" + +import gym +from envs.bullet.minitaur import MinitaurWalkEnv +import setuptools +import time +import numpy as np + + +def main(): + env = gym.make('MinitaurWalkEnv-v0') + for i_episode in range(1): + observation = env.reset() + done = False + while not done: + print(observation) + action = np.array([1.3, 0, 0, 0, 1.3, 0, 0, 0, 1.3, 3.14, 0, 0, 1.3, 3.14, 0, 0]) + print(action) + observation, reward, done, info = env.step(action) + if done: + print("Episode finished after {} timesteps".format(t+1)) + break + +main() diff --git a/examples/pybullet/gym/minitaur_bullet_gym_example.py b/examples/pybullet/gym/minitaur_bullet_gym_example.py new file mode 100644 index 000000000..18c70d43e --- /dev/null +++ b/examples/pybullet/gym/minitaur_bullet_gym_example.py @@ -0,0 +1,27 @@ +import gym +import numpy as np +import math + +from envs.bullet.minitaur_bullet import MinitaurBulletEnv + +def main(): + environment = gym.make('MinitaurBulletEnv-v0') + sum_reward = 0 + steps = 1000 + amplitude = 0.5 + speed = 0.3 + + for stepCounter in range(steps): + a1 = math.sin(stepCounter*speed)*amplitude + a2 = math.sin(stepCounter*speed+3.14)*amplitude + action = [a1, 0, a2, 0, 0, a1, 0, a2] + state, reward, done, info = environment.step(action) + sum_reward += reward + print(state) + if done: + environment.reset() + average_reward = sum_reward / steps + print("avg reward: ", average_reward) + + +main()