From 923fbe858851be28e5c523283b5ba2bdeb0333d8 Mon Sep 17 00:00:00 2001 From: Jie Tan Date: Fri, 10 Mar 2017 11:22:38 -0800 Subject: [PATCH 1/3] add gym examples --- .../gym/cartpole_bullet_gym_example.py | 31 ++++ examples/pybullet/gym/envs/__init__.py | 17 +++ examples/pybullet/gym/envs/bullet/__init__.py | 2 + .../gym/envs/bullet/cartpole_bullet.py | 94 ++++++++++++ examples/pybullet/gym/envs/bullet/minitaur.py | 142 ++++++++++++++++++ .../gym/envs/bullet/minitaur_bullet.py | 90 +++++++++++ .../gym/minitaurWalk_bullet_gym_example.py | 27 ++++ .../gym/minitaur_bullet_gym_example.py | 27 ++++ 8 files changed, 430 insertions(+) create mode 100644 examples/pybullet/gym/cartpole_bullet_gym_example.py create mode 100644 examples/pybullet/gym/envs/__init__.py create mode 100644 examples/pybullet/gym/envs/bullet/__init__.py create mode 100644 examples/pybullet/gym/envs/bullet/cartpole_bullet.py create mode 100644 examples/pybullet/gym/envs/bullet/minitaur.py create mode 100644 examples/pybullet/gym/envs/bullet/minitaur_bullet.py create mode 100644 examples/pybullet/gym/minitaurWalk_bullet_gym_example.py create mode 100644 examples/pybullet/gym/minitaur_bullet_gym_example.py diff --git a/examples/pybullet/gym/cartpole_bullet_gym_example.py b/examples/pybullet/gym/cartpole_bullet_gym_example.py new file mode 100644 index 000000000..52cdab258 --- /dev/null +++ b/examples/pybullet/gym/cartpole_bullet_gym_example.py @@ -0,0 +1,31 @@ +"""One-line documentation for gym_example module. + +A detailed description of gym_example. +""" + +import gym +from envs.bullet.cartpole_bullet import CartPoleBulletEnv +import setuptools +import time +import numpy as np + + +w = [0.3, 0.02, 0.02, 0.012] + +def main(): + env = gym.make('CartPoleBulletEnv-v0') + for i_episode in range(1): + observation = env.reset() + done = False + t = 0 + while not done: + print(observation) + action = np.array([np.inner(observation, w)]) + print(action) + observation, reward, done, info = env.step(action) + t = t + 1 + if done: + print("Episode finished after {} timesteps".format(t+1)) + break + +main() diff --git a/examples/pybullet/gym/envs/__init__.py b/examples/pybullet/gym/envs/__init__.py new file mode 100644 index 000000000..605d5e531 --- /dev/null +++ b/examples/pybullet/gym/envs/__init__.py @@ -0,0 +1,17 @@ +from gym.envs.registration import registry, register, make, spec + +# ------------bullet------------- + +register( + id='CartPoleBulletEnv-v0', + entry_point='envs.bullet:CartPoleBulletEnv', + timestep_limit=1000, + reward_threshold=950.0, +) + +register( + id='MinitaurBulletEnv-v0', + entry_point='envs.bullet:MinitaurBulletEnv', + timestep_limit=1000, + reward_threshold=5.0, +) diff --git a/examples/pybullet/gym/envs/bullet/__init__.py b/examples/pybullet/gym/envs/bullet/__init__.py new file mode 100644 index 000000000..25da62f87 --- /dev/null +++ b/examples/pybullet/gym/envs/bullet/__init__.py @@ -0,0 +1,2 @@ +from envs.bullet.cartpole_bullet import CartPoleBulletEnv +from envs.bullet.minitaur_bullet import MinitaurBulletEnv diff --git a/examples/pybullet/gym/envs/bullet/cartpole_bullet.py b/examples/pybullet/gym/envs/bullet/cartpole_bullet.py new file mode 100644 index 000000000..bf0d85ab9 --- /dev/null +++ b/examples/pybullet/gym/envs/bullet/cartpole_bullet.py @@ -0,0 +1,94 @@ +""" +Classic cart-pole system implemented by Rich Sutton et al. 
+Copied from https://webdocs.cs.ualberta.ca/~sutton/book/code/pole.c +""" +import os +import logging +import math +import gym +from gym import spaces +from gym.utils import seeding +import numpy as np +import time +import subprocess +import pybullet as p + + +logger = logging.getLogger(__name__) + +class CartPoleBulletEnv(gym.Env): + metadata = { + 'render.modes': ['human', 'rgb_array'], + 'video.frames_per_second' : 50 + } + + def __init__(self): + # start the bullet physics server +# cmdStartBulletServer=['/Users/jietan/Projects/bullet3/build_cmake_python3/examples/SharedMemory/App_SharedMemoryPhysics_GUI'] +# subprocess.Popen(cmdStartBulletServer) + # wait to make sure that the physics server is ready +# time.sleep(1) + # connect to the physics server +# p.connect(p.SHARED_MEMORY) + p.connect(p.GUI) + observation_high = np.array([ + np.finfo(np.float32).max, + np.finfo(np.float32).max, + np.finfo(np.float32).max, + np.finfo(np.float32).max]) + action_high = np.array([0.1]) + + self.action_space = spaces.Box(-action_high, action_high) + self.observation_space = spaces.Box(-observation_high, observation_high) + + self.theta_threshold_radians = 1 + self.x_threshold = 2.4 + self._seed() +# self.reset() + self.viewer = None + self._configure() + + def _configure(self, display=None): + self.display = display + + def _seed(self, seed=None): + self.np_random, seed = seeding.np_random(seed) + return [seed] + + def _step(self, action): + p.stepSimulation() +# time.sleep(self.timeStep) + self.state = p.getJointState(self.cartpole, 1)[0:2] + p.getJointState(self.cartpole, 0)[0:2] + theta, theta_dot, x, x_dot = self.state + force = action + p.setJointMotorControl2(self.cartpole, 0, p.VELOCITY_CONTROL, targetVelocity=(action + self.state[3])) + + done = x < -self.x_threshold \ + or x > self.x_threshold \ + or theta < -self.theta_threshold_radians \ + or theta > self.theta_threshold_radians + reward = 1.0 + + return np.array(self.state), reward, done, {} + + def _reset(self): +# print("-----------reset simulation---------------") + p.resetSimulation() + self.cartpole = p.loadURDF("cartpole.urdf",[0,0,0]) + self.timeStep = 0.01 + p.setJointMotorControl2(self.cartpole, 1, p.VELOCITY_CONTROL, force=0) + p.setGravity(0,0, -10) + p.setTimeStep(self.timeStep) + p.setRealTimeSimulation(0) + + initialCartPos = self.np_random.uniform(low=-0.5, high=0.5, size=(1,)) + initialAngle = self.np_random.uniform(low=-0.5, high=0.5, size=(1,)) + p.resetJointState(self.cartpole, 1, initialAngle) + p.resetJointState(self.cartpole, 0, initialCartPos) + + self.state = p.getJointState(self.cartpole, 1)[0:2] + p.getJointState(self.cartpole, 0)[0:2] + + return np.array(self.state) + + def _render(self, mode='human', close=False): + return diff --git a/examples/pybullet/gym/envs/bullet/minitaur.py b/examples/pybullet/gym/envs/bullet/minitaur.py new file mode 100644 index 000000000..74ddedf84 --- /dev/null +++ b/examples/pybullet/gym/envs/bullet/minitaur.py @@ -0,0 +1,142 @@ +import pybullet as p +import numpy as np + +class Minitaur: + def __init__(self, urdfRootPath=''): + self.urdfRootPath = urdfRootPath + self.reset() + + def applyAction(self, motorCommands): + motorCommandsWithDir = np.multiply(motorCommands, self.motorDir) + for i in range(self.nMotors): + self.setMotorAngleById(self.motorIdList[i], motorCommandsWithDir[i]) + + def getObservation(self): + observation = [] + observation.extend(self.getMotorAngles().tolist()) + observation.extend(self.getMotorVelocities().tolist()) + 
observation.extend(self.getMotorTorques().tolist()) + observation.extend(list(self.getBaseOrientation())) + observation.extend(list(self.getBasePosition())) + return observation + + def getActionDimension(self): + return self.nMotors + + def getObservationDimension(self): + return len(self.getObservation()) + + def buildJointNameToIdDict(self): + nJoints = p.getNumJoints(self.quadruped) + self.jointNameToId = {} + for i in range(nJoints): + jointInfo = p.getJointInfo(self.quadruped, i) + self.jointNameToId[jointInfo[1].decode('UTF-8')] = jointInfo[0] + self.resetPose() + for i in range(100): + p.stepSimulation() + + def buildMotorIdList(self): + self.motorIdList.append(self.jointNameToId['motor_front_leftR_joint']) + self.motorIdList.append(self.jointNameToId['motor_front_leftL_joint']) + self.motorIdList.append(self.jointNameToId['motor_back_leftR_joint']) + self.motorIdList.append(self.jointNameToId['motor_back_leftL_joint']) + self.motorIdList.append(self.jointNameToId['motor_front_rightL_joint']) + self.motorIdList.append(self.jointNameToId['motor_front_rightR_joint']) + self.motorIdList.append(self.jointNameToId['motor_back_rightL_joint']) + self.motorIdList.append(self.jointNameToId['motor_back_rightR_joint']) + + + def reset(self): + self.quadruped = p.loadURDF("%s/quadruped/quadruped.urdf" % self.urdfRootPath,0,0,.3) + self.kp = 1 + self.kd = 0.1 + self.maxForce = 3.5 + self.nMotors = 8 + self.motorIdList = [] + self.motorDir = [1, -1, 1, -1, -1, 1, -1, 1] + self.buildJointNameToIdDict() + self.buildMotorIdList() + + + def disableAllMotors(self): + nJoints = p.getNumJoints(self.quadruped) + for i in range(nJoints): + p.setJointMotorControl2(bodyIndex=self.quadruped, jointIndex=i, controlMode=p.VELOCITY_CONTROL, force=0) + + def setMotorAngleById(self, motorId, desiredAngle): + p.setJointMotorControl2(bodyIndex=self.quadruped, jointIndex=motorId, controlMode=p.POSITION_CONTROL, targetPosition=desiredAngle, positionGain=self.kp, velocityGain=self.kd, force=self.maxForce) + + def setMotorAngleByName(self, motorName, desiredAngle): + self.setMotorAngleById(self.jointNameToId[motorName], desiredAngle) + + + def resetPose(self): + #right front leg + self.disableAllMotors() + p.resetJointState(self.quadruped,self.jointNameToId['motor_front_rightR_joint'],1.57) + p.resetJointState(self.quadruped,self.jointNameToId['knee_front_rightR_link'],-2.2) + p.resetJointState(self.quadruped,self.jointNameToId['motor_front_rightL_joint'],-1.57) + p.resetJointState(self.quadruped,self.jointNameToId['knee_front_rightL_link'],2.2) + p.createConstraint(self.quadruped,self.jointNameToId['knee_front_rightR_link'],self.quadruped,self.jointNameToId['knee_front_rightL_link'],p.JOINT_POINT2POINT,[0,0,0],[0,0.01,0.2],[0,-0.015,0.2]) + self.setMotorAngleByName('motor_front_rightR_joint', 1.57) + self.setMotorAngleByName('motor_front_rightL_joint',-1.57) + + #left front leg + p.resetJointState(self.quadruped,self.jointNameToId['motor_front_leftR_joint'],1.57) + p.resetJointState(self.quadruped,self.jointNameToId['knee_front_leftR_link'],-2.2) + p.resetJointState(self.quadruped,self.jointNameToId['motor_front_leftL_joint'],-1.57) + p.resetJointState(self.quadruped,self.jointNameToId['knee_front_leftL_link'],2.2) + p.createConstraint(self.quadruped,self.jointNameToId['knee_front_leftR_link'],self.quadruped,self.jointNameToId['knee_front_leftL_link'],p.JOINT_POINT2POINT,[0,0,0],[0,-0.01,0.2],[0,0.015,0.2]) + self.setMotorAngleByName('motor_front_leftR_joint', 1.57) + 
self.setMotorAngleByName('motor_front_leftL_joint',-1.57) + + #right back leg + p.resetJointState(self.quadruped,self.jointNameToId['motor_back_rightR_joint'],1.57) + p.resetJointState(self.quadruped,self.jointNameToId['knee_back_rightR_link'],-2.2) + p.resetJointState(self.quadruped,self.jointNameToId['motor_back_rightL_joint'],-1.57) + p.resetJointState(self.quadruped,self.jointNameToId['knee_back_rightL_link'],2.2) + p.createConstraint(self.quadruped,self.jointNameToId['knee_back_rightR_link'],self.quadruped,self.jointNameToId['knee_back_rightL_link'],p.JOINT_POINT2POINT,[0,0,0],[0,0.01,0.2],[0,-0.015,0.2]) + self.setMotorAngleByName('motor_back_rightR_joint', 1.57) + self.setMotorAngleByName('motor_back_rightL_joint',-1.57) + + #left back leg + p.resetJointState(self.quadruped,self.jointNameToId['motor_back_leftR_joint'],1.57) + p.resetJointState(self.quadruped,self.jointNameToId['knee_back_leftR_link'],-2.2) + p.resetJointState(self.quadruped,self.jointNameToId['motor_back_leftL_joint'],-1.57) + p.resetJointState(self.quadruped,self.jointNameToId['knee_back_leftL_link'],2.2) + p.createConstraint(self.quadruped,self.jointNameToId['knee_back_leftR_link'],self.quadruped,self.jointNameToId['knee_back_leftL_link'],p.JOINT_POINT2POINT,[0,0,0],[0,-0.01,0.2],[0,0.015,0.2]) + self.setMotorAngleByName('motor_back_leftR_joint', 1.57) + self.setMotorAngleByName('motor_back_leftL_joint',-1.57) + + def getBasePosition(self): + position, orientation = p.getBasePositionAndOrientation(self.quadruped) + return position + + def getBaseOrientation(self): + position, orientation = p.getBasePositionAndOrientation(self.quadruped) + return orientation + + def getMotorAngles(self): + motorAngles = [] + for i in range(self.nMotors): + jointState = p.getJointState(self.quadruped, self.motorIdList[i]) + motorAngles.append(jointState[0]) + motorAngles = np.multiply(motorAngles, self.motorDir) + return motorAngles + + def getMotorVelocities(self): + motorVelocities = [] + for i in range(self.nMotors): + jointState = p.getJointState(self.quadruped, self.motorIdList[i]) + motorVelocities.append(jointState[1]) + motorVelocities = np.multiply(motorVelocities, self.motorDir) + return motorVelocities + + def getMotorTorques(self): + motorTorques = [] + for i in range(self.nMotors): + jointState = p.getJointState(self.quadruped, self.motorIdList[i]) + motorTorques.append(jointState[3]) + motorTorques = np.multiply(motorTorques, self.motorDir) + return motorTorques diff --git a/examples/pybullet/gym/envs/bullet/minitaur_bullet.py b/examples/pybullet/gym/envs/bullet/minitaur_bullet.py new file mode 100644 index 000000000..cd4d5f658 --- /dev/null +++ b/examples/pybullet/gym/envs/bullet/minitaur_bullet.py @@ -0,0 +1,90 @@ +import math +import gym +from gym import spaces +from gym.utils import seeding +import numpy as np +import time + +import pybullet as p +from envs.bullet.minitaur import Minitaur + +class MinitaurBulletEnv(gym.Env): + metadata = { + 'render.modes': ['human', 'rgb_array'], + 'video.frames_per_second' : 50 + } + + def __init__(self): + self._timeStep = 0.01 + self._observation = [] + self._envStepCounter = 0 + self._lastBasePosition = [0, 0, 0] + + p.connect(p.GUI) + + p.resetSimulation() + p.setTimeStep(self._timeStep) + p.loadURDF("plane.urdf") + p.setGravity(0,0,-10) + self._minitaur = Minitaur() + + observationDim = self._minitaur.getObservationDimension() + observation_high = np.array([np.finfo(np.float32).max] * observationDim) + actionDim = 8 + action_high = np.array([math.pi / 2.0] * actionDim) + 
self.action_space = spaces.Box(-action_high, action_high) + self.observation_space = spaces.Box(-observation_high, observation_high) + + self._seed() + self.reset() + self.viewer = None + + def __del__(self): + p.disconnect() + + def _seed(self, seed=None): + self.np_random, seed = seeding.np_random(seed) + return [seed] + + def _step(self, action): + if len(action) != self._minitaur.getActionDimension(): + raise ValueError("We expect {} continuous action not {}.".format(self._minitaur.getActionDimension(), len(actions.continuous_actions))) + + for i in range(len(action)): + if not -math.pi/2 <= action[i] <= math.pi/2: + raise ValueError("{}th action should be between -1 and 1 not {}.".format(i, action[i])) + action[i] += math.pi / 2 + + self._minitaur.applyAction(action) + p.stepSimulation() + self._observation = self._minitaur.getObservation() + self._envStepCounter += 1 + reward = self._reward() + done = self._termination() + return np.array(self._observation), reward, done, {} + + def _reset(self): + p.resetSimulation() + p.setTimeStep(self._timeStep) + p.loadURDF("plane.urdf") + p.setGravity(0,0,-10) + self._minitaur = Minitaur() + self._observation = self._minitaur.getObservation() + + def _render(self, mode='human', close=False): + return + + def is_fallen(self): + orientation = self._minitaur.getBaseOrientation() + rotMat = p.getMatrixFromQuaternion(orientation) + localUp = rotMat[6:] + return np.dot(np.asarray([0, 0, 1]), np.asarray(localUp)) < 0 + + def _termination(self): + return self.is_fallen() + + def _reward(self): + currentBasePosition = self._minitaur.getBasePosition() + reward = np.dot(np.asarray([-1, 0, 0]), np.asarray(currentBasePosition) - np.asarray(self._lastBasePosition)) + self._lastBasePosition = currentBasePosition + return reward diff --git a/examples/pybullet/gym/minitaurWalk_bullet_gym_example.py b/examples/pybullet/gym/minitaurWalk_bullet_gym_example.py new file mode 100644 index 000000000..4da575341 --- /dev/null +++ b/examples/pybullet/gym/minitaurWalk_bullet_gym_example.py @@ -0,0 +1,27 @@ +"""One-line documentation for gym_example module. + +A detailed description of gym_example. 
+""" + +import gym +from envs.bullet.minitaur import MinitaurWalkEnv +import setuptools +import time +import numpy as np + + +def main(): + env = gym.make('MinitaurWalkEnv-v0') + for i_episode in range(1): + observation = env.reset() + done = False + while not done: + print(observation) + action = np.array([1.3, 0, 0, 0, 1.3, 0, 0, 0, 1.3, 3.14, 0, 0, 1.3, 3.14, 0, 0]) + print(action) + observation, reward, done, info = env.step(action) + if done: + print("Episode finished after {} timesteps".format(t+1)) + break + +main() diff --git a/examples/pybullet/gym/minitaur_bullet_gym_example.py b/examples/pybullet/gym/minitaur_bullet_gym_example.py new file mode 100644 index 000000000..18c70d43e --- /dev/null +++ b/examples/pybullet/gym/minitaur_bullet_gym_example.py @@ -0,0 +1,27 @@ +import gym +import numpy as np +import math + +from envs.bullet.minitaur_bullet import MinitaurBulletEnv + +def main(): + environment = gym.make('MinitaurBulletEnv-v0') + sum_reward = 0 + steps = 1000 + amplitude = 0.5 + speed = 0.3 + + for stepCounter in range(steps): + a1 = math.sin(stepCounter*speed)*amplitude + a2 = math.sin(stepCounter*speed+3.14)*amplitude + action = [a1, 0, a2, 0, 0, a1, 0, a2] + state, reward, done, info = environment.step(action) + sum_reward += reward + print(state) + if done: + environment.reset() + average_reward = sum_reward / steps + print("avg reward: ", average_reward) + + +main() From 37a809f5d14f56314ea86b394b9d301b7c880d29 Mon Sep 17 00:00:00 2001 From: Jie Tan Date: Fri, 10 Mar 2017 12:29:47 -0800 Subject: [PATCH 2/3] added the learning algorithm from RL-lab --- .../gym/envs/bullet/cartpole_bullet.py | 7 +-- .../gym/minitaur_bullet_gym_example.py | 27 ---------- .../pybullet/gym/trpo_cartpole_bullet_gym.py | 51 +++++++++++++++++++ .../gym/trpo_tf_cartpole_bullet_gym.py | 48 +++++++++++++++++ 4 files changed, 100 insertions(+), 33 deletions(-) delete mode 100644 examples/pybullet/gym/minitaur_bullet_gym_example.py create mode 100644 examples/pybullet/gym/trpo_cartpole_bullet_gym.py create mode 100644 examples/pybullet/gym/trpo_tf_cartpole_bullet_gym.py diff --git a/examples/pybullet/gym/envs/bullet/cartpole_bullet.py b/examples/pybullet/gym/envs/bullet/cartpole_bullet.py index bf0d85ab9..b1f1a1e35 100644 --- a/examples/pybullet/gym/envs/bullet/cartpole_bullet.py +++ b/examples/pybullet/gym/envs/bullet/cartpole_bullet.py @@ -24,13 +24,8 @@ class CartPoleBulletEnv(gym.Env): def __init__(self): # start the bullet physics server -# cmdStartBulletServer=['/Users/jietan/Projects/bullet3/build_cmake_python3/examples/SharedMemory/App_SharedMemoryPhysics_GUI'] -# subprocess.Popen(cmdStartBulletServer) - # wait to make sure that the physics server is ready -# time.sleep(1) - # connect to the physics server -# p.connect(p.SHARED_MEMORY) p.connect(p.GUI) +# p.connect(p.DIRECT) observation_high = np.array([ np.finfo(np.float32).max, np.finfo(np.float32).max, diff --git a/examples/pybullet/gym/minitaur_bullet_gym_example.py b/examples/pybullet/gym/minitaur_bullet_gym_example.py deleted file mode 100644 index 18c70d43e..000000000 --- a/examples/pybullet/gym/minitaur_bullet_gym_example.py +++ /dev/null @@ -1,27 +0,0 @@ -import gym -import numpy as np -import math - -from envs.bullet.minitaur_bullet import MinitaurBulletEnv - -def main(): - environment = gym.make('MinitaurBulletEnv-v0') - sum_reward = 0 - steps = 1000 - amplitude = 0.5 - speed = 0.3 - - for stepCounter in range(steps): - a1 = math.sin(stepCounter*speed)*amplitude - a2 = math.sin(stepCounter*speed+3.14)*amplitude - action = [a1, 0, 
a2, 0, 0, a1, 0, a2] - state, reward, done, info = environment.step(action) - sum_reward += reward - print(state) - if done: - environment.reset() - average_reward = sum_reward / steps - print("avg reward: ", average_reward) - - -main() diff --git a/examples/pybullet/gym/trpo_cartpole_bullet_gym.py b/examples/pybullet/gym/trpo_cartpole_bullet_gym.py new file mode 100644 index 000000000..4c257cf6b --- /dev/null +++ b/examples/pybullet/gym/trpo_cartpole_bullet_gym.py @@ -0,0 +1,51 @@ +from envs.bullet.cartpole_bullet import CartPoleBulletEnv +from rllab.algos.trpo import TRPO +from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline +from rllab.envs.gym_env import GymEnv +from rllab.envs.normalized_env import normalize +from rllab.misc.instrument import stub, run_experiment_lite +from rllab.policies.gaussian_mlp_policy import GaussianMLPPolicy +import subprocess +import time + +stub(globals()) + +env = normalize(GymEnv("CartPoleBulletEnv-v0")) + +policy = GaussianMLPPolicy( + env_spec=env.spec, + # The neural network policy should have two hidden layers, each with 32 hidden units. + hidden_sizes=(8,) +) + +baseline = LinearFeatureBaseline(env_spec=env.spec) + +algo = TRPO( + env=env, + policy=policy, + baseline=baseline, + batch_size=5000, + max_path_length=env.horizon, + n_itr=50, + discount=0.999, + step_size=0.01, + # Uncomment both lines (this and the plot parameter below) to enable plotting +# plot=True, +) + +#cmdStartBulletServer=['~/Projects/rllab/bullet_examples/run_physics_server.sh'] +#subprocess.Popen(cmdStartBulletServer, shell=True) +#time.sleep(1) + + +run_experiment_lite( + algo.train(), + # Number of parallel workers for sampling + n_parallel=1, + # Only keep the snapshot parameters for the last iteration + snapshot_mode="last", + # Specifies the seed for the experiment. If this is not provided, a random seed + # will be used + seed=1, + # plot=True, +) diff --git a/examples/pybullet/gym/trpo_tf_cartpole_bullet_gym.py b/examples/pybullet/gym/trpo_tf_cartpole_bullet_gym.py new file mode 100644 index 000000000..0e6c1353b --- /dev/null +++ b/examples/pybullet/gym/trpo_tf_cartpole_bullet_gym.py @@ -0,0 +1,48 @@ +from envs.bullet.cartpole_bullet import CartPoleBulletEnv +from sandbox.rocky.tf.algos.trpo import TRPO +from sandbox.rocky.tf.policies.gaussian_mlp_policy import GaussianMLPPolicy +from sandbox.rocky.tf.envs.base import TfEnv + +from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline +from rllab.envs.gym_env import GymEnv +from rllab.envs.normalized_env import normalize +from rllab.misc.instrument import stub, run_experiment_lite + +stub(globals()) + +env = TfEnv(normalize(GymEnv("CartPoleBulletEnv-v0"))) + +policy = GaussianMLPPolicy( + name = "tf_gaussian_mlp", + env_spec=env.spec, + # The neural network policy should have two hidden layers, each with 32 hidden units. + hidden_sizes=(8,) +) + +baseline = LinearFeatureBaseline(env_spec=env.spec) + +algo = TRPO( + env=env, + policy=policy, + baseline=baseline, + batch_size=5000, + max_path_length=env.horizon, + n_itr=50, + discount=0.999, + step_size=0.01, + force_batch_sampler=True, + # Uncomment both lines (this and the plot parameter below) to enable plotting + #plot=True, +) + +run_experiment_lite( + algo.train(), + # Number of parallel workers for sampling + n_parallel=1, + # Only keep the snapshot parameters for the last iteration + snapshot_mode="last", + # Specifies the seed for the experiment. 
If this is not provided, a random seed + # will be used + seed=1, + #plot=True, +) From 8c27a62e042cbf444c829f4ae97d9d3bdabaff51 Mon Sep 17 00:00:00 2001 From: Jie Tan Date: Fri, 10 Mar 2017 13:41:05 -0800 Subject: [PATCH 3/3] remove the stub calls from rllab --- .../gym/minitaurWalk_bullet_gym_example.py | 27 ------------------- .../pybullet/gym/trpo_cartpole_bullet_gym.py | 22 +-------------- .../gym/trpo_tf_cartpole_bullet_gym.py | 15 +---------- 3 files changed, 2 insertions(+), 62 deletions(-) delete mode 100644 examples/pybullet/gym/minitaurWalk_bullet_gym_example.py diff --git a/examples/pybullet/gym/minitaurWalk_bullet_gym_example.py b/examples/pybullet/gym/minitaurWalk_bullet_gym_example.py deleted file mode 100644 index 4da575341..000000000 --- a/examples/pybullet/gym/minitaurWalk_bullet_gym_example.py +++ /dev/null @@ -1,27 +0,0 @@ -"""One-line documentation for gym_example module. - -A detailed description of gym_example. -""" - -import gym -from envs.bullet.minitaur import MinitaurWalkEnv -import setuptools -import time -import numpy as np - - -def main(): - env = gym.make('MinitaurWalkEnv-v0') - for i_episode in range(1): - observation = env.reset() - done = False - while not done: - print(observation) - action = np.array([1.3, 0, 0, 0, 1.3, 0, 0, 0, 1.3, 3.14, 0, 0, 1.3, 3.14, 0, 0]) - print(action) - observation, reward, done, info = env.step(action) - if done: - print("Episode finished after {} timesteps".format(t+1)) - break - -main() diff --git a/examples/pybullet/gym/trpo_cartpole_bullet_gym.py b/examples/pybullet/gym/trpo_cartpole_bullet_gym.py index 4c257cf6b..bac943217 100644 --- a/examples/pybullet/gym/trpo_cartpole_bullet_gym.py +++ b/examples/pybullet/gym/trpo_cartpole_bullet_gym.py @@ -3,12 +3,7 @@ from rllab.algos.trpo import TRPO from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline from rllab.envs.gym_env import GymEnv from rllab.envs.normalized_env import normalize -from rllab.misc.instrument import stub, run_experiment_lite from rllab.policies.gaussian_mlp_policy import GaussianMLPPolicy -import subprocess -import time - -stub(globals()) env = normalize(GymEnv("CartPoleBulletEnv-v0")) @@ -33,19 +28,4 @@ algo = TRPO( # plot=True, ) -#cmdStartBulletServer=['~/Projects/rllab/bullet_examples/run_physics_server.sh'] -#subprocess.Popen(cmdStartBulletServer, shell=True) -#time.sleep(1) - - -run_experiment_lite( - algo.train(), - # Number of parallel workers for sampling - n_parallel=1, - # Only keep the snapshot parameters for the last iteration - snapshot_mode="last", - # Specifies the seed for the experiment. 
If this is not provided, a random seed - # will be used - seed=1, - # plot=True, -) +algo.train() diff --git a/examples/pybullet/gym/trpo_tf_cartpole_bullet_gym.py b/examples/pybullet/gym/trpo_tf_cartpole_bullet_gym.py index 0e6c1353b..6e055be15 100644 --- a/examples/pybullet/gym/trpo_tf_cartpole_bullet_gym.py +++ b/examples/pybullet/gym/trpo_tf_cartpole_bullet_gym.py @@ -6,9 +6,6 @@ from sandbox.rocky.tf.envs.base import TfEnv from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline from rllab.envs.gym_env import GymEnv from rllab.envs.normalized_env import normalize -from rllab.misc.instrument import stub, run_experiment_lite - -stub(globals()) env = TfEnv(normalize(GymEnv("CartPoleBulletEnv-v0"))) @@ -35,14 +32,4 @@ algo = TRPO( #plot=True, ) -run_experiment_lite( - algo.train(), - # Number of parallel workers for sampling - n_parallel=1, - # Only keep the snapshot parameters for the last iteration - snapshot_mode="last", - # Specifies the seed for the experiment. If this is not provided, a random seed - # will be used - seed=1, - #plot=True, -) +algo.train()
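
With the three patches applied, the series leaves one registered Bullet environment, CartPoleBulletEnv-v0 (plus MinitaurBulletEnv-v0 from the first patch). A minimal smoke test of the cartpole environment could look like the sketch below. This is an illustrative rollout with random actions, not the linear policy or the TRPO setup used in the patches themselves; it assumes the script is run from examples/pybullet/gym so that the envs package (and its register() calls) is importable, and that cartpole.urdf can be found on pybullet's file search path, just as the patched code assumes.

    import gym
    import envs  # importing the package runs the register() calls for the Bullet envs

    env = gym.make('CartPoleBulletEnv-v0')
    observation = env.reset()  # loads cartpole.urdf; the URDF must be resolvable
    for _ in range(200):
        # sample a small random force within the declared [-0.1, 0.1] action bounds
        action = env.action_space.sample()
        observation, reward, done, info = env.step(action)
        if done:
            observation = env.reset()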