diff --git a/examples/pybullet/gym/envs/bullet/cartpole_bullet.py b/examples/pybullet/gym/envs/bullet/cartpole_bullet.py
index bf0d85ab9..b1f1a1e35 100644
--- a/examples/pybullet/gym/envs/bullet/cartpole_bullet.py
+++ b/examples/pybullet/gym/envs/bullet/cartpole_bullet.py
@@ -24,13 +24,8 @@ class CartPoleBulletEnv(gym.Env):
 
   def __init__(self):
     # start the bullet physics server
-#    cmdStartBulletServer=['/Users/jietan/Projects/bullet3/build_cmake_python3/examples/SharedMemory/App_SharedMemoryPhysics_GUI']
-#    subprocess.Popen(cmdStartBulletServer)
-    # wait to make sure that the physics server is ready
-#    time.sleep(1)
-    # connect to the physics server
-#    p.connect(p.SHARED_MEMORY)
     p.connect(p.GUI)
+#    p.connect(p.DIRECT)
     observation_high = np.array([
       np.finfo(np.float32).max,
       np.finfo(np.float32).max,
diff --git a/examples/pybullet/gym/minitaur_bullet_gym_example.py b/examples/pybullet/gym/minitaur_bullet_gym_example.py
deleted file mode 100644
index 18c70d43e..000000000
--- a/examples/pybullet/gym/minitaur_bullet_gym_example.py
+++ /dev/null
@@ -1,27 +0,0 @@
-import gym
-import numpy as np
-import math
-
-from envs.bullet.minitaur_bullet import MinitaurBulletEnv
-
-def main():
-  environment = gym.make('MinitaurBulletEnv-v0')
-  sum_reward = 0
-  steps = 1000
-  amplitude = 0.5
-  speed = 0.3
-
-  for stepCounter in range(steps):
-    a1 = math.sin(stepCounter*speed)*amplitude
-    a2 = math.sin(stepCounter*speed+3.14)*amplitude
-    action = [a1, 0, a2, 0, 0, a1, 0, a2]
-    state, reward, done, info = environment.step(action)
-    sum_reward += reward
-    print(state)
-    if done:
-      environment.reset()
-  average_reward = sum_reward / steps
-  print("avg reward: ", average_reward)
-
-
-main()
diff --git a/examples/pybullet/gym/trpo_cartpole_bullet_gym.py b/examples/pybullet/gym/trpo_cartpole_bullet_gym.py
new file mode 100644
index 000000000..4c257cf6b
--- /dev/null
+++ b/examples/pybullet/gym/trpo_cartpole_bullet_gym.py
@@ -0,0 +1,51 @@
+from envs.bullet.cartpole_bullet import CartPoleBulletEnv
+from rllab.algos.trpo import TRPO
+from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline
+from rllab.envs.gym_env import GymEnv
+from rllab.envs.normalized_env import normalize
+from rllab.misc.instrument import stub, run_experiment_lite
+from rllab.policies.gaussian_mlp_policy import GaussianMLPPolicy
+import subprocess
+import time
+
+stub(globals())
+
+env = normalize(GymEnv("CartPoleBulletEnv-v0"))
+
+policy = GaussianMLPPolicy(
+    env_spec=env.spec,
+    # The neural network policy has a single hidden layer of 8 hidden units.
+    hidden_sizes=(8,)
+)
+
+baseline = LinearFeatureBaseline(env_spec=env.spec)
+
+algo = TRPO(
+    env=env,
+    policy=policy,
+    baseline=baseline,
+    batch_size=5000,
+    max_path_length=env.horizon,
+    n_itr=50,
+    discount=0.999,
+    step_size=0.01,
+    # Uncomment both lines (this and the plot parameter below) to enable plotting
+#    plot=True,
+)
+
+#cmdStartBulletServer=['~/Projects/rllab/bullet_examples/run_physics_server.sh']
+#subprocess.Popen(cmdStartBulletServer, shell=True)
+#time.sleep(1)
+
+
+run_experiment_lite(
+    algo.train(),
+    # Number of parallel workers for sampling
+    n_parallel=1,
+    # Only keep the snapshot parameters for the last iteration
+    snapshot_mode="last",
+    # Specifies the seed for the experiment. If this is not provided, a random seed
+    # will be used
+    seed=1,
+    # plot=True,
+)
diff --git a/examples/pybullet/gym/trpo_tf_cartpole_bullet_gym.py b/examples/pybullet/gym/trpo_tf_cartpole_bullet_gym.py
new file mode 100644
index 000000000..0e6c1353b
--- /dev/null
+++ b/examples/pybullet/gym/trpo_tf_cartpole_bullet_gym.py
@@ -0,0 +1,48 @@
+from envs.bullet.cartpole_bullet import CartPoleBulletEnv
+from sandbox.rocky.tf.algos.trpo import TRPO
+from sandbox.rocky.tf.policies.gaussian_mlp_policy import GaussianMLPPolicy
+from sandbox.rocky.tf.envs.base import TfEnv
+
+from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline
+from rllab.envs.gym_env import GymEnv
+from rllab.envs.normalized_env import normalize
+from rllab.misc.instrument import stub, run_experiment_lite
+
+stub(globals())
+
+env = TfEnv(normalize(GymEnv("CartPoleBulletEnv-v0")))
+
+policy = GaussianMLPPolicy(
+    name="tf_gaussian_mlp",
+    env_spec=env.spec,
+    # The neural network policy has a single hidden layer of 8 hidden units.
+    hidden_sizes=(8,)
+)
+
+baseline = LinearFeatureBaseline(env_spec=env.spec)
+
+algo = TRPO(
+    env=env,
+    policy=policy,
+    baseline=baseline,
+    batch_size=5000,
+    max_path_length=env.horizon,
+    n_itr=50,
+    discount=0.999,
+    step_size=0.01,
+    force_batch_sampler=True,
+    # Uncomment both lines (this and the plot parameter below) to enable plotting
+    #plot=True,
+)
+
+run_experiment_lite(
+    algo.train(),
+    # Number of parallel workers for sampling
+    n_parallel=1,
+    # Only keep the snapshot parameters for the last iteration
+    snapshot_mode="last",
+    # Specifies the seed for the experiment. If this is not provided, a random seed
+    # will be used
+    seed=1,
+    #plot=True,
+)
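
Note on running the new examples (not part of the patch): both rllab scripts resolve the environment through GymEnv("CartPoleBulletEnv-v0"), which calls gym.make internally, so they assume the Bullet cartpole environment has already been registered with gym by the time the scripts import it. A minimal registration sketch follows; the placement in an envs/bullet/__init__.py on the import path is an assumption, not something this patch shows:

    # Hypothetical registration sketch -- the module placement is an assumption.
    # gym.make("CartPoleBulletEnv-v0") inside rllab's GymEnv only resolves if
    # code like this has executed at import time.
    from gym.envs.registration import register

    register(
        id='CartPoleBulletEnv-v0',
        entry_point='envs.bullet.cartpole_bullet:CartPoleBulletEnv',
    )

For headless training runs, note that CartPoleBulletEnv connects with p.connect(p.GUI); the commented-out p.connect(p.DIRECT) line added in the first hunk is the alternative to enable when no display is available.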