Merge branch 'master' into merge_upstream
This commit is contained in:
@@ -25,6 +25,14 @@ register(
|
||||
reward_threshold=190.0,
|
||||
)
|
||||
|
||||
register(
|
||||
id='CartPoleContinuousBulletEnv-v0',
|
||||
entry_point='pybullet_envs.bullet:CartPoleContinuousBulletEnv',
|
||||
max_episode_steps=200,
|
||||
reward_threshold=190.0,
|
||||
)
|
||||
|
||||
|
||||
register(
|
||||
id='MinitaurBulletEnv-v0',
|
||||
entry_point='pybullet_envs.bullet:MinitaurBulletEnv',
|
||||
|
||||
@@ -26,6 +26,7 @@ from __future__ import print_function
|
||||
import os
|
||||
|
||||
import gym
|
||||
from gym import wrappers
|
||||
import tensorflow as tf
|
||||
|
||||
from . import tools
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
from pybullet_envs.bullet.cartpole_bullet import CartPoleBulletEnv
|
||||
from pybullet_envs.bullet.cartpole_bullet import CartPoleContinuousBulletEnv
|
||||
from pybullet_envs.bullet.minitaur_gym_env import MinitaurBulletEnv
|
||||
from pybullet_envs.bullet.minitaur_duck_gym_env import MinitaurBulletDuckEnv
|
||||
from pybullet_envs.bullet.racecarGymEnv import RacecarGymEnv
|
||||
|
||||
@@ -1,29 +0,0 @@
|
||||
import functools
|
||||
import inspect
|
||||
import pybullet
|
||||
|
||||
|
||||
class BulletClient(object):
|
||||
"""A wrapper for pybullet to manage different clients."""
|
||||
|
||||
def __init__(self, connection_mode=pybullet.DIRECT, options=""):
|
||||
"""Create a simulation and connect to it."""
|
||||
self._client = pybullet.connect(pybullet.SHARED_MEMORY)
|
||||
if (self._client < 0):
|
||||
print("options=", options)
|
||||
self._client = pybullet.connect(connection_mode, options=options)
|
||||
self._shapes = {}
|
||||
|
||||
def __del__(self):
|
||||
"""Clean up connection if not already done."""
|
||||
try:
|
||||
pybullet.disconnect(physicsClientId=self._client)
|
||||
except pybullet.error:
|
||||
pass
|
||||
|
||||
def __getattr__(self, name):
|
||||
"""Inject the client id into Bullet functions."""
|
||||
attribute = getattr(pybullet, name)
|
||||
if inspect.isbuiltin(attribute):
|
||||
attribute = functools.partial(attribute, physicsClientId=self._client)
|
||||
return attribute
|
||||
@@ -15,8 +15,9 @@ from gym.utils import seeding
|
||||
import numpy as np
|
||||
import time
|
||||
import subprocess
|
||||
import pybullet as p
|
||||
import pybullet as p2
|
||||
import pybullet_data
|
||||
import pybullet_utils.bullet_client as bc
|
||||
from pkg_resources import parse_version
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -25,13 +26,13 @@ logger = logging.getLogger(__name__)
|
||||
class CartPoleBulletEnv(gym.Env):
|
||||
metadata = {'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': 50}
|
||||
|
||||
def __init__(self, renders=True):
|
||||
def __init__(self, renders=False, discrete_actions=True):
|
||||
# start the bullet physics server
|
||||
self._renders = renders
|
||||
if (renders):
|
||||
p.connect(p.GUI)
|
||||
else:
|
||||
p.connect(p.DIRECT)
|
||||
self._discrete_actions = discrete_actions
|
||||
self._render_height = 200
|
||||
self._render_width = 320
|
||||
self._physics_client_id = -1
|
||||
self.theta_threshold_radians = 12 * 2 * math.pi / 360
|
||||
self.x_threshold = 0.4 #2.4
|
||||
high = np.array([
|
||||
@@ -42,7 +43,13 @@ class CartPoleBulletEnv(gym.Env):
|
||||
|
||||
self.force_mag = 10
|
||||
|
||||
self.action_space = spaces.Discrete(2)
|
||||
if self._discrete_actions:
|
||||
self.action_space = spaces.Discrete(2)
|
||||
else:
|
||||
action_dim = 1
|
||||
action_high = np.array([self.force_mag] * action_dim)
|
||||
self.action_space = spaces.Box(-action_high, action_high)
|
||||
|
||||
self.observation_space = spaces.Box(-high, high, dtype=np.float32)
|
||||
|
||||
self.seed()
|
||||
@@ -58,7 +65,11 @@ class CartPoleBulletEnv(gym.Env):
|
||||
return [seed]
|
||||
|
||||
def step(self, action):
|
||||
force = self.force_mag if action == 1 else -self.force_mag
|
||||
p = self._p
|
||||
if self._discrete_actions:
|
||||
force = self.force_mag if action == 1 else -self.force_mag
|
||||
else:
|
||||
force = action[0]
|
||||
|
||||
p.setJointMotorControl2(self.cartpole, 0, p.TORQUE_CONTROL, force=force)
|
||||
p.stepSimulation()
|
||||
@@ -77,19 +88,27 @@ class CartPoleBulletEnv(gym.Env):
|
||||
|
||||
def reset(self):
|
||||
# print("-----------reset simulation---------------")
|
||||
p.resetSimulation()
|
||||
self.cartpole = p.loadURDF(os.path.join(pybullet_data.getDataPath(), "cartpole.urdf"),
|
||||
[0, 0, 0])
|
||||
p.changeDynamics(self.cartpole, -1, linearDamping=0, angularDamping=0)
|
||||
p.changeDynamics(self.cartpole, 0, linearDamping=0, angularDamping=0)
|
||||
p.changeDynamics(self.cartpole, 1, linearDamping=0, angularDamping=0)
|
||||
self.timeStep = 0.02
|
||||
p.setJointMotorControl2(self.cartpole, 1, p.VELOCITY_CONTROL, force=0)
|
||||
p.setJointMotorControl2(self.cartpole, 0, p.VELOCITY_CONTROL, force=0)
|
||||
p.setGravity(0, 0, -9.8)
|
||||
p.setTimeStep(self.timeStep)
|
||||
p.setRealTimeSimulation(0)
|
||||
|
||||
if self._physics_client_id < 0:
|
||||
if self._renders:
|
||||
self._p = bc.BulletClient(connection_mode=p2.GUI)
|
||||
else:
|
||||
self._p = bc.BulletClient()
|
||||
self._physics_client_id = self._p._client
|
||||
|
||||
p = self._p
|
||||
p.resetSimulation()
|
||||
self.cartpole = p.loadURDF(os.path.join(pybullet_data.getDataPath(), "cartpole.urdf"),
|
||||
[0, 0, 0])
|
||||
p.changeDynamics(self.cartpole, -1, linearDamping=0, angularDamping=0)
|
||||
p.changeDynamics(self.cartpole, 0, linearDamping=0, angularDamping=0)
|
||||
p.changeDynamics(self.cartpole, 1, linearDamping=0, angularDamping=0)
|
||||
self.timeStep = 0.02
|
||||
p.setJointMotorControl2(self.cartpole, 1, p.VELOCITY_CONTROL, force=0)
|
||||
p.setJointMotorControl2(self.cartpole, 0, p.VELOCITY_CONTROL, force=0)
|
||||
p.setGravity(0, 0, -9.8)
|
||||
p.setTimeStep(self.timeStep)
|
||||
p.setRealTimeSimulation(0)
|
||||
p = self._p
|
||||
randstate = self.np_random.uniform(low=-0.05, high=0.05, size=(4,))
|
||||
p.resetJointState(self.cartpole, 1, randstate[0], randstate[1])
|
||||
p.resetJointState(self.cartpole, 0, randstate[2], randstate[3])
|
||||
@@ -99,4 +118,51 @@ class CartPoleBulletEnv(gym.Env):
|
||||
return np.array(self.state)
|
||||
|
||||
def render(self, mode='human', close=False):
|
||||
return
|
||||
if mode == "human":
|
||||
self._renders = True
|
||||
if mode != "rgb_array":
|
||||
return np.array([])
|
||||
base_pos=[0,0,0]
|
||||
self._cam_dist = 2
|
||||
self._cam_pitch = 0.3
|
||||
self._cam_yaw = 0
|
||||
if (self._physics_client_id>=0):
|
||||
view_matrix = self._p.computeViewMatrixFromYawPitchRoll(
|
||||
cameraTargetPosition=base_pos,
|
||||
distance=self._cam_dist,
|
||||
yaw=self._cam_yaw,
|
||||
pitch=self._cam_pitch,
|
||||
roll=0,
|
||||
upAxisIndex=2)
|
||||
proj_matrix = self._p.computeProjectionMatrixFOV(fov=60,
|
||||
aspect=float(self._render_width) /
|
||||
self._render_height,
|
||||
nearVal=0.1,
|
||||
farVal=100.0)
|
||||
(_, _, px, _, _) = self._p.getCameraImage(
|
||||
width=self._render_width,
|
||||
height=self._render_height,
|
||||
renderer=self._p.ER_BULLET_HARDWARE_OPENGL,
|
||||
viewMatrix=view_matrix,
|
||||
projectionMatrix=proj_matrix)
|
||||
else:
|
||||
px = np.array([[[255,255,255,255]]*self._render_width]*self._render_height, dtype=np.uint8)
|
||||
rgb_array = np.array(px, dtype=np.uint8)
|
||||
rgb_array = np.reshape(np.array(px), (self._render_height, self._render_width, -1))
|
||||
rgb_array = rgb_array[:, :, :3]
|
||||
return rgb_array
|
||||
|
||||
def configure(self, args):
|
||||
pass
|
||||
|
||||
def close(self):
|
||||
if self._physics_client_id >= 0:
|
||||
self._p.disconnect()
|
||||
self._physics_client_id = -1
|
||||
|
||||
class CartPoleContinuousBulletEnv(CartPoleBulletEnv):
|
||||
metadata = {'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': 50}
|
||||
|
||||
def __init__(self, renders=False):
|
||||
# start the bullet physics server
|
||||
CartPoleBulletEnv.__init__(self, renders, discrete_actions=False)
|
||||
|
||||
@@ -17,7 +17,7 @@ from gym import spaces
|
||||
from gym.utils import seeding
|
||||
import numpy as np
|
||||
import pybullet
|
||||
from . import bullet_client
|
||||
import pybullet_utils.bullet_client as bc
|
||||
from . import minitaur
|
||||
import pybullet_data
|
||||
from . import minitaur_env_randomizer
|
||||
@@ -150,9 +150,9 @@ class MinitaurBulletDuckEnv(gym.Env):
|
||||
self._action_repeat *= NUM_SUBSTEPS
|
||||
|
||||
if self._is_render:
|
||||
self._pybullet_client = bullet_client.BulletClient(connection_mode=pybullet.GUI)
|
||||
self._pybullet_client = bc.BulletClient(connection_mode=pybullet.GUI)
|
||||
else:
|
||||
self._pybullet_client = bullet_client.BulletClient()
|
||||
self._pybullet_client = bc.BulletClient()
|
||||
|
||||
self.seed()
|
||||
self.reset()
|
||||
|
||||
@@ -14,7 +14,7 @@ from gym import spaces
|
||||
from gym.utils import seeding
|
||||
import numpy as np
|
||||
import pybullet
|
||||
from . import bullet_client
|
||||
import pybullet_utils.bullet_client as bc
|
||||
from . import minitaur
|
||||
import os
|
||||
import pybullet_data
|
||||
@@ -145,9 +145,9 @@ class MinitaurBulletEnv(gym.Env):
|
||||
self._action_repeat *= NUM_SUBSTEPS
|
||||
|
||||
if self._is_render:
|
||||
self._pybullet_client = bullet_client.BulletClient(connection_mode=pybullet.GUI)
|
||||
self._pybullet_client = bc.BulletClient(connection_mode=pybullet.GUI)
|
||||
else:
|
||||
self._pybullet_client = bullet_client.BulletClient()
|
||||
self._pybullet_client = bc.BulletClient()
|
||||
|
||||
self.seed()
|
||||
self.reset()
|
||||
|
||||
@@ -12,7 +12,7 @@ import numpy as np
|
||||
import pybullet
|
||||
from . import racecar
|
||||
import random
|
||||
from . import bullet_client
|
||||
import pybullet_utils.bullet_client as bc
|
||||
import pybullet_data
|
||||
from pkg_resources import parse_version
|
||||
|
||||
@@ -40,9 +40,9 @@ class RacecarGymEnv(gym.Env):
|
||||
self._renders = renders
|
||||
self._isDiscrete = isDiscrete
|
||||
if self._renders:
|
||||
self._p = bullet_client.BulletClient(connection_mode=pybullet.GUI)
|
||||
self._p = bc.BulletClient(connection_mode=pybullet.GUI)
|
||||
else:
|
||||
self._p = bullet_client.BulletClient()
|
||||
self._p = bc.BulletClient()
|
||||
|
||||
self.seed()
|
||||
#self.reset()
|
||||
|
||||
@@ -10,7 +10,7 @@ from gym.utils import seeding
|
||||
import numpy as np
|
||||
import time
|
||||
import pybullet
|
||||
from . import bullet_client
|
||||
import pybullet_utils.bullet_client as bc
|
||||
from . import racecar
|
||||
import random
|
||||
import pybullet_data
|
||||
@@ -42,9 +42,9 @@ class RacecarZEDGymEnv(gym.Env):
|
||||
|
||||
self._isDiscrete = isDiscrete
|
||||
if self._renders:
|
||||
self._p = bullet_client.BulletClient(connection_mode=pybullet.GUI)
|
||||
self._p = bc.BulletClient(connection_mode=pybullet.GUI)
|
||||
else:
|
||||
self._p = bullet_client.BulletClient()
|
||||
self._p = bc.BulletClient()
|
||||
|
||||
self.seed()
|
||||
self.reset()
|
||||
|
||||
@@ -25,12 +25,12 @@ class HumanoidStablePD(object):
|
||||
self._mocap_data = mocap_data
|
||||
self._arg_parser = arg_parser
|
||||
print("LOADING humanoid!")
|
||||
|
||||
flags=self._pybullet_client.URDF_MAINTAIN_LINK_ORDER+self._pybullet_client.URDF_USE_SELF_COLLISION+self._pybullet_client.URDF_USE_SELF_COLLISION_EXCLUDE_ALL_PARENTS
|
||||
self._sim_model = self._pybullet_client.loadURDF(
|
||||
"humanoid/humanoid.urdf", [0, 0.889540259, 0],
|
||||
globalScaling=0.25,
|
||||
useFixedBase=useFixedBase,
|
||||
flags=self._pybullet_client.URDF_MAINTAIN_LINK_ORDER)
|
||||
flags=flags)
|
||||
|
||||
#self._pybullet_client.setCollisionFilterGroupMask(self._sim_model,-1,collisionFilterGroup=0,collisionFilterMask=0)
|
||||
#for j in range (self._pybullet_client.getNumJoints(self._sim_model)):
|
||||
@@ -347,7 +347,6 @@ class HumanoidStablePD(object):
|
||||
#static char* kwlist[] = { "bodyUniqueId",
|
||||
#"jointIndices",
|
||||
#"controlMode", "targetPositions", "targetVelocities", "forces", "positionGains", "velocityGains", "maxVelocities", "physicsClientId", NULL };
|
||||
|
||||
self._pybullet_client.setJointMotorControlMultiDofArray(self._sim_model,
|
||||
indices,
|
||||
self._pybullet_client.STABLE_PD_CONTROL,
|
||||
@@ -811,7 +810,10 @@ class HumanoidStablePD(object):
|
||||
rootPosSim, rootOrnSim = self._pybullet_client.getBasePositionAndOrientation(self._sim_model)
|
||||
rootPosKin, rootOrnKin = self._pybullet_client.getBasePositionAndOrientation(self._kin_model)
|
||||
linVelSim, angVelSim = self._pybullet_client.getBaseVelocity(self._sim_model)
|
||||
linVelKin, angVelKin = self._pybullet_client.getBaseVelocity(self._kin_model)
|
||||
#don't read the velocities from the kinematic model (they are zero), use the pose interpolator velocity
|
||||
#see also issue https://github.com/bulletphysics/bullet3/issues/2401
|
||||
linVelKin = self._poseInterpolator._baseLinVel
|
||||
angVelKin = self._poseInterpolator._baseAngVel
|
||||
|
||||
root_rot_err = self.calcRootRotDiff(rootOrnSim, rootOrnKin)
|
||||
pose_err += root_rot_w * root_rot_err
|
||||
|
||||
@@ -37,6 +37,8 @@ def build_arg_parser(args):
|
||||
arg_parser.load_args(args)
|
||||
|
||||
arg_file = arg_parser.parse_string('arg_file', '')
|
||||
if arg_file == '':
|
||||
arg_file = "run_humanoid3d_backflip_args.txt"
|
||||
if (arg_file != ''):
|
||||
path = pybullet_data.getDataPath() + "/args/" + arg_file
|
||||
succ = arg_parser.load_file(path)
|
||||
|
||||
@@ -1,10 +1,17 @@
|
||||
import gym, gym.spaces, gym.utils, gym.utils.seeding
|
||||
import numpy as np
|
||||
import pybullet
|
||||
import os
|
||||
|
||||
from pybullet_utils import bullet_client
|
||||
|
||||
from pkg_resources import parse_version
|
||||
|
||||
try:
|
||||
if os.environ["PYBULLET_EGL"]:
|
||||
import pkgutil
|
||||
except:
|
||||
pass
|
||||
|
||||
class MJCFBaseBulletEnv(gym.Env):
|
||||
"""
|
||||
@@ -31,6 +38,7 @@ class MJCFBaseBulletEnv(gym.Env):
|
||||
|
||||
self.action_space = robot.action_space
|
||||
self.observation_space = robot.observation_space
|
||||
#self.reset()
|
||||
|
||||
def configure(self, args):
|
||||
self.robot.args = args
|
||||
@@ -48,7 +56,19 @@ class MJCFBaseBulletEnv(gym.Env):
|
||||
self._p = bullet_client.BulletClient(connection_mode=pybullet.GUI)
|
||||
else:
|
||||
self._p = bullet_client.BulletClient()
|
||||
|
||||
self._p.resetSimulation()
|
||||
#optionally enable EGL for faster headless rendering
|
||||
try:
|
||||
if os.environ["PYBULLET_EGL"]:
|
||||
con_mode = self._p.getConnectionInfo()['connectionMethod']
|
||||
if con_mode==self._p.DIRECT:
|
||||
egl = pkgutil.get_loader('eglRenderer')
|
||||
if (egl):
|
||||
self._p.loadPlugin(egl.get_filename(), "_eglRendererPlugin")
|
||||
else:
|
||||
self._p.loadPlugin("eglRendererPlugin")
|
||||
except:
|
||||
pass
|
||||
self.physicsClientId = self._p._client
|
||||
self._p.configureDebugVisualizer(pybullet.COV_ENABLE_GUI, 0)
|
||||
|
||||
@@ -77,24 +97,35 @@ class MJCFBaseBulletEnv(gym.Env):
|
||||
if (hasattr(self, 'robot')):
|
||||
if (hasattr(self.robot, 'body_xyz')):
|
||||
base_pos = self.robot.body_xyz
|
||||
|
||||
view_matrix = self._p.computeViewMatrixFromYawPitchRoll(cameraTargetPosition=base_pos,
|
||||
if (self.physicsClientId>=0):
|
||||
view_matrix = self._p.computeViewMatrixFromYawPitchRoll(cameraTargetPosition=base_pos,
|
||||
distance=self._cam_dist,
|
||||
yaw=self._cam_yaw,
|
||||
pitch=self._cam_pitch,
|
||||
roll=0,
|
||||
upAxisIndex=2)
|
||||
proj_matrix = self._p.computeProjectionMatrixFOV(fov=60,
|
||||
proj_matrix = self._p.computeProjectionMatrixFOV(fov=60,
|
||||
aspect=float(self._render_width) /
|
||||
self._render_height,
|
||||
nearVal=0.1,
|
||||
farVal=100.0)
|
||||
(_, _, px, _, _) = self._p.getCameraImage(width=self._render_width,
|
||||
(_, _, px, _, _) = self._p.getCameraImage(width=self._render_width,
|
||||
height=self._render_height,
|
||||
viewMatrix=view_matrix,
|
||||
projectionMatrix=proj_matrix,
|
||||
renderer=pybullet.ER_BULLET_HARDWARE_OPENGL)
|
||||
rgb_array = np.array(px)
|
||||
try:
|
||||
# Keep the previous orientation of the camera set by the user.
|
||||
con_mode = self._p.getConnectionInfo()['connectionMethod']
|
||||
if con_mode==self._p.SHARED_MEMORY or con_mode == self._p.GUI:
|
||||
[yaw, pitch, dist] = self._p.getDebugVisualizerCamera()[8:11]
|
||||
self._p.resetDebugVisualizerCamera(dist, yaw, pitch, base_pos)
|
||||
except:
|
||||
pass
|
||||
|
||||
else:
|
||||
px = np.array([[[255,255,255,255]]*self._render_width]*self._render_height, dtype=np.uint8)
|
||||
rgb_array = np.array(px, dtype=np.uint8)
|
||||
rgb_array = np.reshape(np.array(px), (self._render_height, self._render_width, -1))
|
||||
rgb_array = rgb_array[:, :, :3]
|
||||
return rgb_array
|
||||
|
||||
189
examples/pybullet/gym/pybullet_envs/examples/batchsim3.py
Normal file
189
examples/pybullet/gym/pybullet_envs/examples/batchsim3.py
Normal file
@@ -0,0 +1,189 @@
|
||||
import os
|
||||
import inspect
|
||||
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
|
||||
parentdir = os.path.dirname(os.path.dirname(currentdir))
|
||||
os.sys.path.insert(0, parentdir)
|
||||
|
||||
from pybullet_utils import bullet_client
|
||||
from pybullet_envs.examples import panda_sim
|
||||
|
||||
|
||||
import time
|
||||
|
||||
useGUI = False
|
||||
timeStep = 1./60.
|
||||
|
||||
# Importing the libraries
|
||||
import os
|
||||
import time
|
||||
import multiprocessing as mp
|
||||
from multiprocessing import Process, Pipe
|
||||
|
||||
pandaEndEffectorIndex = 11 #8
|
||||
pandaNumDofs = 7
|
||||
|
||||
|
||||
_RESET = 1
|
||||
_CLOSE = 2
|
||||
_EXPLORE = 3
|
||||
|
||||
|
||||
def ExploreWorker(rank, num_processes, childPipe, args):
|
||||
print("hi:",rank, " out of ", num_processes)
|
||||
import pybullet as op1
|
||||
import pybullet_data as pd
|
||||
logName=""
|
||||
p1=0
|
||||
n = 0
|
||||
space = 2
|
||||
simulations=[]
|
||||
sims_per_worker = 10
|
||||
|
||||
offsetY = rank*space
|
||||
while True:
|
||||
n += 1
|
||||
try:
|
||||
# Only block for short times to have keyboard exceptions be raised.
|
||||
if not childPipe.poll(0.0001):
|
||||
continue
|
||||
message, payload = childPipe.recv()
|
||||
except (EOFError, KeyboardInterrupt):
|
||||
break
|
||||
if message == _RESET:
|
||||
if (useGUI):
|
||||
p1 = bullet_client.BulletClient(op1.GUI)
|
||||
else:
|
||||
p1 = bullet_client.BulletClient(op1.DIRECT)
|
||||
p1.setTimeStep(timeStep)
|
||||
|
||||
p1.setPhysicsEngineParameter(numSolverIterations=8)
|
||||
p1.setPhysicsEngineParameter(minimumSolverIslandSize=100)
|
||||
p1.configureDebugVisualizer(p1.COV_ENABLE_Y_AXIS_UP,1)
|
||||
p1.configureDebugVisualizer(p1.COV_ENABLE_RENDERING,0)
|
||||
p1.setAdditionalSearchPath(pd.getDataPath())
|
||||
p1.setGravity(0,-9.8,0)
|
||||
logName = str("batchsim")+str(rank)
|
||||
for j in range (3):
|
||||
offsetX = 0#-sims_per_worker/2.0*space
|
||||
for i in range (sims_per_worker):
|
||||
offset=[offsetX,0, offsetY]
|
||||
sim = panda_sim.PandaSim(p1, offset)
|
||||
simulations.append(sim)
|
||||
offsetX += space
|
||||
offsetY += space
|
||||
childPipe.send(["reset ok"])
|
||||
p1.configureDebugVisualizer(p1.COV_ENABLE_RENDERING,1)
|
||||
for i in range (100):
|
||||
p1.stepSimulation()
|
||||
|
||||
logId = p1.startStateLogging(op1.STATE_LOGGING_PROFILE_TIMINGS,logName)
|
||||
continue
|
||||
if message == _EXPLORE:
|
||||
sum_rewards=rank
|
||||
|
||||
if useGUI:
|
||||
numSteps = int(20000)
|
||||
else:
|
||||
numSteps = int(5)
|
||||
for i in range (numSteps):
|
||||
for s in simulations:
|
||||
s.step()
|
||||
p1.stepSimulation()
|
||||
#print("logId=",logId)
|
||||
#print("numSteps=",numSteps)
|
||||
|
||||
childPipe.send([sum_rewards])
|
||||
continue
|
||||
if message == _CLOSE:
|
||||
p1.stopStateLogging(logId)
|
||||
childPipe.send(["close ok"])
|
||||
break
|
||||
childPipe.close()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
mp.freeze_support()
|
||||
if useGUI:
|
||||
num_processes = 1
|
||||
else:
|
||||
num_processes = 12
|
||||
processes = []
|
||||
args=[0]*num_processes
|
||||
|
||||
childPipes = []
|
||||
parentPipes = []
|
||||
|
||||
for pr in range(num_processes):
|
||||
parentPipe, childPipe = Pipe()
|
||||
parentPipes.append(parentPipe)
|
||||
childPipes.append(childPipe)
|
||||
|
||||
for rank in range(num_processes):
|
||||
p = mp.Process(target=ExploreWorker, args=(rank, num_processes, childPipes[rank], args))
|
||||
p.start()
|
||||
processes.append(p)
|
||||
|
||||
|
||||
for parentPipe in parentPipes:
|
||||
parentPipe.send([_RESET, "blaat"])
|
||||
|
||||
positive_rewards = [0]*num_processes
|
||||
for k in range(num_processes):
|
||||
#print("reset msg=",parentPipes[k].recv()[0])
|
||||
msg = parentPipes[k].recv()[0]
|
||||
|
||||
for parentPipe in parentPipes:
|
||||
parentPipe.send([_EXPLORE, "blaat"])
|
||||
|
||||
positive_rewards = [0]*num_processes
|
||||
for k in range(num_processes):
|
||||
positive_rewards[k] = parentPipes[k].recv()[0]
|
||||
#print("positive_rewards=",positive_rewards[k])
|
||||
|
||||
|
||||
for parentPipe in parentPipes:
|
||||
parentPipe.send([_EXPLORE, "blaat"])
|
||||
|
||||
positive_rewards = [0]*num_processes
|
||||
for k in range(num_processes):
|
||||
positive_rewards[k] = parentPipes[k].recv()[0]
|
||||
#print("positive_rewards=",positive_rewards[k])
|
||||
msg = positive_rewards[k]
|
||||
|
||||
for parentPipe in parentPipes:
|
||||
parentPipe.send([_EXPLORE, "blaat"])
|
||||
|
||||
positive_rewards = [0]*num_processes
|
||||
for k in range(num_processes):
|
||||
positive_rewards[k] = parentPipes[k].recv()[0]
|
||||
#print("positive_rewards=",positive_rewards[k])
|
||||
|
||||
|
||||
for parentPipe in parentPipes:
|
||||
parentPipe.send([_CLOSE, "pay2"])
|
||||
|
||||
for p in processes:
|
||||
p.join()
|
||||
|
||||
#now we merge the separate json files into a single one
|
||||
fnameout = 'batchsim.json'
|
||||
count = 0
|
||||
outfile = open(fnameout, "w+")
|
||||
outfile.writelines(["{\"traceEvents\":[\n"])
|
||||
numFiles = num_processes
|
||||
for num in range(numFiles):
|
||||
print("num=",num)
|
||||
fname = 'batchsim%d_0.json' % (num)
|
||||
with open(fname) as infile:
|
||||
for line in infile:
|
||||
if "pid" in line:
|
||||
line = line.replace('\"pid\":1', '\"pid\":'+str(num))
|
||||
if num < (numFiles-1) and not "{}}," in line:
|
||||
line = line.replace('{}}', '{}},')
|
||||
print("line[",count,"]=",line)
|
||||
outfile.write(line)
|
||||
count += 1
|
||||
print ("count=",count)
|
||||
outfile.writelines(["],\n"])
|
||||
outfile.writelines(["\"displayTimeUnit\": \"ns\"}\n"])
|
||||
outfile.close()
|
||||
@@ -1,114 +0,0 @@
|
||||
import pybullet as p
|
||||
import pybullet_data as pd
|
||||
|
||||
import time
|
||||
|
||||
p.connect(p.GUI)
|
||||
p.setAdditionalSearchPath(pd.getDataPath())
|
||||
|
||||
plane = p.loadURDF("plane.urdf")
|
||||
p.setGravity(0, 0, -9.8)
|
||||
p.setTimeStep(1. / 500)
|
||||
#p.setDefaultContactERP(0)
|
||||
#urdfFlags = p.URDF_USE_SELF_COLLISION+p.URDF_USE_SELF_COLLISION_EXCLUDE_ALL_PARENTS
|
||||
urdfFlags = p.URDF_USE_SELF_COLLISION
|
||||
quadruped = p.loadURDF("laikago/laikago.urdf", [0, 0, .5], [0, 0.5, 0.5, 0],
|
||||
flags=urdfFlags,
|
||||
useFixedBase=False)
|
||||
|
||||
#enable collision between lower legs
|
||||
|
||||
for j in range(p.getNumJoints(quadruped)):
|
||||
print(p.getJointInfo(quadruped, j))
|
||||
|
||||
#2,5,8 and 11 are the lower legs
|
||||
lower_legs = [2, 5, 8, 11]
|
||||
for l0 in lower_legs:
|
||||
for l1 in lower_legs:
|
||||
if (l1 > l0):
|
||||
enableCollision = 1
|
||||
print("collision for pair", l0, l1,
|
||||
p.getJointInfo(quadruped, l0)[12],
|
||||
p.getJointInfo(quadruped, l1)[12], "enabled=", enableCollision)
|
||||
p.setCollisionFilterPair(quadruped, quadruped, 2, 5, enableCollision)
|
||||
|
||||
jointIds = []
|
||||
paramIds = []
|
||||
jointOffsets = []
|
||||
jointDirections = [-1, 1, 1, 1, 1, 1, -1, 1, 1, 1, 1, 1]
|
||||
jointAngles = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
|
||||
|
||||
for i in range(4):
|
||||
jointOffsets.append(0)
|
||||
jointOffsets.append(-0.7)
|
||||
jointOffsets.append(0.7)
|
||||
|
||||
maxForceId = p.addUserDebugParameter("maxForce", 0, 100, 20)
|
||||
|
||||
for j in range(p.getNumJoints(quadruped)):
|
||||
p.changeDynamics(quadruped, j, linearDamping=0, angularDamping=0)
|
||||
info = p.getJointInfo(quadruped, j)
|
||||
#print(info)
|
||||
jointName = info[1]
|
||||
jointType = info[2]
|
||||
if (jointType == p.JOINT_PRISMATIC or jointType == p.JOINT_REVOLUTE):
|
||||
jointIds.append(j)
|
||||
|
||||
p.getCameraImage(480, 320)
|
||||
p.setRealTimeSimulation(0)
|
||||
|
||||
joints = []
|
||||
|
||||
with open(pd.getDataPath() + "/laikago/data1.txt", "r") as filestream:
|
||||
for line in filestream:
|
||||
print("line=", line)
|
||||
maxForce = p.readUserDebugParameter(maxForceId)
|
||||
currentline = line.split(",")
|
||||
#print (currentline)
|
||||
#print("-----")
|
||||
frame = currentline[0]
|
||||
t = currentline[1]
|
||||
#print("frame[",frame,"]")
|
||||
joints = currentline[2:14]
|
||||
#print("joints=",joints)
|
||||
for j in range(12):
|
||||
targetPos = float(joints[j])
|
||||
p.setJointMotorControl2(quadruped,
|
||||
jointIds[j],
|
||||
p.POSITION_CONTROL,
|
||||
jointDirections[j] * targetPos + jointOffsets[j],
|
||||
force=maxForce)
|
||||
p.stepSimulation()
|
||||
for lower_leg in lower_legs:
|
||||
#print("points for ", quadruped, " link: ", lower_leg)
|
||||
pts = p.getContactPoints(quadruped, -1, lower_leg)
|
||||
#print("num points=",len(pts))
|
||||
#for pt in pts:
|
||||
# print(pt[9])
|
||||
time.sleep(1. / 500.)
|
||||
|
||||
for j in range(p.getNumJoints(quadruped)):
|
||||
p.changeDynamics(quadruped, j, linearDamping=0, angularDamping=0)
|
||||
info = p.getJointInfo(quadruped, j)
|
||||
js = p.getJointState(quadruped, j)
|
||||
#print(info)
|
||||
jointName = info[1]
|
||||
jointType = info[2]
|
||||
if (jointType == p.JOINT_PRISMATIC or jointType == p.JOINT_REVOLUTE):
|
||||
paramIds.append(
|
||||
p.addUserDebugParameter(jointName.decode("utf-8"), -4, 4,
|
||||
(js[0] - jointOffsets[j]) / jointDirections[j]))
|
||||
|
||||
p.setRealTimeSimulation(1)
|
||||
|
||||
while (1):
|
||||
|
||||
for i in range(len(paramIds)):
|
||||
c = paramIds[i]
|
||||
targetPos = p.readUserDebugParameter(c)
|
||||
maxForce = p.readUserDebugParameter(maxForceId)
|
||||
p.setJointMotorControl2(quadruped,
|
||||
jointIds[i],
|
||||
p.POSITION_CONTROL,
|
||||
jointDirections[i] * targetPos + jointOffsets[i],
|
||||
force=maxForce)
|
||||
21
examples/pybullet/gym/pybullet_envs/examples/loadpanda.py
Normal file
21
examples/pybullet/gym/pybullet_envs/examples/loadpanda.py
Normal file
@@ -0,0 +1,21 @@
|
||||
import pybullet as p
|
||||
import pybullet_data as pd
|
||||
import math
|
||||
import time
|
||||
import numpy as np
|
||||
from pybullet_envs.examples import panda_sim
|
||||
|
||||
p.connect(p.GUI)
|
||||
p.configureDebugVisualizer(p.COV_ENABLE_Y_AXIS_UP,1)
|
||||
p.setAdditionalSearchPath(pd.getDataPath())
|
||||
|
||||
timeStep=1./60.
|
||||
p.setTimeStep(timeStep)
|
||||
p.setGravity(0,-9.8,0)
|
||||
|
||||
panda = panda_sim.PandaSim(p,[0,0,0])
|
||||
while (1):
|
||||
panda.step()
|
||||
p.stepSimulation()
|
||||
time.sleep(timeStep)
|
||||
|
||||
20
examples/pybullet/gym/pybullet_envs/examples/loadxarm.py
Normal file
20
examples/pybullet/gym/pybullet_envs/examples/loadxarm.py
Normal file
@@ -0,0 +1,20 @@
|
||||
import pybullet as p
|
||||
import pybullet_data as pd
|
||||
import math
|
||||
import time
|
||||
import numpy as np
|
||||
import xarm_sim
|
||||
|
||||
p.connect(p.GUI)
|
||||
p.setAdditionalSearchPath(pd.getDataPath())
|
||||
|
||||
timeStep=1./60.
|
||||
p.setTimeStep(timeStep)
|
||||
p.setGravity(0,0,-9.8)
|
||||
|
||||
xarm = xarm_sim.XArm6Sim(p,[0,0,0])
|
||||
while (1):
|
||||
xarm.step()
|
||||
p.stepSimulation()
|
||||
time.sleep(timeStep)
|
||||
|
||||
65
examples/pybullet/gym/pybullet_envs/examples/panda_sim.py
Normal file
65
examples/pybullet/gym/pybullet_envs/examples/panda_sim.py
Normal file
@@ -0,0 +1,65 @@
|
||||
import time
|
||||
import numpy as np
|
||||
import math
|
||||
|
||||
useNullSpace = 1
|
||||
ikSolver = 0
|
||||
pandaEndEffectorIndex = 11 #8
|
||||
pandaNumDofs = 7
|
||||
|
||||
ll = [-7]*pandaNumDofs
|
||||
#upper limits for null space (todo: set them to proper range)
|
||||
ul = [7]*pandaNumDofs
|
||||
#joint ranges for null space (todo: set them to proper range)
|
||||
jr = [7]*pandaNumDofs
|
||||
#restposes for null space
|
||||
jointPositions=[0.98, 0.458, 0.31, -2.24, -0.30, 2.66, 2.32, 0.02, 0.02]
|
||||
rp = jointPositions
|
||||
|
||||
class PandaSim(object):
|
||||
def __init__(self, bullet_client, offset):
|
||||
self.bullet_client = bullet_client
|
||||
self.offset = np.array(offset)
|
||||
|
||||
#print("offset=",offset)
|
||||
flags = self.bullet_client.URDF_ENABLE_CACHED_GRAPHICS_SHAPES
|
||||
legos=[]
|
||||
self.bullet_client.loadURDF("tray/traybox.urdf", [0+offset[0], 0+offset[1], -0.6+offset[2]], [-0.5, -0.5, -0.5, 0.5], flags=flags)
|
||||
legos.append(self.bullet_client.loadURDF("lego/lego.urdf",np.array([0.1, 0.3, -0.5])+self.offset, flags=flags))
|
||||
legos.append(self.bullet_client.loadURDF("lego/lego.urdf",np.array([-0.1, 0.3, -0.5])+self.offset, flags=flags))
|
||||
legos.append(self.bullet_client.loadURDF("lego/lego.urdf",np.array([0.1, 0.3, -0.7])+self.offset, flags=flags))
|
||||
sphereId = self.bullet_client.loadURDF("sphere_small.urdf",np.array( [0, 0.3, -0.6])+self.offset, flags=flags)
|
||||
self.bullet_client.loadURDF("sphere_small.urdf",np.array( [0, 0.3, -0.5])+self.offset, flags=flags)
|
||||
self.bullet_client.loadURDF("sphere_small.urdf",np.array( [0, 0.3, -0.7])+self.offset, flags=flags)
|
||||
orn=[-0.707107, 0.0, 0.0, 0.707107]#p.getQuaternionFromEuler([-math.pi/2,math.pi/2,0])
|
||||
eul = self.bullet_client.getEulerFromQuaternion([-0.5, -0.5, -0.5, 0.5])
|
||||
self.panda = self.bullet_client.loadURDF("franka_panda/panda.urdf", np.array([0,0,0])+self.offset, orn, useFixedBase=True, flags=flags)
|
||||
index = 0
|
||||
for j in range(self.bullet_client.getNumJoints(self.panda)):
|
||||
self.bullet_client.changeDynamics(self.panda, j, linearDamping=0, angularDamping=0)
|
||||
info = self.bullet_client.getJointInfo(self.panda, j)
|
||||
|
||||
jointName = info[1]
|
||||
jointType = info[2]
|
||||
if (jointType == self.bullet_client.JOINT_PRISMATIC):
|
||||
|
||||
self.bullet_client.resetJointState(self.panda, j, jointPositions[index])
|
||||
index=index+1
|
||||
if (jointType == self.bullet_client.JOINT_REVOLUTE):
|
||||
self.bullet_client.resetJointState(self.panda, j, jointPositions[index])
|
||||
index=index+1
|
||||
self.t = 0.
|
||||
def reset(self):
|
||||
pass
|
||||
|
||||
def step(self):
|
||||
t = self.t
|
||||
self.t += 1./60.
|
||||
pos = [self.offset[0]+0.2 * math.sin(1.5 * t), self.offset[1]+0.044, self.offset[2]+-0.6 + 0.1 * math.cos(1.5 * t)]
|
||||
orn = self.bullet_client.getQuaternionFromEuler([math.pi/2.,0.,0.])
|
||||
jointPoses = self.bullet_client.calculateInverseKinematics(self.panda,pandaEndEffectorIndex, pos, orn, ll, ul,
|
||||
jr, rp, maxNumIterations=5)
|
||||
for i in range(pandaNumDofs):
|
||||
self.bullet_client.setJointMotorControl2(self.panda, i, self.bullet_client.POSITION_CONTROL, jointPoses[i],force=5 * 240.)
|
||||
pass
|
||||
|
||||
@@ -1,139 +0,0 @@
|
||||
import os
|
||||
import inspect
|
||||
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
|
||||
parentdir = os.path.dirname(os.path.dirname(currentdir))
|
||||
os.sys.path.insert(0, parentdir)
|
||||
|
||||
import pybullet as p
|
||||
import math
|
||||
import time
|
||||
import pybullet_data
|
||||
|
||||
p.connect(p.GUI)
|
||||
#p.loadURDF("wheel.urdf",[0,0,3])
|
||||
p.setAdditionalSearchPath(pybullet_data.getDataPath())
|
||||
plane = p.loadURDF("plane100.urdf", [0, 0, 0])
|
||||
timestep = 1. / 240.
|
||||
|
||||
bike = -1
|
||||
for i in range(1):
|
||||
|
||||
bike = p.loadURDF("bicycle/bike.urdf", [0, 0 + 3 * i, 1.5], [0, 0, 0, 1], useFixedBase=False)
|
||||
p.setJointMotorControl2(bike, 0, p.VELOCITY_CONTROL, targetVelocity=0, force=0.05)
|
||||
#p.setJointMotorControl2(bike,1,p.VELOCITY_CONTROL,targetVelocity=5, force=1000)
|
||||
p.setJointMotorControl2(bike, 1, p.VELOCITY_CONTROL, targetVelocity=5, force=0)
|
||||
p.setJointMotorControl2(bike, 2, p.VELOCITY_CONTROL, targetVelocity=15, force=20)
|
||||
|
||||
p.changeDynamics(plane, -1, mass=0, lateralFriction=1, linearDamping=0, angularDamping=0)
|
||||
p.changeDynamics(bike, 1, lateralFriction=1, linearDamping=0, angularDamping=0)
|
||||
p.changeDynamics(bike, 2, lateralFriction=1, linearDamping=0, angularDamping=0)
|
||||
#p.resetJointState(bike,1,0,100)
|
||||
#p.resetJointState(bike,2,0,100)
|
||||
#p.resetBaseVelocity(bike,[0,0,0],[0,0,0])
|
||||
#p.setPhysicsEngineParameter(numSubSteps=0)
|
||||
#bike=p.loadURDF("frame.urdf",useFixedBase=True)
|
||||
#bike = p.loadURDF("handlebar.urdf", useFixedBase=True)
|
||||
#p.loadURDF("handlebar.urdf",[0,2,1])
|
||||
#coord = p.loadURDF("handlebar.urdf", [0.7700000000000005, 0, 0.22000000000000006],useFixedBase=True)# p.loadURDF("coordinateframe.urdf",[-2,0,1],useFixedBase=True)
|
||||
#coord = p.loadURDF("coordinateframe.urdf",[-2,0,1],useFixedBase=True)
|
||||
p.setGravity(0, 0, -10)
|
||||
p.setRealTimeSimulation(0)
|
||||
#coordPos = [0,0,0]
|
||||
#coordOrnEuler = [0,0,0]
|
||||
|
||||
#coordPos= [0.7000000000000004, 0, 0.22000000000000006]
|
||||
#coordOrnEuler= [0, -0.2617993877991496, 0]
|
||||
|
||||
coordPos = [0.07, 0, -0.6900000000000004]
|
||||
coordOrnEuler = [0, 0, 0]
|
||||
|
||||
coordPos2 = [0, 0, 0]
|
||||
coordOrnEuler2 = [0, 0, 0]
|
||||
|
||||
invPos, invOrn = p.invertTransform(coordPos, p.getQuaternionFromEuler(coordOrnEuler))
|
||||
mPos, mOrn = p.multiplyTransforms(invPos, invOrn, coordPos2,
|
||||
p.getQuaternionFromEuler(coordOrnEuler2))
|
||||
eul = p.getEulerFromQuaternion(mOrn)
|
||||
print("rpy=\"", eul[0], eul[1], eul[2], "\" xyz=\"", mPos[0], mPos[1], mPos[2])
|
||||
|
||||
shift = 0
|
||||
gui = 1
|
||||
|
||||
frame = 0
|
||||
states = []
|
||||
states.append(p.saveState())
|
||||
#observations=[]
|
||||
#observations.append(obs)
|
||||
|
||||
running = True
|
||||
reverting = False
|
||||
p.getCameraImage(320, 200) #,renderer=p.ER_BULLET_HARDWARE_OPENGL )
|
||||
|
||||
while (1):
|
||||
|
||||
updateCam = 0
|
||||
keys = p.getKeyboardEvents()
|
||||
for k, v in keys.items():
|
||||
if (reverting or (k == p.B3G_LEFT_ARROW and (v & p.KEY_WAS_TRIGGERED))):
|
||||
reverting = True
|
||||
stateIndex = len(states) - 1
|
||||
#print("prestateIndex=",stateIndex)
|
||||
time.sleep(timestep)
|
||||
updateCam = 1
|
||||
if (stateIndex > 0):
|
||||
stateIndex -= 1
|
||||
states = states[:stateIndex + 1]
|
||||
#observations=observations[:stateIndex+1]
|
||||
|
||||
#print("states=",states)
|
||||
#print("stateIndex =",stateIndex )
|
||||
p.restoreState(states[stateIndex])
|
||||
#obs=observations[stateIndex]
|
||||
|
||||
#obs, r, done, _ = env.step(a)
|
||||
if (k == p.B3G_LEFT_ARROW and (v & p.KEY_WAS_RELEASED)):
|
||||
reverting = False
|
||||
|
||||
if (k == ord('1') and (v & p.KEY_WAS_TRIGGERED)):
|
||||
gui = not gui
|
||||
|
||||
if (k == p.B3G_RIGHT_ARROW and (v & p.KEY_WAS_RELEASED)):
|
||||
running = False
|
||||
|
||||
if (running or (k == p.B3G_RIGHT_ARROW and (v & p.KEY_WAS_TRIGGERED))):
|
||||
running = True
|
||||
|
||||
if (running):
|
||||
|
||||
p.stepSimulation()
|
||||
|
||||
updateCam = 1
|
||||
time.sleep(timestep)
|
||||
pts = p.getContactPoints()
|
||||
#print("numPoints=",len(pts))
|
||||
#for point in pts:
|
||||
# print("Point:bodyA=", point[1],"bodyB=",point[2],"linkA=",point[3],"linkB=",point[4],"dist=",point[8],"force=",point[9])
|
||||
|
||||
states.append(p.saveState())
|
||||
#observations.append(obs)
|
||||
stateIndex = len(states)
|
||||
#print("stateIndex =",stateIndex )
|
||||
frame += 1
|
||||
if (updateCam):
|
||||
distance = 5
|
||||
yaw = 0
|
||||
humanPos, humanOrn = p.getBasePositionAndOrientation(bike)
|
||||
humanBaseVel = p.getBaseVelocity(bike)
|
||||
#print("frame",frame, "humanPos=",humanPos, "humanVel=",humanBaseVel)
|
||||
if (gui):
|
||||
|
||||
camInfo = p.getDebugVisualizerCamera()
|
||||
curTargetPos = camInfo[11]
|
||||
distance = camInfo[10]
|
||||
yaw = camInfo[8]
|
||||
pitch = camInfo[9]
|
||||
targetPos = [
|
||||
0.95 * curTargetPos[0] + 0.05 * humanPos[0], 0.95 * curTargetPos[1] + 0.05 * humanPos[1],
|
||||
curTargetPos[2]
|
||||
]
|
||||
p.resetDebugVisualizerCamera(distance, yaw, pitch, targetPos)
|
||||
@@ -9,12 +9,12 @@ class WalkerBaseBulletEnv(MJCFBaseBulletEnv):
|
||||
|
||||
def __init__(self, robot, render=False):
|
||||
# print("WalkerBase::__init__ start")
|
||||
MJCFBaseBulletEnv.__init__(self, robot, render)
|
||||
|
||||
self.camera_x = 0
|
||||
self.walk_target_x = 1e3 # kilometer away
|
||||
self.walk_target_y = 0
|
||||
self.stateId = -1
|
||||
MJCFBaseBulletEnv.__init__(self, robot, render)
|
||||
|
||||
|
||||
def create_single_player_scene(self, bullet_client):
|
||||
self.stadium_scene = SinglePlayerStadiumScene(bullet_client,
|
||||
|
||||
@@ -1,54 +0,0 @@
|
||||
"""A wrapper for pybullet to manage different clients."""
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
import functools
|
||||
import inspect
|
||||
import pybullet
|
||||
|
||||
|
||||
class BulletClient(object):
|
||||
"""A wrapper for pybullet to manage different clients."""
|
||||
|
||||
def __init__(self, connection_mode=None):
|
||||
"""Creates a Bullet client and connects to a simulation.
|
||||
|
||||
Args:
|
||||
connection_mode:
|
||||
`None` connects to an existing simulation or, if fails, creates a
|
||||
new headless simulation,
|
||||
`pybullet.GUI` creates a new simulation with a GUI,
|
||||
`pybullet.DIRECT` creates a headless simulation,
|
||||
`pybullet.SHARED_MEMORY` connects to an existing simulation.
|
||||
"""
|
||||
self._shapes = {}
|
||||
|
||||
if connection_mode is None:
|
||||
self._client = pybullet.connect(pybullet.SHARED_MEMORY)
|
||||
if self._client >= 0:
|
||||
return
|
||||
else:
|
||||
connection_mode = pybullet.DIRECT
|
||||
self._client = pybullet.connect(connection_mode)
|
||||
|
||||
def __del__(self):
|
||||
"""Clean up connection if not already done."""
|
||||
try:
|
||||
pybullet.disconnect(physicsClientId=self._client)
|
||||
except pybullet.error:
|
||||
pass
|
||||
|
||||
def __getattr__(self, name):
|
||||
"""Inject the client id into Bullet functions."""
|
||||
attribute = getattr(pybullet, name)
|
||||
if inspect.isbuiltin(attribute):
|
||||
if name not in [
|
||||
"invertTransform",
|
||||
"multiplyTransforms",
|
||||
"getMatrixFromQuaternion",
|
||||
"getEulerFromQuaternion",
|
||||
"computeViewMatrixFromYawPitchRoll",
|
||||
"computeProjectionMatrixFOV",
|
||||
"getQuaternionFromEuler",
|
||||
]: # A temporary hack for now.
|
||||
attribute = functools.partial(attribute, physicsClientId=self._client)
|
||||
return attribute
|
||||
@@ -122,7 +122,7 @@ class MinitaurExtendedEnv(MinitaurReactiveEnv):
|
||||
leg_model = []
|
||||
if self._include_leg_model:
|
||||
raw_motor_angles = self.minitaur.GetMotorAngles()
|
||||
leg_model = self._convert_to_leg_model(raw_motor_angles)
|
||||
leg_model = self.convert_to_leg_model(raw_motor_angles)
|
||||
|
||||
observation_list = (
|
||||
[parent_observation] + history_states + history_actions +
|
||||
@@ -185,7 +185,7 @@ class MinitaurExtendedEnv(MinitaurReactiveEnv):
|
||||
if self._never_terminate:
|
||||
return False
|
||||
|
||||
leg_model = self._convert_to_leg_model(self.minitaur.GetMotorAngles())
|
||||
leg_model = self.convert_to_leg_model(self.minitaur.GetMotorAngles())
|
||||
swing0 = leg_model[0]
|
||||
swing1 = leg_model[2]
|
||||
maximum_swing_angle = 0.8
|
||||
|
||||
@@ -14,7 +14,7 @@ from gym import spaces
|
||||
from gym.utils import seeding
|
||||
import numpy as np
|
||||
import pybullet
|
||||
from pybullet_envs.minitaur.envs import bullet_client
|
||||
import pybullet_utils.bullet_client as bc
|
||||
import pybullet_data
|
||||
from pybullet_envs.minitaur.envs import minitaur
|
||||
from pybullet_envs.minitaur.envs import minitaur_derpy
|
||||
@@ -223,9 +223,9 @@ class MinitaurGymEnv(gym.Env):
|
||||
self._env_randomizers = convert_to_list(env_randomizer) if env_randomizer else []
|
||||
self._episode_proto = minitaur_logging_pb2.MinitaurEpisode()
|
||||
if self._is_render:
|
||||
self._pybullet_client = bullet_client.BulletClient(connection_mode=pybullet.GUI)
|
||||
self._pybullet_client = bc.BulletClient(connection_mode=pybullet.GUI)
|
||||
else:
|
||||
self._pybullet_client = bullet_client.BulletClient()
|
||||
self._pybullet_client = bc.BulletClient()
|
||||
if self._urdf_version is None:
|
||||
self._urdf_version = DEFAULT_URDF_VERSION
|
||||
self._pybullet_client.setPhysicsEngineParameter(enableConeFriction=0)
|
||||
|
||||
@@ -14,7 +14,7 @@ from gym import spaces
|
||||
from gym.utils import seeding
|
||||
import numpy as np
|
||||
import pybullet
|
||||
from pybullet_envs.bullet import bullet_client
|
||||
import pybullet_utils.bullet_client as bc
|
||||
|
||||
from pybullet_envs.prediction import boxstack_pybullet_sim
|
||||
|
||||
@@ -71,10 +71,10 @@ class PyBulletSimGymEnv(gym.Env):
|
||||
print("urdf_root=" + self._urdf_root)
|
||||
|
||||
if self._is_render:
|
||||
self._pybullet_client = bullet_client.BulletClient(connection_mode=pybullet.GUI,
|
||||
self._pybullet_client = bc.BulletClient(connection_mode=pybullet.GUI,
|
||||
options=optionstring)
|
||||
else:
|
||||
self._pybullet_client = bullet_client.BulletClient()
|
||||
self._pybullet_client = bc.BulletClient()
|
||||
|
||||
if (debug_visualization == False):
|
||||
self._pybullet_client.configureDebugVisualizer(flag=self._pybullet_client.COV_ENABLE_GUI,
|
||||
|
||||
@@ -32,7 +32,7 @@ class StadiumScene(Scene):
|
||||
for i in self.ground_plane_mjcf:
|
||||
self._p.changeDynamics(i, -1, lateralFriction=0.8, restitution=0.5)
|
||||
self._p.changeVisualShape(i, -1, rgbaColor=[1, 1, 1, 0.8])
|
||||
self._p.configureDebugVisualizer(pybullet.COV_ENABLE_PLANAR_REFLECTION, 1)
|
||||
self._p.configureDebugVisualizer(pybullet.COV_ENABLE_PLANAR_REFLECTION,i)
|
||||
|
||||
# for j in range(p.getNumJoints(i)):
|
||||
# self._p.changeDynamics(i,j,lateralFriction=0)
|
||||
|
||||
@@ -0,0 +1,60 @@
|
||||
# Code adapted from https://github.com/araffin/rl-baselines-zoo
|
||||
# it requires stable-baselines to be installed
|
||||
# Colab Notebook: https://colab.research.google.com/drive/1nZkHO4QTYfAksm9ZTaZ5vXyC7szZxC3F
|
||||
# Author: Antonin RAFFIN
|
||||
# MIT License
|
||||
import argparse
|
||||
|
||||
import pybullet_envs
|
||||
|
||||
import gym
|
||||
import numpy as np
|
||||
from stable_baselines import SAC, TD3
|
||||
from stable_baselines.common.noise import NormalActionNoise
|
||||
|
||||
from utils import TimeFeatureWrapper, EvalCallback
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument('--algo', help='RL Algorithm (Soft Actor-Critic by default)', default='sac',
|
||||
type=str, required=False, choices=['sac', 'td3'])
|
||||
parser.add_argument('--env', type=str, default='HalfCheetahBulletEnv-v0', help='environment ID')
|
||||
parser.add_argument('-n', '--n-timesteps', help='Number of training timesteps', default=int(1e6),
|
||||
type=int)
|
||||
args = parser.parse_args()
|
||||
|
||||
env_id = args.env
|
||||
n_timesteps = args.n_timesteps
|
||||
save_path = '{}_{}'.format(args.algo, env_id)
|
||||
|
||||
# Instantiate and wrap the environment
|
||||
env = TimeFeatureWrapper(gym.make(env_id))
|
||||
|
||||
# Create the evaluation environment and callback
|
||||
eval_env = TimeFeatureWrapper(gym.make(env_id))
|
||||
callback = EvalCallback(eval_env, best_model_save_path=save_path + '_best')
|
||||
|
||||
algo = {
|
||||
'sac': SAC,
|
||||
'td3': TD3
|
||||
}[args.algo]
|
||||
|
||||
n_actions = env.action_space.shape[0]
|
||||
|
||||
# Tuned hyperparameters from https://github.com/araffin/rl-baselines-zoo
|
||||
hyperparams = {
|
||||
'sac': dict(batch_size=256, gamma=0.98, policy_kwargs=dict(layers=[256, 256]),
|
||||
learning_starts=10000, buffer_size=int(2e5), tau=0.01),
|
||||
|
||||
'td3': dict(batch_size=100, policy_kwargs=dict(layers=[400, 300]),
|
||||
learning_rate=1e-3, learning_starts=10000, buffer_size=int(1e6),
|
||||
train_freq=1000, gradient_steps=1000,
|
||||
action_noise=NormalActionNoise(mean=np.zeros(n_actions), sigma=0.1 * np.ones(n_actions)))
|
||||
}[args.algo]
|
||||
|
||||
model = algo('MlpPolicy', env, verbose=1, **hyperparams)
|
||||
model.learn(n_timesteps, callback=callback)
|
||||
|
||||
print("Saving to {}.zip".format(save_path))
|
||||
model.save(save_path)
|
||||
114
examples/pybullet/gym/pybullet_envs/stable_baselines/utils.py
Normal file
114
examples/pybullet/gym/pybullet_envs/stable_baselines/utils.py
Normal file
@@ -0,0 +1,114 @@
|
||||
# Code adapted from https://github.com/araffin/rl-baselines-zoo
|
||||
# it requires stable-baselines to be installed
|
||||
# Author: Antonin RAFFIN
|
||||
# MIT License
|
||||
import gym
|
||||
import numpy as np
|
||||
from gym.wrappers import TimeLimit
|
||||
|
||||
from stable_baselines.common.evaluation import evaluate_policy
|
||||
|
||||
|
||||
class TimeFeatureWrapper(gym.Wrapper):
|
||||
"""
|
||||
Add remaining time to observation space for fixed length episodes.
|
||||
See https://arxiv.org/abs/1712.00378 and https://github.com/aravindr93/mjrl/issues/13.
|
||||
|
||||
:param env: (gym.Env)
|
||||
:param max_steps: (int) Max number of steps of an episode
|
||||
if it is not wrapped in a TimeLimit object.
|
||||
:param test_mode: (bool) In test mode, the time feature is constant,
|
||||
equal to zero. This allow to check that the agent did not overfit this feature,
|
||||
learning a deterministic pre-defined sequence of actions.
|
||||
"""
|
||||
def __init__(self, env, max_steps=1000, test_mode=False):
|
||||
assert isinstance(env.observation_space, gym.spaces.Box)
|
||||
# Add a time feature to the observation
|
||||
low, high = env.observation_space.low, env.observation_space.high
|
||||
low, high= np.concatenate((low, [0])), np.concatenate((high, [1.]))
|
||||
env.observation_space = gym.spaces.Box(low=low, high=high, dtype=np.float32)
|
||||
|
||||
super(TimeFeatureWrapper, self).__init__(env)
|
||||
|
||||
if isinstance(env, TimeLimit):
|
||||
self._max_steps = env._max_episode_steps
|
||||
else:
|
||||
self._max_steps = max_steps
|
||||
self._current_step = 0
|
||||
self._test_mode = test_mode
|
||||
|
||||
def reset(self):
|
||||
self._current_step = 0
|
||||
return self._get_obs(self.env.reset())
|
||||
|
||||
def step(self, action):
|
||||
self._current_step += 1
|
||||
obs, reward, done, info = self.env.step(action)
|
||||
return self._get_obs(obs), reward, done, info
|
||||
|
||||
def _get_obs(self, obs):
|
||||
"""
|
||||
Concatenate the time feature to the current observation.
|
||||
|
||||
:param obs: (np.ndarray)
|
||||
:return: (np.ndarray)
|
||||
"""
|
||||
# Remaining time is more general
|
||||
time_feature = 1 - (self._current_step / self._max_steps)
|
||||
if self._test_mode:
|
||||
time_feature = 1.0
|
||||
# Optionnaly: concatenate [time_feature, time_feature ** 2]
|
||||
return np.concatenate((obs, [time_feature]))
|
||||
|
||||
|
||||
class EvalCallback(object):
|
||||
"""
|
||||
Callback for evaluating an agent.
|
||||
|
||||
:param eval_env: (gym.Env) The environment used for initialization
|
||||
:param n_eval_episodes: (int) The number of episodes to test the agent
|
||||
:param eval_freq: (int) Evaluate the agent every eval_freq call of the callback.
|
||||
:param deterministic: (bool)
|
||||
:param best_model_save_path: (str)
|
||||
:param verbose: (int)
|
||||
"""
|
||||
def __init__(self, eval_env, n_eval_episodes=5, eval_freq=10000,
|
||||
deterministic=True, best_model_save_path=None, verbose=1):
|
||||
super(EvalCallback, self).__init__()
|
||||
self.n_eval_episodes = n_eval_episodes
|
||||
self.eval_freq = eval_freq
|
||||
self.best_mean_reward = -np.inf
|
||||
self.deterministic = deterministic
|
||||
self.eval_env = eval_env
|
||||
self.verbose = verbose
|
||||
self.model, self.num_timesteps = None, 0
|
||||
self.best_model_save_path = best_model_save_path
|
||||
self.n_calls = 0
|
||||
|
||||
def __call__(self, locals_, globals_):
|
||||
"""
|
||||
:param locals_: (dict)
|
||||
:param globals_: (dict)
|
||||
:return: (bool)
|
||||
"""
|
||||
self.n_calls += 1
|
||||
self.model = locals_['self']
|
||||
self.num_timesteps = self.model.num_timesteps
|
||||
|
||||
if self.n_calls % self.eval_freq == 0:
|
||||
episode_rewards, _ = evaluate_policy(self.model, self.eval_env, n_eval_episodes=self.n_eval_episodes,
|
||||
deterministic=self.deterministic, return_episode_rewards=True)
|
||||
|
||||
|
||||
mean_reward, std_reward = np.mean(episode_rewards), np.std(episode_rewards)
|
||||
if self.verbose > 0:
|
||||
print("Eval num_timesteps={}, "
|
||||
"episode_reward={:.2f} +/- {:.2f}".format(self.num_timesteps, mean_reward, std_reward))
|
||||
|
||||
if mean_reward > self.best_mean_reward:
|
||||
if self.best_model_save_path is not None:
|
||||
print("Saving best model")
|
||||
self.model.save(self.best_model_save_path)
|
||||
self.best_mean_reward = mean_reward
|
||||
|
||||
return True
|
||||
Reference in New Issue
Block a user