bullet3/examples/pybullet/gym/pybullet_envs/bullet/minitaurGymEnv.py
Erwin Coumans 21f9d1b816 refactor pybullet/gym to allow instantiating environments directly from a pybullet install:
work-in-progress (need to add missing data files, fix paths, etc.)

example:

pip install pybullet
pip install gym

python
import gym
import pybullet
import pybullet_envs
env = gym.make("HumanoidBulletEnv-v0")
2017-08-22 00:42:02 -07:00
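
The file below defines the Minitaur environment itself. The class can also be constructed directly once its module is on the path; a minimal sketch, assuming this file is importable under the name minitaurGymEnv (an assumption, since the commit notes that paths are still work-in-progress):

import minitaurGymEnv

env = minitaurGymEnv.MinitaurGymEnv(render=True)
obs = env.reset()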


import time

import gym
from gym import spaces
from gym.utils import seeding
import numpy as np
import pybullet as p

import minitaur_new


class MinitaurGymEnv(gym.Env):
  """Gym environment in which a minitaur quadruped learns to walk forward."""
  metadata = {
      'render.modes': ['human', 'rgb_array'],
      'video.frames_per_second': 50
  }
  def __init__(self,
               urdfRoot="",
               actionRepeat=1,
               isEnableSelfCollision=True,
               motorVelocityLimit=10.0,
               render=False):
    self._timeStep = 0.01
    self._urdfRoot = urdfRoot
    self._actionRepeat = actionRepeat
    self._motorVelocityLimit = motorVelocityLimit
    self._isEnableSelfCollision = isEnableSelfCollision
    self._observation = []
    self._envStepCounter = 0
    # Stored under a different name than the _render method below;
    # assigning to self._render would shadow the method and break
    # env.render() in the old gym API.
    self._isRender = render
    self._lastBasePosition = [0, 0, 0]
    if self._isRender:
      p.connect(p.GUI)
    else:
      p.connect(p.DIRECT)
    self._seed()
    self.reset()
    observationDim = self._minitaur.getObservationDimension()
    observation_high = np.array([np.finfo(np.float32).max] * observationDim)
    actionDim = 8
    action_high = np.array([1] * actionDim)
    self.action_space = spaces.Box(-action_high, action_high)
    self.observation_space = spaces.Box(-observation_high, observation_high)
    self.viewer = None
  def _reset(self):
    p.resetSimulation()
    p.setPhysicsEngineParameter(numSolverIterations=300)
    p.setTimeStep(self._timeStep)
    p.loadURDF("%splane.urdf" % self._urdfRoot)
    p.setGravity(0, 0, -10)
    self._minitaur = minitaur_new.Minitaur(
        urdfRootPath=self._urdfRoot,
        timeStep=self._timeStep,
        isEnableSelfCollision=self._isEnableSelfCollision,
        motorVelocityLimit=self._motorVelocityLimit)
    self._envStepCounter = 0
    self._lastBasePosition = [0, 0, 0]
    # Let the robot settle on the plane before taking the first observation.
    for _ in range(100):
      p.stepSimulation()
    self._observation = self._minitaur.getObservation()
    return self._observation
  def __del__(self):
    p.disconnect()

  def _seed(self, seed=None):
    self.np_random, seed = seeding.np_random(seed)
    return [seed]
  def _step(self, action):
    if self._isRender:
      # Keep the debug camera centered on the robot
      # (arguments: distance, yaw, pitch, target position).
      basePos = self._minitaur.getBasePosition()
      p.resetDebugVisualizerCamera(1, 30, 40, basePos)
    if len(action) != self._minitaur.getActionDimension():
      raise ValueError("We expect {} continuous actions, not {}.".format(
          self._minitaur.getActionDimension(), len(action)))
    for i in range(len(action)):
      if not -1.01 <= action[i] <= 1.01:
        raise ValueError(
            "The {}th action should be between -1 and 1, not {}.".format(
                i, action[i]))
    realAction = self._minitaur.convertFromLegModel(action)
    self._minitaur.applyAction(realAction)
    for _ in range(self._actionRepeat):
      p.stepSimulation()
      if self._isRender:
        time.sleep(self._timeStep)
      self._observation = self._minitaur.getObservation()
      if self._termination():
        break
      self._envStepCounter += 1
    reward = self._reward()
    done = self._termination()
    return np.array(self._observation), reward, done, {}
  def _render(self, mode='human', close=False):
    # Visualization happens through the GUI connection opened in __init__,
    # so there is nothing to do here.
    return
  def is_fallen(self):
    orientation = self._minitaur.getBaseOrientation()
    rotMat = p.getMatrixFromQuaternion(orientation)
    # Last row of the row-major 3x3 rotation matrix; its dot product with
    # world up equals the z-component of the robot's up axis.
    localUp = rotMat[6:]
    return (np.dot(np.asarray([0, 0, 1]), np.asarray(localUp)) < 0 or
            self._observation[-1] < 0.1)

  def _termination(self):
    return self.is_fallen()
  def _reward(self):
    # Reward forward progress along x, minus a small energy penalty
    # (|torque . velocity| integrated over one time step).
    currentBasePosition = self._minitaur.getBasePosition()
    forward_reward = currentBasePosition[0] - self._lastBasePosition[0]
    self._lastBasePosition = currentBasePosition
    energyWeight = 0.001
    energy = np.abs(
        np.dot(self._minitaur.getMotorTorques(),
               self._minitaur.getMotorVelocities())) * self._timeStep
    energy_reward = energyWeight * energy
    reward = forward_reward - energy_reward
    return reward
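

# A minimal smoke test illustrating the action protocol enforced in _step:
# eight leg-space values in [-1, 1] per step. This driver is a sketch added
# for illustration, not part of the original file; it assumes minitaur_new is
# importable and that plane.urdf can be found via urdfRoot.
if __name__ == "__main__":
  env = MinitaurGymEnv(render=False)
  env.reset()
  total_reward = 0.0
  for _ in range(200):
    action = env.np_random.uniform(-1, 1, size=8)
    _, reward, done, _ = env.step(action)
    total_reward += reward
    if done:
      break
  print("episode reward: {:.3f}".format(total_reward))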