import os
import math
import random
import time

import gym
from gym import spaces
from gym.utils import seeding
import numpy as np
import pybullet as p

from . import simpleHumanoid


class SimpleHumanoidGymEnv(gym.Env):
    metadata = {'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': 50}

    def __init__(self, urdfRoot="", actionRepeat=50, isEnableSelfCollision=True, renders=True):
        self._timeStep = 0.01
        self._urdfRoot = urdfRoot
        self._actionRepeat = actionRepeat
        self._isEnableSelfCollision = isEnableSelfCollision
        self._observation = []
        self._envStepCounter = 0
        self._renders = renders
        self._p = p
        # Connect to the physics server: GUI for visualization, DIRECT for headless runs.
        if self._renders:
            p.connect(p.GUI)
        else:
            p.connect(p.DIRECT)
        self._seed()
        self.reset()
        # The observation space is unbounded; its dimension comes from the humanoid model.
        observationDim = len(self.getExtendedObservation())
        observation_high = np.array([np.finfo(np.float32).max] * observationDim)
        self.action_space = spaces.Discrete(9)
        self.observation_space = spaces.Box(-observation_high, observation_high)
        self.viewer = None

    def _reset(self):
        p.resetSimulation()
        #p.setPhysicsEngineParameter(numSolverIterations=300)
        p.setTimeStep(self._timeStep)
        p.loadURDF(os.path.join(os.path.dirname(__file__), "../data", "plane.urdf"))
        # Sample a target position on a ring 5-7 m from the origin
        # (computed but not otherwise used in this environment).
        dist = 5 + 2. * random.random()
        ang = 2. * math.pi * random.random()
        ballx = dist * math.sin(ang)
        bally = dist * math.cos(ang)
        ballz = 1
        p.setGravity(0, 0, -10)
        self._humanoid = simpleHumanoid.SimpleHumanoid(urdfRootPath=self._urdfRoot,
                                                       timeStep=self._timeStep)
        self._envStepCounter = 0
        p.stepSimulation()
        self._observation = self.getExtendedObservation()
        return np.array(self._observation)

    def __del__(self):
        p.disconnect()

    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def getExtendedObservation(self):
        self._observation = self._humanoid.getObservation()
        return self._observation

    def _step(self, action):
        self._humanoid.applyAction(action)
        # Advance the simulation actionRepeat substeps per environment step.
        for i in range(self._actionRepeat):
            p.stepSimulation()
            if self._renders:
                time.sleep(self._timeStep)
            self._observation = self.getExtendedObservation()
            if self._termination():
                break
            self._envStepCounter += 1
        reward = self._reward()
        done = self._termination()
        return np.array(self._observation), reward, done, {}

    def _render(self, mode='human', close=False):
        return

    def _termination(self):
        # Episodes are capped at 1000 simulation substeps.
        return self._envStepCounter > 1000

    def _reward(self):
        # Reward is the distance tracked by the humanoid model.
        return self._humanoid.distance
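
# Usage sketch (illustrative, not part of the original module): drives one
# episode with random actions as a headless smoke test. Because of the
# relative import above, run it as a module, e.g. `python -m <package>.<this_module>`,
# where <package> is the hypothetical package containing this file. Under the
# old-style gym API used here, env.reset() and env.step() dispatch to the
# _reset() and _step() methods defined above.
if __name__ == "__main__":
    env = SimpleHumanoidGymEnv(renders=False)  # DIRECT mode: no GUI window
    obs = env.reset()
    done = False
    total_reward = 0.0
    while not done:
        action = env.action_space.sample()  # random action from Discrete(9)
        obs, reward, done, _ = env.step(action)
        total_reward += reward
    print("episode reward: %f" % total_reward)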