Split locomotion environments.

2017-08-15 21:33:55 +02:00
parent cab3de35e4
commit 68106d66dd
10 changed files with 525 additions and 444 deletions
--- a/examples/pybullet/gym/enjoy_TF_HopperBulletEnv_v0_2017may.py
+++ b/examples/pybullet/gym/enjoy_TF_HopperBulletEnv_v0_2017may.py
@@ -8,7 +8,9 @@ def relu(x):
    return np.maximum(x, 0)

 class SmallReactivePolicy:
-    "Simple multi-layer perceptron policy, no internal state"
+    """
+    Simple multi-layer perceptron policy, no internal state
+    """
    def __init__(self, observation_space, action_space):
        assert weights_dense1_w.shape == (observation_space.shape[0], 128)
        assert weights_dense2_w.shape == (128, 64)
--- a/examples/pybullet/gym/envs/init.py
+++ b/examples/pybullet/gym/envs/init.py
@@ -53,32 +53,25 @@ register(

 register(
 	id='InvertedPendulumBulletEnv-v0',
-	entry_point='envs.gym_pendula_envs:InvertedPendulumBulletEnv',
+	entry_point='envs.gym_pendulum_envs:InvertedPendulumBulletEnv',
 	max_episode_steps=1000,
 	reward_threshold=950.0,
 	)

 register(
 	id='InvertedDoublePendulumBulletEnv-v0',
-	entry_point='envs.gym_pendula_envs:InvertedDoublePendulumBulletEnv',
+	entry_point='envs.gym_pendulum_envs:InvertedDoublePendulumBulletEnv',
 	max_episode_steps=1000,
 	reward_threshold=9100.0,
 	)

 register(
 	id='InvertedPendulumSwingupBulletEnv-v0',
-	entry_point='envs.gym_pendula_envs:InvertedPendulumSwingupBulletEnv',
+	entry_point='envs.gym_pendulum_envs:InvertedPendulumSwingupBulletEnv',
 	max_episode_steps=1000,
 	reward_threshold=800.0,
 	)

-register(
-	id='HopperBulletEnv-v0',
-	entry_point='envs.gym_walker_envs:HopperBulletEnv',
-	max_episode_steps=1000,
-	reward_threshold=2500.0
-	)
-
 register(
 	id='ReacherBulletEnv-v0',
 	entry_point='envs.gym_manipulator_envs:ReacherBulletEnv',
@@ -109,26 +102,33 @@ register(

 register(
 	id='Walker2DBulletEnv-v0',
-	entry_point='envs.gym_walker_envs:Walker2DBulletEnv',
+	entry_point='envs.gym_locomotion_envs:Walker2DBulletEnv',
 	max_episode_steps=1000,
 	reward_threshold=2500.0
 	)
 register(
 	id='HalfCheetahBulletEnv-v0',
-	entry_point='envs.gym_walker_envs:HalfCheetahBulletEnv',
+	entry_point='envs.gym_locomotion_envs:HalfCheetahBulletEnv',
 	max_episode_steps=1000,
 	reward_threshold=3000.0
 	)

 register(
 	id='AntBulletEnv-v0',
-	entry_point='envs.gym_walker_envs:AntBulletEnv',
+	entry_point='envs.gym_locomotion_envs:AntBulletEnv',
 	max_episode_steps=1000,
 	reward_threshold=2500.0
 	)

 register(
 	id='HumanoidBulletEnv-v0',
-	entry_point='envs.gym_walker_envs:HumanoidBulletEnv',
+	entry_point='envs.gym_locomotion_envs:HumanoidBulletEnv',
 	max_episode_steps=1000
 	)
+
+register(
+	id='HopperBulletEnv-v0',
+	entry_point='envs.gym_locomotion_envs:HopperBulletEnv',
+	max_episode_steps=1000,
+	reward_threshold=2500.0
+	)
--- a/examples/pybullet/gym/envs/env_bases.py
+++ b/examples/pybullet/gym/envs/env_bases.py
@@ -1,12 +1,11 @@
 import gym, gym.spaces, gym.utils, gym.utils.seeding
 import numpy as np
 import pybullet as p
-import os


 class MujocoXmlBaseBulletEnv(gym.Env):
 	"""
-	Base class for MuJoCo .xml actors in a Scene.
+	Base class for MuJoCo .xml environments in a Scene.
 	These environments create single-player scenes and behave like normal Gym environments, if
 	you don't use multiplayer.
 	"""
@@ -16,112 +15,42 @@ class MujocoXmlBaseBulletEnv(gym.Env):
 		'video.frames_per_second': 60
 		}

-	self_collision = False
-
-	def __init__(self, model_xml, robot_name, action_dim, obs_dim):
+	def __init__(self, robot):
 		self.scene = None

-		self.parts = None
-		self.jdict = None
-		self.ordered_joints = None
-		self.robot_body = None
-
-		high = np.ones([action_dim])
-		self.action_space = gym.spaces.Box(-high, high)
-		high = np.inf*np.ones([obs_dim])
-		self.observation_space = gym.spaces.Box(-high, high)
-		self._seed()
-
-		self.model_xml = model_xml
-		self.robot_name = robot_name
-
 		self.camera = Camera()

+		self.robot = robot
+
+		self._seed()
+
+		self.action_space = robot.action_space
+		self.observation_space = robot.observation_space
+
 	def _seed(self, seed=None):
 		self.np_random, seed = gym.utils.seeding.np_random(seed)
+		self.robot.np_random = self.np_random # use the same np_randomizer for robot as for env
 		return [seed]

-	def addToScene(self, bodies):
-		if self.parts is not None:
-			parts = self.parts
-		else:
-			parts = {}
-
-		if self.jdict is not None:
-			joints = self.jdict
-		else:
-			joints = {}
-
-		if self.ordered_joints is not None:
-			ordered_joints = self.ordered_joints
-		else:
-			ordered_joints = []
-
-		dump = 0
-		for i in range(len(bodies)):
-			if p.getNumJoints(bodies[i]) == 0:
-				part_name, robot_name = p.getBodyInfo(bodies[i], 0)
-				robot_name = robot_name.decode("utf8")
-				part_name = part_name.decode("utf8")
-				parts[part_name] = BodyPart(part_name, bodies, i, -1)
-			for j in range(p.getNumJoints(bodies[i])):
-				_,joint_name,_,_,_,_,_,_,_,_,_,_,part_name = p.getJointInfo(bodies[i], j)
-
-				joint_name = joint_name.decode("utf8")
-				part_name = part_name.decode("utf8")
-
-				if dump: print("ROBOT PART '%s'" % part_name)
-				if dump: print("ROBOT JOINT '%s'" % joint_name) # limits = %+0.2f..%+0.2f effort=%0.3f speed=%0.3f" % ((joint_name,) + j.limits()) )
-
-				parts[part_name] = BodyPart(part_name, bodies, i, j)
-
-				if part_name == self.robot_name:
-					self.robot_body = parts[part_name]
-
-				if i == 0 and j == 0 and self.robot_body is None:  # if nothing else works, we take this as robot_body
-					parts[self.robot_name] = BodyPart(self.robot_name, bodies, 0, -1)
-					self.robot_body = parts[self.robot_name]
-
-				if joint_name[:6] == "ignore":
-					Joint(joint_name, bodies, i, j).disable_motor()
-					continue
-
-				if joint_name[:8] != "jointfix":
-					joints[joint_name] = Joint(joint_name, bodies, i, j)
-					ordered_joints.append(joints[joint_name])
-
-					joints[joint_name].power_coef = 100.0
-
-		return parts, joints, ordered_joints, self.robot_body
-
 	def _reset(self):
 		if self.scene is None:
 			self.scene = self.create_single_player_scene()
 		if not self.scene.multiplayer:
 			self.scene.episode_restart()

-		self.ordered_joints = []
+		self.robot.scene = self.scene
+
 		self.frame = 0
 		self.done = 0
 		self.reward = 0
 		dump = 0
-
-		if self.self_collision:
-			self.parts, self.jdict, self.ordered_joints, self.robot_body = self.addToScene(p.loadMJCF(os.path.join("mjcf", self.model_xml), flags = p.URDF_USE_SELF_COLLISION_EXCLUDE_ALL_PARENTS))
-		else:
-			self.parts, self.jdict, self.ordered_joints, self.robot_body = self.addToScene(p.loadMJCF(os.path.join("mjcf", self.model_xml)))
-
-		self.robot_specific_reset()
-		s = self.calc_state()	# optimization: calc_state() can calculate something in self.* for calc_potential() to use
-		self.potential = self.calc_potential()
+		s = self.robot.reset()
+		self.potential = self.robot.calc_potential()
 		return s

 	def _render(self, mode, close):
 		pass

-	def calc_potential(self):
-		return 0
-
 	def HUD(self, state, a, done):
 		pass

@@ -134,113 +63,3 @@ class Camera:
 		distance = 10
 		yaw = 10
 		p.resetDebugVisualizerCamera(distance, yaw, -20, lookat)
-
-class Pose_Helper: # dummy class to comply to original interface
-	def __init__(self, body_part):
-		self.body_part = body_part
-
-	def xyz(self):
-		return self.body_part.current_position()
-
-	def rpy(self):
-		return p.getEulerFromQuaternion(self.body_part.current_orientation())
-
-	def orientation(self):
-		return self.body_part.current_orientation()
-
-class BodyPart:
-	def __init__(self, body_name, bodies, bodyIndex, bodyPartIndex):
-		self.bodies = bodies
-		self.bodyIndex = bodyIndex
-		self.bodyPartIndex = bodyPartIndex
-		self.initialPosition = self.current_position()
-		self.initialOrientation = self.current_orientation()
-		self.bp_pose = Pose_Helper(self)
-
-	def state_fields_of_pose_of(self, body_id, link_id=-1):  # a method you will most probably need a lot to get pose and orientation
-		if link_id == -1:
-			(x, y, z), (a, b, c, d) = p.getBasePositionAndOrientation(body_id)
-		else:
-			(x, y, z), (a, b, c, d), _, _, _, _ = p.getLinkState(body_id, link_id)
-		return np.array([x, y, z, a, b, c, d])
-
-	def get_pose(self):
-		return self.state_fields_of_pose_of(self.bodies[self.bodyIndex], self.bodyPartIndex)
-
-	def speed(self):
-		if self.bodyPartIndex == -1:
-			(vx, vy, vz), _ = p.getBaseVelocity(self.bodies[self.bodyIndex])
-		else:
-			(x,y,z), (a,b,c,d), _,_,_,_, (vx, vy, vz), (vr,vp,vy) = p.getLinkState(self.bodies[self.bodyIndex], self.bodyPartIndex, computeLinkVelocity=1)
-		return np.array([vx, vy, vz])
-
-	def current_position(self):
-		return self.get_pose()[:3]
-
-	def current_orientation(self):
-		return self.get_pose()[3:]
-
-	def reset_position(self, position):
-		p.resetBasePositionAndOrientation(self.bodies[self.bodyIndex], position, self.get_orientation())
-
-	def reset_orientation(self, orientation):
-		p.resetBasePositionAndOrientation(self.bodies[self.bodyIndex], self.get_position(), orientation)
-
-	def reset_pose(self, position, orientation):
-		p.resetBasePositionAndOrientation(self.bodies[self.bodyIndex], position, orientation)
-
-	def pose(self):
-		return self.bp_pose
-
-	def contact_list(self):
-		return p.getContactPoints(self.bodies[self.bodyIndex], -1, self.bodyPartIndex, -1)
-
-
-class Joint:
-	def __init__(self, joint_name, bodies, bodyIndex, jointIndex):
-		self.bodies = bodies
-		self.bodyIndex = bodyIndex
-		self.jointIndex = jointIndex
-		self.joint_name = joint_name
-		_,_,_,_,_,_,_,_,self.lowerLimit, self.upperLimit,_,_,_ = p.getJointInfo(self.bodies[self.bodyIndex], self.jointIndex)
-		self.power_coeff = 0
-
-	def set_state(self, x, vx):
-		p.resetJointState(self.bodies[self.bodyIndex], self.jointIndex, x, vx)
-
-	def current_position(self): # just some synonyme method
-		return self.get_state()
-
-	def current_relative_position(self):
-		pos, vel = self.get_state()
-		pos_mid = 0.5 * (self.lowerLimit + self.upperLimit);
-		return (
-			2 * (pos - pos_mid) / (self.upperLimit - self.lowerLimit),
-			0.1 * vel
-		)
-
-	def get_state(self):
-		x, vx,_,_ = p.getJointState(self.bodies[self.bodyIndex],self.jointIndex)
-		return x, vx
-
-	def set_position(self, position):
-		p.setJointMotorControl2(self.bodies[self.bodyIndex],self.jointIndex,p.POSITION_CONTROL, targetPosition=position)
-
-	def set_velocity(self, velocity):
-		p.setJointMotorControl2(self.bodies[self.bodyIndex],self.jointIndex,p.VELOCITY_CONTROL, targetVelocity=velocity)
-
-	def set_motor_torque(self, torque): # just some synonyme method
-		self.set_torque(torque)
-
-	def set_torque(self, torque):
-		p.setJointMotorControl2(bodyIndex=self.bodies[self.bodyIndex], jointIndex=self.jointIndex, controlMode=p.TORQUE_CONTROL, force=torque) #, positionGain=0.1, velocityGain=0.1)
-
-	def reset_current_position(self, position, velocity): # just some synonyme method
-		self.reset_position(position, velocity)
-
-	def reset_position(self, position, velocity):
-		p.resetJointState(self.bodies[self.bodyIndex],self.jointIndex,targetValue=position, targetVelocity=velocity)
-		self.disable_motor()
-
-	def disable_motor(self):
-		p.setJointMotorControl2(self.bodies[self.bodyIndex],self.jointIndex,controlMode=p.VELOCITY_CONTROL, force=0)
--- a/examples/pybullet/gym/envs/gym_locomotion_envs.py
+++ b/examples/pybullet/gym/envs/gym_locomotion_envs.py
@@ -0,0 +1,110 @@
+from scene_stadium import SinglePlayerStadiumScene
+from env_bases import MujocoXmlBaseBulletEnv
+import numpy as np
+from robot_locomotors import Hopper, Walker2D, HalfCheetah, Ant, Humanoid
+
+
+class WalkerBaseBulletEnv(MujocoXmlBaseBulletEnv):
+	def __init__(self, robot):
+		MujocoXmlBaseBulletEnv.__init__(self, robot)
+		self.camera_x = 0
+		self.walk_target_x = 1e3  # kilometer away
+		self.walk_target_y = 0
+
+	def create_single_player_scene(self):
+		self.stadium_scene = SinglePlayerStadiumScene(gravity=9.8, timestep=0.0165/4, frame_skip=4)
+		return self.stadium_scene
+
+	def _reset(self):
+		r = MujocoXmlBaseBulletEnv._reset(self)
+		self.parts, self.jdict, self.ordered_joints, self.robot_body = self.robot.addToScene(
+			self.stadium_scene.ground_plane_mjcf)
+		self.ground_ids = set([(self.parts[f].bodies[self.parts[f].bodyIndex], self.parts[f].bodyPartIndex) for f in
+							   self.foot_ground_object_names])
+		return r
+
+	def move_robot(self, init_x, init_y, init_z):
+		"Used by multiplayer stadium to move sideways, to another running lane."
+		self.cpp_robot.query_position()
+		pose = self.cpp_robot.root_part.pose()
+		pose.move_xyz(init_x, init_y, init_z)  # Works because robot loads around (0,0,0), and some robots have z != 0 that is left intact
+		self.cpp_robot.set_pose(pose)
+
+	electricity_cost	 = -2.0	# cost for using motors -- this parameter should be carefully tuned against reward for making progress, other values less improtant
+	stall_torque_cost	= -0.1	# cost for running electric current through a motor even at zero rotational speed, small
+	foot_collision_cost  = -1.0	# touches another leg, or other objects, that cost makes robot avoid smashing feet into itself
+	foot_ground_object_names = set(["floor"])  # to distinguish ground and other objects
+	joints_at_limit_cost = -0.1	# discourage stuck joints
+
+	def _step(self, a):
+		if not self.scene.multiplayer:  # if multiplayer, action first applied to all robots, then global step() called, then _step() for all robots with the same actions
+			self.robot.apply_action(a)
+			self.scene.global_step()
+
+		state = self.robot.calc_state()  # also calculates self.joints_at_limit
+
+		alive = float(self.robot.alive_bonus(state[0]+self.robot.initial_z, self.robot.body_rpy[1]))   # state[0] is body height above ground, body_rpy[1] is pitch
+		done = alive < 0
+		if not np.isfinite(state).all():
+			print("~INF~", state)
+			done = True
+
+		potential_old = self.potential
+		self.potential = self.robot.calc_potential()
+		progress = float(self.potential - potential_old)
+
+		feet_collision_cost = 0.0
+		for i,f in enumerate(self.robot.feet): # TODO: Maybe calculating feet contacts could be done within the robot code
+			contact_ids = set((x[2], x[4]) for x in f.contact_list())
+			#print("CONTACT OF '%s' WITH %s" % (f.name, ",".join(contact_names)) )
+			self.robot.feet_contact[i] = 1.0 if (self.ground_ids & contact_ids) else 0.0
+			if contact_ids - self.ground_ids:
+				feet_collision_cost += self.foot_collision_cost
+
+		electricity_cost  = self.electricity_cost  * float(np.abs(a*self.robot.joint_speeds).mean())  # let's assume we have DC motor with controller, and reverse current braking
+		electricity_cost += self.stall_torque_cost * float(np.square(a).mean())
+
+		joints_at_limit_cost = float(self.joints_at_limit_cost * self.robot.joints_at_limit)
+
+		self.rewards = [
+			alive,
+			progress,
+			electricity_cost,
+			joints_at_limit_cost,
+			feet_collision_cost
+			]
+
+		self.HUD(state, a, done)
+		return state, sum(self.rewards), bool(done), {}
+
+	def camera_adjust(self):
+		x, y, z = self.body_xyz
+		self.camera_x = 0.98*self.camera_x + (1-0.98)*x
+		self.camera.move_and_look_at(self.camera_x, y-2.0, 1.4, x, y, 1.0)
+
+class HopperBulletEnv(WalkerBaseBulletEnv):
+	def __init__(self):
+		self.robot = Hopper()
+		WalkerBaseBulletEnv.__init__(self, self.robot)
+
+class Walker2DBulletEnv(WalkerBaseBulletEnv):
+	def __init__(self):
+		self.robot = Walker2D()
+		WalkerBaseBulletEnv.__init__(self, self.robot)
+
+class HalfCheetahBulletEnv(WalkerBaseBulletEnv):
+	def __init__(self):
+		self.robot = HalfCheetah()
+		WalkerBaseBulletEnv.__init__(self, self.robot)
+
+class AntBulletEnv(WalkerBaseBulletEnv):
+	def __init__(self):
+		self.robot = Ant()
+		WalkerBaseBulletEnv.__init__(self, self.robot)
+
+class HumanoidBulletEnv(WalkerBaseBulletEnv):
+	def __init__(self):
+		self.robot = Humanoid()
+		WalkerBaseBulletEnv.__init__(self, self.robot)
+		self.electricity_cost  = 4.25*WalkerBaseBulletEnv.electricity_cost
+		self.stall_torque_cost = 4.25*WalkerBaseBulletEnv.stall_torque_cost
--- a/examples/pybullet/gym/envs/gym_manipulator_envs.py
+++ b/examples/pybullet/gym/envs/gym_manipulator_envs.py
@@ -1,4 +1,4 @@
-from pybulletgym.envs.scene_abstract import SingleRobotEmptyScene
+from scene_abstract import SingleRobotEmptyScene
 from env_bases import MujocoXmlBaseBulletEnv
 import gym, gym.spaces, gym.utils, gym.utils.seeding
 import numpy as np
--- a/examples/pybullet/gym/envs/gym_pendulum_envs.py
+++ b/examples/pybullet/gym/envs/gym_pendulum_envs.py
--- a/examples/pybullet/gym/envs/gym_walker_envs.py
+++ b/examples/pybullet/gym/envs/gym_walker_envs.py
@@ -1,233 +0,0 @@
-from distutils.command.config import config
-
-from scene_abstract import SingleRobotEmptyScene
-from scene_stadium import SinglePlayerStadiumScene
-from env_bases import MujocoXmlBaseBulletEnv
-import gym, gym.spaces, gym.utils, gym.utils.seeding
-import numpy as np
-import os, sys
-
-class WalkerBaseBulletEnv(MujocoXmlBaseBulletEnv):
-	def __init__(self, fn, robot_name, action_dim, obs_dim, power):
-		MujocoXmlBaseBulletEnv.__init__(self, fn, robot_name, action_dim, obs_dim)
-		self.power = power
-		self.camera_x = 0
-		self.walk_target_x = 1e3  # kilometer away
-		self.walk_target_y = 0
-
-	def create_single_player_scene(self):
-		self.stadium_scene = SinglePlayerStadiumScene(gravity=9.8, timestep=0.0165/4, frame_skip=4)
-		return self.stadium_scene
-
-	def robot_specific_reset(self):
-		for j in self.ordered_joints:
-			j.reset_current_position(self.np_random.uniform( low=-0.1, high=0.1 ), 0)
-
-		self.parts, self.jdict, self.ordered_joints, self.robot_body = self.addToScene(self.stadium_scene.ground_plane_mjcf)
-		self.feet = [self.parts[f] for f in self.foot_list]
-		self.feet_contact = np.array([0.0 for f in self.foot_list], dtype=np.float32)
-		self.ground_ids = set([(self.parts[f].bodies[self.parts[f].bodyIndex], self.parts[f].bodyPartIndex) for f in self.foot_ground_object_names])
-		self.scene.actor_introduce(self)
-		self.initial_z = None
-
-	def move_robot(self, init_x, init_y, init_z):
-		"Used by multiplayer stadium to move sideways, to another running lane."
-		self.cpp_robot.query_position()
-		pose = self.cpp_robot.root_part.pose()
-		pose.move_xyz(init_x, init_y, init_z)  # Works because robot loads around (0,0,0), and some robots have z != 0 that is left intact
-		self.cpp_robot.set_pose(pose)
-
-	def apply_action(self, a):
-		assert( np.isfinite(a).all() )
-		for n,j in enumerate(self.ordered_joints):
-			j.set_motor_torque( self.power*j.power_coef*float(np.clip(a[n], -1, +1)) )
-
-	def calc_state(self):
-		j = np.array([j.current_relative_position() for j in self.ordered_joints], dtype=np.float32).flatten()
-		# even elements [0::2] position, scaled to -1..+1 between limits
-		# odd elements  [1::2] angular speed, scaled to show -1..+1
-		self.joint_speeds = j[1::2]
-		self.joints_at_limit = np.count_nonzero(np.abs(j[0::2]) > 0.99)
-
-		body_pose = self.robot_body.pose()
-		parts_xyz = np.array( [p.pose().xyz() for p in self.parts.values()] ).flatten()
-		self.body_xyz = (parts_xyz[0::3].mean(), parts_xyz[1::3].mean(), body_pose.xyz()[2])  # torso z is more informative than mean z
-		self.body_rpy = body_pose.rpy()
-		z = self.body_xyz[2]
-		r, p, yaw = self.body_rpy
-		if self.initial_z==None:
-			self.initial_z = z
-		self.walk_target_theta = np.arctan2( self.walk_target_y - self.body_xyz[1], self.walk_target_x - self.body_xyz[0] )
-		self.walk_target_dist  = np.linalg.norm( [self.walk_target_y - self.body_xyz[1], self.walk_target_x - self.body_xyz[0]] )
-		angle_to_target = self.walk_target_theta - yaw
-
-		rot_speed = np.array(
-			[[np.cos(-yaw), -np.sin(-yaw), 0],
-			 [np.sin(-yaw),  np.cos(-yaw), 0],
-			 [		   0,			 0, 1]]
-			)
-		vx, vy, vz = np.dot(rot_speed, self.robot_body.speed())  # rotate speed back to body point of view
-
-		more = np.array([
-			z-self.initial_z,
-			np.sin(angle_to_target), np.cos(angle_to_target),
-			0.3*vx, 0.3*vy, 0.3*vz,	# 0.3 is just scaling typical speed into -1..+1, no physical sense here
-			r, p], dtype=np.float32)
-		return np.clip( np.concatenate([more] + [j] + [self.feet_contact]), -5, +5)
-
-	def calc_potential(self):
-		# progress in potential field is speed*dt, typical speed is about 2-3 meter per second, this potential will change 2-3 per frame (not per second),
-		# all rewards have rew/frame units and close to 1.0
-		return - self.walk_target_dist / self.scene.dt
-
-	electricity_cost	 = -2.0	# cost for using motors -- this parameter should be carefully tuned against reward for making progress, other values less improtant
-	stall_torque_cost	= -0.1	# cost for running electric current through a motor even at zero rotational speed, small
-	foot_collision_cost  = -1.0	# touches another leg, or other objects, that cost makes robot avoid smashing feet into itself
-	foot_ground_object_names = set(["floor"])  # to distinguish ground and other objects
-	joints_at_limit_cost = -0.1	# discourage stuck joints
-
-	def _step(self, a):
-		if not self.scene.multiplayer:  # if multiplayer, action first applied to all robots, then global step() called, then _step() for all robots with the same actions
-			self.apply_action(a)
-			self.scene.global_step()
-
-		state = self.calc_state()  # also calculates self.joints_at_limit
-
-		alive = float(self.alive_bonus(state[0]+self.initial_z, self.body_rpy[1]))   # state[0] is body height above ground, body_rpy[1] is pitch
-		done = alive < 0
-		if not np.isfinite(state).all():
-			print("~INF~", state)
-			done = True
-
-		potential_old = self.potential
-		self.potential = self.calc_potential()
-		progress = float(self.potential - potential_old)
-
-		feet_collision_cost = 0.0
-		for i,f in enumerate(self.feet):
-			contact_ids = set((x[2], x[4]) for x in f.contact_list())
-			#print("CONTACT OF '%s' WITH %s" % (f.name, ",".join(contact_names)) )
-			self.feet_contact[i] = 1.0 if (self.ground_ids & contact_ids) else 0.0
-			if contact_ids - self.ground_ids:
-				feet_collision_cost += self.foot_collision_cost
-
-		electricity_cost  = self.electricity_cost  * float(np.abs(a*self.joint_speeds).mean())  # let's assume we have DC motor with controller, and reverse current braking
-		electricity_cost += self.stall_torque_cost * float(np.square(a).mean())
-
-		joints_at_limit_cost = float(self.joints_at_limit_cost * self.joints_at_limit)
-
-		self.rewards = [
-			alive,
-			progress,
-			electricity_cost,
-			joints_at_limit_cost,
-			feet_collision_cost
-			]
-
-		self.HUD(state, a, done)
-		return state, sum(self.rewards), bool(done), {}
-
-	def camera_adjust(self):
-		x, y, z = self.body_xyz
-		self.camera_x = 0.98*self.camera_x + (1-0.98)*x
-		self.camera.move_and_look_at(self.camera_x, y-2.0, 1.4, x, y, 1.0)
-
-class HopperBulletEnv(WalkerBaseBulletEnv):
-	foot_list = ["foot"]
-	def __init__(self):
-		WalkerBaseBulletEnv.__init__(self, "hopper.xml", "torso", action_dim=3, obs_dim=15, power=0.75)
-	def alive_bonus(self, z, pitch):
-		return +1 if z > 0.8 and abs(pitch) < 1.0 else -1
-
-class Walker2DBulletEnv(WalkerBaseBulletEnv):
-	foot_list = ["foot", "foot_left"]
-	def __init__(self):
-		WalkerBaseBulletEnv.__init__(self, "walker2d.xml", "torso", action_dim=6, obs_dim=22, power=0.40)
-	def alive_bonus(self, z, pitch):
-		return +1 if z > 0.8 and abs(pitch) < 1.0 else -1
-	def robot_specific_reset(self):
-		WalkerBaseBulletEnv.robot_specific_reset(self)
-		for n in ["foot_joint", "foot_left_joint"]:
-			self.jdict[n].power_coef = 30.0
-
-class HalfCheetahBulletEnv(WalkerBaseBulletEnv):
-	foot_list = ["ffoot", "fshin", "fthigh",  "bfoot", "bshin", "bthigh"]  # track these contacts with ground
-	def __init__(self):
-		WalkerBaseBulletEnv.__init__(self, "half_cheetah.xml", "torso", action_dim=6, obs_dim=26, power=0.90)
-	def alive_bonus(self, z, pitch):
-		# Use contact other than feet to terminate episode: due to a lot of strange walks using knees
-		return +1 if np.abs(pitch) < 1.0 and not self.feet_contact[1] and not self.feet_contact[2] and not self.feet_contact[4] and not self.feet_contact[5] else -1
-	def robot_specific_reset(self):
-		WalkerBaseBulletEnv.robot_specific_reset(self)
-		self.jdict["bthigh"].power_coef = 120.0
-		self.jdict["bshin"].power_coef  = 90.0
-		self.jdict["bfoot"].power_coef  = 60.0
-		self.jdict["fthigh"].power_coef = 140.0
-		self.jdict["fshin"].power_coef  = 60.0
-		self.jdict["ffoot"].power_coef  = 30.0
-
-class AntBulletEnv(WalkerBaseBulletEnv):
-	foot_list = ['front_left_foot', 'front_right_foot', 'left_back_foot', 'right_back_foot']
-	def __init__(self):
-		WalkerBaseBulletEnv.__init__(self, "ant.xml", "torso", action_dim=8, obs_dim=28, power=10.5)
-	def alive_bonus(self, z, pitch):
-		return +1 if z > 0.26 else -1  # 0.25 is central sphere rad, die if it scrapes the ground
-
-
-## 3d Humanoid ##
-
-class HumanoidBulletEnv(WalkerBaseBulletEnv):
-	self_collision = True
-	foot_list = ["right_foot", "left_foot"]  # "left_hand", "right_hand"
-
-	def __init__(self):
-		WalkerBaseBulletEnv.__init__(self, 'humanoid_symmetric.xml', 'torso', action_dim=17, obs_dim=44, power=0.41)
-		# 17 joints, 4 of them important for walking (hip, knee), others may as well be turned off, 17/4 = 4.25
-		self.electricity_cost  = 4.25*WalkerBaseBulletEnv.electricity_cost
-		self.stall_torque_cost = 4.25*WalkerBaseBulletEnv.stall_torque_cost
-
-	def robot_specific_reset(self):
-		WalkerBaseBulletEnv.robot_specific_reset(self)
-		self.motor_names  = ["abdomen_z", "abdomen_y", "abdomen_x"]
-		self.motor_power  = [100, 100, 100]
-		self.motor_names += ["right_hip_x", "right_hip_z", "right_hip_y", "right_knee"]
-		self.motor_power += [100, 100, 300, 200]
-		self.motor_names += ["left_hip_x", "left_hip_z", "left_hip_y", "left_knee"]
-		self.motor_power += [100, 100, 300, 200]
-		self.motor_names += ["right_shoulder1", "right_shoulder2", "right_elbow"]
-		self.motor_power += [75, 75, 75]
-		self.motor_names += ["left_shoulder1", "left_shoulder2", "left_elbow"]
-		self.motor_power += [75, 75, 75]
-		self.motors = [self.jdict[n] for n in self.motor_names]
-		# if self.random_yaw: # TODO: Make leaning work as soon as the rest works
-		# 	cpose = cpp_household.Pose()
-		# 	yaw = self.np_random.uniform(low=-3.14, high=3.14)
-		# 	if self.random_lean and self.np_random.randint(2)==0:
-		# 		cpose.set_xyz(0, 0, 1.4)
-		# 		if self.np_random.randint(2)==0:
-		# 			pitch = np.pi/2
-		# 			cpose.set_xyz(0, 0, 0.45)
-		# 		else:
-		# 			pitch = np.pi*3/2
-		# 			cpose.set_xyz(0, 0, 0.25)
-		# 		roll = 0
-		# 		cpose.set_rpy(roll, pitch, yaw)
-		# 	else:
-		# 		cpose.set_xyz(0, 0, 1.4)
-		# 		cpose.set_rpy(0, 0, yaw)  # just face random direction, but stay straight otherwise
-		# 	self.cpp_robot.set_pose_and_speed(cpose, 0,0,0)
-		self.initial_z = 0.8
-
-	random_yaw = False
-	random_lean = False
-
-	def apply_action(self, a):
-		assert( np.isfinite(a).all() )
-		force_gain = 1
-		for i, m, power in zip(range(17), self.motors, self.motor_power):
-			m.set_motor_torque( float(force_gain * power*self.power*a[i]) )
-			#m.set_motor_torque(float(force_gain * power * self.power * np.clip(a[i], -1, +1)))
-
-	def alive_bonus(self, z, pitch):
-		return +2 if z > 0.78 else -1   # 2 here because 17 joints produce a lot of electricity cost just from policy noise, living must be better than dying
-
--- a/examples/pybullet/gym/envs/robot_bases.py
+++ b/examples/pybullet/gym/envs/robot_bases.py
@@ -0,0 +1,208 @@
+import pybullet as p
+import gym, gym.spaces, gym.utils
+import numpy as np
+import os
+
+
+class MujocoXmlBasedRobot:
+	"""
+	Base class for mujoco .xml based agents.
+	"""
+
+	self_collision = False
+
+	def __init__(self, model_xml, robot_name, action_dim, obs_dim):
+		self.parts = None
+		self.jdict = None
+		self.ordered_joints = None
+		self.robot_body = None
+
+		high = np.ones([action_dim])
+		self.action_space = gym.spaces.Box(-high, high)
+		high = np.inf * np.ones([obs_dim])
+		self.observation_space = gym.spaces.Box(-high, high)
+
+		self.model_xml = model_xml
+		self.robot_name = robot_name
+
+	def addToScene(self, bodies):
+		if self.parts is not None:
+			parts = self.parts
+		else:
+			parts = {}
+
+		if self.jdict is not None:
+			joints = self.jdict
+		else:
+			joints = {}
+
+		if self.ordered_joints is not None:
+			ordered_joints = self.ordered_joints
+		else:
+			ordered_joints = []
+
+		dump = 0
+		for i in range(len(bodies)):
+			if p.getNumJoints(bodies[i]) == 0:
+				part_name, robot_name = p.getBodyInfo(bodies[i], 0)
+				robot_name = robot_name.decode("utf8")
+				part_name = part_name.decode("utf8")
+				parts[part_name] = BodyPart(part_name, bodies, i, -1)
+			for j in range(p.getNumJoints(bodies[i])):
+				_,joint_name,_,_,_,_,_,_,_,_,_,_,part_name = p.getJointInfo(bodies[i], j)
+
+				joint_name = joint_name.decode("utf8")
+				part_name = part_name.decode("utf8")
+
+				if dump: print("ROBOT PART '%s'" % part_name)
+				if dump: print("ROBOT JOINT '%s'" % joint_name) # limits = %+0.2f..%+0.2f effort=%0.3f speed=%0.3f" % ((joint_name,) + j.limits()) )
+
+				parts[part_name] = BodyPart(part_name, bodies, i, j)
+
+				if part_name == self.robot_name:
+					self.robot_body = parts[part_name]
+
+				if i == 0 and j == 0 and self.robot_body is None:  # if nothing else works, we take this as robot_body
+					parts[self.robot_name] = BodyPart(self.robot_name, bodies, 0, -1)
+					self.robot_body = parts[self.robot_name]
+
+				if joint_name[:6] == "ignore":
+					Joint(joint_name, bodies, i, j).disable_motor()
+					continue
+
+				if joint_name[:8] != "jointfix":
+					joints[joint_name] = Joint(joint_name, bodies, i, j)
+					ordered_joints.append(joints[joint_name])
+
+					joints[joint_name].power_coef = 100.0
+
+		return parts, joints, ordered_joints, self.robot_body
+
+	def reset(self):
+		self.ordered_joints = []
+
+		if self.self_collision:
+			self.parts, self.jdict, self.ordered_joints, self.robot_body = self.addToScene(
+				p.loadMJCF(os.path.join("mjcf", self.model_xml), flags=p.URDF_USE_SELF_COLLISION_EXCLUDE_ALL_PARENTS))
+		else:
+			self.parts, self.jdict, self.ordered_joints, self.robot_body = self.addToScene(
+				p.loadMJCF(os.path.join("mjcf", self.model_xml)))
+
+		self.robot_specific_reset()
+
+		s = self.calc_state()  # optimization: calc_state() can calculate something in self.* for calc_potential() to use
+
+		return s
+
+	def calc_potential(self):
+		return 0
+
+class Pose_Helper: # dummy class to comply to original interface
+	def __init__(self, body_part):
+		self.body_part = body_part
+
+	def xyz(self):
+		return self.body_part.current_position()
+
+	def rpy(self):
+		return p.getEulerFromQuaternion(self.body_part.current_orientation())
+
+	def orientation(self):
+		return self.body_part.current_orientation()
+
+class BodyPart:
+	def __init__(self, body_name, bodies, bodyIndex, bodyPartIndex):
+		self.bodies = bodies
+		self.bodyIndex = bodyIndex
+		self.bodyPartIndex = bodyPartIndex
+		self.initialPosition = self.current_position()
+		self.initialOrientation = self.current_orientation()
+		self.bp_pose = Pose_Helper(self)
+
+	def state_fields_of_pose_of(self, body_id, link_id=-1):  # a method you will most probably need a lot to get pose and orientation
+		if link_id == -1:
+			(x, y, z), (a, b, c, d) = p.getBasePositionAndOrientation(body_id)
+		else:
+			(x, y, z), (a, b, c, d), _, _, _, _ = p.getLinkState(body_id, link_id)
+		return np.array([x, y, z, a, b, c, d])
+
+	def get_pose(self):
+		return self.state_fields_of_pose_of(self.bodies[self.bodyIndex], self.bodyPartIndex)
+
+	def speed(self):
+		if self.bodyPartIndex == -1:
+			(vx, vy, vz), _ = p.getBaseVelocity(self.bodies[self.bodyIndex])
+		else:
+			(x,y,z), (a,b,c,d), _,_,_,_, (vx, vy, vz), (vr,vp,vy) = p.getLinkState(self.bodies[self.bodyIndex], self.bodyPartIndex, computeLinkVelocity=1)
+		return np.array([vx, vy, vz])
+
+	def current_position(self):
+		return self.get_pose()[:3]
+
+	def current_orientation(self):
+		return self.get_pose()[3:]
+
+	def reset_position(self, position):
+		p.resetBasePositionAndOrientation(self.bodies[self.bodyIndex], position, self.get_orientation())
+
+	def reset_orientation(self, orientation):
+		p.resetBasePositionAndOrientation(self.bodies[self.bodyIndex], self.get_position(), orientation)
+
+	def reset_pose(self, position, orientation):
+		p.resetBasePositionAndOrientation(self.bodies[self.bodyIndex], position, orientation)
+
+	def pose(self):
+		return self.bp_pose
+
+	def contact_list(self):
+		return p.getContactPoints(self.bodies[self.bodyIndex], -1, self.bodyPartIndex, -1)
+
+
+class Joint:
+	def __init__(self, joint_name, bodies, bodyIndex, jointIndex):
+		self.bodies = bodies
+		self.bodyIndex = bodyIndex
+		self.jointIndex = jointIndex
+		self.joint_name = joint_name
+		_,_,_,_,_,_,_,_,self.lowerLimit, self.upperLimit,_,_,_ = p.getJointInfo(self.bodies[self.bodyIndex], self.jointIndex)
+		self.power_coeff = 0
+
+	def set_state(self, x, vx):
+		p.resetJointState(self.bodies[self.bodyIndex], self.jointIndex, x, vx)
+
+	def current_position(self): # just some synonyme method
+		return self.get_state()
+
+	def current_relative_position(self):
+		pos, vel = self.get_state()
+		pos_mid = 0.5 * (self.lowerLimit + self.upperLimit);
+		return (
+			2 * (pos - pos_mid) / (self.upperLimit - self.lowerLimit),
+			0.1 * vel
+		)
+
+	def get_state(self):
+		x, vx,_,_ = p.getJointState(self.bodies[self.bodyIndex],self.jointIndex)
+		return x, vx
+
+	def set_position(self, position):
+		p.setJointMotorControl2(self.bodies[self.bodyIndex],self.jointIndex,p.POSITION_CONTROL, targetPosition=position)
+
+	def set_velocity(self, velocity):
+		p.setJointMotorControl2(self.bodies[self.bodyIndex],self.jointIndex,p.VELOCITY_CONTROL, targetVelocity=velocity)
+
+	def set_motor_torque(self, torque): # just some synonyme method
+		self.set_torque(torque)
+
+	def set_torque(self, torque):
+		p.setJointMotorControl2(bodyIndex=self.bodies[self.bodyIndex], jointIndex=self.jointIndex, controlMode=p.TORQUE_CONTROL, force=torque) #, positionGain=0.1, velocityGain=0.1)
+
+	def reset_current_position(self, position, velocity): # just some synonyme method
+		self.reset_position(position, velocity)
+
+	def reset_position(self, position, velocity):
+		p.resetJointState(self.bodies[self.bodyIndex],self.jointIndex,targetValue=position, targetVelocity=velocity)
+		self.disable_motor()
+
+	def disable_motor(self):
+		p.setJointMotorControl2(self.bodies[self.bodyIndex],self.jointIndex,controlMode=p.VELOCITY_CONTROL, force=0)
--- a/examples/pybullet/gym/envs/robot_locomotors.py
+++ b/examples/pybullet/gym/envs/robot_locomotors.py
@@ -0,0 +1,175 @@
+from robot_bases import MujocoXmlBasedRobot
+import numpy as np
+
+
+class WalkerBase(MujocoXmlBasedRobot):
+	def __init__(self, fn, robot_name, action_dim, obs_dim, power):
+		MujocoXmlBasedRobot.__init__(self, fn, robot_name, action_dim, obs_dim)
+		self.power = power
+		self.camera_x = 0
+		self.walk_target_x = 1e3  # kilometer away
+		self.walk_target_y = 0
+
+	def robot_specific_reset(self):
+		for j in self.ordered_joints:
+			j.reset_current_position(self.np_random.uniform(low=-0.1, high=0.1), 0)
+
+		self.feet = [self.parts[f] for f in self.foot_list]
+		self.feet_contact = np.array([0.0 for f in self.foot_list], dtype=np.float32)
+		self.scene.actor_introduce(self)
+		self.initial_z = None
+
+	def apply_action(self, a):
+		assert (np.isfinite(a).all())
+		for n, j in enumerate(self.ordered_joints):
+			j.set_motor_torque(self.power * j.power_coef * float(np.clip(a[n], -1, +1)))
+
+	def calc_state(self):
+		j = np.array([j.current_relative_position() for j in self.ordered_joints], dtype=np.float32).flatten()
+		# even elements [0::2] position, scaled to -1..+1 between limits
+		# odd elements  [1::2] angular speed, scaled to show -1..+1
+		self.joint_speeds = j[1::2]
+		self.joints_at_limit = np.count_nonzero(np.abs(j[0::2]) > 0.99)
+
+		body_pose = self.robot_body.pose()
+		parts_xyz = np.array([p.pose().xyz() for p in self.parts.values()]).flatten()
+		self.body_xyz = (
+		parts_xyz[0::3].mean(), parts_xyz[1::3].mean(), body_pose.xyz()[2])  # torso z is more informative than mean z
+		self.body_rpy = body_pose.rpy()
+		z = self.body_xyz[2]
+		if self.initial_z == None:
+			self.initial_z = z
+		r, p, yaw = self.body_rpy
+		self.walk_target_theta = np.arctan2(self.walk_target_y - self.body_xyz[1],
+											self.walk_target_x - self.body_xyz[0])
+		self.walk_target_dist = np.linalg.norm(
+			[self.walk_target_y - self.body_xyz[1], self.walk_target_x - self.body_xyz[0]])
+		angle_to_target = self.walk_target_theta - yaw
+
+		rot_speed = np.array(
+			[[np.cos(-yaw), -np.sin(-yaw), 0],
+			 [np.sin(-yaw), np.cos(-yaw), 0],
+			 [		0,			 0, 1]]
+		)
+		vx, vy, vz = np.dot(rot_speed, self.robot_body.speed())  # rotate speed back to body point of view
+
+		more = np.array([ z-self.initial_z,
+			np.sin(angle_to_target), np.cos(angle_to_target),
+			0.3* vx , 0.3* vy , 0.3* vz ,  # 0.3 is just scaling typical speed into -1..+1, no physical sense here
+			r, p], dtype=np.float32)
+		return np.clip( np.concatenate([more] + [j] + [self.feet_contact]), -5, +5)
+
+	def calc_potential(self):
+		# progress in potential field is speed*dt, typical speed is about 2-3 meter per second, this potential will change 2-3 per frame (not per second),
+		# all rewards have rew/frame units and close to 1.0
+		return - self.walk_target_dist / self.scene.dt
+
+
+class Hopper(WalkerBase):
+	foot_list = ["foot"]
+
+	def __init__(self):
+		WalkerBase.__init__(self, "hopper.xml", "torso", action_dim=3, obs_dim=15, power=0.75)
+
+	def alive_bonus(self, z, pitch):
+		return +1 if z > 0.8 and abs(pitch) < 1.0 else -1
+
+
+class Walker2D(WalkerBase):
+	foot_list = ["foot", "foot_left"]
+
+	def __init__(self):
+		WalkerBase.__init__(self, "walker2d.xml", "torso", action_dim=6, obs_dim=22, power=0.40)
+
+	def alive_bonus(self, z, pitch):
+		return +1 if z > 0.8 and abs(pitch) < 1.0 else -1
+
+	def robot_specific_reset(self):
+		WalkerBase.robot_specific_reset(self)
+		for n in ["foot_joint", "foot_left_joint"]:
+			self.jdict[n].power_coef = 30.0
+
+
+class HalfCheetah(WalkerBase):
+	foot_list = ["ffoot", "fshin", "fthigh",  "bfoot", "bshin", "bthigh"]  # track these contacts with ground
+
+	def __init__(self):
+		WalkerBase.__init__(self, "half_cheetah.xml", "torso", action_dim=6, obs_dim=26, power=0.90)
+
+	def alive_bonus(self, z, pitch):
+		# Use contact other than feet to terminate episode: due to a lot of strange walks using knees
+		return +1 if np.abs(pitch) < 1.0 and not self.feet_contact[1] and not self.feet_contact[2] and not self.feet_contact[4] and not self.feet_contact[5] else -1
+
+	def robot_specific_reset(self):
+		WalkerBase.robot_specific_reset(self)
+		self.jdict["bthigh"].power_coef = 120.0
+		self.jdict["bshin"].power_coef  = 90.0
+		self.jdict["bfoot"].power_coef  = 60.0
+		self.jdict["fthigh"].power_coef = 140.0
+		self.jdict["fshin"].power_coef  = 60.0
+		self.jdict["ffoot"].power_coef  = 30.0
+
+
+class Ant(WalkerBase):
+	foot_list = ['front_left_foot', 'front_right_foot', 'left_back_foot', 'right_back_foot']
+
+	def __init__(self):
+		WalkerBase.__init__(self, "ant.xml", "torso", action_dim=8, obs_dim=28, power=10.5)
+
+	def alive_bonus(self, z, pitch):
+		return +1 if z > 0.26 else -1  # 0.25 is central sphere rad, die if it scrapes the ground
+
+
+class Humanoid(WalkerBase):
+	self_collision = True
+	foot_list = ["right_foot", "left_foot"]  # "left_hand", "right_hand"
+
+	def __init__(self):
+		WalkerBase.__init__(self, 'humanoid_symmetric.xml', 'torso', action_dim=17, obs_dim=44, power=0.41)
+		# 17 joints, 4 of them important for walking (hip, knee), others may as well be turned off, 17/4 = 4.25
+
+	def robot_specific_reset(self):
+		WalkerBase.robot_specific_reset(self)
+		self.motor_names  = ["abdomen_z", "abdomen_y", "abdomen_x"]
+		self.motor_power  = [100, 100, 100]
+		self.motor_names += ["right_hip_x", "right_hip_z", "right_hip_y", "right_knee"]
+		self.motor_power += [100, 100, 300, 200]
+		self.motor_names += ["left_hip_x", "left_hip_z", "left_hip_y", "left_knee"]
+		self.motor_power += [100, 100, 300, 200]
+		self.motor_names += ["right_shoulder1", "right_shoulder2", "right_elbow"]
+		self.motor_power += [75, 75, 75]
+		self.motor_names += ["left_shoulder1", "left_shoulder2", "left_elbow"]
+		self.motor_power += [75, 75, 75]
+		self.motors = [self.jdict[n] for n in self.motor_names]
+		# if self.random_yaw: # TODO: Make leaning work as soon as the rest works
+		# 	cpose = cpp_household.Pose()
+		# 	yaw = self.np_random.uniform(low=-3.14, high=3.14)
+		# 	if self.random_lean and self.np_random.randint(2)==0:
+		# 		cpose.set_xyz(0, 0, 1.4)
+		# 		if self.np_random.randint(2)==0:
+		# 			pitch = np.pi/2
+		# 			cpose.set_xyz(0, 0, 0.45)
+		# 		else:
+		# 			pitch = np.pi*3/2
+		# 			cpose.set_xyz(0, 0, 0.25)
+		# 		roll = 0
+		# 		cpose.set_rpy(roll, pitch, yaw)
+		# 	else:
+		# 		cpose.set_xyz(0, 0, 1.4)
+		# 		cpose.set_rpy(0, 0, yaw)  # just face random direction, but stay straight otherwise
+		# 	self.cpp_robot.set_pose_and_speed(cpose, 0,0,0)
+		self.initial_z = 0.8
+
+	random_yaw = False
+	random_lean = False
+
+	def apply_action(self, a):
+		assert( np.isfinite(a).all() )
+		force_gain = 1
+		for i, m, power in zip(range(17), self.motors, self.motor_power):
+			m.set_motor_torque( float(force_gain * power*self.power*a[i]) )
+			#m.set_motor_torque(float(force_gain * power * self.power * np.clip(a[i], -1, +1)))
+
+	def alive_bonus(self, z, pitch):
+		return +2 if z > 0.78 else -1   # 2 here because 17 joints produce a lot of electricity cost just from policy noise, living must be better than dying
+
--- a/examples/pybullet/gym/envs/robot_manipulators.py
+++ b/examples/pybullet/gym/envs/robot_manipulators.py