Improve PyBullet ports of Roboschool envs: fix reset (it kept adding stadium objects, causing slowdown), now reset uses saveState/restoreState and reset becomes a few orders of magnitude faster.

Use python -m pybullet_envs.examples.testEnv --env AntBulletEnv-v0 --render=1 --steps 1000 --resetbenchmark=1 Added environments: HumanoidFlagrunBulletEnv-v0, HumanoidFlagrunHarderBulletEnv-v0, StrikerBulletEnv-v0, ThrowerBulletEnv-v0, PusherBulletEnv-v0, ReacherBulletEnv-v0, CartPoleBulletEnv-v0 and register them to OpenAI Gym. Allow numpy/humanoid_running.py to use abtch or non-batch update (setJointMotorControl2/setJointMotorControlArray)
2018-01-15 12:48:32 -08:00
parent 9ffb05eb3b
commit 851ca5bfb3
22 changed files with 2173 additions and 102 deletions
--- a/examples/pybullet/gym/pybullet_envs/robot_locomotors.py
+++ b/examples/pybullet/gym/pybullet_envs/robot_locomotors.py
@@ -1,16 +1,20 @@
-from robot_bases import MJCFBasedRobot
+from robot_bases import XmlBasedRobot, MJCFBasedRobot, URDFBasedRobot
 import numpy as np
-
+import pybullet as p
+import os
+import pybullet_data
+from robot_bases import BodyPart

 class WalkerBase(MJCFBasedRobot):
 	def __init__(self, fn, robot_name, action_dim, obs_dim, power):
 		MJCFBasedRobot.__init__(self, fn, robot_name, action_dim, obs_dim)
 		self.power = power
 		self.camera_x = 0
+		self.start_pos_x, self.start_pos_y, self.start_pos_z = 0, 0, 0
 		self.walk_target_x = 1e3  # kilometer away
 		self.walk_target_y = 0
 		self.body_xyz=[0,0,0]
-	
+
 	def robot_specific_reset(self):
 		for j in self.ordered_joints:
 			j.reset_current_position(self.np_random.uniform(low=-0.1, high=0.1), 0)
@@ -186,3 +190,124 @@ class Humanoid(WalkerBase):
 	def alive_bonus(self, z, pitch):
 		return +2 if z > 0.78 else -1   # 2 here because 17 joints produce a lot of electricity cost just from policy noise, living must be better than dying

+
+
+
+def get_cube(x, y, z):	
+	body = p.loadURDF(os.path.join(pybullet_data.getDataPath(),"cube_small.urdf"), x, y, z)
+	part_name, _ = p.getBodyInfo(body, 0)
+	part_name = part_name.decode("utf8")
+	bodies = [body]
+	return BodyPart(part_name, bodies, 0, -1)
+
+
+def get_sphere(x, y, z):
+	body = p.loadURDF(os.path.join(pybullet_data.getDataPath(),"sphere2red.urdf"), x, y, z)
+	part_name, _ = p.getBodyInfo(body, 0)
+	part_name = part_name.decode("utf8")
+	bodies = [body]
+	return BodyPart(part_name, bodies, 0, -1)
+
+class HumanoidFlagrun(Humanoid):
+	def __init__(self):
+		Humanoid.__init__(self)
+		self.flag = None
+		
+	def robot_specific_reset(self):
+		Humanoid.robot_specific_reset(self)
+		self.flag_reposition()
+
+	def flag_reposition(self):
+		self.walk_target_x = self.np_random.uniform(low=-self.scene.stadium_halflen,   high=+self.scene.stadium_halflen)
+		self.walk_target_y = self.np_random.uniform(low=-self.scene.stadium_halfwidth, high=+self.scene.stadium_halfwidth)
+		more_compact = 0.5  # set to 1.0 whole football field
+		self.walk_target_x *= more_compact
+		self.walk_target_y *= more_compact
+		
+		if (self.flag):
+			#for b in self.flag.bodies:
+			#	print("remove body uid",b)
+			#	p.removeBody(b)
+			p.resetBasePositionAndOrientation(self.flag.bodies[0],[self.walk_target_x, self.walk_target_y, 0.7],[0,0,0,1])
+		else:
+			self.flag = get_sphere(self.walk_target_x, self.walk_target_y, 0.7)
+		self.flag_timeout = 200
+
+	def calc_state(self):
+		self.flag_timeout -= 1
+		state = Humanoid.calc_state(self)
+		if self.walk_target_dist < 1 or self.flag_timeout <= 0:
+			self.flag_reposition()
+			state = Humanoid.calc_state(self)  # caclulate state again, against new flag pos
+			self.potential = self.calc_potential()	   # avoid reward jump
+		return state
+
+
+class HumanoidFlagrunHarder(HumanoidFlagrun):
+	def __init__(self):
+		HumanoidFlagrun.__init__()
+
+	def robot_specific_reset(self):
+		HumanoidFlagrun.robot_specific_reset(self)
+		if (self.aggressive_cube):
+			p.resetBasePositionAndOrientation(self.aggressive_cube.bodies[0],[-1.5,0,0.05],[0,0,0,1])
+		else:
+			self.aggressive_cube = get_cube(-1.5,0,0.05)
+		self.on_ground_frame_counter = 0
+		self.crawl_start_potential = None
+		self.crawl_ignored_potential = 0.0
+		self.initial_z = 0.8
+
+	def alive_bonus(self, z, pitch):
+		if self.frame%30==0 and self.frame>100 and self.on_ground_frame_counter==0:
+			target_xyz  = np.array(self.body_xyz)
+			robot_speed = np.array(self.robot_body.speed())
+			angle = self.np_random.uniform(low=-3.14, high=3.14)
+			from_dist   = 4.0
+			attack_speed   = self.np_random.uniform(low=20.0, high=30.0)  # speed 20..30 (* mass in cube.urdf = impulse)
+			time_to_travel = from_dist / attack_speed
+			target_xyz += robot_speed*time_to_travel  # predict future position at the moment the cube hits the robot
+			position = [target_xyz[0] + from_dist*np.cos(angle),
+				target_xyz[1] + from_dist*np.sin(angle),
+				target_xyz[2] + 1.0]
+			attack_speed_vector = target_xyz - np.array(position)
+			attack_speed_vector *= attack_speed / np.linalg.norm(attack_speed_vector)
+			attack_speed_vector += self.np_random.uniform(low=-1.0, high=+1.0, size=(3,))
+			self.aggressive_cube.reset_position(position)
+			self.aggressive_cube.reset_velocity(linearVelocity=attack_speed_vector)
+		if z < 0.8:
+			self.on_ground_frame_counter += 1
+		elif self.on_ground_frame_counter > 0:
+			self.on_ground_frame_counter -= 1
+		# End episode if the robot can't get up in 170 frames, to save computation and decorrelate observations.
+		return self.potential_leak() if self.on_ground_frame_counter<170 else -1
+
+	def potential_leak(self):
+		z = self.body_xyz[2]		  # 0.00 .. 0.8 .. 1.05 normal walk, 1.2 when jumping
+		z = np.clip(z, 0, 0.8)
+		return z/0.8 + 1.0			# 1.00 .. 2.0
+
+	def calc_potential(self):
+		# We see alive bonus here as a leak from potential field. Value V(s) of a given state equals
+		# potential, if it is topped up with gamma*potential every frame. Gamma is assumed 0.99.
+		#
+		# 2.0 alive bonus if z>0.8, potential is 200, leak gamma=0.99, (1-0.99)*200==2.0
+		# 1.0 alive bonus on the ground z==0, potential is 100, leak (1-0.99)*100==1.0
+		#
+		# Why robot whould stand up: to receive 100 points in potential field difference.
+		flag_running_progress = Humanoid.calc_potential(self)
+
+		# This disables crawl.
+		if self.body_xyz[2] < 0.8:
+			if self.crawl_start_potential is None:
+				self.crawl_start_potential = flag_running_progress - self.crawl_ignored_potential
+				#print("CRAWL START %+0.1f %+0.1f" % (self.crawl_start_potential, flag_running_progress))
+			self.crawl_ignored_potential = flag_running_progress - self.crawl_start_potential
+			flag_running_progress  = self.crawl_start_potential
+		else:
+			#print("CRAWL STOP %+0.1f %+0.1f" % (self.crawl_ignored_potential, flag_running_progress))
+			flag_running_progress -= self.crawl_ignored_potential
+			self.crawl_start_potential = None
+
+		return flag_running_progress + self.potential_leak()*100
+