add yapf style and apply yapf to format all Python files

This recreates pull request #2192

Author: Erwin Coumans
Date:   2019-04-27 07:31:15 -07:00
Parent: c591735042
Commit: ef9570c315

347 changed files with 70304 additions and 22752 deletions
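
The .style.yapf file added by this commit is not reproduced in the hunks below. As a hedged sketch only, a configuration producing the two-space indentation and roughly 100-column wrapping seen in the reformatted code would be along the lines of "[style] based_on_style = google, indent_width = 2, column_limit = 100"; the exact values are assumptions, not read from this diff. The same style can also be applied programmatically through yapf's Python API:

from yapf.yapflib.yapf_api import FormatCode

source = "def f( a,b ):\n    return a+b\n"
# Recent yapf releases return (formatted_code, changed); very old releases return only the string.
formatted, changed = FormatCode(
    source, style_config='{based_on_style: google, indent_width: 2, column_limit: 100}')
print(formatted)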

View File

@@ -4,7 +4,7 @@ import os
import inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
os.sys.path.insert(0,parentdir)
os.sys.path.insert(0, parentdir)
# Importing the libraries
import os
@@ -20,17 +20,17 @@ import argparse
# Setting the Hyper Parameters
class Hp():
def __init__(self):
self.nb_steps = 10000
self.episode_length = 1000
self.learning_rate = 0.02
self.nb_directions = 16
self.nb_best_directions = 16
assert self.nb_best_directions <= self.nb_directions
self.noise = 0.03
self.seed = 1
self.env_name = 'HalfCheetahBulletEnv-v0'
def __init__(self):
self.nb_steps = 10000
self.episode_length = 1000
self.learning_rate = 0.02
self.nb_directions = 16
self.nb_best_directions = 16
assert self.nb_best_directions <= self.nb_directions
self.noise = 0.03
self.seed = 1
self.env_name = 'HalfCheetahBulletEnv-v0'
# Multiprocess Exploring the policy on one specific direction and over one episode
@@ -39,111 +39,37 @@ _RESET = 1
_CLOSE = 2
_EXPLORE = 3
def ExploreWorker(rank,childPipe, envname, args):
env = gym.make(envname)
nb_inputs = env.observation_space.shape[0]
normalizer = Normalizer(nb_inputs)
observation_n = env.reset()
n=0
while True:
n+=1
try:
# Only block for short times to have keyboard exceptions be raised.
if not childPipe.poll(0.001):
continue
message, payload = childPipe.recv()
except (EOFError, KeyboardInterrupt):
break
if message == _RESET:
observation_n = env.reset()
childPipe.send(["reset ok"])
def ExploreWorker(rank, childPipe, envname, args):
env = gym.make(envname)
nb_inputs = env.observation_space.shape[0]
normalizer = Normalizer(nb_inputs)
observation_n = env.reset()
n = 0
while True:
n += 1
try:
# Only block for short times to have keyboard exceptions be raised.
if not childPipe.poll(0.001):
continue
if message == _EXPLORE:
#normalizer = payload[0] #use our local normalizer
policy = payload[1]
hp = payload[2]
direction = payload[3]
delta = payload[4]
state = env.reset()
done = False
num_plays = 0.
sum_rewards = 0
while not done and num_plays < hp.episode_length:
normalizer.observe(state)
state = normalizer.normalize(state)
action = policy.evaluate(state, delta, direction,hp)
state, reward, done, _ = env.step(action)
reward = max(min(reward, 1), -1)
sum_rewards += reward
num_plays += 1
childPipe.send([sum_rewards])
continue
if message == _CLOSE:
childPipe.send(["close ok"])
break
childPipe.close()
# Normalizing the states
class Normalizer():
def __init__(self, nb_inputs):
self.n = np.zeros(nb_inputs)
self.mean = np.zeros(nb_inputs)
self.mean_diff = np.zeros(nb_inputs)
self.var = np.zeros(nb_inputs)
def observe(self, x):
self.n += 1.
last_mean = self.mean.copy()
self.mean += (x - self.mean) / self.n
self.mean_diff += (x - last_mean) * (x - self.mean)
self.var = (self.mean_diff / self.n).clip(min = 1e-2)
def normalize(self, inputs):
obs_mean = self.mean
obs_std = np.sqrt(self.var)
return (inputs - obs_mean) / obs_std
# Building the AI
class Policy():
def __init__(self, input_size, output_size, env_name, args):
try:
self.theta = np.load(args.policy)
except:
self.theta = np.zeros((output_size, input_size))
self.env_name = env_name
print("Starting policy theta=",self.theta)
def evaluate(self, input, delta, direction, hp):
if direction is None:
return np.clip(self.theta.dot(input), -1.0, 1.0)
elif direction == "positive":
return np.clip((self.theta + hp.noise*delta).dot(input), -1.0, 1.0)
else:
return np.clip((self.theta - hp.noise*delta).dot(input), -1.0, 1.0)
def sample_deltas(self):
return [np.random.randn(*self.theta.shape) for _ in range(hp.nb_directions)]
def update(self, rollouts, sigma_r, args):
step = np.zeros(self.theta.shape)
for r_pos, r_neg, d in rollouts:
step += (r_pos - r_neg) * d
self.theta += hp.learning_rate / (hp.nb_best_directions * sigma_r) * step
timestr = time.strftime("%Y%m%d-%H%M%S")
np.save(args.logdir+"/policy_"+self.env_name+"_"+timestr+".npy", self.theta)
# Exploring the policy on one specific direction and over one episode
def explore(env, normalizer, policy, direction, delta, hp):
state = env.reset()
done = False
num_plays = 0.
sum_rewards = 0
while not done and num_plays < hp.episode_length:
message, payload = childPipe.recv()
except (EOFError, KeyboardInterrupt):
break
if message == _RESET:
observation_n = env.reset()
childPipe.send(["reset ok"])
continue
if message == _EXPLORE:
#normalizer = payload[0] #use our local normalizer
policy = payload[1]
hp = payload[2]
direction = payload[3]
delta = payload[4]
state = env.reset()
done = False
num_plays = 0.
sum_rewards = 0
while not done and num_plays < hp.episode_length:
normalizer.observe(state)
state = normalizer.normalize(state)
action = policy.evaluate(state, delta, direction, hp)
@@ -151,127 +77,217 @@ def explore(env, normalizer, policy, direction, delta, hp):
reward = max(min(reward, 1), -1)
sum_rewards += reward
num_plays += 1
return sum_rewards
childPipe.send([sum_rewards])
continue
if message == _CLOSE:
childPipe.send(["close ok"])
break
childPipe.close()
# Normalizing the states
class Normalizer():
def __init__(self, nb_inputs):
self.n = np.zeros(nb_inputs)
self.mean = np.zeros(nb_inputs)
self.mean_diff = np.zeros(nb_inputs)
self.var = np.zeros(nb_inputs)
def observe(self, x):
self.n += 1.
last_mean = self.mean.copy()
self.mean += (x - self.mean) / self.n
self.mean_diff += (x - last_mean) * (x - self.mean)
self.var = (self.mean_diff / self.n).clip(min=1e-2)
def normalize(self, inputs):
obs_mean = self.mean
obs_std = np.sqrt(self.var)
return (inputs - obs_mean) / obs_std
# Building the AI
class Policy():
def __init__(self, input_size, output_size, env_name, args):
try:
self.theta = np.load(args.policy)
except:
self.theta = np.zeros((output_size, input_size))
self.env_name = env_name
print("Starting policy theta=", self.theta)
def evaluate(self, input, delta, direction, hp):
if direction is None:
return np.clip(self.theta.dot(input), -1.0, 1.0)
elif direction == "positive":
return np.clip((self.theta + hp.noise * delta).dot(input), -1.0, 1.0)
else:
return np.clip((self.theta - hp.noise * delta).dot(input), -1.0, 1.0)
def sample_deltas(self):
return [np.random.randn(*self.theta.shape) for _ in range(hp.nb_directions)]
def update(self, rollouts, sigma_r, args):
step = np.zeros(self.theta.shape)
for r_pos, r_neg, d in rollouts:
step += (r_pos - r_neg) * d
self.theta += hp.learning_rate / (hp.nb_best_directions * sigma_r) * step
timestr = time.strftime("%Y%m%d-%H%M%S")
np.save(args.logdir + "/policy_" + self.env_name + "_" + timestr + ".npy", self.theta)
# Exploring the policy on one specific direction and over one episode
def explore(env, normalizer, policy, direction, delta, hp):
state = env.reset()
done = False
num_plays = 0.
sum_rewards = 0
while not done and num_plays < hp.episode_length:
normalizer.observe(state)
state = normalizer.normalize(state)
action = policy.evaluate(state, delta, direction, hp)
state, reward, done, _ = env.step(action)
reward = max(min(reward, 1), -1)
sum_rewards += reward
num_plays += 1
return sum_rewards
# Training the AI
def train(env, policy, normalizer, hp, parentPipes, args):
for step in range(hp.nb_steps):
# Initializing the perturbations deltas and the positive/negative rewards
deltas = policy.sample_deltas()
positive_rewards = [0] * hp.nb_directions
negative_rewards = [0] * hp.nb_directions
if parentPipes:
for k in range(hp.nb_directions):
parentPipe = parentPipes[k]
parentPipe.send([_EXPLORE,[normalizer, policy, hp, "positive", deltas[k]]])
for k in range(hp.nb_directions):
positive_rewards[k] = parentPipes[k].recv()[0]
for k in range(hp.nb_directions):
parentPipe = parentPipes[k]
parentPipe.send([_EXPLORE,[normalizer, policy, hp, "negative", deltas[k]]])
for k in range(hp.nb_directions):
negative_rewards[k] = parentPipes[k].recv()[0]
else:
# Getting the positive rewards in the positive directions
for k in range(hp.nb_directions):
positive_rewards[k] = explore(env, normalizer, policy, "positive", deltas[k], hp)
# Getting the negative rewards in the negative/opposite directions
for k in range(hp.nb_directions):
negative_rewards[k] = explore(env, normalizer, policy, "negative", deltas[k], hp)
# Gathering all the positive/negative rewards to compute the standard deviation of these rewards
all_rewards = np.array(positive_rewards + negative_rewards)
sigma_r = all_rewards.std()
# Sorting the rollouts by the max(r_pos, r_neg) and selecting the best directions
scores = {k:max(r_pos, r_neg) for k,(r_pos,r_neg) in enumerate(zip(positive_rewards, negative_rewards))}
order = sorted(scores.keys(), key = lambda x:scores[x])[:hp.nb_best_directions]
rollouts = [(positive_rewards[k], negative_rewards[k], deltas[k]) for k in order]
# Updating our policy
policy.update(rollouts, sigma_r, args)
# Printing the final reward of the policy after the update
reward_evaluation = explore(env, normalizer, policy, None, None, hp)
print('Step:', step, 'Reward:', reward_evaluation)
for step in range(hp.nb_steps):
# Initializing the perturbations deltas and the positive/negative rewards
deltas = policy.sample_deltas()
positive_rewards = [0] * hp.nb_directions
negative_rewards = [0] * hp.nb_directions
if parentPipes:
for k in range(hp.nb_directions):
parentPipe = parentPipes[k]
parentPipe.send([_EXPLORE, [normalizer, policy, hp, "positive", deltas[k]]])
for k in range(hp.nb_directions):
positive_rewards[k] = parentPipes[k].recv()[0]
for k in range(hp.nb_directions):
parentPipe = parentPipes[k]
parentPipe.send([_EXPLORE, [normalizer, policy, hp, "negative", deltas[k]]])
for k in range(hp.nb_directions):
negative_rewards[k] = parentPipes[k].recv()[0]
else:
# Getting the positive rewards in the positive directions
for k in range(hp.nb_directions):
positive_rewards[k] = explore(env, normalizer, policy, "positive", deltas[k], hp)
# Getting the negative rewards in the negative/opposite directions
for k in range(hp.nb_directions):
negative_rewards[k] = explore(env, normalizer, policy, "negative", deltas[k], hp)
# Gathering all the positive/negative rewards to compute the standard deviation of these rewards
all_rewards = np.array(positive_rewards + negative_rewards)
sigma_r = all_rewards.std()
# Sorting the rollouts by the max(r_pos, r_neg) and selecting the best directions
scores = {
k: max(r_pos, r_neg)
for k, (r_pos, r_neg) in enumerate(zip(positive_rewards, negative_rewards))
}
order = sorted(scores.keys(), key=lambda x: scores[x])[:hp.nb_best_directions]
rollouts = [(positive_rewards[k], negative_rewards[k], deltas[k]) for k in order]
# Updating our policy
policy.update(rollouts, sigma_r, args)
# Printing the final reward of the policy after the update
reward_evaluation = explore(env, normalizer, policy, None, None, hp)
print('Step:', step, 'Reward:', reward_evaluation)
# Running the main code
def mkdir(base, name):
path = os.path.join(base, name)
if not os.path.exists(path):
os.makedirs(path)
return path
path = os.path.join(base, name)
if not os.path.exists(path):
os.makedirs(path)
return path
if __name__ == "__main__":
mp.freeze_support()
mp.freeze_support()
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('--env', help='Gym environment name', type=str, default='HalfCheetahBulletEnv-v0')
parser.add_argument('--seed', help='RNG seed', type=int, default=1)
parser.add_argument('--render', help='OpenGL Visualizer', type=int, default=0)
parser.add_argument('--movie',help='rgb_array gym movie',type=int, default=0)
parser.add_argument('--steps', help='Number of steps', type=int, default=10000)
parser.add_argument('--policy', help='Starting policy file (npy)', type=str, default='')
parser.add_argument('--logdir', help='Directory root to log policy files (npy)', type=str, default='.')
parser.add_argument('--mp', help='Enable multiprocessing', type=int, default=1)
args = parser.parse_args()
hp = Hp()
hp.env_name = args.env
hp.seed = args.seed
hp.nb_steps = args.steps
print("seed = ", hp.seed)
np.random.seed(hp.seed)
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('--env',
help='Gym environment name',
type=str,
default='HalfCheetahBulletEnv-v0')
parser.add_argument('--seed', help='RNG seed', type=int, default=1)
parser.add_argument('--render', help='OpenGL Visualizer', type=int, default=0)
parser.add_argument('--movie', help='rgb_array gym movie', type=int, default=0)
parser.add_argument('--steps', help='Number of steps', type=int, default=10000)
parser.add_argument('--policy', help='Starting policy file (npy)', type=str, default='')
parser.add_argument('--logdir',
help='Directory root to log policy files (npy)',
type=str,
default='.')
parser.add_argument('--mp', help='Enable multiprocessing', type=int, default=1)
parentPipes = None
if args.mp:
num_processes = hp.nb_directions
processes = []
childPipes = []
parentPipes = []
for pr in range (num_processes):
parentPipe, childPipe = Pipe()
parentPipes.append(parentPipe)
childPipes.append(childPipe)
for rank in range(num_processes):
p = mp.Process(target=ExploreWorker, args=(rank,childPipes[rank], hp.env_name, args))
p.start()
processes.append(p)
work_dir = mkdir('exp', 'brs')
monitor_dir = mkdir(work_dir, 'monitor')
env = gym.make(hp.env_name)
if args.render:
env.render(mode = "human")
if args.movie:
env = wrappers.Monitor(env, monitor_dir, force = True)
nb_inputs = env.observation_space.shape[0]
nb_outputs = env.action_space.shape[0]
policy = Policy(nb_inputs, nb_outputs,hp.env_name, args)
normalizer = Normalizer(nb_inputs)
print("start training")
train(env, policy, normalizer, hp, parentPipes, args)
args = parser.parse_args()
if args.mp:
for parentPipe in parentPipes:
parentPipe.send([_CLOSE,"pay2"])
for p in processes:
p.join()
hp = Hp()
hp.env_name = args.env
hp.seed = args.seed
hp.nb_steps = args.steps
print("seed = ", hp.seed)
np.random.seed(hp.seed)
parentPipes = None
if args.mp:
num_processes = hp.nb_directions
processes = []
childPipes = []
parentPipes = []
for pr in range(num_processes):
parentPipe, childPipe = Pipe()
parentPipes.append(parentPipe)
childPipes.append(childPipe)
for rank in range(num_processes):
p = mp.Process(target=ExploreWorker, args=(rank, childPipes[rank], hp.env_name, args))
p.start()
processes.append(p)
work_dir = mkdir('exp', 'brs')
monitor_dir = mkdir(work_dir, 'monitor')
env = gym.make(hp.env_name)
if args.render:
env.render(mode="human")
if args.movie:
env = wrappers.Monitor(env, monitor_dir, force=True)
nb_inputs = env.observation_space.shape[0]
nb_outputs = env.action_space.shape[0]
policy = Policy(nb_inputs, nb_outputs, hp.env_name, args)
normalizer = Normalizer(nb_inputs)
print("start training")
train(env, policy, normalizer, hp, parentPipes, args)
if args.mp:
for parentPipe in parentPipes:
parentPipe.send([_CLOSE, "pay2"])
for p in processes:
p.join()
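
For reference, and not part of this commit: the Normalizer class above keeps streaming estimates of the state mean and variance with a Welford-style update (observe) and uses them to whiten observations (normalize). A minimal standalone sketch checking the streaming estimates against batch NumPy statistics:

import numpy as np

class StreamingStats:
  # Same update rule as Normalizer.observe above, trimmed to the essentials.
  def __init__(self, nb_inputs):
    self.n = np.zeros(nb_inputs)
    self.mean = np.zeros(nb_inputs)
    self.mean_diff = np.zeros(nb_inputs)
    self.var = np.zeros(nb_inputs)

  def observe(self, x):
    self.n += 1.
    last_mean = self.mean.copy()
    self.mean += (x - self.mean) / self.n
    self.mean_diff += (x - last_mean) * (x - self.mean)
    self.var = (self.mean_diff / self.n).clip(min=1e-2)

data = np.random.randn(1000, 3)
stats = StreamingStats(3)
for row in data:
  stats.observe(row)
# The running mean equals the batch mean; var is the population variance (ddof=0), floored at 1e-2.
assert np.allclose(stats.mean, data.mean(axis=0))
assert np.allclose(stats.var, data.var(axis=0).clip(min=1e-2))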

View File

@@ -1,25 +1,28 @@
import gym
from gym.envs.registration import registry, make, spec
def register(id,*args,**kvargs):
if id in registry.env_specs:
return
else:
return gym.envs.registration.register(id,*args,**kvargs)
def register(id, *args, **kvargs):
if id in registry.env_specs:
return
else:
return gym.envs.registration.register(id, *args, **kvargs)
# ------------bullet-------------
register(
id='HumanoidDeepMimicBulletEnv-v1',
entry_point='pybullet_envs.deep_mimic:HumanoidDeepMimicGymEnv',
max_episode_steps=1000,
reward_threshold=20000.0,
id='HumanoidDeepMimicBulletEnv-v1',
entry_point='pybullet_envs.deep_mimic:HumanoidDeepMimicGymEnv',
max_episode_steps=1000,
reward_threshold=20000.0,
)
register(
id='CartPoleBulletEnv-v1',
entry_point='pybullet_envs.bullet:CartPoleBulletEnv',
max_episode_steps=200,
reward_threshold=190.0,
id='CartPoleBulletEnv-v1',
entry_point='pybullet_envs.bullet:CartPoleBulletEnv',
max_episode_steps=200,
reward_threshold=190.0,
)
register(
@@ -36,7 +39,6 @@ register(
reward_threshold=5.0,
)
register(
id='MinitaurReactiveEnv-v0',
entry_point='pybullet_envs.minitaur.envs:MinitaurReactiveEnv',
@@ -44,7 +46,6 @@ register(
reward_threshold=5.0,
)
register(
id='MinitaurBallGymEnv-v0',
entry_point='pybullet_envs.minitaur.envs:MinitaurBallGymEnv',
@@ -52,7 +53,6 @@ register(
reward_threshold=5.0,
)
register(
id='MinitaurTrottingEnv-v0',
entry_point='pybullet_envs.minitaur.envs:MinitaurTrottingEnv',
@@ -81,8 +81,6 @@ register(
reward_threshold=5.0,
)
register(
id='RacecarBulletEnv-v0',
entry_point='pybullet_envs.bullet:RacecarGymEnv',
@@ -91,128 +89,113 @@ register(
)
register(
id='RacecarZedBulletEnv-v0',
entry_point='pybullet_envs.bullet:RacecarZEDGymEnv',
max_episode_steps=1000,
reward_threshold=5.0,
)
register(
id='KukaBulletEnv-v0',
entry_point='pybullet_envs.bullet:KukaGymEnv',
max_episode_steps=1000,
reward_threshold=5.0,
id='RacecarZedBulletEnv-v0',
entry_point='pybullet_envs.bullet:RacecarZEDGymEnv',
max_episode_steps=1000,
reward_threshold=5.0,
)
register(
id='KukaCamBulletEnv-v0',
entry_point='pybullet_envs.bullet:KukaCamGymEnv',
max_episode_steps=1000,
reward_threshold=5.0,
id='KukaBulletEnv-v0',
entry_point='pybullet_envs.bullet:KukaGymEnv',
max_episode_steps=1000,
reward_threshold=5.0,
)
register(
id='KukaDiverseObjectGrasping-v0',
entry_point='pybullet_envs.bullet:KukaDiverseObjectEnv',
max_episode_steps=1000,
reward_threshold=5.0,
id='KukaCamBulletEnv-v0',
entry_point='pybullet_envs.bullet:KukaCamGymEnv',
max_episode_steps=1000,
reward_threshold=5.0,
)
register(
id='InvertedPendulumBulletEnv-v0',
entry_point='pybullet_envs.gym_pendulum_envs:InvertedPendulumBulletEnv',
max_episode_steps=1000,
reward_threshold=950.0,
)
register(
id='InvertedDoublePendulumBulletEnv-v0',
entry_point='pybullet_envs.gym_pendulum_envs:InvertedDoublePendulumBulletEnv',
max_episode_steps=1000,
reward_threshold=9100.0,
)
register(
id='InvertedPendulumSwingupBulletEnv-v0',
entry_point='pybullet_envs.gym_pendulum_envs:InvertedPendulumSwingupBulletEnv',
max_episode_steps=1000,
reward_threshold=800.0,
)
register(
id='ReacherBulletEnv-v0',
entry_point='pybullet_envs.gym_manipulator_envs:ReacherBulletEnv',
max_episode_steps=150,
reward_threshold=18.0,
)
register(
id='PusherBulletEnv-v0',
entry_point='pybullet_envs.gym_manipulator_envs:PusherBulletEnv',
max_episode_steps=150,
reward_threshold=18.0,
id='KukaDiverseObjectGrasping-v0',
entry_point='pybullet_envs.bullet:KukaDiverseObjectEnv',
max_episode_steps=1000,
reward_threshold=5.0,
)
register(
id='ThrowerBulletEnv-v0',
entry_point='pybullet_envs.gym_manipulator_envs:ThrowerBulletEnv',
max_episode_steps=100,
reward_threshold=18.0,
id='InvertedPendulumBulletEnv-v0',
entry_point='pybullet_envs.gym_pendulum_envs:InvertedPendulumBulletEnv',
max_episode_steps=1000,
reward_threshold=950.0,
)
register(
id='StrikerBulletEnv-v0',
entry_point='pybullet_envs.gym_manipulator_envs:StrikerBulletEnv',
max_episode_steps=100,
reward_threshold=18.0,
id='InvertedDoublePendulumBulletEnv-v0',
entry_point='pybullet_envs.gym_pendulum_envs:InvertedDoublePendulumBulletEnv',
max_episode_steps=1000,
reward_threshold=9100.0,
)
register(
id='Walker2DBulletEnv-v0',
entry_point='pybullet_envs.gym_locomotion_envs:Walker2DBulletEnv',
max_episode_steps=1000,
reward_threshold=2500.0
)
register(
id='HalfCheetahBulletEnv-v0',
entry_point='pybullet_envs.gym_locomotion_envs:HalfCheetahBulletEnv',
max_episode_steps=1000,
reward_threshold=3000.0
)
id='InvertedPendulumSwingupBulletEnv-v0',
entry_point='pybullet_envs.gym_pendulum_envs:InvertedPendulumSwingupBulletEnv',
max_episode_steps=1000,
reward_threshold=800.0,
)
register(
id='AntBulletEnv-v0',
entry_point='pybullet_envs.gym_locomotion_envs:AntBulletEnv',
max_episode_steps=1000,
reward_threshold=2500.0
)
id='ReacherBulletEnv-v0',
entry_point='pybullet_envs.gym_manipulator_envs:ReacherBulletEnv',
max_episode_steps=150,
reward_threshold=18.0,
)
register(
id='HopperBulletEnv-v0',
entry_point='pybullet_envs.gym_locomotion_envs:HopperBulletEnv',
max_episode_steps=1000,
reward_threshold=2500.0
)
id='PusherBulletEnv-v0',
entry_point='pybullet_envs.gym_manipulator_envs:PusherBulletEnv',
max_episode_steps=150,
reward_threshold=18.0,
)
register(
id='HumanoidBulletEnv-v0',
entry_point='pybullet_envs.gym_locomotion_envs:HumanoidBulletEnv',
max_episode_steps=1000
)
id='ThrowerBulletEnv-v0',
entry_point='pybullet_envs.gym_manipulator_envs:ThrowerBulletEnv',
max_episode_steps=100,
reward_threshold=18.0,
)
register(
id='HumanoidFlagrunBulletEnv-v0',
entry_point='pybullet_envs.gym_locomotion_envs:HumanoidFlagrunBulletEnv',
max_episode_steps=1000,
reward_threshold=2000.0
)
id='StrikerBulletEnv-v0',
entry_point='pybullet_envs.gym_manipulator_envs:StrikerBulletEnv',
max_episode_steps=100,
reward_threshold=18.0,
)
register(
id='HumanoidFlagrunHarderBulletEnv-v0',
entry_point='pybullet_envs.gym_locomotion_envs:HumanoidFlagrunHarderBulletEnv',
max_episode_steps=1000
)
register(id='Walker2DBulletEnv-v0',
entry_point='pybullet_envs.gym_locomotion_envs:Walker2DBulletEnv',
max_episode_steps=1000,
reward_threshold=2500.0)
register(id='HalfCheetahBulletEnv-v0',
entry_point='pybullet_envs.gym_locomotion_envs:HalfCheetahBulletEnv',
max_episode_steps=1000,
reward_threshold=3000.0)
register(id='AntBulletEnv-v0',
entry_point='pybullet_envs.gym_locomotion_envs:AntBulletEnv',
max_episode_steps=1000,
reward_threshold=2500.0)
register(id='HopperBulletEnv-v0',
entry_point='pybullet_envs.gym_locomotion_envs:HopperBulletEnv',
max_episode_steps=1000,
reward_threshold=2500.0)
register(id='HumanoidBulletEnv-v0',
entry_point='pybullet_envs.gym_locomotion_envs:HumanoidBulletEnv',
max_episode_steps=1000)
register(id='HumanoidFlagrunBulletEnv-v0',
entry_point='pybullet_envs.gym_locomotion_envs:HumanoidFlagrunBulletEnv',
max_episode_steps=1000,
reward_threshold=2000.0)
register(id='HumanoidFlagrunHarderBulletEnv-v0',
entry_point='pybullet_envs.gym_locomotion_envs:HumanoidFlagrunHarderBulletEnv',
max_episode_steps=1000)
#register(
# id='AtlasBulletEnv-v0',
@@ -220,6 +203,7 @@ register(
# max_episode_steps=1000
# )
def getList():
btenvs = ['- ' + spec.id for spec in gym.envs.registry.all() if spec.id.find('Bullet')>=0]
return btenvs
btenvs = ['- ' + spec.id for spec in gym.envs.registry.all() if spec.id.find('Bullet') >= 0]
return btenvs
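
Usage note (a sketch, not code from this commit): once the package containing these register() calls has been imported, the environment IDs above can be created through the ordinary gym API of that era, for example:

import gym
import pybullet_envs  # importing the package executes register() calls like the ones above

env = gym.make('AntBulletEnv-v0')
obs = env.reset()
done = False
while not done:
  obs, reward, done, info = env.step(env.action_space.sample())  # pre-0.26 gym step signature
env.close()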

View File

@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Executable scripts for reinforcement learning."""
from __future__ import absolute_import

View File

@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Example configurations using the PPO algorithm."""
from __future__ import absolute_import
@@ -29,6 +28,7 @@ import pybullet_envs.bullet.minitaur_gym_env as minitaur_gym_env
import pybullet_envs
import tensorflow as tf
def default():
"""Default configuration for PPO."""
# General
@@ -38,10 +38,7 @@ def default():
use_gpu = False
# Network
network = networks.feed_forward_gaussian
weight_summaries = dict(
all=r'.*',
policy=r'.*/policy/.*',
value=r'.*/value/.*')
weight_summaries = dict(all=r'.*', policy=r'.*/policy/.*', value=r'.*/value/.*')
policy_layers = 200, 100
value_layers = 200, 100
init_mean_factor = 0.1
@@ -52,7 +49,7 @@ def default():
optimizer = tf.train.AdamOptimizer
update_epochs_policy = 64
update_epochs_value = 64
learning_rate = 1e-4
learning_rate = 1e-4
# Losses
discount = 0.995
kl_target = 1e-2
@@ -69,6 +66,7 @@ def pybullet_pendulum():
steps = 5e7 # 50M
return locals()
def pybullet_doublependulum():
locals().update(default())
env = 'InvertedDoublePendulumBulletEnv-v0'
@@ -76,6 +74,7 @@ def pybullet_doublependulum():
steps = 5e7 # 50M
return locals()
def pybullet_pendulumswingup():
locals().update(default())
env = 'InvertedPendulumSwingupBulletEnv-v0'
@@ -83,6 +82,7 @@ def pybullet_pendulumswingup():
steps = 5e7 # 50M
return locals()
def pybullet_cheetah():
"""Configuration for MuJoCo's half cheetah task."""
locals().update(default())
@@ -92,6 +92,7 @@ def pybullet_cheetah():
steps = 1e8 # 100M
return locals()
def pybullet_ant():
locals().update(default())
env = 'AntBulletEnv-v0'
@@ -99,6 +100,7 @@ def pybullet_ant():
steps = 5e7 # 50M
return locals()
def pybullet_kuka_grasping():
"""Configuration for Bullet Kuka grasping task."""
locals().update(default())
@@ -113,7 +115,7 @@ def pybullet_racecar():
"""Configuration for Bullet MIT Racecar task."""
locals().update(default())
# Environment
env = 'RacecarBulletEnv-v0' #functools.partial(racecarGymEnv.RacecarGymEnv, isDiscrete=False, renders=True)
env = 'RacecarBulletEnv-v0' #functools.partial(racecarGymEnv.RacecarGymEnv, isDiscrete=False, renders=True)
max_length = 10
steps = 1e7 # 10M
return locals()
@@ -132,29 +134,27 @@ def pybullet_minitaur():
"""Configuration specific to minitaur_gym_env.MinitaurBulletEnv class."""
locals().update(default())
randomizer = (minitaur_env_randomizer.MinitaurEnvRandomizer())
env = functools.partial(
minitaur_gym_env.MinitaurBulletEnv,
accurate_motor_model_enabled=True,
motor_overheat_protection=True,
pd_control_enabled=True,
env_randomizer=randomizer,
render=False)
env = functools.partial(minitaur_gym_env.MinitaurBulletEnv,
accurate_motor_model_enabled=True,
motor_overheat_protection=True,
pd_control_enabled=True,
env_randomizer=randomizer,
render=False)
max_length = 1000
steps = 3e7 # 30M
return locals()
def pybullet_duck_minitaur():
"""Configuration specific to minitaur_gym_env.MinitaurBulletDuckEnv class."""
locals().update(default())
randomizer = (minitaur_env_randomizer.MinitaurEnvRandomizer())
env = functools.partial(
minitaur_gym_env.MinitaurBulletDuckEnv,
accurate_motor_model_enabled=True,
motor_overheat_protection=True,
pd_control_enabled=True,
env_randomizer=randomizer,
render=False)
env = functools.partial(minitaur_gym_env.MinitaurBulletDuckEnv,
accurate_motor_model_enabled=True,
motor_overheat_protection=True,
pd_control_enabled=True,
env_randomizer=randomizer,
render=False)
max_length = 1000
steps = 3e7 # 30M
return locals()
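
Each configuration above is a plain function that builds on default() and returns locals(), so the whole hyperparameter set comes back as a dictionary. A hedged sketch of reading one of them directly (the module path pybullet_envs.agents.configs is an assumption, and importing it needs the TF1-era dependencies listed at the top of this file):

import pybullet_envs.agents.configs as configs  # assumed module path for this file

config = configs.pybullet_ant()  # dict built from default() plus the ant-specific overrides
print(config['env'], config['max_length'], config['steps'])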

View File

@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Network definitions for the PPO algorithm."""
from __future__ import absolute_import
@@ -24,13 +23,10 @@ import operator
import tensorflow as tf
NetworkOutput = collections.namedtuple(
'NetworkOutput', 'policy, mean, logstd, value, state')
NetworkOutput = collections.namedtuple('NetworkOutput', 'policy, mean, logstd, value, state')
def feed_forward_gaussian(
config, action_size, observations, unused_length, state=None):
def feed_forward_gaussian(config, action_size, observations, unused_length, state=None):
"""Independent feed forward networks for policy and value.
The policy network outputs the mean action and the log standard deviation
@@ -50,20 +46,22 @@ def feed_forward_gaussian(
factor=config.init_mean_factor)
logstd_initializer = tf.random_normal_initializer(config.init_logstd, 1e-10)
flat_observations = tf.reshape(observations, [
tf.shape(observations)[0], tf.shape(observations)[1],
functools.reduce(operator.mul, observations.shape.as_list()[2:], 1)])
tf.shape(observations)[0],
tf.shape(observations)[1],
functools.reduce(operator.mul,
observations.shape.as_list()[2:], 1)
])
with tf.variable_scope('policy'):
x = flat_observations
for size in config.policy_layers:
x = tf.contrib.layers.fully_connected(x, size, tf.nn.relu)
mean = tf.contrib.layers.fully_connected(
x, action_size, tf.tanh,
weights_initializer=mean_weights_initializer)
logstd = tf.get_variable(
'logstd', mean.shape[2:], tf.float32, logstd_initializer)
logstd = tf.tile(
logstd[None, None],
[tf.shape(mean)[0], tf.shape(mean)[1]] + [1] * (mean.shape.ndims - 2))
mean = tf.contrib.layers.fully_connected(x,
action_size,
tf.tanh,
weights_initializer=mean_weights_initializer)
logstd = tf.get_variable('logstd', mean.shape[2:], tf.float32, logstd_initializer)
logstd = tf.tile(logstd[None, None],
[tf.shape(mean)[0], tf.shape(mean)[1]] + [1] * (mean.shape.ndims - 2))
with tf.variable_scope('value'):
x = flat_observations
for size in config.value_layers:
@@ -72,13 +70,11 @@ def feed_forward_gaussian(
mean = tf.check_numerics(mean, 'mean')
logstd = tf.check_numerics(logstd, 'logstd')
value = tf.check_numerics(value, 'value')
policy = tf.contrib.distributions.MultivariateNormalDiag(
mean, tf.exp(logstd))
policy = tf.contrib.distributions.MultivariateNormalDiag(mean, tf.exp(logstd))
return NetworkOutput(policy, mean, logstd, value, state)
def recurrent_gaussian(
config, action_size, observations, length, state=None):
def recurrent_gaussian(config, action_size, observations, length, state=None):
"""Independent recurrent policy and feed forward value networks.
The policy network outputs the mean action and the log standard deviation
@@ -100,21 +96,23 @@ def recurrent_gaussian(
logstd_initializer = tf.random_normal_initializer(config.init_logstd, 1e-10)
cell = tf.contrib.rnn.GRUBlockCell(config.policy_layers[-1])
flat_observations = tf.reshape(observations, [
tf.shape(observations)[0], tf.shape(observations)[1],
functools.reduce(operator.mul, observations.shape.as_list()[2:], 1)])
tf.shape(observations)[0],
tf.shape(observations)[1],
functools.reduce(operator.mul,
observations.shape.as_list()[2:], 1)
])
with tf.variable_scope('policy'):
x = flat_observations
for size in config.policy_layers[:-1]:
x = tf.contrib.layers.fully_connected(x, size, tf.nn.relu)
x, state = tf.nn.dynamic_rnn(cell, x, length, state, tf.float32)
mean = tf.contrib.layers.fully_connected(
x, action_size, tf.tanh,
weights_initializer=mean_weights_initializer)
logstd = tf.get_variable(
'logstd', mean.shape[2:], tf.float32, logstd_initializer)
logstd = tf.tile(
logstd[None, None],
[tf.shape(mean)[0], tf.shape(mean)[1]] + [1] * (mean.shape.ndims - 2))
mean = tf.contrib.layers.fully_connected(x,
action_size,
tf.tanh,
weights_initializer=mean_weights_initializer)
logstd = tf.get_variable('logstd', mean.shape[2:], tf.float32, logstd_initializer)
logstd = tf.tile(logstd[None, None],
[tf.shape(mean)[0], tf.shape(mean)[1]] + [1] * (mean.shape.ndims - 2))
with tf.variable_scope('value'):
x = flat_observations
for size in config.value_layers:
@@ -123,7 +121,6 @@ def recurrent_gaussian(
mean = tf.check_numerics(mean, 'mean')
logstd = tf.check_numerics(logstd, 'logstd')
value = tf.check_numerics(value, 'value')
policy = tf.contrib.distributions.MultivariateNormalDiag(
mean, tf.exp(logstd))
policy = tf.contrib.distributions.MultivariateNormalDiag(mean, tf.exp(logstd))
# assert state.shape.as_list()[0] is not None
return NetworkOutput(policy, mean, logstd, value, state)

View File

@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Proximal Policy Optimization algorithm."""
from __future__ import absolute_import

View File

@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Proximal Policy Optimization algorithm.
Based on John Schulman's implementation in Python and Theano:
@@ -49,51 +48,51 @@ class PPOAlgorithm(object):
self._is_training = is_training
self._should_log = should_log
self._config = config
self._observ_filter = normalize.StreamingNormalize(
self._batch_env.observ[0], center=True, scale=True, clip=5,
name='normalize_observ')
self._reward_filter = normalize.StreamingNormalize(
self._batch_env.reward[0], center=False, scale=True, clip=10,
name='normalize_reward')
self._observ_filter = normalize.StreamingNormalize(self._batch_env.observ[0],
center=True,
scale=True,
clip=5,
name='normalize_observ')
self._reward_filter = normalize.StreamingNormalize(self._batch_env.reward[0],
center=False,
scale=True,
clip=10,
name='normalize_reward')
# Memory stores tuple of observ, action, mean, logstd, reward.
template = (
self._batch_env.observ[0], self._batch_env.action[0],
self._batch_env.action[0], self._batch_env.action[0],
self._batch_env.reward[0])
self._memory = memory.EpisodeMemory(
template, config.update_every, config.max_length, 'memory')
template = (self._batch_env.observ[0], self._batch_env.action[0], self._batch_env.action[0],
self._batch_env.action[0], self._batch_env.reward[0])
self._memory = memory.EpisodeMemory(template, config.update_every, config.max_length, 'memory')
self._memory_index = tf.Variable(0, False)
use_gpu = self._config.use_gpu and utility.available_gpus()
with tf.device('/gpu:0' if use_gpu else '/cpu:0'):
# Create network variables for later calls to reuse.
action_size = self._batch_env.action.shape[1].value
self._network = tf.make_template(
'network', functools.partial(config.network, config, action_size))
self._network = tf.make_template('network',
functools.partial(config.network, config, action_size))
output = self._network(
tf.zeros_like(self._batch_env.observ)[:, None],
tf.ones(len(self._batch_env)))
tf.zeros_like(self._batch_env.observ)[:, None], tf.ones(len(self._batch_env)))
with tf.variable_scope('ppo_temporary'):
self._episodes = memory.EpisodeMemory(
template, len(batch_env), config.max_length, 'episodes')
self._episodes = memory.EpisodeMemory(template, len(batch_env), config.max_length,
'episodes')
if output.state is None:
self._last_state = None
else:
# Ensure the batch dimension is set.
tf.contrib.framework.nest.map_structure(
lambda x: x.set_shape([len(batch_env)] + x.shape.as_list()[1:]),
output.state)
lambda x: x.set_shape([len(batch_env)] + x.shape.as_list()[1:]), output.state)
# pylint: disable=undefined-variable
self._last_state = tf.contrib.framework.nest.map_structure(
lambda x: tf.Variable(lambda: tf.zeros_like(x), False),
output.state)
self._last_action = tf.Variable(
tf.zeros_like(self._batch_env.action), False, name='last_action')
self._last_mean = tf.Variable(
tf.zeros_like(self._batch_env.action), False, name='last_mean')
self._last_logstd = tf.Variable(
tf.zeros_like(self._batch_env.action), False, name='last_logstd')
self._penalty = tf.Variable(
self._config.kl_init_penalty, False, dtype=tf.float32)
lambda x: tf.Variable(lambda: tf.zeros_like(x), False), output.state)
self._last_action = tf.Variable(tf.zeros_like(self._batch_env.action),
False,
name='last_action')
self._last_mean = tf.Variable(tf.zeros_like(self._batch_env.action),
False,
name='last_mean')
self._last_logstd = tf.Variable(tf.zeros_like(self._batch_env.action),
False,
name='last_logstd')
self._penalty = tf.Variable(self._config.kl_init_penalty, False, dtype=tf.float32)
self._optimizer = self._config.optimizer(self._config.learning_rate)
def begin_episode(self, agent_indices):
@@ -109,8 +108,7 @@ class PPOAlgorithm(object):
if self._last_state is None:
reset_state = tf.no_op()
else:
reset_state = utility.reinit_nested_vars(
self._last_state, agent_indices)
reset_state = utility.reinit_nested_vars(self._last_state, agent_indices)
reset_buffer = self._episodes.clear(agent_indices)
with tf.control_dependencies([reset_state, reset_buffer]):
return tf.constant('')
@@ -130,36 +128,33 @@ class PPOAlgorithm(object):
if self._last_state is None:
state = None
else:
state = tf.contrib.framework.nest.map_structure(
lambda x: tf.gather(x, agent_indices), self._last_state)
state = tf.contrib.framework.nest.map_structure(lambda x: tf.gather(x, agent_indices),
self._last_state)
output = self._network(observ[:, None], tf.ones(observ.shape[0]), state)
action = tf.cond(
self._is_training, output.policy.sample, lambda: output.mean)
action = tf.cond(self._is_training, output.policy.sample, lambda: output.mean)
logprob = output.policy.log_prob(action)[:, 0]
# pylint: disable=g-long-lambda
summary = tf.cond(self._should_log, lambda: tf.summary.merge([
tf.summary.histogram('mean', output.mean[:, 0]),
tf.summary.histogram('std', tf.exp(output.logstd[:, 0])),
tf.summary.histogram('action', action[:, 0]),
tf.summary.histogram('logprob', logprob)]), str)
summary = tf.cond(
self._should_log, lambda: tf.summary.merge([
tf.summary.histogram('mean', output.mean[:, 0]),
tf.summary.histogram('std', tf.exp(output.logstd[:, 0])),
tf.summary.histogram('action', action[:, 0]),
tf.summary.histogram('logprob', logprob)
]), str)
# Remember current policy to append to memory in the experience callback.
if self._last_state is None:
assign_state = tf.no_op()
else:
assign_state = utility.assign_nested_vars(
self._last_state, output.state, agent_indices)
assign_state = utility.assign_nested_vars(self._last_state, output.state, agent_indices)
with tf.control_dependencies([
assign_state,
tf.scatter_update(
self._last_action, agent_indices, action[:, 0]),
tf.scatter_update(
self._last_mean, agent_indices, output.mean[:, 0]),
tf.scatter_update(
self._last_logstd, agent_indices, output.logstd[:, 0])]):
tf.scatter_update(self._last_action, agent_indices, action[:, 0]),
tf.scatter_update(self._last_mean, agent_indices, output.mean[:, 0]),
tf.scatter_update(self._last_logstd, agent_indices, output.logstd[:, 0])
]):
return tf.check_numerics(action[:, 0], 'action'), tf.identity(summary)
def experience(
self, agent_indices, observ, action, reward, unused_done, unused_nextob):
def experience(self, agent_indices, observ, action, reward, unused_done, unused_nextob):
"""Process the transition tuple of the current step.
When training, add the current transition tuple to the memory and update
@@ -181,34 +176,36 @@ class PPOAlgorithm(object):
return tf.cond(
self._is_training,
# pylint: disable=g-long-lambda
lambda: self._define_experience(
agent_indices, observ, action, reward), str)
lambda: self._define_experience(agent_indices, observ, action, reward),
str)
def _define_experience(self, agent_indices, observ, action, reward):
"""Implement the branch of experience() entered during training."""
update_filters = tf.summary.merge([
self._observ_filter.update(observ),
self._reward_filter.update(reward)])
update_filters = tf.summary.merge(
[self._observ_filter.update(observ),
self._reward_filter.update(reward)])
with tf.control_dependencies([update_filters]):
if self._config.train_on_agent_action:
# NOTE: Doesn't seem to change much.
action = self._last_action
batch = (
observ, action, tf.gather(self._last_mean, agent_indices),
tf.gather(self._last_logstd, agent_indices), reward)
batch = (observ, action, tf.gather(self._last_mean,
agent_indices), tf.gather(self._last_logstd,
agent_indices), reward)
append = self._episodes.append(batch, agent_indices)
with tf.control_dependencies([append]):
norm_observ = self._observ_filter.transform(observ)
norm_reward = tf.reduce_mean(self._reward_filter.transform(reward))
# pylint: disable=g-long-lambda
summary = tf.cond(self._should_log, lambda: tf.summary.merge([
update_filters,
self._observ_filter.summary(),
self._reward_filter.summary(),
tf.summary.scalar('memory_size', self._memory_index),
tf.summary.histogram('normalized_observ', norm_observ),
tf.summary.histogram('action', self._last_action),
tf.summary.scalar('normalized_reward', norm_reward)]), str)
summary = tf.cond(
self._should_log, lambda: tf.summary.merge([
update_filters,
self._observ_filter.summary(),
self._reward_filter.summary(),
tf.summary.scalar('memory_size', self._memory_index),
tf.summary.histogram('normalized_observ', norm_observ),
tf.summary.histogram('action', self._last_action),
tf.summary.scalar('normalized_reward', norm_reward)
]), str)
return summary
def end_episode(self, agent_indices):
@@ -226,20 +223,16 @@ class PPOAlgorithm(object):
Summary tensor.
"""
with tf.name_scope('end_episode/'):
return tf.cond(
self._is_training,
lambda: self._define_end_episode(agent_indices), str)
return tf.cond(self._is_training, lambda: self._define_end_episode(agent_indices), str)
def _define_end_episode(self, agent_indices):
"""Implement the branch of end_episode() entered during training."""
episodes, length = self._episodes.data(agent_indices)
space_left = self._config.update_every - self._memory_index
use_episodes = tf.range(tf.minimum(
tf.shape(agent_indices)[0], space_left))
use_episodes = tf.range(tf.minimum(tf.shape(agent_indices)[0], space_left))
episodes = [tf.gather(elem, use_episodes) for elem in episodes]
append = self._memory.replace(
episodes, tf.gather(length, use_episodes),
use_episodes + self._memory_index)
append = self._memory.replace(episodes, tf.gather(length, use_episodes),
use_episodes + self._memory_index)
with tf.control_dependencies([append]):
inc_index = self._memory_index.assign_add(tf.shape(use_episodes)[0])
with tf.control_dependencies([inc_index]):
@@ -256,8 +249,7 @@ class PPOAlgorithm(object):
Summary tensor.
"""
with tf.name_scope('training'):
assert_full = tf.assert_equal(
self._memory_index, self._config.update_every)
assert_full = tf.assert_equal(self._memory_index, self._config.update_every)
with tf.control_dependencies([assert_full]):
data = self._memory.data()
(observ, action, old_mean, old_logstd, reward), length = data
@@ -265,22 +257,18 @@ class PPOAlgorithm(object):
length = tf.identity(length)
observ = self._observ_filter.transform(observ)
reward = self._reward_filter.transform(reward)
update_summary = self._perform_update_steps(
observ, action, old_mean, old_logstd, reward, length)
update_summary = self._perform_update_steps(observ, action, old_mean, old_logstd, reward,
length)
with tf.control_dependencies([update_summary]):
penalty_summary = self._adjust_penalty(
observ, old_mean, old_logstd, length)
penalty_summary = self._adjust_penalty(observ, old_mean, old_logstd, length)
with tf.control_dependencies([penalty_summary]):
clear_memory = tf.group(
self._memory.clear(), self._memory_index.assign(0))
clear_memory = tf.group(self._memory.clear(), self._memory_index.assign(0))
with tf.control_dependencies([clear_memory]):
weight_summary = utility.variable_summaries(
tf.trainable_variables(), self._config.weight_summaries)
return tf.summary.merge([
update_summary, penalty_summary, weight_summary])
weight_summary = utility.variable_summaries(tf.trainable_variables(),
self._config.weight_summaries)
return tf.summary.merge([update_summary, penalty_summary, weight_summary])
def _perform_update_steps(
self, observ, action, old_mean, old_logstd, reward, length):
def _perform_update_steps(self, observ, action, old_mean, old_logstd, reward, length):
"""Perform multiple update steps of value function and policy.
The advantage is computed once at the beginning and shared across
@@ -298,37 +286,29 @@ class PPOAlgorithm(object):
Returns:
Summary tensor.
"""
return_ = utility.discounted_return(
reward, length, self._config.discount)
return_ = utility.discounted_return(reward, length, self._config.discount)
value = self._network(observ, length).value
if self._config.gae_lambda:
advantage = utility.lambda_return(
reward, value, length, self._config.discount,
self._config.gae_lambda)
advantage = utility.lambda_return(reward, value, length, self._config.discount,
self._config.gae_lambda)
else:
advantage = return_ - value
mean, variance = tf.nn.moments(advantage, axes=[0, 1], keep_dims=True)
advantage = (advantage - mean) / (tf.sqrt(variance) + 1e-8)
advantage = tf.Print(
advantage, [tf.reduce_mean(return_), tf.reduce_mean(value)],
'return and value: ')
advantage = tf.Print(
advantage, [tf.reduce_mean(advantage)],
'normalized advantage: ')
advantage = tf.Print(advantage,
[tf.reduce_mean(return_), tf.reduce_mean(value)], 'return and value: ')
advantage = tf.Print(advantage, [tf.reduce_mean(advantage)], 'normalized advantage: ')
# pylint: disable=g-long-lambda
value_loss, policy_loss, summary = tf.scan(
lambda _1, _2: self._update_step(
observ, action, old_mean, old_logstd, reward, advantage, length),
tf.range(self._config.update_epochs),
[0., 0., ''], parallel_iterations=1)
print_losses = tf.group(
tf.Print(0, [tf.reduce_mean(value_loss)], 'value loss: '),
tf.Print(0, [tf.reduce_mean(policy_loss)], 'policy loss: '))
value_loss, policy_loss, summary = tf.scan(lambda _1, _2: self._update_step(
observ, action, old_mean, old_logstd, reward, advantage, length),
tf.range(self._config.update_epochs), [0., 0., ''],
parallel_iterations=1)
print_losses = tf.group(tf.Print(0, [tf.reduce_mean(value_loss)], 'value loss: '),
tf.Print(0, [tf.reduce_mean(policy_loss)], 'policy loss: '))
with tf.control_dependencies([value_loss, policy_loss, print_losses]):
return summary[self._config.update_epochs // 2]
def _update_step(
self, observ, action, old_mean, old_logstd, reward, advantage, length):
def _update_step(self, observ, action, old_mean, old_logstd, reward, advantage, length):
"""Compute the current combined loss and perform a gradient update step.
Args:
@@ -345,27 +325,20 @@ class PPOAlgorithm(object):
"""
value_loss, value_summary = self._value_loss(observ, reward, length)
network = self._network(observ, length)
policy_loss, policy_summary = self._policy_loss(
network.mean, network.logstd, old_mean, old_logstd, action,
advantage, length)
value_gradients, value_variables = (
zip(*self._optimizer.compute_gradients(value_loss)))
policy_gradients, policy_variables = (
zip(*self._optimizer.compute_gradients(policy_loss)))
policy_loss, policy_summary = self._policy_loss(network.mean, network.logstd, old_mean,
old_logstd, action, advantage, length)
value_gradients, value_variables = (zip(*self._optimizer.compute_gradients(value_loss)))
policy_gradients, policy_variables = (zip(*self._optimizer.compute_gradients(policy_loss)))
all_gradients = value_gradients + policy_gradients
all_variables = value_variables + policy_variables
optimize = self._optimizer.apply_gradients(
zip(all_gradients, all_variables))
optimize = self._optimizer.apply_gradients(zip(all_gradients, all_variables))
summary = tf.summary.merge([
value_summary, policy_summary,
tf.summary.scalar(
'value_gradient_norm', tf.global_norm(value_gradients)),
tf.summary.scalar(
'policy_gradient_norm', tf.global_norm(policy_gradients)),
utility.gradient_summaries(
zip(value_gradients, value_variables), dict(value=r'.*')),
utility.gradient_summaries(
zip(policy_gradients, policy_variables), dict(policy=r'.*'))])
tf.summary.scalar('value_gradient_norm', tf.global_norm(value_gradients)),
tf.summary.scalar('policy_gradient_norm', tf.global_norm(policy_gradients)),
utility.gradient_summaries(zip(value_gradients, value_variables), dict(value=r'.*')),
utility.gradient_summaries(zip(policy_gradients, policy_variables), dict(policy=r'.*'))
])
with tf.control_dependencies([optimize]):
return [tf.identity(x) for x in (value_loss, policy_loss, summary)]
@@ -385,18 +358,17 @@ class PPOAlgorithm(object):
"""
with tf.name_scope('value_loss'):
value = self._network(observ, length).value
return_ = utility.discounted_return(
reward, length, self._config.discount)
return_ = utility.discounted_return(reward, length, self._config.discount)
advantage = return_ - value
value_loss = 0.5 * self._mask(advantage ** 2, length)
value_loss = 0.5 * self._mask(advantage**2, length)
summary = tf.summary.merge([
tf.summary.histogram('value_loss', value_loss),
tf.summary.scalar('avg_value_loss', tf.reduce_mean(value_loss))])
tf.summary.scalar('avg_value_loss', tf.reduce_mean(value_loss))
])
value_loss = tf.reduce_mean(value_loss)
return tf.check_numerics(value_loss, 'value_loss'), summary
def _policy_loss(
self, mean, logstd, old_mean, old_logstd, action, advantage, length):
def _policy_loss(self, mean, logstd, old_mean, old_logstd, action, advantage, length):
"""Compute the policy loss composed of multiple components.
1. The policy gradient loss is importance sampled from the data-collecting
@@ -420,24 +392,20 @@ class PPOAlgorithm(object):
"""
with tf.name_scope('policy_loss'):
entropy = utility.diag_normal_entropy(mean, logstd)
kl = tf.reduce_mean(self._mask(utility.diag_normal_kl(
old_mean, old_logstd, mean, logstd), length), 1)
kl = tf.reduce_mean(
self._mask(utility.diag_normal_kl(old_mean, old_logstd, mean, logstd), length), 1)
policy_gradient = tf.exp(
utility.diag_normal_logpdf(mean, logstd, action) -
utility.diag_normal_logpdf(old_mean, old_logstd, action))
surrogate_loss = -tf.reduce_mean(self._mask(
policy_gradient * tf.stop_gradient(advantage), length), 1)
surrogate_loss = -tf.reduce_mean(
self._mask(policy_gradient * tf.stop_gradient(advantage), length), 1)
kl_penalty = self._penalty * kl
cutoff_threshold = self._config.kl_target * self._config.kl_cutoff_factor
cutoff_count = tf.reduce_sum(
tf.cast(kl > cutoff_threshold, tf.int32))
with tf.control_dependencies([tf.cond(
cutoff_count > 0,
lambda: tf.Print(0, [cutoff_count], 'kl cutoff! '), int)]):
kl_cutoff = (
self._config.kl_cutoff_coef *
tf.cast(kl > cutoff_threshold, tf.float32) *
(kl - cutoff_threshold) ** 2)
cutoff_count = tf.reduce_sum(tf.cast(kl > cutoff_threshold, tf.int32))
with tf.control_dependencies(
[tf.cond(cutoff_count > 0, lambda: tf.Print(0, [cutoff_count], 'kl cutoff! '), int)]):
kl_cutoff = (self._config.kl_cutoff_coef * tf.cast(kl > cutoff_threshold, tf.float32) *
(kl - cutoff_threshold)**2)
policy_loss = surrogate_loss + kl_penalty + kl_cutoff
summary = tf.summary.merge([
tf.summary.histogram('entropy', entropy),
@@ -449,7 +417,8 @@ class PPOAlgorithm(object):
tf.summary.histogram('policy_loss', policy_loss),
tf.summary.scalar('avg_surr_loss', tf.reduce_mean(surrogate_loss)),
tf.summary.scalar('avg_kl_penalty', tf.reduce_mean(kl_penalty)),
tf.summary.scalar('avg_policy_loss', tf.reduce_mean(policy_loss))])
tf.summary.scalar('avg_policy_loss', tf.reduce_mean(policy_loss))
])
policy_loss = tf.reduce_mean(policy_loss, 0)
return tf.check_numerics(policy_loss, 'policy_loss'), summary
@@ -471,30 +440,30 @@ class PPOAlgorithm(object):
"""
with tf.name_scope('adjust_penalty'):
network = self._network(observ, length)
assert_change = tf.assert_equal(
tf.reduce_all(tf.equal(network.mean, old_mean)), False,
message='policy should change')
assert_change = tf.assert_equal(tf.reduce_all(tf.equal(network.mean, old_mean)),
False,
message='policy should change')
print_penalty = tf.Print(0, [self._penalty], 'current penalty: ')
with tf.control_dependencies([assert_change, print_penalty]):
kl_change = tf.reduce_mean(self._mask(utility.diag_normal_kl(
old_mean, old_logstd, network.mean, network.logstd), length))
kl_change = tf.reduce_mean(
self._mask(utility.diag_normal_kl(old_mean, old_logstd, network.mean, network.logstd),
length))
kl_change = tf.Print(kl_change, [kl_change], 'kl change: ')
maybe_increase = tf.cond(
kl_change > 1.3 * self._config.kl_target,
# pylint: disable=g-long-lambda
lambda: tf.Print(self._penalty.assign(
self._penalty * 1.5), [0], 'increase penalty '),
lambda: tf.Print(self._penalty.assign(self._penalty * 1.5), [0], 'increase penalty '),
float)
maybe_decrease = tf.cond(
kl_change < 0.7 * self._config.kl_target,
# pylint: disable=g-long-lambda
lambda: tf.Print(self._penalty.assign(
self._penalty / 1.5), [0], 'decrease penalty '),
lambda: tf.Print(self._penalty.assign(self._penalty / 1.5), [0], 'decrease penalty '),
float)
with tf.control_dependencies([maybe_increase, maybe_decrease]):
return tf.summary.merge([
tf.summary.scalar('kl_change', kl_change),
tf.summary.scalar('penalty', self._penalty)])
tf.summary.scalar('penalty', self._penalty)
])
def _mask(self, tensor, length):
"""Set padding elements of a batch of sequences to zero.

View File

@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Memory that stores episodes."""
from __future__ import absolute_import
@@ -43,10 +42,9 @@ class EpisodeMemory(object):
self._scope = var_scope
self._length = tf.Variable(tf.zeros(capacity, tf.int32), False)
self._buffers = [
tf.Variable(tf.zeros(
[capacity, max_length] + elem.shape.as_list(),
elem.dtype), False)
for elem in template]
tf.Variable(tf.zeros([capacity, max_length] + elem.shape.as_list(), elem.dtype), False)
for elem in template
]
def length(self, rows=None):
"""Tensor holding the current length of episodes.
@@ -72,13 +70,11 @@ class EpisodeMemory(object):
"""
rows = tf.range(self._capacity) if rows is None else rows
assert rows.shape.ndims == 1
assert_capacity = tf.assert_less(
rows, self._capacity,
message='capacity exceeded')
assert_capacity = tf.assert_less(rows, self._capacity, message='capacity exceeded')
with tf.control_dependencies([assert_capacity]):
assert_max_length = tf.assert_less(
tf.gather(self._length, rows), self._max_length,
message='max length exceeded')
assert_max_length = tf.assert_less(tf.gather(self._length, rows),
self._max_length,
message='max length exceeded')
append_ops = []
with tf.control_dependencies([assert_max_length]):
for buffer_, elements in zip(self._buffers, transitions):
@@ -86,8 +82,7 @@ class EpisodeMemory(object):
indices = tf.stack([rows, timestep], 1)
append_ops.append(tf.scatter_nd_update(buffer_, indices, elements))
with tf.control_dependencies(append_ops):
episode_mask = tf.reduce_sum(tf.one_hot(
rows, self._capacity, dtype=tf.int32), 0)
episode_mask = tf.reduce_sum(tf.one_hot(rows, self._capacity, dtype=tf.int32), 0)
return self._length.assign_add(episode_mask)
def replace(self, episodes, length, rows=None):
@@ -103,11 +98,11 @@ class EpisodeMemory(object):
"""
rows = tf.range(self._capacity) if rows is None else rows
assert rows.shape.ndims == 1
assert_capacity = tf.assert_less(
rows, self._capacity, message='capacity exceeded')
assert_capacity = tf.assert_less(rows, self._capacity, message='capacity exceeded')
with tf.control_dependencies([assert_capacity]):
assert_max_length = tf.assert_less_equal(
length, self._max_length, message='max length exceeded')
assert_max_length = tf.assert_less_equal(length,
self._max_length,
message='max length exceeded')
replace_ops = []
with tf.control_dependencies([assert_max_length]):
for buffer_, elements in zip(self._buffers, episodes):

View File

@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Normalize tensors based on streaming estimates of mean and variance."""
from __future__ import absolute_import
@@ -24,8 +23,7 @@ import tensorflow as tf
class StreamingNormalize(object):
"""Normalize tensors based on streaming estimates of mean and variance."""
def __init__(
self, template, center=True, scale=True, clip=10, name='normalize'):
def __init__(self, template, center=True, scale=True, clip=10, name='normalize'):
"""Normalize tensors based on streaming estimates of mean and variance.
Centering the value, scaling it by the standard deviation, and clipping
@@ -69,8 +67,7 @@ class StreamingNormalize(object):
if self._scale:
# We cannot scale before seeing at least two samples.
value /= tf.cond(
self._count > 1, lambda: self._std() + 1e-8,
lambda: tf.ones_like(self._var_sum))[None]
self._count > 1, lambda: self._std() + 1e-8, lambda: tf.ones_like(self._var_sum))[None]
if self._clip:
value = tf.clip_by_value(value, -self._clip, self._clip)
# Remove batch dimension if necessary.
@@ -97,8 +94,7 @@ class StreamingNormalize(object):
mean_delta = tf.reduce_sum(value - self._mean[None, ...], 0)
new_mean = self._mean + mean_delta / step
new_mean = tf.cond(self._count > 1, lambda: new_mean, lambda: value[0])
var_delta = (
value - self._mean[None, ...]) * (value - new_mean[None, ...])
var_delta = (value - self._mean[None, ...]) * (value - new_mean[None, ...])
new_var_sum = self._var_sum + tf.reduce_sum(var_delta, 0)
with tf.control_dependencies([new_mean, new_var_sum]):
update = self._mean.assign(new_mean), self._var_sum.assign(new_var_sum)
@@ -116,10 +112,8 @@ class StreamingNormalize(object):
Operation.
"""
with tf.name_scope(self._name + '/reset'):
return tf.group(
self._count.assign(0),
self._mean.assign(tf.zeros_like(self._mean)),
self._var_sum.assign(tf.zeros_like(self._var_sum)))
return tf.group(self._count.assign(0), self._mean.assign(tf.zeros_like(self._mean)),
self._var_sum.assign(tf.zeros_like(self._var_sum)))
def summary(self):
"""Summary string of mean and standard deviation.
@@ -128,10 +122,8 @@ class StreamingNormalize(object):
Summary tensor.
"""
with tf.name_scope(self._name + '/summary'):
mean_summary = tf.cond(
self._count > 0, lambda: self._summary('mean', self._mean), str)
std_summary = tf.cond(
self._count > 1, lambda: self._summary('stddev', self._std()), str)
mean_summary = tf.cond(self._count > 0, lambda: self._summary('mean', self._mean), str)
std_summary = tf.cond(self._count > 1, lambda: self._summary('stddev', self._std()), str)
return tf.summary.merge([mean_summary, std_summary])
def _std(self):
@@ -143,10 +135,8 @@ class StreamingNormalize(object):
Returns:
Tensor of current variance.
"""
variance = tf.cond(
self._count > 1,
lambda: self._var_sum / tf.cast(self._count - 1, tf.float32),
lambda: tf.ones_like(self._var_sum) * float('nan'))
variance = tf.cond(self._count > 1, lambda: self._var_sum / tf.cast(
self._count - 1, tf.float32), lambda: tf.ones_like(self._var_sum) * float('nan'))
# The epsilon corrects for small negative variance values caused by
# the algorithm. It was empirically chosen to work with all environments
# tested.
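
For reference, a minimal NumPy sketch of the streaming statistics this class maintains: the running mean shifts by the batch delta over the count, and the variance accumulator sums products of deviations. All names here are illustrative and not part of the module.

import numpy as np

class StreamingStats(object):
  """Plain-Python analogue of the mean/variance updates above (sketch only)."""

  def __init__(self, shape):
    self.count = 0
    self.mean = np.zeros(shape)
    self.var_sum = np.zeros(shape)

  def update(self, value):
    # Mirrors update(): shift the mean, then accumulate squared deviations.
    self.count += 1
    new_mean = self.mean + (value - self.mean) / self.count
    if self.count == 1:
      new_mean = value.astype(float)  # first sample defines the mean outright
    self.var_sum += (value - self.mean) * (value - new_mean)
    self.mean = new_mean

  def std(self):
    # Sample standard deviation, defined once at least two values were seen.
    return np.sqrt(self.var_sum / (self.count - 1)) if self.count > 1 else None

stats = StreamingStats(1)
for x in [1.0, 2.0, 4.0]:
  stats.update(np.array([x]))
print(stats.mean, stats.std())  # approx. [2.333] [1.528]
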

View File

@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Utilities for the PPO algorithm."""
from __future__ import absolute_import
@@ -37,8 +36,7 @@ def reinit_nested_vars(variables, indices=None):
Operation.
"""
if isinstance(variables, (tuple, list)):
return tf.group(*[
reinit_nested_vars(variable, indices) for variable in variables])
return tf.group(*[reinit_nested_vars(variable, indices) for variable in variables])
if indices is None:
return variables.assign(tf.zeros_like(variables))
else:
@@ -58,9 +56,8 @@ def assign_nested_vars(variables, tensors, indices=None):
Operation.
"""
if isinstance(variables, (tuple, list)):
return tf.group(*[
assign_nested_vars(variable, tensor)
for variable, tensor in zip(variables, tensors)])
return tf.group(
*[assign_nested_vars(variable, tensor) for variable, tensor in zip(variables, tensors)])
if indices is None:
return variables.assign(tensors)
else:
@@ -71,10 +68,11 @@ def discounted_return(reward, length, discount):
"""Discounted Monte-Carlo returns."""
timestep = tf.range(reward.shape[1].value)
mask = tf.cast(timestep[None, :] < length[:, None], tf.float32)
return_ = tf.reverse(tf.transpose(tf.scan(
lambda agg, cur: cur + discount * agg,
tf.transpose(tf.reverse(mask * reward, [1]), [1, 0]),
tf.zeros_like(reward[:, -1]), 1, False), [1, 0]), [1])
return_ = tf.reverse(
tf.transpose(
tf.scan(lambda agg, cur: cur + discount * agg,
tf.transpose(tf.reverse(mask * reward, [1]), [1, 0]),
tf.zeros_like(reward[:, -1]), 1, False), [1, 0]), [1])
return tf.check_numerics(tf.stop_gradient(return_), 'return')
@@ -85,9 +83,8 @@ def fixed_step_return(reward, value, length, discount, window):
return_ = tf.zeros_like(reward)
for _ in range(window):
return_ += reward
reward = discount * tf.concat(
[reward[:, 1:], tf.zeros_like(reward[:, -1:])], 1)
return_ += discount ** window * tf.concat(
reward = discount * tf.concat([reward[:, 1:], tf.zeros_like(reward[:, -1:])], 1)
return_ += discount**window * tf.concat(
      [value[:, window:], tf.zeros_like(value[:, -window:])], 1)
return tf.check_numerics(tf.stop_gradient(mask * return_), 'return')
@@ -99,10 +96,11 @@ def lambda_return(reward, value, length, discount, lambda_):
sequence = mask * reward + discount * value * (1 - lambda_)
discount = mask * discount * lambda_
sequence = tf.stack([sequence, discount], 2)
return_ = tf.reverse(tf.transpose(tf.scan(
lambda agg, cur: cur[0] + cur[1] * agg,
tf.transpose(tf.reverse(sequence, [1]), [1, 2, 0]),
tf.zeros_like(value[:, -1]), 1, False), [1, 0]), [1])
return_ = tf.reverse(
tf.transpose(
tf.scan(lambda agg, cur: cur[0] + cur[1] * agg,
tf.transpose(tf.reverse(sequence, [1]), [1, 2, 0]), tf.zeros_like(value[:, -1]),
1, False), [1, 0]), [1])
return tf.check_numerics(tf.stop_gradient(return_), 'return')
@@ -112,27 +110,26 @@ def lambda_advantage(reward, value, length, discount):
mask = tf.cast(timestep[None, :] < length[:, None], tf.float32)
next_value = tf.concat([value[:, 1:], tf.zeros_like(value[:, -1:])], 1)
delta = reward + discount * next_value - value
advantage = tf.reverse(tf.transpose(tf.scan(
lambda agg, cur: cur + discount * agg,
tf.transpose(tf.reverse(mask * delta, [1]), [1, 0]),
tf.zeros_like(delta[:, -1]), 1, False), [1, 0]), [1])
advantage = tf.reverse(
tf.transpose(
tf.scan(lambda agg, cur: cur + discount * agg,
tf.transpose(tf.reverse(mask * delta, [1]), [1, 0]), tf.zeros_like(delta[:, -1]),
1, False), [1, 0]), [1])
return tf.check_numerics(tf.stop_gradient(advantage), 'advantage')
def diag_normal_kl(mean0, logstd0, mean1, logstd1):
"""Epirical KL divergence of two normals with diagonal covariance."""
logstd0_2, logstd1_2 = 2 * logstd0, 2 * logstd1
return 0.5 * (
tf.reduce_sum(tf.exp(logstd0_2 - logstd1_2), -1) +
tf.reduce_sum((mean1 - mean0) ** 2 / tf.exp(logstd1_2), -1) +
tf.reduce_sum(logstd1_2, -1) - tf.reduce_sum(logstd0_2, -1) -
mean0.shape[-1].value)
return 0.5 * (tf.reduce_sum(tf.exp(logstd0_2 - logstd1_2), -1) + tf.reduce_sum(
(mean1 - mean0)**2 / tf.exp(logstd1_2), -1) + tf.reduce_sum(logstd1_2, -1) -
tf.reduce_sum(logstd0_2, -1) - mean0.shape[-1].value)
def diag_normal_logpdf(mean, logstd, loc):
"""Log density of a normal with diagonal covariance."""
constant = -0.5 * math.log(2 * math.pi) - logstd
value = -0.5 * ((loc - mean) / tf.exp(logstd)) ** 2
value = -0.5 * ((loc - mean) / tf.exp(logstd))**2
return tf.reduce_sum(constant + value, -1)
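
As a sanity check on the return math above, here is a hedged NumPy sketch of the discounted Monte-Carlo return for a single episode; the reversed tf.scan in the code computes the same backward recursion batch-wise.

import numpy as np

def discounted_return_np(reward, discount):
  # Backward recursion: return_[t] = reward[t] + discount * return_[t + 1].
  return_ = np.zeros(len(reward))
  running = 0.0
  for t in reversed(range(len(reward))):
    running = reward[t] + discount * running
    return_[t] = running
  return return_

print(discounted_return_np(np.array([1.0, 1.0, 1.0]), 0.9))  # [2.71, 1.9, 1.0]
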

View File

@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tools for reinforcement learning."""
from __future__ import absolute_import

View File

@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Wrap a dictionary to access keys as attributes."""
from __future__ import absolute_import

View File

@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Combine multiple environments to step them in batch."""
from __future__ import absolute_import
@@ -83,13 +82,9 @@ class BatchEnv(object):
message = 'Invalid action at index {}: {}'
raise ValueError(message.format(index, action))
if self._blocking:
transitions = [
env.step(action)
for env, action in zip(self._envs, actions)]
transitions = [env.step(action) for env, action in zip(self._envs, actions)]
else:
transitions = [
env.step(action, blocking=False)
for env, action in zip(self._envs, actions)]
transitions = [env.step(action, blocking=False) for env, action in zip(self._envs, actions)]
transitions = [transition() for transition in transitions]
observs, rewards, dones, infos = zip(*transitions)
observ = np.stack(observs)
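
The blocking branch above boils down to stepping each environment in turn and stacking the results; a toy sketch with a stand-in environment (purely illustrative, not part of the module):

import numpy as np

class DummyEnv(object):
  """Stand-in environment with the (observ, reward, done, info) step contract."""

  def step(self, action):
    return np.array([float(action)]), 1.0, False, {}

envs = [DummyEnv() for _ in range(3)]
actions = [0, 1, 2]
transitions = [env.step(action) for env, action in zip(envs, actions)]
observs, rewards, dones, infos = zip(*transitions)
observ = np.stack(observs)
print(observ.shape, rewards)  # (3, 1) (1.0, 1.0, 1.0)
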

View File

@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Count learnable parameters."""
from __future__ import absolute_import

View File

@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Batch of environments inside the TensorFlow graph."""
from __future__ import absolute_import
@@ -42,18 +41,18 @@ class InGraphBatchEnv(object):
action_shape = self._parse_shape(self._batch_env.action_space)
action_dtype = self._parse_dtype(self._batch_env.action_space)
with tf.variable_scope('env_temporary'):
self._observ = tf.Variable(
tf.zeros((len(self._batch_env),) + observ_shape, observ_dtype),
name='observ', trainable=False)
self._action = tf.Variable(
tf.zeros((len(self._batch_env),) + action_shape, action_dtype),
name='action', trainable=False)
self._reward = tf.Variable(
tf.zeros((len(self._batch_env),), tf.float32),
name='reward', trainable=False)
self._done = tf.Variable(
tf.cast(tf.ones((len(self._batch_env),)), tf.bool),
name='done', trainable=False)
self._observ = tf.Variable(tf.zeros((len(self._batch_env),) + observ_shape, observ_dtype),
name='observ',
trainable=False)
self._action = tf.Variable(tf.zeros((len(self._batch_env),) + action_shape, action_dtype),
name='action',
trainable=False)
self._reward = tf.Variable(tf.zeros((len(self._batch_env),), tf.float32),
name='reward',
trainable=False)
self._done = tf.Variable(tf.cast(tf.ones((len(self._batch_env),)), tf.bool),
name='done',
trainable=False)
def __getattr__(self, name):
"""Forward unimplemented attributes to one of the original environments.
@@ -89,16 +88,13 @@ class InGraphBatchEnv(object):
if action.dtype in (tf.float16, tf.float32, tf.float64):
action = tf.check_numerics(action, 'action')
observ_dtype = self._parse_dtype(self._batch_env.observation_space)
observ, reward, done = tf.py_func(
lambda a: self._batch_env.step(a)[:3], [action],
[observ_dtype, tf.float32, tf.bool], name='step')
observ, reward, done = tf.py_func(lambda a: self._batch_env.step(a)[:3], [action],
[observ_dtype, tf.float32, tf.bool],
name='step')
observ = tf.check_numerics(observ, 'observ')
reward = tf.check_numerics(reward, 'reward')
return tf.group(
self._observ.assign(observ),
self._action.assign(action),
self._reward.assign(reward),
self._done.assign(done))
return tf.group(self._observ.assign(observ), self._action.assign(action),
self._reward.assign(reward), self._done.assign(done))
def reset(self, indices=None):
"""Reset the batch of environments.
@@ -112,15 +108,15 @@ class InGraphBatchEnv(object):
if indices is None:
indices = tf.range(len(self._batch_env))
observ_dtype = self._parse_dtype(self._batch_env.observation_space)
observ = tf.py_func(
self._batch_env.reset, [indices], observ_dtype, name='reset')
observ = tf.py_func(self._batch_env.reset, [indices], observ_dtype, name='reset')
observ = tf.check_numerics(observ, 'observ')
reward = tf.zeros_like(indices, tf.float32)
done = tf.zeros_like(indices, tf.bool)
with tf.control_dependencies([
tf.scatter_update(self._observ, indices, observ),
tf.scatter_update(self._reward, indices, reward),
tf.scatter_update(self._done, indices, done)]):
tf.scatter_update(self._done, indices, done)
]):
return tf.identity(observ)
@property
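
The step above relies on tf.py_func to call back into Python from inside the graph. A minimal sketch of that pattern, assuming TensorFlow 1.x graph mode (tf.py_func is not part of the 2.x API) and a made-up step function standing in for batch_env.step:

import numpy as np
import tensorflow as tf

def _py_step(action):
  # Stand-in for batch_env.step(action)[:3]: observation, reward, done.
  return action.astype(np.float32), np.float32(1.0), np.bool_(False)

action = tf.placeholder(tf.float32, [2], name='action')
observ, reward, done = tf.py_func(_py_step, [action], [tf.float32, tf.float32, tf.bool],
                                  name='step')

with tf.Session() as sess:
  print(sess.run([observ, reward, done], {action: [0.1, -0.2]}))  # stand-in transition
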

View File

@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Put an OpenAI Gym environment into the TensorFlow graph."""
from __future__ import absolute_import
@@ -42,16 +41,15 @@ class InGraphEnv(object):
action_shape = self._parse_shape(self._env.action_space)
action_dtype = self._parse_dtype(self._env.action_space)
with tf.name_scope('environment'):
self._observ = tf.Variable(
tf.zeros(observ_shape, observ_dtype), name='observ', trainable=False)
self._action = tf.Variable(
tf.zeros(action_shape, action_dtype), name='action', trainable=False)
self._reward = tf.Variable(
0.0, dtype=tf.float32, name='reward', trainable=False)
self._done = tf.Variable(
True, dtype=tf.bool, name='done', trainable=False)
self._step = tf.Variable(
0, dtype=tf.int32, name='step', trainable=False)
self._observ = tf.Variable(tf.zeros(observ_shape, observ_dtype),
name='observ',
trainable=False)
self._action = tf.Variable(tf.zeros(action_shape, action_dtype),
name='action',
trainable=False)
self._reward = tf.Variable(0.0, dtype=tf.float32, name='reward', trainable=False)
self._done = tf.Variable(True, dtype=tf.bool, name='done', trainable=False)
self._step = tf.Variable(0, dtype=tf.int32, name='step', trainable=False)
def __getattr__(self, name):
"""Forward unimplemented attributes to the original environment.
@@ -79,17 +77,14 @@ class InGraphEnv(object):
if action.dtype in (tf.float16, tf.float32, tf.float64):
action = tf.check_numerics(action, 'action')
observ_dtype = self._parse_dtype(self._env.observation_space)
observ, reward, done = tf.py_func(
lambda a: self._env.step(a)[:3], [action],
[observ_dtype, tf.float32, tf.bool], name='step')
observ, reward, done = tf.py_func(lambda a: self._env.step(a)[:3], [action],
[observ_dtype, tf.float32, tf.bool],
name='step')
observ = tf.check_numerics(observ, 'observ')
reward = tf.check_numerics(reward, 'reward')
return tf.group(
self._observ.assign(observ),
self._action.assign(action),
self._reward.assign(reward),
self._done.assign(done),
self._step.assign_add(1))
return tf.group(self._observ.assign(observ), self._action.assign(action),
self._reward.assign(reward), self._done.assign(done),
self._step.assign_add(1))
def reset(self):
"""Reset the environment.
@@ -100,10 +95,10 @@ class InGraphEnv(object):
observ_dtype = self._parse_dtype(self._env.observation_space)
observ = tf.py_func(self._env.reset, [], observ_dtype, name='reset')
observ = tf.check_numerics(observ, 'observ')
with tf.control_dependencies([
self._observ.assign(observ),
self._reward.assign(0),
self._done.assign(False)]):
with tf.control_dependencies(
[self._observ.assign(observ),
self._reward.assign(0),
self._done.assign(False)]):
return tf.identity(observ)
@property

View File

@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Execute operations in a loop and coordinate logging and checkpoints."""
from __future__ import absolute_import
@@ -25,10 +24,8 @@ import tensorflow as tf
from . import streaming_mean
_Phase = collections.namedtuple(
'Phase',
'name, writer, op, batch, steps, feed, report_every, log_every,'
'Phase', 'name, writer, op, batch, steps, feed, report_every, log_every,'
'checkpoint_every')
@@ -56,16 +53,22 @@ class Loop(object):
reset: Tensor indicating to the model to start a new computation.
"""
self._logdir = logdir
self._step = (
tf.Variable(0, False, name='global_step') if step is None else step)
self._step = (tf.Variable(0, False, name='global_step') if step is None else step)
self._log = tf.placeholder(tf.bool) if log is None else log
self._report = tf.placeholder(tf.bool) if report is None else report
self._reset = tf.placeholder(tf.bool) if reset is None else reset
self._phases = []
def add_phase(
self, name, done, score, summary, steps,
report_every=None, log_every=None, checkpoint_every=None, feed=None):
def add_phase(self,
name,
done,
score,
summary,
steps,
report_every=None,
log_every=None,
checkpoint_every=None,
feed=None):
"""Add a phase to the loop protocol.
If the model breaks long computation into multiple steps, the done tensor
@@ -97,13 +100,12 @@ class Loop(object):
if done.shape.ndims is None or score.shape.ndims is None:
raise ValueError("Rank of 'done' and 'score' tensors must be known.")
writer = self._logdir and tf.summary.FileWriter(
os.path.join(self._logdir, name), tf.get_default_graph(),
flush_secs=60)
os.path.join(self._logdir, name), tf.get_default_graph(), flush_secs=60)
op = self._define_step(done, score, summary)
batch = 1 if score.shape.ndims == 0 else score.shape[0].value
self._phases.append(_Phase(
name, writer, op, batch, int(steps), feed, report_every,
log_every, checkpoint_every))
self._phases.append(
_Phase(name, writer, op, batch, int(steps), feed, report_every, log_every,
checkpoint_every))
def run(self, sess, saver, max_step=None):
"""Run the loop schedule for a specified number of steps.
@@ -133,13 +135,11 @@ class Loop(object):
tf.logging.info(message.format(phase.name, phase_step, global_step))
# Populate book keeping tensors.
phase.feed[self._reset] = (steps_in < steps_made)
phase.feed[self._log] = (
phase.writer and
self._is_every_steps(phase_step, phase.batch, phase.log_every))
phase.feed[self._report] = (
self._is_every_steps(phase_step, phase.batch, phase.report_every))
summary, mean_score, global_step, steps_made = sess.run(
phase.op, phase.feed)
phase.feed[self._log] = (phase.writer and
self._is_every_steps(phase_step, phase.batch, phase.log_every))
phase.feed[self._report] = (self._is_every_steps(phase_step, phase.batch,
phase.report_every))
summary, mean_score, global_step, steps_made = sess.run(phase.op, phase.feed)
if self._is_every_steps(phase_step, phase.batch, phase.checkpoint_every):
self._store_checkpoint(sess, saver, global_step)
if self._is_every_steps(phase_step, phase.batch, phase.report_every):
@@ -207,8 +207,7 @@ class Loop(object):
score_mean = streaming_mean.StreamingMean((), tf.float32)
with tf.control_dependencies([done, score, summary]):
done_score = tf.gather(score, tf.where(done)[:, 0])
submit_score = tf.cond(
tf.reduce_any(done), lambda: score_mean.submit(done_score), tf.no_op)
submit_score = tf.cond(tf.reduce_any(done), lambda: score_mean.submit(done_score), tf.no_op)
with tf.control_dependencies([submit_score]):
mean_score = tf.cond(self._report, score_mean.clear, float)
steps_made = tf.shape(score)[0]

View File

@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Mock algorithm for testing reinforcement learning code."""
from __future__ import absolute_import

View File

@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Mock environment for testing reinforcement learning code."""
from __future__ import absolute_import

View File

@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""In-graph simulation step of a vectorized algorithm with environments."""
from __future__ import absolute_import
@@ -55,7 +54,8 @@ def simulate(batch_env, algo, log=True, reset=False):
reset_ops = [
batch_env.reset(agent_indices),
tf.scatter_update(score, agent_indices, zero_scores),
tf.scatter_update(length, agent_indices, zero_durations)]
tf.scatter_update(length, agent_indices, zero_durations)
]
with tf.control_dependencies(reset_ops):
return algo.begin_episode(agent_indices)
@@ -78,9 +78,8 @@ def simulate(batch_env, algo, log=True, reset=False):
inc_length = length.assign_add(tf.ones(len(batch_env), tf.int32))
with tf.control_dependencies([add_score, inc_length]):
agent_indices = tf.range(len(batch_env))
experience_summary = algo.experience(
agent_indices, prevob, batch_env.action, batch_env.reward,
batch_env.done, batch_env.observ)
experience_summary = algo.experience(agent_indices, prevob, batch_env.action,
batch_env.reward, batch_env.done, batch_env.observ)
return tf.summary.merge([step_summary, experience_summary])
def _define_end_episode(agent_indices):
@@ -96,8 +95,7 @@ def simulate(batch_env, algo, log=True, reset=False):
"""
assert agent_indices.shape.ndims == 1
submit_score = mean_score.submit(tf.gather(score, agent_indices))
submit_length = mean_length.submit(
tf.cast(tf.gather(length, agent_indices), tf.float32))
submit_length = mean_length.submit(tf.cast(tf.gather(length, agent_indices), tf.float32))
with tf.control_dependencies([submit_score, submit_length]):
return algo.end_episode(agent_indices)
@@ -107,41 +105,34 @@ def simulate(batch_env, algo, log=True, reset=False):
Returns:
Summary string.
"""
score_summary = tf.cond(
tf.logical_and(log, tf.cast(mean_score.count, tf.bool)),
lambda: tf.summary.scalar('mean_score', mean_score.clear()), str)
length_summary = tf.cond(
tf.logical_and(log, tf.cast(mean_length.count, tf.bool)),
lambda: tf.summary.scalar('mean_length', mean_length.clear()), str)
score_summary = tf.cond(tf.logical_and(log, tf.cast(
mean_score.count, tf.bool)), lambda: tf.summary.scalar('mean_score', mean_score.clear()),
str)
length_summary = tf.cond(tf.logical_and(
log, tf.cast(mean_length.count,
tf.bool)), lambda: tf.summary.scalar('mean_length', mean_length.clear()), str)
return tf.summary.merge([score_summary, length_summary])
with tf.name_scope('simulate'):
log = tf.convert_to_tensor(log)
reset = tf.convert_to_tensor(reset)
with tf.variable_scope('simulate_temporary'):
score = tf.Variable(
tf.zeros(len(batch_env), dtype=tf.float32), False, name='score')
length = tf.Variable(
tf.zeros(len(batch_env), dtype=tf.int32), False, name='length')
score = tf.Variable(tf.zeros(len(batch_env), dtype=tf.float32), False, name='score')
length = tf.Variable(tf.zeros(len(batch_env), dtype=tf.int32), False, name='length')
mean_score = streaming_mean.StreamingMean((), tf.float32)
mean_length = streaming_mean.StreamingMean((), tf.float32)
agent_indices = tf.cond(
reset,
lambda: tf.range(len(batch_env)),
lambda: tf.cast(tf.where(batch_env.done)[:, 0], tf.int32))
begin_episode = tf.cond(
tf.cast(tf.shape(agent_indices)[0], tf.bool),
lambda: _define_begin_episode(agent_indices), str)
agent_indices = tf.cond(reset, lambda: tf.range(len(batch_env)), lambda: tf.cast(
tf.where(batch_env.done)[:, 0], tf.int32))
begin_episode = tf.cond(tf.cast(tf.shape(agent_indices)[0],
tf.bool), lambda: _define_begin_episode(agent_indices), str)
with tf.control_dependencies([begin_episode]):
step = _define_step()
with tf.control_dependencies([step]):
agent_indices = tf.cast(tf.where(batch_env.done)[:, 0], tf.int32)
end_episode = tf.cond(
tf.cast(tf.shape(agent_indices)[0], tf.bool),
lambda: _define_end_episode(agent_indices), str)
end_episode = tf.cond(tf.cast(tf.shape(agent_indices)[0],
tf.bool), lambda: _define_end_episode(agent_indices), str)
with tf.control_dependencies([end_episode]):
summary = tf.summary.merge([
_define_summaries(), begin_episode, step, end_episode])
summary = tf.summary.merge([_define_summaries(), begin_episode, step, end_episode])
with tf.control_dependencies([summary]):
done, score = tf.identity(batch_env.done), tf.identity(score)
return done, score, summary

View File

@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Compute a streaming estimation of the mean of submitted tensors."""
from __future__ import absolute_import
@@ -53,9 +52,8 @@ class StreamingMean(object):
# Add a batch dimension if necessary.
if value.shape.ndims == self._sum.shape.ndims:
value = value[None, ...]
return tf.group(
self._sum.assign_add(tf.reduce_sum(value, 0)),
self._count.assign_add(tf.shape(value)[0]))
return tf.group(self._sum.assign_add(tf.reduce_sum(value, 0)),
self._count.assign_add(tf.shape(value)[0]))
def clear(self):
"""Return the mean estimate and reset the streaming statistics."""

View File

@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Wrappers for OpenAI Gym environments."""
from __future__ import absolute_import
@@ -149,8 +148,7 @@ class FrameHistory(object):
return self._select_frames()
def _select_frames(self):
indices = [
(self._step - index) % self._capacity for index in self._past_indices]
indices = [(self._step - index) % self._capacity for index in self._past_indices]
observ = self._buffer[indices]
if self._flatten:
observ = np.reshape(observ, (-1,) + observ.shape[2:])
@@ -191,14 +189,14 @@ class RangeNormalize(object):
def __init__(self, env, observ=None, action=None):
self._env = env
self._should_normalize_observ = (
observ is not False and self._is_finite(self._env.observation_space))
self._should_normalize_observ = (observ is not False and
self._is_finite(self._env.observation_space))
if observ is True and not self._should_normalize_observ:
raise ValueError('Cannot normalize infinite observation range.')
if observ is None and not self._should_normalize_observ:
tf.logging.info('Not normalizing infinite observation range.')
self._should_normalize_action = (
action is not False and self._is_finite(self._env.action_space))
self._should_normalize_action = (action is not False and
self._is_finite(self._env.action_space))
if action is True and not self._should_normalize_action:
raise ValueError('Cannot normalize infinite action range.')
if action is None and not self._should_normalize_action:
@@ -323,8 +321,7 @@ class ExternalProcess(object):
action_space: The cached action space of the environment.
"""
self._conn, conn = multiprocessing.Pipe()
self._process = multiprocessing.Process(
target=self._worker, args=(constructor, conn))
self._process = multiprocessing.Process(target=self._worker, args=(constructor, conn))
atexit.register(self.close)
self._process.start()
self._observ_space = None
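
The _select_frames() change above is pure reformatting; the underlying index math picks frames out of a circular buffer. A small worked example with illustrative numbers:

capacity = 4              # size of the circular frame buffer
past_indices = [0, 1, 2]  # how many steps back each requested frame is
step = 9                  # current step counter
indices = [(step - index) % capacity for index in past_indices]
print(indices)  # [1, 0, 3]: slots of the current frame and the two before it
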

View File

@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
r"""Script to train a batch reinforcement learning algorithm.
Command line:
@@ -67,21 +66,25 @@ def _define_loop(graph, logdir, train_steps, eval_steps):
Returns:
Loop object.
"""
loop = tools.Loop(
logdir, graph.step, graph.should_log, graph.do_report,
graph.force_reset)
loop.add_phase(
'train', graph.done, graph.score, graph.summary, train_steps,
report_every=train_steps,
log_every=train_steps // 2,
checkpoint_every=None,
feed={graph.is_training: True})
loop.add_phase(
'eval', graph.done, graph.score, graph.summary, eval_steps,
report_every=eval_steps,
log_every=eval_steps // 2,
checkpoint_every=10 * eval_steps,
feed={graph.is_training: False})
loop = tools.Loop(logdir, graph.step, graph.should_log, graph.do_report, graph.force_reset)
loop.add_phase('train',
graph.done,
graph.score,
graph.summary,
train_steps,
report_every=train_steps,
log_every=train_steps // 2,
checkpoint_every=None,
feed={graph.is_training: True})
loop.add_phase('eval',
graph.done,
graph.score,
graph.summary,
eval_steps,
report_every=eval_steps,
log_every=eval_steps // 2,
checkpoint_every=10 * eval_steps,
feed={graph.is_training: False})
return loop
@@ -102,18 +105,13 @@ def train(config, env_processes):
if config.update_every % config.num_agents:
tf.logging.warn('Number of agents should divide episodes per update.')
with tf.device('/cpu:0'):
batch_env = utility.define_batch_env(
lambda: _create_environment(config),
config.num_agents, env_processes)
graph = utility.define_simulation_graph(
batch_env, config.algorithm, config)
loop = _define_loop(
graph, config.logdir,
config.update_every * config.max_length,
config.eval_episodes * config.max_length)
total_steps = int(
config.steps / config.update_every *
(config.update_every + config.eval_episodes))
batch_env = utility.define_batch_env(lambda: _create_environment(config), config.num_agents,
env_processes)
graph = utility.define_simulation_graph(batch_env, config.algorithm, config)
loop = _define_loop(graph, config.logdir, config.update_every * config.max_length,
config.eval_episodes * config.max_length)
total_steps = int(config.steps / config.update_every *
(config.update_every + config.eval_episodes))
# Exclude episode related variables since the Python state of environments is
# not checkpointed and thus new episodes start after resuming.
saver = utility.define_saver(exclude=(r'.*_temporary/.*',))
@@ -131,8 +129,8 @@ def main(_):
utility.set_up_logging()
if not FLAGS.config:
raise KeyError('You must specify a configuration.')
logdir = FLAGS.logdir and os.path.expanduser(os.path.join(
FLAGS.logdir, '{}-{}'.format(FLAGS.timestamp, FLAGS.config)))
logdir = FLAGS.logdir and os.path.expanduser(
os.path.join(FLAGS.logdir, '{}-{}'.format(FLAGS.timestamp, FLAGS.config)))
try:
config = utility.load_config(logdir)
except IOError:
@@ -144,16 +142,11 @@ def main(_):
if __name__ == '__main__':
FLAGS = tf.app.flags.FLAGS
tf.app.flags.DEFINE_string(
'logdir', None,
'Base directory to store logs.')
tf.app.flags.DEFINE_string(
'timestamp', datetime.datetime.now().strftime('%Y%m%dT%H%M%S'),
'Sub directory to store logs.')
tf.app.flags.DEFINE_string(
'config', None,
'Configuration to execute.')
tf.app.flags.DEFINE_boolean(
'env_processes', True,
'Step environments in separate processes to circumvent the GIL.')
tf.app.flags.DEFINE_string('logdir', None, 'Base directory to store logs.')
tf.app.flags.DEFINE_string('timestamp',
datetime.datetime.now().strftime('%Y%m%dT%H%M%S'),
'Sub directory to store logs.')
tf.app.flags.DEFINE_string('config', None, 'Configuration to execute.')
tf.app.flags.DEFINE_boolean('env_processes', True,
'Step environments in separate processes to circumvent the GIL.')
tf.app.run()
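
One detail worth spelling out from the train() changes above: total_steps interleaves training and evaluation phases. A worked example with illustrative numbers (not the shipped defaults):

steps = 1000000     # requested training steps
update_every = 30   # training episodes per update phase
eval_episodes = 25  # episodes per evaluation phase
total_steps = int(steps / update_every * (update_every + eval_episodes))
print(total_steps)  # 1833333: training steps plus the interleaved evaluation steps
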

View File

@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Utilities for using reinforcement learning algorithms."""
from __future__ import absolute_import
@@ -46,8 +45,7 @@ def define_simulation_graph(batch_env, algo_cls, config):
do_report = tf.placeholder(tf.bool, name='do_report')
force_reset = tf.placeholder(tf.bool, name='force_reset')
algo = algo_cls(batch_env, step, is_training, should_log, config)
done, score, summary = tools.simulate(
batch_env, algo, should_log, force_reset)
done, score, summary = tools.simulate(batch_env, algo, should_log, force_reset)
message = 'Graph contains {} trainable variables.'
tf.logging.info(message.format(tools.count_weights()))
# pylint: enable=unused-variable
@@ -67,9 +65,7 @@ def define_batch_env(constructor, num_agents, env_processes):
"""
with tf.variable_scope('environments'):
if env_processes:
envs = [
tools.wrappers.ExternalProcess(constructor)
for _ in range(num_agents)]
envs = [tools.wrappers.ExternalProcess(constructor) for _ in range(num_agents)]
else:
envs = [constructor() for _ in range(num_agents)]
batch_env = tools.BatchEnv(envs, blocking=not env_processes)
@@ -111,9 +107,7 @@ def initialize_variables(sess, saver, logdir, checkpoint=None, resume=None):
ValueError: If resume expected but no log directory specified.
RuntimeError: If no resume expected but a checkpoint was found.
"""
sess.run(tf.group(
tf.local_variables_initializer(),
tf.global_variables_initializer()))
sess.run(tf.group(tf.local_variables_initializer(), tf.global_variables_initializer()))
if resume and not (logdir or checkpoint):
raise ValueError('Need to specify logdir to resume a checkpoint.')
if logdir:
@@ -152,9 +146,8 @@ def save_config(config, logdir=None):
with tf.gfile.GFile(config_path, 'w') as file_:
yaml.dump(config, file_, default_flow_style=False)
else:
message = (
'Start a new run without storing summaries and checkpoints since no '
'logging directory was specified.')
message = ('Start a new run without storing summaries and checkpoints since no '
'logging directory was specified.')
tf.logging.info(message)
return config
@@ -173,9 +166,8 @@ def load_config(logdir):
"""
config_path = logdir and os.path.join(logdir, 'config.yaml')
if not config_path or not tf.gfile.Exists(config_path):
message = (
'Cannot resume an existing run since the logging directory does not '
'contain a configuration file.')
message = ('Cannot resume an existing run since the logging directory does not '
'contain a configuration file.')
raise IOError(message)
with tf.gfile.FastGFile(config_path, 'r') as file_:
config = yaml.load(file_, Loader=yaml.Loader)
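
save_config/load_config above round-trip the run configuration through YAML. A minimal sketch of that round trip with a plain dict (the real config is an attribute dictionary, so this is illustrative only):

import yaml

config = {'algorithm': 'ppo', 'num_agents': 30, 'max_length': 1000}
text = yaml.dump(config, default_flow_style=False)
restored = yaml.load(text, Loader=yaml.Loader)
print(restored == config)  # True
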

View File

@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
r"""Script to render videos of the Proximal Policy Gradient algorithm.
Command line:
@@ -53,8 +52,7 @@ def _create_environment(config, outdir):
setattr(env, 'spec', getattr(env, 'spec', None))
if config.max_length:
env = tools.wrappers.LimitDuration(env, config.max_length)
env = gym.wrappers.Monitor(
env, outdir, lambda unused_episode_number: True)
env = gym.wrappers.Monitor(env, outdir, lambda unused_episode_number: True)
env = tools.wrappers.RangeNormalize(env)
env = tools.wrappers.ClipAction(env)
env = tools.wrappers.ConvertTo32Bit(env)
@@ -71,20 +69,20 @@ def _define_loop(graph, eval_steps):
Returns:
Loop object.
"""
loop = tools.Loop(
None, graph.step, graph.should_log, graph.do_report, graph.force_reset)
loop.add_phase(
'eval', graph.done, graph.score, graph.summary, eval_steps,
report_every=eval_steps,
log_every=None,
checkpoint_every=None,
feed={graph.is_training: False})
loop = tools.Loop(None, graph.step, graph.should_log, graph.do_report, graph.force_reset)
loop.add_phase('eval',
graph.done,
graph.score,
graph.summary,
eval_steps,
report_every=eval_steps,
log_every=None,
checkpoint_every=None,
feed={graph.is_training: False})
return loop
def visualize(
logdir, outdir, num_agents, num_episodes, checkpoint=None,
env_processes=True):
def visualize(logdir, outdir, num_agents, num_episodes, checkpoint=None, env_processes=True):
"""Recover checkpoint and render videos from it.
Args:
@@ -97,20 +95,16 @@ def visualize(
"""
config = utility.load_config(logdir)
with tf.device('/cpu:0'):
batch_env = utility.define_batch_env(
lambda: _create_environment(config, outdir),
num_agents, env_processes)
graph = utility.define_simulation_graph(
batch_env, config.algorithm, config)
batch_env = utility.define_batch_env(lambda: _create_environment(config, outdir), num_agents,
env_processes)
graph = utility.define_simulation_graph(batch_env, config.algorithm, config)
total_steps = num_episodes * config.max_length
loop = _define_loop(graph, total_steps)
saver = utility.define_saver(
exclude=(r'.*_temporary/.*', r'global_step'))
saver = utility.define_saver(exclude=(r'.*_temporary/.*', r'global_step'))
sess_config = tf.ConfigProto(allow_soft_placement=True)
sess_config.gpu_options.allow_growth = True
with tf.Session(config=sess_config) as sess:
utility.initialize_variables(
sess, saver, config.logdir, checkpoint, resume=True)
utility.initialize_variables(sess, saver, config.logdir, checkpoint, resume=True)
for unused_score in loop.run(sess, saver, total_steps):
pass
batch_env.close()
@@ -123,29 +117,18 @@ def main(_):
raise KeyError('You must specify logging and outdirs directories.')
FLAGS.logdir = os.path.expanduser(FLAGS.logdir)
FLAGS.outdir = os.path.expanduser(FLAGS.outdir)
visualize(
FLAGS.logdir, FLAGS.outdir, FLAGS.num_agents, FLAGS.num_episodes,
FLAGS.checkpoint, FLAGS.env_processes)
visualize(FLAGS.logdir, FLAGS.outdir, FLAGS.num_agents, FLAGS.num_episodes, FLAGS.checkpoint,
FLAGS.env_processes)
if __name__ == '__main__':
FLAGS = tf.app.flags.FLAGS
tf.app.flags.DEFINE_string(
'logdir', None,
'Directory to the checkpoint of a training run.')
tf.app.flags.DEFINE_string(
'outdir', None,
'Local directory for storing the monitoring outdir.')
tf.app.flags.DEFINE_string(
'checkpoint', None,
'Checkpoint name to load; defaults to most recent.')
tf.app.flags.DEFINE_integer(
'num_agents', 1,
'How many environments to step in parallel.')
tf.app.flags.DEFINE_integer(
'num_episodes', 5,
'Minimum number of episodes to render.')
tf.app.flags.DEFINE_boolean(
'env_processes', True,
'Step environments in separate processes to circumvent the GIL.')
tf.app.flags.DEFINE_string('logdir', None, 'Directory to the checkpoint of a training run.')
tf.app.flags.DEFINE_string('outdir', None, 'Local directory for storing the monitoring outdir.')
tf.app.flags.DEFINE_string('checkpoint', None,
'Checkpoint name to load; defaults to most recent.')
tf.app.flags.DEFINE_integer('num_agents', 1, 'How many environments to step in parallel.')
tf.app.flags.DEFINE_integer('num_episodes', 5, 'Minimum number of episodes to render.')
tf.app.flags.DEFINE_boolean('env_processes', True,
'Step environments in separate processes to circumvent the GIL.')
tf.app.run()

View File

@@ -7,4 +7,3 @@ from . import train_kuka_grasping
from . import train_pybullet_cartpole
from . import train_pybullet_racecar
from . import train_pybullet_zed_racecar

View File

@@ -6,17 +6,17 @@ import numpy as np
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
os.sys.path.insert(0,parentdir)
os.sys.path.insert(0, parentdir)
import gym
from pybullet_envs.bullet.kuka_diverse_object_gym_env import KukaDiverseObjectEnv
from gym import spaces
class ContinuousDownwardBiasPolicy(object):
"""Policy which takes continuous actions, and is biased to move down.
"""
def __init__(self, height_hack_prob=0.9):
"""Initializes the DownwardBiasPolicy.
@@ -36,25 +36,25 @@ class ContinuousDownwardBiasPolicy(object):
def main():
env = KukaDiverseObjectEnv(renders=True, isDiscrete=False)
policy = ContinuousDownwardBiasPolicy()
while True:
obs, done = env.reset(), False
print("===================================")
print("obs")
print(obs)
episode_rew = 0
while not done:
env.render(mode='human')
act = policy.sample_action(obs, .1)
print("Action")
print(act)
obs, rew, done, _ = env.step([0, 0, 0, 0, 0])
episode_rew += rew
print("Episode reward", episode_rew)
env = KukaDiverseObjectEnv(renders=True, isDiscrete=False)
policy = ContinuousDownwardBiasPolicy()
while True:
obs, done = env.reset(), False
print("===================================")
print("obs")
print(obs)
episode_rew = 0
while not done:
env.render(mode='human')
act = policy.sample_action(obs, .1)
print("Action")
print(act)
obs, rew, done, _ = env.step([0, 0, 0, 0, 0])
episode_rew += rew
print("Episode reward", episode_rew)
if __name__ == '__main__':
main()
main()

View File

@@ -2,7 +2,7 @@
import os, inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
os.sys.path.insert(0,parentdir)
os.sys.path.insert(0, parentdir)
import gym
from pybullet_envs.bullet.kukaGymEnv import KukaGymEnv
@@ -11,22 +11,22 @@ from baselines import deepq
def main():
env = KukaGymEnv(renders=True, isDiscrete=True)
act = deepq.load("kuka_model.pkl")
print(act)
while True:
obs, done = env.reset(), False
print("===================================")
print("obs")
print(obs)
episode_rew = 0
while not done:
env.render()
obs, rew, done, _ = env.step(act(obs[None])[0])
episode_rew += rew
print("Episode reward", episode_rew)
env = KukaGymEnv(renders=True, isDiscrete=True)
act = deepq.load("kuka_model.pkl")
print(act)
while True:
obs, done = env.reset(), False
print("===================================")
print("obs")
print(obs)
episode_rew = 0
while not done:
env.render()
obs, rew, done, _ = env.step(act(obs[None])[0])
episode_rew += rew
print("Episode reward", episode_rew)
if __name__ == '__main__':
main()
main()

View File

@@ -2,7 +2,7 @@
import os, inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
os.sys.path.insert(0,parentdir)
os.sys.path.insert(0, parentdir)
import gym
import time
@@ -10,28 +10,29 @@ import time
from baselines import deepq
from pybullet_envs.bullet.cartpole_bullet import CartPoleBulletEnv
def main():
env = gym.make('CartPoleBulletEnv-v1')
act = deepq.load("cartpole_model.pkl")
while True:
obs, done = env.reset(), False
print("obs")
print(obs)
print("type(obs)")
print(type(obs))
episode_rew = 0
while not done:
env.render()
o = obs[None]
aa = act(o)
a = aa[0]
obs, rew, done, _ = env.step(a)
episode_rew += rew
time.sleep(1./240.)
print("Episode reward", episode_rew)
def main():
env = gym.make('CartPoleBulletEnv-v1')
act = deepq.load("cartpole_model.pkl")
while True:
obs, done = env.reset(), False
print("obs")
print(obs)
print("type(obs)")
print(type(obs))
episode_rew = 0
while not done:
env.render()
o = obs[None]
aa = act(o)
a = aa[0]
obs, rew, done, _ = env.step(a)
episode_rew += rew
time.sleep(1. / 240.)
print("Episode reward", episode_rew)
if __name__ == '__main__':
main()
main()

View File

@@ -2,7 +2,7 @@
import os, inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
os.sys.path.insert(0,parentdir)
os.sys.path.insert(0, parentdir)
import gym
from pybullet_envs.bullet.racecarGymEnv import RacecarGymEnv
@@ -11,22 +11,22 @@ from baselines import deepq
def main():
env = RacecarGymEnv(renders=True,isDiscrete=True)
act = deepq.load("racecar_model.pkl")
print(act)
while True:
obs, done = env.reset(), False
print("===================================")
print("obs")
print(obs)
episode_rew = 0
while not done:
env.render()
obs, rew, done, _ = env.step(act(obs[None])[0])
episode_rew += rew
print("Episode reward", episode_rew)
env = RacecarGymEnv(renders=True, isDiscrete=True)
act = deepq.load("racecar_model.pkl")
print(act)
while True:
obs, done = env.reset(), False
print("===================================")
print("obs")
print(obs)
episode_rew = 0
while not done:
env.render()
obs, rew, done, _ = env.step(act(obs[None])[0])
episode_rew += rew
print("Episode reward", episode_rew)
if __name__ == '__main__':
main()
main()

View File

@@ -2,7 +2,7 @@
import os, inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
os.sys.path.insert(0,parentdir)
os.sys.path.insert(0, parentdir)
import gym
from pybullet_envs.bullet.racecarZEDGymEnv import RacecarZEDGymEnv
@@ -11,22 +11,22 @@ from baselines import deepq
def main():
env = RacecarZEDGymEnv(renders=True)
act = deepq.load("racecar_zed_model.pkl")
print(act)
while True:
obs, done = env.reset(), False
print("===================================")
print("obs")
print(obs)
episode_rew = 0
while not done:
env.render()
obs, rew, done, _ = env.step(act(obs[None])[0])
episode_rew += rew
print("Episode reward", episode_rew)
env = RacecarZEDGymEnv(renders=True)
act = deepq.load("racecar_zed_model.pkl")
print(act)
while True:
obs, done = env.reset(), False
print("===================================")
print("obs")
print(obs)
episode_rew = 0
while not done:
env.render()
obs, rew, done, _ = env.step(act(obs[None])[0])
episode_rew += rew
print("Episode reward", episode_rew)
if __name__ == '__main__':
main()
main()

View File

@@ -2,7 +2,7 @@
import os, inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
os.sys.path.insert(0,parentdir)
os.sys.path.insert(0, parentdir)
import gym
from pybullet_envs.bullet.kukaCamGymEnv import KukaCamGymEnv
@@ -12,39 +12,34 @@ from baselines import deepq
import datetime
def callback(lcl, glb):
# stop training if reward exceeds 199
total = sum(lcl['episode_rewards'][-101:-1]) / 100
totalt = lcl['t']
#print("totalt")
#print(totalt)
is_solved = totalt > 2000 and total >= 10
return is_solved
# stop training if reward exceeds 199
total = sum(lcl['episode_rewards'][-101:-1]) / 100
totalt = lcl['t']
#print("totalt")
#print(totalt)
is_solved = totalt > 2000 and total >= 10
return is_solved
def main():
env = KukaCamGymEnv(renders=False, isDiscrete=True)
model = deepq.models.cnn_to_mlp(
convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)],
hiddens=[256],
dueling=False
)
act = deepq.learn(
env,
q_func=model,
lr=1e-3,
max_timesteps=10000000,
buffer_size=50000,
exploration_fraction=0.1,
exploration_final_eps=0.02,
print_freq=10,
callback=callback
)
print("Saving model to kuka_cam_model.pkl")
act.save("kuka_cam_model.pkl")
env = KukaCamGymEnv(renders=False, isDiscrete=True)
model = deepq.models.cnn_to_mlp(convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)],
hiddens=[256],
dueling=False)
act = deepq.learn(env,
q_func=model,
lr=1e-3,
max_timesteps=10000000,
buffer_size=50000,
exploration_fraction=0.1,
exploration_final_eps=0.02,
print_freq=10,
callback=callback)
print("Saving model to kuka_cam_model.pkl")
act.save("kuka_cam_model.pkl")
if __name__ == '__main__':
main()
main()
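
The callback above stops training once the average reward of the last 100 completed episodes clears a threshold. A worked example of that rolling-average check, with a reward history invented for illustration:

episode_rewards = [9.0] * 105 + [12.0] * 100  # illustrative history, newest last
total = sum(episode_rewards[-101:-1]) / 100   # mean of the last 100 finished episodes
totalt = 4000                                 # pretend timestep counter
is_solved = totalt > 2000 and total >= 10
print(total, is_solved)  # 11.97 True
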

View File

@@ -2,7 +2,7 @@
import os, inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
os.sys.path.insert(0,parentdir)
os.sys.path.insert(0, parentdir)
import gym
from pybullet_envs.bullet.kukaGymEnv import KukaGymEnv
@@ -12,35 +12,32 @@ from baselines import deepq
import datetime
def callback(lcl, glb):
# stop training if reward exceeds 199
total = sum(lcl['episode_rewards'][-101:-1]) / 100
totalt = lcl['t']
#print("totalt")
#print(totalt)
is_solved = totalt > 2000 and total >= 10
return is_solved
# stop training if reward exceeds 199
total = sum(lcl['episode_rewards'][-101:-1]) / 100
totalt = lcl['t']
#print("totalt")
#print(totalt)
is_solved = totalt > 2000 and total >= 10
return is_solved
def main():
env = KukaGymEnv(renders=False, isDiscrete=True)
model = deepq.models.mlp([64])
act = deepq.learn(
env,
q_func=model,
lr=1e-3,
max_timesteps=10000000,
buffer_size=50000,
exploration_fraction=0.1,
exploration_final_eps=0.02,
print_freq=10,
callback=callback
)
print("Saving model to kuka_model.pkl")
act.save("kuka_model.pkl")
env = KukaGymEnv(renders=False, isDiscrete=True)
model = deepq.models.mlp([64])
act = deepq.learn(env,
q_func=model,
lr=1e-3,
max_timesteps=10000000,
buffer_size=50000,
exploration_fraction=0.1,
exploration_final_eps=0.02,
print_freq=10,
callback=callback)
print("Saving model to kuka_model.pkl")
act.save("kuka_model.pkl")
if __name__ == '__main__':
main()
main()

View File

@@ -2,7 +2,7 @@
import os, inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
os.sys.path.insert(0,parentdir)
os.sys.path.insert(0, parentdir)
import gym
from pybullet_envs.bullet.cartpole_bullet import CartPoleBulletEnv
@@ -11,29 +11,27 @@ from baselines import deepq
def callback(lcl, glb):
# stop training if reward exceeds 199
is_solved = lcl['t'] > 100 and sum(lcl['episode_rewards'][-101:-1]) / 100 >= 199
return is_solved
# stop training if reward exceeds 199
is_solved = lcl['t'] > 100 and sum(lcl['episode_rewards'][-101:-1]) / 100 >= 199
return is_solved
def main():
env = CartPoleBulletEnv(renders=False)
model = deepq.models.mlp([64])
act = deepq.learn(
env,
q_func=model,
lr=1e-3,
max_timesteps=100000,
buffer_size=50000,
exploration_fraction=0.1,
exploration_final_eps=0.02,
print_freq=10,
callback=callback
)
print("Saving model to cartpole_model.pkl")
act.save("cartpole_model.pkl")
env = CartPoleBulletEnv(renders=False)
model = deepq.models.mlp([64])
act = deepq.learn(env,
q_func=model,
lr=1e-3,
max_timesteps=100000,
buffer_size=50000,
exploration_fraction=0.1,
exploration_final_eps=0.02,
print_freq=10,
callback=callback)
print("Saving model to cartpole_model.pkl")
act.save("cartpole_model.pkl")
if __name__ == '__main__':
main()
main()

View File

@@ -2,7 +2,7 @@
import os, inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
os.sys.path.insert(0,parentdir)
os.sys.path.insert(0, parentdir)
import gym
from pybullet_envs.bullet.racecarGymEnv import RacecarGymEnv
@@ -12,33 +12,30 @@ from baselines import deepq
import datetime
def callback(lcl, glb):
# stop training if reward exceeds 199
total = sum(lcl['episode_rewards'][-101:-1]) / 100
totalt = lcl['t']
is_solved = totalt > 2000 and total >= -50
return is_solved
# stop training if reward exceeds 199
total = sum(lcl['episode_rewards'][-101:-1]) / 100
totalt = lcl['t']
is_solved = totalt > 2000 and total >= -50
return is_solved
def main():
env = RacecarGymEnv(renders=False,isDiscrete=True)
model = deepq.models.mlp([64])
act = deepq.learn(
env,
q_func=model,
lr=1e-3,
max_timesteps=10000,
buffer_size=50000,
exploration_fraction=0.1,
exploration_final_eps=0.02,
print_freq=10,
callback=callback
)
print("Saving model to racecar_model.pkl")
act.save("racecar_model.pkl")
env = RacecarGymEnv(renders=False, isDiscrete=True)
model = deepq.models.mlp([64])
act = deepq.learn(env,
q_func=model,
lr=1e-3,
max_timesteps=10000,
buffer_size=50000,
exploration_fraction=0.1,
exploration_final_eps=0.02,
print_freq=10,
callback=callback)
print("Saving model to racecar_model.pkl")
act.save("racecar_model.pkl")
if __name__ == '__main__':
main()
main()

View File

@@ -2,7 +2,7 @@
import os, inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
os.sys.path.insert(0,parentdir)
os.sys.path.insert(0, parentdir)
import gym
from pybullet_envs.bullet.racecarZEDGymEnv import RacecarZEDGymEnv
@@ -12,36 +12,32 @@ from baselines import deepq
import datetime
def callback(lcl, glb):
# stop training if reward exceeds 199
total = sum(lcl['episode_rewards'][-101:-1]) / 100
totalt = lcl['t']
is_solved = totalt > 2000 and total >= -50
return is_solved
# stop training if reward exceeds 199
total = sum(lcl['episode_rewards'][-101:-1]) / 100
totalt = lcl['t']
is_solved = totalt > 2000 and total >= -50
return is_solved
def main():
env = RacecarZEDGymEnv(renders=False, isDiscrete=True)
model = deepq.models.cnn_to_mlp(
convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)],
hiddens=[256],
dueling=False
)
act = deepq.learn(
env,
q_func=model,
lr=1e-3,
max_timesteps=10000,
buffer_size=50000,
exploration_fraction=0.1,
exploration_final_eps=0.02,
print_freq=10,
callback=callback
)
print("Saving model to racecar_zed_model.pkl")
act.save("racecar_zed_model.pkl")
env = RacecarZEDGymEnv(renders=False, isDiscrete=True)
model = deepq.models.cnn_to_mlp(convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)],
hiddens=[256],
dueling=False)
act = deepq.learn(env,
q_func=model,
lr=1e-3,
max_timesteps=10000,
buffer_size=50000,
exploration_fraction=0.1,
exploration_final_eps=0.02,
print_freq=10,
callback=callback)
print("Saving model to racecar_zed_model.pkl")
act.save("racecar_zed_model.pkl")
if __name__ == '__main__':
main()
main()

View File

@@ -9,9 +9,9 @@ class BulletClient(object):
def __init__(self, connection_mode=pybullet.DIRECT, options=""):
"""Create a simulation and connect to it."""
self._client = pybullet.connect(pybullet.SHARED_MEMORY)
if(self._client<0):
print("options=",options)
self._client = pybullet.connect(connection_mode,options=options)
if (self._client < 0):
print("options=", options)
self._client = pybullet.connect(connection_mode, options=options)
self._shapes = {}
def __del__(self):
@@ -25,5 +25,5 @@ class BulletClient(object):
"""Inject the client id into Bullet functions."""
attribute = getattr(pybullet, name)
if inspect.isbuiltin(attribute):
attribute = functools.partial(attribute, physicsClientId=self._client)
attribute = functools.partial(attribute, physicsClientId=self._client)
return attribute
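
A hedged usage sketch for the BulletClient wrapper above: each instance owns its own physics server, and plain pybullet calls are routed to it with the client id injected. The import path is an assumption based on this file's location; pybullet and pybullet_data are assumed to be installed.

import pybullet
import pybullet_data
from pybullet_envs.bullet.bullet_client import BulletClient  # assumed module path

client = BulletClient(connection_mode=pybullet.DIRECT)
client.setAdditionalSearchPath(pybullet_data.getDataPath())
client.setGravity(0, 0, -9.8)
plane = client.loadURDF("plane.urdf")  # physicsClientId is injected by __getattr__
client.stepSimulation()
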

View File

@@ -2,10 +2,10 @@
Classic cart-pole system implemented by Rich Sutton et al.
Copied from https://webdocs.cs.ualberta.ca/~sutton/book/code/pole.c
"""
import os, inspect
import os, inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
os.sys.path.insert(0,parentdir)
os.sys.path.insert(0, parentdir)
import logging
import math
@@ -21,26 +21,24 @@ from pkg_resources import parse_version
logger = logging.getLogger(__name__)
class CartPoleBulletEnv(gym.Env):
metadata = {
'render.modes': ['human', 'rgb_array'],
'video.frames_per_second' : 50
}
metadata = {'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': 50}
def __init__(self, renders=True):
# start the bullet physics server
self._renders = renders
if (renders):
p.connect(p.GUI)
p.connect(p.GUI)
else:
p.connect(p.DIRECT)
p.connect(p.DIRECT)
self.theta_threshold_radians = 12 * 2 * math.pi / 360
self.x_threshold = 0.4 #2.4
self.x_threshold = 0.4 #2.4
high = np.array([
self.x_threshold * 2,
np.finfo(np.float32).max,
self.theta_threshold_radians * 2,
np.finfo(np.float32).max])
self.x_threshold * 2,
np.finfo(np.float32).max, self.theta_threshold_radians * 2,
np.finfo(np.float32).max
])
self.force_mag = 10
@@ -60,7 +58,7 @@ class CartPoleBulletEnv(gym.Env):
return [seed]
def step(self, action):
force = self.force_mag if action==1 else -self.force_mag
force = self.force_mag if action == 1 else -self.force_mag
p.setJointMotorControl2(self.cartpole, 0, p.TORQUE_CONTROL, force=force)
p.stepSimulation()
@@ -78,16 +76,17 @@ class CartPoleBulletEnv(gym.Env):
return np.array(self.state), reward, done, {}
def reset(self):
# print("-----------reset simulation---------------")
# print("-----------reset simulation---------------")
p.resetSimulation()
self.cartpole = p.loadURDF(os.path.join(pybullet_data.getDataPath(),"cartpole.urdf"),[0,0,0])
self.cartpole = p.loadURDF(os.path.join(pybullet_data.getDataPath(), "cartpole.urdf"),
[0, 0, 0])
p.changeDynamics(self.cartpole, -1, linearDamping=0, angularDamping=0)
p.changeDynamics(self.cartpole, 0, linearDamping=0, angularDamping=0)
p.changeDynamics(self.cartpole, 1, linearDamping=0, angularDamping=0)
self.timeStep = 0.02
p.setJointMotorControl2(self.cartpole, 1, p.VELOCITY_CONTROL, force=0)
p.setJointMotorControl2(self.cartpole, 0, p.VELOCITY_CONTROL, force=0)
p.setGravity(0,0, -9.8)
p.setGravity(0, 0, -9.8)
p.setTimeStep(self.timeStep)
p.setRealTimeSimulation(0)
@@ -100,4 +99,4 @@ class CartPoleBulletEnv(gym.Env):
return np.array(self.state)
def render(self, mode='human', close=False):
return
return
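
A minimal rollout sketch for the Bullet cart-pole environment above, assuming a random policy just to exercise reset()/step(); the import path matches the one used by the training scripts earlier in this commit.

import numpy as np
from pybullet_envs.bullet.cartpole_bullet import CartPoleBulletEnv

env = CartPoleBulletEnv(renders=False)
obs, done, episode_rew = env.reset(), False, 0.0
for _ in range(1000):  # cap the rollout length
  obs, rew, done, _ = env.step(np.random.randint(2))  # 0/1: push left or right
  episode_rew += rew
  if done:
    break
print("Episode reward", episode_rew)
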

View File

@@ -22,4 +22,3 @@ class EnvRandomizerBase(object):
env: The environment to be randomized.
"""
pass

View File

@@ -1,7 +1,7 @@
import os, inspect
import os, inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
os.sys.path.insert(0,parentdir)
os.sys.path.insert(0, parentdir)
import pybullet as p
import numpy as np
@@ -17,49 +17,60 @@ class Kuka:
self.timeStep = timeStep
self.maxVelocity = .35
self.maxForce = 200.
self.fingerAForce = 2
self.fingerAForce = 2
self.fingerBForce = 2.5
self.fingerTipForce = 2
self.useInverseKinematics = 1
self.useSimulation = 1
self.useNullSpace =21
self.useNullSpace = 21
self.useOrientation = 1
self.kukaEndEffectorIndex = 6
self.kukaGripperIndex = 7
#lower limits for null space
self.ll=[-.967,-2 ,-2.96,0.19,-2.96,-2.09,-3.05]
self.ll = [-.967, -2, -2.96, 0.19, -2.96, -2.09, -3.05]
#upper limits for null space
self.ul=[.967,2 ,2.96,2.29,2.96,2.09,3.05]
self.ul = [.967, 2, 2.96, 2.29, 2.96, 2.09, 3.05]
#joint ranges for null space
self.jr=[5.8,4,5.8,4,5.8,4,6]
self.jr = [5.8, 4, 5.8, 4, 5.8, 4, 6]
#restposes for null space
self.rp=[0,0,0,0.5*math.pi,0,-math.pi*0.5*0.66,0]
self.rp = [0, 0, 0, 0.5 * math.pi, 0, -math.pi * 0.5 * 0.66, 0]
#joint damping coefficents
self.jd=[0.00001,0.00001,0.00001,0.00001,0.00001,0.00001,0.00001,0.00001,0.00001,0.00001,0.00001,0.00001,0.00001,0.00001]
self.jd = [
0.00001, 0.00001, 0.00001, 0.00001, 0.00001, 0.00001, 0.00001, 0.00001, 0.00001, 0.00001,
0.00001, 0.00001, 0.00001, 0.00001
]
self.reset()
def reset(self):
objects = p.loadSDF(os.path.join(self.urdfRootPath,"kuka_iiwa/kuka_with_gripper2.sdf"))
objects = p.loadSDF(os.path.join(self.urdfRootPath, "kuka_iiwa/kuka_with_gripper2.sdf"))
self.kukaUid = objects[0]
#for i in range (p.getNumJoints(self.kukaUid)):
# print(p.getJointInfo(self.kukaUid,i))
p.resetBasePositionAndOrientation(self.kukaUid,[-0.100000,0.000000,0.070000],[0.000000,0.000000,0.000000,1.000000])
self.jointPositions=[ 0.006418, 0.413184, -0.011401, -1.589317, 0.005379, 1.137684, -0.006539, 0.000048, -0.299912, 0.000000, -0.000043, 0.299960, 0.000000, -0.000200 ]
p.resetBasePositionAndOrientation(self.kukaUid, [-0.100000, 0.000000, 0.070000],
[0.000000, 0.000000, 0.000000, 1.000000])
self.jointPositions = [
0.006418, 0.413184, -0.011401, -1.589317, 0.005379, 1.137684, -0.006539, 0.000048,
-0.299912, 0.000000, -0.000043, 0.299960, 0.000000, -0.000200
]
self.numJoints = p.getNumJoints(self.kukaUid)
for jointIndex in range (self.numJoints):
p.resetJointState(self.kukaUid,jointIndex,self.jointPositions[jointIndex])
p.setJointMotorControl2(self.kukaUid,jointIndex,p.POSITION_CONTROL,targetPosition=self.jointPositions[jointIndex],force=self.maxForce)
self.trayUid = p.loadURDF(os.path.join(self.urdfRootPath,"tray/tray.urdf"), 0.640000,0.075000,-0.190000,0.000000,0.000000,1.000000,0.000000)
self.endEffectorPos = [0.537,0.0,0.5]
for jointIndex in range(self.numJoints):
p.resetJointState(self.kukaUid, jointIndex, self.jointPositions[jointIndex])
p.setJointMotorControl2(self.kukaUid,
jointIndex,
p.POSITION_CONTROL,
targetPosition=self.jointPositions[jointIndex],
force=self.maxForce)
self.trayUid = p.loadURDF(os.path.join(self.urdfRootPath, "tray/tray.urdf"), 0.640000,
0.075000, -0.190000, 0.000000, 0.000000, 1.000000, 0.000000)
self.endEffectorPos = [0.537, 0.0, 0.5]
self.endEffectorAngle = 0
self.motorNames = []
self.motorIndices = []
for i in range (self.numJoints):
jointInfo = p.getJointInfo(self.kukaUid,i)
for i in range(self.numJoints):
jointInfo = p.getJointInfo(self.kukaUid, i)
qIndex = jointInfo[3]
if qIndex > -1:
#print("motorname")
@@ -70,98 +81,136 @@ class Kuka:
def getActionDimension(self):
if (self.useInverseKinematics):
return len(self.motorIndices)
return 6 #position x,y,z and roll/pitch/yaw euler angles of end effector
return 6 #position x,y,z and roll/pitch/yaw euler angles of end effector
def getObservationDimension(self):
return len(self.getObservation())
def getObservation(self):
observation = []
state = p.getLinkState(self.kukaUid,self.kukaGripperIndex)
state = p.getLinkState(self.kukaUid, self.kukaGripperIndex)
pos = state[0]
orn = state[1]
euler = p.getEulerFromQuaternion(orn)
observation.extend(list(pos))
observation.extend(list(euler))
return observation
def applyAction(self, motorCommands):
#print ("self.numJoints")
#print (self.numJoints)
if (self.useInverseKinematics):
dx = motorCommands[0]
dy = motorCommands[1]
dz = motorCommands[2]
da = motorCommands[3]
fingerAngle = motorCommands[4]
state = p.getLinkState(self.kukaUid,self.kukaEndEffectorIndex)
state = p.getLinkState(self.kukaUid, self.kukaEndEffectorIndex)
actualEndEffectorPos = state[0]
#print("pos[2] (getLinkState(kukaEndEffectorIndex)")
#print(actualEndEffectorPos[2])
self.endEffectorPos[0] = self.endEffectorPos[0]+dx
if (self.endEffectorPos[0]>0.65):
self.endEffectorPos[0]=0.65
if (self.endEffectorPos[0]<0.50):
self.endEffectorPos[0]=0.50
self.endEffectorPos[1] = self.endEffectorPos[1]+dy
if (self.endEffectorPos[1]<-0.17):
self.endEffectorPos[1]=-0.17
if (self.endEffectorPos[1]>0.22):
self.endEffectorPos[1]=0.22
self.endEffectorPos[0] = self.endEffectorPos[0] + dx
if (self.endEffectorPos[0] > 0.65):
self.endEffectorPos[0] = 0.65
if (self.endEffectorPos[0] < 0.50):
self.endEffectorPos[0] = 0.50
self.endEffectorPos[1] = self.endEffectorPos[1] + dy
if (self.endEffectorPos[1] < -0.17):
self.endEffectorPos[1] = -0.17
if (self.endEffectorPos[1] > 0.22):
self.endEffectorPos[1] = 0.22
#print ("self.endEffectorPos[2]")
#print (self.endEffectorPos[2])
#print("actualEndEffectorPos[2]")
#print(actualEndEffectorPos[2])
#if (dz<0 or actualEndEffectorPos[2]<0.5):
self.endEffectorPos[2] = self.endEffectorPos[2]+dz
self.endEffectorPos[2] = self.endEffectorPos[2] + dz
self.endEffectorAngle = self.endEffectorAngle + da
pos = self.endEffectorPos
orn = p.getQuaternionFromEuler([0,-math.pi,0]) # -math.pi,yaw])
if (self.useNullSpace==1):
if (self.useOrientation==1):
jointPoses = p.calculateInverseKinematics(self.kukaUid,self.kukaEndEffectorIndex,pos,orn,self.ll,self.ul,self.jr,self.rp)
orn = p.getQuaternionFromEuler([0, -math.pi, 0]) # -math.pi,yaw])
if (self.useNullSpace == 1):
if (self.useOrientation == 1):
jointPoses = p.calculateInverseKinematics(self.kukaUid, self.kukaEndEffectorIndex, pos,
orn, self.ll, self.ul, self.jr, self.rp)
else:
jointPoses = p.calculateInverseKinematics(self.kukaUid,self.kukaEndEffectorIndex,pos,lowerLimits=self.ll, upperLimits=self.ul, jointRanges=self.jr, restPoses=self.rp)
jointPoses = p.calculateInverseKinematics(self.kukaUid,
self.kukaEndEffectorIndex,
pos,
lowerLimits=self.ll,
upperLimits=self.ul,
jointRanges=self.jr,
restPoses=self.rp)
else:
if (self.useOrientation==1):
jointPoses = p.calculateInverseKinematics(self.kukaUid,self.kukaEndEffectorIndex,pos,orn,jointDamping=self.jd)
if (self.useOrientation == 1):
jointPoses = p.calculateInverseKinematics(self.kukaUid,
self.kukaEndEffectorIndex,
pos,
orn,
jointDamping=self.jd)
else:
jointPoses = p.calculateInverseKinematics(self.kukaUid,self.kukaEndEffectorIndex,pos)
jointPoses = p.calculateInverseKinematics(self.kukaUid, self.kukaEndEffectorIndex, pos)
#print("jointPoses")
#print(jointPoses)
#print("self.kukaEndEffectorIndex")
#print(self.kukaEndEffectorIndex)
if (self.useSimulation):
for i in range (self.kukaEndEffectorIndex+1):
for i in range(self.kukaEndEffectorIndex + 1):
#print(i)
p.setJointMotorControl2(bodyUniqueId=self.kukaUid,jointIndex=i,controlMode=p.POSITION_CONTROL,targetPosition=jointPoses[i],targetVelocity=0,force=self.maxForce,maxVelocity=self.maxVelocity, positionGain=0.3,velocityGain=1)
p.setJointMotorControl2(bodyUniqueId=self.kukaUid,
jointIndex=i,
controlMode=p.POSITION_CONTROL,
targetPosition=jointPoses[i],
targetVelocity=0,
force=self.maxForce,
maxVelocity=self.maxVelocity,
positionGain=0.3,
velocityGain=1)
else:
#reset the joint state (ignoring all dynamics, not recommended to use during simulation)
for i in range (self.numJoints):
p.resetJointState(self.kukaUid,i,jointPoses[i])
for i in range(self.numJoints):
p.resetJointState(self.kukaUid, i, jointPoses[i])
#fingers
p.setJointMotorControl2(self.kukaUid,7,p.POSITION_CONTROL,targetPosition=self.endEffectorAngle,force=self.maxForce)
p.setJointMotorControl2(self.kukaUid,8,p.POSITION_CONTROL,targetPosition=-fingerAngle,force=self.fingerAForce)
p.setJointMotorControl2(self.kukaUid,11,p.POSITION_CONTROL,targetPosition=fingerAngle,force=self.fingerBForce)
p.setJointMotorControl2(self.kukaUid,10,p.POSITION_CONTROL,targetPosition=0,force=self.fingerTipForce)
p.setJointMotorControl2(self.kukaUid,13,p.POSITION_CONTROL,targetPosition=0,force=self.fingerTipForce)
p.setJointMotorControl2(self.kukaUid,
7,
p.POSITION_CONTROL,
targetPosition=self.endEffectorAngle,
force=self.maxForce)
p.setJointMotorControl2(self.kukaUid,
8,
p.POSITION_CONTROL,
targetPosition=-fingerAngle,
force=self.fingerAForce)
p.setJointMotorControl2(self.kukaUid,
11,
p.POSITION_CONTROL,
targetPosition=fingerAngle,
force=self.fingerBForce)
p.setJointMotorControl2(self.kukaUid,
10,
p.POSITION_CONTROL,
targetPosition=0,
force=self.fingerTipForce)
p.setJointMotorControl2(self.kukaUid,
13,
p.POSITION_CONTROL,
targetPosition=0,
force=self.fingerTipForce)
else:
for action in range (len(motorCommands)):
for action in range(len(motorCommands)):
motor = self.motorIndices[action]
p.setJointMotorControl2(self.kukaUid,motor,p.POSITION_CONTROL,targetPosition=motorCommands[action],force=self.maxForce)
p.setJointMotorControl2(self.kukaUid,
motor,
p.POSITION_CONTROL,
targetPosition=motorCommands[action],
force=self.maxForce)
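For reference, the reformatted Kuka helper above is driven directly through pybullet. The short usage sketch below is not part of this commit; the import path pybullet_envs.bullet.kuka and the asset locations under pybullet_data are assumptions, and the constructor keywords simply mirror how the gym environments later in this diff instantiate the arm.

import pybullet as p
import pybullet_data
from pybullet_envs.bullet import kuka  # import path is an assumption

p.connect(p.DIRECT)
p.setTimeStep(1. / 240.)
p.setGravity(0, 0, -10)

# Same keyword arguments the environments below pass to the helper class.
arm = kuka.Kuka(urdfRootPath=pybullet_data.getDataPath(), timeStep=1. / 240.)

# With useInverseKinematics enabled (the default), applyAction expects
# [dx, dy, dz, da, fingerAngle] in end-effector space.
for _ in range(100):
  arm.applyAction([0.001, 0.0, -0.002, 0.0, 0.3])
  p.stepSimulation()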
View File
@@ -1,8 +1,7 @@
import os, inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
os.sys.path.insert(0,parentdir)
os.sys.path.insert(0, parentdir)
import math
import gym
@@ -21,11 +20,9 @@ maxSteps = 1000
RENDER_HEIGHT = 720
RENDER_WIDTH = 960
class KukaCamGymEnv(gym.Env):
metadata = {
'render.modes': ['human', 'rgb_array'],
'video.frames_per_second' : 50
}
metadata = {'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': 50}
def __init__(self,
urdfRoot=pybullet_data.getDataPath(),
@@ -33,7 +30,7 @@ class KukaCamGymEnv(gym.Env):
isEnableSelfCollision=True,
renders=False,
isDiscrete=False):
self._timeStep = 1./240.
self._timeStep = 1. / 240.
self._urdfRoot = urdfRoot
self._actionRepeat = actionRepeat
self._isEnableSelfCollision = isEnableSelfCollision
@@ -42,14 +39,14 @@ class KukaCamGymEnv(gym.Env):
self._renders = renders
self._width = 341
self._height = 256
self._isDiscrete=isDiscrete
self._isDiscrete = isDiscrete
self.terminated = 0
self._p = p
if self._renders:
cid = p.connect(p.SHARED_MEMORY)
if (cid<0):
p.connect(p.GUI)
p.resetDebugVisualizerCamera(1.3,180,-41,[0.52,-0.2,-0.33])
if (cid < 0):
p.connect(p.GUI)
p.resetDebugVisualizerCamera(1.3, 180, -41, [0.52, -0.2, -0.33])
else:
p.connect(p.DIRECT)
#timinglog = p.startStateLogging(p.STATE_LOGGING_PROFILE_TIMINGS, "kukaTimings.json")
@@ -67,7 +64,10 @@ class KukaCamGymEnv(gym.Env):
self._action_bound = 1
action_high = np.array([self._action_bound] * action_dim)
self.action_space = spaces.Box(-action_high, action_high, dtype=np.float32)
self.observation_space = spaces.Box(low=0, high=255, shape=(self._height, self._width, 4), dtype=np.uint8)
self.observation_space = spaces.Box(low=0,
high=255,
shape=(self._height, self._width, 4),
dtype=np.uint8)
self.viewer = None
def reset(self):
@@ -75,17 +75,19 @@ class KukaCamGymEnv(gym.Env):
p.resetSimulation()
p.setPhysicsEngineParameter(numSolverIterations=150)
p.setTimeStep(self._timeStep)
p.loadURDF(os.path.join(self._urdfRoot,"plane.urdf"),[0,0,-1])
p.loadURDF(os.path.join(self._urdfRoot, "plane.urdf"), [0, 0, -1])
p.loadURDF(os.path.join(self._urdfRoot,"table/table.urdf"), 0.5000000,0.00000,-.820000,0.000000,0.000000,0.0,1.0)
p.loadURDF(os.path.join(self._urdfRoot, "table/table.urdf"), 0.5000000, 0.00000, -.820000,
0.000000, 0.000000, 0.0, 1.0)
xpos = 0.5 +0.2*random.random()
ypos = 0 +0.25*random.random()
ang = 3.1415925438*random.random()
orn = p.getQuaternionFromEuler([0,0,ang])
self.blockUid =p.loadURDF(os.path.join(self._urdfRoot,"block.urdf"), xpos,ypos,-0.1,orn[0],orn[1],orn[2],orn[3])
xpos = 0.5 + 0.2 * random.random()
ypos = 0 + 0.25 * random.random()
ang = 3.1415925438 * random.random()
orn = p.getQuaternionFromEuler([0, 0, ang])
self.blockUid = p.loadURDF(os.path.join(self._urdfRoot, "block.urdf"), xpos, ypos, -0.1,
orn[0], orn[1], orn[2], orn[3])
p.setGravity(0,0,-10)
p.setGravity(0, 0, -10)
self._kuka = kuka.Kuka(urdfRootPath=self._urdfRoot, timeStep=self._timeStep)
self._envStepCounter = 0
p.stepSimulation()
@@ -101,49 +103,59 @@ class KukaCamGymEnv(gym.Env):
def getExtendedObservation(self):
#camEyePos = [0.03,0.236,0.54]
#distance = 1.06
#pitch=-56
#yaw = 258
#roll=0
#upAxisIndex = 2
#camInfo = p.getDebugVisualizerCamera()
#print("width,height")
#print(camInfo[0])
#print(camInfo[1])
#print("viewMatrix")
#print(camInfo[2])
#print("projectionMatrix")
#print(camInfo[3])
#viewMat = camInfo[2]
#viewMat = p.computeViewMatrixFromYawPitchRoll(camEyePos,distance,yaw, pitch,roll,upAxisIndex)
viewMat = [-0.5120397806167603, 0.7171027660369873, -0.47284144163131714, 0.0, -0.8589617609977722, -0.42747554183006287, 0.28186774253845215, 0.0, 0.0, 0.5504802465438843, 0.8348482847213745, 0.0, 0.1925382763147354, -0.24935829639434814, -0.4401884973049164, 1.0]
#projMatrix = camInfo[3]#[0.7499999403953552, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, -1.0000200271606445, -1.0, 0.0, 0.0, -0.02000020071864128, 0.0]
projMatrix = [0.75, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, -1.0000200271606445, -1.0, 0.0, 0.0, -0.02000020071864128, 0.0]
#camEyePos = [0.03,0.236,0.54]
#distance = 1.06
#pitch=-56
#yaw = 258
#roll=0
#upAxisIndex = 2
#camInfo = p.getDebugVisualizerCamera()
#print("width,height")
#print(camInfo[0])
#print(camInfo[1])
#print("viewMatrix")
#print(camInfo[2])
#print("projectionMatrix")
#print(camInfo[3])
#viewMat = camInfo[2]
#viewMat = p.computeViewMatrixFromYawPitchRoll(camEyePos,distance,yaw, pitch,roll,upAxisIndex)
viewMat = [
-0.5120397806167603, 0.7171027660369873, -0.47284144163131714, 0.0, -0.8589617609977722,
-0.42747554183006287, 0.28186774253845215, 0.0, 0.0, 0.5504802465438843,
0.8348482847213745, 0.0, 0.1925382763147354, -0.24935829639434814, -0.4401884973049164, 1.0
]
#projMatrix = camInfo[3]#[0.7499999403953552, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, -1.0000200271606445, -1.0, 0.0, 0.0, -0.02000020071864128, 0.0]
projMatrix = [
0.75, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, -1.0000200271606445, -1.0, 0.0, 0.0,
-0.02000020071864128, 0.0
]
img_arr = p.getCameraImage(width=self._width,height=self._height,viewMatrix=viewMat,projectionMatrix=projMatrix)
rgb=img_arr[2]
np_img_arr = np.reshape(rgb, (self._height, self._width, 4))
self._observation = np_img_arr
return self._observation
img_arr = p.getCameraImage(width=self._width,
height=self._height,
viewMatrix=viewMat,
projectionMatrix=projMatrix)
rgb = img_arr[2]
np_img_arr = np.reshape(rgb, (self._height, self._width, 4))
self._observation = np_img_arr
return self._observation
def step(self, action):
if (self._isDiscrete):
dv = 0.01
dx = [0,-dv,dv,0,0,0,0][action]
dy = [0,0,0,-dv,dv,0,0][action]
da = [0,0,0,0,0,-0.1,0.1][action]
dx = [0, -dv, dv, 0, 0, 0, 0][action]
dy = [0, 0, 0, -dv, dv, 0, 0][action]
da = [0, 0, 0, 0, 0, -0.1, 0.1][action]
f = 0.3
realAction = [dx,dy,-0.002,da,f]
realAction = [dx, dy, -0.002, da, f]
else:
dv = 0.01
dx = action[0] * dv
dy = action[1] * dv
da = action[2] * 0.1
f = 0.3
realAction = [dx,dy,-0.002,da,f]
realAction = [dx, dy, -0.002, da, f]
return self.step2( realAction)
return self.step2(realAction)
def step2(self, action):
for i in range(self._actionRepeat):
@@ -156,7 +168,7 @@ class KukaCamGymEnv(gym.Env):
self._observation = self.getExtendedObservation()
if self._renders:
time.sleep(self._timeStep)
#print("self._envStepCounter")
#print(self._envStepCounter)
@@ -170,66 +182,67 @@ class KukaCamGymEnv(gym.Env):
def render(self, mode='human', close=False):
if mode != "rgb_array":
return np.array([])
base_pos,orn = self._p.getBasePositionAndOrientation(self._racecar.racecarUniqueId)
view_matrix = self._p.computeViewMatrixFromYawPitchRoll(
cameraTargetPosition=base_pos,
distance=self._cam_dist,
yaw=self._cam_yaw,
pitch=self._cam_pitch,
roll=0,
upAxisIndex=2)
proj_matrix = self._p.computeProjectionMatrixFOV(
fov=60, aspect=float(RENDER_WIDTH)/RENDER_HEIGHT,
nearVal=0.1, farVal=100.0)
(_, _, px, _, _) = self._p.getCameraImage(
width=RENDER_WIDTH, height=RENDER_HEIGHT, viewMatrix=view_matrix,
projectionMatrix=proj_matrix, renderer=pybullet.ER_BULLET_HARDWARE_OPENGL)
base_pos, orn = self._p.getBasePositionAndOrientation(self._racecar.racecarUniqueId)
view_matrix = self._p.computeViewMatrixFromYawPitchRoll(cameraTargetPosition=base_pos,
distance=self._cam_dist,
yaw=self._cam_yaw,
pitch=self._cam_pitch,
roll=0,
upAxisIndex=2)
proj_matrix = self._p.computeProjectionMatrixFOV(fov=60,
aspect=float(RENDER_WIDTH) / RENDER_HEIGHT,
nearVal=0.1,
farVal=100.0)
(_, _, px, _, _) = self._p.getCameraImage(width=RENDER_WIDTH,
height=RENDER_HEIGHT,
viewMatrix=view_matrix,
projectionMatrix=proj_matrix,
renderer=pybullet.ER_BULLET_HARDWARE_OPENGL)
rgb_array = np.array(px)
rgb_array = rgb_array[:, :, :3]
return rgb_array
def _termination(self):
#print (self._kuka.endEffectorPos[2])
state = p.getLinkState(self._kuka.kukaUid,self._kuka.kukaEndEffectorIndex)
state = p.getLinkState(self._kuka.kukaUid, self._kuka.kukaEndEffectorIndex)
actualEndEffectorPos = state[0]
#print("self._envStepCounter")
#print(self._envStepCounter)
if (self.terminated or self._envStepCounter>maxSteps):
if (self.terminated or self._envStepCounter > maxSteps):
self._observation = self.getExtendedObservation()
return True
maxDist = 0.005
closestPoints = p.getClosestPoints(self._kuka.trayUid, self._kuka.kukaUid,maxDist)
closestPoints = p.getClosestPoints(self._kuka.trayUid, self._kuka.kukaUid, maxDist)
if (len(closestPoints)):#(actualEndEffectorPos[2] <= -0.43):
if (len(closestPoints)): #(actualEndEffectorPos[2] <= -0.43):
self.terminated = 1
#print("closing gripper, attempting grasp")
#start grasp and terminate
fingerAngle = 0.3
for i in range (100):
graspAction = [0,0,0.0001,0,fingerAngle]
for i in range(100):
graspAction = [0, 0, 0.0001, 0, fingerAngle]
self._kuka.applyAction(graspAction)
p.stepSimulation()
fingerAngle = fingerAngle-(0.3/100.)
if (fingerAngle<0):
fingerAngle=0
fingerAngle = fingerAngle - (0.3 / 100.)
if (fingerAngle < 0):
fingerAngle = 0
for i in range (1000):
graspAction = [0,0,0.001,0,fingerAngle]
for i in range(1000):
graspAction = [0, 0, 0.001, 0, fingerAngle]
self._kuka.applyAction(graspAction)
p.stepSimulation()
blockPos,blockOrn=p.getBasePositionAndOrientation(self.blockUid)
blockPos, blockOrn = p.getBasePositionAndOrientation(self.blockUid)
if (blockPos[2] > 0.23):
#print("BLOCKPOS!")
#print(blockPos[2])
break
state = p.getLinkState(self._kuka.kukaUid,self._kuka.kukaEndEffectorIndex)
state = p.getLinkState(self._kuka.kukaUid, self._kuka.kukaEndEffectorIndex)
actualEndEffectorPos = state[0]
if (actualEndEffectorPos[2]>0.5):
if (actualEndEffectorPos[2] > 0.5):
break
self._observation = self.getExtendedObservation()
return True
return False
@@ -237,20 +250,21 @@ class KukaCamGymEnv(gym.Env):
def _reward(self):
#rewards is height of target object
blockPos,blockOrn=p.getBasePositionAndOrientation(self.blockUid)
closestPoints = p.getClosestPoints(self.blockUid,self._kuka.kukaUid,1000, -1, self._kuka.kukaEndEffectorIndex)
blockPos, blockOrn = p.getBasePositionAndOrientation(self.blockUid)
closestPoints = p.getClosestPoints(self.blockUid, self._kuka.kukaUid, 1000, -1,
self._kuka.kukaEndEffectorIndex)
reward = -1000
numPt = len(closestPoints)
#print(numPt)
if (numPt>0):
if (numPt > 0):
#print("reward:")
reward = -closestPoints[0][8]*10
if (blockPos[2] >0.2):
reward = -closestPoints[0][8] * 10
if (blockPos[2] > 0.2):
#print("grasped a block!!!")
#print("self._envStepCounter")
#print(self._envStepCounter)
reward = reward+1000
reward = reward + 1000
#print("reward")
#print(reward)
View File
@@ -1,7 +1,7 @@
import os, inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
print ("current_dir=" + currentdir)
os.sys.path.insert(0,currentdir)
print("current_dir=" + currentdir)
os.sys.path.insert(0, currentdir)
import math
import gym
@@ -20,11 +20,9 @@ largeValObservation = 100
RENDER_HEIGHT = 720
RENDER_WIDTH = 960
class KukaGymEnv(gym.Env):
metadata = {
'render.modes': ['human', 'rgb_array'],
'video.frames_per_second' : 50
}
metadata = {'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': 50}
def __init__(self,
urdfRoot=pybullet_data.getDataPath(),
@@ -32,10 +30,10 @@ class KukaGymEnv(gym.Env):
isEnableSelfCollision=True,
renders=False,
isDiscrete=False,
maxSteps = 1000):
maxSteps=1000):
#print("KukaGymEnv __init__")
self._isDiscrete = isDiscrete
self._timeStep = 1./240.
self._timeStep = 1. / 240.
self._urdfRoot = urdfRoot
self._actionRepeat = actionRepeat
self._isEnableSelfCollision = isEnableSelfCollision
@@ -51,9 +49,9 @@ class KukaGymEnv(gym.Env):
self._p = p
if self._renders:
cid = p.connect(p.SHARED_MEMORY)
if (cid<0):
cid = p.connect(p.GUI)
p.resetDebugVisualizerCamera(1.3,180,-41,[0.52,-0.2,-0.33])
if (cid < 0):
cid = p.connect(p.GUI)
p.resetDebugVisualizerCamera(1.3, 180, -41, [0.52, -0.2, -0.33])
else:
p.connect(p.DIRECT)
#timinglog = p.startStateLogging(p.STATE_LOGGING_PROFILE_TIMINGS, "kukaTimings.json")
@@ -67,10 +65,10 @@ class KukaGymEnv(gym.Env):
if (self._isDiscrete):
self.action_space = spaces.Discrete(7)
else:
action_dim = 3
self._action_bound = 1
action_high = np.array([self._action_bound] * action_dim)
self.action_space = spaces.Box(-action_high, action_high)
action_dim = 3
self._action_bound = 1
action_high = np.array([self._action_bound] * action_dim)
self.action_space = spaces.Box(-action_high, action_high)
self.observation_space = spaces.Box(-observation_high, observation_high)
self.viewer = None
@@ -80,17 +78,19 @@ class KukaGymEnv(gym.Env):
p.resetSimulation()
p.setPhysicsEngineParameter(numSolverIterations=150)
p.setTimeStep(self._timeStep)
p.loadURDF(os.path.join(self._urdfRoot,"plane.urdf"),[0,0,-1])
p.loadURDF(os.path.join(self._urdfRoot, "plane.urdf"), [0, 0, -1])
p.loadURDF(os.path.join(self._urdfRoot,"table/table.urdf"), 0.5000000,0.00000,-.820000,0.000000,0.000000,0.0,1.0)
p.loadURDF(os.path.join(self._urdfRoot, "table/table.urdf"), 0.5000000, 0.00000, -.820000,
0.000000, 0.000000, 0.0, 1.0)
xpos = 0.55 +0.12*random.random()
ypos = 0 +0.2*random.random()
ang = 3.14*0.5+3.1415925438*random.random()
orn = p.getQuaternionFromEuler([0,0,ang])
self.blockUid =p.loadURDF(os.path.join(self._urdfRoot,"block.urdf"), xpos,ypos,-0.15,orn[0],orn[1],orn[2],orn[3])
xpos = 0.55 + 0.12 * random.random()
ypos = 0 + 0.2 * random.random()
ang = 3.14 * 0.5 + 3.1415925438 * random.random()
orn = p.getQuaternionFromEuler([0, 0, ang])
self.blockUid = p.loadURDF(os.path.join(self._urdfRoot, "block.urdf"), xpos, ypos, -0.15,
orn[0], orn[1], orn[2], orn[3])
p.setGravity(0,0,-10)
p.setGravity(0, 0, -10)
self._kuka = kuka.Kuka(urdfRootPath=self._urdfRoot, timeStep=self._timeStep)
self._envStepCounter = 0
p.stepSimulation()
@@ -105,47 +105,48 @@ class KukaGymEnv(gym.Env):
return [seed]
def getExtendedObservation(self):
self._observation = self._kuka.getObservation()
gripperState = p.getLinkState(self._kuka.kukaUid,self._kuka.kukaGripperIndex)
gripperPos = gripperState[0]
gripperOrn = gripperState[1]
blockPos,blockOrn = p.getBasePositionAndOrientation(self.blockUid)
self._observation = self._kuka.getObservation()
gripperState = p.getLinkState(self._kuka.kukaUid, self._kuka.kukaGripperIndex)
gripperPos = gripperState[0]
gripperOrn = gripperState[1]
blockPos, blockOrn = p.getBasePositionAndOrientation(self.blockUid)
invGripperPos,invGripperOrn = p.invertTransform(gripperPos,gripperOrn)
gripperMat = p.getMatrixFromQuaternion(gripperOrn)
dir0 = [gripperMat[0],gripperMat[3],gripperMat[6]]
dir1 = [gripperMat[1],gripperMat[4],gripperMat[7]]
dir2 = [gripperMat[2],gripperMat[5],gripperMat[8]]
invGripperPos, invGripperOrn = p.invertTransform(gripperPos, gripperOrn)
gripperMat = p.getMatrixFromQuaternion(gripperOrn)
dir0 = [gripperMat[0], gripperMat[3], gripperMat[6]]
dir1 = [gripperMat[1], gripperMat[4], gripperMat[7]]
dir2 = [gripperMat[2], gripperMat[5], gripperMat[8]]
gripperEul = p.getEulerFromQuaternion(gripperOrn)
#print("gripperEul")
#print(gripperEul)
blockPosInGripper,blockOrnInGripper = p.multiplyTransforms(invGripperPos,invGripperOrn,blockPos,blockOrn)
projectedBlockPos2D =[blockPosInGripper[0],blockPosInGripper[1]]
blockEulerInGripper = p.getEulerFromQuaternion(blockOrnInGripper)
#print("projectedBlockPos2D")
#print(projectedBlockPos2D)
#print("blockEulerInGripper")
#print(blockEulerInGripper)
gripperEul = p.getEulerFromQuaternion(gripperOrn)
#print("gripperEul")
#print(gripperEul)
blockPosInGripper, blockOrnInGripper = p.multiplyTransforms(invGripperPos, invGripperOrn,
blockPos, blockOrn)
projectedBlockPos2D = [blockPosInGripper[0], blockPosInGripper[1]]
blockEulerInGripper = p.getEulerFromQuaternion(blockOrnInGripper)
#print("projectedBlockPos2D")
#print(projectedBlockPos2D)
#print("blockEulerInGripper")
#print(blockEulerInGripper)
#we return the relative x,y position and euler angle of block in gripper space
blockInGripperPosXYEulZ =[blockPosInGripper[0],blockPosInGripper[1],blockEulerInGripper[2]]
#we return the relative x,y position and euler angle of block in gripper space
blockInGripperPosXYEulZ = [blockPosInGripper[0], blockPosInGripper[1], blockEulerInGripper[2]]
#p.addUserDebugLine(gripperPos,[gripperPos[0]+dir0[0],gripperPos[1]+dir0[1],gripperPos[2]+dir0[2]],[1,0,0],lifeTime=1)
#p.addUserDebugLine(gripperPos,[gripperPos[0]+dir1[0],gripperPos[1]+dir1[1],gripperPos[2]+dir1[2]],[0,1,0],lifeTime=1)
#p.addUserDebugLine(gripperPos,[gripperPos[0]+dir2[0],gripperPos[1]+dir2[1],gripperPos[2]+dir2[2]],[0,0,1],lifeTime=1)
#p.addUserDebugLine(gripperPos,[gripperPos[0]+dir0[0],gripperPos[1]+dir0[1],gripperPos[2]+dir0[2]],[1,0,0],lifeTime=1)
#p.addUserDebugLine(gripperPos,[gripperPos[0]+dir1[0],gripperPos[1]+dir1[1],gripperPos[2]+dir1[2]],[0,1,0],lifeTime=1)
#p.addUserDebugLine(gripperPos,[gripperPos[0]+dir2[0],gripperPos[1]+dir2[1],gripperPos[2]+dir2[2]],[0,0,1],lifeTime=1)
self._observation.extend(list(blockInGripperPosXYEulZ))
return self._observation
self._observation.extend(list(blockInGripperPosXYEulZ))
return self._observation
def step(self, action):
if (self._isDiscrete):
dv = 0.005
dx = [0,-dv,dv,0,0,0,0][action]
dy = [0,0,0,-dv,dv,0,0][action]
da = [0,0,0,0,0,-0.05,0.05][action]
dx = [0, -dv, dv, 0, 0, 0, 0][action]
dy = [0, 0, 0, -dv, dv, 0, 0][action]
da = [0, 0, 0, 0, 0, -0.05, 0.05][action]
f = 0.3
realAction = [dx,dy,-0.002,da,f]
realAction = [dx, dy, -0.002, da, f]
else:
#print("action[0]=", str(action[0]))
dv = 0.005
@@ -153,8 +154,8 @@ class KukaGymEnv(gym.Env):
dy = action[1] * dv
da = action[2] * 0.05
f = 0.3
realAction = [dx,dy,-0.002,da,f]
return self.step2( realAction)
realAction = [dx, dy, -0.002, da, f]
return self.step2(realAction)
def step2(self, action):
for i in range(self._actionRepeat):
@@ -171,11 +172,13 @@ class KukaGymEnv(gym.Env):
#print(self._envStepCounter)
done = self._termination()
npaction = np.array([action[3]]) #only penalize rotation until learning works well [action[0],action[1],action[3]])
actionCost = np.linalg.norm(npaction)*10.
npaction = np.array([
action[3]
]) #only penalize rotation until learning works well [action[0],action[1],action[3]])
actionCost = np.linalg.norm(npaction) * 10.
#print("actionCost")
#print(actionCost)
reward = self._reward()-actionCost
reward = self._reward() - actionCost
#print("reward")
#print(reward)
@@ -187,22 +190,23 @@ class KukaGymEnv(gym.Env):
if mode != "rgb_array":
return np.array([])
base_pos,orn = self._p.getBasePositionAndOrientation(self._kuka.kukaUid)
view_matrix = self._p.computeViewMatrixFromYawPitchRoll(
cameraTargetPosition=base_pos,
distance=self._cam_dist,
yaw=self._cam_yaw,
pitch=self._cam_pitch,
roll=0,
upAxisIndex=2)
proj_matrix = self._p.computeProjectionMatrixFOV(
fov=60, aspect=float(RENDER_WIDTH)/RENDER_HEIGHT,
nearVal=0.1, farVal=100.0)
(_, _, px, _, _) = self._p.getCameraImage(
width=RENDER_WIDTH, height=RENDER_HEIGHT, viewMatrix=view_matrix,
projectionMatrix=proj_matrix, renderer=self._p.ER_BULLET_HARDWARE_OPENGL)
#renderer=self._p.ER_TINY_RENDERER)
base_pos, orn = self._p.getBasePositionAndOrientation(self._kuka.kukaUid)
view_matrix = self._p.computeViewMatrixFromYawPitchRoll(cameraTargetPosition=base_pos,
distance=self._cam_dist,
yaw=self._cam_yaw,
pitch=self._cam_pitch,
roll=0,
upAxisIndex=2)
proj_matrix = self._p.computeProjectionMatrixFOV(fov=60,
aspect=float(RENDER_WIDTH) / RENDER_HEIGHT,
nearVal=0.1,
farVal=100.0)
(_, _, px, _, _) = self._p.getCameraImage(width=RENDER_WIDTH,
height=RENDER_HEIGHT,
viewMatrix=view_matrix,
projectionMatrix=proj_matrix,
renderer=self._p.ER_BULLET_HARDWARE_OPENGL)
#renderer=self._p.ER_TINY_RENDERER)
rgb_array = np.array(px, dtype=np.uint8)
rgb_array = np.reshape(rgb_array, (RENDER_HEIGHT, RENDER_WIDTH, 4))
@@ -210,49 +214,47 @@ class KukaGymEnv(gym.Env):
rgb_array = rgb_array[:, :, :3]
return rgb_array
def _termination(self):
#print (self._kuka.endEffectorPos[2])
state = p.getLinkState(self._kuka.kukaUid,self._kuka.kukaEndEffectorIndex)
state = p.getLinkState(self._kuka.kukaUid, self._kuka.kukaEndEffectorIndex)
actualEndEffectorPos = state[0]
#print("self._envStepCounter")
#print(self._envStepCounter)
if (self.terminated or self._envStepCounter>self._maxSteps):
if (self.terminated or self._envStepCounter > self._maxSteps):
self._observation = self.getExtendedObservation()
return True
maxDist = 0.005
closestPoints = p.getClosestPoints(self._kuka.trayUid, self._kuka.kukaUid,maxDist)
closestPoints = p.getClosestPoints(self._kuka.trayUid, self._kuka.kukaUid, maxDist)
if (len(closestPoints)):#(actualEndEffectorPos[2] <= -0.43):
if (len(closestPoints)): #(actualEndEffectorPos[2] <= -0.43):
self.terminated = 1
#print("terminating, closing gripper, attempting grasp")
#start grasp and terminate
fingerAngle = 0.3
for i in range (100):
graspAction = [0,0,0.0001,0,fingerAngle]
for i in range(100):
graspAction = [0, 0, 0.0001, 0, fingerAngle]
self._kuka.applyAction(graspAction)
p.stepSimulation()
fingerAngle = fingerAngle-(0.3/100.)
if (fingerAngle<0):
fingerAngle=0
fingerAngle = fingerAngle - (0.3 / 100.)
if (fingerAngle < 0):
fingerAngle = 0
for i in range (1000):
graspAction = [0,0,0.001,0,fingerAngle]
for i in range(1000):
graspAction = [0, 0, 0.001, 0, fingerAngle]
self._kuka.applyAction(graspAction)
p.stepSimulation()
blockPos,blockOrn=p.getBasePositionAndOrientation(self.blockUid)
blockPos, blockOrn = p.getBasePositionAndOrientation(self.blockUid)
if (blockPos[2] > 0.23):
#print("BLOCKPOS!")
#print(blockPos[2])
break
state = p.getLinkState(self._kuka.kukaUid,self._kuka.kukaEndEffectorIndex)
state = p.getLinkState(self._kuka.kukaUid, self._kuka.kukaEndEffectorIndex)
actualEndEffectorPos = state[0]
if (actualEndEffectorPos[2]>0.5):
if (actualEndEffectorPos[2] > 0.5):
break
self._observation = self.getExtendedObservation()
return True
return False
@@ -260,18 +262,19 @@ class KukaGymEnv(gym.Env):
def _reward(self):
#rewards is height of target object
blockPos,blockOrn=p.getBasePositionAndOrientation(self.blockUid)
closestPoints = p.getClosestPoints(self.blockUid,self._kuka.kukaUid,1000, -1, self._kuka.kukaEndEffectorIndex)
blockPos, blockOrn = p.getBasePositionAndOrientation(self.blockUid)
closestPoints = p.getClosestPoints(self.blockUid, self._kuka.kukaUid, 1000, -1,
self._kuka.kukaEndEffectorIndex)
reward = -1000
numPt = len(closestPoints)
#print(numPt)
if (numPt>0):
if (numPt > 0):
#print("reward:")
reward = -closestPoints[0][8]*10
if (blockPos[2] >0.2):
reward = reward+10000
reward = -closestPoints[0][8] * 10
if (blockPos[2] > 0.2):
reward = reward + 10000
print("successfully grasped a block!!!")
#print("self._envStepCounter")
#print(self._envStepCounter)
@@ -279,7 +282,7 @@ class KukaGymEnv(gym.Env):
#print(self._envStepCounter)
#print("reward")
#print(reward)
#print("reward")
#print("reward")
#print(reward)
return reward
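A quick way to confirm the reformatted environment still behaves as a standard gym env is a short random rollout. This sketch is illustrative only and assumes the module path pybullet_envs.bullet.kukaGymEnv; the constructor keywords and the 3-dimensional continuous action space match the code shown above.

from pybullet_envs.bullet.kukaGymEnv import KukaGymEnv  # import path is an assumption

env = KukaGymEnv(renders=False, isDiscrete=False, maxSteps=1000)
obs = env.reset()
done = False
episode_return = 0.0
while not done:
  # Continuous mode: a 3-vector in [-1, 1], mapped above to dx, dy, da.
  action = env.action_space.sample()
  obs, reward, done, info = env.step(action)
  episode_return += reward
print("episode return:", episode_return)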
View File
@@ -13,6 +13,7 @@ import glob
from pkg_resources import parse_version
import gym
class KukaDiverseObjectEnv(KukaGymEnv):
"""Class for Kuka environment with diverse objects.
@@ -61,7 +62,7 @@ class KukaDiverseObjectEnv(KukaGymEnv):
"""
self._isDiscrete = isDiscrete
self._timeStep = 1./240.
self._timeStep = 1. / 240.
self._urdfRoot = urdfRoot
self._actionRepeat = actionRepeat
self._isEnableSelfCollision = isEnableSelfCollision
@@ -85,9 +86,9 @@ class KukaDiverseObjectEnv(KukaGymEnv):
if self._renders:
self.cid = p.connect(p.SHARED_MEMORY)
if (self.cid<0):
self.cid = p.connect(p.GUI)
p.resetDebugVisualizerCamera(1.3,180,-41,[0.52,-0.2,-0.33])
if (self.cid < 0):
self.cid = p.connect(p.GUI)
p.resetDebugVisualizerCamera(1.3, 180, -41, [0.52, -0.2, -0.33])
else:
self.cid = p.connect(p.DIRECT)
self.seed()
@@ -100,9 +101,7 @@ class KukaDiverseObjectEnv(KukaGymEnv):
else:
self.action_space = spaces.Box(low=-1, high=1, shape=(3,)) # dx, dy, da
if self._removeHeightHack:
self.action_space = spaces.Box(low=-1,
high=1,
shape=(4,)) # dx, dy, dz, da
self.action_space = spaces.Box(low=-1, high=1, shape=(4,)) # dx, dy, dz, da
self.viewer = None
def reset(self):
@@ -111,17 +110,15 @@ class KukaDiverseObjectEnv(KukaGymEnv):
# Set the camera settings.
look = [0.23, 0.2, 0.54]
distance = 1.
pitch = -56 + self._cameraRandom*np.random.uniform(-3, 3)
yaw = 245 + self._cameraRandom*np.random.uniform(-3, 3)
pitch = -56 + self._cameraRandom * np.random.uniform(-3, 3)
yaw = 245 + self._cameraRandom * np.random.uniform(-3, 3)
roll = 0
self._view_matrix = p.computeViewMatrixFromYawPitchRoll(
look, distance, yaw, pitch, roll, 2)
fov = 20. + self._cameraRandom*np.random.uniform(-2, 2)
self._view_matrix = p.computeViewMatrixFromYawPitchRoll(look, distance, yaw, pitch, roll, 2)
fov = 20. + self._cameraRandom * np.random.uniform(-2, 2)
aspect = self._width / self._height
near = 0.01
far = 10
self._proj_matrix = p.computeProjectionMatrixFOV(
fov, aspect, near, far)
self._proj_matrix = p.computeProjectionMatrixFOV(fov, aspect, near, far)
self._attempted_grasp = False
self._env_step = 0
@@ -130,18 +127,18 @@ class KukaDiverseObjectEnv(KukaGymEnv):
p.resetSimulation()
p.setPhysicsEngineParameter(numSolverIterations=150)
p.setTimeStep(self._timeStep)
p.loadURDF(os.path.join(self._urdfRoot,"plane.urdf"),[0,0,-1])
p.loadURDF(os.path.join(self._urdfRoot, "plane.urdf"), [0, 0, -1])
p.loadURDF(os.path.join(self._urdfRoot,"table/table.urdf"), 0.5000000,0.00000,-.820000,0.000000,0.000000,0.0,1.0)
p.loadURDF(os.path.join(self._urdfRoot, "table/table.urdf"), 0.5000000, 0.00000, -.820000,
0.000000, 0.000000, 0.0, 1.0)
p.setGravity(0,0,-10)
p.setGravity(0, 0, -10)
self._kuka = kuka.Kuka(urdfRootPath=self._urdfRoot, timeStep=self._timeStep)
self._envStepCounter = 0
p.stepSimulation()
# Choose the objects in the bin.
urdfList = self._get_random_object(
self._numObjects, self._isTest)
urdfList = self._get_random_object(self._numObjects, self._isTest)
self._objectUids = self._randomly_place_objects(urdfList)
self._observation = self._get_observation()
return np.array(self._observation)
@@ -156,17 +153,15 @@ class KukaDiverseObjectEnv(KukaGymEnv):
The list of object unique ID's.
"""
# Randomize positions of each object urdf.
objectUids = []
for urdf_name in urdfList:
xpos = 0.4 +self._blockRandom*random.random()
ypos = self._blockRandom*(random.random()-.5)
angle = np.pi/2 + self._blockRandom * np.pi * random.random()
xpos = 0.4 + self._blockRandom * random.random()
ypos = self._blockRandom * (random.random() - .5)
angle = np.pi / 2 + self._blockRandom * np.pi * random.random()
orn = p.getQuaternionFromEuler([0, 0, angle])
urdf_path = os.path.join(self._urdfRoot, urdf_name)
uid = p.loadURDF(urdf_path, [xpos, ypos, .15],
[orn[0], orn[1], orn[2], orn[3]])
uid = p.loadURDF(urdf_path, [xpos, ypos, .15], [orn[0], orn[1], orn[2], orn[3]])
objectUids.append(uid)
# Let each object fall to the tray individual, to prevent object
# intersection.
@@ -178,9 +173,9 @@ class KukaDiverseObjectEnv(KukaGymEnv):
"""Return the observation as an image.
"""
img_arr = p.getCameraImage(width=self._width,
height=self._height,
viewMatrix=self._view_matrix,
projectionMatrix=self._proj_matrix)
height=self._height,
viewMatrix=self._view_matrix,
projectionMatrix=self._proj_matrix)
rgb = img_arr[2]
np_img_arr = np.reshape(rgb, (self._height, self._width, 4))
return np_img_arr[:, :, :3]
@@ -246,8 +241,7 @@ class KukaDiverseObjectEnv(KukaGymEnv):
break
# If we are close to the bin, attempt grasp.
state = p.getLinkState(self._kuka.kukaUid,
self._kuka.kukaEndEffectorIndex)
state = p.getLinkState(self._kuka.kukaUid, self._kuka.kukaEndEffectorIndex)
end_effector_pos = state[0]
if end_effector_pos[2] <= 0.1:
finger_angle = 0.3
@@ -257,7 +251,7 @@ class KukaDiverseObjectEnv(KukaGymEnv):
p.stepSimulation()
#if self._renders:
# time.sleep(self._timeStep)
finger_angle -= 0.3/100.
finger_angle -= 0.3 / 100.
if finger_angle < 0:
finger_angle = 0
for _ in range(500):
@@ -266,7 +260,7 @@ class KukaDiverseObjectEnv(KukaGymEnv):
p.stepSimulation()
if self._renders:
time.sleep(self._timeStep)
finger_angle -= 0.3/100.
finger_angle -= 0.3 / 100.
if finger_angle < 0:
finger_angle = 0
self._attempted_grasp = True
@@ -274,9 +268,7 @@ class KukaDiverseObjectEnv(KukaGymEnv):
done = self._termination()
reward = self._reward()
debug = {
'grasp_success': self._graspSuccess
}
debug = {'grasp_success': self._graspSuccess}
return observation, reward, done, debug
def _reward(self):
@@ -288,8 +280,7 @@ class KukaDiverseObjectEnv(KukaGymEnv):
reward = 0
self._graspSuccess = 0
for uid in self._objectUids:
pos, _ = p.getBasePositionAndOrientation(
uid)
pos, _ = p.getBasePositionAndOrientation(uid)
# If any block is above height, provide reward.
if pos[2] > 0.2:
self._graspSuccess += 1
@@ -319,8 +310,7 @@ class KukaDiverseObjectEnv(KukaGymEnv):
urdf_pattern = os.path.join(self._urdfRoot, 'random_urdfs/*[1-9]/*.urdf')
found_object_directories = glob.glob(urdf_pattern)
total_num_objects = len(found_object_directories)
selected_objects = np.random.choice(np.arange(total_num_objects),
num_objects)
selected_objects = np.random.choice(np.arange(total_num_objects), num_objects)
selected_objects_filenames = []
for object_index in selected_objects:
selected_objects_filenames += [found_object_directories[object_index]]
View File
@@ -15,9 +15,8 @@ OVERHEAT_SHUTDOWN_TORQUE = 2.45
OVERHEAT_SHUTDOWN_TIME = 1.0
LEG_POSITION = ["front_left", "back_left", "front_right", "back_right"]
MOTOR_NAMES = [
"motor_front_leftL_joint", "motor_front_leftR_joint",
"motor_back_leftL_joint", "motor_back_leftR_joint",
"motor_front_rightL_joint", "motor_front_rightR_joint",
"motor_front_leftL_joint", "motor_front_leftR_joint", "motor_back_leftL_joint",
"motor_back_leftR_joint", "motor_front_rightL_joint", "motor_front_rightR_joint",
"motor_back_rightL_joint", "motor_back_rightR_joint"
]
LEG_LINK_ID = [2, 3, 5, 6, 8, 9, 11, 12, 15, 16, 18, 19, 21, 22, 24, 25]
@@ -33,7 +32,7 @@ class Minitaur(object):
def __init__(self,
pybullet_client,
urdf_root= os.path.join(os.path.dirname(__file__),"../data"),
urdf_root=os.path.join(os.path.dirname(__file__), "../data"),
time_step=0.01,
self_collision_enabled=False,
motor_velocity_limit=np.inf,
@@ -87,10 +86,9 @@ class Minitaur(object):
if self._accurate_motor_model_enabled:
self._kp = motor_kp
self._kd = motor_kd
self._motor_model = motor.MotorModel(
torque_control_enabled=self._torque_control_enabled,
kp=self._kp,
kd=self._kd)
self._motor_model = motor.MotorModel(torque_control_enabled=self._torque_control_enabled,
kp=self._kp,
kd=self._kd)
elif self._pd_control_enabled:
self._kp = 8
self._kd = kd_for_pd_controllers
@@ -101,15 +99,12 @@ class Minitaur(object):
self.Reset()
def _RecordMassInfoFromURDF(self):
self._base_mass_urdf = self._pybullet_client.getDynamicsInfo(
self.quadruped, BASE_LINK_ID)[0]
self._base_mass_urdf = self._pybullet_client.getDynamicsInfo(self.quadruped, BASE_LINK_ID)[0]
self._leg_masses_urdf = []
self._leg_masses_urdf.append(
self._pybullet_client.getDynamicsInfo(self.quadruped, LEG_LINK_ID[0])[
0])
self._pybullet_client.getDynamicsInfo(self.quadruped, LEG_LINK_ID[0])[0])
self._leg_masses_urdf.append(
self._pybullet_client.getDynamicsInfo(self.quadruped, MOTOR_LINK_ID[0])[
0])
self._pybullet_client.getDynamicsInfo(self.quadruped, MOTOR_LINK_ID[0])[0])
def _BuildJointNameToIdDict(self):
num_joints = self._pybullet_client.getNumJoints(self.quadruped)
@@ -119,9 +114,7 @@ class Minitaur(object):
self._joint_name_to_id[joint_info[1].decode("UTF-8")] = joint_info[0]
def _BuildMotorIdList(self):
self._motor_id_list = [
self._joint_name_to_id[motor_name] for motor_name in MOTOR_NAMES
]
self._motor_id_list = [self._joint_name_to_id[motor_name] for motor_name in MOTOR_NAMES]
def Reset(self, reload_urdf=True):
"""Reset the minitaur to its initial states.
@@ -144,39 +137,35 @@ class Minitaur(object):
self._RecordMassInfoFromURDF()
self.ResetPose(add_constraint=True)
if self._on_rack:
self._pybullet_client.createConstraint(
self.quadruped, -1, -1, -1, self._pybullet_client.JOINT_FIXED,
[0, 0, 0], [0, 0, 0], [0, 0, 1])
self._pybullet_client.createConstraint(self.quadruped, -1, -1, -1,
self._pybullet_client.JOINT_FIXED, [0, 0, 0],
[0, 0, 0], [0, 0, 1])
else:
self._pybullet_client.resetBasePositionAndOrientation(
self.quadruped, INIT_POSITION, INIT_ORIENTATION)
self._pybullet_client.resetBaseVelocity(self.quadruped, [0, 0, 0],
[0, 0, 0])
self._pybullet_client.resetBasePositionAndOrientation(self.quadruped, INIT_POSITION,
INIT_ORIENTATION)
self._pybullet_client.resetBaseVelocity(self.quadruped, [0, 0, 0], [0, 0, 0])
self.ResetPose(add_constraint=False)
self._overheat_counter = np.zeros(self.num_motors)
self._motor_enabled_list = [True] * self.num_motors
def _SetMotorTorqueById(self, motor_id, torque):
self._pybullet_client.setJointMotorControl2(
bodyIndex=self.quadruped,
jointIndex=motor_id,
controlMode=self._pybullet_client.TORQUE_CONTROL,
force=torque)
self._pybullet_client.setJointMotorControl2(bodyIndex=self.quadruped,
jointIndex=motor_id,
controlMode=self._pybullet_client.TORQUE_CONTROL,
force=torque)
def _SetDesiredMotorAngleById(self, motor_id, desired_angle):
self._pybullet_client.setJointMotorControl2(
bodyIndex=self.quadruped,
jointIndex=motor_id,
controlMode=self._pybullet_client.POSITION_CONTROL,
targetPosition=desired_angle,
positionGain=self._kp,
velocityGain=self._kd,
force=self._max_force)
self._pybullet_client.setJointMotorControl2(bodyIndex=self.quadruped,
jointIndex=motor_id,
controlMode=self._pybullet_client.POSITION_CONTROL,
targetPosition=desired_angle,
positionGain=self._kp,
velocityGain=self._kd,
force=self._max_force)
def _SetDesiredMotorAngleByName(self, motor_name, desired_angle):
self._SetDesiredMotorAngleById(self._joint_name_to_id[motor_name],
desired_angle)
self._SetDesiredMotorAngleById(self._joint_name_to_id[motor_name], desired_angle)
def ResetPose(self, add_constraint):
"""Reset the pose of the minitaur.
@@ -200,59 +189,53 @@ class Minitaur(object):
knee_angle = -2.1834
leg_position = LEG_POSITION[leg_id]
self._pybullet_client.resetJointState(
self.quadruped,
self._joint_name_to_id["motor_" + leg_position + "L_joint"],
self._motor_direction[2 * leg_id] * half_pi,
targetVelocity=0)
self._pybullet_client.resetJointState(
self.quadruped,
self._joint_name_to_id["knee_" + leg_position + "L_link"],
self._motor_direction[2 * leg_id] * knee_angle,
targetVelocity=0)
self._pybullet_client.resetJointState(
self.quadruped,
self._joint_name_to_id["motor_" + leg_position + "R_joint"],
self._motor_direction[2 * leg_id + 1] * half_pi,
targetVelocity=0)
self._pybullet_client.resetJointState(
self.quadruped,
self._joint_name_to_id["knee_" + leg_position + "R_link"],
self._motor_direction[2 * leg_id + 1] * knee_angle,
targetVelocity=0)
self._pybullet_client.resetJointState(self.quadruped,
self._joint_name_to_id["motor_" + leg_position +
"L_joint"],
self._motor_direction[2 * leg_id] * half_pi,
targetVelocity=0)
self._pybullet_client.resetJointState(self.quadruped,
self._joint_name_to_id["knee_" + leg_position +
"L_link"],
self._motor_direction[2 * leg_id] * knee_angle,
targetVelocity=0)
self._pybullet_client.resetJointState(self.quadruped,
self._joint_name_to_id["motor_" + leg_position +
"R_joint"],
self._motor_direction[2 * leg_id + 1] * half_pi,
targetVelocity=0)
self._pybullet_client.resetJointState(self.quadruped,
self._joint_name_to_id["knee_" + leg_position +
"R_link"],
self._motor_direction[2 * leg_id + 1] * knee_angle,
targetVelocity=0)
if add_constraint:
self._pybullet_client.createConstraint(
self.quadruped, self._joint_name_to_id["knee_"
+ leg_position + "R_link"],
self.quadruped, self._joint_name_to_id["knee_"
+ leg_position + "L_link"],
self._pybullet_client.JOINT_POINT2POINT, [0, 0, 0],
KNEE_CONSTRAINT_POINT_RIGHT, KNEE_CONSTRAINT_POINT_LEFT)
self.quadruped, self._joint_name_to_id["knee_" + leg_position + "R_link"],
self.quadruped, self._joint_name_to_id["knee_" + leg_position + "L_link"],
self._pybullet_client.JOINT_POINT2POINT, [0, 0, 0], KNEE_CONSTRAINT_POINT_RIGHT,
KNEE_CONSTRAINT_POINT_LEFT)
if self._accurate_motor_model_enabled or self._pd_control_enabled:
# Disable the default motor in pybullet.
self._pybullet_client.setJointMotorControl2(
bodyIndex=self.quadruped,
jointIndex=(self._joint_name_to_id["motor_"
+ leg_position + "L_joint"]),
jointIndex=(self._joint_name_to_id["motor_" + leg_position + "L_joint"]),
controlMode=self._pybullet_client.VELOCITY_CONTROL,
targetVelocity=0,
force=knee_friction_force)
self._pybullet_client.setJointMotorControl2(
bodyIndex=self.quadruped,
jointIndex=(self._joint_name_to_id["motor_"
+ leg_position + "R_joint"]),
jointIndex=(self._joint_name_to_id["motor_" + leg_position + "R_joint"]),
controlMode=self._pybullet_client.VELOCITY_CONTROL,
targetVelocity=0,
force=knee_friction_force)
else:
self._SetDesiredMotorAngleByName(
"motor_" + leg_position + "L_joint",
self._motor_direction[2 * leg_id] * half_pi)
self._SetDesiredMotorAngleByName("motor_" + leg_position + "L_joint",
self._motor_direction[2 * leg_id] * half_pi)
self._SetDesiredMotorAngleByName("motor_" + leg_position + "R_joint",
self._motor_direction[2 * leg_id
+ 1] * half_pi)
self._motor_direction[2 * leg_id + 1] * half_pi)
self._pybullet_client.setJointMotorControl2(
bodyIndex=self.quadruped,
@@ -273,8 +256,7 @@ class Minitaur(object):
Returns:
The position of minitaur's base.
"""
position, _ = (
self._pybullet_client.getBasePositionAndOrientation(self.quadruped))
position, _ = (self._pybullet_client.getBasePositionAndOrientation(self.quadruped))
return position
def GetBaseOrientation(self):
@@ -283,8 +265,7 @@ class Minitaur(object):
Returns:
The orientation of minitaur's base.
"""
_, orientation = (
self._pybullet_client.getBasePositionAndOrientation(self.quadruped))
_, orientation = (self._pybullet_client.getBasePositionAndOrientation(self.quadruped))
return orientation
def GetActionDimension(self):
@@ -304,10 +285,9 @@ class Minitaur(object):
"""
upper_bound = np.array([0.0] * self.GetObservationDimension())
upper_bound[0:self.num_motors] = math.pi # Joint angle.
upper_bound[self.num_motors:2 * self.num_motors] = (
motor.MOTOR_SPEED_LIMIT) # Joint velocity.
upper_bound[2 * self.num_motors:3 * self.num_motors] = (
motor.OBSERVED_TORQUE_LIMIT) # Joint torque.
upper_bound[self.num_motors:2 * self.num_motors] = (motor.MOTOR_SPEED_LIMIT) # Joint velocity.
upper_bound[2 * self.num_motors:3 * self.num_motors] = (motor.OBSERVED_TORQUE_LIMIT
) # Joint torque.
upper_bound[3 * self.num_motors:] = 1.0 # Quaternion of base orientation.
return upper_bound
@@ -354,12 +334,9 @@ class Minitaur(object):
"""
if self._motor_velocity_limit < np.inf:
current_motor_angle = self.GetMotorAngles()
motor_commands_max = (
current_motor_angle + self.time_step * self._motor_velocity_limit)
motor_commands_min = (
current_motor_angle - self.time_step * self._motor_velocity_limit)
motor_commands = np.clip(motor_commands, motor_commands_min,
motor_commands_max)
motor_commands_max = (current_motor_angle + self.time_step * self._motor_velocity_limit)
motor_commands_min = (current_motor_angle - self.time_step * self._motor_velocity_limit)
motor_commands = np.clip(motor_commands, motor_commands_min, motor_commands_max)
if self._accurate_motor_model_enabled or self._pd_control_enabled:
q = self.GetMotorAngles()
@@ -373,8 +350,7 @@ class Minitaur(object):
self._overheat_counter[i] += 1
else:
self._overheat_counter[i] = 0
if (self._overheat_counter[i] >
OVERHEAT_SHUTDOWN_TIME / self.time_step):
if (self._overheat_counter[i] > OVERHEAT_SHUTDOWN_TIME / self.time_step):
self._motor_enabled_list[i] = False
# The torque is already in the observation space because we use
@@ -382,12 +358,11 @@ class Minitaur(object):
self._observed_motor_torques = observed_torque
# Transform into the motor space when applying the torque.
self._applied_motor_torque = np.multiply(actual_torque,
self._motor_direction)
self._applied_motor_torque = np.multiply(actual_torque, self._motor_direction)
for motor_id, motor_torque, motor_enabled in zip(
self._motor_id_list, self._applied_motor_torque,
self._motor_enabled_list):
for motor_id, motor_torque, motor_enabled in zip(self._motor_id_list,
self._applied_motor_torque,
self._motor_enabled_list):
if motor_enabled:
self._SetMotorTorqueById(motor_id, motor_torque)
else:
@@ -403,14 +378,12 @@ class Minitaur(object):
self._applied_motor_torques = np.multiply(self._observed_motor_torques,
self._motor_direction)
for motor_id, motor_torque in zip(self._motor_id_list,
self._applied_motor_torques):
for motor_id, motor_torque in zip(self._motor_id_list, self._applied_motor_torques):
self._SetMotorTorqueById(motor_id, motor_torque)
else:
motor_commands_with_direction = np.multiply(motor_commands,
self._motor_direction)
for motor_id, motor_command_with_direction in zip(
self._motor_id_list, motor_commands_with_direction):
motor_commands_with_direction = np.multiply(motor_commands, self._motor_direction)
for motor_id, motor_command_with_direction in zip(self._motor_id_list,
motor_commands_with_direction):
self._SetDesiredMotorAngleById(motor_id, motor_command_with_direction)
def GetMotorAngles(self):
@@ -471,13 +444,13 @@ class Minitaur(object):
quater_pi = math.pi / 4
for i in range(self.num_motors):
action_idx = i // 2
forward_backward_component = (-scale_for_singularity * quater_pi * (
actions[action_idx + half_num_motors] + offset_for_singularity))
forward_backward_component = (
-scale_for_singularity * quater_pi *
(actions[action_idx + half_num_motors] + offset_for_singularity))
extension_component = (-1)**i * quater_pi * actions[action_idx]
if i >= half_num_motors:
extension_component = -extension_component
motor_angle[i] = (
math.pi + forward_backward_component + extension_component)
motor_angle[i] = (math.pi + forward_backward_component + extension_component)
return motor_angle
def GetBaseMassFromURDF(self):
@@ -489,8 +462,7 @@ class Minitaur(object):
return self._leg_masses_urdf
def SetBaseMass(self, base_mass):
self._pybullet_client.changeDynamics(
self.quadruped, BASE_LINK_ID, mass=base_mass)
self._pybullet_client.changeDynamics(self.quadruped, BASE_LINK_ID, mass=base_mass)
def SetLegMasses(self, leg_masses):
"""Set the mass of the legs.
@@ -504,11 +476,9 @@ class Minitaur(object):
leg_masses[1] is the mass of the motor.
"""
for link_id in LEG_LINK_ID:
self._pybullet_client.changeDynamics(
self.quadruped, link_id, mass=leg_masses[0])
self._pybullet_client.changeDynamics(self.quadruped, link_id, mass=leg_masses[0])
for link_id in MOTOR_LINK_ID:
self._pybullet_client.changeDynamics(
self.quadruped, link_id, mass=leg_masses[1])
self._pybullet_client.changeDynamics(self.quadruped, link_id, mass=leg_masses[1])
def SetFootFriction(self, foot_friction):
"""Set the lateral friction of the feet.
@@ -518,8 +488,7 @@ class Minitaur(object):
shared by all four feet.
"""
for link_id in FOOT_LINK_ID:
self._pybullet_client.changeDynamics(
self.quadruped, link_id, lateralFriction=foot_friction)
self._pybullet_client.changeDynamics(self.quadruped, link_id, lateralFriction=foot_friction)
def SetBatteryVoltage(self, voltage):
if self._accurate_motor_model_enabled:
View File
@@ -6,8 +6,7 @@ import os
import inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
os.sys.path.insert(0,parentdir)
os.sys.path.insert(0, parentdir)
import math
import time
@@ -34,8 +33,9 @@ OBSERVATION_EPS = 0.01
RENDER_HEIGHT = 720
RENDER_WIDTH = 960
duckStartPos = [0,0,0.25]
duckStartOrn = [0.5,0.5,0.5,0.5]
duckStartPos = [0, 0, 0.25]
duckStartOrn = [0.5, 0.5, 0.5, 0.5]
class MinitaurBulletDuckEnv(gym.Env):
"""The gym environment for the minitaur.
@@ -47,34 +47,32 @@ class MinitaurBulletDuckEnv(gym.Env):
expenditure.
"""
metadata = {
"render.modes": ["human", "rgb_array"],
"video.frames_per_second": 50
}
metadata = {"render.modes": ["human", "rgb_array"], "video.frames_per_second": 50}
def __init__(self,
urdf_root=pybullet_data.getDataPath(),
action_repeat=1,
distance_weight=1.0,
energy_weight=0.005,
shake_weight=0.0,
drift_weight=0.0,
distance_limit=float("inf"),
observation_noise_stdev=0.0,
self_collision_enabled=True,
motor_velocity_limit=np.inf,
pd_control_enabled=False,#not needed to be true if accurate motor model is enabled (has its own better PD)
leg_model_enabled=True,
accurate_motor_model_enabled=True,
motor_kp=1.0,
motor_kd=0.02,
torque_control_enabled=False,
motor_overheat_protection=True,
hard_reset=True,
on_rack=False,
render=False,
kd_for_pd_controllers=0.3,
env_randomizer=minitaur_env_randomizer.MinitaurEnvRandomizer()):
def __init__(
self,
urdf_root=pybullet_data.getDataPath(),
action_repeat=1,
distance_weight=1.0,
energy_weight=0.005,
shake_weight=0.0,
drift_weight=0.0,
distance_limit=float("inf"),
observation_noise_stdev=0.0,
self_collision_enabled=True,
motor_velocity_limit=np.inf,
pd_control_enabled=False, #not needed to be true if accurate motor model is enabled (has its own better PD)
leg_model_enabled=True,
accurate_motor_model_enabled=True,
motor_kp=1.0,
motor_kd=0.02,
torque_control_enabled=False,
motor_overheat_protection=True,
hard_reset=True,
on_rack=False,
render=False,
kd_for_pd_controllers=0.3,
env_randomizer=minitaur_env_randomizer.MinitaurEnvRandomizer()):
"""Initialize the minitaur gym environment.
Args:
@@ -152,17 +150,14 @@ class MinitaurBulletDuckEnv(gym.Env):
self._action_repeat *= NUM_SUBSTEPS
if self._is_render:
self._pybullet_client = bullet_client.BulletClient(
connection_mode=pybullet.GUI)
self._pybullet_client = bullet_client.BulletClient(connection_mode=pybullet.GUI)
else:
self._pybullet_client = bullet_client.BulletClient()
self.seed()
self.reset()
observation_high = (
self.minitaur.GetObservationUpperBound() + OBSERVATION_EPS)
observation_low = (
self.minitaur.GetObservationLowerBound() - OBSERVATION_EPS)
observation_high = (self.minitaur.GetObservationUpperBound() + OBSERVATION_EPS)
observation_low = (self.minitaur.GetObservationLowerBound() - OBSERVATION_EPS)
action_dim = 8
action_high = np.array([self._action_bound] * action_dim)
self.action_space = spaces.Box(-action_high, action_high, dtype=np.float32)
@@ -183,35 +178,36 @@ class MinitaurBulletDuckEnv(gym.Env):
numSolverIterations=int(self._num_bullet_solver_iterations))
self._pybullet_client.setTimeStep(self._time_step)
self._groundId = self._pybullet_client.loadURDF("%s/plane.urdf" % self._urdf_root)
self._duckId = self._pybullet_client.loadURDF("%s/duck_vhacd.urdf" % self._urdf_root,duckStartPos,duckStartOrn)
self._duckId = self._pybullet_client.loadURDF("%s/duck_vhacd.urdf" % self._urdf_root,
duckStartPos, duckStartOrn)
self._pybullet_client.setGravity(0, 0, -10)
acc_motor = self._accurate_motor_model_enabled
motor_protect = self._motor_overheat_protection
self.minitaur = (minitaur.Minitaur(
pybullet_client=self._pybullet_client,
urdf_root=self._urdf_root,
time_step=self._time_step,
self_collision_enabled=self._self_collision_enabled,
motor_velocity_limit=self._motor_velocity_limit,
pd_control_enabled=self._pd_control_enabled,
accurate_motor_model_enabled=acc_motor,
motor_kp=self._motor_kp,
motor_kd=self._motor_kd,
torque_control_enabled=self._torque_control_enabled,
motor_overheat_protection=motor_protect,
on_rack=self._on_rack,
kd_for_pd_controllers=self._kd_for_pd_controllers))
self.minitaur = (minitaur.Minitaur(pybullet_client=self._pybullet_client,
urdf_root=self._urdf_root,
time_step=self._time_step,
self_collision_enabled=self._self_collision_enabled,
motor_velocity_limit=self._motor_velocity_limit,
pd_control_enabled=self._pd_control_enabled,
accurate_motor_model_enabled=acc_motor,
motor_kp=self._motor_kp,
motor_kd=self._motor_kd,
torque_control_enabled=self._torque_control_enabled,
motor_overheat_protection=motor_protect,
on_rack=self._on_rack,
kd_for_pd_controllers=self._kd_for_pd_controllers))
else:
self.minitaur.Reset(reload_urdf=False)
self._pybullet_client.resetBasePositionAndOrientation(self._duckId,duckStartPos,duckStartOrn)
self._pybullet_client.resetBasePositionAndOrientation(self._duckId, duckStartPos,
duckStartOrn)
if self._env_randomizer is not None:
self._env_randomizer.randomize_env(self)
self._env_step_counter = 0
self._last_base_position = [0, 0, 0]
self._objectives = []
self._pybullet_client.resetDebugVisualizerCamera(
self._cam_dist, self._cam_yaw, self._cam_pitch, [0, 0, 0])
self._pybullet_client.resetDebugVisualizerCamera(self._cam_dist, self._cam_yaw,
self._cam_pitch, [0, 0, 0])
if not self._torque_control_enabled:
for _ in range(100):
if self._pd_control_enabled or self._accurate_motor_model_enabled:
@@ -228,8 +224,7 @@ class MinitaurBulletDuckEnv(gym.Env):
for i, action_component in enumerate(action):
if not (-self._action_bound - ACTION_EPS <= action_component <=
self._action_bound + ACTION_EPS):
raise ValueError(
"{}th action {} out of bounds.".format(i, action_component))
raise ValueError("{}th action {} out of bounds.".format(i, action_component))
action = self.minitaur.ConvertFromLegModel(action)
return action
@@ -258,8 +253,8 @@ class MinitaurBulletDuckEnv(gym.Env):
if time_to_sleep > 0:
time.sleep(time_to_sleep)
base_pos = self.minitaur.GetBasePosition()
self._pybullet_client.resetDebugVisualizerCamera(
self._cam_dist, self._cam_yaw, self._cam_pitch, base_pos)
self._pybullet_client.resetDebugVisualizerCamera(self._cam_dist, self._cam_yaw,
self._cam_pitch, base_pos)
action = self._transform_action_to_motor_command(action)
for _ in range(self._action_repeat):
self.minitaur.ApplyAction(action)
@@ -281,12 +276,17 @@ class MinitaurBulletDuckEnv(gym.Env):
pitch=self._cam_pitch,
roll=0,
upAxisIndex=2)
proj_matrix = self._pybullet_client.computeProjectionMatrixFOV(
fov=60, aspect=float(RENDER_WIDTH)/RENDER_HEIGHT,
nearVal=0.1, farVal=100.0)
(_, _, px, _, _) = self._pybullet_client.getCameraImage(
width=RENDER_WIDTH, height=RENDER_HEIGHT, viewMatrix=view_matrix,
projectionMatrix=proj_matrix, renderer=pybullet.ER_BULLET_HARDWARE_OPENGL)
proj_matrix = self._pybullet_client.computeProjectionMatrixFOV(fov=60,
aspect=float(RENDER_WIDTH) /
RENDER_HEIGHT,
nearVal=0.1,
farVal=100.0)
(_, _, px, _,
_) = self._pybullet_client.getCameraImage(width=RENDER_WIDTH,
height=RENDER_HEIGHT,
viewMatrix=view_matrix,
projectionMatrix=proj_matrix,
renderer=pybullet.ER_BULLET_HARDWARE_OPENGL)
rgb_array = np.array(px)
rgb_array = rgb_array[:, :, :3]
return rgb_array
@@ -297,9 +297,8 @@ class MinitaurBulletDuckEnv(gym.Env):
Returns:
A numpy array of motor angles.
"""
return np.array(
self._observation[MOTOR_ANGLE_OBSERVATION_INDEX:
MOTOR_ANGLE_OBSERVATION_INDEX + NUM_MOTORS])
return np.array(self._observation[MOTOR_ANGLE_OBSERVATION_INDEX:MOTOR_ANGLE_OBSERVATION_INDEX +
NUM_MOTORS])
def get_minitaur_motor_velocities(self):
"""Get the minitaur's motor velocities.
@@ -308,8 +307,8 @@ class MinitaurBulletDuckEnv(gym.Env):
A numpy array of motor velocities.
"""
return np.array(
self._observation[MOTOR_VELOCITY_OBSERVATION_INDEX:
MOTOR_VELOCITY_OBSERVATION_INDEX + NUM_MOTORS])
self._observation[MOTOR_VELOCITY_OBSERVATION_INDEX:MOTOR_VELOCITY_OBSERVATION_INDEX +
NUM_MOTORS])
def get_minitaur_motor_torques(self):
"""Get the minitaur's motor torques.
@@ -318,8 +317,8 @@ class MinitaurBulletDuckEnv(gym.Env):
A numpy array of motor torques.
"""
return np.array(
self._observation[MOTOR_TORQUE_OBSERVATION_INDEX:
MOTOR_TORQUE_OBSERVATION_INDEX + NUM_MOTORS])
self._observation[MOTOR_TORQUE_OBSERVATION_INDEX:MOTOR_TORQUE_OBSERVATION_INDEX +
NUM_MOTORS])
def get_minitaur_base_orientation(self):
"""Get the minitaur's base orientation, represented by a quaternion.
@@ -330,8 +329,8 @@ class MinitaurBulletDuckEnv(gym.Env):
return np.array(self._observation[BASE_ORIENTATION_OBSERVATION_INDEX:])
def lost_duck(self):
points = self._pybullet_client.getContactPoints(self._duckId, self._groundId);
return len(points)>0
points = self._pybullet_client.getContactPoints(self._duckId, self._groundId)
return len(points) > 0
def is_fallen(self):
"""Decide whether the minitaur has fallen.
@@ -347,8 +346,7 @@ class MinitaurBulletDuckEnv(gym.Env):
rot_mat = self._pybullet_client.getMatrixFromQuaternion(orientation)
local_up = rot_mat[6:]
pos = self.minitaur.GetBasePosition()
return (np.dot(np.asarray([0, 0, 1]), np.asarray(local_up)) < 0.85 or
pos[2] < 0.13)
return (np.dot(np.asarray([0, 0, 1]), np.asarray(local_up)) < 0.85 or pos[2] < 0.13)
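The fall test above boils down to one rotation-matrix entry: dotting world z with the last row of getMatrixFromQuaternion picks out R[2][2], the cosine of the tilt between the base's up axis and world z. A minimal numpy-only sketch of the same predicate, assuming pybullet's (x, y, z, w) quaternion order and the thresholds used above:

import numpy as np

def is_fallen(orientation_quat, base_height, tilt_cos_threshold=0.85, height_threshold=0.13):
  # R[2][2] of the rotation matrix is 1 - 2*(x^2 + y^2): the cosine of the
  # angle between the robot's local up axis and the world z axis.
  x, y, z, w = orientation_quat
  tilt_cos = 1.0 - 2.0 * (x * x + y * y)
  return tilt_cos < tilt_cos_threshold or base_height < height_threshold

print(is_fallen((0.0, 0.0, 0.0, 1.0), 0.2))        # upright at nominal height -> False
print(is_fallen((0.7071, 0.0, 0.0, 0.7071), 0.2))  # rolled ~90 degrees -> True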
def _termination(self):
position = self.minitaur.GetBasePosition()
@@ -364,12 +362,9 @@ class MinitaurBulletDuckEnv(gym.Env):
energy_reward = np.abs(
np.dot(self.minitaur.GetMotorTorques(),
self.minitaur.GetMotorVelocities())) * self._time_step
reward = (
self._distance_weight * forward_reward -
self._energy_weight * energy_reward + self._drift_weight * drift_reward
+ self._shake_weight * shake_reward)
self._objectives.append(
[forward_reward, energy_reward, drift_reward, shake_reward])
reward = (self._distance_weight * forward_reward - self._energy_weight * energy_reward +
self._drift_weight * drift_reward + self._shake_weight * shake_reward)
self._objectives.append([forward_reward, energy_reward, drift_reward, shake_reward])
return reward
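The reward is a fixed linear combination of four per-step terms: forward progress minus weighted energy, drift and shake penalties. A tiny worked example, using weights shaped like the MinitaurBulletEnv defaults further below and invented term values:

# Illustrative numbers only; weights shaped like the constructor defaults.
distance_weight, energy_weight, drift_weight, shake_weight = 1.0, 0.005, 0.0, 0.0

forward_reward = 0.012  # base displacement along x during this step
energy_reward = 0.8     # |torques . velocities| * time_step
drift_reward = -0.001   # sideways drift penalty term
shake_reward = -0.002   # vertical shake penalty term

reward = (distance_weight * forward_reward - energy_weight * energy_reward +
          drift_weight * drift_reward + shake_weight * shake_reward)
print(reward)  # 0.012 - 0.004 = 0.008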
def get_objectives(self):
@@ -383,9 +378,9 @@ class MinitaurBulletDuckEnv(gym.Env):
self._get_observation()
observation = np.array(self._observation)
if self._observation_noise_stdev > 0:
observation += (np.random.normal(
scale=self._observation_noise_stdev, size=observation.shape) *
self.minitaur.GetObservationUpperBound())
observation += (
np.random.normal(scale=self._observation_noise_stdev, size=observation.shape) *
self.minitaur.GetObservationUpperBound())
return observation
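When observation noise is turned on, the Gaussian sample is scaled per component by the observation upper bound, so each sensor channel receives noise proportional to its own range. A short sketch of that scaling with a hypothetical three-channel bound vector:

import numpy as np

observation = np.array([0.1, -0.4, 2.0])
observation_noise_stdev = 0.05
upper_bound = np.array([3.14, 10.0, 50.0])  # hypothetical per-channel upper bounds

noisy = observation + (np.random.normal(scale=observation_noise_stdev,
                                        size=observation.shape) * upper_bound)
print(noisy)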
if parse_version(gym.__version__) < parse_version('0.9.6'):

View File

@@ -45,24 +45,20 @@ class MinitaurEnvRandomizer(env_randomizer_base.EnvRandomizerBase):
minitaur.SetBaseMass(randomized_base_mass)
leg_masses = minitaur.GetLegMassesFromURDF()
leg_masses_lower_bound = np.array(leg_masses) * (
1.0 + self._minitaur_leg_mass_err_range[0])
leg_masses_upper_bound = np.array(leg_masses) * (
1.0 + self._minitaur_leg_mass_err_range[1])
leg_masses_lower_bound = np.array(leg_masses) * (1.0 + self._minitaur_leg_mass_err_range[0])
leg_masses_upper_bound = np.array(leg_masses) * (1.0 + self._minitaur_leg_mass_err_range[1])
randomized_leg_masses = [
np.random.uniform(leg_masses_lower_bound[i], leg_masses_upper_bound[i])
for i in range(len(leg_masses))
]
minitaur.SetLegMasses(randomized_leg_masses)
randomized_battery_voltage = random.uniform(BATTERY_VOLTAGE_RANGE[0],
BATTERY_VOLTAGE_RANGE[1])
randomized_battery_voltage = random.uniform(BATTERY_VOLTAGE_RANGE[0], BATTERY_VOLTAGE_RANGE[1])
minitaur.SetBatteryVoltage(randomized_battery_voltage)
randomized_motor_damping = random.uniform(MOTOR_VISCOUS_DAMPING_RANGE[0],
MOTOR_VISCOUS_DAMPING_RANGE[1])
minitaur.SetMotorViscousDamping(randomized_motor_damping)
randomized_foot_friction = random.uniform(MINITAUR_LEG_FRICTION[0],
MINITAUR_LEG_FRICTION[1])
randomized_foot_friction = random.uniform(MINITAUR_LEG_FRICTION[0], MINITAUR_LEG_FRICTION[1])
minitaur.SetFootFriction(randomized_foot_friction)
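The randomizer applies domain randomization at reset time: base and leg masses are perturbed within a relative error band, while battery voltage, motor viscous damping and foot friction are drawn from absolute ranges. A standalone sketch of the same sampling pattern with assumed ranges; the minitaur setters are replaced by a plain dict so the snippet runs without pybullet:

import random
import numpy as np

# Assumed ranges, shaped like the module constants used above.
BATTERY_VOLTAGE_RANGE = (14.8, 16.8)
MOTOR_VISCOUS_DAMPING_RANGE = (0.0, 0.01)
LEG_FRICTION_RANGE = (0.8, 1.5)
leg_mass_err_range = (-0.2, 0.2)

nominal_leg_masses = np.array([0.15, 0.15, 0.15, 0.15])
lower = nominal_leg_masses * (1.0 + leg_mass_err_range[0])
upper = nominal_leg_masses * (1.0 + leg_mass_err_range[1])

randomized = {
    "leg_masses": [np.random.uniform(lower[i], upper[i]) for i in range(len(nominal_leg_masses))],
    "battery_voltage": random.uniform(*BATTERY_VOLTAGE_RANGE),
    "motor_damping": random.uniform(*MOTOR_VISCOUS_DAMPING_RANGE),
    "foot_friction": random.uniform(*LEG_FRICTION_RANGE),
}
print(randomized)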

View File

@@ -5,8 +5,7 @@
import os, inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
os.sys.path.insert(0,parentdir)
os.sys.path.insert(0, parentdir)
import math
import time
@@ -33,6 +32,7 @@ OBSERVATION_EPS = 0.01
RENDER_HEIGHT = 720
RENDER_WIDTH = 960
class MinitaurBulletEnv(gym.Env):
"""The gym environment for the minitaur.
@@ -43,34 +43,32 @@ class MinitaurBulletEnv(gym.Env):
expenditure.
"""
metadata = {
"render.modes": ["human", "rgb_array"],
"video.frames_per_second": 50
}
metadata = {"render.modes": ["human", "rgb_array"], "video.frames_per_second": 50}
def __init__(self,
urdf_root=pybullet_data.getDataPath(),
action_repeat=1,
distance_weight=1.0,
energy_weight=0.005,
shake_weight=0.0,
drift_weight=0.0,
distance_limit=float("inf"),
observation_noise_stdev=0.0,
self_collision_enabled=True,
motor_velocity_limit=np.inf,
pd_control_enabled=False,#not needed to be true if accurate motor model is enabled (has its own better PD)
leg_model_enabled=True,
accurate_motor_model_enabled=True,
motor_kp=1.0,
motor_kd=0.02,
torque_control_enabled=False,
motor_overheat_protection=True,
hard_reset=True,
on_rack=False,
render=False,
kd_for_pd_controllers=0.3,
env_randomizer=minitaur_env_randomizer.MinitaurEnvRandomizer()):
def __init__(
self,
urdf_root=pybullet_data.getDataPath(),
action_repeat=1,
distance_weight=1.0,
energy_weight=0.005,
shake_weight=0.0,
drift_weight=0.0,
distance_limit=float("inf"),
observation_noise_stdev=0.0,
self_collision_enabled=True,
motor_velocity_limit=np.inf,
pd_control_enabled=False, #not needed to be true if accurate motor model is enabled (has its own better PD)
pd_control_enabled=False,  # not needed when the accurate motor model is enabled (it has its own, better PD control)
leg_model_enabled=True,
accurate_motor_model_enabled=True,
motor_kp=1.0,
motor_kd=0.02,
torque_control_enabled=False,
motor_overheat_protection=True,
hard_reset=True,
on_rack=False,
render=False,
kd_for_pd_controllers=0.3,
env_randomizer=minitaur_env_randomizer.MinitaurEnvRandomizer()):
"""Initialize the minitaur gym environment.
Args:
@@ -147,17 +145,14 @@ class MinitaurBulletEnv(gym.Env):
self._action_repeat *= NUM_SUBSTEPS
if self._is_render:
self._pybullet_client = bullet_client.BulletClient(
connection_mode=pybullet.GUI)
self._pybullet_client = bullet_client.BulletClient(connection_mode=pybullet.GUI)
else:
self._pybullet_client = bullet_client.BulletClient()
self.seed()
self.reset()
observation_high = (
self.minitaur.GetObservationUpperBound() + OBSERVATION_EPS)
observation_low = (
self.minitaur.GetObservationLowerBound() - OBSERVATION_EPS)
observation_high = (self.minitaur.GetObservationUpperBound() + OBSERVATION_EPS)
observation_low = (self.minitaur.GetObservationLowerBound() - OBSERVATION_EPS)
action_dim = 8
action_high = np.array([self._action_bound] * action_dim)
self.action_space = spaces.Box(-action_high, action_high, dtype=np.float32)
@@ -178,25 +173,25 @@ class MinitaurBulletEnv(gym.Env):
numSolverIterations=int(self._num_bullet_solver_iterations))
self._pybullet_client.setTimeStep(self._time_step)
plane = self._pybullet_client.loadURDF("%s/plane.urdf" % self._urdf_root)
self._pybullet_client.changeVisualShape(plane,-1,rgbaColor=[1,1,1,0.9])
self._pybullet_client.configureDebugVisualizer(self._pybullet_client.COV_ENABLE_PLANAR_REFLECTION,0)
self._pybullet_client.changeVisualShape(plane, -1, rgbaColor=[1, 1, 1, 0.9])
self._pybullet_client.configureDebugVisualizer(
self._pybullet_client.COV_ENABLE_PLANAR_REFLECTION, 0)
self._pybullet_client.setGravity(0, 0, -10)
acc_motor = self._accurate_motor_model_enabled
motor_protect = self._motor_overheat_protection
self.minitaur = (minitaur.Minitaur(
pybullet_client=self._pybullet_client,
urdf_root=self._urdf_root,
time_step=self._time_step,
self_collision_enabled=self._self_collision_enabled,
motor_velocity_limit=self._motor_velocity_limit,
pd_control_enabled=self._pd_control_enabled,
accurate_motor_model_enabled=acc_motor,
motor_kp=self._motor_kp,
motor_kd=self._motor_kd,
torque_control_enabled=self._torque_control_enabled,
motor_overheat_protection=motor_protect,
on_rack=self._on_rack,
kd_for_pd_controllers=self._kd_for_pd_controllers))
self.minitaur = (minitaur.Minitaur(pybullet_client=self._pybullet_client,
urdf_root=self._urdf_root,
time_step=self._time_step,
self_collision_enabled=self._self_collision_enabled,
motor_velocity_limit=self._motor_velocity_limit,
pd_control_enabled=self._pd_control_enabled,
accurate_motor_model_enabled=acc_motor,
motor_kp=self._motor_kp,
motor_kd=self._motor_kd,
torque_control_enabled=self._torque_control_enabled,
motor_overheat_protection=motor_protect,
on_rack=self._on_rack,
kd_for_pd_controllers=self._kd_for_pd_controllers))
else:
self.minitaur.Reset(reload_urdf=False)
@@ -206,8 +201,8 @@ class MinitaurBulletEnv(gym.Env):
self._env_step_counter = 0
self._last_base_position = [0, 0, 0]
self._objectives = []
self._pybullet_client.resetDebugVisualizerCamera(
self._cam_dist, self._cam_yaw, self._cam_pitch, [0, 0, 0])
self._pybullet_client.resetDebugVisualizerCamera(self._cam_dist, self._cam_yaw,
self._cam_pitch, [0, 0, 0])
if not self._torque_control_enabled:
for _ in range(100):
if self._pd_control_enabled or self._accurate_motor_model_enabled:
@@ -224,8 +219,7 @@ class MinitaurBulletEnv(gym.Env):
for i, action_component in enumerate(action):
if not (-self._action_bound - ACTION_EPS <= action_component <=
self._action_bound + ACTION_EPS):
raise ValueError(
"{}th action {} out of bounds.".format(i, action_component))
raise ValueError("{}th action {} out of bounds.".format(i, action_component))
action = self.minitaur.ConvertFromLegModel(action)
return action
@@ -256,14 +250,15 @@ class MinitaurBulletEnv(gym.Env):
base_pos = self.minitaur.GetBasePosition()
camInfo = self._pybullet_client.getDebugVisualizerCamera()
curTargetPos = camInfo[11]
distance=camInfo[10]
distance = camInfo[10]
yaw = camInfo[8]
pitch=camInfo[9]
targetPos = [0.95*curTargetPos[0]+0.05*base_pos[0],0.95*curTargetPos[1]+0.05*base_pos[1],curTargetPos[2]]
pitch = camInfo[9]
targetPos = [
0.95 * curTargetPos[0] + 0.05 * base_pos[0], 0.95 * curTargetPos[1] + 0.05 * base_pos[1],
curTargetPos[2]
]
self._pybullet_client.resetDebugVisualizerCamera(
distance, yaw, pitch, base_pos)
self._pybullet_client.resetDebugVisualizerCamera(distance, yaw, pitch, base_pos)
action = self._transform_action_to_motor_command(action)
for _ in range(self._action_repeat):
self.minitaur.ApplyAction(action)
@@ -285,12 +280,17 @@ class MinitaurBulletEnv(gym.Env):
pitch=self._cam_pitch,
roll=0,
upAxisIndex=2)
proj_matrix = self._pybullet_client.computeProjectionMatrixFOV(
fov=60, aspect=float(RENDER_WIDTH)/RENDER_HEIGHT,
nearVal=0.1, farVal=100.0)
(_, _, px, _, _) = self._pybullet_client.getCameraImage(
width=RENDER_WIDTH, height=RENDER_HEIGHT, viewMatrix=view_matrix,
projectionMatrix=proj_matrix, renderer=pybullet.ER_BULLET_HARDWARE_OPENGL)
proj_matrix = self._pybullet_client.computeProjectionMatrixFOV(fov=60,
aspect=float(RENDER_WIDTH) /
RENDER_HEIGHT,
nearVal=0.1,
farVal=100.0)
(_, _, px, _,
_) = self._pybullet_client.getCameraImage(width=RENDER_WIDTH,
height=RENDER_HEIGHT,
viewMatrix=view_matrix,
projectionMatrix=proj_matrix,
renderer=pybullet.ER_BULLET_HARDWARE_OPENGL)
rgb_array = np.array(px)
rgb_array = rgb_array[:, :, :3]
return rgb_array
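render() builds a view matrix orbiting the base, a 60-degree FOV projection for the 960x720 target, grabs a frame with getCameraImage and keeps only the RGB channels. A minimal sketch of the same call sequence against a DIRECT-mode client; it uses the default software renderer instead of ER_BULLET_HARDWARE_OPENGL (which needs a GUI connection) and reshapes the pixel buffer explicitly so it also works on builds without numpy support:

import numpy as np
import pybullet
import pybullet_data
from pybullet_utils import bullet_client

RENDER_WIDTH, RENDER_HEIGHT = 960, 720

client = bullet_client.BulletClient(connection_mode=pybullet.DIRECT)
client.setAdditionalSearchPath(pybullet_data.getDataPath())
client.loadURDF("plane.urdf")  # something to look at

view_matrix = client.computeViewMatrixFromYawPitchRoll(cameraTargetPosition=[0, 0, 0],
                                                       distance=1.0,
                                                       yaw=0,
                                                       pitch=-30,
                                                       roll=0,
                                                       upAxisIndex=2)
proj_matrix = client.computeProjectionMatrixFOV(fov=60,
                                                aspect=float(RENDER_WIDTH) / RENDER_HEIGHT,
                                                nearVal=0.1,
                                                farVal=100.0)
(_, _, px, _, _) = client.getCameraImage(width=RENDER_WIDTH,
                                         height=RENDER_HEIGHT,
                                         viewMatrix=view_matrix,
                                         projectionMatrix=proj_matrix)
rgb_array = np.reshape(np.array(px, dtype=np.uint8), (RENDER_HEIGHT, RENDER_WIDTH, 4))[:, :, :3]
print(rgb_array.shape)  # (720, 960, 3)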
@@ -301,9 +301,8 @@ class MinitaurBulletEnv(gym.Env):
Returns:
A numpy array of motor angles.
"""
return np.array(
self._observation[MOTOR_ANGLE_OBSERVATION_INDEX:
MOTOR_ANGLE_OBSERVATION_INDEX + NUM_MOTORS])
return np.array(self._observation[MOTOR_ANGLE_OBSERVATION_INDEX:MOTOR_ANGLE_OBSERVATION_INDEX +
NUM_MOTORS])
def get_minitaur_motor_velocities(self):
"""Get the minitaur's motor velocities.
@@ -312,8 +311,8 @@ class MinitaurBulletEnv(gym.Env):
A numpy array of motor velocities.
"""
return np.array(
self._observation[MOTOR_VELOCITY_OBSERVATION_INDEX:
MOTOR_VELOCITY_OBSERVATION_INDEX + NUM_MOTORS])
self._observation[MOTOR_VELOCITY_OBSERVATION_INDEX:MOTOR_VELOCITY_OBSERVATION_INDEX +
NUM_MOTORS])
def get_minitaur_motor_torques(self):
"""Get the minitaur's motor torques.
@@ -322,8 +321,8 @@ class MinitaurBulletEnv(gym.Env):
A numpy array of motor torques.
"""
return np.array(
self._observation[MOTOR_TORQUE_OBSERVATION_INDEX:
MOTOR_TORQUE_OBSERVATION_INDEX + NUM_MOTORS])
self._observation[MOTOR_TORQUE_OBSERVATION_INDEX:MOTOR_TORQUE_OBSERVATION_INDEX +
NUM_MOTORS])
def get_minitaur_base_orientation(self):
"""Get the minitaur's base orientation, represented by a quaternion.
@@ -347,8 +346,7 @@ class MinitaurBulletEnv(gym.Env):
rot_mat = self._pybullet_client.getMatrixFromQuaternion(orientation)
local_up = rot_mat[6:]
pos = self.minitaur.GetBasePosition()
return (np.dot(np.asarray([0, 0, 1]), np.asarray(local_up)) < 0.85 or
pos[2] < 0.13)
return (np.dot(np.asarray([0, 0, 1]), np.asarray(local_up)) < 0.85 or pos[2] < 0.13)
def _termination(self):
position = self.minitaur.GetBasePosition()
@@ -364,12 +362,9 @@ class MinitaurBulletEnv(gym.Env):
energy_reward = np.abs(
np.dot(self.minitaur.GetMotorTorques(),
self.minitaur.GetMotorVelocities())) * self._time_step
reward = (
self._distance_weight * forward_reward -
self._energy_weight * energy_reward + self._drift_weight * drift_reward
+ self._shake_weight * shake_reward)
self._objectives.append(
[forward_reward, energy_reward, drift_reward, shake_reward])
reward = (self._distance_weight * forward_reward - self._energy_weight * energy_reward +
self._drift_weight * drift_reward + self._shake_weight * shake_reward)
self._objectives.append([forward_reward, energy_reward, drift_reward, shake_reward])
return reward
def get_objectives(self):
@@ -383,9 +378,9 @@ class MinitaurBulletEnv(gym.Env):
self._get_observation()
observation = np.array(self._observation)
if self._observation_noise_stdev > 0:
observation += (np.random.normal(
scale=self._observation_noise_stdev, size=observation.shape) *
self.minitaur.GetObservationUpperBound())
observation += (
np.random.normal(scale=self._observation_noise_stdev, size=observation.shape) *
self.minitaur.GetObservationUpperBound())
return observation
if parse_version(gym.__version__) < parse_version('0.9.6'):

View File

@@ -7,8 +7,7 @@ MOTOR_VOLTAGE = 16.0
MOTOR_RESISTANCE = 0.186
MOTOR_TORQUE_CONSTANT = 0.0954
MOTOR_VISCOUS_DAMPING = 0
MOTOR_SPEED_LIMIT = MOTOR_VOLTAGE / (MOTOR_VISCOUS_DAMPING
+ MOTOR_TORQUE_CONSTANT)
MOTOR_SPEED_LIMIT = MOTOR_VOLTAGE / (MOTOR_VISCOUS_DAMPING + MOTOR_TORQUE_CONSTANT)
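With the viscous damping set to zero, MOTOR_SPEED_LIMIT reduces to the classic no-load speed of a DC motor: supply voltage divided by the back-EMF constant, which is numerically equal to the torque constant in SI units. A quick check of the arithmetic:

MOTOR_VOLTAGE = 16.0
MOTOR_VISCOUS_DAMPING = 0
MOTOR_TORQUE_CONSTANT = 0.0954

MOTOR_SPEED_LIMIT = MOTOR_VOLTAGE / (MOTOR_VISCOUS_DAMPING + MOTOR_TORQUE_CONSTANT)
print(round(MOTOR_SPEED_LIMIT, 1))  # 167.7 rad/s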
class MotorModel(object):
@@ -24,10 +23,7 @@ class MotorModel(object):
pd gains, viscous friction, back-EMF voltage and current-torque profile.
"""
def __init__(self,
torque_control_enabled=False,
kp=1.2,
kd=0):
def __init__(self, torque_control_enabled=False, kp=1.2, kd=0):
self._torque_control_enabled = torque_control_enabled
self._kp = kp
self._kd = kd
@@ -50,8 +46,7 @@ class MotorModel(object):
def get_viscous_dampling(self):
return self._viscous_damping
def convert_to_torque(self, motor_commands, current_motor_angle,
current_motor_velocity):
def convert_to_torque(self, motor_commands, current_motor_angle, current_motor_velocity):
"""Convert the commands (position control or torque control) to torque.
Args:
@@ -66,8 +61,8 @@ class MotorModel(object):
if self._torque_control_enabled:
pwm = motor_commands
else:
pwm = (-self._kp * (current_motor_angle - motor_commands)
- self._kd * current_motor_velocity)
pwm = (-self._kp * (current_motor_angle - motor_commands) -
self._kd * current_motor_velocity)
pwm = np.clip(pwm, -1.0, 1.0)
return self._convert_to_torque_from_pwm(pwm, current_motor_velocity)
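In position mode the command is first turned into a normalized PWM signal by a PD law on angle error and velocity, then clipped to [-1, 1] before the electrical model runs. A numpy sketch of that step using the default gains kp=1.2, kd=0 and invented joint states:

import numpy as np

kp, kd = 1.2, 0.0  # defaults from the constructor above

motor_commands = np.array([0.5, -0.3])         # desired angles (rad)
current_motor_angle = np.array([0.2, -0.1])    # measured angles (rad)
current_motor_velocity = np.array([1.0, 0.0])  # measured velocities (rad/s)

pwm = -kp * (current_motor_angle - motor_commands) - kd * current_motor_velocity
pwm = np.clip(pwm, -1.0, 1.0)
print(pwm)  # [ 0.36 -0.24]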
@@ -81,21 +76,19 @@ class MotorModel(object):
actual_torque: The torque that needs to be applied to the motor.
observed_torque: The torque observed by the sensor.
"""
observed_torque = np.clip(
self._torque_constant * (pwm * self._voltage / self._resistance),
-OBSERVED_TORQUE_LIMIT, OBSERVED_TORQUE_LIMIT)
observed_torque = np.clip(self._torque_constant * (pwm * self._voltage / self._resistance),
-OBSERVED_TORQUE_LIMIT, OBSERVED_TORQUE_LIMIT)
# Net voltage is clipped at 50V by diodes on the motor controller.
voltage_net = np.clip(pwm * self._voltage -
(self._torque_constant + self._viscous_damping)
* current_motor_velocity,
-VOLTAGE_CLIPPING, VOLTAGE_CLIPPING)
voltage_net = np.clip(
pwm * self._voltage -
(self._torque_constant + self._viscous_damping) * current_motor_velocity,
-VOLTAGE_CLIPPING, VOLTAGE_CLIPPING)
current = voltage_net / self._resistance
current_sign = np.sign(current)
current_magnitude = np.absolute(current)
# Saturate torque based on empirical current relation.
actual_torque = np.interp(current_magnitude, self._current_table,
self._torque_table)
actual_torque = np.interp(current_magnitude, self._current_table, self._torque_table)
actual_torque = np.multiply(current_sign, actual_torque)
return actual_torque, observed_torque
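The PWM-to-torque step models the drive electronics: the observed torque is the ideal Kt * V / R value (clipped to the sensor limit), while the actual torque clips the net voltage after back-EMF and damping, converts it to current, and saturates it through an empirical current-torque table. A self-contained sketch under assumed constants and a made-up saturation table:

import numpy as np

# Assumed values, shaped like the module-level constants in this file.
VOLTAGE, RESISTANCE, TORQUE_CONSTANT, VISCOUS_DAMPING = 16.0, 0.186, 0.0954, 0.0
VOLTAGE_CLIPPING, OBSERVED_TORQUE_LIMIT = 50.0, 5.7
current_table = [0.0, 10.0, 20.0, 40.0]  # hypothetical empirical current->torque map
torque_table = [0.0, 1.0, 1.8, 2.5]

def convert_to_torque_from_pwm(pwm, motor_velocity):
  observed_torque = np.clip(TORQUE_CONSTANT * (pwm * VOLTAGE / RESISTANCE),
                            -OBSERVED_TORQUE_LIMIT, OBSERVED_TORQUE_LIMIT)
  voltage_net = np.clip(pwm * VOLTAGE -
                        (TORQUE_CONSTANT + VISCOUS_DAMPING) * motor_velocity,
                        -VOLTAGE_CLIPPING, VOLTAGE_CLIPPING)
  current = voltage_net / RESISTANCE
  actual_torque = np.sign(current) * np.interp(np.absolute(current),
                                               current_table, torque_table)
  return actual_torque, observed_torque

print(convert_to_torque_from_pwm(np.array([0.2]), np.array([5.0])))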

View File

@@ -4,80 +4,150 @@ import math
import numpy as np
class Racecar:
def __init__(self, bullet_client, urdfRootPath='', timeStep=0.01):
self.urdfRootPath = urdfRootPath
self.timeStep = timeStep
self._p = bullet_client
self.reset()
def __init__(self, bullet_client, urdfRootPath='', timeStep=0.01):
self.urdfRootPath = urdfRootPath
self.timeStep = timeStep
self._p = bullet_client
self.reset()
def reset(self):
car = self._p.loadURDF(os.path.join(self.urdfRootPath,"racecar/racecar_differential.urdf"), [0,0,.2],useFixedBase=False)
self.racecarUniqueId = car
#for i in range (self._p.getNumJoints(car)):
# print (self._p.getJointInfo(car,i))
for wheel in range(self._p.getNumJoints(car)):
self._p.setJointMotorControl2(car,wheel,self._p.VELOCITY_CONTROL,targetVelocity=0,force=0)
self._p.getJointInfo(car,wheel)
def reset(self):
car = self._p.loadURDF(os.path.join(self.urdfRootPath, "racecar/racecar_differential.urdf"),
[0, 0, .2],
useFixedBase=False)
self.racecarUniqueId = car
#for i in range (self._p.getNumJoints(car)):
# print (self._p.getJointInfo(car,i))
for wheel in range(self._p.getNumJoints(car)):
self._p.setJointMotorControl2(car,
wheel,
self._p.VELOCITY_CONTROL,
targetVelocity=0,
force=0)
self._p.getJointInfo(car, wheel)
#self._p.setJointMotorControl2(car,10,self._p.VELOCITY_CONTROL,targetVelocity=1,force=10)
c = self._p.createConstraint(car,9,car,11,jointType=self._p.JOINT_GEAR,jointAxis =[0,1,0],parentFramePosition=[0,0,0],childFramePosition=[0,0,0])
self._p.changeConstraint(c,gearRatio=1, maxForce=10000)
#self._p.setJointMotorControl2(car,10,self._p.VELOCITY_CONTROL,targetVelocity=1,force=10)
c = self._p.createConstraint(car,
9,
car,
11,
jointType=self._p.JOINT_GEAR,
jointAxis=[0, 1, 0],
parentFramePosition=[0, 0, 0],
childFramePosition=[0, 0, 0])
self._p.changeConstraint(c, gearRatio=1, maxForce=10000)
c = self._p.createConstraint(car,10,car,13,jointType=self._p.JOINT_GEAR,jointAxis =[0,1,0],parentFramePosition=[0,0,0],childFramePosition=[0,0,0])
self._p.changeConstraint(c,gearRatio=-1, maxForce=10000)
c = self._p.createConstraint(car,
10,
car,
13,
jointType=self._p.JOINT_GEAR,
jointAxis=[0, 1, 0],
parentFramePosition=[0, 0, 0],
childFramePosition=[0, 0, 0])
self._p.changeConstraint(c, gearRatio=-1, maxForce=10000)
c = self._p.createConstraint(car,9,car,13,jointType=self._p.JOINT_GEAR,jointAxis =[0,1,0],parentFramePosition=[0,0,0],childFramePosition=[0,0,0])
self._p.changeConstraint(c,gearRatio=-1, maxForce=10000)
c = self._p.createConstraint(car,
9,
car,
13,
jointType=self._p.JOINT_GEAR,
jointAxis=[0, 1, 0],
parentFramePosition=[0, 0, 0],
childFramePosition=[0, 0, 0])
self._p.changeConstraint(c, gearRatio=-1, maxForce=10000)
c = self._p.createConstraint(car,16,car,18,jointType=self._p.JOINT_GEAR,jointAxis =[0,1,0],parentFramePosition=[0,0,0],childFramePosition=[0,0,0])
self._p.changeConstraint(c,gearRatio=1, maxForce=10000)
c = self._p.createConstraint(car,
16,
car,
18,
jointType=self._p.JOINT_GEAR,
jointAxis=[0, 1, 0],
parentFramePosition=[0, 0, 0],
childFramePosition=[0, 0, 0])
self._p.changeConstraint(c, gearRatio=1, maxForce=10000)
c = self._p.createConstraint(car,16,car,19,jointType=self._p.JOINT_GEAR,jointAxis =[0,1,0],parentFramePosition=[0,0,0],childFramePosition=[0,0,0])
self._p.changeConstraint(c,gearRatio=-1, maxForce=10000)
c = self._p.createConstraint(car,
16,
car,
19,
jointType=self._p.JOINT_GEAR,
jointAxis=[0, 1, 0],
parentFramePosition=[0, 0, 0],
childFramePosition=[0, 0, 0])
self._p.changeConstraint(c, gearRatio=-1, maxForce=10000)
c = self._p.createConstraint(car,17,car,19,jointType=self._p.JOINT_GEAR,jointAxis =[0,1,0],parentFramePosition=[0,0,0],childFramePosition=[0,0,0])
self._p.changeConstraint(c,gearRatio=-1, maxForce=10000)
c = self._p.createConstraint(car,
17,
car,
19,
jointType=self._p.JOINT_GEAR,
jointAxis=[0, 1, 0],
parentFramePosition=[0, 0, 0],
childFramePosition=[0, 0, 0])
self._p.changeConstraint(c, gearRatio=-1, maxForce=10000)
c = self._p.createConstraint(car,1,car,18,jointType=self._p.JOINT_GEAR,jointAxis =[0,1,0],parentFramePosition=[0,0,0],childFramePosition=[0,0,0])
self._p.changeConstraint(c,gearRatio=-1, gearAuxLink = 15, maxForce=10000)
c = self._p.createConstraint(car,3,car,19,jointType=self._p.JOINT_GEAR,jointAxis =[0,1,0],parentFramePosition=[0,0,0],childFramePosition=[0,0,0])
self._p.changeConstraint(c,gearRatio=-1, gearAuxLink = 15,maxForce=10000)
c = self._p.createConstraint(car,
1,
car,
18,
jointType=self._p.JOINT_GEAR,
jointAxis=[0, 1, 0],
parentFramePosition=[0, 0, 0],
childFramePosition=[0, 0, 0])
self._p.changeConstraint(c, gearRatio=-1, gearAuxLink=15, maxForce=10000)
c = self._p.createConstraint(car,
3,
car,
19,
jointType=self._p.JOINT_GEAR,
jointAxis=[0, 1, 0],
parentFramePosition=[0, 0, 0],
childFramePosition=[0, 0, 0])
self._p.changeConstraint(c, gearRatio=-1, gearAuxLink=15, maxForce=10000)
self.steeringLinks = [0,2]
self.maxForce = 20
self.nMotors = 2
self.motorizedwheels=[8,15]
self.speedMultiplier = 20.
self.steeringMultiplier = 0.5
self.steeringLinks = [0, 2]
self.maxForce = 20
self.nMotors = 2
self.motorizedwheels = [8, 15]
self.speedMultiplier = 20.
self.steeringMultiplier = 0.5
def getActionDimension(self):
return self.nMotors
def getActionDimension(self):
return self.nMotors
def getObservationDimension(self):
return len(self.getObservation())
def getObservationDimension(self):
return len(self.getObservation())
def getObservation(self):
observation = []
pos,orn=self._p.getBasePositionAndOrientation(self.racecarUniqueId)
def getObservation(self):
observation = []
pos, orn = self._p.getBasePositionAndOrientation(self.racecarUniqueId)
observation.extend(list(pos))
observation.extend(list(orn))
observation.extend(list(pos))
observation.extend(list(orn))
return observation
return observation
def applyAction(self, motorCommands):
targetVelocity=motorCommands[0]*self.speedMultiplier
#print("targetVelocity")
#print(targetVelocity)
steeringAngle = motorCommands[1]*self.steeringMultiplier
#print("steeringAngle")
#print(steeringAngle)
#print("maxForce")
#print(self.maxForce)
def applyAction(self, motorCommands):
targetVelocity = motorCommands[0] * self.speedMultiplier
#print("targetVelocity")
#print(targetVelocity)
steeringAngle = motorCommands[1] * self.steeringMultiplier
#print("steeringAngle")
#print(steeringAngle)
#print("maxForce")
#print(self.maxForce)
for motor in self.motorizedwheels:
self._p.setJointMotorControl2(self.racecarUniqueId,motor,self._p.VELOCITY_CONTROL,targetVelocity=targetVelocity,force=self.maxForce)
for steer in self.steeringLinks:
self._p.setJointMotorControl2(self.racecarUniqueId,steer,self._p.POSITION_CONTROL,targetPosition=steeringAngle)
for motor in self.motorizedwheels:
self._p.setJointMotorControl2(self.racecarUniqueId,
motor,
self._p.VELOCITY_CONTROL,
targetVelocity=targetVelocity,
force=self.maxForce)
for steer in self.steeringLinks:
self._p.setJointMotorControl2(self.racecarUniqueId,
steer,
self._p.POSITION_CONTROL,
targetPosition=steeringAngle)
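applyAction drives the two motorized rear wheels with VELOCITY_CONTROL and the two steering links with POSITION_CONTROL, after scaling the normalized [throttle, steering] command. A minimal runnable sketch of the same pattern on a DIRECT client; the joint indices and multipliers are the ones hard-coded above:

import pybullet
import pybullet_data
from pybullet_utils import bullet_client

p = bullet_client.BulletClient(connection_mode=pybullet.DIRECT)
p.setAdditionalSearchPath(pybullet_data.getDataPath())
car = p.loadURDF("racecar/racecar_differential.urdf", [0, 0, .2], useFixedBase=False)

motorized_wheels, steering_links = [8, 15], [0, 2]
speed_multiplier, steering_multiplier, max_force = 20., 0.5, 20

motor_commands = [0.4, -0.1]  # normalized [throttle, steering]
target_velocity = motor_commands[0] * speed_multiplier
steering_angle = motor_commands[1] * steering_multiplier

for wheel in motorized_wheels:
  p.setJointMotorControl2(car, wheel, p.VELOCITY_CONTROL,
                          targetVelocity=target_velocity, force=max_force)
for steer in steering_links:
  p.setJointMotorControl2(car, steer, p.POSITION_CONTROL,
                          targetPosition=steering_angle)
p.stepSimulation()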

View File

@@ -1,7 +1,7 @@
import os, inspect
import os, inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
os.sys.path.insert(0,parentdir)
os.sys.path.insert(0, parentdir)
import math
import gym
@@ -19,11 +19,9 @@ from pkg_resources import parse_version
RENDER_HEIGHT = 720
RENDER_WIDTH = 960
class RacecarGymEnv(gym.Env):
metadata = {
'render.modes': ['human', 'rgb_array'],
'video.frames_per_second' : 50
}
metadata = {'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': 50}
def __init__(self,
urdfRoot=pybullet_data.getDataPath(),
@@ -42,25 +40,24 @@ class RacecarGymEnv(gym.Env):
self._renders = renders
self._isDiscrete = isDiscrete
if self._renders:
self._p = bullet_client.BulletClient(
connection_mode=pybullet.GUI)
self._p = bullet_client.BulletClient(connection_mode=pybullet.GUI)
else:
self._p = bullet_client.BulletClient()
self.seed()
#self.reset()
observationDim = 2 #len(self.getExtendedObservation())
observationDim = 2 #len(self.getExtendedObservation())
#print("observationDim")
#print(observationDim)
# observation_high = np.array([np.finfo(np.float32).max] * observationDim)
observation_high = np.ones(observationDim) * 1000 #np.inf
observation_high = np.ones(observationDim) * 1000 #np.inf
if (isDiscrete):
self.action_space = spaces.Discrete(9)
else:
action_dim = 2
self._action_bound = 1
action_high = np.array([self._action_bound] * action_dim)
self.action_space = spaces.Box(-action_high, action_high, dtype=np.float32)
action_dim = 2
self._action_bound = 1
action_high = np.array([self._action_bound] * action_dim)
self.action_space = spaces.Box(-action_high, action_high, dtype=np.float32)
self.observation_space = spaces.Box(-observation_high, observation_high, dtype=np.float32)
self.viewer = None
@@ -69,23 +66,24 @@ class RacecarGymEnv(gym.Env):
#p.setPhysicsEngineParameter(numSolverIterations=300)
self._p.setTimeStep(self._timeStep)
#self._p.loadURDF(os.path.join(self._urdfRoot,"plane.urdf"))
stadiumobjects = self._p.loadSDF(os.path.join(self._urdfRoot,"stadium.sdf"))
stadiumobjects = self._p.loadSDF(os.path.join(self._urdfRoot, "stadium.sdf"))
#move the stadium objects slightly above 0
#for i in stadiumobjects:
# pos,orn = self._p.getBasePositionAndOrientation(i)
# newpos = [pos[0],pos[1],pos[2]-0.1]
# self._p.resetBasePositionAndOrientation(i,newpos,orn)
dist = 5 +2.*random.random()
ang = 2.*3.1415925438*random.random()
dist = 5 + 2. * random.random()
ang = 2. * 3.1415925438 * random.random()
ballx = dist * math.sin(ang)
bally = dist * math.cos(ang)
ballz = 1
self._ballUniqueId = self._p.loadURDF(os.path.join(self._urdfRoot,"sphere2.urdf"),[ballx,bally,ballz])
self._p.setGravity(0,0,-10)
self._racecar = racecar.Racecar(self._p,urdfRootPath=self._urdfRoot, timeStep=self._timeStep)
self._ballUniqueId = self._p.loadURDF(os.path.join(self._urdfRoot, "sphere2.urdf"),
[ballx, bally, ballz])
self._p.setGravity(0, 0, -10)
self._racecar = racecar.Racecar(self._p, urdfRootPath=self._urdfRoot, timeStep=self._timeStep)
self._envStepCounter = 0
for i in range(100):
self._p.stepSimulation()
@@ -100,26 +98,26 @@ class RacecarGymEnv(gym.Env):
return [seed]
def getExtendedObservation(self):
self._observation = [] #self._racecar.getObservation()
carpos,carorn = self._p.getBasePositionAndOrientation(self._racecar.racecarUniqueId)
ballpos,ballorn = self._p.getBasePositionAndOrientation(self._ballUniqueId)
invCarPos,invCarOrn = self._p.invertTransform(carpos,carorn)
ballPosInCar,ballOrnInCar = self._p.multiplyTransforms(invCarPos,invCarOrn,ballpos,ballorn)
self._observation = [] #self._racecar.getObservation()
carpos, carorn = self._p.getBasePositionAndOrientation(self._racecar.racecarUniqueId)
ballpos, ballorn = self._p.getBasePositionAndOrientation(self._ballUniqueId)
invCarPos, invCarOrn = self._p.invertTransform(carpos, carorn)
ballPosInCar, ballOrnInCar = self._p.multiplyTransforms(invCarPos, invCarOrn, ballpos, ballorn)
self._observation.extend([ballPosInCar[0],ballPosInCar[1]])
return self._observation
self._observation.extend([ballPosInCar[0], ballPosInCar[1]])
return self._observation
def step(self, action):
if (self._renders):
basePos,orn = self._p.getBasePositionAndOrientation(self._racecar.racecarUniqueId)
basePos, orn = self._p.getBasePositionAndOrientation(self._racecar.racecarUniqueId)
#self._p.resetDebugVisualizerCamera(1, 30, -40, basePos)
if (self._isDiscrete):
fwd = [-1,-1,-1,0,0,0,1,1,1]
steerings = [-0.6,0,0.6,-0.6,0,0.6,-0.6,0,0.6]
forward = fwd[action]
steer = steerings[action]
realaction = [forward,steer]
fwd = [-1, -1, -1, 0, 0, 0, 1, 1, 1]
steerings = [-0.6, 0, 0.6, -0.6, 0, 0.6, -0.6, 0, 0.6]
forward = fwd[action]
steer = steerings[action]
realaction = [forward, steer]
else:
realaction = action
@@ -142,35 +140,37 @@ class RacecarGymEnv(gym.Env):
def render(self, mode='human', close=False):
if mode != "rgb_array":
return np.array([])
base_pos,orn = self._p.getBasePositionAndOrientation(self._racecar.racecarUniqueId)
view_matrix = self._p.computeViewMatrixFromYawPitchRoll(
cameraTargetPosition=base_pos,
distance=self._cam_dist,
yaw=self._cam_yaw,
pitch=self._cam_pitch,
roll=0,
upAxisIndex=2)
proj_matrix = self._p.computeProjectionMatrixFOV(
fov=60, aspect=float(RENDER_WIDTH)/RENDER_HEIGHT,
nearVal=0.1, farVal=100.0)
(_, _, px, _, _) = self._p.getCameraImage(
width=RENDER_WIDTH, height=RENDER_HEIGHT, viewMatrix=view_matrix,
projectionMatrix=proj_matrix, renderer=pybullet.ER_BULLET_HARDWARE_OPENGL)
base_pos, orn = self._p.getBasePositionAndOrientation(self._racecar.racecarUniqueId)
view_matrix = self._p.computeViewMatrixFromYawPitchRoll(cameraTargetPosition=base_pos,
distance=self._cam_dist,
yaw=self._cam_yaw,
pitch=self._cam_pitch,
roll=0,
upAxisIndex=2)
proj_matrix = self._p.computeProjectionMatrixFOV(fov=60,
aspect=float(RENDER_WIDTH) / RENDER_HEIGHT,
nearVal=0.1,
farVal=100.0)
(_, _, px, _, _) = self._p.getCameraImage(width=RENDER_WIDTH,
height=RENDER_HEIGHT,
viewMatrix=view_matrix,
projectionMatrix=proj_matrix,
renderer=pybullet.ER_BULLET_HARDWARE_OPENGL)
rgb_array = np.array(px)
rgb_array = rgb_array[:, :, :3]
return rgb_array
def _termination(self):
return self._envStepCounter>1000
return self._envStepCounter > 1000
def _reward(self):
closestPoints = self._p.getClosestPoints(self._racecar.racecarUniqueId,self._ballUniqueId,10000)
closestPoints = self._p.getClosestPoints(self._racecar.racecarUniqueId, self._ballUniqueId,
10000)
numPt = len(closestPoints)
reward=-1000
reward = -1000
#print(numPt)
if (numPt>0):
if (numPt > 0):
#print("reward:")
reward = -closestPoints[0][8]
#print(reward)
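The reward is simply the negative car-to-ball distance: element 8 of each tuple returned by getClosestPoints is the closest (surface-to-surface) distance, and -1000 is kept when nothing is found within the 10000 query radius. A runnable sketch with two stand-in spheres instead of the car and ball:

import pybullet
import pybullet_data
from pybullet_utils import bullet_client

p = bullet_client.BulletClient(connection_mode=pybullet.DIRECT)
p.setAdditionalSearchPath(pybullet_data.getDataPath())
body_a = p.loadURDF("sphere2.urdf", [0, 0, 1])  # stand-in for the car
body_b = p.loadURDF("sphere2.urdf", [3, 0, 1])  # stand-in for the ball

closest_points = p.getClosestPoints(body_a, body_b, 10000)
# Index 8 of a closest-point tuple is the contact distance between the bodies.
reward = -closest_points[0][8] if len(closest_points) > 0 else -1000
print(reward)  # negative surface-to-surface distance between the spheres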

View File

@@ -1,7 +1,7 @@
import os, inspect
import os, inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
os.sys.path.insert(0,parentdir)
os.sys.path.insert(0, parentdir)
import math
import gym
@@ -19,11 +19,9 @@ from pkg_resources import parse_version
RENDER_HEIGHT = 720
RENDER_WIDTH = 960
class RacecarZEDGymEnv(gym.Env):
metadata = {
'render.modes': ['human', 'rgb_array'],
'video.frames_per_second' : 50
}
metadata = {'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': 50}
def __init__(self,
urdfRoot=pybullet_data.getDataPath(),
@@ -44,8 +42,7 @@ class RacecarZEDGymEnv(gym.Env):
self._isDiscrete = isDiscrete
if self._renders:
self._p = bullet_client.BulletClient(
connection_mode=pybullet.GUI)
self._p = bullet_client.BulletClient(connection_mode=pybullet.GUI)
else:
self._p = bullet_client.BulletClient()
@@ -59,11 +56,14 @@ class RacecarZEDGymEnv(gym.Env):
if (isDiscrete):
self.action_space = spaces.Discrete(9)
else:
action_dim = 2
self._action_bound = 1
action_high = np.array([self._action_bound] * action_dim)
self.action_space = spaces.Box(-action_high, action_high, dtype=np.float32)
self.observation_space = spaces.Box(low=0, high=255, shape=(self._height, self._width, 4), dtype=np.uint8)
action_dim = 2
self._action_bound = 1
action_high = np.array([self._action_bound] * action_dim)
self.action_space = spaces.Box(-action_high, action_high, dtype=np.float32)
self.observation_space = spaces.Box(low=0,
high=255,
shape=(self._height, self._width, 4),
dtype=np.uint8)
self.viewer = None
@@ -72,23 +72,24 @@ class RacecarZEDGymEnv(gym.Env):
#p.setPhysicsEngineParameter(numSolverIterations=300)
self._p.setTimeStep(self._timeStep)
#self._p.loadURDF(os.path.join(os.path.dirname(__file__),"../data","plane.urdf"))
stadiumobjects = self._p.loadSDF(os.path.join(self._urdfRoot,"stadium.sdf"))
stadiumobjects = self._p.loadSDF(os.path.join(self._urdfRoot, "stadium.sdf"))
#move the stadium objects slightly above 0
for i in stadiumobjects:
pos,orn = self._p.getBasePositionAndOrientation(i)
newpos = [pos[0],pos[1],pos[2]+0.1]
self._p.resetBasePositionAndOrientation(i,newpos,orn)
pos, orn = self._p.getBasePositionAndOrientation(i)
newpos = [pos[0], pos[1], pos[2] + 0.1]
self._p.resetBasePositionAndOrientation(i, newpos, orn)
dist = 5 +2.*random.random()
ang = 2.*3.1415925438*random.random()
dist = 5 + 2. * random.random()
ang = 2. * 3.1415925438 * random.random()
ballx = dist * math.sin(ang)
bally = dist * math.cos(ang)
ballz = 1
self._ballUniqueId = self._p.loadURDF(os.path.join(self._urdfRoot,"sphere2red.urdf"),[ballx,bally,ballz])
self._p.setGravity(0,0,-10)
self._racecar = racecar.Racecar(self._p,urdfRootPath=self._urdfRoot, timeStep=self._timeStep)
self._ballUniqueId = self._p.loadURDF(os.path.join(self._urdfRoot, "sphere2red.urdf"),
[ballx, bally, ballz])
self._p.setGravity(0, 0, -10)
self._racecar = racecar.Racecar(self._p, urdfRootPath=self._urdfRoot, timeStep=self._timeStep)
self._envStepCounter = 0
for i in range(100):
self._p.stepSimulation()
@@ -103,38 +104,50 @@ class RacecarZEDGymEnv(gym.Env):
return [seed]
def getExtendedObservation(self):
carpos,carorn = self._p.getBasePositionAndOrientation(self._racecar.racecarUniqueId)
carmat = self._p.getMatrixFromQuaternion(carorn)
ballpos,ballorn = self._p.getBasePositionAndOrientation(self._ballUniqueId)
invCarPos,invCarOrn = self._p.invertTransform(carpos,carorn)
ballPosInCar,ballOrnInCar = self._p.multiplyTransforms(invCarPos,invCarOrn,ballpos,ballorn)
dist0 = 0.3
dist1 = 1.
eyePos = [carpos[0]+dist0*carmat[0],carpos[1]+dist0*carmat[3],carpos[2]+dist0*carmat[6]+0.3]
targetPos = [carpos[0]+dist1*carmat[0],carpos[1]+dist1*carmat[3],carpos[2]+dist1*carmat[6]+0.3]
up = [carmat[2],carmat[5],carmat[8]]
viewMat = self._p.computeViewMatrix(eyePos,targetPos,up)
#viewMat = self._p.computeViewMatrixFromYawPitchRoll(carpos,1,0,0,0,2)
#print("projectionMatrix:")
#print(self._p.getDebugVisualizerCamera()[3])
projMatrix = [0.7499999403953552, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, -1.0000200271606445, -1.0, 0.0, 0.0, -0.02000020071864128, 0.0]
img_arr = self._p.getCameraImage(width=self._width,height=self._height,viewMatrix=viewMat,projectionMatrix=projMatrix)
rgb=img_arr[2]
np_img_arr = np.reshape(rgb, (self._height, self._width, 4))
self._observation = np_img_arr
return self._observation
carpos, carorn = self._p.getBasePositionAndOrientation(self._racecar.racecarUniqueId)
carmat = self._p.getMatrixFromQuaternion(carorn)
ballpos, ballorn = self._p.getBasePositionAndOrientation(self._ballUniqueId)
invCarPos, invCarOrn = self._p.invertTransform(carpos, carorn)
ballPosInCar, ballOrnInCar = self._p.multiplyTransforms(invCarPos, invCarOrn, ballpos, ballorn)
dist0 = 0.3
dist1 = 1.
eyePos = [
carpos[0] + dist0 * carmat[0], carpos[1] + dist0 * carmat[3],
carpos[2] + dist0 * carmat[6] + 0.3
]
targetPos = [
carpos[0] + dist1 * carmat[0], carpos[1] + dist1 * carmat[3],
carpos[2] + dist1 * carmat[6] + 0.3
]
up = [carmat[2], carmat[5], carmat[8]]
viewMat = self._p.computeViewMatrix(eyePos, targetPos, up)
#viewMat = self._p.computeViewMatrixFromYawPitchRoll(carpos,1,0,0,0,2)
#print("projectionMatrix:")
#print(self._p.getDebugVisualizerCamera()[3])
projMatrix = [
0.7499999403953552, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, -1.0000200271606445, -1.0,
0.0, 0.0, -0.02000020071864128, 0.0
]
img_arr = self._p.getCameraImage(width=self._width,
height=self._height,
viewMatrix=viewMat,
projectionMatrix=projMatrix)
rgb = img_arr[2]
np_img_arr = np.reshape(rgb, (self._height, self._width, 4))
self._observation = np_img_arr
return self._observation
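getExtendedObservation mounts a virtual camera on the car: columns of the base rotation matrix give the forward and up directions, eye and target points are offset along forward, and getCameraImage returns an RGBA frame reshaped to (height, width, 4). A trimmed sketch of the camera math for an arbitrary pose; unlike the code above, which hard-codes its projection matrix, this one builds it with computeProjectionMatrixFOV:

import numpy as np
import pybullet
from pybullet_utils import bullet_client

p = bullet_client.BulletClient(connection_mode=pybullet.DIRECT)

width, height = 100, 50  # same role as the env's _width/_height
carpos = [0.0, 0.0, 0.1]
carorn = p.getQuaternionFromEuler([0, 0, 0.3])
carmat = p.getMatrixFromQuaternion(carorn)  # row-major 3x3

dist0, dist1 = 0.3, 1.0
eye_pos = [carpos[0] + dist0 * carmat[0], carpos[1] + dist0 * carmat[3],
           carpos[2] + dist0 * carmat[6] + 0.3]
target_pos = [carpos[0] + dist1 * carmat[0], carpos[1] + dist1 * carmat[3],
              carpos[2] + dist1 * carmat[6] + 0.3]
up = [carmat[2], carmat[5], carmat[8]]

view_mat = p.computeViewMatrix(eye_pos, target_pos, up)
proj_mat = p.computeProjectionMatrixFOV(fov=60, aspect=float(width) / height,
                                        nearVal=0.01, farVal=10.0)
img_arr = p.getCameraImage(width=width, height=height,
                           viewMatrix=view_mat, projectionMatrix=proj_mat)
rgba = np.reshape(img_arr[2], (height, width, 4))
print(rgba.shape)  # (50, 100, 4)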
def step(self, action):
if (self._renders):
basePos,orn = self._p.getBasePositionAndOrientation(self._racecar.racecarUniqueId)
basePos, orn = self._p.getBasePositionAndOrientation(self._racecar.racecarUniqueId)
#self._p.resetDebugVisualizerCamera(1, 30, -40, basePos)
if (self._isDiscrete):
fwd = [-1,-1,-1,0,0,0,1,1,1]
steerings = [-0.6,0,0.6,-0.6,0,0.6,-0.6,0,0.6]
forward = fwd[action]
steer = steerings[action]
realaction = [forward,steer]
fwd = [-1, -1, -1, 0, 0, 0, 1, 1, 1]
steerings = [-0.6, 0, 0.6, -0.6, 0, 0.6, -0.6, 0, 0.6]
forward = fwd[action]
steer = steerings[action]
realaction = [forward, steer]
else:
realaction = action
@@ -157,35 +170,37 @@ class RacecarZEDGymEnv(gym.Env):
def render(self, mode='human', close=False):
if mode != "rgb_array":
return np.array([])
base_pos,orn = self._p.getBasePositionAndOrientation(self._racecar.racecarUniqueId)
view_matrix = self._p.computeViewMatrixFromYawPitchRoll(
cameraTargetPosition=base_pos,
distance=self._cam_dist,
yaw=self._cam_yaw,
pitch=self._cam_pitch,
roll=0,
upAxisIndex=2)
proj_matrix = self._p.computeProjectionMatrixFOV(
fov=60, aspect=float(RENDER_WIDTH)/RENDER_HEIGHT,
nearVal=0.1, farVal=100.0)
(_, _, px, _, _) = self._p.getCameraImage(
width=RENDER_WIDTH, height=RENDER_HEIGHT, viewMatrix=view_matrix,
projectionMatrix=proj_matrix, renderer=pybullet.ER_BULLET_HARDWARE_OPENGL)
base_pos, orn = self._p.getBasePositionAndOrientation(self._racecar.racecarUniqueId)
view_matrix = self._p.computeViewMatrixFromYawPitchRoll(cameraTargetPosition=base_pos,
distance=self._cam_dist,
yaw=self._cam_yaw,
pitch=self._cam_pitch,
roll=0,
upAxisIndex=2)
proj_matrix = self._p.computeProjectionMatrixFOV(fov=60,
aspect=float(RENDER_WIDTH) / RENDER_HEIGHT,
nearVal=0.1,
farVal=100.0)
(_, _, px, _, _) = self._p.getCameraImage(width=RENDER_WIDTH,
height=RENDER_HEIGHT,
viewMatrix=view_matrix,
projectionMatrix=proj_matrix,
renderer=pybullet.ER_BULLET_HARDWARE_OPENGL)
rgb_array = np.array(px)
rgb_array = rgb_array[:, :, :3]
return rgb_array
def _termination(self):
return self._envStepCounter>1000
return self._envStepCounter > 1000
def _reward(self):
closestPoints = self._p.getClosestPoints(self._racecar.racecarUniqueId,self._ballUniqueId,10000)
closestPoints = self._p.getClosestPoints(self._racecar.racecarUniqueId, self._ballUniqueId,
10000)
numPt = len(closestPoints)
reward=-1000
reward = -1000
#print(numPt)
if (numPt>0):
if (numPt > 0):
#print("reward:")
reward = -closestPoints[0][8]
#print(reward)

View File

@@ -4,11 +4,10 @@ import os
import inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
os.sys.path.insert(0,parentdir)
print("parentdir=",parentdir)
os.sys.path.insert(0, parentdir)
print("parentdir=", parentdir)
from pybullet_envs.deep_mimic.env.pybullet_deep_mimic_env import PyBulletDeepMimicEnv
from pybullet_envs.deep_mimic.env.pybullet_deep_mimic_env import PyBulletDeepMimicEnv
from pybullet_envs.deep_mimic.learning.rl_world import RLWorld
from pybullet_utils.logger import Logger
from pybullet_envs.deep_mimic.testrl import update_world, update_timestep, build_world
@@ -17,36 +16,40 @@ import pybullet_utils.mpi_util as MPIUtil
args = []
world = None
def run():
global update_timestep
global world
global update_timestep
global world
done = False
while not (done):
update_world(world, update_timestep)
done = False
while not (done):
update_world(world, update_timestep)
return
return
def shutdown():
global world
global world
Logger.print2('Shutting down...')
world.shutdown()
return
Logger.print2('Shutting down...')
world.shutdown()
return
def main():
global args
global world
global args
global world
# Command line arguments
args = sys.argv[1:]
enable_draw = False
world = build_world(args, enable_draw)
# Command line arguments
args = sys.argv[1:]
enable_draw = False
world = build_world(args, enable_draw)
run()
shutdown()
run()
shutdown()
return
return
if __name__ == '__main__':
main()
main()

View File

@@ -1,6 +1,7 @@
from enum import Enum
class ActionSpace(Enum):
Null = 0
Continuous = 1
Discrete = 2
Null = 0
Continuous = 1
Discrete = 2

View File

@@ -1,19 +1,21 @@
from abc import abstractmethod
from abc import abstractmethod
import sys, abc
if sys.version_info >= (3, 4):
ABC = abc.ABC
ABC = abc.ABC
else:
ABC = abc.ABCMeta('ABC', (), {})
ABC = abc.ABCMeta('ABC', (), {})
import numpy as np
from enum import Enum
class Env(ABC):
class Terminate(Enum):
Null = 0
Fail = 1
Succ = 2
def __init__(self, args, enable_draw):
self.enable_draw = enable_draw
return
class Terminate(Enum):
Null = 0
Fail = 1
Succ = 2
def __init__(self, args, enable_draw):
self.enable_draw = enable_draw
return

View File

@@ -1,36 +1,61 @@
from pybullet_utils import bullet_client
import math
class HumanoidPoseInterpolator(object):
def __init__(self):
pass
def Reset(self,basePos=[0,0,0], baseOrn=[0,0,0,1],chestRot=[0,0,0,1], neckRot=[0,0,0,1],rightHipRot= [0,0,0,1], rightKneeRot=[0],rightAnkleRot = [0,0,0,1],
rightShoulderRot = [0,0,0,1],rightElbowRot = [0], leftHipRot = [0,0,0,1], leftKneeRot = [0],leftAnkleRot = [0,0,0,1],
leftShoulderRot = [0,0,0,1] ,leftElbowRot = [0],
baseLinVel = [0,0,0],baseAngVel = [0,0,0], chestVel = [0,0,0],neckVel = [0,0,0],rightHipVel = [0,0,0],rightKneeVel = [0],
rightAnkleVel = [0,0,0],rightShoulderVel = [0,0,0],rightElbowVel = [0],leftHipVel = [0,0,0],leftKneeVel = [0],leftAnkleVel = [0,0,0],leftShoulderVel = [0,0,0],leftElbowVel = [0]
):
def Reset(self,
basePos=[0, 0, 0],
baseOrn=[0, 0, 0, 1],
chestRot=[0, 0, 0, 1],
neckRot=[0, 0, 0, 1],
rightHipRot=[0, 0, 0, 1],
rightKneeRot=[0],
rightAnkleRot=[0, 0, 0, 1],
rightShoulderRot=[0, 0, 0, 1],
rightElbowRot=[0],
leftHipRot=[0, 0, 0, 1],
leftKneeRot=[0],
leftAnkleRot=[0, 0, 0, 1],
leftShoulderRot=[0, 0, 0, 1],
leftElbowRot=[0],
baseLinVel=[0, 0, 0],
baseAngVel=[0, 0, 0],
chestVel=[0, 0, 0],
neckVel=[0, 0, 0],
rightHipVel=[0, 0, 0],
rightKneeVel=[0],
rightAnkleVel=[0, 0, 0],
rightShoulderVel=[0, 0, 0],
rightElbowVel=[0],
leftHipVel=[0, 0, 0],
leftKneeVel=[0],
leftAnkleVel=[0, 0, 0],
leftShoulderVel=[0, 0, 0],
leftElbowVel=[0]):
self._basePos = basePos
self._baseLinVel = baseLinVel
#print("HumanoidPoseInterpolator.Reset: baseLinVel = ", baseLinVel)
self._baseOrn =baseOrn
self._baseOrn = baseOrn
self._baseAngVel = baseAngVel
self._chestRot = chestRot
self._chestVel =chestVel
self._chestVel = chestVel
self._neckRot = neckRot
self._neckVel = neckVel
self._rightHipRot = rightHipRot
self._rightHipVel = rightHipVel
self._rightKneeRot =rightKneeRot
self._rightKneeRot = rightKneeRot
self._rightKneeVel = rightKneeVel
self._rightAnkleRot = rightAnkleRot
self._rightAnkleVel = rightAnkleVel
self._rightShoulderRot =rightShoulderRot
self._rightShoulderRot = rightShoulderRot
self._rightShoulderVel = rightShoulderVel
self._rightElbowRot = rightElbowRot
self._rightElbowVel = rightElbowVel
@@ -39,225 +64,253 @@ class HumanoidPoseInterpolator(object):
self._leftHipVel = leftHipVel
self._leftKneeRot = leftKneeRot
self._leftKneeVel = leftKneeVel
self._leftAnkleRot =leftAnkleRot
self._leftAnkleRot = leftAnkleRot
self._leftAnkleVel = leftAnkleVel
self._leftShoulderRot = leftShoulderRot
self._leftShoulderVel = leftShoulderVel
self._leftElbowRot =leftElbowRot
self._leftElbowRot = leftElbowRot
self._leftElbowVel = leftElbowVel
def ComputeLinVel(self,posStart, posEnd, deltaTime):
vel = [(posEnd[0]-posStart[0])/deltaTime,(posEnd[1]-posStart[1])/deltaTime,(posEnd[2]-posStart[2])/deltaTime]
def ComputeLinVel(self, posStart, posEnd, deltaTime):
vel = [(posEnd[0] - posStart[0]) / deltaTime, (posEnd[1] - posStart[1]) / deltaTime,
(posEnd[2] - posStart[2]) / deltaTime]
return vel
def ComputeAngVel(self,ornStart, ornEnd, deltaTime, bullet_client):
dorn = bullet_client.getDifferenceQuaternion(ornStart,ornEnd)
axis,angle = bullet_client.getAxisAngleFromQuaternion(dorn)
angVel = [(axis[0]*angle)/deltaTime,(axis[1]*angle)/deltaTime,(axis[2]*angle)/deltaTime]
def ComputeAngVel(self, ornStart, ornEnd, deltaTime, bullet_client):
dorn = bullet_client.getDifferenceQuaternion(ornStart, ornEnd)
axis, angle = bullet_client.getAxisAngleFromQuaternion(dorn)
angVel = [(axis[0] * angle) / deltaTime, (axis[1] * angle) / deltaTime,
(axis[2] * angle) / deltaTime]
return angVel
def ComputeAngVelRel(self,ornStart, ornEnd, deltaTime, bullet_client):
ornStartConjugate = [-ornStart[0],-ornStart[1],-ornStart[2],ornStart[3]]
pos_diff, q_diff =bullet_client.multiplyTransforms([0,0,0], ornStartConjugate, [0,0,0], ornEnd)
axis,angle = bullet_client.getAxisAngleFromQuaternion(q_diff)
angVel = [(axis[0]*angle)/deltaTime,(axis[1]*angle)/deltaTime,(axis[2]*angle)/deltaTime]
def ComputeAngVelRel(self, ornStart, ornEnd, deltaTime, bullet_client):
ornStartConjugate = [-ornStart[0], -ornStart[1], -ornStart[2], ornStart[3]]
pos_diff, q_diff = bullet_client.multiplyTransforms([0, 0, 0], ornStartConjugate, [0, 0, 0],
ornEnd)
axis, angle = bullet_client.getAxisAngleFromQuaternion(q_diff)
angVel = [(axis[0] * angle) / deltaTime, (axis[1] * angle) / deltaTime,
(axis[2] * angle) / deltaTime]
return angVel
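Both angular-velocity helpers use the same finite difference: take the quaternion that rotates the start orientation into the end orientation, convert it to axis-angle, and divide by the keyframe duration (ComputeAngVelRel builds the difference in the start frame via the conjugate). A small sketch of that computation, with quaternions in pybullet's (x, y, z, w) order:

import pybullet
from pybullet_utils import bullet_client

p = bullet_client.BulletClient(connection_mode=pybullet.DIRECT)

orn_start = p.getQuaternionFromEuler([0, 0, 0.0])
orn_end = p.getQuaternionFromEuler([0, 0, 0.5])  # 0.5 rad of yaw over one keyframe
delta_time = 0.1

dorn = p.getDifferenceQuaternion(orn_start, orn_end)
axis, angle = p.getAxisAngleFromQuaternion(dorn)
ang_vel = [(axis[i] * angle) / delta_time for i in range(3)]
print(ang_vel)  # approximately [0, 0, 5.0] rad/s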
def NormalizeVector(self, vec):
length2 = orn[0]*orn[0]+orn[1]*orn[1]+orn[2]*orn[2]
if (length2>0):
length2 = vec[0] * vec[0] + vec[1] * vec[1] + vec[2] * vec[2]
if (length2 > 0):
length = math.sqrt(length2)
def NormalizeQuaternion(self, orn):
length2 = orn[0]*orn[0]+orn[1]*orn[1]+orn[2]*orn[2]+orn[3]*orn[3]
if (length2>0):
length2 = orn[0] * orn[0] + orn[1] * orn[1] + orn[2] * orn[2] + orn[3] * orn[3]
if (length2 > 0):
length = math.sqrt(length2)
orn[0]/=length
orn[1]/=length
orn[2]/=length
orn[3]/=length
orn[0] /= length
orn[1] /= length
orn[2] /= length
orn[3] /= length
return orn
#print("Normalize? length=",length)
def PostProcessMotionData(self, frameData):
baseOrn1Start = [frameData[5],frameData[6], frameData[7],frameData[4]]
chestRotStart = [frameData[9],frameData[10],frameData[11],frameData[8]]
neckRotStart = [frameData[13],frameData[14],frameData[15],frameData[12]]
rightHipRotStart = [frameData[17],frameData[18],frameData[19],frameData[16]]
rightAnkleRotStart = [frameData[22],frameData[23],frameData[24],frameData[21]]
rightShoulderRotStart = [frameData[26],frameData[27],frameData[28],frameData[25]]
leftHipRotStart = [frameData[31],frameData[32],frameData[33],frameData[30]]
leftAnkleRotStart = [frameData[36],frameData[37],frameData[38],frameData[35]]
leftShoulderRotStart = [frameData[40],frameData[41],frameData[42],frameData[39]]
def GetPose(self):
pose = [ self._basePos[0],self._basePos[1],self._basePos[2],
self._baseOrn[0],self._baseOrn[1],self._baseOrn[2],self._baseOrn[3],
self._chestRot[0],self._chestRot[1],self._chestRot[2],self._chestRot[3],
self._neckRot[0],self._neckRot[1],self._neckRot[2],self._neckRot[3],
self._rightHipRot[0],self._rightHipRot[1],self._rightHipRot[2],self._rightHipRot[3],
self._rightKneeRot[0],
self._rightAnkleRot[0],self._rightAnkleRot[1],self._rightAnkleRot[2],self._rightAnkleRot[3],
self._rightShoulderRot[0],self._rightShoulderRot[1],self._rightShoulderRot[2],self._rightShoulderRot[3],
self._rightElbowRot[0],
self._leftHipRot[0],self._leftHipRot[1],self._leftHipRot[2],self._leftHipRot[3],
self._leftKneeRot[0],
self._leftAnkleRot[0],self._leftAnkleRot[1],self._leftAnkleRot[2],self._leftAnkleRot[3],
self._leftShoulderRot[0],self._leftShoulderRot[1],self._leftShoulderRot[2],self._leftShoulderRot[3],
self._leftElbowRot[0] ]
return pose
baseOrn1Start = [frameData[5], frameData[6], frameData[7], frameData[4]]
def Slerp(self, frameFraction, frameData, frameDataNext,bullet_client ):
chestRotStart = [frameData[9], frameData[10], frameData[11], frameData[8]]
neckRotStart = [frameData[13], frameData[14], frameData[15], frameData[12]]
rightHipRotStart = [frameData[17], frameData[18], frameData[19], frameData[16]]
rightAnkleRotStart = [frameData[22], frameData[23], frameData[24], frameData[21]]
rightShoulderRotStart = [frameData[26], frameData[27], frameData[28], frameData[25]]
leftHipRotStart = [frameData[31], frameData[32], frameData[33], frameData[30]]
leftAnkleRotStart = [frameData[36], frameData[37], frameData[38], frameData[35]]
leftShoulderRotStart = [frameData[40], frameData[41], frameData[42], frameData[39]]
def GetPose(self):
pose = [
self._basePos[0], self._basePos[1], self._basePos[2], self._baseOrn[0], self._baseOrn[1],
self._baseOrn[2], self._baseOrn[3], self._chestRot[0], self._chestRot[1],
self._chestRot[2], self._chestRot[3], self._neckRot[0], self._neckRot[1], self._neckRot[2],
self._neckRot[3], self._rightHipRot[0], self._rightHipRot[1], self._rightHipRot[2],
self._rightHipRot[3], self._rightKneeRot[0], self._rightAnkleRot[0],
self._rightAnkleRot[1], self._rightAnkleRot[2], self._rightAnkleRot[3],
self._rightShoulderRot[0], self._rightShoulderRot[1], self._rightShoulderRot[2],
self._rightShoulderRot[3], self._rightElbowRot[0], self._leftHipRot[0],
self._leftHipRot[1], self._leftHipRot[2], self._leftHipRot[3], self._leftKneeRot[0],
self._leftAnkleRot[0], self._leftAnkleRot[1], self._leftAnkleRot[2], self._leftAnkleRot[3],
self._leftShoulderRot[0], self._leftShoulderRot[1], self._leftShoulderRot[2],
self._leftShoulderRot[3], self._leftElbowRot[0]
]
return pose
def Slerp(self, frameFraction, frameData, frameDataNext, bullet_client):
keyFrameDuration = frameData[0]
basePos1Start = [frameData[1],frameData[2],frameData[3]]
basePos1End = [frameDataNext[1],frameDataNext[2],frameDataNext[3]]
self._basePos = [basePos1Start[0]+frameFraction*(basePos1End[0]-basePos1Start[0]),
basePos1Start[1]+frameFraction*(basePos1End[1]-basePos1Start[1]),
basePos1Start[2]+frameFraction*(basePos1End[2]-basePos1Start[2])]
self._baseLinVel = self.ComputeLinVel(basePos1Start,basePos1End, keyFrameDuration)
baseOrn1Start = [frameData[5],frameData[6], frameData[7],frameData[4]]
baseOrn1Next = [frameDataNext[5],frameDataNext[6], frameDataNext[7],frameDataNext[4]]
self._baseOrn = bullet_client.getQuaternionSlerp(baseOrn1Start,baseOrn1Next,frameFraction)
self._baseAngVel = self.ComputeAngVel(baseOrn1Start,baseOrn1Next, keyFrameDuration, bullet_client)
basePos1Start = [frameData[1], frameData[2], frameData[3]]
basePos1End = [frameDataNext[1], frameDataNext[2], frameDataNext[3]]
self._basePos = [
basePos1Start[0] + frameFraction * (basePos1End[0] - basePos1Start[0]),
basePos1Start[1] + frameFraction * (basePos1End[1] - basePos1Start[1]),
basePos1Start[2] + frameFraction * (basePos1End[2] - basePos1Start[2])
]
self._baseLinVel = self.ComputeLinVel(basePos1Start, basePos1End, keyFrameDuration)
baseOrn1Start = [frameData[5], frameData[6], frameData[7], frameData[4]]
baseOrn1Next = [frameDataNext[5], frameDataNext[6], frameDataNext[7], frameDataNext[4]]
self._baseOrn = bullet_client.getQuaternionSlerp(baseOrn1Start, baseOrn1Next, frameFraction)
self._baseAngVel = self.ComputeAngVel(baseOrn1Start, baseOrn1Next, keyFrameDuration,
bullet_client)
##pre-rotate to make z-up
#y2zPos=[0,0,0.0]
#y2zOrn = p.getQuaternionFromEuler([1.57,0,0])
#basePos,baseOrn = p.multiplyTransforms(y2zPos, y2zOrn,basePos1,baseOrn1)
chestRotStart = [frameData[9],frameData[10],frameData[11],frameData[8]]
chestRotEnd = [frameDataNext[9],frameDataNext[10],frameDataNext[11],frameDataNext[8]]
self._chestRot = bullet_client.getQuaternionSlerp(chestRotStart,chestRotEnd,frameFraction)
self._chestVel = self.ComputeAngVelRel(chestRotStart,chestRotEnd,keyFrameDuration,bullet_client)
neckRotStart = [frameData[13],frameData[14],frameData[15],frameData[12]]
neckRotEnd= [frameDataNext[13],frameDataNext[14],frameDataNext[15],frameDataNext[12]]
self._neckRot = bullet_client.getQuaternionSlerp(neckRotStart,neckRotEnd,frameFraction)
self._neckVel = self.ComputeAngVelRel(neckRotStart,neckRotEnd,keyFrameDuration,bullet_client)
rightHipRotStart = [frameData[17],frameData[18],frameData[19],frameData[16]]
rightHipRotEnd = [frameDataNext[17],frameDataNext[18],frameDataNext[19],frameDataNext[16]]
self._rightHipRot = bullet_client.getQuaternionSlerp(rightHipRotStart,rightHipRotEnd,frameFraction)
self._rightHipVel = self.ComputeAngVelRel(rightHipRotStart,rightHipRotEnd,keyFrameDuration,bullet_client)
chestRotStart = [frameData[9], frameData[10], frameData[11], frameData[8]]
chestRotEnd = [frameDataNext[9], frameDataNext[10], frameDataNext[11], frameDataNext[8]]
self._chestRot = bullet_client.getQuaternionSlerp(chestRotStart, chestRotEnd, frameFraction)
self._chestVel = self.ComputeAngVelRel(chestRotStart, chestRotEnd, keyFrameDuration,
bullet_client)
neckRotStart = [frameData[13], frameData[14], frameData[15], frameData[12]]
neckRotEnd = [frameDataNext[13], frameDataNext[14], frameDataNext[15], frameDataNext[12]]
self._neckRot = bullet_client.getQuaternionSlerp(neckRotStart, neckRotEnd, frameFraction)
self._neckVel = self.ComputeAngVelRel(neckRotStart, neckRotEnd, keyFrameDuration,
bullet_client)
rightHipRotStart = [frameData[17], frameData[18], frameData[19], frameData[16]]
rightHipRotEnd = [frameDataNext[17], frameDataNext[18], frameDataNext[19], frameDataNext[16]]
self._rightHipRot = bullet_client.getQuaternionSlerp(rightHipRotStart, rightHipRotEnd,
frameFraction)
self._rightHipVel = self.ComputeAngVelRel(rightHipRotStart, rightHipRotEnd, keyFrameDuration,
bullet_client)
rightKneeRotStart = [frameData[20]]
rightKneeRotEnd = [frameDataNext[20]]
self._rightKneeRot = [rightKneeRotStart[0]+frameFraction*(rightKneeRotEnd[0]-rightKneeRotStart[0])]
self._rightKneeVel = [(rightKneeRotEnd[0]-rightKneeRotStart[0])/keyFrameDuration]
rightAnkleRotStart = [frameData[22],frameData[23],frameData[24],frameData[21]]
rightAnkleRotEnd = [frameDataNext[22],frameDataNext[23],frameDataNext[24],frameDataNext[21]]
self._rightAnkleRot = bullet_client.getQuaternionSlerp(rightAnkleRotStart,rightAnkleRotEnd,frameFraction)
self._rightAnkleVel = self.ComputeAngVelRel(rightAnkleRotStart,rightAnkleRotEnd,keyFrameDuration,bullet_client)
rightShoulderRotStart = [frameData[26],frameData[27],frameData[28],frameData[25]]
rightShoulderRotEnd = [frameDataNext[26],frameDataNext[27],frameDataNext[28],frameDataNext[25]]
self._rightShoulderRot = bullet_client.getQuaternionSlerp(rightShoulderRotStart,rightShoulderRotEnd,frameFraction)
self._rightShoulderVel = self.ComputeAngVelRel(rightShoulderRotStart,rightShoulderRotEnd, keyFrameDuration,bullet_client)
self._rightKneeRot = [
rightKneeRotStart[0] + frameFraction * (rightKneeRotEnd[0] - rightKneeRotStart[0])
]
self._rightKneeVel = [(rightKneeRotEnd[0] - rightKneeRotStart[0]) / keyFrameDuration]
rightAnkleRotStart = [frameData[22], frameData[23], frameData[24], frameData[21]]
rightAnkleRotEnd = [frameDataNext[22], frameDataNext[23], frameDataNext[24], frameDataNext[21]]
self._rightAnkleRot = bullet_client.getQuaternionSlerp(rightAnkleRotStart, rightAnkleRotEnd,
frameFraction)
self._rightAnkleVel = self.ComputeAngVelRel(rightAnkleRotStart, rightAnkleRotEnd,
keyFrameDuration, bullet_client)
rightShoulderRotStart = [frameData[26], frameData[27], frameData[28], frameData[25]]
rightShoulderRotEnd = [
frameDataNext[26], frameDataNext[27], frameDataNext[28], frameDataNext[25]
]
self._rightShoulderRot = bullet_client.getQuaternionSlerp(rightShoulderRotStart,
rightShoulderRotEnd, frameFraction)
self._rightShoulderVel = self.ComputeAngVelRel(rightShoulderRotStart, rightShoulderRotEnd,
keyFrameDuration, bullet_client)
rightElbowRotStart = [frameData[29]]
rightElbowRotEnd = [frameDataNext[29]]
self._rightElbowRot = [rightElbowRotStart[0]+frameFraction*(rightElbowRotEnd[0]-rightElbowRotStart[0])]
self._rightElbowVel = [(rightElbowRotEnd[0]-rightElbowRotStart[0])/keyFrameDuration]
leftHipRotStart = [frameData[31],frameData[32],frameData[33],frameData[30]]
leftHipRotEnd = [frameDataNext[31],frameDataNext[32],frameDataNext[33],frameDataNext[30]]
self._leftHipRot = bullet_client.getQuaternionSlerp(leftHipRotStart,leftHipRotEnd,frameFraction)
self._leftHipVel = self.ComputeAngVelRel(leftHipRotStart, leftHipRotEnd,keyFrameDuration,bullet_client)
self._rightElbowRot = [
rightElbowRotStart[0] + frameFraction * (rightElbowRotEnd[0] - rightElbowRotStart[0])
]
self._rightElbowVel = [(rightElbowRotEnd[0] - rightElbowRotStart[0]) / keyFrameDuration]
leftHipRotStart = [frameData[31], frameData[32], frameData[33], frameData[30]]
leftHipRotEnd = [frameDataNext[31], frameDataNext[32], frameDataNext[33], frameDataNext[30]]
self._leftHipRot = bullet_client.getQuaternionSlerp(leftHipRotStart, leftHipRotEnd,
frameFraction)
self._leftHipVel = self.ComputeAngVelRel(leftHipRotStart, leftHipRotEnd, keyFrameDuration,
bullet_client)
leftKneeRotStart = [frameData[34]]
leftKneeRotEnd = [frameDataNext[34]]
self._leftKneeRot = [leftKneeRotStart[0] +frameFraction*(leftKneeRotEnd[0]-leftKneeRotStart[0]) ]
self._leftKneeVel = [(leftKneeRotEnd[0]-leftKneeRotStart[0])/keyFrameDuration]
leftAnkleRotStart = [frameData[36],frameData[37],frameData[38],frameData[35]]
leftAnkleRotEnd = [frameDataNext[36],frameDataNext[37],frameDataNext[38],frameDataNext[35]]
self._leftAnkleRot = bullet_client.getQuaternionSlerp(leftAnkleRotStart,leftAnkleRotEnd,frameFraction)
self._leftAnkleVel = self.ComputeAngVelRel(leftAnkleRotStart,leftAnkleRotEnd,keyFrameDuration,bullet_client)
self._leftKneeRot = [
leftKneeRotStart[0] + frameFraction * (leftKneeRotEnd[0] - leftKneeRotStart[0])
]
self._leftKneeVel = [(leftKneeRotEnd[0] - leftKneeRotStart[0]) / keyFrameDuration]
leftShoulderRotStart = [frameData[40],frameData[41],frameData[42],frameData[39]]
leftShoulderRotEnd = [frameDataNext[40],frameDataNext[41],frameDataNext[42],frameDataNext[39]]
self._leftShoulderRot = bullet_client.getQuaternionSlerp(leftShoulderRotStart,leftShoulderRotEnd,frameFraction)
self._leftShoulderVel = self.ComputeAngVelRel(leftShoulderRotStart,leftShoulderRotEnd,keyFrameDuration,bullet_client)
leftAnkleRotStart = [frameData[36], frameData[37], frameData[38], frameData[35]]
leftAnkleRotEnd = [frameDataNext[36], frameDataNext[37], frameDataNext[38], frameDataNext[35]]
self._leftAnkleRot = bullet_client.getQuaternionSlerp(leftAnkleRotStart, leftAnkleRotEnd,
frameFraction)
self._leftAnkleVel = self.ComputeAngVelRel(leftAnkleRotStart, leftAnkleRotEnd,
keyFrameDuration, bullet_client)
leftShoulderRotStart = [frameData[40], frameData[41], frameData[42], frameData[39]]
leftShoulderRotEnd = [
frameDataNext[40], frameDataNext[41], frameDataNext[42], frameDataNext[39]
]
self._leftShoulderRot = bullet_client.getQuaternionSlerp(leftShoulderRotStart,
leftShoulderRotEnd, frameFraction)
self._leftShoulderVel = self.ComputeAngVelRel(leftShoulderRotStart, leftShoulderRotEnd,
keyFrameDuration, bullet_client)
leftElbowRotStart = [frameData[43]]
leftElbowRotEnd = [frameDataNext[43]]
self._leftElbowRot = [leftElbowRotStart[0]+frameFraction*(leftElbowRotEnd[0]-leftElbowRotStart[0])]
self._leftElbowVel = [(leftElbowRotEnd[0]-leftElbowRotStart[0])/keyFrameDuration]
self._leftElbowRot = [
leftElbowRotStart[0] + frameFraction * (leftElbowRotEnd[0] - leftElbowRotStart[0])
]
self._leftElbowVel = [(leftElbowRotEnd[0] - leftElbowRotStart[0]) / keyFrameDuration]
pose = self.GetPose()
return pose
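# Note: the action vector packs each spherical joint as four numbers (an angle followed
# by a rotation axis) and each revolute joint (knees, elbows) as a single angle,
# giving 8 * 4 + 4 * 1 = 36 values, which matches get_action_size() (43 dofs - 7 root dofs).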
def ConvertFromAction(self, pybullet_client, action):
#turn action into pose
self.Reset()#?? needed?
index=0
self.Reset() #?? needed?
index = 0
angle = action[index]
axis = [action[index+1],action[index+2],action[index+3]]
index+=4
self._chestRot = pybullet_client.getQuaternionFromAxisAngle(axis,angle)
axis = [action[index + 1], action[index + 2], action[index + 3]]
index += 4
self._chestRot = pybullet_client.getQuaternionFromAxisAngle(axis, angle)
#print("pose._chestRot=",pose._chestRot)
angle = action[index]
axis = [action[index+1],action[index+2],action[index+3]]
index+=4
self._neckRot = pybullet_client.getQuaternionFromAxisAngle(axis,angle)
axis = [action[index + 1], action[index + 2], action[index + 3]]
index += 4
self._neckRot = pybullet_client.getQuaternionFromAxisAngle(axis, angle)
angle = action[index]
axis = [action[index+1],action[index+2],action[index+3]]
index+=4
self._rightHipRot = pybullet_client.getQuaternionFromAxisAngle(axis,angle)
axis = [action[index + 1], action[index + 2], action[index + 3]]
index += 4
self._rightHipRot = pybullet_client.getQuaternionFromAxisAngle(axis, angle)
angle = action[index]
index+=1
index += 1
self._rightKneeRot = [angle]
angle = action[index]
axis = [action[index+1],action[index+2],action[index+3]]
index+=4
self._rightAnkleRot = pybullet_client.getQuaternionFromAxisAngle(axis,angle)
axis = [action[index + 1], action[index + 2], action[index + 3]]
index += 4
self._rightAnkleRot = pybullet_client.getQuaternionFromAxisAngle(axis, angle)
angle = action[index]
axis = [action[index+1],action[index+2],action[index+3]]
index+=4
self._rightShoulderRot = pybullet_client.getQuaternionFromAxisAngle(axis,angle)
axis = [action[index + 1], action[index + 2], action[index + 3]]
index += 4
self._rightShoulderRot = pybullet_client.getQuaternionFromAxisAngle(axis, angle)
angle = action[index]
index+=1
index += 1
self._rightElbowRot = [angle]
angle = action[index]
axis = [action[index+1],action[index+2],action[index+3]]
index+=4
self._leftHipRot = pybullet_client.getQuaternionFromAxisAngle(axis,angle)
axis = [action[index + 1], action[index + 2], action[index + 3]]
index += 4
self._leftHipRot = pybullet_client.getQuaternionFromAxisAngle(axis, angle)
angle = action[index]
index+=1
index += 1
self._leftKneeRot = [angle]
angle = action[index]
axis = [action[index+1],action[index+2],action[index+3]]
index+=4
self._leftAnkleRot = pybullet_client.getQuaternionFromAxisAngle(axis,angle)
axis = [action[index + 1], action[index + 2], action[index + 3]]
index += 4
self._leftAnkleRot = pybullet_client.getQuaternionFromAxisAngle(axis, angle)
angle = action[index]
axis = [action[index+1],action[index+2],action[index+3]]
index+=4
self._leftShoulderRot = pybullet_client.getQuaternionFromAxisAngle(axis,angle)
axis = [action[index + 1], action[index + 2], action[index + 3]]
index += 4
self._leftShoulderRot = pybullet_client.getQuaternionFromAxisAngle(axis, angle)
angle = action[index]
index+=1
index += 1
self._leftElbowRot = [angle]
pose = self.GetPose()
return pose

File diff suppressed because it is too large

View File

@@ -1,7 +1,9 @@
import json
import math
class MotionCaptureData(object):
def __init__(self):
self.Reset()
@@ -13,30 +15,33 @@ class MotionCaptureData(object):
self._motion_data = json.load(f)
def NumFrames(self):
return len(self._motion_data['Frames'])
return len(self._motion_data['Frames'])
def KeyFrameDuraction(self):
return self._motion_data['Frames'][0][0]
return self._motion_data['Frames'][0][0]
def getCycleTime(self):
keyFrameDuration = self.KeyFrameDuraction()
cycleTime = keyFrameDuration*(self.NumFrames()-1)
cycleTime = keyFrameDuration * (self.NumFrames() - 1)
return cycleTime
def calcCycleCount(self, simTime, cycleTime):
phases = simTime / cycleTime;
phases = simTime / cycleTime
count = math.floor(phases)
loop = True
#count = (loop) ? count : cMathUtil::Clamp(count, 0, 1);
return count
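# The cycle offset is the root translation accumulated over one full motion cycle
# (base position of the last keyframe minus the first); it can be used to shift the
# character each time the clip wraps around.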
def computeCycleOffset(self):
firstFrame=0
lastFrame = self.NumFrames()-1
firstFrame = 0
lastFrame = self.NumFrames() - 1
frameData = self._motion_data['Frames'][0]
frameDataNext = self._motion_data['Frames'][lastFrame]
basePosStart = [frameData[1],frameData[2],frameData[3]]
basePosEnd = [frameDataNext[1],frameDataNext[2],frameDataNext[3]]
self._cycleOffset = [basePosEnd[0]-basePosStart[0],basePosEnd[1]-basePosStart[1],basePosEnd[2]-basePosStart[2]]
return self._cycleOffset
basePosStart = [frameData[1], frameData[2], frameData[3]]
basePosEnd = [frameDataNext[1], frameDataNext[2], frameDataNext[3]]
self._cycleOffset = [
basePosEnd[0] - basePosStart[0], basePosEnd[1] - basePosStart[1],
basePosEnd[2] - basePosStart[2]
]
return self._cycleOffset

View File

@@ -9,301 +9,313 @@ from pybullet_envs.deep_mimic.env import humanoid_stable_pd
import pybullet_data
import pybullet as p1
import random
class PyBulletDeepMimicEnv(Env):
def __init__(self, arg_parser=None, enable_draw=False, pybullet_client=None):
super().__init__(arg_parser, enable_draw)
self._num_agents = 1
self._pybullet_client = pybullet_client
self._isInitialized = False
self._useStablePD = True
self._arg_parser = arg_parser
self.reset()
def reset(self):
if not self._isInitialized:
if self.enable_draw:
self._pybullet_client = bullet_client.BulletClient(connection_mode=p1.GUI)
#disable 'GUI' since it slows down a lot on Mac OSX and some other platforms
self._pybullet_client.configureDebugVisualizer(self._pybullet_client.COV_ENABLE_GUI,0)
else:
self._pybullet_client = bullet_client.BulletClient()
self._pybullet_client.setAdditionalSearchPath(pybullet_data.getDataPath())
z2y = self._pybullet_client.getQuaternionFromEuler([-math.pi*0.5,0,0])
self._planeId = self._pybullet_client.loadURDF("plane_implicit.urdf",[0,0,0],z2y, useMaximalCoordinates=True)
#print("planeId=",self._planeId)
self._pybullet_client.configureDebugVisualizer(self._pybullet_client.COV_ENABLE_Y_AXIS_UP,1)
self._pybullet_client.setGravity(0,-9.8,0)
self._pybullet_client.setPhysicsEngineParameter(numSolverIterations=10)
self._pybullet_client.changeDynamics(self._planeId, linkIndex=-1, lateralFriction=0.9)
self._mocapData = motion_capture_data.MotionCaptureData()
motion_file = self._arg_parser.parse_strings('motion_file')
print("motion_file=",motion_file[0])
motionPath = pybullet_data.getDataPath()+"/"+motion_file[0]
#motionPath = pybullet_data.getDataPath()+"/motions/humanoid3d_backflip.txt"
self._mocapData.Load(motionPath)
timeStep = 1./600.
useFixedBase=False
self._humanoid = humanoid_stable_pd.HumanoidStablePD(self._pybullet_client, self._mocapData, timeStep, useFixedBase)
self._isInitialized = True
self._pybullet_client.setTimeStep(timeStep)
self._pybullet_client.setPhysicsEngineParameter(numSubSteps=1)
selfCheck = False
if (selfCheck):
curTime = 0
while self._pybullet_client.isConnected():
self._humanoid.setSimTime(curTime)
state = self._humanoid.getState()
#print("state=",state)
pose = self._humanoid.computePose(self._humanoid._frameFraction)
for i in range (10):
curTime+=timeStep
#taus = self._humanoid.computePDForces(pose)
#self._humanoid.applyPDForces(taus)
#self._pybullet_client.stepSimulation()
time.sleep(timeStep)
#print("numframes = ", self._humanoid._mocap_data.NumFrames())
#startTime = random.randint(0,self._humanoid._mocap_data.NumFrames()-2)
rnrange = 1000
rn = random.randint(0,rnrange)
startTime = float(rn)/rnrange * self._humanoid.getCycleTime()
self.t = startTime
self._humanoid.setSimTime(startTime)
self._humanoid.resetPose()
#this clears the contact points. Todo: add API to explicitly clear all contact points?
#self._pybullet_client.stepSimulation()
self._humanoid.resetPose()
self.needs_update_time = self.t-1#force update
def get_num_agents(self):
return self._num_agents
def get_action_space(self, agent_id):
return ActionSpace(ActionSpace.Continuous)
def get_reward_min(self, agent_id):
return 0
def get_reward_max(self, agent_id):
return 1
def get_reward_fail(self, agent_id):
return self.get_reward_min(agent_id)
def get_reward_succ(self, agent_id):
return self.get_reward_max(agent_id)
#scene_name == "imitate" -> cDrawSceneImitate
def get_state_size(self, agent_id):
#cCtController::GetStateSize()
#int state_size = cDeepMimicCharController::GetStateSize();
# state_size += GetStatePoseSize();#106
# state_size += GetStateVelSize(); #(3+3)*numBodyParts=90
#state_size += GetStatePhaseSize();#1
#197
return 197
def build_state_norm_groups(self, agent_id):
#if (mEnablePhaseInput)
#{
#int phase_group = gNormGroupNone;
#int phase_offset = GetStatePhaseOffset();
#int phase_size = GetStatePhaseSize();
#out_groups.segment(phase_offset, phase_size) = phase_group * Eigen::VectorXi::Ones(phase_size);
groups = [0]*self.get_state_size(agent_id)
groups[0] = -1
return groups
def build_state_offset(self, agent_id):
out_offset = [0]*self.get_state_size(agent_id)
phase_offset = -0.5
out_offset[0] = phase_offset
return np.array(out_offset)
def build_state_scale(self, agent_id):
out_scale = [1]*self.get_state_size(agent_id)
phase_scale = 2
out_scale[0] = phase_scale
return np.array(out_scale)
def __init__(self, arg_parser=None, enable_draw=False, pybullet_client=None):
super().__init__(arg_parser, enable_draw)
self._num_agents = 1
self._pybullet_client = pybullet_client
self._isInitialized = False
self._useStablePD = True
self._arg_parser = arg_parser
self.reset()
def get_goal_size(self, agent_id):
return 0
def reset(self):
def get_action_size(self, agent_id):
ctrl_size = 43 #numDof
root_size = 7
return ctrl_size - root_size
def build_goal_norm_groups(self, agent_id):
return np.array([])
def build_goal_offset(self, agent_id):
return np.array([])
def build_goal_scale(self, agent_id):
return np.array([])
def build_action_offset(self, agent_id):
out_offset = [0] * self.get_action_size(agent_id)
out_offset = [0.0000000000,0.0000000000,0.0000000000,-0.200000000,0.0000000000,0.0000000000,0.0000000000,
-0.200000000,0.0000000000,0.0000000000, 0.00000000, -0.2000000, 1.57000000, 0.00000000, 0.00000000,
0.00000000, -0.2000000, 0.00000000, 0.00000000, 0.00000000, -0.2000000, -1.5700000, 0.00000000, 0.00000000,
0.00000000, -0.2000000, 1.57000000, 0.00000000, 0.00000000, 0.00000000, -0.2000000, 0.00000000, 0.00000000,
0.00000000, -0.2000000, -1.5700000]
#see cCtCtrlUtil::BuildOffsetScalePDPrismatic and
#see cCtCtrlUtil::BuildOffsetScalePDSpherical
return np.array(out_offset)
def build_action_scale(self, agent_id):
out_scale = [1] * self.get_action_size(agent_id)
#see cCtCtrlUtil::BuildOffsetScalePDPrismatic and
#see cCtCtrlUtil::BuildOffsetScalePDSpherical
out_scale=[ 0.20833333333333,1.00000000000000,1.00000000000000,1.00000000000000,0.25000000000000,
1.00000000000000,1.00000000000000,1.00000000000000,0.12077294685990,1.00000000000000,
1.000000000000, 1.000000000000, 0.159235668789, 0.159235668789, 1.000000000000,
1.000000000000, 1.000000000000, 0.079617834394, 1.000000000000, 1.000000000000,
1.000000000000, 0.159235668789, 0.120772946859, 1.000000000000, 1.000000000000,
1.000000000000, 0.159235668789, 0.159235668789, 1.000000000000, 1.000000000000,
1.000000000000, 0.107758620689, 1.000000000000, 1.000000000000, 1.000000000000,
0.159235668789]
return np.array(out_scale)
def build_action_bound_min(self, agent_id):
#see cCtCtrlUtil::BuildBoundsPDSpherical
out_scale = [-1] * self.get_action_size(agent_id)
out_scale = [-4.79999999999,-1.00000000000,-1.00000000000,-1.00000000000,-4.00000000000,
-1.00000000000,-1.00000000000,-1.00000000000,-7.77999999999,-1.00000000000, -1.000000000,
-1.000000000, -7.850000000, -6.280000000, -1.000000000, -1.000000000, -1.000000000,
-12.56000000, -1.000000000, -1.000000000, -1.000000000, -4.710000000,
-7.779999999, -1.000000000, -1.000000000, -1.000000000, -7.850000000,
-6.280000000, -1.000000000, -1.000000000, -1.000000000, -8.460000000,
-1.000000000, -1.000000000, -1.000000000, -4.710000000]
if not self._isInitialized:
if self.enable_draw:
self._pybullet_client = bullet_client.BulletClient(connection_mode=p1.GUI)
#disable 'GUI' since it slows down a lot on Mac OSX and some other platforms
self._pybullet_client.configureDebugVisualizer(self._pybullet_client.COV_ENABLE_GUI, 0)
else:
self._pybullet_client = bullet_client.BulletClient()
self._pybullet_client.setAdditionalSearchPath(pybullet_data.getDataPath())
z2y = self._pybullet_client.getQuaternionFromEuler([-math.pi * 0.5, 0, 0])
self._planeId = self._pybullet_client.loadURDF("plane_implicit.urdf", [0, 0, 0],
z2y,
useMaximalCoordinates=True)
#print("planeId=",self._planeId)
self._pybullet_client.configureDebugVisualizer(self._pybullet_client.COV_ENABLE_Y_AXIS_UP, 1)
self._pybullet_client.setGravity(0, -9.8, 0)
self._pybullet_client.setPhysicsEngineParameter(numSolverIterations=10)
self._pybullet_client.changeDynamics(self._planeId, linkIndex=-1, lateralFriction=0.9)
self._mocapData = motion_capture_data.MotionCaptureData()
motion_file = self._arg_parser.parse_strings('motion_file')
print("motion_file=", motion_file[0])
motionPath = pybullet_data.getDataPath() + "/" + motion_file[0]
#motionPath = pybullet_data.getDataPath()+"/motions/humanoid3d_backflip.txt"
self._mocapData.Load(motionPath)
timeStep = 1. / 600.
useFixedBase = False
self._humanoid = humanoid_stable_pd.HumanoidStablePD(self._pybullet_client, self._mocapData,
timeStep, useFixedBase)
self._isInitialized = True
return out_scale
def build_action_bound_max(self, agent_id):
out_scale = [1] * self.get_action_size(agent_id)
out_scale=[
4.799999999,1.000000000,1.000000000,1.000000000,4.000000000,1.000000000,
1.000000000,1.000000000,8.779999999,1.000000000, 1.0000000, 1.0000000,
4.7100000, 6.2800000, 1.0000000, 1.0000000, 1.0000000,
12.560000, 1.0000000, 1.0000000, 1.0000000, 7.8500000,
8.7799999, 1.0000000, 1.0000000, 1.0000000, 4.7100000,
6.2800000, 1.0000000, 1.0000000, 1.0000000, 10.100000,
1.0000000, 1.0000000, 1.0000000, 7.8500000]
return out_scale
def set_mode(self, mode):
self._mode = mode
def need_new_action(self, agent_id):
if self.t>=self.needs_update_time:
self.needs_update_time = self.t + 1./30.
return True
return False
def record_state(self, agent_id):
state = self._humanoid.getState()
return np.array(state)
def record_goal(self, agent_id):
return np.array([])
def calc_reward(self, agent_id):
kinPose = self._humanoid.computePose(self._humanoid._frameFraction)
reward = self._humanoid.getReward(kinPose)
return reward
def set_action(self, agent_id, action):
#print("action=",)
#for a in action:
# print(a)
np.savetxt("pb_action.csv", action, delimiter=",")
self.desiredPose = self._humanoid.convertActionToPose(action)
#we need the target root position and orientation to be zero, to be compatible with deep mimic
self.desiredPose[0] = 0
self.desiredPose[1] = 0
self.desiredPose[2] = 0
self.desiredPose[3] = 0
self.desiredPose[4] = 0
self.desiredPose[5] = 0
self.desiredPose[6] = 0
target_pose = np.array(self.desiredPose)
np.savetxt("pb_target_pose.csv", target_pose, delimiter=",")
#print("set_action: desiredPose=", self.desiredPose)
def log_val(self, agent_id, val):
pass
def update(self, timeStep):
#print("pybullet_deep_mimic_env:update timeStep=",timeStep," t=",self.t)
self._pybullet_client.setTimeStep(timeStep)
self._humanoid._timeStep = timeStep
for i in range(1):
self.t += timeStep
self._humanoid.setSimTime(self.t)
if self.desiredPose:
kinPose = self._humanoid.computePose(self._humanoid._frameFraction)
self._humanoid.initializePose(self._humanoid._poseInterpolator, self._humanoid._kin_model, initBase=True)
#pos,orn=self._pybullet_client.getBasePositionAndOrientation(self._humanoid._sim_model)
#self._pybullet_client.resetBasePositionAndOrientation(self._humanoid._kin_model, [pos[0]+3,pos[1],pos[2]],orn)
#print("desiredPositions=",self.desiredPose)
maxForces = [0,0,0,0,0,0,0,200,200,200,200, 50,50,50,50, 200,200,200,200, 150, 90,90,90,90, 100,100,100,100, 60, 200,200,200,200, 150, 90, 90, 90, 90, 100,100,100,100, 60]
if self._useStablePD:
taus = self._humanoid.computePDForces(self.desiredPose, desiredVelocities=None, maxForces=maxForces)
self._humanoid.applyPDForces(taus)
else:
self._humanoid.setJointMotors(self.desiredPose, maxForces=maxForces)
self._pybullet_client.stepSimulation()
self._pybullet_client.setPhysicsEngineParameter(numSubSteps=1)
def set_sample_count(self, count):
return
def check_terminate(self, agent_id):
return Env.Terminate(self.is_episode_end())
def is_episode_end(self):
isEnded = self._humanoid.terminates()
#also check maximum time, 20 seconds (todo get from file)
#print("self.t=",self.t)
if (self.t>20):
isEnded = True
return isEnded
def check_valid_episode(self):
#could check if limbs exceed velocity threshold
return True
def getKeyboardEvents(self):
return self._pybullet_client.getKeyboardEvents()
def isKeyTriggered(self, keys, key):
o = ord(key)
#print("ord=",o)
if o in keys:
return keys[ord(key)] & self._pybullet_client.KEY_WAS_TRIGGERED
return False
selfCheck = False
if (selfCheck):
curTime = 0
while self._pybullet_client.isConnected():
self._humanoid.setSimTime(curTime)
state = self._humanoid.getState()
#print("state=",state)
pose = self._humanoid.computePose(self._humanoid._frameFraction)
for i in range(10):
curTime += timeStep
#taus = self._humanoid.computePDForces(pose)
#self._humanoid.applyPDForces(taus)
#self._pybullet_client.stepSimulation()
time.sleep(timeStep)
#print("numframes = ", self._humanoid._mocap_data.NumFrames())
#startTime = random.randint(0,self._humanoid._mocap_data.NumFrames()-2)
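# Reference-state initialization: each episode starts at a random phase of the
# mocap cycle rather than always at frame 0.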
rnrange = 1000
rn = random.randint(0, rnrange)
startTime = float(rn) / rnrange * self._humanoid.getCycleTime()
self.t = startTime
self._humanoid.setSimTime(startTime)
self._humanoid.resetPose()
#this clears the contact points. Todo: add API to explicitly clear all contact points?
#self._pybullet_client.stepSimulation()
self._humanoid.resetPose()
self.needs_update_time = self.t - 1 #force update
def get_num_agents(self):
return self._num_agents
def get_action_space(self, agent_id):
return ActionSpace(ActionSpace.Continuous)
def get_reward_min(self, agent_id):
return 0
def get_reward_max(self, agent_id):
return 1
def get_reward_fail(self, agent_id):
return self.get_reward_min(agent_id)
def get_reward_succ(self, agent_id):
return self.get_reward_max(agent_id)
#scene_name == "imitate" -> cDrawSceneImitate
def get_state_size(self, agent_id):
#cCtController::GetStateSize()
#int state_size = cDeepMimicCharController::GetStateSize();
# state_size += GetStatePoseSize();#106
# state_size += GetStateVelSize(); #(3+3)*numBodyParts=90
#state_size += GetStatePhaseSize();#1
#197
return 197
def build_state_norm_groups(self, agent_id):
#if (mEnablePhaseInput)
#{
#int phase_group = gNormGroupNone;
#int phase_offset = GetStatePhaseOffset();
#int phase_size = GetStatePhaseSize();
#out_groups.segment(phase_offset, phase_size) = phase_group * Eigen::VectorXi::Ones(phase_size);
groups = [0] * self.get_state_size(agent_id)
groups[0] = -1
return groups
def build_state_offset(self, agent_id):
out_offset = [0] * self.get_state_size(agent_id)
phase_offset = -0.5
out_offset[0] = phase_offset
return np.array(out_offset)
def build_state_scale(self, agent_id):
out_scale = [1] * self.get_state_size(agent_id)
phase_scale = 2
out_scale[0] = phase_scale
return np.array(out_scale)
def get_goal_size(self, agent_id):
return 0
def get_action_size(self, agent_id):
ctrl_size = 43 #numDof
root_size = 7
return ctrl_size - root_size
def build_goal_norm_groups(self, agent_id):
return np.array([])
def build_goal_offset(self, agent_id):
return np.array([])
def build_goal_scale(self, agent_id):
return np.array([])
def build_action_offset(self, agent_id):
out_offset = [0] * self.get_action_size(agent_id)
out_offset = [
0.0000000000, 0.0000000000, 0.0000000000, -0.200000000, 0.0000000000, 0.0000000000,
0.0000000000, -0.200000000, 0.0000000000, 0.0000000000, 0.00000000, -0.2000000, 1.57000000,
0.00000000, 0.00000000, 0.00000000, -0.2000000, 0.00000000, 0.00000000, 0.00000000,
-0.2000000, -1.5700000, 0.00000000, 0.00000000, 0.00000000, -0.2000000, 1.57000000,
0.00000000, 0.00000000, 0.00000000, -0.2000000, 0.00000000, 0.00000000, 0.00000000,
-0.2000000, -1.5700000
]
#see cCtCtrlUtil::BuildOffsetScalePDPrismatic and
#see cCtCtrlUtil::BuildOffsetScalePDSpherical
return np.array(out_offset)
def build_action_scale(self, agent_id):
out_scale = [1] * self.get_action_size(agent_id)
#see cCtCtrlUtil::BuildOffsetScalePDPrismatic and
#see cCtCtrlUtil::BuildOffsetScalePDSpherical
out_scale = [
0.20833333333333, 1.00000000000000, 1.00000000000000, 1.00000000000000, 0.25000000000000,
1.00000000000000, 1.00000000000000, 1.00000000000000, 0.12077294685990, 1.00000000000000,
1.000000000000, 1.000000000000, 0.159235668789, 0.159235668789, 1.000000000000,
1.000000000000, 1.000000000000, 0.079617834394, 1.000000000000, 1.000000000000,
1.000000000000, 0.159235668789, 0.120772946859, 1.000000000000, 1.000000000000,
1.000000000000, 0.159235668789, 0.159235668789, 1.000000000000, 1.000000000000,
1.000000000000, 0.107758620689, 1.000000000000, 1.000000000000, 1.000000000000,
0.159235668789
]
return np.array(out_scale)
def build_action_bound_min(self, agent_id):
#see cCtCtrlUtil::BuildBoundsPDSpherical
out_scale = [-1] * self.get_action_size(agent_id)
out_scale = [
-4.79999999999, -1.00000000000, -1.00000000000, -1.00000000000, -4.00000000000,
-1.00000000000, -1.00000000000, -1.00000000000, -7.77999999999, -1.00000000000,
-1.000000000, -1.000000000, -7.850000000, -6.280000000, -1.000000000, -1.000000000,
-1.000000000, -12.56000000, -1.000000000, -1.000000000, -1.000000000, -4.710000000,
-7.779999999, -1.000000000, -1.000000000, -1.000000000, -7.850000000, -6.280000000,
-1.000000000, -1.000000000, -1.000000000, -8.460000000, -1.000000000, -1.000000000,
-1.000000000, -4.710000000
]
return out_scale
def build_action_bound_max(self, agent_id):
out_scale = [1] * self.get_action_size(agent_id)
out_scale = [
4.799999999, 1.000000000, 1.000000000, 1.000000000, 4.000000000, 1.000000000, 1.000000000,
1.000000000, 8.779999999, 1.000000000, 1.0000000, 1.0000000, 4.7100000, 6.2800000,
1.0000000, 1.0000000, 1.0000000, 12.560000, 1.0000000, 1.0000000, 1.0000000, 7.8500000,
8.7799999, 1.0000000, 1.0000000, 1.0000000, 4.7100000, 6.2800000, 1.0000000, 1.0000000,
1.0000000, 10.100000, 1.0000000, 1.0000000, 1.0000000, 7.8500000
]
return out_scale
def set_mode(self, mode):
self._mode = mode
def need_new_action(self, agent_id):
if self.t >= self.needs_update_time:
self.needs_update_time = self.t + 1. / 30.
return True
return False
def record_state(self, agent_id):
state = self._humanoid.getState()
return np.array(state)
def record_goal(self, agent_id):
return np.array([])
def calc_reward(self, agent_id):
kinPose = self._humanoid.computePose(self._humanoid._frameFraction)
reward = self._humanoid.getReward(kinPose)
return reward
def set_action(self, agent_id, action):
#print("action=",)
#for a in action:
# print(a)
np.savetxt("pb_action.csv", action, delimiter=",")
self.desiredPose = self._humanoid.convertActionToPose(action)
#we need the target root position and orientation to be zero, to be compatible with deep mimic
self.desiredPose[0] = 0
self.desiredPose[1] = 0
self.desiredPose[2] = 0
self.desiredPose[3] = 0
self.desiredPose[4] = 0
self.desiredPose[5] = 0
self.desiredPose[6] = 0
target_pose = np.array(self.desiredPose)
np.savetxt("pb_target_pose.csv", target_pose, delimiter=",")
#print("set_action: desiredPose=", self.desiredPose)
def log_val(self, agent_id, val):
pass
def update(self, timeStep):
#print("pybullet_deep_mimic_env:update timeStep=",timeStep," t=",self.t)
self._pybullet_client.setTimeStep(timeStep)
self._humanoid._timeStep = timeStep
for i in range(1):
self.t += timeStep
self._humanoid.setSimTime(self.t)
if self.desiredPose:
kinPose = self._humanoid.computePose(self._humanoid._frameFraction)
self._humanoid.initializePose(self._humanoid._poseInterpolator,
self._humanoid._kin_model,
initBase=True)
#pos,orn=self._pybullet_client.getBasePositionAndOrientation(self._humanoid._sim_model)
#self._pybullet_client.resetBasePositionAndOrientation(self._humanoid._kin_model, [pos[0]+3,pos[1],pos[2]],orn)
#print("desiredPositions=",self.desiredPose)
maxForces = [
0, 0, 0, 0, 0, 0, 0, 200, 200, 200, 200, 50, 50, 50, 50, 200, 200, 200, 200, 150, 90,
90, 90, 90, 100, 100, 100, 100, 60, 200, 200, 200, 200, 150, 90, 90, 90, 90, 100, 100,
100, 100, 60
]
if self._useStablePD:
taus = self._humanoid.computePDForces(self.desiredPose,
desiredVelocities=None,
maxForces=maxForces)
self._humanoid.applyPDForces(taus)
else:
self._humanoid.setJointMotors(self.desiredPose, maxForces=maxForces)
self._pybullet_client.stepSimulation()
def set_sample_count(self, count):
return
def check_terminate(self, agent_id):
return Env.Terminate(self.is_episode_end())
def is_episode_end(self):
isEnded = self._humanoid.terminates()
#also check maximum time, 20 seconds (todo get from file)
#print("self.t=",self.t)
if (self.t > 20):
isEnded = True
return isEnded
def check_valid_episode(self):
#could check if limbs exceed velocity threshold
return True
def getKeyboardEvents(self):
return self._pybullet_client.getKeyboardEvents()
def isKeyTriggered(self, keys, key):
o = ord(key)
#print("ord=",o)
if o in keys:
return keys[ord(key)] & self._pybullet_client.KEY_WAS_TRIGGERED
return False

View File

@@ -1,54 +1,66 @@
from pybullet_utils import bullet_client
import math
class QuadrupedPoseInterpolator(object):
def __init__(self):
pass
def ComputeLinVel(self,posStart, posEnd, deltaTime):
vel = [(posEnd[0]-posStart[0])/deltaTime,(posEnd[1]-posStart[1])/deltaTime,(posEnd[2]-posStart[2])/deltaTime]
def ComputeLinVel(self, posStart, posEnd, deltaTime):
vel = [(posEnd[0] - posStart[0]) / deltaTime, (posEnd[1] - posStart[1]) / deltaTime,
(posEnd[2] - posStart[2]) / deltaTime]
return vel
def ComputeAngVel(self,ornStart, ornEnd, deltaTime, bullet_client):
dorn = bullet_client.getDifferenceQuaternion(ornStart,ornEnd)
axis,angle = bullet_client.getAxisAngleFromQuaternion(dorn)
angVel = [(axis[0]*angle)/deltaTime,(axis[1]*angle)/deltaTime,(axis[2]*angle)/deltaTime]
def ComputeAngVel(self, ornStart, ornEnd, deltaTime, bullet_client):
dorn = bullet_client.getDifferenceQuaternion(ornStart, ornEnd)
axis, angle = bullet_client.getAxisAngleFromQuaternion(dorn)
angVel = [(axis[0] * angle) / deltaTime, (axis[1] * angle) / deltaTime,
(axis[2] * angle) / deltaTime]
return angVel
def ComputeAngVelRel(self,ornStart, ornEnd, deltaTime, bullet_client):
ornStartConjugate = [-ornStart[0],-ornStart[1],-ornStart[2],ornStart[3]]
pos_diff, q_diff =bullet_client.multiplyTransforms([0,0,0], ornStartConjugate, [0,0,0], ornEnd)
axis,angle = bullet_client.getAxisAngleFromQuaternion(q_diff)
angVel = [(axis[0]*angle)/deltaTime,(axis[1]*angle)/deltaTime,(axis[2]*angle)/deltaTime]
def ComputeAngVelRel(self, ornStart, ornEnd, deltaTime, bullet_client):
ornStartConjugate = [-ornStart[0], -ornStart[1], -ornStart[2], ornStart[3]]
pos_diff, q_diff = bullet_client.multiplyTransforms([0, 0, 0], ornStartConjugate, [0, 0, 0],
ornEnd)
axis, angle = bullet_client.getAxisAngleFromQuaternion(q_diff)
angVel = [(axis[0] * angle) / deltaTime, (axis[1] * angle) / deltaTime,
(axis[2] * angle) / deltaTime]
return angVel
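# Slerp interpolates between two mocap keyframes: frameData[0] is the keyframe
# duration, entries 1-3 the base position, entries 4-7 the base orientation
# quaternion stored w-first, and the remaining 12 entries (from index 8) the leg
# joint angles.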
def Slerp(self, frameFraction, frameData, frameDataNext,bullet_client ):
def Slerp(self, frameFraction, frameData, frameDataNext, bullet_client):
keyFrameDuration = frameData[0]
basePos1Start = [frameData[1],frameData[2],frameData[3]]
basePos1End = [frameDataNext[1],frameDataNext[2],frameDataNext[3]]
self._basePos = [basePos1Start[0]+frameFraction*(basePos1End[0]-basePos1Start[0]),
basePos1Start[1]+frameFraction*(basePos1End[1]-basePos1Start[1]),
basePos1Start[2]+frameFraction*(basePos1End[2]-basePos1Start[2])]
self._baseLinVel = self.ComputeLinVel(basePos1Start,basePos1End, keyFrameDuration)
baseOrn1Start = [frameData[5],frameData[6], frameData[7],frameData[4]]
baseOrn1Next = [frameDataNext[5],frameDataNext[6], frameDataNext[7],frameDataNext[4]]
self._baseOrn = bullet_client.getQuaternionSlerp(baseOrn1Start,baseOrn1Next,frameFraction)
self._baseAngVel = self.ComputeAngVel(baseOrn1Start,baseOrn1Next, keyFrameDuration, bullet_client)
jointPositions=[self._basePos[0],self._basePos[1],self._basePos[2],
self._baseOrn[0],self._baseOrn[1],self._baseOrn[2],self._baseOrn[3]]
jointVelocities=[self._baseLinVel[0],self._baseLinVel[1],self._baseLinVel[2],
self._baseAngVel[0],self._baseAngVel[1],self._baseAngVel[2]]
for j in range (12):
index=j+8
jointPosStart=frameData[index]
jointPosEnd=frameDataNext[index]
jointPos=jointPosStart+frameFraction*(jointPosEnd-jointPosStart)
jointVel=(jointPosEnd-jointPosStart)/keyFrameDuration
basePos1Start = [frameData[1], frameData[2], frameData[3]]
basePos1End = [frameDataNext[1], frameDataNext[2], frameDataNext[3]]
self._basePos = [
basePos1Start[0] + frameFraction * (basePos1End[0] - basePos1Start[0]),
basePos1Start[1] + frameFraction * (basePos1End[1] - basePos1Start[1]),
basePos1Start[2] + frameFraction * (basePos1End[2] - basePos1Start[2])
]
self._baseLinVel = self.ComputeLinVel(basePos1Start, basePos1End, keyFrameDuration)
baseOrn1Start = [frameData[5], frameData[6], frameData[7], frameData[4]]
baseOrn1Next = [frameDataNext[5], frameDataNext[6], frameDataNext[7], frameDataNext[4]]
self._baseOrn = bullet_client.getQuaternionSlerp(baseOrn1Start, baseOrn1Next, frameFraction)
self._baseAngVel = self.ComputeAngVel(baseOrn1Start, baseOrn1Next, keyFrameDuration,
bullet_client)
jointPositions = [
self._basePos[0], self._basePos[1], self._basePos[2], self._baseOrn[0], self._baseOrn[1],
self._baseOrn[2], self._baseOrn[3]
]
jointVelocities = [
self._baseLinVel[0], self._baseLinVel[1], self._baseLinVel[2], self._baseAngVel[0],
self._baseAngVel[1], self._baseAngVel[2]
]
for j in range(12):
index = j + 8
jointPosStart = frameData[index]
jointPosEnd = frameDataNext[index]
jointPos = jointPosStart + frameFraction * (jointPosEnd - jointPosStart)
jointVel = (jointPosEnd - jointPosStart) / keyFrameDuration
jointPositions.append(jointPos)
jointVelocities.append(jointVel)
self._jointPositions = jointPositions
self._jointVelocities = jointVelocities
return jointPositions,jointVelocities
return jointPositions, jointVelocities

File diff suppressed because it is too large

View File

@@ -8,87 +8,89 @@ import pybullet as p1
import humanoid_pose_interpolator
import numpy as np
pybullet_client = bullet_client.BulletClient(connection_mode=p1.GUI)
pybullet_client = bullet_client.BulletClient(connection_mode=p1.GUI)
pybullet_client.setAdditionalSearchPath(pybullet_data.getDataPath())
z2y = pybullet_client.getQuaternionFromEuler([-math.pi*0.5,0,0])
z2y = pybullet_client.getQuaternionFromEuler([-math.pi * 0.5, 0, 0])
#planeId = pybullet_client.loadURDF("plane.urdf",[0,0,0],z2y)
planeId= pybullet_client.loadURDF("plane_implicit.urdf",[0,0,0],z2y, useMaximalCoordinates=True)
pybullet_client.changeDynamics(planeId, linkIndex=-1, lateralFriction=0.9)
planeId = pybullet_client.loadURDF("plane_implicit.urdf", [0, 0, 0],
z2y,
useMaximalCoordinates=True)
pybullet_client.changeDynamics(planeId, linkIndex=-1, lateralFriction=0.9)
#print("planeId=",planeId)
pybullet_client.configureDebugVisualizer(pybullet_client.COV_ENABLE_Y_AXIS_UP,1)
pybullet_client.setGravity(0,-9.8,0)
pybullet_client.configureDebugVisualizer(pybullet_client.COV_ENABLE_Y_AXIS_UP, 1)
pybullet_client.setGravity(0, -9.8, 0)
pybullet_client.setPhysicsEngineParameter(numSolverIterations=10)
mocapData = motion_capture_data.MotionCaptureData()
#motionPath = pybullet_data.getDataPath()+"/data/motions/humanoid3d_walk.txt"
motionPath = pybullet_data.getDataPath()+"/data/motions/humanoid3d_backflip.txt"
motionPath = pybullet_data.getDataPath() + "/data/motions/humanoid3d_backflip.txt"
mocapData.Load(motionPath)
timeStep = 1./600
useFixedBase=False
timeStep = 1. / 600
useFixedBase = False
humanoid = humanoid_stable_pd.HumanoidStablePD(pybullet_client, mocapData, timeStep, useFixedBase)
isInitialized = True
pybullet_client.setTimeStep(timeStep)
pybullet_client.setPhysicsEngineParameter(numSubSteps=2)
timeId = pybullet_client.addUserDebugParameter("time",0,10,0)
timeId = pybullet_client.addUserDebugParameter("time", 0, 10, 0)
def isKeyTriggered(keys, key):
o = ord(key)
if o in keys:
return keys[ord(key)] & pybullet_client.KEY_WAS_TRIGGERED
return False
animating = False
singleStep = False
t=0
t = 0
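# Main loop: space toggles continuous playback, 'b' advances a single step; each
# step tracks the current mocap pose with stable-PD torques at 600 Hz.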
while (1):
keys = pybullet_client.getKeyboardEvents()
#print(keys)
if isKeyTriggered(keys, ' '):
animating = not animating
animating = not animating
if isKeyTriggered(keys, 'b'):
singleStep = True
singleStep = True
if animating or singleStep:
singleStep = False
#t = pybullet_client.readUserDebugParameter(timeId)
#print("t=",t)
for i in range (1):
for i in range(1):
print("t=",t)
print("t=", t)
humanoid.setSimTime(t)
humanoid.computePose(humanoid._frameFraction)
pose = humanoid._poseInterpolator
#humanoid.initializePose(pose=pose, phys_model = humanoid._sim_model, initBase=True, initializeVelocity=True)
#humanoid.resetPose()
desiredPose = humanoid.computePose(humanoid._frameFraction)
#desiredPose = desiredPose.GetPose()
#desiredPose = desiredPose.GetPose()
#curPose = HumanoidPoseInterpolator()
#curPose.reset()
s = humanoid.getState()
#np.savetxt("pb_record_state_s.csv", s, delimiter=",")
maxForces = [0,0,0,0,0,0,0,200,200,200,200, 50,50,50,50, 200,200,200,200, 150, 90,90,90,90, 100,100,100,100, 60, 200,200,200,200, 150, 90, 90, 90, 90, 100,100,100,100, 60]
maxForces = [
0, 0, 0, 0, 0, 0, 0, 200, 200, 200, 200, 50, 50, 50, 50, 200, 200, 200, 200, 150, 90, 90,
90, 90, 100, 100, 100, 100, 60, 200, 200, 200, 200, 150, 90, 90, 90, 90, 100, 100, 100,
100, 60
]
taus = humanoid.computePDForces(desiredPose, desiredVelocities=None, maxForces=maxForces)
#print("taus=",taus)
humanoid.applyPDForces(taus)
pybullet_client.stepSimulation()
t+=1./600.
time.sleep(1./600.)
t += 1. / 600.
time.sleep(1. / 600.)

View File

@@ -8,240 +8,258 @@ import motion_capture_data
import quadrupedPoseInterpolator
useConstraints = False
p = bullet_client.BulletClient(connection_mode=p1.GUI)
p = bullet_client.BulletClient(connection_mode=p1.GUI)
p.setAdditionalSearchPath(pybullet_data.getDataPath())
plane = p.loadURDF("plane.urdf")
p.setGravity(0,0,-10)
timeStep=1./500
p.setGravity(0, 0, -10)
timeStep = 1. / 500
p.setTimeStep(timeStep)
#p.setDefaultContactERP(0)
#urdfFlags = p.URDF_USE_SELF_COLLISION+p.URDF_USE_SELF_COLLISION_EXCLUDE_ALL_PARENTS
#urdfFlags = p.URDF_USE_SELF_COLLISION+p.URDF_USE_SELF_COLLISION_EXCLUDE_ALL_PARENTS
urdfFlags = p.URDF_USE_SELF_COLLISION
startPos=[0.007058990464444105, 0.03149299192130908, 0.4918981912395484]
startOrn=[0.005934649695708604, 0.7065453990917289, 0.7076373820553712, -0.0027774940359030264]
quadruped = p.loadURDF("laikago/laikago.urdf",startPos,startOrn, flags = urdfFlags,useFixedBase=False)
p.resetBasePositionAndOrientation(quadruped,startPos,startOrn)
startPos = [0.007058990464444105, 0.03149299192130908, 0.4918981912395484]
startOrn = [0.005934649695708604, 0.7065453990917289, 0.7076373820553712, -0.0027774940359030264]
quadruped = p.loadURDF("laikago/laikago.urdf",
startPos,
startOrn,
flags=urdfFlags,
useFixedBase=False)
p.resetBasePositionAndOrientation(quadruped, startPos, startOrn)
if not useConstraints:
for j in range(p.getNumJoints(quadruped)):
p.setJointMotorControl2(quadruped,j,p.POSITION_CONTROL,force=0)
#This cube is added as a soft constraint to keep the laikago from falling
p.setJointMotorControl2(quadruped, j, p.POSITION_CONTROL, force=0)
#This cube is added as a soft constraint to keep the laikago from falling
#since we didn't train it yet, it doesn't balance
cube = p.loadURDF("cube_no_rotation.urdf",[0,0,-0.5],[0,0.5,0.5,0])
p.setCollisionFilterGroupMask(cube,-1,0,0)
cube = p.loadURDF("cube_no_rotation.urdf", [0, 0, -0.5], [0, 0.5, 0.5, 0])
p.setCollisionFilterGroupMask(cube, -1, 0, 0)
for j in range(p.getNumJoints(cube)):
p.setJointMotorControl2(cube,j,p.POSITION_CONTROL,force=0)
p.setCollisionFilterGroupMask(cube,j,0,0)
p.changeVisualShape(cube,j,rgbaColor=[1,0,0,0])
cid = p.createConstraint(cube,p.getNumJoints(cube)-1,quadruped,-1,p.JOINT_FIXED,[0,0,0],[0,1,0],[0,0,0])
p.setJointMotorControl2(cube, j, p.POSITION_CONTROL, force=0)
p.setCollisionFilterGroupMask(cube, j, 0, 0)
p.changeVisualShape(cube, j, rgbaColor=[1, 0, 0, 0])
cid = p.createConstraint(cube,
p.getNumJoints(cube) - 1, quadruped, -1, p.JOINT_FIXED, [0, 0, 0],
[0, 1, 0], [0, 0, 0])
p.changeConstraint(cid, maxForce=10)
jointIds = []
paramIds = []
jointOffsets = []
jointDirections = [-1, 1, 1, 1, 1, 1, -1, 1, 1, 1, 1, 1]
jointAngles = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
jointIds=[]
paramIds=[]
jointOffsets=[]
jointDirections=[-1,1,1,1,1,1,-1,1,1,1,1,1]
jointAngles=[0,0,0,0,0,0,0,0,0,0,0,0]
for i in range (4):
for i in range(4):
jointOffsets.append(0)
jointOffsets.append(-0.7)
jointOffsets.append(0.7)
maxForceId = p.addUserDebugParameter("maxForce",0,100,20)
maxForceId = p.addUserDebugParameter("maxForce", 0, 100, 20)
for j in range (p.getNumJoints(quadruped)):
p.changeDynamics(quadruped,j,linearDamping=0, angularDamping=0)
info = p.getJointInfo(quadruped,j)
#print(info)
jointName = info[1]
jointType = info[2]
if (jointType==p.JOINT_PRISMATIC or jointType==p.JOINT_REVOLUTE):
jointIds.append(j)
startQ=[0.08389, 0.8482, -1.547832, -0.068933, 0.625726, -1.272086, 0.074398, 0.61135, -1.255892, -0.068262, 0.836745, -1.534517]
for j in range(p.getNumJoints(quadruped)):
p.resetJointState(quadruped,jointIds[j],jointDirections[j]*startQ[j]+jointOffsets[j])
p.changeDynamics(quadruped, j, linearDamping=0, angularDamping=0)
info = p.getJointInfo(quadruped, j)
#print(info)
jointName = info[1]
jointType = info[2]
if (jointType == p.JOINT_PRISMATIC or jointType == p.JOINT_REVOLUTE):
jointIds.append(j)
startQ = [
0.08389, 0.8482, -1.547832, -0.068933, 0.625726, -1.272086, 0.074398, 0.61135, -1.255892,
-0.068262, 0.836745, -1.534517
]
for j in range(p.getNumJoints(quadruped)):
p.resetJointState(quadruped, jointIds[j], jointDirections[j] * startQ[j] + jointOffsets[j])
qpi = quadrupedPoseInterpolator.QuadrupedPoseInterpolator()
#enable collision between lower legs
for j in range (p.getNumJoints(quadruped)):
print(p.getJointInfo(quadruped,j))
for j in range(p.getNumJoints(quadruped)):
print(p.getJointInfo(quadruped, j))
#2,5,8 and 11 are the lower legs
lower_legs = [2,5,8,11]
lower_legs = [2, 5, 8, 11]
for l0 in lower_legs:
for l1 in lower_legs:
if (l1>l0):
enableCollision = 1
print("collision for pair",l0,l1, p.getJointInfo(quadruped,l0)[12],p.getJointInfo(quadruped,l1)[12], "enabled=",enableCollision)
p.setCollisionFilterPair(quadruped, quadruped, 2,5,enableCollision)
for l1 in lower_legs:
if (l1 > l0):
enableCollision = 1
print("collision for pair", l0, l1,
p.getJointInfo(quadruped, l0)[12],
p.getJointInfo(quadruped, l1)[12], "enabled=", enableCollision)
p.setCollisionFilterPair(quadruped, quadruped, 2, 5, enableCollision)
jointIds=[]
paramIds=[]
jointOffsets=[]
jointDirections=[-1,1,1,1,1,1,-1,1,1,1,1,1]
jointAngles=[0,0,0,0,0,0,0,0,0,0,0,0]
jointIds = []
paramIds = []
jointOffsets = []
jointDirections = [-1, 1, 1, 1, 1, 1, -1, 1, 1, 1, 1, 1]
jointAngles = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
for i in range (4):
jointOffsets.append(0)
jointOffsets.append(-0.7)
jointOffsets.append(0.7)
for i in range(4):
jointOffsets.append(0)
jointOffsets.append(-0.7)
jointOffsets.append(0.7)
maxForceId = p.addUserDebugParameter("maxForce",0,100,20)
maxForceId = p.addUserDebugParameter("maxForce", 0, 100, 20)
for j in range (p.getNumJoints(quadruped)):
p.changeDynamics(quadruped,j,linearDamping=0, angularDamping=0)
info = p.getJointInfo(quadruped,j)
#print(info)
jointName = info[1]
jointType = info[2]
if (jointType==p.JOINT_PRISMATIC or jointType==p.JOINT_REVOLUTE):
jointIds.append(j)
for j in range(p.getNumJoints(quadruped)):
p.changeDynamics(quadruped, j, linearDamping=0, angularDamping=0)
info = p.getJointInfo(quadruped, j)
#print(info)
jointName = info[1]
jointType = info[2]
if (jointType == p.JOINT_PRISMATIC or jointType == p.JOINT_REVOLUTE):
jointIds.append(j)
p.getCameraImage(480,320)
p.getCameraImage(480, 320)
p.setRealTimeSimulation(0)
joints=[]
joints = []
mocapData = motion_capture_data.MotionCaptureData()
motionPath = pybullet_data.getDataPath()+"/data/motions/laikago_walk.txt"
motionPath = pybullet_data.getDataPath() + "/data/motions/laikago_walk.txt"
mocapData.Load(motionPath)
print("mocapData.NumFrames=",mocapData.NumFrames())
print("mocapData.KeyFrameDuraction=",mocapData.KeyFrameDuraction())
print("mocapData.getCycleTime=",mocapData.getCycleTime())
print("mocapData.computeCycleOffset=",mocapData.computeCycleOffset())
print("mocapData.NumFrames=", mocapData.NumFrames())
print("mocapData.KeyFrameDuraction=", mocapData.KeyFrameDuraction())
print("mocapData.getCycleTime=", mocapData.getCycleTime())
print("mocapData.computeCycleOffset=", mocapData.computeCycleOffset())
stablePD = pd_controller_stable.PDControllerStable(p)
cycleTime = mocapData.getCycleTime()
t=0
t = 0
while t<10.*cycleTime:
while t < 10. * cycleTime:
#get interpolated joint
keyFrameDuration = mocapData.KeyFrameDuraction()
cycleTime = mocapData.getCycleTime()
cycleCount = mocapData.calcCycleCount(t, cycleTime)
#print("cycleTime=",cycleTime)
#print("cycleCount=",cycleCount)
#print("cycles=",cycles)
frameTime = t - cycleCount*cycleTime
frameTime = t - cycleCount * cycleTime
#print("frameTime=",frameTime)
if (frameTime<0):
if (frameTime < 0):
frameTime += cycleTime
frame = int(frameTime/keyFrameDuration)
frameNext = frame+1
if (frameNext >= mocapData.NumFrames()):
frame = int(frameTime / keyFrameDuration)
frameNext = frame + 1
if (frameNext >= mocapData.NumFrames()):
frameNext = frame
frameFraction = (frameTime - frame*keyFrameDuration)/(keyFrameDuration)
frameFraction = (frameTime - frame * keyFrameDuration) / (keyFrameDuration)
#print("frame=",frame)
#print("frameFraction=",frameFraction)
frameData = mocapData._motion_data['Frames'][frame]
frameDataNext = mocapData._motion_data['Frames'][frameNext]
jointsStr,qdot=qpi.Slerp(frameFraction, frameData, frameDataNext, p)
jointsStr, qdot = qpi.Slerp(frameFraction, frameData, frameDataNext, p)
maxForce = p.readUserDebugParameter(maxForceId)
print("jointIds=",jointIds)
print("jointIds=", jointIds)
if useConstraints:
for j in range (12):
for j in range(12):
#skip the base positional dofs
targetPos = float(jointsStr[j+7])
p.setJointMotorControl2(quadruped,jointIds[j],p.POSITION_CONTROL,jointDirections[j]*targetPos+jointOffsets[j], force=maxForce)
targetPos = float(jointsStr[j + 7])
p.setJointMotorControl2(quadruped,
jointIds[j],
p.POSITION_CONTROL,
jointDirections[j] * targetPos + jointOffsets[j],
force=maxForce)
else:
desiredPositions=[]
for j in range (7):
desiredPositions = []
for j in range(7):
targetPosUnmodified = float(jointsStr[j])
desiredPositions.append(targetPosUnmodified)
for j in range (12):
targetPosUnmodified = float(jointsStr[j+7])
targetPos=jointDirections[j]*targetPosUnmodified+jointOffsets[j]
for j in range(12):
targetPosUnmodified = float(jointsStr[j + 7])
targetPos = jointDirections[j] * targetPosUnmodified + jointOffsets[j]
desiredPositions.append(targetPos)
numBaseDofs=6
totalDofs=12+numBaseDofs
desiredVelocities=None
if desiredVelocities==None:
desiredVelocities = [0]*totalDofs
taus = stablePD.computePD(bodyUniqueId=quadruped,
jointIndices = jointIds,
desiredPositions = desiredPositions,
desiredVelocities = desiredVelocities,
kps = [4000]*totalDofs,
kds = [40]*totalDofs,
maxForces = [500]*totalDofs,
timeStep=timeStep)
dofIndex=6
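# Stable PD solves for all 18 dofs (6 floating-base + 12 joints); the first 6
# outputs belong to the base and are skipped, so dofIndex starts at 6 when the
# joint torques are applied below.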
numBaseDofs = 6
totalDofs = 12 + numBaseDofs
desiredVelocities = None
if desiredVelocities is None:
desiredVelocities = [0] * totalDofs
taus = stablePD.computePD(bodyUniqueId=quadruped,
jointIndices=jointIds,
desiredPositions=desiredPositions,
desiredVelocities=desiredVelocities,
kps=[4000] * totalDofs,
kds=[40] * totalDofs,
maxForces=[500] * totalDofs,
timeStep=timeStep)
dofIndex = 6
scaling = 1
for index in range (len(jointIds)):
for index in range(len(jointIds)):
jointIndex = jointIds[index]
force=[scaling*taus[dofIndex]]
print("force[", jointIndex,"]=",force)
p.setJointMotorControlMultiDof(quadruped, jointIndex, controlMode=p.TORQUE_CONTROL, force=force)
dofIndex+=1
force = [scaling * taus[dofIndex]]
print("force[", jointIndex, "]=", force)
p.setJointMotorControlMultiDof(quadruped,
jointIndex,
controlMode=p.TORQUE_CONTROL,
force=force)
dofIndex += 1
p.stepSimulation()
t+=timeStep
t += timeStep
time.sleep(timeStep)
useOrgData=False
useOrgData = False
if useOrgData:
with open("data1.txt","r") as filestream:
for line in filestream:
maxForce = p.readUserDebugParameter(maxForceId)
currentline = line.split(",")
frame = currentline[0]
t = currentline[1]
joints=currentline[2:14]
for j in range (12):
targetPos = float(joints[j])
p.setJointMotorControl2(quadruped,jointIds[j],p.POSITION_CONTROL,jointDirections[j]*targetPos+jointOffsets[j], force=maxForce)
p.stepSimulation()
for lower_leg in lower_legs:
#print("points for ", quadruped, " link: ", lower_leg)
pts = p.getContactPoints(quadruped,-1, lower_leg)
#print("num points=",len(pts))
#for pt in pts:
# print(pt[9])
time.sleep(1./500.)
for j in range (p.getNumJoints(quadruped)):
p.changeDynamics(quadruped,j,linearDamping=0, angularDamping=0)
info = p.getJointInfo(quadruped,j)
js = p.getJointState(quadruped,j)
#print(info)
jointName = info[1]
jointType = info[2]
if (jointType==p.JOINT_PRISMATIC or jointType==p.JOINT_REVOLUTE):
paramIds.append(p.addUserDebugParameter(jointName.decode("utf-8"),-4,4,(js[0]-jointOffsets[j])/jointDirections[j]))
with open("data1.txt", "r") as filestream:
for line in filestream:
maxForce = p.readUserDebugParameter(maxForceId)
currentline = line.split(",")
frame = currentline[0]
t = currentline[1]
joints = currentline[2:14]
for j in range(12):
targetPos = float(joints[j])
p.setJointMotorControl2(quadruped,
jointIds[j],
p.POSITION_CONTROL,
jointDirections[j] * targetPos + jointOffsets[j],
force=maxForce)
p.stepSimulation()
for lower_leg in lower_legs:
#print("points for ", quadruped, " link: ", lower_leg)
pts = p.getContactPoints(quadruped, -1, lower_leg)
#print("num points=",len(pts))
#for pt in pts:
# print(pt[9])
time.sleep(1. / 500.)
for j in range(p.getNumJoints(quadruped)):
p.changeDynamics(quadruped, j, linearDamping=0, angularDamping=0)
info = p.getJointInfo(quadruped, j)
js = p.getJointState(quadruped, j)
#print(info)
jointName = info[1]
jointType = info[2]
if (jointType == p.JOINT_PRISMATIC or jointType == p.JOINT_REVOLUTE):
paramIds.append(
p.addUserDebugParameter(jointName.decode("utf-8"), -4, 4,
(js[0] - jointOffsets[j]) / jointDirections[j]))
p.setRealTimeSimulation(1)
while (1):
for i in range(len(paramIds)):
c = paramIds[i]
targetPos = p.readUserDebugParameter(c)
maxForce = p.readUserDebugParameter(maxForceId)
p.setJointMotorControl2(quadruped,jointIds[i],p.POSITION_CONTROL,jointDirections[i]*targetPos+jointOffsets[i], force=maxForce)
for i in range(len(paramIds)):
c = paramIds[i]
targetPos = p.readUserDebugParameter(c)
maxForce = p.readUserDebugParameter(maxForceId)
p.setJointMotorControl2(quadruped,
jointIds[i],
p.POSITION_CONTROL,
jointDirections[i] * targetPos + jointOffsets[i],
force=maxForce)

View File

@@ -5,17 +5,18 @@ import pybullet_data
AGENT_TYPE_KEY = "AgentType"
def build_agent(world, id, file):
agent = None
with open(pybullet_data.getDataPath()+"/"+file) as data_file:
json_data = json.load(data_file)
assert AGENT_TYPE_KEY in json_data
agent_type = json_data[AGENT_TYPE_KEY]
if (agent_type == PPOAgent.NAME):
agent = PPOAgent(world, id, json_data)
else:
assert False, 'Unsupported agent type: ' + agent_type
return agent
def build_agent(world, id, file):
agent = None
with open(pybullet_data.getDataPath() + "/" + file) as data_file:
json_data = json.load(data_file)
assert AGENT_TYPE_KEY in json_data
agent_type = json_data[AGENT_TYPE_KEY]
if (agent_type == PPOAgent.NAME):
agent = PPOAgent(world, id, json_data)
else:
assert False, 'Unsupported agent type: ' + agent_type
return agent

View File

@@ -2,53 +2,54 @@ import json
import numpy as np
import pybullet_utils.math_util as MathUtil
class ExpParams(object):
RATE_KEY = 'Rate'
INIT_ACTION_RATE_KEY = 'InitActionRate'
NOISE_KEY = 'Noise'
NOISE_INTERNAL_KEY = 'NoiseInternal'
TEMP_KEY = 'Temp'
RATE_KEY = 'Rate'
INIT_ACTION_RATE_KEY = 'InitActionRate'
NOISE_KEY = 'Noise'
NOISE_INTERNAL_KEY = 'NoiseInternal'
TEMP_KEY = 'Temp'
def __init__(self):
self.rate = 0.2
self.init_action_rate = 0
self.noise = 0.1
self.noise_internal = 0
self.temp = 0.1
return
def __init__(self):
self.rate = 0.2
self.init_action_rate = 0
self.noise = 0.1
self.noise_internal = 0
self.temp = 0.1
return
def __str__(self):
str = ''
str += '{}: {:.2f}\n'.format(self.RATE_KEY, self.rate)
str += '{}: {:.2f}\n'.format(self.INIT_ACTION_RATE_KEY, self.init_action_rate)
str += '{}: {:.2f}\n'.format(self.NOISE_KEY, self.noise)
str += '{}: {:.2f}\n'.format(self.NOISE_INTERNAL_KEY, self.noise_internal)
str += '{}: {:.2f}\n'.format(self.TEMP_KEY, self.temp)
return str
def __str__(self):
str = ''
str += '{}: {:.2f}\n'.format(self.RATE_KEY, self.rate)
str += '{}: {:.2f}\n'.format(self.INIT_ACTION_RATE_KEY, self.init_action_rate)
str += '{}: {:.2f}\n'.format(self.NOISE_KEY, self.noise)
str += '{}: {:.2f}\n'.format(self.NOISE_INTERNAL_KEY, self.noise_internal)
str += '{}: {:.2f}\n'.format(self.TEMP_KEY, self.temp)
return str
def load(self, json_data):
if (self.RATE_KEY in json_data):
self.rate = json_data[self.RATE_KEY]
def load(self, json_data):
if (self.RATE_KEY in json_data):
self.rate = json_data[self.RATE_KEY]
if (self.INIT_ACTION_RATE_KEY in json_data):
self.init_action_rate = json_data[self.INIT_ACTION_RATE_KEY]
if (self.INIT_ACTION_RATE_KEY in json_data):
self.init_action_rate = json_data[self.INIT_ACTION_RATE_KEY]
if (self.NOISE_KEY in json_data):
self.noise = json_data[self.NOISE_KEY]
if (self.NOISE_KEY in json_data):
self.noise = json_data[self.NOISE_KEY]
if (self.NOISE_INTERNAL_KEY in json_data):
self.noise_internal = json_data[self.NOISE_INTERNAL_KEY]
if (self.NOISE_INTERNAL_KEY in json_data):
self.noise_internal = json_data[self.NOISE_INTERNAL_KEY]
if (self.TEMP_KEY in json_data):
self.temp = json_data[self.TEMP_KEY]
if (self.TEMP_KEY in json_data):
self.temp = json_data[self.TEMP_KEY]
return
return
def lerp(self, other, t):
lerp_params = ExpParams()
lerp_params.rate = MathUtil.lerp(self.rate, other.rate, t)
lerp_params.init_action_rate = MathUtil.lerp(self.init_action_rate, other.init_action_rate, t)
lerp_params.noise = MathUtil.lerp(self.noise, other.noise, t)
lerp_params.noise_internal = MathUtil.lerp(self.noise_internal, other.noise_internal, t)
lerp_params.temp = MathUtil.log_lerp(self.temp, other.temp, t)
return lerp_params
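# lerp blends two exploration-parameter sets; note that temp is interpolated in
# log space (log_lerp) while the other fields are interpolated linearly.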
def lerp(self, other, t):
lerp_params = ExpParams()
lerp_params.rate = MathUtil.lerp(self.rate, other.rate, t)
lerp_params.init_action_rate = MathUtil.lerp(self.init_action_rate, other.init_action_rate, t)
lerp_params.noise = MathUtil.lerp(self.noise, other.noise, t)
lerp_params.noise_internal = MathUtil.lerp(self.noise_internal, other.noise_internal, t)
lerp_params.temp = MathUtil.log_lerp(self.temp, other.temp, t)
return lerp_params

View File

@@ -1 +1 @@
from . import *
from . import *

View File

@@ -3,11 +3,12 @@ import pybullet_envs.deep_mimic.learning.tf_util as TFUtil
NAME = "fc_2layers_1024units"
def build_net(input_tfs, reuse=False):
layers = [1024, 512]
activation = tf.nn.relu
input_tf = tf.concat(axis=-1, values=input_tfs)
h = TFUtil.fc_net(input_tf, layers, activation=activation, reuse=reuse)
h = activation(h)
return h
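# build_net concatenates all input tensors along the last axis and feeds them
# through two fully connected layers (1024 and 512 units) with ReLU activations.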
def build_net(input_tfs, reuse=False):
layers = [1024, 512]
activation = tf.nn.relu
input_tf = tf.concat(axis=-1, values=input_tfs)
h = TFUtil.fc_net(input_tf, layers, activation=activation, reuse=reuse)
h = activation(h)
return h

View File

@@ -1,11 +1,12 @@
import pybullet_envs.deep_mimic.learning.nets.fc_2layers_1024units as fc_2layers_1024units
def build_net(net_name, input_tfs, reuse=False):
net = None
if (net_name == fc_2layers_1024units.NAME):
net = fc_2layers_1024units.build_net(input_tfs, reuse)
else:
assert False, 'Unsupported net: ' + net_name
return net
def build_net(net_name, input_tfs, reuse=False):
net = None
if (net_name == fc_2layers_1024units.NAME):
net = fc_2layers_1024units.build_net(input_tfs, reuse)
else:
assert False, 'Unsupported net: ' + net_name
return net

View File

@@ -3,147 +3,149 @@ import copy
import pybullet_utils.mpi_util as MPIUtil
from pybullet_utils.logger import Logger
class Normalizer(object):
CHECK_SYNC_COUNT = 50000 # check synchronization after a certain number of entries
CHECK_SYNC_COUNT = 50000 # check synchronization after a certain number of entries
# these group IDs must be the same as those in CharController.h
NORM_GROUP_SINGLE = 0
NORM_GROUP_NONE = -1
# these group IDs must be the same as those in CharController.h
NORM_GROUP_SINGLE = 0
NORM_GROUP_NONE = -1
class Group(object):
def __init__(self, id, indices):
self.id = id
self.indices = indices
return
class Group(object):
def __init__(self, size, groups_ids=None, eps=0.02, clip=np.inf):
self.eps = eps
self.clip = clip
self.mean = np.zeros(size)
self.mean_sq = np.zeros(size)
self.std = np.ones(size)
self.count = 0
self.groups = self._build_groups(groups_ids)
def __init__(self, id, indices):
self.id = id
self.indices = indices
return
self.new_count = 0
self.new_sum = np.zeros_like(self.mean)
self.new_sum_sq = np.zeros_like(self.mean_sq)
return
def __init__(self, size, groups_ids=None, eps=0.02, clip=np.inf):
self.eps = eps
self.clip = clip
self.mean = np.zeros(size)
self.mean_sq = np.zeros(size)
self.std = np.ones(size)
self.count = 0
self.groups = self._build_groups(groups_ids)
def record(self, x):
size = self.get_size()
is_array = isinstance(x, np.ndarray)
if not is_array:
assert(size == 1)
x = np.array([[x]])
self.new_count = 0
self.new_sum = np.zeros_like(self.mean)
self.new_sum_sq = np.zeros_like(self.mean_sq)
return
assert x.shape[-1] == size, \
Logger.print2('Normalizer shape mismatch, expecting size {:d}, but got {:d}'.format(size, x.shape[-1]))
x = np.reshape(x, [-1, size])
def record(self, x):
size = self.get_size()
is_array = isinstance(x, np.ndarray)
if not is_array:
assert (size == 1)
x = np.array([[x]])
self.new_count += x.shape[0]
self.new_sum += np.sum(x, axis=0)
self.new_sum_sq += np.sum(np.square(x), axis=0)
return
assert x.shape[-1] == size, \
Logger.print2('Normalizer shape mismatch, expecting size {:d}, but got {:d}'.format(size, x.shape[-1]))
x = np.reshape(x, [-1, size])
def update(self):
new_count = MPIUtil.reduce_sum(self.new_count)
new_sum = MPIUtil.reduce_sum(self.new_sum)
new_sum_sq = MPIUtil.reduce_sum(self.new_sum_sq)
self.new_count += x.shape[0]
self.new_sum += np.sum(x, axis=0)
self.new_sum_sq += np.sum(np.square(x), axis=0)
return
new_total = self.count + new_count
if (self.count // self.CHECK_SYNC_COUNT != new_total // self.CHECK_SYNC_COUNT):
assert self.check_synced(), Logger.print2('Normalizer parameters desynchronized')
def update(self):
new_count = MPIUtil.reduce_sum(self.new_count)
new_sum = MPIUtil.reduce_sum(self.new_sum)
new_sum_sq = MPIUtil.reduce_sum(self.new_sum_sq)
if new_count > 0:
new_mean = self._process_group_data(new_sum / new_count, self.mean)
new_mean_sq = self._process_group_data(new_sum_sq / new_count, self.mean_sq)
w_old = float(self.count) / new_total
w_new = float(new_count) / new_total
new_total = self.count + new_count
if (self.count // self.CHECK_SYNC_COUNT != new_total // self.CHECK_SYNC_COUNT):
assert self.check_synced(), Logger.print2('Normalizer parameters desynchronized')
self.mean = w_old * self.mean + w_new * new_mean
self.mean_sq = w_old * self.mean_sq + w_new * new_mean_sq
self.count = new_total
self.std = self.calc_std(self.mean, self.mean_sq)
if new_count > 0:
new_mean = self._process_group_data(new_sum / new_count, self.mean)
new_mean_sq = self._process_group_data(new_sum_sq / new_count, self.mean_sq)
w_old = float(self.count) / new_total
w_new = float(new_count) / new_total
self.new_count = 0
self.new_sum.fill(0)
self.new_sum_sq.fill(0)
self.mean = w_old * self.mean + w_new * new_mean
self.mean_sq = w_old * self.mean_sq + w_new * new_mean_sq
self.count = new_total
self.std = self.calc_std(self.mean, self.mean_sq)
return
self.new_count = 0
self.new_sum.fill(0)
self.new_sum_sq.fill(0)
def get_size(self):
return self.mean.size
return
def set_mean_std(self, mean, std):
size = self.get_size()
is_array = isinstance(mean, np.ndarray) and isinstance(std, np.ndarray)
if not is_array:
assert(size == 1)
mean = np.array([mean])
std = np.array([std])
def get_size(self):
return self.mean.size
assert len(mean) == size and len(std) == size, \
Logger.print2('Normalizer shape mismatch, expecting size {:d}, but got {:d} and {:d}'.format(size, len(mean), len(std)))
self.mean = mean
self.std = std
self.mean_sq = self.calc_mean_sq(self.mean, self.std)
return
def set_mean_std(self, mean, std):
size = self.get_size()
is_array = isinstance(mean, np.ndarray) and isinstance(std, np.ndarray)
def normalize(self, x):
norm_x = (x - self.mean) / self.std
norm_x = np.clip(norm_x, -self.clip, self.clip)
return norm_x
if not is_array:
assert (size == 1)
mean = np.array([mean])
std = np.array([std])
def unnormalize(self, norm_x):
x = norm_x * self.std + self.mean
return x
assert len(mean) == size and len(std) == size, \
Logger.print2('Normalizer shape mismatch, expecting size {:d}, but got {:d} and {:d}'.format(size, len(mean), len(std)))
def calc_std(self, mean, mean_sq):
var = mean_sq - np.square(mean)
    # sometimes floating point errors can lead to small negative numbers
var = np.maximum(var, 0)
std = np.sqrt(var)
std = np.maximum(std, self.eps)
return std
self.mean = mean
self.std = std
self.mean_sq = self.calc_mean_sq(self.mean, self.std)
return
def calc_mean_sq(self, mean, std):
    return np.square(std) + np.square(mean)
def normalize(self, x):
norm_x = (x - self.mean) / self.std
norm_x = np.clip(norm_x, -self.clip, self.clip)
return norm_x
def check_synced(self):
synced = True
if MPIUtil.is_root_proc():
vars = np.concatenate([self.mean, self.mean_sq])
MPIUtil.bcast(vars)
else:
vars_local = np.concatenate([self.mean, self.mean_sq])
vars_root = np.empty_like(vars_local)
MPIUtil.bcast(vars_root)
synced = (vars_local == vars_root).all()
return synced
def unnormalize(self, norm_x):
x = norm_x * self.std + self.mean
return x
def _build_groups(self, groups_ids):
groups = []
if groups_ids is None:
curr_id = self.NORM_GROUP_SINGLE
curr_list = np.arange(self.get_size()).astype(np.int32)
groups.append(self.Group(curr_id, curr_list))
else:
ids = np.unique(groups_ids)
for id in ids:
curr_list = np.nonzero(groups_ids == id)[0].astype(np.int32)
groups.append(self.Group(id, curr_list))
def calc_std(self, mean, mean_sq):
var = mean_sq - np.square(mean)
    # sometimes floating point errors can lead to small negative numbers
var = np.maximum(var, 0)
std = np.sqrt(var)
std = np.maximum(std, self.eps)
return std
return groups
def calc_mean_sq(self, mean, std):
    return np.square(std) + np.square(mean)
def _process_group_data(self, new_data, old_data):
proc_data = new_data.copy()
for group in self.groups:
if group.id == self.NORM_GROUP_NONE:
proc_data[group.indices] = old_data[group.indices]
elif group.id != self.NORM_GROUP_SINGLE:
avg = np.mean(new_data[group.indices])
proc_data[group.indices] = avg
return proc_data
def check_synced(self):
synced = True
if MPIUtil.is_root_proc():
vars = np.concatenate([self.mean, self.mean_sq])
MPIUtil.bcast(vars)
else:
vars_local = np.concatenate([self.mean, self.mean_sq])
vars_root = np.empty_like(vars_local)
MPIUtil.bcast(vars_root)
synced = (vars_local == vars_root).all()
return synced
def _build_groups(self, groups_ids):
groups = []
if groups_ids is None:
curr_id = self.NORM_GROUP_SINGLE
curr_list = np.arange(self.get_size()).astype(np.int32)
groups.append(self.Group(curr_id, curr_list))
else:
ids = np.unique(groups_ids)
for id in ids:
curr_list = np.nonzero(groups_ids == id)[0].astype(np.int32)
groups.append(self.Group(id, curr_list))
return groups
def _process_group_data(self, new_data, old_data):
proc_data = new_data.copy()
for group in self.groups:
if group.id == self.NORM_GROUP_NONE:
proc_data[group.indices] = old_data[group.indices]
elif group.id != self.NORM_GROUP_SINGLE:
avg = np.mean(new_data[group.indices])
proc_data[group.indices] = avg
return proc_data
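
For reference, the record/update/normalize cycle above maintains running first and second moments and derives the standard deviation from them, with MPIUtil.reduce_sum aggregating the new samples across workers. Below is a minimal single-process numpy sketch of the same incremental update; the names and the toy data are illustrative only and the MPI reduction is intentionally omitted.

import numpy as np

# Single-process sketch of the running-statistics update used by Normalizer.
# eps plays the same role as Normalizer.eps: a lower bound on the std.
mean = np.zeros(3)
mean_sq = np.zeros(3)
count = 0
eps = 0.02

def record_and_update(batch):
  global mean, mean_sq, count
  new_count = batch.shape[0]
  new_mean = batch.mean(axis=0)
  new_mean_sq = np.square(batch).mean(axis=0)
  total = count + new_count
  w_old, w_new = count / total, new_count / total
  mean = w_old * mean + w_new * new_mean          # blended first moment
  mean_sq = w_old * mean_sq + w_new * new_mean_sq  # blended second moment
  count = total

def std():
  var = np.maximum(mean_sq - np.square(mean), 0)  # guard against tiny negatives
  return np.maximum(np.sqrt(var), eps)

record_and_update(np.random.randn(128, 3) * 2.0 + 5.0)
print(mean, std())  # mean near 5, std near 2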

View File

@@ -1,46 +1,47 @@
import numpy as np
from pybullet_envs.deep_mimic.env.env import Env
class Path(object):
def __init__(self):
self.clear()
return
def pathlength(self):
return len(self.actions)
def __init__(self):
self.clear()
return
def is_valid(self):
valid = True
l = self.pathlength()
valid &= len(self.states) == l + 1
valid &= len(self.goals) == l + 1
valid &= len(self.actions) == l
valid &= len(self.logps) == l
valid &= len(self.rewards) == l
valid &= len(self.flags) == l
def pathlength(self):
return len(self.actions)
return valid
def is_valid(self):
valid = True
l = self.pathlength()
valid &= len(self.states) == l + 1
valid &= len(self.goals) == l + 1
valid &= len(self.actions) == l
valid &= len(self.logps) == l
valid &= len(self.rewards) == l
valid &= len(self.flags) == l
def check_vals(self):
for vals in [self.states, self.goals, self.actions, self.logps,
self.rewards]:
for v in vals:
if not np.isfinite(v).all():
return False
return True
return valid
def clear(self):
self.states = []
self.goals = []
self.actions = []
self.logps = []
self.rewards = []
self.flags = []
self.terminate = Env.Terminate.Null
return
def check_vals(self):
for vals in [self.states, self.goals, self.actions, self.logps, self.rewards]:
for v in vals:
if not np.isfinite(v).all():
return False
return True
def get_pathlen(self):
return len(self.rewards)
def clear(self):
self.states = []
self.goals = []
self.actions = []
self.logps = []
self.rewards = []
self.flags = []
self.terminate = Env.Terminate.Null
return
def calc_return(self):
return sum(self.rewards)
def get_pathlen(self):
return len(self.rewards)
def calc_return(self):
return sum(self.rewards)
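
The validity check above encodes the usual rollout invariant: one more state and goal than actions, and matching lengths for actions, log-probabilities, rewards, and flags. A small hedged sketch of a rollout loop that satisfies it, using a dummy one-dimensional transition purely for illustration:

import numpy as np

# Toy rollout that respects the Path invariant:
# len(states) == len(actions) + 1, and actions/logps/rewards/flags share a length.
states, goals, actions, logps, rewards, flags = [], [], [], [], [], []

s = 0.0
states.append(s)
goals.append(None)
for t in range(5):
  a = np.random.randn()          # action
  s = s + a                      # toy transition to the next state
  states.append(s)
  goals.append(None)
  actions.append(a)
  logps.append(0.0)              # placeholder log-probability
  rewards.append(-abs(s))        # toy reward
  flags.append(0)

assert len(states) == len(actions) + 1
assert len(goals) == len(actions) + 1
assert len(actions) == len(logps) == len(rewards) == len(flags)
print('return =', sum(rewards))  # analogous to Path.calc_return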

View File

@@ -13,341 +13,343 @@ import pybullet_utils.mpi_util as MPIUtil
import pybullet_utils.math_util as MathUtil
from pybullet_envs.deep_mimic.env.action_space import ActionSpace
from pybullet_envs.deep_mimic.env.env import Env
'''
Policy Gradient Agent
'''
class PGAgent(TFAgent):
NAME = 'PG'
NAME = 'PG'
ACTOR_NET_KEY = 'ActorNet'
ACTOR_STEPSIZE_KEY = 'ActorStepsize'
ACTOR_MOMENTUM_KEY = 'ActorMomentum'
ACTOR_WEIGHT_DECAY_KEY = 'ActorWeightDecay'
ACTOR_INIT_OUTPUT_SCALE_KEY = 'ActorInitOutputScale'
ACTOR_NET_KEY = 'ActorNet'
ACTOR_STEPSIZE_KEY = 'ActorStepsize'
ACTOR_MOMENTUM_KEY = 'ActorMomentum'
ACTOR_WEIGHT_DECAY_KEY = 'ActorWeightDecay'
ACTOR_INIT_OUTPUT_SCALE_KEY = 'ActorInitOutputScale'
CRITIC_NET_KEY = 'CriticNet'
CRITIC_STEPSIZE_KEY = 'CriticStepsize'
CRITIC_MOMENTUM_KEY = 'CriticMomentum'
CRITIC_WEIGHT_DECAY_KEY = 'CriticWeightDecay'
EXP_ACTION_FLAG = 1 << 0
CRITIC_NET_KEY = 'CriticNet'
CRITIC_STEPSIZE_KEY = 'CriticStepsize'
CRITIC_MOMENTUM_KEY = 'CriticMomentum'
CRITIC_WEIGHT_DECAY_KEY = 'CriticWeightDecay'
def __init__(self, world, id, json_data):
self._exp_action = False
super().__init__(world, id, json_data)
return
EXP_ACTION_FLAG = 1 << 0
def reset(self):
super().reset()
self._exp_action = False
return
def __init__(self, world, id, json_data):
self._exp_action = False
super().__init__(world, id, json_data)
return
def _check_action_space(self):
action_space = self.get_action_space()
return action_space == ActionSpace.Continuous
def reset(self):
super().reset()
self._exp_action = False
return
def _load_params(self, json_data):
super()._load_params(json_data)
self.val_min, self.val_max = self._calc_val_bounds(self.discount)
self.val_fail, self.val_succ = self._calc_term_vals(self.discount)
return
def _check_action_space(self):
action_space = self.get_action_space()
return action_space == ActionSpace.Continuous
def _build_nets(self, json_data):
assert self.ACTOR_NET_KEY in json_data
assert self.CRITIC_NET_KEY in json_data
def _load_params(self, json_data):
super()._load_params(json_data)
self.val_min, self.val_max = self._calc_val_bounds(self.discount)
self.val_fail, self.val_succ = self._calc_term_vals(self.discount)
return
actor_net_name = json_data[self.ACTOR_NET_KEY]
critic_net_name = json_data[self.CRITIC_NET_KEY]
actor_init_output_scale = 1 if (self.ACTOR_INIT_OUTPUT_SCALE_KEY not in json_data) else json_data[self.ACTOR_INIT_OUTPUT_SCALE_KEY]
s_size = self.get_state_size()
g_size = self.get_goal_size()
a_size = self.get_action_size()
def _build_nets(self, json_data):
assert self.ACTOR_NET_KEY in json_data
assert self.CRITIC_NET_KEY in json_data
# setup input tensors
self.s_tf = tf.placeholder(tf.float32, shape=[None, s_size], name="s") # observations
    self.tar_val_tf = tf.placeholder(tf.float32, shape=[None], name="tar_val")  # target values
self.adv_tf = tf.placeholder(tf.float32, shape=[None], name="adv") # advantage
self.a_tf = tf.placeholder(tf.float32, shape=[None, a_size], name="a") # target actions
self.g_tf = tf.placeholder(tf.float32, shape=([None, g_size] if self.has_goal() else None), name="g") # goals
actor_net_name = json_data[self.ACTOR_NET_KEY]
critic_net_name = json_data[self.CRITIC_NET_KEY]
actor_init_output_scale = 1 if (self.ACTOR_INIT_OUTPUT_SCALE_KEY not in json_data
) else json_data[self.ACTOR_INIT_OUTPUT_SCALE_KEY]
with tf.variable_scope('main'):
with tf.variable_scope('actor'):
self.actor_tf = self._build_net_actor(actor_net_name, actor_init_output_scale)
with tf.variable_scope('critic'):
self.critic_tf = self._build_net_critic(critic_net_name)
s_size = self.get_state_size()
g_size = self.get_goal_size()
a_size = self.get_action_size()
if (self.actor_tf != None):
Logger.print2('Built actor net: ' + actor_net_name)
# setup input tensors
self.s_tf = tf.placeholder(tf.float32, shape=[None, s_size], name="s") # observations
    self.tar_val_tf = tf.placeholder(tf.float32, shape=[None], name="tar_val")  # target values
self.adv_tf = tf.placeholder(tf.float32, shape=[None], name="adv") # advantage
self.a_tf = tf.placeholder(tf.float32, shape=[None, a_size], name="a") # target actions
self.g_tf = tf.placeholder(tf.float32,
shape=([None, g_size] if self.has_goal() else None),
name="g") # goals
if (self.critic_tf != None):
Logger.print2('Built critic net: ' + critic_net_name)
with tf.variable_scope('main'):
with tf.variable_scope('actor'):
self.actor_tf = self._build_net_actor(actor_net_name, actor_init_output_scale)
with tf.variable_scope('critic'):
self.critic_tf = self._build_net_critic(critic_net_name)
return
if (self.actor_tf != None):
Logger.print2('Built actor net: ' + actor_net_name)
def _build_normalizers(self):
super()._build_normalizers()
with self.sess.as_default(), self.graph.as_default(), tf.variable_scope(self.tf_scope):
with tf.variable_scope(self.RESOURCE_SCOPE):
val_offset, val_scale = self._calc_val_offset_scale(self.discount)
self.val_norm = TFNormalizer(self.sess, 'val_norm', 1)
self.val_norm.set_mean_std(-val_offset, 1.0 / val_scale)
return
if (self.critic_tf != None):
Logger.print2('Built critic net: ' + critic_net_name)
def _init_normalizers(self):
super()._init_normalizers()
with self.sess.as_default(), self.graph.as_default():
self.val_norm.update()
return
return
def _load_normalizers(self):
super()._load_normalizers()
self.val_norm.load()
return
def _build_normalizers(self):
super()._build_normalizers()
with self.sess.as_default(), self.graph.as_default(), tf.variable_scope(self.tf_scope):
with tf.variable_scope(self.RESOURCE_SCOPE):
val_offset, val_scale = self._calc_val_offset_scale(self.discount)
self.val_norm = TFNormalizer(self.sess, 'val_norm', 1)
self.val_norm.set_mean_std(-val_offset, 1.0 / val_scale)
return
def _build_losses(self, json_data):
actor_weight_decay = 0 if (self.ACTOR_WEIGHT_DECAY_KEY not in json_data) else json_data[self.ACTOR_WEIGHT_DECAY_KEY]
critic_weight_decay = 0 if (self.CRITIC_WEIGHT_DECAY_KEY not in json_data) else json_data[self.CRITIC_WEIGHT_DECAY_KEY]
def _init_normalizers(self):
super()._init_normalizers()
with self.sess.as_default(), self.graph.as_default():
self.val_norm.update()
return
norm_val_diff = self.val_norm.normalize_tf(self.tar_val_tf) - self.val_norm.normalize_tf(self.critic_tf)
self.critic_loss_tf = 0.5 * tf.reduce_mean(tf.square(norm_val_diff))
def _load_normalizers(self):
super()._load_normalizers()
self.val_norm.load()
return
if (critic_weight_decay != 0):
self.critic_loss_tf += critic_weight_decay * self._weight_decay_loss('main/critic')
norm_a_mean_tf = self.a_norm.normalize_tf(self.actor_tf)
norm_a_diff = self.a_norm.normalize_tf(self.a_tf) - norm_a_mean_tf
def _build_losses(self, json_data):
actor_weight_decay = 0 if (
self.ACTOR_WEIGHT_DECAY_KEY not in json_data) else json_data[self.ACTOR_WEIGHT_DECAY_KEY]
critic_weight_decay = 0 if (
self.CRITIC_WEIGHT_DECAY_KEY not in json_data) else json_data[self.CRITIC_WEIGHT_DECAY_KEY]
self.actor_loss_tf = tf.reduce_sum(tf.square(norm_a_diff), axis=-1)
self.actor_loss_tf *= self.adv_tf
self.actor_loss_tf = 0.5 * tf.reduce_mean(self.actor_loss_tf)
norm_val_diff = self.val_norm.normalize_tf(self.tar_val_tf) - self.val_norm.normalize_tf(
self.critic_tf)
self.critic_loss_tf = 0.5 * tf.reduce_mean(tf.square(norm_val_diff))
norm_a_bound_min = self.a_norm.normalize(self.a_bound_min)
norm_a_bound_max = self.a_norm.normalize(self.a_bound_max)
a_bound_loss = TFUtil.calc_bound_loss(norm_a_mean_tf, norm_a_bound_min, norm_a_bound_max)
a_bound_loss /= self.exp_params_curr.noise
self.actor_loss_tf += a_bound_loss
if (critic_weight_decay != 0):
self.critic_loss_tf += critic_weight_decay * self._weight_decay_loss('main/critic')
if (actor_weight_decay != 0):
self.actor_loss_tf += actor_weight_decay * self._weight_decay_loss('main/actor')
return
norm_a_mean_tf = self.a_norm.normalize_tf(self.actor_tf)
norm_a_diff = self.a_norm.normalize_tf(self.a_tf) - norm_a_mean_tf
def _build_solvers(self, json_data):
actor_stepsize = 0.001 if (self.ACTOR_STEPSIZE_KEY not in json_data) else json_data[self.ACTOR_STEPSIZE_KEY]
actor_momentum = 0.9 if (self.ACTOR_MOMENTUM_KEY not in json_data) else json_data[self.ACTOR_MOMENTUM_KEY]
critic_stepsize = 0.01 if (self.CRITIC_STEPSIZE_KEY not in json_data) else json_data[self.CRITIC_STEPSIZE_KEY]
critic_momentum = 0.9 if (self.CRITIC_MOMENTUM_KEY not in json_data) else json_data[self.CRITIC_MOMENTUM_KEY]
critic_vars = self._tf_vars('main/critic')
critic_opt = tf.train.MomentumOptimizer(learning_rate=critic_stepsize, momentum=critic_momentum)
self.critic_grad_tf = tf.gradients(self.critic_loss_tf, critic_vars)
self.critic_solver = MPISolver(self.sess, critic_opt, critic_vars)
self.actor_loss_tf = tf.reduce_sum(tf.square(norm_a_diff), axis=-1)
self.actor_loss_tf *= self.adv_tf
self.actor_loss_tf = 0.5 * tf.reduce_mean(self.actor_loss_tf)
actor_vars = self._tf_vars('main/actor')
actor_opt = tf.train.MomentumOptimizer(learning_rate=actor_stepsize, momentum=actor_momentum)
self.actor_grad_tf = tf.gradients(self.actor_loss_tf, actor_vars)
self.actor_solver = MPISolver(self.sess, actor_opt, actor_vars)
norm_a_bound_min = self.a_norm.normalize(self.a_bound_min)
norm_a_bound_max = self.a_norm.normalize(self.a_bound_max)
a_bound_loss = TFUtil.calc_bound_loss(norm_a_mean_tf, norm_a_bound_min, norm_a_bound_max)
a_bound_loss /= self.exp_params_curr.noise
self.actor_loss_tf += a_bound_loss
return
if (actor_weight_decay != 0):
self.actor_loss_tf += actor_weight_decay * self._weight_decay_loss('main/actor')
def _build_net_actor(self, net_name, init_output_scale):
norm_s_tf = self.s_norm.normalize_tf(self.s_tf)
input_tfs = [norm_s_tf]
if (self.has_goal()):
norm_g_tf = self.g_norm.normalize_tf(self.g_tf)
input_tfs += [norm_g_tf]
h = NetBuilder.build_net(net_name, input_tfs)
norm_a_tf = tf.layers.dense(inputs=h, units=self.get_action_size(), activation=None,
kernel_initializer=tf.random_uniform_initializer(minval=-init_output_scale, maxval=init_output_scale))
a_tf = self.a_norm.unnormalize_tf(norm_a_tf)
return a_tf
def _build_net_critic(self, net_name):
norm_s_tf = self.s_norm.normalize_tf(self.s_tf)
input_tfs = [norm_s_tf]
if (self.has_goal()):
norm_g_tf = self.g_norm.normalize_tf(self.g_tf)
input_tfs += [norm_g_tf]
h = NetBuilder.build_net(net_name, input_tfs)
norm_val_tf = tf.layers.dense(inputs=h, units=1, activation=None,
                                  kernel_initializer=TFUtil.xavier_initializer)
return
norm_val_tf = tf.reshape(norm_val_tf, [-1])
val_tf = self.val_norm.unnormalize_tf(norm_val_tf)
return val_tf
def _build_solvers(self, json_data):
actor_stepsize = 0.001 if (
self.ACTOR_STEPSIZE_KEY not in json_data) else json_data[self.ACTOR_STEPSIZE_KEY]
actor_momentum = 0.9 if (
self.ACTOR_MOMENTUM_KEY not in json_data) else json_data[self.ACTOR_MOMENTUM_KEY]
critic_stepsize = 0.01 if (
self.CRITIC_STEPSIZE_KEY not in json_data) else json_data[self.CRITIC_STEPSIZE_KEY]
critic_momentum = 0.9 if (
self.CRITIC_MOMENTUM_KEY not in json_data) else json_data[self.CRITIC_MOMENTUM_KEY]
def _initialize_vars(self):
super()._initialize_vars()
self._sync_solvers()
return
critic_vars = self._tf_vars('main/critic')
critic_opt = tf.train.MomentumOptimizer(learning_rate=critic_stepsize,
momentum=critic_momentum)
self.critic_grad_tf = tf.gradients(self.critic_loss_tf, critic_vars)
self.critic_solver = MPISolver(self.sess, critic_opt, critic_vars)
def _sync_solvers(self):
self.actor_solver.sync()
self.critic_solver.sync()
return
actor_vars = self._tf_vars('main/actor')
actor_opt = tf.train.MomentumOptimizer(learning_rate=actor_stepsize, momentum=actor_momentum)
self.actor_grad_tf = tf.gradients(self.actor_loss_tf, actor_vars)
self.actor_solver = MPISolver(self.sess, actor_opt, actor_vars)
def _decide_action(self, s, g):
with self.sess.as_default(), self.graph.as_default():
self._exp_action = False
a = self._eval_actor(s, g)[0]
logp = 0
return
if self._enable_stoch_policy():
# epsilon-greedy
rand_action = MathUtil.flip_coin(self.exp_params_curr.rate)
if rand_action:
norm_exp_noise = np.random.randn(*a.shape)
norm_exp_noise *= self.exp_params_curr.noise
exp_noise = norm_exp_noise * self.a_norm.std
a += exp_noise
def _build_net_actor(self, net_name, init_output_scale):
norm_s_tf = self.s_norm.normalize_tf(self.s_tf)
input_tfs = [norm_s_tf]
if (self.has_goal()):
norm_g_tf = self.g_norm.normalize_tf(self.g_tf)
input_tfs += [norm_g_tf]
logp = self._calc_action_logp(norm_exp_noise)
self._exp_action = True
h = NetBuilder.build_net(net_name, input_tfs)
norm_a_tf = tf.layers.dense(inputs=h,
units=self.get_action_size(),
activation=None,
kernel_initializer=tf.random_uniform_initializer(
minval=-init_output_scale, maxval=init_output_scale))
return a, logp
a_tf = self.a_norm.unnormalize_tf(norm_a_tf)
return a_tf
def _enable_stoch_policy(self):
return self.enable_training and (self._mode == self.Mode.TRAIN or self._mode == self.Mode.TRAIN_END)
def _build_net_critic(self, net_name):
norm_s_tf = self.s_norm.normalize_tf(self.s_tf)
input_tfs = [norm_s_tf]
if (self.has_goal()):
norm_g_tf = self.g_norm.normalize_tf(self.g_tf)
input_tfs += [norm_g_tf]
def _eval_actor(self, s, g):
s = np.reshape(s, [-1, self.get_state_size()])
g = np.reshape(g, [-1, self.get_goal_size()]) if self.has_goal() else None
feed = {
self.s_tf : s,
self.g_tf : g
}
h = NetBuilder.build_net(net_name, input_tfs)
norm_val_tf = tf.layers.dense(inputs=h,
units=1,
activation=None,
kernel_initializer=TFUtil.xavier_initializer)
a = self.actor_tf.eval(feed)
return a
def _eval_critic(self, s, g):
with self.sess.as_default(), self.graph.as_default():
s = np.reshape(s, [-1, self.get_state_size()])
g = np.reshape(g, [-1, self.get_goal_size()]) if self.has_goal() else None
norm_val_tf = tf.reshape(norm_val_tf, [-1])
val_tf = self.val_norm.unnormalize_tf(norm_val_tf)
return val_tf
feed = {
self.s_tf : s,
self.g_tf : g
}
def _initialize_vars(self):
super()._initialize_vars()
self._sync_solvers()
return
val = self.critic_tf.eval(feed)
return val
def _sync_solvers(self):
self.actor_solver.sync()
self.critic_solver.sync()
return
def _record_flags(self):
flags = int(0)
if (self._exp_action):
flags = flags | self.EXP_ACTION_FLAG
return flags
def _decide_action(self, s, g):
with self.sess.as_default(), self.graph.as_default():
self._exp_action = False
a = self._eval_actor(s, g)[0]
logp = 0
def _train_step(self):
super()._train_step()
if self._enable_stoch_policy():
# epsilon-greedy
rand_action = MathUtil.flip_coin(self.exp_params_curr.rate)
if rand_action:
norm_exp_noise = np.random.randn(*a.shape)
norm_exp_noise *= self.exp_params_curr.noise
exp_noise = norm_exp_noise * self.a_norm.std
a += exp_noise
critic_loss = self._update_critic()
actor_loss = self._update_actor()
critic_loss = MPIUtil.reduce_avg(critic_loss)
actor_loss = MPIUtil.reduce_avg(actor_loss)
logp = self._calc_action_logp(norm_exp_noise)
self._exp_action = True
critic_stepsize = self.critic_solver.get_stepsize()
actor_stepsize = self.actor_solver.get_stepsize()
self.logger.log_tabular('Critic_Loss', critic_loss)
self.logger.log_tabular('Critic_Stepsize', critic_stepsize)
self.logger.log_tabular('Actor_Loss', actor_loss)
self.logger.log_tabular('Actor_Stepsize', actor_stepsize)
return a, logp
return
def _enable_stoch_policy(self):
return self.enable_training and (self._mode == self.Mode.TRAIN or
self._mode == self.Mode.TRAIN_END)
def _update_critic(self):
idx = self.replay_buffer.sample(self._local_mini_batch_size)
s = self.replay_buffer.get('states', idx)
g = self.replay_buffer.get('goals', idx) if self.has_goal() else None
tar_V = self._calc_updated_vals(idx)
tar_V = np.clip(tar_V, self.val_min, self.val_max)
def _eval_actor(self, s, g):
s = np.reshape(s, [-1, self.get_state_size()])
g = np.reshape(g, [-1, self.get_goal_size()]) if self.has_goal() else None
feed = {
self.s_tf: s,
self.g_tf: g,
self.tar_val_tf: tar_V
}
feed = {self.s_tf: s, self.g_tf: g}
loss, grads = self.sess.run([self.critic_loss_tf, self.critic_grad_tf], feed)
self.critic_solver.update(grads)
return loss
def _update_actor(self):
key = self.EXP_ACTION_FLAG
idx = self.replay_buffer.sample_filtered(self._local_mini_batch_size, key)
has_goal = self.has_goal()
a = self.actor_tf.eval(feed)
return a
s = self.replay_buffer.get('states', idx)
g = self.replay_buffer.get('goals', idx) if has_goal else None
a = self.replay_buffer.get('actions', idx)
def _eval_critic(self, s, g):
with self.sess.as_default(), self.graph.as_default():
s = np.reshape(s, [-1, self.get_state_size()])
g = np.reshape(g, [-1, self.get_goal_size()]) if self.has_goal() else None
V_new = self._calc_updated_vals(idx)
V_old = self._eval_critic(s, g)
adv = V_new - V_old
feed = {self.s_tf: s, self.g_tf: g}
feed = {
self.s_tf: s,
self.g_tf: g,
self.a_tf: a,
self.adv_tf: adv
}
val = self.critic_tf.eval(feed)
return val
loss, grads = self.sess.run([self.actor_loss_tf, self.actor_grad_tf], feed)
self.actor_solver.update(grads)
def _record_flags(self):
flags = int(0)
if (self._exp_action):
flags = flags | self.EXP_ACTION_FLAG
return flags
return loss
def _train_step(self):
super()._train_step()
def _calc_updated_vals(self, idx):
r = self.replay_buffer.get('rewards', idx)
critic_loss = self._update_critic()
actor_loss = self._update_actor()
critic_loss = MPIUtil.reduce_avg(critic_loss)
actor_loss = MPIUtil.reduce_avg(actor_loss)
if self.discount == 0:
new_V = r
else:
next_idx = self.replay_buffer.get_next_idx(idx)
s_next = self.replay_buffer.get('states', next_idx)
g_next = self.replay_buffer.get('goals', next_idx) if self.has_goal() else None
critic_stepsize = self.critic_solver.get_stepsize()
actor_stepsize = self.actor_solver.get_stepsize()
is_end = self.replay_buffer.is_path_end(idx)
is_fail = self.replay_buffer.check_terminal_flag(idx, Env.Terminate.Fail)
is_succ = self.replay_buffer.check_terminal_flag(idx, Env.Terminate.Succ)
is_fail = np.logical_and(is_end, is_fail)
is_succ = np.logical_and(is_end, is_succ)
self.logger.log_tabular('Critic_Loss', critic_loss)
self.logger.log_tabular('Critic_Stepsize', critic_stepsize)
self.logger.log_tabular('Actor_Loss', actor_loss)
self.logger.log_tabular('Actor_Stepsize', actor_stepsize)
V_next = self._eval_critic(s_next, g_next)
V_next[is_fail] = self.val_fail
V_next[is_succ] = self.val_succ
return
new_V = r + self.discount * V_next
return new_V
def _update_critic(self):
idx = self.replay_buffer.sample(self._local_mini_batch_size)
s = self.replay_buffer.get('states', idx)
g = self.replay_buffer.get('goals', idx) if self.has_goal() else None
def _calc_action_logp(self, norm_action_deltas):
    # norm action deltas are for the normalized actions (scaled by self.a_norm.std)
stdev = self.exp_params_curr.noise
assert stdev > 0
tar_V = self._calc_updated_vals(idx)
tar_V = np.clip(tar_V, self.val_min, self.val_max)
a_size = self.get_action_size()
logp = -0.5 / (stdev * stdev) * np.sum(np.square(norm_action_deltas), axis=-1)
logp += -0.5 * a_size * np.log(2 * np.pi)
logp += -a_size * np.log(stdev)
return logp
feed = {self.s_tf: s, self.g_tf: g, self.tar_val_tf: tar_V}
def _log_val(self, s, g):
val = self._eval_critic(s, g)
norm_val = self.val_norm.normalize(val)
self.world.env.log_val(self.id, norm_val[0])
return
loss, grads = self.sess.run([self.critic_loss_tf, self.critic_grad_tf], feed)
self.critic_solver.update(grads)
return loss
def _build_replay_buffer(self, buffer_size):
super()._build_replay_buffer(buffer_size)
self.replay_buffer.add_filter_key(self.EXP_ACTION_FLAG)
return
def _update_actor(self):
key = self.EXP_ACTION_FLAG
idx = self.replay_buffer.sample_filtered(self._local_mini_batch_size, key)
has_goal = self.has_goal()
s = self.replay_buffer.get('states', idx)
g = self.replay_buffer.get('goals', idx) if has_goal else None
a = self.replay_buffer.get('actions', idx)
V_new = self._calc_updated_vals(idx)
V_old = self._eval_critic(s, g)
adv = V_new - V_old
feed = {self.s_tf: s, self.g_tf: g, self.a_tf: a, self.adv_tf: adv}
loss, grads = self.sess.run([self.actor_loss_tf, self.actor_grad_tf], feed)
self.actor_solver.update(grads)
return loss
def _calc_updated_vals(self, idx):
r = self.replay_buffer.get('rewards', idx)
if self.discount == 0:
new_V = r
else:
next_idx = self.replay_buffer.get_next_idx(idx)
s_next = self.replay_buffer.get('states', next_idx)
g_next = self.replay_buffer.get('goals', next_idx) if self.has_goal() else None
is_end = self.replay_buffer.is_path_end(idx)
is_fail = self.replay_buffer.check_terminal_flag(idx, Env.Terminate.Fail)
is_succ = self.replay_buffer.check_terminal_flag(idx, Env.Terminate.Succ)
is_fail = np.logical_and(is_end, is_fail)
is_succ = np.logical_and(is_end, is_succ)
V_next = self._eval_critic(s_next, g_next)
V_next[is_fail] = self.val_fail
V_next[is_succ] = self.val_succ
new_V = r + self.discount * V_next
return new_V
def _calc_action_logp(self, norm_action_deltas):
    # norm action deltas are for the normalized actions (scaled by self.a_norm.std)
stdev = self.exp_params_curr.noise
assert stdev > 0
a_size = self.get_action_size()
logp = -0.5 / (stdev * stdev) * np.sum(np.square(norm_action_deltas), axis=-1)
logp += -0.5 * a_size * np.log(2 * np.pi)
logp += -a_size * np.log(stdev)
return logp
def _log_val(self, s, g):
val = self._eval_critic(s, g)
norm_val = self.val_norm.normalize(val)
self.world.env.log_val(self.id, norm_val[0])
return
def _build_replay_buffer(self, buffer_size):
super()._build_replay_buffer(buffer_size)
self.replay_buffer.add_filter_key(self.EXP_ACTION_FLAG)
return
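
The critic target and advantage computed in _calc_updated_vals and _update_actor reduce to a one-step TD target, tar_V = r + discount * V(s'), with the advantage taken as tar_V - V(s) and terminal states forced to val_fail / val_succ. A numpy sketch under those assumptions, with toy values and no replay buffer:

import numpy as np

# One-step TD targets and advantages, mirroring the PG agent above.
discount = 0.95
r = np.array([1.0, 0.5, -0.2])
v_s = np.array([3.0, 2.5, 1.0])       # critic estimate V(s)
v_next = np.array([2.8, 2.0, 0.0])    # critic estimate V(s')
is_fail = np.array([False, False, True])

val_fail = 0.0                        # analogous to self.val_fail
v_next = np.where(is_fail, val_fail, v_next)  # override terminal values

tar_v = r + discount * v_next         # critic regression target
adv = tar_v - v_s                     # advantage used by the actor update
print(tar_v, adv)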

View File

@@ -10,359 +10,374 @@ from pybullet_utils.logger import Logger
import pybullet_utils.mpi_util as MPIUtil
import pybullet_utils.math_util as MathUtil
from pybullet_envs.deep_mimic.env.env import Env
'''
Proximal Policy Optimization Agent
'''
class PPOAgent(PGAgent):
NAME = "PPO"
EPOCHS_KEY = "Epochs"
BATCH_SIZE_KEY = "BatchSize"
RATIO_CLIP_KEY = "RatioClip"
NORM_ADV_CLIP_KEY = "NormAdvClip"
TD_LAMBDA_KEY = "TDLambda"
TAR_CLIP_FRAC = "TarClipFrac"
ACTOR_STEPSIZE_DECAY = "ActorStepsizeDecay"
NAME = "PPO"
EPOCHS_KEY = "Epochs"
BATCH_SIZE_KEY = "BatchSize"
RATIO_CLIP_KEY = "RatioClip"
NORM_ADV_CLIP_KEY = "NormAdvClip"
TD_LAMBDA_KEY = "TDLambda"
TAR_CLIP_FRAC = "TarClipFrac"
ACTOR_STEPSIZE_DECAY = "ActorStepsizeDecay"
def __init__(self, world, id, json_data):
super().__init__(world, id, json_data)
return
def __init__(self, world, id, json_data):
super().__init__(world, id, json_data)
return
def _load_params(self, json_data):
super()._load_params(json_data)
def _load_params(self, json_data):
super()._load_params(json_data)
self.epochs = 1 if (self.EPOCHS_KEY not in json_data) else json_data[self.EPOCHS_KEY]
self.batch_size = 1024 if (self.BATCH_SIZE_KEY not in json_data) else json_data[self.BATCH_SIZE_KEY]
self.ratio_clip = 0.2 if (self.RATIO_CLIP_KEY not in json_data) else json_data[self.RATIO_CLIP_KEY]
self.norm_adv_clip = 5 if (self.NORM_ADV_CLIP_KEY not in json_data) else json_data[self.NORM_ADV_CLIP_KEY]
self.td_lambda = 0.95 if (self.TD_LAMBDA_KEY not in json_data) else json_data[self.TD_LAMBDA_KEY]
self.tar_clip_frac = -1 if (self.TAR_CLIP_FRAC not in json_data) else json_data[self.TAR_CLIP_FRAC]
self.actor_stepsize_decay = 0.5 if (self.ACTOR_STEPSIZE_DECAY not in json_data) else json_data[self.ACTOR_STEPSIZE_DECAY]
self.epochs = 1 if (self.EPOCHS_KEY not in json_data) else json_data[self.EPOCHS_KEY]
self.batch_size = 1024 if (
self.BATCH_SIZE_KEY not in json_data) else json_data[self.BATCH_SIZE_KEY]
self.ratio_clip = 0.2 if (
self.RATIO_CLIP_KEY not in json_data) else json_data[self.RATIO_CLIP_KEY]
self.norm_adv_clip = 5 if (
self.NORM_ADV_CLIP_KEY not in json_data) else json_data[self.NORM_ADV_CLIP_KEY]
self.td_lambda = 0.95 if (
self.TD_LAMBDA_KEY not in json_data) else json_data[self.TD_LAMBDA_KEY]
self.tar_clip_frac = -1 if (
self.TAR_CLIP_FRAC not in json_data) else json_data[self.TAR_CLIP_FRAC]
self.actor_stepsize_decay = 0.5 if (
self.ACTOR_STEPSIZE_DECAY not in json_data) else json_data[self.ACTOR_STEPSIZE_DECAY]
num_procs = MPIUtil.get_num_procs()
local_batch_size = int(self.batch_size / num_procs)
min_replay_size = 2 * local_batch_size # needed to prevent buffer overflow
assert(self.replay_buffer_size > min_replay_size)
num_procs = MPIUtil.get_num_procs()
local_batch_size = int(self.batch_size / num_procs)
min_replay_size = 2 * local_batch_size # needed to prevent buffer overflow
assert (self.replay_buffer_size > min_replay_size)
self.replay_buffer_size = np.maximum(min_replay_size, self.replay_buffer_size)
self.replay_buffer_size = np.maximum(min_replay_size, self.replay_buffer_size)
return
return
def _build_nets(self, json_data):
assert self.ACTOR_NET_KEY in json_data
assert self.CRITIC_NET_KEY in json_data
def _build_nets(self, json_data):
assert self.ACTOR_NET_KEY in json_data
assert self.CRITIC_NET_KEY in json_data
actor_net_name = json_data[self.ACTOR_NET_KEY]
critic_net_name = json_data[self.CRITIC_NET_KEY]
actor_init_output_scale = 1 if (self.ACTOR_INIT_OUTPUT_SCALE_KEY not in json_data) else json_data[self.ACTOR_INIT_OUTPUT_SCALE_KEY]
actor_net_name = json_data[self.ACTOR_NET_KEY]
critic_net_name = json_data[self.CRITIC_NET_KEY]
actor_init_output_scale = 1 if (self.ACTOR_INIT_OUTPUT_SCALE_KEY not in json_data
) else json_data[self.ACTOR_INIT_OUTPUT_SCALE_KEY]
s_size = self.get_state_size()
g_size = self.get_goal_size()
a_size = self.get_action_size()
s_size = self.get_state_size()
g_size = self.get_goal_size()
a_size = self.get_action_size()
# setup input tensors
self.s_tf = tf.placeholder(tf.float32, shape=[None, s_size], name="s")
self.a_tf = tf.placeholder(tf.float32, shape=[None, a_size], name="a")
self.tar_val_tf = tf.placeholder(tf.float32, shape=[None], name="tar_val")
self.adv_tf = tf.placeholder(tf.float32, shape=[None], name="adv")
self.g_tf = tf.placeholder(tf.float32, shape=([None, g_size] if self.has_goal() else None), name="g")
self.old_logp_tf = tf.placeholder(tf.float32, shape=[None], name="old_logp")
self.exp_mask_tf = tf.placeholder(tf.float32, shape=[None], name="exp_mask")
# setup input tensors
self.s_tf = tf.placeholder(tf.float32, shape=[None, s_size], name="s")
self.a_tf = tf.placeholder(tf.float32, shape=[None, a_size], name="a")
self.tar_val_tf = tf.placeholder(tf.float32, shape=[None], name="tar_val")
self.adv_tf = tf.placeholder(tf.float32, shape=[None], name="adv")
self.g_tf = tf.placeholder(tf.float32,
shape=([None, g_size] if self.has_goal() else None),
name="g")
self.old_logp_tf = tf.placeholder(tf.float32, shape=[None], name="old_logp")
self.exp_mask_tf = tf.placeholder(tf.float32, shape=[None], name="exp_mask")
with tf.variable_scope('main'):
with tf.variable_scope('actor'):
self.a_mean_tf = self._build_net_actor(actor_net_name, actor_init_output_scale)
with tf.variable_scope('critic'):
self.critic_tf = self._build_net_critic(critic_net_name)
if (self.a_mean_tf != None):
Logger.print2('Built actor net: ' + actor_net_name)
with tf.variable_scope('main'):
with tf.variable_scope('actor'):
self.a_mean_tf = self._build_net_actor(actor_net_name, actor_init_output_scale)
with tf.variable_scope('critic'):
self.critic_tf = self._build_net_critic(critic_net_name)
if (self.critic_tf != None):
Logger.print2('Built critic net: ' + critic_net_name)
self.norm_a_std_tf = self.exp_params_curr.noise * tf.ones(a_size)
norm_a_noise_tf = self.norm_a_std_tf * tf.random_normal(shape=tf.shape(self.a_mean_tf))
norm_a_noise_tf *= tf.expand_dims(self.exp_mask_tf, axis=-1)
self.sample_a_tf = self.a_mean_tf + norm_a_noise_tf * self.a_norm.std_tf
self.sample_a_logp_tf = TFUtil.calc_logp_gaussian(x_tf=norm_a_noise_tf, mean_tf=None, std_tf=self.norm_a_std_tf)
if (self.a_mean_tf != None):
Logger.print2('Built actor net: ' + actor_net_name)
return
if (self.critic_tf != None):
Logger.print2('Built critic net: ' + critic_net_name)
def _build_losses(self, json_data):
actor_weight_decay = 0 if (self.ACTOR_WEIGHT_DECAY_KEY not in json_data) else json_data[self.ACTOR_WEIGHT_DECAY_KEY]
critic_weight_decay = 0 if (self.CRITIC_WEIGHT_DECAY_KEY not in json_data) else json_data[self.CRITIC_WEIGHT_DECAY_KEY]
norm_val_diff = self.val_norm.normalize_tf(self.tar_val_tf) - self.val_norm.normalize_tf(self.critic_tf)
self.critic_loss_tf = 0.5 * tf.reduce_mean(tf.square(norm_val_diff))
self.norm_a_std_tf = self.exp_params_curr.noise * tf.ones(a_size)
norm_a_noise_tf = self.norm_a_std_tf * tf.random_normal(shape=tf.shape(self.a_mean_tf))
norm_a_noise_tf *= tf.expand_dims(self.exp_mask_tf, axis=-1)
self.sample_a_tf = self.a_mean_tf + norm_a_noise_tf * self.a_norm.std_tf
self.sample_a_logp_tf = TFUtil.calc_logp_gaussian(x_tf=norm_a_noise_tf,
mean_tf=None,
std_tf=self.norm_a_std_tf)
if (critic_weight_decay != 0):
self.critic_loss_tf += critic_weight_decay * self._weight_decay_loss('main/critic')
norm_tar_a_tf = self.a_norm.normalize_tf(self.a_tf)
self._norm_a_mean_tf = self.a_norm.normalize_tf(self.a_mean_tf)
return
self.logp_tf = TFUtil.calc_logp_gaussian(norm_tar_a_tf, self._norm_a_mean_tf, self.norm_a_std_tf)
ratio_tf = tf.exp(self.logp_tf - self.old_logp_tf)
actor_loss0 = self.adv_tf * ratio_tf
actor_loss1 = self.adv_tf * tf.clip_by_value(ratio_tf, 1.0 - self.ratio_clip, 1 + self.ratio_clip)
self.actor_loss_tf = -tf.reduce_mean(tf.minimum(actor_loss0, actor_loss1))
def _build_losses(self, json_data):
actor_weight_decay = 0 if (
self.ACTOR_WEIGHT_DECAY_KEY not in json_data) else json_data[self.ACTOR_WEIGHT_DECAY_KEY]
critic_weight_decay = 0 if (
self.CRITIC_WEIGHT_DECAY_KEY not in json_data) else json_data[self.CRITIC_WEIGHT_DECAY_KEY]
norm_a_bound_min = self.a_norm.normalize(self.a_bound_min)
norm_a_bound_max = self.a_norm.normalize(self.a_bound_max)
a_bound_loss = TFUtil.calc_bound_loss(self._norm_a_mean_tf, norm_a_bound_min, norm_a_bound_max)
self.actor_loss_tf += a_bound_loss
norm_val_diff = self.val_norm.normalize_tf(self.tar_val_tf) - self.val_norm.normalize_tf(
self.critic_tf)
self.critic_loss_tf = 0.5 * tf.reduce_mean(tf.square(norm_val_diff))
if (actor_weight_decay != 0):
self.actor_loss_tf += actor_weight_decay * self._weight_decay_loss('main/actor')
# for debugging
self.clip_frac_tf = tf.reduce_mean(tf.to_float(tf.greater(tf.abs(ratio_tf - 1.0), self.ratio_clip)))
if (critic_weight_decay != 0):
self.critic_loss_tf += critic_weight_decay * self._weight_decay_loss('main/critic')
return
norm_tar_a_tf = self.a_norm.normalize_tf(self.a_tf)
self._norm_a_mean_tf = self.a_norm.normalize_tf(self.a_mean_tf)
def _build_solvers(self, json_data):
actor_stepsize = 0.001 if (self.ACTOR_STEPSIZE_KEY not in json_data) else json_data[self.ACTOR_STEPSIZE_KEY]
actor_momentum = 0.9 if (self.ACTOR_MOMENTUM_KEY not in json_data) else json_data[self.ACTOR_MOMENTUM_KEY]
critic_stepsize = 0.01 if (self.CRITIC_STEPSIZE_KEY not in json_data) else json_data[self.CRITIC_STEPSIZE_KEY]
critic_momentum = 0.9 if (self.CRITIC_MOMENTUM_KEY not in json_data) else json_data[self.CRITIC_MOMENTUM_KEY]
critic_vars = self._tf_vars('main/critic')
critic_opt = tf.train.MomentumOptimizer(learning_rate=critic_stepsize, momentum=critic_momentum)
self.critic_grad_tf = tf.gradients(self.critic_loss_tf, critic_vars)
self.critic_solver = MPISolver(self.sess, critic_opt, critic_vars)
self.logp_tf = TFUtil.calc_logp_gaussian(norm_tar_a_tf, self._norm_a_mean_tf,
self.norm_a_std_tf)
ratio_tf = tf.exp(self.logp_tf - self.old_logp_tf)
actor_loss0 = self.adv_tf * ratio_tf
actor_loss1 = self.adv_tf * tf.clip_by_value(ratio_tf, 1.0 - self.ratio_clip,
1 + self.ratio_clip)
self.actor_loss_tf = -tf.reduce_mean(tf.minimum(actor_loss0, actor_loss1))
self._actor_stepsize_tf = tf.get_variable(dtype=tf.float32, name='actor_stepsize', initializer=actor_stepsize, trainable=False)
self._actor_stepsize_ph = tf.get_variable(dtype=tf.float32, name='actor_stepsize_ph', shape=[])
self._actor_stepsize_update_op = self._actor_stepsize_tf.assign(self._actor_stepsize_ph)
norm_a_bound_min = self.a_norm.normalize(self.a_bound_min)
norm_a_bound_max = self.a_norm.normalize(self.a_bound_max)
a_bound_loss = TFUtil.calc_bound_loss(self._norm_a_mean_tf, norm_a_bound_min, norm_a_bound_max)
self.actor_loss_tf += a_bound_loss
actor_vars = self._tf_vars('main/actor')
actor_opt = tf.train.MomentumOptimizer(learning_rate=self._actor_stepsize_tf, momentum=actor_momentum)
self.actor_grad_tf = tf.gradients(self.actor_loss_tf, actor_vars)
self.actor_solver = MPISolver(self.sess, actor_opt, actor_vars)
return
if (actor_weight_decay != 0):
self.actor_loss_tf += actor_weight_decay * self._weight_decay_loss('main/actor')
def _decide_action(self, s, g):
with self.sess.as_default(), self.graph.as_default():
self._exp_action = self._enable_stoch_policy() and MathUtil.flip_coin(self.exp_params_curr.rate)
#print("_decide_action._exp_action=",self._exp_action)
a, logp = self._eval_actor(s, g, self._exp_action)
return a[0], logp[0]
# for debugging
self.clip_frac_tf = tf.reduce_mean(
tf.to_float(tf.greater(tf.abs(ratio_tf - 1.0), self.ratio_clip)))
def _eval_actor(self, s, g, enable_exp):
s = np.reshape(s, [-1, self.get_state_size()])
g = np.reshape(g, [-1, self.get_goal_size()]) if self.has_goal() else None
feed = {
self.s_tf : s,
self.g_tf : g,
self.exp_mask_tf: np.array([1 if enable_exp else 0])
}
return
a, logp = self.sess.run([self.sample_a_tf, self.sample_a_logp_tf], feed_dict=feed)
return a, logp
def _build_solvers(self, json_data):
actor_stepsize = 0.001 if (
self.ACTOR_STEPSIZE_KEY not in json_data) else json_data[self.ACTOR_STEPSIZE_KEY]
actor_momentum = 0.9 if (
self.ACTOR_MOMENTUM_KEY not in json_data) else json_data[self.ACTOR_MOMENTUM_KEY]
critic_stepsize = 0.01 if (
self.CRITIC_STEPSIZE_KEY not in json_data) else json_data[self.CRITIC_STEPSIZE_KEY]
critic_momentum = 0.9 if (
self.CRITIC_MOMENTUM_KEY not in json_data) else json_data[self.CRITIC_MOMENTUM_KEY]
def _train_step(self):
adv_eps = 1e-5
critic_vars = self._tf_vars('main/critic')
critic_opt = tf.train.MomentumOptimizer(learning_rate=critic_stepsize,
momentum=critic_momentum)
self.critic_grad_tf = tf.gradients(self.critic_loss_tf, critic_vars)
self.critic_solver = MPISolver(self.sess, critic_opt, critic_vars)
start_idx = self.replay_buffer.buffer_tail
end_idx = self.replay_buffer.buffer_head
assert(start_idx == 0)
assert(self.replay_buffer.get_current_size() <= self.replay_buffer.buffer_size) # must avoid overflow
assert(start_idx < end_idx)
self._actor_stepsize_tf = tf.get_variable(dtype=tf.float32,
name='actor_stepsize',
initializer=actor_stepsize,
trainable=False)
self._actor_stepsize_ph = tf.get_variable(dtype=tf.float32, name='actor_stepsize_ph', shape=[])
self._actor_stepsize_update_op = self._actor_stepsize_tf.assign(self._actor_stepsize_ph)
idx = np.array(list(range(start_idx, end_idx)))
end_mask = self.replay_buffer.is_path_end(idx)
end_mask = np.logical_not(end_mask)
vals = self._compute_batch_vals(start_idx, end_idx)
new_vals = self._compute_batch_new_vals(start_idx, end_idx, vals)
actor_vars = self._tf_vars('main/actor')
actor_opt = tf.train.MomentumOptimizer(learning_rate=self._actor_stepsize_tf,
momentum=actor_momentum)
self.actor_grad_tf = tf.gradients(self.actor_loss_tf, actor_vars)
self.actor_solver = MPISolver(self.sess, actor_opt, actor_vars)
valid_idx = idx[end_mask]
exp_idx = self.replay_buffer.get_idx_filtered(self.EXP_ACTION_FLAG).copy()
num_valid_idx = valid_idx.shape[0]
num_exp_idx = exp_idx.shape[0]
exp_idx = np.column_stack([exp_idx, np.array(list(range(0, num_exp_idx)), dtype=np.int32)])
local_sample_count = valid_idx.size
global_sample_count = int(MPIUtil.reduce_sum(local_sample_count))
mini_batches = int(np.ceil(global_sample_count / self.mini_batch_size))
adv = new_vals[exp_idx[:,0]] - vals[exp_idx[:,0]]
new_vals = np.clip(new_vals, self.val_min, self.val_max)
return
adv_mean = np.mean(adv)
adv_std = np.std(adv)
adv = (adv - adv_mean) / (adv_std + adv_eps)
adv = np.clip(adv, -self.norm_adv_clip, self.norm_adv_clip)
def _decide_action(self, s, g):
with self.sess.as_default(), self.graph.as_default():
self._exp_action = self._enable_stoch_policy() and MathUtil.flip_coin(
self.exp_params_curr.rate)
#print("_decide_action._exp_action=",self._exp_action)
a, logp = self._eval_actor(s, g, self._exp_action)
return a[0], logp[0]
critic_loss = 0
actor_loss = 0
actor_clip_frac = 0
def _eval_actor(self, s, g, enable_exp):
s = np.reshape(s, [-1, self.get_state_size()])
g = np.reshape(g, [-1, self.get_goal_size()]) if self.has_goal() else None
for e in range(self.epochs):
np.random.shuffle(valid_idx)
np.random.shuffle(exp_idx)
feed = {self.s_tf: s, self.g_tf: g, self.exp_mask_tf: np.array([1 if enable_exp else 0])}
for b in range(mini_batches):
batch_idx_beg = b * self._local_mini_batch_size
batch_idx_end = batch_idx_beg + self._local_mini_batch_size
a, logp = self.sess.run([self.sample_a_tf, self.sample_a_logp_tf], feed_dict=feed)
return a, logp
critic_batch = np.array(range(batch_idx_beg, batch_idx_end), dtype=np.int32)
actor_batch = critic_batch.copy()
critic_batch = np.mod(critic_batch, num_valid_idx)
actor_batch = np.mod(actor_batch, num_exp_idx)
shuffle_actor = (actor_batch[-1] < actor_batch[0]) or (actor_batch[-1] == num_exp_idx - 1)
def _train_step(self):
adv_eps = 1e-5
critic_batch = valid_idx[critic_batch]
actor_batch = exp_idx[actor_batch]
critic_batch_vals = new_vals[critic_batch]
actor_batch_adv = adv[actor_batch[:,1]]
start_idx = self.replay_buffer.buffer_tail
end_idx = self.replay_buffer.buffer_head
assert (start_idx == 0)
assert (self.replay_buffer.get_current_size() <= self.replay_buffer.buffer_size
) # must avoid overflow
assert (start_idx < end_idx)
critic_s = self.replay_buffer.get('states', critic_batch)
critic_g = self.replay_buffer.get('goals', critic_batch) if self.has_goal() else None
curr_critic_loss = self._update_critic(critic_s, critic_g, critic_batch_vals)
idx = np.array(list(range(start_idx, end_idx)))
end_mask = self.replay_buffer.is_path_end(idx)
end_mask = np.logical_not(end_mask)
actor_s = self.replay_buffer.get("states", actor_batch[:,0])
actor_g = self.replay_buffer.get("goals", actor_batch[:,0]) if self.has_goal() else None
actor_a = self.replay_buffer.get("actions", actor_batch[:,0])
actor_logp = self.replay_buffer.get("logps", actor_batch[:,0])
curr_actor_loss, curr_actor_clip_frac = self._update_actor(actor_s, actor_g, actor_a, actor_logp, actor_batch_adv)
critic_loss += curr_critic_loss
actor_loss += np.abs(curr_actor_loss)
actor_clip_frac += curr_actor_clip_frac
vals = self._compute_batch_vals(start_idx, end_idx)
new_vals = self._compute_batch_new_vals(start_idx, end_idx, vals)
if (shuffle_actor):
np.random.shuffle(exp_idx)
valid_idx = idx[end_mask]
exp_idx = self.replay_buffer.get_idx_filtered(self.EXP_ACTION_FLAG).copy()
num_valid_idx = valid_idx.shape[0]
num_exp_idx = exp_idx.shape[0]
exp_idx = np.column_stack([exp_idx, np.array(list(range(0, num_exp_idx)), dtype=np.int32)])
total_batches = mini_batches * self.epochs
critic_loss /= total_batches
actor_loss /= total_batches
actor_clip_frac /= total_batches
local_sample_count = valid_idx.size
global_sample_count = int(MPIUtil.reduce_sum(local_sample_count))
mini_batches = int(np.ceil(global_sample_count / self.mini_batch_size))
critic_loss = MPIUtil.reduce_avg(critic_loss)
actor_loss = MPIUtil.reduce_avg(actor_loss)
actor_clip_frac = MPIUtil.reduce_avg(actor_clip_frac)
adv = new_vals[exp_idx[:, 0]] - vals[exp_idx[:, 0]]
new_vals = np.clip(new_vals, self.val_min, self.val_max)
critic_stepsize = self.critic_solver.get_stepsize()
actor_stepsize = self.update_actor_stepsize(actor_clip_frac)
adv_mean = np.mean(adv)
adv_std = np.std(adv)
adv = (adv - adv_mean) / (adv_std + adv_eps)
adv = np.clip(adv, -self.norm_adv_clip, self.norm_adv_clip)
self.logger.log_tabular('Critic_Loss', critic_loss)
self.logger.log_tabular('Critic_Stepsize', critic_stepsize)
self.logger.log_tabular('Actor_Loss', actor_loss)
self.logger.log_tabular('Actor_Stepsize', actor_stepsize)
self.logger.log_tabular('Clip_Frac', actor_clip_frac)
self.logger.log_tabular('Adv_Mean', adv_mean)
self.logger.log_tabular('Adv_Std', adv_std)
critic_loss = 0
actor_loss = 0
actor_clip_frac = 0
self.replay_buffer.clear()
for e in range(self.epochs):
np.random.shuffle(valid_idx)
np.random.shuffle(exp_idx)
return
for b in range(mini_batches):
batch_idx_beg = b * self._local_mini_batch_size
batch_idx_end = batch_idx_beg + self._local_mini_batch_size
def _get_iters_per_update(self):
return 1
critic_batch = np.array(range(batch_idx_beg, batch_idx_end), dtype=np.int32)
actor_batch = critic_batch.copy()
critic_batch = np.mod(critic_batch, num_valid_idx)
actor_batch = np.mod(actor_batch, num_exp_idx)
shuffle_actor = (actor_batch[-1] < actor_batch[0]) or (actor_batch[-1] == num_exp_idx - 1)
def _valid_train_step(self):
samples = self.replay_buffer.get_current_size()
exp_samples = self.replay_buffer.count_filtered(self.EXP_ACTION_FLAG)
global_sample_count = int(MPIUtil.reduce_sum(samples))
global_exp_min = int(MPIUtil.reduce_min(exp_samples))
return (global_sample_count > self.batch_size) and (global_exp_min > 0)
critic_batch = valid_idx[critic_batch]
actor_batch = exp_idx[actor_batch]
critic_batch_vals = new_vals[critic_batch]
actor_batch_adv = adv[actor_batch[:, 1]]
def _compute_batch_vals(self, start_idx, end_idx):
states = self.replay_buffer.get_all("states")[start_idx:end_idx]
goals = self.replay_buffer.get_all("goals")[start_idx:end_idx] if self.has_goal() else None
idx = np.array(list(range(start_idx, end_idx)))
is_end = self.replay_buffer.is_path_end(idx)
is_fail = self.replay_buffer.check_terminal_flag(idx, Env.Terminate.Fail)
is_succ = self.replay_buffer.check_terminal_flag(idx, Env.Terminate.Succ)
is_fail = np.logical_and(is_end, is_fail)
is_succ = np.logical_and(is_end, is_succ)
critic_s = self.replay_buffer.get('states', critic_batch)
critic_g = self.replay_buffer.get('goals', critic_batch) if self.has_goal() else None
curr_critic_loss = self._update_critic(critic_s, critic_g, critic_batch_vals)
vals = self._eval_critic(states, goals)
vals[is_fail] = self.val_fail
vals[is_succ] = self.val_succ
actor_s = self.replay_buffer.get("states", actor_batch[:, 0])
actor_g = self.replay_buffer.get("goals", actor_batch[:, 0]) if self.has_goal() else None
actor_a = self.replay_buffer.get("actions", actor_batch[:, 0])
actor_logp = self.replay_buffer.get("logps", actor_batch[:, 0])
curr_actor_loss, curr_actor_clip_frac = self._update_actor(actor_s, actor_g, actor_a,
actor_logp, actor_batch_adv)
return vals
critic_loss += curr_critic_loss
actor_loss += np.abs(curr_actor_loss)
actor_clip_frac += curr_actor_clip_frac
def _compute_batch_new_vals(self, start_idx, end_idx, val_buffer):
rewards = self.replay_buffer.get_all("rewards")[start_idx:end_idx]
if (shuffle_actor):
np.random.shuffle(exp_idx)
if self.discount == 0:
new_vals = rewards.copy()
total_batches = mini_batches * self.epochs
critic_loss /= total_batches
actor_loss /= total_batches
actor_clip_frac /= total_batches
critic_loss = MPIUtil.reduce_avg(critic_loss)
actor_loss = MPIUtil.reduce_avg(actor_loss)
actor_clip_frac = MPIUtil.reduce_avg(actor_clip_frac)
critic_stepsize = self.critic_solver.get_stepsize()
actor_stepsize = self.update_actor_stepsize(actor_clip_frac)
self.logger.log_tabular('Critic_Loss', critic_loss)
self.logger.log_tabular('Critic_Stepsize', critic_stepsize)
self.logger.log_tabular('Actor_Loss', actor_loss)
self.logger.log_tabular('Actor_Stepsize', actor_stepsize)
self.logger.log_tabular('Clip_Frac', actor_clip_frac)
self.logger.log_tabular('Adv_Mean', adv_mean)
self.logger.log_tabular('Adv_Std', adv_std)
self.replay_buffer.clear()
return
def _get_iters_per_update(self):
return 1
def _valid_train_step(self):
samples = self.replay_buffer.get_current_size()
exp_samples = self.replay_buffer.count_filtered(self.EXP_ACTION_FLAG)
global_sample_count = int(MPIUtil.reduce_sum(samples))
global_exp_min = int(MPIUtil.reduce_min(exp_samples))
return (global_sample_count > self.batch_size) and (global_exp_min > 0)
def _compute_batch_vals(self, start_idx, end_idx):
states = self.replay_buffer.get_all("states")[start_idx:end_idx]
goals = self.replay_buffer.get_all("goals")[start_idx:end_idx] if self.has_goal() else None
idx = np.array(list(range(start_idx, end_idx)))
is_end = self.replay_buffer.is_path_end(idx)
is_fail = self.replay_buffer.check_terminal_flag(idx, Env.Terminate.Fail)
is_succ = self.replay_buffer.check_terminal_flag(idx, Env.Terminate.Succ)
is_fail = np.logical_and(is_end, is_fail)
is_succ = np.logical_and(is_end, is_succ)
vals = self._eval_critic(states, goals)
vals[is_fail] = self.val_fail
vals[is_succ] = self.val_succ
return vals
def _compute_batch_new_vals(self, start_idx, end_idx, val_buffer):
rewards = self.replay_buffer.get_all("rewards")[start_idx:end_idx]
if self.discount == 0:
new_vals = rewards.copy()
else:
new_vals = np.zeros_like(val_buffer)
curr_idx = start_idx
while curr_idx < end_idx:
idx0 = curr_idx - start_idx
idx1 = self.replay_buffer.get_path_end(curr_idx) - start_idx
r = rewards[idx0:idx1]
v = val_buffer[idx0:(idx1 + 1)]
new_vals[idx0:idx1] = RLUtil.compute_return(r, self.discount, self.td_lambda, v)
curr_idx = idx1 + start_idx + 1
return new_vals
def _update_critic(self, s, g, tar_vals):
feed = {self.s_tf: s, self.g_tf: g, self.tar_val_tf: tar_vals}
loss, grads = self.sess.run([self.critic_loss_tf, self.critic_grad_tf], feed)
self.critic_solver.update(grads)
return loss
def _update_actor(self, s, g, a, logp, adv):
feed = {self.s_tf: s, self.g_tf: g, self.a_tf: a, self.adv_tf: adv, self.old_logp_tf: logp}
loss, grads, clip_frac = self.sess.run(
[self.actor_loss_tf, self.actor_grad_tf, self.clip_frac_tf], feed)
self.actor_solver.update(grads)
return loss, clip_frac
def update_actor_stepsize(self, clip_frac):
clip_tol = 1.5
step_scale = 2
max_stepsize = 1e-2
min_stepsize = 1e-8
warmup_iters = 5
actor_stepsize = self.actor_solver.get_stepsize()
if (self.tar_clip_frac >= 0 and self.iter > warmup_iters):
min_clip = self.tar_clip_frac / clip_tol
max_clip = self.tar_clip_frac * clip_tol
under_tol = clip_frac < min_clip
over_tol = clip_frac > max_clip
if (over_tol or under_tol):
if (over_tol):
actor_stepsize *= self.actor_stepsize_decay
else:
new_vals = np.zeros_like(val_buffer)
actor_stepsize /= self.actor_stepsize_decay
curr_idx = start_idx
while curr_idx < end_idx:
idx0 = curr_idx - start_idx
idx1 = self.replay_buffer.get_path_end(curr_idx) - start_idx
r = rewards[idx0:idx1]
v = val_buffer[idx0:(idx1 + 1)]
actor_stepsize = np.clip(actor_stepsize, min_stepsize, max_stepsize)
self.set_actor_stepsize(actor_stepsize)
new_vals[idx0:idx1] = RLUtil.compute_return(r, self.discount, self.td_lambda, v)
curr_idx = idx1 + start_idx + 1
return new_vals
return actor_stepsize
def _update_critic(self, s, g, tar_vals):
feed = {
self.s_tf: s,
self.g_tf: g,
self.tar_val_tf: tar_vals
}
loss, grads = self.sess.run([self.critic_loss_tf, self.critic_grad_tf], feed)
self.critic_solver.update(grads)
return loss
def _update_actor(self, s, g, a, logp, adv):
feed = {
self.s_tf: s,
self.g_tf: g,
self.a_tf: a,
self.adv_tf: adv,
self.old_logp_tf: logp
}
loss, grads, clip_frac = self.sess.run([self.actor_loss_tf, self.actor_grad_tf,
self.clip_frac_tf], feed)
self.actor_solver.update(grads)
return loss, clip_frac
def update_actor_stepsize(self, clip_frac):
clip_tol = 1.5
step_scale = 2
max_stepsize = 1e-2
min_stepsize = 1e-8
warmup_iters = 5
actor_stepsize = self.actor_solver.get_stepsize()
if (self.tar_clip_frac >= 0 and self.iter > warmup_iters):
min_clip = self.tar_clip_frac / clip_tol
max_clip = self.tar_clip_frac * clip_tol
under_tol = clip_frac < min_clip
over_tol = clip_frac > max_clip
if (over_tol or under_tol):
if (over_tol):
actor_stepsize *= self.actor_stepsize_decay
else:
actor_stepsize /= self.actor_stepsize_decay
actor_stepsize = np.clip(actor_stepsize, min_stepsize, max_stepsize)
self.set_actor_stepsize(actor_stepsize)
return actor_stepsize
def set_actor_stepsize(self, stepsize):
feed = {
self._actor_stepsize_ph: stepsize,
}
self.sess.run(self._actor_stepsize_update_op, feed)
return
def set_actor_stepsize(self, stepsize):
feed = {
self._actor_stepsize_ph: stepsize,
}
self.sess.run(self._actor_stepsize_update_op, feed)
return
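
The actor loss built in _build_losses above is the standard PPO clipped surrogate: with ratio = exp(logp - old_logp), the per-sample objective is min(ratio * adv, clip(ratio, 1 - eps, 1 + eps) * adv), averaged and negated, and the clip fraction is logged as a diagnostic. A small numpy sketch of that loss with illustrative values only:

import numpy as np

# PPO clipped surrogate loss, mirroring the TF graph constructed above.
ratio_clip = 0.2
adv = np.array([0.8, -0.3, 1.2, 0.1])
logp = np.array([-1.1, -0.9, -1.4, -1.0])      # log-prob under current policy
old_logp = np.array([-1.0, -1.0, -1.0, -1.0])  # log-prob recorded at sample time

ratio = np.exp(logp - old_logp)
unclipped = adv * ratio
clipped = adv * np.clip(ratio, 1.0 - ratio_clip, 1.0 + ratio_clip)
actor_loss = -np.mean(np.minimum(unclipped, clipped))

# fraction of samples whose ratio left the trust region; used to adapt the stepsize
clip_frac = np.mean(np.abs(ratio - 1.0) > ratio_clip)
print(actor_loss, clip_frac)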

View File

@@ -5,347 +5,353 @@ import inspect as inspect
from pybullet_envs.deep_mimic.env.env import Env
import pybullet_utils.math_util as MathUtil
class ReplayBuffer(object):
TERMINATE_KEY = 'terminate'
PATH_START_KEY = 'path_start'
PATH_END_KEY = 'path_end'
TERMINATE_KEY = 'terminate'
PATH_START_KEY = 'path_start'
PATH_END_KEY = 'path_end'
def __init__(self, buffer_size):
assert buffer_size > 0
def __init__(self, buffer_size):
assert buffer_size > 0
self.buffer_size = buffer_size
self.total_count = 0
self.buffer_head = 0
self.buffer_tail = MathUtil.INVALID_IDX
self.num_paths = 0
self._sample_buffers = dict()
self.buffers = None
self.buffer_size = buffer_size
self.total_count = 0
self.buffer_head = 0
self.buffer_tail = MathUtil.INVALID_IDX
self.num_paths = 0
self._sample_buffers = dict()
self.buffers = None
self.clear()
return
self.clear()
return
def sample(self, n):
curr_size = self.get_current_size()
assert curr_size > 0
def sample(self, n):
curr_size = self.get_current_size()
assert curr_size > 0
idx = np.empty(n, dtype=int)
# makes sure that the end states are not sampled
for i in range(n):
while True:
curr_idx = np.random.randint(0, curr_size, size=1)[0]
curr_idx += self.buffer_tail
curr_idx = np.mod(curr_idx, self.buffer_size)
idx = np.empty(n, dtype=int)
# makes sure that the end states are not sampled
for i in range(n):
while True:
curr_idx = np.random.randint(0, curr_size, size=1)[0]
curr_idx += self.buffer_tail
curr_idx = np.mod(curr_idx, self.buffer_size)
if not self.is_path_end(curr_idx):
break
idx[i] = curr_idx
if not self.is_path_end(curr_idx):
break
idx[i] = curr_idx
return idx
return idx
def sample_filtered(self, n, key):
assert key in self._sample_buffers
curr_buffer = self._sample_buffers[key]
idx = curr_buffer.sample(n)
return idx
def sample_filtered(self, n, key):
assert key in self._sample_buffers
curr_buffer = self._sample_buffers[key]
idx = curr_buffer.sample(n)
return idx
def count_filtered(self, key):
curr_buffer = self._sample_buffers[key]
return curr_buffer.count
def count_filtered(self, key):
curr_buffer = self._sample_buffers[key]
return curr_buffer.count
def get(self, key, idx):
return self.buffers[key][idx]
def get(self, key, idx):
return self.buffers[key][idx]
def get_all(self, key):
return self.buffers[key]
def get_all(self, key):
return self.buffers[key]
def get_idx_filtered(self, key):
assert key in self._sample_buffers
curr_buffer = self._sample_buffers[key]
idx = curr_buffer.slot_to_idx[:curr_buffer.count]
return idx
def get_path_start(self, idx):
return self.buffers[self.PATH_START_KEY][idx]
def get_idx_filtered(self, key):
assert key in self._sample_buffers
curr_buffer = self._sample_buffers[key]
idx = curr_buffer.slot_to_idx[:curr_buffer.count]
return idx
def get_path_end(self, idx):
return self.buffers[self.PATH_END_KEY][idx]
def get_path_start(self, idx):
return self.buffers[self.PATH_START_KEY][idx]
def get_pathlen(self, idx):
is_array = isinstance(idx, np.ndarray) or isinstance(idx, list)
if not is_array:
idx = [idx]
def get_path_end(self, idx):
return self.buffers[self.PATH_END_KEY][idx]
n = len(idx)
start_idx = self.get_path_start(idx)
end_idx = self.get_path_end(idx)
pathlen = np.empty(n, dtype=int)
def get_pathlen(self, idx):
is_array = isinstance(idx, np.ndarray) or isinstance(idx, list)
if not is_array:
idx = [idx]
for i in range(n):
curr_start = start_idx[i]
curr_end = end_idx[i]
if curr_start < curr_end:
curr_len = curr_end - curr_start
else:
curr_len = self.buffer_size - curr_start + curr_end
pathlen[i] = curr_len
n = len(idx)
start_idx = self.get_path_start(idx)
end_idx = self.get_path_end(idx)
pathlen = np.empty(n, dtype=int)
if not is_array:
pathlen = pathlen[0]
for i in range(n):
curr_start = start_idx[i]
curr_end = end_idx[i]
if curr_start < curr_end:
curr_len = curr_end - curr_start
else:
curr_len = self.buffer_size - curr_start + curr_end
pathlen[i] = curr_len
return pathlen
if not is_array:
pathlen = pathlen[0]
def is_valid_path(self, idx):
start_idx = self.get_path_start(idx)
valid = start_idx != MathUtil.INVALID_IDX
return valid
return pathlen
def store(self, path):
start_idx = MathUtil.INVALID_IDX
n = path.pathlength()
if (n > 0):
assert path.is_valid()
def is_valid_path(self, idx):
start_idx = self.get_path_start(idx)
valid = start_idx != MathUtil.INVALID_IDX
return valid
if path.check_vals():
if self.buffers is None:
self._init_buffers(path)
def store(self, path):
start_idx = MathUtil.INVALID_IDX
n = path.pathlength()
idx = self._request_idx(n + 1)
self._store_path(path, idx)
self._add_sample_buffers(idx)
if (n > 0):
assert path.is_valid()
self.num_paths += 1
self.total_count += n + 1
start_idx = idx[0]
else:
Logger.print2('Invalid path data value detected')
return start_idx
if path.check_vals():
if self.buffers is None:
self._init_buffers(path)
def clear(self):
self.buffer_head = 0
self.buffer_tail = MathUtil.INVALID_IDX
self.num_paths = 0
idx = self._request_idx(n + 1)
self._store_path(path, idx)
self._add_sample_buffers(idx)
for key in self._sample_buffers:
self._sample_buffers[key].clear()
return
self.num_paths += 1
self.total_count += n + 1
start_idx = idx[0]
else:
Logger.print2('Invalid path data value detected')
def get_next_idx(self, idx):
next_idx = np.mod(idx + 1, self.buffer_size)
return next_idx
return start_idx
def is_terminal_state(self, idx):
terminate_flags = self.buffers[self.TERMINATE_KEY][idx]
terminate = terminate_flags != Env.Terminate.Null.value
is_end = self.is_path_end(idx)
terminal_state = np.logical_and(terminate, is_end)
return terminal_state
def clear(self):
self.buffer_head = 0
self.buffer_tail = MathUtil.INVALID_IDX
self.num_paths = 0
def check_terminal_flag(self, idx, flag):
terminate_flags = self.buffers[self.TERMINATE_KEY][idx]
terminate = terminate_flags == flag.value
return terminate
for key in self._sample_buffers:
self._sample_buffers[key].clear()
return
def is_path_end(self, idx):
is_end = self.buffers[self.PATH_END_KEY][idx] == idx
return is_end
def get_next_idx(self, idx):
next_idx = np.mod(idx + 1, self.buffer_size)
return next_idx
def add_filter_key(self, key):
assert self.get_current_size() == 0
if key not in self._sample_buffers:
self._sample_buffers[key] = SampleBuffer(self.buffer_size)
return
def is_terminal_state(self, idx):
terminate_flags = self.buffers[self.TERMINATE_KEY][idx]
terminate = terminate_flags != Env.Terminate.Null.value
is_end = self.is_path_end(idx)
terminal_state = np.logical_and(terminate, is_end)
return terminal_state
def get_current_size(self):
if self.buffer_tail == MathUtil.INVALID_IDX:
return 0
elif self.buffer_tail < self.buffer_head:
return self.buffer_head - self.buffer_tail
def check_terminal_flag(self, idx, flag):
terminate_flags = self.buffers[self.TERMINATE_KEY][idx]
terminate = terminate_flags == flag.value
return terminate
def is_path_end(self, idx):
is_end = self.buffers[self.PATH_END_KEY][idx] == idx
return is_end
def add_filter_key(self, key):
assert self.get_current_size() == 0
if key not in self._sample_buffers:
self._sample_buffers[key] = SampleBuffer(self.buffer_size)
return
def get_current_size(self):
if self.buffer_tail == MathUtil.INVALID_IDX:
return 0
elif self.buffer_tail < self.buffer_head:
return self.buffer_head - self.buffer_tail
else:
return self.buffer_size - self.buffer_tail + self.buffer_head
def _check_flags(self, key, flags):
return (flags & key) == key
def _add_sample_buffers(self, idx):
flags = self.buffers['flags']
for key in self._sample_buffers:
curr_buffer = self._sample_buffers[key]
filter_idx = [
i for i in idx if (self._check_flags(key, flags[i]) and not self.is_path_end(i))
]
curr_buffer.add(filter_idx)
return
def _free_sample_buffers(self, idx):
for key in self._sample_buffers:
curr_buffer = self._sample_buffers[key]
curr_buffer.free(idx)
return
def _init_buffers(self, path):
self.buffers = dict()
self.buffers[self.PATH_START_KEY] = MathUtil.INVALID_IDX * np.ones(self.buffer_size, dtype=int)
self.buffers[self.PATH_END_KEY] = MathUtil.INVALID_IDX * np.ones(self.buffer_size, dtype=int)
for key in dir(path):
val = getattr(path, key)
if not key.startswith('__') and not inspect.ismethod(val):
if key == self.TERMINATE_KEY:
self.buffers[self.TERMINATE_KEY] = np.zeros(shape=[self.buffer_size], dtype=int)
else:
return self.buffer_size - self.buffer_tail + self.buffer_head
val_type = type(val[0])
is_array = val_type == np.ndarray
if is_array:
shape = [self.buffer_size, val[0].shape[0]]
dtype = val[0].dtype
else:
shape = [self.buffer_size]
dtype = val_type
def _check_flags(self, key, flags):
return (flags & key) == key
self.buffers[key] = np.zeros(shape, dtype=dtype)
return
def _add_sample_buffers(self, idx):
flags = self.buffers['flags']
for key in self._sample_buffers:
curr_buffer = self._sample_buffers[key]
filter_idx = [i for i in idx if (self._check_flags(key, flags[i]) and not self.is_path_end(i))]
curr_buffer.add(filter_idx)
return
def _request_idx(self, n):
assert n + 1 < self.buffer_size # bad things can happen if path is too long
def _free_sample_buffers(self, idx):
for key in self._sample_buffers:
curr_buffer = self._sample_buffers[key]
curr_buffer.free(idx)
return
remainder = n
idx = []
def _init_buffers(self, path):
self.buffers = dict()
self.buffers[self.PATH_START_KEY] = MathUtil.INVALID_IDX * np.ones(self.buffer_size, dtype=int)
self.buffers[self.PATH_END_KEY] = MathUtil.INVALID_IDX * np.ones(self.buffer_size, dtype=int)
start_idx = self.buffer_head
while remainder > 0:
end_idx = np.minimum(start_idx + remainder, self.buffer_size)
remainder -= (end_idx - start_idx)
for key in dir(path):
val = getattr(path, key)
if not key.startswith('__') and not inspect.ismethod(val):
if key == self.TERMINATE_KEY:
self.buffers[self.TERMINATE_KEY] = np.zeros(shape=[self.buffer_size], dtype=int)
else:
val_type = type(val[0])
is_array = val_type == np.ndarray
if is_array:
shape = [self.buffer_size, val[0].shape[0]]
dtype = val[0].dtype
else:
shape = [self.buffer_size]
dtype = val_type
self.buffers[key] = np.zeros(shape, dtype=dtype)
return
free_idx = list(range(start_idx, end_idx))
self._free_idx(free_idx)
idx += free_idx
start_idx = 0
def _request_idx(self, n):
assert n + 1 < self.buffer_size # bad things can happen if path is too long
self.buffer_head = (self.buffer_head + n) % self.buffer_size
return idx
remainder = n
idx = []
def _free_idx(self, idx):
assert (idx[0] <= idx[-1])
n = len(idx)
if self.buffer_tail != MathUtil.INVALID_IDX:
update_tail = idx[0] <= idx[-1] and idx[0] <= self.buffer_tail and idx[-1] >= self.buffer_tail
update_tail |= idx[0] > idx[-1] and (idx[0] <= self.buffer_tail or
idx[-1] >= self.buffer_tail)
start_idx = self.buffer_head
while remainder > 0:
end_idx = np.minimum(start_idx + remainder, self.buffer_size)
remainder -= (end_idx - start_idx)
if update_tail:
i = 0
while i < n:
curr_idx = idx[i]
if self.is_valid_path(curr_idx):
start_idx = self.get_path_start(curr_idx)
end_idx = self.get_path_end(curr_idx)
pathlen = self.get_pathlen(curr_idx)
free_idx = list(range(start_idx, end_idx))
self._free_idx(free_idx)
idx += free_idx
start_idx = 0
if start_idx < end_idx:
self.buffers[self.PATH_START_KEY][start_idx:end_idx + 1] = MathUtil.INVALID_IDX
self._free_sample_buffers(list(range(start_idx, end_idx + 1)))
else:
self.buffers[self.PATH_START_KEY][start_idx:self.buffer_size] = MathUtil.INVALID_IDX
self.buffers[self.PATH_START_KEY][0:end_idx + 1] = MathUtil.INVALID_IDX
self._free_sample_buffers(list(range(start_idx, self.buffer_size)))
self._free_sample_buffers(list(range(0, end_idx + 1)))
self.buffer_head = (self.buffer_head + n) % self.buffer_size
return idx
self.num_paths -= 1
i += pathlen + 1
self.buffer_tail = (end_idx + 1) % self.buffer_size
else:
i += 1
else:
self.buffer_tail = idx[0]
return
def _free_idx(self, idx):
assert(idx[0] <= idx[-1])
n = len(idx)
if self.buffer_tail != MathUtil.INVALID_IDX:
update_tail = idx[0] <= idx[-1] and idx[0] <= self.buffer_tail and idx[-1] >= self.buffer_tail
update_tail |= idx[0] > idx[-1] and (idx[0] <= self.buffer_tail or idx[-1] >= self.buffer_tail)
if update_tail:
i = 0
while i < n:
curr_idx = idx[i]
if self.is_valid_path(curr_idx):
start_idx = self.get_path_start(curr_idx)
end_idx = self.get_path_end(curr_idx)
pathlen = self.get_pathlen(curr_idx)
def _store_path(self, path, idx):
n = path.pathlength()
for key, data in self.buffers.items():
if key != self.PATH_START_KEY and key != self.PATH_END_KEY and key != self.TERMINATE_KEY:
val = getattr(path, key)
val_len = len(val)
assert val_len == n or val_len == n + 1
data[idx[:val_len]] = val
if start_idx < end_idx:
self.buffers[self.PATH_START_KEY][start_idx:end_idx + 1] = MathUtil.INVALID_IDX
self._free_sample_buffers(list(range(start_idx, end_idx + 1)))
else:
self.buffers[self.PATH_START_KEY][start_idx:self.buffer_size] = MathUtil.INVALID_IDX
self.buffers[self.PATH_START_KEY][0:end_idx + 1] = MathUtil.INVALID_IDX
self._free_sample_buffers(list(range(start_idx, self.buffer_size)))
self._free_sample_buffers(list(range(0, end_idx + 1)))
self.num_paths -= 1
i += pathlen + 1
self.buffer_tail = (end_idx + 1) % self.buffer_size
else:
i += 1
else:
self.buffer_tail = idx[0]
return
self.buffers[self.TERMINATE_KEY][idx] = path.terminate.value
self.buffers[self.PATH_START_KEY][idx] = idx[0]
self.buffers[self.PATH_END_KEY][idx] = idx[-1]
return
def _store_path(self, path, idx):
n = path.pathlength()
for key, data in self.buffers.items():
if key != self.PATH_START_KEY and key != self.PATH_END_KEY and key != self.TERMINATE_KEY:
val = getattr(path, key)
val_len = len(val)
assert val_len == n or val_len == n + 1
data[idx[:val_len]] = val
self.buffers[self.TERMINATE_KEY][idx] = path.terminate.value
self.buffers[self.PATH_START_KEY][idx] = idx[0]
self.buffers[self.PATH_END_KEY][idx] = idx[-1]
return
class SampleBuffer(object):
def __init__(self, size):
self.idx_to_slot = np.empty(shape=[size], dtype=int)
self.slot_to_idx = np.empty(shape=[size], dtype=int)
self.count = 0
self.clear()
return
def clear(self):
self.idx_to_slot.fill(MathUtil.INVALID_IDX)
self.slot_to_idx.fill(MathUtil.INVALID_IDX)
self.count = 0
return
def is_valid(self, idx):
return self.idx_to_slot[idx] != MathUtil.INVALID_IDX
def __init__(self, size):
self.idx_to_slot = np.empty(shape=[size], dtype=int)
self.slot_to_idx = np.empty(shape=[size], dtype=int)
self.count = 0
self.clear()
return
def get_size(self):
return self.idx_to_slot.shape[0]
def clear(self):
self.idx_to_slot.fill(MathUtil.INVALID_IDX)
self.slot_to_idx.fill(MathUtil.INVALID_IDX)
self.count = 0
return
def add(self, idx):
for i in idx:
if not self.is_valid(i):
new_slot = self.count
assert new_slot >= 0
def is_valid(self, idx):
return self.idx_to_slot[idx] != MathUtil.INVALID_IDX
self.idx_to_slot[i] = new_slot
self.slot_to_idx[new_slot] = i
self.count += 1
return
def get_size(self):
return self.idx_to_slot.shape[0]
def free(self, idx):
for i in idx:
if self.is_valid(i):
slot = self.idx_to_slot[i]
last_slot = self.count - 1
last_idx = self.slot_to_idx[last_slot]
def add(self, idx):
for i in idx:
if not self.is_valid(i):
new_slot = self.count
assert new_slot >= 0
self.idx_to_slot[last_idx] = slot
self.slot_to_idx[slot] = last_idx
self.idx_to_slot[i] = MathUtil.INVALID_IDX
self.slot_to_idx[last_slot] = MathUtil.INVALID_IDX
self.count -= 1
return
self.idx_to_slot[i] = new_slot
self.slot_to_idx[new_slot] = i
self.count += 1
return
def sample(self, n):
if self.count > 0:
slots = np.random.randint(0, self.count, size=n)
idx = self.slot_to_idx[slots]
else:
idx = np.empty(shape=[0], dtype=int)
return idx
def free(self, idx):
for i in idx:
if self.is_valid(i):
slot = self.idx_to_slot[i]
last_slot = self.count - 1
last_idx = self.slot_to_idx[last_slot]
def check_consistency(self):
valid = True
if self.count < 0:
self.idx_to_slot[last_idx] = slot
self.slot_to_idx[slot] = last_idx
self.idx_to_slot[i] = MathUtil.INVALID_IDX
self.slot_to_idx[last_slot] = MathUtil.INVALID_IDX
self.count -= 1
return
def sample(self, n):
if self.count > 0:
slots = np.random.randint(0, self.count, size=n)
idx = self.slot_to_idx[slots]
else:
idx = np.empty(shape=[0], dtype=int)
return idx
def check_consistency(self):
valid = True
if self.count < 0:
valid = False
if valid:
for i in range(self.get_size()):
if self.is_valid(i):
s = self.idx_to_slot[i]
if self.slot_to_idx[s] != i:
valid = False
break
if valid:
for i in range(self.get_size()):
if self.is_valid(i):
s = self.idx_to_slot[i]
if self.slot_to_idx[s] != i:
valid = False
break
s2i = self.slot_to_idx[i]
if s2i != MathUtil.INVALID_IDX:
i2s = self.idx_to_slot[s2i]
if i2s != i:
valid = False
break
s2i = self.slot_to_idx[i]
if s2i != MathUtil.INVALID_IDX:
i2s = self.idx_to_slot[s2i]
if i2s != i:
valid = False
break
count0 = np.sum(self.idx_to_slot == MathUtil.INVALID_IDX)
count1 = np.sum(self.slot_to_idx == MathUtil.INVALID_IDX)
valid &= count0 == count1
return valid
count0 = np.sum(self.idx_to_slot == MathUtil.INVALID_IDX)
count1 = np.sum(self.slot_to_idx == MathUtil.INVALID_IDX)
valid &= count0 == count1
return valid
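
# A short usage sketch of the SampleBuffer bookkeeping above; the buffer size and the
# indices are made up purely for illustration.
buf = SampleBuffer(8)            # room for up to 8 replay-buffer indices
buf.add([0, 1, 2, 5])            # register indices whose samples carry a given flag
assert buf.count == 4 and buf.check_consistency()
batch = buf.sample(3)            # uniform sample over the registered indices
buf.free([1])                    # unregister an index; the slot array stays densely packed
assert buf.count == 3 and buf.check_consistency()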

View File

@@ -1,18 +1,19 @@
import numpy as np
def compute_return(rewards, gamma, td_lambda, val_t):
# computes td-lambda return of path
path_len = len(rewards)
assert len(val_t) == path_len + 1
return_t = np.zeros(path_len)
last_val = rewards[-1] + gamma * val_t[-1]
return_t[-1] = last_val
for i in reversed(range(0, path_len - 1)):
curr_r = rewards[i]
next_ret = return_t[i + 1]
curr_val = curr_r + gamma * ((1.0 - td_lambda) * val_t[i + 1] + td_lambda * next_ret)
return_t[i] = curr_val
return return_t
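
# compute_return above implements the TD(lambda) return
#   R_t = r_t + gamma * ((1 - lambda) * V(s_{t+1}) + lambda * R_{t+1}),
# seeded with R_T = r_T + gamma * V(s_{T+1}). A toy check with invented numbers:
rewards = [1.0, 1.0, 1.0]
val_t = [0.5, 0.5, 0.5, 0.5]     # one bootstrap value per state, including the final one
ret = compute_return(rewards, gamma=0.9, td_lambda=0.95, val_t=val_t)
# ret[-1] == 1.0 + 0.9 * 0.5 == 1.45; earlier entries blend V(s_{t+1}) with the lambda-return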

View File

@@ -5,139 +5,140 @@ from pybullet_envs.deep_mimic.learning.rl_agent import RLAgent
from pybullet_utils.logger import Logger
import pybullet_data
class RLWorld(object):
def __init__(self, env, arg_parser):
TFUtil.disable_gpu()
self.env = env
self.arg_parser = arg_parser
self._enable_training = True
self.train_agents = []
self.parse_args(arg_parser)
def __init__(self, env, arg_parser):
TFUtil.disable_gpu()
self.build_agents()
return
self.env = env
self.arg_parser = arg_parser
self._enable_training = True
self.train_agents = []
self.parse_args(arg_parser)
def get_enable_training(self):
return self._enable_training
def set_enable_training(self, enable):
self._enable_training = enable
for i in range(len(self.agents)):
curr_agent = self.agents[i]
if curr_agent is not None:
enable_curr_train = self.train_agents[i] if (len(self.train_agents) > 0) else True
curr_agent.enable_training = self.enable_training and enable_curr_train
self.build_agents()
if (self._enable_training):
self.env.set_mode(RLAgent.Mode.TRAIN)
else:
self.env.set_mode(RLAgent.Mode.TEST)
return
return
def get_enable_training(self):
return self._enable_training
enable_training = property(get_enable_training, set_enable_training)
def parse_args(self, arg_parser):
self.train_agents = self.arg_parser.parse_bools('train_agents')
num_agents = self.env.get_num_agents()
assert(len(self.train_agents) == num_agents or len(self.train_agents) == 0)
def set_enable_training(self, enable):
self._enable_training = enable
for i in range(len(self.agents)):
curr_agent = self.agents[i]
if curr_agent is not None:
enable_curr_train = self.train_agents[i] if (len(self.train_agents) > 0) else True
curr_agent.enable_training = self.enable_training and enable_curr_train
return
if (self._enable_training):
self.env.set_mode(RLAgent.Mode.TRAIN)
else:
self.env.set_mode(RLAgent.Mode.TEST)
def shutdown(self):
self.env.shutdown()
return
return
def build_agents(self):
num_agents = self.env.get_num_agents()
print("num_agents=",num_agents)
self.agents = []
enable_training = property(get_enable_training, set_enable_training)
Logger.print2('')
Logger.print2('Num Agents: {:d}'.format(num_agents))
def parse_args(self, arg_parser):
self.train_agents = self.arg_parser.parse_bools('train_agents')
num_agents = self.env.get_num_agents()
assert (len(self.train_agents) == num_agents or len(self.train_agents) == 0)
agent_files = self.arg_parser.parse_strings('agent_files')
print("len(agent_files)=",len(agent_files))
assert(len(agent_files) == num_agents or len(agent_files) == 0)
return
model_files = self.arg_parser.parse_strings('model_files')
assert(len(model_files) == num_agents or len(model_files) == 0)
def shutdown(self):
self.env.shutdown()
return
output_path = self.arg_parser.parse_string('output_path')
int_output_path = self.arg_parser.parse_string('int_output_path')
def build_agents(self):
num_agents = self.env.get_num_agents()
print("num_agents=", num_agents)
self.agents = []
for i in range(num_agents):
curr_file = agent_files[i]
curr_agent = self._build_agent(i, curr_file)
Logger.print2('')
Logger.print2('Num Agents: {:d}'.format(num_agents))
if curr_agent is not None:
curr_agent.output_dir = output_path
curr_agent.int_output_dir = int_output_path
Logger.print2(str(curr_agent))
agent_files = self.arg_parser.parse_strings('agent_files')
print("len(agent_files)=", len(agent_files))
assert (len(agent_files) == num_agents or len(agent_files) == 0)
if (len(model_files) > 0):
curr_model_file = model_files[i]
if curr_model_file != 'none':
curr_agent.load_model(pybullet_data.getDataPath()+"/"+curr_model_file)
model_files = self.arg_parser.parse_strings('model_files')
assert (len(model_files) == num_agents or len(model_files) == 0)
self.agents.append(curr_agent)
Logger.print2('')
output_path = self.arg_parser.parse_string('output_path')
int_output_path = self.arg_parser.parse_string('int_output_path')
self.set_enable_training(self.enable_training)
for i in range(num_agents):
curr_file = agent_files[i]
curr_agent = self._build_agent(i, curr_file)
return
if curr_agent is not None:
curr_agent.output_dir = output_path
curr_agent.int_output_dir = int_output_path
Logger.print2(str(curr_agent))
def update(self, timestep):
#print("world update!\n")
self._update_agents(timestep)
self._update_env(timestep)
return
if (len(model_files) > 0):
curr_model_file = model_files[i]
if curr_model_file != 'none':
curr_agent.load_model(pybullet_data.getDataPath() + "/" + curr_model_file)
def reset(self):
self._reset_agents()
self._reset_env()
return
self.agents.append(curr_agent)
Logger.print2('')
def end_episode(self):
self._end_episode_agents()
return
self.set_enable_training(self.enable_training)
def _update_env(self, timestep):
self.env.update(timestep)
return
return
def _update_agents(self, timestep):
#print("len(agents)=",len(self.agents))
for agent in self.agents:
if (agent is not None):
agent.update(timestep)
return
def update(self, timestep):
#print("world update!\n")
self._update_agents(timestep)
self._update_env(timestep)
return
def _reset_env(self):
self.env.reset()
return
def reset(self):
self._reset_agents()
self._reset_env()
return
def _reset_agents(self):
for agent in self.agents:
if (agent != None):
agent.reset()
return
def end_episode(self):
self._end_episode_agents()
return
def _end_episode_agents(self):
for agent in self.agents:
if (agent != None):
agent.end_episode()
return
def _update_env(self, timestep):
self.env.update(timestep)
return
def _build_agent(self, id, agent_file):
Logger.print2('Agent {:d}: {}'.format(id, agent_file))
if (agent_file == 'none'):
agent = None
else:
agent = AgentBuilder.build_agent(self, id, agent_file)
assert (agent != None), 'Failed to build agent {:d} from: {}'.format(id, agent_file)
return agent
def _update_agents(self, timestep):
#print("len(agents)=",len(self.agents))
for agent in self.agents:
if (agent is not None):
agent.update(timestep)
return
def _reset_env(self):
self.env.reset()
return
def _reset_agents(self):
for agent in self.agents:
if (agent != None):
agent.reset()
return
def _end_episode_agents(self):
for agent in self.agents:
if (agent != None):
agent.end_episode()
return
def _build_agent(self, id, agent_file):
Logger.print2('Agent {:d}: {}'.format(id, agent_file))
if (agent_file == 'none'):
agent = None
else:
agent = AgentBuilder.build_agent(self, id, agent_file)
assert (agent != None), 'Failed to build agent {:d} from: {}'.format(id, agent_file)
return agent
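
# The enable_training property above routes plain attribute assignment through
# set_enable_training, which also switches the environment between TRAIN and TEST modes.
# A tiny invented example of the same property() pattern:
class _Toggle(object):

  def __init__(self):
    self._on = False

  def get_on(self):
    return self._on

  def set_on(self, value):
    self._on = value
    print('mode ->', 'TRAIN' if value else 'TEST')  # side effect, like env.set_mode above

  on = property(get_on, set_on)

# assigning _Toggle().on = True goes through set_on, just as world.enable_training = True does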

View File

@@ -8,96 +8,97 @@ from pybullet_utils.logger import Logger
from pybullet_envs.deep_mimic.learning.solvers.solver import Solver
class MPISolver(Solver):
CHECK_SYNC_ITERS = 1000
def __init__(self, sess, optimizer, vars):
super().__init__(vars)
self.sess = sess
self.optimizer = optimizer
self._build_grad_feed(vars)
self._update = optimizer.apply_gradients(zip(self._grad_tf_list, self.vars))
self._set_flat_vars = TFUtil.SetFromFlat(sess, self.vars)
self._get_flat_vars = TFUtil.GetFlat(sess, self.vars)
self.iter = 0
grad_dim = self._calc_grad_dim()
self._flat_grad = np.zeros(grad_dim, dtype=np.float32)
self._global_flat_grad = np.zeros(grad_dim, dtype=np.float32)
return
self.iter = 0
grad_dim = self._calc_grad_dim()
self._flat_grad = np.zeros(grad_dim, dtype=np.float32)
self._global_flat_grad = np.zeros(grad_dim, dtype=np.float32)
def get_stepsize(self):
return self.optimizer._learning_rate_tensor.eval()
return
def update(self, grads=None, grad_scale=1.0):
if grads is not None:
self._flat_grad = MathUtil.flatten(grads)
else:
self._flat_grad.fill(0)
return self.update_flatgrad(self._flat_grad, grad_scale)
def get_stepsize(self):
return self.optimizer._learning_rate_tensor.eval()
def update_flatgrad(self, flat_grad, grad_scale=1.0):
if self.iter % self.CHECK_SYNC_ITERS == 0:
assert self.check_synced(), Logger.print2('Network parameters desynchronized')
if grad_scale != 1.0:
flat_grad *= grad_scale
def update(self, grads=None, grad_scale=1.0):
if grads is not None:
self._flat_grad = MathUtil.flatten(grads)
else:
self._flat_grad.fill(0)
return self.update_flatgrad(self._flat_grad, grad_scale)
MPI.COMM_WORLD.Allreduce(flat_grad, self._global_flat_grad, op=MPI.SUM)
self._global_flat_grad /= MPIUtil.get_num_procs()
def update_flatgrad(self, flat_grad, grad_scale=1.0):
if self.iter % self.CHECK_SYNC_ITERS == 0:
assert self.check_synced(), Logger.print2('Network parameters desynchronized')
self._load_flat_grad(self._global_flat_grad)
self.sess.run([self._update], self._grad_feed)
self.iter += 1
if grad_scale != 1.0:
flat_grad *= grad_scale
return
MPI.COMM_WORLD.Allreduce(flat_grad, self._global_flat_grad, op=MPI.SUM)
self._global_flat_grad /= MPIUtil.get_num_procs()
def sync(self):
vars = self._get_flat_vars()
MPIUtil.bcast(vars)
self._set_flat_vars(vars)
return
self._load_flat_grad(self._global_flat_grad)
self.sess.run([self._update], self._grad_feed)
self.iter += 1
def check_synced(self):
synced = True
if self._is_root():
vars = self._get_flat_vars()
MPIUtil.bcast(vars)
else:
vars_local = self._get_flat_vars()
vars_root = np.empty_like(vars_local)
MPIUtil.bcast(vars_root)
synced = (vars_local == vars_root).all()
return synced
return
def _is_root(self):
return MPIUtil.is_root_proc()
def _build_grad_feed(self, vars):
self._grad_tf_list = []
self._grad_buffers = []
for v in self.vars:
shape = v.get_shape()
grad = np.zeros(shape)
grad_tf = tf.placeholder(tf.float32, shape=shape)
self._grad_buffers.append(grad)
self._grad_tf_list.append(grad_tf)
def sync(self):
vars = self._get_flat_vars()
MPIUtil.bcast(vars)
self._set_flat_vars(vars)
return
self._grad_feed = dict({g_tf: g for g_tf, g in zip(self._grad_tf_list, self._grad_buffers)})
return
def check_synced(self):
synced = True
if self._is_root():
vars = self._get_flat_vars()
MPIUtil.bcast(vars)
else:
vars_local = self._get_flat_vars()
vars_root = np.empty_like(vars_local)
MPIUtil.bcast(vars_root)
synced = (vars_local == vars_root).all()
return synced
def _calc_grad_dim(self):
grad_dim = 0
for grad in self._grad_buffers:
grad_dim += grad.size
return grad_dim
def _is_root(self):
return MPIUtil.is_root_proc()
def _load_flat_grad(self, flat_grad):
start = 0
for g in self._grad_buffers:
size = g.size
np.copyto(g, np.reshape(flat_grad[start:start + size], g.shape))
start += size
return
def _build_grad_feed(self, vars):
self._grad_tf_list = []
self._grad_buffers = []
for v in self.vars:
shape = v.get_shape()
grad = np.zeros(shape)
grad_tf = tf.placeholder(tf.float32, shape=shape)
self._grad_buffers.append(grad)
self._grad_tf_list.append(grad_tf)
self._grad_feed = dict({g_tf: g for g_tf, g in zip(self._grad_tf_list, self._grad_buffers)})
return
def _calc_grad_dim(self):
grad_dim = 0
for grad in self._grad_buffers:
grad_dim += grad.size
return grad_dim
def _load_flat_grad(self, flat_grad):
start = 0
for g in self._grad_buffers:
size = g.size
np.copyto(g, np.reshape(flat_grad[start:start + size], g.shape))
start += size
return
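
# update_flatgrad above averages flattened gradients across MPI workers with a single
# Allreduce. A minimal sketch of that step, assuming mpi4py is available (run under e.g.
# `mpirun -n 4 python ...`); the gradient values are invented.
local_grad = np.arange(4, dtype=np.float32) * (MPI.COMM_WORLD.Get_rank() + 1)
global_grad = np.zeros_like(local_grad)
MPI.COMM_WORLD.Allreduce(local_grad, global_grad, op=MPI.SUM)
global_grad /= MPI.COMM_WORLD.Get_size()   # every rank now holds the same averaged gradient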

View File

@@ -1,15 +1,17 @@
from abc import abstractmethod
import sys, abc
if sys.version_info >= (3, 4):
ABC = abc.ABC
else:
ABC = abc.ABCMeta('ABC', (), {})
class Solver(ABC):
def __init__(self, vars):
self.vars = vars
return
@abstractmethod
def update(self, grads):
pass
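
# Solver only fixes the interface: hold a list of variables and apply a gradient update to
# them. A hypothetical concrete subclass doing plain gradient descent on numpy arrays:
class NumpySGDSolver(Solver):

  def __init__(self, vars, stepsize=1e-3):
    super().__init__(vars)
    self.stepsize = stepsize

  def update(self, grads):
    for v, g in zip(self.vars, grads):
      v -= self.stepsize * g                 # in-place step on each numpy array
    return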

View File

@@ -6,144 +6,148 @@ from pybullet_envs.deep_mimic.learning.rl_agent import RLAgent
from pybullet_utils.logger import Logger
from pybullet_envs.deep_mimic.learning.tf_normalizer import TFNormalizer
class TFAgent(RLAgent):
RESOURCE_SCOPE = 'resource'
SOLVER_SCOPE = 'solvers'
def __init__(self, world, id, json_data):
self.tf_scope = 'agent'
self.graph = tf.Graph()
self.sess = tf.Session(graph=self.graph)
super().__init__(world, id, json_data)
self._build_graph(json_data)
self._init_normalizers()
return
def __del__(self):
self.sess.close()
return
def save_model(self, out_path):
with self.sess.as_default(), self.graph.as_default():
try:
save_path = self.saver.save(self.sess, out_path, write_meta_graph=False, write_state=False)
Logger.print2('Model saved to: ' + save_path)
except Exception:
Logger.print2("Failed to save model to: " + out_path)
return
def load_model(self, in_path):
with self.sess.as_default(), self.graph.as_default():
self.saver.restore(self.sess, in_path)
self._load_normalizers()
Logger.print2('Model loaded from: ' + in_path)
return
def _get_output_path(self):
assert(self.output_dir != '')
file_path = self.output_dir + '/agent' + str(self.id) + '_model.ckpt'
return file_path
def _get_output_path(self):
assert (self.output_dir != '')
file_path = self.output_dir + '/agent' + str(self.id) + '_model.ckpt'
return file_path
def _get_int_output_path(self):
assert(self.int_output_dir != '')
file_path = self.int_output_dir + ('/agent{:d}_models/agent{:d}_int_model_{:010d}.ckpt').format(self.id, self.id, self.iter)
return file_path
def _get_int_output_path(self):
assert (self.int_output_dir != '')
file_path = self.int_output_dir + (
'/agent{:d}_models/agent{:d}_int_model_{:010d}.ckpt').format(self.id, self.id, self.iter)
return file_path
def _build_graph(self, json_data):
with self.sess.as_default(), self.graph.as_default():
with tf.variable_scope(self.tf_scope):
self._build_nets(json_data)
with tf.variable_scope(self.SOLVER_SCOPE):
self._build_losses(json_data)
self._build_solvers(json_data)
def _build_graph(self, json_data):
with self.sess.as_default(), self.graph.as_default():
with tf.variable_scope(self.tf_scope):
self._build_nets(json_data)
self._initialize_vars()
self._build_saver()
return
with tf.variable_scope(self.SOLVER_SCOPE):
self._build_losses(json_data)
self._build_solvers(json_data)
def _init_normalizers(self):
with self.sess.as_default(), self.graph.as_default():
# update normalizers to sync the tensorflow tensors
self.s_norm.update()
self.g_norm.update()
self.a_norm.update()
return
self._initialize_vars()
self._build_saver()
return
@abstractmethod
def _build_nets(self, json_data):
pass
def _init_normalizers(self):
with self.sess.as_default(), self.graph.as_default():
# update normalizers to sync the tensorflow tensors
self.s_norm.update()
self.g_norm.update()
self.a_norm.update()
return
@abstractmethod
def _build_losses(self, json_data):
pass
@abstractmethod
def _build_nets(self, json_data):
pass
@abstractmethod
def _build_solvers(self, json_data):
pass
@abstractmethod
def _build_losses(self, json_data):
pass
def _tf_vars(self, scope=''):
with self.sess.as_default(), self.graph.as_default():
res = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=self.tf_scope + '/' + scope)
assert len(res) > 0
return res
@abstractmethod
def _build_solvers(self, json_data):
pass
def _build_normalizers(self):
with self.sess.as_default(), self.graph.as_default(), tf.variable_scope(self.tf_scope):
with tf.variable_scope(self.RESOURCE_SCOPE):
self.s_norm = TFNormalizer(self.sess, 's_norm', self.get_state_size(), self.world.env.build_state_norm_groups(self.id))
state_offset = -self.world.env.build_state_offset(self.id)
print("state_offset=",state_offset)
state_scale = 1 / self.world.env.build_state_scale(self.id)
print("state_scale=",state_scale)
self.s_norm.set_mean_std(-self.world.env.build_state_offset(self.id),
1 / self.world.env.build_state_scale(self.id))
self.g_norm = TFNormalizer(self.sess, 'g_norm', self.get_goal_size(), self.world.env.build_goal_norm_groups(self.id))
self.g_norm.set_mean_std(-self.world.env.build_goal_offset(self.id),
1 / self.world.env.build_goal_scale(self.id))
def _tf_vars(self, scope=''):
with self.sess.as_default(), self.graph.as_default():
res = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=self.tf_scope + '/' + scope)
assert len(res) > 0
return res
self.a_norm = TFNormalizer(self.sess, 'a_norm', self.get_action_size())
self.a_norm.set_mean_std(-self.world.env.build_action_offset(self.id),
1 / self.world.env.build_action_scale(self.id))
return
def _build_normalizers(self):
with self.sess.as_default(), self.graph.as_default(), tf.variable_scope(self.tf_scope):
with tf.variable_scope(self.RESOURCE_SCOPE):
self.s_norm = TFNormalizer(self.sess, 's_norm', self.get_state_size(),
self.world.env.build_state_norm_groups(self.id))
state_offset = -self.world.env.build_state_offset(self.id)
print("state_offset=", state_offset)
state_scale = 1 / self.world.env.build_state_scale(self.id)
print("state_scale=", state_scale)
self.s_norm.set_mean_std(-self.world.env.build_state_offset(self.id),
1 / self.world.env.build_state_scale(self.id))
def _load_normalizers(self):
self.s_norm.load()
self.g_norm.load()
self.a_norm.load()
return
self.g_norm = TFNormalizer(self.sess, 'g_norm', self.get_goal_size(),
self.world.env.build_goal_norm_groups(self.id))
self.g_norm.set_mean_std(-self.world.env.build_goal_offset(self.id),
1 / self.world.env.build_goal_scale(self.id))
def _update_normalizers(self):
with self.sess.as_default(), self.graph.as_default():
super()._update_normalizers()
return
self.a_norm = TFNormalizer(self.sess, 'a_norm', self.get_action_size())
self.a_norm.set_mean_std(-self.world.env.build_action_offset(self.id),
1 / self.world.env.build_action_scale(self.id))
return
def _initialize_vars(self):
self.sess.run(tf.global_variables_initializer())
return
def _load_normalizers(self):
self.s_norm.load()
self.g_norm.load()
self.a_norm.load()
return
def _build_saver(self):
vars = self._get_saver_vars()
self.saver = tf.train.Saver(vars, max_to_keep=0)
return
def _update_normalizers(self):
with self.sess.as_default(), self.graph.as_default():
super()._update_normalizers()
return
def _get_saver_vars(self):
with self.sess.as_default(), self.graph.as_default():
vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=self.tf_scope)
vars = [v for v in vars if '/' + self.SOLVER_SCOPE + '/' not in v.name]
#vars = [v for v in vars if '/target/' not in v.name]
assert len(vars) > 0
return vars
def _weight_decay_loss(self, scope):
vars = self._tf_vars(scope)
vars_no_bias = [v for v in vars if 'bias' not in v.name]
loss = tf.add_n([tf.nn.l2_loss(v) for v in vars_no_bias])
return loss
def _initialize_vars(self):
self.sess.run(tf.global_variables_initializer())
return
def _train(self):
with self.sess.as_default(), self.graph.as_default():
super()._train()
return
def _build_saver(self):
vars = self._get_saver_vars()
self.saver = tf.train.Saver(vars, max_to_keep=0)
return
def _get_saver_vars(self):
with self.sess.as_default(), self.graph.as_default():
vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=self.tf_scope)
vars = [v for v in vars if '/' + self.SOLVER_SCOPE + '/' not in v.name]
#vars = [v for v in vars if '/target/' not in v.name]
assert len(vars) > 0
return vars
def _weight_decay_loss(self, scope):
vars = self._tf_vars(scope)
vars_no_bias = [v for v in vars if 'bias' not in v.name]
loss = tf.add_n([tf.nn.l2_loss(v) for v in vars_no_bias])
return loss
def _train(self):
with self.sess.as_default(), self.graph.as_default():
super()._train()
return
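
# _build_normalizers above seeds each normalizer with mean = -offset and std = 1 / scale,
# so (x - mean) / std reproduces the environment's (x + offset) * scale mapping. A quick
# numeric check with invented offset/scale values (numpy assumed imported as np):
offset, scale = np.array([0.5, -1.0]), np.array([2.0, 4.0])
x = np.array([1.0, 3.0])
assert np.allclose((x - (-offset)) / (1.0 / scale), (x + offset) * scale)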

View File

@@ -3,65 +3,72 @@ import copy
import tensorflow as tf
from pybullet_envs.deep_mimic.learning.normalizer import Normalizer
class TFNormalizer(Normalizer):
def __init__(self, sess, scope, size, groups_ids=None, eps=0.02, clip=np.inf):
self.sess = sess
self.scope = scope
super().__init__(size, groups_ids, eps, clip)
with tf.variable_scope(self.scope):
self._build_resource_tf()
return
# initialize count when loading saved values so that things don't change too quickly during updates
def load(self):
self.count = self.count_tf.eval()[0]
self.mean = self.mean_tf.eval()
self.std = self.std_tf.eval()
self.mean_sq = self.calc_mean_sq(self.mean, self.std)
return
def update(self):
super().update()
self._update_resource_tf()
return
def set_mean_std(self, mean, std):
super().set_mean_std(mean, std)
self._update_resource_tf()
return
def normalize_tf(self, x):
norm_x = (x - self.mean_tf) / self.std_tf
norm_x = tf.clip_by_value(norm_x, -self.clip, self.clip)
return norm_x
def unnormalize_tf(self, norm_x):
x = norm_x * self.std_tf + self.mean_tf
return x
def _build_resource_tf(self):
self.count_tf = tf.get_variable(dtype=tf.int32, name='count', initializer=np.array([self.count], dtype=np.int32), trainable=False)
self.mean_tf = tf.get_variable(dtype=tf.float32, name='mean', initializer=self.mean.astype(np.float32), trainable=False)
self.std_tf = tf.get_variable(dtype=tf.float32, name='std', initializer=self.std.astype(np.float32), trainable=False)
self.count_ph = tf.get_variable(dtype=tf.int32, name='count_ph', shape=[1])
self.mean_ph = tf.get_variable(dtype=tf.float32, name='mean_ph', shape=self.mean.shape)
self.std_ph = tf.get_variable(dtype=tf.float32, name='std_ph', shape=self.std.shape)
self._update_op = tf.group(
self.count_tf.assign(self.count_ph),
self.mean_tf.assign(self.mean_ph),
self.std_tf.assign(self.std_ph)
)
return
def unnormalize_tf(self, norm_x):
x = norm_x * self.std_tf + self.mean_tf
return x
def _update_resource_tf(self):
feed = {
self.count_ph: np.array([self.count], dtype=np.int32),
self.mean_ph: self.mean,
self.std_ph: self.std
}
self.sess.run(self._update_op, feed_dict=feed)
return
def _build_resource_tf(self):
self.count_tf = tf.get_variable(dtype=tf.int32,
name='count',
initializer=np.array([self.count], dtype=np.int32),
trainable=False)
self.mean_tf = tf.get_variable(dtype=tf.float32,
name='mean',
initializer=self.mean.astype(np.float32),
trainable=False)
self.std_tf = tf.get_variable(dtype=tf.float32,
name='std',
initializer=self.std.astype(np.float32),
trainable=False)
self.count_ph = tf.get_variable(dtype=tf.int32, name='count_ph', shape=[1])
self.mean_ph = tf.get_variable(dtype=tf.float32, name='mean_ph', shape=self.mean.shape)
self.std_ph = tf.get_variable(dtype=tf.float32, name='std_ph', shape=self.std.shape)
self._update_op = tf.group(self.count_tf.assign(self.count_ph),
self.mean_tf.assign(self.mean_ph), self.std_tf.assign(self.std_ph))
return
def _update_resource_tf(self):
feed = {
self.count_ph: np.array([self.count], dtype=np.int32),
self.mean_ph: self.mean,
self.std_ph: self.std
}
self.sess.run(self._update_op, feed_dict=feed)
return
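
# normalize_tf / unnormalize_tf above form a clipped affine pair; in numpy terms, with
# invented mean, std and clip values:
mean, std, clip = 0.0, 2.0, 5.0
x = np.array([-20.0, 1.0, 20.0])
norm_x = np.clip((x - mean) / std, -clip, clip)    # mirrors normalize_tf
x_back = norm_x * std + mean                       # mirrors unnormalize_tf
# entries inside the clip range round-trip exactly; clipped ones saturate at +/- clip * std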

View File

@@ -4,101 +4,116 @@ import os
xavier_initializer = tf.contrib.layers.xavier_initializer()
def disable_gpu():
os.environ["CUDA_VISIBLE_DEVICES"] = '-1'
return
os.environ["CUDA_VISIBLE_DEVICES"] = '-1'
return
def var_shape(x):
out = [k.value for k in x.get_shape()]
assert all(isinstance(a, int) for a in out), "shape function assumes that shape is fully known"
return out
out = [k.value for k in x.get_shape()]
assert all(isinstance(a, int) for a in out), "shape function assumes that shape is fully known"
return out
def intprod(x):
return int(np.prod(x))
return int(np.prod(x))
def numel(x):
n = intprod(var_shape(x))
return n
n = intprod(var_shape(x))
return n
def flat_grad(loss, var_list, grad_ys=None):
grads = tf.gradients(loss, var_list, grad_ys)
return tf.concat([tf.reshape(grad, [numel(v)]) for (v, grad) in zip(var_list, grads)], axis=0)
grads = tf.gradients(loss, var_list, grad_ys)
return tf.concat([tf.reshape(grad, [numel(v)]) for (v, grad) in zip(var_list, grads)], axis=0)
def fc_net(input, layers_sizes, activation, reuse=None, flatten=False): # build fully connected network
curr_tf = input
for i, size in enumerate(layers_sizes):
with tf.variable_scope(str(i), reuse=reuse):
curr_tf = tf.layers.dense(inputs=curr_tf,
units=size,
kernel_initializer=xavier_initializer,
activation = activation if i < len(layers_sizes)-1 else None)
if flatten:
assert layers_sizes[-1] == 1
curr_tf = tf.reshape(curr_tf, [-1])
return curr_tf
def fc_net(input, layers_sizes, activation, reuse=None,
flatten=False): # build fully connected network
curr_tf = input
for i, size in enumerate(layers_sizes):
with tf.variable_scope(str(i), reuse=reuse):
curr_tf = tf.layers.dense(inputs=curr_tf,
units=size,
kernel_initializer=xavier_initializer,
activation=activation if i < len(layers_sizes) - 1 else None)
if flatten:
assert layers_sizes[-1] == 1
curr_tf = tf.reshape(curr_tf, [-1])
return curr_tf
def copy(sess, src, dst):
assert len(src) == len(dst)
sess.run(list(map(lambda v: v[1].assign(v[0]), zip(src, dst))))
return
assert len(src) == len(dst)
sess.run(list(map(lambda v: v[1].assign(v[0]), zip(src, dst))))
return
def flat_grad(loss, var_list):
grads = tf.gradients(loss, var_list)
return tf.concat(axis=0, values=[tf.reshape(grad, [numel(v)])
for (v, grad) in zip(var_list, grads)])
grads = tf.gradients(loss, var_list)
return tf.concat(axis=0,
values=[tf.reshape(grad, [numel(v)]) for (v, grad) in zip(var_list, grads)])
def calc_logp_gaussian(x_tf, mean_tf, std_tf):
dim = tf.to_float(tf.shape(x_tf)[-1])
dim = tf.to_float(tf.shape(x_tf)[-1])
if mean_tf is None:
diff_tf = x_tf
else:
diff_tf = x_tf - mean_tf
if mean_tf is None:
diff_tf = x_tf
else:
diff_tf = x_tf - mean_tf
logp_tf = -0.5 * tf.reduce_sum(tf.square(diff_tf / std_tf), axis=-1)
logp_tf += -0.5 * dim * np.log(2 * np.pi) - tf.reduce_sum(tf.log(std_tf), axis=-1)
return logp_tf
logp_tf = -0.5 * tf.reduce_sum(tf.square(diff_tf / std_tf), axis=-1)
logp_tf += -0.5 * dim * np.log(2 * np.pi) - tf.reduce_sum(tf.log(std_tf), axis=-1)
return logp_tf
def calc_bound_loss(x_tf, bound_min, bound_max):
# penalty for violating bounds
violation_min = tf.minimum(x_tf - bound_min, 0)
violation_max = tf.maximum(x_tf - bound_max, 0)
violation = tf.reduce_sum(tf.square(violation_min), axis=-1) + tf.reduce_sum(tf.square(violation_max), axis=-1)
loss = 0.5 * tf.reduce_mean(violation)
return loss
# penalty for violating bounds
violation_min = tf.minimum(x_tf - bound_min, 0)
violation_max = tf.maximum(x_tf - bound_max, 0)
violation = tf.reduce_sum(tf.square(violation_min), axis=-1) + tf.reduce_sum(
tf.square(violation_max), axis=-1)
loss = 0.5 * tf.reduce_mean(violation)
return loss
class SetFromFlat(object):
def __init__(self, sess, var_list, dtype=tf.float32):
assigns = []
shapes = list(map(var_shape, var_list))
total_size = np.sum([intprod(shape) for shape in shapes])
self.sess = sess
self.theta = tf.placeholder(dtype,[total_size])
start=0
assigns = []
def __init__(self, sess, var_list, dtype=tf.float32):
assigns = []
shapes = list(map(var_shape, var_list))
total_size = np.sum([intprod(shape) for shape in shapes])
for (shape,v) in zip(shapes,var_list):
size = intprod(shape)
assigns.append(tf.assign(v, tf.reshape(self.theta[start:start+size],shape)))
start += size
self.sess = sess
self.theta = tf.placeholder(dtype, [total_size])
start = 0
assigns = []
self.op = tf.group(*assigns)
for (shape, v) in zip(shapes, var_list):
size = intprod(shape)
assigns.append(tf.assign(v, tf.reshape(self.theta[start:start + size], shape)))
start += size
return
self.op = tf.group(*assigns)
return
def __call__(self, theta):
self.sess.run(self.op, feed_dict={self.theta: theta})
return
def __call__(self, theta):
self.sess.run(self.op, feed_dict={self.theta:theta})
return
class GetFlat(object):
def __init__(self, sess, var_list):
self.sess = sess
self.op = tf.concat(axis=0, values=[tf.reshape(v, [numel(v)]) for v in var_list])
return
def __call__(self):
return self.sess.run(self.op)
def __init__(self, sess, var_list):
self.sess = sess
self.op = tf.concat(axis=0, values=[tf.reshape(v, [numel(v)]) for v in var_list])
return
def __call__(self):
return self.sess.run(self.op)
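
# A numpy twin of calc_logp_gaussian above, useful as a sanity check for the diagonal
# Gaussian log-density; the helper name and example inputs are invented.
def _logp_gaussian_np(x, mean, std):
  d = x.shape[-1]
  z = (x - mean) / std
  return -0.5 * np.sum(z * z, axis=-1) - 0.5 * d * np.log(2 * np.pi) - np.sum(np.log(std), axis=-1)

# _logp_gaussian_np(np.array([0.3, -0.7]), np.zeros(2), np.ones(2)) is about -2.1279,
# i.e. -0.5 * ||x||**2 - log(2 * pi) for a 2-d standard normal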

View File

@@ -1,26 +1,28 @@
import numpy as np
from quaternion import qrot, qinverse
def normalize_screen_coordinates(X, w, h):
assert X.shape[-1] == 2
# Normalize so that [0, w] is mapped to [-1, 1], while preserving the aspect ratio
return X/w*2 - [1, h/w]
def normalize_screen_coordinates(X, w, h):
assert X.shape[-1] == 2
# Normalize so that [0, w] is mapped to [-1, 1], while preserving the aspect ratio
return X / w * 2 - [1, h / w]
def image_coordinates(X, w, h):
assert X.shape[-1] == 2
# Reverse camera frame normalization
return (X + [1, h/w])*w/2
assert X.shape[-1] == 2
# Reverse camera frame normalization
return (X + [1, h / w]) * w / 2
def world_to_camera(X, R, t):
Rt = qinverse(R)
Q = np.tile(Rt, (*X.shape[:-1], 1))
V = X - t
return qrot(Q, V)
def camera_to_world(X, R, t):
Q = np.tile(R, (*X.shape[:-1], 1))
V = X
return qrot(Q, V) + t
Rt = qinverse(R)
Q = np.tile(Rt, (*X.shape[:-1], 1))
V = X - t
return qrot(Q, V)
def camera_to_world(X, R, t):
Q = np.tile(R, (*X.shape[:-1], 1))
V = X
return qrot(Q, V) + t
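
# normalize_screen_coordinates and image_coordinates above are inverses of each other;
# a quick round-trip check with made-up pixel coordinates:
X = np.array([[320.0, 240.0], [0.0, 0.0]])
w, h = 640, 480
Xn = normalize_screen_coordinates(X, w, h)         # [[0., 0.], [-1., -0.75]]
assert np.allclose(image_coordinates(Xn, w, h), X)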

View File

@@ -3,54 +3,63 @@ from h36m_dataset import Human36mDataset
from camera import *
import numpy as np
# In[2]:
joint_info = {
'joint_name':['root', 'right_hip', 'right_knee', 'right_ankle', 'left_hip', 'left_knee', 'left_ankle', 'chest', 'neck', 'nose', 'eye', 'left_shoulder', 'left_elbow', 'left_wrist', 'right_shoulder', 'right_elbow', 'right_wrist'],
'father':[0, 0, 1, 2, 0, 4, 5, 0, 7, 8, 9, 8, 11, 12, 8, 14, 15],
'side':['middle', 'right', 'right', 'right', 'left', 'left', 'left', 'middle', 'middle', 'middle', 'middle', 'left', 'left', 'left', 'right', 'right', 'right']
'joint_name': [
'root', 'right_hip', 'right_knee', 'right_ankle', 'left_hip', 'left_knee', 'left_ankle',
'chest', 'neck', 'nose', 'eye', 'left_shoulder', 'left_elbow', 'left_wrist',
'right_shoulder', 'right_elbow', 'right_wrist'
],
'father': [0, 0, 1, 2, 0, 4, 5, 0, 7, 8, 9, 8, 11, 12, 8, 14, 15],
'side': [
'middle', 'right', 'right', 'right', 'left', 'left', 'left', 'middle', 'middle', 'middle',
'middle', 'left', 'left', 'left', 'right', 'right', 'right'
]
}
# In[3]:
def init_fb_h36m_dataset(dataset_path):
dataset = Human36mDataset(dataset_path)
print('Preparing Facebook Human3.6M Dataset...')
for subject in dataset.subjects():
for action in dataset[subject].keys():
anim = dataset[subject][action]
positions_3d = []
for cam in anim['cameras']:
pos_3d = world_to_camera(anim['positions'], R=cam['orientation'], t=cam['translation'])
pos_3d[:, 1:] -= pos_3d[:, :1] # Remove global offset, but keep trajectory in first position
positions_3d.append(pos_3d)
anim['positions_3d'] = positions_3d
return dataset
dataset = Human36mDataset(dataset_path)
print('Preparing Facebook Human3.6M Dataset...')
for subject in dataset.subjects():
for action in dataset[subject].keys():
anim = dataset[subject][action]
positions_3d = []
for cam in anim['cameras']:
pos_3d = world_to_camera(anim['positions'], R=cam['orientation'], t=cam['translation'])
pos_3d[:, 1:] -= pos_3d[:, :
1] # Remove global offset, but keep trajectory in first position
positions_3d.append(pos_3d)
anim['positions_3d'] = positions_3d
return dataset
def pose3D_from_fb_h36m(dataset, subject, action, shift):
pose_seq = dataset[subject][action]['positions_3d'][0].copy()
trajectory = pose_seq[:, :1]
pose_seq[:, 1:] += trajectory
# Invert camera transformation
cam = dataset.cameras()[subject][0]
pose_seq = camera_to_world(pose_seq,
R=cam['orientation'],
t=cam['translation'])
x = pose_seq[:,:,0:1]
y = pose_seq[:,:,1:2] * -1
z = pose_seq[:,:,2:3]
pose_seq = np.concatenate((x,z,y),axis=2)
# plus shift
pose_seq += np.array([[shift for i in range(pose_seq.shape[1])] for j in range(pose_seq.shape[0])])
return pose_seq
pose_seq = dataset[subject][action]['positions_3d'][0].copy()
trajectory = pose_seq[:, :1]
pose_seq[:, 1:] += trajectory
# Invert camera transformation
cam = dataset.cameras()[subject][0]
pose_seq = camera_to_world(pose_seq, R=cam['orientation'], t=cam['translation'])
x = pose_seq[:, :, 0:1]
y = pose_seq[:, :, 1:2] * -1
z = pose_seq[:, :, 2:3]
pose_seq = np.concatenate((x, z, y), axis=2)
# plus shift
pose_seq += np.array([[shift for i in range(pose_seq.shape[1])] for j in range(pose_seq.shape[0])
])
return pose_seq
def rot_seq_to_deepmimic_json(rot_seq, loop, json_path):
to_json = {"Loop": loop, "Frames":[]}
rot_seq = np.around(rot_seq, decimals=6)
to_json["Frames"] = rot_seq.tolist()
# In[14]:
to_file = json.dumps(to_json)
file = open(json_path,"w")
file.write(to_file)
file.close()
to_json = {"Loop": loop, "Frames": []}
rot_seq = np.around(rot_seq, decimals=6)
to_json["Frames"] = rot_seq.tolist()
# In[14]:
to_file = json.dumps(to_json)
file = open(json_path, "w")
file.write(to_file)
file.close()
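
# The x/y/z reshuffle inside pose3D_from_fb_h36m flips the y axis and swaps it with z
# before the global shift is added; a one-frame, one-joint illustration with made-up
# coordinates:
pose = np.array([[[1.0, 2.0, 3.0]]])               # (frames, joints, xyz)
x, y, z = pose[:, :, 0:1], pose[:, :, 1:2] * -1, pose[:, :, 2:3]
print(np.concatenate((x, z, y), axis=2))           # [[[ 1.  3. -2.]]]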

View File

@@ -3,11 +3,13 @@ import copy
from skeleton import Skeleton
from mocap_dataset import MocapDataset
from camera import normalize_screen_coordinates, image_coordinates
h36m_skeleton = Skeleton(parents=[-1, 0, 1, 2, 3, 4, 0, 6, 7, 8, 9, 0, 11, 12, 13, 14, 12,
16, 17, 18, 19, 20, 19, 22, 12, 24, 25, 26, 27, 28, 27, 30],
joints_left=[6, 7, 8, 9, 10, 16, 17, 18, 19, 20, 21, 22, 23],
joints_right=[1, 2, 3, 4, 5, 24, 25, 26, 27, 28, 29, 30, 31])
h36m_skeleton = Skeleton(parents=[
-1, 0, 1, 2, 3, 4, 0, 6, 7, 8, 9, 0, 11, 12, 13, 14, 12, 16, 17, 18, 19, 20, 19, 22, 12, 24,
25, 26, 27, 28, 27, 30
],
joints_left=[6, 7, 8, 9, 10, 16, 17, 18, 19, 20, 21, 22, 23],
joints_right=[1, 2, 3, 4, 5, 24, 25, 26, 27, 28, 29, 30, 31])
h36m_cameras_intrinsic_params = [
{
@@ -18,7 +20,7 @@ h36m_cameras_intrinsic_params = [
'tangential_distortion': [-0.0009756988729350269, -0.00142447161488235],
'res_w': 1000,
'res_h': 1002,
'azimuth': 70, # Only used for visualization
'azimuth': 70, # Only used for visualization
},
{
'id': '55011271',
@@ -28,7 +30,7 @@ h36m_cameras_intrinsic_params = [
'tangential_distortion': [-0.0016190266469493508, -0.0027408944442868233],
'res_w': 1000,
'res_h': 1000,
'azimuth': -70, # Only used for visualization
'azimuth': -70, # Only used for visualization
},
{
'id': '58860488',
@@ -38,7 +40,7 @@ h36m_cameras_intrinsic_params = [
'tangential_distortion': [0.0014843869721516967, -0.0007599993259645998],
'res_w': 1000,
'res_h': 1000,
'azimuth': 110, # Only used for visualization
'azimuth': 110, # Only used for visualization
},
{
'id': '60457274',
@@ -48,26 +50,34 @@ h36m_cameras_intrinsic_params = [
'tangential_distortion': [-0.0005872055771760643, -0.0018133620033040643],
'res_w': 1000,
'res_h': 1002,
'azimuth': -110, # Only used for visualization
'azimuth': -110, # Only used for visualization
},
]
h36m_cameras_extrinsic_params = {
'S1': [
{
'orientation': [0.1407056450843811, -0.1500701755285263, -0.755240797996521, 0.6223280429840088],
'orientation': [
0.1407056450843811, -0.1500701755285263, -0.755240797996521, 0.6223280429840088
],
'translation': [1841.1070556640625, 4955.28466796875, 1563.4454345703125],
},
{
'orientation': [0.6157187819480896, -0.764836311340332, -0.14833825826644897, 0.11794740706682205],
'orientation': [
0.6157187819480896, -0.764836311340332, -0.14833825826644897, 0.11794740706682205
],
'translation': [1761.278564453125, -5078.0068359375, 1606.2650146484375],
},
{
'orientation': [0.14651472866535187, -0.14647851884365082, 0.7653023600578308, -0.6094175577163696],
'orientation': [
0.14651472866535187, -0.14647851884365082, 0.7653023600578308, -0.6094175577163696
],
'translation': [-1846.7777099609375, 5215.04638671875, 1491.972412109375],
},
{
'orientation': [0.5834008455276489, -0.7853162288665771, 0.14548823237419128, -0.14749594032764435],
'orientation': [
0.5834008455276489, -0.7853162288665771, 0.14548823237419128, -0.14749594032764435
],
'translation': [-1794.7896728515625, -3722.698974609375, 1574.8927001953125],
},
],
@@ -91,158 +101,206 @@ h36m_cameras_extrinsic_params = {
],
'S5': [
{
'orientation': [0.1467377245426178, -0.162370964884758, -0.7551892995834351, 0.6178938746452332],
'orientation': [
0.1467377245426178, -0.162370964884758, -0.7551892995834351, 0.6178938746452332
],
'translation': [2097.3916015625, 4880.94482421875, 1605.732421875],
},
{
'orientation': [0.6159758567810059, -0.7626792192459106, -0.15728192031383514, 0.1189815029501915],
'orientation': [
0.6159758567810059, -0.7626792192459106, -0.15728192031383514, 0.1189815029501915
],
'translation': [2031.7008056640625, -5167.93310546875, 1612.923095703125],
},
{
'orientation': [0.14291371405124664, -0.12907841801643372, 0.7678384780883789, -0.6110143065452576],
'orientation': [
0.14291371405124664, -0.12907841801643372, 0.7678384780883789, -0.6110143065452576
],
'translation': [-1620.5948486328125, 5171.65869140625, 1496.43701171875],
},
{
'orientation': [0.5920479893684387, -0.7814217805862427, 0.1274748593568802, -0.15036417543888092],
'orientation': [
0.5920479893684387, -0.7814217805862427, 0.1274748593568802, -0.15036417543888092
],
'translation': [-1637.1737060546875, -3867.3173828125, 1547.033203125],
},
],
'S6': [
{
'orientation': [0.1337897777557373, -0.15692396461963654, -0.7571090459823608, 0.6198879480361938],
'orientation': [
0.1337897777557373, -0.15692396461963654, -0.7571090459823608, 0.6198879480361938
],
'translation': [1935.4517822265625, 4950.24560546875, 1618.0838623046875],
},
{
'orientation': [0.6147197484970093, -0.7628812789916992, -0.16174767911434174, 0.11819244921207428],
'orientation': [
0.6147197484970093, -0.7628812789916992, -0.16174767911434174, 0.11819244921207428
],
'translation': [1969.803955078125, -5128.73876953125, 1632.77880859375],
},
{
'orientation': [0.1529948115348816, -0.13529130816459656, 0.7646096348762512, -0.6112781167030334],
'orientation': [
0.1529948115348816, -0.13529130816459656, 0.7646096348762512, -0.6112781167030334
],
'translation': [-1769.596435546875, 5185.361328125, 1476.993408203125],
},
{
'orientation': [0.5916101336479187, -0.7804774045944214, 0.12832270562648773, -0.1561593860387802],
'orientation': [
0.5916101336479187, -0.7804774045944214, 0.12832270562648773, -0.1561593860387802
],
'translation': [-1721.668701171875, -3884.13134765625, 1540.4879150390625],
},
],
'S7': [
{
'orientation': [0.1435241848230362, -0.1631336808204651, -0.7548328638076782, 0.6188824772834778],
'orientation': [
0.1435241848230362, -0.1631336808204651, -0.7548328638076782, 0.6188824772834778
],
'translation': [1974.512939453125, 4926.3544921875, 1597.8326416015625],
},
{
'orientation': [0.6141672730445862, -0.7638262510299683, -0.1596645563840866, 0.1177929937839508],
'orientation': [
0.6141672730445862, -0.7638262510299683, -0.1596645563840866, 0.1177929937839508
],
'translation': [1937.0584716796875, -5119.7900390625, 1631.5665283203125],
},
{
'orientation': [0.14550060033798218, -0.12874816358089447, 0.7660516500473022, -0.6127139329910278],
'orientation': [
0.14550060033798218, -0.12874816358089447, 0.7660516500473022, -0.6127139329910278
],
'translation': [-1741.8111572265625, 5208.24951171875, 1464.8245849609375],
},
{
'orientation': [0.5912848114967346, -0.7821764349937439, 0.12445473670959473, -0.15196487307548523],
'orientation': [
0.5912848114967346, -0.7821764349937439, 0.12445473670959473, -0.15196487307548523
],
'translation': [-1734.7105712890625, -3832.42138671875, 1548.5830078125],
},
],
'S8': [
{
'orientation': [0.14110587537288666, -0.15589867532253265, -0.7561917304992676, 0.619644045829773],
'orientation': [
0.14110587537288666, -0.15589867532253265, -0.7561917304992676, 0.619644045829773
],
'translation': [2150.65185546875, 4896.1611328125, 1611.9046630859375],
},
{
'orientation': [0.6169601678848267, -0.7647668123245239, -0.14846350252628326, 0.11158157885074615],
'orientation': [
0.6169601678848267, -0.7647668123245239, -0.14846350252628326, 0.11158157885074615
],
'translation': [2219.965576171875, -5148.453125, 1613.0440673828125],
},
{
'orientation': [0.1471444070339203, -0.13377119600772858, 0.7670128345489502, -0.6100369691848755],
'orientation': [
0.1471444070339203, -0.13377119600772858, 0.7670128345489502, -0.6100369691848755
],
'translation': [-1571.2215576171875, 5137.0185546875, 1498.1761474609375],
},
{
'orientation': [0.5927824378013611, -0.7825870513916016, 0.12147816270589828, -0.14631995558738708],
'orientation': [
0.5927824378013611, -0.7825870513916016, 0.12147816270589828, -0.14631995558738708
],
'translation': [-1476.913330078125, -3896.7412109375, 1547.97216796875],
},
],
'S9': [
{
'orientation': [0.15540587902069092, -0.15548215806484222, -0.7532095313072205, 0.6199594736099243],
'orientation': [
0.15540587902069092, -0.15548215806484222, -0.7532095313072205, 0.6199594736099243
],
'translation': [2044.45849609375, 4935.1171875, 1481.2275390625],
},
{
'orientation': [0.618784487247467, -0.7634735107421875, -0.14132238924503326, 0.11933968216180801],
'orientation': [
0.618784487247467, -0.7634735107421875, -0.14132238924503326, 0.11933968216180801
],
'translation': [1990.959716796875, -5123.810546875, 1568.8048095703125],
},
{
'orientation': [0.13357827067375183, -0.1367100477218628, 0.7689454555511475, -0.6100738644599915],
'orientation': [
0.13357827067375183, -0.1367100477218628, 0.7689454555511475, -0.6100738644599915
],
'translation': [-1670.9921875, 5211.98583984375, 1528.387939453125],
},
{
'orientation': [0.5879399180412292, -0.7823407053947449, 0.1427614390850067, -0.14794869720935822],
'orientation': [
0.5879399180412292, -0.7823407053947449, 0.1427614390850067, -0.14794869720935822
],
'translation': [-1696.04345703125, -3827.099853515625, 1591.4127197265625],
},
],
'S11': [
{
'orientation': [0.15232472121715546, -0.15442320704460144, -0.7547563314437866, 0.6191070079803467],
'orientation': [
0.15232472121715546, -0.15442320704460144, -0.7547563314437866, 0.6191070079803467
],
'translation': [2098.440185546875, 4926.5546875, 1500.278564453125],
},
{
'orientation': [0.6189449429512024, -0.7600917220115662, -0.15300633013248444, 0.1255258321762085],
'orientation': [
0.6189449429512024, -0.7600917220115662, -0.15300633013248444, 0.1255258321762085
],
'translation': [2083.182373046875, -4912.1728515625, 1561.07861328125],
},
{
'orientation': [0.14943228662014008, -0.15650227665901184, 0.7681233882904053, -0.6026304364204407],
'orientation': [
0.14943228662014008, -0.15650227665901184, 0.7681233882904053, -0.6026304364204407
],
'translation': [-1609.8153076171875, 5177.3359375, 1537.896728515625],
},
{
'orientation': [0.5894251465797424, -0.7818877100944519, 0.13991211354732513, -0.14715361595153809],
'orientation': [
0.5894251465797424, -0.7818877100944519, 0.13991211354732513, -0.14715361595153809
],
'translation': [-1590.738037109375, -3854.1689453125, 1578.017578125],
},
],
}
class Human36mDataset(MocapDataset):
def __init__(self, path, remove_static_joints=True):
super().__init__(fps=50, skeleton=h36m_skeleton)
self._cameras = copy.deepcopy(h36m_cameras_extrinsic_params)
for cameras in self._cameras.values():
for i, cam in enumerate(cameras):
cam.update(h36m_cameras_intrinsic_params[i])
for k, v in cam.items():
if k not in ['id', 'res_w', 'res_h']:
cam[k] = np.array(v, dtype='float32')
# Normalize camera frame
cam['center'] = normalize_screen_coordinates(cam['center'], w=cam['res_w'], h=cam['res_h']).astype('float32')
cam['focal_length'] = cam['focal_length']/cam['res_w']*2
if 'translation' in cam:
cam['translation'] = cam['translation']/1000 # mm to meters
# Add intrinsic parameters vector
cam['intrinsic'] = np.concatenate((cam['focal_length'],
cam['center'],
cam['radial_distortion'],
cam['tangential_distortion']))
# Load serialized dataset
data = np.load(path)['positions_3d'].item()
self._data = {}
for subject, actions in data.items():
self._data[subject] = {}
for action_name, positions in actions.items():
self._data[subject][action_name] = {
'positions': positions,
'cameras': self._cameras[subject],
}
if remove_static_joints:
# Bring the skeleton to 17 joints instead of the original 32
self.remove_joints([4, 5, 9, 10, 11, 16, 20, 21, 22, 23, 24, 28, 29, 30, 31])
# Rewire shoulders to the correct parents
self._skeleton._parents[11] = 8
self._skeleton._parents[14] = 8
def supports_semi_supervised(self):
return True
def __init__(self, path, remove_static_joints=True):
super().__init__(fps=50, skeleton=h36m_skeleton)
self._cameras = copy.deepcopy(h36m_cameras_extrinsic_params)
for cameras in self._cameras.values():
for i, cam in enumerate(cameras):
cam.update(h36m_cameras_intrinsic_params[i])
for k, v in cam.items():
if k not in ['id', 'res_w', 'res_h']:
cam[k] = np.array(v, dtype='float32')
# Normalize camera frame
cam['center'] = normalize_screen_coordinates(cam['center'], w=cam['res_w'],
h=cam['res_h']).astype('float32')
cam['focal_length'] = cam['focal_length'] / cam['res_w'] * 2
if 'translation' in cam:
cam['translation'] = cam['translation'] / 1000 # mm to meters
# Add intrinsic parameters vector
cam['intrinsic'] = np.concatenate((cam['focal_length'], cam['center'],
cam['radial_distortion'], cam['tangential_distortion']))
# Load serialized dataset
data = np.load(path)['positions_3d'].item()
self._data = {}
for subject, actions in data.items():
self._data[subject] = {}
for action_name, positions in actions.items():
self._data[subject][action_name] = {
'positions': positions,
'cameras': self._cameras[subject],
}
if remove_static_joints:
# Bring the skeleton to 17 joints instead of the original 32
self.remove_joints([4, 5, 9, 10, 11, 16, 20, 21, 22, 23, 24, 28, 29, 30, 31])
# Rewire shoulders to the correct parents
self._skeleton._parents[11] = 8
self._skeleton._parents[14] = 8
def supports_semi_supervised(self):
return True
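A hedged sketch (editor addition, not part of the commit) of the camera-normalization step in the constructor above, assuming normalize_screen_coordinates follows the usual VideoPose3D convention, which is not defined in this diff: pixel coordinates are mapped so that x spans [-1, 1] and y is scaled by the same factor. The resolution and principal-point values below are only example numbers.

import numpy as np

def normalize_screen_coordinates_sketch(X, w, h):
  # Map pixel coordinates into [-1, 1] along x; y keeps the image aspect ratio.
  assert X.shape[-1] == 2
  return X / w * 2 - np.array([1, h / w])

center = np.array([512.5, 515.4], dtype='float32')  # example principal point
print(normalize_screen_coordinates_sketch(center, w=1000, h=1002))  # ~ [0.025, 0.029]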


@@ -9,142 +9,138 @@ from transformation import *
from pyquaternion import Quaternion
def get_angle(vec1, vec2):
cos_theta = np.dot(vec1, vec2)/(np.linalg.norm(vec1) * np.linalg.norm(vec2))
return acos(cos_theta)
cos_theta = np.dot(vec1, vec2) / (np.linalg.norm(vec1) * np.linalg.norm(vec2))
return acos(cos_theta)
def get_quaternion(ox, oy, oz, x, y, z):
# given transformed axis in x-y-z order return a quaternion
ox /= np.linalg.norm(ox)
oy /= np.linalg.norm(oy)
oz /= np.linalg.norm(oz)
# given transformed axis in x-y-z order return a quaternion
ox /= np.linalg.norm(ox)
oy /= np.linalg.norm(oy)
oz /= np.linalg.norm(oz)
set1 = np.vstack((ox,oy,oz))
set1 = np.vstack((ox, oy, oz))
x /= np.linalg.norm(x)
y /= np.linalg.norm(y)
z /= np.linalg.norm(z)
x /= np.linalg.norm(x)
y /= np.linalg.norm(y)
z /= np.linalg.norm(z)
set2 = np.vstack((x,y,z))
rot_mat = superimposition_matrix(set1, set2, scale=False, usesvd=True)
rot_qua = quaternion_from_matrix(rot_mat)
set2 = np.vstack((x, y, z))
rot_mat = superimposition_matrix(set1, set2, scale=False, usesvd=True)
rot_qua = quaternion_from_matrix(rot_mat)
return rot_qua
return rot_qua
# 3D coord to deepmimic rotations
def coord_to_rot(frameNum, frame, frame_duration):
eps = 0.001
axis_rotate_rate = 0.3
eps = 0.001
axis_rotate_rate = 0.3
frame = np.array(frame)
tmp = [[] for i in range(15)]
# duration of frame in seconds (1D),
tmp[0] = [frame_duration]
# root position (3D),
tmp[1] = frame[0]
# root rotation (4D),
root_y = (frame[7] - frame[0])
root_z = (frame[1] - frame[0])
root_x = np.cross(root_y, root_z)
frame = np.array(frame)
tmp = [[] for i in range(15)]
# duration of frame in seconds (1D),
tmp[0] = [frame_duration]
# root position (3D),
tmp[1] = frame[0]
# root rotation (4D),
root_y = (frame[7] - frame[0])
root_z = (frame[1] - frame[0])
root_x = np.cross(root_y, root_z)
x = np.array([1.0,0,0])
y = np.array([0,1.0,0])
z = np.array([0,0,1.0])
rot_qua = get_quaternion(root_x, root_y, root_z, x, y, z)
tmp[2] = list(rot_qua)
x = np.array([1.0, 0, 0])
y = np.array([0, 1.0, 0])
z = np.array([0, 0, 1.0])
# chest rotation (4D),
chest_y = (frame[8] - frame[7])
chest_z = (frame[14] - frame[8])
chest_x = np.cross(chest_y, chest_z)
rot_qua = get_quaternion(chest_x, chest_y, chest_z, root_x, root_y, root_z)
tmp[3] = list(rot_qua)
rot_qua = get_quaternion(root_x, root_y, root_z, x, y, z)
tmp[2] = list(rot_qua)
# neck rotation (4D),
neck_y = (frame[10] - frame[8])
neck_z = np.cross(frame[10]-frame[9], frame[8]-frame[9])
neck_x = np.cross(neck_y, neck_z)
rot_qua = get_quaternion(neck_x, neck_y, neck_z, chest_x, chest_y, chest_z)
tmp[4] = list(rot_qua)
# chest rotation (4D),
chest_y = (frame[8] - frame[7])
chest_z = (frame[14] - frame[8])
chest_x = np.cross(chest_y, chest_z)
rot_qua = get_quaternion(chest_x, chest_y, chest_z, root_x, root_y, root_z)
tmp[3] = list(rot_qua)
# right hip rotation (4D),
r_hip_y = (frame[1] - frame[2])
r_hip_z = np.cross(frame[1]-frame[2], frame[3]-frame[2])
r_hip_x = np.cross(r_hip_y, r_hip_z)
rot_qua = get_quaternion(r_hip_x, r_hip_y, r_hip_z, root_x, root_y, root_z)
tmp[5] = list(rot_qua)
# neck rotation (4D),
neck_y = (frame[10] - frame[8])
neck_z = np.cross(frame[10] - frame[9], frame[8] - frame[9])
neck_x = np.cross(neck_y, neck_z)
rot_qua = get_quaternion(neck_x, neck_y, neck_z, chest_x, chest_y, chest_z)
tmp[4] = list(rot_qua)
# right knee rotation (1D),
vec1 = frame[1] - frame[2]
vec2 = frame[3] - frame[2]
angle1 = get_angle(vec1, vec2)
tmp[6] = [angle1-pi]
# right hip rotation (4D),
r_hip_y = (frame[1] - frame[2])
r_hip_z = np.cross(frame[1] - frame[2], frame[3] - frame[2])
r_hip_x = np.cross(r_hip_y, r_hip_z)
rot_qua = get_quaternion(r_hip_x, r_hip_y, r_hip_z, root_x, root_y, root_z)
tmp[5] = list(rot_qua)
# right ankle rotation (4D),
tmp[7] = [1,0,0,0]
# right knee rotation (1D),
vec1 = frame[1] - frame[2]
vec2 = frame[3] - frame[2]
angle1 = get_angle(vec1, vec2)
tmp[6] = [angle1 - pi]
# right shoulder rotation (4D),
r_shou_y = (frame[14] - frame[15])
r_shou_z = np.cross(frame[16]-frame[15], frame[14]-frame[15])
r_shou_x = np.cross(r_shou_y, r_shou_z)
rot_qua = get_quaternion(r_shou_x, r_shou_y, r_shou_z, chest_x, chest_y, chest_z)
tmp[8] = list(rot_qua)
# right ankle rotation (4D),
tmp[7] = [1, 0, 0, 0]
# right elbow rotation (1D),
vec1 = frame[14] - frame[15]
vec2 = frame[16] - frame[15]
angle1 = get_angle(vec1, vec2)
tmp[9] = [pi-angle1]
# right shoulder rotation (4D),
r_shou_y = (frame[14] - frame[15])
r_shou_z = np.cross(frame[16] - frame[15], frame[14] - frame[15])
r_shou_x = np.cross(r_shou_y, r_shou_z)
rot_qua = get_quaternion(r_shou_x, r_shou_y, r_shou_z, chest_x, chest_y, chest_z)
tmp[8] = list(rot_qua)
# left hip rotation (4D),
l_hip_y = (frame[4] - frame[5])
l_hip_z = np.cross(frame[4]-frame[5], frame[6]-frame[5])
l_hip_x = np.cross(l_hip_y, l_hip_z)
rot_qua = get_quaternion(l_hip_x, l_hip_y, l_hip_z, root_x, root_y, root_z)
tmp[10] = list(rot_qua)
# left knee rotation (1D),
vec1 = frame[4] - frame[5]
vec2 = frame[6] - frame[5]
angle1 = get_angle(vec1, vec2)
tmp[11] = [angle1-pi]
# left ankle rotation (4D),
tmp[12] = [1,0,0,0]
# right elbow rotation (1D),
vec1 = frame[14] - frame[15]
vec2 = frame[16] - frame[15]
angle1 = get_angle(vec1, vec2)
tmp[9] = [pi - angle1]
# left shoulder rotation (4D),
l_shou_y = (frame[11] - frame[12])
l_shou_z = np.cross(frame[13]-frame[12], frame[11]-frame[12])
l_shou_x = np.cross(l_shou_y, l_shou_z)
rot_qua = get_quaternion(l_shou_x, l_shou_y, l_shou_z, chest_x, chest_y, chest_z)
tmp[13] = list(rot_qua)
# left hip rotation (4D),
l_hip_y = (frame[4] - frame[5])
l_hip_z = np.cross(frame[4] - frame[5], frame[6] - frame[5])
l_hip_x = np.cross(l_hip_y, l_hip_z)
rot_qua = get_quaternion(l_hip_x, l_hip_y, l_hip_z, root_x, root_y, root_z)
tmp[10] = list(rot_qua)
# left elbow rotation (1D)
vec1 = frame[11] - frame[12]
vec2 = frame[13] - frame[12]
angle1 = get_angle(vec1, vec2)
tmp[14] = [pi-angle1]
# left knee rotation (1D),
vec1 = frame[4] - frame[5]
vec2 = frame[6] - frame[5]
angle1 = get_angle(vec1, vec2)
tmp[11] = [angle1 - pi]
# left ankle rotation (4D),
tmp[12] = [1, 0, 0, 0]
# left shoulder rotation (4D),
l_shou_y = (frame[11] - frame[12])
l_shou_z = np.cross(frame[13] - frame[12], frame[11] - frame[12])
l_shou_x = np.cross(l_shou_y, l_shou_z)
rot_qua = get_quaternion(l_shou_x, l_shou_y, l_shou_z, chest_x, chest_y, chest_z)
tmp[13] = list(rot_qua)
# left elbow rotation (1D)
vec1 = frame[11] - frame[12]
vec2 = frame[13] - frame[12]
angle1 = get_angle(vec1, vec2)
tmp[14] = [pi - angle1]
ret = []
for i in tmp:
ret += list(i)
return np.array(ret)
ret = []
for i in tmp:
ret += list(i)
return np.array(ret)
# In[6]:
def coord_seq_to_rot_seq(coord_seq, frame_duration):
ret = []
for i in range(len(coord_seq)):
tmp = coord_to_rot( i, coord_seq[i], frame_duration)
ret.append(list(tmp))
return ret
ret = []
for i in range(len(coord_seq)):
tmp = coord_to_rot(i, coord_seq[i], frame_duration)
ret.append(list(tmp))
return ret
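A quick check of the 1-D joint terms above (editor addition): the knee and elbow entries are just the inner angle between two limb segments, shifted by pi, so a leg bent at a right angle stores -pi/2.

import numpy as np
from math import acos, pi

thigh = np.array([0.0, 1.0, 0.0])  # plays the role of frame[1] - frame[2]
shin = np.array([1.0, 0.0, 0.0])  # plays the role of frame[3] - frame[2]
cos_theta = np.dot(thigh, shin) / (np.linalg.norm(thigh) * np.linalg.norm(shin))
print(acos(cos_theta) - pi)  # -> -1.5707..., the value coord_to_rot stores in tmp[6]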


@@ -1,36 +1,37 @@
import numpy as np
from skeleton import Skeleton
class MocapDataset:
def __init__(self, fps, skeleton):
self._skeleton = skeleton
self._fps = fps
self._data = None # Must be filled by subclass
self._cameras = None # Must be filled by subclass
def remove_joints(self, joints_to_remove):
kept_joints = self._skeleton.remove_joints(joints_to_remove)
for subject in self._data.keys():
for action in self._data[subject].keys():
s = self._data[subject][action]
s['positions'] = s['positions'][:, kept_joints]
def __getitem__(self, key):
return self._data[key]
def subjects(self):
return self._data.keys()
def fps(self):
return self._fps
def skeleton(self):
return self._skeleton
def cameras(self):
return self._cameras
def supports_semi_supervised(self):
# This method can be overridden
return False
def __init__(self, fps, skeleton):
self._skeleton = skeleton
self._fps = fps
self._data = None # Must be filled by subclass
self._cameras = None # Must be filled by subclass
def remove_joints(self, joints_to_remove):
kept_joints = self._skeleton.remove_joints(joints_to_remove)
for subject in self._data.keys():
for action in self._data[subject].keys():
s = self._data[subject][action]
s['positions'] = s['positions'][:, kept_joints]
def __getitem__(self, key):
return self._data[key]
def subjects(self):
return self._data.keys()
def fps(self):
return self._fps
def skeleton(self):
return self._skeleton
def cameras(self):
return self._cameras
def supports_semi_supervised(self):
# This method can be overridden
return False
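A minimal sketch of what remove_joints does to the position arrays a subclass stores in _data (editor addition; the array shapes are illustrative): slicing the joint axis down to the kept joints, which for the Human3.6M list above turns 32 joints into 17.

import numpy as np

removed = (4, 5, 9, 10, 11, 16, 20, 21, 22, 23, 24, 28, 29, 30, 31)
positions = np.zeros((100, 32, 3), dtype='float32')  # frames x joints x xyz
kept_joints = [j for j in range(32) if j not in removed]
print(positions[:, kept_joints].shape)  # -> (100, 17, 3), the 17-joint skeleton used above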


@@ -1,30 +1,31 @@
import numpy as np
def qrot(q, v):
"""
"""
Rotate vector(s) v about the rotation described by quaternion(s) q.
Expects a tensor of shape (*, 4) for q and a tensor of shape (*, 3) for v,
where * denotes any number of dimensions.
Returns a tensor of shape (*, 3).
"""
assert q.shape[-1] == 4
assert v.shape[-1] == 3
assert q.shape[:-1] == v.shape[:-1]
assert q.shape[-1] == 4
assert v.shape[-1] == 3
assert q.shape[:-1] == v.shape[:-1]
qvec = q[..., 1:]
uv = np.cross(qvec, v)
uuv = np.cross(qvec, uv)
return (v + 2 * (q[..., :1] * uv + uuv))
qvec = q[..., 1:]
uv = np.cross(qvec, v)
uuv = np.cross(qvec, uv)
return (v + 2 * (q[..., :1] * uv + uuv))
def qinverse(q, inplace=False):
# We assume the quaternion to be normalized
if inplace:
q[..., 1:] *= -1
return q
else:
w = q[..., :1]
xyz = q[..., 1:]
return np.hstack((w, -xyz))
# We assume the quaternion to be normalized
if inplace:
q[..., 1:] *= -1
return q
else:
w = q[..., :1]
xyz = q[..., 1:]
return np.hstack((w, -xyz))
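A small numerical check of qrot (editor addition): a unit quaternion for a 90-degree rotation about the z axis should send the x axis to the y axis. The arithmetic below repeats qrot inline so the snippet stands alone.

import numpy as np

q = np.array([np.cos(np.pi / 4), 0.0, 0.0, np.sin(np.pi / 4)])  # (w, x, y, z)
v = np.array([1.0, 0.0, 0.0])
qvec = q[..., 1:]
uv = np.cross(qvec, v)
uuv = np.cross(qvec, uv)
print(v + 2 * (q[..., :1] * uv + uuv))  # ~ [0., 1., 0.]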


@@ -1,7 +1,7 @@
import os, inspect
import os, inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
os.sys.path.insert(0,parentdir)
os.sys.path.insert(0, parentdir)
print('parent:', parentdir)
@@ -10,7 +10,6 @@ import pybullet
import time
import random
from pybullet_utils.bullet_client import BulletClient
from deep_mimic.env.motion_capture_data import MotionCaptureData
@@ -26,21 +25,28 @@ import pybullet as p
import numpy as np
import argparse
parser = argparse.ArgumentParser(description='Arguments for loading reference for learning.')
# General arguments
parser.add_argument('--dataset_path', default='data/data_3d_h36m.npz', type=str, help='target dataset') # h36m or humaneva
parser.add_argument('--json_path', default='data/Walking.json', type=str, help='json file path for storing the deepmimic-format json created by inverse kinematics.')
parser.add_argument('--dataset_path',
default='data/data_3d_h36m.npz',
type=str,
help='target dataset') # h36m or humaneva
parser.add_argument(
'--json_path',
default='data/Walking.json',
type=str,
                    help='json file path for storing the deepmimic-format json created by inverse kinematics.')
parser.add_argument('--fps', default=24, type=int, help='frames per second')
parser.add_argument('--subject', default='S11', type=str, help='camera subject.')
parser.add_argument('--action', default='Walking', type=str, help='name of the action.')
parser.add_argument('--loop', default='wrap', type=str, help='loop information in deepmimic, wrap or none.')
parser.add_argument('--action', default='Walking', type=str, help='name of the action.')
parser.add_argument('--loop',
default='wrap',
type=str,
help='loop information in deepmimic, wrap or none.')
parser.add_argument('--draw_gt', action='store_true', help='draw ground truth or not.')
args = parser.parse_args()
args = parser.parse_args()
dataset_path = args.dataset_path
json_path = args.json_path
@@ -52,93 +58,75 @@ draw_gt = args.draw_gt
def draw_ground_truth(coord_seq, frame, duration, shift):
global joint_info
joint = coord_seq[frame]
shift = np.array(shift)
for i in range(1, 17):
# print(x[11], x[14])
joint_fa = joint_info['father'][i]
if joint_info['side'][i] == 'right':
p.addUserDebugLine(lineFromXYZ=joint[i]+shift,
lineToXYZ=joint[joint_fa]+shift,
lineColorRGB=(255,0,0),
lineWidth=1,
lifeTime=duration)
else:
p.addUserDebugLine(lineFromXYZ=joint[i]+shift,
lineToXYZ=joint[joint_fa]+shift,
lineColorRGB=(0,0,0),
lineWidth=1,
lifeTime=duration)
global joint_info
joint = coord_seq[frame]
shift = np.array(shift)
for i in range(1, 17):
# print(x[11], x[14])
joint_fa = joint_info['father'][i]
if joint_info['side'][i] == 'right':
p.addUserDebugLine(lineFromXYZ=joint[i] + shift,
lineToXYZ=joint[joint_fa] + shift,
lineColorRGB=(255, 0, 0),
lineWidth=1,
lifeTime=duration)
else:
p.addUserDebugLine(lineFromXYZ=joint[i] + shift,
lineToXYZ=joint[joint_fa] + shift,
lineColorRGB=(0, 0, 0),
lineWidth=1,
lifeTime=duration)
dataset = init_fb_h36m_dataset(dataset_path)
ground_truth = pose3D_from_fb_h36m(dataset,
subject = subject,
action = action,
shift = [1.0,0.0,0.0])
rot_seq = coord_seq_to_rot_seq(coord_seq = ground_truth,
frame_duration = 1/fps)
rot_seq_to_deepmimic_json( rot_seq = rot_seq,
loop = loop,
json_path = json_path)
ground_truth = pose3D_from_fb_h36m(dataset, subject=subject, action=action, shift=[1.0, 0.0, 0.0])
rot_seq = coord_seq_to_rot_seq(coord_seq=ground_truth, frame_duration=1 / fps)
rot_seq_to_deepmimic_json(rot_seq=rot_seq, loop=loop, json_path=json_path)
bc = BulletClient(connection_mode=pybullet.GUI)
bc.setAdditionalSearchPath(pybullet_data.getDataPath())
bc.configureDebugVisualizer(bc.COV_ENABLE_Y_AXIS_UP,1)
bc.setGravity(0,-9.8,0)
motion=MotionCaptureData()
bc.configureDebugVisualizer(bc.COV_ENABLE_Y_AXIS_UP, 1)
bc.setGravity(0, -9.8, 0)
motion = MotionCaptureData()
motionPath = json_path
motion.Load(motionPath)
print("numFrames = ", motion.NumFrames())
simTimeId = bc.addUserDebugParameter("simTime", 0, motion.NumFrames() - 1.1, 0)
y2zOrn = bc.getQuaternionFromEuler([-1.57, 0, 0])
bc.loadURDF("plane.urdf", [0, -0.04, 0], y2zOrn)
simTimeId= bc.addUserDebugParameter("simTime",0,motion.NumFrames()-1.1,0)
y2zOrn = bc.getQuaternionFromEuler([-1.57,0,0])
bc.loadURDF("plane.urdf",[0,-0.04,0], y2zOrn)
humanoid = Humanoid(bc, motion, [0,0,0]) # coordinates of the initial position
humanoid = Humanoid(bc, motion, [0, 0, 0])  # coordinates of the initial position
print(p.getBasePositionAndOrientation(humanoid._humanoid))
simTime = 0
keyFrameDuration = motion.KeyFrameDuraction()
print("keyFrameDuration=",keyFrameDuration)
print("keyFrameDuration=", keyFrameDuration)
for utNum in range(motion.NumFrames()):
bc.stepSimulation()
humanoid.RenderReference(utNum * keyFrameDuration)
if draw_gt:
draw_ground_truth(coord_seq = ground_truth,
frame = utNum,
duration = keyFrameDuration,
shift = [-1.0, 0.0, 1.0])
time.sleep(0.001)
bc.stepSimulation()
humanoid.RenderReference(utNum * keyFrameDuration)
if draw_gt:
draw_ground_truth(coord_seq=ground_truth,
frame=utNum,
duration=keyFrameDuration,
shift=[-1.0, 0.0, 1.0])
time.sleep(0.001)
stage = 0
def Reset(humanoid):
global simTime
humanoid.Reset()
simTime = 0
humanoid.SetSimTime(simTime)
pose = humanoid.InitializePoseFromMotionData()
humanoid.ApplyPose(pose, True, True, humanoid._humanoid,bc)
global simTime
humanoid.Reset()
simTime = 0
humanoid.SetSimTime(simTime)
pose = humanoid.InitializePoseFromMotionData()
humanoid.ApplyPose(pose, True, True, humanoid._humanoid, bc)
Reset(humanoid)
p.disconnect()


@@ -1,81 +1,82 @@
import numpy as np
class Skeleton:
def __init__(self, parents, joints_left, joints_right):
assert len(joints_left) == len(joints_right)
self._parents = np.array(parents)
self._joints_left = joints_left
self._joints_right = joints_right
self._compute_metadata()
def num_joints(self):
return len(self._parents)
def parents(self):
return self._parents
def has_children(self):
return self._has_children
def children(self):
return self._children
def remove_joints(self, joints_to_remove):
"""
def __init__(self, parents, joints_left, joints_right):
assert len(joints_left) == len(joints_right)
self._parents = np.array(parents)
self._joints_left = joints_left
self._joints_right = joints_right
self._compute_metadata()
def num_joints(self):
return len(self._parents)
def parents(self):
return self._parents
def has_children(self):
return self._has_children
def children(self):
return self._children
def remove_joints(self, joints_to_remove):
"""
Remove the joints specified in 'joints_to_remove'.
"""
valid_joints = []
for joint in range(len(self._parents)):
if joint not in joints_to_remove:
valid_joints.append(joint)
valid_joints = []
for joint in range(len(self._parents)):
if joint not in joints_to_remove:
valid_joints.append(joint)
for i in range(len(self._parents)):
while self._parents[i] in joints_to_remove:
self._parents[i] = self._parents[self._parents[i]]
index_offsets = np.zeros(len(self._parents), dtype=int)
new_parents = []
for i, parent in enumerate(self._parents):
if i not in joints_to_remove:
new_parents.append(parent - index_offsets[parent])
else:
index_offsets[i:] += 1
self._parents = np.array(new_parents)
if self._joints_left is not None:
new_joints_left = []
for joint in self._joints_left:
if joint in valid_joints:
new_joints_left.append(joint - index_offsets[joint])
self._joints_left = new_joints_left
if self._joints_right is not None:
new_joints_right = []
for joint in self._joints_right:
if joint in valid_joints:
new_joints_right.append(joint - index_offsets[joint])
self._joints_right = new_joints_right
for i in range(len(self._parents)):
while self._parents[i] in joints_to_remove:
self._parents[i] = self._parents[self._parents[i]]
self._compute_metadata()
return valid_joints
def joints_left(self):
return self._joints_left
def joints_right(self):
return self._joints_right
def _compute_metadata(self):
self._has_children = np.zeros(len(self._parents)).astype(bool)
for i, parent in enumerate(self._parents):
if parent != -1:
self._has_children[parent] = True
index_offsets = np.zeros(len(self._parents), dtype=int)
new_parents = []
for i, parent in enumerate(self._parents):
if i not in joints_to_remove:
new_parents.append(parent - index_offsets[parent])
else:
index_offsets[i:] += 1
self._parents = np.array(new_parents)
self._children = []
for i, parent in enumerate(self._parents):
self._children.append([])
for i, parent in enumerate(self._parents):
if parent != -1:
self._children[parent].append(i)
if self._joints_left is not None:
new_joints_left = []
for joint in self._joints_left:
if joint in valid_joints:
new_joints_left.append(joint - index_offsets[joint])
self._joints_left = new_joints_left
if self._joints_right is not None:
new_joints_right = []
for joint in self._joints_right:
if joint in valid_joints:
new_joints_right.append(joint - index_offsets[joint])
self._joints_right = new_joints_right
self._compute_metadata()
return valid_joints
def joints_left(self):
return self._joints_left
def joints_right(self):
return self._joints_right
def _compute_metadata(self):
self._has_children = np.zeros(len(self._parents)).astype(bool)
for i, parent in enumerate(self._parents):
if parent != -1:
self._has_children[parent] = True
self._children = []
for i, parent in enumerate(self._parents):
self._children.append([])
for i, parent in enumerate(self._parents):
if parent != -1:
self._children[parent].append(i)
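A worked example of the index bookkeeping in remove_joints (editor addition): on a 5-joint chain 0-1-2-3-4, dropping joint 2 re-parents its child to joint 1 and shifts every later index down by one, exactly as the two loops above do.

import numpy as np

parents = np.array([-1, 0, 1, 2, 3])
joints_to_remove = [2]
for i in range(len(parents)):
  while parents[i] in joints_to_remove:
    parents[i] = parents[parents[i]]
index_offsets = np.zeros(len(parents), dtype=int)
new_parents = []
for i, parent in enumerate(parents):
  if i not in joints_to_remove:
    new_parents.append(parent - index_offsets[parent])
  else:
    index_offsets[i:] += 1
print(new_parents)  # -> [-1, 0, 1, 2]: the chain 0-1-3-4 renumbered as 0-1-2-3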


@@ -3,21 +3,23 @@ import subprocess
from pybullet_utils.arg_parser import ArgParser
from pybullet_utils.logger import Logger
def main():
# Command line arguments
args = sys.argv[1:]
arg_parser = ArgParser()
arg_parser.load_args(args)
# Command line arguments
args = sys.argv[1:]
arg_parser = ArgParser()
arg_parser.load_args(args)
num_workers = arg_parser.parse_int('num_workers', 1)
assert(num_workers > 0)
num_workers = arg_parser.parse_int('num_workers', 1)
assert (num_workers > 0)
Logger.print2('Running with {:d} workers'.format(num_workers))
cmd = 'mpiexec -n {:d} python3 DeepMimic_Optimizer.py '.format(num_workers)
cmd += ' '.join(args)
Logger.print2('cmd: ' + cmd)
subprocess.call(cmd, shell=True)
return
Logger.print2('Running with {:d} workers'.format(num_workers))
cmd = 'mpiexec -n {:d} python3 DeepMimic_Optimizer.py '.format(num_workers)
cmd += ' '.join(args)
Logger.print2('cmd: ' + cmd)
subprocess.call(cmd, shell=True)
return
if __name__ == '__main__':
main()
main()


@@ -2,8 +2,8 @@ import os
import inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
os.sys.path.insert(0,parentdir)
print("parentdir=",parentdir)
os.sys.path.insert(0, parentdir)
print("parentdir=", parentdir)
import json
from pybullet_envs.deep_mimic.learning.rl_world import RLWorld
from pybullet_envs.deep_mimic.learning.ppo_agent import PPOAgent
@@ -15,81 +15,78 @@ from pybullet_envs.deep_mimic.env.pybullet_deep_mimic_env import PyBulletDeepMim
import sys
import random
update_timestep = 1./240.
update_timestep = 1. / 240.
animating = True
def update_world(world, time_elapsed):
timeStep = update_timestep
world.update(timeStep)
reward = world.env.calc_reward(agent_id=0)
#print("reward=",reward)
end_episode = world.env.is_episode_end()
if (end_episode):
world.end_episode()
world.reset()
return
timeStep = update_timestep
world.update(timeStep)
reward = world.env.calc_reward(agent_id=0)
#print("reward=",reward)
end_episode = world.env.is_episode_end()
if (end_episode):
world.end_episode()
world.reset()
return
def build_arg_parser(args):
arg_parser = ArgParser()
arg_parser.load_args(args)
arg_parser = ArgParser()
arg_parser.load_args(args)
arg_file = arg_parser.parse_string('arg_file', '')
if (arg_file != ''):
path = pybullet_data.getDataPath() + "/args/" + arg_file
succ = arg_parser.load_file(path)
Logger.print2(arg_file)
assert succ, Logger.print2('Failed to load args from: ' + arg_file)
return arg_parser
arg_file = arg_parser.parse_string('arg_file', '')
if (arg_file != ''):
path = pybullet_data.getDataPath()+"/args/"+arg_file
succ = arg_parser.load_file(path)
Logger.print2(arg_file)
assert succ, Logger.print2('Failed to load args from: ' + arg_file)
return arg_parser
args = sys.argv[1:]
def build_world(args, enable_draw):
arg_parser = build_arg_parser(args)
print("enable_draw=",enable_draw)
env = PyBulletDeepMimicEnv(arg_parser, enable_draw)
world = RLWorld(env, arg_parser)
#world.env.set_playback_speed(playback_speed)
arg_parser = build_arg_parser(args)
print("enable_draw=", enable_draw)
env = PyBulletDeepMimicEnv(arg_parser, enable_draw)
world = RLWorld(env, arg_parser)
#world.env.set_playback_speed(playback_speed)
motion_file = arg_parser.parse_string("motion_file")
print("motion_file=",motion_file)
bodies = arg_parser.parse_ints("fall_contact_bodies")
print("bodies=",bodies)
int_output_path = arg_parser.parse_string("int_output_path")
print("int_output_path=",int_output_path)
agent_files = pybullet_data.getDataPath()+"/"+arg_parser.parse_string("agent_files")
motion_file = arg_parser.parse_string("motion_file")
print("motion_file=", motion_file)
bodies = arg_parser.parse_ints("fall_contact_bodies")
print("bodies=", bodies)
int_output_path = arg_parser.parse_string("int_output_path")
print("int_output_path=", int_output_path)
agent_files = pybullet_data.getDataPath() + "/" + arg_parser.parse_string("agent_files")
AGENT_TYPE_KEY = "AgentType"
AGENT_TYPE_KEY = "AgentType"
print("agent_file=",agent_files)
with open(agent_files) as data_file:
json_data = json.load(data_file)
print("json_data=",json_data)
assert AGENT_TYPE_KEY in json_data
agent_type = json_data[AGENT_TYPE_KEY]
print("agent_type=",agent_type)
agent = PPOAgent(world, id, json_data)
agent.set_enable_training(False)
world.reset()
return world
print("agent_file=", agent_files)
with open(agent_files) as data_file:
json_data = json.load(data_file)
print("json_data=", json_data)
assert AGENT_TYPE_KEY in json_data
agent_type = json_data[AGENT_TYPE_KEY]
print("agent_type=", agent_type)
agent = PPOAgent(world, id, json_data)
agent.set_enable_training(False)
world.reset()
return world
if __name__ == '__main__':
world = build_world(args, True)
while (world.env._pybullet_client.isConnected()):
timeStep = update_timestep
keys = world.env.getKeyboardEvents()
if world.env.isKeyTriggered(keys, ' '):
animating = not animating
if (animating):
update_world(world, timeStep)
#animating=False
world = build_world(args, True)
while (world.env._pybullet_client.isConnected()):
timeStep = update_timestep
keys = world.env.getKeyboardEvents()
if world.env.isKeyTriggered(keys, ' '):
animating = not animating
if (animating):
update_world(world, timeStep)
#animating=False


@@ -5,134 +5,134 @@ from pybullet_utils import bullet_client
from pkg_resources import parse_version
class MJCFBaseBulletEnv(gym.Env):
"""
"""
Base class for Bullet physics simulation loading MJCF (MuJoCo .xml) environments in a Scene.
These environments create single-player scenes and behave like normal Gym environments, if
you don't use multiplayer.
"""
metadata = {
'render.modes': ['human', 'rgb_array'],
'video.frames_per_second': 60
}
metadata = {'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': 60}
def __init__(self, robot, render=False):
self.scene = None
self.physicsClientId = -1
self.ownsPhysicsClient = 0
self.camera = Camera()
self.isRender = render
self.robot = robot
self.seed()
self._cam_dist = 3
self._cam_yaw = 0
self._cam_pitch = -30
self._render_width =320
self._render_height = 240
def __init__(self, robot, render=False):
self.scene = None
self.physicsClientId = -1
self.ownsPhysicsClient = 0
self.camera = Camera()
self.isRender = render
self.robot = robot
self.seed()
self._cam_dist = 3
self._cam_yaw = 0
self._cam_pitch = -30
self._render_width = 320
self._render_height = 240
self.action_space = robot.action_space
self.observation_space = robot.observation_space
self.action_space = robot.action_space
self.observation_space = robot.observation_space
def configure(self, args):
self.robot.args = args
def configure(self, args):
self.robot.args = args
def seed(self, seed=None):
self.np_random, seed = gym.utils.seeding.np_random(seed)
self.robot.np_random = self.np_random # use the same np_randomizer for robot as for env
return [seed]
def seed(self, seed=None):
self.np_random, seed = gym.utils.seeding.np_random(seed)
self.robot.np_random = self.np_random # use the same np_randomizer for robot as for env
return [seed]
def reset(self):
if (self.physicsClientId<0):
self.ownsPhysicsClient = True
def reset(self):
if (self.physicsClientId < 0):
self.ownsPhysicsClient = True
if self.isRender:
self._p = bullet_client.BulletClient(connection_mode=pybullet.GUI)
else:
self._p = bullet_client.BulletClient()
if self.isRender:
self._p = bullet_client.BulletClient(connection_mode=pybullet.GUI)
else:
self._p = bullet_client.BulletClient()
self.physicsClientId = self._p._client
self._p.configureDebugVisualizer(pybullet.COV_ENABLE_GUI,0)
self.physicsClientId = self._p._client
self._p.configureDebugVisualizer(pybullet.COV_ENABLE_GUI, 0)
if self.scene is None:
self.scene = self.create_single_player_scene(self._p)
if not self.scene.multiplayer and self.ownsPhysicsClient:
self.scene.episode_restart(self._p)
if self.scene is None:
self.scene = self.create_single_player_scene(self._p)
if not self.scene.multiplayer and self.ownsPhysicsClient:
self.scene.episode_restart(self._p)
self.robot.scene = self.scene
self.robot.scene = self.scene
self.frame = 0
self.done = 0
self.reward = 0
dump = 0
s = self.robot.reset(self._p)
self.potential = self.robot.calc_potential()
return s
self.frame = 0
self.done = 0
self.reward = 0
dump = 0
s = self.robot.reset(self._p)
self.potential = self.robot.calc_potential()
return s
def render(self, mode='human', close=False):
if mode == "human":
self.isRender = True
if mode != "rgb_array":
return np.array([])
def render(self, mode='human', close=False):
if mode == "human":
self.isRender = True
if mode != "rgb_array":
return np.array([])
base_pos=[0,0,0]
if (hasattr(self,'robot')):
if (hasattr(self.robot,'body_xyz')):
base_pos = self.robot.body_xyz
base_pos = [0, 0, 0]
if (hasattr(self, 'robot')):
if (hasattr(self.robot, 'body_xyz')):
base_pos = self.robot.body_xyz
view_matrix = self._p.computeViewMatrixFromYawPitchRoll(
cameraTargetPosition=base_pos,
distance=self._cam_dist,
yaw=self._cam_yaw,
pitch=self._cam_pitch,
roll=0,
upAxisIndex=2)
proj_matrix = self._p.computeProjectionMatrixFOV(
fov=60, aspect=float(self._render_width)/self._render_height,
nearVal=0.1, farVal=100.0)
(_, _, px, _, _) = self._p.getCameraImage(
width=self._render_width, height=self._render_height, viewMatrix=view_matrix,
projectionMatrix=proj_matrix,
renderer=pybullet.ER_BULLET_HARDWARE_OPENGL
)
rgb_array = np.array(px)
rgb_array = np.reshape(np.array(px), (self._render_height, self._render_width, -1))
rgb_array = rgb_array[:, :, :3]
return rgb_array
view_matrix = self._p.computeViewMatrixFromYawPitchRoll(cameraTargetPosition=base_pos,
distance=self._cam_dist,
yaw=self._cam_yaw,
pitch=self._cam_pitch,
roll=0,
upAxisIndex=2)
proj_matrix = self._p.computeProjectionMatrixFOV(fov=60,
aspect=float(self._render_width) /
self._render_height,
nearVal=0.1,
farVal=100.0)
(_, _, px, _, _) = self._p.getCameraImage(width=self._render_width,
height=self._render_height,
viewMatrix=view_matrix,
projectionMatrix=proj_matrix,
renderer=pybullet.ER_BULLET_HARDWARE_OPENGL)
rgb_array = np.array(px)
rgb_array = np.reshape(np.array(px), (self._render_height, self._render_width, -1))
rgb_array = rgb_array[:, :, :3]
return rgb_array
def close(self):
if (self.ownsPhysicsClient):
if (self.physicsClientId >= 0):
self._p.disconnect()
self.physicsClientId = -1
def close(self):
if (self.ownsPhysicsClient):
if (self.physicsClientId>=0):
self._p.disconnect()
self.physicsClientId = -1
def HUD(self, state, a, done):
pass
def HUD(self, state, a, done):
pass
# def step(self, *args, **kwargs):
# if self.isRender:
# base_pos=[0,0,0]
# if (hasattr(self,'robot')):
# if (hasattr(self.robot,'body_xyz')):
# base_pos = self.robot.body_xyz
# # Keep the previous orientation of the camera set by the user.
# #[yaw, pitch, dist] = self._p.getDebugVisualizerCamera()[8:11]
# self._p.resetDebugVisualizerCamera(3,0,0, base_pos)
#
#
# return self.step(*args, **kwargs)
if parse_version(gym.__version__) < parse_version('0.9.6'):
_render = render
_reset = reset
_seed = seed
# def step(self, *args, **kwargs):
# if self.isRender:
# base_pos=[0,0,0]
# if (hasattr(self,'robot')):
# if (hasattr(self.robot,'body_xyz')):
# base_pos = self.robot.body_xyz
# # Keep the previous orientation of the camera set by the user.
# #[yaw, pitch, dist] = self._p.getDebugVisualizerCamera()[8:11]
# self._p.resetDebugVisualizerCamera(3,0,0, base_pos)
#
#
# return self.step(*args, **kwargs)
if parse_version(gym.__version__) < parse_version('0.9.6'):
_render = render
_reset = reset
_seed = seed
class Camera:
def __init__(self):
pass
def move_and_look_at(self,i,j,k,x,y,z):
lookat = [x,y,z]
distance = 10
yaw = 10
self._p.resetDebugVisualizerCamera(distance, yaw, -20, lookat)
def __init__(self):
pass
def move_and_look_at(self, i, j, k, x, y, z):
lookat = [x, y, z]
distance = 10
yaw = 10
self._p.resetDebugVisualizerCamera(distance, yaw, -20, lookat)
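A hedged usage sketch of an MJCFBaseBulletEnv subclass driven through the standard Gym loop (editor addition; the env id is just one of the environments registered by pybullet_envs, and the episode length is arbitrary).

import gym
import pybullet_envs  # registers the Bullet envs with gym

env = gym.make("HopperBulletEnv-v0")
obs = env.reset()
for _ in range(100):
  obs, reward, done, _ = env.step(env.action_space.sample())  # random actions, just to exercise step()
  if done:
    obs = env.reset()
env.close()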


@@ -7,31 +7,38 @@ import time
p = bc.BulletClient(connection_mode=pybullet.GUI)
p.setAdditionalSearchPath(pd.getDataPath())
p.loadURDF("plane_transparent.urdf", useMaximalCoordinates=True)
p#.setPhysicsEngineParameter(numSolverIterations=10, fixedTimeStep=0.01)
p #.setPhysicsEngineParameter(numSolverIterations=10, fixedTimeStep=0.01)
p.configureDebugVisualizer(p.COV_ENABLE_PLANAR_REFLECTION, 1)
p.configureDebugVisualizer(p.COV_ENABLE_RENDERING, 0)
p.configureDebugVisualizer(p.COV_ENABLE_PLANAR_REFLECTION,1)
p.configureDebugVisualizer(p.COV_ENABLE_RENDERING,0)
y2z = p.getQuaternionFromEuler([0, 0, 1.57])
meshScale = [1, 1, 1]
visualShapeId = p.createVisualShape(shapeType=p.GEOM_MESH,
fileName="domino/domino.obj",
rgbaColor=[1, 1, 1, 1],
specularColor=[0.4, .4, 0],
visualFrameOrientation=y2z,
meshScale=meshScale)
y2z = p.getQuaternionFromEuler([0,0,1.57])
meshScale = [1,1,1]
visualShapeId = p.createVisualShape(shapeType=p.GEOM_MESH,fileName="domino/domino.obj", rgbaColor=[1,1,1,1], specularColor=[0.4,.4,0], visualFrameOrientation=y2z, meshScale=meshScale)
boxDimensions = [0.5 * 0.00635, 0.5 * 0.0254, 0.5 * 0.0508]
collisionShapeId = p.createCollisionShape(p.GEOM_BOX, halfExtents=boxDimensions)
boxDimensions = [0.5*0.00635, 0.5*0.0254, 0.5*0.0508]
collisionShapeId = p.createCollisionShape(p.GEOM_BOX,halfExtents=boxDimensions)
for j in range (12):
print("j=",j)
for i in range (35):
for j in range(12):
print("j=", j)
for i in range(35):
#p.loadURDF("domino/domino.urdf",[i*0.04,0, 0.06])
p.createMultiBody(baseMass=0.025,baseCollisionShapeIndex = collisionShapeId,baseVisualShapeIndex = visualShapeId, basePosition = [i*0.04,j*0.05, 0.06], useMaximalCoordinates=True)
p.configureDebugVisualizer(p.COV_ENABLE_RENDERING,1)
p.createMultiBody(baseMass=0.025,
baseCollisionShapeIndex=collisionShapeId,
baseVisualShapeIndex=visualShapeId,
basePosition=[i * 0.04, j * 0.05, 0.06],
useMaximalCoordinates=True)
p.setGravity(0,0,-9.8)
p.configureDebugVisualizer(p.COV_ENABLE_RENDERING, 1)
p.setGravity(0, 0, -9.8)
p.setRealTimeSimulation(1)
while (1):
p.setGravity(0,0,-9.8)
p.setGravity(0, 0, -9.8)
#p.stepSimulation(1./100.)
time.sleep(1./240.)
time.sleep(1. / 240.)


@@ -2,179 +2,582 @@
import os, inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
os.sys.path.insert(0,parentdir)
os.sys.path.insert(0, parentdir)
import gym
import numpy as np
import pybullet_envs
import time
def relu(x):
return np.maximum(x, 0)
return np.maximum(x, 0)
class SmallReactivePolicy:
"Simple multi-layer perceptron policy, no internal state"
def __init__(self, observation_space, action_space):
assert weights_dense1_w.shape == (observation_space.shape[0], 64.0)
assert weights_dense2_w.shape == (64.0, 32.0)
assert weights_final_w.shape == (32.0, action_space.shape[0])
"Simple multi-layer perceptron policy, no internal state"
def __init__(self, observation_space, action_space):
assert weights_dense1_w.shape == (observation_space.shape[0], 64.0)
assert weights_dense2_w.shape == (64.0, 32.0)
assert weights_final_w.shape == (32.0, action_space.shape[0])
def act(self, ob):
x = ob
x = relu(np.dot(x, weights_dense1_w) + weights_dense1_b)
x = relu(np.dot(x, weights_dense2_w) + weights_dense2_b)
x = np.dot(x, weights_final_w) + weights_final_b
return x
def act(self, ob):
x = ob
x = relu(np.dot(x, weights_dense1_w) + weights_dense1_b)
x = relu(np.dot(x, weights_dense2_w) + weights_dense2_b)
x = np.dot(x, weights_final_w) + weights_final_b
return x
def main():
env = gym.make("InvertedDoublePendulumBulletEnv-v0")
env.render(mode="human")
pi = SmallReactivePolicy(env.observation_space, env.action_space)
env = gym.make("InvertedDoublePendulumBulletEnv-v0")
env.render(mode="human")
pi = SmallReactivePolicy(env.observation_space, env.action_space)
while 1:
frame = 0
score = 0
restart_delay = 0
obs = env.reset()
while 1:
frame = 0
score = 0
restart_delay = 0
obs = env.reset()
time.sleep(1. / 60.)
a = pi.act(obs)
obs, r, done, _ = env.step(a)
score += r
frame += 1
still_open = env.render("human")
if still_open == False:
return
if not done: continue
if restart_delay == 0:
print("score=%0.2f in %i frames" % (score, frame))
restart_delay = 60 * 2 # 2 sec at 60 fps
else:
restart_delay -= 1
if restart_delay > 0: continue
break
while 1:
time.sleep(1./60.)
a = pi.act(obs)
obs, r, done, _ = env.step(a)
score += r
frame += 1
still_open = env.render("human")
if still_open==False:
return
if not done: continue
if restart_delay==0:
print("score=%0.2f in %i frames" % (score, frame))
restart_delay = 60*2 # 2 sec at 60 fps
else:
restart_delay -= 1
if restart_delay > 0: continue
break
weights_dense1_w = np.array([
[ -0.5857, +0.1810, +0.2839, +0.1278, -0.4302, -0.3152, +0.5916, -0.0635, +0.6259, +0.2873, -0.0572, -0.3538, -0.8121, +0.2707, +0.1656, -0.2103, -0.1614, -0.2789, -0.5856, -0.4733, +0.1838, +0.1063, +0.7629, +0.0873, +0.1480, +0.1768, +0.6522, +0.1158, -0.0816, +0.6542, -0.8870, +0.1775, +0.1532, +0.2268, -0.0313, -0.0470, +0.5328, -0.0570, +0.4820, -0.3772, -0.7581, +0.2835, -0.3566, +0.9371, -0.0441, -0.1797, -0.2859, -0.0238, +0.0261, -0.0296, -0.1406, +0.2869, +0.1279, +0.6653, +0.5643, -0.3136, +0.7751, +0.2341, +0.1903, +0.8283, -0.0697, +0.1276, -0.0250, -0.0053],
[ +0.3741, +0.4844, -0.0638, -0.3205, +0.3137, +0.9636, +0.5329, +0.6882, +0.2983, -0.6675, -0.6372, +0.2065, -0.2645, -0.4789, +0.2326, -0.0691, -0.5905, -0.3354, +0.3428, +0.4253, +0.9111, -0.4751, -0.2124, +0.3920, +0.2897, -1.1101, +0.1894, -0.4025, -0.1125, -0.0627, +0.2347, -0.8787, +0.1019, +0.9128, +0.2544, -0.3933, +0.6485, -0.1936, -0.2402, +0.5012, -0.0918, +0.3160, -0.7860, +0.3439, -0.4268, -0.1788, -0.3930, +0.5128, +0.2338, +0.2571, +0.1343, +0.9850, -0.7074, +0.3532, +0.3048, -0.4542, +0.5539, -0.4409, -0.2003, -0.4837, -0.3554, -0.4447, -0.0817, -0.8497],
[ +0.0825, +0.5847, +0.4837, +0.5144, +0.4770, +0.0199, +0.4275, -0.4530, +0.8499, -0.2840, +0.3817, -0.5098, -0.2155, -0.1475, +0.1145, -0.1871, -0.0526, +0.3583, -0.3537, -0.7111, -0.6116, +0.3406, -0.6360, +0.7786, +0.6628, -0.0493, +0.3505, -0.0376, -0.6556, +1.0748, -0.5329, +0.6477, -0.7117, +0.3723, +0.6006, +0.0171, +0.0012, +0.4910, -0.5651, -0.6868, +0.2403, +0.0254, -0.4416, +0.7534, -0.0138, -1.1298, +0.5447, +0.0974, +0.1988, -0.2161, -0.3126, -0.5731, -0.1278, +0.2995, -0.1200, -0.7917, +0.5326, +0.4562, -0.0144, +0.5717, -0.4065, +0.1494, +0.7100, +0.2461],
[ -0.2861, +0.4314, -0.2982, -0.1401, -0.1033, +0.5287, -0.6620, -0.3975, +0.0038, +0.1991, -0.7079, -0.9000, +0.1659, +0.3623, -0.0752, -0.1907, -0.2335, -0.5143, +0.2324, -0.0487, +0.1583, -0.5989, +0.5957, +0.2150, -0.0335, +0.2154, +0.3279, -0.7976, +0.5320, -0.4438, +0.2170, -0.3841, -0.0039, -0.0847, -0.0028, -0.4278, -0.2393, -0.9239, +0.2880, -0.1437, -0.0941, -0.0796, -0.3906, -0.3224, +0.1038, -0.1929, -0.2713, -0.4157, -0.2178, +0.5729, -0.2065, +0.0059, +0.3879, +0.0590, +0.1759, +0.0677, -0.0170, -0.2252, +0.3301, -0.0599, +0.3791, -0.1455, +0.2200, -0.0499],
[ -0.4403, +0.7182, +0.7941, +0.1469, +1.5777, +0.3426, +0.0923, +0.2160, +1.1492, -0.5206, -0.2659, -0.1504, +0.2739, -1.3939, +0.8992, -1.1433, -0.3828, -0.2497, -0.2172, +0.2318, -0.3605, +0.6413, -1.9095, +1.4785, -0.1274, -0.7208, -0.0802, -0.8779, -1.6260, +0.9151, +0.8289, -0.0902, -0.3551, +0.6198, +1.7488, +0.0739, -1.2022, -0.3536, -1.5187, +0.1839, +1.4258, +0.4217, +0.1503, -0.0460, +0.2327, -0.4139, -0.3668, +0.2997, +0.6856, +0.6917, -0.3856, -0.3620, +0.1578, -0.8349, -1.0796, -0.0319, -1.1966, -0.8122, +0.5053, -0.5033, -0.9207, -0.1193, -0.7625, +0.1379],
[ -0.0321, -0.3206, -0.4516, +0.3420, +1.0964, +0.0311, +0.4654, -0.2367, +0.3347, -0.2798, -0.8169, -0.1555, +0.9397, -0.5597, +0.7113, -0.3642, -0.2840, -0.1323, -0.1000, +0.2283, +0.3612, -0.4784, +0.0504, +0.5310, -0.0887, +0.2926, +0.5069, -0.5645, -0.0976, -0.2594, +0.4425, +0.9223, -0.5637, -0.2336, -0.1316, -0.6564, -0.2780, -0.2409, -0.1637, +0.4506, +0.7018, -0.1299, +0.7172, +0.1207, +0.4375, +0.3836, +0.2781, -0.7792, -0.5317, +0.4510, +0.2423, -0.0588, -0.4254, -0.6381, -0.8205, +0.6417, +0.1904, -0.2618, +0.5900, -0.3899, -0.7851, -0.4769, -0.3688, -0.3510],
[ -0.8366, -0.3157, -0.1130, +0.2005, +0.3713, -0.4351, -0.1278, -0.5689, +0.3229, -0.5981, -0.4917, -0.4160, -0.5504, +0.2225, -0.1581, -0.6457, +0.1001, -1.0635, +0.2368, +0.2494, -0.4054, -0.1699, -0.1316, +0.2614, +0.3016, +0.4222, -0.1548, -0.0766, -0.5226, -0.3576, -0.2433, -0.5495, +0.0056, +0.0193, +0.2353, +0.3986, +0.3580, -0.7886, +0.3928, +0.1831, +0.4319, +0.2276, -0.3062, +0.0413, -0.4288, +0.1365, +0.3176, +0.3564, +0.5668, -0.4401, -0.9576, -0.1435, +0.0304, -0.5575, +0.0412, -0.1096, +0.2207, +0.1227, -0.0051, +0.5808, -0.1331, +0.1368, +0.4170, -0.8095],
[ -0.6368, -1.3221, -0.4492, -1.5414, +0.4004, -2.8780, -0.1748, -0.8166, +1.7066, +1.0714, -0.4755, +0.3020, +0.0422, +0.3466, +0.4472, -0.6209, -3.3768, -0.0806, +1.3624, -2.4155, +1.0886, +0.3412, +0.0891, +1.6821, -0.5361, +0.3952, +1.5120, +0.3910, +1.9500, -0.9065, -1.3452, +0.0904, -0.0389, +0.2817, -1.8375, +0.8131, -1.5287, +0.3115, +1.4069, -0.3424, +1.6101, +2.6775, +0.5516, +1.6500, -0.4138, -0.0170, +1.0008, -0.7865, +0.0551, +2.2068, -0.0108, +0.3207, -1.1884, +0.3792, -0.6435, +0.2858, -0.6881, +0.1554, -1.6926, -0.0975, -1.4120, -0.0827, -1.5186, +0.2526],
[ -0.2900, -0.2805, +0.9182, -0.8893, +0.7345, -0.9015, -0.2696, +0.2344, +0.3889, +0.6790, +0.3657, -0.1995, -0.6738, -0.4166, +0.1690, -0.3798, -0.9872, -0.2558, -0.4205, -0.6190, -0.0092, -0.2261, -0.2738, +0.2977, -0.7348, +0.4872, +0.4776, -0.1364, +0.5836, -0.2688, -0.4261, -0.3612, -0.3533, +0.4665, +0.0155, +1.0116, -0.7139, -0.3707, -0.4429, -0.0383, +0.6716, +0.5972, +0.3506, +0.3294, -1.3734, -0.5905, -0.1168, -0.2609, +0.3436, +0.8277, +0.4965, +0.3005, -0.2929, +0.1501, -0.2655, +0.3860, -0.3946, +0.8764, +0.7927, +0.0541, -1.0912, -0.2006, -0.6928, +0.4653]
# yapf: disable
weights_dense1_w = np.array(
[[
-0.5857, +0.1810, +0.2839, +0.1278, -0.4302, -0.3152, +0.5916, -0.0635,
+0.6259, +0.2873, -0.0572, -0.3538, -0.8121, +0.2707, +0.1656, -0.2103,
-0.1614, -0.2789, -0.5856, -0.4733, +0.1838, +0.1063, +0.7629, +0.0873,
+0.1480, +0.1768, +0.6522, +0.1158, -0.0816, +0.6542, -0.8870, +0.1775,
+0.1532, +0.2268, -0.0313, -0.0470, +0.5328, -0.0570, +0.4820, -0.3772,
-0.7581, +0.2835, -0.3566, +0.9371, -0.0441, -0.1797, -0.2859, -0.0238,
+0.0261, -0.0296, -0.1406, +0.2869, +0.1279, +0.6653, +0.5643, -0.3136,
+0.7751, +0.2341, +0.1903, +0.8283, -0.0697, +0.1276, -0.0250, -0.0053
],
[
+0.3741, +0.4844, -0.0638, -0.3205, +0.3137, +0.9636, +0.5329,
+0.6882, +0.2983, -0.6675, -0.6372, +0.2065, -0.2645, -0.4789,
+0.2326, -0.0691, -0.5905, -0.3354, +0.3428, +0.4253, +0.9111,
-0.4751, -0.2124, +0.3920, +0.2897, -1.1101, +0.1894, -0.4025,
-0.1125, -0.0627, +0.2347, -0.8787, +0.1019, +0.9128, +0.2544,
-0.3933, +0.6485, -0.1936, -0.2402, +0.5012, -0.0918, +0.3160,
-0.7860, +0.3439, -0.4268, -0.1788, -0.3930, +0.5128, +0.2338,
+0.2571, +0.1343, +0.9850, -0.7074, +0.3532, +0.3048, -0.4542,
+0.5539, -0.4409, -0.2003, -0.4837, -0.3554, -0.4447, -0.0817, -0.8497
],
[
+0.0825, +0.5847, +0.4837, +0.5144, +0.4770, +0.0199, +0.4275,
-0.4530, +0.8499, -0.2840, +0.3817, -0.5098, -0.2155, -0.1475,
+0.1145, -0.1871, -0.0526, +0.3583, -0.3537, -0.7111, -0.6116,
+0.3406, -0.6360, +0.7786, +0.6628, -0.0493, +0.3505, -0.0376,
-0.6556, +1.0748, -0.5329, +0.6477, -0.7117, +0.3723, +0.6006,
+0.0171, +0.0012, +0.4910, -0.5651, -0.6868, +0.2403, +0.0254,
-0.4416, +0.7534, -0.0138, -1.1298, +0.5447, +0.0974, +0.1988,
-0.2161, -0.3126, -0.5731, -0.1278, +0.2995, -0.1200, -0.7917,
+0.5326, +0.4562, -0.0144, +0.5717, -0.4065, +0.1494, +0.7100, +0.2461
],
[
-0.2861, +0.4314, -0.2982, -0.1401, -0.1033, +0.5287, -0.6620,
-0.3975, +0.0038, +0.1991, -0.7079, -0.9000, +0.1659, +0.3623,
-0.0752, -0.1907, -0.2335, -0.5143, +0.2324, -0.0487, +0.1583,
-0.5989, +0.5957, +0.2150, -0.0335, +0.2154, +0.3279, -0.7976,
+0.5320, -0.4438, +0.2170, -0.3841, -0.0039, -0.0847, -0.0028,
-0.4278, -0.2393, -0.9239, +0.2880, -0.1437, -0.0941, -0.0796,
-0.3906, -0.3224, +0.1038, -0.1929, -0.2713, -0.4157, -0.2178,
+0.5729, -0.2065, +0.0059, +0.3879, +0.0590, +0.1759, +0.0677,
-0.0170, -0.2252, +0.3301, -0.0599, +0.3791, -0.1455, +0.2200, -0.0499
],
[
-0.4403, +0.7182, +0.7941, +0.1469, +1.5777, +0.3426, +0.0923,
+0.2160, +1.1492, -0.5206, -0.2659, -0.1504, +0.2739, -1.3939,
+0.8992, -1.1433, -0.3828, -0.2497, -0.2172, +0.2318, -0.3605,
+0.6413, -1.9095, +1.4785, -0.1274, -0.7208, -0.0802, -0.8779,
-1.6260, +0.9151, +0.8289, -0.0902, -0.3551, +0.6198, +1.7488,
+0.0739, -1.2022, -0.3536, -1.5187, +0.1839, +1.4258, +0.4217,
+0.1503, -0.0460, +0.2327, -0.4139, -0.3668, +0.2997, +0.6856,
+0.6917, -0.3856, -0.3620, +0.1578, -0.8349, -1.0796, -0.0319,
-1.1966, -0.8122, +0.5053, -0.5033, -0.9207, -0.1193, -0.7625, +0.1379
],
[
-0.0321, -0.3206, -0.4516, +0.3420, +1.0964, +0.0311, +0.4654,
-0.2367, +0.3347, -0.2798, -0.8169, -0.1555, +0.9397, -0.5597,
+0.7113, -0.3642, -0.2840, -0.1323, -0.1000, +0.2283, +0.3612,
-0.4784, +0.0504, +0.5310, -0.0887, +0.2926, +0.5069, -0.5645,
-0.0976, -0.2594, +0.4425, +0.9223, -0.5637, -0.2336, -0.1316,
-0.6564, -0.2780, -0.2409, -0.1637, +0.4506, +0.7018, -0.1299,
+0.7172, +0.1207, +0.4375, +0.3836, +0.2781, -0.7792, -0.5317,
+0.4510, +0.2423, -0.0588, -0.4254, -0.6381, -0.8205, +0.6417,
+0.1904, -0.2618, +0.5900, -0.3899, -0.7851, -0.4769, -0.3688, -0.3510
],
[
-0.8366, -0.3157, -0.1130, +0.2005, +0.3713, -0.4351, -0.1278,
-0.5689, +0.3229, -0.5981, -0.4917, -0.4160, -0.5504, +0.2225,
-0.1581, -0.6457, +0.1001, -1.0635, +0.2368, +0.2494, -0.4054,
-0.1699, -0.1316, +0.2614, +0.3016, +0.4222, -0.1548, -0.0766,
-0.5226, -0.3576, -0.2433, -0.5495, +0.0056, +0.0193, +0.2353,
+0.3986, +0.3580, -0.7886, +0.3928, +0.1831, +0.4319, +0.2276,
-0.3062, +0.0413, -0.4288, +0.1365, +0.3176, +0.3564, +0.5668,
-0.4401, -0.9576, -0.1435, +0.0304, -0.5575, +0.0412, -0.1096,
+0.2207, +0.1227, -0.0051, +0.5808, -0.1331, +0.1368, +0.4170, -0.8095
],
[
-0.6368, -1.3221, -0.4492, -1.5414, +0.4004, -2.8780, -0.1748,
-0.8166, +1.7066, +1.0714, -0.4755, +0.3020, +0.0422, +0.3466,
+0.4472, -0.6209, -3.3768, -0.0806, +1.3624, -2.4155, +1.0886,
+0.3412, +0.0891, +1.6821, -0.5361, +0.3952, +1.5120, +0.3910,
+1.9500, -0.9065, -1.3452, +0.0904, -0.0389, +0.2817, -1.8375,
+0.8131, -1.5287, +0.3115, +1.4069, -0.3424, +1.6101, +2.6775,
+0.5516, +1.6500, -0.4138, -0.0170, +1.0008, -0.7865, +0.0551,
+2.2068, -0.0108, +0.3207, -1.1884, +0.3792, -0.6435, +0.2858,
-0.6881, +0.1554, -1.6926, -0.0975, -1.4120, -0.0827, -1.5186, +0.2526
],
[
-0.2900, -0.2805, +0.9182, -0.8893, +0.7345, -0.9015, -0.2696,
+0.2344, +0.3889, +0.6790, +0.3657, -0.1995, -0.6738, -0.4166,
+0.1690, -0.3798, -0.9872, -0.2558, -0.4205, -0.6190, -0.0092,
-0.2261, -0.2738, +0.2977, -0.7348, +0.4872, +0.4776, -0.1364,
+0.5836, -0.2688, -0.4261, -0.3612, -0.3533, +0.4665, +0.0155,
+1.0116, -0.7139, -0.3707, -0.4429, -0.0383, +0.6716, +0.5972,
+0.3506, +0.3294, -1.3734, -0.5905, -0.1168, -0.2609, +0.3436,
+0.8277, +0.4965, +0.3005, -0.2929, +0.1501, -0.2655, +0.3860,
-0.3946, +0.8764, +0.7927, +0.0541, -1.0912, -0.2006, -0.6928, +0.4653
]])
weights_dense1_b = np.array([
-0.1146, +0.2897, +0.0137, +0.0822, +0.0367, +0.0951, -0.0657, +0.0653,
-0.0729, -0.0501, -0.6380, -0.4403, +0.0660, +0.0693, -0.4353, -0.2766,
-0.1258, -0.6947, -0.1616, -0.0453, +0.1498, -0.2340, -0.0764, +0.2020,
+0.4812, +0.0908, -0.1883, -0.0753, -0.0373, -0.4172, -0.1071, +0.0861,
-0.1550, -0.0648, -0.1473, +0.1507, -0.0121, -0.5468, -0.1529, -0.3341,
+0.0239, -0.0463, -0.0044, -0.0541, +0.0384, +0.3028, +0.3378, +0.0965,
+0.0740, +0.1948, -0.1655, +0.1558, +0.1367, -0.1514, +0.0540, -0.0015,
-0.1256, +0.3402, -0.0273, -0.2239, -0.0073, -0.6246, +0.0755, -0.2002
])
weights_dense1_b = np.array([ -0.1146, +0.2897, +0.0137, +0.0822, +0.0367, +0.0951, -0.0657, +0.0653, -0.0729, -0.0501, -0.6380, -0.4403, +0.0660, +0.0693, -0.4353, -0.2766, -0.1258, -0.6947, -0.1616, -0.0453, +0.1498, -0.2340, -0.0764, +0.2020, +0.4812, +0.0908, -0.1883, -0.0753, -0.0373, -0.4172, -0.1071, +0.0861, -0.1550, -0.0648, -0.1473, +0.1507, -0.0121, -0.5468, -0.1529, -0.3341, +0.0239, -0.0463, -0.0044, -0.0541, +0.0384, +0.3028, +0.3378, +0.0965, +0.0740, +0.1948, -0.1655, +0.1558, +0.1367, -0.1514, +0.0540, -0.0015, -0.1256, +0.3402, -0.0273, -0.2239, -0.0073, -0.6246, +0.0755, -0.2002])
weights_dense2_w = np.array([
[ +0.5019, +0.3831, +0.6726, +0.3767, +0.2021, -0.1615, +0.3882, -0.0487, -0.2713, +0.1173, -0.2218, +0.0598, +0.0819, -0.1157, +0.5879, -0.3587, +0.1376, -0.2595, +0.0257, -0.1182, +0.0723, +0.5612, -0.4087, -0.4651, +0.0631, +0.1786, +0.1206, +0.4791, +0.5922, -0.4444, +0.3446, -0.0464],
[ -0.0485, +0.0739, -0.6915, +0.5446, -0.2461, +0.1557, +0.8993, -0.7537, +0.1149, +0.0575, -0.1714, -0.3796, +0.3508, -0.2315, +0.4389, -1.4456, -1.3490, -0.1598, -1.0354, -0.2320, -0.3765, +0.1070, -0.7107, +0.4150, +0.2711, -0.2915, -0.7957, +0.7753, -0.0425, -0.1352, +0.3018, -0.0069],
[ -0.4047, +1.0040, -0.4570, +0.3017, +0.1477, -0.0163, +0.4087, -0.6368, -0.0764, -0.0695, +0.0208, -0.2411, +0.1936, +0.0047, +0.0107, -0.8538, -0.5887, -0.0524, -1.4902, +0.2858, +0.4396, -0.3433, -0.6778, -0.7137, +0.4587, +0.3359, -0.7350, -1.0813, -0.1296, +0.1748, -0.3830, -0.2103],
[ +0.0503, -0.3342, -0.6057, +0.2217, +0.3164, -0.1881, -0.5867, -0.2471, -0.2527, -0.0444, +0.1874, -0.0960, +0.2039, -0.0488, +0.1741, -0.1623, -0.0758, -0.2354, -0.5986, -0.2129, -0.2470, +0.3317, -0.4795, -0.6380, +0.1494, +0.0115, -0.2746, -0.8428, -0.0118, -0.0604, +0.0886, -0.0408],
[ -0.1932, -1.3896, +0.3919, -0.4700, -0.9806, -0.1554, +0.3132, +0.4138, -0.4943, -0.1408, -0.0976, +0.1551, -0.0180, +0.0864, -0.0053, -0.2430, +0.4948, +0.2709, -0.3488, +0.2085, -0.2124, -0.3025, -0.0692, +0.3884, +0.5764, +0.5783, +0.4351, -0.2633, -0.9288, +0.2218, -0.9049, -0.2970],
[ -0.2841, -0.3393, -0.1062, -0.1415, +0.0257, +0.0816, -0.4833, -0.2775, +0.0308, -0.0344, +0.5451, +0.1588, -0.7454, -0.1444, +0.4189, -0.2001, -2.0586, -0.0616, -1.4463, +0.0076, -0.7703, +0.3279, -0.7009, +0.6046, -0.1615, -0.5188, -0.7503, +0.0615, +0.1815, -0.2512, +0.0321, -0.1834],
[ +0.3751, +0.2932, -0.6815, +0.3771, +0.0603, -0.2035, -0.2644, -1.0120, -0.0661, -0.0846, +0.1209, +0.0367, +0.0493, -0.2603, -0.1608, -0.7580, -0.8609, +0.1415, -0.7626, -1.0209, -0.7498, -0.0732, -0.8138, -0.2292, +0.5803, -0.2718, -1.4684, -0.1584, +0.2096, +0.1336, +0.3632, +0.0216],
[ -0.0625, -0.1233, -0.2715, +0.5541, +0.3121, +0.0265, +0.4501, -1.1024, -0.1160, -0.1005, -0.0844, -0.0516, +0.0916, +0.0901, +0.3710, -0.5753, -0.3728, -0.1103, -0.6285, -0.2179, +0.1570, +0.1168, -0.9312, +0.0135, -0.0376, -0.1693, -0.5358, -0.0028, +0.2105, -0.7373, +0.2776, +0.2326],
[ -0.5378, -0.3201, +0.3606, +0.1331, +0.0120, -0.2421, -0.0230, +0.4622, -0.3140, +0.0803, -0.6897, -0.4704, +0.2685, +0.0803, -0.7654, -0.1433, +0.0242, +0.0917, +0.2458, +0.0457, -0.2503, -0.1197, +0.1454, -0.1523, -0.4095, +0.1856, +0.0678, -1.0011, +0.0117, +0.1789, -0.4063, -0.0888],
[ -0.6352, -0.6358, -0.2066, +0.0758, -0.2973, -0.3014, -0.0556, -0.0912, -0.2729, -0.1492, -0.1928, -1.8768, +0.2183, +0.0801, +0.1288, -1.2493, +0.1115, +0.2797, -0.1458, +0.0062, -0.0402, -0.8945, -0.2231, -0.1154, +0.3635, -0.3021, +0.1402, -0.7347, +0.2772, +0.3182, -0.9708, +0.0376],
[ +0.6899, +0.3471, -0.5863, +0.1497, +0.1616, -0.0497, +0.3579, -0.6421, +0.4529, -0.1588, +0.9250, +0.2357, -0.0712, -0.1793, -0.0231, -0.4814, -0.7654, +0.0591, -0.6866, -0.1705, +0.2255, -0.0007, -0.3890, +0.6114, +0.0443, -0.6929, -0.7734, +0.2314, -0.0231, -0.6386, +0.1237, +0.0472],
[ -0.2496, -0.1687, +0.1234, +0.4152, +0.4207, -0.1398, +0.1287, +0.5903, +0.0530, -0.1181, +0.0803, -0.0641, -0.1198, -0.4702, -0.3669, +0.2340, -0.3778, +0.4341, +0.2411, -0.2171, -0.3051, -0.2397, +0.1756, +0.4040, +0.0682, +0.1575, +0.4137, +0.0887, -0.1998, +0.2221, -0.2474, -0.0559],
[ -2.2466, -1.2725, +0.5947, -0.3192, -0.2665, -0.0129, -0.7615, +0.1148, +0.2745, -0.0556, -1.3313, -0.7143, -0.5119, -0.0572, -0.1892, -0.3294, -0.0187, -0.7696, +1.0413, +0.4226, +0.1378, -1.3668, +0.9806, -0.1810, -0.2307, -0.4924, +0.7163, -1.2529, -0.3216, +0.1683, -0.6657, -0.1121],
[ +0.1227, +0.2433, -0.1292, -0.7152, -0.1342, -0.1116, -0.2695, +0.0243, -0.0770, -0.1713, +0.2837, +0.2076, -0.7322, -0.1657, -0.3407, -0.4378, +0.0679, -0.3777, +0.3025, -0.6780, -0.2326, +0.1463, +0.0535, -0.6373, -0.2027, -0.5404, -0.1598, +0.1511, -0.1776, +0.0854, +0.1753, -0.0342],
[ -0.1772, -0.2654, -0.4170, -0.3301, +0.2956, -0.4234, +0.0835, +0.2869, -0.2804, -0.2073, -0.3291, -0.5897, -0.4116, -0.0447, +0.1601, +0.1602, +0.1691, -0.2014, -0.0502, +0.1167, -1.0103, -0.4297, -0.2039, -0.0859, +0.2756, -0.1768, -0.2726, -0.0256, -0.0834, +0.0852, +0.0930, -0.0606],
[ -0.5390, -0.5441, +0.3202, -0.1018, +0.0059, +0.1799, -0.1917, +0.3674, +0.2576, -0.0707, -0.4401, -0.3990, +0.0565, +0.0751, -0.5959, +0.3866, +0.2763, -0.2564, +0.4937, +0.5076, +0.3941, -0.3593, +0.4346, +0.2561, -0.0762, -0.2873, +0.6820, -0.3032, -0.3268, +0.1319, -0.3643, +0.0292],
[ +0.1816, -0.0451, -0.9370, +0.1335, -0.1030, -0.0400, +0.0311, -1.3751, -0.1860, +0.1559, +0.5395, +0.3994, -0.1703, -0.1157, +0.6342, -0.4726, -0.6213, -0.2096, -0.7549, -0.9815, -0.3798, +0.5286, -0.8413, +0.2577, +0.2223, -1.2260, -1.3571, -0.0970, +0.3334, -0.2096, +0.3566, -0.1703],
[ +0.0635, +0.1541, -0.2206, +0.0924, +0.1302, +0.1947, -0.3868, -0.6834, -0.0603, -0.3752, +0.3103, -0.1699, -0.0833, -0.1190, -0.0310, -0.5480, -1.1421, -0.0020, -0.3611, -0.3800, -0.0638, +0.0811, -0.5886, +0.0690, +0.1925, +0.0710, -0.3142, +0.1837, +0.2125, -0.1217, +0.2185, +0.0458],
[ -0.3973, +0.0486, +0.2518, -0.3208, +0.1218, -0.5324, -0.3417, +0.0322, -0.0088, +0.0214, +0.2725, +0.0960, -0.2949, -0.1770, -0.1511, +0.0259, +0.1161, -0.8829, +0.2415, +0.0939, -0.7213, +0.2220, +0.1687, -0.1802, -0.0539, +0.1786, +0.6638, +0.3559, +0.2343, +0.3212, +0.4396, -0.1385],
[ -0.2384, -0.5346, -0.2323, -0.2277, +0.3503, -0.0308, -0.2004, -0.1096, -0.2587, -0.1143, +0.2579, +0.2382, -0.5883, -0.1277, +0.2257, -0.0244, -0.9605, -0.4244, -0.7321, +0.3017, -1.6256, -0.2074, -0.8327, +0.0607, -0.0751, -0.0153, -0.4485, +0.1758, +0.1821, +0.2625, +0.0108, -0.2395],
[ -0.5639, -0.3613, +0.1291, -0.2132, +0.4927, -0.0604, -0.8067, +0.0933, -0.1483, -0.0321, -0.6843, -0.3064, -0.5646, -0.2040, -0.0414, +0.6092, +0.4903, -0.9346, +0.3389, +0.2040, -0.0295, -0.2196, +0.4264, +0.0312, -1.1801, +0.3008, +0.7719, +0.2140, -0.0257, +0.5275, -0.0553, +0.0362],
[ -0.6039, -1.2848, +0.6301, -0.1285, +0.2338, -0.2585, -0.3217, +0.4326, +0.0441, -0.0356, -0.5720, -0.8739, -0.3924, +0.2499, -0.2620, +0.1396, -0.0701, -0.2239, +0.2612, +0.1646, +0.7769, -0.6382, +0.8720, -0.1956, -0.1779, -0.1608, -0.0358, -0.4453, -0.1154, +0.5532, -0.9282, +0.0031],
[ -0.1990, +0.3708, -0.0049, -0.3260, -0.0465, +0.0415, +0.1601, +0.0019, +0.0114, +0.0438, +0.0893, +0.3056, -0.6166, +0.1145, -0.6742, +0.0483, +0.0739, -0.1139, +0.5772, -1.5569, +0.4253, -0.0769, +0.4014, -0.6817, +0.0228, -0.0383, -0.0844, -0.1560, +0.1414, -0.3420, +0.3664, -0.2293],
[ -0.0917, -0.8692, +0.4704, +0.1796, -0.1346, -0.5246, +0.0622, +0.3420, -0.5879, -0.0445, -0.3444, -0.0490, +0.0956, -0.0753, -0.8856, +0.1275, +0.1592, +0.3569, +0.1774, +0.2723, +0.1125, -0.1718, +0.2451, -0.0132, +0.1584, -0.0197, +0.0700, -0.2156, +0.0094, +0.4639, -0.6721, -0.2180],
[ +0.0578, -0.1570, -0.1623, -0.1359, +0.1346, +0.1821, -0.0696, -0.0570, +0.0011, +0.1216, +0.1069, -0.0841, +0.1017, -0.1663, -0.6005, -0.4583, -0.2606, -0.0292, +0.0321, -0.5614, -0.4416, +0.0355, +0.2081, +0.3517, +0.0619, -1.0007, -0.0765, +0.1769, -0.1286, +0.5833, -0.1758, -0.1957],
[ -0.0013, +0.3157, +0.0395, -1.0792, -0.1198, -0.2945, -0.0090, +0.3281, -0.0618, -0.0806, +0.0768, +0.2802, -0.2311, -0.2302, +0.0506, +0.0552, +0.3727, +0.3610, +0.2029, -0.1743, +0.4557, -0.1761, -0.5039, -0.9115, +0.2842, +0.1317, -0.5961, -0.4214, -1.0727, +0.3308, +0.2380, -0.3348],
[ +0.2455, -0.1299, +0.3117, -1.0169, -0.3417, +0.0310, -0.4793, +0.5334, -0.4799, -0.3291, -0.1344, +0.3732, -0.1514, +0.1574, -0.1819, -0.0206, +0.5675, -0.6992, +0.4815, -0.1497, -0.3804, +0.1389, +0.5850, -0.2920, +0.2569, -0.3527, +0.3641, -0.2014, -0.1457, +0.2365, -0.2335, -0.2610],
[ -0.2252, +0.1225, +0.0953, -0.0193, +0.3955, -0.0800, +0.0090, -0.4155, +0.1851, +0.3392, -0.3260, -0.3907, +0.1320, +0.1266, +0.0579, +0.1819, -0.5793, -0.2230, +0.1351, -0.1519, -0.0527, -0.0036, +0.1243, +0.1387, -0.2874, -0.4997, -0.3251, +0.0435, -0.5244, +0.1051, -0.2081, +0.2126],
[ -0.6574, +0.6789, +0.1026, -0.5191, +0.1058, -0.6812, +0.1798, -0.1029, +0.0757, -0.0089, +0.1539, +0.4688, -0.1306, +0.0595, -0.8136, -0.4843, +0.3675, +0.1800, +0.2641, -0.0589, +0.0613, +0.2019, -0.0765, -0.1216, -0.4588, +0.0629, +0.1133, +0.7055, -2.8075, +0.3867, +0.4791, -0.1118],
[ +0.2771, +0.3461, -0.8556, -0.0316, +0.3640, -0.1380, -0.3765, -0.9258, -0.0693, -0.1283, +0.0576, -0.0792, +0.4468, -0.5001, +0.5939, -1.2226, -0.9252, -0.3980, -1.3444, -0.9488, -0.7199, +0.4289, -1.8749, -0.0867, +0.3905, -0.4535, -0.5607, -0.2247, -0.0359, -0.4125, +0.7124, -0.1963],
[ -0.2584, -0.5358, -0.0916, +0.0765, +0.0615, -0.1887, -0.2253, -0.7855, -0.0061, -0.1887, +0.5511, +0.3207, -0.2055, -0.1694, +0.4772, -1.0356, -0.9884, -0.2620, -0.1214, +0.9733, -0.9700, -0.3205, -0.7005, -0.2960, +0.1132, -0.0352, +0.3491, -0.2440, +0.1108, +0.1083, +0.3029, -0.0031],
[ -0.6217, +0.1238, +0.0245, -0.1769, -0.2487, +0.0526, -0.0090, +0.1370, +0.2666, -0.0743, -0.8230, -0.7723, -0.0929, -0.1532, +0.6103, -0.4931, -1.3329, -0.3735, +0.0217, -0.1539, -0.4946, -1.0838, -0.5840, +0.1618, +0.2584, +0.4200, +0.1171, -0.5601, +0.1604, +0.0864, +0.2287, -0.0057],
[ -0.2220, +0.4837, -0.0825, +0.0143, +0.2734, -0.0853, +0.1578, -0.0112, +0.1829, +0.0390, +0.2151, -0.1538, -0.1111, -0.0773, +0.3439, -0.2134, -0.2884, -0.3831, +0.2767, -0.3149, +0.1463, +0.3230, +0.2187, -0.2309, -0.1096, +0.3709, -0.0105, +0.3709, +0.3034, -0.7602, +0.5988, -0.0595],
[ -0.6073, +0.1780, +0.1682, +0.1604, +0.3662, -0.0385, -0.1495, +0.3012, -0.2065, -0.0163, -1.0465, -0.8268, -0.0190, +0.0964, -0.2755, +0.0965, -0.3466, -0.3758, -0.1113, +0.1462, +0.3280, -0.1600, +0.1023, +0.1998, -0.3642, +0.2736, +0.3782, -0.2681, +0.2334, +0.1721, +0.0385, +0.0348],
[ -0.0582, -0.5750, +0.1279, +0.3630, -0.2404, -0.1511, +0.2650, -0.0324, -0.2258, +0.0007, +0.3051, -0.1875, -0.5106, +0.0104, +0.1335, -0.5282, -0.2210, +0.2648, -0.7506, +0.4975, -1.7048, +0.2378, -0.1771, +0.2981, +0.1252, +0.1384, -0.3384, -0.0830, +0.0966, +0.3728, -0.1980, -0.1953],
[ -1.0735, -0.2780, +0.1428, -0.0624, -0.0311, -0.2687, -0.1623, +0.2996, +0.1782, -0.1403, -0.3761, -1.3413, -0.2020, -0.0492, -0.6636, -0.2737, +0.2228, +0.3109, +0.1596, +0.0172, +0.1325, -1.4936, -0.0615, -0.1547, -0.2285, +0.2648, -0.1008, -1.6756, -0.2352, +0.0998, -0.4550, +0.2028],
[ -0.3866, -0.0107, +0.1052, +0.1423, +0.1160, +0.1712, -0.6206, -0.3505, -0.3298, -0.0362, +0.6768, +0.2086, -0.4348, -0.3577, +0.0131, -0.1640, +0.0160, -0.3891, -0.0180, -0.1064, -0.2494, +0.0340, +0.2225, -0.1320, -0.3550, -0.3005, +0.0118, +0.2782, +0.4691, -1.3792, +0.1971, -0.0598],
[ +0.0215, +0.1885, -0.5360, -0.1283, +0.4689, +0.1426, -0.2809, -0.8197, +0.1951, -0.1620, +0.0627, +0.2864, -0.3069, -0.1170, +0.0545, -0.4527, -0.6646, -0.1546, -0.1794, -0.5350, -0.1060, -0.0198, -0.5782, -0.2201, +0.0361, -0.2497, -0.1527, -0.1489, +0.1034, +0.0925, +0.0368, -0.0352],
[ +0.2459, +0.3230, -0.0494, -0.5631, +0.0600, -0.3036, -0.5443, +0.1081, -0.2231, +0.0734, +0.0289, +0.4205, -0.6415, -0.1305, -0.0717, +0.2971, +0.0476, -1.3001, +0.5122, -0.0005, -0.3572, +0.0727, +0.1713, -0.4751, -0.3614, -0.0957, -0.0942, +0.0580, +0.2393, +0.0038, +0.1938, -0.1704],
[ +0.3352, -0.0882, -0.0349, -0.6093, +0.4262, -0.1350, -0.0687, -0.2459, -0.5564, -0.2956, +0.1619, -0.0813, -0.5128, -0.2209, +0.3870, -0.0804, +0.7676, -0.1745, -0.3860, -0.5517, -0.6899, -0.6400, +0.6095, -0.5337, +0.3452, -0.6608, +0.0662, +0.1741, +0.1653, -0.4191, +0.1051, -0.3116],
[ -0.0527, -1.3119, +0.3441, -0.0041, -0.5938, -0.4224, +0.3973, +0.4673, -0.0613, -0.0191, +0.1297, -0.2211, -0.0880, +0.0319, +0.0661, -0.2075, +0.4380, +0.3197, +0.0989, +0.2346, -0.0142, -1.2137, +0.1618, -0.3300, +0.4591, +0.4910, +0.3537, -0.5902, -0.0616, +0.2882, -0.0900, -0.0208],
[ -0.7068, -0.7952, +0.4496, +0.1237, -0.2000, -0.5966, +0.3920, +0.3458, +0.0036, -0.0666, -0.3061, -0.1172, +0.0446, +0.1768, -0.5318, +0.2083, +0.3371, +0.1497, +0.4244, +0.3980, +0.2023, -0.8931, +0.1860, -0.6889, -0.3250, +0.1250, +0.1510, -0.3405, -0.4040, +0.1598, -0.9933, +0.0233],
[ -1.2305, -0.3178, +0.0536, -0.0585, -0.7097, +0.3196, +0.2899, +0.8200, +0.0384, +0.1733, -1.1839, -2.2545, +0.0653, +0.1376, -0.1359, -0.1202, -0.0831, -0.5397, +0.1100, +0.1386, -0.1271, -0.6298, +0.1038, -0.1213, -0.1461, -0.4508, +0.5106, -0.8266, -0.6204, +0.3753, -0.4897, -0.0751],
[ -0.3676, -0.5547, +0.0897, -0.0230, -0.3045, -0.1885, -0.5990, +0.3622, -0.2240, -0.1205, -0.3056, +0.7085, +0.0053, -0.1213, -0.3023, +0.1433, -0.2090, -0.0412, +0.2561, +0.1313, -0.2803, +0.2543, +0.0571, -0.9791, -0.0167, -0.2928, -0.3020, -0.2271, +0.0507, -0.1310, -0.6347, -0.0889],
[ -0.2794, +0.0675, -1.0020, -0.2234, +0.3937, -0.2857, +0.1058, -1.0755, -0.0377, -0.2753, -0.0501, -0.0493, -0.2987, -0.2214, +0.2869, -1.0882, -1.2635, -1.2235, -0.5762, -0.4528, -0.1372, -0.0192, -1.3768, +0.2337, +0.2008, -0.2517, -0.3918, -0.6362, -0.1762, -0.9261, +0.1711, -0.0094],
[ -0.1099, -0.2142, -0.0006, -0.4617, -0.0286, +0.3482, -0.7728, -0.4384, +0.0050, -0.0151, +0.1974, +0.2815, -0.5295, -0.2581, +0.3404, -1.6254, -1.3208, -0.1648, -0.5207, +0.4104, -0.2795, +0.0613, -1.5642, -0.1178, -0.1354, +0.0375, +0.3323, +0.0540, +0.2038, -0.3223, +0.4603, -0.3780],
[ -0.3999, -0.3719, +0.1918, -0.4738, -0.0009, +0.0419, +0.1046, +0.2675, +0.1359, -0.2536, -0.3485, -0.3118, -0.3613, +0.0914, -0.4486, +0.2719, +0.2876, -0.0685, +0.4309, +0.1856, +0.4678, -0.3314, +0.0211, +0.2575, +0.5077, -0.1494, +0.5110, -0.6869, -1.4053, +0.3093, -0.2914, -0.1501],
[ +0.3543, +0.3915, +0.0536, +0.3995, +0.2165, -0.1133, -0.1209, +0.0824, -0.0723, -0.0774, -0.4248, -0.0243, -0.1089, -0.1408, +0.2072, -0.1309, -1.5186, -0.4079, -0.0530, -0.3525, +0.6782, +0.1991, -0.0292, +0.1339, -0.1074, +0.2312, +0.1969, +0.4662, +0.5312, -0.3306, +0.0622, +0.1057],
[ -1.1778, +0.2978, +0.0443, +0.1657, +0.1317, -0.1250, -0.0459, +0.0777, +0.1359, -0.0055, +0.2364, -2.3659, +0.2214, -0.1489, -0.3051, -0.5094, +0.1495, +0.3328, +0.1264, -0.0217, +0.2321, -0.6466, -0.1813, +0.5276, +0.1975, +0.3752, +0.1469, -0.8019, +0.2427, +0.1543, +0.2140, -0.1592],
[ -0.7753, -1.3502, +0.3157, +0.1847, +0.0661, -0.5501, +0.3482, +0.6112, +0.0207, +0.0534, -0.2106, -1.0144, -0.0836, -0.0275, -1.0761, +0.2131, +0.3135, +0.3134, +0.1974, +0.0182, +0.1975, -1.1221, +0.2958, -0.2610, +0.0865, +0.3592, +0.4317, -0.3505, -0.4557, +0.3033, -0.5797, -0.2988],
[ +0.4103, -0.0643, +0.0803, +0.2177, +0.1028, -0.2668, +0.0084, -0.2340, -0.2571, +0.0334, +0.3451, -0.0055, +0.0216, -0.1460, +0.5293, -0.2615, -0.3035, +0.1736, -0.4206, -0.2186, +0.1343, +0.6001, -0.0499, -0.2777, -0.0160, -0.4303, -0.2795, +0.1932, +0.4219, -0.0800, +0.1819, -0.1007],
[ -0.7074, -0.0546, +0.4495, +0.1427, +0.3306, +0.0811, -0.5433, -0.0609, -0.2128, -0.1059, -1.0477, -0.4679, -0.1780, -0.1373, -0.3672, +0.0724, -0.0554, -0.5400, +0.0457, -0.0469, -0.0367, -0.4609, +0.1668, -0.0266, -0.9007, +0.2975, +0.5204, -0.0453, -0.1314, -0.0980, +0.1424, -0.1877],
[ +0.0657, +0.1230, -0.2558, +0.3103, -0.0795, -0.1243, +0.1956, +0.0262, -0.2626, -0.0554, +0.3760, +0.3076, -0.4633, +0.0790, +0.2363, -0.3311, +0.1235, -0.1727, -0.2468, +0.0188, -0.1121, -0.2807, -0.5865, -0.4197, +0.1949, -0.4970, -1.0413, -0.1698, +0.1798, +0.2004, -0.0514, +0.0254],
[ -0.1566, -1.1156, +0.4431, -0.1503, -0.5682, +0.1822, -0.1201, +0.5151, -0.1386, -0.1764, +0.2063, -0.8582, +0.3750, -0.1405, +0.0852, +0.2641, -0.1951, -0.0575, -0.4181, +0.2273, +0.1332, -0.2797, +0.5406, -0.0869, +0.2453, +0.0648, +0.2252, -0.0628, -0.6882, -0.0514, -0.4663, -0.0954],
[ -0.4780, +0.5844, +0.1782, -0.0831, +0.1547, -0.0595, -0.5646, -0.0488, -0.1774, -0.0098, +0.1833, +0.3520, -0.3359, -0.1492, +0.1139, -0.1223, -0.5312, -0.5361, +0.1689, -0.2020, +0.1069, +0.2327, +0.2887, +0.0526, -0.5916, -0.2435, -0.2342, +0.3422, +0.4399, -1.1880, +0.1293, -0.1021],
[ -1.2784, -1.8266, +0.0630, -0.3332, -0.5833, -0.3733, +0.3265, +0.1977, +0.0716, -0.2575, +0.0403, -0.1961, +0.1541, -0.2311, -0.1734, -0.1785, +0.0168, +0.1134, +0.0407, -0.1661, +0.5985, -1.9681, +0.1342, +0.3432, +0.3934, +0.0663, +0.3141, -2.0177, -1.7071, +0.2942, -1.0684, -0.0737],
[ +0.1763, +0.2191, +0.2609, +0.0723, +0.1038, -0.2516, -0.9086, +0.1536, +0.0153, +0.1061, +0.1675, +0.3839, -0.5326, +0.2007, -0.4943, -0.1048, +0.1614, -0.4703, +0.3453, -0.7441, -0.6187, +0.4247, +0.1721, -0.1776, -0.0919, -0.8387, +0.0798, -0.0598, +0.2711, -0.0508, +0.1761, +0.0029],
[ -0.2003, +0.2194, -0.6280, +0.1593, +0.1648, -0.1007, +0.3162, -0.3881, -0.1584, -0.0148, +0.7057, +0.0085, +0.3488, +0.0977, +0.4018, -0.8195, -0.1944, +0.4359, -0.6605, -0.1929, +0.2237, +0.1087, -0.4213, -0.7149, +0.3972, -0.1313, -0.2815, -0.7234, -0.0561, -0.5364, +0.0178, +0.0349],
[ +0.0567, +0.1687, +0.0007, +0.2939, -1.3854, +0.0168, +0.1909, +0.4919, -0.4547, +0.0562, -0.1188, +0.1653, -0.0265, -0.0541, -0.1117, -0.3240, +0.2545, +0.6516, +0.0124, -0.1258, -0.0656, -0.3524, +0.0174, +0.3926, +0.1125, +0.2834, -0.1961, -0.3603, +0.1783, -0.0224, -0.6900, -0.1688],
[ +0.0672, +0.6339, -0.3839, +0.0077, +0.8224, -0.3197, -0.0589, -0.1318, +0.0222, -0.1530, +0.1237, +0.4014, -0.1952, -0.1130, +0.4214, -0.2741, +0.2291, +0.0757, +0.0563, -0.0967, +0.4210, +0.5133, +0.0412, -0.9212, +0.1377, -0.4068, -0.3652, +0.4283, +0.6182, -0.6187, +0.1997, +0.1240],
[ -0.0067, +0.3307, -0.7751, -0.2084, +0.4740, -0.0264, -0.0768, -0.9519, -0.0632, -0.0753, +0.3293, +0.5260, -0.6023, +0.0060, +0.2799, -0.2904, -0.8262, -0.6644, -0.3900, -0.1461, +0.4965, +0.3996, -0.7569, +0.0612, +0.5168, -0.5160, -0.4875, +0.3759, +0.0295, +0.1027, +0.6096, -0.0115],
[ -0.0110, +0.4652, -0.1486, -0.6029, +0.2581, -0.3184, -0.3759, +0.3213, -0.2748, -0.0630, +0.0953, +0.2101, -1.2738, -0.1353, +0.2710, -0.2276, +0.2586, -0.2347, -0.3320, +0.0487, -0.2318, -0.1002, +0.1236, +0.2660, -0.1172, +0.1437, -0.0850, +0.1659, -0.2152, -0.0764, +0.2838, -0.1325],
[ +0.0152, -0.0906, -0.1897, -0.3521, -0.1836, -0.1694, -0.4150, -0.1695, +0.0509, -0.0716, +0.3118, +0.2422, -0.5058, -0.0637, -0.1038, -0.2828, -0.0528, -0.2051, +0.2062, -0.2105, -0.7317, +0.1881, -0.2992, -0.0883, +0.0115, -1.5295, -0.1671, +0.0411, +0.0648, -0.0119, -0.2941, +0.0273],
[ +0.5028, +0.1780, -0.4643, -0.0373, +0.3067, -0.1974, +0.2643, -0.2365, -0.2083, +0.0472, +0.4830, +0.0630, +0.2155, -0.0916, +0.6290, -0.4427, -0.6266, +0.3576, -0.3541, -0.2034, +0.3733, +0.8247, -0.5837, -0.4372, +0.2696, -0.4042, -0.3436, +0.0355, -0.2288, -0.6382, +0.7358, -0.1229]
[
+0.5019, +0.3831, +0.6726, +0.3767, +0.2021, -0.1615, +0.3882, -0.0487,
-0.2713, +0.1173, -0.2218, +0.0598, +0.0819, -0.1157, +0.5879, -0.3587,
+0.1376, -0.2595, +0.0257, -0.1182, +0.0723, +0.5612, -0.4087, -0.4651,
+0.0631, +0.1786, +0.1206, +0.4791, +0.5922, -0.4444, +0.3446, -0.0464
],
[
-0.0485, +0.0739, -0.6915, +0.5446, -0.2461, +0.1557, +0.8993, -0.7537,
+0.1149, +0.0575, -0.1714, -0.3796, +0.3508, -0.2315, +0.4389, -1.4456,
-1.3490, -0.1598, -1.0354, -0.2320, -0.3765, +0.1070, -0.7107, +0.4150,
+0.2711, -0.2915, -0.7957, +0.7753, -0.0425, -0.1352, +0.3018, -0.0069
],
[
-0.4047, +1.0040, -0.4570, +0.3017, +0.1477, -0.0163, +0.4087, -0.6368,
-0.0764, -0.0695, +0.0208, -0.2411, +0.1936, +0.0047, +0.0107, -0.8538,
-0.5887, -0.0524, -1.4902, +0.2858, +0.4396, -0.3433, -0.6778, -0.7137,
+0.4587, +0.3359, -0.7350, -1.0813, -0.1296, +0.1748, -0.3830, -0.2103
],
[
+0.0503, -0.3342, -0.6057, +0.2217, +0.3164, -0.1881, -0.5867, -0.2471,
-0.2527, -0.0444, +0.1874, -0.0960, +0.2039, -0.0488, +0.1741, -0.1623,
-0.0758, -0.2354, -0.5986, -0.2129, -0.2470, +0.3317, -0.4795, -0.6380,
+0.1494, +0.0115, -0.2746, -0.8428, -0.0118, -0.0604, +0.0886, -0.0408
],
[
-0.1932, -1.3896, +0.3919, -0.4700, -0.9806, -0.1554, +0.3132, +0.4138,
-0.4943, -0.1408, -0.0976, +0.1551, -0.0180, +0.0864, -0.0053, -0.2430,
+0.4948, +0.2709, -0.3488, +0.2085, -0.2124, -0.3025, -0.0692, +0.3884,
+0.5764, +0.5783, +0.4351, -0.2633, -0.9288, +0.2218, -0.9049, -0.2970
],
[
-0.2841, -0.3393, -0.1062, -0.1415, +0.0257, +0.0816, -0.4833, -0.2775,
+0.0308, -0.0344, +0.5451, +0.1588, -0.7454, -0.1444, +0.4189, -0.2001,
-2.0586, -0.0616, -1.4463, +0.0076, -0.7703, +0.3279, -0.7009, +0.6046,
-0.1615, -0.5188, -0.7503, +0.0615, +0.1815, -0.2512, +0.0321, -0.1834
],
[
+0.3751, +0.2932, -0.6815, +0.3771, +0.0603, -0.2035, -0.2644, -1.0120,
-0.0661, -0.0846, +0.1209, +0.0367, +0.0493, -0.2603, -0.1608, -0.7580,
-0.8609, +0.1415, -0.7626, -1.0209, -0.7498, -0.0732, -0.8138, -0.2292,
+0.5803, -0.2718, -1.4684, -0.1584, +0.2096, +0.1336, +0.3632, +0.0216
],
[
-0.0625, -0.1233, -0.2715, +0.5541, +0.3121, +0.0265, +0.4501, -1.1024,
-0.1160, -0.1005, -0.0844, -0.0516, +0.0916, +0.0901, +0.3710, -0.5753,
-0.3728, -0.1103, -0.6285, -0.2179, +0.1570, +0.1168, -0.9312, +0.0135,
-0.0376, -0.1693, -0.5358, -0.0028, +0.2105, -0.7373, +0.2776, +0.2326
],
[
-0.5378, -0.3201, +0.3606, +0.1331, +0.0120, -0.2421, -0.0230, +0.4622,
-0.3140, +0.0803, -0.6897, -0.4704, +0.2685, +0.0803, -0.7654, -0.1433,
+0.0242, +0.0917, +0.2458, +0.0457, -0.2503, -0.1197, +0.1454, -0.1523,
-0.4095, +0.1856, +0.0678, -1.0011, +0.0117, +0.1789, -0.4063, -0.0888
],
[
-0.6352, -0.6358, -0.2066, +0.0758, -0.2973, -0.3014, -0.0556, -0.0912,
-0.2729, -0.1492, -0.1928, -1.8768, +0.2183, +0.0801, +0.1288, -1.2493,
+0.1115, +0.2797, -0.1458, +0.0062, -0.0402, -0.8945, -0.2231, -0.1154,
+0.3635, -0.3021, +0.1402, -0.7347, +0.2772, +0.3182, -0.9708, +0.0376
],
[
+0.6899, +0.3471, -0.5863, +0.1497, +0.1616, -0.0497, +0.3579, -0.6421,
+0.4529, -0.1588, +0.9250, +0.2357, -0.0712, -0.1793, -0.0231, -0.4814,
-0.7654, +0.0591, -0.6866, -0.1705, +0.2255, -0.0007, -0.3890, +0.6114,
+0.0443, -0.6929, -0.7734, +0.2314, -0.0231, -0.6386, +0.1237, +0.0472
],
[
-0.2496, -0.1687, +0.1234, +0.4152, +0.4207, -0.1398, +0.1287, +0.5903,
+0.0530, -0.1181, +0.0803, -0.0641, -0.1198, -0.4702, -0.3669, +0.2340,
-0.3778, +0.4341, +0.2411, -0.2171, -0.3051, -0.2397, +0.1756, +0.4040,
+0.0682, +0.1575, +0.4137, +0.0887, -0.1998, +0.2221, -0.2474, -0.0559
],
[
-2.2466, -1.2725, +0.5947, -0.3192, -0.2665, -0.0129, -0.7615, +0.1148,
+0.2745, -0.0556, -1.3313, -0.7143, -0.5119, -0.0572, -0.1892, -0.3294,
-0.0187, -0.7696, +1.0413, +0.4226, +0.1378, -1.3668, +0.9806, -0.1810,
-0.2307, -0.4924, +0.7163, -1.2529, -0.3216, +0.1683, -0.6657, -0.1121
],
[
+0.1227, +0.2433, -0.1292, -0.7152, -0.1342, -0.1116, -0.2695, +0.0243,
-0.0770, -0.1713, +0.2837, +0.2076, -0.7322, -0.1657, -0.3407, -0.4378,
+0.0679, -0.3777, +0.3025, -0.6780, -0.2326, +0.1463, +0.0535, -0.6373,
-0.2027, -0.5404, -0.1598, +0.1511, -0.1776, +0.0854, +0.1753, -0.0342
],
[
-0.1772, -0.2654, -0.4170, -0.3301, +0.2956, -0.4234, +0.0835, +0.2869,
-0.2804, -0.2073, -0.3291, -0.5897, -0.4116, -0.0447, +0.1601, +0.1602,
+0.1691, -0.2014, -0.0502, +0.1167, -1.0103, -0.4297, -0.2039, -0.0859,
+0.2756, -0.1768, -0.2726, -0.0256, -0.0834, +0.0852, +0.0930, -0.0606
],
[
-0.5390, -0.5441, +0.3202, -0.1018, +0.0059, +0.1799, -0.1917, +0.3674,
+0.2576, -0.0707, -0.4401, -0.3990, +0.0565, +0.0751, -0.5959, +0.3866,
+0.2763, -0.2564, +0.4937, +0.5076, +0.3941, -0.3593, +0.4346, +0.2561,
-0.0762, -0.2873, +0.6820, -0.3032, -0.3268, +0.1319, -0.3643, +0.0292
],
[
+0.1816, -0.0451, -0.9370, +0.1335, -0.1030, -0.0400, +0.0311, -1.3751,
-0.1860, +0.1559, +0.5395, +0.3994, -0.1703, -0.1157, +0.6342, -0.4726,
-0.6213, -0.2096, -0.7549, -0.9815, -0.3798, +0.5286, -0.8413, +0.2577,
+0.2223, -1.2260, -1.3571, -0.0970, +0.3334, -0.2096, +0.3566, -0.1703
],
[
+0.0635, +0.1541, -0.2206, +0.0924, +0.1302, +0.1947, -0.3868, -0.6834,
-0.0603, -0.3752, +0.3103, -0.1699, -0.0833, -0.1190, -0.0310, -0.5480,
-1.1421, -0.0020, -0.3611, -0.3800, -0.0638, +0.0811, -0.5886, +0.0690,
+0.1925, +0.0710, -0.3142, +0.1837, +0.2125, -0.1217, +0.2185, +0.0458
],
[
-0.3973, +0.0486, +0.2518, -0.3208, +0.1218, -0.5324, -0.3417, +0.0322,
-0.0088, +0.0214, +0.2725, +0.0960, -0.2949, -0.1770, -0.1511, +0.0259,
+0.1161, -0.8829, +0.2415, +0.0939, -0.7213, +0.2220, +0.1687, -0.1802,
-0.0539, +0.1786, +0.6638, +0.3559, +0.2343, +0.3212, +0.4396, -0.1385
],
[
-0.2384, -0.5346, -0.2323, -0.2277, +0.3503, -0.0308, -0.2004, -0.1096,
-0.2587, -0.1143, +0.2579, +0.2382, -0.5883, -0.1277, +0.2257, -0.0244,
-0.9605, -0.4244, -0.7321, +0.3017, -1.6256, -0.2074, -0.8327, +0.0607,
-0.0751, -0.0153, -0.4485, +0.1758, +0.1821, +0.2625, +0.0108, -0.2395
],
[
-0.5639, -0.3613, +0.1291, -0.2132, +0.4927, -0.0604, -0.8067, +0.0933,
-0.1483, -0.0321, -0.6843, -0.3064, -0.5646, -0.2040, -0.0414, +0.6092,
+0.4903, -0.9346, +0.3389, +0.2040, -0.0295, -0.2196, +0.4264, +0.0312,
-1.1801, +0.3008, +0.7719, +0.2140, -0.0257, +0.5275, -0.0553, +0.0362
],
[
-0.6039, -1.2848, +0.6301, -0.1285, +0.2338, -0.2585, -0.3217, +0.4326,
+0.0441, -0.0356, -0.5720, -0.8739, -0.3924, +0.2499, -0.2620, +0.1396,
-0.0701, -0.2239, +0.2612, +0.1646, +0.7769, -0.6382, +0.8720, -0.1956,
-0.1779, -0.1608, -0.0358, -0.4453, -0.1154, +0.5532, -0.9282, +0.0031
],
[
-0.1990, +0.3708, -0.0049, -0.3260, -0.0465, +0.0415, +0.1601, +0.0019,
+0.0114, +0.0438, +0.0893, +0.3056, -0.6166, +0.1145, -0.6742, +0.0483,
+0.0739, -0.1139, +0.5772, -1.5569, +0.4253, -0.0769, +0.4014, -0.6817,
+0.0228, -0.0383, -0.0844, -0.1560, +0.1414, -0.3420, +0.3664, -0.2293
],
[
-0.0917, -0.8692, +0.4704, +0.1796, -0.1346, -0.5246, +0.0622, +0.3420,
-0.5879, -0.0445, -0.3444, -0.0490, +0.0956, -0.0753, -0.8856, +0.1275,
+0.1592, +0.3569, +0.1774, +0.2723, +0.1125, -0.1718, +0.2451, -0.0132,
+0.1584, -0.0197, +0.0700, -0.2156, +0.0094, +0.4639, -0.6721, -0.2180
],
[
+0.0578, -0.1570, -0.1623, -0.1359, +0.1346, +0.1821, -0.0696, -0.0570,
+0.0011, +0.1216, +0.1069, -0.0841, +0.1017, -0.1663, -0.6005, -0.4583,
-0.2606, -0.0292, +0.0321, -0.5614, -0.4416, +0.0355, +0.2081, +0.3517,
+0.0619, -1.0007, -0.0765, +0.1769, -0.1286, +0.5833, -0.1758, -0.1957
],
[
-0.0013, +0.3157, +0.0395, -1.0792, -0.1198, -0.2945, -0.0090, +0.3281,
-0.0618, -0.0806, +0.0768, +0.2802, -0.2311, -0.2302, +0.0506, +0.0552,
+0.3727, +0.3610, +0.2029, -0.1743, +0.4557, -0.1761, -0.5039, -0.9115,
+0.2842, +0.1317, -0.5961, -0.4214, -1.0727, +0.3308, +0.2380, -0.3348
],
[
+0.2455, -0.1299, +0.3117, -1.0169, -0.3417, +0.0310, -0.4793, +0.5334,
-0.4799, -0.3291, -0.1344, +0.3732, -0.1514, +0.1574, -0.1819, -0.0206,
+0.5675, -0.6992, +0.4815, -0.1497, -0.3804, +0.1389, +0.5850, -0.2920,
+0.2569, -0.3527, +0.3641, -0.2014, -0.1457, +0.2365, -0.2335, -0.2610
],
[
-0.2252, +0.1225, +0.0953, -0.0193, +0.3955, -0.0800, +0.0090, -0.4155,
+0.1851, +0.3392, -0.3260, -0.3907, +0.1320, +0.1266, +0.0579, +0.1819,
-0.5793, -0.2230, +0.1351, -0.1519, -0.0527, -0.0036, +0.1243, +0.1387,
-0.2874, -0.4997, -0.3251, +0.0435, -0.5244, +0.1051, -0.2081, +0.2126
],
[
-0.6574, +0.6789, +0.1026, -0.5191, +0.1058, -0.6812, +0.1798, -0.1029,
+0.0757, -0.0089, +0.1539, +0.4688, -0.1306, +0.0595, -0.8136, -0.4843,
+0.3675, +0.1800, +0.2641, -0.0589, +0.0613, +0.2019, -0.0765, -0.1216,
-0.4588, +0.0629, +0.1133, +0.7055, -2.8075, +0.3867, +0.4791, -0.1118
],
[
+0.2771, +0.3461, -0.8556, -0.0316, +0.3640, -0.1380, -0.3765, -0.9258,
-0.0693, -0.1283, +0.0576, -0.0792, +0.4468, -0.5001, +0.5939, -1.2226,
-0.9252, -0.3980, -1.3444, -0.9488, -0.7199, +0.4289, -1.8749, -0.0867,
+0.3905, -0.4535, -0.5607, -0.2247, -0.0359, -0.4125, +0.7124, -0.1963
],
[
-0.2584, -0.5358, -0.0916, +0.0765, +0.0615, -0.1887, -0.2253, -0.7855,
-0.0061, -0.1887, +0.5511, +0.3207, -0.2055, -0.1694, +0.4772, -1.0356,
-0.9884, -0.2620, -0.1214, +0.9733, -0.9700, -0.3205, -0.7005, -0.2960,
+0.1132, -0.0352, +0.3491, -0.2440, +0.1108, +0.1083, +0.3029, -0.0031
],
[
-0.6217, +0.1238, +0.0245, -0.1769, -0.2487, +0.0526, -0.0090, +0.1370,
+0.2666, -0.0743, -0.8230, -0.7723, -0.0929, -0.1532, +0.6103, -0.4931,
-1.3329, -0.3735, +0.0217, -0.1539, -0.4946, -1.0838, -0.5840, +0.1618,
+0.2584, +0.4200, +0.1171, -0.5601, +0.1604, +0.0864, +0.2287, -0.0057
],
[
-0.2220, +0.4837, -0.0825, +0.0143, +0.2734, -0.0853, +0.1578, -0.0112,
+0.1829, +0.0390, +0.2151, -0.1538, -0.1111, -0.0773, +0.3439, -0.2134,
-0.2884, -0.3831, +0.2767, -0.3149, +0.1463, +0.3230, +0.2187, -0.2309,
-0.1096, +0.3709, -0.0105, +0.3709, +0.3034, -0.7602, +0.5988, -0.0595
],
[
-0.6073, +0.1780, +0.1682, +0.1604, +0.3662, -0.0385, -0.1495, +0.3012,
-0.2065, -0.0163, -1.0465, -0.8268, -0.0190, +0.0964, -0.2755, +0.0965,
-0.3466, -0.3758, -0.1113, +0.1462, +0.3280, -0.1600, +0.1023, +0.1998,
-0.3642, +0.2736, +0.3782, -0.2681, +0.2334, +0.1721, +0.0385, +0.0348
],
[
-0.0582, -0.5750, +0.1279, +0.3630, -0.2404, -0.1511, +0.2650, -0.0324,
-0.2258, +0.0007, +0.3051, -0.1875, -0.5106, +0.0104, +0.1335, -0.5282,
-0.2210, +0.2648, -0.7506, +0.4975, -1.7048, +0.2378, -0.1771, +0.2981,
+0.1252, +0.1384, -0.3384, -0.0830, +0.0966, +0.3728, -0.1980, -0.1953
],
[
-1.0735, -0.2780, +0.1428, -0.0624, -0.0311, -0.2687, -0.1623, +0.2996,
+0.1782, -0.1403, -0.3761, -1.3413, -0.2020, -0.0492, -0.6636, -0.2737,
+0.2228, +0.3109, +0.1596, +0.0172, +0.1325, -1.4936, -0.0615, -0.1547,
-0.2285, +0.2648, -0.1008, -1.6756, -0.2352, +0.0998, -0.4550, +0.2028
],
[
-0.3866, -0.0107, +0.1052, +0.1423, +0.1160, +0.1712, -0.6206, -0.3505,
-0.3298, -0.0362, +0.6768, +0.2086, -0.4348, -0.3577, +0.0131, -0.1640,
+0.0160, -0.3891, -0.0180, -0.1064, -0.2494, +0.0340, +0.2225, -0.1320,
-0.3550, -0.3005, +0.0118, +0.2782, +0.4691, -1.3792, +0.1971, -0.0598
],
[
+0.0215, +0.1885, -0.5360, -0.1283, +0.4689, +0.1426, -0.2809, -0.8197,
+0.1951, -0.1620, +0.0627, +0.2864, -0.3069, -0.1170, +0.0545, -0.4527,
-0.6646, -0.1546, -0.1794, -0.5350, -0.1060, -0.0198, -0.5782, -0.2201,
+0.0361, -0.2497, -0.1527, -0.1489, +0.1034, +0.0925, +0.0368, -0.0352
],
[
+0.2459, +0.3230, -0.0494, -0.5631, +0.0600, -0.3036, -0.5443, +0.1081,
-0.2231, +0.0734, +0.0289, +0.4205, -0.6415, -0.1305, -0.0717, +0.2971,
+0.0476, -1.3001, +0.5122, -0.0005, -0.3572, +0.0727, +0.1713, -0.4751,
-0.3614, -0.0957, -0.0942, +0.0580, +0.2393, +0.0038, +0.1938, -0.1704
],
[
+0.3352, -0.0882, -0.0349, -0.6093, +0.4262, -0.1350, -0.0687, -0.2459,
-0.5564, -0.2956, +0.1619, -0.0813, -0.5128, -0.2209, +0.3870, -0.0804,
+0.7676, -0.1745, -0.3860, -0.5517, -0.6899, -0.6400, +0.6095, -0.5337,
+0.3452, -0.6608, +0.0662, +0.1741, +0.1653, -0.4191, +0.1051, -0.3116
],
[
-0.0527, -1.3119, +0.3441, -0.0041, -0.5938, -0.4224, +0.3973, +0.4673,
-0.0613, -0.0191, +0.1297, -0.2211, -0.0880, +0.0319, +0.0661, -0.2075,
+0.4380, +0.3197, +0.0989, +0.2346, -0.0142, -1.2137, +0.1618, -0.3300,
+0.4591, +0.4910, +0.3537, -0.5902, -0.0616, +0.2882, -0.0900, -0.0208
],
[
-0.7068, -0.7952, +0.4496, +0.1237, -0.2000, -0.5966, +0.3920, +0.3458,
+0.0036, -0.0666, -0.3061, -0.1172, +0.0446, +0.1768, -0.5318, +0.2083,
+0.3371, +0.1497, +0.4244, +0.3980, +0.2023, -0.8931, +0.1860, -0.6889,
-0.3250, +0.1250, +0.1510, -0.3405, -0.4040, +0.1598, -0.9933, +0.0233
],
[
-1.2305, -0.3178, +0.0536, -0.0585, -0.7097, +0.3196, +0.2899, +0.8200,
+0.0384, +0.1733, -1.1839, -2.2545, +0.0653, +0.1376, -0.1359, -0.1202,
-0.0831, -0.5397, +0.1100, +0.1386, -0.1271, -0.6298, +0.1038, -0.1213,
-0.1461, -0.4508, +0.5106, -0.8266, -0.6204, +0.3753, -0.4897, -0.0751
],
[
-0.3676, -0.5547, +0.0897, -0.0230, -0.3045, -0.1885, -0.5990, +0.3622,
-0.2240, -0.1205, -0.3056, +0.7085, +0.0053, -0.1213, -0.3023, +0.1433,
-0.2090, -0.0412, +0.2561, +0.1313, -0.2803, +0.2543, +0.0571, -0.9791,
-0.0167, -0.2928, -0.3020, -0.2271, +0.0507, -0.1310, -0.6347, -0.0889
],
[
-0.2794, +0.0675, -1.0020, -0.2234, +0.3937, -0.2857, +0.1058, -1.0755,
-0.0377, -0.2753, -0.0501, -0.0493, -0.2987, -0.2214, +0.2869, -1.0882,
-1.2635, -1.2235, -0.5762, -0.4528, -0.1372, -0.0192, -1.3768, +0.2337,
+0.2008, -0.2517, -0.3918, -0.6362, -0.1762, -0.9261, +0.1711, -0.0094
],
[
-0.1099, -0.2142, -0.0006, -0.4617, -0.0286, +0.3482, -0.7728, -0.4384,
+0.0050, -0.0151, +0.1974, +0.2815, -0.5295, -0.2581, +0.3404, -1.6254,
-1.3208, -0.1648, -0.5207, +0.4104, -0.2795, +0.0613, -1.5642, -0.1178,
-0.1354, +0.0375, +0.3323, +0.0540, +0.2038, -0.3223, +0.4603, -0.3780
],
[
-0.3999, -0.3719, +0.1918, -0.4738, -0.0009, +0.0419, +0.1046, +0.2675,
+0.1359, -0.2536, -0.3485, -0.3118, -0.3613, +0.0914, -0.4486, +0.2719,
+0.2876, -0.0685, +0.4309, +0.1856, +0.4678, -0.3314, +0.0211, +0.2575,
+0.5077, -0.1494, +0.5110, -0.6869, -1.4053, +0.3093, -0.2914, -0.1501
],
[
+0.3543, +0.3915, +0.0536, +0.3995, +0.2165, -0.1133, -0.1209, +0.0824,
-0.0723, -0.0774, -0.4248, -0.0243, -0.1089, -0.1408, +0.2072, -0.1309,
-1.5186, -0.4079, -0.0530, -0.3525, +0.6782, +0.1991, -0.0292, +0.1339,
-0.1074, +0.2312, +0.1969, +0.4662, +0.5312, -0.3306, +0.0622, +0.1057
],
[
-1.1778, +0.2978, +0.0443, +0.1657, +0.1317, -0.1250, -0.0459, +0.0777,
+0.1359, -0.0055, +0.2364, -2.3659, +0.2214, -0.1489, -0.3051, -0.5094,
+0.1495, +0.3328, +0.1264, -0.0217, +0.2321, -0.6466, -0.1813, +0.5276,
+0.1975, +0.3752, +0.1469, -0.8019, +0.2427, +0.1543, +0.2140, -0.1592
],
[
-0.7753, -1.3502, +0.3157, +0.1847, +0.0661, -0.5501, +0.3482, +0.6112,
+0.0207, +0.0534, -0.2106, -1.0144, -0.0836, -0.0275, -1.0761, +0.2131,
+0.3135, +0.3134, +0.1974, +0.0182, +0.1975, -1.1221, +0.2958, -0.2610,
+0.0865, +0.3592, +0.4317, -0.3505, -0.4557, +0.3033, -0.5797, -0.2988
],
[
+0.4103, -0.0643, +0.0803, +0.2177, +0.1028, -0.2668, +0.0084, -0.2340,
-0.2571, +0.0334, +0.3451, -0.0055, +0.0216, -0.1460, +0.5293, -0.2615,
-0.3035, +0.1736, -0.4206, -0.2186, +0.1343, +0.6001, -0.0499, -0.2777,
-0.0160, -0.4303, -0.2795, +0.1932, +0.4219, -0.0800, +0.1819, -0.1007
],
[
-0.7074, -0.0546, +0.4495, +0.1427, +0.3306, +0.0811, -0.5433, -0.0609,
-0.2128, -0.1059, -1.0477, -0.4679, -0.1780, -0.1373, -0.3672, +0.0724,
-0.0554, -0.5400, +0.0457, -0.0469, -0.0367, -0.4609, +0.1668, -0.0266,
-0.9007, +0.2975, +0.5204, -0.0453, -0.1314, -0.0980, +0.1424, -0.1877
],
[
+0.0657, +0.1230, -0.2558, +0.3103, -0.0795, -0.1243, +0.1956, +0.0262,
-0.2626, -0.0554, +0.3760, +0.3076, -0.4633, +0.0790, +0.2363, -0.3311,
+0.1235, -0.1727, -0.2468, +0.0188, -0.1121, -0.2807, -0.5865, -0.4197,
+0.1949, -0.4970, -1.0413, -0.1698, +0.1798, +0.2004, -0.0514, +0.0254
],
[
-0.1566, -1.1156, +0.4431, -0.1503, -0.5682, +0.1822, -0.1201, +0.5151,
-0.1386, -0.1764, +0.2063, -0.8582, +0.3750, -0.1405, +0.0852, +0.2641,
-0.1951, -0.0575, -0.4181, +0.2273, +0.1332, -0.2797, +0.5406, -0.0869,
+0.2453, +0.0648, +0.2252, -0.0628, -0.6882, -0.0514, -0.4663, -0.0954
],
[
-0.4780, +0.5844, +0.1782, -0.0831, +0.1547, -0.0595, -0.5646, -0.0488,
-0.1774, -0.0098, +0.1833, +0.3520, -0.3359, -0.1492, +0.1139, -0.1223,
-0.5312, -0.5361, +0.1689, -0.2020, +0.1069, +0.2327, +0.2887, +0.0526,
-0.5916, -0.2435, -0.2342, +0.3422, +0.4399, -1.1880, +0.1293, -0.1021
],
[
-1.2784, -1.8266, +0.0630, -0.3332, -0.5833, -0.3733, +0.3265, +0.1977,
+0.0716, -0.2575, +0.0403, -0.1961, +0.1541, -0.2311, -0.1734, -0.1785,
+0.0168, +0.1134, +0.0407, -0.1661, +0.5985, -1.9681, +0.1342, +0.3432,
+0.3934, +0.0663, +0.3141, -2.0177, -1.7071, +0.2942, -1.0684, -0.0737
],
[
+0.1763, +0.2191, +0.2609, +0.0723, +0.1038, -0.2516, -0.9086, +0.1536,
+0.0153, +0.1061, +0.1675, +0.3839, -0.5326, +0.2007, -0.4943, -0.1048,
+0.1614, -0.4703, +0.3453, -0.7441, -0.6187, +0.4247, +0.1721, -0.1776,
-0.0919, -0.8387, +0.0798, -0.0598, +0.2711, -0.0508, +0.1761, +0.0029
],
[
-0.2003, +0.2194, -0.6280, +0.1593, +0.1648, -0.1007, +0.3162, -0.3881,
-0.1584, -0.0148, +0.7057, +0.0085, +0.3488, +0.0977, +0.4018, -0.8195,
-0.1944, +0.4359, -0.6605, -0.1929, +0.2237, +0.1087, -0.4213, -0.7149,
+0.3972, -0.1313, -0.2815, -0.7234, -0.0561, -0.5364, +0.0178, +0.0349
],
[
+0.0567, +0.1687, +0.0007, +0.2939, -1.3854, +0.0168, +0.1909, +0.4919,
-0.4547, +0.0562, -0.1188, +0.1653, -0.0265, -0.0541, -0.1117, -0.3240,
+0.2545, +0.6516, +0.0124, -0.1258, -0.0656, -0.3524, +0.0174, +0.3926,
+0.1125, +0.2834, -0.1961, -0.3603, +0.1783, -0.0224, -0.6900, -0.1688
],
[
+0.0672, +0.6339, -0.3839, +0.0077, +0.8224, -0.3197, -0.0589, -0.1318,
+0.0222, -0.1530, +0.1237, +0.4014, -0.1952, -0.1130, +0.4214, -0.2741,
+0.2291, +0.0757, +0.0563, -0.0967, +0.4210, +0.5133, +0.0412, -0.9212,
+0.1377, -0.4068, -0.3652, +0.4283, +0.6182, -0.6187, +0.1997, +0.1240
],
[
-0.0067, +0.3307, -0.7751, -0.2084, +0.4740, -0.0264, -0.0768, -0.9519,
-0.0632, -0.0753, +0.3293, +0.5260, -0.6023, +0.0060, +0.2799, -0.2904,
-0.8262, -0.6644, -0.3900, -0.1461, +0.4965, +0.3996, -0.7569, +0.0612,
+0.5168, -0.5160, -0.4875, +0.3759, +0.0295, +0.1027, +0.6096, -0.0115
],
[
-0.0110, +0.4652, -0.1486, -0.6029, +0.2581, -0.3184, -0.3759, +0.3213,
-0.2748, -0.0630, +0.0953, +0.2101, -1.2738, -0.1353, +0.2710, -0.2276,
+0.2586, -0.2347, -0.3320, +0.0487, -0.2318, -0.1002, +0.1236, +0.2660,
-0.1172, +0.1437, -0.0850, +0.1659, -0.2152, -0.0764, +0.2838, -0.1325
],
[
+0.0152, -0.0906, -0.1897, -0.3521, -0.1836, -0.1694, -0.4150, -0.1695,
+0.0509, -0.0716, +0.3118, +0.2422, -0.5058, -0.0637, -0.1038, -0.2828,
-0.0528, -0.2051, +0.2062, -0.2105, -0.7317, +0.1881, -0.2992, -0.0883,
+0.0115, -1.5295, -0.1671, +0.0411, +0.0648, -0.0119, -0.2941, +0.0273
],
[
+0.5028, +0.1780, -0.4643, -0.0373, +0.3067, -0.1974, +0.2643, -0.2365,
-0.2083, +0.0472, +0.4830, +0.0630, +0.2155, -0.0916, +0.6290, -0.4427,
-0.6266, +0.3576, -0.3541, -0.2034, +0.3733, +0.8247, -0.5837, -0.4372,
+0.2696, -0.4042, -0.3436, +0.0355, -0.2288, -0.6382, +0.7358, -0.1229
]
])
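For orientation, the matrix above (together with the bias and output vectors that follow) amounts to one dense hidden layer of 32 units feeding a single scalar output. Below is a minimal, self-contained sketch of how such exported weights could be evaluated; the input width, the tanh activation, and every name in it are illustrative assumptions, since the script's actual evaluation code is not part of this hunk.

import numpy as np


def evaluate_policy(features, dense2_w, dense2_b, final_w, final_b):
  # Hypothetical evaluation of hard-coded weights like the ones in this diff:
  # one dense hidden layer (tanh assumed) followed by a scalar output.
  hidden = np.tanh(np.dot(features, dense2_w) + dense2_b)
  return np.dot(hidden, final_w) + final_b


# Stand-in arrays with shapes matching the literals above
# (the hidden layer has 32 units, the output is a single value).
rng = np.random.default_rng(0)
features = rng.standard_normal(64)  # input width is an assumption
dense2_w = rng.standard_normal((64, 32))
dense2_b = rng.standard_normal(32)
final_w = rng.standard_normal((32, 1))
final_b = np.array([0.0190])
print(evaluate_policy(features, dense2_w, dense2_b, final_w, final_b))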
weights_dense2_b = np.array([ -0.0730, +0.0456, +0.0877, -0.2607, +0.0029, -0.2705, -0.1420, +0.2403, -0.2135, -0.0646, +0.1378, +0.1105, -0.4639, -0.0583, -0.0872, -0.1473, +0.1460, -0.0234, +0.0740, -0.0745, -0.1283, +0.0316, +0.0361, -0.0726, -0.0304, +0.0417, -0.0313, +0.0935, +0.0815, +0.0814, +0.0818, -0.1111])
weights_final_w = np.array([
[ +1.0397],
[ +0.7049],
[ -0.2128],
[ +0.2172],
[ +0.3027],
[ -0.1991],
[ +0.3398],
[ -0.5932],
[ -0.1439],
[ -0.0236],
[ +0.5679],
[ +0.8571],
[ +0.1934],
[ -0.1652],
[ +0.6933],
[ -0.5510],
[ -1.0587],
[ +0.6996],
[ -0.5009],
[ -0.4000],
[ -0.6958],
[ +0.7716],
[ -0.5342],
[ -0.5095],
[ +0.3040],
[ -1.1986],
[ -0.4900],
[ +0.7726],
[ +0.5871],
[ -0.2533],
[ +0.2633],
[ -0.0004]
])
weights_dense2_b = np.array([
-0.0730, +0.0456, +0.0877, -0.2607, +0.0029, -0.2705, -0.1420, +0.2403,
-0.2135, -0.0646, +0.1378, +0.1105, -0.4639, -0.0583, -0.0872, -0.1473,
+0.1460, -0.0234, +0.0740, -0.0745, -0.1283, +0.0316, +0.0361, -0.0726,
-0.0304, +0.0417, -0.0313, +0.0935, +0.0815, +0.0814, +0.0818, -0.1111
])
weights_final_b = np.array([ +0.0190])
weights_final_w = np.array([[+1.0397], [+0.7049], [-0.2128], [+0.2172],
[+0.3027], [-0.1991], [+0.3398], [-0.5932],
[-0.1439], [-0.0236], [+0.5679], [+0.8571],
[+0.1934], [-0.1652], [+0.6933], [-0.5510],
[-1.0587], [+0.6996], [-0.5009], [-0.4000],
[-0.6958], [+0.7716], [-0.5342], [-0.5095],
[+0.3040], [-1.1986], [-0.4900], [+0.7726],
[+0.5871], [-0.2533], [+0.2633], [-0.0004]])
if __name__=="__main__":
main()
weights_final_b = np.array([+0.0190])
# yapf: enable
if __name__ == "__main__":
main()
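The `# yapf: enable` marker above is yapf's standard way of switching formatting back on after a region that should be left untouched; it is presumably paired with a `# yapf: disable` comment earlier in this file (not visible in this hunk) so that yapf does not rewrap the hand-formatted weight literals. A small sketch of the pattern, with a hypothetical array name:

# yapf: disable
identity = np.array([
    [1, 0],
    [0, 1],
])
# yapf: enable

Everything between the two comments keeps its manual layout, while the rest of the file is formatted according to the project's yapf style.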

Some files were not shown because too many files have changed in this diff.