add yapf style and apply yapf to format all Python files
This recreates pull request #2192
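For reference, the reformatting shown below can be reproduced with yapf itself. The following is a minimal sketch, not the exact configuration shipped with this commit: the style values (2-space indents, roughly 100-character lines) are inferred from the formatted output in the diff, and the target path is illustrative.

# Minimal sketch: re-apply yapf to one of the files touched by this commit.
# The style string and the file path are assumptions, not the committed config.
from yapf.yapflib.yapf_api import FormatFile

FormatFile('examples/pybullet/gym/pybullet_envs/ARS/ars.py',  # illustrative path
           style_config='{based_on_style: yapf, indent_width: 2, column_limit: 100}',
           in_place=True)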
@@ -4,7 +4,7 @@ import os
import inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
os.sys.path.insert(0,parentdir)
os.sys.path.insert(0, parentdir)

# Importing the libraries
import os
@@ -20,17 +20,17 @@ import argparse

# Setting the Hyper Parameters
class Hp():

def __init__(self):
self.nb_steps = 10000
self.episode_length = 1000
self.learning_rate = 0.02
self.nb_directions = 16
self.nb_best_directions = 16
assert self.nb_best_directions <= self.nb_directions
self.noise = 0.03
self.seed = 1
self.env_name = 'HalfCheetahBulletEnv-v0'

def __init__(self):
self.nb_steps = 10000
self.episode_length = 1000
self.learning_rate = 0.02
self.nb_directions = 16
self.nb_best_directions = 16
assert self.nb_best_directions <= self.nb_directions
self.noise = 0.03
self.seed = 1
self.env_name = 'HalfCheetahBulletEnv-v0'


# Multiprocess Exploring the policy on one specific direction and over one episode
@@ -39,111 +39,37 @@ _RESET = 1
_CLOSE = 2
_EXPLORE = 3

def ExploreWorker(rank,childPipe, envname, args):
env = gym.make(envname)
nb_inputs = env.observation_space.shape[0]
normalizer = Normalizer(nb_inputs)
observation_n = env.reset()
n=0
while True:
n+=1
try:
# Only block for short times to have keyboard exceptions be raised.
if not childPipe.poll(0.001):
continue
message, payload = childPipe.recv()
except (EOFError, KeyboardInterrupt):
break
if message == _RESET:
observation_n = env.reset()
childPipe.send(["reset ok"])

def ExploreWorker(rank, childPipe, envname, args):
env = gym.make(envname)
nb_inputs = env.observation_space.shape[0]
normalizer = Normalizer(nb_inputs)
observation_n = env.reset()
n = 0
while True:
n += 1
try:
# Only block for short times to have keyboard exceptions be raised.
if not childPipe.poll(0.001):
continue
if message == _EXPLORE:
#normalizer = payload[0] #use our local normalizer
policy = payload[1]
hp = payload[2]
direction = payload[3]
delta = payload[4]
state = env.reset()
done = False
num_plays = 0.
sum_rewards = 0
while not done and num_plays < hp.episode_length:
normalizer.observe(state)
state = normalizer.normalize(state)
action = policy.evaluate(state, delta, direction,hp)
state, reward, done, _ = env.step(action)
reward = max(min(reward, 1), -1)
sum_rewards += reward
num_plays += 1
childPipe.send([sum_rewards])
continue
if message == _CLOSE:
childPipe.send(["close ok"])
break
childPipe.close()


# Normalizing the states

class Normalizer():

def __init__(self, nb_inputs):
self.n = np.zeros(nb_inputs)
self.mean = np.zeros(nb_inputs)
self.mean_diff = np.zeros(nb_inputs)
self.var = np.zeros(nb_inputs)

def observe(self, x):
self.n += 1.
last_mean = self.mean.copy()
self.mean += (x - self.mean) / self.n
self.mean_diff += (x - last_mean) * (x - self.mean)
self.var = (self.mean_diff / self.n).clip(min = 1e-2)

def normalize(self, inputs):
obs_mean = self.mean
obs_std = np.sqrt(self.var)
return (inputs - obs_mean) / obs_std

# Building the AI

class Policy():
def __init__(self, input_size, output_size, env_name, args):
try:
self.theta = np.load(args.policy)
except:
self.theta = np.zeros((output_size, input_size))
self.env_name = env_name
print("Starting policy theta=",self.theta)
def evaluate(self, input, delta, direction, hp):
if direction is None:
return np.clip(self.theta.dot(input), -1.0, 1.0)
elif direction == "positive":
return np.clip((self.theta + hp.noise*delta).dot(input), -1.0, 1.0)
else:
return np.clip((self.theta - hp.noise*delta).dot(input), -1.0, 1.0)

def sample_deltas(self):
return [np.random.randn(*self.theta.shape) for _ in range(hp.nb_directions)]

def update(self, rollouts, sigma_r, args):
step = np.zeros(self.theta.shape)
for r_pos, r_neg, d in rollouts:
step += (r_pos - r_neg) * d
self.theta += hp.learning_rate / (hp.nb_best_directions * sigma_r) * step
timestr = time.strftime("%Y%m%d-%H%M%S")
np.save(args.logdir+"/policy_"+self.env_name+"_"+timestr+".npy", self.theta)


# Exploring the policy on one specific direction and over one episode

def explore(env, normalizer, policy, direction, delta, hp):
state = env.reset()
done = False
num_plays = 0.
sum_rewards = 0
while not done and num_plays < hp.episode_length:
message, payload = childPipe.recv()
except (EOFError, KeyboardInterrupt):
break
if message == _RESET:
observation_n = env.reset()
childPipe.send(["reset ok"])
continue
if message == _EXPLORE:
#normalizer = payload[0] #use our local normalizer
policy = payload[1]
hp = payload[2]
direction = payload[3]
delta = payload[4]
state = env.reset()
done = False
num_plays = 0.
sum_rewards = 0
while not done and num_plays < hp.episode_length:
normalizer.observe(state)
state = normalizer.normalize(state)
action = policy.evaluate(state, delta, direction, hp)
@@ -151,127 +77,217 @@ def explore(env, normalizer, policy, direction, delta, hp):
reward = max(min(reward, 1), -1)
sum_rewards += reward
num_plays += 1
return sum_rewards
childPipe.send([sum_rewards])
continue
if message == _CLOSE:
childPipe.send(["close ok"])
break
childPipe.close()


# Normalizing the states


class Normalizer():

def __init__(self, nb_inputs):
self.n = np.zeros(nb_inputs)
self.mean = np.zeros(nb_inputs)
self.mean_diff = np.zeros(nb_inputs)
self.var = np.zeros(nb_inputs)

def observe(self, x):
self.n += 1.
last_mean = self.mean.copy()
self.mean += (x - self.mean) / self.n
self.mean_diff += (x - last_mean) * (x - self.mean)
self.var = (self.mean_diff / self.n).clip(min=1e-2)

def normalize(self, inputs):
obs_mean = self.mean
obs_std = np.sqrt(self.var)
return (inputs - obs_mean) / obs_std


# Building the AI


class Policy():

def __init__(self, input_size, output_size, env_name, args):
try:
self.theta = np.load(args.policy)
except:
self.theta = np.zeros((output_size, input_size))
self.env_name = env_name
print("Starting policy theta=", self.theta)

def evaluate(self, input, delta, direction, hp):
if direction is None:
return np.clip(self.theta.dot(input), -1.0, 1.0)
elif direction == "positive":
return np.clip((self.theta + hp.noise * delta).dot(input), -1.0, 1.0)
else:
return np.clip((self.theta - hp.noise * delta).dot(input), -1.0, 1.0)

def sample_deltas(self):
return [np.random.randn(*self.theta.shape) for _ in range(hp.nb_directions)]

def update(self, rollouts, sigma_r, args):
step = np.zeros(self.theta.shape)
for r_pos, r_neg, d in rollouts:
step += (r_pos - r_neg) * d
self.theta += hp.learning_rate / (hp.nb_best_directions * sigma_r) * step
timestr = time.strftime("%Y%m%d-%H%M%S")
np.save(args.logdir + "/policy_" + self.env_name + "_" + timestr + ".npy", self.theta)


# Exploring the policy on one specific direction and over one episode


def explore(env, normalizer, policy, direction, delta, hp):
state = env.reset()
done = False
num_plays = 0.
sum_rewards = 0
while not done and num_plays < hp.episode_length:
normalizer.observe(state)
state = normalizer.normalize(state)
action = policy.evaluate(state, delta, direction, hp)
state, reward, done, _ = env.step(action)
reward = max(min(reward, 1), -1)
sum_rewards += reward
num_plays += 1
return sum_rewards


# Training the AI


def train(env, policy, normalizer, hp, parentPipes, args):

for step in range(hp.nb_steps):

# Initializing the perturbations deltas and the positive/negative rewards
deltas = policy.sample_deltas()
positive_rewards = [0] * hp.nb_directions
negative_rewards = [0] * hp.nb_directions

if parentPipes:
for k in range(hp.nb_directions):
parentPipe = parentPipes[k]
parentPipe.send([_EXPLORE,[normalizer, policy, hp, "positive", deltas[k]]])
for k in range(hp.nb_directions):
positive_rewards[k] = parentPipes[k].recv()[0]

for k in range(hp.nb_directions):
parentPipe = parentPipes[k]
parentPipe.send([_EXPLORE,[normalizer, policy, hp, "negative", deltas[k]]])
for k in range(hp.nb_directions):
negative_rewards[k] = parentPipes[k].recv()[0]

else:
# Getting the positive rewards in the positive directions
for k in range(hp.nb_directions):
positive_rewards[k] = explore(env, normalizer, policy, "positive", deltas[k], hp)


# Getting the negative rewards in the negative/opposite directions
for k in range(hp.nb_directions):
negative_rewards[k] = explore(env, normalizer, policy, "negative", deltas[k], hp)


# Gathering all the positive/negative rewards to compute the standard deviation of these rewards
all_rewards = np.array(positive_rewards + negative_rewards)
sigma_r = all_rewards.std()

# Sorting the rollouts by the max(r_pos, r_neg) and selecting the best directions
scores = {k:max(r_pos, r_neg) for k,(r_pos,r_neg) in enumerate(zip(positive_rewards, negative_rewards))}
order = sorted(scores.keys(), key = lambda x:scores[x])[:hp.nb_best_directions]
rollouts = [(positive_rewards[k], negative_rewards[k], deltas[k]) for k in order]

# Updating our policy
policy.update(rollouts, sigma_r, args)

# Printing the final reward of the policy after the update
reward_evaluation = explore(env, normalizer, policy, None, None, hp)
print('Step:', step, 'Reward:', reward_evaluation)

for step in range(hp.nb_steps):

# Initializing the perturbations deltas and the positive/negative rewards
deltas = policy.sample_deltas()
positive_rewards = [0] * hp.nb_directions
negative_rewards = [0] * hp.nb_directions

if parentPipes:
for k in range(hp.nb_directions):
parentPipe = parentPipes[k]
parentPipe.send([_EXPLORE, [normalizer, policy, hp, "positive", deltas[k]]])
for k in range(hp.nb_directions):
positive_rewards[k] = parentPipes[k].recv()[0]

for k in range(hp.nb_directions):
parentPipe = parentPipes[k]
parentPipe.send([_EXPLORE, [normalizer, policy, hp, "negative", deltas[k]]])
for k in range(hp.nb_directions):
negative_rewards[k] = parentPipes[k].recv()[0]

else:
# Getting the positive rewards in the positive directions
for k in range(hp.nb_directions):
positive_rewards[k] = explore(env, normalizer, policy, "positive", deltas[k], hp)

# Getting the negative rewards in the negative/opposite directions
for k in range(hp.nb_directions):
negative_rewards[k] = explore(env, normalizer, policy, "negative", deltas[k], hp)

# Gathering all the positive/negative rewards to compute the standard deviation of these rewards
all_rewards = np.array(positive_rewards + negative_rewards)
sigma_r = all_rewards.std()

# Sorting the rollouts by the max(r_pos, r_neg) and selecting the best directions
scores = {
k: max(r_pos, r_neg)
for k, (r_pos, r_neg) in enumerate(zip(positive_rewards, negative_rewards))
}
order = sorted(scores.keys(), key=lambda x: scores[x])[:hp.nb_best_directions]
rollouts = [(positive_rewards[k], negative_rewards[k], deltas[k]) for k in order]

# Updating our policy
policy.update(rollouts, sigma_r, args)

# Printing the final reward of the policy after the update
reward_evaluation = explore(env, normalizer, policy, None, None, hp)
print('Step:', step, 'Reward:', reward_evaluation)


# Running the main code


def mkdir(base, name):
path = os.path.join(base, name)
if not os.path.exists(path):
os.makedirs(path)
return path


path = os.path.join(base, name)
if not os.path.exists(path):
os.makedirs(path)
return path


if __name__ == "__main__":
mp.freeze_support()
mp.freeze_support()

parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('--env', help='Gym environment name', type=str, default='HalfCheetahBulletEnv-v0')
parser.add_argument('--seed', help='RNG seed', type=int, default=1)
parser.add_argument('--render', help='OpenGL Visualizer', type=int, default=0)
parser.add_argument('--movie',help='rgb_array gym movie',type=int, default=0)
parser.add_argument('--steps', help='Number of steps', type=int, default=10000)
parser.add_argument('--policy', help='Starting policy file (npy)', type=str, default='')
parser.add_argument('--logdir', help='Directory root to log policy files (npy)', type=str, default='.')
parser.add_argument('--mp', help='Enable multiprocessing', type=int, default=1)

args = parser.parse_args()

hp = Hp()
hp.env_name = args.env
hp.seed = args.seed
hp.nb_steps = args.steps
print("seed = ", hp.seed)
np.random.seed(hp.seed)
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('--env',
help='Gym environment name',
type=str,
default='HalfCheetahBulletEnv-v0')
parser.add_argument('--seed', help='RNG seed', type=int, default=1)
parser.add_argument('--render', help='OpenGL Visualizer', type=int, default=0)
parser.add_argument('--movie', help='rgb_array gym movie', type=int, default=0)
parser.add_argument('--steps', help='Number of steps', type=int, default=10000)
parser.add_argument('--policy', help='Starting policy file (npy)', type=str, default='')
parser.add_argument('--logdir',
help='Directory root to log policy files (npy)',
type=str,
default='.')
parser.add_argument('--mp', help='Enable multiprocessing', type=int, default=1)

parentPipes = None
if args.mp:
num_processes = hp.nb_directions
processes = []
childPipes = []
parentPipes = []

for pr in range (num_processes):
parentPipe, childPipe = Pipe()
parentPipes.append(parentPipe)
childPipes.append(childPipe)

for rank in range(num_processes):
p = mp.Process(target=ExploreWorker, args=(rank,childPipes[rank], hp.env_name, args))
p.start()
processes.append(p)

work_dir = mkdir('exp', 'brs')
monitor_dir = mkdir(work_dir, 'monitor')
env = gym.make(hp.env_name)
if args.render:
env.render(mode = "human")
if args.movie:
env = wrappers.Monitor(env, monitor_dir, force = True)
nb_inputs = env.observation_space.shape[0]
nb_outputs = env.action_space.shape[0]
policy = Policy(nb_inputs, nb_outputs,hp.env_name, args)
normalizer = Normalizer(nb_inputs)

print("start training")
train(env, policy, normalizer, hp, parentPipes, args)
args = parser.parse_args()

if args.mp:
for parentPipe in parentPipes:
parentPipe.send([_CLOSE,"pay2"])

for p in processes:
p.join()
hp = Hp()
hp.env_name = args.env
hp.seed = args.seed
hp.nb_steps = args.steps
print("seed = ", hp.seed)
np.random.seed(hp.seed)

parentPipes = None
if args.mp:
num_processes = hp.nb_directions
processes = []
childPipes = []
parentPipes = []

for pr in range(num_processes):
parentPipe, childPipe = Pipe()
parentPipes.append(parentPipe)
childPipes.append(childPipe)

for rank in range(num_processes):
p = mp.Process(target=ExploreWorker, args=(rank, childPipes[rank], hp.env_name, args))
p.start()
processes.append(p)

work_dir = mkdir('exp', 'brs')
monitor_dir = mkdir(work_dir, 'monitor')
env = gym.make(hp.env_name)
if args.render:
env.render(mode="human")
if args.movie:
env = wrappers.Monitor(env, monitor_dir, force=True)
nb_inputs = env.observation_space.shape[0]
nb_outputs = env.action_space.shape[0]
policy = Policy(nb_inputs, nb_outputs, hp.env_name, args)
normalizer = Normalizer(nb_inputs)

print("start training")
train(env, policy, normalizer, hp, parentPipes, args)

if args.mp:
for parentPipe in parentPipes:
parentPipe.send([_CLOSE, "pay2"])

for p in processes:
p.join()

@@ -1,25 +1,28 @@
|
||||
import gym
|
||||
from gym.envs.registration import registry, make, spec
|
||||
def register(id,*args,**kvargs):
|
||||
if id in registry.env_specs:
|
||||
return
|
||||
else:
|
||||
return gym.envs.registration.register(id,*args,**kvargs)
|
||||
|
||||
|
||||
def register(id, *args, **kvargs):
|
||||
if id in registry.env_specs:
|
||||
return
|
||||
else:
|
||||
return gym.envs.registration.register(id, *args, **kvargs)
|
||||
|
||||
|
||||
# ------------bullet-------------
|
||||
|
||||
register(
|
||||
id='HumanoidDeepMimicBulletEnv-v1',
|
||||
entry_point='pybullet_envs.deep_mimic:HumanoidDeepMimicGymEnv',
|
||||
max_episode_steps=1000,
|
||||
reward_threshold=20000.0,
|
||||
id='HumanoidDeepMimicBulletEnv-v1',
|
||||
entry_point='pybullet_envs.deep_mimic:HumanoidDeepMimicGymEnv',
|
||||
max_episode_steps=1000,
|
||||
reward_threshold=20000.0,
|
||||
)
|
||||
|
||||
register(
|
||||
id='CartPoleBulletEnv-v1',
|
||||
entry_point='pybullet_envs.bullet:CartPoleBulletEnv',
|
||||
max_episode_steps=200,
|
||||
reward_threshold=190.0,
|
||||
id='CartPoleBulletEnv-v1',
|
||||
entry_point='pybullet_envs.bullet:CartPoleBulletEnv',
|
||||
max_episode_steps=200,
|
||||
reward_threshold=190.0,
|
||||
)
|
||||
|
||||
register(
|
||||
@@ -36,7 +39,6 @@ register(
|
||||
reward_threshold=5.0,
|
||||
)
|
||||
|
||||
|
||||
register(
|
||||
id='MinitaurReactiveEnv-v0',
|
||||
entry_point='pybullet_envs.minitaur.envs:MinitaurReactiveEnv',
|
||||
@@ -44,7 +46,6 @@ register(
|
||||
reward_threshold=5.0,
|
||||
)
|
||||
|
||||
|
||||
register(
|
||||
id='MinitaurBallGymEnv-v0',
|
||||
entry_point='pybullet_envs.minitaur.envs:MinitaurBallGymEnv',
|
||||
@@ -52,7 +53,6 @@ register(
|
||||
reward_threshold=5.0,
|
||||
)
|
||||
|
||||
|
||||
register(
|
||||
id='MinitaurTrottingEnv-v0',
|
||||
entry_point='pybullet_envs.minitaur.envs:MinitaurTrottingEnv',
|
||||
@@ -81,8 +81,6 @@ register(
|
||||
reward_threshold=5.0,
|
||||
)
|
||||
|
||||
|
||||
|
||||
register(
|
||||
id='RacecarBulletEnv-v0',
|
||||
entry_point='pybullet_envs.bullet:RacecarGymEnv',
|
||||
@@ -91,128 +89,113 @@ register(
|
||||
)
|
||||
|
||||
register(
|
||||
id='RacecarZedBulletEnv-v0',
|
||||
entry_point='pybullet_envs.bullet:RacecarZEDGymEnv',
|
||||
max_episode_steps=1000,
|
||||
reward_threshold=5.0,
|
||||
)
|
||||
|
||||
|
||||
register(
|
||||
id='KukaBulletEnv-v0',
|
||||
entry_point='pybullet_envs.bullet:KukaGymEnv',
|
||||
max_episode_steps=1000,
|
||||
reward_threshold=5.0,
|
||||
id='RacecarZedBulletEnv-v0',
|
||||
entry_point='pybullet_envs.bullet:RacecarZEDGymEnv',
|
||||
max_episode_steps=1000,
|
||||
reward_threshold=5.0,
|
||||
)
|
||||
|
||||
register(
|
||||
id='KukaCamBulletEnv-v0',
|
||||
entry_point='pybullet_envs.bullet:KukaCamGymEnv',
|
||||
max_episode_steps=1000,
|
||||
reward_threshold=5.0,
|
||||
id='KukaBulletEnv-v0',
|
||||
entry_point='pybullet_envs.bullet:KukaGymEnv',
|
||||
max_episode_steps=1000,
|
||||
reward_threshold=5.0,
|
||||
)
|
||||
|
||||
register(
|
||||
id='KukaDiverseObjectGrasping-v0',
|
||||
entry_point='pybullet_envs.bullet:KukaDiverseObjectEnv',
|
||||
max_episode_steps=1000,
|
||||
reward_threshold=5.0,
|
||||
id='KukaCamBulletEnv-v0',
|
||||
entry_point='pybullet_envs.bullet:KukaCamGymEnv',
|
||||
max_episode_steps=1000,
|
||||
reward_threshold=5.0,
|
||||
)
|
||||
|
||||
register(
|
||||
id='InvertedPendulumBulletEnv-v0',
|
||||
entry_point='pybullet_envs.gym_pendulum_envs:InvertedPendulumBulletEnv',
|
||||
max_episode_steps=1000,
|
||||
reward_threshold=950.0,
|
||||
)
|
||||
|
||||
register(
|
||||
id='InvertedDoublePendulumBulletEnv-v0',
|
||||
entry_point='pybullet_envs.gym_pendulum_envs:InvertedDoublePendulumBulletEnv',
|
||||
max_episode_steps=1000,
|
||||
reward_threshold=9100.0,
|
||||
)
|
||||
|
||||
register(
|
||||
id='InvertedPendulumSwingupBulletEnv-v0',
|
||||
entry_point='pybullet_envs.gym_pendulum_envs:InvertedPendulumSwingupBulletEnv',
|
||||
max_episode_steps=1000,
|
||||
reward_threshold=800.0,
|
||||
)
|
||||
|
||||
register(
|
||||
id='ReacherBulletEnv-v0',
|
||||
entry_point='pybullet_envs.gym_manipulator_envs:ReacherBulletEnv',
|
||||
max_episode_steps=150,
|
||||
reward_threshold=18.0,
|
||||
)
|
||||
|
||||
register(
|
||||
id='PusherBulletEnv-v0',
|
||||
entry_point='pybullet_envs.gym_manipulator_envs:PusherBulletEnv',
|
||||
max_episode_steps=150,
|
||||
reward_threshold=18.0,
|
||||
id='KukaDiverseObjectGrasping-v0',
|
||||
entry_point='pybullet_envs.bullet:KukaDiverseObjectEnv',
|
||||
max_episode_steps=1000,
|
||||
reward_threshold=5.0,
|
||||
)
|
||||
|
||||
register(
|
||||
id='ThrowerBulletEnv-v0',
|
||||
entry_point='pybullet_envs.gym_manipulator_envs:ThrowerBulletEnv',
|
||||
max_episode_steps=100,
|
||||
reward_threshold=18.0,
|
||||
id='InvertedPendulumBulletEnv-v0',
|
||||
entry_point='pybullet_envs.gym_pendulum_envs:InvertedPendulumBulletEnv',
|
||||
max_episode_steps=1000,
|
||||
reward_threshold=950.0,
|
||||
)
|
||||
|
||||
register(
|
||||
id='StrikerBulletEnv-v0',
|
||||
entry_point='pybullet_envs.gym_manipulator_envs:StrikerBulletEnv',
|
||||
max_episode_steps=100,
|
||||
reward_threshold=18.0,
|
||||
id='InvertedDoublePendulumBulletEnv-v0',
|
||||
entry_point='pybullet_envs.gym_pendulum_envs:InvertedDoublePendulumBulletEnv',
|
||||
max_episode_steps=1000,
|
||||
reward_threshold=9100.0,
|
||||
)
|
||||
|
||||
register(
|
||||
id='Walker2DBulletEnv-v0',
|
||||
entry_point='pybullet_envs.gym_locomotion_envs:Walker2DBulletEnv',
|
||||
max_episode_steps=1000,
|
||||
reward_threshold=2500.0
|
||||
)
|
||||
register(
|
||||
id='HalfCheetahBulletEnv-v0',
|
||||
entry_point='pybullet_envs.gym_locomotion_envs:HalfCheetahBulletEnv',
|
||||
max_episode_steps=1000,
|
||||
reward_threshold=3000.0
|
||||
)
|
||||
id='InvertedPendulumSwingupBulletEnv-v0',
|
||||
entry_point='pybullet_envs.gym_pendulum_envs:InvertedPendulumSwingupBulletEnv',
|
||||
max_episode_steps=1000,
|
||||
reward_threshold=800.0,
|
||||
)
|
||||
|
||||
register(
|
||||
id='AntBulletEnv-v0',
|
||||
entry_point='pybullet_envs.gym_locomotion_envs:AntBulletEnv',
|
||||
max_episode_steps=1000,
|
||||
reward_threshold=2500.0
|
||||
)
|
||||
id='ReacherBulletEnv-v0',
|
||||
entry_point='pybullet_envs.gym_manipulator_envs:ReacherBulletEnv',
|
||||
max_episode_steps=150,
|
||||
reward_threshold=18.0,
|
||||
)
|
||||
|
||||
register(
|
||||
id='HopperBulletEnv-v0',
|
||||
entry_point='pybullet_envs.gym_locomotion_envs:HopperBulletEnv',
|
||||
max_episode_steps=1000,
|
||||
reward_threshold=2500.0
|
||||
)
|
||||
id='PusherBulletEnv-v0',
|
||||
entry_point='pybullet_envs.gym_manipulator_envs:PusherBulletEnv',
|
||||
max_episode_steps=150,
|
||||
reward_threshold=18.0,
|
||||
)
|
||||
|
||||
register(
|
||||
id='HumanoidBulletEnv-v0',
|
||||
entry_point='pybullet_envs.gym_locomotion_envs:HumanoidBulletEnv',
|
||||
max_episode_steps=1000
|
||||
)
|
||||
id='ThrowerBulletEnv-v0',
|
||||
entry_point='pybullet_envs.gym_manipulator_envs:ThrowerBulletEnv',
|
||||
max_episode_steps=100,
|
||||
reward_threshold=18.0,
|
||||
)
|
||||
|
||||
register(
|
||||
id='HumanoidFlagrunBulletEnv-v0',
|
||||
entry_point='pybullet_envs.gym_locomotion_envs:HumanoidFlagrunBulletEnv',
|
||||
max_episode_steps=1000,
|
||||
reward_threshold=2000.0
|
||||
)
|
||||
id='StrikerBulletEnv-v0',
|
||||
entry_point='pybullet_envs.gym_manipulator_envs:StrikerBulletEnv',
|
||||
max_episode_steps=100,
|
||||
reward_threshold=18.0,
|
||||
)
|
||||
|
||||
register(
|
||||
id='HumanoidFlagrunHarderBulletEnv-v0',
|
||||
entry_point='pybullet_envs.gym_locomotion_envs:HumanoidFlagrunHarderBulletEnv',
|
||||
max_episode_steps=1000
|
||||
)
|
||||
register(id='Walker2DBulletEnv-v0',
|
||||
entry_point='pybullet_envs.gym_locomotion_envs:Walker2DBulletEnv',
|
||||
max_episode_steps=1000,
|
||||
reward_threshold=2500.0)
|
||||
register(id='HalfCheetahBulletEnv-v0',
|
||||
entry_point='pybullet_envs.gym_locomotion_envs:HalfCheetahBulletEnv',
|
||||
max_episode_steps=1000,
|
||||
reward_threshold=3000.0)
|
||||
|
||||
register(id='AntBulletEnv-v0',
|
||||
entry_point='pybullet_envs.gym_locomotion_envs:AntBulletEnv',
|
||||
max_episode_steps=1000,
|
||||
reward_threshold=2500.0)
|
||||
|
||||
register(id='HopperBulletEnv-v0',
|
||||
entry_point='pybullet_envs.gym_locomotion_envs:HopperBulletEnv',
|
||||
max_episode_steps=1000,
|
||||
reward_threshold=2500.0)
|
||||
|
||||
register(id='HumanoidBulletEnv-v0',
|
||||
entry_point='pybullet_envs.gym_locomotion_envs:HumanoidBulletEnv',
|
||||
max_episode_steps=1000)
|
||||
|
||||
register(id='HumanoidFlagrunBulletEnv-v0',
|
||||
entry_point='pybullet_envs.gym_locomotion_envs:HumanoidFlagrunBulletEnv',
|
||||
max_episode_steps=1000,
|
||||
reward_threshold=2000.0)
|
||||
|
||||
register(id='HumanoidFlagrunHarderBulletEnv-v0',
|
||||
entry_point='pybullet_envs.gym_locomotion_envs:HumanoidFlagrunHarderBulletEnv',
|
||||
max_episode_steps=1000)
|
||||
|
||||
#register(
|
||||
# id='AtlasBulletEnv-v0',
|
||||
@@ -220,6 +203,7 @@ register(
|
||||
# max_episode_steps=1000
|
||||
# )
|
||||
|
||||
|
||||
def getList():
|
||||
btenvs = ['- ' + spec.id for spec in gym.envs.registry.all() if spec.id.find('Bullet')>=0]
|
||||
return btenvs
|
||||
btenvs = ['- ' + spec.id for spec in gym.envs.registry.all() if spec.id.find('Bullet') >= 0]
|
||||
return btenvs
|
||||
|
||||
@@ -11,7 +11,6 @@
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""Executable scripts for reinforcement learning."""
|
||||
|
||||
from __future__ import absolute_import
|
||||
|
||||
@@ -11,7 +11,6 @@
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""Example configurations using the PPO algorithm."""
|
||||
|
||||
from __future__ import absolute_import
|
||||
@@ -29,6 +28,7 @@ import pybullet_envs.bullet.minitaur_gym_env as minitaur_gym_env
|
||||
import pybullet_envs
|
||||
import tensorflow as tf
|
||||
|
||||
|
||||
def default():
|
||||
"""Default configuration for PPO."""
|
||||
# General
|
||||
@@ -38,10 +38,7 @@ def default():
|
||||
use_gpu = False
|
||||
# Network
|
||||
network = networks.feed_forward_gaussian
|
||||
weight_summaries = dict(
|
||||
all=r'.*',
|
||||
policy=r'.*/policy/.*',
|
||||
value=r'.*/value/.*')
|
||||
weight_summaries = dict(all=r'.*', policy=r'.*/policy/.*', value=r'.*/value/.*')
|
||||
policy_layers = 200, 100
|
||||
value_layers = 200, 100
|
||||
init_mean_factor = 0.1
|
||||
@@ -52,7 +49,7 @@ def default():
|
||||
optimizer = tf.train.AdamOptimizer
|
||||
update_epochs_policy = 64
|
||||
update_epochs_value = 64
|
||||
learning_rate = 1e-4
|
||||
learning_rate = 1e-4
|
||||
# Losses
|
||||
discount = 0.995
|
||||
kl_target = 1e-2
|
||||
@@ -69,6 +66,7 @@ def pybullet_pendulum():
|
||||
steps = 5e7 # 50M
|
||||
return locals()
|
||||
|
||||
|
||||
def pybullet_doublependulum():
|
||||
locals().update(default())
|
||||
env = 'InvertedDoublePendulumBulletEnv-v0'
|
||||
@@ -76,6 +74,7 @@ def pybullet_doublependulum():
|
||||
steps = 5e7 # 50M
|
||||
return locals()
|
||||
|
||||
|
||||
def pybullet_pendulumswingup():
|
||||
locals().update(default())
|
||||
env = 'InvertedPendulumSwingupBulletEnv-v0'
|
||||
@@ -83,6 +82,7 @@ def pybullet_pendulumswingup():
|
||||
steps = 5e7 # 50M
|
||||
return locals()
|
||||
|
||||
|
||||
def pybullet_cheetah():
|
||||
"""Configuration for MuJoCo's half cheetah task."""
|
||||
locals().update(default())
|
||||
@@ -92,6 +92,7 @@ def pybullet_cheetah():
|
||||
steps = 1e8 # 100M
|
||||
return locals()
|
||||
|
||||
|
||||
def pybullet_ant():
|
||||
locals().update(default())
|
||||
env = 'AntBulletEnv-v0'
|
||||
@@ -99,6 +100,7 @@ def pybullet_ant():
|
||||
steps = 5e7 # 50M
|
||||
return locals()
|
||||
|
||||
|
||||
def pybullet_kuka_grasping():
|
||||
"""Configuration for Bullet Kuka grasping task."""
|
||||
locals().update(default())
|
||||
@@ -113,7 +115,7 @@ def pybullet_racecar():
|
||||
"""Configuration for Bullet MIT Racecar task."""
|
||||
locals().update(default())
|
||||
# Environment
|
||||
env = 'RacecarBulletEnv-v0' #functools.partial(racecarGymEnv.RacecarGymEnv, isDiscrete=False, renders=True)
|
||||
env = 'RacecarBulletEnv-v0' #functools.partial(racecarGymEnv.RacecarGymEnv, isDiscrete=False, renders=True)
|
||||
max_length = 10
|
||||
steps = 1e7 # 10M
|
||||
return locals()
|
||||
@@ -132,29 +134,27 @@ def pybullet_minitaur():
|
||||
"""Configuration specific to minitaur_gym_env.MinitaurBulletEnv class."""
|
||||
locals().update(default())
|
||||
randomizer = (minitaur_env_randomizer.MinitaurEnvRandomizer())
|
||||
env = functools.partial(
|
||||
minitaur_gym_env.MinitaurBulletEnv,
|
||||
accurate_motor_model_enabled=True,
|
||||
motor_overheat_protection=True,
|
||||
pd_control_enabled=True,
|
||||
env_randomizer=randomizer,
|
||||
render=False)
|
||||
env = functools.partial(minitaur_gym_env.MinitaurBulletEnv,
|
||||
accurate_motor_model_enabled=True,
|
||||
motor_overheat_protection=True,
|
||||
pd_control_enabled=True,
|
||||
env_randomizer=randomizer,
|
||||
render=False)
|
||||
max_length = 1000
|
||||
steps = 3e7 # 30M
|
||||
return locals()
|
||||
|
||||
|
||||
def pybullet_duck_minitaur():
|
||||
"""Configuration specific to minitaur_gym_env.MinitaurBulletDuckEnv class."""
|
||||
locals().update(default())
|
||||
randomizer = (minitaur_env_randomizer.MinitaurEnvRandomizer())
|
||||
env = functools.partial(
|
||||
minitaur_gym_env.MinitaurBulletDuckEnv,
|
||||
accurate_motor_model_enabled=True,
|
||||
motor_overheat_protection=True,
|
||||
pd_control_enabled=True,
|
||||
env_randomizer=randomizer,
|
||||
render=False)
|
||||
env = functools.partial(minitaur_gym_env.MinitaurBulletDuckEnv,
|
||||
accurate_motor_model_enabled=True,
|
||||
motor_overheat_protection=True,
|
||||
pd_control_enabled=True,
|
||||
env_randomizer=randomizer,
|
||||
render=False)
|
||||
max_length = 1000
|
||||
steps = 3e7 # 30M
|
||||
return locals()
|
||||
|
||||
|
||||
@@ -11,7 +11,6 @@
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""Network definitions for the PPO algorithm."""
|
||||
|
||||
from __future__ import absolute_import
|
||||
@@ -24,13 +23,10 @@ import operator
|
||||
|
||||
import tensorflow as tf
|
||||
|
||||
|
||||
NetworkOutput = collections.namedtuple(
|
||||
'NetworkOutput', 'policy, mean, logstd, value, state')
|
||||
NetworkOutput = collections.namedtuple('NetworkOutput', 'policy, mean, logstd, value, state')
|
||||
|
||||
|
||||
def feed_forward_gaussian(
|
||||
config, action_size, observations, unused_length, state=None):
|
||||
def feed_forward_gaussian(config, action_size, observations, unused_length, state=None):
|
||||
"""Independent feed forward networks for policy and value.
|
||||
|
||||
The policy network outputs the mean action and the log standard deviation
|
||||
@@ -50,20 +46,22 @@ def feed_forward_gaussian(
|
||||
factor=config.init_mean_factor)
|
||||
logstd_initializer = tf.random_normal_initializer(config.init_logstd, 1e-10)
|
||||
flat_observations = tf.reshape(observations, [
|
||||
tf.shape(observations)[0], tf.shape(observations)[1],
|
||||
functools.reduce(operator.mul, observations.shape.as_list()[2:], 1)])
|
||||
tf.shape(observations)[0],
|
||||
tf.shape(observations)[1],
|
||||
functools.reduce(operator.mul,
|
||||
observations.shape.as_list()[2:], 1)
|
||||
])
|
||||
with tf.variable_scope('policy'):
|
||||
x = flat_observations
|
||||
for size in config.policy_layers:
|
||||
x = tf.contrib.layers.fully_connected(x, size, tf.nn.relu)
|
||||
mean = tf.contrib.layers.fully_connected(
|
||||
x, action_size, tf.tanh,
|
||||
weights_initializer=mean_weights_initializer)
|
||||
logstd = tf.get_variable(
|
||||
'logstd', mean.shape[2:], tf.float32, logstd_initializer)
|
||||
logstd = tf.tile(
|
||||
logstd[None, None],
|
||||
[tf.shape(mean)[0], tf.shape(mean)[1]] + [1] * (mean.shape.ndims - 2))
|
||||
mean = tf.contrib.layers.fully_connected(x,
|
||||
action_size,
|
||||
tf.tanh,
|
||||
weights_initializer=mean_weights_initializer)
|
||||
logstd = tf.get_variable('logstd', mean.shape[2:], tf.float32, logstd_initializer)
|
||||
logstd = tf.tile(logstd[None, None],
|
||||
[tf.shape(mean)[0], tf.shape(mean)[1]] + [1] * (mean.shape.ndims - 2))
|
||||
with tf.variable_scope('value'):
|
||||
x = flat_observations
|
||||
for size in config.value_layers:
|
||||
@@ -72,13 +70,11 @@ def feed_forward_gaussian(
|
||||
mean = tf.check_numerics(mean, 'mean')
|
||||
logstd = tf.check_numerics(logstd, 'logstd')
|
||||
value = tf.check_numerics(value, 'value')
|
||||
policy = tf.contrib.distributions.MultivariateNormalDiag(
|
||||
mean, tf.exp(logstd))
|
||||
policy = tf.contrib.distributions.MultivariateNormalDiag(mean, tf.exp(logstd))
|
||||
return NetworkOutput(policy, mean, logstd, value, state)
|
||||
|
||||
|
||||
def recurrent_gaussian(
|
||||
config, action_size, observations, length, state=None):
|
||||
def recurrent_gaussian(config, action_size, observations, length, state=None):
|
||||
"""Independent recurrent policy and feed forward value networks.
|
||||
|
||||
The policy network outputs the mean action and the log standard deviation
|
||||
@@ -100,21 +96,23 @@ def recurrent_gaussian(
|
||||
logstd_initializer = tf.random_normal_initializer(config.init_logstd, 1e-10)
|
||||
cell = tf.contrib.rnn.GRUBlockCell(config.policy_layers[-1])
|
||||
flat_observations = tf.reshape(observations, [
|
||||
tf.shape(observations)[0], tf.shape(observations)[1],
|
||||
functools.reduce(operator.mul, observations.shape.as_list()[2:], 1)])
|
||||
tf.shape(observations)[0],
|
||||
tf.shape(observations)[1],
|
||||
functools.reduce(operator.mul,
|
||||
observations.shape.as_list()[2:], 1)
|
||||
])
|
||||
with tf.variable_scope('policy'):
|
||||
x = flat_observations
|
||||
for size in config.policy_layers[:-1]:
|
||||
x = tf.contrib.layers.fully_connected(x, size, tf.nn.relu)
|
||||
x, state = tf.nn.dynamic_rnn(cell, x, length, state, tf.float32)
|
||||
mean = tf.contrib.layers.fully_connected(
|
||||
x, action_size, tf.tanh,
|
||||
weights_initializer=mean_weights_initializer)
|
||||
logstd = tf.get_variable(
|
||||
'logstd', mean.shape[2:], tf.float32, logstd_initializer)
|
||||
logstd = tf.tile(
|
||||
logstd[None, None],
|
||||
[tf.shape(mean)[0], tf.shape(mean)[1]] + [1] * (mean.shape.ndims - 2))
|
||||
mean = tf.contrib.layers.fully_connected(x,
|
||||
action_size,
|
||||
tf.tanh,
|
||||
weights_initializer=mean_weights_initializer)
|
||||
logstd = tf.get_variable('logstd', mean.shape[2:], tf.float32, logstd_initializer)
|
||||
logstd = tf.tile(logstd[None, None],
|
||||
[tf.shape(mean)[0], tf.shape(mean)[1]] + [1] * (mean.shape.ndims - 2))
|
||||
with tf.variable_scope('value'):
|
||||
x = flat_observations
|
||||
for size in config.value_layers:
|
||||
@@ -123,7 +121,6 @@ def recurrent_gaussian(
|
||||
mean = tf.check_numerics(mean, 'mean')
|
||||
logstd = tf.check_numerics(logstd, 'logstd')
|
||||
value = tf.check_numerics(value, 'value')
|
||||
policy = tf.contrib.distributions.MultivariateNormalDiag(
|
||||
mean, tf.exp(logstd))
|
||||
policy = tf.contrib.distributions.MultivariateNormalDiag(mean, tf.exp(logstd))
|
||||
# assert state.shape.as_list()[0] is not None
|
||||
return NetworkOutput(policy, mean, logstd, value, state)
|
||||
|
||||
@@ -11,7 +11,6 @@
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""Proximal Policy Optimization algorithm."""
|
||||
|
||||
from __future__ import absolute_import
|
||||
|
||||
@@ -11,7 +11,6 @@
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""Proximal Policy Optimization algorithm.
|
||||
|
||||
Based on John Schulman's implementation in Python and Theano:
|
||||
@@ -49,51 +48,51 @@ class PPOAlgorithm(object):
|
||||
self._is_training = is_training
|
||||
self._should_log = should_log
|
||||
self._config = config
|
||||
self._observ_filter = normalize.StreamingNormalize(
|
||||
self._batch_env.observ[0], center=True, scale=True, clip=5,
|
||||
name='normalize_observ')
|
||||
self._reward_filter = normalize.StreamingNormalize(
|
||||
self._batch_env.reward[0], center=False, scale=True, clip=10,
|
||||
name='normalize_reward')
|
||||
self._observ_filter = normalize.StreamingNormalize(self._batch_env.observ[0],
|
||||
center=True,
|
||||
scale=True,
|
||||
clip=5,
|
||||
name='normalize_observ')
|
||||
self._reward_filter = normalize.StreamingNormalize(self._batch_env.reward[0],
|
||||
center=False,
|
||||
scale=True,
|
||||
clip=10,
|
||||
name='normalize_reward')
|
||||
# Memory stores tuple of observ, action, mean, logstd, reward.
|
||||
template = (
|
||||
self._batch_env.observ[0], self._batch_env.action[0],
|
||||
self._batch_env.action[0], self._batch_env.action[0],
|
||||
self._batch_env.reward[0])
|
||||
self._memory = memory.EpisodeMemory(
|
||||
template, config.update_every, config.max_length, 'memory')
|
||||
template = (self._batch_env.observ[0], self._batch_env.action[0], self._batch_env.action[0],
|
||||
self._batch_env.action[0], self._batch_env.reward[0])
|
||||
self._memory = memory.EpisodeMemory(template, config.update_every, config.max_length, 'memory')
|
||||
self._memory_index = tf.Variable(0, False)
|
||||
use_gpu = self._config.use_gpu and utility.available_gpus()
|
||||
with tf.device('/gpu:0' if use_gpu else '/cpu:0'):
|
||||
# Create network variables for later calls to reuse.
|
||||
action_size = self._batch_env.action.shape[1].value
|
||||
self._network = tf.make_template(
|
||||
'network', functools.partial(config.network, config, action_size))
|
||||
self._network = tf.make_template('network',
|
||||
functools.partial(config.network, config, action_size))
|
||||
output = self._network(
|
||||
tf.zeros_like(self._batch_env.observ)[:, None],
|
||||
tf.ones(len(self._batch_env)))
|
||||
tf.zeros_like(self._batch_env.observ)[:, None], tf.ones(len(self._batch_env)))
|
||||
with tf.variable_scope('ppo_temporary'):
|
||||
self._episodes = memory.EpisodeMemory(
|
||||
template, len(batch_env), config.max_length, 'episodes')
|
||||
self._episodes = memory.EpisodeMemory(template, len(batch_env), config.max_length,
|
||||
'episodes')
|
||||
if output.state is None:
|
||||
self._last_state = None
|
||||
else:
|
||||
# Ensure the batch dimension is set.
|
||||
tf.contrib.framework.nest.map_structure(
|
||||
lambda x: x.set_shape([len(batch_env)] + x.shape.as_list()[1:]),
|
||||
output.state)
|
||||
lambda x: x.set_shape([len(batch_env)] + x.shape.as_list()[1:]), output.state)
|
||||
# pylint: disable=undefined-variable
|
||||
self._last_state = tf.contrib.framework.nest.map_structure(
|
||||
lambda x: tf.Variable(lambda: tf.zeros_like(x), False),
|
||||
output.state)
|
||||
self._last_action = tf.Variable(
|
||||
tf.zeros_like(self._batch_env.action), False, name='last_action')
|
||||
self._last_mean = tf.Variable(
|
||||
tf.zeros_like(self._batch_env.action), False, name='last_mean')
|
||||
self._last_logstd = tf.Variable(
|
||||
tf.zeros_like(self._batch_env.action), False, name='last_logstd')
|
||||
self._penalty = tf.Variable(
|
||||
self._config.kl_init_penalty, False, dtype=tf.float32)
|
||||
lambda x: tf.Variable(lambda: tf.zeros_like(x), False), output.state)
|
||||
self._last_action = tf.Variable(tf.zeros_like(self._batch_env.action),
|
||||
False,
|
||||
name='last_action')
|
||||
self._last_mean = tf.Variable(tf.zeros_like(self._batch_env.action),
|
||||
False,
|
||||
name='last_mean')
|
||||
self._last_logstd = tf.Variable(tf.zeros_like(self._batch_env.action),
|
||||
False,
|
||||
name='last_logstd')
|
||||
self._penalty = tf.Variable(self._config.kl_init_penalty, False, dtype=tf.float32)
|
||||
self._optimizer = self._config.optimizer(self._config.learning_rate)
|
||||
|
||||
def begin_episode(self, agent_indices):
|
||||
@@ -109,8 +108,7 @@ class PPOAlgorithm(object):
|
||||
if self._last_state is None:
|
||||
reset_state = tf.no_op()
|
||||
else:
|
||||
reset_state = utility.reinit_nested_vars(
|
||||
self._last_state, agent_indices)
|
||||
reset_state = utility.reinit_nested_vars(self._last_state, agent_indices)
|
||||
reset_buffer = self._episodes.clear(agent_indices)
|
||||
with tf.control_dependencies([reset_state, reset_buffer]):
|
||||
return tf.constant('')
|
||||
@@ -130,36 +128,33 @@ class PPOAlgorithm(object):
|
||||
if self._last_state is None:
|
||||
state = None
|
||||
else:
|
||||
state = tf.contrib.framework.nest.map_structure(
|
||||
lambda x: tf.gather(x, agent_indices), self._last_state)
|
||||
state = tf.contrib.framework.nest.map_structure(lambda x: tf.gather(x, agent_indices),
|
||||
self._last_state)
|
||||
output = self._network(observ[:, None], tf.ones(observ.shape[0]), state)
|
||||
action = tf.cond(
|
||||
self._is_training, output.policy.sample, lambda: output.mean)
|
||||
action = tf.cond(self._is_training, output.policy.sample, lambda: output.mean)
|
||||
logprob = output.policy.log_prob(action)[:, 0]
|
||||
# pylint: disable=g-long-lambda
|
||||
summary = tf.cond(self._should_log, lambda: tf.summary.merge([
|
||||
tf.summary.histogram('mean', output.mean[:, 0]),
|
||||
tf.summary.histogram('std', tf.exp(output.logstd[:, 0])),
|
||||
tf.summary.histogram('action', action[:, 0]),
|
||||
tf.summary.histogram('logprob', logprob)]), str)
|
||||
summary = tf.cond(
|
||||
self._should_log, lambda: tf.summary.merge([
|
||||
tf.summary.histogram('mean', output.mean[:, 0]),
|
||||
tf.summary.histogram('std', tf.exp(output.logstd[:, 0])),
|
||||
tf.summary.histogram('action', action[:, 0]),
|
||||
tf.summary.histogram('logprob', logprob)
|
||||
]), str)
|
||||
# Remember current policy to append to memory in the experience callback.
|
||||
if self._last_state is None:
|
||||
assign_state = tf.no_op()
|
||||
else:
|
||||
assign_state = utility.assign_nested_vars(
|
||||
self._last_state, output.state, agent_indices)
|
||||
assign_state = utility.assign_nested_vars(self._last_state, output.state, agent_indices)
|
||||
with tf.control_dependencies([
|
||||
assign_state,
|
||||
tf.scatter_update(
|
||||
self._last_action, agent_indices, action[:, 0]),
|
||||
tf.scatter_update(
|
||||
self._last_mean, agent_indices, output.mean[:, 0]),
|
||||
tf.scatter_update(
|
||||
self._last_logstd, agent_indices, output.logstd[:, 0])]):
|
||||
tf.scatter_update(self._last_action, agent_indices, action[:, 0]),
|
||||
tf.scatter_update(self._last_mean, agent_indices, output.mean[:, 0]),
|
||||
tf.scatter_update(self._last_logstd, agent_indices, output.logstd[:, 0])
|
||||
]):
|
||||
return tf.check_numerics(action[:, 0], 'action'), tf.identity(summary)
|
||||
|
||||
def experience(
|
||||
self, agent_indices, observ, action, reward, unused_done, unused_nextob):
|
||||
def experience(self, agent_indices, observ, action, reward, unused_done, unused_nextob):
|
||||
"""Process the transition tuple of the current step.
|
||||
|
||||
When training, add the current transition tuple to the memory and update
|
||||
@@ -181,34 +176,36 @@ class PPOAlgorithm(object):
|
||||
return tf.cond(
|
||||
self._is_training,
|
||||
# pylint: disable=g-long-lambda
|
||||
lambda: self._define_experience(
|
||||
agent_indices, observ, action, reward), str)
|
||||
lambda: self._define_experience(agent_indices, observ, action, reward),
|
||||
str)
|
||||
|
||||
def _define_experience(self, agent_indices, observ, action, reward):
|
||||
"""Implement the branch of experience() entered during training."""
|
||||
update_filters = tf.summary.merge([
|
||||
self._observ_filter.update(observ),
|
||||
self._reward_filter.update(reward)])
|
||||
update_filters = tf.summary.merge(
|
||||
[self._observ_filter.update(observ),
|
||||
self._reward_filter.update(reward)])
|
||||
with tf.control_dependencies([update_filters]):
|
||||
if self._config.train_on_agent_action:
|
||||
# NOTE: Doesn't seem to change much.
|
||||
action = self._last_action
|
||||
batch = (
|
||||
observ, action, tf.gather(self._last_mean, agent_indices),
|
||||
tf.gather(self._last_logstd, agent_indices), reward)
|
||||
batch = (observ, action, tf.gather(self._last_mean,
|
||||
agent_indices), tf.gather(self._last_logstd,
|
||||
agent_indices), reward)
|
||||
append = self._episodes.append(batch, agent_indices)
|
||||
with tf.control_dependencies([append]):
|
||||
norm_observ = self._observ_filter.transform(observ)
|
||||
norm_reward = tf.reduce_mean(self._reward_filter.transform(reward))
|
||||
# pylint: disable=g-long-lambda
|
||||
summary = tf.cond(self._should_log, lambda: tf.summary.merge([
|
||||
update_filters,
|
||||
self._observ_filter.summary(),
|
||||
self._reward_filter.summary(),
|
||||
tf.summary.scalar('memory_size', self._memory_index),
|
||||
tf.summary.histogram('normalized_observ', norm_observ),
|
||||
tf.summary.histogram('action', self._last_action),
|
||||
tf.summary.scalar('normalized_reward', norm_reward)]), str)
|
||||
summary = tf.cond(
|
||||
self._should_log, lambda: tf.summary.merge([
|
||||
update_filters,
|
||||
self._observ_filter.summary(),
|
||||
self._reward_filter.summary(),
|
||||
tf.summary.scalar('memory_size', self._memory_index),
|
||||
tf.summary.histogram('normalized_observ', norm_observ),
|
||||
tf.summary.histogram('action', self._last_action),
|
||||
tf.summary.scalar('normalized_reward', norm_reward)
|
||||
]), str)
|
||||
return summary
|
||||
|
||||
def end_episode(self, agent_indices):
|
||||
@@ -226,20 +223,16 @@ class PPOAlgorithm(object):
|
||||
Summary tensor.
|
||||
"""
|
||||
with tf.name_scope('end_episode/'):
|
||||
return tf.cond(
|
||||
self._is_training,
|
||||
lambda: self._define_end_episode(agent_indices), str)
|
||||
return tf.cond(self._is_training, lambda: self._define_end_episode(agent_indices), str)
|
||||
|
||||
def _define_end_episode(self, agent_indices):
|
||||
"""Implement the branch of end_episode() entered during training."""
|
||||
episodes, length = self._episodes.data(agent_indices)
|
||||
space_left = self._config.update_every - self._memory_index
|
||||
use_episodes = tf.range(tf.minimum(
|
||||
tf.shape(agent_indices)[0], space_left))
|
||||
use_episodes = tf.range(tf.minimum(tf.shape(agent_indices)[0], space_left))
|
||||
episodes = [tf.gather(elem, use_episodes) for elem in episodes]
|
||||
append = self._memory.replace(
|
||||
episodes, tf.gather(length, use_episodes),
|
||||
use_episodes + self._memory_index)
|
||||
append = self._memory.replace(episodes, tf.gather(length, use_episodes),
|
||||
use_episodes + self._memory_index)
|
||||
with tf.control_dependencies([append]):
|
||||
inc_index = self._memory_index.assign_add(tf.shape(use_episodes)[0])
|
||||
with tf.control_dependencies([inc_index]):
|
||||
@@ -256,8 +249,7 @@ class PPOAlgorithm(object):
|
||||
Summary tensor.
|
||||
"""
|
||||
with tf.name_scope('training'):
|
||||
assert_full = tf.assert_equal(
|
||||
self._memory_index, self._config.update_every)
|
||||
assert_full = tf.assert_equal(self._memory_index, self._config.update_every)
|
||||
with tf.control_dependencies([assert_full]):
|
||||
data = self._memory.data()
|
||||
(observ, action, old_mean, old_logstd, reward), length = data
|
||||
@@ -265,22 +257,18 @@ class PPOAlgorithm(object):
|
||||
length = tf.identity(length)
|
||||
observ = self._observ_filter.transform(observ)
|
||||
reward = self._reward_filter.transform(reward)
|
||||
update_summary = self._perform_update_steps(
|
||||
observ, action, old_mean, old_logstd, reward, length)
|
||||
update_summary = self._perform_update_steps(observ, action, old_mean, old_logstd, reward,
|
||||
length)
|
||||
with tf.control_dependencies([update_summary]):
|
||||
penalty_summary = self._adjust_penalty(
|
||||
observ, old_mean, old_logstd, length)
|
||||
penalty_summary = self._adjust_penalty(observ, old_mean, old_logstd, length)
|
||||
with tf.control_dependencies([penalty_summary]):
|
||||
clear_memory = tf.group(
|
||||
self._memory.clear(), self._memory_index.assign(0))
|
||||
clear_memory = tf.group(self._memory.clear(), self._memory_index.assign(0))
|
||||
with tf.control_dependencies([clear_memory]):
|
||||
weight_summary = utility.variable_summaries(
|
||||
tf.trainable_variables(), self._config.weight_summaries)
|
||||
return tf.summary.merge([
|
||||
update_summary, penalty_summary, weight_summary])
|
||||
weight_summary = utility.variable_summaries(tf.trainable_variables(),
|
||||
self._config.weight_summaries)
|
||||
return tf.summary.merge([update_summary, penalty_summary, weight_summary])
|
||||
|
||||
def _perform_update_steps(
|
||||
self, observ, action, old_mean, old_logstd, reward, length):
|
||||
def _perform_update_steps(self, observ, action, old_mean, old_logstd, reward, length):
|
||||
"""Perform multiple update steps of value function and policy.
|
||||
|
||||
The advantage is computed once at the beginning and shared across
|
||||
@@ -298,37 +286,29 @@ class PPOAlgorithm(object):
|
||||
Returns:
|
||||
Summary tensor.
|
||||
"""
|
||||
return_ = utility.discounted_return(
|
||||
reward, length, self._config.discount)
|
||||
return_ = utility.discounted_return(reward, length, self._config.discount)
|
||||
value = self._network(observ, length).value
|
||||
if self._config.gae_lambda:
|
||||
advantage = utility.lambda_return(
|
||||
reward, value, length, self._config.discount,
|
||||
self._config.gae_lambda)
|
||||
advantage = utility.lambda_return(reward, value, length, self._config.discount,
|
||||
self._config.gae_lambda)
|
||||
else:
|
||||
advantage = return_ - value
|
||||
mean, variance = tf.nn.moments(advantage, axes=[0, 1], keep_dims=True)
|
||||
advantage = (advantage - mean) / (tf.sqrt(variance) + 1e-8)
|
||||
advantage = tf.Print(
|
||||
advantage, [tf.reduce_mean(return_), tf.reduce_mean(value)],
|
||||
'return and value: ')
|
||||
advantage = tf.Print(
|
||||
advantage, [tf.reduce_mean(advantage)],
|
||||
'normalized advantage: ')
|
||||
advantage = tf.Print(advantage,
|
||||
[tf.reduce_mean(return_), tf.reduce_mean(value)], 'return and value: ')
|
||||
advantage = tf.Print(advantage, [tf.reduce_mean(advantage)], 'normalized advantage: ')
|
||||
# pylint: disable=g-long-lambda
|
||||
value_loss, policy_loss, summary = tf.scan(
|
||||
lambda _1, _2: self._update_step(
|
||||
observ, action, old_mean, old_logstd, reward, advantage, length),
|
||||
tf.range(self._config.update_epochs),
|
||||
[0., 0., ''], parallel_iterations=1)
|
||||
print_losses = tf.group(
|
||||
tf.Print(0, [tf.reduce_mean(value_loss)], 'value loss: '),
|
||||
tf.Print(0, [tf.reduce_mean(policy_loss)], 'policy loss: '))
|
||||
value_loss, policy_loss, summary = tf.scan(lambda _1, _2: self._update_step(
|
||||
observ, action, old_mean, old_logstd, reward, advantage, length),
|
||||
tf.range(self._config.update_epochs), [0., 0., ''],
|
||||
parallel_iterations=1)
|
||||
print_losses = tf.group(tf.Print(0, [tf.reduce_mean(value_loss)], 'value loss: '),
|
||||
tf.Print(0, [tf.reduce_mean(policy_loss)], 'policy loss: '))
|
||||
with tf.control_dependencies([value_loss, policy_loss, print_losses]):
|
||||
return summary[self._config.update_epochs // 2]
|
||||
|
||||
def _update_step(
|
||||
self, observ, action, old_mean, old_logstd, reward, advantage, length):
|
||||
def _update_step(self, observ, action, old_mean, old_logstd, reward, advantage, length):
|
||||
"""Compute the current combined loss and perform a gradient update step.
|
||||
|
||||
Args:
|
||||
@@ -345,27 +325,20 @@ class PPOAlgorithm(object):
|
||||
"""
|
||||
value_loss, value_summary = self._value_loss(observ, reward, length)
|
||||
network = self._network(observ, length)
|
||||
policy_loss, policy_summary = self._policy_loss(
|
||||
network.mean, network.logstd, old_mean, old_logstd, action,
|
||||
advantage, length)
|
||||
value_gradients, value_variables = (
|
||||
zip(*self._optimizer.compute_gradients(value_loss)))
|
||||
policy_gradients, policy_variables = (
|
||||
zip(*self._optimizer.compute_gradients(policy_loss)))
|
||||
policy_loss, policy_summary = self._policy_loss(network.mean, network.logstd, old_mean,
|
||||
old_logstd, action, advantage, length)
|
||||
value_gradients, value_variables = (zip(*self._optimizer.compute_gradients(value_loss)))
|
||||
policy_gradients, policy_variables = (zip(*self._optimizer.compute_gradients(policy_loss)))
|
||||
all_gradients = value_gradients + policy_gradients
|
||||
all_variables = value_variables + policy_variables
|
||||
optimize = self._optimizer.apply_gradients(
|
||||
zip(all_gradients, all_variables))
|
||||
optimize = self._optimizer.apply_gradients(zip(all_gradients, all_variables))
|
||||
summary = tf.summary.merge([
|
||||
value_summary, policy_summary,
|
||||
tf.summary.scalar(
|
||||
'value_gradient_norm', tf.global_norm(value_gradients)),
|
||||
tf.summary.scalar(
|
||||
'policy_gradient_norm', tf.global_norm(policy_gradients)),
|
||||
utility.gradient_summaries(
|
||||
zip(value_gradients, value_variables), dict(value=r'.*')),
|
||||
utility.gradient_summaries(
|
||||
zip(policy_gradients, policy_variables), dict(policy=r'.*'))])
|
||||
tf.summary.scalar('value_gradient_norm', tf.global_norm(value_gradients)),
|
||||
tf.summary.scalar('policy_gradient_norm', tf.global_norm(policy_gradients)),
|
||||
utility.gradient_summaries(zip(value_gradients, value_variables), dict(value=r'.*')),
|
||||
utility.gradient_summaries(zip(policy_gradients, policy_variables), dict(policy=r'.*'))
|
||||
])
|
||||
with tf.control_dependencies([optimize]):
|
||||
return [tf.identity(x) for x in (value_loss, policy_loss, summary)]
|
||||
|
||||
@@ -385,18 +358,17 @@ class PPOAlgorithm(object):
|
||||
"""
|
||||
with tf.name_scope('value_loss'):
|
||||
value = self._network(observ, length).value
|
||||
return_ = utility.discounted_return(
|
||||
reward, length, self._config.discount)
|
||||
return_ = utility.discounted_return(reward, length, self._config.discount)
|
||||
advantage = return_ - value
|
||||
value_loss = 0.5 * self._mask(advantage ** 2, length)
|
||||
value_loss = 0.5 * self._mask(advantage**2, length)
|
||||
summary = tf.summary.merge([
|
||||
tf.summary.histogram('value_loss', value_loss),
|
||||
tf.summary.scalar('avg_value_loss', tf.reduce_mean(value_loss))])
|
||||
tf.summary.scalar('avg_value_loss', tf.reduce_mean(value_loss))
|
||||
])
|
||||
value_loss = tf.reduce_mean(value_loss)
|
||||
return tf.check_numerics(value_loss, 'value_loss'), summary
|
||||
|
||||
def _policy_loss(
|
||||
self, mean, logstd, old_mean, old_logstd, action, advantage, length):
|
||||
def _policy_loss(self, mean, logstd, old_mean, old_logstd, action, advantage, length):
|
||||
"""Compute the policy loss composed of multiple components.
|
||||
|
||||
1. The policy gradient loss is importance sampled from the data-collecting
|
||||
@@ -420,24 +392,20 @@ class PPOAlgorithm(object):
|
||||
"""
|
||||
with tf.name_scope('policy_loss'):
|
||||
entropy = utility.diag_normal_entropy(mean, logstd)
|
||||
kl = tf.reduce_mean(self._mask(utility.diag_normal_kl(
|
||||
old_mean, old_logstd, mean, logstd), length), 1)
|
||||
kl = tf.reduce_mean(
|
||||
self._mask(utility.diag_normal_kl(old_mean, old_logstd, mean, logstd), length), 1)
|
||||
policy_gradient = tf.exp(
|
||||
utility.diag_normal_logpdf(mean, logstd, action) -
|
||||
utility.diag_normal_logpdf(old_mean, old_logstd, action))
|
||||
surrogate_loss = -tf.reduce_mean(self._mask(
|
||||
policy_gradient * tf.stop_gradient(advantage), length), 1)
|
||||
surrogate_loss = -tf.reduce_mean(
|
||||
self._mask(policy_gradient * tf.stop_gradient(advantage), length), 1)
|
||||
kl_penalty = self._penalty * kl
|
||||
cutoff_threshold = self._config.kl_target * self._config.kl_cutoff_factor
|
||||
cutoff_count = tf.reduce_sum(
|
||||
tf.cast(kl > cutoff_threshold, tf.int32))
|
||||
with tf.control_dependencies([tf.cond(
|
||||
cutoff_count > 0,
|
||||
lambda: tf.Print(0, [cutoff_count], 'kl cutoff! '), int)]):
|
||||
kl_cutoff = (
|
||||
self._config.kl_cutoff_coef *
|
||||
tf.cast(kl > cutoff_threshold, tf.float32) *
|
||||
(kl - cutoff_threshold) ** 2)
|
||||
cutoff_count = tf.reduce_sum(tf.cast(kl > cutoff_threshold, tf.int32))
|
||||
with tf.control_dependencies(
|
||||
[tf.cond(cutoff_count > 0, lambda: tf.Print(0, [cutoff_count], 'kl cutoff! '), int)]):
|
||||
kl_cutoff = (self._config.kl_cutoff_coef * tf.cast(kl > cutoff_threshold, tf.float32) *
|
||||
(kl - cutoff_threshold)**2)
|
||||
policy_loss = surrogate_loss + kl_penalty + kl_cutoff
|
||||
summary = tf.summary.merge([
|
||||
tf.summary.histogram('entropy', entropy),
|
||||
@@ -449,7 +417,8 @@ class PPOAlgorithm(object):
|
||||
tf.summary.histogram('policy_loss', policy_loss),
|
||||
tf.summary.scalar('avg_surr_loss', tf.reduce_mean(surrogate_loss)),
|
||||
tf.summary.scalar('avg_kl_penalty', tf.reduce_mean(kl_penalty)),
|
||||
tf.summary.scalar('avg_policy_loss', tf.reduce_mean(policy_loss))])
|
||||
tf.summary.scalar('avg_policy_loss', tf.reduce_mean(policy_loss))
|
||||
])
|
||||
policy_loss = tf.reduce_mean(policy_loss, 0)
|
||||
return tf.check_numerics(policy_loss, 'policy_loss'), summary
|
||||
|
||||
@@ -471,30 +440,30 @@ class PPOAlgorithm(object):
|
||||
"""
|
||||
with tf.name_scope('adjust_penalty'):
|
||||
network = self._network(observ, length)
|
||||
assert_change = tf.assert_equal(
|
||||
tf.reduce_all(tf.equal(network.mean, old_mean)), False,
|
||||
message='policy should change')
|
||||
assert_change = tf.assert_equal(tf.reduce_all(tf.equal(network.mean, old_mean)),
|
||||
False,
|
||||
message='policy should change')
|
||||
print_penalty = tf.Print(0, [self._penalty], 'current penalty: ')
|
||||
with tf.control_dependencies([assert_change, print_penalty]):
|
||||
kl_change = tf.reduce_mean(self._mask(utility.diag_normal_kl(
|
||||
old_mean, old_logstd, network.mean, network.logstd), length))
|
||||
kl_change = tf.reduce_mean(
|
||||
self._mask(utility.diag_normal_kl(old_mean, old_logstd, network.mean, network.logstd),
|
||||
length))
|
||||
kl_change = tf.Print(kl_change, [kl_change], 'kl change: ')
|
||||
maybe_increase = tf.cond(
|
||||
kl_change > 1.3 * self._config.kl_target,
|
||||
# pylint: disable=g-long-lambda
|
||||
lambda: tf.Print(self._penalty.assign(
|
||||
self._penalty * 1.5), [0], 'increase penalty '),
|
||||
lambda: tf.Print(self._penalty.assign(self._penalty * 1.5), [0], 'increase penalty '),
|
||||
float)
|
||||
maybe_decrease = tf.cond(
|
||||
kl_change < 0.7 * self._config.kl_target,
|
||||
# pylint: disable=g-long-lambda
|
||||
lambda: tf.Print(self._penalty.assign(
|
||||
self._penalty / 1.5), [0], 'decrease penalty '),
|
||||
lambda: tf.Print(self._penalty.assign(self._penalty / 1.5), [0], 'decrease penalty '),
|
||||
float)
|
||||
with tf.control_dependencies([maybe_increase, maybe_decrease]):
|
||||
return tf.summary.merge([
|
||||
tf.summary.scalar('kl_change', kl_change),
|
||||
tf.summary.scalar('penalty', self._penalty)])
|
||||
tf.summary.scalar('penalty', self._penalty)
|
||||
])
|
||||
|
||||
def _mask(self, tensor, length):
|
||||
"""Set padding elements of a batch of sequences to zero.
|
||||
|
||||
@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Memory that stores episodes."""

from __future__ import absolute_import
@@ -43,10 +42,9 @@ class EpisodeMemory(object):
|
||||
self._scope = var_scope
|
||||
self._length = tf.Variable(tf.zeros(capacity, tf.int32), False)
|
||||
self._buffers = [
|
||||
tf.Variable(tf.zeros(
|
||||
[capacity, max_length] + elem.shape.as_list(),
|
||||
elem.dtype), False)
|
||||
for elem in template]
|
||||
tf.Variable(tf.zeros([capacity, max_length] + elem.shape.as_list(), elem.dtype), False)
|
||||
for elem in template
|
||||
]
|
||||
|
||||
def length(self, rows=None):
|
||||
"""Tensor holding the current length of episodes.
|
||||
@@ -72,13 +70,11 @@ class EpisodeMemory(object):
|
||||
"""
|
||||
rows = tf.range(self._capacity) if rows is None else rows
|
||||
assert rows.shape.ndims == 1
|
||||
assert_capacity = tf.assert_less(
|
||||
rows, self._capacity,
|
||||
message='capacity exceeded')
|
||||
assert_capacity = tf.assert_less(rows, self._capacity, message='capacity exceeded')
|
||||
with tf.control_dependencies([assert_capacity]):
|
||||
assert_max_length = tf.assert_less(
|
||||
tf.gather(self._length, rows), self._max_length,
|
||||
message='max length exceeded')
|
||||
assert_max_length = tf.assert_less(tf.gather(self._length, rows),
|
||||
self._max_length,
|
||||
message='max length exceeded')
|
||||
append_ops = []
|
||||
with tf.control_dependencies([assert_max_length]):
|
||||
for buffer_, elements in zip(self._buffers, transitions):
|
||||
@@ -86,8 +82,7 @@ class EpisodeMemory(object):
|
||||
indices = tf.stack([rows, timestep], 1)
|
||||
append_ops.append(tf.scatter_nd_update(buffer_, indices, elements))
|
||||
with tf.control_dependencies(append_ops):
|
||||
episode_mask = tf.reduce_sum(tf.one_hot(
|
||||
rows, self._capacity, dtype=tf.int32), 0)
|
||||
episode_mask = tf.reduce_sum(tf.one_hot(rows, self._capacity, dtype=tf.int32), 0)
|
||||
return self._length.assign_add(episode_mask)
|
||||
|
||||
def replace(self, episodes, length, rows=None):
|
||||
@@ -103,11 +98,11 @@ class EpisodeMemory(object):
|
||||
"""
|
||||
rows = tf.range(self._capacity) if rows is None else rows
|
||||
assert rows.shape.ndims == 1
|
||||
assert_capacity = tf.assert_less(
|
||||
rows, self._capacity, message='capacity exceeded')
|
||||
assert_capacity = tf.assert_less(rows, self._capacity, message='capacity exceeded')
|
||||
with tf.control_dependencies([assert_capacity]):
|
||||
assert_max_length = tf.assert_less_equal(
|
||||
length, self._max_length, message='max length exceeded')
|
||||
assert_max_length = tf.assert_less_equal(length,
|
||||
self._max_length,
|
||||
message='max length exceeded')
|
||||
replace_ops = []
|
||||
with tf.control_dependencies([assert_max_length]):
|
||||
for buffer_, elements in zip(self._buffers, episodes):
|
||||
|
||||
@@ -11,7 +11,6 @@
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""Normalize tensors based on streaming estimates of mean and variance."""
|
||||
|
||||
from __future__ import absolute_import
|
||||
@@ -24,8 +23,7 @@ import tensorflow as tf
|
||||
class StreamingNormalize(object):
|
||||
"""Normalize tensors based on streaming estimates of mean and variance."""
|
||||
|
||||
def __init__(
|
||||
self, template, center=True, scale=True, clip=10, name='normalize'):
|
||||
def __init__(self, template, center=True, scale=True, clip=10, name='normalize'):
|
||||
"""Normalize tensors based on streaming estimates of mean and variance.
|
||||
|
||||
Centering the value, scaling it by the standard deviation, and clipping
|
||||
@@ -69,8 +67,7 @@ class StreamingNormalize(object):
|
||||
if self._scale:
|
||||
# We cannot scale before seeing at least two samples.
|
||||
value /= tf.cond(
|
||||
self._count > 1, lambda: self._std() + 1e-8,
|
||||
lambda: tf.ones_like(self._var_sum))[None]
|
||||
self._count > 1, lambda: self._std() + 1e-8, lambda: tf.ones_like(self._var_sum))[None]
|
||||
if self._clip:
|
||||
value = tf.clip_by_value(value, -self._clip, self._clip)
|
||||
# Remove batch dimension if necessary.
|
||||
@@ -97,8 +94,7 @@ class StreamingNormalize(object):
mean_delta = tf.reduce_sum(value - self._mean[None, ...], 0)
new_mean = self._mean + mean_delta / step
new_mean = tf.cond(self._count > 1, lambda: new_mean, lambda: value[0])
var_delta = (
value - self._mean[None, ...]) * (value - new_mean[None, ...])
var_delta = (value - self._mean[None, ...]) * (value - new_mean[None, ...])
new_var_sum = self._var_sum + tf.reduce_sum(var_delta, 0)
with tf.control_dependencies([new_mean, new_var_sum]):
update = self._mean.assign(new_mean), self._var_sum.assign(new_var_sum)
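For context, the assign operations above implement a batched streaming (Welford-style) mean and variance update. A minimal NumPy sketch of the same update, assuming the count has already been advanced by the batch size; function and variable names here are illustrative, not part of the diff:

import numpy as np

def streaming_update(mean, var_sum, count, batch):
    # batch: array of shape [n, ...]; count: samples seen before this batch.
    new_count = count + len(batch)
    mean_delta = np.sum(batch - mean, axis=0)
    new_mean = mean + mean_delta / new_count
    # Cross term uses the old and the new mean, mirroring var_delta above.
    var_delta = (batch - mean) * (batch - new_mean)
    new_var_sum = var_sum + np.sum(var_delta, axis=0)
    return new_mean, new_var_sum, new_count

# The variance estimate is then var_sum / (count - 1) once count > 1.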
|
||||
@@ -116,10 +112,8 @@ class StreamingNormalize(object):
|
||||
Operation.
|
||||
"""
|
||||
with tf.name_scope(self._name + '/reset'):
|
||||
return tf.group(
|
||||
self._count.assign(0),
|
||||
self._mean.assign(tf.zeros_like(self._mean)),
|
||||
self._var_sum.assign(tf.zeros_like(self._var_sum)))
|
||||
return tf.group(self._count.assign(0), self._mean.assign(tf.zeros_like(self._mean)),
|
||||
self._var_sum.assign(tf.zeros_like(self._var_sum)))
|
||||
|
||||
def summary(self):
|
||||
"""Summary string of mean and standard deviation.
|
||||
@@ -128,10 +122,8 @@ class StreamingNormalize(object):
|
||||
Summary tensor.
|
||||
"""
|
||||
with tf.name_scope(self._name + '/summary'):
|
||||
mean_summary = tf.cond(
|
||||
self._count > 0, lambda: self._summary('mean', self._mean), str)
|
||||
std_summary = tf.cond(
|
||||
self._count > 1, lambda: self._summary('stddev', self._std()), str)
|
||||
mean_summary = tf.cond(self._count > 0, lambda: self._summary('mean', self._mean), str)
|
||||
std_summary = tf.cond(self._count > 1, lambda: self._summary('stddev', self._std()), str)
|
||||
return tf.summary.merge([mean_summary, std_summary])
|
||||
|
||||
def _std(self):
|
||||
@@ -143,10 +135,8 @@ class StreamingNormalize(object):
|
||||
Returns:
|
||||
Tensor of current variance.
|
||||
"""
|
||||
variance = tf.cond(
|
||||
self._count > 1,
|
||||
lambda: self._var_sum / tf.cast(self._count - 1, tf.float32),
|
||||
lambda: tf.ones_like(self._var_sum) * float('nan'))
|
||||
variance = tf.cond(self._count > 1, lambda: self._var_sum / tf.cast(
|
||||
self._count - 1, tf.float32), lambda: tf.ones_like(self._var_sum) * float('nan'))
|
||||
# The epsilon corrects for small negative variance values caused by
|
||||
# the algorithm. It was empirically chosen to work with all environments
|
||||
# tested.
|
||||
|
||||
@@ -11,7 +11,6 @@
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""Utilities for the PPO algorithm."""
|
||||
|
||||
from __future__ import absolute_import
|
||||
@@ -37,8 +36,7 @@ def reinit_nested_vars(variables, indices=None):
|
||||
Operation.
|
||||
"""
|
||||
if isinstance(variables, (tuple, list)):
|
||||
return tf.group(*[
|
||||
reinit_nested_vars(variable, indices) for variable in variables])
|
||||
return tf.group(*[reinit_nested_vars(variable, indices) for variable in variables])
|
||||
if indices is None:
|
||||
return variables.assign(tf.zeros_like(variables))
|
||||
else:
|
||||
@@ -58,9 +56,8 @@ def assign_nested_vars(variables, tensors, indices=None):
|
||||
Operation.
|
||||
"""
|
||||
if isinstance(variables, (tuple, list)):
|
||||
return tf.group(*[
|
||||
assign_nested_vars(variable, tensor)
|
||||
for variable, tensor in zip(variables, tensors)])
|
||||
return tf.group(
|
||||
*[assign_nested_vars(variable, tensor) for variable, tensor in zip(variables, tensors)])
|
||||
if indices is None:
|
||||
return variables.assign(tensors)
|
||||
else:
|
||||
@@ -71,10 +68,11 @@ def discounted_return(reward, length, discount):
"""Discounted Monte-Carlo returns."""
timestep = tf.range(reward.shape[1].value)
mask = tf.cast(timestep[None, :] < length[:, None], tf.float32)
return_ = tf.reverse(tf.transpose(tf.scan(
lambda agg, cur: cur + discount * agg,
tf.transpose(tf.reverse(mask * reward, [1]), [1, 0]),
tf.zeros_like(reward[:, -1]), 1, False), [1, 0]), [1])
return_ = tf.reverse(
tf.transpose(
tf.scan(lambda agg, cur: cur + discount * agg,
tf.transpose(tf.reverse(mask * reward, [1]), [1, 0]),
tf.zeros_like(reward[:, -1]), 1, False), [1, 0]), [1])
return tf.check_numerics(tf.stop_gradient(return_), 'return')
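The reversed tf.scan above implements the backward recursion R_t = r_t + discount * R_{t+1} over the valid (unpadded) part of each episode. A small NumPy sketch of the same computation, with illustrative names only:

import numpy as np

def discounted_return_np(reward, length, discount):
    # reward: [batch, time] array padded to a fixed horizon; length: [batch] valid steps.
    reward = np.asarray(reward, dtype=np.float64)
    timestep = np.arange(reward.shape[1])
    mask = (timestep[None, :] < np.asarray(length)[:, None]).astype(np.float64)
    masked = mask * reward
    return_ = np.zeros_like(masked)
    agg = np.zeros(reward.shape[0])
    for t in reversed(range(reward.shape[1])):
        agg = masked[:, t] + discount * agg  # R_t = r_t + discount * R_{t+1}
        return_[:, t] = agg
    return return_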
|
||||
|
||||
|
||||
@@ -85,9 +83,8 @@ def fixed_step_return(reward, value, length, discount, window):
|
||||
return_ = tf.zeros_like(reward)
|
||||
for _ in range(window):
|
||||
return_ += reward
|
||||
reward = discount * tf.concat(
|
||||
[reward[:, 1:], tf.zeros_like(reward[:, -1:])], 1)
|
||||
return_ += discount ** window * tf.concat(
|
||||
reward = discount * tf.concat([reward[:, 1:], tf.zeros_like(reward[:, -1:])], 1)
|
||||
return_ += discount**window * tf.concat(
|
||||
[value[:, window:], tf.zeros_like(value[:, -window:]), 1])
|
||||
return tf.check_numerics(tf.stop_gradient(mask * return_), 'return')
|
||||
|
||||
@@ -99,10 +96,11 @@ def lambda_return(reward, value, length, discount, lambda_):
|
||||
sequence = mask * reward + discount * value * (1 - lambda_)
|
||||
discount = mask * discount * lambda_
|
||||
sequence = tf.stack([sequence, discount], 2)
|
||||
return_ = tf.reverse(tf.transpose(tf.scan(
|
||||
lambda agg, cur: cur[0] + cur[1] * agg,
|
||||
tf.transpose(tf.reverse(sequence, [1]), [1, 2, 0]),
|
||||
tf.zeros_like(value[:, -1]), 1, False), [1, 0]), [1])
|
||||
return_ = tf.reverse(
|
||||
tf.transpose(
|
||||
tf.scan(lambda agg, cur: cur[0] + cur[1] * agg,
|
||||
tf.transpose(tf.reverse(sequence, [1]), [1, 2, 0]), tf.zeros_like(value[:, -1]),
|
||||
1, False), [1, 0]), [1])
|
||||
return tf.check_numerics(tf.stop_gradient(return_), 'return')
|
||||
|
||||
|
||||
@@ -112,27 +110,26 @@ def lambda_advantage(reward, value, length, discount):
|
||||
mask = tf.cast(timestep[None, :] < length[:, None], tf.float32)
|
||||
next_value = tf.concat([value[:, 1:], tf.zeros_like(value[:, -1:])], 1)
|
||||
delta = reward + discount * next_value - value
|
||||
advantage = tf.reverse(tf.transpose(tf.scan(
|
||||
lambda agg, cur: cur + discount * agg,
|
||||
tf.transpose(tf.reverse(mask * delta, [1]), [1, 0]),
|
||||
tf.zeros_like(delta[:, -1]), 1, False), [1, 0]), [1])
|
||||
advantage = tf.reverse(
|
||||
tf.transpose(
|
||||
tf.scan(lambda agg, cur: cur + discount * agg,
|
||||
tf.transpose(tf.reverse(mask * delta, [1]), [1, 0]), tf.zeros_like(delta[:, -1]),
|
||||
1, False), [1, 0]), [1])
|
||||
return tf.check_numerics(tf.stop_gradient(advantage), 'advantage')
|
||||
|
||||
|
||||
def diag_normal_kl(mean0, logstd0, mean1, logstd1):
"""Empirical KL divergence of two normals with diagonal covariance."""
logstd0_2, logstd1_2 = 2 * logstd0, 2 * logstd1
return 0.5 * (
tf.reduce_sum(tf.exp(logstd0_2 - logstd1_2), -1) +
tf.reduce_sum((mean1 - mean0) ** 2 / tf.exp(logstd1_2), -1) +
tf.reduce_sum(logstd1_2, -1) - tf.reduce_sum(logstd0_2, -1) -
mean0.shape[-1].value)
return 0.5 * (tf.reduce_sum(tf.exp(logstd0_2 - logstd1_2), -1) + tf.reduce_sum(
(mean1 - mean0)**2 / tf.exp(logstd1_2), -1) + tf.reduce_sum(logstd1_2, -1) -
tf.reduce_sum(logstd0_2, -1) - mean0.shape[-1].value)


def diag_normal_logpdf(mean, logstd, loc):
"""Log density of a normal with diagonal covariance."""
constant = -0.5 * math.log(2 * math.pi) - logstd
value = -0.5 * ((loc - mean) / tf.exp(logstd)) ** 2
value = -0.5 * ((loc - mean) / tf.exp(logstd))**2
return tf.reduce_sum(constant + value, -1)
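The diag_normal_kl expression above is the standard closed-form KL divergence between two diagonal Gaussians, 0.5 * (tr(S1^-1 S0) + (m1 - m0)^T S1^-1 (m1 - m0) - d + log det S1 - log det S0). A short NumPy sketch that can be used to cross-check the TensorFlow version (names are illustrative, not part of the diff):

import numpy as np

def diag_normal_kl_np(mean0, logstd0, mean1, logstd1):
    # KL(N(mean0, exp(2*logstd0)) || N(mean1, exp(2*logstd1))), summed over the last axis.
    var0, var1 = np.exp(2 * logstd0), np.exp(2 * logstd1)
    return 0.5 * np.sum(
        var0 / var1 + (mean1 - mean0) ** 2 / var1 + 2 * logstd1 - 2 * logstd0 - 1,
        axis=-1)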
|
||||
|
||||
|
||||
|
||||
@@ -11,7 +11,6 @@
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""Tools for reinforcement learning."""
|
||||
|
||||
from __future__ import absolute_import
|
||||
|
||||
@@ -11,7 +11,6 @@
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""Wrap a dictionary to access keys as attributes."""
|
||||
|
||||
from __future__ import absolute_import
|
||||
|
||||
@@ -11,7 +11,6 @@
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""Combine multiple environments to step them in batch."""
|
||||
|
||||
from __future__ import absolute_import
|
||||
@@ -83,13 +82,9 @@ class BatchEnv(object):
|
||||
message = 'Invalid action at index {}: {}'
|
||||
raise ValueError(message.format(index, action))
|
||||
if self._blocking:
|
||||
transitions = [
|
||||
env.step(action)
|
||||
for env, action in zip(self._envs, actions)]
|
||||
transitions = [env.step(action) for env, action in zip(self._envs, actions)]
|
||||
else:
|
||||
transitions = [
|
||||
env.step(action, blocking=False)
|
||||
for env, action in zip(self._envs, actions)]
|
||||
transitions = [env.step(action, blocking=False) for env, action in zip(self._envs, actions)]
|
||||
transitions = [transition() for transition in transitions]
|
||||
observs, rewards, dones, infos = zip(*transitions)
|
||||
observ = np.stack(observs)
|
||||
|
||||
@@ -11,7 +11,6 @@
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""Count learnable parameters."""
|
||||
|
||||
from __future__ import absolute_import
|
||||
|
||||
@@ -11,7 +11,6 @@
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""Batch of environments inside the TensorFlow graph."""
|
||||
|
||||
from __future__ import absolute_import
|
||||
@@ -42,18 +41,18 @@ class InGraphBatchEnv(object):
|
||||
action_shape = self._parse_shape(self._batch_env.action_space)
|
||||
action_dtype = self._parse_dtype(self._batch_env.action_space)
|
||||
with tf.variable_scope('env_temporary'):
|
||||
self._observ = tf.Variable(
|
||||
tf.zeros((len(self._batch_env),) + observ_shape, observ_dtype),
|
||||
name='observ', trainable=False)
|
||||
self._action = tf.Variable(
|
||||
tf.zeros((len(self._batch_env),) + action_shape, action_dtype),
|
||||
name='action', trainable=False)
|
||||
self._reward = tf.Variable(
|
||||
tf.zeros((len(self._batch_env),), tf.float32),
|
||||
name='reward', trainable=False)
|
||||
self._done = tf.Variable(
|
||||
tf.cast(tf.ones((len(self._batch_env),)), tf.bool),
|
||||
name='done', trainable=False)
|
||||
self._observ = tf.Variable(tf.zeros((len(self._batch_env),) + observ_shape, observ_dtype),
|
||||
name='observ',
|
||||
trainable=False)
|
||||
self._action = tf.Variable(tf.zeros((len(self._batch_env),) + action_shape, action_dtype),
|
||||
name='action',
|
||||
trainable=False)
|
||||
self._reward = tf.Variable(tf.zeros((len(self._batch_env),), tf.float32),
|
||||
name='reward',
|
||||
trainable=False)
|
||||
self._done = tf.Variable(tf.cast(tf.ones((len(self._batch_env),)), tf.bool),
|
||||
name='done',
|
||||
trainable=False)
|
||||
|
||||
def __getattr__(self, name):
|
||||
"""Forward unimplemented attributes to one of the original environments.
|
||||
@@ -89,16 +88,13 @@ class InGraphBatchEnv(object):
|
||||
if action.dtype in (tf.float16, tf.float32, tf.float64):
|
||||
action = tf.check_numerics(action, 'action')
|
||||
observ_dtype = self._parse_dtype(self._batch_env.observation_space)
|
||||
observ, reward, done = tf.py_func(
|
||||
lambda a: self._batch_env.step(a)[:3], [action],
|
||||
[observ_dtype, tf.float32, tf.bool], name='step')
|
||||
observ, reward, done = tf.py_func(lambda a: self._batch_env.step(a)[:3], [action],
|
||||
[observ_dtype, tf.float32, tf.bool],
|
||||
name='step')
|
||||
observ = tf.check_numerics(observ, 'observ')
|
||||
reward = tf.check_numerics(reward, 'reward')
|
||||
return tf.group(
|
||||
self._observ.assign(observ),
|
||||
self._action.assign(action),
|
||||
self._reward.assign(reward),
|
||||
self._done.assign(done))
|
||||
return tf.group(self._observ.assign(observ), self._action.assign(action),
|
||||
self._reward.assign(reward), self._done.assign(done))
|
||||
|
||||
def reset(self, indices=None):
|
||||
"""Reset the batch of environments.
|
||||
@@ -112,15 +108,15 @@ class InGraphBatchEnv(object):
|
||||
if indices is None:
|
||||
indices = tf.range(len(self._batch_env))
|
||||
observ_dtype = self._parse_dtype(self._batch_env.observation_space)
|
||||
observ = tf.py_func(
|
||||
self._batch_env.reset, [indices], observ_dtype, name='reset')
|
||||
observ = tf.py_func(self._batch_env.reset, [indices], observ_dtype, name='reset')
|
||||
observ = tf.check_numerics(observ, 'observ')
|
||||
reward = tf.zeros_like(indices, tf.float32)
|
||||
done = tf.zeros_like(indices, tf.bool)
|
||||
with tf.control_dependencies([
|
||||
tf.scatter_update(self._observ, indices, observ),
|
||||
tf.scatter_update(self._reward, indices, reward),
|
||||
tf.scatter_update(self._done, indices, done)]):
|
||||
tf.scatter_update(self._done, indices, done)
|
||||
]):
|
||||
return tf.identity(observ)
|
||||
|
||||
@property
|
||||
|
||||
@@ -11,7 +11,6 @@
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""Put an OpenAI Gym environment into the TensorFlow graph."""
|
||||
|
||||
from __future__ import absolute_import
|
||||
@@ -42,16 +41,15 @@ class InGraphEnv(object):
|
||||
action_shape = self._parse_shape(self._env.action_space)
|
||||
action_dtype = self._parse_dtype(self._env.action_space)
|
||||
with tf.name_scope('environment'):
|
||||
self._observ = tf.Variable(
|
||||
tf.zeros(observ_shape, observ_dtype), name='observ', trainable=False)
|
||||
self._action = tf.Variable(
|
||||
tf.zeros(action_shape, action_dtype), name='action', trainable=False)
|
||||
self._reward = tf.Variable(
|
||||
0.0, dtype=tf.float32, name='reward', trainable=False)
|
||||
self._done = tf.Variable(
|
||||
True, dtype=tf.bool, name='done', trainable=False)
|
||||
self._step = tf.Variable(
|
||||
0, dtype=tf.int32, name='step', trainable=False)
|
||||
self._observ = tf.Variable(tf.zeros(observ_shape, observ_dtype),
|
||||
name='observ',
|
||||
trainable=False)
|
||||
self._action = tf.Variable(tf.zeros(action_shape, action_dtype),
|
||||
name='action',
|
||||
trainable=False)
|
||||
self._reward = tf.Variable(0.0, dtype=tf.float32, name='reward', trainable=False)
|
||||
self._done = tf.Variable(True, dtype=tf.bool, name='done', trainable=False)
|
||||
self._step = tf.Variable(0, dtype=tf.int32, name='step', trainable=False)
|
||||
|
||||
def __getattr__(self, name):
|
||||
"""Forward unimplemented attributes to the original environment.
|
||||
@@ -79,17 +77,14 @@ class InGraphEnv(object):
|
||||
if action.dtype in (tf.float16, tf.float32, tf.float64):
|
||||
action = tf.check_numerics(action, 'action')
|
||||
observ_dtype = self._parse_dtype(self._env.observation_space)
|
||||
observ, reward, done = tf.py_func(
|
||||
lambda a: self._env.step(a)[:3], [action],
|
||||
[observ_dtype, tf.float32, tf.bool], name='step')
|
||||
observ, reward, done = tf.py_func(lambda a: self._env.step(a)[:3], [action],
|
||||
[observ_dtype, tf.float32, tf.bool],
|
||||
name='step')
|
||||
observ = tf.check_numerics(observ, 'observ')
|
||||
reward = tf.check_numerics(reward, 'reward')
|
||||
return tf.group(
|
||||
self._observ.assign(observ),
|
||||
self._action.assign(action),
|
||||
self._reward.assign(reward),
|
||||
self._done.assign(done),
|
||||
self._step.assign_add(1))
|
||||
return tf.group(self._observ.assign(observ), self._action.assign(action),
|
||||
self._reward.assign(reward), self._done.assign(done),
|
||||
self._step.assign_add(1))
|
||||
|
||||
def reset(self):
|
||||
"""Reset the environment.
|
||||
@@ -100,10 +95,10 @@ class InGraphEnv(object):
|
||||
observ_dtype = self._parse_dtype(self._env.observation_space)
|
||||
observ = tf.py_func(self._env.reset, [], observ_dtype, name='reset')
|
||||
observ = tf.check_numerics(observ, 'observ')
|
||||
with tf.control_dependencies([
|
||||
self._observ.assign(observ),
|
||||
self._reward.assign(0),
|
||||
self._done.assign(False)]):
|
||||
with tf.control_dependencies(
|
||||
[self._observ.assign(observ),
|
||||
self._reward.assign(0),
|
||||
self._done.assign(False)]):
|
||||
return tf.identity(observ)
|
||||
|
||||
@property
|
||||
|
||||
@@ -11,7 +11,6 @@
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""Execute operations in a loop and coordinate logging and checkpoints."""
|
||||
|
||||
from __future__ import absolute_import
|
||||
@@ -25,10 +24,8 @@ import tensorflow as tf
|
||||
|
||||
from . import streaming_mean
|
||||
|
||||
|
||||
_Phase = collections.namedtuple(
|
||||
'Phase',
|
||||
'name, writer, op, batch, steps, feed, report_every, log_every,'
|
||||
'Phase', 'name, writer, op, batch, steps, feed, report_every, log_every,'
|
||||
'checkpoint_every')
|
||||
|
||||
|
||||
@@ -56,16 +53,22 @@ class Loop(object):
|
||||
reset: Tensor indicating to the model to start a new computation.
|
||||
"""
|
||||
self._logdir = logdir
|
||||
self._step = (
|
||||
tf.Variable(0, False, name='global_step') if step is None else step)
|
||||
self._step = (tf.Variable(0, False, name='global_step') if step is None else step)
|
||||
self._log = tf.placeholder(tf.bool) if log is None else log
|
||||
self._report = tf.placeholder(tf.bool) if report is None else report
|
||||
self._reset = tf.placeholder(tf.bool) if reset is None else reset
|
||||
self._phases = []
|
||||
|
||||
def add_phase(
|
||||
self, name, done, score, summary, steps,
|
||||
report_every=None, log_every=None, checkpoint_every=None, feed=None):
|
||||
def add_phase(self,
|
||||
name,
|
||||
done,
|
||||
score,
|
||||
summary,
|
||||
steps,
|
||||
report_every=None,
|
||||
log_every=None,
|
||||
checkpoint_every=None,
|
||||
feed=None):
|
||||
"""Add a phase to the loop protocol.
|
||||
|
||||
If the model breaks long computation into multiple steps, the done tensor
|
||||
@@ -97,13 +100,12 @@ class Loop(object):
|
||||
if done.shape.ndims is None or score.shape.ndims is None:
|
||||
raise ValueError("Rank of 'done' and 'score' tensors must be known.")
|
||||
writer = self._logdir and tf.summary.FileWriter(
|
||||
os.path.join(self._logdir, name), tf.get_default_graph(),
|
||||
flush_secs=60)
|
||||
os.path.join(self._logdir, name), tf.get_default_graph(), flush_secs=60)
|
||||
op = self._define_step(done, score, summary)
|
||||
batch = 1 if score.shape.ndims == 0 else score.shape[0].value
|
||||
self._phases.append(_Phase(
|
||||
name, writer, op, batch, int(steps), feed, report_every,
|
||||
log_every, checkpoint_every))
|
||||
self._phases.append(
|
||||
_Phase(name, writer, op, batch, int(steps), feed, report_every, log_every,
|
||||
checkpoint_every))
|
||||
|
||||
def run(self, sess, saver, max_step=None):
|
||||
"""Run the loop schedule for a specified number of steps.
|
||||
@@ -133,13 +135,11 @@ class Loop(object):
|
||||
tf.logging.info(message.format(phase.name, phase_step, global_step))
|
||||
# Populate book keeping tensors.
|
||||
phase.feed[self._reset] = (steps_in < steps_made)
|
||||
phase.feed[self._log] = (
|
||||
phase.writer and
|
||||
self._is_every_steps(phase_step, phase.batch, phase.log_every))
|
||||
phase.feed[self._report] = (
|
||||
self._is_every_steps(phase_step, phase.batch, phase.report_every))
|
||||
summary, mean_score, global_step, steps_made = sess.run(
|
||||
phase.op, phase.feed)
|
||||
phase.feed[self._log] = (phase.writer and
|
||||
self._is_every_steps(phase_step, phase.batch, phase.log_every))
|
||||
phase.feed[self._report] = (self._is_every_steps(phase_step, phase.batch,
|
||||
phase.report_every))
|
||||
summary, mean_score, global_step, steps_made = sess.run(phase.op, phase.feed)
|
||||
if self._is_every_steps(phase_step, phase.batch, phase.checkpoint_every):
|
||||
self._store_checkpoint(sess, saver, global_step)
|
||||
if self._is_every_steps(phase_step, phase.batch, phase.report_every):
|
||||
@@ -207,8 +207,7 @@ class Loop(object):
|
||||
score_mean = streaming_mean.StreamingMean((), tf.float32)
|
||||
with tf.control_dependencies([done, score, summary]):
|
||||
done_score = tf.gather(score, tf.where(done)[:, 0])
|
||||
submit_score = tf.cond(
|
||||
tf.reduce_any(done), lambda: score_mean.submit(done_score), tf.no_op)
|
||||
submit_score = tf.cond(tf.reduce_any(done), lambda: score_mean.submit(done_score), tf.no_op)
|
||||
with tf.control_dependencies([submit_score]):
|
||||
mean_score = tf.cond(self._report, score_mean.clear, float)
|
||||
steps_made = tf.shape(score)[0]
|
||||
|
||||
@@ -11,7 +11,6 @@
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""Mock algorithm for testing reinforcement learning code."""
|
||||
|
||||
from __future__ import absolute_import
|
||||
|
||||
@@ -11,7 +11,6 @@
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""Mock environment for testing reinforcement learning code."""
|
||||
|
||||
from __future__ import absolute_import
|
||||
|
||||
@@ -11,7 +11,6 @@
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""In-graph simulation step of a vectorized algorithm with environments."""
|
||||
|
||||
from __future__ import absolute_import
|
||||
@@ -55,7 +54,8 @@ def simulate(batch_env, algo, log=True, reset=False):
|
||||
reset_ops = [
|
||||
batch_env.reset(agent_indices),
|
||||
tf.scatter_update(score, agent_indices, zero_scores),
|
||||
tf.scatter_update(length, agent_indices, zero_durations)]
|
||||
tf.scatter_update(length, agent_indices, zero_durations)
|
||||
]
|
||||
with tf.control_dependencies(reset_ops):
|
||||
return algo.begin_episode(agent_indices)
|
||||
|
||||
@@ -78,9 +78,8 @@ def simulate(batch_env, algo, log=True, reset=False):
|
||||
inc_length = length.assign_add(tf.ones(len(batch_env), tf.int32))
|
||||
with tf.control_dependencies([add_score, inc_length]):
|
||||
agent_indices = tf.range(len(batch_env))
|
||||
experience_summary = algo.experience(
|
||||
agent_indices, prevob, batch_env.action, batch_env.reward,
|
||||
batch_env.done, batch_env.observ)
|
||||
experience_summary = algo.experience(agent_indices, prevob, batch_env.action,
|
||||
batch_env.reward, batch_env.done, batch_env.observ)
|
||||
return tf.summary.merge([step_summary, experience_summary])
|
||||
|
||||
def _define_end_episode(agent_indices):
|
||||
@@ -96,8 +95,7 @@ def simulate(batch_env, algo, log=True, reset=False):
|
||||
"""
|
||||
assert agent_indices.shape.ndims == 1
|
||||
submit_score = mean_score.submit(tf.gather(score, agent_indices))
|
||||
submit_length = mean_length.submit(
|
||||
tf.cast(tf.gather(length, agent_indices), tf.float32))
|
||||
submit_length = mean_length.submit(tf.cast(tf.gather(length, agent_indices), tf.float32))
|
||||
with tf.control_dependencies([submit_score, submit_length]):
|
||||
return algo.end_episode(agent_indices)
|
||||
|
||||
@@ -107,41 +105,34 @@ def simulate(batch_env, algo, log=True, reset=False):
|
||||
Returns:
|
||||
Summary string.
|
||||
"""
|
||||
score_summary = tf.cond(
|
||||
tf.logical_and(log, tf.cast(mean_score.count, tf.bool)),
|
||||
lambda: tf.summary.scalar('mean_score', mean_score.clear()), str)
|
||||
length_summary = tf.cond(
|
||||
tf.logical_and(log, tf.cast(mean_length.count, tf.bool)),
|
||||
lambda: tf.summary.scalar('mean_length', mean_length.clear()), str)
|
||||
score_summary = tf.cond(tf.logical_and(log, tf.cast(
|
||||
mean_score.count, tf.bool)), lambda: tf.summary.scalar('mean_score', mean_score.clear()),
|
||||
str)
|
||||
length_summary = tf.cond(tf.logical_and(
|
||||
log, tf.cast(mean_length.count,
|
||||
tf.bool)), lambda: tf.summary.scalar('mean_length', mean_length.clear()), str)
|
||||
return tf.summary.merge([score_summary, length_summary])
|
||||
|
||||
with tf.name_scope('simulate'):
|
||||
log = tf.convert_to_tensor(log)
|
||||
reset = tf.convert_to_tensor(reset)
|
||||
with tf.variable_scope('simulate_temporary'):
|
||||
score = tf.Variable(
|
||||
tf.zeros(len(batch_env), dtype=tf.float32), False, name='score')
|
||||
length = tf.Variable(
|
||||
tf.zeros(len(batch_env), dtype=tf.int32), False, name='length')
|
||||
score = tf.Variable(tf.zeros(len(batch_env), dtype=tf.float32), False, name='score')
|
||||
length = tf.Variable(tf.zeros(len(batch_env), dtype=tf.int32), False, name='length')
|
||||
mean_score = streaming_mean.StreamingMean((), tf.float32)
|
||||
mean_length = streaming_mean.StreamingMean((), tf.float32)
|
||||
agent_indices = tf.cond(
|
||||
reset,
|
||||
lambda: tf.range(len(batch_env)),
|
||||
lambda: tf.cast(tf.where(batch_env.done)[:, 0], tf.int32))
|
||||
begin_episode = tf.cond(
|
||||
tf.cast(tf.shape(agent_indices)[0], tf.bool),
|
||||
lambda: _define_begin_episode(agent_indices), str)
|
||||
agent_indices = tf.cond(reset, lambda: tf.range(len(batch_env)), lambda: tf.cast(
|
||||
tf.where(batch_env.done)[:, 0], tf.int32))
|
||||
begin_episode = tf.cond(tf.cast(tf.shape(agent_indices)[0],
|
||||
tf.bool), lambda: _define_begin_episode(agent_indices), str)
|
||||
with tf.control_dependencies([begin_episode]):
|
||||
step = _define_step()
|
||||
with tf.control_dependencies([step]):
|
||||
agent_indices = tf.cast(tf.where(batch_env.done)[:, 0], tf.int32)
|
||||
end_episode = tf.cond(
|
||||
tf.cast(tf.shape(agent_indices)[0], tf.bool),
|
||||
lambda: _define_end_episode(agent_indices), str)
|
||||
end_episode = tf.cond(tf.cast(tf.shape(agent_indices)[0],
|
||||
tf.bool), lambda: _define_end_episode(agent_indices), str)
|
||||
with tf.control_dependencies([end_episode]):
|
||||
summary = tf.summary.merge([
|
||||
_define_summaries(), begin_episode, step, end_episode])
|
||||
summary = tf.summary.merge([_define_summaries(), begin_episode, step, end_episode])
|
||||
with tf.control_dependencies([summary]):
|
||||
done, score = tf.identity(batch_env.done), tf.identity(score)
|
||||
return done, score, summary
|
||||
|
||||
@@ -11,7 +11,6 @@
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""Compute a streaming estimation of the mean of submitted tensors."""
|
||||
|
||||
from __future__ import absolute_import
|
||||
@@ -53,9 +52,8 @@ class StreamingMean(object):
# Add a batch dimension if necessary.
if value.shape.ndims == self._sum.shape.ndims:
value = value[None, ...]
return tf.group(
self._sum.assign_add(tf.reduce_sum(value, 0)),
self._count.assign_add(tf.shape(value)[0]))
return tf.group(self._sum.assign_add(tf.reduce_sum(value, 0)),
self._count.assign_add(tf.shape(value)[0]))
|
||||
|
||||
def clear(self):
|
||||
"""Return the mean estimate and reset the streaming statistics."""
|
||||
|
||||
@@ -11,7 +11,6 @@
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""Wrappers for OpenAI Gym environments."""
|
||||
|
||||
from __future__ import absolute_import
|
||||
@@ -149,8 +148,7 @@ class FrameHistory(object):
|
||||
return self._select_frames()
|
||||
|
||||
def _select_frames(self):
|
||||
indices = [
|
||||
(self._step - index) % self._capacity for index in self._past_indices]
|
||||
indices = [(self._step - index) % self._capacity for index in self._past_indices]
|
||||
observ = self._buffer[indices]
|
||||
if self._flatten:
|
||||
observ = np.reshape(observ, (-1,) + observ.shape[2:])
|
||||
@@ -191,14 +189,14 @@ class RangeNormalize(object):
|
||||
|
||||
def __init__(self, env, observ=None, action=None):
|
||||
self._env = env
|
||||
self._should_normalize_observ = (
|
||||
observ is not False and self._is_finite(self._env.observation_space))
|
||||
self._should_normalize_observ = (observ is not False and
|
||||
self._is_finite(self._env.observation_space))
|
||||
if observ is True and not self._should_normalize_observ:
|
||||
raise ValueError('Cannot normalize infinite observation range.')
|
||||
if observ is None and not self._should_normalize_observ:
|
||||
tf.logging.info('Not normalizing infinite observation range.')
|
||||
self._should_normalize_action = (
|
||||
action is not False and self._is_finite(self._env.action_space))
|
||||
self._should_normalize_action = (action is not False and
|
||||
self._is_finite(self._env.action_space))
|
||||
if action is True and not self._should_normalize_action:
|
||||
raise ValueError('Cannot normalize infinite action range.')
|
||||
if action is None and not self._should_normalize_action:
|
||||
@@ -323,8 +321,7 @@ class ExternalProcess(object):
|
||||
action_space: The cached action space of the environment.
|
||||
"""
|
||||
self._conn, conn = multiprocessing.Pipe()
|
||||
self._process = multiprocessing.Process(
|
||||
target=self._worker, args=(constructor, conn))
|
||||
self._process = multiprocessing.Process(target=self._worker, args=(constructor, conn))
|
||||
atexit.register(self.close)
|
||||
self._process.start()
|
||||
self._observ_space = None
|
||||
|
||||
@@ -11,7 +11,6 @@
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
r"""Script to train a batch reinforcement learning algorithm.
|
||||
|
||||
Command line:
|
||||
@@ -67,21 +66,25 @@ def _define_loop(graph, logdir, train_steps, eval_steps):
|
||||
Returns:
|
||||
Loop object.
|
||||
"""
|
||||
loop = tools.Loop(
|
||||
logdir, graph.step, graph.should_log, graph.do_report,
|
||||
graph.force_reset)
|
||||
loop.add_phase(
|
||||
'train', graph.done, graph.score, graph.summary, train_steps,
|
||||
report_every=train_steps,
|
||||
log_every=train_steps // 2,
|
||||
checkpoint_every=None,
|
||||
feed={graph.is_training: True})
|
||||
loop.add_phase(
|
||||
'eval', graph.done, graph.score, graph.summary, eval_steps,
|
||||
report_every=eval_steps,
|
||||
log_every=eval_steps // 2,
|
||||
checkpoint_every=10 * eval_steps,
|
||||
feed={graph.is_training: False})
|
||||
loop = tools.Loop(logdir, graph.step, graph.should_log, graph.do_report, graph.force_reset)
|
||||
loop.add_phase('train',
|
||||
graph.done,
|
||||
graph.score,
|
||||
graph.summary,
|
||||
train_steps,
|
||||
report_every=train_steps,
|
||||
log_every=train_steps // 2,
|
||||
checkpoint_every=None,
|
||||
feed={graph.is_training: True})
|
||||
loop.add_phase('eval',
|
||||
graph.done,
|
||||
graph.score,
|
||||
graph.summary,
|
||||
eval_steps,
|
||||
report_every=eval_steps,
|
||||
log_every=eval_steps // 2,
|
||||
checkpoint_every=10 * eval_steps,
|
||||
feed={graph.is_training: False})
|
||||
return loop
|
||||
|
||||
|
||||
@@ -102,18 +105,13 @@ def train(config, env_processes):
|
||||
if config.update_every % config.num_agents:
|
||||
tf.logging.warn('Number of agents should divide episodes per update.')
|
||||
with tf.device('/cpu:0'):
|
||||
batch_env = utility.define_batch_env(
|
||||
lambda: _create_environment(config),
|
||||
config.num_agents, env_processes)
|
||||
graph = utility.define_simulation_graph(
|
||||
batch_env, config.algorithm, config)
|
||||
loop = _define_loop(
|
||||
graph, config.logdir,
|
||||
config.update_every * config.max_length,
|
||||
config.eval_episodes * config.max_length)
|
||||
total_steps = int(
|
||||
config.steps / config.update_every *
|
||||
(config.update_every + config.eval_episodes))
|
||||
batch_env = utility.define_batch_env(lambda: _create_environment(config), config.num_agents,
|
||||
env_processes)
|
||||
graph = utility.define_simulation_graph(batch_env, config.algorithm, config)
|
||||
loop = _define_loop(graph, config.logdir, config.update_every * config.max_length,
|
||||
config.eval_episodes * config.max_length)
|
||||
total_steps = int(config.steps / config.update_every *
|
||||
(config.update_every + config.eval_episodes))
|
||||
# Exclude episode related variables since the Python state of environments is
|
||||
# not checkpointed and thus new episodes start after resuming.
|
||||
saver = utility.define_saver(exclude=(r'.*_temporary/.*',))
|
||||
@@ -131,8 +129,8 @@ def main(_):
|
||||
utility.set_up_logging()
|
||||
if not FLAGS.config:
|
||||
raise KeyError('You must specify a configuration.')
|
||||
logdir = FLAGS.logdir and os.path.expanduser(os.path.join(
|
||||
FLAGS.logdir, '{}-{}'.format(FLAGS.timestamp, FLAGS.config)))
|
||||
logdir = FLAGS.logdir and os.path.expanduser(
|
||||
os.path.join(FLAGS.logdir, '{}-{}'.format(FLAGS.timestamp, FLAGS.config)))
|
||||
try:
|
||||
config = utility.load_config(logdir)
|
||||
except IOError:
|
||||
@@ -144,16 +142,11 @@ def main(_):
|
||||
|
||||
if __name__ == '__main__':
|
||||
FLAGS = tf.app.flags.FLAGS
|
||||
tf.app.flags.DEFINE_string(
|
||||
'logdir', None,
|
||||
'Base directory to store logs.')
|
||||
tf.app.flags.DEFINE_string(
|
||||
'timestamp', datetime.datetime.now().strftime('%Y%m%dT%H%M%S'),
|
||||
'Sub directory to store logs.')
|
||||
tf.app.flags.DEFINE_string(
|
||||
'config', None,
|
||||
'Configuration to execute.')
|
||||
tf.app.flags.DEFINE_boolean(
|
||||
'env_processes', True,
|
||||
'Step environments in separate processes to circumvent the GIL.')
|
||||
tf.app.flags.DEFINE_string('logdir', None, 'Base directory to store logs.')
|
||||
tf.app.flags.DEFINE_string('timestamp',
|
||||
datetime.datetime.now().strftime('%Y%m%dT%H%M%S'),
|
||||
'Sub directory to store logs.')
|
||||
tf.app.flags.DEFINE_string('config', None, 'Configuration to execute.')
|
||||
tf.app.flags.DEFINE_boolean('env_processes', True,
|
||||
'Step environments in separate processes to circumvent the GIL.')
|
||||
tf.app.run()
|
||||
|
||||
@@ -11,7 +11,6 @@
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""Utilities for using reinforcement learning algorithms."""
|
||||
|
||||
from __future__ import absolute_import
|
||||
@@ -46,8 +45,7 @@ def define_simulation_graph(batch_env, algo_cls, config):
|
||||
do_report = tf.placeholder(tf.bool, name='do_report')
|
||||
force_reset = tf.placeholder(tf.bool, name='force_reset')
|
||||
algo = algo_cls(batch_env, step, is_training, should_log, config)
|
||||
done, score, summary = tools.simulate(
|
||||
batch_env, algo, should_log, force_reset)
|
||||
done, score, summary = tools.simulate(batch_env, algo, should_log, force_reset)
|
||||
message = 'Graph contains {} trainable variables.'
|
||||
tf.logging.info(message.format(tools.count_weights()))
|
||||
# pylint: enable=unused-variable
|
||||
@@ -67,9 +65,7 @@ def define_batch_env(constructor, num_agents, env_processes):
|
||||
"""
|
||||
with tf.variable_scope('environments'):
|
||||
if env_processes:
|
||||
envs = [
|
||||
tools.wrappers.ExternalProcess(constructor)
|
||||
for _ in range(num_agents)]
|
||||
envs = [tools.wrappers.ExternalProcess(constructor) for _ in range(num_agents)]
|
||||
else:
|
||||
envs = [constructor() for _ in range(num_agents)]
|
||||
batch_env = tools.BatchEnv(envs, blocking=not env_processes)
|
||||
@@ -111,9 +107,7 @@ def initialize_variables(sess, saver, logdir, checkpoint=None, resume=None):
|
||||
ValueError: If resume expected but no log directory specified.
|
||||
RuntimeError: If no resume expected but a checkpoint was found.
|
||||
"""
|
||||
sess.run(tf.group(
|
||||
tf.local_variables_initializer(),
|
||||
tf.global_variables_initializer()))
|
||||
sess.run(tf.group(tf.local_variables_initializer(), tf.global_variables_initializer()))
|
||||
if resume and not (logdir or checkpoint):
|
||||
raise ValueError('Need to specify logdir to resume a checkpoint.')
|
||||
if logdir:
|
||||
@@ -152,9 +146,8 @@ def save_config(config, logdir=None):
|
||||
with tf.gfile.GFile(config_path, 'w') as file_:
|
||||
yaml.dump(config, file_, default_flow_style=False)
|
||||
else:
|
||||
message = (
|
||||
'Start a new run without storing summaries and checkpoints since no '
|
||||
'logging directory was specified.')
|
||||
message = ('Start a new run without storing summaries and checkpoints since no '
|
||||
'logging directory was specified.')
|
||||
tf.logging.info(message)
|
||||
return config
|
||||
|
||||
@@ -173,9 +166,8 @@ def load_config(logdir):
|
||||
"""
|
||||
config_path = logdir and os.path.join(logdir, 'config.yaml')
|
||||
if not config_path or not tf.gfile.Exists(config_path):
|
||||
message = (
|
||||
'Cannot resume an existing run since the logging directory does not '
|
||||
'contain a configuration file.')
|
||||
message = ('Cannot resume an existing run since the logging directory does not '
|
||||
'contain a configuration file.')
|
||||
raise IOError(message)
|
||||
with tf.gfile.FastGFile(config_path, 'r') as file_:
|
||||
config = yaml.load(file_, Loader=yaml.Loader)
|
||||
|
||||
@@ -11,7 +11,6 @@
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
r"""Script to render videos of the Proximal Policy Gradient algorithm.
|
||||
|
||||
Command line:
|
||||
@@ -53,8 +52,7 @@ def _create_environment(config, outdir):
|
||||
setattr(env, 'spec', getattr(env, 'spec', None))
|
||||
if config.max_length:
|
||||
env = tools.wrappers.LimitDuration(env, config.max_length)
|
||||
env = gym.wrappers.Monitor(
|
||||
env, outdir, lambda unused_episode_number: True)
|
||||
env = gym.wrappers.Monitor(env, outdir, lambda unused_episode_number: True)
|
||||
env = tools.wrappers.RangeNormalize(env)
|
||||
env = tools.wrappers.ClipAction(env)
|
||||
env = tools.wrappers.ConvertTo32Bit(env)
|
||||
@@ -71,20 +69,20 @@ def _define_loop(graph, eval_steps):
|
||||
Returns:
|
||||
Loop object.
|
||||
"""
|
||||
loop = tools.Loop(
|
||||
None, graph.step, graph.should_log, graph.do_report, graph.force_reset)
|
||||
loop.add_phase(
|
||||
'eval', graph.done, graph.score, graph.summary, eval_steps,
|
||||
report_every=eval_steps,
|
||||
log_every=None,
|
||||
checkpoint_every=None,
|
||||
feed={graph.is_training: False})
|
||||
loop = tools.Loop(None, graph.step, graph.should_log, graph.do_report, graph.force_reset)
|
||||
loop.add_phase('eval',
|
||||
graph.done,
|
||||
graph.score,
|
||||
graph.summary,
|
||||
eval_steps,
|
||||
report_every=eval_steps,
|
||||
log_every=None,
|
||||
checkpoint_every=None,
|
||||
feed={graph.is_training: False})
|
||||
return loop
|
||||
|
||||
|
||||
def visualize(
|
||||
logdir, outdir, num_agents, num_episodes, checkpoint=None,
|
||||
env_processes=True):
|
||||
def visualize(logdir, outdir, num_agents, num_episodes, checkpoint=None, env_processes=True):
|
||||
"""Recover checkpoint and render videos from it.
|
||||
|
||||
Args:
|
||||
@@ -97,20 +95,16 @@ def visualize(
|
||||
"""
|
||||
config = utility.load_config(logdir)
|
||||
with tf.device('/cpu:0'):
|
||||
batch_env = utility.define_batch_env(
|
||||
lambda: _create_environment(config, outdir),
|
||||
num_agents, env_processes)
|
||||
graph = utility.define_simulation_graph(
|
||||
batch_env, config.algorithm, config)
|
||||
batch_env = utility.define_batch_env(lambda: _create_environment(config, outdir), num_agents,
|
||||
env_processes)
|
||||
graph = utility.define_simulation_graph(batch_env, config.algorithm, config)
|
||||
total_steps = num_episodes * config.max_length
|
||||
loop = _define_loop(graph, total_steps)
|
||||
saver = utility.define_saver(
|
||||
exclude=(r'.*_temporary/.*', r'global_step'))
|
||||
saver = utility.define_saver(exclude=(r'.*_temporary/.*', r'global_step'))
|
||||
sess_config = tf.ConfigProto(allow_soft_placement=True)
|
||||
sess_config.gpu_options.allow_growth = True
|
||||
with tf.Session(config=sess_config) as sess:
|
||||
utility.initialize_variables(
|
||||
sess, saver, config.logdir, checkpoint, resume=True)
|
||||
utility.initialize_variables(sess, saver, config.logdir, checkpoint, resume=True)
|
||||
for unused_score in loop.run(sess, saver, total_steps):
|
||||
pass
|
||||
batch_env.close()
|
||||
@@ -123,29 +117,18 @@ def main(_):
|
||||
raise KeyError('You must specify logging and outdirs directories.')
|
||||
FLAGS.logdir = os.path.expanduser(FLAGS.logdir)
|
||||
FLAGS.outdir = os.path.expanduser(FLAGS.outdir)
|
||||
visualize(
|
||||
FLAGS.logdir, FLAGS.outdir, FLAGS.num_agents, FLAGS.num_episodes,
|
||||
FLAGS.checkpoint, FLAGS.env_processes)
|
||||
visualize(FLAGS.logdir, FLAGS.outdir, FLAGS.num_agents, FLAGS.num_episodes, FLAGS.checkpoint,
|
||||
FLAGS.env_processes)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
FLAGS = tf.app.flags.FLAGS
|
||||
tf.app.flags.DEFINE_string(
|
||||
'logdir', None,
|
||||
'Directory to the checkpoint of a training run.')
|
||||
tf.app.flags.DEFINE_string(
|
||||
'outdir', None,
|
||||
'Local directory for storing the monitoring outdir.')
|
||||
tf.app.flags.DEFINE_string(
|
||||
'checkpoint', None,
|
||||
'Checkpoint name to load; defaults to most recent.')
|
||||
tf.app.flags.DEFINE_integer(
|
||||
'num_agents', 1,
|
||||
'How many environments to step in parallel.')
|
||||
tf.app.flags.DEFINE_integer(
|
||||
'num_episodes', 5,
|
||||
'Minimum number of episodes to render.')
|
||||
tf.app.flags.DEFINE_boolean(
|
||||
'env_processes', True,
|
||||
'Step environments in separate processes to circumvent the GIL.')
|
||||
tf.app.flags.DEFINE_string('logdir', None, 'Directory to the checkpoint of a training run.')
|
||||
tf.app.flags.DEFINE_string('outdir', None, 'Local directory for storing the monitoring outdir.')
|
||||
tf.app.flags.DEFINE_string('checkpoint', None,
|
||||
'Checkpoint name to load; defaults to most recent.')
|
||||
tf.app.flags.DEFINE_integer('num_agents', 1, 'How many environments to step in parallel.')
|
||||
tf.app.flags.DEFINE_integer('num_episodes', 5, 'Minimum number of episodes to render.')
|
||||
tf.app.flags.DEFINE_boolean('env_processes', True,
|
||||
'Step environments in separate processes to circumvent the GIL.')
|
||||
tf.app.run()
|
||||
|
||||
@@ -7,4 +7,3 @@ from . import train_kuka_grasping
|
||||
from . import train_pybullet_cartpole
|
||||
from . import train_pybullet_racecar
|
||||
from . import train_pybullet_zed_racecar
|
||||
|
||||
|
||||
@@ -6,17 +6,17 @@ import numpy as np
|
||||
|
||||
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
|
||||
parentdir = os.path.dirname(os.path.dirname(currentdir))
|
||||
os.sys.path.insert(0,parentdir)
|
||||
os.sys.path.insert(0, parentdir)
|
||||
|
||||
import gym
|
||||
from pybullet_envs.bullet.kuka_diverse_object_gym_env import KukaDiverseObjectEnv
|
||||
from gym import spaces
|
||||
|
||||
|
||||
|
||||
class ContinuousDownwardBiasPolicy(object):
|
||||
"""Policy which takes continuous actions, and is biased to move down.
|
||||
"""
|
||||
|
||||
def __init__(self, height_hack_prob=0.9):
|
||||
"""Initializes the DownwardBiasPolicy.
|
||||
|
||||
@@ -36,25 +36,25 @@ class ContinuousDownwardBiasPolicy(object):
|
||||
|
||||
|
||||
def main():
|
||||
|
||||
env = KukaDiverseObjectEnv(renders=True, isDiscrete=False)
|
||||
policy = ContinuousDownwardBiasPolicy()
|
||||
|
||||
while True:
|
||||
obs, done = env.reset(), False
|
||||
print("===================================")
|
||||
print("obs")
|
||||
print(obs)
|
||||
episode_rew = 0
|
||||
while not done:
|
||||
env.render(mode='human')
|
||||
act = policy.sample_action(obs, .1)
|
||||
print("Action")
|
||||
print(act)
|
||||
obs, rew, done, _ = env.step([0, 0, 0, 0, 0])
|
||||
episode_rew += rew
|
||||
print("Episode reward", episode_rew)
|
||||
env = KukaDiverseObjectEnv(renders=True, isDiscrete=False)
|
||||
policy = ContinuousDownwardBiasPolicy()
|
||||
|
||||
while True:
|
||||
obs, done = env.reset(), False
|
||||
print("===================================")
|
||||
print("obs")
|
||||
print(obs)
|
||||
episode_rew = 0
|
||||
while not done:
|
||||
env.render(mode='human')
|
||||
act = policy.sample_action(obs, .1)
|
||||
print("Action")
|
||||
print(act)
|
||||
obs, rew, done, _ = env.step([0, 0, 0, 0, 0])
|
||||
episode_rew += rew
|
||||
print("Episode reward", episode_rew)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
main()
|
||||
|
||||
@@ -2,7 +2,7 @@
import os, inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
os.sys.path.insert(0,parentdir)
os.sys.path.insert(0, parentdir)

import gym
from pybullet_envs.bullet.kukaGymEnv import KukaGymEnv
|
||||
@@ -11,22 +11,22 @@ from baselines import deepq
|
||||
|
||||
|
||||
def main():
|
||||
|
||||
env = KukaGymEnv(renders=True, isDiscrete=True)
|
||||
act = deepq.load("kuka_model.pkl")
|
||||
print(act)
|
||||
while True:
|
||||
obs, done = env.reset(), False
|
||||
print("===================================")
|
||||
print("obs")
|
||||
print(obs)
|
||||
episode_rew = 0
|
||||
while not done:
|
||||
env.render()
|
||||
obs, rew, done, _ = env.step(act(obs[None])[0])
|
||||
episode_rew += rew
|
||||
print("Episode reward", episode_rew)
|
||||
|
||||
env = KukaGymEnv(renders=True, isDiscrete=True)
|
||||
act = deepq.load("kuka_model.pkl")
|
||||
print(act)
|
||||
while True:
|
||||
obs, done = env.reset(), False
|
||||
print("===================================")
|
||||
print("obs")
|
||||
print(obs)
|
||||
episode_rew = 0
|
||||
while not done:
|
||||
env.render()
|
||||
obs, rew, done, _ = env.step(act(obs[None])[0])
|
||||
episode_rew += rew
|
||||
print("Episode reward", episode_rew)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
main()
|
||||
|
||||
@@ -2,7 +2,7 @@
import os, inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
os.sys.path.insert(0,parentdir)
os.sys.path.insert(0, parentdir)

import gym
import time
@@ -10,28 +10,29 @@ import time
from baselines import deepq
from pybullet_envs.bullet.cartpole_bullet import CartPoleBulletEnv

def main():
env = gym.make('CartPoleBulletEnv-v1')
act = deepq.load("cartpole_model.pkl")

while True:
obs, done = env.reset(), False
print("obs")
print(obs)
print("type(obs)")
print(type(obs))
episode_rew = 0
while not done:
env.render()

o = obs[None]
aa = act(o)
a = aa[0]
obs, rew, done, _ = env.step(a)
episode_rew += rew
time.sleep(1./240.)
print("Episode reward", episode_rew)
def main():
env = gym.make('CartPoleBulletEnv-v1')
act = deepq.load("cartpole_model.pkl")

while True:
obs, done = env.reset(), False
print("obs")
print(obs)
print("type(obs)")
print(type(obs))
episode_rew = 0
while not done:
env.render()

o = obs[None]
aa = act(o)
a = aa[0]
obs, rew, done, _ = env.step(a)
episode_rew += rew
time.sleep(1. / 240.)
print("Episode reward", episode_rew)

if __name__ == '__main__':
main()
main()
@@ -2,7 +2,7 @@
import os, inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
os.sys.path.insert(0,parentdir)
os.sys.path.insert(0, parentdir)

import gym
from pybullet_envs.bullet.racecarGymEnv import RacecarGymEnv
@@ -11,22 +11,22 @@ from baselines import deepq

def main():

env = RacecarGymEnv(renders=True,isDiscrete=True)
act = deepq.load("racecar_model.pkl")
print(act)
while True:
obs, done = env.reset(), False
print("===================================")
print("obs")
print(obs)
episode_rew = 0
while not done:
env.render()
obs, rew, done, _ = env.step(act(obs[None])[0])
episode_rew += rew
print("Episode reward", episode_rew)

env = RacecarGymEnv(renders=True, isDiscrete=True)
act = deepq.load("racecar_model.pkl")
print(act)
while True:
obs, done = env.reset(), False
print("===================================")
print("obs")
print(obs)
episode_rew = 0
while not done:
env.render()
obs, rew, done, _ = env.step(act(obs[None])[0])
episode_rew += rew
print("Episode reward", episode_rew)

if __name__ == '__main__':
main()
main()
@@ -2,7 +2,7 @@
import os, inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
os.sys.path.insert(0,parentdir)
os.sys.path.insert(0, parentdir)

import gym
from pybullet_envs.bullet.racecarZEDGymEnv import RacecarZEDGymEnv
@@ -11,22 +11,22 @@ from baselines import deepq

def main():

env = RacecarZEDGymEnv(renders=True)
act = deepq.load("racecar_zed_model.pkl")
print(act)
while True:
obs, done = env.reset(), False
print("===================================")
print("obs")
print(obs)
episode_rew = 0
while not done:
env.render()
obs, rew, done, _ = env.step(act(obs[None])[0])
episode_rew += rew
print("Episode reward", episode_rew)

env = RacecarZEDGymEnv(renders=True)
act = deepq.load("racecar_zed_model.pkl")
print(act)
while True:
obs, done = env.reset(), False
print("===================================")
print("obs")
print(obs)
episode_rew = 0
while not done:
env.render()
obs, rew, done, _ = env.step(act(obs[None])[0])
episode_rew += rew
print("Episode reward", episode_rew)

if __name__ == '__main__':
main()
main()
@@ -2,7 +2,7 @@
import os, inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
os.sys.path.insert(0,parentdir)
os.sys.path.insert(0, parentdir)

import gym
from pybullet_envs.bullet.kukaCamGymEnv import KukaCamGymEnv
@@ -12,39 +12,34 @@ from baselines import deepq
import datetime

def callback(lcl, glb):
# stop training if reward exceeds 199
total = sum(lcl['episode_rewards'][-101:-1]) / 100
totalt = lcl['t']
#print("totalt")
#print(totalt)
is_solved = totalt > 2000 and total >= 10
return is_solved
# stop training if reward exceeds 199
total = sum(lcl['episode_rewards'][-101:-1]) / 100
totalt = lcl['t']
#print("totalt")
#print(totalt)
is_solved = totalt > 2000 and total >= 10
return is_solved

def main():

env = KukaCamGymEnv(renders=False, isDiscrete=True)
model = deepq.models.cnn_to_mlp(
convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)],
hiddens=[256],
dueling=False
)
act = deepq.learn(
env,
q_func=model,
lr=1e-3,
max_timesteps=10000000,
buffer_size=50000,
exploration_fraction=0.1,
exploration_final_eps=0.02,
print_freq=10,
callback=callback
)
print("Saving model to kuka_cam_model.pkl")
act.save("kuka_cam_model.pkl")

env = KukaCamGymEnv(renders=False, isDiscrete=True)
model = deepq.models.cnn_to_mlp(convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)],
hiddens=[256],
dueling=False)
act = deepq.learn(env,
q_func=model,
lr=1e-3,
max_timesteps=10000000,
buffer_size=50000,
exploration_fraction=0.1,
exploration_final_eps=0.02,
print_freq=10,
callback=callback)
print("Saving model to kuka_cam_model.pkl")
act.save("kuka_cam_model.pkl")

if __name__ == '__main__':
main()
main()
@@ -2,7 +2,7 @@
import os, inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
os.sys.path.insert(0,parentdir)
os.sys.path.insert(0, parentdir)

import gym
from pybullet_envs.bullet.kukaGymEnv import KukaGymEnv
@@ -12,35 +12,32 @@ from baselines import deepq
import datetime

def callback(lcl, glb):
# stop training if reward exceeds 199
total = sum(lcl['episode_rewards'][-101:-1]) / 100
totalt = lcl['t']
#print("totalt")
#print(totalt)
is_solved = totalt > 2000 and total >= 10
return is_solved
# stop training if reward exceeds 199
total = sum(lcl['episode_rewards'][-101:-1]) / 100
totalt = lcl['t']
#print("totalt")
#print(totalt)
is_solved = totalt > 2000 and total >= 10
return is_solved

def main():

env = KukaGymEnv(renders=False, isDiscrete=True)
model = deepq.models.mlp([64])
act = deepq.learn(
env,
q_func=model,
lr=1e-3,
max_timesteps=10000000,
buffer_size=50000,
exploration_fraction=0.1,
exploration_final_eps=0.02,
print_freq=10,
callback=callback
)
print("Saving model to kuka_model.pkl")
act.save("kuka_model.pkl")

env = KukaGymEnv(renders=False, isDiscrete=True)
model = deepq.models.mlp([64])
act = deepq.learn(env,
q_func=model,
lr=1e-3,
max_timesteps=10000000,
buffer_size=50000,
exploration_fraction=0.1,
exploration_final_eps=0.02,
print_freq=10,
callback=callback)
print("Saving model to kuka_model.pkl")
act.save("kuka_model.pkl")

if __name__ == '__main__':
main()
main()
@@ -2,7 +2,7 @@
import os, inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
os.sys.path.insert(0,parentdir)
os.sys.path.insert(0, parentdir)

import gym
from pybullet_envs.bullet.cartpole_bullet import CartPoleBulletEnv
@@ -11,29 +11,27 @@ from baselines import deepq

def callback(lcl, glb):
# stop training if reward exceeds 199
is_solved = lcl['t'] > 100 and sum(lcl['episode_rewards'][-101:-1]) / 100 >= 199
return is_solved
# stop training if reward exceeds 199
is_solved = lcl['t'] > 100 and sum(lcl['episode_rewards'][-101:-1]) / 100 >= 199
return is_solved

def main():

env = CartPoleBulletEnv(renders=False)
model = deepq.models.mlp([64])
act = deepq.learn(
env,
q_func=model,
lr=1e-3,
max_timesteps=100000,
buffer_size=50000,
exploration_fraction=0.1,
exploration_final_eps=0.02,
print_freq=10,
callback=callback
)
print("Saving model to cartpole_model.pkl")
act.save("cartpole_model.pkl")

env = CartPoleBulletEnv(renders=False)
model = deepq.models.mlp([64])
act = deepq.learn(env,
q_func=model,
lr=1e-3,
max_timesteps=100000,
buffer_size=50000,
exploration_fraction=0.1,
exploration_final_eps=0.02,
print_freq=10,
callback=callback)
print("Saving model to cartpole_model.pkl")
act.save("cartpole_model.pkl")

if __name__ == '__main__':
main()
main()
@@ -2,7 +2,7 @@
import os, inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
os.sys.path.insert(0,parentdir)
os.sys.path.insert(0, parentdir)

import gym
from pybullet_envs.bullet.racecarGymEnv import RacecarGymEnv
@@ -12,33 +12,30 @@ from baselines import deepq
import datetime

def callback(lcl, glb):
# stop training if reward exceeds 199
total = sum(lcl['episode_rewards'][-101:-1]) / 100
totalt = lcl['t']
is_solved = totalt > 2000 and total >= -50
return is_solved
# stop training if reward exceeds 199
total = sum(lcl['episode_rewards'][-101:-1]) / 100
totalt = lcl['t']
is_solved = totalt > 2000 and total >= -50
return is_solved

def main():

env = RacecarGymEnv(renders=False,isDiscrete=True)
model = deepq.models.mlp([64])
act = deepq.learn(
env,
q_func=model,
lr=1e-3,
max_timesteps=10000,
buffer_size=50000,
exploration_fraction=0.1,
exploration_final_eps=0.02,
print_freq=10,
callback=callback
)
print("Saving model to racecar_model.pkl")
act.save("racecar_model.pkl")

env = RacecarGymEnv(renders=False, isDiscrete=True)
model = deepq.models.mlp([64])
act = deepq.learn(env,
q_func=model,
lr=1e-3,
max_timesteps=10000,
buffer_size=50000,
exploration_fraction=0.1,
exploration_final_eps=0.02,
print_freq=10,
callback=callback)
print("Saving model to racecar_model.pkl")
act.save("racecar_model.pkl")

if __name__ == '__main__':
main()
main()
@@ -2,7 +2,7 @@
import os, inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
os.sys.path.insert(0,parentdir)
os.sys.path.insert(0, parentdir)

import gym
from pybullet_envs.bullet.racecarZEDGymEnv import RacecarZEDGymEnv
@@ -12,36 +12,32 @@ from baselines import deepq
import datetime

def callback(lcl, glb):
# stop training if reward exceeds 199
total = sum(lcl['episode_rewards'][-101:-1]) / 100
totalt = lcl['t']
is_solved = totalt > 2000 and total >= -50
return is_solved
# stop training if reward exceeds 199
total = sum(lcl['episode_rewards'][-101:-1]) / 100
totalt = lcl['t']
is_solved = totalt > 2000 and total >= -50
return is_solved

def main():

env = RacecarZEDGymEnv(renders=False, isDiscrete=True)
model = deepq.models.cnn_to_mlp(
convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)],
hiddens=[256],
dueling=False
)
act = deepq.learn(
env,
q_func=model,
lr=1e-3,
max_timesteps=10000,
buffer_size=50000,
exploration_fraction=0.1,
exploration_final_eps=0.02,
print_freq=10,
callback=callback
)
print("Saving model to racecar_zed_model.pkl")
act.save("racecar_zed_model.pkl")

env = RacecarZEDGymEnv(renders=False, isDiscrete=True)
model = deepq.models.cnn_to_mlp(convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)],
hiddens=[256],
dueling=False)
act = deepq.learn(env,
q_func=model,
lr=1e-3,
max_timesteps=10000,
buffer_size=50000,
exploration_fraction=0.1,
exploration_final_eps=0.02,
print_freq=10,
callback=callback)
print("Saving model to racecar_zed_model.pkl")
act.save("racecar_zed_model.pkl")

if __name__ == '__main__':
main()
main()
@@ -9,9 +9,9 @@ class BulletClient(object):
def __init__(self, connection_mode=pybullet.DIRECT, options=""):
"""Create a simulation and connect to it."""
self._client = pybullet.connect(pybullet.SHARED_MEMORY)
if(self._client<0):
print("options=",options)
self._client = pybullet.connect(connection_mode,options=options)
if (self._client < 0):
print("options=", options)
self._client = pybullet.connect(connection_mode, options=options)
self._shapes = {}

def __del__(self):
@@ -25,5 +25,5 @@ class BulletClient(object):
"""Inject the client id into Bullet functions."""
attribute = getattr(pybullet, name)
if inspect.isbuiltin(attribute):
attribute = functools.partial(attribute, physicsClientId=self._client)
attribute = functools.partial(attribute, physicsClientId=self._client)
return attribute
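For reference, the client-id injection shown in the BulletClient hunk above amounts to the following minimal, self-contained sketch (only the pybullet module is assumed; the constructor here is simplified and omits the options/SHARED_MEMORY handling of the real class):

import functools
import inspect

import pybullet


class BulletClient(object):
  """Wraps the pybullet module so that every API call targets one physics client."""

  def __init__(self, connection_mode=pybullet.DIRECT):
    # Each instance owns its own connection to a physics server.
    self._client = pybullet.connect(connection_mode)

  def __getattr__(self, name):
    # Look the name up on the pybullet module; for built-in API functions,
    # pre-bind physicsClientId so callers never pass the client id explicitly.
    attribute = getattr(pybullet, name)
    if inspect.isbuiltin(attribute):
      attribute = functools.partial(attribute, physicsClientId=self._client)
    return attribute

Usage is then e.g. client = BulletClient(); client.setGravity(0, 0, -9.8), with the call routed to that instance's physics server.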
@@ -2,10 +2,10 @@
Classic cart-pole system implemented by Rich Sutton et al.
Copied from https://webdocs.cs.ualberta.ca/~sutton/book/code/pole.c
"""
import os, inspect
import os, inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
os.sys.path.insert(0,parentdir)
os.sys.path.insert(0, parentdir)

import logging
import math
@@ -21,26 +21,24 @@ from pkg_resources import parse_version

logger = logging.getLogger(__name__)

class CartPoleBulletEnv(gym.Env):
metadata = {
'render.modes': ['human', 'rgb_array'],
'video.frames_per_second' : 50
}
metadata = {'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': 50}

def __init__(self, renders=True):
# start the bullet physics server
self._renders = renders
if (renders):
p.connect(p.GUI)
p.connect(p.GUI)
else:
p.connect(p.DIRECT)
p.connect(p.DIRECT)
self.theta_threshold_radians = 12 * 2 * math.pi / 360
self.x_threshold = 0.4 #2.4
self.x_threshold = 0.4 #2.4
high = np.array([
self.x_threshold * 2,
np.finfo(np.float32).max,
self.theta_threshold_radians * 2,
np.finfo(np.float32).max])
self.x_threshold * 2,
np.finfo(np.float32).max, self.theta_threshold_radians * 2,
np.finfo(np.float32).max
])

self.force_mag = 10

@@ -60,7 +58,7 @@ class CartPoleBulletEnv(gym.Env):
return [seed]

def step(self, action):
force = self.force_mag if action==1 else -self.force_mag
force = self.force_mag if action == 1 else -self.force_mag

p.setJointMotorControl2(self.cartpole, 0, p.TORQUE_CONTROL, force=force)
p.stepSimulation()
@@ -78,16 +76,17 @@ class CartPoleBulletEnv(gym.Env):
return np.array(self.state), reward, done, {}

def reset(self):
# print("-----------reset simulation---------------")
# print("-----------reset simulation---------------")
p.resetSimulation()
self.cartpole = p.loadURDF(os.path.join(pybullet_data.getDataPath(),"cartpole.urdf"),[0,0,0])
self.cartpole = p.loadURDF(os.path.join(pybullet_data.getDataPath(), "cartpole.urdf"),
[0, 0, 0])
p.changeDynamics(self.cartpole, -1, linearDamping=0, angularDamping=0)
p.changeDynamics(self.cartpole, 0, linearDamping=0, angularDamping=0)
p.changeDynamics(self.cartpole, 1, linearDamping=0, angularDamping=0)
self.timeStep = 0.02
p.setJointMotorControl2(self.cartpole, 1, p.VELOCITY_CONTROL, force=0)
p.setJointMotorControl2(self.cartpole, 0, p.VELOCITY_CONTROL, force=0)
p.setGravity(0,0, -9.8)
p.setGravity(0, 0, -9.8)
p.setTimeStep(self.timeStep)
p.setRealTimeSimulation(0)

@@ -100,4 +99,4 @@ class CartPoleBulletEnv(gym.Env):
return np.array(self.state)

def render(self, mode='human', close=False):
return
return

@@ -22,4 +22,3 @@ class EnvRandomizerBase(object):
env: The environment to be randomized.
"""
pass
@@ -1,7 +1,7 @@
|
||||
import os, inspect
|
||||
import os, inspect
|
||||
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
|
||||
parentdir = os.path.dirname(os.path.dirname(currentdir))
|
||||
os.sys.path.insert(0,parentdir)
|
||||
os.sys.path.insert(0, parentdir)
|
||||
|
||||
import pybullet as p
|
||||
import numpy as np
|
||||
@@ -17,49 +17,60 @@ class Kuka:
|
||||
self.timeStep = timeStep
|
||||
self.maxVelocity = .35
|
||||
self.maxForce = 200.
|
||||
self.fingerAForce = 2
|
||||
self.fingerAForce = 2
|
||||
self.fingerBForce = 2.5
|
||||
self.fingerTipForce = 2
|
||||
self.useInverseKinematics = 1
|
||||
self.useSimulation = 1
|
||||
self.useNullSpace =21
|
||||
self.useNullSpace = 21
|
||||
self.useOrientation = 1
|
||||
self.kukaEndEffectorIndex = 6
|
||||
self.kukaGripperIndex = 7
|
||||
#lower limits for null space
|
||||
self.ll=[-.967,-2 ,-2.96,0.19,-2.96,-2.09,-3.05]
|
||||
self.ll = [-.967, -2, -2.96, 0.19, -2.96, -2.09, -3.05]
|
||||
#upper limits for null space
|
||||
self.ul=[.967,2 ,2.96,2.29,2.96,2.09,3.05]
|
||||
self.ul = [.967, 2, 2.96, 2.29, 2.96, 2.09, 3.05]
|
||||
#joint ranges for null space
|
||||
self.jr=[5.8,4,5.8,4,5.8,4,6]
|
||||
self.jr = [5.8, 4, 5.8, 4, 5.8, 4, 6]
|
||||
#restposes for null space
|
||||
self.rp=[0,0,0,0.5*math.pi,0,-math.pi*0.5*0.66,0]
|
||||
self.rp = [0, 0, 0, 0.5 * math.pi, 0, -math.pi * 0.5 * 0.66, 0]
|
||||
#joint damping coefficents
|
||||
self.jd=[0.00001,0.00001,0.00001,0.00001,0.00001,0.00001,0.00001,0.00001,0.00001,0.00001,0.00001,0.00001,0.00001,0.00001]
|
||||
self.jd = [
|
||||
0.00001, 0.00001, 0.00001, 0.00001, 0.00001, 0.00001, 0.00001, 0.00001, 0.00001, 0.00001,
|
||||
0.00001, 0.00001, 0.00001, 0.00001
|
||||
]
|
||||
self.reset()
|
||||
|
||||
|
||||
def reset(self):
|
||||
objects = p.loadSDF(os.path.join(self.urdfRootPath,"kuka_iiwa/kuka_with_gripper2.sdf"))
|
||||
objects = p.loadSDF(os.path.join(self.urdfRootPath, "kuka_iiwa/kuka_with_gripper2.sdf"))
|
||||
self.kukaUid = objects[0]
|
||||
#for i in range (p.getNumJoints(self.kukaUid)):
|
||||
# print(p.getJointInfo(self.kukaUid,i))
|
||||
p.resetBasePositionAndOrientation(self.kukaUid,[-0.100000,0.000000,0.070000],[0.000000,0.000000,0.000000,1.000000])
|
||||
self.jointPositions=[ 0.006418, 0.413184, -0.011401, -1.589317, 0.005379, 1.137684, -0.006539, 0.000048, -0.299912, 0.000000, -0.000043, 0.299960, 0.000000, -0.000200 ]
|
||||
p.resetBasePositionAndOrientation(self.kukaUid, [-0.100000, 0.000000, 0.070000],
|
||||
[0.000000, 0.000000, 0.000000, 1.000000])
|
||||
self.jointPositions = [
|
||||
0.006418, 0.413184, -0.011401, -1.589317, 0.005379, 1.137684, -0.006539, 0.000048,
|
||||
-0.299912, 0.000000, -0.000043, 0.299960, 0.000000, -0.000200
|
||||
]
|
||||
self.numJoints = p.getNumJoints(self.kukaUid)
|
||||
for jointIndex in range (self.numJoints):
|
||||
p.resetJointState(self.kukaUid,jointIndex,self.jointPositions[jointIndex])
|
||||
p.setJointMotorControl2(self.kukaUid,jointIndex,p.POSITION_CONTROL,targetPosition=self.jointPositions[jointIndex],force=self.maxForce)
|
||||
|
||||
self.trayUid = p.loadURDF(os.path.join(self.urdfRootPath,"tray/tray.urdf"), 0.640000,0.075000,-0.190000,0.000000,0.000000,1.000000,0.000000)
|
||||
self.endEffectorPos = [0.537,0.0,0.5]
|
||||
for jointIndex in range(self.numJoints):
|
||||
p.resetJointState(self.kukaUid, jointIndex, self.jointPositions[jointIndex])
|
||||
p.setJointMotorControl2(self.kukaUid,
|
||||
jointIndex,
|
||||
p.POSITION_CONTROL,
|
||||
targetPosition=self.jointPositions[jointIndex],
|
||||
force=self.maxForce)
|
||||
|
||||
self.trayUid = p.loadURDF(os.path.join(self.urdfRootPath, "tray/tray.urdf"), 0.640000,
|
||||
0.075000, -0.190000, 0.000000, 0.000000, 1.000000, 0.000000)
|
||||
self.endEffectorPos = [0.537, 0.0, 0.5]
|
||||
self.endEffectorAngle = 0
|
||||
|
||||
|
||||
|
||||
self.motorNames = []
|
||||
self.motorIndices = []
|
||||
|
||||
for i in range (self.numJoints):
|
||||
jointInfo = p.getJointInfo(self.kukaUid,i)
|
||||
|
||||
for i in range(self.numJoints):
|
||||
jointInfo = p.getJointInfo(self.kukaUid, i)
|
||||
qIndex = jointInfo[3]
|
||||
if qIndex > -1:
|
||||
#print("motorname")
|
||||
@@ -70,98 +81,136 @@ class Kuka:
|
||||
def getActionDimension(self):
|
||||
if (self.useInverseKinematics):
|
||||
return len(self.motorIndices)
|
||||
return 6 #position x,y,z and roll/pitch/yaw euler angles of end effector
|
||||
return 6 #position x,y,z and roll/pitch/yaw euler angles of end effector
|
||||
|
||||
def getObservationDimension(self):
|
||||
return len(self.getObservation())
|
||||
|
||||
def getObservation(self):
|
||||
observation = []
|
||||
state = p.getLinkState(self.kukaUid,self.kukaGripperIndex)
|
||||
state = p.getLinkState(self.kukaUid, self.kukaGripperIndex)
|
||||
pos = state[0]
|
||||
orn = state[1]
|
||||
euler = p.getEulerFromQuaternion(orn)
|
||||
|
||||
|
||||
observation.extend(list(pos))
|
||||
observation.extend(list(euler))
|
||||
|
||||
|
||||
return observation
|
||||
|
||||
def applyAction(self, motorCommands):
|
||||
|
||||
|
||||
#print ("self.numJoints")
|
||||
#print (self.numJoints)
|
||||
if (self.useInverseKinematics):
|
||||
|
||||
|
||||
dx = motorCommands[0]
|
||||
dy = motorCommands[1]
|
||||
dz = motorCommands[2]
|
||||
da = motorCommands[3]
|
||||
fingerAngle = motorCommands[4]
|
||||
|
||||
state = p.getLinkState(self.kukaUid,self.kukaEndEffectorIndex)
|
||||
|
||||
state = p.getLinkState(self.kukaUid, self.kukaEndEffectorIndex)
|
||||
actualEndEffectorPos = state[0]
|
||||
#print("pos[2] (getLinkState(kukaEndEffectorIndex)")
|
||||
#print(actualEndEffectorPos[2])
|
||||
|
||||
|
||||
|
||||
self.endEffectorPos[0] = self.endEffectorPos[0]+dx
|
||||
if (self.endEffectorPos[0]>0.65):
|
||||
self.endEffectorPos[0]=0.65
|
||||
if (self.endEffectorPos[0]<0.50):
|
||||
self.endEffectorPos[0]=0.50
|
||||
self.endEffectorPos[1] = self.endEffectorPos[1]+dy
|
||||
if (self.endEffectorPos[1]<-0.17):
|
||||
self.endEffectorPos[1]=-0.17
|
||||
if (self.endEffectorPos[1]>0.22):
|
||||
self.endEffectorPos[1]=0.22
|
||||
|
||||
|
||||
self.endEffectorPos[0] = self.endEffectorPos[0] + dx
|
||||
if (self.endEffectorPos[0] > 0.65):
|
||||
self.endEffectorPos[0] = 0.65
|
||||
if (self.endEffectorPos[0] < 0.50):
|
||||
self.endEffectorPos[0] = 0.50
|
||||
self.endEffectorPos[1] = self.endEffectorPos[1] + dy
|
||||
if (self.endEffectorPos[1] < -0.17):
|
||||
self.endEffectorPos[1] = -0.17
|
||||
if (self.endEffectorPos[1] > 0.22):
|
||||
self.endEffectorPos[1] = 0.22
|
||||
|
||||
#print ("self.endEffectorPos[2]")
|
||||
#print (self.endEffectorPos[2])
|
||||
#print("actualEndEffectorPos[2]")
|
||||
#print(actualEndEffectorPos[2])
|
||||
#if (dz<0 or actualEndEffectorPos[2]<0.5):
|
||||
self.endEffectorPos[2] = self.endEffectorPos[2]+dz
|
||||
|
||||
|
||||
self.endEffectorPos[2] = self.endEffectorPos[2] + dz
|
||||
|
||||
self.endEffectorAngle = self.endEffectorAngle + da
|
||||
pos = self.endEffectorPos
|
||||
orn = p.getQuaternionFromEuler([0,-math.pi,0]) # -math.pi,yaw])
|
||||
if (self.useNullSpace==1):
|
||||
if (self.useOrientation==1):
|
||||
jointPoses = p.calculateInverseKinematics(self.kukaUid,self.kukaEndEffectorIndex,pos,orn,self.ll,self.ul,self.jr,self.rp)
|
||||
orn = p.getQuaternionFromEuler([0, -math.pi, 0]) # -math.pi,yaw])
|
||||
if (self.useNullSpace == 1):
|
||||
if (self.useOrientation == 1):
|
||||
jointPoses = p.calculateInverseKinematics(self.kukaUid, self.kukaEndEffectorIndex, pos,
|
||||
orn, self.ll, self.ul, self.jr, self.rp)
|
||||
else:
|
||||
jointPoses = p.calculateInverseKinematics(self.kukaUid,self.kukaEndEffectorIndex,pos,lowerLimits=self.ll, upperLimits=self.ul, jointRanges=self.jr, restPoses=self.rp)
|
||||
jointPoses = p.calculateInverseKinematics(self.kukaUid,
|
||||
self.kukaEndEffectorIndex,
|
||||
pos,
|
||||
lowerLimits=self.ll,
|
||||
upperLimits=self.ul,
|
||||
jointRanges=self.jr,
|
||||
restPoses=self.rp)
|
||||
else:
|
||||
if (self.useOrientation==1):
|
||||
jointPoses = p.calculateInverseKinematics(self.kukaUid,self.kukaEndEffectorIndex,pos,orn,jointDamping=self.jd)
|
||||
if (self.useOrientation == 1):
|
||||
jointPoses = p.calculateInverseKinematics(self.kukaUid,
|
||||
self.kukaEndEffectorIndex,
|
||||
pos,
|
||||
orn,
|
||||
jointDamping=self.jd)
|
||||
else:
|
||||
jointPoses = p.calculateInverseKinematics(self.kukaUid,self.kukaEndEffectorIndex,pos)
|
||||
|
||||
jointPoses = p.calculateInverseKinematics(self.kukaUid, self.kukaEndEffectorIndex, pos)
|
||||
|
||||
#print("jointPoses")
|
||||
#print(jointPoses)
|
||||
#print("self.kukaEndEffectorIndex")
|
||||
#print(self.kukaEndEffectorIndex)
|
||||
if (self.useSimulation):
|
||||
for i in range (self.kukaEndEffectorIndex+1):
|
||||
for i in range(self.kukaEndEffectorIndex + 1):
|
||||
#print(i)
|
||||
p.setJointMotorControl2(bodyUniqueId=self.kukaUid,jointIndex=i,controlMode=p.POSITION_CONTROL,targetPosition=jointPoses[i],targetVelocity=0,force=self.maxForce,maxVelocity=self.maxVelocity, positionGain=0.3,velocityGain=1)
|
||||
p.setJointMotorControl2(bodyUniqueId=self.kukaUid,
|
||||
jointIndex=i,
|
||||
controlMode=p.POSITION_CONTROL,
|
||||
targetPosition=jointPoses[i],
|
||||
targetVelocity=0,
|
||||
force=self.maxForce,
|
||||
maxVelocity=self.maxVelocity,
|
||||
positionGain=0.3,
|
||||
velocityGain=1)
|
||||
else:
|
||||
#reset the joint state (ignoring all dynamics, not recommended to use during simulation)
|
||||
for i in range (self.numJoints):
|
||||
p.resetJointState(self.kukaUid,i,jointPoses[i])
|
||||
for i in range(self.numJoints):
|
||||
p.resetJointState(self.kukaUid, i, jointPoses[i])
|
||||
#fingers
|
||||
p.setJointMotorControl2(self.kukaUid,7,p.POSITION_CONTROL,targetPosition=self.endEffectorAngle,force=self.maxForce)
|
||||
p.setJointMotorControl2(self.kukaUid,8,p.POSITION_CONTROL,targetPosition=-fingerAngle,force=self.fingerAForce)
|
||||
p.setJointMotorControl2(self.kukaUid,11,p.POSITION_CONTROL,targetPosition=fingerAngle,force=self.fingerBForce)
|
||||
|
||||
p.setJointMotorControl2(self.kukaUid,10,p.POSITION_CONTROL,targetPosition=0,force=self.fingerTipForce)
|
||||
p.setJointMotorControl2(self.kukaUid,13,p.POSITION_CONTROL,targetPosition=0,force=self.fingerTipForce)
|
||||
|
||||
|
||||
p.setJointMotorControl2(self.kukaUid,
|
||||
7,
|
||||
p.POSITION_CONTROL,
|
||||
targetPosition=self.endEffectorAngle,
|
||||
force=self.maxForce)
|
||||
p.setJointMotorControl2(self.kukaUid,
|
||||
8,
|
||||
p.POSITION_CONTROL,
|
||||
targetPosition=-fingerAngle,
|
||||
force=self.fingerAForce)
|
||||
p.setJointMotorControl2(self.kukaUid,
|
||||
11,
|
||||
p.POSITION_CONTROL,
|
||||
targetPosition=fingerAngle,
|
||||
force=self.fingerBForce)
|
||||
|
||||
p.setJointMotorControl2(self.kukaUid,
|
||||
10,
|
||||
p.POSITION_CONTROL,
|
||||
targetPosition=0,
|
||||
force=self.fingerTipForce)
|
||||
p.setJointMotorControl2(self.kukaUid,
|
||||
13,
|
||||
p.POSITION_CONTROL,
|
||||
targetPosition=0,
|
||||
force=self.fingerTipForce)
|
||||
|
||||
else:
|
||||
for action in range (len(motorCommands)):
|
||||
for action in range(len(motorCommands)):
|
||||
motor = self.motorIndices[action]
|
||||
p.setJointMotorControl2(self.kukaUid,motor,p.POSITION_CONTROL,targetPosition=motorCommands[action],force=self.maxForce)
|
||||
|
||||
p.setJointMotorControl2(self.kukaUid,
|
||||
motor,
|
||||
p.POSITION_CONTROL,
|
||||
targetPosition=motorCommands[action],
|
||||
force=self.maxForce)
|
||||
|
||||
@@ -1,8 +1,7 @@
|
||||
import os, inspect
|
||||
import os, inspect
|
||||
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
|
||||
parentdir = os.path.dirname(os.path.dirname(currentdir))
|
||||
os.sys.path.insert(0,parentdir)
|
||||
|
||||
os.sys.path.insert(0, parentdir)
|
||||
|
||||
import math
|
||||
import gym
|
||||
@@ -21,11 +20,9 @@ maxSteps = 1000
|
||||
RENDER_HEIGHT = 720
|
||||
RENDER_WIDTH = 960
|
||||
|
||||
|
||||
class KukaCamGymEnv(gym.Env):
|
||||
metadata = {
|
||||
'render.modes': ['human', 'rgb_array'],
|
||||
'video.frames_per_second' : 50
|
||||
}
|
||||
metadata = {'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': 50}
|
||||
|
||||
def __init__(self,
|
||||
urdfRoot=pybullet_data.getDataPath(),
|
||||
@@ -33,7 +30,7 @@ class KukaCamGymEnv(gym.Env):
|
||||
isEnableSelfCollision=True,
|
||||
renders=False,
|
||||
isDiscrete=False):
|
||||
self._timeStep = 1./240.
|
||||
self._timeStep = 1. / 240.
|
||||
self._urdfRoot = urdfRoot
|
||||
self._actionRepeat = actionRepeat
|
||||
self._isEnableSelfCollision = isEnableSelfCollision
|
||||
@@ -42,14 +39,14 @@ class KukaCamGymEnv(gym.Env):
|
||||
self._renders = renders
|
||||
self._width = 341
|
||||
self._height = 256
|
||||
self._isDiscrete=isDiscrete
|
||||
self._isDiscrete = isDiscrete
|
||||
self.terminated = 0
|
||||
self._p = p
|
||||
if self._renders:
|
||||
cid = p.connect(p.SHARED_MEMORY)
|
||||
if (cid<0):
|
||||
p.connect(p.GUI)
|
||||
p.resetDebugVisualizerCamera(1.3,180,-41,[0.52,-0.2,-0.33])
|
||||
if (cid < 0):
|
||||
p.connect(p.GUI)
|
||||
p.resetDebugVisualizerCamera(1.3, 180, -41, [0.52, -0.2, -0.33])
|
||||
else:
|
||||
p.connect(p.DIRECT)
|
||||
#timinglog = p.startStateLogging(p.STATE_LOGGING_PROFILE_TIMINGS, "kukaTimings.json")
|
||||
@@ -67,7 +64,10 @@ class KukaCamGymEnv(gym.Env):
|
||||
self._action_bound = 1
|
||||
action_high = np.array([self._action_bound] * action_dim)
|
||||
self.action_space = spaces.Box(-action_high, action_high, dtype=np.float32)
|
||||
self.observation_space = spaces.Box(low=0, high=255, shape=(self._height, self._width, 4), dtype=np.uint8)
|
||||
self.observation_space = spaces.Box(low=0,
|
||||
high=255,
|
||||
shape=(self._height, self._width, 4),
|
||||
dtype=np.uint8)
|
||||
self.viewer = None
|
||||
|
||||
def reset(self):
|
||||
@@ -75,17 +75,19 @@ class KukaCamGymEnv(gym.Env):
|
||||
p.resetSimulation()
|
||||
p.setPhysicsEngineParameter(numSolverIterations=150)
|
||||
p.setTimeStep(self._timeStep)
|
||||
p.loadURDF(os.path.join(self._urdfRoot,"plane.urdf"),[0,0,-1])
|
||||
p.loadURDF(os.path.join(self._urdfRoot, "plane.urdf"), [0, 0, -1])
|
||||
|
||||
p.loadURDF(os.path.join(self._urdfRoot,"table/table.urdf"), 0.5000000,0.00000,-.820000,0.000000,0.000000,0.0,1.0)
|
||||
p.loadURDF(os.path.join(self._urdfRoot, "table/table.urdf"), 0.5000000, 0.00000, -.820000,
|
||||
0.000000, 0.000000, 0.0, 1.0)
|
||||
|
||||
xpos = 0.5 +0.2*random.random()
|
||||
ypos = 0 +0.25*random.random()
|
||||
ang = 3.1415925438*random.random()
|
||||
orn = p.getQuaternionFromEuler([0,0,ang])
|
||||
self.blockUid =p.loadURDF(os.path.join(self._urdfRoot,"block.urdf"), xpos,ypos,-0.1,orn[0],orn[1],orn[2],orn[3])
|
||||
xpos = 0.5 + 0.2 * random.random()
|
||||
ypos = 0 + 0.25 * random.random()
|
||||
ang = 3.1415925438 * random.random()
|
||||
orn = p.getQuaternionFromEuler([0, 0, ang])
|
||||
self.blockUid = p.loadURDF(os.path.join(self._urdfRoot, "block.urdf"), xpos, ypos, -0.1,
|
||||
orn[0], orn[1], orn[2], orn[3])
|
||||
|
||||
p.setGravity(0,0,-10)
|
||||
p.setGravity(0, 0, -10)
|
||||
self._kuka = kuka.Kuka(urdfRootPath=self._urdfRoot, timeStep=self._timeStep)
|
||||
self._envStepCounter = 0
|
||||
p.stepSimulation()
|
||||
@@ -101,49 +103,59 @@ class KukaCamGymEnv(gym.Env):
|
||||
|
||||
def getExtendedObservation(self):
|
||||
|
||||
#camEyePos = [0.03,0.236,0.54]
|
||||
#distance = 1.06
|
||||
#pitch=-56
|
||||
#yaw = 258
|
||||
#roll=0
|
||||
#upAxisIndex = 2
|
||||
#camInfo = p.getDebugVisualizerCamera()
|
||||
#print("width,height")
|
||||
#print(camInfo[0])
|
||||
#print(camInfo[1])
|
||||
#print("viewMatrix")
|
||||
#print(camInfo[2])
|
||||
#print("projectionMatrix")
|
||||
#print(camInfo[3])
|
||||
#viewMat = camInfo[2]
|
||||
#viewMat = p.computeViewMatrixFromYawPitchRoll(camEyePos,distance,yaw, pitch,roll,upAxisIndex)
|
||||
viewMat = [-0.5120397806167603, 0.7171027660369873, -0.47284144163131714, 0.0, -0.8589617609977722, -0.42747554183006287, 0.28186774253845215, 0.0, 0.0, 0.5504802465438843, 0.8348482847213745, 0.0, 0.1925382763147354, -0.24935829639434814, -0.4401884973049164, 1.0]
|
||||
#projMatrix = camInfo[3]#[0.7499999403953552, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, -1.0000200271606445, -1.0, 0.0, 0.0, -0.02000020071864128, 0.0]
|
||||
projMatrix = [0.75, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, -1.0000200271606445, -1.0, 0.0, 0.0, -0.02000020071864128, 0.0]
|
||||
#camEyePos = [0.03,0.236,0.54]
|
||||
#distance = 1.06
|
||||
#pitch=-56
|
||||
#yaw = 258
|
||||
#roll=0
|
||||
#upAxisIndex = 2
|
||||
#camInfo = p.getDebugVisualizerCamera()
|
||||
#print("width,height")
|
||||
#print(camInfo[0])
|
||||
#print(camInfo[1])
|
||||
#print("viewMatrix")
|
||||
#print(camInfo[2])
|
||||
#print("projectionMatrix")
|
||||
#print(camInfo[3])
|
||||
#viewMat = camInfo[2]
|
||||
#viewMat = p.computeViewMatrixFromYawPitchRoll(camEyePos,distance,yaw, pitch,roll,upAxisIndex)
|
||||
viewMat = [
|
||||
-0.5120397806167603, 0.7171027660369873, -0.47284144163131714, 0.0, -0.8589617609977722,
|
||||
-0.42747554183006287, 0.28186774253845215, 0.0, 0.0, 0.5504802465438843,
|
||||
0.8348482847213745, 0.0, 0.1925382763147354, -0.24935829639434814, -0.4401884973049164, 1.0
|
||||
]
|
||||
#projMatrix = camInfo[3]#[0.7499999403953552, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, -1.0000200271606445, -1.0, 0.0, 0.0, -0.02000020071864128, 0.0]
|
||||
projMatrix = [
|
||||
0.75, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, -1.0000200271606445, -1.0, 0.0, 0.0,
|
||||
-0.02000020071864128, 0.0
|
||||
]
|
||||
|
||||
img_arr = p.getCameraImage(width=self._width,height=self._height,viewMatrix=viewMat,projectionMatrix=projMatrix)
|
||||
rgb=img_arr[2]
|
||||
np_img_arr = np.reshape(rgb, (self._height, self._width, 4))
|
||||
self._observation = np_img_arr
|
||||
return self._observation
|
||||
img_arr = p.getCameraImage(width=self._width,
|
||||
height=self._height,
|
||||
viewMatrix=viewMat,
|
||||
projectionMatrix=projMatrix)
|
||||
rgb = img_arr[2]
|
||||
np_img_arr = np.reshape(rgb, (self._height, self._width, 4))
|
||||
self._observation = np_img_arr
|
||||
return self._observation
|
||||
|
||||
def step(self, action):
|
||||
if (self._isDiscrete):
|
||||
dv = 0.01
|
||||
dx = [0,-dv,dv,0,0,0,0][action]
|
||||
dy = [0,0,0,-dv,dv,0,0][action]
|
||||
da = [0,0,0,0,0,-0.1,0.1][action]
|
||||
dx = [0, -dv, dv, 0, 0, 0, 0][action]
|
||||
dy = [0, 0, 0, -dv, dv, 0, 0][action]
|
||||
da = [0, 0, 0, 0, 0, -0.1, 0.1][action]
|
||||
f = 0.3
|
||||
realAction = [dx,dy,-0.002,da,f]
|
||||
realAction = [dx, dy, -0.002, da, f]
|
||||
else:
|
||||
dv = 0.01
|
||||
dx = action[0] * dv
|
||||
dy = action[1] * dv
|
||||
da = action[2] * 0.1
|
||||
f = 0.3
|
||||
realAction = [dx,dy,-0.002,da,f]
|
||||
realAction = [dx, dy, -0.002, da, f]
|
||||
|
||||
return self.step2( realAction)
|
||||
return self.step2(realAction)
|
||||
|
||||
def step2(self, action):
|
||||
for i in range(self._actionRepeat):
|
||||
@@ -156,7 +168,7 @@ class KukaCamGymEnv(gym.Env):
|
||||
|
||||
self._observation = self.getExtendedObservation()
|
||||
if self._renders:
|
||||
time.sleep(self._timeStep)
|
||||
time.sleep(self._timeStep)
|
||||
|
||||
#print("self._envStepCounter")
|
||||
#print(self._envStepCounter)
|
||||
@@ -170,66 +182,67 @@ class KukaCamGymEnv(gym.Env):
|
||||
def render(self, mode='human', close=False):
|
||||
if mode != "rgb_array":
|
||||
return np.array([])
|
||||
base_pos,orn = self._p.getBasePositionAndOrientation(self._racecar.racecarUniqueId)
|
||||
view_matrix = self._p.computeViewMatrixFromYawPitchRoll(
|
||||
cameraTargetPosition=base_pos,
|
||||
distance=self._cam_dist,
|
||||
yaw=self._cam_yaw,
|
||||
pitch=self._cam_pitch,
|
||||
roll=0,
|
||||
upAxisIndex=2)
|
||||
proj_matrix = self._p.computeProjectionMatrixFOV(
|
||||
fov=60, aspect=float(RENDER_WIDTH)/RENDER_HEIGHT,
|
||||
nearVal=0.1, farVal=100.0)
|
||||
(_, _, px, _, _) = self._p.getCameraImage(
|
||||
width=RENDER_WIDTH, height=RENDER_HEIGHT, viewMatrix=view_matrix,
|
||||
projectionMatrix=proj_matrix, renderer=pybullet.ER_BULLET_HARDWARE_OPENGL)
|
||||
base_pos, orn = self._p.getBasePositionAndOrientation(self._racecar.racecarUniqueId)
|
||||
view_matrix = self._p.computeViewMatrixFromYawPitchRoll(cameraTargetPosition=base_pos,
|
||||
distance=self._cam_dist,
|
||||
yaw=self._cam_yaw,
|
||||
pitch=self._cam_pitch,
|
||||
roll=0,
|
||||
upAxisIndex=2)
|
||||
proj_matrix = self._p.computeProjectionMatrixFOV(fov=60,
|
||||
aspect=float(RENDER_WIDTH) / RENDER_HEIGHT,
|
||||
nearVal=0.1,
|
||||
farVal=100.0)
|
||||
(_, _, px, _, _) = self._p.getCameraImage(width=RENDER_WIDTH,
|
||||
height=RENDER_HEIGHT,
|
||||
viewMatrix=view_matrix,
|
||||
projectionMatrix=proj_matrix,
|
||||
renderer=pybullet.ER_BULLET_HARDWARE_OPENGL)
|
||||
rgb_array = np.array(px)
|
||||
rgb_array = rgb_array[:, :, :3]
|
||||
return rgb_array
|
||||
|
||||
def _termination(self):
|
||||
#print (self._kuka.endEffectorPos[2])
|
||||
state = p.getLinkState(self._kuka.kukaUid,self._kuka.kukaEndEffectorIndex)
|
||||
state = p.getLinkState(self._kuka.kukaUid, self._kuka.kukaEndEffectorIndex)
|
||||
actualEndEffectorPos = state[0]
|
||||
|
||||
#print("self._envStepCounter")
|
||||
#print(self._envStepCounter)
|
||||
if (self.terminated or self._envStepCounter>maxSteps):
|
||||
if (self.terminated or self._envStepCounter > maxSteps):
|
||||
self._observation = self.getExtendedObservation()
|
||||
return True
|
||||
maxDist = 0.005
|
||||
closestPoints = p.getClosestPoints(self._kuka.trayUid, self._kuka.kukaUid,maxDist)
|
||||
closestPoints = p.getClosestPoints(self._kuka.trayUid, self._kuka.kukaUid, maxDist)
|
||||
|
||||
if (len(closestPoints)):#(actualEndEffectorPos[2] <= -0.43):
|
||||
if (len(closestPoints)): #(actualEndEffectorPos[2] <= -0.43):
|
||||
self.terminated = 1
|
||||
|
||||
#print("closing gripper, attempting grasp")
|
||||
#start grasp and terminate
|
||||
fingerAngle = 0.3
|
||||
for i in range (100):
|
||||
graspAction = [0,0,0.0001,0,fingerAngle]
|
||||
for i in range(100):
|
||||
graspAction = [0, 0, 0.0001, 0, fingerAngle]
|
||||
self._kuka.applyAction(graspAction)
|
||||
p.stepSimulation()
|
||||
fingerAngle = fingerAngle-(0.3/100.)
|
||||
if (fingerAngle<0):
|
||||
fingerAngle=0
|
||||
fingerAngle = fingerAngle - (0.3 / 100.)
|
||||
if (fingerAngle < 0):
|
||||
fingerAngle = 0
|
||||
|
||||
for i in range (1000):
|
||||
graspAction = [0,0,0.001,0,fingerAngle]
|
||||
for i in range(1000):
|
||||
graspAction = [0, 0, 0.001, 0, fingerAngle]
|
||||
self._kuka.applyAction(graspAction)
|
||||
p.stepSimulation()
|
||||
blockPos,blockOrn=p.getBasePositionAndOrientation(self.blockUid)
|
||||
blockPos, blockOrn = p.getBasePositionAndOrientation(self.blockUid)
|
||||
if (blockPos[2] > 0.23):
|
||||
#print("BLOCKPOS!")
|
||||
#print(blockPos[2])
|
||||
break
|
||||
state = p.getLinkState(self._kuka.kukaUid,self._kuka.kukaEndEffectorIndex)
|
||||
state = p.getLinkState(self._kuka.kukaUid, self._kuka.kukaEndEffectorIndex)
|
||||
actualEndEffectorPos = state[0]
|
||||
if (actualEndEffectorPos[2]>0.5):
|
||||
if (actualEndEffectorPos[2] > 0.5):
|
||||
break
|
||||
|
||||
|
||||
self._observation = self.getExtendedObservation()
|
||||
return True
|
||||
return False
|
||||
@@ -237,20 +250,21 @@ class KukaCamGymEnv(gym.Env):
|
||||
def _reward(self):
|
||||
|
||||
#rewards is height of target object
|
||||
blockPos,blockOrn=p.getBasePositionAndOrientation(self.blockUid)
|
||||
closestPoints = p.getClosestPoints(self.blockUid,self._kuka.kukaUid,1000, -1, self._kuka.kukaEndEffectorIndex)
|
||||
blockPos, blockOrn = p.getBasePositionAndOrientation(self.blockUid)
|
||||
closestPoints = p.getClosestPoints(self.blockUid, self._kuka.kukaUid, 1000, -1,
|
||||
self._kuka.kukaEndEffectorIndex)
|
||||
|
||||
reward = -1000
|
||||
numPt = len(closestPoints)
|
||||
#print(numPt)
|
||||
if (numPt>0):
|
||||
if (numPt > 0):
|
||||
#print("reward:")
|
||||
reward = -closestPoints[0][8]*10
|
||||
if (blockPos[2] >0.2):
|
||||
reward = -closestPoints[0][8] * 10
|
||||
if (blockPos[2] > 0.2):
|
||||
#print("grasped a block!!!")
|
||||
#print("self._envStepCounter")
|
||||
#print(self._envStepCounter)
|
||||
reward = reward+1000
|
||||
reward = reward + 1000
|
||||
|
||||
#print("reward")
|
||||
#print(reward)
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import os, inspect
|
||||
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
|
||||
print ("current_dir=" + currentdir)
|
||||
os.sys.path.insert(0,currentdir)
|
||||
print("current_dir=" + currentdir)
|
||||
os.sys.path.insert(0, currentdir)
|
||||
|
||||
import math
|
||||
import gym
|
||||
@@ -20,11 +20,9 @@ largeValObservation = 100
|
||||
RENDER_HEIGHT = 720
|
||||
RENDER_WIDTH = 960
|
||||
|
||||
|
||||
class KukaGymEnv(gym.Env):
|
||||
metadata = {
|
||||
'render.modes': ['human', 'rgb_array'],
|
||||
'video.frames_per_second' : 50
|
||||
}
|
||||
metadata = {'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': 50}
|
||||
|
||||
def __init__(self,
|
||||
urdfRoot=pybullet_data.getDataPath(),
|
||||
@@ -32,10 +30,10 @@ class KukaGymEnv(gym.Env):
|
||||
isEnableSelfCollision=True,
|
||||
renders=False,
|
||||
isDiscrete=False,
|
||||
maxSteps = 1000):
|
||||
maxSteps=1000):
|
||||
#print("KukaGymEnv __init__")
|
||||
self._isDiscrete = isDiscrete
|
||||
self._timeStep = 1./240.
|
||||
self._timeStep = 1. / 240.
|
||||
self._urdfRoot = urdfRoot
|
||||
self._actionRepeat = actionRepeat
|
||||
self._isEnableSelfCollision = isEnableSelfCollision
|
||||
@@ -51,9 +49,9 @@ class KukaGymEnv(gym.Env):
|
||||
self._p = p
|
||||
if self._renders:
|
||||
cid = p.connect(p.SHARED_MEMORY)
|
||||
if (cid<0):
|
||||
cid = p.connect(p.GUI)
|
||||
p.resetDebugVisualizerCamera(1.3,180,-41,[0.52,-0.2,-0.33])
|
||||
if (cid < 0):
|
||||
cid = p.connect(p.GUI)
|
||||
p.resetDebugVisualizerCamera(1.3, 180, -41, [0.52, -0.2, -0.33])
|
||||
else:
|
||||
p.connect(p.DIRECT)
|
||||
#timinglog = p.startStateLogging(p.STATE_LOGGING_PROFILE_TIMINGS, "kukaTimings.json")
|
||||
@@ -67,10 +65,10 @@ class KukaGymEnv(gym.Env):
|
||||
if (self._isDiscrete):
|
||||
self.action_space = spaces.Discrete(7)
|
||||
else:
|
||||
action_dim = 3
|
||||
self._action_bound = 1
|
||||
action_high = np.array([self._action_bound] * action_dim)
|
||||
self.action_space = spaces.Box(-action_high, action_high)
|
||||
action_dim = 3
|
||||
self._action_bound = 1
|
||||
action_high = np.array([self._action_bound] * action_dim)
|
||||
self.action_space = spaces.Box(-action_high, action_high)
|
||||
self.observation_space = spaces.Box(-observation_high, observation_high)
|
||||
self.viewer = None
|
||||
|
||||
@@ -80,17 +78,19 @@ class KukaGymEnv(gym.Env):
|
||||
p.resetSimulation()
|
||||
p.setPhysicsEngineParameter(numSolverIterations=150)
|
||||
p.setTimeStep(self._timeStep)
|
||||
p.loadURDF(os.path.join(self._urdfRoot,"plane.urdf"),[0,0,-1])
|
||||
p.loadURDF(os.path.join(self._urdfRoot, "plane.urdf"), [0, 0, -1])
|
||||
|
||||
p.loadURDF(os.path.join(self._urdfRoot,"table/table.urdf"), 0.5000000,0.00000,-.820000,0.000000,0.000000,0.0,1.0)
|
||||
p.loadURDF(os.path.join(self._urdfRoot, "table/table.urdf"), 0.5000000, 0.00000, -.820000,
|
||||
0.000000, 0.000000, 0.0, 1.0)
|
||||
|
||||
xpos = 0.55 +0.12*random.random()
|
||||
ypos = 0 +0.2*random.random()
|
||||
ang = 3.14*0.5+3.1415925438*random.random()
|
||||
orn = p.getQuaternionFromEuler([0,0,ang])
|
||||
self.blockUid =p.loadURDF(os.path.join(self._urdfRoot,"block.urdf"), xpos,ypos,-0.15,orn[0],orn[1],orn[2],orn[3])
|
||||
xpos = 0.55 + 0.12 * random.random()
|
||||
ypos = 0 + 0.2 * random.random()
|
||||
ang = 3.14 * 0.5 + 3.1415925438 * random.random()
|
||||
orn = p.getQuaternionFromEuler([0, 0, ang])
|
||||
self.blockUid = p.loadURDF(os.path.join(self._urdfRoot, "block.urdf"), xpos, ypos, -0.15,
|
||||
orn[0], orn[1], orn[2], orn[3])
|
||||
|
||||
p.setGravity(0,0,-10)
|
||||
p.setGravity(0, 0, -10)
|
||||
self._kuka = kuka.Kuka(urdfRootPath=self._urdfRoot, timeStep=self._timeStep)
|
||||
self._envStepCounter = 0
|
||||
p.stepSimulation()
|
||||
@@ -105,47 +105,48 @@ class KukaGymEnv(gym.Env):
|
||||
return [seed]
|
||||
|
||||
def getExtendedObservation(self):
|
||||
self._observation = self._kuka.getObservation()
|
||||
gripperState = p.getLinkState(self._kuka.kukaUid,self._kuka.kukaGripperIndex)
|
||||
gripperPos = gripperState[0]
|
||||
gripperOrn = gripperState[1]
|
||||
blockPos,blockOrn = p.getBasePositionAndOrientation(self.blockUid)
|
||||
self._observation = self._kuka.getObservation()
|
||||
gripperState = p.getLinkState(self._kuka.kukaUid, self._kuka.kukaGripperIndex)
|
||||
gripperPos = gripperState[0]
|
||||
gripperOrn = gripperState[1]
|
||||
blockPos, blockOrn = p.getBasePositionAndOrientation(self.blockUid)
|
||||
|
||||
invGripperPos,invGripperOrn = p.invertTransform(gripperPos,gripperOrn)
|
||||
gripperMat = p.getMatrixFromQuaternion(gripperOrn)
|
||||
dir0 = [gripperMat[0],gripperMat[3],gripperMat[6]]
|
||||
dir1 = [gripperMat[1],gripperMat[4],gripperMat[7]]
|
||||
dir2 = [gripperMat[2],gripperMat[5],gripperMat[8]]
|
||||
invGripperPos, invGripperOrn = p.invertTransform(gripperPos, gripperOrn)
|
||||
gripperMat = p.getMatrixFromQuaternion(gripperOrn)
|
||||
dir0 = [gripperMat[0], gripperMat[3], gripperMat[6]]
|
||||
dir1 = [gripperMat[1], gripperMat[4], gripperMat[7]]
|
||||
dir2 = [gripperMat[2], gripperMat[5], gripperMat[8]]
|
||||
|
||||
gripperEul = p.getEulerFromQuaternion(gripperOrn)
|
||||
#print("gripperEul")
|
||||
#print(gripperEul)
|
||||
blockPosInGripper,blockOrnInGripper = p.multiplyTransforms(invGripperPos,invGripperOrn,blockPos,blockOrn)
|
||||
projectedBlockPos2D =[blockPosInGripper[0],blockPosInGripper[1]]
|
||||
blockEulerInGripper = p.getEulerFromQuaternion(blockOrnInGripper)
|
||||
#print("projectedBlockPos2D")
|
||||
#print(projectedBlockPos2D)
|
||||
#print("blockEulerInGripper")
|
||||
#print(blockEulerInGripper)
|
||||
gripperEul = p.getEulerFromQuaternion(gripperOrn)
|
||||
#print("gripperEul")
|
||||
#print(gripperEul)
|
||||
blockPosInGripper, blockOrnInGripper = p.multiplyTransforms(invGripperPos, invGripperOrn,
|
||||
blockPos, blockOrn)
|
||||
projectedBlockPos2D = [blockPosInGripper[0], blockPosInGripper[1]]
|
||||
blockEulerInGripper = p.getEulerFromQuaternion(blockOrnInGripper)
|
||||
#print("projectedBlockPos2D")
|
||||
#print(projectedBlockPos2D)
|
||||
#print("blockEulerInGripper")
|
||||
#print(blockEulerInGripper)
|
||||
|
||||
#we return the relative x,y position and euler angle of block in gripper space
|
||||
blockInGripperPosXYEulZ =[blockPosInGripper[0],blockPosInGripper[1],blockEulerInGripper[2]]
|
||||
#we return the relative x,y position and euler angle of block in gripper space
|
||||
blockInGripperPosXYEulZ = [blockPosInGripper[0], blockPosInGripper[1], blockEulerInGripper[2]]
|
||||
|
||||
#p.addUserDebugLine(gripperPos,[gripperPos[0]+dir0[0],gripperPos[1]+dir0[1],gripperPos[2]+dir0[2]],[1,0,0],lifeTime=1)
|
||||
#p.addUserDebugLine(gripperPos,[gripperPos[0]+dir1[0],gripperPos[1]+dir1[1],gripperPos[2]+dir1[2]],[0,1,0],lifeTime=1)
|
||||
#p.addUserDebugLine(gripperPos,[gripperPos[0]+dir2[0],gripperPos[1]+dir2[1],gripperPos[2]+dir2[2]],[0,0,1],lifeTime=1)
|
||||
#p.addUserDebugLine(gripperPos,[gripperPos[0]+dir0[0],gripperPos[1]+dir0[1],gripperPos[2]+dir0[2]],[1,0,0],lifeTime=1)
|
||||
#p.addUserDebugLine(gripperPos,[gripperPos[0]+dir1[0],gripperPos[1]+dir1[1],gripperPos[2]+dir1[2]],[0,1,0],lifeTime=1)
|
||||
#p.addUserDebugLine(gripperPos,[gripperPos[0]+dir2[0],gripperPos[1]+dir2[1],gripperPos[2]+dir2[2]],[0,0,1],lifeTime=1)
|
||||
|
||||
self._observation.extend(list(blockInGripperPosXYEulZ))
|
||||
return self._observation
|
||||
self._observation.extend(list(blockInGripperPosXYEulZ))
|
||||
return self._observation
|
||||
|
||||
def step(self, action):
|
||||
if (self._isDiscrete):
|
||||
dv = 0.005
|
||||
dx = [0,-dv,dv,0,0,0,0][action]
|
||||
dy = [0,0,0,-dv,dv,0,0][action]
|
||||
da = [0,0,0,0,0,-0.05,0.05][action]
|
||||
dx = [0, -dv, dv, 0, 0, 0, 0][action]
|
||||
dy = [0, 0, 0, -dv, dv, 0, 0][action]
|
||||
da = [0, 0, 0, 0, 0, -0.05, 0.05][action]
|
||||
f = 0.3
|
||||
realAction = [dx,dy,-0.002,da,f]
|
||||
realAction = [dx, dy, -0.002, da, f]
|
||||
else:
|
||||
#print("action[0]=", str(action[0]))
|
||||
dv = 0.005
|
||||
@@ -153,8 +154,8 @@ class KukaGymEnv(gym.Env):
|
||||
dy = action[1] * dv
|
||||
da = action[2] * 0.05
|
||||
f = 0.3
|
||||
realAction = [dx,dy,-0.002,da,f]
|
||||
return self.step2( realAction)
|
||||
realAction = [dx, dy, -0.002, da, f]
|
||||
return self.step2(realAction)
|
||||
|
||||
def step2(self, action):
|
||||
for i in range(self._actionRepeat):
|
||||
@@ -171,11 +172,13 @@ class KukaGymEnv(gym.Env):
|
||||
#print(self._envStepCounter)
|
||||
|
||||
done = self._termination()
|
||||
npaction = np.array([action[3]]) #only penalize rotation until learning works well [action[0],action[1],action[3]])
|
||||
actionCost = np.linalg.norm(npaction)*10.
|
||||
npaction = np.array([
|
||||
action[3]
|
||||
]) #only penalize rotation until learning works well [action[0],action[1],action[3]])
|
||||
actionCost = np.linalg.norm(npaction) * 10.
|
||||
#print("actionCost")
|
||||
#print(actionCost)
|
||||
reward = self._reward()-actionCost
|
||||
reward = self._reward() - actionCost
|
||||
#print("reward")
|
||||
#print(reward)
|
||||
|
||||
@@ -187,22 +190,23 @@ class KukaGymEnv(gym.Env):
|
||||
if mode != "rgb_array":
|
||||
return np.array([])
|
||||
|
||||
base_pos,orn = self._p.getBasePositionAndOrientation(self._kuka.kukaUid)
|
||||
view_matrix = self._p.computeViewMatrixFromYawPitchRoll(
|
||||
cameraTargetPosition=base_pos,
|
||||
distance=self._cam_dist,
|
||||
yaw=self._cam_yaw,
|
||||
pitch=self._cam_pitch,
|
||||
roll=0,
|
||||
upAxisIndex=2)
|
||||
proj_matrix = self._p.computeProjectionMatrixFOV(
|
||||
fov=60, aspect=float(RENDER_WIDTH)/RENDER_HEIGHT,
|
||||
nearVal=0.1, farVal=100.0)
|
||||
(_, _, px, _, _) = self._p.getCameraImage(
|
||||
width=RENDER_WIDTH, height=RENDER_HEIGHT, viewMatrix=view_matrix,
|
||||
projectionMatrix=proj_matrix, renderer=self._p.ER_BULLET_HARDWARE_OPENGL)
|
||||
#renderer=self._p.ER_TINY_RENDERER)
|
||||
|
||||
base_pos, orn = self._p.getBasePositionAndOrientation(self._kuka.kukaUid)
|
||||
view_matrix = self._p.computeViewMatrixFromYawPitchRoll(cameraTargetPosition=base_pos,
|
||||
distance=self._cam_dist,
|
||||
yaw=self._cam_yaw,
|
||||
pitch=self._cam_pitch,
|
||||
roll=0,
|
||||
upAxisIndex=2)
|
||||
proj_matrix = self._p.computeProjectionMatrixFOV(fov=60,
|
||||
aspect=float(RENDER_WIDTH) / RENDER_HEIGHT,
|
||||
nearVal=0.1,
|
||||
farVal=100.0)
|
||||
(_, _, px, _, _) = self._p.getCameraImage(width=RENDER_WIDTH,
|
||||
height=RENDER_HEIGHT,
|
||||
viewMatrix=view_matrix,
|
||||
projectionMatrix=proj_matrix,
|
||||
renderer=self._p.ER_BULLET_HARDWARE_OPENGL)
|
||||
#renderer=self._p.ER_TINY_RENDERER)
|
||||
|
||||
rgb_array = np.array(px, dtype=np.uint8)
|
||||
rgb_array = np.reshape(rgb_array, (RENDER_HEIGHT, RENDER_WIDTH, 4))
|
||||
@@ -210,49 +214,47 @@ class KukaGymEnv(gym.Env):
|
||||
rgb_array = rgb_array[:, :, :3]
|
||||
return rgb_array
|
||||
|
||||
|
||||
def _termination(self):
|
||||
#print (self._kuka.endEffectorPos[2])
|
||||
state = p.getLinkState(self._kuka.kukaUid,self._kuka.kukaEndEffectorIndex)
|
||||
state = p.getLinkState(self._kuka.kukaUid, self._kuka.kukaEndEffectorIndex)
|
||||
actualEndEffectorPos = state[0]
|
||||
|
||||
#print("self._envStepCounter")
|
||||
#print(self._envStepCounter)
|
||||
if (self.terminated or self._envStepCounter>self._maxSteps):
|
||||
if (self.terminated or self._envStepCounter > self._maxSteps):
|
||||
self._observation = self.getExtendedObservation()
|
||||
return True
|
||||
maxDist = 0.005
|
||||
closestPoints = p.getClosestPoints(self._kuka.trayUid, self._kuka.kukaUid,maxDist)
|
||||
closestPoints = p.getClosestPoints(self._kuka.trayUid, self._kuka.kukaUid, maxDist)
|
||||
|
||||
if (len(closestPoints)):#(actualEndEffectorPos[2] <= -0.43):
|
||||
if (len(closestPoints)): #(actualEndEffectorPos[2] <= -0.43):
|
||||
self.terminated = 1
|
||||
|
||||
#print("terminating, closing gripper, attempting grasp")
|
||||
#start grasp and terminate
|
||||
fingerAngle = 0.3
|
||||
for i in range (100):
|
||||
graspAction = [0,0,0.0001,0,fingerAngle]
|
||||
for i in range(100):
|
||||
graspAction = [0, 0, 0.0001, 0, fingerAngle]
|
||||
self._kuka.applyAction(graspAction)
|
||||
p.stepSimulation()
|
||||
fingerAngle = fingerAngle-(0.3/100.)
|
||||
if (fingerAngle<0):
|
||||
fingerAngle=0
|
||||
fingerAngle = fingerAngle - (0.3 / 100.)
|
||||
if (fingerAngle < 0):
|
||||
fingerAngle = 0
|
||||
|
||||
for i in range (1000):
|
||||
graspAction = [0,0,0.001,0,fingerAngle]
|
||||
for i in range(1000):
|
||||
graspAction = [0, 0, 0.001, 0, fingerAngle]
|
||||
self._kuka.applyAction(graspAction)
|
||||
p.stepSimulation()
|
||||
blockPos,blockOrn=p.getBasePositionAndOrientation(self.blockUid)
|
||||
blockPos, blockOrn = p.getBasePositionAndOrientation(self.blockUid)
|
||||
if (blockPos[2] > 0.23):
|
||||
#print("BLOCKPOS!")
|
||||
#print(blockPos[2])
|
||||
break
|
||||
state = p.getLinkState(self._kuka.kukaUid,self._kuka.kukaEndEffectorIndex)
|
||||
state = p.getLinkState(self._kuka.kukaUid, self._kuka.kukaEndEffectorIndex)
|
||||
actualEndEffectorPos = state[0]
|
||||
if (actualEndEffectorPos[2]>0.5):
|
||||
if (actualEndEffectorPos[2] > 0.5):
|
||||
break
|
||||
|
||||
|
||||
self._observation = self.getExtendedObservation()
|
||||
return True
|
||||
return False
|
||||
@@ -260,18 +262,19 @@ class KukaGymEnv(gym.Env):
def _reward(self):

#rewards is height of target object
blockPos,blockOrn=p.getBasePositionAndOrientation(self.blockUid)
closestPoints = p.getClosestPoints(self.blockUid,self._kuka.kukaUid,1000, -1, self._kuka.kukaEndEffectorIndex)
blockPos, blockOrn = p.getBasePositionAndOrientation(self.blockUid)
closestPoints = p.getClosestPoints(self.blockUid, self._kuka.kukaUid, 1000, -1,
self._kuka.kukaEndEffectorIndex)

reward = -1000

numPt = len(closestPoints)
#print(numPt)
if (numPt>0):
if (numPt > 0):
#print("reward:")
reward = -closestPoints[0][8]*10
if (blockPos[2] >0.2):
reward = reward+10000
reward = -closestPoints[0][8] * 10
if (blockPos[2] > 0.2):
reward = reward + 10000
print("successfully grasped a block!!!")
#print("self._envStepCounter")
#print(self._envStepCounter)
@@ -279,7 +282,7 @@ class KukaGymEnv(gym.Env):
#print(self._envStepCounter)
#print("reward")
#print(reward)
#print("reward")
#print("reward")
#print(reward)
return reward

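The reward above penalizes the gripper-to-block distance and adds a large bonus once the block is lifted; entry index 8 of a p.getClosestPoints() record is the contact distance. A minimal standalone sketch of the same shaping, with hypothetical names, is:

# Hypothetical sketch of the shaping used in _reward above, not part of this commit.
def grasp_reward(closest_points, block_height):
  # closest_points comes from p.getClosestPoints(); record index 8 is the contact distance.
  reward = -1000
  if len(closest_points) > 0:
    reward = -closest_points[0][8] * 10
    if block_height > 0.2:
      reward = reward + 10000  # block lifted above the tray: grasp succeeded
  return reward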
@@ -13,6 +13,7 @@ import glob
|
||||
from pkg_resources import parse_version
|
||||
import gym
|
||||
|
||||
|
||||
class KukaDiverseObjectEnv(KukaGymEnv):
|
||||
"""Class for Kuka environment with diverse objects.
|
||||
|
||||
@@ -61,7 +62,7 @@ class KukaDiverseObjectEnv(KukaGymEnv):
|
||||
"""
|
||||
|
||||
self._isDiscrete = isDiscrete
|
||||
self._timeStep = 1./240.
|
||||
self._timeStep = 1. / 240.
|
||||
self._urdfRoot = urdfRoot
|
||||
self._actionRepeat = actionRepeat
|
||||
self._isEnableSelfCollision = isEnableSelfCollision
|
||||
@@ -85,9 +86,9 @@ class KukaDiverseObjectEnv(KukaGymEnv):
|
||||
|
||||
if self._renders:
|
||||
self.cid = p.connect(p.SHARED_MEMORY)
|
||||
if (self.cid<0):
|
||||
self.cid = p.connect(p.GUI)
|
||||
p.resetDebugVisualizerCamera(1.3,180,-41,[0.52,-0.2,-0.33])
|
||||
if (self.cid < 0):
|
||||
self.cid = p.connect(p.GUI)
|
||||
p.resetDebugVisualizerCamera(1.3, 180, -41, [0.52, -0.2, -0.33])
|
||||
else:
|
||||
self.cid = p.connect(p.DIRECT)
|
||||
self.seed()
|
||||
@@ -100,9 +101,7 @@ class KukaDiverseObjectEnv(KukaGymEnv):
|
||||
else:
|
||||
self.action_space = spaces.Box(low=-1, high=1, shape=(3,)) # dx, dy, da
|
||||
if self._removeHeightHack:
|
||||
self.action_space = spaces.Box(low=-1,
|
||||
high=1,
|
||||
shape=(4,)) # dx, dy, dz, da
|
||||
self.action_space = spaces.Box(low=-1, high=1, shape=(4,)) # dx, dy, dz, da
|
||||
self.viewer = None
|
||||
|
||||
def reset(self):
|
||||
@@ -111,17 +110,15 @@ class KukaDiverseObjectEnv(KukaGymEnv):
|
||||
# Set the camera settings.
|
||||
look = [0.23, 0.2, 0.54]
|
||||
distance = 1.
|
||||
pitch = -56 + self._cameraRandom*np.random.uniform(-3, 3)
|
||||
yaw = 245 + self._cameraRandom*np.random.uniform(-3, 3)
|
||||
pitch = -56 + self._cameraRandom * np.random.uniform(-3, 3)
|
||||
yaw = 245 + self._cameraRandom * np.random.uniform(-3, 3)
|
||||
roll = 0
|
||||
self._view_matrix = p.computeViewMatrixFromYawPitchRoll(
|
||||
look, distance, yaw, pitch, roll, 2)
|
||||
fov = 20. + self._cameraRandom*np.random.uniform(-2, 2)
|
||||
self._view_matrix = p.computeViewMatrixFromYawPitchRoll(look, distance, yaw, pitch, roll, 2)
|
||||
fov = 20. + self._cameraRandom * np.random.uniform(-2, 2)
|
||||
aspect = self._width / self._height
|
||||
near = 0.01
|
||||
far = 10
|
||||
self._proj_matrix = p.computeProjectionMatrixFOV(
|
||||
fov, aspect, near, far)
|
||||
self._proj_matrix = p.computeProjectionMatrixFOV(fov, aspect, near, far)
|
||||
|
||||
self._attempted_grasp = False
|
||||
self._env_step = 0
|
||||
@@ -130,18 +127,18 @@ class KukaDiverseObjectEnv(KukaGymEnv):
|
||||
p.resetSimulation()
|
||||
p.setPhysicsEngineParameter(numSolverIterations=150)
|
||||
p.setTimeStep(self._timeStep)
|
||||
p.loadURDF(os.path.join(self._urdfRoot,"plane.urdf"),[0,0,-1])
|
||||
p.loadURDF(os.path.join(self._urdfRoot, "plane.urdf"), [0, 0, -1])
|
||||
|
||||
p.loadURDF(os.path.join(self._urdfRoot,"table/table.urdf"), 0.5000000,0.00000,-.820000,0.000000,0.000000,0.0,1.0)
|
||||
p.loadURDF(os.path.join(self._urdfRoot, "table/table.urdf"), 0.5000000, 0.00000, -.820000,
|
||||
0.000000, 0.000000, 0.0, 1.0)
|
||||
|
||||
p.setGravity(0,0,-10)
|
||||
p.setGravity(0, 0, -10)
|
||||
self._kuka = kuka.Kuka(urdfRootPath=self._urdfRoot, timeStep=self._timeStep)
|
||||
self._envStepCounter = 0
|
||||
p.stepSimulation()
|
||||
|
||||
# Choose the objects in the bin.
|
||||
urdfList = self._get_random_object(
|
||||
self._numObjects, self._isTest)
|
||||
urdfList = self._get_random_object(self._numObjects, self._isTest)
|
||||
self._objectUids = self._randomly_place_objects(urdfList)
|
||||
self._observation = self._get_observation()
|
||||
return np.array(self._observation)
|
||||
@@ -156,17 +153,15 @@ class KukaDiverseObjectEnv(KukaGymEnv):
|
||||
The list of object unique ID's.
|
||||
"""
|
||||
|
||||
|
||||
# Randomize positions of each object urdf.
|
||||
objectUids = []
|
||||
for urdf_name in urdfList:
|
||||
xpos = 0.4 +self._blockRandom*random.random()
|
||||
ypos = self._blockRandom*(random.random()-.5)
|
||||
angle = np.pi/2 + self._blockRandom * np.pi * random.random()
|
||||
xpos = 0.4 + self._blockRandom * random.random()
|
||||
ypos = self._blockRandom * (random.random() - .5)
|
||||
angle = np.pi / 2 + self._blockRandom * np.pi * random.random()
|
||||
orn = p.getQuaternionFromEuler([0, 0, angle])
|
||||
urdf_path = os.path.join(self._urdfRoot, urdf_name)
|
||||
uid = p.loadURDF(urdf_path, [xpos, ypos, .15],
|
||||
[orn[0], orn[1], orn[2], orn[3]])
|
||||
uid = p.loadURDF(urdf_path, [xpos, ypos, .15], [orn[0], orn[1], orn[2], orn[3]])
|
||||
objectUids.append(uid)
|
||||
# Let each object fall to the tray individual, to prevent object
|
||||
# intersection.
|
||||
@@ -178,9 +173,9 @@ class KukaDiverseObjectEnv(KukaGymEnv):
|
||||
"""Return the observation as an image.
|
||||
"""
|
||||
img_arr = p.getCameraImage(width=self._width,
|
||||
height=self._height,
|
||||
viewMatrix=self._view_matrix,
|
||||
projectionMatrix=self._proj_matrix)
|
||||
height=self._height,
|
||||
viewMatrix=self._view_matrix,
|
||||
projectionMatrix=self._proj_matrix)
|
||||
rgb = img_arr[2]
|
||||
np_img_arr = np.reshape(rgb, (self._height, self._width, 4))
|
||||
return np_img_arr[:, :, :3]
|
||||
@@ -246,8 +241,7 @@ class KukaDiverseObjectEnv(KukaGymEnv):
|
||||
break
|
||||
|
||||
# If we are close to the bin, attempt grasp.
|
||||
state = p.getLinkState(self._kuka.kukaUid,
|
||||
self._kuka.kukaEndEffectorIndex)
|
||||
state = p.getLinkState(self._kuka.kukaUid, self._kuka.kukaEndEffectorIndex)
|
||||
end_effector_pos = state[0]
|
||||
if end_effector_pos[2] <= 0.1:
|
||||
finger_angle = 0.3
|
||||
@@ -257,7 +251,7 @@ class KukaDiverseObjectEnv(KukaGymEnv):
|
||||
p.stepSimulation()
|
||||
#if self._renders:
|
||||
# time.sleep(self._timeStep)
|
||||
finger_angle -= 0.3/100.
|
||||
finger_angle -= 0.3 / 100.
|
||||
if finger_angle < 0:
|
||||
finger_angle = 0
|
||||
for _ in range(500):
|
||||
@@ -266,7 +260,7 @@ class KukaDiverseObjectEnv(KukaGymEnv):
|
||||
p.stepSimulation()
|
||||
if self._renders:
|
||||
time.sleep(self._timeStep)
|
||||
finger_angle -= 0.3/100.
|
||||
finger_angle -= 0.3 / 100.
|
||||
if finger_angle < 0:
|
||||
finger_angle = 0
|
||||
self._attempted_grasp = True
|
||||
@@ -274,9 +268,7 @@ class KukaDiverseObjectEnv(KukaGymEnv):
|
||||
done = self._termination()
|
||||
reward = self._reward()
|
||||
|
||||
debug = {
|
||||
'grasp_success': self._graspSuccess
|
||||
}
|
||||
debug = {'grasp_success': self._graspSuccess}
|
||||
return observation, reward, done, debug
|
||||
|
||||
def _reward(self):
|
||||
@@ -288,8 +280,7 @@ class KukaDiverseObjectEnv(KukaGymEnv):
|
||||
reward = 0
|
||||
self._graspSuccess = 0
|
||||
for uid in self._objectUids:
|
||||
pos, _ = p.getBasePositionAndOrientation(
|
||||
uid)
|
||||
pos, _ = p.getBasePositionAndOrientation(uid)
|
||||
# If any block is above height, provide reward.
|
||||
if pos[2] > 0.2:
|
||||
self._graspSuccess += 1
|
||||
@@ -319,8 +310,7 @@ class KukaDiverseObjectEnv(KukaGymEnv):
|
||||
urdf_pattern = os.path.join(self._urdfRoot, 'random_urdfs/*[1-9]/*.urdf')
|
||||
found_object_directories = glob.glob(urdf_pattern)
|
||||
total_num_objects = len(found_object_directories)
|
||||
selected_objects = np.random.choice(np.arange(total_num_objects),
|
||||
num_objects)
|
||||
selected_objects = np.random.choice(np.arange(total_num_objects), num_objects)
|
||||
selected_objects_filenames = []
|
||||
for object_index in selected_objects:
|
||||
selected_objects_filenames += [found_object_directories[object_index]]
|
||||
|
||||
@@ -15,9 +15,8 @@ OVERHEAT_SHUTDOWN_TORQUE = 2.45
OVERHEAT_SHUTDOWN_TIME = 1.0
LEG_POSITION = ["front_left", "back_left", "front_right", "back_right"]
MOTOR_NAMES = [
"motor_front_leftL_joint", "motor_front_leftR_joint",
"motor_back_leftL_joint", "motor_back_leftR_joint",
"motor_front_rightL_joint", "motor_front_rightR_joint",
"motor_front_leftL_joint", "motor_front_leftR_joint", "motor_back_leftL_joint",
"motor_back_leftR_joint", "motor_front_rightL_joint", "motor_front_rightR_joint",
"motor_back_rightL_joint", "motor_back_rightR_joint"
]
LEG_LINK_ID = [2, 3, 5, 6, 8, 9, 11, 12, 15, 16, 18, 19, 21, 22, 24, 25]
@@ -33,7 +32,7 @@ class Minitaur(object):
|
||||
|
||||
def __init__(self,
|
||||
pybullet_client,
|
||||
urdf_root= os.path.join(os.path.dirname(__file__),"../data"),
|
||||
urdf_root=os.path.join(os.path.dirname(__file__), "../data"),
|
||||
time_step=0.01,
|
||||
self_collision_enabled=False,
|
||||
motor_velocity_limit=np.inf,
|
||||
@@ -87,10 +86,9 @@ class Minitaur(object):
|
||||
if self._accurate_motor_model_enabled:
|
||||
self._kp = motor_kp
|
||||
self._kd = motor_kd
|
||||
self._motor_model = motor.MotorModel(
|
||||
torque_control_enabled=self._torque_control_enabled,
|
||||
kp=self._kp,
|
||||
kd=self._kd)
|
||||
self._motor_model = motor.MotorModel(torque_control_enabled=self._torque_control_enabled,
|
||||
kp=self._kp,
|
||||
kd=self._kd)
|
||||
elif self._pd_control_enabled:
|
||||
self._kp = 8
|
||||
self._kd = kd_for_pd_controllers
|
||||
@@ -101,15 +99,12 @@ class Minitaur(object):
|
||||
self.Reset()
|
||||
|
||||
def _RecordMassInfoFromURDF(self):
|
||||
self._base_mass_urdf = self._pybullet_client.getDynamicsInfo(
|
||||
self.quadruped, BASE_LINK_ID)[0]
|
||||
self._base_mass_urdf = self._pybullet_client.getDynamicsInfo(self.quadruped, BASE_LINK_ID)[0]
|
||||
self._leg_masses_urdf = []
|
||||
self._leg_masses_urdf.append(
|
||||
self._pybullet_client.getDynamicsInfo(self.quadruped, LEG_LINK_ID[0])[
|
||||
0])
|
||||
self._pybullet_client.getDynamicsInfo(self.quadruped, LEG_LINK_ID[0])[0])
|
||||
self._leg_masses_urdf.append(
|
||||
self._pybullet_client.getDynamicsInfo(self.quadruped, MOTOR_LINK_ID[0])[
|
||||
0])
|
||||
self._pybullet_client.getDynamicsInfo(self.quadruped, MOTOR_LINK_ID[0])[0])
|
||||
|
||||
def _BuildJointNameToIdDict(self):
|
||||
num_joints = self._pybullet_client.getNumJoints(self.quadruped)
|
||||
@@ -119,9 +114,7 @@ class Minitaur(object):
|
||||
self._joint_name_to_id[joint_info[1].decode("UTF-8")] = joint_info[0]
|
||||
|
||||
def _BuildMotorIdList(self):
|
||||
self._motor_id_list = [
|
||||
self._joint_name_to_id[motor_name] for motor_name in MOTOR_NAMES
|
||||
]
|
||||
self._motor_id_list = [self._joint_name_to_id[motor_name] for motor_name in MOTOR_NAMES]
|
||||
|
||||
def Reset(self, reload_urdf=True):
|
||||
"""Reset the minitaur to its initial states.
|
||||
@@ -144,39 +137,35 @@ class Minitaur(object):
|
||||
self._RecordMassInfoFromURDF()
|
||||
self.ResetPose(add_constraint=True)
|
||||
if self._on_rack:
|
||||
self._pybullet_client.createConstraint(
|
||||
self.quadruped, -1, -1, -1, self._pybullet_client.JOINT_FIXED,
|
||||
[0, 0, 0], [0, 0, 0], [0, 0, 1])
|
||||
self._pybullet_client.createConstraint(self.quadruped, -1, -1, -1,
|
||||
self._pybullet_client.JOINT_FIXED, [0, 0, 0],
|
||||
[0, 0, 0], [0, 0, 1])
|
||||
else:
|
||||
self._pybullet_client.resetBasePositionAndOrientation(
|
||||
self.quadruped, INIT_POSITION, INIT_ORIENTATION)
|
||||
self._pybullet_client.resetBaseVelocity(self.quadruped, [0, 0, 0],
|
||||
[0, 0, 0])
|
||||
self._pybullet_client.resetBasePositionAndOrientation(self.quadruped, INIT_POSITION,
|
||||
INIT_ORIENTATION)
|
||||
self._pybullet_client.resetBaseVelocity(self.quadruped, [0, 0, 0], [0, 0, 0])
|
||||
self.ResetPose(add_constraint=False)
|
||||
|
||||
self._overheat_counter = np.zeros(self.num_motors)
|
||||
self._motor_enabled_list = [True] * self.num_motors
|
||||
|
||||
def _SetMotorTorqueById(self, motor_id, torque):
|
||||
self._pybullet_client.setJointMotorControl2(
|
||||
bodyIndex=self.quadruped,
|
||||
jointIndex=motor_id,
|
||||
controlMode=self._pybullet_client.TORQUE_CONTROL,
|
||||
force=torque)
|
||||
self._pybullet_client.setJointMotorControl2(bodyIndex=self.quadruped,
|
||||
jointIndex=motor_id,
|
||||
controlMode=self._pybullet_client.TORQUE_CONTROL,
|
||||
force=torque)
|
||||
|
||||
def _SetDesiredMotorAngleById(self, motor_id, desired_angle):
|
||||
self._pybullet_client.setJointMotorControl2(
|
||||
bodyIndex=self.quadruped,
|
||||
jointIndex=motor_id,
|
||||
controlMode=self._pybullet_client.POSITION_CONTROL,
|
||||
targetPosition=desired_angle,
|
||||
positionGain=self._kp,
|
||||
velocityGain=self._kd,
|
||||
force=self._max_force)
|
||||
self._pybullet_client.setJointMotorControl2(bodyIndex=self.quadruped,
|
||||
jointIndex=motor_id,
|
||||
controlMode=self._pybullet_client.POSITION_CONTROL,
|
||||
targetPosition=desired_angle,
|
||||
positionGain=self._kp,
|
||||
velocityGain=self._kd,
|
||||
force=self._max_force)
|
||||
|
||||
def _SetDesiredMotorAngleByName(self, motor_name, desired_angle):
|
||||
self._SetDesiredMotorAngleById(self._joint_name_to_id[motor_name],
|
||||
desired_angle)
|
||||
self._SetDesiredMotorAngleById(self._joint_name_to_id[motor_name], desired_angle)
|
||||
|
||||
def ResetPose(self, add_constraint):
|
||||
"""Reset the pose of the minitaur.
|
||||
@@ -200,59 +189,53 @@ class Minitaur(object):
|
||||
knee_angle = -2.1834
|
||||
|
||||
leg_position = LEG_POSITION[leg_id]
|
||||
self._pybullet_client.resetJointState(
|
||||
self.quadruped,
|
||||
self._joint_name_to_id["motor_" + leg_position + "L_joint"],
|
||||
self._motor_direction[2 * leg_id] * half_pi,
|
||||
targetVelocity=0)
|
||||
self._pybullet_client.resetJointState(
|
||||
self.quadruped,
|
||||
self._joint_name_to_id["knee_" + leg_position + "L_link"],
|
||||
self._motor_direction[2 * leg_id] * knee_angle,
|
||||
targetVelocity=0)
|
||||
self._pybullet_client.resetJointState(
|
||||
self.quadruped,
|
||||
self._joint_name_to_id["motor_" + leg_position + "R_joint"],
|
||||
self._motor_direction[2 * leg_id + 1] * half_pi,
|
||||
targetVelocity=0)
|
||||
self._pybullet_client.resetJointState(
|
||||
self.quadruped,
|
||||
self._joint_name_to_id["knee_" + leg_position + "R_link"],
|
||||
self._motor_direction[2 * leg_id + 1] * knee_angle,
|
||||
targetVelocity=0)
|
||||
self._pybullet_client.resetJointState(self.quadruped,
|
||||
self._joint_name_to_id["motor_" + leg_position +
|
||||
"L_joint"],
|
||||
self._motor_direction[2 * leg_id] * half_pi,
|
||||
targetVelocity=0)
|
||||
self._pybullet_client.resetJointState(self.quadruped,
|
||||
self._joint_name_to_id["knee_" + leg_position +
|
||||
"L_link"],
|
||||
self._motor_direction[2 * leg_id] * knee_angle,
|
||||
targetVelocity=0)
|
||||
self._pybullet_client.resetJointState(self.quadruped,
|
||||
self._joint_name_to_id["motor_" + leg_position +
|
||||
"R_joint"],
|
||||
self._motor_direction[2 * leg_id + 1] * half_pi,
|
||||
targetVelocity=0)
|
||||
self._pybullet_client.resetJointState(self.quadruped,
|
||||
self._joint_name_to_id["knee_" + leg_position +
|
||||
"R_link"],
|
||||
self._motor_direction[2 * leg_id + 1] * knee_angle,
|
||||
targetVelocity=0)
|
||||
if add_constraint:
|
||||
self._pybullet_client.createConstraint(
|
||||
self.quadruped, self._joint_name_to_id["knee_"
|
||||
+ leg_position + "R_link"],
|
||||
self.quadruped, self._joint_name_to_id["knee_"
|
||||
+ leg_position + "L_link"],
|
||||
self._pybullet_client.JOINT_POINT2POINT, [0, 0, 0],
|
||||
KNEE_CONSTRAINT_POINT_RIGHT, KNEE_CONSTRAINT_POINT_LEFT)
|
||||
self.quadruped, self._joint_name_to_id["knee_" + leg_position + "R_link"],
|
||||
self.quadruped, self._joint_name_to_id["knee_" + leg_position + "L_link"],
|
||||
self._pybullet_client.JOINT_POINT2POINT, [0, 0, 0], KNEE_CONSTRAINT_POINT_RIGHT,
|
||||
KNEE_CONSTRAINT_POINT_LEFT)
|
||||
|
||||
if self._accurate_motor_model_enabled or self._pd_control_enabled:
|
||||
# Disable the default motor in pybullet.
|
||||
self._pybullet_client.setJointMotorControl2(
|
||||
bodyIndex=self.quadruped,
|
||||
jointIndex=(self._joint_name_to_id["motor_"
|
||||
+ leg_position + "L_joint"]),
|
||||
jointIndex=(self._joint_name_to_id["motor_" + leg_position + "L_joint"]),
|
||||
controlMode=self._pybullet_client.VELOCITY_CONTROL,
|
||||
targetVelocity=0,
|
||||
force=knee_friction_force)
|
||||
self._pybullet_client.setJointMotorControl2(
|
||||
bodyIndex=self.quadruped,
|
||||
jointIndex=(self._joint_name_to_id["motor_"
|
||||
+ leg_position + "R_joint"]),
|
||||
jointIndex=(self._joint_name_to_id["motor_" + leg_position + "R_joint"]),
|
||||
controlMode=self._pybullet_client.VELOCITY_CONTROL,
|
||||
targetVelocity=0,
|
||||
force=knee_friction_force)
|
||||
|
||||
else:
|
||||
self._SetDesiredMotorAngleByName(
|
||||
"motor_" + leg_position + "L_joint",
|
||||
self._motor_direction[2 * leg_id] * half_pi)
|
||||
self._SetDesiredMotorAngleByName("motor_" + leg_position + "L_joint",
|
||||
self._motor_direction[2 * leg_id] * half_pi)
|
||||
self._SetDesiredMotorAngleByName("motor_" + leg_position + "R_joint",
|
||||
self._motor_direction[2 * leg_id
|
||||
+ 1] * half_pi)
|
||||
self._motor_direction[2 * leg_id + 1] * half_pi)
|
||||
|
||||
self._pybullet_client.setJointMotorControl2(
|
||||
bodyIndex=self.quadruped,
|
||||
@@ -273,8 +256,7 @@ class Minitaur(object):
|
||||
Returns:
|
||||
The position of minitaur's base.
|
||||
"""
|
||||
position, _ = (
|
||||
self._pybullet_client.getBasePositionAndOrientation(self.quadruped))
|
||||
position, _ = (self._pybullet_client.getBasePositionAndOrientation(self.quadruped))
|
||||
return position
|
||||
|
||||
def GetBaseOrientation(self):
|
||||
@@ -283,8 +265,7 @@ class Minitaur(object):
|
||||
Returns:
|
||||
The orientation of minitaur's base.
|
||||
"""
|
||||
_, orientation = (
|
||||
self._pybullet_client.getBasePositionAndOrientation(self.quadruped))
|
||||
_, orientation = (self._pybullet_client.getBasePositionAndOrientation(self.quadruped))
|
||||
return orientation
|
||||
|
||||
def GetActionDimension(self):
|
||||
@@ -304,10 +285,9 @@ class Minitaur(object):
|
||||
"""
|
||||
upper_bound = np.array([0.0] * self.GetObservationDimension())
|
||||
upper_bound[0:self.num_motors] = math.pi # Joint angle.
|
||||
upper_bound[self.num_motors:2 * self.num_motors] = (
|
||||
motor.MOTOR_SPEED_LIMIT) # Joint velocity.
|
||||
upper_bound[2 * self.num_motors:3 * self.num_motors] = (
|
||||
motor.OBSERVED_TORQUE_LIMIT) # Joint torque.
|
||||
upper_bound[self.num_motors:2 * self.num_motors] = (motor.MOTOR_SPEED_LIMIT) # Joint velocity.
|
||||
upper_bound[2 * self.num_motors:3 * self.num_motors] = (motor.OBSERVED_TORQUE_LIMIT
|
||||
) # Joint torque.
|
||||
upper_bound[3 * self.num_motors:] = 1.0 # Quaternion of base orientation.
|
||||
return upper_bound
|
||||
|
||||
@@ -354,12 +334,9 @@ class Minitaur(object):
|
||||
"""
|
||||
if self._motor_velocity_limit < np.inf:
|
||||
current_motor_angle = self.GetMotorAngles()
|
||||
motor_commands_max = (
|
||||
current_motor_angle + self.time_step * self._motor_velocity_limit)
|
||||
motor_commands_min = (
|
||||
current_motor_angle - self.time_step * self._motor_velocity_limit)
|
||||
motor_commands = np.clip(motor_commands, motor_commands_min,
|
||||
motor_commands_max)
|
||||
motor_commands_max = (current_motor_angle + self.time_step * self._motor_velocity_limit)
|
||||
motor_commands_min = (current_motor_angle - self.time_step * self._motor_velocity_limit)
|
||||
motor_commands = np.clip(motor_commands, motor_commands_min, motor_commands_max)
|
||||
|
||||
if self._accurate_motor_model_enabled or self._pd_control_enabled:
|
||||
q = self.GetMotorAngles()
|
||||
@@ -373,8 +350,7 @@ class Minitaur(object):
|
||||
self._overheat_counter[i] += 1
|
||||
else:
|
||||
self._overheat_counter[i] = 0
|
||||
if (self._overheat_counter[i] >
|
||||
OVERHEAT_SHUTDOWN_TIME / self.time_step):
|
||||
if (self._overheat_counter[i] > OVERHEAT_SHUTDOWN_TIME / self.time_step):
|
||||
self._motor_enabled_list[i] = False
|
||||
|
||||
# The torque is already in the observation space because we use
|
||||
@@ -382,12 +358,11 @@ class Minitaur(object):
|
||||
self._observed_motor_torques = observed_torque
|
||||
|
||||
# Transform into the motor space when applying the torque.
|
||||
self._applied_motor_torque = np.multiply(actual_torque,
|
||||
self._motor_direction)
|
||||
self._applied_motor_torque = np.multiply(actual_torque, self._motor_direction)
|
||||
|
||||
for motor_id, motor_torque, motor_enabled in zip(
|
||||
self._motor_id_list, self._applied_motor_torque,
|
||||
self._motor_enabled_list):
|
||||
for motor_id, motor_torque, motor_enabled in zip(self._motor_id_list,
|
||||
self._applied_motor_torque,
|
||||
self._motor_enabled_list):
|
||||
if motor_enabled:
|
||||
self._SetMotorTorqueById(motor_id, motor_torque)
|
||||
else:
|
||||
@@ -403,14 +378,12 @@ class Minitaur(object):
|
||||
self._applied_motor_torques = np.multiply(self._observed_motor_torques,
|
||||
self._motor_direction)
|
||||
|
||||
for motor_id, motor_torque in zip(self._motor_id_list,
|
||||
self._applied_motor_torques):
|
||||
for motor_id, motor_torque in zip(self._motor_id_list, self._applied_motor_torques):
|
||||
self._SetMotorTorqueById(motor_id, motor_torque)
|
||||
else:
|
||||
motor_commands_with_direction = np.multiply(motor_commands,
|
||||
self._motor_direction)
|
||||
for motor_id, motor_command_with_direction in zip(
|
||||
self._motor_id_list, motor_commands_with_direction):
|
||||
motor_commands_with_direction = np.multiply(motor_commands, self._motor_direction)
|
||||
for motor_id, motor_command_with_direction in zip(self._motor_id_list,
|
||||
motor_commands_with_direction):
|
||||
self._SetDesiredMotorAngleById(motor_id, motor_command_with_direction)
|
||||
|
||||
def GetMotorAngles(self):
|
||||
@@ -471,13 +444,13 @@ class Minitaur(object):
|
||||
quater_pi = math.pi / 4
|
||||
for i in range(self.num_motors):
|
||||
action_idx = i // 2
|
||||
forward_backward_component = (-scale_for_singularity * quater_pi * (
|
||||
actions[action_idx + half_num_motors] + offset_for_singularity))
|
||||
forward_backward_component = (
|
||||
-scale_for_singularity * quater_pi *
|
||||
(actions[action_idx + half_num_motors] + offset_for_singularity))
|
||||
extension_component = (-1)**i * quater_pi * actions[action_idx]
|
||||
if i >= half_num_motors:
|
||||
extension_component = -extension_component
|
||||
motor_angle[i] = (
|
||||
math.pi + forward_backward_component + extension_component)
|
||||
motor_angle[i] = (math.pi + forward_backward_component + extension_component)
|
||||
return motor_angle
|
||||
|
||||
def GetBaseMassFromURDF(self):
|
||||
@@ -489,8 +462,7 @@ class Minitaur(object):
|
||||
return self._leg_masses_urdf
|
||||
|
||||
def SetBaseMass(self, base_mass):
|
||||
self._pybullet_client.changeDynamics(
|
||||
self.quadruped, BASE_LINK_ID, mass=base_mass)
|
||||
self._pybullet_client.changeDynamics(self.quadruped, BASE_LINK_ID, mass=base_mass)
|
||||
|
||||
def SetLegMasses(self, leg_masses):
|
||||
"""Set the mass of the legs.
|
||||
@@ -504,11 +476,9 @@ class Minitaur(object):
|
||||
leg_masses[1] is the mass of the motor.
|
||||
"""
|
||||
for link_id in LEG_LINK_ID:
|
||||
self._pybullet_client.changeDynamics(
|
||||
self.quadruped, link_id, mass=leg_masses[0])
|
||||
self._pybullet_client.changeDynamics(self.quadruped, link_id, mass=leg_masses[0])
|
||||
for link_id in MOTOR_LINK_ID:
|
||||
self._pybullet_client.changeDynamics(
|
||||
self.quadruped, link_id, mass=leg_masses[1])
|
||||
self._pybullet_client.changeDynamics(self.quadruped, link_id, mass=leg_masses[1])
|
||||
|
||||
def SetFootFriction(self, foot_friction):
|
||||
"""Set the lateral friction of the feet.
|
||||
@@ -518,8 +488,7 @@ class Minitaur(object):
|
||||
shared by all four feet.
|
||||
"""
|
||||
for link_id in FOOT_LINK_ID:
|
||||
self._pybullet_client.changeDynamics(
|
||||
self.quadruped, link_id, lateralFriction=foot_friction)
|
||||
self._pybullet_client.changeDynamics(self.quadruped, link_id, lateralFriction=foot_friction)
|
||||
|
||||
def SetBatteryVoltage(self, voltage):
|
||||
if self._accurate_motor_model_enabled:
|
||||
|
||||
@@ -6,8 +6,7 @@ import os
|
||||
import inspect
|
||||
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
|
||||
parentdir = os.path.dirname(os.path.dirname(currentdir))
|
||||
os.sys.path.insert(0,parentdir)
|
||||
|
||||
os.sys.path.insert(0, parentdir)
|
||||
|
||||
import math
|
||||
import time
|
||||
@@ -34,8 +33,9 @@ OBSERVATION_EPS = 0.01
|
||||
RENDER_HEIGHT = 720
|
||||
RENDER_WIDTH = 960
|
||||
|
||||
duckStartPos = [0,0,0.25]
|
||||
duckStartOrn = [0.5,0.5,0.5,0.5]
|
||||
duckStartPos = [0, 0, 0.25]
|
||||
duckStartOrn = [0.5, 0.5, 0.5, 0.5]
|
||||
|
||||
|
||||
class MinitaurBulletDuckEnv(gym.Env):
|
||||
"""The gym environment for the minitaur.
|
||||
@@ -47,34 +47,32 @@ class MinitaurBulletDuckEnv(gym.Env):
|
||||
expenditure.
|
||||
|
||||
"""
|
||||
metadata = {
|
||||
"render.modes": ["human", "rgb_array"],
|
||||
"video.frames_per_second": 50
|
||||
}
|
||||
metadata = {"render.modes": ["human", "rgb_array"], "video.frames_per_second": 50}
|
||||
|
||||
def __init__(self,
|
||||
urdf_root=pybullet_data.getDataPath(),
|
||||
action_repeat=1,
|
||||
distance_weight=1.0,
|
||||
energy_weight=0.005,
|
||||
shake_weight=0.0,
|
||||
drift_weight=0.0,
|
||||
distance_limit=float("inf"),
|
||||
observation_noise_stdev=0.0,
|
||||
self_collision_enabled=True,
|
||||
motor_velocity_limit=np.inf,
|
||||
pd_control_enabled=False,#not needed to be true if accurate motor model is enabled (has its own better PD)
|
||||
leg_model_enabled=True,
|
||||
accurate_motor_model_enabled=True,
|
||||
motor_kp=1.0,
|
||||
motor_kd=0.02,
|
||||
torque_control_enabled=False,
|
||||
motor_overheat_protection=True,
|
||||
hard_reset=True,
|
||||
on_rack=False,
|
||||
render=False,
|
||||
kd_for_pd_controllers=0.3,
|
||||
env_randomizer=minitaur_env_randomizer.MinitaurEnvRandomizer()):
|
||||
def __init__(
|
||||
self,
|
||||
urdf_root=pybullet_data.getDataPath(),
|
||||
action_repeat=1,
|
||||
distance_weight=1.0,
|
||||
energy_weight=0.005,
|
||||
shake_weight=0.0,
|
||||
drift_weight=0.0,
|
||||
distance_limit=float("inf"),
|
||||
observation_noise_stdev=0.0,
|
||||
self_collision_enabled=True,
|
||||
motor_velocity_limit=np.inf,
|
||||
pd_control_enabled=False, #not needed to be true if accurate motor model is enabled (has its own better PD)
|
||||
leg_model_enabled=True,
|
||||
accurate_motor_model_enabled=True,
|
||||
motor_kp=1.0,
|
||||
motor_kd=0.02,
|
||||
torque_control_enabled=False,
|
||||
motor_overheat_protection=True,
|
||||
hard_reset=True,
|
||||
on_rack=False,
|
||||
render=False,
|
||||
kd_for_pd_controllers=0.3,
|
||||
env_randomizer=minitaur_env_randomizer.MinitaurEnvRandomizer()):
|
||||
"""Initialize the minitaur gym environment.
|
||||
|
||||
Args:
|
||||
@@ -152,17 +150,14 @@ class MinitaurBulletDuckEnv(gym.Env):
|
||||
self._action_repeat *= NUM_SUBSTEPS
|
||||
|
||||
if self._is_render:
|
||||
self._pybullet_client = bullet_client.BulletClient(
|
||||
connection_mode=pybullet.GUI)
|
||||
self._pybullet_client = bullet_client.BulletClient(connection_mode=pybullet.GUI)
|
||||
else:
|
||||
self._pybullet_client = bullet_client.BulletClient()
|
||||
|
||||
self.seed()
|
||||
self.reset()
|
||||
observation_high = (
|
||||
self.minitaur.GetObservationUpperBound() + OBSERVATION_EPS)
|
||||
observation_low = (
|
||||
self.minitaur.GetObservationLowerBound() - OBSERVATION_EPS)
|
||||
observation_high = (self.minitaur.GetObservationUpperBound() + OBSERVATION_EPS)
|
||||
observation_low = (self.minitaur.GetObservationLowerBound() - OBSERVATION_EPS)
|
||||
action_dim = 8
|
||||
action_high = np.array([self._action_bound] * action_dim)
|
||||
self.action_space = spaces.Box(-action_high, action_high, dtype=np.float32)
|
||||
@@ -183,35 +178,36 @@ class MinitaurBulletDuckEnv(gym.Env):
|
||||
numSolverIterations=int(self._num_bullet_solver_iterations))
|
||||
self._pybullet_client.setTimeStep(self._time_step)
|
||||
self._groundId = self._pybullet_client.loadURDF("%s/plane.urdf" % self._urdf_root)
|
||||
self._duckId = self._pybullet_client.loadURDF("%s/duck_vhacd.urdf" % self._urdf_root,duckStartPos,duckStartOrn)
|
||||
self._duckId = self._pybullet_client.loadURDF("%s/duck_vhacd.urdf" % self._urdf_root,
|
||||
duckStartPos, duckStartOrn)
|
||||
self._pybullet_client.setGravity(0, 0, -10)
|
||||
acc_motor = self._accurate_motor_model_enabled
|
||||
motor_protect = self._motor_overheat_protection
|
||||
self.minitaur = (minitaur.Minitaur(
|
||||
pybullet_client=self._pybullet_client,
|
||||
urdf_root=self._urdf_root,
|
||||
time_step=self._time_step,
|
||||
self_collision_enabled=self._self_collision_enabled,
|
||||
motor_velocity_limit=self._motor_velocity_limit,
|
||||
pd_control_enabled=self._pd_control_enabled,
|
||||
accurate_motor_model_enabled=acc_motor,
|
||||
motor_kp=self._motor_kp,
|
||||
motor_kd=self._motor_kd,
|
||||
torque_control_enabled=self._torque_control_enabled,
|
||||
motor_overheat_protection=motor_protect,
|
||||
on_rack=self._on_rack,
|
||||
kd_for_pd_controllers=self._kd_for_pd_controllers))
|
||||
self.minitaur = (minitaur.Minitaur(pybullet_client=self._pybullet_client,
|
||||
urdf_root=self._urdf_root,
|
||||
time_step=self._time_step,
|
||||
self_collision_enabled=self._self_collision_enabled,
|
||||
motor_velocity_limit=self._motor_velocity_limit,
|
||||
pd_control_enabled=self._pd_control_enabled,
|
||||
accurate_motor_model_enabled=acc_motor,
|
||||
motor_kp=self._motor_kp,
|
||||
motor_kd=self._motor_kd,
|
||||
torque_control_enabled=self._torque_control_enabled,
|
||||
motor_overheat_protection=motor_protect,
|
||||
on_rack=self._on_rack,
|
||||
kd_for_pd_controllers=self._kd_for_pd_controllers))
|
||||
else:
|
||||
self.minitaur.Reset(reload_urdf=False)
|
||||
self._pybullet_client.resetBasePositionAndOrientation(self._duckId,duckStartPos,duckStartOrn)
|
||||
self._pybullet_client.resetBasePositionAndOrientation(self._duckId, duckStartPos,
|
||||
duckStartOrn)
|
||||
if self._env_randomizer is not None:
|
||||
self._env_randomizer.randomize_env(self)
|
||||
|
||||
self._env_step_counter = 0
|
||||
self._last_base_position = [0, 0, 0]
|
||||
self._objectives = []
|
||||
self._pybullet_client.resetDebugVisualizerCamera(
|
||||
self._cam_dist, self._cam_yaw, self._cam_pitch, [0, 0, 0])
|
||||
self._pybullet_client.resetDebugVisualizerCamera(self._cam_dist, self._cam_yaw,
|
||||
self._cam_pitch, [0, 0, 0])
|
||||
if not self._torque_control_enabled:
|
||||
for _ in range(100):
|
||||
if self._pd_control_enabled or self._accurate_motor_model_enabled:
|
||||
@@ -228,8 +224,7 @@ class MinitaurBulletDuckEnv(gym.Env):
|
||||
for i, action_component in enumerate(action):
|
||||
if not (-self._action_bound - ACTION_EPS <= action_component <=
|
||||
self._action_bound + ACTION_EPS):
|
||||
raise ValueError(
|
||||
"{}th action {} out of bounds.".format(i, action_component))
|
||||
raise ValueError("{}th action {} out of bounds.".format(i, action_component))
|
||||
action = self.minitaur.ConvertFromLegModel(action)
|
||||
return action
|
||||
|
||||
@@ -258,8 +253,8 @@ class MinitaurBulletDuckEnv(gym.Env):
|
||||
if time_to_sleep > 0:
|
||||
time.sleep(time_to_sleep)
|
||||
base_pos = self.minitaur.GetBasePosition()
|
||||
self._pybullet_client.resetDebugVisualizerCamera(
|
||||
self._cam_dist, self._cam_yaw, self._cam_pitch, base_pos)
|
||||
self._pybullet_client.resetDebugVisualizerCamera(self._cam_dist, self._cam_yaw,
|
||||
self._cam_pitch, base_pos)
|
||||
action = self._transform_action_to_motor_command(action)
|
||||
for _ in range(self._action_repeat):
|
||||
self.minitaur.ApplyAction(action)
|
||||
@@ -281,12 +276,17 @@ class MinitaurBulletDuckEnv(gym.Env):
|
||||
pitch=self._cam_pitch,
|
||||
roll=0,
|
||||
upAxisIndex=2)
|
||||
proj_matrix = self._pybullet_client.computeProjectionMatrixFOV(
|
||||
fov=60, aspect=float(RENDER_WIDTH)/RENDER_HEIGHT,
|
||||
nearVal=0.1, farVal=100.0)
|
||||
(_, _, px, _, _) = self._pybullet_client.getCameraImage(
|
||||
width=RENDER_WIDTH, height=RENDER_HEIGHT, viewMatrix=view_matrix,
|
||||
projectionMatrix=proj_matrix, renderer=pybullet.ER_BULLET_HARDWARE_OPENGL)
|
||||
proj_matrix = self._pybullet_client.computeProjectionMatrixFOV(fov=60,
|
||||
aspect=float(RENDER_WIDTH) /
|
||||
RENDER_HEIGHT,
|
||||
nearVal=0.1,
|
||||
farVal=100.0)
|
||||
(_, _, px, _,
|
||||
_) = self._pybullet_client.getCameraImage(width=RENDER_WIDTH,
|
||||
height=RENDER_HEIGHT,
|
||||
viewMatrix=view_matrix,
|
||||
projectionMatrix=proj_matrix,
|
||||
renderer=pybullet.ER_BULLET_HARDWARE_OPENGL)
|
||||
rgb_array = np.array(px)
|
||||
rgb_array = rgb_array[:, :, :3]
|
||||
return rgb_array
|
||||
@@ -297,9 +297,8 @@ class MinitaurBulletDuckEnv(gym.Env):
|
||||
Returns:
|
||||
A numpy array of motor angles.
|
||||
"""
|
||||
return np.array(
|
||||
self._observation[MOTOR_ANGLE_OBSERVATION_INDEX:
|
||||
MOTOR_ANGLE_OBSERVATION_INDEX + NUM_MOTORS])
|
||||
return np.array(self._observation[MOTOR_ANGLE_OBSERVATION_INDEX:MOTOR_ANGLE_OBSERVATION_INDEX +
|
||||
NUM_MOTORS])
|
||||
|
||||
def get_minitaur_motor_velocities(self):
|
||||
"""Get the minitaur's motor velocities.
|
||||
@@ -308,8 +307,8 @@ class MinitaurBulletDuckEnv(gym.Env):
|
||||
A numpy array of motor velocities.
|
||||
"""
|
||||
return np.array(
|
||||
self._observation[MOTOR_VELOCITY_OBSERVATION_INDEX:
|
||||
MOTOR_VELOCITY_OBSERVATION_INDEX + NUM_MOTORS])
|
||||
self._observation[MOTOR_VELOCITY_OBSERVATION_INDEX:MOTOR_VELOCITY_OBSERVATION_INDEX +
|
||||
NUM_MOTORS])
|
||||
|
||||
def get_minitaur_motor_torques(self):
|
||||
"""Get the minitaur's motor torques.
|
||||
@@ -318,8 +317,8 @@ class MinitaurBulletDuckEnv(gym.Env):
|
||||
A numpy array of motor torques.
|
||||
"""
|
||||
return np.array(
|
||||
self._observation[MOTOR_TORQUE_OBSERVATION_INDEX:
|
||||
MOTOR_TORQUE_OBSERVATION_INDEX + NUM_MOTORS])
|
||||
self._observation[MOTOR_TORQUE_OBSERVATION_INDEX:MOTOR_TORQUE_OBSERVATION_INDEX +
|
||||
NUM_MOTORS])
|
||||
|
||||
def get_minitaur_base_orientation(self):
|
||||
"""Get the minitaur's base orientation, represented by a quaternion.
|
||||
@@ -330,8 +329,8 @@ class MinitaurBulletDuckEnv(gym.Env):
|
||||
return np.array(self._observation[BASE_ORIENTATION_OBSERVATION_INDEX:])
|
||||
|
||||
def lost_duck(self):
|
||||
points = self._pybullet_client.getContactPoints(self._duckId, self._groundId);
|
||||
return len(points)>0
|
||||
points = self._pybullet_client.getContactPoints(self._duckId, self._groundId)
|
||||
return len(points) > 0
|
||||
|
||||
def is_fallen(self):
|
||||
"""Decide whether the minitaur has fallen.
|
||||
@@ -347,8 +346,7 @@ class MinitaurBulletDuckEnv(gym.Env):
|
||||
rot_mat = self._pybullet_client.getMatrixFromQuaternion(orientation)
|
||||
local_up = rot_mat[6:]
|
||||
pos = self.minitaur.GetBasePosition()
|
||||
return (np.dot(np.asarray([0, 0, 1]), np.asarray(local_up)) < 0.85 or
|
||||
pos[2] < 0.13)
|
||||
return (np.dot(np.asarray([0, 0, 1]), np.asarray(local_up)) < 0.85 or pos[2] < 0.13)
|
||||
|
||||
def _termination(self):
|
||||
position = self.minitaur.GetBasePosition()
|
||||
@@ -364,12 +362,9 @@ class MinitaurBulletDuckEnv(gym.Env):
energy_reward = np.abs(
np.dot(self.minitaur.GetMotorTorques(),
self.minitaur.GetMotorVelocities())) * self._time_step
reward = (
self._distance_weight * forward_reward -
self._energy_weight * energy_reward + self._drift_weight * drift_reward
+ self._shake_weight * shake_reward)
self._objectives.append(
[forward_reward, energy_reward, drift_reward, shake_reward])
reward = (self._distance_weight * forward_reward - self._energy_weight * energy_reward +
self._drift_weight * drift_reward + self._shake_weight * shake_reward)
self._objectives.append([forward_reward, energy_reward, drift_reward, shake_reward])
return reward
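The minitaur reward is a weighted sum of four per-step objectives. A minimal sketch, with default weights mirroring the constructor defaults visible earlier in this diff (distance 1.0, energy 0.005, drift and shake 0.0), is:

# Hypothetical sketch of the weighted objective in _reward above, not part of this commit.
def minitaur_reward(forward, energy, drift, shake,
                    distance_weight=1.0, energy_weight=0.005,
                    drift_weight=0.0, shake_weight=0.0):
  # Same weighted sum as in _reward; forward progress is rewarded, energy use is penalized.
  return (distance_weight * forward - energy_weight * energy +
          drift_weight * drift + shake_weight * shake)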
|
||||
def get_objectives(self):
|
||||
@@ -383,9 +378,9 @@ class MinitaurBulletDuckEnv(gym.Env):
|
||||
self._get_observation()
|
||||
observation = np.array(self._observation)
|
||||
if self._observation_noise_stdev > 0:
|
||||
observation += (np.random.normal(
|
||||
scale=self._observation_noise_stdev, size=observation.shape) *
|
||||
self.minitaur.GetObservationUpperBound())
|
||||
observation += (
|
||||
np.random.normal(scale=self._observation_noise_stdev, size=observation.shape) *
|
||||
self.minitaur.GetObservationUpperBound())
|
||||
return observation
|
||||
|
||||
if parse_version(gym.__version__) < parse_version('0.9.6'):
|
||||
|
||||
@@ -45,24 +45,20 @@ class MinitaurEnvRandomizer(env_randomizer_base.EnvRandomizerBase):
|
||||
minitaur.SetBaseMass(randomized_base_mass)
|
||||
|
||||
leg_masses = minitaur.GetLegMassesFromURDF()
|
||||
leg_masses_lower_bound = np.array(leg_masses) * (
|
||||
1.0 + self._minitaur_leg_mass_err_range[0])
|
||||
leg_masses_upper_bound = np.array(leg_masses) * (
|
||||
1.0 + self._minitaur_leg_mass_err_range[1])
|
||||
leg_masses_lower_bound = np.array(leg_masses) * (1.0 + self._minitaur_leg_mass_err_range[0])
|
||||
leg_masses_upper_bound = np.array(leg_masses) * (1.0 + self._minitaur_leg_mass_err_range[1])
|
||||
randomized_leg_masses = [
|
||||
np.random.uniform(leg_masses_lower_bound[i], leg_masses_upper_bound[i])
|
||||
for i in range(len(leg_masses))
|
||||
]
|
||||
minitaur.SetLegMasses(randomized_leg_masses)
|
||||
|
||||
randomized_battery_voltage = random.uniform(BATTERY_VOLTAGE_RANGE[0],
|
||||
BATTERY_VOLTAGE_RANGE[1])
|
||||
randomized_battery_voltage = random.uniform(BATTERY_VOLTAGE_RANGE[0], BATTERY_VOLTAGE_RANGE[1])
|
||||
minitaur.SetBatteryVoltage(randomized_battery_voltage)
|
||||
|
||||
randomized_motor_damping = random.uniform(MOTOR_VISCOUS_DAMPING_RANGE[0],
|
||||
MOTOR_VISCOUS_DAMPING_RANGE[1])
|
||||
minitaur.SetMotorViscousDamping(randomized_motor_damping)
|
||||
|
||||
randomized_foot_friction = random.uniform(MINITAUR_LEG_FRICTION[0],
|
||||
MINITAUR_LEG_FRICTION[1])
|
||||
randomized_foot_friction = random.uniform(MINITAUR_LEG_FRICTION[0], MINITAUR_LEG_FRICTION[1])
|
||||
minitaur.SetFootFriction(randomized_foot_friction)
|
||||
|
||||
@@ -5,8 +5,7 @@
|
||||
import os, inspect
|
||||
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
|
||||
parentdir = os.path.dirname(os.path.dirname(currentdir))
|
||||
os.sys.path.insert(0,parentdir)
|
||||
|
||||
os.sys.path.insert(0, parentdir)
|
||||
|
||||
import math
|
||||
import time
|
||||
@@ -33,6 +32,7 @@ OBSERVATION_EPS = 0.01
|
||||
RENDER_HEIGHT = 720
|
||||
RENDER_WIDTH = 960
|
||||
|
||||
|
||||
class MinitaurBulletEnv(gym.Env):
|
||||
"""The gym environment for the minitaur.
|
||||
|
||||
@@ -43,34 +43,32 @@ class MinitaurBulletEnv(gym.Env):
|
||||
expenditure.
|
||||
|
||||
"""
|
||||
metadata = {
|
||||
"render.modes": ["human", "rgb_array"],
|
||||
"video.frames_per_second": 50
|
||||
}
|
||||
metadata = {"render.modes": ["human", "rgb_array"], "video.frames_per_second": 50}
|
||||
|
||||
def __init__(self,
|
||||
urdf_root=pybullet_data.getDataPath(),
|
||||
action_repeat=1,
|
||||
distance_weight=1.0,
|
||||
energy_weight=0.005,
|
||||
shake_weight=0.0,
|
||||
drift_weight=0.0,
|
||||
distance_limit=float("inf"),
|
||||
observation_noise_stdev=0.0,
|
||||
self_collision_enabled=True,
|
||||
motor_velocity_limit=np.inf,
|
||||
pd_control_enabled=False,#not needed to be true if accurate motor model is enabled (has its own better PD)
|
||||
leg_model_enabled=True,
|
||||
accurate_motor_model_enabled=True,
|
||||
motor_kp=1.0,
|
||||
motor_kd=0.02,
|
||||
torque_control_enabled=False,
|
||||
motor_overheat_protection=True,
|
||||
hard_reset=True,
|
||||
on_rack=False,
|
||||
render=False,
|
||||
kd_for_pd_controllers=0.3,
|
||||
env_randomizer=minitaur_env_randomizer.MinitaurEnvRandomizer()):
|
||||
def __init__(
|
||||
self,
|
||||
urdf_root=pybullet_data.getDataPath(),
|
||||
action_repeat=1,
|
||||
distance_weight=1.0,
|
||||
energy_weight=0.005,
|
||||
shake_weight=0.0,
|
||||
drift_weight=0.0,
|
||||
distance_limit=float("inf"),
|
||||
observation_noise_stdev=0.0,
|
||||
self_collision_enabled=True,
|
||||
motor_velocity_limit=np.inf,
|
||||
pd_control_enabled=False, #not needed to be true if accurate motor model is enabled (has its own better PD)
|
||||
leg_model_enabled=True,
|
||||
accurate_motor_model_enabled=True,
|
||||
motor_kp=1.0,
|
||||
motor_kd=0.02,
|
||||
torque_control_enabled=False,
|
||||
motor_overheat_protection=True,
|
||||
hard_reset=True,
|
||||
on_rack=False,
|
||||
render=False,
|
||||
kd_for_pd_controllers=0.3,
|
||||
env_randomizer=minitaur_env_randomizer.MinitaurEnvRandomizer()):
|
||||
"""Initialize the minitaur gym environment.
|
||||
|
||||
Args:
|
||||
@@ -147,17 +145,14 @@ class MinitaurBulletEnv(gym.Env):
|
||||
self._action_repeat *= NUM_SUBSTEPS
|
||||
|
||||
if self._is_render:
|
||||
self._pybullet_client = bullet_client.BulletClient(
|
||||
connection_mode=pybullet.GUI)
|
||||
self._pybullet_client = bullet_client.BulletClient(connection_mode=pybullet.GUI)
|
||||
else:
|
||||
self._pybullet_client = bullet_client.BulletClient()
|
||||
|
||||
self.seed()
|
||||
self.reset()
|
||||
observation_high = (
|
||||
self.minitaur.GetObservationUpperBound() + OBSERVATION_EPS)
|
||||
observation_low = (
|
||||
self.minitaur.GetObservationLowerBound() - OBSERVATION_EPS)
|
||||
observation_high = (self.minitaur.GetObservationUpperBound() + OBSERVATION_EPS)
|
||||
observation_low = (self.minitaur.GetObservationLowerBound() - OBSERVATION_EPS)
|
||||
action_dim = 8
|
||||
action_high = np.array([self._action_bound] * action_dim)
|
||||
self.action_space = spaces.Box(-action_high, action_high, dtype=np.float32)
|
||||
@@ -178,25 +173,25 @@ class MinitaurBulletEnv(gym.Env):
|
||||
numSolverIterations=int(self._num_bullet_solver_iterations))
|
||||
self._pybullet_client.setTimeStep(self._time_step)
|
||||
plane = self._pybullet_client.loadURDF("%s/plane.urdf" % self._urdf_root)
|
||||
self._pybullet_client.changeVisualShape(plane,-1,rgbaColor=[1,1,1,0.9])
|
||||
self._pybullet_client.configureDebugVisualizer(self._pybullet_client.COV_ENABLE_PLANAR_REFLECTION,0)
|
||||
self._pybullet_client.changeVisualShape(plane, -1, rgbaColor=[1, 1, 1, 0.9])
|
||||
self._pybullet_client.configureDebugVisualizer(
|
||||
self._pybullet_client.COV_ENABLE_PLANAR_REFLECTION, 0)
|
||||
self._pybullet_client.setGravity(0, 0, -10)
|
||||
acc_motor = self._accurate_motor_model_enabled
|
||||
motor_protect = self._motor_overheat_protection
|
||||
self.minitaur = (minitaur.Minitaur(
|
||||
pybullet_client=self._pybullet_client,
|
||||
urdf_root=self._urdf_root,
|
||||
time_step=self._time_step,
|
||||
self_collision_enabled=self._self_collision_enabled,
|
||||
motor_velocity_limit=self._motor_velocity_limit,
|
||||
pd_control_enabled=self._pd_control_enabled,
|
||||
accurate_motor_model_enabled=acc_motor,
|
||||
motor_kp=self._motor_kp,
|
||||
motor_kd=self._motor_kd,
|
||||
torque_control_enabled=self._torque_control_enabled,
|
||||
motor_overheat_protection=motor_protect,
|
||||
on_rack=self._on_rack,
|
||||
kd_for_pd_controllers=self._kd_for_pd_controllers))
|
||||
self.minitaur = (minitaur.Minitaur(pybullet_client=self._pybullet_client,
|
||||
urdf_root=self._urdf_root,
|
||||
time_step=self._time_step,
|
||||
self_collision_enabled=self._self_collision_enabled,
|
||||
motor_velocity_limit=self._motor_velocity_limit,
|
||||
pd_control_enabled=self._pd_control_enabled,
|
||||
accurate_motor_model_enabled=acc_motor,
|
||||
motor_kp=self._motor_kp,
|
||||
motor_kd=self._motor_kd,
|
||||
torque_control_enabled=self._torque_control_enabled,
|
||||
motor_overheat_protection=motor_protect,
|
||||
on_rack=self._on_rack,
|
||||
kd_for_pd_controllers=self._kd_for_pd_controllers))
|
||||
else:
|
||||
self.minitaur.Reset(reload_urdf=False)
|
||||
|
||||
@@ -206,8 +201,8 @@ class MinitaurBulletEnv(gym.Env):
|
||||
self._env_step_counter = 0
|
||||
self._last_base_position = [0, 0, 0]
|
||||
self._objectives = []
|
||||
self._pybullet_client.resetDebugVisualizerCamera(
|
||||
self._cam_dist, self._cam_yaw, self._cam_pitch, [0, 0, 0])
|
||||
self._pybullet_client.resetDebugVisualizerCamera(self._cam_dist, self._cam_yaw,
|
||||
self._cam_pitch, [0, 0, 0])
|
||||
if not self._torque_control_enabled:
|
||||
for _ in range(100):
|
||||
if self._pd_control_enabled or self._accurate_motor_model_enabled:
|
||||
@@ -224,8 +219,7 @@ class MinitaurBulletEnv(gym.Env):
|
||||
for i, action_component in enumerate(action):
|
||||
if not (-self._action_bound - ACTION_EPS <= action_component <=
|
||||
self._action_bound + ACTION_EPS):
|
||||
raise ValueError(
|
||||
"{}th action {} out of bounds.".format(i, action_component))
|
||||
raise ValueError("{}th action {} out of bounds.".format(i, action_component))
|
||||
action = self.minitaur.ConvertFromLegModel(action)
|
||||
return action
|
||||
|
||||
@@ -256,14 +250,15 @@ class MinitaurBulletEnv(gym.Env):
|
||||
base_pos = self.minitaur.GetBasePosition()
|
||||
camInfo = self._pybullet_client.getDebugVisualizerCamera()
|
||||
curTargetPos = camInfo[11]
|
||||
distance=camInfo[10]
|
||||
distance = camInfo[10]
|
||||
yaw = camInfo[8]
|
||||
pitch=camInfo[9]
|
||||
targetPos = [0.95*curTargetPos[0]+0.05*base_pos[0],0.95*curTargetPos[1]+0.05*base_pos[1],curTargetPos[2]]
|
||||
pitch = camInfo[9]
|
||||
targetPos = [
|
||||
0.95 * curTargetPos[0] + 0.05 * base_pos[0], 0.95 * curTargetPos[1] + 0.05 * base_pos[1],
|
||||
curTargetPos[2]
|
||||
]
|
||||
|
||||
|
||||
self._pybullet_client.resetDebugVisualizerCamera(
|
||||
distance, yaw, pitch, base_pos)
|
||||
self._pybullet_client.resetDebugVisualizerCamera(distance, yaw, pitch, base_pos)
|
||||
action = self._transform_action_to_motor_command(action)
|
||||
for _ in range(self._action_repeat):
|
||||
self.minitaur.ApplyAction(action)
|
||||
@@ -285,12 +280,17 @@ class MinitaurBulletEnv(gym.Env):
|
||||
pitch=self._cam_pitch,
|
||||
roll=0,
|
||||
upAxisIndex=2)
|
||||
proj_matrix = self._pybullet_client.computeProjectionMatrixFOV(
|
||||
fov=60, aspect=float(RENDER_WIDTH)/RENDER_HEIGHT,
|
||||
nearVal=0.1, farVal=100.0)
|
||||
(_, _, px, _, _) = self._pybullet_client.getCameraImage(
|
||||
width=RENDER_WIDTH, height=RENDER_HEIGHT, viewMatrix=view_matrix,
|
||||
projectionMatrix=proj_matrix, renderer=pybullet.ER_BULLET_HARDWARE_OPENGL)
|
||||
proj_matrix = self._pybullet_client.computeProjectionMatrixFOV(fov=60,
|
||||
aspect=float(RENDER_WIDTH) /
|
||||
RENDER_HEIGHT,
|
||||
nearVal=0.1,
|
||||
farVal=100.0)
|
||||
(_, _, px, _,
|
||||
_) = self._pybullet_client.getCameraImage(width=RENDER_WIDTH,
|
||||
height=RENDER_HEIGHT,
|
||||
viewMatrix=view_matrix,
|
||||
projectionMatrix=proj_matrix,
|
||||
renderer=pybullet.ER_BULLET_HARDWARE_OPENGL)
|
||||
rgb_array = np.array(px)
|
||||
rgb_array = rgb_array[:, :, :3]
|
||||
return rgb_array
|
||||
@@ -301,9 +301,8 @@ class MinitaurBulletEnv(gym.Env):
|
||||
Returns:
|
||||
A numpy array of motor angles.
|
||||
"""
|
||||
return np.array(
|
||||
self._observation[MOTOR_ANGLE_OBSERVATION_INDEX:
|
||||
MOTOR_ANGLE_OBSERVATION_INDEX + NUM_MOTORS])
|
||||
return np.array(self._observation[MOTOR_ANGLE_OBSERVATION_INDEX:MOTOR_ANGLE_OBSERVATION_INDEX +
|
||||
NUM_MOTORS])
|
||||
|
||||
def get_minitaur_motor_velocities(self):
|
||||
"""Get the minitaur's motor velocities.
|
||||
@@ -312,8 +311,8 @@ class MinitaurBulletEnv(gym.Env):
|
||||
A numpy array of motor velocities.
|
||||
"""
|
||||
return np.array(
|
||||
self._observation[MOTOR_VELOCITY_OBSERVATION_INDEX:
|
||||
MOTOR_VELOCITY_OBSERVATION_INDEX + NUM_MOTORS])
|
||||
self._observation[MOTOR_VELOCITY_OBSERVATION_INDEX:MOTOR_VELOCITY_OBSERVATION_INDEX +
|
||||
NUM_MOTORS])
|
||||
|
||||
def get_minitaur_motor_torques(self):
|
||||
"""Get the minitaur's motor torques.
|
||||
@@ -322,8 +321,8 @@ class MinitaurBulletEnv(gym.Env):
|
||||
A numpy array of motor torques.
|
||||
"""
|
||||
return np.array(
|
||||
self._observation[MOTOR_TORQUE_OBSERVATION_INDEX:
|
||||
MOTOR_TORQUE_OBSERVATION_INDEX + NUM_MOTORS])
|
||||
self._observation[MOTOR_TORQUE_OBSERVATION_INDEX:MOTOR_TORQUE_OBSERVATION_INDEX +
|
||||
NUM_MOTORS])
|
||||
|
||||
def get_minitaur_base_orientation(self):
|
||||
"""Get the minitaur's base orientation, represented by a quaternion.
|
||||
@@ -347,8 +346,7 @@ class MinitaurBulletEnv(gym.Env):
|
||||
rot_mat = self._pybullet_client.getMatrixFromQuaternion(orientation)
|
||||
local_up = rot_mat[6:]
|
||||
pos = self.minitaur.GetBasePosition()
|
||||
return (np.dot(np.asarray([0, 0, 1]), np.asarray(local_up)) < 0.85 or
|
||||
pos[2] < 0.13)
|
||||
return (np.dot(np.asarray([0, 0, 1]), np.asarray(local_up)) < 0.85 or pos[2] < 0.13)
|
||||
|
||||
def _termination(self):
|
||||
position = self.minitaur.GetBasePosition()
|
||||
@@ -364,12 +362,9 @@ class MinitaurBulletEnv(gym.Env):
|
||||
energy_reward = np.abs(
|
||||
np.dot(self.minitaur.GetMotorTorques(),
|
||||
self.minitaur.GetMotorVelocities())) * self._time_step
|
||||
reward = (
|
||||
self._distance_weight * forward_reward -
|
||||
self._energy_weight * energy_reward + self._drift_weight * drift_reward
|
||||
+ self._shake_weight * shake_reward)
|
||||
self._objectives.append(
|
||||
[forward_reward, energy_reward, drift_reward, shake_reward])
|
||||
reward = (self._distance_weight * forward_reward - self._energy_weight * energy_reward +
|
||||
self._drift_weight * drift_reward + self._shake_weight * shake_reward)
|
||||
self._objectives.append([forward_reward, energy_reward, drift_reward, shake_reward])
|
||||
return reward
|
||||
|
||||
def get_objectives(self):
|
||||
@@ -383,9 +378,9 @@ class MinitaurBulletEnv(gym.Env):
|
||||
self._get_observation()
|
||||
observation = np.array(self._observation)
|
||||
if self._observation_noise_stdev > 0:
|
||||
observation += (np.random.normal(
|
||||
scale=self._observation_noise_stdev, size=observation.shape) *
|
||||
self.minitaur.GetObservationUpperBound())
|
||||
observation += (
|
||||
np.random.normal(scale=self._observation_noise_stdev, size=observation.shape) *
|
||||
self.minitaur.GetObservationUpperBound())
|
||||
return observation
|
||||
|
||||
if parse_version(gym.__version__) < parse_version('0.9.6'):
|
||||
|
||||
@@ -7,8 +7,7 @@ MOTOR_VOLTAGE = 16.0
MOTOR_RESISTANCE = 0.186
MOTOR_TORQUE_CONSTANT = 0.0954
MOTOR_VISCOUS_DAMPING = 0
MOTOR_SPEED_LIMIT = MOTOR_VOLTAGE / (MOTOR_VISCOUS_DAMPING
+ MOTOR_TORQUE_CONSTANT)
MOTOR_SPEED_LIMIT = MOTOR_VOLTAGE / (MOTOR_VISCOUS_DAMPING + MOTOR_TORQUE_CONSTANT)
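These electrical constants feed the PD-to-torque conversion shown further down in this file's diff (convert_to_torque and _convert_to_torque_from_pwm): a PD law in PWM space, clipped to [-1, 1], then mapped through voltage, resistance, and the torque constant. A minimal sketch of only the observed-torque path is below; OBSERVED_TORQUE_LIMIT is defined outside this excerpt, so its value here is assumed, and the voltage clipping and current-to-torque lookup table are omitted.

# Hypothetical sketch of the observed-torque path, not part of this commit.
import numpy as np

MOTOR_VOLTAGE = 16.0
MOTOR_RESISTANCE = 0.186
MOTOR_TORQUE_CONSTANT = 0.0954
OBSERVED_TORQUE_LIMIT = 5.7  # assumed value; defined in motor.py outside this excerpt

def observed_torque(command, angle, velocity, kp=1.2, kd=0):
  # PD law in PWM space, as in convert_to_torque; kp/kd defaults mirror MotorModel.__init__.
  pwm = np.clip(-kp * (angle - command) - kd * velocity, -1.0, 1.0)
  # Electrical model: torque is proportional to motor current (pwm * voltage / resistance).
  return np.clip(MOTOR_TORQUE_CONSTANT * (pwm * MOTOR_VOLTAGE / MOTOR_RESISTANCE),
                 -OBSERVED_TORQUE_LIMIT, OBSERVED_TORQUE_LIMIT)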
|
||||
|
||||
class MotorModel(object):
|
||||
@@ -24,10 +23,7 @@ class MotorModel(object):
|
||||
pd gains, viscous friction, back-EMF voltage and current-torque profile.
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
torque_control_enabled=False,
|
||||
kp=1.2,
|
||||
kd=0):
|
||||
def __init__(self, torque_control_enabled=False, kp=1.2, kd=0):
|
||||
self._torque_control_enabled = torque_control_enabled
|
||||
self._kp = kp
|
||||
self._kd = kd
|
||||
@@ -50,8 +46,7 @@ class MotorModel(object):
|
||||
def get_viscous_dampling(self):
|
||||
return self._viscous_damping
|
||||
|
||||
def convert_to_torque(self, motor_commands, current_motor_angle,
|
||||
current_motor_velocity):
|
||||
def convert_to_torque(self, motor_commands, current_motor_angle, current_motor_velocity):
|
||||
"""Convert the commands (position control or torque control) to torque.
|
||||
|
||||
Args:
|
||||
@@ -66,8 +61,8 @@ class MotorModel(object):
|
||||
if self._torque_control_enabled:
|
||||
pwm = motor_commands
|
||||
else:
|
||||
pwm = (-self._kp * (current_motor_angle - motor_commands)
|
||||
- self._kd * current_motor_velocity)
|
||||
pwm = (-self._kp * (current_motor_angle - motor_commands) -
|
||||
self._kd * current_motor_velocity)
|
||||
pwm = np.clip(pwm, -1.0, 1.0)
|
||||
return self._convert_to_torque_from_pwm(pwm, current_motor_velocity)
|
||||
|
||||
@@ -81,21 +76,19 @@ class MotorModel(object):
|
||||
actual_torque: The torque that needs to be applied to the motor.
|
||||
observed_torque: The torque observed by the sensor.
|
||||
"""
|
||||
observed_torque = np.clip(
|
||||
self._torque_constant * (pwm * self._voltage / self._resistance),
|
||||
-OBSERVED_TORQUE_LIMIT, OBSERVED_TORQUE_LIMIT)
|
||||
observed_torque = np.clip(self._torque_constant * (pwm * self._voltage / self._resistance),
|
||||
-OBSERVED_TORQUE_LIMIT, OBSERVED_TORQUE_LIMIT)
|
||||
|
||||
# Net voltage is clipped at 50V by diodes on the motor controller.
|
||||
voltage_net = np.clip(pwm * self._voltage -
|
||||
(self._torque_constant + self._viscous_damping)
|
||||
* current_motor_velocity,
|
||||
-VOLTAGE_CLIPPING, VOLTAGE_CLIPPING)
|
||||
voltage_net = np.clip(
|
||||
pwm * self._voltage -
|
||||
(self._torque_constant + self._viscous_damping) * current_motor_velocity,
|
||||
-VOLTAGE_CLIPPING, VOLTAGE_CLIPPING)
|
||||
current = voltage_net / self._resistance
|
||||
current_sign = np.sign(current)
|
||||
current_magnitude = np.absolute(current)
|
||||
|
||||
# Saturate torque based on empirical current relation.
|
||||
actual_torque = np.interp(current_magnitude, self._current_table,
|
||||
self._torque_table)
|
||||
actual_torque = np.interp(current_magnitude, self._current_table, self._torque_table)
|
||||
actual_torque = np.multiply(current_sign, actual_torque)
|
||||
return actual_torque, observed_torque
|
||||
|
||||
@@ -4,80 +4,150 @@ import math
|
||||
|
||||
import numpy as np
|
||||
|
||||
|
||||
class Racecar:
|
||||
|
||||
def __init__(self, bullet_client, urdfRootPath='', timeStep=0.01):
|
||||
self.urdfRootPath = urdfRootPath
|
||||
self.timeStep = timeStep
|
||||
self._p = bullet_client
|
||||
self.reset()
|
||||
def __init__(self, bullet_client, urdfRootPath='', timeStep=0.01):
|
||||
self.urdfRootPath = urdfRootPath
|
||||
self.timeStep = timeStep
|
||||
self._p = bullet_client
|
||||
self.reset()
|
||||
|
||||
def reset(self):
|
||||
car = self._p.loadURDF(os.path.join(self.urdfRootPath,"racecar/racecar_differential.urdf"), [0,0,.2],useFixedBase=False)
|
||||
self.racecarUniqueId = car
|
||||
#for i in range (self._p.getNumJoints(car)):
|
||||
# print (self._p.getJointInfo(car,i))
|
||||
for wheel in range(self._p.getNumJoints(car)):
|
||||
self._p.setJointMotorControl2(car,wheel,self._p.VELOCITY_CONTROL,targetVelocity=0,force=0)
|
||||
self._p.getJointInfo(car,wheel)
|
||||
def reset(self):
|
||||
car = self._p.loadURDF(os.path.join(self.urdfRootPath, "racecar/racecar_differential.urdf"),
|
||||
[0, 0, .2],
|
||||
useFixedBase=False)
|
||||
self.racecarUniqueId = car
|
||||
#for i in range (self._p.getNumJoints(car)):
|
||||
# print (self._p.getJointInfo(car,i))
|
||||
for wheel in range(self._p.getNumJoints(car)):
|
||||
self._p.setJointMotorControl2(car,
|
||||
wheel,
|
||||
self._p.VELOCITY_CONTROL,
|
||||
targetVelocity=0,
|
||||
force=0)
|
||||
self._p.getJointInfo(car, wheel)
|
||||
|
||||
#self._p.setJointMotorControl2(car,10,self._p.VELOCITY_CONTROL,targetVelocity=1,force=10)
|
||||
c = self._p.createConstraint(car,9,car,11,jointType=self._p.JOINT_GEAR,jointAxis =[0,1,0],parentFramePosition=[0,0,0],childFramePosition=[0,0,0])
|
||||
self._p.changeConstraint(c,gearRatio=1, maxForce=10000)
|
||||
#self._p.setJointMotorControl2(car,10,self._p.VELOCITY_CONTROL,targetVelocity=1,force=10)
|
||||
c = self._p.createConstraint(car,
|
||||
9,
|
||||
car,
|
||||
11,
|
||||
jointType=self._p.JOINT_GEAR,
|
||||
jointAxis=[0, 1, 0],
|
||||
parentFramePosition=[0, 0, 0],
|
||||
childFramePosition=[0, 0, 0])
|
||||
self._p.changeConstraint(c, gearRatio=1, maxForce=10000)
|
||||
|
||||
c = self._p.createConstraint(car,10,car,13,jointType=self._p.JOINT_GEAR,jointAxis =[0,1,0],parentFramePosition=[0,0,0],childFramePosition=[0,0,0])
|
||||
self._p.changeConstraint(c,gearRatio=-1, maxForce=10000)
|
||||
c = self._p.createConstraint(car,
|
||||
10,
|
||||
car,
|
||||
13,
|
||||
jointType=self._p.JOINT_GEAR,
|
||||
jointAxis=[0, 1, 0],
|
||||
parentFramePosition=[0, 0, 0],
|
||||
childFramePosition=[0, 0, 0])
|
||||
self._p.changeConstraint(c, gearRatio=-1, maxForce=10000)
|
||||
|
||||
c = self._p.createConstraint(car,9,car,13,jointType=self._p.JOINT_GEAR,jointAxis =[0,1,0],parentFramePosition=[0,0,0],childFramePosition=[0,0,0])
|
||||
self._p.changeConstraint(c,gearRatio=-1, maxForce=10000)
|
||||
c = self._p.createConstraint(car,
|
||||
9,
|
||||
car,
|
||||
13,
|
||||
jointType=self._p.JOINT_GEAR,
|
||||
jointAxis=[0, 1, 0],
|
||||
parentFramePosition=[0, 0, 0],
|
||||
childFramePosition=[0, 0, 0])
|
||||
self._p.changeConstraint(c, gearRatio=-1, maxForce=10000)
|
||||
|
||||
c = self._p.createConstraint(car,16,car,18,jointType=self._p.JOINT_GEAR,jointAxis =[0,1,0],parentFramePosition=[0,0,0],childFramePosition=[0,0,0])
|
||||
self._p.changeConstraint(c,gearRatio=1, maxForce=10000)
|
||||
c = self._p.createConstraint(car,
|
||||
16,
|
||||
car,
|
||||
18,
|
||||
jointType=self._p.JOINT_GEAR,
|
||||
jointAxis=[0, 1, 0],
|
||||
parentFramePosition=[0, 0, 0],
|
||||
childFramePosition=[0, 0, 0])
|
||||
self._p.changeConstraint(c, gearRatio=1, maxForce=10000)
|
||||
|
||||
c = self._p.createConstraint(car,16,car,19,jointType=self._p.JOINT_GEAR,jointAxis =[0,1,0],parentFramePosition=[0,0,0],childFramePosition=[0,0,0])
|
||||
self._p.changeConstraint(c,gearRatio=-1, maxForce=10000)
|
||||
c = self._p.createConstraint(car,
|
||||
16,
|
||||
car,
|
||||
19,
|
||||
jointType=self._p.JOINT_GEAR,
|
||||
jointAxis=[0, 1, 0],
|
||||
parentFramePosition=[0, 0, 0],
|
||||
childFramePosition=[0, 0, 0])
|
||||
self._p.changeConstraint(c, gearRatio=-1, maxForce=10000)
|
||||
|
||||
c = self._p.createConstraint(car,17,car,19,jointType=self._p.JOINT_GEAR,jointAxis =[0,1,0],parentFramePosition=[0,0,0],childFramePosition=[0,0,0])
|
||||
self._p.changeConstraint(c,gearRatio=-1, maxForce=10000)
|
||||
c = self._p.createConstraint(car,
|
||||
17,
|
||||
car,
|
||||
19,
|
||||
jointType=self._p.JOINT_GEAR,
|
||||
jointAxis=[0, 1, 0],
|
||||
parentFramePosition=[0, 0, 0],
|
||||
childFramePosition=[0, 0, 0])
|
||||
self._p.changeConstraint(c, gearRatio=-1, maxForce=10000)
|
||||
|
||||
c = self._p.createConstraint(car,1,car,18,jointType=self._p.JOINT_GEAR,jointAxis =[0,1,0],parentFramePosition=[0,0,0],childFramePosition=[0,0,0])
|
||||
self._p.changeConstraint(c,gearRatio=-1, gearAuxLink = 15, maxForce=10000)
|
||||
c = self._p.createConstraint(car,3,car,19,jointType=self._p.JOINT_GEAR,jointAxis =[0,1,0],parentFramePosition=[0,0,0],childFramePosition=[0,0,0])
|
||||
self._p.changeConstraint(c,gearRatio=-1, gearAuxLink = 15,maxForce=10000)
|
||||
c = self._p.createConstraint(car,
|
||||
1,
|
||||
car,
|
||||
18,
|
||||
jointType=self._p.JOINT_GEAR,
|
||||
jointAxis=[0, 1, 0],
|
||||
parentFramePosition=[0, 0, 0],
|
||||
childFramePosition=[0, 0, 0])
|
||||
self._p.changeConstraint(c, gearRatio=-1, gearAuxLink=15, maxForce=10000)
|
||||
c = self._p.createConstraint(car,
|
||||
3,
|
||||
car,
|
||||
19,
|
||||
jointType=self._p.JOINT_GEAR,
|
||||
jointAxis=[0, 1, 0],
|
||||
parentFramePosition=[0, 0, 0],
|
||||
childFramePosition=[0, 0, 0])
|
||||
self._p.changeConstraint(c, gearRatio=-1, gearAuxLink=15, maxForce=10000)
|
||||
|
||||
self.steeringLinks = [0,2]
|
||||
self.maxForce = 20
|
||||
self.nMotors = 2
|
||||
self.motorizedwheels=[8,15]
|
||||
self.speedMultiplier = 20.
|
||||
self.steeringMultiplier = 0.5
|
||||
self.steeringLinks = [0, 2]
|
||||
self.maxForce = 20
|
||||
self.nMotors = 2
|
||||
self.motorizedwheels = [8, 15]
|
||||
self.speedMultiplier = 20.
|
||||
self.steeringMultiplier = 0.5
|
||||
|
||||
def getActionDimension(self):
|
||||
return self.nMotors
|
||||
def getActionDimension(self):
|
||||
return self.nMotors
|
||||
|
||||
def getObservationDimension(self):
|
||||
return len(self.getObservation())
|
||||
def getObservationDimension(self):
|
||||
return len(self.getObservation())
|
||||
|
||||
def getObservation(self):
|
||||
observation = []
|
||||
pos,orn=self._p.getBasePositionAndOrientation(self.racecarUniqueId)
|
||||
def getObservation(self):
|
||||
observation = []
|
||||
pos, orn = self._p.getBasePositionAndOrientation(self.racecarUniqueId)
|
||||
|
||||
observation.extend(list(pos))
|
||||
observation.extend(list(orn))
|
||||
observation.extend(list(pos))
|
||||
observation.extend(list(orn))
|
||||
|
||||
return observation
|
||||
return observation
|
||||
|
||||
def applyAction(self, motorCommands):
|
||||
targetVelocity=motorCommands[0]*self.speedMultiplier
|
||||
#print("targetVelocity")
|
||||
#print(targetVelocity)
|
||||
steeringAngle = motorCommands[1]*self.steeringMultiplier
|
||||
#print("steeringAngle")
|
||||
#print(steeringAngle)
|
||||
#print("maxForce")
|
||||
#print(self.maxForce)
|
||||
def applyAction(self, motorCommands):
|
||||
targetVelocity = motorCommands[0] * self.speedMultiplier
|
||||
#print("targetVelocity")
|
||||
#print(targetVelocity)
|
||||
steeringAngle = motorCommands[1] * self.steeringMultiplier
|
||||
#print("steeringAngle")
|
||||
#print(steeringAngle)
|
||||
#print("maxForce")
|
||||
#print(self.maxForce)
|
||||
|
||||
for motor in self.motorizedwheels:
|
||||
self._p.setJointMotorControl2(self.racecarUniqueId,motor,self._p.VELOCITY_CONTROL,targetVelocity=targetVelocity,force=self.maxForce)
|
||||
for steer in self.steeringLinks:
|
||||
self._p.setJointMotorControl2(self.racecarUniqueId,steer,self._p.POSITION_CONTROL,targetPosition=steeringAngle)
|
||||
for motor in self.motorizedwheels:
|
||||
self._p.setJointMotorControl2(self.racecarUniqueId,
|
||||
motor,
|
||||
self._p.VELOCITY_CONTROL,
|
||||
targetVelocity=targetVelocity,
|
||||
force=self.maxForce)
|
||||
for steer in self.steeringLinks:
|
||||
self._p.setJointMotorControl2(self.racecarUniqueId,
|
||||
steer,
|
||||
self._p.POSITION_CONTROL,
|
||||
targetPosition=steeringAngle)
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import os, inspect
|
||||
import os, inspect
|
||||
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
|
||||
parentdir = os.path.dirname(os.path.dirname(currentdir))
|
||||
os.sys.path.insert(0,parentdir)
|
||||
os.sys.path.insert(0, parentdir)
|
||||
|
||||
import math
|
||||
import gym
|
||||
@@ -19,11 +19,9 @@ from pkg_resources import parse_version
|
||||
RENDER_HEIGHT = 720
|
||||
RENDER_WIDTH = 960
|
||||
|
||||
|
||||
class RacecarGymEnv(gym.Env):
|
||||
metadata = {
|
||||
'render.modes': ['human', 'rgb_array'],
|
||||
'video.frames_per_second' : 50
|
||||
}
|
||||
metadata = {'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': 50}
|
||||
|
||||
def __init__(self,
|
||||
urdfRoot=pybullet_data.getDataPath(),
|
||||
@@ -42,25 +40,24 @@ class RacecarGymEnv(gym.Env):
|
||||
self._renders = renders
|
||||
self._isDiscrete = isDiscrete
|
||||
if self._renders:
|
||||
self._p = bullet_client.BulletClient(
|
||||
connection_mode=pybullet.GUI)
|
||||
self._p = bullet_client.BulletClient(connection_mode=pybullet.GUI)
|
||||
else:
|
||||
self._p = bullet_client.BulletClient()
|
||||
|
||||
self.seed()
|
||||
#self.reset()
|
||||
observationDim = 2 #len(self.getExtendedObservation())
|
||||
observationDim = 2 #len(self.getExtendedObservation())
|
||||
#print("observationDim")
|
||||
#print(observationDim)
|
||||
# observation_high = np.array([np.finfo(np.float32).max] * observationDim)
|
||||
observation_high = np.ones(observationDim) * 1000 #np.inf
|
||||
observation_high = np.ones(observationDim) * 1000 #np.inf
|
||||
if (isDiscrete):
|
||||
self.action_space = spaces.Discrete(9)
|
||||
else:
|
||||
action_dim = 2
|
||||
self._action_bound = 1
|
||||
action_high = np.array([self._action_bound] * action_dim)
|
||||
self.action_space = spaces.Box(-action_high, action_high, dtype=np.float32)
|
||||
action_dim = 2
|
||||
self._action_bound = 1
|
||||
action_high = np.array([self._action_bound] * action_dim)
|
||||
self.action_space = spaces.Box(-action_high, action_high, dtype=np.float32)
|
||||
self.observation_space = spaces.Box(-observation_high, observation_high, dtype=np.float32)
|
||||
self.viewer = None
|
||||
|
||||
@@ -69,23 +66,24 @@ class RacecarGymEnv(gym.Env):
|
||||
#p.setPhysicsEngineParameter(numSolverIterations=300)
|
||||
self._p.setTimeStep(self._timeStep)
|
||||
#self._p.loadURDF(os.path.join(self._urdfRoot,"plane.urdf"))
|
||||
stadiumobjects = self._p.loadSDF(os.path.join(self._urdfRoot,"stadium.sdf"))
|
||||
stadiumobjects = self._p.loadSDF(os.path.join(self._urdfRoot, "stadium.sdf"))
|
||||
#move the stadium objects slightly above 0
|
||||
#for i in stadiumobjects:
|
||||
# pos,orn = self._p.getBasePositionAndOrientation(i)
|
||||
# newpos = [pos[0],pos[1],pos[2]-0.1]
|
||||
# self._p.resetBasePositionAndOrientation(i,newpos,orn)
|
||||
|
||||
dist = 5 +2.*random.random()
|
||||
ang = 2.*3.1415925438*random.random()
|
||||
dist = 5 + 2. * random.random()
|
||||
ang = 2. * 3.1415925438 * random.random()
|
||||
|
||||
ballx = dist * math.sin(ang)
|
||||
bally = dist * math.cos(ang)
|
||||
ballz = 1
|
||||
|
||||
self._ballUniqueId = self._p.loadURDF(os.path.join(self._urdfRoot,"sphere2.urdf"),[ballx,bally,ballz])
|
||||
self._p.setGravity(0,0,-10)
|
||||
self._racecar = racecar.Racecar(self._p,urdfRootPath=self._urdfRoot, timeStep=self._timeStep)
|
||||
self._ballUniqueId = self._p.loadURDF(os.path.join(self._urdfRoot, "sphere2.urdf"),
|
||||
[ballx, bally, ballz])
|
||||
self._p.setGravity(0, 0, -10)
|
||||
self._racecar = racecar.Racecar(self._p, urdfRootPath=self._urdfRoot, timeStep=self._timeStep)
|
||||
self._envStepCounter = 0
|
||||
for i in range(100):
|
||||
self._p.stepSimulation()
|
||||
@@ -100,26 +98,26 @@ class RacecarGymEnv(gym.Env):
|
||||
return [seed]
|
||||
|
||||
def getExtendedObservation(self):
|
||||
self._observation = [] #self._racecar.getObservation()
|
||||
carpos,carorn = self._p.getBasePositionAndOrientation(self._racecar.racecarUniqueId)
|
||||
ballpos,ballorn = self._p.getBasePositionAndOrientation(self._ballUniqueId)
|
||||
invCarPos,invCarOrn = self._p.invertTransform(carpos,carorn)
|
||||
ballPosInCar,ballOrnInCar = self._p.multiplyTransforms(invCarPos,invCarOrn,ballpos,ballorn)
|
||||
self._observation = [] #self._racecar.getObservation()
|
||||
carpos, carorn = self._p.getBasePositionAndOrientation(self._racecar.racecarUniqueId)
|
||||
ballpos, ballorn = self._p.getBasePositionAndOrientation(self._ballUniqueId)
|
||||
invCarPos, invCarOrn = self._p.invertTransform(carpos, carorn)
|
||||
ballPosInCar, ballOrnInCar = self._p.multiplyTransforms(invCarPos, invCarOrn, ballpos, ballorn)
|
||||
|
||||
self._observation.extend([ballPosInCar[0],ballPosInCar[1]])
|
||||
return self._observation
|
||||
self._observation.extend([ballPosInCar[0], ballPosInCar[1]])
|
||||
return self._observation
|
||||
|
||||
def step(self, action):
|
||||
if (self._renders):
|
||||
basePos,orn = self._p.getBasePositionAndOrientation(self._racecar.racecarUniqueId)
|
||||
basePos, orn = self._p.getBasePositionAndOrientation(self._racecar.racecarUniqueId)
|
||||
#self._p.resetDebugVisualizerCamera(1, 30, -40, basePos)
|
||||
|
||||
if (self._isDiscrete):
|
||||
fwd = [-1,-1,-1,0,0,0,1,1,1]
|
||||
steerings = [-0.6,0,0.6,-0.6,0,0.6,-0.6,0,0.6]
|
||||
forward = fwd[action]
|
||||
steer = steerings[action]
|
||||
realaction = [forward,steer]
|
||||
fwd = [-1, -1, -1, 0, 0, 0, 1, 1, 1]
|
||||
steerings = [-0.6, 0, 0.6, -0.6, 0, 0.6, -0.6, 0, 0.6]
|
||||
forward = fwd[action]
|
||||
steer = steerings[action]
|
||||
realaction = [forward, steer]
|
||||
else:
|
||||
realaction = action
|
||||
|
||||
@@ -142,35 +140,37 @@ class RacecarGymEnv(gym.Env):
|
||||
def render(self, mode='human', close=False):
|
||||
if mode != "rgb_array":
|
||||
return np.array([])
|
||||
base_pos,orn = self._p.getBasePositionAndOrientation(self._racecar.racecarUniqueId)
|
||||
view_matrix = self._p.computeViewMatrixFromYawPitchRoll(
|
||||
cameraTargetPosition=base_pos,
|
||||
distance=self._cam_dist,
|
||||
yaw=self._cam_yaw,
|
||||
pitch=self._cam_pitch,
|
||||
roll=0,
|
||||
upAxisIndex=2)
|
||||
proj_matrix = self._p.computeProjectionMatrixFOV(
|
||||
fov=60, aspect=float(RENDER_WIDTH)/RENDER_HEIGHT,
|
||||
nearVal=0.1, farVal=100.0)
|
||||
(_, _, px, _, _) = self._p.getCameraImage(
|
||||
width=RENDER_WIDTH, height=RENDER_HEIGHT, viewMatrix=view_matrix,
|
||||
projectionMatrix=proj_matrix, renderer=pybullet.ER_BULLET_HARDWARE_OPENGL)
|
||||
base_pos, orn = self._p.getBasePositionAndOrientation(self._racecar.racecarUniqueId)
|
||||
view_matrix = self._p.computeViewMatrixFromYawPitchRoll(cameraTargetPosition=base_pos,
|
||||
distance=self._cam_dist,
|
||||
yaw=self._cam_yaw,
|
||||
pitch=self._cam_pitch,
|
||||
roll=0,
|
||||
upAxisIndex=2)
|
||||
proj_matrix = self._p.computeProjectionMatrixFOV(fov=60,
|
||||
aspect=float(RENDER_WIDTH) / RENDER_HEIGHT,
|
||||
nearVal=0.1,
|
||||
farVal=100.0)
|
||||
(_, _, px, _, _) = self._p.getCameraImage(width=RENDER_WIDTH,
|
||||
height=RENDER_HEIGHT,
|
||||
viewMatrix=view_matrix,
|
||||
projectionMatrix=proj_matrix,
|
||||
renderer=pybullet.ER_BULLET_HARDWARE_OPENGL)
|
||||
rgb_array = np.array(px)
|
||||
rgb_array = rgb_array[:, :, :3]
|
||||
return rgb_array
|
||||
|
||||
|
||||
def _termination(self):
|
||||
return self._envStepCounter>1000
|
||||
return self._envStepCounter > 1000
|
||||
|
||||
def _reward(self):
|
||||
closestPoints = self._p.getClosestPoints(self._racecar.racecarUniqueId,self._ballUniqueId,10000)
|
||||
closestPoints = self._p.getClosestPoints(self._racecar.racecarUniqueId, self._ballUniqueId,
|
||||
10000)
|
||||
|
||||
numPt = len(closestPoints)
|
||||
reward=-1000
|
||||
reward = -1000
|
||||
#print(numPt)
|
||||
if (numPt>0):
|
||||
if (numPt > 0):
|
||||
#print("reward:")
|
||||
reward = -closestPoints[0][8]
|
||||
#print(reward)
|
||||
|
||||
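As a usage sketch for the environment reformatted above (the module path and constructor keywords are taken from this commit's file layout and the __init__ shown in the hunk, so treat them as assumptions rather than a stable public API):

from pybullet_envs.bullet.racecarGymEnv import RacecarGymEnv

env = RacecarGymEnv(renders=False, isDiscrete=True)
obs = env.reset()
# a discrete action 0..8 encodes a (forward, steering) pair via the fwd/steerings tables above
obs, reward, done, info = env.step(env.action_space.sample())
env.close()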
@@ -1,7 +1,7 @@
|
||||
import os, inspect
|
||||
import os, inspect
|
||||
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
|
||||
parentdir = os.path.dirname(os.path.dirname(currentdir))
|
||||
os.sys.path.insert(0,parentdir)
|
||||
os.sys.path.insert(0, parentdir)
|
||||
|
||||
import math
|
||||
import gym
|
||||
@@ -19,11 +19,9 @@ from pkg_resources import parse_version
|
||||
RENDER_HEIGHT = 720
|
||||
RENDER_WIDTH = 960
|
||||
|
||||
|
||||
class RacecarZEDGymEnv(gym.Env):
|
||||
metadata = {
|
||||
'render.modes': ['human', 'rgb_array'],
|
||||
'video.frames_per_second' : 50
|
||||
}
|
||||
metadata = {'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': 50}
|
||||
|
||||
def __init__(self,
|
||||
urdfRoot=pybullet_data.getDataPath(),
|
||||
@@ -44,8 +42,7 @@ class RacecarZEDGymEnv(gym.Env):
|
||||
|
||||
self._isDiscrete = isDiscrete
|
||||
if self._renders:
|
||||
self._p = bullet_client.BulletClient(
|
||||
connection_mode=pybullet.GUI)
|
||||
self._p = bullet_client.BulletClient(connection_mode=pybullet.GUI)
|
||||
else:
|
||||
self._p = bullet_client.BulletClient()
|
||||
|
||||
@@ -59,11 +56,14 @@ class RacecarZEDGymEnv(gym.Env):
|
||||
if (isDiscrete):
|
||||
self.action_space = spaces.Discrete(9)
|
||||
else:
|
||||
action_dim = 2
|
||||
self._action_bound = 1
|
||||
action_high = np.array([self._action_bound] * action_dim)
|
||||
self.action_space = spaces.Box(-action_high, action_high, dtype=np.float32)
|
||||
self.observation_space = spaces.Box(low=0, high=255, shape=(self._height, self._width, 4), dtype=np.uint8)
|
||||
action_dim = 2
|
||||
self._action_bound = 1
|
||||
action_high = np.array([self._action_bound] * action_dim)
|
||||
self.action_space = spaces.Box(-action_high, action_high, dtype=np.float32)
|
||||
self.observation_space = spaces.Box(low=0,
|
||||
high=255,
|
||||
shape=(self._height, self._width, 4),
|
||||
dtype=np.uint8)
|
||||
|
||||
self.viewer = None
|
||||
|
||||
@@ -72,23 +72,24 @@ class RacecarZEDGymEnv(gym.Env):
|
||||
#p.setPhysicsEngineParameter(numSolverIterations=300)
|
||||
self._p.setTimeStep(self._timeStep)
|
||||
#self._p.loadURDF(os.path.join(os.path.dirname(__file__),"../data","plane.urdf"))
|
||||
stadiumobjects = self._p.loadSDF(os.path.join(self._urdfRoot,"stadium.sdf"))
|
||||
stadiumobjects = self._p.loadSDF(os.path.join(self._urdfRoot, "stadium.sdf"))
|
||||
#move the stadium objects slightly above 0
|
||||
for i in stadiumobjects:
|
||||
pos,orn = self._p.getBasePositionAndOrientation(i)
|
||||
newpos = [pos[0],pos[1],pos[2]+0.1]
|
||||
self._p.resetBasePositionAndOrientation(i,newpos,orn)
|
||||
pos, orn = self._p.getBasePositionAndOrientation(i)
|
||||
newpos = [pos[0], pos[1], pos[2] + 0.1]
|
||||
self._p.resetBasePositionAndOrientation(i, newpos, orn)
|
||||
|
||||
dist = 5 +2.*random.random()
|
||||
ang = 2.*3.1415925438*random.random()
|
||||
dist = 5 + 2. * random.random()
|
||||
ang = 2. * 3.1415925438 * random.random()
|
||||
|
||||
ballx = dist * math.sin(ang)
|
||||
bally = dist * math.cos(ang)
|
||||
ballz = 1
|
||||
|
||||
self._ballUniqueId = self._p.loadURDF(os.path.join(self._urdfRoot,"sphere2red.urdf"),[ballx,bally,ballz])
|
||||
self._p.setGravity(0,0,-10)
|
||||
self._racecar = racecar.Racecar(self._p,urdfRootPath=self._urdfRoot, timeStep=self._timeStep)
|
||||
self._ballUniqueId = self._p.loadURDF(os.path.join(self._urdfRoot, "sphere2red.urdf"),
|
||||
[ballx, bally, ballz])
|
||||
self._p.setGravity(0, 0, -10)
|
||||
self._racecar = racecar.Racecar(self._p, urdfRootPath=self._urdfRoot, timeStep=self._timeStep)
|
||||
self._envStepCounter = 0
|
||||
for i in range(100):
|
||||
self._p.stepSimulation()
|
||||
@@ -103,38 +104,50 @@ class RacecarZEDGymEnv(gym.Env):
|
||||
return [seed]
|
||||
|
||||
def getExtendedObservation(self):
|
||||
carpos,carorn = self._p.getBasePositionAndOrientation(self._racecar.racecarUniqueId)
|
||||
carmat = self._p.getMatrixFromQuaternion(carorn)
|
||||
ballpos,ballorn = self._p.getBasePositionAndOrientation(self._ballUniqueId)
|
||||
invCarPos,invCarOrn = self._p.invertTransform(carpos,carorn)
|
||||
ballPosInCar,ballOrnInCar = self._p.multiplyTransforms(invCarPos,invCarOrn,ballpos,ballorn)
|
||||
dist0 = 0.3
|
||||
dist1 = 1.
|
||||
eyePos = [carpos[0]+dist0*carmat[0],carpos[1]+dist0*carmat[3],carpos[2]+dist0*carmat[6]+0.3]
|
||||
targetPos = [carpos[0]+dist1*carmat[0],carpos[1]+dist1*carmat[3],carpos[2]+dist1*carmat[6]+0.3]
|
||||
up = [carmat[2],carmat[5],carmat[8]]
|
||||
viewMat = self._p.computeViewMatrix(eyePos,targetPos,up)
|
||||
#viewMat = self._p.computeViewMatrixFromYawPitchRoll(carpos,1,0,0,0,2)
|
||||
#print("projectionMatrix:")
|
||||
#print(self._p.getDebugVisualizerCamera()[3])
|
||||
projMatrix = [0.7499999403953552, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, -1.0000200271606445, -1.0, 0.0, 0.0, -0.02000020071864128, 0.0]
|
||||
img_arr = self._p.getCameraImage(width=self._width,height=self._height,viewMatrix=viewMat,projectionMatrix=projMatrix)
|
||||
rgb=img_arr[2]
|
||||
np_img_arr = np.reshape(rgb, (self._height, self._width, 4))
|
||||
self._observation = np_img_arr
|
||||
return self._observation
|
||||
carpos, carorn = self._p.getBasePositionAndOrientation(self._racecar.racecarUniqueId)
|
||||
carmat = self._p.getMatrixFromQuaternion(carorn)
|
||||
ballpos, ballorn = self._p.getBasePositionAndOrientation(self._ballUniqueId)
|
||||
invCarPos, invCarOrn = self._p.invertTransform(carpos, carorn)
|
||||
ballPosInCar, ballOrnInCar = self._p.multiplyTransforms(invCarPos, invCarOrn, ballpos, ballorn)
|
||||
dist0 = 0.3
|
||||
dist1 = 1.
|
||||
eyePos = [
|
||||
carpos[0] + dist0 * carmat[0], carpos[1] + dist0 * carmat[3],
|
||||
carpos[2] + dist0 * carmat[6] + 0.3
|
||||
]
|
||||
targetPos = [
|
||||
carpos[0] + dist1 * carmat[0], carpos[1] + dist1 * carmat[3],
|
||||
carpos[2] + dist1 * carmat[6] + 0.3
|
||||
]
|
||||
up = [carmat[2], carmat[5], carmat[8]]
|
||||
viewMat = self._p.computeViewMatrix(eyePos, targetPos, up)
|
||||
#viewMat = self._p.computeViewMatrixFromYawPitchRoll(carpos,1,0,0,0,2)
|
||||
#print("projectionMatrix:")
|
||||
#print(self._p.getDebugVisualizerCamera()[3])
|
||||
projMatrix = [
|
||||
0.7499999403953552, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, -1.0000200271606445, -1.0,
|
||||
0.0, 0.0, -0.02000020071864128, 0.0
|
||||
]
|
||||
img_arr = self._p.getCameraImage(width=self._width,
|
||||
height=self._height,
|
||||
viewMatrix=viewMat,
|
||||
projectionMatrix=projMatrix)
|
||||
rgb = img_arr[2]
|
||||
np_img_arr = np.reshape(rgb, (self._height, self._width, 4))
|
||||
self._observation = np_img_arr
|
||||
return self._observation
|
||||
|
||||
def step(self, action):
|
||||
if (self._renders):
|
||||
basePos,orn = self._p.getBasePositionAndOrientation(self._racecar.racecarUniqueId)
|
||||
basePos, orn = self._p.getBasePositionAndOrientation(self._racecar.racecarUniqueId)
|
||||
#self._p.resetDebugVisualizerCamera(1, 30, -40, basePos)
|
||||
|
||||
if (self._isDiscrete):
|
||||
fwd = [-1,-1,-1,0,0,0,1,1,1]
|
||||
steerings = [-0.6,0,0.6,-0.6,0,0.6,-0.6,0,0.6]
|
||||
forward = fwd[action]
|
||||
steer = steerings[action]
|
||||
realaction = [forward,steer]
|
||||
fwd = [-1, -1, -1, 0, 0, 0, 1, 1, 1]
|
||||
steerings = [-0.6, 0, 0.6, -0.6, 0, 0.6, -0.6, 0, 0.6]
|
||||
forward = fwd[action]
|
||||
steer = steerings[action]
|
||||
realaction = [forward, steer]
|
||||
else:
|
||||
realaction = action
|
||||
|
||||
@@ -157,35 +170,37 @@ class RacecarZEDGymEnv(gym.Env):
|
||||
def render(self, mode='human', close=False):
|
||||
if mode != "rgb_array":
|
||||
return np.array([])
|
||||
base_pos,orn = self._p.getBasePositionAndOrientation(self._racecar.racecarUniqueId)
|
||||
view_matrix = self._p.computeViewMatrixFromYawPitchRoll(
|
||||
cameraTargetPosition=base_pos,
|
||||
distance=self._cam_dist,
|
||||
yaw=self._cam_yaw,
|
||||
pitch=self._cam_pitch,
|
||||
roll=0,
|
||||
upAxisIndex=2)
|
||||
proj_matrix = self._p.computeProjectionMatrixFOV(
|
||||
fov=60, aspect=float(RENDER_WIDTH)/RENDER_HEIGHT,
|
||||
nearVal=0.1, farVal=100.0)
|
||||
(_, _, px, _, _) = self._p.getCameraImage(
|
||||
width=RENDER_WIDTH, height=RENDER_HEIGHT, viewMatrix=view_matrix,
|
||||
projectionMatrix=proj_matrix, renderer=pybullet.ER_BULLET_HARDWARE_OPENGL)
|
||||
base_pos, orn = self._p.getBasePositionAndOrientation(self._racecar.racecarUniqueId)
|
||||
view_matrix = self._p.computeViewMatrixFromYawPitchRoll(cameraTargetPosition=base_pos,
|
||||
distance=self._cam_dist,
|
||||
yaw=self._cam_yaw,
|
||||
pitch=self._cam_pitch,
|
||||
roll=0,
|
||||
upAxisIndex=2)
|
||||
proj_matrix = self._p.computeProjectionMatrixFOV(fov=60,
|
||||
aspect=float(RENDER_WIDTH) / RENDER_HEIGHT,
|
||||
nearVal=0.1,
|
||||
farVal=100.0)
|
||||
(_, _, px, _, _) = self._p.getCameraImage(width=RENDER_WIDTH,
|
||||
height=RENDER_HEIGHT,
|
||||
viewMatrix=view_matrix,
|
||||
projectionMatrix=proj_matrix,
|
||||
renderer=pybullet.ER_BULLET_HARDWARE_OPENGL)
|
||||
rgb_array = np.array(px)
|
||||
rgb_array = rgb_array[:, :, :3]
|
||||
return rgb_array
|
||||
|
||||
|
||||
def _termination(self):
|
||||
return self._envStepCounter>1000
|
||||
return self._envStepCounter > 1000
|
||||
|
||||
def _reward(self):
|
||||
closestPoints = self._p.getClosestPoints(self._racecar.racecarUniqueId,self._ballUniqueId,10000)
|
||||
closestPoints = self._p.getClosestPoints(self._racecar.racecarUniqueId, self._ballUniqueId,
|
||||
10000)
|
||||
|
||||
numPt = len(closestPoints)
|
||||
reward=-1000
|
||||
reward = -1000
|
||||
#print(numPt)
|
||||
if (numPt>0):
|
||||
if (numPt > 0):
|
||||
#print("reward:")
|
||||
reward = -closestPoints[0][8]
|
||||
#print(reward)
|
||||
|
||||
@@ -4,11 +4,10 @@ import os
|
||||
import inspect
|
||||
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
|
||||
parentdir = os.path.dirname(os.path.dirname(currentdir))
|
||||
os.sys.path.insert(0,parentdir)
|
||||
print("parentdir=",parentdir)
|
||||
os.sys.path.insert(0, parentdir)
|
||||
print("parentdir=", parentdir)
|
||||
|
||||
|
||||
from pybullet_envs.deep_mimic.env.pybullet_deep_mimic_env import PyBulletDeepMimicEnv
|
||||
from pybullet_envs.deep_mimic.env.pybullet_deep_mimic_env import PyBulletDeepMimicEnv
|
||||
from pybullet_envs.deep_mimic.learning.rl_world import RLWorld
|
||||
from pybullet_utils.logger import Logger
|
||||
from pybullet_envs.deep_mimic.testrl import update_world, update_timestep, build_world
|
||||
@@ -17,36 +16,40 @@ import pybullet_utils.mpi_util as MPIUtil
|
||||
args = []
|
||||
world = None
|
||||
|
||||
|
||||
def run():
|
||||
global update_timestep
|
||||
global world
|
||||
global update_timestep
|
||||
global world
|
||||
|
||||
done = False
|
||||
while not (done):
|
||||
update_world(world, update_timestep)
|
||||
done = False
|
||||
while not (done):
|
||||
update_world(world, update_timestep)
|
||||
|
||||
return
|
||||
|
||||
return
|
||||
|
||||
def shutdown():
|
||||
global world
|
||||
global world
|
||||
|
||||
Logger.print2('Shutting down...')
|
||||
world.shutdown()
|
||||
return
|
||||
|
||||
Logger.print2('Shutting down...')
|
||||
world.shutdown()
|
||||
return
|
||||
|
||||
def main():
|
||||
global args
|
||||
global world
|
||||
global args
|
||||
global world
|
||||
|
||||
# Command line arguments
|
||||
args = sys.argv[1:]
|
||||
enable_draw = False
|
||||
world = build_world(args, enable_draw)
|
||||
# Command line arguments
|
||||
args = sys.argv[1:]
|
||||
enable_draw = False
|
||||
world = build_world(args, enable_draw)
|
||||
|
||||
run()
|
||||
shutdown()
|
||||
run()
|
||||
shutdown()
|
||||
|
||||
return
|
||||
|
||||
return
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
main()
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
from enum import Enum
|
||||
|
||||
|
||||
class ActionSpace(Enum):
|
||||
Null = 0
|
||||
Continuous = 1
|
||||
Discrete = 2
|
||||
Null = 0
|
||||
Continuous = 1
|
||||
Discrete = 2
|
||||
|
||||
@@ -1,19 +1,21 @@
|
||||
from abc import abstractmethod
|
||||
from abc import abstractmethod
|
||||
import sys, abc
|
||||
if sys.version_info >= (3, 4):
|
||||
ABC = abc.ABC
|
||||
ABC = abc.ABC
|
||||
else:
|
||||
ABC = abc.ABCMeta('ABC', (), {})
|
||||
ABC = abc.ABCMeta('ABC', (), {})
|
||||
|
||||
import numpy as np
|
||||
from enum import Enum
|
||||
|
||||
|
||||
class Env(ABC):
|
||||
class Terminate(Enum):
|
||||
Null = 0
|
||||
Fail = 1
|
||||
Succ = 2
|
||||
|
||||
def __init__(self, args, enable_draw):
|
||||
self.enable_draw = enable_draw
|
||||
return
|
||||
|
||||
class Terminate(Enum):
|
||||
Null = 0
|
||||
Fail = 1
|
||||
Succ = 2
|
||||
|
||||
def __init__(self, args, enable_draw):
|
||||
self.enable_draw = enable_draw
|
||||
return
|
||||
|
||||
@@ -1,36 +1,61 @@
|
||||
from pybullet_utils import bullet_client
|
||||
import math
|
||||
|
||||
|
||||
|
||||
class HumanoidPoseInterpolator(object):
|
||||
|
||||
def __init__(self):
|
||||
pass
|
||||
|
||||
def Reset(self,basePos=[0,0,0], baseOrn=[0,0,0,1],chestRot=[0,0,0,1], neckRot=[0,0,0,1],rightHipRot= [0,0,0,1], rightKneeRot=[0],rightAnkleRot = [0,0,0,1],
|
||||
rightShoulderRot = [0,0,0,1],rightElbowRot = [0], leftHipRot = [0,0,0,1], leftKneeRot = [0],leftAnkleRot = [0,0,0,1],
|
||||
leftShoulderRot = [0,0,0,1] ,leftElbowRot = [0],
|
||||
baseLinVel = [0,0,0],baseAngVel = [0,0,0], chestVel = [0,0,0],neckVel = [0,0,0],rightHipVel = [0,0,0],rightKneeVel = [0],
|
||||
rightAnkleVel = [0,0,0],rightShoulderVel = [0,0,0],rightElbowVel = [0],leftHipVel = [0,0,0],leftKneeVel = [0],leftAnkleVel = [0,0,0],leftShoulderVel = [0,0,0],leftElbowVel = [0]
|
||||
):
|
||||
|
||||
|
||||
def Reset(self,
|
||||
basePos=[0, 0, 0],
|
||||
baseOrn=[0, 0, 0, 1],
|
||||
chestRot=[0, 0, 0, 1],
|
||||
neckRot=[0, 0, 0, 1],
|
||||
rightHipRot=[0, 0, 0, 1],
|
||||
rightKneeRot=[0],
|
||||
rightAnkleRot=[0, 0, 0, 1],
|
||||
rightShoulderRot=[0, 0, 0, 1],
|
||||
rightElbowRot=[0],
|
||||
leftHipRot=[0, 0, 0, 1],
|
||||
leftKneeRot=[0],
|
||||
leftAnkleRot=[0, 0, 0, 1],
|
||||
leftShoulderRot=[0, 0, 0, 1],
|
||||
leftElbowRot=[0],
|
||||
baseLinVel=[0, 0, 0],
|
||||
baseAngVel=[0, 0, 0],
|
||||
chestVel=[0, 0, 0],
|
||||
neckVel=[0, 0, 0],
|
||||
rightHipVel=[0, 0, 0],
|
||||
rightKneeVel=[0],
|
||||
rightAnkleVel=[0, 0, 0],
|
||||
rightShoulderVel=[0, 0, 0],
|
||||
rightElbowVel=[0],
|
||||
leftHipVel=[0, 0, 0],
|
||||
leftKneeVel=[0],
|
||||
leftAnkleVel=[0, 0, 0],
|
||||
leftShoulderVel=[0, 0, 0],
|
||||
leftElbowVel=[0]):
|
||||
|
||||
self._basePos = basePos
|
||||
self._baseLinVel = baseLinVel
|
||||
#print("HumanoidPoseInterpolator.Reset: baseLinVel = ", baseLinVel)
|
||||
self._baseOrn =baseOrn
|
||||
self._baseOrn = baseOrn
|
||||
self._baseAngVel = baseAngVel
|
||||
|
||||
|
||||
self._chestRot = chestRot
|
||||
self._chestVel =chestVel
|
||||
self._chestVel = chestVel
|
||||
self._neckRot = neckRot
|
||||
self._neckVel = neckVel
|
||||
|
||||
|
||||
self._rightHipRot = rightHipRot
|
||||
self._rightHipVel = rightHipVel
|
||||
self._rightKneeRot =rightKneeRot
|
||||
self._rightKneeRot = rightKneeRot
|
||||
self._rightKneeVel = rightKneeVel
|
||||
self._rightAnkleRot = rightAnkleRot
|
||||
self._rightAnkleVel = rightAnkleVel
|
||||
|
||||
self._rightShoulderRot =rightShoulderRot
|
||||
|
||||
self._rightShoulderRot = rightShoulderRot
|
||||
self._rightShoulderVel = rightShoulderVel
|
||||
self._rightElbowRot = rightElbowRot
|
||||
self._rightElbowVel = rightElbowVel
|
||||
@@ -39,225 +64,253 @@ class HumanoidPoseInterpolator(object):
|
||||
self._leftHipVel = leftHipVel
|
||||
self._leftKneeRot = leftKneeRot
|
||||
self._leftKneeVel = leftKneeVel
|
||||
self._leftAnkleRot =leftAnkleRot
|
||||
self._leftAnkleRot = leftAnkleRot
|
||||
self._leftAnkleVel = leftAnkleVel
|
||||
|
||||
|
||||
self._leftShoulderRot = leftShoulderRot
|
||||
self._leftShoulderVel = leftShoulderVel
|
||||
self._leftElbowRot =leftElbowRot
|
||||
self._leftElbowRot = leftElbowRot
|
||||
self._leftElbowVel = leftElbowVel
|
||||
|
||||
def ComputeLinVel(self,posStart, posEnd, deltaTime):
|
||||
vel = [(posEnd[0]-posStart[0])/deltaTime,(posEnd[1]-posStart[1])/deltaTime,(posEnd[2]-posStart[2])/deltaTime]
|
||||
def ComputeLinVel(self, posStart, posEnd, deltaTime):
|
||||
vel = [(posEnd[0] - posStart[0]) / deltaTime, (posEnd[1] - posStart[1]) / deltaTime,
|
||||
(posEnd[2] - posStart[2]) / deltaTime]
|
||||
return vel
|
||||
|
||||
def ComputeAngVel(self,ornStart, ornEnd, deltaTime, bullet_client):
|
||||
dorn = bullet_client.getDifferenceQuaternion(ornStart,ornEnd)
|
||||
axis,angle = bullet_client.getAxisAngleFromQuaternion(dorn)
|
||||
angVel = [(axis[0]*angle)/deltaTime,(axis[1]*angle)/deltaTime,(axis[2]*angle)/deltaTime]
|
||||
|
||||
def ComputeAngVel(self, ornStart, ornEnd, deltaTime, bullet_client):
|
||||
dorn = bullet_client.getDifferenceQuaternion(ornStart, ornEnd)
|
||||
axis, angle = bullet_client.getAxisAngleFromQuaternion(dorn)
|
||||
angVel = [(axis[0] * angle) / deltaTime, (axis[1] * angle) / deltaTime,
|
||||
(axis[2] * angle) / deltaTime]
|
||||
return angVel
|
||||
|
||||
def ComputeAngVelRel(self,ornStart, ornEnd, deltaTime, bullet_client):
|
||||
ornStartConjugate = [-ornStart[0],-ornStart[1],-ornStart[2],ornStart[3]]
|
||||
pos_diff, q_diff =bullet_client.multiplyTransforms([0,0,0], ornStartConjugate, [0,0,0], ornEnd)
|
||||
axis,angle = bullet_client.getAxisAngleFromQuaternion(q_diff)
|
||||
angVel = [(axis[0]*angle)/deltaTime,(axis[1]*angle)/deltaTime,(axis[2]*angle)/deltaTime]
|
||||
|
||||
def ComputeAngVelRel(self, ornStart, ornEnd, deltaTime, bullet_client):
|
||||
ornStartConjugate = [-ornStart[0], -ornStart[1], -ornStart[2], ornStart[3]]
|
||||
pos_diff, q_diff = bullet_client.multiplyTransforms([0, 0, 0], ornStartConjugate, [0, 0, 0],
|
||||
ornEnd)
|
||||
axis, angle = bullet_client.getAxisAngleFromQuaternion(q_diff)
|
||||
angVel = [(axis[0] * angle) / deltaTime, (axis[1] * angle) / deltaTime,
|
||||
(axis[2] * angle) / deltaTime]
|
||||
return angVel
|
||||
|
||||
|
||||
|
||||
def NormalizeVector(self, vec):
|
||||
length2 = orn[0]*orn[0]+orn[1]*orn[1]+orn[2]*orn[2]
|
||||
if (length2>0):
|
||||
length2 = orn[0] * orn[0] + orn[1] * orn[1] + orn[2] * orn[2]
|
||||
if (length2 > 0):
|
||||
length = math.sqrt(length2)
|
||||
|
||||
|
||||
def NormalizeQuaternion(self, orn):
|
||||
length2 = orn[0]*orn[0]+orn[1]*orn[1]+orn[2]*orn[2]+orn[3]*orn[3]
|
||||
if (length2>0):
|
||||
length2 = orn[0] * orn[0] + orn[1] * orn[1] + orn[2] * orn[2] + orn[3] * orn[3]
|
||||
if (length2 > 0):
|
||||
length = math.sqrt(length2)
|
||||
orn[0]/=length
|
||||
orn[1]/=length
|
||||
orn[2]/=length
|
||||
orn[3]/=length
|
||||
orn[0] /= length
|
||||
orn[1] /= length
|
||||
orn[2] /= length
|
||||
orn[3] /= length
|
||||
return orn
|
||||
|
||||
|
||||
#print("Normalize? length=",length)
|
||||
|
||||
|
||||
def PostProcessMotionData(self, frameData):
|
||||
baseOrn1Start = [frameData[5],frameData[6], frameData[7],frameData[4]]
|
||||
|
||||
chestRotStart = [frameData[9],frameData[10],frameData[11],frameData[8]]
|
||||
|
||||
neckRotStart = [frameData[13],frameData[14],frameData[15],frameData[12]]
|
||||
rightHipRotStart = [frameData[17],frameData[18],frameData[19],frameData[16]]
|
||||
rightAnkleRotStart = [frameData[22],frameData[23],frameData[24],frameData[21]]
|
||||
rightShoulderRotStart = [frameData[26],frameData[27],frameData[28],frameData[25]]
|
||||
leftHipRotStart = [frameData[31],frameData[32],frameData[33],frameData[30]]
|
||||
leftAnkleRotStart = [frameData[36],frameData[37],frameData[38],frameData[35]]
|
||||
leftShoulderRotStart = [frameData[40],frameData[41],frameData[42],frameData[39]]
|
||||
|
||||
|
||||
def GetPose(self):
|
||||
pose = [ self._basePos[0],self._basePos[1],self._basePos[2],
|
||||
self._baseOrn[0],self._baseOrn[1],self._baseOrn[2],self._baseOrn[3],
|
||||
self._chestRot[0],self._chestRot[1],self._chestRot[2],self._chestRot[3],
|
||||
self._neckRot[0],self._neckRot[1],self._neckRot[2],self._neckRot[3],
|
||||
self._rightHipRot[0],self._rightHipRot[1],self._rightHipRot[2],self._rightHipRot[3],
|
||||
self._rightKneeRot[0],
|
||||
self._rightAnkleRot[0],self._rightAnkleRot[1],self._rightAnkleRot[2],self._rightAnkleRot[3],
|
||||
self._rightShoulderRot[0],self._rightShoulderRot[1],self._rightShoulderRot[2],self._rightShoulderRot[3],
|
||||
self._rightElbowRot[0],
|
||||
self._leftHipRot[0],self._leftHipRot[1],self._leftHipRot[2],self._leftHipRot[3],
|
||||
self._leftKneeRot[0],
|
||||
self._leftAnkleRot[0],self._leftAnkleRot[1],self._leftAnkleRot[2],self._leftAnkleRot[3],
|
||||
self._leftShoulderRot[0],self._leftShoulderRot[1],self._leftShoulderRot[2],self._leftShoulderRot[3],
|
||||
self._leftElbowRot[0] ]
|
||||
return pose
|
||||
baseOrn1Start = [frameData[5], frameData[6], frameData[7], frameData[4]]
|
||||
|
||||
def Slerp(self, frameFraction, frameData, frameDataNext,bullet_client ):
|
||||
chestRotStart = [frameData[9], frameData[10], frameData[11], frameData[8]]
|
||||
|
||||
neckRotStart = [frameData[13], frameData[14], frameData[15], frameData[12]]
|
||||
rightHipRotStart = [frameData[17], frameData[18], frameData[19], frameData[16]]
|
||||
rightAnkleRotStart = [frameData[22], frameData[23], frameData[24], frameData[21]]
|
||||
rightShoulderRotStart = [frameData[26], frameData[27], frameData[28], frameData[25]]
|
||||
leftHipRotStart = [frameData[31], frameData[32], frameData[33], frameData[30]]
|
||||
leftAnkleRotStart = [frameData[36], frameData[37], frameData[38], frameData[35]]
|
||||
leftShoulderRotStart = [frameData[40], frameData[41], frameData[42], frameData[39]]
|
||||
|
||||
def GetPose(self):
|
||||
pose = [
|
||||
self._basePos[0], self._basePos[1], self._basePos[2], self._baseOrn[0], self._baseOrn[1],
|
||||
self._baseOrn[2], self._baseOrn[3], self._chestRot[0], self._chestRot[1],
|
||||
self._chestRot[2], self._chestRot[3], self._neckRot[0], self._neckRot[1], self._neckRot[2],
|
||||
self._neckRot[3], self._rightHipRot[0], self._rightHipRot[1], self._rightHipRot[2],
|
||||
self._rightHipRot[3], self._rightKneeRot[0], self._rightAnkleRot[0],
|
||||
self._rightAnkleRot[1], self._rightAnkleRot[2], self._rightAnkleRot[3],
|
||||
self._rightShoulderRot[0], self._rightShoulderRot[1], self._rightShoulderRot[2],
|
||||
self._rightShoulderRot[3], self._rightElbowRot[0], self._leftHipRot[0],
|
||||
self._leftHipRot[1], self._leftHipRot[2], self._leftHipRot[3], self._leftKneeRot[0],
|
||||
self._leftAnkleRot[0], self._leftAnkleRot[1], self._leftAnkleRot[2], self._leftAnkleRot[3],
|
||||
self._leftShoulderRot[0], self._leftShoulderRot[1], self._leftShoulderRot[2],
|
||||
self._leftShoulderRot[3], self._leftElbowRot[0]
|
||||
]
|
||||
return pose
|
||||
|
||||
def Slerp(self, frameFraction, frameData, frameDataNext, bullet_client):
|
||||
keyFrameDuration = frameData[0]
|
||||
basePos1Start = [frameData[1],frameData[2],frameData[3]]
|
||||
basePos1End = [frameDataNext[1],frameDataNext[2],frameDataNext[3]]
|
||||
self._basePos = [basePos1Start[0]+frameFraction*(basePos1End[0]-basePos1Start[0]),
|
||||
basePos1Start[1]+frameFraction*(basePos1End[1]-basePos1Start[1]),
|
||||
basePos1Start[2]+frameFraction*(basePos1End[2]-basePos1Start[2])]
|
||||
self._baseLinVel = self.ComputeLinVel(basePos1Start,basePos1End, keyFrameDuration)
|
||||
baseOrn1Start = [frameData[5],frameData[6], frameData[7],frameData[4]]
|
||||
baseOrn1Next = [frameDataNext[5],frameDataNext[6], frameDataNext[7],frameDataNext[4]]
|
||||
self._baseOrn = bullet_client.getQuaternionSlerp(baseOrn1Start,baseOrn1Next,frameFraction)
|
||||
self._baseAngVel = self.ComputeAngVel(baseOrn1Start,baseOrn1Next, keyFrameDuration, bullet_client)
|
||||
|
||||
basePos1Start = [frameData[1], frameData[2], frameData[3]]
|
||||
basePos1End = [frameDataNext[1], frameDataNext[2], frameDataNext[3]]
|
||||
self._basePos = [
|
||||
basePos1Start[0] + frameFraction * (basePos1End[0] - basePos1Start[0]),
|
||||
basePos1Start[1] + frameFraction * (basePos1End[1] - basePos1Start[1]),
|
||||
basePos1Start[2] + frameFraction * (basePos1End[2] - basePos1Start[2])
|
||||
]
|
||||
self._baseLinVel = self.ComputeLinVel(basePos1Start, basePos1End, keyFrameDuration)
|
||||
baseOrn1Start = [frameData[5], frameData[6], frameData[7], frameData[4]]
|
||||
baseOrn1Next = [frameDataNext[5], frameDataNext[6], frameDataNext[7], frameDataNext[4]]
|
||||
self._baseOrn = bullet_client.getQuaternionSlerp(baseOrn1Start, baseOrn1Next, frameFraction)
|
||||
self._baseAngVel = self.ComputeAngVel(baseOrn1Start, baseOrn1Next, keyFrameDuration,
|
||||
bullet_client)
|
||||
|
||||
##pre-rotate to make z-up
|
||||
#y2zPos=[0,0,0.0]
|
||||
#y2zOrn = p.getQuaternionFromEuler([1.57,0,0])
|
||||
#basePos,baseOrn = p.multiplyTransforms(y2zPos, y2zOrn,basePos1,baseOrn1)
|
||||
|
||||
chestRotStart = [frameData[9],frameData[10],frameData[11],frameData[8]]
|
||||
chestRotEnd = [frameDataNext[9],frameDataNext[10],frameDataNext[11],frameDataNext[8]]
|
||||
self._chestRot = bullet_client.getQuaternionSlerp(chestRotStart,chestRotEnd,frameFraction)
|
||||
self._chestVel = self.ComputeAngVelRel(chestRotStart,chestRotEnd,keyFrameDuration,bullet_client)
|
||||
|
||||
neckRotStart = [frameData[13],frameData[14],frameData[15],frameData[12]]
|
||||
neckRotEnd= [frameDataNext[13],frameDataNext[14],frameDataNext[15],frameDataNext[12]]
|
||||
self._neckRot = bullet_client.getQuaternionSlerp(neckRotStart,neckRotEnd,frameFraction)
|
||||
self._neckVel = self.ComputeAngVelRel(neckRotStart,neckRotEnd,keyFrameDuration,bullet_client)
|
||||
|
||||
rightHipRotStart = [frameData[17],frameData[18],frameData[19],frameData[16]]
|
||||
rightHipRotEnd = [frameDataNext[17],frameDataNext[18],frameDataNext[19],frameDataNext[16]]
|
||||
self._rightHipRot = bullet_client.getQuaternionSlerp(rightHipRotStart,rightHipRotEnd,frameFraction)
|
||||
self._rightHipVel = self.ComputeAngVelRel(rightHipRotStart,rightHipRotEnd,keyFrameDuration,bullet_client)
|
||||
|
||||
chestRotStart = [frameData[9], frameData[10], frameData[11], frameData[8]]
|
||||
chestRotEnd = [frameDataNext[9], frameDataNext[10], frameDataNext[11], frameDataNext[8]]
|
||||
self._chestRot = bullet_client.getQuaternionSlerp(chestRotStart, chestRotEnd, frameFraction)
|
||||
self._chestVel = self.ComputeAngVelRel(chestRotStart, chestRotEnd, keyFrameDuration,
|
||||
bullet_client)
|
||||
|
||||
neckRotStart = [frameData[13], frameData[14], frameData[15], frameData[12]]
|
||||
neckRotEnd = [frameDataNext[13], frameDataNext[14], frameDataNext[15], frameDataNext[12]]
|
||||
self._neckRot = bullet_client.getQuaternionSlerp(neckRotStart, neckRotEnd, frameFraction)
|
||||
self._neckVel = self.ComputeAngVelRel(neckRotStart, neckRotEnd, keyFrameDuration,
|
||||
bullet_client)
|
||||
|
||||
rightHipRotStart = [frameData[17], frameData[18], frameData[19], frameData[16]]
|
||||
rightHipRotEnd = [frameDataNext[17], frameDataNext[18], frameDataNext[19], frameDataNext[16]]
|
||||
self._rightHipRot = bullet_client.getQuaternionSlerp(rightHipRotStart, rightHipRotEnd,
|
||||
frameFraction)
|
||||
self._rightHipVel = self.ComputeAngVelRel(rightHipRotStart, rightHipRotEnd, keyFrameDuration,
|
||||
bullet_client)
|
||||
|
||||
rightKneeRotStart = [frameData[20]]
|
||||
rightKneeRotEnd = [frameDataNext[20]]
|
||||
self._rightKneeRot = [rightKneeRotStart[0]+frameFraction*(rightKneeRotEnd[0]-rightKneeRotStart[0])]
|
||||
self._rightKneeVel = [(rightKneeRotEnd[0]-rightKneeRotStart[0])/keyFrameDuration]
|
||||
|
||||
rightAnkleRotStart = [frameData[22],frameData[23],frameData[24],frameData[21]]
|
||||
rightAnkleRotEnd = [frameDataNext[22],frameDataNext[23],frameDataNext[24],frameDataNext[21]]
|
||||
self._rightAnkleRot = bullet_client.getQuaternionSlerp(rightAnkleRotStart,rightAnkleRotEnd,frameFraction)
|
||||
self._rightAnkleVel = self.ComputeAngVelRel(rightAnkleRotStart,rightAnkleRotEnd,keyFrameDuration,bullet_client)
|
||||
|
||||
rightShoulderRotStart = [frameData[26],frameData[27],frameData[28],frameData[25]]
|
||||
rightShoulderRotEnd = [frameDataNext[26],frameDataNext[27],frameDataNext[28],frameDataNext[25]]
|
||||
self._rightShoulderRot = bullet_client.getQuaternionSlerp(rightShoulderRotStart,rightShoulderRotEnd,frameFraction)
|
||||
self._rightShoulderVel = self.ComputeAngVelRel(rightShoulderRotStart,rightShoulderRotEnd, keyFrameDuration,bullet_client)
|
||||
|
||||
self._rightKneeRot = [
|
||||
rightKneeRotStart[0] + frameFraction * (rightKneeRotEnd[0] - rightKneeRotStart[0])
|
||||
]
|
||||
self._rightKneeVel = [(rightKneeRotEnd[0] - rightKneeRotStart[0]) / keyFrameDuration]
|
||||
|
||||
rightAnkleRotStart = [frameData[22], frameData[23], frameData[24], frameData[21]]
|
||||
rightAnkleRotEnd = [frameDataNext[22], frameDataNext[23], frameDataNext[24], frameDataNext[21]]
|
||||
self._rightAnkleRot = bullet_client.getQuaternionSlerp(rightAnkleRotStart, rightAnkleRotEnd,
|
||||
frameFraction)
|
||||
self._rightAnkleVel = self.ComputeAngVelRel(rightAnkleRotStart, rightAnkleRotEnd,
|
||||
keyFrameDuration, bullet_client)
|
||||
|
||||
rightShoulderRotStart = [frameData[26], frameData[27], frameData[28], frameData[25]]
|
||||
rightShoulderRotEnd = [
|
||||
frameDataNext[26], frameDataNext[27], frameDataNext[28], frameDataNext[25]
|
||||
]
|
||||
self._rightShoulderRot = bullet_client.getQuaternionSlerp(rightShoulderRotStart,
|
||||
rightShoulderRotEnd, frameFraction)
|
||||
self._rightShoulderVel = self.ComputeAngVelRel(rightShoulderRotStart, rightShoulderRotEnd,
|
||||
keyFrameDuration, bullet_client)
|
||||
|
||||
rightElbowRotStart = [frameData[29]]
|
||||
rightElbowRotEnd = [frameDataNext[29]]
|
||||
self._rightElbowRot = [rightElbowRotStart[0]+frameFraction*(rightElbowRotEnd[0]-rightElbowRotStart[0])]
|
||||
self._rightElbowVel = [(rightElbowRotEnd[0]-rightElbowRotStart[0])/keyFrameDuration]
|
||||
|
||||
leftHipRotStart = [frameData[31],frameData[32],frameData[33],frameData[30]]
|
||||
leftHipRotEnd = [frameDataNext[31],frameDataNext[32],frameDataNext[33],frameDataNext[30]]
|
||||
self._leftHipRot = bullet_client.getQuaternionSlerp(leftHipRotStart,leftHipRotEnd,frameFraction)
|
||||
self._leftHipVel = self.ComputeAngVelRel(leftHipRotStart, leftHipRotEnd,keyFrameDuration,bullet_client)
|
||||
|
||||
self._rightElbowRot = [
|
||||
rightElbowRotStart[0] + frameFraction * (rightElbowRotEnd[0] - rightElbowRotStart[0])
|
||||
]
|
||||
self._rightElbowVel = [(rightElbowRotEnd[0] - rightElbowRotStart[0]) / keyFrameDuration]
|
||||
|
||||
leftHipRotStart = [frameData[31], frameData[32], frameData[33], frameData[30]]
|
||||
leftHipRotEnd = [frameDataNext[31], frameDataNext[32], frameDataNext[33], frameDataNext[30]]
|
||||
self._leftHipRot = bullet_client.getQuaternionSlerp(leftHipRotStart, leftHipRotEnd,
|
||||
frameFraction)
|
||||
self._leftHipVel = self.ComputeAngVelRel(leftHipRotStart, leftHipRotEnd, keyFrameDuration,
|
||||
bullet_client)
|
||||
|
||||
leftKneeRotStart = [frameData[34]]
|
||||
leftKneeRotEnd = [frameDataNext[34]]
|
||||
self._leftKneeRot = [leftKneeRotStart[0] +frameFraction*(leftKneeRotEnd[0]-leftKneeRotStart[0]) ]
|
||||
self._leftKneeVel = [(leftKneeRotEnd[0]-leftKneeRotStart[0])/keyFrameDuration]
|
||||
|
||||
leftAnkleRotStart = [frameData[36],frameData[37],frameData[38],frameData[35]]
|
||||
leftAnkleRotEnd = [frameDataNext[36],frameDataNext[37],frameDataNext[38],frameDataNext[35]]
|
||||
self._leftAnkleRot = bullet_client.getQuaternionSlerp(leftAnkleRotStart,leftAnkleRotEnd,frameFraction)
|
||||
self._leftAnkleVel = self.ComputeAngVelRel(leftAnkleRotStart,leftAnkleRotEnd,keyFrameDuration,bullet_client)
|
||||
self._leftKneeRot = [
|
||||
leftKneeRotStart[0] + frameFraction * (leftKneeRotEnd[0] - leftKneeRotStart[0])
|
||||
]
|
||||
self._leftKneeVel = [(leftKneeRotEnd[0] - leftKneeRotStart[0]) / keyFrameDuration]
|
||||
|
||||
leftShoulderRotStart = [frameData[40],frameData[41],frameData[42],frameData[39]]
|
||||
leftShoulderRotEnd = [frameDataNext[40],frameDataNext[41],frameDataNext[42],frameDataNext[39]]
|
||||
self._leftShoulderRot = bullet_client.getQuaternionSlerp(leftShoulderRotStart,leftShoulderRotEnd,frameFraction)
|
||||
self._leftShoulderVel = self.ComputeAngVelRel(leftShoulderRotStart,leftShoulderRotEnd,keyFrameDuration,bullet_client)
|
||||
leftAnkleRotStart = [frameData[36], frameData[37], frameData[38], frameData[35]]
|
||||
leftAnkleRotEnd = [frameDataNext[36], frameDataNext[37], frameDataNext[38], frameDataNext[35]]
|
||||
self._leftAnkleRot = bullet_client.getQuaternionSlerp(leftAnkleRotStart, leftAnkleRotEnd,
|
||||
frameFraction)
|
||||
self._leftAnkleVel = self.ComputeAngVelRel(leftAnkleRotStart, leftAnkleRotEnd,
|
||||
keyFrameDuration, bullet_client)
|
||||
|
||||
leftShoulderRotStart = [frameData[40], frameData[41], frameData[42], frameData[39]]
|
||||
leftShoulderRotEnd = [
|
||||
frameDataNext[40], frameDataNext[41], frameDataNext[42], frameDataNext[39]
|
||||
]
|
||||
self._leftShoulderRot = bullet_client.getQuaternionSlerp(leftShoulderRotStart,
|
||||
leftShoulderRotEnd, frameFraction)
|
||||
self._leftShoulderVel = self.ComputeAngVelRel(leftShoulderRotStart, leftShoulderRotEnd,
|
||||
keyFrameDuration, bullet_client)
|
||||
|
||||
leftElbowRotStart = [frameData[43]]
|
||||
leftElbowRotEnd = [frameDataNext[43]]
|
||||
self._leftElbowRot = [leftElbowRotStart[0]+frameFraction*(leftElbowRotEnd[0]-leftElbowRotStart[0])]
|
||||
self._leftElbowVel = [(leftElbowRotEnd[0]-leftElbowRotStart[0])/keyFrameDuration]
|
||||
self._leftElbowRot = [
|
||||
leftElbowRotStart[0] + frameFraction * (leftElbowRotEnd[0] - leftElbowRotStart[0])
|
||||
]
|
||||
self._leftElbowVel = [(leftElbowRotEnd[0] - leftElbowRotStart[0]) / keyFrameDuration]
|
||||
|
||||
pose = self.GetPose()
|
||||
return pose
|
||||
|
||||
|
||||
def ConvertFromAction(self, pybullet_client, action):
|
||||
#turn action into pose
|
||||
|
||||
self.Reset()#?? needed?
|
||||
index=0
|
||||
|
||||
self.Reset() #?? needed?
|
||||
index = 0
|
||||
angle = action[index]
|
||||
axis = [action[index+1],action[index+2],action[index+3]]
|
||||
index+=4
|
||||
self._chestRot = pybullet_client.getQuaternionFromAxisAngle(axis,angle)
|
||||
axis = [action[index + 1], action[index + 2], action[index + 3]]
|
||||
index += 4
|
||||
self._chestRot = pybullet_client.getQuaternionFromAxisAngle(axis, angle)
|
||||
#print("pose._chestRot=",pose._chestRot)
|
||||
|
||||
angle = action[index]
|
||||
axis = [action[index+1],action[index+2],action[index+3]]
|
||||
index+=4
|
||||
self._neckRot = pybullet_client.getQuaternionFromAxisAngle(axis,angle)
|
||||
|
||||
axis = [action[index + 1], action[index + 2], action[index + 3]]
|
||||
index += 4
|
||||
self._neckRot = pybullet_client.getQuaternionFromAxisAngle(axis, angle)
|
||||
|
||||
angle = action[index]
|
||||
axis = [action[index+1],action[index+2],action[index+3]]
|
||||
index+=4
|
||||
self._rightHipRot = pybullet_client.getQuaternionFromAxisAngle(axis,angle)
|
||||
|
||||
axis = [action[index + 1], action[index + 2], action[index + 3]]
|
||||
index += 4
|
||||
self._rightHipRot = pybullet_client.getQuaternionFromAxisAngle(axis, angle)
|
||||
|
||||
angle = action[index]
|
||||
index+=1
|
||||
index += 1
|
||||
self._rightKneeRot = [angle]
|
||||
|
||||
|
||||
angle = action[index]
|
||||
axis = [action[index+1],action[index+2],action[index+3]]
|
||||
index+=4
|
||||
self._rightAnkleRot = pybullet_client.getQuaternionFromAxisAngle(axis,angle)
|
||||
|
||||
axis = [action[index + 1], action[index + 2], action[index + 3]]
|
||||
index += 4
|
||||
self._rightAnkleRot = pybullet_client.getQuaternionFromAxisAngle(axis, angle)
|
||||
|
||||
angle = action[index]
|
||||
axis = [action[index+1],action[index+2],action[index+3]]
|
||||
index+=4
|
||||
self._rightShoulderRot = pybullet_client.getQuaternionFromAxisAngle(axis,angle)
|
||||
|
||||
axis = [action[index + 1], action[index + 2], action[index + 3]]
|
||||
index += 4
|
||||
self._rightShoulderRot = pybullet_client.getQuaternionFromAxisAngle(axis, angle)
|
||||
|
||||
angle = action[index]
|
||||
index+=1
|
||||
index += 1
|
||||
self._rightElbowRot = [angle]
|
||||
|
||||
|
||||
angle = action[index]
|
||||
axis = [action[index+1],action[index+2],action[index+3]]
|
||||
index+=4
|
||||
self._leftHipRot = pybullet_client.getQuaternionFromAxisAngle(axis,angle)
|
||||
|
||||
axis = [action[index + 1], action[index + 2], action[index + 3]]
|
||||
index += 4
|
||||
self._leftHipRot = pybullet_client.getQuaternionFromAxisAngle(axis, angle)
|
||||
|
||||
angle = action[index]
|
||||
index+=1
|
||||
index += 1
|
||||
self._leftKneeRot = [angle]
|
||||
|
||||
|
||||
|
||||
angle = action[index]
|
||||
axis = [action[index+1],action[index+2],action[index+3]]
|
||||
index+=4
|
||||
self._leftAnkleRot = pybullet_client.getQuaternionFromAxisAngle(axis,angle)
|
||||
|
||||
axis = [action[index + 1], action[index + 2], action[index + 3]]
|
||||
index += 4
|
||||
self._leftAnkleRot = pybullet_client.getQuaternionFromAxisAngle(axis, angle)
|
||||
|
||||
angle = action[index]
|
||||
axis = [action[index+1],action[index+2],action[index+3]]
|
||||
index+=4
|
||||
self._leftShoulderRot = pybullet_client.getQuaternionFromAxisAngle(axis,angle)
|
||||
|
||||
axis = [action[index + 1], action[index + 2], action[index + 3]]
|
||||
index += 4
|
||||
self._leftShoulderRot = pybullet_client.getQuaternionFromAxisAngle(axis, angle)
|
||||
|
||||
angle = action[index]
|
||||
index+=1
|
||||
index += 1
|
||||
self._leftElbowRot = [angle]
|
||||
|
||||
|
||||
pose = self.GetPose()
|
||||
return pose
|
||||
|
||||
|
||||
File diff suppressed because it is too large
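The suppressed file received the same mechanical treatment as the rest of the commit. As a sketch, its diff can be regenerated locally with yapf's file-level API (the path below is a placeholder for the omitted file, and passing the style file by name assumes yapf resolves it from the working directory):

from yapf.yapflib.yapf_api import FormatFile

# print_diff=True returns the would-be unified diff instead of rewriting the file.
diff, encoding, changed = FormatFile('path/to/suppressed_file.py',
                                     style_config='.style.yapf',
                                     print_diff=True)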
@@ -1,7 +1,9 @@
|
||||
import json
|
||||
import math
|
||||
|
||||
|
||||
class MotionCaptureData(object):
|
||||
|
||||
def __init__(self):
|
||||
self.Reset()
|
||||
|
||||
@@ -13,30 +15,33 @@ class MotionCaptureData(object):
|
||||
self._motion_data = json.load(f)
|
||||
|
||||
def NumFrames(self):
|
||||
return len(self._motion_data['Frames'])
|
||||
return len(self._motion_data['Frames'])
|
||||
|
||||
def KeyFrameDuraction(self):
|
||||
return self._motion_data['Frames'][0][0]
|
||||
|
||||
return self._motion_data['Frames'][0][0]
|
||||
|
||||
def getCycleTime(self):
|
||||
keyFrameDuration = self.KeyFrameDuraction()
|
||||
cycleTime = keyFrameDuration*(self.NumFrames()-1)
|
||||
cycleTime = keyFrameDuration * (self.NumFrames() - 1)
|
||||
return cycleTime
|
||||
|
||||
|
||||
def calcCycleCount(self, simTime, cycleTime):
|
||||
phases = simTime / cycleTime;
|
||||
phases = simTime / cycleTime
|
||||
count = math.floor(phases)
|
||||
loop = True
|
||||
#count = (loop) ? count : cMathUtil::Clamp(count, 0, 1);
|
||||
return count
|
||||
|
||||
|
||||
def computeCycleOffset(self):
|
||||
firstFrame=0
|
||||
lastFrame = self.NumFrames()-1
|
||||
firstFrame = 0
|
||||
lastFrame = self.NumFrames() - 1
|
||||
frameData = self._motion_data['Frames'][0]
|
||||
frameDataNext = self._motion_data['Frames'][lastFrame]
|
||||
|
||||
basePosStart = [frameData[1],frameData[2],frameData[3]]
|
||||
basePosEnd = [frameDataNext[1],frameDataNext[2],frameDataNext[3]]
|
||||
self._cycleOffset = [basePosEnd[0]-basePosStart[0],basePosEnd[1]-basePosStart[1],basePosEnd[2]-basePosStart[2]]
|
||||
return self._cycleOffset
|
||||
|
||||
basePosStart = [frameData[1], frameData[2], frameData[3]]
|
||||
basePosEnd = [frameDataNext[1], frameDataNext[2], frameDataNext[3]]
|
||||
self._cycleOffset = [
|
||||
basePosEnd[0] - basePosStart[0], basePosEnd[1] - basePosStart[1],
|
||||
basePosEnd[2] - basePosStart[2]
|
||||
]
|
||||
return self._cycleOffset
|
||||
|
||||
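A short usage sketch of the MotionCaptureData helpers reformatted above (the module path comes from the import used later in this commit, and the motion file path from a commented example there; both are illustrative):

import pybullet_data
from pybullet_envs.deep_mimic.env.motion_capture_data import MotionCaptureData

mocap = MotionCaptureData()
mocap.Load(pybullet_data.getDataPath() + "/motions/humanoid3d_backflip.txt")
cycle_time = mocap.getCycleTime()  # KeyFrameDuraction() * (NumFrames() - 1)
count = mocap.calcCycleCount(simTime=3.7, cycleTime=cycle_time)
offset = mocap.computeCycleOffset()  # base translation accumulated over one cycle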
@@ -9,301 +9,313 @@ from pybullet_envs.deep_mimic.env import humanoid_stable_pd
|
||||
import pybullet_data
|
||||
import pybullet as p1
|
||||
import random
|
||||
|
||||
|
||||
|
||||
|
||||
class PyBulletDeepMimicEnv(Env):
|
||||
def __init__(self, arg_parser=None, enable_draw=False, pybullet_client=None):
|
||||
super().__init__(arg_parser, enable_draw)
|
||||
self._num_agents = 1
|
||||
self._pybullet_client = pybullet_client
|
||||
self._isInitialized = False
|
||||
self._useStablePD = True
|
||||
self._arg_parser = arg_parser
|
||||
self.reset()
|
||||
|
||||
def reset(self):
|
||||
|
||||
|
||||
if not self._isInitialized:
|
||||
if self.enable_draw:
|
||||
self._pybullet_client = bullet_client.BulletClient(connection_mode=p1.GUI)
|
||||
#disable 'GUI' since it slows down a lot on Mac OSX and some other platforms
|
||||
self._pybullet_client.configureDebugVisualizer(self._pybullet_client.COV_ENABLE_GUI,0)
|
||||
else:
|
||||
self._pybullet_client = bullet_client.BulletClient()
|
||||
|
||||
self._pybullet_client.setAdditionalSearchPath(pybullet_data.getDataPath())
|
||||
z2y = self._pybullet_client.getQuaternionFromEuler([-math.pi*0.5,0,0])
|
||||
self._planeId = self._pybullet_client.loadURDF("plane_implicit.urdf",[0,0,0],z2y, useMaximalCoordinates=True)
|
||||
#print("planeId=",self._planeId)
|
||||
self._pybullet_client.configureDebugVisualizer(self._pybullet_client.COV_ENABLE_Y_AXIS_UP,1)
|
||||
self._pybullet_client.setGravity(0,-9.8,0)
|
||||
|
||||
self._pybullet_client.setPhysicsEngineParameter(numSolverIterations=10)
|
||||
self._pybullet_client.changeDynamics(self._planeId, linkIndex=-1, lateralFriction=0.9)
|
||||
|
||||
self._mocapData = motion_capture_data.MotionCaptureData()
|
||||
|
||||
motion_file = self._arg_parser.parse_strings('motion_file')
|
||||
print("motion_file=",motion_file[0])
|
||||
|
||||
motionPath = pybullet_data.getDataPath()+"/"+motion_file[0]
|
||||
#motionPath = pybullet_data.getDataPath()+"/motions/humanoid3d_backflip.txt"
|
||||
self._mocapData.Load(motionPath)
|
||||
timeStep = 1./600.
|
||||
useFixedBase=False
|
||||
self._humanoid = humanoid_stable_pd.HumanoidStablePD(self._pybullet_client, self._mocapData, timeStep, useFixedBase)
|
||||
self._isInitialized = True
|
||||
|
||||
self._pybullet_client.setTimeStep(timeStep)
|
||||
self._pybullet_client.setPhysicsEngineParameter(numSubSteps=1)
|
||||
|
||||
|
||||
selfCheck = False
|
||||
if (selfCheck):
|
||||
curTime = 0
|
||||
while self._pybullet_client.isConnected():
|
||||
self._humanoid.setSimTime(curTime)
|
||||
state = self._humanoid.getState()
|
||||
#print("state=",state)
|
||||
pose = self._humanoid.computePose(self._humanoid._frameFraction)
|
||||
for i in range (10):
|
||||
curTime+=timeStep
|
||||
#taus = self._humanoid.computePDForces(pose)
|
||||
#self._humanoid.applyPDForces(taus)
|
||||
#self._pybullet_client.stepSimulation()
|
||||
time.sleep(timeStep)
|
||||
#print("numframes = ", self._humanoid._mocap_data.NumFrames())
|
||||
#startTime = random.randint(0,self._humanoid._mocap_data.NumFrames()-2)
|
||||
rnrange = 1000
|
||||
rn = random.randint(0,rnrange)
|
||||
startTime = float(rn)/rnrange * self._humanoid.getCycleTime()
|
||||
self.t = startTime
|
||||
|
||||
self._humanoid.setSimTime(startTime)
|
||||
|
||||
self._humanoid.resetPose()
|
||||
#this clears the contact points. Todo: add API to explicitly clear all contact points?
|
||||
#self._pybullet_client.stepSimulation()
|
||||
self._humanoid.resetPose()
|
||||
self.needs_update_time = self.t-1#force update
|
||||
|
||||
def get_num_agents(self):
|
||||
return self._num_agents
|
||||
|
||||
def get_action_space(self, agent_id):
|
||||
return ActionSpace(ActionSpace.Continuous)
|
||||
|
||||
def get_reward_min(self, agent_id):
|
||||
return 0
|
||||
|
||||
def get_reward_max(self, agent_id):
|
||||
return 1
|
||||
|
||||
def get_reward_fail(self, agent_id):
|
||||
return self.get_reward_min(agent_id)
|
||||
|
||||
def get_reward_succ(self, agent_id):
|
||||
return self.get_reward_max(agent_id)
|
||||
|
||||
#scene_name == "imitate" -> cDrawSceneImitate
|
||||
def get_state_size(self, agent_id):
|
||||
#cCtController::GetStateSize()
|
||||
#int state_size = cDeepMimicCharController::GetStateSize();
|
||||
# state_size += GetStatePoseSize();#106
|
||||
# state_size += GetStateVelSize(); #(3+3)*numBodyParts=90
|
||||
#state_size += GetStatePhaseSize();#1
|
||||
#197
|
||||
return 197
|
||||
|
||||
def build_state_norm_groups(self, agent_id):
|
||||
#if (mEnablePhaseInput)
|
||||
#{
|
||||
#int phase_group = gNormGroupNone;
|
||||
#int phase_offset = GetStatePhaseOffset();
|
||||
#int phase_size = GetStatePhaseSize();
|
||||
#out_groups.segment(phase_offset, phase_size) = phase_group * Eigen::VectorXi::Ones(phase_size);
|
||||
groups = [0]*self.get_state_size(agent_id)
|
||||
groups[0] = -1
|
||||
return groups
|
||||
|
||||
def build_state_offset(self, agent_id):
|
||||
out_offset = [0]*self.get_state_size(agent_id)
|
||||
phase_offset = -0.5
|
||||
out_offset[0] = phase_offset
|
||||
return np.array(out_offset)
|
||||
|
||||
def build_state_scale(self, agent_id):
|
||||
out_scale = [1]*self.get_state_size(agent_id)
|
||||
phase_scale = 2
|
||||
out_scale[0] = phase_scale
|
||||
return np.array(out_scale)
|
||||
def __init__(self, arg_parser=None, enable_draw=False, pybullet_client=None):
|
||||
super().__init__(arg_parser, enable_draw)
|
||||
self._num_agents = 1
|
||||
self._pybullet_client = pybullet_client
|
||||
self._isInitialized = False
|
||||
self._useStablePD = True
|
||||
self._arg_parser = arg_parser
|
||||
self.reset()
|
||||
|
||||
def get_goal_size(self, agent_id):
|
||||
return 0
|
||||
def reset(self):
|
||||
|
||||
def get_action_size(self, agent_id):
|
||||
ctrl_size = 43 #numDof
|
||||
root_size = 7
|
||||
return ctrl_size - root_size
|
||||
|
||||
def build_goal_norm_groups(self, agent_id):
|
||||
return np.array([])
|
||||
|
||||
def build_goal_offset(self, agent_id):
|
||||
return np.array([])
|
||||
|
||||
def build_goal_scale(self, agent_id):
|
||||
return np.array([])
|
||||
|
||||
def build_action_offset(self, agent_id):
|
||||
out_offset = [0] * self.get_action_size(agent_id)
|
||||
out_offset = [0.0000000000,0.0000000000,0.0000000000,-0.200000000,0.0000000000,0.0000000000,0.0000000000,
|
||||
-0.200000000,0.0000000000,0.0000000000, 0.00000000, -0.2000000, 1.57000000, 0.00000000, 0.00000000,
|
||||
0.00000000, -0.2000000, 0.00000000, 0.00000000, 0.00000000, -0.2000000, -1.5700000, 0.00000000, 0.00000000,
|
||||
0.00000000, -0.2000000, 1.57000000, 0.00000000, 0.00000000, 0.00000000, -0.2000000, 0.00000000, 0.00000000,
|
||||
0.00000000, -0.2000000, -1.5700000]
|
||||
#see cCtCtrlUtil::BuildOffsetScalePDPrismatic and
|
||||
#see cCtCtrlUtil::BuildOffsetScalePDSpherical
|
||||
return np.array(out_offset)
|
||||
|
||||
def build_action_scale(self, agent_id):
|
||||
out_scale = [1] * self.get_action_size(agent_id)
|
||||
#see cCtCtrlUtil::BuildOffsetScalePDPrismatic and
|
||||
#see cCtCtrlUtil::BuildOffsetScalePDSpherical
|
||||
out_scale=[ 0.20833333333333,1.00000000000000,1.00000000000000,1.00000000000000,0.25000000000000,
|
||||
1.00000000000000,1.00000000000000,1.00000000000000,0.12077294685990,1.00000000000000,
|
||||
1.000000000000, 1.000000000000, 0.159235668789, 0.159235668789, 1.000000000000,
|
||||
1.000000000000, 1.000000000000, 0.079617834394, 1.000000000000, 1.000000000000,
|
||||
1.000000000000, 0.159235668789, 0.120772946859, 1.000000000000, 1.000000000000,
|
||||
1.000000000000, 0.159235668789, 0.159235668789, 1.000000000000, 1.000000000000,
|
||||
1.000000000000, 0.107758620689, 1.000000000000, 1.000000000000, 1.000000000000,
|
||||
0.159235668789]
|
||||
return np.array(out_scale)
|
||||
|
||||
def build_action_bound_min(self, agent_id):
|
||||
#see cCtCtrlUtil::BuildBoundsPDSpherical
|
||||
out_scale = [-1] * self.get_action_size(agent_id)
|
||||
out_scale = [-4.79999999999,-1.00000000000,-1.00000000000,-1.00000000000,-4.00000000000,
|
||||
-1.00000000000,-1.00000000000,-1.00000000000,-7.77999999999,-1.00000000000, -1.000000000,
|
||||
-1.000000000, -7.850000000, -6.280000000, -1.000000000, -1.000000000, -1.000000000,
|
||||
-12.56000000, -1.000000000, -1.000000000, -1.000000000, -4.710000000,
|
||||
-7.779999999, -1.000000000, -1.000000000, -1.000000000, -7.850000000,
|
||||
-6.280000000, -1.000000000, -1.000000000, -1.000000000, -8.460000000,
|
||||
-1.000000000, -1.000000000, -1.000000000, -4.710000000]
|
||||
if not self._isInitialized:
|
||||
if self.enable_draw:
|
||||
self._pybullet_client = bullet_client.BulletClient(connection_mode=p1.GUI)
|
||||
#disable 'GUI' since it slows down a lot on Mac OSX and some other platforms
|
||||
self._pybullet_client.configureDebugVisualizer(self._pybullet_client.COV_ENABLE_GUI, 0)
|
||||
else:
|
||||
self._pybullet_client = bullet_client.BulletClient()
|
||||
|
||||
self._pybullet_client.setAdditionalSearchPath(pybullet_data.getDataPath())
|
||||
z2y = self._pybullet_client.getQuaternionFromEuler([-math.pi * 0.5, 0, 0])
|
||||
self._planeId = self._pybullet_client.loadURDF("plane_implicit.urdf", [0, 0, 0],
|
||||
z2y,
|
||||
useMaximalCoordinates=True)
|
||||
#print("planeId=",self._planeId)
|
||||
self._pybullet_client.configureDebugVisualizer(self._pybullet_client.COV_ENABLE_Y_AXIS_UP, 1)
|
||||
self._pybullet_client.setGravity(0, -9.8, 0)
|
||||
|
||||
self._pybullet_client.setPhysicsEngineParameter(numSolverIterations=10)
|
||||
self._pybullet_client.changeDynamics(self._planeId, linkIndex=-1, lateralFriction=0.9)
|
||||
|
||||
self._mocapData = motion_capture_data.MotionCaptureData()
|
||||
|
||||
motion_file = self._arg_parser.parse_strings('motion_file')
|
||||
print("motion_file=", motion_file[0])
|
||||
|
||||
motionPath = pybullet_data.getDataPath() + "/" + motion_file[0]
|
||||
#motionPath = pybullet_data.getDataPath()+"/motions/humanoid3d_backflip.txt"
|
||||
self._mocapData.Load(motionPath)
|
||||
timeStep = 1. / 600.
|
||||
useFixedBase = False
|
||||
self._humanoid = humanoid_stable_pd.HumanoidStablePD(self._pybullet_client, self._mocapData,
|
||||
timeStep, useFixedBase)
|
||||
self._isInitialized = True
|
||||
|
||||
return out_scale
|
||||
|
||||
def build_action_bound_max(self, agent_id):
|
||||
out_scale = [1] * self.get_action_size(agent_id)
|
||||
out_scale=[
|
||||
4.799999999,1.000000000,1.000000000,1.000000000,4.000000000,1.000000000,
|
||||
1.000000000,1.000000000,8.779999999,1.000000000, 1.0000000, 1.0000000,
|
||||
4.7100000, 6.2800000, 1.0000000, 1.0000000, 1.0000000,
|
||||
12.560000, 1.0000000, 1.0000000, 1.0000000, 7.8500000,
|
||||
8.7799999, 1.0000000, 1.0000000, 1.0000000, 4.7100000,
|
||||
6.2800000, 1.0000000, 1.0000000, 1.0000000, 10.100000,
|
||||
1.0000000, 1.0000000, 1.0000000, 7.8500000]
|
||||
return out_scale
|
||||
|
||||
def set_mode(self, mode):
|
||||
self._mode = mode
|
||||
|
||||
def need_new_action(self, agent_id):
|
||||
if self.t>=self.needs_update_time:
|
||||
self.needs_update_time = self.t + 1./30.
|
||||
return True
|
||||
return False
|
||||
|
||||
def record_state(self, agent_id):
|
||||
state = self._humanoid.getState()
|
||||
|
||||
return np.array(state)
|
||||
|
||||
|
||||
def record_goal(self, agent_id):
|
||||
return np.array([])
|
||||
|
||||
def calc_reward(self, agent_id):
|
||||
kinPose = self._humanoid.computePose(self._humanoid._frameFraction)
|
||||
reward = self._humanoid.getReward(kinPose)
|
||||
return reward
|
||||
|
||||
def set_action(self, agent_id, action):
|
||||
#print("action=",)
|
||||
#for a in action:
|
||||
# print(a)
|
||||
np.savetxt("pb_action.csv", action, delimiter=",")
|
||||
self.desiredPose = self._humanoid.convertActionToPose(action)
|
||||
#we need the target root position and orientation to be zero, to be compatible with deep mimic
|
||||
self.desiredPose[0] = 0
|
||||
self.desiredPose[1] = 0
|
||||
self.desiredPose[2] = 0
|
||||
self.desiredPose[3] = 0
|
||||
self.desiredPose[4] = 0
|
||||
self.desiredPose[5] = 0
|
||||
self.desiredPose[6] = 0
|
||||
target_pose = np.array(self.desiredPose)
|
||||
|
||||
|
||||
np.savetxt("pb_target_pose.csv", target_pose, delimiter=",")
|
||||
|
||||
#print("set_action: desiredPose=", self.desiredPose)
|
||||
|
||||
def log_val(self, agent_id, val):
|
||||
pass
|
||||
|
||||
|
||||
def update(self, timeStep):
|
||||
#print("pybullet_deep_mimic_env:update timeStep=",timeStep," t=",self.t)
|
||||
self._pybullet_client.setTimeStep(timeStep)
|
||||
self._humanoid._timeStep = timeStep
|
||||
|
||||
for i in range(1):
|
||||
self.t += timeStep
|
||||
self._humanoid.setSimTime(self.t)
|
||||
|
||||
if self.desiredPose:
|
||||
kinPose = self._humanoid.computePose(self._humanoid._frameFraction)
|
||||
self._humanoid.initializePose(self._humanoid._poseInterpolator, self._humanoid._kin_model, initBase=True)
|
||||
#pos,orn=self._pybullet_client.getBasePositionAndOrientation(self._humanoid._sim_model)
|
||||
#self._pybullet_client.resetBasePositionAndOrientation(self._humanoid._kin_model, [pos[0]+3,pos[1],pos[2]],orn)
|
||||
#print("desiredPositions=",self.desiredPose)
|
||||
maxForces = [0,0,0,0,0,0,0,200,200,200,200, 50,50,50,50, 200,200,200,200, 150, 90,90,90,90, 100,100,100,100, 60, 200,200,200,200, 150, 90, 90, 90, 90, 100,100,100,100, 60]
|
||||
|
||||
if self._useStablePD:
|
||||
taus = self._humanoid.computePDForces(self.desiredPose, desiredVelocities=None, maxForces=maxForces)
|
||||
self._humanoid.applyPDForces(taus)
|
||||
else:
|
||||
self._humanoid.setJointMotors(self.desiredPose, maxForces=maxForces)
|
||||
|
||||
self._pybullet_client.stepSimulation()
|
||||
|
||||
self._pybullet_client.setPhysicsEngineParameter(numSubSteps=1)
|
||||
|
||||
def set_sample_count(self, count):
|
||||
return
|
||||
|
||||
def check_terminate(self, agent_id):
|
||||
return Env.Terminate(self.is_episode_end())
|
||||
|
||||
def is_episode_end(self):
|
||||
isEnded = self._humanoid.terminates()
|
||||
#also check maximum time, 20 seconds (todo get from file)
|
||||
#print("self.t=",self.t)
|
||||
if (self.t>20):
|
||||
isEnded = True
|
||||
return isEnded
|
||||
|
||||
def check_valid_episode(self):
|
||||
#could check if limbs exceed velocity threshold
|
||||
return True
|
||||
|
||||
def getKeyboardEvents(self):
|
||||
return self._pybullet_client.getKeyboardEvents()
|
||||
|
||||
def isKeyTriggered(self, keys, key):
|
||||
o = ord(key)
|
||||
#print("ord=",o)
|
||||
if o in keys:
|
||||
return keys[ord(key)] & self._pybullet_client.KEY_WAS_TRIGGERED
|
||||
return False
|
||||
selfCheck = False
|
||||
if (selfCheck):
|
||||
curTime = 0
|
||||
while self._pybullet_client.isConnected():
|
||||
self._humanoid.setSimTime(curTime)
|
||||
state = self._humanoid.getState()
|
||||
#print("state=",state)
|
||||
pose = self._humanoid.computePose(self._humanoid._frameFraction)
|
||||
for i in range(10):
|
||||
curTime += timeStep
|
||||
#taus = self._humanoid.computePDForces(pose)
|
||||
#self._humanoid.applyPDForces(taus)
|
||||
#self._pybullet_client.stepSimulation()
|
||||
time.sleep(timeStep)
|
||||
#print("numframes = ", self._humanoid._mocap_data.NumFrames())
|
||||
#startTime = random.randint(0,self._humanoid._mocap_data.NumFrames()-2)
|
||||
rnrange = 1000
|
||||
rn = random.randint(0, rnrange)
|
||||
startTime = float(rn) / rnrange * self._humanoid.getCycleTime()
|
||||
self.t = startTime
|
||||
|
||||
self._humanoid.setSimTime(startTime)
|
||||
|
||||
self._humanoid.resetPose()
|
||||
#this clears the contact points. Todo: add API to explicitly clear all contact points?
|
||||
#self._pybullet_client.stepSimulation()
|
||||
self._humanoid.resetPose()
|
||||
self.needs_update_time = self.t - 1 #force update
|
||||
|
||||
def get_num_agents(self):
|
||||
return self._num_agents
|
||||
|
||||
def get_action_space(self, agent_id):
|
||||
return ActionSpace(ActionSpace.Continuous)
|
||||
|
||||
def get_reward_min(self, agent_id):
|
||||
return 0
|
||||
|
||||
def get_reward_max(self, agent_id):
|
||||
return 1
|
||||
|
||||
def get_reward_fail(self, agent_id):
|
||||
return self.get_reward_min(agent_id)
|
||||
|
||||
def get_reward_succ(self, agent_id):
|
||||
return self.get_reward_max(agent_id)
|
||||
|
||||
#scene_name == "imitate" -> cDrawSceneImitate
|
||||
def get_state_size(self, agent_id):
|
||||
#cCtController::GetStateSize()
|
||||
#int state_size = cDeepMimicCharController::GetStateSize();
|
||||
# state_size += GetStatePoseSize();#106
|
||||
# state_size += GetStateVelSize(); #(3+3)*numBodyParts=90
|
||||
#state_size += GetStatePhaseSize();#1
|
||||
#197
|
||||
return 197
|
||||
|
||||
def build_state_norm_groups(self, agent_id):
|
||||
#if (mEnablePhaseInput)
|
||||
#{
|
||||
#int phase_group = gNormGroupNone;
|
||||
#int phase_offset = GetStatePhaseOffset();
|
||||
#int phase_size = GetStatePhaseSize();
|
||||
#out_groups.segment(phase_offset, phase_size) = phase_group * Eigen::VectorXi::Ones(phase_size);
|
||||
groups = [0] * self.get_state_size(agent_id)
|
||||
groups[0] = -1
|
||||
return groups
|
||||
|
||||
def build_state_offset(self, agent_id):
|
||||
out_offset = [0] * self.get_state_size(agent_id)
|
||||
phase_offset = -0.5
|
||||
out_offset[0] = phase_offset
|
||||
return np.array(out_offset)
|
||||
|
||||
def build_state_scale(self, agent_id):
|
||||
out_scale = [1] * self.get_state_size(agent_id)
|
||||
phase_scale = 2
|
||||
out_scale[0] = phase_scale
|
||||
return np.array(out_scale)
|
||||
|
||||
def get_goal_size(self, agent_id):
|
||||
return 0
|
||||
|
||||
def get_action_size(self, agent_id):
|
||||
ctrl_size = 43 #numDof
|
||||
root_size = 7
|
||||
return ctrl_size - root_size
|
||||
|
||||
def build_goal_norm_groups(self, agent_id):
|
||||
return np.array([])
|
||||
|
||||
def build_goal_offset(self, agent_id):
|
||||
return np.array([])
|
||||
|
||||
def build_goal_scale(self, agent_id):
|
||||
return np.array([])
|
||||
|
||||
def build_action_offset(self, agent_id):
|
||||
out_offset = [0] * self.get_action_size(agent_id)
|
||||
out_offset = [
|
||||
0.0000000000, 0.0000000000, 0.0000000000, -0.200000000, 0.0000000000, 0.0000000000,
|
||||
0.0000000000, -0.200000000, 0.0000000000, 0.0000000000, 0.00000000, -0.2000000, 1.57000000,
|
||||
0.00000000, 0.00000000, 0.00000000, -0.2000000, 0.00000000, 0.00000000, 0.00000000,
|
||||
-0.2000000, -1.5700000, 0.00000000, 0.00000000, 0.00000000, -0.2000000, 1.57000000,
|
||||
0.00000000, 0.00000000, 0.00000000, -0.2000000, 0.00000000, 0.00000000, 0.00000000,
|
||||
-0.2000000, -1.5700000
|
||||
]
|
||||
#see cCtCtrlUtil::BuildOffsetScalePDPrismatic and
|
||||
#see cCtCtrlUtil::BuildOffsetScalePDSpherical
|
||||
return np.array(out_offset)
|
||||
|
||||
def build_action_scale(self, agent_id):
|
||||
out_scale = [1] * self.get_action_size(agent_id)
|
||||
#see cCtCtrlUtil::BuildOffsetScalePDPrismatic and
|
||||
#see cCtCtrlUtil::BuildOffsetScalePDSpherical
|
||||
out_scale = [
|
||||
0.20833333333333, 1.00000000000000, 1.00000000000000, 1.00000000000000, 0.25000000000000,
|
||||
1.00000000000000, 1.00000000000000, 1.00000000000000, 0.12077294685990, 1.00000000000000,
|
||||
1.000000000000, 1.000000000000, 0.159235668789, 0.159235668789, 1.000000000000,
|
||||
1.000000000000, 1.000000000000, 0.079617834394, 1.000000000000, 1.000000000000,
|
||||
1.000000000000, 0.159235668789, 0.120772946859, 1.000000000000, 1.000000000000,
|
||||
1.000000000000, 0.159235668789, 0.159235668789, 1.000000000000, 1.000000000000,
|
||||
1.000000000000, 0.107758620689, 1.000000000000, 1.000000000000, 1.000000000000,
|
||||
0.159235668789
|
||||
]
|
||||
return np.array(out_scale)
|
||||
|
||||
def build_action_bound_min(self, agent_id):
|
||||
#see cCtCtrlUtil::BuildBoundsPDSpherical
|
||||
out_scale = [-1] * self.get_action_size(agent_id)
|
||||
out_scale = [
|
||||
-4.79999999999, -1.00000000000, -1.00000000000, -1.00000000000, -4.00000000000,
|
||||
-1.00000000000, -1.00000000000, -1.00000000000, -7.77999999999, -1.00000000000,
|
||||
-1.000000000, -1.000000000, -7.850000000, -6.280000000, -1.000000000, -1.000000000,
|
||||
-1.000000000, -12.56000000, -1.000000000, -1.000000000, -1.000000000, -4.710000000,
|
||||
-7.779999999, -1.000000000, -1.000000000, -1.000000000, -7.850000000, -6.280000000,
|
||||
-1.000000000, -1.000000000, -1.000000000, -8.460000000, -1.000000000, -1.000000000,
|
||||
-1.000000000, -4.710000000
|
||||
]
|
||||
|
||||
return out_scale
|
||||
|
||||
def build_action_bound_max(self, agent_id):
|
||||
out_scale = [1] * self.get_action_size(agent_id)
|
||||
out_scale = [
|
||||
4.799999999, 1.000000000, 1.000000000, 1.000000000, 4.000000000, 1.000000000, 1.000000000,
|
||||
1.000000000, 8.779999999, 1.000000000, 1.0000000, 1.0000000, 4.7100000, 6.2800000,
|
||||
1.0000000, 1.0000000, 1.0000000, 12.560000, 1.0000000, 1.0000000, 1.0000000, 7.8500000,
|
||||
8.7799999, 1.0000000, 1.0000000, 1.0000000, 4.7100000, 6.2800000, 1.0000000, 1.0000000,
|
||||
1.0000000, 10.100000, 1.0000000, 1.0000000, 1.0000000, 7.8500000
|
||||
]
|
||||
return out_scale
|
||||
|
||||
def set_mode(self, mode):
|
||||
self._mode = mode
|
||||
|
||||
def need_new_action(self, agent_id):
|
||||
if self.t >= self.needs_update_time:
|
||||
self.needs_update_time = self.t + 1. / 30.
|
||||
return True
|
||||
return False
|
||||
|
||||
def record_state(self, agent_id):
|
||||
state = self._humanoid.getState()
|
||||
|
||||
return np.array(state)
|
||||
|
||||
def record_goal(self, agent_id):
|
||||
return np.array([])
|
||||
|
||||
def calc_reward(self, agent_id):
|
||||
kinPose = self._humanoid.computePose(self._humanoid._frameFraction)
|
||||
reward = self._humanoid.getReward(kinPose)
|
||||
return reward
|
||||
|
||||
def set_action(self, agent_id, action):
|
||||
#print("action=",)
|
||||
#for a in action:
|
||||
# print(a)
|
||||
np.savetxt("pb_action.csv", action, delimiter=",")
|
||||
self.desiredPose = self._humanoid.convertActionToPose(action)
|
||||
#we need the target root position and orientation to be zero, to be compatible with deep mimic
|
||||
self.desiredPose[0] = 0
|
||||
self.desiredPose[1] = 0
|
||||
self.desiredPose[2] = 0
|
||||
self.desiredPose[3] = 0
|
||||
self.desiredPose[4] = 0
|
||||
self.desiredPose[5] = 0
|
||||
self.desiredPose[6] = 0
|
||||
target_pose = np.array(self.desiredPose)
|
||||
|
||||
np.savetxt("pb_target_pose.csv", target_pose, delimiter=",")
|
||||
|
||||
#print("set_action: desiredPose=", self.desiredPose)
|
||||
|
||||
def log_val(self, agent_id, val):
|
||||
pass
|
||||
|
||||
def update(self, timeStep):
|
||||
#print("pybullet_deep_mimic_env:update timeStep=",timeStep," t=",self.t)
|
||||
self._pybullet_client.setTimeStep(timeStep)
|
||||
self._humanoid._timeStep = timeStep
|
||||
|
||||
for i in range(1):
|
||||
self.t += timeStep
|
||||
self._humanoid.setSimTime(self.t)
|
||||
|
||||
if self.desiredPose:
|
||||
kinPose = self._humanoid.computePose(self._humanoid._frameFraction)
|
||||
self._humanoid.initializePose(self._humanoid._poseInterpolator,
|
||||
self._humanoid._kin_model,
|
||||
initBase=True)
|
||||
#pos,orn=self._pybullet_client.getBasePositionAndOrientation(self._humanoid._sim_model)
|
||||
#self._pybullet_client.resetBasePositionAndOrientation(self._humanoid._kin_model, [pos[0]+3,pos[1],pos[2]],orn)
|
||||
#print("desiredPositions=",self.desiredPose)
|
||||
maxForces = [
|
||||
0, 0, 0, 0, 0, 0, 0, 200, 200, 200, 200, 50, 50, 50, 50, 200, 200, 200, 200, 150, 90,
|
||||
90, 90, 90, 100, 100, 100, 100, 60, 200, 200, 200, 200, 150, 90, 90, 90, 90, 100, 100,
|
||||
100, 100, 60
|
||||
]
|
||||
|
||||
if self._useStablePD:
|
||||
taus = self._humanoid.computePDForces(self.desiredPose,
|
||||
desiredVelocities=None,
|
||||
maxForces=maxForces)
|
||||
self._humanoid.applyPDForces(taus)
|
||||
else:
|
||||
self._humanoid.setJointMotors(self.desiredPose, maxForces=maxForces)
|
||||
|
||||
self._pybullet_client.stepSimulation()
|
||||
|
||||
def set_sample_count(self, count):
|
||||
return
|
||||
|
||||
def check_terminate(self, agent_id):
|
||||
return Env.Terminate(self.is_episode_end())
|
||||
|
||||
def is_episode_end(self):
|
||||
isEnded = self._humanoid.terminates()
|
||||
#also check maximum time, 20 seconds (todo get from file)
|
||||
#print("self.t=",self.t)
|
||||
if (self.t > 20):
|
||||
isEnded = True
|
||||
return isEnded
|
||||
|
||||
def check_valid_episode(self):
|
||||
#could check if limbs exceed velocity threshold
|
||||
return True
|
||||
|
||||
def getKeyboardEvents(self):
|
||||
return self._pybullet_client.getKeyboardEvents()
|
||||
|
||||
def isKeyTriggered(self, keys, key):
|
||||
o = ord(key)
|
||||
#print("ord=",o)
|
||||
if o in keys:
|
||||
return keys[ord(key)] & self._pybullet_client.KEY_WAS_TRIGGERED
|
||||
return False
|
||||
|
||||
@@ -1,54 +1,66 @@
|
||||
from pybullet_utils import bullet_client
|
||||
import math
|
||||
|
||||
|
||||
|
||||
class QuadrupedPoseInterpolator(object):
|
||||
|
||||
def __init__(self):
|
||||
pass
|
||||
|
||||
|
||||
def ComputeLinVel(self,posStart, posEnd, deltaTime):
|
||||
vel = [(posEnd[0]-posStart[0])/deltaTime,(posEnd[1]-posStart[1])/deltaTime,(posEnd[2]-posStart[2])/deltaTime]
|
||||
|
||||
def ComputeLinVel(self, posStart, posEnd, deltaTime):
|
||||
vel = [(posEnd[0] - posStart[0]) / deltaTime, (posEnd[1] - posStart[1]) / deltaTime,
|
||||
(posEnd[2] - posStart[2]) / deltaTime]
|
||||
return vel
|
||||
|
||||
def ComputeAngVel(self,ornStart, ornEnd, deltaTime, bullet_client):
|
||||
dorn = bullet_client.getDifferenceQuaternion(ornStart,ornEnd)
|
||||
axis,angle = bullet_client.getAxisAngleFromQuaternion(dorn)
|
||||
angVel = [(axis[0]*angle)/deltaTime,(axis[1]*angle)/deltaTime,(axis[2]*angle)/deltaTime]
|
||||
|
||||
def ComputeAngVel(self, ornStart, ornEnd, deltaTime, bullet_client):
|
||||
dorn = bullet_client.getDifferenceQuaternion(ornStart, ornEnd)
|
||||
axis, angle = bullet_client.getAxisAngleFromQuaternion(dorn)
|
||||
angVel = [(axis[0] * angle) / deltaTime, (axis[1] * angle) / deltaTime,
|
||||
(axis[2] * angle) / deltaTime]
|
||||
return angVel
|
||||
|
||||
def ComputeAngVelRel(self,ornStart, ornEnd, deltaTime, bullet_client):
|
||||
ornStartConjugate = [-ornStart[0],-ornStart[1],-ornStart[2],ornStart[3]]
|
||||
pos_diff, q_diff =bullet_client.multiplyTransforms([0,0,0], ornStartConjugate, [0,0,0], ornEnd)
|
||||
axis,angle = bullet_client.getAxisAngleFromQuaternion(q_diff)
|
||||
angVel = [(axis[0]*angle)/deltaTime,(axis[1]*angle)/deltaTime,(axis[2]*angle)/deltaTime]
|
||||
|
||||
def ComputeAngVelRel(self, ornStart, ornEnd, deltaTime, bullet_client):
|
||||
ornStartConjugate = [-ornStart[0], -ornStart[1], -ornStart[2], ornStart[3]]
|
||||
pos_diff, q_diff = bullet_client.multiplyTransforms([0, 0, 0], ornStartConjugate, [0, 0, 0],
|
||||
ornEnd)
|
||||
axis, angle = bullet_client.getAxisAngleFromQuaternion(q_diff)
|
||||
angVel = [(axis[0] * angle) / deltaTime, (axis[1] * angle) / deltaTime,
|
||||
(axis[2] * angle) / deltaTime]
|
||||
return angVel
|
||||
|
||||
def Slerp(self, frameFraction, frameData, frameDataNext,bullet_client ):
|
||||
|
||||
def Slerp(self, frameFraction, frameData, frameDataNext, bullet_client):
|
||||
keyFrameDuration = frameData[0]
|
||||
basePos1Start = [frameData[1],frameData[2],frameData[3]]
|
||||
basePos1End = [frameDataNext[1],frameDataNext[2],frameDataNext[3]]
|
||||
self._basePos = [basePos1Start[0]+frameFraction*(basePos1End[0]-basePos1Start[0]),
|
||||
basePos1Start[1]+frameFraction*(basePos1End[1]-basePos1Start[1]),
|
||||
basePos1Start[2]+frameFraction*(basePos1End[2]-basePos1Start[2])]
|
||||
self._baseLinVel = self.ComputeLinVel(basePos1Start,basePos1End, keyFrameDuration)
|
||||
baseOrn1Start = [frameData[5],frameData[6], frameData[7],frameData[4]]
|
||||
baseOrn1Next = [frameDataNext[5],frameDataNext[6], frameDataNext[7],frameDataNext[4]]
|
||||
self._baseOrn = bullet_client.getQuaternionSlerp(baseOrn1Start,baseOrn1Next,frameFraction)
|
||||
self._baseAngVel = self.ComputeAngVel(baseOrn1Start,baseOrn1Next, keyFrameDuration, bullet_client)
|
||||
|
||||
jointPositions=[self._basePos[0],self._basePos[1],self._basePos[2],
|
||||
self._baseOrn[0],self._baseOrn[1],self._baseOrn[2],self._baseOrn[3]]
|
||||
jointVelocities=[self._baseLinVel[0],self._baseLinVel[1],self._baseLinVel[2],
|
||||
self._baseAngVel[0],self._baseAngVel[1],self._baseAngVel[2]]
|
||||
|
||||
for j in range (12):
|
||||
index=j+8
|
||||
jointPosStart=frameData[index]
|
||||
jointPosEnd=frameDataNext[index]
|
||||
jointPos=jointPosStart+frameFraction*(jointPosEnd-jointPosStart)
|
||||
jointVel=(jointPosEnd-jointPosStart)/keyFrameDuration
|
||||
basePos1Start = [frameData[1], frameData[2], frameData[3]]
|
||||
basePos1End = [frameDataNext[1], frameDataNext[2], frameDataNext[3]]
|
||||
self._basePos = [
|
||||
basePos1Start[0] + frameFraction * (basePos1End[0] - basePos1Start[0]),
|
||||
basePos1Start[1] + frameFraction * (basePos1End[1] - basePos1Start[1]),
|
||||
basePos1Start[2] + frameFraction * (basePos1End[2] - basePos1Start[2])
|
||||
]
|
||||
self._baseLinVel = self.ComputeLinVel(basePos1Start, basePos1End, keyFrameDuration)
|
||||
baseOrn1Start = [frameData[5], frameData[6], frameData[7], frameData[4]]
|
||||
baseOrn1Next = [frameDataNext[5], frameDataNext[6], frameDataNext[7], frameDataNext[4]]
|
||||
self._baseOrn = bullet_client.getQuaternionSlerp(baseOrn1Start, baseOrn1Next, frameFraction)
|
||||
self._baseAngVel = self.ComputeAngVel(baseOrn1Start, baseOrn1Next, keyFrameDuration,
|
||||
bullet_client)
|
||||
|
||||
jointPositions = [
|
||||
self._basePos[0], self._basePos[1], self._basePos[2], self._baseOrn[0], self._baseOrn[1],
|
||||
self._baseOrn[2], self._baseOrn[3]
|
||||
]
|
||||
jointVelocities = [
|
||||
self._baseLinVel[0], self._baseLinVel[1], self._baseLinVel[2], self._baseAngVel[0],
|
||||
self._baseAngVel[1], self._baseAngVel[2]
|
||||
]
|
||||
|
||||
for j in range(12):
|
||||
index = j + 8
|
||||
jointPosStart = frameData[index]
|
||||
jointPosEnd = frameDataNext[index]
|
||||
jointPos = jointPosStart + frameFraction * (jointPosEnd - jointPosStart)
|
||||
jointVel = (jointPosEnd - jointPosStart) / keyFrameDuration
|
||||
jointPositions.append(jointPos)
|
||||
jointVelocities.append(jointVel)
|
||||
self._jointPositions = jointPositions
|
||||
self._jointVelocities = jointVelocities
|
||||
return jointPositions,jointVelocities
|
||||
return jointPositions, jointVelocities
|
||||
|
||||
File diff suppressed because it is too large
@@ -8,87 +8,89 @@ import pybullet as p1
|
||||
import humanoid_pose_interpolator
|
||||
import numpy as np
|
||||
|
||||
pybullet_client = bullet_client.BulletClient(connection_mode=p1.GUI)
|
||||
pybullet_client = bullet_client.BulletClient(connection_mode=p1.GUI)
|
||||
|
||||
pybullet_client.setAdditionalSearchPath(pybullet_data.getDataPath())
|
||||
z2y = pybullet_client.getQuaternionFromEuler([-math.pi*0.5,0,0])
|
||||
z2y = pybullet_client.getQuaternionFromEuler([-math.pi * 0.5, 0, 0])
|
||||
#planeId = pybullet_client.loadURDF("plane.urdf",[0,0,0],z2y)
|
||||
planeId= pybullet_client.loadURDF("plane_implicit.urdf",[0,0,0],z2y, useMaximalCoordinates=True)
|
||||
pybullet_client.changeDynamics(planeId, linkIndex=-1, lateralFriction=0.9)
|
||||
planeId = pybullet_client.loadURDF("plane_implicit.urdf", [0, 0, 0],
|
||||
z2y,
|
||||
useMaximalCoordinates=True)
|
||||
pybullet_client.changeDynamics(planeId, linkIndex=-1, lateralFriction=0.9)
|
||||
#print("planeId=",planeId)
|
||||
|
||||
pybullet_client.configureDebugVisualizer(pybullet_client.COV_ENABLE_Y_AXIS_UP,1)
|
||||
pybullet_client.setGravity(0,-9.8,0)
|
||||
pybullet_client.configureDebugVisualizer(pybullet_client.COV_ENABLE_Y_AXIS_UP, 1)
|
||||
pybullet_client.setGravity(0, -9.8, 0)
|
||||
|
||||
pybullet_client.setPhysicsEngineParameter(numSolverIterations=10)
|
||||
|
||||
|
||||
mocapData = motion_capture_data.MotionCaptureData()
|
||||
#motionPath = pybullet_data.getDataPath()+"/data/motions/humanoid3d_walk.txt"
|
||||
motionPath = pybullet_data.getDataPath()+"/data/motions/humanoid3d_backflip.txt"
|
||||
motionPath = pybullet_data.getDataPath() + "/data/motions/humanoid3d_backflip.txt"
|
||||
mocapData.Load(motionPath)
|
||||
timeStep = 1./600
|
||||
useFixedBase=False
|
||||
timeStep = 1. / 600
|
||||
useFixedBase = False
|
||||
humanoid = humanoid_stable_pd.HumanoidStablePD(pybullet_client, mocapData, timeStep, useFixedBase)
|
||||
isInitialized = True
|
||||
|
||||
pybullet_client.setTimeStep(timeStep)
|
||||
pybullet_client.setPhysicsEngineParameter(numSubSteps=2)
|
||||
timeId = pybullet_client.addUserDebugParameter("time",0,10,0)
|
||||
timeId = pybullet_client.addUserDebugParameter("time", 0, 10, 0)
|
||||
|
||||
|
||||
|
||||
def isKeyTriggered(keys, key):
|
||||
o = ord(key)
|
||||
if o in keys:
|
||||
return keys[ord(key)] & pybullet_client.KEY_WAS_TRIGGERED
|
||||
return False
|
||||
|
||||
|
||||
|
||||
animating = False
|
||||
singleStep = False
|
||||
|
||||
|
||||
t=0
|
||||
t = 0
|
||||
while (1):
|
||||
|
||||
keys = pybullet_client.getKeyboardEvents()
|
||||
#print(keys)
|
||||
if isKeyTriggered(keys, ' '):
|
||||
animating = not animating
|
||||
animating = not animating
|
||||
|
||||
if isKeyTriggered(keys, 'b'):
|
||||
singleStep = True
|
||||
|
||||
singleStep = True
|
||||
|
||||
if animating or singleStep:
|
||||
|
||||
|
||||
|
||||
singleStep = False
|
||||
#t = pybullet_client.readUserDebugParameter(timeId)
|
||||
#print("t=",t)
|
||||
for i in range (1):
|
||||
for i in range(1):
|
||||
|
||||
print("t=",t)
|
||||
print("t=", t)
|
||||
humanoid.setSimTime(t)
|
||||
|
||||
|
||||
humanoid.computePose(humanoid._frameFraction)
|
||||
pose = humanoid._poseInterpolator
|
||||
#humanoid.initializePose(pose=pose, phys_model = humanoid._sim_model, initBase=True, initializeVelocity=True)
|
||||
#humanoid.resetPose()
|
||||
|
||||
|
||||
|
||||
desiredPose = humanoid.computePose(humanoid._frameFraction)
|
||||
#desiredPose = desiredPose.GetPose()
|
||||
#desiredPose = desiredPose.GetPose()
|
||||
#curPose = HumanoidPoseInterpolator()
|
||||
#curPose.reset()
|
||||
s = humanoid.getState()
|
||||
#np.savetxt("pb_record_state_s.csv", s, delimiter=",")
|
||||
maxForces = [0,0,0,0,0,0,0,200,200,200,200, 50,50,50,50, 200,200,200,200, 150, 90,90,90,90, 100,100,100,100, 60, 200,200,200,200, 150, 90, 90, 90, 90, 100,100,100,100, 60]
|
||||
maxForces = [
|
||||
0, 0, 0, 0, 0, 0, 0, 200, 200, 200, 200, 50, 50, 50, 50, 200, 200, 200, 200, 150, 90, 90,
|
||||
90, 90, 100, 100, 100, 100, 60, 200, 200, 200, 200, 150, 90, 90, 90, 90, 100, 100, 100,
|
||||
100, 60
|
||||
]
|
||||
taus = humanoid.computePDForces(desiredPose, desiredVelocities=None, maxForces=maxForces)
|
||||
|
||||
|
||||
#print("taus=",taus)
|
||||
humanoid.applyPDForces(taus)
|
||||
|
||||
|
||||
pybullet_client.stepSimulation()
|
||||
t+=1./600.
|
||||
|
||||
|
||||
time.sleep(1./600.)
|
||||
t += 1. / 600.
|
||||
|
||||
time.sleep(1. / 600.)
|
||||
|
||||
@@ -8,240 +8,258 @@ import motion_capture_data
|
||||
import quadrupedPoseInterpolator
|
||||
|
||||
useConstraints = False
|
||||
|
||||
p = bullet_client.BulletClient(connection_mode=p1.GUI)
|
||||
|
||||
p = bullet_client.BulletClient(connection_mode=p1.GUI)
|
||||
p.setAdditionalSearchPath(pybullet_data.getDataPath())
|
||||
|
||||
plane = p.loadURDF("plane.urdf")
|
||||
p.setGravity(0,0,-10)
|
||||
timeStep=1./500
|
||||
p.setGravity(0, 0, -10)
|
||||
timeStep = 1. / 500
|
||||
p.setTimeStep(timeStep)
|
||||
#p.setDefaultContactERP(0)
|
||||
#urdfFlags = p.URDF_USE_SELF_COLLISION+p.URDF_USE_SELF_COLLISION_EXCLUDE_ALL_PARENTS
|
||||
#urdfFlags = p.URDF_USE_SELF_COLLISION+p.URDF_USE_SELF_COLLISION_EXCLUDE_ALL_PARENTS
|
||||
urdfFlags = p.URDF_USE_SELF_COLLISION
|
||||
|
||||
|
||||
startPos=[0.007058990464444105, 0.03149299192130908, 0.4918981912395484]
|
||||
startOrn=[0.005934649695708604, 0.7065453990917289, 0.7076373820553712, -0.0027774940359030264]
|
||||
quadruped = p.loadURDF("laikago/laikago.urdf",startPos,startOrn, flags = urdfFlags,useFixedBase=False)
|
||||
p.resetBasePositionAndOrientation(quadruped,startPos,startOrn)
|
||||
startPos = [0.007058990464444105, 0.03149299192130908, 0.4918981912395484]
|
||||
startOrn = [0.005934649695708604, 0.7065453990917289, 0.7076373820553712, -0.0027774940359030264]
|
||||
quadruped = p.loadURDF("laikago/laikago.urdf",
|
||||
startPos,
|
||||
startOrn,
|
||||
flags=urdfFlags,
|
||||
useFixedBase=False)
|
||||
p.resetBasePositionAndOrientation(quadruped, startPos, startOrn)
|
||||
if not useConstraints:
|
||||
for j in range(p.getNumJoints(quadruped)):
|
||||
p.setJointMotorControl2(quadruped,j,p.POSITION_CONTROL,force=0)
|
||||
|
||||
#This cube is added as a soft constraint to keep the laikago from falling
|
||||
p.setJointMotorControl2(quadruped, j, p.POSITION_CONTROL, force=0)
|
||||
|
||||
#This cube is added as a soft constraint to keep the laikago from falling
|
||||
#since we didn't train it yet, it doesn't balance
|
||||
cube = p.loadURDF("cube_no_rotation.urdf",[0,0,-0.5],[0,0.5,0.5,0])
|
||||
p.setCollisionFilterGroupMask(cube,-1,0,0)
|
||||
cube = p.loadURDF("cube_no_rotation.urdf", [0, 0, -0.5], [0, 0.5, 0.5, 0])
|
||||
p.setCollisionFilterGroupMask(cube, -1, 0, 0)
|
||||
for j in range(p.getNumJoints(cube)):
|
||||
p.setJointMotorControl2(cube,j,p.POSITION_CONTROL,force=0)
|
||||
p.setCollisionFilterGroupMask(cube,j,0,0)
|
||||
p.changeVisualShape(cube,j,rgbaColor=[1,0,0,0])
|
||||
cid = p.createConstraint(cube,p.getNumJoints(cube)-1,quadruped,-1,p.JOINT_FIXED,[0,0,0],[0,1,0],[0,0,0])
|
||||
p.setJointMotorControl2(cube, j, p.POSITION_CONTROL, force=0)
|
||||
p.setCollisionFilterGroupMask(cube, j, 0, 0)
|
||||
p.changeVisualShape(cube, j, rgbaColor=[1, 0, 0, 0])
|
||||
cid = p.createConstraint(cube,
|
||||
p.getNumJoints(cube) - 1, quadruped, -1, p.JOINT_FIXED, [0, 0, 0],
|
||||
[0, 1, 0], [0, 0, 0])
|
||||
p.changeConstraint(cid, maxForce=10)
|
||||
|
||||
jointIds = []
|
||||
paramIds = []
|
||||
jointOffsets = []
|
||||
jointDirections = [-1, 1, 1, 1, 1, 1, -1, 1, 1, 1, 1, 1]
|
||||
jointAngles = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
|
||||
|
||||
jointIds=[]
|
||||
paramIds=[]
|
||||
jointOffsets=[]
|
||||
jointDirections=[-1,1,1,1,1,1,-1,1,1,1,1,1]
|
||||
jointAngles=[0,0,0,0,0,0,0,0,0,0,0,0]
|
||||
|
||||
for i in range (4):
|
||||
for i in range(4):
|
||||
jointOffsets.append(0)
|
||||
jointOffsets.append(-0.7)
|
||||
jointOffsets.append(0.7)
|
||||
|
||||
maxForceId = p.addUserDebugParameter("maxForce",0,100,20)
|
||||
maxForceId = p.addUserDebugParameter("maxForce", 0, 100, 20)
|
||||
|
||||
for j in range (p.getNumJoints(quadruped)):
|
||||
p.changeDynamics(quadruped,j,linearDamping=0, angularDamping=0)
|
||||
info = p.getJointInfo(quadruped,j)
|
||||
#print(info)
|
||||
jointName = info[1]
|
||||
jointType = info[2]
|
||||
if (jointType==p.JOINT_PRISMATIC or jointType==p.JOINT_REVOLUTE):
|
||||
jointIds.append(j)
|
||||
|
||||
|
||||
startQ=[0.08389, 0.8482, -1.547832, -0.068933, 0.625726, -1.272086, 0.074398, 0.61135, -1.255892, -0.068262, 0.836745, -1.534517]
|
||||
for j in range(p.getNumJoints(quadruped)):
|
||||
p.resetJointState(quadruped,jointIds[j],jointDirections[j]*startQ[j]+jointOffsets[j])
|
||||
p.changeDynamics(quadruped, j, linearDamping=0, angularDamping=0)
|
||||
info = p.getJointInfo(quadruped, j)
|
||||
#print(info)
|
||||
jointName = info[1]
|
||||
jointType = info[2]
|
||||
if (jointType == p.JOINT_PRISMATIC or jointType == p.JOINT_REVOLUTE):
|
||||
jointIds.append(j)
|
||||
|
||||
startQ = [
|
||||
0.08389, 0.8482, -1.547832, -0.068933, 0.625726, -1.272086, 0.074398, 0.61135, -1.255892,
|
||||
-0.068262, 0.836745, -1.534517
|
||||
]
|
||||
for j in range(p.getNumJoints(quadruped)):
|
||||
p.resetJointState(quadruped, jointIds[j], jointDirections[j] * startQ[j] + jointOffsets[j])
|
||||
|
||||
qpi = quadrupedPoseInterpolator.QuadrupedPoseInterpolator()
|
||||
|
||||
#enable collision between lower legs
|
||||
|
||||
for j in range (p.getNumJoints(quadruped)):
|
||||
print(p.getJointInfo(quadruped,j))
|
||||
for j in range(p.getNumJoints(quadruped)):
|
||||
print(p.getJointInfo(quadruped, j))
|
||||
|
||||
#2,5,8 and 11 are the lower legs
|
||||
lower_legs = [2,5,8,11]
|
||||
lower_legs = [2, 5, 8, 11]
|
||||
for l0 in lower_legs:
|
||||
for l1 in lower_legs:
|
||||
if (l1>l0):
|
||||
enableCollision = 1
|
||||
print("collision for pair",l0,l1, p.getJointInfo(quadruped,l0)[12],p.getJointInfo(quadruped,l1)[12], "enabled=",enableCollision)
|
||||
p.setCollisionFilterPair(quadruped, quadruped, 2,5,enableCollision)
|
||||
for l1 in lower_legs:
|
||||
if (l1 > l0):
|
||||
enableCollision = 1
|
||||
print("collision for pair", l0, l1,
|
||||
p.getJointInfo(quadruped, l0)[12],
|
||||
p.getJointInfo(quadruped, l1)[12], "enabled=", enableCollision)
|
||||
p.setCollisionFilterPair(quadruped, quadruped, 2, 5, enableCollision)
|
||||
|
||||
jointIds=[]
|
||||
paramIds=[]
|
||||
jointOffsets=[]
|
||||
jointDirections=[-1,1,1,1,1,1,-1,1,1,1,1,1]
|
||||
jointAngles=[0,0,0,0,0,0,0,0,0,0,0,0]
|
||||
jointIds = []
|
||||
paramIds = []
|
||||
jointOffsets = []
|
||||
jointDirections = [-1, 1, 1, 1, 1, 1, -1, 1, 1, 1, 1, 1]
|
||||
jointAngles = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
|
||||
|
||||
for i in range (4):
|
||||
jointOffsets.append(0)
|
||||
jointOffsets.append(-0.7)
|
||||
jointOffsets.append(0.7)
|
||||
for i in range(4):
|
||||
jointOffsets.append(0)
|
||||
jointOffsets.append(-0.7)
|
||||
jointOffsets.append(0.7)
|
||||
|
||||
maxForceId = p.addUserDebugParameter("maxForce",0,100,20)
|
||||
maxForceId = p.addUserDebugParameter("maxForce", 0, 100, 20)
|
||||
|
||||
for j in range (p.getNumJoints(quadruped)):
|
||||
p.changeDynamics(quadruped,j,linearDamping=0, angularDamping=0)
|
||||
info = p.getJointInfo(quadruped,j)
|
||||
#print(info)
|
||||
jointName = info[1]
|
||||
jointType = info[2]
|
||||
if (jointType==p.JOINT_PRISMATIC or jointType==p.JOINT_REVOLUTE):
|
||||
jointIds.append(j)
|
||||
for j in range(p.getNumJoints(quadruped)):
|
||||
p.changeDynamics(quadruped, j, linearDamping=0, angularDamping=0)
|
||||
info = p.getJointInfo(quadruped, j)
|
||||
#print(info)
|
||||
jointName = info[1]
|
||||
jointType = info[2]
|
||||
if (jointType == p.JOINT_PRISMATIC or jointType == p.JOINT_REVOLUTE):
|
||||
jointIds.append(j)
|
||||
|
||||
|
||||
p.getCameraImage(480,320)
|
||||
p.getCameraImage(480, 320)
|
||||
p.setRealTimeSimulation(0)
|
||||
|
||||
joints=[]
|
||||
|
||||
joints = []
|
||||
|
||||
mocapData = motion_capture_data.MotionCaptureData()
|
||||
|
||||
motionPath = pybullet_data.getDataPath()+"/data/motions/laikago_walk.txt"
|
||||
motionPath = pybullet_data.getDataPath() + "/data/motions/laikago_walk.txt"
|
||||
|
||||
mocapData.Load(motionPath)
|
||||
print("mocapData.NumFrames=",mocapData.NumFrames())
|
||||
print("mocapData.KeyFrameDuraction=",mocapData.KeyFrameDuraction())
|
||||
print("mocapData.getCycleTime=",mocapData.getCycleTime())
|
||||
print("mocapData.computeCycleOffset=",mocapData.computeCycleOffset())
|
||||
print("mocapData.NumFrames=", mocapData.NumFrames())
|
||||
print("mocapData.KeyFrameDuraction=", mocapData.KeyFrameDuraction())
|
||||
print("mocapData.getCycleTime=", mocapData.getCycleTime())
|
||||
print("mocapData.computeCycleOffset=", mocapData.computeCycleOffset())
|
||||
|
||||
stablePD = pd_controller_stable.PDControllerStable(p)
|
||||
|
||||
cycleTime = mocapData.getCycleTime()
|
||||
t=0
|
||||
t = 0
|
||||
|
||||
while t<10.*cycleTime:
|
||||
while t < 10. * cycleTime:
|
||||
#get interpolated joint
|
||||
keyFrameDuration = mocapData.KeyFrameDuraction()
|
||||
cycleTime = mocapData.getCycleTime()
|
||||
cycleCount = mocapData.calcCycleCount(t, cycleTime)
|
||||
|
||||
|
||||
#print("cycleTime=",cycleTime)
|
||||
#print("cycleCount=",cycleCount)
|
||||
|
||||
|
||||
#print("cycles=",cycles)
|
||||
frameTime = t - cycleCount*cycleTime
|
||||
frameTime = t - cycleCount * cycleTime
|
||||
#print("frameTime=",frameTime)
|
||||
if (frameTime<0):
|
||||
if (frameTime < 0):
|
||||
frameTime += cycleTime
|
||||
|
||||
frame = int(frameTime/keyFrameDuration)
|
||||
frameNext = frame+1
|
||||
if (frameNext >= mocapData.NumFrames()):
|
||||
frame = int(frameTime / keyFrameDuration)
|
||||
frameNext = frame + 1
|
||||
if (frameNext >= mocapData.NumFrames()):
|
||||
frameNext = frame
|
||||
frameFraction = (frameTime - frame*keyFrameDuration)/(keyFrameDuration)
|
||||
frameFraction = (frameTime - frame * keyFrameDuration) / (keyFrameDuration)
|
||||
#print("frame=",frame)
|
||||
#print("frameFraction=",frameFraction)
|
||||
frameData = mocapData._motion_data['Frames'][frame]
|
||||
frameDataNext = mocapData._motion_data['Frames'][frameNext]
|
||||
|
||||
jointsStr,qdot=qpi.Slerp(frameFraction, frameData, frameDataNext, p)
|
||||
|
||||
|
||||
jointsStr, qdot = qpi.Slerp(frameFraction, frameData, frameDataNext, p)
|
||||
|
||||
maxForce = p.readUserDebugParameter(maxForceId)
|
||||
print("jointIds=",jointIds)
|
||||
print("jointIds=", jointIds)
|
||||
|
||||
if useConstraints:
|
||||
for j in range (12):
|
||||
for j in range(12):
|
||||
#skip the base positional dofs
|
||||
targetPos = float(jointsStr[j+7])
|
||||
p.setJointMotorControl2(quadruped,jointIds[j],p.POSITION_CONTROL,jointDirections[j]*targetPos+jointOffsets[j], force=maxForce)
|
||||
|
||||
targetPos = float(jointsStr[j + 7])
|
||||
p.setJointMotorControl2(quadruped,
|
||||
jointIds[j],
|
||||
p.POSITION_CONTROL,
|
||||
jointDirections[j] * targetPos + jointOffsets[j],
|
||||
force=maxForce)
|
||||
|
||||
else:
|
||||
desiredPositions=[]
|
||||
for j in range (7):
|
||||
desiredPositions = []
|
||||
for j in range(7):
|
||||
targetPosUnmodified = float(jointsStr[j])
|
||||
desiredPositions.append(targetPosUnmodified)
|
||||
for j in range (12):
|
||||
targetPosUnmodified = float(jointsStr[j+7])
|
||||
targetPos=jointDirections[j]*targetPosUnmodified+jointOffsets[j]
|
||||
for j in range(12):
|
||||
targetPosUnmodified = float(jointsStr[j + 7])
|
||||
targetPos = jointDirections[j] * targetPosUnmodified + jointOffsets[j]
|
||||
desiredPositions.append(targetPos)
|
||||
numBaseDofs=6
|
||||
totalDofs=12+numBaseDofs
|
||||
desiredVelocities=None
|
||||
if desiredVelocities==None:
|
||||
desiredVelocities = [0]*totalDofs
|
||||
taus = stablePD.computePD(bodyUniqueId=quadruped,
|
||||
jointIndices = jointIds,
|
||||
desiredPositions = desiredPositions,
|
||||
desiredVelocities = desiredVelocities,
|
||||
kps = [4000]*totalDofs,
|
||||
kds = [40]*totalDofs,
|
||||
maxForces = [500]*totalDofs,
|
||||
timeStep=timeStep)
|
||||
|
||||
dofIndex=6
|
||||
numBaseDofs = 6
|
||||
totalDofs = 12 + numBaseDofs
|
||||
desiredVelocities = None
|
||||
if desiredVelocities is None:
|
||||
desiredVelocities = [0] * totalDofs
|
||||
taus = stablePD.computePD(bodyUniqueId=quadruped,
|
||||
jointIndices=jointIds,
|
||||
desiredPositions=desiredPositions,
|
||||
desiredVelocities=desiredVelocities,
|
||||
kps=[4000] * totalDofs,
|
||||
kds=[40] * totalDofs,
|
||||
maxForces=[500] * totalDofs,
|
||||
timeStep=timeStep)
|
||||
|
||||
dofIndex = 6
|
||||
scaling = 1
|
||||
for index in range (len(jointIds)):
|
||||
for index in range(len(jointIds)):
|
||||
jointIndex = jointIds[index]
|
||||
force=[scaling*taus[dofIndex]]
|
||||
print("force[", jointIndex,"]=",force)
|
||||
p.setJointMotorControlMultiDof(quadruped, jointIndex, controlMode=p.TORQUE_CONTROL, force=force)
|
||||
dofIndex+=1
|
||||
|
||||
force = [scaling * taus[dofIndex]]
|
||||
print("force[", jointIndex, "]=", force)
|
||||
p.setJointMotorControlMultiDof(quadruped,
|
||||
jointIndex,
|
||||
controlMode=p.TORQUE_CONTROL,
|
||||
force=force)
|
||||
dofIndex += 1
|
||||
|
||||
p.stepSimulation()
|
||||
t+=timeStep
|
||||
t += timeStep
|
||||
time.sleep(timeStep)
|
||||
|
||||
useOrgData=False
|
||||
useOrgData = False
|
||||
if useOrgData:
|
||||
with open("data1.txt","r") as filestream:
|
||||
for line in filestream:
|
||||
maxForce = p.readUserDebugParameter(maxForceId)
|
||||
currentline = line.split(",")
|
||||
frame = currentline[0]
|
||||
t = currentline[1]
|
||||
joints=currentline[2:14]
|
||||
for j in range (12):
|
||||
targetPos = float(joints[j])
|
||||
p.setJointMotorControl2(quadruped,jointIds[j],p.POSITION_CONTROL,jointDirections[j]*targetPos+jointOffsets[j], force=maxForce)
|
||||
p.stepSimulation()
|
||||
for lower_leg in lower_legs:
|
||||
#print("points for ", quadruped, " link: ", lower_leg)
|
||||
pts = p.getContactPoints(quadruped,-1, lower_leg)
|
||||
#print("num points=",len(pts))
|
||||
#for pt in pts:
|
||||
# print(pt[9])
|
||||
time.sleep(1./500.)
|
||||
|
||||
|
||||
for j in range (p.getNumJoints(quadruped)):
|
||||
p.changeDynamics(quadruped,j,linearDamping=0, angularDamping=0)
|
||||
info = p.getJointInfo(quadruped,j)
|
||||
js = p.getJointState(quadruped,j)
|
||||
#print(info)
|
||||
jointName = info[1]
|
||||
jointType = info[2]
|
||||
if (jointType==p.JOINT_PRISMATIC or jointType==p.JOINT_REVOLUTE):
|
||||
paramIds.append(p.addUserDebugParameter(jointName.decode("utf-8"),-4,4,(js[0]-jointOffsets[j])/jointDirections[j]))
|
||||
with open("data1.txt", "r") as filestream:
|
||||
for line in filestream:
|
||||
maxForce = p.readUserDebugParameter(maxForceId)
|
||||
currentline = line.split(",")
|
||||
frame = currentline[0]
|
||||
t = currentline[1]
|
||||
joints = currentline[2:14]
|
||||
for j in range(12):
|
||||
targetPos = float(joints[j])
|
||||
p.setJointMotorControl2(quadruped,
|
||||
jointIds[j],
|
||||
p.POSITION_CONTROL,
|
||||
jointDirections[j] * targetPos + jointOffsets[j],
|
||||
force=maxForce)
|
||||
p.stepSimulation()
|
||||
for lower_leg in lower_legs:
|
||||
#print("points for ", quadruped, " link: ", lower_leg)
|
||||
pts = p.getContactPoints(quadruped, -1, lower_leg)
|
||||
#print("num points=",len(pts))
|
||||
#for pt in pts:
|
||||
# print(pt[9])
|
||||
time.sleep(1. / 500.)
|
||||
|
||||
for j in range(p.getNumJoints(quadruped)):
|
||||
p.changeDynamics(quadruped, j, linearDamping=0, angularDamping=0)
|
||||
info = p.getJointInfo(quadruped, j)
|
||||
js = p.getJointState(quadruped, j)
|
||||
#print(info)
|
||||
jointName = info[1]
|
||||
jointType = info[2]
|
||||
if (jointType == p.JOINT_PRISMATIC or jointType == p.JOINT_REVOLUTE):
|
||||
paramIds.append(
|
||||
p.addUserDebugParameter(jointName.decode("utf-8"), -4, 4,
|
||||
(js[0] - jointOffsets[j]) / jointDirections[j]))
|
||||
|
||||
p.setRealTimeSimulation(1)
|
||||
|
||||
while (1):
|
||||
|
||||
for i in range(len(paramIds)):
|
||||
c = paramIds[i]
|
||||
targetPos = p.readUserDebugParameter(c)
|
||||
maxForce = p.readUserDebugParameter(maxForceId)
|
||||
p.setJointMotorControl2(quadruped,jointIds[i],p.POSITION_CONTROL,jointDirections[i]*targetPos+jointOffsets[i], force=maxForce)
|
||||
|
||||
|
||||
for i in range(len(paramIds)):
|
||||
c = paramIds[i]
|
||||
targetPos = p.readUserDebugParameter(c)
|
||||
maxForce = p.readUserDebugParameter(maxForceId)
|
||||
p.setJointMotorControl2(quadruped,
|
||||
jointIds[i],
|
||||
p.POSITION_CONTROL,
|
||||
jointDirections[i] * targetPos + jointOffsets[i],
|
||||
force=maxForce)
|
||||
|
||||
@@ -5,17 +5,18 @@ import pybullet_data

AGENT_TYPE_KEY = "AgentType"

def build_agent(world, id, file):
agent = None
with open(pybullet_data.getDataPath()+"/"+file) as data_file:
json_data = json.load(data_file)

assert AGENT_TYPE_KEY in json_data
agent_type = json_data[AGENT_TYPE_KEY]

if (agent_type == PPOAgent.NAME):
agent = PPOAgent(world, id, json_data)
else:
assert False, 'Unsupported agent type: ' + agent_type

return agent
def build_agent(world, id, file):
agent = None
with open(pybullet_data.getDataPath() + "/" + file) as data_file:
json_data = json.load(data_file)

assert AGENT_TYPE_KEY in json_data
agent_type = json_data[AGENT_TYPE_KEY]

if (agent_type == PPOAgent.NAME):
agent = PPOAgent(world, id, json_data)
else:
assert False, 'Unsupported agent type: ' + agent_type

return agent

@@ -2,53 +2,54 @@ import json
|
||||
import numpy as np
|
||||
import pybullet_utils.math_util as MathUtil
|
||||
|
||||
|
||||
class ExpParams(object):
|
||||
RATE_KEY = 'Rate'
|
||||
INIT_ACTION_RATE_KEY = 'InitActionRate'
|
||||
NOISE_KEY = 'Noise'
|
||||
NOISE_INTERNAL_KEY = 'NoiseInternal'
|
||||
TEMP_KEY = 'Temp'
|
||||
RATE_KEY = 'Rate'
|
||||
INIT_ACTION_RATE_KEY = 'InitActionRate'
|
||||
NOISE_KEY = 'Noise'
|
||||
NOISE_INTERNAL_KEY = 'NoiseInternal'
|
||||
TEMP_KEY = 'Temp'
|
||||
|
||||
def __init__(self):
|
||||
self.rate = 0.2
|
||||
self.init_action_rate = 0
|
||||
self.noise = 0.1
|
||||
self.noise_internal = 0
|
||||
self.temp = 0.1
|
||||
return
|
||||
def __init__(self):
|
||||
self.rate = 0.2
|
||||
self.init_action_rate = 0
|
||||
self.noise = 0.1
|
||||
self.noise_internal = 0
|
||||
self.temp = 0.1
|
||||
return
|
||||
|
||||
def __str__(self):
|
||||
str = ''
|
||||
str += '{}: {:.2f}\n'.format(self.RATE_KEY, self.rate)
|
||||
str += '{}: {:.2f}\n'.format(self.INIT_ACTION_RATE_KEY, self.init_action_rate)
|
||||
str += '{}: {:.2f}\n'.format(self.NOISE_KEY, self.noise)
|
||||
str += '{}: {:.2f}\n'.format(self.NOISE_INTERNAL_KEY, self.noise_internal)
|
||||
str += '{}: {:.2f}\n'.format(self.TEMP_KEY, self.temp)
|
||||
return str
|
||||
def __str__(self):
|
||||
str = ''
|
||||
str += '{}: {:.2f}\n'.format(self.RATE_KEY, self.rate)
|
||||
str += '{}: {:.2f}\n'.format(self.INIT_ACTION_RATE_KEY, self.init_action_rate)
|
||||
str += '{}: {:.2f}\n'.format(self.NOISE_KEY, self.noise)
|
||||
str += '{}: {:.2f}\n'.format(self.NOISE_INTERNAL_KEY, self.noise_internal)
|
||||
str += '{}: {:.2f}\n'.format(self.TEMP_KEY, self.temp)
|
||||
return str
|
||||
|
||||
def load(self, json_data):
|
||||
if (self.RATE_KEY in json_data):
|
||||
self.rate = json_data[self.RATE_KEY]
|
||||
def load(self, json_data):
|
||||
if (self.RATE_KEY in json_data):
|
||||
self.rate = json_data[self.RATE_KEY]
|
||||
|
||||
if (self.INIT_ACTION_RATE_KEY in json_data):
|
||||
self.init_action_rate = json_data[self.INIT_ACTION_RATE_KEY]
|
||||
if (self.INIT_ACTION_RATE_KEY in json_data):
|
||||
self.init_action_rate = json_data[self.INIT_ACTION_RATE_KEY]
|
||||
|
||||
if (self.NOISE_KEY in json_data):
|
||||
self.noise = json_data[self.NOISE_KEY]
|
||||
if (self.NOISE_KEY in json_data):
|
||||
self.noise = json_data[self.NOISE_KEY]
|
||||
|
||||
if (self.NOISE_INTERNAL_KEY in json_data):
|
||||
self.noise_internal = json_data[self.NOISE_INTERNAL_KEY]
|
||||
if (self.NOISE_INTERNAL_KEY in json_data):
|
||||
self.noise_internal = json_data[self.NOISE_INTERNAL_KEY]
|
||||
|
||||
if (self.TEMP_KEY in json_data):
|
||||
self.temp = json_data[self.TEMP_KEY]
|
||||
if (self.TEMP_KEY in json_data):
|
||||
self.temp = json_data[self.TEMP_KEY]
|
||||
|
||||
return
|
||||
return
|
||||
|
||||
def lerp(self, other, t):
|
||||
lerp_params = ExpParams()
|
||||
lerp_params.rate = MathUtil.lerp(self.rate, other.rate, t)
|
||||
lerp_params.init_action_rate = MathUtil.lerp(self.init_action_rate, other.init_action_rate, t)
|
||||
lerp_params.noise = MathUtil.lerp(self.noise, other.noise, t)
|
||||
lerp_params.noise_internal = MathUtil.lerp(self.noise_internal, other.noise_internal, t)
|
||||
lerp_params.temp = MathUtil.log_lerp(self.temp, other.temp, t)
|
||||
return lerp_params
|
||||
def lerp(self, other, t):
|
||||
lerp_params = ExpParams()
|
||||
lerp_params.rate = MathUtil.lerp(self.rate, other.rate, t)
|
||||
lerp_params.init_action_rate = MathUtil.lerp(self.init_action_rate, other.init_action_rate, t)
|
||||
lerp_params.noise = MathUtil.lerp(self.noise, other.noise, t)
|
||||
lerp_params.noise_internal = MathUtil.lerp(self.noise_internal, other.noise_internal, t)
|
||||
lerp_params.temp = MathUtil.log_lerp(self.temp, other.temp, t)
|
||||
return lerp_params
|
||||
|
||||
@@ -1 +1 @@
from . import *
from . import *

@@ -3,11 +3,12 @@ import pybullet_envs.deep_mimic.learning.tf_util as TFUtil

NAME = "fc_2layers_1024units"

def build_net(input_tfs, reuse=False):
layers = [1024, 512]
activation = tf.nn.relu

input_tf = tf.concat(axis=-1, values=input_tfs)
h = TFUtil.fc_net(input_tf, layers, activation=activation, reuse=reuse)
h = activation(h)
return h
def build_net(input_tfs, reuse=False):
layers = [1024, 512]
activation = tf.nn.relu

input_tf = tf.concat(axis=-1, values=input_tfs)
h = TFUtil.fc_net(input_tf, layers, activation=activation, reuse=reuse)
h = activation(h)
return h

@@ -1,11 +1,12 @@
import pybullet_envs.deep_mimic.learning.nets.fc_2layers_1024units as fc_2layers_1024units

def build_net(net_name, input_tfs, reuse=False):
net = None

if (net_name == fc_2layers_1024units.NAME):
net = fc_2layers_1024units.build_net(input_tfs, reuse)
else:
assert False, 'Unsupported net: ' + net_name

return net
def build_net(net_name, input_tfs, reuse=False):
net = None

if (net_name == fc_2layers_1024units.NAME):
net = fc_2layers_1024units.build_net(input_tfs, reuse)
else:
assert False, 'Unsupported net: ' + net_name

return net

@@ -3,147 +3,149 @@ import copy
import pybullet_utils.mpi_util as MPIUtil
from pybullet_utils.logger import Logger


class Normalizer(object):

  CHECK_SYNC_COUNT = 50000  # check synchronization after a certain number of entries

  # these group IDs must be the same as those in CharController.h
  NORM_GROUP_SINGLE = 0
  NORM_GROUP_NONE = -1

  class Group(object):

    def __init__(self, id, indices):
      self.id = id
      self.indices = indices
      return

  def __init__(self, size, groups_ids=None, eps=0.02, clip=np.inf):
    self.eps = eps
    self.clip = clip
    self.mean = np.zeros(size)
    self.mean_sq = np.zeros(size)
    self.std = np.ones(size)
    self.count = 0
    self.groups = self._build_groups(groups_ids)

    self.new_count = 0
    self.new_sum = np.zeros_like(self.mean)
    self.new_sum_sq = np.zeros_like(self.mean_sq)
    return

  def record(self, x):
    size = self.get_size()
    is_array = isinstance(x, np.ndarray)
    if not is_array:
      assert (size == 1)
      x = np.array([[x]])

    assert x.shape[-1] == size, \
      Logger.print2('Normalizer shape mismatch, expecting size {:d}, but got {:d}'.format(size, x.shape[-1]))
    x = np.reshape(x, [-1, size])

    self.new_count += x.shape[0]
    self.new_sum += np.sum(x, axis=0)
    self.new_sum_sq += np.sum(np.square(x), axis=0)
    return

  def update(self):
    new_count = MPIUtil.reduce_sum(self.new_count)
    new_sum = MPIUtil.reduce_sum(self.new_sum)
    new_sum_sq = MPIUtil.reduce_sum(self.new_sum_sq)

    new_total = self.count + new_count
    if (self.count // self.CHECK_SYNC_COUNT != new_total // self.CHECK_SYNC_COUNT):
      assert self.check_synced(), Logger.print2('Normalizer parameters desynchronized')

    if new_count > 0:
      new_mean = self._process_group_data(new_sum / new_count, self.mean)
      new_mean_sq = self._process_group_data(new_sum_sq / new_count, self.mean_sq)
      w_old = float(self.count) / new_total
      w_new = float(new_count) / new_total

      self.mean = w_old * self.mean + w_new * new_mean
      self.mean_sq = w_old * self.mean_sq + w_new * new_mean_sq
      self.count = new_total
      self.std = self.calc_std(self.mean, self.mean_sq)

      self.new_count = 0
      self.new_sum.fill(0)
      self.new_sum_sq.fill(0)

    return

  def get_size(self):
    return self.mean.size

  def set_mean_std(self, mean, std):
    size = self.get_size()
    is_array = isinstance(mean, np.ndarray) and isinstance(std, np.ndarray)

    if not is_array:
      assert (size == 1)
      mean = np.array([mean])
      std = np.array([std])

    assert len(mean) == size and len(std) == size, \
      Logger.print2('Normalizer shape mismatch, expecting size {:d}, but got {:d} and {:d}'.format(size, len(mean), len(std)))

    self.mean = mean
    self.std = std
    self.mean_sq = self.calc_mean_sq(self.mean, self.std)
    return

  def normalize(self, x):
    norm_x = (x - self.mean) / self.std
    norm_x = np.clip(norm_x, -self.clip, self.clip)
    return norm_x

  def unnormalize(self, norm_x):
    x = norm_x * self.std + self.mean
    return x

  def calc_std(self, mean, mean_sq):
    var = mean_sq - np.square(mean)
    # some time floating point errors can lead to small negative numbers
    var = np.maximum(var, 0)
    std = np.sqrt(var)
    std = np.maximum(std, self.eps)
    return std

  def calc_mean_sq(self, mean, std):
    return np.square(std) + np.square(self.mean)

  def check_synced(self):
    synced = True
    if MPIUtil.is_root_proc():
      vars = np.concatenate([self.mean, self.mean_sq])
      MPIUtil.bcast(vars)
    else:
      vars_local = np.concatenate([self.mean, self.mean_sq])
      vars_root = np.empty_like(vars_local)
      MPIUtil.bcast(vars_root)
      synced = (vars_local == vars_root).all()
    return synced

  def _build_groups(self, groups_ids):
    groups = []
    if groups_ids is None:
      curr_id = self.NORM_GROUP_SINGLE
      curr_list = np.arange(self.get_size()).astype(np.int32)
      groups.append(self.Group(curr_id, curr_list))
    else:
      ids = np.unique(groups_ids)
      for id in ids:
        curr_list = np.nonzero(groups_ids == id)[0].astype(np.int32)
        groups.append(self.Group(id, curr_list))

    return groups

  def _process_group_data(self, new_data, old_data):
    proc_data = new_data.copy()
    for group in self.groups:
      if group.id == self.NORM_GROUP_NONE:
        proc_data[group.indices] = old_data[group.indices]
      elif group.id != self.NORM_GROUP_SINGLE:
        avg = np.mean(new_data[group.indices])
        proc_data[group.indices] = avg
    return proc_data
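For reference, the running statistics kept by this class can be exercised in isolation. Below is a minimal sketch (not part of this pull request), assuming a single process so that the MPIUtil reductions in update() simply return the local values:

import numpy as np

# 'Normalizer' here is the class shown above.
norm = Normalizer(size=1)
data = np.array([[1.0], [2.0], [3.0], [4.0]])

norm.record(data)   # accumulates new_count, new_sum, new_sum_sq
norm.update()       # folds the accumulators into mean / mean_sq / std

assert np.isclose(norm.mean[0], data.mean())
# std = sqrt(max(E[x^2] - E[x]^2, 0)), clamped below by eps
assert np.isclose(norm.std[0], max(data.std(), norm.eps))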
@@ -1,46 +1,47 @@
import numpy as np
from pybullet_envs.deep_mimic.env.env import Env


class Path(object):

  def __init__(self):
    self.clear()
    return

  def pathlength(self):
    return len(self.actions)

  def is_valid(self):
    valid = True
    l = self.pathlength()
    valid &= len(self.states) == l + 1
    valid &= len(self.goals) == l + 1
    valid &= len(self.actions) == l
    valid &= len(self.logps) == l
    valid &= len(self.rewards) == l
    valid &= len(self.flags) == l

    return valid

  def check_vals(self):
    for vals in [self.states, self.goals, self.actions, self.logps, self.rewards]:
      for v in vals:
        if not np.isfinite(v).all():
          return False
    return True

  def clear(self):
    self.states = []
    self.goals = []
    self.actions = []
    self.logps = []
    self.rewards = []
    self.flags = []
    self.terminate = Env.Terminate.Null
    return

  def get_pathlen(self):
    return len(self.rewards)

  def calc_return(self):
    return sum(self.rewards)
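A Path accumulates one entry per step for actions, logps, rewards and flags, plus one extra leading entry for states and goals, which is exactly what is_valid() checks. A minimal sketch (not part of this pull request), using made-up sizes and dummy values:

import numpy as np

# 'Path' here is the class shown above; state/goal/action sizes are made up.
path = Path()
path.states.append(np.zeros(3))    # initial state, recorded before the first action
path.goals.append(np.zeros(2))
for t in range(5):
  path.actions.append(np.zeros(4))
  path.logps.append(0.0)
  path.rewards.append(1.0)
  path.flags.append(0)
  path.states.append(np.zeros(3))  # next state returned by the environment
  path.goals.append(np.zeros(2))

assert path.is_valid()             # states/goals are one longer than actions
assert path.check_vals()           # all recorded values are finite
assert path.calc_return() == 5.0   # sum of the dummy rewards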
@@ -13,341 +13,343 @@ import pybullet_utils.mpi_util as MPIUtil
|
||||
import pybullet_utils.math_util as MathUtil
|
||||
from pybullet_envs.deep_mimic.env.action_space import ActionSpace
|
||||
from pybullet_envs.deep_mimic.env.env import Env
|
||||
|
||||
'''
|
||||
Policy Gradient Agent
|
||||
'''
|
||||
|
||||
|
||||
class PGAgent(TFAgent):
|
||||
NAME = 'PG'
|
||||
NAME = 'PG'
|
||||
|
||||
ACTOR_NET_KEY = 'ActorNet'
|
||||
ACTOR_STEPSIZE_KEY = 'ActorStepsize'
|
||||
ACTOR_MOMENTUM_KEY = 'ActorMomentum'
|
||||
ACTOR_WEIGHT_DECAY_KEY = 'ActorWeightDecay'
|
||||
ACTOR_INIT_OUTPUT_SCALE_KEY = 'ActorInitOutputScale'
|
||||
ACTOR_NET_KEY = 'ActorNet'
|
||||
ACTOR_STEPSIZE_KEY = 'ActorStepsize'
|
||||
ACTOR_MOMENTUM_KEY = 'ActorMomentum'
|
||||
ACTOR_WEIGHT_DECAY_KEY = 'ActorWeightDecay'
|
||||
ACTOR_INIT_OUTPUT_SCALE_KEY = 'ActorInitOutputScale'
|
||||
|
||||
CRITIC_NET_KEY = 'CriticNet'
|
||||
CRITIC_STEPSIZE_KEY = 'CriticStepsize'
|
||||
CRITIC_MOMENTUM_KEY = 'CriticMomentum'
|
||||
CRITIC_WEIGHT_DECAY_KEY = 'CriticWeightDecay'
|
||||
|
||||
EXP_ACTION_FLAG = 1 << 0
|
||||
CRITIC_NET_KEY = 'CriticNet'
|
||||
CRITIC_STEPSIZE_KEY = 'CriticStepsize'
|
||||
CRITIC_MOMENTUM_KEY = 'CriticMomentum'
|
||||
CRITIC_WEIGHT_DECAY_KEY = 'CriticWeightDecay'
|
||||
|
||||
def __init__(self, world, id, json_data):
|
||||
self._exp_action = False
|
||||
super().__init__(world, id, json_data)
|
||||
return
|
||||
EXP_ACTION_FLAG = 1 << 0
|
||||
|
||||
def reset(self):
|
||||
super().reset()
|
||||
self._exp_action = False
|
||||
return
|
||||
def __init__(self, world, id, json_data):
|
||||
self._exp_action = False
|
||||
super().__init__(world, id, json_data)
|
||||
return
|
||||
|
||||
def _check_action_space(self):
|
||||
action_space = self.get_action_space()
|
||||
return action_space == ActionSpace.Continuous
|
||||
def reset(self):
|
||||
super().reset()
|
||||
self._exp_action = False
|
||||
return
|
||||
|
||||
def _load_params(self, json_data):
|
||||
super()._load_params(json_data)
|
||||
self.val_min, self.val_max = self._calc_val_bounds(self.discount)
|
||||
self.val_fail, self.val_succ = self._calc_term_vals(self.discount)
|
||||
return
|
||||
def _check_action_space(self):
|
||||
action_space = self.get_action_space()
|
||||
return action_space == ActionSpace.Continuous
|
||||
|
||||
def _build_nets(self, json_data):
|
||||
assert self.ACTOR_NET_KEY in json_data
|
||||
assert self.CRITIC_NET_KEY in json_data
|
||||
def _load_params(self, json_data):
|
||||
super()._load_params(json_data)
|
||||
self.val_min, self.val_max = self._calc_val_bounds(self.discount)
|
||||
self.val_fail, self.val_succ = self._calc_term_vals(self.discount)
|
||||
return
|
||||
|
||||
actor_net_name = json_data[self.ACTOR_NET_KEY]
|
||||
critic_net_name = json_data[self.CRITIC_NET_KEY]
|
||||
actor_init_output_scale = 1 if (self.ACTOR_INIT_OUTPUT_SCALE_KEY not in json_data) else json_data[self.ACTOR_INIT_OUTPUT_SCALE_KEY]
|
||||
|
||||
s_size = self.get_state_size()
|
||||
g_size = self.get_goal_size()
|
||||
a_size = self.get_action_size()
|
||||
def _build_nets(self, json_data):
|
||||
assert self.ACTOR_NET_KEY in json_data
|
||||
assert self.CRITIC_NET_KEY in json_data
|
||||
|
||||
# setup input tensors
|
||||
self.s_tf = tf.placeholder(tf.float32, shape=[None, s_size], name="s") # observations
|
||||
self.tar_val_tf = tf.placeholder(tf.float32, shape=[None], name="tar_val") # target value s
|
||||
self.adv_tf = tf.placeholder(tf.float32, shape=[None], name="adv") # advantage
|
||||
self.a_tf = tf.placeholder(tf.float32, shape=[None, a_size], name="a") # target actions
|
||||
self.g_tf = tf.placeholder(tf.float32, shape=([None, g_size] if self.has_goal() else None), name="g") # goals
|
||||
actor_net_name = json_data[self.ACTOR_NET_KEY]
|
||||
critic_net_name = json_data[self.CRITIC_NET_KEY]
|
||||
actor_init_output_scale = 1 if (self.ACTOR_INIT_OUTPUT_SCALE_KEY not in json_data
|
||||
) else json_data[self.ACTOR_INIT_OUTPUT_SCALE_KEY]
|
||||
|
||||
with tf.variable_scope('main'):
|
||||
with tf.variable_scope('actor'):
|
||||
self.actor_tf = self._build_net_actor(actor_net_name, actor_init_output_scale)
|
||||
with tf.variable_scope('critic'):
|
||||
self.critic_tf = self._build_net_critic(critic_net_name)
|
||||
s_size = self.get_state_size()
|
||||
g_size = self.get_goal_size()
|
||||
a_size = self.get_action_size()
|
||||
|
||||
if (self.actor_tf != None):
|
||||
Logger.print2('Built actor net: ' + actor_net_name)
|
||||
# setup input tensors
|
||||
self.s_tf = tf.placeholder(tf.float32, shape=[None, s_size], name="s") # observations
|
||||
self.tar_val_tf = tf.placeholder(tf.float32, shape=[None], name="tar_val") # target value s
|
||||
self.adv_tf = tf.placeholder(tf.float32, shape=[None], name="adv") # advantage
|
||||
self.a_tf = tf.placeholder(tf.float32, shape=[None, a_size], name="a") # target actions
|
||||
self.g_tf = tf.placeholder(tf.float32,
|
||||
shape=([None, g_size] if self.has_goal() else None),
|
||||
name="g") # goals
|
||||
|
||||
if (self.critic_tf != None):
|
||||
Logger.print2('Built critic net: ' + critic_net_name)
|
||||
with tf.variable_scope('main'):
|
||||
with tf.variable_scope('actor'):
|
||||
self.actor_tf = self._build_net_actor(actor_net_name, actor_init_output_scale)
|
||||
with tf.variable_scope('critic'):
|
||||
self.critic_tf = self._build_net_critic(critic_net_name)
|
||||
|
||||
return
|
||||
if (self.actor_tf != None):
|
||||
Logger.print2('Built actor net: ' + actor_net_name)
|
||||
|
||||
def _build_normalizers(self):
|
||||
super()._build_normalizers()
|
||||
with self.sess.as_default(), self.graph.as_default(), tf.variable_scope(self.tf_scope):
|
||||
with tf.variable_scope(self.RESOURCE_SCOPE):
|
||||
val_offset, val_scale = self._calc_val_offset_scale(self.discount)
|
||||
self.val_norm = TFNormalizer(self.sess, 'val_norm', 1)
|
||||
self.val_norm.set_mean_std(-val_offset, 1.0 / val_scale)
|
||||
return
|
||||
if (self.critic_tf != None):
|
||||
Logger.print2('Built critic net: ' + critic_net_name)
|
||||
|
||||
def _init_normalizers(self):
|
||||
super()._init_normalizers()
|
||||
with self.sess.as_default(), self.graph.as_default():
|
||||
self.val_norm.update()
|
||||
return
|
||||
return
|
||||
|
||||
def _load_normalizers(self):
|
||||
super()._load_normalizers()
|
||||
self.val_norm.load()
|
||||
return
|
||||
def _build_normalizers(self):
|
||||
super()._build_normalizers()
|
||||
with self.sess.as_default(), self.graph.as_default(), tf.variable_scope(self.tf_scope):
|
||||
with tf.variable_scope(self.RESOURCE_SCOPE):
|
||||
val_offset, val_scale = self._calc_val_offset_scale(self.discount)
|
||||
self.val_norm = TFNormalizer(self.sess, 'val_norm', 1)
|
||||
self.val_norm.set_mean_std(-val_offset, 1.0 / val_scale)
|
||||
return
|
||||
|
||||
def _build_losses(self, json_data):
|
||||
actor_weight_decay = 0 if (self.ACTOR_WEIGHT_DECAY_KEY not in json_data) else json_data[self.ACTOR_WEIGHT_DECAY_KEY]
|
||||
critic_weight_decay = 0 if (self.CRITIC_WEIGHT_DECAY_KEY not in json_data) else json_data[self.CRITIC_WEIGHT_DECAY_KEY]
|
||||
def _init_normalizers(self):
|
||||
super()._init_normalizers()
|
||||
with self.sess.as_default(), self.graph.as_default():
|
||||
self.val_norm.update()
|
||||
return
|
||||
|
||||
norm_val_diff = self.val_norm.normalize_tf(self.tar_val_tf) - self.val_norm.normalize_tf(self.critic_tf)
|
||||
self.critic_loss_tf = 0.5 * tf.reduce_mean(tf.square(norm_val_diff))
|
||||
def _load_normalizers(self):
|
||||
super()._load_normalizers()
|
||||
self.val_norm.load()
|
||||
return
|
||||
|
||||
if (critic_weight_decay != 0):
|
||||
self.critic_loss_tf += critic_weight_decay * self._weight_decay_loss('main/critic')
|
||||
|
||||
norm_a_mean_tf = self.a_norm.normalize_tf(self.actor_tf)
|
||||
norm_a_diff = self.a_norm.normalize_tf(self.a_tf) - norm_a_mean_tf
|
||||
def _build_losses(self, json_data):
|
||||
actor_weight_decay = 0 if (
|
||||
self.ACTOR_WEIGHT_DECAY_KEY not in json_data) else json_data[self.ACTOR_WEIGHT_DECAY_KEY]
|
||||
critic_weight_decay = 0 if (
|
||||
self.CRITIC_WEIGHT_DECAY_KEY not in json_data) else json_data[self.CRITIC_WEIGHT_DECAY_KEY]
|
||||
|
||||
self.actor_loss_tf = tf.reduce_sum(tf.square(norm_a_diff), axis=-1)
|
||||
self.actor_loss_tf *= self.adv_tf
|
||||
self.actor_loss_tf = 0.5 * tf.reduce_mean(self.actor_loss_tf)
|
||||
norm_val_diff = self.val_norm.normalize_tf(self.tar_val_tf) - self.val_norm.normalize_tf(
|
||||
self.critic_tf)
|
||||
self.critic_loss_tf = 0.5 * tf.reduce_mean(tf.square(norm_val_diff))
|
||||
|
||||
norm_a_bound_min = self.a_norm.normalize(self.a_bound_min)
|
||||
norm_a_bound_max = self.a_norm.normalize(self.a_bound_max)
|
||||
a_bound_loss = TFUtil.calc_bound_loss(norm_a_mean_tf, norm_a_bound_min, norm_a_bound_max)
|
||||
a_bound_loss /= self.exp_params_curr.noise
|
||||
self.actor_loss_tf += a_bound_loss
|
||||
if (critic_weight_decay != 0):
|
||||
self.critic_loss_tf += critic_weight_decay * self._weight_decay_loss('main/critic')
|
||||
|
||||
if (actor_weight_decay != 0):
|
||||
self.actor_loss_tf += actor_weight_decay * self._weight_decay_loss('main/actor')
|
||||
|
||||
return
|
||||
norm_a_mean_tf = self.a_norm.normalize_tf(self.actor_tf)
|
||||
norm_a_diff = self.a_norm.normalize_tf(self.a_tf) - norm_a_mean_tf
|
||||
|
||||
def _build_solvers(self, json_data):
|
||||
actor_stepsize = 0.001 if (self.ACTOR_STEPSIZE_KEY not in json_data) else json_data[self.ACTOR_STEPSIZE_KEY]
|
||||
actor_momentum = 0.9 if (self.ACTOR_MOMENTUM_KEY not in json_data) else json_data[self.ACTOR_MOMENTUM_KEY]
|
||||
critic_stepsize = 0.01 if (self.CRITIC_STEPSIZE_KEY not in json_data) else json_data[self.CRITIC_STEPSIZE_KEY]
|
||||
critic_momentum = 0.9 if (self.CRITIC_MOMENTUM_KEY not in json_data) else json_data[self.CRITIC_MOMENTUM_KEY]
|
||||
|
||||
critic_vars = self._tf_vars('main/critic')
|
||||
critic_opt = tf.train.MomentumOptimizer(learning_rate=critic_stepsize, momentum=critic_momentum)
|
||||
self.critic_grad_tf = tf.gradients(self.critic_loss_tf, critic_vars)
|
||||
self.critic_solver = MPISolver(self.sess, critic_opt, critic_vars)
|
||||
self.actor_loss_tf = tf.reduce_sum(tf.square(norm_a_diff), axis=-1)
|
||||
self.actor_loss_tf *= self.adv_tf
|
||||
self.actor_loss_tf = 0.5 * tf.reduce_mean(self.actor_loss_tf)
|
||||
|
||||
actor_vars = self._tf_vars('main/actor')
|
||||
actor_opt = tf.train.MomentumOptimizer(learning_rate=actor_stepsize, momentum=actor_momentum)
|
||||
self.actor_grad_tf = tf.gradients(self.actor_loss_tf, actor_vars)
|
||||
self.actor_solver = MPISolver(self.sess, actor_opt, actor_vars)
|
||||
norm_a_bound_min = self.a_norm.normalize(self.a_bound_min)
|
||||
norm_a_bound_max = self.a_norm.normalize(self.a_bound_max)
|
||||
a_bound_loss = TFUtil.calc_bound_loss(norm_a_mean_tf, norm_a_bound_min, norm_a_bound_max)
|
||||
a_bound_loss /= self.exp_params_curr.noise
|
||||
self.actor_loss_tf += a_bound_loss
|
||||
|
||||
return
|
||||
if (actor_weight_decay != 0):
|
||||
self.actor_loss_tf += actor_weight_decay * self._weight_decay_loss('main/actor')
|
||||
|
||||
def _build_net_actor(self, net_name, init_output_scale):
|
||||
norm_s_tf = self.s_norm.normalize_tf(self.s_tf)
|
||||
input_tfs = [norm_s_tf]
|
||||
if (self.has_goal()):
|
||||
norm_g_tf = self.g_norm.normalize_tf(self.g_tf)
|
||||
input_tfs += [norm_g_tf]
|
||||
|
||||
h = NetBuilder.build_net(net_name, input_tfs)
|
||||
norm_a_tf = tf.layers.dense(inputs=h, units=self.get_action_size(), activation=None,
|
||||
kernel_initializer=tf.random_uniform_initializer(minval=-init_output_scale, maxval=init_output_scale))
|
||||
|
||||
a_tf = self.a_norm.unnormalize_tf(norm_a_tf)
|
||||
return a_tf
|
||||
|
||||
def _build_net_critic(self, net_name):
|
||||
norm_s_tf = self.s_norm.normalize_tf(self.s_tf)
|
||||
input_tfs = [norm_s_tf]
|
||||
if (self.has_goal()):
|
||||
norm_g_tf = self.g_norm.normalize_tf(self.g_tf)
|
||||
input_tfs += [norm_g_tf]
|
||||
|
||||
h = NetBuilder.build_net(net_name, input_tfs)
|
||||
norm_val_tf = tf.layers.dense(inputs=h, units=1, activation=None,
|
||||
kernel_initializer=TFUtil.xavier_initializer);
|
||||
return
|
||||
|
||||
norm_val_tf = tf.reshape(norm_val_tf, [-1])
|
||||
val_tf = self.val_norm.unnormalize_tf(norm_val_tf)
|
||||
return val_tf
|
||||
def _build_solvers(self, json_data):
|
||||
actor_stepsize = 0.001 if (
|
||||
self.ACTOR_STEPSIZE_KEY not in json_data) else json_data[self.ACTOR_STEPSIZE_KEY]
|
||||
actor_momentum = 0.9 if (
|
||||
self.ACTOR_MOMENTUM_KEY not in json_data) else json_data[self.ACTOR_MOMENTUM_KEY]
|
||||
critic_stepsize = 0.01 if (
|
||||
self.CRITIC_STEPSIZE_KEY not in json_data) else json_data[self.CRITIC_STEPSIZE_KEY]
|
||||
critic_momentum = 0.9 if (
|
||||
self.CRITIC_MOMENTUM_KEY not in json_data) else json_data[self.CRITIC_MOMENTUM_KEY]
|
||||
|
||||
def _initialize_vars(self):
|
||||
super()._initialize_vars()
|
||||
self._sync_solvers()
|
||||
return
|
||||
critic_vars = self._tf_vars('main/critic')
|
||||
critic_opt = tf.train.MomentumOptimizer(learning_rate=critic_stepsize,
|
||||
momentum=critic_momentum)
|
||||
self.critic_grad_tf = tf.gradients(self.critic_loss_tf, critic_vars)
|
||||
self.critic_solver = MPISolver(self.sess, critic_opt, critic_vars)
|
||||
|
||||
def _sync_solvers(self):
|
||||
self.actor_solver.sync()
|
||||
self.critic_solver.sync()
|
||||
return
|
||||
actor_vars = self._tf_vars('main/actor')
|
||||
actor_opt = tf.train.MomentumOptimizer(learning_rate=actor_stepsize, momentum=actor_momentum)
|
||||
self.actor_grad_tf = tf.gradients(self.actor_loss_tf, actor_vars)
|
||||
self.actor_solver = MPISolver(self.sess, actor_opt, actor_vars)
|
||||
|
||||
def _decide_action(self, s, g):
|
||||
with self.sess.as_default(), self.graph.as_default():
|
||||
self._exp_action = False
|
||||
a = self._eval_actor(s, g)[0]
|
||||
logp = 0
|
||||
return
|
||||
|
||||
if self._enable_stoch_policy():
|
||||
# epsilon-greedy
|
||||
rand_action = MathUtil.flip_coin(self.exp_params_curr.rate)
|
||||
if rand_action:
|
||||
norm_exp_noise = np.random.randn(*a.shape)
|
||||
norm_exp_noise *= self.exp_params_curr.noise
|
||||
exp_noise = norm_exp_noise * self.a_norm.std
|
||||
a += exp_noise
|
||||
def _build_net_actor(self, net_name, init_output_scale):
|
||||
norm_s_tf = self.s_norm.normalize_tf(self.s_tf)
|
||||
input_tfs = [norm_s_tf]
|
||||
if (self.has_goal()):
|
||||
norm_g_tf = self.g_norm.normalize_tf(self.g_tf)
|
||||
input_tfs += [norm_g_tf]
|
||||
|
||||
logp = self._calc_action_logp(norm_exp_noise)
|
||||
self._exp_action = True
|
||||
h = NetBuilder.build_net(net_name, input_tfs)
|
||||
norm_a_tf = tf.layers.dense(inputs=h,
|
||||
units=self.get_action_size(),
|
||||
activation=None,
|
||||
kernel_initializer=tf.random_uniform_initializer(
|
||||
minval=-init_output_scale, maxval=init_output_scale))
|
||||
|
||||
return a, logp
|
||||
a_tf = self.a_norm.unnormalize_tf(norm_a_tf)
|
||||
return a_tf
|
||||
|
||||
def _enable_stoch_policy(self):
|
||||
return self.enable_training and (self._mode == self.Mode.TRAIN or self._mode == self.Mode.TRAIN_END)
|
||||
def _build_net_critic(self, net_name):
|
||||
norm_s_tf = self.s_norm.normalize_tf(self.s_tf)
|
||||
input_tfs = [norm_s_tf]
|
||||
if (self.has_goal()):
|
||||
norm_g_tf = self.g_norm.normalize_tf(self.g_tf)
|
||||
input_tfs += [norm_g_tf]
|
||||
|
||||
def _eval_actor(self, s, g):
|
||||
s = np.reshape(s, [-1, self.get_state_size()])
|
||||
g = np.reshape(g, [-1, self.get_goal_size()]) if self.has_goal() else None
|
||||
|
||||
feed = {
|
||||
self.s_tf : s,
|
||||
self.g_tf : g
|
||||
}
|
||||
h = NetBuilder.build_net(net_name, input_tfs)
|
||||
norm_val_tf = tf.layers.dense(inputs=h,
|
||||
units=1,
|
||||
activation=None,
|
||||
kernel_initializer=TFUtil.xavier_initializer)
|
||||
|
||||
a = self.actor_tf.eval(feed)
|
||||
return a
|
||||
|
||||
def _eval_critic(self, s, g):
|
||||
with self.sess.as_default(), self.graph.as_default():
|
||||
s = np.reshape(s, [-1, self.get_state_size()])
|
||||
g = np.reshape(g, [-1, self.get_goal_size()]) if self.has_goal() else None
|
||||
norm_val_tf = tf.reshape(norm_val_tf, [-1])
|
||||
val_tf = self.val_norm.unnormalize_tf(norm_val_tf)
|
||||
return val_tf
|
||||
|
||||
feed = {
|
||||
self.s_tf : s,
|
||||
self.g_tf : g
|
||||
}
|
||||
def _initialize_vars(self):
|
||||
super()._initialize_vars()
|
||||
self._sync_solvers()
|
||||
return
|
||||
|
||||
val = self.critic_tf.eval(feed)
|
||||
return val
|
||||
def _sync_solvers(self):
|
||||
self.actor_solver.sync()
|
||||
self.critic_solver.sync()
|
||||
return
|
||||
|
||||
def _record_flags(self):
|
||||
flags = int(0)
|
||||
if (self._exp_action):
|
||||
flags = flags | self.EXP_ACTION_FLAG
|
||||
return flags
|
||||
def _decide_action(self, s, g):
|
||||
with self.sess.as_default(), self.graph.as_default():
|
||||
self._exp_action = False
|
||||
a = self._eval_actor(s, g)[0]
|
||||
logp = 0
|
||||
|
||||
def _train_step(self):
|
||||
super()._train_step()
|
||||
if self._enable_stoch_policy():
|
||||
# epsilon-greedy
|
||||
rand_action = MathUtil.flip_coin(self.exp_params_curr.rate)
|
||||
if rand_action:
|
||||
norm_exp_noise = np.random.randn(*a.shape)
|
||||
norm_exp_noise *= self.exp_params_curr.noise
|
||||
exp_noise = norm_exp_noise * self.a_norm.std
|
||||
a += exp_noise
|
||||
|
||||
critic_loss = self._update_critic()
|
||||
actor_loss = self._update_actor()
|
||||
critic_loss = MPIUtil.reduce_avg(critic_loss)
|
||||
actor_loss = MPIUtil.reduce_avg(actor_loss)
|
||||
logp = self._calc_action_logp(norm_exp_noise)
|
||||
self._exp_action = True
|
||||
|
||||
critic_stepsize = self.critic_solver.get_stepsize()
|
||||
actor_stepsize = self.actor_solver.get_stepsize()
|
||||
|
||||
self.logger.log_tabular('Critic_Loss', critic_loss)
|
||||
self.logger.log_tabular('Critic_Stepsize', critic_stepsize)
|
||||
self.logger.log_tabular('Actor_Loss', actor_loss)
|
||||
self.logger.log_tabular('Actor_Stepsize', actor_stepsize)
|
||||
return a, logp
|
||||
|
||||
return
|
||||
def _enable_stoch_policy(self):
|
||||
return self.enable_training and (self._mode == self.Mode.TRAIN or
|
||||
self._mode == self.Mode.TRAIN_END)
|
||||
|
||||
def _update_critic(self):
|
||||
idx = self.replay_buffer.sample(self._local_mini_batch_size)
|
||||
s = self.replay_buffer.get('states', idx)
|
||||
g = self.replay_buffer.get('goals', idx) if self.has_goal() else None
|
||||
|
||||
tar_V = self._calc_updated_vals(idx)
|
||||
tar_V = np.clip(tar_V, self.val_min, self.val_max)
|
||||
def _eval_actor(self, s, g):
|
||||
s = np.reshape(s, [-1, self.get_state_size()])
|
||||
g = np.reshape(g, [-1, self.get_goal_size()]) if self.has_goal() else None
|
||||
|
||||
feed = {
|
||||
self.s_tf: s,
|
||||
self.g_tf: g,
|
||||
self.tar_val_tf: tar_V
|
||||
}
|
||||
feed = {self.s_tf: s, self.g_tf: g}
|
||||
|
||||
loss, grads = self.sess.run([self.critic_loss_tf, self.critic_grad_tf], feed)
|
||||
self.critic_solver.update(grads)
|
||||
return loss
|
||||
|
||||
def _update_actor(self):
|
||||
key = self.EXP_ACTION_FLAG
|
||||
idx = self.replay_buffer.sample_filtered(self._local_mini_batch_size, key)
|
||||
has_goal = self.has_goal()
|
||||
a = self.actor_tf.eval(feed)
|
||||
return a
|
||||
|
||||
s = self.replay_buffer.get('states', idx)
|
||||
g = self.replay_buffer.get('goals', idx) if has_goal else None
|
||||
a = self.replay_buffer.get('actions', idx)
|
||||
def _eval_critic(self, s, g):
|
||||
with self.sess.as_default(), self.graph.as_default():
|
||||
s = np.reshape(s, [-1, self.get_state_size()])
|
||||
g = np.reshape(g, [-1, self.get_goal_size()]) if self.has_goal() else None
|
||||
|
||||
V_new = self._calc_updated_vals(idx)
|
||||
V_old = self._eval_critic(s, g)
|
||||
adv = V_new - V_old
|
||||
feed = {self.s_tf: s, self.g_tf: g}
|
||||
|
||||
feed = {
|
||||
self.s_tf: s,
|
||||
self.g_tf: g,
|
||||
self.a_tf: a,
|
||||
self.adv_tf: adv
|
||||
}
|
||||
val = self.critic_tf.eval(feed)
|
||||
return val
|
||||
|
||||
loss, grads = self.sess.run([self.actor_loss_tf, self.actor_grad_tf], feed)
|
||||
self.actor_solver.update(grads)
|
||||
def _record_flags(self):
|
||||
flags = int(0)
|
||||
if (self._exp_action):
|
||||
flags = flags | self.EXP_ACTION_FLAG
|
||||
return flags
|
||||
|
||||
return loss
|
||||
def _train_step(self):
|
||||
super()._train_step()
|
||||
|
||||
def _calc_updated_vals(self, idx):
|
||||
r = self.replay_buffer.get('rewards', idx)
|
||||
critic_loss = self._update_critic()
|
||||
actor_loss = self._update_actor()
|
||||
critic_loss = MPIUtil.reduce_avg(critic_loss)
|
||||
actor_loss = MPIUtil.reduce_avg(actor_loss)
|
||||
|
||||
if self.discount == 0:
|
||||
new_V = r
|
||||
else:
|
||||
next_idx = self.replay_buffer.get_next_idx(idx)
|
||||
s_next = self.replay_buffer.get('states', next_idx)
|
||||
g_next = self.replay_buffer.get('goals', next_idx) if self.has_goal() else None
|
||||
critic_stepsize = self.critic_solver.get_stepsize()
|
||||
actor_stepsize = self.actor_solver.get_stepsize()
|
||||
|
||||
is_end = self.replay_buffer.is_path_end(idx)
|
||||
is_fail = self.replay_buffer.check_terminal_flag(idx, Env.Terminate.Fail)
|
||||
is_succ = self.replay_buffer.check_terminal_flag(idx, Env.Terminate.Succ)
|
||||
is_fail = np.logical_and(is_end, is_fail)
|
||||
is_succ = np.logical_and(is_end, is_succ)
|
||||
self.logger.log_tabular('Critic_Loss', critic_loss)
|
||||
self.logger.log_tabular('Critic_Stepsize', critic_stepsize)
|
||||
self.logger.log_tabular('Actor_Loss', actor_loss)
|
||||
self.logger.log_tabular('Actor_Stepsize', actor_stepsize)
|
||||
|
||||
V_next = self._eval_critic(s_next, g_next)
|
||||
V_next[is_fail] = self.val_fail
|
||||
V_next[is_succ] = self.val_succ
|
||||
return
|
||||
|
||||
new_V = r + self.discount * V_next
|
||||
return new_V
|
||||
def _update_critic(self):
|
||||
idx = self.replay_buffer.sample(self._local_mini_batch_size)
|
||||
s = self.replay_buffer.get('states', idx)
|
||||
g = self.replay_buffer.get('goals', idx) if self.has_goal() else None
|
||||
|
||||
def _calc_action_logp(self, norm_action_deltas):
|
||||
# norm action deltas are for the normalized actions (scaled by self.a_norm.std)
|
||||
stdev = self.exp_params_curr.noise
|
||||
assert stdev > 0
|
||||
tar_V = self._calc_updated_vals(idx)
|
||||
tar_V = np.clip(tar_V, self.val_min, self.val_max)
|
||||
|
||||
a_size = self.get_action_size()
|
||||
logp = -0.5 / (stdev * stdev) * np.sum(np.square(norm_action_deltas), axis=-1)
|
||||
logp += -0.5 * a_size * np.log(2 * np.pi)
|
||||
logp += -a_size * np.log(stdev)
|
||||
return logp
|
||||
feed = {self.s_tf: s, self.g_tf: g, self.tar_val_tf: tar_V}
|
||||
|
||||
def _log_val(self, s, g):
|
||||
val = self._eval_critic(s, g)
|
||||
norm_val = self.val_norm.normalize(val)
|
||||
self.world.env.log_val(self.id, norm_val[0])
|
||||
return
|
||||
loss, grads = self.sess.run([self.critic_loss_tf, self.critic_grad_tf], feed)
|
||||
self.critic_solver.update(grads)
|
||||
return loss
|
||||
|
||||
def _build_replay_buffer(self, buffer_size):
|
||||
super()._build_replay_buffer(buffer_size)
|
||||
self.replay_buffer.add_filter_key(self.EXP_ACTION_FLAG)
|
||||
return
|
||||
def _update_actor(self):
|
||||
key = self.EXP_ACTION_FLAG
|
||||
idx = self.replay_buffer.sample_filtered(self._local_mini_batch_size, key)
|
||||
has_goal = self.has_goal()
|
||||
|
||||
s = self.replay_buffer.get('states', idx)
|
||||
g = self.replay_buffer.get('goals', idx) if has_goal else None
|
||||
a = self.replay_buffer.get('actions', idx)
|
||||
|
||||
V_new = self._calc_updated_vals(idx)
|
||||
V_old = self._eval_critic(s, g)
|
||||
adv = V_new - V_old
|
||||
|
||||
feed = {self.s_tf: s, self.g_tf: g, self.a_tf: a, self.adv_tf: adv}
|
||||
|
||||
loss, grads = self.sess.run([self.actor_loss_tf, self.actor_grad_tf], feed)
|
||||
self.actor_solver.update(grads)
|
||||
|
||||
return loss
|
||||
|
||||
def _calc_updated_vals(self, idx):
|
||||
r = self.replay_buffer.get('rewards', idx)
|
||||
|
||||
if self.discount == 0:
|
||||
new_V = r
|
||||
else:
|
||||
next_idx = self.replay_buffer.get_next_idx(idx)
|
||||
s_next = self.replay_buffer.get('states', next_idx)
|
||||
g_next = self.replay_buffer.get('goals', next_idx) if self.has_goal() else None
|
||||
|
||||
is_end = self.replay_buffer.is_path_end(idx)
|
||||
is_fail = self.replay_buffer.check_terminal_flag(idx, Env.Terminate.Fail)
|
||||
is_succ = self.replay_buffer.check_terminal_flag(idx, Env.Terminate.Succ)
|
||||
is_fail = np.logical_and(is_end, is_fail)
|
||||
is_succ = np.logical_and(is_end, is_succ)
|
||||
|
||||
V_next = self._eval_critic(s_next, g_next)
|
||||
V_next[is_fail] = self.val_fail
|
||||
V_next[is_succ] = self.val_succ
|
||||
|
||||
new_V = r + self.discount * V_next
|
||||
return new_V
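The one-step target computed above combines each sampled reward with the discounted critic value of the next state, overriding the bootstrap value at terminal states. A small numeric sketch (not part of this pull request); val_fail and val_succ are illustrative stand-ins for the values produced by _calc_term_vals:

import numpy as np

discount = 0.95
val_fail = 0.0                    # illustrative terminal values, not the PR's config
val_succ = 1.0 / (1.0 - discount)

r = np.array([1.0, 1.0, 1.0])             # rewards for three sampled transitions
V_next = np.array([10.0, 12.0, 8.0])      # critic estimates for the next states
is_fail = np.array([False, True, False])  # second transition ends in failure
is_succ = np.array([False, False, True])  # third transition ends in success

V_next[is_fail] = val_fail
V_next[is_succ] = val_succ
new_V = r + discount * V_next
print(new_V)   # [10.5  1.  20.]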
|
||||
|
||||
def _calc_action_logp(self, norm_action_deltas):
|
||||
# norm action deltas are for the normalized actions (scaled by self.a_norm.std)
|
||||
stdev = self.exp_params_curr.noise
|
||||
assert stdev > 0
|
||||
|
||||
a_size = self.get_action_size()
|
||||
logp = -0.5 / (stdev * stdev) * np.sum(np.square(norm_action_deltas), axis=-1)
|
||||
logp += -0.5 * a_size * np.log(2 * np.pi)
|
||||
logp += -a_size * np.log(stdev)
|
||||
return logp
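_calc_action_logp above is the log-density of a diagonal Gaussian with standard deviation stdev in every action dimension, evaluated at the sampled exploration noise (in normalized action units). A minimal sketch (not part of this pull request) checking the closed form against a per-dimension evaluation, with made-up numbers:

import numpy as np

stdev = 0.1
delta = np.array([0.05, -0.02, 0.01])   # made-up normalized action noise
k = delta.size

logp = -0.5 / (stdev * stdev) * np.sum(np.square(delta))
logp += -0.5 * k * np.log(2 * np.pi)
logp += -k * np.log(stdev)

# same value, written per dimension: log N(delta_i; 0, stdev) summed over i
ref = np.sum(-0.5 * np.square(delta / stdev) - np.log(stdev) - 0.5 * np.log(2 * np.pi))
assert np.isclose(logp, ref)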
|
||||
|
||||
def _log_val(self, s, g):
|
||||
val = self._eval_critic(s, g)
|
||||
norm_val = self.val_norm.normalize(val)
|
||||
self.world.env.log_val(self.id, norm_val[0])
|
||||
return
|
||||
|
||||
def _build_replay_buffer(self, buffer_size):
|
||||
super()._build_replay_buffer(buffer_size)
|
||||
self.replay_buffer.add_filter_key(self.EXP_ACTION_FLAG)
|
||||
return
|
||||
|
||||
@@ -10,359 +10,374 @@ from pybullet_utils.logger import Logger
|
||||
import pybullet_utils.mpi_util as MPIUtil
|
||||
import pybullet_utils.math_util as MathUtil
|
||||
from pybullet_envs.deep_mimic.env.env import Env
|
||||
|
||||
'''
|
||||
Proximal Policy Optimization Agent
|
||||
'''
|
||||
|
||||
|
||||
class PPOAgent(PGAgent):
|
||||
NAME = "PPO"
|
||||
EPOCHS_KEY = "Epochs"
|
||||
BATCH_SIZE_KEY = "BatchSize"
|
||||
RATIO_CLIP_KEY = "RatioClip"
|
||||
NORM_ADV_CLIP_KEY = "NormAdvClip"
|
||||
TD_LAMBDA_KEY = "TDLambda"
|
||||
TAR_CLIP_FRAC = "TarClipFrac"
|
||||
ACTOR_STEPSIZE_DECAY = "ActorStepsizeDecay"
|
||||
NAME = "PPO"
|
||||
EPOCHS_KEY = "Epochs"
|
||||
BATCH_SIZE_KEY = "BatchSize"
|
||||
RATIO_CLIP_KEY = "RatioClip"
|
||||
NORM_ADV_CLIP_KEY = "NormAdvClip"
|
||||
TD_LAMBDA_KEY = "TDLambda"
|
||||
TAR_CLIP_FRAC = "TarClipFrac"
|
||||
ACTOR_STEPSIZE_DECAY = "ActorStepsizeDecay"
|
||||
|
||||
def __init__(self, world, id, json_data):
|
||||
super().__init__(world, id, json_data)
|
||||
return
|
||||
def __init__(self, world, id, json_data):
|
||||
super().__init__(world, id, json_data)
|
||||
return
|
||||
|
||||
def _load_params(self, json_data):
|
||||
super()._load_params(json_data)
|
||||
def _load_params(self, json_data):
|
||||
super()._load_params(json_data)
|
||||
|
||||
self.epochs = 1 if (self.EPOCHS_KEY not in json_data) else json_data[self.EPOCHS_KEY]
|
||||
self.batch_size = 1024 if (self.BATCH_SIZE_KEY not in json_data) else json_data[self.BATCH_SIZE_KEY]
|
||||
self.ratio_clip = 0.2 if (self.RATIO_CLIP_KEY not in json_data) else json_data[self.RATIO_CLIP_KEY]
|
||||
self.norm_adv_clip = 5 if (self.NORM_ADV_CLIP_KEY not in json_data) else json_data[self.NORM_ADV_CLIP_KEY]
|
||||
self.td_lambda = 0.95 if (self.TD_LAMBDA_KEY not in json_data) else json_data[self.TD_LAMBDA_KEY]
|
||||
self.tar_clip_frac = -1 if (self.TAR_CLIP_FRAC not in json_data) else json_data[self.TAR_CLIP_FRAC]
|
||||
self.actor_stepsize_decay = 0.5 if (self.ACTOR_STEPSIZE_DECAY not in json_data) else json_data[self.ACTOR_STEPSIZE_DECAY]
|
||||
self.epochs = 1 if (self.EPOCHS_KEY not in json_data) else json_data[self.EPOCHS_KEY]
|
||||
self.batch_size = 1024 if (
|
||||
self.BATCH_SIZE_KEY not in json_data) else json_data[self.BATCH_SIZE_KEY]
|
||||
self.ratio_clip = 0.2 if (
|
||||
self.RATIO_CLIP_KEY not in json_data) else json_data[self.RATIO_CLIP_KEY]
|
||||
self.norm_adv_clip = 5 if (
|
||||
self.NORM_ADV_CLIP_KEY not in json_data) else json_data[self.NORM_ADV_CLIP_KEY]
|
||||
self.td_lambda = 0.95 if (
|
||||
self.TD_LAMBDA_KEY not in json_data) else json_data[self.TD_LAMBDA_KEY]
|
||||
self.tar_clip_frac = -1 if (
|
||||
self.TAR_CLIP_FRAC not in json_data) else json_data[self.TAR_CLIP_FRAC]
|
||||
self.actor_stepsize_decay = 0.5 if (
|
||||
self.ACTOR_STEPSIZE_DECAY not in json_data) else json_data[self.ACTOR_STEPSIZE_DECAY]
|
||||
|
||||
num_procs = MPIUtil.get_num_procs()
|
||||
local_batch_size = int(self.batch_size / num_procs)
|
||||
min_replay_size = 2 * local_batch_size # needed to prevent buffer overflow
|
||||
assert(self.replay_buffer_size > min_replay_size)
|
||||
num_procs = MPIUtil.get_num_procs()
|
||||
local_batch_size = int(self.batch_size / num_procs)
|
||||
min_replay_size = 2 * local_batch_size # needed to prevent buffer overflow
|
||||
assert (self.replay_buffer_size > min_replay_size)
|
||||
|
||||
self.replay_buffer_size = np.maximum(min_replay_size, self.replay_buffer_size)
|
||||
self.replay_buffer_size = np.maximum(min_replay_size, self.replay_buffer_size)
|
||||
|
||||
return
|
||||
return
|
||||
|
||||
def _build_nets(self, json_data):
|
||||
assert self.ACTOR_NET_KEY in json_data
|
||||
assert self.CRITIC_NET_KEY in json_data
|
||||
def _build_nets(self, json_data):
|
||||
assert self.ACTOR_NET_KEY in json_data
|
||||
assert self.CRITIC_NET_KEY in json_data
|
||||
|
||||
actor_net_name = json_data[self.ACTOR_NET_KEY]
|
||||
critic_net_name = json_data[self.CRITIC_NET_KEY]
|
||||
actor_init_output_scale = 1 if (self.ACTOR_INIT_OUTPUT_SCALE_KEY not in json_data) else json_data[self.ACTOR_INIT_OUTPUT_SCALE_KEY]
|
||||
actor_net_name = json_data[self.ACTOR_NET_KEY]
|
||||
critic_net_name = json_data[self.CRITIC_NET_KEY]
|
||||
actor_init_output_scale = 1 if (self.ACTOR_INIT_OUTPUT_SCALE_KEY not in json_data
|
||||
) else json_data[self.ACTOR_INIT_OUTPUT_SCALE_KEY]
|
||||
|
||||
s_size = self.get_state_size()
|
||||
g_size = self.get_goal_size()
|
||||
a_size = self.get_action_size()
|
||||
s_size = self.get_state_size()
|
||||
g_size = self.get_goal_size()
|
||||
a_size = self.get_action_size()
|
||||
|
||||
# setup input tensors
|
||||
self.s_tf = tf.placeholder(tf.float32, shape=[None, s_size], name="s")
|
||||
self.a_tf = tf.placeholder(tf.float32, shape=[None, a_size], name="a")
|
||||
self.tar_val_tf = tf.placeholder(tf.float32, shape=[None], name="tar_val")
|
||||
self.adv_tf = tf.placeholder(tf.float32, shape=[None], name="adv")
|
||||
self.g_tf = tf.placeholder(tf.float32, shape=([None, g_size] if self.has_goal() else None), name="g")
|
||||
self.old_logp_tf = tf.placeholder(tf.float32, shape=[None], name="old_logp")
|
||||
self.exp_mask_tf = tf.placeholder(tf.float32, shape=[None], name="exp_mask")
|
||||
# setup input tensors
|
||||
self.s_tf = tf.placeholder(tf.float32, shape=[None, s_size], name="s")
|
||||
self.a_tf = tf.placeholder(tf.float32, shape=[None, a_size], name="a")
|
||||
self.tar_val_tf = tf.placeholder(tf.float32, shape=[None], name="tar_val")
|
||||
self.adv_tf = tf.placeholder(tf.float32, shape=[None], name="adv")
|
||||
self.g_tf = tf.placeholder(tf.float32,
|
||||
shape=([None, g_size] if self.has_goal() else None),
|
||||
name="g")
|
||||
self.old_logp_tf = tf.placeholder(tf.float32, shape=[None], name="old_logp")
|
||||
self.exp_mask_tf = tf.placeholder(tf.float32, shape=[None], name="exp_mask")
|
||||
|
||||
with tf.variable_scope('main'):
|
||||
with tf.variable_scope('actor'):
|
||||
self.a_mean_tf = self._build_net_actor(actor_net_name, actor_init_output_scale)
|
||||
with tf.variable_scope('critic'):
|
||||
self.critic_tf = self._build_net_critic(critic_net_name)
|
||||
|
||||
if (self.a_mean_tf != None):
|
||||
Logger.print2('Built actor net: ' + actor_net_name)
|
||||
with tf.variable_scope('main'):
|
||||
with tf.variable_scope('actor'):
|
||||
self.a_mean_tf = self._build_net_actor(actor_net_name, actor_init_output_scale)
|
||||
with tf.variable_scope('critic'):
|
||||
self.critic_tf = self._build_net_critic(critic_net_name)
|
||||
|
||||
if (self.critic_tf != None):
|
||||
Logger.print2('Built critic net: ' + critic_net_name)
|
||||
|
||||
self.norm_a_std_tf = self.exp_params_curr.noise * tf.ones(a_size)
|
||||
norm_a_noise_tf = self.norm_a_std_tf * tf.random_normal(shape=tf.shape(self.a_mean_tf))
|
||||
norm_a_noise_tf *= tf.expand_dims(self.exp_mask_tf, axis=-1)
|
||||
self.sample_a_tf = self.a_mean_tf + norm_a_noise_tf * self.a_norm.std_tf
|
||||
self.sample_a_logp_tf = TFUtil.calc_logp_gaussian(x_tf=norm_a_noise_tf, mean_tf=None, std_tf=self.norm_a_std_tf)
|
||||
if (self.a_mean_tf != None):
|
||||
Logger.print2('Built actor net: ' + actor_net_name)
|
||||
|
||||
return
|
||||
if (self.critic_tf != None):
|
||||
Logger.print2('Built critic net: ' + critic_net_name)
|
||||
|
||||
def _build_losses(self, json_data):
|
||||
actor_weight_decay = 0 if (self.ACTOR_WEIGHT_DECAY_KEY not in json_data) else json_data[self.ACTOR_WEIGHT_DECAY_KEY]
|
||||
critic_weight_decay = 0 if (self.CRITIC_WEIGHT_DECAY_KEY not in json_data) else json_data[self.CRITIC_WEIGHT_DECAY_KEY]
|
||||
|
||||
norm_val_diff = self.val_norm.normalize_tf(self.tar_val_tf) - self.val_norm.normalize_tf(self.critic_tf)
|
||||
self.critic_loss_tf = 0.5 * tf.reduce_mean(tf.square(norm_val_diff))
|
||||
self.norm_a_std_tf = self.exp_params_curr.noise * tf.ones(a_size)
|
||||
norm_a_noise_tf = self.norm_a_std_tf * tf.random_normal(shape=tf.shape(self.a_mean_tf))
|
||||
norm_a_noise_tf *= tf.expand_dims(self.exp_mask_tf, axis=-1)
|
||||
self.sample_a_tf = self.a_mean_tf + norm_a_noise_tf * self.a_norm.std_tf
|
||||
self.sample_a_logp_tf = TFUtil.calc_logp_gaussian(x_tf=norm_a_noise_tf,
|
||||
mean_tf=None,
|
||||
std_tf=self.norm_a_std_tf)
|
||||
|
||||
if (critic_weight_decay != 0):
|
||||
self.critic_loss_tf += critic_weight_decay * self._weight_decay_loss('main/critic')
|
||||
|
||||
norm_tar_a_tf = self.a_norm.normalize_tf(self.a_tf)
|
||||
self._norm_a_mean_tf = self.a_norm.normalize_tf(self.a_mean_tf)
|
||||
return
|
||||
|
||||
self.logp_tf = TFUtil.calc_logp_gaussian(norm_tar_a_tf, self._norm_a_mean_tf, self.norm_a_std_tf)
|
||||
ratio_tf = tf.exp(self.logp_tf - self.old_logp_tf)
|
||||
actor_loss0 = self.adv_tf * ratio_tf
|
||||
actor_loss1 = self.adv_tf * tf.clip_by_value(ratio_tf, 1.0 - self.ratio_clip, 1 + self.ratio_clip)
|
||||
self.actor_loss_tf = -tf.reduce_mean(tf.minimum(actor_loss0, actor_loss1))
|
||||
def _build_losses(self, json_data):
|
||||
actor_weight_decay = 0 if (
|
||||
self.ACTOR_WEIGHT_DECAY_KEY not in json_data) else json_data[self.ACTOR_WEIGHT_DECAY_KEY]
|
||||
critic_weight_decay = 0 if (
|
||||
self.CRITIC_WEIGHT_DECAY_KEY not in json_data) else json_data[self.CRITIC_WEIGHT_DECAY_KEY]
|
||||
|
||||
norm_a_bound_min = self.a_norm.normalize(self.a_bound_min)
|
||||
norm_a_bound_max = self.a_norm.normalize(self.a_bound_max)
|
||||
a_bound_loss = TFUtil.calc_bound_loss(self._norm_a_mean_tf, norm_a_bound_min, norm_a_bound_max)
|
||||
self.actor_loss_tf += a_bound_loss
|
||||
norm_val_diff = self.val_norm.normalize_tf(self.tar_val_tf) - self.val_norm.normalize_tf(
|
||||
self.critic_tf)
|
||||
self.critic_loss_tf = 0.5 * tf.reduce_mean(tf.square(norm_val_diff))
|
||||
|
||||
if (actor_weight_decay != 0):
|
||||
self.actor_loss_tf += actor_weight_decay * self._weight_decay_loss('main/actor')
|
||||
|
||||
# for debugging
|
||||
self.clip_frac_tf = tf.reduce_mean(tf.to_float(tf.greater(tf.abs(ratio_tf - 1.0), self.ratio_clip)))
|
||||
if (critic_weight_decay != 0):
|
||||
self.critic_loss_tf += critic_weight_decay * self._weight_decay_loss('main/critic')
|
||||
|
||||
return
|
||||
norm_tar_a_tf = self.a_norm.normalize_tf(self.a_tf)
|
||||
self._norm_a_mean_tf = self.a_norm.normalize_tf(self.a_mean_tf)
|
||||
|
||||
def _build_solvers(self, json_data):
|
||||
actor_stepsize = 0.001 if (self.ACTOR_STEPSIZE_KEY not in json_data) else json_data[self.ACTOR_STEPSIZE_KEY]
|
||||
actor_momentum = 0.9 if (self.ACTOR_MOMENTUM_KEY not in json_data) else json_data[self.ACTOR_MOMENTUM_KEY]
|
||||
critic_stepsize = 0.01 if (self.CRITIC_STEPSIZE_KEY not in json_data) else json_data[self.CRITIC_STEPSIZE_KEY]
|
||||
critic_momentum = 0.9 if (self.CRITIC_MOMENTUM_KEY not in json_data) else json_data[self.CRITIC_MOMENTUM_KEY]
|
||||
|
||||
critic_vars = self._tf_vars('main/critic')
|
||||
critic_opt = tf.train.MomentumOptimizer(learning_rate=critic_stepsize, momentum=critic_momentum)
|
||||
self.critic_grad_tf = tf.gradients(self.critic_loss_tf, critic_vars)
|
||||
self.critic_solver = MPISolver(self.sess, critic_opt, critic_vars)
|
||||
self.logp_tf = TFUtil.calc_logp_gaussian(norm_tar_a_tf, self._norm_a_mean_tf,
|
||||
self.norm_a_std_tf)
|
||||
ratio_tf = tf.exp(self.logp_tf - self.old_logp_tf)
|
||||
actor_loss0 = self.adv_tf * ratio_tf
|
||||
actor_loss1 = self.adv_tf * tf.clip_by_value(ratio_tf, 1.0 - self.ratio_clip,
|
||||
1 + self.ratio_clip)
|
||||
self.actor_loss_tf = -tf.reduce_mean(tf.minimum(actor_loss0, actor_loss1))
|
||||
|
||||
self._actor_stepsize_tf = tf.get_variable(dtype=tf.float32, name='actor_stepsize', initializer=actor_stepsize, trainable=False)
|
||||
self._actor_stepsize_ph = tf.get_variable(dtype=tf.float32, name='actor_stepsize_ph', shape=[])
|
||||
self._actor_stepsize_update_op = self._actor_stepsize_tf.assign(self._actor_stepsize_ph)
|
||||
norm_a_bound_min = self.a_norm.normalize(self.a_bound_min)
|
||||
norm_a_bound_max = self.a_norm.normalize(self.a_bound_max)
|
||||
a_bound_loss = TFUtil.calc_bound_loss(self._norm_a_mean_tf, norm_a_bound_min, norm_a_bound_max)
|
||||
self.actor_loss_tf += a_bound_loss
|
||||
|
||||
actor_vars = self._tf_vars('main/actor')
|
||||
actor_opt = tf.train.MomentumOptimizer(learning_rate=self._actor_stepsize_tf, momentum=actor_momentum)
|
||||
self.actor_grad_tf = tf.gradients(self.actor_loss_tf, actor_vars)
|
||||
self.actor_solver = MPISolver(self.sess, actor_opt, actor_vars)
|
||||
|
||||
return
|
||||
if (actor_weight_decay != 0):
|
||||
self.actor_loss_tf += actor_weight_decay * self._weight_decay_loss('main/actor')
|
||||
|
||||
def _decide_action(self, s, g):
|
||||
with self.sess.as_default(), self.graph.as_default():
|
||||
self._exp_action = self._enable_stoch_policy() and MathUtil.flip_coin(self.exp_params_curr.rate)
|
||||
#print("_decide_action._exp_action=",self._exp_action)
|
||||
a, logp = self._eval_actor(s, g, self._exp_action)
|
||||
return a[0], logp[0]
|
||||
# for debugging
|
||||
self.clip_frac_tf = tf.reduce_mean(
|
||||
tf.to_float(tf.greater(tf.abs(ratio_tf - 1.0), self.ratio_clip)))
|
||||
|
||||
def _eval_actor(self, s, g, enable_exp):
|
||||
s = np.reshape(s, [-1, self.get_state_size()])
|
||||
g = np.reshape(g, [-1, self.get_goal_size()]) if self.has_goal() else None
|
||||
|
||||
feed = {
|
||||
self.s_tf : s,
|
||||
self.g_tf : g,
|
||||
self.exp_mask_tf: np.array([1 if enable_exp else 0])
|
||||
}
|
||||
return
|
||||
|
||||
a, logp = self.sess.run([self.sample_a_tf, self.sample_a_logp_tf], feed_dict=feed)
|
||||
return a, logp
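The logp returned by _eval_actor is stored with each sample and later compared against the current policy in _build_losses through the probability ratio r = exp(logp_new - logp_old); the actor loss keeps the more pessimistic of the unclipped and clipped surrogate terms. A minimal NumPy sketch of that objective (not part of this pull request):

import numpy as np

def clipped_surrogate(logp_new, logp_old, adv, ratio_clip=0.2):
  ratio = np.exp(logp_new - logp_old)
  loss0 = adv * ratio
  loss1 = adv * np.clip(ratio, 1.0 - ratio_clip, 1.0 + ratio_clip)
  return -np.mean(np.minimum(loss0, loss1))

# a large ratio with positive advantage is capped at 1 + ratio_clip
print(clipped_surrogate(np.array([0.5]), np.array([0.0]), np.array([1.0])))   # -1.2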
|
||||
def _build_solvers(self, json_data):
|
||||
actor_stepsize = 0.001 if (
|
||||
self.ACTOR_STEPSIZE_KEY not in json_data) else json_data[self.ACTOR_STEPSIZE_KEY]
|
||||
actor_momentum = 0.9 if (
|
||||
self.ACTOR_MOMENTUM_KEY not in json_data) else json_data[self.ACTOR_MOMENTUM_KEY]
|
||||
critic_stepsize = 0.01 if (
|
||||
self.CRITIC_STEPSIZE_KEY not in json_data) else json_data[self.CRITIC_STEPSIZE_KEY]
|
||||
critic_momentum = 0.9 if (
|
||||
self.CRITIC_MOMENTUM_KEY not in json_data) else json_data[self.CRITIC_MOMENTUM_KEY]
|
||||
|
||||
def _train_step(self):
|
||||
adv_eps = 1e-5
|
||||
critic_vars = self._tf_vars('main/critic')
|
||||
critic_opt = tf.train.MomentumOptimizer(learning_rate=critic_stepsize,
|
||||
momentum=critic_momentum)
|
||||
self.critic_grad_tf = tf.gradients(self.critic_loss_tf, critic_vars)
|
||||
self.critic_solver = MPISolver(self.sess, critic_opt, critic_vars)
|
||||
|
||||
start_idx = self.replay_buffer.buffer_tail
|
||||
end_idx = self.replay_buffer.buffer_head
|
||||
assert(start_idx == 0)
|
||||
assert(self.replay_buffer.get_current_size() <= self.replay_buffer.buffer_size) # must avoid overflow
|
||||
assert(start_idx < end_idx)
|
||||
self._actor_stepsize_tf = tf.get_variable(dtype=tf.float32,
|
||||
name='actor_stepsize',
|
||||
initializer=actor_stepsize,
|
||||
trainable=False)
|
||||
self._actor_stepsize_ph = tf.get_variable(dtype=tf.float32, name='actor_stepsize_ph', shape=[])
|
||||
self._actor_stepsize_update_op = self._actor_stepsize_tf.assign(self._actor_stepsize_ph)
|
||||
|
||||
idx = np.array(list(range(start_idx, end_idx)))
|
||||
end_mask = self.replay_buffer.is_path_end(idx)
|
||||
end_mask = np.logical_not(end_mask)
|
||||
|
||||
vals = self._compute_batch_vals(start_idx, end_idx)
|
||||
new_vals = self._compute_batch_new_vals(start_idx, end_idx, vals)
|
||||
actor_vars = self._tf_vars('main/actor')
|
||||
actor_opt = tf.train.MomentumOptimizer(learning_rate=self._actor_stepsize_tf,
|
||||
momentum=actor_momentum)
|
||||
self.actor_grad_tf = tf.gradients(self.actor_loss_tf, actor_vars)
|
||||
self.actor_solver = MPISolver(self.sess, actor_opt, actor_vars)
|
||||
|
||||
valid_idx = idx[end_mask]
|
||||
exp_idx = self.replay_buffer.get_idx_filtered(self.EXP_ACTION_FLAG).copy()
|
||||
num_valid_idx = valid_idx.shape[0]
|
||||
num_exp_idx = exp_idx.shape[0]
|
||||
exp_idx = np.column_stack([exp_idx, np.array(list(range(0, num_exp_idx)), dtype=np.int32)])
|
||||
|
||||
local_sample_count = valid_idx.size
|
||||
global_sample_count = int(MPIUtil.reduce_sum(local_sample_count))
|
||||
mini_batches = int(np.ceil(global_sample_count / self.mini_batch_size))
|
||||
|
||||
adv = new_vals[exp_idx[:,0]] - vals[exp_idx[:,0]]
|
||||
new_vals = np.clip(new_vals, self.val_min, self.val_max)
|
||||
return
|
||||
|
||||
adv_mean = np.mean(adv)
|
||||
adv_std = np.std(adv)
|
||||
adv = (adv - adv_mean) / (adv_std + adv_eps)
|
||||
adv = np.clip(adv, -self.norm_adv_clip, self.norm_adv_clip)
|
||||
def _decide_action(self, s, g):
|
||||
with self.sess.as_default(), self.graph.as_default():
|
||||
self._exp_action = self._enable_stoch_policy() and MathUtil.flip_coin(
|
||||
self.exp_params_curr.rate)
|
||||
#print("_decide_action._exp_action=",self._exp_action)
|
||||
a, logp = self._eval_actor(s, g, self._exp_action)
|
||||
return a[0], logp[0]
|
||||
|
||||
critic_loss = 0
|
||||
actor_loss = 0
|
||||
actor_clip_frac = 0
|
||||
def _eval_actor(self, s, g, enable_exp):
|
||||
s = np.reshape(s, [-1, self.get_state_size()])
|
||||
g = np.reshape(g, [-1, self.get_goal_size()]) if self.has_goal() else None
|
||||
|
||||
for e in range(self.epochs):
|
||||
np.random.shuffle(valid_idx)
|
||||
np.random.shuffle(exp_idx)
|
||||
feed = {self.s_tf: s, self.g_tf: g, self.exp_mask_tf: np.array([1 if enable_exp else 0])}
|
||||
|
||||
for b in range(mini_batches):
|
||||
batch_idx_beg = b * self._local_mini_batch_size
|
||||
batch_idx_end = batch_idx_beg + self._local_mini_batch_size
|
||||
a, logp = self.sess.run([self.sample_a_tf, self.sample_a_logp_tf], feed_dict=feed)
|
||||
return a, logp
|
||||
|
||||
critic_batch = np.array(range(batch_idx_beg, batch_idx_end), dtype=np.int32)
|
||||
actor_batch = critic_batch.copy()
|
||||
critic_batch = np.mod(critic_batch, num_valid_idx)
|
||||
actor_batch = np.mod(actor_batch, num_exp_idx)
|
||||
shuffle_actor = (actor_batch[-1] < actor_batch[0]) or (actor_batch[-1] == num_exp_idx - 1)
|
||||
def _train_step(self):
|
||||
adv_eps = 1e-5
|
||||
|
||||
critic_batch = valid_idx[critic_batch]
|
||||
actor_batch = exp_idx[actor_batch]
|
||||
critic_batch_vals = new_vals[critic_batch]
|
||||
actor_batch_adv = adv[actor_batch[:,1]]
|
||||
start_idx = self.replay_buffer.buffer_tail
|
||||
end_idx = self.replay_buffer.buffer_head
|
||||
assert (start_idx == 0)
|
||||
assert (self.replay_buffer.get_current_size() <= self.replay_buffer.buffer_size
|
||||
) # must avoid overflow
|
||||
assert (start_idx < end_idx)
|
||||
|
||||
critic_s = self.replay_buffer.get('states', critic_batch)
|
||||
critic_g = self.replay_buffer.get('goals', critic_batch) if self.has_goal() else None
|
||||
curr_critic_loss = self._update_critic(critic_s, critic_g, critic_batch_vals)
|
||||
idx = np.array(list(range(start_idx, end_idx)))
|
||||
end_mask = self.replay_buffer.is_path_end(idx)
|
||||
end_mask = np.logical_not(end_mask)
|
||||
|
||||
actor_s = self.replay_buffer.get("states", actor_batch[:,0])
|
||||
actor_g = self.replay_buffer.get("goals", actor_batch[:,0]) if self.has_goal() else None
|
||||
actor_a = self.replay_buffer.get("actions", actor_batch[:,0])
|
||||
    vals = self._compute_batch_vals(start_idx, end_idx)
    new_vals = self._compute_batch_new_vals(start_idx, end_idx, vals)

    valid_idx = idx[end_mask]
    exp_idx = self.replay_buffer.get_idx_filtered(self.EXP_ACTION_FLAG).copy()
    num_valid_idx = valid_idx.shape[0]
    num_exp_idx = exp_idx.shape[0]
    exp_idx = np.column_stack([exp_idx, np.array(list(range(0, num_exp_idx)), dtype=np.int32)])

    local_sample_count = valid_idx.size
    global_sample_count = int(MPIUtil.reduce_sum(local_sample_count))
    mini_batches = int(np.ceil(global_sample_count / self.mini_batch_size))

    adv = new_vals[exp_idx[:, 0]] - vals[exp_idx[:, 0]]
    new_vals = np.clip(new_vals, self.val_min, self.val_max)

    adv_mean = np.mean(adv)
    adv_std = np.std(adv)
    adv = (adv - adv_mean) / (adv_std + adv_eps)
    adv = np.clip(adv, -self.norm_adv_clip, self.norm_adv_clip)

    critic_loss = 0
    actor_loss = 0
    actor_clip_frac = 0

    for e in range(self.epochs):
      np.random.shuffle(valid_idx)
      np.random.shuffle(exp_idx)

      for b in range(mini_batches):
        batch_idx_beg = b * self._local_mini_batch_size
        batch_idx_end = batch_idx_beg + self._local_mini_batch_size

        critic_batch = np.array(range(batch_idx_beg, batch_idx_end), dtype=np.int32)
        actor_batch = critic_batch.copy()
        critic_batch = np.mod(critic_batch, num_valid_idx)
        actor_batch = np.mod(actor_batch, num_exp_idx)
        shuffle_actor = (actor_batch[-1] < actor_batch[0]) or (actor_batch[-1] == num_exp_idx - 1)

        critic_batch = valid_idx[critic_batch]
        actor_batch = exp_idx[actor_batch]
        critic_batch_vals = new_vals[critic_batch]
        actor_batch_adv = adv[actor_batch[:, 1]]

        critic_s = self.replay_buffer.get('states', critic_batch)
        critic_g = self.replay_buffer.get('goals', critic_batch) if self.has_goal() else None
        curr_critic_loss = self._update_critic(critic_s, critic_g, critic_batch_vals)

        actor_s = self.replay_buffer.get("states", actor_batch[:, 0])
        actor_g = self.replay_buffer.get("goals", actor_batch[:, 0]) if self.has_goal() else None
        actor_a = self.replay_buffer.get("actions", actor_batch[:, 0])
        actor_logp = self.replay_buffer.get("logps", actor_batch[:, 0])
        curr_actor_loss, curr_actor_clip_frac = self._update_actor(actor_s, actor_g, actor_a,
                                                                   actor_logp, actor_batch_adv)

        critic_loss += curr_critic_loss
        actor_loss += np.abs(curr_actor_loss)
        actor_clip_frac += curr_actor_clip_frac

        if (shuffle_actor):
          np.random.shuffle(exp_idx)

    total_batches = mini_batches * self.epochs
    critic_loss /= total_batches
    actor_loss /= total_batches
    actor_clip_frac /= total_batches

    critic_loss = MPIUtil.reduce_avg(critic_loss)
    actor_loss = MPIUtil.reduce_avg(actor_loss)
    actor_clip_frac = MPIUtil.reduce_avg(actor_clip_frac)

    critic_stepsize = self.critic_solver.get_stepsize()
    actor_stepsize = self.update_actor_stepsize(actor_clip_frac)

    self.logger.log_tabular('Critic_Loss', critic_loss)
    self.logger.log_tabular('Critic_Stepsize', critic_stepsize)
    self.logger.log_tabular('Actor_Loss', actor_loss)
    self.logger.log_tabular('Actor_Stepsize', actor_stepsize)
    self.logger.log_tabular('Clip_Frac', actor_clip_frac)
    self.logger.log_tabular('Adv_Mean', adv_mean)
    self.logger.log_tabular('Adv_Std', adv_std)

    self.replay_buffer.clear()

    return

  def _get_iters_per_update(self):
    return 1

  def _valid_train_step(self):
    samples = self.replay_buffer.get_current_size()
    exp_samples = self.replay_buffer.count_filtered(self.EXP_ACTION_FLAG)
    global_sample_count = int(MPIUtil.reduce_sum(samples))
    global_exp_min = int(MPIUtil.reduce_min(exp_samples))
    return (global_sample_count > self.batch_size) and (global_exp_min > 0)

  def _compute_batch_vals(self, start_idx, end_idx):
    states = self.replay_buffer.get_all("states")[start_idx:end_idx]
    goals = self.replay_buffer.get_all("goals")[start_idx:end_idx] if self.has_goal() else None

    idx = np.array(list(range(start_idx, end_idx)))
    is_end = self.replay_buffer.is_path_end(idx)
    is_fail = self.replay_buffer.check_terminal_flag(idx, Env.Terminate.Fail)
    is_succ = self.replay_buffer.check_terminal_flag(idx, Env.Terminate.Succ)
    is_fail = np.logical_and(is_end, is_fail)
    is_succ = np.logical_and(is_end, is_succ)

    vals = self._eval_critic(states, goals)
    vals[is_fail] = self.val_fail
    vals[is_succ] = self.val_succ

    return vals

  def _compute_batch_new_vals(self, start_idx, end_idx, val_buffer):
    rewards = self.replay_buffer.get_all("rewards")[start_idx:end_idx]

    if self.discount == 0:
      new_vals = rewards.copy()
    else:
      new_vals = np.zeros_like(val_buffer)

      curr_idx = start_idx
      while curr_idx < end_idx:
        idx0 = curr_idx - start_idx
        idx1 = self.replay_buffer.get_path_end(curr_idx) - start_idx
        r = rewards[idx0:idx1]
        v = val_buffer[idx0:(idx1 + 1)]

        new_vals[idx0:idx1] = RLUtil.compute_return(r, self.discount, self.td_lambda, v)
        curr_idx = idx1 + start_idx + 1

    return new_vals

  def _update_critic(self, s, g, tar_vals):
    feed = {self.s_tf: s, self.g_tf: g, self.tar_val_tf: tar_vals}

    loss, grads = self.sess.run([self.critic_loss_tf, self.critic_grad_tf], feed)
    self.critic_solver.update(grads)
    return loss

  def _update_actor(self, s, g, a, logp, adv):
    feed = {self.s_tf: s, self.g_tf: g, self.a_tf: a, self.adv_tf: adv, self.old_logp_tf: logp}

    loss, grads, clip_frac = self.sess.run(
        [self.actor_loss_tf, self.actor_grad_tf, self.clip_frac_tf], feed)
    self.actor_solver.update(grads)

    return loss, clip_frac

  def update_actor_stepsize(self, clip_frac):
    clip_tol = 1.5
    step_scale = 2
    max_stepsize = 1e-2
    min_stepsize = 1e-8
    warmup_iters = 5

    actor_stepsize = self.actor_solver.get_stepsize()
    if (self.tar_clip_frac >= 0 and self.iter > warmup_iters):
      min_clip = self.tar_clip_frac / clip_tol
      max_clip = self.tar_clip_frac * clip_tol
      under_tol = clip_frac < min_clip
      over_tol = clip_frac > max_clip

      if (over_tol or under_tol):
        if (over_tol):
          actor_stepsize *= self.actor_stepsize_decay
        else:
          actor_stepsize /= self.actor_stepsize_decay

        actor_stepsize = np.clip(actor_stepsize, min_stepsize, max_stepsize)
        self.set_actor_stepsize(actor_stepsize)

    return actor_stepsize

  def set_actor_stepsize(self, stepsize):
    feed = {
        self._actor_stepsize_ph: stepsize,
    }
    self.sess.run(self._actor_stepsize_update_op, feed)
    return

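# Note: update_actor_stepsize above shrinks or grows the actor learning rate whenever the
# observed PPO clip fraction drifts outside a tolerance band around tar_clip_frac. Below is a
# minimal standalone sketch of that decision rule; the function name and constants are
# illustrative, not the agent's configured values.
import numpy as np


def adapt_stepsize(stepsize, clip_frac, tar_clip_frac=0.25, decay=0.5,
                   clip_tol=1.5, min_stepsize=1e-8, max_stepsize=1e-2):
  # Shrink the stepsize when too many samples are clipped, grow it when too few are.
  if clip_frac > tar_clip_frac * clip_tol:
    stepsize *= decay
  elif clip_frac < tar_clip_frac / clip_tol:
    stepsize /= decay
  return float(np.clip(stepsize, min_stepsize, max_stepsize))


print(adapt_stepsize(1e-3, clip_frac=0.6))   # over tolerance -> smaller step
print(adapt_stepsize(1e-3, clip_frac=0.05))  # under tolerance -> larger step
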
@@ -5,347 +5,353 @@ import inspect as inspect
from pybullet_envs.deep_mimic.env.env import Env
import pybullet_utils.math_util as MathUtil


class ReplayBuffer(object):
  TERMINATE_KEY = 'terminate'
  PATH_START_KEY = 'path_start'
  PATH_END_KEY = 'path_end'

  def __init__(self, buffer_size):
    assert buffer_size > 0

    self.buffer_size = buffer_size
    self.total_count = 0
    self.buffer_head = 0
    self.buffer_tail = MathUtil.INVALID_IDX
    self.num_paths = 0
    self._sample_buffers = dict()
    self.buffers = None

    self.clear()
    return

  def sample(self, n):
    curr_size = self.get_current_size()
    assert curr_size > 0

    idx = np.empty(n, dtype=int)
    # makes sure that the end states are not sampled
    for i in range(n):
      while True:
        curr_idx = np.random.randint(0, curr_size, size=1)[0]
        curr_idx += self.buffer_tail
        curr_idx = np.mod(curr_idx, self.buffer_size)

        if not self.is_path_end(curr_idx):
          break
      idx[i] = curr_idx

    return idx

  def sample_filtered(self, n, key):
    assert key in self._sample_buffers
    curr_buffer = self._sample_buffers[key]
    idx = curr_buffer.sample(n)
    return idx

  def count_filtered(self, key):
    curr_buffer = self._sample_buffers[key]
    return curr_buffer.count

  def get(self, key, idx):
    return self.buffers[key][idx]

  def get_all(self, key):
    return self.buffers[key]

  def get_idx_filtered(self, key):
    assert key in self._sample_buffers
    curr_buffer = self._sample_buffers[key]
    idx = curr_buffer.slot_to_idx[:curr_buffer.count]
    return idx

  def get_path_start(self, idx):
    return self.buffers[self.PATH_START_KEY][idx]

  def get_path_end(self, idx):
    return self.buffers[self.PATH_END_KEY][idx]

  def get_pathlen(self, idx):
    is_array = isinstance(idx, np.ndarray) or isinstance(idx, list)
    if not is_array:
      idx = [idx]

    n = len(idx)
    start_idx = self.get_path_start(idx)
    end_idx = self.get_path_end(idx)
    pathlen = np.empty(n, dtype=int)

    for i in range(n):
      curr_start = start_idx[i]
      curr_end = end_idx[i]
      if curr_start < curr_end:
        curr_len = curr_end - curr_start
      else:
        curr_len = self.buffer_size - curr_start + curr_end
      pathlen[i] = curr_len

    if not is_array:
      pathlen = pathlen[0]

    return pathlen

  def is_valid_path(self, idx):
    start_idx = self.get_path_start(idx)
    valid = start_idx != MathUtil.INVALID_IDX
    return valid

  def store(self, path):
    start_idx = MathUtil.INVALID_IDX
    n = path.pathlength()

    if (n > 0):
      assert path.is_valid()

      if path.check_vals():
        if self.buffers is None:
          self._init_buffers(path)

        idx = self._request_idx(n + 1)
        self._store_path(path, idx)
        self._add_sample_buffers(idx)

        self.num_paths += 1
        self.total_count += n + 1
        start_idx = idx[0]
      else:
        Logger.print2('Invalid path data value detected')

    return start_idx

  def clear(self):
    self.buffer_head = 0
    self.buffer_tail = MathUtil.INVALID_IDX
    self.num_paths = 0

    for key in self._sample_buffers:
      self._sample_buffers[key].clear()
    return

  def get_next_idx(self, idx):
    next_idx = np.mod(idx + 1, self.buffer_size)
    return next_idx

  def is_terminal_state(self, idx):
    terminate_flags = self.buffers[self.TERMINATE_KEY][idx]
    terminate = terminate_flags != Env.Terminate.Null.value
    is_end = self.is_path_end(idx)
    terminal_state = np.logical_and(terminate, is_end)
    return terminal_state

  def check_terminal_flag(self, idx, flag):
    terminate_flags = self.buffers[self.TERMINATE_KEY][idx]
    terminate = terminate_flags == flag.value
    return terminate

  def is_path_end(self, idx):
    is_end = self.buffers[self.PATH_END_KEY][idx] == idx
    return is_end

  def add_filter_key(self, key):
    assert self.get_current_size() == 0
    if key not in self._sample_buffers:
      self._sample_buffers[key] = SampleBuffer(self.buffer_size)
    return

  def get_current_size(self):
    if self.buffer_tail == MathUtil.INVALID_IDX:
      return 0
    elif self.buffer_tail < self.buffer_head:
      return self.buffer_head - self.buffer_tail
    else:
      return self.buffer_size - self.buffer_tail + self.buffer_head

  def _check_flags(self, key, flags):
    return (flags & key) == key

  def _add_sample_buffers(self, idx):
    flags = self.buffers['flags']
    for key in self._sample_buffers:
      curr_buffer = self._sample_buffers[key]
      filter_idx = [
          i for i in idx if (self._check_flags(key, flags[i]) and not self.is_path_end(i))
      ]
      curr_buffer.add(filter_idx)
    return

  def _free_sample_buffers(self, idx):
    for key in self._sample_buffers:
      curr_buffer = self._sample_buffers[key]
      curr_buffer.free(idx)
    return

  def _init_buffers(self, path):
    self.buffers = dict()
    self.buffers[self.PATH_START_KEY] = MathUtil.INVALID_IDX * np.ones(self.buffer_size, dtype=int)
    self.buffers[self.PATH_END_KEY] = MathUtil.INVALID_IDX * np.ones(self.buffer_size, dtype=int)

    for key in dir(path):
      val = getattr(path, key)
      if not key.startswith('__') and not inspect.ismethod(val):
        if key == self.TERMINATE_KEY:
          self.buffers[self.TERMINATE_KEY] = np.zeros(shape=[self.buffer_size], dtype=int)
        else:
          val_type = type(val[0])
          is_array = val_type == np.ndarray
          if is_array:
            shape = [self.buffer_size, val[0].shape[0]]
            dtype = val[0].dtype
          else:
            shape = [self.buffer_size]
            dtype = val_type

          self.buffers[key] = np.zeros(shape, dtype=dtype)
    return

  def _request_idx(self, n):
    assert n + 1 < self.buffer_size  # bad things can happen if path is too long

    remainder = n
    idx = []

    start_idx = self.buffer_head
    while remainder > 0:
      end_idx = np.minimum(start_idx + remainder, self.buffer_size)
      remainder -= (end_idx - start_idx)

      free_idx = list(range(start_idx, end_idx))
      self._free_idx(free_idx)
      idx += free_idx
      start_idx = 0

    self.buffer_head = (self.buffer_head + n) % self.buffer_size
    return idx

  def _free_idx(self, idx):
    assert (idx[0] <= idx[-1])
    n = len(idx)
    if self.buffer_tail != MathUtil.INVALID_IDX:
      update_tail = idx[0] <= idx[-1] and idx[0] <= self.buffer_tail and idx[-1] >= self.buffer_tail
      update_tail |= idx[0] > idx[-1] and (idx[0] <= self.buffer_tail or
                                           idx[-1] >= self.buffer_tail)

      if update_tail:
        i = 0
        while i < n:
          curr_idx = idx[i]
          if self.is_valid_path(curr_idx):
            start_idx = self.get_path_start(curr_idx)
            end_idx = self.get_path_end(curr_idx)
            pathlen = self.get_pathlen(curr_idx)

            if start_idx < end_idx:
              self.buffers[self.PATH_START_KEY][start_idx:end_idx + 1] = MathUtil.INVALID_IDX
              self._free_sample_buffers(list(range(start_idx, end_idx + 1)))
            else:
              self.buffers[self.PATH_START_KEY][start_idx:self.buffer_size] = MathUtil.INVALID_IDX
              self.buffers[self.PATH_START_KEY][0:end_idx + 1] = MathUtil.INVALID_IDX
              self._free_sample_buffers(list(range(start_idx, self.buffer_size)))
              self._free_sample_buffers(list(range(0, end_idx + 1)))

            self.num_paths -= 1
            i += pathlen + 1
            self.buffer_tail = (end_idx + 1) % self.buffer_size
          else:
            i += 1
    else:
      self.buffer_tail = idx[0]
    return

  def _store_path(self, path, idx):
    n = path.pathlength()
    for key, data in self.buffers.items():
      if key != self.PATH_START_KEY and key != self.PATH_END_KEY and key != self.TERMINATE_KEY:
        val = getattr(path, key)
        val_len = len(val)
        assert val_len == n or val_len == n + 1
        data[idx[:val_len]] = val

    self.buffers[self.TERMINATE_KEY][idx] = path.terminate.value
    self.buffers[self.PATH_START_KEY][idx] = idx[0]
    self.buffers[self.PATH_END_KEY][idx] = idx[-1]
    return


class SampleBuffer(object):

  def __init__(self, size):
    self.idx_to_slot = np.empty(shape=[size], dtype=int)
    self.slot_to_idx = np.empty(shape=[size], dtype=int)
    self.count = 0
    self.clear()
    return

  def clear(self):
    self.idx_to_slot.fill(MathUtil.INVALID_IDX)
    self.slot_to_idx.fill(MathUtil.INVALID_IDX)
    self.count = 0
    return

  def is_valid(self, idx):
    return self.idx_to_slot[idx] != MathUtil.INVALID_IDX

  def get_size(self):
    return self.idx_to_slot.shape[0]

  def add(self, idx):
    for i in idx:
      if not self.is_valid(i):
        new_slot = self.count
        assert new_slot >= 0

        self.idx_to_slot[i] = new_slot
        self.slot_to_idx[new_slot] = i
        self.count += 1
    return

  def free(self, idx):
    for i in idx:
      if self.is_valid(i):
        slot = self.idx_to_slot[i]
        last_slot = self.count - 1
        last_idx = self.slot_to_idx[last_slot]

        self.idx_to_slot[last_idx] = slot
        self.slot_to_idx[slot] = last_idx
        self.idx_to_slot[i] = MathUtil.INVALID_IDX
        self.slot_to_idx[last_slot] = MathUtil.INVALID_IDX
        self.count -= 1
    return

  def sample(self, n):
    if self.count > 0:
      slots = np.random.randint(0, self.count, size=n)
      idx = self.slot_to_idx[slots]
    else:
      idx = np.empty(shape=[0], dtype=int)
    return idx

  def check_consistency(self):
    valid = True
    if self.count < 0:
      valid = False

    if valid:
      for i in range(self.get_size()):
        if self.is_valid(i):
          s = self.idx_to_slot[i]
          if self.slot_to_idx[s] != i:
            valid = False
            break

        s2i = self.slot_to_idx[i]
        if s2i != MathUtil.INVALID_IDX:
          i2s = self.idx_to_slot[s2i]
          if i2s != i:
            valid = False
            break

    count0 = np.sum(self.idx_to_slot == MathUtil.INVALID_IDX)
    count1 = np.sum(self.slot_to_idx == MathUtil.INVALID_IDX)
    valid &= count0 == count1
    return valid

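# Note: SampleBuffer above keeps a dense slot_to_idx array plus an inverse idx_to_slot map so
# that add, free, and uniform sampling are all O(1); freeing an index swaps the last occupied
# slot into the freed slot. Below is a minimal sketch of the same bookkeeping with plain Python
# containers; the class and names are hypothetical, not part of the module.
import random


class DenseIndexSet:
  """Toy version of the slot/index bookkeeping used by SampleBuffer."""

  def __init__(self):
    self.slots = []  # dense list of stored indices
    self.pos = {}  # index -> position in self.slots

  def add(self, i):
    if i not in self.pos:
      self.pos[i] = len(self.slots)
      self.slots.append(i)

  def free(self, i):
    p = self.pos.pop(i, None)
    if p is not None:
      last = self.slots.pop()
      if p < len(self.slots):
        self.slots[p] = last  # swap the last element into the hole
        self.pos[last] = p

  def sample(self):
    return random.choice(self.slots) if self.slots else None


s = DenseIndexSet()
for i in [3, 7, 11]:
  s.add(i)
s.free(7)
print(s.sample())  # 3 or 11
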
File diff suppressed because it is too large
@@ -1,18 +1,19 @@
import numpy as np


def compute_return(rewards, gamma, td_lambda, val_t):
  # computes td-lambda return of path
  path_len = len(rewards)
  assert len(val_t) == path_len + 1

  return_t = np.zeros(path_len)
  last_val = rewards[-1] + gamma * val_t[-1]
  return_t[-1] = last_val

  for i in reversed(range(0, path_len - 1)):
    curr_r = rewards[i]
    next_ret = return_t[i + 1]
    curr_val = curr_r + gamma * ((1.0 - td_lambda) * val_t[i + 1] + td_lambda * next_ret)
    return_t[i] = curr_val

  return return_t

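# Note: compute_return walks the path backwards and blends the bootstrapped one-step target
# with the recursively accumulated return, weighted by td_lambda: with td_lambda = 0 it reduces
# to one-step TD targets r_t + gamma * V(s_{t+1}), and with td_lambda = 1 to the discounted
# return bootstrapped from the final value. A small usage sketch with made-up numbers, assuming
# the compute_return function above is in scope:
import numpy as np

rewards = np.array([1.0, 1.0, 1.0])
val_t = np.array([0.5, 0.4, 0.3, 0.2])  # one more entry than rewards

# td_lambda = 0 -> one-step TD targets
print(compute_return(rewards, gamma=0.95, td_lambda=0.0, val_t=val_t))
# td_lambda = 1 -> full discounted return bootstrapped from the last value
print(compute_return(rewards, gamma=0.95, td_lambda=1.0, val_t=val_t))
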
@@ -5,139 +5,140 @@ from pybullet_envs.deep_mimic.learning.rl_agent import RLAgent
from pybullet_utils.logger import Logger
import pybullet_data


class RLWorld(object):

  def __init__(self, env, arg_parser):
    TFUtil.disable_gpu()

    self.env = env
    self.arg_parser = arg_parser
    self._enable_training = True
    self.train_agents = []
    self.parse_args(arg_parser)

    self.build_agents()

    return

  def get_enable_training(self):
    return self._enable_training

  def set_enable_training(self, enable):
    self._enable_training = enable
    for i in range(len(self.agents)):
      curr_agent = self.agents[i]
      if curr_agent is not None:
        enable_curr_train = self.train_agents[i] if (len(self.train_agents) > 0) else True
        curr_agent.enable_training = self.enable_training and enable_curr_train

    if (self._enable_training):
      self.env.set_mode(RLAgent.Mode.TRAIN)
    else:
      self.env.set_mode(RLAgent.Mode.TEST)

    return

  enable_training = property(get_enable_training, set_enable_training)

  def parse_args(self, arg_parser):
    self.train_agents = self.arg_parser.parse_bools('train_agents')
    num_agents = self.env.get_num_agents()
    assert (len(self.train_agents) == num_agents or len(self.train_agents) == 0)

    return

  def shutdown(self):
    self.env.shutdown()
    return

  def build_agents(self):
    num_agents = self.env.get_num_agents()
    print("num_agents=", num_agents)
    self.agents = []

    Logger.print2('')
    Logger.print2('Num Agents: {:d}'.format(num_agents))

    agent_files = self.arg_parser.parse_strings('agent_files')
    print("len(agent_files)=", len(agent_files))
    assert (len(agent_files) == num_agents or len(agent_files) == 0)

    model_files = self.arg_parser.parse_strings('model_files')
    assert (len(model_files) == num_agents or len(model_files) == 0)

    output_path = self.arg_parser.parse_string('output_path')
    int_output_path = self.arg_parser.parse_string('int_output_path')

    for i in range(num_agents):
      curr_file = agent_files[i]
      curr_agent = self._build_agent(i, curr_file)

      if curr_agent is not None:
        curr_agent.output_dir = output_path
        curr_agent.int_output_dir = int_output_path
        Logger.print2(str(curr_agent))

        if (len(model_files) > 0):
          curr_model_file = model_files[i]
          if curr_model_file != 'none':
            curr_agent.load_model(pybullet_data.getDataPath() + "/" + curr_model_file)

      self.agents.append(curr_agent)
      Logger.print2('')

    self.set_enable_training(self.enable_training)

    return

  def update(self, timestep):
    #print("world update!\n")
    self._update_agents(timestep)
    self._update_env(timestep)
    return

  def reset(self):
    self._reset_agents()
    self._reset_env()
    return

  def end_episode(self):
    self._end_episode_agents()
    return

  def _update_env(self, timestep):
    self.env.update(timestep)
    return

  def _update_agents(self, timestep):
    #print("len(agents)=",len(self.agents))
    for agent in self.agents:
      if (agent is not None):
        agent.update(timestep)
    return

  def _reset_env(self):
    self.env.reset()
    return

  def _reset_agents(self):
    for agent in self.agents:
      if (agent != None):
        agent.reset()
    return

  def _end_episode_agents(self):
    for agent in self.agents:
      if (agent != None):
        agent.end_episode()
    return

  def _build_agent(self, id, agent_file):
    Logger.print2('Agent {:d}: {}'.format(id, agent_file))
    if (agent_file == 'none'):
      agent = None
    else:
      agent = AgentBuilder.build_agent(self, id, agent_file)
      assert (agent != None), 'Failed to build agent {:d} from: {}'.format(id, agent_file)

    return agent

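# Note: RLWorld exposes enable_training as a property(...) built from the getter/setter pair
# above, so a single assignment fans the flag out to every agent and flips the environment
# mode. A toy sketch of that pattern; the class and field names here are hypothetical.
class Switchboard:
  """Toy version of the enable_training property pattern used by RLWorld."""

  def __init__(self, children):
    self.children = children
    self._enabled = True

  def get_enabled(self):
    return self._enabled

  def set_enabled(self, enable):
    self._enabled = enable
    for child in self.children:
      child['enabled'] = enable  # propagate the flag to every child

  enabled = property(get_enabled, set_enabled)


agents = [{'enabled': True}, {'enabled': True}]
board = Switchboard(agents)
board.enabled = False  # one assignment updates every agent
print(agents)
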
@@ -8,96 +8,97 @@ from pybullet_utils.logger import Logger
from pybullet_envs.deep_mimic.learning.solvers.solver import Solver


class MPISolver(Solver):
  CHECK_SYNC_ITERS = 1000

  def __init__(self, sess, optimizer, vars):
    super().__init__(vars)
    self.sess = sess
    self.optimizer = optimizer
    self._build_grad_feed(vars)
    self._update = optimizer.apply_gradients(zip(self._grad_tf_list, self.vars))
    self._set_flat_vars = TFUtil.SetFromFlat(sess, self.vars)
    self._get_flat_vars = TFUtil.GetFlat(sess, self.vars)

    self.iter = 0
    grad_dim = self._calc_grad_dim()
    self._flat_grad = np.zeros(grad_dim, dtype=np.float32)
    self._global_flat_grad = np.zeros(grad_dim, dtype=np.float32)

    return

  def get_stepsize(self):
    return self.optimizer._learning_rate_tensor.eval()

  def update(self, grads=None, grad_scale=1.0):
    if grads is not None:
      self._flat_grad = MathUtil.flatten(grads)
    else:
      self._flat_grad.fill(0)
    return self.update_flatgrad(self._flat_grad, grad_scale)

  def update_flatgrad(self, flat_grad, grad_scale=1.0):
    if self.iter % self.CHECK_SYNC_ITERS == 0:
      assert self.check_synced(), Logger.print2('Network parameters desynchronized')

    if grad_scale != 1.0:
      flat_grad *= grad_scale

    MPI.COMM_WORLD.Allreduce(flat_grad, self._global_flat_grad, op=MPI.SUM)
    self._global_flat_grad /= MPIUtil.get_num_procs()

    self._load_flat_grad(self._global_flat_grad)
    self.sess.run([self._update], self._grad_feed)
    self.iter += 1

    return

  def sync(self):
    vars = self._get_flat_vars()
    MPIUtil.bcast(vars)
    self._set_flat_vars(vars)
    return

  def check_synced(self):
    synced = True
    if self._is_root():
      vars = self._get_flat_vars()
      MPIUtil.bcast(vars)
    else:
      vars_local = self._get_flat_vars()
      vars_root = np.empty_like(vars_local)
      MPIUtil.bcast(vars_root)
      synced = (vars_local == vars_root).all()
    return synced

  def _is_root(self):
    return MPIUtil.is_root_proc()

  def _build_grad_feed(self, vars):
    self._grad_tf_list = []
    self._grad_buffers = []
    for v in self.vars:
      shape = v.get_shape()
      grad = np.zeros(shape)
      grad_tf = tf.placeholder(tf.float32, shape=shape)
      self._grad_buffers.append(grad)
      self._grad_tf_list.append(grad_tf)

    self._grad_feed = dict({g_tf: g for g_tf, g in zip(self._grad_tf_list, self._grad_buffers)})

    return

  def _calc_grad_dim(self):
    grad_dim = 0
    for grad in self._grad_buffers:
      grad_dim += grad.size
    return grad_dim

  def _load_flat_grad(self, flat_grad):
    start = 0
    for g in self._grad_buffers:
      size = g.size
      np.copyto(g, np.reshape(flat_grad[start:start + size], g.shape))
      start += size
    return

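# Note: MPISolver averages gradients across workers with an Allreduce sum followed by division
# by the process count before applying the update locally, which keeps every replica's
# parameters in lockstep. Below is a minimal mpi4py sketch of that averaging step; the gradient
# values and script name are illustrative.
# Run with e.g.: mpirun -np 4 python avg_grad.py
import numpy as np
from mpi4py import MPI

comm = MPI.COMM_WORLD
local_grad = np.random.randn(8).astype(np.float32)  # this worker's gradient
global_grad = np.zeros_like(local_grad)

# Sum gradients from all workers, then divide to get the mean.
comm.Allreduce(local_grad, global_grad, op=MPI.SUM)
global_grad /= comm.Get_size()

print(comm.Get_rank(), global_grad[:3])
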
@@ -1,15 +1,17 @@
from abc import abstractmethod
import sys, abc

if sys.version_info >= (3, 4):
  ABC = abc.ABC
else:
  ABC = abc.ABCMeta('ABC', (), {})


class Solver(ABC):

  def __init__(self, vars):
    self.vars = vars
    return

  @abstractmethod
  def update(self, grads):
    pass

@@ -6,144 +6,148 @@ from pybullet_envs.deep_mimic.learning.rl_agent import RLAgent
from pybullet_utils.logger import Logger
from pybullet_envs.deep_mimic.learning.tf_normalizer import TFNormalizer


class TFAgent(RLAgent):
  RESOURCE_SCOPE = 'resource'
  SOLVER_SCOPE = 'solvers'

  def __init__(self, world, id, json_data):
    self.tf_scope = 'agent'
    self.graph = tf.Graph()
    self.sess = tf.Session(graph=self.graph)

    super().__init__(world, id, json_data)
    self._build_graph(json_data)
    self._init_normalizers()
    return

  def __del__(self):
    self.sess.close()
    return

  def save_model(self, out_path):
    with self.sess.as_default(), self.graph.as_default():
      try:
        save_path = self.saver.save(self.sess, out_path, write_meta_graph=False, write_state=False)
        Logger.print2('Model saved to: ' + save_path)
      except:
        Logger.print2("Failed to save model to: " + save_path)
    return

  def load_model(self, in_path):
    with self.sess.as_default(), self.graph.as_default():
      self.saver.restore(self.sess, in_path)
      self._load_normalizers()
      Logger.print2('Model loaded from: ' + in_path)
    return

  def _get_output_path(self):
    assert (self.output_dir != '')
    file_path = self.output_dir + '/agent' + str(self.id) + '_model.ckpt'
    return file_path

  def _get_int_output_path(self):
    assert (self.int_output_dir != '')
    file_path = self.int_output_dir + (
        '/agent{:d}_models/agent{:d}_int_model_{:010d}.ckpt').format(self.id, self.id, self.iter)
    return file_path

  def _build_graph(self, json_data):
    with self.sess.as_default(), self.graph.as_default():
      with tf.variable_scope(self.tf_scope):
        self._build_nets(json_data)

        with tf.variable_scope(self.SOLVER_SCOPE):
          self._build_losses(json_data)
          self._build_solvers(json_data)

      self._initialize_vars()
      self._build_saver()
    return

  def _init_normalizers(self):
    with self.sess.as_default(), self.graph.as_default():
      # update normalizers to sync the tensorflow tensors
      self.s_norm.update()
      self.g_norm.update()
      self.a_norm.update()
    return

  @abstractmethod
  def _build_nets(self, json_data):
    pass

  @abstractmethod
  def _build_losses(self, json_data):
    pass

  @abstractmethod
  def _build_solvers(self, json_data):
    pass

  def _tf_vars(self, scope=''):
    with self.sess.as_default(), self.graph.as_default():
      res = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=self.tf_scope + '/' + scope)
      assert len(res) > 0
    return res

  def _build_normalizers(self):
    with self.sess.as_default(), self.graph.as_default(), tf.variable_scope(self.tf_scope):
      with tf.variable_scope(self.RESOURCE_SCOPE):
        self.s_norm = TFNormalizer(self.sess, 's_norm', self.get_state_size(),
                                   self.world.env.build_state_norm_groups(self.id))
        state_offset = -self.world.env.build_state_offset(self.id)
        print("state_offset=", state_offset)
        state_scale = 1 / self.world.env.build_state_scale(self.id)
        print("state_scale=", state_scale)
        self.s_norm.set_mean_std(-self.world.env.build_state_offset(self.id),
                                 1 / self.world.env.build_state_scale(self.id))

        self.g_norm = TFNormalizer(self.sess, 'g_norm', self.get_goal_size(),
                                   self.world.env.build_goal_norm_groups(self.id))
        self.g_norm.set_mean_std(-self.world.env.build_goal_offset(self.id),
                                 1 / self.world.env.build_goal_scale(self.id))

        self.a_norm = TFNormalizer(self.sess, 'a_norm', self.get_action_size())
        self.a_norm.set_mean_std(-self.world.env.build_action_offset(self.id),
                                 1 / self.world.env.build_action_scale(self.id))
    return

  def _load_normalizers(self):
    self.s_norm.load()
    self.g_norm.load()
    self.a_norm.load()
    return

  def _update_normalizers(self):
    with self.sess.as_default(), self.graph.as_default():
      super()._update_normalizers()
    return

  def _initialize_vars(self):
    self.sess.run(tf.global_variables_initializer())
    return

  def _build_saver(self):
    vars = self._get_saver_vars()
    self.saver = tf.train.Saver(vars, max_to_keep=0)
    return

  def _get_saver_vars(self):
    with self.sess.as_default(), self.graph.as_default():
      vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=self.tf_scope)
      vars = [v for v in vars if '/' + self.SOLVER_SCOPE + '/' not in v.name]
      #vars = [v for v in vars if '/target/' not in v.name]
      assert len(vars) > 0
    return vars

  def _weight_decay_loss(self, scope):
    vars = self._tf_vars(scope)
    vars_no_bias = [v for v in vars if 'bias' not in v.name]
    loss = tf.add_n([tf.nn.l2_loss(v) for v in vars_no_bias])
    return loss

  def _train(self):
    with self.sess.as_default(), self.graph.as_default():
      super()._train()
    return

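# Note: each TFAgent owns its own tf.Graph and tf.Session, and every build/train/save call
# re-enters them with as_default(), so multiple agents can coexist in one process without their
# variables colliding. Below is a minimal sketch of that isolation pattern for TF1-style code;
# the class and variable names are illustrative, not part of the agent.
import tensorflow as tf


class ScopedModel:

  def __init__(self, scope):
    self.graph = tf.Graph()
    self.sess = tf.Session(graph=self.graph)
    with self.sess.as_default(), self.graph.as_default():
      with tf.variable_scope(scope):
        self.w = tf.get_variable('w', shape=[4], initializer=tf.zeros_initializer())
      self.sess.run(tf.global_variables_initializer())

  def read(self):
    with self.sess.as_default(), self.graph.as_default():
      return self.sess.run(self.w)


a = ScopedModel('agent0')
b = ScopedModel('agent1')  # separate graph, no variable name clash
print(a.read(), b.read())
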
@@ -3,65 +3,72 @@ import copy
import tensorflow as tf
from pybullet_envs.deep_mimic.learning.normalizer import Normalizer


class TFNormalizer(Normalizer):

  def __init__(self, sess, scope, size, groups_ids=None, eps=0.02, clip=np.inf):
    self.sess = sess
    self.scope = scope
    super().__init__(size, groups_ids, eps, clip)

    with tf.variable_scope(self.scope):
      self._build_resource_tf()
    return

  # initialize count when loading saved values so that things don't change too quickly during updates
  def load(self):
    self.count = self.count_tf.eval()[0]
    self.mean = self.mean_tf.eval()
    self.std = self.std_tf.eval()
    self.mean_sq = self.calc_mean_sq(self.mean, self.std)
    return

  def update(self):
    super().update()
    self._update_resource_tf()
    return

  def set_mean_std(self, mean, std):
    super().set_mean_std(mean, std)
    self._update_resource_tf()
    return

  def normalize_tf(self, x):
    norm_x = (x - self.mean_tf) / self.std_tf
    norm_x = tf.clip_by_value(norm_x, -self.clip, self.clip)
    return norm_x

  def unnormalize_tf(self, norm_x):
    x = norm_x * self.std_tf + self.mean_tf
    return x

  def _build_resource_tf(self):
    self.count_tf = tf.get_variable(dtype=tf.int32,
                                    name='count',
                                    initializer=np.array([self.count], dtype=np.int32),
                                    trainable=False)
    self.mean_tf = tf.get_variable(dtype=tf.float32,
                                   name='mean',
                                   initializer=self.mean.astype(np.float32),
                                   trainable=False)
    self.std_tf = tf.get_variable(dtype=tf.float32,
                                  name='std',
                                  initializer=self.std.astype(np.float32),
                                  trainable=False)

    self.count_ph = tf.get_variable(dtype=tf.int32, name='count_ph', shape=[1])
|
||||
self.mean_ph = tf.get_variable(dtype=tf.float32, name='mean_ph', shape=self.mean.shape)
|
||||
self.std_ph = tf.get_variable(dtype=tf.float32, name='std_ph', shape=self.std.shape)
|
||||
|
||||
self._update_op = tf.group(self.count_tf.assign(self.count_ph),
|
||||
self.mean_tf.assign(self.mean_ph), self.std_tf.assign(self.std_ph))
|
||||
return
|
||||
|
||||
def _update_resource_tf(self):
|
||||
feed = {
|
||||
self.count_ph: np.array([self.count], dtype=np.int32),
|
||||
self.mean_ph: self.mean,
|
||||
self.std_ph: self.std
|
||||
}
|
||||
self.sess.run(self._update_op, feed_dict=feed)
|
||||
return
|
||||
|
||||
@@ -4,101 +4,116 @@ import os
|
||||
|
||||
xavier_initializer = tf.contrib.layers.xavier_initializer()
|
||||
|
||||
|
||||
def disable_gpu():
|
||||
os.environ["CUDA_VISIBLE_DEVICES"] = '-1'
|
||||
return
|
||||
os.environ["CUDA_VISIBLE_DEVICES"] = '-1'
|
||||
return
|
||||
|
||||
|
||||
def var_shape(x):
|
||||
out = [k.value for k in x.get_shape()]
|
||||
assert all(isinstance(a, int) for a in out), "shape function assumes that shape is fully known"
|
||||
return out
|
||||
out = [k.value for k in x.get_shape()]
|
||||
assert all(isinstance(a, int) for a in out), "shape function assumes that shape is fully known"
|
||||
return out
|
||||
|
||||
|
||||
def intprod(x):
|
||||
return int(np.prod(x))
|
||||
return int(np.prod(x))
|
||||
|
||||
|
||||
def numel(x):
|
||||
n = intprod(var_shape(x))
|
||||
return n
|
||||
n = intprod(var_shape(x))
|
||||
return n
|
||||
|
||||
|
||||
def flat_grad(loss, var_list, grad_ys=None):
|
||||
grads = tf.gradients(loss, var_list, grad_ys)
|
||||
return tf.concat([tf.reshape(grad, [numel(v)]) for (v, grad) in zip(var_list, grads)], axis=0)
|
||||
grads = tf.gradients(loss, var_list, grad_ys)
|
||||
return tf.concat([tf.reshape(grad, [numel(v)]) for (v, grad) in zip(var_list, grads)], axis=0)
|
||||
|
||||
def fc_net(input, layers_sizes, activation, reuse=None, flatten=False): # build fully connected network
|
||||
curr_tf = input
|
||||
for i, size in enumerate(layers_sizes):
|
||||
with tf.variable_scope(str(i), reuse=reuse):
|
||||
curr_tf = tf.layers.dense(inputs=curr_tf,
|
||||
units=size,
|
||||
kernel_initializer=xavier_initializer,
|
||||
activation = activation if i < len(layers_sizes)-1 else None)
|
||||
if flatten:
|
||||
assert layers_sizes[-1] == 1
|
||||
curr_tf = tf.reshape(curr_tf, [-1])
|
||||
|
||||
return curr_tf
|
||||
def fc_net(input, layers_sizes, activation, reuse=None,
|
||||
flatten=False): # build fully connected network
|
||||
curr_tf = input
|
||||
for i, size in enumerate(layers_sizes):
|
||||
with tf.variable_scope(str(i), reuse=reuse):
|
||||
curr_tf = tf.layers.dense(inputs=curr_tf,
|
||||
units=size,
|
||||
kernel_initializer=xavier_initializer,
|
||||
activation=activation if i < len(layers_sizes) - 1 else None)
|
||||
if flatten:
|
||||
assert layers_sizes[-1] == 1
|
||||
curr_tf = tf.reshape(curr_tf, [-1])
|
||||
|
||||
return curr_tf
|
||||
|
||||
|
||||
def copy(sess, src, dst):
|
||||
assert len(src) == len(dst)
|
||||
sess.run(list(map(lambda v: v[1].assign(v[0]), zip(src, dst))))
|
||||
return
|
||||
assert len(src) == len(dst)
|
||||
sess.run(list(map(lambda v: v[1].assign(v[0]), zip(src, dst))))
|
||||
return
|
||||
|
||||
|
||||
def flat_grad(loss, var_list):
|
||||
grads = tf.gradients(loss, var_list)
|
||||
return tf.concat(axis=0, values=[tf.reshape(grad, [numel(v)])
|
||||
for (v, grad) in zip(var_list, grads)])
|
||||
grads = tf.gradients(loss, var_list)
|
||||
return tf.concat(axis=0,
|
||||
values=[tf.reshape(grad, [numel(v)]) for (v, grad) in zip(var_list, grads)])
|
||||
|
||||
|
||||
def calc_logp_gaussian(x_tf, mean_tf, std_tf):
|
||||
dim = tf.to_float(tf.shape(x_tf)[-1])
|
||||
dim = tf.to_float(tf.shape(x_tf)[-1])
|
||||
|
||||
if mean_tf is None:
|
||||
diff_tf = x_tf
|
||||
else:
|
||||
diff_tf = x_tf - mean_tf
|
||||
if mean_tf is None:
|
||||
diff_tf = x_tf
|
||||
else:
|
||||
diff_tf = x_tf - mean_tf
|
||||
|
||||
logp_tf = -0.5 * tf.reduce_sum(tf.square(diff_tf / std_tf), axis=-1)
|
||||
logp_tf += -0.5 * dim * np.log(2 * np.pi) - tf.reduce_sum(tf.log(std_tf), axis=-1)
|
||||
|
||||
return logp_tf
|
||||
|
||||
logp_tf = -0.5 * tf.reduce_sum(tf.square(diff_tf / std_tf), axis=-1)
|
||||
logp_tf += -0.5 * dim * np.log(2 * np.pi) - tf.reduce_sum(tf.log(std_tf), axis=-1)
|
||||
|
||||
return logp_tf
|
||||
|
||||
def calc_bound_loss(x_tf, bound_min, bound_max):
|
||||
# penalty for violating bounds
|
||||
violation_min = tf.minimum(x_tf - bound_min, 0)
|
||||
violation_max = tf.maximum(x_tf - bound_max, 0)
|
||||
violation = tf.reduce_sum(tf.square(violation_min), axis=-1) + tf.reduce_sum(tf.square(violation_max), axis=-1)
|
||||
loss = 0.5 * tf.reduce_mean(violation)
|
||||
return loss
|
||||
# penalty for violating bounds
|
||||
violation_min = tf.minimum(x_tf - bound_min, 0)
|
||||
violation_max = tf.maximum(x_tf - bound_max, 0)
|
||||
violation = tf.reduce_sum(tf.square(violation_min), axis=-1) + tf.reduce_sum(
|
||||
tf.square(violation_max), axis=-1)
|
||||
loss = 0.5 * tf.reduce_mean(violation)
|
||||
return loss
|
||||
|
||||
|
||||
class SetFromFlat(object):
|
||||
def __init__(self, sess, var_list, dtype=tf.float32):
|
||||
assigns = []
|
||||
shapes = list(map(var_shape, var_list))
|
||||
total_size = np.sum([intprod(shape) for shape in shapes])
|
||||
|
||||
self.sess = sess
|
||||
self.theta = tf.placeholder(dtype,[total_size])
|
||||
start=0
|
||||
assigns = []
|
||||
def __init__(self, sess, var_list, dtype=tf.float32):
|
||||
assigns = []
|
||||
shapes = list(map(var_shape, var_list))
|
||||
total_size = np.sum([intprod(shape) for shape in shapes])
|
||||
|
||||
for (shape,v) in zip(shapes,var_list):
|
||||
size = intprod(shape)
|
||||
assigns.append(tf.assign(v, tf.reshape(self.theta[start:start+size],shape)))
|
||||
start += size
|
||||
self.sess = sess
|
||||
self.theta = tf.placeholder(dtype, [total_size])
|
||||
start = 0
|
||||
assigns = []
|
||||
|
||||
self.op = tf.group(*assigns)
|
||||
for (shape, v) in zip(shapes, var_list):
|
||||
size = intprod(shape)
|
||||
assigns.append(tf.assign(v, tf.reshape(self.theta[start:start + size], shape)))
|
||||
start += size
|
||||
|
||||
return
|
||||
self.op = tf.group(*assigns)
|
||||
|
||||
return
|
||||
|
||||
def __call__(self, theta):
|
||||
self.sess.run(self.op, feed_dict={self.theta: theta})
|
||||
return
|
||||
|
||||
def __call__(self, theta):
|
||||
self.sess.run(self.op, feed_dict={self.theta:theta})
|
||||
return
|
||||
|
||||
class GetFlat(object):
|
||||
def __init__(self, sess, var_list):
|
||||
self.sess = sess
|
||||
self.op = tf.concat(axis=0, values=[tf.reshape(v, [numel(v)]) for v in var_list])
|
||||
return
|
||||
|
||||
def __call__(self):
|
||||
return self.sess.run(self.op)
|
||||
def __init__(self, sess, var_list):
|
||||
self.sess = sess
|
||||
self.op = tf.concat(axis=0, values=[tf.reshape(v, [numel(v)]) for v in var_list])
|
||||
return
|
||||
|
||||
def __call__(self):
|
||||
return self.sess.run(self.op)
|
||||
|
||||
@@ -1,26 +1,28 @@
|
||||
import numpy as np
|
||||
import numpy as np
|
||||
|
||||
from quaternion import qrot, qinverse
|
||||
|
||||
|
||||
def normalize_screen_coordinates(X, w, h):
|
||||
assert X.shape[-1] == 2
|
||||
# Normalize so that [0, w] is mapped to [-1, 1], while preserving the aspect ratio
|
||||
return X/w*2 - [1, h/w]
|
||||
def normalize_screen_coordinates(X, w, h):
|
||||
assert X.shape[-1] == 2
|
||||
# Normalize so that [0, w] is mapped to [-1, 1], while preserving the aspect ratio
|
||||
return X / w * 2 - [1, h / w]
|
||||
|
||||
|
||||
def image_coordinates(X, w, h):
|
||||
assert X.shape[-1] == 2
|
||||
# Reverse camera frame normalization
|
||||
return (X + [1, h/w])*w/2
|
||||
assert X.shape[-1] == 2
|
||||
# Reverse camera frame normalization
|
||||
return (X + [1, h / w]) * w / 2
|
||||
|
||||
|
||||
def world_to_camera(X, R, t):
|
||||
Rt = qinverse(R)
|
||||
Q = np.tile(Rt, (*X.shape[:-1], 1))
|
||||
V = X - t
|
||||
return qrot(Q, V)
|
||||
|
||||
def camera_to_world(X, R, t):
|
||||
Q = np.tile(R, (*X.shape[:-1], 1))
|
||||
V = X
|
||||
return qrot(Q, V) + t
|
||||
Rt = qinverse(R)
|
||||
Q = np.tile(Rt, (*X.shape[:-1], 1))
|
||||
V = X - t
|
||||
return qrot(Q, V)
|
||||
|
||||
|
||||
def camera_to_world(X, R, t):
|
||||
Q = np.tile(R, (*X.shape[:-1], 1))
|
||||
V = X
|
||||
return qrot(Q, V) + t
|
||||
|
||||
@@ -3,54 +3,63 @@ from h36m_dataset import Human36mDataset
|
||||
from camera import *
|
||||
|
||||
import numpy as np
|
||||
# In[2]:
|
||||
# In[2]:
|
||||
|
||||
joint_info = {
|
||||
'joint_name':['root', 'right_hip', 'right_knee', 'right_ankle', 'left_hip', 'left_knee', 'left_ankle', 'chest', 'neck', 'nose', 'eye', 'left_shoulder', 'left_elbow', 'left_wrist', 'right_shoulder', 'right_elbow', 'right_wrist'],
|
||||
'father':[0, 0, 1, 2, 0, 4, 5, 0, 7, 8, 9, 8, 11, 12, 8, 14, 15],
|
||||
'side':['middle', 'right', 'right', 'right', 'left', 'left', 'left', 'middle', 'middle', 'middle', 'middle', 'left', 'left', 'left', 'right', 'right', 'right']
|
||||
'joint_name': [
|
||||
'root', 'right_hip', 'right_knee', 'right_ankle', 'left_hip', 'left_knee', 'left_ankle',
|
||||
'chest', 'neck', 'nose', 'eye', 'left_shoulder', 'left_elbow', 'left_wrist',
|
||||
'right_shoulder', 'right_elbow', 'right_wrist'
|
||||
],
|
||||
'father': [0, 0, 1, 2, 0, 4, 5, 0, 7, 8, 9, 8, 11, 12, 8, 14, 15],
|
||||
'side': [
|
||||
'middle', 'right', 'right', 'right', 'left', 'left', 'left', 'middle', 'middle', 'middle',
|
||||
'middle', 'left', 'left', 'left', 'right', 'right', 'right'
|
||||
]
|
||||
}
|
||||
|
||||
# In[3]:
|
||||
|
||||
|
||||
def init_fb_h36m_dataset(dataset_path):
|
||||
dataset = Human36mDataset(dataset_path)
|
||||
print('Preparing Facebook Human3.6M Dataset...')
|
||||
for subject in dataset.subjects():
|
||||
for action in dataset[subject].keys():
|
||||
anim = dataset[subject][action]
|
||||
positions_3d = []
|
||||
for cam in anim['cameras']:
|
||||
pos_3d = world_to_camera(anim['positions'], R=cam['orientation'], t=cam['translation'])
|
||||
pos_3d[:, 1:] -= pos_3d[:, :1] # Remove global offset, but keep trajectory in first position
|
||||
positions_3d.append(pos_3d)
|
||||
anim['positions_3d'] = positions_3d
|
||||
return dataset
|
||||
dataset = Human36mDataset(dataset_path)
|
||||
print('Preparing Facebook Human3.6M Dataset...')
|
||||
for subject in dataset.subjects():
|
||||
for action in dataset[subject].keys():
|
||||
anim = dataset[subject][action]
|
||||
positions_3d = []
|
||||
for cam in anim['cameras']:
|
||||
pos_3d = world_to_camera(anim['positions'], R=cam['orientation'], t=cam['translation'])
|
||||
pos_3d[:, 1:] -= pos_3d[:, :
|
||||
1] # Remove global offset, but keep trajectory in first position
|
||||
positions_3d.append(pos_3d)
|
||||
anim['positions_3d'] = positions_3d
|
||||
return dataset
|
||||
|
||||
|
||||
def pose3D_from_fb_h36m(dataset, subject, action, shift):
|
||||
pose_seq = dataset[subject][action]['positions_3d'][0].copy()
|
||||
trajectory = pose_seq[:, :1]
|
||||
pose_seq[:, 1:] += trajectory
|
||||
# Invert camera transformation
|
||||
cam = dataset.cameras()[subject][0]
|
||||
pose_seq = camera_to_world(pose_seq,
|
||||
R=cam['orientation'],
|
||||
t=cam['translation'])
|
||||
x = pose_seq[:,:,0:1]
|
||||
y = pose_seq[:,:,1:2] * -1
|
||||
z = pose_seq[:,:,2:3]
|
||||
pose_seq = np.concatenate((x,z,y),axis=2)
|
||||
# plus shift
|
||||
pose_seq += np.array([[shift for i in range(pose_seq.shape[1])] for j in range(pose_seq.shape[0])])
|
||||
return pose_seq
|
||||
pose_seq = dataset[subject][action]['positions_3d'][0].copy()
|
||||
trajectory = pose_seq[:, :1]
|
||||
pose_seq[:, 1:] += trajectory
|
||||
# Invert camera transformation
|
||||
cam = dataset.cameras()[subject][0]
|
||||
pose_seq = camera_to_world(pose_seq, R=cam['orientation'], t=cam['translation'])
|
||||
x = pose_seq[:, :, 0:1]
|
||||
y = pose_seq[:, :, 1:2] * -1
|
||||
z = pose_seq[:, :, 2:3]
|
||||
pose_seq = np.concatenate((x, z, y), axis=2)
|
||||
# plus shift
|
||||
pose_seq += np.array([[shift for i in range(pose_seq.shape[1])] for j in range(pose_seq.shape[0])
|
||||
])
|
||||
return pose_seq
|
||||
|
||||
|
||||
def rot_seq_to_deepmimic_json(rot_seq, loop, json_path):
|
||||
to_json = {"Loop": loop, "Frames":[]}
|
||||
rot_seq = np.around(rot_seq, decimals=6)
|
||||
to_json["Frames"] = rot_seq.tolist()
|
||||
# In[14]:
|
||||
to_file = json.dumps(to_json)
|
||||
file = open(json_path,"w")
|
||||
file.write(to_file)
|
||||
file.close()
|
||||
|
||||
to_json = {"Loop": loop, "Frames": []}
|
||||
rot_seq = np.around(rot_seq, decimals=6)
|
||||
to_json["Frames"] = rot_seq.tolist()
|
||||
# In[14]:
|
||||
to_file = json.dumps(to_json)
|
||||
file = open(json_path, "w")
|
||||
file.write(to_file)
|
||||
file.close()
|
||||
|
||||
@@ -3,11 +3,13 @@ import copy
|
||||
from skeleton import Skeleton
|
||||
from mocap_dataset import MocapDataset
|
||||
from camera import normalize_screen_coordinates, image_coordinates
|
||||
|
||||
h36m_skeleton = Skeleton(parents=[-1, 0, 1, 2, 3, 4, 0, 6, 7, 8, 9, 0, 11, 12, 13, 14, 12,
|
||||
16, 17, 18, 19, 20, 19, 22, 12, 24, 25, 26, 27, 28, 27, 30],
|
||||
joints_left=[6, 7, 8, 9, 10, 16, 17, 18, 19, 20, 21, 22, 23],
|
||||
joints_right=[1, 2, 3, 4, 5, 24, 25, 26, 27, 28, 29, 30, 31])
|
||||
|
||||
h36m_skeleton = Skeleton(parents=[
|
||||
-1, 0, 1, 2, 3, 4, 0, 6, 7, 8, 9, 0, 11, 12, 13, 14, 12, 16, 17, 18, 19, 20, 19, 22, 12, 24,
|
||||
25, 26, 27, 28, 27, 30
|
||||
],
|
||||
joints_left=[6, 7, 8, 9, 10, 16, 17, 18, 19, 20, 21, 22, 23],
|
||||
joints_right=[1, 2, 3, 4, 5, 24, 25, 26, 27, 28, 29, 30, 31])
|
||||
|
||||
h36m_cameras_intrinsic_params = [
|
||||
{
|
||||
@@ -18,7 +20,7 @@ h36m_cameras_intrinsic_params = [
|
||||
'tangential_distortion': [-0.0009756988729350269, -0.00142447161488235],
|
||||
'res_w': 1000,
|
||||
'res_h': 1002,
|
||||
'azimuth': 70, # Only used for visualization
|
||||
'azimuth': 70, # Only used for visualization
|
||||
},
|
||||
{
|
||||
'id': '55011271',
|
||||
@@ -28,7 +30,7 @@ h36m_cameras_intrinsic_params = [
|
||||
'tangential_distortion': [-0.0016190266469493508, -0.0027408944442868233],
|
||||
'res_w': 1000,
|
||||
'res_h': 1000,
|
||||
'azimuth': -70, # Only used for visualization
|
||||
'azimuth': -70, # Only used for visualization
|
||||
},
|
||||
{
|
||||
'id': '58860488',
|
||||
@@ -38,7 +40,7 @@ h36m_cameras_intrinsic_params = [
|
||||
'tangential_distortion': [0.0014843869721516967, -0.0007599993259645998],
|
||||
'res_w': 1000,
|
||||
'res_h': 1000,
|
||||
'azimuth': 110, # Only used for visualization
|
||||
'azimuth': 110, # Only used for visualization
|
||||
},
|
||||
{
|
||||
'id': '60457274',
|
||||
@@ -48,26 +50,34 @@ h36m_cameras_intrinsic_params = [
|
||||
'tangential_distortion': [-0.0005872055771760643, -0.0018133620033040643],
|
||||
'res_w': 1000,
|
||||
'res_h': 1002,
|
||||
'azimuth': -110, # Only used for visualization
|
||||
'azimuth': -110, # Only used for visualization
|
||||
},
|
||||
]
|
||||
|
||||
h36m_cameras_extrinsic_params = {
|
||||
'S1': [
|
||||
{
|
||||
'orientation': [0.1407056450843811, -0.1500701755285263, -0.755240797996521, 0.6223280429840088],
|
||||
'orientation': [
|
||||
0.1407056450843811, -0.1500701755285263, -0.755240797996521, 0.6223280429840088
|
||||
],
|
||||
'translation': [1841.1070556640625, 4955.28466796875, 1563.4454345703125],
|
||||
},
|
||||
{
|
||||
'orientation': [0.6157187819480896, -0.764836311340332, -0.14833825826644897, 0.11794740706682205],
|
||||
'orientation': [
|
||||
0.6157187819480896, -0.764836311340332, -0.14833825826644897, 0.11794740706682205
|
||||
],
|
||||
'translation': [1761.278564453125, -5078.0068359375, 1606.2650146484375],
|
||||
},
|
||||
{
|
||||
'orientation': [0.14651472866535187, -0.14647851884365082, 0.7653023600578308, -0.6094175577163696],
|
||||
'orientation': [
|
||||
0.14651472866535187, -0.14647851884365082, 0.7653023600578308, -0.6094175577163696
|
||||
],
|
||||
'translation': [-1846.7777099609375, 5215.04638671875, 1491.972412109375],
|
||||
},
|
||||
{
|
||||
'orientation': [0.5834008455276489, -0.7853162288665771, 0.14548823237419128, -0.14749594032764435],
|
||||
'orientation': [
|
||||
0.5834008455276489, -0.7853162288665771, 0.14548823237419128, -0.14749594032764435
|
||||
],
|
||||
'translation': [-1794.7896728515625, -3722.698974609375, 1574.8927001953125],
|
||||
},
|
||||
],
|
||||
@@ -91,158 +101,206 @@ h36m_cameras_extrinsic_params = {
|
||||
],
|
||||
'S5': [
|
||||
{
|
||||
'orientation': [0.1467377245426178, -0.162370964884758, -0.7551892995834351, 0.6178938746452332],
|
||||
'orientation': [
|
||||
0.1467377245426178, -0.162370964884758, -0.7551892995834351, 0.6178938746452332
|
||||
],
|
||||
'translation': [2097.3916015625, 4880.94482421875, 1605.732421875],
|
||||
},
|
||||
{
|
||||
'orientation': [0.6159758567810059, -0.7626792192459106, -0.15728192031383514, 0.1189815029501915],
|
||||
'orientation': [
|
||||
0.6159758567810059, -0.7626792192459106, -0.15728192031383514, 0.1189815029501915
|
||||
],
|
||||
'translation': [2031.7008056640625, -5167.93310546875, 1612.923095703125],
|
||||
},
|
||||
{
|
||||
'orientation': [0.14291371405124664, -0.12907841801643372, 0.7678384780883789, -0.6110143065452576],
|
||||
'orientation': [
|
||||
0.14291371405124664, -0.12907841801643372, 0.7678384780883789, -0.6110143065452576
|
||||
],
|
||||
'translation': [-1620.5948486328125, 5171.65869140625, 1496.43701171875],
|
||||
},
|
||||
{
|
||||
'orientation': [0.5920479893684387, -0.7814217805862427, 0.1274748593568802, -0.15036417543888092],
|
||||
'orientation': [
|
||||
0.5920479893684387, -0.7814217805862427, 0.1274748593568802, -0.15036417543888092
|
||||
],
|
||||
'translation': [-1637.1737060546875, -3867.3173828125, 1547.033203125],
|
||||
},
|
||||
],
|
||||
'S6': [
|
||||
{
|
||||
'orientation': [0.1337897777557373, -0.15692396461963654, -0.7571090459823608, 0.6198879480361938],
|
||||
'orientation': [
|
||||
0.1337897777557373, -0.15692396461963654, -0.7571090459823608, 0.6198879480361938
|
||||
],
|
||||
'translation': [1935.4517822265625, 4950.24560546875, 1618.0838623046875],
|
||||
},
|
||||
{
|
||||
'orientation': [0.6147197484970093, -0.7628812789916992, -0.16174767911434174, 0.11819244921207428],
|
||||
'orientation': [
|
||||
0.6147197484970093, -0.7628812789916992, -0.16174767911434174, 0.11819244921207428
|
||||
],
|
||||
'translation': [1969.803955078125, -5128.73876953125, 1632.77880859375],
|
||||
},
|
||||
{
|
||||
'orientation': [0.1529948115348816, -0.13529130816459656, 0.7646096348762512, -0.6112781167030334],
|
||||
'orientation': [
|
||||
0.1529948115348816, -0.13529130816459656, 0.7646096348762512, -0.6112781167030334
|
||||
],
|
||||
'translation': [-1769.596435546875, 5185.361328125, 1476.993408203125],
|
||||
},
|
||||
{
|
||||
'orientation': [0.5916101336479187, -0.7804774045944214, 0.12832270562648773, -0.1561593860387802],
|
||||
'orientation': [
|
||||
0.5916101336479187, -0.7804774045944214, 0.12832270562648773, -0.1561593860387802
|
||||
],
|
||||
'translation': [-1721.668701171875, -3884.13134765625, 1540.4879150390625],
|
||||
},
|
||||
],
|
||||
'S7': [
|
||||
{
|
||||
'orientation': [0.1435241848230362, -0.1631336808204651, -0.7548328638076782, 0.6188824772834778],
|
||||
'orientation': [
|
||||
0.1435241848230362, -0.1631336808204651, -0.7548328638076782, 0.6188824772834778
|
||||
],
|
||||
'translation': [1974.512939453125, 4926.3544921875, 1597.8326416015625],
|
||||
},
|
||||
{
|
||||
'orientation': [0.6141672730445862, -0.7638262510299683, -0.1596645563840866, 0.1177929937839508],
|
||||
'orientation': [
|
||||
0.6141672730445862, -0.7638262510299683, -0.1596645563840866, 0.1177929937839508
|
||||
],
|
||||
'translation': [1937.0584716796875, -5119.7900390625, 1631.5665283203125],
|
||||
},
|
||||
{
|
||||
'orientation': [0.14550060033798218, -0.12874816358089447, 0.7660516500473022, -0.6127139329910278],
|
||||
'orientation': [
|
||||
0.14550060033798218, -0.12874816358089447, 0.7660516500473022, -0.6127139329910278
|
||||
],
|
||||
'translation': [-1741.8111572265625, 5208.24951171875, 1464.8245849609375],
|
||||
},
|
||||
{
|
||||
'orientation': [0.5912848114967346, -0.7821764349937439, 0.12445473670959473, -0.15196487307548523],
|
||||
'orientation': [
|
||||
0.5912848114967346, -0.7821764349937439, 0.12445473670959473, -0.15196487307548523
|
||||
],
|
||||
'translation': [-1734.7105712890625, -3832.42138671875, 1548.5830078125],
|
||||
},
|
||||
],
|
||||
'S8': [
|
||||
{
|
||||
'orientation': [0.14110587537288666, -0.15589867532253265, -0.7561917304992676, 0.619644045829773],
|
||||
'orientation': [
|
||||
0.14110587537288666, -0.15589867532253265, -0.7561917304992676, 0.619644045829773
|
||||
],
|
||||
'translation': [2150.65185546875, 4896.1611328125, 1611.9046630859375],
|
||||
},
|
||||
{
|
||||
'orientation': [0.6169601678848267, -0.7647668123245239, -0.14846350252628326, 0.11158157885074615],
|
||||
'orientation': [
|
||||
0.6169601678848267, -0.7647668123245239, -0.14846350252628326, 0.11158157885074615
|
||||
],
|
||||
'translation': [2219.965576171875, -5148.453125, 1613.0440673828125],
|
||||
},
|
||||
{
|
||||
'orientation': [0.1471444070339203, -0.13377119600772858, 0.7670128345489502, -0.6100369691848755],
|
||||
'orientation': [
|
||||
0.1471444070339203, -0.13377119600772858, 0.7670128345489502, -0.6100369691848755
|
||||
],
|
||||
'translation': [-1571.2215576171875, 5137.0185546875, 1498.1761474609375],
|
||||
},
|
||||
{
|
||||
'orientation': [0.5927824378013611, -0.7825870513916016, 0.12147816270589828, -0.14631995558738708],
|
||||
'orientation': [
|
||||
0.5927824378013611, -0.7825870513916016, 0.12147816270589828, -0.14631995558738708
|
||||
],
|
||||
'translation': [-1476.913330078125, -3896.7412109375, 1547.97216796875],
|
||||
},
|
||||
],
|
||||
'S9': [
|
||||
{
|
||||
'orientation': [0.15540587902069092, -0.15548215806484222, -0.7532095313072205, 0.6199594736099243],
|
||||
'orientation': [
|
||||
0.15540587902069092, -0.15548215806484222, -0.7532095313072205, 0.6199594736099243
|
||||
],
|
||||
'translation': [2044.45849609375, 4935.1171875, 1481.2275390625],
|
||||
},
|
||||
{
|
||||
'orientation': [0.618784487247467, -0.7634735107421875, -0.14132238924503326, 0.11933968216180801],
|
||||
'orientation': [
|
||||
0.618784487247467, -0.7634735107421875, -0.14132238924503326, 0.11933968216180801
|
||||
],
|
||||
'translation': [1990.959716796875, -5123.810546875, 1568.8048095703125],
|
||||
},
|
||||
{
|
||||
'orientation': [0.13357827067375183, -0.1367100477218628, 0.7689454555511475, -0.6100738644599915],
|
||||
'orientation': [
|
||||
0.13357827067375183, -0.1367100477218628, 0.7689454555511475, -0.6100738644599915
|
||||
],
|
||||
'translation': [-1670.9921875, 5211.98583984375, 1528.387939453125],
|
||||
},
|
||||
{
|
||||
'orientation': [0.5879399180412292, -0.7823407053947449, 0.1427614390850067, -0.14794869720935822],
|
||||
'orientation': [
|
||||
0.5879399180412292, -0.7823407053947449, 0.1427614390850067, -0.14794869720935822
|
||||
],
|
||||
'translation': [-1696.04345703125, -3827.099853515625, 1591.4127197265625],
|
||||
},
|
||||
],
|
||||
'S11': [
|
||||
{
|
||||
'orientation': [0.15232472121715546, -0.15442320704460144, -0.7547563314437866, 0.6191070079803467],
|
||||
'orientation': [
|
||||
0.15232472121715546, -0.15442320704460144, -0.7547563314437866, 0.6191070079803467
|
||||
],
|
||||
'translation': [2098.440185546875, 4926.5546875, 1500.278564453125],
|
||||
},
|
||||
{
|
||||
'orientation': [0.6189449429512024, -0.7600917220115662, -0.15300633013248444, 0.1255258321762085],
|
||||
'orientation': [
|
||||
0.6189449429512024, -0.7600917220115662, -0.15300633013248444, 0.1255258321762085
|
||||
],
|
||||
'translation': [2083.182373046875, -4912.1728515625, 1561.07861328125],
|
||||
},
|
||||
{
|
||||
'orientation': [0.14943228662014008, -0.15650227665901184, 0.7681233882904053, -0.6026304364204407],
|
||||
'orientation': [
|
||||
0.14943228662014008, -0.15650227665901184, 0.7681233882904053, -0.6026304364204407
|
||||
],
|
||||
'translation': [-1609.8153076171875, 5177.3359375, 1537.896728515625],
|
||||
},
|
||||
{
|
||||
'orientation': [0.5894251465797424, -0.7818877100944519, 0.13991211354732513, -0.14715361595153809],
|
||||
'orientation': [
|
||||
0.5894251465797424, -0.7818877100944519, 0.13991211354732513, -0.14715361595153809
|
||||
],
|
||||
'translation': [-1590.738037109375, -3854.1689453125, 1578.017578125],
|
||||
},
|
||||
],
|
||||
}
|
||||
|
||||
|
||||
class Human36mDataset(MocapDataset):
|
||||
def __init__(self, path, remove_static_joints=True):
|
||||
super().__init__(fps=50, skeleton=h36m_skeleton)
|
||||
|
||||
self._cameras = copy.deepcopy(h36m_cameras_extrinsic_params)
|
||||
for cameras in self._cameras.values():
|
||||
for i, cam in enumerate(cameras):
|
||||
cam.update(h36m_cameras_intrinsic_params[i])
|
||||
for k, v in cam.items():
|
||||
if k not in ['id', 'res_w', 'res_h']:
|
||||
cam[k] = np.array(v, dtype='float32')
|
||||
|
||||
# Normalize camera frame
|
||||
cam['center'] = normalize_screen_coordinates(cam['center'], w=cam['res_w'], h=cam['res_h']).astype('float32')
|
||||
cam['focal_length'] = cam['focal_length']/cam['res_w']*2
|
||||
if 'translation' in cam:
|
||||
cam['translation'] = cam['translation']/1000 # mm to meters
|
||||
|
||||
# Add intrinsic parameters vector
|
||||
cam['intrinsic'] = np.concatenate((cam['focal_length'],
|
||||
cam['center'],
|
||||
cam['radial_distortion'],
|
||||
cam['tangential_distortion']))
|
||||
|
||||
# Load serialized dataset
|
||||
data = np.load(path)['positions_3d'].item()
|
||||
|
||||
self._data = {}
|
||||
for subject, actions in data.items():
|
||||
self._data[subject] = {}
|
||||
for action_name, positions in actions.items():
|
||||
self._data[subject][action_name] = {
|
||||
'positions': positions,
|
||||
'cameras': self._cameras[subject],
|
||||
}
|
||||
|
||||
if remove_static_joints:
|
||||
# Bring the skeleton to 17 joints instead of the original 32
|
||||
self.remove_joints([4, 5, 9, 10, 11, 16, 20, 21, 22, 23, 24, 28, 29, 30, 31])
|
||||
|
||||
# Rewire shoulders to the correct parents
|
||||
self._skeleton._parents[11] = 8
|
||||
self._skeleton._parents[14] = 8
|
||||
|
||||
def supports_semi_supervised(self):
|
||||
return True
|
||||
|
||||
|
||||
def __init__(self, path, remove_static_joints=True):
|
||||
super().__init__(fps=50, skeleton=h36m_skeleton)
|
||||
|
||||
self._cameras = copy.deepcopy(h36m_cameras_extrinsic_params)
|
||||
for cameras in self._cameras.values():
|
||||
for i, cam in enumerate(cameras):
|
||||
cam.update(h36m_cameras_intrinsic_params[i])
|
||||
for k, v in cam.items():
|
||||
if k not in ['id', 'res_w', 'res_h']:
|
||||
cam[k] = np.array(v, dtype='float32')
|
||||
|
||||
# Normalize camera frame
|
||||
cam['center'] = normalize_screen_coordinates(cam['center'], w=cam['res_w'],
|
||||
h=cam['res_h']).astype('float32')
|
||||
cam['focal_length'] = cam['focal_length'] / cam['res_w'] * 2
|
||||
if 'translation' in cam:
|
||||
cam['translation'] = cam['translation'] / 1000 # mm to meters
|
||||
|
||||
# Add intrinsic parameters vector
|
||||
cam['intrinsic'] = np.concatenate((cam['focal_length'], cam['center'],
|
||||
cam['radial_distortion'], cam['tangential_distortion']))
|
||||
|
||||
# Load serialized dataset
|
||||
data = np.load(path)['positions_3d'].item()
|
||||
|
||||
self._data = {}
|
||||
for subject, actions in data.items():
|
||||
self._data[subject] = {}
|
||||
for action_name, positions in actions.items():
|
||||
self._data[subject][action_name] = {
|
||||
'positions': positions,
|
||||
'cameras': self._cameras[subject],
|
||||
}
|
||||
|
||||
if remove_static_joints:
|
||||
# Bring the skeleton to 17 joints instead of the original 32
|
||||
self.remove_joints([4, 5, 9, 10, 11, 16, 20, 21, 22, 23, 24, 28, 29, 30, 31])
|
||||
|
||||
# Rewire shoulders to the correct parents
|
||||
self._skeleton._parents[11] = 8
|
||||
self._skeleton._parents[14] = 8
|
||||
|
||||
def supports_semi_supervised(self):
|
||||
return True
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -9,142 +9,138 @@ from transformation import *
|
||||
|
||||
from pyquaternion import Quaternion
|
||||
|
||||
|
||||
def get_angle(vec1, vec2):
|
||||
cos_theta = np.dot(vec1, vec2)/(np.linalg.norm(vec1) * np.linalg.norm(vec2))
|
||||
return acos(cos_theta)
|
||||
cos_theta = np.dot(vec1, vec2) / (np.linalg.norm(vec1) * np.linalg.norm(vec2))
|
||||
return acos(cos_theta)
|
||||
|
||||
|
||||
def get_quaternion(ox, oy, oz, x, y, z):
|
||||
# given transformed axis in x-y-z order return a quaternion
|
||||
ox /= np.linalg.norm(ox)
|
||||
oy /= np.linalg.norm(oy)
|
||||
oz /= np.linalg.norm(oz)
|
||||
# given transformed axis in x-y-z order return a quaternion
|
||||
ox /= np.linalg.norm(ox)
|
||||
oy /= np.linalg.norm(oy)
|
||||
oz /= np.linalg.norm(oz)
|
||||
|
||||
set1 = np.vstack((ox,oy,oz))
|
||||
set1 = np.vstack((ox, oy, oz))
|
||||
|
||||
x /= np.linalg.norm(x)
|
||||
y /= np.linalg.norm(y)
|
||||
z /= np.linalg.norm(z)
|
||||
x /= np.linalg.norm(x)
|
||||
y /= np.linalg.norm(y)
|
||||
z /= np.linalg.norm(z)
|
||||
|
||||
set2 = np.vstack((x,y,z))
|
||||
rot_mat = superimposition_matrix(set1, set2, scale=False, usesvd=True)
|
||||
rot_qua = quaternion_from_matrix(rot_mat)
|
||||
set2 = np.vstack((x, y, z))
|
||||
rot_mat = superimposition_matrix(set1, set2, scale=False, usesvd=True)
|
||||
rot_qua = quaternion_from_matrix(rot_mat)
|
||||
|
||||
return rot_qua
|
||||
return rot_qua
|
||||
|
||||
|
||||
# 3D coord to deepmimic rotations
|
||||
def coord_to_rot(frameNum, frame, frame_duration):
|
||||
eps = 0.001
|
||||
axis_rotate_rate = 0.3
|
||||
eps = 0.001
|
||||
axis_rotate_rate = 0.3
|
||||
|
||||
frame = np.array(frame)
|
||||
tmp = [[] for i in range(15)]
|
||||
# duration of frame in seconds (1D),
|
||||
tmp[0] = [frame_duration]
|
||||
# root position (3D),
|
||||
tmp[1] = frame[0]
|
||||
# root rotation (4D),
|
||||
root_y = (frame[7] - frame[0])
|
||||
root_z = (frame[1] - frame[0])
|
||||
root_x = np.cross(root_y, root_z)
|
||||
frame = np.array(frame)
|
||||
tmp = [[] for i in range(15)]
|
||||
# duration of frame in seconds (1D),
|
||||
tmp[0] = [frame_duration]
|
||||
# root position (3D),
|
||||
tmp[1] = frame[0]
|
||||
# root rotation (4D),
|
||||
root_y = (frame[7] - frame[0])
|
||||
root_z = (frame[1] - frame[0])
|
||||
root_x = np.cross(root_y, root_z)
|
||||
|
||||
x = np.array([1.0,0,0])
|
||||
y = np.array([0,1.0,0])
|
||||
z = np.array([0,0,1.0])
|
||||
|
||||
rot_qua = get_quaternion(root_x, root_y, root_z, x, y, z)
|
||||
tmp[2] = list(rot_qua)
|
||||
x = np.array([1.0, 0, 0])
|
||||
y = np.array([0, 1.0, 0])
|
||||
z = np.array([0, 0, 1.0])
|
||||
|
||||
# chest rotation (4D),
|
||||
chest_y = (frame[8] - frame[7])
|
||||
chest_z = (frame[14] - frame[8])
|
||||
chest_x = np.cross(chest_y, chest_z)
|
||||
rot_qua = get_quaternion(chest_x, chest_y, chest_z, root_x, root_y, root_z)
|
||||
tmp[3] = list(rot_qua)
|
||||
rot_qua = get_quaternion(root_x, root_y, root_z, x, y, z)
|
||||
tmp[2] = list(rot_qua)
|
||||
|
||||
# neck rotation (4D),
|
||||
neck_y = (frame[10] - frame[8])
|
||||
neck_z = np.cross(frame[10]-frame[9], frame[8]-frame[9])
|
||||
neck_x = np.cross(neck_y, neck_z)
|
||||
rot_qua = get_quaternion(neck_x, neck_y, neck_z, chest_x, chest_y, chest_z)
|
||||
tmp[4] = list(rot_qua)
|
||||
# chest rotation (4D),
|
||||
chest_y = (frame[8] - frame[7])
|
||||
chest_z = (frame[14] - frame[8])
|
||||
chest_x = np.cross(chest_y, chest_z)
|
||||
rot_qua = get_quaternion(chest_x, chest_y, chest_z, root_x, root_y, root_z)
|
||||
tmp[3] = list(rot_qua)
|
||||
|
||||
# right hip rotation (4D),
|
||||
r_hip_y = (frame[1] - frame[2])
|
||||
r_hip_z = np.cross(frame[1]-frame[2], frame[3]-frame[2])
|
||||
r_hip_x = np.cross(r_hip_y, r_hip_z)
|
||||
rot_qua = get_quaternion(r_hip_x, r_hip_y, r_hip_z, root_x, root_y, root_z)
|
||||
tmp[5] = list(rot_qua)
|
||||
# neck rotation (4D),
|
||||
neck_y = (frame[10] - frame[8])
|
||||
neck_z = np.cross(frame[10] - frame[9], frame[8] - frame[9])
|
||||
neck_x = np.cross(neck_y, neck_z)
|
||||
rot_qua = get_quaternion(neck_x, neck_y, neck_z, chest_x, chest_y, chest_z)
|
||||
tmp[4] = list(rot_qua)
|
||||
|
||||
# right knee rotation (1D),
|
||||
vec1 = frame[1] - frame[2]
|
||||
vec2 = frame[3] - frame[2]
|
||||
angle1 = get_angle(vec1, vec2)
|
||||
tmp[6] = [angle1-pi]
|
||||
# right hip rotation (4D),
|
||||
r_hip_y = (frame[1] - frame[2])
|
||||
r_hip_z = np.cross(frame[1] - frame[2], frame[3] - frame[2])
|
||||
r_hip_x = np.cross(r_hip_y, r_hip_z)
|
||||
rot_qua = get_quaternion(r_hip_x, r_hip_y, r_hip_z, root_x, root_y, root_z)
|
||||
tmp[5] = list(rot_qua)
|
||||
|
||||
# right ankle rotation (4D),
|
||||
tmp[7] = [1,0,0,0]
|
||||
# right knee rotation (1D),
|
||||
vec1 = frame[1] - frame[2]
|
||||
vec2 = frame[3] - frame[2]
|
||||
angle1 = get_angle(vec1, vec2)
|
||||
tmp[6] = [angle1 - pi]
|
||||
|
||||
# right shoulder rotation (4D),
|
||||
r_shou_y = (frame[14] - frame[15])
|
||||
r_shou_z = np.cross(frame[16]-frame[15], frame[14]-frame[15])
|
||||
r_shou_x = np.cross(r_shou_y, r_shou_z)
|
||||
rot_qua = get_quaternion(r_shou_x, r_shou_y, r_shou_z, chest_x, chest_y, chest_z)
|
||||
tmp[8] = list(rot_qua)
|
||||
# right ankle rotation (4D),
|
||||
tmp[7] = [1, 0, 0, 0]
|
||||
|
||||
# right elbow rotation (1D),
|
||||
vec1 = frame[14] - frame[15]
|
||||
vec2 = frame[16] - frame[15]
|
||||
angle1 = get_angle(vec1, vec2)
|
||||
tmp[9] = [pi-angle1]
|
||||
# right shoulder rotation (4D),
|
||||
r_shou_y = (frame[14] - frame[15])
|
||||
r_shou_z = np.cross(frame[16] - frame[15], frame[14] - frame[15])
|
||||
r_shou_x = np.cross(r_shou_y, r_shou_z)
|
||||
rot_qua = get_quaternion(r_shou_x, r_shou_y, r_shou_z, chest_x, chest_y, chest_z)
|
||||
tmp[8] = list(rot_qua)
|
||||
|
||||
# left hip rotation (4D),
|
||||
l_hip_y = (frame[4] - frame[5])
|
||||
l_hip_z = np.cross(frame[4]-frame[5], frame[6]-frame[5])
|
||||
l_hip_x = np.cross(l_hip_y, l_hip_z)
|
||||
rot_qua = get_quaternion(l_hip_x, l_hip_y, l_hip_z, root_x, root_y, root_z)
|
||||
tmp[10] = list(rot_qua)
|
||||
|
||||
# left knee rotation (1D),
|
||||
vec1 = frame[4] - frame[5]
|
||||
vec2 = frame[6] - frame[5]
|
||||
angle1 = get_angle(vec1, vec2)
|
||||
tmp[11] = [angle1-pi]
|
||||
|
||||
# left ankle rotation (4D),
|
||||
tmp[12] = [1,0,0,0]
|
||||
# right elbow rotation (1D),
|
||||
vec1 = frame[14] - frame[15]
|
||||
vec2 = frame[16] - frame[15]
|
||||
angle1 = get_angle(vec1, vec2)
|
||||
tmp[9] = [pi - angle1]
|
||||
|
||||
# left shoulder rotation (4D),
|
||||
l_shou_y = (frame[11] - frame[12])
|
||||
l_shou_z = np.cross(frame[13]-frame[12], frame[11]-frame[12])
|
||||
l_shou_x = np.cross(l_shou_y, l_shou_z)
|
||||
rot_qua = get_quaternion(l_shou_x, l_shou_y, l_shou_z, chest_x, chest_y, chest_z)
|
||||
tmp[13] = list(rot_qua)
|
||||
# left hip rotation (4D),
|
||||
l_hip_y = (frame[4] - frame[5])
|
||||
l_hip_z = np.cross(frame[4] - frame[5], frame[6] - frame[5])
|
||||
l_hip_x = np.cross(l_hip_y, l_hip_z)
|
||||
rot_qua = get_quaternion(l_hip_x, l_hip_y, l_hip_z, root_x, root_y, root_z)
|
||||
tmp[10] = list(rot_qua)
|
||||
|
||||
# left elbow rotation (1D)
|
||||
vec1 = frame[11] - frame[12]
|
||||
vec2 = frame[13] - frame[12]
|
||||
angle1 = get_angle(vec1, vec2)
|
||||
tmp[14] = [pi-angle1]
|
||||
# left knee rotation (1D),
|
||||
vec1 = frame[4] - frame[5]
|
||||
vec2 = frame[6] - frame[5]
|
||||
angle1 = get_angle(vec1, vec2)
|
||||
tmp[11] = [angle1 - pi]
|
||||
|
||||
# left ankle rotation (4D),
|
||||
tmp[12] = [1, 0, 0, 0]
|
||||
|
||||
# left shoulder rotation (4D),
|
||||
l_shou_y = (frame[11] - frame[12])
|
||||
l_shou_z = np.cross(frame[13] - frame[12], frame[11] - frame[12])
|
||||
l_shou_x = np.cross(l_shou_y, l_shou_z)
|
||||
rot_qua = get_quaternion(l_shou_x, l_shou_y, l_shou_z, chest_x, chest_y, chest_z)
|
||||
tmp[13] = list(rot_qua)
|
||||
|
||||
# left elbow rotation (1D)
|
||||
vec1 = frame[11] - frame[12]
|
||||
vec2 = frame[13] - frame[12]
|
||||
angle1 = get_angle(vec1, vec2)
|
||||
tmp[14] = [pi - angle1]
|
||||
|
||||
ret = []
|
||||
for i in tmp:
|
||||
ret += list(i)
|
||||
return np.array(ret)
|
||||
|
||||
ret = []
|
||||
for i in tmp:
|
||||
ret += list(i)
|
||||
return np.array(ret)
|
||||
|
||||
# In[6]:
|
||||
|
||||
|
||||
def coord_seq_to_rot_seq(coord_seq, frame_duration):
|
||||
ret = []
|
||||
for i in range(len(coord_seq)):
|
||||
tmp = coord_to_rot( i, coord_seq[i], frame_duration)
|
||||
ret.append(list(tmp))
|
||||
return ret
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
ret = []
|
||||
for i in range(len(coord_seq)):
|
||||
tmp = coord_to_rot(i, coord_seq[i], frame_duration)
|
||||
ret.append(list(tmp))
|
||||
return ret
|
||||
|
||||
@@ -1,36 +1,37 @@
|
||||
import numpy as np
|
||||
from skeleton import Skeleton
|
||||
|
||||
|
||||
class MocapDataset:
|
||||
def __init__(self, fps, skeleton):
|
||||
self._skeleton = skeleton
|
||||
self._fps = fps
|
||||
self._data = None # Must be filled by subclass
|
||||
self._cameras = None # Must be filled by subclass
|
||||
|
||||
def remove_joints(self, joints_to_remove):
|
||||
kept_joints = self._skeleton.remove_joints(joints_to_remove)
|
||||
for subject in self._data.keys():
|
||||
for action in self._data[subject].keys():
|
||||
s = self._data[subject][action]
|
||||
s['positions'] = s['positions'][:, kept_joints]
|
||||
|
||||
|
||||
def __getitem__(self, key):
|
||||
return self._data[key]
|
||||
|
||||
def subjects(self):
|
||||
return self._data.keys()
|
||||
|
||||
def fps(self):
|
||||
return self._fps
|
||||
|
||||
def skeleton(self):
|
||||
return self._skeleton
|
||||
|
||||
def cameras(self):
|
||||
return self._cameras
|
||||
|
||||
def supports_semi_supervised(self):
|
||||
# This method can be overridden
|
||||
return False
|
||||
|
||||
def __init__(self, fps, skeleton):
|
||||
self._skeleton = skeleton
|
||||
self._fps = fps
|
||||
self._data = None # Must be filled by subclass
|
||||
self._cameras = None # Must be filled by subclass
|
||||
|
||||
def remove_joints(self, joints_to_remove):
|
||||
kept_joints = self._skeleton.remove_joints(joints_to_remove)
|
||||
for subject in self._data.keys():
|
||||
for action in self._data[subject].keys():
|
||||
s = self._data[subject][action]
|
||||
s['positions'] = s['positions'][:, kept_joints]
|
||||
|
||||
def __getitem__(self, key):
|
||||
return self._data[key]
|
||||
|
||||
def subjects(self):
|
||||
return self._data.keys()
|
||||
|
||||
def fps(self):
|
||||
return self._fps
|
||||
|
||||
def skeleton(self):
|
||||
return self._skeleton
|
||||
|
||||
def cameras(self):
|
||||
return self._cameras
|
||||
|
||||
def supports_semi_supervised(self):
|
||||
# This method can be overridden
|
||||
return False
|
||||
|
||||
@@ -1,30 +1,31 @@
|
||||
import numpy as np
|
||||
|
||||
|
||||
def qrot(q, v):
|
||||
"""
|
||||
"""
|
||||
Rotate vector(s) v about the rotation described by quaternion(s) q.
|
||||
Expects a tensor of shape (*, 4) for q and a tensor of shape (*, 3) for v,
|
||||
where * denotes any number of dimensions.
|
||||
Returns a tensor of shape (*, 3).
|
||||
"""
|
||||
assert q.shape[-1] == 4
|
||||
assert v.shape[-1] == 3
|
||||
assert q.shape[:-1] == v.shape[:-1]
|
||||
assert q.shape[-1] == 4
|
||||
assert v.shape[-1] == 3
|
||||
assert q.shape[:-1] == v.shape[:-1]
|
||||
|
||||
qvec = q[..., 1:]
|
||||
|
||||
uv = np.cross(qvec, v)
|
||||
uuv = np.cross(qvec, uv)
|
||||
|
||||
return (v + 2 * (q[..., :1] * uv + uuv))
|
||||
|
||||
qvec = q[..., 1:]
|
||||
|
||||
uv = np.cross(qvec, v)
|
||||
uuv = np.cross(qvec, uv)
|
||||
|
||||
return (v + 2 * (q[..., :1] * uv + uuv))
|
||||
|
||||
def qinverse(q, inplace=False):
|
||||
# We assume the quaternion to be normalized
|
||||
if inplace:
|
||||
q[..., 1:] *= -1
|
||||
return q
|
||||
else:
|
||||
w = q[..., :1]
|
||||
xyz = q[..., 1:]
|
||||
return np.hstack((w, -xyz))
|
||||
|
||||
# We assume the quaternion to be normalized
|
||||
if inplace:
|
||||
q[..., 1:] *= -1
|
||||
return q
|
||||
else:
|
||||
w = q[..., :1]
|
||||
xyz = q[..., 1:]
|
||||
return np.hstack((w, -xyz))
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import os, inspect
|
||||
import os, inspect
|
||||
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
|
||||
parentdir = os.path.dirname(os.path.dirname(currentdir))
|
||||
os.sys.path.insert(0,parentdir)
|
||||
os.sys.path.insert(0, parentdir)
|
||||
|
||||
print('parent:', parentdir)
|
||||
|
||||
@@ -10,7 +10,6 @@ import pybullet
|
||||
import time
|
||||
import random
|
||||
|
||||
|
||||
from pybullet_utils.bullet_client import BulletClient
|
||||
from deep_mimic.env.motion_capture_data import MotionCaptureData
|
||||
|
||||
@@ -26,21 +25,28 @@ import pybullet as p
|
||||
import numpy as np
|
||||
import argparse
|
||||
|
||||
|
||||
|
||||
parser = argparse.ArgumentParser(description='Arguments for loading reference for learning.')
|
||||
|
||||
# General arguments
|
||||
parser.add_argument('--dataset_path', default='data/data_3d_h36m.npz', type=str, help='target dataset') # h36m or humaneva
|
||||
parser.add_argument('--json_path', default='data/Walking.json', type=str, help='json file path for storing the deepmimic-format json created by inverse-kinect.')
|
||||
parser.add_argument('--dataset_path',
|
||||
default='data/data_3d_h36m.npz',
|
||||
type=str,
|
||||
help='target dataset') # h36m or humaneva
|
||||
parser.add_argument(
|
||||
'--json_path',
|
||||
default='data/Walking.json',
|
||||
type=str,
|
||||
help='json file path for storing the deepmimic-format json created by inverse-kinect.')
|
||||
parser.add_argument('--fps', default=24, type=int, help='frame per second')
|
||||
parser.add_argument('--subject', default='S11', type=str, help='camera subject.')
|
||||
parser.add_argument('--action', default='Walking', type=str, help='name of the action.')
|
||||
parser.add_argument('--loop', default='wrap', type=str, help='loop information in deepmimic, wrap or none.')
|
||||
parser.add_argument('--action', default='Walking', type=str, help='name of the action.')
|
||||
parser.add_argument('--loop',
|
||||
default='wrap',
|
||||
type=str,
|
||||
help='loop information in deepmimic, wrap or none.')
|
||||
parser.add_argument('--draw_gt', action='store_true', help='draw ground truth or not.')
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
dataset_path = args.dataset_path
|
||||
json_path = args.json_path
|
||||
@@ -52,93 +58,75 @@ draw_gt = args.draw_gt
|
||||
|
||||
|
||||
def draw_ground_truth(coord_seq, frame, duration, shift):
|
||||
global joint_info
|
||||
joint = coord_seq[frame]
|
||||
shift = np.array(shift)
|
||||
for i in range(1, 17):
|
||||
# print(x[11], x[14])
|
||||
joint_fa = joint_info['father'][i]
|
||||
if joint_info['side'][i] == 'right':
|
||||
p.addUserDebugLine(lineFromXYZ=joint[i]+shift,
|
||||
lineToXYZ=joint[joint_fa]+shift,
|
||||
lineColorRGB=(255,0,0),
|
||||
lineWidth=1,
|
||||
lifeTime=duration)
|
||||
else:
|
||||
p.addUserDebugLine(lineFromXYZ=joint[i]+shift,
|
||||
lineToXYZ=joint[joint_fa]+shift,
|
||||
lineColorRGB=(0,0,0),
|
||||
lineWidth=1,
|
||||
lifeTime=duration)
|
||||
global joint_info
|
||||
joint = coord_seq[frame]
|
||||
shift = np.array(shift)
|
||||
for i in range(1, 17):
|
||||
# print(x[11], x[14])
|
||||
joint_fa = joint_info['father'][i]
|
||||
if joint_info['side'][i] == 'right':
|
||||
p.addUserDebugLine(lineFromXYZ=joint[i] + shift,
|
||||
lineToXYZ=joint[joint_fa] + shift,
|
||||
lineColorRGB=(255, 0, 0),
|
||||
lineWidth=1,
|
||||
lifeTime=duration)
|
||||
else:
|
||||
p.addUserDebugLine(lineFromXYZ=joint[i] + shift,
|
||||
lineToXYZ=joint[joint_fa] + shift,
|
||||
lineColorRGB=(0, 0, 0),
|
||||
lineWidth=1,
|
||||
lifeTime=duration)
|
||||
|
||||
|
||||
dataset = init_fb_h36m_dataset(dataset_path)
|
||||
ground_truth = pose3D_from_fb_h36m(dataset,
|
||||
subject = subject,
|
||||
action = action,
|
||||
shift = [1.0,0.0,0.0])
|
||||
|
||||
rot_seq = coord_seq_to_rot_seq(coord_seq = ground_truth,
|
||||
frame_duration = 1/fps)
|
||||
|
||||
|
||||
rot_seq_to_deepmimic_json( rot_seq = rot_seq,
|
||||
loop = loop,
|
||||
json_path = json_path)
|
||||
ground_truth = pose3D_from_fb_h36m(dataset, subject=subject, action=action, shift=[1.0, 0.0, 0.0])
|
||||
|
||||
rot_seq = coord_seq_to_rot_seq(coord_seq=ground_truth, frame_duration=1 / fps)
|
||||
|
||||
rot_seq_to_deepmimic_json(rot_seq=rot_seq, loop=loop, json_path=json_path)
|
||||
|
||||
bc = BulletClient(connection_mode=pybullet.GUI)
|
||||
bc.setAdditionalSearchPath(pybullet_data.getDataPath())
|
||||
bc.configureDebugVisualizer(bc.COV_ENABLE_Y_AXIS_UP,1)
|
||||
bc.setGravity(0,-9.8,0)
|
||||
motion=MotionCaptureData()
|
||||
bc.configureDebugVisualizer(bc.COV_ENABLE_Y_AXIS_UP, 1)
|
||||
bc.setGravity(0, -9.8, 0)
|
||||
motion = MotionCaptureData()
|
||||
|
||||
motionPath = json_path
|
||||
motion.Load(motionPath)
|
||||
print("numFrames = ", motion.NumFrames())
|
||||
|
||||
simTimeId = bc.addUserDebugParameter("simTime", 0, motion.NumFrames() - 1.1, 0)
|
||||
|
||||
y2zOrn = bc.getQuaternionFromEuler([-1.57, 0, 0])
|
||||
bc.loadURDF("plane.urdf", [0, -0.04, 0], y2zOrn)
|
||||
|
||||
simTimeId= bc.addUserDebugParameter("simTime",0,motion.NumFrames()-1.1,0)
|
||||
|
||||
y2zOrn = bc.getQuaternionFromEuler([-1.57,0,0])
|
||||
bc.loadURDF("plane.urdf",[0,-0.04,0], y2zOrn)
|
||||
|
||||
humanoid = Humanoid(bc, motion, [0,0,0]) #这是初始位置的坐标
|
||||
|
||||
|
||||
humanoid = Humanoid(bc, motion, [0, 0, 0]) #这是初始位置的坐标
|
||||
|
||||
print(p.getBasePositionAndOrientation(humanoid._humanoid))
|
||||
|
||||
|
||||
|
||||
simTime = 0
|
||||
keyFrameDuration = motion.KeyFrameDuraction()
|
||||
print("keyFrameDuration=",keyFrameDuration)
|
||||
print("keyFrameDuration=", keyFrameDuration)
|
||||
for utNum in range(motion.NumFrames()):
|
||||
bc.stepSimulation()
|
||||
humanoid.RenderReference(utNum * keyFrameDuration)
|
||||
if draw_gt:
|
||||
draw_ground_truth(coord_seq = ground_truth,
|
||||
frame = utNum,
|
||||
duration = keyFrameDuration,
|
||||
shift = [-1.0, 0.0, 1.0])
|
||||
time.sleep(0.001)
|
||||
bc.stepSimulation()
|
||||
humanoid.RenderReference(utNum * keyFrameDuration)
|
||||
if draw_gt:
|
||||
draw_ground_truth(coord_seq=ground_truth,
|
||||
frame=utNum,
|
||||
duration=keyFrameDuration,
|
||||
shift=[-1.0, 0.0, 1.0])
|
||||
time.sleep(0.001)
|
||||
stage = 0
|
||||
|
||||
|
||||
|
||||
def Reset(humanoid):
|
||||
global simTime
|
||||
humanoid.Reset()
|
||||
simTime = 0
|
||||
humanoid.SetSimTime(simTime)
|
||||
pose = humanoid.InitializePoseFromMotionData()
|
||||
humanoid.ApplyPose(pose, True, True, humanoid._humanoid,bc)
|
||||
global simTime
|
||||
humanoid.Reset()
|
||||
simTime = 0
|
||||
humanoid.SetSimTime(simTime)
|
||||
pose = humanoid.InitializePoseFromMotionData()
|
||||
humanoid.ApplyPose(pose, True, True, humanoid._humanoid, bc)
|
||||
|
||||
|
||||
Reset(humanoid)
|
||||
p.disconnect()
|
||||
|
||||
|
||||
|
||||
@@ -1,81 +1,82 @@
|
||||
import numpy as np
|
||||
|
||||
|
||||
class Skeleton:
|
||||
def __init__(self, parents, joints_left, joints_right):
|
||||
assert len(joints_left) == len(joints_right)
|
||||
|
||||
self._parents = np.array(parents)
|
||||
self._joints_left = joints_left
|
||||
self._joints_right = joints_right
|
||||
self._compute_metadata()
|
||||
|
||||
def num_joints(self):
|
||||
return len(self._parents)
|
||||
|
||||
def parents(self):
|
||||
return self._parents
|
||||
|
||||
def has_children(self):
|
||||
return self._has_children
|
||||
|
||||
def children(self):
|
||||
return self._children
|
||||
|
||||
def remove_joints(self, joints_to_remove):
|
||||
"""
|
||||
|
||||
def __init__(self, parents, joints_left, joints_right):
|
||||
assert len(joints_left) == len(joints_right)
|
||||
|
||||
self._parents = np.array(parents)
|
||||
self._joints_left = joints_left
|
||||
self._joints_right = joints_right
|
||||
self._compute_metadata()
|
||||
|
||||
def num_joints(self):
|
||||
return len(self._parents)
|
||||
|
||||
def parents(self):
|
||||
return self._parents
|
||||
|
||||
def has_children(self):
|
||||
return self._has_children
|
||||
|
||||
def children(self):
|
||||
return self._children
|
||||
|
||||
def remove_joints(self, joints_to_remove):
|
||||
"""
|
||||
Remove the joints specified in 'joints_to_remove'.
|
||||
"""
|
||||
valid_joints = []
|
||||
for joint in range(len(self._parents)):
|
||||
if joint not in joints_to_remove:
|
||||
valid_joints.append(joint)
|
||||
valid_joints = []
|
||||
for joint in range(len(self._parents)):
|
||||
if joint not in joints_to_remove:
|
||||
valid_joints.append(joint)
|
||||
|
||||
for i in range(len(self._parents)):
|
||||
while self._parents[i] in joints_to_remove:
|
||||
self._parents[i] = self._parents[self._parents[i]]
|
||||
|
||||
index_offsets = np.zeros(len(self._parents), dtype=int)
|
||||
new_parents = []
|
||||
for i, parent in enumerate(self._parents):
|
||||
if i not in joints_to_remove:
|
||||
new_parents.append(parent - index_offsets[parent])
|
||||
else:
|
||||
index_offsets[i:] += 1
|
||||
self._parents = np.array(new_parents)
|
||||
|
||||
|
||||
if self._joints_left is not None:
|
||||
new_joints_left = []
|
||||
for joint in self._joints_left:
|
||||
if joint in valid_joints:
|
||||
new_joints_left.append(joint - index_offsets[joint])
|
||||
self._joints_left = new_joints_left
|
||||
if self._joints_right is not None:
|
||||
new_joints_right = []
|
||||
for joint in self._joints_right:
|
||||
if joint in valid_joints:
|
||||
new_joints_right.append(joint - index_offsets[joint])
|
||||
self._joints_right = new_joints_right
|
||||
for i in range(len(self._parents)):
|
||||
while self._parents[i] in joints_to_remove:
|
||||
self._parents[i] = self._parents[self._parents[i]]
|
||||
|
||||
self._compute_metadata()
|
||||
|
||||
return valid_joints
|
||||
|
||||
def joints_left(self):
|
||||
return self._joints_left
|
||||
|
||||
def joints_right(self):
|
||||
return self._joints_right
|
||||
|
||||
def _compute_metadata(self):
|
||||
self._has_children = np.zeros(len(self._parents)).astype(bool)
|
||||
for i, parent in enumerate(self._parents):
|
||||
if parent != -1:
|
||||
self._has_children[parent] = True
|
||||
index_offsets = np.zeros(len(self._parents), dtype=int)
|
||||
new_parents = []
|
||||
for i, parent in enumerate(self._parents):
|
||||
if i not in joints_to_remove:
|
||||
new_parents.append(parent - index_offsets[parent])
|
||||
else:
|
||||
index_offsets[i:] += 1
|
||||
self._parents = np.array(new_parents)
|
||||
|
||||
self._children = []
|
||||
for i, parent in enumerate(self._parents):
|
||||
self._children.append([])
|
||||
for i, parent in enumerate(self._parents):
|
||||
if parent != -1:
|
||||
self._children[parent].append(i)
|
||||
if self._joints_left is not None:
|
||||
new_joints_left = []
|
||||
for joint in self._joints_left:
|
||||
if joint in valid_joints:
|
||||
new_joints_left.append(joint - index_offsets[joint])
|
||||
self._joints_left = new_joints_left
|
||||
if self._joints_right is not None:
|
||||
new_joints_right = []
|
||||
for joint in self._joints_right:
|
||||
if joint in valid_joints:
|
||||
new_joints_right.append(joint - index_offsets[joint])
|
||||
self._joints_right = new_joints_right
|
||||
|
||||
self._compute_metadata()
|
||||
|
||||
return valid_joints
|
||||
|
||||
def joints_left(self):
|
||||
return self._joints_left
|
||||
|
||||
def joints_right(self):
|
||||
return self._joints_right
|
||||
|
||||
def _compute_metadata(self):
|
||||
self._has_children = np.zeros(len(self._parents)).astype(bool)
|
||||
for i, parent in enumerate(self._parents):
|
||||
if parent != -1:
|
||||
self._has_children[parent] = True
|
||||
|
||||
self._children = []
|
||||
for i, parent in enumerate(self._parents):
|
||||
self._children.append([])
|
||||
for i, parent in enumerate(self._parents):
|
||||
if parent != -1:
|
||||
self._children[parent].append(i)
|
||||
|
||||
File diff suppressed because it is too large
@@ -3,21 +3,23 @@ import subprocess
from pybullet_utils.arg_parser import ArgParser
from pybullet_utils.logger import Logger


def main():
# Command line arguments
args = sys.argv[1:]
arg_parser = ArgParser()
arg_parser.load_args(args)
# Command line arguments
args = sys.argv[1:]
arg_parser = ArgParser()
arg_parser.load_args(args)

num_workers = arg_parser.parse_int('num_workers', 1)
assert(num_workers > 0)
num_workers = arg_parser.parse_int('num_workers', 1)
assert (num_workers > 0)

Logger.print2('Running with {:d} workers'.format(num_workers))
cmd = 'mpiexec -n {:d} python3 DeepMimic_Optimizer.py '.format(num_workers)
cmd += ' '.join(args)
Logger.print2('cmd: ' + cmd)
subprocess.call(cmd, shell=True)
return

Logger.print2('Running with {:d} workers'.format(num_workers))
cmd = 'mpiexec -n {:d} python3 DeepMimic_Optimizer.py '.format(num_workers)
cmd += ' '.join(args)
Logger.print2('cmd: ' + cmd)
subprocess.call(cmd, shell=True)
return

if __name__ == '__main__':
main()
main()
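The hunks above and below are the kind of whitespace-only changes yapf produces automatically. A minimal sketch of invoking it from Python on one of the touched lines; style_config='pep8' is an assumed style here, since the repository's actual .style.yapf file is not shown in this hunk:

# Sketch: reformat a source string with yapf's documented API.
from yapf.yapflib.yapf_api import FormatCode

src = "os.sys.path.insert(0,parentdir)\n"

# Recent yapf releases return (formatted_code, changed); older ones returned
# only the string, so handle both defensively.
result = FormatCode(src, style_config='pep8')  # 'pep8' is an assumed style name
formatted = result[0] if isinstance(result, tuple) else result
print(formatted)  # os.sys.path.insert(0, parentdir)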
@@ -2,8 +2,8 @@ import os
import inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
os.sys.path.insert(0,parentdir)
print("parentdir=",parentdir)
os.sys.path.insert(0, parentdir)
print("parentdir=", parentdir)
import json
from pybullet_envs.deep_mimic.learning.rl_world import RLWorld
from pybullet_envs.deep_mimic.learning.ppo_agent import PPOAgent
@@ -15,81 +15,78 @@ from pybullet_envs.deep_mimic.env.pybullet_deep_mimic_env import PyBulletDeepMim
import sys
import random

update_timestep = 1./240.
update_timestep = 1. / 240.
animating = True


def update_world(world, time_elapsed):
timeStep = update_timestep
world.update(timeStep)
reward = world.env.calc_reward(agent_id=0)
#print("reward=",reward)
end_episode = world.env.is_episode_end()
if (end_episode):
world.end_episode()
world.reset()
return
timeStep = update_timestep
world.update(timeStep)
reward = world.env.calc_reward(agent_id=0)
#print("reward=",reward)
end_episode = world.env.is_episode_end()
if (end_episode):
world.end_episode()
world.reset()
return


def build_arg_parser(args):
arg_parser = ArgParser()
arg_parser.load_args(args)
arg_parser = ArgParser()
arg_parser.load_args(args)

arg_file = arg_parser.parse_string('arg_file', '')
if (arg_file != ''):
path = pybullet_data.getDataPath() + "/args/" + arg_file
succ = arg_parser.load_file(path)
Logger.print2(arg_file)
assert succ, Logger.print2('Failed to load args from: ' + arg_file)
return arg_parser

arg_file = arg_parser.parse_string('arg_file', '')
if (arg_file != ''):
path = pybullet_data.getDataPath()+"/args/"+arg_file
succ = arg_parser.load_file(path)
Logger.print2(arg_file)
assert succ, Logger.print2('Failed to load args from: ' + arg_file)
return arg_parser

args = sys.argv[1:]



def build_world(args, enable_draw):
arg_parser = build_arg_parser(args)
print("enable_draw=",enable_draw)
env = PyBulletDeepMimicEnv(arg_parser, enable_draw)
world = RLWorld(env, arg_parser)
#world.env.set_playback_speed(playback_speed)
arg_parser = build_arg_parser(args)
print("enable_draw=", enable_draw)
env = PyBulletDeepMimicEnv(arg_parser, enable_draw)
world = RLWorld(env, arg_parser)
#world.env.set_playback_speed(playback_speed)

motion_file = arg_parser.parse_string("motion_file")
print("motion_file=",motion_file)
bodies = arg_parser.parse_ints("fall_contact_bodies")
print("bodies=",bodies)
int_output_path = arg_parser.parse_string("int_output_path")
print("int_output_path=",int_output_path)
agent_files = pybullet_data.getDataPath()+"/"+arg_parser.parse_string("agent_files")
motion_file = arg_parser.parse_string("motion_file")
print("motion_file=", motion_file)
bodies = arg_parser.parse_ints("fall_contact_bodies")
print("bodies=", bodies)
int_output_path = arg_parser.parse_string("int_output_path")
print("int_output_path=", int_output_path)
agent_files = pybullet_data.getDataPath() + "/" + arg_parser.parse_string("agent_files")

AGENT_TYPE_KEY = "AgentType"
AGENT_TYPE_KEY = "AgentType"

print("agent_file=",agent_files)
with open(agent_files) as data_file:
json_data = json.load(data_file)
print("json_data=",json_data)
assert AGENT_TYPE_KEY in json_data
agent_type = json_data[AGENT_TYPE_KEY]
print("agent_type=",agent_type)
agent = PPOAgent(world, id, json_data)

agent.set_enable_training(False)
world.reset()
return world
print("agent_file=", agent_files)
with open(agent_files) as data_file:
json_data = json.load(data_file)
print("json_data=", json_data)
assert AGENT_TYPE_KEY in json_data
agent_type = json_data[AGENT_TYPE_KEY]
print("agent_type=", agent_type)
agent = PPOAgent(world, id, json_data)

agent.set_enable_training(False)
world.reset()
return world





if __name__ == '__main__':

world = build_world(args, True)
while (world.env._pybullet_client.isConnected()):
timeStep = update_timestep
keys = world.env.getKeyboardEvents()


if world.env.isKeyTriggered(keys, ' '):
animating = not animating
if (animating):
update_world(world, timeStep)
#animating=False

world = build_world(args, True)
while (world.env._pybullet_client.isConnected()):
timeStep = update_timestep
keys = world.env.getKeyboardEvents()

if world.env.isKeyTriggered(keys, ' '):
animating = not animating
if (animating):
update_world(world, timeStep)
#animating=False
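A quick way to confirm that a hunk like the one above is formatting-only is to compare the old and new text at the AST level; this is a generic standard-library sketch, not part of the commit itself:

import ast

# Old and new spellings of one line from the hunk above.
before = 'print("parentdir=",parentdir)'
after = 'print("parentdir=", parentdir)'

# If only whitespace changed, the parsed trees are identical.
assert ast.dump(ast.parse(before)) == ast.dump(ast.parse(after))
print("formatting-only change")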
@@ -5,134 +5,134 @@ from pybullet_utils import bullet_client

from pkg_resources import parse_version


class MJCFBaseBulletEnv(gym.Env):
"""
"""
Base class for Bullet physics simulation loading MJCF (MuJoCo .xml) environments in a Scene.
These environments create single-player scenes and behave like normal Gym environments, if
you don't use multiplayer.
"""

metadata = {
'render.modes': ['human', 'rgb_array'],
'video.frames_per_second': 60
}
metadata = {'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': 60}

def __init__(self, robot, render=False):
self.scene = None
self.physicsClientId = -1
self.ownsPhysicsClient = 0
self.camera = Camera()
self.isRender = render
self.robot = robot
self.seed()
self._cam_dist = 3
self._cam_yaw = 0
self._cam_pitch = -30
self._render_width =320
self._render_height = 240
def __init__(self, robot, render=False):
self.scene = None
self.physicsClientId = -1
self.ownsPhysicsClient = 0
self.camera = Camera()
self.isRender = render
self.robot = robot
self.seed()
self._cam_dist = 3
self._cam_yaw = 0
self._cam_pitch = -30
self._render_width = 320
self._render_height = 240

self.action_space = robot.action_space
self.observation_space = robot.observation_space
self.action_space = robot.action_space
self.observation_space = robot.observation_space

def configure(self, args):
self.robot.args = args
def configure(self, args):
self.robot.args = args

def seed(self, seed=None):
self.np_random, seed = gym.utils.seeding.np_random(seed)
self.robot.np_random = self.np_random # use the same np_randomizer for robot as for env
return [seed]
def seed(self, seed=None):
self.np_random, seed = gym.utils.seeding.np_random(seed)
self.robot.np_random = self.np_random # use the same np_randomizer for robot as for env
return [seed]

def reset(self):
if (self.physicsClientId<0):
self.ownsPhysicsClient = True
def reset(self):
if (self.physicsClientId < 0):
self.ownsPhysicsClient = True

if self.isRender:
self._p = bullet_client.BulletClient(connection_mode=pybullet.GUI)
else:
self._p = bullet_client.BulletClient()
if self.isRender:
self._p = bullet_client.BulletClient(connection_mode=pybullet.GUI)
else:
self._p = bullet_client.BulletClient()

self.physicsClientId = self._p._client
self._p.configureDebugVisualizer(pybullet.COV_ENABLE_GUI,0)
self.physicsClientId = self._p._client
self._p.configureDebugVisualizer(pybullet.COV_ENABLE_GUI, 0)

if self.scene is None:
self.scene = self.create_single_player_scene(self._p)
if not self.scene.multiplayer and self.ownsPhysicsClient:
self.scene.episode_restart(self._p)
if self.scene is None:
self.scene = self.create_single_player_scene(self._p)
if not self.scene.multiplayer and self.ownsPhysicsClient:
self.scene.episode_restart(self._p)

self.robot.scene = self.scene
self.robot.scene = self.scene

self.frame = 0
self.done = 0
self.reward = 0
dump = 0
s = self.robot.reset(self._p)
self.potential = self.robot.calc_potential()
return s
self.frame = 0
self.done = 0
self.reward = 0
dump = 0
s = self.robot.reset(self._p)
self.potential = self.robot.calc_potential()
return s

def render(self, mode='human', close=False):
if mode == "human":
self.isRender = True
if mode != "rgb_array":
return np.array([])
def render(self, mode='human', close=False):
if mode == "human":
self.isRender = True
if mode != "rgb_array":
return np.array([])

base_pos=[0,0,0]
if (hasattr(self,'robot')):
if (hasattr(self.robot,'body_xyz')):
base_pos = self.robot.body_xyz
base_pos = [0, 0, 0]
if (hasattr(self, 'robot')):
if (hasattr(self.robot, 'body_xyz')):
base_pos = self.robot.body_xyz

view_matrix = self._p.computeViewMatrixFromYawPitchRoll(
cameraTargetPosition=base_pos,
distance=self._cam_dist,
yaw=self._cam_yaw,
pitch=self._cam_pitch,
roll=0,
upAxisIndex=2)
proj_matrix = self._p.computeProjectionMatrixFOV(
fov=60, aspect=float(self._render_width)/self._render_height,
nearVal=0.1, farVal=100.0)
(_, _, px, _, _) = self._p.getCameraImage(
width=self._render_width, height=self._render_height, viewMatrix=view_matrix,
projectionMatrix=proj_matrix,
renderer=pybullet.ER_BULLET_HARDWARE_OPENGL
)
rgb_array = np.array(px)
rgb_array = np.reshape(np.array(px), (self._render_height, self._render_width, -1))
rgb_array = rgb_array[:, :, :3]
return rgb_array
view_matrix = self._p.computeViewMatrixFromYawPitchRoll(cameraTargetPosition=base_pos,
distance=self._cam_dist,
yaw=self._cam_yaw,
pitch=self._cam_pitch,
roll=0,
upAxisIndex=2)
proj_matrix = self._p.computeProjectionMatrixFOV(fov=60,
aspect=float(self._render_width) /
self._render_height,
nearVal=0.1,
farVal=100.0)
(_, _, px, _, _) = self._p.getCameraImage(width=self._render_width,
height=self._render_height,
viewMatrix=view_matrix,
projectionMatrix=proj_matrix,
renderer=pybullet.ER_BULLET_HARDWARE_OPENGL)
rgb_array = np.array(px)
rgb_array = np.reshape(np.array(px), (self._render_height, self._render_width, -1))
rgb_array = rgb_array[:, :, :3]
return rgb_array

def close(self):
if (self.ownsPhysicsClient):
if (self.physicsClientId >= 0):
self._p.disconnect()
self.physicsClientId = -1

def close(self):
if (self.ownsPhysicsClient):
if (self.physicsClientId>=0):
self._p.disconnect()
self.physicsClientId = -1
def HUD(self, state, a, done):
pass

def HUD(self, state, a, done):
pass
# def step(self, *args, **kwargs):
# if self.isRender:
# base_pos=[0,0,0]
# if (hasattr(self,'robot')):
# if (hasattr(self.robot,'body_xyz')):
# base_pos = self.robot.body_xyz
# # Keep the previous orientation of the camera set by the user.
# #[yaw, pitch, dist] = self._p.getDebugVisualizerCamera()[8:11]
# self._p.resetDebugVisualizerCamera(3,0,0, base_pos)
#
#
# return self.step(*args, **kwargs)
if parse_version(gym.__version__) < parse_version('0.9.6'):
_render = render
_reset = reset
_seed = seed

# def step(self, *args, **kwargs):
# if self.isRender:
# base_pos=[0,0,0]
# if (hasattr(self,'robot')):
# if (hasattr(self.robot,'body_xyz')):
# base_pos = self.robot.body_xyz
# # Keep the previous orientation of the camera set by the user.
# #[yaw, pitch, dist] = self._p.getDebugVisualizerCamera()[8:11]
# self._p.resetDebugVisualizerCamera(3,0,0, base_pos)
#
#
# return self.step(*args, **kwargs)
if parse_version(gym.__version__) < parse_version('0.9.6'):
_render = render
_reset = reset
_seed = seed

class Camera:
def __init__(self):
pass

def move_and_look_at(self,i,j,k,x,y,z):
lookat = [x,y,z]
distance = 10
yaw = 10
self._p.resetDebugVisualizerCamera(distance, yaw, -20, lookat)
def __init__(self):
pass

def move_and_look_at(self, i, j, k, x, y, z):
lookat = [x, y, z]
distance = 10
yaw = 10
self._p.resetDebugVisualizerCamera(distance, yaw, -20, lookat)
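yapf re-wraps the long keyword-argument calls above to the configured column limit; regions that should keep their hand alignment can be fenced with the # yapf: disable / # yapf: enable marker comments, the same directive that appears before the weight tables further down. A small illustrative sketch with made-up names:

# Sketch: lines between these markers are left untouched by yapf.
# yapf: disable
LOOKUP = [
    [ 1,  0,  0],
    [ 0,  1,  0],
    [ 0,  0,  1],
]
# yapf: enable

def spin(client):
    # Outside a disabled region, yapf wraps a call like this to the column limit.
    client.resetDebugVisualizerCamera(cameraDistance=3, cameraYaw=0,
                                      cameraPitch=-30, cameraTargetPosition=[0, 0, 0])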
@@ -7,31 +7,38 @@ import time
p = bc.BulletClient(connection_mode=pybullet.GUI)
p.setAdditionalSearchPath(pd.getDataPath())
p.loadURDF("plane_transparent.urdf", useMaximalCoordinates=True)
p#.setPhysicsEngineParameter(numSolverIterations=10, fixedTimeStep=0.01)
p #.setPhysicsEngineParameter(numSolverIterations=10, fixedTimeStep=0.01)

p.configureDebugVisualizer(p.COV_ENABLE_PLANAR_REFLECTION, 1)
p.configureDebugVisualizer(p.COV_ENABLE_RENDERING, 0)

p.configureDebugVisualizer(p.COV_ENABLE_PLANAR_REFLECTION,1)
p.configureDebugVisualizer(p.COV_ENABLE_RENDERING,0)
y2z = p.getQuaternionFromEuler([0, 0, 1.57])
meshScale = [1, 1, 1]
visualShapeId = p.createVisualShape(shapeType=p.GEOM_MESH,
fileName="domino/domino.obj",
rgbaColor=[1, 1, 1, 1],
specularColor=[0.4, .4, 0],
visualFrameOrientation=y2z,
meshScale=meshScale)

y2z = p.getQuaternionFromEuler([0,0,1.57])
meshScale = [1,1,1]
visualShapeId = p.createVisualShape(shapeType=p.GEOM_MESH,fileName="domino/domino.obj", rgbaColor=[1,1,1,1], specularColor=[0.4,.4,0], visualFrameOrientation=y2z, meshScale=meshScale)
boxDimensions = [0.5 * 0.00635, 0.5 * 0.0254, 0.5 * 0.0508]
collisionShapeId = p.createCollisionShape(p.GEOM_BOX, halfExtents=boxDimensions)

boxDimensions = [0.5*0.00635, 0.5*0.0254, 0.5*0.0508]
collisionShapeId = p.createCollisionShape(p.GEOM_BOX,halfExtents=boxDimensions)


for j in range (12):
print("j=",j)
for i in range (35):
for j in range(12):
print("j=", j)
for i in range(35):
#p.loadURDF("domino/domino.urdf",[i*0.04,0, 0.06])
p.createMultiBody(baseMass=0.025,baseCollisionShapeIndex = collisionShapeId,baseVisualShapeIndex = visualShapeId, basePosition = [i*0.04,j*0.05, 0.06], useMaximalCoordinates=True)

p.configureDebugVisualizer(p.COV_ENABLE_RENDERING,1)
p.createMultiBody(baseMass=0.025,
baseCollisionShapeIndex=collisionShapeId,
baseVisualShapeIndex=visualShapeId,
basePosition=[i * 0.04, j * 0.05, 0.06],
useMaximalCoordinates=True)

p.setGravity(0,0,-9.8)
p.configureDebugVisualizer(p.COV_ENABLE_RENDERING, 1)

p.setGravity(0, 0, -9.8)
p.setRealTimeSimulation(1)
while (1):
p.setGravity(0,0,-9.8)
p.setGravity(0, 0, -9.8)
#p.stepSimulation(1./100.)
time.sleep(1./240.)
time.sleep(1. / 240.)

File diff suppressed because it is too large
File diff suppressed because it is too large
File diff suppressed because it is too large
File diff suppressed because it is too large
File diff suppressed because it is too large
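Since the commit reformats every Python file, the batch equivalent is yapf's recursive in-place mode; the path below is a placeholder, not the exact set of files touched by this PR:

# Sketch: reformat a source tree in place (requires yapf to be installed).
# "examples/pybullet" is a placeholder path for illustration only.
import subprocess

subprocess.check_call([
    "yapf",
    "--in-place",    # rewrite the files instead of printing a diff
    "--recursive",   # descend into subdirectories
    "examples/pybullet",
])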
@@ -2,179 +2,582 @@
import os, inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
os.sys.path.insert(0,parentdir)
os.sys.path.insert(0, parentdir)

import gym
import numpy as np
import pybullet_envs
import time


def relu(x):
return np.maximum(x, 0)
return np.maximum(x, 0)


class SmallReactivePolicy:
"Simple multi-layer perceptron policy, no internal state"
def __init__(self, observation_space, action_space):
assert weights_dense1_w.shape == (observation_space.shape[0], 64.0)
assert weights_dense2_w.shape == (64.0, 32.0)
assert weights_final_w.shape == (32.0, action_space.shape[0])
"Simple multi-layer perceptron policy, no internal state"

def __init__(self, observation_space, action_space):
assert weights_dense1_w.shape == (observation_space.shape[0], 64.0)
assert weights_dense2_w.shape == (64.0, 32.0)
assert weights_final_w.shape == (32.0, action_space.shape[0])

def act(self, ob):
x = ob
x = relu(np.dot(x, weights_dense1_w) + weights_dense1_b)
x = relu(np.dot(x, weights_dense2_w) + weights_dense2_b)
x = np.dot(x, weights_final_w) + weights_final_b
return x

def act(self, ob):
x = ob
x = relu(np.dot(x, weights_dense1_w) + weights_dense1_b)
x = relu(np.dot(x, weights_dense2_w) + weights_dense2_b)
x = np.dot(x, weights_final_w) + weights_final_b
return x

def main():
env = gym.make("InvertedDoublePendulumBulletEnv-v0")
env.render(mode="human")

pi = SmallReactivePolicy(env.observation_space, env.action_space)
env = gym.make("InvertedDoublePendulumBulletEnv-v0")
env.render(mode="human")

pi = SmallReactivePolicy(env.observation_space, env.action_space)

while 1:
frame = 0
score = 0
restart_delay = 0
obs = env.reset()

while 1:
frame = 0
score = 0
restart_delay = 0
obs = env.reset()
time.sleep(1. / 60.)
a = pi.act(obs)
obs, r, done, _ = env.step(a)
score += r
frame += 1
still_open = env.render("human")
if still_open == False:
return
if not done: continue
if restart_delay == 0:
print("score=%0.2f in %i frames" % (score, frame))
restart_delay = 60 * 2 # 2 sec at 60 fps
else:
restart_delay -= 1
if restart_delay > 0: continue
break

while 1:
time.sleep(1./60.)
a = pi.act(obs)
obs, r, done, _ = env.step(a)
score += r
frame += 1
still_open = env.render("human")
if still_open==False:
return
if not done: continue
if restart_delay==0:
print("score=%0.2f in %i frames" % (score, frame))
restart_delay = 60*2 # 2 sec at 60 fps
else:
restart_delay -= 1
if restart_delay > 0: continue
break
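The asserts above pin the layer sizes of the hard-coded policy (observation -> 64 -> 32 -> action). A self-contained sketch of the same forward pass with randomly initialized weights, purely to show the structure; the dimensions 9 and 1 are assumptions for illustration, and the trained values follow below:

import numpy as np

def relu(x):
    return np.maximum(x, 0)

# Illustrative shapes only; the trained weight values are in the arrays below.
obs_dim, act_dim = 9, 1
rng = np.random.default_rng(0)
w1, b1 = rng.normal(size=(obs_dim, 64)), np.zeros(64)
w2, b2 = rng.normal(size=(64, 32)), np.zeros(32)
w3, b3 = rng.normal(size=(32, act_dim)), np.zeros(act_dim)

def act(ob):
    # Same two hidden relu layers and linear output as SmallReactivePolicy.act
    x = relu(np.dot(ob, w1) + b1)
    x = relu(np.dot(x, w2) + b2)
    return np.dot(x, w3) + b3

print(act(np.zeros(obs_dim)).shape)  # (1,)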
weights_dense1_w = np.array([
|
||||
[ -0.5857, +0.1810, +0.2839, +0.1278, -0.4302, -0.3152, +0.5916, -0.0635, +0.6259, +0.2873, -0.0572, -0.3538, -0.8121, +0.2707, +0.1656, -0.2103, -0.1614, -0.2789, -0.5856, -0.4733, +0.1838, +0.1063, +0.7629, +0.0873, +0.1480, +0.1768, +0.6522, +0.1158, -0.0816, +0.6542, -0.8870, +0.1775, +0.1532, +0.2268, -0.0313, -0.0470, +0.5328, -0.0570, +0.4820, -0.3772, -0.7581, +0.2835, -0.3566, +0.9371, -0.0441, -0.1797, -0.2859, -0.0238, +0.0261, -0.0296, -0.1406, +0.2869, +0.1279, +0.6653, +0.5643, -0.3136, +0.7751, +0.2341, +0.1903, +0.8283, -0.0697, +0.1276, -0.0250, -0.0053],
|
||||
[ +0.3741, +0.4844, -0.0638, -0.3205, +0.3137, +0.9636, +0.5329, +0.6882, +0.2983, -0.6675, -0.6372, +0.2065, -0.2645, -0.4789, +0.2326, -0.0691, -0.5905, -0.3354, +0.3428, +0.4253, +0.9111, -0.4751, -0.2124, +0.3920, +0.2897, -1.1101, +0.1894, -0.4025, -0.1125, -0.0627, +0.2347, -0.8787, +0.1019, +0.9128, +0.2544, -0.3933, +0.6485, -0.1936, -0.2402, +0.5012, -0.0918, +0.3160, -0.7860, +0.3439, -0.4268, -0.1788, -0.3930, +0.5128, +0.2338, +0.2571, +0.1343, +0.9850, -0.7074, +0.3532, +0.3048, -0.4542, +0.5539, -0.4409, -0.2003, -0.4837, -0.3554, -0.4447, -0.0817, -0.8497],
|
||||
[ +0.0825, +0.5847, +0.4837, +0.5144, +0.4770, +0.0199, +0.4275, -0.4530, +0.8499, -0.2840, +0.3817, -0.5098, -0.2155, -0.1475, +0.1145, -0.1871, -0.0526, +0.3583, -0.3537, -0.7111, -0.6116, +0.3406, -0.6360, +0.7786, +0.6628, -0.0493, +0.3505, -0.0376, -0.6556, +1.0748, -0.5329, +0.6477, -0.7117, +0.3723, +0.6006, +0.0171, +0.0012, +0.4910, -0.5651, -0.6868, +0.2403, +0.0254, -0.4416, +0.7534, -0.0138, -1.1298, +0.5447, +0.0974, +0.1988, -0.2161, -0.3126, -0.5731, -0.1278, +0.2995, -0.1200, -0.7917, +0.5326, +0.4562, -0.0144, +0.5717, -0.4065, +0.1494, +0.7100, +0.2461],
|
||||
[ -0.2861, +0.4314, -0.2982, -0.1401, -0.1033, +0.5287, -0.6620, -0.3975, +0.0038, +0.1991, -0.7079, -0.9000, +0.1659, +0.3623, -0.0752, -0.1907, -0.2335, -0.5143, +0.2324, -0.0487, +0.1583, -0.5989, +0.5957, +0.2150, -0.0335, +0.2154, +0.3279, -0.7976, +0.5320, -0.4438, +0.2170, -0.3841, -0.0039, -0.0847, -0.0028, -0.4278, -0.2393, -0.9239, +0.2880, -0.1437, -0.0941, -0.0796, -0.3906, -0.3224, +0.1038, -0.1929, -0.2713, -0.4157, -0.2178, +0.5729, -0.2065, +0.0059, +0.3879, +0.0590, +0.1759, +0.0677, -0.0170, -0.2252, +0.3301, -0.0599, +0.3791, -0.1455, +0.2200, -0.0499],
|
||||
[ -0.4403, +0.7182, +0.7941, +0.1469, +1.5777, +0.3426, +0.0923, +0.2160, +1.1492, -0.5206, -0.2659, -0.1504, +0.2739, -1.3939, +0.8992, -1.1433, -0.3828, -0.2497, -0.2172, +0.2318, -0.3605, +0.6413, -1.9095, +1.4785, -0.1274, -0.7208, -0.0802, -0.8779, -1.6260, +0.9151, +0.8289, -0.0902, -0.3551, +0.6198, +1.7488, +0.0739, -1.2022, -0.3536, -1.5187, +0.1839, +1.4258, +0.4217, +0.1503, -0.0460, +0.2327, -0.4139, -0.3668, +0.2997, +0.6856, +0.6917, -0.3856, -0.3620, +0.1578, -0.8349, -1.0796, -0.0319, -1.1966, -0.8122, +0.5053, -0.5033, -0.9207, -0.1193, -0.7625, +0.1379],
|
||||
[ -0.0321, -0.3206, -0.4516, +0.3420, +1.0964, +0.0311, +0.4654, -0.2367, +0.3347, -0.2798, -0.8169, -0.1555, +0.9397, -0.5597, +0.7113, -0.3642, -0.2840, -0.1323, -0.1000, +0.2283, +0.3612, -0.4784, +0.0504, +0.5310, -0.0887, +0.2926, +0.5069, -0.5645, -0.0976, -0.2594, +0.4425, +0.9223, -0.5637, -0.2336, -0.1316, -0.6564, -0.2780, -0.2409, -0.1637, +0.4506, +0.7018, -0.1299, +0.7172, +0.1207, +0.4375, +0.3836, +0.2781, -0.7792, -0.5317, +0.4510, +0.2423, -0.0588, -0.4254, -0.6381, -0.8205, +0.6417, +0.1904, -0.2618, +0.5900, -0.3899, -0.7851, -0.4769, -0.3688, -0.3510],
|
||||
[ -0.8366, -0.3157, -0.1130, +0.2005, +0.3713, -0.4351, -0.1278, -0.5689, +0.3229, -0.5981, -0.4917, -0.4160, -0.5504, +0.2225, -0.1581, -0.6457, +0.1001, -1.0635, +0.2368, +0.2494, -0.4054, -0.1699, -0.1316, +0.2614, +0.3016, +0.4222, -0.1548, -0.0766, -0.5226, -0.3576, -0.2433, -0.5495, +0.0056, +0.0193, +0.2353, +0.3986, +0.3580, -0.7886, +0.3928, +0.1831, +0.4319, +0.2276, -0.3062, +0.0413, -0.4288, +0.1365, +0.3176, +0.3564, +0.5668, -0.4401, -0.9576, -0.1435, +0.0304, -0.5575, +0.0412, -0.1096, +0.2207, +0.1227, -0.0051, +0.5808, -0.1331, +0.1368, +0.4170, -0.8095],
|
||||
[ -0.6368, -1.3221, -0.4492, -1.5414, +0.4004, -2.8780, -0.1748, -0.8166, +1.7066, +1.0714, -0.4755, +0.3020, +0.0422, +0.3466, +0.4472, -0.6209, -3.3768, -0.0806, +1.3624, -2.4155, +1.0886, +0.3412, +0.0891, +1.6821, -0.5361, +0.3952, +1.5120, +0.3910, +1.9500, -0.9065, -1.3452, +0.0904, -0.0389, +0.2817, -1.8375, +0.8131, -1.5287, +0.3115, +1.4069, -0.3424, +1.6101, +2.6775, +0.5516, +1.6500, -0.4138, -0.0170, +1.0008, -0.7865, +0.0551, +2.2068, -0.0108, +0.3207, -1.1884, +0.3792, -0.6435, +0.2858, -0.6881, +0.1554, -1.6926, -0.0975, -1.4120, -0.0827, -1.5186, +0.2526],
|
||||
[ -0.2900, -0.2805, +0.9182, -0.8893, +0.7345, -0.9015, -0.2696, +0.2344, +0.3889, +0.6790, +0.3657, -0.1995, -0.6738, -0.4166, +0.1690, -0.3798, -0.9872, -0.2558, -0.4205, -0.6190, -0.0092, -0.2261, -0.2738, +0.2977, -0.7348, +0.4872, +0.4776, -0.1364, +0.5836, -0.2688, -0.4261, -0.3612, -0.3533, +0.4665, +0.0155, +1.0116, -0.7139, -0.3707, -0.4429, -0.0383, +0.6716, +0.5972, +0.3506, +0.3294, -1.3734, -0.5905, -0.1168, -0.2609, +0.3436, +0.8277, +0.4965, +0.3005, -0.2929, +0.1501, -0.2655, +0.3860, -0.3946, +0.8764, +0.7927, +0.0541, -1.0912, -0.2006, -0.6928, +0.4653]
|
||||
# yapf: disable
|
||||
weights_dense1_w = np.array(
|
||||
[[
|
||||
-0.5857, +0.1810, +0.2839, +0.1278, -0.4302, -0.3152, +0.5916, -0.0635,
|
||||
+0.6259, +0.2873, -0.0572, -0.3538, -0.8121, +0.2707, +0.1656, -0.2103,
|
||||
-0.1614, -0.2789, -0.5856, -0.4733, +0.1838, +0.1063, +0.7629, +0.0873,
|
||||
+0.1480, +0.1768, +0.6522, +0.1158, -0.0816, +0.6542, -0.8870, +0.1775,
|
||||
+0.1532, +0.2268, -0.0313, -0.0470, +0.5328, -0.0570, +0.4820, -0.3772,
|
||||
-0.7581, +0.2835, -0.3566, +0.9371, -0.0441, -0.1797, -0.2859, -0.0238,
|
||||
+0.0261, -0.0296, -0.1406, +0.2869, +0.1279, +0.6653, +0.5643, -0.3136,
|
||||
+0.7751, +0.2341, +0.1903, +0.8283, -0.0697, +0.1276, -0.0250, -0.0053
|
||||
],
|
||||
[
|
||||
+0.3741, +0.4844, -0.0638, -0.3205, +0.3137, +0.9636, +0.5329,
|
||||
+0.6882, +0.2983, -0.6675, -0.6372, +0.2065, -0.2645, -0.4789,
|
||||
+0.2326, -0.0691, -0.5905, -0.3354, +0.3428, +0.4253, +0.9111,
|
||||
-0.4751, -0.2124, +0.3920, +0.2897, -1.1101, +0.1894, -0.4025,
|
||||
-0.1125, -0.0627, +0.2347, -0.8787, +0.1019, +0.9128, +0.2544,
|
||||
-0.3933, +0.6485, -0.1936, -0.2402, +0.5012, -0.0918, +0.3160,
|
||||
-0.7860, +0.3439, -0.4268, -0.1788, -0.3930, +0.5128, +0.2338,
|
||||
+0.2571, +0.1343, +0.9850, -0.7074, +0.3532, +0.3048, -0.4542,
|
||||
+0.5539, -0.4409, -0.2003, -0.4837, -0.3554, -0.4447, -0.0817, -0.8497
|
||||
],
|
||||
[
|
||||
+0.0825, +0.5847, +0.4837, +0.5144, +0.4770, +0.0199, +0.4275,
|
||||
-0.4530, +0.8499, -0.2840, +0.3817, -0.5098, -0.2155, -0.1475,
|
||||
+0.1145, -0.1871, -0.0526, +0.3583, -0.3537, -0.7111, -0.6116,
|
||||
+0.3406, -0.6360, +0.7786, +0.6628, -0.0493, +0.3505, -0.0376,
|
||||
-0.6556, +1.0748, -0.5329, +0.6477, -0.7117, +0.3723, +0.6006,
|
||||
+0.0171, +0.0012, +0.4910, -0.5651, -0.6868, +0.2403, +0.0254,
|
||||
-0.4416, +0.7534, -0.0138, -1.1298, +0.5447, +0.0974, +0.1988,
|
||||
-0.2161, -0.3126, -0.5731, -0.1278, +0.2995, -0.1200, -0.7917,
|
||||
+0.5326, +0.4562, -0.0144, +0.5717, -0.4065, +0.1494, +0.7100, +0.2461
|
||||
],
|
||||
[
|
||||
-0.2861, +0.4314, -0.2982, -0.1401, -0.1033, +0.5287, -0.6620,
|
||||
-0.3975, +0.0038, +0.1991, -0.7079, -0.9000, +0.1659, +0.3623,
|
||||
-0.0752, -0.1907, -0.2335, -0.5143, +0.2324, -0.0487, +0.1583,
|
||||
-0.5989, +0.5957, +0.2150, -0.0335, +0.2154, +0.3279, -0.7976,
|
||||
+0.5320, -0.4438, +0.2170, -0.3841, -0.0039, -0.0847, -0.0028,
|
||||
-0.4278, -0.2393, -0.9239, +0.2880, -0.1437, -0.0941, -0.0796,
|
||||
-0.3906, -0.3224, +0.1038, -0.1929, -0.2713, -0.4157, -0.2178,
|
||||
+0.5729, -0.2065, +0.0059, +0.3879, +0.0590, +0.1759, +0.0677,
|
||||
-0.0170, -0.2252, +0.3301, -0.0599, +0.3791, -0.1455, +0.2200, -0.0499
|
||||
],
|
||||
[
|
||||
-0.4403, +0.7182, +0.7941, +0.1469, +1.5777, +0.3426, +0.0923,
|
||||
+0.2160, +1.1492, -0.5206, -0.2659, -0.1504, +0.2739, -1.3939,
|
||||
+0.8992, -1.1433, -0.3828, -0.2497, -0.2172, +0.2318, -0.3605,
|
||||
+0.6413, -1.9095, +1.4785, -0.1274, -0.7208, -0.0802, -0.8779,
|
||||
-1.6260, +0.9151, +0.8289, -0.0902, -0.3551, +0.6198, +1.7488,
|
||||
+0.0739, -1.2022, -0.3536, -1.5187, +0.1839, +1.4258, +0.4217,
|
||||
+0.1503, -0.0460, +0.2327, -0.4139, -0.3668, +0.2997, +0.6856,
|
||||
+0.6917, -0.3856, -0.3620, +0.1578, -0.8349, -1.0796, -0.0319,
|
||||
-1.1966, -0.8122, +0.5053, -0.5033, -0.9207, -0.1193, -0.7625, +0.1379
|
||||
],
|
||||
[
|
||||
-0.0321, -0.3206, -0.4516, +0.3420, +1.0964, +0.0311, +0.4654,
|
||||
-0.2367, +0.3347, -0.2798, -0.8169, -0.1555, +0.9397, -0.5597,
|
||||
+0.7113, -0.3642, -0.2840, -0.1323, -0.1000, +0.2283, +0.3612,
|
||||
-0.4784, +0.0504, +0.5310, -0.0887, +0.2926, +0.5069, -0.5645,
|
||||
-0.0976, -0.2594, +0.4425, +0.9223, -0.5637, -0.2336, -0.1316,
|
||||
-0.6564, -0.2780, -0.2409, -0.1637, +0.4506, +0.7018, -0.1299,
|
||||
+0.7172, +0.1207, +0.4375, +0.3836, +0.2781, -0.7792, -0.5317,
|
||||
+0.4510, +0.2423, -0.0588, -0.4254, -0.6381, -0.8205, +0.6417,
|
||||
+0.1904, -0.2618, +0.5900, -0.3899, -0.7851, -0.4769, -0.3688, -0.3510
|
||||
],
|
||||
[
|
||||
-0.8366, -0.3157, -0.1130, +0.2005, +0.3713, -0.4351, -0.1278,
|
||||
-0.5689, +0.3229, -0.5981, -0.4917, -0.4160, -0.5504, +0.2225,
|
||||
-0.1581, -0.6457, +0.1001, -1.0635, +0.2368, +0.2494, -0.4054,
|
||||
-0.1699, -0.1316, +0.2614, +0.3016, +0.4222, -0.1548, -0.0766,
|
||||
-0.5226, -0.3576, -0.2433, -0.5495, +0.0056, +0.0193, +0.2353,
|
||||
+0.3986, +0.3580, -0.7886, +0.3928, +0.1831, +0.4319, +0.2276,
|
||||
-0.3062, +0.0413, -0.4288, +0.1365, +0.3176, +0.3564, +0.5668,
|
||||
-0.4401, -0.9576, -0.1435, +0.0304, -0.5575, +0.0412, -0.1096,
|
||||
+0.2207, +0.1227, -0.0051, +0.5808, -0.1331, +0.1368, +0.4170, -0.8095
|
||||
],
|
||||
[
|
||||
-0.6368, -1.3221, -0.4492, -1.5414, +0.4004, -2.8780, -0.1748,
|
||||
-0.8166, +1.7066, +1.0714, -0.4755, +0.3020, +0.0422, +0.3466,
|
||||
+0.4472, -0.6209, -3.3768, -0.0806, +1.3624, -2.4155, +1.0886,
|
||||
+0.3412, +0.0891, +1.6821, -0.5361, +0.3952, +1.5120, +0.3910,
|
||||
+1.9500, -0.9065, -1.3452, +0.0904, -0.0389, +0.2817, -1.8375,
|
||||
+0.8131, -1.5287, +0.3115, +1.4069, -0.3424, +1.6101, +2.6775,
|
||||
+0.5516, +1.6500, -0.4138, -0.0170, +1.0008, -0.7865, +0.0551,
|
||||
+2.2068, -0.0108, +0.3207, -1.1884, +0.3792, -0.6435, +0.2858,
|
||||
-0.6881, +0.1554, -1.6926, -0.0975, -1.4120, -0.0827, -1.5186, +0.2526
|
||||
],
|
||||
[
|
||||
-0.2900, -0.2805, +0.9182, -0.8893, +0.7345, -0.9015, -0.2696,
|
||||
+0.2344, +0.3889, +0.6790, +0.3657, -0.1995, -0.6738, -0.4166,
|
||||
+0.1690, -0.3798, -0.9872, -0.2558, -0.4205, -0.6190, -0.0092,
|
||||
-0.2261, -0.2738, +0.2977, -0.7348, +0.4872, +0.4776, -0.1364,
|
||||
+0.5836, -0.2688, -0.4261, -0.3612, -0.3533, +0.4665, +0.0155,
|
||||
+1.0116, -0.7139, -0.3707, -0.4429, -0.0383, +0.6716, +0.5972,
|
||||
+0.3506, +0.3294, -1.3734, -0.5905, -0.1168, -0.2609, +0.3436,
|
||||
+0.8277, +0.4965, +0.3005, -0.2929, +0.1501, -0.2655, +0.3860,
|
||||
-0.3946, +0.8764, +0.7927, +0.0541, -1.0912, -0.2006, -0.6928, +0.4653
|
||||
]])
|
||||
|
||||
weights_dense1_b = np.array([
|
||||
-0.1146, +0.2897, +0.0137, +0.0822, +0.0367, +0.0951, -0.0657, +0.0653,
|
||||
-0.0729, -0.0501, -0.6380, -0.4403, +0.0660, +0.0693, -0.4353, -0.2766,
|
||||
-0.1258, -0.6947, -0.1616, -0.0453, +0.1498, -0.2340, -0.0764, +0.2020,
|
||||
+0.4812, +0.0908, -0.1883, -0.0753, -0.0373, -0.4172, -0.1071, +0.0861,
|
||||
-0.1550, -0.0648, -0.1473, +0.1507, -0.0121, -0.5468, -0.1529, -0.3341,
|
||||
+0.0239, -0.0463, -0.0044, -0.0541, +0.0384, +0.3028, +0.3378, +0.0965,
|
||||
+0.0740, +0.1948, -0.1655, +0.1558, +0.1367, -0.1514, +0.0540, -0.0015,
|
||||
-0.1256, +0.3402, -0.0273, -0.2239, -0.0073, -0.6246, +0.0755, -0.2002
|
||||
])
|
||||
|
||||
weights_dense1_b = np.array([ -0.1146, +0.2897, +0.0137, +0.0822, +0.0367, +0.0951, -0.0657, +0.0653, -0.0729, -0.0501, -0.6380, -0.4403, +0.0660, +0.0693, -0.4353, -0.2766, -0.1258, -0.6947, -0.1616, -0.0453, +0.1498, -0.2340, -0.0764, +0.2020, +0.4812, +0.0908, -0.1883, -0.0753, -0.0373, -0.4172, -0.1071, +0.0861, -0.1550, -0.0648, -0.1473, +0.1507, -0.0121, -0.5468, -0.1529, -0.3341, +0.0239, -0.0463, -0.0044, -0.0541, +0.0384, +0.3028, +0.3378, +0.0965, +0.0740, +0.1948, -0.1655, +0.1558, +0.1367, -0.1514, +0.0540, -0.0015, -0.1256, +0.3402, -0.0273, -0.2239, -0.0073, -0.6246, +0.0755, -0.2002])
|
||||
|
||||
weights_dense2_w = np.array([
|
||||
[ +0.5019, +0.3831, +0.6726, +0.3767, +0.2021, -0.1615, +0.3882, -0.0487, -0.2713, +0.1173, -0.2218, +0.0598, +0.0819, -0.1157, +0.5879, -0.3587, +0.1376, -0.2595, +0.0257, -0.1182, +0.0723, +0.5612, -0.4087, -0.4651, +0.0631, +0.1786, +0.1206, +0.4791, +0.5922, -0.4444, +0.3446, -0.0464],
|
||||
[ -0.0485, +0.0739, -0.6915, +0.5446, -0.2461, +0.1557, +0.8993, -0.7537, +0.1149, +0.0575, -0.1714, -0.3796, +0.3508, -0.2315, +0.4389, -1.4456, -1.3490, -0.1598, -1.0354, -0.2320, -0.3765, +0.1070, -0.7107, +0.4150, +0.2711, -0.2915, -0.7957, +0.7753, -0.0425, -0.1352, +0.3018, -0.0069],
|
||||
[ -0.4047, +1.0040, -0.4570, +0.3017, +0.1477, -0.0163, +0.4087, -0.6368, -0.0764, -0.0695, +0.0208, -0.2411, +0.1936, +0.0047, +0.0107, -0.8538, -0.5887, -0.0524, -1.4902, +0.2858, +0.4396, -0.3433, -0.6778, -0.7137, +0.4587, +0.3359, -0.7350, -1.0813, -0.1296, +0.1748, -0.3830, -0.2103],
|
||||
[ +0.0503, -0.3342, -0.6057, +0.2217, +0.3164, -0.1881, -0.5867, -0.2471, -0.2527, -0.0444, +0.1874, -0.0960, +0.2039, -0.0488, +0.1741, -0.1623, -0.0758, -0.2354, -0.5986, -0.2129, -0.2470, +0.3317, -0.4795, -0.6380, +0.1494, +0.0115, -0.2746, -0.8428, -0.0118, -0.0604, +0.0886, -0.0408],
|
||||
[ -0.1932, -1.3896, +0.3919, -0.4700, -0.9806, -0.1554, +0.3132, +0.4138, -0.4943, -0.1408, -0.0976, +0.1551, -0.0180, +0.0864, -0.0053, -0.2430, +0.4948, +0.2709, -0.3488, +0.2085, -0.2124, -0.3025, -0.0692, +0.3884, +0.5764, +0.5783, +0.4351, -0.2633, -0.9288, +0.2218, -0.9049, -0.2970],
|
||||
[ -0.2841, -0.3393, -0.1062, -0.1415, +0.0257, +0.0816, -0.4833, -0.2775, +0.0308, -0.0344, +0.5451, +0.1588, -0.7454, -0.1444, +0.4189, -0.2001, -2.0586, -0.0616, -1.4463, +0.0076, -0.7703, +0.3279, -0.7009, +0.6046, -0.1615, -0.5188, -0.7503, +0.0615, +0.1815, -0.2512, +0.0321, -0.1834],
|
||||
[ +0.3751, +0.2932, -0.6815, +0.3771, +0.0603, -0.2035, -0.2644, -1.0120, -0.0661, -0.0846, +0.1209, +0.0367, +0.0493, -0.2603, -0.1608, -0.7580, -0.8609, +0.1415, -0.7626, -1.0209, -0.7498, -0.0732, -0.8138, -0.2292, +0.5803, -0.2718, -1.4684, -0.1584, +0.2096, +0.1336, +0.3632, +0.0216],
|
||||
[ -0.0625, -0.1233, -0.2715, +0.5541, +0.3121, +0.0265, +0.4501, -1.1024, -0.1160, -0.1005, -0.0844, -0.0516, +0.0916, +0.0901, +0.3710, -0.5753, -0.3728, -0.1103, -0.6285, -0.2179, +0.1570, +0.1168, -0.9312, +0.0135, -0.0376, -0.1693, -0.5358, -0.0028, +0.2105, -0.7373, +0.2776, +0.2326],
|
||||
[ -0.5378, -0.3201, +0.3606, +0.1331, +0.0120, -0.2421, -0.0230, +0.4622, -0.3140, +0.0803, -0.6897, -0.4704, +0.2685, +0.0803, -0.7654, -0.1433, +0.0242, +0.0917, +0.2458, +0.0457, -0.2503, -0.1197, +0.1454, -0.1523, -0.4095, +0.1856, +0.0678, -1.0011, +0.0117, +0.1789, -0.4063, -0.0888],
|
||||
[ -0.6352, -0.6358, -0.2066, +0.0758, -0.2973, -0.3014, -0.0556, -0.0912, -0.2729, -0.1492, -0.1928, -1.8768, +0.2183, +0.0801, +0.1288, -1.2493, +0.1115, +0.2797, -0.1458, +0.0062, -0.0402, -0.8945, -0.2231, -0.1154, +0.3635, -0.3021, +0.1402, -0.7347, +0.2772, +0.3182, -0.9708, +0.0376],
|
||||
[ +0.6899, +0.3471, -0.5863, +0.1497, +0.1616, -0.0497, +0.3579, -0.6421, +0.4529, -0.1588, +0.9250, +0.2357, -0.0712, -0.1793, -0.0231, -0.4814, -0.7654, +0.0591, -0.6866, -0.1705, +0.2255, -0.0007, -0.3890, +0.6114, +0.0443, -0.6929, -0.7734, +0.2314, -0.0231, -0.6386, +0.1237, +0.0472],
|
||||
[ -0.2496, -0.1687, +0.1234, +0.4152, +0.4207, -0.1398, +0.1287, +0.5903, +0.0530, -0.1181, +0.0803, -0.0641, -0.1198, -0.4702, -0.3669, +0.2340, -0.3778, +0.4341, +0.2411, -0.2171, -0.3051, -0.2397, +0.1756, +0.4040, +0.0682, +0.1575, +0.4137, +0.0887, -0.1998, +0.2221, -0.2474, -0.0559],
|
||||
[ -2.2466, -1.2725, +0.5947, -0.3192, -0.2665, -0.0129, -0.7615, +0.1148, +0.2745, -0.0556, -1.3313, -0.7143, -0.5119, -0.0572, -0.1892, -0.3294, -0.0187, -0.7696, +1.0413, +0.4226, +0.1378, -1.3668, +0.9806, -0.1810, -0.2307, -0.4924, +0.7163, -1.2529, -0.3216, +0.1683, -0.6657, -0.1121],
|
||||
[ +0.1227, +0.2433, -0.1292, -0.7152, -0.1342, -0.1116, -0.2695, +0.0243, -0.0770, -0.1713, +0.2837, +0.2076, -0.7322, -0.1657, -0.3407, -0.4378, +0.0679, -0.3777, +0.3025, -0.6780, -0.2326, +0.1463, +0.0535, -0.6373, -0.2027, -0.5404, -0.1598, +0.1511, -0.1776, +0.0854, +0.1753, -0.0342],
|
||||
[ -0.1772, -0.2654, -0.4170, -0.3301, +0.2956, -0.4234, +0.0835, +0.2869, -0.2804, -0.2073, -0.3291, -0.5897, -0.4116, -0.0447, +0.1601, +0.1602, +0.1691, -0.2014, -0.0502, +0.1167, -1.0103, -0.4297, -0.2039, -0.0859, +0.2756, -0.1768, -0.2726, -0.0256, -0.0834, +0.0852, +0.0930, -0.0606],
|
||||
[ -0.5390, -0.5441, +0.3202, -0.1018, +0.0059, +0.1799, -0.1917, +0.3674, +0.2576, -0.0707, -0.4401, -0.3990, +0.0565, +0.0751, -0.5959, +0.3866, +0.2763, -0.2564, +0.4937, +0.5076, +0.3941, -0.3593, +0.4346, +0.2561, -0.0762, -0.2873, +0.6820, -0.3032, -0.3268, +0.1319, -0.3643, +0.0292],
|
||||
[ +0.1816, -0.0451, -0.9370, +0.1335, -0.1030, -0.0400, +0.0311, -1.3751, -0.1860, +0.1559, +0.5395, +0.3994, -0.1703, -0.1157, +0.6342, -0.4726, -0.6213, -0.2096, -0.7549, -0.9815, -0.3798, +0.5286, -0.8413, +0.2577, +0.2223, -1.2260, -1.3571, -0.0970, +0.3334, -0.2096, +0.3566, -0.1703],
|
||||
[ +0.0635, +0.1541, -0.2206, +0.0924, +0.1302, +0.1947, -0.3868, -0.6834, -0.0603, -0.3752, +0.3103, -0.1699, -0.0833, -0.1190, -0.0310, -0.5480, -1.1421, -0.0020, -0.3611, -0.3800, -0.0638, +0.0811, -0.5886, +0.0690, +0.1925, +0.0710, -0.3142, +0.1837, +0.2125, -0.1217, +0.2185, +0.0458],
|
||||
[ -0.3973, +0.0486, +0.2518, -0.3208, +0.1218, -0.5324, -0.3417, +0.0322, -0.0088, +0.0214, +0.2725, +0.0960, -0.2949, -0.1770, -0.1511, +0.0259, +0.1161, -0.8829, +0.2415, +0.0939, -0.7213, +0.2220, +0.1687, -0.1802, -0.0539, +0.1786, +0.6638, +0.3559, +0.2343, +0.3212, +0.4396, -0.1385],
|
||||
[ -0.2384, -0.5346, -0.2323, -0.2277, +0.3503, -0.0308, -0.2004, -0.1096, -0.2587, -0.1143, +0.2579, +0.2382, -0.5883, -0.1277, +0.2257, -0.0244, -0.9605, -0.4244, -0.7321, +0.3017, -1.6256, -0.2074, -0.8327, +0.0607, -0.0751, -0.0153, -0.4485, +0.1758, +0.1821, +0.2625, +0.0108, -0.2395],
|
||||
[ -0.5639, -0.3613, +0.1291, -0.2132, +0.4927, -0.0604, -0.8067, +0.0933, -0.1483, -0.0321, -0.6843, -0.3064, -0.5646, -0.2040, -0.0414, +0.6092, +0.4903, -0.9346, +0.3389, +0.2040, -0.0295, -0.2196, +0.4264, +0.0312, -1.1801, +0.3008, +0.7719, +0.2140, -0.0257, +0.5275, -0.0553, +0.0362],
|
||||
[ -0.6039, -1.2848, +0.6301, -0.1285, +0.2338, -0.2585, -0.3217, +0.4326, +0.0441, -0.0356, -0.5720, -0.8739, -0.3924, +0.2499, -0.2620, +0.1396, -0.0701, -0.2239, +0.2612, +0.1646, +0.7769, -0.6382, +0.8720, -0.1956, -0.1779, -0.1608, -0.0358, -0.4453, -0.1154, +0.5532, -0.9282, +0.0031],
|
||||
[ -0.1990, +0.3708, -0.0049, -0.3260, -0.0465, +0.0415, +0.1601, +0.0019, +0.0114, +0.0438, +0.0893, +0.3056, -0.6166, +0.1145, -0.6742, +0.0483, +0.0739, -0.1139, +0.5772, -1.5569, +0.4253, -0.0769, +0.4014, -0.6817, +0.0228, -0.0383, -0.0844, -0.1560, +0.1414, -0.3420, +0.3664, -0.2293],
|
||||
[ -0.0917, -0.8692, +0.4704, +0.1796, -0.1346, -0.5246, +0.0622, +0.3420, -0.5879, -0.0445, -0.3444, -0.0490, +0.0956, -0.0753, -0.8856, +0.1275, +0.1592, +0.3569, +0.1774, +0.2723, +0.1125, -0.1718, +0.2451, -0.0132, +0.1584, -0.0197, +0.0700, -0.2156, +0.0094, +0.4639, -0.6721, -0.2180],
|
||||
[ +0.0578, -0.1570, -0.1623, -0.1359, +0.1346, +0.1821, -0.0696, -0.0570, +0.0011, +0.1216, +0.1069, -0.0841, +0.1017, -0.1663, -0.6005, -0.4583, -0.2606, -0.0292, +0.0321, -0.5614, -0.4416, +0.0355, +0.2081, +0.3517, +0.0619, -1.0007, -0.0765, +0.1769, -0.1286, +0.5833, -0.1758, -0.1957],
|
||||
[ -0.0013, +0.3157, +0.0395, -1.0792, -0.1198, -0.2945, -0.0090, +0.3281, -0.0618, -0.0806, +0.0768, +0.2802, -0.2311, -0.2302, +0.0506, +0.0552, +0.3727, +0.3610, +0.2029, -0.1743, +0.4557, -0.1761, -0.5039, -0.9115, +0.2842, +0.1317, -0.5961, -0.4214, -1.0727, +0.3308, +0.2380, -0.3348],
|
||||
[ +0.2455, -0.1299, +0.3117, -1.0169, -0.3417, +0.0310, -0.4793, +0.5334, -0.4799, -0.3291, -0.1344, +0.3732, -0.1514, +0.1574, -0.1819, -0.0206, +0.5675, -0.6992, +0.4815, -0.1497, -0.3804, +0.1389, +0.5850, -0.2920, +0.2569, -0.3527, +0.3641, -0.2014, -0.1457, +0.2365, -0.2335, -0.2610],
|
||||
[ -0.2252, +0.1225, +0.0953, -0.0193, +0.3955, -0.0800, +0.0090, -0.4155, +0.1851, +0.3392, -0.3260, -0.3907, +0.1320, +0.1266, +0.0579, +0.1819, -0.5793, -0.2230, +0.1351, -0.1519, -0.0527, -0.0036, +0.1243, +0.1387, -0.2874, -0.4997, -0.3251, +0.0435, -0.5244, +0.1051, -0.2081, +0.2126],
|
||||
[ -0.6574, +0.6789, +0.1026, -0.5191, +0.1058, -0.6812, +0.1798, -0.1029, +0.0757, -0.0089, +0.1539, +0.4688, -0.1306, +0.0595, -0.8136, -0.4843, +0.3675, +0.1800, +0.2641, -0.0589, +0.0613, +0.2019, -0.0765, -0.1216, -0.4588, +0.0629, +0.1133, +0.7055, -2.8075, +0.3867, +0.4791, -0.1118],
|
||||
[ +0.2771, +0.3461, -0.8556, -0.0316, +0.3640, -0.1380, -0.3765, -0.9258, -0.0693, -0.1283, +0.0576, -0.0792, +0.4468, -0.5001, +0.5939, -1.2226, -0.9252, -0.3980, -1.3444, -0.9488, -0.7199, +0.4289, -1.8749, -0.0867, +0.3905, -0.4535, -0.5607, -0.2247, -0.0359, -0.4125, +0.7124, -0.1963],
|
||||
[ -0.2584, -0.5358, -0.0916, +0.0765, +0.0615, -0.1887, -0.2253, -0.7855, -0.0061, -0.1887, +0.5511, +0.3207, -0.2055, -0.1694, +0.4772, -1.0356, -0.9884, -0.2620, -0.1214, +0.9733, -0.9700, -0.3205, -0.7005, -0.2960, +0.1132, -0.0352, +0.3491, -0.2440, +0.1108, +0.1083, +0.3029, -0.0031],
|
||||
[ -0.6217, +0.1238, +0.0245, -0.1769, -0.2487, +0.0526, -0.0090, +0.1370, +0.2666, -0.0743, -0.8230, -0.7723, -0.0929, -0.1532, +0.6103, -0.4931, -1.3329, -0.3735, +0.0217, -0.1539, -0.4946, -1.0838, -0.5840, +0.1618, +0.2584, +0.4200, +0.1171, -0.5601, +0.1604, +0.0864, +0.2287, -0.0057],
|
||||
[ -0.2220, +0.4837, -0.0825, +0.0143, +0.2734, -0.0853, +0.1578, -0.0112, +0.1829, +0.0390, +0.2151, -0.1538, -0.1111, -0.0773, +0.3439, -0.2134, -0.2884, -0.3831, +0.2767, -0.3149, +0.1463, +0.3230, +0.2187, -0.2309, -0.1096, +0.3709, -0.0105, +0.3709, +0.3034, -0.7602, +0.5988, -0.0595],
|
||||
[ -0.6073, +0.1780, +0.1682, +0.1604, +0.3662, -0.0385, -0.1495, +0.3012, -0.2065, -0.0163, -1.0465, -0.8268, -0.0190, +0.0964, -0.2755, +0.0965, -0.3466, -0.3758, -0.1113, +0.1462, +0.3280, -0.1600, +0.1023, +0.1998, -0.3642, +0.2736, +0.3782, -0.2681, +0.2334, +0.1721, +0.0385, +0.0348],
|
||||
[ -0.0582, -0.5750, +0.1279, +0.3630, -0.2404, -0.1511, +0.2650, -0.0324, -0.2258, +0.0007, +0.3051, -0.1875, -0.5106, +0.0104, +0.1335, -0.5282, -0.2210, +0.2648, -0.7506, +0.4975, -1.7048, +0.2378, -0.1771, +0.2981, +0.1252, +0.1384, -0.3384, -0.0830, +0.0966, +0.3728, -0.1980, -0.1953],
|
||||
[ -1.0735, -0.2780, +0.1428, -0.0624, -0.0311, -0.2687, -0.1623, +0.2996, +0.1782, -0.1403, -0.3761, -1.3413, -0.2020, -0.0492, -0.6636, -0.2737, +0.2228, +0.3109, +0.1596, +0.0172, +0.1325, -1.4936, -0.0615, -0.1547, -0.2285, +0.2648, -0.1008, -1.6756, -0.2352, +0.0998, -0.4550, +0.2028],
|
||||
[ -0.3866, -0.0107, +0.1052, +0.1423, +0.1160, +0.1712, -0.6206, -0.3505, -0.3298, -0.0362, +0.6768, +0.2086, -0.4348, -0.3577, +0.0131, -0.1640, +0.0160, -0.3891, -0.0180, -0.1064, -0.2494, +0.0340, +0.2225, -0.1320, -0.3550, -0.3005, +0.0118, +0.2782, +0.4691, -1.3792, +0.1971, -0.0598],
|
||||
[ +0.0215, +0.1885, -0.5360, -0.1283, +0.4689, +0.1426, -0.2809, -0.8197, +0.1951, -0.1620, +0.0627, +0.2864, -0.3069, -0.1170, +0.0545, -0.4527, -0.6646, -0.1546, -0.1794, -0.5350, -0.1060, -0.0198, -0.5782, -0.2201, +0.0361, -0.2497, -0.1527, -0.1489, +0.1034, +0.0925, +0.0368, -0.0352],
|
||||
[ +0.2459, +0.3230, -0.0494, -0.5631, +0.0600, -0.3036, -0.5443, +0.1081, -0.2231, +0.0734, +0.0289, +0.4205, -0.6415, -0.1305, -0.0717, +0.2971, +0.0476, -1.3001, +0.5122, -0.0005, -0.3572, +0.0727, +0.1713, -0.4751, -0.3614, -0.0957, -0.0942, +0.0580, +0.2393, +0.0038, +0.1938, -0.1704],
|
||||
[ +0.3352, -0.0882, -0.0349, -0.6093, +0.4262, -0.1350, -0.0687, -0.2459, -0.5564, -0.2956, +0.1619, -0.0813, -0.5128, -0.2209, +0.3870, -0.0804, +0.7676, -0.1745, -0.3860, -0.5517, -0.6899, -0.6400, +0.6095, -0.5337, +0.3452, -0.6608, +0.0662, +0.1741, +0.1653, -0.4191, +0.1051, -0.3116],
|
||||
[ -0.0527, -1.3119, +0.3441, -0.0041, -0.5938, -0.4224, +0.3973, +0.4673, -0.0613, -0.0191, +0.1297, -0.2211, -0.0880, +0.0319, +0.0661, -0.2075, +0.4380, +0.3197, +0.0989, +0.2346, -0.0142, -1.2137, +0.1618, -0.3300, +0.4591, +0.4910, +0.3537, -0.5902, -0.0616, +0.2882, -0.0900, -0.0208],
|
||||
[ -0.7068, -0.7952, +0.4496, +0.1237, -0.2000, -0.5966, +0.3920, +0.3458, +0.0036, -0.0666, -0.3061, -0.1172, +0.0446, +0.1768, -0.5318, +0.2083, +0.3371, +0.1497, +0.4244, +0.3980, +0.2023, -0.8931, +0.1860, -0.6889, -0.3250, +0.1250, +0.1510, -0.3405, -0.4040, +0.1598, -0.9933, +0.0233],
|
||||
[ -1.2305, -0.3178, +0.0536, -0.0585, -0.7097, +0.3196, +0.2899, +0.8200, +0.0384, +0.1733, -1.1839, -2.2545, +0.0653, +0.1376, -0.1359, -0.1202, -0.0831, -0.5397, +0.1100, +0.1386, -0.1271, -0.6298, +0.1038, -0.1213, -0.1461, -0.4508, +0.5106, -0.8266, -0.6204, +0.3753, -0.4897, -0.0751],
|
||||
[ -0.3676, -0.5547, +0.0897, -0.0230, -0.3045, -0.1885, -0.5990, +0.3622, -0.2240, -0.1205, -0.3056, +0.7085, +0.0053, -0.1213, -0.3023, +0.1433, -0.2090, -0.0412, +0.2561, +0.1313, -0.2803, +0.2543, +0.0571, -0.9791, -0.0167, -0.2928, -0.3020, -0.2271, +0.0507, -0.1310, -0.6347, -0.0889],
|
||||
[ -0.2794, +0.0675, -1.0020, -0.2234, +0.3937, -0.2857, +0.1058, -1.0755, -0.0377, -0.2753, -0.0501, -0.0493, -0.2987, -0.2214, +0.2869, -1.0882, -1.2635, -1.2235, -0.5762, -0.4528, -0.1372, -0.0192, -1.3768, +0.2337, +0.2008, -0.2517, -0.3918, -0.6362, -0.1762, -0.9261, +0.1711, -0.0094],
|
||||
[ -0.1099, -0.2142, -0.0006, -0.4617, -0.0286, +0.3482, -0.7728, -0.4384, +0.0050, -0.0151, +0.1974, +0.2815, -0.5295, -0.2581, +0.3404, -1.6254, -1.3208, -0.1648, -0.5207, +0.4104, -0.2795, +0.0613, -1.5642, -0.1178, -0.1354, +0.0375, +0.3323, +0.0540, +0.2038, -0.3223, +0.4603, -0.3780],
|
||||
[ -0.3999, -0.3719, +0.1918, -0.4738, -0.0009, +0.0419, +0.1046, +0.2675, +0.1359, -0.2536, -0.3485, -0.3118, -0.3613, +0.0914, -0.4486, +0.2719, +0.2876, -0.0685, +0.4309, +0.1856, +0.4678, -0.3314, +0.0211, +0.2575, +0.5077, -0.1494, +0.5110, -0.6869, -1.4053, +0.3093, -0.2914, -0.1501],
|
||||
[ +0.3543, +0.3915, +0.0536, +0.3995, +0.2165, -0.1133, -0.1209, +0.0824, -0.0723, -0.0774, -0.4248, -0.0243, -0.1089, -0.1408, +0.2072, -0.1309, -1.5186, -0.4079, -0.0530, -0.3525, +0.6782, +0.1991, -0.0292, +0.1339, -0.1074, +0.2312, +0.1969, +0.4662, +0.5312, -0.3306, +0.0622, +0.1057],
|
||||
[ -1.1778, +0.2978, +0.0443, +0.1657, +0.1317, -0.1250, -0.0459, +0.0777, +0.1359, -0.0055, +0.2364, -2.3659, +0.2214, -0.1489, -0.3051, -0.5094, +0.1495, +0.3328, +0.1264, -0.0217, +0.2321, -0.6466, -0.1813, +0.5276, +0.1975, +0.3752, +0.1469, -0.8019, +0.2427, +0.1543, +0.2140, -0.1592],
|
||||
[ -0.7753, -1.3502, +0.3157, +0.1847, +0.0661, -0.5501, +0.3482, +0.6112, +0.0207, +0.0534, -0.2106, -1.0144, -0.0836, -0.0275, -1.0761, +0.2131, +0.3135, +0.3134, +0.1974, +0.0182, +0.1975, -1.1221, +0.2958, -0.2610, +0.0865, +0.3592, +0.4317, -0.3505, -0.4557, +0.3033, -0.5797, -0.2988],
|
||||
[ +0.4103, -0.0643, +0.0803, +0.2177, +0.1028, -0.2668, +0.0084, -0.2340, -0.2571, +0.0334, +0.3451, -0.0055, +0.0216, -0.1460, +0.5293, -0.2615, -0.3035, +0.1736, -0.4206, -0.2186, +0.1343, +0.6001, -0.0499, -0.2777, -0.0160, -0.4303, -0.2795, +0.1932, +0.4219, -0.0800, +0.1819, -0.1007],
|
||||
[ -0.7074, -0.0546, +0.4495, +0.1427, +0.3306, +0.0811, -0.5433, -0.0609, -0.2128, -0.1059, -1.0477, -0.4679, -0.1780, -0.1373, -0.3672, +0.0724, -0.0554, -0.5400, +0.0457, -0.0469, -0.0367, -0.4609, +0.1668, -0.0266, -0.9007, +0.2975, +0.5204, -0.0453, -0.1314, -0.0980, +0.1424, -0.1877],
|
||||
[ +0.0657, +0.1230, -0.2558, +0.3103, -0.0795, -0.1243, +0.1956, +0.0262, -0.2626, -0.0554, +0.3760, +0.3076, -0.4633, +0.0790, +0.2363, -0.3311, +0.1235, -0.1727, -0.2468, +0.0188, -0.1121, -0.2807, -0.5865, -0.4197, +0.1949, -0.4970, -1.0413, -0.1698, +0.1798, +0.2004, -0.0514, +0.0254],
|
||||
[ -0.1566, -1.1156, +0.4431, -0.1503, -0.5682, +0.1822, -0.1201, +0.5151, -0.1386, -0.1764, +0.2063, -0.8582, +0.3750, -0.1405, +0.0852, +0.2641, -0.1951, -0.0575, -0.4181, +0.2273, +0.1332, -0.2797, +0.5406, -0.0869, +0.2453, +0.0648, +0.2252, -0.0628, -0.6882, -0.0514, -0.4663, -0.0954],
|
||||
[ -0.4780, +0.5844, +0.1782, -0.0831, +0.1547, -0.0595, -0.5646, -0.0488, -0.1774, -0.0098, +0.1833, +0.3520, -0.3359, -0.1492, +0.1139, -0.1223, -0.5312, -0.5361, +0.1689, -0.2020, +0.1069, +0.2327, +0.2887, +0.0526, -0.5916, -0.2435, -0.2342, +0.3422, +0.4399, -1.1880, +0.1293, -0.1021],
|
||||
[ -1.2784, -1.8266, +0.0630, -0.3332, -0.5833, -0.3733, +0.3265, +0.1977, +0.0716, -0.2575, +0.0403, -0.1961, +0.1541, -0.2311, -0.1734, -0.1785, +0.0168, +0.1134, +0.0407, -0.1661, +0.5985, -1.9681, +0.1342, +0.3432, +0.3934, +0.0663, +0.3141, -2.0177, -1.7071, +0.2942, -1.0684, -0.0737],
|
||||
[ +0.1763, +0.2191, +0.2609, +0.0723, +0.1038, -0.2516, -0.9086, +0.1536, +0.0153, +0.1061, +0.1675, +0.3839, -0.5326, +0.2007, -0.4943, -0.1048, +0.1614, -0.4703, +0.3453, -0.7441, -0.6187, +0.4247, +0.1721, -0.1776, -0.0919, -0.8387, +0.0798, -0.0598, +0.2711, -0.0508, +0.1761, +0.0029],
|
||||
[ -0.2003, +0.2194, -0.6280, +0.1593, +0.1648, -0.1007, +0.3162, -0.3881, -0.1584, -0.0148, +0.7057, +0.0085, +0.3488, +0.0977, +0.4018, -0.8195, -0.1944, +0.4359, -0.6605, -0.1929, +0.2237, +0.1087, -0.4213, -0.7149, +0.3972, -0.1313, -0.2815, -0.7234, -0.0561, -0.5364, +0.0178, +0.0349],
|
||||
[ +0.0567, +0.1687, +0.0007, +0.2939, -1.3854, +0.0168, +0.1909, +0.4919, -0.4547, +0.0562, -0.1188, +0.1653, -0.0265, -0.0541, -0.1117, -0.3240, +0.2545, +0.6516, +0.0124, -0.1258, -0.0656, -0.3524, +0.0174, +0.3926, +0.1125, +0.2834, -0.1961, -0.3603, +0.1783, -0.0224, -0.6900, -0.1688],
|
||||
[ +0.0672, +0.6339, -0.3839, +0.0077, +0.8224, -0.3197, -0.0589, -0.1318, +0.0222, -0.1530, +0.1237, +0.4014, -0.1952, -0.1130, +0.4214, -0.2741, +0.2291, +0.0757, +0.0563, -0.0967, +0.4210, +0.5133, +0.0412, -0.9212, +0.1377, -0.4068, -0.3652, +0.4283, +0.6182, -0.6187, +0.1997, +0.1240],
|
||||
[ -0.0067, +0.3307, -0.7751, -0.2084, +0.4740, -0.0264, -0.0768, -0.9519, -0.0632, -0.0753, +0.3293, +0.5260, -0.6023, +0.0060, +0.2799, -0.2904, -0.8262, -0.6644, -0.3900, -0.1461, +0.4965, +0.3996, -0.7569, +0.0612, +0.5168, -0.5160, -0.4875, +0.3759, +0.0295, +0.1027, +0.6096, -0.0115],
|
||||
[ -0.0110, +0.4652, -0.1486, -0.6029, +0.2581, -0.3184, -0.3759, +0.3213, -0.2748, -0.0630, +0.0953, +0.2101, -1.2738, -0.1353, +0.2710, -0.2276, +0.2586, -0.2347, -0.3320, +0.0487, -0.2318, -0.1002, +0.1236, +0.2660, -0.1172, +0.1437, -0.0850, +0.1659, -0.2152, -0.0764, +0.2838, -0.1325],
|
||||
[ +0.0152, -0.0906, -0.1897, -0.3521, -0.1836, -0.1694, -0.4150, -0.1695, +0.0509, -0.0716, +0.3118, +0.2422, -0.5058, -0.0637, -0.1038, -0.2828, -0.0528, -0.2051, +0.2062, -0.2105, -0.7317, +0.1881, -0.2992, -0.0883, +0.0115, -1.5295, -0.1671, +0.0411, +0.0648, -0.0119, -0.2941, +0.0273],
|
||||
[ +0.5028, +0.1780, -0.4643, -0.0373, +0.3067, -0.1974, +0.2643, -0.2365, -0.2083, +0.0472, +0.4830, +0.0630, +0.2155, -0.0916, +0.6290, -0.4427, -0.6266, +0.3576, -0.3541, -0.2034, +0.3733, +0.8247, -0.5837, -0.4372, +0.2696, -0.4042, -0.3436, +0.0355, -0.2288, -0.6382, +0.7358, -0.1229]
|
||||
[
|
||||
+0.5019, +0.3831, +0.6726, +0.3767, +0.2021, -0.1615, +0.3882, -0.0487,
|
||||
-0.2713, +0.1173, -0.2218, +0.0598, +0.0819, -0.1157, +0.5879, -0.3587,
|
||||
+0.1376, -0.2595, +0.0257, -0.1182, +0.0723, +0.5612, -0.4087, -0.4651,
|
||||
+0.0631, +0.1786, +0.1206, +0.4791, +0.5922, -0.4444, +0.3446, -0.0464
|
||||
],
|
||||
[
|
||||
-0.0485, +0.0739, -0.6915, +0.5446, -0.2461, +0.1557, +0.8993, -0.7537,
|
||||
+0.1149, +0.0575, -0.1714, -0.3796, +0.3508, -0.2315, +0.4389, -1.4456,
|
||||
-1.3490, -0.1598, -1.0354, -0.2320, -0.3765, +0.1070, -0.7107, +0.4150,
|
||||
+0.2711, -0.2915, -0.7957, +0.7753, -0.0425, -0.1352, +0.3018, -0.0069
|
||||
],
|
||||
[
|
||||
-0.4047, +1.0040, -0.4570, +0.3017, +0.1477, -0.0163, +0.4087, -0.6368,
|
||||
-0.0764, -0.0695, +0.0208, -0.2411, +0.1936, +0.0047, +0.0107, -0.8538,
|
||||
-0.5887, -0.0524, -1.4902, +0.2858, +0.4396, -0.3433, -0.6778, -0.7137,
|
||||
+0.4587, +0.3359, -0.7350, -1.0813, -0.1296, +0.1748, -0.3830, -0.2103
|
||||
],
|
||||
[
|
||||
+0.0503, -0.3342, -0.6057, +0.2217, +0.3164, -0.1881, -0.5867, -0.2471,
|
||||
-0.2527, -0.0444, +0.1874, -0.0960, +0.2039, -0.0488, +0.1741, -0.1623,
|
||||
-0.0758, -0.2354, -0.5986, -0.2129, -0.2470, +0.3317, -0.4795, -0.6380,
|
||||
+0.1494, +0.0115, -0.2746, -0.8428, -0.0118, -0.0604, +0.0886, -0.0408
|
||||
],
|
||||
[
|
||||
-0.1932, -1.3896, +0.3919, -0.4700, -0.9806, -0.1554, +0.3132, +0.4138,
|
||||
-0.4943, -0.1408, -0.0976, +0.1551, -0.0180, +0.0864, -0.0053, -0.2430,
|
||||
+0.4948, +0.2709, -0.3488, +0.2085, -0.2124, -0.3025, -0.0692, +0.3884,
|
||||
+0.5764, +0.5783, +0.4351, -0.2633, -0.9288, +0.2218, -0.9049, -0.2970
|
||||
],
|
||||
[
|
||||
-0.2841, -0.3393, -0.1062, -0.1415, +0.0257, +0.0816, -0.4833, -0.2775,
|
||||
+0.0308, -0.0344, +0.5451, +0.1588, -0.7454, -0.1444, +0.4189, -0.2001,
|
||||
-2.0586, -0.0616, -1.4463, +0.0076, -0.7703, +0.3279, -0.7009, +0.6046,
-0.1615, -0.5188, -0.7503, +0.0615, +0.1815, -0.2512, +0.0321, -0.1834
],
[
+0.3751, +0.2932, -0.6815, +0.3771, +0.0603, -0.2035, -0.2644, -1.0120,
-0.0661, -0.0846, +0.1209, +0.0367, +0.0493, -0.2603, -0.1608, -0.7580,
-0.8609, +0.1415, -0.7626, -1.0209, -0.7498, -0.0732, -0.8138, -0.2292,
+0.5803, -0.2718, -1.4684, -0.1584, +0.2096, +0.1336, +0.3632, +0.0216
],
[
-0.0625, -0.1233, -0.2715, +0.5541, +0.3121, +0.0265, +0.4501, -1.1024,
-0.1160, -0.1005, -0.0844, -0.0516, +0.0916, +0.0901, +0.3710, -0.5753,
-0.3728, -0.1103, -0.6285, -0.2179, +0.1570, +0.1168, -0.9312, +0.0135,
-0.0376, -0.1693, -0.5358, -0.0028, +0.2105, -0.7373, +0.2776, +0.2326
],
[
-0.5378, -0.3201, +0.3606, +0.1331, +0.0120, -0.2421, -0.0230, +0.4622,
-0.3140, +0.0803, -0.6897, -0.4704, +0.2685, +0.0803, -0.7654, -0.1433,
+0.0242, +0.0917, +0.2458, +0.0457, -0.2503, -0.1197, +0.1454, -0.1523,
-0.4095, +0.1856, +0.0678, -1.0011, +0.0117, +0.1789, -0.4063, -0.0888
],
[
-0.6352, -0.6358, -0.2066, +0.0758, -0.2973, -0.3014, -0.0556, -0.0912,
-0.2729, -0.1492, -0.1928, -1.8768, +0.2183, +0.0801, +0.1288, -1.2493,
+0.1115, +0.2797, -0.1458, +0.0062, -0.0402, -0.8945, -0.2231, -0.1154,
+0.3635, -0.3021, +0.1402, -0.7347, +0.2772, +0.3182, -0.9708, +0.0376
],
[
+0.6899, +0.3471, -0.5863, +0.1497, +0.1616, -0.0497, +0.3579, -0.6421,
+0.4529, -0.1588, +0.9250, +0.2357, -0.0712, -0.1793, -0.0231, -0.4814,
-0.7654, +0.0591, -0.6866, -0.1705, +0.2255, -0.0007, -0.3890, +0.6114,
+0.0443, -0.6929, -0.7734, +0.2314, -0.0231, -0.6386, +0.1237, +0.0472
],
[
-0.2496, -0.1687, +0.1234, +0.4152, +0.4207, -0.1398, +0.1287, +0.5903,
+0.0530, -0.1181, +0.0803, -0.0641, -0.1198, -0.4702, -0.3669, +0.2340,
-0.3778, +0.4341, +0.2411, -0.2171, -0.3051, -0.2397, +0.1756, +0.4040,
+0.0682, +0.1575, +0.4137, +0.0887, -0.1998, +0.2221, -0.2474, -0.0559
],
[
-2.2466, -1.2725, +0.5947, -0.3192, -0.2665, -0.0129, -0.7615, +0.1148,
+0.2745, -0.0556, -1.3313, -0.7143, -0.5119, -0.0572, -0.1892, -0.3294,
-0.0187, -0.7696, +1.0413, +0.4226, +0.1378, -1.3668, +0.9806, -0.1810,
-0.2307, -0.4924, +0.7163, -1.2529, -0.3216, +0.1683, -0.6657, -0.1121
],
[
+0.1227, +0.2433, -0.1292, -0.7152, -0.1342, -0.1116, -0.2695, +0.0243,
-0.0770, -0.1713, +0.2837, +0.2076, -0.7322, -0.1657, -0.3407, -0.4378,
+0.0679, -0.3777, +0.3025, -0.6780, -0.2326, +0.1463, +0.0535, -0.6373,
-0.2027, -0.5404, -0.1598, +0.1511, -0.1776, +0.0854, +0.1753, -0.0342
],
[
-0.1772, -0.2654, -0.4170, -0.3301, +0.2956, -0.4234, +0.0835, +0.2869,
-0.2804, -0.2073, -0.3291, -0.5897, -0.4116, -0.0447, +0.1601, +0.1602,
+0.1691, -0.2014, -0.0502, +0.1167, -1.0103, -0.4297, -0.2039, -0.0859,
+0.2756, -0.1768, -0.2726, -0.0256, -0.0834, +0.0852, +0.0930, -0.0606
],
[
-0.5390, -0.5441, +0.3202, -0.1018, +0.0059, +0.1799, -0.1917, +0.3674,
+0.2576, -0.0707, -0.4401, -0.3990, +0.0565, +0.0751, -0.5959, +0.3866,
+0.2763, -0.2564, +0.4937, +0.5076, +0.3941, -0.3593, +0.4346, +0.2561,
-0.0762, -0.2873, +0.6820, -0.3032, -0.3268, +0.1319, -0.3643, +0.0292
],
[
+0.1816, -0.0451, -0.9370, +0.1335, -0.1030, -0.0400, +0.0311, -1.3751,
-0.1860, +0.1559, +0.5395, +0.3994, -0.1703, -0.1157, +0.6342, -0.4726,
-0.6213, -0.2096, -0.7549, -0.9815, -0.3798, +0.5286, -0.8413, +0.2577,
+0.2223, -1.2260, -1.3571, -0.0970, +0.3334, -0.2096, +0.3566, -0.1703
],
[
+0.0635, +0.1541, -0.2206, +0.0924, +0.1302, +0.1947, -0.3868, -0.6834,
-0.0603, -0.3752, +0.3103, -0.1699, -0.0833, -0.1190, -0.0310, -0.5480,
-1.1421, -0.0020, -0.3611, -0.3800, -0.0638, +0.0811, -0.5886, +0.0690,
+0.1925, +0.0710, -0.3142, +0.1837, +0.2125, -0.1217, +0.2185, +0.0458
],
[
-0.3973, +0.0486, +0.2518, -0.3208, +0.1218, -0.5324, -0.3417, +0.0322,
-0.0088, +0.0214, +0.2725, +0.0960, -0.2949, -0.1770, -0.1511, +0.0259,
+0.1161, -0.8829, +0.2415, +0.0939, -0.7213, +0.2220, +0.1687, -0.1802,
-0.0539, +0.1786, +0.6638, +0.3559, +0.2343, +0.3212, +0.4396, -0.1385
],
[
-0.2384, -0.5346, -0.2323, -0.2277, +0.3503, -0.0308, -0.2004, -0.1096,
-0.2587, -0.1143, +0.2579, +0.2382, -0.5883, -0.1277, +0.2257, -0.0244,
-0.9605, -0.4244, -0.7321, +0.3017, -1.6256, -0.2074, -0.8327, +0.0607,
-0.0751, -0.0153, -0.4485, +0.1758, +0.1821, +0.2625, +0.0108, -0.2395
],
[
-0.5639, -0.3613, +0.1291, -0.2132, +0.4927, -0.0604, -0.8067, +0.0933,
-0.1483, -0.0321, -0.6843, -0.3064, -0.5646, -0.2040, -0.0414, +0.6092,
+0.4903, -0.9346, +0.3389, +0.2040, -0.0295, -0.2196, +0.4264, +0.0312,
-1.1801, +0.3008, +0.7719, +0.2140, -0.0257, +0.5275, -0.0553, +0.0362
],
[
-0.6039, -1.2848, +0.6301, -0.1285, +0.2338, -0.2585, -0.3217, +0.4326,
+0.0441, -0.0356, -0.5720, -0.8739, -0.3924, +0.2499, -0.2620, +0.1396,
-0.0701, -0.2239, +0.2612, +0.1646, +0.7769, -0.6382, +0.8720, -0.1956,
-0.1779, -0.1608, -0.0358, -0.4453, -0.1154, +0.5532, -0.9282, +0.0031
],
[
-0.1990, +0.3708, -0.0049, -0.3260, -0.0465, +0.0415, +0.1601, +0.0019,
+0.0114, +0.0438, +0.0893, +0.3056, -0.6166, +0.1145, -0.6742, +0.0483,
+0.0739, -0.1139, +0.5772, -1.5569, +0.4253, -0.0769, +0.4014, -0.6817,
+0.0228, -0.0383, -0.0844, -0.1560, +0.1414, -0.3420, +0.3664, -0.2293
],
[
-0.0917, -0.8692, +0.4704, +0.1796, -0.1346, -0.5246, +0.0622, +0.3420,
-0.5879, -0.0445, -0.3444, -0.0490, +0.0956, -0.0753, -0.8856, +0.1275,
+0.1592, +0.3569, +0.1774, +0.2723, +0.1125, -0.1718, +0.2451, -0.0132,
+0.1584, -0.0197, +0.0700, -0.2156, +0.0094, +0.4639, -0.6721, -0.2180
],
[
+0.0578, -0.1570, -0.1623, -0.1359, +0.1346, +0.1821, -0.0696, -0.0570,
+0.0011, +0.1216, +0.1069, -0.0841, +0.1017, -0.1663, -0.6005, -0.4583,
-0.2606, -0.0292, +0.0321, -0.5614, -0.4416, +0.0355, +0.2081, +0.3517,
+0.0619, -1.0007, -0.0765, +0.1769, -0.1286, +0.5833, -0.1758, -0.1957
],
[
-0.0013, +0.3157, +0.0395, -1.0792, -0.1198, -0.2945, -0.0090, +0.3281,
-0.0618, -0.0806, +0.0768, +0.2802, -0.2311, -0.2302, +0.0506, +0.0552,
+0.3727, +0.3610, +0.2029, -0.1743, +0.4557, -0.1761, -0.5039, -0.9115,
+0.2842, +0.1317, -0.5961, -0.4214, -1.0727, +0.3308, +0.2380, -0.3348
],
[
+0.2455, -0.1299, +0.3117, -1.0169, -0.3417, +0.0310, -0.4793, +0.5334,
-0.4799, -0.3291, -0.1344, +0.3732, -0.1514, +0.1574, -0.1819, -0.0206,
+0.5675, -0.6992, +0.4815, -0.1497, -0.3804, +0.1389, +0.5850, -0.2920,
+0.2569, -0.3527, +0.3641, -0.2014, -0.1457, +0.2365, -0.2335, -0.2610
],
[
-0.2252, +0.1225, +0.0953, -0.0193, +0.3955, -0.0800, +0.0090, -0.4155,
+0.1851, +0.3392, -0.3260, -0.3907, +0.1320, +0.1266, +0.0579, +0.1819,
-0.5793, -0.2230, +0.1351, -0.1519, -0.0527, -0.0036, +0.1243, +0.1387,
-0.2874, -0.4997, -0.3251, +0.0435, -0.5244, +0.1051, -0.2081, +0.2126
],
[
-0.6574, +0.6789, +0.1026, -0.5191, +0.1058, -0.6812, +0.1798, -0.1029,
+0.0757, -0.0089, +0.1539, +0.4688, -0.1306, +0.0595, -0.8136, -0.4843,
+0.3675, +0.1800, +0.2641, -0.0589, +0.0613, +0.2019, -0.0765, -0.1216,
-0.4588, +0.0629, +0.1133, +0.7055, -2.8075, +0.3867, +0.4791, -0.1118
],
[
+0.2771, +0.3461, -0.8556, -0.0316, +0.3640, -0.1380, -0.3765, -0.9258,
-0.0693, -0.1283, +0.0576, -0.0792, +0.4468, -0.5001, +0.5939, -1.2226,
-0.9252, -0.3980, -1.3444, -0.9488, -0.7199, +0.4289, -1.8749, -0.0867,
+0.3905, -0.4535, -0.5607, -0.2247, -0.0359, -0.4125, +0.7124, -0.1963
],
[
-0.2584, -0.5358, -0.0916, +0.0765, +0.0615, -0.1887, -0.2253, -0.7855,
-0.0061, -0.1887, +0.5511, +0.3207, -0.2055, -0.1694, +0.4772, -1.0356,
-0.9884, -0.2620, -0.1214, +0.9733, -0.9700, -0.3205, -0.7005, -0.2960,
+0.1132, -0.0352, +0.3491, -0.2440, +0.1108, +0.1083, +0.3029, -0.0031
],
[
-0.6217, +0.1238, +0.0245, -0.1769, -0.2487, +0.0526, -0.0090, +0.1370,
+0.2666, -0.0743, -0.8230, -0.7723, -0.0929, -0.1532, +0.6103, -0.4931,
-1.3329, -0.3735, +0.0217, -0.1539, -0.4946, -1.0838, -0.5840, +0.1618,
+0.2584, +0.4200, +0.1171, -0.5601, +0.1604, +0.0864, +0.2287, -0.0057
],
[
-0.2220, +0.4837, -0.0825, +0.0143, +0.2734, -0.0853, +0.1578, -0.0112,
+0.1829, +0.0390, +0.2151, -0.1538, -0.1111, -0.0773, +0.3439, -0.2134,
-0.2884, -0.3831, +0.2767, -0.3149, +0.1463, +0.3230, +0.2187, -0.2309,
-0.1096, +0.3709, -0.0105, +0.3709, +0.3034, -0.7602, +0.5988, -0.0595
],
[
-0.6073, +0.1780, +0.1682, +0.1604, +0.3662, -0.0385, -0.1495, +0.3012,
-0.2065, -0.0163, -1.0465, -0.8268, -0.0190, +0.0964, -0.2755, +0.0965,
-0.3466, -0.3758, -0.1113, +0.1462, +0.3280, -0.1600, +0.1023, +0.1998,
-0.3642, +0.2736, +0.3782, -0.2681, +0.2334, +0.1721, +0.0385, +0.0348
],
[
-0.0582, -0.5750, +0.1279, +0.3630, -0.2404, -0.1511, +0.2650, -0.0324,
-0.2258, +0.0007, +0.3051, -0.1875, -0.5106, +0.0104, +0.1335, -0.5282,
-0.2210, +0.2648, -0.7506, +0.4975, -1.7048, +0.2378, -0.1771, +0.2981,
+0.1252, +0.1384, -0.3384, -0.0830, +0.0966, +0.3728, -0.1980, -0.1953
],
[
-1.0735, -0.2780, +0.1428, -0.0624, -0.0311, -0.2687, -0.1623, +0.2996,
+0.1782, -0.1403, -0.3761, -1.3413, -0.2020, -0.0492, -0.6636, -0.2737,
+0.2228, +0.3109, +0.1596, +0.0172, +0.1325, -1.4936, -0.0615, -0.1547,
-0.2285, +0.2648, -0.1008, -1.6756, -0.2352, +0.0998, -0.4550, +0.2028
],
[
-0.3866, -0.0107, +0.1052, +0.1423, +0.1160, +0.1712, -0.6206, -0.3505,
-0.3298, -0.0362, +0.6768, +0.2086, -0.4348, -0.3577, +0.0131, -0.1640,
+0.0160, -0.3891, -0.0180, -0.1064, -0.2494, +0.0340, +0.2225, -0.1320,
-0.3550, -0.3005, +0.0118, +0.2782, +0.4691, -1.3792, +0.1971, -0.0598
],
[
+0.0215, +0.1885, -0.5360, -0.1283, +0.4689, +0.1426, -0.2809, -0.8197,
+0.1951, -0.1620, +0.0627, +0.2864, -0.3069, -0.1170, +0.0545, -0.4527,
-0.6646, -0.1546, -0.1794, -0.5350, -0.1060, -0.0198, -0.5782, -0.2201,
+0.0361, -0.2497, -0.1527, -0.1489, +0.1034, +0.0925, +0.0368, -0.0352
],
[
+0.2459, +0.3230, -0.0494, -0.5631, +0.0600, -0.3036, -0.5443, +0.1081,
-0.2231, +0.0734, +0.0289, +0.4205, -0.6415, -0.1305, -0.0717, +0.2971,
+0.0476, -1.3001, +0.5122, -0.0005, -0.3572, +0.0727, +0.1713, -0.4751,
-0.3614, -0.0957, -0.0942, +0.0580, +0.2393, +0.0038, +0.1938, -0.1704
],
[
+0.3352, -0.0882, -0.0349, -0.6093, +0.4262, -0.1350, -0.0687, -0.2459,
-0.5564, -0.2956, +0.1619, -0.0813, -0.5128, -0.2209, +0.3870, -0.0804,
+0.7676, -0.1745, -0.3860, -0.5517, -0.6899, -0.6400, +0.6095, -0.5337,
+0.3452, -0.6608, +0.0662, +0.1741, +0.1653, -0.4191, +0.1051, -0.3116
],
[
-0.0527, -1.3119, +0.3441, -0.0041, -0.5938, -0.4224, +0.3973, +0.4673,
-0.0613, -0.0191, +0.1297, -0.2211, -0.0880, +0.0319, +0.0661, -0.2075,
+0.4380, +0.3197, +0.0989, +0.2346, -0.0142, -1.2137, +0.1618, -0.3300,
+0.4591, +0.4910, +0.3537, -0.5902, -0.0616, +0.2882, -0.0900, -0.0208
],
[
-0.7068, -0.7952, +0.4496, +0.1237, -0.2000, -0.5966, +0.3920, +0.3458,
+0.0036, -0.0666, -0.3061, -0.1172, +0.0446, +0.1768, -0.5318, +0.2083,
+0.3371, +0.1497, +0.4244, +0.3980, +0.2023, -0.8931, +0.1860, -0.6889,
-0.3250, +0.1250, +0.1510, -0.3405, -0.4040, +0.1598, -0.9933, +0.0233
],
[
-1.2305, -0.3178, +0.0536, -0.0585, -0.7097, +0.3196, +0.2899, +0.8200,
+0.0384, +0.1733, -1.1839, -2.2545, +0.0653, +0.1376, -0.1359, -0.1202,
-0.0831, -0.5397, +0.1100, +0.1386, -0.1271, -0.6298, +0.1038, -0.1213,
-0.1461, -0.4508, +0.5106, -0.8266, -0.6204, +0.3753, -0.4897, -0.0751
],
[
-0.3676, -0.5547, +0.0897, -0.0230, -0.3045, -0.1885, -0.5990, +0.3622,
-0.2240, -0.1205, -0.3056, +0.7085, +0.0053, -0.1213, -0.3023, +0.1433,
-0.2090, -0.0412, +0.2561, +0.1313, -0.2803, +0.2543, +0.0571, -0.9791,
-0.0167, -0.2928, -0.3020, -0.2271, +0.0507, -0.1310, -0.6347, -0.0889
],
[
-0.2794, +0.0675, -1.0020, -0.2234, +0.3937, -0.2857, +0.1058, -1.0755,
-0.0377, -0.2753, -0.0501, -0.0493, -0.2987, -0.2214, +0.2869, -1.0882,
-1.2635, -1.2235, -0.5762, -0.4528, -0.1372, -0.0192, -1.3768, +0.2337,
+0.2008, -0.2517, -0.3918, -0.6362, -0.1762, -0.9261, +0.1711, -0.0094
],
[
-0.1099, -0.2142, -0.0006, -0.4617, -0.0286, +0.3482, -0.7728, -0.4384,
+0.0050, -0.0151, +0.1974, +0.2815, -0.5295, -0.2581, +0.3404, -1.6254,
-1.3208, -0.1648, -0.5207, +0.4104, -0.2795, +0.0613, -1.5642, -0.1178,
-0.1354, +0.0375, +0.3323, +0.0540, +0.2038, -0.3223, +0.4603, -0.3780
],
[
-0.3999, -0.3719, +0.1918, -0.4738, -0.0009, +0.0419, +0.1046, +0.2675,
+0.1359, -0.2536, -0.3485, -0.3118, -0.3613, +0.0914, -0.4486, +0.2719,
+0.2876, -0.0685, +0.4309, +0.1856, +0.4678, -0.3314, +0.0211, +0.2575,
+0.5077, -0.1494, +0.5110, -0.6869, -1.4053, +0.3093, -0.2914, -0.1501
],
[
+0.3543, +0.3915, +0.0536, +0.3995, +0.2165, -0.1133, -0.1209, +0.0824,
-0.0723, -0.0774, -0.4248, -0.0243, -0.1089, -0.1408, +0.2072, -0.1309,
-1.5186, -0.4079, -0.0530, -0.3525, +0.6782, +0.1991, -0.0292, +0.1339,
-0.1074, +0.2312, +0.1969, +0.4662, +0.5312, -0.3306, +0.0622, +0.1057
],
[
-1.1778, +0.2978, +0.0443, +0.1657, +0.1317, -0.1250, -0.0459, +0.0777,
+0.1359, -0.0055, +0.2364, -2.3659, +0.2214, -0.1489, -0.3051, -0.5094,
+0.1495, +0.3328, +0.1264, -0.0217, +0.2321, -0.6466, -0.1813, +0.5276,
+0.1975, +0.3752, +0.1469, -0.8019, +0.2427, +0.1543, +0.2140, -0.1592
],
[
-0.7753, -1.3502, +0.3157, +0.1847, +0.0661, -0.5501, +0.3482, +0.6112,
+0.0207, +0.0534, -0.2106, -1.0144, -0.0836, -0.0275, -1.0761, +0.2131,
+0.3135, +0.3134, +0.1974, +0.0182, +0.1975, -1.1221, +0.2958, -0.2610,
+0.0865, +0.3592, +0.4317, -0.3505, -0.4557, +0.3033, -0.5797, -0.2988
],
[
+0.4103, -0.0643, +0.0803, +0.2177, +0.1028, -0.2668, +0.0084, -0.2340,
-0.2571, +0.0334, +0.3451, -0.0055, +0.0216, -0.1460, +0.5293, -0.2615,
-0.3035, +0.1736, -0.4206, -0.2186, +0.1343, +0.6001, -0.0499, -0.2777,
-0.0160, -0.4303, -0.2795, +0.1932, +0.4219, -0.0800, +0.1819, -0.1007
],
[
-0.7074, -0.0546, +0.4495, +0.1427, +0.3306, +0.0811, -0.5433, -0.0609,
-0.2128, -0.1059, -1.0477, -0.4679, -0.1780, -0.1373, -0.3672, +0.0724,
-0.0554, -0.5400, +0.0457, -0.0469, -0.0367, -0.4609, +0.1668, -0.0266,
-0.9007, +0.2975, +0.5204, -0.0453, -0.1314, -0.0980, +0.1424, -0.1877
],
[
+0.0657, +0.1230, -0.2558, +0.3103, -0.0795, -0.1243, +0.1956, +0.0262,
-0.2626, -0.0554, +0.3760, +0.3076, -0.4633, +0.0790, +0.2363, -0.3311,
+0.1235, -0.1727, -0.2468, +0.0188, -0.1121, -0.2807, -0.5865, -0.4197,
+0.1949, -0.4970, -1.0413, -0.1698, +0.1798, +0.2004, -0.0514, +0.0254
],
[
-0.1566, -1.1156, +0.4431, -0.1503, -0.5682, +0.1822, -0.1201, +0.5151,
-0.1386, -0.1764, +0.2063, -0.8582, +0.3750, -0.1405, +0.0852, +0.2641,
-0.1951, -0.0575, -0.4181, +0.2273, +0.1332, -0.2797, +0.5406, -0.0869,
+0.2453, +0.0648, +0.2252, -0.0628, -0.6882, -0.0514, -0.4663, -0.0954
],
[
-0.4780, +0.5844, +0.1782, -0.0831, +0.1547, -0.0595, -0.5646, -0.0488,
-0.1774, -0.0098, +0.1833, +0.3520, -0.3359, -0.1492, +0.1139, -0.1223,
-0.5312, -0.5361, +0.1689, -0.2020, +0.1069, +0.2327, +0.2887, +0.0526,
-0.5916, -0.2435, -0.2342, +0.3422, +0.4399, -1.1880, +0.1293, -0.1021
],
[
-1.2784, -1.8266, +0.0630, -0.3332, -0.5833, -0.3733, +0.3265, +0.1977,
+0.0716, -0.2575, +0.0403, -0.1961, +0.1541, -0.2311, -0.1734, -0.1785,
+0.0168, +0.1134, +0.0407, -0.1661, +0.5985, -1.9681, +0.1342, +0.3432,
+0.3934, +0.0663, +0.3141, -2.0177, -1.7071, +0.2942, -1.0684, -0.0737
],
[
+0.1763, +0.2191, +0.2609, +0.0723, +0.1038, -0.2516, -0.9086, +0.1536,
+0.0153, +0.1061, +0.1675, +0.3839, -0.5326, +0.2007, -0.4943, -0.1048,
+0.1614, -0.4703, +0.3453, -0.7441, -0.6187, +0.4247, +0.1721, -0.1776,
-0.0919, -0.8387, +0.0798, -0.0598, +0.2711, -0.0508, +0.1761, +0.0029
],
[
-0.2003, +0.2194, -0.6280, +0.1593, +0.1648, -0.1007, +0.3162, -0.3881,
-0.1584, -0.0148, +0.7057, +0.0085, +0.3488, +0.0977, +0.4018, -0.8195,
-0.1944, +0.4359, -0.6605, -0.1929, +0.2237, +0.1087, -0.4213, -0.7149,
+0.3972, -0.1313, -0.2815, -0.7234, -0.0561, -0.5364, +0.0178, +0.0349
],
[
+0.0567, +0.1687, +0.0007, +0.2939, -1.3854, +0.0168, +0.1909, +0.4919,
-0.4547, +0.0562, -0.1188, +0.1653, -0.0265, -0.0541, -0.1117, -0.3240,
+0.2545, +0.6516, +0.0124, -0.1258, -0.0656, -0.3524, +0.0174, +0.3926,
+0.1125, +0.2834, -0.1961, -0.3603, +0.1783, -0.0224, -0.6900, -0.1688
],
[
+0.0672, +0.6339, -0.3839, +0.0077, +0.8224, -0.3197, -0.0589, -0.1318,
+0.0222, -0.1530, +0.1237, +0.4014, -0.1952, -0.1130, +0.4214, -0.2741,
+0.2291, +0.0757, +0.0563, -0.0967, +0.4210, +0.5133, +0.0412, -0.9212,
+0.1377, -0.4068, -0.3652, +0.4283, +0.6182, -0.6187, +0.1997, +0.1240
],
[
-0.0067, +0.3307, -0.7751, -0.2084, +0.4740, -0.0264, -0.0768, -0.9519,
-0.0632, -0.0753, +0.3293, +0.5260, -0.6023, +0.0060, +0.2799, -0.2904,
-0.8262, -0.6644, -0.3900, -0.1461, +0.4965, +0.3996, -0.7569, +0.0612,
+0.5168, -0.5160, -0.4875, +0.3759, +0.0295, +0.1027, +0.6096, -0.0115
],
[
-0.0110, +0.4652, -0.1486, -0.6029, +0.2581, -0.3184, -0.3759, +0.3213,
-0.2748, -0.0630, +0.0953, +0.2101, -1.2738, -0.1353, +0.2710, -0.2276,
+0.2586, -0.2347, -0.3320, +0.0487, -0.2318, -0.1002, +0.1236, +0.2660,
-0.1172, +0.1437, -0.0850, +0.1659, -0.2152, -0.0764, +0.2838, -0.1325
],
[
+0.0152, -0.0906, -0.1897, -0.3521, -0.1836, -0.1694, -0.4150, -0.1695,
+0.0509, -0.0716, +0.3118, +0.2422, -0.5058, -0.0637, -0.1038, -0.2828,
-0.0528, -0.2051, +0.2062, -0.2105, -0.7317, +0.1881, -0.2992, -0.0883,
+0.0115, -1.5295, -0.1671, +0.0411, +0.0648, -0.0119, -0.2941, +0.0273
],
[
+0.5028, +0.1780, -0.4643, -0.0373, +0.3067, -0.1974, +0.2643, -0.2365,
-0.2083, +0.0472, +0.4830, +0.0630, +0.2155, -0.0916, +0.6290, -0.4427,
-0.6266, +0.3576, -0.3541, -0.2034, +0.3733, +0.8247, -0.5837, -0.4372,
+0.2696, -0.4042, -0.3436, +0.0355, -0.2288, -0.6382, +0.7358, -0.1229
]
])

weights_dense2_b = np.array([ -0.0730, +0.0456, +0.0877, -0.2607, +0.0029, -0.2705, -0.1420, +0.2403, -0.2135, -0.0646, +0.1378, +0.1105, -0.4639, -0.0583, -0.0872, -0.1473, +0.1460, -0.0234, +0.0740, -0.0745, -0.1283, +0.0316, +0.0361, -0.0726, -0.0304, +0.0417, -0.0313, +0.0935, +0.0815, +0.0814, +0.0818, -0.1111])

weights_final_w = np.array([
[ +1.0397],
[ +0.7049],
[ -0.2128],
[ +0.2172],
[ +0.3027],
[ -0.1991],
[ +0.3398],
[ -0.5932],
[ -0.1439],
[ -0.0236],
[ +0.5679],
[ +0.8571],
[ +0.1934],
[ -0.1652],
[ +0.6933],
[ -0.5510],
[ -1.0587],
[ +0.6996],
[ -0.5009],
[ -0.4000],
[ -0.6958],
[ +0.7716],
[ -0.5342],
[ -0.5095],
[ +0.3040],
[ -1.1986],
[ -0.4900],
[ +0.7726],
[ +0.5871],
[ -0.2533],
[ +0.2633],
[ -0.0004]
weights_dense2_b = np.array([
-0.0730, +0.0456, +0.0877, -0.2607, +0.0029, -0.2705, -0.1420, +0.2403,
-0.2135, -0.0646, +0.1378, +0.1105, -0.4639, -0.0583, -0.0872, -0.1473,
+0.1460, -0.0234, +0.0740, -0.0745, -0.1283, +0.0316, +0.0361, -0.0726,
-0.0304, +0.0417, -0.0313, +0.0935, +0.0815, +0.0814, +0.0818, -0.1111
])

weights_final_b = np.array([ +0.0190])
weights_final_w = np.array([[+1.0397], [+0.7049], [-0.2128], [+0.2172],
[+0.3027], [-0.1991], [+0.3398], [-0.5932],
[-0.1439], [-0.0236], [+0.5679], [+0.8571],
[+0.1934], [-0.1652], [+0.6933], [-0.5510],
[-1.0587], [+0.6996], [-0.5009], [-0.4000],
[-0.6958], [+0.7716], [-0.5342], [-0.5095],
[+0.3040], [-1.1986], [-0.4900], [+0.7726],
[+0.5871], [-0.2533], [+0.2633], [-0.0004]])

if __name__=="__main__":
  main()
weights_final_b = np.array([+0.0190])
# yapf: enable

if __name__ == "__main__":
  main()
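Note (not part of the diff): the arrays above are hard-coded policy weights with shapes that suggest a small dense network ending in a single output (weights_dense2_b has 32 entries, weights_final_w is 32x1, weights_final_b is 1). As a minimal sketch of how such arrays could be combined, the snippet below applies the last two layers with plain NumPy; the relu activation, the `hidden` input vector, and the layer order are assumptions for illustration only, since the earlier layers and the activation used by the original script are not visible in this part of the diff.

import numpy as np

def dense2_and_final(hidden, w2, b2, wf, bf):
  # hidden: activations from the preceding layer (not shown in this section)
  h2 = np.maximum(0.0, np.matmul(hidden, w2) + b2)  # assumed relu activation
  return np.matmul(h2, wf) + bf  # final dense layer -> single output value

# Hypothetical usage with the arrays defined above:
# action = dense2_and_final(hidden, weights_dense2_w, weights_dense2_b,
#                           weights_final_w, weights_final_b)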