diff --git a/examples/SharedMemory/IKTrajectoryHelper.cpp b/examples/SharedMemory/IKTrajectoryHelper.cpp
index 0a6cbbf80..0145251bb 100644
--- a/examples/SharedMemory/IKTrajectoryHelper.cpp
+++ b/examples/SharedMemory/IKTrajectoryHelper.cpp
@@ -185,7 +185,9 @@ bool IKTrajectoryHelper::computeNullspaceVel(int numQ, const double* q_current,
 {
     m_data->m_nullSpaceVelocity.SetLength(numQ);
     m_data->m_nullSpaceVelocity.SetZero();
-    double stayCloseToZeroGain = 0.1;
+    // TODO: Expose the coefficients of the null space term so that the user can choose to balance the null space task and the IK target task.
+    // Can also adaptively adjust the coefficients based on the residual of the null space velocity in the IK target task space.
+    double stayCloseToZeroGain = 0.001;
     double stayAwayFromLimitsGain = 10.0;
 
     // Stay close to zero
diff --git a/examples/SharedMemory/PhysicsServerCommandProcessor.cpp b/examples/SharedMemory/PhysicsServerCommandProcessor.cpp
index 7f3223b59..f9e0cd6a9 100644
--- a/examples/SharedMemory/PhysicsServerCommandProcessor.cpp
+++ b/examples/SharedMemory/PhysicsServerCommandProcessor.cpp
@@ -6667,6 +6667,11 @@ bool PhysicsServerCommandProcessor::processCommand(const struct SharedMemoryComm
 }
 // Only calculate if the localPosition is non-zero.
 if (btInverseDynamics::maxAbs(localPosition) > 0.0) {
+    // Transform the localPosition into world coordinates.
+    btInverseDynamics::mat33 world_rotation_body;
+    tree->getBodyTransform(clientCmd.m_calculateJacobianArguments.m_linkIndex + 1, &world_rotation_body);
+    localPosition = world_rotation_body * localPosition;
+    // Correct the translational Jacobian.
     btInverseDynamics::mat33 skewCrossProduct;
     btInverseDynamics::skew(localPosition, &skewCrossProduct);
     btInverseDynamics::mat3x jac_l(3, numDofs + baseDofs);
diff --git a/examples/ThirdPartyLibs/BussIK/Jacobian.cpp b/examples/ThirdPartyLibs/BussIK/Jacobian.cpp
index 301910a9c..c082bef31 100644
--- a/examples/ThirdPartyLibs/BussIK/Jacobian.cpp
+++ b/examples/ThirdPartyLibs/BussIK/Jacobian.cpp
@@ -354,6 +354,12 @@ void Jacobian::CalcDeltaThetasDLSwithNullspace(const VectorRn& desiredV)
     // Compute null space velocity
     VectorRn nullV(J.GetNumColumns());
     P.Multiply(desiredV, nullV);
+
+    // Compute the residual of the null space velocity in the task space
+    VectorRn residual(J.GetNumRows());
+    J.Multiply(nullV, residual);
+    // TODO: Use the residual to set the null space term coefficient adaptively.
+    //printf("residual: %f\n", residual.Norm());
 
     // Add null space velocity
     dTheta += nullV;
diff --git a/examples/pybullet/examples/inverse_kinematics.py b/examples/pybullet/examples/inverse_kinematics.py
index 0dba11bf9..9aa32afed 100644
--- a/examples/pybullet/examples/inverse_kinematics.py
+++ b/examples/pybullet/examples/inverse_kinematics.py
@@ -37,20 +37,21 @@
 hasPrevPose = 0
 useNullSpace = 0
 
 useOrientation = 1
-useSimulation = 1
+#If we set useSimulation=0, the arm pose is set to the IK result directly, without using dynamic control.
+#This can be used to test the IK result accuracy.
+useSimulation = 0
 useRealTimeSimulation = 1
 p.setRealTimeSimulation(useRealTimeSimulation)
 #trailDuration is duration (in seconds) after debug lines will be removed automatically
 #use 0 for no-removal
 trailDuration = 15
-
 while 1:
     if (useRealTimeSimulation):
         dt = datetime.now()
         t = (dt.second/60.)*2.*math.pi
     else:
-        t=t+0.1
+        t=t+0.001
 
     if (useSimulation and useRealTimeSimulation==0):
         p.stepSimulation()
diff --git a/examples/pybullet/examples/jacobian.py b/examples/pybullet/examples/jacobian.py
new file mode 100644
index 000000000..17638bfb3
--- /dev/null
+++ b/examples/pybullet/examples/jacobian.py
@@ -0,0 +1,66 @@
+import pybullet as p
+
+
+def getJointStates(robot):
+    joint_states = p.getJointStates(robot, range(p.getNumJoints(robot)))
+    joint_positions = [state[0] for state in joint_states]
+    joint_velocities = [state[1] for state in joint_states]
+    joint_torques = [state[3] for state in joint_states]
+    return joint_positions, joint_velocities, joint_torques
+
+def setJointPosition(robot, position, kp=1.0, kv=0.3):
+    num_joints = p.getNumJoints(robot)
+    zero_vec = [0.0] * num_joints
+    if len(position) == num_joints:
+        p.setJointMotorControlArray(robot, range(num_joints), p.POSITION_CONTROL,
+                                    targetPositions=position, targetVelocities=zero_vec,
+                                    positionGains=[kp] * num_joints, velocityGains=[kv] * num_joints)
+    else:
+        print("Not setting position. "
+              "Expected position vector of "
+              "length {}, got {}".format(num_joints, len(position)))
+
+def multiplyJacobian(jacobian, vector):
+    result = [0.0, 0.0, 0.0]
+    for c in range(len(vector)):
+        for r in range(3):
+            result[r] += jacobian[r][c] * vector[c]
+    return result
+
+
+clid = p.connect(p.SHARED_MEMORY)
+if (clid<0):
+    p.connect(p.DIRECT)
+time_step = 0.001
+gravity_constant = -9.81
+p.resetSimulation()
+p.setTimeStep(time_step)
+p.setGravity(0.0, 0.0, gravity_constant)
+p.loadURDF("plane.urdf",[0,0,-0.3])
+kukaId = p.loadURDF("kuka_iiwa/model.urdf",[0,0,0])
+p.resetBasePositionAndOrientation(kukaId,[0,0,0],[0,0,0,1])
+kukaEndEffectorIndex = 6
+numJoints = p.getNumJoints(kukaId)
+if (numJoints!=7):
+    exit()
+# Set a joint target for the position control and step the sim.
+setJointPosition(kukaId, [0.1] * p.getNumJoints(kukaId))
+p.stepSimulation()
+# Get the joint and link state directly from Bullet.
+pos, vel, torq = getJointStates(kukaId)
+result = p.getLinkState(kukaId, kukaEndEffectorIndex, computeLinkVelocity=1, computeForwardKinematics=1)
+link_trn, link_rot, com_trn, com_rot, frame_pos, frame_rot, link_vt, link_vr = result
+# Get the Jacobians for the CoM of the end-effector link.
+# Note that in this example com_rot == identity; otherwise we would need to use com_rot.T * com_trn.
+# The localPosition is always defined in terms of the link frame coordinates.
+zero_vec = [0.0] * numJoints
+jac_t, jac_r = p.calculateJacobian(kukaId, kukaEndEffectorIndex, com_trn, pos, zero_vec, zero_vec)
+
+print ("Link linear velocity of CoM from getLinkState:")
+print (link_vt)
+print ("Link linear velocity of CoM from linearJacobian * q_dot:")
+print (multiplyJacobian(jac_t, vel))
+print ("Link angular velocity of CoM from getLinkState:")
+print (link_vr)
+print ("Link angular velocity of CoM from angularJacobian * q_dot:")
+print (multiplyJacobian(jac_r, vel))
diff --git a/examples/pybullet/gym/pybullet_envs/agents/config_ppo.py b/examples/pybullet/gym/pybullet_envs/agents/configs.py
similarity index 73%
rename from examples/pybullet/gym/pybullet_envs/agents/config_ppo.py
rename to examples/pybullet/gym/pybullet_envs/agents/configs.py
index 2ccd38adc..9b2cb2da2 100644
--- a/examples/pybullet/gym/pybullet_envs/agents/config_ppo.py
+++ b/examples/pybullet/gym/pybullet_envs/agents/configs.py
@@ -1,8 +1,25 @@
-"""The PPO training configuration file for minitaur environments."""
+# Copyright 2017 The TensorFlow Agents Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Example configurations using the PPO algorithm."""
+
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
+
 import functools
+
 from agents import ppo
 from agents.scripts import networks
 from pybullet_envs.bullet import minitaur_gym_env
@@ -11,33 +28,33 @@ import pybullet_envs.bullet.minitaur_gym_env as minitaur_gym_env
 import pybullet_envs
 
 
-# pylint: disable=unused-variable
 def default():
-  """The default configurations."""
+  """Default configuration for PPO."""
   # General
   algorithm = ppo.PPOAlgorithm
-  num_agents = 25
+  num_agents = 10
   eval_episodes = 25
   use_gpu = False
   # Network
   network = networks.ForwardGaussianPolicy
   weight_summaries = dict(
-      all=r'.*', policy=r'.*/policy/.*', value=r'.*/value/.*')
+      all=r'.*',
+      policy=r'.*/policy/.*',
+      value=r'.*/value/.*')
   policy_layers = 200, 100
   value_layers = 200, 100
-  init_mean_factor = 0.2
+  init_mean_factor = 0.05
   init_logstd = -1
-  network_config = dict()
   # Optimization
   update_every = 25
   policy_optimizer = 'AdamOptimizer'
   value_optimizer = 'AdamOptimizer'
-  update_epochs_policy = 25
-  update_epochs_value = 25
-  value_lr = 1e-3
+  update_epochs_policy = 50
+  update_epochs_value = 50
   policy_lr = 1e-4
+  value_lr = 3e-4
   # Losses
-  discount = 0.99
+  discount = 0.985
   kl_target = 1e-2
   kl_cutoff_factor = 2
   kl_cutoff_coef = 1000
@@ -107,4 +124,3 @@ def pybullet_minitaur():
   steps = 3e7  # 30M
   return locals()
 
-
diff --git a/examples/pybullet/gym/pybullet_envs/agents/train_ppo.py b/examples/pybullet/gym/pybullet_envs/agents/train_ppo.py
index 195df7df8..6b6a49766 100644
--- a/examples/pybullet/gym/pybullet_envs/agents/train_ppo.py
+++ b/examples/pybullet/gym/pybullet_envs/agents/train_ppo.py
@@ -1,8 +1,22 @@
-r"""Script to use Proximal Policy Gradient for the minitaur environments.
+# Copyright 2017 The TensorFlow Agents Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 
-Run:
-  python train_ppo.py --logdif=/tmp/train --config=minitaur_pybullet
+r"""Script to train a batch reinforcement learning algorithm.
 
+Command line:
+
+  python3 -m agents.scripts.train --logdir=/path/to/logdir --config=pendulum
 """
 
 from __future__ import absolute_import
@@ -10,39 +24,142 @@
 from __future__ import division
 from __future__ import print_function
 
 import datetime
+import functools
 import os
+
+import gym
 import tensorflow as tf
 
 from agents import tools
-from agents.scripts import train
+from . import configs
 from agents.scripts import utility
-from . import config_ppo
 
-flags = tf.app.flags
-FLAGS = tf.app.flags.FLAGS
+def _create_environment(config):
+  """Constructor for an instance of the environment.
 
-flags.DEFINE_string(
-    'logdir', None,
-    'Base directory to store logs.')
-flags.DEFINE_string(
-    'config', None,
-    'Configuration to execute.')
-flags.DEFINE_string(
-    'timestamp', datetime.datetime.now().strftime('%Y%m%dT%H%M%S'),
-    'Sub directory to store logs.')
+  Args:
+    config: Object providing configurations via attributes.
+
+  Returns:
+    Wrapped OpenAI Gym environment.
+  """
+  if isinstance(config.env, str):
+    env = gym.make(config.env)
+  else:
+    env = config.env()
+  if config.max_length:
+    env = tools.wrappers.LimitDuration(env, config.max_length)
+  env = tools.wrappers.RangeNormalize(env)
+  env = tools.wrappers.ClipAction(env)
+  env = tools.wrappers.ConvertTo32Bit(env)
+  return env
+
+
+def _define_loop(graph, logdir, train_steps, eval_steps):
+  """Create and configure a training loop with training and evaluation phases.
+
+  Args:
+    graph: Object providing graph elements via attributes.
+    logdir: Log directory for storing checkpoints and summaries.
+    train_steps: Number of training steps per epoch.
+    eval_steps: Number of evaluation steps per epoch.
+
+  Returns:
+    Loop object.
+  """
+  loop = tools.Loop(
+      logdir, graph.step, graph.should_log, graph.do_report,
+      graph.force_reset)
+  loop.add_phase(
+      'train', graph.done, graph.score, graph.summary, train_steps,
+      report_every=None,
+      log_every=train_steps // 2,
+      checkpoint_every=None,
+      feed={graph.is_training: True})
+  loop.add_phase(
+      'eval', graph.done, graph.score, graph.summary, eval_steps,
+      report_every=eval_steps,
+      log_every=eval_steps // 2,
+      checkpoint_every=10 * eval_steps,
+      feed={graph.is_training: False})
+  return loop
+
+
+def train(config, env_processes):
+  """Training and evaluation entry point yielding scores.
+
+  Resolves some configuration attributes, creates environments, graph, and
+  training loop. By default, assigns all operations to the CPU.
+
+  Args:
+    config: Object providing configurations via attributes.
+    env_processes: Whether to step environments in separate processes.
+
+  Yields:
+    Evaluation scores.
+ """ + tf.reset_default_graph() + with config.unlocked: + config.network = functools.partial( + utility.define_network, config.network, config) + config.policy_optimizer = getattr(tf.train, config.policy_optimizer) + config.value_optimizer = getattr(tf.train, config.value_optimizer) + if config.update_every % config.num_agents: + tf.logging.warn('Number of agents should divide episodes per update.') + with tf.device('/cpu:0'): + batch_env = utility.define_batch_env( + lambda: _create_environment(config), + config.num_agents, env_processes) + graph = utility.define_simulation_graph( + batch_env, config.algorithm, config) + loop = _define_loop( + graph, config.logdir, + config.update_every * config.max_length, + config.eval_episodes * config.max_length) + total_steps = int( + config.steps / config.update_every * + (config.update_every + config.eval_episodes)) + # Exclude episode related variables since the Python state of environments is + # not checkpointed and thus new episodes start after resuming. + saver = utility.define_saver(exclude=(r'.*_temporary/.*',)) + sess_config = tf.ConfigProto(allow_soft_placement=True) + sess_config.gpu_options.allow_growth = True + with tf.Session(config=sess_config) as sess: + utility.initialize_variables(sess, saver, config.logdir) + for score in loop.run(sess, saver, total_steps): + yield score + batch_env.close() def main(_): """Create or load configuration and launch the trainer.""" - config = tools.AttrDict(getattr(config_ppo, FLAGS.config)()) - logdir = FLAGS.logdir and os.path.join( - FLAGS.logdir, '{}-{}'.format(FLAGS.timestamp, FLAGS.config)) - utility.save_config(config, logdir) - for score in train.train(config, env_processes=True): - tf.logging.info(str(score)) + utility.set_up_logging() + if not FLAGS.config: + raise KeyError('You must specify a configuration.') + logdir = FLAGS.logdir and os.path.expanduser(os.path.join( + FLAGS.logdir, '{}-{}'.format(FLAGS.timestamp, FLAGS.config))) + try: + config = utility.load_config(logdir) + except IOError: + config = tools.AttrDict(getattr(configs, FLAGS.config)()) + config = utility.save_config(config, logdir) + for score in train(config, FLAGS.env_processes): + tf.logging.info('Score {}.'.format(score)) if __name__ == '__main__': + FLAGS = tf.app.flags.FLAGS + tf.app.flags.DEFINE_string( + 'logdir', None, + 'Base directory to store logs.') + tf.app.flags.DEFINE_string( + 'timestamp', datetime.datetime.now().strftime('%Y%m%dT%H%M%S'), + 'Sub directory to store logs.') + tf.app.flags.DEFINE_string( + 'config', None, + 'Configuration to execute.') + tf.app.flags.DEFINE_boolean( + 'env_processes', True, + 'Step environments in separate processes to circumvent the GIL.') tf.app.run() - diff --git a/examples/pybullet/gym/pybullet_envs/agents/visualize_ppo.py b/examples/pybullet/gym/pybullet_envs/agents/visualize_ppo.py index fa5f7b21c..f6673dc1d 100644 --- a/examples/pybullet/gym/pybullet_envs/agents/visualize_ppo.py +++ b/examples/pybullet/gym/pybullet_envs/agents/visualize_ppo.py @@ -1,42 +1,157 @@ +# Copyright 2017 The TensorFlow Agents Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and
+# limitations under the License.
-r"""Script to visualize the trained PPO agent.
+r"""Script to render videos of the Proximal Policy Gradient algorithm.
 
-python -m pybullet_envs.agents.visualize \
---logdir=ppo
---outdir=/tmp/video/
+Command line:
+
+  python3 -m agents.scripts.visualize \
+      --logdir=/path/to/logdir/